{ "best_metric": 0.18269842863082886, "best_model_checkpoint": "./ap_train_outputs_new/checkpoint-7725", "epoch": 150.0, "eval_steps": 500, "global_step": 386250, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 1.999948220064725e-05, "loss": 2.4306, "step": 10 }, { "epoch": 0.01, "learning_rate": 1.99989644012945e-05, "loss": 2.3106, "step": 20 }, { "epoch": 0.01, "learning_rate": 1.999844660194175e-05, "loss": 2.1619, "step": 30 }, { "epoch": 0.02, "learning_rate": 1.9997928802589e-05, "loss": 2.042, "step": 40 }, { "epoch": 0.02, "learning_rate": 1.9997411003236248e-05, "loss": 2.0323, "step": 50 }, { "epoch": 0.02, "learning_rate": 1.9996893203883496e-05, "loss": 1.9385, "step": 60 }, { "epoch": 0.03, "learning_rate": 1.9996375404530747e-05, "loss": 1.8729, "step": 70 }, { "epoch": 0.03, "learning_rate": 1.9995857605177995e-05, "loss": 1.8625, "step": 80 }, { "epoch": 0.03, "learning_rate": 1.9995339805825243e-05, "loss": 1.6289, "step": 90 }, { "epoch": 0.04, "learning_rate": 1.9994822006472494e-05, "loss": 1.664, "step": 100 }, { "epoch": 0.04, "learning_rate": 1.9994304207119742e-05, "loss": 1.7535, "step": 110 }, { "epoch": 0.05, "learning_rate": 1.9993786407766994e-05, "loss": 1.5424, "step": 120 }, { "epoch": 0.05, "learning_rate": 1.999326860841424e-05, "loss": 1.5944, "step": 130 }, { "epoch": 0.05, "learning_rate": 1.999275080906149e-05, "loss": 1.4155, "step": 140 }, { "epoch": 0.06, "learning_rate": 1.999223300970874e-05, "loss": 1.3795, "step": 150 }, { "epoch": 0.06, "learning_rate": 1.999171521035599e-05, "loss": 1.3255, "step": 160 }, { "epoch": 0.07, "learning_rate": 1.9991197411003237e-05, "loss": 1.4067, "step": 170 }, { "epoch": 0.07, "learning_rate": 1.9990679611650488e-05, "loss": 1.423, "step": 180 }, { "epoch": 0.07, "learning_rate": 1.9990161812297736e-05, "loss": 1.2975, "step": 190 }, { "epoch": 0.08, "learning_rate": 1.9989644012944987e-05, "loss": 1.2546, "step": 200 }, { "epoch": 0.08, "learning_rate": 1.9989126213592235e-05, "loss": 1.1929, "step": 210 }, { "epoch": 0.09, "learning_rate": 1.9988608414239483e-05, "loss": 1.2079, "step": 220 }, { "epoch": 0.09, "learning_rate": 1.998809061488673e-05, "loss": 1.2546, "step": 230 }, { "epoch": 0.09, "learning_rate": 1.9987572815533982e-05, "loss": 1.1213, "step": 240 }, { "epoch": 0.1, "learning_rate": 1.998705501618123e-05, "loss": 1.0472, "step": 250 }, { "epoch": 0.1, "learning_rate": 1.998653721682848e-05, "loss": 1.0242, "step": 260 }, { "epoch": 0.1, "learning_rate": 1.998601941747573e-05, "loss": 1.202, "step": 270 }, { "epoch": 0.11, "learning_rate": 1.998550161812298e-05, "loss": 1.0183, "step": 280 }, { "epoch": 0.11, "learning_rate": 1.998498381877023e-05, "loss": 0.9347, "step": 290 }, { "epoch": 0.12, "learning_rate": 1.9984466019417477e-05, "loss": 0.9239, "step": 300 }, { "epoch": 0.12, "learning_rate": 1.9983948220064725e-05, "loss": 1.037, "step": 310 }, { "epoch": 0.12, "learning_rate": 1.9983430420711976e-05, "loss": 1.1471, "step": 320 }, { "epoch": 0.13, "learning_rate": 1.9982912621359224e-05, "loss": 0.9171, "step": 330 }, { "epoch": 0.13, "learning_rate": 1.9982394822006475e-05, "loss": 0.7951, "step": 340 }, { "epoch": 0.14, "learning_rate": 1.9981877022653723e-05, "loss": 0.8798, "step": 350 }, { "epoch": 0.14, "learning_rate": 1.9981359223300974e-05, "loss": 0.9753, "step": 360 }, { "epoch": 0.14, "learning_rate": 1.9980841423948222e-05, "loss": 0.8387, "step": 370 }, { "epoch": 0.15, "learning_rate": 1.998032362459547e-05, "loss": 0.9324, "step": 380 }, { "epoch": 0.15, "learning_rate": 1.9979805825242718e-05, "loss": 0.8375, "step": 390 }, { "epoch": 0.16, "learning_rate": 1.997928802588997e-05, "loss": 0.7599, "step": 400 }, { "epoch": 0.16, "learning_rate": 1.9978770226537217e-05, "loss": 0.8142, "step": 410 }, { "epoch": 0.16, "learning_rate": 1.997825242718447e-05, "loss": 0.7867, "step": 420 }, { "epoch": 0.17, "learning_rate": 1.9977734627831717e-05, "loss": 0.7268, "step": 430 }, { "epoch": 0.17, "learning_rate": 1.9977216828478968e-05, "loss": 0.7552, "step": 440 }, { "epoch": 0.17, "learning_rate": 1.9976699029126216e-05, "loss": 0.8379, "step": 450 }, { "epoch": 0.18, "learning_rate": 1.9976181229773464e-05, "loss": 0.7393, "step": 460 }, { "epoch": 0.18, "learning_rate": 1.997566343042071e-05, "loss": 0.7218, "step": 470 }, { "epoch": 0.19, "learning_rate": 1.9975145631067963e-05, "loss": 0.7362, "step": 480 }, { "epoch": 0.19, "learning_rate": 1.997462783171521e-05, "loss": 0.7993, "step": 490 }, { "epoch": 0.19, "learning_rate": 1.9974110032362462e-05, "loss": 0.7622, "step": 500 }, { "epoch": 0.2, "learning_rate": 1.997359223300971e-05, "loss": 0.6441, "step": 510 }, { "epoch": 0.2, "learning_rate": 1.997307443365696e-05, "loss": 0.7303, "step": 520 }, { "epoch": 0.21, "learning_rate": 1.9972556634304206e-05, "loss": 0.6459, "step": 530 }, { "epoch": 0.21, "learning_rate": 1.9972038834951457e-05, "loss": 0.5734, "step": 540 }, { "epoch": 0.21, "learning_rate": 1.9971521035598705e-05, "loss": 0.6804, "step": 550 }, { "epoch": 0.22, "learning_rate": 1.9971003236245957e-05, "loss": 0.6168, "step": 560 }, { "epoch": 0.22, "learning_rate": 1.9970485436893204e-05, "loss": 0.6384, "step": 570 }, { "epoch": 0.23, "learning_rate": 1.9969967637540456e-05, "loss": 0.6387, "step": 580 }, { "epoch": 0.23, "learning_rate": 1.9969449838187704e-05, "loss": 0.7027, "step": 590 }, { "epoch": 0.23, "learning_rate": 1.9968932038834955e-05, "loss": 0.6637, "step": 600 }, { "epoch": 0.24, "learning_rate": 1.9968414239482203e-05, "loss": 0.5355, "step": 610 }, { "epoch": 0.24, "learning_rate": 1.996789644012945e-05, "loss": 0.5599, "step": 620 }, { "epoch": 0.24, "learning_rate": 1.99673786407767e-05, "loss": 0.551, "step": 630 }, { "epoch": 0.25, "learning_rate": 1.996686084142395e-05, "loss": 0.5663, "step": 640 }, { "epoch": 0.25, "learning_rate": 1.9966343042071198e-05, "loss": 0.6923, "step": 650 }, { "epoch": 0.26, "learning_rate": 1.996582524271845e-05, "loss": 0.6918, "step": 660 }, { "epoch": 0.26, "learning_rate": 1.9965307443365697e-05, "loss": 0.576, "step": 670 }, { "epoch": 0.26, "learning_rate": 1.996478964401295e-05, "loss": 0.5371, "step": 680 }, { "epoch": 0.27, "learning_rate": 1.9964271844660196e-05, "loss": 0.5559, "step": 690 }, { "epoch": 0.27, "learning_rate": 1.9963754045307444e-05, "loss": 0.4964, "step": 700 }, { "epoch": 0.28, "learning_rate": 1.9963236245954692e-05, "loss": 0.6167, "step": 710 }, { "epoch": 0.28, "learning_rate": 1.9962718446601944e-05, "loss": 0.6102, "step": 720 }, { "epoch": 0.28, "learning_rate": 1.996220064724919e-05, "loss": 0.4764, "step": 730 }, { "epoch": 0.29, "learning_rate": 1.9961682847896443e-05, "loss": 0.5488, "step": 740 }, { "epoch": 0.29, "learning_rate": 1.996116504854369e-05, "loss": 0.544, "step": 750 }, { "epoch": 0.3, "learning_rate": 1.9960647249190942e-05, "loss": 0.3878, "step": 760 }, { "epoch": 0.3, "learning_rate": 1.996012944983819e-05, "loss": 0.4912, "step": 770 }, { "epoch": 0.3, "learning_rate": 1.9959611650485438e-05, "loss": 0.5108, "step": 780 }, { "epoch": 0.31, "learning_rate": 1.9959093851132686e-05, "loss": 0.3333, "step": 790 }, { "epoch": 0.31, "learning_rate": 1.9958576051779937e-05, "loss": 0.5888, "step": 800 }, { "epoch": 0.31, "learning_rate": 1.9958058252427185e-05, "loss": 0.5131, "step": 810 }, { "epoch": 0.32, "learning_rate": 1.9957540453074436e-05, "loss": 0.4222, "step": 820 }, { "epoch": 0.32, "learning_rate": 1.9957022653721684e-05, "loss": 0.5501, "step": 830 }, { "epoch": 0.33, "learning_rate": 1.9956504854368936e-05, "loss": 0.5809, "step": 840 }, { "epoch": 0.33, "learning_rate": 1.9955987055016184e-05, "loss": 0.4968, "step": 850 }, { "epoch": 0.33, "learning_rate": 1.995546925566343e-05, "loss": 0.4028, "step": 860 }, { "epoch": 0.34, "learning_rate": 1.995495145631068e-05, "loss": 0.5314, "step": 870 }, { "epoch": 0.34, "learning_rate": 1.995443365695793e-05, "loss": 0.5976, "step": 880 }, { "epoch": 0.35, "learning_rate": 1.995391585760518e-05, "loss": 0.4516, "step": 890 }, { "epoch": 0.35, "learning_rate": 1.995339805825243e-05, "loss": 0.4242, "step": 900 }, { "epoch": 0.35, "learning_rate": 1.9952880258899678e-05, "loss": 0.5095, "step": 910 }, { "epoch": 0.36, "learning_rate": 1.995236245954693e-05, "loss": 0.4831, "step": 920 }, { "epoch": 0.36, "learning_rate": 1.9951844660194177e-05, "loss": 0.4483, "step": 930 }, { "epoch": 0.37, "learning_rate": 1.9951326860841425e-05, "loss": 0.5792, "step": 940 }, { "epoch": 0.37, "learning_rate": 1.9950809061488673e-05, "loss": 0.4076, "step": 950 }, { "epoch": 0.37, "learning_rate": 1.9950291262135924e-05, "loss": 0.4534, "step": 960 }, { "epoch": 0.38, "learning_rate": 1.9949773462783172e-05, "loss": 0.4433, "step": 970 }, { "epoch": 0.38, "learning_rate": 1.9949255663430424e-05, "loss": 0.4012, "step": 980 }, { "epoch": 0.38, "learning_rate": 1.994873786407767e-05, "loss": 0.4392, "step": 990 }, { "epoch": 0.39, "learning_rate": 1.9948220064724923e-05, "loss": 0.346, "step": 1000 }, { "epoch": 0.39, "learning_rate": 1.994770226537217e-05, "loss": 0.3584, "step": 1010 }, { "epoch": 0.4, "learning_rate": 1.994718446601942e-05, "loss": 0.466, "step": 1020 }, { "epoch": 0.4, "learning_rate": 1.9946666666666667e-05, "loss": 0.4881, "step": 1030 }, { "epoch": 0.4, "learning_rate": 1.9946148867313918e-05, "loss": 0.4798, "step": 1040 }, { "epoch": 0.41, "learning_rate": 1.9945631067961166e-05, "loss": 0.4721, "step": 1050 }, { "epoch": 0.41, "learning_rate": 1.9945113268608417e-05, "loss": 0.6007, "step": 1060 }, { "epoch": 0.42, "learning_rate": 1.9944595469255665e-05, "loss": 0.3789, "step": 1070 }, { "epoch": 0.42, "learning_rate": 1.9944077669902916e-05, "loss": 0.3315, "step": 1080 }, { "epoch": 0.42, "learning_rate": 1.9943559870550164e-05, "loss": 0.4815, "step": 1090 }, { "epoch": 0.43, "learning_rate": 1.9943042071197412e-05, "loss": 0.3936, "step": 1100 }, { "epoch": 0.43, "learning_rate": 1.994252427184466e-05, "loss": 0.5256, "step": 1110 }, { "epoch": 0.43, "learning_rate": 1.994200647249191e-05, "loss": 0.366, "step": 1120 }, { "epoch": 0.44, "learning_rate": 1.994148867313916e-05, "loss": 0.3964, "step": 1130 }, { "epoch": 0.44, "learning_rate": 1.994097087378641e-05, "loss": 0.5443, "step": 1140 }, { "epoch": 0.45, "learning_rate": 1.994045307443366e-05, "loss": 0.3093, "step": 1150 }, { "epoch": 0.45, "learning_rate": 1.993993527508091e-05, "loss": 0.3967, "step": 1160 }, { "epoch": 0.45, "learning_rate": 1.9939417475728158e-05, "loss": 0.4326, "step": 1170 }, { "epoch": 0.46, "learning_rate": 1.9938899676375406e-05, "loss": 0.2558, "step": 1180 }, { "epoch": 0.46, "learning_rate": 1.9938381877022654e-05, "loss": 0.5055, "step": 1190 }, { "epoch": 0.47, "learning_rate": 1.9937864077669905e-05, "loss": 0.3684, "step": 1200 }, { "epoch": 0.47, "learning_rate": 1.9937346278317153e-05, "loss": 0.2573, "step": 1210 }, { "epoch": 0.47, "learning_rate": 1.9936828478964404e-05, "loss": 0.4943, "step": 1220 }, { "epoch": 0.48, "learning_rate": 1.9936310679611652e-05, "loss": 0.3349, "step": 1230 }, { "epoch": 0.48, "learning_rate": 1.9935792880258903e-05, "loss": 0.4965, "step": 1240 }, { "epoch": 0.49, "learning_rate": 1.993527508090615e-05, "loss": 0.4059, "step": 1250 }, { "epoch": 0.49, "learning_rate": 1.99347572815534e-05, "loss": 0.3418, "step": 1260 }, { "epoch": 0.49, "learning_rate": 1.9934239482200647e-05, "loss": 0.3945, "step": 1270 }, { "epoch": 0.5, "learning_rate": 1.99337216828479e-05, "loss": 0.3294, "step": 1280 }, { "epoch": 0.5, "learning_rate": 1.9933203883495146e-05, "loss": 0.4028, "step": 1290 }, { "epoch": 0.5, "learning_rate": 1.9932686084142398e-05, "loss": 0.3644, "step": 1300 }, { "epoch": 0.51, "learning_rate": 1.9932168284789646e-05, "loss": 0.461, "step": 1310 }, { "epoch": 0.51, "learning_rate": 1.9931650485436894e-05, "loss": 0.3644, "step": 1320 }, { "epoch": 0.52, "learning_rate": 1.9931132686084145e-05, "loss": 0.426, "step": 1330 }, { "epoch": 0.52, "learning_rate": 1.9930614886731393e-05, "loss": 0.3641, "step": 1340 }, { "epoch": 0.52, "learning_rate": 1.993009708737864e-05, "loss": 0.3879, "step": 1350 }, { "epoch": 0.53, "learning_rate": 1.9929579288025892e-05, "loss": 0.3104, "step": 1360 }, { "epoch": 0.53, "learning_rate": 1.992906148867314e-05, "loss": 0.415, "step": 1370 }, { "epoch": 0.54, "learning_rate": 1.992854368932039e-05, "loss": 0.3901, "step": 1380 }, { "epoch": 0.54, "learning_rate": 1.992802588996764e-05, "loss": 0.3854, "step": 1390 }, { "epoch": 0.54, "learning_rate": 1.9927508090614887e-05, "loss": 0.2604, "step": 1400 }, { "epoch": 0.55, "learning_rate": 1.992699029126214e-05, "loss": 0.3043, "step": 1410 }, { "epoch": 0.55, "learning_rate": 1.9926472491909386e-05, "loss": 0.3011, "step": 1420 }, { "epoch": 0.56, "learning_rate": 1.9925954692556634e-05, "loss": 0.356, "step": 1430 }, { "epoch": 0.56, "learning_rate": 1.9925436893203886e-05, "loss": 0.3095, "step": 1440 }, { "epoch": 0.56, "learning_rate": 1.9924919093851134e-05, "loss": 0.5059, "step": 1450 }, { "epoch": 0.57, "learning_rate": 1.9924401294498385e-05, "loss": 0.3261, "step": 1460 }, { "epoch": 0.57, "learning_rate": 1.9923883495145633e-05, "loss": 0.434, "step": 1470 }, { "epoch": 0.57, "learning_rate": 1.992336569579288e-05, "loss": 0.4395, "step": 1480 }, { "epoch": 0.58, "learning_rate": 1.9922847896440132e-05, "loss": 0.2863, "step": 1490 }, { "epoch": 0.58, "learning_rate": 1.992233009708738e-05, "loss": 0.4658, "step": 1500 }, { "epoch": 0.59, "learning_rate": 1.9921812297734628e-05, "loss": 0.3183, "step": 1510 }, { "epoch": 0.59, "learning_rate": 1.992129449838188e-05, "loss": 0.2679, "step": 1520 }, { "epoch": 0.59, "learning_rate": 1.9920776699029127e-05, "loss": 0.2732, "step": 1530 }, { "epoch": 0.6, "learning_rate": 1.992025889967638e-05, "loss": 0.2877, "step": 1540 }, { "epoch": 0.6, "learning_rate": 1.9919741100323626e-05, "loss": 0.3261, "step": 1550 }, { "epoch": 0.61, "learning_rate": 1.9919223300970874e-05, "loss": 0.3296, "step": 1560 }, { "epoch": 0.61, "learning_rate": 1.9918705501618126e-05, "loss": 0.3115, "step": 1570 }, { "epoch": 0.61, "learning_rate": 1.9918187702265374e-05, "loss": 0.3859, "step": 1580 }, { "epoch": 0.62, "learning_rate": 1.991766990291262e-05, "loss": 0.3414, "step": 1590 }, { "epoch": 0.62, "learning_rate": 1.9917152103559873e-05, "loss": 0.3421, "step": 1600 }, { "epoch": 0.63, "learning_rate": 1.991663430420712e-05, "loss": 0.3558, "step": 1610 }, { "epoch": 0.63, "learning_rate": 1.9916116504854372e-05, "loss": 0.3691, "step": 1620 }, { "epoch": 0.63, "learning_rate": 1.991559870550162e-05, "loss": 0.2435, "step": 1630 }, { "epoch": 0.64, "learning_rate": 1.9915080906148868e-05, "loss": 0.283, "step": 1640 }, { "epoch": 0.64, "learning_rate": 1.991456310679612e-05, "loss": 0.3628, "step": 1650 }, { "epoch": 0.64, "learning_rate": 1.9914045307443367e-05, "loss": 0.3772, "step": 1660 }, { "epoch": 0.65, "learning_rate": 1.9913527508090615e-05, "loss": 0.3549, "step": 1670 }, { "epoch": 0.65, "learning_rate": 1.9913009708737866e-05, "loss": 0.2832, "step": 1680 }, { "epoch": 0.66, "learning_rate": 1.9912491909385114e-05, "loss": 0.3916, "step": 1690 }, { "epoch": 0.66, "learning_rate": 1.9911974110032362e-05, "loss": 0.4128, "step": 1700 }, { "epoch": 0.66, "learning_rate": 1.9911456310679613e-05, "loss": 0.3781, "step": 1710 }, { "epoch": 0.67, "learning_rate": 1.991093851132686e-05, "loss": 0.4316, "step": 1720 }, { "epoch": 0.67, "learning_rate": 1.9910420711974113e-05, "loss": 0.4245, "step": 1730 }, { "epoch": 0.68, "learning_rate": 1.990990291262136e-05, "loss": 0.2219, "step": 1740 }, { "epoch": 0.68, "learning_rate": 1.9909385113268612e-05, "loss": 0.3784, "step": 1750 }, { "epoch": 0.68, "learning_rate": 1.990886731391586e-05, "loss": 0.1817, "step": 1760 }, { "epoch": 0.69, "learning_rate": 1.9908349514563108e-05, "loss": 0.3053, "step": 1770 }, { "epoch": 0.69, "learning_rate": 1.9907831715210356e-05, "loss": 0.3578, "step": 1780 }, { "epoch": 0.7, "learning_rate": 1.9907313915857607e-05, "loss": 0.5767, "step": 1790 }, { "epoch": 0.7, "learning_rate": 1.9906796116504855e-05, "loss": 0.5208, "step": 1800 }, { "epoch": 0.7, "learning_rate": 1.9906278317152106e-05, "loss": 0.2593, "step": 1810 }, { "epoch": 0.71, "learning_rate": 1.9905760517799354e-05, "loss": 0.2761, "step": 1820 }, { "epoch": 0.71, "learning_rate": 1.9905242718446606e-05, "loss": 0.3884, "step": 1830 }, { "epoch": 0.71, "learning_rate": 1.9904724919093853e-05, "loss": 0.2963, "step": 1840 }, { "epoch": 0.72, "learning_rate": 1.99042071197411e-05, "loss": 0.3972, "step": 1850 }, { "epoch": 0.72, "learning_rate": 1.990368932038835e-05, "loss": 0.407, "step": 1860 }, { "epoch": 0.73, "learning_rate": 1.99031715210356e-05, "loss": 0.4125, "step": 1870 }, { "epoch": 0.73, "learning_rate": 1.990265372168285e-05, "loss": 0.3526, "step": 1880 }, { "epoch": 0.73, "learning_rate": 1.99021359223301e-05, "loss": 0.2346, "step": 1890 }, { "epoch": 0.74, "learning_rate": 1.9901618122977348e-05, "loss": 0.4111, "step": 1900 }, { "epoch": 0.74, "learning_rate": 1.99011003236246e-05, "loss": 0.3567, "step": 1910 }, { "epoch": 0.75, "learning_rate": 1.9900582524271847e-05, "loss": 0.3123, "step": 1920 }, { "epoch": 0.75, "learning_rate": 1.9900064724919095e-05, "loss": 0.447, "step": 1930 }, { "epoch": 0.75, "learning_rate": 1.9899546925566343e-05, "loss": 0.2182, "step": 1940 }, { "epoch": 0.76, "learning_rate": 1.9899029126213594e-05, "loss": 0.196, "step": 1950 }, { "epoch": 0.76, "learning_rate": 1.9898511326860842e-05, "loss": 0.3428, "step": 1960 }, { "epoch": 0.77, "learning_rate": 1.9897993527508093e-05, "loss": 0.4228, "step": 1970 }, { "epoch": 0.77, "learning_rate": 1.989747572815534e-05, "loss": 0.4352, "step": 1980 }, { "epoch": 0.77, "learning_rate": 1.9896957928802593e-05, "loss": 0.3245, "step": 1990 }, { "epoch": 0.78, "learning_rate": 1.9896440129449837e-05, "loss": 0.5161, "step": 2000 }, { "epoch": 0.78, "learning_rate": 1.989592233009709e-05, "loss": 0.3086, "step": 2010 }, { "epoch": 0.78, "learning_rate": 1.9895404530744336e-05, "loss": 0.3461, "step": 2020 }, { "epoch": 0.79, "learning_rate": 1.9894886731391588e-05, "loss": 0.5122, "step": 2030 }, { "epoch": 0.79, "learning_rate": 1.9894368932038836e-05, "loss": 0.339, "step": 2040 }, { "epoch": 0.8, "learning_rate": 1.9893851132686087e-05, "loss": 0.2856, "step": 2050 }, { "epoch": 0.8, "learning_rate": 1.9893333333333335e-05, "loss": 0.3003, "step": 2060 }, { "epoch": 0.8, "learning_rate": 1.9892815533980586e-05, "loss": 0.374, "step": 2070 }, { "epoch": 0.81, "learning_rate": 1.989229773462783e-05, "loss": 0.234, "step": 2080 }, { "epoch": 0.81, "learning_rate": 1.9891779935275082e-05, "loss": 0.2748, "step": 2090 }, { "epoch": 0.82, "learning_rate": 1.989126213592233e-05, "loss": 0.4099, "step": 2100 }, { "epoch": 0.82, "learning_rate": 1.989074433656958e-05, "loss": 0.3507, "step": 2110 }, { "epoch": 0.82, "learning_rate": 1.989022653721683e-05, "loss": 0.3572, "step": 2120 }, { "epoch": 0.83, "learning_rate": 1.988970873786408e-05, "loss": 0.3164, "step": 2130 }, { "epoch": 0.83, "learning_rate": 1.988919093851133e-05, "loss": 0.1405, "step": 2140 }, { "epoch": 0.83, "learning_rate": 1.988867313915858e-05, "loss": 0.3479, "step": 2150 }, { "epoch": 0.84, "learning_rate": 1.9888155339805824e-05, "loss": 0.3915, "step": 2160 }, { "epoch": 0.84, "learning_rate": 1.9887637540453076e-05, "loss": 0.1772, "step": 2170 }, { "epoch": 0.85, "learning_rate": 1.9887119741100324e-05, "loss": 0.1413, "step": 2180 }, { "epoch": 0.85, "learning_rate": 1.9886601941747575e-05, "loss": 0.4333, "step": 2190 }, { "epoch": 0.85, "learning_rate": 1.9886084142394823e-05, "loss": 0.3713, "step": 2200 }, { "epoch": 0.86, "learning_rate": 1.9885566343042074e-05, "loss": 0.2158, "step": 2210 }, { "epoch": 0.86, "learning_rate": 1.9885048543689322e-05, "loss": 0.3854, "step": 2220 }, { "epoch": 0.87, "learning_rate": 1.9884530744336573e-05, "loss": 0.2415, "step": 2230 }, { "epoch": 0.87, "learning_rate": 1.9884012944983818e-05, "loss": 0.2313, "step": 2240 }, { "epoch": 0.87, "learning_rate": 1.988349514563107e-05, "loss": 0.1163, "step": 2250 }, { "epoch": 0.88, "learning_rate": 1.9882977346278317e-05, "loss": 0.4688, "step": 2260 }, { "epoch": 0.88, "learning_rate": 1.988245954692557e-05, "loss": 0.3287, "step": 2270 }, { "epoch": 0.89, "learning_rate": 1.9881941747572816e-05, "loss": 0.4998, "step": 2280 }, { "epoch": 0.89, "learning_rate": 1.9881423948220068e-05, "loss": 0.3809, "step": 2290 }, { "epoch": 0.89, "learning_rate": 1.9880906148867316e-05, "loss": 0.3248, "step": 2300 }, { "epoch": 0.9, "learning_rate": 1.9880388349514567e-05, "loss": 0.294, "step": 2310 }, { "epoch": 0.9, "learning_rate": 1.9879870550161815e-05, "loss": 0.3172, "step": 2320 }, { "epoch": 0.9, "learning_rate": 1.9879352750809063e-05, "loss": 0.0986, "step": 2330 }, { "epoch": 0.91, "learning_rate": 1.987883495145631e-05, "loss": 0.3691, "step": 2340 }, { "epoch": 0.91, "learning_rate": 1.9878317152103562e-05, "loss": 0.2916, "step": 2350 }, { "epoch": 0.92, "learning_rate": 1.987779935275081e-05, "loss": 0.3384, "step": 2360 }, { "epoch": 0.92, "learning_rate": 1.987728155339806e-05, "loss": 0.2748, "step": 2370 }, { "epoch": 0.92, "learning_rate": 1.987676375404531e-05, "loss": 0.4302, "step": 2380 }, { "epoch": 0.93, "learning_rate": 1.987624595469256e-05, "loss": 0.1998, "step": 2390 }, { "epoch": 0.93, "learning_rate": 1.987572815533981e-05, "loss": 0.1732, "step": 2400 }, { "epoch": 0.94, "learning_rate": 1.9875210355987056e-05, "loss": 0.2847, "step": 2410 }, { "epoch": 0.94, "learning_rate": 1.9874692556634304e-05, "loss": 0.3177, "step": 2420 }, { "epoch": 0.94, "learning_rate": 1.9874174757281555e-05, "loss": 0.259, "step": 2430 }, { "epoch": 0.95, "learning_rate": 1.9873656957928803e-05, "loss": 0.3444, "step": 2440 }, { "epoch": 0.95, "learning_rate": 1.9873139158576055e-05, "loss": 0.3259, "step": 2450 }, { "epoch": 0.96, "learning_rate": 1.9872621359223303e-05, "loss": 0.3542, "step": 2460 }, { "epoch": 0.96, "learning_rate": 1.9872103559870554e-05, "loss": 0.3646, "step": 2470 }, { "epoch": 0.96, "learning_rate": 1.9871585760517802e-05, "loss": 0.3131, "step": 2480 }, { "epoch": 0.97, "learning_rate": 1.987106796116505e-05, "loss": 0.2609, "step": 2490 }, { "epoch": 0.97, "learning_rate": 1.9870550161812298e-05, "loss": 0.135, "step": 2500 }, { "epoch": 0.97, "learning_rate": 1.987003236245955e-05, "loss": 0.3044, "step": 2510 }, { "epoch": 0.98, "learning_rate": 1.9869514563106797e-05, "loss": 0.3589, "step": 2520 }, { "epoch": 0.98, "learning_rate": 1.9868996763754048e-05, "loss": 0.3952, "step": 2530 }, { "epoch": 0.99, "learning_rate": 1.9868478964401296e-05, "loss": 0.2184, "step": 2540 }, { "epoch": 0.99, "learning_rate": 1.9867961165048548e-05, "loss": 0.237, "step": 2550 }, { "epoch": 0.99, "learning_rate": 1.9867443365695795e-05, "loss": 0.2093, "step": 2560 }, { "epoch": 1.0, "learning_rate": 1.9866925566343043e-05, "loss": 0.2639, "step": 2570 }, { "epoch": 1.0, "eval_accuracy": 0.946079779917469, "eval_loss": 0.21915487945079803, "eval_runtime": 8.8172, "eval_samples_per_second": 412.261, "eval_steps_per_second": 51.603, "step": 2575 }, { "epoch": 1.0, "learning_rate": 1.986640776699029e-05, "loss": 0.268, "step": 2580 }, { "epoch": 1.01, "learning_rate": 1.9865889967637543e-05, "loss": 0.2991, "step": 2590 }, { "epoch": 1.01, "learning_rate": 1.986537216828479e-05, "loss": 0.3589, "step": 2600 }, { "epoch": 1.01, "learning_rate": 1.9864854368932042e-05, "loss": 0.2938, "step": 2610 }, { "epoch": 1.02, "learning_rate": 1.986433656957929e-05, "loss": 0.3323, "step": 2620 }, { "epoch": 1.02, "learning_rate": 1.986381877022654e-05, "loss": 0.3702, "step": 2630 }, { "epoch": 1.03, "learning_rate": 1.986330097087379e-05, "loss": 0.2629, "step": 2640 }, { "epoch": 1.03, "learning_rate": 1.9862783171521037e-05, "loss": 0.3314, "step": 2650 }, { "epoch": 1.03, "learning_rate": 1.9862265372168285e-05, "loss": 0.2506, "step": 2660 }, { "epoch": 1.04, "learning_rate": 1.9861747572815536e-05, "loss": 0.2867, "step": 2670 }, { "epoch": 1.04, "learning_rate": 1.9861229773462784e-05, "loss": 0.2409, "step": 2680 }, { "epoch": 1.04, "learning_rate": 1.9860711974110035e-05, "loss": 0.2056, "step": 2690 }, { "epoch": 1.05, "learning_rate": 1.9860194174757283e-05, "loss": 0.3161, "step": 2700 }, { "epoch": 1.05, "learning_rate": 1.9859676375404535e-05, "loss": 0.2486, "step": 2710 }, { "epoch": 1.06, "learning_rate": 1.9859158576051783e-05, "loss": 0.2606, "step": 2720 }, { "epoch": 1.06, "learning_rate": 1.985864077669903e-05, "loss": 0.3479, "step": 2730 }, { "epoch": 1.06, "learning_rate": 1.985812297734628e-05, "loss": 0.3214, "step": 2740 }, { "epoch": 1.07, "learning_rate": 1.985760517799353e-05, "loss": 0.2541, "step": 2750 }, { "epoch": 1.07, "learning_rate": 1.9857087378640778e-05, "loss": 0.3196, "step": 2760 }, { "epoch": 1.08, "learning_rate": 1.985656957928803e-05, "loss": 0.229, "step": 2770 }, { "epoch": 1.08, "learning_rate": 1.9856051779935277e-05, "loss": 0.2583, "step": 2780 }, { "epoch": 1.08, "learning_rate": 1.9855533980582525e-05, "loss": 0.2678, "step": 2790 }, { "epoch": 1.09, "learning_rate": 1.9855016181229776e-05, "loss": 0.2552, "step": 2800 }, { "epoch": 1.09, "learning_rate": 1.9854498381877024e-05, "loss": 0.2432, "step": 2810 }, { "epoch": 1.1, "learning_rate": 1.9853980582524272e-05, "loss": 0.2503, "step": 2820 }, { "epoch": 1.1, "learning_rate": 1.9853462783171523e-05, "loss": 0.3042, "step": 2830 }, { "epoch": 1.1, "learning_rate": 1.985294498381877e-05, "loss": 0.2171, "step": 2840 }, { "epoch": 1.11, "learning_rate": 1.9852427184466022e-05, "loss": 0.2555, "step": 2850 }, { "epoch": 1.11, "learning_rate": 1.985190938511327e-05, "loss": 0.2951, "step": 2860 }, { "epoch": 1.11, "learning_rate": 1.985139158576052e-05, "loss": 0.2772, "step": 2870 }, { "epoch": 1.12, "learning_rate": 1.985087378640777e-05, "loss": 0.2971, "step": 2880 }, { "epoch": 1.12, "learning_rate": 1.9850355987055018e-05, "loss": 0.304, "step": 2890 }, { "epoch": 1.13, "learning_rate": 1.9849838187702266e-05, "loss": 0.3845, "step": 2900 }, { "epoch": 1.13, "learning_rate": 1.9849320388349517e-05, "loss": 0.24, "step": 2910 }, { "epoch": 1.13, "learning_rate": 1.9848802588996765e-05, "loss": 0.3067, "step": 2920 }, { "epoch": 1.14, "learning_rate": 1.9848284789644016e-05, "loss": 0.3467, "step": 2930 }, { "epoch": 1.14, "learning_rate": 1.9847766990291264e-05, "loss": 0.1481, "step": 2940 }, { "epoch": 1.15, "learning_rate": 1.9847249190938512e-05, "loss": 0.1497, "step": 2950 }, { "epoch": 1.15, "learning_rate": 1.9846731391585763e-05, "loss": 0.3703, "step": 2960 }, { "epoch": 1.15, "learning_rate": 1.984621359223301e-05, "loss": 0.192, "step": 2970 }, { "epoch": 1.16, "learning_rate": 1.984569579288026e-05, "loss": 0.3424, "step": 2980 }, { "epoch": 1.16, "learning_rate": 1.984517799352751e-05, "loss": 0.2612, "step": 2990 }, { "epoch": 1.17, "learning_rate": 1.9844660194174758e-05, "loss": 0.1468, "step": 3000 }, { "epoch": 1.17, "learning_rate": 1.984414239482201e-05, "loss": 0.176, "step": 3010 }, { "epoch": 1.17, "learning_rate": 1.9843624595469258e-05, "loss": 0.2704, "step": 3020 }, { "epoch": 1.18, "learning_rate": 1.9843106796116505e-05, "loss": 0.1322, "step": 3030 }, { "epoch": 1.18, "learning_rate": 1.9842588996763757e-05, "loss": 0.3755, "step": 3040 }, { "epoch": 1.18, "learning_rate": 1.9842071197411005e-05, "loss": 0.165, "step": 3050 }, { "epoch": 1.19, "learning_rate": 1.9841553398058253e-05, "loss": 0.298, "step": 3060 }, { "epoch": 1.19, "learning_rate": 1.9841035598705504e-05, "loss": 0.1262, "step": 3070 }, { "epoch": 1.2, "learning_rate": 1.9840517799352752e-05, "loss": 0.2392, "step": 3080 }, { "epoch": 1.2, "learning_rate": 1.9840000000000003e-05, "loss": 0.3062, "step": 3090 }, { "epoch": 1.2, "learning_rate": 1.983948220064725e-05, "loss": 0.251, "step": 3100 }, { "epoch": 1.21, "learning_rate": 1.98389644012945e-05, "loss": 0.2077, "step": 3110 }, { "epoch": 1.21, "learning_rate": 1.983844660194175e-05, "loss": 0.362, "step": 3120 }, { "epoch": 1.22, "learning_rate": 1.9837928802588998e-05, "loss": 0.2112, "step": 3130 }, { "epoch": 1.22, "learning_rate": 1.9837411003236246e-05, "loss": 0.2425, "step": 3140 }, { "epoch": 1.22, "learning_rate": 1.9836893203883497e-05, "loss": 0.2709, "step": 3150 }, { "epoch": 1.23, "learning_rate": 1.9836375404530745e-05, "loss": 0.2759, "step": 3160 }, { "epoch": 1.23, "learning_rate": 1.9835857605177993e-05, "loss": 0.2465, "step": 3170 }, { "epoch": 1.23, "learning_rate": 1.9835339805825245e-05, "loss": 0.2674, "step": 3180 }, { "epoch": 1.24, "learning_rate": 1.9834822006472493e-05, "loss": 0.2807, "step": 3190 }, { "epoch": 1.24, "learning_rate": 1.9834304207119744e-05, "loss": 0.1559, "step": 3200 }, { "epoch": 1.25, "learning_rate": 1.9833786407766992e-05, "loss": 0.2219, "step": 3210 }, { "epoch": 1.25, "learning_rate": 1.983326860841424e-05, "loss": 0.2027, "step": 3220 }, { "epoch": 1.25, "learning_rate": 1.983275080906149e-05, "loss": 0.1939, "step": 3230 }, { "epoch": 1.26, "learning_rate": 1.983223300970874e-05, "loss": 0.4326, "step": 3240 }, { "epoch": 1.26, "learning_rate": 1.9831715210355987e-05, "loss": 0.1498, "step": 3250 }, { "epoch": 1.27, "learning_rate": 1.9831197411003238e-05, "loss": 0.5611, "step": 3260 }, { "epoch": 1.27, "learning_rate": 1.9830679611650486e-05, "loss": 0.4854, "step": 3270 }, { "epoch": 1.27, "learning_rate": 1.9830161812297737e-05, "loss": 0.2406, "step": 3280 }, { "epoch": 1.28, "learning_rate": 1.9829644012944985e-05, "loss": 0.1788, "step": 3290 }, { "epoch": 1.28, "learning_rate": 1.9829126213592233e-05, "loss": 0.3951, "step": 3300 }, { "epoch": 1.29, "learning_rate": 1.9828608414239485e-05, "loss": 0.1912, "step": 3310 }, { "epoch": 1.29, "learning_rate": 1.9828090614886733e-05, "loss": 0.1423, "step": 3320 }, { "epoch": 1.29, "learning_rate": 1.982757281553398e-05, "loss": 0.3095, "step": 3330 }, { "epoch": 1.3, "learning_rate": 1.9827055016181232e-05, "loss": 0.3002, "step": 3340 }, { "epoch": 1.3, "learning_rate": 1.982653721682848e-05, "loss": 0.2693, "step": 3350 }, { "epoch": 1.3, "learning_rate": 1.982601941747573e-05, "loss": 0.5431, "step": 3360 }, { "epoch": 1.31, "learning_rate": 1.982550161812298e-05, "loss": 0.2523, "step": 3370 }, { "epoch": 1.31, "learning_rate": 1.982498381877023e-05, "loss": 0.3174, "step": 3380 }, { "epoch": 1.32, "learning_rate": 1.9824466019417478e-05, "loss": 0.2084, "step": 3390 }, { "epoch": 1.32, "learning_rate": 1.9823948220064726e-05, "loss": 0.2482, "step": 3400 }, { "epoch": 1.32, "learning_rate": 1.9823430420711974e-05, "loss": 0.1637, "step": 3410 }, { "epoch": 1.33, "learning_rate": 1.9822912621359225e-05, "loss": 0.4316, "step": 3420 }, { "epoch": 1.33, "learning_rate": 1.9822394822006473e-05, "loss": 0.1623, "step": 3430 }, { "epoch": 1.34, "learning_rate": 1.9821877022653725e-05, "loss": 0.2505, "step": 3440 }, { "epoch": 1.34, "learning_rate": 1.9821359223300972e-05, "loss": 0.282, "step": 3450 }, { "epoch": 1.34, "learning_rate": 1.9820841423948224e-05, "loss": 0.2962, "step": 3460 }, { "epoch": 1.35, "learning_rate": 1.982032362459547e-05, "loss": 0.1224, "step": 3470 }, { "epoch": 1.35, "learning_rate": 1.981980582524272e-05, "loss": 0.3132, "step": 3480 }, { "epoch": 1.36, "learning_rate": 1.9819288025889968e-05, "loss": 0.4015, "step": 3490 }, { "epoch": 1.36, "learning_rate": 1.981877022653722e-05, "loss": 0.3483, "step": 3500 }, { "epoch": 1.36, "learning_rate": 1.9818252427184467e-05, "loss": 0.2043, "step": 3510 }, { "epoch": 1.37, "learning_rate": 1.9817734627831718e-05, "loss": 0.3625, "step": 3520 }, { "epoch": 1.37, "learning_rate": 1.9817216828478966e-05, "loss": 0.2839, "step": 3530 }, { "epoch": 1.37, "learning_rate": 1.9816699029126217e-05, "loss": 0.2373, "step": 3540 }, { "epoch": 1.38, "learning_rate": 1.9816181229773462e-05, "loss": 0.2288, "step": 3550 }, { "epoch": 1.38, "learning_rate": 1.9815663430420713e-05, "loss": 0.2, "step": 3560 }, { "epoch": 1.39, "learning_rate": 1.981514563106796e-05, "loss": 0.3061, "step": 3570 }, { "epoch": 1.39, "learning_rate": 1.9814627831715212e-05, "loss": 0.2568, "step": 3580 }, { "epoch": 1.39, "learning_rate": 1.981411003236246e-05, "loss": 0.3587, "step": 3590 }, { "epoch": 1.4, "learning_rate": 1.981359223300971e-05, "loss": 0.4764, "step": 3600 }, { "epoch": 1.4, "learning_rate": 1.981307443365696e-05, "loss": 0.2079, "step": 3610 }, { "epoch": 1.41, "learning_rate": 1.981255663430421e-05, "loss": 0.2546, "step": 3620 }, { "epoch": 1.41, "learning_rate": 1.9812038834951455e-05, "loss": 0.3153, "step": 3630 }, { "epoch": 1.41, "learning_rate": 1.9811521035598707e-05, "loss": 0.1584, "step": 3640 }, { "epoch": 1.42, "learning_rate": 1.9811003236245955e-05, "loss": 0.4659, "step": 3650 }, { "epoch": 1.42, "learning_rate": 1.9810485436893206e-05, "loss": 0.1902, "step": 3660 }, { "epoch": 1.43, "learning_rate": 1.9809967637540454e-05, "loss": 0.2489, "step": 3670 }, { "epoch": 1.43, "learning_rate": 1.9809449838187705e-05, "loss": 0.4111, "step": 3680 }, { "epoch": 1.43, "learning_rate": 1.9808932038834953e-05, "loss": 0.3518, "step": 3690 }, { "epoch": 1.44, "learning_rate": 1.9808414239482204e-05, "loss": 0.3978, "step": 3700 }, { "epoch": 1.44, "learning_rate": 1.980789644012945e-05, "loss": 0.1893, "step": 3710 }, { "epoch": 1.44, "learning_rate": 1.98073786407767e-05, "loss": 0.2453, "step": 3720 }, { "epoch": 1.45, "learning_rate": 1.9806860841423948e-05, "loss": 0.2749, "step": 3730 }, { "epoch": 1.45, "learning_rate": 1.98063430420712e-05, "loss": 0.2735, "step": 3740 }, { "epoch": 1.46, "learning_rate": 1.9805825242718447e-05, "loss": 0.2187, "step": 3750 }, { "epoch": 1.46, "learning_rate": 1.98053074433657e-05, "loss": 0.266, "step": 3760 }, { "epoch": 1.46, "learning_rate": 1.9804789644012947e-05, "loss": 0.2829, "step": 3770 }, { "epoch": 1.47, "learning_rate": 1.9804271844660198e-05, "loss": 0.1069, "step": 3780 }, { "epoch": 1.47, "learning_rate": 1.9803754045307443e-05, "loss": 0.4782, "step": 3790 }, { "epoch": 1.48, "learning_rate": 1.9803236245954694e-05, "loss": 0.3305, "step": 3800 }, { "epoch": 1.48, "learning_rate": 1.9802718446601942e-05, "loss": 0.2876, "step": 3810 }, { "epoch": 1.48, "learning_rate": 1.9802200647249193e-05, "loss": 0.2173, "step": 3820 }, { "epoch": 1.49, "learning_rate": 1.980168284789644e-05, "loss": 0.3115, "step": 3830 }, { "epoch": 1.49, "learning_rate": 1.9801165048543692e-05, "loss": 0.266, "step": 3840 }, { "epoch": 1.5, "learning_rate": 1.980064724919094e-05, "loss": 0.4971, "step": 3850 }, { "epoch": 1.5, "learning_rate": 1.980012944983819e-05, "loss": 0.1597, "step": 3860 }, { "epoch": 1.5, "learning_rate": 1.9799611650485436e-05, "loss": 0.3279, "step": 3870 }, { "epoch": 1.51, "learning_rate": 1.9799093851132687e-05, "loss": 0.2791, "step": 3880 }, { "epoch": 1.51, "learning_rate": 1.9798576051779935e-05, "loss": 0.2035, "step": 3890 }, { "epoch": 1.51, "learning_rate": 1.9798058252427187e-05, "loss": 0.3005, "step": 3900 }, { "epoch": 1.52, "learning_rate": 1.9797540453074435e-05, "loss": 0.2286, "step": 3910 }, { "epoch": 1.52, "learning_rate": 1.9797022653721686e-05, "loss": 0.2433, "step": 3920 }, { "epoch": 1.53, "learning_rate": 1.9796504854368934e-05, "loss": 0.1744, "step": 3930 }, { "epoch": 1.53, "learning_rate": 1.9795987055016185e-05, "loss": 0.2935, "step": 3940 }, { "epoch": 1.53, "learning_rate": 1.9795469255663433e-05, "loss": 0.2748, "step": 3950 }, { "epoch": 1.54, "learning_rate": 1.979495145631068e-05, "loss": 0.1726, "step": 3960 }, { "epoch": 1.54, "learning_rate": 1.979443365695793e-05, "loss": 0.3466, "step": 3970 }, { "epoch": 1.55, "learning_rate": 1.979391585760518e-05, "loss": 0.3159, "step": 3980 }, { "epoch": 1.55, "learning_rate": 1.9793398058252428e-05, "loss": 0.2478, "step": 3990 }, { "epoch": 1.55, "learning_rate": 1.979288025889968e-05, "loss": 0.3374, "step": 4000 }, { "epoch": 1.56, "learning_rate": 1.9792362459546927e-05, "loss": 0.2616, "step": 4010 }, { "epoch": 1.56, "learning_rate": 1.979184466019418e-05, "loss": 0.43, "step": 4020 }, { "epoch": 1.57, "learning_rate": 1.9791326860841427e-05, "loss": 0.1363, "step": 4030 }, { "epoch": 1.57, "learning_rate": 1.9790809061488675e-05, "loss": 0.1238, "step": 4040 }, { "epoch": 1.57, "learning_rate": 1.9790291262135922e-05, "loss": 0.3047, "step": 4050 }, { "epoch": 1.58, "learning_rate": 1.9789773462783174e-05, "loss": 0.1906, "step": 4060 }, { "epoch": 1.58, "learning_rate": 1.978925566343042e-05, "loss": 0.3596, "step": 4070 }, { "epoch": 1.58, "learning_rate": 1.9788737864077673e-05, "loss": 0.3239, "step": 4080 }, { "epoch": 1.59, "learning_rate": 1.978822006472492e-05, "loss": 0.4306, "step": 4090 }, { "epoch": 1.59, "learning_rate": 1.9787702265372172e-05, "loss": 0.1729, "step": 4100 }, { "epoch": 1.6, "learning_rate": 1.978718446601942e-05, "loss": 0.4153, "step": 4110 }, { "epoch": 1.6, "learning_rate": 1.9786666666666668e-05, "loss": 0.4504, "step": 4120 }, { "epoch": 1.6, "learning_rate": 1.9786148867313916e-05, "loss": 0.1816, "step": 4130 }, { "epoch": 1.61, "learning_rate": 1.9785631067961167e-05, "loss": 0.2314, "step": 4140 }, { "epoch": 1.61, "learning_rate": 1.9785113268608415e-05, "loss": 0.3013, "step": 4150 }, { "epoch": 1.62, "learning_rate": 1.9784595469255667e-05, "loss": 0.2564, "step": 4160 }, { "epoch": 1.62, "learning_rate": 1.9784077669902914e-05, "loss": 0.1081, "step": 4170 }, { "epoch": 1.62, "learning_rate": 1.9783559870550166e-05, "loss": 0.3437, "step": 4180 }, { "epoch": 1.63, "learning_rate": 1.9783042071197414e-05, "loss": 0.2626, "step": 4190 }, { "epoch": 1.63, "learning_rate": 1.978252427184466e-05, "loss": 0.204, "step": 4200 }, { "epoch": 1.63, "learning_rate": 1.978200647249191e-05, "loss": 0.1679, "step": 4210 }, { "epoch": 1.64, "learning_rate": 1.978148867313916e-05, "loss": 0.3692, "step": 4220 }, { "epoch": 1.64, "learning_rate": 1.978097087378641e-05, "loss": 0.2078, "step": 4230 }, { "epoch": 1.65, "learning_rate": 1.978045307443366e-05, "loss": 0.3114, "step": 4240 }, { "epoch": 1.65, "learning_rate": 1.9779935275080908e-05, "loss": 0.3261, "step": 4250 }, { "epoch": 1.65, "learning_rate": 1.9779417475728156e-05, "loss": 0.3261, "step": 4260 }, { "epoch": 1.66, "learning_rate": 1.9778899676375407e-05, "loss": 0.1267, "step": 4270 }, { "epoch": 1.66, "learning_rate": 1.9778381877022655e-05, "loss": 0.1723, "step": 4280 }, { "epoch": 1.67, "learning_rate": 1.9777864077669903e-05, "loss": 0.2227, "step": 4290 }, { "epoch": 1.67, "learning_rate": 1.9777346278317154e-05, "loss": 0.3133, "step": 4300 }, { "epoch": 1.67, "learning_rate": 1.9776828478964402e-05, "loss": 0.2955, "step": 4310 }, { "epoch": 1.68, "learning_rate": 1.9776310679611654e-05, "loss": 0.2596, "step": 4320 }, { "epoch": 1.68, "learning_rate": 1.97757928802589e-05, "loss": 0.3367, "step": 4330 }, { "epoch": 1.69, "learning_rate": 1.977527508090615e-05, "loss": 0.3484, "step": 4340 }, { "epoch": 1.69, "learning_rate": 1.97747572815534e-05, "loss": 0.2865, "step": 4350 }, { "epoch": 1.69, "learning_rate": 1.977423948220065e-05, "loss": 0.1856, "step": 4360 }, { "epoch": 1.7, "learning_rate": 1.9773721682847897e-05, "loss": 0.3409, "step": 4370 }, { "epoch": 1.7, "learning_rate": 1.9773203883495148e-05, "loss": 0.4301, "step": 4380 }, { "epoch": 1.7, "learning_rate": 1.9772686084142396e-05, "loss": 0.1799, "step": 4390 }, { "epoch": 1.71, "learning_rate": 1.9772168284789647e-05, "loss": 0.227, "step": 4400 }, { "epoch": 1.71, "learning_rate": 1.9771650485436895e-05, "loss": 0.2825, "step": 4410 }, { "epoch": 1.72, "learning_rate": 1.9771132686084143e-05, "loss": 0.1994, "step": 4420 }, { "epoch": 1.72, "learning_rate": 1.9770614886731394e-05, "loss": 0.2616, "step": 4430 }, { "epoch": 1.72, "learning_rate": 1.9770097087378642e-05, "loss": 0.2436, "step": 4440 }, { "epoch": 1.73, "learning_rate": 1.976957928802589e-05, "loss": 0.1199, "step": 4450 }, { "epoch": 1.73, "learning_rate": 1.976906148867314e-05, "loss": 0.3097, "step": 4460 }, { "epoch": 1.74, "learning_rate": 1.976854368932039e-05, "loss": 0.3633, "step": 4470 }, { "epoch": 1.74, "learning_rate": 1.976802588996764e-05, "loss": 0.4737, "step": 4480 }, { "epoch": 1.74, "learning_rate": 1.976750809061489e-05, "loss": 0.4733, "step": 4490 }, { "epoch": 1.75, "learning_rate": 1.9766990291262137e-05, "loss": 0.2922, "step": 4500 }, { "epoch": 1.75, "learning_rate": 1.9766472491909388e-05, "loss": 0.1793, "step": 4510 }, { "epoch": 1.76, "learning_rate": 1.9765954692556636e-05, "loss": 0.2487, "step": 4520 }, { "epoch": 1.76, "learning_rate": 1.9765436893203884e-05, "loss": 0.1645, "step": 4530 }, { "epoch": 1.76, "learning_rate": 1.9764919093851135e-05, "loss": 0.2696, "step": 4540 }, { "epoch": 1.77, "learning_rate": 1.9764401294498383e-05, "loss": 0.1997, "step": 4550 }, { "epoch": 1.77, "learning_rate": 1.9763883495145634e-05, "loss": 0.2653, "step": 4560 }, { "epoch": 1.77, "learning_rate": 1.9763365695792882e-05, "loss": 0.2104, "step": 4570 }, { "epoch": 1.78, "learning_rate": 1.976284789644013e-05, "loss": 0.1888, "step": 4580 }, { "epoch": 1.78, "learning_rate": 1.976233009708738e-05, "loss": 0.3716, "step": 4590 }, { "epoch": 1.79, "learning_rate": 1.976181229773463e-05, "loss": 0.207, "step": 4600 }, { "epoch": 1.79, "learning_rate": 1.9761294498381877e-05, "loss": 0.3744, "step": 4610 }, { "epoch": 1.79, "learning_rate": 1.976077669902913e-05, "loss": 0.4255, "step": 4620 }, { "epoch": 1.8, "learning_rate": 1.9760258899676377e-05, "loss": 0.366, "step": 4630 }, { "epoch": 1.8, "learning_rate": 1.9759741100323625e-05, "loss": 0.3249, "step": 4640 }, { "epoch": 1.81, "learning_rate": 1.9759223300970876e-05, "loss": 0.4179, "step": 4650 }, { "epoch": 1.81, "learning_rate": 1.9758705501618124e-05, "loss": 0.4137, "step": 4660 }, { "epoch": 1.81, "learning_rate": 1.9758187702265375e-05, "loss": 0.2887, "step": 4670 }, { "epoch": 1.82, "learning_rate": 1.9757669902912623e-05, "loss": 0.3047, "step": 4680 }, { "epoch": 1.82, "learning_rate": 1.975715210355987e-05, "loss": 0.2051, "step": 4690 }, { "epoch": 1.83, "learning_rate": 1.9756634304207122e-05, "loss": 0.3425, "step": 4700 }, { "epoch": 1.83, "learning_rate": 1.975611650485437e-05, "loss": 0.3201, "step": 4710 }, { "epoch": 1.83, "learning_rate": 1.9755598705501618e-05, "loss": 0.2463, "step": 4720 }, { "epoch": 1.84, "learning_rate": 1.975508090614887e-05, "loss": 0.3898, "step": 4730 }, { "epoch": 1.84, "learning_rate": 1.9754563106796117e-05, "loss": 0.3263, "step": 4740 }, { "epoch": 1.84, "learning_rate": 1.975404530744337e-05, "loss": 0.0947, "step": 4750 }, { "epoch": 1.85, "learning_rate": 1.9753527508090617e-05, "loss": 0.1428, "step": 4760 }, { "epoch": 1.85, "learning_rate": 1.9753009708737864e-05, "loss": 0.1649, "step": 4770 }, { "epoch": 1.86, "learning_rate": 1.9752491909385116e-05, "loss": 0.2952, "step": 4780 }, { "epoch": 1.86, "learning_rate": 1.9751974110032364e-05, "loss": 0.1883, "step": 4790 }, { "epoch": 1.86, "learning_rate": 1.975145631067961e-05, "loss": 0.3043, "step": 4800 }, { "epoch": 1.87, "learning_rate": 1.9750938511326863e-05, "loss": 0.262, "step": 4810 }, { "epoch": 1.87, "learning_rate": 1.975042071197411e-05, "loss": 0.2134, "step": 4820 }, { "epoch": 1.88, "learning_rate": 1.9749902912621362e-05, "loss": 0.3354, "step": 4830 }, { "epoch": 1.88, "learning_rate": 1.974938511326861e-05, "loss": 0.4463, "step": 4840 }, { "epoch": 1.88, "learning_rate": 1.9748867313915858e-05, "loss": 0.3004, "step": 4850 }, { "epoch": 1.89, "learning_rate": 1.974834951456311e-05, "loss": 0.225, "step": 4860 }, { "epoch": 1.89, "learning_rate": 1.9747831715210357e-05, "loss": 0.3776, "step": 4870 }, { "epoch": 1.9, "learning_rate": 1.9747313915857605e-05, "loss": 0.3347, "step": 4880 }, { "epoch": 1.9, "learning_rate": 1.9746796116504856e-05, "loss": 0.1414, "step": 4890 }, { "epoch": 1.9, "learning_rate": 1.9746278317152104e-05, "loss": 0.1621, "step": 4900 }, { "epoch": 1.91, "learning_rate": 1.9745760517799356e-05, "loss": 0.2268, "step": 4910 }, { "epoch": 1.91, "learning_rate": 1.9745242718446604e-05, "loss": 0.2664, "step": 4920 }, { "epoch": 1.91, "learning_rate": 1.974472491909385e-05, "loss": 0.347, "step": 4930 }, { "epoch": 1.92, "learning_rate": 1.97442071197411e-05, "loss": 0.2537, "step": 4940 }, { "epoch": 1.92, "learning_rate": 1.974368932038835e-05, "loss": 0.3155, "step": 4950 }, { "epoch": 1.93, "learning_rate": 1.97431715210356e-05, "loss": 0.2735, "step": 4960 }, { "epoch": 1.93, "learning_rate": 1.974265372168285e-05, "loss": 0.3012, "step": 4970 }, { "epoch": 1.93, "learning_rate": 1.9742135922330098e-05, "loss": 0.2388, "step": 4980 }, { "epoch": 1.94, "learning_rate": 1.974161812297735e-05, "loss": 0.332, "step": 4990 }, { "epoch": 1.94, "learning_rate": 1.9741100323624597e-05, "loss": 0.114, "step": 5000 }, { "epoch": 1.95, "learning_rate": 1.9740582524271845e-05, "loss": 0.171, "step": 5010 }, { "epoch": 1.95, "learning_rate": 1.9740064724919093e-05, "loss": 0.3387, "step": 5020 }, { "epoch": 1.95, "learning_rate": 1.9739546925566344e-05, "loss": 0.2106, "step": 5030 }, { "epoch": 1.96, "learning_rate": 1.9739029126213592e-05, "loss": 0.2017, "step": 5040 }, { "epoch": 1.96, "learning_rate": 1.9738511326860844e-05, "loss": 0.3964, "step": 5050 }, { "epoch": 1.97, "learning_rate": 1.973799352750809e-05, "loss": 0.2325, "step": 5060 }, { "epoch": 1.97, "learning_rate": 1.9737475728155343e-05, "loss": 0.2173, "step": 5070 }, { "epoch": 1.97, "learning_rate": 1.973695792880259e-05, "loss": 0.3135, "step": 5080 }, { "epoch": 1.98, "learning_rate": 1.9736440129449842e-05, "loss": 0.2401, "step": 5090 }, { "epoch": 1.98, "learning_rate": 1.9735922330097087e-05, "loss": 0.3315, "step": 5100 }, { "epoch": 1.98, "learning_rate": 1.9735404530744338e-05, "loss": 0.5079, "step": 5110 }, { "epoch": 1.99, "learning_rate": 1.9734886731391586e-05, "loss": 0.1738, "step": 5120 }, { "epoch": 1.99, "learning_rate": 1.9734368932038837e-05, "loss": 0.2585, "step": 5130 }, { "epoch": 2.0, "learning_rate": 1.9733851132686085e-05, "loss": 0.3199, "step": 5140 }, { "epoch": 2.0, "learning_rate": 1.9733333333333336e-05, "loss": 0.2457, "step": 5150 }, { "epoch": 2.0, "eval_accuracy": 0.9463548830811555, "eval_loss": 0.20650140941143036, "eval_runtime": 8.2354, "eval_samples_per_second": 441.386, "eval_steps_per_second": 55.249, "step": 5150 }, { "epoch": 2.0, "learning_rate": 1.9732815533980584e-05, "loss": 0.2043, "step": 5160 }, { "epoch": 2.01, "learning_rate": 1.9732297734627836e-05, "loss": 0.3017, "step": 5170 }, { "epoch": 2.01, "learning_rate": 1.973177993527508e-05, "loss": 0.2352, "step": 5180 }, { "epoch": 2.02, "learning_rate": 1.973126213592233e-05, "loss": 0.25, "step": 5190 }, { "epoch": 2.02, "learning_rate": 1.973074433656958e-05, "loss": 0.1942, "step": 5200 }, { "epoch": 2.02, "learning_rate": 1.973022653721683e-05, "loss": 0.1456, "step": 5210 }, { "epoch": 2.03, "learning_rate": 1.972970873786408e-05, "loss": 0.1798, "step": 5220 }, { "epoch": 2.03, "learning_rate": 1.972919093851133e-05, "loss": 0.2216, "step": 5230 }, { "epoch": 2.03, "learning_rate": 1.9728673139158578e-05, "loss": 0.3577, "step": 5240 }, { "epoch": 2.04, "learning_rate": 1.972815533980583e-05, "loss": 0.267, "step": 5250 }, { "epoch": 2.04, "learning_rate": 1.9727637540453074e-05, "loss": 0.179, "step": 5260 }, { "epoch": 2.05, "learning_rate": 1.9727119741100325e-05, "loss": 0.1976, "step": 5270 }, { "epoch": 2.05, "learning_rate": 1.9726601941747573e-05, "loss": 0.2285, "step": 5280 }, { "epoch": 2.05, "learning_rate": 1.9726084142394824e-05, "loss": 0.2729, "step": 5290 }, { "epoch": 2.06, "learning_rate": 1.9725566343042072e-05, "loss": 0.2632, "step": 5300 }, { "epoch": 2.06, "learning_rate": 1.9725048543689324e-05, "loss": 0.3938, "step": 5310 }, { "epoch": 2.07, "learning_rate": 1.972453074433657e-05, "loss": 0.251, "step": 5320 }, { "epoch": 2.07, "learning_rate": 1.9724012944983823e-05, "loss": 0.1824, "step": 5330 }, { "epoch": 2.07, "learning_rate": 1.9723495145631067e-05, "loss": 0.1875, "step": 5340 }, { "epoch": 2.08, "learning_rate": 1.972297734627832e-05, "loss": 0.4658, "step": 5350 }, { "epoch": 2.08, "learning_rate": 1.9722459546925567e-05, "loss": 0.2576, "step": 5360 }, { "epoch": 2.09, "learning_rate": 1.9721941747572818e-05, "loss": 0.2174, "step": 5370 }, { "epoch": 2.09, "learning_rate": 1.9721423948220066e-05, "loss": 0.2031, "step": 5380 }, { "epoch": 2.09, "learning_rate": 1.9720906148867317e-05, "loss": 0.2399, "step": 5390 }, { "epoch": 2.1, "learning_rate": 1.9720388349514565e-05, "loss": 0.2067, "step": 5400 }, { "epoch": 2.1, "learning_rate": 1.9719870550161816e-05, "loss": 0.323, "step": 5410 }, { "epoch": 2.1, "learning_rate": 1.971935275080906e-05, "loss": 0.3193, "step": 5420 }, { "epoch": 2.11, "learning_rate": 1.9718834951456312e-05, "loss": 0.1714, "step": 5430 }, { "epoch": 2.11, "learning_rate": 1.971831715210356e-05, "loss": 0.3194, "step": 5440 }, { "epoch": 2.12, "learning_rate": 1.971779935275081e-05, "loss": 0.3148, "step": 5450 }, { "epoch": 2.12, "learning_rate": 1.971728155339806e-05, "loss": 0.2355, "step": 5460 }, { "epoch": 2.12, "learning_rate": 1.971676375404531e-05, "loss": 0.5888, "step": 5470 }, { "epoch": 2.13, "learning_rate": 1.971624595469256e-05, "loss": 0.1942, "step": 5480 }, { "epoch": 2.13, "learning_rate": 1.971572815533981e-05, "loss": 0.3442, "step": 5490 }, { "epoch": 2.14, "learning_rate": 1.9715210355987054e-05, "loss": 0.2042, "step": 5500 }, { "epoch": 2.14, "learning_rate": 1.9714692556634306e-05, "loss": 0.3018, "step": 5510 }, { "epoch": 2.14, "learning_rate": 1.9714174757281554e-05, "loss": 0.2446, "step": 5520 }, { "epoch": 2.15, "learning_rate": 1.9713656957928805e-05, "loss": 0.0684, "step": 5530 }, { "epoch": 2.15, "learning_rate": 1.9713139158576053e-05, "loss": 0.2362, "step": 5540 }, { "epoch": 2.16, "learning_rate": 1.9712621359223304e-05, "loss": 0.1807, "step": 5550 }, { "epoch": 2.16, "learning_rate": 1.9712103559870552e-05, "loss": 0.3736, "step": 5560 }, { "epoch": 2.16, "learning_rate": 1.9711585760517803e-05, "loss": 0.3552, "step": 5570 }, { "epoch": 2.17, "learning_rate": 1.9711067961165048e-05, "loss": 0.165, "step": 5580 }, { "epoch": 2.17, "learning_rate": 1.97105501618123e-05, "loss": 0.3257, "step": 5590 }, { "epoch": 2.17, "learning_rate": 1.9710032362459547e-05, "loss": 0.2476, "step": 5600 }, { "epoch": 2.18, "learning_rate": 1.97095145631068e-05, "loss": 0.1588, "step": 5610 }, { "epoch": 2.18, "learning_rate": 1.9708996763754046e-05, "loss": 0.1726, "step": 5620 }, { "epoch": 2.19, "learning_rate": 1.9708478964401298e-05, "loss": 0.2349, "step": 5630 }, { "epoch": 2.19, "learning_rate": 1.9707961165048546e-05, "loss": 0.2167, "step": 5640 }, { "epoch": 2.19, "learning_rate": 1.9707443365695797e-05, "loss": 0.1561, "step": 5650 }, { "epoch": 2.2, "learning_rate": 1.9706925566343045e-05, "loss": 0.3793, "step": 5660 }, { "epoch": 2.2, "learning_rate": 1.9706407766990293e-05, "loss": 0.1965, "step": 5670 }, { "epoch": 2.21, "learning_rate": 1.970588996763754e-05, "loss": 0.3141, "step": 5680 }, { "epoch": 2.21, "learning_rate": 1.9705372168284792e-05, "loss": 0.1715, "step": 5690 }, { "epoch": 2.21, "learning_rate": 1.970485436893204e-05, "loss": 0.2172, "step": 5700 }, { "epoch": 2.22, "learning_rate": 1.970433656957929e-05, "loss": 0.1682, "step": 5710 }, { "epoch": 2.22, "learning_rate": 1.970381877022654e-05, "loss": 0.0884, "step": 5720 }, { "epoch": 2.23, "learning_rate": 1.9703300970873787e-05, "loss": 0.1397, "step": 5730 }, { "epoch": 2.23, "learning_rate": 1.970278317152104e-05, "loss": 0.2729, "step": 5740 }, { "epoch": 2.23, "learning_rate": 1.9702265372168286e-05, "loss": 0.4754, "step": 5750 }, { "epoch": 2.24, "learning_rate": 1.9701747572815534e-05, "loss": 0.1378, "step": 5760 }, { "epoch": 2.24, "learning_rate": 1.9701229773462786e-05, "loss": 0.2418, "step": 5770 }, { "epoch": 2.24, "learning_rate": 1.9700711974110034e-05, "loss": 0.1602, "step": 5780 }, { "epoch": 2.25, "learning_rate": 1.9700194174757285e-05, "loss": 0.2374, "step": 5790 }, { "epoch": 2.25, "learning_rate": 1.9699676375404533e-05, "loss": 0.3039, "step": 5800 }, { "epoch": 2.26, "learning_rate": 1.969915857605178e-05, "loss": 0.1992, "step": 5810 }, { "epoch": 2.26, "learning_rate": 1.9698640776699032e-05, "loss": 0.253, "step": 5820 }, { "epoch": 2.26, "learning_rate": 1.969812297734628e-05, "loss": 0.2149, "step": 5830 }, { "epoch": 2.27, "learning_rate": 1.9697605177993528e-05, "loss": 0.2403, "step": 5840 }, { "epoch": 2.27, "learning_rate": 1.969708737864078e-05, "loss": 0.1518, "step": 5850 }, { "epoch": 2.28, "learning_rate": 1.9696569579288027e-05, "loss": 0.1981, "step": 5860 }, { "epoch": 2.28, "learning_rate": 1.969605177993528e-05, "loss": 0.3326, "step": 5870 }, { "epoch": 2.28, "learning_rate": 1.9695533980582526e-05, "loss": 0.1952, "step": 5880 }, { "epoch": 2.29, "learning_rate": 1.9695016181229774e-05, "loss": 0.1975, "step": 5890 }, { "epoch": 2.29, "learning_rate": 1.9694498381877026e-05, "loss": 0.2171, "step": 5900 }, { "epoch": 2.3, "learning_rate": 1.9693980582524273e-05, "loss": 0.2066, "step": 5910 }, { "epoch": 2.3, "learning_rate": 1.969346278317152e-05, "loss": 0.2405, "step": 5920 }, { "epoch": 2.3, "learning_rate": 1.9692944983818773e-05, "loss": 0.0774, "step": 5930 }, { "epoch": 2.31, "learning_rate": 1.969242718446602e-05, "loss": 0.2246, "step": 5940 }, { "epoch": 2.31, "learning_rate": 1.9691909385113272e-05, "loss": 0.3062, "step": 5950 }, { "epoch": 2.31, "learning_rate": 1.969139158576052e-05, "loss": 0.1248, "step": 5960 }, { "epoch": 2.32, "learning_rate": 1.9690873786407768e-05, "loss": 0.2669, "step": 5970 }, { "epoch": 2.32, "learning_rate": 1.969035598705502e-05, "loss": 0.2293, "step": 5980 }, { "epoch": 2.33, "learning_rate": 1.9689838187702267e-05, "loss": 0.1692, "step": 5990 }, { "epoch": 2.33, "learning_rate": 1.9689320388349515e-05, "loss": 0.2158, "step": 6000 }, { "epoch": 2.33, "learning_rate": 1.9688802588996766e-05, "loss": 0.2814, "step": 6010 }, { "epoch": 2.34, "learning_rate": 1.9688284789644014e-05, "loss": 0.3069, "step": 6020 }, { "epoch": 2.34, "learning_rate": 1.9687766990291262e-05, "loss": 0.2581, "step": 6030 }, { "epoch": 2.35, "learning_rate": 1.9687249190938513e-05, "loss": 0.2023, "step": 6040 }, { "epoch": 2.35, "learning_rate": 1.968673139158576e-05, "loss": 0.2735, "step": 6050 }, { "epoch": 2.35, "learning_rate": 1.9686213592233013e-05, "loss": 0.2229, "step": 6060 }, { "epoch": 2.36, "learning_rate": 1.968569579288026e-05, "loss": 0.3472, "step": 6070 }, { "epoch": 2.36, "learning_rate": 1.968517799352751e-05, "loss": 0.2975, "step": 6080 }, { "epoch": 2.37, "learning_rate": 1.968466019417476e-05, "loss": 0.2075, "step": 6090 }, { "epoch": 2.37, "learning_rate": 1.9684142394822008e-05, "loss": 0.2801, "step": 6100 }, { "epoch": 2.37, "learning_rate": 1.9683624595469256e-05, "loss": 0.1996, "step": 6110 }, { "epoch": 2.38, "learning_rate": 1.9683106796116507e-05, "loss": 0.3453, "step": 6120 }, { "epoch": 2.38, "learning_rate": 1.9682588996763755e-05, "loss": 0.252, "step": 6130 }, { "epoch": 2.38, "learning_rate": 1.9682071197411006e-05, "loss": 0.3192, "step": 6140 }, { "epoch": 2.39, "learning_rate": 1.9681553398058254e-05, "loss": 0.2626, "step": 6150 }, { "epoch": 2.39, "learning_rate": 1.9681035598705502e-05, "loss": 0.2132, "step": 6160 }, { "epoch": 2.4, "learning_rate": 1.9680517799352753e-05, "loss": 0.142, "step": 6170 }, { "epoch": 2.4, "learning_rate": 1.968e-05, "loss": 0.3669, "step": 6180 }, { "epoch": 2.4, "learning_rate": 1.967948220064725e-05, "loss": 0.1967, "step": 6190 }, { "epoch": 2.41, "learning_rate": 1.96789644012945e-05, "loss": 0.2668, "step": 6200 }, { "epoch": 2.41, "learning_rate": 1.967844660194175e-05, "loss": 0.4368, "step": 6210 }, { "epoch": 2.42, "learning_rate": 1.9677928802589e-05, "loss": 0.2825, "step": 6220 }, { "epoch": 2.42, "learning_rate": 1.9677411003236248e-05, "loss": 0.3686, "step": 6230 }, { "epoch": 2.42, "learning_rate": 1.9676893203883496e-05, "loss": 0.3099, "step": 6240 }, { "epoch": 2.43, "learning_rate": 1.9676375404530747e-05, "loss": 0.3312, "step": 6250 }, { "epoch": 2.43, "learning_rate": 1.9675857605177995e-05, "loss": 0.1675, "step": 6260 }, { "epoch": 2.43, "learning_rate": 1.9675339805825243e-05, "loss": 0.1346, "step": 6270 }, { "epoch": 2.44, "learning_rate": 1.9674822006472494e-05, "loss": 0.2002, "step": 6280 }, { "epoch": 2.44, "learning_rate": 1.9674304207119742e-05, "loss": 0.0604, "step": 6290 }, { "epoch": 2.45, "learning_rate": 1.9673786407766993e-05, "loss": 0.1218, "step": 6300 }, { "epoch": 2.45, "learning_rate": 1.967326860841424e-05, "loss": 0.2355, "step": 6310 }, { "epoch": 2.45, "learning_rate": 1.967275080906149e-05, "loss": 0.2469, "step": 6320 }, { "epoch": 2.46, "learning_rate": 1.967223300970874e-05, "loss": 0.2579, "step": 6330 }, { "epoch": 2.46, "learning_rate": 1.967171521035599e-05, "loss": 0.2691, "step": 6340 }, { "epoch": 2.47, "learning_rate": 1.9671197411003236e-05, "loss": 0.2664, "step": 6350 }, { "epoch": 2.47, "learning_rate": 1.9670679611650488e-05, "loss": 0.4055, "step": 6360 }, { "epoch": 2.47, "learning_rate": 1.9670161812297736e-05, "loss": 0.2265, "step": 6370 }, { "epoch": 2.48, "learning_rate": 1.9669644012944987e-05, "loss": 0.2793, "step": 6380 }, { "epoch": 2.48, "learning_rate": 1.9669126213592235e-05, "loss": 0.1894, "step": 6390 }, { "epoch": 2.49, "learning_rate": 1.9668608414239483e-05, "loss": 0.1919, "step": 6400 }, { "epoch": 2.49, "learning_rate": 1.966809061488673e-05, "loss": 0.229, "step": 6410 }, { "epoch": 2.49, "learning_rate": 1.9667572815533982e-05, "loss": 0.2536, "step": 6420 }, { "epoch": 2.5, "learning_rate": 1.966705501618123e-05, "loss": 0.4089, "step": 6430 }, { "epoch": 2.5, "learning_rate": 1.966653721682848e-05, "loss": 0.1393, "step": 6440 }, { "epoch": 2.5, "learning_rate": 1.966601941747573e-05, "loss": 0.1711, "step": 6450 }, { "epoch": 2.51, "learning_rate": 1.966550161812298e-05, "loss": 0.2937, "step": 6460 }, { "epoch": 2.51, "learning_rate": 1.966498381877023e-05, "loss": 0.314, "step": 6470 }, { "epoch": 2.52, "learning_rate": 1.9664466019417476e-05, "loss": 0.3018, "step": 6480 }, { "epoch": 2.52, "learning_rate": 1.9663948220064724e-05, "loss": 0.4462, "step": 6490 }, { "epoch": 2.52, "learning_rate": 1.9663430420711976e-05, "loss": 0.0828, "step": 6500 }, { "epoch": 2.53, "learning_rate": 1.9662912621359223e-05, "loss": 0.1944, "step": 6510 }, { "epoch": 2.53, "learning_rate": 1.9662394822006475e-05, "loss": 0.3108, "step": 6520 }, { "epoch": 2.54, "learning_rate": 1.9661877022653723e-05, "loss": 0.1355, "step": 6530 }, { "epoch": 2.54, "learning_rate": 1.9661359223300974e-05, "loss": 0.4539, "step": 6540 }, { "epoch": 2.54, "learning_rate": 1.9660841423948222e-05, "loss": 0.2266, "step": 6550 }, { "epoch": 2.55, "learning_rate": 1.966032362459547e-05, "loss": 0.1012, "step": 6560 }, { "epoch": 2.55, "learning_rate": 1.9659805825242718e-05, "loss": 0.2279, "step": 6570 }, { "epoch": 2.56, "learning_rate": 1.965928802588997e-05, "loss": 0.1487, "step": 6580 }, { "epoch": 2.56, "learning_rate": 1.9658770226537217e-05, "loss": 0.2495, "step": 6590 }, { "epoch": 2.56, "learning_rate": 1.965825242718447e-05, "loss": 0.2881, "step": 6600 }, { "epoch": 2.57, "learning_rate": 1.9657734627831716e-05, "loss": 0.4532, "step": 6610 }, { "epoch": 2.57, "learning_rate": 1.9657216828478968e-05, "loss": 0.1642, "step": 6620 }, { "epoch": 2.57, "learning_rate": 1.9656699029126215e-05, "loss": 0.2477, "step": 6630 }, { "epoch": 2.58, "learning_rate": 1.9656181229773463e-05, "loss": 0.1566, "step": 6640 }, { "epoch": 2.58, "learning_rate": 1.965566343042071e-05, "loss": 0.3306, "step": 6650 }, { "epoch": 2.59, "learning_rate": 1.9655145631067963e-05, "loss": 0.232, "step": 6660 }, { "epoch": 2.59, "learning_rate": 1.965462783171521e-05, "loss": 0.1865, "step": 6670 }, { "epoch": 2.59, "learning_rate": 1.9654110032362462e-05, "loss": 0.3633, "step": 6680 }, { "epoch": 2.6, "learning_rate": 1.965359223300971e-05, "loss": 0.3195, "step": 6690 }, { "epoch": 2.6, "learning_rate": 1.965307443365696e-05, "loss": 0.1437, "step": 6700 }, { "epoch": 2.61, "learning_rate": 1.965255663430421e-05, "loss": 0.2772, "step": 6710 }, { "epoch": 2.61, "learning_rate": 1.9652038834951457e-05, "loss": 0.4101, "step": 6720 }, { "epoch": 2.61, "learning_rate": 1.9651521035598705e-05, "loss": 0.1842, "step": 6730 }, { "epoch": 2.62, "learning_rate": 1.9651003236245956e-05, "loss": 0.2516, "step": 6740 }, { "epoch": 2.62, "learning_rate": 1.9650485436893204e-05, "loss": 0.3446, "step": 6750 }, { "epoch": 2.63, "learning_rate": 1.9649967637540455e-05, "loss": 0.3189, "step": 6760 }, { "epoch": 2.63, "learning_rate": 1.9649449838187703e-05, "loss": 0.159, "step": 6770 }, { "epoch": 2.63, "learning_rate": 1.9648932038834955e-05, "loss": 0.1236, "step": 6780 }, { "epoch": 2.64, "learning_rate": 1.9648414239482203e-05, "loss": 0.2854, "step": 6790 }, { "epoch": 2.64, "learning_rate": 1.9647896440129454e-05, "loss": 0.1754, "step": 6800 }, { "epoch": 2.64, "learning_rate": 1.96473786407767e-05, "loss": 0.2756, "step": 6810 }, { "epoch": 2.65, "learning_rate": 1.964686084142395e-05, "loss": 0.5186, "step": 6820 }, { "epoch": 2.65, "learning_rate": 1.9646343042071198e-05, "loss": 0.2379, "step": 6830 }, { "epoch": 2.66, "learning_rate": 1.964582524271845e-05, "loss": 0.3095, "step": 6840 }, { "epoch": 2.66, "learning_rate": 1.9645307443365697e-05, "loss": 0.1464, "step": 6850 }, { "epoch": 2.66, "learning_rate": 1.9644789644012948e-05, "loss": 0.213, "step": 6860 }, { "epoch": 2.67, "learning_rate": 1.9644271844660196e-05, "loss": 0.1876, "step": 6870 }, { "epoch": 2.67, "learning_rate": 1.9643754045307447e-05, "loss": 0.1391, "step": 6880 }, { "epoch": 2.68, "learning_rate": 1.9643236245954692e-05, "loss": 0.1769, "step": 6890 }, { "epoch": 2.68, "learning_rate": 1.9642718446601943e-05, "loss": 0.4605, "step": 6900 }, { "epoch": 2.68, "learning_rate": 1.964220064724919e-05, "loss": 0.229, "step": 6910 }, { "epoch": 2.69, "learning_rate": 1.9641682847896443e-05, "loss": 0.2681, "step": 6920 }, { "epoch": 2.69, "learning_rate": 1.964116504854369e-05, "loss": 0.3891, "step": 6930 }, { "epoch": 2.7, "learning_rate": 1.9640647249190942e-05, "loss": 0.2151, "step": 6940 }, { "epoch": 2.7, "learning_rate": 1.964012944983819e-05, "loss": 0.4364, "step": 6950 }, { "epoch": 2.7, "learning_rate": 1.963961165048544e-05, "loss": 0.3009, "step": 6960 }, { "epoch": 2.71, "learning_rate": 1.9639093851132686e-05, "loss": 0.0845, "step": 6970 }, { "epoch": 2.71, "learning_rate": 1.9638576051779937e-05, "loss": 0.1373, "step": 6980 }, { "epoch": 2.71, "learning_rate": 1.9638058252427185e-05, "loss": 0.1446, "step": 6990 }, { "epoch": 2.72, "learning_rate": 1.9637540453074436e-05, "loss": 0.1031, "step": 7000 }, { "epoch": 2.72, "learning_rate": 1.9637022653721684e-05, "loss": 0.2145, "step": 7010 }, { "epoch": 2.73, "learning_rate": 1.9636504854368935e-05, "loss": 0.1547, "step": 7020 }, { "epoch": 2.73, "learning_rate": 1.9635987055016183e-05, "loss": 0.2423, "step": 7030 }, { "epoch": 2.73, "learning_rate": 1.9635469255663435e-05, "loss": 0.4544, "step": 7040 }, { "epoch": 2.74, "learning_rate": 1.963495145631068e-05, "loss": 0.421, "step": 7050 }, { "epoch": 2.74, "learning_rate": 1.963443365695793e-05, "loss": 0.3462, "step": 7060 }, { "epoch": 2.75, "learning_rate": 1.963391585760518e-05, "loss": 0.1668, "step": 7070 }, { "epoch": 2.75, "learning_rate": 1.963339805825243e-05, "loss": 0.1112, "step": 7080 }, { "epoch": 2.75, "learning_rate": 1.9632880258899678e-05, "loss": 0.1069, "step": 7090 }, { "epoch": 2.76, "learning_rate": 1.963236245954693e-05, "loss": 0.1501, "step": 7100 }, { "epoch": 2.76, "learning_rate": 1.9631844660194177e-05, "loss": 0.2587, "step": 7110 }, { "epoch": 2.77, "learning_rate": 1.9631326860841428e-05, "loss": 0.163, "step": 7120 }, { "epoch": 2.77, "learning_rate": 1.9630809061488673e-05, "loss": 0.3376, "step": 7130 }, { "epoch": 2.77, "learning_rate": 1.9630291262135924e-05, "loss": 0.2132, "step": 7140 }, { "epoch": 2.78, "learning_rate": 1.9629773462783172e-05, "loss": 0.2756, "step": 7150 }, { "epoch": 2.78, "learning_rate": 1.9629255663430423e-05, "loss": 0.22, "step": 7160 }, { "epoch": 2.78, "learning_rate": 1.962873786407767e-05, "loss": 0.1825, "step": 7170 }, { "epoch": 2.79, "learning_rate": 1.9628220064724922e-05, "loss": 0.204, "step": 7180 }, { "epoch": 2.79, "learning_rate": 1.962770226537217e-05, "loss": 0.1456, "step": 7190 }, { "epoch": 2.8, "learning_rate": 1.962718446601942e-05, "loss": 0.0983, "step": 7200 }, { "epoch": 2.8, "learning_rate": 1.9626666666666666e-05, "loss": 0.252, "step": 7210 }, { "epoch": 2.8, "learning_rate": 1.9626148867313918e-05, "loss": 0.1811, "step": 7220 }, { "epoch": 2.81, "learning_rate": 1.9625631067961165e-05, "loss": 0.2915, "step": 7230 }, { "epoch": 2.81, "learning_rate": 1.9625113268608417e-05, "loss": 0.3266, "step": 7240 }, { "epoch": 2.82, "learning_rate": 1.9624595469255665e-05, "loss": 0.3237, "step": 7250 }, { "epoch": 2.82, "learning_rate": 1.9624077669902916e-05, "loss": 0.475, "step": 7260 }, { "epoch": 2.82, "learning_rate": 1.9623559870550164e-05, "loss": 0.4171, "step": 7270 }, { "epoch": 2.83, "learning_rate": 1.9623042071197412e-05, "loss": 0.4969, "step": 7280 }, { "epoch": 2.83, "learning_rate": 1.9622524271844663e-05, "loss": 0.1132, "step": 7290 }, { "epoch": 2.83, "learning_rate": 1.962200647249191e-05, "loss": 0.1173, "step": 7300 }, { "epoch": 2.84, "learning_rate": 1.962148867313916e-05, "loss": 0.2134, "step": 7310 }, { "epoch": 2.84, "learning_rate": 1.962097087378641e-05, "loss": 0.1437, "step": 7320 }, { "epoch": 2.85, "learning_rate": 1.9620453074433658e-05, "loss": 0.1454, "step": 7330 }, { "epoch": 2.85, "learning_rate": 1.961993527508091e-05, "loss": 0.1514, "step": 7340 }, { "epoch": 2.85, "learning_rate": 1.9619417475728157e-05, "loss": 0.2684, "step": 7350 }, { "epoch": 2.86, "learning_rate": 1.9618899676375405e-05, "loss": 0.2487, "step": 7360 }, { "epoch": 2.86, "learning_rate": 1.9618381877022657e-05, "loss": 0.2238, "step": 7370 }, { "epoch": 2.87, "learning_rate": 1.9617864077669905e-05, "loss": 0.137, "step": 7380 }, { "epoch": 2.87, "learning_rate": 1.9617346278317153e-05, "loss": 0.3095, "step": 7390 }, { "epoch": 2.87, "learning_rate": 1.9616828478964404e-05, "loss": 0.3518, "step": 7400 }, { "epoch": 2.88, "learning_rate": 1.9616310679611652e-05, "loss": 0.1554, "step": 7410 }, { "epoch": 2.88, "learning_rate": 1.9615792880258903e-05, "loss": 0.2279, "step": 7420 }, { "epoch": 2.89, "learning_rate": 1.961527508090615e-05, "loss": 0.4632, "step": 7430 }, { "epoch": 2.89, "learning_rate": 1.96147572815534e-05, "loss": 0.2284, "step": 7440 }, { "epoch": 2.89, "learning_rate": 1.961423948220065e-05, "loss": 0.1091, "step": 7450 }, { "epoch": 2.9, "learning_rate": 1.9613721682847898e-05, "loss": 0.2456, "step": 7460 }, { "epoch": 2.9, "learning_rate": 1.9613203883495146e-05, "loss": 0.1793, "step": 7470 }, { "epoch": 2.9, "learning_rate": 1.9612686084142397e-05, "loss": 0.3559, "step": 7480 }, { "epoch": 2.91, "learning_rate": 1.9612168284789645e-05, "loss": 0.2734, "step": 7490 }, { "epoch": 2.91, "learning_rate": 1.9611650485436893e-05, "loss": 0.3259, "step": 7500 }, { "epoch": 2.92, "learning_rate": 1.9611132686084145e-05, "loss": 0.157, "step": 7510 }, { "epoch": 2.92, "learning_rate": 1.9610614886731393e-05, "loss": 0.2073, "step": 7520 }, { "epoch": 2.92, "learning_rate": 1.9610097087378644e-05, "loss": 0.1661, "step": 7530 }, { "epoch": 2.93, "learning_rate": 1.9609579288025892e-05, "loss": 0.2655, "step": 7540 }, { "epoch": 2.93, "learning_rate": 1.960906148867314e-05, "loss": 0.438, "step": 7550 }, { "epoch": 2.94, "learning_rate": 1.960854368932039e-05, "loss": 0.2627, "step": 7560 }, { "epoch": 2.94, "learning_rate": 1.960802588996764e-05, "loss": 0.1767, "step": 7570 }, { "epoch": 2.94, "learning_rate": 1.9607508090614887e-05, "loss": 0.1808, "step": 7580 }, { "epoch": 2.95, "learning_rate": 1.9606990291262138e-05, "loss": 0.1576, "step": 7590 }, { "epoch": 2.95, "learning_rate": 1.9606472491909386e-05, "loss": 0.1789, "step": 7600 }, { "epoch": 2.96, "learning_rate": 1.9605954692556637e-05, "loss": 0.0895, "step": 7610 }, { "epoch": 2.96, "learning_rate": 1.9605436893203885e-05, "loss": 0.2316, "step": 7620 }, { "epoch": 2.96, "learning_rate": 1.9604919093851133e-05, "loss": 0.1546, "step": 7630 }, { "epoch": 2.97, "learning_rate": 1.9604401294498385e-05, "loss": 0.1032, "step": 7640 }, { "epoch": 2.97, "learning_rate": 1.9603883495145632e-05, "loss": 0.2615, "step": 7650 }, { "epoch": 2.97, "learning_rate": 1.960336569579288e-05, "loss": 0.2983, "step": 7660 }, { "epoch": 2.98, "learning_rate": 1.9602847896440132e-05, "loss": 0.1713, "step": 7670 }, { "epoch": 2.98, "learning_rate": 1.960233009708738e-05, "loss": 0.3184, "step": 7680 }, { "epoch": 2.99, "learning_rate": 1.960181229773463e-05, "loss": 0.2289, "step": 7690 }, { "epoch": 2.99, "learning_rate": 1.960129449838188e-05, "loss": 0.1238, "step": 7700 }, { "epoch": 2.99, "learning_rate": 1.9600776699029127e-05, "loss": 0.3203, "step": 7710 }, { "epoch": 3.0, "learning_rate": 1.9600258899676378e-05, "loss": 0.3157, "step": 7720 }, { "epoch": 3.0, "eval_accuracy": 0.9515818431911967, "eval_loss": 0.18269842863082886, "eval_runtime": 8.2373, "eval_samples_per_second": 441.284, "eval_steps_per_second": 55.236, "step": 7725 }, { "epoch": 3.0, "learning_rate": 1.9599741100323626e-05, "loss": 0.0376, "step": 7730 }, { "epoch": 3.01, "learning_rate": 1.9599223300970874e-05, "loss": 0.4194, "step": 7740 }, { "epoch": 3.01, "learning_rate": 1.9598705501618125e-05, "loss": 0.1455, "step": 7750 }, { "epoch": 3.01, "learning_rate": 1.9598187702265373e-05, "loss": 0.4301, "step": 7760 }, { "epoch": 3.02, "learning_rate": 1.9597669902912625e-05, "loss": 0.2781, "step": 7770 }, { "epoch": 3.02, "learning_rate": 1.9597152103559872e-05, "loss": 0.3277, "step": 7780 }, { "epoch": 3.03, "learning_rate": 1.959663430420712e-05, "loss": 0.106, "step": 7790 }, { "epoch": 3.03, "learning_rate": 1.959611650485437e-05, "loss": 0.2019, "step": 7800 }, { "epoch": 3.03, "learning_rate": 1.959559870550162e-05, "loss": 0.1731, "step": 7810 }, { "epoch": 3.04, "learning_rate": 1.9595080906148868e-05, "loss": 0.3396, "step": 7820 }, { "epoch": 3.04, "learning_rate": 1.959456310679612e-05, "loss": 0.4465, "step": 7830 }, { "epoch": 3.04, "learning_rate": 1.9594045307443367e-05, "loss": 0.3698, "step": 7840 }, { "epoch": 3.05, "learning_rate": 1.9593527508090618e-05, "loss": 0.2267, "step": 7850 }, { "epoch": 3.05, "learning_rate": 1.9593009708737866e-05, "loss": 0.3049, "step": 7860 }, { "epoch": 3.06, "learning_rate": 1.9592491909385114e-05, "loss": 0.2227, "step": 7870 }, { "epoch": 3.06, "learning_rate": 1.9591974110032362e-05, "loss": 0.168, "step": 7880 }, { "epoch": 3.06, "learning_rate": 1.9591456310679613e-05, "loss": 0.3125, "step": 7890 }, { "epoch": 3.07, "learning_rate": 1.959093851132686e-05, "loss": 0.2256, "step": 7900 }, { "epoch": 3.07, "learning_rate": 1.9590420711974112e-05, "loss": 0.2052, "step": 7910 }, { "epoch": 3.08, "learning_rate": 1.958990291262136e-05, "loss": 0.2826, "step": 7920 }, { "epoch": 3.08, "learning_rate": 1.958938511326861e-05, "loss": 0.2107, "step": 7930 }, { "epoch": 3.08, "learning_rate": 1.958886731391586e-05, "loss": 0.3363, "step": 7940 }, { "epoch": 3.09, "learning_rate": 1.9588349514563107e-05, "loss": 0.221, "step": 7950 }, { "epoch": 3.09, "learning_rate": 1.9587831715210355e-05, "loss": 0.0989, "step": 7960 }, { "epoch": 3.1, "learning_rate": 1.9587313915857607e-05, "loss": 0.1815, "step": 7970 }, { "epoch": 3.1, "learning_rate": 1.9586796116504855e-05, "loss": 0.1745, "step": 7980 }, { "epoch": 3.1, "learning_rate": 1.9586278317152106e-05, "loss": 0.1451, "step": 7990 }, { "epoch": 3.11, "learning_rate": 1.9585760517799354e-05, "loss": 0.1384, "step": 8000 }, { "epoch": 3.11, "learning_rate": 1.9585242718446605e-05, "loss": 0.1165, "step": 8010 }, { "epoch": 3.11, "learning_rate": 1.9584724919093853e-05, "loss": 0.137, "step": 8020 }, { "epoch": 3.12, "learning_rate": 1.95842071197411e-05, "loss": 0.3212, "step": 8030 }, { "epoch": 3.12, "learning_rate": 1.958368932038835e-05, "loss": 0.2297, "step": 8040 }, { "epoch": 3.13, "learning_rate": 1.95831715210356e-05, "loss": 0.5524, "step": 8050 }, { "epoch": 3.13, "learning_rate": 1.9582653721682848e-05, "loss": 0.2841, "step": 8060 }, { "epoch": 3.13, "learning_rate": 1.95821359223301e-05, "loss": 0.1424, "step": 8070 }, { "epoch": 3.14, "learning_rate": 1.9581618122977347e-05, "loss": 0.1698, "step": 8080 }, { "epoch": 3.14, "learning_rate": 1.95811003236246e-05, "loss": 0.2587, "step": 8090 }, { "epoch": 3.15, "learning_rate": 1.9580582524271847e-05, "loss": 0.1922, "step": 8100 }, { "epoch": 3.15, "learning_rate": 1.9580064724919095e-05, "loss": 0.1846, "step": 8110 }, { "epoch": 3.15, "learning_rate": 1.9579546925566343e-05, "loss": 0.2551, "step": 8120 }, { "epoch": 3.16, "learning_rate": 1.9579029126213594e-05, "loss": 0.3196, "step": 8130 }, { "epoch": 3.16, "learning_rate": 1.9578511326860842e-05, "loss": 0.1019, "step": 8140 }, { "epoch": 3.17, "learning_rate": 1.9577993527508093e-05, "loss": 0.1025, "step": 8150 }, { "epoch": 3.17, "learning_rate": 1.957747572815534e-05, "loss": 0.152, "step": 8160 }, { "epoch": 3.17, "learning_rate": 1.9576957928802592e-05, "loss": 0.0749, "step": 8170 }, { "epoch": 3.18, "learning_rate": 1.957644012944984e-05, "loss": 0.1116, "step": 8180 }, { "epoch": 3.18, "learning_rate": 1.9575922330097088e-05, "loss": 0.2604, "step": 8190 }, { "epoch": 3.18, "learning_rate": 1.9575404530744336e-05, "loss": 0.4164, "step": 8200 }, { "epoch": 3.19, "learning_rate": 1.9574886731391587e-05, "loss": 0.1808, "step": 8210 }, { "epoch": 3.19, "learning_rate": 1.9574368932038835e-05, "loss": 0.2979, "step": 8220 }, { "epoch": 3.2, "learning_rate": 1.9573851132686087e-05, "loss": 0.2347, "step": 8230 }, { "epoch": 3.2, "learning_rate": 1.9573333333333335e-05, "loss": 0.2159, "step": 8240 }, { "epoch": 3.2, "learning_rate": 1.9572815533980586e-05, "loss": 0.4439, "step": 8250 }, { "epoch": 3.21, "learning_rate": 1.9572297734627834e-05, "loss": 0.2083, "step": 8260 }, { "epoch": 3.21, "learning_rate": 1.9571779935275082e-05, "loss": 0.0448, "step": 8270 }, { "epoch": 3.22, "learning_rate": 1.957126213592233e-05, "loss": 0.2388, "step": 8280 }, { "epoch": 3.22, "learning_rate": 1.957074433656958e-05, "loss": 0.1335, "step": 8290 }, { "epoch": 3.22, "learning_rate": 1.957022653721683e-05, "loss": 0.4199, "step": 8300 }, { "epoch": 3.23, "learning_rate": 1.956970873786408e-05, "loss": 0.1894, "step": 8310 }, { "epoch": 3.23, "learning_rate": 1.9569190938511328e-05, "loss": 0.1142, "step": 8320 }, { "epoch": 3.23, "learning_rate": 1.956867313915858e-05, "loss": 0.1971, "step": 8330 }, { "epoch": 3.24, "learning_rate": 1.9568155339805827e-05, "loss": 0.2359, "step": 8340 }, { "epoch": 3.24, "learning_rate": 1.9567637540453075e-05, "loss": 0.2582, "step": 8350 }, { "epoch": 3.25, "learning_rate": 1.9567119741100323e-05, "loss": 0.1369, "step": 8360 }, { "epoch": 3.25, "learning_rate": 1.9566601941747574e-05, "loss": 0.3086, "step": 8370 }, { "epoch": 3.25, "learning_rate": 1.9566084142394822e-05, "loss": 0.3685, "step": 8380 }, { "epoch": 3.26, "learning_rate": 1.9565566343042074e-05, "loss": 0.3185, "step": 8390 }, { "epoch": 3.26, "learning_rate": 1.956504854368932e-05, "loss": 0.2913, "step": 8400 }, { "epoch": 3.27, "learning_rate": 1.9564530744336573e-05, "loss": 0.139, "step": 8410 }, { "epoch": 3.27, "learning_rate": 1.956401294498382e-05, "loss": 0.2689, "step": 8420 }, { "epoch": 3.27, "learning_rate": 1.9563495145631072e-05, "loss": 0.2636, "step": 8430 }, { "epoch": 3.28, "learning_rate": 1.9562977346278317e-05, "loss": 0.2229, "step": 8440 }, { "epoch": 3.28, "learning_rate": 1.9562459546925568e-05, "loss": 0.1888, "step": 8450 }, { "epoch": 3.29, "learning_rate": 1.9561941747572816e-05, "loss": 0.1098, "step": 8460 }, { "epoch": 3.29, "learning_rate": 1.9561423948220067e-05, "loss": 0.13, "step": 8470 }, { "epoch": 3.29, "learning_rate": 1.9560906148867315e-05, "loss": 0.1429, "step": 8480 }, { "epoch": 3.3, "learning_rate": 1.9560388349514567e-05, "loss": 0.2206, "step": 8490 }, { "epoch": 3.3, "learning_rate": 1.9559870550161814e-05, "loss": 0.1083, "step": 8500 }, { "epoch": 3.3, "learning_rate": 1.9559352750809066e-05, "loss": 0.0738, "step": 8510 }, { "epoch": 3.31, "learning_rate": 1.955883495145631e-05, "loss": 0.2183, "step": 8520 }, { "epoch": 3.31, "learning_rate": 1.955831715210356e-05, "loss": 0.1192, "step": 8530 }, { "epoch": 3.32, "learning_rate": 1.955779935275081e-05, "loss": 0.3058, "step": 8540 }, { "epoch": 3.32, "learning_rate": 1.955728155339806e-05, "loss": 0.3926, "step": 8550 }, { "epoch": 3.32, "learning_rate": 1.955676375404531e-05, "loss": 0.1216, "step": 8560 }, { "epoch": 3.33, "learning_rate": 1.955624595469256e-05, "loss": 0.2993, "step": 8570 }, { "epoch": 3.33, "learning_rate": 1.9555728155339808e-05, "loss": 0.2026, "step": 8580 }, { "epoch": 3.34, "learning_rate": 1.955521035598706e-05, "loss": 0.2474, "step": 8590 }, { "epoch": 3.34, "learning_rate": 1.9554692556634304e-05, "loss": 0.1775, "step": 8600 }, { "epoch": 3.34, "learning_rate": 1.9554174757281555e-05, "loss": 0.2943, "step": 8610 }, { "epoch": 3.35, "learning_rate": 1.9553656957928803e-05, "loss": 0.2321, "step": 8620 }, { "epoch": 3.35, "learning_rate": 1.9553139158576054e-05, "loss": 0.148, "step": 8630 }, { "epoch": 3.36, "learning_rate": 1.9552621359223302e-05, "loss": 0.1469, "step": 8640 }, { "epoch": 3.36, "learning_rate": 1.9552103559870554e-05, "loss": 0.1932, "step": 8650 }, { "epoch": 3.36, "learning_rate": 1.95515857605178e-05, "loss": 0.1786, "step": 8660 }, { "epoch": 3.37, "learning_rate": 1.955106796116505e-05, "loss": 0.3065, "step": 8670 }, { "epoch": 3.37, "learning_rate": 1.9550550161812297e-05, "loss": 0.125, "step": 8680 }, { "epoch": 3.37, "learning_rate": 1.955003236245955e-05, "loss": 0.1871, "step": 8690 }, { "epoch": 3.38, "learning_rate": 1.9549514563106797e-05, "loss": 0.41, "step": 8700 }, { "epoch": 3.38, "learning_rate": 1.9548996763754048e-05, "loss": 0.1822, "step": 8710 }, { "epoch": 3.39, "learning_rate": 1.9548478964401296e-05, "loss": 0.1299, "step": 8720 }, { "epoch": 3.39, "learning_rate": 1.9547961165048547e-05, "loss": 0.194, "step": 8730 }, { "epoch": 3.39, "learning_rate": 1.9547443365695795e-05, "loss": 0.1477, "step": 8740 }, { "epoch": 3.4, "learning_rate": 1.9546925566343043e-05, "loss": 0.4681, "step": 8750 }, { "epoch": 3.4, "learning_rate": 1.954640776699029e-05, "loss": 0.4321, "step": 8760 }, { "epoch": 3.41, "learning_rate": 1.9545889967637542e-05, "loss": 0.2347, "step": 8770 }, { "epoch": 3.41, "learning_rate": 1.954537216828479e-05, "loss": 0.2222, "step": 8780 }, { "epoch": 3.41, "learning_rate": 1.954485436893204e-05, "loss": 0.116, "step": 8790 }, { "epoch": 3.42, "learning_rate": 1.954433656957929e-05, "loss": 0.1101, "step": 8800 }, { "epoch": 3.42, "learning_rate": 1.954381877022654e-05, "loss": 0.1199, "step": 8810 }, { "epoch": 3.43, "learning_rate": 1.954330097087379e-05, "loss": 0.2333, "step": 8820 }, { "epoch": 3.43, "learning_rate": 1.9542783171521037e-05, "loss": 0.291, "step": 8830 }, { "epoch": 3.43, "learning_rate": 1.9542265372168285e-05, "loss": 0.1248, "step": 8840 }, { "epoch": 3.44, "learning_rate": 1.9541747572815536e-05, "loss": 0.2109, "step": 8850 }, { "epoch": 3.44, "learning_rate": 1.9541229773462784e-05, "loss": 0.1833, "step": 8860 }, { "epoch": 3.44, "learning_rate": 1.9540711974110035e-05, "loss": 0.196, "step": 8870 }, { "epoch": 3.45, "learning_rate": 1.9540194174757283e-05, "loss": 0.1072, "step": 8880 }, { "epoch": 3.45, "learning_rate": 1.9539676375404534e-05, "loss": 0.2757, "step": 8890 }, { "epoch": 3.46, "learning_rate": 1.9539158576051782e-05, "loss": 0.2158, "step": 8900 }, { "epoch": 3.46, "learning_rate": 1.953864077669903e-05, "loss": 0.3908, "step": 8910 }, { "epoch": 3.46, "learning_rate": 1.9538122977346278e-05, "loss": 0.4817, "step": 8920 }, { "epoch": 3.47, "learning_rate": 1.953760517799353e-05, "loss": 0.2603, "step": 8930 }, { "epoch": 3.47, "learning_rate": 1.9537087378640777e-05, "loss": 0.0576, "step": 8940 }, { "epoch": 3.48, "learning_rate": 1.953656957928803e-05, "loss": 0.0653, "step": 8950 }, { "epoch": 3.48, "learning_rate": 1.9536051779935277e-05, "loss": 0.1654, "step": 8960 }, { "epoch": 3.48, "learning_rate": 1.9535533980582524e-05, "loss": 0.2734, "step": 8970 }, { "epoch": 3.49, "learning_rate": 1.9535016181229776e-05, "loss": 0.2598, "step": 8980 }, { "epoch": 3.49, "learning_rate": 1.9534498381877024e-05, "loss": 0.2994, "step": 8990 }, { "epoch": 3.5, "learning_rate": 1.9533980582524275e-05, "loss": 0.1238, "step": 9000 }, { "epoch": 3.5, "learning_rate": 1.9533462783171523e-05, "loss": 0.1811, "step": 9010 }, { "epoch": 3.5, "learning_rate": 1.953294498381877e-05, "loss": 0.3206, "step": 9020 }, { "epoch": 3.51, "learning_rate": 1.9532427184466022e-05, "loss": 0.2931, "step": 9030 }, { "epoch": 3.51, "learning_rate": 1.953190938511327e-05, "loss": 0.2408, "step": 9040 }, { "epoch": 3.51, "learning_rate": 1.9531391585760518e-05, "loss": 0.2155, "step": 9050 }, { "epoch": 3.52, "learning_rate": 1.953087378640777e-05, "loss": 0.2846, "step": 9060 }, { "epoch": 3.52, "learning_rate": 1.9530355987055017e-05, "loss": 0.3123, "step": 9070 }, { "epoch": 3.53, "learning_rate": 1.952983818770227e-05, "loss": 0.0593, "step": 9080 }, { "epoch": 3.53, "learning_rate": 1.9529320388349516e-05, "loss": 0.2538, "step": 9090 }, { "epoch": 3.53, "learning_rate": 1.9528802588996764e-05, "loss": 0.3458, "step": 9100 }, { "epoch": 3.54, "learning_rate": 1.9528284789644016e-05, "loss": 0.1533, "step": 9110 }, { "epoch": 3.54, "learning_rate": 1.9527766990291264e-05, "loss": 0.2968, "step": 9120 }, { "epoch": 3.55, "learning_rate": 1.952724919093851e-05, "loss": 0.1229, "step": 9130 }, { "epoch": 3.55, "learning_rate": 1.9526731391585763e-05, "loss": 0.1086, "step": 9140 }, { "epoch": 3.55, "learning_rate": 1.952621359223301e-05, "loss": 0.2779, "step": 9150 }, { "epoch": 3.56, "learning_rate": 1.9525695792880262e-05, "loss": 0.2477, "step": 9160 }, { "epoch": 3.56, "learning_rate": 1.952517799352751e-05, "loss": 0.1254, "step": 9170 }, { "epoch": 3.57, "learning_rate": 1.9524660194174758e-05, "loss": 0.3448, "step": 9180 }, { "epoch": 3.57, "learning_rate": 1.952414239482201e-05, "loss": 0.2008, "step": 9190 }, { "epoch": 3.57, "learning_rate": 1.9523624595469257e-05, "loss": 0.1633, "step": 9200 }, { "epoch": 3.58, "learning_rate": 1.9523106796116505e-05, "loss": 0.1776, "step": 9210 }, { "epoch": 3.58, "learning_rate": 1.9522588996763756e-05, "loss": 0.2706, "step": 9220 }, { "epoch": 3.58, "learning_rate": 1.9522071197411004e-05, "loss": 0.1618, "step": 9230 }, { "epoch": 3.59, "learning_rate": 1.9521553398058256e-05, "loss": 0.2364, "step": 9240 }, { "epoch": 3.59, "learning_rate": 1.9521035598705504e-05, "loss": 0.2281, "step": 9250 }, { "epoch": 3.6, "learning_rate": 1.952051779935275e-05, "loss": 0.1844, "step": 9260 }, { "epoch": 3.6, "learning_rate": 1.9520000000000003e-05, "loss": 0.3725, "step": 9270 }, { "epoch": 3.6, "learning_rate": 1.951948220064725e-05, "loss": 0.208, "step": 9280 }, { "epoch": 3.61, "learning_rate": 1.95189644012945e-05, "loss": 0.1317, "step": 9290 }, { "epoch": 3.61, "learning_rate": 1.951844660194175e-05, "loss": 0.2119, "step": 9300 }, { "epoch": 3.62, "learning_rate": 1.9517928802588998e-05, "loss": 0.1847, "step": 9310 }, { "epoch": 3.62, "learning_rate": 1.951741100323625e-05, "loss": 0.1502, "step": 9320 }, { "epoch": 3.62, "learning_rate": 1.9516893203883497e-05, "loss": 0.0884, "step": 9330 }, { "epoch": 3.63, "learning_rate": 1.9516375404530745e-05, "loss": 0.3843, "step": 9340 }, { "epoch": 3.63, "learning_rate": 1.9515857605177993e-05, "loss": 0.2725, "step": 9350 }, { "epoch": 3.63, "learning_rate": 1.9515339805825244e-05, "loss": 0.0753, "step": 9360 }, { "epoch": 3.64, "learning_rate": 1.9514822006472492e-05, "loss": 0.3185, "step": 9370 }, { "epoch": 3.64, "learning_rate": 1.9514304207119744e-05, "loss": 0.0921, "step": 9380 }, { "epoch": 3.65, "learning_rate": 1.951378640776699e-05, "loss": 0.2169, "step": 9390 }, { "epoch": 3.65, "learning_rate": 1.9513268608414243e-05, "loss": 0.1702, "step": 9400 }, { "epoch": 3.65, "learning_rate": 1.951275080906149e-05, "loss": 0.2917, "step": 9410 }, { "epoch": 3.66, "learning_rate": 1.951223300970874e-05, "loss": 0.1971, "step": 9420 }, { "epoch": 3.66, "learning_rate": 1.9511715210355987e-05, "loss": 0.2863, "step": 9430 }, { "epoch": 3.67, "learning_rate": 1.9511197411003238e-05, "loss": 0.1765, "step": 9440 }, { "epoch": 3.67, "learning_rate": 1.9510679611650486e-05, "loss": 0.2719, "step": 9450 }, { "epoch": 3.67, "learning_rate": 1.9510161812297737e-05, "loss": 0.2328, "step": 9460 }, { "epoch": 3.68, "learning_rate": 1.9509644012944985e-05, "loss": 0.2412, "step": 9470 }, { "epoch": 3.68, "learning_rate": 1.9509126213592236e-05, "loss": 0.2867, "step": 9480 }, { "epoch": 3.69, "learning_rate": 1.9508608414239484e-05, "loss": 0.1914, "step": 9490 }, { "epoch": 3.69, "learning_rate": 1.9508090614886732e-05, "loss": 0.1623, "step": 9500 }, { "epoch": 3.69, "learning_rate": 1.950757281553398e-05, "loss": 0.1388, "step": 9510 }, { "epoch": 3.7, "learning_rate": 1.950705501618123e-05, "loss": 0.1678, "step": 9520 }, { "epoch": 3.7, "learning_rate": 1.950653721682848e-05, "loss": 0.3427, "step": 9530 }, { "epoch": 3.7, "learning_rate": 1.950601941747573e-05, "loss": 0.0431, "step": 9540 }, { "epoch": 3.71, "learning_rate": 1.950550161812298e-05, "loss": 0.2108, "step": 9550 }, { "epoch": 3.71, "learning_rate": 1.950498381877023e-05, "loss": 0.2641, "step": 9560 }, { "epoch": 3.72, "learning_rate": 1.9504466019417478e-05, "loss": 0.3133, "step": 9570 }, { "epoch": 3.72, "learning_rate": 1.9503948220064726e-05, "loss": 0.489, "step": 9580 }, { "epoch": 3.72, "learning_rate": 1.9503430420711974e-05, "loss": 0.358, "step": 9590 }, { "epoch": 3.73, "learning_rate": 1.9502912621359225e-05, "loss": 0.1569, "step": 9600 }, { "epoch": 3.73, "learning_rate": 1.9502394822006473e-05, "loss": 0.3264, "step": 9610 }, { "epoch": 3.74, "learning_rate": 1.9501877022653724e-05, "loss": 0.0809, "step": 9620 }, { "epoch": 3.74, "learning_rate": 1.9501359223300972e-05, "loss": 0.4272, "step": 9630 }, { "epoch": 3.74, "learning_rate": 1.9500841423948223e-05, "loss": 0.2774, "step": 9640 }, { "epoch": 3.75, "learning_rate": 1.950032362459547e-05, "loss": 0.2113, "step": 9650 }, { "epoch": 3.75, "learning_rate": 1.949980582524272e-05, "loss": 0.2256, "step": 9660 }, { "epoch": 3.76, "learning_rate": 1.9499288025889967e-05, "loss": 0.083, "step": 9670 }, { "epoch": 3.76, "learning_rate": 1.949877022653722e-05, "loss": 0.0949, "step": 9680 }, { "epoch": 3.76, "learning_rate": 1.9498252427184466e-05, "loss": 0.248, "step": 9690 }, { "epoch": 3.77, "learning_rate": 1.9497734627831718e-05, "loss": 0.1129, "step": 9700 }, { "epoch": 3.77, "learning_rate": 1.9497216828478966e-05, "loss": 0.1563, "step": 9710 }, { "epoch": 3.77, "learning_rate": 1.9496699029126217e-05, "loss": 0.2122, "step": 9720 }, { "epoch": 3.78, "learning_rate": 1.9496181229773465e-05, "loss": 0.1765, "step": 9730 }, { "epoch": 3.78, "learning_rate": 1.9495663430420713e-05, "loss": 0.2865, "step": 9740 }, { "epoch": 3.79, "learning_rate": 1.949514563106796e-05, "loss": 0.274, "step": 9750 }, { "epoch": 3.79, "learning_rate": 1.9494627831715212e-05, "loss": 0.2661, "step": 9760 }, { "epoch": 3.79, "learning_rate": 1.949411003236246e-05, "loss": 0.1903, "step": 9770 }, { "epoch": 3.8, "learning_rate": 1.949359223300971e-05, "loss": 0.1783, "step": 9780 }, { "epoch": 3.8, "learning_rate": 1.949307443365696e-05, "loss": 0.1957, "step": 9790 }, { "epoch": 3.81, "learning_rate": 1.949255663430421e-05, "loss": 0.1045, "step": 9800 }, { "epoch": 3.81, "learning_rate": 1.949203883495146e-05, "loss": 0.1721, "step": 9810 }, { "epoch": 3.81, "learning_rate": 1.9491521035598706e-05, "loss": 0.2984, "step": 9820 }, { "epoch": 3.82, "learning_rate": 1.9491003236245954e-05, "loss": 0.1043, "step": 9830 }, { "epoch": 3.82, "learning_rate": 1.9490485436893206e-05, "loss": 0.2982, "step": 9840 }, { "epoch": 3.83, "learning_rate": 1.9489967637540454e-05, "loss": 0.0695, "step": 9850 }, { "epoch": 3.83, "learning_rate": 1.9489449838187705e-05, "loss": 0.1379, "step": 9860 }, { "epoch": 3.83, "learning_rate": 1.9488932038834953e-05, "loss": 0.6523, "step": 9870 }, { "epoch": 3.84, "learning_rate": 1.9488414239482204e-05, "loss": 0.1844, "step": 9880 }, { "epoch": 3.84, "learning_rate": 1.9487896440129452e-05, "loss": 0.1405, "step": 9890 }, { "epoch": 3.84, "learning_rate": 1.94873786407767e-05, "loss": 0.263, "step": 9900 }, { "epoch": 3.85, "learning_rate": 1.9486860841423948e-05, "loss": 0.2052, "step": 9910 }, { "epoch": 3.85, "learning_rate": 1.94863430420712e-05, "loss": 0.3914, "step": 9920 }, { "epoch": 3.86, "learning_rate": 1.9485825242718447e-05, "loss": 0.2323, "step": 9930 }, { "epoch": 3.86, "learning_rate": 1.94853074433657e-05, "loss": 0.2894, "step": 9940 }, { "epoch": 3.86, "learning_rate": 1.9484789644012946e-05, "loss": 0.0943, "step": 9950 }, { "epoch": 3.87, "learning_rate": 1.9484271844660198e-05, "loss": 0.3892, "step": 9960 }, { "epoch": 3.87, "learning_rate": 1.9483754045307446e-05, "loss": 0.172, "step": 9970 }, { "epoch": 3.88, "learning_rate": 1.9483236245954694e-05, "loss": 0.2609, "step": 9980 }, { "epoch": 3.88, "learning_rate": 1.948271844660194e-05, "loss": 0.2567, "step": 9990 }, { "epoch": 3.88, "learning_rate": 1.9482200647249193e-05, "loss": 0.0511, "step": 10000 }, { "epoch": 3.89, "learning_rate": 1.948168284789644e-05, "loss": 0.224, "step": 10010 }, { "epoch": 3.89, "learning_rate": 1.9481165048543692e-05, "loss": 0.2571, "step": 10020 }, { "epoch": 3.9, "learning_rate": 1.948064724919094e-05, "loss": 0.1287, "step": 10030 }, { "epoch": 3.9, "learning_rate": 1.948012944983819e-05, "loss": 0.2959, "step": 10040 }, { "epoch": 3.9, "learning_rate": 1.947961165048544e-05, "loss": 0.1872, "step": 10050 }, { "epoch": 3.91, "learning_rate": 1.9479093851132687e-05, "loss": 0.1789, "step": 10060 }, { "epoch": 3.91, "learning_rate": 1.9478576051779935e-05, "loss": 0.4255, "step": 10070 }, { "epoch": 3.91, "learning_rate": 1.9478058252427186e-05, "loss": 0.1198, "step": 10080 }, { "epoch": 3.92, "learning_rate": 1.9477540453074434e-05, "loss": 0.2448, "step": 10090 }, { "epoch": 3.92, "learning_rate": 1.9477022653721686e-05, "loss": 0.1974, "step": 10100 }, { "epoch": 3.93, "learning_rate": 1.9476504854368933e-05, "loss": 0.335, "step": 10110 }, { "epoch": 3.93, "learning_rate": 1.9475987055016185e-05, "loss": 0.2979, "step": 10120 }, { "epoch": 3.93, "learning_rate": 1.9475469255663433e-05, "loss": 0.1669, "step": 10130 }, { "epoch": 3.94, "learning_rate": 1.947495145631068e-05, "loss": 0.2886, "step": 10140 }, { "epoch": 3.94, "learning_rate": 1.947443365695793e-05, "loss": 0.2445, "step": 10150 }, { "epoch": 3.95, "learning_rate": 1.947391585760518e-05, "loss": 0.3029, "step": 10160 }, { "epoch": 3.95, "learning_rate": 1.9473398058252428e-05, "loss": 0.156, "step": 10170 }, { "epoch": 3.95, "learning_rate": 1.947288025889968e-05, "loss": 0.1507, "step": 10180 }, { "epoch": 3.96, "learning_rate": 1.9472362459546927e-05, "loss": 0.133, "step": 10190 }, { "epoch": 3.96, "learning_rate": 1.947184466019418e-05, "loss": 0.2364, "step": 10200 }, { "epoch": 3.97, "learning_rate": 1.9471326860841426e-05, "loss": 0.1888, "step": 10210 }, { "epoch": 3.97, "learning_rate": 1.9470809061488674e-05, "loss": 0.3673, "step": 10220 }, { "epoch": 3.97, "learning_rate": 1.9470291262135922e-05, "loss": 0.1687, "step": 10230 }, { "epoch": 3.98, "learning_rate": 1.9469773462783173e-05, "loss": 0.082, "step": 10240 }, { "epoch": 3.98, "learning_rate": 1.946925566343042e-05, "loss": 0.2059, "step": 10250 }, { "epoch": 3.98, "learning_rate": 1.9468737864077673e-05, "loss": 0.3736, "step": 10260 }, { "epoch": 3.99, "learning_rate": 1.946822006472492e-05, "loss": 0.1831, "step": 10270 }, { "epoch": 3.99, "learning_rate": 1.9467702265372172e-05, "loss": 0.1977, "step": 10280 }, { "epoch": 4.0, "learning_rate": 1.946718446601942e-05, "loss": 0.1659, "step": 10290 }, { "epoch": 4.0, "learning_rate": 1.9466666666666668e-05, "loss": 0.3149, "step": 10300 }, { "epoch": 4.0, "eval_accuracy": 0.9488308115543329, "eval_loss": 0.1855359673500061, "eval_runtime": 8.1547, "eval_samples_per_second": 445.755, "eval_steps_per_second": 55.796, "step": 10300 }, { "epoch": 4.0, "learning_rate": 1.9466148867313916e-05, "loss": 0.1827, "step": 10310 }, { "epoch": 4.01, "learning_rate": 1.9465631067961167e-05, "loss": 0.2903, "step": 10320 }, { "epoch": 4.01, "learning_rate": 1.9465113268608415e-05, "loss": 0.2063, "step": 10330 }, { "epoch": 4.02, "learning_rate": 1.9464595469255666e-05, "loss": 0.0959, "step": 10340 }, { "epoch": 4.02, "learning_rate": 1.9464077669902914e-05, "loss": 0.2297, "step": 10350 }, { "epoch": 4.02, "learning_rate": 1.9463559870550165e-05, "loss": 0.1273, "step": 10360 }, { "epoch": 4.03, "learning_rate": 1.9463042071197413e-05, "loss": 0.2147, "step": 10370 }, { "epoch": 4.03, "learning_rate": 1.946252427184466e-05, "loss": 0.1313, "step": 10380 }, { "epoch": 4.03, "learning_rate": 1.946200647249191e-05, "loss": 0.3251, "step": 10390 }, { "epoch": 4.04, "learning_rate": 1.946148867313916e-05, "loss": 0.3569, "step": 10400 }, { "epoch": 4.04, "learning_rate": 1.946097087378641e-05, "loss": 0.1368, "step": 10410 }, { "epoch": 4.05, "learning_rate": 1.946045307443366e-05, "loss": 0.116, "step": 10420 }, { "epoch": 4.05, "learning_rate": 1.9459935275080908e-05, "loss": 0.3314, "step": 10430 }, { "epoch": 4.05, "learning_rate": 1.9459417475728156e-05, "loss": 0.2843, "step": 10440 }, { "epoch": 4.06, "learning_rate": 1.9458899676375407e-05, "loss": 0.0952, "step": 10450 }, { "epoch": 4.06, "learning_rate": 1.9458381877022655e-05, "loss": 0.3193, "step": 10460 }, { "epoch": 4.07, "learning_rate": 1.9457864077669903e-05, "loss": 0.172, "step": 10470 }, { "epoch": 4.07, "learning_rate": 1.9457346278317154e-05, "loss": 0.1472, "step": 10480 }, { "epoch": 4.07, "learning_rate": 1.9456828478964402e-05, "loss": 0.2518, "step": 10490 }, { "epoch": 4.08, "learning_rate": 1.9456310679611653e-05, "loss": 0.189, "step": 10500 }, { "epoch": 4.08, "learning_rate": 1.94557928802589e-05, "loss": 0.221, "step": 10510 }, { "epoch": 4.09, "learning_rate": 1.945527508090615e-05, "loss": 0.1219, "step": 10520 }, { "epoch": 4.09, "learning_rate": 1.94547572815534e-05, "loss": 0.0485, "step": 10530 }, { "epoch": 4.09, "learning_rate": 1.945423948220065e-05, "loss": 0.2431, "step": 10540 }, { "epoch": 4.1, "learning_rate": 1.9453721682847896e-05, "loss": 0.3147, "step": 10550 }, { "epoch": 4.1, "learning_rate": 1.9453203883495148e-05, "loss": 0.3031, "step": 10560 }, { "epoch": 4.1, "learning_rate": 1.9452686084142396e-05, "loss": 0.1062, "step": 10570 }, { "epoch": 4.11, "learning_rate": 1.9452168284789647e-05, "loss": 0.1931, "step": 10580 }, { "epoch": 4.11, "learning_rate": 1.9451650485436895e-05, "loss": 0.2967, "step": 10590 }, { "epoch": 4.12, "learning_rate": 1.9451132686084143e-05, "loss": 0.2966, "step": 10600 }, { "epoch": 4.12, "learning_rate": 1.9450614886731394e-05, "loss": 0.1978, "step": 10610 }, { "epoch": 4.12, "learning_rate": 1.9450097087378642e-05, "loss": 0.079, "step": 10620 }, { "epoch": 4.13, "learning_rate": 1.944957928802589e-05, "loss": 0.1591, "step": 10630 }, { "epoch": 4.13, "learning_rate": 1.944906148867314e-05, "loss": 0.2534, "step": 10640 }, { "epoch": 4.14, "learning_rate": 1.944854368932039e-05, "loss": 0.1422, "step": 10650 }, { "epoch": 4.14, "learning_rate": 1.944802588996764e-05, "loss": 0.0285, "step": 10660 }, { "epoch": 4.14, "learning_rate": 1.944750809061489e-05, "loss": 0.2822, "step": 10670 }, { "epoch": 4.15, "learning_rate": 1.9446990291262136e-05, "loss": 0.1967, "step": 10680 }, { "epoch": 4.15, "learning_rate": 1.9446472491909388e-05, "loss": 0.3299, "step": 10690 }, { "epoch": 4.16, "learning_rate": 1.9445954692556636e-05, "loss": 0.1077, "step": 10700 }, { "epoch": 4.16, "learning_rate": 1.9445436893203887e-05, "loss": 0.2159, "step": 10710 }, { "epoch": 4.16, "learning_rate": 1.9444919093851135e-05, "loss": 0.2205, "step": 10720 }, { "epoch": 4.17, "learning_rate": 1.9444401294498383e-05, "loss": 0.1952, "step": 10730 }, { "epoch": 4.17, "learning_rate": 1.9443883495145634e-05, "loss": 0.1188, "step": 10740 }, { "epoch": 4.17, "learning_rate": 1.9443365695792882e-05, "loss": 0.2033, "step": 10750 }, { "epoch": 4.18, "learning_rate": 1.944284789644013e-05, "loss": 0.107, "step": 10760 }, { "epoch": 4.18, "learning_rate": 1.944233009708738e-05, "loss": 0.0967, "step": 10770 }, { "epoch": 4.19, "learning_rate": 1.944181229773463e-05, "loss": 0.2369, "step": 10780 }, { "epoch": 4.19, "learning_rate": 1.944129449838188e-05, "loss": 0.259, "step": 10790 }, { "epoch": 4.19, "learning_rate": 1.944077669902913e-05, "loss": 0.4383, "step": 10800 }, { "epoch": 4.2, "learning_rate": 1.9440258899676376e-05, "loss": 0.3939, "step": 10810 }, { "epoch": 4.2, "learning_rate": 1.9439741100323624e-05, "loss": 0.1121, "step": 10820 }, { "epoch": 4.21, "learning_rate": 1.9439223300970875e-05, "loss": 0.189, "step": 10830 }, { "epoch": 4.21, "learning_rate": 1.9438705501618123e-05, "loss": 0.0679, "step": 10840 }, { "epoch": 4.21, "learning_rate": 1.9438187702265375e-05, "loss": 0.3046, "step": 10850 }, { "epoch": 4.22, "learning_rate": 1.9437669902912623e-05, "loss": 0.2156, "step": 10860 }, { "epoch": 4.22, "learning_rate": 1.9437152103559874e-05, "loss": 0.2912, "step": 10870 }, { "epoch": 4.23, "learning_rate": 1.9436634304207122e-05, "loss": 0.2968, "step": 10880 }, { "epoch": 4.23, "learning_rate": 1.943611650485437e-05, "loss": 0.2222, "step": 10890 }, { "epoch": 4.23, "learning_rate": 1.9435598705501618e-05, "loss": 0.1531, "step": 10900 }, { "epoch": 4.24, "learning_rate": 1.943508090614887e-05, "loss": 0.0796, "step": 10910 }, { "epoch": 4.24, "learning_rate": 1.9434563106796117e-05, "loss": 0.0699, "step": 10920 }, { "epoch": 4.24, "learning_rate": 1.9434045307443368e-05, "loss": 0.2842, "step": 10930 }, { "epoch": 4.25, "learning_rate": 1.9433527508090616e-05, "loss": 0.0577, "step": 10940 }, { "epoch": 4.25, "learning_rate": 1.9433009708737868e-05, "loss": 0.1124, "step": 10950 }, { "epoch": 4.26, "learning_rate": 1.9432491909385115e-05, "loss": 0.2961, "step": 10960 }, { "epoch": 4.26, "learning_rate": 1.9431974110032363e-05, "loss": 0.2303, "step": 10970 }, { "epoch": 4.26, "learning_rate": 1.943145631067961e-05, "loss": 0.1608, "step": 10980 }, { "epoch": 4.27, "learning_rate": 1.9430938511326863e-05, "loss": 0.1517, "step": 10990 }, { "epoch": 4.27, "learning_rate": 1.943042071197411e-05, "loss": 0.1787, "step": 11000 }, { "epoch": 4.28, "learning_rate": 1.9429902912621362e-05, "loss": 0.2766, "step": 11010 }, { "epoch": 4.28, "learning_rate": 1.942938511326861e-05, "loss": 0.1146, "step": 11020 }, { "epoch": 4.28, "learning_rate": 1.942886731391586e-05, "loss": 0.1747, "step": 11030 }, { "epoch": 4.29, "learning_rate": 1.942834951456311e-05, "loss": 0.1637, "step": 11040 }, { "epoch": 4.29, "learning_rate": 1.9427831715210357e-05, "loss": 0.2721, "step": 11050 }, { "epoch": 4.3, "learning_rate": 1.9427313915857605e-05, "loss": 0.2411, "step": 11060 }, { "epoch": 4.3, "learning_rate": 1.9426796116504856e-05, "loss": 0.2553, "step": 11070 }, { "epoch": 4.3, "learning_rate": 1.9426278317152104e-05, "loss": 0.1392, "step": 11080 }, { "epoch": 4.31, "learning_rate": 1.9425760517799355e-05, "loss": 0.397, "step": 11090 }, { "epoch": 4.31, "learning_rate": 1.9425242718446603e-05, "loss": 0.1047, "step": 11100 }, { "epoch": 4.31, "learning_rate": 1.9424724919093855e-05, "loss": 0.2552, "step": 11110 }, { "epoch": 4.32, "learning_rate": 1.94242071197411e-05, "loss": 0.2713, "step": 11120 }, { "epoch": 4.32, "learning_rate": 1.942368932038835e-05, "loss": 0.1791, "step": 11130 }, { "epoch": 4.33, "learning_rate": 1.94231715210356e-05, "loss": 0.1451, "step": 11140 }, { "epoch": 4.33, "learning_rate": 1.942265372168285e-05, "loss": 0.0645, "step": 11150 }, { "epoch": 4.33, "learning_rate": 1.9422135922330098e-05, "loss": 0.3552, "step": 11160 }, { "epoch": 4.34, "learning_rate": 1.942161812297735e-05, "loss": 0.1695, "step": 11170 }, { "epoch": 4.34, "learning_rate": 1.9421100323624597e-05, "loss": 0.0469, "step": 11180 }, { "epoch": 4.35, "learning_rate": 1.9420582524271848e-05, "loss": 0.2501, "step": 11190 }, { "epoch": 4.35, "learning_rate": 1.9420064724919093e-05, "loss": 0.2035, "step": 11200 }, { "epoch": 4.35, "learning_rate": 1.9419546925566344e-05, "loss": 0.1851, "step": 11210 }, { "epoch": 4.36, "learning_rate": 1.9419029126213592e-05, "loss": 0.2064, "step": 11220 }, { "epoch": 4.36, "learning_rate": 1.9418511326860843e-05, "loss": 0.1586, "step": 11230 }, { "epoch": 4.37, "learning_rate": 1.941799352750809e-05, "loss": 0.2779, "step": 11240 }, { "epoch": 4.37, "learning_rate": 1.9417475728155343e-05, "loss": 0.1905, "step": 11250 }, { "epoch": 4.37, "learning_rate": 1.941695792880259e-05, "loss": 0.3921, "step": 11260 }, { "epoch": 4.38, "learning_rate": 1.9416440129449842e-05, "loss": 0.3075, "step": 11270 }, { "epoch": 4.38, "learning_rate": 1.941592233009709e-05, "loss": 0.1345, "step": 11280 }, { "epoch": 4.38, "learning_rate": 1.9415404530744338e-05, "loss": 0.1818, "step": 11290 }, { "epoch": 4.39, "learning_rate": 1.9414886731391586e-05, "loss": 0.1156, "step": 11300 }, { "epoch": 4.39, "learning_rate": 1.9414368932038837e-05, "loss": 0.1625, "step": 11310 }, { "epoch": 4.4, "learning_rate": 1.9413851132686085e-05, "loss": 0.0837, "step": 11320 }, { "epoch": 4.4, "learning_rate": 1.9413333333333336e-05, "loss": 0.2448, "step": 11330 }, { "epoch": 4.4, "learning_rate": 1.9412815533980584e-05, "loss": 0.3734, "step": 11340 }, { "epoch": 4.41, "learning_rate": 1.9412297734627835e-05, "loss": 0.3253, "step": 11350 }, { "epoch": 4.41, "learning_rate": 1.9411779935275083e-05, "loss": 0.2675, "step": 11360 }, { "epoch": 4.42, "learning_rate": 1.941126213592233e-05, "loss": 0.1732, "step": 11370 }, { "epoch": 4.42, "learning_rate": 1.941074433656958e-05, "loss": 0.1855, "step": 11380 }, { "epoch": 4.42, "learning_rate": 1.941022653721683e-05, "loss": 0.1948, "step": 11390 }, { "epoch": 4.43, "learning_rate": 1.940970873786408e-05, "loss": 0.2701, "step": 11400 }, { "epoch": 4.43, "learning_rate": 1.940919093851133e-05, "loss": 0.1931, "step": 11410 }, { "epoch": 4.43, "learning_rate": 1.9408673139158578e-05, "loss": 0.2496, "step": 11420 }, { "epoch": 4.44, "learning_rate": 1.940815533980583e-05, "loss": 0.1995, "step": 11430 }, { "epoch": 4.44, "learning_rate": 1.9407637540453077e-05, "loss": 0.2041, "step": 11440 }, { "epoch": 4.45, "learning_rate": 1.9407119741100325e-05, "loss": 0.2528, "step": 11450 }, { "epoch": 4.45, "learning_rate": 1.9406601941747573e-05, "loss": 0.2433, "step": 11460 }, { "epoch": 4.45, "learning_rate": 1.9406084142394824e-05, "loss": 0.3492, "step": 11470 }, { "epoch": 4.46, "learning_rate": 1.9405566343042072e-05, "loss": 0.1016, "step": 11480 }, { "epoch": 4.46, "learning_rate": 1.9405048543689323e-05, "loss": 0.0442, "step": 11490 }, { "epoch": 4.47, "learning_rate": 1.940453074433657e-05, "loss": 0.1863, "step": 11500 }, { "epoch": 4.47, "learning_rate": 1.9404012944983822e-05, "loss": 0.2436, "step": 11510 }, { "epoch": 4.47, "learning_rate": 1.940349514563107e-05, "loss": 0.3174, "step": 11520 }, { "epoch": 4.48, "learning_rate": 1.9402977346278318e-05, "loss": 0.229, "step": 11530 }, { "epoch": 4.48, "learning_rate": 1.9402459546925566e-05, "loss": 0.3688, "step": 11540 }, { "epoch": 4.49, "learning_rate": 1.9401941747572818e-05, "loss": 0.1861, "step": 11550 }, { "epoch": 4.49, "learning_rate": 1.9401423948220065e-05, "loss": 0.3283, "step": 11560 }, { "epoch": 4.49, "learning_rate": 1.9400906148867317e-05, "loss": 0.1399, "step": 11570 }, { "epoch": 4.5, "learning_rate": 1.9400388349514565e-05, "loss": 0.25, "step": 11580 }, { "epoch": 4.5, "learning_rate": 1.9399870550161816e-05, "loss": 0.147, "step": 11590 }, { "epoch": 4.5, "learning_rate": 1.9399352750809064e-05, "loss": 0.3092, "step": 11600 }, { "epoch": 4.51, "learning_rate": 1.9398834951456312e-05, "loss": 0.4029, "step": 11610 }, { "epoch": 4.51, "learning_rate": 1.939831715210356e-05, "loss": 0.2057, "step": 11620 }, { "epoch": 4.52, "learning_rate": 1.939779935275081e-05, "loss": 0.0883, "step": 11630 }, { "epoch": 4.52, "learning_rate": 1.939728155339806e-05, "loss": 0.256, "step": 11640 }, { "epoch": 4.52, "learning_rate": 1.939676375404531e-05, "loss": 0.149, "step": 11650 }, { "epoch": 4.53, "learning_rate": 1.9396245954692558e-05, "loss": 0.1123, "step": 11660 }, { "epoch": 4.53, "learning_rate": 1.939572815533981e-05, "loss": 0.2805, "step": 11670 }, { "epoch": 4.54, "learning_rate": 1.9395210355987057e-05, "loss": 0.3653, "step": 11680 }, { "epoch": 4.54, "learning_rate": 1.9394692556634305e-05, "loss": 0.2283, "step": 11690 }, { "epoch": 4.54, "learning_rate": 1.9394174757281553e-05, "loss": 0.2955, "step": 11700 }, { "epoch": 4.55, "learning_rate": 1.9393656957928805e-05, "loss": 0.0699, "step": 11710 }, { "epoch": 4.55, "learning_rate": 1.9393139158576053e-05, "loss": 0.2213, "step": 11720 }, { "epoch": 4.56, "learning_rate": 1.9392621359223304e-05, "loss": 0.0633, "step": 11730 }, { "epoch": 4.56, "learning_rate": 1.9392103559870552e-05, "loss": 0.1132, "step": 11740 }, { "epoch": 4.56, "learning_rate": 1.9391585760517803e-05, "loss": 0.121, "step": 11750 }, { "epoch": 4.57, "learning_rate": 1.939106796116505e-05, "loss": 0.1701, "step": 11760 }, { "epoch": 4.57, "learning_rate": 1.93905501618123e-05, "loss": 0.2263, "step": 11770 }, { "epoch": 4.57, "learning_rate": 1.9390032362459547e-05, "loss": 0.2276, "step": 11780 }, { "epoch": 4.58, "learning_rate": 1.9389514563106798e-05, "loss": 0.2396, "step": 11790 }, { "epoch": 4.58, "learning_rate": 1.9388996763754046e-05, "loss": 0.4364, "step": 11800 }, { "epoch": 4.59, "learning_rate": 1.9388478964401297e-05, "loss": 0.1599, "step": 11810 }, { "epoch": 4.59, "learning_rate": 1.9387961165048545e-05, "loss": 0.3382, "step": 11820 }, { "epoch": 4.59, "learning_rate": 1.9387443365695797e-05, "loss": 0.1922, "step": 11830 }, { "epoch": 4.6, "learning_rate": 1.9386925566343045e-05, "loss": 0.1832, "step": 11840 }, { "epoch": 4.6, "learning_rate": 1.9386407766990292e-05, "loss": 0.1679, "step": 11850 }, { "epoch": 4.61, "learning_rate": 1.938588996763754e-05, "loss": 0.1343, "step": 11860 }, { "epoch": 4.61, "learning_rate": 1.9385372168284792e-05, "loss": 0.2609, "step": 11870 }, { "epoch": 4.61, "learning_rate": 1.938485436893204e-05, "loss": 0.1784, "step": 11880 }, { "epoch": 4.62, "learning_rate": 1.938433656957929e-05, "loss": 0.3379, "step": 11890 }, { "epoch": 4.62, "learning_rate": 1.938381877022654e-05, "loss": 0.284, "step": 11900 }, { "epoch": 4.63, "learning_rate": 1.9383300970873787e-05, "loss": 0.2585, "step": 11910 }, { "epoch": 4.63, "learning_rate": 1.9382783171521038e-05, "loss": 0.2068, "step": 11920 }, { "epoch": 4.63, "learning_rate": 1.9382265372168286e-05, "loss": 0.0549, "step": 11930 }, { "epoch": 4.64, "learning_rate": 1.9381747572815534e-05, "loss": 0.1902, "step": 11940 }, { "epoch": 4.64, "learning_rate": 1.9381229773462785e-05, "loss": 0.1435, "step": 11950 }, { "epoch": 4.64, "learning_rate": 1.9380711974110033e-05, "loss": 0.0556, "step": 11960 }, { "epoch": 4.65, "learning_rate": 1.9380194174757285e-05, "loss": 0.1662, "step": 11970 }, { "epoch": 4.65, "learning_rate": 1.9379676375404532e-05, "loss": 0.1787, "step": 11980 }, { "epoch": 4.66, "learning_rate": 1.937915857605178e-05, "loss": 0.0364, "step": 11990 }, { "epoch": 4.66, "learning_rate": 1.937864077669903e-05, "loss": 0.2, "step": 12000 }, { "epoch": 4.66, "learning_rate": 1.937812297734628e-05, "loss": 0.1333, "step": 12010 }, { "epoch": 4.67, "learning_rate": 1.9377605177993528e-05, "loss": 0.1067, "step": 12020 }, { "epoch": 4.67, "learning_rate": 1.937708737864078e-05, "loss": 0.2023, "step": 12030 }, { "epoch": 4.68, "learning_rate": 1.9376569579288027e-05, "loss": 0.2237, "step": 12040 }, { "epoch": 4.68, "learning_rate": 1.9376051779935278e-05, "loss": 0.062, "step": 12050 }, { "epoch": 4.68, "learning_rate": 1.9375533980582526e-05, "loss": 0.0995, "step": 12060 }, { "epoch": 4.69, "learning_rate": 1.9375016181229774e-05, "loss": 0.0906, "step": 12070 }, { "epoch": 4.69, "learning_rate": 1.9374498381877025e-05, "loss": 0.2716, "step": 12080 }, { "epoch": 4.7, "learning_rate": 1.9373980582524273e-05, "loss": 0.3635, "step": 12090 }, { "epoch": 4.7, "learning_rate": 1.937346278317152e-05, "loss": 0.2923, "step": 12100 }, { "epoch": 4.7, "learning_rate": 1.9372944983818772e-05, "loss": 0.0635, "step": 12110 }, { "epoch": 4.71, "learning_rate": 1.937242718446602e-05, "loss": 0.1497, "step": 12120 }, { "epoch": 4.71, "learning_rate": 1.937190938511327e-05, "loss": 0.1464, "step": 12130 }, { "epoch": 4.71, "learning_rate": 1.937139158576052e-05, "loss": 0.1473, "step": 12140 }, { "epoch": 4.72, "learning_rate": 1.9370873786407767e-05, "loss": 0.1652, "step": 12150 }, { "epoch": 4.72, "learning_rate": 1.937035598705502e-05, "loss": 0.2174, "step": 12160 }, { "epoch": 4.73, "learning_rate": 1.9369838187702267e-05, "loss": 0.2283, "step": 12170 }, { "epoch": 4.73, "learning_rate": 1.9369320388349515e-05, "loss": 0.2774, "step": 12180 }, { "epoch": 4.73, "learning_rate": 1.9368802588996766e-05, "loss": 0.1594, "step": 12190 }, { "epoch": 4.74, "learning_rate": 1.9368284789644014e-05, "loss": 0.1372, "step": 12200 }, { "epoch": 4.74, "learning_rate": 1.9367766990291265e-05, "loss": 0.1494, "step": 12210 }, { "epoch": 4.75, "learning_rate": 1.9367249190938513e-05, "loss": 0.3242, "step": 12220 }, { "epoch": 4.75, "learning_rate": 1.936673139158576e-05, "loss": 0.2053, "step": 12230 }, { "epoch": 4.75, "learning_rate": 1.9366213592233012e-05, "loss": 0.1955, "step": 12240 }, { "epoch": 4.76, "learning_rate": 1.936569579288026e-05, "loss": 0.1725, "step": 12250 }, { "epoch": 4.76, "learning_rate": 1.9365177993527508e-05, "loss": 0.0831, "step": 12260 }, { "epoch": 4.77, "learning_rate": 1.936466019417476e-05, "loss": 0.2362, "step": 12270 }, { "epoch": 4.77, "learning_rate": 1.9364142394822007e-05, "loss": 0.0279, "step": 12280 }, { "epoch": 4.77, "learning_rate": 1.9363624595469255e-05, "loss": 0.3173, "step": 12290 }, { "epoch": 4.78, "learning_rate": 1.9363106796116507e-05, "loss": 0.1956, "step": 12300 }, { "epoch": 4.78, "learning_rate": 1.9362588996763755e-05, "loss": 0.2058, "step": 12310 }, { "epoch": 4.78, "learning_rate": 1.9362071197411006e-05, "loss": 0.3117, "step": 12320 }, { "epoch": 4.79, "learning_rate": 1.9361553398058254e-05, "loss": 0.3645, "step": 12330 }, { "epoch": 4.79, "learning_rate": 1.9361035598705505e-05, "loss": 0.2066, "step": 12340 }, { "epoch": 4.8, "learning_rate": 1.9360517799352753e-05, "loss": 0.1871, "step": 12350 }, { "epoch": 4.8, "learning_rate": 1.936e-05, "loss": 0.343, "step": 12360 }, { "epoch": 4.8, "learning_rate": 1.935948220064725e-05, "loss": 0.1753, "step": 12370 }, { "epoch": 4.81, "learning_rate": 1.93589644012945e-05, "loss": 0.1749, "step": 12380 }, { "epoch": 4.81, "learning_rate": 1.9358446601941748e-05, "loss": 0.3164, "step": 12390 }, { "epoch": 4.82, "learning_rate": 1.9357928802589e-05, "loss": 0.3404, "step": 12400 }, { "epoch": 4.82, "learning_rate": 1.9357411003236247e-05, "loss": 0.113, "step": 12410 }, { "epoch": 4.82, "learning_rate": 1.93568932038835e-05, "loss": 0.1865, "step": 12420 }, { "epoch": 4.83, "learning_rate": 1.9356375404530747e-05, "loss": 0.235, "step": 12430 }, { "epoch": 4.83, "learning_rate": 1.9355857605177995e-05, "loss": 0.3242, "step": 12440 }, { "epoch": 4.83, "learning_rate": 1.9355339805825242e-05, "loss": 0.0272, "step": 12450 }, { "epoch": 4.84, "learning_rate": 1.9354822006472494e-05, "loss": 0.0691, "step": 12460 }, { "epoch": 4.84, "learning_rate": 1.9354304207119742e-05, "loss": 0.241, "step": 12470 }, { "epoch": 4.85, "learning_rate": 1.9353786407766993e-05, "loss": 0.1624, "step": 12480 }, { "epoch": 4.85, "learning_rate": 1.935326860841424e-05, "loss": 0.2917, "step": 12490 }, { "epoch": 4.85, "learning_rate": 1.9352750809061492e-05, "loss": 0.1886, "step": 12500 }, { "epoch": 4.86, "learning_rate": 1.935223300970874e-05, "loss": 0.1041, "step": 12510 }, { "epoch": 4.86, "learning_rate": 1.9351715210355988e-05, "loss": 0.4151, "step": 12520 }, { "epoch": 4.87, "learning_rate": 1.9351197411003236e-05, "loss": 0.2313, "step": 12530 }, { "epoch": 4.87, "learning_rate": 1.9350679611650487e-05, "loss": 0.1339, "step": 12540 }, { "epoch": 4.87, "learning_rate": 1.9350161812297735e-05, "loss": 0.2213, "step": 12550 }, { "epoch": 4.88, "learning_rate": 1.9349644012944987e-05, "loss": 0.2801, "step": 12560 }, { "epoch": 4.88, "learning_rate": 1.9349126213592234e-05, "loss": 0.1102, "step": 12570 }, { "epoch": 4.89, "learning_rate": 1.9348608414239486e-05, "loss": 0.1684, "step": 12580 }, { "epoch": 4.89, "learning_rate": 1.934809061488673e-05, "loss": 0.079, "step": 12590 }, { "epoch": 4.89, "learning_rate": 1.934757281553398e-05, "loss": 0.0728, "step": 12600 }, { "epoch": 4.9, "learning_rate": 1.934705501618123e-05, "loss": 0.329, "step": 12610 }, { "epoch": 4.9, "learning_rate": 1.934653721682848e-05, "loss": 0.3821, "step": 12620 }, { "epoch": 4.9, "learning_rate": 1.934601941747573e-05, "loss": 0.3502, "step": 12630 }, { "epoch": 4.91, "learning_rate": 1.934550161812298e-05, "loss": 0.1426, "step": 12640 }, { "epoch": 4.91, "learning_rate": 1.9344983818770228e-05, "loss": 0.1978, "step": 12650 }, { "epoch": 4.92, "learning_rate": 1.934446601941748e-05, "loss": 0.2777, "step": 12660 }, { "epoch": 4.92, "learning_rate": 1.9343948220064724e-05, "loss": 0.2774, "step": 12670 }, { "epoch": 4.92, "learning_rate": 1.9343430420711975e-05, "loss": 0.1971, "step": 12680 }, { "epoch": 4.93, "learning_rate": 1.9342912621359223e-05, "loss": 0.1427, "step": 12690 }, { "epoch": 4.93, "learning_rate": 1.9342394822006474e-05, "loss": 0.3145, "step": 12700 }, { "epoch": 4.94, "learning_rate": 1.9341877022653722e-05, "loss": 0.2714, "step": 12710 }, { "epoch": 4.94, "learning_rate": 1.9341359223300974e-05, "loss": 0.2632, "step": 12720 }, { "epoch": 4.94, "learning_rate": 1.934084142394822e-05, "loss": 0.2628, "step": 12730 }, { "epoch": 4.95, "learning_rate": 1.9340323624595473e-05, "loss": 0.1228, "step": 12740 }, { "epoch": 4.95, "learning_rate": 1.9339805825242717e-05, "loss": 0.1835, "step": 12750 }, { "epoch": 4.96, "learning_rate": 1.933928802588997e-05, "loss": 0.1612, "step": 12760 }, { "epoch": 4.96, "learning_rate": 1.9338770226537217e-05, "loss": 0.3655, "step": 12770 }, { "epoch": 4.96, "learning_rate": 1.9338252427184468e-05, "loss": 0.1295, "step": 12780 }, { "epoch": 4.97, "learning_rate": 1.9337734627831716e-05, "loss": 0.1606, "step": 12790 }, { "epoch": 4.97, "learning_rate": 1.9337216828478967e-05, "loss": 0.1875, "step": 12800 }, { "epoch": 4.97, "learning_rate": 1.9336699029126215e-05, "loss": 0.0929, "step": 12810 }, { "epoch": 4.98, "learning_rate": 1.9336181229773466e-05, "loss": 0.079, "step": 12820 }, { "epoch": 4.98, "learning_rate": 1.933566343042071e-05, "loss": 0.1728, "step": 12830 }, { "epoch": 4.99, "learning_rate": 1.9335145631067962e-05, "loss": 0.2651, "step": 12840 }, { "epoch": 4.99, "learning_rate": 1.933462783171521e-05, "loss": 0.1756, "step": 12850 }, { "epoch": 4.99, "learning_rate": 1.933411003236246e-05, "loss": 0.1863, "step": 12860 }, { "epoch": 5.0, "learning_rate": 1.933359223300971e-05, "loss": 0.1212, "step": 12870 }, { "epoch": 5.0, "eval_accuracy": 0.9480055020632737, "eval_loss": 0.20791418850421906, "eval_runtime": 8.2063, "eval_samples_per_second": 442.953, "eval_steps_per_second": 55.445, "step": 12875 }, { "epoch": 5.0, "learning_rate": 1.933307443365696e-05, "loss": 0.1102, "step": 12880 }, { "epoch": 5.01, "learning_rate": 1.933255663430421e-05, "loss": 0.3159, "step": 12890 }, { "epoch": 5.01, "learning_rate": 1.933203883495146e-05, "loss": 0.0541, "step": 12900 }, { "epoch": 5.01, "learning_rate": 1.9331521035598708e-05, "loss": 0.1296, "step": 12910 }, { "epoch": 5.02, "learning_rate": 1.9331003236245956e-05, "loss": 0.1728, "step": 12920 }, { "epoch": 5.02, "learning_rate": 1.9330485436893204e-05, "loss": 0.212, "step": 12930 }, { "epoch": 5.03, "learning_rate": 1.9329967637540455e-05, "loss": 0.2518, "step": 12940 }, { "epoch": 5.03, "learning_rate": 1.9329449838187703e-05, "loss": 0.2406, "step": 12950 }, { "epoch": 5.03, "learning_rate": 1.9328932038834954e-05, "loss": 0.0417, "step": 12960 }, { "epoch": 5.04, "learning_rate": 1.9328414239482202e-05, "loss": 0.2155, "step": 12970 }, { "epoch": 5.04, "learning_rate": 1.9327896440129454e-05, "loss": 0.2082, "step": 12980 }, { "epoch": 5.04, "learning_rate": 1.93273786407767e-05, "loss": 0.1845, "step": 12990 }, { "epoch": 5.05, "learning_rate": 1.932686084142395e-05, "loss": 0.1418, "step": 13000 }, { "epoch": 5.05, "learning_rate": 1.9326343042071197e-05, "loss": 0.2885, "step": 13010 }, { "epoch": 5.06, "learning_rate": 1.932582524271845e-05, "loss": 0.228, "step": 13020 }, { "epoch": 5.06, "learning_rate": 1.9325307443365697e-05, "loss": 0.2872, "step": 13030 }, { "epoch": 5.06, "learning_rate": 1.9324789644012948e-05, "loss": 0.1394, "step": 13040 }, { "epoch": 5.07, "learning_rate": 1.9324271844660196e-05, "loss": 0.1646, "step": 13050 }, { "epoch": 5.07, "learning_rate": 1.9323754045307447e-05, "loss": 0.2454, "step": 13060 }, { "epoch": 5.08, "learning_rate": 1.9323236245954695e-05, "loss": 0.218, "step": 13070 }, { "epoch": 5.08, "learning_rate": 1.9322718446601943e-05, "loss": 0.115, "step": 13080 }, { "epoch": 5.08, "learning_rate": 1.932220064724919e-05, "loss": 0.1885, "step": 13090 }, { "epoch": 5.09, "learning_rate": 1.9321682847896442e-05, "loss": 0.1807, "step": 13100 }, { "epoch": 5.09, "learning_rate": 1.932116504854369e-05, "loss": 0.1697, "step": 13110 }, { "epoch": 5.1, "learning_rate": 1.932064724919094e-05, "loss": 0.1327, "step": 13120 }, { "epoch": 5.1, "learning_rate": 1.932012944983819e-05, "loss": 0.2862, "step": 13130 }, { "epoch": 5.1, "learning_rate": 1.931961165048544e-05, "loss": 0.1249, "step": 13140 }, { "epoch": 5.11, "learning_rate": 1.931909385113269e-05, "loss": 0.2647, "step": 13150 }, { "epoch": 5.11, "learning_rate": 1.9318576051779937e-05, "loss": 0.3135, "step": 13160 }, { "epoch": 5.11, "learning_rate": 1.9318058252427184e-05, "loss": 0.1216, "step": 13170 }, { "epoch": 5.12, "learning_rate": 1.9317540453074436e-05, "loss": 0.2142, "step": 13180 }, { "epoch": 5.12, "learning_rate": 1.9317022653721684e-05, "loss": 0.2801, "step": 13190 }, { "epoch": 5.13, "learning_rate": 1.9316504854368935e-05, "loss": 0.2111, "step": 13200 }, { "epoch": 5.13, "learning_rate": 1.9315987055016183e-05, "loss": 0.1448, "step": 13210 }, { "epoch": 5.13, "learning_rate": 1.9315469255663434e-05, "loss": 0.2453, "step": 13220 }, { "epoch": 5.14, "learning_rate": 1.9314951456310682e-05, "loss": 0.2063, "step": 13230 }, { "epoch": 5.14, "learning_rate": 1.931443365695793e-05, "loss": 0.164, "step": 13240 }, { "epoch": 5.15, "learning_rate": 1.9313915857605178e-05, "loss": 0.4629, "step": 13250 }, { "epoch": 5.15, "learning_rate": 1.931339805825243e-05, "loss": 0.1818, "step": 13260 }, { "epoch": 5.15, "learning_rate": 1.9312880258899677e-05, "loss": 0.2935, "step": 13270 }, { "epoch": 5.16, "learning_rate": 1.931236245954693e-05, "loss": 0.2923, "step": 13280 }, { "epoch": 5.16, "learning_rate": 1.9311844660194177e-05, "loss": 0.0491, "step": 13290 }, { "epoch": 5.17, "learning_rate": 1.9311326860841428e-05, "loss": 0.1612, "step": 13300 }, { "epoch": 5.17, "learning_rate": 1.9310809061488676e-05, "loss": 0.3004, "step": 13310 }, { "epoch": 5.17, "learning_rate": 1.9310291262135924e-05, "loss": 0.2165, "step": 13320 }, { "epoch": 5.18, "learning_rate": 1.930977346278317e-05, "loss": 0.2939, "step": 13330 }, { "epoch": 5.18, "learning_rate": 1.9309255663430423e-05, "loss": 0.1104, "step": 13340 }, { "epoch": 5.18, "learning_rate": 1.930873786407767e-05, "loss": 0.1774, "step": 13350 }, { "epoch": 5.19, "learning_rate": 1.9308220064724922e-05, "loss": 0.333, "step": 13360 }, { "epoch": 5.19, "learning_rate": 1.930770226537217e-05, "loss": 0.1636, "step": 13370 }, { "epoch": 5.2, "learning_rate": 1.9307184466019418e-05, "loss": 0.1958, "step": 13380 }, { "epoch": 5.2, "learning_rate": 1.930666666666667e-05, "loss": 0.2305, "step": 13390 }, { "epoch": 5.2, "learning_rate": 1.9306148867313917e-05, "loss": 0.1662, "step": 13400 }, { "epoch": 5.21, "learning_rate": 1.9305631067961165e-05, "loss": 0.0824, "step": 13410 }, { "epoch": 5.21, "learning_rate": 1.9305113268608416e-05, "loss": 0.193, "step": 13420 }, { "epoch": 5.22, "learning_rate": 1.9304595469255664e-05, "loss": 0.1063, "step": 13430 }, { "epoch": 5.22, "learning_rate": 1.9304077669902916e-05, "loss": 0.2268, "step": 13440 }, { "epoch": 5.22, "learning_rate": 1.9303559870550164e-05, "loss": 0.0798, "step": 13450 }, { "epoch": 5.23, "learning_rate": 1.930304207119741e-05, "loss": 0.1345, "step": 13460 }, { "epoch": 5.23, "learning_rate": 1.9302524271844663e-05, "loss": 0.1321, "step": 13470 }, { "epoch": 5.23, "learning_rate": 1.930200647249191e-05, "loss": 0.2028, "step": 13480 }, { "epoch": 5.24, "learning_rate": 1.930148867313916e-05, "loss": 0.1012, "step": 13490 }, { "epoch": 5.24, "learning_rate": 1.930097087378641e-05, "loss": 0.1971, "step": 13500 }, { "epoch": 5.25, "learning_rate": 1.9300453074433658e-05, "loss": 0.1594, "step": 13510 }, { "epoch": 5.25, "learning_rate": 1.929993527508091e-05, "loss": 0.2138, "step": 13520 }, { "epoch": 5.25, "learning_rate": 1.9299417475728157e-05, "loss": 0.2599, "step": 13530 }, { "epoch": 5.26, "learning_rate": 1.9298899676375405e-05, "loss": 0.2212, "step": 13540 }, { "epoch": 5.26, "learning_rate": 1.9298381877022656e-05, "loss": 0.2012, "step": 13550 }, { "epoch": 5.27, "learning_rate": 1.9297864077669904e-05, "loss": 0.1309, "step": 13560 }, { "epoch": 5.27, "learning_rate": 1.9297346278317152e-05, "loss": 0.1861, "step": 13570 }, { "epoch": 5.27, "learning_rate": 1.9296828478964404e-05, "loss": 0.0331, "step": 13580 }, { "epoch": 5.28, "learning_rate": 1.929631067961165e-05, "loss": 0.3318, "step": 13590 }, { "epoch": 5.28, "learning_rate": 1.9295792880258903e-05, "loss": 0.1876, "step": 13600 }, { "epoch": 5.29, "learning_rate": 1.929527508090615e-05, "loss": 0.2995, "step": 13610 }, { "epoch": 5.29, "learning_rate": 1.92947572815534e-05, "loss": 0.2846, "step": 13620 }, { "epoch": 5.29, "learning_rate": 1.929423948220065e-05, "loss": 0.0782, "step": 13630 }, { "epoch": 5.3, "learning_rate": 1.9293721682847898e-05, "loss": 0.2103, "step": 13640 }, { "epoch": 5.3, "learning_rate": 1.9293203883495146e-05, "loss": 0.1982, "step": 13650 }, { "epoch": 5.3, "learning_rate": 1.9292686084142397e-05, "loss": 0.3472, "step": 13660 }, { "epoch": 5.31, "learning_rate": 1.9292168284789645e-05, "loss": 0.1286, "step": 13670 }, { "epoch": 5.31, "learning_rate": 1.9291650485436896e-05, "loss": 0.0243, "step": 13680 }, { "epoch": 5.32, "learning_rate": 1.9291132686084144e-05, "loss": 0.2426, "step": 13690 }, { "epoch": 5.32, "learning_rate": 1.9290614886731392e-05, "loss": 0.1475, "step": 13700 }, { "epoch": 5.32, "learning_rate": 1.9290097087378644e-05, "loss": 0.2549, "step": 13710 }, { "epoch": 5.33, "learning_rate": 1.928957928802589e-05, "loss": 0.1732, "step": 13720 }, { "epoch": 5.33, "learning_rate": 1.928906148867314e-05, "loss": 0.0929, "step": 13730 }, { "epoch": 5.34, "learning_rate": 1.928854368932039e-05, "loss": 0.2855, "step": 13740 }, { "epoch": 5.34, "learning_rate": 1.928802588996764e-05, "loss": 0.1721, "step": 13750 }, { "epoch": 5.34, "learning_rate": 1.9287508090614887e-05, "loss": 0.0967, "step": 13760 }, { "epoch": 5.35, "learning_rate": 1.9286990291262138e-05, "loss": 0.3465, "step": 13770 }, { "epoch": 5.35, "learning_rate": 1.9286472491909386e-05, "loss": 0.2041, "step": 13780 }, { "epoch": 5.36, "learning_rate": 1.9285954692556637e-05, "loss": 0.2375, "step": 13790 }, { "epoch": 5.36, "learning_rate": 1.9285436893203885e-05, "loss": 0.2217, "step": 13800 }, { "epoch": 5.36, "learning_rate": 1.9284919093851133e-05, "loss": 0.2053, "step": 13810 }, { "epoch": 5.37, "learning_rate": 1.9284401294498384e-05, "loss": 0.1924, "step": 13820 }, { "epoch": 5.37, "learning_rate": 1.9283883495145632e-05, "loss": 0.1181, "step": 13830 }, { "epoch": 5.37, "learning_rate": 1.928336569579288e-05, "loss": 0.1699, "step": 13840 }, { "epoch": 5.38, "learning_rate": 1.928284789644013e-05, "loss": 0.244, "step": 13850 }, { "epoch": 5.38, "learning_rate": 1.928233009708738e-05, "loss": 0.4094, "step": 13860 }, { "epoch": 5.39, "learning_rate": 1.928181229773463e-05, "loss": 0.2045, "step": 13870 }, { "epoch": 5.39, "learning_rate": 1.928129449838188e-05, "loss": 0.1492, "step": 13880 }, { "epoch": 5.39, "learning_rate": 1.9280776699029126e-05, "loss": 0.1771, "step": 13890 }, { "epoch": 5.4, "learning_rate": 1.9280258899676378e-05, "loss": 0.1408, "step": 13900 }, { "epoch": 5.4, "learning_rate": 1.9279741100323626e-05, "loss": 0.1234, "step": 13910 }, { "epoch": 5.41, "learning_rate": 1.9279223300970874e-05, "loss": 0.1192, "step": 13920 }, { "epoch": 5.41, "learning_rate": 1.9278705501618125e-05, "loss": 0.1041, "step": 13930 }, { "epoch": 5.41, "learning_rate": 1.9278187702265373e-05, "loss": 0.1082, "step": 13940 }, { "epoch": 5.42, "learning_rate": 1.9277669902912624e-05, "loss": 0.0488, "step": 13950 }, { "epoch": 5.42, "learning_rate": 1.9277152103559872e-05, "loss": 0.0322, "step": 13960 }, { "epoch": 5.43, "learning_rate": 1.927663430420712e-05, "loss": 0.1699, "step": 13970 }, { "epoch": 5.43, "learning_rate": 1.927611650485437e-05, "loss": 0.125, "step": 13980 }, { "epoch": 5.43, "learning_rate": 1.927559870550162e-05, "loss": 0.3023, "step": 13990 }, { "epoch": 5.44, "learning_rate": 1.9275080906148867e-05, "loss": 0.1151, "step": 14000 }, { "epoch": 5.44, "learning_rate": 1.927456310679612e-05, "loss": 0.1746, "step": 14010 }, { "epoch": 5.44, "learning_rate": 1.9274045307443366e-05, "loss": 0.1369, "step": 14020 }, { "epoch": 5.45, "learning_rate": 1.9273527508090618e-05, "loss": 0.2252, "step": 14030 }, { "epoch": 5.45, "learning_rate": 1.9273009708737866e-05, "loss": 0.2103, "step": 14040 }, { "epoch": 5.46, "learning_rate": 1.9272491909385117e-05, "loss": 0.1524, "step": 14050 }, { "epoch": 5.46, "learning_rate": 1.927197411003236e-05, "loss": 0.3122, "step": 14060 }, { "epoch": 5.46, "learning_rate": 1.9271456310679613e-05, "loss": 0.132, "step": 14070 }, { "epoch": 5.47, "learning_rate": 1.927093851132686e-05, "loss": 0.3348, "step": 14080 }, { "epoch": 5.47, "learning_rate": 1.9270420711974112e-05, "loss": 0.088, "step": 14090 }, { "epoch": 5.48, "learning_rate": 1.926990291262136e-05, "loss": 0.1786, "step": 14100 }, { "epoch": 5.48, "learning_rate": 1.926938511326861e-05, "loss": 0.1415, "step": 14110 }, { "epoch": 5.48, "learning_rate": 1.926886731391586e-05, "loss": 0.4074, "step": 14120 }, { "epoch": 5.49, "learning_rate": 1.926834951456311e-05, "loss": 0.1725, "step": 14130 }, { "epoch": 5.49, "learning_rate": 1.9267831715210355e-05, "loss": 0.0728, "step": 14140 }, { "epoch": 5.5, "learning_rate": 1.9267313915857606e-05, "loss": 0.0673, "step": 14150 }, { "epoch": 5.5, "learning_rate": 1.9266796116504854e-05, "loss": 0.2062, "step": 14160 }, { "epoch": 5.5, "learning_rate": 1.9266278317152106e-05, "loss": 0.117, "step": 14170 }, { "epoch": 5.51, "learning_rate": 1.9265760517799354e-05, "loss": 0.2056, "step": 14180 }, { "epoch": 5.51, "learning_rate": 1.9265242718446605e-05, "loss": 0.1338, "step": 14190 }, { "epoch": 5.51, "learning_rate": 1.9264724919093853e-05, "loss": 0.2366, "step": 14200 }, { "epoch": 5.52, "learning_rate": 1.9264207119741104e-05, "loss": 0.1643, "step": 14210 }, { "epoch": 5.52, "learning_rate": 1.926368932038835e-05, "loss": 0.2852, "step": 14220 }, { "epoch": 5.53, "learning_rate": 1.92631715210356e-05, "loss": 0.3748, "step": 14230 }, { "epoch": 5.53, "learning_rate": 1.9262653721682848e-05, "loss": 0.3026, "step": 14240 }, { "epoch": 5.53, "learning_rate": 1.92621359223301e-05, "loss": 0.3083, "step": 14250 }, { "epoch": 5.54, "learning_rate": 1.9261618122977347e-05, "loss": 0.2295, "step": 14260 }, { "epoch": 5.54, "learning_rate": 1.92611003236246e-05, "loss": 0.146, "step": 14270 }, { "epoch": 5.55, "learning_rate": 1.9260582524271846e-05, "loss": 0.395, "step": 14280 }, { "epoch": 5.55, "learning_rate": 1.9260064724919098e-05, "loss": 0.1961, "step": 14290 }, { "epoch": 5.55, "learning_rate": 1.9259546925566342e-05, "loss": 0.0909, "step": 14300 }, { "epoch": 5.56, "learning_rate": 1.9259029126213593e-05, "loss": 0.1856, "step": 14310 }, { "epoch": 5.56, "learning_rate": 1.925851132686084e-05, "loss": 0.0965, "step": 14320 }, { "epoch": 5.57, "learning_rate": 1.9257993527508093e-05, "loss": 0.2034, "step": 14330 }, { "epoch": 5.57, "learning_rate": 1.925747572815534e-05, "loss": 0.2148, "step": 14340 }, { "epoch": 5.57, "learning_rate": 1.9256957928802592e-05, "loss": 0.0668, "step": 14350 }, { "epoch": 5.58, "learning_rate": 1.925644012944984e-05, "loss": 0.1503, "step": 14360 }, { "epoch": 5.58, "learning_rate": 1.925592233009709e-05, "loss": 0.109, "step": 14370 }, { "epoch": 5.58, "learning_rate": 1.9255404530744336e-05, "loss": 0.1229, "step": 14380 }, { "epoch": 5.59, "learning_rate": 1.9254886731391587e-05, "loss": 0.2157, "step": 14390 }, { "epoch": 5.59, "learning_rate": 1.9254368932038835e-05, "loss": 0.1816, "step": 14400 }, { "epoch": 5.6, "learning_rate": 1.9253851132686086e-05, "loss": 0.1208, "step": 14410 }, { "epoch": 5.6, "learning_rate": 1.9253333333333334e-05, "loss": 0.0814, "step": 14420 }, { "epoch": 5.6, "learning_rate": 1.9252815533980586e-05, "loss": 0.131, "step": 14430 }, { "epoch": 5.61, "learning_rate": 1.9252297734627833e-05, "loss": 0.0829, "step": 14440 }, { "epoch": 5.61, "learning_rate": 1.9251779935275085e-05, "loss": 0.3475, "step": 14450 }, { "epoch": 5.62, "learning_rate": 1.925126213592233e-05, "loss": 0.2335, "step": 14460 }, { "epoch": 5.62, "learning_rate": 1.925074433656958e-05, "loss": 0.2357, "step": 14470 }, { "epoch": 5.62, "learning_rate": 1.925022653721683e-05, "loss": 0.1109, "step": 14480 }, { "epoch": 5.63, "learning_rate": 1.924970873786408e-05, "loss": 0.1486, "step": 14490 }, { "epoch": 5.63, "learning_rate": 1.9249190938511328e-05, "loss": 0.155, "step": 14500 }, { "epoch": 5.63, "learning_rate": 1.924867313915858e-05, "loss": 0.2239, "step": 14510 }, { "epoch": 5.64, "learning_rate": 1.9248155339805827e-05, "loss": 0.3423, "step": 14520 }, { "epoch": 5.64, "learning_rate": 1.924763754045308e-05, "loss": 0.0854, "step": 14530 }, { "epoch": 5.65, "learning_rate": 1.9247119741100323e-05, "loss": 0.3277, "step": 14540 }, { "epoch": 5.65, "learning_rate": 1.9246601941747574e-05, "loss": 0.2824, "step": 14550 }, { "epoch": 5.65, "learning_rate": 1.9246084142394822e-05, "loss": 0.2528, "step": 14560 }, { "epoch": 5.66, "learning_rate": 1.9245566343042073e-05, "loss": 0.1082, "step": 14570 }, { "epoch": 5.66, "learning_rate": 1.924504854368932e-05, "loss": 0.2426, "step": 14580 }, { "epoch": 5.67, "learning_rate": 1.9244530744336573e-05, "loss": 0.2329, "step": 14590 }, { "epoch": 5.67, "learning_rate": 1.924401294498382e-05, "loss": 0.2598, "step": 14600 }, { "epoch": 5.67, "learning_rate": 1.9243495145631072e-05, "loss": 0.3209, "step": 14610 }, { "epoch": 5.68, "learning_rate": 1.924297734627832e-05, "loss": 0.2159, "step": 14620 }, { "epoch": 5.68, "learning_rate": 1.9242459546925568e-05, "loss": 0.1869, "step": 14630 }, { "epoch": 5.69, "learning_rate": 1.9241941747572816e-05, "loss": 0.349, "step": 14640 }, { "epoch": 5.69, "learning_rate": 1.9241423948220067e-05, "loss": 0.0921, "step": 14650 }, { "epoch": 5.69, "learning_rate": 1.9240906148867315e-05, "loss": 0.2549, "step": 14660 }, { "epoch": 5.7, "learning_rate": 1.9240388349514566e-05, "loss": 0.3339, "step": 14670 }, { "epoch": 5.7, "learning_rate": 1.9239870550161814e-05, "loss": 0.3539, "step": 14680 }, { "epoch": 5.7, "learning_rate": 1.9239352750809065e-05, "loss": 0.1854, "step": 14690 }, { "epoch": 5.71, "learning_rate": 1.9238834951456313e-05, "loss": 0.1628, "step": 14700 }, { "epoch": 5.71, "learning_rate": 1.923831715210356e-05, "loss": 0.1387, "step": 14710 }, { "epoch": 5.72, "learning_rate": 1.923779935275081e-05, "loss": 0.2845, "step": 14720 }, { "epoch": 5.72, "learning_rate": 1.923728155339806e-05, "loss": 0.148, "step": 14730 }, { "epoch": 5.72, "learning_rate": 1.923676375404531e-05, "loss": 0.2681, "step": 14740 }, { "epoch": 5.73, "learning_rate": 1.923624595469256e-05, "loss": 0.1988, "step": 14750 }, { "epoch": 5.73, "learning_rate": 1.9235728155339808e-05, "loss": 0.1291, "step": 14760 }, { "epoch": 5.74, "learning_rate": 1.923521035598706e-05, "loss": 0.1396, "step": 14770 }, { "epoch": 5.74, "learning_rate": 1.9234692556634307e-05, "loss": 0.3225, "step": 14780 }, { "epoch": 5.74, "learning_rate": 1.9234174757281555e-05, "loss": 0.1079, "step": 14790 }, { "epoch": 5.75, "learning_rate": 1.9233656957928803e-05, "loss": 0.2939, "step": 14800 }, { "epoch": 5.75, "learning_rate": 1.9233139158576054e-05, "loss": 0.1918, "step": 14810 }, { "epoch": 5.76, "learning_rate": 1.9232621359223302e-05, "loss": 0.1802, "step": 14820 }, { "epoch": 5.76, "learning_rate": 1.9232103559870553e-05, "loss": 0.2616, "step": 14830 }, { "epoch": 5.76, "learning_rate": 1.92315857605178e-05, "loss": 0.1443, "step": 14840 }, { "epoch": 5.77, "learning_rate": 1.923106796116505e-05, "loss": 0.1951, "step": 14850 }, { "epoch": 5.77, "learning_rate": 1.92305501618123e-05, "loss": 0.1339, "step": 14860 }, { "epoch": 5.77, "learning_rate": 1.923003236245955e-05, "loss": 0.2865, "step": 14870 }, { "epoch": 5.78, "learning_rate": 1.9229514563106796e-05, "loss": 0.4193, "step": 14880 }, { "epoch": 5.78, "learning_rate": 1.9228996763754048e-05, "loss": 0.0555, "step": 14890 }, { "epoch": 5.79, "learning_rate": 1.9228478964401296e-05, "loss": 0.3844, "step": 14900 }, { "epoch": 5.79, "learning_rate": 1.9227961165048547e-05, "loss": 0.1302, "step": 14910 }, { "epoch": 5.79, "learning_rate": 1.9227443365695795e-05, "loss": 0.1897, "step": 14920 }, { "epoch": 5.8, "learning_rate": 1.9226925566343043e-05, "loss": 0.0411, "step": 14930 }, { "epoch": 5.8, "learning_rate": 1.9226407766990294e-05, "loss": 0.1769, "step": 14940 }, { "epoch": 5.81, "learning_rate": 1.9225889967637542e-05, "loss": 0.3511, "step": 14950 }, { "epoch": 5.81, "learning_rate": 1.922537216828479e-05, "loss": 0.2955, "step": 14960 }, { "epoch": 5.81, "learning_rate": 1.922485436893204e-05, "loss": 0.1421, "step": 14970 }, { "epoch": 5.82, "learning_rate": 1.922433656957929e-05, "loss": 0.1483, "step": 14980 }, { "epoch": 5.82, "learning_rate": 1.922381877022654e-05, "loss": 0.1852, "step": 14990 }, { "epoch": 5.83, "learning_rate": 1.922330097087379e-05, "loss": 0.0685, "step": 15000 }, { "epoch": 5.83, "learning_rate": 1.9222783171521036e-05, "loss": 0.2201, "step": 15010 }, { "epoch": 5.83, "learning_rate": 1.9222265372168288e-05, "loss": 0.1539, "step": 15020 }, { "epoch": 5.84, "learning_rate": 1.9221747572815536e-05, "loss": 0.1496, "step": 15030 }, { "epoch": 5.84, "learning_rate": 1.9221229773462783e-05, "loss": 0.1749, "step": 15040 }, { "epoch": 5.84, "learning_rate": 1.9220711974110035e-05, "loss": 0.1988, "step": 15050 }, { "epoch": 5.85, "learning_rate": 1.9220194174757283e-05, "loss": 0.2328, "step": 15060 }, { "epoch": 5.85, "learning_rate": 1.9219676375404534e-05, "loss": 0.1989, "step": 15070 }, { "epoch": 5.86, "learning_rate": 1.9219158576051782e-05, "loss": 0.1949, "step": 15080 }, { "epoch": 5.86, "learning_rate": 1.921864077669903e-05, "loss": 0.1698, "step": 15090 }, { "epoch": 5.86, "learning_rate": 1.921812297734628e-05, "loss": 0.1237, "step": 15100 }, { "epoch": 5.87, "learning_rate": 1.921760517799353e-05, "loss": 0.0926, "step": 15110 }, { "epoch": 5.87, "learning_rate": 1.9217087378640777e-05, "loss": 0.3122, "step": 15120 }, { "epoch": 5.88, "learning_rate": 1.9216569579288028e-05, "loss": 0.3145, "step": 15130 }, { "epoch": 5.88, "learning_rate": 1.9216051779935276e-05, "loss": 0.0698, "step": 15140 }, { "epoch": 5.88, "learning_rate": 1.9215533980582528e-05, "loss": 0.0747, "step": 15150 }, { "epoch": 5.89, "learning_rate": 1.9215016181229775e-05, "loss": 0.1761, "step": 15160 }, { "epoch": 5.89, "learning_rate": 1.9214498381877023e-05, "loss": 0.4981, "step": 15170 }, { "epoch": 5.9, "learning_rate": 1.9213980582524275e-05, "loss": 0.1832, "step": 15180 }, { "epoch": 5.9, "learning_rate": 1.9213462783171523e-05, "loss": 0.2347, "step": 15190 }, { "epoch": 5.9, "learning_rate": 1.921294498381877e-05, "loss": 0.0525, "step": 15200 }, { "epoch": 5.91, "learning_rate": 1.9212427184466022e-05, "loss": 0.2044, "step": 15210 }, { "epoch": 5.91, "learning_rate": 1.921190938511327e-05, "loss": 0.2739, "step": 15220 }, { "epoch": 5.91, "learning_rate": 1.9211391585760518e-05, "loss": 0.0824, "step": 15230 }, { "epoch": 5.92, "learning_rate": 1.921087378640777e-05, "loss": 0.058, "step": 15240 }, { "epoch": 5.92, "learning_rate": 1.9210355987055017e-05, "loss": 0.0653, "step": 15250 }, { "epoch": 5.93, "learning_rate": 1.9209838187702268e-05, "loss": 0.1564, "step": 15260 }, { "epoch": 5.93, "learning_rate": 1.9209320388349516e-05, "loss": 0.4103, "step": 15270 }, { "epoch": 5.93, "learning_rate": 1.9208802588996764e-05, "loss": 0.2333, "step": 15280 }, { "epoch": 5.94, "learning_rate": 1.9208284789644015e-05, "loss": 0.0356, "step": 15290 }, { "epoch": 5.94, "learning_rate": 1.9207766990291263e-05, "loss": 0.1436, "step": 15300 }, { "epoch": 5.95, "learning_rate": 1.920724919093851e-05, "loss": 0.3055, "step": 15310 }, { "epoch": 5.95, "learning_rate": 1.9206731391585763e-05, "loss": 0.1501, "step": 15320 }, { "epoch": 5.95, "learning_rate": 1.920621359223301e-05, "loss": 0.4844, "step": 15330 }, { "epoch": 5.96, "learning_rate": 1.9205695792880262e-05, "loss": 0.077, "step": 15340 }, { "epoch": 5.96, "learning_rate": 1.920517799352751e-05, "loss": 0.1104, "step": 15350 }, { "epoch": 5.97, "learning_rate": 1.9204660194174758e-05, "loss": 0.0874, "step": 15360 }, { "epoch": 5.97, "learning_rate": 1.920414239482201e-05, "loss": 0.0952, "step": 15370 }, { "epoch": 5.97, "learning_rate": 1.9203624595469257e-05, "loss": 0.2246, "step": 15380 }, { "epoch": 5.98, "learning_rate": 1.9203106796116505e-05, "loss": 0.1307, "step": 15390 }, { "epoch": 5.98, "learning_rate": 1.9202588996763756e-05, "loss": 0.3018, "step": 15400 }, { "epoch": 5.98, "learning_rate": 1.9202071197411004e-05, "loss": 0.1101, "step": 15410 }, { "epoch": 5.99, "learning_rate": 1.9201553398058255e-05, "loss": 0.1423, "step": 15420 }, { "epoch": 5.99, "learning_rate": 1.9201035598705503e-05, "loss": 0.2311, "step": 15430 }, { "epoch": 6.0, "learning_rate": 1.920051779935275e-05, "loss": 0.1141, "step": 15440 }, { "epoch": 6.0, "learning_rate": 1.9200000000000003e-05, "loss": 0.078, "step": 15450 }, { "epoch": 6.0, "eval_accuracy": 0.9515818431911967, "eval_loss": 0.20075452327728271, "eval_runtime": 8.2494, "eval_samples_per_second": 440.638, "eval_steps_per_second": 55.156, "step": 15450 }, { "epoch": 6.0, "learning_rate": 1.919948220064725e-05, "loss": 0.3557, "step": 15460 }, { "epoch": 6.01, "learning_rate": 1.91989644012945e-05, "loss": 0.0501, "step": 15470 }, { "epoch": 6.01, "learning_rate": 1.919844660194175e-05, "loss": 0.1672, "step": 15480 }, { "epoch": 6.02, "learning_rate": 1.9197928802588998e-05, "loss": 0.1336, "step": 15490 }, { "epoch": 6.02, "learning_rate": 1.919741100323625e-05, "loss": 0.2093, "step": 15500 }, { "epoch": 6.02, "learning_rate": 1.9196893203883497e-05, "loss": 0.1891, "step": 15510 }, { "epoch": 6.03, "learning_rate": 1.9196375404530745e-05, "loss": 0.2799, "step": 15520 }, { "epoch": 6.03, "learning_rate": 1.9195857605177993e-05, "loss": 0.1978, "step": 15530 }, { "epoch": 6.03, "learning_rate": 1.9195339805825244e-05, "loss": 0.2252, "step": 15540 }, { "epoch": 6.04, "learning_rate": 1.9194822006472492e-05, "loss": 0.1, "step": 15550 }, { "epoch": 6.04, "learning_rate": 1.9194304207119743e-05, "loss": 0.1596, "step": 15560 }, { "epoch": 6.05, "learning_rate": 1.919378640776699e-05, "loss": 0.1937, "step": 15570 }, { "epoch": 6.05, "learning_rate": 1.9193268608414242e-05, "loss": 0.1196, "step": 15580 }, { "epoch": 6.05, "learning_rate": 1.919275080906149e-05, "loss": 0.1603, "step": 15590 }, { "epoch": 6.06, "learning_rate": 1.919223300970874e-05, "loss": 0.0718, "step": 15600 }, { "epoch": 6.06, "learning_rate": 1.9191715210355986e-05, "loss": 0.1527, "step": 15610 }, { "epoch": 6.07, "learning_rate": 1.9191197411003238e-05, "loss": 0.0953, "step": 15620 }, { "epoch": 6.07, "learning_rate": 1.9190679611650485e-05, "loss": 0.1299, "step": 15630 }, { "epoch": 6.07, "learning_rate": 1.9190161812297737e-05, "loss": 0.2235, "step": 15640 }, { "epoch": 6.08, "learning_rate": 1.9189644012944985e-05, "loss": 0.1687, "step": 15650 }, { "epoch": 6.08, "learning_rate": 1.9189126213592236e-05, "loss": 0.2346, "step": 15660 }, { "epoch": 6.09, "learning_rate": 1.9188608414239484e-05, "loss": 0.3069, "step": 15670 }, { "epoch": 6.09, "learning_rate": 1.9188090614886732e-05, "loss": 0.3043, "step": 15680 }, { "epoch": 6.09, "learning_rate": 1.918757281553398e-05, "loss": 0.2171, "step": 15690 }, { "epoch": 6.1, "learning_rate": 1.918705501618123e-05, "loss": 0.1069, "step": 15700 }, { "epoch": 6.1, "learning_rate": 1.918653721682848e-05, "loss": 0.0284, "step": 15710 }, { "epoch": 6.1, "learning_rate": 1.918601941747573e-05, "loss": 0.162, "step": 15720 }, { "epoch": 6.11, "learning_rate": 1.9185501618122978e-05, "loss": 0.2666, "step": 15730 }, { "epoch": 6.11, "learning_rate": 1.918498381877023e-05, "loss": 0.1653, "step": 15740 }, { "epoch": 6.12, "learning_rate": 1.9184466019417478e-05, "loss": 0.1882, "step": 15750 }, { "epoch": 6.12, "learning_rate": 1.918394822006473e-05, "loss": 0.1736, "step": 15760 }, { "epoch": 6.12, "learning_rate": 1.9183430420711973e-05, "loss": 0.1771, "step": 15770 }, { "epoch": 6.13, "learning_rate": 1.9182912621359225e-05, "loss": 0.194, "step": 15780 }, { "epoch": 6.13, "learning_rate": 1.9182394822006473e-05, "loss": 0.1675, "step": 15790 }, { "epoch": 6.14, "learning_rate": 1.9181877022653724e-05, "loss": 0.135, "step": 15800 }, { "epoch": 6.14, "learning_rate": 1.9181359223300972e-05, "loss": 0.1382, "step": 15810 }, { "epoch": 6.14, "learning_rate": 1.9180841423948223e-05, "loss": 0.246, "step": 15820 }, { "epoch": 6.15, "learning_rate": 1.918032362459547e-05, "loss": 0.119, "step": 15830 }, { "epoch": 6.15, "learning_rate": 1.9179805825242722e-05, "loss": 0.1854, "step": 15840 }, { "epoch": 6.16, "learning_rate": 1.9179288025889967e-05, "loss": 0.019, "step": 15850 }, { "epoch": 6.16, "learning_rate": 1.9178770226537218e-05, "loss": 0.2809, "step": 15860 }, { "epoch": 6.16, "learning_rate": 1.9178252427184466e-05, "loss": 0.1841, "step": 15870 }, { "epoch": 6.17, "learning_rate": 1.9177734627831717e-05, "loss": 0.4664, "step": 15880 }, { "epoch": 6.17, "learning_rate": 1.9177216828478965e-05, "loss": 0.2266, "step": 15890 }, { "epoch": 6.17, "learning_rate": 1.9176699029126217e-05, "loss": 0.1631, "step": 15900 }, { "epoch": 6.18, "learning_rate": 1.9176181229773465e-05, "loss": 0.2704, "step": 15910 }, { "epoch": 6.18, "learning_rate": 1.9175663430420716e-05, "loss": 0.2586, "step": 15920 }, { "epoch": 6.19, "learning_rate": 1.917514563106796e-05, "loss": 0.0879, "step": 15930 }, { "epoch": 6.19, "learning_rate": 1.9174627831715212e-05, "loss": 0.2178, "step": 15940 }, { "epoch": 6.19, "learning_rate": 1.917411003236246e-05, "loss": 0.1129, "step": 15950 }, { "epoch": 6.2, "learning_rate": 1.917359223300971e-05, "loss": 0.161, "step": 15960 }, { "epoch": 6.2, "learning_rate": 1.917307443365696e-05, "loss": 0.0851, "step": 15970 }, { "epoch": 6.21, "learning_rate": 1.917255663430421e-05, "loss": 0.1581, "step": 15980 }, { "epoch": 6.21, "learning_rate": 1.9172038834951458e-05, "loss": 0.201, "step": 15990 }, { "epoch": 6.21, "learning_rate": 1.917152103559871e-05, "loss": 0.0281, "step": 16000 }, { "epoch": 6.22, "learning_rate": 1.9171003236245954e-05, "loss": 0.1721, "step": 16010 }, { "epoch": 6.22, "learning_rate": 1.9170485436893205e-05, "loss": 0.271, "step": 16020 }, { "epoch": 6.23, "learning_rate": 1.9169967637540453e-05, "loss": 0.2183, "step": 16030 }, { "epoch": 6.23, "learning_rate": 1.9169449838187705e-05, "loss": 0.1707, "step": 16040 }, { "epoch": 6.23, "learning_rate": 1.9168932038834952e-05, "loss": 0.1961, "step": 16050 }, { "epoch": 6.24, "learning_rate": 1.9168414239482204e-05, "loss": 0.1559, "step": 16060 }, { "epoch": 6.24, "learning_rate": 1.9167896440129452e-05, "loss": 0.1369, "step": 16070 }, { "epoch": 6.24, "learning_rate": 1.9167378640776703e-05, "loss": 0.2759, "step": 16080 }, { "epoch": 6.25, "learning_rate": 1.9166860841423948e-05, "loss": 0.2847, "step": 16090 }, { "epoch": 6.25, "learning_rate": 1.91663430420712e-05, "loss": 0.1215, "step": 16100 }, { "epoch": 6.26, "learning_rate": 1.9165825242718447e-05, "loss": 0.1622, "step": 16110 }, { "epoch": 6.26, "learning_rate": 1.9165307443365698e-05, "loss": 0.196, "step": 16120 }, { "epoch": 6.26, "learning_rate": 1.9164789644012946e-05, "loss": 0.2124, "step": 16130 }, { "epoch": 6.27, "learning_rate": 1.9164271844660197e-05, "loss": 0.08, "step": 16140 }, { "epoch": 6.27, "learning_rate": 1.9163754045307445e-05, "loss": 0.0445, "step": 16150 }, { "epoch": 6.28, "learning_rate": 1.9163236245954697e-05, "loss": 0.1426, "step": 16160 }, { "epoch": 6.28, "learning_rate": 1.916271844660194e-05, "loss": 0.1171, "step": 16170 }, { "epoch": 6.28, "learning_rate": 1.9162200647249192e-05, "loss": 0.174, "step": 16180 }, { "epoch": 6.29, "learning_rate": 1.916168284789644e-05, "loss": 0.2205, "step": 16190 }, { "epoch": 6.29, "learning_rate": 1.916116504854369e-05, "loss": 0.2687, "step": 16200 }, { "epoch": 6.3, "learning_rate": 1.916064724919094e-05, "loss": 0.3148, "step": 16210 }, { "epoch": 6.3, "learning_rate": 1.916012944983819e-05, "loss": 0.3026, "step": 16220 }, { "epoch": 6.3, "learning_rate": 1.915961165048544e-05, "loss": 0.234, "step": 16230 }, { "epoch": 6.31, "learning_rate": 1.915909385113269e-05, "loss": 0.2651, "step": 16240 }, { "epoch": 6.31, "learning_rate": 1.9158576051779935e-05, "loss": 0.0716, "step": 16250 }, { "epoch": 6.31, "learning_rate": 1.9158058252427186e-05, "loss": 0.3009, "step": 16260 }, { "epoch": 6.32, "learning_rate": 1.9157540453074434e-05, "loss": 0.1907, "step": 16270 }, { "epoch": 6.32, "learning_rate": 1.9157022653721685e-05, "loss": 0.0772, "step": 16280 }, { "epoch": 6.33, "learning_rate": 1.9156504854368933e-05, "loss": 0.1022, "step": 16290 }, { "epoch": 6.33, "learning_rate": 1.9155987055016184e-05, "loss": 0.0499, "step": 16300 }, { "epoch": 6.33, "learning_rate": 1.9155469255663432e-05, "loss": 0.0518, "step": 16310 }, { "epoch": 6.34, "learning_rate": 1.915495145631068e-05, "loss": 0.1924, "step": 16320 }, { "epoch": 6.34, "learning_rate": 1.915443365695793e-05, "loss": 0.2682, "step": 16330 }, { "epoch": 6.35, "learning_rate": 1.915391585760518e-05, "loss": 0.0451, "step": 16340 }, { "epoch": 6.35, "learning_rate": 1.9153398058252427e-05, "loss": 0.2326, "step": 16350 }, { "epoch": 6.35, "learning_rate": 1.915288025889968e-05, "loss": 0.2737, "step": 16360 }, { "epoch": 6.36, "learning_rate": 1.9152362459546927e-05, "loss": 0.3229, "step": 16370 }, { "epoch": 6.36, "learning_rate": 1.9151844660194178e-05, "loss": 0.3361, "step": 16380 }, { "epoch": 6.37, "learning_rate": 1.9151326860841426e-05, "loss": 0.2134, "step": 16390 }, { "epoch": 6.37, "learning_rate": 1.9150809061488674e-05, "loss": 0.0914, "step": 16400 }, { "epoch": 6.37, "learning_rate": 1.9150291262135925e-05, "loss": 0.0745, "step": 16410 }, { "epoch": 6.38, "learning_rate": 1.9149773462783173e-05, "loss": 0.4557, "step": 16420 }, { "epoch": 6.38, "learning_rate": 1.914925566343042e-05, "loss": 0.0096, "step": 16430 }, { "epoch": 6.38, "learning_rate": 1.9148737864077672e-05, "loss": 0.1587, "step": 16440 }, { "epoch": 6.39, "learning_rate": 1.914822006472492e-05, "loss": 0.2632, "step": 16450 }, { "epoch": 6.39, "learning_rate": 1.914770226537217e-05, "loss": 0.1912, "step": 16460 }, { "epoch": 6.4, "learning_rate": 1.914718446601942e-05, "loss": 0.2155, "step": 16470 }, { "epoch": 6.4, "learning_rate": 1.9146666666666667e-05, "loss": 0.2165, "step": 16480 }, { "epoch": 6.4, "learning_rate": 1.914614886731392e-05, "loss": 0.1098, "step": 16490 }, { "epoch": 6.41, "learning_rate": 1.9145631067961167e-05, "loss": 0.151, "step": 16500 }, { "epoch": 6.41, "learning_rate": 1.9145113268608415e-05, "loss": 0.1824, "step": 16510 }, { "epoch": 6.42, "learning_rate": 1.9144595469255666e-05, "loss": 0.1634, "step": 16520 }, { "epoch": 6.42, "learning_rate": 1.9144077669902914e-05, "loss": 0.2347, "step": 16530 }, { "epoch": 6.42, "learning_rate": 1.9143559870550165e-05, "loss": 0.2851, "step": 16540 }, { "epoch": 6.43, "learning_rate": 1.9143042071197413e-05, "loss": 0.1948, "step": 16550 }, { "epoch": 6.43, "learning_rate": 1.914252427184466e-05, "loss": 0.0837, "step": 16560 }, { "epoch": 6.43, "learning_rate": 1.9142006472491912e-05, "loss": 0.2453, "step": 16570 }, { "epoch": 6.44, "learning_rate": 1.914148867313916e-05, "loss": 0.137, "step": 16580 }, { "epoch": 6.44, "learning_rate": 1.9140970873786408e-05, "loss": 0.1914, "step": 16590 }, { "epoch": 6.45, "learning_rate": 1.914045307443366e-05, "loss": 0.2187, "step": 16600 }, { "epoch": 6.45, "learning_rate": 1.9139935275080907e-05, "loss": 0.1347, "step": 16610 }, { "epoch": 6.45, "learning_rate": 1.9139417475728155e-05, "loss": 0.1845, "step": 16620 }, { "epoch": 6.46, "learning_rate": 1.9138899676375407e-05, "loss": 0.1783, "step": 16630 }, { "epoch": 6.46, "learning_rate": 1.9138381877022655e-05, "loss": 0.0791, "step": 16640 }, { "epoch": 6.47, "learning_rate": 1.9137864077669906e-05, "loss": 0.2269, "step": 16650 }, { "epoch": 6.47, "learning_rate": 1.9137346278317154e-05, "loss": 0.1276, "step": 16660 }, { "epoch": 6.47, "learning_rate": 1.9136828478964402e-05, "loss": 0.22, "step": 16670 }, { "epoch": 6.48, "learning_rate": 1.9136310679611653e-05, "loss": 0.1866, "step": 16680 }, { "epoch": 6.48, "learning_rate": 1.91357928802589e-05, "loss": 0.1774, "step": 16690 }, { "epoch": 6.49, "learning_rate": 1.913527508090615e-05, "loss": 0.0928, "step": 16700 }, { "epoch": 6.49, "learning_rate": 1.91347572815534e-05, "loss": 0.1171, "step": 16710 }, { "epoch": 6.49, "learning_rate": 1.9134239482200648e-05, "loss": 0.1176, "step": 16720 }, { "epoch": 6.5, "learning_rate": 1.91337216828479e-05, "loss": 0.1518, "step": 16730 }, { "epoch": 6.5, "learning_rate": 1.9133203883495147e-05, "loss": 0.1251, "step": 16740 }, { "epoch": 6.5, "learning_rate": 1.9132686084142395e-05, "loss": 0.328, "step": 16750 }, { "epoch": 6.51, "learning_rate": 1.9132168284789647e-05, "loss": 0.2399, "step": 16760 }, { "epoch": 6.51, "learning_rate": 1.9131650485436895e-05, "loss": 0.0783, "step": 16770 }, { "epoch": 6.52, "learning_rate": 1.9131132686084142e-05, "loss": 0.1425, "step": 16780 }, { "epoch": 6.52, "learning_rate": 1.9130614886731394e-05, "loss": 0.1957, "step": 16790 }, { "epoch": 6.52, "learning_rate": 1.913009708737864e-05, "loss": 0.3248, "step": 16800 }, { "epoch": 6.53, "learning_rate": 1.9129579288025893e-05, "loss": 0.3196, "step": 16810 }, { "epoch": 6.53, "learning_rate": 1.912906148867314e-05, "loss": 0.4002, "step": 16820 }, { "epoch": 6.54, "learning_rate": 1.912854368932039e-05, "loss": 0.0971, "step": 16830 }, { "epoch": 6.54, "learning_rate": 1.912802588996764e-05, "loss": 0.203, "step": 16840 }, { "epoch": 6.54, "learning_rate": 1.9127508090614888e-05, "loss": 0.0624, "step": 16850 }, { "epoch": 6.55, "learning_rate": 1.9126990291262136e-05, "loss": 0.2052, "step": 16860 }, { "epoch": 6.55, "learning_rate": 1.9126472491909387e-05, "loss": 0.0696, "step": 16870 }, { "epoch": 6.56, "learning_rate": 1.9125954692556635e-05, "loss": 0.1223, "step": 16880 }, { "epoch": 6.56, "learning_rate": 1.9125436893203887e-05, "loss": 0.0699, "step": 16890 }, { "epoch": 6.56, "learning_rate": 1.9124919093851134e-05, "loss": 0.1825, "step": 16900 }, { "epoch": 6.57, "learning_rate": 1.9124401294498382e-05, "loss": 0.0294, "step": 16910 }, { "epoch": 6.57, "learning_rate": 1.9123883495145634e-05, "loss": 0.0947, "step": 16920 }, { "epoch": 6.57, "learning_rate": 1.912336569579288e-05, "loss": 0.1121, "step": 16930 }, { "epoch": 6.58, "learning_rate": 1.912284789644013e-05, "loss": 0.127, "step": 16940 }, { "epoch": 6.58, "learning_rate": 1.912233009708738e-05, "loss": 0.2293, "step": 16950 }, { "epoch": 6.59, "learning_rate": 1.912181229773463e-05, "loss": 0.1297, "step": 16960 }, { "epoch": 6.59, "learning_rate": 1.912129449838188e-05, "loss": 0.122, "step": 16970 }, { "epoch": 6.59, "learning_rate": 1.9120776699029128e-05, "loss": 0.1205, "step": 16980 }, { "epoch": 6.6, "learning_rate": 1.9120258899676376e-05, "loss": 0.3043, "step": 16990 }, { "epoch": 6.6, "learning_rate": 1.9119741100323624e-05, "loss": 0.158, "step": 17000 }, { "epoch": 6.61, "learning_rate": 1.9119223300970875e-05, "loss": 0.0832, "step": 17010 }, { "epoch": 6.61, "learning_rate": 1.9118705501618123e-05, "loss": 0.1239, "step": 17020 }, { "epoch": 6.61, "learning_rate": 1.9118187702265374e-05, "loss": 0.1316, "step": 17030 }, { "epoch": 6.62, "learning_rate": 1.9117669902912622e-05, "loss": 0.2139, "step": 17040 }, { "epoch": 6.62, "learning_rate": 1.9117152103559874e-05, "loss": 0.2754, "step": 17050 }, { "epoch": 6.63, "learning_rate": 1.911663430420712e-05, "loss": 0.1551, "step": 17060 }, { "epoch": 6.63, "learning_rate": 1.911611650485437e-05, "loss": 0.2108, "step": 17070 }, { "epoch": 6.63, "learning_rate": 1.9115598705501617e-05, "loss": 0.1862, "step": 17080 }, { "epoch": 6.64, "learning_rate": 1.911508090614887e-05, "loss": 0.1362, "step": 17090 }, { "epoch": 6.64, "learning_rate": 1.9114563106796117e-05, "loss": 0.3145, "step": 17100 }, { "epoch": 6.64, "learning_rate": 1.9114045307443368e-05, "loss": 0.3299, "step": 17110 }, { "epoch": 6.65, "learning_rate": 1.9113527508090616e-05, "loss": 0.1604, "step": 17120 }, { "epoch": 6.65, "learning_rate": 1.9113009708737867e-05, "loss": 0.2704, "step": 17130 }, { "epoch": 6.66, "learning_rate": 1.9112491909385115e-05, "loss": 0.0881, "step": 17140 }, { "epoch": 6.66, "learning_rate": 1.9111974110032363e-05, "loss": 0.2175, "step": 17150 }, { "epoch": 6.66, "learning_rate": 1.911145631067961e-05, "loss": 0.1405, "step": 17160 }, { "epoch": 6.67, "learning_rate": 1.9110938511326862e-05, "loss": 0.0751, "step": 17170 }, { "epoch": 6.67, "learning_rate": 1.911042071197411e-05, "loss": 0.1503, "step": 17180 }, { "epoch": 6.68, "learning_rate": 1.910990291262136e-05, "loss": 0.2502, "step": 17190 }, { "epoch": 6.68, "learning_rate": 1.910938511326861e-05, "loss": 0.2273, "step": 17200 }, { "epoch": 6.68, "learning_rate": 1.910886731391586e-05, "loss": 0.2392, "step": 17210 }, { "epoch": 6.69, "learning_rate": 1.910834951456311e-05, "loss": 0.1301, "step": 17220 }, { "epoch": 6.69, "learning_rate": 1.9107831715210357e-05, "loss": 0.1793, "step": 17230 }, { "epoch": 6.7, "learning_rate": 1.9107313915857605e-05, "loss": 0.1312, "step": 17240 }, { "epoch": 6.7, "learning_rate": 1.9106796116504856e-05, "loss": 0.1766, "step": 17250 }, { "epoch": 6.7, "learning_rate": 1.9106278317152104e-05, "loss": 0.0627, "step": 17260 }, { "epoch": 6.71, "learning_rate": 1.9105760517799355e-05, "loss": 0.2604, "step": 17270 }, { "epoch": 6.71, "learning_rate": 1.9105242718446603e-05, "loss": 0.3821, "step": 17280 }, { "epoch": 6.71, "learning_rate": 1.9104724919093854e-05, "loss": 0.2116, "step": 17290 }, { "epoch": 6.72, "learning_rate": 1.9104207119741102e-05, "loss": 0.1375, "step": 17300 }, { "epoch": 6.72, "learning_rate": 1.910368932038835e-05, "loss": 0.347, "step": 17310 }, { "epoch": 6.73, "learning_rate": 1.9103171521035598e-05, "loss": 0.2109, "step": 17320 }, { "epoch": 6.73, "learning_rate": 1.910265372168285e-05, "loss": 0.2196, "step": 17330 }, { "epoch": 6.73, "learning_rate": 1.9102135922330097e-05, "loss": 0.178, "step": 17340 }, { "epoch": 6.74, "learning_rate": 1.910161812297735e-05, "loss": 0.0664, "step": 17350 }, { "epoch": 6.74, "learning_rate": 1.9101100323624597e-05, "loss": 0.1796, "step": 17360 }, { "epoch": 6.75, "learning_rate": 1.9100582524271848e-05, "loss": 0.2695, "step": 17370 }, { "epoch": 6.75, "learning_rate": 1.9100064724919096e-05, "loss": 0.1135, "step": 17380 }, { "epoch": 6.75, "learning_rate": 1.9099546925566347e-05, "loss": 0.2008, "step": 17390 }, { "epoch": 6.76, "learning_rate": 1.909902912621359e-05, "loss": 0.214, "step": 17400 }, { "epoch": 6.76, "learning_rate": 1.9098511326860843e-05, "loss": 0.103, "step": 17410 }, { "epoch": 6.77, "learning_rate": 1.909799352750809e-05, "loss": 0.2317, "step": 17420 }, { "epoch": 6.77, "learning_rate": 1.9097475728155342e-05, "loss": 0.2703, "step": 17430 }, { "epoch": 6.77, "learning_rate": 1.909695792880259e-05, "loss": 0.2738, "step": 17440 }, { "epoch": 6.78, "learning_rate": 1.909644012944984e-05, "loss": 0.2464, "step": 17450 }, { "epoch": 6.78, "learning_rate": 1.909592233009709e-05, "loss": 0.1197, "step": 17460 }, { "epoch": 6.78, "learning_rate": 1.909540453074434e-05, "loss": 0.0769, "step": 17470 }, { "epoch": 6.79, "learning_rate": 1.9094886731391585e-05, "loss": 0.1328, "step": 17480 }, { "epoch": 6.79, "learning_rate": 1.9094368932038837e-05, "loss": 0.1637, "step": 17490 }, { "epoch": 6.8, "learning_rate": 1.9093851132686084e-05, "loss": 0.1053, "step": 17500 }, { "epoch": 6.8, "learning_rate": 1.9093333333333336e-05, "loss": 0.1495, "step": 17510 }, { "epoch": 6.8, "learning_rate": 1.9092815533980584e-05, "loss": 0.3424, "step": 17520 }, { "epoch": 6.81, "learning_rate": 1.9092297734627835e-05, "loss": 0.157, "step": 17530 }, { "epoch": 6.81, "learning_rate": 1.9091779935275083e-05, "loss": 0.1431, "step": 17540 }, { "epoch": 6.82, "learning_rate": 1.9091262135922334e-05, "loss": 0.2201, "step": 17550 }, { "epoch": 6.82, "learning_rate": 1.909074433656958e-05, "loss": 0.14, "step": 17560 }, { "epoch": 6.82, "learning_rate": 1.909022653721683e-05, "loss": 0.1362, "step": 17570 }, { "epoch": 6.83, "learning_rate": 1.9089708737864078e-05, "loss": 0.2197, "step": 17580 }, { "epoch": 6.83, "learning_rate": 1.908919093851133e-05, "loss": 0.4042, "step": 17590 }, { "epoch": 6.83, "learning_rate": 1.9088673139158577e-05, "loss": 0.1047, "step": 17600 }, { "epoch": 6.84, "learning_rate": 1.908815533980583e-05, "loss": 0.2355, "step": 17610 }, { "epoch": 6.84, "learning_rate": 1.9087637540453076e-05, "loss": 0.1239, "step": 17620 }, { "epoch": 6.85, "learning_rate": 1.9087119741100328e-05, "loss": 0.236, "step": 17630 }, { "epoch": 6.85, "learning_rate": 1.9086601941747572e-05, "loss": 0.1671, "step": 17640 }, { "epoch": 6.85, "learning_rate": 1.9086084142394824e-05, "loss": 0.1873, "step": 17650 }, { "epoch": 6.86, "learning_rate": 1.908556634304207e-05, "loss": 0.3182, "step": 17660 }, { "epoch": 6.86, "learning_rate": 1.9085048543689323e-05, "loss": 0.1453, "step": 17670 }, { "epoch": 6.87, "learning_rate": 1.908453074433657e-05, "loss": 0.145, "step": 17680 }, { "epoch": 6.87, "learning_rate": 1.9084012944983822e-05, "loss": 0.1821, "step": 17690 }, { "epoch": 6.87, "learning_rate": 1.908349514563107e-05, "loss": 0.1633, "step": 17700 }, { "epoch": 6.88, "learning_rate": 1.908297734627832e-05, "loss": 0.0998, "step": 17710 }, { "epoch": 6.88, "learning_rate": 1.9082459546925566e-05, "loss": 0.0711, "step": 17720 }, { "epoch": 6.89, "learning_rate": 1.9081941747572817e-05, "loss": 0.2573, "step": 17730 }, { "epoch": 6.89, "learning_rate": 1.9081423948220065e-05, "loss": 0.1048, "step": 17740 }, { "epoch": 6.89, "learning_rate": 1.9080906148867316e-05, "loss": 0.1529, "step": 17750 }, { "epoch": 6.9, "learning_rate": 1.9080388349514564e-05, "loss": 0.1716, "step": 17760 }, { "epoch": 6.9, "learning_rate": 1.9079870550161816e-05, "loss": 0.1658, "step": 17770 }, { "epoch": 6.9, "learning_rate": 1.9079352750809064e-05, "loss": 0.2003, "step": 17780 }, { "epoch": 6.91, "learning_rate": 1.907883495145631e-05, "loss": 0.0673, "step": 17790 }, { "epoch": 6.91, "learning_rate": 1.907831715210356e-05, "loss": 0.1982, "step": 17800 }, { "epoch": 6.92, "learning_rate": 1.907779935275081e-05, "loss": 0.1791, "step": 17810 }, { "epoch": 6.92, "learning_rate": 1.907728155339806e-05, "loss": 0.1167, "step": 17820 }, { "epoch": 6.92, "learning_rate": 1.907676375404531e-05, "loss": 0.0787, "step": 17830 }, { "epoch": 6.93, "learning_rate": 1.9076245954692558e-05, "loss": 0.201, "step": 17840 }, { "epoch": 6.93, "learning_rate": 1.907572815533981e-05, "loss": 0.0963, "step": 17850 }, { "epoch": 6.94, "learning_rate": 1.9075210355987057e-05, "loss": 0.1458, "step": 17860 }, { "epoch": 6.94, "learning_rate": 1.9074692556634305e-05, "loss": 0.0864, "step": 17870 }, { "epoch": 6.94, "learning_rate": 1.9074174757281553e-05, "loss": 0.1299, "step": 17880 }, { "epoch": 6.95, "learning_rate": 1.9073656957928804e-05, "loss": 0.1835, "step": 17890 }, { "epoch": 6.95, "learning_rate": 1.9073139158576052e-05, "loss": 0.176, "step": 17900 }, { "epoch": 6.96, "learning_rate": 1.9072621359223304e-05, "loss": 0.2493, "step": 17910 }, { "epoch": 6.96, "learning_rate": 1.907210355987055e-05, "loss": 0.0915, "step": 17920 }, { "epoch": 6.96, "learning_rate": 1.9071585760517803e-05, "loss": 0.1241, "step": 17930 }, { "epoch": 6.97, "learning_rate": 1.907106796116505e-05, "loss": 0.2423, "step": 17940 }, { "epoch": 6.97, "learning_rate": 1.90705501618123e-05, "loss": 0.1171, "step": 17950 }, { "epoch": 6.97, "learning_rate": 1.907003236245955e-05, "loss": 0.2988, "step": 17960 }, { "epoch": 6.98, "learning_rate": 1.9069514563106798e-05, "loss": 0.1784, "step": 17970 }, { "epoch": 6.98, "learning_rate": 1.9068996763754046e-05, "loss": 0.142, "step": 17980 }, { "epoch": 6.99, "learning_rate": 1.9068478964401297e-05, "loss": 0.3226, "step": 17990 }, { "epoch": 6.99, "learning_rate": 1.9067961165048545e-05, "loss": 0.1368, "step": 18000 }, { "epoch": 6.99, "learning_rate": 1.9067443365695796e-05, "loss": 0.1914, "step": 18010 }, { "epoch": 7.0, "learning_rate": 1.9066925566343044e-05, "loss": 0.3493, "step": 18020 }, { "epoch": 7.0, "eval_accuracy": 0.949656121045392, "eval_loss": 0.20380502939224243, "eval_runtime": 8.1755, "eval_samples_per_second": 444.622, "eval_steps_per_second": 55.654, "step": 18025 }, { "epoch": 7.0, "learning_rate": 1.9066407766990292e-05, "loss": 0.2773, "step": 18030 }, { "epoch": 7.01, "learning_rate": 1.9065889967637543e-05, "loss": 0.1871, "step": 18040 }, { "epoch": 7.01, "learning_rate": 1.906537216828479e-05, "loss": 0.2832, "step": 18050 }, { "epoch": 7.01, "learning_rate": 1.906485436893204e-05, "loss": 0.1827, "step": 18060 }, { "epoch": 7.02, "learning_rate": 1.906433656957929e-05, "loss": 0.3218, "step": 18070 }, { "epoch": 7.02, "learning_rate": 1.906381877022654e-05, "loss": 0.2045, "step": 18080 }, { "epoch": 7.03, "learning_rate": 1.9063300970873786e-05, "loss": 0.1801, "step": 18090 }, { "epoch": 7.03, "learning_rate": 1.9062783171521038e-05, "loss": 0.0777, "step": 18100 }, { "epoch": 7.03, "learning_rate": 1.9062265372168286e-05, "loss": 0.1323, "step": 18110 }, { "epoch": 7.04, "learning_rate": 1.9061747572815537e-05, "loss": 0.1679, "step": 18120 }, { "epoch": 7.04, "learning_rate": 1.9061229773462785e-05, "loss": 0.2358, "step": 18130 }, { "epoch": 7.04, "learning_rate": 1.9060711974110033e-05, "loss": 0.2176, "step": 18140 }, { "epoch": 7.05, "learning_rate": 1.9060194174757284e-05, "loss": 0.0727, "step": 18150 }, { "epoch": 7.05, "learning_rate": 1.9059676375404532e-05, "loss": 0.2949, "step": 18160 }, { "epoch": 7.06, "learning_rate": 1.905915857605178e-05, "loss": 0.2343, "step": 18170 }, { "epoch": 7.06, "learning_rate": 1.905864077669903e-05, "loss": 0.2669, "step": 18180 }, { "epoch": 7.06, "learning_rate": 1.905812297734628e-05, "loss": 0.135, "step": 18190 }, { "epoch": 7.07, "learning_rate": 1.905760517799353e-05, "loss": 0.0075, "step": 18200 }, { "epoch": 7.07, "learning_rate": 1.905708737864078e-05, "loss": 0.196, "step": 18210 }, { "epoch": 7.08, "learning_rate": 1.9056569579288026e-05, "loss": 0.1789, "step": 18220 }, { "epoch": 7.08, "learning_rate": 1.9056051779935278e-05, "loss": 0.1155, "step": 18230 }, { "epoch": 7.08, "learning_rate": 1.9055533980582526e-05, "loss": 0.2555, "step": 18240 }, { "epoch": 7.09, "learning_rate": 1.9055016181229774e-05, "loss": 0.1308, "step": 18250 }, { "epoch": 7.09, "learning_rate": 1.9054498381877025e-05, "loss": 0.2177, "step": 18260 }, { "epoch": 7.1, "learning_rate": 1.9053980582524273e-05, "loss": 0.1907, "step": 18270 }, { "epoch": 7.1, "learning_rate": 1.9053462783171524e-05, "loss": 0.176, "step": 18280 }, { "epoch": 7.1, "learning_rate": 1.9052944983818772e-05, "loss": 0.1049, "step": 18290 }, { "epoch": 7.11, "learning_rate": 1.905242718446602e-05, "loss": 0.1525, "step": 18300 }, { "epoch": 7.11, "learning_rate": 1.905190938511327e-05, "loss": 0.1468, "step": 18310 }, { "epoch": 7.11, "learning_rate": 1.905139158576052e-05, "loss": 0.0204, "step": 18320 }, { "epoch": 7.12, "learning_rate": 1.9050873786407767e-05, "loss": 0.3421, "step": 18330 }, { "epoch": 7.12, "learning_rate": 1.905035598705502e-05, "loss": 0.1184, "step": 18340 }, { "epoch": 7.13, "learning_rate": 1.9049838187702266e-05, "loss": 0.0951, "step": 18350 }, { "epoch": 7.13, "learning_rate": 1.9049320388349518e-05, "loss": 0.1476, "step": 18360 }, { "epoch": 7.13, "learning_rate": 1.9048802588996766e-05, "loss": 0.2102, "step": 18370 }, { "epoch": 7.14, "learning_rate": 1.9048284789644014e-05, "loss": 0.2523, "step": 18380 }, { "epoch": 7.14, "learning_rate": 1.9047766990291265e-05, "loss": 0.1768, "step": 18390 }, { "epoch": 7.15, "learning_rate": 1.9047249190938513e-05, "loss": 0.1273, "step": 18400 }, { "epoch": 7.15, "learning_rate": 1.904673139158576e-05, "loss": 0.19, "step": 18410 }, { "epoch": 7.15, "learning_rate": 1.9046213592233012e-05, "loss": 0.2322, "step": 18420 }, { "epoch": 7.16, "learning_rate": 1.904569579288026e-05, "loss": 0.2602, "step": 18430 }, { "epoch": 7.16, "learning_rate": 1.904517799352751e-05, "loss": 0.2249, "step": 18440 }, { "epoch": 7.17, "learning_rate": 1.904466019417476e-05, "loss": 0.1441, "step": 18450 }, { "epoch": 7.17, "learning_rate": 1.9044142394822007e-05, "loss": 0.3593, "step": 18460 }, { "epoch": 7.17, "learning_rate": 1.9043624595469255e-05, "loss": 0.2256, "step": 18470 }, { "epoch": 7.18, "learning_rate": 1.9043106796116506e-05, "loss": 0.0688, "step": 18480 }, { "epoch": 7.18, "learning_rate": 1.9042588996763754e-05, "loss": 0.2086, "step": 18490 }, { "epoch": 7.18, "learning_rate": 1.9042071197411006e-05, "loss": 0.1778, "step": 18500 }, { "epoch": 7.19, "learning_rate": 1.9041553398058254e-05, "loss": 0.1509, "step": 18510 }, { "epoch": 7.19, "learning_rate": 1.9041035598705505e-05, "loss": 0.1653, "step": 18520 }, { "epoch": 7.2, "learning_rate": 1.9040517799352753e-05, "loss": 0.3335, "step": 18530 }, { "epoch": 7.2, "learning_rate": 1.904e-05, "loss": 0.1318, "step": 18540 }, { "epoch": 7.2, "learning_rate": 1.903948220064725e-05, "loss": 0.1013, "step": 18550 }, { "epoch": 7.21, "learning_rate": 1.90389644012945e-05, "loss": 0.3407, "step": 18560 }, { "epoch": 7.21, "learning_rate": 1.9038446601941748e-05, "loss": 0.0784, "step": 18570 }, { "epoch": 7.22, "learning_rate": 1.9037928802589e-05, "loss": 0.1686, "step": 18580 }, { "epoch": 7.22, "learning_rate": 1.9037411003236247e-05, "loss": 0.0417, "step": 18590 }, { "epoch": 7.22, "learning_rate": 1.90368932038835e-05, "loss": 0.0899, "step": 18600 }, { "epoch": 7.23, "learning_rate": 1.9036375404530746e-05, "loss": 0.0554, "step": 18610 }, { "epoch": 7.23, "learning_rate": 1.9035857605177994e-05, "loss": 0.0539, "step": 18620 }, { "epoch": 7.23, "learning_rate": 1.9035339805825242e-05, "loss": 0.1866, "step": 18630 }, { "epoch": 7.24, "learning_rate": 1.9034822006472493e-05, "loss": 0.1733, "step": 18640 }, { "epoch": 7.24, "learning_rate": 1.903430420711974e-05, "loss": 0.1772, "step": 18650 }, { "epoch": 7.25, "learning_rate": 1.9033786407766993e-05, "loss": 0.2179, "step": 18660 }, { "epoch": 7.25, "learning_rate": 1.903326860841424e-05, "loss": 0.1995, "step": 18670 }, { "epoch": 7.25, "learning_rate": 1.9032750809061492e-05, "loss": 0.0716, "step": 18680 }, { "epoch": 7.26, "learning_rate": 1.903223300970874e-05, "loss": 0.1851, "step": 18690 }, { "epoch": 7.26, "learning_rate": 1.9031715210355988e-05, "loss": 0.3621, "step": 18700 }, { "epoch": 7.27, "learning_rate": 1.9031197411003236e-05, "loss": 0.3943, "step": 18710 }, { "epoch": 7.27, "learning_rate": 1.9030679611650487e-05, "loss": 0.1051, "step": 18720 }, { "epoch": 7.27, "learning_rate": 1.9030161812297735e-05, "loss": 0.0623, "step": 18730 }, { "epoch": 7.28, "learning_rate": 1.9029644012944986e-05, "loss": 0.1224, "step": 18740 }, { "epoch": 7.28, "learning_rate": 1.9029126213592234e-05, "loss": 0.1794, "step": 18750 }, { "epoch": 7.29, "learning_rate": 1.9028608414239485e-05, "loss": 0.3187, "step": 18760 }, { "epoch": 7.29, "learning_rate": 1.9028090614886733e-05, "loss": 0.1153, "step": 18770 }, { "epoch": 7.29, "learning_rate": 1.902757281553398e-05, "loss": 0.2892, "step": 18780 }, { "epoch": 7.3, "learning_rate": 1.902705501618123e-05, "loss": 0.24, "step": 18790 }, { "epoch": 7.3, "learning_rate": 1.902653721682848e-05, "loss": 0.1071, "step": 18800 }, { "epoch": 7.3, "learning_rate": 1.902601941747573e-05, "loss": 0.1086, "step": 18810 }, { "epoch": 7.31, "learning_rate": 1.902550161812298e-05, "loss": 0.118, "step": 18820 }, { "epoch": 7.31, "learning_rate": 1.9024983818770228e-05, "loss": 0.0782, "step": 18830 }, { "epoch": 7.32, "learning_rate": 1.902446601941748e-05, "loss": 0.2691, "step": 18840 }, { "epoch": 7.32, "learning_rate": 1.9023948220064727e-05, "loss": 0.1606, "step": 18850 }, { "epoch": 7.32, "learning_rate": 1.9023430420711975e-05, "loss": 0.2073, "step": 18860 }, { "epoch": 7.33, "learning_rate": 1.9022912621359223e-05, "loss": 0.1287, "step": 18870 }, { "epoch": 7.33, "learning_rate": 1.9022394822006474e-05, "loss": 0.1349, "step": 18880 }, { "epoch": 7.34, "learning_rate": 1.9021877022653722e-05, "loss": 0.0426, "step": 18890 }, { "epoch": 7.34, "learning_rate": 1.9021359223300973e-05, "loss": 0.1176, "step": 18900 }, { "epoch": 7.34, "learning_rate": 1.902084142394822e-05, "loss": 0.0702, "step": 18910 }, { "epoch": 7.35, "learning_rate": 1.9020323624595473e-05, "loss": 0.129, "step": 18920 }, { "epoch": 7.35, "learning_rate": 1.901980582524272e-05, "loss": 0.1214, "step": 18930 }, { "epoch": 7.36, "learning_rate": 1.901928802588997e-05, "loss": 0.1423, "step": 18940 }, { "epoch": 7.36, "learning_rate": 1.9018770226537216e-05, "loss": 0.1851, "step": 18950 }, { "epoch": 7.36, "learning_rate": 1.9018252427184468e-05, "loss": 0.1689, "step": 18960 }, { "epoch": 7.37, "learning_rate": 1.9017734627831716e-05, "loss": 0.1754, "step": 18970 }, { "epoch": 7.37, "learning_rate": 1.9017216828478967e-05, "loss": 0.2236, "step": 18980 }, { "epoch": 7.37, "learning_rate": 1.9016699029126215e-05, "loss": 0.08, "step": 18990 }, { "epoch": 7.38, "learning_rate": 1.9016181229773466e-05, "loss": 0.1958, "step": 19000 }, { "epoch": 7.38, "learning_rate": 1.9015663430420714e-05, "loss": 0.0383, "step": 19010 }, { "epoch": 7.39, "learning_rate": 1.9015145631067962e-05, "loss": 0.0816, "step": 19020 }, { "epoch": 7.39, "learning_rate": 1.901462783171521e-05, "loss": 0.3736, "step": 19030 }, { "epoch": 7.39, "learning_rate": 1.901411003236246e-05, "loss": 0.1936, "step": 19040 }, { "epoch": 7.4, "learning_rate": 1.901359223300971e-05, "loss": 0.3916, "step": 19050 }, { "epoch": 7.4, "learning_rate": 1.901307443365696e-05, "loss": 0.2206, "step": 19060 }, { "epoch": 7.41, "learning_rate": 1.901255663430421e-05, "loss": 0.1495, "step": 19070 }, { "epoch": 7.41, "learning_rate": 1.901203883495146e-05, "loss": 0.4515, "step": 19080 }, { "epoch": 7.41, "learning_rate": 1.9011521035598708e-05, "loss": 0.208, "step": 19090 }, { "epoch": 7.42, "learning_rate": 1.901100323624596e-05, "loss": 0.1397, "step": 19100 }, { "epoch": 7.42, "learning_rate": 1.9010485436893203e-05, "loss": 0.3406, "step": 19110 }, { "epoch": 7.43, "learning_rate": 1.9009967637540455e-05, "loss": 0.2142, "step": 19120 }, { "epoch": 7.43, "learning_rate": 1.9009449838187703e-05, "loss": 0.1653, "step": 19130 }, { "epoch": 7.43, "learning_rate": 1.9008932038834954e-05, "loss": 0.2037, "step": 19140 }, { "epoch": 7.44, "learning_rate": 1.9008414239482202e-05, "loss": 0.2726, "step": 19150 }, { "epoch": 7.44, "learning_rate": 1.9007896440129453e-05, "loss": 0.2485, "step": 19160 }, { "epoch": 7.44, "learning_rate": 1.90073786407767e-05, "loss": 0.2741, "step": 19170 }, { "epoch": 7.45, "learning_rate": 1.9006860841423953e-05, "loss": 0.0828, "step": 19180 }, { "epoch": 7.45, "learning_rate": 1.9006343042071197e-05, "loss": 0.0898, "step": 19190 }, { "epoch": 7.46, "learning_rate": 1.900582524271845e-05, "loss": 0.1281, "step": 19200 }, { "epoch": 7.46, "learning_rate": 1.9005307443365696e-05, "loss": 0.1504, "step": 19210 }, { "epoch": 7.46, "learning_rate": 1.9004789644012948e-05, "loss": 0.1912, "step": 19220 }, { "epoch": 7.47, "learning_rate": 1.9004271844660196e-05, "loss": 0.1776, "step": 19230 }, { "epoch": 7.47, "learning_rate": 1.9003754045307447e-05, "loss": 0.214, "step": 19240 }, { "epoch": 7.48, "learning_rate": 1.9003236245954695e-05, "loss": 0.2167, "step": 19250 }, { "epoch": 7.48, "learning_rate": 1.9002718446601943e-05, "loss": 0.3702, "step": 19260 }, { "epoch": 7.48, "learning_rate": 1.900220064724919e-05, "loss": 0.1665, "step": 19270 }, { "epoch": 7.49, "learning_rate": 1.9001682847896442e-05, "loss": 0.2016, "step": 19280 }, { "epoch": 7.49, "learning_rate": 1.900116504854369e-05, "loss": 0.1171, "step": 19290 }, { "epoch": 7.5, "learning_rate": 1.900064724919094e-05, "loss": 0.0423, "step": 19300 }, { "epoch": 7.5, "learning_rate": 1.900012944983819e-05, "loss": 0.1928, "step": 19310 }, { "epoch": 7.5, "learning_rate": 1.899961165048544e-05, "loss": 0.2297, "step": 19320 }, { "epoch": 7.51, "learning_rate": 1.8999093851132688e-05, "loss": 0.1445, "step": 19330 }, { "epoch": 7.51, "learning_rate": 1.8998576051779936e-05, "loss": 0.1277, "step": 19340 }, { "epoch": 7.51, "learning_rate": 1.8998058252427184e-05, "loss": 0.1308, "step": 19350 }, { "epoch": 7.52, "learning_rate": 1.8997540453074435e-05, "loss": 0.1162, "step": 19360 }, { "epoch": 7.52, "learning_rate": 1.8997022653721683e-05, "loss": 0.23, "step": 19370 }, { "epoch": 7.53, "learning_rate": 1.8996504854368935e-05, "loss": 0.1473, "step": 19380 }, { "epoch": 7.53, "learning_rate": 1.8995987055016183e-05, "loss": 0.2774, "step": 19390 }, { "epoch": 7.53, "learning_rate": 1.8995469255663434e-05, "loss": 0.1961, "step": 19400 }, { "epoch": 7.54, "learning_rate": 1.8994951456310682e-05, "loss": 0.1145, "step": 19410 }, { "epoch": 7.54, "learning_rate": 1.899443365695793e-05, "loss": 0.1871, "step": 19420 }, { "epoch": 7.55, "learning_rate": 1.8993915857605178e-05, "loss": 0.0781, "step": 19430 }, { "epoch": 7.55, "learning_rate": 1.899339805825243e-05, "loss": 0.2614, "step": 19440 }, { "epoch": 7.55, "learning_rate": 1.8992880258899677e-05, "loss": 0.3089, "step": 19450 }, { "epoch": 7.56, "learning_rate": 1.8992362459546928e-05, "loss": 0.2111, "step": 19460 }, { "epoch": 7.56, "learning_rate": 1.8991844660194176e-05, "loss": 0.1352, "step": 19470 }, { "epoch": 7.57, "learning_rate": 1.8991326860841427e-05, "loss": 0.0735, "step": 19480 }, { "epoch": 7.57, "learning_rate": 1.8990809061488675e-05, "loss": 0.2005, "step": 19490 }, { "epoch": 7.57, "learning_rate": 1.8990291262135923e-05, "loss": 0.1818, "step": 19500 }, { "epoch": 7.58, "learning_rate": 1.898977346278317e-05, "loss": 0.1991, "step": 19510 }, { "epoch": 7.58, "learning_rate": 1.8989255663430423e-05, "loss": 0.1019, "step": 19520 }, { "epoch": 7.58, "learning_rate": 1.898873786407767e-05, "loss": 0.0849, "step": 19530 }, { "epoch": 7.59, "learning_rate": 1.8988220064724922e-05, "loss": 0.1094, "step": 19540 }, { "epoch": 7.59, "learning_rate": 1.898770226537217e-05, "loss": 0.1223, "step": 19550 }, { "epoch": 7.6, "learning_rate": 1.8987184466019418e-05, "loss": 0.3245, "step": 19560 }, { "epoch": 7.6, "learning_rate": 1.898666666666667e-05, "loss": 0.1273, "step": 19570 }, { "epoch": 7.6, "learning_rate": 1.8986148867313917e-05, "loss": 0.1751, "step": 19580 }, { "epoch": 7.61, "learning_rate": 1.8985631067961165e-05, "loss": 0.1757, "step": 19590 }, { "epoch": 7.61, "learning_rate": 1.8985113268608416e-05, "loss": 0.1356, "step": 19600 }, { "epoch": 7.62, "learning_rate": 1.8984595469255664e-05, "loss": 0.1659, "step": 19610 }, { "epoch": 7.62, "learning_rate": 1.8984077669902915e-05, "loss": 0.1526, "step": 19620 }, { "epoch": 7.62, "learning_rate": 1.8983559870550163e-05, "loss": 0.175, "step": 19630 }, { "epoch": 7.63, "learning_rate": 1.898304207119741e-05, "loss": 0.1747, "step": 19640 }, { "epoch": 7.63, "learning_rate": 1.8982524271844663e-05, "loss": 0.1583, "step": 19650 }, { "epoch": 7.63, "learning_rate": 1.898200647249191e-05, "loss": 0.1331, "step": 19660 }, { "epoch": 7.64, "learning_rate": 1.8981488673139162e-05, "loss": 0.2647, "step": 19670 }, { "epoch": 7.64, "learning_rate": 1.898097087378641e-05, "loss": 0.11, "step": 19680 }, { "epoch": 7.65, "learning_rate": 1.8980453074433658e-05, "loss": 0.0666, "step": 19690 }, { "epoch": 7.65, "learning_rate": 1.897993527508091e-05, "loss": 0.1649, "step": 19700 }, { "epoch": 7.65, "learning_rate": 1.8979417475728157e-05, "loss": 0.1123, "step": 19710 }, { "epoch": 7.66, "learning_rate": 1.8978899676375405e-05, "loss": 0.15, "step": 19720 }, { "epoch": 7.66, "learning_rate": 1.8978381877022656e-05, "loss": 0.0799, "step": 19730 }, { "epoch": 7.67, "learning_rate": 1.8977864077669904e-05, "loss": 0.0805, "step": 19740 }, { "epoch": 7.67, "learning_rate": 1.8977346278317155e-05, "loss": 0.0692, "step": 19750 }, { "epoch": 7.67, "learning_rate": 1.8976828478964403e-05, "loss": 0.2302, "step": 19760 }, { "epoch": 7.68, "learning_rate": 1.897631067961165e-05, "loss": 0.2335, "step": 19770 }, { "epoch": 7.68, "learning_rate": 1.8975792880258902e-05, "loss": 0.1859, "step": 19780 }, { "epoch": 7.69, "learning_rate": 1.897527508090615e-05, "loss": 0.1434, "step": 19790 }, { "epoch": 7.69, "learning_rate": 1.89747572815534e-05, "loss": 0.1904, "step": 19800 }, { "epoch": 7.69, "learning_rate": 1.897423948220065e-05, "loss": 0.2598, "step": 19810 }, { "epoch": 7.7, "learning_rate": 1.8973721682847898e-05, "loss": 0.2165, "step": 19820 }, { "epoch": 7.7, "learning_rate": 1.897320388349515e-05, "loss": 0.2428, "step": 19830 }, { "epoch": 7.7, "learning_rate": 1.8972686084142397e-05, "loss": 0.1131, "step": 19840 }, { "epoch": 7.71, "learning_rate": 1.8972168284789645e-05, "loss": 0.182, "step": 19850 }, { "epoch": 7.71, "learning_rate": 1.8971650485436896e-05, "loss": 0.1989, "step": 19860 }, { "epoch": 7.72, "learning_rate": 1.8971132686084144e-05, "loss": 0.0783, "step": 19870 }, { "epoch": 7.72, "learning_rate": 1.8970614886731392e-05, "loss": 0.2297, "step": 19880 }, { "epoch": 7.72, "learning_rate": 1.8970097087378643e-05, "loss": 0.0564, "step": 19890 }, { "epoch": 7.73, "learning_rate": 1.896957928802589e-05, "loss": 0.086, "step": 19900 }, { "epoch": 7.73, "learning_rate": 1.8969061488673142e-05, "loss": 0.2131, "step": 19910 }, { "epoch": 7.74, "learning_rate": 1.896854368932039e-05, "loss": 0.2196, "step": 19920 }, { "epoch": 7.74, "learning_rate": 1.8968025889967638e-05, "loss": 0.1693, "step": 19930 }, { "epoch": 7.74, "learning_rate": 1.8967508090614886e-05, "loss": 0.1422, "step": 19940 }, { "epoch": 7.75, "learning_rate": 1.8966990291262138e-05, "loss": 0.242, "step": 19950 }, { "epoch": 7.75, "learning_rate": 1.8966472491909385e-05, "loss": 0.1268, "step": 19960 }, { "epoch": 7.76, "learning_rate": 1.8965954692556637e-05, "loss": 0.2631, "step": 19970 }, { "epoch": 7.76, "learning_rate": 1.8965436893203885e-05, "loss": 0.1402, "step": 19980 }, { "epoch": 7.76, "learning_rate": 1.8964919093851136e-05, "loss": 0.152, "step": 19990 }, { "epoch": 7.77, "learning_rate": 1.8964401294498384e-05, "loss": 0.3736, "step": 20000 }, { "epoch": 7.77, "learning_rate": 1.8963883495145632e-05, "loss": 0.1815, "step": 20010 }, { "epoch": 7.77, "learning_rate": 1.896336569579288e-05, "loss": 0.1719, "step": 20020 }, { "epoch": 7.78, "learning_rate": 1.896284789644013e-05, "loss": 0.0539, "step": 20030 }, { "epoch": 7.78, "learning_rate": 1.896233009708738e-05, "loss": 0.1113, "step": 20040 }, { "epoch": 7.79, "learning_rate": 1.896181229773463e-05, "loss": 0.0947, "step": 20050 }, { "epoch": 7.79, "learning_rate": 1.8961294498381878e-05, "loss": 0.3626, "step": 20060 }, { "epoch": 7.79, "learning_rate": 1.896077669902913e-05, "loss": 0.2355, "step": 20070 }, { "epoch": 7.8, "learning_rate": 1.8960258899676377e-05, "loss": 0.1243, "step": 20080 }, { "epoch": 7.8, "learning_rate": 1.8959741100323625e-05, "loss": 0.1073, "step": 20090 }, { "epoch": 7.81, "learning_rate": 1.8959223300970873e-05, "loss": 0.0712, "step": 20100 }, { "epoch": 7.81, "learning_rate": 1.8958705501618125e-05, "loss": 0.129, "step": 20110 }, { "epoch": 7.81, "learning_rate": 1.8958187702265373e-05, "loss": 0.334, "step": 20120 }, { "epoch": 7.82, "learning_rate": 1.8957669902912624e-05, "loss": 0.2766, "step": 20130 }, { "epoch": 7.82, "learning_rate": 1.8957152103559872e-05, "loss": 0.2735, "step": 20140 }, { "epoch": 7.83, "learning_rate": 1.8956634304207123e-05, "loss": 0.0949, "step": 20150 }, { "epoch": 7.83, "learning_rate": 1.895611650485437e-05, "loss": 0.0997, "step": 20160 }, { "epoch": 7.83, "learning_rate": 1.895559870550162e-05, "loss": 0.164, "step": 20170 }, { "epoch": 7.84, "learning_rate": 1.8955080906148867e-05, "loss": 0.1456, "step": 20180 }, { "epoch": 7.84, "learning_rate": 1.8954563106796118e-05, "loss": 0.2678, "step": 20190 }, { "epoch": 7.84, "learning_rate": 1.8954045307443366e-05, "loss": 0.2769, "step": 20200 }, { "epoch": 7.85, "learning_rate": 1.8953527508090617e-05, "loss": 0.3176, "step": 20210 }, { "epoch": 7.85, "learning_rate": 1.8953009708737865e-05, "loss": 0.1447, "step": 20220 }, { "epoch": 7.86, "learning_rate": 1.8952491909385117e-05, "loss": 0.1337, "step": 20230 }, { "epoch": 7.86, "learning_rate": 1.8951974110032365e-05, "loss": 0.1835, "step": 20240 }, { "epoch": 7.86, "learning_rate": 1.8951456310679613e-05, "loss": 0.1446, "step": 20250 }, { "epoch": 7.87, "learning_rate": 1.895093851132686e-05, "loss": 0.1245, "step": 20260 }, { "epoch": 7.87, "learning_rate": 1.8950420711974112e-05, "loss": 0.1795, "step": 20270 }, { "epoch": 7.88, "learning_rate": 1.894990291262136e-05, "loss": 0.1811, "step": 20280 }, { "epoch": 7.88, "learning_rate": 1.894938511326861e-05, "loss": 0.2446, "step": 20290 }, { "epoch": 7.88, "learning_rate": 1.894886731391586e-05, "loss": 0.1747, "step": 20300 }, { "epoch": 7.89, "learning_rate": 1.894834951456311e-05, "loss": 0.1293, "step": 20310 }, { "epoch": 7.89, "learning_rate": 1.8947831715210358e-05, "loss": 0.18, "step": 20320 }, { "epoch": 7.9, "learning_rate": 1.8947313915857606e-05, "loss": 0.1283, "step": 20330 }, { "epoch": 7.9, "learning_rate": 1.8946796116504854e-05, "loss": 0.1031, "step": 20340 }, { "epoch": 7.9, "learning_rate": 1.8946278317152105e-05, "loss": 0.1859, "step": 20350 }, { "epoch": 7.91, "learning_rate": 1.8945760517799353e-05, "loss": 0.1025, "step": 20360 }, { "epoch": 7.91, "learning_rate": 1.8945242718446605e-05, "loss": 0.1444, "step": 20370 }, { "epoch": 7.91, "learning_rate": 1.8944724919093852e-05, "loss": 0.0487, "step": 20380 }, { "epoch": 7.92, "learning_rate": 1.8944207119741104e-05, "loss": 0.2183, "step": 20390 }, { "epoch": 7.92, "learning_rate": 1.894368932038835e-05, "loss": 0.0852, "step": 20400 }, { "epoch": 7.93, "learning_rate": 1.89431715210356e-05, "loss": 0.1409, "step": 20410 }, { "epoch": 7.93, "learning_rate": 1.8942653721682848e-05, "loss": 0.092, "step": 20420 }, { "epoch": 7.93, "learning_rate": 1.89421359223301e-05, "loss": 0.0441, "step": 20430 }, { "epoch": 7.94, "learning_rate": 1.8941618122977347e-05, "loss": 0.028, "step": 20440 }, { "epoch": 7.94, "learning_rate": 1.8941100323624598e-05, "loss": 0.2525, "step": 20450 }, { "epoch": 7.95, "learning_rate": 1.8940582524271846e-05, "loss": 0.1649, "step": 20460 }, { "epoch": 7.95, "learning_rate": 1.8940064724919097e-05, "loss": 0.1598, "step": 20470 }, { "epoch": 7.95, "learning_rate": 1.8939546925566345e-05, "loss": 0.3265, "step": 20480 }, { "epoch": 7.96, "learning_rate": 1.8939029126213593e-05, "loss": 0.0942, "step": 20490 }, { "epoch": 7.96, "learning_rate": 1.893851132686084e-05, "loss": 0.0392, "step": 20500 }, { "epoch": 7.97, "learning_rate": 1.8937993527508092e-05, "loss": 0.0193, "step": 20510 }, { "epoch": 7.97, "learning_rate": 1.893747572815534e-05, "loss": 0.1528, "step": 20520 }, { "epoch": 7.97, "learning_rate": 1.893695792880259e-05, "loss": 0.2312, "step": 20530 }, { "epoch": 7.98, "learning_rate": 1.893644012944984e-05, "loss": 0.1399, "step": 20540 }, { "epoch": 7.98, "learning_rate": 1.893592233009709e-05, "loss": 0.1623, "step": 20550 }, { "epoch": 7.98, "learning_rate": 1.893540453074434e-05, "loss": 0.2021, "step": 20560 }, { "epoch": 7.99, "learning_rate": 1.8934886731391587e-05, "loss": 0.0852, "step": 20570 }, { "epoch": 7.99, "learning_rate": 1.8934368932038835e-05, "loss": 0.2163, "step": 20580 }, { "epoch": 8.0, "learning_rate": 1.8933851132686086e-05, "loss": 0.1724, "step": 20590 }, { "epoch": 8.0, "learning_rate": 1.8933333333333334e-05, "loss": 0.131, "step": 20600 }, { "epoch": 8.0, "eval_accuracy": 0.951031636863824, "eval_loss": 0.20585355162620544, "eval_runtime": 8.2357, "eval_samples_per_second": 441.37, "eval_steps_per_second": 55.247, "step": 20600 }, { "epoch": 8.0, "learning_rate": 1.8932815533980585e-05, "loss": 0.1869, "step": 20610 }, { "epoch": 8.01, "learning_rate": 1.8932297734627833e-05, "loss": 0.0795, "step": 20620 }, { "epoch": 8.01, "learning_rate": 1.8931779935275084e-05, "loss": 0.2046, "step": 20630 }, { "epoch": 8.02, "learning_rate": 1.8931262135922332e-05, "loss": 0.0538, "step": 20640 }, { "epoch": 8.02, "learning_rate": 1.893074433656958e-05, "loss": 0.0775, "step": 20650 }, { "epoch": 8.02, "learning_rate": 1.8930226537216828e-05, "loss": 0.0677, "step": 20660 }, { "epoch": 8.03, "learning_rate": 1.892970873786408e-05, "loss": 0.3207, "step": 20670 }, { "epoch": 8.03, "learning_rate": 1.8929190938511327e-05, "loss": 0.1489, "step": 20680 }, { "epoch": 8.03, "learning_rate": 1.892867313915858e-05, "loss": 0.0776, "step": 20690 }, { "epoch": 8.04, "learning_rate": 1.8928155339805827e-05, "loss": 0.2284, "step": 20700 }, { "epoch": 8.04, "learning_rate": 1.8927637540453078e-05, "loss": 0.1624, "step": 20710 }, { "epoch": 8.05, "learning_rate": 1.8927119741100326e-05, "loss": 0.1928, "step": 20720 }, { "epoch": 8.05, "learning_rate": 1.8926601941747574e-05, "loss": 0.0365, "step": 20730 }, { "epoch": 8.05, "learning_rate": 1.8926084142394822e-05, "loss": 0.1466, "step": 20740 }, { "epoch": 8.06, "learning_rate": 1.8925566343042073e-05, "loss": 0.075, "step": 20750 }, { "epoch": 8.06, "learning_rate": 1.892504854368932e-05, "loss": 0.3438, "step": 20760 }, { "epoch": 8.07, "learning_rate": 1.8924530744336572e-05, "loss": 0.1094, "step": 20770 }, { "epoch": 8.07, "learning_rate": 1.892401294498382e-05, "loss": 0.3573, "step": 20780 }, { "epoch": 8.07, "learning_rate": 1.892349514563107e-05, "loss": 0.138, "step": 20790 }, { "epoch": 8.08, "learning_rate": 1.892297734627832e-05, "loss": 0.1114, "step": 20800 }, { "epoch": 8.08, "learning_rate": 1.8922459546925567e-05, "loss": 0.1337, "step": 20810 }, { "epoch": 8.09, "learning_rate": 1.8921941747572815e-05, "loss": 0.2441, "step": 20820 }, { "epoch": 8.09, "learning_rate": 1.8921423948220067e-05, "loss": 0.1587, "step": 20830 }, { "epoch": 8.09, "learning_rate": 1.8920906148867315e-05, "loss": 0.1904, "step": 20840 }, { "epoch": 8.1, "learning_rate": 1.8920388349514566e-05, "loss": 0.0232, "step": 20850 }, { "epoch": 8.1, "learning_rate": 1.8919870550161814e-05, "loss": 0.1243, "step": 20860 }, { "epoch": 8.1, "learning_rate": 1.8919352750809065e-05, "loss": 0.2785, "step": 20870 }, { "epoch": 8.11, "learning_rate": 1.8918834951456313e-05, "loss": 0.1649, "step": 20880 }, { "epoch": 8.11, "learning_rate": 1.891831715210356e-05, "loss": 0.0937, "step": 20890 }, { "epoch": 8.12, "learning_rate": 1.891779935275081e-05, "loss": 0.1416, "step": 20900 }, { "epoch": 8.12, "learning_rate": 1.891728155339806e-05, "loss": 0.0647, "step": 20910 }, { "epoch": 8.12, "learning_rate": 1.8916763754045308e-05, "loss": 0.3121, "step": 20920 }, { "epoch": 8.13, "learning_rate": 1.891624595469256e-05, "loss": 0.092, "step": 20930 }, { "epoch": 8.13, "learning_rate": 1.8915728155339807e-05, "loss": 0.0704, "step": 20940 }, { "epoch": 8.14, "learning_rate": 1.891521035598706e-05, "loss": 0.1044, "step": 20950 }, { "epoch": 8.14, "learning_rate": 1.8914692556634307e-05, "loss": 0.1408, "step": 20960 }, { "epoch": 8.14, "learning_rate": 1.8914174757281555e-05, "loss": 0.2793, "step": 20970 }, { "epoch": 8.15, "learning_rate": 1.8913656957928802e-05, "loss": 0.1684, "step": 20980 }, { "epoch": 8.15, "learning_rate": 1.8913139158576054e-05, "loss": 0.0694, "step": 20990 }, { "epoch": 8.16, "learning_rate": 1.89126213592233e-05, "loss": 0.117, "step": 21000 }, { "epoch": 8.16, "learning_rate": 1.8912103559870553e-05, "loss": 0.2127, "step": 21010 }, { "epoch": 8.16, "learning_rate": 1.89115857605178e-05, "loss": 0.1166, "step": 21020 }, { "epoch": 8.17, "learning_rate": 1.891106796116505e-05, "loss": 0.1383, "step": 21030 }, { "epoch": 8.17, "learning_rate": 1.89105501618123e-05, "loss": 0.1603, "step": 21040 }, { "epoch": 8.17, "learning_rate": 1.8910032362459548e-05, "loss": 0.2259, "step": 21050 }, { "epoch": 8.18, "learning_rate": 1.8909514563106796e-05, "loss": 0.2835, "step": 21060 }, { "epoch": 8.18, "learning_rate": 1.8908996763754047e-05, "loss": 0.1208, "step": 21070 }, { "epoch": 8.19, "learning_rate": 1.8908478964401295e-05, "loss": 0.0486, "step": 21080 }, { "epoch": 8.19, "learning_rate": 1.8907961165048547e-05, "loss": 0.0696, "step": 21090 }, { "epoch": 8.19, "learning_rate": 1.8907443365695794e-05, "loss": 0.2886, "step": 21100 }, { "epoch": 8.2, "learning_rate": 1.8906925566343042e-05, "loss": 0.2904, "step": 21110 }, { "epoch": 8.2, "learning_rate": 1.8906407766990294e-05, "loss": 0.1555, "step": 21120 }, { "epoch": 8.21, "learning_rate": 1.890588996763754e-05, "loss": 0.1413, "step": 21130 }, { "epoch": 8.21, "learning_rate": 1.890537216828479e-05, "loss": 0.1657, "step": 21140 }, { "epoch": 8.21, "learning_rate": 1.890485436893204e-05, "loss": 0.0657, "step": 21150 }, { "epoch": 8.22, "learning_rate": 1.890433656957929e-05, "loss": 0.2767, "step": 21160 }, { "epoch": 8.22, "learning_rate": 1.890381877022654e-05, "loss": 0.0921, "step": 21170 }, { "epoch": 8.23, "learning_rate": 1.8903300970873788e-05, "loss": 0.1288, "step": 21180 }, { "epoch": 8.23, "learning_rate": 1.8902783171521036e-05, "loss": 0.3064, "step": 21190 }, { "epoch": 8.23, "learning_rate": 1.8902265372168287e-05, "loss": 0.2353, "step": 21200 }, { "epoch": 8.24, "learning_rate": 1.8901747572815535e-05, "loss": 0.2186, "step": 21210 }, { "epoch": 8.24, "learning_rate": 1.8901229773462783e-05, "loss": 0.2613, "step": 21220 }, { "epoch": 8.24, "learning_rate": 1.8900711974110034e-05, "loss": 0.2139, "step": 21230 }, { "epoch": 8.25, "learning_rate": 1.8900194174757282e-05, "loss": 0.0371, "step": 21240 }, { "epoch": 8.25, "learning_rate": 1.8899676375404534e-05, "loss": 0.4219, "step": 21250 }, { "epoch": 8.26, "learning_rate": 1.889915857605178e-05, "loss": 0.2926, "step": 21260 }, { "epoch": 8.26, "learning_rate": 1.889864077669903e-05, "loss": 0.0565, "step": 21270 }, { "epoch": 8.26, "learning_rate": 1.889812297734628e-05, "loss": 0.1745, "step": 21280 }, { "epoch": 8.27, "learning_rate": 1.889760517799353e-05, "loss": 0.1794, "step": 21290 }, { "epoch": 8.27, "learning_rate": 1.8897087378640777e-05, "loss": 0.1272, "step": 21300 }, { "epoch": 8.28, "learning_rate": 1.8896569579288028e-05, "loss": 0.0818, "step": 21310 }, { "epoch": 8.28, "learning_rate": 1.8896051779935276e-05, "loss": 0.11, "step": 21320 }, { "epoch": 8.28, "learning_rate": 1.8895533980582527e-05, "loss": 0.0895, "step": 21330 }, { "epoch": 8.29, "learning_rate": 1.8895016181229775e-05, "loss": 0.0777, "step": 21340 }, { "epoch": 8.29, "learning_rate": 1.8894498381877023e-05, "loss": 0.0032, "step": 21350 }, { "epoch": 8.3, "learning_rate": 1.8893980582524274e-05, "loss": 0.1753, "step": 21360 }, { "epoch": 8.3, "learning_rate": 1.8893462783171522e-05, "loss": 0.0212, "step": 21370 }, { "epoch": 8.3, "learning_rate": 1.8892944983818774e-05, "loss": 0.2488, "step": 21380 }, { "epoch": 8.31, "learning_rate": 1.889242718446602e-05, "loss": 0.1281, "step": 21390 }, { "epoch": 8.31, "learning_rate": 1.889190938511327e-05, "loss": 0.1374, "step": 21400 }, { "epoch": 8.31, "learning_rate": 1.8891391585760517e-05, "loss": 0.2153, "step": 21410 }, { "epoch": 8.32, "learning_rate": 1.889087378640777e-05, "loss": 0.3827, "step": 21420 }, { "epoch": 8.32, "learning_rate": 1.8890355987055017e-05, "loss": 0.232, "step": 21430 }, { "epoch": 8.33, "learning_rate": 1.8889838187702268e-05, "loss": 0.138, "step": 21440 }, { "epoch": 8.33, "learning_rate": 1.8889320388349516e-05, "loss": 0.3031, "step": 21450 }, { "epoch": 8.33, "learning_rate": 1.8888802588996767e-05, "loss": 0.3781, "step": 21460 }, { "epoch": 8.34, "learning_rate": 1.8888284789644015e-05, "loss": 0.0534, "step": 21470 }, { "epoch": 8.34, "learning_rate": 1.8887766990291263e-05, "loss": 0.1801, "step": 21480 }, { "epoch": 8.35, "learning_rate": 1.888724919093851e-05, "loss": 0.0915, "step": 21490 }, { "epoch": 8.35, "learning_rate": 1.8886731391585762e-05, "loss": 0.0893, "step": 21500 }, { "epoch": 8.35, "learning_rate": 1.888621359223301e-05, "loss": 0.164, "step": 21510 }, { "epoch": 8.36, "learning_rate": 1.888569579288026e-05, "loss": 0.2233, "step": 21520 }, { "epoch": 8.36, "learning_rate": 1.888517799352751e-05, "loss": 0.1314, "step": 21530 }, { "epoch": 8.37, "learning_rate": 1.888466019417476e-05, "loss": 0.0655, "step": 21540 }, { "epoch": 8.37, "learning_rate": 1.888414239482201e-05, "loss": 0.1256, "step": 21550 }, { "epoch": 8.37, "learning_rate": 1.8883624595469257e-05, "loss": 0.2023, "step": 21560 }, { "epoch": 8.38, "learning_rate": 1.8883106796116504e-05, "loss": 0.1501, "step": 21570 }, { "epoch": 8.38, "learning_rate": 1.8882588996763756e-05, "loss": 0.0956, "step": 21580 }, { "epoch": 8.38, "learning_rate": 1.8882071197411004e-05, "loss": 0.0981, "step": 21590 }, { "epoch": 8.39, "learning_rate": 1.8881553398058255e-05, "loss": 0.0134, "step": 21600 }, { "epoch": 8.39, "learning_rate": 1.8881035598705503e-05, "loss": 0.2811, "step": 21610 }, { "epoch": 8.4, "learning_rate": 1.8880517799352754e-05, "loss": 0.1102, "step": 21620 }, { "epoch": 8.4, "learning_rate": 1.8880000000000002e-05, "loss": 0.0439, "step": 21630 }, { "epoch": 8.4, "learning_rate": 1.887948220064725e-05, "loss": 0.2291, "step": 21640 }, { "epoch": 8.41, "learning_rate": 1.8878964401294498e-05, "loss": 0.1487, "step": 21650 }, { "epoch": 8.41, "learning_rate": 1.887844660194175e-05, "loss": 0.2221, "step": 21660 }, { "epoch": 8.42, "learning_rate": 1.8877928802588997e-05, "loss": 0.1701, "step": 21670 }, { "epoch": 8.42, "learning_rate": 1.887741100323625e-05, "loss": 0.1539, "step": 21680 }, { "epoch": 8.42, "learning_rate": 1.8876893203883497e-05, "loss": 0.2316, "step": 21690 }, { "epoch": 8.43, "learning_rate": 1.8876375404530748e-05, "loss": 0.3173, "step": 21700 }, { "epoch": 8.43, "learning_rate": 1.8875857605177992e-05, "loss": 0.1826, "step": 21710 }, { "epoch": 8.43, "learning_rate": 1.8875339805825244e-05, "loss": 0.1129, "step": 21720 }, { "epoch": 8.44, "learning_rate": 1.887482200647249e-05, "loss": 0.2427, "step": 21730 }, { "epoch": 8.44, "learning_rate": 1.8874304207119743e-05, "loss": 0.1366, "step": 21740 }, { "epoch": 8.45, "learning_rate": 1.887378640776699e-05, "loss": 0.0836, "step": 21750 }, { "epoch": 8.45, "learning_rate": 1.8873268608414242e-05, "loss": 0.1552, "step": 21760 }, { "epoch": 8.45, "learning_rate": 1.887275080906149e-05, "loss": 0.1704, "step": 21770 }, { "epoch": 8.46, "learning_rate": 1.887223300970874e-05, "loss": 0.1901, "step": 21780 }, { "epoch": 8.46, "learning_rate": 1.8871715210355986e-05, "loss": 0.1486, "step": 21790 }, { "epoch": 8.47, "learning_rate": 1.8871197411003237e-05, "loss": 0.1737, "step": 21800 }, { "epoch": 8.47, "learning_rate": 1.8870679611650485e-05, "loss": 0.1333, "step": 21810 }, { "epoch": 8.47, "learning_rate": 1.8870161812297736e-05, "loss": 0.2282, "step": 21820 }, { "epoch": 8.48, "learning_rate": 1.8869644012944984e-05, "loss": 0.1288, "step": 21830 }, { "epoch": 8.48, "learning_rate": 1.8869126213592236e-05, "loss": 0.0718, "step": 21840 }, { "epoch": 8.49, "learning_rate": 1.8868608414239484e-05, "loss": 0.1946, "step": 21850 }, { "epoch": 8.49, "learning_rate": 1.8868090614886735e-05, "loss": 0.2471, "step": 21860 }, { "epoch": 8.49, "learning_rate": 1.8867572815533983e-05, "loss": 0.1732, "step": 21870 }, { "epoch": 8.5, "learning_rate": 1.886705501618123e-05, "loss": 0.045, "step": 21880 }, { "epoch": 8.5, "learning_rate": 1.886653721682848e-05, "loss": 0.2658, "step": 21890 }, { "epoch": 8.5, "learning_rate": 1.886601941747573e-05, "loss": 0.007, "step": 21900 }, { "epoch": 8.51, "learning_rate": 1.8865501618122978e-05, "loss": 0.1167, "step": 21910 }, { "epoch": 8.51, "learning_rate": 1.886498381877023e-05, "loss": 0.2181, "step": 21920 }, { "epoch": 8.52, "learning_rate": 1.8864466019417477e-05, "loss": 0.3278, "step": 21930 }, { "epoch": 8.52, "learning_rate": 1.886394822006473e-05, "loss": 0.0442, "step": 21940 }, { "epoch": 8.52, "learning_rate": 1.8863430420711976e-05, "loss": 0.2037, "step": 21950 }, { "epoch": 8.53, "learning_rate": 1.8862912621359224e-05, "loss": 0.1656, "step": 21960 }, { "epoch": 8.53, "learning_rate": 1.8862394822006472e-05, "loss": 0.2284, "step": 21970 }, { "epoch": 8.54, "learning_rate": 1.8861877022653724e-05, "loss": 0.1784, "step": 21980 }, { "epoch": 8.54, "learning_rate": 1.886135922330097e-05, "loss": 0.1356, "step": 21990 }, { "epoch": 8.54, "learning_rate": 1.8860841423948223e-05, "loss": 0.1266, "step": 22000 }, { "epoch": 8.55, "learning_rate": 1.886032362459547e-05, "loss": 0.1452, "step": 22010 }, { "epoch": 8.55, "learning_rate": 1.8859805825242722e-05, "loss": 0.1829, "step": 22020 }, { "epoch": 8.56, "learning_rate": 1.885928802588997e-05, "loss": 0.0303, "step": 22030 }, { "epoch": 8.56, "learning_rate": 1.8858770226537218e-05, "loss": 0.342, "step": 22040 }, { "epoch": 8.56, "learning_rate": 1.8858252427184466e-05, "loss": 0.2479, "step": 22050 }, { "epoch": 8.57, "learning_rate": 1.8857734627831717e-05, "loss": 0.2139, "step": 22060 }, { "epoch": 8.57, "learning_rate": 1.8857216828478965e-05, "loss": 0.2386, "step": 22070 }, { "epoch": 8.57, "learning_rate": 1.8856699029126216e-05, "loss": 0.1465, "step": 22080 }, { "epoch": 8.58, "learning_rate": 1.8856181229773464e-05, "loss": 0.2001, "step": 22090 }, { "epoch": 8.58, "learning_rate": 1.8855663430420716e-05, "loss": 0.1362, "step": 22100 }, { "epoch": 8.59, "learning_rate": 1.8855145631067964e-05, "loss": 0.0865, "step": 22110 }, { "epoch": 8.59, "learning_rate": 1.885462783171521e-05, "loss": 0.1359, "step": 22120 }, { "epoch": 8.59, "learning_rate": 1.885411003236246e-05, "loss": 0.1911, "step": 22130 }, { "epoch": 8.6, "learning_rate": 1.885359223300971e-05, "loss": 0.1852, "step": 22140 }, { "epoch": 8.6, "learning_rate": 1.885307443365696e-05, "loss": 0.2367, "step": 22150 }, { "epoch": 8.61, "learning_rate": 1.885255663430421e-05, "loss": 0.1994, "step": 22160 }, { "epoch": 8.61, "learning_rate": 1.8852038834951458e-05, "loss": 0.2431, "step": 22170 }, { "epoch": 8.61, "learning_rate": 1.885152103559871e-05, "loss": 0.1813, "step": 22180 }, { "epoch": 8.62, "learning_rate": 1.8851003236245957e-05, "loss": 0.2346, "step": 22190 }, { "epoch": 8.62, "learning_rate": 1.8850485436893205e-05, "loss": 0.1245, "step": 22200 }, { "epoch": 8.63, "learning_rate": 1.8849967637540453e-05, "loss": 0.1945, "step": 22210 }, { "epoch": 8.63, "learning_rate": 1.8849449838187704e-05, "loss": 0.4594, "step": 22220 }, { "epoch": 8.63, "learning_rate": 1.8848932038834952e-05, "loss": 0.1934, "step": 22230 }, { "epoch": 8.64, "learning_rate": 1.8848414239482203e-05, "loss": 0.1258, "step": 22240 }, { "epoch": 8.64, "learning_rate": 1.884789644012945e-05, "loss": 0.1497, "step": 22250 }, { "epoch": 8.64, "learning_rate": 1.8847378640776703e-05, "loss": 0.0801, "step": 22260 }, { "epoch": 8.65, "learning_rate": 1.884686084142395e-05, "loss": 0.0904, "step": 22270 }, { "epoch": 8.65, "learning_rate": 1.88463430420712e-05, "loss": 0.186, "step": 22280 }, { "epoch": 8.66, "learning_rate": 1.8845825242718446e-05, "loss": 0.136, "step": 22290 }, { "epoch": 8.66, "learning_rate": 1.8845307443365698e-05, "loss": 0.0736, "step": 22300 }, { "epoch": 8.66, "learning_rate": 1.8844789644012946e-05, "loss": 0.1106, "step": 22310 }, { "epoch": 8.67, "learning_rate": 1.8844271844660197e-05, "loss": 0.1226, "step": 22320 }, { "epoch": 8.67, "learning_rate": 1.8843754045307445e-05, "loss": 0.1307, "step": 22330 }, { "epoch": 8.68, "learning_rate": 1.8843236245954696e-05, "loss": 0.2153, "step": 22340 }, { "epoch": 8.68, "learning_rate": 1.8842718446601944e-05, "loss": 0.2044, "step": 22350 }, { "epoch": 8.68, "learning_rate": 1.8842200647249192e-05, "loss": 0.2067, "step": 22360 }, { "epoch": 8.69, "learning_rate": 1.884168284789644e-05, "loss": 0.1372, "step": 22370 }, { "epoch": 8.69, "learning_rate": 1.884116504854369e-05, "loss": 0.1949, "step": 22380 }, { "epoch": 8.7, "learning_rate": 1.884064724919094e-05, "loss": 0.1666, "step": 22390 }, { "epoch": 8.7, "learning_rate": 1.884012944983819e-05, "loss": 0.1552, "step": 22400 }, { "epoch": 8.7, "learning_rate": 1.883961165048544e-05, "loss": 0.2195, "step": 22410 }, { "epoch": 8.71, "learning_rate": 1.883909385113269e-05, "loss": 0.1618, "step": 22420 }, { "epoch": 8.71, "learning_rate": 1.8838576051779938e-05, "loss": 0.1074, "step": 22430 }, { "epoch": 8.71, "learning_rate": 1.8838058252427186e-05, "loss": 0.1523, "step": 22440 }, { "epoch": 8.72, "learning_rate": 1.8837540453074434e-05, "loss": 0.2489, "step": 22450 }, { "epoch": 8.72, "learning_rate": 1.8837022653721685e-05, "loss": 0.0645, "step": 22460 }, { "epoch": 8.73, "learning_rate": 1.8836504854368933e-05, "loss": 0.08, "step": 22470 }, { "epoch": 8.73, "learning_rate": 1.8835987055016184e-05, "loss": 0.0803, "step": 22480 }, { "epoch": 8.73, "learning_rate": 1.8835469255663432e-05, "loss": 0.1131, "step": 22490 }, { "epoch": 8.74, "learning_rate": 1.883495145631068e-05, "loss": 0.1339, "step": 22500 }, { "epoch": 8.74, "learning_rate": 1.883443365695793e-05, "loss": 0.179, "step": 22510 }, { "epoch": 8.75, "learning_rate": 1.883391585760518e-05, "loss": 0.1558, "step": 22520 }, { "epoch": 8.75, "learning_rate": 1.8833398058252427e-05, "loss": 0.2514, "step": 22530 }, { "epoch": 8.75, "learning_rate": 1.883288025889968e-05, "loss": 0.2662, "step": 22540 }, { "epoch": 8.76, "learning_rate": 1.8832362459546926e-05, "loss": 0.0908, "step": 22550 }, { "epoch": 8.76, "learning_rate": 1.8831844660194178e-05, "loss": 0.191, "step": 22560 }, { "epoch": 8.77, "learning_rate": 1.8831326860841426e-05, "loss": 0.1862, "step": 22570 }, { "epoch": 8.77, "learning_rate": 1.8830809061488674e-05, "loss": 0.0145, "step": 22580 }, { "epoch": 8.77, "learning_rate": 1.8830291262135925e-05, "loss": 0.2788, "step": 22590 }, { "epoch": 8.78, "learning_rate": 1.8829773462783173e-05, "loss": 0.1295, "step": 22600 }, { "epoch": 8.78, "learning_rate": 1.882925566343042e-05, "loss": 0.0575, "step": 22610 }, { "epoch": 8.78, "learning_rate": 1.8828737864077672e-05, "loss": 0.0336, "step": 22620 }, { "epoch": 8.79, "learning_rate": 1.882822006472492e-05, "loss": 0.1354, "step": 22630 }, { "epoch": 8.79, "learning_rate": 1.882770226537217e-05, "loss": 0.1404, "step": 22640 }, { "epoch": 8.8, "learning_rate": 1.882718446601942e-05, "loss": 0.0881, "step": 22650 }, { "epoch": 8.8, "learning_rate": 1.8826666666666667e-05, "loss": 0.0713, "step": 22660 }, { "epoch": 8.8, "learning_rate": 1.882614886731392e-05, "loss": 0.2582, "step": 22670 }, { "epoch": 8.81, "learning_rate": 1.8825631067961166e-05, "loss": 0.2333, "step": 22680 }, { "epoch": 8.81, "learning_rate": 1.8825113268608414e-05, "loss": 0.279, "step": 22690 }, { "epoch": 8.82, "learning_rate": 1.8824595469255666e-05, "loss": 0.0331, "step": 22700 }, { "epoch": 8.82, "learning_rate": 1.8824077669902914e-05, "loss": 0.1121, "step": 22710 }, { "epoch": 8.82, "learning_rate": 1.8823559870550165e-05, "loss": 0.178, "step": 22720 }, { "epoch": 8.83, "learning_rate": 1.8823042071197413e-05, "loss": 0.0887, "step": 22730 }, { "epoch": 8.83, "learning_rate": 1.882252427184466e-05, "loss": 0.1439, "step": 22740 }, { "epoch": 8.83, "learning_rate": 1.8822006472491912e-05, "loss": 0.1999, "step": 22750 }, { "epoch": 8.84, "learning_rate": 1.882148867313916e-05, "loss": 0.1251, "step": 22760 }, { "epoch": 8.84, "learning_rate": 1.8820970873786408e-05, "loss": 0.1234, "step": 22770 }, { "epoch": 8.85, "learning_rate": 1.882045307443366e-05, "loss": 0.2392, "step": 22780 }, { "epoch": 8.85, "learning_rate": 1.8819935275080907e-05, "loss": 0.2333, "step": 22790 }, { "epoch": 8.85, "learning_rate": 1.881941747572816e-05, "loss": 0.0658, "step": 22800 }, { "epoch": 8.86, "learning_rate": 1.8818899676375406e-05, "loss": 0.2955, "step": 22810 }, { "epoch": 8.86, "learning_rate": 1.8818381877022654e-05, "loss": 0.1832, "step": 22820 }, { "epoch": 8.87, "learning_rate": 1.8817864077669906e-05, "loss": 0.1502, "step": 22830 }, { "epoch": 8.87, "learning_rate": 1.8817346278317153e-05, "loss": 0.0341, "step": 22840 }, { "epoch": 8.87, "learning_rate": 1.88168284789644e-05, "loss": 0.1151, "step": 22850 }, { "epoch": 8.88, "learning_rate": 1.8816310679611653e-05, "loss": 0.2136, "step": 22860 }, { "epoch": 8.88, "learning_rate": 1.88157928802589e-05, "loss": 0.0592, "step": 22870 }, { "epoch": 8.89, "learning_rate": 1.881527508090615e-05, "loss": 0.1838, "step": 22880 }, { "epoch": 8.89, "learning_rate": 1.88147572815534e-05, "loss": 0.2911, "step": 22890 }, { "epoch": 8.89, "learning_rate": 1.8814239482200648e-05, "loss": 0.2536, "step": 22900 }, { "epoch": 8.9, "learning_rate": 1.88137216828479e-05, "loss": 0.059, "step": 22910 }, { "epoch": 8.9, "learning_rate": 1.8813203883495147e-05, "loss": 0.0499, "step": 22920 }, { "epoch": 8.9, "learning_rate": 1.8812686084142395e-05, "loss": 0.0978, "step": 22930 }, { "epoch": 8.91, "learning_rate": 1.8812168284789646e-05, "loss": 0.1474, "step": 22940 }, { "epoch": 8.91, "learning_rate": 1.8811650485436894e-05, "loss": 0.2528, "step": 22950 }, { "epoch": 8.92, "learning_rate": 1.8811132686084142e-05, "loss": 0.2705, "step": 22960 }, { "epoch": 8.92, "learning_rate": 1.8810614886731393e-05, "loss": 0.177, "step": 22970 }, { "epoch": 8.92, "learning_rate": 1.881009708737864e-05, "loss": 0.1231, "step": 22980 }, { "epoch": 8.93, "learning_rate": 1.8809579288025893e-05, "loss": 0.3388, "step": 22990 }, { "epoch": 8.93, "learning_rate": 1.880906148867314e-05, "loss": 0.1711, "step": 23000 }, { "epoch": 8.94, "learning_rate": 1.8808543689320392e-05, "loss": 0.1108, "step": 23010 }, { "epoch": 8.94, "learning_rate": 1.880802588996764e-05, "loss": 0.0579, "step": 23020 }, { "epoch": 8.94, "learning_rate": 1.8807508090614888e-05, "loss": 0.1931, "step": 23030 }, { "epoch": 8.95, "learning_rate": 1.8806990291262136e-05, "loss": 0.2855, "step": 23040 }, { "epoch": 8.95, "learning_rate": 1.8806472491909387e-05, "loss": 0.1447, "step": 23050 }, { "epoch": 8.96, "learning_rate": 1.8805954692556635e-05, "loss": 0.4596, "step": 23060 }, { "epoch": 8.96, "learning_rate": 1.8805436893203886e-05, "loss": 0.1517, "step": 23070 }, { "epoch": 8.96, "learning_rate": 1.8804919093851134e-05, "loss": 0.0564, "step": 23080 }, { "epoch": 8.97, "learning_rate": 1.8804401294498385e-05, "loss": 0.0409, "step": 23090 }, { "epoch": 8.97, "learning_rate": 1.8803883495145633e-05, "loss": 0.1732, "step": 23100 }, { "epoch": 8.97, "learning_rate": 1.880336569579288e-05, "loss": 0.1264, "step": 23110 }, { "epoch": 8.98, "learning_rate": 1.880284789644013e-05, "loss": 0.0731, "step": 23120 }, { "epoch": 8.98, "learning_rate": 1.880233009708738e-05, "loss": 0.0486, "step": 23130 }, { "epoch": 8.99, "learning_rate": 1.880181229773463e-05, "loss": 0.2034, "step": 23140 }, { "epoch": 8.99, "learning_rate": 1.880129449838188e-05, "loss": 0.2087, "step": 23150 }, { "epoch": 8.99, "learning_rate": 1.8800776699029128e-05, "loss": 0.0761, "step": 23160 }, { "epoch": 9.0, "learning_rate": 1.880025889967638e-05, "loss": 0.2658, "step": 23170 }, { "epoch": 9.0, "eval_accuracy": 0.951031636863824, "eval_loss": 0.20891419053077698, "eval_runtime": 8.1732, "eval_samples_per_second": 444.745, "eval_steps_per_second": 55.67, "step": 23175 }, { "epoch": 9.0, "learning_rate": 1.8799741100323624e-05, "loss": 0.1638, "step": 23180 }, { "epoch": 9.01, "learning_rate": 1.8799223300970875e-05, "loss": 0.1532, "step": 23190 }, { "epoch": 9.01, "learning_rate": 1.8798705501618123e-05, "loss": 0.1557, "step": 23200 }, { "epoch": 9.01, "learning_rate": 1.8798187702265374e-05, "loss": 0.0958, "step": 23210 }, { "epoch": 9.02, "learning_rate": 1.8797669902912622e-05, "loss": 0.1796, "step": 23220 }, { "epoch": 9.02, "learning_rate": 1.8797152103559873e-05, "loss": 0.1515, "step": 23230 }, { "epoch": 9.03, "learning_rate": 1.879663430420712e-05, "loss": 0.1639, "step": 23240 }, { "epoch": 9.03, "learning_rate": 1.8796116504854373e-05, "loss": 0.2391, "step": 23250 }, { "epoch": 9.03, "learning_rate": 1.8795598705501617e-05, "loss": 0.1436, "step": 23260 }, { "epoch": 9.04, "learning_rate": 1.879508090614887e-05, "loss": 0.1783, "step": 23270 }, { "epoch": 9.04, "learning_rate": 1.8794563106796116e-05, "loss": 0.1956, "step": 23280 }, { "epoch": 9.04, "learning_rate": 1.8794045307443368e-05, "loss": 0.3148, "step": 23290 }, { "epoch": 9.05, "learning_rate": 1.8793527508090616e-05, "loss": 0.0489, "step": 23300 }, { "epoch": 9.05, "learning_rate": 1.8793009708737867e-05, "loss": 0.1087, "step": 23310 }, { "epoch": 9.06, "learning_rate": 1.8792491909385115e-05, "loss": 0.1003, "step": 23320 }, { "epoch": 9.06, "learning_rate": 1.8791974110032366e-05, "loss": 0.0212, "step": 23330 }, { "epoch": 9.06, "learning_rate": 1.879145631067961e-05, "loss": 0.2206, "step": 23340 }, { "epoch": 9.07, "learning_rate": 1.8790938511326862e-05, "loss": 0.1293, "step": 23350 }, { "epoch": 9.07, "learning_rate": 1.879042071197411e-05, "loss": 0.0781, "step": 23360 }, { "epoch": 9.08, "learning_rate": 1.878990291262136e-05, "loss": 0.1474, "step": 23370 }, { "epoch": 9.08, "learning_rate": 1.878938511326861e-05, "loss": 0.1563, "step": 23380 }, { "epoch": 9.08, "learning_rate": 1.878886731391586e-05, "loss": 0.0669, "step": 23390 }, { "epoch": 9.09, "learning_rate": 1.878834951456311e-05, "loss": 0.1168, "step": 23400 }, { "epoch": 9.09, "learning_rate": 1.878783171521036e-05, "loss": 0.3145, "step": 23410 }, { "epoch": 9.1, "learning_rate": 1.8787313915857604e-05, "loss": 0.4579, "step": 23420 }, { "epoch": 9.1, "learning_rate": 1.8786796116504856e-05, "loss": 0.1775, "step": 23430 }, { "epoch": 9.1, "learning_rate": 1.8786278317152103e-05, "loss": 0.042, "step": 23440 }, { "epoch": 9.11, "learning_rate": 1.8785760517799355e-05, "loss": 0.1732, "step": 23450 }, { "epoch": 9.11, "learning_rate": 1.8785242718446603e-05, "loss": 0.1292, "step": 23460 }, { "epoch": 9.11, "learning_rate": 1.8784724919093854e-05, "loss": 0.1745, "step": 23470 }, { "epoch": 9.12, "learning_rate": 1.8784207119741102e-05, "loss": 0.1205, "step": 23480 }, { "epoch": 9.12, "learning_rate": 1.8783689320388353e-05, "loss": 0.1973, "step": 23490 }, { "epoch": 9.13, "learning_rate": 1.8783171521035598e-05, "loss": 0.174, "step": 23500 }, { "epoch": 9.13, "learning_rate": 1.878265372168285e-05, "loss": 0.1793, "step": 23510 }, { "epoch": 9.13, "learning_rate": 1.8782135922330097e-05, "loss": 0.0831, "step": 23520 }, { "epoch": 9.14, "learning_rate": 1.878161812297735e-05, "loss": 0.16, "step": 23530 }, { "epoch": 9.14, "learning_rate": 1.8781100323624596e-05, "loss": 0.2863, "step": 23540 }, { "epoch": 9.15, "learning_rate": 1.8780582524271848e-05, "loss": 0.1604, "step": 23550 }, { "epoch": 9.15, "learning_rate": 1.8780064724919095e-05, "loss": 0.1071, "step": 23560 }, { "epoch": 9.15, "learning_rate": 1.8779546925566347e-05, "loss": 0.2428, "step": 23570 }, { "epoch": 9.16, "learning_rate": 1.8779029126213595e-05, "loss": 0.2421, "step": 23580 }, { "epoch": 9.16, "learning_rate": 1.8778511326860843e-05, "loss": 0.2965, "step": 23590 }, { "epoch": 9.17, "learning_rate": 1.877799352750809e-05, "loss": 0.2266, "step": 23600 }, { "epoch": 9.17, "learning_rate": 1.8777475728155342e-05, "loss": 0.1337, "step": 23610 }, { "epoch": 9.17, "learning_rate": 1.877695792880259e-05, "loss": 0.0963, "step": 23620 }, { "epoch": 9.18, "learning_rate": 1.877644012944984e-05, "loss": 0.1094, "step": 23630 }, { "epoch": 9.18, "learning_rate": 1.877592233009709e-05, "loss": 0.24, "step": 23640 }, { "epoch": 9.18, "learning_rate": 1.877540453074434e-05, "loss": 0.1363, "step": 23650 }, { "epoch": 9.19, "learning_rate": 1.8774886731391588e-05, "loss": 0.1944, "step": 23660 }, { "epoch": 9.19, "learning_rate": 1.8774368932038836e-05, "loss": 0.207, "step": 23670 }, { "epoch": 9.2, "learning_rate": 1.8773851132686084e-05, "loss": 0.0788, "step": 23680 }, { "epoch": 9.2, "learning_rate": 1.8773333333333335e-05, "loss": 0.0214, "step": 23690 }, { "epoch": 9.2, "learning_rate": 1.8772815533980583e-05, "loss": 0.0217, "step": 23700 }, { "epoch": 9.21, "learning_rate": 1.8772297734627835e-05, "loss": 0.1607, "step": 23710 }, { "epoch": 9.21, "learning_rate": 1.8771779935275083e-05, "loss": 0.1618, "step": 23720 }, { "epoch": 9.22, "learning_rate": 1.8771262135922334e-05, "loss": 0.1063, "step": 23730 }, { "epoch": 9.22, "learning_rate": 1.8770744336569582e-05, "loss": 0.1788, "step": 23740 }, { "epoch": 9.22, "learning_rate": 1.877022653721683e-05, "loss": 0.1484, "step": 23750 }, { "epoch": 9.23, "learning_rate": 1.8769708737864078e-05, "loss": 0.28, "step": 23760 }, { "epoch": 9.23, "learning_rate": 1.876919093851133e-05, "loss": 0.29, "step": 23770 }, { "epoch": 9.23, "learning_rate": 1.8768673139158577e-05, "loss": 0.1091, "step": 23780 }, { "epoch": 9.24, "learning_rate": 1.8768155339805828e-05, "loss": 0.1332, "step": 23790 }, { "epoch": 9.24, "learning_rate": 1.8767637540453076e-05, "loss": 0.0741, "step": 23800 }, { "epoch": 9.25, "learning_rate": 1.8767119741100327e-05, "loss": 0.0882, "step": 23810 }, { "epoch": 9.25, "learning_rate": 1.8766601941747575e-05, "loss": 0.0816, "step": 23820 }, { "epoch": 9.25, "learning_rate": 1.8766084142394823e-05, "loss": 0.1641, "step": 23830 }, { "epoch": 9.26, "learning_rate": 1.876556634304207e-05, "loss": 0.1122, "step": 23840 }, { "epoch": 9.26, "learning_rate": 1.8765048543689323e-05, "loss": 0.3096, "step": 23850 }, { "epoch": 9.27, "learning_rate": 1.876453074433657e-05, "loss": 0.0827, "step": 23860 }, { "epoch": 9.27, "learning_rate": 1.8764012944983822e-05, "loss": 0.0879, "step": 23870 }, { "epoch": 9.27, "learning_rate": 1.876349514563107e-05, "loss": 0.0866, "step": 23880 }, { "epoch": 9.28, "learning_rate": 1.876297734627832e-05, "loss": 0.1006, "step": 23890 }, { "epoch": 9.28, "learning_rate": 1.876245954692557e-05, "loss": 0.3453, "step": 23900 }, { "epoch": 9.29, "learning_rate": 1.8761941747572817e-05, "loss": 0.0474, "step": 23910 }, { "epoch": 9.29, "learning_rate": 1.8761423948220065e-05, "loss": 0.2194, "step": 23920 }, { "epoch": 9.29, "learning_rate": 1.8760906148867316e-05, "loss": 0.097, "step": 23930 }, { "epoch": 9.3, "learning_rate": 1.8760388349514564e-05, "loss": 0.1382, "step": 23940 }, { "epoch": 9.3, "learning_rate": 1.8759870550161815e-05, "loss": 0.3141, "step": 23950 }, { "epoch": 9.3, "learning_rate": 1.8759352750809063e-05, "loss": 0.3154, "step": 23960 }, { "epoch": 9.31, "learning_rate": 1.875883495145631e-05, "loss": 0.168, "step": 23970 }, { "epoch": 9.31, "learning_rate": 1.8758317152103562e-05, "loss": 0.4143, "step": 23980 }, { "epoch": 9.32, "learning_rate": 1.875779935275081e-05, "loss": 0.0658, "step": 23990 }, { "epoch": 9.32, "learning_rate": 1.875728155339806e-05, "loss": 0.0296, "step": 24000 }, { "epoch": 9.32, "learning_rate": 1.875676375404531e-05, "loss": 0.0106, "step": 24010 }, { "epoch": 9.33, "learning_rate": 1.8756245954692558e-05, "loss": 0.161, "step": 24020 }, { "epoch": 9.33, "learning_rate": 1.875572815533981e-05, "loss": 0.1689, "step": 24030 }, { "epoch": 9.34, "learning_rate": 1.8755210355987057e-05, "loss": 0.2394, "step": 24040 }, { "epoch": 9.34, "learning_rate": 1.8754692556634305e-05, "loss": 0.2823, "step": 24050 }, { "epoch": 9.34, "learning_rate": 1.8754174757281556e-05, "loss": 0.207, "step": 24060 }, { "epoch": 9.35, "learning_rate": 1.8753656957928804e-05, "loss": 0.0422, "step": 24070 }, { "epoch": 9.35, "learning_rate": 1.8753139158576052e-05, "loss": 0.0894, "step": 24080 }, { "epoch": 9.36, "learning_rate": 1.8752621359223303e-05, "loss": 0.2712, "step": 24090 }, { "epoch": 9.36, "learning_rate": 1.875210355987055e-05, "loss": 0.0805, "step": 24100 }, { "epoch": 9.36, "learning_rate": 1.8751585760517802e-05, "loss": 0.2002, "step": 24110 }, { "epoch": 9.37, "learning_rate": 1.875106796116505e-05, "loss": 0.1737, "step": 24120 }, { "epoch": 9.37, "learning_rate": 1.8750550161812298e-05, "loss": 0.2772, "step": 24130 }, { "epoch": 9.37, "learning_rate": 1.875003236245955e-05, "loss": 0.1467, "step": 24140 }, { "epoch": 9.38, "learning_rate": 1.8749514563106798e-05, "loss": 0.2014, "step": 24150 }, { "epoch": 9.38, "learning_rate": 1.8748996763754045e-05, "loss": 0.0934, "step": 24160 }, { "epoch": 9.39, "learning_rate": 1.8748478964401297e-05, "loss": 0.1686, "step": 24170 }, { "epoch": 9.39, "learning_rate": 1.8747961165048545e-05, "loss": 0.0807, "step": 24180 }, { "epoch": 9.39, "learning_rate": 1.8747443365695796e-05, "loss": 0.1102, "step": 24190 }, { "epoch": 9.4, "learning_rate": 1.8746925566343044e-05, "loss": 0.3011, "step": 24200 }, { "epoch": 9.4, "learning_rate": 1.8746407766990292e-05, "loss": 0.1743, "step": 24210 }, { "epoch": 9.41, "learning_rate": 1.8745889967637543e-05, "loss": 0.1716, "step": 24220 }, { "epoch": 9.41, "learning_rate": 1.874537216828479e-05, "loss": 0.2234, "step": 24230 }, { "epoch": 9.41, "learning_rate": 1.874485436893204e-05, "loss": 0.1947, "step": 24240 }, { "epoch": 9.42, "learning_rate": 1.874433656957929e-05, "loss": 0.0559, "step": 24250 }, { "epoch": 9.42, "learning_rate": 1.8743818770226538e-05, "loss": 0.2059, "step": 24260 }, { "epoch": 9.43, "learning_rate": 1.874330097087379e-05, "loss": 0.0878, "step": 24270 }, { "epoch": 9.43, "learning_rate": 1.8742783171521037e-05, "loss": 0.1335, "step": 24280 }, { "epoch": 9.43, "learning_rate": 1.8742265372168285e-05, "loss": 0.0188, "step": 24290 }, { "epoch": 9.44, "learning_rate": 1.8741747572815537e-05, "loss": 0.1089, "step": 24300 }, { "epoch": 9.44, "learning_rate": 1.8741229773462785e-05, "loss": 0.1117, "step": 24310 }, { "epoch": 9.44, "learning_rate": 1.8740711974110033e-05, "loss": 0.1254, "step": 24320 }, { "epoch": 9.45, "learning_rate": 1.8740194174757284e-05, "loss": 0.2168, "step": 24330 }, { "epoch": 9.45, "learning_rate": 1.8739676375404532e-05, "loss": 0.2825, "step": 24340 }, { "epoch": 9.46, "learning_rate": 1.873915857605178e-05, "loss": 0.1318, "step": 24350 }, { "epoch": 9.46, "learning_rate": 1.873864077669903e-05, "loss": 0.1697, "step": 24360 }, { "epoch": 9.46, "learning_rate": 1.873812297734628e-05, "loss": 0.044, "step": 24370 }, { "epoch": 9.47, "learning_rate": 1.873760517799353e-05, "loss": 0.093, "step": 24380 }, { "epoch": 9.47, "learning_rate": 1.8737087378640778e-05, "loss": 0.2223, "step": 24390 }, { "epoch": 9.48, "learning_rate": 1.8736569579288026e-05, "loss": 0.1688, "step": 24400 }, { "epoch": 9.48, "learning_rate": 1.8736051779935277e-05, "loss": 0.1534, "step": 24410 }, { "epoch": 9.48, "learning_rate": 1.8735533980582525e-05, "loss": 0.3042, "step": 24420 }, { "epoch": 9.49, "learning_rate": 1.8735016181229773e-05, "loss": 0.117, "step": 24430 }, { "epoch": 9.49, "learning_rate": 1.8734498381877025e-05, "loss": 0.3192, "step": 24440 }, { "epoch": 9.5, "learning_rate": 1.8733980582524273e-05, "loss": 0.1599, "step": 24450 }, { "epoch": 9.5, "learning_rate": 1.8733462783171524e-05, "loss": 0.1845, "step": 24460 }, { "epoch": 9.5, "learning_rate": 1.8732944983818772e-05, "loss": 0.3663, "step": 24470 }, { "epoch": 9.51, "learning_rate": 1.873242718446602e-05, "loss": 0.1378, "step": 24480 }, { "epoch": 9.51, "learning_rate": 1.873190938511327e-05, "loss": 0.1054, "step": 24490 }, { "epoch": 9.51, "learning_rate": 1.873139158576052e-05, "loss": 0.2037, "step": 24500 }, { "epoch": 9.52, "learning_rate": 1.8730873786407767e-05, "loss": 0.1062, "step": 24510 }, { "epoch": 9.52, "learning_rate": 1.8730355987055018e-05, "loss": 0.1122, "step": 24520 }, { "epoch": 9.53, "learning_rate": 1.8729838187702266e-05, "loss": 0.2145, "step": 24530 }, { "epoch": 9.53, "learning_rate": 1.8729320388349517e-05, "loss": 0.1264, "step": 24540 }, { "epoch": 9.53, "learning_rate": 1.8728802588996765e-05, "loss": 0.1689, "step": 24550 }, { "epoch": 9.54, "learning_rate": 1.8728284789644013e-05, "loss": 0.524, "step": 24560 }, { "epoch": 9.54, "learning_rate": 1.8727766990291265e-05, "loss": 0.0244, "step": 24570 }, { "epoch": 9.55, "learning_rate": 1.8727249190938512e-05, "loss": 0.1799, "step": 24580 }, { "epoch": 9.55, "learning_rate": 1.872673139158576e-05, "loss": 0.2759, "step": 24590 }, { "epoch": 9.55, "learning_rate": 1.8726213592233012e-05, "loss": 0.1937, "step": 24600 }, { "epoch": 9.56, "learning_rate": 1.872569579288026e-05, "loss": 0.146, "step": 24610 }, { "epoch": 9.56, "learning_rate": 1.872517799352751e-05, "loss": 0.1948, "step": 24620 }, { "epoch": 9.57, "learning_rate": 1.872466019417476e-05, "loss": 0.2984, "step": 24630 }, { "epoch": 9.57, "learning_rate": 1.8724142394822007e-05, "loss": 0.0892, "step": 24640 }, { "epoch": 9.57, "learning_rate": 1.8723624595469255e-05, "loss": 0.0497, "step": 24650 }, { "epoch": 9.58, "learning_rate": 1.8723106796116506e-05, "loss": 0.3148, "step": 24660 }, { "epoch": 9.58, "learning_rate": 1.8722588996763754e-05, "loss": 0.1518, "step": 24670 }, { "epoch": 9.58, "learning_rate": 1.8722071197411005e-05, "loss": 0.0815, "step": 24680 }, { "epoch": 9.59, "learning_rate": 1.8721553398058253e-05, "loss": 0.1222, "step": 24690 }, { "epoch": 9.59, "learning_rate": 1.8721035598705504e-05, "loss": 0.0833, "step": 24700 }, { "epoch": 9.6, "learning_rate": 1.8720517799352752e-05, "loss": 0.0584, "step": 24710 }, { "epoch": 9.6, "learning_rate": 1.8720000000000004e-05, "loss": 0.1356, "step": 24720 }, { "epoch": 9.6, "learning_rate": 1.8719482200647248e-05, "loss": 0.1552, "step": 24730 }, { "epoch": 9.61, "learning_rate": 1.87189644012945e-05, "loss": 0.0614, "step": 24740 }, { "epoch": 9.61, "learning_rate": 1.8718446601941747e-05, "loss": 0.1161, "step": 24750 }, { "epoch": 9.62, "learning_rate": 1.8717928802589e-05, "loss": 0.1495, "step": 24760 }, { "epoch": 9.62, "learning_rate": 1.8717411003236247e-05, "loss": 0.1536, "step": 24770 }, { "epoch": 9.62, "learning_rate": 1.8716893203883498e-05, "loss": 0.2008, "step": 24780 }, { "epoch": 9.63, "learning_rate": 1.8716375404530746e-05, "loss": 0.066, "step": 24790 }, { "epoch": 9.63, "learning_rate": 1.8715857605177997e-05, "loss": 0.1124, "step": 24800 }, { "epoch": 9.63, "learning_rate": 1.8715339805825242e-05, "loss": 0.383, "step": 24810 }, { "epoch": 9.64, "learning_rate": 1.8714822006472493e-05, "loss": 0.2125, "step": 24820 }, { "epoch": 9.64, "learning_rate": 1.871430420711974e-05, "loss": 0.2071, "step": 24830 }, { "epoch": 9.65, "learning_rate": 1.8713786407766992e-05, "loss": 0.1297, "step": 24840 }, { "epoch": 9.65, "learning_rate": 1.871326860841424e-05, "loss": 0.1529, "step": 24850 }, { "epoch": 9.65, "learning_rate": 1.871275080906149e-05, "loss": 0.247, "step": 24860 }, { "epoch": 9.66, "learning_rate": 1.871223300970874e-05, "loss": 0.2264, "step": 24870 }, { "epoch": 9.66, "learning_rate": 1.871171521035599e-05, "loss": 0.0495, "step": 24880 }, { "epoch": 9.67, "learning_rate": 1.8711197411003235e-05, "loss": 0.0692, "step": 24890 }, { "epoch": 9.67, "learning_rate": 1.8710679611650487e-05, "loss": 0.0613, "step": 24900 }, { "epoch": 9.67, "learning_rate": 1.8710161812297735e-05, "loss": 0.0759, "step": 24910 }, { "epoch": 9.68, "learning_rate": 1.8709644012944986e-05, "loss": 0.2253, "step": 24920 }, { "epoch": 9.68, "learning_rate": 1.8709126213592234e-05, "loss": 0.1108, "step": 24930 }, { "epoch": 9.69, "learning_rate": 1.8708608414239485e-05, "loss": 0.296, "step": 24940 }, { "epoch": 9.69, "learning_rate": 1.8708090614886733e-05, "loss": 0.1144, "step": 24950 }, { "epoch": 9.69, "learning_rate": 1.8707572815533984e-05, "loss": 0.2958, "step": 24960 }, { "epoch": 9.7, "learning_rate": 1.870705501618123e-05, "loss": 0.0722, "step": 24970 }, { "epoch": 9.7, "learning_rate": 1.870653721682848e-05, "loss": 0.1813, "step": 24980 }, { "epoch": 9.7, "learning_rate": 1.8706019417475728e-05, "loss": 0.1417, "step": 24990 }, { "epoch": 9.71, "learning_rate": 1.870550161812298e-05, "loss": 0.0523, "step": 25000 }, { "epoch": 9.71, "learning_rate": 1.8704983818770227e-05, "loss": 0.1182, "step": 25010 }, { "epoch": 9.72, "learning_rate": 1.870446601941748e-05, "loss": 0.2804, "step": 25020 }, { "epoch": 9.72, "learning_rate": 1.8703948220064727e-05, "loss": 0.1601, "step": 25030 }, { "epoch": 9.72, "learning_rate": 1.8703430420711978e-05, "loss": 0.1039, "step": 25040 }, { "epoch": 9.73, "learning_rate": 1.8702912621359222e-05, "loss": 0.1146, "step": 25050 }, { "epoch": 9.73, "learning_rate": 1.8702394822006474e-05, "loss": 0.4542, "step": 25060 }, { "epoch": 9.74, "learning_rate": 1.8701877022653722e-05, "loss": 0.1719, "step": 25070 }, { "epoch": 9.74, "learning_rate": 1.8701359223300973e-05, "loss": 0.0769, "step": 25080 }, { "epoch": 9.74, "learning_rate": 1.870084142394822e-05, "loss": 0.1786, "step": 25090 }, { "epoch": 9.75, "learning_rate": 1.8700323624595472e-05, "loss": 0.2288, "step": 25100 }, { "epoch": 9.75, "learning_rate": 1.869980582524272e-05, "loss": 0.1279, "step": 25110 }, { "epoch": 9.76, "learning_rate": 1.869928802588997e-05, "loss": 0.1431, "step": 25120 }, { "epoch": 9.76, "learning_rate": 1.8698770226537216e-05, "loss": 0.0392, "step": 25130 }, { "epoch": 9.76, "learning_rate": 1.8698252427184467e-05, "loss": 0.3541, "step": 25140 }, { "epoch": 9.77, "learning_rate": 1.8697734627831715e-05, "loss": 0.2483, "step": 25150 }, { "epoch": 9.77, "learning_rate": 1.8697216828478967e-05, "loss": 0.119, "step": 25160 }, { "epoch": 9.77, "learning_rate": 1.8696699029126215e-05, "loss": 0.0419, "step": 25170 }, { "epoch": 9.78, "learning_rate": 1.8696181229773466e-05, "loss": 0.2045, "step": 25180 }, { "epoch": 9.78, "learning_rate": 1.8695663430420714e-05, "loss": 0.1985, "step": 25190 }, { "epoch": 9.79, "learning_rate": 1.8695145631067965e-05, "loss": 0.0111, "step": 25200 }, { "epoch": 9.79, "learning_rate": 1.869462783171521e-05, "loss": 0.1265, "step": 25210 }, { "epoch": 9.79, "learning_rate": 1.869411003236246e-05, "loss": 0.1387, "step": 25220 }, { "epoch": 9.8, "learning_rate": 1.869359223300971e-05, "loss": 0.1604, "step": 25230 }, { "epoch": 9.8, "learning_rate": 1.869307443365696e-05, "loss": 0.1711, "step": 25240 }, { "epoch": 9.81, "learning_rate": 1.8692556634304208e-05, "loss": 0.1361, "step": 25250 }, { "epoch": 9.81, "learning_rate": 1.869203883495146e-05, "loss": 0.2162, "step": 25260 }, { "epoch": 9.81, "learning_rate": 1.8691521035598707e-05, "loss": 0.161, "step": 25270 }, { "epoch": 9.82, "learning_rate": 1.869100323624596e-05, "loss": 0.0384, "step": 25280 }, { "epoch": 9.82, "learning_rate": 1.8690485436893207e-05, "loss": 0.2032, "step": 25290 }, { "epoch": 9.83, "learning_rate": 1.8689967637540454e-05, "loss": 0.1772, "step": 25300 }, { "epoch": 9.83, "learning_rate": 1.8689449838187702e-05, "loss": 0.0793, "step": 25310 }, { "epoch": 9.83, "learning_rate": 1.8688932038834954e-05, "loss": 0.1546, "step": 25320 }, { "epoch": 9.84, "learning_rate": 1.86884142394822e-05, "loss": 0.1464, "step": 25330 }, { "epoch": 9.84, "learning_rate": 1.8687896440129453e-05, "loss": 0.2053, "step": 25340 }, { "epoch": 9.84, "learning_rate": 1.86873786407767e-05, "loss": 0.0882, "step": 25350 }, { "epoch": 9.85, "learning_rate": 1.8686860841423952e-05, "loss": 0.1026, "step": 25360 }, { "epoch": 9.85, "learning_rate": 1.86863430420712e-05, "loss": 0.1821, "step": 25370 }, { "epoch": 9.86, "learning_rate": 1.8685825242718448e-05, "loss": 0.1298, "step": 25380 }, { "epoch": 9.86, "learning_rate": 1.8685307443365696e-05, "loss": 0.0835, "step": 25390 }, { "epoch": 9.86, "learning_rate": 1.8684789644012947e-05, "loss": 0.1794, "step": 25400 }, { "epoch": 9.87, "learning_rate": 1.8684271844660195e-05, "loss": 0.1723, "step": 25410 }, { "epoch": 9.87, "learning_rate": 1.8683754045307446e-05, "loss": 0.1283, "step": 25420 }, { "epoch": 9.88, "learning_rate": 1.8683236245954694e-05, "loss": 0.0295, "step": 25430 }, { "epoch": 9.88, "learning_rate": 1.8682718446601942e-05, "loss": 0.0658, "step": 25440 }, { "epoch": 9.88, "learning_rate": 1.8682200647249194e-05, "loss": 0.1019, "step": 25450 }, { "epoch": 9.89, "learning_rate": 1.868168284789644e-05, "loss": 0.0565, "step": 25460 }, { "epoch": 9.89, "learning_rate": 1.868116504854369e-05, "loss": 0.0662, "step": 25470 }, { "epoch": 9.9, "learning_rate": 1.868064724919094e-05, "loss": 0.1908, "step": 25480 }, { "epoch": 9.9, "learning_rate": 1.868012944983819e-05, "loss": 0.2838, "step": 25490 }, { "epoch": 9.9, "learning_rate": 1.867961165048544e-05, "loss": 0.0283, "step": 25500 }, { "epoch": 9.91, "learning_rate": 1.8679093851132688e-05, "loss": 0.1748, "step": 25510 }, { "epoch": 9.91, "learning_rate": 1.8678576051779936e-05, "loss": 0.1246, "step": 25520 }, { "epoch": 9.91, "learning_rate": 1.8678058252427187e-05, "loss": 0.2015, "step": 25530 }, { "epoch": 9.92, "learning_rate": 1.8677540453074435e-05, "loss": 0.1296, "step": 25540 }, { "epoch": 9.92, "learning_rate": 1.8677022653721683e-05, "loss": 0.1104, "step": 25550 }, { "epoch": 9.93, "learning_rate": 1.8676504854368934e-05, "loss": 0.0931, "step": 25560 }, { "epoch": 9.93, "learning_rate": 1.8675987055016182e-05, "loss": 0.1437, "step": 25570 }, { "epoch": 9.93, "learning_rate": 1.8675469255663434e-05, "loss": 0.116, "step": 25580 }, { "epoch": 9.94, "learning_rate": 1.867495145631068e-05, "loss": 0.1506, "step": 25590 }, { "epoch": 9.94, "learning_rate": 1.867443365695793e-05, "loss": 0.1208, "step": 25600 }, { "epoch": 9.95, "learning_rate": 1.867391585760518e-05, "loss": 0.1302, "step": 25610 }, { "epoch": 9.95, "learning_rate": 1.867339805825243e-05, "loss": 0.1621, "step": 25620 }, { "epoch": 9.95, "learning_rate": 1.8672880258899677e-05, "loss": 0.0848, "step": 25630 }, { "epoch": 9.96, "learning_rate": 1.8672362459546928e-05, "loss": 0.1485, "step": 25640 }, { "epoch": 9.96, "learning_rate": 1.8671844660194176e-05, "loss": 0.118, "step": 25650 }, { "epoch": 9.97, "learning_rate": 1.8671326860841427e-05, "loss": 0.1084, "step": 25660 }, { "epoch": 9.97, "learning_rate": 1.8670809061488675e-05, "loss": 0.0492, "step": 25670 }, { "epoch": 9.97, "learning_rate": 1.8670291262135923e-05, "loss": 0.0966, "step": 25680 }, { "epoch": 9.98, "learning_rate": 1.8669773462783174e-05, "loss": 0.2145, "step": 25690 }, { "epoch": 9.98, "learning_rate": 1.8669255663430422e-05, "loss": 0.0569, "step": 25700 }, { "epoch": 9.98, "learning_rate": 1.866873786407767e-05, "loss": 0.1551, "step": 25710 }, { "epoch": 9.99, "learning_rate": 1.866822006472492e-05, "loss": 0.1834, "step": 25720 }, { "epoch": 9.99, "learning_rate": 1.866770226537217e-05, "loss": 0.0876, "step": 25730 }, { "epoch": 10.0, "learning_rate": 1.8667184466019417e-05, "loss": 0.3497, "step": 25740 }, { "epoch": 10.0, "learning_rate": 1.866666666666667e-05, "loss": 0.0762, "step": 25750 }, { "epoch": 10.0, "eval_accuracy": 0.9540577716643741, "eval_loss": 0.2068237066268921, "eval_runtime": 8.2623, "eval_samples_per_second": 439.95, "eval_steps_per_second": 55.069, "step": 25750 }, { "epoch": 10.0, "learning_rate": 1.8666148867313917e-05, "loss": 0.1413, "step": 25760 }, { "epoch": 10.01, "learning_rate": 1.8665631067961168e-05, "loss": 0.0883, "step": 25770 }, { "epoch": 10.01, "learning_rate": 1.8665113268608416e-05, "loss": 0.0944, "step": 25780 }, { "epoch": 10.02, "learning_rate": 1.8664595469255664e-05, "loss": 0.1235, "step": 25790 }, { "epoch": 10.02, "learning_rate": 1.8664077669902915e-05, "loss": 0.1089, "step": 25800 }, { "epoch": 10.02, "learning_rate": 1.8663559870550163e-05, "loss": 0.0502, "step": 25810 }, { "epoch": 10.03, "learning_rate": 1.866304207119741e-05, "loss": 0.2277, "step": 25820 }, { "epoch": 10.03, "learning_rate": 1.8662524271844662e-05, "loss": 0.1843, "step": 25830 }, { "epoch": 10.03, "learning_rate": 1.866200647249191e-05, "loss": 0.2591, "step": 25840 }, { "epoch": 10.04, "learning_rate": 1.866148867313916e-05, "loss": 0.1709, "step": 25850 }, { "epoch": 10.04, "learning_rate": 1.866097087378641e-05, "loss": 0.1836, "step": 25860 }, { "epoch": 10.05, "learning_rate": 1.8660453074433657e-05, "loss": 0.196, "step": 25870 }, { "epoch": 10.05, "learning_rate": 1.865993527508091e-05, "loss": 0.2469, "step": 25880 }, { "epoch": 10.05, "learning_rate": 1.8659417475728157e-05, "loss": 0.1956, "step": 25890 }, { "epoch": 10.06, "learning_rate": 1.8658899676375404e-05, "loss": 0.2022, "step": 25900 }, { "epoch": 10.06, "learning_rate": 1.8658381877022656e-05, "loss": 0.1065, "step": 25910 }, { "epoch": 10.07, "learning_rate": 1.8657864077669904e-05, "loss": 0.2449, "step": 25920 }, { "epoch": 10.07, "learning_rate": 1.8657346278317155e-05, "loss": 0.1399, "step": 25930 }, { "epoch": 10.07, "learning_rate": 1.8656828478964403e-05, "loss": 0.2987, "step": 25940 }, { "epoch": 10.08, "learning_rate": 1.865631067961165e-05, "loss": 0.0369, "step": 25950 }, { "epoch": 10.08, "learning_rate": 1.8655792880258902e-05, "loss": 0.1711, "step": 25960 }, { "epoch": 10.09, "learning_rate": 1.865527508090615e-05, "loss": 0.0085, "step": 25970 }, { "epoch": 10.09, "learning_rate": 1.8654757281553398e-05, "loss": 0.0986, "step": 25980 }, { "epoch": 10.09, "learning_rate": 1.865423948220065e-05, "loss": 0.0604, "step": 25990 }, { "epoch": 10.1, "learning_rate": 1.8653721682847897e-05, "loss": 0.219, "step": 26000 }, { "epoch": 10.1, "learning_rate": 1.865320388349515e-05, "loss": 0.1227, "step": 26010 }, { "epoch": 10.1, "learning_rate": 1.8652686084142396e-05, "loss": 0.1623, "step": 26020 }, { "epoch": 10.11, "learning_rate": 1.8652168284789644e-05, "loss": 0.0108, "step": 26030 }, { "epoch": 10.11, "learning_rate": 1.8651650485436896e-05, "loss": 0.1783, "step": 26040 }, { "epoch": 10.12, "learning_rate": 1.8651132686084144e-05, "loss": 0.2796, "step": 26050 }, { "epoch": 10.12, "learning_rate": 1.865061488673139e-05, "loss": 0.1663, "step": 26060 }, { "epoch": 10.12, "learning_rate": 1.8650097087378643e-05, "loss": 0.0145, "step": 26070 }, { "epoch": 10.13, "learning_rate": 1.864957928802589e-05, "loss": 0.0374, "step": 26080 }, { "epoch": 10.13, "learning_rate": 1.8649061488673142e-05, "loss": 0.1968, "step": 26090 }, { "epoch": 10.14, "learning_rate": 1.864854368932039e-05, "loss": 0.1045, "step": 26100 }, { "epoch": 10.14, "learning_rate": 1.8648025889967638e-05, "loss": 0.2047, "step": 26110 }, { "epoch": 10.14, "learning_rate": 1.8647508090614886e-05, "loss": 0.213, "step": 26120 }, { "epoch": 10.15, "learning_rate": 1.8646990291262137e-05, "loss": 0.0913, "step": 26130 }, { "epoch": 10.15, "learning_rate": 1.8646472491909385e-05, "loss": 0.0958, "step": 26140 }, { "epoch": 10.16, "learning_rate": 1.8645954692556636e-05, "loss": 0.3672, "step": 26150 }, { "epoch": 10.16, "learning_rate": 1.8645436893203884e-05, "loss": 0.1826, "step": 26160 }, { "epoch": 10.16, "learning_rate": 1.8644919093851136e-05, "loss": 0.142, "step": 26170 }, { "epoch": 10.17, "learning_rate": 1.8644401294498384e-05, "loss": 0.1153, "step": 26180 }, { "epoch": 10.17, "learning_rate": 1.864388349514563e-05, "loss": 0.2934, "step": 26190 }, { "epoch": 10.17, "learning_rate": 1.864336569579288e-05, "loss": 0.1105, "step": 26200 }, { "epoch": 10.18, "learning_rate": 1.864284789644013e-05, "loss": 0.2273, "step": 26210 }, { "epoch": 10.18, "learning_rate": 1.864233009708738e-05, "loss": 0.0234, "step": 26220 }, { "epoch": 10.19, "learning_rate": 1.864181229773463e-05, "loss": 0.173, "step": 26230 }, { "epoch": 10.19, "learning_rate": 1.8641294498381878e-05, "loss": 0.0606, "step": 26240 }, { "epoch": 10.19, "learning_rate": 1.864077669902913e-05, "loss": 0.1138, "step": 26250 }, { "epoch": 10.2, "learning_rate": 1.8640258899676377e-05, "loss": 0.2025, "step": 26260 }, { "epoch": 10.2, "learning_rate": 1.8639741100323625e-05, "loss": 0.0531, "step": 26270 }, { "epoch": 10.21, "learning_rate": 1.8639223300970873e-05, "loss": 0.1708, "step": 26280 }, { "epoch": 10.21, "learning_rate": 1.8638705501618124e-05, "loss": 0.3425, "step": 26290 }, { "epoch": 10.21, "learning_rate": 1.8638187702265372e-05, "loss": 0.2293, "step": 26300 }, { "epoch": 10.22, "learning_rate": 1.8637669902912624e-05, "loss": 0.1665, "step": 26310 }, { "epoch": 10.22, "learning_rate": 1.863715210355987e-05, "loss": 0.1295, "step": 26320 }, { "epoch": 10.23, "learning_rate": 1.8636634304207123e-05, "loss": 0.1373, "step": 26330 }, { "epoch": 10.23, "learning_rate": 1.863611650485437e-05, "loss": 0.268, "step": 26340 }, { "epoch": 10.23, "learning_rate": 1.8635598705501622e-05, "loss": 0.2208, "step": 26350 }, { "epoch": 10.24, "learning_rate": 1.8635080906148867e-05, "loss": 0.177, "step": 26360 }, { "epoch": 10.24, "learning_rate": 1.8634563106796118e-05, "loss": 0.0601, "step": 26370 }, { "epoch": 10.24, "learning_rate": 1.8634045307443366e-05, "loss": 0.1081, "step": 26380 }, { "epoch": 10.25, "learning_rate": 1.8633527508090617e-05, "loss": 0.1796, "step": 26390 }, { "epoch": 10.25, "learning_rate": 1.8633009708737865e-05, "loss": 0.1517, "step": 26400 }, { "epoch": 10.26, "learning_rate": 1.8632491909385116e-05, "loss": 0.1894, "step": 26410 }, { "epoch": 10.26, "learning_rate": 1.8631974110032364e-05, "loss": 0.1361, "step": 26420 }, { "epoch": 10.26, "learning_rate": 1.8631456310679616e-05, "loss": 0.1004, "step": 26430 }, { "epoch": 10.27, "learning_rate": 1.863093851132686e-05, "loss": 0.2438, "step": 26440 }, { "epoch": 10.27, "learning_rate": 1.863042071197411e-05, "loss": 0.3972, "step": 26450 }, { "epoch": 10.28, "learning_rate": 1.862990291262136e-05, "loss": 0.2347, "step": 26460 }, { "epoch": 10.28, "learning_rate": 1.862938511326861e-05, "loss": 0.1886, "step": 26470 }, { "epoch": 10.28, "learning_rate": 1.862886731391586e-05, "loss": 0.1277, "step": 26480 }, { "epoch": 10.29, "learning_rate": 1.862834951456311e-05, "loss": 0.1699, "step": 26490 }, { "epoch": 10.29, "learning_rate": 1.8627831715210358e-05, "loss": 0.1224, "step": 26500 }, { "epoch": 10.3, "learning_rate": 1.862731391585761e-05, "loss": 0.1342, "step": 26510 }, { "epoch": 10.3, "learning_rate": 1.8626796116504854e-05, "loss": 0.0604, "step": 26520 }, { "epoch": 10.3, "learning_rate": 1.8626278317152105e-05, "loss": 0.0914, "step": 26530 }, { "epoch": 10.31, "learning_rate": 1.8625760517799353e-05, "loss": 0.1004, "step": 26540 }, { "epoch": 10.31, "learning_rate": 1.8625242718446604e-05, "loss": 0.044, "step": 26550 }, { "epoch": 10.31, "learning_rate": 1.8624724919093852e-05, "loss": 0.0796, "step": 26560 }, { "epoch": 10.32, "learning_rate": 1.8624207119741103e-05, "loss": 0.2106, "step": 26570 }, { "epoch": 10.32, "learning_rate": 1.862368932038835e-05, "loss": 0.145, "step": 26580 }, { "epoch": 10.33, "learning_rate": 1.8623171521035603e-05, "loss": 0.0645, "step": 26590 }, { "epoch": 10.33, "learning_rate": 1.8622653721682847e-05, "loss": 0.0597, "step": 26600 }, { "epoch": 10.33, "learning_rate": 1.86221359223301e-05, "loss": 0.2596, "step": 26610 }, { "epoch": 10.34, "learning_rate": 1.8621618122977346e-05, "loss": 0.1288, "step": 26620 }, { "epoch": 10.34, "learning_rate": 1.8621100323624598e-05, "loss": 0.091, "step": 26630 }, { "epoch": 10.35, "learning_rate": 1.8620582524271846e-05, "loss": 0.0473, "step": 26640 }, { "epoch": 10.35, "learning_rate": 1.8620064724919097e-05, "loss": 0.2155, "step": 26650 }, { "epoch": 10.35, "learning_rate": 1.8619546925566345e-05, "loss": 0.1076, "step": 26660 }, { "epoch": 10.36, "learning_rate": 1.8619029126213596e-05, "loss": 0.1084, "step": 26670 }, { "epoch": 10.36, "learning_rate": 1.861851132686084e-05, "loss": 0.1246, "step": 26680 }, { "epoch": 10.37, "learning_rate": 1.8617993527508092e-05, "loss": 0.1036, "step": 26690 }, { "epoch": 10.37, "learning_rate": 1.861747572815534e-05, "loss": 0.2292, "step": 26700 }, { "epoch": 10.37, "learning_rate": 1.861695792880259e-05, "loss": 0.2097, "step": 26710 }, { "epoch": 10.38, "learning_rate": 1.861644012944984e-05, "loss": 0.1032, "step": 26720 }, { "epoch": 10.38, "learning_rate": 1.861592233009709e-05, "loss": 0.3145, "step": 26730 }, { "epoch": 10.38, "learning_rate": 1.861540453074434e-05, "loss": 0.0944, "step": 26740 }, { "epoch": 10.39, "learning_rate": 1.861488673139159e-05, "loss": 0.0892, "step": 26750 }, { "epoch": 10.39, "learning_rate": 1.8614368932038834e-05, "loss": 0.2271, "step": 26760 }, { "epoch": 10.4, "learning_rate": 1.8613851132686086e-05, "loss": 0.1669, "step": 26770 }, { "epoch": 10.4, "learning_rate": 1.8613333333333334e-05, "loss": 0.2078, "step": 26780 }, { "epoch": 10.4, "learning_rate": 1.8612815533980585e-05, "loss": 0.2071, "step": 26790 }, { "epoch": 10.41, "learning_rate": 1.8612297734627833e-05, "loss": 0.187, "step": 26800 }, { "epoch": 10.41, "learning_rate": 1.8611779935275084e-05, "loss": 0.1229, "step": 26810 }, { "epoch": 10.42, "learning_rate": 1.8611262135922332e-05, "loss": 0.0654, "step": 26820 }, { "epoch": 10.42, "learning_rate": 1.8610744336569583e-05, "loss": 0.146, "step": 26830 }, { "epoch": 10.42, "learning_rate": 1.8610226537216828e-05, "loss": 0.0753, "step": 26840 }, { "epoch": 10.43, "learning_rate": 1.860970873786408e-05, "loss": 0.0933, "step": 26850 }, { "epoch": 10.43, "learning_rate": 1.8609190938511327e-05, "loss": 0.3146, "step": 26860 }, { "epoch": 10.43, "learning_rate": 1.860867313915858e-05, "loss": 0.1426, "step": 26870 }, { "epoch": 10.44, "learning_rate": 1.8608155339805826e-05, "loss": 0.0857, "step": 26880 }, { "epoch": 10.44, "learning_rate": 1.8607637540453078e-05, "loss": 0.1185, "step": 26890 }, { "epoch": 10.45, "learning_rate": 1.8607119741100326e-05, "loss": 0.2082, "step": 26900 }, { "epoch": 10.45, "learning_rate": 1.8606601941747574e-05, "loss": 0.1496, "step": 26910 }, { "epoch": 10.45, "learning_rate": 1.8606084142394825e-05, "loss": 0.3489, "step": 26920 }, { "epoch": 10.46, "learning_rate": 1.8605566343042073e-05, "loss": 0.0337, "step": 26930 }, { "epoch": 10.46, "learning_rate": 1.860504854368932e-05, "loss": 0.1334, "step": 26940 }, { "epoch": 10.47, "learning_rate": 1.8604530744336572e-05, "loss": 0.033, "step": 26950 }, { "epoch": 10.47, "learning_rate": 1.860401294498382e-05, "loss": 0.1618, "step": 26960 }, { "epoch": 10.47, "learning_rate": 1.860349514563107e-05, "loss": 0.0418, "step": 26970 }, { "epoch": 10.48, "learning_rate": 1.860297734627832e-05, "loss": 0.1975, "step": 26980 }, { "epoch": 10.48, "learning_rate": 1.8602459546925567e-05, "loss": 0.1968, "step": 26990 }, { "epoch": 10.49, "learning_rate": 1.860194174757282e-05, "loss": 0.1373, "step": 27000 }, { "epoch": 10.49, "learning_rate": 1.8601423948220066e-05, "loss": 0.227, "step": 27010 }, { "epoch": 10.49, "learning_rate": 1.8600906148867314e-05, "loss": 0.0598, "step": 27020 }, { "epoch": 10.5, "learning_rate": 1.8600388349514566e-05, "loss": 0.0967, "step": 27030 }, { "epoch": 10.5, "learning_rate": 1.8599870550161813e-05, "loss": 0.1639, "step": 27040 }, { "epoch": 10.5, "learning_rate": 1.8599352750809065e-05, "loss": 0.2628, "step": 27050 }, { "epoch": 10.51, "learning_rate": 1.8598834951456313e-05, "loss": 0.2398, "step": 27060 }, { "epoch": 10.51, "learning_rate": 1.859831715210356e-05, "loss": 0.1593, "step": 27070 }, { "epoch": 10.52, "learning_rate": 1.8597799352750812e-05, "loss": 0.0469, "step": 27080 }, { "epoch": 10.52, "learning_rate": 1.859728155339806e-05, "loss": 0.1226, "step": 27090 }, { "epoch": 10.52, "learning_rate": 1.8596763754045308e-05, "loss": 0.2323, "step": 27100 }, { "epoch": 10.53, "learning_rate": 1.859624595469256e-05, "loss": 0.1481, "step": 27110 }, { "epoch": 10.53, "learning_rate": 1.8595728155339807e-05, "loss": 0.2359, "step": 27120 }, { "epoch": 10.54, "learning_rate": 1.859521035598706e-05, "loss": 0.1212, "step": 27130 }, { "epoch": 10.54, "learning_rate": 1.8594692556634306e-05, "loss": 0.5169, "step": 27140 }, { "epoch": 10.54, "learning_rate": 1.8594174757281554e-05, "loss": 0.1984, "step": 27150 }, { "epoch": 10.55, "learning_rate": 1.8593656957928805e-05, "loss": 0.043, "step": 27160 }, { "epoch": 10.55, "learning_rate": 1.8593139158576053e-05, "loss": 0.0485, "step": 27170 }, { "epoch": 10.56, "learning_rate": 1.85926213592233e-05, "loss": 0.108, "step": 27180 }, { "epoch": 10.56, "learning_rate": 1.8592103559870553e-05, "loss": 0.3133, "step": 27190 }, { "epoch": 10.56, "learning_rate": 1.85915857605178e-05, "loss": 0.4284, "step": 27200 }, { "epoch": 10.57, "learning_rate": 1.859106796116505e-05, "loss": 0.094, "step": 27210 }, { "epoch": 10.57, "learning_rate": 1.85905501618123e-05, "loss": 0.2154, "step": 27220 }, { "epoch": 10.57, "learning_rate": 1.8590032362459548e-05, "loss": 0.1836, "step": 27230 }, { "epoch": 10.58, "learning_rate": 1.85895145631068e-05, "loss": 0.2227, "step": 27240 }, { "epoch": 10.58, "learning_rate": 1.8588996763754047e-05, "loss": 0.0724, "step": 27250 }, { "epoch": 10.59, "learning_rate": 1.8588478964401295e-05, "loss": 0.0855, "step": 27260 }, { "epoch": 10.59, "learning_rate": 1.8587961165048546e-05, "loss": 0.105, "step": 27270 }, { "epoch": 10.59, "learning_rate": 1.8587443365695794e-05, "loss": 0.1321, "step": 27280 }, { "epoch": 10.6, "learning_rate": 1.8586925566343042e-05, "loss": 0.0996, "step": 27290 }, { "epoch": 10.6, "learning_rate": 1.8586407766990293e-05, "loss": 0.071, "step": 27300 }, { "epoch": 10.61, "learning_rate": 1.858588996763754e-05, "loss": 0.291, "step": 27310 }, { "epoch": 10.61, "learning_rate": 1.8585372168284793e-05, "loss": 0.1029, "step": 27320 }, { "epoch": 10.61, "learning_rate": 1.858485436893204e-05, "loss": 0.2579, "step": 27330 }, { "epoch": 10.62, "learning_rate": 1.858433656957929e-05, "loss": 0.0925, "step": 27340 }, { "epoch": 10.62, "learning_rate": 1.858381877022654e-05, "loss": 0.0832, "step": 27350 }, { "epoch": 10.63, "learning_rate": 1.8583300970873788e-05, "loss": 0.0713, "step": 27360 }, { "epoch": 10.63, "learning_rate": 1.8582783171521036e-05, "loss": 0.0871, "step": 27370 }, { "epoch": 10.63, "learning_rate": 1.8582265372168287e-05, "loss": 0.1462, "step": 27380 }, { "epoch": 10.64, "learning_rate": 1.8581747572815535e-05, "loss": 0.1943, "step": 27390 }, { "epoch": 10.64, "learning_rate": 1.8581229773462786e-05, "loss": 0.0648, "step": 27400 }, { "epoch": 10.64, "learning_rate": 1.8580711974110034e-05, "loss": 0.0124, "step": 27410 }, { "epoch": 10.65, "learning_rate": 1.8580194174757282e-05, "loss": 0.1284, "step": 27420 }, { "epoch": 10.65, "learning_rate": 1.8579676375404533e-05, "loss": 0.0942, "step": 27430 }, { "epoch": 10.66, "learning_rate": 1.857915857605178e-05, "loss": 0.1293, "step": 27440 }, { "epoch": 10.66, "learning_rate": 1.857864077669903e-05, "loss": 0.1612, "step": 27450 }, { "epoch": 10.66, "learning_rate": 1.857812297734628e-05, "loss": 0.1978, "step": 27460 }, { "epoch": 10.67, "learning_rate": 1.857760517799353e-05, "loss": 0.1312, "step": 27470 }, { "epoch": 10.67, "learning_rate": 1.857708737864078e-05, "loss": 0.0827, "step": 27480 }, { "epoch": 10.68, "learning_rate": 1.8576569579288028e-05, "loss": 0.2018, "step": 27490 }, { "epoch": 10.68, "learning_rate": 1.8576051779935276e-05, "loss": 0.0057, "step": 27500 }, { "epoch": 10.68, "learning_rate": 1.8575533980582527e-05, "loss": 0.466, "step": 27510 }, { "epoch": 10.69, "learning_rate": 1.8575016181229775e-05, "loss": 0.1091, "step": 27520 }, { "epoch": 10.69, "learning_rate": 1.8574498381877023e-05, "loss": 0.1364, "step": 27530 }, { "epoch": 10.7, "learning_rate": 1.8573980582524274e-05, "loss": 0.1108, "step": 27540 }, { "epoch": 10.7, "learning_rate": 1.8573462783171522e-05, "loss": 0.1916, "step": 27550 }, { "epoch": 10.7, "learning_rate": 1.8572944983818773e-05, "loss": 0.027, "step": 27560 }, { "epoch": 10.71, "learning_rate": 1.857242718446602e-05, "loss": 0.1526, "step": 27570 }, { "epoch": 10.71, "learning_rate": 1.857190938511327e-05, "loss": 0.1889, "step": 27580 }, { "epoch": 10.71, "learning_rate": 1.8571391585760517e-05, "loss": 0.1239, "step": 27590 }, { "epoch": 10.72, "learning_rate": 1.857087378640777e-05, "loss": 0.0917, "step": 27600 }, { "epoch": 10.72, "learning_rate": 1.8570355987055016e-05, "loss": 0.0597, "step": 27610 }, { "epoch": 10.73, "learning_rate": 1.8569838187702268e-05, "loss": 0.2808, "step": 27620 }, { "epoch": 10.73, "learning_rate": 1.8569320388349516e-05, "loss": 0.2267, "step": 27630 }, { "epoch": 10.73, "learning_rate": 1.8568802588996767e-05, "loss": 0.2857, "step": 27640 }, { "epoch": 10.74, "learning_rate": 1.8568284789644015e-05, "loss": 0.1459, "step": 27650 }, { "epoch": 10.74, "learning_rate": 1.8567766990291263e-05, "loss": 0.1205, "step": 27660 }, { "epoch": 10.75, "learning_rate": 1.856724919093851e-05, "loss": 0.1098, "step": 27670 }, { "epoch": 10.75, "learning_rate": 1.8566731391585762e-05, "loss": 0.1632, "step": 27680 }, { "epoch": 10.75, "learning_rate": 1.856621359223301e-05, "loss": 0.2184, "step": 27690 }, { "epoch": 10.76, "learning_rate": 1.856569579288026e-05, "loss": 0.1218, "step": 27700 }, { "epoch": 10.76, "learning_rate": 1.856517799352751e-05, "loss": 0.1731, "step": 27710 }, { "epoch": 10.77, "learning_rate": 1.856466019417476e-05, "loss": 0.0595, "step": 27720 }, { "epoch": 10.77, "learning_rate": 1.856414239482201e-05, "loss": 0.2491, "step": 27730 }, { "epoch": 10.77, "learning_rate": 1.8563624595469256e-05, "loss": 0.1026, "step": 27740 }, { "epoch": 10.78, "learning_rate": 1.8563106796116504e-05, "loss": 0.1624, "step": 27750 }, { "epoch": 10.78, "learning_rate": 1.8562588996763755e-05, "loss": 0.1069, "step": 27760 }, { "epoch": 10.78, "learning_rate": 1.8562071197411003e-05, "loss": 0.1931, "step": 27770 }, { "epoch": 10.79, "learning_rate": 1.8561553398058255e-05, "loss": 0.1642, "step": 27780 }, { "epoch": 10.79, "learning_rate": 1.8561035598705503e-05, "loss": 0.1252, "step": 27790 }, { "epoch": 10.8, "learning_rate": 1.8560517799352754e-05, "loss": 0.274, "step": 27800 }, { "epoch": 10.8, "learning_rate": 1.8560000000000002e-05, "loss": 0.0973, "step": 27810 }, { "epoch": 10.8, "learning_rate": 1.855948220064725e-05, "loss": 0.1297, "step": 27820 }, { "epoch": 10.81, "learning_rate": 1.8558964401294498e-05, "loss": 0.1829, "step": 27830 }, { "epoch": 10.81, "learning_rate": 1.855844660194175e-05, "loss": 0.1329, "step": 27840 }, { "epoch": 10.82, "learning_rate": 1.8557928802588997e-05, "loss": 0.2081, "step": 27850 }, { "epoch": 10.82, "learning_rate": 1.8557411003236248e-05, "loss": 0.1544, "step": 27860 }, { "epoch": 10.82, "learning_rate": 1.8556893203883496e-05, "loss": 0.196, "step": 27870 }, { "epoch": 10.83, "learning_rate": 1.8556375404530748e-05, "loss": 0.0522, "step": 27880 }, { "epoch": 10.83, "learning_rate": 1.8555857605177995e-05, "loss": 0.1635, "step": 27890 }, { "epoch": 10.83, "learning_rate": 1.8555339805825243e-05, "loss": 0.126, "step": 27900 }, { "epoch": 10.84, "learning_rate": 1.855482200647249e-05, "loss": 0.1632, "step": 27910 }, { "epoch": 10.84, "learning_rate": 1.8554304207119743e-05, "loss": 0.1862, "step": 27920 }, { "epoch": 10.85, "learning_rate": 1.855378640776699e-05, "loss": 0.0555, "step": 27930 }, { "epoch": 10.85, "learning_rate": 1.8553268608414242e-05, "loss": 0.0298, "step": 27940 }, { "epoch": 10.85, "learning_rate": 1.855275080906149e-05, "loss": 0.2461, "step": 27950 }, { "epoch": 10.86, "learning_rate": 1.855223300970874e-05, "loss": 0.1963, "step": 27960 }, { "epoch": 10.86, "learning_rate": 1.855171521035599e-05, "loss": 0.1222, "step": 27970 }, { "epoch": 10.87, "learning_rate": 1.8551197411003237e-05, "loss": 0.0932, "step": 27980 }, { "epoch": 10.87, "learning_rate": 1.8550679611650485e-05, "loss": 0.0844, "step": 27990 }, { "epoch": 10.87, "learning_rate": 1.8550161812297736e-05, "loss": 0.0896, "step": 28000 }, { "epoch": 10.88, "learning_rate": 1.8549644012944984e-05, "loss": 0.1786, "step": 28010 }, { "epoch": 10.88, "learning_rate": 1.8549126213592235e-05, "loss": 0.3595, "step": 28020 }, { "epoch": 10.89, "learning_rate": 1.8548608414239483e-05, "loss": 0.2076, "step": 28030 }, { "epoch": 10.89, "learning_rate": 1.8548090614886735e-05, "loss": 0.094, "step": 28040 }, { "epoch": 10.89, "learning_rate": 1.8547572815533983e-05, "loss": 0.1093, "step": 28050 }, { "epoch": 10.9, "learning_rate": 1.8547055016181234e-05, "loss": 0.2313, "step": 28060 }, { "epoch": 10.9, "learning_rate": 1.854653721682848e-05, "loss": 0.1294, "step": 28070 }, { "epoch": 10.9, "learning_rate": 1.854601941747573e-05, "loss": 0.1969, "step": 28080 }, { "epoch": 10.91, "learning_rate": 1.8545501618122978e-05, "loss": 0.0361, "step": 28090 }, { "epoch": 10.91, "learning_rate": 1.854498381877023e-05, "loss": 0.0569, "step": 28100 }, { "epoch": 10.92, "learning_rate": 1.8544466019417477e-05, "loss": 0.1107, "step": 28110 }, { "epoch": 10.92, "learning_rate": 1.8543948220064728e-05, "loss": 0.0683, "step": 28120 }, { "epoch": 10.92, "learning_rate": 1.8543430420711976e-05, "loss": 0.0947, "step": 28130 }, { "epoch": 10.93, "learning_rate": 1.8542912621359227e-05, "loss": 0.0736, "step": 28140 }, { "epoch": 10.93, "learning_rate": 1.8542394822006472e-05, "loss": 0.0343, "step": 28150 }, { "epoch": 10.94, "learning_rate": 1.8541877022653723e-05, "loss": 0.2855, "step": 28160 }, { "epoch": 10.94, "learning_rate": 1.854135922330097e-05, "loss": 0.1257, "step": 28170 }, { "epoch": 10.94, "learning_rate": 1.8540841423948222e-05, "loss": 0.2852, "step": 28180 }, { "epoch": 10.95, "learning_rate": 1.854032362459547e-05, "loss": 0.2797, "step": 28190 }, { "epoch": 10.95, "learning_rate": 1.8539805825242722e-05, "loss": 0.1107, "step": 28200 }, { "epoch": 10.96, "learning_rate": 1.853928802588997e-05, "loss": 0.1122, "step": 28210 }, { "epoch": 10.96, "learning_rate": 1.853877022653722e-05, "loss": 0.1052, "step": 28220 }, { "epoch": 10.96, "learning_rate": 1.8538252427184465e-05, "loss": 0.0379, "step": 28230 }, { "epoch": 10.97, "learning_rate": 1.8537734627831717e-05, "loss": 0.2837, "step": 28240 }, { "epoch": 10.97, "learning_rate": 1.8537216828478965e-05, "loss": 0.1936, "step": 28250 }, { "epoch": 10.97, "learning_rate": 1.8536699029126216e-05, "loss": 0.3791, "step": 28260 }, { "epoch": 10.98, "learning_rate": 1.8536181229773464e-05, "loss": 0.2145, "step": 28270 }, { "epoch": 10.98, "learning_rate": 1.8535663430420715e-05, "loss": 0.0559, "step": 28280 }, { "epoch": 10.99, "learning_rate": 1.8535145631067963e-05, "loss": 0.0722, "step": 28290 }, { "epoch": 10.99, "learning_rate": 1.8534627831715215e-05, "loss": 0.2436, "step": 28300 }, { "epoch": 10.99, "learning_rate": 1.853411003236246e-05, "loss": 0.1536, "step": 28310 }, { "epoch": 11.0, "learning_rate": 1.853359223300971e-05, "loss": 0.127, "step": 28320 }, { "epoch": 11.0, "eval_accuracy": 0.9543328748280605, "eval_loss": 0.19855087995529175, "eval_runtime": 8.2681, "eval_samples_per_second": 439.642, "eval_steps_per_second": 55.031, "step": 28325 }, { "epoch": 11.0, "learning_rate": 1.8533074433656958e-05, "loss": 0.2633, "step": 28330 }, { "epoch": 11.01, "learning_rate": 1.853255663430421e-05, "loss": 0.119, "step": 28340 }, { "epoch": 11.01, "learning_rate": 1.8532038834951458e-05, "loss": 0.2158, "step": 28350 }, { "epoch": 11.01, "learning_rate": 1.853152103559871e-05, "loss": 0.0274, "step": 28360 }, { "epoch": 11.02, "learning_rate": 1.8531003236245957e-05, "loss": 0.2392, "step": 28370 }, { "epoch": 11.02, "learning_rate": 1.8530485436893205e-05, "loss": 0.2463, "step": 28380 }, { "epoch": 11.03, "learning_rate": 1.8529967637540453e-05, "loss": 0.1769, "step": 28390 }, { "epoch": 11.03, "learning_rate": 1.8529449838187704e-05, "loss": 0.0163, "step": 28400 }, { "epoch": 11.03, "learning_rate": 1.8528932038834952e-05, "loss": 0.2622, "step": 28410 }, { "epoch": 11.04, "learning_rate": 1.8528414239482203e-05, "loss": 0.1987, "step": 28420 }, { "epoch": 11.04, "learning_rate": 1.852789644012945e-05, "loss": 0.0565, "step": 28430 }, { "epoch": 11.04, "learning_rate": 1.8527378640776702e-05, "loss": 0.1533, "step": 28440 }, { "epoch": 11.05, "learning_rate": 1.852686084142395e-05, "loss": 0.0539, "step": 28450 }, { "epoch": 11.05, "learning_rate": 1.8526343042071198e-05, "loss": 0.1568, "step": 28460 }, { "epoch": 11.06, "learning_rate": 1.8525825242718446e-05, "loss": 0.1002, "step": 28470 }, { "epoch": 11.06, "learning_rate": 1.8525307443365697e-05, "loss": 0.0666, "step": 28480 }, { "epoch": 11.06, "learning_rate": 1.8524789644012945e-05, "loss": 0.2074, "step": 28490 }, { "epoch": 11.07, "learning_rate": 1.8524271844660197e-05, "loss": 0.1428, "step": 28500 }, { "epoch": 11.07, "learning_rate": 1.8523754045307445e-05, "loss": 0.1215, "step": 28510 }, { "epoch": 11.08, "learning_rate": 1.8523236245954696e-05, "loss": 0.1956, "step": 28520 }, { "epoch": 11.08, "learning_rate": 1.8522718446601944e-05, "loss": 0.0998, "step": 28530 }, { "epoch": 11.08, "learning_rate": 1.8522200647249192e-05, "loss": 0.083, "step": 28540 }, { "epoch": 11.09, "learning_rate": 1.852168284789644e-05, "loss": 0.2017, "step": 28550 }, { "epoch": 11.09, "learning_rate": 1.852116504854369e-05, "loss": 0.131, "step": 28560 }, { "epoch": 11.1, "learning_rate": 1.852064724919094e-05, "loss": 0.115, "step": 28570 }, { "epoch": 11.1, "learning_rate": 1.852012944983819e-05, "loss": 0.0231, "step": 28580 }, { "epoch": 11.1, "learning_rate": 1.8519611650485438e-05, "loss": 0.0994, "step": 28590 }, { "epoch": 11.11, "learning_rate": 1.851909385113269e-05, "loss": 0.1065, "step": 28600 }, { "epoch": 11.11, "learning_rate": 1.8518576051779937e-05, "loss": 0.1115, "step": 28610 }, { "epoch": 11.11, "learning_rate": 1.8518058252427185e-05, "loss": 0.1223, "step": 28620 }, { "epoch": 11.12, "learning_rate": 1.8517540453074437e-05, "loss": 0.0024, "step": 28630 }, { "epoch": 11.12, "learning_rate": 1.8517022653721685e-05, "loss": 0.1297, "step": 28640 }, { "epoch": 11.13, "learning_rate": 1.8516504854368933e-05, "loss": 0.0852, "step": 28650 }, { "epoch": 11.13, "learning_rate": 1.8515987055016184e-05, "loss": 0.1143, "step": 28660 }, { "epoch": 11.13, "learning_rate": 1.8515469255663432e-05, "loss": 0.2768, "step": 28670 }, { "epoch": 11.14, "learning_rate": 1.851495145631068e-05, "loss": 0.1342, "step": 28680 }, { "epoch": 11.14, "learning_rate": 1.851443365695793e-05, "loss": 0.055, "step": 28690 }, { "epoch": 11.15, "learning_rate": 1.851391585760518e-05, "loss": 0.1669, "step": 28700 }, { "epoch": 11.15, "learning_rate": 1.851339805825243e-05, "loss": 0.0961, "step": 28710 }, { "epoch": 11.15, "learning_rate": 1.8512880258899678e-05, "loss": 0.2785, "step": 28720 }, { "epoch": 11.16, "learning_rate": 1.8512362459546926e-05, "loss": 0.027, "step": 28730 }, { "epoch": 11.16, "learning_rate": 1.8511844660194177e-05, "loss": 0.1963, "step": 28740 }, { "epoch": 11.17, "learning_rate": 1.8511326860841425e-05, "loss": 0.0683, "step": 28750 }, { "epoch": 11.17, "learning_rate": 1.8510809061488673e-05, "loss": 0.4444, "step": 28760 }, { "epoch": 11.17, "learning_rate": 1.8510291262135925e-05, "loss": 0.0804, "step": 28770 }, { "epoch": 11.18, "learning_rate": 1.8509773462783172e-05, "loss": 0.0778, "step": 28780 }, { "epoch": 11.18, "learning_rate": 1.8509255663430424e-05, "loss": 0.1115, "step": 28790 }, { "epoch": 11.18, "learning_rate": 1.8508737864077672e-05, "loss": 0.2214, "step": 28800 }, { "epoch": 11.19, "learning_rate": 1.850822006472492e-05, "loss": 0.0614, "step": 28810 }, { "epoch": 11.19, "learning_rate": 1.850770226537217e-05, "loss": 0.2345, "step": 28820 }, { "epoch": 11.2, "learning_rate": 1.850718446601942e-05, "loss": 0.2467, "step": 28830 }, { "epoch": 11.2, "learning_rate": 1.8506666666666667e-05, "loss": 0.0686, "step": 28840 }, { "epoch": 11.2, "learning_rate": 1.8506148867313918e-05, "loss": 0.3031, "step": 28850 }, { "epoch": 11.21, "learning_rate": 1.8505631067961166e-05, "loss": 0.172, "step": 28860 }, { "epoch": 11.21, "learning_rate": 1.8505113268608417e-05, "loss": 0.4145, "step": 28870 }, { "epoch": 11.22, "learning_rate": 1.8504595469255665e-05, "loss": 0.0354, "step": 28880 }, { "epoch": 11.22, "learning_rate": 1.8504077669902913e-05, "loss": 0.0488, "step": 28890 }, { "epoch": 11.22, "learning_rate": 1.8503559870550164e-05, "loss": 0.173, "step": 28900 }, { "epoch": 11.23, "learning_rate": 1.8503042071197412e-05, "loss": 0.063, "step": 28910 }, { "epoch": 11.23, "learning_rate": 1.850252427184466e-05, "loss": 0.0848, "step": 28920 }, { "epoch": 11.23, "learning_rate": 1.850200647249191e-05, "loss": 0.1574, "step": 28930 }, { "epoch": 11.24, "learning_rate": 1.850148867313916e-05, "loss": 0.205, "step": 28940 }, { "epoch": 11.24, "learning_rate": 1.850097087378641e-05, "loss": 0.0258, "step": 28950 }, { "epoch": 11.25, "learning_rate": 1.850045307443366e-05, "loss": 0.1676, "step": 28960 }, { "epoch": 11.25, "learning_rate": 1.8499935275080907e-05, "loss": 0.094, "step": 28970 }, { "epoch": 11.25, "learning_rate": 1.8499417475728158e-05, "loss": 0.2293, "step": 28980 }, { "epoch": 11.26, "learning_rate": 1.8498899676375406e-05, "loss": 0.2111, "step": 28990 }, { "epoch": 11.26, "learning_rate": 1.8498381877022654e-05, "loss": 0.1803, "step": 29000 }, { "epoch": 11.27, "learning_rate": 1.8497864077669905e-05, "loss": 0.2255, "step": 29010 }, { "epoch": 11.27, "learning_rate": 1.8497346278317153e-05, "loss": 0.1102, "step": 29020 }, { "epoch": 11.27, "learning_rate": 1.8496828478964404e-05, "loss": 0.1347, "step": 29030 }, { "epoch": 11.28, "learning_rate": 1.8496310679611652e-05, "loss": 0.0068, "step": 29040 }, { "epoch": 11.28, "learning_rate": 1.84957928802589e-05, "loss": 0.0684, "step": 29050 }, { "epoch": 11.29, "learning_rate": 1.8495275080906148e-05, "loss": 0.0341, "step": 29060 }, { "epoch": 11.29, "learning_rate": 1.84947572815534e-05, "loss": 0.1484, "step": 29070 }, { "epoch": 11.29, "learning_rate": 1.8494239482200647e-05, "loss": 0.2158, "step": 29080 }, { "epoch": 11.3, "learning_rate": 1.84937216828479e-05, "loss": 0.1737, "step": 29090 }, { "epoch": 11.3, "learning_rate": 1.8493203883495147e-05, "loss": 0.1307, "step": 29100 }, { "epoch": 11.3, "learning_rate": 1.8492686084142398e-05, "loss": 0.0873, "step": 29110 }, { "epoch": 11.31, "learning_rate": 1.8492168284789646e-05, "loss": 0.268, "step": 29120 }, { "epoch": 11.31, "learning_rate": 1.8491650485436894e-05, "loss": 0.1517, "step": 29130 }, { "epoch": 11.32, "learning_rate": 1.8491132686084142e-05, "loss": 0.1677, "step": 29140 }, { "epoch": 11.32, "learning_rate": 1.8490614886731393e-05, "loss": 0.0447, "step": 29150 }, { "epoch": 11.32, "learning_rate": 1.849009708737864e-05, "loss": 0.0773, "step": 29160 }, { "epoch": 11.33, "learning_rate": 1.8489579288025892e-05, "loss": 0.0748, "step": 29170 }, { "epoch": 11.33, "learning_rate": 1.848906148867314e-05, "loss": 0.157, "step": 29180 }, { "epoch": 11.34, "learning_rate": 1.848854368932039e-05, "loss": 0.1562, "step": 29190 }, { "epoch": 11.34, "learning_rate": 1.848802588996764e-05, "loss": 0.2562, "step": 29200 }, { "epoch": 11.34, "learning_rate": 1.8487508090614887e-05, "loss": 0.2108, "step": 29210 }, { "epoch": 11.35, "learning_rate": 1.8486990291262135e-05, "loss": 0.0312, "step": 29220 }, { "epoch": 11.35, "learning_rate": 1.8486472491909387e-05, "loss": 0.5272, "step": 29230 }, { "epoch": 11.36, "learning_rate": 1.8485954692556635e-05, "loss": 0.2136, "step": 29240 }, { "epoch": 11.36, "learning_rate": 1.8485436893203886e-05, "loss": 0.1327, "step": 29250 }, { "epoch": 11.36, "learning_rate": 1.8484919093851134e-05, "loss": 0.0731, "step": 29260 }, { "epoch": 11.37, "learning_rate": 1.8484401294498385e-05, "loss": 0.1238, "step": 29270 }, { "epoch": 11.37, "learning_rate": 1.8483883495145633e-05, "loss": 0.0453, "step": 29280 }, { "epoch": 11.37, "learning_rate": 1.848336569579288e-05, "loss": 0.2308, "step": 29290 }, { "epoch": 11.38, "learning_rate": 1.848284789644013e-05, "loss": 0.1713, "step": 29300 }, { "epoch": 11.38, "learning_rate": 1.848233009708738e-05, "loss": 0.1046, "step": 29310 }, { "epoch": 11.39, "learning_rate": 1.8481812297734628e-05, "loss": 0.1481, "step": 29320 }, { "epoch": 11.39, "learning_rate": 1.848129449838188e-05, "loss": 0.0161, "step": 29330 }, { "epoch": 11.39, "learning_rate": 1.8480776699029127e-05, "loss": 0.1971, "step": 29340 }, { "epoch": 11.4, "learning_rate": 1.848025889967638e-05, "loss": 0.0982, "step": 29350 }, { "epoch": 11.4, "learning_rate": 1.8479741100323627e-05, "loss": 0.2388, "step": 29360 }, { "epoch": 11.41, "learning_rate": 1.8479223300970875e-05, "loss": 0.1016, "step": 29370 }, { "epoch": 11.41, "learning_rate": 1.8478705501618122e-05, "loss": 0.0556, "step": 29380 }, { "epoch": 11.41, "learning_rate": 1.8478187702265374e-05, "loss": 0.063, "step": 29390 }, { "epoch": 11.42, "learning_rate": 1.847766990291262e-05, "loss": 0.1998, "step": 29400 }, { "epoch": 11.42, "learning_rate": 1.8477152103559873e-05, "loss": 0.1542, "step": 29410 }, { "epoch": 11.43, "learning_rate": 1.847663430420712e-05, "loss": 0.1173, "step": 29420 }, { "epoch": 11.43, "learning_rate": 1.8476116504854372e-05, "loss": 0.0643, "step": 29430 }, { "epoch": 11.43, "learning_rate": 1.847559870550162e-05, "loss": 0.0344, "step": 29440 }, { "epoch": 11.44, "learning_rate": 1.8475080906148868e-05, "loss": 0.1412, "step": 29450 }, { "epoch": 11.44, "learning_rate": 1.8474563106796116e-05, "loss": 0.1518, "step": 29460 }, { "epoch": 11.44, "learning_rate": 1.8474045307443367e-05, "loss": 0.037, "step": 29470 }, { "epoch": 11.45, "learning_rate": 1.8473527508090615e-05, "loss": 0.2464, "step": 29480 }, { "epoch": 11.45, "learning_rate": 1.8473009708737867e-05, "loss": 0.2612, "step": 29490 }, { "epoch": 11.46, "learning_rate": 1.8472491909385114e-05, "loss": 0.0337, "step": 29500 }, { "epoch": 11.46, "learning_rate": 1.8471974110032366e-05, "loss": 0.0956, "step": 29510 }, { "epoch": 11.46, "learning_rate": 1.8471456310679614e-05, "loss": 0.2567, "step": 29520 }, { "epoch": 11.47, "learning_rate": 1.847093851132686e-05, "loss": 0.1401, "step": 29530 }, { "epoch": 11.47, "learning_rate": 1.847042071197411e-05, "loss": 0.1877, "step": 29540 }, { "epoch": 11.48, "learning_rate": 1.846990291262136e-05, "loss": 0.2209, "step": 29550 }, { "epoch": 11.48, "learning_rate": 1.846938511326861e-05, "loss": 0.1312, "step": 29560 }, { "epoch": 11.48, "learning_rate": 1.846886731391586e-05, "loss": 0.1419, "step": 29570 }, { "epoch": 11.49, "learning_rate": 1.8468349514563108e-05, "loss": 0.1617, "step": 29580 }, { "epoch": 11.49, "learning_rate": 1.846783171521036e-05, "loss": 0.1461, "step": 29590 }, { "epoch": 11.5, "learning_rate": 1.8467313915857607e-05, "loss": 0.1026, "step": 29600 }, { "epoch": 11.5, "learning_rate": 1.8466796116504855e-05, "loss": 0.1873, "step": 29610 }, { "epoch": 11.5, "learning_rate": 1.8466278317152103e-05, "loss": 0.3198, "step": 29620 }, { "epoch": 11.51, "learning_rate": 1.8465760517799354e-05, "loss": 0.1514, "step": 29630 }, { "epoch": 11.51, "learning_rate": 1.8465242718446602e-05, "loss": 0.0979, "step": 29640 }, { "epoch": 11.51, "learning_rate": 1.8464724919093854e-05, "loss": 0.1201, "step": 29650 }, { "epoch": 11.52, "learning_rate": 1.84642071197411e-05, "loss": 0.1535, "step": 29660 }, { "epoch": 11.52, "learning_rate": 1.8463689320388353e-05, "loss": 0.1397, "step": 29670 }, { "epoch": 11.53, "learning_rate": 1.84631715210356e-05, "loss": 0.0284, "step": 29680 }, { "epoch": 11.53, "learning_rate": 1.846265372168285e-05, "loss": 0.1282, "step": 29690 }, { "epoch": 11.53, "learning_rate": 1.8462135922330097e-05, "loss": 0.1669, "step": 29700 }, { "epoch": 11.54, "learning_rate": 1.8461618122977348e-05, "loss": 0.0772, "step": 29710 }, { "epoch": 11.54, "learning_rate": 1.8461100323624596e-05, "loss": 0.1666, "step": 29720 }, { "epoch": 11.55, "learning_rate": 1.8460582524271847e-05, "loss": 0.1348, "step": 29730 }, { "epoch": 11.55, "learning_rate": 1.8460064724919095e-05, "loss": 0.0316, "step": 29740 }, { "epoch": 11.55, "learning_rate": 1.8459546925566346e-05, "loss": 0.1813, "step": 29750 }, { "epoch": 11.56, "learning_rate": 1.8459029126213594e-05, "loss": 0.3339, "step": 29760 }, { "epoch": 11.56, "learning_rate": 1.8458511326860846e-05, "loss": 0.138, "step": 29770 }, { "epoch": 11.57, "learning_rate": 1.845799352750809e-05, "loss": 0.0726, "step": 29780 }, { "epoch": 11.57, "learning_rate": 1.845747572815534e-05, "loss": 0.2006, "step": 29790 }, { "epoch": 11.57, "learning_rate": 1.845695792880259e-05, "loss": 0.0544, "step": 29800 }, { "epoch": 11.58, "learning_rate": 1.845644012944984e-05, "loss": 0.152, "step": 29810 }, { "epoch": 11.58, "learning_rate": 1.845592233009709e-05, "loss": 0.0808, "step": 29820 }, { "epoch": 11.58, "learning_rate": 1.845540453074434e-05, "loss": 0.024, "step": 29830 }, { "epoch": 11.59, "learning_rate": 1.8454886731391588e-05, "loss": 0.2796, "step": 29840 }, { "epoch": 11.59, "learning_rate": 1.8454368932038836e-05, "loss": 0.0544, "step": 29850 }, { "epoch": 11.6, "learning_rate": 1.8453851132686084e-05, "loss": 0.1197, "step": 29860 }, { "epoch": 11.6, "learning_rate": 1.8453333333333335e-05, "loss": 0.0596, "step": 29870 }, { "epoch": 11.6, "learning_rate": 1.8452815533980583e-05, "loss": 0.1816, "step": 29880 }, { "epoch": 11.61, "learning_rate": 1.8452297734627834e-05, "loss": 0.0503, "step": 29890 }, { "epoch": 11.61, "learning_rate": 1.8451779935275082e-05, "loss": 0.3043, "step": 29900 }, { "epoch": 11.62, "learning_rate": 1.8451262135922334e-05, "loss": 0.1636, "step": 29910 }, { "epoch": 11.62, "learning_rate": 1.845074433656958e-05, "loss": 0.1138, "step": 29920 }, { "epoch": 11.62, "learning_rate": 1.845022653721683e-05, "loss": 0.1644, "step": 29930 }, { "epoch": 11.63, "learning_rate": 1.8449708737864077e-05, "loss": 0.2029, "step": 29940 }, { "epoch": 11.63, "learning_rate": 1.844919093851133e-05, "loss": 0.0987, "step": 29950 }, { "epoch": 11.63, "learning_rate": 1.8448673139158577e-05, "loss": 0.1597, "step": 29960 }, { "epoch": 11.64, "learning_rate": 1.8448155339805828e-05, "loss": 0.2333, "step": 29970 }, { "epoch": 11.64, "learning_rate": 1.8447637540453076e-05, "loss": 0.1462, "step": 29980 }, { "epoch": 11.65, "learning_rate": 1.8447119741100327e-05, "loss": 0.0382, "step": 29990 }, { "epoch": 11.65, "learning_rate": 1.8446601941747575e-05, "loss": 0.2675, "step": 30000 }, { "epoch": 11.65, "learning_rate": 1.8446084142394823e-05, "loss": 0.0828, "step": 30010 }, { "epoch": 11.66, "learning_rate": 1.844556634304207e-05, "loss": 0.1023, "step": 30020 }, { "epoch": 11.66, "learning_rate": 1.8445048543689322e-05, "loss": 0.2264, "step": 30030 }, { "epoch": 11.67, "learning_rate": 1.844453074433657e-05, "loss": 0.1732, "step": 30040 }, { "epoch": 11.67, "learning_rate": 1.844401294498382e-05, "loss": 0.0812, "step": 30050 }, { "epoch": 11.67, "learning_rate": 1.844349514563107e-05, "loss": 0.1494, "step": 30060 }, { "epoch": 11.68, "learning_rate": 1.844297734627832e-05, "loss": 0.3529, "step": 30070 }, { "epoch": 11.68, "learning_rate": 1.844245954692557e-05, "loss": 0.24, "step": 30080 }, { "epoch": 11.69, "learning_rate": 1.8441941747572817e-05, "loss": 0.2826, "step": 30090 }, { "epoch": 11.69, "learning_rate": 1.8441423948220064e-05, "loss": 0.1667, "step": 30100 }, { "epoch": 11.69, "learning_rate": 1.8440906148867316e-05, "loss": 0.1577, "step": 30110 }, { "epoch": 11.7, "learning_rate": 1.8440388349514564e-05, "loss": 0.2155, "step": 30120 }, { "epoch": 11.7, "learning_rate": 1.8439870550161815e-05, "loss": 0.0121, "step": 30130 }, { "epoch": 11.7, "learning_rate": 1.8439352750809063e-05, "loss": 0.2657, "step": 30140 }, { "epoch": 11.71, "learning_rate": 1.843883495145631e-05, "loss": 0.0297, "step": 30150 }, { "epoch": 11.71, "learning_rate": 1.8438317152103562e-05, "loss": 0.175, "step": 30160 }, { "epoch": 11.72, "learning_rate": 1.843779935275081e-05, "loss": 0.0143, "step": 30170 }, { "epoch": 11.72, "learning_rate": 1.8437281553398058e-05, "loss": 0.2095, "step": 30180 }, { "epoch": 11.72, "learning_rate": 1.843676375404531e-05, "loss": 0.0937, "step": 30190 }, { "epoch": 11.73, "learning_rate": 1.8436245954692557e-05, "loss": 0.1573, "step": 30200 }, { "epoch": 11.73, "learning_rate": 1.843572815533981e-05, "loss": 0.1188, "step": 30210 }, { "epoch": 11.74, "learning_rate": 1.8435210355987056e-05, "loss": 0.2221, "step": 30220 }, { "epoch": 11.74, "learning_rate": 1.8434692556634304e-05, "loss": 0.2503, "step": 30230 }, { "epoch": 11.74, "learning_rate": 1.8434174757281556e-05, "loss": 0.1174, "step": 30240 }, { "epoch": 11.75, "learning_rate": 1.8433656957928804e-05, "loss": 0.1356, "step": 30250 }, { "epoch": 11.75, "learning_rate": 1.843313915857605e-05, "loss": 0.1095, "step": 30260 }, { "epoch": 11.76, "learning_rate": 1.8432621359223303e-05, "loss": 0.1392, "step": 30270 }, { "epoch": 11.76, "learning_rate": 1.843210355987055e-05, "loss": 0.2273, "step": 30280 }, { "epoch": 11.76, "learning_rate": 1.8431585760517802e-05, "loss": 0.2479, "step": 30290 }, { "epoch": 11.77, "learning_rate": 1.843106796116505e-05, "loss": 0.1376, "step": 30300 }, { "epoch": 11.77, "learning_rate": 1.8430550161812298e-05, "loss": 0.0605, "step": 30310 }, { "epoch": 11.77, "learning_rate": 1.843003236245955e-05, "loss": 0.076, "step": 30320 }, { "epoch": 11.78, "learning_rate": 1.8429514563106797e-05, "loss": 0.1776, "step": 30330 }, { "epoch": 11.78, "learning_rate": 1.842899676375405e-05, "loss": 0.2124, "step": 30340 }, { "epoch": 11.79, "learning_rate": 1.8428478964401296e-05, "loss": 0.0746, "step": 30350 }, { "epoch": 11.79, "learning_rate": 1.8427961165048544e-05, "loss": 0.1378, "step": 30360 }, { "epoch": 11.79, "learning_rate": 1.8427443365695796e-05, "loss": 0.127, "step": 30370 }, { "epoch": 11.8, "learning_rate": 1.8426925566343044e-05, "loss": 0.141, "step": 30380 }, { "epoch": 11.8, "learning_rate": 1.842640776699029e-05, "loss": 0.1375, "step": 30390 }, { "epoch": 11.81, "learning_rate": 1.8425889967637543e-05, "loss": 0.108, "step": 30400 }, { "epoch": 11.81, "learning_rate": 1.842537216828479e-05, "loss": 0.0768, "step": 30410 }, { "epoch": 11.81, "learning_rate": 1.8424854368932042e-05, "loss": 0.1295, "step": 30420 }, { "epoch": 11.82, "learning_rate": 1.842433656957929e-05, "loss": 0.1432, "step": 30430 }, { "epoch": 11.82, "learning_rate": 1.8423818770226538e-05, "loss": 0.1294, "step": 30440 }, { "epoch": 11.83, "learning_rate": 1.842330097087379e-05, "loss": 0.1831, "step": 30450 }, { "epoch": 11.83, "learning_rate": 1.8422783171521037e-05, "loss": 0.0795, "step": 30460 }, { "epoch": 11.83, "learning_rate": 1.8422265372168285e-05, "loss": 0.1986, "step": 30470 }, { "epoch": 11.84, "learning_rate": 1.8421747572815536e-05, "loss": 0.0149, "step": 30480 }, { "epoch": 11.84, "learning_rate": 1.8421229773462784e-05, "loss": 0.1656, "step": 30490 }, { "epoch": 11.84, "learning_rate": 1.8420711974110036e-05, "loss": 0.341, "step": 30500 }, { "epoch": 11.85, "learning_rate": 1.8420194174757284e-05, "loss": 0.2476, "step": 30510 }, { "epoch": 11.85, "learning_rate": 1.841967637540453e-05, "loss": 0.2287, "step": 30520 }, { "epoch": 11.86, "learning_rate": 1.841915857605178e-05, "loss": 0.2133, "step": 30530 }, { "epoch": 11.86, "learning_rate": 1.841864077669903e-05, "loss": 0.1651, "step": 30540 }, { "epoch": 11.86, "learning_rate": 1.841812297734628e-05, "loss": 0.2385, "step": 30550 }, { "epoch": 11.87, "learning_rate": 1.841760517799353e-05, "loss": 0.0998, "step": 30560 }, { "epoch": 11.87, "learning_rate": 1.8417087378640778e-05, "loss": 0.2349, "step": 30570 }, { "epoch": 11.88, "learning_rate": 1.841656957928803e-05, "loss": 0.108, "step": 30580 }, { "epoch": 11.88, "learning_rate": 1.8416051779935277e-05, "loss": 0.3248, "step": 30590 }, { "epoch": 11.88, "learning_rate": 1.8415533980582525e-05, "loss": 0.1185, "step": 30600 }, { "epoch": 11.89, "learning_rate": 1.8415016181229773e-05, "loss": 0.2152, "step": 30610 }, { "epoch": 11.89, "learning_rate": 1.8414498381877024e-05, "loss": 0.4354, "step": 30620 }, { "epoch": 11.9, "learning_rate": 1.8413980582524272e-05, "loss": 0.131, "step": 30630 }, { "epoch": 11.9, "learning_rate": 1.8413462783171523e-05, "loss": 0.2992, "step": 30640 }, { "epoch": 11.9, "learning_rate": 1.841294498381877e-05, "loss": 0.0904, "step": 30650 }, { "epoch": 11.91, "learning_rate": 1.8412427184466023e-05, "loss": 0.1152, "step": 30660 }, { "epoch": 11.91, "learning_rate": 1.841190938511327e-05, "loss": 0.1348, "step": 30670 }, { "epoch": 11.91, "learning_rate": 1.841139158576052e-05, "loss": 0.0238, "step": 30680 }, { "epoch": 11.92, "learning_rate": 1.8410873786407767e-05, "loss": 0.1011, "step": 30690 }, { "epoch": 11.92, "learning_rate": 1.8410355987055018e-05, "loss": 0.0841, "step": 30700 }, { "epoch": 11.93, "learning_rate": 1.8409838187702266e-05, "loss": 0.2363, "step": 30710 }, { "epoch": 11.93, "learning_rate": 1.8409320388349517e-05, "loss": 0.0177, "step": 30720 }, { "epoch": 11.93, "learning_rate": 1.8408802588996765e-05, "loss": 0.0056, "step": 30730 }, { "epoch": 11.94, "learning_rate": 1.8408284789644016e-05, "loss": 0.0267, "step": 30740 }, { "epoch": 11.94, "learning_rate": 1.8407766990291264e-05, "loss": 0.3607, "step": 30750 }, { "epoch": 11.95, "learning_rate": 1.8407249190938512e-05, "loss": 0.2015, "step": 30760 }, { "epoch": 11.95, "learning_rate": 1.840673139158576e-05, "loss": 0.1517, "step": 30770 }, { "epoch": 11.95, "learning_rate": 1.840621359223301e-05, "loss": 0.0252, "step": 30780 }, { "epoch": 11.96, "learning_rate": 1.840569579288026e-05, "loss": 0.1524, "step": 30790 }, { "epoch": 11.96, "learning_rate": 1.840517799352751e-05, "loss": 0.223, "step": 30800 }, { "epoch": 11.97, "learning_rate": 1.840466019417476e-05, "loss": 0.0442, "step": 30810 }, { "epoch": 11.97, "learning_rate": 1.840414239482201e-05, "loss": 0.2617, "step": 30820 }, { "epoch": 11.97, "learning_rate": 1.8403624595469254e-05, "loss": 0.0484, "step": 30830 }, { "epoch": 11.98, "learning_rate": 1.8403106796116506e-05, "loss": 0.1234, "step": 30840 }, { "epoch": 11.98, "learning_rate": 1.8402588996763754e-05, "loss": 0.1081, "step": 30850 }, { "epoch": 11.98, "learning_rate": 1.8402071197411005e-05, "loss": 0.1792, "step": 30860 }, { "epoch": 11.99, "learning_rate": 1.8401553398058253e-05, "loss": 0.1507, "step": 30870 }, { "epoch": 11.99, "learning_rate": 1.8401035598705504e-05, "loss": 0.3733, "step": 30880 }, { "epoch": 12.0, "learning_rate": 1.8400517799352752e-05, "loss": 0.0819, "step": 30890 }, { "epoch": 12.0, "learning_rate": 1.8400000000000003e-05, "loss": 0.181, "step": 30900 }, { "epoch": 12.0, "eval_accuracy": 0.9513067400275104, "eval_loss": 0.22270826995372772, "eval_runtime": 8.2481, "eval_samples_per_second": 440.707, "eval_steps_per_second": 55.164, "step": 30900 }, { "epoch": 12.0, "learning_rate": 1.839948220064725e-05, "loss": 0.2422, "step": 30910 }, { "epoch": 12.01, "learning_rate": 1.83989644012945e-05, "loss": 0.0784, "step": 30920 }, { "epoch": 12.01, "learning_rate": 1.8398446601941747e-05, "loss": 0.2248, "step": 30930 }, { "epoch": 12.02, "learning_rate": 1.8397928802589e-05, "loss": 0.1229, "step": 30940 }, { "epoch": 12.02, "learning_rate": 1.8397411003236246e-05, "loss": 0.1463, "step": 30950 }, { "epoch": 12.02, "learning_rate": 1.8396893203883498e-05, "loss": 0.0025, "step": 30960 }, { "epoch": 12.03, "learning_rate": 1.8396375404530746e-05, "loss": 0.1016, "step": 30970 }, { "epoch": 12.03, "learning_rate": 1.8395857605177997e-05, "loss": 0.2479, "step": 30980 }, { "epoch": 12.03, "learning_rate": 1.8395339805825245e-05, "loss": 0.114, "step": 30990 }, { "epoch": 12.04, "learning_rate": 1.8394822006472493e-05, "loss": 0.1417, "step": 31000 }, { "epoch": 12.04, "learning_rate": 1.839430420711974e-05, "loss": 0.2496, "step": 31010 }, { "epoch": 12.05, "learning_rate": 1.8393786407766992e-05, "loss": 0.1498, "step": 31020 }, { "epoch": 12.05, "learning_rate": 1.839326860841424e-05, "loss": 0.1102, "step": 31030 }, { "epoch": 12.05, "learning_rate": 1.839275080906149e-05, "loss": 0.0756, "step": 31040 }, { "epoch": 12.06, "learning_rate": 1.839223300970874e-05, "loss": 0.0964, "step": 31050 }, { "epoch": 12.06, "learning_rate": 1.839171521035599e-05, "loss": 0.0309, "step": 31060 }, { "epoch": 12.07, "learning_rate": 1.839119741100324e-05, "loss": 0.1237, "step": 31070 }, { "epoch": 12.07, "learning_rate": 1.8390679611650486e-05, "loss": 0.1544, "step": 31080 }, { "epoch": 12.07, "learning_rate": 1.8390161812297734e-05, "loss": 0.0469, "step": 31090 }, { "epoch": 12.08, "learning_rate": 1.8389644012944986e-05, "loss": 0.073, "step": 31100 }, { "epoch": 12.08, "learning_rate": 1.8389126213592234e-05, "loss": 0.128, "step": 31110 }, { "epoch": 12.09, "learning_rate": 1.8388608414239485e-05, "loss": 0.0868, "step": 31120 }, { "epoch": 12.09, "learning_rate": 1.8388090614886733e-05, "loss": 0.0514, "step": 31130 }, { "epoch": 12.09, "learning_rate": 1.8387572815533984e-05, "loss": 0.0491, "step": 31140 }, { "epoch": 12.1, "learning_rate": 1.8387055016181232e-05, "loss": 0.0764, "step": 31150 }, { "epoch": 12.1, "learning_rate": 1.838653721682848e-05, "loss": 0.2001, "step": 31160 }, { "epoch": 12.1, "learning_rate": 1.8386019417475728e-05, "loss": 0.1513, "step": 31170 }, { "epoch": 12.11, "learning_rate": 1.838550161812298e-05, "loss": 0.1522, "step": 31180 }, { "epoch": 12.11, "learning_rate": 1.8384983818770227e-05, "loss": 0.2692, "step": 31190 }, { "epoch": 12.12, "learning_rate": 1.838446601941748e-05, "loss": 0.026, "step": 31200 }, { "epoch": 12.12, "learning_rate": 1.8383948220064726e-05, "loss": 0.0297, "step": 31210 }, { "epoch": 12.12, "learning_rate": 1.8383430420711978e-05, "loss": 0.2263, "step": 31220 }, { "epoch": 12.13, "learning_rate": 1.8382912621359226e-05, "loss": 0.1093, "step": 31230 }, { "epoch": 12.13, "learning_rate": 1.8382394822006473e-05, "loss": 0.1256, "step": 31240 }, { "epoch": 12.14, "learning_rate": 1.838187702265372e-05, "loss": 0.1786, "step": 31250 }, { "epoch": 12.14, "learning_rate": 1.8381359223300973e-05, "loss": 0.2135, "step": 31260 }, { "epoch": 12.14, "learning_rate": 1.838084142394822e-05, "loss": 0.1648, "step": 31270 }, { "epoch": 12.15, "learning_rate": 1.8380323624595472e-05, "loss": 0.3022, "step": 31280 }, { "epoch": 12.15, "learning_rate": 1.837980582524272e-05, "loss": 0.086, "step": 31290 }, { "epoch": 12.16, "learning_rate": 1.837928802588997e-05, "loss": 0.0662, "step": 31300 }, { "epoch": 12.16, "learning_rate": 1.837877022653722e-05, "loss": 0.267, "step": 31310 }, { "epoch": 12.16, "learning_rate": 1.8378252427184467e-05, "loss": 0.1044, "step": 31320 }, { "epoch": 12.17, "learning_rate": 1.8377734627831715e-05, "loss": 0.0922, "step": 31330 }, { "epoch": 12.17, "learning_rate": 1.8377216828478966e-05, "loss": 0.0894, "step": 31340 }, { "epoch": 12.17, "learning_rate": 1.8376699029126214e-05, "loss": 0.161, "step": 31350 }, { "epoch": 12.18, "learning_rate": 1.8376181229773466e-05, "loss": 0.0277, "step": 31360 }, { "epoch": 12.18, "learning_rate": 1.8375663430420713e-05, "loss": 0.3941, "step": 31370 }, { "epoch": 12.19, "learning_rate": 1.8375145631067965e-05, "loss": 0.0382, "step": 31380 }, { "epoch": 12.19, "learning_rate": 1.8374627831715213e-05, "loss": 0.1621, "step": 31390 }, { "epoch": 12.19, "learning_rate": 1.837411003236246e-05, "loss": 0.3261, "step": 31400 }, { "epoch": 12.2, "learning_rate": 1.837359223300971e-05, "loss": 0.0166, "step": 31410 }, { "epoch": 12.2, "learning_rate": 1.837307443365696e-05, "loss": 0.0627, "step": 31420 }, { "epoch": 12.21, "learning_rate": 1.8372556634304208e-05, "loss": 0.0351, "step": 31430 }, { "epoch": 12.21, "learning_rate": 1.837203883495146e-05, "loss": 0.1453, "step": 31440 }, { "epoch": 12.21, "learning_rate": 1.8371521035598707e-05, "loss": 0.0169, "step": 31450 }, { "epoch": 12.22, "learning_rate": 1.8371003236245958e-05, "loss": 0.1542, "step": 31460 }, { "epoch": 12.22, "learning_rate": 1.8370485436893206e-05, "loss": 0.0705, "step": 31470 }, { "epoch": 12.23, "learning_rate": 1.8369967637540454e-05, "loss": 0.1581, "step": 31480 }, { "epoch": 12.23, "learning_rate": 1.8369449838187702e-05, "loss": 0.1376, "step": 31490 }, { "epoch": 12.23, "learning_rate": 1.8368932038834953e-05, "loss": 0.1243, "step": 31500 }, { "epoch": 12.24, "learning_rate": 1.83684142394822e-05, "loss": 0.139, "step": 31510 }, { "epoch": 12.24, "learning_rate": 1.8367896440129453e-05, "loss": 0.1886, "step": 31520 }, { "epoch": 12.24, "learning_rate": 1.83673786407767e-05, "loss": 0.1449, "step": 31530 }, { "epoch": 12.25, "learning_rate": 1.8366860841423952e-05, "loss": 0.1177, "step": 31540 }, { "epoch": 12.25, "learning_rate": 1.83663430420712e-05, "loss": 0.1878, "step": 31550 }, { "epoch": 12.26, "learning_rate": 1.8365825242718448e-05, "loss": 0.1194, "step": 31560 }, { "epoch": 12.26, "learning_rate": 1.8365307443365696e-05, "loss": 0.1402, "step": 31570 }, { "epoch": 12.26, "learning_rate": 1.8364789644012947e-05, "loss": 0.072, "step": 31580 }, { "epoch": 12.27, "learning_rate": 1.8364271844660195e-05, "loss": 0.2804, "step": 31590 }, { "epoch": 12.27, "learning_rate": 1.8363754045307446e-05, "loss": 0.1631, "step": 31600 }, { "epoch": 12.28, "learning_rate": 1.8363236245954694e-05, "loss": 0.2184, "step": 31610 }, { "epoch": 12.28, "learning_rate": 1.8362718446601942e-05, "loss": 0.2768, "step": 31620 }, { "epoch": 12.28, "learning_rate": 1.8362200647249193e-05, "loss": 0.1384, "step": 31630 }, { "epoch": 12.29, "learning_rate": 1.836168284789644e-05, "loss": 0.1128, "step": 31640 }, { "epoch": 12.29, "learning_rate": 1.836116504854369e-05, "loss": 0.0812, "step": 31650 }, { "epoch": 12.3, "learning_rate": 1.836064724919094e-05, "loss": 0.2223, "step": 31660 }, { "epoch": 12.3, "learning_rate": 1.836012944983819e-05, "loss": 0.0691, "step": 31670 }, { "epoch": 12.3, "learning_rate": 1.835961165048544e-05, "loss": 0.1319, "step": 31680 }, { "epoch": 12.31, "learning_rate": 1.8359093851132688e-05, "loss": 0.2502, "step": 31690 }, { "epoch": 12.31, "learning_rate": 1.8358576051779936e-05, "loss": 0.1089, "step": 31700 }, { "epoch": 12.31, "learning_rate": 1.8358058252427187e-05, "loss": 0.1455, "step": 31710 }, { "epoch": 12.32, "learning_rate": 1.8357540453074435e-05, "loss": 0.1812, "step": 31720 }, { "epoch": 12.32, "learning_rate": 1.8357022653721683e-05, "loss": 0.0979, "step": 31730 }, { "epoch": 12.33, "learning_rate": 1.8356504854368934e-05, "loss": 0.1434, "step": 31740 }, { "epoch": 12.33, "learning_rate": 1.8355987055016182e-05, "loss": 0.16, "step": 31750 }, { "epoch": 12.33, "learning_rate": 1.8355469255663433e-05, "loss": 0.1281, "step": 31760 }, { "epoch": 12.34, "learning_rate": 1.835495145631068e-05, "loss": 0.0247, "step": 31770 }, { "epoch": 12.34, "learning_rate": 1.835443365695793e-05, "loss": 0.1098, "step": 31780 }, { "epoch": 12.35, "learning_rate": 1.835391585760518e-05, "loss": 0.1115, "step": 31790 }, { "epoch": 12.35, "learning_rate": 1.835339805825243e-05, "loss": 0.1328, "step": 31800 }, { "epoch": 12.35, "learning_rate": 1.8352880258899676e-05, "loss": 0.0744, "step": 31810 }, { "epoch": 12.36, "learning_rate": 1.8352362459546928e-05, "loss": 0.0702, "step": 31820 }, { "epoch": 12.36, "learning_rate": 1.8351844660194176e-05, "loss": 0.266, "step": 31830 }, { "epoch": 12.37, "learning_rate": 1.8351326860841427e-05, "loss": 0.0982, "step": 31840 }, { "epoch": 12.37, "learning_rate": 1.8350809061488675e-05, "loss": 0.0464, "step": 31850 }, { "epoch": 12.37, "learning_rate": 1.8350291262135923e-05, "loss": 0.0984, "step": 31860 }, { "epoch": 12.38, "learning_rate": 1.8349773462783174e-05, "loss": 0.214, "step": 31870 }, { "epoch": 12.38, "learning_rate": 1.8349255663430422e-05, "loss": 0.204, "step": 31880 }, { "epoch": 12.38, "learning_rate": 1.834873786407767e-05, "loss": 0.0853, "step": 31890 }, { "epoch": 12.39, "learning_rate": 1.834822006472492e-05, "loss": 0.0712, "step": 31900 }, { "epoch": 12.39, "learning_rate": 1.834770226537217e-05, "loss": 0.1779, "step": 31910 }, { "epoch": 12.4, "learning_rate": 1.834718446601942e-05, "loss": 0.102, "step": 31920 }, { "epoch": 12.4, "learning_rate": 1.834666666666667e-05, "loss": 0.2528, "step": 31930 }, { "epoch": 12.4, "learning_rate": 1.8346148867313916e-05, "loss": 0.1474, "step": 31940 }, { "epoch": 12.41, "learning_rate": 1.8345631067961168e-05, "loss": 0.1474, "step": 31950 }, { "epoch": 12.41, "learning_rate": 1.8345113268608415e-05, "loss": 0.0646, "step": 31960 }, { "epoch": 12.42, "learning_rate": 1.8344595469255667e-05, "loss": 0.1524, "step": 31970 }, { "epoch": 12.42, "learning_rate": 1.8344077669902915e-05, "loss": 0.1509, "step": 31980 }, { "epoch": 12.42, "learning_rate": 1.8343559870550163e-05, "loss": 0.0598, "step": 31990 }, { "epoch": 12.43, "learning_rate": 1.834304207119741e-05, "loss": 0.0483, "step": 32000 }, { "epoch": 12.43, "learning_rate": 1.8342524271844662e-05, "loss": 0.1897, "step": 32010 }, { "epoch": 12.43, "learning_rate": 1.834200647249191e-05, "loss": 0.2733, "step": 32020 }, { "epoch": 12.44, "learning_rate": 1.834148867313916e-05, "loss": 0.1624, "step": 32030 }, { "epoch": 12.44, "learning_rate": 1.834097087378641e-05, "loss": 0.0697, "step": 32040 }, { "epoch": 12.45, "learning_rate": 1.834045307443366e-05, "loss": 0.1437, "step": 32050 }, { "epoch": 12.45, "learning_rate": 1.8339935275080908e-05, "loss": 0.3214, "step": 32060 }, { "epoch": 12.45, "learning_rate": 1.8339417475728156e-05, "loss": 0.0249, "step": 32070 }, { "epoch": 12.46, "learning_rate": 1.8338899676375404e-05, "loss": 0.177, "step": 32080 }, { "epoch": 12.46, "learning_rate": 1.8338381877022655e-05, "loss": 0.2614, "step": 32090 }, { "epoch": 12.47, "learning_rate": 1.8337864077669903e-05, "loss": 0.1922, "step": 32100 }, { "epoch": 12.47, "learning_rate": 1.8337346278317155e-05, "loss": 0.1546, "step": 32110 }, { "epoch": 12.47, "learning_rate": 1.8336828478964403e-05, "loss": 0.1133, "step": 32120 }, { "epoch": 12.48, "learning_rate": 1.8336310679611654e-05, "loss": 0.0516, "step": 32130 }, { "epoch": 12.48, "learning_rate": 1.8335792880258902e-05, "loss": 0.0929, "step": 32140 }, { "epoch": 12.49, "learning_rate": 1.833527508090615e-05, "loss": 0.2044, "step": 32150 }, { "epoch": 12.49, "learning_rate": 1.8334757281553398e-05, "loss": 0.0755, "step": 32160 }, { "epoch": 12.49, "learning_rate": 1.833423948220065e-05, "loss": 0.0961, "step": 32170 }, { "epoch": 12.5, "learning_rate": 1.8333721682847897e-05, "loss": 0.1794, "step": 32180 }, { "epoch": 12.5, "learning_rate": 1.8333203883495148e-05, "loss": 0.0441, "step": 32190 }, { "epoch": 12.5, "learning_rate": 1.8332686084142396e-05, "loss": 0.212, "step": 32200 }, { "epoch": 12.51, "learning_rate": 1.8332168284789647e-05, "loss": 0.1339, "step": 32210 }, { "epoch": 12.51, "learning_rate": 1.8331650485436895e-05, "loss": 0.0652, "step": 32220 }, { "epoch": 12.52, "learning_rate": 1.8331132686084143e-05, "loss": 0.1072, "step": 32230 }, { "epoch": 12.52, "learning_rate": 1.833061488673139e-05, "loss": 0.0717, "step": 32240 }, { "epoch": 12.52, "learning_rate": 1.8330097087378643e-05, "loss": 0.2289, "step": 32250 }, { "epoch": 12.53, "learning_rate": 1.832957928802589e-05, "loss": 0.0811, "step": 32260 }, { "epoch": 12.53, "learning_rate": 1.8329061488673142e-05, "loss": 0.1498, "step": 32270 }, { "epoch": 12.54, "learning_rate": 1.832854368932039e-05, "loss": 0.1239, "step": 32280 }, { "epoch": 12.54, "learning_rate": 1.832802588996764e-05, "loss": 0.1534, "step": 32290 }, { "epoch": 12.54, "learning_rate": 1.8327508090614886e-05, "loss": 0.0458, "step": 32300 }, { "epoch": 12.55, "learning_rate": 1.8326990291262137e-05, "loss": 0.2795, "step": 32310 }, { "epoch": 12.55, "learning_rate": 1.8326472491909385e-05, "loss": 0.1002, "step": 32320 }, { "epoch": 12.56, "learning_rate": 1.8325954692556636e-05, "loss": 0.0867, "step": 32330 }, { "epoch": 12.56, "learning_rate": 1.8325436893203884e-05, "loss": 0.1068, "step": 32340 }, { "epoch": 12.56, "learning_rate": 1.8324919093851135e-05, "loss": 0.1356, "step": 32350 }, { "epoch": 12.57, "learning_rate": 1.8324401294498383e-05, "loss": 0.0825, "step": 32360 }, { "epoch": 12.57, "learning_rate": 1.8323883495145635e-05, "loss": 0.1746, "step": 32370 }, { "epoch": 12.57, "learning_rate": 1.832336569579288e-05, "loss": 0.1291, "step": 32380 }, { "epoch": 12.58, "learning_rate": 1.832284789644013e-05, "loss": 0.1322, "step": 32390 }, { "epoch": 12.58, "learning_rate": 1.832233009708738e-05, "loss": 0.1822, "step": 32400 }, { "epoch": 12.59, "learning_rate": 1.832181229773463e-05, "loss": 0.0568, "step": 32410 }, { "epoch": 12.59, "learning_rate": 1.8321294498381878e-05, "loss": 0.1486, "step": 32420 }, { "epoch": 12.59, "learning_rate": 1.832077669902913e-05, "loss": 0.1412, "step": 32430 }, { "epoch": 12.6, "learning_rate": 1.8320258899676377e-05, "loss": 0.296, "step": 32440 }, { "epoch": 12.6, "learning_rate": 1.8319741100323628e-05, "loss": 0.3106, "step": 32450 }, { "epoch": 12.61, "learning_rate": 1.8319223300970873e-05, "loss": 0.1247, "step": 32460 }, { "epoch": 12.61, "learning_rate": 1.8318705501618124e-05, "loss": 0.2442, "step": 32470 }, { "epoch": 12.61, "learning_rate": 1.8318187702265372e-05, "loss": 0.0681, "step": 32480 }, { "epoch": 12.62, "learning_rate": 1.8317669902912623e-05, "loss": 0.0985, "step": 32490 }, { "epoch": 12.62, "learning_rate": 1.831715210355987e-05, "loss": 0.1817, "step": 32500 }, { "epoch": 12.63, "learning_rate": 1.8316634304207122e-05, "loss": 0.175, "step": 32510 }, { "epoch": 12.63, "learning_rate": 1.831611650485437e-05, "loss": 0.1037, "step": 32520 }, { "epoch": 12.63, "learning_rate": 1.831559870550162e-05, "loss": 0.0533, "step": 32530 }, { "epoch": 12.64, "learning_rate": 1.831508090614887e-05, "loss": 0.1052, "step": 32540 }, { "epoch": 12.64, "learning_rate": 1.8314563106796118e-05, "loss": 0.2132, "step": 32550 }, { "epoch": 12.64, "learning_rate": 1.8314045307443365e-05, "loss": 0.2087, "step": 32560 }, { "epoch": 12.65, "learning_rate": 1.8313527508090617e-05, "loss": 0.3662, "step": 32570 }, { "epoch": 12.65, "learning_rate": 1.8313009708737865e-05, "loss": 0.0445, "step": 32580 }, { "epoch": 12.66, "learning_rate": 1.8312491909385116e-05, "loss": 0.0943, "step": 32590 }, { "epoch": 12.66, "learning_rate": 1.8311974110032364e-05, "loss": 0.2825, "step": 32600 }, { "epoch": 12.66, "learning_rate": 1.8311456310679615e-05, "loss": 0.272, "step": 32610 }, { "epoch": 12.67, "learning_rate": 1.8310938511326863e-05, "loss": 0.119, "step": 32620 }, { "epoch": 12.67, "learning_rate": 1.831042071197411e-05, "loss": 0.1634, "step": 32630 }, { "epoch": 12.68, "learning_rate": 1.830990291262136e-05, "loss": 0.0256, "step": 32640 }, { "epoch": 12.68, "learning_rate": 1.830938511326861e-05, "loss": 0.3519, "step": 32650 }, { "epoch": 12.68, "learning_rate": 1.8308867313915858e-05, "loss": 0.1472, "step": 32660 }, { "epoch": 12.69, "learning_rate": 1.830834951456311e-05, "loss": 0.1875, "step": 32670 }, { "epoch": 12.69, "learning_rate": 1.8307831715210357e-05, "loss": 0.1831, "step": 32680 }, { "epoch": 12.7, "learning_rate": 1.830731391585761e-05, "loss": 0.2519, "step": 32690 }, { "epoch": 12.7, "learning_rate": 1.8306796116504857e-05, "loss": 0.1866, "step": 32700 }, { "epoch": 12.7, "learning_rate": 1.8306278317152105e-05, "loss": 0.2267, "step": 32710 }, { "epoch": 12.71, "learning_rate": 1.8305760517799353e-05, "loss": 0.1787, "step": 32720 }, { "epoch": 12.71, "learning_rate": 1.8305242718446604e-05, "loss": 0.1153, "step": 32730 }, { "epoch": 12.71, "learning_rate": 1.8304724919093852e-05, "loss": 0.1347, "step": 32740 }, { "epoch": 12.72, "learning_rate": 1.8304207119741103e-05, "loss": 0.0355, "step": 32750 }, { "epoch": 12.72, "learning_rate": 1.830368932038835e-05, "loss": 0.1621, "step": 32760 }, { "epoch": 12.73, "learning_rate": 1.8303171521035602e-05, "loss": 0.0986, "step": 32770 }, { "epoch": 12.73, "learning_rate": 1.830265372168285e-05, "loss": 0.1976, "step": 32780 }, { "epoch": 12.73, "learning_rate": 1.8302135922330098e-05, "loss": 0.1079, "step": 32790 }, { "epoch": 12.74, "learning_rate": 1.8301618122977346e-05, "loss": 0.0607, "step": 32800 }, { "epoch": 12.74, "learning_rate": 1.8301100323624597e-05, "loss": 0.0796, "step": 32810 }, { "epoch": 12.75, "learning_rate": 1.8300582524271845e-05, "loss": 0.159, "step": 32820 }, { "epoch": 12.75, "learning_rate": 1.8300064724919097e-05, "loss": 0.0635, "step": 32830 }, { "epoch": 12.75, "learning_rate": 1.8299546925566345e-05, "loss": 0.2037, "step": 32840 }, { "epoch": 12.76, "learning_rate": 1.8299029126213596e-05, "loss": 0.097, "step": 32850 }, { "epoch": 12.76, "learning_rate": 1.8298511326860844e-05, "loss": 0.2327, "step": 32860 }, { "epoch": 12.77, "learning_rate": 1.8297993527508092e-05, "loss": 0.2174, "step": 32870 }, { "epoch": 12.77, "learning_rate": 1.829747572815534e-05, "loss": 0.1167, "step": 32880 }, { "epoch": 12.77, "learning_rate": 1.829695792880259e-05, "loss": 0.0507, "step": 32890 }, { "epoch": 12.78, "learning_rate": 1.829644012944984e-05, "loss": 0.1171, "step": 32900 }, { "epoch": 12.78, "learning_rate": 1.829592233009709e-05, "loss": 0.1201, "step": 32910 }, { "epoch": 12.78, "learning_rate": 1.8295404530744338e-05, "loss": 0.2656, "step": 32920 }, { "epoch": 12.79, "learning_rate": 1.829488673139159e-05, "loss": 0.0648, "step": 32930 }, { "epoch": 12.79, "learning_rate": 1.8294368932038837e-05, "loss": 0.0811, "step": 32940 }, { "epoch": 12.8, "learning_rate": 1.8293851132686085e-05, "loss": 0.0934, "step": 32950 }, { "epoch": 12.8, "learning_rate": 1.8293333333333333e-05, "loss": 0.073, "step": 32960 }, { "epoch": 12.8, "learning_rate": 1.8292815533980585e-05, "loss": 0.0994, "step": 32970 }, { "epoch": 12.81, "learning_rate": 1.8292297734627832e-05, "loss": 0.1127, "step": 32980 }, { "epoch": 12.81, "learning_rate": 1.8291779935275084e-05, "loss": 0.1411, "step": 32990 }, { "epoch": 12.82, "learning_rate": 1.8291262135922332e-05, "loss": 0.124, "step": 33000 }, { "epoch": 12.82, "learning_rate": 1.8290744336569583e-05, "loss": 0.1083, "step": 33010 }, { "epoch": 12.82, "learning_rate": 1.829022653721683e-05, "loss": 0.058, "step": 33020 }, { "epoch": 12.83, "learning_rate": 1.828970873786408e-05, "loss": 0.1674, "step": 33030 }, { "epoch": 12.83, "learning_rate": 1.8289190938511327e-05, "loss": 0.2379, "step": 33040 }, { "epoch": 12.83, "learning_rate": 1.8288673139158578e-05, "loss": 0.0548, "step": 33050 }, { "epoch": 12.84, "learning_rate": 1.8288155339805826e-05, "loss": 0.0388, "step": 33060 }, { "epoch": 12.84, "learning_rate": 1.8287637540453077e-05, "loss": 0.2646, "step": 33070 }, { "epoch": 12.85, "learning_rate": 1.8287119741100325e-05, "loss": 0.1471, "step": 33080 }, { "epoch": 12.85, "learning_rate": 1.8286601941747573e-05, "loss": 0.2605, "step": 33090 }, { "epoch": 12.85, "learning_rate": 1.8286084142394825e-05, "loss": 0.4131, "step": 33100 }, { "epoch": 12.86, "learning_rate": 1.8285566343042072e-05, "loss": 0.0939, "step": 33110 }, { "epoch": 12.86, "learning_rate": 1.828504854368932e-05, "loss": 0.0202, "step": 33120 }, { "epoch": 12.87, "learning_rate": 1.828453074433657e-05, "loss": 0.1134, "step": 33130 }, { "epoch": 12.87, "learning_rate": 1.828401294498382e-05, "loss": 0.1518, "step": 33140 }, { "epoch": 12.87, "learning_rate": 1.828349514563107e-05, "loss": 0.1583, "step": 33150 }, { "epoch": 12.88, "learning_rate": 1.828297734627832e-05, "loss": 0.2037, "step": 33160 }, { "epoch": 12.88, "learning_rate": 1.8282459546925567e-05, "loss": 0.1662, "step": 33170 }, { "epoch": 12.89, "learning_rate": 1.8281941747572818e-05, "loss": 0.1699, "step": 33180 }, { "epoch": 12.89, "learning_rate": 1.8281423948220066e-05, "loss": 0.0736, "step": 33190 }, { "epoch": 12.89, "learning_rate": 1.8280906148867314e-05, "loss": 0.1082, "step": 33200 }, { "epoch": 12.9, "learning_rate": 1.8280388349514565e-05, "loss": 0.1334, "step": 33210 }, { "epoch": 12.9, "learning_rate": 1.8279870550161813e-05, "loss": 0.2713, "step": 33220 }, { "epoch": 12.9, "learning_rate": 1.8279352750809064e-05, "loss": 0.0521, "step": 33230 }, { "epoch": 12.91, "learning_rate": 1.8278834951456312e-05, "loss": 0.167, "step": 33240 }, { "epoch": 12.91, "learning_rate": 1.827831715210356e-05, "loss": 0.2874, "step": 33250 }, { "epoch": 12.92, "learning_rate": 1.827779935275081e-05, "loss": 0.1002, "step": 33260 }, { "epoch": 12.92, "learning_rate": 1.827728155339806e-05, "loss": 0.0337, "step": 33270 }, { "epoch": 12.92, "learning_rate": 1.8276763754045307e-05, "loss": 0.0689, "step": 33280 }, { "epoch": 12.93, "learning_rate": 1.827624595469256e-05, "loss": 0.2639, "step": 33290 }, { "epoch": 12.93, "learning_rate": 1.8275728155339807e-05, "loss": 0.1044, "step": 33300 }, { "epoch": 12.94, "learning_rate": 1.8275210355987058e-05, "loss": 0.0705, "step": 33310 }, { "epoch": 12.94, "learning_rate": 1.8274692556634306e-05, "loss": 0.2664, "step": 33320 }, { "epoch": 12.94, "learning_rate": 1.8274174757281554e-05, "loss": 0.1955, "step": 33330 }, { "epoch": 12.95, "learning_rate": 1.8273656957928805e-05, "loss": 0.159, "step": 33340 }, { "epoch": 12.95, "learning_rate": 1.8273139158576053e-05, "loss": 0.1996, "step": 33350 }, { "epoch": 12.96, "learning_rate": 1.82726213592233e-05, "loss": 0.1577, "step": 33360 }, { "epoch": 12.96, "learning_rate": 1.8272103559870552e-05, "loss": 0.1965, "step": 33370 }, { "epoch": 12.96, "learning_rate": 1.82715857605178e-05, "loss": 0.1096, "step": 33380 }, { "epoch": 12.97, "learning_rate": 1.827106796116505e-05, "loss": 0.2168, "step": 33390 }, { "epoch": 12.97, "learning_rate": 1.82705501618123e-05, "loss": 0.1023, "step": 33400 }, { "epoch": 12.97, "learning_rate": 1.8270032362459547e-05, "loss": 0.1975, "step": 33410 }, { "epoch": 12.98, "learning_rate": 1.82695145631068e-05, "loss": 0.13, "step": 33420 }, { "epoch": 12.98, "learning_rate": 1.8268996763754047e-05, "loss": 0.2114, "step": 33430 }, { "epoch": 12.99, "learning_rate": 1.8268478964401295e-05, "loss": 0.2241, "step": 33440 }, { "epoch": 12.99, "learning_rate": 1.8267961165048546e-05, "loss": 0.2898, "step": 33450 }, { "epoch": 12.99, "learning_rate": 1.8267443365695794e-05, "loss": 0.2021, "step": 33460 }, { "epoch": 13.0, "learning_rate": 1.8266925566343042e-05, "loss": 0.1072, "step": 33470 }, { "epoch": 13.0, "eval_accuracy": 0.9502063273727648, "eval_loss": 0.2302989661693573, "eval_runtime": 8.2265, "eval_samples_per_second": 441.865, "eval_steps_per_second": 55.309, "step": 33475 }, { "epoch": 13.0, "learning_rate": 1.8266407766990293e-05, "loss": 0.1365, "step": 33480 }, { "epoch": 13.01, "learning_rate": 1.826588996763754e-05, "loss": 0.2059, "step": 33490 }, { "epoch": 13.01, "learning_rate": 1.8265372168284792e-05, "loss": 0.3802, "step": 33500 }, { "epoch": 13.01, "learning_rate": 1.826485436893204e-05, "loss": 0.0427, "step": 33510 }, { "epoch": 13.02, "learning_rate": 1.8264336569579288e-05, "loss": 0.214, "step": 33520 }, { "epoch": 13.02, "learning_rate": 1.826381877022654e-05, "loss": 0.1464, "step": 33530 }, { "epoch": 13.03, "learning_rate": 1.8263300970873787e-05, "loss": 0.102, "step": 33540 }, { "epoch": 13.03, "learning_rate": 1.8262783171521035e-05, "loss": 0.0757, "step": 33550 }, { "epoch": 13.03, "learning_rate": 1.8262265372168287e-05, "loss": 0.1547, "step": 33560 }, { "epoch": 13.04, "learning_rate": 1.8261747572815535e-05, "loss": 0.1187, "step": 33570 }, { "epoch": 13.04, "learning_rate": 1.8261229773462786e-05, "loss": 0.092, "step": 33580 }, { "epoch": 13.04, "learning_rate": 1.8260711974110034e-05, "loss": 0.2484, "step": 33590 }, { "epoch": 13.05, "learning_rate": 1.826019417475728e-05, "loss": 0.0708, "step": 33600 }, { "epoch": 13.05, "learning_rate": 1.8259676375404533e-05, "loss": 0.1323, "step": 33610 }, { "epoch": 13.06, "learning_rate": 1.825915857605178e-05, "loss": 0.2608, "step": 33620 }, { "epoch": 13.06, "learning_rate": 1.825864077669903e-05, "loss": 0.3223, "step": 33630 }, { "epoch": 13.06, "learning_rate": 1.825812297734628e-05, "loss": 0.0237, "step": 33640 }, { "epoch": 13.07, "learning_rate": 1.8257605177993528e-05, "loss": 0.2666, "step": 33650 }, { "epoch": 13.07, "learning_rate": 1.825708737864078e-05, "loss": 0.2934, "step": 33660 }, { "epoch": 13.08, "learning_rate": 1.8256569579288027e-05, "loss": 0.0447, "step": 33670 }, { "epoch": 13.08, "learning_rate": 1.825605177993528e-05, "loss": 0.1321, "step": 33680 }, { "epoch": 13.08, "learning_rate": 1.8255533980582527e-05, "loss": 0.2468, "step": 33690 }, { "epoch": 13.09, "learning_rate": 1.8255016181229774e-05, "loss": 0.1322, "step": 33700 }, { "epoch": 13.09, "learning_rate": 1.8254498381877022e-05, "loss": 0.1485, "step": 33710 }, { "epoch": 13.1, "learning_rate": 1.8253980582524274e-05, "loss": 0.2172, "step": 33720 }, { "epoch": 13.1, "learning_rate": 1.825346278317152e-05, "loss": 0.0599, "step": 33730 }, { "epoch": 13.1, "learning_rate": 1.8252944983818773e-05, "loss": 0.1004, "step": 33740 }, { "epoch": 13.11, "learning_rate": 1.825242718446602e-05, "loss": 0.1175, "step": 33750 }, { "epoch": 13.11, "learning_rate": 1.8251909385113272e-05, "loss": 0.0409, "step": 33760 }, { "epoch": 13.11, "learning_rate": 1.8251391585760517e-05, "loss": 0.1746, "step": 33770 }, { "epoch": 13.12, "learning_rate": 1.8250873786407768e-05, "loss": 0.1425, "step": 33780 }, { "epoch": 13.12, "learning_rate": 1.8250355987055016e-05, "loss": 0.0375, "step": 33790 }, { "epoch": 13.13, "learning_rate": 1.8249838187702267e-05, "loss": 0.2546, "step": 33800 }, { "epoch": 13.13, "learning_rate": 1.8249320388349515e-05, "loss": 0.1485, "step": 33810 }, { "epoch": 13.13, "learning_rate": 1.8248802588996767e-05, "loss": 0.1268, "step": 33820 }, { "epoch": 13.14, "learning_rate": 1.8248284789644014e-05, "loss": 0.1427, "step": 33830 }, { "epoch": 13.14, "learning_rate": 1.8247766990291266e-05, "loss": 0.1468, "step": 33840 }, { "epoch": 13.15, "learning_rate": 1.824724919093851e-05, "loss": 0.0339, "step": 33850 }, { "epoch": 13.15, "learning_rate": 1.824673139158576e-05, "loss": 0.1943, "step": 33860 }, { "epoch": 13.15, "learning_rate": 1.824621359223301e-05, "loss": 0.2452, "step": 33870 }, { "epoch": 13.16, "learning_rate": 1.824569579288026e-05, "loss": 0.0081, "step": 33880 }, { "epoch": 13.16, "learning_rate": 1.824517799352751e-05, "loss": 0.3292, "step": 33890 }, { "epoch": 13.17, "learning_rate": 1.824466019417476e-05, "loss": 0.085, "step": 33900 }, { "epoch": 13.17, "learning_rate": 1.8244142394822008e-05, "loss": 0.0759, "step": 33910 }, { "epoch": 13.17, "learning_rate": 1.824362459546926e-05, "loss": 0.1177, "step": 33920 }, { "epoch": 13.18, "learning_rate": 1.8243106796116504e-05, "loss": 0.0926, "step": 33930 }, { "epoch": 13.18, "learning_rate": 1.8242588996763755e-05, "loss": 0.138, "step": 33940 }, { "epoch": 13.18, "learning_rate": 1.8242071197411003e-05, "loss": 0.3009, "step": 33950 }, { "epoch": 13.19, "learning_rate": 1.8241553398058254e-05, "loss": 0.064, "step": 33960 }, { "epoch": 13.19, "learning_rate": 1.8241035598705502e-05, "loss": 0.1526, "step": 33970 }, { "epoch": 13.2, "learning_rate": 1.8240517799352754e-05, "loss": 0.0594, "step": 33980 }, { "epoch": 13.2, "learning_rate": 1.824e-05, "loss": 0.0694, "step": 33990 }, { "epoch": 13.2, "learning_rate": 1.8239482200647253e-05, "loss": 0.2333, "step": 34000 }, { "epoch": 13.21, "learning_rate": 1.8238964401294497e-05, "loss": 0.0691, "step": 34010 }, { "epoch": 13.21, "learning_rate": 1.823844660194175e-05, "loss": 0.0706, "step": 34020 }, { "epoch": 13.22, "learning_rate": 1.8237928802588997e-05, "loss": 0.3733, "step": 34030 }, { "epoch": 13.22, "learning_rate": 1.8237411003236248e-05, "loss": 0.1725, "step": 34040 }, { "epoch": 13.22, "learning_rate": 1.8236893203883496e-05, "loss": 0.1445, "step": 34050 }, { "epoch": 13.23, "learning_rate": 1.8236375404530747e-05, "loss": 0.1844, "step": 34060 }, { "epoch": 13.23, "learning_rate": 1.8235857605177995e-05, "loss": 0.0283, "step": 34070 }, { "epoch": 13.23, "learning_rate": 1.8235339805825246e-05, "loss": 0.0915, "step": 34080 }, { "epoch": 13.24, "learning_rate": 1.823482200647249e-05, "loss": 0.1181, "step": 34090 }, { "epoch": 13.24, "learning_rate": 1.8234304207119742e-05, "loss": 0.1147, "step": 34100 }, { "epoch": 13.25, "learning_rate": 1.823378640776699e-05, "loss": 0.1686, "step": 34110 }, { "epoch": 13.25, "learning_rate": 1.823326860841424e-05, "loss": 0.1336, "step": 34120 }, { "epoch": 13.25, "learning_rate": 1.823275080906149e-05, "loss": 0.1391, "step": 34130 }, { "epoch": 13.26, "learning_rate": 1.823223300970874e-05, "loss": 0.0655, "step": 34140 }, { "epoch": 13.26, "learning_rate": 1.823171521035599e-05, "loss": 0.264, "step": 34150 }, { "epoch": 13.27, "learning_rate": 1.823119741100324e-05, "loss": 0.2483, "step": 34160 }, { "epoch": 13.27, "learning_rate": 1.8230679611650485e-05, "loss": 0.1062, "step": 34170 }, { "epoch": 13.27, "learning_rate": 1.8230161812297736e-05, "loss": 0.2293, "step": 34180 }, { "epoch": 13.28, "learning_rate": 1.8229644012944984e-05, "loss": 0.1499, "step": 34190 }, { "epoch": 13.28, "learning_rate": 1.8229126213592235e-05, "loss": 0.1994, "step": 34200 }, { "epoch": 13.29, "learning_rate": 1.8228608414239483e-05, "loss": 0.0719, "step": 34210 }, { "epoch": 13.29, "learning_rate": 1.8228090614886734e-05, "loss": 0.1006, "step": 34220 }, { "epoch": 13.29, "learning_rate": 1.8227572815533982e-05, "loss": 0.1666, "step": 34230 }, { "epoch": 13.3, "learning_rate": 1.8227055016181234e-05, "loss": 0.1722, "step": 34240 }, { "epoch": 13.3, "learning_rate": 1.822653721682848e-05, "loss": 0.1765, "step": 34250 }, { "epoch": 13.3, "learning_rate": 1.822601941747573e-05, "loss": 0.1195, "step": 34260 }, { "epoch": 13.31, "learning_rate": 1.8225501618122977e-05, "loss": 0.2106, "step": 34270 }, { "epoch": 13.31, "learning_rate": 1.822498381877023e-05, "loss": 0.1151, "step": 34280 }, { "epoch": 13.32, "learning_rate": 1.8224466019417477e-05, "loss": 0.2099, "step": 34290 }, { "epoch": 13.32, "learning_rate": 1.8223948220064728e-05, "loss": 0.0759, "step": 34300 }, { "epoch": 13.32, "learning_rate": 1.8223430420711976e-05, "loss": 0.1435, "step": 34310 }, { "epoch": 13.33, "learning_rate": 1.8222912621359227e-05, "loss": 0.2181, "step": 34320 }, { "epoch": 13.33, "learning_rate": 1.8222394822006475e-05, "loss": 0.1993, "step": 34330 }, { "epoch": 13.34, "learning_rate": 1.8221877022653723e-05, "loss": 0.101, "step": 34340 }, { "epoch": 13.34, "learning_rate": 1.822135922330097e-05, "loss": 0.134, "step": 34350 }, { "epoch": 13.34, "learning_rate": 1.8220841423948222e-05, "loss": 0.1501, "step": 34360 }, { "epoch": 13.35, "learning_rate": 1.822032362459547e-05, "loss": 0.2161, "step": 34370 }, { "epoch": 13.35, "learning_rate": 1.821980582524272e-05, "loss": 0.1767, "step": 34380 }, { "epoch": 13.36, "learning_rate": 1.821928802588997e-05, "loss": 0.0727, "step": 34390 }, { "epoch": 13.36, "learning_rate": 1.821877022653722e-05, "loss": 0.1513, "step": 34400 }, { "epoch": 13.36, "learning_rate": 1.821825242718447e-05, "loss": 0.1426, "step": 34410 }, { "epoch": 13.37, "learning_rate": 1.8217734627831716e-05, "loss": 0.1141, "step": 34420 }, { "epoch": 13.37, "learning_rate": 1.8217216828478964e-05, "loss": 0.073, "step": 34430 }, { "epoch": 13.37, "learning_rate": 1.8216699029126216e-05, "loss": 0.1668, "step": 34440 }, { "epoch": 13.38, "learning_rate": 1.8216181229773464e-05, "loss": 0.1802, "step": 34450 }, { "epoch": 13.38, "learning_rate": 1.8215663430420715e-05, "loss": 0.1654, "step": 34460 }, { "epoch": 13.39, "learning_rate": 1.8215145631067963e-05, "loss": 0.1481, "step": 34470 }, { "epoch": 13.39, "learning_rate": 1.8214627831715214e-05, "loss": 0.2609, "step": 34480 }, { "epoch": 13.39, "learning_rate": 1.8214110032362462e-05, "loss": 0.0737, "step": 34490 }, { "epoch": 13.4, "learning_rate": 1.821359223300971e-05, "loss": 0.0588, "step": 34500 }, { "epoch": 13.4, "learning_rate": 1.8213074433656958e-05, "loss": 0.188, "step": 34510 }, { "epoch": 13.41, "learning_rate": 1.821255663430421e-05, "loss": 0.1259, "step": 34520 }, { "epoch": 13.41, "learning_rate": 1.8212038834951457e-05, "loss": 0.1118, "step": 34530 }, { "epoch": 13.41, "learning_rate": 1.821152103559871e-05, "loss": 0.0493, "step": 34540 }, { "epoch": 13.42, "learning_rate": 1.8211003236245956e-05, "loss": 0.0744, "step": 34550 }, { "epoch": 13.42, "learning_rate": 1.8210485436893204e-05, "loss": 0.2036, "step": 34560 }, { "epoch": 13.43, "learning_rate": 1.8209967637540456e-05, "loss": 0.1385, "step": 34570 }, { "epoch": 13.43, "learning_rate": 1.8209449838187704e-05, "loss": 0.1546, "step": 34580 }, { "epoch": 13.43, "learning_rate": 1.820893203883495e-05, "loss": 0.1128, "step": 34590 }, { "epoch": 13.44, "learning_rate": 1.8208414239482203e-05, "loss": 0.0663, "step": 34600 }, { "epoch": 13.44, "learning_rate": 1.820789644012945e-05, "loss": 0.0962, "step": 34610 }, { "epoch": 13.44, "learning_rate": 1.8207378640776702e-05, "loss": 0.0853, "step": 34620 }, { "epoch": 13.45, "learning_rate": 1.820686084142395e-05, "loss": 0.2256, "step": 34630 }, { "epoch": 13.45, "learning_rate": 1.8206343042071198e-05, "loss": 0.0447, "step": 34640 }, { "epoch": 13.46, "learning_rate": 1.820582524271845e-05, "loss": 0.1325, "step": 34650 }, { "epoch": 13.46, "learning_rate": 1.8205307443365697e-05, "loss": 0.0272, "step": 34660 }, { "epoch": 13.46, "learning_rate": 1.8204789644012945e-05, "loss": 0.016, "step": 34670 }, { "epoch": 13.47, "learning_rate": 1.8204271844660196e-05, "loss": 0.2529, "step": 34680 }, { "epoch": 13.47, "learning_rate": 1.8203754045307444e-05, "loss": 0.1998, "step": 34690 }, { "epoch": 13.48, "learning_rate": 1.8203236245954696e-05, "loss": 0.1606, "step": 34700 }, { "epoch": 13.48, "learning_rate": 1.8202718446601944e-05, "loss": 0.0831, "step": 34710 }, { "epoch": 13.48, "learning_rate": 1.820220064724919e-05, "loss": 0.004, "step": 34720 }, { "epoch": 13.49, "learning_rate": 1.8201682847896443e-05, "loss": 0.085, "step": 34730 }, { "epoch": 13.49, "learning_rate": 1.820116504854369e-05, "loss": 0.0762, "step": 34740 }, { "epoch": 13.5, "learning_rate": 1.820064724919094e-05, "loss": 0.166, "step": 34750 }, { "epoch": 13.5, "learning_rate": 1.820012944983819e-05, "loss": 0.1404, "step": 34760 }, { "epoch": 13.5, "learning_rate": 1.8199611650485438e-05, "loss": 0.1024, "step": 34770 }, { "epoch": 13.51, "learning_rate": 1.819909385113269e-05, "loss": 0.0992, "step": 34780 }, { "epoch": 13.51, "learning_rate": 1.8198576051779937e-05, "loss": 0.1426, "step": 34790 }, { "epoch": 13.51, "learning_rate": 1.8198058252427185e-05, "loss": 0.3103, "step": 34800 }, { "epoch": 13.52, "learning_rate": 1.8197540453074436e-05, "loss": 0.12, "step": 34810 }, { "epoch": 13.52, "learning_rate": 1.8197022653721684e-05, "loss": 0.0674, "step": 34820 }, { "epoch": 13.53, "learning_rate": 1.8196504854368932e-05, "loss": 0.0767, "step": 34830 }, { "epoch": 13.53, "learning_rate": 1.8195987055016184e-05, "loss": 0.2512, "step": 34840 }, { "epoch": 13.53, "learning_rate": 1.819546925566343e-05, "loss": 0.1229, "step": 34850 }, { "epoch": 13.54, "learning_rate": 1.8194951456310683e-05, "loss": 0.1391, "step": 34860 }, { "epoch": 13.54, "learning_rate": 1.819443365695793e-05, "loss": 0.1324, "step": 34870 }, { "epoch": 13.55, "learning_rate": 1.819391585760518e-05, "loss": 0.1865, "step": 34880 }, { "epoch": 13.55, "learning_rate": 1.819339805825243e-05, "loss": 0.1875, "step": 34890 }, { "epoch": 13.55, "learning_rate": 1.8192880258899678e-05, "loss": 0.1846, "step": 34900 }, { "epoch": 13.56, "learning_rate": 1.8192362459546926e-05, "loss": 0.0059, "step": 34910 }, { "epoch": 13.56, "learning_rate": 1.8191844660194177e-05, "loss": 0.4132, "step": 34920 }, { "epoch": 13.57, "learning_rate": 1.8191326860841425e-05, "loss": 0.1391, "step": 34930 }, { "epoch": 13.57, "learning_rate": 1.8190809061488673e-05, "loss": 0.0524, "step": 34940 }, { "epoch": 13.57, "learning_rate": 1.8190291262135924e-05, "loss": 0.1829, "step": 34950 }, { "epoch": 13.58, "learning_rate": 1.8189773462783172e-05, "loss": 0.083, "step": 34960 }, { "epoch": 13.58, "learning_rate": 1.8189255663430423e-05, "loss": 0.2328, "step": 34970 }, { "epoch": 13.58, "learning_rate": 1.818873786407767e-05, "loss": 0.191, "step": 34980 }, { "epoch": 13.59, "learning_rate": 1.818822006472492e-05, "loss": 0.1234, "step": 34990 }, { "epoch": 13.59, "learning_rate": 1.818770226537217e-05, "loss": 0.1355, "step": 35000 }, { "epoch": 13.6, "learning_rate": 1.818718446601942e-05, "loss": 0.209, "step": 35010 }, { "epoch": 13.6, "learning_rate": 1.8186666666666666e-05, "loss": 0.0095, "step": 35020 }, { "epoch": 13.6, "learning_rate": 1.8186148867313918e-05, "loss": 0.1791, "step": 35030 }, { "epoch": 13.61, "learning_rate": 1.8185631067961166e-05, "loss": 0.1262, "step": 35040 }, { "epoch": 13.61, "learning_rate": 1.8185113268608417e-05, "loss": 0.1034, "step": 35050 }, { "epoch": 13.62, "learning_rate": 1.8184595469255665e-05, "loss": 0.0421, "step": 35060 }, { "epoch": 13.62, "learning_rate": 1.8184077669902913e-05, "loss": 0.2702, "step": 35070 }, { "epoch": 13.62, "learning_rate": 1.8183559870550164e-05, "loss": 0.071, "step": 35080 }, { "epoch": 13.63, "learning_rate": 1.8183042071197412e-05, "loss": 0.2968, "step": 35090 }, { "epoch": 13.63, "learning_rate": 1.818252427184466e-05, "loss": 0.1775, "step": 35100 }, { "epoch": 13.63, "learning_rate": 1.818200647249191e-05, "loss": 0.1325, "step": 35110 }, { "epoch": 13.64, "learning_rate": 1.818148867313916e-05, "loss": 0.1273, "step": 35120 }, { "epoch": 13.64, "learning_rate": 1.818097087378641e-05, "loss": 0.1228, "step": 35130 }, { "epoch": 13.65, "learning_rate": 1.818045307443366e-05, "loss": 0.1008, "step": 35140 }, { "epoch": 13.65, "learning_rate": 1.8179935275080906e-05, "loss": 0.2047, "step": 35150 }, { "epoch": 13.65, "learning_rate": 1.8179417475728158e-05, "loss": 0.1245, "step": 35160 }, { "epoch": 13.66, "learning_rate": 1.8178899676375406e-05, "loss": 0.1787, "step": 35170 }, { "epoch": 13.66, "learning_rate": 1.8178381877022654e-05, "loss": 0.0492, "step": 35180 }, { "epoch": 13.67, "learning_rate": 1.8177864077669905e-05, "loss": 0.176, "step": 35190 }, { "epoch": 13.67, "learning_rate": 1.8177346278317153e-05, "loss": 0.1849, "step": 35200 }, { "epoch": 13.67, "learning_rate": 1.8176828478964404e-05, "loss": 0.3279, "step": 35210 }, { "epoch": 13.68, "learning_rate": 1.8176310679611652e-05, "loss": 0.1023, "step": 35220 }, { "epoch": 13.68, "learning_rate": 1.81757928802589e-05, "loss": 0.2586, "step": 35230 }, { "epoch": 13.69, "learning_rate": 1.8175275080906148e-05, "loss": 0.1488, "step": 35240 }, { "epoch": 13.69, "learning_rate": 1.81747572815534e-05, "loss": 0.0795, "step": 35250 }, { "epoch": 13.69, "learning_rate": 1.8174239482200647e-05, "loss": 0.0305, "step": 35260 }, { "epoch": 13.7, "learning_rate": 1.81737216828479e-05, "loss": 0.0984, "step": 35270 }, { "epoch": 13.7, "learning_rate": 1.8173203883495146e-05, "loss": 0.164, "step": 35280 }, { "epoch": 13.7, "learning_rate": 1.8172686084142398e-05, "loss": 0.2855, "step": 35290 }, { "epoch": 13.71, "learning_rate": 1.8172168284789646e-05, "loss": 0.2669, "step": 35300 }, { "epoch": 13.71, "learning_rate": 1.8171650485436894e-05, "loss": 0.0752, "step": 35310 }, { "epoch": 13.72, "learning_rate": 1.817113268608414e-05, "loss": 0.1419, "step": 35320 }, { "epoch": 13.72, "learning_rate": 1.8170614886731393e-05, "loss": 0.0263, "step": 35330 }, { "epoch": 13.72, "learning_rate": 1.817009708737864e-05, "loss": 0.1472, "step": 35340 }, { "epoch": 13.73, "learning_rate": 1.8169579288025892e-05, "loss": 0.0431, "step": 35350 }, { "epoch": 13.73, "learning_rate": 1.816906148867314e-05, "loss": 0.1179, "step": 35360 }, { "epoch": 13.74, "learning_rate": 1.816854368932039e-05, "loss": 0.0734, "step": 35370 }, { "epoch": 13.74, "learning_rate": 1.816802588996764e-05, "loss": 0.1569, "step": 35380 }, { "epoch": 13.74, "learning_rate": 1.816750809061489e-05, "loss": 0.0364, "step": 35390 }, { "epoch": 13.75, "learning_rate": 1.8166990291262135e-05, "loss": 0.1466, "step": 35400 }, { "epoch": 13.75, "learning_rate": 1.8166472491909386e-05, "loss": 0.0613, "step": 35410 }, { "epoch": 13.76, "learning_rate": 1.8165954692556634e-05, "loss": 0.1592, "step": 35420 }, { "epoch": 13.76, "learning_rate": 1.8165436893203886e-05, "loss": 0.0692, "step": 35430 }, { "epoch": 13.76, "learning_rate": 1.8164919093851133e-05, "loss": 0.3594, "step": 35440 }, { "epoch": 13.77, "learning_rate": 1.8164401294498385e-05, "loss": 0.1592, "step": 35450 }, { "epoch": 13.77, "learning_rate": 1.8163883495145633e-05, "loss": 0.0616, "step": 35460 }, { "epoch": 13.77, "learning_rate": 1.8163365695792884e-05, "loss": 0.2361, "step": 35470 }, { "epoch": 13.78, "learning_rate": 1.816284789644013e-05, "loss": 0.0818, "step": 35480 }, { "epoch": 13.78, "learning_rate": 1.816233009708738e-05, "loss": 0.1398, "step": 35490 }, { "epoch": 13.79, "learning_rate": 1.8161812297734628e-05, "loss": 0.1174, "step": 35500 }, { "epoch": 13.79, "learning_rate": 1.816129449838188e-05, "loss": 0.0332, "step": 35510 }, { "epoch": 13.79, "learning_rate": 1.8160776699029127e-05, "loss": 0.1157, "step": 35520 }, { "epoch": 13.8, "learning_rate": 1.816025889967638e-05, "loss": 0.1664, "step": 35530 }, { "epoch": 13.8, "learning_rate": 1.8159741100323626e-05, "loss": 0.2075, "step": 35540 }, { "epoch": 13.81, "learning_rate": 1.8159223300970878e-05, "loss": 0.0129, "step": 35550 }, { "epoch": 13.81, "learning_rate": 1.8158705501618122e-05, "loss": 0.0299, "step": 35560 }, { "epoch": 13.81, "learning_rate": 1.8158187702265373e-05, "loss": 0.0519, "step": 35570 }, { "epoch": 13.82, "learning_rate": 1.815766990291262e-05, "loss": 0.1967, "step": 35580 }, { "epoch": 13.82, "learning_rate": 1.8157152103559873e-05, "loss": 0.1316, "step": 35590 }, { "epoch": 13.83, "learning_rate": 1.815663430420712e-05, "loss": 0.2539, "step": 35600 }, { "epoch": 13.83, "learning_rate": 1.8156116504854372e-05, "loss": 0.201, "step": 35610 }, { "epoch": 13.83, "learning_rate": 1.815559870550162e-05, "loss": 0.1136, "step": 35620 }, { "epoch": 13.84, "learning_rate": 1.815508090614887e-05, "loss": 0.1167, "step": 35630 }, { "epoch": 13.84, "learning_rate": 1.8154563106796116e-05, "loss": 0.1663, "step": 35640 }, { "epoch": 13.84, "learning_rate": 1.8154045307443367e-05, "loss": 0.131, "step": 35650 }, { "epoch": 13.85, "learning_rate": 1.8153527508090615e-05, "loss": 0.135, "step": 35660 }, { "epoch": 13.85, "learning_rate": 1.8153009708737866e-05, "loss": 0.1261, "step": 35670 }, { "epoch": 13.86, "learning_rate": 1.8152491909385114e-05, "loss": 0.1435, "step": 35680 }, { "epoch": 13.86, "learning_rate": 1.8151974110032365e-05, "loss": 0.0684, "step": 35690 }, { "epoch": 13.86, "learning_rate": 1.8151456310679613e-05, "loss": 0.0289, "step": 35700 }, { "epoch": 13.87, "learning_rate": 1.8150938511326865e-05, "loss": 0.0978, "step": 35710 }, { "epoch": 13.87, "learning_rate": 1.815042071197411e-05, "loss": 0.0758, "step": 35720 }, { "epoch": 13.88, "learning_rate": 1.814990291262136e-05, "loss": 0.2285, "step": 35730 }, { "epoch": 13.88, "learning_rate": 1.814938511326861e-05, "loss": 0.1023, "step": 35740 }, { "epoch": 13.88, "learning_rate": 1.814886731391586e-05, "loss": 0.0807, "step": 35750 }, { "epoch": 13.89, "learning_rate": 1.8148349514563108e-05, "loss": 0.1097, "step": 35760 }, { "epoch": 13.89, "learning_rate": 1.814783171521036e-05, "loss": 0.341, "step": 35770 }, { "epoch": 13.9, "learning_rate": 1.8147313915857607e-05, "loss": 0.1208, "step": 35780 }, { "epoch": 13.9, "learning_rate": 1.8146796116504858e-05, "loss": 0.1371, "step": 35790 }, { "epoch": 13.9, "learning_rate": 1.8146278317152103e-05, "loss": 0.0318, "step": 35800 }, { "epoch": 13.91, "learning_rate": 1.8145760517799354e-05, "loss": 0.0884, "step": 35810 }, { "epoch": 13.91, "learning_rate": 1.8145242718446602e-05, "loss": 0.1914, "step": 35820 }, { "epoch": 13.91, "learning_rate": 1.8144724919093853e-05, "loss": 0.2362, "step": 35830 }, { "epoch": 13.92, "learning_rate": 1.81442071197411e-05, "loss": 0.1666, "step": 35840 }, { "epoch": 13.92, "learning_rate": 1.8143689320388353e-05, "loss": 0.135, "step": 35850 }, { "epoch": 13.93, "learning_rate": 1.81431715210356e-05, "loss": 0.1548, "step": 35860 }, { "epoch": 13.93, "learning_rate": 1.8142653721682852e-05, "loss": 0.1692, "step": 35870 }, { "epoch": 13.93, "learning_rate": 1.81421359223301e-05, "loss": 0.0222, "step": 35880 }, { "epoch": 13.94, "learning_rate": 1.8141618122977348e-05, "loss": 0.1975, "step": 35890 }, { "epoch": 13.94, "learning_rate": 1.8141100323624596e-05, "loss": 0.1991, "step": 35900 }, { "epoch": 13.95, "learning_rate": 1.8140582524271847e-05, "loss": 0.1924, "step": 35910 }, { "epoch": 13.95, "learning_rate": 1.8140064724919095e-05, "loss": 0.1797, "step": 35920 }, { "epoch": 13.95, "learning_rate": 1.8139546925566346e-05, "loss": 0.2069, "step": 35930 }, { "epoch": 13.96, "learning_rate": 1.8139029126213594e-05, "loss": 0.4265, "step": 35940 }, { "epoch": 13.96, "learning_rate": 1.8138511326860845e-05, "loss": 0.0579, "step": 35950 }, { "epoch": 13.97, "learning_rate": 1.8137993527508093e-05, "loss": 0.2866, "step": 35960 }, { "epoch": 13.97, "learning_rate": 1.813747572815534e-05, "loss": 0.2069, "step": 35970 }, { "epoch": 13.97, "learning_rate": 1.813695792880259e-05, "loss": 0.1737, "step": 35980 }, { "epoch": 13.98, "learning_rate": 1.813644012944984e-05, "loss": 0.0397, "step": 35990 }, { "epoch": 13.98, "learning_rate": 1.813592233009709e-05, "loss": 0.133, "step": 36000 }, { "epoch": 13.98, "learning_rate": 1.813540453074434e-05, "loss": 0.1399, "step": 36010 }, { "epoch": 13.99, "learning_rate": 1.8134886731391588e-05, "loss": 0.168, "step": 36020 }, { "epoch": 13.99, "learning_rate": 1.8134368932038836e-05, "loss": 0.1163, "step": 36030 }, { "epoch": 14.0, "learning_rate": 1.8133851132686087e-05, "loss": 0.1748, "step": 36040 }, { "epoch": 14.0, "learning_rate": 1.8133333333333335e-05, "loss": 0.0179, "step": 36050 }, { "epoch": 14.0, "eval_accuracy": 0.9482806052269601, "eval_loss": 0.22397606074810028, "eval_runtime": 8.2572, "eval_samples_per_second": 440.222, "eval_steps_per_second": 55.103, "step": 36050 }, { "epoch": 14.0, "learning_rate": 1.8132815533980583e-05, "loss": 0.1235, "step": 36060 }, { "epoch": 14.01, "learning_rate": 1.8132297734627834e-05, "loss": 0.0825, "step": 36070 }, { "epoch": 14.01, "learning_rate": 1.8131779935275082e-05, "loss": 0.1312, "step": 36080 }, { "epoch": 14.02, "learning_rate": 1.8131262135922333e-05, "loss": 0.0936, "step": 36090 }, { "epoch": 14.02, "learning_rate": 1.813074433656958e-05, "loss": 0.0494, "step": 36100 }, { "epoch": 14.02, "learning_rate": 1.813022653721683e-05, "loss": 0.2018, "step": 36110 }, { "epoch": 14.03, "learning_rate": 1.812970873786408e-05, "loss": 0.1093, "step": 36120 }, { "epoch": 14.03, "learning_rate": 1.812919093851133e-05, "loss": 0.2956, "step": 36130 }, { "epoch": 14.03, "learning_rate": 1.8128673139158576e-05, "loss": 0.0853, "step": 36140 }, { "epoch": 14.04, "learning_rate": 1.8128155339805828e-05, "loss": 0.0711, "step": 36150 }, { "epoch": 14.04, "learning_rate": 1.8127637540453075e-05, "loss": 0.098, "step": 36160 }, { "epoch": 14.05, "learning_rate": 1.8127119741100327e-05, "loss": 0.1364, "step": 36170 }, { "epoch": 14.05, "learning_rate": 1.8126601941747575e-05, "loss": 0.1451, "step": 36180 }, { "epoch": 14.05, "learning_rate": 1.8126084142394823e-05, "loss": 0.0825, "step": 36190 }, { "epoch": 14.06, "learning_rate": 1.8125566343042074e-05, "loss": 0.1643, "step": 36200 }, { "epoch": 14.06, "learning_rate": 1.8125048543689322e-05, "loss": 0.1, "step": 36210 }, { "epoch": 14.07, "learning_rate": 1.812453074433657e-05, "loss": 0.2269, "step": 36220 }, { "epoch": 14.07, "learning_rate": 1.812401294498382e-05, "loss": 0.2601, "step": 36230 }, { "epoch": 14.07, "learning_rate": 1.812349514563107e-05, "loss": 0.1232, "step": 36240 }, { "epoch": 14.08, "learning_rate": 1.812297734627832e-05, "loss": 0.0788, "step": 36250 }, { "epoch": 14.08, "learning_rate": 1.8122459546925568e-05, "loss": 0.159, "step": 36260 }, { "epoch": 14.09, "learning_rate": 1.8121941747572816e-05, "loss": 0.1311, "step": 36270 }, { "epoch": 14.09, "learning_rate": 1.8121423948220068e-05, "loss": 0.0923, "step": 36280 }, { "epoch": 14.09, "learning_rate": 1.8120906148867315e-05, "loss": 0.1791, "step": 36290 }, { "epoch": 14.1, "learning_rate": 1.8120388349514563e-05, "loss": 0.1007, "step": 36300 }, { "epoch": 14.1, "learning_rate": 1.8119870550161815e-05, "loss": 0.2406, "step": 36310 }, { "epoch": 14.1, "learning_rate": 1.8119352750809063e-05, "loss": 0.0147, "step": 36320 }, { "epoch": 14.11, "learning_rate": 1.811883495145631e-05, "loss": 0.0484, "step": 36330 }, { "epoch": 14.11, "learning_rate": 1.8118317152103562e-05, "loss": 0.1523, "step": 36340 }, { "epoch": 14.12, "learning_rate": 1.811779935275081e-05, "loss": 0.0185, "step": 36350 }, { "epoch": 14.12, "learning_rate": 1.811728155339806e-05, "loss": 0.0667, "step": 36360 }, { "epoch": 14.12, "learning_rate": 1.811676375404531e-05, "loss": 0.0815, "step": 36370 }, { "epoch": 14.13, "learning_rate": 1.8116245954692557e-05, "loss": 0.0997, "step": 36380 }, { "epoch": 14.13, "learning_rate": 1.8115728155339808e-05, "loss": 0.1242, "step": 36390 }, { "epoch": 14.14, "learning_rate": 1.8115210355987056e-05, "loss": 0.1923, "step": 36400 }, { "epoch": 14.14, "learning_rate": 1.8114692556634304e-05, "loss": 0.0938, "step": 36410 }, { "epoch": 14.14, "learning_rate": 1.8114174757281555e-05, "loss": 0.0798, "step": 36420 }, { "epoch": 14.15, "learning_rate": 1.8113656957928803e-05, "loss": 0.1797, "step": 36430 }, { "epoch": 14.15, "learning_rate": 1.8113139158576055e-05, "loss": 0.0905, "step": 36440 }, { "epoch": 14.16, "learning_rate": 1.8112621359223303e-05, "loss": 0.0905, "step": 36450 }, { "epoch": 14.16, "learning_rate": 1.811210355987055e-05, "loss": 0.2132, "step": 36460 }, { "epoch": 14.16, "learning_rate": 1.8111585760517802e-05, "loss": 0.1924, "step": 36470 }, { "epoch": 14.17, "learning_rate": 1.811106796116505e-05, "loss": 0.2039, "step": 36480 }, { "epoch": 14.17, "learning_rate": 1.8110550161812298e-05, "loss": 0.1392, "step": 36490 }, { "epoch": 14.17, "learning_rate": 1.811003236245955e-05, "loss": 0.0946, "step": 36500 }, { "epoch": 14.18, "learning_rate": 1.8109514563106797e-05, "loss": 0.0832, "step": 36510 }, { "epoch": 14.18, "learning_rate": 1.8108996763754048e-05, "loss": 0.2902, "step": 36520 }, { "epoch": 14.19, "learning_rate": 1.8108478964401296e-05, "loss": 0.0471, "step": 36530 }, { "epoch": 14.19, "learning_rate": 1.8107961165048544e-05, "loss": 0.0537, "step": 36540 }, { "epoch": 14.19, "learning_rate": 1.8107443365695795e-05, "loss": 0.2078, "step": 36550 }, { "epoch": 14.2, "learning_rate": 1.8106925566343043e-05, "loss": 0.0786, "step": 36560 }, { "epoch": 14.2, "learning_rate": 1.810640776699029e-05, "loss": 0.163, "step": 36570 }, { "epoch": 14.21, "learning_rate": 1.8105889967637543e-05, "loss": 0.0875, "step": 36580 }, { "epoch": 14.21, "learning_rate": 1.810537216828479e-05, "loss": 0.0139, "step": 36590 }, { "epoch": 14.21, "learning_rate": 1.8104854368932042e-05, "loss": 0.048, "step": 36600 }, { "epoch": 14.22, "learning_rate": 1.810433656957929e-05, "loss": 0.1637, "step": 36610 }, { "epoch": 14.22, "learning_rate": 1.8103818770226538e-05, "loss": 0.1923, "step": 36620 }, { "epoch": 14.23, "learning_rate": 1.810330097087379e-05, "loss": 0.0557, "step": 36630 }, { "epoch": 14.23, "learning_rate": 1.8102783171521037e-05, "loss": 0.1769, "step": 36640 }, { "epoch": 14.23, "learning_rate": 1.8102265372168285e-05, "loss": 0.087, "step": 36650 }, { "epoch": 14.24, "learning_rate": 1.8101747572815536e-05, "loss": 0.1318, "step": 36660 }, { "epoch": 14.24, "learning_rate": 1.8101229773462784e-05, "loss": 0.1148, "step": 36670 }, { "epoch": 14.24, "learning_rate": 1.8100711974110035e-05, "loss": 0.1719, "step": 36680 }, { "epoch": 14.25, "learning_rate": 1.8100194174757283e-05, "loss": 0.2312, "step": 36690 }, { "epoch": 14.25, "learning_rate": 1.809967637540453e-05, "loss": 0.0628, "step": 36700 }, { "epoch": 14.26, "learning_rate": 1.809915857605178e-05, "loss": 0.0871, "step": 36710 }, { "epoch": 14.26, "learning_rate": 1.809864077669903e-05, "loss": 0.0729, "step": 36720 }, { "epoch": 14.26, "learning_rate": 1.8098122977346278e-05, "loss": 0.0805, "step": 36730 }, { "epoch": 14.27, "learning_rate": 1.809760517799353e-05, "loss": 0.1184, "step": 36740 }, { "epoch": 14.27, "learning_rate": 1.8097087378640778e-05, "loss": 0.1463, "step": 36750 }, { "epoch": 14.28, "learning_rate": 1.809656957928803e-05, "loss": 0.0865, "step": 36760 }, { "epoch": 14.28, "learning_rate": 1.8096051779935277e-05, "loss": 0.1378, "step": 36770 }, { "epoch": 14.28, "learning_rate": 1.8095533980582525e-05, "loss": 0.0954, "step": 36780 }, { "epoch": 14.29, "learning_rate": 1.8095016181229773e-05, "loss": 0.0783, "step": 36790 }, { "epoch": 14.29, "learning_rate": 1.8094498381877024e-05, "loss": 0.1978, "step": 36800 }, { "epoch": 14.3, "learning_rate": 1.8093980582524272e-05, "loss": 0.2386, "step": 36810 }, { "epoch": 14.3, "learning_rate": 1.8093462783171523e-05, "loss": 0.1929, "step": 36820 }, { "epoch": 14.3, "learning_rate": 1.809294498381877e-05, "loss": 0.0918, "step": 36830 }, { "epoch": 14.31, "learning_rate": 1.8092427184466022e-05, "loss": 0.0107, "step": 36840 }, { "epoch": 14.31, "learning_rate": 1.809190938511327e-05, "loss": 0.09, "step": 36850 }, { "epoch": 14.31, "learning_rate": 1.8091391585760518e-05, "loss": 0.1289, "step": 36860 }, { "epoch": 14.32, "learning_rate": 1.8090873786407766e-05, "loss": 0.2442, "step": 36870 }, { "epoch": 14.32, "learning_rate": 1.8090355987055017e-05, "loss": 0.313, "step": 36880 }, { "epoch": 14.33, "learning_rate": 1.8089838187702265e-05, "loss": 0.1892, "step": 36890 }, { "epoch": 14.33, "learning_rate": 1.8089320388349517e-05, "loss": 0.1018, "step": 36900 }, { "epoch": 14.33, "learning_rate": 1.8088802588996765e-05, "loss": 0.0436, "step": 36910 }, { "epoch": 14.34, "learning_rate": 1.8088284789644016e-05, "loss": 0.1088, "step": 36920 }, { "epoch": 14.34, "learning_rate": 1.8087766990291264e-05, "loss": 0.1149, "step": 36930 }, { "epoch": 14.35, "learning_rate": 1.8087249190938512e-05, "loss": 0.1123, "step": 36940 }, { "epoch": 14.35, "learning_rate": 1.808673139158576e-05, "loss": 0.044, "step": 36950 }, { "epoch": 14.35, "learning_rate": 1.808621359223301e-05, "loss": 0.0489, "step": 36960 }, { "epoch": 14.36, "learning_rate": 1.808569579288026e-05, "loss": 0.0751, "step": 36970 }, { "epoch": 14.36, "learning_rate": 1.808517799352751e-05, "loss": 0.1026, "step": 36980 }, { "epoch": 14.37, "learning_rate": 1.8084660194174758e-05, "loss": 0.0914, "step": 36990 }, { "epoch": 14.37, "learning_rate": 1.808414239482201e-05, "loss": 0.0669, "step": 37000 }, { "epoch": 14.37, "learning_rate": 1.8083624595469257e-05, "loss": 0.1055, "step": 37010 }, { "epoch": 14.38, "learning_rate": 1.808310679611651e-05, "loss": 0.0614, "step": 37020 }, { "epoch": 14.38, "learning_rate": 1.8082588996763753e-05, "loss": 0.103, "step": 37030 }, { "epoch": 14.38, "learning_rate": 1.8082071197411005e-05, "loss": 0.0998, "step": 37040 }, { "epoch": 14.39, "learning_rate": 1.8081553398058253e-05, "loss": 0.1021, "step": 37050 }, { "epoch": 14.39, "learning_rate": 1.8081035598705504e-05, "loss": 0.0499, "step": 37060 }, { "epoch": 14.4, "learning_rate": 1.8080517799352752e-05, "loss": 0.1318, "step": 37070 }, { "epoch": 14.4, "learning_rate": 1.8080000000000003e-05, "loss": 0.0337, "step": 37080 }, { "epoch": 14.4, "learning_rate": 1.807948220064725e-05, "loss": 0.1046, "step": 37090 }, { "epoch": 14.41, "learning_rate": 1.8078964401294502e-05, "loss": 0.0693, "step": 37100 }, { "epoch": 14.41, "learning_rate": 1.8078446601941747e-05, "loss": 0.0902, "step": 37110 }, { "epoch": 14.42, "learning_rate": 1.8077928802588998e-05, "loss": 0.1726, "step": 37120 }, { "epoch": 14.42, "learning_rate": 1.8077411003236246e-05, "loss": 0.1457, "step": 37130 }, { "epoch": 14.42, "learning_rate": 1.8076893203883497e-05, "loss": 0.316, "step": 37140 }, { "epoch": 14.43, "learning_rate": 1.8076375404530745e-05, "loss": 0.2566, "step": 37150 }, { "epoch": 14.43, "learning_rate": 1.8075857605177997e-05, "loss": 0.0387, "step": 37160 }, { "epoch": 14.43, "learning_rate": 1.8075339805825245e-05, "loss": 0.0984, "step": 37170 }, { "epoch": 14.44, "learning_rate": 1.8074822006472496e-05, "loss": 0.2197, "step": 37180 }, { "epoch": 14.44, "learning_rate": 1.807430420711974e-05, "loss": 0.1413, "step": 37190 }, { "epoch": 14.45, "learning_rate": 1.8073786407766992e-05, "loss": 0.0654, "step": 37200 }, { "epoch": 14.45, "learning_rate": 1.807326860841424e-05, "loss": 0.1518, "step": 37210 }, { "epoch": 14.45, "learning_rate": 1.807275080906149e-05, "loss": 0.104, "step": 37220 }, { "epoch": 14.46, "learning_rate": 1.807223300970874e-05, "loss": 0.0854, "step": 37230 }, { "epoch": 14.46, "learning_rate": 1.807171521035599e-05, "loss": 0.1246, "step": 37240 }, { "epoch": 14.47, "learning_rate": 1.8071197411003238e-05, "loss": 0.1575, "step": 37250 }, { "epoch": 14.47, "learning_rate": 1.807067961165049e-05, "loss": 0.1385, "step": 37260 }, { "epoch": 14.47, "learning_rate": 1.8070161812297734e-05, "loss": 0.3931, "step": 37270 }, { "epoch": 14.48, "learning_rate": 1.8069644012944985e-05, "loss": 0.1216, "step": 37280 }, { "epoch": 14.48, "learning_rate": 1.8069126213592233e-05, "loss": 0.0779, "step": 37290 }, { "epoch": 14.49, "learning_rate": 1.8068608414239485e-05, "loss": 0.0648, "step": 37300 }, { "epoch": 14.49, "learning_rate": 1.8068090614886732e-05, "loss": 0.1798, "step": 37310 }, { "epoch": 14.49, "learning_rate": 1.8067572815533984e-05, "loss": 0.0722, "step": 37320 }, { "epoch": 14.5, "learning_rate": 1.806705501618123e-05, "loss": 0.1158, "step": 37330 }, { "epoch": 14.5, "learning_rate": 1.8066537216828483e-05, "loss": 0.1127, "step": 37340 }, { "epoch": 14.5, "learning_rate": 1.8066019417475728e-05, "loss": 0.0993, "step": 37350 }, { "epoch": 14.51, "learning_rate": 1.806550161812298e-05, "loss": 0.1285, "step": 37360 }, { "epoch": 14.51, "learning_rate": 1.8064983818770227e-05, "loss": 0.3027, "step": 37370 }, { "epoch": 14.52, "learning_rate": 1.8064466019417478e-05, "loss": 0.0685, "step": 37380 }, { "epoch": 14.52, "learning_rate": 1.8063948220064726e-05, "loss": 0.2246, "step": 37390 }, { "epoch": 14.52, "learning_rate": 1.8063430420711977e-05, "loss": 0.1423, "step": 37400 }, { "epoch": 14.53, "learning_rate": 1.8062912621359225e-05, "loss": 0.0529, "step": 37410 }, { "epoch": 14.53, "learning_rate": 1.8062394822006477e-05, "loss": 0.1809, "step": 37420 }, { "epoch": 14.54, "learning_rate": 1.806187702265372e-05, "loss": 0.053, "step": 37430 }, { "epoch": 14.54, "learning_rate": 1.8061359223300972e-05, "loss": 0.0544, "step": 37440 }, { "epoch": 14.54, "learning_rate": 1.806084142394822e-05, "loss": 0.2064, "step": 37450 }, { "epoch": 14.55, "learning_rate": 1.806032362459547e-05, "loss": 0.1813, "step": 37460 }, { "epoch": 14.55, "learning_rate": 1.805980582524272e-05, "loss": 0.1736, "step": 37470 }, { "epoch": 14.56, "learning_rate": 1.805928802588997e-05, "loss": 0.1937, "step": 37480 }, { "epoch": 14.56, "learning_rate": 1.805877022653722e-05, "loss": 0.0418, "step": 37490 }, { "epoch": 14.56, "learning_rate": 1.8058252427184467e-05, "loss": 0.147, "step": 37500 }, { "epoch": 14.57, "learning_rate": 1.8057734627831715e-05, "loss": 0.1004, "step": 37510 }, { "epoch": 14.57, "learning_rate": 1.8057216828478966e-05, "loss": 0.0995, "step": 37520 }, { "epoch": 14.57, "learning_rate": 1.8056699029126214e-05, "loss": 0.074, "step": 37530 }, { "epoch": 14.58, "learning_rate": 1.8056181229773465e-05, "loss": 0.1279, "step": 37540 }, { "epoch": 14.58, "learning_rate": 1.8055663430420713e-05, "loss": 0.2038, "step": 37550 }, { "epoch": 14.59, "learning_rate": 1.8055145631067964e-05, "loss": 0.3478, "step": 37560 }, { "epoch": 14.59, "learning_rate": 1.8054627831715212e-05, "loss": 0.1836, "step": 37570 }, { "epoch": 14.59, "learning_rate": 1.805411003236246e-05, "loss": 0.0706, "step": 37580 }, { "epoch": 14.6, "learning_rate": 1.805359223300971e-05, "loss": 0.2899, "step": 37590 }, { "epoch": 14.6, "learning_rate": 1.805307443365696e-05, "loss": 0.1112, "step": 37600 }, { "epoch": 14.61, "learning_rate": 1.8052556634304207e-05, "loss": 0.2469, "step": 37610 }, { "epoch": 14.61, "learning_rate": 1.805203883495146e-05, "loss": 0.115, "step": 37620 }, { "epoch": 14.61, "learning_rate": 1.8051521035598707e-05, "loss": 0.1248, "step": 37630 }, { "epoch": 14.62, "learning_rate": 1.8051003236245958e-05, "loss": 0.0057, "step": 37640 }, { "epoch": 14.62, "learning_rate": 1.8050485436893206e-05, "loss": 0.0366, "step": 37650 }, { "epoch": 14.63, "learning_rate": 1.8049967637540454e-05, "loss": 0.2063, "step": 37660 }, { "epoch": 14.63, "learning_rate": 1.8049449838187705e-05, "loss": 0.2725, "step": 37670 }, { "epoch": 14.63, "learning_rate": 1.8048932038834953e-05, "loss": 0.1387, "step": 37680 }, { "epoch": 14.64, "learning_rate": 1.80484142394822e-05, "loss": 0.0462, "step": 37690 }, { "epoch": 14.64, "learning_rate": 1.8047896440129452e-05, "loss": 0.2134, "step": 37700 }, { "epoch": 14.64, "learning_rate": 1.80473786407767e-05, "loss": 0.0802, "step": 37710 }, { "epoch": 14.65, "learning_rate": 1.804686084142395e-05, "loss": 0.276, "step": 37720 }, { "epoch": 14.65, "learning_rate": 1.80463430420712e-05, "loss": 0.0608, "step": 37730 }, { "epoch": 14.66, "learning_rate": 1.8045825242718447e-05, "loss": 0.0997, "step": 37740 }, { "epoch": 14.66, "learning_rate": 1.80453074433657e-05, "loss": 0.1741, "step": 37750 }, { "epoch": 14.66, "learning_rate": 1.8044789644012947e-05, "loss": 0.0546, "step": 37760 }, { "epoch": 14.67, "learning_rate": 1.8044271844660195e-05, "loss": 0.0282, "step": 37770 }, { "epoch": 14.67, "learning_rate": 1.8043754045307446e-05, "loss": 0.1732, "step": 37780 }, { "epoch": 14.68, "learning_rate": 1.8043236245954694e-05, "loss": 0.1249, "step": 37790 }, { "epoch": 14.68, "learning_rate": 1.8042718446601942e-05, "loss": 0.1209, "step": 37800 }, { "epoch": 14.68, "learning_rate": 1.8042200647249193e-05, "loss": 0.0315, "step": 37810 }, { "epoch": 14.69, "learning_rate": 1.804168284789644e-05, "loss": 0.0918, "step": 37820 }, { "epoch": 14.69, "learning_rate": 1.8041165048543692e-05, "loss": 0.2115, "step": 37830 }, { "epoch": 14.7, "learning_rate": 1.804064724919094e-05, "loss": 0.2193, "step": 37840 }, { "epoch": 14.7, "learning_rate": 1.8040129449838188e-05, "loss": 0.139, "step": 37850 }, { "epoch": 14.7, "learning_rate": 1.803961165048544e-05, "loss": 0.0947, "step": 37860 }, { "epoch": 14.71, "learning_rate": 1.8039093851132687e-05, "loss": 0.1034, "step": 37870 }, { "epoch": 14.71, "learning_rate": 1.8038576051779935e-05, "loss": 0.0765, "step": 37880 }, { "epoch": 14.71, "learning_rate": 1.8038058252427187e-05, "loss": 0.1034, "step": 37890 }, { "epoch": 14.72, "learning_rate": 1.8037540453074434e-05, "loss": 0.1092, "step": 37900 }, { "epoch": 14.72, "learning_rate": 1.8037022653721686e-05, "loss": 0.2558, "step": 37910 }, { "epoch": 14.73, "learning_rate": 1.8036504854368934e-05, "loss": 0.0578, "step": 37920 }, { "epoch": 14.73, "learning_rate": 1.803598705501618e-05, "loss": 0.1744, "step": 37930 }, { "epoch": 14.73, "learning_rate": 1.8035469255663433e-05, "loss": 0.0166, "step": 37940 }, { "epoch": 14.74, "learning_rate": 1.803495145631068e-05, "loss": 0.2154, "step": 37950 }, { "epoch": 14.74, "learning_rate": 1.803443365695793e-05, "loss": 0.1728, "step": 37960 }, { "epoch": 14.75, "learning_rate": 1.803391585760518e-05, "loss": 0.1164, "step": 37970 }, { "epoch": 14.75, "learning_rate": 1.8033398058252428e-05, "loss": 0.1836, "step": 37980 }, { "epoch": 14.75, "learning_rate": 1.803288025889968e-05, "loss": 0.3583, "step": 37990 }, { "epoch": 14.76, "learning_rate": 1.8032362459546927e-05, "loss": 0.0838, "step": 38000 }, { "epoch": 14.76, "learning_rate": 1.8031844660194175e-05, "loss": 0.2662, "step": 38010 }, { "epoch": 14.77, "learning_rate": 1.8031326860841427e-05, "loss": 0.0783, "step": 38020 }, { "epoch": 14.77, "learning_rate": 1.8030809061488674e-05, "loss": 0.1042, "step": 38030 }, { "epoch": 14.77, "learning_rate": 1.8030291262135922e-05, "loss": 0.1035, "step": 38040 }, { "epoch": 14.78, "learning_rate": 1.8029773462783174e-05, "loss": 0.0877, "step": 38050 }, { "epoch": 14.78, "learning_rate": 1.802925566343042e-05, "loss": 0.3283, "step": 38060 }, { "epoch": 14.78, "learning_rate": 1.8028737864077673e-05, "loss": 0.1205, "step": 38070 }, { "epoch": 14.79, "learning_rate": 1.802822006472492e-05, "loss": 0.1009, "step": 38080 }, { "epoch": 14.79, "learning_rate": 1.802770226537217e-05, "loss": 0.0571, "step": 38090 }, { "epoch": 14.8, "learning_rate": 1.802718446601942e-05, "loss": 0.1193, "step": 38100 }, { "epoch": 14.8, "learning_rate": 1.8026666666666668e-05, "loss": 0.0988, "step": 38110 }, { "epoch": 14.8, "learning_rate": 1.8026148867313916e-05, "loss": 0.1389, "step": 38120 }, { "epoch": 14.81, "learning_rate": 1.8025631067961167e-05, "loss": 0.2079, "step": 38130 }, { "epoch": 14.81, "learning_rate": 1.8025113268608415e-05, "loss": 0.1587, "step": 38140 }, { "epoch": 14.82, "learning_rate": 1.8024595469255666e-05, "loss": 0.1053, "step": 38150 }, { "epoch": 14.82, "learning_rate": 1.8024077669902914e-05, "loss": 0.1324, "step": 38160 }, { "epoch": 14.82, "learning_rate": 1.8023559870550162e-05, "loss": 0.079, "step": 38170 }, { "epoch": 14.83, "learning_rate": 1.802304207119741e-05, "loss": 0.1886, "step": 38180 }, { "epoch": 14.83, "learning_rate": 1.802252427184466e-05, "loss": 0.1384, "step": 38190 }, { "epoch": 14.83, "learning_rate": 1.802200647249191e-05, "loss": 0.165, "step": 38200 }, { "epoch": 14.84, "learning_rate": 1.802148867313916e-05, "loss": 0.0638, "step": 38210 }, { "epoch": 14.84, "learning_rate": 1.802097087378641e-05, "loss": 0.0895, "step": 38220 }, { "epoch": 14.85, "learning_rate": 1.802045307443366e-05, "loss": 0.1876, "step": 38230 }, { "epoch": 14.85, "learning_rate": 1.8019935275080908e-05, "loss": 0.0944, "step": 38240 }, { "epoch": 14.85, "learning_rate": 1.8019417475728156e-05, "loss": 0.0281, "step": 38250 }, { "epoch": 14.86, "learning_rate": 1.8018899676375404e-05, "loss": 0.2161, "step": 38260 }, { "epoch": 14.86, "learning_rate": 1.8018381877022655e-05, "loss": 0.1949, "step": 38270 }, { "epoch": 14.87, "learning_rate": 1.8017864077669903e-05, "loss": 0.0608, "step": 38280 }, { "epoch": 14.87, "learning_rate": 1.8017346278317154e-05, "loss": 0.2095, "step": 38290 }, { "epoch": 14.87, "learning_rate": 1.8016828478964402e-05, "loss": 0.1398, "step": 38300 }, { "epoch": 14.88, "learning_rate": 1.8016310679611654e-05, "loss": 0.2586, "step": 38310 }, { "epoch": 14.88, "learning_rate": 1.80157928802589e-05, "loss": 0.0606, "step": 38320 }, { "epoch": 14.89, "learning_rate": 1.801527508090615e-05, "loss": 0.099, "step": 38330 }, { "epoch": 14.89, "learning_rate": 1.8014757281553397e-05, "loss": 0.2385, "step": 38340 }, { "epoch": 14.89, "learning_rate": 1.801423948220065e-05, "loss": 0.0977, "step": 38350 }, { "epoch": 14.9, "learning_rate": 1.8013721682847897e-05, "loss": 0.0959, "step": 38360 }, { "epoch": 14.9, "learning_rate": 1.8013203883495148e-05, "loss": 0.2248, "step": 38370 }, { "epoch": 14.9, "learning_rate": 1.8012686084142396e-05, "loss": 0.0647, "step": 38380 }, { "epoch": 14.91, "learning_rate": 1.8012168284789647e-05, "loss": 0.1855, "step": 38390 }, { "epoch": 14.91, "learning_rate": 1.8011650485436895e-05, "loss": 0.3621, "step": 38400 }, { "epoch": 14.92, "learning_rate": 1.8011132686084143e-05, "loss": 0.1729, "step": 38410 }, { "epoch": 14.92, "learning_rate": 1.801061488673139e-05, "loss": 0.0318, "step": 38420 }, { "epoch": 14.92, "learning_rate": 1.8010097087378642e-05, "loss": 0.1176, "step": 38430 }, { "epoch": 14.93, "learning_rate": 1.800957928802589e-05, "loss": 0.2033, "step": 38440 }, { "epoch": 14.93, "learning_rate": 1.800906148867314e-05, "loss": 0.151, "step": 38450 }, { "epoch": 14.94, "learning_rate": 1.800854368932039e-05, "loss": 0.1013, "step": 38460 }, { "epoch": 14.94, "learning_rate": 1.800802588996764e-05, "loss": 0.0302, "step": 38470 }, { "epoch": 14.94, "learning_rate": 1.800750809061489e-05, "loss": 0.0506, "step": 38480 }, { "epoch": 14.95, "learning_rate": 1.8006990291262137e-05, "loss": 0.2014, "step": 38490 }, { "epoch": 14.95, "learning_rate": 1.8006472491909384e-05, "loss": 0.154, "step": 38500 }, { "epoch": 14.96, "learning_rate": 1.8005954692556636e-05, "loss": 0.0977, "step": 38510 }, { "epoch": 14.96, "learning_rate": 1.8005436893203884e-05, "loss": 0.2643, "step": 38520 }, { "epoch": 14.96, "learning_rate": 1.8004919093851135e-05, "loss": 0.093, "step": 38530 }, { "epoch": 14.97, "learning_rate": 1.8004401294498383e-05, "loss": 0.1036, "step": 38540 }, { "epoch": 14.97, "learning_rate": 1.8003883495145634e-05, "loss": 0.2291, "step": 38550 }, { "epoch": 14.97, "learning_rate": 1.8003365695792882e-05, "loss": 0.1159, "step": 38560 }, { "epoch": 14.98, "learning_rate": 1.800284789644013e-05, "loss": 0.2121, "step": 38570 }, { "epoch": 14.98, "learning_rate": 1.8002330097087378e-05, "loss": 0.0277, "step": 38580 }, { "epoch": 14.99, "learning_rate": 1.800181229773463e-05, "loss": 0.3163, "step": 38590 }, { "epoch": 14.99, "learning_rate": 1.8001294498381877e-05, "loss": 0.0929, "step": 38600 }, { "epoch": 14.99, "learning_rate": 1.800077669902913e-05, "loss": 0.0888, "step": 38610 }, { "epoch": 15.0, "learning_rate": 1.8000258899676376e-05, "loss": 0.1447, "step": 38620 }, { "epoch": 15.0, "eval_accuracy": 0.9504814305364512, "eval_loss": 0.23635753989219666, "eval_runtime": 8.2297, "eval_samples_per_second": 441.693, "eval_steps_per_second": 55.288, "step": 38625 }, { "epoch": 15.0, "learning_rate": 1.7999741100323628e-05, "loss": 0.0864, "step": 38630 }, { "epoch": 15.01, "learning_rate": 1.7999223300970876e-05, "loss": 0.0653, "step": 38640 }, { "epoch": 15.01, "learning_rate": 1.7998705501618124e-05, "loss": 0.05, "step": 38650 }, { "epoch": 15.01, "learning_rate": 1.799818770226537e-05, "loss": 0.1601, "step": 38660 }, { "epoch": 15.02, "learning_rate": 1.7997669902912623e-05, "loss": 0.0815, "step": 38670 }, { "epoch": 15.02, "learning_rate": 1.799715210355987e-05, "loss": 0.0874, "step": 38680 }, { "epoch": 15.03, "learning_rate": 1.7996634304207122e-05, "loss": 0.0314, "step": 38690 }, { "epoch": 15.03, "learning_rate": 1.799611650485437e-05, "loss": 0.2666, "step": 38700 }, { "epoch": 15.03, "learning_rate": 1.799559870550162e-05, "loss": 0.1374, "step": 38710 }, { "epoch": 15.04, "learning_rate": 1.799508090614887e-05, "loss": 0.2264, "step": 38720 }, { "epoch": 15.04, "learning_rate": 1.799456310679612e-05, "loss": 0.1946, "step": 38730 }, { "epoch": 15.04, "learning_rate": 1.7994045307443365e-05, "loss": 0.1186, "step": 38740 }, { "epoch": 15.05, "learning_rate": 1.7993527508090616e-05, "loss": 0.0412, "step": 38750 }, { "epoch": 15.05, "learning_rate": 1.7993009708737864e-05, "loss": 0.1545, "step": 38760 }, { "epoch": 15.06, "learning_rate": 1.7992491909385116e-05, "loss": 0.0305, "step": 38770 }, { "epoch": 15.06, "learning_rate": 1.7991974110032364e-05, "loss": 0.1615, "step": 38780 }, { "epoch": 15.06, "learning_rate": 1.7991456310679615e-05, "loss": 0.126, "step": 38790 }, { "epoch": 15.07, "learning_rate": 1.7990938511326863e-05, "loss": 0.1885, "step": 38800 }, { "epoch": 15.07, "learning_rate": 1.7990420711974114e-05, "loss": 0.1673, "step": 38810 }, { "epoch": 15.08, "learning_rate": 1.798990291262136e-05, "loss": 0.1118, "step": 38820 }, { "epoch": 15.08, "learning_rate": 1.798938511326861e-05, "loss": 0.0604, "step": 38830 }, { "epoch": 15.08, "learning_rate": 1.7988867313915858e-05, "loss": 0.0213, "step": 38840 }, { "epoch": 15.09, "learning_rate": 1.798834951456311e-05, "loss": 0.0386, "step": 38850 }, { "epoch": 15.09, "learning_rate": 1.7987831715210357e-05, "loss": 0.0115, "step": 38860 }, { "epoch": 15.1, "learning_rate": 1.798731391585761e-05, "loss": 0.0807, "step": 38870 }, { "epoch": 15.1, "learning_rate": 1.7986796116504856e-05, "loss": 0.0107, "step": 38880 }, { "epoch": 15.1, "learning_rate": 1.7986278317152108e-05, "loss": 0.241, "step": 38890 }, { "epoch": 15.11, "learning_rate": 1.7985760517799352e-05, "loss": 0.0841, "step": 38900 }, { "epoch": 15.11, "learning_rate": 1.7985242718446604e-05, "loss": 0.1765, "step": 38910 }, { "epoch": 15.11, "learning_rate": 1.798472491909385e-05, "loss": 0.2134, "step": 38920 }, { "epoch": 15.12, "learning_rate": 1.7984207119741103e-05, "loss": 0.0625, "step": 38930 }, { "epoch": 15.12, "learning_rate": 1.798368932038835e-05, "loss": 0.2839, "step": 38940 }, { "epoch": 15.13, "learning_rate": 1.7983171521035602e-05, "loss": 0.0397, "step": 38950 }, { "epoch": 15.13, "learning_rate": 1.798265372168285e-05, "loss": 0.3216, "step": 38960 }, { "epoch": 15.13, "learning_rate": 1.7982135922330098e-05, "loss": 0.1513, "step": 38970 }, { "epoch": 15.14, "learning_rate": 1.7981618122977346e-05, "loss": 0.147, "step": 38980 }, { "epoch": 15.14, "learning_rate": 1.7981100323624597e-05, "loss": 0.0437, "step": 38990 }, { "epoch": 15.15, "learning_rate": 1.7980582524271845e-05, "loss": 0.145, "step": 39000 }, { "epoch": 15.15, "learning_rate": 1.7980064724919096e-05, "loss": 0.1021, "step": 39010 }, { "epoch": 15.15, "learning_rate": 1.7979546925566344e-05, "loss": 0.0253, "step": 39020 }, { "epoch": 15.16, "learning_rate": 1.7979029126213596e-05, "loss": 0.1301, "step": 39030 }, { "epoch": 15.16, "learning_rate": 1.7978511326860844e-05, "loss": 0.115, "step": 39040 }, { "epoch": 15.17, "learning_rate": 1.797799352750809e-05, "loss": 0.1015, "step": 39050 }, { "epoch": 15.17, "learning_rate": 1.797747572815534e-05, "loss": 0.1105, "step": 39060 }, { "epoch": 15.17, "learning_rate": 1.797695792880259e-05, "loss": 0.1061, "step": 39070 }, { "epoch": 15.18, "learning_rate": 1.797644012944984e-05, "loss": 0.2207, "step": 39080 }, { "epoch": 15.18, "learning_rate": 1.797592233009709e-05, "loss": 0.1117, "step": 39090 }, { "epoch": 15.18, "learning_rate": 1.7975404530744338e-05, "loss": 0.0341, "step": 39100 }, { "epoch": 15.19, "learning_rate": 1.797488673139159e-05, "loss": 0.2546, "step": 39110 }, { "epoch": 15.19, "learning_rate": 1.7974368932038837e-05, "loss": 0.1726, "step": 39120 }, { "epoch": 15.2, "learning_rate": 1.7973851132686085e-05, "loss": 0.0683, "step": 39130 }, { "epoch": 15.2, "learning_rate": 1.7973333333333333e-05, "loss": 0.1419, "step": 39140 }, { "epoch": 15.2, "learning_rate": 1.7972815533980584e-05, "loss": 0.2322, "step": 39150 }, { "epoch": 15.21, "learning_rate": 1.7972297734627832e-05, "loss": 0.1566, "step": 39160 }, { "epoch": 15.21, "learning_rate": 1.7971779935275083e-05, "loss": 0.1279, "step": 39170 }, { "epoch": 15.22, "learning_rate": 1.797126213592233e-05, "loss": 0.0556, "step": 39180 }, { "epoch": 15.22, "learning_rate": 1.7970744336569583e-05, "loss": 0.1349, "step": 39190 }, { "epoch": 15.22, "learning_rate": 1.797022653721683e-05, "loss": 0.0751, "step": 39200 }, { "epoch": 15.23, "learning_rate": 1.796970873786408e-05, "loss": 0.2012, "step": 39210 }, { "epoch": 15.23, "learning_rate": 1.7969190938511326e-05, "loss": 0.025, "step": 39220 }, { "epoch": 15.23, "learning_rate": 1.7968673139158578e-05, "loss": 0.1013, "step": 39230 }, { "epoch": 15.24, "learning_rate": 1.7968155339805826e-05, "loss": 0.1823, "step": 39240 }, { "epoch": 15.24, "learning_rate": 1.7967637540453077e-05, "loss": 0.1238, "step": 39250 }, { "epoch": 15.25, "learning_rate": 1.7967119741100325e-05, "loss": 0.0867, "step": 39260 }, { "epoch": 15.25, "learning_rate": 1.7966601941747573e-05, "loss": 0.1461, "step": 39270 }, { "epoch": 15.25, "learning_rate": 1.7966084142394824e-05, "loss": 0.1967, "step": 39280 }, { "epoch": 15.26, "learning_rate": 1.7965566343042072e-05, "loss": 0.0784, "step": 39290 }, { "epoch": 15.26, "learning_rate": 1.7965048543689323e-05, "loss": 0.0672, "step": 39300 }, { "epoch": 15.27, "learning_rate": 1.796453074433657e-05, "loss": 0.1685, "step": 39310 }, { "epoch": 15.27, "learning_rate": 1.796401294498382e-05, "loss": 0.0564, "step": 39320 }, { "epoch": 15.27, "learning_rate": 1.796349514563107e-05, "loss": 0.0562, "step": 39330 }, { "epoch": 15.28, "learning_rate": 1.796297734627832e-05, "loss": 0.0073, "step": 39340 }, { "epoch": 15.28, "learning_rate": 1.7962459546925566e-05, "loss": 0.1258, "step": 39350 }, { "epoch": 15.29, "learning_rate": 1.7961941747572818e-05, "loss": 0.1641, "step": 39360 }, { "epoch": 15.29, "learning_rate": 1.7961423948220066e-05, "loss": 0.1339, "step": 39370 }, { "epoch": 15.29, "learning_rate": 1.7960906148867317e-05, "loss": 0.0977, "step": 39380 }, { "epoch": 15.3, "learning_rate": 1.7960388349514565e-05, "loss": 0.2237, "step": 39390 }, { "epoch": 15.3, "learning_rate": 1.7959870550161813e-05, "loss": 0.2083, "step": 39400 }, { "epoch": 15.3, "learning_rate": 1.7959352750809064e-05, "loss": 0.0212, "step": 39410 }, { "epoch": 15.31, "learning_rate": 1.7958834951456312e-05, "loss": 0.0127, "step": 39420 }, { "epoch": 15.31, "learning_rate": 1.795831715210356e-05, "loss": 0.1612, "step": 39430 }, { "epoch": 15.32, "learning_rate": 1.795779935275081e-05, "loss": 0.2009, "step": 39440 }, { "epoch": 15.32, "learning_rate": 1.795728155339806e-05, "loss": 0.0938, "step": 39450 }, { "epoch": 15.32, "learning_rate": 1.795676375404531e-05, "loss": 0.1605, "step": 39460 }, { "epoch": 15.33, "learning_rate": 1.795624595469256e-05, "loss": 0.0836, "step": 39470 }, { "epoch": 15.33, "learning_rate": 1.7955728155339806e-05, "loss": 0.2032, "step": 39480 }, { "epoch": 15.34, "learning_rate": 1.7955210355987058e-05, "loss": 0.1075, "step": 39490 }, { "epoch": 15.34, "learning_rate": 1.7954692556634306e-05, "loss": 0.0299, "step": 39500 }, { "epoch": 15.34, "learning_rate": 1.7954174757281554e-05, "loss": 0.1927, "step": 39510 }, { "epoch": 15.35, "learning_rate": 1.7953656957928805e-05, "loss": 0.1215, "step": 39520 }, { "epoch": 15.35, "learning_rate": 1.7953139158576053e-05, "loss": 0.2151, "step": 39530 }, { "epoch": 15.36, "learning_rate": 1.7952621359223304e-05, "loss": 0.1844, "step": 39540 }, { "epoch": 15.36, "learning_rate": 1.7952103559870552e-05, "loss": 0.2638, "step": 39550 }, { "epoch": 15.36, "learning_rate": 1.79515857605178e-05, "loss": 0.0601, "step": 39560 }, { "epoch": 15.37, "learning_rate": 1.795106796116505e-05, "loss": 0.072, "step": 39570 }, { "epoch": 15.37, "learning_rate": 1.79505501618123e-05, "loss": 0.196, "step": 39580 }, { "epoch": 15.37, "learning_rate": 1.7950032362459547e-05, "loss": 0.1307, "step": 39590 }, { "epoch": 15.38, "learning_rate": 1.79495145631068e-05, "loss": 0.0772, "step": 39600 }, { "epoch": 15.38, "learning_rate": 1.7948996763754046e-05, "loss": 0.2126, "step": 39610 }, { "epoch": 15.39, "learning_rate": 1.7948478964401298e-05, "loss": 0.1666, "step": 39620 }, { "epoch": 15.39, "learning_rate": 1.7947961165048546e-05, "loss": 0.0442, "step": 39630 }, { "epoch": 15.39, "learning_rate": 1.7947443365695793e-05, "loss": 0.1355, "step": 39640 }, { "epoch": 15.4, "learning_rate": 1.794692556634304e-05, "loss": 0.1951, "step": 39650 }, { "epoch": 15.4, "learning_rate": 1.7946407766990293e-05, "loss": 0.1715, "step": 39660 }, { "epoch": 15.41, "learning_rate": 1.794588996763754e-05, "loss": 0.1028, "step": 39670 }, { "epoch": 15.41, "learning_rate": 1.7945372168284792e-05, "loss": 0.0578, "step": 39680 }, { "epoch": 15.41, "learning_rate": 1.794485436893204e-05, "loss": 0.2741, "step": 39690 }, { "epoch": 15.42, "learning_rate": 1.794433656957929e-05, "loss": 0.2347, "step": 39700 }, { "epoch": 15.42, "learning_rate": 1.794381877022654e-05, "loss": 0.078, "step": 39710 }, { "epoch": 15.43, "learning_rate": 1.7943300970873787e-05, "loss": 0.0895, "step": 39720 }, { "epoch": 15.43, "learning_rate": 1.7942783171521035e-05, "loss": 0.0289, "step": 39730 }, { "epoch": 15.43, "learning_rate": 1.7942265372168286e-05, "loss": 0.2151, "step": 39740 }, { "epoch": 15.44, "learning_rate": 1.7941747572815534e-05, "loss": 0.1476, "step": 39750 }, { "epoch": 15.44, "learning_rate": 1.7941229773462786e-05, "loss": 0.0545, "step": 39760 }, { "epoch": 15.44, "learning_rate": 1.7940711974110033e-05, "loss": 0.0808, "step": 39770 }, { "epoch": 15.45, "learning_rate": 1.7940194174757285e-05, "loss": 0.2449, "step": 39780 }, { "epoch": 15.45, "learning_rate": 1.7939676375404533e-05, "loss": 0.2768, "step": 39790 }, { "epoch": 15.46, "learning_rate": 1.793915857605178e-05, "loss": 0.346, "step": 39800 }, { "epoch": 15.46, "learning_rate": 1.793864077669903e-05, "loss": 0.0297, "step": 39810 }, { "epoch": 15.46, "learning_rate": 1.793812297734628e-05, "loss": 0.0363, "step": 39820 }, { "epoch": 15.47, "learning_rate": 1.7937605177993528e-05, "loss": 0.234, "step": 39830 }, { "epoch": 15.47, "learning_rate": 1.793708737864078e-05, "loss": 0.1005, "step": 39840 }, { "epoch": 15.48, "learning_rate": 1.7936569579288027e-05, "loss": 0.066, "step": 39850 }, { "epoch": 15.48, "learning_rate": 1.793605177993528e-05, "loss": 0.1582, "step": 39860 }, { "epoch": 15.48, "learning_rate": 1.7935533980582526e-05, "loss": 0.1144, "step": 39870 }, { "epoch": 15.49, "learning_rate": 1.7935016181229774e-05, "loss": 0.1548, "step": 39880 }, { "epoch": 15.49, "learning_rate": 1.7934498381877022e-05, "loss": 0.0878, "step": 39890 }, { "epoch": 15.5, "learning_rate": 1.7933980582524273e-05, "loss": 0.2833, "step": 39900 }, { "epoch": 15.5, "learning_rate": 1.793346278317152e-05, "loss": 0.0904, "step": 39910 }, { "epoch": 15.5, "learning_rate": 1.7932944983818773e-05, "loss": 0.003, "step": 39920 }, { "epoch": 15.51, "learning_rate": 1.793242718446602e-05, "loss": 0.1377, "step": 39930 }, { "epoch": 15.51, "learning_rate": 1.7931909385113272e-05, "loss": 0.1616, "step": 39940 }, { "epoch": 15.51, "learning_rate": 1.793139158576052e-05, "loss": 0.0985, "step": 39950 }, { "epoch": 15.52, "learning_rate": 1.7930873786407768e-05, "loss": 0.2529, "step": 39960 }, { "epoch": 15.52, "learning_rate": 1.7930355987055016e-05, "loss": 0.112, "step": 39970 }, { "epoch": 15.53, "learning_rate": 1.7929838187702267e-05, "loss": 0.086, "step": 39980 }, { "epoch": 15.53, "learning_rate": 1.7929320388349515e-05, "loss": 0.0861, "step": 39990 }, { "epoch": 15.53, "learning_rate": 1.7928802588996766e-05, "loss": 0.0478, "step": 40000 }, { "epoch": 15.54, "learning_rate": 1.7928284789644014e-05, "loss": 0.1289, "step": 40010 }, { "epoch": 15.54, "learning_rate": 1.7927766990291265e-05, "loss": 0.0726, "step": 40020 }, { "epoch": 15.55, "learning_rate": 1.7927249190938513e-05, "loss": 0.1647, "step": 40030 }, { "epoch": 15.55, "learning_rate": 1.792673139158576e-05, "loss": 0.3235, "step": 40040 }, { "epoch": 15.55, "learning_rate": 1.792621359223301e-05, "loss": 0.0664, "step": 40050 }, { "epoch": 15.56, "learning_rate": 1.792569579288026e-05, "loss": 0.0341, "step": 40060 }, { "epoch": 15.56, "learning_rate": 1.792517799352751e-05, "loss": 0.2211, "step": 40070 }, { "epoch": 15.57, "learning_rate": 1.792466019417476e-05, "loss": 0.0899, "step": 40080 }, { "epoch": 15.57, "learning_rate": 1.7924142394822008e-05, "loss": 0.0423, "step": 40090 }, { "epoch": 15.57, "learning_rate": 1.792362459546926e-05, "loss": 0.0569, "step": 40100 }, { "epoch": 15.58, "learning_rate": 1.7923106796116507e-05, "loss": 0.1058, "step": 40110 }, { "epoch": 15.58, "learning_rate": 1.7922588996763755e-05, "loss": 0.1233, "step": 40120 }, { "epoch": 15.58, "learning_rate": 1.7922071197411003e-05, "loss": 0.1034, "step": 40130 }, { "epoch": 15.59, "learning_rate": 1.7921553398058254e-05, "loss": 0.0297, "step": 40140 }, { "epoch": 15.59, "learning_rate": 1.7921035598705502e-05, "loss": 0.2903, "step": 40150 }, { "epoch": 15.6, "learning_rate": 1.7920517799352753e-05, "loss": 0.0203, "step": 40160 }, { "epoch": 15.6, "learning_rate": 1.792e-05, "loss": 0.0329, "step": 40170 }, { "epoch": 15.6, "learning_rate": 1.7919482200647253e-05, "loss": 0.1956, "step": 40180 }, { "epoch": 15.61, "learning_rate": 1.79189644012945e-05, "loss": 0.2007, "step": 40190 }, { "epoch": 15.61, "learning_rate": 1.791844660194175e-05, "loss": 0.0932, "step": 40200 }, { "epoch": 15.62, "learning_rate": 1.7917928802588996e-05, "loss": 0.1551, "step": 40210 }, { "epoch": 15.62, "learning_rate": 1.7917411003236248e-05, "loss": 0.2259, "step": 40220 }, { "epoch": 15.62, "learning_rate": 1.7916893203883496e-05, "loss": 0.1381, "step": 40230 }, { "epoch": 15.63, "learning_rate": 1.7916375404530747e-05, "loss": 0.1896, "step": 40240 }, { "epoch": 15.63, "learning_rate": 1.7915857605177995e-05, "loss": 0.1515, "step": 40250 }, { "epoch": 15.63, "learning_rate": 1.7915339805825246e-05, "loss": 0.3167, "step": 40260 }, { "epoch": 15.64, "learning_rate": 1.7914822006472494e-05, "loss": 0.0681, "step": 40270 }, { "epoch": 15.64, "learning_rate": 1.7914304207119742e-05, "loss": 0.1796, "step": 40280 }, { "epoch": 15.65, "learning_rate": 1.791378640776699e-05, "loss": 0.1188, "step": 40290 }, { "epoch": 15.65, "learning_rate": 1.791326860841424e-05, "loss": 0.1347, "step": 40300 }, { "epoch": 15.65, "learning_rate": 1.791275080906149e-05, "loss": 0.1585, "step": 40310 }, { "epoch": 15.66, "learning_rate": 1.791223300970874e-05, "loss": 0.2602, "step": 40320 }, { "epoch": 15.66, "learning_rate": 1.791171521035599e-05, "loss": 0.14, "step": 40330 }, { "epoch": 15.67, "learning_rate": 1.791119741100324e-05, "loss": 0.0516, "step": 40340 }, { "epoch": 15.67, "learning_rate": 1.7910679611650488e-05, "loss": 0.1252, "step": 40350 }, { "epoch": 15.67, "learning_rate": 1.7910161812297735e-05, "loss": 0.0097, "step": 40360 }, { "epoch": 15.68, "learning_rate": 1.7909644012944983e-05, "loss": 0.2795, "step": 40370 }, { "epoch": 15.68, "learning_rate": 1.7909126213592235e-05, "loss": 0.0629, "step": 40380 }, { "epoch": 15.69, "learning_rate": 1.7908608414239483e-05, "loss": 0.1671, "step": 40390 }, { "epoch": 15.69, "learning_rate": 1.7908090614886734e-05, "loss": 0.1292, "step": 40400 }, { "epoch": 15.69, "learning_rate": 1.7907572815533982e-05, "loss": 0.0327, "step": 40410 }, { "epoch": 15.7, "learning_rate": 1.7907055016181233e-05, "loss": 0.422, "step": 40420 }, { "epoch": 15.7, "learning_rate": 1.790653721682848e-05, "loss": 0.123, "step": 40430 }, { "epoch": 15.7, "learning_rate": 1.790601941747573e-05, "loss": 0.1824, "step": 40440 }, { "epoch": 15.71, "learning_rate": 1.7905501618122977e-05, "loss": 0.411, "step": 40450 }, { "epoch": 15.71, "learning_rate": 1.7904983818770228e-05, "loss": 0.0302, "step": 40460 }, { "epoch": 15.72, "learning_rate": 1.7904466019417476e-05, "loss": 0.0918, "step": 40470 }, { "epoch": 15.72, "learning_rate": 1.7903948220064728e-05, "loss": 0.1542, "step": 40480 }, { "epoch": 15.72, "learning_rate": 1.7903430420711975e-05, "loss": 0.1298, "step": 40490 }, { "epoch": 15.73, "learning_rate": 1.7902912621359227e-05, "loss": 0.1825, "step": 40500 }, { "epoch": 15.73, "learning_rate": 1.7902394822006475e-05, "loss": 0.2541, "step": 40510 }, { "epoch": 15.74, "learning_rate": 1.7901877022653723e-05, "loss": 0.19, "step": 40520 }, { "epoch": 15.74, "learning_rate": 1.790135922330097e-05, "loss": 0.0482, "step": 40530 }, { "epoch": 15.74, "learning_rate": 1.7900841423948222e-05, "loss": 0.108, "step": 40540 }, { "epoch": 15.75, "learning_rate": 1.790032362459547e-05, "loss": 0.0353, "step": 40550 }, { "epoch": 15.75, "learning_rate": 1.789980582524272e-05, "loss": 0.1436, "step": 40560 }, { "epoch": 15.76, "learning_rate": 1.789928802588997e-05, "loss": 0.1292, "step": 40570 }, { "epoch": 15.76, "learning_rate": 1.789877022653722e-05, "loss": 0.1554, "step": 40580 }, { "epoch": 15.76, "learning_rate": 1.7898252427184468e-05, "loss": 0.2875, "step": 40590 }, { "epoch": 15.77, "learning_rate": 1.7897734627831716e-05, "loss": 0.1604, "step": 40600 }, { "epoch": 15.77, "learning_rate": 1.7897216828478964e-05, "loss": 0.1895, "step": 40610 }, { "epoch": 15.77, "learning_rate": 1.7896699029126215e-05, "loss": 0.1919, "step": 40620 }, { "epoch": 15.78, "learning_rate": 1.7896181229773463e-05, "loss": 0.0992, "step": 40630 }, { "epoch": 15.78, "learning_rate": 1.7895663430420715e-05, "loss": 0.0787, "step": 40640 }, { "epoch": 15.79, "learning_rate": 1.7895145631067963e-05, "loss": 0.1446, "step": 40650 }, { "epoch": 15.79, "learning_rate": 1.7894627831715214e-05, "loss": 0.1886, "step": 40660 }, { "epoch": 15.79, "learning_rate": 1.7894110032362462e-05, "loss": 0.1147, "step": 40670 }, { "epoch": 15.8, "learning_rate": 1.789359223300971e-05, "loss": 0.2694, "step": 40680 }, { "epoch": 15.8, "learning_rate": 1.7893074433656958e-05, "loss": 0.1065, "step": 40690 }, { "epoch": 15.81, "learning_rate": 1.789255663430421e-05, "loss": 0.1849, "step": 40700 }, { "epoch": 15.81, "learning_rate": 1.7892038834951457e-05, "loss": 0.0448, "step": 40710 }, { "epoch": 15.81, "learning_rate": 1.7891521035598708e-05, "loss": 0.0978, "step": 40720 }, { "epoch": 15.82, "learning_rate": 1.7891003236245956e-05, "loss": 0.0824, "step": 40730 }, { "epoch": 15.82, "learning_rate": 1.7890485436893204e-05, "loss": 0.1893, "step": 40740 }, { "epoch": 15.83, "learning_rate": 1.7889967637540455e-05, "loss": 0.1088, "step": 40750 }, { "epoch": 15.83, "learning_rate": 1.7889449838187703e-05, "loss": 0.0185, "step": 40760 }, { "epoch": 15.83, "learning_rate": 1.788893203883495e-05, "loss": 0.1535, "step": 40770 }, { "epoch": 15.84, "learning_rate": 1.7888414239482203e-05, "loss": 0.2427, "step": 40780 }, { "epoch": 15.84, "learning_rate": 1.788789644012945e-05, "loss": 0.1099, "step": 40790 }, { "epoch": 15.84, "learning_rate": 1.7887378640776702e-05, "loss": 0.2341, "step": 40800 }, { "epoch": 15.85, "learning_rate": 1.788686084142395e-05, "loss": 0.1155, "step": 40810 }, { "epoch": 15.85, "learning_rate": 1.7886343042071198e-05, "loss": 0.1967, "step": 40820 }, { "epoch": 15.86, "learning_rate": 1.788582524271845e-05, "loss": 0.1095, "step": 40830 }, { "epoch": 15.86, "learning_rate": 1.7885307443365697e-05, "loss": 0.0438, "step": 40840 }, { "epoch": 15.86, "learning_rate": 1.7884789644012945e-05, "loss": 0.0971, "step": 40850 }, { "epoch": 15.87, "learning_rate": 1.7884271844660196e-05, "loss": 0.1007, "step": 40860 }, { "epoch": 15.87, "learning_rate": 1.7883754045307444e-05, "loss": 0.2499, "step": 40870 }, { "epoch": 15.88, "learning_rate": 1.7883236245954695e-05, "loss": 0.1125, "step": 40880 }, { "epoch": 15.88, "learning_rate": 1.7882718446601943e-05, "loss": 0.1266, "step": 40890 }, { "epoch": 15.88, "learning_rate": 1.788220064724919e-05, "loss": 0.1306, "step": 40900 }, { "epoch": 15.89, "learning_rate": 1.7881682847896442e-05, "loss": 0.3246, "step": 40910 }, { "epoch": 15.89, "learning_rate": 1.788116504854369e-05, "loss": 0.0413, "step": 40920 }, { "epoch": 15.9, "learning_rate": 1.7880647249190942e-05, "loss": 0.2243, "step": 40930 }, { "epoch": 15.9, "learning_rate": 1.788012944983819e-05, "loss": 0.1194, "step": 40940 }, { "epoch": 15.9, "learning_rate": 1.7879611650485438e-05, "loss": 0.2251, "step": 40950 }, { "epoch": 15.91, "learning_rate": 1.787909385113269e-05, "loss": 0.0519, "step": 40960 }, { "epoch": 15.91, "learning_rate": 1.7878576051779937e-05, "loss": 0.1821, "step": 40970 }, { "epoch": 15.91, "learning_rate": 1.7878058252427185e-05, "loss": 0.0971, "step": 40980 }, { "epoch": 15.92, "learning_rate": 1.7877540453074436e-05, "loss": 0.0712, "step": 40990 }, { "epoch": 15.92, "learning_rate": 1.7877022653721684e-05, "loss": 0.0794, "step": 41000 }, { "epoch": 15.93, "learning_rate": 1.7876504854368935e-05, "loss": 0.263, "step": 41010 }, { "epoch": 15.93, "learning_rate": 1.7875987055016183e-05, "loss": 0.1758, "step": 41020 }, { "epoch": 15.93, "learning_rate": 1.787546925566343e-05, "loss": 0.32, "step": 41030 }, { "epoch": 15.94, "learning_rate": 1.7874951456310682e-05, "loss": 0.0714, "step": 41040 }, { "epoch": 15.94, "learning_rate": 1.787443365695793e-05, "loss": 0.1241, "step": 41050 }, { "epoch": 15.95, "learning_rate": 1.7873915857605178e-05, "loss": 0.0938, "step": 41060 }, { "epoch": 15.95, "learning_rate": 1.787339805825243e-05, "loss": 0.0403, "step": 41070 }, { "epoch": 15.95, "learning_rate": 1.7872880258899678e-05, "loss": 0.1188, "step": 41080 }, { "epoch": 15.96, "learning_rate": 1.787236245954693e-05, "loss": 0.1944, "step": 41090 }, { "epoch": 15.96, "learning_rate": 1.7871844660194177e-05, "loss": 0.075, "step": 41100 }, { "epoch": 15.97, "learning_rate": 1.7871326860841425e-05, "loss": 0.2556, "step": 41110 }, { "epoch": 15.97, "learning_rate": 1.7870809061488673e-05, "loss": 0.1277, "step": 41120 }, { "epoch": 15.97, "learning_rate": 1.7870291262135924e-05, "loss": 0.1829, "step": 41130 }, { "epoch": 15.98, "learning_rate": 1.7869773462783172e-05, "loss": 0.1839, "step": 41140 }, { "epoch": 15.98, "learning_rate": 1.7869255663430423e-05, "loss": 0.0591, "step": 41150 }, { "epoch": 15.98, "learning_rate": 1.786873786407767e-05, "loss": 0.0897, "step": 41160 }, { "epoch": 15.99, "learning_rate": 1.7868220064724922e-05, "loss": 0.1837, "step": 41170 }, { "epoch": 15.99, "learning_rate": 1.786770226537217e-05, "loss": 0.0294, "step": 41180 }, { "epoch": 16.0, "learning_rate": 1.7867184466019418e-05, "loss": 0.1413, "step": 41190 }, { "epoch": 16.0, "learning_rate": 1.7866666666666666e-05, "loss": 0.0933, "step": 41200 }, { "epoch": 16.0, "eval_accuracy": 0.953232462173315, "eval_loss": 0.2371627688407898, "eval_runtime": 8.2508, "eval_samples_per_second": 440.563, "eval_steps_per_second": 55.146, "step": 41200 }, { "epoch": 16.0, "learning_rate": 1.7866148867313917e-05, "loss": 0.1805, "step": 41210 }, { "epoch": 16.01, "learning_rate": 1.7865631067961165e-05, "loss": 0.062, "step": 41220 }, { "epoch": 16.01, "learning_rate": 1.7865113268608417e-05, "loss": 0.114, "step": 41230 }, { "epoch": 16.02, "learning_rate": 1.7864595469255665e-05, "loss": 0.0734, "step": 41240 }, { "epoch": 16.02, "learning_rate": 1.7864077669902916e-05, "loss": 0.3459, "step": 41250 }, { "epoch": 16.02, "learning_rate": 1.7863559870550164e-05, "loss": 0.0875, "step": 41260 }, { "epoch": 16.03, "learning_rate": 1.7863042071197412e-05, "loss": 0.0716, "step": 41270 }, { "epoch": 16.03, "learning_rate": 1.786252427184466e-05, "loss": 0.1456, "step": 41280 }, { "epoch": 16.03, "learning_rate": 1.786200647249191e-05, "loss": 0.1918, "step": 41290 }, { "epoch": 16.04, "learning_rate": 1.786148867313916e-05, "loss": 0.1016, "step": 41300 }, { "epoch": 16.04, "learning_rate": 1.786097087378641e-05, "loss": 0.374, "step": 41310 }, { "epoch": 16.05, "learning_rate": 1.7860453074433658e-05, "loss": 0.0803, "step": 41320 }, { "epoch": 16.05, "learning_rate": 1.785993527508091e-05, "loss": 0.1666, "step": 41330 }, { "epoch": 16.05, "learning_rate": 1.7859417475728157e-05, "loss": 0.121, "step": 41340 }, { "epoch": 16.06, "learning_rate": 1.7858899676375405e-05, "loss": 0.245, "step": 41350 }, { "epoch": 16.06, "learning_rate": 1.7858381877022653e-05, "loss": 0.144, "step": 41360 }, { "epoch": 16.07, "learning_rate": 1.7857864077669905e-05, "loss": 0.0129, "step": 41370 }, { "epoch": 16.07, "learning_rate": 1.7857346278317152e-05, "loss": 0.0503, "step": 41380 }, { "epoch": 16.07, "learning_rate": 1.7856828478964404e-05, "loss": 0.0655, "step": 41390 }, { "epoch": 16.08, "learning_rate": 1.7856310679611652e-05, "loss": 0.1186, "step": 41400 }, { "epoch": 16.08, "learning_rate": 1.7855792880258903e-05, "loss": 0.0368, "step": 41410 }, { "epoch": 16.09, "learning_rate": 1.7855275080906148e-05, "loss": 0.1502, "step": 41420 }, { "epoch": 16.09, "learning_rate": 1.78547572815534e-05, "loss": 0.0478, "step": 41430 }, { "epoch": 16.09, "learning_rate": 1.7854239482200647e-05, "loss": 0.1422, "step": 41440 }, { "epoch": 16.1, "learning_rate": 1.7853721682847898e-05, "loss": 0.1043, "step": 41450 }, { "epoch": 16.1, "learning_rate": 1.7853203883495146e-05, "loss": 0.1982, "step": 41460 }, { "epoch": 16.1, "learning_rate": 1.7852686084142397e-05, "loss": 0.2513, "step": 41470 }, { "epoch": 16.11, "learning_rate": 1.7852168284789645e-05, "loss": 0.2204, "step": 41480 }, { "epoch": 16.11, "learning_rate": 1.7851650485436897e-05, "loss": 0.0062, "step": 41490 }, { "epoch": 16.12, "learning_rate": 1.7851132686084145e-05, "loss": 0.0779, "step": 41500 }, { "epoch": 16.12, "learning_rate": 1.7850614886731392e-05, "loss": 0.2577, "step": 41510 }, { "epoch": 16.12, "learning_rate": 1.785009708737864e-05, "loss": 0.1967, "step": 41520 }, { "epoch": 16.13, "learning_rate": 1.784957928802589e-05, "loss": 0.03, "step": 41530 }, { "epoch": 16.13, "learning_rate": 1.784906148867314e-05, "loss": 0.0773, "step": 41540 }, { "epoch": 16.14, "learning_rate": 1.784854368932039e-05, "loss": 0.0615, "step": 41550 }, { "epoch": 16.14, "learning_rate": 1.784802588996764e-05, "loss": 0.0592, "step": 41560 }, { "epoch": 16.14, "learning_rate": 1.784750809061489e-05, "loss": 0.2199, "step": 41570 }, { "epoch": 16.15, "learning_rate": 1.7846990291262138e-05, "loss": 0.0824, "step": 41580 }, { "epoch": 16.15, "learning_rate": 1.7846472491909386e-05, "loss": 0.0444, "step": 41590 }, { "epoch": 16.16, "learning_rate": 1.7845954692556634e-05, "loss": 0.1335, "step": 41600 }, { "epoch": 16.16, "learning_rate": 1.7845436893203885e-05, "loss": 0.1497, "step": 41610 }, { "epoch": 16.16, "learning_rate": 1.7844919093851133e-05, "loss": 0.0379, "step": 41620 }, { "epoch": 16.17, "learning_rate": 1.7844401294498384e-05, "loss": 0.0691, "step": 41630 }, { "epoch": 16.17, "learning_rate": 1.7843883495145632e-05, "loss": 0.0905, "step": 41640 }, { "epoch": 16.17, "learning_rate": 1.7843365695792884e-05, "loss": 0.1528, "step": 41650 }, { "epoch": 16.18, "learning_rate": 1.784284789644013e-05, "loss": 0.0865, "step": 41660 }, { "epoch": 16.18, "learning_rate": 1.784233009708738e-05, "loss": 0.1851, "step": 41670 }, { "epoch": 16.19, "learning_rate": 1.7841812297734627e-05, "loss": 0.1702, "step": 41680 }, { "epoch": 16.19, "learning_rate": 1.784129449838188e-05, "loss": 0.0267, "step": 41690 }, { "epoch": 16.19, "learning_rate": 1.7840776699029127e-05, "loss": 0.1301, "step": 41700 }, { "epoch": 16.2, "learning_rate": 1.7840258899676378e-05, "loss": 0.2268, "step": 41710 }, { "epoch": 16.2, "learning_rate": 1.7839741100323626e-05, "loss": 0.0466, "step": 41720 }, { "epoch": 16.21, "learning_rate": 1.7839223300970877e-05, "loss": 0.1418, "step": 41730 }, { "epoch": 16.21, "learning_rate": 1.7838705501618125e-05, "loss": 0.135, "step": 41740 }, { "epoch": 16.21, "learning_rate": 1.7838187702265373e-05, "loss": 0.0907, "step": 41750 }, { "epoch": 16.22, "learning_rate": 1.783766990291262e-05, "loss": 0.2559, "step": 41760 }, { "epoch": 16.22, "learning_rate": 1.7837152103559872e-05, "loss": 0.2166, "step": 41770 }, { "epoch": 16.23, "learning_rate": 1.783663430420712e-05, "loss": 0.2158, "step": 41780 }, { "epoch": 16.23, "learning_rate": 1.783611650485437e-05, "loss": 0.1589, "step": 41790 }, { "epoch": 16.23, "learning_rate": 1.783559870550162e-05, "loss": 0.2711, "step": 41800 }, { "epoch": 16.24, "learning_rate": 1.783508090614887e-05, "loss": 0.1218, "step": 41810 }, { "epoch": 16.24, "learning_rate": 1.783456310679612e-05, "loss": 0.0407, "step": 41820 }, { "epoch": 16.24, "learning_rate": 1.7834045307443367e-05, "loss": 0.0617, "step": 41830 }, { "epoch": 16.25, "learning_rate": 1.7833527508090615e-05, "loss": 0.2487, "step": 41840 }, { "epoch": 16.25, "learning_rate": 1.7833009708737866e-05, "loss": 0.0529, "step": 41850 }, { "epoch": 16.26, "learning_rate": 1.7832491909385114e-05, "loss": 0.115, "step": 41860 }, { "epoch": 16.26, "learning_rate": 1.7831974110032365e-05, "loss": 0.0215, "step": 41870 }, { "epoch": 16.26, "learning_rate": 1.7831456310679613e-05, "loss": 0.0421, "step": 41880 }, { "epoch": 16.27, "learning_rate": 1.7830938511326864e-05, "loss": 0.2363, "step": 41890 }, { "epoch": 16.27, "learning_rate": 1.7830420711974112e-05, "loss": 0.1887, "step": 41900 }, { "epoch": 16.28, "learning_rate": 1.782990291262136e-05, "loss": 0.0109, "step": 41910 }, { "epoch": 16.28, "learning_rate": 1.7829385113268608e-05, "loss": 0.1738, "step": 41920 }, { "epoch": 16.28, "learning_rate": 1.782886731391586e-05, "loss": 0.1176, "step": 41930 }, { "epoch": 16.29, "learning_rate": 1.7828349514563107e-05, "loss": 0.0595, "step": 41940 }, { "epoch": 16.29, "learning_rate": 1.782783171521036e-05, "loss": 0.1036, "step": 41950 }, { "epoch": 16.3, "learning_rate": 1.7827313915857607e-05, "loss": 0.0619, "step": 41960 }, { "epoch": 16.3, "learning_rate": 1.7826796116504858e-05, "loss": 0.0761, "step": 41970 }, { "epoch": 16.3, "learning_rate": 1.7826278317152106e-05, "loss": 0.0891, "step": 41980 }, { "epoch": 16.31, "learning_rate": 1.7825760517799354e-05, "loss": 0.0929, "step": 41990 }, { "epoch": 16.31, "learning_rate": 1.7825242718446602e-05, "loss": 0.0498, "step": 42000 }, { "epoch": 16.31, "learning_rate": 1.7824724919093853e-05, "loss": 0.1508, "step": 42010 }, { "epoch": 16.32, "learning_rate": 1.78242071197411e-05, "loss": 0.0256, "step": 42020 }, { "epoch": 16.32, "learning_rate": 1.7823689320388352e-05, "loss": 0.3526, "step": 42030 }, { "epoch": 16.33, "learning_rate": 1.78231715210356e-05, "loss": 0.2692, "step": 42040 }, { "epoch": 16.33, "learning_rate": 1.782265372168285e-05, "loss": 0.1235, "step": 42050 }, { "epoch": 16.33, "learning_rate": 1.78221359223301e-05, "loss": 0.0486, "step": 42060 }, { "epoch": 16.34, "learning_rate": 1.7821618122977347e-05, "loss": 0.0587, "step": 42070 }, { "epoch": 16.34, "learning_rate": 1.7821100323624595e-05, "loss": 0.1331, "step": 42080 }, { "epoch": 16.35, "learning_rate": 1.7820582524271847e-05, "loss": 0.1003, "step": 42090 }, { "epoch": 16.35, "learning_rate": 1.7820064724919094e-05, "loss": 0.0043, "step": 42100 }, { "epoch": 16.35, "learning_rate": 1.7819546925566346e-05, "loss": 0.1049, "step": 42110 }, { "epoch": 16.36, "learning_rate": 1.7819029126213594e-05, "loss": 0.0632, "step": 42120 }, { "epoch": 16.36, "learning_rate": 1.7818511326860845e-05, "loss": 0.1443, "step": 42130 }, { "epoch": 16.37, "learning_rate": 1.7817993527508093e-05, "loss": 0.0493, "step": 42140 }, { "epoch": 16.37, "learning_rate": 1.781747572815534e-05, "loss": 0.1188, "step": 42150 }, { "epoch": 16.37, "learning_rate": 1.781695792880259e-05, "loss": 0.1276, "step": 42160 }, { "epoch": 16.38, "learning_rate": 1.781644012944984e-05, "loss": 0.1071, "step": 42170 }, { "epoch": 16.38, "learning_rate": 1.7815922330097088e-05, "loss": 0.0263, "step": 42180 }, { "epoch": 16.38, "learning_rate": 1.781540453074434e-05, "loss": 0.0263, "step": 42190 }, { "epoch": 16.39, "learning_rate": 1.7814886731391587e-05, "loss": 0.1223, "step": 42200 }, { "epoch": 16.39, "learning_rate": 1.7814368932038835e-05, "loss": 0.1479, "step": 42210 }, { "epoch": 16.4, "learning_rate": 1.7813851132686087e-05, "loss": 0.1376, "step": 42220 }, { "epoch": 16.4, "learning_rate": 1.7813333333333334e-05, "loss": 0.0379, "step": 42230 }, { "epoch": 16.4, "learning_rate": 1.7812815533980582e-05, "loss": 0.1754, "step": 42240 }, { "epoch": 16.41, "learning_rate": 1.7812297734627834e-05, "loss": 0.0898, "step": 42250 }, { "epoch": 16.41, "learning_rate": 1.781177993527508e-05, "loss": 0.098, "step": 42260 }, { "epoch": 16.42, "learning_rate": 1.7811262135922333e-05, "loss": 0.1197, "step": 42270 }, { "epoch": 16.42, "learning_rate": 1.781074433656958e-05, "loss": 0.1081, "step": 42280 }, { "epoch": 16.42, "learning_rate": 1.781022653721683e-05, "loss": 0.1308, "step": 42290 }, { "epoch": 16.43, "learning_rate": 1.780970873786408e-05, "loss": 0.3245, "step": 42300 }, { "epoch": 16.43, "learning_rate": 1.7809190938511328e-05, "loss": 0.1148, "step": 42310 }, { "epoch": 16.43, "learning_rate": 1.7808673139158576e-05, "loss": 0.2022, "step": 42320 }, { "epoch": 16.44, "learning_rate": 1.7808155339805827e-05, "loss": 0.1809, "step": 42330 }, { "epoch": 16.44, "learning_rate": 1.7807637540453075e-05, "loss": 0.1193, "step": 42340 }, { "epoch": 16.45, "learning_rate": 1.7807119741100326e-05, "loss": 0.1444, "step": 42350 }, { "epoch": 16.45, "learning_rate": 1.7806601941747574e-05, "loss": 0.182, "step": 42360 }, { "epoch": 16.45, "learning_rate": 1.7806084142394822e-05, "loss": 0.272, "step": 42370 }, { "epoch": 16.46, "learning_rate": 1.7805566343042074e-05, "loss": 0.0614, "step": 42380 }, { "epoch": 16.46, "learning_rate": 1.780504854368932e-05, "loss": 0.0546, "step": 42390 }, { "epoch": 16.47, "learning_rate": 1.780453074433657e-05, "loss": 0.1625, "step": 42400 }, { "epoch": 16.47, "learning_rate": 1.780401294498382e-05, "loss": 0.228, "step": 42410 }, { "epoch": 16.47, "learning_rate": 1.780349514563107e-05, "loss": 0.1584, "step": 42420 }, { "epoch": 16.48, "learning_rate": 1.780297734627832e-05, "loss": 0.1176, "step": 42430 }, { "epoch": 16.48, "learning_rate": 1.7802459546925568e-05, "loss": 0.2057, "step": 42440 }, { "epoch": 16.49, "learning_rate": 1.7801941747572816e-05, "loss": 0.164, "step": 42450 }, { "epoch": 16.49, "learning_rate": 1.7801423948220067e-05, "loss": 0.0097, "step": 42460 }, { "epoch": 16.49, "learning_rate": 1.7800906148867315e-05, "loss": 0.1816, "step": 42470 }, { "epoch": 16.5, "learning_rate": 1.7800388349514563e-05, "loss": 0.0245, "step": 42480 }, { "epoch": 16.5, "learning_rate": 1.7799870550161814e-05, "loss": 0.002, "step": 42490 }, { "epoch": 16.5, "learning_rate": 1.7799352750809062e-05, "loss": 0.0919, "step": 42500 }, { "epoch": 16.51, "learning_rate": 1.7798834951456314e-05, "loss": 0.0517, "step": 42510 }, { "epoch": 16.51, "learning_rate": 1.779831715210356e-05, "loss": 0.1919, "step": 42520 }, { "epoch": 16.52, "learning_rate": 1.779779935275081e-05, "loss": 0.1335, "step": 42530 }, { "epoch": 16.52, "learning_rate": 1.779728155339806e-05, "loss": 0.0895, "step": 42540 }, { "epoch": 16.52, "learning_rate": 1.779676375404531e-05, "loss": 0.0179, "step": 42550 }, { "epoch": 16.53, "learning_rate": 1.7796245954692557e-05, "loss": 0.1697, "step": 42560 }, { "epoch": 16.53, "learning_rate": 1.7795728155339808e-05, "loss": 0.1091, "step": 42570 }, { "epoch": 16.54, "learning_rate": 1.7795210355987056e-05, "loss": 0.2665, "step": 42580 }, { "epoch": 16.54, "learning_rate": 1.7794692556634304e-05, "loss": 0.0633, "step": 42590 }, { "epoch": 16.54, "learning_rate": 1.7794174757281555e-05, "loss": 0.0405, "step": 42600 }, { "epoch": 16.55, "learning_rate": 1.7793656957928803e-05, "loss": 0.0191, "step": 42610 }, { "epoch": 16.55, "learning_rate": 1.7793139158576054e-05, "loss": 0.1624, "step": 42620 }, { "epoch": 16.56, "learning_rate": 1.7792621359223302e-05, "loss": 0.0654, "step": 42630 }, { "epoch": 16.56, "learning_rate": 1.7792103559870554e-05, "loss": 0.0721, "step": 42640 }, { "epoch": 16.56, "learning_rate": 1.77915857605178e-05, "loss": 0.1328, "step": 42650 }, { "epoch": 16.57, "learning_rate": 1.779106796116505e-05, "loss": 0.0583, "step": 42660 }, { "epoch": 16.57, "learning_rate": 1.7790550161812297e-05, "loss": 0.0011, "step": 42670 }, { "epoch": 16.57, "learning_rate": 1.779003236245955e-05, "loss": 0.0286, "step": 42680 }, { "epoch": 16.58, "learning_rate": 1.7789514563106797e-05, "loss": 0.1218, "step": 42690 }, { "epoch": 16.58, "learning_rate": 1.7788996763754048e-05, "loss": 0.2277, "step": 42700 }, { "epoch": 16.59, "learning_rate": 1.7788478964401296e-05, "loss": 0.2786, "step": 42710 }, { "epoch": 16.59, "learning_rate": 1.7787961165048547e-05, "loss": 0.0296, "step": 42720 }, { "epoch": 16.59, "learning_rate": 1.7787443365695795e-05, "loss": 0.0994, "step": 42730 }, { "epoch": 16.6, "learning_rate": 1.7786925566343043e-05, "loss": 0.1604, "step": 42740 }, { "epoch": 16.6, "learning_rate": 1.778640776699029e-05, "loss": 0.2233, "step": 42750 }, { "epoch": 16.61, "learning_rate": 1.7785889967637542e-05, "loss": 0.1187, "step": 42760 }, { "epoch": 16.61, "learning_rate": 1.778537216828479e-05, "loss": 0.112, "step": 42770 }, { "epoch": 16.61, "learning_rate": 1.778485436893204e-05, "loss": 0.1021, "step": 42780 }, { "epoch": 16.62, "learning_rate": 1.778433656957929e-05, "loss": 0.1798, "step": 42790 }, { "epoch": 16.62, "learning_rate": 1.778381877022654e-05, "loss": 0.1738, "step": 42800 }, { "epoch": 16.63, "learning_rate": 1.778330097087379e-05, "loss": 0.2663, "step": 42810 }, { "epoch": 16.63, "learning_rate": 1.7782783171521037e-05, "loss": 0.2414, "step": 42820 }, { "epoch": 16.63, "learning_rate": 1.7782265372168284e-05, "loss": 0.0899, "step": 42830 }, { "epoch": 16.64, "learning_rate": 1.7781747572815536e-05, "loss": 0.1003, "step": 42840 }, { "epoch": 16.64, "learning_rate": 1.7781229773462784e-05, "loss": 0.0419, "step": 42850 }, { "epoch": 16.64, "learning_rate": 1.7780711974110035e-05, "loss": 0.1807, "step": 42860 }, { "epoch": 16.65, "learning_rate": 1.7780194174757283e-05, "loss": 0.1218, "step": 42870 }, { "epoch": 16.65, "learning_rate": 1.7779676375404534e-05, "loss": 0.22, "step": 42880 }, { "epoch": 16.66, "learning_rate": 1.777915857605178e-05, "loss": 0.0312, "step": 42890 }, { "epoch": 16.66, "learning_rate": 1.777864077669903e-05, "loss": 0.0735, "step": 42900 }, { "epoch": 16.66, "learning_rate": 1.7778122977346278e-05, "loss": 0.11, "step": 42910 }, { "epoch": 16.67, "learning_rate": 1.777760517799353e-05, "loss": 0.0098, "step": 42920 }, { "epoch": 16.67, "learning_rate": 1.7777087378640777e-05, "loss": 0.0133, "step": 42930 }, { "epoch": 16.68, "learning_rate": 1.777656957928803e-05, "loss": 0.1085, "step": 42940 }, { "epoch": 16.68, "learning_rate": 1.7776051779935276e-05, "loss": 0.0279, "step": 42950 }, { "epoch": 16.68, "learning_rate": 1.7775533980582528e-05, "loss": 0.161, "step": 42960 }, { "epoch": 16.69, "learning_rate": 1.7775016181229772e-05, "loss": 0.0566, "step": 42970 }, { "epoch": 16.69, "learning_rate": 1.7774498381877024e-05, "loss": 0.0877, "step": 42980 }, { "epoch": 16.7, "learning_rate": 1.777398058252427e-05, "loss": 0.2516, "step": 42990 }, { "epoch": 16.7, "learning_rate": 1.7773462783171523e-05, "loss": 0.1728, "step": 43000 }, { "epoch": 16.7, "learning_rate": 1.777294498381877e-05, "loss": 0.2104, "step": 43010 }, { "epoch": 16.71, "learning_rate": 1.7772427184466022e-05, "loss": 0.218, "step": 43020 }, { "epoch": 16.71, "learning_rate": 1.777190938511327e-05, "loss": 0.1869, "step": 43030 }, { "epoch": 16.71, "learning_rate": 1.777139158576052e-05, "loss": 0.0989, "step": 43040 }, { "epoch": 16.72, "learning_rate": 1.7770873786407766e-05, "loss": 0.1797, "step": 43050 }, { "epoch": 16.72, "learning_rate": 1.7770355987055017e-05, "loss": 0.0726, "step": 43060 }, { "epoch": 16.73, "learning_rate": 1.7769838187702265e-05, "loss": 0.1135, "step": 43070 }, { "epoch": 16.73, "learning_rate": 1.7769320388349516e-05, "loss": 0.085, "step": 43080 }, { "epoch": 16.73, "learning_rate": 1.7768802588996764e-05, "loss": 0.2452, "step": 43090 }, { "epoch": 16.74, "learning_rate": 1.7768284789644016e-05, "loss": 0.0529, "step": 43100 }, { "epoch": 16.74, "learning_rate": 1.7767766990291264e-05, "loss": 0.248, "step": 43110 }, { "epoch": 16.75, "learning_rate": 1.7767249190938515e-05, "loss": 0.3458, "step": 43120 }, { "epoch": 16.75, "learning_rate": 1.776673139158576e-05, "loss": 0.1804, "step": 43130 }, { "epoch": 16.75, "learning_rate": 1.776621359223301e-05, "loss": 0.1024, "step": 43140 }, { "epoch": 16.76, "learning_rate": 1.776569579288026e-05, "loss": 0.207, "step": 43150 }, { "epoch": 16.76, "learning_rate": 1.776517799352751e-05, "loss": 0.0705, "step": 43160 }, { "epoch": 16.77, "learning_rate": 1.7764660194174758e-05, "loss": 0.1529, "step": 43170 }, { "epoch": 16.77, "learning_rate": 1.776414239482201e-05, "loss": 0.052, "step": 43180 }, { "epoch": 16.77, "learning_rate": 1.7763624595469257e-05, "loss": 0.123, "step": 43190 }, { "epoch": 16.78, "learning_rate": 1.776310679611651e-05, "loss": 0.0278, "step": 43200 }, { "epoch": 16.78, "learning_rate": 1.7762588996763756e-05, "loss": 0.1315, "step": 43210 }, { "epoch": 16.78, "learning_rate": 1.7762071197411004e-05, "loss": 0.1767, "step": 43220 }, { "epoch": 16.79, "learning_rate": 1.7761553398058252e-05, "loss": 0.0305, "step": 43230 }, { "epoch": 16.79, "learning_rate": 1.7761035598705504e-05, "loss": 0.1318, "step": 43240 }, { "epoch": 16.8, "learning_rate": 1.776051779935275e-05, "loss": 0.0422, "step": 43250 }, { "epoch": 16.8, "learning_rate": 1.7760000000000003e-05, "loss": 0.2118, "step": 43260 }, { "epoch": 16.8, "learning_rate": 1.775948220064725e-05, "loss": 0.1572, "step": 43270 }, { "epoch": 16.81, "learning_rate": 1.7758964401294502e-05, "loss": 0.268, "step": 43280 }, { "epoch": 16.81, "learning_rate": 1.775844660194175e-05, "loss": 0.2522, "step": 43290 }, { "epoch": 16.82, "learning_rate": 1.7757928802588998e-05, "loss": 0.0441, "step": 43300 }, { "epoch": 16.82, "learning_rate": 1.7757411003236246e-05, "loss": 0.2637, "step": 43310 }, { "epoch": 16.82, "learning_rate": 1.7756893203883497e-05, "loss": 0.2136, "step": 43320 }, { "epoch": 16.83, "learning_rate": 1.7756375404530745e-05, "loss": 0.1961, "step": 43330 }, { "epoch": 16.83, "learning_rate": 1.7755857605177996e-05, "loss": 0.0309, "step": 43340 }, { "epoch": 16.83, "learning_rate": 1.7755339805825244e-05, "loss": 0.1434, "step": 43350 }, { "epoch": 16.84, "learning_rate": 1.7754822006472496e-05, "loss": 0.4176, "step": 43360 }, { "epoch": 16.84, "learning_rate": 1.7754304207119743e-05, "loss": 0.0601, "step": 43370 }, { "epoch": 16.85, "learning_rate": 1.775378640776699e-05, "loss": 0.0984, "step": 43380 }, { "epoch": 16.85, "learning_rate": 1.775326860841424e-05, "loss": 0.1693, "step": 43390 }, { "epoch": 16.85, "learning_rate": 1.775275080906149e-05, "loss": 0.2335, "step": 43400 }, { "epoch": 16.86, "learning_rate": 1.775223300970874e-05, "loss": 0.2428, "step": 43410 }, { "epoch": 16.86, "learning_rate": 1.775171521035599e-05, "loss": 0.0497, "step": 43420 }, { "epoch": 16.87, "learning_rate": 1.7751197411003238e-05, "loss": 0.2228, "step": 43430 }, { "epoch": 16.87, "learning_rate": 1.775067961165049e-05, "loss": 0.0434, "step": 43440 }, { "epoch": 16.87, "learning_rate": 1.7750161812297737e-05, "loss": 0.2069, "step": 43450 }, { "epoch": 16.88, "learning_rate": 1.7749644012944985e-05, "loss": 0.1167, "step": 43460 }, { "epoch": 16.88, "learning_rate": 1.7749126213592233e-05, "loss": 0.0365, "step": 43470 }, { "epoch": 16.89, "learning_rate": 1.7748608414239484e-05, "loss": 0.2387, "step": 43480 }, { "epoch": 16.89, "learning_rate": 1.7748090614886732e-05, "loss": 0.0244, "step": 43490 }, { "epoch": 16.89, "learning_rate": 1.7747572815533983e-05, "loss": 0.2399, "step": 43500 }, { "epoch": 16.9, "learning_rate": 1.774705501618123e-05, "loss": 0.1213, "step": 43510 }, { "epoch": 16.9, "learning_rate": 1.7746537216828483e-05, "loss": 0.0406, "step": 43520 }, { "epoch": 16.9, "learning_rate": 1.774601941747573e-05, "loss": 0.1046, "step": 43530 }, { "epoch": 16.91, "learning_rate": 1.774550161812298e-05, "loss": 0.1664, "step": 43540 }, { "epoch": 16.91, "learning_rate": 1.7744983818770226e-05, "loss": 0.3325, "step": 43550 }, { "epoch": 16.92, "learning_rate": 1.7744466019417478e-05, "loss": 0.2133, "step": 43560 }, { "epoch": 16.92, "learning_rate": 1.7743948220064726e-05, "loss": 0.0404, "step": 43570 }, { "epoch": 16.92, "learning_rate": 1.7743430420711977e-05, "loss": 0.1367, "step": 43580 }, { "epoch": 16.93, "learning_rate": 1.7742912621359225e-05, "loss": 0.2669, "step": 43590 }, { "epoch": 16.93, "learning_rate": 1.7742394822006476e-05, "loss": 0.1362, "step": 43600 }, { "epoch": 16.94, "learning_rate": 1.7741877022653724e-05, "loss": 0.08, "step": 43610 }, { "epoch": 16.94, "learning_rate": 1.7741359223300972e-05, "loss": 0.073, "step": 43620 }, { "epoch": 16.94, "learning_rate": 1.774084142394822e-05, "loss": 0.1277, "step": 43630 }, { "epoch": 16.95, "learning_rate": 1.774032362459547e-05, "loss": 0.0986, "step": 43640 }, { "epoch": 16.95, "learning_rate": 1.773980582524272e-05, "loss": 0.2773, "step": 43650 }, { "epoch": 16.96, "learning_rate": 1.773928802588997e-05, "loss": 0.1769, "step": 43660 }, { "epoch": 16.96, "learning_rate": 1.773877022653722e-05, "loss": 0.1013, "step": 43670 }, { "epoch": 16.96, "learning_rate": 1.7738252427184466e-05, "loss": 0.1835, "step": 43680 }, { "epoch": 16.97, "learning_rate": 1.7737734627831718e-05, "loss": 0.0483, "step": 43690 }, { "epoch": 16.97, "learning_rate": 1.7737216828478966e-05, "loss": 0.0968, "step": 43700 }, { "epoch": 16.97, "learning_rate": 1.7736699029126214e-05, "loss": 0.1067, "step": 43710 }, { "epoch": 16.98, "learning_rate": 1.7736181229773465e-05, "loss": 0.1283, "step": 43720 }, { "epoch": 16.98, "learning_rate": 1.7735663430420713e-05, "loss": 0.1628, "step": 43730 }, { "epoch": 16.99, "learning_rate": 1.7735145631067964e-05, "loss": 0.2357, "step": 43740 }, { "epoch": 16.99, "learning_rate": 1.7734627831715212e-05, "loss": 0.0527, "step": 43750 }, { "epoch": 16.99, "learning_rate": 1.773411003236246e-05, "loss": 0.0019, "step": 43760 }, { "epoch": 17.0, "learning_rate": 1.773359223300971e-05, "loss": 0.17, "step": 43770 }, { "epoch": 17.0, "eval_accuracy": 0.9557083906464925, "eval_loss": 0.21660885214805603, "eval_runtime": 8.2156, "eval_samples_per_second": 442.449, "eval_steps_per_second": 55.382, "step": 43775 }, { "epoch": 17.0, "learning_rate": 1.773307443365696e-05, "loss": 0.179, "step": 43780 }, { "epoch": 17.01, "learning_rate": 1.7732556634304207e-05, "loss": 0.0662, "step": 43790 }, { "epoch": 17.01, "learning_rate": 1.773203883495146e-05, "loss": 0.0542, "step": 43800 }, { "epoch": 17.01, "learning_rate": 1.7731521035598706e-05, "loss": 0.098, "step": 43810 }, { "epoch": 17.02, "learning_rate": 1.7731003236245958e-05, "loss": 0.0564, "step": 43820 }, { "epoch": 17.02, "learning_rate": 1.7730485436893206e-05, "loss": 0.1391, "step": 43830 }, { "epoch": 17.03, "learning_rate": 1.7729967637540453e-05, "loss": 0.0964, "step": 43840 }, { "epoch": 17.03, "learning_rate": 1.7729449838187705e-05, "loss": 0.0783, "step": 43850 }, { "epoch": 17.03, "learning_rate": 1.7728932038834953e-05, "loss": 0.0955, "step": 43860 }, { "epoch": 17.04, "learning_rate": 1.77284142394822e-05, "loss": 0.1657, "step": 43870 }, { "epoch": 17.04, "learning_rate": 1.7727896440129452e-05, "loss": 0.03, "step": 43880 }, { "epoch": 17.04, "learning_rate": 1.77273786407767e-05, "loss": 0.0038, "step": 43890 }, { "epoch": 17.05, "learning_rate": 1.772686084142395e-05, "loss": 0.2355, "step": 43900 }, { "epoch": 17.05, "learning_rate": 1.77263430420712e-05, "loss": 0.1176, "step": 43910 }, { "epoch": 17.06, "learning_rate": 1.7725825242718447e-05, "loss": 0.0433, "step": 43920 }, { "epoch": 17.06, "learning_rate": 1.77253074433657e-05, "loss": 0.1647, "step": 43930 }, { "epoch": 17.06, "learning_rate": 1.7724789644012946e-05, "loss": 0.2472, "step": 43940 }, { "epoch": 17.07, "learning_rate": 1.7724271844660194e-05, "loss": 0.2023, "step": 43950 }, { "epoch": 17.07, "learning_rate": 1.7723754045307446e-05, "loss": 0.0983, "step": 43960 }, { "epoch": 17.08, "learning_rate": 1.7723236245954693e-05, "loss": 0.0446, "step": 43970 }, { "epoch": 17.08, "learning_rate": 1.7722718446601945e-05, "loss": 0.1182, "step": 43980 }, { "epoch": 17.08, "learning_rate": 1.7722200647249193e-05, "loss": 0.0486, "step": 43990 }, { "epoch": 17.09, "learning_rate": 1.772168284789644e-05, "loss": 0.2129, "step": 44000 }, { "epoch": 17.09, "learning_rate": 1.7721165048543692e-05, "loss": 0.07, "step": 44010 }, { "epoch": 17.1, "learning_rate": 1.772064724919094e-05, "loss": 0.0754, "step": 44020 }, { "epoch": 17.1, "learning_rate": 1.7720129449838188e-05, "loss": 0.2169, "step": 44030 }, { "epoch": 17.1, "learning_rate": 1.771961165048544e-05, "loss": 0.1001, "step": 44040 }, { "epoch": 17.11, "learning_rate": 1.7719093851132687e-05, "loss": 0.0647, "step": 44050 }, { "epoch": 17.11, "learning_rate": 1.7718576051779935e-05, "loss": 0.0436, "step": 44060 }, { "epoch": 17.11, "learning_rate": 1.7718058252427186e-05, "loss": 0.2746, "step": 44070 }, { "epoch": 17.12, "learning_rate": 1.7717540453074434e-05, "loss": 0.0876, "step": 44080 }, { "epoch": 17.12, "learning_rate": 1.7717022653721685e-05, "loss": 0.0782, "step": 44090 }, { "epoch": 17.13, "learning_rate": 1.7716504854368933e-05, "loss": 0.0963, "step": 44100 }, { "epoch": 17.13, "learning_rate": 1.771598705501618e-05, "loss": 0.0576, "step": 44110 }, { "epoch": 17.13, "learning_rate": 1.7715469255663433e-05, "loss": 0.13, "step": 44120 }, { "epoch": 17.14, "learning_rate": 1.771495145631068e-05, "loss": 0.123, "step": 44130 }, { "epoch": 17.14, "learning_rate": 1.771443365695793e-05, "loss": 0.0775, "step": 44140 }, { "epoch": 17.15, "learning_rate": 1.771391585760518e-05, "loss": 0.1593, "step": 44150 }, { "epoch": 17.15, "learning_rate": 1.7713398058252428e-05, "loss": 0.1236, "step": 44160 }, { "epoch": 17.15, "learning_rate": 1.771288025889968e-05, "loss": 0.0757, "step": 44170 }, { "epoch": 17.16, "learning_rate": 1.7712362459546927e-05, "loss": 0.0481, "step": 44180 }, { "epoch": 17.16, "learning_rate": 1.7711844660194175e-05, "loss": 0.2573, "step": 44190 }, { "epoch": 17.17, "learning_rate": 1.7711326860841426e-05, "loss": 0.2014, "step": 44200 }, { "epoch": 17.17, "learning_rate": 1.7710809061488674e-05, "loss": 0.2625, "step": 44210 }, { "epoch": 17.17, "learning_rate": 1.7710291262135922e-05, "loss": 0.1473, "step": 44220 }, { "epoch": 17.18, "learning_rate": 1.7709773462783173e-05, "loss": 0.0144, "step": 44230 }, { "epoch": 17.18, "learning_rate": 1.770925566343042e-05, "loss": 0.1752, "step": 44240 }, { "epoch": 17.18, "learning_rate": 1.7708737864077673e-05, "loss": 0.1109, "step": 44250 }, { "epoch": 17.19, "learning_rate": 1.770822006472492e-05, "loss": 0.1503, "step": 44260 }, { "epoch": 17.19, "learning_rate": 1.770770226537217e-05, "loss": 0.2252, "step": 44270 }, { "epoch": 17.2, "learning_rate": 1.770718446601942e-05, "loss": 0.107, "step": 44280 }, { "epoch": 17.2, "learning_rate": 1.7706666666666668e-05, "loss": 0.0922, "step": 44290 }, { "epoch": 17.2, "learning_rate": 1.7706148867313916e-05, "loss": 0.1514, "step": 44300 }, { "epoch": 17.21, "learning_rate": 1.7705631067961167e-05, "loss": 0.0022, "step": 44310 }, { "epoch": 17.21, "learning_rate": 1.7705113268608415e-05, "loss": 0.0029, "step": 44320 }, { "epoch": 17.22, "learning_rate": 1.7704595469255666e-05, "loss": 0.1095, "step": 44330 }, { "epoch": 17.22, "learning_rate": 1.7704077669902914e-05, "loss": 0.0231, "step": 44340 }, { "epoch": 17.22, "learning_rate": 1.7703559870550165e-05, "loss": 0.1088, "step": 44350 }, { "epoch": 17.23, "learning_rate": 1.770304207119741e-05, "loss": 0.0391, "step": 44360 }, { "epoch": 17.23, "learning_rate": 1.770252427184466e-05, "loss": 0.226, "step": 44370 }, { "epoch": 17.23, "learning_rate": 1.770200647249191e-05, "loss": 0.1613, "step": 44380 }, { "epoch": 17.24, "learning_rate": 1.770148867313916e-05, "loss": 0.0935, "step": 44390 }, { "epoch": 17.24, "learning_rate": 1.770097087378641e-05, "loss": 0.1589, "step": 44400 }, { "epoch": 17.25, "learning_rate": 1.770045307443366e-05, "loss": 0.1347, "step": 44410 }, { "epoch": 17.25, "learning_rate": 1.7699935275080908e-05, "loss": 0.1847, "step": 44420 }, { "epoch": 17.25, "learning_rate": 1.769941747572816e-05, "loss": 0.1616, "step": 44430 }, { "epoch": 17.26, "learning_rate": 1.7698899676375403e-05, "loss": 0.1403, "step": 44440 }, { "epoch": 17.26, "learning_rate": 1.7698381877022655e-05, "loss": 0.0986, "step": 44450 }, { "epoch": 17.27, "learning_rate": 1.7697864077669903e-05, "loss": 0.0329, "step": 44460 }, { "epoch": 17.27, "learning_rate": 1.7697346278317154e-05, "loss": 0.2119, "step": 44470 }, { "epoch": 17.27, "learning_rate": 1.7696828478964402e-05, "loss": 0.0905, "step": 44480 }, { "epoch": 17.28, "learning_rate": 1.7696310679611653e-05, "loss": 0.0675, "step": 44490 }, { "epoch": 17.28, "learning_rate": 1.76957928802589e-05, "loss": 0.1257, "step": 44500 }, { "epoch": 17.29, "learning_rate": 1.7695275080906152e-05, "loss": 0.1056, "step": 44510 }, { "epoch": 17.29, "learning_rate": 1.7694757281553397e-05, "loss": 0.1767, "step": 44520 }, { "epoch": 17.29, "learning_rate": 1.769423948220065e-05, "loss": 0.0688, "step": 44530 }, { "epoch": 17.3, "learning_rate": 1.7693721682847896e-05, "loss": 0.0957, "step": 44540 }, { "epoch": 17.3, "learning_rate": 1.7693203883495148e-05, "loss": 0.1691, "step": 44550 }, { "epoch": 17.3, "learning_rate": 1.7692686084142396e-05, "loss": 0.1554, "step": 44560 }, { "epoch": 17.31, "learning_rate": 1.7692168284789647e-05, "loss": 0.0364, "step": 44570 }, { "epoch": 17.31, "learning_rate": 1.7691650485436895e-05, "loss": 0.05, "step": 44580 }, { "epoch": 17.32, "learning_rate": 1.7691132686084146e-05, "loss": 0.0879, "step": 44590 }, { "epoch": 17.32, "learning_rate": 1.769061488673139e-05, "loss": 0.1228, "step": 44600 }, { "epoch": 17.32, "learning_rate": 1.7690097087378642e-05, "loss": 0.1043, "step": 44610 }, { "epoch": 17.33, "learning_rate": 1.768957928802589e-05, "loss": 0.0611, "step": 44620 }, { "epoch": 17.33, "learning_rate": 1.768906148867314e-05, "loss": 0.0846, "step": 44630 }, { "epoch": 17.34, "learning_rate": 1.768854368932039e-05, "loss": 0.1771, "step": 44640 }, { "epoch": 17.34, "learning_rate": 1.768802588996764e-05, "loss": 0.0085, "step": 44650 }, { "epoch": 17.34, "learning_rate": 1.7687508090614888e-05, "loss": 0.1479, "step": 44660 }, { "epoch": 17.35, "learning_rate": 1.768699029126214e-05, "loss": 0.1477, "step": 44670 }, { "epoch": 17.35, "learning_rate": 1.7686472491909384e-05, "loss": 0.0772, "step": 44680 }, { "epoch": 17.36, "learning_rate": 1.7685954692556635e-05, "loss": 0.0692, "step": 44690 }, { "epoch": 17.36, "learning_rate": 1.7685436893203883e-05, "loss": 0.0737, "step": 44700 }, { "epoch": 17.36, "learning_rate": 1.7684919093851135e-05, "loss": 0.1382, "step": 44710 }, { "epoch": 17.37, "learning_rate": 1.7684401294498383e-05, "loss": 0.1664, "step": 44720 }, { "epoch": 17.37, "learning_rate": 1.7683883495145634e-05, "loss": 0.1907, "step": 44730 }, { "epoch": 17.37, "learning_rate": 1.7683365695792882e-05, "loss": 0.0496, "step": 44740 }, { "epoch": 17.38, "learning_rate": 1.7682847896440133e-05, "loss": 0.1154, "step": 44750 }, { "epoch": 17.38, "learning_rate": 1.7682330097087378e-05, "loss": 0.1828, "step": 44760 }, { "epoch": 17.39, "learning_rate": 1.768181229773463e-05, "loss": 0.1881, "step": 44770 }, { "epoch": 17.39, "learning_rate": 1.7681294498381877e-05, "loss": 0.0786, "step": 44780 }, { "epoch": 17.39, "learning_rate": 1.7680776699029128e-05, "loss": 0.0438, "step": 44790 }, { "epoch": 17.4, "learning_rate": 1.7680258899676376e-05, "loss": 0.112, "step": 44800 }, { "epoch": 17.4, "learning_rate": 1.7679741100323627e-05, "loss": 0.2575, "step": 44810 }, { "epoch": 17.41, "learning_rate": 1.7679223300970875e-05, "loss": 0.1025, "step": 44820 }, { "epoch": 17.41, "learning_rate": 1.7678705501618127e-05, "loss": 0.1471, "step": 44830 }, { "epoch": 17.41, "learning_rate": 1.767818770226537e-05, "loss": 0.1146, "step": 44840 }, { "epoch": 17.42, "learning_rate": 1.7677669902912623e-05, "loss": 0.0921, "step": 44850 }, { "epoch": 17.42, "learning_rate": 1.767715210355987e-05, "loss": 0.1402, "step": 44860 }, { "epoch": 17.43, "learning_rate": 1.7676634304207122e-05, "loss": 0.1369, "step": 44870 }, { "epoch": 17.43, "learning_rate": 1.767611650485437e-05, "loss": 0.1052, "step": 44880 }, { "epoch": 17.43, "learning_rate": 1.767559870550162e-05, "loss": 0.0109, "step": 44890 }, { "epoch": 17.44, "learning_rate": 1.767508090614887e-05, "loss": 0.0268, "step": 44900 }, { "epoch": 17.44, "learning_rate": 1.767456310679612e-05, "loss": 0.0947, "step": 44910 }, { "epoch": 17.44, "learning_rate": 1.7674045307443368e-05, "loss": 0.015, "step": 44920 }, { "epoch": 17.45, "learning_rate": 1.7673527508090616e-05, "loss": 0.0845, "step": 44930 }, { "epoch": 17.45, "learning_rate": 1.7673009708737864e-05, "loss": 0.2021, "step": 44940 }, { "epoch": 17.46, "learning_rate": 1.7672491909385115e-05, "loss": 0.2342, "step": 44950 }, { "epoch": 17.46, "learning_rate": 1.7671974110032363e-05, "loss": 0.0376, "step": 44960 }, { "epoch": 17.46, "learning_rate": 1.7671456310679615e-05, "loss": 0.0525, "step": 44970 }, { "epoch": 17.47, "learning_rate": 1.7670938511326863e-05, "loss": 0.4019, "step": 44980 }, { "epoch": 17.47, "learning_rate": 1.7670420711974114e-05, "loss": 0.1389, "step": 44990 }, { "epoch": 17.48, "learning_rate": 1.7669902912621362e-05, "loss": 0.0845, "step": 45000 }, { "epoch": 17.48, "learning_rate": 1.766938511326861e-05, "loss": 0.1614, "step": 45010 }, { "epoch": 17.48, "learning_rate": 1.7668867313915858e-05, "loss": 0.0573, "step": 45020 }, { "epoch": 17.49, "learning_rate": 1.766834951456311e-05, "loss": 0.1661, "step": 45030 }, { "epoch": 17.49, "learning_rate": 1.7667831715210357e-05, "loss": 0.1016, "step": 45040 }, { "epoch": 17.5, "learning_rate": 1.7667313915857608e-05, "loss": 0.1448, "step": 45050 }, { "epoch": 17.5, "learning_rate": 1.7666796116504856e-05, "loss": 0.0516, "step": 45060 }, { "epoch": 17.5, "learning_rate": 1.7666278317152107e-05, "loss": 0.0476, "step": 45070 }, { "epoch": 17.51, "learning_rate": 1.7665760517799355e-05, "loss": 0.0762, "step": 45080 }, { "epoch": 17.51, "learning_rate": 1.7665242718446603e-05, "loss": 0.2329, "step": 45090 }, { "epoch": 17.51, "learning_rate": 1.766472491909385e-05, "loss": 0.1523, "step": 45100 }, { "epoch": 17.52, "learning_rate": 1.7664207119741102e-05, "loss": 0.1185, "step": 45110 }, { "epoch": 17.52, "learning_rate": 1.766368932038835e-05, "loss": 0.1227, "step": 45120 }, { "epoch": 17.53, "learning_rate": 1.7663171521035602e-05, "loss": 0.1114, "step": 45130 }, { "epoch": 17.53, "learning_rate": 1.766265372168285e-05, "loss": 0.2574, "step": 45140 }, { "epoch": 17.53, "learning_rate": 1.7662135922330098e-05, "loss": 0.0526, "step": 45150 }, { "epoch": 17.54, "learning_rate": 1.766161812297735e-05, "loss": 0.1223, "step": 45160 }, { "epoch": 17.54, "learning_rate": 1.7661100323624597e-05, "loss": 0.0685, "step": 45170 }, { "epoch": 17.55, "learning_rate": 1.7660582524271845e-05, "loss": 0.2692, "step": 45180 }, { "epoch": 17.55, "learning_rate": 1.7660064724919096e-05, "loss": 0.0724, "step": 45190 }, { "epoch": 17.55, "learning_rate": 1.7659546925566344e-05, "loss": 0.077, "step": 45200 }, { "epoch": 17.56, "learning_rate": 1.7659029126213595e-05, "loss": 0.1528, "step": 45210 }, { "epoch": 17.56, "learning_rate": 1.7658511326860843e-05, "loss": 0.0472, "step": 45220 }, { "epoch": 17.57, "learning_rate": 1.765799352750809e-05, "loss": 0.0096, "step": 45230 }, { "epoch": 17.57, "learning_rate": 1.7657475728155342e-05, "loss": 0.0132, "step": 45240 }, { "epoch": 17.57, "learning_rate": 1.765695792880259e-05, "loss": 0.1671, "step": 45250 }, { "epoch": 17.58, "learning_rate": 1.7656440129449838e-05, "loss": 0.036, "step": 45260 }, { "epoch": 17.58, "learning_rate": 1.765592233009709e-05, "loss": 0.2875, "step": 45270 }, { "epoch": 17.58, "learning_rate": 1.7655404530744338e-05, "loss": 0.1838, "step": 45280 }, { "epoch": 17.59, "learning_rate": 1.765488673139159e-05, "loss": 0.0885, "step": 45290 }, { "epoch": 17.59, "learning_rate": 1.7654368932038837e-05, "loss": 0.1035, "step": 45300 }, { "epoch": 17.6, "learning_rate": 1.7653851132686085e-05, "loss": 0.2421, "step": 45310 }, { "epoch": 17.6, "learning_rate": 1.7653333333333336e-05, "loss": 0.2589, "step": 45320 }, { "epoch": 17.6, "learning_rate": 1.7652815533980584e-05, "loss": 0.0281, "step": 45330 }, { "epoch": 17.61, "learning_rate": 1.7652297734627832e-05, "loss": 0.132, "step": 45340 }, { "epoch": 17.61, "learning_rate": 1.7651779935275083e-05, "loss": 0.1197, "step": 45350 }, { "epoch": 17.62, "learning_rate": 1.765126213592233e-05, "loss": 0.2507, "step": 45360 }, { "epoch": 17.62, "learning_rate": 1.7650744336569582e-05, "loss": 0.0365, "step": 45370 }, { "epoch": 17.62, "learning_rate": 1.765022653721683e-05, "loss": 0.0657, "step": 45380 }, { "epoch": 17.63, "learning_rate": 1.7649708737864078e-05, "loss": 0.2257, "step": 45390 }, { "epoch": 17.63, "learning_rate": 1.764919093851133e-05, "loss": 0.0685, "step": 45400 }, { "epoch": 17.63, "learning_rate": 1.7648673139158577e-05, "loss": 0.0575, "step": 45410 }, { "epoch": 17.64, "learning_rate": 1.7648155339805825e-05, "loss": 0.1274, "step": 45420 }, { "epoch": 17.64, "learning_rate": 1.7647637540453077e-05, "loss": 0.1426, "step": 45430 }, { "epoch": 17.65, "learning_rate": 1.7647119741100325e-05, "loss": 0.1887, "step": 45440 }, { "epoch": 17.65, "learning_rate": 1.7646601941747576e-05, "loss": 0.2834, "step": 45450 }, { "epoch": 17.65, "learning_rate": 1.7646084142394824e-05, "loss": 0.3783, "step": 45460 }, { "epoch": 17.66, "learning_rate": 1.7645566343042072e-05, "loss": 0.1254, "step": 45470 }, { "epoch": 17.66, "learning_rate": 1.7645048543689323e-05, "loss": 0.0853, "step": 45480 }, { "epoch": 17.67, "learning_rate": 1.764453074433657e-05, "loss": 0.1149, "step": 45490 }, { "epoch": 17.67, "learning_rate": 1.764401294498382e-05, "loss": 0.1192, "step": 45500 }, { "epoch": 17.67, "learning_rate": 1.764349514563107e-05, "loss": 0.0112, "step": 45510 }, { "epoch": 17.68, "learning_rate": 1.7642977346278318e-05, "loss": 0.1331, "step": 45520 }, { "epoch": 17.68, "learning_rate": 1.7642459546925566e-05, "loss": 0.1607, "step": 45530 }, { "epoch": 17.69, "learning_rate": 1.7641941747572817e-05, "loss": 0.0372, "step": 45540 }, { "epoch": 17.69, "learning_rate": 1.7641423948220065e-05, "loss": 0.0667, "step": 45550 }, { "epoch": 17.69, "learning_rate": 1.7640906148867317e-05, "loss": 0.1425, "step": 45560 }, { "epoch": 17.7, "learning_rate": 1.7640388349514565e-05, "loss": 0.1035, "step": 45570 }, { "epoch": 17.7, "learning_rate": 1.7639870550161812e-05, "loss": 0.1475, "step": 45580 }, { "epoch": 17.7, "learning_rate": 1.7639352750809064e-05, "loss": 0.1162, "step": 45590 }, { "epoch": 17.71, "learning_rate": 1.7638834951456312e-05, "loss": 0.3411, "step": 45600 }, { "epoch": 17.71, "learning_rate": 1.763831715210356e-05, "loss": 0.2146, "step": 45610 }, { "epoch": 17.72, "learning_rate": 1.763779935275081e-05, "loss": 0.1024, "step": 45620 }, { "epoch": 17.72, "learning_rate": 1.763728155339806e-05, "loss": 0.1062, "step": 45630 }, { "epoch": 17.72, "learning_rate": 1.763676375404531e-05, "loss": 0.1574, "step": 45640 }, { "epoch": 17.73, "learning_rate": 1.7636245954692558e-05, "loss": 0.0468, "step": 45650 }, { "epoch": 17.73, "learning_rate": 1.7635728155339806e-05, "loss": 0.065, "step": 45660 }, { "epoch": 17.74, "learning_rate": 1.7635210355987057e-05, "loss": 0.0823, "step": 45670 }, { "epoch": 17.74, "learning_rate": 1.7634692556634305e-05, "loss": 0.0243, "step": 45680 }, { "epoch": 17.74, "learning_rate": 1.7634174757281553e-05, "loss": 0.1374, "step": 45690 }, { "epoch": 17.75, "learning_rate": 1.7633656957928805e-05, "loss": 0.1334, "step": 45700 }, { "epoch": 17.75, "learning_rate": 1.7633139158576052e-05, "loss": 0.0994, "step": 45710 }, { "epoch": 17.76, "learning_rate": 1.7632621359223304e-05, "loss": 0.2081, "step": 45720 }, { "epoch": 17.76, "learning_rate": 1.763210355987055e-05, "loss": 0.1685, "step": 45730 }, { "epoch": 17.76, "learning_rate": 1.76315857605178e-05, "loss": 0.1136, "step": 45740 }, { "epoch": 17.77, "learning_rate": 1.763106796116505e-05, "loss": 0.1029, "step": 45750 }, { "epoch": 17.77, "learning_rate": 1.76305501618123e-05, "loss": 0.1598, "step": 45760 }, { "epoch": 17.77, "learning_rate": 1.7630032362459547e-05, "loss": 0.0964, "step": 45770 }, { "epoch": 17.78, "learning_rate": 1.7629514563106798e-05, "loss": 0.0707, "step": 45780 }, { "epoch": 17.78, "learning_rate": 1.7628996763754046e-05, "loss": 0.0628, "step": 45790 }, { "epoch": 17.79, "learning_rate": 1.7628478964401297e-05, "loss": 0.1575, "step": 45800 }, { "epoch": 17.79, "learning_rate": 1.7627961165048545e-05, "loss": 0.1441, "step": 45810 }, { "epoch": 17.79, "learning_rate": 1.7627443365695793e-05, "loss": 0.0448, "step": 45820 }, { "epoch": 17.8, "learning_rate": 1.762692556634304e-05, "loss": 0.1605, "step": 45830 }, { "epoch": 17.8, "learning_rate": 1.7626407766990292e-05, "loss": 0.0109, "step": 45840 }, { "epoch": 17.81, "learning_rate": 1.762588996763754e-05, "loss": 0.0072, "step": 45850 }, { "epoch": 17.81, "learning_rate": 1.762537216828479e-05, "loss": 0.114, "step": 45860 }, { "epoch": 17.81, "learning_rate": 1.762485436893204e-05, "loss": 0.0905, "step": 45870 }, { "epoch": 17.82, "learning_rate": 1.762433656957929e-05, "loss": 0.1212, "step": 45880 }, { "epoch": 17.82, "learning_rate": 1.762381877022654e-05, "loss": 0.1181, "step": 45890 }, { "epoch": 17.83, "learning_rate": 1.7623300970873787e-05, "loss": 0.169, "step": 45900 }, { "epoch": 17.83, "learning_rate": 1.7622783171521035e-05, "loss": 0.0953, "step": 45910 }, { "epoch": 17.83, "learning_rate": 1.7622265372168286e-05, "loss": 0.0757, "step": 45920 }, { "epoch": 17.84, "learning_rate": 1.7621747572815534e-05, "loss": 0.0775, "step": 45930 }, { "epoch": 17.84, "learning_rate": 1.7621229773462785e-05, "loss": 0.1722, "step": 45940 }, { "epoch": 17.84, "learning_rate": 1.7620711974110033e-05, "loss": 0.0659, "step": 45950 }, { "epoch": 17.85, "learning_rate": 1.7620194174757284e-05, "loss": 0.1339, "step": 45960 }, { "epoch": 17.85, "learning_rate": 1.7619676375404532e-05, "loss": 0.155, "step": 45970 }, { "epoch": 17.86, "learning_rate": 1.7619158576051784e-05, "loss": 0.2083, "step": 45980 }, { "epoch": 17.86, "learning_rate": 1.7618640776699028e-05, "loss": 0.2501, "step": 45990 }, { "epoch": 17.86, "learning_rate": 1.761812297734628e-05, "loss": 0.21, "step": 46000 }, { "epoch": 17.87, "learning_rate": 1.7617605177993527e-05, "loss": 0.1334, "step": 46010 }, { "epoch": 17.87, "learning_rate": 1.761708737864078e-05, "loss": 0.2606, "step": 46020 }, { "epoch": 17.88, "learning_rate": 1.7616569579288027e-05, "loss": 0.1457, "step": 46030 }, { "epoch": 17.88, "learning_rate": 1.7616051779935278e-05, "loss": 0.1713, "step": 46040 }, { "epoch": 17.88, "learning_rate": 1.7615533980582526e-05, "loss": 0.031, "step": 46050 }, { "epoch": 17.89, "learning_rate": 1.7615016181229777e-05, "loss": 0.137, "step": 46060 }, { "epoch": 17.89, "learning_rate": 1.7614498381877022e-05, "loss": 0.2195, "step": 46070 }, { "epoch": 17.9, "learning_rate": 1.7613980582524273e-05, "loss": 0.1182, "step": 46080 }, { "epoch": 17.9, "learning_rate": 1.761346278317152e-05, "loss": 0.0461, "step": 46090 }, { "epoch": 17.9, "learning_rate": 1.7612944983818772e-05, "loss": 0.1202, "step": 46100 }, { "epoch": 17.91, "learning_rate": 1.761242718446602e-05, "loss": 0.1419, "step": 46110 }, { "epoch": 17.91, "learning_rate": 1.761190938511327e-05, "loss": 0.2035, "step": 46120 }, { "epoch": 17.91, "learning_rate": 1.761139158576052e-05, "loss": 0.0719, "step": 46130 }, { "epoch": 17.92, "learning_rate": 1.761087378640777e-05, "loss": 0.1366, "step": 46140 }, { "epoch": 17.92, "learning_rate": 1.7610355987055015e-05, "loss": 0.2476, "step": 46150 }, { "epoch": 17.93, "learning_rate": 1.7609838187702267e-05, "loss": 0.1552, "step": 46160 }, { "epoch": 17.93, "learning_rate": 1.7609320388349515e-05, "loss": 0.0028, "step": 46170 }, { "epoch": 17.93, "learning_rate": 1.7608802588996766e-05, "loss": 0.1076, "step": 46180 }, { "epoch": 17.94, "learning_rate": 1.7608284789644014e-05, "loss": 0.1243, "step": 46190 }, { "epoch": 17.94, "learning_rate": 1.7607766990291265e-05, "loss": 0.1251, "step": 46200 }, { "epoch": 17.95, "learning_rate": 1.7607249190938513e-05, "loss": 0.0739, "step": 46210 }, { "epoch": 17.95, "learning_rate": 1.7606731391585764e-05, "loss": 0.1059, "step": 46220 }, { "epoch": 17.95, "learning_rate": 1.760621359223301e-05, "loss": 0.1499, "step": 46230 }, { "epoch": 17.96, "learning_rate": 1.760569579288026e-05, "loss": 0.0061, "step": 46240 }, { "epoch": 17.96, "learning_rate": 1.7605177993527508e-05, "loss": 0.0413, "step": 46250 }, { "epoch": 17.97, "learning_rate": 1.760466019417476e-05, "loss": 0.1038, "step": 46260 }, { "epoch": 17.97, "learning_rate": 1.7604142394822007e-05, "loss": 0.1615, "step": 46270 }, { "epoch": 17.97, "learning_rate": 1.760362459546926e-05, "loss": 0.1171, "step": 46280 }, { "epoch": 17.98, "learning_rate": 1.7603106796116507e-05, "loss": 0.0411, "step": 46290 }, { "epoch": 17.98, "learning_rate": 1.7602588996763758e-05, "loss": 0.0988, "step": 46300 }, { "epoch": 17.98, "learning_rate": 1.7602071197411002e-05, "loss": 0.2157, "step": 46310 }, { "epoch": 17.99, "learning_rate": 1.7601553398058254e-05, "loss": 0.0839, "step": 46320 }, { "epoch": 17.99, "learning_rate": 1.76010355987055e-05, "loss": 0.1509, "step": 46330 }, { "epoch": 18.0, "learning_rate": 1.7600517799352753e-05, "loss": 0.1595, "step": 46340 }, { "epoch": 18.0, "learning_rate": 1.76e-05, "loss": 0.0463, "step": 46350 }, { "epoch": 18.0, "eval_accuracy": 0.946079779917469, "eval_loss": 0.28520387411117554, "eval_runtime": 8.2227, "eval_samples_per_second": 442.068, "eval_steps_per_second": 55.335, "step": 46350 }, { "epoch": 18.0, "learning_rate": 1.7599482200647252e-05, "loss": 0.1042, "step": 46360 }, { "epoch": 18.01, "learning_rate": 1.75989644012945e-05, "loss": 0.1458, "step": 46370 }, { "epoch": 18.01, "learning_rate": 1.759844660194175e-05, "loss": 0.0071, "step": 46380 }, { "epoch": 18.02, "learning_rate": 1.7597928802588996e-05, "loss": 0.1704, "step": 46390 }, { "epoch": 18.02, "learning_rate": 1.7597411003236247e-05, "loss": 0.14, "step": 46400 }, { "epoch": 18.02, "learning_rate": 1.7596893203883495e-05, "loss": 0.2009, "step": 46410 }, { "epoch": 18.03, "learning_rate": 1.7596375404530747e-05, "loss": 0.1583, "step": 46420 }, { "epoch": 18.03, "learning_rate": 1.7595857605177994e-05, "loss": 0.1123, "step": 46430 }, { "epoch": 18.03, "learning_rate": 1.7595339805825246e-05, "loss": 0.0533, "step": 46440 }, { "epoch": 18.04, "learning_rate": 1.7594822006472494e-05, "loss": 0.1882, "step": 46450 }, { "epoch": 18.04, "learning_rate": 1.7594304207119745e-05, "loss": 0.2193, "step": 46460 }, { "epoch": 18.05, "learning_rate": 1.759378640776699e-05, "loss": 0.0626, "step": 46470 }, { "epoch": 18.05, "learning_rate": 1.759326860841424e-05, "loss": 0.0767, "step": 46480 }, { "epoch": 18.05, "learning_rate": 1.759275080906149e-05, "loss": 0.1081, "step": 46490 }, { "epoch": 18.06, "learning_rate": 1.759223300970874e-05, "loss": 0.3299, "step": 46500 }, { "epoch": 18.06, "learning_rate": 1.7591715210355988e-05, "loss": 0.2457, "step": 46510 }, { "epoch": 18.07, "learning_rate": 1.759119741100324e-05, "loss": 0.1593, "step": 46520 }, { "epoch": 18.07, "learning_rate": 1.7590679611650487e-05, "loss": 0.1365, "step": 46530 }, { "epoch": 18.07, "learning_rate": 1.759016181229774e-05, "loss": 0.0995, "step": 46540 }, { "epoch": 18.08, "learning_rate": 1.7589644012944986e-05, "loss": 0.2021, "step": 46550 }, { "epoch": 18.08, "learning_rate": 1.7589126213592234e-05, "loss": 0.1281, "step": 46560 }, { "epoch": 18.09, "learning_rate": 1.7588608414239482e-05, "loss": 0.0928, "step": 46570 }, { "epoch": 18.09, "learning_rate": 1.7588090614886734e-05, "loss": 0.1498, "step": 46580 }, { "epoch": 18.09, "learning_rate": 1.758757281553398e-05, "loss": 0.0905, "step": 46590 }, { "epoch": 18.1, "learning_rate": 1.7587055016181233e-05, "loss": 0.0382, "step": 46600 }, { "epoch": 18.1, "learning_rate": 1.758653721682848e-05, "loss": 0.1239, "step": 46610 }, { "epoch": 18.1, "learning_rate": 1.758601941747573e-05, "loss": 0.1121, "step": 46620 }, { "epoch": 18.11, "learning_rate": 1.758550161812298e-05, "loss": 0.1685, "step": 46630 }, { "epoch": 18.11, "learning_rate": 1.7584983818770228e-05, "loss": 0.1674, "step": 46640 }, { "epoch": 18.12, "learning_rate": 1.7584466019417476e-05, "loss": 0.0595, "step": 46650 }, { "epoch": 18.12, "learning_rate": 1.7583948220064727e-05, "loss": 0.0589, "step": 46660 }, { "epoch": 18.12, "learning_rate": 1.7583430420711975e-05, "loss": 0.1053, "step": 46670 }, { "epoch": 18.13, "learning_rate": 1.7582912621359226e-05, "loss": 0.0976, "step": 46680 }, { "epoch": 18.13, "learning_rate": 1.7582394822006474e-05, "loss": 0.0779, "step": 46690 }, { "epoch": 18.14, "learning_rate": 1.7581877022653722e-05, "loss": 0.0884, "step": 46700 }, { "epoch": 18.14, "learning_rate": 1.7581359223300974e-05, "loss": 0.059, "step": 46710 }, { "epoch": 18.14, "learning_rate": 1.758084142394822e-05, "loss": 0.1046, "step": 46720 }, { "epoch": 18.15, "learning_rate": 1.758032362459547e-05, "loss": 0.0854, "step": 46730 }, { "epoch": 18.15, "learning_rate": 1.757980582524272e-05, "loss": 0.1995, "step": 46740 }, { "epoch": 18.16, "learning_rate": 1.757928802588997e-05, "loss": 0.2225, "step": 46750 }, { "epoch": 18.16, "learning_rate": 1.757877022653722e-05, "loss": 0.0064, "step": 46760 }, { "epoch": 18.16, "learning_rate": 1.7578252427184468e-05, "loss": 0.1476, "step": 46770 }, { "epoch": 18.17, "learning_rate": 1.7577734627831716e-05, "loss": 0.2512, "step": 46780 }, { "epoch": 18.17, "learning_rate": 1.7577216828478967e-05, "loss": 0.1123, "step": 46790 }, { "epoch": 18.17, "learning_rate": 1.7576699029126215e-05, "loss": 0.0425, "step": 46800 }, { "epoch": 18.18, "learning_rate": 1.7576181229773463e-05, "loss": 0.0631, "step": 46810 }, { "epoch": 18.18, "learning_rate": 1.7575663430420714e-05, "loss": 0.2232, "step": 46820 }, { "epoch": 18.19, "learning_rate": 1.7575145631067962e-05, "loss": 0.1489, "step": 46830 }, { "epoch": 18.19, "learning_rate": 1.7574627831715214e-05, "loss": 0.0832, "step": 46840 }, { "epoch": 18.19, "learning_rate": 1.757411003236246e-05, "loss": 0.0781, "step": 46850 }, { "epoch": 18.2, "learning_rate": 1.757359223300971e-05, "loss": 0.1738, "step": 46860 }, { "epoch": 18.2, "learning_rate": 1.757307443365696e-05, "loss": 0.0749, "step": 46870 }, { "epoch": 18.21, "learning_rate": 1.757255663430421e-05, "loss": 0.1357, "step": 46880 }, { "epoch": 18.21, "learning_rate": 1.7572038834951457e-05, "loss": 0.2223, "step": 46890 }, { "epoch": 18.21, "learning_rate": 1.7571521035598708e-05, "loss": 0.1789, "step": 46900 }, { "epoch": 18.22, "learning_rate": 1.7571003236245956e-05, "loss": 0.0605, "step": 46910 }, { "epoch": 18.22, "learning_rate": 1.7570485436893204e-05, "loss": 0.0195, "step": 46920 }, { "epoch": 18.23, "learning_rate": 1.7569967637540455e-05, "loss": 0.0836, "step": 46930 }, { "epoch": 18.23, "learning_rate": 1.7569449838187703e-05, "loss": 0.1713, "step": 46940 }, { "epoch": 18.23, "learning_rate": 1.7568932038834954e-05, "loss": 0.1184, "step": 46950 }, { "epoch": 18.24, "learning_rate": 1.7568414239482202e-05, "loss": 0.1037, "step": 46960 }, { "epoch": 18.24, "learning_rate": 1.756789644012945e-05, "loss": 0.1028, "step": 46970 }, { "epoch": 18.24, "learning_rate": 1.75673786407767e-05, "loss": 0.0194, "step": 46980 }, { "epoch": 18.25, "learning_rate": 1.756686084142395e-05, "loss": 0.1779, "step": 46990 }, { "epoch": 18.25, "learning_rate": 1.7566343042071197e-05, "loss": 0.0167, "step": 47000 }, { "epoch": 18.26, "learning_rate": 1.756582524271845e-05, "loss": 0.0474, "step": 47010 }, { "epoch": 18.26, "learning_rate": 1.7565307443365697e-05, "loss": 0.0489, "step": 47020 }, { "epoch": 18.26, "learning_rate": 1.7564789644012948e-05, "loss": 0.0997, "step": 47030 }, { "epoch": 18.27, "learning_rate": 1.7564271844660196e-05, "loss": 0.1489, "step": 47040 }, { "epoch": 18.27, "learning_rate": 1.7563754045307444e-05, "loss": 0.0851, "step": 47050 }, { "epoch": 18.28, "learning_rate": 1.7563236245954695e-05, "loss": 0.1577, "step": 47060 }, { "epoch": 18.28, "learning_rate": 1.7562718446601943e-05, "loss": 0.0022, "step": 47070 }, { "epoch": 18.28, "learning_rate": 1.756220064724919e-05, "loss": 0.193, "step": 47080 }, { "epoch": 18.29, "learning_rate": 1.7561682847896442e-05, "loss": 0.2013, "step": 47090 }, { "epoch": 18.29, "learning_rate": 1.756116504854369e-05, "loss": 0.123, "step": 47100 }, { "epoch": 18.3, "learning_rate": 1.756064724919094e-05, "loss": 0.1383, "step": 47110 }, { "epoch": 18.3, "learning_rate": 1.756012944983819e-05, "loss": 0.0664, "step": 47120 }, { "epoch": 18.3, "learning_rate": 1.7559611650485437e-05, "loss": 0.0822, "step": 47130 }, { "epoch": 18.31, "learning_rate": 1.755909385113269e-05, "loss": 0.0078, "step": 47140 }, { "epoch": 18.31, "learning_rate": 1.7558576051779936e-05, "loss": 0.1105, "step": 47150 }, { "epoch": 18.31, "learning_rate": 1.7558058252427184e-05, "loss": 0.1319, "step": 47160 }, { "epoch": 18.32, "learning_rate": 1.7557540453074436e-05, "loss": 0.1639, "step": 47170 }, { "epoch": 18.32, "learning_rate": 1.7557022653721684e-05, "loss": 0.1652, "step": 47180 }, { "epoch": 18.33, "learning_rate": 1.7556504854368935e-05, "loss": 0.2102, "step": 47190 }, { "epoch": 18.33, "learning_rate": 1.7555987055016183e-05, "loss": 0.0331, "step": 47200 }, { "epoch": 18.33, "learning_rate": 1.755546925566343e-05, "loss": 0.1704, "step": 47210 }, { "epoch": 18.34, "learning_rate": 1.7554951456310682e-05, "loss": 0.182, "step": 47220 }, { "epoch": 18.34, "learning_rate": 1.755443365695793e-05, "loss": 0.0544, "step": 47230 }, { "epoch": 18.35, "learning_rate": 1.7553915857605178e-05, "loss": 0.0668, "step": 47240 }, { "epoch": 18.35, "learning_rate": 1.755339805825243e-05, "loss": 0.2502, "step": 47250 }, { "epoch": 18.35, "learning_rate": 1.7552880258899677e-05, "loss": 0.1947, "step": 47260 }, { "epoch": 18.36, "learning_rate": 1.755236245954693e-05, "loss": 0.0603, "step": 47270 }, { "epoch": 18.36, "learning_rate": 1.7551844660194176e-05, "loss": 0.1317, "step": 47280 }, { "epoch": 18.37, "learning_rate": 1.7551326860841424e-05, "loss": 0.0755, "step": 47290 }, { "epoch": 18.37, "learning_rate": 1.7550809061488672e-05, "loss": 0.179, "step": 47300 }, { "epoch": 18.37, "learning_rate": 1.7550291262135924e-05, "loss": 0.1046, "step": 47310 }, { "epoch": 18.38, "learning_rate": 1.754977346278317e-05, "loss": 0.1702, "step": 47320 }, { "epoch": 18.38, "learning_rate": 1.7549255663430423e-05, "loss": 0.2192, "step": 47330 }, { "epoch": 18.38, "learning_rate": 1.754873786407767e-05, "loss": 0.0659, "step": 47340 }, { "epoch": 18.39, "learning_rate": 1.7548220064724922e-05, "loss": 0.1499, "step": 47350 }, { "epoch": 18.39, "learning_rate": 1.754770226537217e-05, "loss": 0.0945, "step": 47360 }, { "epoch": 18.4, "learning_rate": 1.7547184466019418e-05, "loss": 0.0475, "step": 47370 }, { "epoch": 18.4, "learning_rate": 1.7546666666666666e-05, "loss": 0.1128, "step": 47380 }, { "epoch": 18.4, "learning_rate": 1.7546148867313917e-05, "loss": 0.1344, "step": 47390 }, { "epoch": 18.41, "learning_rate": 1.7545631067961165e-05, "loss": 0.156, "step": 47400 }, { "epoch": 18.41, "learning_rate": 1.7545113268608416e-05, "loss": 0.1078, "step": 47410 }, { "epoch": 18.42, "learning_rate": 1.7544595469255664e-05, "loss": 0.1085, "step": 47420 }, { "epoch": 18.42, "learning_rate": 1.7544077669902916e-05, "loss": 0.1383, "step": 47430 }, { "epoch": 18.42, "learning_rate": 1.7543559870550164e-05, "loss": 0.0491, "step": 47440 }, { "epoch": 18.43, "learning_rate": 1.754304207119741e-05, "loss": 0.2082, "step": 47450 }, { "epoch": 18.43, "learning_rate": 1.754252427184466e-05, "loss": 0.0286, "step": 47460 }, { "epoch": 18.43, "learning_rate": 1.754200647249191e-05, "loss": 0.1292, "step": 47470 }, { "epoch": 18.44, "learning_rate": 1.754148867313916e-05, "loss": 0.1905, "step": 47480 }, { "epoch": 18.44, "learning_rate": 1.754097087378641e-05, "loss": 0.0992, "step": 47490 }, { "epoch": 18.45, "learning_rate": 1.7540453074433658e-05, "loss": 0.02, "step": 47500 }, { "epoch": 18.45, "learning_rate": 1.753993527508091e-05, "loss": 0.0727, "step": 47510 }, { "epoch": 18.45, "learning_rate": 1.7539417475728157e-05, "loss": 0.1293, "step": 47520 }, { "epoch": 18.46, "learning_rate": 1.7538899676375405e-05, "loss": 0.3012, "step": 47530 }, { "epoch": 18.46, "learning_rate": 1.7538381877022653e-05, "loss": 0.1126, "step": 47540 }, { "epoch": 18.47, "learning_rate": 1.7537864077669904e-05, "loss": 0.2203, "step": 47550 }, { "epoch": 18.47, "learning_rate": 1.7537346278317152e-05, "loss": 0.1524, "step": 47560 }, { "epoch": 18.47, "learning_rate": 1.7536828478964403e-05, "loss": 0.0863, "step": 47570 }, { "epoch": 18.48, "learning_rate": 1.753631067961165e-05, "loss": 0.062, "step": 47580 }, { "epoch": 18.48, "learning_rate": 1.7535792880258903e-05, "loss": 0.0528, "step": 47590 }, { "epoch": 18.49, "learning_rate": 1.753527508090615e-05, "loss": 0.1061, "step": 47600 }, { "epoch": 18.49, "learning_rate": 1.75347572815534e-05, "loss": 0.3054, "step": 47610 }, { "epoch": 18.49, "learning_rate": 1.7534239482200646e-05, "loss": 0.096, "step": 47620 }, { "epoch": 18.5, "learning_rate": 1.7533721682847898e-05, "loss": 0.188, "step": 47630 }, { "epoch": 18.5, "learning_rate": 1.7533203883495146e-05, "loss": 0.142, "step": 47640 }, { "epoch": 18.5, "learning_rate": 1.7532686084142397e-05, "loss": 0.0408, "step": 47650 }, { "epoch": 18.51, "learning_rate": 1.7532168284789645e-05, "loss": 0.1064, "step": 47660 }, { "epoch": 18.51, "learning_rate": 1.7531650485436896e-05, "loss": 0.1284, "step": 47670 }, { "epoch": 18.52, "learning_rate": 1.7531132686084144e-05, "loss": 0.093, "step": 47680 }, { "epoch": 18.52, "learning_rate": 1.7530614886731396e-05, "loss": 0.2373, "step": 47690 }, { "epoch": 18.52, "learning_rate": 1.753009708737864e-05, "loss": 0.1456, "step": 47700 }, { "epoch": 18.53, "learning_rate": 1.752957928802589e-05, "loss": 0.2189, "step": 47710 }, { "epoch": 18.53, "learning_rate": 1.752906148867314e-05, "loss": 0.1447, "step": 47720 }, { "epoch": 18.54, "learning_rate": 1.752854368932039e-05, "loss": 0.2176, "step": 47730 }, { "epoch": 18.54, "learning_rate": 1.752802588996764e-05, "loss": 0.1511, "step": 47740 }, { "epoch": 18.54, "learning_rate": 1.752750809061489e-05, "loss": 0.1429, "step": 47750 }, { "epoch": 18.55, "learning_rate": 1.7526990291262138e-05, "loss": 0.1704, "step": 47760 }, { "epoch": 18.55, "learning_rate": 1.752647249190939e-05, "loss": 0.2922, "step": 47770 }, { "epoch": 18.56, "learning_rate": 1.7525954692556634e-05, "loss": 0.0829, "step": 47780 }, { "epoch": 18.56, "learning_rate": 1.7525436893203885e-05, "loss": 0.084, "step": 47790 }, { "epoch": 18.56, "learning_rate": 1.7524919093851133e-05, "loss": 0.0331, "step": 47800 }, { "epoch": 18.57, "learning_rate": 1.7524401294498384e-05, "loss": 0.0745, "step": 47810 }, { "epoch": 18.57, "learning_rate": 1.7523883495145632e-05, "loss": 0.1303, "step": 47820 }, { "epoch": 18.57, "learning_rate": 1.7523365695792883e-05, "loss": 0.1263, "step": 47830 }, { "epoch": 18.58, "learning_rate": 1.752284789644013e-05, "loss": 0.0674, "step": 47840 }, { "epoch": 18.58, "learning_rate": 1.7522330097087383e-05, "loss": 0.0788, "step": 47850 }, { "epoch": 18.59, "learning_rate": 1.7521812297734627e-05, "loss": 0.0803, "step": 47860 }, { "epoch": 18.59, "learning_rate": 1.752129449838188e-05, "loss": 0.1326, "step": 47870 }, { "epoch": 18.59, "learning_rate": 1.7520776699029126e-05, "loss": 0.0636, "step": 47880 }, { "epoch": 18.6, "learning_rate": 1.7520258899676378e-05, "loss": 0.1998, "step": 47890 }, { "epoch": 18.6, "learning_rate": 1.7519741100323626e-05, "loss": 0.0272, "step": 47900 }, { "epoch": 18.61, "learning_rate": 1.7519223300970877e-05, "loss": 0.1409, "step": 47910 }, { "epoch": 18.61, "learning_rate": 1.7518705501618125e-05, "loss": 0.1324, "step": 47920 }, { "epoch": 18.61, "learning_rate": 1.7518187702265376e-05, "loss": 0.2306, "step": 47930 }, { "epoch": 18.62, "learning_rate": 1.751766990291262e-05, "loss": 0.0745, "step": 47940 }, { "epoch": 18.62, "learning_rate": 1.7517152103559872e-05, "loss": 0.2658, "step": 47950 }, { "epoch": 18.63, "learning_rate": 1.751663430420712e-05, "loss": 0.0583, "step": 47960 }, { "epoch": 18.63, "learning_rate": 1.751611650485437e-05, "loss": 0.0448, "step": 47970 }, { "epoch": 18.63, "learning_rate": 1.751559870550162e-05, "loss": 0.1412, "step": 47980 }, { "epoch": 18.64, "learning_rate": 1.751508090614887e-05, "loss": 0.1098, "step": 47990 }, { "epoch": 18.64, "learning_rate": 1.751456310679612e-05, "loss": 0.1051, "step": 48000 }, { "epoch": 18.64, "learning_rate": 1.751404530744337e-05, "loss": 0.1473, "step": 48010 }, { "epoch": 18.65, "learning_rate": 1.7513527508090614e-05, "loss": 0.2206, "step": 48020 }, { "epoch": 18.65, "learning_rate": 1.7513009708737866e-05, "loss": 0.1427, "step": 48030 }, { "epoch": 18.66, "learning_rate": 1.7512491909385114e-05, "loss": 0.3028, "step": 48040 }, { "epoch": 18.66, "learning_rate": 1.7511974110032365e-05, "loss": 0.2534, "step": 48050 }, { "epoch": 18.66, "learning_rate": 1.7511456310679613e-05, "loss": 0.1399, "step": 48060 }, { "epoch": 18.67, "learning_rate": 1.7510938511326864e-05, "loss": 0.0056, "step": 48070 }, { "epoch": 18.67, "learning_rate": 1.7510420711974112e-05, "loss": 0.3489, "step": 48080 }, { "epoch": 18.68, "learning_rate": 1.750990291262136e-05, "loss": 0.0459, "step": 48090 }, { "epoch": 18.68, "learning_rate": 1.7509385113268608e-05, "loss": 0.0334, "step": 48100 }, { "epoch": 18.68, "learning_rate": 1.750886731391586e-05, "loss": 0.2026, "step": 48110 }, { "epoch": 18.69, "learning_rate": 1.7508349514563107e-05, "loss": 0.0388, "step": 48120 }, { "epoch": 18.69, "learning_rate": 1.750783171521036e-05, "loss": 0.1521, "step": 48130 }, { "epoch": 18.7, "learning_rate": 1.7507313915857606e-05, "loss": 0.0793, "step": 48140 }, { "epoch": 18.7, "learning_rate": 1.7506796116504858e-05, "loss": 0.0712, "step": 48150 }, { "epoch": 18.7, "learning_rate": 1.7506278317152106e-05, "loss": 0.15, "step": 48160 }, { "epoch": 18.71, "learning_rate": 1.7505760517799353e-05, "loss": 0.1408, "step": 48170 }, { "epoch": 18.71, "learning_rate": 1.75052427184466e-05, "loss": 0.2519, "step": 48180 }, { "epoch": 18.71, "learning_rate": 1.7504724919093853e-05, "loss": 0.1139, "step": 48190 }, { "epoch": 18.72, "learning_rate": 1.75042071197411e-05, "loss": 0.1582, "step": 48200 }, { "epoch": 18.72, "learning_rate": 1.7503689320388352e-05, "loss": 0.3006, "step": 48210 }, { "epoch": 18.73, "learning_rate": 1.75031715210356e-05, "loss": 0.1292, "step": 48220 }, { "epoch": 18.73, "learning_rate": 1.750265372168285e-05, "loss": 0.1404, "step": 48230 }, { "epoch": 18.73, "learning_rate": 1.75021359223301e-05, "loss": 0.0875, "step": 48240 }, { "epoch": 18.74, "learning_rate": 1.7501618122977347e-05, "loss": 0.0996, "step": 48250 }, { "epoch": 18.74, "learning_rate": 1.75011003236246e-05, "loss": 0.0956, "step": 48260 }, { "epoch": 18.75, "learning_rate": 1.7500582524271846e-05, "loss": 0.2705, "step": 48270 }, { "epoch": 18.75, "learning_rate": 1.7500064724919094e-05, "loss": 0.1118, "step": 48280 }, { "epoch": 18.75, "learning_rate": 1.7499546925566345e-05, "loss": 0.027, "step": 48290 }, { "epoch": 18.76, "learning_rate": 1.7499029126213593e-05, "loss": 0.0379, "step": 48300 }, { "epoch": 18.76, "learning_rate": 1.7498511326860845e-05, "loss": 0.1472, "step": 48310 }, { "epoch": 18.77, "learning_rate": 1.7497993527508093e-05, "loss": 0.0388, "step": 48320 }, { "epoch": 18.77, "learning_rate": 1.749747572815534e-05, "loss": 0.1289, "step": 48330 }, { "epoch": 18.77, "learning_rate": 1.7496957928802592e-05, "loss": 0.0486, "step": 48340 }, { "epoch": 18.78, "learning_rate": 1.749644012944984e-05, "loss": 0.1065, "step": 48350 }, { "epoch": 18.78, "learning_rate": 1.7495922330097088e-05, "loss": 0.1937, "step": 48360 }, { "epoch": 18.78, "learning_rate": 1.749540453074434e-05, "loss": 0.1527, "step": 48370 }, { "epoch": 18.79, "learning_rate": 1.7494886731391587e-05, "loss": 0.1009, "step": 48380 }, { "epoch": 18.79, "learning_rate": 1.7494368932038835e-05, "loss": 0.0856, "step": 48390 }, { "epoch": 18.8, "learning_rate": 1.7493851132686086e-05, "loss": 0.0128, "step": 48400 }, { "epoch": 18.8, "learning_rate": 1.7493333333333334e-05, "loss": 0.0982, "step": 48410 }, { "epoch": 18.8, "learning_rate": 1.7492815533980585e-05, "loss": 0.1788, "step": 48420 }, { "epoch": 18.81, "learning_rate": 1.7492297734627833e-05, "loss": 0.0834, "step": 48430 }, { "epoch": 18.81, "learning_rate": 1.749177993527508e-05, "loss": 0.0765, "step": 48440 }, { "epoch": 18.82, "learning_rate": 1.7491262135922333e-05, "loss": 0.1937, "step": 48450 }, { "epoch": 18.82, "learning_rate": 1.749074433656958e-05, "loss": 0.1023, "step": 48460 }, { "epoch": 18.82, "learning_rate": 1.749022653721683e-05, "loss": 0.1177, "step": 48470 }, { "epoch": 18.83, "learning_rate": 1.748970873786408e-05, "loss": 0.0062, "step": 48480 }, { "epoch": 18.83, "learning_rate": 1.7489190938511328e-05, "loss": 0.0574, "step": 48490 }, { "epoch": 18.83, "learning_rate": 1.748867313915858e-05, "loss": 0.0262, "step": 48500 }, { "epoch": 18.84, "learning_rate": 1.7488155339805827e-05, "loss": 0.1049, "step": 48510 }, { "epoch": 18.84, "learning_rate": 1.7487637540453075e-05, "loss": 0.2655, "step": 48520 }, { "epoch": 18.85, "learning_rate": 1.7487119741100326e-05, "loss": 0.1562, "step": 48530 }, { "epoch": 18.85, "learning_rate": 1.7486601941747574e-05, "loss": 0.207, "step": 48540 }, { "epoch": 18.85, "learning_rate": 1.7486084142394822e-05, "loss": 0.0418, "step": 48550 }, { "epoch": 18.86, "learning_rate": 1.7485566343042073e-05, "loss": 0.1027, "step": 48560 }, { "epoch": 18.86, "learning_rate": 1.748504854368932e-05, "loss": 0.0225, "step": 48570 }, { "epoch": 18.87, "learning_rate": 1.7484530744336573e-05, "loss": 0.126, "step": 48580 }, { "epoch": 18.87, "learning_rate": 1.748401294498382e-05, "loss": 0.135, "step": 48590 }, { "epoch": 18.87, "learning_rate": 1.748349514563107e-05, "loss": 0.0723, "step": 48600 }, { "epoch": 18.88, "learning_rate": 1.748297734627832e-05, "loss": 0.0948, "step": 48610 }, { "epoch": 18.88, "learning_rate": 1.7482459546925568e-05, "loss": 0.0524, "step": 48620 }, { "epoch": 18.89, "learning_rate": 1.7481941747572816e-05, "loss": 0.0915, "step": 48630 }, { "epoch": 18.89, "learning_rate": 1.7481423948220067e-05, "loss": 0.1722, "step": 48640 }, { "epoch": 18.89, "learning_rate": 1.7480906148867315e-05, "loss": 0.042, "step": 48650 }, { "epoch": 18.9, "learning_rate": 1.7480388349514566e-05, "loss": 0.0231, "step": 48660 }, { "epoch": 18.9, "learning_rate": 1.7479870550161814e-05, "loss": 0.0652, "step": 48670 }, { "epoch": 18.9, "learning_rate": 1.7479352750809062e-05, "loss": 0.1463, "step": 48680 }, { "epoch": 18.91, "learning_rate": 1.7478834951456313e-05, "loss": 0.1106, "step": 48690 }, { "epoch": 18.91, "learning_rate": 1.747831715210356e-05, "loss": 0.2006, "step": 48700 }, { "epoch": 18.92, "learning_rate": 1.747779935275081e-05, "loss": 0.3128, "step": 48710 }, { "epoch": 18.92, "learning_rate": 1.747728155339806e-05, "loss": 0.0172, "step": 48720 }, { "epoch": 18.92, "learning_rate": 1.747676375404531e-05, "loss": 0.0497, "step": 48730 }, { "epoch": 18.93, "learning_rate": 1.747624595469256e-05, "loss": 0.1033, "step": 48740 }, { "epoch": 18.93, "learning_rate": 1.7475728155339808e-05, "loss": 0.0796, "step": 48750 }, { "epoch": 18.94, "learning_rate": 1.7475210355987056e-05, "loss": 0.1102, "step": 48760 }, { "epoch": 18.94, "learning_rate": 1.7474692556634303e-05, "loss": 0.0327, "step": 48770 }, { "epoch": 18.94, "learning_rate": 1.7474174757281555e-05, "loss": 0.1929, "step": 48780 }, { "epoch": 18.95, "learning_rate": 1.7473656957928803e-05, "loss": 0.2107, "step": 48790 }, { "epoch": 18.95, "learning_rate": 1.7473139158576054e-05, "loss": 0.1371, "step": 48800 }, { "epoch": 18.96, "learning_rate": 1.7472621359223302e-05, "loss": 0.0976, "step": 48810 }, { "epoch": 18.96, "learning_rate": 1.7472103559870553e-05, "loss": 0.0604, "step": 48820 }, { "epoch": 18.96, "learning_rate": 1.74715857605178e-05, "loss": 0.2363, "step": 48830 }, { "epoch": 18.97, "learning_rate": 1.747106796116505e-05, "loss": 0.1161, "step": 48840 }, { "epoch": 18.97, "learning_rate": 1.7470550161812297e-05, "loss": 0.1229, "step": 48850 }, { "epoch": 18.97, "learning_rate": 1.7470032362459548e-05, "loss": 0.1801, "step": 48860 }, { "epoch": 18.98, "learning_rate": 1.7469514563106796e-05, "loss": 0.1012, "step": 48870 }, { "epoch": 18.98, "learning_rate": 1.7468996763754048e-05, "loss": 0.1942, "step": 48880 }, { "epoch": 18.99, "learning_rate": 1.7468478964401295e-05, "loss": 0.2065, "step": 48890 }, { "epoch": 18.99, "learning_rate": 1.7467961165048547e-05, "loss": 0.0043, "step": 48900 }, { "epoch": 18.99, "learning_rate": 1.7467443365695795e-05, "loss": 0.1087, "step": 48910 }, { "epoch": 19.0, "learning_rate": 1.7466925566343043e-05, "loss": 0.1207, "step": 48920 }, { "epoch": 19.0, "eval_accuracy": 0.9507565337001376, "eval_loss": 0.2652722895145416, "eval_runtime": 8.2313, "eval_samples_per_second": 441.609, "eval_steps_per_second": 55.277, "step": 48925 }, { "epoch": 19.0, "learning_rate": 1.746640776699029e-05, "loss": 0.0245, "step": 48930 }, { "epoch": 19.01, "learning_rate": 1.7465889967637542e-05, "loss": 0.1222, "step": 48940 }, { "epoch": 19.01, "learning_rate": 1.746537216828479e-05, "loss": 0.159, "step": 48950 }, { "epoch": 19.01, "learning_rate": 1.746485436893204e-05, "loss": 0.034, "step": 48960 }, { "epoch": 19.02, "learning_rate": 1.746433656957929e-05, "loss": 0.0806, "step": 48970 }, { "epoch": 19.02, "learning_rate": 1.746381877022654e-05, "loss": 0.1339, "step": 48980 }, { "epoch": 19.03, "learning_rate": 1.7463300970873788e-05, "loss": 0.0634, "step": 48990 }, { "epoch": 19.03, "learning_rate": 1.7462783171521036e-05, "loss": 0.1004, "step": 49000 }, { "epoch": 19.03, "learning_rate": 1.7462265372168284e-05, "loss": 0.0031, "step": 49010 }, { "epoch": 19.04, "learning_rate": 1.7461747572815535e-05, "loss": 0.1074, "step": 49020 }, { "epoch": 19.04, "learning_rate": 1.7461229773462783e-05, "loss": 0.0932, "step": 49030 }, { "epoch": 19.04, "learning_rate": 1.7460711974110035e-05, "loss": 0.1181, "step": 49040 }, { "epoch": 19.05, "learning_rate": 1.7460194174757283e-05, "loss": 0.012, "step": 49050 }, { "epoch": 19.05, "learning_rate": 1.7459676375404534e-05, "loss": 0.1658, "step": 49060 }, { "epoch": 19.06, "learning_rate": 1.7459158576051782e-05, "loss": 0.0783, "step": 49070 }, { "epoch": 19.06, "learning_rate": 1.745864077669903e-05, "loss": 0.2154, "step": 49080 }, { "epoch": 19.06, "learning_rate": 1.7458122977346278e-05, "loss": 0.1293, "step": 49090 }, { "epoch": 19.07, "learning_rate": 1.745760517799353e-05, "loss": 0.164, "step": 49100 }, { "epoch": 19.07, "learning_rate": 1.7457087378640777e-05, "loss": 0.2291, "step": 49110 }, { "epoch": 19.08, "learning_rate": 1.7456569579288028e-05, "loss": 0.2642, "step": 49120 }, { "epoch": 19.08, "learning_rate": 1.7456051779935276e-05, "loss": 0.2168, "step": 49130 }, { "epoch": 19.08, "learning_rate": 1.7455533980582527e-05, "loss": 0.169, "step": 49140 }, { "epoch": 19.09, "learning_rate": 1.7455016181229775e-05, "loss": 0.076, "step": 49150 }, { "epoch": 19.09, "learning_rate": 1.7454498381877023e-05, "loss": 0.1666, "step": 49160 }, { "epoch": 19.1, "learning_rate": 1.745398058252427e-05, "loss": 0.1594, "step": 49170 }, { "epoch": 19.1, "learning_rate": 1.7453462783171523e-05, "loss": 0.1823, "step": 49180 }, { "epoch": 19.1, "learning_rate": 1.745294498381877e-05, "loss": 0.163, "step": 49190 }, { "epoch": 19.11, "learning_rate": 1.7452427184466022e-05, "loss": 0.1596, "step": 49200 }, { "epoch": 19.11, "learning_rate": 1.745190938511327e-05, "loss": 0.2005, "step": 49210 }, { "epoch": 19.11, "learning_rate": 1.745139158576052e-05, "loss": 0.1091, "step": 49220 }, { "epoch": 19.12, "learning_rate": 1.745087378640777e-05, "loss": 0.0673, "step": 49230 }, { "epoch": 19.12, "learning_rate": 1.7450355987055017e-05, "loss": 0.0204, "step": 49240 }, { "epoch": 19.13, "learning_rate": 1.7449838187702265e-05, "loss": 0.1985, "step": 49250 }, { "epoch": 19.13, "learning_rate": 1.7449320388349516e-05, "loss": 0.1189, "step": 49260 }, { "epoch": 19.13, "learning_rate": 1.7448802588996764e-05, "loss": 0.1767, "step": 49270 }, { "epoch": 19.14, "learning_rate": 1.7448284789644015e-05, "loss": 0.1481, "step": 49280 }, { "epoch": 19.14, "learning_rate": 1.7447766990291263e-05, "loss": 0.2195, "step": 49290 }, { "epoch": 19.15, "learning_rate": 1.7447249190938515e-05, "loss": 0.2698, "step": 49300 }, { "epoch": 19.15, "learning_rate": 1.7446731391585762e-05, "loss": 0.1411, "step": 49310 }, { "epoch": 19.15, "learning_rate": 1.744621359223301e-05, "loss": 0.1223, "step": 49320 }, { "epoch": 19.16, "learning_rate": 1.744569579288026e-05, "loss": 0.2185, "step": 49330 }, { "epoch": 19.16, "learning_rate": 1.744517799352751e-05, "loss": 0.1125, "step": 49340 }, { "epoch": 19.17, "learning_rate": 1.7444660194174758e-05, "loss": 0.0959, "step": 49350 }, { "epoch": 19.17, "learning_rate": 1.744414239482201e-05, "loss": 0.0446, "step": 49360 }, { "epoch": 19.17, "learning_rate": 1.7443624595469257e-05, "loss": 0.3299, "step": 49370 }, { "epoch": 19.18, "learning_rate": 1.7443106796116508e-05, "loss": 0.1361, "step": 49380 }, { "epoch": 19.18, "learning_rate": 1.7442588996763756e-05, "loss": 0.0486, "step": 49390 }, { "epoch": 19.18, "learning_rate": 1.7442071197411007e-05, "loss": 0.1496, "step": 49400 }, { "epoch": 19.19, "learning_rate": 1.7441553398058252e-05, "loss": 0.3981, "step": 49410 }, { "epoch": 19.19, "learning_rate": 1.7441035598705503e-05, "loss": 0.1766, "step": 49420 }, { "epoch": 19.2, "learning_rate": 1.744051779935275e-05, "loss": 0.1676, "step": 49430 }, { "epoch": 19.2, "learning_rate": 1.7440000000000002e-05, "loss": 0.0564, "step": 49440 }, { "epoch": 19.2, "learning_rate": 1.743948220064725e-05, "loss": 0.1392, "step": 49450 }, { "epoch": 19.21, "learning_rate": 1.74389644012945e-05, "loss": 0.1182, "step": 49460 }, { "epoch": 19.21, "learning_rate": 1.743844660194175e-05, "loss": 0.2032, "step": 49470 }, { "epoch": 19.22, "learning_rate": 1.7437928802589e-05, "loss": 0.0803, "step": 49480 }, { "epoch": 19.22, "learning_rate": 1.7437411003236245e-05, "loss": 0.0928, "step": 49490 }, { "epoch": 19.22, "learning_rate": 1.7436893203883497e-05, "loss": 0.0449, "step": 49500 }, { "epoch": 19.23, "learning_rate": 1.7436375404530745e-05, "loss": 0.053, "step": 49510 }, { "epoch": 19.23, "learning_rate": 1.7435857605177996e-05, "loss": 0.0515, "step": 49520 }, { "epoch": 19.23, "learning_rate": 1.7435339805825244e-05, "loss": 0.001, "step": 49530 }, { "epoch": 19.24, "learning_rate": 1.7434822006472495e-05, "loss": 0.1151, "step": 49540 }, { "epoch": 19.24, "learning_rate": 1.7434304207119743e-05, "loss": 0.1922, "step": 49550 }, { "epoch": 19.25, "learning_rate": 1.743378640776699e-05, "loss": 0.1342, "step": 49560 }, { "epoch": 19.25, "learning_rate": 1.743326860841424e-05, "loss": 0.0701, "step": 49570 }, { "epoch": 19.25, "learning_rate": 1.743275080906149e-05, "loss": 0.1211, "step": 49580 }, { "epoch": 19.26, "learning_rate": 1.7432233009708738e-05, "loss": 0.0897, "step": 49590 }, { "epoch": 19.26, "learning_rate": 1.743171521035599e-05, "loss": 0.0637, "step": 49600 }, { "epoch": 19.27, "learning_rate": 1.7431197411003237e-05, "loss": 0.0619, "step": 49610 }, { "epoch": 19.27, "learning_rate": 1.743067961165049e-05, "loss": 0.1544, "step": 49620 }, { "epoch": 19.27, "learning_rate": 1.7430161812297737e-05, "loss": 0.0799, "step": 49630 }, { "epoch": 19.28, "learning_rate": 1.7429644012944985e-05, "loss": 0.1566, "step": 49640 }, { "epoch": 19.28, "learning_rate": 1.7429126213592233e-05, "loss": 0.1533, "step": 49650 }, { "epoch": 19.29, "learning_rate": 1.7428608414239484e-05, "loss": 0.2471, "step": 49660 }, { "epoch": 19.29, "learning_rate": 1.7428090614886732e-05, "loss": 0.1948, "step": 49670 }, { "epoch": 19.29, "learning_rate": 1.7427572815533983e-05, "loss": 0.1378, "step": 49680 }, { "epoch": 19.3, "learning_rate": 1.742705501618123e-05, "loss": 0.1995, "step": 49690 }, { "epoch": 19.3, "learning_rate": 1.7426537216828482e-05, "loss": 0.0676, "step": 49700 }, { "epoch": 19.3, "learning_rate": 1.742601941747573e-05, "loss": 0.0299, "step": 49710 }, { "epoch": 19.31, "learning_rate": 1.7425501618122978e-05, "loss": 0.1408, "step": 49720 }, { "epoch": 19.31, "learning_rate": 1.7424983818770226e-05, "loss": 0.0066, "step": 49730 }, { "epoch": 19.32, "learning_rate": 1.7424466019417477e-05, "loss": 0.0566, "step": 49740 }, { "epoch": 19.32, "learning_rate": 1.7423948220064725e-05, "loss": 0.2291, "step": 49750 }, { "epoch": 19.32, "learning_rate": 1.7423430420711977e-05, "loss": 0.1213, "step": 49760 }, { "epoch": 19.33, "learning_rate": 1.7422912621359225e-05, "loss": 0.1675, "step": 49770 }, { "epoch": 19.33, "learning_rate": 1.7422394822006476e-05, "loss": 0.0099, "step": 49780 }, { "epoch": 19.34, "learning_rate": 1.7421877022653724e-05, "loss": 0.1317, "step": 49790 }, { "epoch": 19.34, "learning_rate": 1.7421359223300972e-05, "loss": 0.1112, "step": 49800 }, { "epoch": 19.34, "learning_rate": 1.742084142394822e-05, "loss": 0.1085, "step": 49810 }, { "epoch": 19.35, "learning_rate": 1.742032362459547e-05, "loss": 0.2475, "step": 49820 }, { "epoch": 19.35, "learning_rate": 1.741980582524272e-05, "loss": 0.0022, "step": 49830 }, { "epoch": 19.36, "learning_rate": 1.741928802588997e-05, "loss": 0.0637, "step": 49840 }, { "epoch": 19.36, "learning_rate": 1.7418770226537218e-05, "loss": 0.1123, "step": 49850 }, { "epoch": 19.36, "learning_rate": 1.7418252427184466e-05, "loss": 0.008, "step": 49860 }, { "epoch": 19.37, "learning_rate": 1.7417734627831717e-05, "loss": 0.1519, "step": 49870 }, { "epoch": 19.37, "learning_rate": 1.7417216828478965e-05, "loss": 0.1478, "step": 49880 }, { "epoch": 19.37, "learning_rate": 1.7416699029126213e-05, "loss": 0.1335, "step": 49890 }, { "epoch": 19.38, "learning_rate": 1.7416181229773465e-05, "loss": 0.1403, "step": 49900 }, { "epoch": 19.38, "learning_rate": 1.7415663430420712e-05, "loss": 0.0549, "step": 49910 }, { "epoch": 19.39, "learning_rate": 1.7415145631067964e-05, "loss": 0.0783, "step": 49920 }, { "epoch": 19.39, "learning_rate": 1.741462783171521e-05, "loss": 0.096, "step": 49930 }, { "epoch": 19.39, "learning_rate": 1.741411003236246e-05, "loss": 0.0044, "step": 49940 }, { "epoch": 19.4, "learning_rate": 1.741359223300971e-05, "loss": 0.3061, "step": 49950 }, { "epoch": 19.4, "learning_rate": 1.741307443365696e-05, "loss": 0.1224, "step": 49960 }, { "epoch": 19.41, "learning_rate": 1.741255663430421e-05, "loss": 0.0699, "step": 49970 }, { "epoch": 19.41, "learning_rate": 1.7412038834951458e-05, "loss": 0.1114, "step": 49980 }, { "epoch": 19.41, "learning_rate": 1.7411521035598706e-05, "loss": 0.202, "step": 49990 }, { "epoch": 19.42, "learning_rate": 1.7411003236245957e-05, "loss": 0.1045, "step": 50000 }, { "epoch": 19.42, "learning_rate": 1.7410485436893205e-05, "loss": 0.1783, "step": 50010 }, { "epoch": 19.43, "learning_rate": 1.7409967637540453e-05, "loss": 0.079, "step": 50020 }, { "epoch": 19.43, "learning_rate": 1.7409449838187704e-05, "loss": 0.161, "step": 50030 }, { "epoch": 19.43, "learning_rate": 1.7408932038834952e-05, "loss": 0.064, "step": 50040 }, { "epoch": 19.44, "learning_rate": 1.7408414239482204e-05, "loss": 0.1318, "step": 50050 }, { "epoch": 19.44, "learning_rate": 1.740789644012945e-05, "loss": 0.0926, "step": 50060 }, { "epoch": 19.44, "learning_rate": 1.74073786407767e-05, "loss": 0.1274, "step": 50070 }, { "epoch": 19.45, "learning_rate": 1.740686084142395e-05, "loss": 0.1386, "step": 50080 }, { "epoch": 19.45, "learning_rate": 1.74063430420712e-05, "loss": 0.217, "step": 50090 }, { "epoch": 19.46, "learning_rate": 1.7405825242718447e-05, "loss": 0.0667, "step": 50100 }, { "epoch": 19.46, "learning_rate": 1.7405307443365698e-05, "loss": 0.0133, "step": 50110 }, { "epoch": 19.46, "learning_rate": 1.7404789644012946e-05, "loss": 0.0245, "step": 50120 }, { "epoch": 19.47, "learning_rate": 1.7404271844660197e-05, "loss": 0.1623, "step": 50130 }, { "epoch": 19.47, "learning_rate": 1.7403754045307445e-05, "loss": 0.1507, "step": 50140 }, { "epoch": 19.48, "learning_rate": 1.7403236245954693e-05, "loss": 0.1245, "step": 50150 }, { "epoch": 19.48, "learning_rate": 1.7402718446601944e-05, "loss": 0.0739, "step": 50160 }, { "epoch": 19.48, "learning_rate": 1.7402200647249192e-05, "loss": 0.1465, "step": 50170 }, { "epoch": 19.49, "learning_rate": 1.740168284789644e-05, "loss": 0.1578, "step": 50180 }, { "epoch": 19.49, "learning_rate": 1.740116504854369e-05, "loss": 0.0943, "step": 50190 }, { "epoch": 19.5, "learning_rate": 1.740064724919094e-05, "loss": 0.0279, "step": 50200 }, { "epoch": 19.5, "learning_rate": 1.740012944983819e-05, "loss": 0.0488, "step": 50210 }, { "epoch": 19.5, "learning_rate": 1.739961165048544e-05, "loss": 0.0344, "step": 50220 }, { "epoch": 19.51, "learning_rate": 1.7399093851132687e-05, "loss": 0.1304, "step": 50230 }, { "epoch": 19.51, "learning_rate": 1.7398576051779935e-05, "loss": 0.1182, "step": 50240 }, { "epoch": 19.51, "learning_rate": 1.7398058252427186e-05, "loss": 0.1009, "step": 50250 }, { "epoch": 19.52, "learning_rate": 1.7397540453074434e-05, "loss": 0.0704, "step": 50260 }, { "epoch": 19.52, "learning_rate": 1.7397022653721685e-05, "loss": 0.2013, "step": 50270 }, { "epoch": 19.53, "learning_rate": 1.7396504854368933e-05, "loss": 0.0945, "step": 50280 }, { "epoch": 19.53, "learning_rate": 1.7395987055016184e-05, "loss": 0.0498, "step": 50290 }, { "epoch": 19.53, "learning_rate": 1.7395469255663432e-05, "loss": 0.0619, "step": 50300 }, { "epoch": 19.54, "learning_rate": 1.739495145631068e-05, "loss": 0.0972, "step": 50310 }, { "epoch": 19.54, "learning_rate": 1.7394433656957928e-05, "loss": 0.0718, "step": 50320 }, { "epoch": 19.55, "learning_rate": 1.739391585760518e-05, "loss": 0.0355, "step": 50330 }, { "epoch": 19.55, "learning_rate": 1.7393398058252427e-05, "loss": 0.0788, "step": 50340 }, { "epoch": 19.55, "learning_rate": 1.739288025889968e-05, "loss": 0.2832, "step": 50350 }, { "epoch": 19.56, "learning_rate": 1.7392362459546927e-05, "loss": 0.0647, "step": 50360 }, { "epoch": 19.56, "learning_rate": 1.7391844660194178e-05, "loss": 0.1148, "step": 50370 }, { "epoch": 19.57, "learning_rate": 1.7391326860841426e-05, "loss": 0.1926, "step": 50380 }, { "epoch": 19.57, "learning_rate": 1.7390809061488674e-05, "loss": 0.0913, "step": 50390 }, { "epoch": 19.57, "learning_rate": 1.7390291262135922e-05, "loss": 0.0315, "step": 50400 }, { "epoch": 19.58, "learning_rate": 1.7389773462783173e-05, "loss": 0.1858, "step": 50410 }, { "epoch": 19.58, "learning_rate": 1.738925566343042e-05, "loss": 0.0953, "step": 50420 }, { "epoch": 19.58, "learning_rate": 1.7388737864077672e-05, "loss": 0.1604, "step": 50430 }, { "epoch": 19.59, "learning_rate": 1.738822006472492e-05, "loss": 0.0792, "step": 50440 }, { "epoch": 19.59, "learning_rate": 1.738770226537217e-05, "loss": 0.1755, "step": 50450 }, { "epoch": 19.6, "learning_rate": 1.738718446601942e-05, "loss": 0.0299, "step": 50460 }, { "epoch": 19.6, "learning_rate": 1.7386666666666667e-05, "loss": 0.0274, "step": 50470 }, { "epoch": 19.6, "learning_rate": 1.7386148867313915e-05, "loss": 0.1064, "step": 50480 }, { "epoch": 19.61, "learning_rate": 1.7385631067961167e-05, "loss": 0.0189, "step": 50490 }, { "epoch": 19.61, "learning_rate": 1.7385113268608415e-05, "loss": 0.236, "step": 50500 }, { "epoch": 19.62, "learning_rate": 1.7384595469255666e-05, "loss": 0.1419, "step": 50510 }, { "epoch": 19.62, "learning_rate": 1.7384077669902914e-05, "loss": 0.0801, "step": 50520 }, { "epoch": 19.62, "learning_rate": 1.7383559870550165e-05, "loss": 0.0565, "step": 50530 }, { "epoch": 19.63, "learning_rate": 1.7383042071197413e-05, "loss": 0.0967, "step": 50540 }, { "epoch": 19.63, "learning_rate": 1.738252427184466e-05, "loss": 0.1786, "step": 50550 }, { "epoch": 19.63, "learning_rate": 1.738200647249191e-05, "loss": 0.2562, "step": 50560 }, { "epoch": 19.64, "learning_rate": 1.738148867313916e-05, "loss": 0.1046, "step": 50570 }, { "epoch": 19.64, "learning_rate": 1.7380970873786408e-05, "loss": 0.1057, "step": 50580 }, { "epoch": 19.65, "learning_rate": 1.738045307443366e-05, "loss": 0.2362, "step": 50590 }, { "epoch": 19.65, "learning_rate": 1.7379935275080907e-05, "loss": 0.1915, "step": 50600 }, { "epoch": 19.65, "learning_rate": 1.737941747572816e-05, "loss": 0.1377, "step": 50610 }, { "epoch": 19.66, "learning_rate": 1.7378899676375407e-05, "loss": 0.0067, "step": 50620 }, { "epoch": 19.66, "learning_rate": 1.7378381877022654e-05, "loss": 0.0672, "step": 50630 }, { "epoch": 19.67, "learning_rate": 1.7377864077669902e-05, "loss": 0.0935, "step": 50640 }, { "epoch": 19.67, "learning_rate": 1.7377346278317154e-05, "loss": 0.0526, "step": 50650 }, { "epoch": 19.67, "learning_rate": 1.73768284789644e-05, "loss": 0.2024, "step": 50660 }, { "epoch": 19.68, "learning_rate": 1.7376310679611653e-05, "loss": 0.0413, "step": 50670 }, { "epoch": 19.68, "learning_rate": 1.73757928802589e-05, "loss": 0.1356, "step": 50680 }, { "epoch": 19.69, "learning_rate": 1.7375275080906152e-05, "loss": 0.1866, "step": 50690 }, { "epoch": 19.69, "learning_rate": 1.73747572815534e-05, "loss": 0.1994, "step": 50700 }, { "epoch": 19.69, "learning_rate": 1.7374239482200648e-05, "loss": 0.1442, "step": 50710 }, { "epoch": 19.7, "learning_rate": 1.7373721682847896e-05, "loss": 0.0445, "step": 50720 }, { "epoch": 19.7, "learning_rate": 1.7373203883495147e-05, "loss": 0.102, "step": 50730 }, { "epoch": 19.7, "learning_rate": 1.7372686084142395e-05, "loss": 0.0472, "step": 50740 }, { "epoch": 19.71, "learning_rate": 1.7372168284789646e-05, "loss": 0.0474, "step": 50750 }, { "epoch": 19.71, "learning_rate": 1.7371650485436894e-05, "loss": 0.0209, "step": 50760 }, { "epoch": 19.72, "learning_rate": 1.7371132686084146e-05, "loss": 0.2964, "step": 50770 }, { "epoch": 19.72, "learning_rate": 1.7370614886731394e-05, "loss": 0.2166, "step": 50780 }, { "epoch": 19.72, "learning_rate": 1.737009708737864e-05, "loss": 0.227, "step": 50790 }, { "epoch": 19.73, "learning_rate": 1.736957928802589e-05, "loss": 0.2381, "step": 50800 }, { "epoch": 19.73, "learning_rate": 1.736906148867314e-05, "loss": 0.057, "step": 50810 }, { "epoch": 19.74, "learning_rate": 1.736854368932039e-05, "loss": 0.1084, "step": 50820 }, { "epoch": 19.74, "learning_rate": 1.736802588996764e-05, "loss": 0.2242, "step": 50830 }, { "epoch": 19.74, "learning_rate": 1.7367508090614888e-05, "loss": 0.1989, "step": 50840 }, { "epoch": 19.75, "learning_rate": 1.736699029126214e-05, "loss": 0.1409, "step": 50850 }, { "epoch": 19.75, "learning_rate": 1.7366472491909387e-05, "loss": 0.1323, "step": 50860 }, { "epoch": 19.76, "learning_rate": 1.7365954692556635e-05, "loss": 0.0579, "step": 50870 }, { "epoch": 19.76, "learning_rate": 1.7365436893203883e-05, "loss": 0.037, "step": 50880 }, { "epoch": 19.76, "learning_rate": 1.7364919093851134e-05, "loss": 0.1048, "step": 50890 }, { "epoch": 19.77, "learning_rate": 1.7364401294498382e-05, "loss": 0.1661, "step": 50900 }, { "epoch": 19.77, "learning_rate": 1.7363883495145634e-05, "loss": 0.034, "step": 50910 }, { "epoch": 19.77, "learning_rate": 1.736336569579288e-05, "loss": 0.0959, "step": 50920 }, { "epoch": 19.78, "learning_rate": 1.7362847896440133e-05, "loss": 0.2041, "step": 50930 }, { "epoch": 19.78, "learning_rate": 1.736233009708738e-05, "loss": 0.2057, "step": 50940 }, { "epoch": 19.79, "learning_rate": 1.736181229773463e-05, "loss": 0.0545, "step": 50950 }, { "epoch": 19.79, "learning_rate": 1.7361294498381877e-05, "loss": 0.2424, "step": 50960 }, { "epoch": 19.79, "learning_rate": 1.7360776699029128e-05, "loss": 0.0832, "step": 50970 }, { "epoch": 19.8, "learning_rate": 1.7360258899676376e-05, "loss": 0.1155, "step": 50980 }, { "epoch": 19.8, "learning_rate": 1.7359741100323627e-05, "loss": 0.1719, "step": 50990 }, { "epoch": 19.81, "learning_rate": 1.7359223300970875e-05, "loss": 0.2382, "step": 51000 }, { "epoch": 19.81, "learning_rate": 1.7358705501618126e-05, "loss": 0.0812, "step": 51010 }, { "epoch": 19.81, "learning_rate": 1.7358187702265374e-05, "loss": 0.1687, "step": 51020 }, { "epoch": 19.82, "learning_rate": 1.7357669902912622e-05, "loss": 0.3025, "step": 51030 }, { "epoch": 19.82, "learning_rate": 1.735715210355987e-05, "loss": 0.085, "step": 51040 }, { "epoch": 19.83, "learning_rate": 1.735663430420712e-05, "loss": 0.0088, "step": 51050 }, { "epoch": 19.83, "learning_rate": 1.735611650485437e-05, "loss": 0.1703, "step": 51060 }, { "epoch": 19.83, "learning_rate": 1.735559870550162e-05, "loss": 0.0776, "step": 51070 }, { "epoch": 19.84, "learning_rate": 1.735508090614887e-05, "loss": 0.1223, "step": 51080 }, { "epoch": 19.84, "learning_rate": 1.735456310679612e-05, "loss": 0.0896, "step": 51090 }, { "epoch": 19.84, "learning_rate": 1.7354045307443368e-05, "loss": 0.2157, "step": 51100 }, { "epoch": 19.85, "learning_rate": 1.7353527508090616e-05, "loss": 0.2527, "step": 51110 }, { "epoch": 19.85, "learning_rate": 1.7353009708737864e-05, "loss": 0.2172, "step": 51120 }, { "epoch": 19.86, "learning_rate": 1.7352491909385115e-05, "loss": 0.0325, "step": 51130 }, { "epoch": 19.86, "learning_rate": 1.7351974110032363e-05, "loss": 0.1322, "step": 51140 }, { "epoch": 19.86, "learning_rate": 1.7351456310679614e-05, "loss": 0.1467, "step": 51150 }, { "epoch": 19.87, "learning_rate": 1.7350938511326862e-05, "loss": 0.0802, "step": 51160 }, { "epoch": 19.87, "learning_rate": 1.7350420711974114e-05, "loss": 0.0035, "step": 51170 }, { "epoch": 19.88, "learning_rate": 1.734990291262136e-05, "loss": 0.0835, "step": 51180 }, { "epoch": 19.88, "learning_rate": 1.734938511326861e-05, "loss": 0.0306, "step": 51190 }, { "epoch": 19.88, "learning_rate": 1.7348867313915857e-05, "loss": 0.1249, "step": 51200 }, { "epoch": 19.89, "learning_rate": 1.734834951456311e-05, "loss": 0.17, "step": 51210 }, { "epoch": 19.89, "learning_rate": 1.7347831715210357e-05, "loss": 0.1072, "step": 51220 }, { "epoch": 19.9, "learning_rate": 1.7347313915857608e-05, "loss": 0.1426, "step": 51230 }, { "epoch": 19.9, "learning_rate": 1.7346796116504856e-05, "loss": 0.0559, "step": 51240 }, { "epoch": 19.9, "learning_rate": 1.7346278317152107e-05, "loss": 0.1179, "step": 51250 }, { "epoch": 19.91, "learning_rate": 1.7345760517799355e-05, "loss": 0.0412, "step": 51260 }, { "epoch": 19.91, "learning_rate": 1.7345242718446603e-05, "loss": 0.0983, "step": 51270 }, { "epoch": 19.91, "learning_rate": 1.734472491909385e-05, "loss": 0.1342, "step": 51280 }, { "epoch": 19.92, "learning_rate": 1.7344207119741102e-05, "loss": 0.0655, "step": 51290 }, { "epoch": 19.92, "learning_rate": 1.734368932038835e-05, "loss": 0.1522, "step": 51300 }, { "epoch": 19.93, "learning_rate": 1.73431715210356e-05, "loss": 0.0713, "step": 51310 }, { "epoch": 19.93, "learning_rate": 1.734265372168285e-05, "loss": 0.1679, "step": 51320 }, { "epoch": 19.93, "learning_rate": 1.7342135922330097e-05, "loss": 0.2963, "step": 51330 }, { "epoch": 19.94, "learning_rate": 1.734161812297735e-05, "loss": 0.2025, "step": 51340 }, { "epoch": 19.94, "learning_rate": 1.7341100323624596e-05, "loss": 0.137, "step": 51350 }, { "epoch": 19.95, "learning_rate": 1.7340582524271844e-05, "loss": 0.0794, "step": 51360 }, { "epoch": 19.95, "learning_rate": 1.7340064724919096e-05, "loss": 0.2545, "step": 51370 }, { "epoch": 19.95, "learning_rate": 1.7339546925566344e-05, "loss": 0.1331, "step": 51380 }, { "epoch": 19.96, "learning_rate": 1.7339029126213595e-05, "loss": 0.0428, "step": 51390 }, { "epoch": 19.96, "learning_rate": 1.7338511326860843e-05, "loss": 0.0389, "step": 51400 }, { "epoch": 19.97, "learning_rate": 1.733799352750809e-05, "loss": 0.0877, "step": 51410 }, { "epoch": 19.97, "learning_rate": 1.7337475728155342e-05, "loss": 0.1016, "step": 51420 }, { "epoch": 19.97, "learning_rate": 1.733695792880259e-05, "loss": 0.1743, "step": 51430 }, { "epoch": 19.98, "learning_rate": 1.7336440129449838e-05, "loss": 0.1156, "step": 51440 }, { "epoch": 19.98, "learning_rate": 1.733592233009709e-05, "loss": 0.0627, "step": 51450 }, { "epoch": 19.98, "learning_rate": 1.7335404530744337e-05, "loss": 0.0477, "step": 51460 }, { "epoch": 19.99, "learning_rate": 1.733488673139159e-05, "loss": 0.099, "step": 51470 }, { "epoch": 19.99, "learning_rate": 1.7334368932038836e-05, "loss": 0.0362, "step": 51480 }, { "epoch": 20.0, "learning_rate": 1.7333851132686084e-05, "loss": 0.0542, "step": 51490 }, { "epoch": 20.0, "learning_rate": 1.7333333333333336e-05, "loss": 0.1761, "step": 51500 }, { "epoch": 20.0, "eval_accuracy": 0.9521320495185694, "eval_loss": 0.2442636489868164, "eval_runtime": 8.2614, "eval_samples_per_second": 439.997, "eval_steps_per_second": 55.075, "step": 51500 }, { "epoch": 20.0, "learning_rate": 1.7332815533980584e-05, "loss": 0.163, "step": 51510 }, { "epoch": 20.01, "learning_rate": 1.733229773462783e-05, "loss": 0.1032, "step": 51520 }, { "epoch": 20.01, "learning_rate": 1.7331779935275083e-05, "loss": 0.1035, "step": 51530 }, { "epoch": 20.02, "learning_rate": 1.733126213592233e-05, "loss": 0.0978, "step": 51540 }, { "epoch": 20.02, "learning_rate": 1.7330744336569582e-05, "loss": 0.1191, "step": 51550 }, { "epoch": 20.02, "learning_rate": 1.733022653721683e-05, "loss": 0.1474, "step": 51560 }, { "epoch": 20.03, "learning_rate": 1.7329708737864078e-05, "loss": 0.1916, "step": 51570 }, { "epoch": 20.03, "learning_rate": 1.732919093851133e-05, "loss": 0.0577, "step": 51580 }, { "epoch": 20.03, "learning_rate": 1.7328673139158577e-05, "loss": 0.1983, "step": 51590 }, { "epoch": 20.04, "learning_rate": 1.732815533980583e-05, "loss": 0.1542, "step": 51600 }, { "epoch": 20.04, "learning_rate": 1.7327637540453076e-05, "loss": 0.0914, "step": 51610 }, { "epoch": 20.05, "learning_rate": 1.7327119741100324e-05, "loss": 0.0751, "step": 51620 }, { "epoch": 20.05, "learning_rate": 1.7326601941747576e-05, "loss": 0.1366, "step": 51630 }, { "epoch": 20.05, "learning_rate": 1.7326084142394824e-05, "loss": 0.1539, "step": 51640 }, { "epoch": 20.06, "learning_rate": 1.732556634304207e-05, "loss": 0.0435, "step": 51650 }, { "epoch": 20.06, "learning_rate": 1.7325048543689323e-05, "loss": 0.0372, "step": 51660 }, { "epoch": 20.07, "learning_rate": 1.732453074433657e-05, "loss": 0.1208, "step": 51670 }, { "epoch": 20.07, "learning_rate": 1.7324012944983822e-05, "loss": 0.1749, "step": 51680 }, { "epoch": 20.07, "learning_rate": 1.732349514563107e-05, "loss": 0.0378, "step": 51690 }, { "epoch": 20.08, "learning_rate": 1.7322977346278318e-05, "loss": 0.0769, "step": 51700 }, { "epoch": 20.08, "learning_rate": 1.7322459546925566e-05, "loss": 0.1145, "step": 51710 }, { "epoch": 20.09, "learning_rate": 1.7321941747572817e-05, "loss": 0.1796, "step": 51720 }, { "epoch": 20.09, "learning_rate": 1.7321423948220065e-05, "loss": 0.0946, "step": 51730 }, { "epoch": 20.09, "learning_rate": 1.7320906148867316e-05, "loss": 0.0632, "step": 51740 }, { "epoch": 20.1, "learning_rate": 1.7320388349514564e-05, "loss": 0.1655, "step": 51750 }, { "epoch": 20.1, "learning_rate": 1.7319870550161816e-05, "loss": 0.0875, "step": 51760 }, { "epoch": 20.1, "learning_rate": 1.7319352750809063e-05, "loss": 0.1121, "step": 51770 }, { "epoch": 20.11, "learning_rate": 1.731883495145631e-05, "loss": 0.0812, "step": 51780 }, { "epoch": 20.11, "learning_rate": 1.731831715210356e-05, "loss": 0.1672, "step": 51790 }, { "epoch": 20.12, "learning_rate": 1.731779935275081e-05, "loss": 0.1381, "step": 51800 }, { "epoch": 20.12, "learning_rate": 1.731728155339806e-05, "loss": 0.0979, "step": 51810 }, { "epoch": 20.12, "learning_rate": 1.731676375404531e-05, "loss": 0.2654, "step": 51820 }, { "epoch": 20.13, "learning_rate": 1.7316245954692558e-05, "loss": 0.0836, "step": 51830 }, { "epoch": 20.13, "learning_rate": 1.731572815533981e-05, "loss": 0.0668, "step": 51840 }, { "epoch": 20.14, "learning_rate": 1.7315210355987057e-05, "loss": 0.0295, "step": 51850 }, { "epoch": 20.14, "learning_rate": 1.7314692556634305e-05, "loss": 0.0358, "step": 51860 }, { "epoch": 20.14, "learning_rate": 1.7314174757281553e-05, "loss": 0.0309, "step": 51870 }, { "epoch": 20.15, "learning_rate": 1.7313656957928804e-05, "loss": 0.0742, "step": 51880 }, { "epoch": 20.15, "learning_rate": 1.7313139158576052e-05, "loss": 0.0628, "step": 51890 }, { "epoch": 20.16, "learning_rate": 1.7312621359223303e-05, "loss": 0.021, "step": 51900 }, { "epoch": 20.16, "learning_rate": 1.731210355987055e-05, "loss": 0.1452, "step": 51910 }, { "epoch": 20.16, "learning_rate": 1.7311585760517803e-05, "loss": 0.0998, "step": 51920 }, { "epoch": 20.17, "learning_rate": 1.731106796116505e-05, "loss": 0.1236, "step": 51930 }, { "epoch": 20.17, "learning_rate": 1.73105501618123e-05, "loss": 0.137, "step": 51940 }, { "epoch": 20.17, "learning_rate": 1.7310032362459546e-05, "loss": 0.0951, "step": 51950 }, { "epoch": 20.18, "learning_rate": 1.7309514563106798e-05, "loss": 0.2117, "step": 51960 }, { "epoch": 20.18, "learning_rate": 1.7308996763754046e-05, "loss": 0.0242, "step": 51970 }, { "epoch": 20.19, "learning_rate": 1.7308478964401297e-05, "loss": 0.0465, "step": 51980 }, { "epoch": 20.19, "learning_rate": 1.7307961165048545e-05, "loss": 0.3462, "step": 51990 }, { "epoch": 20.19, "learning_rate": 1.7307443365695796e-05, "loss": 0.399, "step": 52000 }, { "epoch": 20.2, "learning_rate": 1.730692556634304e-05, "loss": 0.0979, "step": 52010 }, { "epoch": 20.2, "learning_rate": 1.7306407766990292e-05, "loss": 0.1911, "step": 52020 }, { "epoch": 20.21, "learning_rate": 1.730588996763754e-05, "loss": 0.0092, "step": 52030 }, { "epoch": 20.21, "learning_rate": 1.730537216828479e-05, "loss": 0.235, "step": 52040 }, { "epoch": 20.21, "learning_rate": 1.730485436893204e-05, "loss": 0.1412, "step": 52050 }, { "epoch": 20.22, "learning_rate": 1.730433656957929e-05, "loss": 0.1212, "step": 52060 }, { "epoch": 20.22, "learning_rate": 1.730381877022654e-05, "loss": 0.1283, "step": 52070 }, { "epoch": 20.23, "learning_rate": 1.730330097087379e-05, "loss": 0.2816, "step": 52080 }, { "epoch": 20.23, "learning_rate": 1.7302783171521034e-05, "loss": 0.0096, "step": 52090 }, { "epoch": 20.23, "learning_rate": 1.7302265372168286e-05, "loss": 0.0868, "step": 52100 }, { "epoch": 20.24, "learning_rate": 1.7301747572815534e-05, "loss": 0.1904, "step": 52110 }, { "epoch": 20.24, "learning_rate": 1.7301229773462785e-05, "loss": 0.1036, "step": 52120 }, { "epoch": 20.24, "learning_rate": 1.7300711974110033e-05, "loss": 0.1299, "step": 52130 }, { "epoch": 20.25, "learning_rate": 1.7300194174757284e-05, "loss": 0.1227, "step": 52140 }, { "epoch": 20.25, "learning_rate": 1.7299676375404532e-05, "loss": 0.096, "step": 52150 }, { "epoch": 20.26, "learning_rate": 1.7299158576051783e-05, "loss": 0.1079, "step": 52160 }, { "epoch": 20.26, "learning_rate": 1.729864077669903e-05, "loss": 0.1389, "step": 52170 }, { "epoch": 20.26, "learning_rate": 1.729812297734628e-05, "loss": 0.1551, "step": 52180 }, { "epoch": 20.27, "learning_rate": 1.7297605177993527e-05, "loss": 0.0664, "step": 52190 }, { "epoch": 20.27, "learning_rate": 1.729708737864078e-05, "loss": 0.0615, "step": 52200 }, { "epoch": 20.28, "learning_rate": 1.7296569579288026e-05, "loss": 0.3515, "step": 52210 }, { "epoch": 20.28, "learning_rate": 1.7296051779935278e-05, "loss": 0.2101, "step": 52220 }, { "epoch": 20.28, "learning_rate": 1.7295533980582526e-05, "loss": 0.1597, "step": 52230 }, { "epoch": 20.29, "learning_rate": 1.7295016181229777e-05, "loss": 0.1437, "step": 52240 }, { "epoch": 20.29, "learning_rate": 1.7294498381877025e-05, "loss": 0.1817, "step": 52250 }, { "epoch": 20.3, "learning_rate": 1.7293980582524273e-05, "loss": 0.0254, "step": 52260 }, { "epoch": 20.3, "learning_rate": 1.729346278317152e-05, "loss": 0.0866, "step": 52270 }, { "epoch": 20.3, "learning_rate": 1.7292944983818772e-05, "loss": 0.0428, "step": 52280 }, { "epoch": 20.31, "learning_rate": 1.729242718446602e-05, "loss": 0.2029, "step": 52290 }, { "epoch": 20.31, "learning_rate": 1.729190938511327e-05, "loss": 0.0722, "step": 52300 }, { "epoch": 20.31, "learning_rate": 1.729139158576052e-05, "loss": 0.2068, "step": 52310 }, { "epoch": 20.32, "learning_rate": 1.729087378640777e-05, "loss": 0.0723, "step": 52320 }, { "epoch": 20.32, "learning_rate": 1.729035598705502e-05, "loss": 0.1005, "step": 52330 }, { "epoch": 20.33, "learning_rate": 1.7289838187702266e-05, "loss": 0.0785, "step": 52340 }, { "epoch": 20.33, "learning_rate": 1.7289320388349514e-05, "loss": 0.0551, "step": 52350 }, { "epoch": 20.33, "learning_rate": 1.7288802588996766e-05, "loss": 0.1456, "step": 52360 }, { "epoch": 20.34, "learning_rate": 1.7288284789644013e-05, "loss": 0.0215, "step": 52370 }, { "epoch": 20.34, "learning_rate": 1.7287766990291265e-05, "loss": 0.2247, "step": 52380 }, { "epoch": 20.35, "learning_rate": 1.7287249190938513e-05, "loss": 0.0666, "step": 52390 }, { "epoch": 20.35, "learning_rate": 1.7286731391585764e-05, "loss": 0.2228, "step": 52400 }, { "epoch": 20.35, "learning_rate": 1.7286213592233012e-05, "loss": 0.0682, "step": 52410 }, { "epoch": 20.36, "learning_rate": 1.728569579288026e-05, "loss": 0.1255, "step": 52420 }, { "epoch": 20.36, "learning_rate": 1.7285177993527508e-05, "loss": 0.1495, "step": 52430 }, { "epoch": 20.37, "learning_rate": 1.728466019417476e-05, "loss": 0.1153, "step": 52440 }, { "epoch": 20.37, "learning_rate": 1.7284142394822007e-05, "loss": 0.2259, "step": 52450 }, { "epoch": 20.37, "learning_rate": 1.728362459546926e-05, "loss": 0.1518, "step": 52460 }, { "epoch": 20.38, "learning_rate": 1.7283106796116506e-05, "loss": 0.1179, "step": 52470 }, { "epoch": 20.38, "learning_rate": 1.7282588996763758e-05, "loss": 0.0508, "step": 52480 }, { "epoch": 20.38, "learning_rate": 1.7282071197411005e-05, "loss": 0.073, "step": 52490 }, { "epoch": 20.39, "learning_rate": 1.7281553398058253e-05, "loss": 0.1103, "step": 52500 }, { "epoch": 20.39, "learning_rate": 1.72810355987055e-05, "loss": 0.1284, "step": 52510 }, { "epoch": 20.4, "learning_rate": 1.7280517799352753e-05, "loss": 0.2534, "step": 52520 }, { "epoch": 20.4, "learning_rate": 1.728e-05, "loss": 0.0707, "step": 52530 }, { "epoch": 20.4, "learning_rate": 1.7279482200647252e-05, "loss": 0.2195, "step": 52540 }, { "epoch": 20.41, "learning_rate": 1.72789644012945e-05, "loss": 0.1654, "step": 52550 }, { "epoch": 20.41, "learning_rate": 1.727844660194175e-05, "loss": 0.0739, "step": 52560 }, { "epoch": 20.42, "learning_rate": 1.7277928802589e-05, "loss": 0.0841, "step": 52570 }, { "epoch": 20.42, "learning_rate": 1.7277411003236247e-05, "loss": 0.1261, "step": 52580 }, { "epoch": 20.42, "learning_rate": 1.7276893203883495e-05, "loss": 0.0644, "step": 52590 }, { "epoch": 20.43, "learning_rate": 1.7276375404530746e-05, "loss": 0.1813, "step": 52600 }, { "epoch": 20.43, "learning_rate": 1.7275857605177994e-05, "loss": 0.2338, "step": 52610 }, { "epoch": 20.43, "learning_rate": 1.7275339805825245e-05, "loss": 0.0836, "step": 52620 }, { "epoch": 20.44, "learning_rate": 1.7274822006472493e-05, "loss": 0.1102, "step": 52630 }, { "epoch": 20.44, "learning_rate": 1.7274304207119745e-05, "loss": 0.02, "step": 52640 }, { "epoch": 20.45, "learning_rate": 1.7273786407766993e-05, "loss": 0.0094, "step": 52650 }, { "epoch": 20.45, "learning_rate": 1.727326860841424e-05, "loss": 0.2096, "step": 52660 }, { "epoch": 20.45, "learning_rate": 1.727275080906149e-05, "loss": 0.056, "step": 52670 }, { "epoch": 20.46, "learning_rate": 1.727223300970874e-05, "loss": 0.194, "step": 52680 }, { "epoch": 20.46, "learning_rate": 1.7271715210355988e-05, "loss": 0.1976, "step": 52690 }, { "epoch": 20.47, "learning_rate": 1.727119741100324e-05, "loss": 0.1871, "step": 52700 }, { "epoch": 20.47, "learning_rate": 1.7270679611650487e-05, "loss": 0.0886, "step": 52710 }, { "epoch": 20.47, "learning_rate": 1.7270161812297738e-05, "loss": 0.1816, "step": 52720 }, { "epoch": 20.48, "learning_rate": 1.7269644012944986e-05, "loss": 0.0225, "step": 52730 }, { "epoch": 20.48, "learning_rate": 1.7269126213592234e-05, "loss": 0.0559, "step": 52740 }, { "epoch": 20.49, "learning_rate": 1.7268608414239482e-05, "loss": 0.1423, "step": 52750 }, { "epoch": 20.49, "learning_rate": 1.7268090614886733e-05, "loss": 0.1207, "step": 52760 }, { "epoch": 20.49, "learning_rate": 1.726757281553398e-05, "loss": 0.1362, "step": 52770 }, { "epoch": 20.5, "learning_rate": 1.7267055016181233e-05, "loss": 0.1651, "step": 52780 }, { "epoch": 20.5, "learning_rate": 1.726653721682848e-05, "loss": 0.0464, "step": 52790 }, { "epoch": 20.5, "learning_rate": 1.726601941747573e-05, "loss": 0.0877, "step": 52800 }, { "epoch": 20.51, "learning_rate": 1.726550161812298e-05, "loss": 0.0662, "step": 52810 }, { "epoch": 20.51, "learning_rate": 1.7264983818770228e-05, "loss": 0.079, "step": 52820 }, { "epoch": 20.52, "learning_rate": 1.7264466019417476e-05, "loss": 0.2121, "step": 52830 }, { "epoch": 20.52, "learning_rate": 1.7263948220064727e-05, "loss": 0.0305, "step": 52840 }, { "epoch": 20.52, "learning_rate": 1.7263430420711975e-05, "loss": 0.0381, "step": 52850 }, { "epoch": 20.53, "learning_rate": 1.7262912621359226e-05, "loss": 0.0165, "step": 52860 }, { "epoch": 20.53, "learning_rate": 1.7262394822006474e-05, "loss": 0.071, "step": 52870 }, { "epoch": 20.54, "learning_rate": 1.7261877022653722e-05, "loss": 0.0843, "step": 52880 }, { "epoch": 20.54, "learning_rate": 1.7261359223300973e-05, "loss": 0.1922, "step": 52890 }, { "epoch": 20.54, "learning_rate": 1.726084142394822e-05, "loss": 0.3273, "step": 52900 }, { "epoch": 20.55, "learning_rate": 1.726032362459547e-05, "loss": 0.064, "step": 52910 }, { "epoch": 20.55, "learning_rate": 1.725980582524272e-05, "loss": 0.0848, "step": 52920 }, { "epoch": 20.56, "learning_rate": 1.725928802588997e-05, "loss": 0.0974, "step": 52930 }, { "epoch": 20.56, "learning_rate": 1.725877022653722e-05, "loss": 0.1886, "step": 52940 }, { "epoch": 20.56, "learning_rate": 1.7258252427184468e-05, "loss": 0.0222, "step": 52950 }, { "epoch": 20.57, "learning_rate": 1.7257734627831716e-05, "loss": 0.2862, "step": 52960 }, { "epoch": 20.57, "learning_rate": 1.7257216828478967e-05, "loss": 0.1052, "step": 52970 }, { "epoch": 20.57, "learning_rate": 1.7256699029126215e-05, "loss": 0.1577, "step": 52980 }, { "epoch": 20.58, "learning_rate": 1.7256181229773463e-05, "loss": 0.1558, "step": 52990 }, { "epoch": 20.58, "learning_rate": 1.7255663430420714e-05, "loss": 0.0089, "step": 53000 }, { "epoch": 20.59, "learning_rate": 1.7255145631067962e-05, "loss": 0.1298, "step": 53010 }, { "epoch": 20.59, "learning_rate": 1.7254627831715213e-05, "loss": 0.1453, "step": 53020 }, { "epoch": 20.59, "learning_rate": 1.725411003236246e-05, "loss": 0.1413, "step": 53030 }, { "epoch": 20.6, "learning_rate": 1.725359223300971e-05, "loss": 0.0606, "step": 53040 }, { "epoch": 20.6, "learning_rate": 1.725307443365696e-05, "loss": 0.1143, "step": 53050 }, { "epoch": 20.61, "learning_rate": 1.725255663430421e-05, "loss": 0.0192, "step": 53060 }, { "epoch": 20.61, "learning_rate": 1.7252038834951456e-05, "loss": 0.1011, "step": 53070 }, { "epoch": 20.61, "learning_rate": 1.7251521035598708e-05, "loss": 0.0171, "step": 53080 }, { "epoch": 20.62, "learning_rate": 1.7251003236245955e-05, "loss": 0.3304, "step": 53090 }, { "epoch": 20.62, "learning_rate": 1.7250485436893207e-05, "loss": 0.0536, "step": 53100 }, { "epoch": 20.63, "learning_rate": 1.7249967637540455e-05, "loss": 0.0653, "step": 53110 }, { "epoch": 20.63, "learning_rate": 1.7249449838187703e-05, "loss": 0.0682, "step": 53120 }, { "epoch": 20.63, "learning_rate": 1.7248932038834954e-05, "loss": 0.1384, "step": 53130 }, { "epoch": 20.64, "learning_rate": 1.7248414239482202e-05, "loss": 0.0683, "step": 53140 }, { "epoch": 20.64, "learning_rate": 1.724789644012945e-05, "loss": 0.0719, "step": 53150 }, { "epoch": 20.64, "learning_rate": 1.72473786407767e-05, "loss": 0.2187, "step": 53160 }, { "epoch": 20.65, "learning_rate": 1.724686084142395e-05, "loss": 0.1711, "step": 53170 }, { "epoch": 20.65, "learning_rate": 1.7246343042071197e-05, "loss": 0.1218, "step": 53180 }, { "epoch": 20.66, "learning_rate": 1.7245825242718448e-05, "loss": 0.1284, "step": 53190 }, { "epoch": 20.66, "learning_rate": 1.7245307443365696e-05, "loss": 0.0449, "step": 53200 }, { "epoch": 20.66, "learning_rate": 1.7244789644012947e-05, "loss": 0.0941, "step": 53210 }, { "epoch": 20.67, "learning_rate": 1.7244271844660195e-05, "loss": 0.089, "step": 53220 }, { "epoch": 20.67, "learning_rate": 1.7243754045307443e-05, "loss": 0.2326, "step": 53230 }, { "epoch": 20.68, "learning_rate": 1.7243236245954695e-05, "loss": 0.0099, "step": 53240 }, { "epoch": 20.68, "learning_rate": 1.7242718446601943e-05, "loss": 0.1499, "step": 53250 }, { "epoch": 20.68, "learning_rate": 1.724220064724919e-05, "loss": 0.0619, "step": 53260 }, { "epoch": 20.69, "learning_rate": 1.7241682847896442e-05, "loss": 0.094, "step": 53270 }, { "epoch": 20.69, "learning_rate": 1.724116504854369e-05, "loss": 0.1198, "step": 53280 }, { "epoch": 20.7, "learning_rate": 1.724064724919094e-05, "loss": 0.0321, "step": 53290 }, { "epoch": 20.7, "learning_rate": 1.724012944983819e-05, "loss": 0.1458, "step": 53300 }, { "epoch": 20.7, "learning_rate": 1.723961165048544e-05, "loss": 0.1425, "step": 53310 }, { "epoch": 20.71, "learning_rate": 1.7239093851132688e-05, "loss": 0.223, "step": 53320 }, { "epoch": 20.71, "learning_rate": 1.7238576051779936e-05, "loss": 0.1273, "step": 53330 }, { "epoch": 20.71, "learning_rate": 1.7238058252427184e-05, "loss": 0.2332, "step": 53340 }, { "epoch": 20.72, "learning_rate": 1.7237540453074435e-05, "loss": 0.0277, "step": 53350 }, { "epoch": 20.72, "learning_rate": 1.7237022653721683e-05, "loss": 0.1274, "step": 53360 }, { "epoch": 20.73, "learning_rate": 1.7236504854368935e-05, "loss": 0.1231, "step": 53370 }, { "epoch": 20.73, "learning_rate": 1.7235987055016183e-05, "loss": 0.1353, "step": 53380 }, { "epoch": 20.73, "learning_rate": 1.7235469255663434e-05, "loss": 0.191, "step": 53390 }, { "epoch": 20.74, "learning_rate": 1.7234951456310682e-05, "loss": 0.1519, "step": 53400 }, { "epoch": 20.74, "learning_rate": 1.723443365695793e-05, "loss": 0.0325, "step": 53410 }, { "epoch": 20.75, "learning_rate": 1.7233915857605178e-05, "loss": 0.1875, "step": 53420 }, { "epoch": 20.75, "learning_rate": 1.723339805825243e-05, "loss": 0.0786, "step": 53430 }, { "epoch": 20.75, "learning_rate": 1.7232880258899677e-05, "loss": 0.119, "step": 53440 }, { "epoch": 20.76, "learning_rate": 1.7232362459546928e-05, "loss": 0.1078, "step": 53450 }, { "epoch": 20.76, "learning_rate": 1.7231844660194176e-05, "loss": 0.1561, "step": 53460 }, { "epoch": 20.77, "learning_rate": 1.7231326860841427e-05, "loss": 0.2179, "step": 53470 }, { "epoch": 20.77, "learning_rate": 1.7230809061488672e-05, "loss": 0.0132, "step": 53480 }, { "epoch": 20.77, "learning_rate": 1.7230291262135923e-05, "loss": 0.0332, "step": 53490 }, { "epoch": 20.78, "learning_rate": 1.722977346278317e-05, "loss": 0.2199, "step": 53500 }, { "epoch": 20.78, "learning_rate": 1.7229255663430422e-05, "loss": 0.1592, "step": 53510 }, { "epoch": 20.78, "learning_rate": 1.722873786407767e-05, "loss": 0.0613, "step": 53520 }, { "epoch": 20.79, "learning_rate": 1.7228220064724922e-05, "loss": 0.0638, "step": 53530 }, { "epoch": 20.79, "learning_rate": 1.722770226537217e-05, "loss": 0.0222, "step": 53540 }, { "epoch": 20.8, "learning_rate": 1.722718446601942e-05, "loss": 0.1946, "step": 53550 }, { "epoch": 20.8, "learning_rate": 1.7226666666666665e-05, "loss": 0.2062, "step": 53560 }, { "epoch": 20.8, "learning_rate": 1.7226148867313917e-05, "loss": 0.1673, "step": 53570 }, { "epoch": 20.81, "learning_rate": 1.7225631067961165e-05, "loss": 0.0688, "step": 53580 }, { "epoch": 20.81, "learning_rate": 1.7225113268608416e-05, "loss": 0.1346, "step": 53590 }, { "epoch": 20.82, "learning_rate": 1.7224595469255664e-05, "loss": 0.0634, "step": 53600 }, { "epoch": 20.82, "learning_rate": 1.7224077669902915e-05, "loss": 0.1253, "step": 53610 }, { "epoch": 20.82, "learning_rate": 1.7223559870550163e-05, "loss": 0.1076, "step": 53620 }, { "epoch": 20.83, "learning_rate": 1.7223042071197415e-05, "loss": 0.0792, "step": 53630 }, { "epoch": 20.83, "learning_rate": 1.722252427184466e-05, "loss": 0.0701, "step": 53640 }, { "epoch": 20.83, "learning_rate": 1.722200647249191e-05, "loss": 0.0174, "step": 53650 }, { "epoch": 20.84, "learning_rate": 1.7221488673139158e-05, "loss": 0.2389, "step": 53660 }, { "epoch": 20.84, "learning_rate": 1.722097087378641e-05, "loss": 0.0127, "step": 53670 }, { "epoch": 20.85, "learning_rate": 1.7220453074433658e-05, "loss": 0.0954, "step": 53680 }, { "epoch": 20.85, "learning_rate": 1.721993527508091e-05, "loss": 0.1071, "step": 53690 }, { "epoch": 20.85, "learning_rate": 1.7219417475728157e-05, "loss": 0.1094, "step": 53700 }, { "epoch": 20.86, "learning_rate": 1.7218899676375408e-05, "loss": 0.2007, "step": 53710 }, { "epoch": 20.86, "learning_rate": 1.7218381877022653e-05, "loss": 0.1113, "step": 53720 }, { "epoch": 20.87, "learning_rate": 1.7217864077669904e-05, "loss": 0.1856, "step": 53730 }, { "epoch": 20.87, "learning_rate": 1.7217346278317152e-05, "loss": 0.0138, "step": 53740 }, { "epoch": 20.87, "learning_rate": 1.7216828478964403e-05, "loss": 0.0971, "step": 53750 }, { "epoch": 20.88, "learning_rate": 1.721631067961165e-05, "loss": 0.1506, "step": 53760 }, { "epoch": 20.88, "learning_rate": 1.7215792880258902e-05, "loss": 0.0276, "step": 53770 }, { "epoch": 20.89, "learning_rate": 1.721527508090615e-05, "loss": 0.0198, "step": 53780 }, { "epoch": 20.89, "learning_rate": 1.72147572815534e-05, "loss": 0.0372, "step": 53790 }, { "epoch": 20.89, "learning_rate": 1.7214239482200646e-05, "loss": 0.1151, "step": 53800 }, { "epoch": 20.9, "learning_rate": 1.7213721682847897e-05, "loss": 0.0435, "step": 53810 }, { "epoch": 20.9, "learning_rate": 1.7213203883495145e-05, "loss": 0.0989, "step": 53820 }, { "epoch": 20.9, "learning_rate": 1.7212686084142397e-05, "loss": 0.144, "step": 53830 }, { "epoch": 20.91, "learning_rate": 1.7212168284789645e-05, "loss": 0.1142, "step": 53840 }, { "epoch": 20.91, "learning_rate": 1.7211650485436896e-05, "loss": 0.0974, "step": 53850 }, { "epoch": 20.92, "learning_rate": 1.7211132686084144e-05, "loss": 0.1335, "step": 53860 }, { "epoch": 20.92, "learning_rate": 1.7210614886731395e-05, "loss": 0.0046, "step": 53870 }, { "epoch": 20.92, "learning_rate": 1.7210097087378643e-05, "loss": 0.13, "step": 53880 }, { "epoch": 20.93, "learning_rate": 1.720957928802589e-05, "loss": 0.0353, "step": 53890 }, { "epoch": 20.93, "learning_rate": 1.720906148867314e-05, "loss": 0.0903, "step": 53900 }, { "epoch": 20.94, "learning_rate": 1.720854368932039e-05, "loss": 0.0495, "step": 53910 }, { "epoch": 20.94, "learning_rate": 1.7208025889967638e-05, "loss": 0.2099, "step": 53920 }, { "epoch": 20.94, "learning_rate": 1.720750809061489e-05, "loss": 0.0903, "step": 53930 }, { "epoch": 20.95, "learning_rate": 1.7206990291262137e-05, "loss": 0.1635, "step": 53940 }, { "epoch": 20.95, "learning_rate": 1.720647249190939e-05, "loss": 0.1063, "step": 53950 }, { "epoch": 20.96, "learning_rate": 1.7205954692556637e-05, "loss": 0.0585, "step": 53960 }, { "epoch": 20.96, "learning_rate": 1.7205436893203885e-05, "loss": 0.0919, "step": 53970 }, { "epoch": 20.96, "learning_rate": 1.7204919093851133e-05, "loss": 0.1721, "step": 53980 }, { "epoch": 20.97, "learning_rate": 1.7204401294498384e-05, "loss": 0.0547, "step": 53990 }, { "epoch": 20.97, "learning_rate": 1.7203883495145632e-05, "loss": 0.1883, "step": 54000 }, { "epoch": 20.97, "learning_rate": 1.7203365695792883e-05, "loss": 0.2006, "step": 54010 }, { "epoch": 20.98, "learning_rate": 1.720284789644013e-05, "loss": 0.2289, "step": 54020 }, { "epoch": 20.98, "learning_rate": 1.7202330097087382e-05, "loss": 0.0398, "step": 54030 }, { "epoch": 20.99, "learning_rate": 1.720181229773463e-05, "loss": 0.039, "step": 54040 }, { "epoch": 20.99, "learning_rate": 1.7201294498381878e-05, "loss": 0.0833, "step": 54050 }, { "epoch": 20.99, "learning_rate": 1.7200776699029126e-05, "loss": 0.0038, "step": 54060 }, { "epoch": 21.0, "learning_rate": 1.7200258899676377e-05, "loss": 0.1441, "step": 54070 }, { "epoch": 21.0, "eval_accuracy": 0.9535075653370014, "eval_loss": 0.24637892842292786, "eval_runtime": 8.3191, "eval_samples_per_second": 436.948, "eval_steps_per_second": 54.694, "step": 54075 }, { "epoch": 21.0, "learning_rate": 1.7199741100323625e-05, "loss": 0.0572, "step": 54080 }, { "epoch": 21.01, "learning_rate": 1.7199223300970877e-05, "loss": 0.0622, "step": 54090 }, { "epoch": 21.01, "learning_rate": 1.7198705501618125e-05, "loss": 0.0795, "step": 54100 }, { "epoch": 21.01, "learning_rate": 1.7198187702265376e-05, "loss": 0.1797, "step": 54110 }, { "epoch": 21.02, "learning_rate": 1.7197669902912624e-05, "loss": 0.1026, "step": 54120 }, { "epoch": 21.02, "learning_rate": 1.7197152103559872e-05, "loss": 0.0673, "step": 54130 }, { "epoch": 21.03, "learning_rate": 1.719663430420712e-05, "loss": 0.0365, "step": 54140 }, { "epoch": 21.03, "learning_rate": 1.719611650485437e-05, "loss": 0.0678, "step": 54150 }, { "epoch": 21.03, "learning_rate": 1.719559870550162e-05, "loss": 0.1162, "step": 54160 }, { "epoch": 21.04, "learning_rate": 1.719508090614887e-05, "loss": 0.106, "step": 54170 }, { "epoch": 21.04, "learning_rate": 1.7194563106796118e-05, "loss": 0.0695, "step": 54180 }, { "epoch": 21.04, "learning_rate": 1.719404530744337e-05, "loss": 0.1746, "step": 54190 }, { "epoch": 21.05, "learning_rate": 1.7193527508090617e-05, "loss": 0.0216, "step": 54200 }, { "epoch": 21.05, "learning_rate": 1.7193009708737865e-05, "loss": 0.0578, "step": 54210 }, { "epoch": 21.06, "learning_rate": 1.7192491909385113e-05, "loss": 0.1653, "step": 54220 }, { "epoch": 21.06, "learning_rate": 1.7191974110032364e-05, "loss": 0.0185, "step": 54230 }, { "epoch": 21.06, "learning_rate": 1.7191456310679612e-05, "loss": 0.201, "step": 54240 }, { "epoch": 21.07, "learning_rate": 1.7190938511326864e-05, "loss": 0.0463, "step": 54250 }, { "epoch": 21.07, "learning_rate": 1.719042071197411e-05, "loss": 0.1246, "step": 54260 }, { "epoch": 21.08, "learning_rate": 1.718990291262136e-05, "loss": 0.164, "step": 54270 }, { "epoch": 21.08, "learning_rate": 1.718938511326861e-05, "loss": 0.0791, "step": 54280 }, { "epoch": 21.08, "learning_rate": 1.718886731391586e-05, "loss": 0.0706, "step": 54290 }, { "epoch": 21.09, "learning_rate": 1.7188349514563107e-05, "loss": 0.0284, "step": 54300 }, { "epoch": 21.09, "learning_rate": 1.7187831715210358e-05, "loss": 0.001, "step": 54310 }, { "epoch": 21.1, "learning_rate": 1.7187313915857606e-05, "loss": 0.096, "step": 54320 }, { "epoch": 21.1, "learning_rate": 1.7186796116504857e-05, "loss": 0.0586, "step": 54330 }, { "epoch": 21.1, "learning_rate": 1.7186278317152105e-05, "loss": 0.0814, "step": 54340 }, { "epoch": 21.11, "learning_rate": 1.7185760517799353e-05, "loss": 0.2206, "step": 54350 }, { "epoch": 21.11, "learning_rate": 1.7185242718446604e-05, "loss": 0.1949, "step": 54360 }, { "epoch": 21.11, "learning_rate": 1.7184724919093852e-05, "loss": 0.2117, "step": 54370 }, { "epoch": 21.12, "learning_rate": 1.71842071197411e-05, "loss": 0.0407, "step": 54380 }, { "epoch": 21.12, "learning_rate": 1.718368932038835e-05, "loss": 0.1158, "step": 54390 }, { "epoch": 21.13, "learning_rate": 1.71831715210356e-05, "loss": 0.0969, "step": 54400 }, { "epoch": 21.13, "learning_rate": 1.718265372168285e-05, "loss": 0.1542, "step": 54410 }, { "epoch": 21.13, "learning_rate": 1.71821359223301e-05, "loss": 0.1363, "step": 54420 }, { "epoch": 21.14, "learning_rate": 1.7181618122977347e-05, "loss": 0.0432, "step": 54430 }, { "epoch": 21.14, "learning_rate": 1.7181100323624598e-05, "loss": 0.0786, "step": 54440 }, { "epoch": 21.15, "learning_rate": 1.7180582524271846e-05, "loss": 0.0921, "step": 54450 }, { "epoch": 21.15, "learning_rate": 1.7180064724919094e-05, "loss": 0.0404, "step": 54460 }, { "epoch": 21.15, "learning_rate": 1.7179546925566345e-05, "loss": 0.0746, "step": 54470 }, { "epoch": 21.16, "learning_rate": 1.7179029126213593e-05, "loss": 0.075, "step": 54480 }, { "epoch": 21.16, "learning_rate": 1.7178511326860844e-05, "loss": 0.0497, "step": 54490 }, { "epoch": 21.17, "learning_rate": 1.7177993527508092e-05, "loss": 0.2104, "step": 54500 }, { "epoch": 21.17, "learning_rate": 1.717747572815534e-05, "loss": 0.1068, "step": 54510 }, { "epoch": 21.17, "learning_rate": 1.717695792880259e-05, "loss": 0.0662, "step": 54520 }, { "epoch": 21.18, "learning_rate": 1.717644012944984e-05, "loss": 0.0811, "step": 54530 }, { "epoch": 21.18, "learning_rate": 1.7175922330097087e-05, "loss": 0.0535, "step": 54540 }, { "epoch": 21.18, "learning_rate": 1.717540453074434e-05, "loss": 0.1135, "step": 54550 }, { "epoch": 21.19, "learning_rate": 1.7174886731391587e-05, "loss": 0.1646, "step": 54560 }, { "epoch": 21.19, "learning_rate": 1.7174368932038838e-05, "loss": 0.0235, "step": 54570 }, { "epoch": 21.2, "learning_rate": 1.7173851132686086e-05, "loss": 0.1694, "step": 54580 }, { "epoch": 21.2, "learning_rate": 1.7173333333333334e-05, "loss": 0.0707, "step": 54590 }, { "epoch": 21.2, "learning_rate": 1.7172815533980585e-05, "loss": 0.0241, "step": 54600 }, { "epoch": 21.21, "learning_rate": 1.7172297734627833e-05, "loss": 0.1197, "step": 54610 }, { "epoch": 21.21, "learning_rate": 1.717177993527508e-05, "loss": 0.155, "step": 54620 }, { "epoch": 21.22, "learning_rate": 1.7171262135922332e-05, "loss": 0.1072, "step": 54630 }, { "epoch": 21.22, "learning_rate": 1.717074433656958e-05, "loss": 0.0706, "step": 54640 }, { "epoch": 21.22, "learning_rate": 1.7170226537216828e-05, "loss": 0.1033, "step": 54650 }, { "epoch": 21.23, "learning_rate": 1.716970873786408e-05, "loss": 0.1044, "step": 54660 }, { "epoch": 21.23, "learning_rate": 1.7169190938511327e-05, "loss": 0.0384, "step": 54670 }, { "epoch": 21.23, "learning_rate": 1.716867313915858e-05, "loss": 0.0771, "step": 54680 }, { "epoch": 21.24, "learning_rate": 1.7168155339805827e-05, "loss": 0.0795, "step": 54690 }, { "epoch": 21.24, "learning_rate": 1.7167637540453075e-05, "loss": 0.322, "step": 54700 }, { "epoch": 21.25, "learning_rate": 1.7167119741100326e-05, "loss": 0.0858, "step": 54710 }, { "epoch": 21.25, "learning_rate": 1.7166601941747574e-05, "loss": 0.1502, "step": 54720 }, { "epoch": 21.25, "learning_rate": 1.716608414239482e-05, "loss": 0.0795, "step": 54730 }, { "epoch": 21.26, "learning_rate": 1.7165566343042073e-05, "loss": 0.0275, "step": 54740 }, { "epoch": 21.26, "learning_rate": 1.716504854368932e-05, "loss": 0.2038, "step": 54750 }, { "epoch": 21.27, "learning_rate": 1.7164530744336572e-05, "loss": 0.2753, "step": 54760 }, { "epoch": 21.27, "learning_rate": 1.716401294498382e-05, "loss": 0.0944, "step": 54770 }, { "epoch": 21.27, "learning_rate": 1.7163495145631068e-05, "loss": 0.0654, "step": 54780 }, { "epoch": 21.28, "learning_rate": 1.716297734627832e-05, "loss": 0.126, "step": 54790 }, { "epoch": 21.28, "learning_rate": 1.7162459546925567e-05, "loss": 0.1474, "step": 54800 }, { "epoch": 21.29, "learning_rate": 1.7161941747572815e-05, "loss": 0.0384, "step": 54810 }, { "epoch": 21.29, "learning_rate": 1.7161423948220067e-05, "loss": 0.1351, "step": 54820 }, { "epoch": 21.29, "learning_rate": 1.7160906148867314e-05, "loss": 0.0447, "step": 54830 }, { "epoch": 21.3, "learning_rate": 1.7160388349514566e-05, "loss": 0.0885, "step": 54840 }, { "epoch": 21.3, "learning_rate": 1.7159870550161814e-05, "loss": 0.1213, "step": 54850 }, { "epoch": 21.3, "learning_rate": 1.715935275080906e-05, "loss": 0.1524, "step": 54860 }, { "epoch": 21.31, "learning_rate": 1.7158834951456313e-05, "loss": 0.0191, "step": 54870 }, { "epoch": 21.31, "learning_rate": 1.715831715210356e-05, "loss": 0.1977, "step": 54880 }, { "epoch": 21.32, "learning_rate": 1.715779935275081e-05, "loss": 0.0336, "step": 54890 }, { "epoch": 21.32, "learning_rate": 1.715728155339806e-05, "loss": 0.153, "step": 54900 }, { "epoch": 21.32, "learning_rate": 1.7156763754045308e-05, "loss": 0.1692, "step": 54910 }, { "epoch": 21.33, "learning_rate": 1.715624595469256e-05, "loss": 0.1391, "step": 54920 }, { "epoch": 21.33, "learning_rate": 1.7155728155339807e-05, "loss": 0.1667, "step": 54930 }, { "epoch": 21.34, "learning_rate": 1.715521035598706e-05, "loss": 0.046, "step": 54940 }, { "epoch": 21.34, "learning_rate": 1.7154692556634303e-05, "loss": 0.1033, "step": 54950 }, { "epoch": 21.34, "learning_rate": 1.7154174757281554e-05, "loss": 0.2567, "step": 54960 }, { "epoch": 21.35, "learning_rate": 1.7153656957928802e-05, "loss": 0.2298, "step": 54970 }, { "epoch": 21.35, "learning_rate": 1.7153139158576054e-05, "loss": 0.3167, "step": 54980 }, { "epoch": 21.36, "learning_rate": 1.71526213592233e-05, "loss": 0.1198, "step": 54990 }, { "epoch": 21.36, "learning_rate": 1.7152103559870553e-05, "loss": 0.0241, "step": 55000 }, { "epoch": 21.36, "learning_rate": 1.71515857605178e-05, "loss": 0.2502, "step": 55010 }, { "epoch": 21.37, "learning_rate": 1.7151067961165052e-05, "loss": 0.1034, "step": 55020 }, { "epoch": 21.37, "learning_rate": 1.7150550161812297e-05, "loss": 0.1133, "step": 55030 }, { "epoch": 21.37, "learning_rate": 1.7150032362459548e-05, "loss": 0.0762, "step": 55040 }, { "epoch": 21.38, "learning_rate": 1.7149514563106796e-05, "loss": 0.0366, "step": 55050 }, { "epoch": 21.38, "learning_rate": 1.7148996763754047e-05, "loss": 0.0397, "step": 55060 }, { "epoch": 21.39, "learning_rate": 1.7148478964401295e-05, "loss": 0.0419, "step": 55070 }, { "epoch": 21.39, "learning_rate": 1.7147961165048546e-05, "loss": 0.0258, "step": 55080 }, { "epoch": 21.39, "learning_rate": 1.7147443365695794e-05, "loss": 0.1835, "step": 55090 }, { "epoch": 21.4, "learning_rate": 1.7146925566343046e-05, "loss": 0.0387, "step": 55100 }, { "epoch": 21.4, "learning_rate": 1.714640776699029e-05, "loss": 0.0926, "step": 55110 }, { "epoch": 21.41, "learning_rate": 1.714588996763754e-05, "loss": 0.1444, "step": 55120 }, { "epoch": 21.41, "learning_rate": 1.714537216828479e-05, "loss": 0.0609, "step": 55130 }, { "epoch": 21.41, "learning_rate": 1.714485436893204e-05, "loss": 0.1269, "step": 55140 }, { "epoch": 21.42, "learning_rate": 1.714433656957929e-05, "loss": 0.0858, "step": 55150 }, { "epoch": 21.42, "learning_rate": 1.714381877022654e-05, "loss": 0.0772, "step": 55160 }, { "epoch": 21.43, "learning_rate": 1.7143300970873788e-05, "loss": 0.0914, "step": 55170 }, { "epoch": 21.43, "learning_rate": 1.714278317152104e-05, "loss": 0.1835, "step": 55180 }, { "epoch": 21.43, "learning_rate": 1.7142265372168284e-05, "loss": 0.1271, "step": 55190 }, { "epoch": 21.44, "learning_rate": 1.7141747572815535e-05, "loss": 0.0676, "step": 55200 }, { "epoch": 21.44, "learning_rate": 1.7141229773462783e-05, "loss": 0.0363, "step": 55210 }, { "epoch": 21.44, "learning_rate": 1.7140711974110034e-05, "loss": 0.0981, "step": 55220 }, { "epoch": 21.45, "learning_rate": 1.7140194174757282e-05, "loss": 0.1268, "step": 55230 }, { "epoch": 21.45, "learning_rate": 1.7139676375404534e-05, "loss": 0.1077, "step": 55240 }, { "epoch": 21.46, "learning_rate": 1.713915857605178e-05, "loss": 0.1698, "step": 55250 }, { "epoch": 21.46, "learning_rate": 1.7138640776699033e-05, "loss": 0.1275, "step": 55260 }, { "epoch": 21.46, "learning_rate": 1.7138122977346277e-05, "loss": 0.1187, "step": 55270 }, { "epoch": 21.47, "learning_rate": 1.713760517799353e-05, "loss": 0.2423, "step": 55280 }, { "epoch": 21.47, "learning_rate": 1.7137087378640777e-05, "loss": 0.1485, "step": 55290 }, { "epoch": 21.48, "learning_rate": 1.7136569579288028e-05, "loss": 0.234, "step": 55300 }, { "epoch": 21.48, "learning_rate": 1.7136051779935276e-05, "loss": 0.1527, "step": 55310 }, { "epoch": 21.48, "learning_rate": 1.7135533980582527e-05, "loss": 0.1034, "step": 55320 }, { "epoch": 21.49, "learning_rate": 1.7135016181229775e-05, "loss": 0.0304, "step": 55330 }, { "epoch": 21.49, "learning_rate": 1.7134498381877026e-05, "loss": 0.1296, "step": 55340 }, { "epoch": 21.5, "learning_rate": 1.713398058252427e-05, "loss": 0.0273, "step": 55350 }, { "epoch": 21.5, "learning_rate": 1.7133462783171522e-05, "loss": 0.0918, "step": 55360 }, { "epoch": 21.5, "learning_rate": 1.713294498381877e-05, "loss": 0.0659, "step": 55370 }, { "epoch": 21.51, "learning_rate": 1.713242718446602e-05, "loss": 0.0087, "step": 55380 }, { "epoch": 21.51, "learning_rate": 1.713190938511327e-05, "loss": 0.1006, "step": 55390 }, { "epoch": 21.51, "learning_rate": 1.713139158576052e-05, "loss": 0.1827, "step": 55400 }, { "epoch": 21.52, "learning_rate": 1.713087378640777e-05, "loss": 0.0121, "step": 55410 }, { "epoch": 21.52, "learning_rate": 1.713035598705502e-05, "loss": 0.2817, "step": 55420 }, { "epoch": 21.53, "learning_rate": 1.7129838187702264e-05, "loss": 0.105, "step": 55430 }, { "epoch": 21.53, "learning_rate": 1.7129320388349516e-05, "loss": 0.132, "step": 55440 }, { "epoch": 21.53, "learning_rate": 1.7128802588996764e-05, "loss": 0.1575, "step": 55450 }, { "epoch": 21.54, "learning_rate": 1.7128284789644015e-05, "loss": 0.3516, "step": 55460 }, { "epoch": 21.54, "learning_rate": 1.7127766990291263e-05, "loss": 0.0895, "step": 55470 }, { "epoch": 21.55, "learning_rate": 1.7127249190938514e-05, "loss": 0.0385, "step": 55480 }, { "epoch": 21.55, "learning_rate": 1.7126731391585762e-05, "loss": 0.0981, "step": 55490 }, { "epoch": 21.55, "learning_rate": 1.7126213592233013e-05, "loss": 0.0832, "step": 55500 }, { "epoch": 21.56, "learning_rate": 1.712569579288026e-05, "loss": 0.0416, "step": 55510 }, { "epoch": 21.56, "learning_rate": 1.712517799352751e-05, "loss": 0.1029, "step": 55520 }, { "epoch": 21.57, "learning_rate": 1.7124660194174757e-05, "loss": 0.0724, "step": 55530 }, { "epoch": 21.57, "learning_rate": 1.712414239482201e-05, "loss": 0.2461, "step": 55540 }, { "epoch": 21.57, "learning_rate": 1.7123624595469256e-05, "loss": 0.1188, "step": 55550 }, { "epoch": 21.58, "learning_rate": 1.7123106796116508e-05, "loss": 0.0732, "step": 55560 }, { "epoch": 21.58, "learning_rate": 1.7122588996763756e-05, "loss": 0.1311, "step": 55570 }, { "epoch": 21.58, "learning_rate": 1.7122071197411007e-05, "loss": 0.0857, "step": 55580 }, { "epoch": 21.59, "learning_rate": 1.7121553398058255e-05, "loss": 0.1913, "step": 55590 }, { "epoch": 21.59, "learning_rate": 1.7121035598705503e-05, "loss": 0.1128, "step": 55600 }, { "epoch": 21.6, "learning_rate": 1.712051779935275e-05, "loss": 0.0664, "step": 55610 }, { "epoch": 21.6, "learning_rate": 1.7120000000000002e-05, "loss": 0.1148, "step": 55620 }, { "epoch": 21.6, "learning_rate": 1.711948220064725e-05, "loss": 0.0542, "step": 55630 }, { "epoch": 21.61, "learning_rate": 1.71189644012945e-05, "loss": 0.1333, "step": 55640 }, { "epoch": 21.61, "learning_rate": 1.711844660194175e-05, "loss": 0.2394, "step": 55650 }, { "epoch": 21.62, "learning_rate": 1.7117928802589e-05, "loss": 0.1616, "step": 55660 }, { "epoch": 21.62, "learning_rate": 1.711741100323625e-05, "loss": 0.1626, "step": 55670 }, { "epoch": 21.62, "learning_rate": 1.7116893203883496e-05, "loss": 0.1932, "step": 55680 }, { "epoch": 21.63, "learning_rate": 1.7116375404530744e-05, "loss": 0.0917, "step": 55690 }, { "epoch": 21.63, "learning_rate": 1.7115857605177996e-05, "loss": 0.1041, "step": 55700 }, { "epoch": 21.63, "learning_rate": 1.7115339805825244e-05, "loss": 0.0526, "step": 55710 }, { "epoch": 21.64, "learning_rate": 1.7114822006472495e-05, "loss": 0.0456, "step": 55720 }, { "epoch": 21.64, "learning_rate": 1.7114304207119743e-05, "loss": 0.0498, "step": 55730 }, { "epoch": 21.65, "learning_rate": 1.711378640776699e-05, "loss": 0.1055, "step": 55740 }, { "epoch": 21.65, "learning_rate": 1.7113268608414242e-05, "loss": 0.0494, "step": 55750 }, { "epoch": 21.65, "learning_rate": 1.711275080906149e-05, "loss": 0.1144, "step": 55760 }, { "epoch": 21.66, "learning_rate": 1.7112233009708738e-05, "loss": 0.0932, "step": 55770 }, { "epoch": 21.66, "learning_rate": 1.711171521035599e-05, "loss": 0.1401, "step": 55780 }, { "epoch": 21.67, "learning_rate": 1.7111197411003237e-05, "loss": 0.0484, "step": 55790 }, { "epoch": 21.67, "learning_rate": 1.711067961165049e-05, "loss": 0.0679, "step": 55800 }, { "epoch": 21.67, "learning_rate": 1.7110161812297736e-05, "loss": 0.1739, "step": 55810 }, { "epoch": 21.68, "learning_rate": 1.7109644012944984e-05, "loss": 0.1994, "step": 55820 }, { "epoch": 21.68, "learning_rate": 1.7109126213592236e-05, "loss": 0.1304, "step": 55830 }, { "epoch": 21.69, "learning_rate": 1.7108608414239484e-05, "loss": 0.1328, "step": 55840 }, { "epoch": 21.69, "learning_rate": 1.710809061488673e-05, "loss": 0.103, "step": 55850 }, { "epoch": 21.69, "learning_rate": 1.7107572815533983e-05, "loss": 0.1478, "step": 55860 }, { "epoch": 21.7, "learning_rate": 1.710705501618123e-05, "loss": 0.1, "step": 55870 }, { "epoch": 21.7, "learning_rate": 1.7106537216828482e-05, "loss": 0.1594, "step": 55880 }, { "epoch": 21.7, "learning_rate": 1.710601941747573e-05, "loss": 0.2233, "step": 55890 }, { "epoch": 21.71, "learning_rate": 1.7105501618122978e-05, "loss": 0.1124, "step": 55900 }, { "epoch": 21.71, "learning_rate": 1.710498381877023e-05, "loss": 0.0616, "step": 55910 }, { "epoch": 21.72, "learning_rate": 1.7104466019417477e-05, "loss": 0.105, "step": 55920 }, { "epoch": 21.72, "learning_rate": 1.7103948220064725e-05, "loss": 0.0563, "step": 55930 }, { "epoch": 21.72, "learning_rate": 1.7103430420711976e-05, "loss": 0.2011, "step": 55940 }, { "epoch": 21.73, "learning_rate": 1.7102912621359224e-05, "loss": 0.2352, "step": 55950 }, { "epoch": 21.73, "learning_rate": 1.7102394822006476e-05, "loss": 0.0344, "step": 55960 }, { "epoch": 21.74, "learning_rate": 1.7101877022653723e-05, "loss": 0.1035, "step": 55970 }, { "epoch": 21.74, "learning_rate": 1.710135922330097e-05, "loss": 0.3296, "step": 55980 }, { "epoch": 21.74, "learning_rate": 1.7100841423948223e-05, "loss": 0.0276, "step": 55990 }, { "epoch": 21.75, "learning_rate": 1.710032362459547e-05, "loss": 0.0698, "step": 56000 }, { "epoch": 21.75, "learning_rate": 1.709980582524272e-05, "loss": 0.1225, "step": 56010 }, { "epoch": 21.76, "learning_rate": 1.709928802588997e-05, "loss": 0.0029, "step": 56020 }, { "epoch": 21.76, "learning_rate": 1.7098770226537218e-05, "loss": 0.2885, "step": 56030 }, { "epoch": 21.76, "learning_rate": 1.7098252427184466e-05, "loss": 0.2287, "step": 56040 }, { "epoch": 21.77, "learning_rate": 1.7097734627831717e-05, "loss": 0.0749, "step": 56050 }, { "epoch": 21.77, "learning_rate": 1.7097216828478965e-05, "loss": 0.0121, "step": 56060 }, { "epoch": 21.77, "learning_rate": 1.7096699029126216e-05, "loss": 0.0408, "step": 56070 }, { "epoch": 21.78, "learning_rate": 1.7096181229773464e-05, "loss": 0.0057, "step": 56080 }, { "epoch": 21.78, "learning_rate": 1.7095663430420712e-05, "loss": 0.0796, "step": 56090 }, { "epoch": 21.79, "learning_rate": 1.7095145631067963e-05, "loss": 0.1644, "step": 56100 }, { "epoch": 21.79, "learning_rate": 1.709462783171521e-05, "loss": 0.0027, "step": 56110 }, { "epoch": 21.79, "learning_rate": 1.709411003236246e-05, "loss": 0.3565, "step": 56120 }, { "epoch": 21.8, "learning_rate": 1.709359223300971e-05, "loss": 0.0552, "step": 56130 }, { "epoch": 21.8, "learning_rate": 1.709307443365696e-05, "loss": 0.1265, "step": 56140 }, { "epoch": 21.81, "learning_rate": 1.709255663430421e-05, "loss": 0.206, "step": 56150 }, { "epoch": 21.81, "learning_rate": 1.7092038834951458e-05, "loss": 0.259, "step": 56160 }, { "epoch": 21.81, "learning_rate": 1.7091521035598706e-05, "loss": 0.0806, "step": 56170 }, { "epoch": 21.82, "learning_rate": 1.7091003236245957e-05, "loss": 0.1418, "step": 56180 }, { "epoch": 21.82, "learning_rate": 1.7090485436893205e-05, "loss": 0.1497, "step": 56190 }, { "epoch": 21.83, "learning_rate": 1.7089967637540453e-05, "loss": 0.0129, "step": 56200 }, { "epoch": 21.83, "learning_rate": 1.7089449838187704e-05, "loss": 0.0373, "step": 56210 }, { "epoch": 21.83, "learning_rate": 1.7088932038834952e-05, "loss": 0.0755, "step": 56220 }, { "epoch": 21.84, "learning_rate": 1.7088414239482203e-05, "loss": 0.0171, "step": 56230 }, { "epoch": 21.84, "learning_rate": 1.708789644012945e-05, "loss": 0.0246, "step": 56240 }, { "epoch": 21.84, "learning_rate": 1.70873786407767e-05, "loss": 0.2591, "step": 56250 }, { "epoch": 21.85, "learning_rate": 1.708686084142395e-05, "loss": 0.1271, "step": 56260 }, { "epoch": 21.85, "learning_rate": 1.70863430420712e-05, "loss": 0.0511, "step": 56270 }, { "epoch": 21.86, "learning_rate": 1.7085825242718446e-05, "loss": 0.1657, "step": 56280 }, { "epoch": 21.86, "learning_rate": 1.7085307443365698e-05, "loss": 0.1549, "step": 56290 }, { "epoch": 21.86, "learning_rate": 1.7084789644012946e-05, "loss": 0.1842, "step": 56300 }, { "epoch": 21.87, "learning_rate": 1.7084271844660197e-05, "loss": 0.2872, "step": 56310 }, { "epoch": 21.87, "learning_rate": 1.7083754045307445e-05, "loss": 0.0654, "step": 56320 }, { "epoch": 21.88, "learning_rate": 1.7083236245954693e-05, "loss": 0.0881, "step": 56330 }, { "epoch": 21.88, "learning_rate": 1.7082718446601944e-05, "loss": 0.0687, "step": 56340 }, { "epoch": 21.88, "learning_rate": 1.7082200647249192e-05, "loss": 0.0467, "step": 56350 }, { "epoch": 21.89, "learning_rate": 1.708168284789644e-05, "loss": 0.0152, "step": 56360 }, { "epoch": 21.89, "learning_rate": 1.708116504854369e-05, "loss": 0.0566, "step": 56370 }, { "epoch": 21.9, "learning_rate": 1.708064724919094e-05, "loss": 0.0898, "step": 56380 }, { "epoch": 21.9, "learning_rate": 1.708012944983819e-05, "loss": 0.0456, "step": 56390 }, { "epoch": 21.9, "learning_rate": 1.707961165048544e-05, "loss": 0.1176, "step": 56400 }, { "epoch": 21.91, "learning_rate": 1.7079093851132686e-05, "loss": 0.081, "step": 56410 }, { "epoch": 21.91, "learning_rate": 1.7078576051779934e-05, "loss": 0.0915, "step": 56420 }, { "epoch": 21.91, "learning_rate": 1.7078058252427186e-05, "loss": 0.0776, "step": 56430 }, { "epoch": 21.92, "learning_rate": 1.7077540453074434e-05, "loss": 0.1748, "step": 56440 }, { "epoch": 21.92, "learning_rate": 1.7077022653721685e-05, "loss": 0.0913, "step": 56450 }, { "epoch": 21.93, "learning_rate": 1.7076504854368933e-05, "loss": 0.0735, "step": 56460 }, { "epoch": 21.93, "learning_rate": 1.7075987055016184e-05, "loss": 0.2397, "step": 56470 }, { "epoch": 21.93, "learning_rate": 1.7075469255663432e-05, "loss": 0.1239, "step": 56480 }, { "epoch": 21.94, "learning_rate": 1.707495145631068e-05, "loss": 0.1473, "step": 56490 }, { "epoch": 21.94, "learning_rate": 1.7074433656957928e-05, "loss": 0.0952, "step": 56500 }, { "epoch": 21.95, "learning_rate": 1.707391585760518e-05, "loss": 0.1722, "step": 56510 }, { "epoch": 21.95, "learning_rate": 1.7073398058252427e-05, "loss": 0.0992, "step": 56520 }, { "epoch": 21.95, "learning_rate": 1.707288025889968e-05, "loss": 0.1399, "step": 56530 }, { "epoch": 21.96, "learning_rate": 1.7072362459546926e-05, "loss": 0.1249, "step": 56540 }, { "epoch": 21.96, "learning_rate": 1.7071844660194178e-05, "loss": 0.2056, "step": 56550 }, { "epoch": 21.97, "learning_rate": 1.7071326860841426e-05, "loss": 0.1372, "step": 56560 }, { "epoch": 21.97, "learning_rate": 1.7070809061488673e-05, "loss": 0.0458, "step": 56570 }, { "epoch": 21.97, "learning_rate": 1.707029126213592e-05, "loss": 0.1041, "step": 56580 }, { "epoch": 21.98, "learning_rate": 1.7069773462783173e-05, "loss": 0.1443, "step": 56590 }, { "epoch": 21.98, "learning_rate": 1.706925566343042e-05, "loss": 0.1043, "step": 56600 }, { "epoch": 21.98, "learning_rate": 1.7068737864077672e-05, "loss": 0.0388, "step": 56610 }, { "epoch": 21.99, "learning_rate": 1.706822006472492e-05, "loss": 0.0899, "step": 56620 }, { "epoch": 21.99, "learning_rate": 1.706770226537217e-05, "loss": 0.0438, "step": 56630 }, { "epoch": 22.0, "learning_rate": 1.706718446601942e-05, "loss": 0.0551, "step": 56640 }, { "epoch": 22.0, "learning_rate": 1.706666666666667e-05, "loss": 0.1279, "step": 56650 }, { "epoch": 22.0, "eval_accuracy": 0.9499312242090784, "eval_loss": 0.26807668805122375, "eval_runtime": 8.1793, "eval_samples_per_second": 444.417, "eval_steps_per_second": 55.628, "step": 56650 }, { "epoch": 22.0, "learning_rate": 1.7066148867313915e-05, "loss": 0.0868, "step": 56660 }, { "epoch": 22.01, "learning_rate": 1.7065631067961166e-05, "loss": 0.1483, "step": 56670 }, { "epoch": 22.01, "learning_rate": 1.7065113268608414e-05, "loss": 0.2797, "step": 56680 }, { "epoch": 22.02, "learning_rate": 1.7064595469255665e-05, "loss": 0.0626, "step": 56690 }, { "epoch": 22.02, "learning_rate": 1.7064077669902913e-05, "loss": 0.0286, "step": 56700 }, { "epoch": 22.02, "learning_rate": 1.7063559870550165e-05, "loss": 0.1776, "step": 56710 }, { "epoch": 22.03, "learning_rate": 1.7063042071197413e-05, "loss": 0.1385, "step": 56720 }, { "epoch": 22.03, "learning_rate": 1.7062524271844664e-05, "loss": 0.0497, "step": 56730 }, { "epoch": 22.03, "learning_rate": 1.706200647249191e-05, "loss": 0.081, "step": 56740 }, { "epoch": 22.04, "learning_rate": 1.706148867313916e-05, "loss": 0.0546, "step": 56750 }, { "epoch": 22.04, "learning_rate": 1.7060970873786408e-05, "loss": 0.0368, "step": 56760 }, { "epoch": 22.05, "learning_rate": 1.706045307443366e-05, "loss": 0.0264, "step": 56770 }, { "epoch": 22.05, "learning_rate": 1.7059935275080907e-05, "loss": 0.0859, "step": 56780 }, { "epoch": 22.05, "learning_rate": 1.7059417475728158e-05, "loss": 0.0704, "step": 56790 }, { "epoch": 22.06, "learning_rate": 1.7058899676375406e-05, "loss": 0.0336, "step": 56800 }, { "epoch": 22.06, "learning_rate": 1.7058381877022658e-05, "loss": 0.0723, "step": 56810 }, { "epoch": 22.07, "learning_rate": 1.7057864077669902e-05, "loss": 0.0704, "step": 56820 }, { "epoch": 22.07, "learning_rate": 1.7057346278317153e-05, "loss": 0.0653, "step": 56830 }, { "epoch": 22.07, "learning_rate": 1.70568284789644e-05, "loss": 0.0986, "step": 56840 }, { "epoch": 22.08, "learning_rate": 1.7056310679611653e-05, "loss": 0.1149, "step": 56850 }, { "epoch": 22.08, "learning_rate": 1.70557928802589e-05, "loss": 0.1184, "step": 56860 }, { "epoch": 22.09, "learning_rate": 1.7055275080906152e-05, "loss": 0.1502, "step": 56870 }, { "epoch": 22.09, "learning_rate": 1.70547572815534e-05, "loss": 0.1794, "step": 56880 }, { "epoch": 22.09, "learning_rate": 1.705423948220065e-05, "loss": 0.1691, "step": 56890 }, { "epoch": 22.1, "learning_rate": 1.7053721682847896e-05, "loss": 0.0491, "step": 56900 }, { "epoch": 22.1, "learning_rate": 1.7053203883495147e-05, "loss": 0.1255, "step": 56910 }, { "epoch": 22.1, "learning_rate": 1.7052686084142395e-05, "loss": 0.1401, "step": 56920 }, { "epoch": 22.11, "learning_rate": 1.7052168284789646e-05, "loss": 0.0579, "step": 56930 }, { "epoch": 22.11, "learning_rate": 1.7051650485436894e-05, "loss": 0.0348, "step": 56940 }, { "epoch": 22.12, "learning_rate": 1.7051132686084145e-05, "loss": 0.1184, "step": 56950 }, { "epoch": 22.12, "learning_rate": 1.7050614886731393e-05, "loss": 0.0816, "step": 56960 }, { "epoch": 22.12, "learning_rate": 1.7050097087378645e-05, "loss": 0.0867, "step": 56970 }, { "epoch": 22.13, "learning_rate": 1.704957928802589e-05, "loss": 0.0812, "step": 56980 }, { "epoch": 22.13, "learning_rate": 1.704906148867314e-05, "loss": 0.1725, "step": 56990 }, { "epoch": 22.14, "learning_rate": 1.704854368932039e-05, "loss": 0.0475, "step": 57000 }, { "epoch": 22.14, "learning_rate": 1.704802588996764e-05, "loss": 0.1612, "step": 57010 }, { "epoch": 22.14, "learning_rate": 1.7047508090614888e-05, "loss": 0.111, "step": 57020 }, { "epoch": 22.15, "learning_rate": 1.704699029126214e-05, "loss": 0.0606, "step": 57030 }, { "epoch": 22.15, "learning_rate": 1.7046472491909387e-05, "loss": 0.2177, "step": 57040 }, { "epoch": 22.16, "learning_rate": 1.7045954692556638e-05, "loss": 0.1322, "step": 57050 }, { "epoch": 22.16, "learning_rate": 1.7045436893203883e-05, "loss": 0.1165, "step": 57060 }, { "epoch": 22.16, "learning_rate": 1.7044919093851134e-05, "loss": 0.0717, "step": 57070 }, { "epoch": 22.17, "learning_rate": 1.7044401294498382e-05, "loss": 0.0446, "step": 57080 }, { "epoch": 22.17, "learning_rate": 1.7043883495145633e-05, "loss": 0.1633, "step": 57090 }, { "epoch": 22.17, "learning_rate": 1.704336569579288e-05, "loss": 0.0606, "step": 57100 }, { "epoch": 22.18, "learning_rate": 1.7042847896440133e-05, "loss": 0.0775, "step": 57110 }, { "epoch": 22.18, "learning_rate": 1.704233009708738e-05, "loss": 0.0193, "step": 57120 }, { "epoch": 22.19, "learning_rate": 1.7041812297734632e-05, "loss": 0.0852, "step": 57130 }, { "epoch": 22.19, "learning_rate": 1.7041294498381876e-05, "loss": 0.0289, "step": 57140 }, { "epoch": 22.19, "learning_rate": 1.7040776699029128e-05, "loss": 0.1081, "step": 57150 }, { "epoch": 22.2, "learning_rate": 1.7040258899676376e-05, "loss": 0.0486, "step": 57160 }, { "epoch": 22.2, "learning_rate": 1.7039741100323627e-05, "loss": 0.0552, "step": 57170 }, { "epoch": 22.21, "learning_rate": 1.7039223300970875e-05, "loss": 0.1795, "step": 57180 }, { "epoch": 22.21, "learning_rate": 1.7038705501618126e-05, "loss": 0.0634, "step": 57190 }, { "epoch": 22.21, "learning_rate": 1.7038187702265374e-05, "loss": 0.0021, "step": 57200 }, { "epoch": 22.22, "learning_rate": 1.7037669902912622e-05, "loss": 0.0455, "step": 57210 }, { "epoch": 22.22, "learning_rate": 1.7037152103559873e-05, "loss": 0.1692, "step": 57220 }, { "epoch": 22.23, "learning_rate": 1.703663430420712e-05, "loss": 0.1722, "step": 57230 }, { "epoch": 22.23, "learning_rate": 1.703611650485437e-05, "loss": 0.0644, "step": 57240 }, { "epoch": 22.23, "learning_rate": 1.703559870550162e-05, "loss": 0.0204, "step": 57250 }, { "epoch": 22.24, "learning_rate": 1.703508090614887e-05, "loss": 0.0976, "step": 57260 }, { "epoch": 22.24, "learning_rate": 1.703456310679612e-05, "loss": 0.0794, "step": 57270 }, { "epoch": 22.24, "learning_rate": 1.7034045307443368e-05, "loss": 0.0689, "step": 57280 }, { "epoch": 22.25, "learning_rate": 1.7033527508090615e-05, "loss": 0.2256, "step": 57290 }, { "epoch": 22.25, "learning_rate": 1.7033009708737867e-05, "loss": 0.0108, "step": 57300 }, { "epoch": 22.26, "learning_rate": 1.7032491909385115e-05, "loss": 0.1096, "step": 57310 }, { "epoch": 22.26, "learning_rate": 1.7031974110032363e-05, "loss": 0.0758, "step": 57320 }, { "epoch": 22.26, "learning_rate": 1.7031456310679614e-05, "loss": 0.1962, "step": 57330 }, { "epoch": 22.27, "learning_rate": 1.7030938511326862e-05, "loss": 0.0962, "step": 57340 }, { "epoch": 22.27, "learning_rate": 1.7030420711974113e-05, "loss": 0.1346, "step": 57350 }, { "epoch": 22.28, "learning_rate": 1.702990291262136e-05, "loss": 0.1051, "step": 57360 }, { "epoch": 22.28, "learning_rate": 1.702938511326861e-05, "loss": 0.089, "step": 57370 }, { "epoch": 22.28, "learning_rate": 1.702886731391586e-05, "loss": 0.0816, "step": 57380 }, { "epoch": 22.29, "learning_rate": 1.7028349514563108e-05, "loss": 0.053, "step": 57390 }, { "epoch": 22.29, "learning_rate": 1.7027831715210356e-05, "loss": 0.1248, "step": 57400 }, { "epoch": 22.3, "learning_rate": 1.7027313915857608e-05, "loss": 0.0599, "step": 57410 }, { "epoch": 22.3, "learning_rate": 1.7026796116504855e-05, "loss": 0.2171, "step": 57420 }, { "epoch": 22.3, "learning_rate": 1.7026278317152107e-05, "loss": 0.1923, "step": 57430 }, { "epoch": 22.31, "learning_rate": 1.7025760517799355e-05, "loss": 0.1424, "step": 57440 }, { "epoch": 22.31, "learning_rate": 1.7025242718446603e-05, "loss": 0.2522, "step": 57450 }, { "epoch": 22.31, "learning_rate": 1.7024724919093854e-05, "loss": 0.096, "step": 57460 }, { "epoch": 22.32, "learning_rate": 1.7024207119741102e-05, "loss": 0.127, "step": 57470 }, { "epoch": 22.32, "learning_rate": 1.702368932038835e-05, "loss": 0.1278, "step": 57480 }, { "epoch": 22.33, "learning_rate": 1.70231715210356e-05, "loss": 0.1319, "step": 57490 }, { "epoch": 22.33, "learning_rate": 1.702265372168285e-05, "loss": 0.1965, "step": 57500 }, { "epoch": 22.33, "learning_rate": 1.7022135922330097e-05, "loss": 0.1247, "step": 57510 }, { "epoch": 22.34, "learning_rate": 1.7021618122977348e-05, "loss": 0.0968, "step": 57520 }, { "epoch": 22.34, "learning_rate": 1.7021100323624596e-05, "loss": 0.0787, "step": 57530 }, { "epoch": 22.35, "learning_rate": 1.7020582524271847e-05, "loss": 0.1616, "step": 57540 }, { "epoch": 22.35, "learning_rate": 1.7020064724919095e-05, "loss": 0.1193, "step": 57550 }, { "epoch": 22.35, "learning_rate": 1.7019546925566343e-05, "loss": 0.0987, "step": 57560 }, { "epoch": 22.36, "learning_rate": 1.7019029126213595e-05, "loss": 0.1525, "step": 57570 }, { "epoch": 22.36, "learning_rate": 1.7018511326860843e-05, "loss": 0.0387, "step": 57580 }, { "epoch": 22.37, "learning_rate": 1.701799352750809e-05, "loss": 0.2793, "step": 57590 }, { "epoch": 22.37, "learning_rate": 1.7017475728155342e-05, "loss": 0.0607, "step": 57600 }, { "epoch": 22.37, "learning_rate": 1.701695792880259e-05, "loss": 0.1299, "step": 57610 }, { "epoch": 22.38, "learning_rate": 1.701644012944984e-05, "loss": 0.124, "step": 57620 }, { "epoch": 22.38, "learning_rate": 1.701592233009709e-05, "loss": 0.1442, "step": 57630 }, { "epoch": 22.38, "learning_rate": 1.7015404530744337e-05, "loss": 0.1942, "step": 57640 }, { "epoch": 22.39, "learning_rate": 1.7014886731391588e-05, "loss": 0.0763, "step": 57650 }, { "epoch": 22.39, "learning_rate": 1.7014368932038836e-05, "loss": 0.1281, "step": 57660 }, { "epoch": 22.4, "learning_rate": 1.7013851132686084e-05, "loss": 0.1476, "step": 57670 }, { "epoch": 22.4, "learning_rate": 1.7013333333333335e-05, "loss": 0.1007, "step": 57680 }, { "epoch": 22.4, "learning_rate": 1.7012815533980583e-05, "loss": 0.0673, "step": 57690 }, { "epoch": 22.41, "learning_rate": 1.7012297734627835e-05, "loss": 0.1719, "step": 57700 }, { "epoch": 22.41, "learning_rate": 1.7011779935275082e-05, "loss": 0.0525, "step": 57710 }, { "epoch": 22.42, "learning_rate": 1.701126213592233e-05, "loss": 0.1791, "step": 57720 }, { "epoch": 22.42, "learning_rate": 1.7010744336569582e-05, "loss": 0.299, "step": 57730 }, { "epoch": 22.42, "learning_rate": 1.701022653721683e-05, "loss": 0.1779, "step": 57740 }, { "epoch": 22.43, "learning_rate": 1.7009708737864078e-05, "loss": 0.2476, "step": 57750 }, { "epoch": 22.43, "learning_rate": 1.700919093851133e-05, "loss": 0.1122, "step": 57760 }, { "epoch": 22.43, "learning_rate": 1.7008673139158577e-05, "loss": 0.1066, "step": 57770 }, { "epoch": 22.44, "learning_rate": 1.7008155339805828e-05, "loss": 0.0789, "step": 57780 }, { "epoch": 22.44, "learning_rate": 1.7007637540453076e-05, "loss": 0.0787, "step": 57790 }, { "epoch": 22.45, "learning_rate": 1.7007119741100324e-05, "loss": 0.041, "step": 57800 }, { "epoch": 22.45, "learning_rate": 1.7006601941747575e-05, "loss": 0.1253, "step": 57810 }, { "epoch": 22.45, "learning_rate": 1.7006084142394823e-05, "loss": 0.0553, "step": 57820 }, { "epoch": 22.46, "learning_rate": 1.700556634304207e-05, "loss": 0.0448, "step": 57830 }, { "epoch": 22.46, "learning_rate": 1.7005048543689322e-05, "loss": 0.1449, "step": 57840 }, { "epoch": 22.47, "learning_rate": 1.700453074433657e-05, "loss": 0.0639, "step": 57850 }, { "epoch": 22.47, "learning_rate": 1.700401294498382e-05, "loss": 0.147, "step": 57860 }, { "epoch": 22.47, "learning_rate": 1.700349514563107e-05, "loss": 0.1132, "step": 57870 }, { "epoch": 22.48, "learning_rate": 1.7002977346278318e-05, "loss": 0.0155, "step": 57880 }, { "epoch": 22.48, "learning_rate": 1.7002459546925565e-05, "loss": 0.116, "step": 57890 }, { "epoch": 22.49, "learning_rate": 1.7001941747572817e-05, "loss": 0.1898, "step": 57900 }, { "epoch": 22.49, "learning_rate": 1.7001423948220065e-05, "loss": 0.0544, "step": 57910 }, { "epoch": 22.49, "learning_rate": 1.7000906148867316e-05, "loss": 0.1148, "step": 57920 }, { "epoch": 22.5, "learning_rate": 1.7000388349514564e-05, "loss": 0.1354, "step": 57930 }, { "epoch": 22.5, "learning_rate": 1.6999870550161815e-05, "loss": 0.2258, "step": 57940 }, { "epoch": 22.5, "learning_rate": 1.6999352750809063e-05, "loss": 0.0622, "step": 57950 }, { "epoch": 22.51, "learning_rate": 1.699883495145631e-05, "loss": 0.2274, "step": 57960 }, { "epoch": 22.51, "learning_rate": 1.699831715210356e-05, "loss": 0.071, "step": 57970 }, { "epoch": 22.52, "learning_rate": 1.699779935275081e-05, "loss": 0.305, "step": 57980 }, { "epoch": 22.52, "learning_rate": 1.6997281553398058e-05, "loss": 0.1886, "step": 57990 }, { "epoch": 22.52, "learning_rate": 1.699676375404531e-05, "loss": 0.0855, "step": 58000 }, { "epoch": 22.53, "learning_rate": 1.6996245954692557e-05, "loss": 0.0473, "step": 58010 }, { "epoch": 22.53, "learning_rate": 1.699572815533981e-05, "loss": 0.1049, "step": 58020 }, { "epoch": 22.54, "learning_rate": 1.6995210355987057e-05, "loss": 0.0841, "step": 58030 }, { "epoch": 22.54, "learning_rate": 1.6994692556634305e-05, "loss": 0.1048, "step": 58040 }, { "epoch": 22.54, "learning_rate": 1.6994174757281553e-05, "loss": 0.0617, "step": 58050 }, { "epoch": 22.55, "learning_rate": 1.6993656957928804e-05, "loss": 0.2287, "step": 58060 }, { "epoch": 22.55, "learning_rate": 1.6993139158576052e-05, "loss": 0.144, "step": 58070 }, { "epoch": 22.56, "learning_rate": 1.6992621359223303e-05, "loss": 0.087, "step": 58080 }, { "epoch": 22.56, "learning_rate": 1.699210355987055e-05, "loss": 0.1708, "step": 58090 }, { "epoch": 22.56, "learning_rate": 1.6991585760517802e-05, "loss": 0.0515, "step": 58100 }, { "epoch": 22.57, "learning_rate": 1.699106796116505e-05, "loss": 0.072, "step": 58110 }, { "epoch": 22.57, "learning_rate": 1.6990550161812298e-05, "loss": 0.2188, "step": 58120 }, { "epoch": 22.57, "learning_rate": 1.6990032362459546e-05, "loss": 0.2649, "step": 58130 }, { "epoch": 22.58, "learning_rate": 1.6989514563106797e-05, "loss": 0.0191, "step": 58140 }, { "epoch": 22.58, "learning_rate": 1.6988996763754045e-05, "loss": 0.1785, "step": 58150 }, { "epoch": 22.59, "learning_rate": 1.6988478964401297e-05, "loss": 0.0601, "step": 58160 }, { "epoch": 22.59, "learning_rate": 1.6987961165048545e-05, "loss": 0.0506, "step": 58170 }, { "epoch": 22.59, "learning_rate": 1.6987443365695796e-05, "loss": 0.0513, "step": 58180 }, { "epoch": 22.6, "learning_rate": 1.6986925566343044e-05, "loss": 0.0515, "step": 58190 }, { "epoch": 22.6, "learning_rate": 1.6986407766990292e-05, "loss": 0.0886, "step": 58200 }, { "epoch": 22.61, "learning_rate": 1.698588996763754e-05, "loss": 0.0661, "step": 58210 }, { "epoch": 22.61, "learning_rate": 1.698537216828479e-05, "loss": 0.2795, "step": 58220 }, { "epoch": 22.61, "learning_rate": 1.698485436893204e-05, "loss": 0.0744, "step": 58230 }, { "epoch": 22.62, "learning_rate": 1.698433656957929e-05, "loss": 0.216, "step": 58240 }, { "epoch": 22.62, "learning_rate": 1.6983818770226538e-05, "loss": 0.0652, "step": 58250 }, { "epoch": 22.63, "learning_rate": 1.698330097087379e-05, "loss": 0.1286, "step": 58260 }, { "epoch": 22.63, "learning_rate": 1.6982783171521037e-05, "loss": 0.0827, "step": 58270 }, { "epoch": 22.63, "learning_rate": 1.6982265372168285e-05, "loss": 0.0236, "step": 58280 }, { "epoch": 22.64, "learning_rate": 1.6981747572815533e-05, "loss": 0.126, "step": 58290 }, { "epoch": 22.64, "learning_rate": 1.6981229773462785e-05, "loss": 0.2212, "step": 58300 }, { "epoch": 22.64, "learning_rate": 1.6980711974110032e-05, "loss": 0.042, "step": 58310 }, { "epoch": 22.65, "learning_rate": 1.6980194174757284e-05, "loss": 0.1522, "step": 58320 }, { "epoch": 22.65, "learning_rate": 1.6979676375404532e-05, "loss": 0.1429, "step": 58330 }, { "epoch": 22.66, "learning_rate": 1.6979158576051783e-05, "loss": 0.157, "step": 58340 }, { "epoch": 22.66, "learning_rate": 1.697864077669903e-05, "loss": 0.0994, "step": 58350 }, { "epoch": 22.66, "learning_rate": 1.6978122977346282e-05, "loss": 0.0834, "step": 58360 }, { "epoch": 22.67, "learning_rate": 1.6977605177993527e-05, "loss": 0.1411, "step": 58370 }, { "epoch": 22.67, "learning_rate": 1.6977087378640778e-05, "loss": 0.1666, "step": 58380 }, { "epoch": 22.68, "learning_rate": 1.6976569579288026e-05, "loss": 0.0351, "step": 58390 }, { "epoch": 22.68, "learning_rate": 1.6976051779935277e-05, "loss": 0.103, "step": 58400 }, { "epoch": 22.68, "learning_rate": 1.6975533980582525e-05, "loss": 0.13, "step": 58410 }, { "epoch": 22.69, "learning_rate": 1.6975016181229777e-05, "loss": 0.0492, "step": 58420 }, { "epoch": 22.69, "learning_rate": 1.6974498381877024e-05, "loss": 0.0913, "step": 58430 }, { "epoch": 22.7, "learning_rate": 1.6973980582524276e-05, "loss": 0.0205, "step": 58440 }, { "epoch": 22.7, "learning_rate": 1.697346278317152e-05, "loss": 0.182, "step": 58450 }, { "epoch": 22.7, "learning_rate": 1.697294498381877e-05, "loss": 0.068, "step": 58460 }, { "epoch": 22.71, "learning_rate": 1.697242718446602e-05, "loss": 0.1399, "step": 58470 }, { "epoch": 22.71, "learning_rate": 1.697190938511327e-05, "loss": 0.1844, "step": 58480 }, { "epoch": 22.71, "learning_rate": 1.697139158576052e-05, "loss": 0.0053, "step": 58490 }, { "epoch": 22.72, "learning_rate": 1.697087378640777e-05, "loss": 0.1506, "step": 58500 }, { "epoch": 22.72, "learning_rate": 1.6970355987055018e-05, "loss": 0.0964, "step": 58510 }, { "epoch": 22.73, "learning_rate": 1.696983818770227e-05, "loss": 0.2475, "step": 58520 }, { "epoch": 22.73, "learning_rate": 1.6969320388349514e-05, "loss": 0.1454, "step": 58530 }, { "epoch": 22.73, "learning_rate": 1.6968802588996765e-05, "loss": 0.2255, "step": 58540 }, { "epoch": 22.74, "learning_rate": 1.6968284789644013e-05, "loss": 0.1579, "step": 58550 }, { "epoch": 22.74, "learning_rate": 1.6967766990291264e-05, "loss": 0.0906, "step": 58560 }, { "epoch": 22.75, "learning_rate": 1.6967249190938512e-05, "loss": 0.1187, "step": 58570 }, { "epoch": 22.75, "learning_rate": 1.6966731391585764e-05, "loss": 0.0903, "step": 58580 }, { "epoch": 22.75, "learning_rate": 1.696621359223301e-05, "loss": 0.0859, "step": 58590 }, { "epoch": 22.76, "learning_rate": 1.6965695792880263e-05, "loss": 0.0535, "step": 58600 }, { "epoch": 22.76, "learning_rate": 1.6965177993527507e-05, "loss": 0.0656, "step": 58610 }, { "epoch": 22.77, "learning_rate": 1.696466019417476e-05, "loss": 0.0973, "step": 58620 }, { "epoch": 22.77, "learning_rate": 1.6964142394822007e-05, "loss": 0.1925, "step": 58630 }, { "epoch": 22.77, "learning_rate": 1.6963624595469258e-05, "loss": 0.0856, "step": 58640 }, { "epoch": 22.78, "learning_rate": 1.6963106796116506e-05, "loss": 0.129, "step": 58650 }, { "epoch": 22.78, "learning_rate": 1.6962588996763757e-05, "loss": 0.1561, "step": 58660 }, { "epoch": 22.78, "learning_rate": 1.6962071197411005e-05, "loss": 0.2181, "step": 58670 }, { "epoch": 22.79, "learning_rate": 1.6961553398058253e-05, "loss": 0.0919, "step": 58680 }, { "epoch": 22.79, "learning_rate": 1.69610355987055e-05, "loss": 0.0479, "step": 58690 }, { "epoch": 22.8, "learning_rate": 1.6960517799352752e-05, "loss": 0.2773, "step": 58700 }, { "epoch": 22.8, "learning_rate": 1.696e-05, "loss": 0.2401, "step": 58710 }, { "epoch": 22.8, "learning_rate": 1.695948220064725e-05, "loss": 0.0825, "step": 58720 }, { "epoch": 22.81, "learning_rate": 1.69589644012945e-05, "loss": 0.2304, "step": 58730 }, { "epoch": 22.81, "learning_rate": 1.695844660194175e-05, "loss": 0.2856, "step": 58740 }, { "epoch": 22.82, "learning_rate": 1.6957928802589e-05, "loss": 0.1123, "step": 58750 }, { "epoch": 22.82, "learning_rate": 1.6957411003236247e-05, "loss": 0.0891, "step": 58760 }, { "epoch": 22.82, "learning_rate": 1.6956893203883495e-05, "loss": 0.2267, "step": 58770 }, { "epoch": 22.83, "learning_rate": 1.6956375404530746e-05, "loss": 0.1192, "step": 58780 }, { "epoch": 22.83, "learning_rate": 1.6955857605177994e-05, "loss": 0.0996, "step": 58790 }, { "epoch": 22.83, "learning_rate": 1.6955339805825245e-05, "loss": 0.0248, "step": 58800 }, { "epoch": 22.84, "learning_rate": 1.6954822006472493e-05, "loss": 0.1564, "step": 58810 }, { "epoch": 22.84, "learning_rate": 1.6954304207119744e-05, "loss": 0.244, "step": 58820 }, { "epoch": 22.85, "learning_rate": 1.6953786407766992e-05, "loss": 0.2019, "step": 58830 }, { "epoch": 22.85, "learning_rate": 1.695326860841424e-05, "loss": 0.3624, "step": 58840 }, { "epoch": 22.85, "learning_rate": 1.6952750809061488e-05, "loss": 0.1258, "step": 58850 }, { "epoch": 22.86, "learning_rate": 1.695223300970874e-05, "loss": 0.1295, "step": 58860 }, { "epoch": 22.86, "learning_rate": 1.6951715210355987e-05, "loss": 0.0489, "step": 58870 }, { "epoch": 22.87, "learning_rate": 1.695119741100324e-05, "loss": 0.065, "step": 58880 }, { "epoch": 22.87, "learning_rate": 1.6950679611650487e-05, "loss": 0.0495, "step": 58890 }, { "epoch": 22.87, "learning_rate": 1.6950161812297738e-05, "loss": 0.0875, "step": 58900 }, { "epoch": 22.88, "learning_rate": 1.6949644012944986e-05, "loss": 0.1045, "step": 58910 }, { "epoch": 22.88, "learning_rate": 1.6949126213592234e-05, "loss": 0.0625, "step": 58920 }, { "epoch": 22.89, "learning_rate": 1.6948608414239485e-05, "loss": 0.0866, "step": 58930 }, { "epoch": 22.89, "learning_rate": 1.6948090614886733e-05, "loss": 0.1081, "step": 58940 }, { "epoch": 22.89, "learning_rate": 1.694757281553398e-05, "loss": 0.2141, "step": 58950 }, { "epoch": 22.9, "learning_rate": 1.6947055016181232e-05, "loss": 0.1194, "step": 58960 }, { "epoch": 22.9, "learning_rate": 1.694653721682848e-05, "loss": 0.0461, "step": 58970 }, { "epoch": 22.9, "learning_rate": 1.6946019417475728e-05, "loss": 0.0403, "step": 58980 }, { "epoch": 22.91, "learning_rate": 1.694550161812298e-05, "loss": 0.2732, "step": 58990 }, { "epoch": 22.91, "learning_rate": 1.6944983818770227e-05, "loss": 0.034, "step": 59000 }, { "epoch": 22.92, "learning_rate": 1.694446601941748e-05, "loss": 0.1972, "step": 59010 }, { "epoch": 22.92, "learning_rate": 1.6943948220064727e-05, "loss": 0.1307, "step": 59020 }, { "epoch": 22.92, "learning_rate": 1.6943430420711974e-05, "loss": 0.0412, "step": 59030 }, { "epoch": 22.93, "learning_rate": 1.6942912621359226e-05, "loss": 0.0462, "step": 59040 }, { "epoch": 22.93, "learning_rate": 1.6942394822006474e-05, "loss": 0.0932, "step": 59050 }, { "epoch": 22.94, "learning_rate": 1.694187702265372e-05, "loss": 0.0382, "step": 59060 }, { "epoch": 22.94, "learning_rate": 1.6941359223300973e-05, "loss": 0.1444, "step": 59070 }, { "epoch": 22.94, "learning_rate": 1.694084142394822e-05, "loss": 0.0536, "step": 59080 }, { "epoch": 22.95, "learning_rate": 1.6940323624595472e-05, "loss": 0.2126, "step": 59090 }, { "epoch": 22.95, "learning_rate": 1.693980582524272e-05, "loss": 0.0722, "step": 59100 }, { "epoch": 22.96, "learning_rate": 1.6939288025889968e-05, "loss": 0.0368, "step": 59110 }, { "epoch": 22.96, "learning_rate": 1.693877022653722e-05, "loss": 0.0831, "step": 59120 }, { "epoch": 22.96, "learning_rate": 1.6938252427184467e-05, "loss": 0.0709, "step": 59130 }, { "epoch": 22.97, "learning_rate": 1.6937734627831715e-05, "loss": 0.1769, "step": 59140 }, { "epoch": 22.97, "learning_rate": 1.6937216828478967e-05, "loss": 0.0714, "step": 59150 }, { "epoch": 22.97, "learning_rate": 1.6936699029126214e-05, "loss": 0.0824, "step": 59160 }, { "epoch": 22.98, "learning_rate": 1.6936181229773466e-05, "loss": 0.0851, "step": 59170 }, { "epoch": 22.98, "learning_rate": 1.6935663430420714e-05, "loss": 0.1463, "step": 59180 }, { "epoch": 22.99, "learning_rate": 1.693514563106796e-05, "loss": 0.0013, "step": 59190 }, { "epoch": 22.99, "learning_rate": 1.6934627831715213e-05, "loss": 0.1208, "step": 59200 }, { "epoch": 22.99, "learning_rate": 1.693411003236246e-05, "loss": 0.0521, "step": 59210 }, { "epoch": 23.0, "learning_rate": 1.693359223300971e-05, "loss": 0.1811, "step": 59220 }, { "epoch": 23.0, "eval_accuracy": 0.9537826685006877, "eval_loss": 0.26258033514022827, "eval_runtime": 8.1758, "eval_samples_per_second": 444.606, "eval_steps_per_second": 55.652, "step": 59225 }, { "epoch": 23.0, "learning_rate": 1.693307443365696e-05, "loss": 0.0555, "step": 59230 }, { "epoch": 23.01, "learning_rate": 1.6932556634304208e-05, "loss": 0.0798, "step": 59240 }, { "epoch": 23.01, "learning_rate": 1.693203883495146e-05, "loss": 0.1749, "step": 59250 }, { "epoch": 23.01, "learning_rate": 1.6931521035598707e-05, "loss": 0.2087, "step": 59260 }, { "epoch": 23.02, "learning_rate": 1.6931003236245955e-05, "loss": 0.0888, "step": 59270 }, { "epoch": 23.02, "learning_rate": 1.6930485436893206e-05, "loss": 0.0434, "step": 59280 }, { "epoch": 23.03, "learning_rate": 1.6929967637540454e-05, "loss": 0.1861, "step": 59290 }, { "epoch": 23.03, "learning_rate": 1.6929449838187702e-05, "loss": 0.1173, "step": 59300 }, { "epoch": 23.03, "learning_rate": 1.6928932038834954e-05, "loss": 0.062, "step": 59310 }, { "epoch": 23.04, "learning_rate": 1.69284142394822e-05, "loss": 0.0012, "step": 59320 }, { "epoch": 23.04, "learning_rate": 1.6927896440129453e-05, "loss": 0.1807, "step": 59330 }, { "epoch": 23.04, "learning_rate": 1.69273786407767e-05, "loss": 0.1428, "step": 59340 }, { "epoch": 23.05, "learning_rate": 1.692686084142395e-05, "loss": 0.0456, "step": 59350 }, { "epoch": 23.05, "learning_rate": 1.6926343042071197e-05, "loss": 0.0576, "step": 59360 }, { "epoch": 23.06, "learning_rate": 1.6925825242718448e-05, "loss": 0.0907, "step": 59370 }, { "epoch": 23.06, "learning_rate": 1.6925307443365696e-05, "loss": 0.1856, "step": 59380 }, { "epoch": 23.06, "learning_rate": 1.6924789644012947e-05, "loss": 0.1394, "step": 59390 }, { "epoch": 23.07, "learning_rate": 1.6924271844660195e-05, "loss": 0.1159, "step": 59400 }, { "epoch": 23.07, "learning_rate": 1.6923754045307446e-05, "loss": 0.0387, "step": 59410 }, { "epoch": 23.08, "learning_rate": 1.6923236245954694e-05, "loss": 0.0594, "step": 59420 }, { "epoch": 23.08, "learning_rate": 1.6922718446601942e-05, "loss": 0.1529, "step": 59430 }, { "epoch": 23.08, "learning_rate": 1.692220064724919e-05, "loss": 0.1416, "step": 59440 }, { "epoch": 23.09, "learning_rate": 1.692168284789644e-05, "loss": 0.0862, "step": 59450 }, { "epoch": 23.09, "learning_rate": 1.692116504854369e-05, "loss": 0.0877, "step": 59460 }, { "epoch": 23.1, "learning_rate": 1.692064724919094e-05, "loss": 0.0766, "step": 59470 }, { "epoch": 23.1, "learning_rate": 1.692012944983819e-05, "loss": 0.0084, "step": 59480 }, { "epoch": 23.1, "learning_rate": 1.691961165048544e-05, "loss": 0.0409, "step": 59490 }, { "epoch": 23.11, "learning_rate": 1.6919093851132688e-05, "loss": 0.1379, "step": 59500 }, { "epoch": 23.11, "learning_rate": 1.6918576051779936e-05, "loss": 0.0598, "step": 59510 }, { "epoch": 23.11, "learning_rate": 1.6918058252427184e-05, "loss": 0.0606, "step": 59520 }, { "epoch": 23.12, "learning_rate": 1.6917540453074435e-05, "loss": 0.0702, "step": 59530 }, { "epoch": 23.12, "learning_rate": 1.6917022653721683e-05, "loss": 0.009, "step": 59540 }, { "epoch": 23.13, "learning_rate": 1.6916504854368934e-05, "loss": 0.0149, "step": 59550 }, { "epoch": 23.13, "learning_rate": 1.6915987055016182e-05, "loss": 0.055, "step": 59560 }, { "epoch": 23.13, "learning_rate": 1.6915469255663434e-05, "loss": 0.189, "step": 59570 }, { "epoch": 23.14, "learning_rate": 1.691495145631068e-05, "loss": 0.2194, "step": 59580 }, { "epoch": 23.14, "learning_rate": 1.691443365695793e-05, "loss": 0.0497, "step": 59590 }, { "epoch": 23.15, "learning_rate": 1.6913915857605177e-05, "loss": 0.0698, "step": 59600 }, { "epoch": 23.15, "learning_rate": 1.691339805825243e-05, "loss": 0.0998, "step": 59610 }, { "epoch": 23.15, "learning_rate": 1.6912880258899677e-05, "loss": 0.164, "step": 59620 }, { "epoch": 23.16, "learning_rate": 1.6912362459546928e-05, "loss": 0.0282, "step": 59630 }, { "epoch": 23.16, "learning_rate": 1.6911844660194176e-05, "loss": 0.224, "step": 59640 }, { "epoch": 23.17, "learning_rate": 1.6911326860841427e-05, "loss": 0.1048, "step": 59650 }, { "epoch": 23.17, "learning_rate": 1.6910809061488675e-05, "loss": 0.059, "step": 59660 }, { "epoch": 23.17, "learning_rate": 1.6910291262135923e-05, "loss": 0.1952, "step": 59670 }, { "epoch": 23.18, "learning_rate": 1.690977346278317e-05, "loss": 0.263, "step": 59680 }, { "epoch": 23.18, "learning_rate": 1.6909255663430422e-05, "loss": 0.0432, "step": 59690 }, { "epoch": 23.18, "learning_rate": 1.690873786407767e-05, "loss": 0.0947, "step": 59700 }, { "epoch": 23.19, "learning_rate": 1.690822006472492e-05, "loss": 0.1292, "step": 59710 }, { "epoch": 23.19, "learning_rate": 1.690770226537217e-05, "loss": 0.0824, "step": 59720 }, { "epoch": 23.2, "learning_rate": 1.690718446601942e-05, "loss": 0.1214, "step": 59730 }, { "epoch": 23.2, "learning_rate": 1.690666666666667e-05, "loss": 0.1139, "step": 59740 }, { "epoch": 23.2, "learning_rate": 1.6906148867313916e-05, "loss": 0.0389, "step": 59750 }, { "epoch": 23.21, "learning_rate": 1.6905631067961164e-05, "loss": 0.0309, "step": 59760 }, { "epoch": 23.21, "learning_rate": 1.6905113268608416e-05, "loss": 0.168, "step": 59770 }, { "epoch": 23.22, "learning_rate": 1.6904595469255664e-05, "loss": 0.0307, "step": 59780 }, { "epoch": 23.22, "learning_rate": 1.6904077669902915e-05, "loss": 0.0525, "step": 59790 }, { "epoch": 23.22, "learning_rate": 1.6903559870550163e-05, "loss": 0.1572, "step": 59800 }, { "epoch": 23.23, "learning_rate": 1.6903042071197414e-05, "loss": 0.0713, "step": 59810 }, { "epoch": 23.23, "learning_rate": 1.6902524271844662e-05, "loss": 0.0648, "step": 59820 }, { "epoch": 23.23, "learning_rate": 1.690200647249191e-05, "loss": 0.0912, "step": 59830 }, { "epoch": 23.24, "learning_rate": 1.6901488673139158e-05, "loss": 0.2647, "step": 59840 }, { "epoch": 23.24, "learning_rate": 1.690097087378641e-05, "loss": 0.1224, "step": 59850 }, { "epoch": 23.25, "learning_rate": 1.6900453074433657e-05, "loss": 0.0646, "step": 59860 }, { "epoch": 23.25, "learning_rate": 1.689993527508091e-05, "loss": 0.0245, "step": 59870 }, { "epoch": 23.25, "learning_rate": 1.6899417475728156e-05, "loss": 0.0702, "step": 59880 }, { "epoch": 23.26, "learning_rate": 1.6898899676375408e-05, "loss": 0.0674, "step": 59890 }, { "epoch": 23.26, "learning_rate": 1.6898381877022656e-05, "loss": 0.057, "step": 59900 }, { "epoch": 23.27, "learning_rate": 1.6897864077669904e-05, "loss": 0.0633, "step": 59910 }, { "epoch": 23.27, "learning_rate": 1.689734627831715e-05, "loss": 0.1023, "step": 59920 }, { "epoch": 23.27, "learning_rate": 1.6896828478964403e-05, "loss": 0.079, "step": 59930 }, { "epoch": 23.28, "learning_rate": 1.689631067961165e-05, "loss": 0.0908, "step": 59940 }, { "epoch": 23.28, "learning_rate": 1.6895792880258902e-05, "loss": 0.077, "step": 59950 }, { "epoch": 23.29, "learning_rate": 1.689527508090615e-05, "loss": 0.0877, "step": 59960 }, { "epoch": 23.29, "learning_rate": 1.68947572815534e-05, "loss": 0.4114, "step": 59970 }, { "epoch": 23.29, "learning_rate": 1.689423948220065e-05, "loss": 0.0432, "step": 59980 }, { "epoch": 23.3, "learning_rate": 1.68937216828479e-05, "loss": 0.2354, "step": 59990 }, { "epoch": 23.3, "learning_rate": 1.6893203883495145e-05, "loss": 0.1698, "step": 60000 }, { "epoch": 23.3, "learning_rate": 1.6892686084142396e-05, "loss": 0.0811, "step": 60010 }, { "epoch": 23.31, "learning_rate": 1.6892168284789644e-05, "loss": 0.0182, "step": 60020 }, { "epoch": 23.31, "learning_rate": 1.6891650485436896e-05, "loss": 0.1474, "step": 60030 }, { "epoch": 23.32, "learning_rate": 1.6891132686084144e-05, "loss": 0.158, "step": 60040 }, { "epoch": 23.32, "learning_rate": 1.6890614886731395e-05, "loss": 0.1737, "step": 60050 }, { "epoch": 23.32, "learning_rate": 1.6890097087378643e-05, "loss": 0.0814, "step": 60060 }, { "epoch": 23.33, "learning_rate": 1.6889579288025894e-05, "loss": 0.1402, "step": 60070 }, { "epoch": 23.33, "learning_rate": 1.688906148867314e-05, "loss": 0.079, "step": 60080 }, { "epoch": 23.34, "learning_rate": 1.688854368932039e-05, "loss": 0.0245, "step": 60090 }, { "epoch": 23.34, "learning_rate": 1.6888025889967638e-05, "loss": 0.0968, "step": 60100 }, { "epoch": 23.34, "learning_rate": 1.688750809061489e-05, "loss": 0.2734, "step": 60110 }, { "epoch": 23.35, "learning_rate": 1.6886990291262137e-05, "loss": 0.1262, "step": 60120 }, { "epoch": 23.35, "learning_rate": 1.688647249190939e-05, "loss": 0.1816, "step": 60130 }, { "epoch": 23.36, "learning_rate": 1.6885954692556636e-05, "loss": 0.1202, "step": 60140 }, { "epoch": 23.36, "learning_rate": 1.6885436893203884e-05, "loss": 0.1726, "step": 60150 }, { "epoch": 23.36, "learning_rate": 1.6884919093851132e-05, "loss": 0.107, "step": 60160 }, { "epoch": 23.37, "learning_rate": 1.6884401294498383e-05, "loss": 0.0987, "step": 60170 }, { "epoch": 23.37, "learning_rate": 1.688388349514563e-05, "loss": 0.1186, "step": 60180 }, { "epoch": 23.37, "learning_rate": 1.6883365695792883e-05, "loss": 0.0222, "step": 60190 }, { "epoch": 23.38, "learning_rate": 1.688284789644013e-05, "loss": 0.0677, "step": 60200 }, { "epoch": 23.38, "learning_rate": 1.6882330097087382e-05, "loss": 0.1066, "step": 60210 }, { "epoch": 23.39, "learning_rate": 1.688181229773463e-05, "loss": 0.186, "step": 60220 }, { "epoch": 23.39, "learning_rate": 1.6881294498381878e-05, "loss": 0.0952, "step": 60230 }, { "epoch": 23.39, "learning_rate": 1.6880776699029126e-05, "loss": 0.0346, "step": 60240 }, { "epoch": 23.4, "learning_rate": 1.6880258899676377e-05, "loss": 0.0708, "step": 60250 }, { "epoch": 23.4, "learning_rate": 1.6879741100323625e-05, "loss": 0.0781, "step": 60260 }, { "epoch": 23.41, "learning_rate": 1.6879223300970876e-05, "loss": 0.0944, "step": 60270 }, { "epoch": 23.41, "learning_rate": 1.6878705501618124e-05, "loss": 0.1052, "step": 60280 }, { "epoch": 23.41, "learning_rate": 1.6878187702265376e-05, "loss": 0.1652, "step": 60290 }, { "epoch": 23.42, "learning_rate": 1.6877669902912623e-05, "loss": 0.0112, "step": 60300 }, { "epoch": 23.42, "learning_rate": 1.687715210355987e-05, "loss": 0.0763, "step": 60310 }, { "epoch": 23.43, "learning_rate": 1.687663430420712e-05, "loss": 0.2176, "step": 60320 }, { "epoch": 23.43, "learning_rate": 1.687611650485437e-05, "loss": 0.1148, "step": 60330 }, { "epoch": 23.43, "learning_rate": 1.687559870550162e-05, "loss": 0.1124, "step": 60340 }, { "epoch": 23.44, "learning_rate": 1.687508090614887e-05, "loss": 0.0379, "step": 60350 }, { "epoch": 23.44, "learning_rate": 1.6874563106796118e-05, "loss": 0.043, "step": 60360 }, { "epoch": 23.44, "learning_rate": 1.687404530744337e-05, "loss": 0.1121, "step": 60370 }, { "epoch": 23.45, "learning_rate": 1.6873527508090617e-05, "loss": 0.0255, "step": 60380 }, { "epoch": 23.45, "learning_rate": 1.6873009708737865e-05, "loss": 0.1132, "step": 60390 }, { "epoch": 23.46, "learning_rate": 1.6872491909385113e-05, "loss": 0.1593, "step": 60400 }, { "epoch": 23.46, "learning_rate": 1.6871974110032364e-05, "loss": 0.0571, "step": 60410 }, { "epoch": 23.46, "learning_rate": 1.6871456310679612e-05, "loss": 0.1191, "step": 60420 }, { "epoch": 23.47, "learning_rate": 1.6870938511326863e-05, "loss": 0.097, "step": 60430 }, { "epoch": 23.47, "learning_rate": 1.687042071197411e-05, "loss": 0.1338, "step": 60440 }, { "epoch": 23.48, "learning_rate": 1.686990291262136e-05, "loss": 0.1444, "step": 60450 }, { "epoch": 23.48, "learning_rate": 1.686938511326861e-05, "loss": 0.1539, "step": 60460 }, { "epoch": 23.48, "learning_rate": 1.686886731391586e-05, "loss": 0.0589, "step": 60470 }, { "epoch": 23.49, "learning_rate": 1.6868349514563106e-05, "loss": 0.1808, "step": 60480 }, { "epoch": 23.49, "learning_rate": 1.6867831715210358e-05, "loss": 0.1457, "step": 60490 }, { "epoch": 23.5, "learning_rate": 1.6867313915857606e-05, "loss": 0.1825, "step": 60500 }, { "epoch": 23.5, "learning_rate": 1.6866796116504857e-05, "loss": 0.0853, "step": 60510 }, { "epoch": 23.5, "learning_rate": 1.6866278317152105e-05, "loss": 0.0921, "step": 60520 }, { "epoch": 23.51, "learning_rate": 1.6865760517799353e-05, "loss": 0.1775, "step": 60530 }, { "epoch": 23.51, "learning_rate": 1.6865242718446604e-05, "loss": 0.1445, "step": 60540 }, { "epoch": 23.51, "learning_rate": 1.6864724919093852e-05, "loss": 0.0928, "step": 60550 }, { "epoch": 23.52, "learning_rate": 1.6864207119741103e-05, "loss": 0.2324, "step": 60560 }, { "epoch": 23.52, "learning_rate": 1.686368932038835e-05, "loss": 0.1454, "step": 60570 }, { "epoch": 23.53, "learning_rate": 1.68631715210356e-05, "loss": 0.1046, "step": 60580 }, { "epoch": 23.53, "learning_rate": 1.686265372168285e-05, "loss": 0.0706, "step": 60590 }, { "epoch": 23.53, "learning_rate": 1.68621359223301e-05, "loss": 0.0777, "step": 60600 }, { "epoch": 23.54, "learning_rate": 1.6861618122977346e-05, "loss": 0.1355, "step": 60610 }, { "epoch": 23.54, "learning_rate": 1.6861100323624598e-05, "loss": 0.0708, "step": 60620 }, { "epoch": 23.55, "learning_rate": 1.6860582524271846e-05, "loss": 0.0663, "step": 60630 }, { "epoch": 23.55, "learning_rate": 1.6860064724919097e-05, "loss": 0.1126, "step": 60640 }, { "epoch": 23.55, "learning_rate": 1.6859546925566345e-05, "loss": 0.1843, "step": 60650 }, { "epoch": 23.56, "learning_rate": 1.6859029126213593e-05, "loss": 0.1467, "step": 60660 }, { "epoch": 23.56, "learning_rate": 1.6858511326860844e-05, "loss": 0.1487, "step": 60670 }, { "epoch": 23.57, "learning_rate": 1.6857993527508092e-05, "loss": 0.0759, "step": 60680 }, { "epoch": 23.57, "learning_rate": 1.685747572815534e-05, "loss": 0.0957, "step": 60690 }, { "epoch": 23.57, "learning_rate": 1.685695792880259e-05, "loss": 0.0961, "step": 60700 }, { "epoch": 23.58, "learning_rate": 1.685644012944984e-05, "loss": 0.1028, "step": 60710 }, { "epoch": 23.58, "learning_rate": 1.685592233009709e-05, "loss": 0.0726, "step": 60720 }, { "epoch": 23.58, "learning_rate": 1.685540453074434e-05, "loss": 0.1256, "step": 60730 }, { "epoch": 23.59, "learning_rate": 1.6854886731391586e-05, "loss": 0.0361, "step": 60740 }, { "epoch": 23.59, "learning_rate": 1.6854368932038838e-05, "loss": 0.1173, "step": 60750 }, { "epoch": 23.6, "learning_rate": 1.6853851132686086e-05, "loss": 0.1279, "step": 60760 }, { "epoch": 23.6, "learning_rate": 1.6853333333333333e-05, "loss": 0.095, "step": 60770 }, { "epoch": 23.6, "learning_rate": 1.6852815533980585e-05, "loss": 0.1192, "step": 60780 }, { "epoch": 23.61, "learning_rate": 1.6852297734627833e-05, "loss": 0.0155, "step": 60790 }, { "epoch": 23.61, "learning_rate": 1.6851779935275084e-05, "loss": 0.1017, "step": 60800 }, { "epoch": 23.62, "learning_rate": 1.6851262135922332e-05, "loss": 0.0914, "step": 60810 }, { "epoch": 23.62, "learning_rate": 1.685074433656958e-05, "loss": 0.0601, "step": 60820 }, { "epoch": 23.62, "learning_rate": 1.6850226537216828e-05, "loss": 0.0479, "step": 60830 }, { "epoch": 23.63, "learning_rate": 1.684970873786408e-05, "loss": 0.0344, "step": 60840 }, { "epoch": 23.63, "learning_rate": 1.6849190938511327e-05, "loss": 0.1396, "step": 60850 }, { "epoch": 23.63, "learning_rate": 1.684867313915858e-05, "loss": 0.0618, "step": 60860 }, { "epoch": 23.64, "learning_rate": 1.6848155339805826e-05, "loss": 0.0494, "step": 60870 }, { "epoch": 23.64, "learning_rate": 1.6847637540453078e-05, "loss": 0.1461, "step": 60880 }, { "epoch": 23.65, "learning_rate": 1.6847119741100326e-05, "loss": 0.0476, "step": 60890 }, { "epoch": 23.65, "learning_rate": 1.6846601941747573e-05, "loss": 0.1925, "step": 60900 }, { "epoch": 23.65, "learning_rate": 1.684608414239482e-05, "loss": 0.1592, "step": 60910 }, { "epoch": 23.66, "learning_rate": 1.6845566343042073e-05, "loss": 0.1578, "step": 60920 }, { "epoch": 23.66, "learning_rate": 1.684504854368932e-05, "loss": 0.0588, "step": 60930 }, { "epoch": 23.67, "learning_rate": 1.6844530744336572e-05, "loss": 0.2325, "step": 60940 }, { "epoch": 23.67, "learning_rate": 1.684401294498382e-05, "loss": 0.21, "step": 60950 }, { "epoch": 23.67, "learning_rate": 1.684349514563107e-05, "loss": 0.1721, "step": 60960 }, { "epoch": 23.68, "learning_rate": 1.684297734627832e-05, "loss": 0.2849, "step": 60970 }, { "epoch": 23.68, "learning_rate": 1.6842459546925567e-05, "loss": 0.019, "step": 60980 }, { "epoch": 23.69, "learning_rate": 1.6841941747572815e-05, "loss": 0.1127, "step": 60990 }, { "epoch": 23.69, "learning_rate": 1.6841423948220066e-05, "loss": 0.1381, "step": 61000 }, { "epoch": 23.69, "learning_rate": 1.6840906148867314e-05, "loss": 0.1836, "step": 61010 }, { "epoch": 23.7, "learning_rate": 1.6840388349514565e-05, "loss": 0.0608, "step": 61020 }, { "epoch": 23.7, "learning_rate": 1.6839870550161813e-05, "loss": 0.0216, "step": 61030 }, { "epoch": 23.7, "learning_rate": 1.6839352750809065e-05, "loss": 0.1754, "step": 61040 }, { "epoch": 23.71, "learning_rate": 1.6838834951456313e-05, "loss": 0.1136, "step": 61050 }, { "epoch": 23.71, "learning_rate": 1.683831715210356e-05, "loss": 0.1453, "step": 61060 }, { "epoch": 23.72, "learning_rate": 1.683779935275081e-05, "loss": 0.2038, "step": 61070 }, { "epoch": 23.72, "learning_rate": 1.683728155339806e-05, "loss": 0.0265, "step": 61080 }, { "epoch": 23.72, "learning_rate": 1.6836763754045308e-05, "loss": 0.1537, "step": 61090 }, { "epoch": 23.73, "learning_rate": 1.683624595469256e-05, "loss": 0.2161, "step": 61100 }, { "epoch": 23.73, "learning_rate": 1.6835728155339807e-05, "loss": 0.1176, "step": 61110 }, { "epoch": 23.74, "learning_rate": 1.6835210355987058e-05, "loss": 0.0625, "step": 61120 }, { "epoch": 23.74, "learning_rate": 1.6834692556634306e-05, "loss": 0.0763, "step": 61130 }, { "epoch": 23.74, "learning_rate": 1.6834174757281554e-05, "loss": 0.1168, "step": 61140 }, { "epoch": 23.75, "learning_rate": 1.6833656957928802e-05, "loss": 0.0098, "step": 61150 }, { "epoch": 23.75, "learning_rate": 1.6833139158576053e-05, "loss": 0.1305, "step": 61160 }, { "epoch": 23.76, "learning_rate": 1.68326213592233e-05, "loss": 0.0783, "step": 61170 }, { "epoch": 23.76, "learning_rate": 1.6832103559870553e-05, "loss": 0.0913, "step": 61180 }, { "epoch": 23.76, "learning_rate": 1.68315857605178e-05, "loss": 0.1775, "step": 61190 }, { "epoch": 23.77, "learning_rate": 1.6831067961165052e-05, "loss": 0.0251, "step": 61200 }, { "epoch": 23.77, "learning_rate": 1.68305501618123e-05, "loss": 0.1903, "step": 61210 }, { "epoch": 23.77, "learning_rate": 1.6830032362459548e-05, "loss": 0.1029, "step": 61220 }, { "epoch": 23.78, "learning_rate": 1.6829514563106796e-05, "loss": 0.0956, "step": 61230 }, { "epoch": 23.78, "learning_rate": 1.6828996763754047e-05, "loss": 0.1863, "step": 61240 }, { "epoch": 23.79, "learning_rate": 1.6828478964401295e-05, "loss": 0.1047, "step": 61250 }, { "epoch": 23.79, "learning_rate": 1.6827961165048546e-05, "loss": 0.3181, "step": 61260 }, { "epoch": 23.79, "learning_rate": 1.6827443365695794e-05, "loss": 0.0563, "step": 61270 }, { "epoch": 23.8, "learning_rate": 1.6826925566343045e-05, "loss": 0.097, "step": 61280 }, { "epoch": 23.8, "learning_rate": 1.6826407766990293e-05, "loss": 0.1726, "step": 61290 }, { "epoch": 23.81, "learning_rate": 1.682588996763754e-05, "loss": 0.1518, "step": 61300 }, { "epoch": 23.81, "learning_rate": 1.682537216828479e-05, "loss": 0.1863, "step": 61310 }, { "epoch": 23.81, "learning_rate": 1.682485436893204e-05, "loss": 0.0856, "step": 61320 }, { "epoch": 23.82, "learning_rate": 1.682433656957929e-05, "loss": 0.1047, "step": 61330 }, { "epoch": 23.82, "learning_rate": 1.682381877022654e-05, "loss": 0.3357, "step": 61340 }, { "epoch": 23.83, "learning_rate": 1.6823300970873788e-05, "loss": 0.0987, "step": 61350 }, { "epoch": 23.83, "learning_rate": 1.682278317152104e-05, "loss": 0.2132, "step": 61360 }, { "epoch": 23.83, "learning_rate": 1.6822265372168287e-05, "loss": 0.0929, "step": 61370 }, { "epoch": 23.84, "learning_rate": 1.6821747572815535e-05, "loss": 0.032, "step": 61380 }, { "epoch": 23.84, "learning_rate": 1.6821229773462783e-05, "loss": 0.0101, "step": 61390 }, { "epoch": 23.84, "learning_rate": 1.6820711974110034e-05, "loss": 0.0833, "step": 61400 }, { "epoch": 23.85, "learning_rate": 1.6820194174757282e-05, "loss": 0.1, "step": 61410 }, { "epoch": 23.85, "learning_rate": 1.6819676375404533e-05, "loss": 0.0156, "step": 61420 }, { "epoch": 23.86, "learning_rate": 1.681915857605178e-05, "loss": 0.1022, "step": 61430 }, { "epoch": 23.86, "learning_rate": 1.6818640776699032e-05, "loss": 0.0226, "step": 61440 }, { "epoch": 23.86, "learning_rate": 1.681812297734628e-05, "loss": 0.119, "step": 61450 }, { "epoch": 23.87, "learning_rate": 1.681760517799353e-05, "loss": 0.1717, "step": 61460 }, { "epoch": 23.87, "learning_rate": 1.6817087378640776e-05, "loss": 0.0109, "step": 61470 }, { "epoch": 23.88, "learning_rate": 1.6816569579288028e-05, "loss": 0.1251, "step": 61480 }, { "epoch": 23.88, "learning_rate": 1.6816051779935275e-05, "loss": 0.2281, "step": 61490 }, { "epoch": 23.88, "learning_rate": 1.6815533980582527e-05, "loss": 0.0369, "step": 61500 }, { "epoch": 23.89, "learning_rate": 1.6815016181229775e-05, "loss": 0.1574, "step": 61510 }, { "epoch": 23.89, "learning_rate": 1.6814498381877026e-05, "loss": 0.1556, "step": 61520 }, { "epoch": 23.9, "learning_rate": 1.6813980582524274e-05, "loss": 0.1218, "step": 61530 }, { "epoch": 23.9, "learning_rate": 1.6813462783171522e-05, "loss": 0.1425, "step": 61540 }, { "epoch": 23.9, "learning_rate": 1.681294498381877e-05, "loss": 0.1045, "step": 61550 }, { "epoch": 23.91, "learning_rate": 1.681242718446602e-05, "loss": 0.1001, "step": 61560 }, { "epoch": 23.91, "learning_rate": 1.681190938511327e-05, "loss": 0.0289, "step": 61570 }, { "epoch": 23.91, "learning_rate": 1.681139158576052e-05, "loss": 0.0516, "step": 61580 }, { "epoch": 23.92, "learning_rate": 1.6810873786407768e-05, "loss": 0.1593, "step": 61590 }, { "epoch": 23.92, "learning_rate": 1.681035598705502e-05, "loss": 0.1676, "step": 61600 }, { "epoch": 23.93, "learning_rate": 1.6809838187702268e-05, "loss": 0.1557, "step": 61610 }, { "epoch": 23.93, "learning_rate": 1.6809320388349515e-05, "loss": 0.0409, "step": 61620 }, { "epoch": 23.93, "learning_rate": 1.6808802588996763e-05, "loss": 0.1406, "step": 61630 }, { "epoch": 23.94, "learning_rate": 1.6808284789644015e-05, "loss": 0.1498, "step": 61640 }, { "epoch": 23.94, "learning_rate": 1.6807766990291263e-05, "loss": 0.115, "step": 61650 }, { "epoch": 23.95, "learning_rate": 1.6807249190938514e-05, "loss": 0.1659, "step": 61660 }, { "epoch": 23.95, "learning_rate": 1.6806731391585762e-05, "loss": 0.2063, "step": 61670 }, { "epoch": 23.95, "learning_rate": 1.6806213592233013e-05, "loss": 0.0548, "step": 61680 }, { "epoch": 23.96, "learning_rate": 1.680569579288026e-05, "loss": 0.1658, "step": 61690 }, { "epoch": 23.96, "learning_rate": 1.680517799352751e-05, "loss": 0.1888, "step": 61700 }, { "epoch": 23.97, "learning_rate": 1.6804660194174757e-05, "loss": 0.0352, "step": 61710 }, { "epoch": 23.97, "learning_rate": 1.6804142394822008e-05, "loss": 0.0776, "step": 61720 }, { "epoch": 23.97, "learning_rate": 1.6803624595469256e-05, "loss": 0.0365, "step": 61730 }, { "epoch": 23.98, "learning_rate": 1.6803106796116507e-05, "loss": 0.0237, "step": 61740 }, { "epoch": 23.98, "learning_rate": 1.6802588996763755e-05, "loss": 0.1171, "step": 61750 }, { "epoch": 23.98, "learning_rate": 1.6802071197411007e-05, "loss": 0.0952, "step": 61760 }, { "epoch": 23.99, "learning_rate": 1.6801553398058255e-05, "loss": 0.2077, "step": 61770 }, { "epoch": 23.99, "learning_rate": 1.6801035598705503e-05, "loss": 0.0616, "step": 61780 }, { "epoch": 24.0, "learning_rate": 1.680051779935275e-05, "loss": 0.084, "step": 61790 }, { "epoch": 24.0, "learning_rate": 1.6800000000000002e-05, "loss": 0.1737, "step": 61800 }, { "epoch": 24.0, "eval_accuracy": 0.9540577716643741, "eval_loss": 0.2604363262653351, "eval_runtime": 8.2035, "eval_samples_per_second": 443.105, "eval_steps_per_second": 55.464, "step": 61800 }, { "epoch": 24.0, "learning_rate": 1.679948220064725e-05, "loss": 0.0809, "step": 61810 }, { "epoch": 24.01, "learning_rate": 1.67989644012945e-05, "loss": 0.0872, "step": 61820 }, { "epoch": 24.01, "learning_rate": 1.679844660194175e-05, "loss": 0.1132, "step": 61830 }, { "epoch": 24.02, "learning_rate": 1.6797928802589e-05, "loss": 0.0593, "step": 61840 }, { "epoch": 24.02, "learning_rate": 1.6797411003236248e-05, "loss": 0.0435, "step": 61850 }, { "epoch": 24.02, "learning_rate": 1.6796893203883496e-05, "loss": 0.1239, "step": 61860 }, { "epoch": 24.03, "learning_rate": 1.6796375404530744e-05, "loss": 0.1746, "step": 61870 }, { "epoch": 24.03, "learning_rate": 1.6795857605177995e-05, "loss": 0.04, "step": 61880 }, { "epoch": 24.03, "learning_rate": 1.6795339805825243e-05, "loss": 0.0976, "step": 61890 }, { "epoch": 24.04, "learning_rate": 1.6794822006472495e-05, "loss": 0.0564, "step": 61900 }, { "epoch": 24.04, "learning_rate": 1.6794304207119742e-05, "loss": 0.0306, "step": 61910 }, { "epoch": 24.05, "learning_rate": 1.679378640776699e-05, "loss": 0.092, "step": 61920 }, { "epoch": 24.05, "learning_rate": 1.6793268608414242e-05, "loss": 0.2796, "step": 61930 }, { "epoch": 24.05, "learning_rate": 1.679275080906149e-05, "loss": 0.024, "step": 61940 }, { "epoch": 24.06, "learning_rate": 1.6792233009708738e-05, "loss": 0.1511, "step": 61950 }, { "epoch": 24.06, "learning_rate": 1.679171521035599e-05, "loss": 0.0632, "step": 61960 }, { "epoch": 24.07, "learning_rate": 1.6791197411003237e-05, "loss": 0.1061, "step": 61970 }, { "epoch": 24.07, "learning_rate": 1.6790679611650488e-05, "loss": 0.0479, "step": 61980 }, { "epoch": 24.07, "learning_rate": 1.6790161812297736e-05, "loss": 0.081, "step": 61990 }, { "epoch": 24.08, "learning_rate": 1.6789644012944984e-05, "loss": 0.0802, "step": 62000 }, { "epoch": 24.08, "learning_rate": 1.6789126213592235e-05, "loss": 0.126, "step": 62010 }, { "epoch": 24.09, "learning_rate": 1.6788608414239483e-05, "loss": 0.0588, "step": 62020 }, { "epoch": 24.09, "learning_rate": 1.678809061488673e-05, "loss": 0.0035, "step": 62030 }, { "epoch": 24.09, "learning_rate": 1.6787572815533982e-05, "loss": 0.1253, "step": 62040 }, { "epoch": 24.1, "learning_rate": 1.678705501618123e-05, "loss": 0.1538, "step": 62050 }, { "epoch": 24.1, "learning_rate": 1.678653721682848e-05, "loss": 0.0946, "step": 62060 }, { "epoch": 24.1, "learning_rate": 1.678601941747573e-05, "loss": 0.2093, "step": 62070 }, { "epoch": 24.11, "learning_rate": 1.6785501618122978e-05, "loss": 0.0491, "step": 62080 }, { "epoch": 24.11, "learning_rate": 1.678498381877023e-05, "loss": 0.1637, "step": 62090 }, { "epoch": 24.12, "learning_rate": 1.6784466019417477e-05, "loss": 0.1581, "step": 62100 }, { "epoch": 24.12, "learning_rate": 1.6783948220064725e-05, "loss": 0.082, "step": 62110 }, { "epoch": 24.12, "learning_rate": 1.6783430420711976e-05, "loss": 0.0302, "step": 62120 }, { "epoch": 24.13, "learning_rate": 1.6782912621359224e-05, "loss": 0.21, "step": 62130 }, { "epoch": 24.13, "learning_rate": 1.6782394822006475e-05, "loss": 0.0802, "step": 62140 }, { "epoch": 24.14, "learning_rate": 1.6781877022653723e-05, "loss": 0.1112, "step": 62150 }, { "epoch": 24.14, "learning_rate": 1.678135922330097e-05, "loss": 0.1191, "step": 62160 }, { "epoch": 24.14, "learning_rate": 1.6780841423948222e-05, "loss": 0.1874, "step": 62170 }, { "epoch": 24.15, "learning_rate": 1.678032362459547e-05, "loss": 0.0778, "step": 62180 }, { "epoch": 24.15, "learning_rate": 1.6779805825242718e-05, "loss": 0.2343, "step": 62190 }, { "epoch": 24.16, "learning_rate": 1.677928802588997e-05, "loss": 0.08, "step": 62200 }, { "epoch": 24.16, "learning_rate": 1.6778770226537217e-05, "loss": 0.351, "step": 62210 }, { "epoch": 24.16, "learning_rate": 1.677825242718447e-05, "loss": 0.1439, "step": 62220 }, { "epoch": 24.17, "learning_rate": 1.6777734627831717e-05, "loss": 0.0431, "step": 62230 }, { "epoch": 24.17, "learning_rate": 1.6777216828478965e-05, "loss": 0.1222, "step": 62240 }, { "epoch": 24.17, "learning_rate": 1.6776699029126216e-05, "loss": 0.1403, "step": 62250 }, { "epoch": 24.18, "learning_rate": 1.6776181229773464e-05, "loss": 0.0754, "step": 62260 }, { "epoch": 24.18, "learning_rate": 1.6775663430420715e-05, "loss": 0.1512, "step": 62270 }, { "epoch": 24.19, "learning_rate": 1.6775145631067963e-05, "loss": 0.0886, "step": 62280 }, { "epoch": 24.19, "learning_rate": 1.677462783171521e-05, "loss": 0.1103, "step": 62290 }, { "epoch": 24.19, "learning_rate": 1.677411003236246e-05, "loss": 0.0549, "step": 62300 }, { "epoch": 24.2, "learning_rate": 1.677359223300971e-05, "loss": 0.0647, "step": 62310 }, { "epoch": 24.2, "learning_rate": 1.6773074433656958e-05, "loss": 0.0281, "step": 62320 }, { "epoch": 24.21, "learning_rate": 1.677255663430421e-05, "loss": 0.1221, "step": 62330 }, { "epoch": 24.21, "learning_rate": 1.6772038834951457e-05, "loss": 0.0897, "step": 62340 }, { "epoch": 24.21, "learning_rate": 1.677152103559871e-05, "loss": 0.2709, "step": 62350 }, { "epoch": 24.22, "learning_rate": 1.6771003236245957e-05, "loss": 0.0977, "step": 62360 }, { "epoch": 24.22, "learning_rate": 1.6770485436893205e-05, "loss": 0.1195, "step": 62370 }, { "epoch": 24.23, "learning_rate": 1.6769967637540453e-05, "loss": 0.1038, "step": 62380 }, { "epoch": 24.23, "learning_rate": 1.6769449838187704e-05, "loss": 0.0849, "step": 62390 }, { "epoch": 24.23, "learning_rate": 1.6768932038834952e-05, "loss": 0.0767, "step": 62400 }, { "epoch": 24.24, "learning_rate": 1.6768414239482203e-05, "loss": 0.0058, "step": 62410 }, { "epoch": 24.24, "learning_rate": 1.676789644012945e-05, "loss": 0.0556, "step": 62420 }, { "epoch": 24.24, "learning_rate": 1.6767378640776702e-05, "loss": 0.1618, "step": 62430 }, { "epoch": 24.25, "learning_rate": 1.676686084142395e-05, "loss": 0.0622, "step": 62440 }, { "epoch": 24.25, "learning_rate": 1.6766343042071198e-05, "loss": 0.3328, "step": 62450 }, { "epoch": 24.26, "learning_rate": 1.6765825242718446e-05, "loss": 0.1226, "step": 62460 }, { "epoch": 24.26, "learning_rate": 1.6765307443365697e-05, "loss": 0.1369, "step": 62470 }, { "epoch": 24.26, "learning_rate": 1.6764789644012945e-05, "loss": 0.0765, "step": 62480 }, { "epoch": 24.27, "learning_rate": 1.6764271844660197e-05, "loss": 0.0235, "step": 62490 }, { "epoch": 24.27, "learning_rate": 1.6763754045307445e-05, "loss": 0.1501, "step": 62500 }, { "epoch": 24.28, "learning_rate": 1.6763236245954696e-05, "loss": 0.0727, "step": 62510 }, { "epoch": 24.28, "learning_rate": 1.6762718446601944e-05, "loss": 0.1114, "step": 62520 }, { "epoch": 24.28, "learning_rate": 1.6762200647249192e-05, "loss": 0.1382, "step": 62530 }, { "epoch": 24.29, "learning_rate": 1.676168284789644e-05, "loss": 0.0102, "step": 62540 }, { "epoch": 24.29, "learning_rate": 1.676116504854369e-05, "loss": 0.0511, "step": 62550 }, { "epoch": 24.3, "learning_rate": 1.676064724919094e-05, "loss": 0.0948, "step": 62560 }, { "epoch": 24.3, "learning_rate": 1.676012944983819e-05, "loss": 0.055, "step": 62570 }, { "epoch": 24.3, "learning_rate": 1.6759611650485438e-05, "loss": 0.0788, "step": 62580 }, { "epoch": 24.31, "learning_rate": 1.675909385113269e-05, "loss": 0.0319, "step": 62590 }, { "epoch": 24.31, "learning_rate": 1.6758576051779934e-05, "loss": 0.1356, "step": 62600 }, { "epoch": 24.31, "learning_rate": 1.6758058252427185e-05, "loss": 0.0786, "step": 62610 }, { "epoch": 24.32, "learning_rate": 1.6757540453074433e-05, "loss": 0.2069, "step": 62620 }, { "epoch": 24.32, "learning_rate": 1.6757022653721685e-05, "loss": 0.0171, "step": 62630 }, { "epoch": 24.33, "learning_rate": 1.6756504854368932e-05, "loss": 0.0735, "step": 62640 }, { "epoch": 24.33, "learning_rate": 1.6755987055016184e-05, "loss": 0.0013, "step": 62650 }, { "epoch": 24.33, "learning_rate": 1.675546925566343e-05, "loss": 0.0037, "step": 62660 }, { "epoch": 24.34, "learning_rate": 1.6754951456310683e-05, "loss": 0.0525, "step": 62670 }, { "epoch": 24.34, "learning_rate": 1.6754433656957928e-05, "loss": 0.105, "step": 62680 }, { "epoch": 24.35, "learning_rate": 1.675391585760518e-05, "loss": 0.0305, "step": 62690 }, { "epoch": 24.35, "learning_rate": 1.6753398058252427e-05, "loss": 0.1546, "step": 62700 }, { "epoch": 24.35, "learning_rate": 1.6752880258899678e-05, "loss": 0.1686, "step": 62710 }, { "epoch": 24.36, "learning_rate": 1.6752362459546926e-05, "loss": 0.1888, "step": 62720 }, { "epoch": 24.36, "learning_rate": 1.6751844660194177e-05, "loss": 0.0997, "step": 62730 }, { "epoch": 24.37, "learning_rate": 1.6751326860841425e-05, "loss": 0.1032, "step": 62740 }, { "epoch": 24.37, "learning_rate": 1.6750809061488677e-05, "loss": 0.0866, "step": 62750 }, { "epoch": 24.37, "learning_rate": 1.675029126213592e-05, "loss": 0.0618, "step": 62760 }, { "epoch": 24.38, "learning_rate": 1.6749773462783172e-05, "loss": 0.1807, "step": 62770 }, { "epoch": 24.38, "learning_rate": 1.674925566343042e-05, "loss": 0.2234, "step": 62780 }, { "epoch": 24.38, "learning_rate": 1.674873786407767e-05, "loss": 0.0442, "step": 62790 }, { "epoch": 24.39, "learning_rate": 1.674822006472492e-05, "loss": 0.1047, "step": 62800 }, { "epoch": 24.39, "learning_rate": 1.674770226537217e-05, "loss": 0.0304, "step": 62810 }, { "epoch": 24.4, "learning_rate": 1.674718446601942e-05, "loss": 0.1364, "step": 62820 }, { "epoch": 24.4, "learning_rate": 1.674666666666667e-05, "loss": 0.0706, "step": 62830 }, { "epoch": 24.4, "learning_rate": 1.6746148867313918e-05, "loss": 0.0764, "step": 62840 }, { "epoch": 24.41, "learning_rate": 1.6745631067961166e-05, "loss": 0.1014, "step": 62850 }, { "epoch": 24.41, "learning_rate": 1.6745113268608414e-05, "loss": 0.1057, "step": 62860 }, { "epoch": 24.42, "learning_rate": 1.6744595469255665e-05, "loss": 0.1007, "step": 62870 }, { "epoch": 24.42, "learning_rate": 1.6744077669902913e-05, "loss": 0.0548, "step": 62880 }, { "epoch": 24.42, "learning_rate": 1.6743559870550164e-05, "loss": 0.244, "step": 62890 }, { "epoch": 24.43, "learning_rate": 1.6743042071197412e-05, "loss": 0.0481, "step": 62900 }, { "epoch": 24.43, "learning_rate": 1.6742524271844664e-05, "loss": 0.1096, "step": 62910 }, { "epoch": 24.43, "learning_rate": 1.674200647249191e-05, "loss": 0.1351, "step": 62920 }, { "epoch": 24.44, "learning_rate": 1.674148867313916e-05, "loss": 0.2504, "step": 62930 }, { "epoch": 24.44, "learning_rate": 1.6740970873786407e-05, "loss": 0.0355, "step": 62940 }, { "epoch": 24.45, "learning_rate": 1.674045307443366e-05, "loss": 0.1462, "step": 62950 }, { "epoch": 24.45, "learning_rate": 1.6739935275080907e-05, "loss": 0.0376, "step": 62960 }, { "epoch": 24.45, "learning_rate": 1.6739417475728158e-05, "loss": 0.1971, "step": 62970 }, { "epoch": 24.46, "learning_rate": 1.6738899676375406e-05, "loss": 0.0717, "step": 62980 }, { "epoch": 24.46, "learning_rate": 1.6738381877022657e-05, "loss": 0.1046, "step": 62990 }, { "epoch": 24.47, "learning_rate": 1.6737864077669905e-05, "loss": 0.0817, "step": 63000 }, { "epoch": 24.47, "learning_rate": 1.6737346278317153e-05, "loss": 0.0532, "step": 63010 }, { "epoch": 24.47, "learning_rate": 1.67368284789644e-05, "loss": 0.0794, "step": 63020 }, { "epoch": 24.48, "learning_rate": 1.6736310679611652e-05, "loss": 0.0313, "step": 63030 }, { "epoch": 24.48, "learning_rate": 1.67357928802589e-05, "loss": 0.0028, "step": 63040 }, { "epoch": 24.49, "learning_rate": 1.673527508090615e-05, "loss": 0.019, "step": 63050 }, { "epoch": 24.49, "learning_rate": 1.67347572815534e-05, "loss": 0.1981, "step": 63060 }, { "epoch": 24.49, "learning_rate": 1.673423948220065e-05, "loss": 0.2145, "step": 63070 }, { "epoch": 24.5, "learning_rate": 1.67337216828479e-05, "loss": 0.0765, "step": 63080 }, { "epoch": 24.5, "learning_rate": 1.6733203883495147e-05, "loss": 0.0076, "step": 63090 }, { "epoch": 24.5, "learning_rate": 1.6732686084142395e-05, "loss": 0.1483, "step": 63100 }, { "epoch": 24.51, "learning_rate": 1.6732168284789646e-05, "loss": 0.2438, "step": 63110 }, { "epoch": 24.51, "learning_rate": 1.6731650485436894e-05, "loss": 0.047, "step": 63120 }, { "epoch": 24.52, "learning_rate": 1.6731132686084145e-05, "loss": 0.1352, "step": 63130 }, { "epoch": 24.52, "learning_rate": 1.6730614886731393e-05, "loss": 0.2002, "step": 63140 }, { "epoch": 24.52, "learning_rate": 1.6730097087378644e-05, "loss": 0.2107, "step": 63150 }, { "epoch": 24.53, "learning_rate": 1.6729579288025892e-05, "loss": 0.0021, "step": 63160 }, { "epoch": 24.53, "learning_rate": 1.672906148867314e-05, "loss": 0.0722, "step": 63170 }, { "epoch": 24.54, "learning_rate": 1.6728543689320388e-05, "loss": 0.1017, "step": 63180 }, { "epoch": 24.54, "learning_rate": 1.672802588996764e-05, "loss": 0.064, "step": 63190 }, { "epoch": 24.54, "learning_rate": 1.6727508090614887e-05, "loss": 0.0644, "step": 63200 }, { "epoch": 24.55, "learning_rate": 1.672699029126214e-05, "loss": 0.1606, "step": 63210 }, { "epoch": 24.55, "learning_rate": 1.6726472491909387e-05, "loss": 0.1427, "step": 63220 }, { "epoch": 24.56, "learning_rate": 1.6725954692556638e-05, "loss": 0.0585, "step": 63230 }, { "epoch": 24.56, "learning_rate": 1.6725436893203886e-05, "loss": 0.0679, "step": 63240 }, { "epoch": 24.56, "learning_rate": 1.6724919093851134e-05, "loss": 0.2347, "step": 63250 }, { "epoch": 24.57, "learning_rate": 1.672440129449838e-05, "loss": 0.1291, "step": 63260 }, { "epoch": 24.57, "learning_rate": 1.6723883495145633e-05, "loss": 0.0709, "step": 63270 }, { "epoch": 24.57, "learning_rate": 1.672336569579288e-05, "loss": 0.0474, "step": 63280 }, { "epoch": 24.58, "learning_rate": 1.6722847896440132e-05, "loss": 0.296, "step": 63290 }, { "epoch": 24.58, "learning_rate": 1.672233009708738e-05, "loss": 0.1627, "step": 63300 }, { "epoch": 24.59, "learning_rate": 1.672181229773463e-05, "loss": 0.1216, "step": 63310 }, { "epoch": 24.59, "learning_rate": 1.672129449838188e-05, "loss": 0.0521, "step": 63320 }, { "epoch": 24.59, "learning_rate": 1.6720776699029127e-05, "loss": 0.1048, "step": 63330 }, { "epoch": 24.6, "learning_rate": 1.6720258899676375e-05, "loss": 0.0313, "step": 63340 }, { "epoch": 24.6, "learning_rate": 1.6719741100323627e-05, "loss": 0.1454, "step": 63350 }, { "epoch": 24.61, "learning_rate": 1.6719223300970874e-05, "loss": 0.1352, "step": 63360 }, { "epoch": 24.61, "learning_rate": 1.6718705501618126e-05, "loss": 0.1288, "step": 63370 }, { "epoch": 24.61, "learning_rate": 1.6718187702265374e-05, "loss": 0.1786, "step": 63380 }, { "epoch": 24.62, "learning_rate": 1.671766990291262e-05, "loss": 0.2206, "step": 63390 }, { "epoch": 24.62, "learning_rate": 1.6717152103559873e-05, "loss": 0.1606, "step": 63400 }, { "epoch": 24.63, "learning_rate": 1.671663430420712e-05, "loss": 0.0036, "step": 63410 }, { "epoch": 24.63, "learning_rate": 1.671611650485437e-05, "loss": 0.1591, "step": 63420 }, { "epoch": 24.63, "learning_rate": 1.671559870550162e-05, "loss": 0.0335, "step": 63430 }, { "epoch": 24.64, "learning_rate": 1.6715080906148868e-05, "loss": 0.0819, "step": 63440 }, { "epoch": 24.64, "learning_rate": 1.671456310679612e-05, "loss": 0.0852, "step": 63450 }, { "epoch": 24.64, "learning_rate": 1.6714045307443367e-05, "loss": 0.1022, "step": 63460 }, { "epoch": 24.65, "learning_rate": 1.6713527508090615e-05, "loss": 0.0267, "step": 63470 }, { "epoch": 24.65, "learning_rate": 1.6713009708737866e-05, "loss": 0.0603, "step": 63480 }, { "epoch": 24.66, "learning_rate": 1.6712491909385114e-05, "loss": 0.2353, "step": 63490 }, { "epoch": 24.66, "learning_rate": 1.6711974110032362e-05, "loss": 0.0822, "step": 63500 }, { "epoch": 24.66, "learning_rate": 1.6711456310679614e-05, "loss": 0.0855, "step": 63510 }, { "epoch": 24.67, "learning_rate": 1.671093851132686e-05, "loss": 0.1538, "step": 63520 }, { "epoch": 24.67, "learning_rate": 1.6710420711974113e-05, "loss": 0.0849, "step": 63530 }, { "epoch": 24.68, "learning_rate": 1.670990291262136e-05, "loss": 0.0719, "step": 63540 }, { "epoch": 24.68, "learning_rate": 1.670938511326861e-05, "loss": 0.1341, "step": 63550 }, { "epoch": 24.68, "learning_rate": 1.670886731391586e-05, "loss": 0.1292, "step": 63560 }, { "epoch": 24.69, "learning_rate": 1.6708349514563108e-05, "loss": 0.0485, "step": 63570 }, { "epoch": 24.69, "learning_rate": 1.6707831715210356e-05, "loss": 0.2329, "step": 63580 }, { "epoch": 24.7, "learning_rate": 1.6707313915857607e-05, "loss": 0.017, "step": 63590 }, { "epoch": 24.7, "learning_rate": 1.6706796116504855e-05, "loss": 0.0549, "step": 63600 }, { "epoch": 24.7, "learning_rate": 1.6706278317152106e-05, "loss": 0.0463, "step": 63610 }, { "epoch": 24.71, "learning_rate": 1.6705760517799354e-05, "loss": 0.0924, "step": 63620 }, { "epoch": 24.71, "learning_rate": 1.6705242718446602e-05, "loss": 0.0588, "step": 63630 }, { "epoch": 24.71, "learning_rate": 1.6704724919093854e-05, "loss": 0.0534, "step": 63640 }, { "epoch": 24.72, "learning_rate": 1.67042071197411e-05, "loss": 0.1262, "step": 63650 }, { "epoch": 24.72, "learning_rate": 1.670368932038835e-05, "loss": 0.1905, "step": 63660 }, { "epoch": 24.73, "learning_rate": 1.67031715210356e-05, "loss": 0.0144, "step": 63670 }, { "epoch": 24.73, "learning_rate": 1.670265372168285e-05, "loss": 0.1623, "step": 63680 }, { "epoch": 24.73, "learning_rate": 1.67021359223301e-05, "loss": 0.173, "step": 63690 }, { "epoch": 24.74, "learning_rate": 1.6701618122977348e-05, "loss": 0.1667, "step": 63700 }, { "epoch": 24.74, "learning_rate": 1.6701100323624596e-05, "loss": 0.206, "step": 63710 }, { "epoch": 24.75, "learning_rate": 1.6700582524271847e-05, "loss": 0.0932, "step": 63720 }, { "epoch": 24.75, "learning_rate": 1.6700064724919095e-05, "loss": 0.138, "step": 63730 }, { "epoch": 24.75, "learning_rate": 1.6699546925566343e-05, "loss": 0.1341, "step": 63740 }, { "epoch": 24.76, "learning_rate": 1.6699029126213594e-05, "loss": 0.1059, "step": 63750 }, { "epoch": 24.76, "learning_rate": 1.6698511326860842e-05, "loss": 0.0523, "step": 63760 }, { "epoch": 24.77, "learning_rate": 1.669799352750809e-05, "loss": 0.081, "step": 63770 }, { "epoch": 24.77, "learning_rate": 1.669747572815534e-05, "loss": 0.1879, "step": 63780 }, { "epoch": 24.77, "learning_rate": 1.669695792880259e-05, "loss": 0.0754, "step": 63790 }, { "epoch": 24.78, "learning_rate": 1.669644012944984e-05, "loss": 0.0109, "step": 63800 }, { "epoch": 24.78, "learning_rate": 1.669592233009709e-05, "loss": 0.0914, "step": 63810 }, { "epoch": 24.78, "learning_rate": 1.6695404530744337e-05, "loss": 0.1208, "step": 63820 }, { "epoch": 24.79, "learning_rate": 1.6694886731391588e-05, "loss": 0.0261, "step": 63830 }, { "epoch": 24.79, "learning_rate": 1.6694368932038836e-05, "loss": 0.1462, "step": 63840 }, { "epoch": 24.8, "learning_rate": 1.6693851132686084e-05, "loss": 0.1745, "step": 63850 }, { "epoch": 24.8, "learning_rate": 1.6693333333333335e-05, "loss": 0.0653, "step": 63860 }, { "epoch": 24.8, "learning_rate": 1.6692815533980583e-05, "loss": 0.0236, "step": 63870 }, { "epoch": 24.81, "learning_rate": 1.6692297734627834e-05, "loss": 0.1402, "step": 63880 }, { "epoch": 24.81, "learning_rate": 1.6691779935275082e-05, "loss": 0.2828, "step": 63890 }, { "epoch": 24.82, "learning_rate": 1.669126213592233e-05, "loss": 0.1055, "step": 63900 }, { "epoch": 24.82, "learning_rate": 1.669074433656958e-05, "loss": 0.0644, "step": 63910 }, { "epoch": 24.82, "learning_rate": 1.669022653721683e-05, "loss": 0.1536, "step": 63920 }, { "epoch": 24.83, "learning_rate": 1.6689708737864077e-05, "loss": 0.1238, "step": 63930 }, { "epoch": 24.83, "learning_rate": 1.668919093851133e-05, "loss": 0.1705, "step": 63940 }, { "epoch": 24.83, "learning_rate": 1.6688673139158576e-05, "loss": 0.1166, "step": 63950 }, { "epoch": 24.84, "learning_rate": 1.6688155339805828e-05, "loss": 0.1161, "step": 63960 }, { "epoch": 24.84, "learning_rate": 1.6687637540453076e-05, "loss": 0.2066, "step": 63970 }, { "epoch": 24.85, "learning_rate": 1.6687119741100327e-05, "loss": 0.1563, "step": 63980 }, { "epoch": 24.85, "learning_rate": 1.6686601941747575e-05, "loss": 0.0902, "step": 63990 }, { "epoch": 24.85, "learning_rate": 1.6686084142394823e-05, "loss": 0.053, "step": 64000 }, { "epoch": 24.86, "learning_rate": 1.668556634304207e-05, "loss": 0.0014, "step": 64010 }, { "epoch": 24.86, "learning_rate": 1.6685048543689322e-05, "loss": 0.2784, "step": 64020 }, { "epoch": 24.87, "learning_rate": 1.668453074433657e-05, "loss": 0.1118, "step": 64030 }, { "epoch": 24.87, "learning_rate": 1.668401294498382e-05, "loss": 0.0675, "step": 64040 }, { "epoch": 24.87, "learning_rate": 1.668349514563107e-05, "loss": 0.1076, "step": 64050 }, { "epoch": 24.88, "learning_rate": 1.668297734627832e-05, "loss": 0.1033, "step": 64060 }, { "epoch": 24.88, "learning_rate": 1.6682459546925565e-05, "loss": 0.1769, "step": 64070 }, { "epoch": 24.89, "learning_rate": 1.6681941747572816e-05, "loss": 0.0551, "step": 64080 }, { "epoch": 24.89, "learning_rate": 1.6681423948220064e-05, "loss": 0.1376, "step": 64090 }, { "epoch": 24.89, "learning_rate": 1.6680906148867316e-05, "loss": 0.1855, "step": 64100 }, { "epoch": 24.9, "learning_rate": 1.6680388349514564e-05, "loss": 0.0208, "step": 64110 }, { "epoch": 24.9, "learning_rate": 1.6679870550161815e-05, "loss": 0.1238, "step": 64120 }, { "epoch": 24.9, "learning_rate": 1.6679352750809063e-05, "loss": 0.0856, "step": 64130 }, { "epoch": 24.91, "learning_rate": 1.6678834951456314e-05, "loss": 0.05, "step": 64140 }, { "epoch": 24.91, "learning_rate": 1.667831715210356e-05, "loss": 0.1858, "step": 64150 }, { "epoch": 24.92, "learning_rate": 1.667779935275081e-05, "loss": 0.0596, "step": 64160 }, { "epoch": 24.92, "learning_rate": 1.6677281553398058e-05, "loss": 0.06, "step": 64170 }, { "epoch": 24.92, "learning_rate": 1.667676375404531e-05, "loss": 0.0901, "step": 64180 }, { "epoch": 24.93, "learning_rate": 1.6676245954692557e-05, "loss": 0.0521, "step": 64190 }, { "epoch": 24.93, "learning_rate": 1.667572815533981e-05, "loss": 0.19, "step": 64200 }, { "epoch": 24.94, "learning_rate": 1.6675210355987056e-05, "loss": 0.0783, "step": 64210 }, { "epoch": 24.94, "learning_rate": 1.6674692556634308e-05, "loss": 0.0445, "step": 64220 }, { "epoch": 24.94, "learning_rate": 1.6674174757281552e-05, "loss": 0.2725, "step": 64230 }, { "epoch": 24.95, "learning_rate": 1.6673656957928804e-05, "loss": 0.0178, "step": 64240 }, { "epoch": 24.95, "learning_rate": 1.667313915857605e-05, "loss": 0.0501, "step": 64250 }, { "epoch": 24.96, "learning_rate": 1.6672621359223303e-05, "loss": 0.0562, "step": 64260 }, { "epoch": 24.96, "learning_rate": 1.667210355987055e-05, "loss": 0.1321, "step": 64270 }, { "epoch": 24.96, "learning_rate": 1.6671585760517802e-05, "loss": 0.0311, "step": 64280 }, { "epoch": 24.97, "learning_rate": 1.667106796116505e-05, "loss": 0.0921, "step": 64290 }, { "epoch": 24.97, "learning_rate": 1.66705501618123e-05, "loss": 0.1184, "step": 64300 }, { "epoch": 24.97, "learning_rate": 1.6670032362459546e-05, "loss": 0.0634, "step": 64310 }, { "epoch": 24.98, "learning_rate": 1.6669514563106797e-05, "loss": 0.2518, "step": 64320 }, { "epoch": 24.98, "learning_rate": 1.6668996763754045e-05, "loss": 0.0272, "step": 64330 }, { "epoch": 24.99, "learning_rate": 1.6668478964401296e-05, "loss": 0.1468, "step": 64340 }, { "epoch": 24.99, "learning_rate": 1.6667961165048544e-05, "loss": 0.1336, "step": 64350 }, { "epoch": 24.99, "learning_rate": 1.6667443365695796e-05, "loss": 0.1145, "step": 64360 }, { "epoch": 25.0, "learning_rate": 1.6666925566343044e-05, "loss": 0.0275, "step": 64370 }, { "epoch": 25.0, "eval_accuracy": 0.951031636863824, "eval_loss": 0.26248398423194885, "eval_runtime": 8.3281, "eval_samples_per_second": 436.473, "eval_steps_per_second": 54.634, "step": 64375 }, { "epoch": 25.0, "learning_rate": 1.6666407766990295e-05, "loss": 0.0285, "step": 64380 }, { "epoch": 25.01, "learning_rate": 1.666588996763754e-05, "loss": 0.1049, "step": 64390 }, { "epoch": 25.01, "learning_rate": 1.666537216828479e-05, "loss": 0.1624, "step": 64400 }, { "epoch": 25.01, "learning_rate": 1.666485436893204e-05, "loss": 0.0456, "step": 64410 }, { "epoch": 25.02, "learning_rate": 1.666433656957929e-05, "loss": 0.2012, "step": 64420 }, { "epoch": 25.02, "learning_rate": 1.6663818770226538e-05, "loss": 0.2304, "step": 64430 }, { "epoch": 25.03, "learning_rate": 1.666330097087379e-05, "loss": 0.2281, "step": 64440 }, { "epoch": 25.03, "learning_rate": 1.6662783171521037e-05, "loss": 0.0724, "step": 64450 }, { "epoch": 25.03, "learning_rate": 1.666226537216829e-05, "loss": 0.0483, "step": 64460 }, { "epoch": 25.04, "learning_rate": 1.6661747572815536e-05, "loss": 0.1143, "step": 64470 }, { "epoch": 25.04, "learning_rate": 1.6661229773462784e-05, "loss": 0.0603, "step": 64480 }, { "epoch": 25.04, "learning_rate": 1.6660711974110032e-05, "loss": 0.1586, "step": 64490 }, { "epoch": 25.05, "learning_rate": 1.6660194174757283e-05, "loss": 0.0503, "step": 64500 }, { "epoch": 25.05, "learning_rate": 1.665967637540453e-05, "loss": 0.0187, "step": 64510 }, { "epoch": 25.06, "learning_rate": 1.6659158576051783e-05, "loss": 0.1354, "step": 64520 }, { "epoch": 25.06, "learning_rate": 1.665864077669903e-05, "loss": 0.1974, "step": 64530 }, { "epoch": 25.06, "learning_rate": 1.6658122977346282e-05, "loss": 0.1636, "step": 64540 }, { "epoch": 25.07, "learning_rate": 1.665760517799353e-05, "loss": 0.0756, "step": 64550 }, { "epoch": 25.07, "learning_rate": 1.6657087378640778e-05, "loss": 0.0205, "step": 64560 }, { "epoch": 25.08, "learning_rate": 1.6656569579288026e-05, "loss": 0.0723, "step": 64570 }, { "epoch": 25.08, "learning_rate": 1.6656051779935277e-05, "loss": 0.0924, "step": 64580 }, { "epoch": 25.08, "learning_rate": 1.6655533980582525e-05, "loss": 0.0205, "step": 64590 }, { "epoch": 25.09, "learning_rate": 1.6655016181229776e-05, "loss": 0.0645, "step": 64600 }, { "epoch": 25.09, "learning_rate": 1.6654498381877024e-05, "loss": 0.0473, "step": 64610 }, { "epoch": 25.1, "learning_rate": 1.6653980582524275e-05, "loss": 0.0309, "step": 64620 }, { "epoch": 25.1, "learning_rate": 1.6653462783171523e-05, "loss": 0.0681, "step": 64630 }, { "epoch": 25.1, "learning_rate": 1.665294498381877e-05, "loss": 0.1689, "step": 64640 }, { "epoch": 25.11, "learning_rate": 1.665242718446602e-05, "loss": 0.14, "step": 64650 }, { "epoch": 25.11, "learning_rate": 1.665190938511327e-05, "loss": 0.0514, "step": 64660 }, { "epoch": 25.11, "learning_rate": 1.665139158576052e-05, "loss": 0.0724, "step": 64670 }, { "epoch": 25.12, "learning_rate": 1.665087378640777e-05, "loss": 0.0684, "step": 64680 }, { "epoch": 25.12, "learning_rate": 1.6650355987055018e-05, "loss": 0.1625, "step": 64690 }, { "epoch": 25.13, "learning_rate": 1.664983818770227e-05, "loss": 0.0493, "step": 64700 }, { "epoch": 25.13, "learning_rate": 1.6649320388349517e-05, "loss": 0.1419, "step": 64710 }, { "epoch": 25.13, "learning_rate": 1.6648802588996765e-05, "loss": 0.0942, "step": 64720 }, { "epoch": 25.14, "learning_rate": 1.6648284789644013e-05, "loss": 0.2344, "step": 64730 }, { "epoch": 25.14, "learning_rate": 1.6647766990291264e-05, "loss": 0.0445, "step": 64740 }, { "epoch": 25.15, "learning_rate": 1.6647249190938512e-05, "loss": 0.0269, "step": 64750 }, { "epoch": 25.15, "learning_rate": 1.6646731391585763e-05, "loss": 0.0428, "step": 64760 }, { "epoch": 25.15, "learning_rate": 1.664621359223301e-05, "loss": 0.1814, "step": 64770 }, { "epoch": 25.16, "learning_rate": 1.6645695792880263e-05, "loss": 0.151, "step": 64780 }, { "epoch": 25.16, "learning_rate": 1.664517799352751e-05, "loss": 0.0667, "step": 64790 }, { "epoch": 25.17, "learning_rate": 1.664466019417476e-05, "loss": 0.1344, "step": 64800 }, { "epoch": 25.17, "learning_rate": 1.6644142394822006e-05, "loss": 0.1135, "step": 64810 }, { "epoch": 25.17, "learning_rate": 1.6643624595469258e-05, "loss": 0.0042, "step": 64820 }, { "epoch": 25.18, "learning_rate": 1.6643106796116506e-05, "loss": 0.0629, "step": 64830 }, { "epoch": 25.18, "learning_rate": 1.6642588996763757e-05, "loss": 0.2934, "step": 64840 }, { "epoch": 25.18, "learning_rate": 1.6642071197411005e-05, "loss": 0.112, "step": 64850 }, { "epoch": 25.19, "learning_rate": 1.6641553398058253e-05, "loss": 0.1309, "step": 64860 }, { "epoch": 25.19, "learning_rate": 1.6641035598705504e-05, "loss": 0.1322, "step": 64870 }, { "epoch": 25.2, "learning_rate": 1.6640517799352752e-05, "loss": 0.0448, "step": 64880 }, { "epoch": 25.2, "learning_rate": 1.664e-05, "loss": 0.0663, "step": 64890 }, { "epoch": 25.2, "learning_rate": 1.663948220064725e-05, "loss": 0.1879, "step": 64900 }, { "epoch": 25.21, "learning_rate": 1.66389644012945e-05, "loss": 0.0177, "step": 64910 }, { "epoch": 25.21, "learning_rate": 1.663844660194175e-05, "loss": 0.1343, "step": 64920 }, { "epoch": 25.22, "learning_rate": 1.6637928802589e-05, "loss": 0.1194, "step": 64930 }, { "epoch": 25.22, "learning_rate": 1.6637411003236246e-05, "loss": 0.1405, "step": 64940 }, { "epoch": 25.22, "learning_rate": 1.6636893203883498e-05, "loss": 0.1592, "step": 64950 }, { "epoch": 25.23, "learning_rate": 1.6636375404530746e-05, "loss": 0.0773, "step": 64960 }, { "epoch": 25.23, "learning_rate": 1.6635857605177993e-05, "loss": 0.0662, "step": 64970 }, { "epoch": 25.23, "learning_rate": 1.6635339805825245e-05, "loss": 0.1106, "step": 64980 }, { "epoch": 25.24, "learning_rate": 1.6634822006472493e-05, "loss": 0.2341, "step": 64990 }, { "epoch": 25.24, "learning_rate": 1.6634304207119744e-05, "loss": 0.1738, "step": 65000 }, { "epoch": 25.25, "learning_rate": 1.6633786407766992e-05, "loss": 0.1843, "step": 65010 }, { "epoch": 25.25, "learning_rate": 1.663326860841424e-05, "loss": 0.0805, "step": 65020 }, { "epoch": 25.25, "learning_rate": 1.663275080906149e-05, "loss": 0.0758, "step": 65030 }, { "epoch": 25.26, "learning_rate": 1.663223300970874e-05, "loss": 0.0712, "step": 65040 }, { "epoch": 25.26, "learning_rate": 1.6631715210355987e-05, "loss": 0.0525, "step": 65050 }, { "epoch": 25.27, "learning_rate": 1.663119741100324e-05, "loss": 0.0651, "step": 65060 }, { "epoch": 25.27, "learning_rate": 1.6630679611650486e-05, "loss": 0.0355, "step": 65070 }, { "epoch": 25.27, "learning_rate": 1.6630161812297738e-05, "loss": 0.1036, "step": 65080 }, { "epoch": 25.28, "learning_rate": 1.6629644012944986e-05, "loss": 0.2159, "step": 65090 }, { "epoch": 25.28, "learning_rate": 1.6629126213592233e-05, "loss": 0.1407, "step": 65100 }, { "epoch": 25.29, "learning_rate": 1.6628608414239485e-05, "loss": 0.088, "step": 65110 }, { "epoch": 25.29, "learning_rate": 1.6628090614886733e-05, "loss": 0.0139, "step": 65120 }, { "epoch": 25.29, "learning_rate": 1.662757281553398e-05, "loss": 0.1104, "step": 65130 }, { "epoch": 25.3, "learning_rate": 1.6627055016181232e-05, "loss": 0.2004, "step": 65140 }, { "epoch": 25.3, "learning_rate": 1.662653721682848e-05, "loss": 0.1432, "step": 65150 }, { "epoch": 25.3, "learning_rate": 1.662601941747573e-05, "loss": 0.0038, "step": 65160 }, { "epoch": 25.31, "learning_rate": 1.662550161812298e-05, "loss": 0.1546, "step": 65170 }, { "epoch": 25.31, "learning_rate": 1.6624983818770227e-05, "loss": 0.1645, "step": 65180 }, { "epoch": 25.32, "learning_rate": 1.6624466019417478e-05, "loss": 0.1445, "step": 65190 }, { "epoch": 25.32, "learning_rate": 1.6623948220064726e-05, "loss": 0.1014, "step": 65200 }, { "epoch": 25.32, "learning_rate": 1.6623430420711974e-05, "loss": 0.1674, "step": 65210 }, { "epoch": 25.33, "learning_rate": 1.6622912621359225e-05, "loss": 0.1391, "step": 65220 }, { "epoch": 25.33, "learning_rate": 1.6622394822006473e-05, "loss": 0.0973, "step": 65230 }, { "epoch": 25.34, "learning_rate": 1.662187702265372e-05, "loss": 0.2874, "step": 65240 }, { "epoch": 25.34, "learning_rate": 1.6621359223300973e-05, "loss": 0.1018, "step": 65250 }, { "epoch": 25.34, "learning_rate": 1.662084142394822e-05, "loss": 0.244, "step": 65260 }, { "epoch": 25.35, "learning_rate": 1.6620323624595472e-05, "loss": 0.164, "step": 65270 }, { "epoch": 25.35, "learning_rate": 1.661980582524272e-05, "loss": 0.082, "step": 65280 }, { "epoch": 25.36, "learning_rate": 1.6619288025889968e-05, "loss": 0.0965, "step": 65290 }, { "epoch": 25.36, "learning_rate": 1.661877022653722e-05, "loss": 0.0891, "step": 65300 }, { "epoch": 25.36, "learning_rate": 1.6618252427184467e-05, "loss": 0.0331, "step": 65310 }, { "epoch": 25.37, "learning_rate": 1.6617734627831715e-05, "loss": 0.1873, "step": 65320 }, { "epoch": 25.37, "learning_rate": 1.6617216828478966e-05, "loss": 0.0682, "step": 65330 }, { "epoch": 25.37, "learning_rate": 1.6616699029126214e-05, "loss": 0.063, "step": 65340 }, { "epoch": 25.38, "learning_rate": 1.6616181229773465e-05, "loss": 0.0408, "step": 65350 }, { "epoch": 25.38, "learning_rate": 1.6615663430420713e-05, "loss": 0.1143, "step": 65360 }, { "epoch": 25.39, "learning_rate": 1.661514563106796e-05, "loss": 0.0638, "step": 65370 }, { "epoch": 25.39, "learning_rate": 1.6614627831715213e-05, "loss": 0.0042, "step": 65380 }, { "epoch": 25.39, "learning_rate": 1.661411003236246e-05, "loss": 0.0713, "step": 65390 }, { "epoch": 25.4, "learning_rate": 1.661359223300971e-05, "loss": 0.0578, "step": 65400 }, { "epoch": 25.4, "learning_rate": 1.661307443365696e-05, "loss": 0.0989, "step": 65410 }, { "epoch": 25.41, "learning_rate": 1.6612556634304208e-05, "loss": 0.165, "step": 65420 }, { "epoch": 25.41, "learning_rate": 1.661203883495146e-05, "loss": 0.1092, "step": 65430 }, { "epoch": 25.41, "learning_rate": 1.6611521035598707e-05, "loss": 0.0028, "step": 65440 }, { "epoch": 25.42, "learning_rate": 1.6611003236245955e-05, "loss": 0.077, "step": 65450 }, { "epoch": 25.42, "learning_rate": 1.6610485436893206e-05, "loss": 0.0232, "step": 65460 }, { "epoch": 25.43, "learning_rate": 1.6609967637540454e-05, "loss": 0.0792, "step": 65470 }, { "epoch": 25.43, "learning_rate": 1.6609449838187702e-05, "loss": 0.0069, "step": 65480 }, { "epoch": 25.43, "learning_rate": 1.6608932038834953e-05, "loss": 0.1734, "step": 65490 }, { "epoch": 25.44, "learning_rate": 1.66084142394822e-05, "loss": 0.2823, "step": 65500 }, { "epoch": 25.44, "learning_rate": 1.6607896440129453e-05, "loss": 0.1968, "step": 65510 }, { "epoch": 25.44, "learning_rate": 1.66073786407767e-05, "loss": 0.1496, "step": 65520 }, { "epoch": 25.45, "learning_rate": 1.660686084142395e-05, "loss": 0.0885, "step": 65530 }, { "epoch": 25.45, "learning_rate": 1.6606343042071196e-05, "loss": 0.0965, "step": 65540 }, { "epoch": 25.46, "learning_rate": 1.6605825242718448e-05, "loss": 0.0724, "step": 65550 }, { "epoch": 25.46, "learning_rate": 1.6605307443365696e-05, "loss": 0.0954, "step": 65560 }, { "epoch": 25.46, "learning_rate": 1.6604789644012947e-05, "loss": 0.3889, "step": 65570 }, { "epoch": 25.47, "learning_rate": 1.6604271844660195e-05, "loss": 0.0621, "step": 65580 }, { "epoch": 25.47, "learning_rate": 1.6603754045307446e-05, "loss": 0.012, "step": 65590 }, { "epoch": 25.48, "learning_rate": 1.6603236245954694e-05, "loss": 0.0818, "step": 65600 }, { "epoch": 25.48, "learning_rate": 1.6602718446601945e-05, "loss": 0.0617, "step": 65610 }, { "epoch": 25.48, "learning_rate": 1.660220064724919e-05, "loss": 0.0493, "step": 65620 }, { "epoch": 25.49, "learning_rate": 1.660168284789644e-05, "loss": 0.0038, "step": 65630 }, { "epoch": 25.49, "learning_rate": 1.660116504854369e-05, "loss": 0.0706, "step": 65640 }, { "epoch": 25.5, "learning_rate": 1.660064724919094e-05, "loss": 0.0595, "step": 65650 }, { "epoch": 25.5, "learning_rate": 1.660012944983819e-05, "loss": 0.0454, "step": 65660 }, { "epoch": 25.5, "learning_rate": 1.659961165048544e-05, "loss": 0.2334, "step": 65670 }, { "epoch": 25.51, "learning_rate": 1.6599093851132688e-05, "loss": 0.1045, "step": 65680 }, { "epoch": 25.51, "learning_rate": 1.659857605177994e-05, "loss": 0.1457, "step": 65690 }, { "epoch": 25.51, "learning_rate": 1.6598058252427183e-05, "loss": 0.0924, "step": 65700 }, { "epoch": 25.52, "learning_rate": 1.6597540453074435e-05, "loss": 0.0231, "step": 65710 }, { "epoch": 25.52, "learning_rate": 1.6597022653721683e-05, "loss": 0.071, "step": 65720 }, { "epoch": 25.53, "learning_rate": 1.6596504854368934e-05, "loss": 0.1649, "step": 65730 }, { "epoch": 25.53, "learning_rate": 1.6595987055016182e-05, "loss": 0.0943, "step": 65740 }, { "epoch": 25.53, "learning_rate": 1.6595469255663433e-05, "loss": 0.1265, "step": 65750 }, { "epoch": 25.54, "learning_rate": 1.659495145631068e-05, "loss": 0.144, "step": 65760 }, { "epoch": 25.54, "learning_rate": 1.6594433656957932e-05, "loss": 0.0758, "step": 65770 }, { "epoch": 25.55, "learning_rate": 1.6593915857605177e-05, "loss": 0.0286, "step": 65780 }, { "epoch": 25.55, "learning_rate": 1.6593398058252428e-05, "loss": 0.1999, "step": 65790 }, { "epoch": 25.55, "learning_rate": 1.6592880258899676e-05, "loss": 0.0758, "step": 65800 }, { "epoch": 25.56, "learning_rate": 1.6592362459546928e-05, "loss": 0.0779, "step": 65810 }, { "epoch": 25.56, "learning_rate": 1.6591844660194175e-05, "loss": 0.0936, "step": 65820 }, { "epoch": 25.57, "learning_rate": 1.6591326860841427e-05, "loss": 0.1087, "step": 65830 }, { "epoch": 25.57, "learning_rate": 1.6590809061488675e-05, "loss": 0.1522, "step": 65840 }, { "epoch": 25.57, "learning_rate": 1.6590291262135926e-05, "loss": 0.0241, "step": 65850 }, { "epoch": 25.58, "learning_rate": 1.658977346278317e-05, "loss": 0.0383, "step": 65860 }, { "epoch": 25.58, "learning_rate": 1.6589255663430422e-05, "loss": 0.0951, "step": 65870 }, { "epoch": 25.58, "learning_rate": 1.658873786407767e-05, "loss": 0.0159, "step": 65880 }, { "epoch": 25.59, "learning_rate": 1.658822006472492e-05, "loss": 0.1704, "step": 65890 }, { "epoch": 25.59, "learning_rate": 1.658770226537217e-05, "loss": 0.0196, "step": 65900 }, { "epoch": 25.6, "learning_rate": 1.658718446601942e-05, "loss": 0.1635, "step": 65910 }, { "epoch": 25.6, "learning_rate": 1.6586666666666668e-05, "loss": 0.1256, "step": 65920 }, { "epoch": 25.6, "learning_rate": 1.658614886731392e-05, "loss": 0.0464, "step": 65930 }, { "epoch": 25.61, "learning_rate": 1.6585631067961164e-05, "loss": 0.0855, "step": 65940 }, { "epoch": 25.61, "learning_rate": 1.6585113268608415e-05, "loss": 0.0543, "step": 65950 }, { "epoch": 25.62, "learning_rate": 1.6584595469255663e-05, "loss": 0.0056, "step": 65960 }, { "epoch": 25.62, "learning_rate": 1.6584077669902915e-05, "loss": 0.0458, "step": 65970 }, { "epoch": 25.62, "learning_rate": 1.6583559870550163e-05, "loss": 0.1332, "step": 65980 }, { "epoch": 25.63, "learning_rate": 1.6583042071197414e-05, "loss": 0.0896, "step": 65990 }, { "epoch": 25.63, "learning_rate": 1.6582524271844662e-05, "loss": 0.2005, "step": 66000 }, { "epoch": 25.63, "learning_rate": 1.6582006472491913e-05, "loss": 0.1023, "step": 66010 }, { "epoch": 25.64, "learning_rate": 1.6581488673139158e-05, "loss": 0.1373, "step": 66020 }, { "epoch": 25.64, "learning_rate": 1.658097087378641e-05, "loss": 0.1066, "step": 66030 }, { "epoch": 25.65, "learning_rate": 1.6580453074433657e-05, "loss": 0.2469, "step": 66040 }, { "epoch": 25.65, "learning_rate": 1.6579935275080908e-05, "loss": 0.0675, "step": 66050 }, { "epoch": 25.65, "learning_rate": 1.6579417475728156e-05, "loss": 0.0914, "step": 66060 }, { "epoch": 25.66, "learning_rate": 1.6578899676375407e-05, "loss": 0.1389, "step": 66070 }, { "epoch": 25.66, "learning_rate": 1.6578381877022655e-05, "loss": 0.1254, "step": 66080 }, { "epoch": 25.67, "learning_rate": 1.6577864077669907e-05, "loss": 0.1682, "step": 66090 }, { "epoch": 25.67, "learning_rate": 1.657734627831715e-05, "loss": 0.0067, "step": 66100 }, { "epoch": 25.67, "learning_rate": 1.6576828478964403e-05, "loss": 0.2091, "step": 66110 }, { "epoch": 25.68, "learning_rate": 1.657631067961165e-05, "loss": 0.1589, "step": 66120 }, { "epoch": 25.68, "learning_rate": 1.6575792880258902e-05, "loss": 0.0897, "step": 66130 }, { "epoch": 25.69, "learning_rate": 1.657527508090615e-05, "loss": 0.1822, "step": 66140 }, { "epoch": 25.69, "learning_rate": 1.65747572815534e-05, "loss": 0.0703, "step": 66150 }, { "epoch": 25.69, "learning_rate": 1.657423948220065e-05, "loss": 0.2245, "step": 66160 }, { "epoch": 25.7, "learning_rate": 1.65737216828479e-05, "loss": 0.1029, "step": 66170 }, { "epoch": 25.7, "learning_rate": 1.6573203883495148e-05, "loss": 0.1517, "step": 66180 }, { "epoch": 25.7, "learning_rate": 1.6572686084142396e-05, "loss": 0.0774, "step": 66190 }, { "epoch": 25.71, "learning_rate": 1.6572168284789644e-05, "loss": 0.0308, "step": 66200 }, { "epoch": 25.71, "learning_rate": 1.6571650485436895e-05, "loss": 0.0496, "step": 66210 }, { "epoch": 25.72, "learning_rate": 1.6571132686084143e-05, "loss": 0.0789, "step": 66220 }, { "epoch": 25.72, "learning_rate": 1.6570614886731395e-05, "loss": 0.0966, "step": 66230 }, { "epoch": 25.72, "learning_rate": 1.6570097087378642e-05, "loss": 0.1551, "step": 66240 }, { "epoch": 25.73, "learning_rate": 1.6569579288025894e-05, "loss": 0.0209, "step": 66250 }, { "epoch": 25.73, "learning_rate": 1.656906148867314e-05, "loss": 0.0386, "step": 66260 }, { "epoch": 25.74, "learning_rate": 1.656854368932039e-05, "loss": 0.1258, "step": 66270 }, { "epoch": 25.74, "learning_rate": 1.6568025889967638e-05, "loss": 0.1906, "step": 66280 }, { "epoch": 25.74, "learning_rate": 1.656750809061489e-05, "loss": 0.1585, "step": 66290 }, { "epoch": 25.75, "learning_rate": 1.6566990291262137e-05, "loss": 0.0538, "step": 66300 }, { "epoch": 25.75, "learning_rate": 1.6566472491909388e-05, "loss": 0.176, "step": 66310 }, { "epoch": 25.76, "learning_rate": 1.6565954692556636e-05, "loss": 0.0906, "step": 66320 }, { "epoch": 25.76, "learning_rate": 1.6565436893203884e-05, "loss": 0.0805, "step": 66330 }, { "epoch": 25.76, "learning_rate": 1.6564919093851135e-05, "loss": 0.0667, "step": 66340 }, { "epoch": 25.77, "learning_rate": 1.6564401294498383e-05, "loss": 0.1657, "step": 66350 }, { "epoch": 25.77, "learning_rate": 1.656388349514563e-05, "loss": 0.0775, "step": 66360 }, { "epoch": 25.77, "learning_rate": 1.6563365695792882e-05, "loss": 0.0217, "step": 66370 }, { "epoch": 25.78, "learning_rate": 1.656284789644013e-05, "loss": 0.119, "step": 66380 }, { "epoch": 25.78, "learning_rate": 1.656233009708738e-05, "loss": 0.0688, "step": 66390 }, { "epoch": 25.79, "learning_rate": 1.656181229773463e-05, "loss": 0.0591, "step": 66400 }, { "epoch": 25.79, "learning_rate": 1.6561294498381877e-05, "loss": 0.1574, "step": 66410 }, { "epoch": 25.79, "learning_rate": 1.656077669902913e-05, "loss": 0.0634, "step": 66420 }, { "epoch": 25.8, "learning_rate": 1.6560258899676377e-05, "loss": 0.0924, "step": 66430 }, { "epoch": 25.8, "learning_rate": 1.6559741100323625e-05, "loss": 0.1296, "step": 66440 }, { "epoch": 25.81, "learning_rate": 1.6559223300970876e-05, "loss": 0.0251, "step": 66450 }, { "epoch": 25.81, "learning_rate": 1.6558705501618124e-05, "loss": 0.0963, "step": 66460 }, { "epoch": 25.81, "learning_rate": 1.6558187702265375e-05, "loss": 0.246, "step": 66470 }, { "epoch": 25.82, "learning_rate": 1.6557669902912623e-05, "loss": 0.0491, "step": 66480 }, { "epoch": 25.82, "learning_rate": 1.655715210355987e-05, "loss": 0.019, "step": 66490 }, { "epoch": 25.83, "learning_rate": 1.6556634304207122e-05, "loss": 0.1628, "step": 66500 }, { "epoch": 25.83, "learning_rate": 1.655611650485437e-05, "loss": 0.0888, "step": 66510 }, { "epoch": 25.83, "learning_rate": 1.6555598705501618e-05, "loss": 0.231, "step": 66520 }, { "epoch": 25.84, "learning_rate": 1.655508090614887e-05, "loss": 0.0629, "step": 66530 }, { "epoch": 25.84, "learning_rate": 1.6554563106796117e-05, "loss": 0.0587, "step": 66540 }, { "epoch": 25.84, "learning_rate": 1.655404530744337e-05, "loss": 0.0961, "step": 66550 }, { "epoch": 25.85, "learning_rate": 1.6553527508090617e-05, "loss": 0.2032, "step": 66560 }, { "epoch": 25.85, "learning_rate": 1.6553009708737865e-05, "loss": 0.0945, "step": 66570 }, { "epoch": 25.86, "learning_rate": 1.6552491909385116e-05, "loss": 0.0062, "step": 66580 }, { "epoch": 25.86, "learning_rate": 1.6551974110032364e-05, "loss": 0.0089, "step": 66590 }, { "epoch": 25.86, "learning_rate": 1.6551456310679612e-05, "loss": 0.3244, "step": 66600 }, { "epoch": 25.87, "learning_rate": 1.6550938511326863e-05, "loss": 0.1982, "step": 66610 }, { "epoch": 25.87, "learning_rate": 1.655042071197411e-05, "loss": 0.1438, "step": 66620 }, { "epoch": 25.88, "learning_rate": 1.654990291262136e-05, "loss": 0.0872, "step": 66630 }, { "epoch": 25.88, "learning_rate": 1.654938511326861e-05, "loss": 0.1581, "step": 66640 }, { "epoch": 25.88, "learning_rate": 1.6548867313915858e-05, "loss": 0.2025, "step": 66650 }, { "epoch": 25.89, "learning_rate": 1.654834951456311e-05, "loss": 0.2043, "step": 66660 }, { "epoch": 25.89, "learning_rate": 1.6547831715210357e-05, "loss": 0.1725, "step": 66670 }, { "epoch": 25.9, "learning_rate": 1.6547313915857605e-05, "loss": 0.1061, "step": 66680 }, { "epoch": 25.9, "learning_rate": 1.6546796116504857e-05, "loss": 0.4017, "step": 66690 }, { "epoch": 25.9, "learning_rate": 1.6546278317152105e-05, "loss": 0.0259, "step": 66700 }, { "epoch": 25.91, "learning_rate": 1.6545760517799352e-05, "loss": 0.0706, "step": 66710 }, { "epoch": 25.91, "learning_rate": 1.6545242718446604e-05, "loss": 0.0385, "step": 66720 }, { "epoch": 25.91, "learning_rate": 1.6544724919093852e-05, "loss": 0.0597, "step": 66730 }, { "epoch": 25.92, "learning_rate": 1.6544207119741103e-05, "loss": 0.0756, "step": 66740 }, { "epoch": 25.92, "learning_rate": 1.654368932038835e-05, "loss": 0.1139, "step": 66750 }, { "epoch": 25.93, "learning_rate": 1.65431715210356e-05, "loss": 0.0586, "step": 66760 }, { "epoch": 25.93, "learning_rate": 1.654265372168285e-05, "loss": 0.0811, "step": 66770 }, { "epoch": 25.93, "learning_rate": 1.6542135922330098e-05, "loss": 0.1537, "step": 66780 }, { "epoch": 25.94, "learning_rate": 1.6541618122977346e-05, "loss": 0.0024, "step": 66790 }, { "epoch": 25.94, "learning_rate": 1.6541100323624597e-05, "loss": 0.0245, "step": 66800 }, { "epoch": 25.95, "learning_rate": 1.6540582524271845e-05, "loss": 0.1005, "step": 66810 }, { "epoch": 25.95, "learning_rate": 1.6540064724919097e-05, "loss": 0.0486, "step": 66820 }, { "epoch": 25.95, "learning_rate": 1.6539546925566345e-05, "loss": 0.1163, "step": 66830 }, { "epoch": 25.96, "learning_rate": 1.6539029126213592e-05, "loss": 0.0559, "step": 66840 }, { "epoch": 25.96, "learning_rate": 1.6538511326860844e-05, "loss": 0.0251, "step": 66850 }, { "epoch": 25.97, "learning_rate": 1.653799352750809e-05, "loss": 0.0061, "step": 66860 }, { "epoch": 25.97, "learning_rate": 1.653747572815534e-05, "loss": 0.0352, "step": 66870 }, { "epoch": 25.97, "learning_rate": 1.653695792880259e-05, "loss": 0.1817, "step": 66880 }, { "epoch": 25.98, "learning_rate": 1.653644012944984e-05, "loss": 0.151, "step": 66890 }, { "epoch": 25.98, "learning_rate": 1.653592233009709e-05, "loss": 0.038, "step": 66900 }, { "epoch": 25.98, "learning_rate": 1.6535404530744338e-05, "loss": 0.0657, "step": 66910 }, { "epoch": 25.99, "learning_rate": 1.6534886731391586e-05, "loss": 0.282, "step": 66920 }, { "epoch": 25.99, "learning_rate": 1.6534368932038837e-05, "loss": 0.1226, "step": 66930 }, { "epoch": 26.0, "learning_rate": 1.6533851132686085e-05, "loss": 0.1447, "step": 66940 }, { "epoch": 26.0, "learning_rate": 1.6533333333333333e-05, "loss": 0.1757, "step": 66950 }, { "epoch": 26.0, "eval_accuracy": 0.9488308115543329, "eval_loss": 0.2819267511367798, "eval_runtime": 8.2333, "eval_samples_per_second": 441.5, "eval_steps_per_second": 55.263, "step": 66950 }, { "epoch": 26.0, "learning_rate": 1.6532815533980584e-05, "loss": 0.2687, "step": 66960 }, { "epoch": 26.01, "learning_rate": 1.6532297734627832e-05, "loss": 0.0613, "step": 66970 }, { "epoch": 26.01, "learning_rate": 1.6531779935275084e-05, "loss": 0.1617, "step": 66980 }, { "epoch": 26.02, "learning_rate": 1.653126213592233e-05, "loss": 0.1428, "step": 66990 }, { "epoch": 26.02, "learning_rate": 1.653074433656958e-05, "loss": 0.0238, "step": 67000 }, { "epoch": 26.02, "learning_rate": 1.6530226537216827e-05, "loss": 0.1096, "step": 67010 }, { "epoch": 26.03, "learning_rate": 1.652970873786408e-05, "loss": 0.1035, "step": 67020 }, { "epoch": 26.03, "learning_rate": 1.6529190938511327e-05, "loss": 0.142, "step": 67030 }, { "epoch": 26.03, "learning_rate": 1.6528673139158578e-05, "loss": 0.0796, "step": 67040 }, { "epoch": 26.04, "learning_rate": 1.6528155339805826e-05, "loss": 0.1364, "step": 67050 }, { "epoch": 26.04, "learning_rate": 1.6527637540453077e-05, "loss": 0.0902, "step": 67060 }, { "epoch": 26.05, "learning_rate": 1.6527119741100325e-05, "loss": 0.1364, "step": 67070 }, { "epoch": 26.05, "learning_rate": 1.6526601941747573e-05, "loss": 0.0526, "step": 67080 }, { "epoch": 26.05, "learning_rate": 1.652608414239482e-05, "loss": 0.0587, "step": 67090 }, { "epoch": 26.06, "learning_rate": 1.6525566343042072e-05, "loss": 0.2204, "step": 67100 }, { "epoch": 26.06, "learning_rate": 1.652504854368932e-05, "loss": 0.0142, "step": 67110 }, { "epoch": 26.07, "learning_rate": 1.652453074433657e-05, "loss": 0.1129, "step": 67120 }, { "epoch": 26.07, "learning_rate": 1.652401294498382e-05, "loss": 0.0086, "step": 67130 }, { "epoch": 26.07, "learning_rate": 1.652349514563107e-05, "loss": 0.1265, "step": 67140 }, { "epoch": 26.08, "learning_rate": 1.652297734627832e-05, "loss": 0.0014, "step": 67150 }, { "epoch": 26.08, "learning_rate": 1.6522459546925567e-05, "loss": 0.0905, "step": 67160 }, { "epoch": 26.09, "learning_rate": 1.6521941747572815e-05, "loss": 0.0649, "step": 67170 }, { "epoch": 26.09, "learning_rate": 1.6521423948220066e-05, "loss": 0.1742, "step": 67180 }, { "epoch": 26.09, "learning_rate": 1.6520906148867314e-05, "loss": 0.0351, "step": 67190 }, { "epoch": 26.1, "learning_rate": 1.6520388349514565e-05, "loss": 0.1259, "step": 67200 }, { "epoch": 26.1, "learning_rate": 1.6519870550161813e-05, "loss": 0.0691, "step": 67210 }, { "epoch": 26.1, "learning_rate": 1.6519352750809064e-05, "loss": 0.1702, "step": 67220 }, { "epoch": 26.11, "learning_rate": 1.6518834951456312e-05, "loss": 0.1373, "step": 67230 }, { "epoch": 26.11, "learning_rate": 1.651831715210356e-05, "loss": 0.0456, "step": 67240 }, { "epoch": 26.12, "learning_rate": 1.6517799352750808e-05, "loss": 0.0823, "step": 67250 }, { "epoch": 26.12, "learning_rate": 1.651728155339806e-05, "loss": 0.2093, "step": 67260 }, { "epoch": 26.12, "learning_rate": 1.6516763754045307e-05, "loss": 0.0692, "step": 67270 }, { "epoch": 26.13, "learning_rate": 1.651624595469256e-05, "loss": 0.0819, "step": 67280 }, { "epoch": 26.13, "learning_rate": 1.6515728155339807e-05, "loss": 0.1091, "step": 67290 }, { "epoch": 26.14, "learning_rate": 1.6515210355987058e-05, "loss": 0.0627, "step": 67300 }, { "epoch": 26.14, "learning_rate": 1.6514692556634306e-05, "loss": 0.1097, "step": 67310 }, { "epoch": 26.14, "learning_rate": 1.6514174757281557e-05, "loss": 0.1778, "step": 67320 }, { "epoch": 26.15, "learning_rate": 1.6513656957928802e-05, "loss": 0.0217, "step": 67330 }, { "epoch": 26.15, "learning_rate": 1.6513139158576053e-05, "loss": 0.1584, "step": 67340 }, { "epoch": 26.16, "learning_rate": 1.65126213592233e-05, "loss": 0.0014, "step": 67350 }, { "epoch": 26.16, "learning_rate": 1.6512103559870552e-05, "loss": 0.0829, "step": 67360 }, { "epoch": 26.16, "learning_rate": 1.65115857605178e-05, "loss": 0.0368, "step": 67370 }, { "epoch": 26.17, "learning_rate": 1.651106796116505e-05, "loss": 0.0385, "step": 67380 }, { "epoch": 26.17, "learning_rate": 1.65105501618123e-05, "loss": 0.2557, "step": 67390 }, { "epoch": 26.17, "learning_rate": 1.651003236245955e-05, "loss": 0.0544, "step": 67400 }, { "epoch": 26.18, "learning_rate": 1.6509514563106795e-05, "loss": 0.0241, "step": 67410 }, { "epoch": 26.18, "learning_rate": 1.6508996763754047e-05, "loss": 0.0248, "step": 67420 }, { "epoch": 26.19, "learning_rate": 1.6508478964401294e-05, "loss": 0.1129, "step": 67430 }, { "epoch": 26.19, "learning_rate": 1.6507961165048546e-05, "loss": 0.1352, "step": 67440 }, { "epoch": 26.19, "learning_rate": 1.6507443365695794e-05, "loss": 0.111, "step": 67450 }, { "epoch": 26.2, "learning_rate": 1.6506925566343045e-05, "loss": 0.1857, "step": 67460 }, { "epoch": 26.2, "learning_rate": 1.6506407766990293e-05, "loss": 0.134, "step": 67470 }, { "epoch": 26.21, "learning_rate": 1.6505889967637544e-05, "loss": 0.0525, "step": 67480 }, { "epoch": 26.21, "learning_rate": 1.650537216828479e-05, "loss": 0.3058, "step": 67490 }, { "epoch": 26.21, "learning_rate": 1.650485436893204e-05, "loss": 0.0748, "step": 67500 }, { "epoch": 26.22, "learning_rate": 1.6504336569579288e-05, "loss": 0.1086, "step": 67510 }, { "epoch": 26.22, "learning_rate": 1.650381877022654e-05, "loss": 0.0246, "step": 67520 }, { "epoch": 26.23, "learning_rate": 1.6503300970873787e-05, "loss": 0.0554, "step": 67530 }, { "epoch": 26.23, "learning_rate": 1.650278317152104e-05, "loss": 0.0392, "step": 67540 }, { "epoch": 26.23, "learning_rate": 1.6502265372168287e-05, "loss": 0.1675, "step": 67550 }, { "epoch": 26.24, "learning_rate": 1.6501747572815538e-05, "loss": 0.166, "step": 67560 }, { "epoch": 26.24, "learning_rate": 1.6501229773462782e-05, "loss": 0.0819, "step": 67570 }, { "epoch": 26.24, "learning_rate": 1.6500711974110034e-05, "loss": 0.1918, "step": 67580 }, { "epoch": 26.25, "learning_rate": 1.650019417475728e-05, "loss": 0.2159, "step": 67590 }, { "epoch": 26.25, "learning_rate": 1.6499676375404533e-05, "loss": 0.0261, "step": 67600 }, { "epoch": 26.26, "learning_rate": 1.649915857605178e-05, "loss": 0.1132, "step": 67610 }, { "epoch": 26.26, "learning_rate": 1.6498640776699032e-05, "loss": 0.0565, "step": 67620 }, { "epoch": 26.26, "learning_rate": 1.649812297734628e-05, "loss": 0.0723, "step": 67630 }, { "epoch": 26.27, "learning_rate": 1.649760517799353e-05, "loss": 0.0923, "step": 67640 }, { "epoch": 26.27, "learning_rate": 1.6497087378640776e-05, "loss": 0.105, "step": 67650 }, { "epoch": 26.28, "learning_rate": 1.6496569579288027e-05, "loss": 0.0841, "step": 67660 }, { "epoch": 26.28, "learning_rate": 1.6496051779935275e-05, "loss": 0.046, "step": 67670 }, { "epoch": 26.28, "learning_rate": 1.6495533980582526e-05, "loss": 0.1664, "step": 67680 }, { "epoch": 26.29, "learning_rate": 1.6495016181229774e-05, "loss": 0.0819, "step": 67690 }, { "epoch": 26.29, "learning_rate": 1.6494498381877026e-05, "loss": 0.1499, "step": 67700 }, { "epoch": 26.3, "learning_rate": 1.6493980582524274e-05, "loss": 0.0944, "step": 67710 }, { "epoch": 26.3, "learning_rate": 1.6493462783171525e-05, "loss": 0.1706, "step": 67720 }, { "epoch": 26.3, "learning_rate": 1.649294498381877e-05, "loss": 0.3778, "step": 67730 }, { "epoch": 26.31, "learning_rate": 1.649242718446602e-05, "loss": 0.1216, "step": 67740 }, { "epoch": 26.31, "learning_rate": 1.649190938511327e-05, "loss": 0.0524, "step": 67750 }, { "epoch": 26.31, "learning_rate": 1.649139158576052e-05, "loss": 0.0661, "step": 67760 }, { "epoch": 26.32, "learning_rate": 1.6490873786407768e-05, "loss": 0.0861, "step": 67770 }, { "epoch": 26.32, "learning_rate": 1.649035598705502e-05, "loss": 0.058, "step": 67780 }, { "epoch": 26.33, "learning_rate": 1.6489838187702267e-05, "loss": 0.0395, "step": 67790 }, { "epoch": 26.33, "learning_rate": 1.6489320388349515e-05, "loss": 0.0506, "step": 67800 }, { "epoch": 26.33, "learning_rate": 1.6488802588996763e-05, "loss": 0.0605, "step": 67810 }, { "epoch": 26.34, "learning_rate": 1.6488284789644014e-05, "loss": 0.2354, "step": 67820 }, { "epoch": 26.34, "learning_rate": 1.6487766990291262e-05, "loss": 0.0986, "step": 67830 }, { "epoch": 26.35, "learning_rate": 1.6487249190938514e-05, "loss": 0.2555, "step": 67840 }, { "epoch": 26.35, "learning_rate": 1.648673139158576e-05, "loss": 0.1615, "step": 67850 }, { "epoch": 26.35, "learning_rate": 1.6486213592233013e-05, "loss": 0.184, "step": 67860 }, { "epoch": 26.36, "learning_rate": 1.648569579288026e-05, "loss": 0.0522, "step": 67870 }, { "epoch": 26.36, "learning_rate": 1.648517799352751e-05, "loss": 0.1792, "step": 67880 }, { "epoch": 26.37, "learning_rate": 1.648466019417476e-05, "loss": 0.0953, "step": 67890 }, { "epoch": 26.37, "learning_rate": 1.6484142394822008e-05, "loss": 0.1306, "step": 67900 }, { "epoch": 26.37, "learning_rate": 1.6483624595469256e-05, "loss": 0.0471, "step": 67910 }, { "epoch": 26.38, "learning_rate": 1.6483106796116507e-05, "loss": 0.1284, "step": 67920 }, { "epoch": 26.38, "learning_rate": 1.6482588996763755e-05, "loss": 0.0859, "step": 67930 }, { "epoch": 26.38, "learning_rate": 1.6482071197411006e-05, "loss": 0.1845, "step": 67940 }, { "epoch": 26.39, "learning_rate": 1.6481553398058254e-05, "loss": 0.1414, "step": 67950 }, { "epoch": 26.39, "learning_rate": 1.6481035598705502e-05, "loss": 0.1661, "step": 67960 }, { "epoch": 26.4, "learning_rate": 1.6480517799352754e-05, "loss": 0.1141, "step": 67970 }, { "epoch": 26.4, "learning_rate": 1.648e-05, "loss": 0.1002, "step": 67980 }, { "epoch": 26.4, "learning_rate": 1.647948220064725e-05, "loss": 0.0381, "step": 67990 }, { "epoch": 26.41, "learning_rate": 1.64789644012945e-05, "loss": 0.1842, "step": 68000 }, { "epoch": 26.41, "learning_rate": 1.647844660194175e-05, "loss": 0.0699, "step": 68010 }, { "epoch": 26.42, "learning_rate": 1.6477928802589e-05, "loss": 0.1172, "step": 68020 }, { "epoch": 26.42, "learning_rate": 1.6477411003236248e-05, "loss": 0.0726, "step": 68030 }, { "epoch": 26.42, "learning_rate": 1.6476893203883496e-05, "loss": 0.0632, "step": 68040 }, { "epoch": 26.43, "learning_rate": 1.6476375404530747e-05, "loss": 0.1615, "step": 68050 }, { "epoch": 26.43, "learning_rate": 1.6475857605177995e-05, "loss": 0.048, "step": 68060 }, { "epoch": 26.43, "learning_rate": 1.6475339805825243e-05, "loss": 0.1562, "step": 68070 }, { "epoch": 26.44, "learning_rate": 1.6474822006472494e-05, "loss": 0.1404, "step": 68080 }, { "epoch": 26.44, "learning_rate": 1.6474304207119742e-05, "loss": 0.1668, "step": 68090 }, { "epoch": 26.45, "learning_rate": 1.647378640776699e-05, "loss": 0.0352, "step": 68100 }, { "epoch": 26.45, "learning_rate": 1.647326860841424e-05, "loss": 0.0884, "step": 68110 }, { "epoch": 26.45, "learning_rate": 1.647275080906149e-05, "loss": 0.1149, "step": 68120 }, { "epoch": 26.46, "learning_rate": 1.647223300970874e-05, "loss": 0.1411, "step": 68130 }, { "epoch": 26.46, "learning_rate": 1.647171521035599e-05, "loss": 0.1099, "step": 68140 }, { "epoch": 26.47, "learning_rate": 1.6471197411003236e-05, "loss": 0.3024, "step": 68150 }, { "epoch": 26.47, "learning_rate": 1.6470679611650488e-05, "loss": 0.1554, "step": 68160 }, { "epoch": 26.47, "learning_rate": 1.6470161812297736e-05, "loss": 0.194, "step": 68170 }, { "epoch": 26.48, "learning_rate": 1.6469644012944984e-05, "loss": 0.0663, "step": 68180 }, { "epoch": 26.48, "learning_rate": 1.6469126213592235e-05, "loss": 0.2439, "step": 68190 }, { "epoch": 26.49, "learning_rate": 1.6468608414239483e-05, "loss": 0.0642, "step": 68200 }, { "epoch": 26.49, "learning_rate": 1.6468090614886734e-05, "loss": 0.1155, "step": 68210 }, { "epoch": 26.49, "learning_rate": 1.6467572815533982e-05, "loss": 0.048, "step": 68220 }, { "epoch": 26.5, "learning_rate": 1.646705501618123e-05, "loss": 0.1682, "step": 68230 }, { "epoch": 26.5, "learning_rate": 1.646653721682848e-05, "loss": 0.0655, "step": 68240 }, { "epoch": 26.5, "learning_rate": 1.646601941747573e-05, "loss": 0.0573, "step": 68250 }, { "epoch": 26.51, "learning_rate": 1.6465501618122977e-05, "loss": 0.1785, "step": 68260 }, { "epoch": 26.51, "learning_rate": 1.646498381877023e-05, "loss": 0.0853, "step": 68270 }, { "epoch": 26.52, "learning_rate": 1.6464466019417476e-05, "loss": 0.2649, "step": 68280 }, { "epoch": 26.52, "learning_rate": 1.6463948220064728e-05, "loss": 0.2312, "step": 68290 }, { "epoch": 26.52, "learning_rate": 1.6463430420711976e-05, "loss": 0.0677, "step": 68300 }, { "epoch": 26.53, "learning_rate": 1.6462912621359224e-05, "loss": 0.2945, "step": 68310 }, { "epoch": 26.53, "learning_rate": 1.6462394822006475e-05, "loss": 0.0391, "step": 68320 }, { "epoch": 26.54, "learning_rate": 1.6461877022653723e-05, "loss": 0.0836, "step": 68330 }, { "epoch": 26.54, "learning_rate": 1.646135922330097e-05, "loss": 0.013, "step": 68340 }, { "epoch": 26.54, "learning_rate": 1.6460841423948222e-05, "loss": 0.1046, "step": 68350 }, { "epoch": 26.55, "learning_rate": 1.646032362459547e-05, "loss": 0.1799, "step": 68360 }, { "epoch": 26.55, "learning_rate": 1.645980582524272e-05, "loss": 0.0841, "step": 68370 }, { "epoch": 26.56, "learning_rate": 1.645928802588997e-05, "loss": 0.2196, "step": 68380 }, { "epoch": 26.56, "learning_rate": 1.6458770226537217e-05, "loss": 0.2903, "step": 68390 }, { "epoch": 26.56, "learning_rate": 1.645825242718447e-05, "loss": 0.0067, "step": 68400 }, { "epoch": 26.57, "learning_rate": 1.6457734627831716e-05, "loss": 0.0768, "step": 68410 }, { "epoch": 26.57, "learning_rate": 1.6457216828478964e-05, "loss": 0.0294, "step": 68420 }, { "epoch": 26.57, "learning_rate": 1.6456699029126216e-05, "loss": 0.1569, "step": 68430 }, { "epoch": 26.58, "learning_rate": 1.6456181229773464e-05, "loss": 0.1369, "step": 68440 }, { "epoch": 26.58, "learning_rate": 1.6455663430420715e-05, "loss": 0.0673, "step": 68450 }, { "epoch": 26.59, "learning_rate": 1.6455145631067963e-05, "loss": 0.0211, "step": 68460 }, { "epoch": 26.59, "learning_rate": 1.645462783171521e-05, "loss": 0.1621, "step": 68470 }, { "epoch": 26.59, "learning_rate": 1.645411003236246e-05, "loss": 0.0463, "step": 68480 }, { "epoch": 26.6, "learning_rate": 1.645359223300971e-05, "loss": 0.0704, "step": 68490 }, { "epoch": 26.6, "learning_rate": 1.6453074433656958e-05, "loss": 0.0759, "step": 68500 }, { "epoch": 26.61, "learning_rate": 1.645255663430421e-05, "loss": 0.1141, "step": 68510 }, { "epoch": 26.61, "learning_rate": 1.6452038834951457e-05, "loss": 0.1899, "step": 68520 }, { "epoch": 26.61, "learning_rate": 1.645152103559871e-05, "loss": 0.2431, "step": 68530 }, { "epoch": 26.62, "learning_rate": 1.6451003236245956e-05, "loss": 0.066, "step": 68540 }, { "epoch": 26.62, "learning_rate": 1.6450485436893204e-05, "loss": 0.1753, "step": 68550 }, { "epoch": 26.63, "learning_rate": 1.6449967637540452e-05, "loss": 0.0816, "step": 68560 }, { "epoch": 26.63, "learning_rate": 1.6449449838187704e-05, "loss": 0.0254, "step": 68570 }, { "epoch": 26.63, "learning_rate": 1.644893203883495e-05, "loss": 0.1084, "step": 68580 }, { "epoch": 26.64, "learning_rate": 1.6448414239482203e-05, "loss": 0.0773, "step": 68590 }, { "epoch": 26.64, "learning_rate": 1.644789644012945e-05, "loss": 0.0597, "step": 68600 }, { "epoch": 26.64, "learning_rate": 1.6447378640776702e-05, "loss": 0.0197, "step": 68610 }, { "epoch": 26.65, "learning_rate": 1.644686084142395e-05, "loss": 0.0904, "step": 68620 }, { "epoch": 26.65, "learning_rate": 1.6446343042071198e-05, "loss": 0.0857, "step": 68630 }, { "epoch": 26.66, "learning_rate": 1.6445825242718446e-05, "loss": 0.115, "step": 68640 }, { "epoch": 26.66, "learning_rate": 1.6445307443365697e-05, "loss": 0.0874, "step": 68650 }, { "epoch": 26.66, "learning_rate": 1.6444789644012945e-05, "loss": 0.0221, "step": 68660 }, { "epoch": 26.67, "learning_rate": 1.6444271844660196e-05, "loss": 0.1814, "step": 68670 }, { "epoch": 26.67, "learning_rate": 1.6443754045307444e-05, "loss": 0.1038, "step": 68680 }, { "epoch": 26.68, "learning_rate": 1.6443236245954696e-05, "loss": 0.1718, "step": 68690 }, { "epoch": 26.68, "learning_rate": 1.6442718446601943e-05, "loss": 0.1603, "step": 68700 }, { "epoch": 26.68, "learning_rate": 1.644220064724919e-05, "loss": 0.0919, "step": 68710 }, { "epoch": 26.69, "learning_rate": 1.644168284789644e-05, "loss": 0.0979, "step": 68720 }, { "epoch": 26.69, "learning_rate": 1.644116504854369e-05, "loss": 0.0655, "step": 68730 }, { "epoch": 26.7, "learning_rate": 1.644064724919094e-05, "loss": 0.0955, "step": 68740 }, { "epoch": 26.7, "learning_rate": 1.644012944983819e-05, "loss": 0.0621, "step": 68750 }, { "epoch": 26.7, "learning_rate": 1.6439611650485438e-05, "loss": 0.0737, "step": 68760 }, { "epoch": 26.71, "learning_rate": 1.643909385113269e-05, "loss": 0.0964, "step": 68770 }, { "epoch": 26.71, "learning_rate": 1.6438576051779937e-05, "loss": 0.0451, "step": 68780 }, { "epoch": 26.71, "learning_rate": 1.6438058252427185e-05, "loss": 0.0984, "step": 68790 }, { "epoch": 26.72, "learning_rate": 1.6437540453074433e-05, "loss": 0.0835, "step": 68800 }, { "epoch": 26.72, "learning_rate": 1.6437022653721684e-05, "loss": 0.0407, "step": 68810 }, { "epoch": 26.73, "learning_rate": 1.6436504854368932e-05, "loss": 0.182, "step": 68820 }, { "epoch": 26.73, "learning_rate": 1.6435987055016183e-05, "loss": 0.0139, "step": 68830 }, { "epoch": 26.73, "learning_rate": 1.643546925566343e-05, "loss": 0.053, "step": 68840 }, { "epoch": 26.74, "learning_rate": 1.6434951456310683e-05, "loss": 0.1084, "step": 68850 }, { "epoch": 26.74, "learning_rate": 1.643443365695793e-05, "loss": 0.0142, "step": 68860 }, { "epoch": 26.75, "learning_rate": 1.643391585760518e-05, "loss": 0.0493, "step": 68870 }, { "epoch": 26.75, "learning_rate": 1.6433398058252426e-05, "loss": 0.0842, "step": 68880 }, { "epoch": 26.75, "learning_rate": 1.6432880258899678e-05, "loss": 0.0839, "step": 68890 }, { "epoch": 26.76, "learning_rate": 1.6432362459546926e-05, "loss": 0.0785, "step": 68900 }, { "epoch": 26.76, "learning_rate": 1.6431844660194177e-05, "loss": 0.0374, "step": 68910 }, { "epoch": 26.77, "learning_rate": 1.6431326860841425e-05, "loss": 0.1377, "step": 68920 }, { "epoch": 26.77, "learning_rate": 1.6430809061488676e-05, "loss": 0.1935, "step": 68930 }, { "epoch": 26.77, "learning_rate": 1.6430291262135924e-05, "loss": 0.1529, "step": 68940 }, { "epoch": 26.78, "learning_rate": 1.6429773462783172e-05, "loss": 0.1946, "step": 68950 }, { "epoch": 26.78, "learning_rate": 1.642925566343042e-05, "loss": 0.0249, "step": 68960 }, { "epoch": 26.78, "learning_rate": 1.642873786407767e-05, "loss": 0.0871, "step": 68970 }, { "epoch": 26.79, "learning_rate": 1.642822006472492e-05, "loss": 0.1439, "step": 68980 }, { "epoch": 26.79, "learning_rate": 1.642770226537217e-05, "loss": 0.077, "step": 68990 }, { "epoch": 26.8, "learning_rate": 1.642718446601942e-05, "loss": 0.1414, "step": 69000 }, { "epoch": 26.8, "learning_rate": 1.642666666666667e-05, "loss": 0.1922, "step": 69010 }, { "epoch": 26.8, "learning_rate": 1.6426148867313918e-05, "loss": 0.0268, "step": 69020 }, { "epoch": 26.81, "learning_rate": 1.642563106796117e-05, "loss": 0.1738, "step": 69030 }, { "epoch": 26.81, "learning_rate": 1.6425113268608414e-05, "loss": 0.0785, "step": 69040 }, { "epoch": 26.82, "learning_rate": 1.6424595469255665e-05, "loss": 0.224, "step": 69050 }, { "epoch": 26.82, "learning_rate": 1.6424077669902913e-05, "loss": 0.1111, "step": 69060 }, { "epoch": 26.82, "learning_rate": 1.6423559870550164e-05, "loss": 0.0062, "step": 69070 }, { "epoch": 26.83, "learning_rate": 1.6423042071197412e-05, "loss": 0.0424, "step": 69080 }, { "epoch": 26.83, "learning_rate": 1.6422524271844663e-05, "loss": 0.0462, "step": 69090 }, { "epoch": 26.83, "learning_rate": 1.642200647249191e-05, "loss": 0.1828, "step": 69100 }, { "epoch": 26.84, "learning_rate": 1.6421488673139163e-05, "loss": 0.2045, "step": 69110 }, { "epoch": 26.84, "learning_rate": 1.6420970873786407e-05, "loss": 0.0117, "step": 69120 }, { "epoch": 26.85, "learning_rate": 1.642045307443366e-05, "loss": 0.1222, "step": 69130 }, { "epoch": 26.85, "learning_rate": 1.6419935275080906e-05, "loss": 0.1172, "step": 69140 }, { "epoch": 26.85, "learning_rate": 1.6419417475728158e-05, "loss": 0.1005, "step": 69150 }, { "epoch": 26.86, "learning_rate": 1.6418899676375406e-05, "loss": 0.0821, "step": 69160 }, { "epoch": 26.86, "learning_rate": 1.6418381877022657e-05, "loss": 0.0643, "step": 69170 }, { "epoch": 26.87, "learning_rate": 1.6417864077669905e-05, "loss": 0.1347, "step": 69180 }, { "epoch": 26.87, "learning_rate": 1.6417346278317156e-05, "loss": 0.1221, "step": 69190 }, { "epoch": 26.87, "learning_rate": 1.64168284789644e-05, "loss": 0.1827, "step": 69200 }, { "epoch": 26.88, "learning_rate": 1.6416310679611652e-05, "loss": 0.0663, "step": 69210 }, { "epoch": 26.88, "learning_rate": 1.64157928802589e-05, "loss": 0.0018, "step": 69220 }, { "epoch": 26.89, "learning_rate": 1.641527508090615e-05, "loss": 0.0528, "step": 69230 }, { "epoch": 26.89, "learning_rate": 1.64147572815534e-05, "loss": 0.0617, "step": 69240 }, { "epoch": 26.89, "learning_rate": 1.641423948220065e-05, "loss": 0.164, "step": 69250 }, { "epoch": 26.9, "learning_rate": 1.64137216828479e-05, "loss": 0.0719, "step": 69260 }, { "epoch": 26.9, "learning_rate": 1.6413203883495146e-05, "loss": 0.1525, "step": 69270 }, { "epoch": 26.9, "learning_rate": 1.6412686084142394e-05, "loss": 0.0313, "step": 69280 }, { "epoch": 26.91, "learning_rate": 1.6412168284789646e-05, "loss": 0.0033, "step": 69290 }, { "epoch": 26.91, "learning_rate": 1.6411650485436893e-05, "loss": 0.0746, "step": 69300 }, { "epoch": 26.92, "learning_rate": 1.6411132686084145e-05, "loss": 0.0183, "step": 69310 }, { "epoch": 26.92, "learning_rate": 1.6410614886731393e-05, "loss": 0.4061, "step": 69320 }, { "epoch": 26.92, "learning_rate": 1.6410097087378644e-05, "loss": 0.1769, "step": 69330 }, { "epoch": 26.93, "learning_rate": 1.6409579288025892e-05, "loss": 0.1555, "step": 69340 }, { "epoch": 26.93, "learning_rate": 1.640906148867314e-05, "loss": 0.2055, "step": 69350 }, { "epoch": 26.94, "learning_rate": 1.6408543689320388e-05, "loss": 0.0788, "step": 69360 }, { "epoch": 26.94, "learning_rate": 1.640802588996764e-05, "loss": 0.0964, "step": 69370 }, { "epoch": 26.94, "learning_rate": 1.6407508090614887e-05, "loss": 0.0662, "step": 69380 }, { "epoch": 26.95, "learning_rate": 1.640699029126214e-05, "loss": 0.1674, "step": 69390 }, { "epoch": 26.95, "learning_rate": 1.6406472491909386e-05, "loss": 0.0424, "step": 69400 }, { "epoch": 26.96, "learning_rate": 1.6405954692556638e-05, "loss": 0.0581, "step": 69410 }, { "epoch": 26.96, "learning_rate": 1.6405436893203885e-05, "loss": 0.102, "step": 69420 }, { "epoch": 26.96, "learning_rate": 1.6404919093851133e-05, "loss": 0.138, "step": 69430 }, { "epoch": 26.97, "learning_rate": 1.640440129449838e-05, "loss": 0.0978, "step": 69440 }, { "epoch": 26.97, "learning_rate": 1.6403883495145633e-05, "loss": 0.109, "step": 69450 }, { "epoch": 26.97, "learning_rate": 1.640336569579288e-05, "loss": 0.2634, "step": 69460 }, { "epoch": 26.98, "learning_rate": 1.6402847896440132e-05, "loss": 0.1017, "step": 69470 }, { "epoch": 26.98, "learning_rate": 1.640233009708738e-05, "loss": 0.0072, "step": 69480 }, { "epoch": 26.99, "learning_rate": 1.640181229773463e-05, "loss": 0.1288, "step": 69490 }, { "epoch": 26.99, "learning_rate": 1.640129449838188e-05, "loss": 0.1487, "step": 69500 }, { "epoch": 26.99, "learning_rate": 1.6400776699029127e-05, "loss": 0.0333, "step": 69510 }, { "epoch": 27.0, "learning_rate": 1.6400258899676378e-05, "loss": 0.1257, "step": 69520 }, { "epoch": 27.0, "eval_accuracy": 0.9521320495185694, "eval_loss": 0.27078500390052795, "eval_runtime": 8.1588, "eval_samples_per_second": 445.531, "eval_steps_per_second": 55.768, "step": 69525 }, { "epoch": 27.0, "learning_rate": 1.6399741100323626e-05, "loss": 0.0804, "step": 69530 }, { "epoch": 27.01, "learning_rate": 1.6399223300970874e-05, "loss": 0.007, "step": 69540 }, { "epoch": 27.01, "learning_rate": 1.6398705501618125e-05, "loss": 0.1715, "step": 69550 }, { "epoch": 27.01, "learning_rate": 1.6398187702265373e-05, "loss": 0.0659, "step": 69560 }, { "epoch": 27.02, "learning_rate": 1.639766990291262e-05, "loss": 0.008, "step": 69570 }, { "epoch": 27.02, "learning_rate": 1.6397152103559873e-05, "loss": 0.0922, "step": 69580 }, { "epoch": 27.03, "learning_rate": 1.639663430420712e-05, "loss": 0.0303, "step": 69590 }, { "epoch": 27.03, "learning_rate": 1.6396116504854372e-05, "loss": 0.1504, "step": 69600 }, { "epoch": 27.03, "learning_rate": 1.639559870550162e-05, "loss": 0.1904, "step": 69610 }, { "epoch": 27.04, "learning_rate": 1.6395080906148868e-05, "loss": 0.0808, "step": 69620 }, { "epoch": 27.04, "learning_rate": 1.639456310679612e-05, "loss": 0.1243, "step": 69630 }, { "epoch": 27.04, "learning_rate": 1.6394045307443367e-05, "loss": 0.0445, "step": 69640 }, { "epoch": 27.05, "learning_rate": 1.6393527508090615e-05, "loss": 0.2697, "step": 69650 }, { "epoch": 27.05, "learning_rate": 1.6393009708737866e-05, "loss": 0.2868, "step": 69660 }, { "epoch": 27.06, "learning_rate": 1.6392491909385114e-05, "loss": 0.0066, "step": 69670 }, { "epoch": 27.06, "learning_rate": 1.6391974110032365e-05, "loss": 0.2327, "step": 69680 }, { "epoch": 27.06, "learning_rate": 1.6391456310679613e-05, "loss": 0.1195, "step": 69690 }, { "epoch": 27.07, "learning_rate": 1.639093851132686e-05, "loss": 0.1175, "step": 69700 }, { "epoch": 27.07, "learning_rate": 1.6390420711974113e-05, "loss": 0.156, "step": 69710 }, { "epoch": 27.08, "learning_rate": 1.638990291262136e-05, "loss": 0.0828, "step": 69720 }, { "epoch": 27.08, "learning_rate": 1.638938511326861e-05, "loss": 0.0129, "step": 69730 }, { "epoch": 27.08, "learning_rate": 1.638886731391586e-05, "loss": 0.0173, "step": 69740 }, { "epoch": 27.09, "learning_rate": 1.6388349514563108e-05, "loss": 0.1075, "step": 69750 }, { "epoch": 27.09, "learning_rate": 1.638783171521036e-05, "loss": 0.1064, "step": 69760 }, { "epoch": 27.1, "learning_rate": 1.6387313915857607e-05, "loss": 0.0943, "step": 69770 }, { "epoch": 27.1, "learning_rate": 1.6386796116504855e-05, "loss": 0.0281, "step": 69780 }, { "epoch": 27.1, "learning_rate": 1.6386278317152106e-05, "loss": 0.1748, "step": 69790 }, { "epoch": 27.11, "learning_rate": 1.6385760517799354e-05, "loss": 0.0947, "step": 69800 }, { "epoch": 27.11, "learning_rate": 1.6385242718446602e-05, "loss": 0.1122, "step": 69810 }, { "epoch": 27.11, "learning_rate": 1.6384724919093853e-05, "loss": 0.0615, "step": 69820 }, { "epoch": 27.12, "learning_rate": 1.63842071197411e-05, "loss": 0.0361, "step": 69830 }, { "epoch": 27.12, "learning_rate": 1.6383689320388352e-05, "loss": 0.0861, "step": 69840 }, { "epoch": 27.13, "learning_rate": 1.63831715210356e-05, "loss": 0.1223, "step": 69850 }, { "epoch": 27.13, "learning_rate": 1.638265372168285e-05, "loss": 0.1896, "step": 69860 }, { "epoch": 27.13, "learning_rate": 1.63821359223301e-05, "loss": 0.0317, "step": 69870 }, { "epoch": 27.14, "learning_rate": 1.6381618122977348e-05, "loss": 0.1458, "step": 69880 }, { "epoch": 27.14, "learning_rate": 1.6381100323624595e-05, "loss": 0.2369, "step": 69890 }, { "epoch": 27.15, "learning_rate": 1.6380582524271847e-05, "loss": 0.2179, "step": 69900 }, { "epoch": 27.15, "learning_rate": 1.6380064724919095e-05, "loss": 0.0202, "step": 69910 }, { "epoch": 27.15, "learning_rate": 1.6379546925566346e-05, "loss": 0.0373, "step": 69920 }, { "epoch": 27.16, "learning_rate": 1.6379029126213594e-05, "loss": 0.1711, "step": 69930 }, { "epoch": 27.16, "learning_rate": 1.6378511326860842e-05, "loss": 0.0783, "step": 69940 }, { "epoch": 27.17, "learning_rate": 1.637799352750809e-05, "loss": 0.1276, "step": 69950 }, { "epoch": 27.17, "learning_rate": 1.637747572815534e-05, "loss": 0.1557, "step": 69960 }, { "epoch": 27.17, "learning_rate": 1.637695792880259e-05, "loss": 0.2191, "step": 69970 }, { "epoch": 27.18, "learning_rate": 1.637644012944984e-05, "loss": 0.1372, "step": 69980 }, { "epoch": 27.18, "learning_rate": 1.6375922330097088e-05, "loss": 0.0379, "step": 69990 }, { "epoch": 27.18, "learning_rate": 1.637540453074434e-05, "loss": 0.0897, "step": 70000 }, { "epoch": 27.19, "learning_rate": 1.6374886731391588e-05, "loss": 0.0311, "step": 70010 }, { "epoch": 27.19, "learning_rate": 1.6374368932038835e-05, "loss": 0.0144, "step": 70020 }, { "epoch": 27.2, "learning_rate": 1.6373851132686083e-05, "loss": 0.1099, "step": 70030 }, { "epoch": 27.2, "learning_rate": 1.6373333333333335e-05, "loss": 0.1565, "step": 70040 }, { "epoch": 27.2, "learning_rate": 1.6372815533980583e-05, "loss": 0.035, "step": 70050 }, { "epoch": 27.21, "learning_rate": 1.6372297734627834e-05, "loss": 0.1513, "step": 70060 }, { "epoch": 27.21, "learning_rate": 1.6371779935275082e-05, "loss": 0.0494, "step": 70070 }, { "epoch": 27.22, "learning_rate": 1.6371262135922333e-05, "loss": 0.0836, "step": 70080 }, { "epoch": 27.22, "learning_rate": 1.637074433656958e-05, "loss": 0.0668, "step": 70090 }, { "epoch": 27.22, "learning_rate": 1.637022653721683e-05, "loss": 0.1179, "step": 70100 }, { "epoch": 27.23, "learning_rate": 1.6369708737864077e-05, "loss": 0.0653, "step": 70110 }, { "epoch": 27.23, "learning_rate": 1.6369190938511328e-05, "loss": 0.1278, "step": 70120 }, { "epoch": 27.23, "learning_rate": 1.6368673139158576e-05, "loss": 0.2285, "step": 70130 }, { "epoch": 27.24, "learning_rate": 1.6368155339805827e-05, "loss": 0.0413, "step": 70140 }, { "epoch": 27.24, "learning_rate": 1.6367637540453075e-05, "loss": 0.1451, "step": 70150 }, { "epoch": 27.25, "learning_rate": 1.6367119741100327e-05, "loss": 0.0518, "step": 70160 }, { "epoch": 27.25, "learning_rate": 1.6366601941747575e-05, "loss": 0.0663, "step": 70170 }, { "epoch": 27.25, "learning_rate": 1.6366084142394823e-05, "loss": 0.2328, "step": 70180 }, { "epoch": 27.26, "learning_rate": 1.636556634304207e-05, "loss": 0.0438, "step": 70190 }, { "epoch": 27.26, "learning_rate": 1.6365048543689322e-05, "loss": 0.1098, "step": 70200 }, { "epoch": 27.27, "learning_rate": 1.636453074433657e-05, "loss": 0.0735, "step": 70210 }, { "epoch": 27.27, "learning_rate": 1.636401294498382e-05, "loss": 0.1458, "step": 70220 }, { "epoch": 27.27, "learning_rate": 1.636349514563107e-05, "loss": 0.0174, "step": 70230 }, { "epoch": 27.28, "learning_rate": 1.636297734627832e-05, "loss": 0.1714, "step": 70240 }, { "epoch": 27.28, "learning_rate": 1.6362459546925568e-05, "loss": 0.0386, "step": 70250 }, { "epoch": 27.29, "learning_rate": 1.6361941747572816e-05, "loss": 0.1508, "step": 70260 }, { "epoch": 27.29, "learning_rate": 1.6361423948220064e-05, "loss": 0.0589, "step": 70270 }, { "epoch": 27.29, "learning_rate": 1.6360906148867315e-05, "loss": 0.1071, "step": 70280 }, { "epoch": 27.3, "learning_rate": 1.6360388349514563e-05, "loss": 0.2155, "step": 70290 }, { "epoch": 27.3, "learning_rate": 1.6359870550161815e-05, "loss": 0.3224, "step": 70300 }, { "epoch": 27.3, "learning_rate": 1.6359352750809063e-05, "loss": 0.0213, "step": 70310 }, { "epoch": 27.31, "learning_rate": 1.6358834951456314e-05, "loss": 0.1747, "step": 70320 }, { "epoch": 27.31, "learning_rate": 1.6358317152103562e-05, "loss": 0.087, "step": 70330 }, { "epoch": 27.32, "learning_rate": 1.635779935275081e-05, "loss": 0.1314, "step": 70340 }, { "epoch": 27.32, "learning_rate": 1.6357281553398058e-05, "loss": 0.0077, "step": 70350 }, { "epoch": 27.32, "learning_rate": 1.635676375404531e-05, "loss": 0.2909, "step": 70360 }, { "epoch": 27.33, "learning_rate": 1.6356245954692557e-05, "loss": 0.0812, "step": 70370 }, { "epoch": 27.33, "learning_rate": 1.6355728155339808e-05, "loss": 0.0941, "step": 70380 }, { "epoch": 27.34, "learning_rate": 1.6355210355987056e-05, "loss": 0.0899, "step": 70390 }, { "epoch": 27.34, "learning_rate": 1.6354692556634307e-05, "loss": 0.0732, "step": 70400 }, { "epoch": 27.34, "learning_rate": 1.6354174757281555e-05, "loss": 0.115, "step": 70410 }, { "epoch": 27.35, "learning_rate": 1.6353656957928803e-05, "loss": 0.0934, "step": 70420 }, { "epoch": 27.35, "learning_rate": 1.635313915857605e-05, "loss": 0.1289, "step": 70430 }, { "epoch": 27.36, "learning_rate": 1.6352621359223302e-05, "loss": 0.0564, "step": 70440 }, { "epoch": 27.36, "learning_rate": 1.635210355987055e-05, "loss": 0.0013, "step": 70450 }, { "epoch": 27.36, "learning_rate": 1.6351585760517802e-05, "loss": 0.0507, "step": 70460 }, { "epoch": 27.37, "learning_rate": 1.635106796116505e-05, "loss": 0.097, "step": 70470 }, { "epoch": 27.37, "learning_rate": 1.63505501618123e-05, "loss": 0.1101, "step": 70480 }, { "epoch": 27.37, "learning_rate": 1.635003236245955e-05, "loss": 0.2099, "step": 70490 }, { "epoch": 27.38, "learning_rate": 1.6349514563106797e-05, "loss": 0.1976, "step": 70500 }, { "epoch": 27.38, "learning_rate": 1.6348996763754045e-05, "loss": 0.0594, "step": 70510 }, { "epoch": 27.39, "learning_rate": 1.6348478964401296e-05, "loss": 0.1722, "step": 70520 }, { "epoch": 27.39, "learning_rate": 1.6347961165048544e-05, "loss": 0.1168, "step": 70530 }, { "epoch": 27.39, "learning_rate": 1.6347443365695795e-05, "loss": 0.1872, "step": 70540 }, { "epoch": 27.4, "learning_rate": 1.6346925566343043e-05, "loss": 0.0029, "step": 70550 }, { "epoch": 27.4, "learning_rate": 1.6346407766990294e-05, "loss": 0.0105, "step": 70560 }, { "epoch": 27.41, "learning_rate": 1.6345889967637542e-05, "loss": 0.2131, "step": 70570 }, { "epoch": 27.41, "learning_rate": 1.634537216828479e-05, "loss": 0.0265, "step": 70580 }, { "epoch": 27.41, "learning_rate": 1.6344854368932038e-05, "loss": 0.1323, "step": 70590 }, { "epoch": 27.42, "learning_rate": 1.634433656957929e-05, "loss": 0.0396, "step": 70600 }, { "epoch": 27.42, "learning_rate": 1.6343818770226537e-05, "loss": 0.0135, "step": 70610 }, { "epoch": 27.43, "learning_rate": 1.634330097087379e-05, "loss": 0.0021, "step": 70620 }, { "epoch": 27.43, "learning_rate": 1.6342783171521037e-05, "loss": 0.2407, "step": 70630 }, { "epoch": 27.43, "learning_rate": 1.6342265372168288e-05, "loss": 0.0634, "step": 70640 }, { "epoch": 27.44, "learning_rate": 1.6341747572815536e-05, "loss": 0.0477, "step": 70650 }, { "epoch": 27.44, "learning_rate": 1.6341229773462787e-05, "loss": 0.0612, "step": 70660 }, { "epoch": 27.44, "learning_rate": 1.6340711974110032e-05, "loss": 0.0441, "step": 70670 }, { "epoch": 27.45, "learning_rate": 1.6340194174757283e-05, "loss": 0.071, "step": 70680 }, { "epoch": 27.45, "learning_rate": 1.633967637540453e-05, "loss": 0.0482, "step": 70690 }, { "epoch": 27.46, "learning_rate": 1.6339158576051782e-05, "loss": 0.1563, "step": 70700 }, { "epoch": 27.46, "learning_rate": 1.633864077669903e-05, "loss": 0.2769, "step": 70710 }, { "epoch": 27.46, "learning_rate": 1.633812297734628e-05, "loss": 0.0251, "step": 70720 }, { "epoch": 27.47, "learning_rate": 1.633760517799353e-05, "loss": 0.1206, "step": 70730 }, { "epoch": 27.47, "learning_rate": 1.6337087378640777e-05, "loss": 0.0897, "step": 70740 }, { "epoch": 27.48, "learning_rate": 1.6336569579288025e-05, "loss": 0.2182, "step": 70750 }, { "epoch": 27.48, "learning_rate": 1.6336051779935277e-05, "loss": 0.0534, "step": 70760 }, { "epoch": 27.48, "learning_rate": 1.6335533980582525e-05, "loss": 0.1006, "step": 70770 }, { "epoch": 27.49, "learning_rate": 1.6335016181229776e-05, "loss": 0.0849, "step": 70780 }, { "epoch": 27.49, "learning_rate": 1.6334498381877024e-05, "loss": 0.0947, "step": 70790 }, { "epoch": 27.5, "learning_rate": 1.6333980582524275e-05, "loss": 0.0895, "step": 70800 }, { "epoch": 27.5, "learning_rate": 1.6333462783171523e-05, "loss": 0.1353, "step": 70810 }, { "epoch": 27.5, "learning_rate": 1.633294498381877e-05, "loss": 0.0281, "step": 70820 }, { "epoch": 27.51, "learning_rate": 1.633242718446602e-05, "loss": 0.111, "step": 70830 }, { "epoch": 27.51, "learning_rate": 1.633190938511327e-05, "loss": 0.1613, "step": 70840 }, { "epoch": 27.51, "learning_rate": 1.6331391585760518e-05, "loss": 0.0099, "step": 70850 }, { "epoch": 27.52, "learning_rate": 1.633087378640777e-05, "loss": 0.1011, "step": 70860 }, { "epoch": 27.52, "learning_rate": 1.6330355987055017e-05, "loss": 0.1146, "step": 70870 }, { "epoch": 27.53, "learning_rate": 1.632983818770227e-05, "loss": 0.0456, "step": 70880 }, { "epoch": 27.53, "learning_rate": 1.6329320388349517e-05, "loss": 0.082, "step": 70890 }, { "epoch": 27.53, "learning_rate": 1.6328802588996765e-05, "loss": 0.0691, "step": 70900 }, { "epoch": 27.54, "learning_rate": 1.6328284789644012e-05, "loss": 0.0781, "step": 70910 }, { "epoch": 27.54, "learning_rate": 1.6327766990291264e-05, "loss": 0.2287, "step": 70920 }, { "epoch": 27.55, "learning_rate": 1.6327249190938512e-05, "loss": 0.1457, "step": 70930 }, { "epoch": 27.55, "learning_rate": 1.6326731391585763e-05, "loss": 0.0894, "step": 70940 }, { "epoch": 27.55, "learning_rate": 1.632621359223301e-05, "loss": 0.1618, "step": 70950 }, { "epoch": 27.56, "learning_rate": 1.6325695792880262e-05, "loss": 0.0891, "step": 70960 }, { "epoch": 27.56, "learning_rate": 1.632517799352751e-05, "loss": 0.0198, "step": 70970 }, { "epoch": 27.57, "learning_rate": 1.6324660194174758e-05, "loss": 0.1308, "step": 70980 }, { "epoch": 27.57, "learning_rate": 1.6324142394822006e-05, "loss": 0.1351, "step": 70990 }, { "epoch": 27.57, "learning_rate": 1.6323624595469257e-05, "loss": 0.041, "step": 71000 }, { "epoch": 27.58, "learning_rate": 1.6323106796116505e-05, "loss": 0.0527, "step": 71010 }, { "epoch": 27.58, "learning_rate": 1.6322588996763757e-05, "loss": 0.083, "step": 71020 }, { "epoch": 27.58, "learning_rate": 1.6322071197411005e-05, "loss": 0.0965, "step": 71030 }, { "epoch": 27.59, "learning_rate": 1.6321553398058252e-05, "loss": 0.0661, "step": 71040 }, { "epoch": 27.59, "learning_rate": 1.6321035598705504e-05, "loss": 0.1696, "step": 71050 }, { "epoch": 27.6, "learning_rate": 1.632051779935275e-05, "loss": 0.1174, "step": 71060 }, { "epoch": 27.6, "learning_rate": 1.632e-05, "loss": 0.0841, "step": 71070 }, { "epoch": 27.6, "learning_rate": 1.631948220064725e-05, "loss": 0.1746, "step": 71080 }, { "epoch": 27.61, "learning_rate": 1.63189644012945e-05, "loss": 0.0443, "step": 71090 }, { "epoch": 27.61, "learning_rate": 1.631844660194175e-05, "loss": 0.1345, "step": 71100 }, { "epoch": 27.62, "learning_rate": 1.6317928802588998e-05, "loss": 0.0301, "step": 71110 }, { "epoch": 27.62, "learning_rate": 1.6317411003236246e-05, "loss": 0.0681, "step": 71120 }, { "epoch": 27.62, "learning_rate": 1.6316893203883497e-05, "loss": 0.2174, "step": 71130 }, { "epoch": 27.63, "learning_rate": 1.6316375404530745e-05, "loss": 0.3897, "step": 71140 }, { "epoch": 27.63, "learning_rate": 1.6315857605177993e-05, "loss": 0.0434, "step": 71150 }, { "epoch": 27.63, "learning_rate": 1.6315339805825244e-05, "loss": 0.0844, "step": 71160 }, { "epoch": 27.64, "learning_rate": 1.6314822006472492e-05, "loss": 0.0983, "step": 71170 }, { "epoch": 27.64, "learning_rate": 1.6314304207119744e-05, "loss": 0.1079, "step": 71180 }, { "epoch": 27.65, "learning_rate": 1.631378640776699e-05, "loss": 0.0525, "step": 71190 }, { "epoch": 27.65, "learning_rate": 1.631326860841424e-05, "loss": 0.1151, "step": 71200 }, { "epoch": 27.65, "learning_rate": 1.631275080906149e-05, "loss": 0.2254, "step": 71210 }, { "epoch": 27.66, "learning_rate": 1.631223300970874e-05, "loss": 0.1428, "step": 71220 }, { "epoch": 27.66, "learning_rate": 1.631171521035599e-05, "loss": 0.1309, "step": 71230 }, { "epoch": 27.67, "learning_rate": 1.6311197411003238e-05, "loss": 0.0007, "step": 71240 }, { "epoch": 27.67, "learning_rate": 1.6310679611650486e-05, "loss": 0.0897, "step": 71250 }, { "epoch": 27.67, "learning_rate": 1.6310161812297737e-05, "loss": 0.1592, "step": 71260 }, { "epoch": 27.68, "learning_rate": 1.6309644012944985e-05, "loss": 0.1438, "step": 71270 }, { "epoch": 27.68, "learning_rate": 1.6309126213592233e-05, "loss": 0.1861, "step": 71280 }, { "epoch": 27.69, "learning_rate": 1.6308608414239484e-05, "loss": 0.0301, "step": 71290 }, { "epoch": 27.69, "learning_rate": 1.6308090614886732e-05, "loss": 0.1626, "step": 71300 }, { "epoch": 27.69, "learning_rate": 1.6307572815533984e-05, "loss": 0.0728, "step": 71310 }, { "epoch": 27.7, "learning_rate": 1.630705501618123e-05, "loss": 0.0821, "step": 71320 }, { "epoch": 27.7, "learning_rate": 1.630653721682848e-05, "loss": 0.0519, "step": 71330 }, { "epoch": 27.7, "learning_rate": 1.630601941747573e-05, "loss": 0.0881, "step": 71340 }, { "epoch": 27.71, "learning_rate": 1.630550161812298e-05, "loss": 0.043, "step": 71350 }, { "epoch": 27.71, "learning_rate": 1.6304983818770227e-05, "loss": 0.1049, "step": 71360 }, { "epoch": 27.72, "learning_rate": 1.6304466019417478e-05, "loss": 0.2282, "step": 71370 }, { "epoch": 27.72, "learning_rate": 1.6303948220064726e-05, "loss": 0.0611, "step": 71380 }, { "epoch": 27.72, "learning_rate": 1.6303430420711977e-05, "loss": 0.0403, "step": 71390 }, { "epoch": 27.73, "learning_rate": 1.6302912621359225e-05, "loss": 0.0271, "step": 71400 }, { "epoch": 27.73, "learning_rate": 1.6302394822006473e-05, "loss": 0.0658, "step": 71410 }, { "epoch": 27.74, "learning_rate": 1.630187702265372e-05, "loss": 0.0628, "step": 71420 }, { "epoch": 27.74, "learning_rate": 1.6301359223300972e-05, "loss": 0.2404, "step": 71430 }, { "epoch": 27.74, "learning_rate": 1.630084142394822e-05, "loss": 0.1614, "step": 71440 }, { "epoch": 27.75, "learning_rate": 1.630032362459547e-05, "loss": 0.3891, "step": 71450 }, { "epoch": 27.75, "learning_rate": 1.629980582524272e-05, "loss": 0.1018, "step": 71460 }, { "epoch": 27.76, "learning_rate": 1.629928802588997e-05, "loss": 0.1949, "step": 71470 }, { "epoch": 27.76, "learning_rate": 1.629877022653722e-05, "loss": 0.2713, "step": 71480 }, { "epoch": 27.76, "learning_rate": 1.6298252427184467e-05, "loss": 0.0473, "step": 71490 }, { "epoch": 27.77, "learning_rate": 1.6297734627831715e-05, "loss": 0.0353, "step": 71500 }, { "epoch": 27.77, "learning_rate": 1.6297216828478966e-05, "loss": 0.0321, "step": 71510 }, { "epoch": 27.77, "learning_rate": 1.6296699029126214e-05, "loss": 0.1759, "step": 71520 }, { "epoch": 27.78, "learning_rate": 1.6296181229773465e-05, "loss": 0.0575, "step": 71530 }, { "epoch": 27.78, "learning_rate": 1.6295663430420713e-05, "loss": 0.1882, "step": 71540 }, { "epoch": 27.79, "learning_rate": 1.6295145631067964e-05, "loss": 0.1255, "step": 71550 }, { "epoch": 27.79, "learning_rate": 1.6294627831715212e-05, "loss": 0.0149, "step": 71560 }, { "epoch": 27.79, "learning_rate": 1.629411003236246e-05, "loss": 0.1214, "step": 71570 }, { "epoch": 27.8, "learning_rate": 1.6293592233009708e-05, "loss": 0.1778, "step": 71580 }, { "epoch": 27.8, "learning_rate": 1.629307443365696e-05, "loss": 0.0918, "step": 71590 }, { "epoch": 27.81, "learning_rate": 1.6292556634304207e-05, "loss": 0.1516, "step": 71600 }, { "epoch": 27.81, "learning_rate": 1.629203883495146e-05, "loss": 0.1193, "step": 71610 }, { "epoch": 27.81, "learning_rate": 1.6291521035598707e-05, "loss": 0.2283, "step": 71620 }, { "epoch": 27.82, "learning_rate": 1.6291003236245958e-05, "loss": 0.054, "step": 71630 }, { "epoch": 27.82, "learning_rate": 1.6290485436893206e-05, "loss": 0.079, "step": 71640 }, { "epoch": 27.83, "learning_rate": 1.6289967637540454e-05, "loss": 0.0882, "step": 71650 }, { "epoch": 27.83, "learning_rate": 1.62894498381877e-05, "loss": 0.0023, "step": 71660 }, { "epoch": 27.83, "learning_rate": 1.6288932038834953e-05, "loss": 0.0357, "step": 71670 }, { "epoch": 27.84, "learning_rate": 1.62884142394822e-05, "loss": 0.0231, "step": 71680 }, { "epoch": 27.84, "learning_rate": 1.6287896440129452e-05, "loss": 0.2039, "step": 71690 }, { "epoch": 27.84, "learning_rate": 1.62873786407767e-05, "loss": 0.2393, "step": 71700 }, { "epoch": 27.85, "learning_rate": 1.628686084142395e-05, "loss": 0.1578, "step": 71710 }, { "epoch": 27.85, "learning_rate": 1.6286343042071196e-05, "loss": 0.0662, "step": 71720 }, { "epoch": 27.86, "learning_rate": 1.6285825242718447e-05, "loss": 0.1214, "step": 71730 }, { "epoch": 27.86, "learning_rate": 1.6285307443365695e-05, "loss": 0.0509, "step": 71740 }, { "epoch": 27.86, "learning_rate": 1.6284789644012947e-05, "loss": 0.0152, "step": 71750 }, { "epoch": 27.87, "learning_rate": 1.6284271844660194e-05, "loss": 0.0322, "step": 71760 }, { "epoch": 27.87, "learning_rate": 1.6283754045307446e-05, "loss": 0.1956, "step": 71770 }, { "epoch": 27.88, "learning_rate": 1.6283236245954694e-05, "loss": 0.1457, "step": 71780 }, { "epoch": 27.88, "learning_rate": 1.6282718446601945e-05, "loss": 0.0851, "step": 71790 }, { "epoch": 27.88, "learning_rate": 1.6282200647249193e-05, "loss": 0.1148, "step": 71800 }, { "epoch": 27.89, "learning_rate": 1.628168284789644e-05, "loss": 0.0048, "step": 71810 }, { "epoch": 27.89, "learning_rate": 1.628116504854369e-05, "loss": 0.0442, "step": 71820 }, { "epoch": 27.9, "learning_rate": 1.628064724919094e-05, "loss": 0.0574, "step": 71830 }, { "epoch": 27.9, "learning_rate": 1.6280129449838188e-05, "loss": 0.2253, "step": 71840 }, { "epoch": 27.9, "learning_rate": 1.627961165048544e-05, "loss": 0.1031, "step": 71850 }, { "epoch": 27.91, "learning_rate": 1.6279093851132687e-05, "loss": 0.0822, "step": 71860 }, { "epoch": 27.91, "learning_rate": 1.627857605177994e-05, "loss": 0.1899, "step": 71870 }, { "epoch": 27.91, "learning_rate": 1.6278058252427186e-05, "loss": 0.1379, "step": 71880 }, { "epoch": 27.92, "learning_rate": 1.6277540453074434e-05, "loss": 0.2689, "step": 71890 }, { "epoch": 27.92, "learning_rate": 1.6277022653721682e-05, "loss": 0.171, "step": 71900 }, { "epoch": 27.93, "learning_rate": 1.6276504854368934e-05, "loss": 0.0666, "step": 71910 }, { "epoch": 27.93, "learning_rate": 1.627598705501618e-05, "loss": 0.067, "step": 71920 }, { "epoch": 27.93, "learning_rate": 1.6275469255663433e-05, "loss": 0.1305, "step": 71930 }, { "epoch": 27.94, "learning_rate": 1.627495145631068e-05, "loss": 0.0449, "step": 71940 }, { "epoch": 27.94, "learning_rate": 1.6274433656957932e-05, "loss": 0.1, "step": 71950 }, { "epoch": 27.95, "learning_rate": 1.627391585760518e-05, "loss": 0.2153, "step": 71960 }, { "epoch": 27.95, "learning_rate": 1.6273398058252428e-05, "loss": 0.0631, "step": 71970 }, { "epoch": 27.95, "learning_rate": 1.6272880258899676e-05, "loss": 0.1186, "step": 71980 }, { "epoch": 27.96, "learning_rate": 1.6272362459546927e-05, "loss": 0.1032, "step": 71990 }, { "epoch": 27.96, "learning_rate": 1.6271844660194175e-05, "loss": 0.0692, "step": 72000 }, { "epoch": 27.97, "learning_rate": 1.6271326860841426e-05, "loss": 0.0326, "step": 72010 }, { "epoch": 27.97, "learning_rate": 1.6270809061488674e-05, "loss": 0.1689, "step": 72020 }, { "epoch": 27.97, "learning_rate": 1.6270291262135926e-05, "loss": 0.1152, "step": 72030 }, { "epoch": 27.98, "learning_rate": 1.6269773462783174e-05, "loss": 0.0746, "step": 72040 }, { "epoch": 27.98, "learning_rate": 1.626925566343042e-05, "loss": 0.1996, "step": 72050 }, { "epoch": 27.98, "learning_rate": 1.626873786407767e-05, "loss": 0.199, "step": 72060 }, { "epoch": 27.99, "learning_rate": 1.626822006472492e-05, "loss": 0.1373, "step": 72070 }, { "epoch": 27.99, "learning_rate": 1.626770226537217e-05, "loss": 0.1102, "step": 72080 }, { "epoch": 28.0, "learning_rate": 1.626718446601942e-05, "loss": 0.0525, "step": 72090 }, { "epoch": 28.0, "learning_rate": 1.6266666666666668e-05, "loss": 0.1097, "step": 72100 }, { "epoch": 28.0, "eval_accuracy": 0.951856946354883, "eval_loss": 0.2800503671169281, "eval_runtime": 8.1466, "eval_samples_per_second": 446.199, "eval_steps_per_second": 55.852, "step": 72100 }, { "epoch": 28.0, "learning_rate": 1.626614886731392e-05, "loss": 0.0425, "step": 72110 }, { "epoch": 28.01, "learning_rate": 1.6265631067961167e-05, "loss": 0.0513, "step": 72120 }, { "epoch": 28.01, "learning_rate": 1.6265113268608415e-05, "loss": 0.292, "step": 72130 }, { "epoch": 28.02, "learning_rate": 1.6264595469255663e-05, "loss": 0.0064, "step": 72140 }, { "epoch": 28.02, "learning_rate": 1.6264077669902914e-05, "loss": 0.119, "step": 72150 }, { "epoch": 28.02, "learning_rate": 1.6263559870550162e-05, "loss": 0.1892, "step": 72160 }, { "epoch": 28.03, "learning_rate": 1.6263042071197414e-05, "loss": 0.1641, "step": 72170 }, { "epoch": 28.03, "learning_rate": 1.626252427184466e-05, "loss": 0.0906, "step": 72180 }, { "epoch": 28.03, "learning_rate": 1.6262006472491913e-05, "loss": 0.1782, "step": 72190 }, { "epoch": 28.04, "learning_rate": 1.626148867313916e-05, "loss": 0.137, "step": 72200 }, { "epoch": 28.04, "learning_rate": 1.626097087378641e-05, "loss": 0.0883, "step": 72210 }, { "epoch": 28.05, "learning_rate": 1.6260453074433657e-05, "loss": 0.0546, "step": 72220 }, { "epoch": 28.05, "learning_rate": 1.6259935275080908e-05, "loss": 0.0808, "step": 72230 }, { "epoch": 28.05, "learning_rate": 1.6259417475728156e-05, "loss": 0.1216, "step": 72240 }, { "epoch": 28.06, "learning_rate": 1.6258899676375407e-05, "loss": 0.1176, "step": 72250 }, { "epoch": 28.06, "learning_rate": 1.6258381877022655e-05, "loss": 0.0388, "step": 72260 }, { "epoch": 28.07, "learning_rate": 1.6257864077669906e-05, "loss": 0.0652, "step": 72270 }, { "epoch": 28.07, "learning_rate": 1.6257346278317154e-05, "loss": 0.0593, "step": 72280 }, { "epoch": 28.07, "learning_rate": 1.6256828478964402e-05, "loss": 0.0222, "step": 72290 }, { "epoch": 28.08, "learning_rate": 1.625631067961165e-05, "loss": 0.0705, "step": 72300 }, { "epoch": 28.08, "learning_rate": 1.62557928802589e-05, "loss": 0.0409, "step": 72310 }, { "epoch": 28.09, "learning_rate": 1.625527508090615e-05, "loss": 0.121, "step": 72320 }, { "epoch": 28.09, "learning_rate": 1.62547572815534e-05, "loss": 0.2273, "step": 72330 }, { "epoch": 28.09, "learning_rate": 1.625423948220065e-05, "loss": 0.056, "step": 72340 }, { "epoch": 28.1, "learning_rate": 1.62537216828479e-05, "loss": 0.1972, "step": 72350 }, { "epoch": 28.1, "learning_rate": 1.6253203883495148e-05, "loss": 0.0378, "step": 72360 }, { "epoch": 28.1, "learning_rate": 1.6252686084142396e-05, "loss": 0.0375, "step": 72370 }, { "epoch": 28.11, "learning_rate": 1.6252168284789644e-05, "loss": 0.2175, "step": 72380 }, { "epoch": 28.11, "learning_rate": 1.6251650485436895e-05, "loss": 0.0769, "step": 72390 }, { "epoch": 28.12, "learning_rate": 1.6251132686084143e-05, "loss": 0.0579, "step": 72400 }, { "epoch": 28.12, "learning_rate": 1.6250614886731394e-05, "loss": 0.1902, "step": 72410 }, { "epoch": 28.12, "learning_rate": 1.6250097087378642e-05, "loss": 0.1552, "step": 72420 }, { "epoch": 28.13, "learning_rate": 1.6249579288025893e-05, "loss": 0.1726, "step": 72430 }, { "epoch": 28.13, "learning_rate": 1.624906148867314e-05, "loss": 0.0932, "step": 72440 }, { "epoch": 28.14, "learning_rate": 1.624854368932039e-05, "loss": 0.0876, "step": 72450 }, { "epoch": 28.14, "learning_rate": 1.6248025889967637e-05, "loss": 0.2169, "step": 72460 }, { "epoch": 28.14, "learning_rate": 1.624750809061489e-05, "loss": 0.2056, "step": 72470 }, { "epoch": 28.15, "learning_rate": 1.6246990291262136e-05, "loss": 0.0035, "step": 72480 }, { "epoch": 28.15, "learning_rate": 1.6246472491909388e-05, "loss": 0.0736, "step": 72490 }, { "epoch": 28.16, "learning_rate": 1.6245954692556636e-05, "loss": 0.1212, "step": 72500 }, { "epoch": 28.16, "learning_rate": 1.6245436893203884e-05, "loss": 0.1098, "step": 72510 }, { "epoch": 28.16, "learning_rate": 1.6244919093851135e-05, "loss": 0.0605, "step": 72520 }, { "epoch": 28.17, "learning_rate": 1.6244401294498383e-05, "loss": 0.0569, "step": 72530 }, { "epoch": 28.17, "learning_rate": 1.624388349514563e-05, "loss": 0.0007, "step": 72540 }, { "epoch": 28.17, "learning_rate": 1.6243365695792882e-05, "loss": 0.1097, "step": 72550 }, { "epoch": 28.18, "learning_rate": 1.624284789644013e-05, "loss": 0.0089, "step": 72560 }, { "epoch": 28.18, "learning_rate": 1.624233009708738e-05, "loss": 0.1139, "step": 72570 }, { "epoch": 28.19, "learning_rate": 1.624181229773463e-05, "loss": 0.1448, "step": 72580 }, { "epoch": 28.19, "learning_rate": 1.6241294498381877e-05, "loss": 0.0558, "step": 72590 }, { "epoch": 28.19, "learning_rate": 1.624077669902913e-05, "loss": 0.1234, "step": 72600 }, { "epoch": 28.2, "learning_rate": 1.6240258899676376e-05, "loss": 0.2772, "step": 72610 }, { "epoch": 28.2, "learning_rate": 1.6239741100323624e-05, "loss": 0.1544, "step": 72620 }, { "epoch": 28.21, "learning_rate": 1.6239223300970876e-05, "loss": 0.002, "step": 72630 }, { "epoch": 28.21, "learning_rate": 1.6238705501618124e-05, "loss": 0.1048, "step": 72640 }, { "epoch": 28.21, "learning_rate": 1.6238187702265375e-05, "loss": 0.0262, "step": 72650 }, { "epoch": 28.22, "learning_rate": 1.6237669902912623e-05, "loss": 0.121, "step": 72660 }, { "epoch": 28.22, "learning_rate": 1.623715210355987e-05, "loss": 0.1465, "step": 72670 }, { "epoch": 28.23, "learning_rate": 1.6236634304207122e-05, "loss": 0.2447, "step": 72680 }, { "epoch": 28.23, "learning_rate": 1.623611650485437e-05, "loss": 0.0763, "step": 72690 }, { "epoch": 28.23, "learning_rate": 1.6235598705501618e-05, "loss": 0.0157, "step": 72700 }, { "epoch": 28.24, "learning_rate": 1.623508090614887e-05, "loss": 0.0436, "step": 72710 }, { "epoch": 28.24, "learning_rate": 1.6234563106796117e-05, "loss": 0.0667, "step": 72720 }, { "epoch": 28.24, "learning_rate": 1.623404530744337e-05, "loss": 0.0556, "step": 72730 }, { "epoch": 28.25, "learning_rate": 1.6233527508090616e-05, "loss": 0.0529, "step": 72740 }, { "epoch": 28.25, "learning_rate": 1.6233009708737864e-05, "loss": 0.1948, "step": 72750 }, { "epoch": 28.26, "learning_rate": 1.6232491909385116e-05, "loss": 0.0318, "step": 72760 }, { "epoch": 28.26, "learning_rate": 1.6231974110032364e-05, "loss": 0.0259, "step": 72770 }, { "epoch": 28.26, "learning_rate": 1.623145631067961e-05, "loss": 0.0374, "step": 72780 }, { "epoch": 28.27, "learning_rate": 1.6230938511326863e-05, "loss": 0.1323, "step": 72790 }, { "epoch": 28.27, "learning_rate": 1.623042071197411e-05, "loss": 0.0226, "step": 72800 }, { "epoch": 28.28, "learning_rate": 1.6229902912621362e-05, "loss": 0.076, "step": 72810 }, { "epoch": 28.28, "learning_rate": 1.622938511326861e-05, "loss": 0.0097, "step": 72820 }, { "epoch": 28.28, "learning_rate": 1.6228867313915858e-05, "loss": 0.0789, "step": 72830 }, { "epoch": 28.29, "learning_rate": 1.622834951456311e-05, "loss": 0.0677, "step": 72840 }, { "epoch": 28.29, "learning_rate": 1.6227831715210357e-05, "loss": 0.1291, "step": 72850 }, { "epoch": 28.3, "learning_rate": 1.6227313915857605e-05, "loss": 0.111, "step": 72860 }, { "epoch": 28.3, "learning_rate": 1.6226796116504856e-05, "loss": 0.0478, "step": 72870 }, { "epoch": 28.3, "learning_rate": 1.6226278317152104e-05, "loss": 0.0404, "step": 72880 }, { "epoch": 28.31, "learning_rate": 1.6225760517799352e-05, "loss": 0.0127, "step": 72890 }, { "epoch": 28.31, "learning_rate": 1.6225242718446603e-05, "loss": 0.2506, "step": 72900 }, { "epoch": 28.31, "learning_rate": 1.622472491909385e-05, "loss": 0.1219, "step": 72910 }, { "epoch": 28.32, "learning_rate": 1.6224207119741103e-05, "loss": 0.2397, "step": 72920 }, { "epoch": 28.32, "learning_rate": 1.622368932038835e-05, "loss": 0.133, "step": 72930 }, { "epoch": 28.33, "learning_rate": 1.6223171521035602e-05, "loss": 0.1341, "step": 72940 }, { "epoch": 28.33, "learning_rate": 1.622265372168285e-05, "loss": 0.1546, "step": 72950 }, { "epoch": 28.33, "learning_rate": 1.6222135922330098e-05, "loss": 0.1232, "step": 72960 }, { "epoch": 28.34, "learning_rate": 1.6221618122977346e-05, "loss": 0.106, "step": 72970 }, { "epoch": 28.34, "learning_rate": 1.6221100323624597e-05, "loss": 0.1399, "step": 72980 }, { "epoch": 28.35, "learning_rate": 1.6220582524271845e-05, "loss": 0.1152, "step": 72990 }, { "epoch": 28.35, "learning_rate": 1.6220064724919096e-05, "loss": 0.0388, "step": 73000 }, { "epoch": 28.35, "learning_rate": 1.6219546925566344e-05, "loss": 0.0679, "step": 73010 }, { "epoch": 28.36, "learning_rate": 1.6219029126213595e-05, "loss": 0.1248, "step": 73020 }, { "epoch": 28.36, "learning_rate": 1.6218511326860843e-05, "loss": 0.0831, "step": 73030 }, { "epoch": 28.37, "learning_rate": 1.621799352750809e-05, "loss": 0.1739, "step": 73040 }, { "epoch": 28.37, "learning_rate": 1.621747572815534e-05, "loss": 0.067, "step": 73050 }, { "epoch": 28.37, "learning_rate": 1.621695792880259e-05, "loss": 0.036, "step": 73060 }, { "epoch": 28.38, "learning_rate": 1.621644012944984e-05, "loss": 0.0839, "step": 73070 }, { "epoch": 28.38, "learning_rate": 1.621592233009709e-05, "loss": 0.0762, "step": 73080 }, { "epoch": 28.38, "learning_rate": 1.6215404530744338e-05, "loss": 0.1095, "step": 73090 }, { "epoch": 28.39, "learning_rate": 1.621488673139159e-05, "loss": 0.0824, "step": 73100 }, { "epoch": 28.39, "learning_rate": 1.6214368932038837e-05, "loss": 0.0446, "step": 73110 }, { "epoch": 28.4, "learning_rate": 1.6213851132686085e-05, "loss": 0.1084, "step": 73120 }, { "epoch": 28.4, "learning_rate": 1.6213333333333333e-05, "loss": 0.0489, "step": 73130 }, { "epoch": 28.4, "learning_rate": 1.6212815533980584e-05, "loss": 0.1798, "step": 73140 }, { "epoch": 28.41, "learning_rate": 1.6212297734627832e-05, "loss": 0.0151, "step": 73150 }, { "epoch": 28.41, "learning_rate": 1.6211779935275083e-05, "loss": 0.0184, "step": 73160 }, { "epoch": 28.42, "learning_rate": 1.621126213592233e-05, "loss": 0.0868, "step": 73170 }, { "epoch": 28.42, "learning_rate": 1.6210744336569583e-05, "loss": 0.0572, "step": 73180 }, { "epoch": 28.42, "learning_rate": 1.6210226537216827e-05, "loss": 0.1809, "step": 73190 }, { "epoch": 28.43, "learning_rate": 1.620970873786408e-05, "loss": 0.0885, "step": 73200 }, { "epoch": 28.43, "learning_rate": 1.6209190938511326e-05, "loss": 0.0775, "step": 73210 }, { "epoch": 28.43, "learning_rate": 1.6208673139158578e-05, "loss": 0.1163, "step": 73220 }, { "epoch": 28.44, "learning_rate": 1.6208155339805826e-05, "loss": 0.1513, "step": 73230 }, { "epoch": 28.44, "learning_rate": 1.6207637540453077e-05, "loss": 0.1753, "step": 73240 }, { "epoch": 28.45, "learning_rate": 1.6207119741100325e-05, "loss": 0.0257, "step": 73250 }, { "epoch": 28.45, "learning_rate": 1.6206601941747576e-05, "loss": 0.0923, "step": 73260 }, { "epoch": 28.45, "learning_rate": 1.620608414239482e-05, "loss": 0.0113, "step": 73270 }, { "epoch": 28.46, "learning_rate": 1.6205566343042072e-05, "loss": 0.041, "step": 73280 }, { "epoch": 28.46, "learning_rate": 1.620504854368932e-05, "loss": 0.1642, "step": 73290 }, { "epoch": 28.47, "learning_rate": 1.620453074433657e-05, "loss": 0.1876, "step": 73300 }, { "epoch": 28.47, "learning_rate": 1.620401294498382e-05, "loss": 0.0975, "step": 73310 }, { "epoch": 28.47, "learning_rate": 1.620349514563107e-05, "loss": 0.1995, "step": 73320 }, { "epoch": 28.48, "learning_rate": 1.620297734627832e-05, "loss": 0.2974, "step": 73330 }, { "epoch": 28.48, "learning_rate": 1.620245954692557e-05, "loss": 0.0567, "step": 73340 }, { "epoch": 28.49, "learning_rate": 1.6201941747572814e-05, "loss": 0.0669, "step": 73350 }, { "epoch": 28.49, "learning_rate": 1.6201423948220066e-05, "loss": 0.0586, "step": 73360 }, { "epoch": 28.49, "learning_rate": 1.6200906148867313e-05, "loss": 0.1113, "step": 73370 }, { "epoch": 28.5, "learning_rate": 1.6200388349514565e-05, "loss": 0.1647, "step": 73380 }, { "epoch": 28.5, "learning_rate": 1.6199870550161813e-05, "loss": 0.0337, "step": 73390 }, { "epoch": 28.5, "learning_rate": 1.6199352750809064e-05, "loss": 0.151, "step": 73400 }, { "epoch": 28.51, "learning_rate": 1.6198834951456312e-05, "loss": 0.1066, "step": 73410 }, { "epoch": 28.51, "learning_rate": 1.6198317152103563e-05, "loss": 0.1469, "step": 73420 }, { "epoch": 28.52, "learning_rate": 1.6197799352750808e-05, "loss": 0.1909, "step": 73430 }, { "epoch": 28.52, "learning_rate": 1.619728155339806e-05, "loss": 0.1372, "step": 73440 }, { "epoch": 28.52, "learning_rate": 1.6196763754045307e-05, "loss": 0.0419, "step": 73450 }, { "epoch": 28.53, "learning_rate": 1.619624595469256e-05, "loss": 0.1797, "step": 73460 }, { "epoch": 28.53, "learning_rate": 1.6195728155339806e-05, "loss": 0.0652, "step": 73470 }, { "epoch": 28.54, "learning_rate": 1.6195210355987058e-05, "loss": 0.0712, "step": 73480 }, { "epoch": 28.54, "learning_rate": 1.6194692556634306e-05, "loss": 0.0792, "step": 73490 }, { "epoch": 28.54, "learning_rate": 1.6194174757281557e-05, "loss": 0.103, "step": 73500 }, { "epoch": 28.55, "learning_rate": 1.6193656957928805e-05, "loss": 0.1243, "step": 73510 }, { "epoch": 28.55, "learning_rate": 1.6193139158576053e-05, "loss": 0.1221, "step": 73520 }, { "epoch": 28.56, "learning_rate": 1.61926213592233e-05, "loss": 0.0211, "step": 73530 }, { "epoch": 28.56, "learning_rate": 1.6192103559870552e-05, "loss": 0.1466, "step": 73540 }, { "epoch": 28.56, "learning_rate": 1.61915857605178e-05, "loss": 0.0011, "step": 73550 }, { "epoch": 28.57, "learning_rate": 1.619106796116505e-05, "loss": 0.1886, "step": 73560 }, { "epoch": 28.57, "learning_rate": 1.61905501618123e-05, "loss": 0.1091, "step": 73570 }, { "epoch": 28.57, "learning_rate": 1.619003236245955e-05, "loss": 0.1368, "step": 73580 }, { "epoch": 28.58, "learning_rate": 1.61895145631068e-05, "loss": 0.3583, "step": 73590 }, { "epoch": 28.58, "learning_rate": 1.6188996763754046e-05, "loss": 0.0678, "step": 73600 }, { "epoch": 28.59, "learning_rate": 1.6188478964401294e-05, "loss": 0.0793, "step": 73610 }, { "epoch": 28.59, "learning_rate": 1.6187961165048545e-05, "loss": 0.135, "step": 73620 }, { "epoch": 28.59, "learning_rate": 1.6187443365695793e-05, "loss": 0.0352, "step": 73630 }, { "epoch": 28.6, "learning_rate": 1.6186925566343045e-05, "loss": 0.0855, "step": 73640 }, { "epoch": 28.6, "learning_rate": 1.6186407766990293e-05, "loss": 0.0677, "step": 73650 }, { "epoch": 28.61, "learning_rate": 1.6185889967637544e-05, "loss": 0.1002, "step": 73660 }, { "epoch": 28.61, "learning_rate": 1.6185372168284792e-05, "loss": 0.1231, "step": 73670 }, { "epoch": 28.61, "learning_rate": 1.618485436893204e-05, "loss": 0.0793, "step": 73680 }, { "epoch": 28.62, "learning_rate": 1.6184336569579288e-05, "loss": 0.0618, "step": 73690 }, { "epoch": 28.62, "learning_rate": 1.618381877022654e-05, "loss": 0.1139, "step": 73700 }, { "epoch": 28.63, "learning_rate": 1.6183300970873787e-05, "loss": 0.0651, "step": 73710 }, { "epoch": 28.63, "learning_rate": 1.6182783171521038e-05, "loss": 0.0262, "step": 73720 }, { "epoch": 28.63, "learning_rate": 1.6182265372168286e-05, "loss": 0.0856, "step": 73730 }, { "epoch": 28.64, "learning_rate": 1.6181747572815538e-05, "loss": 0.0426, "step": 73740 }, { "epoch": 28.64, "learning_rate": 1.6181229773462785e-05, "loss": 0.1298, "step": 73750 }, { "epoch": 28.64, "learning_rate": 1.6180711974110033e-05, "loss": 0.2715, "step": 73760 }, { "epoch": 28.65, "learning_rate": 1.618019417475728e-05, "loss": 0.0271, "step": 73770 }, { "epoch": 28.65, "learning_rate": 1.6179676375404533e-05, "loss": 0.0959, "step": 73780 }, { "epoch": 28.66, "learning_rate": 1.617915857605178e-05, "loss": 0.1338, "step": 73790 }, { "epoch": 28.66, "learning_rate": 1.6178640776699032e-05, "loss": 0.1895, "step": 73800 }, { "epoch": 28.66, "learning_rate": 1.617812297734628e-05, "loss": 0.0773, "step": 73810 }, { "epoch": 28.67, "learning_rate": 1.617760517799353e-05, "loss": 0.1972, "step": 73820 }, { "epoch": 28.67, "learning_rate": 1.617708737864078e-05, "loss": 0.004, "step": 73830 }, { "epoch": 28.68, "learning_rate": 1.6176569579288027e-05, "loss": 0.0944, "step": 73840 }, { "epoch": 28.68, "learning_rate": 1.6176051779935275e-05, "loss": 0.0505, "step": 73850 }, { "epoch": 28.68, "learning_rate": 1.6175533980582526e-05, "loss": 0.0363, "step": 73860 }, { "epoch": 28.69, "learning_rate": 1.6175016181229774e-05, "loss": 0.0046, "step": 73870 }, { "epoch": 28.69, "learning_rate": 1.6174498381877025e-05, "loss": 0.0367, "step": 73880 }, { "epoch": 28.7, "learning_rate": 1.6173980582524273e-05, "loss": 0.0847, "step": 73890 }, { "epoch": 28.7, "learning_rate": 1.6173462783171525e-05, "loss": 0.0003, "step": 73900 }, { "epoch": 28.7, "learning_rate": 1.6172944983818773e-05, "loss": 0.1213, "step": 73910 }, { "epoch": 28.71, "learning_rate": 1.617242718446602e-05, "loss": 0.0536, "step": 73920 }, { "epoch": 28.71, "learning_rate": 1.617190938511327e-05, "loss": 0.1264, "step": 73930 }, { "epoch": 28.71, "learning_rate": 1.617139158576052e-05, "loss": 0.1257, "step": 73940 }, { "epoch": 28.72, "learning_rate": 1.6170873786407768e-05, "loss": 0.06, "step": 73950 }, { "epoch": 28.72, "learning_rate": 1.617035598705502e-05, "loss": 0.0884, "step": 73960 }, { "epoch": 28.73, "learning_rate": 1.6169838187702267e-05, "loss": 0.0565, "step": 73970 }, { "epoch": 28.73, "learning_rate": 1.6169320388349515e-05, "loss": 0.1379, "step": 73980 }, { "epoch": 28.73, "learning_rate": 1.6168802588996766e-05, "loss": 0.0362, "step": 73990 }, { "epoch": 28.74, "learning_rate": 1.6168284789644014e-05, "loss": 0.2573, "step": 74000 }, { "epoch": 28.74, "learning_rate": 1.6167766990291262e-05, "loss": 0.0866, "step": 74010 }, { "epoch": 28.75, "learning_rate": 1.6167249190938513e-05, "loss": 0.085, "step": 74020 }, { "epoch": 28.75, "learning_rate": 1.616673139158576e-05, "loss": 0.0483, "step": 74030 }, { "epoch": 28.75, "learning_rate": 1.6166213592233012e-05, "loss": 0.0751, "step": 74040 }, { "epoch": 28.76, "learning_rate": 1.616569579288026e-05, "loss": 0.0948, "step": 74050 }, { "epoch": 28.76, "learning_rate": 1.616517799352751e-05, "loss": 0.0653, "step": 74060 }, { "epoch": 28.77, "learning_rate": 1.616466019417476e-05, "loss": 0.0103, "step": 74070 }, { "epoch": 28.77, "learning_rate": 1.6164142394822008e-05, "loss": 0.2064, "step": 74080 }, { "epoch": 28.77, "learning_rate": 1.6163624595469255e-05, "loss": 0.0406, "step": 74090 }, { "epoch": 28.78, "learning_rate": 1.6163106796116507e-05, "loss": 0.1267, "step": 74100 }, { "epoch": 28.78, "learning_rate": 1.6162588996763755e-05, "loss": 0.016, "step": 74110 }, { "epoch": 28.78, "learning_rate": 1.6162071197411006e-05, "loss": 0.0775, "step": 74120 }, { "epoch": 28.79, "learning_rate": 1.6161553398058254e-05, "loss": 0.008, "step": 74130 }, { "epoch": 28.79, "learning_rate": 1.6161035598705502e-05, "loss": 0.2745, "step": 74140 }, { "epoch": 28.8, "learning_rate": 1.6160517799352753e-05, "loss": 0.0465, "step": 74150 }, { "epoch": 28.8, "learning_rate": 1.616e-05, "loss": 0.1953, "step": 74160 }, { "epoch": 28.8, "learning_rate": 1.615948220064725e-05, "loss": 0.0032, "step": 74170 }, { "epoch": 28.81, "learning_rate": 1.61589644012945e-05, "loss": 0.1072, "step": 74180 }, { "epoch": 28.81, "learning_rate": 1.6158446601941748e-05, "loss": 0.161, "step": 74190 }, { "epoch": 28.82, "learning_rate": 1.6157928802589e-05, "loss": 0.2121, "step": 74200 }, { "epoch": 28.82, "learning_rate": 1.6157411003236248e-05, "loss": 0.2401, "step": 74210 }, { "epoch": 28.82, "learning_rate": 1.6156893203883495e-05, "loss": 0.0091, "step": 74220 }, { "epoch": 28.83, "learning_rate": 1.6156375404530747e-05, "loss": 0.0138, "step": 74230 }, { "epoch": 28.83, "learning_rate": 1.6155857605177995e-05, "loss": 0.0866, "step": 74240 }, { "epoch": 28.83, "learning_rate": 1.6155339805825243e-05, "loss": 0.2105, "step": 74250 }, { "epoch": 28.84, "learning_rate": 1.6154822006472494e-05, "loss": 0.1334, "step": 74260 }, { "epoch": 28.84, "learning_rate": 1.6154304207119742e-05, "loss": 0.053, "step": 74270 }, { "epoch": 28.85, "learning_rate": 1.6153786407766993e-05, "loss": 0.2218, "step": 74280 }, { "epoch": 28.85, "learning_rate": 1.615326860841424e-05, "loss": 0.0488, "step": 74290 }, { "epoch": 28.85, "learning_rate": 1.615275080906149e-05, "loss": 0.0948, "step": 74300 }, { "epoch": 28.86, "learning_rate": 1.615223300970874e-05, "loss": 0.0305, "step": 74310 }, { "epoch": 28.86, "learning_rate": 1.6151715210355988e-05, "loss": 0.1675, "step": 74320 }, { "epoch": 28.87, "learning_rate": 1.6151197411003236e-05, "loss": 0.1703, "step": 74330 }, { "epoch": 28.87, "learning_rate": 1.6150679611650487e-05, "loss": 0.1081, "step": 74340 }, { "epoch": 28.87, "learning_rate": 1.6150161812297735e-05, "loss": 0.0641, "step": 74350 }, { "epoch": 28.88, "learning_rate": 1.6149644012944983e-05, "loss": 0.1166, "step": 74360 }, { "epoch": 28.88, "learning_rate": 1.6149126213592235e-05, "loss": 0.1269, "step": 74370 }, { "epoch": 28.89, "learning_rate": 1.6148608414239483e-05, "loss": 0.0622, "step": 74380 }, { "epoch": 28.89, "learning_rate": 1.6148090614886734e-05, "loss": 0.178, "step": 74390 }, { "epoch": 28.89, "learning_rate": 1.6147572815533982e-05, "loss": 0.2345, "step": 74400 }, { "epoch": 28.9, "learning_rate": 1.614705501618123e-05, "loss": 0.0559, "step": 74410 }, { "epoch": 28.9, "learning_rate": 1.614653721682848e-05, "loss": 0.0899, "step": 74420 }, { "epoch": 28.9, "learning_rate": 1.614601941747573e-05, "loss": 0.1485, "step": 74430 }, { "epoch": 28.91, "learning_rate": 1.6145501618122977e-05, "loss": 0.0703, "step": 74440 }, { "epoch": 28.91, "learning_rate": 1.6144983818770228e-05, "loss": 0.1835, "step": 74450 }, { "epoch": 28.92, "learning_rate": 1.6144466019417476e-05, "loss": 0.0677, "step": 74460 }, { "epoch": 28.92, "learning_rate": 1.6143948220064727e-05, "loss": 0.0454, "step": 74470 }, { "epoch": 28.92, "learning_rate": 1.6143430420711975e-05, "loss": 0.1041, "step": 74480 }, { "epoch": 28.93, "learning_rate": 1.6142912621359223e-05, "loss": 0.0934, "step": 74490 }, { "epoch": 28.93, "learning_rate": 1.6142394822006475e-05, "loss": 0.2275, "step": 74500 }, { "epoch": 28.94, "learning_rate": 1.6141877022653723e-05, "loss": 0.0787, "step": 74510 }, { "epoch": 28.94, "learning_rate": 1.614135922330097e-05, "loss": 0.1337, "step": 74520 }, { "epoch": 28.94, "learning_rate": 1.6140841423948222e-05, "loss": 0.1253, "step": 74530 }, { "epoch": 28.95, "learning_rate": 1.614032362459547e-05, "loss": 0.0463, "step": 74540 }, { "epoch": 28.95, "learning_rate": 1.613980582524272e-05, "loss": 0.0901, "step": 74550 }, { "epoch": 28.96, "learning_rate": 1.613928802588997e-05, "loss": 0.0467, "step": 74560 }, { "epoch": 28.96, "learning_rate": 1.613877022653722e-05, "loss": 0.1027, "step": 74570 }, { "epoch": 28.96, "learning_rate": 1.6138252427184468e-05, "loss": 0.1134, "step": 74580 }, { "epoch": 28.97, "learning_rate": 1.6137734627831716e-05, "loss": 0.0116, "step": 74590 }, { "epoch": 28.97, "learning_rate": 1.6137216828478964e-05, "loss": 0.0777, "step": 74600 }, { "epoch": 28.97, "learning_rate": 1.6136699029126215e-05, "loss": 0.097, "step": 74610 }, { "epoch": 28.98, "learning_rate": 1.6136181229773463e-05, "loss": 0.2128, "step": 74620 }, { "epoch": 28.98, "learning_rate": 1.6135663430420715e-05, "loss": 0.1275, "step": 74630 }, { "epoch": 28.99, "learning_rate": 1.6135145631067962e-05, "loss": 0.1234, "step": 74640 }, { "epoch": 28.99, "learning_rate": 1.6134627831715214e-05, "loss": 0.213, "step": 74650 }, { "epoch": 28.99, "learning_rate": 1.613411003236246e-05, "loss": 0.0163, "step": 74660 }, { "epoch": 29.0, "learning_rate": 1.613359223300971e-05, "loss": 0.0772, "step": 74670 }, { "epoch": 29.0, "eval_accuracy": 0.9499312242090784, "eval_loss": 0.28697529435157776, "eval_runtime": 8.2691, "eval_samples_per_second": 439.587, "eval_steps_per_second": 55.024, "step": 74675 }, { "epoch": 29.0, "learning_rate": 1.6133074433656958e-05, "loss": 0.0545, "step": 74680 }, { "epoch": 29.01, "learning_rate": 1.613255663430421e-05, "loss": 0.0809, "step": 74690 }, { "epoch": 29.01, "learning_rate": 1.6132038834951457e-05, "loss": 0.0523, "step": 74700 }, { "epoch": 29.01, "learning_rate": 1.6131521035598708e-05, "loss": 0.2639, "step": 74710 }, { "epoch": 29.02, "learning_rate": 1.6131003236245956e-05, "loss": 0.0797, "step": 74720 }, { "epoch": 29.02, "learning_rate": 1.6130485436893207e-05, "loss": 0.0814, "step": 74730 }, { "epoch": 29.03, "learning_rate": 1.6129967637540452e-05, "loss": 0.1181, "step": 74740 }, { "epoch": 29.03, "learning_rate": 1.6129449838187703e-05, "loss": 0.2016, "step": 74750 }, { "epoch": 29.03, "learning_rate": 1.612893203883495e-05, "loss": 0.0271, "step": 74760 }, { "epoch": 29.04, "learning_rate": 1.6128414239482202e-05, "loss": 0.2362, "step": 74770 }, { "epoch": 29.04, "learning_rate": 1.612789644012945e-05, "loss": 0.081, "step": 74780 }, { "epoch": 29.04, "learning_rate": 1.61273786407767e-05, "loss": 0.1449, "step": 74790 }, { "epoch": 29.05, "learning_rate": 1.612686084142395e-05, "loss": 0.1793, "step": 74800 }, { "epoch": 29.05, "learning_rate": 1.61263430420712e-05, "loss": 0.0683, "step": 74810 }, { "epoch": 29.06, "learning_rate": 1.6125825242718445e-05, "loss": 0.0615, "step": 74820 }, { "epoch": 29.06, "learning_rate": 1.6125307443365697e-05, "loss": 0.1093, "step": 74830 }, { "epoch": 29.06, "learning_rate": 1.6124789644012945e-05, "loss": 0.0482, "step": 74840 }, { "epoch": 29.07, "learning_rate": 1.6124271844660196e-05, "loss": 0.0406, "step": 74850 }, { "epoch": 29.07, "learning_rate": 1.6123754045307444e-05, "loss": 0.0236, "step": 74860 }, { "epoch": 29.08, "learning_rate": 1.6123236245954695e-05, "loss": 0.0123, "step": 74870 }, { "epoch": 29.08, "learning_rate": 1.6122718446601943e-05, "loss": 0.0126, "step": 74880 }, { "epoch": 29.08, "learning_rate": 1.6122200647249194e-05, "loss": 0.0923, "step": 74890 }, { "epoch": 29.09, "learning_rate": 1.612168284789644e-05, "loss": 0.1948, "step": 74900 }, { "epoch": 29.09, "learning_rate": 1.612116504854369e-05, "loss": 0.2001, "step": 74910 }, { "epoch": 29.1, "learning_rate": 1.6120647249190938e-05, "loss": 0.1796, "step": 74920 }, { "epoch": 29.1, "learning_rate": 1.612012944983819e-05, "loss": 0.0838, "step": 74930 }, { "epoch": 29.1, "learning_rate": 1.6119611650485437e-05, "loss": 0.152, "step": 74940 }, { "epoch": 29.11, "learning_rate": 1.611909385113269e-05, "loss": 0.0312, "step": 74950 }, { "epoch": 29.11, "learning_rate": 1.6118576051779937e-05, "loss": 0.1139, "step": 74960 }, { "epoch": 29.11, "learning_rate": 1.6118058252427188e-05, "loss": 0.1211, "step": 74970 }, { "epoch": 29.12, "learning_rate": 1.6117540453074433e-05, "loss": 0.11, "step": 74980 }, { "epoch": 29.12, "learning_rate": 1.6117022653721684e-05, "loss": 0.076, "step": 74990 }, { "epoch": 29.13, "learning_rate": 1.6116504854368932e-05, "loss": 0.0109, "step": 75000 }, { "epoch": 29.13, "learning_rate": 1.6115987055016183e-05, "loss": 0.1217, "step": 75010 }, { "epoch": 29.13, "learning_rate": 1.611546925566343e-05, "loss": 0.0553, "step": 75020 }, { "epoch": 29.14, "learning_rate": 1.6114951456310682e-05, "loss": 0.0318, "step": 75030 }, { "epoch": 29.14, "learning_rate": 1.611443365695793e-05, "loss": 0.0797, "step": 75040 }, { "epoch": 29.15, "learning_rate": 1.611391585760518e-05, "loss": 0.0851, "step": 75050 }, { "epoch": 29.15, "learning_rate": 1.6113398058252426e-05, "loss": 0.076, "step": 75060 }, { "epoch": 29.15, "learning_rate": 1.6112880258899677e-05, "loss": 0.341, "step": 75070 }, { "epoch": 29.16, "learning_rate": 1.6112362459546925e-05, "loss": 0.0617, "step": 75080 }, { "epoch": 29.16, "learning_rate": 1.6111844660194177e-05, "loss": 0.1326, "step": 75090 }, { "epoch": 29.17, "learning_rate": 1.6111326860841425e-05, "loss": 0.0898, "step": 75100 }, { "epoch": 29.17, "learning_rate": 1.6110809061488676e-05, "loss": 0.0171, "step": 75110 }, { "epoch": 29.17, "learning_rate": 1.6110291262135924e-05, "loss": 0.1276, "step": 75120 }, { "epoch": 29.18, "learning_rate": 1.6109773462783175e-05, "loss": 0.0744, "step": 75130 }, { "epoch": 29.18, "learning_rate": 1.6109255663430423e-05, "loss": 0.1658, "step": 75140 }, { "epoch": 29.18, "learning_rate": 1.610873786407767e-05, "loss": 0.0424, "step": 75150 }, { "epoch": 29.19, "learning_rate": 1.610822006472492e-05, "loss": 0.0401, "step": 75160 }, { "epoch": 29.19, "learning_rate": 1.610770226537217e-05, "loss": 0.0492, "step": 75170 }, { "epoch": 29.2, "learning_rate": 1.6107184466019418e-05, "loss": 0.2079, "step": 75180 }, { "epoch": 29.2, "learning_rate": 1.610666666666667e-05, "loss": 0.1418, "step": 75190 }, { "epoch": 29.2, "learning_rate": 1.6106148867313917e-05, "loss": 0.0382, "step": 75200 }, { "epoch": 29.21, "learning_rate": 1.610563106796117e-05, "loss": 0.0121, "step": 75210 }, { "epoch": 29.21, "learning_rate": 1.6105113268608417e-05, "loss": 0.0222, "step": 75220 }, { "epoch": 29.22, "learning_rate": 1.6104595469255665e-05, "loss": 0.1503, "step": 75230 }, { "epoch": 29.22, "learning_rate": 1.6104077669902912e-05, "loss": 0.3979, "step": 75240 }, { "epoch": 29.22, "learning_rate": 1.6103559870550164e-05, "loss": 0.085, "step": 75250 }, { "epoch": 29.23, "learning_rate": 1.610304207119741e-05, "loss": 0.2353, "step": 75260 }, { "epoch": 29.23, "learning_rate": 1.6102524271844663e-05, "loss": 0.0364, "step": 75270 }, { "epoch": 29.23, "learning_rate": 1.610200647249191e-05, "loss": 0.1273, "step": 75280 }, { "epoch": 29.24, "learning_rate": 1.6101488673139162e-05, "loss": 0.0048, "step": 75290 }, { "epoch": 29.24, "learning_rate": 1.610097087378641e-05, "loss": 0.0949, "step": 75300 }, { "epoch": 29.25, "learning_rate": 1.6100453074433658e-05, "loss": 0.2348, "step": 75310 }, { "epoch": 29.25, "learning_rate": 1.6099935275080906e-05, "loss": 0.0316, "step": 75320 }, { "epoch": 29.25, "learning_rate": 1.6099417475728157e-05, "loss": 0.2552, "step": 75330 }, { "epoch": 29.26, "learning_rate": 1.6098899676375405e-05, "loss": 0.0184, "step": 75340 }, { "epoch": 29.26, "learning_rate": 1.6098381877022657e-05, "loss": 0.1043, "step": 75350 }, { "epoch": 29.27, "learning_rate": 1.6097864077669904e-05, "loss": 0.2142, "step": 75360 }, { "epoch": 29.27, "learning_rate": 1.6097346278317156e-05, "loss": 0.1248, "step": 75370 }, { "epoch": 29.27, "learning_rate": 1.6096828478964404e-05, "loss": 0.0927, "step": 75380 }, { "epoch": 29.28, "learning_rate": 1.609631067961165e-05, "loss": 0.0642, "step": 75390 }, { "epoch": 29.28, "learning_rate": 1.60957928802589e-05, "loss": 0.041, "step": 75400 }, { "epoch": 29.29, "learning_rate": 1.609527508090615e-05, "loss": 0.0398, "step": 75410 }, { "epoch": 29.29, "learning_rate": 1.60947572815534e-05, "loss": 0.1773, "step": 75420 }, { "epoch": 29.29, "learning_rate": 1.609423948220065e-05, "loss": 0.0024, "step": 75430 }, { "epoch": 29.3, "learning_rate": 1.6093721682847898e-05, "loss": 0.1341, "step": 75440 }, { "epoch": 29.3, "learning_rate": 1.6093203883495146e-05, "loss": 0.1057, "step": 75450 }, { "epoch": 29.3, "learning_rate": 1.6092686084142397e-05, "loss": 0.1245, "step": 75460 }, { "epoch": 29.31, "learning_rate": 1.6092168284789645e-05, "loss": 0.0963, "step": 75470 }, { "epoch": 29.31, "learning_rate": 1.6091650485436893e-05, "loss": 0.1639, "step": 75480 }, { "epoch": 29.32, "learning_rate": 1.6091132686084144e-05, "loss": 0.1703, "step": 75490 }, { "epoch": 29.32, "learning_rate": 1.6090614886731392e-05, "loss": 0.2077, "step": 75500 }, { "epoch": 29.32, "learning_rate": 1.6090097087378644e-05, "loss": 0.0693, "step": 75510 }, { "epoch": 29.33, "learning_rate": 1.608957928802589e-05, "loss": 0.0368, "step": 75520 }, { "epoch": 29.33, "learning_rate": 1.608906148867314e-05, "loss": 0.1151, "step": 75530 }, { "epoch": 29.34, "learning_rate": 1.608854368932039e-05, "loss": 0.1013, "step": 75540 }, { "epoch": 29.34, "learning_rate": 1.608802588996764e-05, "loss": 0.1226, "step": 75550 }, { "epoch": 29.34, "learning_rate": 1.6087508090614887e-05, "loss": 0.0953, "step": 75560 }, { "epoch": 29.35, "learning_rate": 1.6086990291262138e-05, "loss": 0.1143, "step": 75570 }, { "epoch": 29.35, "learning_rate": 1.6086472491909386e-05, "loss": 0.1012, "step": 75580 }, { "epoch": 29.36, "learning_rate": 1.6085954692556637e-05, "loss": 0.2549, "step": 75590 }, { "epoch": 29.36, "learning_rate": 1.6085436893203885e-05, "loss": 0.09, "step": 75600 }, { "epoch": 29.36, "learning_rate": 1.6084919093851133e-05, "loss": 0.0375, "step": 75610 }, { "epoch": 29.37, "learning_rate": 1.6084401294498384e-05, "loss": 0.0546, "step": 75620 }, { "epoch": 29.37, "learning_rate": 1.6083883495145632e-05, "loss": 0.0864, "step": 75630 }, { "epoch": 29.37, "learning_rate": 1.608336569579288e-05, "loss": 0.2091, "step": 75640 }, { "epoch": 29.38, "learning_rate": 1.608284789644013e-05, "loss": 0.1206, "step": 75650 }, { "epoch": 29.38, "learning_rate": 1.608233009708738e-05, "loss": 0.0514, "step": 75660 }, { "epoch": 29.39, "learning_rate": 1.608181229773463e-05, "loss": 0.0536, "step": 75670 }, { "epoch": 29.39, "learning_rate": 1.608129449838188e-05, "loss": 0.1172, "step": 75680 }, { "epoch": 29.39, "learning_rate": 1.6080776699029127e-05, "loss": 0.055, "step": 75690 }, { "epoch": 29.4, "learning_rate": 1.6080258899676378e-05, "loss": 0.0694, "step": 75700 }, { "epoch": 29.4, "learning_rate": 1.6079741100323626e-05, "loss": 0.0793, "step": 75710 }, { "epoch": 29.41, "learning_rate": 1.6079223300970874e-05, "loss": 0.1669, "step": 75720 }, { "epoch": 29.41, "learning_rate": 1.6078705501618125e-05, "loss": 0.2075, "step": 75730 }, { "epoch": 29.41, "learning_rate": 1.6078187702265373e-05, "loss": 0.0885, "step": 75740 }, { "epoch": 29.42, "learning_rate": 1.6077669902912624e-05, "loss": 0.0482, "step": 75750 }, { "epoch": 29.42, "learning_rate": 1.6077152103559872e-05, "loss": 0.089, "step": 75760 }, { "epoch": 29.43, "learning_rate": 1.607663430420712e-05, "loss": 0.2351, "step": 75770 }, { "epoch": 29.43, "learning_rate": 1.607611650485437e-05, "loss": 0.1133, "step": 75780 }, { "epoch": 29.43, "learning_rate": 1.607559870550162e-05, "loss": 0.0688, "step": 75790 }, { "epoch": 29.44, "learning_rate": 1.6075080906148867e-05, "loss": 0.0572, "step": 75800 }, { "epoch": 29.44, "learning_rate": 1.607456310679612e-05, "loss": 0.129, "step": 75810 }, { "epoch": 29.44, "learning_rate": 1.6074045307443367e-05, "loss": 0.1221, "step": 75820 }, { "epoch": 29.45, "learning_rate": 1.6073527508090615e-05, "loss": 0.0193, "step": 75830 }, { "epoch": 29.45, "learning_rate": 1.6073009708737866e-05, "loss": 0.113, "step": 75840 }, { "epoch": 29.46, "learning_rate": 1.6072491909385114e-05, "loss": 0.0355, "step": 75850 }, { "epoch": 29.46, "learning_rate": 1.6071974110032365e-05, "loss": 0.0701, "step": 75860 }, { "epoch": 29.46, "learning_rate": 1.6071456310679613e-05, "loss": 0.1277, "step": 75870 }, { "epoch": 29.47, "learning_rate": 1.607093851132686e-05, "loss": 0.1694, "step": 75880 }, { "epoch": 29.47, "learning_rate": 1.6070420711974112e-05, "loss": 0.299, "step": 75890 }, { "epoch": 29.48, "learning_rate": 1.606990291262136e-05, "loss": 0.0403, "step": 75900 }, { "epoch": 29.48, "learning_rate": 1.6069385113268608e-05, "loss": 0.0347, "step": 75910 }, { "epoch": 29.48, "learning_rate": 1.606886731391586e-05, "loss": 0.0623, "step": 75920 }, { "epoch": 29.49, "learning_rate": 1.6068349514563107e-05, "loss": 0.0309, "step": 75930 }, { "epoch": 29.49, "learning_rate": 1.606783171521036e-05, "loss": 0.0269, "step": 75940 }, { "epoch": 29.5, "learning_rate": 1.6067313915857607e-05, "loss": 0.1389, "step": 75950 }, { "epoch": 29.5, "learning_rate": 1.6066796116504854e-05, "loss": 0.1986, "step": 75960 }, { "epoch": 29.5, "learning_rate": 1.6066278317152106e-05, "loss": 0.1505, "step": 75970 }, { "epoch": 29.51, "learning_rate": 1.6065760517799354e-05, "loss": 0.1877, "step": 75980 }, { "epoch": 29.51, "learning_rate": 1.60652427184466e-05, "loss": 0.0499, "step": 75990 }, { "epoch": 29.51, "learning_rate": 1.6064724919093853e-05, "loss": 0.155, "step": 76000 }, { "epoch": 29.52, "learning_rate": 1.60642071197411e-05, "loss": 0.0553, "step": 76010 }, { "epoch": 29.52, "learning_rate": 1.6063689320388352e-05, "loss": 0.1507, "step": 76020 }, { "epoch": 29.53, "learning_rate": 1.60631715210356e-05, "loss": 0.0185, "step": 76030 }, { "epoch": 29.53, "learning_rate": 1.6062653721682848e-05, "loss": 0.1773, "step": 76040 }, { "epoch": 29.53, "learning_rate": 1.60621359223301e-05, "loss": 0.1877, "step": 76050 }, { "epoch": 29.54, "learning_rate": 1.6061618122977347e-05, "loss": 0.006, "step": 76060 }, { "epoch": 29.54, "learning_rate": 1.6061100323624595e-05, "loss": 0.096, "step": 76070 }, { "epoch": 29.55, "learning_rate": 1.6060582524271846e-05, "loss": 0.1379, "step": 76080 }, { "epoch": 29.55, "learning_rate": 1.6060064724919094e-05, "loss": 0.1841, "step": 76090 }, { "epoch": 29.55, "learning_rate": 1.6059546925566346e-05, "loss": 0.1317, "step": 76100 }, { "epoch": 29.56, "learning_rate": 1.6059029126213594e-05, "loss": 0.0853, "step": 76110 }, { "epoch": 29.56, "learning_rate": 1.605851132686084e-05, "loss": 0.0283, "step": 76120 }, { "epoch": 29.57, "learning_rate": 1.605799352750809e-05, "loss": 0.1316, "step": 76130 }, { "epoch": 29.57, "learning_rate": 1.605747572815534e-05, "loss": 0.02, "step": 76140 }, { "epoch": 29.57, "learning_rate": 1.605695792880259e-05, "loss": 0.0106, "step": 76150 }, { "epoch": 29.58, "learning_rate": 1.605644012944984e-05, "loss": 0.1135, "step": 76160 }, { "epoch": 29.58, "learning_rate": 1.6055922330097088e-05, "loss": 0.1295, "step": 76170 }, { "epoch": 29.58, "learning_rate": 1.605540453074434e-05, "loss": 0.0903, "step": 76180 }, { "epoch": 29.59, "learning_rate": 1.6054886731391587e-05, "loss": 0.133, "step": 76190 }, { "epoch": 29.59, "learning_rate": 1.6054368932038835e-05, "loss": 0.1343, "step": 76200 }, { "epoch": 29.6, "learning_rate": 1.6053851132686083e-05, "loss": 0.007, "step": 76210 }, { "epoch": 29.6, "learning_rate": 1.6053333333333334e-05, "loss": 0.0534, "step": 76220 }, { "epoch": 29.6, "learning_rate": 1.6052815533980582e-05, "loss": 0.1395, "step": 76230 }, { "epoch": 29.61, "learning_rate": 1.6052297734627834e-05, "loss": 0.076, "step": 76240 }, { "epoch": 29.61, "learning_rate": 1.605177993527508e-05, "loss": 0.1048, "step": 76250 }, { "epoch": 29.62, "learning_rate": 1.6051262135922333e-05, "loss": 0.1096, "step": 76260 }, { "epoch": 29.62, "learning_rate": 1.605074433656958e-05, "loss": 0.0972, "step": 76270 }, { "epoch": 29.62, "learning_rate": 1.6050226537216832e-05, "loss": 0.1224, "step": 76280 }, { "epoch": 29.63, "learning_rate": 1.6049708737864077e-05, "loss": 0.2984, "step": 76290 }, { "epoch": 29.63, "learning_rate": 1.6049190938511328e-05, "loss": 0.0037, "step": 76300 }, { "epoch": 29.63, "learning_rate": 1.6048673139158576e-05, "loss": 0.1175, "step": 76310 }, { "epoch": 29.64, "learning_rate": 1.6048155339805827e-05, "loss": 0.0328, "step": 76320 }, { "epoch": 29.64, "learning_rate": 1.6047637540453075e-05, "loss": 0.0879, "step": 76330 }, { "epoch": 29.65, "learning_rate": 1.6047119741100326e-05, "loss": 0.0899, "step": 76340 }, { "epoch": 29.65, "learning_rate": 1.6046601941747574e-05, "loss": 0.1657, "step": 76350 }, { "epoch": 29.65, "learning_rate": 1.6046084142394826e-05, "loss": 0.0794, "step": 76360 }, { "epoch": 29.66, "learning_rate": 1.604556634304207e-05, "loss": 0.1634, "step": 76370 }, { "epoch": 29.66, "learning_rate": 1.604504854368932e-05, "loss": 0.0171, "step": 76380 }, { "epoch": 29.67, "learning_rate": 1.604453074433657e-05, "loss": 0.0375, "step": 76390 }, { "epoch": 29.67, "learning_rate": 1.604401294498382e-05, "loss": 0.022, "step": 76400 }, { "epoch": 29.67, "learning_rate": 1.604349514563107e-05, "loss": 0.0801, "step": 76410 }, { "epoch": 29.68, "learning_rate": 1.604297734627832e-05, "loss": 0.1211, "step": 76420 }, { "epoch": 29.68, "learning_rate": 1.6042459546925568e-05, "loss": 0.1322, "step": 76430 }, { "epoch": 29.69, "learning_rate": 1.604194174757282e-05, "loss": 0.0265, "step": 76440 }, { "epoch": 29.69, "learning_rate": 1.6041423948220064e-05, "loss": 0.0865, "step": 76450 }, { "epoch": 29.69, "learning_rate": 1.6040906148867315e-05, "loss": 0.1182, "step": 76460 }, { "epoch": 29.7, "learning_rate": 1.6040388349514563e-05, "loss": 0.0817, "step": 76470 }, { "epoch": 29.7, "learning_rate": 1.6039870550161814e-05, "loss": 0.1714, "step": 76480 }, { "epoch": 29.7, "learning_rate": 1.6039352750809062e-05, "loss": 0.1174, "step": 76490 }, { "epoch": 29.71, "learning_rate": 1.6038834951456313e-05, "loss": 0.1867, "step": 76500 }, { "epoch": 29.71, "learning_rate": 1.603831715210356e-05, "loss": 0.0738, "step": 76510 }, { "epoch": 29.72, "learning_rate": 1.6037799352750813e-05, "loss": 0.0688, "step": 76520 }, { "epoch": 29.72, "learning_rate": 1.6037281553398057e-05, "loss": 0.0673, "step": 76530 }, { "epoch": 29.72, "learning_rate": 1.603676375404531e-05, "loss": 0.0995, "step": 76540 }, { "epoch": 29.73, "learning_rate": 1.6036245954692557e-05, "loss": 0.076, "step": 76550 }, { "epoch": 29.73, "learning_rate": 1.6035728155339808e-05, "loss": 0.0972, "step": 76560 }, { "epoch": 29.74, "learning_rate": 1.6035210355987056e-05, "loss": 0.1128, "step": 76570 }, { "epoch": 29.74, "learning_rate": 1.6034692556634307e-05, "loss": 0.0687, "step": 76580 }, { "epoch": 29.74, "learning_rate": 1.6034174757281555e-05, "loss": 0.0484, "step": 76590 }, { "epoch": 29.75, "learning_rate": 1.6033656957928806e-05, "loss": 0.1293, "step": 76600 }, { "epoch": 29.75, "learning_rate": 1.603313915857605e-05, "loss": 0.348, "step": 76610 }, { "epoch": 29.76, "learning_rate": 1.6032621359223302e-05, "loss": 0.1696, "step": 76620 }, { "epoch": 29.76, "learning_rate": 1.603210355987055e-05, "loss": 0.1494, "step": 76630 }, { "epoch": 29.76, "learning_rate": 1.60315857605178e-05, "loss": 0.2455, "step": 76640 }, { "epoch": 29.77, "learning_rate": 1.603106796116505e-05, "loss": 0.23, "step": 76650 }, { "epoch": 29.77, "learning_rate": 1.60305501618123e-05, "loss": 0.0802, "step": 76660 }, { "epoch": 29.77, "learning_rate": 1.603003236245955e-05, "loss": 0.0397, "step": 76670 }, { "epoch": 29.78, "learning_rate": 1.60295145631068e-05, "loss": 0.0308, "step": 76680 }, { "epoch": 29.78, "learning_rate": 1.6028996763754044e-05, "loss": 0.1286, "step": 76690 }, { "epoch": 29.79, "learning_rate": 1.6028478964401296e-05, "loss": 0.1032, "step": 76700 }, { "epoch": 29.79, "learning_rate": 1.6027961165048544e-05, "loss": 0.0076, "step": 76710 }, { "epoch": 29.79, "learning_rate": 1.6027443365695795e-05, "loss": 0.1466, "step": 76720 }, { "epoch": 29.8, "learning_rate": 1.6026925566343043e-05, "loss": 0.0929, "step": 76730 }, { "epoch": 29.8, "learning_rate": 1.6026407766990294e-05, "loss": 0.0549, "step": 76740 }, { "epoch": 29.81, "learning_rate": 1.6025889967637542e-05, "loss": 0.102, "step": 76750 }, { "epoch": 29.81, "learning_rate": 1.6025372168284793e-05, "loss": 0.1141, "step": 76760 }, { "epoch": 29.81, "learning_rate": 1.6024854368932038e-05, "loss": 0.0856, "step": 76770 }, { "epoch": 29.82, "learning_rate": 1.602433656957929e-05, "loss": 0.1484, "step": 76780 }, { "epoch": 29.82, "learning_rate": 1.6023818770226537e-05, "loss": 0.3503, "step": 76790 }, { "epoch": 29.83, "learning_rate": 1.602330097087379e-05, "loss": 0.0135, "step": 76800 }, { "epoch": 29.83, "learning_rate": 1.6022783171521036e-05, "loss": 0.0364, "step": 76810 }, { "epoch": 29.83, "learning_rate": 1.6022265372168288e-05, "loss": 0.086, "step": 76820 }, { "epoch": 29.84, "learning_rate": 1.6021747572815536e-05, "loss": 0.2839, "step": 76830 }, { "epoch": 29.84, "learning_rate": 1.6021229773462787e-05, "loss": 0.0214, "step": 76840 }, { "epoch": 29.84, "learning_rate": 1.6020711974110035e-05, "loss": 0.0214, "step": 76850 }, { "epoch": 29.85, "learning_rate": 1.6020194174757283e-05, "loss": 0.0309, "step": 76860 }, { "epoch": 29.85, "learning_rate": 1.601967637540453e-05, "loss": 0.3176, "step": 76870 }, { "epoch": 29.86, "learning_rate": 1.6019158576051782e-05, "loss": 0.0974, "step": 76880 }, { "epoch": 29.86, "learning_rate": 1.601864077669903e-05, "loss": 0.0458, "step": 76890 }, { "epoch": 29.86, "learning_rate": 1.601812297734628e-05, "loss": 0.0788, "step": 76900 }, { "epoch": 29.87, "learning_rate": 1.601760517799353e-05, "loss": 0.0787, "step": 76910 }, { "epoch": 29.87, "learning_rate": 1.6017087378640777e-05, "loss": 0.199, "step": 76920 }, { "epoch": 29.88, "learning_rate": 1.601656957928803e-05, "loss": 0.2111, "step": 76930 }, { "epoch": 29.88, "learning_rate": 1.6016051779935276e-05, "loss": 0.0715, "step": 76940 }, { "epoch": 29.88, "learning_rate": 1.6015533980582524e-05, "loss": 0.0519, "step": 76950 }, { "epoch": 29.89, "learning_rate": 1.6015016181229776e-05, "loss": 0.0348, "step": 76960 }, { "epoch": 29.89, "learning_rate": 1.6014498381877024e-05, "loss": 0.0396, "step": 76970 }, { "epoch": 29.9, "learning_rate": 1.6013980582524275e-05, "loss": 0.2503, "step": 76980 }, { "epoch": 29.9, "learning_rate": 1.6013462783171523e-05, "loss": 0.0312, "step": 76990 }, { "epoch": 29.9, "learning_rate": 1.601294498381877e-05, "loss": 0.0995, "step": 77000 }, { "epoch": 29.91, "learning_rate": 1.6012427184466022e-05, "loss": 0.0472, "step": 77010 }, { "epoch": 29.91, "learning_rate": 1.601190938511327e-05, "loss": 0.0589, "step": 77020 }, { "epoch": 29.91, "learning_rate": 1.6011391585760518e-05, "loss": 0.0698, "step": 77030 }, { "epoch": 29.92, "learning_rate": 1.601087378640777e-05, "loss": 0.0796, "step": 77040 }, { "epoch": 29.92, "learning_rate": 1.6010355987055017e-05, "loss": 0.0761, "step": 77050 }, { "epoch": 29.93, "learning_rate": 1.600983818770227e-05, "loss": 0.1826, "step": 77060 }, { "epoch": 29.93, "learning_rate": 1.6009320388349516e-05, "loss": 0.0538, "step": 77070 }, { "epoch": 29.93, "learning_rate": 1.6008802588996764e-05, "loss": 0.1193, "step": 77080 }, { "epoch": 29.94, "learning_rate": 1.6008284789644016e-05, "loss": 0.2437, "step": 77090 }, { "epoch": 29.94, "learning_rate": 1.6007766990291263e-05, "loss": 0.106, "step": 77100 }, { "epoch": 29.95, "learning_rate": 1.600724919093851e-05, "loss": 0.1036, "step": 77110 }, { "epoch": 29.95, "learning_rate": 1.6006731391585763e-05, "loss": 0.0417, "step": 77120 }, { "epoch": 29.95, "learning_rate": 1.600621359223301e-05, "loss": 0.1161, "step": 77130 }, { "epoch": 29.96, "learning_rate": 1.6005695792880262e-05, "loss": 0.002, "step": 77140 }, { "epoch": 29.96, "learning_rate": 1.600517799352751e-05, "loss": 0.1067, "step": 77150 }, { "epoch": 29.97, "learning_rate": 1.6004660194174758e-05, "loss": 0.2566, "step": 77160 }, { "epoch": 29.97, "learning_rate": 1.600414239482201e-05, "loss": 0.1103, "step": 77170 }, { "epoch": 29.97, "learning_rate": 1.6003624595469257e-05, "loss": 0.0126, "step": 77180 }, { "epoch": 29.98, "learning_rate": 1.6003106796116505e-05, "loss": 0.08, "step": 77190 }, { "epoch": 29.98, "learning_rate": 1.6002588996763756e-05, "loss": 0.0198, "step": 77200 }, { "epoch": 29.98, "learning_rate": 1.6002071197411004e-05, "loss": 0.1228, "step": 77210 }, { "epoch": 29.99, "learning_rate": 1.6001553398058252e-05, "loss": 0.0993, "step": 77220 }, { "epoch": 29.99, "learning_rate": 1.6001035598705503e-05, "loss": 0.1189, "step": 77230 }, { "epoch": 30.0, "learning_rate": 1.600051779935275e-05, "loss": 0.0391, "step": 77240 }, { "epoch": 30.0, "learning_rate": 1.6000000000000003e-05, "loss": 0.132, "step": 77250 }, { "epoch": 30.0, "eval_accuracy": 0.949656121045392, "eval_loss": 0.28239795565605164, "eval_runtime": 8.2813, "eval_samples_per_second": 438.943, "eval_steps_per_second": 54.943, "step": 77250 }, { "epoch": 30.0, "learning_rate": 1.599948220064725e-05, "loss": 0.0245, "step": 77260 }, { "epoch": 30.01, "learning_rate": 1.59989644012945e-05, "loss": 0.1941, "step": 77270 }, { "epoch": 30.01, "learning_rate": 1.599844660194175e-05, "loss": 0.0267, "step": 77280 }, { "epoch": 30.02, "learning_rate": 1.5997928802588998e-05, "loss": 0.0661, "step": 77290 }, { "epoch": 30.02, "learning_rate": 1.5997411003236246e-05, "loss": 0.1029, "step": 77300 }, { "epoch": 30.02, "learning_rate": 1.5996893203883497e-05, "loss": 0.0483, "step": 77310 }, { "epoch": 30.03, "learning_rate": 1.5996375404530745e-05, "loss": 0.1283, "step": 77320 }, { "epoch": 30.03, "learning_rate": 1.5995857605177996e-05, "loss": 0.1758, "step": 77330 }, { "epoch": 30.03, "learning_rate": 1.5995339805825244e-05, "loss": 0.1112, "step": 77340 }, { "epoch": 30.04, "learning_rate": 1.5994822006472492e-05, "loss": 0.0491, "step": 77350 }, { "epoch": 30.04, "learning_rate": 1.5994304207119743e-05, "loss": 0.1012, "step": 77360 }, { "epoch": 30.05, "learning_rate": 1.599378640776699e-05, "loss": 0.0417, "step": 77370 }, { "epoch": 30.05, "learning_rate": 1.599326860841424e-05, "loss": 0.1323, "step": 77380 }, { "epoch": 30.05, "learning_rate": 1.599275080906149e-05, "loss": 0.0551, "step": 77390 }, { "epoch": 30.06, "learning_rate": 1.599223300970874e-05, "loss": 0.0913, "step": 77400 }, { "epoch": 30.06, "learning_rate": 1.599171521035599e-05, "loss": 0.0115, "step": 77410 }, { "epoch": 30.07, "learning_rate": 1.5991197411003238e-05, "loss": 0.1244, "step": 77420 }, { "epoch": 30.07, "learning_rate": 1.5990679611650486e-05, "loss": 0.1016, "step": 77430 }, { "epoch": 30.07, "learning_rate": 1.5990161812297737e-05, "loss": 0.1364, "step": 77440 }, { "epoch": 30.08, "learning_rate": 1.5989644012944985e-05, "loss": 0.0438, "step": 77450 }, { "epoch": 30.08, "learning_rate": 1.5989126213592233e-05, "loss": 0.2466, "step": 77460 }, { "epoch": 30.09, "learning_rate": 1.5988608414239484e-05, "loss": 0.2113, "step": 77470 }, { "epoch": 30.09, "learning_rate": 1.5988090614886732e-05, "loss": 0.0988, "step": 77480 }, { "epoch": 30.09, "learning_rate": 1.5987572815533983e-05, "loss": 0.1021, "step": 77490 }, { "epoch": 30.1, "learning_rate": 1.598705501618123e-05, "loss": 0.0423, "step": 77500 }, { "epoch": 30.1, "learning_rate": 1.598653721682848e-05, "loss": 0.0808, "step": 77510 }, { "epoch": 30.1, "learning_rate": 1.598601941747573e-05, "loss": 0.0388, "step": 77520 }, { "epoch": 30.11, "learning_rate": 1.598550161812298e-05, "loss": 0.0625, "step": 77530 }, { "epoch": 30.11, "learning_rate": 1.5984983818770226e-05, "loss": 0.0043, "step": 77540 }, { "epoch": 30.12, "learning_rate": 1.5984466019417478e-05, "loss": 0.0981, "step": 77550 }, { "epoch": 30.12, "learning_rate": 1.5983948220064726e-05, "loss": 0.0692, "step": 77560 }, { "epoch": 30.12, "learning_rate": 1.5983430420711977e-05, "loss": 0.0928, "step": 77570 }, { "epoch": 30.13, "learning_rate": 1.5982912621359225e-05, "loss": 0.0506, "step": 77580 }, { "epoch": 30.13, "learning_rate": 1.5982394822006473e-05, "loss": 0.2043, "step": 77590 }, { "epoch": 30.14, "learning_rate": 1.598187702265372e-05, "loss": 0.0303, "step": 77600 }, { "epoch": 30.14, "learning_rate": 1.5981359223300972e-05, "loss": 0.0751, "step": 77610 }, { "epoch": 30.14, "learning_rate": 1.598084142394822e-05, "loss": 0.0259, "step": 77620 }, { "epoch": 30.15, "learning_rate": 1.598032362459547e-05, "loss": 0.0523, "step": 77630 }, { "epoch": 30.15, "learning_rate": 1.597980582524272e-05, "loss": 0.2317, "step": 77640 }, { "epoch": 30.16, "learning_rate": 1.597928802588997e-05, "loss": 0.0174, "step": 77650 }, { "epoch": 30.16, "learning_rate": 1.597877022653722e-05, "loss": 0.1064, "step": 77660 }, { "epoch": 30.16, "learning_rate": 1.5978252427184466e-05, "loss": 0.067, "step": 77670 }, { "epoch": 30.17, "learning_rate": 1.5977734627831714e-05, "loss": 0.0523, "step": 77680 }, { "epoch": 30.17, "learning_rate": 1.5977216828478966e-05, "loss": 0.1837, "step": 77690 }, { "epoch": 30.17, "learning_rate": 1.5976699029126213e-05, "loss": 0.0691, "step": 77700 }, { "epoch": 30.18, "learning_rate": 1.5976181229773465e-05, "loss": 0.2029, "step": 77710 }, { "epoch": 30.18, "learning_rate": 1.5975663430420713e-05, "loss": 0.0012, "step": 77720 }, { "epoch": 30.19, "learning_rate": 1.5975145631067964e-05, "loss": 0.1693, "step": 77730 }, { "epoch": 30.19, "learning_rate": 1.5974627831715212e-05, "loss": 0.0533, "step": 77740 }, { "epoch": 30.19, "learning_rate": 1.597411003236246e-05, "loss": 0.0548, "step": 77750 }, { "epoch": 30.2, "learning_rate": 1.5973592233009708e-05, "loss": 0.011, "step": 77760 }, { "epoch": 30.2, "learning_rate": 1.597307443365696e-05, "loss": 0.0256, "step": 77770 }, { "epoch": 30.21, "learning_rate": 1.5972556634304207e-05, "loss": 0.1355, "step": 77780 }, { "epoch": 30.21, "learning_rate": 1.597203883495146e-05, "loss": 0.09, "step": 77790 }, { "epoch": 30.21, "learning_rate": 1.5971521035598706e-05, "loss": 0.1083, "step": 77800 }, { "epoch": 30.22, "learning_rate": 1.5971003236245958e-05, "loss": 0.1963, "step": 77810 }, { "epoch": 30.22, "learning_rate": 1.5970485436893205e-05, "loss": 0.0664, "step": 77820 }, { "epoch": 30.23, "learning_rate": 1.5969967637540453e-05, "loss": 0.0594, "step": 77830 }, { "epoch": 30.23, "learning_rate": 1.59694498381877e-05, "loss": 0.0344, "step": 77840 }, { "epoch": 30.23, "learning_rate": 1.5968932038834953e-05, "loss": 0.0562, "step": 77850 }, { "epoch": 30.24, "learning_rate": 1.59684142394822e-05, "loss": 0.0652, "step": 77860 }, { "epoch": 30.24, "learning_rate": 1.5967896440129452e-05, "loss": 0.1704, "step": 77870 }, { "epoch": 30.24, "learning_rate": 1.59673786407767e-05, "loss": 0.1549, "step": 77880 }, { "epoch": 30.25, "learning_rate": 1.596686084142395e-05, "loss": 0.1344, "step": 77890 }, { "epoch": 30.25, "learning_rate": 1.59663430420712e-05, "loss": 0.0471, "step": 77900 }, { "epoch": 30.26, "learning_rate": 1.5965825242718447e-05, "loss": 0.0018, "step": 77910 }, { "epoch": 30.26, "learning_rate": 1.5965307443365695e-05, "loss": 0.09, "step": 77920 }, { "epoch": 30.26, "learning_rate": 1.5964789644012946e-05, "loss": 0.1211, "step": 77930 }, { "epoch": 30.27, "learning_rate": 1.5964271844660194e-05, "loss": 0.0308, "step": 77940 }, { "epoch": 30.27, "learning_rate": 1.5963754045307445e-05, "loss": 0.1577, "step": 77950 }, { "epoch": 30.28, "learning_rate": 1.5963236245954693e-05, "loss": 0.1641, "step": 77960 }, { "epoch": 30.28, "learning_rate": 1.5962718446601945e-05, "loss": 0.0587, "step": 77970 }, { "epoch": 30.28, "learning_rate": 1.5962200647249193e-05, "loss": 0.1388, "step": 77980 }, { "epoch": 30.29, "learning_rate": 1.5961682847896444e-05, "loss": 0.0381, "step": 77990 }, { "epoch": 30.29, "learning_rate": 1.596116504854369e-05, "loss": 0.1661, "step": 78000 }, { "epoch": 30.3, "learning_rate": 1.596064724919094e-05, "loss": 0.1552, "step": 78010 }, { "epoch": 30.3, "learning_rate": 1.5960129449838188e-05, "loss": 0.1135, "step": 78020 }, { "epoch": 30.3, "learning_rate": 1.595961165048544e-05, "loss": 0.2185, "step": 78030 }, { "epoch": 30.31, "learning_rate": 1.5959093851132687e-05, "loss": 0.0663, "step": 78040 }, { "epoch": 30.31, "learning_rate": 1.5958576051779938e-05, "loss": 0.2059, "step": 78050 }, { "epoch": 30.31, "learning_rate": 1.5958058252427186e-05, "loss": 0.1361, "step": 78060 }, { "epoch": 30.32, "learning_rate": 1.5957540453074437e-05, "loss": 0.005, "step": 78070 }, { "epoch": 30.32, "learning_rate": 1.5957022653721682e-05, "loss": 0.0644, "step": 78080 }, { "epoch": 30.33, "learning_rate": 1.5956504854368933e-05, "loss": 0.1107, "step": 78090 }, { "epoch": 30.33, "learning_rate": 1.595598705501618e-05, "loss": 0.0514, "step": 78100 }, { "epoch": 30.33, "learning_rate": 1.5955469255663433e-05, "loss": 0.0695, "step": 78110 }, { "epoch": 30.34, "learning_rate": 1.595495145631068e-05, "loss": 0.0508, "step": 78120 }, { "epoch": 30.34, "learning_rate": 1.5954433656957932e-05, "loss": 0.0908, "step": 78130 }, { "epoch": 30.35, "learning_rate": 1.595391585760518e-05, "loss": 0.2458, "step": 78140 }, { "epoch": 30.35, "learning_rate": 1.595339805825243e-05, "loss": 0.0994, "step": 78150 }, { "epoch": 30.35, "learning_rate": 1.5952880258899676e-05, "loss": 0.105, "step": 78160 }, { "epoch": 30.36, "learning_rate": 1.5952362459546927e-05, "loss": 0.1984, "step": 78170 }, { "epoch": 30.36, "learning_rate": 1.5951844660194175e-05, "loss": 0.073, "step": 78180 }, { "epoch": 30.37, "learning_rate": 1.5951326860841426e-05, "loss": 0.0595, "step": 78190 }, { "epoch": 30.37, "learning_rate": 1.5950809061488674e-05, "loss": 0.0456, "step": 78200 }, { "epoch": 30.37, "learning_rate": 1.5950291262135925e-05, "loss": 0.0888, "step": 78210 }, { "epoch": 30.38, "learning_rate": 1.5949773462783173e-05, "loss": 0.1325, "step": 78220 }, { "epoch": 30.38, "learning_rate": 1.5949255663430425e-05, "loss": 0.2159, "step": 78230 }, { "epoch": 30.38, "learning_rate": 1.594873786407767e-05, "loss": 0.0632, "step": 78240 }, { "epoch": 30.39, "learning_rate": 1.594822006472492e-05, "loss": 0.1127, "step": 78250 }, { "epoch": 30.39, "learning_rate": 1.594770226537217e-05, "loss": 0.103, "step": 78260 }, { "epoch": 30.4, "learning_rate": 1.594718446601942e-05, "loss": 0.0064, "step": 78270 }, { "epoch": 30.4, "learning_rate": 1.5946666666666668e-05, "loss": 0.2122, "step": 78280 }, { "epoch": 30.4, "learning_rate": 1.594614886731392e-05, "loss": 0.0691, "step": 78290 }, { "epoch": 30.41, "learning_rate": 1.5945631067961167e-05, "loss": 0.261, "step": 78300 }, { "epoch": 30.41, "learning_rate": 1.5945113268608418e-05, "loss": 0.0213, "step": 78310 }, { "epoch": 30.42, "learning_rate": 1.5944595469255663e-05, "loss": 0.2535, "step": 78320 }, { "epoch": 30.42, "learning_rate": 1.5944077669902914e-05, "loss": 0.1691, "step": 78330 }, { "epoch": 30.42, "learning_rate": 1.5943559870550162e-05, "loss": 0.0735, "step": 78340 }, { "epoch": 30.43, "learning_rate": 1.5943042071197413e-05, "loss": 0.0706, "step": 78350 }, { "epoch": 30.43, "learning_rate": 1.594252427184466e-05, "loss": 0.0903, "step": 78360 }, { "epoch": 30.43, "learning_rate": 1.5942006472491912e-05, "loss": 0.0716, "step": 78370 }, { "epoch": 30.44, "learning_rate": 1.594148867313916e-05, "loss": 0.1718, "step": 78380 }, { "epoch": 30.44, "learning_rate": 1.5940970873786408e-05, "loss": 0.0486, "step": 78390 }, { "epoch": 30.45, "learning_rate": 1.5940453074433656e-05, "loss": 0.058, "step": 78400 }, { "epoch": 30.45, "learning_rate": 1.5939935275080908e-05, "loss": 0.1012, "step": 78410 }, { "epoch": 30.45, "learning_rate": 1.5939417475728155e-05, "loss": 0.0493, "step": 78420 }, { "epoch": 30.46, "learning_rate": 1.5938899676375407e-05, "loss": 0.09, "step": 78430 }, { "epoch": 30.46, "learning_rate": 1.5938381877022655e-05, "loss": 0.0732, "step": 78440 }, { "epoch": 30.47, "learning_rate": 1.5937864077669906e-05, "loss": 0.1838, "step": 78450 }, { "epoch": 30.47, "learning_rate": 1.5937346278317154e-05, "loss": 0.3345, "step": 78460 }, { "epoch": 30.47, "learning_rate": 1.5936828478964402e-05, "loss": 0.0987, "step": 78470 }, { "epoch": 30.48, "learning_rate": 1.593631067961165e-05, "loss": 0.107, "step": 78480 }, { "epoch": 30.48, "learning_rate": 1.59357928802589e-05, "loss": 0.1197, "step": 78490 }, { "epoch": 30.49, "learning_rate": 1.593527508090615e-05, "loss": 0.0753, "step": 78500 }, { "epoch": 30.49, "learning_rate": 1.59347572815534e-05, "loss": 0.0711, "step": 78510 }, { "epoch": 30.49, "learning_rate": 1.5934239482200648e-05, "loss": 0.1222, "step": 78520 }, { "epoch": 30.5, "learning_rate": 1.59337216828479e-05, "loss": 0.2515, "step": 78530 }, { "epoch": 30.5, "learning_rate": 1.5933203883495147e-05, "loss": 0.1075, "step": 78540 }, { "epoch": 30.5, "learning_rate": 1.5932686084142395e-05, "loss": 0.1315, "step": 78550 }, { "epoch": 30.51, "learning_rate": 1.5932168284789647e-05, "loss": 0.035, "step": 78560 }, { "epoch": 30.51, "learning_rate": 1.5931650485436895e-05, "loss": 0.0501, "step": 78570 }, { "epoch": 30.52, "learning_rate": 1.5931132686084143e-05, "loss": 0.1169, "step": 78580 }, { "epoch": 30.52, "learning_rate": 1.5930614886731394e-05, "loss": 0.1462, "step": 78590 }, { "epoch": 30.52, "learning_rate": 1.5930097087378642e-05, "loss": 0.0369, "step": 78600 }, { "epoch": 30.53, "learning_rate": 1.5929579288025893e-05, "loss": 0.1698, "step": 78610 }, { "epoch": 30.53, "learning_rate": 1.592906148867314e-05, "loss": 0.0301, "step": 78620 }, { "epoch": 30.54, "learning_rate": 1.592854368932039e-05, "loss": 0.1059, "step": 78630 }, { "epoch": 30.54, "learning_rate": 1.592802588996764e-05, "loss": 0.0142, "step": 78640 }, { "epoch": 30.54, "learning_rate": 1.5927508090614888e-05, "loss": 0.1174, "step": 78650 }, { "epoch": 30.55, "learning_rate": 1.5926990291262136e-05, "loss": 0.1363, "step": 78660 }, { "epoch": 30.55, "learning_rate": 1.5926472491909387e-05, "loss": 0.1413, "step": 78670 }, { "epoch": 30.56, "learning_rate": 1.5925954692556635e-05, "loss": 0.1624, "step": 78680 }, { "epoch": 30.56, "learning_rate": 1.5925436893203883e-05, "loss": 0.0621, "step": 78690 }, { "epoch": 30.56, "learning_rate": 1.5924919093851135e-05, "loss": 0.1311, "step": 78700 }, { "epoch": 30.57, "learning_rate": 1.5924401294498383e-05, "loss": 0.0233, "step": 78710 }, { "epoch": 30.57, "learning_rate": 1.5923883495145634e-05, "loss": 0.1499, "step": 78720 }, { "epoch": 30.57, "learning_rate": 1.5923365695792882e-05, "loss": 0.1287, "step": 78730 }, { "epoch": 30.58, "learning_rate": 1.592284789644013e-05, "loss": 0.1573, "step": 78740 }, { "epoch": 30.58, "learning_rate": 1.592233009708738e-05, "loss": 0.058, "step": 78750 }, { "epoch": 30.59, "learning_rate": 1.592181229773463e-05, "loss": 0.1228, "step": 78760 }, { "epoch": 30.59, "learning_rate": 1.5921294498381877e-05, "loss": 0.1803, "step": 78770 }, { "epoch": 30.59, "learning_rate": 1.5920776699029128e-05, "loss": 0.2255, "step": 78780 }, { "epoch": 30.6, "learning_rate": 1.5920258899676376e-05, "loss": 0.1454, "step": 78790 }, { "epoch": 30.6, "learning_rate": 1.5919741100323627e-05, "loss": 0.144, "step": 78800 }, { "epoch": 30.61, "learning_rate": 1.5919223300970875e-05, "loss": 0.0489, "step": 78810 }, { "epoch": 30.61, "learning_rate": 1.5918705501618123e-05, "loss": 0.0261, "step": 78820 }, { "epoch": 30.61, "learning_rate": 1.5918187702265375e-05, "loss": 0.0414, "step": 78830 }, { "epoch": 30.62, "learning_rate": 1.5917669902912622e-05, "loss": 0.1503, "step": 78840 }, { "epoch": 30.62, "learning_rate": 1.591715210355987e-05, "loss": 0.0788, "step": 78850 }, { "epoch": 30.63, "learning_rate": 1.5916634304207122e-05, "loss": 0.2331, "step": 78860 }, { "epoch": 30.63, "learning_rate": 1.591611650485437e-05, "loss": 0.111, "step": 78870 }, { "epoch": 30.63, "learning_rate": 1.591559870550162e-05, "loss": 0.0049, "step": 78880 }, { "epoch": 30.64, "learning_rate": 1.591508090614887e-05, "loss": 0.0429, "step": 78890 }, { "epoch": 30.64, "learning_rate": 1.5914563106796117e-05, "loss": 0.0472, "step": 78900 }, { "epoch": 30.64, "learning_rate": 1.5914045307443368e-05, "loss": 0.0006, "step": 78910 }, { "epoch": 30.65, "learning_rate": 1.5913527508090616e-05, "loss": 0.0679, "step": 78920 }, { "epoch": 30.65, "learning_rate": 1.5913009708737864e-05, "loss": 0.0659, "step": 78930 }, { "epoch": 30.66, "learning_rate": 1.5912491909385115e-05, "loss": 0.0456, "step": 78940 }, { "epoch": 30.66, "learning_rate": 1.5911974110032363e-05, "loss": 0.1605, "step": 78950 }, { "epoch": 30.66, "learning_rate": 1.5911456310679615e-05, "loss": 0.061, "step": 78960 }, { "epoch": 30.67, "learning_rate": 1.5910938511326862e-05, "loss": 0.0461, "step": 78970 }, { "epoch": 30.67, "learning_rate": 1.591042071197411e-05, "loss": 0.1427, "step": 78980 }, { "epoch": 30.68, "learning_rate": 1.590990291262136e-05, "loss": 0.1507, "step": 78990 }, { "epoch": 30.68, "learning_rate": 1.590938511326861e-05, "loss": 0.1419, "step": 79000 }, { "epoch": 30.68, "learning_rate": 1.5908867313915858e-05, "loss": 0.0868, "step": 79010 }, { "epoch": 30.69, "learning_rate": 1.590834951456311e-05, "loss": 0.0648, "step": 79020 }, { "epoch": 30.69, "learning_rate": 1.5907831715210357e-05, "loss": 0.0555, "step": 79030 }, { "epoch": 30.7, "learning_rate": 1.5907313915857608e-05, "loss": 0.0425, "step": 79040 }, { "epoch": 30.7, "learning_rate": 1.5906796116504856e-05, "loss": 0.1427, "step": 79050 }, { "epoch": 30.7, "learning_rate": 1.5906278317152104e-05, "loss": 0.1153, "step": 79060 }, { "epoch": 30.71, "learning_rate": 1.5905760517799352e-05, "loss": 0.078, "step": 79070 }, { "epoch": 30.71, "learning_rate": 1.5905242718446603e-05, "loss": 0.1426, "step": 79080 }, { "epoch": 30.71, "learning_rate": 1.590472491909385e-05, "loss": 0.0023, "step": 79090 }, { "epoch": 30.72, "learning_rate": 1.5904207119741102e-05, "loss": 0.012, "step": 79100 }, { "epoch": 30.72, "learning_rate": 1.590368932038835e-05, "loss": 0.027, "step": 79110 }, { "epoch": 30.73, "learning_rate": 1.59031715210356e-05, "loss": 0.1149, "step": 79120 }, { "epoch": 30.73, "learning_rate": 1.590265372168285e-05, "loss": 0.0271, "step": 79130 }, { "epoch": 30.73, "learning_rate": 1.5902135922330097e-05, "loss": 0.0971, "step": 79140 }, { "epoch": 30.74, "learning_rate": 1.5901618122977345e-05, "loss": 0.1479, "step": 79150 }, { "epoch": 30.74, "learning_rate": 1.5901100323624597e-05, "loss": 0.0799, "step": 79160 }, { "epoch": 30.75, "learning_rate": 1.5900582524271845e-05, "loss": 0.0331, "step": 79170 }, { "epoch": 30.75, "learning_rate": 1.5900064724919096e-05, "loss": 0.096, "step": 79180 }, { "epoch": 30.75, "learning_rate": 1.5899546925566344e-05, "loss": 0.0201, "step": 79190 }, { "epoch": 30.76, "learning_rate": 1.5899029126213595e-05, "loss": 0.1905, "step": 79200 }, { "epoch": 30.76, "learning_rate": 1.5898511326860843e-05, "loss": 0.2479, "step": 79210 }, { "epoch": 30.77, "learning_rate": 1.589799352750809e-05, "loss": 0.0261, "step": 79220 }, { "epoch": 30.77, "learning_rate": 1.589747572815534e-05, "loss": 0.2463, "step": 79230 }, { "epoch": 30.77, "learning_rate": 1.589695792880259e-05, "loss": 0.042, "step": 79240 }, { "epoch": 30.78, "learning_rate": 1.5896440129449838e-05, "loss": 0.0816, "step": 79250 }, { "epoch": 30.78, "learning_rate": 1.589592233009709e-05, "loss": 0.1112, "step": 79260 }, { "epoch": 30.78, "learning_rate": 1.5895404530744337e-05, "loss": 0.1145, "step": 79270 }, { "epoch": 30.79, "learning_rate": 1.589488673139159e-05, "loss": 0.1301, "step": 79280 }, { "epoch": 30.79, "learning_rate": 1.5894368932038837e-05, "loss": 0.2107, "step": 79290 }, { "epoch": 30.8, "learning_rate": 1.5893851132686085e-05, "loss": 0.0291, "step": 79300 }, { "epoch": 30.8, "learning_rate": 1.5893333333333333e-05, "loss": 0.0059, "step": 79310 }, { "epoch": 30.8, "learning_rate": 1.5892815533980584e-05, "loss": 0.0876, "step": 79320 }, { "epoch": 30.81, "learning_rate": 1.5892297734627832e-05, "loss": 0.1514, "step": 79330 }, { "epoch": 30.81, "learning_rate": 1.5891779935275083e-05, "loss": 0.0277, "step": 79340 }, { "epoch": 30.82, "learning_rate": 1.589126213592233e-05, "loss": 0.0741, "step": 79350 }, { "epoch": 30.82, "learning_rate": 1.5890744336569582e-05, "loss": 0.1573, "step": 79360 }, { "epoch": 30.82, "learning_rate": 1.589022653721683e-05, "loss": 0.0784, "step": 79370 }, { "epoch": 30.83, "learning_rate": 1.5889708737864078e-05, "loss": 0.0595, "step": 79380 }, { "epoch": 30.83, "learning_rate": 1.5889190938511326e-05, "loss": 0.0776, "step": 79390 }, { "epoch": 30.83, "learning_rate": 1.5888673139158577e-05, "loss": 0.1074, "step": 79400 }, { "epoch": 30.84, "learning_rate": 1.5888155339805825e-05, "loss": 0.1656, "step": 79410 }, { "epoch": 30.84, "learning_rate": 1.5887637540453077e-05, "loss": 0.2042, "step": 79420 }, { "epoch": 30.85, "learning_rate": 1.5887119741100325e-05, "loss": 0.1905, "step": 79430 }, { "epoch": 30.85, "learning_rate": 1.5886601941747576e-05, "loss": 0.068, "step": 79440 }, { "epoch": 30.85, "learning_rate": 1.5886084142394824e-05, "loss": 0.032, "step": 79450 }, { "epoch": 30.86, "learning_rate": 1.588556634304207e-05, "loss": 0.0262, "step": 79460 }, { "epoch": 30.86, "learning_rate": 1.588504854368932e-05, "loss": 0.031, "step": 79470 }, { "epoch": 30.87, "learning_rate": 1.588453074433657e-05, "loss": 0.0755, "step": 79480 }, { "epoch": 30.87, "learning_rate": 1.588401294498382e-05, "loss": 0.067, "step": 79490 }, { "epoch": 30.87, "learning_rate": 1.588349514563107e-05, "loss": 0.0969, "step": 79500 }, { "epoch": 30.88, "learning_rate": 1.5882977346278318e-05, "loss": 0.0277, "step": 79510 }, { "epoch": 30.88, "learning_rate": 1.588245954692557e-05, "loss": 0.0337, "step": 79520 }, { "epoch": 30.89, "learning_rate": 1.5881941747572817e-05, "loss": 0.095, "step": 79530 }, { "epoch": 30.89, "learning_rate": 1.5881423948220065e-05, "loss": 0.0844, "step": 79540 }, { "epoch": 30.89, "learning_rate": 1.5880906148867313e-05, "loss": 0.1799, "step": 79550 }, { "epoch": 30.9, "learning_rate": 1.5880388349514564e-05, "loss": 0.1793, "step": 79560 }, { "epoch": 30.9, "learning_rate": 1.5879870550161812e-05, "loss": 0.1063, "step": 79570 }, { "epoch": 30.9, "learning_rate": 1.5879352750809064e-05, "loss": 0.0079, "step": 79580 }, { "epoch": 30.91, "learning_rate": 1.587883495145631e-05, "loss": 0.1524, "step": 79590 }, { "epoch": 30.91, "learning_rate": 1.5878317152103563e-05, "loss": 0.0445, "step": 79600 }, { "epoch": 30.92, "learning_rate": 1.587779935275081e-05, "loss": 0.0392, "step": 79610 }, { "epoch": 30.92, "learning_rate": 1.5877281553398062e-05, "loss": 0.0952, "step": 79620 }, { "epoch": 30.92, "learning_rate": 1.5876763754045307e-05, "loss": 0.235, "step": 79630 }, { "epoch": 30.93, "learning_rate": 1.5876245954692558e-05, "loss": 0.1323, "step": 79640 }, { "epoch": 30.93, "learning_rate": 1.5875728155339806e-05, "loss": 0.0152, "step": 79650 }, { "epoch": 30.94, "learning_rate": 1.5875210355987057e-05, "loss": 0.0946, "step": 79660 }, { "epoch": 30.94, "learning_rate": 1.5874692556634305e-05, "loss": 0.0599, "step": 79670 }, { "epoch": 30.94, "learning_rate": 1.5874174757281557e-05, "loss": 0.0856, "step": 79680 }, { "epoch": 30.95, "learning_rate": 1.5873656957928804e-05, "loss": 0.0525, "step": 79690 }, { "epoch": 30.95, "learning_rate": 1.5873139158576056e-05, "loss": 0.0371, "step": 79700 }, { "epoch": 30.96, "learning_rate": 1.58726213592233e-05, "loss": 0.0302, "step": 79710 }, { "epoch": 30.96, "learning_rate": 1.587210355987055e-05, "loss": 0.1451, "step": 79720 }, { "epoch": 30.96, "learning_rate": 1.58715857605178e-05, "loss": 0.038, "step": 79730 }, { "epoch": 30.97, "learning_rate": 1.587106796116505e-05, "loss": 0.0821, "step": 79740 }, { "epoch": 30.97, "learning_rate": 1.58705501618123e-05, "loss": 0.0432, "step": 79750 }, { "epoch": 30.97, "learning_rate": 1.587003236245955e-05, "loss": 0.0279, "step": 79760 }, { "epoch": 30.98, "learning_rate": 1.5869514563106798e-05, "loss": 0.0902, "step": 79770 }, { "epoch": 30.98, "learning_rate": 1.586899676375405e-05, "loss": 0.1225, "step": 79780 }, { "epoch": 30.99, "learning_rate": 1.5868478964401294e-05, "loss": 0.1027, "step": 79790 }, { "epoch": 30.99, "learning_rate": 1.5867961165048545e-05, "loss": 0.0956, "step": 79800 }, { "epoch": 30.99, "learning_rate": 1.5867443365695793e-05, "loss": 0.1283, "step": 79810 }, { "epoch": 31.0, "learning_rate": 1.5866925566343044e-05, "loss": 0.0652, "step": 79820 }, { "epoch": 31.0, "eval_accuracy": 0.9537826685006877, "eval_loss": 0.2627561092376709, "eval_runtime": 8.1542, "eval_samples_per_second": 445.781, "eval_steps_per_second": 55.799, "step": 79825 }, { "epoch": 31.0, "learning_rate": 1.5866407766990292e-05, "loss": 0.1509, "step": 79830 }, { "epoch": 31.01, "learning_rate": 1.5865889967637544e-05, "loss": 0.0994, "step": 79840 }, { "epoch": 31.01, "learning_rate": 1.586537216828479e-05, "loss": 0.2286, "step": 79850 }, { "epoch": 31.01, "learning_rate": 1.586485436893204e-05, "loss": 0.0751, "step": 79860 }, { "epoch": 31.02, "learning_rate": 1.5864336569579287e-05, "loss": 0.0423, "step": 79870 }, { "epoch": 31.02, "learning_rate": 1.586381877022654e-05, "loss": 0.1178, "step": 79880 }, { "epoch": 31.03, "learning_rate": 1.5863300970873787e-05, "loss": 0.303, "step": 79890 }, { "epoch": 31.03, "learning_rate": 1.5862783171521038e-05, "loss": 0.0216, "step": 79900 }, { "epoch": 31.03, "learning_rate": 1.5862265372168286e-05, "loss": 0.0485, "step": 79910 }, { "epoch": 31.04, "learning_rate": 1.5861747572815537e-05, "loss": 0.0815, "step": 79920 }, { "epoch": 31.04, "learning_rate": 1.5861229773462785e-05, "loss": 0.1615, "step": 79930 }, { "epoch": 31.04, "learning_rate": 1.5860711974110033e-05, "loss": 0.2809, "step": 79940 }, { "epoch": 31.05, "learning_rate": 1.586019417475728e-05, "loss": 0.1023, "step": 79950 }, { "epoch": 31.05, "learning_rate": 1.5859676375404532e-05, "loss": 0.1653, "step": 79960 }, { "epoch": 31.06, "learning_rate": 1.585915857605178e-05, "loss": 0.0232, "step": 79970 }, { "epoch": 31.06, "learning_rate": 1.585864077669903e-05, "loss": 0.0751, "step": 79980 }, { "epoch": 31.06, "learning_rate": 1.585812297734628e-05, "loss": 0.185, "step": 79990 }, { "epoch": 31.07, "learning_rate": 1.585760517799353e-05, "loss": 0.0655, "step": 80000 }, { "epoch": 31.07, "learning_rate": 1.585708737864078e-05, "loss": 0.1214, "step": 80010 }, { "epoch": 31.08, "learning_rate": 1.5856569579288027e-05, "loss": 0.167, "step": 80020 }, { "epoch": 31.08, "learning_rate": 1.5856051779935275e-05, "loss": 0.1068, "step": 80030 }, { "epoch": 31.08, "learning_rate": 1.5855533980582526e-05, "loss": 0.1276, "step": 80040 }, { "epoch": 31.09, "learning_rate": 1.5855016181229774e-05, "loss": 0.1457, "step": 80050 }, { "epoch": 31.09, "learning_rate": 1.5854498381877025e-05, "loss": 0.1041, "step": 80060 }, { "epoch": 31.1, "learning_rate": 1.5853980582524273e-05, "loss": 0.0307, "step": 80070 }, { "epoch": 31.1, "learning_rate": 1.5853462783171524e-05, "loss": 0.0349, "step": 80080 }, { "epoch": 31.1, "learning_rate": 1.5852944983818772e-05, "loss": 0.2112, "step": 80090 }, { "epoch": 31.11, "learning_rate": 1.585242718446602e-05, "loss": 0.0432, "step": 80100 }, { "epoch": 31.11, "learning_rate": 1.5851909385113268e-05, "loss": 0.0686, "step": 80110 }, { "epoch": 31.11, "learning_rate": 1.585139158576052e-05, "loss": 0.0527, "step": 80120 }, { "epoch": 31.12, "learning_rate": 1.5850873786407767e-05, "loss": 0.0291, "step": 80130 }, { "epoch": 31.12, "learning_rate": 1.585035598705502e-05, "loss": 0.0454, "step": 80140 }, { "epoch": 31.13, "learning_rate": 1.5849838187702267e-05, "loss": 0.0013, "step": 80150 }, { "epoch": 31.13, "learning_rate": 1.5849320388349514e-05, "loss": 0.0863, "step": 80160 }, { "epoch": 31.13, "learning_rate": 1.5848802588996766e-05, "loss": 0.2131, "step": 80170 }, { "epoch": 31.14, "learning_rate": 1.5848284789644014e-05, "loss": 0.0463, "step": 80180 }, { "epoch": 31.14, "learning_rate": 1.5847766990291265e-05, "loss": 0.1306, "step": 80190 }, { "epoch": 31.15, "learning_rate": 1.5847249190938513e-05, "loss": 0.0969, "step": 80200 }, { "epoch": 31.15, "learning_rate": 1.584673139158576e-05, "loss": 0.1013, "step": 80210 }, { "epoch": 31.15, "learning_rate": 1.5846213592233012e-05, "loss": 0.1169, "step": 80220 }, { "epoch": 31.16, "learning_rate": 1.584569579288026e-05, "loss": 0.1242, "step": 80230 }, { "epoch": 31.16, "learning_rate": 1.5845177993527508e-05, "loss": 0.059, "step": 80240 }, { "epoch": 31.17, "learning_rate": 1.584466019417476e-05, "loss": 0.1266, "step": 80250 }, { "epoch": 31.17, "learning_rate": 1.5844142394822007e-05, "loss": 0.0526, "step": 80260 }, { "epoch": 31.17, "learning_rate": 1.584362459546926e-05, "loss": 0.1307, "step": 80270 }, { "epoch": 31.18, "learning_rate": 1.5843106796116506e-05, "loss": 0.0904, "step": 80280 }, { "epoch": 31.18, "learning_rate": 1.5842588996763754e-05, "loss": 0.1415, "step": 80290 }, { "epoch": 31.18, "learning_rate": 1.5842071197411006e-05, "loss": 0.0838, "step": 80300 }, { "epoch": 31.19, "learning_rate": 1.5841553398058254e-05, "loss": 0.0904, "step": 80310 }, { "epoch": 31.19, "learning_rate": 1.58410355987055e-05, "loss": 0.118, "step": 80320 }, { "epoch": 31.2, "learning_rate": 1.5840517799352753e-05, "loss": 0.0365, "step": 80330 }, { "epoch": 31.2, "learning_rate": 1.584e-05, "loss": 0.1472, "step": 80340 }, { "epoch": 31.2, "learning_rate": 1.5839482200647252e-05, "loss": 0.1572, "step": 80350 }, { "epoch": 31.21, "learning_rate": 1.58389644012945e-05, "loss": 0.0127, "step": 80360 }, { "epoch": 31.21, "learning_rate": 1.5838446601941748e-05, "loss": 0.1373, "step": 80370 }, { "epoch": 31.22, "learning_rate": 1.5837928802589e-05, "loss": 0.033, "step": 80380 }, { "epoch": 31.22, "learning_rate": 1.5837411003236247e-05, "loss": 0.1581, "step": 80390 }, { "epoch": 31.22, "learning_rate": 1.5836893203883495e-05, "loss": 0.135, "step": 80400 }, { "epoch": 31.23, "learning_rate": 1.5836375404530746e-05, "loss": 0.0762, "step": 80410 }, { "epoch": 31.23, "learning_rate": 1.5835857605177994e-05, "loss": 0.1461, "step": 80420 }, { "epoch": 31.23, "learning_rate": 1.5835339805825246e-05, "loss": 0.118, "step": 80430 }, { "epoch": 31.24, "learning_rate": 1.5834822006472494e-05, "loss": 0.1128, "step": 80440 }, { "epoch": 31.24, "learning_rate": 1.583430420711974e-05, "loss": 0.0599, "step": 80450 }, { "epoch": 31.25, "learning_rate": 1.5833786407766993e-05, "loss": 0.0014, "step": 80460 }, { "epoch": 31.25, "learning_rate": 1.583326860841424e-05, "loss": 0.2184, "step": 80470 }, { "epoch": 31.25, "learning_rate": 1.583275080906149e-05, "loss": 0.1088, "step": 80480 }, { "epoch": 31.26, "learning_rate": 1.583223300970874e-05, "loss": 0.0328, "step": 80490 }, { "epoch": 31.26, "learning_rate": 1.5831715210355988e-05, "loss": 0.0661, "step": 80500 }, { "epoch": 31.27, "learning_rate": 1.583119741100324e-05, "loss": 0.1483, "step": 80510 }, { "epoch": 31.27, "learning_rate": 1.5830679611650487e-05, "loss": 0.0874, "step": 80520 }, { "epoch": 31.27, "learning_rate": 1.5830161812297735e-05, "loss": 0.0642, "step": 80530 }, { "epoch": 31.28, "learning_rate": 1.5829644012944983e-05, "loss": 0.1388, "step": 80540 }, { "epoch": 31.28, "learning_rate": 1.5829126213592234e-05, "loss": 0.029, "step": 80550 }, { "epoch": 31.29, "learning_rate": 1.5828608414239482e-05, "loss": 0.0226, "step": 80560 }, { "epoch": 31.29, "learning_rate": 1.5828090614886734e-05, "loss": 0.0293, "step": 80570 }, { "epoch": 31.29, "learning_rate": 1.582757281553398e-05, "loss": 0.0225, "step": 80580 }, { "epoch": 31.3, "learning_rate": 1.5827055016181233e-05, "loss": 0.1591, "step": 80590 }, { "epoch": 31.3, "learning_rate": 1.582653721682848e-05, "loss": 0.134, "step": 80600 }, { "epoch": 31.3, "learning_rate": 1.582601941747573e-05, "loss": 0.0793, "step": 80610 }, { "epoch": 31.31, "learning_rate": 1.5825501618122977e-05, "loss": 0.1073, "step": 80620 }, { "epoch": 31.31, "learning_rate": 1.5824983818770228e-05, "loss": 0.0361, "step": 80630 }, { "epoch": 31.32, "learning_rate": 1.5824466019417476e-05, "loss": 0.0225, "step": 80640 }, { "epoch": 31.32, "learning_rate": 1.5823948220064727e-05, "loss": 0.1086, "step": 80650 }, { "epoch": 31.32, "learning_rate": 1.5823430420711975e-05, "loss": 0.25, "step": 80660 }, { "epoch": 31.33, "learning_rate": 1.5822912621359226e-05, "loss": 0.0878, "step": 80670 }, { "epoch": 31.33, "learning_rate": 1.5822394822006474e-05, "loss": 0.0739, "step": 80680 }, { "epoch": 31.34, "learning_rate": 1.5821877022653722e-05, "loss": 0.0501, "step": 80690 }, { "epoch": 31.34, "learning_rate": 1.582135922330097e-05, "loss": 0.003, "step": 80700 }, { "epoch": 31.34, "learning_rate": 1.582084142394822e-05, "loss": 0.0249, "step": 80710 }, { "epoch": 31.35, "learning_rate": 1.582032362459547e-05, "loss": 0.0447, "step": 80720 }, { "epoch": 31.35, "learning_rate": 1.581980582524272e-05, "loss": 0.1455, "step": 80730 }, { "epoch": 31.36, "learning_rate": 1.581928802588997e-05, "loss": 0.0907, "step": 80740 }, { "epoch": 31.36, "learning_rate": 1.581877022653722e-05, "loss": 0.1561, "step": 80750 }, { "epoch": 31.36, "learning_rate": 1.5818252427184468e-05, "loss": 0.1493, "step": 80760 }, { "epoch": 31.37, "learning_rate": 1.5817734627831716e-05, "loss": 0.0899, "step": 80770 }, { "epoch": 31.37, "learning_rate": 1.5817216828478964e-05, "loss": 0.0159, "step": 80780 }, { "epoch": 31.37, "learning_rate": 1.5816699029126215e-05, "loss": 0.1836, "step": 80790 }, { "epoch": 31.38, "learning_rate": 1.5816181229773463e-05, "loss": 0.0444, "step": 80800 }, { "epoch": 31.38, "learning_rate": 1.5815663430420714e-05, "loss": 0.1191, "step": 80810 }, { "epoch": 31.39, "learning_rate": 1.5815145631067962e-05, "loss": 0.1113, "step": 80820 }, { "epoch": 31.39, "learning_rate": 1.5814627831715213e-05, "loss": 0.1062, "step": 80830 }, { "epoch": 31.39, "learning_rate": 1.581411003236246e-05, "loss": 0.131, "step": 80840 }, { "epoch": 31.4, "learning_rate": 1.581359223300971e-05, "loss": 0.1514, "step": 80850 }, { "epoch": 31.4, "learning_rate": 1.5813074433656957e-05, "loss": 0.1239, "step": 80860 }, { "epoch": 31.41, "learning_rate": 1.581255663430421e-05, "loss": 0.0198, "step": 80870 }, { "epoch": 31.41, "learning_rate": 1.5812038834951456e-05, "loss": 0.0643, "step": 80880 }, { "epoch": 31.41, "learning_rate": 1.5811521035598708e-05, "loss": 0.0768, "step": 80890 }, { "epoch": 31.42, "learning_rate": 1.5811003236245956e-05, "loss": 0.111, "step": 80900 }, { "epoch": 31.42, "learning_rate": 1.5810485436893207e-05, "loss": 0.0403, "step": 80910 }, { "epoch": 31.43, "learning_rate": 1.5809967637540455e-05, "loss": 0.0455, "step": 80920 }, { "epoch": 31.43, "learning_rate": 1.5809449838187703e-05, "loss": 0.1165, "step": 80930 }, { "epoch": 31.43, "learning_rate": 1.580893203883495e-05, "loss": 0.121, "step": 80940 }, { "epoch": 31.44, "learning_rate": 1.5808414239482202e-05, "loss": 0.149, "step": 80950 }, { "epoch": 31.44, "learning_rate": 1.580789644012945e-05, "loss": 0.1459, "step": 80960 }, { "epoch": 31.44, "learning_rate": 1.58073786407767e-05, "loss": 0.1263, "step": 80970 }, { "epoch": 31.45, "learning_rate": 1.580686084142395e-05, "loss": 0.1683, "step": 80980 }, { "epoch": 31.45, "learning_rate": 1.58063430420712e-05, "loss": 0.0472, "step": 80990 }, { "epoch": 31.46, "learning_rate": 1.580582524271845e-05, "loss": 0.1875, "step": 81000 }, { "epoch": 31.46, "learning_rate": 1.5805307443365696e-05, "loss": 0.0189, "step": 81010 }, { "epoch": 31.46, "learning_rate": 1.5804789644012944e-05, "loss": 0.1034, "step": 81020 }, { "epoch": 31.47, "learning_rate": 1.5804271844660196e-05, "loss": 0.193, "step": 81030 }, { "epoch": 31.47, "learning_rate": 1.5803754045307444e-05, "loss": 0.0546, "step": 81040 }, { "epoch": 31.48, "learning_rate": 1.5803236245954695e-05, "loss": 0.0404, "step": 81050 }, { "epoch": 31.48, "learning_rate": 1.5802718446601943e-05, "loss": 0.2437, "step": 81060 }, { "epoch": 31.48, "learning_rate": 1.5802200647249194e-05, "loss": 0.0604, "step": 81070 }, { "epoch": 31.49, "learning_rate": 1.5801682847896442e-05, "loss": 0.1172, "step": 81080 }, { "epoch": 31.49, "learning_rate": 1.580116504854369e-05, "loss": 0.0731, "step": 81090 }, { "epoch": 31.5, "learning_rate": 1.5800647249190938e-05, "loss": 0.0267, "step": 81100 }, { "epoch": 31.5, "learning_rate": 1.580012944983819e-05, "loss": 0.0445, "step": 81110 }, { "epoch": 31.5, "learning_rate": 1.5799611650485437e-05, "loss": 0.1319, "step": 81120 }, { "epoch": 31.51, "learning_rate": 1.579909385113269e-05, "loss": 0.0605, "step": 81130 }, { "epoch": 31.51, "learning_rate": 1.5798576051779936e-05, "loss": 0.0454, "step": 81140 }, { "epoch": 31.51, "learning_rate": 1.5798058252427188e-05, "loss": 0.0788, "step": 81150 }, { "epoch": 31.52, "learning_rate": 1.5797540453074436e-05, "loss": 0.0258, "step": 81160 }, { "epoch": 31.52, "learning_rate": 1.5797022653721684e-05, "loss": 0.0828, "step": 81170 }, { "epoch": 31.53, "learning_rate": 1.579650485436893e-05, "loss": 0.2522, "step": 81180 }, { "epoch": 31.53, "learning_rate": 1.5795987055016183e-05, "loss": 0.169, "step": 81190 }, { "epoch": 31.53, "learning_rate": 1.579546925566343e-05, "loss": 0.0776, "step": 81200 }, { "epoch": 31.54, "learning_rate": 1.5794951456310682e-05, "loss": 0.1859, "step": 81210 }, { "epoch": 31.54, "learning_rate": 1.579443365695793e-05, "loss": 0.1544, "step": 81220 }, { "epoch": 31.55, "learning_rate": 1.579391585760518e-05, "loss": 0.0964, "step": 81230 }, { "epoch": 31.55, "learning_rate": 1.579339805825243e-05, "loss": 0.0879, "step": 81240 }, { "epoch": 31.55, "learning_rate": 1.5792880258899677e-05, "loss": 0.1924, "step": 81250 }, { "epoch": 31.56, "learning_rate": 1.5792362459546925e-05, "loss": 0.1241, "step": 81260 }, { "epoch": 31.56, "learning_rate": 1.5791844660194176e-05, "loss": 0.096, "step": 81270 }, { "epoch": 31.57, "learning_rate": 1.5791326860841424e-05, "loss": 0.0443, "step": 81280 }, { "epoch": 31.57, "learning_rate": 1.5790809061488676e-05, "loss": 0.1975, "step": 81290 }, { "epoch": 31.57, "learning_rate": 1.5790291262135923e-05, "loss": 0.2285, "step": 81300 }, { "epoch": 31.58, "learning_rate": 1.5789773462783175e-05, "loss": 0.0578, "step": 81310 }, { "epoch": 31.58, "learning_rate": 1.5789255663430423e-05, "loss": 0.0368, "step": 81320 }, { "epoch": 31.58, "learning_rate": 1.578873786407767e-05, "loss": 0.1425, "step": 81330 }, { "epoch": 31.59, "learning_rate": 1.578822006472492e-05, "loss": 0.0987, "step": 81340 }, { "epoch": 31.59, "learning_rate": 1.578770226537217e-05, "loss": 0.0983, "step": 81350 }, { "epoch": 31.6, "learning_rate": 1.5787184466019418e-05, "loss": 0.2737, "step": 81360 }, { "epoch": 31.6, "learning_rate": 1.578666666666667e-05, "loss": 0.0201, "step": 81370 }, { "epoch": 31.6, "learning_rate": 1.5786148867313917e-05, "loss": 0.0907, "step": 81380 }, { "epoch": 31.61, "learning_rate": 1.578563106796117e-05, "loss": 0.0222, "step": 81390 }, { "epoch": 31.61, "learning_rate": 1.5785113268608416e-05, "loss": 0.0387, "step": 81400 }, { "epoch": 31.62, "learning_rate": 1.5784595469255664e-05, "loss": 0.0158, "step": 81410 }, { "epoch": 31.62, "learning_rate": 1.5784077669902912e-05, "loss": 0.0356, "step": 81420 }, { "epoch": 31.62, "learning_rate": 1.5783559870550163e-05, "loss": 0.151, "step": 81430 }, { "epoch": 31.63, "learning_rate": 1.578304207119741e-05, "loss": 0.0202, "step": 81440 }, { "epoch": 31.63, "learning_rate": 1.5782524271844663e-05, "loss": 0.1814, "step": 81450 }, { "epoch": 31.63, "learning_rate": 1.578200647249191e-05, "loss": 0.0958, "step": 81460 }, { "epoch": 31.64, "learning_rate": 1.5781488673139162e-05, "loss": 0.1141, "step": 81470 }, { "epoch": 31.64, "learning_rate": 1.578097087378641e-05, "loss": 0.2331, "step": 81480 }, { "epoch": 31.65, "learning_rate": 1.5780453074433658e-05, "loss": 0.1246, "step": 81490 }, { "epoch": 31.65, "learning_rate": 1.5779935275080906e-05, "loss": 0.0844, "step": 81500 }, { "epoch": 31.65, "learning_rate": 1.5779417475728157e-05, "loss": 0.117, "step": 81510 }, { "epoch": 31.66, "learning_rate": 1.5778899676375405e-05, "loss": 0.2999, "step": 81520 }, { "epoch": 31.66, "learning_rate": 1.5778381877022656e-05, "loss": 0.0883, "step": 81530 }, { "epoch": 31.67, "learning_rate": 1.5777864077669904e-05, "loss": 0.2339, "step": 81540 }, { "epoch": 31.67, "learning_rate": 1.5777346278317155e-05, "loss": 0.0613, "step": 81550 }, { "epoch": 31.67, "learning_rate": 1.5776828478964403e-05, "loss": 0.0681, "step": 81560 }, { "epoch": 31.68, "learning_rate": 1.577631067961165e-05, "loss": 0.1319, "step": 81570 }, { "epoch": 31.68, "learning_rate": 1.57757928802589e-05, "loss": 0.0603, "step": 81580 }, { "epoch": 31.69, "learning_rate": 1.577527508090615e-05, "loss": 0.1537, "step": 81590 }, { "epoch": 31.69, "learning_rate": 1.57747572815534e-05, "loss": 0.0671, "step": 81600 }, { "epoch": 31.69, "learning_rate": 1.577423948220065e-05, "loss": 0.142, "step": 81610 }, { "epoch": 31.7, "learning_rate": 1.5773721682847898e-05, "loss": 0.0569, "step": 81620 }, { "epoch": 31.7, "learning_rate": 1.5773203883495146e-05, "loss": 0.1539, "step": 81630 }, { "epoch": 31.7, "learning_rate": 1.5772686084142397e-05, "loss": 0.044, "step": 81640 }, { "epoch": 31.71, "learning_rate": 1.5772168284789645e-05, "loss": 0.1461, "step": 81650 }, { "epoch": 31.71, "learning_rate": 1.5771650485436893e-05, "loss": 0.0335, "step": 81660 }, { "epoch": 31.72, "learning_rate": 1.5771132686084144e-05, "loss": 0.1246, "step": 81670 }, { "epoch": 31.72, "learning_rate": 1.5770614886731392e-05, "loss": 0.1445, "step": 81680 }, { "epoch": 31.72, "learning_rate": 1.5770097087378643e-05, "loss": 0.2244, "step": 81690 }, { "epoch": 31.73, "learning_rate": 1.576957928802589e-05, "loss": 0.0412, "step": 81700 }, { "epoch": 31.73, "learning_rate": 1.576906148867314e-05, "loss": 0.05, "step": 81710 }, { "epoch": 31.74, "learning_rate": 1.576854368932039e-05, "loss": 0.1203, "step": 81720 }, { "epoch": 31.74, "learning_rate": 1.576802588996764e-05, "loss": 0.1738, "step": 81730 }, { "epoch": 31.74, "learning_rate": 1.5767508090614886e-05, "loss": 0.1664, "step": 81740 }, { "epoch": 31.75, "learning_rate": 1.5766990291262138e-05, "loss": 0.0469, "step": 81750 }, { "epoch": 31.75, "learning_rate": 1.5766472491909386e-05, "loss": 0.0702, "step": 81760 }, { "epoch": 31.76, "learning_rate": 1.5765954692556637e-05, "loss": 0.1738, "step": 81770 }, { "epoch": 31.76, "learning_rate": 1.5765436893203885e-05, "loss": 0.2161, "step": 81780 }, { "epoch": 31.76, "learning_rate": 1.5764919093851133e-05, "loss": 0.1349, "step": 81790 }, { "epoch": 31.77, "learning_rate": 1.5764401294498384e-05, "loss": 0.0965, "step": 81800 }, { "epoch": 31.77, "learning_rate": 1.5763883495145632e-05, "loss": 0.1095, "step": 81810 }, { "epoch": 31.77, "learning_rate": 1.576336569579288e-05, "loss": 0.0312, "step": 81820 }, { "epoch": 31.78, "learning_rate": 1.576284789644013e-05, "loss": 0.1628, "step": 81830 }, { "epoch": 31.78, "learning_rate": 1.576233009708738e-05, "loss": 0.0501, "step": 81840 }, { "epoch": 31.79, "learning_rate": 1.576181229773463e-05, "loss": 0.0454, "step": 81850 }, { "epoch": 31.79, "learning_rate": 1.576129449838188e-05, "loss": 0.1038, "step": 81860 }, { "epoch": 31.79, "learning_rate": 1.5760776699029126e-05, "loss": 0.05, "step": 81870 }, { "epoch": 31.8, "learning_rate": 1.5760258899676378e-05, "loss": 0.0945, "step": 81880 }, { "epoch": 31.8, "learning_rate": 1.5759741100323626e-05, "loss": 0.0741, "step": 81890 }, { "epoch": 31.81, "learning_rate": 1.5759223300970877e-05, "loss": 0.0923, "step": 81900 }, { "epoch": 31.81, "learning_rate": 1.5758705501618125e-05, "loss": 0.0404, "step": 81910 }, { "epoch": 31.81, "learning_rate": 1.5758187702265373e-05, "loss": 0.191, "step": 81920 }, { "epoch": 31.82, "learning_rate": 1.5757669902912624e-05, "loss": 0.139, "step": 81930 }, { "epoch": 31.82, "learning_rate": 1.5757152103559872e-05, "loss": 0.0312, "step": 81940 }, { "epoch": 31.83, "learning_rate": 1.575663430420712e-05, "loss": 0.0885, "step": 81950 }, { "epoch": 31.83, "learning_rate": 1.575611650485437e-05, "loss": 0.0501, "step": 81960 }, { "epoch": 31.83, "learning_rate": 1.575559870550162e-05, "loss": 0.1062, "step": 81970 }, { "epoch": 31.84, "learning_rate": 1.575508090614887e-05, "loss": 0.0499, "step": 81980 }, { "epoch": 31.84, "learning_rate": 1.575456310679612e-05, "loss": 0.0018, "step": 81990 }, { "epoch": 31.84, "learning_rate": 1.5754045307443366e-05, "loss": 0.0342, "step": 82000 }, { "epoch": 31.85, "learning_rate": 1.5753527508090614e-05, "loss": 0.0064, "step": 82010 }, { "epoch": 31.85, "learning_rate": 1.5753009708737865e-05, "loss": 0.0604, "step": 82020 }, { "epoch": 31.86, "learning_rate": 1.5752491909385113e-05, "loss": 0.1887, "step": 82030 }, { "epoch": 31.86, "learning_rate": 1.5751974110032365e-05, "loss": 0.0744, "step": 82040 }, { "epoch": 31.86, "learning_rate": 1.5751456310679613e-05, "loss": 0.0989, "step": 82050 }, { "epoch": 31.87, "learning_rate": 1.5750938511326864e-05, "loss": 0.1854, "step": 82060 }, { "epoch": 31.87, "learning_rate": 1.5750420711974112e-05, "loss": 0.1686, "step": 82070 }, { "epoch": 31.88, "learning_rate": 1.574990291262136e-05, "loss": 0.1062, "step": 82080 }, { "epoch": 31.88, "learning_rate": 1.5749385113268608e-05, "loss": 0.0241, "step": 82090 }, { "epoch": 31.88, "learning_rate": 1.574886731391586e-05, "loss": 0.0975, "step": 82100 }, { "epoch": 31.89, "learning_rate": 1.5748349514563107e-05, "loss": 0.0563, "step": 82110 }, { "epoch": 31.89, "learning_rate": 1.5747831715210358e-05, "loss": 0.1118, "step": 82120 }, { "epoch": 31.9, "learning_rate": 1.5747313915857606e-05, "loss": 0.0898, "step": 82130 }, { "epoch": 31.9, "learning_rate": 1.5746796116504858e-05, "loss": 0.117, "step": 82140 }, { "epoch": 31.9, "learning_rate": 1.5746278317152105e-05, "loss": 0.2436, "step": 82150 }, { "epoch": 31.91, "learning_rate": 1.5745760517799353e-05, "loss": 0.0819, "step": 82160 }, { "epoch": 31.91, "learning_rate": 1.57452427184466e-05, "loss": 0.0787, "step": 82170 }, { "epoch": 31.91, "learning_rate": 1.5744724919093853e-05, "loss": 0.0105, "step": 82180 }, { "epoch": 31.92, "learning_rate": 1.57442071197411e-05, "loss": 0.1154, "step": 82190 }, { "epoch": 31.92, "learning_rate": 1.5743689320388352e-05, "loss": 0.0962, "step": 82200 }, { "epoch": 31.93, "learning_rate": 1.57431715210356e-05, "loss": 0.1379, "step": 82210 }, { "epoch": 31.93, "learning_rate": 1.574265372168285e-05, "loss": 0.0586, "step": 82220 }, { "epoch": 31.93, "learning_rate": 1.57421359223301e-05, "loss": 0.1329, "step": 82230 }, { "epoch": 31.94, "learning_rate": 1.5741618122977347e-05, "loss": 0.0336, "step": 82240 }, { "epoch": 31.94, "learning_rate": 1.5741100323624595e-05, "loss": 0.0456, "step": 82250 }, { "epoch": 31.95, "learning_rate": 1.5740582524271846e-05, "loss": 0.2175, "step": 82260 }, { "epoch": 31.95, "learning_rate": 1.5740064724919094e-05, "loss": 0.1196, "step": 82270 }, { "epoch": 31.95, "learning_rate": 1.5739546925566345e-05, "loss": 0.0408, "step": 82280 }, { "epoch": 31.96, "learning_rate": 1.5739029126213593e-05, "loss": 0.166, "step": 82290 }, { "epoch": 31.96, "learning_rate": 1.5738511326860845e-05, "loss": 0.1685, "step": 82300 }, { "epoch": 31.97, "learning_rate": 1.573799352750809e-05, "loss": 0.0751, "step": 82310 }, { "epoch": 31.97, "learning_rate": 1.573747572815534e-05, "loss": 0.0289, "step": 82320 }, { "epoch": 31.97, "learning_rate": 1.573695792880259e-05, "loss": 0.0539, "step": 82330 }, { "epoch": 31.98, "learning_rate": 1.573644012944984e-05, "loss": 0.0254, "step": 82340 }, { "epoch": 31.98, "learning_rate": 1.5735922330097088e-05, "loss": 0.2161, "step": 82350 }, { "epoch": 31.98, "learning_rate": 1.573540453074434e-05, "loss": 0.0679, "step": 82360 }, { "epoch": 31.99, "learning_rate": 1.5734886731391587e-05, "loss": 0.126, "step": 82370 }, { "epoch": 31.99, "learning_rate": 1.5734368932038838e-05, "loss": 0.1806, "step": 82380 }, { "epoch": 32.0, "learning_rate": 1.5733851132686083e-05, "loss": 0.3364, "step": 82390 }, { "epoch": 32.0, "learning_rate": 1.5733333333333334e-05, "loss": 0.0324, "step": 82400 }, { "epoch": 32.0, "eval_accuracy": 0.9452544704264099, "eval_loss": 0.32230931520462036, "eval_runtime": 8.2899, "eval_samples_per_second": 438.486, "eval_steps_per_second": 54.886, "step": 82400 }, { "epoch": 32.0, "learning_rate": 1.5732815533980582e-05, "loss": 0.2413, "step": 82410 }, { "epoch": 32.01, "learning_rate": 1.5732297734627833e-05, "loss": 0.0307, "step": 82420 }, { "epoch": 32.01, "learning_rate": 1.573177993527508e-05, "loss": 0.1562, "step": 82430 }, { "epoch": 32.02, "learning_rate": 1.5731262135922333e-05, "loss": 0.1028, "step": 82440 }, { "epoch": 32.02, "learning_rate": 1.573074433656958e-05, "loss": 0.0045, "step": 82450 }, { "epoch": 32.02, "learning_rate": 1.5730226537216832e-05, "loss": 0.0736, "step": 82460 }, { "epoch": 32.03, "learning_rate": 1.572970873786408e-05, "loss": 0.2257, "step": 82470 }, { "epoch": 32.03, "learning_rate": 1.5729190938511328e-05, "loss": 0.0213, "step": 82480 }, { "epoch": 32.03, "learning_rate": 1.5728673139158576e-05, "loss": 0.11, "step": 82490 }, { "epoch": 32.04, "learning_rate": 1.5728155339805827e-05, "loss": 0.1479, "step": 82500 }, { "epoch": 32.04, "learning_rate": 1.5727637540453075e-05, "loss": 0.1123, "step": 82510 }, { "epoch": 32.05, "learning_rate": 1.5727119741100326e-05, "loss": 0.1181, "step": 82520 }, { "epoch": 32.05, "learning_rate": 1.5726601941747574e-05, "loss": 0.0214, "step": 82530 }, { "epoch": 32.05, "learning_rate": 1.5726084142394825e-05, "loss": 0.0551, "step": 82540 }, { "epoch": 32.06, "learning_rate": 1.5725566343042073e-05, "loss": 0.0624, "step": 82550 }, { "epoch": 32.06, "learning_rate": 1.572504854368932e-05, "loss": 0.0674, "step": 82560 }, { "epoch": 32.07, "learning_rate": 1.572453074433657e-05, "loss": 0.0192, "step": 82570 }, { "epoch": 32.07, "learning_rate": 1.572401294498382e-05, "loss": 0.14, "step": 82580 }, { "epoch": 32.07, "learning_rate": 1.5723495145631068e-05, "loss": 0.0624, "step": 82590 }, { "epoch": 32.08, "learning_rate": 1.572297734627832e-05, "loss": 0.1196, "step": 82600 }, { "epoch": 32.08, "learning_rate": 1.5722459546925568e-05, "loss": 0.0608, "step": 82610 }, { "epoch": 32.09, "learning_rate": 1.572194174757282e-05, "loss": 0.193, "step": 82620 }, { "epoch": 32.09, "learning_rate": 1.5721423948220067e-05, "loss": 0.0403, "step": 82630 }, { "epoch": 32.09, "learning_rate": 1.5720906148867315e-05, "loss": 0.0047, "step": 82640 }, { "epoch": 32.1, "learning_rate": 1.5720388349514563e-05, "loss": 0.0988, "step": 82650 }, { "epoch": 32.1, "learning_rate": 1.5719870550161814e-05, "loss": 0.1564, "step": 82660 }, { "epoch": 32.1, "learning_rate": 1.5719352750809062e-05, "loss": 0.053, "step": 82670 }, { "epoch": 32.11, "learning_rate": 1.5718834951456313e-05, "loss": 0.1556, "step": 82680 }, { "epoch": 32.11, "learning_rate": 1.571831715210356e-05, "loss": 0.0161, "step": 82690 }, { "epoch": 32.12, "learning_rate": 1.5717799352750812e-05, "loss": 0.101, "step": 82700 }, { "epoch": 32.12, "learning_rate": 1.571728155339806e-05, "loss": 0.099, "step": 82710 }, { "epoch": 32.12, "learning_rate": 1.5716763754045308e-05, "loss": 0.0451, "step": 82720 }, { "epoch": 32.13, "learning_rate": 1.5716245954692556e-05, "loss": 0.1561, "step": 82730 }, { "epoch": 32.13, "learning_rate": 1.5715728155339807e-05, "loss": 0.0126, "step": 82740 }, { "epoch": 32.14, "learning_rate": 1.5715210355987055e-05, "loss": 0.029, "step": 82750 }, { "epoch": 32.14, "learning_rate": 1.5714692556634307e-05, "loss": 0.1009, "step": 82760 }, { "epoch": 32.14, "learning_rate": 1.5714174757281555e-05, "loss": 0.052, "step": 82770 }, { "epoch": 32.15, "learning_rate": 1.5713656957928806e-05, "loss": 0.1718, "step": 82780 }, { "epoch": 32.15, "learning_rate": 1.5713139158576054e-05, "loss": 0.1382, "step": 82790 }, { "epoch": 32.16, "learning_rate": 1.5712621359223302e-05, "loss": 0.0477, "step": 82800 }, { "epoch": 32.16, "learning_rate": 1.571210355987055e-05, "loss": 0.0154, "step": 82810 }, { "epoch": 32.16, "learning_rate": 1.57115857605178e-05, "loss": 0.0992, "step": 82820 }, { "epoch": 32.17, "learning_rate": 1.571106796116505e-05, "loss": 0.0791, "step": 82830 }, { "epoch": 32.17, "learning_rate": 1.57105501618123e-05, "loss": 0.1044, "step": 82840 }, { "epoch": 32.17, "learning_rate": 1.5710032362459548e-05, "loss": 0.077, "step": 82850 }, { "epoch": 32.18, "learning_rate": 1.57095145631068e-05, "loss": 0.3981, "step": 82860 }, { "epoch": 32.18, "learning_rate": 1.5708996763754047e-05, "loss": 0.1318, "step": 82870 }, { "epoch": 32.19, "learning_rate": 1.5708478964401295e-05, "loss": 0.1211, "step": 82880 }, { "epoch": 32.19, "learning_rate": 1.5707961165048543e-05, "loss": 0.0905, "step": 82890 }, { "epoch": 32.19, "learning_rate": 1.5707443365695795e-05, "loss": 0.0536, "step": 82900 }, { "epoch": 32.2, "learning_rate": 1.5706925566343043e-05, "loss": 0.207, "step": 82910 }, { "epoch": 32.2, "learning_rate": 1.5706407766990294e-05, "loss": 0.0451, "step": 82920 }, { "epoch": 32.21, "learning_rate": 1.5705889967637542e-05, "loss": 0.2162, "step": 82930 }, { "epoch": 32.21, "learning_rate": 1.5705372168284793e-05, "loss": 0.2077, "step": 82940 }, { "epoch": 32.21, "learning_rate": 1.570485436893204e-05, "loss": 0.0194, "step": 82950 }, { "epoch": 32.22, "learning_rate": 1.570433656957929e-05, "loss": 0.0579, "step": 82960 }, { "epoch": 32.22, "learning_rate": 1.5703818770226537e-05, "loss": 0.0763, "step": 82970 }, { "epoch": 32.23, "learning_rate": 1.5703300970873788e-05, "loss": 0.1409, "step": 82980 }, { "epoch": 32.23, "learning_rate": 1.5702783171521036e-05, "loss": 0.0804, "step": 82990 }, { "epoch": 32.23, "learning_rate": 1.5702265372168287e-05, "loss": 0.1186, "step": 83000 }, { "epoch": 32.24, "learning_rate": 1.5701747572815535e-05, "loss": 0.0263, "step": 83010 }, { "epoch": 32.24, "learning_rate": 1.5701229773462787e-05, "loss": 0.1713, "step": 83020 }, { "epoch": 32.24, "learning_rate": 1.5700711974110035e-05, "loss": 0.0708, "step": 83030 }, { "epoch": 32.25, "learning_rate": 1.5700194174757282e-05, "loss": 0.0702, "step": 83040 }, { "epoch": 32.25, "learning_rate": 1.569967637540453e-05, "loss": 0.085, "step": 83050 }, { "epoch": 32.26, "learning_rate": 1.5699158576051782e-05, "loss": 0.004, "step": 83060 }, { "epoch": 32.26, "learning_rate": 1.569864077669903e-05, "loss": 0.0447, "step": 83070 }, { "epoch": 32.26, "learning_rate": 1.569812297734628e-05, "loss": 0.1735, "step": 83080 }, { "epoch": 32.27, "learning_rate": 1.569760517799353e-05, "loss": 0.257, "step": 83090 }, { "epoch": 32.27, "learning_rate": 1.5697087378640777e-05, "loss": 0.0635, "step": 83100 }, { "epoch": 32.28, "learning_rate": 1.5696569579288028e-05, "loss": 0.0179, "step": 83110 }, { "epoch": 32.28, "learning_rate": 1.5696051779935276e-05, "loss": 0.039, "step": 83120 }, { "epoch": 32.28, "learning_rate": 1.5695533980582524e-05, "loss": 0.0313, "step": 83130 }, { "epoch": 32.29, "learning_rate": 1.5695016181229775e-05, "loss": 0.1883, "step": 83140 }, { "epoch": 32.29, "learning_rate": 1.5694498381877023e-05, "loss": 0.0029, "step": 83150 }, { "epoch": 32.3, "learning_rate": 1.5693980582524275e-05, "loss": 0.0904, "step": 83160 }, { "epoch": 32.3, "learning_rate": 1.5693462783171522e-05, "loss": 0.0563, "step": 83170 }, { "epoch": 32.3, "learning_rate": 1.569294498381877e-05, "loss": 0.0854, "step": 83180 }, { "epoch": 32.31, "learning_rate": 1.569242718446602e-05, "loss": 0.0636, "step": 83190 }, { "epoch": 32.31, "learning_rate": 1.569190938511327e-05, "loss": 0.0248, "step": 83200 }, { "epoch": 32.31, "learning_rate": 1.5691391585760518e-05, "loss": 0.1648, "step": 83210 }, { "epoch": 32.32, "learning_rate": 1.569087378640777e-05, "loss": 0.0142, "step": 83220 }, { "epoch": 32.32, "learning_rate": 1.5690355987055017e-05, "loss": 0.0602, "step": 83230 }, { "epoch": 32.33, "learning_rate": 1.5689838187702268e-05, "loss": 0.1955, "step": 83240 }, { "epoch": 32.33, "learning_rate": 1.5689320388349516e-05, "loss": 0.0561, "step": 83250 }, { "epoch": 32.33, "learning_rate": 1.5688802588996764e-05, "loss": 0.2445, "step": 83260 }, { "epoch": 32.34, "learning_rate": 1.5688284789644015e-05, "loss": 0.0801, "step": 83270 }, { "epoch": 32.34, "learning_rate": 1.5687766990291263e-05, "loss": 0.1716, "step": 83280 }, { "epoch": 32.35, "learning_rate": 1.568724919093851e-05, "loss": 0.0705, "step": 83290 }, { "epoch": 32.35, "learning_rate": 1.5686731391585762e-05, "loss": 0.1622, "step": 83300 }, { "epoch": 32.35, "learning_rate": 1.568621359223301e-05, "loss": 0.0651, "step": 83310 }, { "epoch": 32.36, "learning_rate": 1.568569579288026e-05, "loss": 0.2247, "step": 83320 }, { "epoch": 32.36, "learning_rate": 1.568517799352751e-05, "loss": 0.0639, "step": 83330 }, { "epoch": 32.37, "learning_rate": 1.5684660194174757e-05, "loss": 0.0564, "step": 83340 }, { "epoch": 32.37, "learning_rate": 1.568414239482201e-05, "loss": 0.1711, "step": 83350 }, { "epoch": 32.37, "learning_rate": 1.5683624595469257e-05, "loss": 0.0619, "step": 83360 }, { "epoch": 32.38, "learning_rate": 1.5683106796116505e-05, "loss": 0.1027, "step": 83370 }, { "epoch": 32.38, "learning_rate": 1.5682588996763756e-05, "loss": 0.0578, "step": 83380 }, { "epoch": 32.38, "learning_rate": 1.5682071197411004e-05, "loss": 0.1397, "step": 83390 }, { "epoch": 32.39, "learning_rate": 1.5681553398058255e-05, "loss": 0.2056, "step": 83400 }, { "epoch": 32.39, "learning_rate": 1.5681035598705503e-05, "loss": 0.0224, "step": 83410 }, { "epoch": 32.4, "learning_rate": 1.568051779935275e-05, "loss": 0.1929, "step": 83420 }, { "epoch": 32.4, "learning_rate": 1.5680000000000002e-05, "loss": 0.0187, "step": 83430 }, { "epoch": 32.4, "learning_rate": 1.567948220064725e-05, "loss": 0.0716, "step": 83440 }, { "epoch": 32.41, "learning_rate": 1.5678964401294498e-05, "loss": 0.0739, "step": 83450 }, { "epoch": 32.41, "learning_rate": 1.567844660194175e-05, "loss": 0.312, "step": 83460 }, { "epoch": 32.42, "learning_rate": 1.5677928802588997e-05, "loss": 0.2291, "step": 83470 }, { "epoch": 32.42, "learning_rate": 1.5677411003236245e-05, "loss": 0.0647, "step": 83480 }, { "epoch": 32.42, "learning_rate": 1.5676893203883497e-05, "loss": 0.08, "step": 83490 }, { "epoch": 32.43, "learning_rate": 1.5676375404530745e-05, "loss": 0.0964, "step": 83500 }, { "epoch": 32.43, "learning_rate": 1.5675857605177996e-05, "loss": 0.0485, "step": 83510 }, { "epoch": 32.43, "learning_rate": 1.5675339805825244e-05, "loss": 0.0657, "step": 83520 }, { "epoch": 32.44, "learning_rate": 1.5674822006472495e-05, "loss": 0.2999, "step": 83530 }, { "epoch": 32.44, "learning_rate": 1.5674304207119743e-05, "loss": 0.0512, "step": 83540 }, { "epoch": 32.45, "learning_rate": 1.567378640776699e-05, "loss": 0.1279, "step": 83550 }, { "epoch": 32.45, "learning_rate": 1.567326860841424e-05, "loss": 0.08, "step": 83560 }, { "epoch": 32.45, "learning_rate": 1.567275080906149e-05, "loss": 0.053, "step": 83570 }, { "epoch": 32.46, "learning_rate": 1.5672233009708738e-05, "loss": 0.0145, "step": 83580 }, { "epoch": 32.46, "learning_rate": 1.567171521035599e-05, "loss": 0.036, "step": 83590 }, { "epoch": 32.47, "learning_rate": 1.5671197411003237e-05, "loss": 0.0758, "step": 83600 }, { "epoch": 32.47, "learning_rate": 1.567067961165049e-05, "loss": 0.0118, "step": 83610 }, { "epoch": 32.47, "learning_rate": 1.5670161812297737e-05, "loss": 0.2479, "step": 83620 }, { "epoch": 32.48, "learning_rate": 1.5669644012944985e-05, "loss": 0.1186, "step": 83630 }, { "epoch": 32.48, "learning_rate": 1.5669126213592232e-05, "loss": 0.1545, "step": 83640 }, { "epoch": 32.49, "learning_rate": 1.5668608414239484e-05, "loss": 0.0452, "step": 83650 }, { "epoch": 32.49, "learning_rate": 1.5668090614886732e-05, "loss": 0.149, "step": 83660 }, { "epoch": 32.49, "learning_rate": 1.5667572815533983e-05, "loss": 0.2124, "step": 83670 }, { "epoch": 32.5, "learning_rate": 1.566705501618123e-05, "loss": 0.0628, "step": 83680 }, { "epoch": 32.5, "learning_rate": 1.5666537216828482e-05, "loss": 0.0468, "step": 83690 }, { "epoch": 32.5, "learning_rate": 1.566601941747573e-05, "loss": 0.1043, "step": 83700 }, { "epoch": 32.51, "learning_rate": 1.5665501618122978e-05, "loss": 0.0296, "step": 83710 }, { "epoch": 32.51, "learning_rate": 1.5664983818770226e-05, "loss": 0.0365, "step": 83720 }, { "epoch": 32.52, "learning_rate": 1.5664466019417477e-05, "loss": 0.0523, "step": 83730 }, { "epoch": 32.52, "learning_rate": 1.5663948220064725e-05, "loss": 0.2144, "step": 83740 }, { "epoch": 32.52, "learning_rate": 1.5663430420711977e-05, "loss": 0.105, "step": 83750 }, { "epoch": 32.53, "learning_rate": 1.5662912621359224e-05, "loss": 0.0931, "step": 83760 }, { "epoch": 32.53, "learning_rate": 1.5662394822006476e-05, "loss": 0.2388, "step": 83770 }, { "epoch": 32.54, "learning_rate": 1.566187702265372e-05, "loss": 0.1017, "step": 83780 }, { "epoch": 32.54, "learning_rate": 1.566135922330097e-05, "loss": 0.2607, "step": 83790 }, { "epoch": 32.54, "learning_rate": 1.566084142394822e-05, "loss": 0.1171, "step": 83800 }, { "epoch": 32.55, "learning_rate": 1.566032362459547e-05, "loss": 0.15, "step": 83810 }, { "epoch": 32.55, "learning_rate": 1.565980582524272e-05, "loss": 0.0724, "step": 83820 }, { "epoch": 32.56, "learning_rate": 1.565928802588997e-05, "loss": 0.1723, "step": 83830 }, { "epoch": 32.56, "learning_rate": 1.5658770226537218e-05, "loss": 0.2702, "step": 83840 }, { "epoch": 32.56, "learning_rate": 1.565825242718447e-05, "loss": 0.124, "step": 83850 }, { "epoch": 32.57, "learning_rate": 1.5657734627831714e-05, "loss": 0.0898, "step": 83860 }, { "epoch": 32.57, "learning_rate": 1.5657216828478965e-05, "loss": 0.0312, "step": 83870 }, { "epoch": 32.57, "learning_rate": 1.5656699029126213e-05, "loss": 0.1701, "step": 83880 }, { "epoch": 32.58, "learning_rate": 1.5656181229773464e-05, "loss": 0.193, "step": 83890 }, { "epoch": 32.58, "learning_rate": 1.5655663430420712e-05, "loss": 0.0892, "step": 83900 }, { "epoch": 32.59, "learning_rate": 1.5655145631067964e-05, "loss": 0.0333, "step": 83910 }, { "epoch": 32.59, "learning_rate": 1.565462783171521e-05, "loss": 0.1807, "step": 83920 }, { "epoch": 32.59, "learning_rate": 1.5654110032362463e-05, "loss": 0.1223, "step": 83930 }, { "epoch": 32.6, "learning_rate": 1.5653592233009707e-05, "loss": 0.0346, "step": 83940 }, { "epoch": 32.6, "learning_rate": 1.565307443365696e-05, "loss": 0.1024, "step": 83950 }, { "epoch": 32.61, "learning_rate": 1.5652556634304207e-05, "loss": 0.0647, "step": 83960 }, { "epoch": 32.61, "learning_rate": 1.5652038834951458e-05, "loss": 0.0569, "step": 83970 }, { "epoch": 32.61, "learning_rate": 1.5651521035598706e-05, "loss": 0.1207, "step": 83980 }, { "epoch": 32.62, "learning_rate": 1.5651003236245957e-05, "loss": 0.1328, "step": 83990 }, { "epoch": 32.62, "learning_rate": 1.5650485436893205e-05, "loss": 0.077, "step": 84000 }, { "epoch": 32.63, "learning_rate": 1.5649967637540456e-05, "loss": 0.1139, "step": 84010 }, { "epoch": 32.63, "learning_rate": 1.56494498381877e-05, "loss": 0.0838, "step": 84020 }, { "epoch": 32.63, "learning_rate": 1.5648932038834952e-05, "loss": 0.0298, "step": 84030 }, { "epoch": 32.64, "learning_rate": 1.56484142394822e-05, "loss": 0.0643, "step": 84040 }, { "epoch": 32.64, "learning_rate": 1.564789644012945e-05, "loss": 0.0666, "step": 84050 }, { "epoch": 32.64, "learning_rate": 1.56473786407767e-05, "loss": 0.083, "step": 84060 }, { "epoch": 32.65, "learning_rate": 1.564686084142395e-05, "loss": 0.1468, "step": 84070 }, { "epoch": 32.65, "learning_rate": 1.56463430420712e-05, "loss": 0.2002, "step": 84080 }, { "epoch": 32.66, "learning_rate": 1.564582524271845e-05, "loss": 0.1381, "step": 84090 }, { "epoch": 32.66, "learning_rate": 1.5645307443365698e-05, "loss": 0.1215, "step": 84100 }, { "epoch": 32.66, "learning_rate": 1.5644789644012946e-05, "loss": 0.2945, "step": 84110 }, { "epoch": 32.67, "learning_rate": 1.5644271844660194e-05, "loss": 0.0361, "step": 84120 }, { "epoch": 32.67, "learning_rate": 1.5643754045307445e-05, "loss": 0.0887, "step": 84130 }, { "epoch": 32.68, "learning_rate": 1.5643236245954693e-05, "loss": 0.1044, "step": 84140 }, { "epoch": 32.68, "learning_rate": 1.5642718446601944e-05, "loss": 0.118, "step": 84150 }, { "epoch": 32.68, "learning_rate": 1.5642200647249192e-05, "loss": 0.1307, "step": 84160 }, { "epoch": 32.69, "learning_rate": 1.5641682847896444e-05, "loss": 0.0094, "step": 84170 }, { "epoch": 32.69, "learning_rate": 1.564116504854369e-05, "loss": 0.2218, "step": 84180 }, { "epoch": 32.7, "learning_rate": 1.564064724919094e-05, "loss": 0.2493, "step": 84190 }, { "epoch": 32.7, "learning_rate": 1.5640129449838187e-05, "loss": 0.0933, "step": 84200 }, { "epoch": 32.7, "learning_rate": 1.563961165048544e-05, "loss": 0.0463, "step": 84210 }, { "epoch": 32.71, "learning_rate": 1.5639093851132687e-05, "loss": 0.1694, "step": 84220 }, { "epoch": 32.71, "learning_rate": 1.5638576051779938e-05, "loss": 0.1059, "step": 84230 }, { "epoch": 32.71, "learning_rate": 1.5638058252427186e-05, "loss": 0.1331, "step": 84240 }, { "epoch": 32.72, "learning_rate": 1.5637540453074437e-05, "loss": 0.0403, "step": 84250 }, { "epoch": 32.72, "learning_rate": 1.5637022653721685e-05, "loss": 0.0669, "step": 84260 }, { "epoch": 32.73, "learning_rate": 1.5636504854368933e-05, "loss": 0.0785, "step": 84270 }, { "epoch": 32.73, "learning_rate": 1.563598705501618e-05, "loss": 0.115, "step": 84280 }, { "epoch": 32.73, "learning_rate": 1.5635469255663432e-05, "loss": 0.0087, "step": 84290 }, { "epoch": 32.74, "learning_rate": 1.563495145631068e-05, "loss": 0.1416, "step": 84300 }, { "epoch": 32.74, "learning_rate": 1.563443365695793e-05, "loss": 0.024, "step": 84310 }, { "epoch": 32.75, "learning_rate": 1.563391585760518e-05, "loss": 0.0388, "step": 84320 }, { "epoch": 32.75, "learning_rate": 1.563339805825243e-05, "loss": 0.1352, "step": 84330 }, { "epoch": 32.75, "learning_rate": 1.563288025889968e-05, "loss": 0.1082, "step": 84340 }, { "epoch": 32.76, "learning_rate": 1.5632362459546927e-05, "loss": 0.1232, "step": 84350 }, { "epoch": 32.76, "learning_rate": 1.5631844660194174e-05, "loss": 0.1063, "step": 84360 }, { "epoch": 32.77, "learning_rate": 1.5631326860841426e-05, "loss": 0.0881, "step": 84370 }, { "epoch": 32.77, "learning_rate": 1.5630809061488674e-05, "loss": 0.0398, "step": 84380 }, { "epoch": 32.77, "learning_rate": 1.5630291262135925e-05, "loss": 0.1241, "step": 84390 }, { "epoch": 32.78, "learning_rate": 1.5629773462783173e-05, "loss": 0.1312, "step": 84400 }, { "epoch": 32.78, "learning_rate": 1.5629255663430424e-05, "loss": 0.0044, "step": 84410 }, { "epoch": 32.78, "learning_rate": 1.5628737864077672e-05, "loss": 0.0909, "step": 84420 }, { "epoch": 32.79, "learning_rate": 1.562822006472492e-05, "loss": 0.1683, "step": 84430 }, { "epoch": 32.79, "learning_rate": 1.5627702265372168e-05, "loss": 0.147, "step": 84440 }, { "epoch": 32.8, "learning_rate": 1.562718446601942e-05, "loss": 0.0899, "step": 84450 }, { "epoch": 32.8, "learning_rate": 1.5626666666666667e-05, "loss": 0.1836, "step": 84460 }, { "epoch": 32.8, "learning_rate": 1.562614886731392e-05, "loss": 0.1717, "step": 84470 }, { "epoch": 32.81, "learning_rate": 1.5625631067961166e-05, "loss": 0.0735, "step": 84480 }, { "epoch": 32.81, "learning_rate": 1.5625113268608418e-05, "loss": 0.0494, "step": 84490 }, { "epoch": 32.82, "learning_rate": 1.5624595469255666e-05, "loss": 0.0856, "step": 84500 }, { "epoch": 32.82, "learning_rate": 1.5624077669902914e-05, "loss": 0.1731, "step": 84510 }, { "epoch": 32.82, "learning_rate": 1.562355987055016e-05, "loss": 0.124, "step": 84520 }, { "epoch": 32.83, "learning_rate": 1.5623042071197413e-05, "loss": 0.1261, "step": 84530 }, { "epoch": 32.83, "learning_rate": 1.562252427184466e-05, "loss": 0.0194, "step": 84540 }, { "epoch": 32.83, "learning_rate": 1.5622006472491912e-05, "loss": 0.0407, "step": 84550 }, { "epoch": 32.84, "learning_rate": 1.562148867313916e-05, "loss": 0.1062, "step": 84560 }, { "epoch": 32.84, "learning_rate": 1.5620970873786408e-05, "loss": 0.0873, "step": 84570 }, { "epoch": 32.85, "learning_rate": 1.562045307443366e-05, "loss": 0.0608, "step": 84580 }, { "epoch": 32.85, "learning_rate": 1.5619935275080907e-05, "loss": 0.0723, "step": 84590 }, { "epoch": 32.85, "learning_rate": 1.5619417475728155e-05, "loss": 0.0009, "step": 84600 }, { "epoch": 32.86, "learning_rate": 1.5618899676375406e-05, "loss": 0.0835, "step": 84610 }, { "epoch": 32.86, "learning_rate": 1.5618381877022654e-05, "loss": 0.207, "step": 84620 }, { "epoch": 32.87, "learning_rate": 1.5617864077669906e-05, "loss": 0.084, "step": 84630 }, { "epoch": 32.87, "learning_rate": 1.5617346278317154e-05, "loss": 0.0788, "step": 84640 }, { "epoch": 32.87, "learning_rate": 1.56168284789644e-05, "loss": 0.1668, "step": 84650 }, { "epoch": 32.88, "learning_rate": 1.5616310679611653e-05, "loss": 0.2773, "step": 84660 }, { "epoch": 32.88, "learning_rate": 1.56157928802589e-05, "loss": 0.1947, "step": 84670 }, { "epoch": 32.89, "learning_rate": 1.561527508090615e-05, "loss": 0.1773, "step": 84680 }, { "epoch": 32.89, "learning_rate": 1.56147572815534e-05, "loss": 0.0846, "step": 84690 }, { "epoch": 32.89, "learning_rate": 1.5614239482200648e-05, "loss": 0.0955, "step": 84700 }, { "epoch": 32.9, "learning_rate": 1.56137216828479e-05, "loss": 0.1581, "step": 84710 }, { "epoch": 32.9, "learning_rate": 1.5613203883495147e-05, "loss": 0.1885, "step": 84720 }, { "epoch": 32.9, "learning_rate": 1.5612686084142395e-05, "loss": 0.0497, "step": 84730 }, { "epoch": 32.91, "learning_rate": 1.5612168284789646e-05, "loss": 0.0427, "step": 84740 }, { "epoch": 32.91, "learning_rate": 1.5611650485436894e-05, "loss": 0.1476, "step": 84750 }, { "epoch": 32.92, "learning_rate": 1.5611132686084142e-05, "loss": 0.0444, "step": 84760 }, { "epoch": 32.92, "learning_rate": 1.5610614886731394e-05, "loss": 0.0544, "step": 84770 }, { "epoch": 32.92, "learning_rate": 1.561009708737864e-05, "loss": 0.1773, "step": 84780 }, { "epoch": 32.93, "learning_rate": 1.5609579288025893e-05, "loss": 0.0774, "step": 84790 }, { "epoch": 32.93, "learning_rate": 1.560906148867314e-05, "loss": 0.0431, "step": 84800 }, { "epoch": 32.94, "learning_rate": 1.560854368932039e-05, "loss": 0.0304, "step": 84810 }, { "epoch": 32.94, "learning_rate": 1.560802588996764e-05, "loss": 0.1357, "step": 84820 }, { "epoch": 32.94, "learning_rate": 1.5607508090614888e-05, "loss": 0.0772, "step": 84830 }, { "epoch": 32.95, "learning_rate": 1.5606990291262136e-05, "loss": 0.0906, "step": 84840 }, { "epoch": 32.95, "learning_rate": 1.5606472491909387e-05, "loss": 0.0014, "step": 84850 }, { "epoch": 32.96, "learning_rate": 1.5605954692556635e-05, "loss": 0.1284, "step": 84860 }, { "epoch": 32.96, "learning_rate": 1.5605436893203886e-05, "loss": 0.0182, "step": 84870 }, { "epoch": 32.96, "learning_rate": 1.5604919093851134e-05, "loss": 0.1075, "step": 84880 }, { "epoch": 32.97, "learning_rate": 1.5604401294498382e-05, "loss": 0.025, "step": 84890 }, { "epoch": 32.97, "learning_rate": 1.5603883495145634e-05, "loss": 0.0865, "step": 84900 }, { "epoch": 32.97, "learning_rate": 1.560336569579288e-05, "loss": 0.0605, "step": 84910 }, { "epoch": 32.98, "learning_rate": 1.560284789644013e-05, "loss": 0.1006, "step": 84920 }, { "epoch": 32.98, "learning_rate": 1.560233009708738e-05, "loss": 0.0724, "step": 84930 }, { "epoch": 32.99, "learning_rate": 1.560181229773463e-05, "loss": 0.1276, "step": 84940 }, { "epoch": 32.99, "learning_rate": 1.5601294498381877e-05, "loss": 0.0629, "step": 84950 }, { "epoch": 32.99, "learning_rate": 1.5600776699029128e-05, "loss": 0.0592, "step": 84960 }, { "epoch": 33.0, "learning_rate": 1.5600258899676376e-05, "loss": 0.1774, "step": 84970 }, { "epoch": 33.0, "eval_accuracy": 0.9548830811554333, "eval_loss": 0.27493834495544434, "eval_runtime": 8.2212, "eval_samples_per_second": 442.149, "eval_steps_per_second": 55.345, "step": 84975 }, { "epoch": 33.0, "learning_rate": 1.5599741100323627e-05, "loss": 0.0373, "step": 84980 }, { "epoch": 33.01, "learning_rate": 1.5599223300970875e-05, "loss": 0.0424, "step": 84990 }, { "epoch": 33.01, "learning_rate": 1.5598705501618123e-05, "loss": 0.1049, "step": 85000 }, { "epoch": 33.01, "learning_rate": 1.5598187702265374e-05, "loss": 0.0313, "step": 85010 }, { "epoch": 33.02, "learning_rate": 1.5597669902912622e-05, "loss": 0.1459, "step": 85020 }, { "epoch": 33.02, "learning_rate": 1.559715210355987e-05, "loss": 0.1136, "step": 85030 }, { "epoch": 33.03, "learning_rate": 1.559663430420712e-05, "loss": 0.1216, "step": 85040 }, { "epoch": 33.03, "learning_rate": 1.559611650485437e-05, "loss": 0.0457, "step": 85050 }, { "epoch": 33.03, "learning_rate": 1.559559870550162e-05, "loss": 0.0329, "step": 85060 }, { "epoch": 33.04, "learning_rate": 1.559508090614887e-05, "loss": 0.0442, "step": 85070 }, { "epoch": 33.04, "learning_rate": 1.5594563106796116e-05, "loss": 0.0851, "step": 85080 }, { "epoch": 33.04, "learning_rate": 1.5594045307443368e-05, "loss": 0.1049, "step": 85090 }, { "epoch": 33.05, "learning_rate": 1.5593527508090616e-05, "loss": 0.1549, "step": 85100 }, { "epoch": 33.05, "learning_rate": 1.5593009708737864e-05, "loss": 0.1129, "step": 85110 }, { "epoch": 33.06, "learning_rate": 1.5592491909385115e-05, "loss": 0.1249, "step": 85120 }, { "epoch": 33.06, "learning_rate": 1.5591974110032363e-05, "loss": 0.0785, "step": 85130 }, { "epoch": 33.06, "learning_rate": 1.5591456310679614e-05, "loss": 0.1888, "step": 85140 }, { "epoch": 33.07, "learning_rate": 1.5590938511326862e-05, "loss": 0.0239, "step": 85150 }, { "epoch": 33.07, "learning_rate": 1.559042071197411e-05, "loss": 0.1023, "step": 85160 }, { "epoch": 33.08, "learning_rate": 1.558990291262136e-05, "loss": 0.0238, "step": 85170 }, { "epoch": 33.08, "learning_rate": 1.558938511326861e-05, "loss": 0.0429, "step": 85180 }, { "epoch": 33.08, "learning_rate": 1.5588867313915857e-05, "loss": 0.034, "step": 85190 }, { "epoch": 33.09, "learning_rate": 1.558834951456311e-05, "loss": 0.0933, "step": 85200 }, { "epoch": 33.09, "learning_rate": 1.5587831715210356e-05, "loss": 0.0275, "step": 85210 }, { "epoch": 33.1, "learning_rate": 1.5587313915857608e-05, "loss": 0.1023, "step": 85220 }, { "epoch": 33.1, "learning_rate": 1.5586796116504856e-05, "loss": 0.0137, "step": 85230 }, { "epoch": 33.1, "learning_rate": 1.5586278317152107e-05, "loss": 0.0795, "step": 85240 }, { "epoch": 33.11, "learning_rate": 1.558576051779935e-05, "loss": 0.1196, "step": 85250 }, { "epoch": 33.11, "learning_rate": 1.5585242718446603e-05, "loss": 0.1008, "step": 85260 }, { "epoch": 33.11, "learning_rate": 1.558472491909385e-05, "loss": 0.0556, "step": 85270 }, { "epoch": 33.12, "learning_rate": 1.5584207119741102e-05, "loss": 0.026, "step": 85280 }, { "epoch": 33.12, "learning_rate": 1.558368932038835e-05, "loss": 0.026, "step": 85290 }, { "epoch": 33.13, "learning_rate": 1.55831715210356e-05, "loss": 0.0679, "step": 85300 }, { "epoch": 33.13, "learning_rate": 1.558265372168285e-05, "loss": 0.0833, "step": 85310 }, { "epoch": 33.13, "learning_rate": 1.55821359223301e-05, "loss": 0.0618, "step": 85320 }, { "epoch": 33.14, "learning_rate": 1.5581618122977345e-05, "loss": 0.1821, "step": 85330 }, { "epoch": 33.14, "learning_rate": 1.5581100323624596e-05, "loss": 0.0481, "step": 85340 }, { "epoch": 33.15, "learning_rate": 1.5580582524271844e-05, "loss": 0.0526, "step": 85350 }, { "epoch": 33.15, "learning_rate": 1.5580064724919096e-05, "loss": 0.2016, "step": 85360 }, { "epoch": 33.15, "learning_rate": 1.5579546925566344e-05, "loss": 0.0906, "step": 85370 }, { "epoch": 33.16, "learning_rate": 1.5579029126213595e-05, "loss": 0.1791, "step": 85380 }, { "epoch": 33.16, "learning_rate": 1.5578511326860843e-05, "loss": 0.1076, "step": 85390 }, { "epoch": 33.17, "learning_rate": 1.5577993527508094e-05, "loss": 0.1211, "step": 85400 }, { "epoch": 33.17, "learning_rate": 1.557747572815534e-05, "loss": 0.0944, "step": 85410 }, { "epoch": 33.17, "learning_rate": 1.557695792880259e-05, "loss": 0.1447, "step": 85420 }, { "epoch": 33.18, "learning_rate": 1.5576440129449838e-05, "loss": 0.069, "step": 85430 }, { "epoch": 33.18, "learning_rate": 1.557592233009709e-05, "loss": 0.1043, "step": 85440 }, { "epoch": 33.18, "learning_rate": 1.5575404530744337e-05, "loss": 0.1669, "step": 85450 }, { "epoch": 33.19, "learning_rate": 1.557488673139159e-05, "loss": 0.0318, "step": 85460 }, { "epoch": 33.19, "learning_rate": 1.5574368932038836e-05, "loss": 0.009, "step": 85470 }, { "epoch": 33.2, "learning_rate": 1.5573851132686088e-05, "loss": 0.0065, "step": 85480 }, { "epoch": 33.2, "learning_rate": 1.5573333333333332e-05, "loss": 0.1104, "step": 85490 }, { "epoch": 33.2, "learning_rate": 1.5572815533980583e-05, "loss": 0.1277, "step": 85500 }, { "epoch": 33.21, "learning_rate": 1.557229773462783e-05, "loss": 0.0874, "step": 85510 }, { "epoch": 33.21, "learning_rate": 1.5571779935275083e-05, "loss": 0.2378, "step": 85520 }, { "epoch": 33.22, "learning_rate": 1.557126213592233e-05, "loss": 0.1852, "step": 85530 }, { "epoch": 33.22, "learning_rate": 1.5570744336569582e-05, "loss": 0.1476, "step": 85540 }, { "epoch": 33.22, "learning_rate": 1.557022653721683e-05, "loss": 0.1518, "step": 85550 }, { "epoch": 33.23, "learning_rate": 1.556970873786408e-05, "loss": 0.0617, "step": 85560 }, { "epoch": 33.23, "learning_rate": 1.5569190938511326e-05, "loss": 0.0284, "step": 85570 }, { "epoch": 33.23, "learning_rate": 1.5568673139158577e-05, "loss": 0.0453, "step": 85580 }, { "epoch": 33.24, "learning_rate": 1.5568155339805825e-05, "loss": 0.1235, "step": 85590 }, { "epoch": 33.24, "learning_rate": 1.5567637540453076e-05, "loss": 0.0505, "step": 85600 }, { "epoch": 33.25, "learning_rate": 1.5567119741100324e-05, "loss": 0.0432, "step": 85610 }, { "epoch": 33.25, "learning_rate": 1.5566601941747576e-05, "loss": 0.0729, "step": 85620 }, { "epoch": 33.25, "learning_rate": 1.5566084142394823e-05, "loss": 0.1585, "step": 85630 }, { "epoch": 33.26, "learning_rate": 1.5565566343042075e-05, "loss": 0.1454, "step": 85640 }, { "epoch": 33.26, "learning_rate": 1.556504854368932e-05, "loss": 0.0375, "step": 85650 }, { "epoch": 33.27, "learning_rate": 1.556453074433657e-05, "loss": 0.1341, "step": 85660 }, { "epoch": 33.27, "learning_rate": 1.556401294498382e-05, "loss": 0.0855, "step": 85670 }, { "epoch": 33.27, "learning_rate": 1.556349514563107e-05, "loss": 0.111, "step": 85680 }, { "epoch": 33.28, "learning_rate": 1.5562977346278318e-05, "loss": 0.176, "step": 85690 }, { "epoch": 33.28, "learning_rate": 1.556245954692557e-05, "loss": 0.0348, "step": 85700 }, { "epoch": 33.29, "learning_rate": 1.5561941747572817e-05, "loss": 0.2362, "step": 85710 }, { "epoch": 33.29, "learning_rate": 1.556142394822007e-05, "loss": 0.0729, "step": 85720 }, { "epoch": 33.29, "learning_rate": 1.5560906148867313e-05, "loss": 0.0826, "step": 85730 }, { "epoch": 33.3, "learning_rate": 1.5560388349514564e-05, "loss": 0.0402, "step": 85740 }, { "epoch": 33.3, "learning_rate": 1.5559870550161812e-05, "loss": 0.1061, "step": 85750 }, { "epoch": 33.3, "learning_rate": 1.5559352750809063e-05, "loss": 0.1516, "step": 85760 }, { "epoch": 33.31, "learning_rate": 1.555883495145631e-05, "loss": 0.1515, "step": 85770 }, { "epoch": 33.31, "learning_rate": 1.5558317152103563e-05, "loss": 0.1499, "step": 85780 }, { "epoch": 33.32, "learning_rate": 1.555779935275081e-05, "loss": 0.2311, "step": 85790 }, { "epoch": 33.32, "learning_rate": 1.5557281553398062e-05, "loss": 0.1833, "step": 85800 }, { "epoch": 33.32, "learning_rate": 1.555676375404531e-05, "loss": 0.1761, "step": 85810 }, { "epoch": 33.33, "learning_rate": 1.5556245954692558e-05, "loss": 0.1002, "step": 85820 }, { "epoch": 33.33, "learning_rate": 1.5555728155339806e-05, "loss": 0.0739, "step": 85830 }, { "epoch": 33.34, "learning_rate": 1.5555210355987057e-05, "loss": 0.0132, "step": 85840 }, { "epoch": 33.34, "learning_rate": 1.5554692556634305e-05, "loss": 0.1034, "step": 85850 }, { "epoch": 33.34, "learning_rate": 1.5554174757281556e-05, "loss": 0.1244, "step": 85860 }, { "epoch": 33.35, "learning_rate": 1.5553656957928804e-05, "loss": 0.0664, "step": 85870 }, { "epoch": 33.35, "learning_rate": 1.5553139158576055e-05, "loss": 0.0879, "step": 85880 }, { "epoch": 33.36, "learning_rate": 1.5552621359223303e-05, "loss": 0.0524, "step": 85890 }, { "epoch": 33.36, "learning_rate": 1.555210355987055e-05, "loss": 0.1575, "step": 85900 }, { "epoch": 33.36, "learning_rate": 1.55515857605178e-05, "loss": 0.0156, "step": 85910 }, { "epoch": 33.37, "learning_rate": 1.555106796116505e-05, "loss": 0.1609, "step": 85920 }, { "epoch": 33.37, "learning_rate": 1.55505501618123e-05, "loss": 0.0701, "step": 85930 }, { "epoch": 33.37, "learning_rate": 1.555003236245955e-05, "loss": 0.0657, "step": 85940 }, { "epoch": 33.38, "learning_rate": 1.5549514563106798e-05, "loss": 0.0672, "step": 85950 }, { "epoch": 33.38, "learning_rate": 1.554899676375405e-05, "loss": 0.0806, "step": 85960 }, { "epoch": 33.39, "learning_rate": 1.5548478964401297e-05, "loss": 0.0696, "step": 85970 }, { "epoch": 33.39, "learning_rate": 1.5547961165048545e-05, "loss": 0.0827, "step": 85980 }, { "epoch": 33.39, "learning_rate": 1.5547443365695793e-05, "loss": 0.1087, "step": 85990 }, { "epoch": 33.4, "learning_rate": 1.5546925566343044e-05, "loss": 0.0536, "step": 86000 }, { "epoch": 33.4, "learning_rate": 1.5546407766990292e-05, "loss": 0.0794, "step": 86010 }, { "epoch": 33.41, "learning_rate": 1.5545889967637543e-05, "loss": 0.0234, "step": 86020 }, { "epoch": 33.41, "learning_rate": 1.554537216828479e-05, "loss": 0.0668, "step": 86030 }, { "epoch": 33.41, "learning_rate": 1.554485436893204e-05, "loss": 0.0096, "step": 86040 }, { "epoch": 33.42, "learning_rate": 1.554433656957929e-05, "loss": 0.0759, "step": 86050 }, { "epoch": 33.42, "learning_rate": 1.554381877022654e-05, "loss": 0.1724, "step": 86060 }, { "epoch": 33.43, "learning_rate": 1.5543300970873786e-05, "loss": 0.0943, "step": 86070 }, { "epoch": 33.43, "learning_rate": 1.5542783171521038e-05, "loss": 0.0636, "step": 86080 }, { "epoch": 33.43, "learning_rate": 1.5542265372168286e-05, "loss": 0.033, "step": 86090 }, { "epoch": 33.44, "learning_rate": 1.5541747572815537e-05, "loss": 0.0223, "step": 86100 }, { "epoch": 33.44, "learning_rate": 1.5541229773462785e-05, "loss": 0.1107, "step": 86110 }, { "epoch": 33.44, "learning_rate": 1.5540711974110033e-05, "loss": 0.1048, "step": 86120 }, { "epoch": 33.45, "learning_rate": 1.5540194174757284e-05, "loss": 0.0737, "step": 86130 }, { "epoch": 33.45, "learning_rate": 1.5539676375404532e-05, "loss": 0.1697, "step": 86140 }, { "epoch": 33.46, "learning_rate": 1.553915857605178e-05, "loss": 0.0522, "step": 86150 }, { "epoch": 33.46, "learning_rate": 1.553864077669903e-05, "loss": 0.0388, "step": 86160 }, { "epoch": 33.46, "learning_rate": 1.553812297734628e-05, "loss": 0.0006, "step": 86170 }, { "epoch": 33.47, "learning_rate": 1.553760517799353e-05, "loss": 0.0678, "step": 86180 }, { "epoch": 33.47, "learning_rate": 1.553708737864078e-05, "loss": 0.1789, "step": 86190 }, { "epoch": 33.48, "learning_rate": 1.5536569579288026e-05, "loss": 0.1003, "step": 86200 }, { "epoch": 33.48, "learning_rate": 1.5536051779935278e-05, "loss": 0.139, "step": 86210 }, { "epoch": 33.48, "learning_rate": 1.5535533980582525e-05, "loss": 0.0238, "step": 86220 }, { "epoch": 33.49, "learning_rate": 1.5535016181229773e-05, "loss": 0.0867, "step": 86230 }, { "epoch": 33.49, "learning_rate": 1.5534498381877025e-05, "loss": 0.1994, "step": 86240 }, { "epoch": 33.5, "learning_rate": 1.5533980582524273e-05, "loss": 0.058, "step": 86250 }, { "epoch": 33.5, "learning_rate": 1.5533462783171524e-05, "loss": 0.0705, "step": 86260 }, { "epoch": 33.5, "learning_rate": 1.5532944983818772e-05, "loss": 0.05, "step": 86270 }, { "epoch": 33.51, "learning_rate": 1.553242718446602e-05, "loss": 0.0046, "step": 86280 }, { "epoch": 33.51, "learning_rate": 1.553190938511327e-05, "loss": 0.0955, "step": 86290 }, { "epoch": 33.51, "learning_rate": 1.553139158576052e-05, "loss": 0.1053, "step": 86300 }, { "epoch": 33.52, "learning_rate": 1.5530873786407767e-05, "loss": 0.021, "step": 86310 }, { "epoch": 33.52, "learning_rate": 1.5530355987055018e-05, "loss": 0.0098, "step": 86320 }, { "epoch": 33.53, "learning_rate": 1.5529838187702266e-05, "loss": 0.1033, "step": 86330 }, { "epoch": 33.53, "learning_rate": 1.5529320388349518e-05, "loss": 0.1151, "step": 86340 }, { "epoch": 33.53, "learning_rate": 1.5528802588996765e-05, "loss": 0.0305, "step": 86350 }, { "epoch": 33.54, "learning_rate": 1.5528284789644013e-05, "loss": 0.2592, "step": 86360 }, { "epoch": 33.54, "learning_rate": 1.5527766990291265e-05, "loss": 0.2049, "step": 86370 }, { "epoch": 33.55, "learning_rate": 1.5527249190938513e-05, "loss": 0.0585, "step": 86380 }, { "epoch": 33.55, "learning_rate": 1.552673139158576e-05, "loss": 0.0526, "step": 86390 }, { "epoch": 33.55, "learning_rate": 1.5526213592233012e-05, "loss": 0.0802, "step": 86400 }, { "epoch": 33.56, "learning_rate": 1.552569579288026e-05, "loss": 0.0706, "step": 86410 }, { "epoch": 33.56, "learning_rate": 1.5525177993527508e-05, "loss": 0.0709, "step": 86420 }, { "epoch": 33.57, "learning_rate": 1.552466019417476e-05, "loss": 0.0068, "step": 86430 }, { "epoch": 33.57, "learning_rate": 1.5524142394822007e-05, "loss": 0.0375, "step": 86440 }, { "epoch": 33.57, "learning_rate": 1.5523624595469258e-05, "loss": 0.0488, "step": 86450 }, { "epoch": 33.58, "learning_rate": 1.5523106796116506e-05, "loss": 0.0817, "step": 86460 }, { "epoch": 33.58, "learning_rate": 1.5522588996763754e-05, "loss": 0.2173, "step": 86470 }, { "epoch": 33.58, "learning_rate": 1.5522071197411005e-05, "loss": 0.1146, "step": 86480 }, { "epoch": 33.59, "learning_rate": 1.5521553398058253e-05, "loss": 0.2319, "step": 86490 }, { "epoch": 33.59, "learning_rate": 1.55210355987055e-05, "loss": 0.0715, "step": 86500 }, { "epoch": 33.6, "learning_rate": 1.5520517799352753e-05, "loss": 0.2288, "step": 86510 }, { "epoch": 33.6, "learning_rate": 1.552e-05, "loss": 0.1308, "step": 86520 }, { "epoch": 33.6, "learning_rate": 1.5519482200647252e-05, "loss": 0.1684, "step": 86530 }, { "epoch": 33.61, "learning_rate": 1.55189644012945e-05, "loss": 0.1251, "step": 86540 }, { "epoch": 33.61, "learning_rate": 1.5518446601941748e-05, "loss": 0.0574, "step": 86550 }, { "epoch": 33.62, "learning_rate": 1.5517928802589e-05, "loss": 0.0678, "step": 86560 }, { "epoch": 33.62, "learning_rate": 1.5517411003236247e-05, "loss": 0.0517, "step": 86570 }, { "epoch": 33.62, "learning_rate": 1.5516893203883495e-05, "loss": 0.0281, "step": 86580 }, { "epoch": 33.63, "learning_rate": 1.5516375404530746e-05, "loss": 0.0736, "step": 86590 }, { "epoch": 33.63, "learning_rate": 1.5515857605177994e-05, "loss": 0.0903, "step": 86600 }, { "epoch": 33.63, "learning_rate": 1.5515339805825245e-05, "loss": 0.0599, "step": 86610 }, { "epoch": 33.64, "learning_rate": 1.5514822006472493e-05, "loss": 0.1121, "step": 86620 }, { "epoch": 33.64, "learning_rate": 1.551430420711974e-05, "loss": 0.0607, "step": 86630 }, { "epoch": 33.65, "learning_rate": 1.5513786407766993e-05, "loss": 0.0568, "step": 86640 }, { "epoch": 33.65, "learning_rate": 1.551326860841424e-05, "loss": 0.0046, "step": 86650 }, { "epoch": 33.65, "learning_rate": 1.551275080906149e-05, "loss": 0.1076, "step": 86660 }, { "epoch": 33.66, "learning_rate": 1.551223300970874e-05, "loss": 0.0668, "step": 86670 }, { "epoch": 33.66, "learning_rate": 1.5511715210355988e-05, "loss": 0.0806, "step": 86680 }, { "epoch": 33.67, "learning_rate": 1.551119741100324e-05, "loss": 0.1276, "step": 86690 }, { "epoch": 33.67, "learning_rate": 1.5510679611650487e-05, "loss": 0.1236, "step": 86700 }, { "epoch": 33.67, "learning_rate": 1.5510161812297735e-05, "loss": 0.051, "step": 86710 }, { "epoch": 33.68, "learning_rate": 1.5509644012944983e-05, "loss": 0.0186, "step": 86720 }, { "epoch": 33.68, "learning_rate": 1.5509126213592234e-05, "loss": 0.1289, "step": 86730 }, { "epoch": 33.69, "learning_rate": 1.5508608414239482e-05, "loss": 0.165, "step": 86740 }, { "epoch": 33.69, "learning_rate": 1.5508090614886733e-05, "loss": 0.173, "step": 86750 }, { "epoch": 33.69, "learning_rate": 1.550757281553398e-05, "loss": 0.0113, "step": 86760 }, { "epoch": 33.7, "learning_rate": 1.5507055016181232e-05, "loss": 0.1486, "step": 86770 }, { "epoch": 33.7, "learning_rate": 1.550653721682848e-05, "loss": 0.004, "step": 86780 }, { "epoch": 33.7, "learning_rate": 1.550601941747573e-05, "loss": 0.042, "step": 86790 }, { "epoch": 33.71, "learning_rate": 1.5505501618122976e-05, "loss": 0.0667, "step": 86800 }, { "epoch": 33.71, "learning_rate": 1.5504983818770228e-05, "loss": 0.0074, "step": 86810 }, { "epoch": 33.72, "learning_rate": 1.5504466019417475e-05, "loss": 0.0373, "step": 86820 }, { "epoch": 33.72, "learning_rate": 1.5503948220064727e-05, "loss": 0.0974, "step": 86830 }, { "epoch": 33.72, "learning_rate": 1.5503430420711975e-05, "loss": 0.0151, "step": 86840 }, { "epoch": 33.73, "learning_rate": 1.5502912621359226e-05, "loss": 0.1032, "step": 86850 }, { "epoch": 33.73, "learning_rate": 1.5502394822006474e-05, "loss": 0.007, "step": 86860 }, { "epoch": 33.74, "learning_rate": 1.5501877022653722e-05, "loss": 0.0355, "step": 86870 }, { "epoch": 33.74, "learning_rate": 1.550135922330097e-05, "loss": 0.2135, "step": 86880 }, { "epoch": 33.74, "learning_rate": 1.550084142394822e-05, "loss": 0.0874, "step": 86890 }, { "epoch": 33.75, "learning_rate": 1.550032362459547e-05, "loss": 0.1606, "step": 86900 }, { "epoch": 33.75, "learning_rate": 1.549980582524272e-05, "loss": 0.0406, "step": 86910 }, { "epoch": 33.76, "learning_rate": 1.5499288025889968e-05, "loss": 0.0932, "step": 86920 }, { "epoch": 33.76, "learning_rate": 1.549877022653722e-05, "loss": 0.0961, "step": 86930 }, { "epoch": 33.76, "learning_rate": 1.5498252427184468e-05, "loss": 0.1804, "step": 86940 }, { "epoch": 33.77, "learning_rate": 1.549773462783172e-05, "loss": 0.0592, "step": 86950 }, { "epoch": 33.77, "learning_rate": 1.5497216828478963e-05, "loss": 0.0957, "step": 86960 }, { "epoch": 33.77, "learning_rate": 1.5496699029126215e-05, "loss": 0.0169, "step": 86970 }, { "epoch": 33.78, "learning_rate": 1.5496181229773463e-05, "loss": 0.0023, "step": 86980 }, { "epoch": 33.78, "learning_rate": 1.5495663430420714e-05, "loss": 0.1528, "step": 86990 }, { "epoch": 33.79, "learning_rate": 1.5495145631067962e-05, "loss": 0.1054, "step": 87000 }, { "epoch": 33.79, "learning_rate": 1.5494627831715213e-05, "loss": 0.2137, "step": 87010 }, { "epoch": 33.79, "learning_rate": 1.549411003236246e-05, "loss": 0.0683, "step": 87020 }, { "epoch": 33.8, "learning_rate": 1.5493592233009712e-05, "loss": 0.2045, "step": 87030 }, { "epoch": 33.8, "learning_rate": 1.5493074433656957e-05, "loss": 0.0981, "step": 87040 }, { "epoch": 33.81, "learning_rate": 1.5492556634304208e-05, "loss": 0.1077, "step": 87050 }, { "epoch": 33.81, "learning_rate": 1.5492038834951456e-05, "loss": 0.0214, "step": 87060 }, { "epoch": 33.81, "learning_rate": 1.5491521035598707e-05, "loss": 0.0241, "step": 87070 }, { "epoch": 33.82, "learning_rate": 1.5491003236245955e-05, "loss": 0.1419, "step": 87080 }, { "epoch": 33.82, "learning_rate": 1.5490485436893207e-05, "loss": 0.0352, "step": 87090 }, { "epoch": 33.83, "learning_rate": 1.5489967637540455e-05, "loss": 0.0456, "step": 87100 }, { "epoch": 33.83, "learning_rate": 1.5489449838187706e-05, "loss": 0.1057, "step": 87110 }, { "epoch": 33.83, "learning_rate": 1.548893203883495e-05, "loss": 0.154, "step": 87120 }, { "epoch": 33.84, "learning_rate": 1.5488414239482202e-05, "loss": 0.1137, "step": 87130 }, { "epoch": 33.84, "learning_rate": 1.548789644012945e-05, "loss": 0.0839, "step": 87140 }, { "epoch": 33.84, "learning_rate": 1.54873786407767e-05, "loss": 0.096, "step": 87150 }, { "epoch": 33.85, "learning_rate": 1.548686084142395e-05, "loss": 0.0841, "step": 87160 }, { "epoch": 33.85, "learning_rate": 1.54863430420712e-05, "loss": 0.1806, "step": 87170 }, { "epoch": 33.86, "learning_rate": 1.5485825242718448e-05, "loss": 0.1215, "step": 87180 }, { "epoch": 33.86, "learning_rate": 1.54853074433657e-05, "loss": 0.2255, "step": 87190 }, { "epoch": 33.86, "learning_rate": 1.5484789644012944e-05, "loss": 0.0507, "step": 87200 }, { "epoch": 33.87, "learning_rate": 1.5484271844660195e-05, "loss": 0.1222, "step": 87210 }, { "epoch": 33.87, "learning_rate": 1.5483754045307443e-05, "loss": 0.2802, "step": 87220 }, { "epoch": 33.88, "learning_rate": 1.5483236245954695e-05, "loss": 0.0518, "step": 87230 }, { "epoch": 33.88, "learning_rate": 1.5482718446601942e-05, "loss": 0.109, "step": 87240 }, { "epoch": 33.88, "learning_rate": 1.5482200647249194e-05, "loss": 0.1683, "step": 87250 }, { "epoch": 33.89, "learning_rate": 1.5481682847896442e-05, "loss": 0.0134, "step": 87260 }, { "epoch": 33.89, "learning_rate": 1.5481165048543693e-05, "loss": 0.1729, "step": 87270 }, { "epoch": 33.9, "learning_rate": 1.5480647249190938e-05, "loss": 0.1158, "step": 87280 }, { "epoch": 33.9, "learning_rate": 1.548012944983819e-05, "loss": 0.0244, "step": 87290 }, { "epoch": 33.9, "learning_rate": 1.5479611650485437e-05, "loss": 0.2736, "step": 87300 }, { "epoch": 33.91, "learning_rate": 1.5479093851132688e-05, "loss": 0.0149, "step": 87310 }, { "epoch": 33.91, "learning_rate": 1.5478576051779936e-05, "loss": 0.0039, "step": 87320 }, { "epoch": 33.91, "learning_rate": 1.5478058252427187e-05, "loss": 0.0301, "step": 87330 }, { "epoch": 33.92, "learning_rate": 1.5477540453074435e-05, "loss": 0.0432, "step": 87340 }, { "epoch": 33.92, "learning_rate": 1.5477022653721687e-05, "loss": 0.0589, "step": 87350 }, { "epoch": 33.93, "learning_rate": 1.547650485436893e-05, "loss": 0.016, "step": 87360 }, { "epoch": 33.93, "learning_rate": 1.5475987055016182e-05, "loss": 0.1356, "step": 87370 }, { "epoch": 33.93, "learning_rate": 1.547546925566343e-05, "loss": 0.0913, "step": 87380 }, { "epoch": 33.94, "learning_rate": 1.547495145631068e-05, "loss": 0.0847, "step": 87390 }, { "epoch": 33.94, "learning_rate": 1.547443365695793e-05, "loss": 0.1327, "step": 87400 }, { "epoch": 33.95, "learning_rate": 1.547391585760518e-05, "loss": 0.004, "step": 87410 }, { "epoch": 33.95, "learning_rate": 1.547339805825243e-05, "loss": 0.1022, "step": 87420 }, { "epoch": 33.95, "learning_rate": 1.547288025889968e-05, "loss": 0.0115, "step": 87430 }, { "epoch": 33.96, "learning_rate": 1.5472362459546925e-05, "loss": 0.0508, "step": 87440 }, { "epoch": 33.96, "learning_rate": 1.5471844660194176e-05, "loss": 0.0705, "step": 87450 }, { "epoch": 33.97, "learning_rate": 1.5471326860841424e-05, "loss": 0.127, "step": 87460 }, { "epoch": 33.97, "learning_rate": 1.5470809061488675e-05, "loss": 0.0725, "step": 87470 }, { "epoch": 33.97, "learning_rate": 1.5470291262135923e-05, "loss": 0.0937, "step": 87480 }, { "epoch": 33.98, "learning_rate": 1.5469773462783174e-05, "loss": 0.1534, "step": 87490 }, { "epoch": 33.98, "learning_rate": 1.5469255663430422e-05, "loss": 0.2524, "step": 87500 }, { "epoch": 33.98, "learning_rate": 1.546873786407767e-05, "loss": 0.0724, "step": 87510 }, { "epoch": 33.99, "learning_rate": 1.546822006472492e-05, "loss": 0.1304, "step": 87520 }, { "epoch": 33.99, "learning_rate": 1.546770226537217e-05, "loss": 0.082, "step": 87530 }, { "epoch": 34.0, "learning_rate": 1.5467184466019417e-05, "loss": 0.1633, "step": 87540 }, { "epoch": 34.0, "learning_rate": 1.546666666666667e-05, "loss": 0.1178, "step": 87550 }, { "epoch": 34.0, "eval_accuracy": 0.9513067400275104, "eval_loss": 0.2904781699180603, "eval_runtime": 8.2577, "eval_samples_per_second": 440.194, "eval_steps_per_second": 55.1, "step": 87550 }, { "epoch": 34.0, "learning_rate": 1.5466148867313917e-05, "loss": 0.0713, "step": 87560 }, { "epoch": 34.01, "learning_rate": 1.5465631067961168e-05, "loss": 0.0771, "step": 87570 }, { "epoch": 34.01, "learning_rate": 1.5465113268608416e-05, "loss": 0.013, "step": 87580 }, { "epoch": 34.02, "learning_rate": 1.5464595469255664e-05, "loss": 0.053, "step": 87590 }, { "epoch": 34.02, "learning_rate": 1.5464077669902915e-05, "loss": 0.0288, "step": 87600 }, { "epoch": 34.02, "learning_rate": 1.5463559870550163e-05, "loss": 0.0158, "step": 87610 }, { "epoch": 34.03, "learning_rate": 1.546304207119741e-05, "loss": 0.1144, "step": 87620 }, { "epoch": 34.03, "learning_rate": 1.5462524271844662e-05, "loss": 0.083, "step": 87630 }, { "epoch": 34.03, "learning_rate": 1.546200647249191e-05, "loss": 0.0621, "step": 87640 }, { "epoch": 34.04, "learning_rate": 1.546148867313916e-05, "loss": 0.0595, "step": 87650 }, { "epoch": 34.04, "learning_rate": 1.546097087378641e-05, "loss": 0.0253, "step": 87660 }, { "epoch": 34.05, "learning_rate": 1.5460453074433657e-05, "loss": 0.1537, "step": 87670 }, { "epoch": 34.05, "learning_rate": 1.545993527508091e-05, "loss": 0.0599, "step": 87680 }, { "epoch": 34.05, "learning_rate": 1.5459417475728157e-05, "loss": 0.1361, "step": 87690 }, { "epoch": 34.06, "learning_rate": 1.5458899676375405e-05, "loss": 0.1055, "step": 87700 }, { "epoch": 34.06, "learning_rate": 1.5458381877022656e-05, "loss": 0.1345, "step": 87710 }, { "epoch": 34.07, "learning_rate": 1.5457864077669904e-05, "loss": 0.1939, "step": 87720 }, { "epoch": 34.07, "learning_rate": 1.5457346278317155e-05, "loss": 0.1182, "step": 87730 }, { "epoch": 34.07, "learning_rate": 1.5456828478964403e-05, "loss": 0.0025, "step": 87740 }, { "epoch": 34.08, "learning_rate": 1.545631067961165e-05, "loss": 0.1201, "step": 87750 }, { "epoch": 34.08, "learning_rate": 1.5455792880258902e-05, "loss": 0.0724, "step": 87760 }, { "epoch": 34.09, "learning_rate": 1.545527508090615e-05, "loss": 0.0965, "step": 87770 }, { "epoch": 34.09, "learning_rate": 1.5454757281553398e-05, "loss": 0.0378, "step": 87780 }, { "epoch": 34.09, "learning_rate": 1.545423948220065e-05, "loss": 0.0686, "step": 87790 }, { "epoch": 34.1, "learning_rate": 1.5453721682847897e-05, "loss": 0.0019, "step": 87800 }, { "epoch": 34.1, "learning_rate": 1.5453203883495145e-05, "loss": 0.0922, "step": 87810 }, { "epoch": 34.1, "learning_rate": 1.5452686084142397e-05, "loss": 0.1013, "step": 87820 }, { "epoch": 34.11, "learning_rate": 1.5452168284789645e-05, "loss": 0.0896, "step": 87830 }, { "epoch": 34.11, "learning_rate": 1.5451650485436896e-05, "loss": 0.0815, "step": 87840 }, { "epoch": 34.12, "learning_rate": 1.5451132686084144e-05, "loss": 0.0181, "step": 87850 }, { "epoch": 34.12, "learning_rate": 1.5450614886731392e-05, "loss": 0.1231, "step": 87860 }, { "epoch": 34.12, "learning_rate": 1.5450097087378643e-05, "loss": 0.1713, "step": 87870 }, { "epoch": 34.13, "learning_rate": 1.544957928802589e-05, "loss": 0.0516, "step": 87880 }, { "epoch": 34.13, "learning_rate": 1.544906148867314e-05, "loss": 0.116, "step": 87890 }, { "epoch": 34.14, "learning_rate": 1.544854368932039e-05, "loss": 0.0335, "step": 87900 }, { "epoch": 34.14, "learning_rate": 1.5448025889967638e-05, "loss": 0.0935, "step": 87910 }, { "epoch": 34.14, "learning_rate": 1.544750809061489e-05, "loss": 0.118, "step": 87920 }, { "epoch": 34.15, "learning_rate": 1.5446990291262137e-05, "loss": 0.0956, "step": 87930 }, { "epoch": 34.15, "learning_rate": 1.5446472491909385e-05, "loss": 0.1485, "step": 87940 }, { "epoch": 34.16, "learning_rate": 1.5445954692556637e-05, "loss": 0.1102, "step": 87950 }, { "epoch": 34.16, "learning_rate": 1.5445436893203884e-05, "loss": 0.1864, "step": 87960 }, { "epoch": 34.16, "learning_rate": 1.5444919093851132e-05, "loss": 0.0279, "step": 87970 }, { "epoch": 34.17, "learning_rate": 1.5444401294498384e-05, "loss": 0.0889, "step": 87980 }, { "epoch": 34.17, "learning_rate": 1.544388349514563e-05, "loss": 0.0304, "step": 87990 }, { "epoch": 34.17, "learning_rate": 1.5443365695792883e-05, "loss": 0.0026, "step": 88000 }, { "epoch": 34.18, "learning_rate": 1.544284789644013e-05, "loss": 0.0972, "step": 88010 }, { "epoch": 34.18, "learning_rate": 1.544233009708738e-05, "loss": 0.0086, "step": 88020 }, { "epoch": 34.19, "learning_rate": 1.544181229773463e-05, "loss": 0.1022, "step": 88030 }, { "epoch": 34.19, "learning_rate": 1.5441294498381878e-05, "loss": 0.112, "step": 88040 }, { "epoch": 34.19, "learning_rate": 1.5440776699029126e-05, "loss": 0.1052, "step": 88050 }, { "epoch": 34.2, "learning_rate": 1.5440258899676377e-05, "loss": 0.1455, "step": 88060 }, { "epoch": 34.2, "learning_rate": 1.5439741100323625e-05, "loss": 0.1698, "step": 88070 }, { "epoch": 34.21, "learning_rate": 1.5439223300970877e-05, "loss": 0.044, "step": 88080 }, { "epoch": 34.21, "learning_rate": 1.5438705501618124e-05, "loss": 0.0008, "step": 88090 }, { "epoch": 34.21, "learning_rate": 1.5438187702265372e-05, "loss": 0.0085, "step": 88100 }, { "epoch": 34.22, "learning_rate": 1.5437669902912624e-05, "loss": 0.072, "step": 88110 }, { "epoch": 34.22, "learning_rate": 1.543715210355987e-05, "loss": 0.1665, "step": 88120 }, { "epoch": 34.23, "learning_rate": 1.543663430420712e-05, "loss": 0.0011, "step": 88130 }, { "epoch": 34.23, "learning_rate": 1.543611650485437e-05, "loss": 0.1055, "step": 88140 }, { "epoch": 34.23, "learning_rate": 1.543559870550162e-05, "loss": 0.1532, "step": 88150 }, { "epoch": 34.24, "learning_rate": 1.543508090614887e-05, "loss": 0.0383, "step": 88160 }, { "epoch": 34.24, "learning_rate": 1.5434563106796118e-05, "loss": 0.041, "step": 88170 }, { "epoch": 34.24, "learning_rate": 1.5434045307443366e-05, "loss": 0.1241, "step": 88180 }, { "epoch": 34.25, "learning_rate": 1.5433527508090614e-05, "loss": 0.1371, "step": 88190 }, { "epoch": 34.25, "learning_rate": 1.5433009708737865e-05, "loss": 0.1814, "step": 88200 }, { "epoch": 34.26, "learning_rate": 1.5432491909385113e-05, "loss": 0.0859, "step": 88210 }, { "epoch": 34.26, "learning_rate": 1.5431974110032364e-05, "loss": 0.0144, "step": 88220 }, { "epoch": 34.26, "learning_rate": 1.5431456310679612e-05, "loss": 0.0596, "step": 88230 }, { "epoch": 34.27, "learning_rate": 1.5430938511326864e-05, "loss": 0.2371, "step": 88240 }, { "epoch": 34.27, "learning_rate": 1.543042071197411e-05, "loss": 0.055, "step": 88250 }, { "epoch": 34.28, "learning_rate": 1.542990291262136e-05, "loss": 0.0389, "step": 88260 }, { "epoch": 34.28, "learning_rate": 1.5429385113268607e-05, "loss": 0.0362, "step": 88270 }, { "epoch": 34.28, "learning_rate": 1.542886731391586e-05, "loss": 0.0797, "step": 88280 }, { "epoch": 34.29, "learning_rate": 1.5428349514563107e-05, "loss": 0.0408, "step": 88290 }, { "epoch": 34.29, "learning_rate": 1.5427831715210358e-05, "loss": 0.0789, "step": 88300 }, { "epoch": 34.3, "learning_rate": 1.5427313915857606e-05, "loss": 0.0441, "step": 88310 }, { "epoch": 34.3, "learning_rate": 1.5426796116504857e-05, "loss": 0.0902, "step": 88320 }, { "epoch": 34.3, "learning_rate": 1.5426278317152105e-05, "loss": 0.0659, "step": 88330 }, { "epoch": 34.31, "learning_rate": 1.5425760517799353e-05, "loss": 0.1337, "step": 88340 }, { "epoch": 34.31, "learning_rate": 1.54252427184466e-05, "loss": 0.1205, "step": 88350 }, { "epoch": 34.31, "learning_rate": 1.5424724919093852e-05, "loss": 0.1786, "step": 88360 }, { "epoch": 34.32, "learning_rate": 1.54242071197411e-05, "loss": 0.1009, "step": 88370 }, { "epoch": 34.32, "learning_rate": 1.542368932038835e-05, "loss": 0.0329, "step": 88380 }, { "epoch": 34.33, "learning_rate": 1.54231715210356e-05, "loss": 0.0188, "step": 88390 }, { "epoch": 34.33, "learning_rate": 1.542265372168285e-05, "loss": 0.1515, "step": 88400 }, { "epoch": 34.33, "learning_rate": 1.54221359223301e-05, "loss": 0.0067, "step": 88410 }, { "epoch": 34.34, "learning_rate": 1.5421618122977347e-05, "loss": 0.0042, "step": 88420 }, { "epoch": 34.34, "learning_rate": 1.5421100323624595e-05, "loss": 0.1738, "step": 88430 }, { "epoch": 34.35, "learning_rate": 1.5420582524271846e-05, "loss": 0.0509, "step": 88440 }, { "epoch": 34.35, "learning_rate": 1.5420064724919094e-05, "loss": 0.1376, "step": 88450 }, { "epoch": 34.35, "learning_rate": 1.5419546925566345e-05, "loss": 0.0493, "step": 88460 }, { "epoch": 34.36, "learning_rate": 1.5419029126213593e-05, "loss": 0.2846, "step": 88470 }, { "epoch": 34.36, "learning_rate": 1.5418511326860844e-05, "loss": 0.1113, "step": 88480 }, { "epoch": 34.37, "learning_rate": 1.5417993527508092e-05, "loss": 0.0574, "step": 88490 }, { "epoch": 34.37, "learning_rate": 1.541747572815534e-05, "loss": 0.0532, "step": 88500 }, { "epoch": 34.37, "learning_rate": 1.5416957928802588e-05, "loss": 0.1207, "step": 88510 }, { "epoch": 34.38, "learning_rate": 1.541644012944984e-05, "loss": 0.1223, "step": 88520 }, { "epoch": 34.38, "learning_rate": 1.5415922330097087e-05, "loss": 0.1484, "step": 88530 }, { "epoch": 34.38, "learning_rate": 1.541540453074434e-05, "loss": 0.1104, "step": 88540 }, { "epoch": 34.39, "learning_rate": 1.5414886731391587e-05, "loss": 0.2227, "step": 88550 }, { "epoch": 34.39, "learning_rate": 1.5414368932038838e-05, "loss": 0.1712, "step": 88560 }, { "epoch": 34.4, "learning_rate": 1.5413851132686086e-05, "loss": 0.2455, "step": 88570 }, { "epoch": 34.4, "learning_rate": 1.5413333333333337e-05, "loss": 0.1156, "step": 88580 }, { "epoch": 34.4, "learning_rate": 1.541281553398058e-05, "loss": 0.3218, "step": 88590 }, { "epoch": 34.41, "learning_rate": 1.5412297734627833e-05, "loss": 0.209, "step": 88600 }, { "epoch": 34.41, "learning_rate": 1.541177993527508e-05, "loss": 0.023, "step": 88610 }, { "epoch": 34.42, "learning_rate": 1.5411262135922332e-05, "loss": 0.0785, "step": 88620 }, { "epoch": 34.42, "learning_rate": 1.541074433656958e-05, "loss": 0.0031, "step": 88630 }, { "epoch": 34.42, "learning_rate": 1.541022653721683e-05, "loss": 0.0388, "step": 88640 }, { "epoch": 34.43, "learning_rate": 1.540970873786408e-05, "loss": 0.0182, "step": 88650 }, { "epoch": 34.43, "learning_rate": 1.540919093851133e-05, "loss": 0.1296, "step": 88660 }, { "epoch": 34.43, "learning_rate": 1.5408673139158575e-05, "loss": 0.1659, "step": 88670 }, { "epoch": 34.44, "learning_rate": 1.5408155339805827e-05, "loss": 0.1408, "step": 88680 }, { "epoch": 34.44, "learning_rate": 1.5407637540453074e-05, "loss": 0.0599, "step": 88690 }, { "epoch": 34.45, "learning_rate": 1.5407119741100326e-05, "loss": 0.2001, "step": 88700 }, { "epoch": 34.45, "learning_rate": 1.5406601941747574e-05, "loss": 0.0837, "step": 88710 }, { "epoch": 34.45, "learning_rate": 1.5406084142394825e-05, "loss": 0.0851, "step": 88720 }, { "epoch": 34.46, "learning_rate": 1.5405566343042073e-05, "loss": 0.0694, "step": 88730 }, { "epoch": 34.46, "learning_rate": 1.5405048543689324e-05, "loss": 0.0596, "step": 88740 }, { "epoch": 34.47, "learning_rate": 1.540453074433657e-05, "loss": 0.1862, "step": 88750 }, { "epoch": 34.47, "learning_rate": 1.540401294498382e-05, "loss": 0.0152, "step": 88760 }, { "epoch": 34.47, "learning_rate": 1.5403495145631068e-05, "loss": 0.0497, "step": 88770 }, { "epoch": 34.48, "learning_rate": 1.540297734627832e-05, "loss": 0.0979, "step": 88780 }, { "epoch": 34.48, "learning_rate": 1.5402459546925567e-05, "loss": 0.0946, "step": 88790 }, { "epoch": 34.49, "learning_rate": 1.540194174757282e-05, "loss": 0.0777, "step": 88800 }, { "epoch": 34.49, "learning_rate": 1.5401423948220066e-05, "loss": 0.0239, "step": 88810 }, { "epoch": 34.49, "learning_rate": 1.5400906148867318e-05, "loss": 0.0649, "step": 88820 }, { "epoch": 34.5, "learning_rate": 1.5400388349514562e-05, "loss": 0.0832, "step": 88830 }, { "epoch": 34.5, "learning_rate": 1.5399870550161814e-05, "loss": 0.1227, "step": 88840 }, { "epoch": 34.5, "learning_rate": 1.539935275080906e-05, "loss": 0.038, "step": 88850 }, { "epoch": 34.51, "learning_rate": 1.5398834951456313e-05, "loss": 0.0202, "step": 88860 }, { "epoch": 34.51, "learning_rate": 1.539831715210356e-05, "loss": 0.0112, "step": 88870 }, { "epoch": 34.52, "learning_rate": 1.5397799352750812e-05, "loss": 0.094, "step": 88880 }, { "epoch": 34.52, "learning_rate": 1.539728155339806e-05, "loss": 0.0699, "step": 88890 }, { "epoch": 34.52, "learning_rate": 1.539676375404531e-05, "loss": 0.0881, "step": 88900 }, { "epoch": 34.53, "learning_rate": 1.5396245954692556e-05, "loss": 0.1762, "step": 88910 }, { "epoch": 34.53, "learning_rate": 1.5395728155339807e-05, "loss": 0.3549, "step": 88920 }, { "epoch": 34.54, "learning_rate": 1.5395210355987055e-05, "loss": 0.0835, "step": 88930 }, { "epoch": 34.54, "learning_rate": 1.5394692556634306e-05, "loss": 0.0204, "step": 88940 }, { "epoch": 34.54, "learning_rate": 1.5394174757281554e-05, "loss": 0.0829, "step": 88950 }, { "epoch": 34.55, "learning_rate": 1.5393656957928806e-05, "loss": 0.0316, "step": 88960 }, { "epoch": 34.55, "learning_rate": 1.5393139158576054e-05, "loss": 0.1588, "step": 88970 }, { "epoch": 34.56, "learning_rate": 1.53926213592233e-05, "loss": 0.0321, "step": 88980 }, { "epoch": 34.56, "learning_rate": 1.539210355987055e-05, "loss": 0.0475, "step": 88990 }, { "epoch": 34.56, "learning_rate": 1.53915857605178e-05, "loss": 0.0812, "step": 89000 }, { "epoch": 34.57, "learning_rate": 1.539106796116505e-05, "loss": 0.0845, "step": 89010 }, { "epoch": 34.57, "learning_rate": 1.53905501618123e-05, "loss": 0.0147, "step": 89020 }, { "epoch": 34.57, "learning_rate": 1.5390032362459548e-05, "loss": 0.1045, "step": 89030 }, { "epoch": 34.58, "learning_rate": 1.53895145631068e-05, "loss": 0.0153, "step": 89040 }, { "epoch": 34.58, "learning_rate": 1.5388996763754047e-05, "loss": 0.1033, "step": 89050 }, { "epoch": 34.59, "learning_rate": 1.5388478964401295e-05, "loss": 0.0022, "step": 89060 }, { "epoch": 34.59, "learning_rate": 1.5387961165048543e-05, "loss": 0.1272, "step": 89070 }, { "epoch": 34.59, "learning_rate": 1.5387443365695794e-05, "loss": 0.1584, "step": 89080 }, { "epoch": 34.6, "learning_rate": 1.5386925566343042e-05, "loss": 0.1784, "step": 89090 }, { "epoch": 34.6, "learning_rate": 1.5386407766990294e-05, "loss": 0.0069, "step": 89100 }, { "epoch": 34.61, "learning_rate": 1.538588996763754e-05, "loss": 0.1399, "step": 89110 }, { "epoch": 34.61, "learning_rate": 1.5385372168284793e-05, "loss": 0.1145, "step": 89120 }, { "epoch": 34.61, "learning_rate": 1.538485436893204e-05, "loss": 0.1506, "step": 89130 }, { "epoch": 34.62, "learning_rate": 1.538433656957929e-05, "loss": 0.217, "step": 89140 }, { "epoch": 34.62, "learning_rate": 1.538381877022654e-05, "loss": 0.1038, "step": 89150 }, { "epoch": 34.63, "learning_rate": 1.5383300970873788e-05, "loss": 0.0834, "step": 89160 }, { "epoch": 34.63, "learning_rate": 1.5382783171521036e-05, "loss": 0.0603, "step": 89170 }, { "epoch": 34.63, "learning_rate": 1.5382265372168287e-05, "loss": 0.0496, "step": 89180 }, { "epoch": 34.64, "learning_rate": 1.5381747572815535e-05, "loss": 0.0495, "step": 89190 }, { "epoch": 34.64, "learning_rate": 1.5381229773462786e-05, "loss": 0.089, "step": 89200 }, { "epoch": 34.64, "learning_rate": 1.5380711974110034e-05, "loss": 0.0548, "step": 89210 }, { "epoch": 34.65, "learning_rate": 1.5380194174757282e-05, "loss": 0.0503, "step": 89220 }, { "epoch": 34.65, "learning_rate": 1.5379676375404533e-05, "loss": 0.1261, "step": 89230 }, { "epoch": 34.66, "learning_rate": 1.537915857605178e-05, "loss": 0.1161, "step": 89240 }, { "epoch": 34.66, "learning_rate": 1.537864077669903e-05, "loss": 0.1373, "step": 89250 }, { "epoch": 34.66, "learning_rate": 1.537812297734628e-05, "loss": 0.1411, "step": 89260 }, { "epoch": 34.67, "learning_rate": 1.537760517799353e-05, "loss": 0.2279, "step": 89270 }, { "epoch": 34.67, "learning_rate": 1.5377087378640776e-05, "loss": 0.0915, "step": 89280 }, { "epoch": 34.68, "learning_rate": 1.5376569579288028e-05, "loss": 0.2615, "step": 89290 }, { "epoch": 34.68, "learning_rate": 1.5376051779935276e-05, "loss": 0.0688, "step": 89300 }, { "epoch": 34.68, "learning_rate": 1.5375533980582527e-05, "loss": 0.0842, "step": 89310 }, { "epoch": 34.69, "learning_rate": 1.5375016181229775e-05, "loss": 0.1435, "step": 89320 }, { "epoch": 34.69, "learning_rate": 1.5374498381877023e-05, "loss": 0.1902, "step": 89330 }, { "epoch": 34.7, "learning_rate": 1.5373980582524274e-05, "loss": 0.0642, "step": 89340 }, { "epoch": 34.7, "learning_rate": 1.5373462783171522e-05, "loss": 0.034, "step": 89350 }, { "epoch": 34.7, "learning_rate": 1.537294498381877e-05, "loss": 0.1001, "step": 89360 }, { "epoch": 34.71, "learning_rate": 1.537242718446602e-05, "loss": 0.2239, "step": 89370 }, { "epoch": 34.71, "learning_rate": 1.537190938511327e-05, "loss": 0.0725, "step": 89380 }, { "epoch": 34.71, "learning_rate": 1.537139158576052e-05, "loss": 0.1676, "step": 89390 }, { "epoch": 34.72, "learning_rate": 1.537087378640777e-05, "loss": 0.0946, "step": 89400 }, { "epoch": 34.72, "learning_rate": 1.5370355987055016e-05, "loss": 0.0875, "step": 89410 }, { "epoch": 34.73, "learning_rate": 1.5369838187702268e-05, "loss": 0.1889, "step": 89420 }, { "epoch": 34.73, "learning_rate": 1.5369320388349516e-05, "loss": 0.0656, "step": 89430 }, { "epoch": 34.73, "learning_rate": 1.5368802588996764e-05, "loss": 0.0484, "step": 89440 }, { "epoch": 34.74, "learning_rate": 1.5368284789644015e-05, "loss": 0.1189, "step": 89450 }, { "epoch": 34.74, "learning_rate": 1.5367766990291263e-05, "loss": 0.0957, "step": 89460 }, { "epoch": 34.75, "learning_rate": 1.5367249190938514e-05, "loss": 0.1193, "step": 89470 }, { "epoch": 34.75, "learning_rate": 1.5366731391585762e-05, "loss": 0.0489, "step": 89480 }, { "epoch": 34.75, "learning_rate": 1.536621359223301e-05, "loss": 0.1087, "step": 89490 }, { "epoch": 34.76, "learning_rate": 1.536569579288026e-05, "loss": 0.029, "step": 89500 }, { "epoch": 34.76, "learning_rate": 1.536517799352751e-05, "loss": 0.0134, "step": 89510 }, { "epoch": 34.77, "learning_rate": 1.5364660194174757e-05, "loss": 0.1008, "step": 89520 }, { "epoch": 34.77, "learning_rate": 1.536414239482201e-05, "loss": 0.1453, "step": 89530 }, { "epoch": 34.77, "learning_rate": 1.5363624595469256e-05, "loss": 0.0357, "step": 89540 }, { "epoch": 34.78, "learning_rate": 1.5363106796116508e-05, "loss": 0.3234, "step": 89550 }, { "epoch": 34.78, "learning_rate": 1.5362588996763756e-05, "loss": 0.0245, "step": 89560 }, { "epoch": 34.78, "learning_rate": 1.5362071197411004e-05, "loss": 0.0079, "step": 89570 }, { "epoch": 34.79, "learning_rate": 1.5361553398058255e-05, "loss": 0.0464, "step": 89580 }, { "epoch": 34.79, "learning_rate": 1.5361035598705503e-05, "loss": 0.0078, "step": 89590 }, { "epoch": 34.8, "learning_rate": 1.536051779935275e-05, "loss": 0.1744, "step": 89600 }, { "epoch": 34.8, "learning_rate": 1.5360000000000002e-05, "loss": 0.0413, "step": 89610 }, { "epoch": 34.8, "learning_rate": 1.535948220064725e-05, "loss": 0.0185, "step": 89620 }, { "epoch": 34.81, "learning_rate": 1.53589644012945e-05, "loss": 0.0225, "step": 89630 }, { "epoch": 34.81, "learning_rate": 1.535844660194175e-05, "loss": 0.2146, "step": 89640 }, { "epoch": 34.82, "learning_rate": 1.5357928802588997e-05, "loss": 0.3258, "step": 89650 }, { "epoch": 34.82, "learning_rate": 1.5357411003236245e-05, "loss": 0.0363, "step": 89660 }, { "epoch": 34.82, "learning_rate": 1.5356893203883496e-05, "loss": 0.2226, "step": 89670 }, { "epoch": 34.83, "learning_rate": 1.5356375404530744e-05, "loss": 0.0582, "step": 89680 }, { "epoch": 34.83, "learning_rate": 1.5355857605177996e-05, "loss": 0.0854, "step": 89690 }, { "epoch": 34.83, "learning_rate": 1.5355339805825243e-05, "loss": 0.172, "step": 89700 }, { "epoch": 34.84, "learning_rate": 1.5354822006472495e-05, "loss": 0.0775, "step": 89710 }, { "epoch": 34.84, "learning_rate": 1.5354304207119743e-05, "loss": 0.0212, "step": 89720 }, { "epoch": 34.85, "learning_rate": 1.535378640776699e-05, "loss": 0.0155, "step": 89730 }, { "epoch": 34.85, "learning_rate": 1.535326860841424e-05, "loss": 0.0662, "step": 89740 }, { "epoch": 34.85, "learning_rate": 1.535275080906149e-05, "loss": 0.1445, "step": 89750 }, { "epoch": 34.86, "learning_rate": 1.5352233009708738e-05, "loss": 0.1408, "step": 89760 }, { "epoch": 34.86, "learning_rate": 1.535171521035599e-05, "loss": 0.1785, "step": 89770 }, { "epoch": 34.87, "learning_rate": 1.5351197411003237e-05, "loss": 0.1779, "step": 89780 }, { "epoch": 34.87, "learning_rate": 1.535067961165049e-05, "loss": 0.0681, "step": 89790 }, { "epoch": 34.87, "learning_rate": 1.5350161812297736e-05, "loss": 0.0107, "step": 89800 }, { "epoch": 34.88, "learning_rate": 1.5349644012944984e-05, "loss": 0.2209, "step": 89810 }, { "epoch": 34.88, "learning_rate": 1.5349126213592232e-05, "loss": 0.028, "step": 89820 }, { "epoch": 34.89, "learning_rate": 1.5348608414239483e-05, "loss": 0.0759, "step": 89830 }, { "epoch": 34.89, "learning_rate": 1.534809061488673e-05, "loss": 0.1212, "step": 89840 }, { "epoch": 34.89, "learning_rate": 1.5347572815533983e-05, "loss": 0.3049, "step": 89850 }, { "epoch": 34.9, "learning_rate": 1.534705501618123e-05, "loss": 0.1724, "step": 89860 }, { "epoch": 34.9, "learning_rate": 1.5346537216828482e-05, "loss": 0.0757, "step": 89870 }, { "epoch": 34.9, "learning_rate": 1.534601941747573e-05, "loss": 0.1087, "step": 89880 }, { "epoch": 34.91, "learning_rate": 1.5345501618122978e-05, "loss": 0.086, "step": 89890 }, { "epoch": 34.91, "learning_rate": 1.5344983818770226e-05, "loss": 0.0628, "step": 89900 }, { "epoch": 34.92, "learning_rate": 1.5344466019417477e-05, "loss": 0.033, "step": 89910 }, { "epoch": 34.92, "learning_rate": 1.5343948220064725e-05, "loss": 0.2318, "step": 89920 }, { "epoch": 34.92, "learning_rate": 1.5343430420711976e-05, "loss": 0.0361, "step": 89930 }, { "epoch": 34.93, "learning_rate": 1.5342912621359224e-05, "loss": 0.143, "step": 89940 }, { "epoch": 34.93, "learning_rate": 1.5342394822006475e-05, "loss": 0.1206, "step": 89950 }, { "epoch": 34.94, "learning_rate": 1.5341877022653723e-05, "loss": 0.0727, "step": 89960 }, { "epoch": 34.94, "learning_rate": 1.534135922330097e-05, "loss": 0.0441, "step": 89970 }, { "epoch": 34.94, "learning_rate": 1.534084142394822e-05, "loss": 0.0114, "step": 89980 }, { "epoch": 34.95, "learning_rate": 1.534032362459547e-05, "loss": 0.1423, "step": 89990 }, { "epoch": 34.95, "learning_rate": 1.533980582524272e-05, "loss": 0.1311, "step": 90000 }, { "epoch": 34.96, "learning_rate": 1.533928802588997e-05, "loss": 0.127, "step": 90010 }, { "epoch": 34.96, "learning_rate": 1.5338770226537218e-05, "loss": 0.1807, "step": 90020 }, { "epoch": 34.96, "learning_rate": 1.533825242718447e-05, "loss": 0.0445, "step": 90030 }, { "epoch": 34.97, "learning_rate": 1.5337734627831717e-05, "loss": 0.1863, "step": 90040 }, { "epoch": 34.97, "learning_rate": 1.5337216828478965e-05, "loss": 0.1139, "step": 90050 }, { "epoch": 34.97, "learning_rate": 1.5336699029126213e-05, "loss": 0.1244, "step": 90060 }, { "epoch": 34.98, "learning_rate": 1.5336181229773464e-05, "loss": 0.2443, "step": 90070 }, { "epoch": 34.98, "learning_rate": 1.5335663430420712e-05, "loss": 0.1439, "step": 90080 }, { "epoch": 34.99, "learning_rate": 1.5335145631067963e-05, "loss": 0.1472, "step": 90090 }, { "epoch": 34.99, "learning_rate": 1.533462783171521e-05, "loss": 0.0298, "step": 90100 }, { "epoch": 34.99, "learning_rate": 1.5334110032362463e-05, "loss": 0.0622, "step": 90110 }, { "epoch": 35.0, "learning_rate": 1.533359223300971e-05, "loss": 0.0804, "step": 90120 }, { "epoch": 35.0, "eval_accuracy": 0.9480055020632737, "eval_loss": 0.31004270911216736, "eval_runtime": 8.2797, "eval_samples_per_second": 439.026, "eval_steps_per_second": 54.954, "step": 90125 }, { "epoch": 35.0, "learning_rate": 1.533307443365696e-05, "loss": 0.0607, "step": 90130 }, { "epoch": 35.01, "learning_rate": 1.5332556634304206e-05, "loss": 0.0386, "step": 90140 }, { "epoch": 35.01, "learning_rate": 1.5332038834951458e-05, "loss": 0.0873, "step": 90150 }, { "epoch": 35.01, "learning_rate": 1.5331521035598706e-05, "loss": 0.0027, "step": 90160 }, { "epoch": 35.02, "learning_rate": 1.5331003236245957e-05, "loss": 0.1629, "step": 90170 }, { "epoch": 35.02, "learning_rate": 1.5330485436893205e-05, "loss": 0.0639, "step": 90180 }, { "epoch": 35.03, "learning_rate": 1.5329967637540456e-05, "loss": 0.1503, "step": 90190 }, { "epoch": 35.03, "learning_rate": 1.5329449838187704e-05, "loss": 0.0294, "step": 90200 }, { "epoch": 35.03, "learning_rate": 1.5328932038834952e-05, "loss": 0.0771, "step": 90210 }, { "epoch": 35.04, "learning_rate": 1.53284142394822e-05, "loss": 0.0688, "step": 90220 }, { "epoch": 35.04, "learning_rate": 1.532789644012945e-05, "loss": 0.0216, "step": 90230 }, { "epoch": 35.04, "learning_rate": 1.53273786407767e-05, "loss": 0.1791, "step": 90240 }, { "epoch": 35.05, "learning_rate": 1.532686084142395e-05, "loss": 0.0896, "step": 90250 }, { "epoch": 35.05, "learning_rate": 1.53263430420712e-05, "loss": 0.0108, "step": 90260 }, { "epoch": 35.06, "learning_rate": 1.532582524271845e-05, "loss": 0.0367, "step": 90270 }, { "epoch": 35.06, "learning_rate": 1.5325307443365698e-05, "loss": 0.047, "step": 90280 }, { "epoch": 35.06, "learning_rate": 1.532478964401295e-05, "loss": 0.015, "step": 90290 }, { "epoch": 35.07, "learning_rate": 1.5324271844660193e-05, "loss": 0.1414, "step": 90300 }, { "epoch": 35.07, "learning_rate": 1.5323754045307445e-05, "loss": 0.1428, "step": 90310 }, { "epoch": 35.08, "learning_rate": 1.5323236245954693e-05, "loss": 0.1067, "step": 90320 }, { "epoch": 35.08, "learning_rate": 1.5322718446601944e-05, "loss": 0.0126, "step": 90330 }, { "epoch": 35.08, "learning_rate": 1.5322200647249192e-05, "loss": 0.163, "step": 90340 }, { "epoch": 35.09, "learning_rate": 1.5321682847896443e-05, "loss": 0.0429, "step": 90350 }, { "epoch": 35.09, "learning_rate": 1.532116504854369e-05, "loss": 0.0564, "step": 90360 }, { "epoch": 35.1, "learning_rate": 1.5320647249190942e-05, "loss": 0.0974, "step": 90370 }, { "epoch": 35.1, "learning_rate": 1.5320129449838187e-05, "loss": 0.0075, "step": 90380 }, { "epoch": 35.1, "learning_rate": 1.531961165048544e-05, "loss": 0.0841, "step": 90390 }, { "epoch": 35.11, "learning_rate": 1.5319093851132686e-05, "loss": 0.0059, "step": 90400 }, { "epoch": 35.11, "learning_rate": 1.5318576051779938e-05, "loss": 0.1126, "step": 90410 }, { "epoch": 35.11, "learning_rate": 1.5318058252427186e-05, "loss": 0.005, "step": 90420 }, { "epoch": 35.12, "learning_rate": 1.5317540453074437e-05, "loss": 0.1748, "step": 90430 }, { "epoch": 35.12, "learning_rate": 1.5317022653721685e-05, "loss": 0.0943, "step": 90440 }, { "epoch": 35.13, "learning_rate": 1.5316504854368933e-05, "loss": 0.1351, "step": 90450 }, { "epoch": 35.13, "learning_rate": 1.531598705501618e-05, "loss": 0.0592, "step": 90460 }, { "epoch": 35.13, "learning_rate": 1.5315469255663432e-05, "loss": 0.125, "step": 90470 }, { "epoch": 35.14, "learning_rate": 1.531495145631068e-05, "loss": 0.1609, "step": 90480 }, { "epoch": 35.14, "learning_rate": 1.531443365695793e-05, "loss": 0.0776, "step": 90490 }, { "epoch": 35.15, "learning_rate": 1.531391585760518e-05, "loss": 0.1172, "step": 90500 }, { "epoch": 35.15, "learning_rate": 1.531339805825243e-05, "loss": 0.074, "step": 90510 }, { "epoch": 35.15, "learning_rate": 1.5312880258899678e-05, "loss": 0.0441, "step": 90520 }, { "epoch": 35.16, "learning_rate": 1.5312362459546926e-05, "loss": 0.1101, "step": 90530 }, { "epoch": 35.16, "learning_rate": 1.5311844660194174e-05, "loss": 0.0618, "step": 90540 }, { "epoch": 35.17, "learning_rate": 1.5311326860841425e-05, "loss": 0.0632, "step": 90550 }, { "epoch": 35.17, "learning_rate": 1.5310809061488673e-05, "loss": 0.1773, "step": 90560 }, { "epoch": 35.17, "learning_rate": 1.5310291262135925e-05, "loss": 0.0426, "step": 90570 }, { "epoch": 35.18, "learning_rate": 1.5309773462783173e-05, "loss": 0.1006, "step": 90580 }, { "epoch": 35.18, "learning_rate": 1.5309255663430424e-05, "loss": 0.0119, "step": 90590 }, { "epoch": 35.18, "learning_rate": 1.5308737864077672e-05, "loss": 0.1303, "step": 90600 }, { "epoch": 35.19, "learning_rate": 1.530822006472492e-05, "loss": 0.1378, "step": 90610 }, { "epoch": 35.19, "learning_rate": 1.5307702265372168e-05, "loss": 0.2001, "step": 90620 }, { "epoch": 35.2, "learning_rate": 1.530718446601942e-05, "loss": 0.0817, "step": 90630 }, { "epoch": 35.2, "learning_rate": 1.5306666666666667e-05, "loss": 0.0086, "step": 90640 }, { "epoch": 35.2, "learning_rate": 1.5306148867313918e-05, "loss": 0.0195, "step": 90650 }, { "epoch": 35.21, "learning_rate": 1.5305631067961166e-05, "loss": 0.0041, "step": 90660 }, { "epoch": 35.21, "learning_rate": 1.5305113268608417e-05, "loss": 0.0453, "step": 90670 }, { "epoch": 35.22, "learning_rate": 1.5304595469255665e-05, "loss": 0.0071, "step": 90680 }, { "epoch": 35.22, "learning_rate": 1.5304077669902913e-05, "loss": 0.0335, "step": 90690 }, { "epoch": 35.22, "learning_rate": 1.530355987055016e-05, "loss": 0.0998, "step": 90700 }, { "epoch": 35.23, "learning_rate": 1.5303042071197413e-05, "loss": 0.1725, "step": 90710 }, { "epoch": 35.23, "learning_rate": 1.530252427184466e-05, "loss": 0.1592, "step": 90720 }, { "epoch": 35.23, "learning_rate": 1.5302006472491912e-05, "loss": 0.1254, "step": 90730 }, { "epoch": 35.24, "learning_rate": 1.530148867313916e-05, "loss": 0.1049, "step": 90740 }, { "epoch": 35.24, "learning_rate": 1.5300970873786408e-05, "loss": 0.0797, "step": 90750 }, { "epoch": 35.25, "learning_rate": 1.530045307443366e-05, "loss": 0.0189, "step": 90760 }, { "epoch": 35.25, "learning_rate": 1.5299935275080907e-05, "loss": 0.1368, "step": 90770 }, { "epoch": 35.25, "learning_rate": 1.5299417475728155e-05, "loss": 0.1494, "step": 90780 }, { "epoch": 35.26, "learning_rate": 1.5298899676375406e-05, "loss": 0.0352, "step": 90790 }, { "epoch": 35.26, "learning_rate": 1.5298381877022654e-05, "loss": 0.0026, "step": 90800 }, { "epoch": 35.27, "learning_rate": 1.5297864077669905e-05, "loss": 0.07, "step": 90810 }, { "epoch": 35.27, "learning_rate": 1.5297346278317153e-05, "loss": 0.0979, "step": 90820 }, { "epoch": 35.27, "learning_rate": 1.52968284789644e-05, "loss": 0.209, "step": 90830 }, { "epoch": 35.28, "learning_rate": 1.5296310679611653e-05, "loss": 0.1864, "step": 90840 }, { "epoch": 35.28, "learning_rate": 1.52957928802589e-05, "loss": 0.098, "step": 90850 }, { "epoch": 35.29, "learning_rate": 1.5295275080906152e-05, "loss": 0.0288, "step": 90860 }, { "epoch": 35.29, "learning_rate": 1.52947572815534e-05, "loss": 0.143, "step": 90870 }, { "epoch": 35.29, "learning_rate": 1.5294239482200648e-05, "loss": 0.0423, "step": 90880 }, { "epoch": 35.3, "learning_rate": 1.52937216828479e-05, "loss": 0.0238, "step": 90890 }, { "epoch": 35.3, "learning_rate": 1.5293203883495147e-05, "loss": 0.1259, "step": 90900 }, { "epoch": 35.3, "learning_rate": 1.5292686084142395e-05, "loss": 0.1864, "step": 90910 }, { "epoch": 35.31, "learning_rate": 1.5292168284789646e-05, "loss": 0.0438, "step": 90920 }, { "epoch": 35.31, "learning_rate": 1.5291650485436894e-05, "loss": 0.0798, "step": 90930 }, { "epoch": 35.32, "learning_rate": 1.5291132686084145e-05, "loss": 0.1231, "step": 90940 }, { "epoch": 35.32, "learning_rate": 1.5290614886731393e-05, "loss": 0.113, "step": 90950 }, { "epoch": 35.32, "learning_rate": 1.529009708737864e-05, "loss": 0.0624, "step": 90960 }, { "epoch": 35.33, "learning_rate": 1.5289579288025892e-05, "loss": 0.2431, "step": 90970 }, { "epoch": 35.33, "learning_rate": 1.528906148867314e-05, "loss": 0.1014, "step": 90980 }, { "epoch": 35.34, "learning_rate": 1.528854368932039e-05, "loss": 0.128, "step": 90990 }, { "epoch": 35.34, "learning_rate": 1.528802588996764e-05, "loss": 0.054, "step": 91000 }, { "epoch": 35.34, "learning_rate": 1.5287508090614888e-05, "loss": 0.0618, "step": 91010 }, { "epoch": 35.35, "learning_rate": 1.528699029126214e-05, "loss": 0.0382, "step": 91020 }, { "epoch": 35.35, "learning_rate": 1.5286472491909387e-05, "loss": 0.0492, "step": 91030 }, { "epoch": 35.36, "learning_rate": 1.5285954692556635e-05, "loss": 0.0525, "step": 91040 }, { "epoch": 35.36, "learning_rate": 1.5285436893203886e-05, "loss": 0.0844, "step": 91050 }, { "epoch": 35.36, "learning_rate": 1.5284919093851134e-05, "loss": 0.0363, "step": 91060 }, { "epoch": 35.37, "learning_rate": 1.5284401294498382e-05, "loss": 0.0989, "step": 91070 }, { "epoch": 35.37, "learning_rate": 1.5283883495145633e-05, "loss": 0.1451, "step": 91080 }, { "epoch": 35.37, "learning_rate": 1.528336569579288e-05, "loss": 0.0177, "step": 91090 }, { "epoch": 35.38, "learning_rate": 1.5282847896440132e-05, "loss": 0.0049, "step": 91100 }, { "epoch": 35.38, "learning_rate": 1.528233009708738e-05, "loss": 0.0704, "step": 91110 }, { "epoch": 35.39, "learning_rate": 1.5281812297734628e-05, "loss": 0.1944, "step": 91120 }, { "epoch": 35.39, "learning_rate": 1.5281294498381876e-05, "loss": 0.0591, "step": 91130 }, { "epoch": 35.39, "learning_rate": 1.5280776699029128e-05, "loss": 0.0298, "step": 91140 }, { "epoch": 35.4, "learning_rate": 1.5280258899676375e-05, "loss": 0.0935, "step": 91150 }, { "epoch": 35.4, "learning_rate": 1.5279741100323627e-05, "loss": 0.0404, "step": 91160 }, { "epoch": 35.41, "learning_rate": 1.5279223300970875e-05, "loss": 0.047, "step": 91170 }, { "epoch": 35.41, "learning_rate": 1.5278705501618126e-05, "loss": 0.1165, "step": 91180 }, { "epoch": 35.41, "learning_rate": 1.5278187702265374e-05, "loss": 0.1042, "step": 91190 }, { "epoch": 35.42, "learning_rate": 1.5277669902912622e-05, "loss": 0.0404, "step": 91200 }, { "epoch": 35.42, "learning_rate": 1.527715210355987e-05, "loss": 0.2282, "step": 91210 }, { "epoch": 35.43, "learning_rate": 1.527663430420712e-05, "loss": 0.1435, "step": 91220 }, { "epoch": 35.43, "learning_rate": 1.527611650485437e-05, "loss": 0.1177, "step": 91230 }, { "epoch": 35.43, "learning_rate": 1.527559870550162e-05, "loss": 0.1038, "step": 91240 }, { "epoch": 35.44, "learning_rate": 1.5275080906148868e-05, "loss": 0.0036, "step": 91250 }, { "epoch": 35.44, "learning_rate": 1.527456310679612e-05, "loss": 0.04, "step": 91260 }, { "epoch": 35.44, "learning_rate": 1.5274045307443367e-05, "loss": 0.0638, "step": 91270 }, { "epoch": 35.45, "learning_rate": 1.5273527508090615e-05, "loss": 0.0416, "step": 91280 }, { "epoch": 35.45, "learning_rate": 1.5273009708737863e-05, "loss": 0.0266, "step": 91290 }, { "epoch": 35.46, "learning_rate": 1.5272491909385115e-05, "loss": 0.206, "step": 91300 }, { "epoch": 35.46, "learning_rate": 1.5271974110032363e-05, "loss": 0.0846, "step": 91310 }, { "epoch": 35.46, "learning_rate": 1.5271456310679614e-05, "loss": 0.2436, "step": 91320 }, { "epoch": 35.47, "learning_rate": 1.5270938511326862e-05, "loss": 0.0828, "step": 91330 }, { "epoch": 35.47, "learning_rate": 1.5270420711974113e-05, "loss": 0.1973, "step": 91340 }, { "epoch": 35.48, "learning_rate": 1.526990291262136e-05, "loss": 0.2494, "step": 91350 }, { "epoch": 35.48, "learning_rate": 1.526938511326861e-05, "loss": 0.0325, "step": 91360 }, { "epoch": 35.48, "learning_rate": 1.5268867313915857e-05, "loss": 0.0661, "step": 91370 }, { "epoch": 35.49, "learning_rate": 1.5268349514563108e-05, "loss": 0.064, "step": 91380 }, { "epoch": 35.49, "learning_rate": 1.5267831715210356e-05, "loss": 0.1749, "step": 91390 }, { "epoch": 35.5, "learning_rate": 1.5267313915857607e-05, "loss": 0.0979, "step": 91400 }, { "epoch": 35.5, "learning_rate": 1.5266796116504855e-05, "loss": 0.0339, "step": 91410 }, { "epoch": 35.5, "learning_rate": 1.5266278317152107e-05, "loss": 0.0488, "step": 91420 }, { "epoch": 35.51, "learning_rate": 1.5265760517799355e-05, "loss": 0.1321, "step": 91430 }, { "epoch": 35.51, "learning_rate": 1.5265242718446602e-05, "loss": 0.1388, "step": 91440 }, { "epoch": 35.51, "learning_rate": 1.526472491909385e-05, "loss": 0.1698, "step": 91450 }, { "epoch": 35.52, "learning_rate": 1.5264207119741102e-05, "loss": 0.024, "step": 91460 }, { "epoch": 35.52, "learning_rate": 1.526368932038835e-05, "loss": 0.2111, "step": 91470 }, { "epoch": 35.53, "learning_rate": 1.52631715210356e-05, "loss": 0.0246, "step": 91480 }, { "epoch": 35.53, "learning_rate": 1.526265372168285e-05, "loss": 0.044, "step": 91490 }, { "epoch": 35.53, "learning_rate": 1.52621359223301e-05, "loss": 0.1683, "step": 91500 }, { "epoch": 35.54, "learning_rate": 1.5261618122977348e-05, "loss": 0.1492, "step": 91510 }, { "epoch": 35.54, "learning_rate": 1.5261100323624596e-05, "loss": 0.0694, "step": 91520 }, { "epoch": 35.55, "learning_rate": 1.5260582524271844e-05, "loss": 0.1864, "step": 91530 }, { "epoch": 35.55, "learning_rate": 1.5260064724919095e-05, "loss": 0.1392, "step": 91540 }, { "epoch": 35.55, "learning_rate": 1.5259546925566343e-05, "loss": 0.0915, "step": 91550 }, { "epoch": 35.56, "learning_rate": 1.5259029126213595e-05, "loss": 0.1388, "step": 91560 }, { "epoch": 35.56, "learning_rate": 1.5258511326860842e-05, "loss": 0.2212, "step": 91570 }, { "epoch": 35.57, "learning_rate": 1.5257993527508092e-05, "loss": 0.0851, "step": 91580 }, { "epoch": 35.57, "learning_rate": 1.5257475728155342e-05, "loss": 0.0389, "step": 91590 }, { "epoch": 35.57, "learning_rate": 1.525695792880259e-05, "loss": 0.1544, "step": 91600 }, { "epoch": 35.58, "learning_rate": 1.525644012944984e-05, "loss": 0.1926, "step": 91610 }, { "epoch": 35.58, "learning_rate": 1.5255922330097089e-05, "loss": 0.0887, "step": 91620 }, { "epoch": 35.58, "learning_rate": 1.5255404530744338e-05, "loss": 0.0462, "step": 91630 }, { "epoch": 35.59, "learning_rate": 1.5254886731391586e-05, "loss": 0.0546, "step": 91640 }, { "epoch": 35.59, "learning_rate": 1.5254368932038836e-05, "loss": 0.0607, "step": 91650 }, { "epoch": 35.6, "learning_rate": 1.5253851132686086e-05, "loss": 0.153, "step": 91660 }, { "epoch": 35.6, "learning_rate": 1.5253333333333335e-05, "loss": 0.042, "step": 91670 }, { "epoch": 35.6, "learning_rate": 1.5252815533980583e-05, "loss": 0.0588, "step": 91680 }, { "epoch": 35.61, "learning_rate": 1.5252297734627833e-05, "loss": 0.2418, "step": 91690 }, { "epoch": 35.61, "learning_rate": 1.5251779935275082e-05, "loss": 0.1063, "step": 91700 }, { "epoch": 35.62, "learning_rate": 1.5251262135922332e-05, "loss": 0.1134, "step": 91710 }, { "epoch": 35.62, "learning_rate": 1.525074433656958e-05, "loss": 0.098, "step": 91720 }, { "epoch": 35.62, "learning_rate": 1.525022653721683e-05, "loss": 0.1643, "step": 91730 }, { "epoch": 35.63, "learning_rate": 1.524970873786408e-05, "loss": 0.0281, "step": 91740 }, { "epoch": 35.63, "learning_rate": 1.5249190938511329e-05, "loss": 0.0417, "step": 91750 }, { "epoch": 35.63, "learning_rate": 1.5248673139158577e-05, "loss": 0.076, "step": 91760 }, { "epoch": 35.64, "learning_rate": 1.5248155339805826e-05, "loss": 0.2033, "step": 91770 }, { "epoch": 35.64, "learning_rate": 1.5247637540453076e-05, "loss": 0.2248, "step": 91780 }, { "epoch": 35.65, "learning_rate": 1.5247119741100324e-05, "loss": 0.0956, "step": 91790 }, { "epoch": 35.65, "learning_rate": 1.5246601941747573e-05, "loss": 0.1026, "step": 91800 }, { "epoch": 35.65, "learning_rate": 1.5246084142394823e-05, "loss": 0.1232, "step": 91810 }, { "epoch": 35.66, "learning_rate": 1.5245566343042073e-05, "loss": 0.0097, "step": 91820 }, { "epoch": 35.66, "learning_rate": 1.5245048543689322e-05, "loss": 0.0839, "step": 91830 }, { "epoch": 35.67, "learning_rate": 1.524453074433657e-05, "loss": 0.0494, "step": 91840 }, { "epoch": 35.67, "learning_rate": 1.524401294498382e-05, "loss": 0.0427, "step": 91850 }, { "epoch": 35.67, "learning_rate": 1.524349514563107e-05, "loss": 0.1583, "step": 91860 }, { "epoch": 35.68, "learning_rate": 1.5242977346278317e-05, "loss": 0.0148, "step": 91870 }, { "epoch": 35.68, "learning_rate": 1.5242459546925567e-05, "loss": 0.1202, "step": 91880 }, { "epoch": 35.69, "learning_rate": 1.5241941747572817e-05, "loss": 0.134, "step": 91890 }, { "epoch": 35.69, "learning_rate": 1.5241423948220066e-05, "loss": 0.1154, "step": 91900 }, { "epoch": 35.69, "learning_rate": 1.5240906148867316e-05, "loss": 0.0893, "step": 91910 }, { "epoch": 35.7, "learning_rate": 1.5240388349514564e-05, "loss": 0.1528, "step": 91920 }, { "epoch": 35.7, "learning_rate": 1.5239870550161813e-05, "loss": 0.0425, "step": 91930 }, { "epoch": 35.7, "learning_rate": 1.5239352750809061e-05, "loss": 0.1177, "step": 91940 }, { "epoch": 35.71, "learning_rate": 1.5238834951456311e-05, "loss": 0.0695, "step": 91950 }, { "epoch": 35.71, "learning_rate": 1.523831715210356e-05, "loss": 0.0931, "step": 91960 }, { "epoch": 35.72, "learning_rate": 1.523779935275081e-05, "loss": 0.0507, "step": 91970 }, { "epoch": 35.72, "learning_rate": 1.523728155339806e-05, "loss": 0.0183, "step": 91980 }, { "epoch": 35.72, "learning_rate": 1.523676375404531e-05, "loss": 0.058, "step": 91990 }, { "epoch": 35.73, "learning_rate": 1.5236245954692559e-05, "loss": 0.2062, "step": 92000 }, { "epoch": 35.73, "learning_rate": 1.5235728155339807e-05, "loss": 0.0869, "step": 92010 }, { "epoch": 35.74, "learning_rate": 1.5235210355987055e-05, "loss": 0.0437, "step": 92020 }, { "epoch": 35.74, "learning_rate": 1.5234692556634305e-05, "loss": 0.1171, "step": 92030 }, { "epoch": 35.74, "learning_rate": 1.5234174757281554e-05, "loss": 0.0176, "step": 92040 }, { "epoch": 35.75, "learning_rate": 1.5233656957928804e-05, "loss": 0.0372, "step": 92050 }, { "epoch": 35.75, "learning_rate": 1.5233139158576053e-05, "loss": 0.0092, "step": 92060 }, { "epoch": 35.76, "learning_rate": 1.5232621359223303e-05, "loss": 0.0063, "step": 92070 }, { "epoch": 35.76, "learning_rate": 1.5232103559870553e-05, "loss": 0.1541, "step": 92080 }, { "epoch": 35.76, "learning_rate": 1.5231585760517799e-05, "loss": 0.1218, "step": 92090 }, { "epoch": 35.77, "learning_rate": 1.5231067961165048e-05, "loss": 0.0731, "step": 92100 }, { "epoch": 35.77, "learning_rate": 1.5230550161812298e-05, "loss": 0.1774, "step": 92110 }, { "epoch": 35.77, "learning_rate": 1.5230032362459548e-05, "loss": 0.0748, "step": 92120 }, { "epoch": 35.78, "learning_rate": 1.5229514563106797e-05, "loss": 0.1161, "step": 92130 }, { "epoch": 35.78, "learning_rate": 1.5228996763754047e-05, "loss": 0.195, "step": 92140 }, { "epoch": 35.79, "learning_rate": 1.5228478964401297e-05, "loss": 0.1248, "step": 92150 }, { "epoch": 35.79, "learning_rate": 1.5227961165048546e-05, "loss": 0.0818, "step": 92160 }, { "epoch": 35.79, "learning_rate": 1.5227443365695792e-05, "loss": 0.0044, "step": 92170 }, { "epoch": 35.8, "learning_rate": 1.5226925566343042e-05, "loss": 0.0683, "step": 92180 }, { "epoch": 35.8, "learning_rate": 1.5226407766990292e-05, "loss": 0.0857, "step": 92190 }, { "epoch": 35.81, "learning_rate": 1.5225889967637541e-05, "loss": 0.0591, "step": 92200 }, { "epoch": 35.81, "learning_rate": 1.5225372168284791e-05, "loss": 0.0524, "step": 92210 }, { "epoch": 35.81, "learning_rate": 1.522485436893204e-05, "loss": 0.1372, "step": 92220 }, { "epoch": 35.82, "learning_rate": 1.522433656957929e-05, "loss": 0.0102, "step": 92230 }, { "epoch": 35.82, "learning_rate": 1.522381877022654e-05, "loss": 0.0712, "step": 92240 }, { "epoch": 35.83, "learning_rate": 1.5223300970873786e-05, "loss": 0.2411, "step": 92250 }, { "epoch": 35.83, "learning_rate": 1.5222783171521036e-05, "loss": 0.2539, "step": 92260 }, { "epoch": 35.83, "learning_rate": 1.5222265372168285e-05, "loss": 0.0092, "step": 92270 }, { "epoch": 35.84, "learning_rate": 1.5221747572815535e-05, "loss": 0.0935, "step": 92280 }, { "epoch": 35.84, "learning_rate": 1.5221229773462784e-05, "loss": 0.2267, "step": 92290 }, { "epoch": 35.84, "learning_rate": 1.5220711974110034e-05, "loss": 0.0235, "step": 92300 }, { "epoch": 35.85, "learning_rate": 1.5220194174757284e-05, "loss": 0.0207, "step": 92310 }, { "epoch": 35.85, "learning_rate": 1.5219676375404533e-05, "loss": 0.0079, "step": 92320 }, { "epoch": 35.86, "learning_rate": 1.521915857605178e-05, "loss": 0.1499, "step": 92330 }, { "epoch": 35.86, "learning_rate": 1.521864077669903e-05, "loss": 0.009, "step": 92340 }, { "epoch": 35.86, "learning_rate": 1.5218122977346279e-05, "loss": 0.0953, "step": 92350 }, { "epoch": 35.87, "learning_rate": 1.5217605177993528e-05, "loss": 0.0739, "step": 92360 }, { "epoch": 35.87, "learning_rate": 1.5217087378640778e-05, "loss": 0.1097, "step": 92370 }, { "epoch": 35.88, "learning_rate": 1.5216569579288028e-05, "loss": 0.118, "step": 92380 }, { "epoch": 35.88, "learning_rate": 1.5216051779935277e-05, "loss": 0.0331, "step": 92390 }, { "epoch": 35.88, "learning_rate": 1.5215533980582527e-05, "loss": 0.1512, "step": 92400 }, { "epoch": 35.89, "learning_rate": 1.5215016181229773e-05, "loss": 0.0779, "step": 92410 }, { "epoch": 35.89, "learning_rate": 1.5214498381877023e-05, "loss": 0.008, "step": 92420 }, { "epoch": 35.9, "learning_rate": 1.5213980582524272e-05, "loss": 0.1453, "step": 92430 }, { "epoch": 35.9, "learning_rate": 1.5213462783171522e-05, "loss": 0.0238, "step": 92440 }, { "epoch": 35.9, "learning_rate": 1.5212944983818772e-05, "loss": 0.1026, "step": 92450 }, { "epoch": 35.91, "learning_rate": 1.5212427184466021e-05, "loss": 0.0979, "step": 92460 }, { "epoch": 35.91, "learning_rate": 1.521190938511327e-05, "loss": 0.1086, "step": 92470 }, { "epoch": 35.91, "learning_rate": 1.521139158576052e-05, "loss": 0.0119, "step": 92480 }, { "epoch": 35.92, "learning_rate": 1.5210873786407767e-05, "loss": 0.0634, "step": 92490 }, { "epoch": 35.92, "learning_rate": 1.5210355987055016e-05, "loss": 0.1265, "step": 92500 }, { "epoch": 35.93, "learning_rate": 1.5209838187702266e-05, "loss": 0.1179, "step": 92510 }, { "epoch": 35.93, "learning_rate": 1.5209320388349516e-05, "loss": 0.1409, "step": 92520 }, { "epoch": 35.93, "learning_rate": 1.5208802588996765e-05, "loss": 0.1379, "step": 92530 }, { "epoch": 35.94, "learning_rate": 1.5208284789644015e-05, "loss": 0.1276, "step": 92540 }, { "epoch": 35.94, "learning_rate": 1.5207766990291264e-05, "loss": 0.0254, "step": 92550 }, { "epoch": 35.95, "learning_rate": 1.5207249190938514e-05, "loss": 0.1353, "step": 92560 }, { "epoch": 35.95, "learning_rate": 1.5206731391585764e-05, "loss": 0.0549, "step": 92570 }, { "epoch": 35.95, "learning_rate": 1.520621359223301e-05, "loss": 0.0162, "step": 92580 }, { "epoch": 35.96, "learning_rate": 1.520569579288026e-05, "loss": 0.0698, "step": 92590 }, { "epoch": 35.96, "learning_rate": 1.5205177993527509e-05, "loss": 0.0192, "step": 92600 }, { "epoch": 35.97, "learning_rate": 1.5204660194174759e-05, "loss": 0.0527, "step": 92610 }, { "epoch": 35.97, "learning_rate": 1.5204142394822008e-05, "loss": 0.1091, "step": 92620 }, { "epoch": 35.97, "learning_rate": 1.5203624595469258e-05, "loss": 0.0307, "step": 92630 }, { "epoch": 35.98, "learning_rate": 1.5203106796116508e-05, "loss": 0.1475, "step": 92640 }, { "epoch": 35.98, "learning_rate": 1.5202588996763757e-05, "loss": 0.1601, "step": 92650 }, { "epoch": 35.98, "learning_rate": 1.5202071197411003e-05, "loss": 0.0768, "step": 92660 }, { "epoch": 35.99, "learning_rate": 1.5201553398058253e-05, "loss": 0.1309, "step": 92670 }, { "epoch": 35.99, "learning_rate": 1.5201035598705503e-05, "loss": 0.0855, "step": 92680 }, { "epoch": 36.0, "learning_rate": 1.5200517799352752e-05, "loss": 0.0709, "step": 92690 }, { "epoch": 36.0, "learning_rate": 1.5200000000000002e-05, "loss": 0.0617, "step": 92700 }, { "epoch": 36.0, "eval_accuracy": 0.9474552957359009, "eval_loss": 0.3130703270435333, "eval_runtime": 8.2696, "eval_samples_per_second": 439.562, "eval_steps_per_second": 55.021, "step": 92700 }, { "epoch": 36.0, "learning_rate": 1.5199482200647251e-05, "loss": 0.0318, "step": 92710 }, { "epoch": 36.01, "learning_rate": 1.5198964401294501e-05, "loss": 0.031, "step": 92720 }, { "epoch": 36.01, "learning_rate": 1.5198446601941749e-05, "loss": 0.2634, "step": 92730 }, { "epoch": 36.02, "learning_rate": 1.5197928802588997e-05, "loss": 0.0569, "step": 92740 }, { "epoch": 36.02, "learning_rate": 1.5197411003236247e-05, "loss": 0.0431, "step": 92750 }, { "epoch": 36.02, "learning_rate": 1.5196893203883496e-05, "loss": 0.0897, "step": 92760 }, { "epoch": 36.03, "learning_rate": 1.5196375404530746e-05, "loss": 0.058, "step": 92770 }, { "epoch": 36.03, "learning_rate": 1.5195857605177995e-05, "loss": 0.0784, "step": 92780 }, { "epoch": 36.03, "learning_rate": 1.5195339805825245e-05, "loss": 0.0403, "step": 92790 }, { "epoch": 36.04, "learning_rate": 1.5194822006472495e-05, "loss": 0.0583, "step": 92800 }, { "epoch": 36.04, "learning_rate": 1.5194304207119743e-05, "loss": 0.12, "step": 92810 }, { "epoch": 36.05, "learning_rate": 1.519378640776699e-05, "loss": 0.1237, "step": 92820 }, { "epoch": 36.05, "learning_rate": 1.519326860841424e-05, "loss": 0.1155, "step": 92830 }, { "epoch": 36.05, "learning_rate": 1.519275080906149e-05, "loss": 0.0005, "step": 92840 }, { "epoch": 36.06, "learning_rate": 1.519223300970874e-05, "loss": 0.0209, "step": 92850 }, { "epoch": 36.06, "learning_rate": 1.5191715210355989e-05, "loss": 0.064, "step": 92860 }, { "epoch": 36.07, "learning_rate": 1.5191197411003239e-05, "loss": 0.0345, "step": 92870 }, { "epoch": 36.07, "learning_rate": 1.5190679611650487e-05, "loss": 0.3539, "step": 92880 }, { "epoch": 36.07, "learning_rate": 1.5190161812297736e-05, "loss": 0.3365, "step": 92890 }, { "epoch": 36.08, "learning_rate": 1.5189644012944984e-05, "loss": 0.03, "step": 92900 }, { "epoch": 36.08, "learning_rate": 1.5189126213592234e-05, "loss": 0.0983, "step": 92910 }, { "epoch": 36.09, "learning_rate": 1.5188608414239483e-05, "loss": 0.0278, "step": 92920 }, { "epoch": 36.09, "learning_rate": 1.5188090614886733e-05, "loss": 0.0401, "step": 92930 }, { "epoch": 36.09, "learning_rate": 1.5187572815533983e-05, "loss": 0.0419, "step": 92940 }, { "epoch": 36.1, "learning_rate": 1.5187055016181232e-05, "loss": 0.0264, "step": 92950 }, { "epoch": 36.1, "learning_rate": 1.518653721682848e-05, "loss": 0.0326, "step": 92960 }, { "epoch": 36.1, "learning_rate": 1.518601941747573e-05, "loss": 0.0778, "step": 92970 }, { "epoch": 36.11, "learning_rate": 1.5185501618122978e-05, "loss": 0.0923, "step": 92980 }, { "epoch": 36.11, "learning_rate": 1.5184983818770227e-05, "loss": 0.0637, "step": 92990 }, { "epoch": 36.12, "learning_rate": 1.5184466019417477e-05, "loss": 0.0541, "step": 93000 }, { "epoch": 36.12, "learning_rate": 1.5183948220064726e-05, "loss": 0.1683, "step": 93010 }, { "epoch": 36.12, "learning_rate": 1.5183430420711976e-05, "loss": 0.0294, "step": 93020 }, { "epoch": 36.13, "learning_rate": 1.5182912621359224e-05, "loss": 0.0962, "step": 93030 }, { "epoch": 36.13, "learning_rate": 1.5182394822006474e-05, "loss": 0.0985, "step": 93040 }, { "epoch": 36.14, "learning_rate": 1.5181877022653723e-05, "loss": 0.0785, "step": 93050 }, { "epoch": 36.14, "learning_rate": 1.5181359223300973e-05, "loss": 0.1859, "step": 93060 }, { "epoch": 36.14, "learning_rate": 1.518084142394822e-05, "loss": 0.2668, "step": 93070 }, { "epoch": 36.15, "learning_rate": 1.518032362459547e-05, "loss": 0.0589, "step": 93080 }, { "epoch": 36.15, "learning_rate": 1.517980582524272e-05, "loss": 0.1822, "step": 93090 }, { "epoch": 36.16, "learning_rate": 1.517928802588997e-05, "loss": 0.0393, "step": 93100 }, { "epoch": 36.16, "learning_rate": 1.5178770226537218e-05, "loss": 0.0168, "step": 93110 }, { "epoch": 36.16, "learning_rate": 1.5178252427184467e-05, "loss": 0.0191, "step": 93120 }, { "epoch": 36.17, "learning_rate": 1.5177734627831717e-05, "loss": 0.1227, "step": 93130 }, { "epoch": 36.17, "learning_rate": 1.5177216828478966e-05, "loss": 0.1024, "step": 93140 }, { "epoch": 36.17, "learning_rate": 1.5176699029126214e-05, "loss": 0.0283, "step": 93150 }, { "epoch": 36.18, "learning_rate": 1.5176181229773464e-05, "loss": 0.0716, "step": 93160 }, { "epoch": 36.18, "learning_rate": 1.5175663430420714e-05, "loss": 0.0188, "step": 93170 }, { "epoch": 36.19, "learning_rate": 1.5175145631067963e-05, "loss": 0.0383, "step": 93180 }, { "epoch": 36.19, "learning_rate": 1.5174627831715211e-05, "loss": 0.1754, "step": 93190 }, { "epoch": 36.19, "learning_rate": 1.517411003236246e-05, "loss": 0.0824, "step": 93200 }, { "epoch": 36.2, "learning_rate": 1.517359223300971e-05, "loss": 0.0598, "step": 93210 }, { "epoch": 36.2, "learning_rate": 1.517307443365696e-05, "loss": 0.1186, "step": 93220 }, { "epoch": 36.21, "learning_rate": 1.5172556634304208e-05, "loss": 0.0373, "step": 93230 }, { "epoch": 36.21, "learning_rate": 1.5172038834951458e-05, "loss": 0.0238, "step": 93240 }, { "epoch": 36.21, "learning_rate": 1.5171521035598707e-05, "loss": 0.0546, "step": 93250 }, { "epoch": 36.22, "learning_rate": 1.5171003236245955e-05, "loss": 0.0863, "step": 93260 }, { "epoch": 36.22, "learning_rate": 1.5170485436893205e-05, "loss": 0.0704, "step": 93270 }, { "epoch": 36.23, "learning_rate": 1.5169967637540454e-05, "loss": 0.1447, "step": 93280 }, { "epoch": 36.23, "learning_rate": 1.5169449838187704e-05, "loss": 0.017, "step": 93290 }, { "epoch": 36.23, "learning_rate": 1.5168932038834954e-05, "loss": 0.1501, "step": 93300 }, { "epoch": 36.24, "learning_rate": 1.5168414239482201e-05, "loss": 0.0822, "step": 93310 }, { "epoch": 36.24, "learning_rate": 1.5167896440129451e-05, "loss": 0.1364, "step": 93320 }, { "epoch": 36.24, "learning_rate": 1.51673786407767e-05, "loss": 0.0464, "step": 93330 }, { "epoch": 36.25, "learning_rate": 1.5166860841423949e-05, "loss": 0.11, "step": 93340 }, { "epoch": 36.25, "learning_rate": 1.5166343042071198e-05, "loss": 0.1619, "step": 93350 }, { "epoch": 36.26, "learning_rate": 1.5165825242718448e-05, "loss": 0.078, "step": 93360 }, { "epoch": 36.26, "learning_rate": 1.5165307443365697e-05, "loss": 0.1987, "step": 93370 }, { "epoch": 36.26, "learning_rate": 1.5164789644012947e-05, "loss": 0.0572, "step": 93380 }, { "epoch": 36.27, "learning_rate": 1.5164271844660195e-05, "loss": 0.0573, "step": 93390 }, { "epoch": 36.27, "learning_rate": 1.5163754045307445e-05, "loss": 0.0801, "step": 93400 }, { "epoch": 36.28, "learning_rate": 1.5163236245954693e-05, "loss": 0.1024, "step": 93410 }, { "epoch": 36.28, "learning_rate": 1.5162718446601942e-05, "loss": 0.0838, "step": 93420 }, { "epoch": 36.28, "learning_rate": 1.5162200647249192e-05, "loss": 0.1235, "step": 93430 }, { "epoch": 36.29, "learning_rate": 1.5161682847896441e-05, "loss": 0.0799, "step": 93440 }, { "epoch": 36.29, "learning_rate": 1.5161165048543691e-05, "loss": 0.1574, "step": 93450 }, { "epoch": 36.3, "learning_rate": 1.516064724919094e-05, "loss": 0.1974, "step": 93460 }, { "epoch": 36.3, "learning_rate": 1.5160129449838189e-05, "loss": 0.0769, "step": 93470 }, { "epoch": 36.3, "learning_rate": 1.5159611650485438e-05, "loss": 0.1302, "step": 93480 }, { "epoch": 36.31, "learning_rate": 1.5159093851132686e-05, "loss": 0.2974, "step": 93490 }, { "epoch": 36.31, "learning_rate": 1.5158576051779936e-05, "loss": 0.1041, "step": 93500 }, { "epoch": 36.31, "learning_rate": 1.5158058252427185e-05, "loss": 0.0629, "step": 93510 }, { "epoch": 36.32, "learning_rate": 1.5157540453074435e-05, "loss": 0.0975, "step": 93520 }, { "epoch": 36.32, "learning_rate": 1.5157022653721685e-05, "loss": 0.1603, "step": 93530 }, { "epoch": 36.33, "learning_rate": 1.5156504854368934e-05, "loss": 0.1948, "step": 93540 }, { "epoch": 36.33, "learning_rate": 1.5155987055016182e-05, "loss": 0.0159, "step": 93550 }, { "epoch": 36.33, "learning_rate": 1.515546925566343e-05, "loss": 0.0015, "step": 93560 }, { "epoch": 36.34, "learning_rate": 1.515495145631068e-05, "loss": 0.2686, "step": 93570 }, { "epoch": 36.34, "learning_rate": 1.515443365695793e-05, "loss": 0.147, "step": 93580 }, { "epoch": 36.35, "learning_rate": 1.5153915857605179e-05, "loss": 0.054, "step": 93590 }, { "epoch": 36.35, "learning_rate": 1.5153398058252429e-05, "loss": 0.062, "step": 93600 }, { "epoch": 36.35, "learning_rate": 1.5152880258899678e-05, "loss": 0.1067, "step": 93610 }, { "epoch": 36.36, "learning_rate": 1.5152362459546928e-05, "loss": 0.0412, "step": 93620 }, { "epoch": 36.36, "learning_rate": 1.5151844660194177e-05, "loss": 0.1015, "step": 93630 }, { "epoch": 36.37, "learning_rate": 1.5151326860841424e-05, "loss": 0.0682, "step": 93640 }, { "epoch": 36.37, "learning_rate": 1.5150809061488673e-05, "loss": 0.0034, "step": 93650 }, { "epoch": 36.37, "learning_rate": 1.5150291262135923e-05, "loss": 0.2758, "step": 93660 }, { "epoch": 36.38, "learning_rate": 1.5149773462783172e-05, "loss": 0.1171, "step": 93670 }, { "epoch": 36.38, "learning_rate": 1.5149255663430422e-05, "loss": 0.0711, "step": 93680 }, { "epoch": 36.38, "learning_rate": 1.5148737864077672e-05, "loss": 0.0192, "step": 93690 }, { "epoch": 36.39, "learning_rate": 1.5148220064724921e-05, "loss": 0.0894, "step": 93700 }, { "epoch": 36.39, "learning_rate": 1.5147702265372171e-05, "loss": 0.0952, "step": 93710 }, { "epoch": 36.4, "learning_rate": 1.5147184466019417e-05, "loss": 0.2131, "step": 93720 }, { "epoch": 36.4, "learning_rate": 1.5146666666666667e-05, "loss": 0.1252, "step": 93730 }, { "epoch": 36.4, "learning_rate": 1.5146148867313916e-05, "loss": 0.0923, "step": 93740 }, { "epoch": 36.41, "learning_rate": 1.5145631067961166e-05, "loss": 0.054, "step": 93750 }, { "epoch": 36.41, "learning_rate": 1.5145113268608416e-05, "loss": 0.0659, "step": 93760 }, { "epoch": 36.42, "learning_rate": 1.5144595469255665e-05, "loss": 0.236, "step": 93770 }, { "epoch": 36.42, "learning_rate": 1.5144077669902915e-05, "loss": 0.2097, "step": 93780 }, { "epoch": 36.42, "learning_rate": 1.5143559870550164e-05, "loss": 0.1374, "step": 93790 }, { "epoch": 36.43, "learning_rate": 1.514304207119741e-05, "loss": 0.0839, "step": 93800 }, { "epoch": 36.43, "learning_rate": 1.514252427184466e-05, "loss": 0.0563, "step": 93810 }, { "epoch": 36.43, "learning_rate": 1.514200647249191e-05, "loss": 0.1055, "step": 93820 }, { "epoch": 36.44, "learning_rate": 1.514148867313916e-05, "loss": 0.1542, "step": 93830 }, { "epoch": 36.44, "learning_rate": 1.514097087378641e-05, "loss": 0.4709, "step": 93840 }, { "epoch": 36.45, "learning_rate": 1.5140453074433659e-05, "loss": 0.0809, "step": 93850 }, { "epoch": 36.45, "learning_rate": 1.5139935275080908e-05, "loss": 0.0663, "step": 93860 }, { "epoch": 36.45, "learning_rate": 1.5139417475728158e-05, "loss": 0.0428, "step": 93870 }, { "epoch": 36.46, "learning_rate": 1.5138899676375404e-05, "loss": 0.0193, "step": 93880 }, { "epoch": 36.46, "learning_rate": 1.5138381877022654e-05, "loss": 0.1521, "step": 93890 }, { "epoch": 36.47, "learning_rate": 1.5137864077669904e-05, "loss": 0.1255, "step": 93900 }, { "epoch": 36.47, "learning_rate": 1.5137346278317153e-05, "loss": 0.0699, "step": 93910 }, { "epoch": 36.47, "learning_rate": 1.5136828478964403e-05, "loss": 0.0519, "step": 93920 }, { "epoch": 36.48, "learning_rate": 1.5136310679611652e-05, "loss": 0.0827, "step": 93930 }, { "epoch": 36.48, "learning_rate": 1.5135792880258902e-05, "loss": 0.0729, "step": 93940 }, { "epoch": 36.49, "learning_rate": 1.5135275080906152e-05, "loss": 0.1002, "step": 93950 }, { "epoch": 36.49, "learning_rate": 1.5134757281553398e-05, "loss": 0.0167, "step": 93960 }, { "epoch": 36.49, "learning_rate": 1.5134239482200647e-05, "loss": 0.0334, "step": 93970 }, { "epoch": 36.5, "learning_rate": 1.5133721682847897e-05, "loss": 0.133, "step": 93980 }, { "epoch": 36.5, "learning_rate": 1.5133203883495147e-05, "loss": 0.3595, "step": 93990 }, { "epoch": 36.5, "learning_rate": 1.5132686084142396e-05, "loss": 0.1328, "step": 94000 }, { "epoch": 36.51, "learning_rate": 1.5132168284789646e-05, "loss": 0.1484, "step": 94010 }, { "epoch": 36.51, "learning_rate": 1.5131650485436896e-05, "loss": 0.0356, "step": 94020 }, { "epoch": 36.52, "learning_rate": 1.5131132686084145e-05, "loss": 0.1089, "step": 94030 }, { "epoch": 36.52, "learning_rate": 1.5130614886731391e-05, "loss": 0.1307, "step": 94040 }, { "epoch": 36.52, "learning_rate": 1.5130097087378641e-05, "loss": 0.0854, "step": 94050 }, { "epoch": 36.53, "learning_rate": 1.512957928802589e-05, "loss": 0.1309, "step": 94060 }, { "epoch": 36.53, "learning_rate": 1.512906148867314e-05, "loss": 0.1379, "step": 94070 }, { "epoch": 36.54, "learning_rate": 1.512854368932039e-05, "loss": 0.0739, "step": 94080 }, { "epoch": 36.54, "learning_rate": 1.512802588996764e-05, "loss": 0.0581, "step": 94090 }, { "epoch": 36.54, "learning_rate": 1.5127508090614889e-05, "loss": 0.0426, "step": 94100 }, { "epoch": 36.55, "learning_rate": 1.5126990291262139e-05, "loss": 0.0306, "step": 94110 }, { "epoch": 36.55, "learning_rate": 1.5126472491909385e-05, "loss": 0.0463, "step": 94120 }, { "epoch": 36.56, "learning_rate": 1.5125954692556635e-05, "loss": 0.0458, "step": 94130 }, { "epoch": 36.56, "learning_rate": 1.5125436893203884e-05, "loss": 0.139, "step": 94140 }, { "epoch": 36.56, "learning_rate": 1.5124919093851134e-05, "loss": 0.1626, "step": 94150 }, { "epoch": 36.57, "learning_rate": 1.5124401294498383e-05, "loss": 0.1232, "step": 94160 }, { "epoch": 36.57, "learning_rate": 1.5123883495145633e-05, "loss": 0.0344, "step": 94170 }, { "epoch": 36.57, "learning_rate": 1.5123365695792883e-05, "loss": 0.0987, "step": 94180 }, { "epoch": 36.58, "learning_rate": 1.5122847896440132e-05, "loss": 0.0315, "step": 94190 }, { "epoch": 36.58, "learning_rate": 1.512233009708738e-05, "loss": 0.1932, "step": 94200 }, { "epoch": 36.59, "learning_rate": 1.5121812297734628e-05, "loss": 0.0714, "step": 94210 }, { "epoch": 36.59, "learning_rate": 1.5121294498381878e-05, "loss": 0.0703, "step": 94220 }, { "epoch": 36.59, "learning_rate": 1.5120776699029127e-05, "loss": 0.1712, "step": 94230 }, { "epoch": 36.6, "learning_rate": 1.5120258899676377e-05, "loss": 0.1755, "step": 94240 }, { "epoch": 36.6, "learning_rate": 1.5119741100323627e-05, "loss": 0.1147, "step": 94250 }, { "epoch": 36.61, "learning_rate": 1.5119223300970876e-05, "loss": 0.0209, "step": 94260 }, { "epoch": 36.61, "learning_rate": 1.5118705501618126e-05, "loss": 0.0383, "step": 94270 }, { "epoch": 36.61, "learning_rate": 1.5118187702265374e-05, "loss": 0.0615, "step": 94280 }, { "epoch": 36.62, "learning_rate": 1.5117669902912622e-05, "loss": 0.1083, "step": 94290 }, { "epoch": 36.62, "learning_rate": 1.5117152103559871e-05, "loss": 0.0946, "step": 94300 }, { "epoch": 36.63, "learning_rate": 1.5116634304207121e-05, "loss": 0.0993, "step": 94310 }, { "epoch": 36.63, "learning_rate": 1.511611650485437e-05, "loss": 0.0487, "step": 94320 }, { "epoch": 36.63, "learning_rate": 1.511559870550162e-05, "loss": 0.092, "step": 94330 }, { "epoch": 36.64, "learning_rate": 1.511508090614887e-05, "loss": 0.0882, "step": 94340 }, { "epoch": 36.64, "learning_rate": 1.5114563106796118e-05, "loss": 0.1186, "step": 94350 }, { "epoch": 36.64, "learning_rate": 1.5114045307443367e-05, "loss": 0.0575, "step": 94360 }, { "epoch": 36.65, "learning_rate": 1.5113527508090615e-05, "loss": 0.1148, "step": 94370 }, { "epoch": 36.65, "learning_rate": 1.5113009708737865e-05, "loss": 0.1553, "step": 94380 }, { "epoch": 36.66, "learning_rate": 1.5112491909385114e-05, "loss": 0.0683, "step": 94390 }, { "epoch": 36.66, "learning_rate": 1.5111974110032364e-05, "loss": 0.0664, "step": 94400 }, { "epoch": 36.66, "learning_rate": 1.5111456310679614e-05, "loss": 0.1435, "step": 94410 }, { "epoch": 36.67, "learning_rate": 1.5110938511326863e-05, "loss": 0.0857, "step": 94420 }, { "epoch": 36.67, "learning_rate": 1.5110420711974111e-05, "loss": 0.1608, "step": 94430 }, { "epoch": 36.68, "learning_rate": 1.5109902912621361e-05, "loss": 0.0275, "step": 94440 }, { "epoch": 36.68, "learning_rate": 1.5109385113268609e-05, "loss": 0.0518, "step": 94450 }, { "epoch": 36.68, "learning_rate": 1.5108867313915858e-05, "loss": 0.1279, "step": 94460 }, { "epoch": 36.69, "learning_rate": 1.5108349514563108e-05, "loss": 0.1128, "step": 94470 }, { "epoch": 36.69, "learning_rate": 1.5107831715210358e-05, "loss": 0.0688, "step": 94480 }, { "epoch": 36.7, "learning_rate": 1.5107313915857607e-05, "loss": 0.0494, "step": 94490 }, { "epoch": 36.7, "learning_rate": 1.5106796116504855e-05, "loss": 0.0097, "step": 94500 }, { "epoch": 36.7, "learning_rate": 1.5106278317152105e-05, "loss": 0.0653, "step": 94510 }, { "epoch": 36.71, "learning_rate": 1.5105760517799354e-05, "loss": 0.0618, "step": 94520 }, { "epoch": 36.71, "learning_rate": 1.5105242718446602e-05, "loss": 0.0772, "step": 94530 }, { "epoch": 36.71, "learning_rate": 1.5104724919093852e-05, "loss": 0.0782, "step": 94540 }, { "epoch": 36.72, "learning_rate": 1.5104207119741102e-05, "loss": 0.1542, "step": 94550 }, { "epoch": 36.72, "learning_rate": 1.5103689320388351e-05, "loss": 0.1254, "step": 94560 }, { "epoch": 36.73, "learning_rate": 1.51031715210356e-05, "loss": 0.028, "step": 94570 }, { "epoch": 36.73, "learning_rate": 1.5102653721682849e-05, "loss": 0.116, "step": 94580 }, { "epoch": 36.73, "learning_rate": 1.5102135922330098e-05, "loss": 0.028, "step": 94590 }, { "epoch": 36.74, "learning_rate": 1.5101618122977348e-05, "loss": 0.2148, "step": 94600 }, { "epoch": 36.74, "learning_rate": 1.5101100323624596e-05, "loss": 0.1419, "step": 94610 }, { "epoch": 36.75, "learning_rate": 1.5100582524271846e-05, "loss": 0.1406, "step": 94620 }, { "epoch": 36.75, "learning_rate": 1.5100064724919095e-05, "loss": 0.0012, "step": 94630 }, { "epoch": 36.75, "learning_rate": 1.5099546925566345e-05, "loss": 0.1851, "step": 94640 }, { "epoch": 36.76, "learning_rate": 1.5099029126213593e-05, "loss": 0.0631, "step": 94650 }, { "epoch": 36.76, "learning_rate": 1.5098511326860842e-05, "loss": 0.1667, "step": 94660 }, { "epoch": 36.77, "learning_rate": 1.5097993527508092e-05, "loss": 0.0489, "step": 94670 }, { "epoch": 36.77, "learning_rate": 1.5097475728155342e-05, "loss": 0.1357, "step": 94680 }, { "epoch": 36.77, "learning_rate": 1.509695792880259e-05, "loss": 0.0155, "step": 94690 }, { "epoch": 36.78, "learning_rate": 1.5096440129449839e-05, "loss": 0.0062, "step": 94700 }, { "epoch": 36.78, "learning_rate": 1.5095922330097089e-05, "loss": 0.0452, "step": 94710 }, { "epoch": 36.78, "learning_rate": 1.5095404530744338e-05, "loss": 0.1386, "step": 94720 }, { "epoch": 36.79, "learning_rate": 1.5094886731391586e-05, "loss": 0.0236, "step": 94730 }, { "epoch": 36.79, "learning_rate": 1.5094368932038836e-05, "loss": 0.051, "step": 94740 }, { "epoch": 36.8, "learning_rate": 1.5093851132686085e-05, "loss": 0.0336, "step": 94750 }, { "epoch": 36.8, "learning_rate": 1.5093333333333335e-05, "loss": 0.0387, "step": 94760 }, { "epoch": 36.8, "learning_rate": 1.5092815533980585e-05, "loss": 0.0035, "step": 94770 }, { "epoch": 36.81, "learning_rate": 1.5092297734627833e-05, "loss": 0.1151, "step": 94780 }, { "epoch": 36.81, "learning_rate": 1.5091779935275082e-05, "loss": 0.1709, "step": 94790 }, { "epoch": 36.82, "learning_rate": 1.5091262135922332e-05, "loss": 0.1503, "step": 94800 }, { "epoch": 36.82, "learning_rate": 1.509074433656958e-05, "loss": 0.2622, "step": 94810 }, { "epoch": 36.82, "learning_rate": 1.509022653721683e-05, "loss": 0.1608, "step": 94820 }, { "epoch": 36.83, "learning_rate": 1.5089708737864079e-05, "loss": 0.0409, "step": 94830 }, { "epoch": 36.83, "learning_rate": 1.5089190938511329e-05, "loss": 0.1265, "step": 94840 }, { "epoch": 36.83, "learning_rate": 1.5088673139158578e-05, "loss": 0.0602, "step": 94850 }, { "epoch": 36.84, "learning_rate": 1.5088155339805826e-05, "loss": 0.1065, "step": 94860 }, { "epoch": 36.84, "learning_rate": 1.5087637540453076e-05, "loss": 0.2009, "step": 94870 }, { "epoch": 36.85, "learning_rate": 1.5087119741100324e-05, "loss": 0.1632, "step": 94880 }, { "epoch": 36.85, "learning_rate": 1.5086601941747573e-05, "loss": 0.0638, "step": 94890 }, { "epoch": 36.85, "learning_rate": 1.5086084142394823e-05, "loss": 0.1538, "step": 94900 }, { "epoch": 36.86, "learning_rate": 1.5085566343042073e-05, "loss": 0.1409, "step": 94910 }, { "epoch": 36.86, "learning_rate": 1.5085048543689322e-05, "loss": 0.0334, "step": 94920 }, { "epoch": 36.87, "learning_rate": 1.5084530744336572e-05, "loss": 0.2208, "step": 94930 }, { "epoch": 36.87, "learning_rate": 1.508401294498382e-05, "loss": 0.1526, "step": 94940 }, { "epoch": 36.87, "learning_rate": 1.508349514563107e-05, "loss": 0.108, "step": 94950 }, { "epoch": 36.88, "learning_rate": 1.5082977346278317e-05, "loss": 0.1055, "step": 94960 }, { "epoch": 36.88, "learning_rate": 1.5082459546925567e-05, "loss": 0.0666, "step": 94970 }, { "epoch": 36.89, "learning_rate": 1.5081941747572817e-05, "loss": 0.0109, "step": 94980 }, { "epoch": 36.89, "learning_rate": 1.5081423948220066e-05, "loss": 0.1345, "step": 94990 }, { "epoch": 36.89, "learning_rate": 1.5080906148867316e-05, "loss": 0.0656, "step": 95000 }, { "epoch": 36.9, "learning_rate": 1.5080388349514565e-05, "loss": 0.0369, "step": 95010 }, { "epoch": 36.9, "learning_rate": 1.5079870550161813e-05, "loss": 0.0238, "step": 95020 }, { "epoch": 36.9, "learning_rate": 1.5079352750809061e-05, "loss": 0.0998, "step": 95030 }, { "epoch": 36.91, "learning_rate": 1.507883495145631e-05, "loss": 0.0762, "step": 95040 }, { "epoch": 36.91, "learning_rate": 1.507831715210356e-05, "loss": 0.1736, "step": 95050 }, { "epoch": 36.92, "learning_rate": 1.507779935275081e-05, "loss": 0.0631, "step": 95060 }, { "epoch": 36.92, "learning_rate": 1.507728155339806e-05, "loss": 0.0554, "step": 95070 }, { "epoch": 36.92, "learning_rate": 1.507676375404531e-05, "loss": 0.0357, "step": 95080 }, { "epoch": 36.93, "learning_rate": 1.5076245954692559e-05, "loss": 0.0495, "step": 95090 }, { "epoch": 36.93, "learning_rate": 1.5075728155339807e-05, "loss": 0.1017, "step": 95100 }, { "epoch": 36.94, "learning_rate": 1.5075210355987055e-05, "loss": 0.1263, "step": 95110 }, { "epoch": 36.94, "learning_rate": 1.5074692556634304e-05, "loss": 0.0821, "step": 95120 }, { "epoch": 36.94, "learning_rate": 1.5074174757281554e-05, "loss": 0.1253, "step": 95130 }, { "epoch": 36.95, "learning_rate": 1.5073656957928804e-05, "loss": 0.0292, "step": 95140 }, { "epoch": 36.95, "learning_rate": 1.5073139158576053e-05, "loss": 0.0249, "step": 95150 }, { "epoch": 36.96, "learning_rate": 1.5072621359223303e-05, "loss": 0.1046, "step": 95160 }, { "epoch": 36.96, "learning_rate": 1.5072103559870552e-05, "loss": 0.1168, "step": 95170 }, { "epoch": 36.96, "learning_rate": 1.5071585760517799e-05, "loss": 0.0756, "step": 95180 }, { "epoch": 36.97, "learning_rate": 1.5071067961165048e-05, "loss": 0.0435, "step": 95190 }, { "epoch": 36.97, "learning_rate": 1.5070550161812298e-05, "loss": 0.1471, "step": 95200 }, { "epoch": 36.97, "learning_rate": 1.5070032362459548e-05, "loss": 0.0483, "step": 95210 }, { "epoch": 36.98, "learning_rate": 1.5069514563106797e-05, "loss": 0.1216, "step": 95220 }, { "epoch": 36.98, "learning_rate": 1.5068996763754047e-05, "loss": 0.1783, "step": 95230 }, { "epoch": 36.99, "learning_rate": 1.5068478964401296e-05, "loss": 0.1369, "step": 95240 }, { "epoch": 36.99, "learning_rate": 1.5067961165048546e-05, "loss": 0.2083, "step": 95250 }, { "epoch": 36.99, "learning_rate": 1.5067443365695792e-05, "loss": 0.1072, "step": 95260 }, { "epoch": 37.0, "learning_rate": 1.5066925566343042e-05, "loss": 0.0348, "step": 95270 }, { "epoch": 37.0, "eval_accuracy": 0.9485557083906465, "eval_loss": 0.3341105878353119, "eval_runtime": 8.2275, "eval_samples_per_second": 441.813, "eval_steps_per_second": 55.303, "step": 95275 }, { "epoch": 37.0, "learning_rate": 1.5066407766990291e-05, "loss": 0.1514, "step": 95280 }, { "epoch": 37.01, "learning_rate": 1.5065889967637541e-05, "loss": 0.0645, "step": 95290 }, { "epoch": 37.01, "learning_rate": 1.506537216828479e-05, "loss": 0.0286, "step": 95300 }, { "epoch": 37.01, "learning_rate": 1.506485436893204e-05, "loss": 0.0382, "step": 95310 }, { "epoch": 37.02, "learning_rate": 1.506433656957929e-05, "loss": 0.0965, "step": 95320 }, { "epoch": 37.02, "learning_rate": 1.506381877022654e-05, "loss": 0.0798, "step": 95330 }, { "epoch": 37.03, "learning_rate": 1.506330097087379e-05, "loss": 0.1452, "step": 95340 }, { "epoch": 37.03, "learning_rate": 1.5062783171521035e-05, "loss": 0.1878, "step": 95350 }, { "epoch": 37.03, "learning_rate": 1.5062265372168285e-05, "loss": 0.0231, "step": 95360 }, { "epoch": 37.04, "learning_rate": 1.5061747572815535e-05, "loss": 0.1118, "step": 95370 }, { "epoch": 37.04, "learning_rate": 1.5061229773462784e-05, "loss": 0.068, "step": 95380 }, { "epoch": 37.04, "learning_rate": 1.5060711974110034e-05, "loss": 0.1219, "step": 95390 }, { "epoch": 37.05, "learning_rate": 1.5060194174757284e-05, "loss": 0.0682, "step": 95400 }, { "epoch": 37.05, "learning_rate": 1.5059676375404533e-05, "loss": 0.0023, "step": 95410 }, { "epoch": 37.06, "learning_rate": 1.5059158576051783e-05, "loss": 0.1945, "step": 95420 }, { "epoch": 37.06, "learning_rate": 1.5058640776699029e-05, "loss": 0.0544, "step": 95430 }, { "epoch": 37.06, "learning_rate": 1.5058122977346279e-05, "loss": 0.0305, "step": 95440 }, { "epoch": 37.07, "learning_rate": 1.5057605177993528e-05, "loss": 0.0509, "step": 95450 }, { "epoch": 37.07, "learning_rate": 1.5057087378640778e-05, "loss": 0.0857, "step": 95460 }, { "epoch": 37.08, "learning_rate": 1.5056569579288027e-05, "loss": 0.0699, "step": 95470 }, { "epoch": 37.08, "learning_rate": 1.5056051779935277e-05, "loss": 0.1782, "step": 95480 }, { "epoch": 37.08, "learning_rate": 1.5055533980582527e-05, "loss": 0.0065, "step": 95490 }, { "epoch": 37.09, "learning_rate": 1.5055016181229776e-05, "loss": 0.1969, "step": 95500 }, { "epoch": 37.09, "learning_rate": 1.5054498381877023e-05, "loss": 0.0496, "step": 95510 }, { "epoch": 37.1, "learning_rate": 1.5053980582524272e-05, "loss": 0.0877, "step": 95520 }, { "epoch": 37.1, "learning_rate": 1.5053462783171522e-05, "loss": 0.0027, "step": 95530 }, { "epoch": 37.1, "learning_rate": 1.5052944983818771e-05, "loss": 0.0872, "step": 95540 }, { "epoch": 37.11, "learning_rate": 1.5052427184466021e-05, "loss": 0.0163, "step": 95550 }, { "epoch": 37.11, "learning_rate": 1.505190938511327e-05, "loss": 0.1064, "step": 95560 }, { "epoch": 37.11, "learning_rate": 1.505139158576052e-05, "loss": 0.1265, "step": 95570 }, { "epoch": 37.12, "learning_rate": 1.505087378640777e-05, "loss": 0.0262, "step": 95580 }, { "epoch": 37.12, "learning_rate": 1.5050355987055016e-05, "loss": 0.0588, "step": 95590 }, { "epoch": 37.13, "learning_rate": 1.5049838187702266e-05, "loss": 0.0269, "step": 95600 }, { "epoch": 37.13, "learning_rate": 1.5049320388349515e-05, "loss": 0.1557, "step": 95610 }, { "epoch": 37.13, "learning_rate": 1.5048802588996765e-05, "loss": 0.0546, "step": 95620 }, { "epoch": 37.14, "learning_rate": 1.5048284789644015e-05, "loss": 0.0876, "step": 95630 }, { "epoch": 37.14, "learning_rate": 1.5047766990291264e-05, "loss": 0.1005, "step": 95640 }, { "epoch": 37.15, "learning_rate": 1.5047249190938514e-05, "loss": 0.0875, "step": 95650 }, { "epoch": 37.15, "learning_rate": 1.5046731391585763e-05, "loss": 0.0638, "step": 95660 }, { "epoch": 37.15, "learning_rate": 1.504621359223301e-05, "loss": 0.1104, "step": 95670 }, { "epoch": 37.16, "learning_rate": 1.504569579288026e-05, "loss": 0.1343, "step": 95680 }, { "epoch": 37.16, "learning_rate": 1.5045177993527509e-05, "loss": 0.0659, "step": 95690 }, { "epoch": 37.17, "learning_rate": 1.5044660194174759e-05, "loss": 0.112, "step": 95700 }, { "epoch": 37.17, "learning_rate": 1.5044142394822008e-05, "loss": 0.0828, "step": 95710 }, { "epoch": 37.17, "learning_rate": 1.5043624595469258e-05, "loss": 0.0053, "step": 95720 }, { "epoch": 37.18, "learning_rate": 1.5043106796116507e-05, "loss": 0.2158, "step": 95730 }, { "epoch": 37.18, "learning_rate": 1.5042588996763757e-05, "loss": 0.0871, "step": 95740 }, { "epoch": 37.18, "learning_rate": 1.5042071197411003e-05, "loss": 0.0393, "step": 95750 }, { "epoch": 37.19, "learning_rate": 1.5041553398058253e-05, "loss": 0.0584, "step": 95760 }, { "epoch": 37.19, "learning_rate": 1.5041035598705502e-05, "loss": 0.0033, "step": 95770 }, { "epoch": 37.2, "learning_rate": 1.5040517799352752e-05, "loss": 0.0096, "step": 95780 }, { "epoch": 37.2, "learning_rate": 1.5040000000000002e-05, "loss": 0.0433, "step": 95790 }, { "epoch": 37.2, "learning_rate": 1.5039482200647251e-05, "loss": 0.0444, "step": 95800 }, { "epoch": 37.21, "learning_rate": 1.5038964401294501e-05, "loss": 0.0372, "step": 95810 }, { "epoch": 37.21, "learning_rate": 1.5038446601941749e-05, "loss": 0.0556, "step": 95820 }, { "epoch": 37.22, "learning_rate": 1.5037928802588997e-05, "loss": 0.0269, "step": 95830 }, { "epoch": 37.22, "learning_rate": 1.5037411003236246e-05, "loss": 0.0915, "step": 95840 }, { "epoch": 37.22, "learning_rate": 1.5036893203883496e-05, "loss": 0.0237, "step": 95850 }, { "epoch": 37.23, "learning_rate": 1.5036375404530746e-05, "loss": 0.0448, "step": 95860 }, { "epoch": 37.23, "learning_rate": 1.5035857605177995e-05, "loss": 0.1056, "step": 95870 }, { "epoch": 37.23, "learning_rate": 1.5035339805825245e-05, "loss": 0.0583, "step": 95880 }, { "epoch": 37.24, "learning_rate": 1.5034822006472494e-05, "loss": 0.1452, "step": 95890 }, { "epoch": 37.24, "learning_rate": 1.5034304207119742e-05, "loss": 0.0623, "step": 95900 }, { "epoch": 37.25, "learning_rate": 1.5033786407766992e-05, "loss": 0.1408, "step": 95910 }, { "epoch": 37.25, "learning_rate": 1.503326860841424e-05, "loss": 0.0034, "step": 95920 }, { "epoch": 37.25, "learning_rate": 1.503275080906149e-05, "loss": 0.016, "step": 95930 }, { "epoch": 37.26, "learning_rate": 1.503223300970874e-05, "loss": 0.0213, "step": 95940 }, { "epoch": 37.26, "learning_rate": 1.5031715210355989e-05, "loss": 0.0211, "step": 95950 }, { "epoch": 37.27, "learning_rate": 1.5031197411003238e-05, "loss": 0.0779, "step": 95960 }, { "epoch": 37.27, "learning_rate": 1.5030679611650486e-05, "loss": 0.0905, "step": 95970 }, { "epoch": 37.27, "learning_rate": 1.5030161812297736e-05, "loss": 0.013, "step": 95980 }, { "epoch": 37.28, "learning_rate": 1.5029644012944986e-05, "loss": 0.0651, "step": 95990 }, { "epoch": 37.28, "learning_rate": 1.5029126213592234e-05, "loss": 0.0211, "step": 96000 }, { "epoch": 37.29, "learning_rate": 1.5028608414239483e-05, "loss": 0.0303, "step": 96010 }, { "epoch": 37.29, "learning_rate": 1.5028090614886733e-05, "loss": 0.0275, "step": 96020 }, { "epoch": 37.29, "learning_rate": 1.5027572815533982e-05, "loss": 0.1318, "step": 96030 }, { "epoch": 37.3, "learning_rate": 1.5027055016181232e-05, "loss": 0.0548, "step": 96040 }, { "epoch": 37.3, "learning_rate": 1.502653721682848e-05, "loss": 0.0998, "step": 96050 }, { "epoch": 37.3, "learning_rate": 1.502601941747573e-05, "loss": 0.0907, "step": 96060 }, { "epoch": 37.31, "learning_rate": 1.5025501618122979e-05, "loss": 0.0111, "step": 96070 }, { "epoch": 37.31, "learning_rate": 1.5024983818770227e-05, "loss": 0.1035, "step": 96080 }, { "epoch": 37.32, "learning_rate": 1.5024466019417477e-05, "loss": 0.0453, "step": 96090 }, { "epoch": 37.32, "learning_rate": 1.5023948220064726e-05, "loss": 0.0853, "step": 96100 }, { "epoch": 37.32, "learning_rate": 1.5023430420711976e-05, "loss": 0.0925, "step": 96110 }, { "epoch": 37.33, "learning_rate": 1.5022912621359224e-05, "loss": 0.1193, "step": 96120 }, { "epoch": 37.33, "learning_rate": 1.5022394822006473e-05, "loss": 0.1431, "step": 96130 }, { "epoch": 37.34, "learning_rate": 1.5021877022653723e-05, "loss": 0.2297, "step": 96140 }, { "epoch": 37.34, "learning_rate": 1.5021359223300973e-05, "loss": 0.0422, "step": 96150 }, { "epoch": 37.34, "learning_rate": 1.502084142394822e-05, "loss": 0.1476, "step": 96160 }, { "epoch": 37.35, "learning_rate": 1.502032362459547e-05, "loss": 0.0651, "step": 96170 }, { "epoch": 37.35, "learning_rate": 1.501980582524272e-05, "loss": 0.0756, "step": 96180 }, { "epoch": 37.36, "learning_rate": 1.501928802588997e-05, "loss": 0.0229, "step": 96190 }, { "epoch": 37.36, "learning_rate": 1.5018770226537217e-05, "loss": 0.1384, "step": 96200 }, { "epoch": 37.36, "learning_rate": 1.5018252427184467e-05, "loss": 0.1253, "step": 96210 }, { "epoch": 37.37, "learning_rate": 1.5017734627831717e-05, "loss": 0.1963, "step": 96220 }, { "epoch": 37.37, "learning_rate": 1.5017216828478966e-05, "loss": 0.1905, "step": 96230 }, { "epoch": 37.37, "learning_rate": 1.5016699029126214e-05, "loss": 0.0245, "step": 96240 }, { "epoch": 37.38, "learning_rate": 1.5016181229773464e-05, "loss": 0.0322, "step": 96250 }, { "epoch": 37.38, "learning_rate": 1.5015663430420713e-05, "loss": 0.0751, "step": 96260 }, { "epoch": 37.39, "learning_rate": 1.5015145631067963e-05, "loss": 0.1146, "step": 96270 }, { "epoch": 37.39, "learning_rate": 1.5014627831715211e-05, "loss": 0.1243, "step": 96280 }, { "epoch": 37.39, "learning_rate": 1.501411003236246e-05, "loss": 0.409, "step": 96290 }, { "epoch": 37.4, "learning_rate": 1.501359223300971e-05, "loss": 0.1193, "step": 96300 }, { "epoch": 37.4, "learning_rate": 1.501307443365696e-05, "loss": 0.1095, "step": 96310 }, { "epoch": 37.41, "learning_rate": 1.5012556634304208e-05, "loss": 0.0263, "step": 96320 }, { "epoch": 37.41, "learning_rate": 1.5012038834951457e-05, "loss": 0.0408, "step": 96330 }, { "epoch": 37.41, "learning_rate": 1.5011521035598707e-05, "loss": 0.0275, "step": 96340 }, { "epoch": 37.42, "learning_rate": 1.5011003236245955e-05, "loss": 0.0318, "step": 96350 }, { "epoch": 37.42, "learning_rate": 1.5010485436893205e-05, "loss": 0.0965, "step": 96360 }, { "epoch": 37.43, "learning_rate": 1.5009967637540454e-05, "loss": 0.0929, "step": 96370 }, { "epoch": 37.43, "learning_rate": 1.5009449838187704e-05, "loss": 0.0698, "step": 96380 }, { "epoch": 37.43, "learning_rate": 1.5008932038834953e-05, "loss": 0.0766, "step": 96390 }, { "epoch": 37.44, "learning_rate": 1.5008414239482201e-05, "loss": 0.1149, "step": 96400 }, { "epoch": 37.44, "learning_rate": 1.5007896440129451e-05, "loss": 0.1017, "step": 96410 }, { "epoch": 37.44, "learning_rate": 1.50073786407767e-05, "loss": 0.1253, "step": 96420 }, { "epoch": 37.45, "learning_rate": 1.5006860841423948e-05, "loss": 0.0344, "step": 96430 }, { "epoch": 37.45, "learning_rate": 1.5006343042071198e-05, "loss": 0.0654, "step": 96440 }, { "epoch": 37.46, "learning_rate": 1.5005825242718448e-05, "loss": 0.0243, "step": 96450 }, { "epoch": 37.46, "learning_rate": 1.5005307443365697e-05, "loss": 0.1067, "step": 96460 }, { "epoch": 37.46, "learning_rate": 1.5004789644012947e-05, "loss": 0.1098, "step": 96470 }, { "epoch": 37.47, "learning_rate": 1.5004271844660197e-05, "loss": 0.1027, "step": 96480 }, { "epoch": 37.47, "learning_rate": 1.5003754045307444e-05, "loss": 0.1278, "step": 96490 }, { "epoch": 37.48, "learning_rate": 1.5003236245954692e-05, "loss": 0.0046, "step": 96500 }, { "epoch": 37.48, "learning_rate": 1.5002718446601942e-05, "loss": 0.2438, "step": 96510 }, { "epoch": 37.48, "learning_rate": 1.5002200647249192e-05, "loss": 0.0997, "step": 96520 }, { "epoch": 37.49, "learning_rate": 1.5001682847896441e-05, "loss": 0.0094, "step": 96530 }, { "epoch": 37.49, "learning_rate": 1.5001165048543691e-05, "loss": 0.0013, "step": 96540 }, { "epoch": 37.5, "learning_rate": 1.500064724919094e-05, "loss": 0.026, "step": 96550 }, { "epoch": 37.5, "learning_rate": 1.500012944983819e-05, "loss": 0.1674, "step": 96560 }, { "epoch": 37.5, "learning_rate": 1.4999611650485438e-05, "loss": 0.0873, "step": 96570 }, { "epoch": 37.51, "learning_rate": 1.4999093851132686e-05, "loss": 0.0752, "step": 96580 }, { "epoch": 37.51, "learning_rate": 1.4998576051779936e-05, "loss": 0.0027, "step": 96590 }, { "epoch": 37.51, "learning_rate": 1.4998058252427185e-05, "loss": 0.036, "step": 96600 }, { "epoch": 37.52, "learning_rate": 1.4997540453074435e-05, "loss": 0.1156, "step": 96610 }, { "epoch": 37.52, "learning_rate": 1.4997022653721684e-05, "loss": 0.0375, "step": 96620 }, { "epoch": 37.53, "learning_rate": 1.4996504854368934e-05, "loss": 0.0554, "step": 96630 }, { "epoch": 37.53, "learning_rate": 1.4995987055016184e-05, "loss": 0.1024, "step": 96640 }, { "epoch": 37.53, "learning_rate": 1.499546925566343e-05, "loss": 0.1406, "step": 96650 }, { "epoch": 37.54, "learning_rate": 1.499495145631068e-05, "loss": 0.1546, "step": 96660 }, { "epoch": 37.54, "learning_rate": 1.4994433656957929e-05, "loss": 0.0728, "step": 96670 }, { "epoch": 37.55, "learning_rate": 1.4993915857605179e-05, "loss": 0.0944, "step": 96680 }, { "epoch": 37.55, "learning_rate": 1.4993398058252428e-05, "loss": 0.1897, "step": 96690 }, { "epoch": 37.55, "learning_rate": 1.4992880258899678e-05, "loss": 0.1632, "step": 96700 }, { "epoch": 37.56, "learning_rate": 1.4992362459546928e-05, "loss": 0.0213, "step": 96710 }, { "epoch": 37.56, "learning_rate": 1.4991844660194177e-05, "loss": 0.0678, "step": 96720 }, { "epoch": 37.57, "learning_rate": 1.4991326860841423e-05, "loss": 0.0711, "step": 96730 }, { "epoch": 37.57, "learning_rate": 1.4990809061488673e-05, "loss": 0.1229, "step": 96740 }, { "epoch": 37.57, "learning_rate": 1.4990291262135923e-05, "loss": 0.2215, "step": 96750 }, { "epoch": 37.58, "learning_rate": 1.4989773462783172e-05, "loss": 0.1466, "step": 96760 }, { "epoch": 37.58, "learning_rate": 1.4989255663430422e-05, "loss": 0.0234, "step": 96770 }, { "epoch": 37.58, "learning_rate": 1.4988737864077672e-05, "loss": 0.1164, "step": 96780 }, { "epoch": 37.59, "learning_rate": 1.4988220064724921e-05, "loss": 0.1013, "step": 96790 }, { "epoch": 37.59, "learning_rate": 1.498770226537217e-05, "loss": 0.2159, "step": 96800 }, { "epoch": 37.6, "learning_rate": 1.4987184466019417e-05, "loss": 0.1013, "step": 96810 }, { "epoch": 37.6, "learning_rate": 1.4986666666666667e-05, "loss": 0.2663, "step": 96820 }, { "epoch": 37.6, "learning_rate": 1.4986148867313916e-05, "loss": 0.1339, "step": 96830 }, { "epoch": 37.61, "learning_rate": 1.4985631067961166e-05, "loss": 0.175, "step": 96840 }, { "epoch": 37.61, "learning_rate": 1.4985113268608415e-05, "loss": 0.0549, "step": 96850 }, { "epoch": 37.62, "learning_rate": 1.4984595469255665e-05, "loss": 0.0305, "step": 96860 }, { "epoch": 37.62, "learning_rate": 1.4984077669902915e-05, "loss": 0.2895, "step": 96870 }, { "epoch": 37.62, "learning_rate": 1.4983559870550164e-05, "loss": 0.03, "step": 96880 }, { "epoch": 37.63, "learning_rate": 1.498304207119741e-05, "loss": 0.1723, "step": 96890 }, { "epoch": 37.63, "learning_rate": 1.498252427184466e-05, "loss": 0.0824, "step": 96900 }, { "epoch": 37.63, "learning_rate": 1.498200647249191e-05, "loss": 0.0065, "step": 96910 }, { "epoch": 37.64, "learning_rate": 1.498148867313916e-05, "loss": 0.1941, "step": 96920 }, { "epoch": 37.64, "learning_rate": 1.4980970873786409e-05, "loss": 0.0297, "step": 96930 }, { "epoch": 37.65, "learning_rate": 1.4980453074433659e-05, "loss": 0.0185, "step": 96940 }, { "epoch": 37.65, "learning_rate": 1.4979935275080908e-05, "loss": 0.0917, "step": 96950 }, { "epoch": 37.65, "learning_rate": 1.4979417475728158e-05, "loss": 0.0583, "step": 96960 }, { "epoch": 37.66, "learning_rate": 1.4978899676375404e-05, "loss": 0.0536, "step": 96970 }, { "epoch": 37.66, "learning_rate": 1.4978381877022654e-05, "loss": 0.1402, "step": 96980 }, { "epoch": 37.67, "learning_rate": 1.4977864077669903e-05, "loss": 0.0915, "step": 96990 }, { "epoch": 37.67, "learning_rate": 1.4977346278317153e-05, "loss": 0.0288, "step": 97000 }, { "epoch": 37.67, "learning_rate": 1.4976828478964403e-05, "loss": 0.1571, "step": 97010 }, { "epoch": 37.68, "learning_rate": 1.4976310679611652e-05, "loss": 0.0853, "step": 97020 }, { "epoch": 37.68, "learning_rate": 1.4975792880258902e-05, "loss": 0.2014, "step": 97030 }, { "epoch": 37.69, "learning_rate": 1.4975275080906151e-05, "loss": 0.1132, "step": 97040 }, { "epoch": 37.69, "learning_rate": 1.4974757281553401e-05, "loss": 0.0319, "step": 97050 }, { "epoch": 37.69, "learning_rate": 1.4974239482200647e-05, "loss": 0.1447, "step": 97060 }, { "epoch": 37.7, "learning_rate": 1.4973721682847897e-05, "loss": 0.0286, "step": 97070 }, { "epoch": 37.7, "learning_rate": 1.4973203883495147e-05, "loss": 0.0375, "step": 97080 }, { "epoch": 37.7, "learning_rate": 1.4972686084142396e-05, "loss": 0.1596, "step": 97090 }, { "epoch": 37.71, "learning_rate": 1.4972168284789646e-05, "loss": 0.1598, "step": 97100 }, { "epoch": 37.71, "learning_rate": 1.4971650485436895e-05, "loss": 0.0608, "step": 97110 }, { "epoch": 37.72, "learning_rate": 1.4971132686084145e-05, "loss": 0.14, "step": 97120 }, { "epoch": 37.72, "learning_rate": 1.4970614886731395e-05, "loss": 0.0995, "step": 97130 }, { "epoch": 37.72, "learning_rate": 1.497009708737864e-05, "loss": 0.2159, "step": 97140 }, { "epoch": 37.73, "learning_rate": 1.496957928802589e-05, "loss": 0.1292, "step": 97150 }, { "epoch": 37.73, "learning_rate": 1.496906148867314e-05, "loss": 0.0732, "step": 97160 }, { "epoch": 37.74, "learning_rate": 1.496854368932039e-05, "loss": 0.0011, "step": 97170 }, { "epoch": 37.74, "learning_rate": 1.496802588996764e-05, "loss": 0.0329, "step": 97180 }, { "epoch": 37.74, "learning_rate": 1.4967508090614889e-05, "loss": 0.0605, "step": 97190 }, { "epoch": 37.75, "learning_rate": 1.4966990291262139e-05, "loss": 0.1259, "step": 97200 }, { "epoch": 37.75, "learning_rate": 1.4966472491909388e-05, "loss": 0.0552, "step": 97210 }, { "epoch": 37.76, "learning_rate": 1.4965954692556634e-05, "loss": 0.0562, "step": 97220 }, { "epoch": 37.76, "learning_rate": 1.4965436893203884e-05, "loss": 0.0922, "step": 97230 }, { "epoch": 37.76, "learning_rate": 1.4964919093851134e-05, "loss": 0.2566, "step": 97240 }, { "epoch": 37.77, "learning_rate": 1.4964401294498383e-05, "loss": 0.0814, "step": 97250 }, { "epoch": 37.77, "learning_rate": 1.4963883495145633e-05, "loss": 0.0759, "step": 97260 }, { "epoch": 37.77, "learning_rate": 1.4963365695792882e-05, "loss": 0.0873, "step": 97270 }, { "epoch": 37.78, "learning_rate": 1.4962847896440132e-05, "loss": 0.0482, "step": 97280 }, { "epoch": 37.78, "learning_rate": 1.496233009708738e-05, "loss": 0.1491, "step": 97290 }, { "epoch": 37.79, "learning_rate": 1.4961812297734628e-05, "loss": 0.1066, "step": 97300 }, { "epoch": 37.79, "learning_rate": 1.4961294498381878e-05, "loss": 0.0982, "step": 97310 }, { "epoch": 37.79, "learning_rate": 1.4960776699029127e-05, "loss": 0.1538, "step": 97320 }, { "epoch": 37.8, "learning_rate": 1.4960258899676377e-05, "loss": 0.1668, "step": 97330 }, { "epoch": 37.8, "learning_rate": 1.4959741100323626e-05, "loss": 0.1228, "step": 97340 }, { "epoch": 37.81, "learning_rate": 1.4959223300970876e-05, "loss": 0.1134, "step": 97350 }, { "epoch": 37.81, "learning_rate": 1.4958705501618126e-05, "loss": 0.1175, "step": 97360 }, { "epoch": 37.81, "learning_rate": 1.4958187702265374e-05, "loss": 0.2772, "step": 97370 }, { "epoch": 37.82, "learning_rate": 1.4957669902912622e-05, "loss": 0.0169, "step": 97380 }, { "epoch": 37.82, "learning_rate": 1.4957152103559871e-05, "loss": 0.0731, "step": 97390 }, { "epoch": 37.83, "learning_rate": 1.495663430420712e-05, "loss": 0.0143, "step": 97400 }, { "epoch": 37.83, "learning_rate": 1.495611650485437e-05, "loss": 0.1522, "step": 97410 }, { "epoch": 37.83, "learning_rate": 1.495559870550162e-05, "loss": 0.257, "step": 97420 }, { "epoch": 37.84, "learning_rate": 1.495508090614887e-05, "loss": 0.1106, "step": 97430 }, { "epoch": 37.84, "learning_rate": 1.4954563106796118e-05, "loss": 0.0584, "step": 97440 }, { "epoch": 37.84, "learning_rate": 1.4954045307443367e-05, "loss": 0.0513, "step": 97450 }, { "epoch": 37.85, "learning_rate": 1.4953527508090615e-05, "loss": 0.1277, "step": 97460 }, { "epoch": 37.85, "learning_rate": 1.4953009708737865e-05, "loss": 0.0379, "step": 97470 }, { "epoch": 37.86, "learning_rate": 1.4952491909385114e-05, "loss": 0.0425, "step": 97480 }, { "epoch": 37.86, "learning_rate": 1.4951974110032364e-05, "loss": 0.0308, "step": 97490 }, { "epoch": 37.86, "learning_rate": 1.4951456310679614e-05, "loss": 0.0262, "step": 97500 }, { "epoch": 37.87, "learning_rate": 1.4950938511326863e-05, "loss": 0.1337, "step": 97510 }, { "epoch": 37.87, "learning_rate": 1.4950420711974111e-05, "loss": 0.0584, "step": 97520 }, { "epoch": 37.88, "learning_rate": 1.494990291262136e-05, "loss": 0.1228, "step": 97530 }, { "epoch": 37.88, "learning_rate": 1.4949385113268609e-05, "loss": 0.058, "step": 97540 }, { "epoch": 37.88, "learning_rate": 1.4948867313915858e-05, "loss": 0.1081, "step": 97550 }, { "epoch": 37.89, "learning_rate": 1.4948349514563108e-05, "loss": 0.1565, "step": 97560 }, { "epoch": 37.89, "learning_rate": 1.4947831715210357e-05, "loss": 0.0958, "step": 97570 }, { "epoch": 37.9, "learning_rate": 1.4947313915857607e-05, "loss": 0.1235, "step": 97580 }, { "epoch": 37.9, "learning_rate": 1.4946796116504855e-05, "loss": 0.0807, "step": 97590 }, { "epoch": 37.9, "learning_rate": 1.4946278317152105e-05, "loss": 0.1023, "step": 97600 }, { "epoch": 37.91, "learning_rate": 1.4945760517799354e-05, "loss": 0.1485, "step": 97610 }, { "epoch": 37.91, "learning_rate": 1.4945242718446604e-05, "loss": 0.1425, "step": 97620 }, { "epoch": 37.91, "learning_rate": 1.4944724919093852e-05, "loss": 0.0066, "step": 97630 }, { "epoch": 37.92, "learning_rate": 1.4944207119741101e-05, "loss": 0.0868, "step": 97640 }, { "epoch": 37.92, "learning_rate": 1.4943689320388351e-05, "loss": 0.0684, "step": 97650 }, { "epoch": 37.93, "learning_rate": 1.49431715210356e-05, "loss": 0.0031, "step": 97660 }, { "epoch": 37.93, "learning_rate": 1.4942653721682849e-05, "loss": 0.134, "step": 97670 }, { "epoch": 37.93, "learning_rate": 1.4942135922330098e-05, "loss": 0.0351, "step": 97680 }, { "epoch": 37.94, "learning_rate": 1.4941618122977348e-05, "loss": 0.1142, "step": 97690 }, { "epoch": 37.94, "learning_rate": 1.4941100323624597e-05, "loss": 0.1095, "step": 97700 }, { "epoch": 37.95, "learning_rate": 1.4940582524271845e-05, "loss": 0.1249, "step": 97710 }, { "epoch": 37.95, "learning_rate": 1.4940064724919095e-05, "loss": 0.2617, "step": 97720 }, { "epoch": 37.95, "learning_rate": 1.4939546925566345e-05, "loss": 0.2503, "step": 97730 }, { "epoch": 37.96, "learning_rate": 1.4939029126213594e-05, "loss": 0.0557, "step": 97740 }, { "epoch": 37.96, "learning_rate": 1.4938511326860842e-05, "loss": 0.0771, "step": 97750 }, { "epoch": 37.97, "learning_rate": 1.4937993527508092e-05, "loss": 0.0464, "step": 97760 }, { "epoch": 37.97, "learning_rate": 1.4937475728155341e-05, "loss": 0.1281, "step": 97770 }, { "epoch": 37.97, "learning_rate": 1.4936957928802591e-05, "loss": 0.0758, "step": 97780 }, { "epoch": 37.98, "learning_rate": 1.4936440129449839e-05, "loss": 0.1397, "step": 97790 }, { "epoch": 37.98, "learning_rate": 1.4935922330097089e-05, "loss": 0.0245, "step": 97800 }, { "epoch": 37.98, "learning_rate": 1.4935404530744338e-05, "loss": 0.0273, "step": 97810 }, { "epoch": 37.99, "learning_rate": 1.4934886731391586e-05, "loss": 0.1155, "step": 97820 }, { "epoch": 37.99, "learning_rate": 1.4934368932038836e-05, "loss": 0.0088, "step": 97830 }, { "epoch": 38.0, "learning_rate": 1.4933851132686085e-05, "loss": 0.0832, "step": 97840 }, { "epoch": 38.0, "learning_rate": 1.4933333333333335e-05, "loss": 0.0057, "step": 97850 }, { "epoch": 38.0, "eval_accuracy": 0.9466299862448418, "eval_loss": 0.3225008249282837, "eval_runtime": 8.2867, "eval_samples_per_second": 438.657, "eval_steps_per_second": 54.908, "step": 97850 }, { "epoch": 38.0, "learning_rate": 1.4932815533980585e-05, "loss": 0.0213, "step": 97860 }, { "epoch": 38.01, "learning_rate": 1.4932297734627832e-05, "loss": 0.0784, "step": 97870 }, { "epoch": 38.01, "learning_rate": 1.4931779935275082e-05, "loss": 0.0805, "step": 97880 }, { "epoch": 38.02, "learning_rate": 1.4931262135922332e-05, "loss": 0.1741, "step": 97890 }, { "epoch": 38.02, "learning_rate": 1.493074433656958e-05, "loss": 0.0553, "step": 97900 }, { "epoch": 38.02, "learning_rate": 1.493022653721683e-05, "loss": 0.0756, "step": 97910 }, { "epoch": 38.03, "learning_rate": 1.4929708737864079e-05, "loss": 0.0686, "step": 97920 }, { "epoch": 38.03, "learning_rate": 1.4929190938511328e-05, "loss": 0.1228, "step": 97930 }, { "epoch": 38.03, "learning_rate": 1.4928673139158578e-05, "loss": 0.0651, "step": 97940 }, { "epoch": 38.04, "learning_rate": 1.4928155339805826e-05, "loss": 0.0689, "step": 97950 }, { "epoch": 38.04, "learning_rate": 1.4927637540453076e-05, "loss": 0.149, "step": 97960 }, { "epoch": 38.05, "learning_rate": 1.4927119741100324e-05, "loss": 0.2265, "step": 97970 }, { "epoch": 38.05, "learning_rate": 1.4926601941747573e-05, "loss": 0.043, "step": 97980 }, { "epoch": 38.05, "learning_rate": 1.4926084142394823e-05, "loss": 0.0893, "step": 97990 }, { "epoch": 38.06, "learning_rate": 1.4925566343042072e-05, "loss": 0.2239, "step": 98000 }, { "epoch": 38.06, "learning_rate": 1.4925048543689322e-05, "loss": 0.0606, "step": 98010 }, { "epoch": 38.07, "learning_rate": 1.4924530744336572e-05, "loss": 0.0445, "step": 98020 }, { "epoch": 38.07, "learning_rate": 1.492401294498382e-05, "loss": 0.2089, "step": 98030 }, { "epoch": 38.07, "learning_rate": 1.492349514563107e-05, "loss": 0.1229, "step": 98040 }, { "epoch": 38.08, "learning_rate": 1.4922977346278317e-05, "loss": 0.0649, "step": 98050 }, { "epoch": 38.08, "learning_rate": 1.4922459546925567e-05, "loss": 0.1671, "step": 98060 }, { "epoch": 38.09, "learning_rate": 1.4921941747572816e-05, "loss": 0.0612, "step": 98070 }, { "epoch": 38.09, "learning_rate": 1.4921423948220066e-05, "loss": 0.0411, "step": 98080 }, { "epoch": 38.09, "learning_rate": 1.4920906148867316e-05, "loss": 0.0514, "step": 98090 }, { "epoch": 38.1, "learning_rate": 1.4920388349514565e-05, "loss": 0.0103, "step": 98100 }, { "epoch": 38.1, "learning_rate": 1.4919870550161815e-05, "loss": 0.0283, "step": 98110 }, { "epoch": 38.1, "learning_rate": 1.4919352750809061e-05, "loss": 0.1448, "step": 98120 }, { "epoch": 38.11, "learning_rate": 1.491883495145631e-05, "loss": 0.0572, "step": 98130 }, { "epoch": 38.11, "learning_rate": 1.491831715210356e-05, "loss": 0.0059, "step": 98140 }, { "epoch": 38.12, "learning_rate": 1.491779935275081e-05, "loss": 0.0229, "step": 98150 }, { "epoch": 38.12, "learning_rate": 1.491728155339806e-05, "loss": 0.0564, "step": 98160 }, { "epoch": 38.12, "learning_rate": 1.4916763754045309e-05, "loss": 0.0138, "step": 98170 }, { "epoch": 38.13, "learning_rate": 1.4916245954692559e-05, "loss": 0.105, "step": 98180 }, { "epoch": 38.13, "learning_rate": 1.4915728155339808e-05, "loss": 0.0942, "step": 98190 }, { "epoch": 38.14, "learning_rate": 1.4915210355987055e-05, "loss": 0.0097, "step": 98200 }, { "epoch": 38.14, "learning_rate": 1.4914692556634304e-05, "loss": 0.2045, "step": 98210 }, { "epoch": 38.14, "learning_rate": 1.4914174757281554e-05, "loss": 0.0349, "step": 98220 }, { "epoch": 38.15, "learning_rate": 1.4913656957928803e-05, "loss": 0.0274, "step": 98230 }, { "epoch": 38.15, "learning_rate": 1.4913139158576053e-05, "loss": 0.014, "step": 98240 }, { "epoch": 38.16, "learning_rate": 1.4912621359223303e-05, "loss": 0.1243, "step": 98250 }, { "epoch": 38.16, "learning_rate": 1.4912103559870552e-05, "loss": 0.1181, "step": 98260 }, { "epoch": 38.16, "learning_rate": 1.4911585760517802e-05, "loss": 0.1167, "step": 98270 }, { "epoch": 38.17, "learning_rate": 1.4911067961165048e-05, "loss": 0.0165, "step": 98280 }, { "epoch": 38.17, "learning_rate": 1.4910550161812298e-05, "loss": 0.0169, "step": 98290 }, { "epoch": 38.17, "learning_rate": 1.4910032362459547e-05, "loss": 0.2017, "step": 98300 }, { "epoch": 38.18, "learning_rate": 1.4909514563106797e-05, "loss": 0.0331, "step": 98310 }, { "epoch": 38.18, "learning_rate": 1.4908996763754047e-05, "loss": 0.0805, "step": 98320 }, { "epoch": 38.19, "learning_rate": 1.4908478964401296e-05, "loss": 0.0559, "step": 98330 }, { "epoch": 38.19, "learning_rate": 1.4907961165048546e-05, "loss": 0.1904, "step": 98340 }, { "epoch": 38.19, "learning_rate": 1.4907443365695795e-05, "loss": 0.1672, "step": 98350 }, { "epoch": 38.2, "learning_rate": 1.4906925566343042e-05, "loss": 0.0821, "step": 98360 }, { "epoch": 38.2, "learning_rate": 1.4906407766990291e-05, "loss": 0.1725, "step": 98370 }, { "epoch": 38.21, "learning_rate": 1.4905889967637541e-05, "loss": 0.0242, "step": 98380 }, { "epoch": 38.21, "learning_rate": 1.490537216828479e-05, "loss": 0.1843, "step": 98390 }, { "epoch": 38.21, "learning_rate": 1.490485436893204e-05, "loss": 0.0928, "step": 98400 }, { "epoch": 38.22, "learning_rate": 1.490433656957929e-05, "loss": 0.1337, "step": 98410 }, { "epoch": 38.22, "learning_rate": 1.490381877022654e-05, "loss": 0.1252, "step": 98420 }, { "epoch": 38.23, "learning_rate": 1.4903300970873789e-05, "loss": 0.0405, "step": 98430 }, { "epoch": 38.23, "learning_rate": 1.4902783171521035e-05, "loss": 0.0574, "step": 98440 }, { "epoch": 38.23, "learning_rate": 1.4902265372168285e-05, "loss": 0.1459, "step": 98450 }, { "epoch": 38.24, "learning_rate": 1.4901747572815535e-05, "loss": 0.0598, "step": 98460 }, { "epoch": 38.24, "learning_rate": 1.4901229773462784e-05, "loss": 0.1163, "step": 98470 }, { "epoch": 38.24, "learning_rate": 1.4900711974110034e-05, "loss": 0.1276, "step": 98480 }, { "epoch": 38.25, "learning_rate": 1.4900194174757283e-05, "loss": 0.0791, "step": 98490 }, { "epoch": 38.25, "learning_rate": 1.4899676375404533e-05, "loss": 0.0288, "step": 98500 }, { "epoch": 38.26, "learning_rate": 1.4899158576051783e-05, "loss": 0.0464, "step": 98510 }, { "epoch": 38.26, "learning_rate": 1.4898640776699029e-05, "loss": 0.0077, "step": 98520 }, { "epoch": 38.26, "learning_rate": 1.4898122977346278e-05, "loss": 0.0025, "step": 98530 }, { "epoch": 38.27, "learning_rate": 1.4897605177993528e-05, "loss": 0.0439, "step": 98540 }, { "epoch": 38.27, "learning_rate": 1.4897087378640778e-05, "loss": 0.0452, "step": 98550 }, { "epoch": 38.28, "learning_rate": 1.4896569579288027e-05, "loss": 0.123, "step": 98560 }, { "epoch": 38.28, "learning_rate": 1.4896051779935277e-05, "loss": 0.0867, "step": 98570 }, { "epoch": 38.28, "learning_rate": 1.4895533980582527e-05, "loss": 0.0124, "step": 98580 }, { "epoch": 38.29, "learning_rate": 1.4895016181229776e-05, "loss": 0.0146, "step": 98590 }, { "epoch": 38.29, "learning_rate": 1.4894498381877022e-05, "loss": 0.2028, "step": 98600 }, { "epoch": 38.3, "learning_rate": 1.4893980582524272e-05, "loss": 0.0547, "step": 98610 }, { "epoch": 38.3, "learning_rate": 1.4893462783171522e-05, "loss": 0.0974, "step": 98620 }, { "epoch": 38.3, "learning_rate": 1.4892944983818771e-05, "loss": 0.1116, "step": 98630 }, { "epoch": 38.31, "learning_rate": 1.4892427184466021e-05, "loss": 0.0284, "step": 98640 }, { "epoch": 38.31, "learning_rate": 1.489190938511327e-05, "loss": 0.0312, "step": 98650 }, { "epoch": 38.31, "learning_rate": 1.489139158576052e-05, "loss": 0.0546, "step": 98660 }, { "epoch": 38.32, "learning_rate": 1.489087378640777e-05, "loss": 0.1118, "step": 98670 }, { "epoch": 38.32, "learning_rate": 1.489035598705502e-05, "loss": 0.1236, "step": 98680 }, { "epoch": 38.33, "learning_rate": 1.4889838187702266e-05, "loss": 0.0501, "step": 98690 }, { "epoch": 38.33, "learning_rate": 1.4889320388349515e-05, "loss": 0.0733, "step": 98700 }, { "epoch": 38.33, "learning_rate": 1.4888802588996765e-05, "loss": 0.1451, "step": 98710 }, { "epoch": 38.34, "learning_rate": 1.4888284789644014e-05, "loss": 0.1026, "step": 98720 }, { "epoch": 38.34, "learning_rate": 1.4887766990291264e-05, "loss": 0.0682, "step": 98730 }, { "epoch": 38.35, "learning_rate": 1.4887249190938514e-05, "loss": 0.0998, "step": 98740 }, { "epoch": 38.35, "learning_rate": 1.4886731391585763e-05, "loss": 0.3023, "step": 98750 }, { "epoch": 38.35, "learning_rate": 1.4886213592233011e-05, "loss": 0.1911, "step": 98760 }, { "epoch": 38.36, "learning_rate": 1.4885695792880259e-05, "loss": 0.0133, "step": 98770 }, { "epoch": 38.36, "learning_rate": 1.4885177993527509e-05, "loss": 0.0247, "step": 98780 }, { "epoch": 38.37, "learning_rate": 1.4884660194174758e-05, "loss": 0.0336, "step": 98790 }, { "epoch": 38.37, "learning_rate": 1.4884142394822008e-05, "loss": 0.1235, "step": 98800 }, { "epoch": 38.37, "learning_rate": 1.4883624595469258e-05, "loss": 0.0945, "step": 98810 }, { "epoch": 38.38, "learning_rate": 1.4883106796116507e-05, "loss": 0.0054, "step": 98820 }, { "epoch": 38.38, "learning_rate": 1.4882588996763757e-05, "loss": 0.3205, "step": 98830 }, { "epoch": 38.38, "learning_rate": 1.4882071197411005e-05, "loss": 0.0298, "step": 98840 }, { "epoch": 38.39, "learning_rate": 1.4881553398058253e-05, "loss": 0.0541, "step": 98850 }, { "epoch": 38.39, "learning_rate": 1.4881035598705502e-05, "loss": 0.0412, "step": 98860 }, { "epoch": 38.4, "learning_rate": 1.4880517799352752e-05, "loss": 0.0576, "step": 98870 }, { "epoch": 38.4, "learning_rate": 1.4880000000000002e-05, "loss": 0.1107, "step": 98880 }, { "epoch": 38.4, "learning_rate": 1.4879482200647251e-05, "loss": 0.0133, "step": 98890 }, { "epoch": 38.41, "learning_rate": 1.48789644012945e-05, "loss": 0.0234, "step": 98900 }, { "epoch": 38.41, "learning_rate": 1.4878446601941749e-05, "loss": 0.1651, "step": 98910 }, { "epoch": 38.42, "learning_rate": 1.4877928802588998e-05, "loss": 0.0051, "step": 98920 }, { "epoch": 38.42, "learning_rate": 1.4877411003236246e-05, "loss": 0.1336, "step": 98930 }, { "epoch": 38.42, "learning_rate": 1.4876893203883496e-05, "loss": 0.0286, "step": 98940 }, { "epoch": 38.43, "learning_rate": 1.4876375404530745e-05, "loss": 0.109, "step": 98950 }, { "epoch": 38.43, "learning_rate": 1.4875857605177995e-05, "loss": 0.0559, "step": 98960 }, { "epoch": 38.43, "learning_rate": 1.4875339805825245e-05, "loss": 0.2066, "step": 98970 }, { "epoch": 38.44, "learning_rate": 1.4874822006472494e-05, "loss": 0.1115, "step": 98980 }, { "epoch": 38.44, "learning_rate": 1.4874304207119742e-05, "loss": 0.0614, "step": 98990 }, { "epoch": 38.45, "learning_rate": 1.4873786407766992e-05, "loss": 0.1449, "step": 99000 }, { "epoch": 38.45, "learning_rate": 1.487326860841424e-05, "loss": 0.0305, "step": 99010 }, { "epoch": 38.45, "learning_rate": 1.487275080906149e-05, "loss": 0.0913, "step": 99020 }, { "epoch": 38.46, "learning_rate": 1.4872233009708739e-05, "loss": 0.0596, "step": 99030 }, { "epoch": 38.46, "learning_rate": 1.4871715210355989e-05, "loss": 0.0827, "step": 99040 }, { "epoch": 38.47, "learning_rate": 1.4871197411003238e-05, "loss": 0.1773, "step": 99050 }, { "epoch": 38.47, "learning_rate": 1.4870679611650486e-05, "loss": 0.1146, "step": 99060 }, { "epoch": 38.47, "learning_rate": 1.4870161812297736e-05, "loss": 0.1473, "step": 99070 }, { "epoch": 38.48, "learning_rate": 1.4869644012944985e-05, "loss": 0.1332, "step": 99080 }, { "epoch": 38.48, "learning_rate": 1.4869126213592233e-05, "loss": 0.0673, "step": 99090 }, { "epoch": 38.49, "learning_rate": 1.4868608414239483e-05, "loss": 0.1182, "step": 99100 }, { "epoch": 38.49, "learning_rate": 1.4868090614886733e-05, "loss": 0.08, "step": 99110 }, { "epoch": 38.49, "learning_rate": 1.4867572815533982e-05, "loss": 0.0318, "step": 99120 }, { "epoch": 38.5, "learning_rate": 1.4867055016181232e-05, "loss": 0.1283, "step": 99130 }, { "epoch": 38.5, "learning_rate": 1.486653721682848e-05, "loss": 0.2015, "step": 99140 }, { "epoch": 38.5, "learning_rate": 1.486601941747573e-05, "loss": 0.1117, "step": 99150 }, { "epoch": 38.51, "learning_rate": 1.4865501618122979e-05, "loss": 0.0761, "step": 99160 }, { "epoch": 38.51, "learning_rate": 1.4864983818770227e-05, "loss": 0.1562, "step": 99170 }, { "epoch": 38.52, "learning_rate": 1.4864466019417477e-05, "loss": 0.0489, "step": 99180 }, { "epoch": 38.52, "learning_rate": 1.4863948220064726e-05, "loss": 0.1195, "step": 99190 }, { "epoch": 38.52, "learning_rate": 1.4863430420711976e-05, "loss": 0.0076, "step": 99200 }, { "epoch": 38.53, "learning_rate": 1.4862912621359224e-05, "loss": 0.118, "step": 99210 }, { "epoch": 38.53, "learning_rate": 1.4862394822006473e-05, "loss": 0.0672, "step": 99220 }, { "epoch": 38.54, "learning_rate": 1.4861877022653723e-05, "loss": 0.1215, "step": 99230 }, { "epoch": 38.54, "learning_rate": 1.4861359223300973e-05, "loss": 0.1152, "step": 99240 }, { "epoch": 38.54, "learning_rate": 1.4860841423948222e-05, "loss": 0.2221, "step": 99250 }, { "epoch": 38.55, "learning_rate": 1.486032362459547e-05, "loss": 0.0165, "step": 99260 }, { "epoch": 38.55, "learning_rate": 1.485980582524272e-05, "loss": 0.1347, "step": 99270 }, { "epoch": 38.56, "learning_rate": 1.485928802588997e-05, "loss": 0.004, "step": 99280 }, { "epoch": 38.56, "learning_rate": 1.4858770226537217e-05, "loss": 0.0853, "step": 99290 }, { "epoch": 38.56, "learning_rate": 1.4858252427184467e-05, "loss": 0.037, "step": 99300 }, { "epoch": 38.57, "learning_rate": 1.4857734627831716e-05, "loss": 0.0797, "step": 99310 }, { "epoch": 38.57, "learning_rate": 1.4857216828478966e-05, "loss": 0.0135, "step": 99320 }, { "epoch": 38.57, "learning_rate": 1.4856699029126216e-05, "loss": 0.105, "step": 99330 }, { "epoch": 38.58, "learning_rate": 1.4856181229773464e-05, "loss": 0.0979, "step": 99340 }, { "epoch": 38.58, "learning_rate": 1.4855663430420713e-05, "loss": 0.0139, "step": 99350 }, { "epoch": 38.59, "learning_rate": 1.4855145631067963e-05, "loss": 0.1495, "step": 99360 }, { "epoch": 38.59, "learning_rate": 1.485462783171521e-05, "loss": 0.0074, "step": 99370 }, { "epoch": 38.59, "learning_rate": 1.485411003236246e-05, "loss": 0.1203, "step": 99380 }, { "epoch": 38.6, "learning_rate": 1.485359223300971e-05, "loss": 0.1085, "step": 99390 }, { "epoch": 38.6, "learning_rate": 1.485307443365696e-05, "loss": 0.0567, "step": 99400 }, { "epoch": 38.61, "learning_rate": 1.485255663430421e-05, "loss": 0.0734, "step": 99410 }, { "epoch": 38.61, "learning_rate": 1.4852038834951457e-05, "loss": 0.0933, "step": 99420 }, { "epoch": 38.61, "learning_rate": 1.4851521035598707e-05, "loss": 0.0551, "step": 99430 }, { "epoch": 38.62, "learning_rate": 1.4851003236245955e-05, "loss": 0.0616, "step": 99440 }, { "epoch": 38.62, "learning_rate": 1.4850485436893204e-05, "loss": 0.0331, "step": 99450 }, { "epoch": 38.63, "learning_rate": 1.4849967637540454e-05, "loss": 0.1591, "step": 99460 }, { "epoch": 38.63, "learning_rate": 1.4849449838187704e-05, "loss": 0.1119, "step": 99470 }, { "epoch": 38.63, "learning_rate": 1.4848932038834953e-05, "loss": 0.1549, "step": 99480 }, { "epoch": 38.64, "learning_rate": 1.4848414239482203e-05, "loss": 0.1135, "step": 99490 }, { "epoch": 38.64, "learning_rate": 1.484789644012945e-05, "loss": 0.0313, "step": 99500 }, { "epoch": 38.64, "learning_rate": 1.48473786407767e-05, "loss": 0.1744, "step": 99510 }, { "epoch": 38.65, "learning_rate": 1.4846860841423948e-05, "loss": 0.1557, "step": 99520 }, { "epoch": 38.65, "learning_rate": 1.4846343042071198e-05, "loss": 0.1287, "step": 99530 }, { "epoch": 38.66, "learning_rate": 1.4845825242718448e-05, "loss": 0.0355, "step": 99540 }, { "epoch": 38.66, "learning_rate": 1.4845307443365697e-05, "loss": 0.1067, "step": 99550 }, { "epoch": 38.66, "learning_rate": 1.4844789644012947e-05, "loss": 0.024, "step": 99560 }, { "epoch": 38.67, "learning_rate": 1.4844271844660196e-05, "loss": 0.2945, "step": 99570 }, { "epoch": 38.67, "learning_rate": 1.4843754045307444e-05, "loss": 0.0981, "step": 99580 }, { "epoch": 38.68, "learning_rate": 1.4843236245954692e-05, "loss": 0.0533, "step": 99590 }, { "epoch": 38.68, "learning_rate": 1.4842718446601942e-05, "loss": 0.0963, "step": 99600 }, { "epoch": 38.68, "learning_rate": 1.4842200647249191e-05, "loss": 0.235, "step": 99610 }, { "epoch": 38.69, "learning_rate": 1.4841682847896441e-05, "loss": 0.1765, "step": 99620 }, { "epoch": 38.69, "learning_rate": 1.484116504854369e-05, "loss": 0.0639, "step": 99630 }, { "epoch": 38.7, "learning_rate": 1.484064724919094e-05, "loss": 0.1098, "step": 99640 }, { "epoch": 38.7, "learning_rate": 1.484012944983819e-05, "loss": 0.0299, "step": 99650 }, { "epoch": 38.7, "learning_rate": 1.4839611650485438e-05, "loss": 0.1043, "step": 99660 }, { "epoch": 38.71, "learning_rate": 1.4839093851132686e-05, "loss": 0.2268, "step": 99670 }, { "epoch": 38.71, "learning_rate": 1.4838576051779935e-05, "loss": 0.2019, "step": 99680 }, { "epoch": 38.71, "learning_rate": 1.4838058252427185e-05, "loss": 0.0589, "step": 99690 }, { "epoch": 38.72, "learning_rate": 1.4837540453074435e-05, "loss": 0.0805, "step": 99700 }, { "epoch": 38.72, "learning_rate": 1.4837022653721684e-05, "loss": 0.0912, "step": 99710 }, { "epoch": 38.73, "learning_rate": 1.4836504854368934e-05, "loss": 0.0487, "step": 99720 }, { "epoch": 38.73, "learning_rate": 1.4835987055016183e-05, "loss": 0.1106, "step": 99730 }, { "epoch": 38.73, "learning_rate": 1.483546925566343e-05, "loss": 0.0887, "step": 99740 }, { "epoch": 38.74, "learning_rate": 1.483495145631068e-05, "loss": 0.0291, "step": 99750 }, { "epoch": 38.74, "learning_rate": 1.4834433656957929e-05, "loss": 0.0446, "step": 99760 }, { "epoch": 38.75, "learning_rate": 1.4833915857605179e-05, "loss": 0.0352, "step": 99770 }, { "epoch": 38.75, "learning_rate": 1.4833398058252428e-05, "loss": 0.0275, "step": 99780 }, { "epoch": 38.75, "learning_rate": 1.4832880258899678e-05, "loss": 0.1322, "step": 99790 }, { "epoch": 38.76, "learning_rate": 1.4832362459546927e-05, "loss": 0.0564, "step": 99800 }, { "epoch": 38.76, "learning_rate": 1.4831844660194177e-05, "loss": 0.195, "step": 99810 }, { "epoch": 38.77, "learning_rate": 1.4831326860841427e-05, "loss": 0.0602, "step": 99820 }, { "epoch": 38.77, "learning_rate": 1.4830809061488673e-05, "loss": 0.0745, "step": 99830 }, { "epoch": 38.77, "learning_rate": 1.4830291262135923e-05, "loss": 0.1419, "step": 99840 }, { "epoch": 38.78, "learning_rate": 1.4829773462783172e-05, "loss": 0.2613, "step": 99850 }, { "epoch": 38.78, "learning_rate": 1.4829255663430422e-05, "loss": 0.2078, "step": 99860 }, { "epoch": 38.78, "learning_rate": 1.4828737864077671e-05, "loss": 0.1124, "step": 99870 }, { "epoch": 38.79, "learning_rate": 1.4828220064724921e-05, "loss": 0.1062, "step": 99880 }, { "epoch": 38.79, "learning_rate": 1.482770226537217e-05, "loss": 0.0164, "step": 99890 }, { "epoch": 38.8, "learning_rate": 1.482718446601942e-05, "loss": 0.3654, "step": 99900 }, { "epoch": 38.8, "learning_rate": 1.4826666666666666e-05, "loss": 0.0662, "step": 99910 }, { "epoch": 38.8, "learning_rate": 1.4826148867313916e-05, "loss": 0.1691, "step": 99920 }, { "epoch": 38.81, "learning_rate": 1.4825631067961166e-05, "loss": 0.0953, "step": 99930 }, { "epoch": 38.81, "learning_rate": 1.4825113268608415e-05, "loss": 0.0405, "step": 99940 }, { "epoch": 38.82, "learning_rate": 1.4824595469255665e-05, "loss": 0.0531, "step": 99950 }, { "epoch": 38.82, "learning_rate": 1.4824077669902915e-05, "loss": 0.0047, "step": 99960 }, { "epoch": 38.82, "learning_rate": 1.4823559870550164e-05, "loss": 0.0307, "step": 99970 }, { "epoch": 38.83, "learning_rate": 1.4823042071197414e-05, "loss": 0.0631, "step": 99980 }, { "epoch": 38.83, "learning_rate": 1.482252427184466e-05, "loss": 0.0663, "step": 99990 }, { "epoch": 38.83, "learning_rate": 1.482200647249191e-05, "loss": 0.0995, "step": 100000 }, { "epoch": 38.84, "learning_rate": 1.482148867313916e-05, "loss": 0.0657, "step": 100010 }, { "epoch": 38.84, "learning_rate": 1.4820970873786409e-05, "loss": 0.0476, "step": 100020 }, { "epoch": 38.85, "learning_rate": 1.4820453074433658e-05, "loss": 0.0809, "step": 100030 }, { "epoch": 38.85, "learning_rate": 1.4819935275080908e-05, "loss": 0.092, "step": 100040 }, { "epoch": 38.85, "learning_rate": 1.4819417475728158e-05, "loss": 0.025, "step": 100050 }, { "epoch": 38.86, "learning_rate": 1.4818899676375407e-05, "loss": 0.0307, "step": 100060 }, { "epoch": 38.86, "learning_rate": 1.4818381877022654e-05, "loss": 0.1003, "step": 100070 }, { "epoch": 38.87, "learning_rate": 1.4817864077669903e-05, "loss": 0.0164, "step": 100080 }, { "epoch": 38.87, "learning_rate": 1.4817346278317153e-05, "loss": 0.0425, "step": 100090 }, { "epoch": 38.87, "learning_rate": 1.4816828478964402e-05, "loss": 0.0542, "step": 100100 }, { "epoch": 38.88, "learning_rate": 1.4816310679611652e-05, "loss": 0.1034, "step": 100110 }, { "epoch": 38.88, "learning_rate": 1.4815792880258902e-05, "loss": 0.0907, "step": 100120 }, { "epoch": 38.89, "learning_rate": 1.4815275080906151e-05, "loss": 0.0755, "step": 100130 }, { "epoch": 38.89, "learning_rate": 1.4814757281553401e-05, "loss": 0.0016, "step": 100140 }, { "epoch": 38.89, "learning_rate": 1.4814239482200647e-05, "loss": 0.092, "step": 100150 }, { "epoch": 38.9, "learning_rate": 1.4813721682847897e-05, "loss": 0.0282, "step": 100160 }, { "epoch": 38.9, "learning_rate": 1.4813203883495146e-05, "loss": 0.062, "step": 100170 }, { "epoch": 38.9, "learning_rate": 1.4812686084142396e-05, "loss": 0.1852, "step": 100180 }, { "epoch": 38.91, "learning_rate": 1.4812168284789646e-05, "loss": 0.1167, "step": 100190 }, { "epoch": 38.91, "learning_rate": 1.4811650485436895e-05, "loss": 0.1023, "step": 100200 }, { "epoch": 38.92, "learning_rate": 1.4811132686084145e-05, "loss": 0.1074, "step": 100210 }, { "epoch": 38.92, "learning_rate": 1.4810614886731394e-05, "loss": 0.128, "step": 100220 }, { "epoch": 38.92, "learning_rate": 1.481009708737864e-05, "loss": 0.1094, "step": 100230 }, { "epoch": 38.93, "learning_rate": 1.480957928802589e-05, "loss": 0.153, "step": 100240 }, { "epoch": 38.93, "learning_rate": 1.480906148867314e-05, "loss": 0.0772, "step": 100250 }, { "epoch": 38.94, "learning_rate": 1.480854368932039e-05, "loss": 0.178, "step": 100260 }, { "epoch": 38.94, "learning_rate": 1.4808025889967639e-05, "loss": 0.1571, "step": 100270 }, { "epoch": 38.94, "learning_rate": 1.4807508090614889e-05, "loss": 0.0535, "step": 100280 }, { "epoch": 38.95, "learning_rate": 1.4806990291262138e-05, "loss": 0.0066, "step": 100290 }, { "epoch": 38.95, "learning_rate": 1.4806472491909388e-05, "loss": 0.0617, "step": 100300 }, { "epoch": 38.96, "learning_rate": 1.4805954692556634e-05, "loss": 0.061, "step": 100310 }, { "epoch": 38.96, "learning_rate": 1.4805436893203884e-05, "loss": 0.0989, "step": 100320 }, { "epoch": 38.96, "learning_rate": 1.4804919093851133e-05, "loss": 0.0277, "step": 100330 }, { "epoch": 38.97, "learning_rate": 1.4804401294498383e-05, "loss": 0.1652, "step": 100340 }, { "epoch": 38.97, "learning_rate": 1.4803883495145633e-05, "loss": 0.2654, "step": 100350 }, { "epoch": 38.97, "learning_rate": 1.4803365695792882e-05, "loss": 0.1039, "step": 100360 }, { "epoch": 38.98, "learning_rate": 1.4802847896440132e-05, "loss": 0.0731, "step": 100370 }, { "epoch": 38.98, "learning_rate": 1.480233009708738e-05, "loss": 0.1836, "step": 100380 }, { "epoch": 38.99, "learning_rate": 1.480181229773463e-05, "loss": 0.198, "step": 100390 }, { "epoch": 38.99, "learning_rate": 1.4801294498381877e-05, "loss": 0.0214, "step": 100400 }, { "epoch": 38.99, "learning_rate": 1.4800776699029127e-05, "loss": 0.1549, "step": 100410 }, { "epoch": 39.0, "learning_rate": 1.4800258899676377e-05, "loss": 0.0409, "step": 100420 }, { "epoch": 39.0, "eval_accuracy": 0.9482806052269601, "eval_loss": 0.3205643892288208, "eval_runtime": 8.2805, "eval_samples_per_second": 438.983, "eval_steps_per_second": 54.948, "step": 100425 }, { "epoch": 39.0, "learning_rate": 1.4799741100323626e-05, "loss": 0.0326, "step": 100430 }, { "epoch": 39.01, "learning_rate": 1.4799223300970876e-05, "loss": 0.196, "step": 100440 }, { "epoch": 39.01, "learning_rate": 1.4798705501618125e-05, "loss": 0.0952, "step": 100450 }, { "epoch": 39.01, "learning_rate": 1.4798187702265373e-05, "loss": 0.1202, "step": 100460 }, { "epoch": 39.02, "learning_rate": 1.4797669902912623e-05, "loss": 0.0551, "step": 100470 }, { "epoch": 39.02, "learning_rate": 1.4797152103559871e-05, "loss": 0.1816, "step": 100480 }, { "epoch": 39.03, "learning_rate": 1.479663430420712e-05, "loss": 0.0381, "step": 100490 }, { "epoch": 39.03, "learning_rate": 1.479611650485437e-05, "loss": 0.0738, "step": 100500 }, { "epoch": 39.03, "learning_rate": 1.479559870550162e-05, "loss": 0.0867, "step": 100510 }, { "epoch": 39.04, "learning_rate": 1.479508090614887e-05, "loss": 0.0705, "step": 100520 }, { "epoch": 39.04, "learning_rate": 1.4794563106796117e-05, "loss": 0.0706, "step": 100530 }, { "epoch": 39.04, "learning_rate": 1.4794045307443367e-05, "loss": 0.0753, "step": 100540 }, { "epoch": 39.05, "learning_rate": 1.4793527508090617e-05, "loss": 0.0529, "step": 100550 }, { "epoch": 39.05, "learning_rate": 1.4793009708737865e-05, "loss": 0.0088, "step": 100560 }, { "epoch": 39.06, "learning_rate": 1.4792491909385114e-05, "loss": 0.0159, "step": 100570 }, { "epoch": 39.06, "learning_rate": 1.4791974110032364e-05, "loss": 0.0012, "step": 100580 }, { "epoch": 39.06, "learning_rate": 1.4791456310679613e-05, "loss": 0.0753, "step": 100590 }, { "epoch": 39.07, "learning_rate": 1.4790938511326863e-05, "loss": 0.1147, "step": 100600 }, { "epoch": 39.07, "learning_rate": 1.4790420711974111e-05, "loss": 0.0201, "step": 100610 }, { "epoch": 39.08, "learning_rate": 1.478990291262136e-05, "loss": 0.1459, "step": 100620 }, { "epoch": 39.08, "learning_rate": 1.478938511326861e-05, "loss": 0.0649, "step": 100630 }, { "epoch": 39.08, "learning_rate": 1.4788867313915858e-05, "loss": 0.1094, "step": 100640 }, { "epoch": 39.09, "learning_rate": 1.4788349514563108e-05, "loss": 0.0616, "step": 100650 }, { "epoch": 39.09, "learning_rate": 1.4787831715210357e-05, "loss": 0.1036, "step": 100660 }, { "epoch": 39.1, "learning_rate": 1.4787313915857607e-05, "loss": 0.1349, "step": 100670 }, { "epoch": 39.1, "learning_rate": 1.4786796116504855e-05, "loss": 0.008, "step": 100680 }, { "epoch": 39.1, "learning_rate": 1.4786278317152104e-05, "loss": 0.0719, "step": 100690 }, { "epoch": 39.11, "learning_rate": 1.4785760517799354e-05, "loss": 0.0172, "step": 100700 }, { "epoch": 39.11, "learning_rate": 1.4785242718446604e-05, "loss": 0.0247, "step": 100710 }, { "epoch": 39.11, "learning_rate": 1.4784724919093852e-05, "loss": 0.1675, "step": 100720 }, { "epoch": 39.12, "learning_rate": 1.4784207119741101e-05, "loss": 0.0253, "step": 100730 }, { "epoch": 39.12, "learning_rate": 1.4783689320388351e-05, "loss": 0.0589, "step": 100740 }, { "epoch": 39.13, "learning_rate": 1.47831715210356e-05, "loss": 0.0433, "step": 100750 }, { "epoch": 39.13, "learning_rate": 1.4782653721682848e-05, "loss": 0.0155, "step": 100760 }, { "epoch": 39.13, "learning_rate": 1.4782135922330098e-05, "loss": 0.1465, "step": 100770 }, { "epoch": 39.14, "learning_rate": 1.4781618122977348e-05, "loss": 0.1849, "step": 100780 }, { "epoch": 39.14, "learning_rate": 1.4781100323624597e-05, "loss": 0.0778, "step": 100790 }, { "epoch": 39.15, "learning_rate": 1.4780582524271845e-05, "loss": 0.0338, "step": 100800 }, { "epoch": 39.15, "learning_rate": 1.4780064724919095e-05, "loss": 0.0299, "step": 100810 }, { "epoch": 39.15, "learning_rate": 1.4779546925566344e-05, "loss": 0.1034, "step": 100820 }, { "epoch": 39.16, "learning_rate": 1.4779029126213594e-05, "loss": 0.0614, "step": 100830 }, { "epoch": 39.16, "learning_rate": 1.4778511326860842e-05, "loss": 0.001, "step": 100840 }, { "epoch": 39.17, "learning_rate": 1.4777993527508092e-05, "loss": 0.1134, "step": 100850 }, { "epoch": 39.17, "learning_rate": 1.4777475728155341e-05, "loss": 0.1248, "step": 100860 }, { "epoch": 39.17, "learning_rate": 1.477695792880259e-05, "loss": 0.1521, "step": 100870 }, { "epoch": 39.18, "learning_rate": 1.4776440129449839e-05, "loss": 0.0232, "step": 100880 }, { "epoch": 39.18, "learning_rate": 1.4775922330097088e-05, "loss": 0.0097, "step": 100890 }, { "epoch": 39.18, "learning_rate": 1.4775404530744338e-05, "loss": 0.1155, "step": 100900 }, { "epoch": 39.19, "learning_rate": 1.4774886731391586e-05, "loss": 0.0431, "step": 100910 }, { "epoch": 39.19, "learning_rate": 1.4774368932038836e-05, "loss": 0.1555, "step": 100920 }, { "epoch": 39.2, "learning_rate": 1.4773851132686085e-05, "loss": 0.0232, "step": 100930 }, { "epoch": 39.2, "learning_rate": 1.4773333333333335e-05, "loss": 0.1632, "step": 100940 }, { "epoch": 39.2, "learning_rate": 1.4772815533980584e-05, "loss": 0.152, "step": 100950 }, { "epoch": 39.21, "learning_rate": 1.4772297734627834e-05, "loss": 0.1385, "step": 100960 }, { "epoch": 39.21, "learning_rate": 1.4771779935275082e-05, "loss": 0.0568, "step": 100970 }, { "epoch": 39.22, "learning_rate": 1.4771262135922332e-05, "loss": 0.0036, "step": 100980 }, { "epoch": 39.22, "learning_rate": 1.477074433656958e-05, "loss": 0.2125, "step": 100990 }, { "epoch": 39.22, "learning_rate": 1.4770226537216829e-05, "loss": 0.0315, "step": 101000 }, { "epoch": 39.23, "learning_rate": 1.4769708737864079e-05, "loss": 0.2239, "step": 101010 }, { "epoch": 39.23, "learning_rate": 1.4769190938511328e-05, "loss": 0.0808, "step": 101020 }, { "epoch": 39.23, "learning_rate": 1.4768673139158578e-05, "loss": 0.159, "step": 101030 }, { "epoch": 39.24, "learning_rate": 1.4768155339805828e-05, "loss": 0.1385, "step": 101040 }, { "epoch": 39.24, "learning_rate": 1.4767637540453075e-05, "loss": 0.0075, "step": 101050 }, { "epoch": 39.25, "learning_rate": 1.4767119741100323e-05, "loss": 0.0414, "step": 101060 }, { "epoch": 39.25, "learning_rate": 1.4766601941747573e-05, "loss": 0.0454, "step": 101070 }, { "epoch": 39.25, "learning_rate": 1.4766084142394823e-05, "loss": 0.1061, "step": 101080 }, { "epoch": 39.26, "learning_rate": 1.4765566343042072e-05, "loss": 0.1927, "step": 101090 }, { "epoch": 39.26, "learning_rate": 1.4765048543689322e-05, "loss": 0.131, "step": 101100 }, { "epoch": 39.27, "learning_rate": 1.4764530744336571e-05, "loss": 0.0615, "step": 101110 }, { "epoch": 39.27, "learning_rate": 1.4764012944983821e-05, "loss": 0.0475, "step": 101120 }, { "epoch": 39.27, "learning_rate": 1.4763495145631069e-05, "loss": 0.1368, "step": 101130 }, { "epoch": 39.28, "learning_rate": 1.4762977346278317e-05, "loss": 0.0901, "step": 101140 }, { "epoch": 39.28, "learning_rate": 1.4762459546925567e-05, "loss": 0.0224, "step": 101150 }, { "epoch": 39.29, "learning_rate": 1.4761941747572816e-05, "loss": 0.1132, "step": 101160 }, { "epoch": 39.29, "learning_rate": 1.4761423948220066e-05, "loss": 0.0308, "step": 101170 }, { "epoch": 39.29, "learning_rate": 1.4760906148867315e-05, "loss": 0.1596, "step": 101180 }, { "epoch": 39.3, "learning_rate": 1.4760388349514565e-05, "loss": 0.1732, "step": 101190 }, { "epoch": 39.3, "learning_rate": 1.4759870550161815e-05, "loss": 0.2038, "step": 101200 }, { "epoch": 39.3, "learning_rate": 1.4759352750809061e-05, "loss": 0.2404, "step": 101210 }, { "epoch": 39.31, "learning_rate": 1.475883495145631e-05, "loss": 0.064, "step": 101220 }, { "epoch": 39.31, "learning_rate": 1.475831715210356e-05, "loss": 0.1412, "step": 101230 }, { "epoch": 39.32, "learning_rate": 1.475779935275081e-05, "loss": 0.1361, "step": 101240 }, { "epoch": 39.32, "learning_rate": 1.475728155339806e-05, "loss": 0.0874, "step": 101250 }, { "epoch": 39.32, "learning_rate": 1.4756763754045309e-05, "loss": 0.1199, "step": 101260 }, { "epoch": 39.33, "learning_rate": 1.4756245954692559e-05, "loss": 0.0646, "step": 101270 }, { "epoch": 39.33, "learning_rate": 1.4755728155339808e-05, "loss": 0.0258, "step": 101280 }, { "epoch": 39.34, "learning_rate": 1.4755210355987054e-05, "loss": 0.0081, "step": 101290 }, { "epoch": 39.34, "learning_rate": 1.4754692556634304e-05, "loss": 0.1741, "step": 101300 }, { "epoch": 39.34, "learning_rate": 1.4754174757281554e-05, "loss": 0.1738, "step": 101310 }, { "epoch": 39.35, "learning_rate": 1.4753656957928803e-05, "loss": 0.1049, "step": 101320 }, { "epoch": 39.35, "learning_rate": 1.4753139158576053e-05, "loss": 0.0235, "step": 101330 }, { "epoch": 39.36, "learning_rate": 1.4752621359223303e-05, "loss": 0.0563, "step": 101340 }, { "epoch": 39.36, "learning_rate": 1.4752103559870552e-05, "loss": 0.1389, "step": 101350 }, { "epoch": 39.36, "learning_rate": 1.4751585760517802e-05, "loss": 0.1041, "step": 101360 }, { "epoch": 39.37, "learning_rate": 1.4751067961165048e-05, "loss": 0.0265, "step": 101370 }, { "epoch": 39.37, "learning_rate": 1.4750550161812298e-05, "loss": 0.1123, "step": 101380 }, { "epoch": 39.37, "learning_rate": 1.4750032362459547e-05, "loss": 0.0455, "step": 101390 }, { "epoch": 39.38, "learning_rate": 1.4749514563106797e-05, "loss": 0.0727, "step": 101400 }, { "epoch": 39.38, "learning_rate": 1.4748996763754046e-05, "loss": 0.1382, "step": 101410 }, { "epoch": 39.39, "learning_rate": 1.4748478964401296e-05, "loss": 0.0773, "step": 101420 }, { "epoch": 39.39, "learning_rate": 1.4747961165048546e-05, "loss": 0.0624, "step": 101430 }, { "epoch": 39.39, "learning_rate": 1.4747443365695795e-05, "loss": 0.1651, "step": 101440 }, { "epoch": 39.4, "learning_rate": 1.4746925566343042e-05, "loss": 0.0392, "step": 101450 }, { "epoch": 39.4, "learning_rate": 1.4746407766990291e-05, "loss": 0.0621, "step": 101460 }, { "epoch": 39.41, "learning_rate": 1.474588996763754e-05, "loss": 0.0484, "step": 101470 }, { "epoch": 39.41, "learning_rate": 1.474537216828479e-05, "loss": 0.1097, "step": 101480 }, { "epoch": 39.41, "learning_rate": 1.474485436893204e-05, "loss": 0.0711, "step": 101490 }, { "epoch": 39.42, "learning_rate": 1.474433656957929e-05, "loss": 0.2309, "step": 101500 }, { "epoch": 39.42, "learning_rate": 1.474381877022654e-05, "loss": 0.142, "step": 101510 }, { "epoch": 39.43, "learning_rate": 1.4743300970873789e-05, "loss": 0.0045, "step": 101520 }, { "epoch": 39.43, "learning_rate": 1.4742783171521039e-05, "loss": 0.0051, "step": 101530 }, { "epoch": 39.43, "learning_rate": 1.4742265372168285e-05, "loss": 0.0827, "step": 101540 }, { "epoch": 39.44, "learning_rate": 1.4741747572815534e-05, "loss": 0.0729, "step": 101550 }, { "epoch": 39.44, "learning_rate": 1.4741229773462784e-05, "loss": 0.1171, "step": 101560 }, { "epoch": 39.44, "learning_rate": 1.4740711974110034e-05, "loss": 0.0454, "step": 101570 }, { "epoch": 39.45, "learning_rate": 1.4740194174757283e-05, "loss": 0.0298, "step": 101580 }, { "epoch": 39.45, "learning_rate": 1.4739676375404533e-05, "loss": 0.193, "step": 101590 }, { "epoch": 39.46, "learning_rate": 1.4739158576051782e-05, "loss": 0.0743, "step": 101600 }, { "epoch": 39.46, "learning_rate": 1.4738640776699032e-05, "loss": 0.0672, "step": 101610 }, { "epoch": 39.46, "learning_rate": 1.4738122977346278e-05, "loss": 0.0298, "step": 101620 }, { "epoch": 39.47, "learning_rate": 1.4737605177993528e-05, "loss": 0.118, "step": 101630 }, { "epoch": 39.47, "learning_rate": 1.4737087378640778e-05, "loss": 0.0988, "step": 101640 }, { "epoch": 39.48, "learning_rate": 1.4736569579288027e-05, "loss": 0.0427, "step": 101650 }, { "epoch": 39.48, "learning_rate": 1.4736051779935277e-05, "loss": 0.0976, "step": 101660 }, { "epoch": 39.48, "learning_rate": 1.4735533980582526e-05, "loss": 0.0387, "step": 101670 }, { "epoch": 39.49, "learning_rate": 1.4735016181229776e-05, "loss": 0.0638, "step": 101680 }, { "epoch": 39.49, "learning_rate": 1.4734498381877026e-05, "loss": 0.1275, "step": 101690 }, { "epoch": 39.5, "learning_rate": 1.4733980582524272e-05, "loss": 0.0755, "step": 101700 }, { "epoch": 39.5, "learning_rate": 1.4733462783171521e-05, "loss": 0.0256, "step": 101710 }, { "epoch": 39.5, "learning_rate": 1.4732944983818771e-05, "loss": 0.1193, "step": 101720 }, { "epoch": 39.51, "learning_rate": 1.473242718446602e-05, "loss": 0.1469, "step": 101730 }, { "epoch": 39.51, "learning_rate": 1.473190938511327e-05, "loss": 0.1694, "step": 101740 }, { "epoch": 39.51, "learning_rate": 1.473139158576052e-05, "loss": 0.101, "step": 101750 }, { "epoch": 39.52, "learning_rate": 1.473087378640777e-05, "loss": 0.0822, "step": 101760 }, { "epoch": 39.52, "learning_rate": 1.473035598705502e-05, "loss": 0.1619, "step": 101770 }, { "epoch": 39.53, "learning_rate": 1.4729838187702265e-05, "loss": 0.1513, "step": 101780 }, { "epoch": 39.53, "learning_rate": 1.4729320388349515e-05, "loss": 0.0732, "step": 101790 }, { "epoch": 39.53, "learning_rate": 1.4728802588996765e-05, "loss": 0.0679, "step": 101800 }, { "epoch": 39.54, "learning_rate": 1.4728284789644014e-05, "loss": 0.1002, "step": 101810 }, { "epoch": 39.54, "learning_rate": 1.4727766990291264e-05, "loss": 0.0731, "step": 101820 }, { "epoch": 39.55, "learning_rate": 1.4727249190938513e-05, "loss": 0.0815, "step": 101830 }, { "epoch": 39.55, "learning_rate": 1.4726731391585763e-05, "loss": 0.011, "step": 101840 }, { "epoch": 39.55, "learning_rate": 1.4726213592233011e-05, "loss": 0.0034, "step": 101850 }, { "epoch": 39.56, "learning_rate": 1.4725695792880259e-05, "loss": 0.1259, "step": 101860 }, { "epoch": 39.56, "learning_rate": 1.4725177993527509e-05, "loss": 0.1331, "step": 101870 }, { "epoch": 39.57, "learning_rate": 1.4724660194174758e-05, "loss": 0.1252, "step": 101880 }, { "epoch": 39.57, "learning_rate": 1.4724142394822008e-05, "loss": 0.12, "step": 101890 }, { "epoch": 39.57, "learning_rate": 1.4723624595469257e-05, "loss": 0.037, "step": 101900 }, { "epoch": 39.58, "learning_rate": 1.4723106796116507e-05, "loss": 0.1042, "step": 101910 }, { "epoch": 39.58, "learning_rate": 1.4722588996763757e-05, "loss": 0.1297, "step": 101920 }, { "epoch": 39.58, "learning_rate": 1.4722071197411005e-05, "loss": 0.0754, "step": 101930 }, { "epoch": 39.59, "learning_rate": 1.4721553398058253e-05, "loss": 0.1813, "step": 101940 }, { "epoch": 39.59, "learning_rate": 1.4721035598705502e-05, "loss": 0.0029, "step": 101950 }, { "epoch": 39.6, "learning_rate": 1.4720517799352752e-05, "loss": 0.1134, "step": 101960 }, { "epoch": 39.6, "learning_rate": 1.4720000000000001e-05, "loss": 0.08, "step": 101970 }, { "epoch": 39.6, "learning_rate": 1.4719482200647251e-05, "loss": 0.0321, "step": 101980 }, { "epoch": 39.61, "learning_rate": 1.47189644012945e-05, "loss": 0.1706, "step": 101990 }, { "epoch": 39.61, "learning_rate": 1.4718446601941749e-05, "loss": 0.0078, "step": 102000 }, { "epoch": 39.62, "learning_rate": 1.4717928802588998e-05, "loss": 0.0844, "step": 102010 }, { "epoch": 39.62, "learning_rate": 1.4717411003236246e-05, "loss": 0.2054, "step": 102020 }, { "epoch": 39.62, "learning_rate": 1.4716893203883496e-05, "loss": 0.0832, "step": 102030 }, { "epoch": 39.63, "learning_rate": 1.4716375404530745e-05, "loss": 0.1389, "step": 102040 }, { "epoch": 39.63, "learning_rate": 1.4715857605177995e-05, "loss": 0.0742, "step": 102050 }, { "epoch": 39.63, "learning_rate": 1.4715339805825245e-05, "loss": 0.0016, "step": 102060 }, { "epoch": 39.64, "learning_rate": 1.4714822006472494e-05, "loss": 0.1111, "step": 102070 }, { "epoch": 39.64, "learning_rate": 1.4714304207119742e-05, "loss": 0.0486, "step": 102080 }, { "epoch": 39.65, "learning_rate": 1.4713786407766992e-05, "loss": 0.065, "step": 102090 }, { "epoch": 39.65, "learning_rate": 1.4713268608414241e-05, "loss": 0.0021, "step": 102100 }, { "epoch": 39.65, "learning_rate": 1.471275080906149e-05, "loss": 0.0348, "step": 102110 }, { "epoch": 39.66, "learning_rate": 1.4712233009708739e-05, "loss": 0.1241, "step": 102120 }, { "epoch": 39.66, "learning_rate": 1.4711715210355988e-05, "loss": 0.1321, "step": 102130 }, { "epoch": 39.67, "learning_rate": 1.4711197411003238e-05, "loss": 0.1762, "step": 102140 }, { "epoch": 39.67, "learning_rate": 1.4710679611650486e-05, "loss": 0.2518, "step": 102150 }, { "epoch": 39.67, "learning_rate": 1.4710161812297736e-05, "loss": 0.0824, "step": 102160 }, { "epoch": 39.68, "learning_rate": 1.4709644012944985e-05, "loss": 0.0206, "step": 102170 }, { "epoch": 39.68, "learning_rate": 1.4709126213592235e-05, "loss": 0.1726, "step": 102180 }, { "epoch": 39.69, "learning_rate": 1.4708608414239483e-05, "loss": 0.0871, "step": 102190 }, { "epoch": 39.69, "learning_rate": 1.4708090614886732e-05, "loss": 0.0288, "step": 102200 }, { "epoch": 39.69, "learning_rate": 1.4707572815533982e-05, "loss": 0.05, "step": 102210 }, { "epoch": 39.7, "learning_rate": 1.4707055016181232e-05, "loss": 0.0791, "step": 102220 }, { "epoch": 39.7, "learning_rate": 1.470653721682848e-05, "loss": 0.1369, "step": 102230 }, { "epoch": 39.7, "learning_rate": 1.470601941747573e-05, "loss": 0.0501, "step": 102240 }, { "epoch": 39.71, "learning_rate": 1.4705501618122979e-05, "loss": 0.0075, "step": 102250 }, { "epoch": 39.71, "learning_rate": 1.4704983818770228e-05, "loss": 0.0263, "step": 102260 }, { "epoch": 39.72, "learning_rate": 1.4704466019417476e-05, "loss": 0.1974, "step": 102270 }, { "epoch": 39.72, "learning_rate": 1.4703948220064726e-05, "loss": 0.0083, "step": 102280 }, { "epoch": 39.72, "learning_rate": 1.4703430420711976e-05, "loss": 0.1484, "step": 102290 }, { "epoch": 39.73, "learning_rate": 1.4702912621359225e-05, "loss": 0.0286, "step": 102300 }, { "epoch": 39.73, "learning_rate": 1.4702394822006473e-05, "loss": 0.1325, "step": 102310 }, { "epoch": 39.74, "learning_rate": 1.4701877022653723e-05, "loss": 0.0855, "step": 102320 }, { "epoch": 39.74, "learning_rate": 1.4701359223300972e-05, "loss": 0.1292, "step": 102330 }, { "epoch": 39.74, "learning_rate": 1.4700841423948222e-05, "loss": 0.201, "step": 102340 }, { "epoch": 39.75, "learning_rate": 1.470032362459547e-05, "loss": 0.021, "step": 102350 }, { "epoch": 39.75, "learning_rate": 1.469980582524272e-05, "loss": 0.1761, "step": 102360 }, { "epoch": 39.76, "learning_rate": 1.4699288025889969e-05, "loss": 0.0509, "step": 102370 }, { "epoch": 39.76, "learning_rate": 1.4698770226537217e-05, "loss": 0.1158, "step": 102380 }, { "epoch": 39.76, "learning_rate": 1.4698252427184467e-05, "loss": 0.082, "step": 102390 }, { "epoch": 39.77, "learning_rate": 1.4697734627831716e-05, "loss": 0.1641, "step": 102400 }, { "epoch": 39.77, "learning_rate": 1.4697216828478966e-05, "loss": 0.0757, "step": 102410 }, { "epoch": 39.77, "learning_rate": 1.4696699029126216e-05, "loss": 0.0005, "step": 102420 }, { "epoch": 39.78, "learning_rate": 1.4696181229773463e-05, "loss": 0.0969, "step": 102430 }, { "epoch": 39.78, "learning_rate": 1.4695663430420713e-05, "loss": 0.0472, "step": 102440 }, { "epoch": 39.79, "learning_rate": 1.4695145631067963e-05, "loss": 0.1683, "step": 102450 }, { "epoch": 39.79, "learning_rate": 1.469462783171521e-05, "loss": 0.1246, "step": 102460 }, { "epoch": 39.79, "learning_rate": 1.469411003236246e-05, "loss": 0.0176, "step": 102470 }, { "epoch": 39.8, "learning_rate": 1.469359223300971e-05, "loss": 0.0499, "step": 102480 }, { "epoch": 39.8, "learning_rate": 1.469307443365696e-05, "loss": 0.0536, "step": 102490 }, { "epoch": 39.81, "learning_rate": 1.4692556634304209e-05, "loss": 0.1382, "step": 102500 }, { "epoch": 39.81, "learning_rate": 1.4692038834951457e-05, "loss": 0.0762, "step": 102510 }, { "epoch": 39.81, "learning_rate": 1.4691521035598707e-05, "loss": 0.0374, "step": 102520 }, { "epoch": 39.82, "learning_rate": 1.4691003236245955e-05, "loss": 0.0447, "step": 102530 }, { "epoch": 39.82, "learning_rate": 1.4690485436893204e-05, "loss": 0.0927, "step": 102540 }, { "epoch": 39.83, "learning_rate": 1.4689967637540454e-05, "loss": 0.2104, "step": 102550 }, { "epoch": 39.83, "learning_rate": 1.4689449838187703e-05, "loss": 0.0374, "step": 102560 }, { "epoch": 39.83, "learning_rate": 1.4688932038834953e-05, "loss": 0.104, "step": 102570 }, { "epoch": 39.84, "learning_rate": 1.4688414239482203e-05, "loss": 0.0678, "step": 102580 }, { "epoch": 39.84, "learning_rate": 1.4687896440129452e-05, "loss": 0.2001, "step": 102590 }, { "epoch": 39.84, "learning_rate": 1.46873786407767e-05, "loss": 0.015, "step": 102600 }, { "epoch": 39.85, "learning_rate": 1.4686860841423948e-05, "loss": 0.0489, "step": 102610 }, { "epoch": 39.85, "learning_rate": 1.4686343042071198e-05, "loss": 0.0322, "step": 102620 }, { "epoch": 39.86, "learning_rate": 1.4685825242718447e-05, "loss": 0.0614, "step": 102630 }, { "epoch": 39.86, "learning_rate": 1.4685307443365697e-05, "loss": 0.0028, "step": 102640 }, { "epoch": 39.86, "learning_rate": 1.4684789644012947e-05, "loss": 0.093, "step": 102650 }, { "epoch": 39.87, "learning_rate": 1.4684271844660196e-05, "loss": 0.1306, "step": 102660 }, { "epoch": 39.87, "learning_rate": 1.4683754045307446e-05, "loss": 0.0899, "step": 102670 }, { "epoch": 39.88, "learning_rate": 1.4683236245954692e-05, "loss": 0.047, "step": 102680 }, { "epoch": 39.88, "learning_rate": 1.4682718446601942e-05, "loss": 0.0971, "step": 102690 }, { "epoch": 39.88, "learning_rate": 1.4682200647249191e-05, "loss": 0.0034, "step": 102700 }, { "epoch": 39.89, "learning_rate": 1.4681682847896441e-05, "loss": 0.128, "step": 102710 }, { "epoch": 39.89, "learning_rate": 1.468116504854369e-05, "loss": 0.1238, "step": 102720 }, { "epoch": 39.9, "learning_rate": 1.468064724919094e-05, "loss": 0.1027, "step": 102730 }, { "epoch": 39.9, "learning_rate": 1.468012944983819e-05, "loss": 0.0816, "step": 102740 }, { "epoch": 39.9, "learning_rate": 1.467961165048544e-05, "loss": 0.0714, "step": 102750 }, { "epoch": 39.91, "learning_rate": 1.4679093851132686e-05, "loss": 0.0042, "step": 102760 }, { "epoch": 39.91, "learning_rate": 1.4678576051779935e-05, "loss": 0.0157, "step": 102770 }, { "epoch": 39.91, "learning_rate": 1.4678058252427185e-05, "loss": 0.3695, "step": 102780 }, { "epoch": 39.92, "learning_rate": 1.4677540453074434e-05, "loss": 0.0301, "step": 102790 }, { "epoch": 39.92, "learning_rate": 1.4677022653721684e-05, "loss": 0.1392, "step": 102800 }, { "epoch": 39.93, "learning_rate": 1.4676504854368934e-05, "loss": 0.134, "step": 102810 }, { "epoch": 39.93, "learning_rate": 1.4675987055016183e-05, "loss": 0.0711, "step": 102820 }, { "epoch": 39.93, "learning_rate": 1.4675469255663433e-05, "loss": 0.1224, "step": 102830 }, { "epoch": 39.94, "learning_rate": 1.467495145631068e-05, "loss": 0.0272, "step": 102840 }, { "epoch": 39.94, "learning_rate": 1.4674433656957929e-05, "loss": 0.0989, "step": 102850 }, { "epoch": 39.95, "learning_rate": 1.4673915857605178e-05, "loss": 0.156, "step": 102860 }, { "epoch": 39.95, "learning_rate": 1.4673398058252428e-05, "loss": 0.279, "step": 102870 }, { "epoch": 39.95, "learning_rate": 1.4672880258899678e-05, "loss": 0.0568, "step": 102880 }, { "epoch": 39.96, "learning_rate": 1.4672362459546927e-05, "loss": 0.087, "step": 102890 }, { "epoch": 39.96, "learning_rate": 1.4671844660194177e-05, "loss": 0.0757, "step": 102900 }, { "epoch": 39.97, "learning_rate": 1.4671326860841426e-05, "loss": 0.0706, "step": 102910 }, { "epoch": 39.97, "learning_rate": 1.4670809061488673e-05, "loss": 0.3136, "step": 102920 }, { "epoch": 39.97, "learning_rate": 1.4670291262135922e-05, "loss": 0.1318, "step": 102930 }, { "epoch": 39.98, "learning_rate": 1.4669773462783172e-05, "loss": 0.0733, "step": 102940 }, { "epoch": 39.98, "learning_rate": 1.4669255663430422e-05, "loss": 0.1048, "step": 102950 }, { "epoch": 39.98, "learning_rate": 1.4668737864077671e-05, "loss": 0.128, "step": 102960 }, { "epoch": 39.99, "learning_rate": 1.466822006472492e-05, "loss": 0.1298, "step": 102970 }, { "epoch": 39.99, "learning_rate": 1.466770226537217e-05, "loss": 0.122, "step": 102980 }, { "epoch": 40.0, "learning_rate": 1.466718446601942e-05, "loss": 0.0043, "step": 102990 }, { "epoch": 40.0, "learning_rate": 1.4666666666666666e-05, "loss": 0.1052, "step": 103000 }, { "epoch": 40.0, "eval_accuracy": 0.9493810178817056, "eval_loss": 0.32120659947395325, "eval_runtime": 8.2083, "eval_samples_per_second": 442.844, "eval_steps_per_second": 55.432, "step": 103000 }, { "epoch": 40.0, "learning_rate": 1.4666148867313916e-05, "loss": 0.1314, "step": 103010 }, { "epoch": 40.01, "learning_rate": 1.4665631067961166e-05, "loss": 0.2195, "step": 103020 }, { "epoch": 40.01, "learning_rate": 1.4665113268608415e-05, "loss": 0.0419, "step": 103030 }, { "epoch": 40.02, "learning_rate": 1.4664595469255665e-05, "loss": 0.2252, "step": 103040 }, { "epoch": 40.02, "learning_rate": 1.4664077669902914e-05, "loss": 0.045, "step": 103050 }, { "epoch": 40.02, "learning_rate": 1.4663559870550164e-05, "loss": 0.2477, "step": 103060 }, { "epoch": 40.03, "learning_rate": 1.4663042071197414e-05, "loss": 0.0502, "step": 103070 }, { "epoch": 40.03, "learning_rate": 1.466252427184466e-05, "loss": 0.1252, "step": 103080 }, { "epoch": 40.03, "learning_rate": 1.466200647249191e-05, "loss": 0.0679, "step": 103090 }, { "epoch": 40.04, "learning_rate": 1.4661488673139159e-05, "loss": 0.013, "step": 103100 }, { "epoch": 40.04, "learning_rate": 1.4660970873786409e-05, "loss": 0.1509, "step": 103110 }, { "epoch": 40.05, "learning_rate": 1.4660453074433658e-05, "loss": 0.0251, "step": 103120 }, { "epoch": 40.05, "learning_rate": 1.4659935275080908e-05, "loss": 0.1605, "step": 103130 }, { "epoch": 40.05, "learning_rate": 1.4659417475728158e-05, "loss": 0.0702, "step": 103140 }, { "epoch": 40.06, "learning_rate": 1.4658899676375407e-05, "loss": 0.1271, "step": 103150 }, { "epoch": 40.06, "learning_rate": 1.4658381877022657e-05, "loss": 0.0264, "step": 103160 }, { "epoch": 40.07, "learning_rate": 1.4657864077669903e-05, "loss": 0.0189, "step": 103170 }, { "epoch": 40.07, "learning_rate": 1.4657346278317153e-05, "loss": 0.1867, "step": 103180 }, { "epoch": 40.07, "learning_rate": 1.4656828478964402e-05, "loss": 0.1789, "step": 103190 }, { "epoch": 40.08, "learning_rate": 1.4656310679611652e-05, "loss": 0.1584, "step": 103200 }, { "epoch": 40.08, "learning_rate": 1.4655792880258901e-05, "loss": 0.0564, "step": 103210 }, { "epoch": 40.09, "learning_rate": 1.4655275080906151e-05, "loss": 0.1026, "step": 103220 }, { "epoch": 40.09, "learning_rate": 1.46547572815534e-05, "loss": 0.0481, "step": 103230 }, { "epoch": 40.09, "learning_rate": 1.465423948220065e-05, "loss": 0.0857, "step": 103240 }, { "epoch": 40.1, "learning_rate": 1.4653721682847897e-05, "loss": 0.0057, "step": 103250 }, { "epoch": 40.1, "learning_rate": 1.4653203883495146e-05, "loss": 0.1944, "step": 103260 }, { "epoch": 40.1, "learning_rate": 1.4652686084142396e-05, "loss": 0.0327, "step": 103270 }, { "epoch": 40.11, "learning_rate": 1.4652168284789645e-05, "loss": 0.0838, "step": 103280 }, { "epoch": 40.11, "learning_rate": 1.4651650485436895e-05, "loss": 0.0295, "step": 103290 }, { "epoch": 40.12, "learning_rate": 1.4651132686084145e-05, "loss": 0.0373, "step": 103300 }, { "epoch": 40.12, "learning_rate": 1.4650614886731394e-05, "loss": 0.1162, "step": 103310 }, { "epoch": 40.12, "learning_rate": 1.4650097087378642e-05, "loss": 0.0627, "step": 103320 }, { "epoch": 40.13, "learning_rate": 1.464957928802589e-05, "loss": 0.0298, "step": 103330 }, { "epoch": 40.13, "learning_rate": 1.464906148867314e-05, "loss": 0.0399, "step": 103340 }, { "epoch": 40.14, "learning_rate": 1.464854368932039e-05, "loss": 0.0534, "step": 103350 }, { "epoch": 40.14, "learning_rate": 1.4648025889967639e-05, "loss": 0.151, "step": 103360 }, { "epoch": 40.14, "learning_rate": 1.4647508090614889e-05, "loss": 0.1134, "step": 103370 }, { "epoch": 40.15, "learning_rate": 1.4646990291262138e-05, "loss": 0.0912, "step": 103380 }, { "epoch": 40.15, "learning_rate": 1.4646472491909388e-05, "loss": 0.051, "step": 103390 }, { "epoch": 40.16, "learning_rate": 1.4645954692556636e-05, "loss": 0.0653, "step": 103400 }, { "epoch": 40.16, "learning_rate": 1.4645436893203884e-05, "loss": 0.113, "step": 103410 }, { "epoch": 40.16, "learning_rate": 1.4644919093851133e-05, "loss": 0.0629, "step": 103420 }, { "epoch": 40.17, "learning_rate": 1.4644401294498383e-05, "loss": 0.0813, "step": 103430 }, { "epoch": 40.17, "learning_rate": 1.4643883495145633e-05, "loss": 0.0229, "step": 103440 }, { "epoch": 40.17, "learning_rate": 1.4643365695792882e-05, "loss": 0.0783, "step": 103450 }, { "epoch": 40.18, "learning_rate": 1.4642847896440132e-05, "loss": 0.1043, "step": 103460 }, { "epoch": 40.18, "learning_rate": 1.464233009708738e-05, "loss": 0.0723, "step": 103470 }, { "epoch": 40.19, "learning_rate": 1.464181229773463e-05, "loss": 0.1835, "step": 103480 }, { "epoch": 40.19, "learning_rate": 1.4641294498381877e-05, "loss": 0.0827, "step": 103490 }, { "epoch": 40.19, "learning_rate": 1.4640776699029127e-05, "loss": 0.1413, "step": 103500 }, { "epoch": 40.2, "learning_rate": 1.4640258899676376e-05, "loss": 0.2662, "step": 103510 }, { "epoch": 40.2, "learning_rate": 1.4639741100323626e-05, "loss": 0.0384, "step": 103520 }, { "epoch": 40.21, "learning_rate": 1.4639223300970876e-05, "loss": 0.1289, "step": 103530 }, { "epoch": 40.21, "learning_rate": 1.4638705501618125e-05, "loss": 0.1093, "step": 103540 }, { "epoch": 40.21, "learning_rate": 1.4638187702265373e-05, "loss": 0.0874, "step": 103550 }, { "epoch": 40.22, "learning_rate": 1.4637669902912623e-05, "loss": 0.2186, "step": 103560 }, { "epoch": 40.22, "learning_rate": 1.463715210355987e-05, "loss": 0.072, "step": 103570 }, { "epoch": 40.23, "learning_rate": 1.463663430420712e-05, "loss": 0.0625, "step": 103580 }, { "epoch": 40.23, "learning_rate": 1.463611650485437e-05, "loss": 0.0409, "step": 103590 }, { "epoch": 40.23, "learning_rate": 1.463559870550162e-05, "loss": 0.0795, "step": 103600 }, { "epoch": 40.24, "learning_rate": 1.463508090614887e-05, "loss": 0.095, "step": 103610 }, { "epoch": 40.24, "learning_rate": 1.4634563106796117e-05, "loss": 0.1445, "step": 103620 }, { "epoch": 40.24, "learning_rate": 1.4634045307443367e-05, "loss": 0.1215, "step": 103630 }, { "epoch": 40.25, "learning_rate": 1.4633527508090616e-05, "loss": 0.0006, "step": 103640 }, { "epoch": 40.25, "learning_rate": 1.4633009708737864e-05, "loss": 0.1537, "step": 103650 }, { "epoch": 40.26, "learning_rate": 1.4632491909385114e-05, "loss": 0.0679, "step": 103660 }, { "epoch": 40.26, "learning_rate": 1.4631974110032364e-05, "loss": 0.0255, "step": 103670 }, { "epoch": 40.26, "learning_rate": 1.4631456310679613e-05, "loss": 0.0167, "step": 103680 }, { "epoch": 40.27, "learning_rate": 1.4630938511326863e-05, "loss": 0.0367, "step": 103690 }, { "epoch": 40.27, "learning_rate": 1.463042071197411e-05, "loss": 0.0788, "step": 103700 }, { "epoch": 40.28, "learning_rate": 1.462990291262136e-05, "loss": 0.0308, "step": 103710 }, { "epoch": 40.28, "learning_rate": 1.462938511326861e-05, "loss": 0.1272, "step": 103720 }, { "epoch": 40.28, "learning_rate": 1.462886731391586e-05, "loss": 0.1584, "step": 103730 }, { "epoch": 40.29, "learning_rate": 1.4628349514563108e-05, "loss": 0.2329, "step": 103740 }, { "epoch": 40.29, "learning_rate": 1.4627831715210357e-05, "loss": 0.2744, "step": 103750 }, { "epoch": 40.3, "learning_rate": 1.4627313915857607e-05, "loss": 0.1069, "step": 103760 }, { "epoch": 40.3, "learning_rate": 1.4626796116504856e-05, "loss": 0.1002, "step": 103770 }, { "epoch": 40.3, "learning_rate": 1.4626278317152104e-05, "loss": 0.1557, "step": 103780 }, { "epoch": 40.31, "learning_rate": 1.4625760517799354e-05, "loss": 0.134, "step": 103790 }, { "epoch": 40.31, "learning_rate": 1.4625242718446604e-05, "loss": 0.11, "step": 103800 }, { "epoch": 40.31, "learning_rate": 1.4624724919093853e-05, "loss": 0.1116, "step": 103810 }, { "epoch": 40.32, "learning_rate": 1.4624207119741101e-05, "loss": 0.0648, "step": 103820 }, { "epoch": 40.32, "learning_rate": 1.462368932038835e-05, "loss": 0.0032, "step": 103830 }, { "epoch": 40.33, "learning_rate": 1.46231715210356e-05, "loss": 0.0287, "step": 103840 }, { "epoch": 40.33, "learning_rate": 1.4622653721682848e-05, "loss": 0.0797, "step": 103850 }, { "epoch": 40.33, "learning_rate": 1.4622135922330098e-05, "loss": 0.0343, "step": 103860 }, { "epoch": 40.34, "learning_rate": 1.4621618122977347e-05, "loss": 0.1381, "step": 103870 }, { "epoch": 40.34, "learning_rate": 1.4621100323624597e-05, "loss": 0.0485, "step": 103880 }, { "epoch": 40.35, "learning_rate": 1.4620582524271847e-05, "loss": 0.1236, "step": 103890 }, { "epoch": 40.35, "learning_rate": 1.4620064724919095e-05, "loss": 0.0305, "step": 103900 }, { "epoch": 40.35, "learning_rate": 1.4619546925566344e-05, "loss": 0.0093, "step": 103910 }, { "epoch": 40.36, "learning_rate": 1.4619029126213594e-05, "loss": 0.0299, "step": 103920 }, { "epoch": 40.36, "learning_rate": 1.4618511326860842e-05, "loss": 0.0693, "step": 103930 }, { "epoch": 40.37, "learning_rate": 1.4617993527508091e-05, "loss": 0.0322, "step": 103940 }, { "epoch": 40.37, "learning_rate": 1.4617475728155341e-05, "loss": 0.0047, "step": 103950 }, { "epoch": 40.37, "learning_rate": 1.461695792880259e-05, "loss": 0.034, "step": 103960 }, { "epoch": 40.38, "learning_rate": 1.461644012944984e-05, "loss": 0.0613, "step": 103970 }, { "epoch": 40.38, "learning_rate": 1.4615922330097088e-05, "loss": 0.0148, "step": 103980 }, { "epoch": 40.38, "learning_rate": 1.4615404530744338e-05, "loss": 0.0945, "step": 103990 }, { "epoch": 40.39, "learning_rate": 1.4614886731391586e-05, "loss": 0.0891, "step": 104000 }, { "epoch": 40.39, "learning_rate": 1.4614368932038835e-05, "loss": 0.0344, "step": 104010 }, { "epoch": 40.4, "learning_rate": 1.4613851132686085e-05, "loss": 0.1106, "step": 104020 }, { "epoch": 40.4, "learning_rate": 1.4613333333333335e-05, "loss": 0.0146, "step": 104030 }, { "epoch": 40.4, "learning_rate": 1.4612815533980584e-05, "loss": 0.1867, "step": 104040 }, { "epoch": 40.41, "learning_rate": 1.4612297734627834e-05, "loss": 0.0843, "step": 104050 }, { "epoch": 40.41, "learning_rate": 1.4611779935275082e-05, "loss": 0.1028, "step": 104060 }, { "epoch": 40.42, "learning_rate": 1.4611262135922331e-05, "loss": 0.2123, "step": 104070 }, { "epoch": 40.42, "learning_rate": 1.461074433656958e-05, "loss": 0.0612, "step": 104080 }, { "epoch": 40.42, "learning_rate": 1.4610226537216829e-05, "loss": 0.1338, "step": 104090 }, { "epoch": 40.43, "learning_rate": 1.4609708737864079e-05, "loss": 0.0421, "step": 104100 }, { "epoch": 40.43, "learning_rate": 1.4609190938511328e-05, "loss": 0.1733, "step": 104110 }, { "epoch": 40.43, "learning_rate": 1.4608673139158578e-05, "loss": 0.1029, "step": 104120 }, { "epoch": 40.44, "learning_rate": 1.4608155339805827e-05, "loss": 0.0488, "step": 104130 }, { "epoch": 40.44, "learning_rate": 1.4607637540453075e-05, "loss": 0.0673, "step": 104140 }, { "epoch": 40.45, "learning_rate": 1.4607119741100323e-05, "loss": 0.0781, "step": 104150 }, { "epoch": 40.45, "learning_rate": 1.4606601941747573e-05, "loss": 0.0287, "step": 104160 }, { "epoch": 40.45, "learning_rate": 1.4606084142394822e-05, "loss": 0.0422, "step": 104170 }, { "epoch": 40.46, "learning_rate": 1.4605566343042072e-05, "loss": 0.0202, "step": 104180 }, { "epoch": 40.46, "learning_rate": 1.4605048543689322e-05, "loss": 0.0862, "step": 104190 }, { "epoch": 40.47, "learning_rate": 1.4604530744336571e-05, "loss": 0.0402, "step": 104200 }, { "epoch": 40.47, "learning_rate": 1.4604012944983821e-05, "loss": 0.1079, "step": 104210 }, { "epoch": 40.47, "learning_rate": 1.4603495145631069e-05, "loss": 0.036, "step": 104220 }, { "epoch": 40.48, "learning_rate": 1.4602977346278317e-05, "loss": 0.0708, "step": 104230 }, { "epoch": 40.48, "learning_rate": 1.4602459546925566e-05, "loss": 0.3168, "step": 104240 }, { "epoch": 40.49, "learning_rate": 1.4601941747572816e-05, "loss": 0.0304, "step": 104250 }, { "epoch": 40.49, "learning_rate": 1.4601423948220066e-05, "loss": 0.0778, "step": 104260 }, { "epoch": 40.49, "learning_rate": 1.4600906148867315e-05, "loss": 0.0726, "step": 104270 }, { "epoch": 40.5, "learning_rate": 1.4600388349514565e-05, "loss": 0.0122, "step": 104280 }, { "epoch": 40.5, "learning_rate": 1.4599870550161814e-05, "loss": 0.0343, "step": 104290 }, { "epoch": 40.5, "learning_rate": 1.4599352750809064e-05, "loss": 0.0184, "step": 104300 }, { "epoch": 40.51, "learning_rate": 1.459883495145631e-05, "loss": 0.1732, "step": 104310 }, { "epoch": 40.51, "learning_rate": 1.459831715210356e-05, "loss": 0.0248, "step": 104320 }, { "epoch": 40.52, "learning_rate": 1.459779935275081e-05, "loss": 0.0959, "step": 104330 }, { "epoch": 40.52, "learning_rate": 1.459728155339806e-05, "loss": 0.2013, "step": 104340 }, { "epoch": 40.52, "learning_rate": 1.4596763754045309e-05, "loss": 0.1094, "step": 104350 }, { "epoch": 40.53, "learning_rate": 1.4596245954692558e-05, "loss": 0.0455, "step": 104360 }, { "epoch": 40.53, "learning_rate": 1.4595728155339808e-05, "loss": 0.1691, "step": 104370 }, { "epoch": 40.54, "learning_rate": 1.4595210355987058e-05, "loss": 0.1876, "step": 104380 }, { "epoch": 40.54, "learning_rate": 1.4594692556634304e-05, "loss": 0.0784, "step": 104390 }, { "epoch": 40.54, "learning_rate": 1.4594174757281554e-05, "loss": 0.0646, "step": 104400 }, { "epoch": 40.55, "learning_rate": 1.4593656957928803e-05, "loss": 0.1644, "step": 104410 }, { "epoch": 40.55, "learning_rate": 1.4593139158576053e-05, "loss": 0.0688, "step": 104420 }, { "epoch": 40.56, "learning_rate": 1.4592621359223302e-05, "loss": 0.1552, "step": 104430 }, { "epoch": 40.56, "learning_rate": 1.4592103559870552e-05, "loss": 0.0885, "step": 104440 }, { "epoch": 40.56, "learning_rate": 1.4591585760517802e-05, "loss": 0.0453, "step": 104450 }, { "epoch": 40.57, "learning_rate": 1.4591067961165051e-05, "loss": 0.0986, "step": 104460 }, { "epoch": 40.57, "learning_rate": 1.4590550161812297e-05, "loss": 0.013, "step": 104470 }, { "epoch": 40.57, "learning_rate": 1.4590032362459547e-05, "loss": 0.0926, "step": 104480 }, { "epoch": 40.58, "learning_rate": 1.4589514563106797e-05, "loss": 0.1861, "step": 104490 }, { "epoch": 40.58, "learning_rate": 1.4588996763754046e-05, "loss": 0.1103, "step": 104500 }, { "epoch": 40.59, "learning_rate": 1.4588478964401296e-05, "loss": 0.0343, "step": 104510 }, { "epoch": 40.59, "learning_rate": 1.4587961165048546e-05, "loss": 0.1347, "step": 104520 }, { "epoch": 40.59, "learning_rate": 1.4587443365695795e-05, "loss": 0.0597, "step": 104530 }, { "epoch": 40.6, "learning_rate": 1.4586925566343045e-05, "loss": 0.2743, "step": 104540 }, { "epoch": 40.6, "learning_rate": 1.4586407766990291e-05, "loss": 0.2749, "step": 104550 }, { "epoch": 40.61, "learning_rate": 1.458588996763754e-05, "loss": 0.0921, "step": 104560 }, { "epoch": 40.61, "learning_rate": 1.458537216828479e-05, "loss": 0.0578, "step": 104570 }, { "epoch": 40.61, "learning_rate": 1.458485436893204e-05, "loss": 0.1066, "step": 104580 }, { "epoch": 40.62, "learning_rate": 1.458433656957929e-05, "loss": 0.1194, "step": 104590 }, { "epoch": 40.62, "learning_rate": 1.4583818770226539e-05, "loss": 0.0865, "step": 104600 }, { "epoch": 40.63, "learning_rate": 1.4583300970873789e-05, "loss": 0.1293, "step": 104610 }, { "epoch": 40.63, "learning_rate": 1.4582783171521038e-05, "loss": 0.0796, "step": 104620 }, { "epoch": 40.63, "learning_rate": 1.4582265372168285e-05, "loss": 0.0669, "step": 104630 }, { "epoch": 40.64, "learning_rate": 1.4581747572815534e-05, "loss": 0.0631, "step": 104640 }, { "epoch": 40.64, "learning_rate": 1.4581229773462784e-05, "loss": 0.0617, "step": 104650 }, { "epoch": 40.64, "learning_rate": 1.4580711974110033e-05, "loss": 0.0073, "step": 104660 }, { "epoch": 40.65, "learning_rate": 1.4580194174757283e-05, "loss": 0.1032, "step": 104670 }, { "epoch": 40.65, "learning_rate": 1.4579676375404533e-05, "loss": 0.0525, "step": 104680 }, { "epoch": 40.66, "learning_rate": 1.4579158576051782e-05, "loss": 0.1193, "step": 104690 }, { "epoch": 40.66, "learning_rate": 1.4578640776699032e-05, "loss": 0.2036, "step": 104700 }, { "epoch": 40.66, "learning_rate": 1.4578122977346278e-05, "loss": 0.0594, "step": 104710 }, { "epoch": 40.67, "learning_rate": 1.4577605177993528e-05, "loss": 0.0562, "step": 104720 }, { "epoch": 40.67, "learning_rate": 1.4577087378640777e-05, "loss": 0.1597, "step": 104730 }, { "epoch": 40.68, "learning_rate": 1.4576569579288027e-05, "loss": 0.0518, "step": 104740 }, { "epoch": 40.68, "learning_rate": 1.4576051779935277e-05, "loss": 0.0932, "step": 104750 }, { "epoch": 40.68, "learning_rate": 1.4575533980582526e-05, "loss": 0.2233, "step": 104760 }, { "epoch": 40.69, "learning_rate": 1.4575016181229776e-05, "loss": 0.0042, "step": 104770 }, { "epoch": 40.69, "learning_rate": 1.4574498381877025e-05, "loss": 0.1803, "step": 104780 }, { "epoch": 40.7, "learning_rate": 1.4573980582524272e-05, "loss": 0.0053, "step": 104790 }, { "epoch": 40.7, "learning_rate": 1.4573462783171521e-05, "loss": 0.149, "step": 104800 }, { "epoch": 40.7, "learning_rate": 1.4572944983818771e-05, "loss": 0.0249, "step": 104810 }, { "epoch": 40.71, "learning_rate": 1.457242718446602e-05, "loss": 0.052, "step": 104820 }, { "epoch": 40.71, "learning_rate": 1.457190938511327e-05, "loss": 0.004, "step": 104830 }, { "epoch": 40.71, "learning_rate": 1.457139158576052e-05, "loss": 0.1521, "step": 104840 }, { "epoch": 40.72, "learning_rate": 1.457087378640777e-05, "loss": 0.0782, "step": 104850 }, { "epoch": 40.72, "learning_rate": 1.4570355987055019e-05, "loss": 0.01, "step": 104860 }, { "epoch": 40.73, "learning_rate": 1.4569838187702267e-05, "loss": 0.1625, "step": 104870 }, { "epoch": 40.73, "learning_rate": 1.4569320388349515e-05, "loss": 0.0531, "step": 104880 }, { "epoch": 40.73, "learning_rate": 1.4568802588996764e-05, "loss": 0.228, "step": 104890 }, { "epoch": 40.74, "learning_rate": 1.4568284789644014e-05, "loss": 0.0036, "step": 104900 }, { "epoch": 40.74, "learning_rate": 1.4567766990291264e-05, "loss": 0.1281, "step": 104910 }, { "epoch": 40.75, "learning_rate": 1.4567249190938513e-05, "loss": 0.1211, "step": 104920 }, { "epoch": 40.75, "learning_rate": 1.4566731391585763e-05, "loss": 0.0979, "step": 104930 }, { "epoch": 40.75, "learning_rate": 1.4566213592233011e-05, "loss": 0.165, "step": 104940 }, { "epoch": 40.76, "learning_rate": 1.456569579288026e-05, "loss": 0.0326, "step": 104950 }, { "epoch": 40.76, "learning_rate": 1.4565177993527508e-05, "loss": 0.1103, "step": 104960 }, { "epoch": 40.77, "learning_rate": 1.4564660194174758e-05, "loss": 0.0319, "step": 104970 }, { "epoch": 40.77, "learning_rate": 1.4564142394822008e-05, "loss": 0.1743, "step": 104980 }, { "epoch": 40.77, "learning_rate": 1.4563624595469257e-05, "loss": 0.1302, "step": 104990 }, { "epoch": 40.78, "learning_rate": 1.4563106796116507e-05, "loss": 0.09, "step": 105000 }, { "epoch": 40.78, "learning_rate": 1.4562588996763757e-05, "loss": 0.0623, "step": 105010 }, { "epoch": 40.78, "learning_rate": 1.4562071197411004e-05, "loss": 0.0156, "step": 105020 }, { "epoch": 40.79, "learning_rate": 1.4561553398058254e-05, "loss": 0.1069, "step": 105030 }, { "epoch": 40.79, "learning_rate": 1.4561035598705502e-05, "loss": 0.0407, "step": 105040 }, { "epoch": 40.8, "learning_rate": 1.4560517799352752e-05, "loss": 0.1419, "step": 105050 }, { "epoch": 40.8, "learning_rate": 1.4560000000000001e-05, "loss": 0.0479, "step": 105060 }, { "epoch": 40.8, "learning_rate": 1.455948220064725e-05, "loss": 0.1584, "step": 105070 }, { "epoch": 40.81, "learning_rate": 1.45589644012945e-05, "loss": 0.0117, "step": 105080 }, { "epoch": 40.81, "learning_rate": 1.4558446601941748e-05, "loss": 0.0621, "step": 105090 }, { "epoch": 40.82, "learning_rate": 1.4557928802588998e-05, "loss": 0.1259, "step": 105100 }, { "epoch": 40.82, "learning_rate": 1.4557411003236248e-05, "loss": 0.0587, "step": 105110 }, { "epoch": 40.82, "learning_rate": 1.4556893203883496e-05, "loss": 0.0933, "step": 105120 }, { "epoch": 40.83, "learning_rate": 1.4556375404530745e-05, "loss": 0.012, "step": 105130 }, { "epoch": 40.83, "learning_rate": 1.4555857605177995e-05, "loss": 0.0402, "step": 105140 }, { "epoch": 40.83, "learning_rate": 1.4555339805825244e-05, "loss": 0.137, "step": 105150 }, { "epoch": 40.84, "learning_rate": 1.4554822006472494e-05, "loss": 0.2, "step": 105160 }, { "epoch": 40.84, "learning_rate": 1.4554304207119742e-05, "loss": 0.0897, "step": 105170 }, { "epoch": 40.85, "learning_rate": 1.4553786407766992e-05, "loss": 0.1763, "step": 105180 }, { "epoch": 40.85, "learning_rate": 1.4553268608414241e-05, "loss": 0.2383, "step": 105190 }, { "epoch": 40.85, "learning_rate": 1.4552750809061489e-05, "loss": 0.0314, "step": 105200 }, { "epoch": 40.86, "learning_rate": 1.4552233009708739e-05, "loss": 0.192, "step": 105210 }, { "epoch": 40.86, "learning_rate": 1.4551715210355988e-05, "loss": 0.0912, "step": 105220 }, { "epoch": 40.87, "learning_rate": 1.4551197411003238e-05, "loss": 0.0575, "step": 105230 }, { "epoch": 40.87, "learning_rate": 1.4550679611650486e-05, "loss": 0.1005, "step": 105240 }, { "epoch": 40.87, "learning_rate": 1.4550161812297735e-05, "loss": 0.1279, "step": 105250 }, { "epoch": 40.88, "learning_rate": 1.4549644012944985e-05, "loss": 0.0821, "step": 105260 }, { "epoch": 40.88, "learning_rate": 1.4549126213592235e-05, "loss": 0.1343, "step": 105270 }, { "epoch": 40.89, "learning_rate": 1.4548608414239483e-05, "loss": 0.0398, "step": 105280 }, { "epoch": 40.89, "learning_rate": 1.4548090614886732e-05, "loss": 0.023, "step": 105290 }, { "epoch": 40.89, "learning_rate": 1.4547572815533982e-05, "loss": 0.1831, "step": 105300 }, { "epoch": 40.9, "learning_rate": 1.4547055016181231e-05, "loss": 0.0436, "step": 105310 }, { "epoch": 40.9, "learning_rate": 1.454653721682848e-05, "loss": 0.0251, "step": 105320 }, { "epoch": 40.9, "learning_rate": 1.4546019417475729e-05, "loss": 0.0694, "step": 105330 }, { "epoch": 40.91, "learning_rate": 1.4545501618122979e-05, "loss": 0.0694, "step": 105340 }, { "epoch": 40.91, "learning_rate": 1.4544983818770228e-05, "loss": 0.1078, "step": 105350 }, { "epoch": 40.92, "learning_rate": 1.4544466019417476e-05, "loss": 0.1371, "step": 105360 }, { "epoch": 40.92, "learning_rate": 1.4543948220064726e-05, "loss": 0.0754, "step": 105370 }, { "epoch": 40.92, "learning_rate": 1.4543430420711975e-05, "loss": 0.1372, "step": 105380 }, { "epoch": 40.93, "learning_rate": 1.4542912621359225e-05, "loss": 0.1088, "step": 105390 }, { "epoch": 40.93, "learning_rate": 1.4542394822006473e-05, "loss": 0.0711, "step": 105400 }, { "epoch": 40.94, "learning_rate": 1.4541877022653723e-05, "loss": 0.0614, "step": 105410 }, { "epoch": 40.94, "learning_rate": 1.4541359223300972e-05, "loss": 0.1192, "step": 105420 }, { "epoch": 40.94, "learning_rate": 1.4540841423948222e-05, "loss": 0.0211, "step": 105430 }, { "epoch": 40.95, "learning_rate": 1.4540323624595471e-05, "loss": 0.0042, "step": 105440 }, { "epoch": 40.95, "learning_rate": 1.453980582524272e-05, "loss": 0.0833, "step": 105450 }, { "epoch": 40.96, "learning_rate": 1.4539288025889969e-05, "loss": 0.1226, "step": 105460 }, { "epoch": 40.96, "learning_rate": 1.4538770226537217e-05, "loss": 0.0888, "step": 105470 }, { "epoch": 40.96, "learning_rate": 1.4538252427184467e-05, "loss": 0.1802, "step": 105480 }, { "epoch": 40.97, "learning_rate": 1.4537734627831716e-05, "loss": 0.2005, "step": 105490 }, { "epoch": 40.97, "learning_rate": 1.4537216828478966e-05, "loss": 0.0428, "step": 105500 }, { "epoch": 40.97, "learning_rate": 1.4536699029126215e-05, "loss": 0.0287, "step": 105510 }, { "epoch": 40.98, "learning_rate": 1.4536181229773465e-05, "loss": 0.1419, "step": 105520 }, { "epoch": 40.98, "learning_rate": 1.4535663430420713e-05, "loss": 0.0189, "step": 105530 }, { "epoch": 40.99, "learning_rate": 1.4535145631067963e-05, "loss": 0.1525, "step": 105540 }, { "epoch": 40.99, "learning_rate": 1.453462783171521e-05, "loss": 0.1164, "step": 105550 }, { "epoch": 40.99, "learning_rate": 1.453411003236246e-05, "loss": 0.096, "step": 105560 }, { "epoch": 41.0, "learning_rate": 1.453359223300971e-05, "loss": 0.0943, "step": 105570 }, { "epoch": 41.0, "eval_accuracy": 0.9507565337001376, "eval_loss": 0.30752032995224, "eval_runtime": 8.282, "eval_samples_per_second": 438.906, "eval_steps_per_second": 54.939, "step": 105575 }, { "epoch": 41.0, "learning_rate": 1.453307443365696e-05, "loss": 0.0476, "step": 105580 }, { "epoch": 41.01, "learning_rate": 1.4532556634304209e-05, "loss": 0.2659, "step": 105590 }, { "epoch": 41.01, "learning_rate": 1.4532038834951459e-05, "loss": 0.0195, "step": 105600 }, { "epoch": 41.01, "learning_rate": 1.4531521035598706e-05, "loss": 0.0496, "step": 105610 }, { "epoch": 41.02, "learning_rate": 1.4531003236245954e-05, "loss": 0.0261, "step": 105620 }, { "epoch": 41.02, "learning_rate": 1.4530485436893204e-05, "loss": 0.0284, "step": 105630 }, { "epoch": 41.03, "learning_rate": 1.4529967637540454e-05, "loss": 0.0751, "step": 105640 }, { "epoch": 41.03, "learning_rate": 1.4529449838187703e-05, "loss": 0.1325, "step": 105650 }, { "epoch": 41.03, "learning_rate": 1.4528932038834953e-05, "loss": 0.0962, "step": 105660 }, { "epoch": 41.04, "learning_rate": 1.4528414239482202e-05, "loss": 0.1527, "step": 105670 }, { "epoch": 41.04, "learning_rate": 1.4527896440129452e-05, "loss": 0.1306, "step": 105680 }, { "epoch": 41.04, "learning_rate": 1.45273786407767e-05, "loss": 0.0232, "step": 105690 }, { "epoch": 41.05, "learning_rate": 1.4526860841423948e-05, "loss": 0.055, "step": 105700 }, { "epoch": 41.05, "learning_rate": 1.4526343042071198e-05, "loss": 0.0105, "step": 105710 }, { "epoch": 41.06, "learning_rate": 1.4525825242718447e-05, "loss": 0.1397, "step": 105720 }, { "epoch": 41.06, "learning_rate": 1.4525307443365697e-05, "loss": 0.0572, "step": 105730 }, { "epoch": 41.06, "learning_rate": 1.4524789644012946e-05, "loss": 0.0725, "step": 105740 }, { "epoch": 41.07, "learning_rate": 1.4524271844660196e-05, "loss": 0.192, "step": 105750 }, { "epoch": 41.07, "learning_rate": 1.4523754045307446e-05, "loss": 0.1866, "step": 105760 }, { "epoch": 41.08, "learning_rate": 1.4523236245954692e-05, "loss": 0.142, "step": 105770 }, { "epoch": 41.08, "learning_rate": 1.4522718446601942e-05, "loss": 0.0585, "step": 105780 }, { "epoch": 41.08, "learning_rate": 1.4522200647249191e-05, "loss": 0.0398, "step": 105790 }, { "epoch": 41.09, "learning_rate": 1.452168284789644e-05, "loss": 0.183, "step": 105800 }, { "epoch": 41.09, "learning_rate": 1.452116504854369e-05, "loss": 0.0389, "step": 105810 }, { "epoch": 41.1, "learning_rate": 1.452064724919094e-05, "loss": 0.0698, "step": 105820 }, { "epoch": 41.1, "learning_rate": 1.452012944983819e-05, "loss": 0.0986, "step": 105830 }, { "epoch": 41.1, "learning_rate": 1.451961165048544e-05, "loss": 0.2809, "step": 105840 }, { "epoch": 41.11, "learning_rate": 1.4519093851132685e-05, "loss": 0.0431, "step": 105850 }, { "epoch": 41.11, "learning_rate": 1.4518576051779935e-05, "loss": 0.1241, "step": 105860 }, { "epoch": 41.11, "learning_rate": 1.4518058252427185e-05, "loss": 0.0583, "step": 105870 }, { "epoch": 41.12, "learning_rate": 1.4517540453074434e-05, "loss": 0.0125, "step": 105880 }, { "epoch": 41.12, "learning_rate": 1.4517022653721684e-05, "loss": 0.0397, "step": 105890 }, { "epoch": 41.13, "learning_rate": 1.4516504854368934e-05, "loss": 0.1454, "step": 105900 }, { "epoch": 41.13, "learning_rate": 1.4515987055016183e-05, "loss": 0.031, "step": 105910 }, { "epoch": 41.13, "learning_rate": 1.4515469255663433e-05, "loss": 0.0266, "step": 105920 }, { "epoch": 41.14, "learning_rate": 1.4514951456310679e-05, "loss": 0.1638, "step": 105930 }, { "epoch": 41.14, "learning_rate": 1.4514433656957929e-05, "loss": 0.1444, "step": 105940 }, { "epoch": 41.15, "learning_rate": 1.4513915857605178e-05, "loss": 0.0429, "step": 105950 }, { "epoch": 41.15, "learning_rate": 1.4513398058252428e-05, "loss": 0.078, "step": 105960 }, { "epoch": 41.15, "learning_rate": 1.4512880258899677e-05, "loss": 0.0689, "step": 105970 }, { "epoch": 41.16, "learning_rate": 1.4512362459546927e-05, "loss": 0.3087, "step": 105980 }, { "epoch": 41.16, "learning_rate": 1.4511844660194177e-05, "loss": 0.1536, "step": 105990 }, { "epoch": 41.17, "learning_rate": 1.4511326860841426e-05, "loss": 0.1169, "step": 106000 }, { "epoch": 41.17, "learning_rate": 1.4510809061488676e-05, "loss": 0.1162, "step": 106010 }, { "epoch": 41.17, "learning_rate": 1.4510291262135922e-05, "loss": 0.04, "step": 106020 }, { "epoch": 41.18, "learning_rate": 1.4509773462783172e-05, "loss": 0.1046, "step": 106030 }, { "epoch": 41.18, "learning_rate": 1.4509255663430421e-05, "loss": 0.0684, "step": 106040 }, { "epoch": 41.18, "learning_rate": 1.4508737864077671e-05, "loss": 0.2015, "step": 106050 }, { "epoch": 41.19, "learning_rate": 1.450822006472492e-05, "loss": 0.0765, "step": 106060 }, { "epoch": 41.19, "learning_rate": 1.450770226537217e-05, "loss": 0.041, "step": 106070 }, { "epoch": 41.2, "learning_rate": 1.450718446601942e-05, "loss": 0.1708, "step": 106080 }, { "epoch": 41.2, "learning_rate": 1.450666666666667e-05, "loss": 0.0803, "step": 106090 }, { "epoch": 41.2, "learning_rate": 1.4506148867313916e-05, "loss": 0.0709, "step": 106100 }, { "epoch": 41.21, "learning_rate": 1.4505631067961165e-05, "loss": 0.0331, "step": 106110 }, { "epoch": 41.21, "learning_rate": 1.4505113268608415e-05, "loss": 0.0474, "step": 106120 }, { "epoch": 41.22, "learning_rate": 1.4504595469255665e-05, "loss": 0.1259, "step": 106130 }, { "epoch": 41.22, "learning_rate": 1.4504077669902914e-05, "loss": 0.044, "step": 106140 }, { "epoch": 41.22, "learning_rate": 1.4503559870550164e-05, "loss": 0.1158, "step": 106150 }, { "epoch": 41.23, "learning_rate": 1.4503042071197413e-05, "loss": 0.0692, "step": 106160 }, { "epoch": 41.23, "learning_rate": 1.4502524271844663e-05, "loss": 0.0374, "step": 106170 }, { "epoch": 41.23, "learning_rate": 1.450200647249191e-05, "loss": 0.0912, "step": 106180 }, { "epoch": 41.24, "learning_rate": 1.4501488673139159e-05, "loss": 0.0327, "step": 106190 }, { "epoch": 41.24, "learning_rate": 1.4500970873786409e-05, "loss": 0.1957, "step": 106200 }, { "epoch": 41.25, "learning_rate": 1.4500453074433658e-05, "loss": 0.2219, "step": 106210 }, { "epoch": 41.25, "learning_rate": 1.4499935275080908e-05, "loss": 0.0375, "step": 106220 }, { "epoch": 41.25, "learning_rate": 1.4499417475728157e-05, "loss": 0.0592, "step": 106230 }, { "epoch": 41.26, "learning_rate": 1.4498899676375407e-05, "loss": 0.0302, "step": 106240 }, { "epoch": 41.26, "learning_rate": 1.4498381877022657e-05, "loss": 0.0888, "step": 106250 }, { "epoch": 41.27, "learning_rate": 1.4497864077669903e-05, "loss": 0.0585, "step": 106260 }, { "epoch": 41.27, "learning_rate": 1.4497346278317152e-05, "loss": 0.0684, "step": 106270 }, { "epoch": 41.27, "learning_rate": 1.4496828478964402e-05, "loss": 0.0634, "step": 106280 }, { "epoch": 41.28, "learning_rate": 1.4496310679611652e-05, "loss": 0.2387, "step": 106290 }, { "epoch": 41.28, "learning_rate": 1.4495792880258901e-05, "loss": 0.107, "step": 106300 }, { "epoch": 41.29, "learning_rate": 1.4495275080906151e-05, "loss": 0.1224, "step": 106310 }, { "epoch": 41.29, "learning_rate": 1.44947572815534e-05, "loss": 0.0648, "step": 106320 }, { "epoch": 41.29, "learning_rate": 1.449423948220065e-05, "loss": 0.1275, "step": 106330 }, { "epoch": 41.3, "learning_rate": 1.4493721682847896e-05, "loss": 0.1111, "step": 106340 }, { "epoch": 41.3, "learning_rate": 1.4493203883495146e-05, "loss": 0.1144, "step": 106350 }, { "epoch": 41.3, "learning_rate": 1.4492686084142396e-05, "loss": 0.0088, "step": 106360 }, { "epoch": 41.31, "learning_rate": 1.4492168284789645e-05, "loss": 0.0863, "step": 106370 }, { "epoch": 41.31, "learning_rate": 1.4491650485436895e-05, "loss": 0.0435, "step": 106380 }, { "epoch": 41.32, "learning_rate": 1.4491132686084145e-05, "loss": 0.1411, "step": 106390 }, { "epoch": 41.32, "learning_rate": 1.4490614886731394e-05, "loss": 0.0611, "step": 106400 }, { "epoch": 41.32, "learning_rate": 1.4490097087378642e-05, "loss": 0.1254, "step": 106410 }, { "epoch": 41.33, "learning_rate": 1.448957928802589e-05, "loss": 0.0144, "step": 106420 }, { "epoch": 41.33, "learning_rate": 1.448906148867314e-05, "loss": 0.0755, "step": 106430 }, { "epoch": 41.34, "learning_rate": 1.448854368932039e-05, "loss": 0.0007, "step": 106440 }, { "epoch": 41.34, "learning_rate": 1.4488025889967639e-05, "loss": 0.031, "step": 106450 }, { "epoch": 41.34, "learning_rate": 1.4487508090614888e-05, "loss": 0.0535, "step": 106460 }, { "epoch": 41.35, "learning_rate": 1.4486990291262138e-05, "loss": 0.0434, "step": 106470 }, { "epoch": 41.35, "learning_rate": 1.4486472491909388e-05, "loss": 0.2348, "step": 106480 }, { "epoch": 41.36, "learning_rate": 1.4485954692556636e-05, "loss": 0.0023, "step": 106490 }, { "epoch": 41.36, "learning_rate": 1.4485436893203884e-05, "loss": 0.0137, "step": 106500 }, { "epoch": 41.36, "learning_rate": 1.4484919093851133e-05, "loss": 0.0641, "step": 106510 }, { "epoch": 41.37, "learning_rate": 1.4484401294498383e-05, "loss": 0.0879, "step": 106520 }, { "epoch": 41.37, "learning_rate": 1.4483883495145632e-05, "loss": 0.1138, "step": 106530 }, { "epoch": 41.37, "learning_rate": 1.4483365695792882e-05, "loss": 0.0652, "step": 106540 }, { "epoch": 41.38, "learning_rate": 1.4482847896440132e-05, "loss": 0.088, "step": 106550 }, { "epoch": 41.38, "learning_rate": 1.448233009708738e-05, "loss": 0.13, "step": 106560 }, { "epoch": 41.39, "learning_rate": 1.448181229773463e-05, "loss": 0.0816, "step": 106570 }, { "epoch": 41.39, "learning_rate": 1.4481294498381879e-05, "loss": 0.0127, "step": 106580 }, { "epoch": 41.39, "learning_rate": 1.4480776699029127e-05, "loss": 0.0525, "step": 106590 }, { "epoch": 41.4, "learning_rate": 1.4480258899676376e-05, "loss": 0.0383, "step": 106600 }, { "epoch": 41.4, "learning_rate": 1.4479741100323626e-05, "loss": 0.0635, "step": 106610 }, { "epoch": 41.41, "learning_rate": 1.4479223300970876e-05, "loss": 0.1877, "step": 106620 }, { "epoch": 41.41, "learning_rate": 1.4478705501618125e-05, "loss": 0.2468, "step": 106630 }, { "epoch": 41.41, "learning_rate": 1.4478187702265373e-05, "loss": 0.0163, "step": 106640 }, { "epoch": 41.42, "learning_rate": 1.4477669902912623e-05, "loss": 0.1283, "step": 106650 }, { "epoch": 41.42, "learning_rate": 1.4477152103559872e-05, "loss": 0.0306, "step": 106660 }, { "epoch": 41.43, "learning_rate": 1.447663430420712e-05, "loss": 0.0272, "step": 106670 }, { "epoch": 41.43, "learning_rate": 1.447611650485437e-05, "loss": 0.0932, "step": 106680 }, { "epoch": 41.43, "learning_rate": 1.447559870550162e-05, "loss": 0.0189, "step": 106690 }, { "epoch": 41.44, "learning_rate": 1.4475080906148869e-05, "loss": 0.0246, "step": 106700 }, { "epoch": 41.44, "learning_rate": 1.4474563106796117e-05, "loss": 0.0123, "step": 106710 }, { "epoch": 41.44, "learning_rate": 1.4474045307443367e-05, "loss": 0.0744, "step": 106720 }, { "epoch": 41.45, "learning_rate": 1.4473527508090616e-05, "loss": 0.0348, "step": 106730 }, { "epoch": 41.45, "learning_rate": 1.4473009708737866e-05, "loss": 0.0806, "step": 106740 }, { "epoch": 41.46, "learning_rate": 1.4472491909385114e-05, "loss": 0.0625, "step": 106750 }, { "epoch": 41.46, "learning_rate": 1.4471974110032363e-05, "loss": 0.0989, "step": 106760 }, { "epoch": 41.46, "learning_rate": 1.4471456310679613e-05, "loss": 0.0413, "step": 106770 }, { "epoch": 41.47, "learning_rate": 1.4470938511326863e-05, "loss": 0.1258, "step": 106780 }, { "epoch": 41.47, "learning_rate": 1.447042071197411e-05, "loss": 0.0746, "step": 106790 }, { "epoch": 41.48, "learning_rate": 1.446990291262136e-05, "loss": 0.1428, "step": 106800 }, { "epoch": 41.48, "learning_rate": 1.446938511326861e-05, "loss": 0.0777, "step": 106810 }, { "epoch": 41.48, "learning_rate": 1.446886731391586e-05, "loss": 0.1865, "step": 106820 }, { "epoch": 41.49, "learning_rate": 1.4468349514563107e-05, "loss": 0.0564, "step": 106830 }, { "epoch": 41.49, "learning_rate": 1.4467831715210357e-05, "loss": 0.0956, "step": 106840 }, { "epoch": 41.5, "learning_rate": 1.4467313915857607e-05, "loss": 0.1574, "step": 106850 }, { "epoch": 41.5, "learning_rate": 1.4466796116504856e-05, "loss": 0.3024, "step": 106860 }, { "epoch": 41.5, "learning_rate": 1.4466278317152104e-05, "loss": 0.0117, "step": 106870 }, { "epoch": 41.51, "learning_rate": 1.4465760517799354e-05, "loss": 0.0215, "step": 106880 }, { "epoch": 41.51, "learning_rate": 1.4465242718446603e-05, "loss": 0.1202, "step": 106890 }, { "epoch": 41.51, "learning_rate": 1.4464724919093853e-05, "loss": 0.1238, "step": 106900 }, { "epoch": 41.52, "learning_rate": 1.4464207119741101e-05, "loss": 0.0007, "step": 106910 }, { "epoch": 41.52, "learning_rate": 1.446368932038835e-05, "loss": 0.0662, "step": 106920 }, { "epoch": 41.53, "learning_rate": 1.44631715210356e-05, "loss": 0.1244, "step": 106930 }, { "epoch": 41.53, "learning_rate": 1.4462653721682848e-05, "loss": 0.2429, "step": 106940 }, { "epoch": 41.53, "learning_rate": 1.4462135922330098e-05, "loss": 0.1481, "step": 106950 }, { "epoch": 41.54, "learning_rate": 1.4461618122977347e-05, "loss": 0.0392, "step": 106960 }, { "epoch": 41.54, "learning_rate": 1.4461100323624597e-05, "loss": 0.137, "step": 106970 }, { "epoch": 41.55, "learning_rate": 1.4460582524271847e-05, "loss": 0.131, "step": 106980 }, { "epoch": 41.55, "learning_rate": 1.4460064724919094e-05, "loss": 0.0657, "step": 106990 }, { "epoch": 41.55, "learning_rate": 1.4459546925566344e-05, "loss": 0.1604, "step": 107000 }, { "epoch": 41.56, "learning_rate": 1.4459029126213594e-05, "loss": 0.0857, "step": 107010 }, { "epoch": 41.56, "learning_rate": 1.4458511326860842e-05, "loss": 0.0357, "step": 107020 }, { "epoch": 41.57, "learning_rate": 1.4457993527508091e-05, "loss": 0.1334, "step": 107030 }, { "epoch": 41.57, "learning_rate": 1.4457475728155341e-05, "loss": 0.1217, "step": 107040 }, { "epoch": 41.57, "learning_rate": 1.445695792880259e-05, "loss": 0.0513, "step": 107050 }, { "epoch": 41.58, "learning_rate": 1.445644012944984e-05, "loss": 0.0502, "step": 107060 }, { "epoch": 41.58, "learning_rate": 1.4455922330097088e-05, "loss": 0.0889, "step": 107070 }, { "epoch": 41.58, "learning_rate": 1.4455404530744338e-05, "loss": 0.0425, "step": 107080 }, { "epoch": 41.59, "learning_rate": 1.4454886731391586e-05, "loss": 0.2441, "step": 107090 }, { "epoch": 41.59, "learning_rate": 1.4454368932038835e-05, "loss": 0.0927, "step": 107100 }, { "epoch": 41.6, "learning_rate": 1.4453851132686085e-05, "loss": 0.0172, "step": 107110 }, { "epoch": 41.6, "learning_rate": 1.4453333333333334e-05, "loss": 0.0007, "step": 107120 }, { "epoch": 41.6, "learning_rate": 1.4452815533980584e-05, "loss": 0.0533, "step": 107130 }, { "epoch": 41.61, "learning_rate": 1.4452297734627834e-05, "loss": 0.1533, "step": 107140 }, { "epoch": 41.61, "learning_rate": 1.4451779935275083e-05, "loss": 0.0994, "step": 107150 }, { "epoch": 41.62, "learning_rate": 1.4451262135922331e-05, "loss": 0.0784, "step": 107160 }, { "epoch": 41.62, "learning_rate": 1.4450744336569579e-05, "loss": 0.0782, "step": 107170 }, { "epoch": 41.62, "learning_rate": 1.4450226537216829e-05, "loss": 0.159, "step": 107180 }, { "epoch": 41.63, "learning_rate": 1.4449708737864078e-05, "loss": 0.0476, "step": 107190 }, { "epoch": 41.63, "learning_rate": 1.4449190938511328e-05, "loss": 0.1539, "step": 107200 }, { "epoch": 41.63, "learning_rate": 1.4448673139158578e-05, "loss": 0.1535, "step": 107210 }, { "epoch": 41.64, "learning_rate": 1.4448155339805827e-05, "loss": 0.0607, "step": 107220 }, { "epoch": 41.64, "learning_rate": 1.4447637540453077e-05, "loss": 0.0034, "step": 107230 }, { "epoch": 41.65, "learning_rate": 1.4447119741100323e-05, "loss": 0.2325, "step": 107240 }, { "epoch": 41.65, "learning_rate": 1.4446601941747573e-05, "loss": 0.139, "step": 107250 }, { "epoch": 41.65, "learning_rate": 1.4446084142394822e-05, "loss": 0.059, "step": 107260 }, { "epoch": 41.66, "learning_rate": 1.4445566343042072e-05, "loss": 0.0452, "step": 107270 }, { "epoch": 41.66, "learning_rate": 1.4445048543689322e-05, "loss": 0.193, "step": 107280 }, { "epoch": 41.67, "learning_rate": 1.4444530744336571e-05, "loss": 0.0855, "step": 107290 }, { "epoch": 41.67, "learning_rate": 1.444401294498382e-05, "loss": 0.0604, "step": 107300 }, { "epoch": 41.67, "learning_rate": 1.444349514563107e-05, "loss": 0.1636, "step": 107310 }, { "epoch": 41.68, "learning_rate": 1.4442977346278317e-05, "loss": 0.1498, "step": 107320 }, { "epoch": 41.68, "learning_rate": 1.4442459546925566e-05, "loss": 0.0537, "step": 107330 }, { "epoch": 41.69, "learning_rate": 1.4441941747572816e-05, "loss": 0.043, "step": 107340 }, { "epoch": 41.69, "learning_rate": 1.4441423948220065e-05, "loss": 0.0522, "step": 107350 }, { "epoch": 41.69, "learning_rate": 1.4440906148867315e-05, "loss": 0.1496, "step": 107360 }, { "epoch": 41.7, "learning_rate": 1.4440388349514565e-05, "loss": 0.0681, "step": 107370 }, { "epoch": 41.7, "learning_rate": 1.4439870550161814e-05, "loss": 0.1359, "step": 107380 }, { "epoch": 41.7, "learning_rate": 1.4439352750809064e-05, "loss": 0.16, "step": 107390 }, { "epoch": 41.71, "learning_rate": 1.443883495145631e-05, "loss": 0.1084, "step": 107400 }, { "epoch": 41.71, "learning_rate": 1.443831715210356e-05, "loss": 0.1352, "step": 107410 }, { "epoch": 41.72, "learning_rate": 1.443779935275081e-05, "loss": 0.1554, "step": 107420 }, { "epoch": 41.72, "learning_rate": 1.4437281553398059e-05, "loss": 0.0416, "step": 107430 }, { "epoch": 41.72, "learning_rate": 1.4436763754045309e-05, "loss": 0.0732, "step": 107440 }, { "epoch": 41.73, "learning_rate": 1.4436245954692558e-05, "loss": 0.1191, "step": 107450 }, { "epoch": 41.73, "learning_rate": 1.4435728155339808e-05, "loss": 0.0822, "step": 107460 }, { "epoch": 41.74, "learning_rate": 1.4435210355987058e-05, "loss": 0.0643, "step": 107470 }, { "epoch": 41.74, "learning_rate": 1.4434692556634304e-05, "loss": 0.042, "step": 107480 }, { "epoch": 41.74, "learning_rate": 1.4434174757281553e-05, "loss": 0.121, "step": 107490 }, { "epoch": 41.75, "learning_rate": 1.4433656957928803e-05, "loss": 0.1494, "step": 107500 }, { "epoch": 41.75, "learning_rate": 1.4433139158576053e-05, "loss": 0.0384, "step": 107510 }, { "epoch": 41.76, "learning_rate": 1.4432621359223302e-05, "loss": 0.0252, "step": 107520 }, { "epoch": 41.76, "learning_rate": 1.4432103559870552e-05, "loss": 0.0634, "step": 107530 }, { "epoch": 41.76, "learning_rate": 1.4431585760517801e-05, "loss": 0.2084, "step": 107540 }, { "epoch": 41.77, "learning_rate": 1.4431067961165051e-05, "loss": 0.0539, "step": 107550 }, { "epoch": 41.77, "learning_rate": 1.4430550161812297e-05, "loss": 0.1632, "step": 107560 }, { "epoch": 41.77, "learning_rate": 1.4430032362459547e-05, "loss": 0.0012, "step": 107570 }, { "epoch": 41.78, "learning_rate": 1.4429514563106797e-05, "loss": 0.0259, "step": 107580 }, { "epoch": 41.78, "learning_rate": 1.4428996763754046e-05, "loss": 0.0906, "step": 107590 }, { "epoch": 41.79, "learning_rate": 1.4428478964401296e-05, "loss": 0.146, "step": 107600 }, { "epoch": 41.79, "learning_rate": 1.4427961165048545e-05, "loss": 0.0529, "step": 107610 }, { "epoch": 41.79, "learning_rate": 1.4427443365695795e-05, "loss": 0.0226, "step": 107620 }, { "epoch": 41.8, "learning_rate": 1.4426925566343045e-05, "loss": 0.0814, "step": 107630 }, { "epoch": 41.8, "learning_rate": 1.4426407766990294e-05, "loss": 0.0606, "step": 107640 }, { "epoch": 41.81, "learning_rate": 1.442588996763754e-05, "loss": 0.2571, "step": 107650 }, { "epoch": 41.81, "learning_rate": 1.442537216828479e-05, "loss": 0.1144, "step": 107660 }, { "epoch": 41.81, "learning_rate": 1.442485436893204e-05, "loss": 0.0218, "step": 107670 }, { "epoch": 41.82, "learning_rate": 1.442433656957929e-05, "loss": 0.082, "step": 107680 }, { "epoch": 41.82, "learning_rate": 1.4423818770226539e-05, "loss": 0.0004, "step": 107690 }, { "epoch": 41.83, "learning_rate": 1.4423300970873789e-05, "loss": 0.136, "step": 107700 }, { "epoch": 41.83, "learning_rate": 1.4422783171521038e-05, "loss": 0.1199, "step": 107710 }, { "epoch": 41.83, "learning_rate": 1.4422265372168288e-05, "loss": 0.077, "step": 107720 }, { "epoch": 41.84, "learning_rate": 1.4421747572815534e-05, "loss": 0.0015, "step": 107730 }, { "epoch": 41.84, "learning_rate": 1.4421229773462784e-05, "loss": 0.1094, "step": 107740 }, { "epoch": 41.84, "learning_rate": 1.4420711974110033e-05, "loss": 0.0005, "step": 107750 }, { "epoch": 41.85, "learning_rate": 1.4420194174757283e-05, "loss": 0.0749, "step": 107760 }, { "epoch": 41.85, "learning_rate": 1.4419676375404532e-05, "loss": 0.0443, "step": 107770 }, { "epoch": 41.86, "learning_rate": 1.4419158576051782e-05, "loss": 0.0195, "step": 107780 }, { "epoch": 41.86, "learning_rate": 1.4418640776699032e-05, "loss": 0.0594, "step": 107790 }, { "epoch": 41.86, "learning_rate": 1.4418122977346281e-05, "loss": 0.0118, "step": 107800 }, { "epoch": 41.87, "learning_rate": 1.4417605177993528e-05, "loss": 0.0457, "step": 107810 }, { "epoch": 41.87, "learning_rate": 1.4417087378640777e-05, "loss": 0.0327, "step": 107820 }, { "epoch": 41.88, "learning_rate": 1.4416569579288027e-05, "loss": 0.0659, "step": 107830 }, { "epoch": 41.88, "learning_rate": 1.4416051779935276e-05, "loss": 0.1326, "step": 107840 }, { "epoch": 41.88, "learning_rate": 1.4415533980582526e-05, "loss": 0.0028, "step": 107850 }, { "epoch": 41.89, "learning_rate": 1.4415016181229776e-05, "loss": 0.0004, "step": 107860 }, { "epoch": 41.89, "learning_rate": 1.4414498381877025e-05, "loss": 0.0308, "step": 107870 }, { "epoch": 41.9, "learning_rate": 1.4413980582524273e-05, "loss": 0.061, "step": 107880 }, { "epoch": 41.9, "learning_rate": 1.4413462783171521e-05, "loss": 0.0317, "step": 107890 }, { "epoch": 41.9, "learning_rate": 1.441294498381877e-05, "loss": 0.3771, "step": 107900 }, { "epoch": 41.91, "learning_rate": 1.441242718446602e-05, "loss": 0.0993, "step": 107910 }, { "epoch": 41.91, "learning_rate": 1.441190938511327e-05, "loss": 0.0966, "step": 107920 }, { "epoch": 41.91, "learning_rate": 1.441139158576052e-05, "loss": 0.0325, "step": 107930 }, { "epoch": 41.92, "learning_rate": 1.441087378640777e-05, "loss": 0.1256, "step": 107940 }, { "epoch": 41.92, "learning_rate": 1.4410355987055019e-05, "loss": 0.0293, "step": 107950 }, { "epoch": 41.93, "learning_rate": 1.4409838187702267e-05, "loss": 0.0121, "step": 107960 }, { "epoch": 41.93, "learning_rate": 1.4409320388349515e-05, "loss": 0.1379, "step": 107970 }, { "epoch": 41.93, "learning_rate": 1.4408802588996764e-05, "loss": 0.1667, "step": 107980 }, { "epoch": 41.94, "learning_rate": 1.4408284789644014e-05, "loss": 0.1153, "step": 107990 }, { "epoch": 41.94, "learning_rate": 1.4407766990291264e-05, "loss": 0.1259, "step": 108000 }, { "epoch": 41.95, "learning_rate": 1.4407249190938513e-05, "loss": 0.0505, "step": 108010 }, { "epoch": 41.95, "learning_rate": 1.4406731391585763e-05, "loss": 0.1428, "step": 108020 }, { "epoch": 41.95, "learning_rate": 1.440621359223301e-05, "loss": 0.1565, "step": 108030 }, { "epoch": 41.96, "learning_rate": 1.440569579288026e-05, "loss": 0.1931, "step": 108040 }, { "epoch": 41.96, "learning_rate": 1.4405177993527508e-05, "loss": 0.0551, "step": 108050 }, { "epoch": 41.97, "learning_rate": 1.4404660194174758e-05, "loss": 0.1546, "step": 108060 }, { "epoch": 41.97, "learning_rate": 1.4404142394822007e-05, "loss": 0.0484, "step": 108070 }, { "epoch": 41.97, "learning_rate": 1.4403624595469257e-05, "loss": 0.2425, "step": 108080 }, { "epoch": 41.98, "learning_rate": 1.4403106796116507e-05, "loss": 0.0775, "step": 108090 }, { "epoch": 41.98, "learning_rate": 1.4402588996763756e-05, "loss": 0.1118, "step": 108100 }, { "epoch": 41.98, "learning_rate": 1.4402071197411004e-05, "loss": 0.0833, "step": 108110 }, { "epoch": 41.99, "learning_rate": 1.4401553398058254e-05, "loss": 0.0532, "step": 108120 }, { "epoch": 41.99, "learning_rate": 1.4401035598705502e-05, "loss": 0.2897, "step": 108130 }, { "epoch": 42.0, "learning_rate": 1.4400517799352751e-05, "loss": 0.0734, "step": 108140 }, { "epoch": 42.0, "learning_rate": 1.4400000000000001e-05, "loss": 0.0018, "step": 108150 }, { "epoch": 42.0, "eval_accuracy": 0.951856946354883, "eval_loss": 0.3062381148338318, "eval_runtime": 8.1991, "eval_samples_per_second": 443.341, "eval_steps_per_second": 55.494, "step": 108150 }, { "epoch": 42.0, "learning_rate": 1.439948220064725e-05, "loss": 0.1145, "step": 108160 }, { "epoch": 42.01, "learning_rate": 1.43989644012945e-05, "loss": 0.0497, "step": 108170 }, { "epoch": 42.01, "learning_rate": 1.4398446601941748e-05, "loss": 0.1018, "step": 108180 }, { "epoch": 42.02, "learning_rate": 1.4397928802588998e-05, "loss": 0.0391, "step": 108190 }, { "epoch": 42.02, "learning_rate": 1.4397411003236247e-05, "loss": 0.2001, "step": 108200 }, { "epoch": 42.02, "learning_rate": 1.4396893203883497e-05, "loss": 0.0848, "step": 108210 }, { "epoch": 42.03, "learning_rate": 1.4396375404530745e-05, "loss": 0.0906, "step": 108220 }, { "epoch": 42.03, "learning_rate": 1.4395857605177995e-05, "loss": 0.2576, "step": 108230 }, { "epoch": 42.03, "learning_rate": 1.4395339805825244e-05, "loss": 0.062, "step": 108240 }, { "epoch": 42.04, "learning_rate": 1.4394822006472494e-05, "loss": 0.1169, "step": 108250 }, { "epoch": 42.04, "learning_rate": 1.4394304207119742e-05, "loss": 0.0024, "step": 108260 }, { "epoch": 42.05, "learning_rate": 1.4393786407766991e-05, "loss": 0.0312, "step": 108270 }, { "epoch": 42.05, "learning_rate": 1.4393268608414241e-05, "loss": 0.116, "step": 108280 }, { "epoch": 42.05, "learning_rate": 1.439275080906149e-05, "loss": 0.0845, "step": 108290 }, { "epoch": 42.06, "learning_rate": 1.4392233009708739e-05, "loss": 0.0468, "step": 108300 }, { "epoch": 42.06, "learning_rate": 1.4391715210355988e-05, "loss": 0.0546, "step": 108310 }, { "epoch": 42.07, "learning_rate": 1.4391197411003238e-05, "loss": 0.0552, "step": 108320 }, { "epoch": 42.07, "learning_rate": 1.4390679611650487e-05, "loss": 0.0781, "step": 108330 }, { "epoch": 42.07, "learning_rate": 1.4390161812297735e-05, "loss": 0.145, "step": 108340 }, { "epoch": 42.08, "learning_rate": 1.4389644012944985e-05, "loss": 0.0225, "step": 108350 }, { "epoch": 42.08, "learning_rate": 1.4389126213592235e-05, "loss": 0.142, "step": 108360 }, { "epoch": 42.09, "learning_rate": 1.4388608414239484e-05, "loss": 0.0222, "step": 108370 }, { "epoch": 42.09, "learning_rate": 1.4388090614886732e-05, "loss": 0.0384, "step": 108380 }, { "epoch": 42.09, "learning_rate": 1.4387572815533982e-05, "loss": 0.0186, "step": 108390 }, { "epoch": 42.1, "learning_rate": 1.4387055016181231e-05, "loss": 0.0383, "step": 108400 }, { "epoch": 42.1, "learning_rate": 1.438653721682848e-05, "loss": 0.0778, "step": 108410 }, { "epoch": 42.1, "learning_rate": 1.4386019417475729e-05, "loss": 0.0546, "step": 108420 }, { "epoch": 42.11, "learning_rate": 1.4385501618122978e-05, "loss": 0.0316, "step": 108430 }, { "epoch": 42.11, "learning_rate": 1.4384983818770228e-05, "loss": 0.0102, "step": 108440 }, { "epoch": 42.12, "learning_rate": 1.4384466019417478e-05, "loss": 0.1168, "step": 108450 }, { "epoch": 42.12, "learning_rate": 1.4383948220064726e-05, "loss": 0.1587, "step": 108460 }, { "epoch": 42.12, "learning_rate": 1.4383430420711975e-05, "loss": 0.0445, "step": 108470 }, { "epoch": 42.13, "learning_rate": 1.4382912621359225e-05, "loss": 0.3303, "step": 108480 }, { "epoch": 42.13, "learning_rate": 1.4382394822006473e-05, "loss": 0.0989, "step": 108490 }, { "epoch": 42.14, "learning_rate": 1.4381877022653722e-05, "loss": 0.0632, "step": 108500 }, { "epoch": 42.14, "learning_rate": 1.4381359223300972e-05, "loss": 0.0839, "step": 108510 }, { "epoch": 42.14, "learning_rate": 1.4380841423948222e-05, "loss": 0.0724, "step": 108520 }, { "epoch": 42.15, "learning_rate": 1.4380323624595471e-05, "loss": 0.1131, "step": 108530 }, { "epoch": 42.15, "learning_rate": 1.437980582524272e-05, "loss": 0.0477, "step": 108540 }, { "epoch": 42.16, "learning_rate": 1.4379288025889969e-05, "loss": 0.1338, "step": 108550 }, { "epoch": 42.16, "learning_rate": 1.4378770226537217e-05, "loss": 0.0363, "step": 108560 }, { "epoch": 42.16, "learning_rate": 1.4378252427184466e-05, "loss": 0.0934, "step": 108570 }, { "epoch": 42.17, "learning_rate": 1.4377734627831716e-05, "loss": 0.0422, "step": 108580 }, { "epoch": 42.17, "learning_rate": 1.4377216828478966e-05, "loss": 0.0099, "step": 108590 }, { "epoch": 42.17, "learning_rate": 1.4376699029126215e-05, "loss": 0.0449, "step": 108600 }, { "epoch": 42.18, "learning_rate": 1.4376181229773465e-05, "loss": 0.0017, "step": 108610 }, { "epoch": 42.18, "learning_rate": 1.4375663430420713e-05, "loss": 0.0563, "step": 108620 }, { "epoch": 42.19, "learning_rate": 1.4375145631067962e-05, "loss": 0.1052, "step": 108630 }, { "epoch": 42.19, "learning_rate": 1.437462783171521e-05, "loss": 0.0569, "step": 108640 }, { "epoch": 42.19, "learning_rate": 1.437411003236246e-05, "loss": 0.0908, "step": 108650 }, { "epoch": 42.2, "learning_rate": 1.437359223300971e-05, "loss": 0.0908, "step": 108660 }, { "epoch": 42.2, "learning_rate": 1.437307443365696e-05, "loss": 0.1022, "step": 108670 }, { "epoch": 42.21, "learning_rate": 1.4372556634304209e-05, "loss": 0.2435, "step": 108680 }, { "epoch": 42.21, "learning_rate": 1.4372038834951458e-05, "loss": 0.0425, "step": 108690 }, { "epoch": 42.21, "learning_rate": 1.4371521035598706e-05, "loss": 0.0185, "step": 108700 }, { "epoch": 42.22, "learning_rate": 1.4371003236245954e-05, "loss": 0.0525, "step": 108710 }, { "epoch": 42.22, "learning_rate": 1.4370485436893204e-05, "loss": 0.0953, "step": 108720 }, { "epoch": 42.23, "learning_rate": 1.4369967637540453e-05, "loss": 0.1887, "step": 108730 }, { "epoch": 42.23, "learning_rate": 1.4369449838187703e-05, "loss": 0.0085, "step": 108740 }, { "epoch": 42.23, "learning_rate": 1.4368932038834953e-05, "loss": 0.073, "step": 108750 }, { "epoch": 42.24, "learning_rate": 1.4368414239482202e-05, "loss": 0.0631, "step": 108760 }, { "epoch": 42.24, "learning_rate": 1.4367896440129452e-05, "loss": 0.0789, "step": 108770 }, { "epoch": 42.24, "learning_rate": 1.4367378640776702e-05, "loss": 0.1682, "step": 108780 }, { "epoch": 42.25, "learning_rate": 1.4366860841423948e-05, "loss": 0.043, "step": 108790 }, { "epoch": 42.25, "learning_rate": 1.4366343042071197e-05, "loss": 0.0397, "step": 108800 }, { "epoch": 42.26, "learning_rate": 1.4365825242718447e-05, "loss": 0.1086, "step": 108810 }, { "epoch": 42.26, "learning_rate": 1.4365307443365697e-05, "loss": 0.0721, "step": 108820 }, { "epoch": 42.26, "learning_rate": 1.4364789644012946e-05, "loss": 0.1072, "step": 108830 }, { "epoch": 42.27, "learning_rate": 1.4364271844660196e-05, "loss": 0.0111, "step": 108840 }, { "epoch": 42.27, "learning_rate": 1.4363754045307446e-05, "loss": 0.0864, "step": 108850 }, { "epoch": 42.28, "learning_rate": 1.4363236245954695e-05, "loss": 0.0892, "step": 108860 }, { "epoch": 42.28, "learning_rate": 1.4362718446601941e-05, "loss": 0.0006, "step": 108870 }, { "epoch": 42.28, "learning_rate": 1.4362200647249191e-05, "loss": 0.0423, "step": 108880 }, { "epoch": 42.29, "learning_rate": 1.436168284789644e-05, "loss": 0.0112, "step": 108890 }, { "epoch": 42.29, "learning_rate": 1.436116504854369e-05, "loss": 0.2227, "step": 108900 }, { "epoch": 42.3, "learning_rate": 1.436064724919094e-05, "loss": 0.0289, "step": 108910 }, { "epoch": 42.3, "learning_rate": 1.436012944983819e-05, "loss": 0.055, "step": 108920 }, { "epoch": 42.3, "learning_rate": 1.4359611650485439e-05, "loss": 0.0445, "step": 108930 }, { "epoch": 42.31, "learning_rate": 1.4359093851132689e-05, "loss": 0.1008, "step": 108940 }, { "epoch": 42.31, "learning_rate": 1.4358576051779935e-05, "loss": 0.0613, "step": 108950 }, { "epoch": 42.31, "learning_rate": 1.4358058252427185e-05, "loss": 0.035, "step": 108960 }, { "epoch": 42.32, "learning_rate": 1.4357540453074434e-05, "loss": 0.044, "step": 108970 }, { "epoch": 42.32, "learning_rate": 1.4357022653721684e-05, "loss": 0.1074, "step": 108980 }, { "epoch": 42.33, "learning_rate": 1.4356504854368933e-05, "loss": 0.0143, "step": 108990 }, { "epoch": 42.33, "learning_rate": 1.4355987055016183e-05, "loss": 0.0813, "step": 109000 }, { "epoch": 42.33, "learning_rate": 1.4355469255663433e-05, "loss": 0.11, "step": 109010 }, { "epoch": 42.34, "learning_rate": 1.4354951456310682e-05, "loss": 0.1026, "step": 109020 }, { "epoch": 42.34, "learning_rate": 1.4354433656957928e-05, "loss": 0.0651, "step": 109030 }, { "epoch": 42.35, "learning_rate": 1.4353915857605178e-05, "loss": 0.0922, "step": 109040 }, { "epoch": 42.35, "learning_rate": 1.4353398058252428e-05, "loss": 0.2437, "step": 109050 }, { "epoch": 42.35, "learning_rate": 1.4352880258899677e-05, "loss": 0.0151, "step": 109060 }, { "epoch": 42.36, "learning_rate": 1.4352362459546927e-05, "loss": 0.0438, "step": 109070 }, { "epoch": 42.36, "learning_rate": 1.4351844660194177e-05, "loss": 0.1338, "step": 109080 }, { "epoch": 42.37, "learning_rate": 1.4351326860841426e-05, "loss": 0.0701, "step": 109090 }, { "epoch": 42.37, "learning_rate": 1.4350809061488676e-05, "loss": 0.0704, "step": 109100 }, { "epoch": 42.37, "learning_rate": 1.4350291262135922e-05, "loss": 0.0279, "step": 109110 }, { "epoch": 42.38, "learning_rate": 1.4349773462783172e-05, "loss": 0.1986, "step": 109120 }, { "epoch": 42.38, "learning_rate": 1.4349255663430421e-05, "loss": 0.0574, "step": 109130 }, { "epoch": 42.38, "learning_rate": 1.4348737864077671e-05, "loss": 0.0883, "step": 109140 }, { "epoch": 42.39, "learning_rate": 1.434822006472492e-05, "loss": 0.0254, "step": 109150 }, { "epoch": 42.39, "learning_rate": 1.434770226537217e-05, "loss": 0.0012, "step": 109160 }, { "epoch": 42.4, "learning_rate": 1.434718446601942e-05, "loss": 0.0166, "step": 109170 }, { "epoch": 42.4, "learning_rate": 1.434666666666667e-05, "loss": 0.0087, "step": 109180 }, { "epoch": 42.4, "learning_rate": 1.4346148867313916e-05, "loss": 0.0371, "step": 109190 }, { "epoch": 42.41, "learning_rate": 1.4345631067961165e-05, "loss": 0.092, "step": 109200 }, { "epoch": 42.41, "learning_rate": 1.4345113268608415e-05, "loss": 0.0889, "step": 109210 }, { "epoch": 42.42, "learning_rate": 1.4344595469255664e-05, "loss": 0.061, "step": 109220 }, { "epoch": 42.42, "learning_rate": 1.4344077669902914e-05, "loss": 0.0156, "step": 109230 }, { "epoch": 42.42, "learning_rate": 1.4343559870550164e-05, "loss": 0.0286, "step": 109240 }, { "epoch": 42.43, "learning_rate": 1.4343042071197413e-05, "loss": 0.106, "step": 109250 }, { "epoch": 42.43, "learning_rate": 1.4342524271844663e-05, "loss": 0.0132, "step": 109260 }, { "epoch": 42.43, "learning_rate": 1.4342006472491909e-05, "loss": 0.0771, "step": 109270 }, { "epoch": 42.44, "learning_rate": 1.4341488673139159e-05, "loss": 0.0751, "step": 109280 }, { "epoch": 42.44, "learning_rate": 1.4340970873786408e-05, "loss": 0.1707, "step": 109290 }, { "epoch": 42.45, "learning_rate": 1.4340453074433658e-05, "loss": 0.0452, "step": 109300 }, { "epoch": 42.45, "learning_rate": 1.4339935275080908e-05, "loss": 0.0155, "step": 109310 }, { "epoch": 42.45, "learning_rate": 1.4339417475728157e-05, "loss": 0.0225, "step": 109320 }, { "epoch": 42.46, "learning_rate": 1.4338899676375407e-05, "loss": 0.0431, "step": 109330 }, { "epoch": 42.46, "learning_rate": 1.4338381877022656e-05, "loss": 0.2571, "step": 109340 }, { "epoch": 42.47, "learning_rate": 1.4337864077669904e-05, "loss": 0.2743, "step": 109350 }, { "epoch": 42.47, "learning_rate": 1.4337346278317152e-05, "loss": 0.0286, "step": 109360 }, { "epoch": 42.47, "learning_rate": 1.4336828478964402e-05, "loss": 0.0522, "step": 109370 }, { "epoch": 42.48, "learning_rate": 1.4336310679611652e-05, "loss": 0.0927, "step": 109380 }, { "epoch": 42.48, "learning_rate": 1.4335792880258901e-05, "loss": 0.1181, "step": 109390 }, { "epoch": 42.49, "learning_rate": 1.433527508090615e-05, "loss": 0.0794, "step": 109400 }, { "epoch": 42.49, "learning_rate": 1.43347572815534e-05, "loss": 0.0968, "step": 109410 }, { "epoch": 42.49, "learning_rate": 1.433423948220065e-05, "loss": 0.3346, "step": 109420 }, { "epoch": 42.5, "learning_rate": 1.4333721682847898e-05, "loss": 0.1778, "step": 109430 }, { "epoch": 42.5, "learning_rate": 1.4333203883495146e-05, "loss": 0.1185, "step": 109440 }, { "epoch": 42.5, "learning_rate": 1.4332686084142395e-05, "loss": 0.128, "step": 109450 }, { "epoch": 42.51, "learning_rate": 1.4332168284789645e-05, "loss": 0.0931, "step": 109460 }, { "epoch": 42.51, "learning_rate": 1.4331650485436895e-05, "loss": 0.1722, "step": 109470 }, { "epoch": 42.52, "learning_rate": 1.4331132686084144e-05, "loss": 0.1178, "step": 109480 }, { "epoch": 42.52, "learning_rate": 1.4330614886731394e-05, "loss": 0.1376, "step": 109490 }, { "epoch": 42.52, "learning_rate": 1.4330097087378642e-05, "loss": 0.0381, "step": 109500 }, { "epoch": 42.53, "learning_rate": 1.4329579288025891e-05, "loss": 0.0153, "step": 109510 }, { "epoch": 42.53, "learning_rate": 1.432906148867314e-05, "loss": 0.1925, "step": 109520 }, { "epoch": 42.54, "learning_rate": 1.4328543689320389e-05, "loss": 0.0316, "step": 109530 }, { "epoch": 42.54, "learning_rate": 1.4328025889967639e-05, "loss": 0.0603, "step": 109540 }, { "epoch": 42.54, "learning_rate": 1.4327508090614888e-05, "loss": 0.0873, "step": 109550 }, { "epoch": 42.55, "learning_rate": 1.4326990291262138e-05, "loss": 0.1117, "step": 109560 }, { "epoch": 42.55, "learning_rate": 1.4326472491909388e-05, "loss": 0.013, "step": 109570 }, { "epoch": 42.56, "learning_rate": 1.4325954692556635e-05, "loss": 0.099, "step": 109580 }, { "epoch": 42.56, "learning_rate": 1.4325436893203885e-05, "loss": 0.1597, "step": 109590 }, { "epoch": 42.56, "learning_rate": 1.4324919093851133e-05, "loss": 0.018, "step": 109600 }, { "epoch": 42.57, "learning_rate": 1.4324401294498383e-05, "loss": 0.0563, "step": 109610 }, { "epoch": 42.57, "learning_rate": 1.4323883495145632e-05, "loss": 0.189, "step": 109620 }, { "epoch": 42.57, "learning_rate": 1.4323365695792882e-05, "loss": 0.0139, "step": 109630 }, { "epoch": 42.58, "learning_rate": 1.4322847896440131e-05, "loss": 0.1256, "step": 109640 }, { "epoch": 42.58, "learning_rate": 1.432233009708738e-05, "loss": 0.0891, "step": 109650 }, { "epoch": 42.59, "learning_rate": 1.4321812297734629e-05, "loss": 0.0511, "step": 109660 }, { "epoch": 42.59, "learning_rate": 1.4321294498381879e-05, "loss": 0.0906, "step": 109670 }, { "epoch": 42.59, "learning_rate": 1.4320776699029127e-05, "loss": 0.1231, "step": 109680 }, { "epoch": 42.6, "learning_rate": 1.4320258899676376e-05, "loss": 0.0159, "step": 109690 }, { "epoch": 42.6, "learning_rate": 1.4319741100323626e-05, "loss": 0.0003, "step": 109700 }, { "epoch": 42.61, "learning_rate": 1.4319223300970875e-05, "loss": 0.269, "step": 109710 }, { "epoch": 42.61, "learning_rate": 1.4318705501618125e-05, "loss": 0.2327, "step": 109720 }, { "epoch": 42.61, "learning_rate": 1.4318187702265373e-05, "loss": 0.0519, "step": 109730 }, { "epoch": 42.62, "learning_rate": 1.4317669902912623e-05, "loss": 0.0884, "step": 109740 }, { "epoch": 42.62, "learning_rate": 1.4317152103559872e-05, "loss": 0.0247, "step": 109750 }, { "epoch": 42.63, "learning_rate": 1.431663430420712e-05, "loss": 0.0819, "step": 109760 }, { "epoch": 42.63, "learning_rate": 1.431611650485437e-05, "loss": 0.0719, "step": 109770 }, { "epoch": 42.63, "learning_rate": 1.431559870550162e-05, "loss": 0.0951, "step": 109780 }, { "epoch": 42.64, "learning_rate": 1.4315080906148869e-05, "loss": 0.0333, "step": 109790 }, { "epoch": 42.64, "learning_rate": 1.4314563106796117e-05, "loss": 0.1231, "step": 109800 }, { "epoch": 42.64, "learning_rate": 1.4314045307443366e-05, "loss": 0.0394, "step": 109810 }, { "epoch": 42.65, "learning_rate": 1.4313527508090616e-05, "loss": 0.1134, "step": 109820 }, { "epoch": 42.65, "learning_rate": 1.4313009708737866e-05, "loss": 0.0469, "step": 109830 }, { "epoch": 42.66, "learning_rate": 1.4312491909385114e-05, "loss": 0.1567, "step": 109840 }, { "epoch": 42.66, "learning_rate": 1.4311974110032363e-05, "loss": 0.1041, "step": 109850 }, { "epoch": 42.66, "learning_rate": 1.4311456310679613e-05, "loss": 0.0702, "step": 109860 }, { "epoch": 42.67, "learning_rate": 1.4310938511326863e-05, "loss": 0.0218, "step": 109870 }, { "epoch": 42.67, "learning_rate": 1.431042071197411e-05, "loss": 0.0908, "step": 109880 }, { "epoch": 42.68, "learning_rate": 1.430990291262136e-05, "loss": 0.006, "step": 109890 }, { "epoch": 42.68, "learning_rate": 1.430938511326861e-05, "loss": 0.0483, "step": 109900 }, { "epoch": 42.68, "learning_rate": 1.430886731391586e-05, "loss": 0.0312, "step": 109910 }, { "epoch": 42.69, "learning_rate": 1.4308349514563109e-05, "loss": 0.1271, "step": 109920 }, { "epoch": 42.69, "learning_rate": 1.4307831715210357e-05, "loss": 0.1347, "step": 109930 }, { "epoch": 42.7, "learning_rate": 1.4307313915857606e-05, "loss": 0.1291, "step": 109940 }, { "epoch": 42.7, "learning_rate": 1.4306796116504856e-05, "loss": 0.0849, "step": 109950 }, { "epoch": 42.7, "learning_rate": 1.4306278317152104e-05, "loss": 0.0307, "step": 109960 }, { "epoch": 42.71, "learning_rate": 1.4305760517799354e-05, "loss": 0.0835, "step": 109970 }, { "epoch": 42.71, "learning_rate": 1.4305242718446603e-05, "loss": 0.0686, "step": 109980 }, { "epoch": 42.71, "learning_rate": 1.4304724919093853e-05, "loss": 0.0107, "step": 109990 }, { "epoch": 42.72, "learning_rate": 1.4304207119741102e-05, "loss": 0.0375, "step": 110000 }, { "epoch": 42.72, "learning_rate": 1.430368932038835e-05, "loss": 0.0259, "step": 110010 }, { "epoch": 42.73, "learning_rate": 1.43031715210356e-05, "loss": 0.1343, "step": 110020 }, { "epoch": 42.73, "learning_rate": 1.4302653721682848e-05, "loss": 0.2079, "step": 110030 }, { "epoch": 42.73, "learning_rate": 1.4302135922330098e-05, "loss": 0.1439, "step": 110040 }, { "epoch": 42.74, "learning_rate": 1.4301618122977347e-05, "loss": 0.0051, "step": 110050 }, { "epoch": 42.74, "learning_rate": 1.4301100323624597e-05, "loss": 0.0445, "step": 110060 }, { "epoch": 42.75, "learning_rate": 1.4300582524271846e-05, "loss": 0.1062, "step": 110070 }, { "epoch": 42.75, "learning_rate": 1.4300064724919096e-05, "loss": 0.1363, "step": 110080 }, { "epoch": 42.75, "learning_rate": 1.4299546925566344e-05, "loss": 0.0486, "step": 110090 }, { "epoch": 42.76, "learning_rate": 1.4299029126213594e-05, "loss": 0.1843, "step": 110100 }, { "epoch": 42.76, "learning_rate": 1.4298511326860841e-05, "loss": 0.0623, "step": 110110 }, { "epoch": 42.77, "learning_rate": 1.4297993527508091e-05, "loss": 0.0773, "step": 110120 }, { "epoch": 42.77, "learning_rate": 1.429747572815534e-05, "loss": 0.2387, "step": 110130 }, { "epoch": 42.77, "learning_rate": 1.429695792880259e-05, "loss": 0.2098, "step": 110140 }, { "epoch": 42.78, "learning_rate": 1.429644012944984e-05, "loss": 0.1471, "step": 110150 }, { "epoch": 42.78, "learning_rate": 1.429592233009709e-05, "loss": 0.0314, "step": 110160 }, { "epoch": 42.78, "learning_rate": 1.4295404530744337e-05, "loss": 0.0697, "step": 110170 }, { "epoch": 42.79, "learning_rate": 1.4294886731391585e-05, "loss": 0.0047, "step": 110180 }, { "epoch": 42.79, "learning_rate": 1.4294368932038835e-05, "loss": 0.1226, "step": 110190 }, { "epoch": 42.8, "learning_rate": 1.4293851132686085e-05, "loss": 0.133, "step": 110200 }, { "epoch": 42.8, "learning_rate": 1.4293333333333334e-05, "loss": 0.2022, "step": 110210 }, { "epoch": 42.8, "learning_rate": 1.4292815533980584e-05, "loss": 0.1301, "step": 110220 }, { "epoch": 42.81, "learning_rate": 1.4292297734627834e-05, "loss": 0.1459, "step": 110230 }, { "epoch": 42.81, "learning_rate": 1.4291779935275083e-05, "loss": 0.0556, "step": 110240 }, { "epoch": 42.82, "learning_rate": 1.4291262135922331e-05, "loss": 0.0282, "step": 110250 }, { "epoch": 42.82, "learning_rate": 1.4290744336569579e-05, "loss": 0.2028, "step": 110260 }, { "epoch": 42.82, "learning_rate": 1.4290226537216829e-05, "loss": 0.0475, "step": 110270 }, { "epoch": 42.83, "learning_rate": 1.4289708737864078e-05, "loss": 0.0848, "step": 110280 }, { "epoch": 42.83, "learning_rate": 1.4289190938511328e-05, "loss": 0.0042, "step": 110290 }, { "epoch": 42.83, "learning_rate": 1.4288673139158577e-05, "loss": 0.0191, "step": 110300 }, { "epoch": 42.84, "learning_rate": 1.4288155339805827e-05, "loss": 0.142, "step": 110310 }, { "epoch": 42.84, "learning_rate": 1.4287637540453077e-05, "loss": 0.1659, "step": 110320 }, { "epoch": 42.85, "learning_rate": 1.4287119741100323e-05, "loss": 0.1027, "step": 110330 }, { "epoch": 42.85, "learning_rate": 1.4286601941747573e-05, "loss": 0.0458, "step": 110340 }, { "epoch": 42.85, "learning_rate": 1.4286084142394822e-05, "loss": 0.034, "step": 110350 }, { "epoch": 42.86, "learning_rate": 1.4285566343042072e-05, "loss": 0.2063, "step": 110360 }, { "epoch": 42.86, "learning_rate": 1.4285048543689321e-05, "loss": 0.0816, "step": 110370 }, { "epoch": 42.87, "learning_rate": 1.4284530744336571e-05, "loss": 0.0492, "step": 110380 }, { "epoch": 42.87, "learning_rate": 1.428401294498382e-05, "loss": 0.0252, "step": 110390 }, { "epoch": 42.87, "learning_rate": 1.428349514563107e-05, "loss": 0.026, "step": 110400 }, { "epoch": 42.88, "learning_rate": 1.4282977346278316e-05, "loss": 0.04, "step": 110410 }, { "epoch": 42.88, "learning_rate": 1.4282459546925566e-05, "loss": 0.1553, "step": 110420 }, { "epoch": 42.89, "learning_rate": 1.4281941747572816e-05, "loss": 0.128, "step": 110430 }, { "epoch": 42.89, "learning_rate": 1.4281423948220065e-05, "loss": 0.1954, "step": 110440 }, { "epoch": 42.89, "learning_rate": 1.4280906148867315e-05, "loss": 0.1066, "step": 110450 }, { "epoch": 42.9, "learning_rate": 1.4280388349514565e-05, "loss": 0.0824, "step": 110460 }, { "epoch": 42.9, "learning_rate": 1.4279870550161814e-05, "loss": 0.0309, "step": 110470 }, { "epoch": 42.9, "learning_rate": 1.4279352750809064e-05, "loss": 0.0743, "step": 110480 }, { "epoch": 42.91, "learning_rate": 1.4278834951456313e-05, "loss": 0.103, "step": 110490 }, { "epoch": 42.91, "learning_rate": 1.427831715210356e-05, "loss": 0.0618, "step": 110500 }, { "epoch": 42.92, "learning_rate": 1.427779935275081e-05, "loss": 0.0915, "step": 110510 }, { "epoch": 42.92, "learning_rate": 1.4277281553398059e-05, "loss": 0.0144, "step": 110520 }, { "epoch": 42.92, "learning_rate": 1.4276763754045308e-05, "loss": 0.0338, "step": 110530 }, { "epoch": 42.93, "learning_rate": 1.4276245954692558e-05, "loss": 0.084, "step": 110540 }, { "epoch": 42.93, "learning_rate": 1.4275728155339808e-05, "loss": 0.2384, "step": 110550 }, { "epoch": 42.94, "learning_rate": 1.4275210355987057e-05, "loss": 0.0006, "step": 110560 }, { "epoch": 42.94, "learning_rate": 1.4274692556634307e-05, "loss": 0.1204, "step": 110570 }, { "epoch": 42.94, "learning_rate": 1.4274174757281553e-05, "loss": 0.0492, "step": 110580 }, { "epoch": 42.95, "learning_rate": 1.4273656957928803e-05, "loss": 0.0316, "step": 110590 }, { "epoch": 42.95, "learning_rate": 1.4273139158576052e-05, "loss": 0.0651, "step": 110600 }, { "epoch": 42.96, "learning_rate": 1.4272621359223302e-05, "loss": 0.1892, "step": 110610 }, { "epoch": 42.96, "learning_rate": 1.4272103559870552e-05, "loss": 0.0738, "step": 110620 }, { "epoch": 42.96, "learning_rate": 1.4271585760517801e-05, "loss": 0.0482, "step": 110630 }, { "epoch": 42.97, "learning_rate": 1.4271067961165051e-05, "loss": 0.138, "step": 110640 }, { "epoch": 42.97, "learning_rate": 1.42705501618123e-05, "loss": 0.0794, "step": 110650 }, { "epoch": 42.97, "learning_rate": 1.4270032362459547e-05, "loss": 0.1457, "step": 110660 }, { "epoch": 42.98, "learning_rate": 1.4269514563106796e-05, "loss": 0.09, "step": 110670 }, { "epoch": 42.98, "learning_rate": 1.4268996763754046e-05, "loss": 0.0339, "step": 110680 }, { "epoch": 42.99, "learning_rate": 1.4268478964401296e-05, "loss": 0.1134, "step": 110690 }, { "epoch": 42.99, "learning_rate": 1.4267961165048545e-05, "loss": 0.0799, "step": 110700 }, { "epoch": 42.99, "learning_rate": 1.4267443365695795e-05, "loss": 0.2117, "step": 110710 }, { "epoch": 43.0, "learning_rate": 1.4266925566343044e-05, "loss": 0.0287, "step": 110720 }, { "epoch": 43.0, "eval_accuracy": 0.9469050894085282, "eval_loss": 0.3224157989025116, "eval_runtime": 8.2286, "eval_samples_per_second": 441.75, "eval_steps_per_second": 55.295, "step": 110725 }, { "epoch": 43.0, "learning_rate": 1.4266407766990294e-05, "loss": 0.1755, "step": 110730 }, { "epoch": 43.01, "learning_rate": 1.426588996763754e-05, "loss": 0.0774, "step": 110740 }, { "epoch": 43.01, "learning_rate": 1.426537216828479e-05, "loss": 0.0013, "step": 110750 }, { "epoch": 43.01, "learning_rate": 1.426485436893204e-05, "loss": 0.0362, "step": 110760 }, { "epoch": 43.02, "learning_rate": 1.426433656957929e-05, "loss": 0.1009, "step": 110770 }, { "epoch": 43.02, "learning_rate": 1.4263818770226539e-05, "loss": 0.0583, "step": 110780 }, { "epoch": 43.03, "learning_rate": 1.4263300970873788e-05, "loss": 0.0027, "step": 110790 }, { "epoch": 43.03, "learning_rate": 1.4262783171521038e-05, "loss": 0.1513, "step": 110800 }, { "epoch": 43.03, "learning_rate": 1.4262265372168288e-05, "loss": 0.1833, "step": 110810 }, { "epoch": 43.04, "learning_rate": 1.4261747572815534e-05, "loss": 0.1306, "step": 110820 }, { "epoch": 43.04, "learning_rate": 1.4261229773462783e-05, "loss": 0.1056, "step": 110830 }, { "epoch": 43.04, "learning_rate": 1.4260711974110033e-05, "loss": 0.0833, "step": 110840 }, { "epoch": 43.05, "learning_rate": 1.4260194174757283e-05, "loss": 0.0332, "step": 110850 }, { "epoch": 43.05, "learning_rate": 1.4259676375404532e-05, "loss": 0.0687, "step": 110860 }, { "epoch": 43.06, "learning_rate": 1.4259158576051782e-05, "loss": 0.0733, "step": 110870 }, { "epoch": 43.06, "learning_rate": 1.4258640776699032e-05, "loss": 0.0021, "step": 110880 }, { "epoch": 43.06, "learning_rate": 1.4258122977346281e-05, "loss": 0.0037, "step": 110890 }, { "epoch": 43.07, "learning_rate": 1.4257605177993527e-05, "loss": 0.0248, "step": 110900 }, { "epoch": 43.07, "learning_rate": 1.4257087378640777e-05, "loss": 0.0079, "step": 110910 }, { "epoch": 43.08, "learning_rate": 1.4256569579288027e-05, "loss": 0.037, "step": 110920 }, { "epoch": 43.08, "learning_rate": 1.4256051779935276e-05, "loss": 0.0703, "step": 110930 }, { "epoch": 43.08, "learning_rate": 1.4255533980582526e-05, "loss": 0.0452, "step": 110940 }, { "epoch": 43.09, "learning_rate": 1.4255016181229776e-05, "loss": 0.0452, "step": 110950 }, { "epoch": 43.09, "learning_rate": 1.4254498381877025e-05, "loss": 0.0781, "step": 110960 }, { "epoch": 43.1, "learning_rate": 1.4253980582524273e-05, "loss": 0.0027, "step": 110970 }, { "epoch": 43.1, "learning_rate": 1.4253462783171521e-05, "loss": 0.0635, "step": 110980 }, { "epoch": 43.1, "learning_rate": 1.425294498381877e-05, "loss": 0.0578, "step": 110990 }, { "epoch": 43.11, "learning_rate": 1.425242718446602e-05, "loss": 0.0858, "step": 111000 }, { "epoch": 43.11, "learning_rate": 1.425190938511327e-05, "loss": 0.0912, "step": 111010 }, { "epoch": 43.11, "learning_rate": 1.425139158576052e-05, "loss": 0.0547, "step": 111020 }, { "epoch": 43.12, "learning_rate": 1.4250873786407769e-05, "loss": 0.0836, "step": 111030 }, { "epoch": 43.12, "learning_rate": 1.4250355987055019e-05, "loss": 0.1152, "step": 111040 }, { "epoch": 43.13, "learning_rate": 1.4249838187702267e-05, "loss": 0.0006, "step": 111050 }, { "epoch": 43.13, "learning_rate": 1.4249320388349516e-05, "loss": 0.0921, "step": 111060 }, { "epoch": 43.13, "learning_rate": 1.4248802588996764e-05, "loss": 0.1454, "step": 111070 }, { "epoch": 43.14, "learning_rate": 1.4248284789644014e-05, "loss": 0.032, "step": 111080 }, { "epoch": 43.14, "learning_rate": 1.4247766990291263e-05, "loss": 0.1344, "step": 111090 }, { "epoch": 43.15, "learning_rate": 1.4247249190938513e-05, "loss": 0.1054, "step": 111100 }, { "epoch": 43.15, "learning_rate": 1.4246731391585763e-05, "loss": 0.0637, "step": 111110 }, { "epoch": 43.15, "learning_rate": 1.424621359223301e-05, "loss": 0.1001, "step": 111120 }, { "epoch": 43.16, "learning_rate": 1.424569579288026e-05, "loss": 0.1256, "step": 111130 }, { "epoch": 43.16, "learning_rate": 1.424517799352751e-05, "loss": 0.0185, "step": 111140 }, { "epoch": 43.17, "learning_rate": 1.4244660194174758e-05, "loss": 0.0715, "step": 111150 }, { "epoch": 43.17, "learning_rate": 1.4244142394822007e-05, "loss": 0.0727, "step": 111160 }, { "epoch": 43.17, "learning_rate": 1.4243624595469257e-05, "loss": 0.278, "step": 111170 }, { "epoch": 43.18, "learning_rate": 1.4243106796116507e-05, "loss": 0.1078, "step": 111180 }, { "epoch": 43.18, "learning_rate": 1.4242588996763756e-05, "loss": 0.0066, "step": 111190 }, { "epoch": 43.18, "learning_rate": 1.4242071197411004e-05, "loss": 0.0042, "step": 111200 }, { "epoch": 43.19, "learning_rate": 1.4241553398058254e-05, "loss": 0.1386, "step": 111210 }, { "epoch": 43.19, "learning_rate": 1.4241035598705503e-05, "loss": 0.0141, "step": 111220 }, { "epoch": 43.2, "learning_rate": 1.4240517799352751e-05, "loss": 0.0186, "step": 111230 }, { "epoch": 43.2, "learning_rate": 1.4240000000000001e-05, "loss": 0.0029, "step": 111240 }, { "epoch": 43.2, "learning_rate": 1.423948220064725e-05, "loss": 0.175, "step": 111250 }, { "epoch": 43.21, "learning_rate": 1.42389644012945e-05, "loss": 0.0408, "step": 111260 }, { "epoch": 43.21, "learning_rate": 1.4238446601941748e-05, "loss": 0.0476, "step": 111270 }, { "epoch": 43.22, "learning_rate": 1.4237928802588998e-05, "loss": 0.0341, "step": 111280 }, { "epoch": 43.22, "learning_rate": 1.4237411003236247e-05, "loss": 0.1651, "step": 111290 }, { "epoch": 43.22, "learning_rate": 1.4236893203883497e-05, "loss": 0.1432, "step": 111300 }, { "epoch": 43.23, "learning_rate": 1.4236375404530745e-05, "loss": 0.0465, "step": 111310 }, { "epoch": 43.23, "learning_rate": 1.4235857605177994e-05, "loss": 0.2105, "step": 111320 }, { "epoch": 43.23, "learning_rate": 1.4235339805825244e-05, "loss": 0.1131, "step": 111330 }, { "epoch": 43.24, "learning_rate": 1.4234822006472494e-05, "loss": 0.0727, "step": 111340 }, { "epoch": 43.24, "learning_rate": 1.4234304207119742e-05, "loss": 0.0228, "step": 111350 }, { "epoch": 43.25, "learning_rate": 1.4233786407766991e-05, "loss": 0.0015, "step": 111360 }, { "epoch": 43.25, "learning_rate": 1.423326860841424e-05, "loss": 0.1352, "step": 111370 }, { "epoch": 43.25, "learning_rate": 1.423275080906149e-05, "loss": 0.161, "step": 111380 }, { "epoch": 43.26, "learning_rate": 1.4232233009708738e-05, "loss": 0.2379, "step": 111390 }, { "epoch": 43.26, "learning_rate": 1.4231715210355988e-05, "loss": 0.0713, "step": 111400 }, { "epoch": 43.27, "learning_rate": 1.4231197411003238e-05, "loss": 0.021, "step": 111410 }, { "epoch": 43.27, "learning_rate": 1.4230679611650487e-05, "loss": 0.0351, "step": 111420 }, { "epoch": 43.27, "learning_rate": 1.4230161812297735e-05, "loss": 0.0637, "step": 111430 }, { "epoch": 43.28, "learning_rate": 1.4229644012944985e-05, "loss": 0.0437, "step": 111440 }, { "epoch": 43.28, "learning_rate": 1.4229126213592234e-05, "loss": 0.1011, "step": 111450 }, { "epoch": 43.29, "learning_rate": 1.4228608414239484e-05, "loss": 0.0055, "step": 111460 }, { "epoch": 43.29, "learning_rate": 1.4228090614886732e-05, "loss": 0.0802, "step": 111470 }, { "epoch": 43.29, "learning_rate": 1.4227572815533982e-05, "loss": 0.0858, "step": 111480 }, { "epoch": 43.3, "learning_rate": 1.4227055016181231e-05, "loss": 0.0177, "step": 111490 }, { "epoch": 43.3, "learning_rate": 1.4226537216828479e-05, "loss": 0.1429, "step": 111500 }, { "epoch": 43.3, "learning_rate": 1.4226019417475729e-05, "loss": 0.0273, "step": 111510 }, { "epoch": 43.31, "learning_rate": 1.4225501618122978e-05, "loss": 0.1542, "step": 111520 }, { "epoch": 43.31, "learning_rate": 1.4224983818770228e-05, "loss": 0.0346, "step": 111530 }, { "epoch": 43.32, "learning_rate": 1.4224466019417478e-05, "loss": 0.058, "step": 111540 }, { "epoch": 43.32, "learning_rate": 1.4223948220064725e-05, "loss": 0.2566, "step": 111550 }, { "epoch": 43.32, "learning_rate": 1.4223430420711975e-05, "loss": 0.0256, "step": 111560 }, { "epoch": 43.33, "learning_rate": 1.4222912621359225e-05, "loss": 0.0881, "step": 111570 }, { "epoch": 43.33, "learning_rate": 1.4222394822006473e-05, "loss": 0.0955, "step": 111580 }, { "epoch": 43.34, "learning_rate": 1.4221877022653722e-05, "loss": 0.0168, "step": 111590 }, { "epoch": 43.34, "learning_rate": 1.4221359223300972e-05, "loss": 0.1715, "step": 111600 }, { "epoch": 43.34, "learning_rate": 1.4220841423948222e-05, "loss": 0.0854, "step": 111610 }, { "epoch": 43.35, "learning_rate": 1.4220323624595471e-05, "loss": 0.2271, "step": 111620 }, { "epoch": 43.35, "learning_rate": 1.421980582524272e-05, "loss": 0.0229, "step": 111630 }, { "epoch": 43.36, "learning_rate": 1.4219288025889969e-05, "loss": 0.0886, "step": 111640 }, { "epoch": 43.36, "learning_rate": 1.4218770226537217e-05, "loss": 0.0807, "step": 111650 }, { "epoch": 43.36, "learning_rate": 1.4218252427184466e-05, "loss": 0.0946, "step": 111660 }, { "epoch": 43.37, "learning_rate": 1.4217734627831716e-05, "loss": 0.0908, "step": 111670 }, { "epoch": 43.37, "learning_rate": 1.4217216828478965e-05, "loss": 0.0714, "step": 111680 }, { "epoch": 43.37, "learning_rate": 1.4216699029126215e-05, "loss": 0.0825, "step": 111690 }, { "epoch": 43.38, "learning_rate": 1.4216181229773465e-05, "loss": 0.0664, "step": 111700 }, { "epoch": 43.38, "learning_rate": 1.4215663430420714e-05, "loss": 0.17, "step": 111710 }, { "epoch": 43.39, "learning_rate": 1.4215145631067962e-05, "loss": 0.0139, "step": 111720 }, { "epoch": 43.39, "learning_rate": 1.421462783171521e-05, "loss": 0.1287, "step": 111730 }, { "epoch": 43.39, "learning_rate": 1.421411003236246e-05, "loss": 0.126, "step": 111740 }, { "epoch": 43.4, "learning_rate": 1.421359223300971e-05, "loss": 0.107, "step": 111750 }, { "epoch": 43.4, "learning_rate": 1.4213074433656959e-05, "loss": 0.1543, "step": 111760 }, { "epoch": 43.41, "learning_rate": 1.4212556634304209e-05, "loss": 0.0516, "step": 111770 }, { "epoch": 43.41, "learning_rate": 1.4212038834951458e-05, "loss": 0.1143, "step": 111780 }, { "epoch": 43.41, "learning_rate": 1.4211521035598708e-05, "loss": 0.0764, "step": 111790 }, { "epoch": 43.42, "learning_rate": 1.4211003236245954e-05, "loss": 0.1029, "step": 111800 }, { "epoch": 43.42, "learning_rate": 1.4210485436893204e-05, "loss": 0.0721, "step": 111810 }, { "epoch": 43.43, "learning_rate": 1.4209967637540453e-05, "loss": 0.0454, "step": 111820 }, { "epoch": 43.43, "learning_rate": 1.4209449838187703e-05, "loss": 0.1338, "step": 111830 }, { "epoch": 43.43, "learning_rate": 1.4208932038834953e-05, "loss": 0.0141, "step": 111840 }, { "epoch": 43.44, "learning_rate": 1.4208414239482202e-05, "loss": 0.0211, "step": 111850 }, { "epoch": 43.44, "learning_rate": 1.4207896440129452e-05, "loss": 0.0348, "step": 111860 }, { "epoch": 43.44, "learning_rate": 1.4207378640776701e-05, "loss": 0.0446, "step": 111870 }, { "epoch": 43.45, "learning_rate": 1.4206860841423948e-05, "loss": 0.0291, "step": 111880 }, { "epoch": 43.45, "learning_rate": 1.4206343042071197e-05, "loss": 0.0198, "step": 111890 }, { "epoch": 43.46, "learning_rate": 1.4205825242718447e-05, "loss": 0.1119, "step": 111900 }, { "epoch": 43.46, "learning_rate": 1.4205307443365696e-05, "loss": 0.0273, "step": 111910 }, { "epoch": 43.46, "learning_rate": 1.4204789644012946e-05, "loss": 0.0047, "step": 111920 }, { "epoch": 43.47, "learning_rate": 1.4204271844660196e-05, "loss": 0.012, "step": 111930 }, { "epoch": 43.47, "learning_rate": 1.4203754045307445e-05, "loss": 0.0286, "step": 111940 }, { "epoch": 43.48, "learning_rate": 1.4203236245954695e-05, "loss": 0.1629, "step": 111950 }, { "epoch": 43.48, "learning_rate": 1.4202718446601941e-05, "loss": 0.0512, "step": 111960 }, { "epoch": 43.48, "learning_rate": 1.420220064724919e-05, "loss": 0.0764, "step": 111970 }, { "epoch": 43.49, "learning_rate": 1.420168284789644e-05, "loss": 0.0005, "step": 111980 }, { "epoch": 43.49, "learning_rate": 1.420116504854369e-05, "loss": 0.0116, "step": 111990 }, { "epoch": 43.5, "learning_rate": 1.420064724919094e-05, "loss": 0.1257, "step": 112000 }, { "epoch": 43.5, "learning_rate": 1.420012944983819e-05, "loss": 0.089, "step": 112010 }, { "epoch": 43.5, "learning_rate": 1.4199611650485439e-05, "loss": 0.0789, "step": 112020 }, { "epoch": 43.51, "learning_rate": 1.4199093851132689e-05, "loss": 0.1647, "step": 112030 }, { "epoch": 43.51, "learning_rate": 1.4198576051779935e-05, "loss": 0.0667, "step": 112040 }, { "epoch": 43.51, "learning_rate": 1.4198058252427184e-05, "loss": 0.1506, "step": 112050 }, { "epoch": 43.52, "learning_rate": 1.4197540453074434e-05, "loss": 0.0667, "step": 112060 }, { "epoch": 43.52, "learning_rate": 1.4197022653721684e-05, "loss": 0.0819, "step": 112070 }, { "epoch": 43.53, "learning_rate": 1.4196504854368933e-05, "loss": 0.2053, "step": 112080 }, { "epoch": 43.53, "learning_rate": 1.4195987055016183e-05, "loss": 0.1289, "step": 112090 }, { "epoch": 43.53, "learning_rate": 1.4195469255663432e-05, "loss": 0.1022, "step": 112100 }, { "epoch": 43.54, "learning_rate": 1.4194951456310682e-05, "loss": 0.1009, "step": 112110 }, { "epoch": 43.54, "learning_rate": 1.4194433656957932e-05, "loss": 0.1337, "step": 112120 }, { "epoch": 43.55, "learning_rate": 1.4193915857605178e-05, "loss": 0.0152, "step": 112130 }, { "epoch": 43.55, "learning_rate": 1.4193398058252428e-05, "loss": 0.1404, "step": 112140 }, { "epoch": 43.55, "learning_rate": 1.4192880258899677e-05, "loss": 0.1037, "step": 112150 }, { "epoch": 43.56, "learning_rate": 1.4192362459546927e-05, "loss": 0.0686, "step": 112160 }, { "epoch": 43.56, "learning_rate": 1.4191844660194176e-05, "loss": 0.0305, "step": 112170 }, { "epoch": 43.57, "learning_rate": 1.4191326860841426e-05, "loss": 0.0012, "step": 112180 }, { "epoch": 43.57, "learning_rate": 1.4190809061488676e-05, "loss": 0.075, "step": 112190 }, { "epoch": 43.57, "learning_rate": 1.4190291262135925e-05, "loss": 0.199, "step": 112200 }, { "epoch": 43.58, "learning_rate": 1.4189773462783171e-05, "loss": 0.1555, "step": 112210 }, { "epoch": 43.58, "learning_rate": 1.4189255663430421e-05, "loss": 0.0862, "step": 112220 }, { "epoch": 43.58, "learning_rate": 1.418873786407767e-05, "loss": 0.2112, "step": 112230 }, { "epoch": 43.59, "learning_rate": 1.418822006472492e-05, "loss": 0.0539, "step": 112240 }, { "epoch": 43.59, "learning_rate": 1.418770226537217e-05, "loss": 0.0137, "step": 112250 }, { "epoch": 43.6, "learning_rate": 1.418718446601942e-05, "loss": 0.0268, "step": 112260 }, { "epoch": 43.6, "learning_rate": 1.418666666666667e-05, "loss": 0.0303, "step": 112270 }, { "epoch": 43.6, "learning_rate": 1.4186148867313919e-05, "loss": 0.0217, "step": 112280 }, { "epoch": 43.61, "learning_rate": 1.4185631067961165e-05, "loss": 0.0929, "step": 112290 }, { "epoch": 43.61, "learning_rate": 1.4185113268608415e-05, "loss": 0.1031, "step": 112300 }, { "epoch": 43.62, "learning_rate": 1.4184595469255664e-05, "loss": 0.0526, "step": 112310 }, { "epoch": 43.62, "learning_rate": 1.4184077669902914e-05, "loss": 0.0237, "step": 112320 }, { "epoch": 43.62, "learning_rate": 1.4183559870550164e-05, "loss": 0.2315, "step": 112330 }, { "epoch": 43.63, "learning_rate": 1.4183042071197413e-05, "loss": 0.0712, "step": 112340 }, { "epoch": 43.63, "learning_rate": 1.4182524271844663e-05, "loss": 0.1062, "step": 112350 }, { "epoch": 43.63, "learning_rate": 1.4182006472491912e-05, "loss": 0.0526, "step": 112360 }, { "epoch": 43.64, "learning_rate": 1.4181488673139159e-05, "loss": 0.1261, "step": 112370 }, { "epoch": 43.64, "learning_rate": 1.4180970873786408e-05, "loss": 0.2217, "step": 112380 }, { "epoch": 43.65, "learning_rate": 1.4180453074433658e-05, "loss": 0.1111, "step": 112390 }, { "epoch": 43.65, "learning_rate": 1.4179935275080907e-05, "loss": 0.1611, "step": 112400 }, { "epoch": 43.65, "learning_rate": 1.4179417475728157e-05, "loss": 0.1256, "step": 112410 }, { "epoch": 43.66, "learning_rate": 1.4178899676375407e-05, "loss": 0.1157, "step": 112420 }, { "epoch": 43.66, "learning_rate": 1.4178381877022656e-05, "loss": 0.0919, "step": 112430 }, { "epoch": 43.67, "learning_rate": 1.4177864077669904e-05, "loss": 0.0885, "step": 112440 }, { "epoch": 43.67, "learning_rate": 1.4177346278317152e-05, "loss": 0.0106, "step": 112450 }, { "epoch": 43.67, "learning_rate": 1.4176828478964402e-05, "loss": 0.061, "step": 112460 }, { "epoch": 43.68, "learning_rate": 1.4176310679611651e-05, "loss": 0.0617, "step": 112470 }, { "epoch": 43.68, "learning_rate": 1.4175792880258901e-05, "loss": 0.0027, "step": 112480 }, { "epoch": 43.69, "learning_rate": 1.417527508090615e-05, "loss": 0.0721, "step": 112490 }, { "epoch": 43.69, "learning_rate": 1.41747572815534e-05, "loss": 0.0694, "step": 112500 }, { "epoch": 43.69, "learning_rate": 1.417423948220065e-05, "loss": 0.0511, "step": 112510 }, { "epoch": 43.7, "learning_rate": 1.4173721682847898e-05, "loss": 0.0768, "step": 112520 }, { "epoch": 43.7, "learning_rate": 1.4173203883495146e-05, "loss": 0.0235, "step": 112530 }, { "epoch": 43.7, "learning_rate": 1.4172686084142395e-05, "loss": 0.021, "step": 112540 }, { "epoch": 43.71, "learning_rate": 1.4172168284789645e-05, "loss": 0.1898, "step": 112550 }, { "epoch": 43.71, "learning_rate": 1.4171650485436895e-05, "loss": 0.1122, "step": 112560 }, { "epoch": 43.72, "learning_rate": 1.4171132686084144e-05, "loss": 0.1905, "step": 112570 }, { "epoch": 43.72, "learning_rate": 1.4170614886731394e-05, "loss": 0.3136, "step": 112580 }, { "epoch": 43.72, "learning_rate": 1.4170097087378642e-05, "loss": 0.0389, "step": 112590 }, { "epoch": 43.73, "learning_rate": 1.4169579288025891e-05, "loss": 0.0631, "step": 112600 }, { "epoch": 43.73, "learning_rate": 1.416906148867314e-05, "loss": 0.0663, "step": 112610 }, { "epoch": 43.74, "learning_rate": 1.4168543689320389e-05, "loss": 0.274, "step": 112620 }, { "epoch": 43.74, "learning_rate": 1.4168025889967638e-05, "loss": 0.1781, "step": 112630 }, { "epoch": 43.74, "learning_rate": 1.4167508090614888e-05, "loss": 0.0093, "step": 112640 }, { "epoch": 43.75, "learning_rate": 1.4166990291262138e-05, "loss": 0.0765, "step": 112650 }, { "epoch": 43.75, "learning_rate": 1.4166472491909387e-05, "loss": 0.0174, "step": 112660 }, { "epoch": 43.76, "learning_rate": 1.4165954692556635e-05, "loss": 0.0366, "step": 112670 }, { "epoch": 43.76, "learning_rate": 1.4165436893203885e-05, "loss": 0.0779, "step": 112680 }, { "epoch": 43.76, "learning_rate": 1.4164919093851135e-05, "loss": 0.1607, "step": 112690 }, { "epoch": 43.77, "learning_rate": 1.4164401294498382e-05, "loss": 0.1587, "step": 112700 }, { "epoch": 43.77, "learning_rate": 1.4163883495145632e-05, "loss": 0.0851, "step": 112710 }, { "epoch": 43.77, "learning_rate": 1.4163365695792882e-05, "loss": 0.0392, "step": 112720 }, { "epoch": 43.78, "learning_rate": 1.4162847896440131e-05, "loss": 0.021, "step": 112730 }, { "epoch": 43.78, "learning_rate": 1.416233009708738e-05, "loss": 0.0049, "step": 112740 }, { "epoch": 43.79, "learning_rate": 1.4161812297734629e-05, "loss": 0.1751, "step": 112750 }, { "epoch": 43.79, "learning_rate": 1.4161294498381878e-05, "loss": 0.0477, "step": 112760 }, { "epoch": 43.79, "learning_rate": 1.4160776699029128e-05, "loss": 0.0909, "step": 112770 }, { "epoch": 43.8, "learning_rate": 1.4160258899676376e-05, "loss": 0.0463, "step": 112780 }, { "epoch": 43.8, "learning_rate": 1.4159741100323626e-05, "loss": 0.1094, "step": 112790 }, { "epoch": 43.81, "learning_rate": 1.4159223300970875e-05, "loss": 0.0599, "step": 112800 }, { "epoch": 43.81, "learning_rate": 1.4158705501618125e-05, "loss": 0.0396, "step": 112810 }, { "epoch": 43.81, "learning_rate": 1.4158187702265373e-05, "loss": 0.1164, "step": 112820 }, { "epoch": 43.82, "learning_rate": 1.4157669902912622e-05, "loss": 0.0341, "step": 112830 }, { "epoch": 43.82, "learning_rate": 1.4157152103559872e-05, "loss": 0.0515, "step": 112840 }, { "epoch": 43.83, "learning_rate": 1.4156634304207122e-05, "loss": 0.187, "step": 112850 }, { "epoch": 43.83, "learning_rate": 1.415611650485437e-05, "loss": 0.0112, "step": 112860 }, { "epoch": 43.83, "learning_rate": 1.415559870550162e-05, "loss": 0.1974, "step": 112870 }, { "epoch": 43.84, "learning_rate": 1.4155080906148869e-05, "loss": 0.0152, "step": 112880 }, { "epoch": 43.84, "learning_rate": 1.4154563106796118e-05, "loss": 0.0226, "step": 112890 }, { "epoch": 43.84, "learning_rate": 1.4154045307443366e-05, "loss": 0.0748, "step": 112900 }, { "epoch": 43.85, "learning_rate": 1.4153527508090616e-05, "loss": 0.1564, "step": 112910 }, { "epoch": 43.85, "learning_rate": 1.4153009708737866e-05, "loss": 0.0366, "step": 112920 }, { "epoch": 43.86, "learning_rate": 1.4152491909385115e-05, "loss": 0.0575, "step": 112930 }, { "epoch": 43.86, "learning_rate": 1.4151974110032363e-05, "loss": 0.031, "step": 112940 }, { "epoch": 43.86, "learning_rate": 1.4151456310679613e-05, "loss": 0.197, "step": 112950 }, { "epoch": 43.87, "learning_rate": 1.4150938511326862e-05, "loss": 0.1475, "step": 112960 }, { "epoch": 43.87, "learning_rate": 1.415042071197411e-05, "loss": 0.0472, "step": 112970 }, { "epoch": 43.88, "learning_rate": 1.414990291262136e-05, "loss": 0.0223, "step": 112980 }, { "epoch": 43.88, "learning_rate": 1.414938511326861e-05, "loss": 0.0051, "step": 112990 }, { "epoch": 43.88, "learning_rate": 1.4148867313915859e-05, "loss": 0.12, "step": 113000 }, { "epoch": 43.89, "learning_rate": 1.4148349514563109e-05, "loss": 0.0016, "step": 113010 }, { "epoch": 43.89, "learning_rate": 1.4147831715210357e-05, "loss": 0.0949, "step": 113020 }, { "epoch": 43.9, "learning_rate": 1.4147313915857606e-05, "loss": 0.0623, "step": 113030 }, { "epoch": 43.9, "learning_rate": 1.4146796116504856e-05, "loss": 0.0139, "step": 113040 }, { "epoch": 43.9, "learning_rate": 1.4146278317152104e-05, "loss": 0.0452, "step": 113050 }, { "epoch": 43.91, "learning_rate": 1.4145760517799353e-05, "loss": 0.1457, "step": 113060 }, { "epoch": 43.91, "learning_rate": 1.4145242718446603e-05, "loss": 0.1514, "step": 113070 }, { "epoch": 43.91, "learning_rate": 1.4144724919093853e-05, "loss": 0.0865, "step": 113080 }, { "epoch": 43.92, "learning_rate": 1.4144207119741102e-05, "loss": 0.1589, "step": 113090 }, { "epoch": 43.92, "learning_rate": 1.414368932038835e-05, "loss": 0.2965, "step": 113100 }, { "epoch": 43.93, "learning_rate": 1.41431715210356e-05, "loss": 0.0699, "step": 113110 }, { "epoch": 43.93, "learning_rate": 1.4142653721682848e-05, "loss": 0.0106, "step": 113120 }, { "epoch": 43.93, "learning_rate": 1.4142135922330097e-05, "loss": 0.0676, "step": 113130 }, { "epoch": 43.94, "learning_rate": 1.4141618122977347e-05, "loss": 0.1224, "step": 113140 }, { "epoch": 43.94, "learning_rate": 1.4141100323624597e-05, "loss": 0.0325, "step": 113150 }, { "epoch": 43.95, "learning_rate": 1.4140582524271846e-05, "loss": 0.2108, "step": 113160 }, { "epoch": 43.95, "learning_rate": 1.4140064724919096e-05, "loss": 0.0361, "step": 113170 }, { "epoch": 43.95, "learning_rate": 1.4139546925566344e-05, "loss": 0.1428, "step": 113180 }, { "epoch": 43.96, "learning_rate": 1.4139029126213593e-05, "loss": 0.0689, "step": 113190 }, { "epoch": 43.96, "learning_rate": 1.4138511326860841e-05, "loss": 0.0957, "step": 113200 }, { "epoch": 43.97, "learning_rate": 1.4137993527508091e-05, "loss": 0.0075, "step": 113210 }, { "epoch": 43.97, "learning_rate": 1.413747572815534e-05, "loss": 0.2594, "step": 113220 }, { "epoch": 43.97, "learning_rate": 1.413695792880259e-05, "loss": 0.1103, "step": 113230 }, { "epoch": 43.98, "learning_rate": 1.413644012944984e-05, "loss": 0.0829, "step": 113240 }, { "epoch": 43.98, "learning_rate": 1.413592233009709e-05, "loss": 0.0528, "step": 113250 }, { "epoch": 43.98, "learning_rate": 1.4135404530744339e-05, "loss": 0.1021, "step": 113260 }, { "epoch": 43.99, "learning_rate": 1.4134886731391585e-05, "loss": 0.2524, "step": 113270 }, { "epoch": 43.99, "learning_rate": 1.4134368932038835e-05, "loss": 0.0952, "step": 113280 }, { "epoch": 44.0, "learning_rate": 1.4133851132686084e-05, "loss": 0.0713, "step": 113290 }, { "epoch": 44.0, "learning_rate": 1.4133333333333334e-05, "loss": 0.0384, "step": 113300 }, { "epoch": 44.0, "eval_accuracy": 0.9488308115543329, "eval_loss": 0.3085843324661255, "eval_runtime": 8.2092, "eval_samples_per_second": 442.797, "eval_steps_per_second": 55.426, "step": 113300 }, { "epoch": 44.0, "learning_rate": 1.4132815533980584e-05, "loss": 0.0434, "step": 113310 }, { "epoch": 44.01, "learning_rate": 1.4132297734627833e-05, "loss": 0.069, "step": 113320 }, { "epoch": 44.01, "learning_rate": 1.4131779935275083e-05, "loss": 0.1067, "step": 113330 }, { "epoch": 44.02, "learning_rate": 1.4131262135922333e-05, "loss": 0.0582, "step": 113340 }, { "epoch": 44.02, "learning_rate": 1.4130744336569579e-05, "loss": 0.0506, "step": 113350 }, { "epoch": 44.02, "learning_rate": 1.4130226537216828e-05, "loss": 0.0535, "step": 113360 }, { "epoch": 44.03, "learning_rate": 1.4129708737864078e-05, "loss": 0.1368, "step": 113370 }, { "epoch": 44.03, "learning_rate": 1.4129190938511328e-05, "loss": 0.2723, "step": 113380 }, { "epoch": 44.03, "learning_rate": 1.4128673139158577e-05, "loss": 0.0884, "step": 113390 }, { "epoch": 44.04, "learning_rate": 1.4128155339805827e-05, "loss": 0.0213, "step": 113400 }, { "epoch": 44.04, "learning_rate": 1.4127637540453077e-05, "loss": 0.038, "step": 113410 }, { "epoch": 44.05, "learning_rate": 1.4127119741100326e-05, "loss": 0.2221, "step": 113420 }, { "epoch": 44.05, "learning_rate": 1.4126601941747572e-05, "loss": 0.0761, "step": 113430 }, { "epoch": 44.05, "learning_rate": 1.4126084142394822e-05, "loss": 0.0666, "step": 113440 }, { "epoch": 44.06, "learning_rate": 1.4125566343042072e-05, "loss": 0.1397, "step": 113450 }, { "epoch": 44.06, "learning_rate": 1.4125048543689321e-05, "loss": 0.0349, "step": 113460 }, { "epoch": 44.07, "learning_rate": 1.412453074433657e-05, "loss": 0.0046, "step": 113470 }, { "epoch": 44.07, "learning_rate": 1.412401294498382e-05, "loss": 0.0532, "step": 113480 }, { "epoch": 44.07, "learning_rate": 1.412349514563107e-05, "loss": 0.1626, "step": 113490 }, { "epoch": 44.08, "learning_rate": 1.412297734627832e-05, "loss": 0.0062, "step": 113500 }, { "epoch": 44.08, "learning_rate": 1.4122459546925566e-05, "loss": 0.0983, "step": 113510 }, { "epoch": 44.09, "learning_rate": 1.4121941747572816e-05, "loss": 0.0535, "step": 113520 }, { "epoch": 44.09, "learning_rate": 1.4121423948220065e-05, "loss": 0.0605, "step": 113530 }, { "epoch": 44.09, "learning_rate": 1.4120906148867315e-05, "loss": 0.0757, "step": 113540 }, { "epoch": 44.1, "learning_rate": 1.4120388349514564e-05, "loss": 0.0291, "step": 113550 }, { "epoch": 44.1, "learning_rate": 1.4119870550161814e-05, "loss": 0.0417, "step": 113560 }, { "epoch": 44.1, "learning_rate": 1.4119352750809064e-05, "loss": 0.0725, "step": 113570 }, { "epoch": 44.11, "learning_rate": 1.4118834951456313e-05, "loss": 0.2589, "step": 113580 }, { "epoch": 44.11, "learning_rate": 1.411831715210356e-05, "loss": 0.1312, "step": 113590 }, { "epoch": 44.12, "learning_rate": 1.4117799352750809e-05, "loss": 0.0893, "step": 113600 }, { "epoch": 44.12, "learning_rate": 1.4117281553398059e-05, "loss": 0.1013, "step": 113610 }, { "epoch": 44.12, "learning_rate": 1.4116763754045308e-05, "loss": 0.0563, "step": 113620 }, { "epoch": 44.13, "learning_rate": 1.4116245954692558e-05, "loss": 0.0087, "step": 113630 }, { "epoch": 44.13, "learning_rate": 1.4115728155339808e-05, "loss": 0.1013, "step": 113640 }, { "epoch": 44.14, "learning_rate": 1.4115210355987057e-05, "loss": 0.0746, "step": 113650 }, { "epoch": 44.14, "learning_rate": 1.4114692556634307e-05, "loss": 0.073, "step": 113660 }, { "epoch": 44.14, "learning_rate": 1.4114174757281553e-05, "loss": 0.0642, "step": 113670 }, { "epoch": 44.15, "learning_rate": 1.4113656957928803e-05, "loss": 0.0349, "step": 113680 }, { "epoch": 44.15, "learning_rate": 1.4113139158576052e-05, "loss": 0.0355, "step": 113690 }, { "epoch": 44.16, "learning_rate": 1.4112621359223302e-05, "loss": 0.0387, "step": 113700 }, { "epoch": 44.16, "learning_rate": 1.4112103559870552e-05, "loss": 0.0082, "step": 113710 }, { "epoch": 44.16, "learning_rate": 1.4111585760517801e-05, "loss": 0.0051, "step": 113720 }, { "epoch": 44.17, "learning_rate": 1.411106796116505e-05, "loss": 0.0551, "step": 113730 }, { "epoch": 44.17, "learning_rate": 1.41105501618123e-05, "loss": 0.0827, "step": 113740 }, { "epoch": 44.17, "learning_rate": 1.4110032362459547e-05, "loss": 0.0745, "step": 113750 }, { "epoch": 44.18, "learning_rate": 1.4109514563106796e-05, "loss": 0.0107, "step": 113760 }, { "epoch": 44.18, "learning_rate": 1.4108996763754046e-05, "loss": 0.0711, "step": 113770 }, { "epoch": 44.19, "learning_rate": 1.4108478964401295e-05, "loss": 0.1428, "step": 113780 }, { "epoch": 44.19, "learning_rate": 1.4107961165048545e-05, "loss": 0.2399, "step": 113790 }, { "epoch": 44.19, "learning_rate": 1.4107443365695795e-05, "loss": 0.1052, "step": 113800 }, { "epoch": 44.2, "learning_rate": 1.4106925566343044e-05, "loss": 0.1278, "step": 113810 }, { "epoch": 44.2, "learning_rate": 1.4106407766990294e-05, "loss": 0.1221, "step": 113820 }, { "epoch": 44.21, "learning_rate": 1.4105889967637544e-05, "loss": 0.0763, "step": 113830 }, { "epoch": 44.21, "learning_rate": 1.410537216828479e-05, "loss": 0.2006, "step": 113840 }, { "epoch": 44.21, "learning_rate": 1.410485436893204e-05, "loss": 0.1274, "step": 113850 }, { "epoch": 44.22, "learning_rate": 1.4104336569579289e-05, "loss": 0.2367, "step": 113860 }, { "epoch": 44.22, "learning_rate": 1.4103818770226539e-05, "loss": 0.0236, "step": 113870 }, { "epoch": 44.23, "learning_rate": 1.4103300970873788e-05, "loss": 0.0081, "step": 113880 }, { "epoch": 44.23, "learning_rate": 1.4102783171521038e-05, "loss": 0.1016, "step": 113890 }, { "epoch": 44.23, "learning_rate": 1.4102265372168287e-05, "loss": 0.021, "step": 113900 }, { "epoch": 44.24, "learning_rate": 1.4101747572815535e-05, "loss": 0.0063, "step": 113910 }, { "epoch": 44.24, "learning_rate": 1.4101229773462783e-05, "loss": 0.0644, "step": 113920 }, { "epoch": 44.24, "learning_rate": 1.4100711974110033e-05, "loss": 0.1077, "step": 113930 }, { "epoch": 44.25, "learning_rate": 1.4100194174757283e-05, "loss": 0.0729, "step": 113940 }, { "epoch": 44.25, "learning_rate": 1.4099676375404532e-05, "loss": 0.0724, "step": 113950 }, { "epoch": 44.26, "learning_rate": 1.4099158576051782e-05, "loss": 0.0419, "step": 113960 }, { "epoch": 44.26, "learning_rate": 1.4098640776699031e-05, "loss": 0.0695, "step": 113970 }, { "epoch": 44.26, "learning_rate": 1.4098122977346281e-05, "loss": 0.1098, "step": 113980 }, { "epoch": 44.27, "learning_rate": 1.4097605177993529e-05, "loss": 0.0014, "step": 113990 }, { "epoch": 44.27, "learning_rate": 1.4097087378640777e-05, "loss": 0.1477, "step": 114000 }, { "epoch": 44.28, "learning_rate": 1.4096569579288026e-05, "loss": 0.0754, "step": 114010 }, { "epoch": 44.28, "learning_rate": 1.4096051779935276e-05, "loss": 0.167, "step": 114020 }, { "epoch": 44.28, "learning_rate": 1.4095533980582526e-05, "loss": 0.047, "step": 114030 }, { "epoch": 44.29, "learning_rate": 1.4095016181229775e-05, "loss": 0.0396, "step": 114040 }, { "epoch": 44.29, "learning_rate": 1.4094498381877025e-05, "loss": 0.0451, "step": 114050 }, { "epoch": 44.3, "learning_rate": 1.4093980582524273e-05, "loss": 0.0518, "step": 114060 }, { "epoch": 44.3, "learning_rate": 1.4093462783171523e-05, "loss": 0.0735, "step": 114070 }, { "epoch": 44.3, "learning_rate": 1.409294498381877e-05, "loss": 0.0418, "step": 114080 }, { "epoch": 44.31, "learning_rate": 1.409242718446602e-05, "loss": 0.0524, "step": 114090 }, { "epoch": 44.31, "learning_rate": 1.409190938511327e-05, "loss": 0.1514, "step": 114100 }, { "epoch": 44.31, "learning_rate": 1.409139158576052e-05, "loss": 0.1328, "step": 114110 }, { "epoch": 44.32, "learning_rate": 1.4090873786407769e-05, "loss": 0.0886, "step": 114120 }, { "epoch": 44.32, "learning_rate": 1.4090355987055019e-05, "loss": 0.1248, "step": 114130 }, { "epoch": 44.33, "learning_rate": 1.4089838187702266e-05, "loss": 0.0388, "step": 114140 }, { "epoch": 44.33, "learning_rate": 1.4089320388349516e-05, "loss": 0.0005, "step": 114150 }, { "epoch": 44.33, "learning_rate": 1.4088802588996764e-05, "loss": 0.1376, "step": 114160 }, { "epoch": 44.34, "learning_rate": 1.4088284789644014e-05, "loss": 0.1087, "step": 114170 }, { "epoch": 44.34, "learning_rate": 1.4087766990291263e-05, "loss": 0.0678, "step": 114180 }, { "epoch": 44.35, "learning_rate": 1.4087249190938513e-05, "loss": 0.1009, "step": 114190 }, { "epoch": 44.35, "learning_rate": 1.4086731391585762e-05, "loss": 0.0648, "step": 114200 }, { "epoch": 44.35, "learning_rate": 1.408621359223301e-05, "loss": 0.0849, "step": 114210 }, { "epoch": 44.36, "learning_rate": 1.408569579288026e-05, "loss": 0.0664, "step": 114220 }, { "epoch": 44.36, "learning_rate": 1.408517799352751e-05, "loss": 0.0206, "step": 114230 }, { "epoch": 44.37, "learning_rate": 1.4084660194174758e-05, "loss": 0.1469, "step": 114240 }, { "epoch": 44.37, "learning_rate": 1.4084142394822007e-05, "loss": 0.1996, "step": 114250 }, { "epoch": 44.37, "learning_rate": 1.4083624595469257e-05, "loss": 0.139, "step": 114260 }, { "epoch": 44.38, "learning_rate": 1.4083106796116506e-05, "loss": 0.0366, "step": 114270 }, { "epoch": 44.38, "learning_rate": 1.4082588996763756e-05, "loss": 0.0684, "step": 114280 }, { "epoch": 44.38, "learning_rate": 1.4082071197411004e-05, "loss": 0.0208, "step": 114290 }, { "epoch": 44.39, "learning_rate": 1.4081553398058254e-05, "loss": 0.1995, "step": 114300 }, { "epoch": 44.39, "learning_rate": 1.4081035598705503e-05, "loss": 0.068, "step": 114310 }, { "epoch": 44.4, "learning_rate": 1.4080517799352751e-05, "loss": 0.0285, "step": 114320 }, { "epoch": 44.4, "learning_rate": 1.408e-05, "loss": 0.0219, "step": 114330 }, { "epoch": 44.4, "learning_rate": 1.407948220064725e-05, "loss": 0.098, "step": 114340 }, { "epoch": 44.41, "learning_rate": 1.40789644012945e-05, "loss": 0.0554, "step": 114350 }, { "epoch": 44.41, "learning_rate": 1.4078446601941748e-05, "loss": 0.137, "step": 114360 }, { "epoch": 44.42, "learning_rate": 1.4077928802588997e-05, "loss": 0.0268, "step": 114370 }, { "epoch": 44.42, "learning_rate": 1.4077411003236247e-05, "loss": 0.0934, "step": 114380 }, { "epoch": 44.42, "learning_rate": 1.4076893203883497e-05, "loss": 0.079, "step": 114390 }, { "epoch": 44.43, "learning_rate": 1.4076375404530746e-05, "loss": 0.0471, "step": 114400 }, { "epoch": 44.43, "learning_rate": 1.4075857605177994e-05, "loss": 0.1365, "step": 114410 }, { "epoch": 44.43, "learning_rate": 1.4075339805825244e-05, "loss": 0.0496, "step": 114420 }, { "epoch": 44.44, "learning_rate": 1.4074822006472494e-05, "loss": 0.0592, "step": 114430 }, { "epoch": 44.44, "learning_rate": 1.4074304207119741e-05, "loss": 0.294, "step": 114440 }, { "epoch": 44.45, "learning_rate": 1.4073786407766991e-05, "loss": 0.0974, "step": 114450 }, { "epoch": 44.45, "learning_rate": 1.407326860841424e-05, "loss": 0.0113, "step": 114460 }, { "epoch": 44.45, "learning_rate": 1.407275080906149e-05, "loss": 0.0823, "step": 114470 }, { "epoch": 44.46, "learning_rate": 1.407223300970874e-05, "loss": 0.0593, "step": 114480 }, { "epoch": 44.46, "learning_rate": 1.4071715210355988e-05, "loss": 0.0337, "step": 114490 }, { "epoch": 44.47, "learning_rate": 1.4071197411003237e-05, "loss": 0.1532, "step": 114500 }, { "epoch": 44.47, "learning_rate": 1.4070679611650487e-05, "loss": 0.0775, "step": 114510 }, { "epoch": 44.47, "learning_rate": 1.4070161812297735e-05, "loss": 0.1341, "step": 114520 }, { "epoch": 44.48, "learning_rate": 1.4069644012944985e-05, "loss": 0.1074, "step": 114530 }, { "epoch": 44.48, "learning_rate": 1.4069126213592234e-05, "loss": 0.0266, "step": 114540 }, { "epoch": 44.49, "learning_rate": 1.4068608414239484e-05, "loss": 0.0532, "step": 114550 }, { "epoch": 44.49, "learning_rate": 1.4068090614886733e-05, "loss": 0.1694, "step": 114560 }, { "epoch": 44.49, "learning_rate": 1.4067572815533981e-05, "loss": 0.2494, "step": 114570 }, { "epoch": 44.5, "learning_rate": 1.4067055016181231e-05, "loss": 0.1343, "step": 114580 }, { "epoch": 44.5, "learning_rate": 1.4066537216828479e-05, "loss": 0.0549, "step": 114590 }, { "epoch": 44.5, "learning_rate": 1.4066019417475729e-05, "loss": 0.1193, "step": 114600 }, { "epoch": 44.51, "learning_rate": 1.4065501618122978e-05, "loss": 0.0503, "step": 114610 }, { "epoch": 44.51, "learning_rate": 1.4064983818770228e-05, "loss": 0.0159, "step": 114620 }, { "epoch": 44.52, "learning_rate": 1.4064466019417477e-05, "loss": 0.0855, "step": 114630 }, { "epoch": 44.52, "learning_rate": 1.4063948220064727e-05, "loss": 0.041, "step": 114640 }, { "epoch": 44.52, "learning_rate": 1.4063430420711975e-05, "loss": 0.1439, "step": 114650 }, { "epoch": 44.53, "learning_rate": 1.4062912621359225e-05, "loss": 0.0136, "step": 114660 }, { "epoch": 44.53, "learning_rate": 1.4062394822006472e-05, "loss": 0.0813, "step": 114670 }, { "epoch": 44.54, "learning_rate": 1.4061877022653722e-05, "loss": 0.0849, "step": 114680 }, { "epoch": 44.54, "learning_rate": 1.4061359223300972e-05, "loss": 0.0492, "step": 114690 }, { "epoch": 44.54, "learning_rate": 1.4060841423948221e-05, "loss": 0.0918, "step": 114700 }, { "epoch": 44.55, "learning_rate": 1.4060323624595471e-05, "loss": 0.1131, "step": 114710 }, { "epoch": 44.55, "learning_rate": 1.405980582524272e-05, "loss": 0.1241, "step": 114720 }, { "epoch": 44.56, "learning_rate": 1.4059288025889968e-05, "loss": 0.1851, "step": 114730 }, { "epoch": 44.56, "learning_rate": 1.4058770226537216e-05, "loss": 0.0527, "step": 114740 }, { "epoch": 44.56, "learning_rate": 1.4058252427184466e-05, "loss": 0.0111, "step": 114750 }, { "epoch": 44.57, "learning_rate": 1.4057734627831716e-05, "loss": 0.125, "step": 114760 }, { "epoch": 44.57, "learning_rate": 1.4057216828478965e-05, "loss": 0.0573, "step": 114770 }, { "epoch": 44.57, "learning_rate": 1.4056699029126215e-05, "loss": 0.1161, "step": 114780 }, { "epoch": 44.58, "learning_rate": 1.4056181229773465e-05, "loss": 0.1339, "step": 114790 }, { "epoch": 44.58, "learning_rate": 1.4055663430420714e-05, "loss": 0.1672, "step": 114800 }, { "epoch": 44.59, "learning_rate": 1.4055145631067962e-05, "loss": 0.0994, "step": 114810 }, { "epoch": 44.59, "learning_rate": 1.405462783171521e-05, "loss": 0.0948, "step": 114820 }, { "epoch": 44.59, "learning_rate": 1.405411003236246e-05, "loss": 0.0943, "step": 114830 }, { "epoch": 44.6, "learning_rate": 1.405359223300971e-05, "loss": 0.0546, "step": 114840 }, { "epoch": 44.6, "learning_rate": 1.4053074433656959e-05, "loss": 0.0712, "step": 114850 }, { "epoch": 44.61, "learning_rate": 1.4052556634304208e-05, "loss": 0.0551, "step": 114860 }, { "epoch": 44.61, "learning_rate": 1.4052038834951458e-05, "loss": 0.0426, "step": 114870 }, { "epoch": 44.61, "learning_rate": 1.4051521035598708e-05, "loss": 0.0764, "step": 114880 }, { "epoch": 44.62, "learning_rate": 1.4051003236245954e-05, "loss": 0.1159, "step": 114890 }, { "epoch": 44.62, "learning_rate": 1.4050485436893204e-05, "loss": 0.1709, "step": 114900 }, { "epoch": 44.63, "learning_rate": 1.4049967637540453e-05, "loss": 0.0483, "step": 114910 }, { "epoch": 44.63, "learning_rate": 1.4049449838187703e-05, "loss": 0.0699, "step": 114920 }, { "epoch": 44.63, "learning_rate": 1.4048932038834952e-05, "loss": 0.0514, "step": 114930 }, { "epoch": 44.64, "learning_rate": 1.4048414239482202e-05, "loss": 0.1164, "step": 114940 }, { "epoch": 44.64, "learning_rate": 1.4047896440129452e-05, "loss": 0.0034, "step": 114950 }, { "epoch": 44.64, "learning_rate": 1.4047378640776701e-05, "loss": 0.1484, "step": 114960 }, { "epoch": 44.65, "learning_rate": 1.4046860841423951e-05, "loss": 0.1233, "step": 114970 }, { "epoch": 44.65, "learning_rate": 1.4046343042071197e-05, "loss": 0.0892, "step": 114980 }, { "epoch": 44.66, "learning_rate": 1.4045825242718447e-05, "loss": 0.1505, "step": 114990 }, { "epoch": 44.66, "learning_rate": 1.4045307443365696e-05, "loss": 0.0822, "step": 115000 }, { "epoch": 44.66, "learning_rate": 1.4044789644012946e-05, "loss": 0.0719, "step": 115010 }, { "epoch": 44.67, "learning_rate": 1.4044271844660196e-05, "loss": 0.0897, "step": 115020 }, { "epoch": 44.67, "learning_rate": 1.4043754045307445e-05, "loss": 0.0033, "step": 115030 }, { "epoch": 44.68, "learning_rate": 1.4043236245954695e-05, "loss": 0.1836, "step": 115040 }, { "epoch": 44.68, "learning_rate": 1.4042718446601944e-05, "loss": 0.0395, "step": 115050 }, { "epoch": 44.68, "learning_rate": 1.404220064724919e-05, "loss": 0.174, "step": 115060 }, { "epoch": 44.69, "learning_rate": 1.404168284789644e-05, "loss": 0.0263, "step": 115070 }, { "epoch": 44.69, "learning_rate": 1.404116504854369e-05, "loss": 0.0434, "step": 115080 }, { "epoch": 44.7, "learning_rate": 1.404064724919094e-05, "loss": 0.0721, "step": 115090 }, { "epoch": 44.7, "learning_rate": 1.4040129449838189e-05, "loss": 0.0516, "step": 115100 }, { "epoch": 44.7, "learning_rate": 1.4039611650485439e-05, "loss": 0.0644, "step": 115110 }, { "epoch": 44.71, "learning_rate": 1.4039093851132688e-05, "loss": 0.1187, "step": 115120 }, { "epoch": 44.71, "learning_rate": 1.4038576051779938e-05, "loss": 0.2456, "step": 115130 }, { "epoch": 44.71, "learning_rate": 1.4038058252427184e-05, "loss": 0.2054, "step": 115140 }, { "epoch": 44.72, "learning_rate": 1.4037540453074434e-05, "loss": 0.1093, "step": 115150 }, { "epoch": 44.72, "learning_rate": 1.4037022653721683e-05, "loss": 0.014, "step": 115160 }, { "epoch": 44.73, "learning_rate": 1.4036504854368933e-05, "loss": 0.0013, "step": 115170 }, { "epoch": 44.73, "learning_rate": 1.4035987055016183e-05, "loss": 0.0542, "step": 115180 }, { "epoch": 44.73, "learning_rate": 1.4035469255663432e-05, "loss": 0.0375, "step": 115190 }, { "epoch": 44.74, "learning_rate": 1.4034951456310682e-05, "loss": 0.0019, "step": 115200 }, { "epoch": 44.74, "learning_rate": 1.4034433656957932e-05, "loss": 0.1169, "step": 115210 }, { "epoch": 44.75, "learning_rate": 1.4033915857605178e-05, "loss": 0.2151, "step": 115220 }, { "epoch": 44.75, "learning_rate": 1.4033398058252427e-05, "loss": 0.1614, "step": 115230 }, { "epoch": 44.75, "learning_rate": 1.4032880258899677e-05, "loss": 0.0432, "step": 115240 }, { "epoch": 44.76, "learning_rate": 1.4032362459546927e-05, "loss": 0.0175, "step": 115250 }, { "epoch": 44.76, "learning_rate": 1.4031844660194176e-05, "loss": 0.0672, "step": 115260 }, { "epoch": 44.77, "learning_rate": 1.4031326860841426e-05, "loss": 0.0391, "step": 115270 }, { "epoch": 44.77, "learning_rate": 1.4030809061488675e-05, "loss": 0.1352, "step": 115280 }, { "epoch": 44.77, "learning_rate": 1.4030291262135925e-05, "loss": 0.0438, "step": 115290 }, { "epoch": 44.78, "learning_rate": 1.4029773462783171e-05, "loss": 0.0168, "step": 115300 }, { "epoch": 44.78, "learning_rate": 1.4029255663430421e-05, "loss": 0.086, "step": 115310 }, { "epoch": 44.78, "learning_rate": 1.402873786407767e-05, "loss": 0.0712, "step": 115320 }, { "epoch": 44.79, "learning_rate": 1.402822006472492e-05, "loss": 0.0866, "step": 115330 }, { "epoch": 44.79, "learning_rate": 1.402770226537217e-05, "loss": 0.117, "step": 115340 }, { "epoch": 44.8, "learning_rate": 1.402718446601942e-05, "loss": 0.0535, "step": 115350 }, { "epoch": 44.8, "learning_rate": 1.4026666666666669e-05, "loss": 0.0582, "step": 115360 }, { "epoch": 44.8, "learning_rate": 1.4026148867313919e-05, "loss": 0.0776, "step": 115370 }, { "epoch": 44.81, "learning_rate": 1.4025631067961165e-05, "loss": 0.1571, "step": 115380 }, { "epoch": 44.81, "learning_rate": 1.4025113268608414e-05, "loss": 0.2169, "step": 115390 }, { "epoch": 44.82, "learning_rate": 1.4024595469255664e-05, "loss": 0.163, "step": 115400 }, { "epoch": 44.82, "learning_rate": 1.4024077669902914e-05, "loss": 0.1046, "step": 115410 }, { "epoch": 44.82, "learning_rate": 1.4023559870550163e-05, "loss": 0.0401, "step": 115420 }, { "epoch": 44.83, "learning_rate": 1.4023042071197413e-05, "loss": 0.0495, "step": 115430 }, { "epoch": 44.83, "learning_rate": 1.4022524271844663e-05, "loss": 0.0958, "step": 115440 }, { "epoch": 44.83, "learning_rate": 1.4022006472491912e-05, "loss": 0.0557, "step": 115450 }, { "epoch": 44.84, "learning_rate": 1.4021488673139158e-05, "loss": 0.1445, "step": 115460 }, { "epoch": 44.84, "learning_rate": 1.4020970873786408e-05, "loss": 0.0565, "step": 115470 }, { "epoch": 44.85, "learning_rate": 1.4020453074433658e-05, "loss": 0.1201, "step": 115480 }, { "epoch": 44.85, "learning_rate": 1.4019935275080907e-05, "loss": 0.0021, "step": 115490 }, { "epoch": 44.85, "learning_rate": 1.4019417475728157e-05, "loss": 0.2078, "step": 115500 }, { "epoch": 44.86, "learning_rate": 1.4018899676375407e-05, "loss": 0.1895, "step": 115510 }, { "epoch": 44.86, "learning_rate": 1.4018381877022656e-05, "loss": 0.123, "step": 115520 }, { "epoch": 44.87, "learning_rate": 1.4017864077669904e-05, "loss": 0.1512, "step": 115530 }, { "epoch": 44.87, "learning_rate": 1.4017346278317154e-05, "loss": 0.0336, "step": 115540 }, { "epoch": 44.87, "learning_rate": 1.4016828478964402e-05, "loss": 0.0686, "step": 115550 }, { "epoch": 44.88, "learning_rate": 1.4016310679611651e-05, "loss": 0.0505, "step": 115560 }, { "epoch": 44.88, "learning_rate": 1.40157928802589e-05, "loss": 0.1376, "step": 115570 }, { "epoch": 44.89, "learning_rate": 1.401527508090615e-05, "loss": 0.1276, "step": 115580 }, { "epoch": 44.89, "learning_rate": 1.40147572815534e-05, "loss": 0.099, "step": 115590 }, { "epoch": 44.89, "learning_rate": 1.401423948220065e-05, "loss": 0.0373, "step": 115600 }, { "epoch": 44.9, "learning_rate": 1.4013721682847898e-05, "loss": 0.0718, "step": 115610 }, { "epoch": 44.9, "learning_rate": 1.4013203883495147e-05, "loss": 0.1168, "step": 115620 }, { "epoch": 44.9, "learning_rate": 1.4012686084142395e-05, "loss": 0.1562, "step": 115630 }, { "epoch": 44.91, "learning_rate": 1.4012168284789645e-05, "loss": 0.0809, "step": 115640 }, { "epoch": 44.91, "learning_rate": 1.4011650485436894e-05, "loss": 0.1116, "step": 115650 }, { "epoch": 44.92, "learning_rate": 1.4011132686084144e-05, "loss": 0.0627, "step": 115660 }, { "epoch": 44.92, "learning_rate": 1.4010614886731394e-05, "loss": 0.0774, "step": 115670 }, { "epoch": 44.92, "learning_rate": 1.4010097087378642e-05, "loss": 0.112, "step": 115680 }, { "epoch": 44.93, "learning_rate": 1.4009579288025891e-05, "loss": 0.0716, "step": 115690 }, { "epoch": 44.93, "learning_rate": 1.400906148867314e-05, "loss": 0.1165, "step": 115700 }, { "epoch": 44.94, "learning_rate": 1.4008543689320389e-05, "loss": 0.1913, "step": 115710 }, { "epoch": 44.94, "learning_rate": 1.4008025889967638e-05, "loss": 0.0973, "step": 115720 }, { "epoch": 44.94, "learning_rate": 1.4007508090614888e-05, "loss": 0.0234, "step": 115730 }, { "epoch": 44.95, "learning_rate": 1.4006990291262138e-05, "loss": 0.1035, "step": 115740 }, { "epoch": 44.95, "learning_rate": 1.4006472491909387e-05, "loss": 0.0044, "step": 115750 }, { "epoch": 44.96, "learning_rate": 1.4005954692556635e-05, "loss": 0.0664, "step": 115760 }, { "epoch": 44.96, "learning_rate": 1.4005436893203885e-05, "loss": 0.0694, "step": 115770 }, { "epoch": 44.96, "learning_rate": 1.4004919093851134e-05, "loss": 0.0772, "step": 115780 }, { "epoch": 44.97, "learning_rate": 1.4004401294498382e-05, "loss": 0.1597, "step": 115790 }, { "epoch": 44.97, "learning_rate": 1.4003883495145632e-05, "loss": 0.0028, "step": 115800 }, { "epoch": 44.97, "learning_rate": 1.4003365695792882e-05, "loss": 0.0494, "step": 115810 }, { "epoch": 44.98, "learning_rate": 1.4002847896440131e-05, "loss": 0.0243, "step": 115820 }, { "epoch": 44.98, "learning_rate": 1.4002330097087379e-05, "loss": 0.012, "step": 115830 }, { "epoch": 44.99, "learning_rate": 1.4001812297734629e-05, "loss": 0.1421, "step": 115840 }, { "epoch": 44.99, "learning_rate": 1.4001294498381878e-05, "loss": 0.0327, "step": 115850 }, { "epoch": 44.99, "learning_rate": 1.4000776699029128e-05, "loss": 0.0467, "step": 115860 }, { "epoch": 45.0, "learning_rate": 1.4000258899676376e-05, "loss": 0.1214, "step": 115870 }, { "epoch": 45.0, "eval_accuracy": 0.9493810178817056, "eval_loss": 0.31446582078933716, "eval_runtime": 8.2323, "eval_samples_per_second": 441.553, "eval_steps_per_second": 55.27, "step": 115875 }, { "epoch": 45.0, "learning_rate": 1.3999741100323625e-05, "loss": 0.0036, "step": 115880 }, { "epoch": 45.01, "learning_rate": 1.3999223300970875e-05, "loss": 0.004, "step": 115890 }, { "epoch": 45.01, "learning_rate": 1.3998705501618125e-05, "loss": 0.0632, "step": 115900 }, { "epoch": 45.01, "learning_rate": 1.3998187702265373e-05, "loss": 0.0521, "step": 115910 }, { "epoch": 45.02, "learning_rate": 1.3997669902912622e-05, "loss": 0.1009, "step": 115920 }, { "epoch": 45.02, "learning_rate": 1.3997152103559872e-05, "loss": 0.1257, "step": 115930 }, { "epoch": 45.03, "learning_rate": 1.3996634304207121e-05, "loss": 0.0505, "step": 115940 }, { "epoch": 45.03, "learning_rate": 1.399611650485437e-05, "loss": 0.0768, "step": 115950 }, { "epoch": 45.03, "learning_rate": 1.3995598705501619e-05, "loss": 0.1131, "step": 115960 }, { "epoch": 45.04, "learning_rate": 1.3995080906148869e-05, "loss": 0.001, "step": 115970 }, { "epoch": 45.04, "learning_rate": 1.3994563106796118e-05, "loss": 0.0504, "step": 115980 }, { "epoch": 45.04, "learning_rate": 1.3994045307443366e-05, "loss": 0.1015, "step": 115990 }, { "epoch": 45.05, "learning_rate": 1.3993527508090616e-05, "loss": 0.1503, "step": 116000 }, { "epoch": 45.05, "learning_rate": 1.3993009708737865e-05, "loss": 0.005, "step": 116010 }, { "epoch": 45.06, "learning_rate": 1.3992491909385115e-05, "loss": 0.0009, "step": 116020 }, { "epoch": 45.06, "learning_rate": 1.3991974110032363e-05, "loss": 0.1056, "step": 116030 }, { "epoch": 45.06, "learning_rate": 1.3991456310679613e-05, "loss": 0.21, "step": 116040 }, { "epoch": 45.07, "learning_rate": 1.3990938511326862e-05, "loss": 0.0481, "step": 116050 }, { "epoch": 45.07, "learning_rate": 1.399042071197411e-05, "loss": 0.0307, "step": 116060 }, { "epoch": 45.08, "learning_rate": 1.398990291262136e-05, "loss": 0.0838, "step": 116070 }, { "epoch": 45.08, "learning_rate": 1.398938511326861e-05, "loss": 0.0826, "step": 116080 }, { "epoch": 45.08, "learning_rate": 1.3988867313915859e-05, "loss": 0.0999, "step": 116090 }, { "epoch": 45.09, "learning_rate": 1.3988349514563109e-05, "loss": 0.1512, "step": 116100 }, { "epoch": 45.09, "learning_rate": 1.3987831715210358e-05, "loss": 0.0328, "step": 116110 }, { "epoch": 45.1, "learning_rate": 1.3987313915857606e-05, "loss": 0.024, "step": 116120 }, { "epoch": 45.1, "learning_rate": 1.3986796116504856e-05, "loss": 0.063, "step": 116130 }, { "epoch": 45.1, "learning_rate": 1.3986278317152104e-05, "loss": 0.0032, "step": 116140 }, { "epoch": 45.11, "learning_rate": 1.3985760517799353e-05, "loss": 0.0223, "step": 116150 }, { "epoch": 45.11, "learning_rate": 1.3985242718446603e-05, "loss": 0.0875, "step": 116160 }, { "epoch": 45.11, "learning_rate": 1.3984724919093853e-05, "loss": 0.0738, "step": 116170 }, { "epoch": 45.12, "learning_rate": 1.3984207119741102e-05, "loss": 0.0656, "step": 116180 }, { "epoch": 45.12, "learning_rate": 1.3983689320388352e-05, "loss": 0.0983, "step": 116190 }, { "epoch": 45.13, "learning_rate": 1.39831715210356e-05, "loss": 0.0674, "step": 116200 }, { "epoch": 45.13, "learning_rate": 1.3982653721682848e-05, "loss": 0.0297, "step": 116210 }, { "epoch": 45.13, "learning_rate": 1.3982135922330097e-05, "loss": 0.106, "step": 116220 }, { "epoch": 45.14, "learning_rate": 1.3981618122977347e-05, "loss": 0.1697, "step": 116230 }, { "epoch": 45.14, "learning_rate": 1.3981100323624596e-05, "loss": 0.0053, "step": 116240 }, { "epoch": 45.15, "learning_rate": 1.3980582524271846e-05, "loss": 0.1963, "step": 116250 }, { "epoch": 45.15, "learning_rate": 1.3980064724919096e-05, "loss": 0.0881, "step": 116260 }, { "epoch": 45.15, "learning_rate": 1.3979546925566345e-05, "loss": 0.0164, "step": 116270 }, { "epoch": 45.16, "learning_rate": 1.3979029126213593e-05, "loss": 0.1287, "step": 116280 }, { "epoch": 45.16, "learning_rate": 1.3978511326860841e-05, "loss": 0.095, "step": 116290 }, { "epoch": 45.17, "learning_rate": 1.397799352750809e-05, "loss": 0.0097, "step": 116300 }, { "epoch": 45.17, "learning_rate": 1.397747572815534e-05, "loss": 0.0072, "step": 116310 }, { "epoch": 45.17, "learning_rate": 1.397695792880259e-05, "loss": 0.1621, "step": 116320 }, { "epoch": 45.18, "learning_rate": 1.397644012944984e-05, "loss": 0.1351, "step": 116330 }, { "epoch": 45.18, "learning_rate": 1.397592233009709e-05, "loss": 0.0203, "step": 116340 }, { "epoch": 45.18, "learning_rate": 1.3975404530744339e-05, "loss": 0.0521, "step": 116350 }, { "epoch": 45.19, "learning_rate": 1.3974886731391585e-05, "loss": 0.0707, "step": 116360 }, { "epoch": 45.19, "learning_rate": 1.3974368932038835e-05, "loss": 0.0619, "step": 116370 }, { "epoch": 45.2, "learning_rate": 1.3973851132686084e-05, "loss": 0.1363, "step": 116380 }, { "epoch": 45.2, "learning_rate": 1.3973333333333334e-05, "loss": 0.1135, "step": 116390 }, { "epoch": 45.2, "learning_rate": 1.3972815533980584e-05, "loss": 0.1156, "step": 116400 }, { "epoch": 45.21, "learning_rate": 1.3972297734627833e-05, "loss": 0.0857, "step": 116410 }, { "epoch": 45.21, "learning_rate": 1.3971779935275083e-05, "loss": 0.0119, "step": 116420 }, { "epoch": 45.22, "learning_rate": 1.3971262135922332e-05, "loss": 0.0203, "step": 116430 }, { "epoch": 45.22, "learning_rate": 1.3970744336569579e-05, "loss": 0.1167, "step": 116440 }, { "epoch": 45.22, "learning_rate": 1.3970226537216828e-05, "loss": 0.1133, "step": 116450 }, { "epoch": 45.23, "learning_rate": 1.3969708737864078e-05, "loss": 0.0045, "step": 116460 }, { "epoch": 45.23, "learning_rate": 1.3969190938511327e-05, "loss": 0.0469, "step": 116470 }, { "epoch": 45.23, "learning_rate": 1.3968673139158577e-05, "loss": 0.0801, "step": 116480 }, { "epoch": 45.24, "learning_rate": 1.3968155339805827e-05, "loss": 0.1006, "step": 116490 }, { "epoch": 45.24, "learning_rate": 1.3967637540453076e-05, "loss": 0.0369, "step": 116500 }, { "epoch": 45.25, "learning_rate": 1.3967119741100326e-05, "loss": 0.0233, "step": 116510 }, { "epoch": 45.25, "learning_rate": 1.3966601941747572e-05, "loss": 0.0523, "step": 116520 }, { "epoch": 45.25, "learning_rate": 1.3966084142394822e-05, "loss": 0.0075, "step": 116530 }, { "epoch": 45.26, "learning_rate": 1.3965566343042071e-05, "loss": 0.1466, "step": 116540 }, { "epoch": 45.26, "learning_rate": 1.3965048543689321e-05, "loss": 0.0494, "step": 116550 }, { "epoch": 45.27, "learning_rate": 1.396453074433657e-05, "loss": 0.0095, "step": 116560 }, { "epoch": 45.27, "learning_rate": 1.396401294498382e-05, "loss": 0.0797, "step": 116570 }, { "epoch": 45.27, "learning_rate": 1.396349514563107e-05, "loss": 0.0615, "step": 116580 }, { "epoch": 45.28, "learning_rate": 1.396297734627832e-05, "loss": 0.0351, "step": 116590 }, { "epoch": 45.28, "learning_rate": 1.3962459546925566e-05, "loss": 0.0857, "step": 116600 }, { "epoch": 45.29, "learning_rate": 1.3961941747572815e-05, "loss": 0.0252, "step": 116610 }, { "epoch": 45.29, "learning_rate": 1.3961423948220065e-05, "loss": 0.0938, "step": 116620 }, { "epoch": 45.29, "learning_rate": 1.3960906148867315e-05, "loss": 0.0451, "step": 116630 }, { "epoch": 45.3, "learning_rate": 1.3960388349514564e-05, "loss": 0.3091, "step": 116640 }, { "epoch": 45.3, "learning_rate": 1.3959870550161814e-05, "loss": 0.0788, "step": 116650 }, { "epoch": 45.3, "learning_rate": 1.3959352750809063e-05, "loss": 0.097, "step": 116660 }, { "epoch": 45.31, "learning_rate": 1.3958834951456313e-05, "loss": 0.0384, "step": 116670 }, { "epoch": 45.31, "learning_rate": 1.3958317152103563e-05, "loss": 0.0511, "step": 116680 }, { "epoch": 45.32, "learning_rate": 1.3957799352750809e-05, "loss": 0.0931, "step": 116690 }, { "epoch": 45.32, "learning_rate": 1.3957281553398059e-05, "loss": 0.1179, "step": 116700 }, { "epoch": 45.32, "learning_rate": 1.3956763754045308e-05, "loss": 0.0393, "step": 116710 }, { "epoch": 45.33, "learning_rate": 1.3956245954692558e-05, "loss": 0.0673, "step": 116720 }, { "epoch": 45.33, "learning_rate": 1.3955728155339807e-05, "loss": 0.0281, "step": 116730 }, { "epoch": 45.34, "learning_rate": 1.3955210355987057e-05, "loss": 0.0083, "step": 116740 }, { "epoch": 45.34, "learning_rate": 1.3954692556634307e-05, "loss": 0.0825, "step": 116750 }, { "epoch": 45.34, "learning_rate": 1.3954174757281556e-05, "loss": 0.0845, "step": 116760 }, { "epoch": 45.35, "learning_rate": 1.3953656957928802e-05, "loss": 0.03, "step": 116770 }, { "epoch": 45.35, "learning_rate": 1.3953139158576052e-05, "loss": 0.0291, "step": 116780 }, { "epoch": 45.36, "learning_rate": 1.3952621359223302e-05, "loss": 0.1124, "step": 116790 }, { "epoch": 45.36, "learning_rate": 1.3952103559870551e-05, "loss": 0.1395, "step": 116800 }, { "epoch": 45.36, "learning_rate": 1.3951585760517801e-05, "loss": 0.0475, "step": 116810 }, { "epoch": 45.37, "learning_rate": 1.395106796116505e-05, "loss": 0.0483, "step": 116820 }, { "epoch": 45.37, "learning_rate": 1.39505501618123e-05, "loss": 0.1144, "step": 116830 }, { "epoch": 45.37, "learning_rate": 1.395003236245955e-05, "loss": 0.0724, "step": 116840 }, { "epoch": 45.38, "learning_rate": 1.3949514563106796e-05, "loss": 0.0961, "step": 116850 }, { "epoch": 45.38, "learning_rate": 1.3948996763754046e-05, "loss": 0.0153, "step": 116860 }, { "epoch": 45.39, "learning_rate": 1.3948478964401295e-05, "loss": 0.1791, "step": 116870 }, { "epoch": 45.39, "learning_rate": 1.3947961165048545e-05, "loss": 0.0533, "step": 116880 }, { "epoch": 45.39, "learning_rate": 1.3947443365695795e-05, "loss": 0.1602, "step": 116890 }, { "epoch": 45.4, "learning_rate": 1.3946925566343044e-05, "loss": 0.0313, "step": 116900 }, { "epoch": 45.4, "learning_rate": 1.3946407766990294e-05, "loss": 0.0053, "step": 116910 }, { "epoch": 45.41, "learning_rate": 1.3945889967637543e-05, "loss": 0.0253, "step": 116920 }, { "epoch": 45.41, "learning_rate": 1.394537216828479e-05, "loss": 0.0062, "step": 116930 }, { "epoch": 45.41, "learning_rate": 1.394485436893204e-05, "loss": 0.0575, "step": 116940 }, { "epoch": 45.42, "learning_rate": 1.3944336569579289e-05, "loss": 0.0147, "step": 116950 }, { "epoch": 45.42, "learning_rate": 1.3943818770226538e-05, "loss": 0.0995, "step": 116960 }, { "epoch": 45.43, "learning_rate": 1.3943300970873788e-05, "loss": 0.0665, "step": 116970 }, { "epoch": 45.43, "learning_rate": 1.3942783171521038e-05, "loss": 0.0081, "step": 116980 }, { "epoch": 45.43, "learning_rate": 1.3942265372168287e-05, "loss": 0.0878, "step": 116990 }, { "epoch": 45.44, "learning_rate": 1.3941747572815535e-05, "loss": 0.1847, "step": 117000 }, { "epoch": 45.44, "learning_rate": 1.3941229773462783e-05, "loss": 0.3107, "step": 117010 }, { "epoch": 45.44, "learning_rate": 1.3940711974110033e-05, "loss": 0.0868, "step": 117020 }, { "epoch": 45.45, "learning_rate": 1.3940194174757282e-05, "loss": 0.0621, "step": 117030 }, { "epoch": 45.45, "learning_rate": 1.3939676375404532e-05, "loss": 0.0801, "step": 117040 }, { "epoch": 45.46, "learning_rate": 1.3939158576051782e-05, "loss": 0.0659, "step": 117050 }, { "epoch": 45.46, "learning_rate": 1.3938640776699031e-05, "loss": 0.0497, "step": 117060 }, { "epoch": 45.46, "learning_rate": 1.3938122977346281e-05, "loss": 0.2756, "step": 117070 }, { "epoch": 45.47, "learning_rate": 1.3937605177993529e-05, "loss": 0.1786, "step": 117080 }, { "epoch": 45.47, "learning_rate": 1.3937087378640777e-05, "loss": 0.0275, "step": 117090 }, { "epoch": 45.48, "learning_rate": 1.3936569579288026e-05, "loss": 0.1328, "step": 117100 }, { "epoch": 45.48, "learning_rate": 1.3936051779935276e-05, "loss": 0.0292, "step": 117110 }, { "epoch": 45.48, "learning_rate": 1.3935533980582526e-05, "loss": 0.0105, "step": 117120 }, { "epoch": 45.49, "learning_rate": 1.3935016181229775e-05, "loss": 0.1031, "step": 117130 }, { "epoch": 45.49, "learning_rate": 1.3934498381877025e-05, "loss": 0.0479, "step": 117140 }, { "epoch": 45.5, "learning_rate": 1.3933980582524273e-05, "loss": 0.054, "step": 117150 }, { "epoch": 45.5, "learning_rate": 1.3933462783171522e-05, "loss": 0.0063, "step": 117160 }, { "epoch": 45.5, "learning_rate": 1.3932944983818772e-05, "loss": 0.0017, "step": 117170 }, { "epoch": 45.51, "learning_rate": 1.393242718446602e-05, "loss": 0.0164, "step": 117180 }, { "epoch": 45.51, "learning_rate": 1.393190938511327e-05, "loss": 0.0997, "step": 117190 }, { "epoch": 45.51, "learning_rate": 1.3931391585760519e-05, "loss": 0.0876, "step": 117200 }, { "epoch": 45.52, "learning_rate": 1.3930873786407769e-05, "loss": 0.0883, "step": 117210 }, { "epoch": 45.52, "learning_rate": 1.3930355987055018e-05, "loss": 0.1892, "step": 117220 }, { "epoch": 45.53, "learning_rate": 1.3929838187702266e-05, "loss": 0.0922, "step": 117230 }, { "epoch": 45.53, "learning_rate": 1.3929320388349516e-05, "loss": 0.009, "step": 117240 }, { "epoch": 45.53, "learning_rate": 1.3928802588996766e-05, "loss": 0.0687, "step": 117250 }, { "epoch": 45.54, "learning_rate": 1.3928284789644013e-05, "loss": 0.1796, "step": 117260 }, { "epoch": 45.54, "learning_rate": 1.3927766990291263e-05, "loss": 0.0062, "step": 117270 }, { "epoch": 45.55, "learning_rate": 1.3927249190938513e-05, "loss": 0.1271, "step": 117280 }, { "epoch": 45.55, "learning_rate": 1.3926731391585762e-05, "loss": 0.013, "step": 117290 }, { "epoch": 45.55, "learning_rate": 1.392621359223301e-05, "loss": 0.1195, "step": 117300 }, { "epoch": 45.56, "learning_rate": 1.392569579288026e-05, "loss": 0.0542, "step": 117310 }, { "epoch": 45.56, "learning_rate": 1.392517799352751e-05, "loss": 0.1179, "step": 117320 }, { "epoch": 45.57, "learning_rate": 1.3924660194174759e-05, "loss": 0.1204, "step": 117330 }, { "epoch": 45.57, "learning_rate": 1.3924142394822007e-05, "loss": 0.1928, "step": 117340 }, { "epoch": 45.57, "learning_rate": 1.3923624595469257e-05, "loss": 0.0374, "step": 117350 }, { "epoch": 45.58, "learning_rate": 1.3923106796116506e-05, "loss": 0.0495, "step": 117360 }, { "epoch": 45.58, "learning_rate": 1.3922588996763756e-05, "loss": 0.1955, "step": 117370 }, { "epoch": 45.58, "learning_rate": 1.3922071197411004e-05, "loss": 0.2518, "step": 117380 }, { "epoch": 45.59, "learning_rate": 1.3921553398058253e-05, "loss": 0.1381, "step": 117390 }, { "epoch": 45.59, "learning_rate": 1.3921035598705503e-05, "loss": 0.0325, "step": 117400 }, { "epoch": 45.6, "learning_rate": 1.3920517799352753e-05, "loss": 0.1269, "step": 117410 }, { "epoch": 45.6, "learning_rate": 1.392e-05, "loss": 0.0307, "step": 117420 }, { "epoch": 45.6, "learning_rate": 1.391948220064725e-05, "loss": 0.047, "step": 117430 }, { "epoch": 45.61, "learning_rate": 1.39189644012945e-05, "loss": 0.0011, "step": 117440 }, { "epoch": 45.61, "learning_rate": 1.391844660194175e-05, "loss": 0.0656, "step": 117450 }, { "epoch": 45.62, "learning_rate": 1.3917928802588997e-05, "loss": 0.1009, "step": 117460 }, { "epoch": 45.62, "learning_rate": 1.3917411003236247e-05, "loss": 0.1032, "step": 117470 }, { "epoch": 45.62, "learning_rate": 1.3916893203883497e-05, "loss": 0.091, "step": 117480 }, { "epoch": 45.63, "learning_rate": 1.3916375404530746e-05, "loss": 0.0703, "step": 117490 }, { "epoch": 45.63, "learning_rate": 1.3915857605177994e-05, "loss": 0.0263, "step": 117500 }, { "epoch": 45.63, "learning_rate": 1.3915339805825244e-05, "loss": 0.0171, "step": 117510 }, { "epoch": 45.64, "learning_rate": 1.3914822006472493e-05, "loss": 0.0844, "step": 117520 }, { "epoch": 45.64, "learning_rate": 1.3914304207119741e-05, "loss": 0.1271, "step": 117530 }, { "epoch": 45.65, "learning_rate": 1.3913786407766991e-05, "loss": 0.1219, "step": 117540 }, { "epoch": 45.65, "learning_rate": 1.391326860841424e-05, "loss": 0.0296, "step": 117550 }, { "epoch": 45.65, "learning_rate": 1.391275080906149e-05, "loss": 0.1213, "step": 117560 }, { "epoch": 45.66, "learning_rate": 1.391223300970874e-05, "loss": 0.0666, "step": 117570 }, { "epoch": 45.66, "learning_rate": 1.3911715210355988e-05, "loss": 0.024, "step": 117580 }, { "epoch": 45.67, "learning_rate": 1.3911197411003237e-05, "loss": 0.0004, "step": 117590 }, { "epoch": 45.67, "learning_rate": 1.3910679611650487e-05, "loss": 0.0056, "step": 117600 }, { "epoch": 45.67, "learning_rate": 1.3910161812297735e-05, "loss": 0.1016, "step": 117610 }, { "epoch": 45.68, "learning_rate": 1.3909644012944984e-05, "loss": 0.1258, "step": 117620 }, { "epoch": 45.68, "learning_rate": 1.3909126213592234e-05, "loss": 0.1095, "step": 117630 }, { "epoch": 45.69, "learning_rate": 1.3908608414239484e-05, "loss": 0.1256, "step": 117640 }, { "epoch": 45.69, "learning_rate": 1.3908090614886733e-05, "loss": 0.0865, "step": 117650 }, { "epoch": 45.69, "learning_rate": 1.3907572815533981e-05, "loss": 0.1362, "step": 117660 }, { "epoch": 45.7, "learning_rate": 1.390705501618123e-05, "loss": 0.0035, "step": 117670 }, { "epoch": 45.7, "learning_rate": 1.3906537216828479e-05, "loss": 0.0056, "step": 117680 }, { "epoch": 45.7, "learning_rate": 1.3906019417475728e-05, "loss": 0.0847, "step": 117690 }, { "epoch": 45.71, "learning_rate": 1.3905501618122978e-05, "loss": 0.2662, "step": 117700 }, { "epoch": 45.71, "learning_rate": 1.3904983818770228e-05, "loss": 0.1463, "step": 117710 }, { "epoch": 45.72, "learning_rate": 1.3904466019417477e-05, "loss": 0.0732, "step": 117720 }, { "epoch": 45.72, "learning_rate": 1.3903948220064727e-05, "loss": 0.1418, "step": 117730 }, { "epoch": 45.72, "learning_rate": 1.3903430420711976e-05, "loss": 0.0011, "step": 117740 }, { "epoch": 45.73, "learning_rate": 1.3902912621359224e-05, "loss": 0.0794, "step": 117750 }, { "epoch": 45.73, "learning_rate": 1.3902394822006472e-05, "loss": 0.107, "step": 117760 }, { "epoch": 45.74, "learning_rate": 1.3901877022653722e-05, "loss": 0.2211, "step": 117770 }, { "epoch": 45.74, "learning_rate": 1.3901359223300972e-05, "loss": 0.0451, "step": 117780 }, { "epoch": 45.74, "learning_rate": 1.3900841423948221e-05, "loss": 0.0901, "step": 117790 }, { "epoch": 45.75, "learning_rate": 1.390032362459547e-05, "loss": 0.0855, "step": 117800 }, { "epoch": 45.75, "learning_rate": 1.389980582524272e-05, "loss": 0.011, "step": 117810 }, { "epoch": 45.76, "learning_rate": 1.389928802588997e-05, "loss": 0.0666, "step": 117820 }, { "epoch": 45.76, "learning_rate": 1.3898770226537216e-05, "loss": 0.0363, "step": 117830 }, { "epoch": 45.76, "learning_rate": 1.3898252427184466e-05, "loss": 0.111, "step": 117840 }, { "epoch": 45.77, "learning_rate": 1.3897734627831715e-05, "loss": 0.028, "step": 117850 }, { "epoch": 45.77, "learning_rate": 1.3897216828478965e-05, "loss": 0.1051, "step": 117860 }, { "epoch": 45.77, "learning_rate": 1.3896699029126215e-05, "loss": 0.1985, "step": 117870 }, { "epoch": 45.78, "learning_rate": 1.3896181229773464e-05, "loss": 0.1236, "step": 117880 }, { "epoch": 45.78, "learning_rate": 1.3895663430420714e-05, "loss": 0.2367, "step": 117890 }, { "epoch": 45.79, "learning_rate": 1.3895145631067964e-05, "loss": 0.0241, "step": 117900 }, { "epoch": 45.79, "learning_rate": 1.389462783171521e-05, "loss": 0.0558, "step": 117910 }, { "epoch": 45.79, "learning_rate": 1.389411003236246e-05, "loss": 0.0727, "step": 117920 }, { "epoch": 45.8, "learning_rate": 1.3893592233009709e-05, "loss": 0.1284, "step": 117930 }, { "epoch": 45.8, "learning_rate": 1.3893074433656959e-05, "loss": 0.0046, "step": 117940 }, { "epoch": 45.81, "learning_rate": 1.3892556634304208e-05, "loss": 0.0503, "step": 117950 }, { "epoch": 45.81, "learning_rate": 1.3892038834951458e-05, "loss": 0.0048, "step": 117960 }, { "epoch": 45.81, "learning_rate": 1.3891521035598708e-05, "loss": 0.1027, "step": 117970 }, { "epoch": 45.82, "learning_rate": 1.3891003236245957e-05, "loss": 0.1582, "step": 117980 }, { "epoch": 45.82, "learning_rate": 1.3890485436893203e-05, "loss": 0.0266, "step": 117990 }, { "epoch": 45.83, "learning_rate": 1.3889967637540453e-05, "loss": 0.0175, "step": 118000 }, { "epoch": 45.83, "learning_rate": 1.3889449838187703e-05, "loss": 0.0281, "step": 118010 }, { "epoch": 45.83, "learning_rate": 1.3888932038834952e-05, "loss": 0.0154, "step": 118020 }, { "epoch": 45.84, "learning_rate": 1.3888414239482202e-05, "loss": 0.0858, "step": 118030 }, { "epoch": 45.84, "learning_rate": 1.3887896440129451e-05, "loss": 0.0285, "step": 118040 }, { "epoch": 45.84, "learning_rate": 1.3887378640776701e-05, "loss": 0.2288, "step": 118050 }, { "epoch": 45.85, "learning_rate": 1.388686084142395e-05, "loss": 0.0499, "step": 118060 }, { "epoch": 45.85, "learning_rate": 1.3886343042071197e-05, "loss": 0.1386, "step": 118070 }, { "epoch": 45.86, "learning_rate": 1.3885825242718447e-05, "loss": 0.1937, "step": 118080 }, { "epoch": 45.86, "learning_rate": 1.3885307443365696e-05, "loss": 0.0323, "step": 118090 }, { "epoch": 45.86, "learning_rate": 1.3884789644012946e-05, "loss": 0.0052, "step": 118100 }, { "epoch": 45.87, "learning_rate": 1.3884271844660195e-05, "loss": 0.0721, "step": 118110 }, { "epoch": 45.87, "learning_rate": 1.3883754045307445e-05, "loss": 0.1239, "step": 118120 }, { "epoch": 45.88, "learning_rate": 1.3883236245954695e-05, "loss": 0.1845, "step": 118130 }, { "epoch": 45.88, "learning_rate": 1.3882718446601944e-05, "loss": 0.1274, "step": 118140 }, { "epoch": 45.88, "learning_rate": 1.388220064724919e-05, "loss": 0.0259, "step": 118150 }, { "epoch": 45.89, "learning_rate": 1.388168284789644e-05, "loss": 0.076, "step": 118160 }, { "epoch": 45.89, "learning_rate": 1.388116504854369e-05, "loss": 0.0454, "step": 118170 }, { "epoch": 45.9, "learning_rate": 1.388064724919094e-05, "loss": 0.1178, "step": 118180 }, { "epoch": 45.9, "learning_rate": 1.3880129449838189e-05, "loss": 0.1449, "step": 118190 }, { "epoch": 45.9, "learning_rate": 1.3879611650485439e-05, "loss": 0.0419, "step": 118200 }, { "epoch": 45.91, "learning_rate": 1.3879093851132688e-05, "loss": 0.036, "step": 118210 }, { "epoch": 45.91, "learning_rate": 1.3878576051779938e-05, "loss": 0.0529, "step": 118220 }, { "epoch": 45.91, "learning_rate": 1.3878058252427184e-05, "loss": 0.0808, "step": 118230 }, { "epoch": 45.92, "learning_rate": 1.3877540453074434e-05, "loss": 0.1587, "step": 118240 }, { "epoch": 45.92, "learning_rate": 1.3877022653721683e-05, "loss": 0.0013, "step": 118250 }, { "epoch": 45.93, "learning_rate": 1.3876504854368933e-05, "loss": 0.0618, "step": 118260 }, { "epoch": 45.93, "learning_rate": 1.3875987055016183e-05, "loss": 0.0178, "step": 118270 }, { "epoch": 45.93, "learning_rate": 1.3875469255663432e-05, "loss": 0.0355, "step": 118280 }, { "epoch": 45.94, "learning_rate": 1.3874951456310682e-05, "loss": 0.1488, "step": 118290 }, { "epoch": 45.94, "learning_rate": 1.3874433656957931e-05, "loss": 0.062, "step": 118300 }, { "epoch": 45.95, "learning_rate": 1.3873915857605181e-05, "loss": 0.0332, "step": 118310 }, { "epoch": 45.95, "learning_rate": 1.3873398058252427e-05, "loss": 0.16, "step": 118320 }, { "epoch": 45.95, "learning_rate": 1.3872880258899677e-05, "loss": 0.1126, "step": 118330 }, { "epoch": 45.96, "learning_rate": 1.3872362459546926e-05, "loss": 0.0179, "step": 118340 }, { "epoch": 45.96, "learning_rate": 1.3871844660194176e-05, "loss": 0.1076, "step": 118350 }, { "epoch": 45.97, "learning_rate": 1.3871326860841426e-05, "loss": 0.0104, "step": 118360 }, { "epoch": 45.97, "learning_rate": 1.3870809061488675e-05, "loss": 0.0687, "step": 118370 }, { "epoch": 45.97, "learning_rate": 1.3870291262135925e-05, "loss": 0.1351, "step": 118380 }, { "epoch": 45.98, "learning_rate": 1.3869773462783175e-05, "loss": 0.0399, "step": 118390 }, { "epoch": 45.98, "learning_rate": 1.386925566343042e-05, "loss": 0.1683, "step": 118400 }, { "epoch": 45.98, "learning_rate": 1.386873786407767e-05, "loss": 0.2905, "step": 118410 }, { "epoch": 45.99, "learning_rate": 1.386822006472492e-05, "loss": 0.0311, "step": 118420 }, { "epoch": 45.99, "learning_rate": 1.386770226537217e-05, "loss": 0.1109, "step": 118430 }, { "epoch": 46.0, "learning_rate": 1.386718446601942e-05, "loss": 0.0638, "step": 118440 }, { "epoch": 46.0, "learning_rate": 1.3866666666666669e-05, "loss": 0.1735, "step": 118450 }, { "epoch": 46.0, "eval_accuracy": 0.9493810178817056, "eval_loss": 0.31910738348960876, "eval_runtime": 8.1822, "eval_samples_per_second": 444.255, "eval_steps_per_second": 55.608, "step": 118450 }, { "epoch": 46.0, "learning_rate": 1.3866148867313918e-05, "loss": 0.0018, "step": 118460 }, { "epoch": 46.01, "learning_rate": 1.3865631067961166e-05, "loss": 0.0839, "step": 118470 }, { "epoch": 46.01, "learning_rate": 1.3865113268608414e-05, "loss": 0.147, "step": 118480 }, { "epoch": 46.02, "learning_rate": 1.3864595469255664e-05, "loss": 0.0284, "step": 118490 }, { "epoch": 46.02, "learning_rate": 1.3864077669902914e-05, "loss": 0.0714, "step": 118500 }, { "epoch": 46.02, "learning_rate": 1.3863559870550163e-05, "loss": 0.1089, "step": 118510 }, { "epoch": 46.03, "learning_rate": 1.3863042071197413e-05, "loss": 0.0324, "step": 118520 }, { "epoch": 46.03, "learning_rate": 1.3862524271844662e-05, "loss": 0.0931, "step": 118530 }, { "epoch": 46.03, "learning_rate": 1.3862006472491912e-05, "loss": 0.0398, "step": 118540 }, { "epoch": 46.04, "learning_rate": 1.386148867313916e-05, "loss": 0.1862, "step": 118550 }, { "epoch": 46.04, "learning_rate": 1.3860970873786408e-05, "loss": 0.0189, "step": 118560 }, { "epoch": 46.05, "learning_rate": 1.3860453074433658e-05, "loss": 0.0181, "step": 118570 }, { "epoch": 46.05, "learning_rate": 1.3859935275080907e-05, "loss": 0.0366, "step": 118580 }, { "epoch": 46.05, "learning_rate": 1.3859417475728157e-05, "loss": 0.1725, "step": 118590 }, { "epoch": 46.06, "learning_rate": 1.3858899676375406e-05, "loss": 0.0766, "step": 118600 }, { "epoch": 46.06, "learning_rate": 1.3858381877022656e-05, "loss": 0.0573, "step": 118610 }, { "epoch": 46.07, "learning_rate": 1.3857864077669904e-05, "loss": 0.0378, "step": 118620 }, { "epoch": 46.07, "learning_rate": 1.3857346278317154e-05, "loss": 0.0619, "step": 118630 }, { "epoch": 46.07, "learning_rate": 1.3856828478964401e-05, "loss": 0.1167, "step": 118640 }, { "epoch": 46.08, "learning_rate": 1.3856310679611651e-05, "loss": 0.0349, "step": 118650 }, { "epoch": 46.08, "learning_rate": 1.38557928802589e-05, "loss": 0.0919, "step": 118660 }, { "epoch": 46.09, "learning_rate": 1.385527508090615e-05, "loss": 0.016, "step": 118670 }, { "epoch": 46.09, "learning_rate": 1.38547572815534e-05, "loss": 0.0935, "step": 118680 }, { "epoch": 46.09, "learning_rate": 1.385423948220065e-05, "loss": 0.1195, "step": 118690 }, { "epoch": 46.1, "learning_rate": 1.3853721682847897e-05, "loss": 0.0789, "step": 118700 }, { "epoch": 46.1, "learning_rate": 1.3853203883495147e-05, "loss": 0.2546, "step": 118710 }, { "epoch": 46.1, "learning_rate": 1.3852686084142395e-05, "loss": 0.1461, "step": 118720 }, { "epoch": 46.11, "learning_rate": 1.3852168284789645e-05, "loss": 0.1101, "step": 118730 }, { "epoch": 46.11, "learning_rate": 1.3851650485436894e-05, "loss": 0.174, "step": 118740 }, { "epoch": 46.12, "learning_rate": 1.3851132686084144e-05, "loss": 0.002, "step": 118750 }, { "epoch": 46.12, "learning_rate": 1.3850614886731393e-05, "loss": 0.1688, "step": 118760 }, { "epoch": 46.12, "learning_rate": 1.3850097087378641e-05, "loss": 0.2182, "step": 118770 }, { "epoch": 46.13, "learning_rate": 1.3849579288025891e-05, "loss": 0.0789, "step": 118780 }, { "epoch": 46.13, "learning_rate": 1.384906148867314e-05, "loss": 0.0328, "step": 118790 }, { "epoch": 46.14, "learning_rate": 1.3848543689320389e-05, "loss": 0.0518, "step": 118800 }, { "epoch": 46.14, "learning_rate": 1.3848025889967638e-05, "loss": 0.1019, "step": 118810 }, { "epoch": 46.14, "learning_rate": 1.3847508090614888e-05, "loss": 0.1366, "step": 118820 }, { "epoch": 46.15, "learning_rate": 1.3846990291262137e-05, "loss": 0.0028, "step": 118830 }, { "epoch": 46.15, "learning_rate": 1.3846472491909387e-05, "loss": 0.1695, "step": 118840 }, { "epoch": 46.16, "learning_rate": 1.3845954692556635e-05, "loss": 0.1993, "step": 118850 }, { "epoch": 46.16, "learning_rate": 1.3845436893203885e-05, "loss": 0.1511, "step": 118860 }, { "epoch": 46.16, "learning_rate": 1.3844919093851134e-05, "loss": 0.0072, "step": 118870 }, { "epoch": 46.17, "learning_rate": 1.3844401294498384e-05, "loss": 0.0012, "step": 118880 }, { "epoch": 46.17, "learning_rate": 1.3843883495145632e-05, "loss": 0.0878, "step": 118890 }, { "epoch": 46.17, "learning_rate": 1.3843365695792881e-05, "loss": 0.0755, "step": 118900 }, { "epoch": 46.18, "learning_rate": 1.3842847896440131e-05, "loss": 0.0302, "step": 118910 }, { "epoch": 46.18, "learning_rate": 1.384233009708738e-05, "loss": 0.0009, "step": 118920 }, { "epoch": 46.19, "learning_rate": 1.3841812297734629e-05, "loss": 0.0222, "step": 118930 }, { "epoch": 46.19, "learning_rate": 1.3841294498381878e-05, "loss": 0.0112, "step": 118940 }, { "epoch": 46.19, "learning_rate": 1.3840776699029128e-05, "loss": 0.0209, "step": 118950 }, { "epoch": 46.2, "learning_rate": 1.3840258899676377e-05, "loss": 0.0561, "step": 118960 }, { "epoch": 46.2, "learning_rate": 1.3839741100323625e-05, "loss": 0.2384, "step": 118970 }, { "epoch": 46.21, "learning_rate": 1.3839223300970875e-05, "loss": 0.081, "step": 118980 }, { "epoch": 46.21, "learning_rate": 1.3838705501618125e-05, "loss": 0.0735, "step": 118990 }, { "epoch": 46.21, "learning_rate": 1.3838187702265372e-05, "loss": 0.001, "step": 119000 }, { "epoch": 46.22, "learning_rate": 1.3837669902912622e-05, "loss": 0.0398, "step": 119010 }, { "epoch": 46.22, "learning_rate": 1.3837152103559872e-05, "loss": 0.1107, "step": 119020 }, { "epoch": 46.23, "learning_rate": 1.3836634304207121e-05, "loss": 0.1387, "step": 119030 }, { "epoch": 46.23, "learning_rate": 1.3836116504854371e-05, "loss": 0.0449, "step": 119040 }, { "epoch": 46.23, "learning_rate": 1.3835598705501619e-05, "loss": 0.0339, "step": 119050 }, { "epoch": 46.24, "learning_rate": 1.3835080906148868e-05, "loss": 0.1461, "step": 119060 }, { "epoch": 46.24, "learning_rate": 1.3834563106796118e-05, "loss": 0.1617, "step": 119070 }, { "epoch": 46.24, "learning_rate": 1.3834045307443366e-05, "loss": 0.1343, "step": 119080 }, { "epoch": 46.25, "learning_rate": 1.3833527508090616e-05, "loss": 0.0019, "step": 119090 }, { "epoch": 46.25, "learning_rate": 1.3833009708737865e-05, "loss": 0.1805, "step": 119100 }, { "epoch": 46.26, "learning_rate": 1.3832491909385115e-05, "loss": 0.0314, "step": 119110 }, { "epoch": 46.26, "learning_rate": 1.3831974110032364e-05, "loss": 0.0784, "step": 119120 }, { "epoch": 46.26, "learning_rate": 1.3831456310679612e-05, "loss": 0.0379, "step": 119130 }, { "epoch": 46.27, "learning_rate": 1.3830938511326862e-05, "loss": 0.0424, "step": 119140 }, { "epoch": 46.27, "learning_rate": 1.383042071197411e-05, "loss": 0.0846, "step": 119150 }, { "epoch": 46.28, "learning_rate": 1.382990291262136e-05, "loss": 0.1702, "step": 119160 }, { "epoch": 46.28, "learning_rate": 1.382938511326861e-05, "loss": 0.0852, "step": 119170 }, { "epoch": 46.28, "learning_rate": 1.3828867313915859e-05, "loss": 0.0456, "step": 119180 }, { "epoch": 46.29, "learning_rate": 1.3828349514563108e-05, "loss": 0.034, "step": 119190 }, { "epoch": 46.29, "learning_rate": 1.3827831715210358e-05, "loss": 0.0389, "step": 119200 }, { "epoch": 46.3, "learning_rate": 1.3827313915857606e-05, "loss": 0.0139, "step": 119210 }, { "epoch": 46.3, "learning_rate": 1.3826796116504856e-05, "loss": 0.1255, "step": 119220 }, { "epoch": 46.3, "learning_rate": 1.3826278317152103e-05, "loss": 0.0251, "step": 119230 }, { "epoch": 46.31, "learning_rate": 1.3825760517799353e-05, "loss": 0.0606, "step": 119240 }, { "epoch": 46.31, "learning_rate": 1.3825242718446603e-05, "loss": 0.0554, "step": 119250 }, { "epoch": 46.31, "learning_rate": 1.3824724919093852e-05, "loss": 0.0347, "step": 119260 }, { "epoch": 46.32, "learning_rate": 1.3824207119741102e-05, "loss": 0.0175, "step": 119270 }, { "epoch": 46.32, "learning_rate": 1.3823689320388352e-05, "loss": 0.0472, "step": 119280 }, { "epoch": 46.33, "learning_rate": 1.38231715210356e-05, "loss": 0.0032, "step": 119290 }, { "epoch": 46.33, "learning_rate": 1.3822653721682847e-05, "loss": 0.0069, "step": 119300 }, { "epoch": 46.33, "learning_rate": 1.3822135922330097e-05, "loss": 0.0869, "step": 119310 }, { "epoch": 46.34, "learning_rate": 1.3821618122977347e-05, "loss": 0.1355, "step": 119320 }, { "epoch": 46.34, "learning_rate": 1.3821100323624596e-05, "loss": 0.1399, "step": 119330 }, { "epoch": 46.35, "learning_rate": 1.3820582524271846e-05, "loss": 0.0609, "step": 119340 }, { "epoch": 46.35, "learning_rate": 1.3820064724919096e-05, "loss": 0.0224, "step": 119350 }, { "epoch": 46.35, "learning_rate": 1.3819546925566345e-05, "loss": 0.0383, "step": 119360 }, { "epoch": 46.36, "learning_rate": 1.3819029126213593e-05, "loss": 0.2589, "step": 119370 }, { "epoch": 46.36, "learning_rate": 1.3818511326860841e-05, "loss": 0.1203, "step": 119380 }, { "epoch": 46.37, "learning_rate": 1.381799352750809e-05, "loss": 0.1043, "step": 119390 }, { "epoch": 46.37, "learning_rate": 1.381747572815534e-05, "loss": 0.0252, "step": 119400 }, { "epoch": 46.37, "learning_rate": 1.381695792880259e-05, "loss": 0.0961, "step": 119410 }, { "epoch": 46.38, "learning_rate": 1.381644012944984e-05, "loss": 0.0914, "step": 119420 }, { "epoch": 46.38, "learning_rate": 1.3815922330097089e-05, "loss": 0.1181, "step": 119430 }, { "epoch": 46.38, "learning_rate": 1.3815404530744339e-05, "loss": 0.0055, "step": 119440 }, { "epoch": 46.39, "learning_rate": 1.3814886731391588e-05, "loss": 0.0795, "step": 119450 }, { "epoch": 46.39, "learning_rate": 1.3814368932038835e-05, "loss": 0.0272, "step": 119460 }, { "epoch": 46.4, "learning_rate": 1.3813851132686084e-05, "loss": 0.0054, "step": 119470 }, { "epoch": 46.4, "learning_rate": 1.3813333333333334e-05, "loss": 0.0896, "step": 119480 }, { "epoch": 46.4, "learning_rate": 1.3812815533980583e-05, "loss": 0.0695, "step": 119490 }, { "epoch": 46.41, "learning_rate": 1.3812297734627833e-05, "loss": 0.0112, "step": 119500 }, { "epoch": 46.41, "learning_rate": 1.3811779935275083e-05, "loss": 0.0004, "step": 119510 }, { "epoch": 46.42, "learning_rate": 1.3811262135922332e-05, "loss": 0.1074, "step": 119520 }, { "epoch": 46.42, "learning_rate": 1.3810744336569582e-05, "loss": 0.036, "step": 119530 }, { "epoch": 46.42, "learning_rate": 1.3810226537216828e-05, "loss": 0.0579, "step": 119540 }, { "epoch": 46.43, "learning_rate": 1.3809708737864078e-05, "loss": 0.0402, "step": 119550 }, { "epoch": 46.43, "learning_rate": 1.3809190938511327e-05, "loss": 0.0845, "step": 119560 }, { "epoch": 46.43, "learning_rate": 1.3808673139158577e-05, "loss": 0.1101, "step": 119570 }, { "epoch": 46.44, "learning_rate": 1.3808155339805827e-05, "loss": 0.0447, "step": 119580 }, { "epoch": 46.44, "learning_rate": 1.3807637540453076e-05, "loss": 0.0387, "step": 119590 }, { "epoch": 46.45, "learning_rate": 1.3807119741100326e-05, "loss": 0.0685, "step": 119600 }, { "epoch": 46.45, "learning_rate": 1.3806601941747575e-05, "loss": 0.0284, "step": 119610 }, { "epoch": 46.45, "learning_rate": 1.3806084142394822e-05, "loss": 0.0319, "step": 119620 }, { "epoch": 46.46, "learning_rate": 1.3805566343042071e-05, "loss": 0.1088, "step": 119630 }, { "epoch": 46.46, "learning_rate": 1.3805048543689321e-05, "loss": 0.1034, "step": 119640 }, { "epoch": 46.47, "learning_rate": 1.380453074433657e-05, "loss": 0.069, "step": 119650 }, { "epoch": 46.47, "learning_rate": 1.380401294498382e-05, "loss": 0.1487, "step": 119660 }, { "epoch": 46.47, "learning_rate": 1.380349514563107e-05, "loss": 0.033, "step": 119670 }, { "epoch": 46.48, "learning_rate": 1.380297734627832e-05, "loss": 0.1628, "step": 119680 }, { "epoch": 46.48, "learning_rate": 1.3802459546925569e-05, "loss": 0.1152, "step": 119690 }, { "epoch": 46.49, "learning_rate": 1.3801941747572815e-05, "loss": 0.1103, "step": 119700 }, { "epoch": 46.49, "learning_rate": 1.3801423948220065e-05, "loss": 0.2213, "step": 119710 }, { "epoch": 46.49, "learning_rate": 1.3800906148867314e-05, "loss": 0.1662, "step": 119720 }, { "epoch": 46.5, "learning_rate": 1.3800388349514564e-05, "loss": 0.1643, "step": 119730 }, { "epoch": 46.5, "learning_rate": 1.3799870550161814e-05, "loss": 0.1132, "step": 119740 }, { "epoch": 46.5, "learning_rate": 1.3799352750809063e-05, "loss": 0.1096, "step": 119750 }, { "epoch": 46.51, "learning_rate": 1.3798834951456313e-05, "loss": 0.017, "step": 119760 }, { "epoch": 46.51, "learning_rate": 1.3798317152103563e-05, "loss": 0.0189, "step": 119770 }, { "epoch": 46.52, "learning_rate": 1.3797799352750809e-05, "loss": 0.0304, "step": 119780 }, { "epoch": 46.52, "learning_rate": 1.3797281553398058e-05, "loss": 0.1059, "step": 119790 }, { "epoch": 46.52, "learning_rate": 1.3796763754045308e-05, "loss": 0.1213, "step": 119800 }, { "epoch": 46.53, "learning_rate": 1.3796245954692558e-05, "loss": 0.0942, "step": 119810 }, { "epoch": 46.53, "learning_rate": 1.3795728155339807e-05, "loss": 0.162, "step": 119820 }, { "epoch": 46.54, "learning_rate": 1.3795210355987057e-05, "loss": 0.0228, "step": 119830 }, { "epoch": 46.54, "learning_rate": 1.3794692556634306e-05, "loss": 0.024, "step": 119840 }, { "epoch": 46.54, "learning_rate": 1.3794174757281556e-05, "loss": 0.1177, "step": 119850 }, { "epoch": 46.55, "learning_rate": 1.3793656957928802e-05, "loss": 0.0926, "step": 119860 }, { "epoch": 46.55, "learning_rate": 1.3793139158576052e-05, "loss": 0.0863, "step": 119870 }, { "epoch": 46.56, "learning_rate": 1.3792621359223302e-05, "loss": 0.249, "step": 119880 }, { "epoch": 46.56, "learning_rate": 1.3792103559870551e-05, "loss": 0.0375, "step": 119890 }, { "epoch": 46.56, "learning_rate": 1.37915857605178e-05, "loss": 0.0374, "step": 119900 }, { "epoch": 46.57, "learning_rate": 1.379106796116505e-05, "loss": 0.2026, "step": 119910 }, { "epoch": 46.57, "learning_rate": 1.37905501618123e-05, "loss": 0.0156, "step": 119920 }, { "epoch": 46.57, "learning_rate": 1.379003236245955e-05, "loss": 0.0186, "step": 119930 }, { "epoch": 46.58, "learning_rate": 1.3789514563106796e-05, "loss": 0.0192, "step": 119940 }, { "epoch": 46.58, "learning_rate": 1.3788996763754046e-05, "loss": 0.1303, "step": 119950 }, { "epoch": 46.59, "learning_rate": 1.3788478964401295e-05, "loss": 0.037, "step": 119960 }, { "epoch": 46.59, "learning_rate": 1.3787961165048545e-05, "loss": 0.0351, "step": 119970 }, { "epoch": 46.59, "learning_rate": 1.3787443365695794e-05, "loss": 0.1221, "step": 119980 }, { "epoch": 46.6, "learning_rate": 1.3786925566343044e-05, "loss": 0.0762, "step": 119990 }, { "epoch": 46.6, "learning_rate": 1.3786407766990294e-05, "loss": 0.3293, "step": 120000 }, { "epoch": 46.61, "learning_rate": 1.3785889967637543e-05, "loss": 0.0549, "step": 120010 }, { "epoch": 46.61, "learning_rate": 1.3785372168284791e-05, "loss": 0.0037, "step": 120020 }, { "epoch": 46.61, "learning_rate": 1.3784854368932039e-05, "loss": 0.0568, "step": 120030 }, { "epoch": 46.62, "learning_rate": 1.3784336569579289e-05, "loss": 0.1536, "step": 120040 }, { "epoch": 46.62, "learning_rate": 1.3783818770226538e-05, "loss": 0.0738, "step": 120050 }, { "epoch": 46.63, "learning_rate": 1.3783300970873788e-05, "loss": 0.1567, "step": 120060 }, { "epoch": 46.63, "learning_rate": 1.3782783171521038e-05, "loss": 0.0689, "step": 120070 }, { "epoch": 46.63, "learning_rate": 1.3782265372168287e-05, "loss": 0.0095, "step": 120080 }, { "epoch": 46.64, "learning_rate": 1.3781747572815535e-05, "loss": 0.1123, "step": 120090 }, { "epoch": 46.64, "learning_rate": 1.3781229773462785e-05, "loss": 0.1089, "step": 120100 }, { "epoch": 46.64, "learning_rate": 1.3780711974110033e-05, "loss": 0.1506, "step": 120110 }, { "epoch": 46.65, "learning_rate": 1.3780194174757282e-05, "loss": 0.0835, "step": 120120 }, { "epoch": 46.65, "learning_rate": 1.3779676375404532e-05, "loss": 0.1639, "step": 120130 }, { "epoch": 46.66, "learning_rate": 1.3779158576051781e-05, "loss": 0.1013, "step": 120140 }, { "epoch": 46.66, "learning_rate": 1.3778640776699031e-05, "loss": 0.0229, "step": 120150 }, { "epoch": 46.66, "learning_rate": 1.377812297734628e-05, "loss": 0.0813, "step": 120160 }, { "epoch": 46.67, "learning_rate": 1.3777605177993529e-05, "loss": 0.0726, "step": 120170 }, { "epoch": 46.67, "learning_rate": 1.3777087378640778e-05, "loss": 0.1203, "step": 120180 }, { "epoch": 46.68, "learning_rate": 1.3776569579288026e-05, "loss": 0.0983, "step": 120190 }, { "epoch": 46.68, "learning_rate": 1.3776051779935276e-05, "loss": 0.1232, "step": 120200 }, { "epoch": 46.68, "learning_rate": 1.3775533980582525e-05, "loss": 0.0544, "step": 120210 }, { "epoch": 46.69, "learning_rate": 1.3775016181229775e-05, "loss": 0.0855, "step": 120220 }, { "epoch": 46.69, "learning_rate": 1.3774498381877025e-05, "loss": 0.0232, "step": 120230 }, { "epoch": 46.7, "learning_rate": 1.3773980582524273e-05, "loss": 0.0419, "step": 120240 }, { "epoch": 46.7, "learning_rate": 1.3773462783171522e-05, "loss": 0.0223, "step": 120250 }, { "epoch": 46.7, "learning_rate": 1.3772944983818772e-05, "loss": 0.1979, "step": 120260 }, { "epoch": 46.71, "learning_rate": 1.377242718446602e-05, "loss": 0.0691, "step": 120270 }, { "epoch": 46.71, "learning_rate": 1.377190938511327e-05, "loss": 0.0016, "step": 120280 }, { "epoch": 46.71, "learning_rate": 1.3771391585760519e-05, "loss": 0.0748, "step": 120290 }, { "epoch": 46.72, "learning_rate": 1.3770873786407769e-05, "loss": 0.1024, "step": 120300 }, { "epoch": 46.72, "learning_rate": 1.3770355987055018e-05, "loss": 0.1291, "step": 120310 }, { "epoch": 46.73, "learning_rate": 1.3769838187702266e-05, "loss": 0.1112, "step": 120320 }, { "epoch": 46.73, "learning_rate": 1.3769320388349516e-05, "loss": 0.1231, "step": 120330 }, { "epoch": 46.73, "learning_rate": 1.3768802588996765e-05, "loss": 0.0339, "step": 120340 }, { "epoch": 46.74, "learning_rate": 1.3768284789644013e-05, "loss": 0.0308, "step": 120350 }, { "epoch": 46.74, "learning_rate": 1.3767766990291263e-05, "loss": 0.0417, "step": 120360 }, { "epoch": 46.75, "learning_rate": 1.3767249190938513e-05, "loss": 0.1599, "step": 120370 }, { "epoch": 46.75, "learning_rate": 1.3766731391585762e-05, "loss": 0.0031, "step": 120380 }, { "epoch": 46.75, "learning_rate": 1.376621359223301e-05, "loss": 0.1504, "step": 120390 }, { "epoch": 46.76, "learning_rate": 1.376569579288026e-05, "loss": 0.0284, "step": 120400 }, { "epoch": 46.76, "learning_rate": 1.376517799352751e-05, "loss": 0.0949, "step": 120410 }, { "epoch": 46.77, "learning_rate": 1.3764660194174759e-05, "loss": 0.0286, "step": 120420 }, { "epoch": 46.77, "learning_rate": 1.3764142394822007e-05, "loss": 0.1601, "step": 120430 }, { "epoch": 46.77, "learning_rate": 1.3763624595469256e-05, "loss": 0.0361, "step": 120440 }, { "epoch": 46.78, "learning_rate": 1.3763106796116506e-05, "loss": 0.0823, "step": 120450 }, { "epoch": 46.78, "learning_rate": 1.3762588996763756e-05, "loss": 0.0502, "step": 120460 }, { "epoch": 46.78, "learning_rate": 1.3762071197411004e-05, "loss": 0.0823, "step": 120470 }, { "epoch": 46.79, "learning_rate": 1.3761553398058253e-05, "loss": 0.0271, "step": 120480 }, { "epoch": 46.79, "learning_rate": 1.3761035598705503e-05, "loss": 0.0288, "step": 120490 }, { "epoch": 46.8, "learning_rate": 1.3760517799352752e-05, "loss": 0.0834, "step": 120500 }, { "epoch": 46.8, "learning_rate": 1.376e-05, "loss": 0.1004, "step": 120510 }, { "epoch": 46.8, "learning_rate": 1.375948220064725e-05, "loss": 0.0761, "step": 120520 }, { "epoch": 46.81, "learning_rate": 1.37589644012945e-05, "loss": 0.1262, "step": 120530 }, { "epoch": 46.81, "learning_rate": 1.375844660194175e-05, "loss": 0.03, "step": 120540 }, { "epoch": 46.82, "learning_rate": 1.3757928802588997e-05, "loss": 0.0024, "step": 120550 }, { "epoch": 46.82, "learning_rate": 1.3757411003236247e-05, "loss": 0.146, "step": 120560 }, { "epoch": 46.82, "learning_rate": 1.3756893203883496e-05, "loss": 0.0598, "step": 120570 }, { "epoch": 46.83, "learning_rate": 1.3756375404530746e-05, "loss": 0.0499, "step": 120580 }, { "epoch": 46.83, "learning_rate": 1.3755857605177996e-05, "loss": 0.1578, "step": 120590 }, { "epoch": 46.83, "learning_rate": 1.3755339805825244e-05, "loss": 0.0459, "step": 120600 }, { "epoch": 46.84, "learning_rate": 1.3754822006472493e-05, "loss": 0.0938, "step": 120610 }, { "epoch": 46.84, "learning_rate": 1.3754304207119741e-05, "loss": 0.0326, "step": 120620 }, { "epoch": 46.85, "learning_rate": 1.375378640776699e-05, "loss": 0.1172, "step": 120630 }, { "epoch": 46.85, "learning_rate": 1.375326860841424e-05, "loss": 0.075, "step": 120640 }, { "epoch": 46.85, "learning_rate": 1.375275080906149e-05, "loss": 0.0238, "step": 120650 }, { "epoch": 46.86, "learning_rate": 1.375223300970874e-05, "loss": 0.041, "step": 120660 }, { "epoch": 46.86, "learning_rate": 1.375171521035599e-05, "loss": 0.0608, "step": 120670 }, { "epoch": 46.87, "learning_rate": 1.3751197411003237e-05, "loss": 0.0704, "step": 120680 }, { "epoch": 46.87, "learning_rate": 1.3750679611650487e-05, "loss": 0.1618, "step": 120690 }, { "epoch": 46.87, "learning_rate": 1.3750161812297735e-05, "loss": 0.0539, "step": 120700 }, { "epoch": 46.88, "learning_rate": 1.3749644012944984e-05, "loss": 0.1621, "step": 120710 }, { "epoch": 46.88, "learning_rate": 1.3749126213592234e-05, "loss": 0.0229, "step": 120720 }, { "epoch": 46.89, "learning_rate": 1.3748608414239484e-05, "loss": 0.0604, "step": 120730 }, { "epoch": 46.89, "learning_rate": 1.3748090614886733e-05, "loss": 0.1737, "step": 120740 }, { "epoch": 46.89, "learning_rate": 1.3747572815533983e-05, "loss": 0.0282, "step": 120750 }, { "epoch": 46.9, "learning_rate": 1.374705501618123e-05, "loss": 0.0421, "step": 120760 }, { "epoch": 46.9, "learning_rate": 1.3746537216828479e-05, "loss": 0.0913, "step": 120770 }, { "epoch": 46.9, "learning_rate": 1.3746019417475728e-05, "loss": 0.0692, "step": 120780 }, { "epoch": 46.91, "learning_rate": 1.3745501618122978e-05, "loss": 0.0438, "step": 120790 }, { "epoch": 46.91, "learning_rate": 1.3744983818770227e-05, "loss": 0.0529, "step": 120800 }, { "epoch": 46.92, "learning_rate": 1.3744466019417477e-05, "loss": 0.0004, "step": 120810 }, { "epoch": 46.92, "learning_rate": 1.3743948220064727e-05, "loss": 0.0546, "step": 120820 }, { "epoch": 46.92, "learning_rate": 1.3743430420711976e-05, "loss": 0.02, "step": 120830 }, { "epoch": 46.93, "learning_rate": 1.3742912621359224e-05, "loss": 0.0799, "step": 120840 }, { "epoch": 46.93, "learning_rate": 1.3742394822006472e-05, "loss": 0.0964, "step": 120850 }, { "epoch": 46.94, "learning_rate": 1.3741877022653722e-05, "loss": 0.1591, "step": 120860 }, { "epoch": 46.94, "learning_rate": 1.3741359223300971e-05, "loss": 0.1087, "step": 120870 }, { "epoch": 46.94, "learning_rate": 1.3740841423948221e-05, "loss": 0.13, "step": 120880 }, { "epoch": 46.95, "learning_rate": 1.374032362459547e-05, "loss": 0.027, "step": 120890 }, { "epoch": 46.95, "learning_rate": 1.373980582524272e-05, "loss": 0.1496, "step": 120900 }, { "epoch": 46.96, "learning_rate": 1.373928802588997e-05, "loss": 0.0344, "step": 120910 }, { "epoch": 46.96, "learning_rate": 1.3738770226537216e-05, "loss": 0.0211, "step": 120920 }, { "epoch": 46.96, "learning_rate": 1.3738252427184466e-05, "loss": 0.0177, "step": 120930 }, { "epoch": 46.97, "learning_rate": 1.3737734627831715e-05, "loss": 0.0789, "step": 120940 }, { "epoch": 46.97, "learning_rate": 1.3737216828478965e-05, "loss": 0.0571, "step": 120950 }, { "epoch": 46.97, "learning_rate": 1.3736699029126215e-05, "loss": 0.0665, "step": 120960 }, { "epoch": 46.98, "learning_rate": 1.3736181229773464e-05, "loss": 0.0121, "step": 120970 }, { "epoch": 46.98, "learning_rate": 1.3735663430420714e-05, "loss": 0.0716, "step": 120980 }, { "epoch": 46.99, "learning_rate": 1.3735145631067963e-05, "loss": 0.0427, "step": 120990 }, { "epoch": 46.99, "learning_rate": 1.373462783171521e-05, "loss": 0.143, "step": 121000 }, { "epoch": 46.99, "learning_rate": 1.373411003236246e-05, "loss": 0.0167, "step": 121010 }, { "epoch": 47.0, "learning_rate": 1.3733592233009709e-05, "loss": 0.0477, "step": 121020 }, { "epoch": 47.0, "eval_accuracy": 0.9521320495185694, "eval_loss": 0.3004183769226074, "eval_runtime": 8.2571, "eval_samples_per_second": 440.227, "eval_steps_per_second": 55.104, "step": 121025 }, { "epoch": 47.0, "learning_rate": 1.3733074433656959e-05, "loss": 0.1371, "step": 121030 }, { "epoch": 47.01, "learning_rate": 1.3732556634304208e-05, "loss": 0.1228, "step": 121040 }, { "epoch": 47.01, "learning_rate": 1.3732038834951458e-05, "loss": 0.0104, "step": 121050 }, { "epoch": 47.01, "learning_rate": 1.3731521035598707e-05, "loss": 0.1207, "step": 121060 }, { "epoch": 47.02, "learning_rate": 1.3731003236245957e-05, "loss": 0.093, "step": 121070 }, { "epoch": 47.02, "learning_rate": 1.3730485436893203e-05, "loss": 0.0332, "step": 121080 }, { "epoch": 47.03, "learning_rate": 1.3729967637540453e-05, "loss": 0.1466, "step": 121090 }, { "epoch": 47.03, "learning_rate": 1.3729449838187702e-05, "loss": 0.0632, "step": 121100 }, { "epoch": 47.03, "learning_rate": 1.3728932038834952e-05, "loss": 0.041, "step": 121110 }, { "epoch": 47.04, "learning_rate": 1.3728414239482202e-05, "loss": 0.1267, "step": 121120 }, { "epoch": 47.04, "learning_rate": 1.3727896440129451e-05, "loss": 0.0043, "step": 121130 }, { "epoch": 47.04, "learning_rate": 1.3727378640776701e-05, "loss": 0.0776, "step": 121140 }, { "epoch": 47.05, "learning_rate": 1.372686084142395e-05, "loss": 0.2181, "step": 121150 }, { "epoch": 47.05, "learning_rate": 1.37263430420712e-05, "loss": 0.0791, "step": 121160 }, { "epoch": 47.06, "learning_rate": 1.3725825242718446e-05, "loss": 0.1001, "step": 121170 }, { "epoch": 47.06, "learning_rate": 1.3725307443365696e-05, "loss": 0.0686, "step": 121180 }, { "epoch": 47.06, "learning_rate": 1.3724789644012946e-05, "loss": 0.278, "step": 121190 }, { "epoch": 47.07, "learning_rate": 1.3724271844660195e-05, "loss": 0.0943, "step": 121200 }, { "epoch": 47.07, "learning_rate": 1.3723754045307445e-05, "loss": 0.0034, "step": 121210 }, { "epoch": 47.08, "learning_rate": 1.3723236245954694e-05, "loss": 0.0732, "step": 121220 }, { "epoch": 47.08, "learning_rate": 1.3722718446601944e-05, "loss": 0.1479, "step": 121230 }, { "epoch": 47.08, "learning_rate": 1.3722200647249194e-05, "loss": 0.0813, "step": 121240 }, { "epoch": 47.09, "learning_rate": 1.372168284789644e-05, "loss": 0.0418, "step": 121250 }, { "epoch": 47.09, "learning_rate": 1.372116504854369e-05, "loss": 0.1738, "step": 121260 }, { "epoch": 47.1, "learning_rate": 1.372064724919094e-05, "loss": 0.2138, "step": 121270 }, { "epoch": 47.1, "learning_rate": 1.3720129449838189e-05, "loss": 0.1742, "step": 121280 }, { "epoch": 47.1, "learning_rate": 1.3719611650485438e-05, "loss": 0.0257, "step": 121290 }, { "epoch": 47.11, "learning_rate": 1.3719093851132688e-05, "loss": 0.1741, "step": 121300 }, { "epoch": 47.11, "learning_rate": 1.3718576051779938e-05, "loss": 0.0047, "step": 121310 }, { "epoch": 47.11, "learning_rate": 1.3718058252427187e-05, "loss": 0.0185, "step": 121320 }, { "epoch": 47.12, "learning_rate": 1.3717540453074433e-05, "loss": 0.1538, "step": 121330 }, { "epoch": 47.12, "learning_rate": 1.3717022653721683e-05, "loss": 0.0455, "step": 121340 }, { "epoch": 47.13, "learning_rate": 1.3716504854368933e-05, "loss": 0.0921, "step": 121350 }, { "epoch": 47.13, "learning_rate": 1.3715987055016182e-05, "loss": 0.1305, "step": 121360 }, { "epoch": 47.13, "learning_rate": 1.3715469255663432e-05, "loss": 0.0091, "step": 121370 }, { "epoch": 47.14, "learning_rate": 1.3714951456310682e-05, "loss": 0.094, "step": 121380 }, { "epoch": 47.14, "learning_rate": 1.3714433656957931e-05, "loss": 0.0744, "step": 121390 }, { "epoch": 47.15, "learning_rate": 1.371391585760518e-05, "loss": 0.0416, "step": 121400 }, { "epoch": 47.15, "learning_rate": 1.3713398058252427e-05, "loss": 0.0254, "step": 121410 }, { "epoch": 47.15, "learning_rate": 1.3712880258899677e-05, "loss": 0.0472, "step": 121420 }, { "epoch": 47.16, "learning_rate": 1.3712362459546926e-05, "loss": 0.1175, "step": 121430 }, { "epoch": 47.16, "learning_rate": 1.3711844660194176e-05, "loss": 0.0746, "step": 121440 }, { "epoch": 47.17, "learning_rate": 1.3711326860841426e-05, "loss": 0.1241, "step": 121450 }, { "epoch": 47.17, "learning_rate": 1.3710809061488675e-05, "loss": 0.1389, "step": 121460 }, { "epoch": 47.17, "learning_rate": 1.3710291262135925e-05, "loss": 0.1224, "step": 121470 }, { "epoch": 47.18, "learning_rate": 1.3709773462783174e-05, "loss": 0.1481, "step": 121480 }, { "epoch": 47.18, "learning_rate": 1.370925566343042e-05, "loss": 0.034, "step": 121490 }, { "epoch": 47.18, "learning_rate": 1.370873786407767e-05, "loss": 0.0778, "step": 121500 }, { "epoch": 47.19, "learning_rate": 1.370822006472492e-05, "loss": 0.0572, "step": 121510 }, { "epoch": 47.19, "learning_rate": 1.370770226537217e-05, "loss": 0.0099, "step": 121520 }, { "epoch": 47.2, "learning_rate": 1.3707184466019419e-05, "loss": 0.031, "step": 121530 }, { "epoch": 47.2, "learning_rate": 1.3706666666666669e-05, "loss": 0.1592, "step": 121540 }, { "epoch": 47.2, "learning_rate": 1.3706148867313918e-05, "loss": 0.013, "step": 121550 }, { "epoch": 47.21, "learning_rate": 1.3705631067961166e-05, "loss": 0.0109, "step": 121560 }, { "epoch": 47.21, "learning_rate": 1.3705113268608414e-05, "loss": 0.071, "step": 121570 }, { "epoch": 47.22, "learning_rate": 1.3704595469255664e-05, "loss": 0.0339, "step": 121580 }, { "epoch": 47.22, "learning_rate": 1.3704077669902913e-05, "loss": 0.0363, "step": 121590 }, { "epoch": 47.22, "learning_rate": 1.3703559870550163e-05, "loss": 0.0596, "step": 121600 }, { "epoch": 47.23, "learning_rate": 1.3703042071197413e-05, "loss": 0.0996, "step": 121610 }, { "epoch": 47.23, "learning_rate": 1.3702524271844662e-05, "loss": 0.1272, "step": 121620 }, { "epoch": 47.23, "learning_rate": 1.3702006472491912e-05, "loss": 0.1305, "step": 121630 }, { "epoch": 47.24, "learning_rate": 1.370148867313916e-05, "loss": 0.1203, "step": 121640 }, { "epoch": 47.24, "learning_rate": 1.370097087378641e-05, "loss": 0.1075, "step": 121650 }, { "epoch": 47.25, "learning_rate": 1.3700453074433657e-05, "loss": 0.0821, "step": 121660 }, { "epoch": 47.25, "learning_rate": 1.3699935275080907e-05, "loss": 0.0819, "step": 121670 }, { "epoch": 47.25, "learning_rate": 1.3699417475728157e-05, "loss": 0.1786, "step": 121680 }, { "epoch": 47.26, "learning_rate": 1.3698899676375406e-05, "loss": 0.0382, "step": 121690 }, { "epoch": 47.26, "learning_rate": 1.3698381877022656e-05, "loss": 0.2385, "step": 121700 }, { "epoch": 47.27, "learning_rate": 1.3697864077669904e-05, "loss": 0.043, "step": 121710 }, { "epoch": 47.27, "learning_rate": 1.3697346278317153e-05, "loss": 0.0116, "step": 121720 }, { "epoch": 47.27, "learning_rate": 1.3696828478964403e-05, "loss": 0.0952, "step": 121730 }, { "epoch": 47.28, "learning_rate": 1.3696310679611651e-05, "loss": 0.0347, "step": 121740 }, { "epoch": 47.28, "learning_rate": 1.36957928802589e-05, "loss": 0.1355, "step": 121750 }, { "epoch": 47.29, "learning_rate": 1.369527508090615e-05, "loss": 0.0596, "step": 121760 }, { "epoch": 47.29, "learning_rate": 1.36947572815534e-05, "loss": 0.1499, "step": 121770 }, { "epoch": 47.29, "learning_rate": 1.369423948220065e-05, "loss": 0.0248, "step": 121780 }, { "epoch": 47.3, "learning_rate": 1.3693721682847897e-05, "loss": 0.0949, "step": 121790 }, { "epoch": 47.3, "learning_rate": 1.3693203883495147e-05, "loss": 0.075, "step": 121800 }, { "epoch": 47.3, "learning_rate": 1.3692686084142397e-05, "loss": 0.149, "step": 121810 }, { "epoch": 47.31, "learning_rate": 1.3692168284789644e-05, "loss": 0.2598, "step": 121820 }, { "epoch": 47.31, "learning_rate": 1.3691650485436894e-05, "loss": 0.1062, "step": 121830 }, { "epoch": 47.32, "learning_rate": 1.3691132686084144e-05, "loss": 0.0016, "step": 121840 }, { "epoch": 47.32, "learning_rate": 1.3690614886731393e-05, "loss": 0.1714, "step": 121850 }, { "epoch": 47.32, "learning_rate": 1.3690097087378641e-05, "loss": 0.0502, "step": 121860 }, { "epoch": 47.33, "learning_rate": 1.3689579288025891e-05, "loss": 0.0006, "step": 121870 }, { "epoch": 47.33, "learning_rate": 1.368906148867314e-05, "loss": 0.0922, "step": 121880 }, { "epoch": 47.34, "learning_rate": 1.368854368932039e-05, "loss": 0.0422, "step": 121890 }, { "epoch": 47.34, "learning_rate": 1.3688025889967638e-05, "loss": 0.1023, "step": 121900 }, { "epoch": 47.34, "learning_rate": 1.3687508090614888e-05, "loss": 0.1463, "step": 121910 }, { "epoch": 47.35, "learning_rate": 1.3686990291262137e-05, "loss": 0.1508, "step": 121920 }, { "epoch": 47.35, "learning_rate": 1.3686472491909387e-05, "loss": 0.0694, "step": 121930 }, { "epoch": 47.36, "learning_rate": 1.3685954692556635e-05, "loss": 0.0173, "step": 121940 }, { "epoch": 47.36, "learning_rate": 1.3685436893203884e-05, "loss": 0.0255, "step": 121950 }, { "epoch": 47.36, "learning_rate": 1.3684919093851134e-05, "loss": 0.0446, "step": 121960 }, { "epoch": 47.37, "learning_rate": 1.3684401294498384e-05, "loss": 0.1093, "step": 121970 }, { "epoch": 47.37, "learning_rate": 1.3683883495145632e-05, "loss": 0.2535, "step": 121980 }, { "epoch": 47.37, "learning_rate": 1.3683365695792881e-05, "loss": 0.0037, "step": 121990 }, { "epoch": 47.38, "learning_rate": 1.368284789644013e-05, "loss": 0.136, "step": 122000 }, { "epoch": 47.38, "learning_rate": 1.368233009708738e-05, "loss": 0.0708, "step": 122010 }, { "epoch": 47.39, "learning_rate": 1.3681812297734628e-05, "loss": 0.0052, "step": 122020 }, { "epoch": 47.39, "learning_rate": 1.3681294498381878e-05, "loss": 0.0027, "step": 122030 }, { "epoch": 47.39, "learning_rate": 1.3680776699029128e-05, "loss": 0.0638, "step": 122040 }, { "epoch": 47.4, "learning_rate": 1.3680258899676377e-05, "loss": 0.032, "step": 122050 }, { "epoch": 47.4, "learning_rate": 1.3679741100323625e-05, "loss": 0.0005, "step": 122060 }, { "epoch": 47.41, "learning_rate": 1.3679223300970875e-05, "loss": 0.0252, "step": 122070 }, { "epoch": 47.41, "learning_rate": 1.3678705501618124e-05, "loss": 0.0078, "step": 122080 }, { "epoch": 47.41, "learning_rate": 1.3678187702265372e-05, "loss": 0.0832, "step": 122090 }, { "epoch": 47.42, "learning_rate": 1.3677669902912622e-05, "loss": 0.1648, "step": 122100 }, { "epoch": 47.42, "learning_rate": 1.3677152103559872e-05, "loss": 0.0128, "step": 122110 }, { "epoch": 47.43, "learning_rate": 1.3676634304207121e-05, "loss": 0.0839, "step": 122120 }, { "epoch": 47.43, "learning_rate": 1.367611650485437e-05, "loss": 0.1285, "step": 122130 }, { "epoch": 47.43, "learning_rate": 1.3675598705501619e-05, "loss": 0.0202, "step": 122140 }, { "epoch": 47.44, "learning_rate": 1.3675080906148868e-05, "loss": 0.0689, "step": 122150 }, { "epoch": 47.44, "learning_rate": 1.3674563106796118e-05, "loss": 0.0645, "step": 122160 }, { "epoch": 47.44, "learning_rate": 1.3674045307443366e-05, "loss": 0.0404, "step": 122170 }, { "epoch": 47.45, "learning_rate": 1.3673527508090615e-05, "loss": 0.0626, "step": 122180 }, { "epoch": 47.45, "learning_rate": 1.3673009708737865e-05, "loss": 0.1205, "step": 122190 }, { "epoch": 47.46, "learning_rate": 1.3672491909385115e-05, "loss": 0.0424, "step": 122200 }, { "epoch": 47.46, "learning_rate": 1.3671974110032364e-05, "loss": 0.0212, "step": 122210 }, { "epoch": 47.46, "learning_rate": 1.3671456310679614e-05, "loss": 0.1278, "step": 122220 }, { "epoch": 47.47, "learning_rate": 1.3670938511326862e-05, "loss": 0.1588, "step": 122230 }, { "epoch": 47.47, "learning_rate": 1.367042071197411e-05, "loss": 0.1026, "step": 122240 }, { "epoch": 47.48, "learning_rate": 1.366990291262136e-05, "loss": 0.0783, "step": 122250 }, { "epoch": 47.48, "learning_rate": 1.3669385113268609e-05, "loss": 0.049, "step": 122260 }, { "epoch": 47.48, "learning_rate": 1.3668867313915859e-05, "loss": 0.0639, "step": 122270 }, { "epoch": 47.49, "learning_rate": 1.3668349514563108e-05, "loss": 0.0984, "step": 122280 }, { "epoch": 47.49, "learning_rate": 1.3667831715210358e-05, "loss": 0.1124, "step": 122290 }, { "epoch": 47.5, "learning_rate": 1.3667313915857607e-05, "loss": 0.0511, "step": 122300 }, { "epoch": 47.5, "learning_rate": 1.3666796116504855e-05, "loss": 0.057, "step": 122310 }, { "epoch": 47.5, "learning_rate": 1.3666278317152103e-05, "loss": 0.1541, "step": 122320 }, { "epoch": 47.51, "learning_rate": 1.3665760517799353e-05, "loss": 0.057, "step": 122330 }, { "epoch": 47.51, "learning_rate": 1.3665242718446603e-05, "loss": 0.2141, "step": 122340 }, { "epoch": 47.51, "learning_rate": 1.3664724919093852e-05, "loss": 0.2975, "step": 122350 }, { "epoch": 47.52, "learning_rate": 1.3664207119741102e-05, "loss": 0.1058, "step": 122360 }, { "epoch": 47.52, "learning_rate": 1.3663689320388351e-05, "loss": 0.0886, "step": 122370 }, { "epoch": 47.53, "learning_rate": 1.3663171521035601e-05, "loss": 0.0711, "step": 122380 }, { "epoch": 47.53, "learning_rate": 1.3662653721682847e-05, "loss": 0.0068, "step": 122390 }, { "epoch": 47.53, "learning_rate": 1.3662135922330097e-05, "loss": 0.0533, "step": 122400 }, { "epoch": 47.54, "learning_rate": 1.3661618122977347e-05, "loss": 0.0248, "step": 122410 }, { "epoch": 47.54, "learning_rate": 1.3661100323624596e-05, "loss": 0.1673, "step": 122420 }, { "epoch": 47.55, "learning_rate": 1.3660582524271846e-05, "loss": 0.0899, "step": 122430 }, { "epoch": 47.55, "learning_rate": 1.3660064724919095e-05, "loss": 0.1845, "step": 122440 }, { "epoch": 47.55, "learning_rate": 1.3659546925566345e-05, "loss": 0.0553, "step": 122450 }, { "epoch": 47.56, "learning_rate": 1.3659029126213595e-05, "loss": 0.126, "step": 122460 }, { "epoch": 47.56, "learning_rate": 1.365851132686084e-05, "loss": 0.0339, "step": 122470 }, { "epoch": 47.57, "learning_rate": 1.365799352750809e-05, "loss": 0.0196, "step": 122480 }, { "epoch": 47.57, "learning_rate": 1.365747572815534e-05, "loss": 0.0102, "step": 122490 }, { "epoch": 47.57, "learning_rate": 1.365695792880259e-05, "loss": 0.1043, "step": 122500 }, { "epoch": 47.58, "learning_rate": 1.365644012944984e-05, "loss": 0.0133, "step": 122510 }, { "epoch": 47.58, "learning_rate": 1.3655922330097089e-05, "loss": 0.0119, "step": 122520 }, { "epoch": 47.58, "learning_rate": 1.3655404530744339e-05, "loss": 0.0269, "step": 122530 }, { "epoch": 47.59, "learning_rate": 1.3654886731391588e-05, "loss": 0.0853, "step": 122540 }, { "epoch": 47.59, "learning_rate": 1.3654368932038834e-05, "loss": 0.0534, "step": 122550 }, { "epoch": 47.6, "learning_rate": 1.3653851132686084e-05, "loss": 0.0498, "step": 122560 }, { "epoch": 47.6, "learning_rate": 1.3653333333333334e-05, "loss": 0.0173, "step": 122570 }, { "epoch": 47.6, "learning_rate": 1.3652815533980583e-05, "loss": 0.0946, "step": 122580 }, { "epoch": 47.61, "learning_rate": 1.3652297734627833e-05, "loss": 0.1585, "step": 122590 }, { "epoch": 47.61, "learning_rate": 1.3651779935275082e-05, "loss": 0.0645, "step": 122600 }, { "epoch": 47.62, "learning_rate": 1.3651262135922332e-05, "loss": 0.1473, "step": 122610 }, { "epoch": 47.62, "learning_rate": 1.3650744336569582e-05, "loss": 0.1521, "step": 122620 }, { "epoch": 47.62, "learning_rate": 1.3650226537216828e-05, "loss": 0.0608, "step": 122630 }, { "epoch": 47.63, "learning_rate": 1.3649708737864078e-05, "loss": 0.1303, "step": 122640 }, { "epoch": 47.63, "learning_rate": 1.3649190938511327e-05, "loss": 0.0778, "step": 122650 }, { "epoch": 47.63, "learning_rate": 1.3648673139158577e-05, "loss": 0.087, "step": 122660 }, { "epoch": 47.64, "learning_rate": 1.3648155339805826e-05, "loss": 0.0299, "step": 122670 }, { "epoch": 47.64, "learning_rate": 1.3647637540453076e-05, "loss": 0.0314, "step": 122680 }, { "epoch": 47.65, "learning_rate": 1.3647119741100326e-05, "loss": 0.1367, "step": 122690 }, { "epoch": 47.65, "learning_rate": 1.3646601941747575e-05, "loss": 0.0128, "step": 122700 }, { "epoch": 47.65, "learning_rate": 1.3646084142394821e-05, "loss": 0.1053, "step": 122710 }, { "epoch": 47.66, "learning_rate": 1.3645566343042071e-05, "loss": 0.0099, "step": 122720 }, { "epoch": 47.66, "learning_rate": 1.364504854368932e-05, "loss": 0.1114, "step": 122730 }, { "epoch": 47.67, "learning_rate": 1.364453074433657e-05, "loss": 0.0359, "step": 122740 }, { "epoch": 47.67, "learning_rate": 1.364401294498382e-05, "loss": 0.0119, "step": 122750 }, { "epoch": 47.67, "learning_rate": 1.364349514563107e-05, "loss": 0.0754, "step": 122760 }, { "epoch": 47.68, "learning_rate": 1.364297734627832e-05, "loss": 0.0996, "step": 122770 }, { "epoch": 47.68, "learning_rate": 1.3642459546925569e-05, "loss": 0.0106, "step": 122780 }, { "epoch": 47.69, "learning_rate": 1.3641941747572818e-05, "loss": 0.1583, "step": 122790 }, { "epoch": 47.69, "learning_rate": 1.3641423948220065e-05, "loss": 0.1134, "step": 122800 }, { "epoch": 47.69, "learning_rate": 1.3640906148867314e-05, "loss": 0.1062, "step": 122810 }, { "epoch": 47.7, "learning_rate": 1.3640388349514564e-05, "loss": 0.0031, "step": 122820 }, { "epoch": 47.7, "learning_rate": 1.3639870550161814e-05, "loss": 0.0053, "step": 122830 }, { "epoch": 47.7, "learning_rate": 1.3639352750809063e-05, "loss": 0.1302, "step": 122840 }, { "epoch": 47.71, "learning_rate": 1.3638834951456313e-05, "loss": 0.0067, "step": 122850 }, { "epoch": 47.71, "learning_rate": 1.3638317152103562e-05, "loss": 0.0644, "step": 122860 }, { "epoch": 47.72, "learning_rate": 1.3637799352750812e-05, "loss": 0.0106, "step": 122870 }, { "epoch": 47.72, "learning_rate": 1.3637281553398058e-05, "loss": 0.2422, "step": 122880 }, { "epoch": 47.72, "learning_rate": 1.3636763754045308e-05, "loss": 0.0761, "step": 122890 }, { "epoch": 47.73, "learning_rate": 1.3636245954692557e-05, "loss": 0.0963, "step": 122900 }, { "epoch": 47.73, "learning_rate": 1.3635728155339807e-05, "loss": 0.1635, "step": 122910 }, { "epoch": 47.74, "learning_rate": 1.3635210355987057e-05, "loss": 0.175, "step": 122920 }, { "epoch": 47.74, "learning_rate": 1.3634692556634306e-05, "loss": 0.0721, "step": 122930 }, { "epoch": 47.74, "learning_rate": 1.3634174757281556e-05, "loss": 0.087, "step": 122940 }, { "epoch": 47.75, "learning_rate": 1.3633656957928806e-05, "loss": 0.0063, "step": 122950 }, { "epoch": 47.75, "learning_rate": 1.3633139158576052e-05, "loss": 0.0441, "step": 122960 }, { "epoch": 47.76, "learning_rate": 1.3632621359223301e-05, "loss": 0.0377, "step": 122970 }, { "epoch": 47.76, "learning_rate": 1.3632103559870551e-05, "loss": 0.1396, "step": 122980 }, { "epoch": 47.76, "learning_rate": 1.36315857605178e-05, "loss": 0.021, "step": 122990 }, { "epoch": 47.77, "learning_rate": 1.363106796116505e-05, "loss": 0.0529, "step": 123000 }, { "epoch": 47.77, "learning_rate": 1.36305501618123e-05, "loss": 0.0779, "step": 123010 }, { "epoch": 47.77, "learning_rate": 1.363003236245955e-05, "loss": 0.245, "step": 123020 }, { "epoch": 47.78, "learning_rate": 1.3629514563106797e-05, "loss": 0.0212, "step": 123030 }, { "epoch": 47.78, "learning_rate": 1.3628996763754045e-05, "loss": 0.0071, "step": 123040 }, { "epoch": 47.79, "learning_rate": 1.3628478964401295e-05, "loss": 0.1594, "step": 123050 }, { "epoch": 47.79, "learning_rate": 1.3627961165048545e-05, "loss": 0.0333, "step": 123060 }, { "epoch": 47.79, "learning_rate": 1.3627443365695794e-05, "loss": 0.0095, "step": 123070 }, { "epoch": 47.8, "learning_rate": 1.3626925566343044e-05, "loss": 0.0181, "step": 123080 }, { "epoch": 47.8, "learning_rate": 1.3626407766990293e-05, "loss": 0.0899, "step": 123090 }, { "epoch": 47.81, "learning_rate": 1.3625889967637543e-05, "loss": 0.1647, "step": 123100 }, { "epoch": 47.81, "learning_rate": 1.3625372168284791e-05, "loss": 0.1119, "step": 123110 }, { "epoch": 47.81, "learning_rate": 1.3624854368932039e-05, "loss": 0.1516, "step": 123120 }, { "epoch": 47.82, "learning_rate": 1.3624336569579289e-05, "loss": 0.1561, "step": 123130 }, { "epoch": 47.82, "learning_rate": 1.3623818770226538e-05, "loss": 0.0229, "step": 123140 }, { "epoch": 47.83, "learning_rate": 1.3623300970873788e-05, "loss": 0.1076, "step": 123150 }, { "epoch": 47.83, "learning_rate": 1.3622783171521037e-05, "loss": 0.1757, "step": 123160 }, { "epoch": 47.83, "learning_rate": 1.3622265372168287e-05, "loss": 0.061, "step": 123170 }, { "epoch": 47.84, "learning_rate": 1.3621747572815535e-05, "loss": 0.1153, "step": 123180 }, { "epoch": 47.84, "learning_rate": 1.3621229773462785e-05, "loss": 0.0342, "step": 123190 }, { "epoch": 47.84, "learning_rate": 1.3620711974110032e-05, "loss": 0.1088, "step": 123200 }, { "epoch": 47.85, "learning_rate": 1.3620194174757282e-05, "loss": 0.0344, "step": 123210 }, { "epoch": 47.85, "learning_rate": 1.3619676375404532e-05, "loss": 0.1193, "step": 123220 }, { "epoch": 47.86, "learning_rate": 1.3619158576051781e-05, "loss": 0.1406, "step": 123230 }, { "epoch": 47.86, "learning_rate": 1.3618640776699031e-05, "loss": 0.1434, "step": 123240 }, { "epoch": 47.86, "learning_rate": 1.361812297734628e-05, "loss": 0.052, "step": 123250 }, { "epoch": 47.87, "learning_rate": 1.3617605177993528e-05, "loss": 0.0928, "step": 123260 }, { "epoch": 47.87, "learning_rate": 1.3617087378640778e-05, "loss": 0.0081, "step": 123270 }, { "epoch": 47.88, "learning_rate": 1.3616569579288026e-05, "loss": 0.0496, "step": 123280 }, { "epoch": 47.88, "learning_rate": 1.3616051779935276e-05, "loss": 0.0538, "step": 123290 }, { "epoch": 47.88, "learning_rate": 1.3615533980582525e-05, "loss": 0.1032, "step": 123300 }, { "epoch": 47.89, "learning_rate": 1.3615016181229775e-05, "loss": 0.0959, "step": 123310 }, { "epoch": 47.89, "learning_rate": 1.3614498381877024e-05, "loss": 0.1043, "step": 123320 }, { "epoch": 47.9, "learning_rate": 1.3613980582524272e-05, "loss": 0.0884, "step": 123330 }, { "epoch": 47.9, "learning_rate": 1.3613462783171522e-05, "loss": 0.2509, "step": 123340 }, { "epoch": 47.9, "learning_rate": 1.3612944983818772e-05, "loss": 0.0407, "step": 123350 }, { "epoch": 47.91, "learning_rate": 1.3612427184466021e-05, "loss": 0.072, "step": 123360 }, { "epoch": 47.91, "learning_rate": 1.361190938511327e-05, "loss": 0.0425, "step": 123370 }, { "epoch": 47.91, "learning_rate": 1.3611391585760519e-05, "loss": 0.0003, "step": 123380 }, { "epoch": 47.92, "learning_rate": 1.3610873786407768e-05, "loss": 0.0014, "step": 123390 }, { "epoch": 47.92, "learning_rate": 1.3610355987055018e-05, "loss": 0.0919, "step": 123400 }, { "epoch": 47.93, "learning_rate": 1.3609838187702266e-05, "loss": 0.1265, "step": 123410 }, { "epoch": 47.93, "learning_rate": 1.3609320388349516e-05, "loss": 0.0009, "step": 123420 }, { "epoch": 47.93, "learning_rate": 1.3608802588996765e-05, "loss": 0.0782, "step": 123430 }, { "epoch": 47.94, "learning_rate": 1.3608284789644015e-05, "loss": 0.0963, "step": 123440 }, { "epoch": 47.94, "learning_rate": 1.3607766990291263e-05, "loss": 0.0444, "step": 123450 }, { "epoch": 47.95, "learning_rate": 1.3607249190938512e-05, "loss": 0.1148, "step": 123460 }, { "epoch": 47.95, "learning_rate": 1.3606731391585762e-05, "loss": 0.0076, "step": 123470 }, { "epoch": 47.95, "learning_rate": 1.3606213592233012e-05, "loss": 0.2529, "step": 123480 }, { "epoch": 47.96, "learning_rate": 1.360569579288026e-05, "loss": 0.0187, "step": 123490 }, { "epoch": 47.96, "learning_rate": 1.3605177993527509e-05, "loss": 0.1857, "step": 123500 }, { "epoch": 47.97, "learning_rate": 1.3604660194174759e-05, "loss": 0.0532, "step": 123510 }, { "epoch": 47.97, "learning_rate": 1.3604142394822008e-05, "loss": 0.01, "step": 123520 }, { "epoch": 47.97, "learning_rate": 1.3603624595469256e-05, "loss": 0.1938, "step": 123530 }, { "epoch": 47.98, "learning_rate": 1.3603106796116506e-05, "loss": 0.0742, "step": 123540 }, { "epoch": 47.98, "learning_rate": 1.3602588996763756e-05, "loss": 0.0604, "step": 123550 }, { "epoch": 47.98, "learning_rate": 1.3602071197411003e-05, "loss": 0.0784, "step": 123560 }, { "epoch": 47.99, "learning_rate": 1.3601553398058253e-05, "loss": 0.0182, "step": 123570 }, { "epoch": 47.99, "learning_rate": 1.3601035598705503e-05, "loss": 0.1701, "step": 123580 }, { "epoch": 48.0, "learning_rate": 1.3600517799352752e-05, "loss": 0.0932, "step": 123590 }, { "epoch": 48.0, "learning_rate": 1.3600000000000002e-05, "loss": 0.0221, "step": 123600 }, { "epoch": 48.0, "eval_accuracy": 0.9480055020632737, "eval_loss": 0.32050269842147827, "eval_runtime": 8.2485, "eval_samples_per_second": 440.685, "eval_steps_per_second": 55.161, "step": 123600 }, { "epoch": 48.0, "learning_rate": 1.359948220064725e-05, "loss": 0.0992, "step": 123610 }, { "epoch": 48.01, "learning_rate": 1.35989644012945e-05, "loss": 0.0346, "step": 123620 }, { "epoch": 48.01, "learning_rate": 1.3598446601941749e-05, "loss": 0.066, "step": 123630 }, { "epoch": 48.02, "learning_rate": 1.3597928802588997e-05, "loss": 0.1347, "step": 123640 }, { "epoch": 48.02, "learning_rate": 1.3597411003236247e-05, "loss": 0.029, "step": 123650 }, { "epoch": 48.02, "learning_rate": 1.3596893203883496e-05, "loss": 0.0241, "step": 123660 }, { "epoch": 48.03, "learning_rate": 1.3596375404530746e-05, "loss": 0.0115, "step": 123670 }, { "epoch": 48.03, "learning_rate": 1.3595857605177995e-05, "loss": 0.1223, "step": 123680 }, { "epoch": 48.03, "learning_rate": 1.3595339805825243e-05, "loss": 0.1311, "step": 123690 }, { "epoch": 48.04, "learning_rate": 1.3594822006472493e-05, "loss": 0.0632, "step": 123700 }, { "epoch": 48.04, "learning_rate": 1.3594304207119741e-05, "loss": 0.0287, "step": 123710 }, { "epoch": 48.05, "learning_rate": 1.359378640776699e-05, "loss": 0.0568, "step": 123720 }, { "epoch": 48.05, "learning_rate": 1.359326860841424e-05, "loss": 0.0805, "step": 123730 }, { "epoch": 48.05, "learning_rate": 1.359275080906149e-05, "loss": 0.0885, "step": 123740 }, { "epoch": 48.06, "learning_rate": 1.359223300970874e-05, "loss": 0.0385, "step": 123750 }, { "epoch": 48.06, "learning_rate": 1.3591715210355989e-05, "loss": 0.0407, "step": 123760 }, { "epoch": 48.07, "learning_rate": 1.3591197411003237e-05, "loss": 0.0514, "step": 123770 }, { "epoch": 48.07, "learning_rate": 1.3590679611650487e-05, "loss": 0.1457, "step": 123780 }, { "epoch": 48.07, "learning_rate": 1.3590161812297735e-05, "loss": 0.0113, "step": 123790 }, { "epoch": 48.08, "learning_rate": 1.3589644012944984e-05, "loss": 0.0014, "step": 123800 }, { "epoch": 48.08, "learning_rate": 1.3589126213592234e-05, "loss": 0.0857, "step": 123810 }, { "epoch": 48.09, "learning_rate": 1.3588608414239483e-05, "loss": 0.1111, "step": 123820 }, { "epoch": 48.09, "learning_rate": 1.3588090614886733e-05, "loss": 0.0866, "step": 123830 }, { "epoch": 48.09, "learning_rate": 1.3587572815533983e-05, "loss": 0.0227, "step": 123840 }, { "epoch": 48.1, "learning_rate": 1.358705501618123e-05, "loss": 0.1133, "step": 123850 }, { "epoch": 48.1, "learning_rate": 1.3586537216828478e-05, "loss": 0.0927, "step": 123860 }, { "epoch": 48.1, "learning_rate": 1.3586019417475728e-05, "loss": 0.0294, "step": 123870 }, { "epoch": 48.11, "learning_rate": 1.3585501618122978e-05, "loss": 0.141, "step": 123880 }, { "epoch": 48.11, "learning_rate": 1.3584983818770227e-05, "loss": 0.0935, "step": 123890 }, { "epoch": 48.12, "learning_rate": 1.3584466019417477e-05, "loss": 0.0002, "step": 123900 }, { "epoch": 48.12, "learning_rate": 1.3583948220064727e-05, "loss": 0.0005, "step": 123910 }, { "epoch": 48.12, "learning_rate": 1.3583430420711976e-05, "loss": 0.0589, "step": 123920 }, { "epoch": 48.13, "learning_rate": 1.3582912621359226e-05, "loss": 0.0015, "step": 123930 }, { "epoch": 48.13, "learning_rate": 1.3582394822006472e-05, "loss": 0.0272, "step": 123940 }, { "epoch": 48.14, "learning_rate": 1.3581877022653722e-05, "loss": 0.3285, "step": 123950 }, { "epoch": 48.14, "learning_rate": 1.3581359223300971e-05, "loss": 0.0486, "step": 123960 }, { "epoch": 48.14, "learning_rate": 1.3580841423948221e-05, "loss": 0.0523, "step": 123970 }, { "epoch": 48.15, "learning_rate": 1.358032362459547e-05, "loss": 0.0628, "step": 123980 }, { "epoch": 48.15, "learning_rate": 1.357980582524272e-05, "loss": 0.0848, "step": 123990 }, { "epoch": 48.16, "learning_rate": 1.357928802588997e-05, "loss": 0.0794, "step": 124000 }, { "epoch": 48.16, "learning_rate": 1.357877022653722e-05, "loss": 0.0214, "step": 124010 }, { "epoch": 48.16, "learning_rate": 1.3578252427184466e-05, "loss": 0.0272, "step": 124020 }, { "epoch": 48.17, "learning_rate": 1.3577734627831715e-05, "loss": 0.0188, "step": 124030 }, { "epoch": 48.17, "learning_rate": 1.3577216828478965e-05, "loss": 0.108, "step": 124040 }, { "epoch": 48.17, "learning_rate": 1.3576699029126214e-05, "loss": 0.1005, "step": 124050 }, { "epoch": 48.18, "learning_rate": 1.3576181229773464e-05, "loss": 0.0648, "step": 124060 }, { "epoch": 48.18, "learning_rate": 1.3575663430420714e-05, "loss": 0.0014, "step": 124070 }, { "epoch": 48.19, "learning_rate": 1.3575145631067963e-05, "loss": 0.0652, "step": 124080 }, { "epoch": 48.19, "learning_rate": 1.3574627831715213e-05, "loss": 0.0451, "step": 124090 }, { "epoch": 48.19, "learning_rate": 1.3574110032362459e-05, "loss": 0.0496, "step": 124100 }, { "epoch": 48.2, "learning_rate": 1.3573592233009709e-05, "loss": 0.2249, "step": 124110 }, { "epoch": 48.2, "learning_rate": 1.3573074433656958e-05, "loss": 0.0457, "step": 124120 }, { "epoch": 48.21, "learning_rate": 1.3572556634304208e-05, "loss": 0.0657, "step": 124130 }, { "epoch": 48.21, "learning_rate": 1.3572038834951458e-05, "loss": 0.0196, "step": 124140 }, { "epoch": 48.21, "learning_rate": 1.3571521035598707e-05, "loss": 0.0658, "step": 124150 }, { "epoch": 48.22, "learning_rate": 1.3571003236245957e-05, "loss": 0.0144, "step": 124160 }, { "epoch": 48.22, "learning_rate": 1.3570485436893206e-05, "loss": 0.0354, "step": 124170 }, { "epoch": 48.23, "learning_rate": 1.3569967637540453e-05, "loss": 0.0068, "step": 124180 }, { "epoch": 48.23, "learning_rate": 1.3569449838187702e-05, "loss": 0.0308, "step": 124190 }, { "epoch": 48.23, "learning_rate": 1.3568932038834952e-05, "loss": 0.1704, "step": 124200 }, { "epoch": 48.24, "learning_rate": 1.3568414239482202e-05, "loss": 0.2049, "step": 124210 }, { "epoch": 48.24, "learning_rate": 1.3567896440129451e-05, "loss": 0.2563, "step": 124220 }, { "epoch": 48.24, "learning_rate": 1.35673786407767e-05, "loss": 0.1956, "step": 124230 }, { "epoch": 48.25, "learning_rate": 1.356686084142395e-05, "loss": 0.1156, "step": 124240 }, { "epoch": 48.25, "learning_rate": 1.35663430420712e-05, "loss": 0.022, "step": 124250 }, { "epoch": 48.26, "learning_rate": 1.3565825242718446e-05, "loss": 0.1304, "step": 124260 }, { "epoch": 48.26, "learning_rate": 1.3565307443365696e-05, "loss": 0.1049, "step": 124270 }, { "epoch": 48.26, "learning_rate": 1.3564789644012945e-05, "loss": 0.0293, "step": 124280 }, { "epoch": 48.27, "learning_rate": 1.3564271844660195e-05, "loss": 0.0011, "step": 124290 }, { "epoch": 48.27, "learning_rate": 1.3563754045307445e-05, "loss": 0.0677, "step": 124300 }, { "epoch": 48.28, "learning_rate": 1.3563236245954694e-05, "loss": 0.0375, "step": 124310 }, { "epoch": 48.28, "learning_rate": 1.3562718446601944e-05, "loss": 0.0082, "step": 124320 }, { "epoch": 48.28, "learning_rate": 1.3562200647249194e-05, "loss": 0.1539, "step": 124330 }, { "epoch": 48.29, "learning_rate": 1.356168284789644e-05, "loss": 0.1383, "step": 124340 }, { "epoch": 48.29, "learning_rate": 1.356116504854369e-05, "loss": 0.0689, "step": 124350 }, { "epoch": 48.3, "learning_rate": 1.3560647249190939e-05, "loss": 0.0014, "step": 124360 }, { "epoch": 48.3, "learning_rate": 1.3560129449838189e-05, "loss": 0.1445, "step": 124370 }, { "epoch": 48.3, "learning_rate": 1.3559611650485438e-05, "loss": 0.0403, "step": 124380 }, { "epoch": 48.31, "learning_rate": 1.3559093851132688e-05, "loss": 0.164, "step": 124390 }, { "epoch": 48.31, "learning_rate": 1.3558576051779937e-05, "loss": 0.0682, "step": 124400 }, { "epoch": 48.31, "learning_rate": 1.3558058252427187e-05, "loss": 0.046, "step": 124410 }, { "epoch": 48.32, "learning_rate": 1.3557540453074433e-05, "loss": 0.0213, "step": 124420 }, { "epoch": 48.32, "learning_rate": 1.3557022653721683e-05, "loss": 0.1289, "step": 124430 }, { "epoch": 48.33, "learning_rate": 1.3556504854368933e-05, "loss": 0.0868, "step": 124440 }, { "epoch": 48.33, "learning_rate": 1.3555987055016182e-05, "loss": 0.1261, "step": 124450 }, { "epoch": 48.33, "learning_rate": 1.3555469255663432e-05, "loss": 0.0079, "step": 124460 }, { "epoch": 48.34, "learning_rate": 1.3554951456310681e-05, "loss": 0.0656, "step": 124470 }, { "epoch": 48.34, "learning_rate": 1.3554433656957931e-05, "loss": 0.0827, "step": 124480 }, { "epoch": 48.35, "learning_rate": 1.355391585760518e-05, "loss": 0.0681, "step": 124490 }, { "epoch": 48.35, "learning_rate": 1.3553398058252429e-05, "loss": 0.1584, "step": 124500 }, { "epoch": 48.35, "learning_rate": 1.3552880258899677e-05, "loss": 0.0024, "step": 124510 }, { "epoch": 48.36, "learning_rate": 1.3552362459546926e-05, "loss": 0.1401, "step": 124520 }, { "epoch": 48.36, "learning_rate": 1.3551844660194176e-05, "loss": 0.0968, "step": 124530 }, { "epoch": 48.37, "learning_rate": 1.3551326860841425e-05, "loss": 0.0003, "step": 124540 }, { "epoch": 48.37, "learning_rate": 1.3550809061488675e-05, "loss": 0.0298, "step": 124550 }, { "epoch": 48.37, "learning_rate": 1.3550291262135925e-05, "loss": 0.101, "step": 124560 }, { "epoch": 48.38, "learning_rate": 1.3549773462783174e-05, "loss": 0.1293, "step": 124570 }, { "epoch": 48.38, "learning_rate": 1.3549255663430422e-05, "loss": 0.0831, "step": 124580 }, { "epoch": 48.38, "learning_rate": 1.354873786407767e-05, "loss": 0.0306, "step": 124590 }, { "epoch": 48.39, "learning_rate": 1.354822006472492e-05, "loss": 0.0406, "step": 124600 }, { "epoch": 48.39, "learning_rate": 1.354770226537217e-05, "loss": 0.0456, "step": 124610 }, { "epoch": 48.4, "learning_rate": 1.3547184466019419e-05, "loss": 0.0659, "step": 124620 }, { "epoch": 48.4, "learning_rate": 1.3546666666666669e-05, "loss": 0.0404, "step": 124630 }, { "epoch": 48.4, "learning_rate": 1.3546148867313918e-05, "loss": 0.0725, "step": 124640 }, { "epoch": 48.41, "learning_rate": 1.3545631067961166e-05, "loss": 0.0416, "step": 124650 }, { "epoch": 48.41, "learning_rate": 1.3545113268608416e-05, "loss": 0.0704, "step": 124660 }, { "epoch": 48.42, "learning_rate": 1.3544595469255664e-05, "loss": 0.1281, "step": 124670 }, { "epoch": 48.42, "learning_rate": 1.3544077669902913e-05, "loss": 0.0504, "step": 124680 }, { "epoch": 48.42, "learning_rate": 1.3543559870550163e-05, "loss": 0.0833, "step": 124690 }, { "epoch": 48.43, "learning_rate": 1.3543042071197412e-05, "loss": 0.2, "step": 124700 }, { "epoch": 48.43, "learning_rate": 1.3542524271844662e-05, "loss": 0.1419, "step": 124710 }, { "epoch": 48.43, "learning_rate": 1.3542006472491912e-05, "loss": 0.0258, "step": 124720 }, { "epoch": 48.44, "learning_rate": 1.354148867313916e-05, "loss": 0.1503, "step": 124730 }, { "epoch": 48.44, "learning_rate": 1.354097087378641e-05, "loss": 0.0365, "step": 124740 }, { "epoch": 48.45, "learning_rate": 1.3540453074433657e-05, "loss": 0.0804, "step": 124750 }, { "epoch": 48.45, "learning_rate": 1.3539935275080907e-05, "loss": 0.0059, "step": 124760 }, { "epoch": 48.45, "learning_rate": 1.3539417475728156e-05, "loss": 0.0061, "step": 124770 }, { "epoch": 48.46, "learning_rate": 1.3538899676375406e-05, "loss": 0.0099, "step": 124780 }, { "epoch": 48.46, "learning_rate": 1.3538381877022656e-05, "loss": 0.0256, "step": 124790 }, { "epoch": 48.47, "learning_rate": 1.3537864077669904e-05, "loss": 0.1119, "step": 124800 }, { "epoch": 48.47, "learning_rate": 1.3537346278317153e-05, "loss": 0.0005, "step": 124810 }, { "epoch": 48.47, "learning_rate": 1.3536828478964403e-05, "loss": 0.0924, "step": 124820 }, { "epoch": 48.48, "learning_rate": 1.353631067961165e-05, "loss": 0.1597, "step": 124830 }, { "epoch": 48.48, "learning_rate": 1.35357928802589e-05, "loss": 0.1043, "step": 124840 }, { "epoch": 48.49, "learning_rate": 1.353527508090615e-05, "loss": 0.0333, "step": 124850 }, { "epoch": 48.49, "learning_rate": 1.35347572815534e-05, "loss": 0.2038, "step": 124860 }, { "epoch": 48.49, "learning_rate": 1.353423948220065e-05, "loss": 0.0507, "step": 124870 }, { "epoch": 48.5, "learning_rate": 1.3533721682847897e-05, "loss": 0.0928, "step": 124880 }, { "epoch": 48.5, "learning_rate": 1.3533203883495147e-05, "loss": 0.0206, "step": 124890 }, { "epoch": 48.5, "learning_rate": 1.3532686084142396e-05, "loss": 0.1349, "step": 124900 }, { "epoch": 48.51, "learning_rate": 1.3532168284789644e-05, "loss": 0.1557, "step": 124910 }, { "epoch": 48.51, "learning_rate": 1.3531650485436894e-05, "loss": 0.1238, "step": 124920 }, { "epoch": 48.52, "learning_rate": 1.3531132686084144e-05, "loss": 0.1107, "step": 124930 }, { "epoch": 48.52, "learning_rate": 1.3530614886731393e-05, "loss": 0.0173, "step": 124940 }, { "epoch": 48.52, "learning_rate": 1.3530097087378641e-05, "loss": 0.1518, "step": 124950 }, { "epoch": 48.53, "learning_rate": 1.352957928802589e-05, "loss": 0.0234, "step": 124960 }, { "epoch": 48.53, "learning_rate": 1.352906148867314e-05, "loss": 0.0899, "step": 124970 }, { "epoch": 48.54, "learning_rate": 1.352854368932039e-05, "loss": 0.0868, "step": 124980 }, { "epoch": 48.54, "learning_rate": 1.3528025889967638e-05, "loss": 0.111, "step": 124990 }, { "epoch": 48.54, "learning_rate": 1.3527508090614887e-05, "loss": 0.0305, "step": 125000 }, { "epoch": 48.55, "learning_rate": 1.3526990291262137e-05, "loss": 0.193, "step": 125010 }, { "epoch": 48.55, "learning_rate": 1.3526472491909387e-05, "loss": 0.0008, "step": 125020 }, { "epoch": 48.56, "learning_rate": 1.3525954692556635e-05, "loss": 0.0152, "step": 125030 }, { "epoch": 48.56, "learning_rate": 1.3525436893203884e-05, "loss": 0.0615, "step": 125040 }, { "epoch": 48.56, "learning_rate": 1.3524919093851134e-05, "loss": 0.2004, "step": 125050 }, { "epoch": 48.57, "learning_rate": 1.3524401294498383e-05, "loss": 0.1104, "step": 125060 }, { "epoch": 48.57, "learning_rate": 1.3523883495145633e-05, "loss": 0.1484, "step": 125070 }, { "epoch": 48.57, "learning_rate": 1.3523365695792881e-05, "loss": 0.0449, "step": 125080 }, { "epoch": 48.58, "learning_rate": 1.352284789644013e-05, "loss": 0.1092, "step": 125090 }, { "epoch": 48.58, "learning_rate": 1.352233009708738e-05, "loss": 0.0015, "step": 125100 }, { "epoch": 48.59, "learning_rate": 1.3521812297734628e-05, "loss": 0.0404, "step": 125110 }, { "epoch": 48.59, "learning_rate": 1.3521294498381878e-05, "loss": 0.0572, "step": 125120 }, { "epoch": 48.59, "learning_rate": 1.3520776699029127e-05, "loss": 0.1467, "step": 125130 }, { "epoch": 48.6, "learning_rate": 1.3520258899676377e-05, "loss": 0.1845, "step": 125140 }, { "epoch": 48.6, "learning_rate": 1.3519741100323627e-05, "loss": 0.0589, "step": 125150 }, { "epoch": 48.61, "learning_rate": 1.3519223300970875e-05, "loss": 0.0901, "step": 125160 }, { "epoch": 48.61, "learning_rate": 1.3518705501618124e-05, "loss": 0.0126, "step": 125170 }, { "epoch": 48.61, "learning_rate": 1.3518187702265372e-05, "loss": 0.1349, "step": 125180 }, { "epoch": 48.62, "learning_rate": 1.3517669902912622e-05, "loss": 0.0577, "step": 125190 }, { "epoch": 48.62, "learning_rate": 1.3517152103559871e-05, "loss": 0.0616, "step": 125200 }, { "epoch": 48.63, "learning_rate": 1.3516634304207121e-05, "loss": 0.018, "step": 125210 }, { "epoch": 48.63, "learning_rate": 1.351611650485437e-05, "loss": 0.0131, "step": 125220 }, { "epoch": 48.63, "learning_rate": 1.351559870550162e-05, "loss": 0.0069, "step": 125230 }, { "epoch": 48.64, "learning_rate": 1.3515080906148868e-05, "loss": 0.075, "step": 125240 }, { "epoch": 48.64, "learning_rate": 1.3514563106796118e-05, "loss": 0.1003, "step": 125250 }, { "epoch": 48.64, "learning_rate": 1.3514045307443366e-05, "loss": 0.0671, "step": 125260 }, { "epoch": 48.65, "learning_rate": 1.3513527508090615e-05, "loss": 0.0932, "step": 125270 }, { "epoch": 48.65, "learning_rate": 1.3513009708737865e-05, "loss": 0.0715, "step": 125280 }, { "epoch": 48.66, "learning_rate": 1.3512491909385115e-05, "loss": 0.1556, "step": 125290 }, { "epoch": 48.66, "learning_rate": 1.3511974110032364e-05, "loss": 0.056, "step": 125300 }, { "epoch": 48.66, "learning_rate": 1.3511456310679614e-05, "loss": 0.0124, "step": 125310 }, { "epoch": 48.67, "learning_rate": 1.3510938511326862e-05, "loss": 0.1419, "step": 125320 }, { "epoch": 48.67, "learning_rate": 1.351042071197411e-05, "loss": 0.014, "step": 125330 }, { "epoch": 48.68, "learning_rate": 1.350990291262136e-05, "loss": 0.129, "step": 125340 }, { "epoch": 48.68, "learning_rate": 1.3509385113268609e-05, "loss": 0.075, "step": 125350 }, { "epoch": 48.68, "learning_rate": 1.3508867313915858e-05, "loss": 0.04, "step": 125360 }, { "epoch": 48.69, "learning_rate": 1.3508349514563108e-05, "loss": 0.1133, "step": 125370 }, { "epoch": 48.69, "learning_rate": 1.3507831715210358e-05, "loss": 0.1857, "step": 125380 }, { "epoch": 48.7, "learning_rate": 1.3507313915857607e-05, "loss": 0.0863, "step": 125390 }, { "epoch": 48.7, "learning_rate": 1.3506796116504855e-05, "loss": 0.1328, "step": 125400 }, { "epoch": 48.7, "learning_rate": 1.3506278317152103e-05, "loss": 0.0005, "step": 125410 }, { "epoch": 48.71, "learning_rate": 1.3505760517799353e-05, "loss": 0.07, "step": 125420 }, { "epoch": 48.71, "learning_rate": 1.3505242718446602e-05, "loss": 0.1331, "step": 125430 }, { "epoch": 48.71, "learning_rate": 1.3504724919093852e-05, "loss": 0.0368, "step": 125440 }, { "epoch": 48.72, "learning_rate": 1.3504207119741102e-05, "loss": 0.0566, "step": 125450 }, { "epoch": 48.72, "learning_rate": 1.3503689320388351e-05, "loss": 0.0136, "step": 125460 }, { "epoch": 48.73, "learning_rate": 1.3503171521035601e-05, "loss": 0.0355, "step": 125470 }, { "epoch": 48.73, "learning_rate": 1.3502653721682847e-05, "loss": 0.0808, "step": 125480 }, { "epoch": 48.73, "learning_rate": 1.3502135922330097e-05, "loss": 0.0634, "step": 125490 }, { "epoch": 48.74, "learning_rate": 1.3501618122977346e-05, "loss": 0.1023, "step": 125500 }, { "epoch": 48.74, "learning_rate": 1.3501100323624596e-05, "loss": 0.0131, "step": 125510 }, { "epoch": 48.75, "learning_rate": 1.3500582524271846e-05, "loss": 0.0347, "step": 125520 }, { "epoch": 48.75, "learning_rate": 1.3500064724919095e-05, "loss": 0.0378, "step": 125530 }, { "epoch": 48.75, "learning_rate": 1.3499546925566345e-05, "loss": 0.1495, "step": 125540 }, { "epoch": 48.76, "learning_rate": 1.3499029126213594e-05, "loss": 0.0062, "step": 125550 }, { "epoch": 48.76, "learning_rate": 1.349851132686084e-05, "loss": 0.1176, "step": 125560 }, { "epoch": 48.77, "learning_rate": 1.349799352750809e-05, "loss": 0.082, "step": 125570 }, { "epoch": 48.77, "learning_rate": 1.349747572815534e-05, "loss": 0.1871, "step": 125580 }, { "epoch": 48.77, "learning_rate": 1.349695792880259e-05, "loss": 0.1503, "step": 125590 }, { "epoch": 48.78, "learning_rate": 1.3496440129449839e-05, "loss": 0.0503, "step": 125600 }, { "epoch": 48.78, "learning_rate": 1.3495922330097089e-05, "loss": 0.002, "step": 125610 }, { "epoch": 48.78, "learning_rate": 1.3495404530744338e-05, "loss": 0.1018, "step": 125620 }, { "epoch": 48.79, "learning_rate": 1.3494886731391588e-05, "loss": 0.0875, "step": 125630 }, { "epoch": 48.79, "learning_rate": 1.3494368932038838e-05, "loss": 0.1589, "step": 125640 }, { "epoch": 48.8, "learning_rate": 1.3493851132686084e-05, "loss": 0.0942, "step": 125650 }, { "epoch": 48.8, "learning_rate": 1.3493333333333333e-05, "loss": 0.2853, "step": 125660 }, { "epoch": 48.8, "learning_rate": 1.3492815533980583e-05, "loss": 0.0427, "step": 125670 }, { "epoch": 48.81, "learning_rate": 1.3492297734627833e-05, "loss": 0.1611, "step": 125680 }, { "epoch": 48.81, "learning_rate": 1.3491779935275082e-05, "loss": 0.064, "step": 125690 }, { "epoch": 48.82, "learning_rate": 1.3491262135922332e-05, "loss": 0.1089, "step": 125700 }, { "epoch": 48.82, "learning_rate": 1.3490744336569582e-05, "loss": 0.0263, "step": 125710 }, { "epoch": 48.82, "learning_rate": 1.3490226537216831e-05, "loss": 0.0322, "step": 125720 }, { "epoch": 48.83, "learning_rate": 1.3489708737864077e-05, "loss": 0.0624, "step": 125730 }, { "epoch": 48.83, "learning_rate": 1.3489190938511327e-05, "loss": 0.1727, "step": 125740 }, { "epoch": 48.83, "learning_rate": 1.3488673139158577e-05, "loss": 0.0412, "step": 125750 }, { "epoch": 48.84, "learning_rate": 1.3488155339805826e-05, "loss": 0.1111, "step": 125760 }, { "epoch": 48.84, "learning_rate": 1.3487637540453076e-05, "loss": 0.0448, "step": 125770 }, { "epoch": 48.85, "learning_rate": 1.3487119741100325e-05, "loss": 0.0952, "step": 125780 }, { "epoch": 48.85, "learning_rate": 1.3486601941747575e-05, "loss": 0.065, "step": 125790 }, { "epoch": 48.85, "learning_rate": 1.3486084142394825e-05, "loss": 0.0445, "step": 125800 }, { "epoch": 48.86, "learning_rate": 1.3485566343042071e-05, "loss": 0.0563, "step": 125810 }, { "epoch": 48.86, "learning_rate": 1.348504854368932e-05, "loss": 0.0329, "step": 125820 }, { "epoch": 48.87, "learning_rate": 1.348453074433657e-05, "loss": 0.0834, "step": 125830 }, { "epoch": 48.87, "learning_rate": 1.348401294498382e-05, "loss": 0.0074, "step": 125840 }, { "epoch": 48.87, "learning_rate": 1.348349514563107e-05, "loss": 0.0249, "step": 125850 }, { "epoch": 48.88, "learning_rate": 1.3482977346278319e-05, "loss": 0.1716, "step": 125860 }, { "epoch": 48.88, "learning_rate": 1.3482459546925569e-05, "loss": 0.0047, "step": 125870 }, { "epoch": 48.89, "learning_rate": 1.3481941747572818e-05, "loss": 0.1417, "step": 125880 }, { "epoch": 48.89, "learning_rate": 1.3481423948220065e-05, "loss": 0.0885, "step": 125890 }, { "epoch": 48.89, "learning_rate": 1.3480906148867314e-05, "loss": 0.0489, "step": 125900 }, { "epoch": 48.9, "learning_rate": 1.3480388349514564e-05, "loss": 0.0063, "step": 125910 }, { "epoch": 48.9, "learning_rate": 1.3479870550161813e-05, "loss": 0.023, "step": 125920 }, { "epoch": 48.9, "learning_rate": 1.3479352750809063e-05, "loss": 0.0416, "step": 125930 }, { "epoch": 48.91, "learning_rate": 1.3478834951456313e-05, "loss": 0.2327, "step": 125940 }, { "epoch": 48.91, "learning_rate": 1.3478317152103562e-05, "loss": 0.1167, "step": 125950 }, { "epoch": 48.92, "learning_rate": 1.3477799352750812e-05, "loss": 0.035, "step": 125960 }, { "epoch": 48.92, "learning_rate": 1.3477281553398058e-05, "loss": 0.0536, "step": 125970 }, { "epoch": 48.92, "learning_rate": 1.3476763754045308e-05, "loss": 0.1128, "step": 125980 }, { "epoch": 48.93, "learning_rate": 1.3476245954692557e-05, "loss": 0.2563, "step": 125990 }, { "epoch": 48.93, "learning_rate": 1.3475728155339807e-05, "loss": 0.1264, "step": 126000 }, { "epoch": 48.94, "learning_rate": 1.3475210355987057e-05, "loss": 0.1387, "step": 126010 }, { "epoch": 48.94, "learning_rate": 1.3474692556634306e-05, "loss": 0.1023, "step": 126020 }, { "epoch": 48.94, "learning_rate": 1.3474174757281556e-05, "loss": 0.0387, "step": 126030 }, { "epoch": 48.95, "learning_rate": 1.3473656957928805e-05, "loss": 0.0752, "step": 126040 }, { "epoch": 48.95, "learning_rate": 1.3473139158576052e-05, "loss": 0.1248, "step": 126050 }, { "epoch": 48.96, "learning_rate": 1.3472621359223301e-05, "loss": 0.0576, "step": 126060 }, { "epoch": 48.96, "learning_rate": 1.3472103559870551e-05, "loss": 0.0932, "step": 126070 }, { "epoch": 48.96, "learning_rate": 1.34715857605178e-05, "loss": 0.0213, "step": 126080 }, { "epoch": 48.97, "learning_rate": 1.347106796116505e-05, "loss": 0.068, "step": 126090 }, { "epoch": 48.97, "learning_rate": 1.34705501618123e-05, "loss": 0.082, "step": 126100 }, { "epoch": 48.97, "learning_rate": 1.347003236245955e-05, "loss": 0.2132, "step": 126110 }, { "epoch": 48.98, "learning_rate": 1.3469514563106797e-05, "loss": 0.0704, "step": 126120 }, { "epoch": 48.98, "learning_rate": 1.3468996763754045e-05, "loss": 0.0772, "step": 126130 }, { "epoch": 48.99, "learning_rate": 1.3468478964401295e-05, "loss": 0.0738, "step": 126140 }, { "epoch": 48.99, "learning_rate": 1.3467961165048544e-05, "loss": 0.1023, "step": 126150 }, { "epoch": 48.99, "learning_rate": 1.3467443365695794e-05, "loss": 0.123, "step": 126160 }, { "epoch": 49.0, "learning_rate": 1.3466925566343044e-05, "loss": 0.0939, "step": 126170 }, { "epoch": 49.0, "eval_accuracy": 0.9485557083906465, "eval_loss": 0.3431084454059601, "eval_runtime": 8.2153, "eval_samples_per_second": 442.467, "eval_steps_per_second": 55.384, "step": 126175 }, { "epoch": 49.0, "learning_rate": 1.3466407766990293e-05, "loss": 0.1448, "step": 126180 }, { "epoch": 49.01, "learning_rate": 1.3465889967637543e-05, "loss": 0.201, "step": 126190 }, { "epoch": 49.01, "learning_rate": 1.346537216828479e-05, "loss": 0.0787, "step": 126200 }, { "epoch": 49.01, "learning_rate": 1.346485436893204e-05, "loss": 0.0446, "step": 126210 }, { "epoch": 49.02, "learning_rate": 1.3464336569579288e-05, "loss": 0.0431, "step": 126220 }, { "epoch": 49.02, "learning_rate": 1.3463818770226538e-05, "loss": 0.108, "step": 126230 }, { "epoch": 49.03, "learning_rate": 1.3463300970873788e-05, "loss": 0.0901, "step": 126240 }, { "epoch": 49.03, "learning_rate": 1.3462783171521037e-05, "loss": 0.0603, "step": 126250 }, { "epoch": 49.03, "learning_rate": 1.3462265372168287e-05, "loss": 0.0156, "step": 126260 }, { "epoch": 49.04, "learning_rate": 1.3461747572815535e-05, "loss": 0.0329, "step": 126270 }, { "epoch": 49.04, "learning_rate": 1.3461229773462784e-05, "loss": 0.0831, "step": 126280 }, { "epoch": 49.04, "learning_rate": 1.3460711974110034e-05, "loss": 0.0132, "step": 126290 }, { "epoch": 49.05, "learning_rate": 1.3460194174757282e-05, "loss": 0.0908, "step": 126300 }, { "epoch": 49.05, "learning_rate": 1.3459676375404532e-05, "loss": 0.0813, "step": 126310 }, { "epoch": 49.06, "learning_rate": 1.3459158576051781e-05, "loss": 0.088, "step": 126320 }, { "epoch": 49.06, "learning_rate": 1.345864077669903e-05, "loss": 0.186, "step": 126330 }, { "epoch": 49.06, "learning_rate": 1.345812297734628e-05, "loss": 0.0888, "step": 126340 }, { "epoch": 49.07, "learning_rate": 1.3457605177993528e-05, "loss": 0.0658, "step": 126350 }, { "epoch": 49.07, "learning_rate": 1.3457087378640778e-05, "loss": 0.0547, "step": 126360 }, { "epoch": 49.08, "learning_rate": 1.3456569579288028e-05, "loss": 0.07, "step": 126370 }, { "epoch": 49.08, "learning_rate": 1.3456051779935275e-05, "loss": 0.1041, "step": 126380 }, { "epoch": 49.08, "learning_rate": 1.3455533980582525e-05, "loss": 0.1467, "step": 126390 }, { "epoch": 49.09, "learning_rate": 1.3455016181229775e-05, "loss": 0.0859, "step": 126400 }, { "epoch": 49.09, "learning_rate": 1.3454498381877024e-05, "loss": 0.1492, "step": 126410 }, { "epoch": 49.1, "learning_rate": 1.3453980582524272e-05, "loss": 0.0112, "step": 126420 }, { "epoch": 49.1, "learning_rate": 1.3453462783171522e-05, "loss": 0.009, "step": 126430 }, { "epoch": 49.1, "learning_rate": 1.3452944983818771e-05, "loss": 0.0247, "step": 126440 }, { "epoch": 49.11, "learning_rate": 1.3452427184466021e-05, "loss": 0.0012, "step": 126450 }, { "epoch": 49.11, "learning_rate": 1.3451909385113269e-05, "loss": 0.0196, "step": 126460 }, { "epoch": 49.11, "learning_rate": 1.3451391585760519e-05, "loss": 0.0093, "step": 126470 }, { "epoch": 49.12, "learning_rate": 1.3450873786407768e-05, "loss": 0.0665, "step": 126480 }, { "epoch": 49.12, "learning_rate": 1.3450355987055018e-05, "loss": 0.0313, "step": 126490 }, { "epoch": 49.13, "learning_rate": 1.3449838187702266e-05, "loss": 0.0215, "step": 126500 }, { "epoch": 49.13, "learning_rate": 1.3449320388349515e-05, "loss": 0.1794, "step": 126510 }, { "epoch": 49.13, "learning_rate": 1.3448802588996765e-05, "loss": 0.1555, "step": 126520 }, { "epoch": 49.14, "learning_rate": 1.3448284789644015e-05, "loss": 0.0261, "step": 126530 }, { "epoch": 49.14, "learning_rate": 1.3447766990291263e-05, "loss": 0.1589, "step": 126540 }, { "epoch": 49.15, "learning_rate": 1.3447249190938512e-05, "loss": 0.0074, "step": 126550 }, { "epoch": 49.15, "learning_rate": 1.3446731391585762e-05, "loss": 0.0512, "step": 126560 }, { "epoch": 49.15, "learning_rate": 1.3446213592233011e-05, "loss": 0.0801, "step": 126570 }, { "epoch": 49.16, "learning_rate": 1.344569579288026e-05, "loss": 0.0247, "step": 126580 }, { "epoch": 49.16, "learning_rate": 1.3445177993527509e-05, "loss": 0.1598, "step": 126590 }, { "epoch": 49.17, "learning_rate": 1.3444660194174759e-05, "loss": 0.0707, "step": 126600 }, { "epoch": 49.17, "learning_rate": 1.3444142394822008e-05, "loss": 0.1358, "step": 126610 }, { "epoch": 49.17, "learning_rate": 1.3443624595469256e-05, "loss": 0.1661, "step": 126620 }, { "epoch": 49.18, "learning_rate": 1.3443106796116506e-05, "loss": 0.0372, "step": 126630 }, { "epoch": 49.18, "learning_rate": 1.3442588996763755e-05, "loss": 0.1075, "step": 126640 }, { "epoch": 49.18, "learning_rate": 1.3442071197411003e-05, "loss": 0.0213, "step": 126650 }, { "epoch": 49.19, "learning_rate": 1.3441553398058253e-05, "loss": 0.0209, "step": 126660 }, { "epoch": 49.19, "learning_rate": 1.3441035598705503e-05, "loss": 0.0879, "step": 126670 }, { "epoch": 49.2, "learning_rate": 1.3440517799352752e-05, "loss": 0.0731, "step": 126680 }, { "epoch": 49.2, "learning_rate": 1.3440000000000002e-05, "loss": 0.0429, "step": 126690 }, { "epoch": 49.2, "learning_rate": 1.3439482200647251e-05, "loss": 0.2395, "step": 126700 }, { "epoch": 49.21, "learning_rate": 1.34389644012945e-05, "loss": 0.1148, "step": 126710 }, { "epoch": 49.21, "learning_rate": 1.3438446601941749e-05, "loss": 0.225, "step": 126720 }, { "epoch": 49.22, "learning_rate": 1.3437928802588997e-05, "loss": 0.1291, "step": 126730 }, { "epoch": 49.22, "learning_rate": 1.3437411003236246e-05, "loss": 0.0702, "step": 126740 }, { "epoch": 49.22, "learning_rate": 1.3436893203883496e-05, "loss": 0.102, "step": 126750 }, { "epoch": 49.23, "learning_rate": 1.3436375404530746e-05, "loss": 0.0703, "step": 126760 }, { "epoch": 49.23, "learning_rate": 1.3435857605177995e-05, "loss": 0.0655, "step": 126770 }, { "epoch": 49.23, "learning_rate": 1.3435339805825245e-05, "loss": 0.0044, "step": 126780 }, { "epoch": 49.24, "learning_rate": 1.3434822006472493e-05, "loss": 0.1531, "step": 126790 }, { "epoch": 49.24, "learning_rate": 1.343430420711974e-05, "loss": 0.0756, "step": 126800 }, { "epoch": 49.25, "learning_rate": 1.343378640776699e-05, "loss": 0.0577, "step": 126810 }, { "epoch": 49.25, "learning_rate": 1.343326860841424e-05, "loss": 0.0054, "step": 126820 }, { "epoch": 49.25, "learning_rate": 1.343275080906149e-05, "loss": 0.0418, "step": 126830 }, { "epoch": 49.26, "learning_rate": 1.343223300970874e-05, "loss": 0.0889, "step": 126840 }, { "epoch": 49.26, "learning_rate": 1.3431715210355989e-05, "loss": 0.1663, "step": 126850 }, { "epoch": 49.27, "learning_rate": 1.3431197411003238e-05, "loss": 0.0598, "step": 126860 }, { "epoch": 49.27, "learning_rate": 1.3430679611650486e-05, "loss": 0.0072, "step": 126870 }, { "epoch": 49.27, "learning_rate": 1.3430161812297734e-05, "loss": 0.0862, "step": 126880 }, { "epoch": 49.28, "learning_rate": 1.3429644012944984e-05, "loss": 0.0345, "step": 126890 }, { "epoch": 49.28, "learning_rate": 1.3429126213592234e-05, "loss": 0.0637, "step": 126900 }, { "epoch": 49.29, "learning_rate": 1.3428608414239483e-05, "loss": 0.071, "step": 126910 }, { "epoch": 49.29, "learning_rate": 1.3428090614886733e-05, "loss": 0.0163, "step": 126920 }, { "epoch": 49.29, "learning_rate": 1.3427572815533982e-05, "loss": 0.0114, "step": 126930 }, { "epoch": 49.3, "learning_rate": 1.3427055016181232e-05, "loss": 0.1619, "step": 126940 }, { "epoch": 49.3, "learning_rate": 1.3426537216828478e-05, "loss": 0.0723, "step": 126950 }, { "epoch": 49.3, "learning_rate": 1.3426019417475728e-05, "loss": 0.0248, "step": 126960 }, { "epoch": 49.31, "learning_rate": 1.3425501618122978e-05, "loss": 0.0522, "step": 126970 }, { "epoch": 49.31, "learning_rate": 1.3424983818770227e-05, "loss": 0.088, "step": 126980 }, { "epoch": 49.32, "learning_rate": 1.3424466019417477e-05, "loss": 0.1007, "step": 126990 }, { "epoch": 49.32, "learning_rate": 1.3423948220064726e-05, "loss": 0.1052, "step": 127000 }, { "epoch": 49.32, "learning_rate": 1.3423430420711976e-05, "loss": 0.0565, "step": 127010 }, { "epoch": 49.33, "learning_rate": 1.3422912621359226e-05, "loss": 0.1357, "step": 127020 }, { "epoch": 49.33, "learning_rate": 1.3422394822006472e-05, "loss": 0.02, "step": 127030 }, { "epoch": 49.34, "learning_rate": 1.3421877022653721e-05, "loss": 0.0195, "step": 127040 }, { "epoch": 49.34, "learning_rate": 1.3421359223300971e-05, "loss": 0.1843, "step": 127050 }, { "epoch": 49.34, "learning_rate": 1.342084142394822e-05, "loss": 0.0674, "step": 127060 }, { "epoch": 49.35, "learning_rate": 1.342032362459547e-05, "loss": 0.0353, "step": 127070 }, { "epoch": 49.35, "learning_rate": 1.341980582524272e-05, "loss": 0.0477, "step": 127080 }, { "epoch": 49.36, "learning_rate": 1.341928802588997e-05, "loss": 0.0152, "step": 127090 }, { "epoch": 49.36, "learning_rate": 1.341877022653722e-05, "loss": 0.038, "step": 127100 }, { "epoch": 49.36, "learning_rate": 1.3418252427184465e-05, "loss": 0.0757, "step": 127110 }, { "epoch": 49.37, "learning_rate": 1.3417734627831715e-05, "loss": 0.0316, "step": 127120 }, { "epoch": 49.37, "learning_rate": 1.3417216828478965e-05, "loss": 0.1288, "step": 127130 }, { "epoch": 49.37, "learning_rate": 1.3416699029126214e-05, "loss": 0.1889, "step": 127140 }, { "epoch": 49.38, "learning_rate": 1.3416181229773464e-05, "loss": 0.0597, "step": 127150 }, { "epoch": 49.38, "learning_rate": 1.3415663430420713e-05, "loss": 0.048, "step": 127160 }, { "epoch": 49.39, "learning_rate": 1.3415145631067963e-05, "loss": 0.1566, "step": 127170 }, { "epoch": 49.39, "learning_rate": 1.3414627831715213e-05, "loss": 0.0915, "step": 127180 }, { "epoch": 49.39, "learning_rate": 1.3414110032362459e-05, "loss": 0.0265, "step": 127190 }, { "epoch": 49.4, "learning_rate": 1.3413592233009709e-05, "loss": 0.0242, "step": 127200 }, { "epoch": 49.4, "learning_rate": 1.3413074433656958e-05, "loss": 0.0593, "step": 127210 }, { "epoch": 49.41, "learning_rate": 1.3412556634304208e-05, "loss": 0.0016, "step": 127220 }, { "epoch": 49.41, "learning_rate": 1.3412038834951457e-05, "loss": 0.0369, "step": 127230 }, { "epoch": 49.41, "learning_rate": 1.3411521035598707e-05, "loss": 0.0234, "step": 127240 }, { "epoch": 49.42, "learning_rate": 1.3411003236245957e-05, "loss": 0.0302, "step": 127250 }, { "epoch": 49.42, "learning_rate": 1.3410485436893206e-05, "loss": 0.0227, "step": 127260 }, { "epoch": 49.43, "learning_rate": 1.3409967637540456e-05, "loss": 0.0006, "step": 127270 }, { "epoch": 49.43, "learning_rate": 1.3409449838187702e-05, "loss": 0.021, "step": 127280 }, { "epoch": 49.43, "learning_rate": 1.3408932038834952e-05, "loss": 0.0284, "step": 127290 }, { "epoch": 49.44, "learning_rate": 1.3408414239482201e-05, "loss": 0.0008, "step": 127300 }, { "epoch": 49.44, "learning_rate": 1.3407896440129451e-05, "loss": 0.1253, "step": 127310 }, { "epoch": 49.44, "learning_rate": 1.34073786407767e-05, "loss": 0.0201, "step": 127320 }, { "epoch": 49.45, "learning_rate": 1.340686084142395e-05, "loss": 0.0737, "step": 127330 }, { "epoch": 49.45, "learning_rate": 1.34063430420712e-05, "loss": 0.1984, "step": 127340 }, { "epoch": 49.46, "learning_rate": 1.340582524271845e-05, "loss": 0.0782, "step": 127350 }, { "epoch": 49.46, "learning_rate": 1.3405307443365696e-05, "loss": 0.0442, "step": 127360 }, { "epoch": 49.46, "learning_rate": 1.3404789644012945e-05, "loss": 0.0182, "step": 127370 }, { "epoch": 49.47, "learning_rate": 1.3404271844660195e-05, "loss": 0.1399, "step": 127380 }, { "epoch": 49.47, "learning_rate": 1.3403754045307445e-05, "loss": 0.0723, "step": 127390 }, { "epoch": 49.48, "learning_rate": 1.3403236245954694e-05, "loss": 0.006, "step": 127400 }, { "epoch": 49.48, "learning_rate": 1.3402718446601944e-05, "loss": 0.054, "step": 127410 }, { "epoch": 49.48, "learning_rate": 1.3402200647249193e-05, "loss": 0.032, "step": 127420 }, { "epoch": 49.49, "learning_rate": 1.3401682847896443e-05, "loss": 0.0074, "step": 127430 }, { "epoch": 49.49, "learning_rate": 1.340116504854369e-05, "loss": 0.0062, "step": 127440 }, { "epoch": 49.5, "learning_rate": 1.3400647249190939e-05, "loss": 0.0556, "step": 127450 }, { "epoch": 49.5, "learning_rate": 1.3400129449838188e-05, "loss": 0.0005, "step": 127460 }, { "epoch": 49.5, "learning_rate": 1.3399611650485438e-05, "loss": 0.0628, "step": 127470 }, { "epoch": 49.51, "learning_rate": 1.3399093851132688e-05, "loss": 0.0017, "step": 127480 }, { "epoch": 49.51, "learning_rate": 1.3398576051779937e-05, "loss": 0.0237, "step": 127490 }, { "epoch": 49.51, "learning_rate": 1.3398058252427187e-05, "loss": 0.0159, "step": 127500 }, { "epoch": 49.52, "learning_rate": 1.3397540453074437e-05, "loss": 0.0963, "step": 127510 }, { "epoch": 49.52, "learning_rate": 1.3397022653721683e-05, "loss": 0.0155, "step": 127520 }, { "epoch": 49.53, "learning_rate": 1.3396504854368932e-05, "loss": 0.0259, "step": 127530 }, { "epoch": 49.53, "learning_rate": 1.3395987055016182e-05, "loss": 0.0669, "step": 127540 }, { "epoch": 49.53, "learning_rate": 1.3395469255663432e-05, "loss": 0.0182, "step": 127550 }, { "epoch": 49.54, "learning_rate": 1.3394951456310681e-05, "loss": 0.1163, "step": 127560 }, { "epoch": 49.54, "learning_rate": 1.3394433656957931e-05, "loss": 0.0983, "step": 127570 }, { "epoch": 49.55, "learning_rate": 1.339391585760518e-05, "loss": 0.0299, "step": 127580 }, { "epoch": 49.55, "learning_rate": 1.3393398058252428e-05, "loss": 0.0841, "step": 127590 }, { "epoch": 49.55, "learning_rate": 1.3392880258899676e-05, "loss": 0.0483, "step": 127600 }, { "epoch": 49.56, "learning_rate": 1.3392362459546926e-05, "loss": 0.0496, "step": 127610 }, { "epoch": 49.56, "learning_rate": 1.3391844660194176e-05, "loss": 0.3217, "step": 127620 }, { "epoch": 49.57, "learning_rate": 1.3391326860841425e-05, "loss": 0.0785, "step": 127630 }, { "epoch": 49.57, "learning_rate": 1.3390809061488675e-05, "loss": 0.0782, "step": 127640 }, { "epoch": 49.57, "learning_rate": 1.3390291262135924e-05, "loss": 0.007, "step": 127650 }, { "epoch": 49.58, "learning_rate": 1.3389773462783174e-05, "loss": 0.0479, "step": 127660 }, { "epoch": 49.58, "learning_rate": 1.3389255663430422e-05, "loss": 0.0379, "step": 127670 }, { "epoch": 49.58, "learning_rate": 1.338873786407767e-05, "loss": 0.0932, "step": 127680 }, { "epoch": 49.59, "learning_rate": 1.338822006472492e-05, "loss": 0.0505, "step": 127690 }, { "epoch": 49.59, "learning_rate": 1.3387702265372169e-05, "loss": 0.0869, "step": 127700 }, { "epoch": 49.6, "learning_rate": 1.3387184466019419e-05, "loss": 0.0273, "step": 127710 }, { "epoch": 49.6, "learning_rate": 1.3386666666666668e-05, "loss": 0.1132, "step": 127720 }, { "epoch": 49.6, "learning_rate": 1.3386148867313918e-05, "loss": 0.0221, "step": 127730 }, { "epoch": 49.61, "learning_rate": 1.3385631067961166e-05, "loss": 0.118, "step": 127740 }, { "epoch": 49.61, "learning_rate": 1.3385113268608416e-05, "loss": 0.3227, "step": 127750 }, { "epoch": 49.62, "learning_rate": 1.3384595469255663e-05, "loss": 0.0383, "step": 127760 }, { "epoch": 49.62, "learning_rate": 1.3384077669902913e-05, "loss": 0.178, "step": 127770 }, { "epoch": 49.62, "learning_rate": 1.3383559870550163e-05, "loss": 0.0758, "step": 127780 }, { "epoch": 49.63, "learning_rate": 1.3383042071197412e-05, "loss": 0.0314, "step": 127790 }, { "epoch": 49.63, "learning_rate": 1.3382524271844662e-05, "loss": 0.1465, "step": 127800 }, { "epoch": 49.63, "learning_rate": 1.3382006472491912e-05, "loss": 0.0182, "step": 127810 }, { "epoch": 49.64, "learning_rate": 1.338148867313916e-05, "loss": 0.0322, "step": 127820 }, { "epoch": 49.64, "learning_rate": 1.3380970873786409e-05, "loss": 0.3149, "step": 127830 }, { "epoch": 49.65, "learning_rate": 1.3380453074433659e-05, "loss": 0.0234, "step": 127840 }, { "epoch": 49.65, "learning_rate": 1.3379935275080907e-05, "loss": 0.2232, "step": 127850 }, { "epoch": 49.65, "learning_rate": 1.3379417475728156e-05, "loss": 0.0711, "step": 127860 }, { "epoch": 49.66, "learning_rate": 1.3378899676375406e-05, "loss": 0.1993, "step": 127870 }, { "epoch": 49.66, "learning_rate": 1.3378381877022655e-05, "loss": 0.0991, "step": 127880 }, { "epoch": 49.67, "learning_rate": 1.3377864077669903e-05, "loss": 0.029, "step": 127890 }, { "epoch": 49.67, "learning_rate": 1.3377346278317153e-05, "loss": 0.0872, "step": 127900 }, { "epoch": 49.67, "learning_rate": 1.3376828478964403e-05, "loss": 0.0229, "step": 127910 }, { "epoch": 49.68, "learning_rate": 1.3376310679611652e-05, "loss": 0.0385, "step": 127920 }, { "epoch": 49.68, "learning_rate": 1.33757928802589e-05, "loss": 0.1936, "step": 127930 }, { "epoch": 49.69, "learning_rate": 1.337527508090615e-05, "loss": 0.1104, "step": 127940 }, { "epoch": 49.69, "learning_rate": 1.33747572815534e-05, "loss": 0.0446, "step": 127950 }, { "epoch": 49.69, "learning_rate": 1.3374239482200649e-05, "loss": 0.1954, "step": 127960 }, { "epoch": 49.7, "learning_rate": 1.3373721682847897e-05, "loss": 0.1952, "step": 127970 }, { "epoch": 49.7, "learning_rate": 1.3373203883495147e-05, "loss": 0.1486, "step": 127980 }, { "epoch": 49.7, "learning_rate": 1.3372686084142396e-05, "loss": 0.0305, "step": 127990 }, { "epoch": 49.71, "learning_rate": 1.3372168284789646e-05, "loss": 0.1525, "step": 128000 }, { "epoch": 49.71, "learning_rate": 1.3371650485436894e-05, "loss": 0.1002, "step": 128010 }, { "epoch": 49.72, "learning_rate": 1.3371132686084143e-05, "loss": 0.0983, "step": 128020 }, { "epoch": 49.72, "learning_rate": 1.3370614886731393e-05, "loss": 0.1617, "step": 128030 }, { "epoch": 49.72, "learning_rate": 1.3370097087378643e-05, "loss": 0.2585, "step": 128040 }, { "epoch": 49.73, "learning_rate": 1.336957928802589e-05, "loss": 0.1167, "step": 128050 }, { "epoch": 49.73, "learning_rate": 1.336906148867314e-05, "loss": 0.0042, "step": 128060 }, { "epoch": 49.74, "learning_rate": 1.336854368932039e-05, "loss": 0.0106, "step": 128070 }, { "epoch": 49.74, "learning_rate": 1.336802588996764e-05, "loss": 0.0782, "step": 128080 }, { "epoch": 49.74, "learning_rate": 1.3367508090614887e-05, "loss": 0.0793, "step": 128090 }, { "epoch": 49.75, "learning_rate": 1.3366990291262137e-05, "loss": 0.1683, "step": 128100 }, { "epoch": 49.75, "learning_rate": 1.3366472491909387e-05, "loss": 0.1678, "step": 128110 }, { "epoch": 49.76, "learning_rate": 1.3365954692556634e-05, "loss": 0.0767, "step": 128120 }, { "epoch": 49.76, "learning_rate": 1.3365436893203884e-05, "loss": 0.0576, "step": 128130 }, { "epoch": 49.76, "learning_rate": 1.3364919093851134e-05, "loss": 0.0017, "step": 128140 }, { "epoch": 49.77, "learning_rate": 1.3364401294498383e-05, "loss": 0.177, "step": 128150 }, { "epoch": 49.77, "learning_rate": 1.3363883495145633e-05, "loss": 0.2572, "step": 128160 }, { "epoch": 49.77, "learning_rate": 1.3363365695792881e-05, "loss": 0.0596, "step": 128170 }, { "epoch": 49.78, "learning_rate": 1.336284789644013e-05, "loss": 0.0401, "step": 128180 }, { "epoch": 49.78, "learning_rate": 1.336233009708738e-05, "loss": 0.1306, "step": 128190 }, { "epoch": 49.79, "learning_rate": 1.3361812297734628e-05, "loss": 0.0642, "step": 128200 }, { "epoch": 49.79, "learning_rate": 1.3361294498381878e-05, "loss": 0.1035, "step": 128210 }, { "epoch": 49.79, "learning_rate": 1.3360776699029127e-05, "loss": 0.082, "step": 128220 }, { "epoch": 49.8, "learning_rate": 1.3360258899676377e-05, "loss": 0.0179, "step": 128230 }, { "epoch": 49.8, "learning_rate": 1.3359741100323626e-05, "loss": 0.004, "step": 128240 }, { "epoch": 49.81, "learning_rate": 1.3359223300970874e-05, "loss": 0.1409, "step": 128250 }, { "epoch": 49.81, "learning_rate": 1.3358705501618124e-05, "loss": 0.0705, "step": 128260 }, { "epoch": 49.81, "learning_rate": 1.3358187702265372e-05, "loss": 0.0459, "step": 128270 }, { "epoch": 49.82, "learning_rate": 1.3357669902912622e-05, "loss": 0.07, "step": 128280 }, { "epoch": 49.82, "learning_rate": 1.3357152103559871e-05, "loss": 0.1234, "step": 128290 }, { "epoch": 49.83, "learning_rate": 1.335663430420712e-05, "loss": 0.0956, "step": 128300 }, { "epoch": 49.83, "learning_rate": 1.335611650485437e-05, "loss": 0.0575, "step": 128310 }, { "epoch": 49.83, "learning_rate": 1.335559870550162e-05, "loss": 0.1417, "step": 128320 }, { "epoch": 49.84, "learning_rate": 1.3355080906148868e-05, "loss": 0.0942, "step": 128330 }, { "epoch": 49.84, "learning_rate": 1.3354563106796118e-05, "loss": 0.0604, "step": 128340 }, { "epoch": 49.84, "learning_rate": 1.3354045307443366e-05, "loss": 0.0588, "step": 128350 }, { "epoch": 49.85, "learning_rate": 1.3353527508090615e-05, "loss": 0.0025, "step": 128360 }, { "epoch": 49.85, "learning_rate": 1.3353009708737865e-05, "loss": 0.0556, "step": 128370 }, { "epoch": 49.86, "learning_rate": 1.3352491909385114e-05, "loss": 0.0023, "step": 128380 }, { "epoch": 49.86, "learning_rate": 1.3351974110032364e-05, "loss": 0.0805, "step": 128390 }, { "epoch": 49.86, "learning_rate": 1.3351456310679614e-05, "loss": 0.0012, "step": 128400 }, { "epoch": 49.87, "learning_rate": 1.3350938511326863e-05, "loss": 0.0022, "step": 128410 }, { "epoch": 49.87, "learning_rate": 1.335042071197411e-05, "loss": 0.0331, "step": 128420 }, { "epoch": 49.88, "learning_rate": 1.3349902912621359e-05, "loss": 0.1665, "step": 128430 }, { "epoch": 49.88, "learning_rate": 1.3349385113268609e-05, "loss": 0.0858, "step": 128440 }, { "epoch": 49.88, "learning_rate": 1.3348867313915858e-05, "loss": 0.1256, "step": 128450 }, { "epoch": 49.89, "learning_rate": 1.3348349514563108e-05, "loss": 0.2353, "step": 128460 }, { "epoch": 49.89, "learning_rate": 1.3347831715210358e-05, "loss": 0.0344, "step": 128470 }, { "epoch": 49.9, "learning_rate": 1.3347313915857607e-05, "loss": 0.0566, "step": 128480 }, { "epoch": 49.9, "learning_rate": 1.3346796116504857e-05, "loss": 0.1254, "step": 128490 }, { "epoch": 49.9, "learning_rate": 1.3346278317152103e-05, "loss": 0.0526, "step": 128500 }, { "epoch": 49.91, "learning_rate": 1.3345760517799353e-05, "loss": 0.1928, "step": 128510 }, { "epoch": 49.91, "learning_rate": 1.3345242718446602e-05, "loss": 0.0992, "step": 128520 }, { "epoch": 49.91, "learning_rate": 1.3344724919093852e-05, "loss": 0.1226, "step": 128530 }, { "epoch": 49.92, "learning_rate": 1.3344207119741101e-05, "loss": 0.1824, "step": 128540 }, { "epoch": 49.92, "learning_rate": 1.3343689320388351e-05, "loss": 0.078, "step": 128550 }, { "epoch": 49.93, "learning_rate": 1.33431715210356e-05, "loss": 0.305, "step": 128560 }, { "epoch": 49.93, "learning_rate": 1.334265372168285e-05, "loss": 0.0278, "step": 128570 }, { "epoch": 49.93, "learning_rate": 1.3342135922330097e-05, "loss": 0.1835, "step": 128580 }, { "epoch": 49.94, "learning_rate": 1.3341618122977346e-05, "loss": 0.0407, "step": 128590 }, { "epoch": 49.94, "learning_rate": 1.3341100323624596e-05, "loss": 0.0529, "step": 128600 }, { "epoch": 49.95, "learning_rate": 1.3340582524271845e-05, "loss": 0.0399, "step": 128610 }, { "epoch": 49.95, "learning_rate": 1.3340064724919095e-05, "loss": 0.1249, "step": 128620 }, { "epoch": 49.95, "learning_rate": 1.3339546925566345e-05, "loss": 0.0238, "step": 128630 }, { "epoch": 49.96, "learning_rate": 1.3339029126213594e-05, "loss": 0.168, "step": 128640 }, { "epoch": 49.96, "learning_rate": 1.3338511326860844e-05, "loss": 0.0496, "step": 128650 }, { "epoch": 49.97, "learning_rate": 1.333799352750809e-05, "loss": 0.0874, "step": 128660 }, { "epoch": 49.97, "learning_rate": 1.333747572815534e-05, "loss": 0.1442, "step": 128670 }, { "epoch": 49.97, "learning_rate": 1.333695792880259e-05, "loss": 0.0691, "step": 128680 }, { "epoch": 49.98, "learning_rate": 1.3336440129449839e-05, "loss": 0.081, "step": 128690 }, { "epoch": 49.98, "learning_rate": 1.3335922330097089e-05, "loss": 0.1192, "step": 128700 }, { "epoch": 49.98, "learning_rate": 1.3335404530744338e-05, "loss": 0.0461, "step": 128710 }, { "epoch": 49.99, "learning_rate": 1.3334886731391588e-05, "loss": 0.0862, "step": 128720 }, { "epoch": 49.99, "learning_rate": 1.3334368932038837e-05, "loss": 0.0132, "step": 128730 }, { "epoch": 50.0, "learning_rate": 1.3333851132686084e-05, "loss": 0.0503, "step": 128740 }, { "epoch": 50.0, "learning_rate": 1.3333333333333333e-05, "loss": 0.0599, "step": 128750 }, { "epoch": 50.0, "eval_accuracy": 0.9515818431911967, "eval_loss": 0.3166683316230774, "eval_runtime": 8.2321, "eval_samples_per_second": 441.562, "eval_steps_per_second": 55.271, "step": 128750 }, { "epoch": 50.0, "learning_rate": 1.3332815533980583e-05, "loss": 0.0261, "step": 128760 }, { "epoch": 50.01, "learning_rate": 1.3332297734627833e-05, "loss": 0.0754, "step": 128770 }, { "epoch": 50.01, "learning_rate": 1.3331779935275082e-05, "loss": 0.084, "step": 128780 }, { "epoch": 50.02, "learning_rate": 1.3331262135922332e-05, "loss": 0.0728, "step": 128790 }, { "epoch": 50.02, "learning_rate": 1.3330744336569581e-05, "loss": 0.1192, "step": 128800 }, { "epoch": 50.02, "learning_rate": 1.3330226537216831e-05, "loss": 0.0007, "step": 128810 }, { "epoch": 50.03, "learning_rate": 1.3329708737864077e-05, "loss": 0.0995, "step": 128820 }, { "epoch": 50.03, "learning_rate": 1.3329190938511327e-05, "loss": 0.0798, "step": 128830 }, { "epoch": 50.03, "learning_rate": 1.3328673139158576e-05, "loss": 0.0798, "step": 128840 }, { "epoch": 50.04, "learning_rate": 1.3328155339805826e-05, "loss": 0.1158, "step": 128850 }, { "epoch": 50.04, "learning_rate": 1.3327637540453076e-05, "loss": 0.0274, "step": 128860 }, { "epoch": 50.05, "learning_rate": 1.3327119741100325e-05, "loss": 0.1302, "step": 128870 }, { "epoch": 50.05, "learning_rate": 1.3326601941747575e-05, "loss": 0.0411, "step": 128880 }, { "epoch": 50.05, "learning_rate": 1.3326084142394825e-05, "loss": 0.0733, "step": 128890 }, { "epoch": 50.06, "learning_rate": 1.332556634304207e-05, "loss": 0.1093, "step": 128900 }, { "epoch": 50.06, "learning_rate": 1.332504854368932e-05, "loss": 0.1115, "step": 128910 }, { "epoch": 50.07, "learning_rate": 1.332453074433657e-05, "loss": 0.0039, "step": 128920 }, { "epoch": 50.07, "learning_rate": 1.332401294498382e-05, "loss": 0.0897, "step": 128930 }, { "epoch": 50.07, "learning_rate": 1.332349514563107e-05, "loss": 0.1407, "step": 128940 }, { "epoch": 50.08, "learning_rate": 1.3322977346278319e-05, "loss": 0.1428, "step": 128950 }, { "epoch": 50.08, "learning_rate": 1.3322459546925568e-05, "loss": 0.1149, "step": 128960 }, { "epoch": 50.09, "learning_rate": 1.3321941747572818e-05, "loss": 0.0594, "step": 128970 }, { "epoch": 50.09, "learning_rate": 1.3321423948220068e-05, "loss": 0.1067, "step": 128980 }, { "epoch": 50.09, "learning_rate": 1.3320906148867314e-05, "loss": 0.0598, "step": 128990 }, { "epoch": 50.1, "learning_rate": 1.3320388349514564e-05, "loss": 0.292, "step": 129000 }, { "epoch": 50.1, "learning_rate": 1.3319870550161813e-05, "loss": 0.0886, "step": 129010 }, { "epoch": 50.1, "learning_rate": 1.3319352750809063e-05, "loss": 0.0814, "step": 129020 }, { "epoch": 50.11, "learning_rate": 1.3318834951456312e-05, "loss": 0.1103, "step": 129030 }, { "epoch": 50.11, "learning_rate": 1.3318317152103562e-05, "loss": 0.1528, "step": 129040 }, { "epoch": 50.12, "learning_rate": 1.3317799352750812e-05, "loss": 0.0282, "step": 129050 }, { "epoch": 50.12, "learning_rate": 1.331728155339806e-05, "loss": 0.0574, "step": 129060 }, { "epoch": 50.12, "learning_rate": 1.3316763754045308e-05, "loss": 0.0631, "step": 129070 }, { "epoch": 50.13, "learning_rate": 1.3316245954692557e-05, "loss": 0.1645, "step": 129080 }, { "epoch": 50.13, "learning_rate": 1.3315728155339807e-05, "loss": 0.1168, "step": 129090 }, { "epoch": 50.14, "learning_rate": 1.3315210355987056e-05, "loss": 0.0095, "step": 129100 }, { "epoch": 50.14, "learning_rate": 1.3314692556634306e-05, "loss": 0.0915, "step": 129110 }, { "epoch": 50.14, "learning_rate": 1.3314174757281556e-05, "loss": 0.0202, "step": 129120 }, { "epoch": 50.15, "learning_rate": 1.3313656957928805e-05, "loss": 0.0306, "step": 129130 }, { "epoch": 50.15, "learning_rate": 1.3313139158576053e-05, "loss": 0.0987, "step": 129140 }, { "epoch": 50.16, "learning_rate": 1.3312621359223301e-05, "loss": 0.1058, "step": 129150 }, { "epoch": 50.16, "learning_rate": 1.331210355987055e-05, "loss": 0.0585, "step": 129160 }, { "epoch": 50.16, "learning_rate": 1.33115857605178e-05, "loss": 0.0514, "step": 129170 }, { "epoch": 50.17, "learning_rate": 1.331106796116505e-05, "loss": 0.0555, "step": 129180 }, { "epoch": 50.17, "learning_rate": 1.33105501618123e-05, "loss": 0.1225, "step": 129190 }, { "epoch": 50.17, "learning_rate": 1.331003236245955e-05, "loss": 0.1178, "step": 129200 }, { "epoch": 50.18, "learning_rate": 1.3309514563106797e-05, "loss": 0.0803, "step": 129210 }, { "epoch": 50.18, "learning_rate": 1.3308996763754047e-05, "loss": 0.2603, "step": 129220 }, { "epoch": 50.19, "learning_rate": 1.3308478964401295e-05, "loss": 0.069, "step": 129230 }, { "epoch": 50.19, "learning_rate": 1.3307961165048544e-05, "loss": 0.1192, "step": 129240 }, { "epoch": 50.19, "learning_rate": 1.3307443365695794e-05, "loss": 0.0125, "step": 129250 }, { "epoch": 50.2, "learning_rate": 1.3306925566343043e-05, "loss": 0.0123, "step": 129260 }, { "epoch": 50.2, "learning_rate": 1.3306407766990293e-05, "loss": 0.0623, "step": 129270 }, { "epoch": 50.21, "learning_rate": 1.3305889967637543e-05, "loss": 0.0736, "step": 129280 }, { "epoch": 50.21, "learning_rate": 1.330537216828479e-05, "loss": 0.035, "step": 129290 }, { "epoch": 50.21, "learning_rate": 1.330485436893204e-05, "loss": 0.0956, "step": 129300 }, { "epoch": 50.22, "learning_rate": 1.3304336569579288e-05, "loss": 0.0582, "step": 129310 }, { "epoch": 50.22, "learning_rate": 1.3303818770226538e-05, "loss": 0.1461, "step": 129320 }, { "epoch": 50.23, "learning_rate": 1.3303300970873787e-05, "loss": 0.1733, "step": 129330 }, { "epoch": 50.23, "learning_rate": 1.3302783171521037e-05, "loss": 0.0071, "step": 129340 }, { "epoch": 50.23, "learning_rate": 1.3302265372168287e-05, "loss": 0.0469, "step": 129350 }, { "epoch": 50.24, "learning_rate": 1.3301747572815535e-05, "loss": 0.1987, "step": 129360 }, { "epoch": 50.24, "learning_rate": 1.3301229773462784e-05, "loss": 0.0406, "step": 129370 }, { "epoch": 50.24, "learning_rate": 1.3300711974110034e-05, "loss": 0.2309, "step": 129380 }, { "epoch": 50.25, "learning_rate": 1.3300194174757282e-05, "loss": 0.0156, "step": 129390 }, { "epoch": 50.25, "learning_rate": 1.3299676375404531e-05, "loss": 0.0949, "step": 129400 }, { "epoch": 50.26, "learning_rate": 1.3299158576051781e-05, "loss": 0.0887, "step": 129410 }, { "epoch": 50.26, "learning_rate": 1.329864077669903e-05, "loss": 0.1759, "step": 129420 }, { "epoch": 50.26, "learning_rate": 1.329812297734628e-05, "loss": 0.2203, "step": 129430 }, { "epoch": 50.27, "learning_rate": 1.3297605177993528e-05, "loss": 0.08, "step": 129440 }, { "epoch": 50.27, "learning_rate": 1.3297087378640778e-05, "loss": 0.0841, "step": 129450 }, { "epoch": 50.28, "learning_rate": 1.3296569579288027e-05, "loss": 0.0959, "step": 129460 }, { "epoch": 50.28, "learning_rate": 1.3296051779935275e-05, "loss": 0.0028, "step": 129470 }, { "epoch": 50.28, "learning_rate": 1.3295533980582525e-05, "loss": 0.1344, "step": 129480 }, { "epoch": 50.29, "learning_rate": 1.3295016181229775e-05, "loss": 0.0182, "step": 129490 }, { "epoch": 50.29, "learning_rate": 1.3294498381877024e-05, "loss": 0.2198, "step": 129500 }, { "epoch": 50.3, "learning_rate": 1.3293980582524274e-05, "loss": 0.2259, "step": 129510 }, { "epoch": 50.3, "learning_rate": 1.3293462783171522e-05, "loss": 0.033, "step": 129520 }, { "epoch": 50.3, "learning_rate": 1.3292944983818771e-05, "loss": 0.1704, "step": 129530 }, { "epoch": 50.31, "learning_rate": 1.3292427184466021e-05, "loss": 0.0936, "step": 129540 }, { "epoch": 50.31, "learning_rate": 1.329190938511327e-05, "loss": 0.0188, "step": 129550 }, { "epoch": 50.31, "learning_rate": 1.3291391585760518e-05, "loss": 0.0279, "step": 129560 }, { "epoch": 50.32, "learning_rate": 1.3290873786407768e-05, "loss": 0.0233, "step": 129570 }, { "epoch": 50.32, "learning_rate": 1.3290355987055018e-05, "loss": 0.192, "step": 129580 }, { "epoch": 50.33, "learning_rate": 1.3289838187702266e-05, "loss": 0.0575, "step": 129590 }, { "epoch": 50.33, "learning_rate": 1.3289320388349515e-05, "loss": 0.1414, "step": 129600 }, { "epoch": 50.33, "learning_rate": 1.3288802588996765e-05, "loss": 0.1369, "step": 129610 }, { "epoch": 50.34, "learning_rate": 1.3288284789644014e-05, "loss": 0.0468, "step": 129620 }, { "epoch": 50.34, "learning_rate": 1.3287766990291264e-05, "loss": 0.0872, "step": 129630 }, { "epoch": 50.35, "learning_rate": 1.3287249190938512e-05, "loss": 0.0378, "step": 129640 }, { "epoch": 50.35, "learning_rate": 1.3286731391585762e-05, "loss": 0.036, "step": 129650 }, { "epoch": 50.35, "learning_rate": 1.3286213592233011e-05, "loss": 0.0093, "step": 129660 }, { "epoch": 50.36, "learning_rate": 1.328569579288026e-05, "loss": 0.126, "step": 129670 }, { "epoch": 50.36, "learning_rate": 1.3285177993527509e-05, "loss": 0.0991, "step": 129680 }, { "epoch": 50.37, "learning_rate": 1.3284660194174758e-05, "loss": 0.0057, "step": 129690 }, { "epoch": 50.37, "learning_rate": 1.3284142394822008e-05, "loss": 0.1076, "step": 129700 }, { "epoch": 50.37, "learning_rate": 1.3283624595469258e-05, "loss": 0.0382, "step": 129710 }, { "epoch": 50.38, "learning_rate": 1.3283106796116506e-05, "loss": 0.0744, "step": 129720 }, { "epoch": 50.38, "learning_rate": 1.3282588996763755e-05, "loss": 0.0611, "step": 129730 }, { "epoch": 50.38, "learning_rate": 1.3282071197411003e-05, "loss": 0.0346, "step": 129740 }, { "epoch": 50.39, "learning_rate": 1.3281553398058253e-05, "loss": 0.1452, "step": 129750 }, { "epoch": 50.39, "learning_rate": 1.3281035598705502e-05, "loss": 0.0738, "step": 129760 }, { "epoch": 50.4, "learning_rate": 1.3280517799352752e-05, "loss": 0.1081, "step": 129770 }, { "epoch": 50.4, "learning_rate": 1.3280000000000002e-05, "loss": 0.0326, "step": 129780 }, { "epoch": 50.4, "learning_rate": 1.3279482200647251e-05, "loss": 0.0172, "step": 129790 }, { "epoch": 50.41, "learning_rate": 1.3278964401294499e-05, "loss": 0.063, "step": 129800 }, { "epoch": 50.41, "learning_rate": 1.3278446601941749e-05, "loss": 0.1515, "step": 129810 }, { "epoch": 50.42, "learning_rate": 1.3277928802588997e-05, "loss": 0.0551, "step": 129820 }, { "epoch": 50.42, "learning_rate": 1.3277411003236246e-05, "loss": 0.0867, "step": 129830 }, { "epoch": 50.42, "learning_rate": 1.3276893203883496e-05, "loss": 0.007, "step": 129840 }, { "epoch": 50.43, "learning_rate": 1.3276375404530746e-05, "loss": 0.0073, "step": 129850 }, { "epoch": 50.43, "learning_rate": 1.3275857605177995e-05, "loss": 0.0773, "step": 129860 }, { "epoch": 50.43, "learning_rate": 1.3275339805825245e-05, "loss": 0.0248, "step": 129870 }, { "epoch": 50.44, "learning_rate": 1.3274822006472493e-05, "loss": 0.0754, "step": 129880 }, { "epoch": 50.44, "learning_rate": 1.327430420711974e-05, "loss": 0.1557, "step": 129890 }, { "epoch": 50.45, "learning_rate": 1.327378640776699e-05, "loss": 0.0076, "step": 129900 }, { "epoch": 50.45, "learning_rate": 1.327326860841424e-05, "loss": 0.125, "step": 129910 }, { "epoch": 50.45, "learning_rate": 1.327275080906149e-05, "loss": 0.055, "step": 129920 }, { "epoch": 50.46, "learning_rate": 1.3272233009708739e-05, "loss": 0.0918, "step": 129930 }, { "epoch": 50.46, "learning_rate": 1.3271715210355989e-05, "loss": 0.1725, "step": 129940 }, { "epoch": 50.47, "learning_rate": 1.3271197411003238e-05, "loss": 0.1927, "step": 129950 }, { "epoch": 50.47, "learning_rate": 1.3270679611650486e-05, "loss": 0.2174, "step": 129960 }, { "epoch": 50.47, "learning_rate": 1.3270161812297734e-05, "loss": 0.0632, "step": 129970 }, { "epoch": 50.48, "learning_rate": 1.3269644012944984e-05, "loss": 0.0758, "step": 129980 }, { "epoch": 50.48, "learning_rate": 1.3269126213592233e-05, "loss": 0.012, "step": 129990 }, { "epoch": 50.49, "learning_rate": 1.3268608414239483e-05, "loss": 0.0932, "step": 130000 }, { "epoch": 50.49, "learning_rate": 1.3268090614886733e-05, "loss": 0.0605, "step": 130010 }, { "epoch": 50.49, "learning_rate": 1.3267572815533982e-05, "loss": 0.0221, "step": 130020 }, { "epoch": 50.5, "learning_rate": 1.3267055016181232e-05, "loss": 0.0311, "step": 130030 }, { "epoch": 50.5, "learning_rate": 1.3266537216828478e-05, "loss": 0.0721, "step": 130040 }, { "epoch": 50.5, "learning_rate": 1.3266019417475728e-05, "loss": 0.0138, "step": 130050 }, { "epoch": 50.51, "learning_rate": 1.3265501618122977e-05, "loss": 0.164, "step": 130060 }, { "epoch": 50.51, "learning_rate": 1.3264983818770227e-05, "loss": 0.1385, "step": 130070 }, { "epoch": 50.52, "learning_rate": 1.3264466019417477e-05, "loss": 0.0605, "step": 130080 }, { "epoch": 50.52, "learning_rate": 1.3263948220064726e-05, "loss": 0.0822, "step": 130090 }, { "epoch": 50.52, "learning_rate": 1.3263430420711976e-05, "loss": 0.0632, "step": 130100 }, { "epoch": 50.53, "learning_rate": 1.3262912621359225e-05, "loss": 0.0425, "step": 130110 }, { "epoch": 50.53, "learning_rate": 1.3262394822006475e-05, "loss": 0.0877, "step": 130120 }, { "epoch": 50.54, "learning_rate": 1.3261877022653721e-05, "loss": 0.1132, "step": 130130 }, { "epoch": 50.54, "learning_rate": 1.3261359223300971e-05, "loss": 0.0942, "step": 130140 }, { "epoch": 50.54, "learning_rate": 1.326084142394822e-05, "loss": 0.1912, "step": 130150 }, { "epoch": 50.55, "learning_rate": 1.326032362459547e-05, "loss": 0.0007, "step": 130160 }, { "epoch": 50.55, "learning_rate": 1.325980582524272e-05, "loss": 0.0679, "step": 130170 }, { "epoch": 50.56, "learning_rate": 1.325928802588997e-05, "loss": 0.1151, "step": 130180 }, { "epoch": 50.56, "learning_rate": 1.3258770226537219e-05, "loss": 0.0894, "step": 130190 }, { "epoch": 50.56, "learning_rate": 1.3258252427184469e-05, "loss": 0.1693, "step": 130200 }, { "epoch": 50.57, "learning_rate": 1.3257734627831715e-05, "loss": 0.0379, "step": 130210 }, { "epoch": 50.57, "learning_rate": 1.3257216828478964e-05, "loss": 0.0899, "step": 130220 }, { "epoch": 50.57, "learning_rate": 1.3256699029126214e-05, "loss": 0.0394, "step": 130230 }, { "epoch": 50.58, "learning_rate": 1.3256181229773464e-05, "loss": 0.1063, "step": 130240 }, { "epoch": 50.58, "learning_rate": 1.3255663430420713e-05, "loss": 0.1492, "step": 130250 }, { "epoch": 50.59, "learning_rate": 1.3255145631067963e-05, "loss": 0.1158, "step": 130260 }, { "epoch": 50.59, "learning_rate": 1.3254627831715213e-05, "loss": 0.0749, "step": 130270 }, { "epoch": 50.59, "learning_rate": 1.3254110032362462e-05, "loss": 0.2305, "step": 130280 }, { "epoch": 50.6, "learning_rate": 1.3253592233009708e-05, "loss": 0.0591, "step": 130290 }, { "epoch": 50.6, "learning_rate": 1.3253074433656958e-05, "loss": 0.0728, "step": 130300 }, { "epoch": 50.61, "learning_rate": 1.3252556634304208e-05, "loss": 0.0701, "step": 130310 }, { "epoch": 50.61, "learning_rate": 1.3252038834951457e-05, "loss": 0.0985, "step": 130320 }, { "epoch": 50.61, "learning_rate": 1.3251521035598707e-05, "loss": 0.0531, "step": 130330 }, { "epoch": 50.62, "learning_rate": 1.3251003236245956e-05, "loss": 0.1245, "step": 130340 }, { "epoch": 50.62, "learning_rate": 1.3250485436893206e-05, "loss": 0.2005, "step": 130350 }, { "epoch": 50.63, "learning_rate": 1.3249967637540456e-05, "loss": 0.1264, "step": 130360 }, { "epoch": 50.63, "learning_rate": 1.3249449838187702e-05, "loss": 0.0703, "step": 130370 }, { "epoch": 50.63, "learning_rate": 1.3248932038834952e-05, "loss": 0.1338, "step": 130380 }, { "epoch": 50.64, "learning_rate": 1.3248414239482201e-05, "loss": 0.0581, "step": 130390 }, { "epoch": 50.64, "learning_rate": 1.324789644012945e-05, "loss": 0.0534, "step": 130400 }, { "epoch": 50.64, "learning_rate": 1.32473786407767e-05, "loss": 0.1012, "step": 130410 }, { "epoch": 50.65, "learning_rate": 1.324686084142395e-05, "loss": 0.0748, "step": 130420 }, { "epoch": 50.65, "learning_rate": 1.32463430420712e-05, "loss": 0.0502, "step": 130430 }, { "epoch": 50.66, "learning_rate": 1.324582524271845e-05, "loss": 0.1573, "step": 130440 }, { "epoch": 50.66, "learning_rate": 1.3245307443365696e-05, "loss": 0.1206, "step": 130450 }, { "epoch": 50.66, "learning_rate": 1.3244789644012945e-05, "loss": 0.1632, "step": 130460 }, { "epoch": 50.67, "learning_rate": 1.3244271844660195e-05, "loss": 0.0636, "step": 130470 }, { "epoch": 50.67, "learning_rate": 1.3243754045307444e-05, "loss": 0.032, "step": 130480 }, { "epoch": 50.68, "learning_rate": 1.3243236245954694e-05, "loss": 0.1203, "step": 130490 }, { "epoch": 50.68, "learning_rate": 1.3242718446601944e-05, "loss": 0.049, "step": 130500 }, { "epoch": 50.68, "learning_rate": 1.3242200647249193e-05, "loss": 0.1083, "step": 130510 }, { "epoch": 50.69, "learning_rate": 1.3241682847896443e-05, "loss": 0.0462, "step": 130520 }, { "epoch": 50.69, "learning_rate": 1.3241165048543689e-05, "loss": 0.1291, "step": 130530 }, { "epoch": 50.7, "learning_rate": 1.3240647249190939e-05, "loss": 0.0675, "step": 130540 }, { "epoch": 50.7, "learning_rate": 1.3240129449838188e-05, "loss": 0.0426, "step": 130550 }, { "epoch": 50.7, "learning_rate": 1.3239611650485438e-05, "loss": 0.0631, "step": 130560 }, { "epoch": 50.71, "learning_rate": 1.3239093851132688e-05, "loss": 0.0097, "step": 130570 }, { "epoch": 50.71, "learning_rate": 1.3238576051779937e-05, "loss": 0.0568, "step": 130580 }, { "epoch": 50.71, "learning_rate": 1.3238058252427187e-05, "loss": 0.0212, "step": 130590 }, { "epoch": 50.72, "learning_rate": 1.3237540453074436e-05, "loss": 0.0537, "step": 130600 }, { "epoch": 50.72, "learning_rate": 1.3237022653721683e-05, "loss": 0.0198, "step": 130610 }, { "epoch": 50.73, "learning_rate": 1.3236504854368932e-05, "loss": 0.0491, "step": 130620 }, { "epoch": 50.73, "learning_rate": 1.3235987055016182e-05, "loss": 0.1215, "step": 130630 }, { "epoch": 50.73, "learning_rate": 1.3235469255663431e-05, "loss": 0.1103, "step": 130640 }, { "epoch": 50.74, "learning_rate": 1.3234951456310681e-05, "loss": 0.1061, "step": 130650 }, { "epoch": 50.74, "learning_rate": 1.323443365695793e-05, "loss": 0.0931, "step": 130660 }, { "epoch": 50.75, "learning_rate": 1.323391585760518e-05, "loss": 0.2891, "step": 130670 }, { "epoch": 50.75, "learning_rate": 1.3233398058252428e-05, "loss": 0.0821, "step": 130680 }, { "epoch": 50.75, "learning_rate": 1.3232880258899678e-05, "loss": 0.0026, "step": 130690 }, { "epoch": 50.76, "learning_rate": 1.3232362459546926e-05, "loss": 0.0261, "step": 130700 }, { "epoch": 50.76, "learning_rate": 1.3231844660194175e-05, "loss": 0.0551, "step": 130710 }, { "epoch": 50.77, "learning_rate": 1.3231326860841425e-05, "loss": 0.2067, "step": 130720 }, { "epoch": 50.77, "learning_rate": 1.3230809061488675e-05, "loss": 0.0722, "step": 130730 }, { "epoch": 50.77, "learning_rate": 1.3230291262135924e-05, "loss": 0.046, "step": 130740 }, { "epoch": 50.78, "learning_rate": 1.3229773462783174e-05, "loss": 0.0667, "step": 130750 }, { "epoch": 50.78, "learning_rate": 1.3229255663430422e-05, "loss": 0.1106, "step": 130760 }, { "epoch": 50.78, "learning_rate": 1.3228737864077671e-05, "loss": 0.056, "step": 130770 }, { "epoch": 50.79, "learning_rate": 1.322822006472492e-05, "loss": 0.0731, "step": 130780 }, { "epoch": 50.79, "learning_rate": 1.3227702265372169e-05, "loss": 0.129, "step": 130790 }, { "epoch": 50.8, "learning_rate": 1.3227184466019419e-05, "loss": 0.1332, "step": 130800 }, { "epoch": 50.8, "learning_rate": 1.3226666666666668e-05, "loss": 0.0781, "step": 130810 }, { "epoch": 50.8, "learning_rate": 1.3226148867313918e-05, "loss": 0.0099, "step": 130820 }, { "epoch": 50.81, "learning_rate": 1.3225631067961166e-05, "loss": 0.017, "step": 130830 }, { "epoch": 50.81, "learning_rate": 1.3225113268608415e-05, "loss": 0.0236, "step": 130840 }, { "epoch": 50.82, "learning_rate": 1.3224595469255665e-05, "loss": 0.1228, "step": 130850 }, { "epoch": 50.82, "learning_rate": 1.3224077669902913e-05, "loss": 0.1749, "step": 130860 }, { "epoch": 50.82, "learning_rate": 1.3223559870550163e-05, "loss": 0.0388, "step": 130870 }, { "epoch": 50.83, "learning_rate": 1.3223042071197412e-05, "loss": 0.0283, "step": 130880 }, { "epoch": 50.83, "learning_rate": 1.3222524271844662e-05, "loss": 0.2304, "step": 130890 }, { "epoch": 50.83, "learning_rate": 1.3222006472491911e-05, "loss": 0.1408, "step": 130900 }, { "epoch": 50.84, "learning_rate": 1.322148867313916e-05, "loss": 0.0254, "step": 130910 }, { "epoch": 50.84, "learning_rate": 1.3220970873786409e-05, "loss": 0.0363, "step": 130920 }, { "epoch": 50.85, "learning_rate": 1.3220453074433659e-05, "loss": 0.0514, "step": 130930 }, { "epoch": 50.85, "learning_rate": 1.3219935275080906e-05, "loss": 0.036, "step": 130940 }, { "epoch": 50.85, "learning_rate": 1.3219417475728156e-05, "loss": 0.0967, "step": 130950 }, { "epoch": 50.86, "learning_rate": 1.3218899676375406e-05, "loss": 0.155, "step": 130960 }, { "epoch": 50.86, "learning_rate": 1.3218381877022655e-05, "loss": 0.0285, "step": 130970 }, { "epoch": 50.87, "learning_rate": 1.3217864077669903e-05, "loss": 0.0763, "step": 130980 }, { "epoch": 50.87, "learning_rate": 1.3217346278317153e-05, "loss": 0.0046, "step": 130990 }, { "epoch": 50.87, "learning_rate": 1.3216828478964402e-05, "loss": 0.0159, "step": 131000 }, { "epoch": 50.88, "learning_rate": 1.3216310679611652e-05, "loss": 0.043, "step": 131010 }, { "epoch": 50.88, "learning_rate": 1.32157928802589e-05, "loss": 0.109, "step": 131020 }, { "epoch": 50.89, "learning_rate": 1.321527508090615e-05, "loss": 0.0951, "step": 131030 }, { "epoch": 50.89, "learning_rate": 1.32147572815534e-05, "loss": 0.0399, "step": 131040 }, { "epoch": 50.89, "learning_rate": 1.3214239482200649e-05, "loss": 0.0826, "step": 131050 }, { "epoch": 50.9, "learning_rate": 1.3213721682847897e-05, "loss": 0.0697, "step": 131060 }, { "epoch": 50.9, "learning_rate": 1.3213203883495146e-05, "loss": 0.0053, "step": 131070 }, { "epoch": 50.9, "learning_rate": 1.3212686084142396e-05, "loss": 0.1033, "step": 131080 }, { "epoch": 50.91, "learning_rate": 1.3212168284789646e-05, "loss": 0.0128, "step": 131090 }, { "epoch": 50.91, "learning_rate": 1.3211650485436894e-05, "loss": 0.001, "step": 131100 }, { "epoch": 50.92, "learning_rate": 1.3211132686084143e-05, "loss": 0.0116, "step": 131110 }, { "epoch": 50.92, "learning_rate": 1.3210614886731393e-05, "loss": 0.0821, "step": 131120 }, { "epoch": 50.92, "learning_rate": 1.3210097087378642e-05, "loss": 0.059, "step": 131130 }, { "epoch": 50.93, "learning_rate": 1.320957928802589e-05, "loss": 0.0591, "step": 131140 }, { "epoch": 50.93, "learning_rate": 1.320906148867314e-05, "loss": 0.0518, "step": 131150 }, { "epoch": 50.94, "learning_rate": 1.320854368932039e-05, "loss": 0.2752, "step": 131160 }, { "epoch": 50.94, "learning_rate": 1.320802588996764e-05, "loss": 0.0007, "step": 131170 }, { "epoch": 50.94, "learning_rate": 1.3207508090614889e-05, "loss": 0.0352, "step": 131180 }, { "epoch": 50.95, "learning_rate": 1.3206990291262137e-05, "loss": 0.0436, "step": 131190 }, { "epoch": 50.95, "learning_rate": 1.3206472491909386e-05, "loss": 0.1107, "step": 131200 }, { "epoch": 50.96, "learning_rate": 1.3205954692556634e-05, "loss": 0.1102, "step": 131210 }, { "epoch": 50.96, "learning_rate": 1.3205436893203884e-05, "loss": 0.0896, "step": 131220 }, { "epoch": 50.96, "learning_rate": 1.3204919093851134e-05, "loss": 0.122, "step": 131230 }, { "epoch": 50.97, "learning_rate": 1.3204401294498383e-05, "loss": 0.1999, "step": 131240 }, { "epoch": 50.97, "learning_rate": 1.3203883495145633e-05, "loss": 0.0604, "step": 131250 }, { "epoch": 50.97, "learning_rate": 1.3203365695792882e-05, "loss": 0.4277, "step": 131260 }, { "epoch": 50.98, "learning_rate": 1.320284789644013e-05, "loss": 0.0934, "step": 131270 }, { "epoch": 50.98, "learning_rate": 1.320233009708738e-05, "loss": 0.0234, "step": 131280 }, { "epoch": 50.99, "learning_rate": 1.3201812297734628e-05, "loss": 0.1346, "step": 131290 }, { "epoch": 50.99, "learning_rate": 1.3201294498381877e-05, "loss": 0.138, "step": 131300 }, { "epoch": 50.99, "learning_rate": 1.3200776699029127e-05, "loss": 0.0856, "step": 131310 }, { "epoch": 51.0, "learning_rate": 1.3200258899676377e-05, "loss": 0.1785, "step": 131320 }, { "epoch": 51.0, "eval_accuracy": 0.9513067400275104, "eval_loss": 0.3273870646953583, "eval_runtime": 8.2475, "eval_samples_per_second": 440.741, "eval_steps_per_second": 55.168, "step": 131325 }, { "epoch": 51.0, "learning_rate": 1.3199741100323626e-05, "loss": 0.1529, "step": 131330 }, { "epoch": 51.01, "learning_rate": 1.3199223300970876e-05, "loss": 0.087, "step": 131340 }, { "epoch": 51.01, "learning_rate": 1.3198705501618124e-05, "loss": 0.0113, "step": 131350 }, { "epoch": 51.01, "learning_rate": 1.3198187702265372e-05, "loss": 0.0405, "step": 131360 }, { "epoch": 51.02, "learning_rate": 1.3197669902912621e-05, "loss": 0.0551, "step": 131370 }, { "epoch": 51.02, "learning_rate": 1.3197152103559871e-05, "loss": 0.0927, "step": 131380 }, { "epoch": 51.03, "learning_rate": 1.319663430420712e-05, "loss": 0.1212, "step": 131390 }, { "epoch": 51.03, "learning_rate": 1.319611650485437e-05, "loss": 0.0195, "step": 131400 }, { "epoch": 51.03, "learning_rate": 1.319559870550162e-05, "loss": 0.1405, "step": 131410 }, { "epoch": 51.04, "learning_rate": 1.319508090614887e-05, "loss": 0.0597, "step": 131420 }, { "epoch": 51.04, "learning_rate": 1.3194563106796117e-05, "loss": 0.2453, "step": 131430 }, { "epoch": 51.04, "learning_rate": 1.3194045307443365e-05, "loss": 0.1105, "step": 131440 }, { "epoch": 51.05, "learning_rate": 1.3193527508090615e-05, "loss": 0.1077, "step": 131450 }, { "epoch": 51.05, "learning_rate": 1.3193009708737865e-05, "loss": 0.0573, "step": 131460 }, { "epoch": 51.06, "learning_rate": 1.3192491909385114e-05, "loss": 0.0003, "step": 131470 }, { "epoch": 51.06, "learning_rate": 1.3191974110032364e-05, "loss": 0.0224, "step": 131480 }, { "epoch": 51.06, "learning_rate": 1.3191456310679613e-05, "loss": 0.0751, "step": 131490 }, { "epoch": 51.07, "learning_rate": 1.3190938511326863e-05, "loss": 0.0982, "step": 131500 }, { "epoch": 51.07, "learning_rate": 1.319042071197411e-05, "loss": 0.0719, "step": 131510 }, { "epoch": 51.08, "learning_rate": 1.3189902912621359e-05, "loss": 0.1023, "step": 131520 }, { "epoch": 51.08, "learning_rate": 1.3189385113268609e-05, "loss": 0.2425, "step": 131530 }, { "epoch": 51.08, "learning_rate": 1.3188867313915858e-05, "loss": 0.0245, "step": 131540 }, { "epoch": 51.09, "learning_rate": 1.3188349514563108e-05, "loss": 0.0976, "step": 131550 }, { "epoch": 51.09, "learning_rate": 1.3187831715210357e-05, "loss": 0.0864, "step": 131560 }, { "epoch": 51.1, "learning_rate": 1.3187313915857607e-05, "loss": 0.0928, "step": 131570 }, { "epoch": 51.1, "learning_rate": 1.3186796116504857e-05, "loss": 0.0568, "step": 131580 }, { "epoch": 51.1, "learning_rate": 1.3186278317152103e-05, "loss": 0.1058, "step": 131590 }, { "epoch": 51.11, "learning_rate": 1.3185760517799352e-05, "loss": 0.0882, "step": 131600 }, { "epoch": 51.11, "learning_rate": 1.3185242718446602e-05, "loss": 0.1466, "step": 131610 }, { "epoch": 51.11, "learning_rate": 1.3184724919093852e-05, "loss": 0.1514, "step": 131620 }, { "epoch": 51.12, "learning_rate": 1.3184207119741101e-05, "loss": 0.0326, "step": 131630 }, { "epoch": 51.12, "learning_rate": 1.3183689320388351e-05, "loss": 0.0873, "step": 131640 }, { "epoch": 51.13, "learning_rate": 1.31831715210356e-05, "loss": 0.0805, "step": 131650 }, { "epoch": 51.13, "learning_rate": 1.318265372168285e-05, "loss": 0.1236, "step": 131660 }, { "epoch": 51.13, "learning_rate": 1.3182135922330096e-05, "loss": 0.0734, "step": 131670 }, { "epoch": 51.14, "learning_rate": 1.3181618122977346e-05, "loss": 0.03, "step": 131680 }, { "epoch": 51.14, "learning_rate": 1.3181100323624596e-05, "loss": 0.0011, "step": 131690 }, { "epoch": 51.15, "learning_rate": 1.3180582524271845e-05, "loss": 0.1, "step": 131700 }, { "epoch": 51.15, "learning_rate": 1.3180064724919095e-05, "loss": 0.001, "step": 131710 }, { "epoch": 51.15, "learning_rate": 1.3179546925566344e-05, "loss": 0.0625, "step": 131720 }, { "epoch": 51.16, "learning_rate": 1.3179029126213594e-05, "loss": 0.0978, "step": 131730 }, { "epoch": 51.16, "learning_rate": 1.3178511326860844e-05, "loss": 0.0245, "step": 131740 }, { "epoch": 51.17, "learning_rate": 1.3177993527508093e-05, "loss": 0.1179, "step": 131750 }, { "epoch": 51.17, "learning_rate": 1.317747572815534e-05, "loss": 0.0261, "step": 131760 }, { "epoch": 51.17, "learning_rate": 1.317695792880259e-05, "loss": 0.0929, "step": 131770 }, { "epoch": 51.18, "learning_rate": 1.3176440129449839e-05, "loss": 0.0909, "step": 131780 }, { "epoch": 51.18, "learning_rate": 1.3175922330097088e-05, "loss": 0.098, "step": 131790 }, { "epoch": 51.18, "learning_rate": 1.3175404530744338e-05, "loss": 0.0432, "step": 131800 }, { "epoch": 51.19, "learning_rate": 1.3174886731391588e-05, "loss": 0.0686, "step": 131810 }, { "epoch": 51.19, "learning_rate": 1.3174368932038837e-05, "loss": 0.1541, "step": 131820 }, { "epoch": 51.2, "learning_rate": 1.3173851132686087e-05, "loss": 0.1656, "step": 131830 }, { "epoch": 51.2, "learning_rate": 1.3173333333333333e-05, "loss": 0.0859, "step": 131840 }, { "epoch": 51.2, "learning_rate": 1.3172815533980583e-05, "loss": 0.0727, "step": 131850 }, { "epoch": 51.21, "learning_rate": 1.3172297734627832e-05, "loss": 0.1848, "step": 131860 }, { "epoch": 51.21, "learning_rate": 1.3171779935275082e-05, "loss": 0.0209, "step": 131870 }, { "epoch": 51.22, "learning_rate": 1.3171262135922332e-05, "loss": 0.006, "step": 131880 }, { "epoch": 51.22, "learning_rate": 1.3170744336569581e-05, "loss": 0.0742, "step": 131890 }, { "epoch": 51.22, "learning_rate": 1.317022653721683e-05, "loss": 0.0711, "step": 131900 }, { "epoch": 51.23, "learning_rate": 1.316970873786408e-05, "loss": 0.0162, "step": 131910 }, { "epoch": 51.23, "learning_rate": 1.3169190938511327e-05, "loss": 0.1001, "step": 131920 }, { "epoch": 51.23, "learning_rate": 1.3168673139158576e-05, "loss": 0.0018, "step": 131930 }, { "epoch": 51.24, "learning_rate": 1.3168155339805826e-05, "loss": 0.1019, "step": 131940 }, { "epoch": 51.24, "learning_rate": 1.3167637540453076e-05, "loss": 0.0362, "step": 131950 }, { "epoch": 51.25, "learning_rate": 1.3167119741100325e-05, "loss": 0.0448, "step": 131960 }, { "epoch": 51.25, "learning_rate": 1.3166601941747575e-05, "loss": 0.0746, "step": 131970 }, { "epoch": 51.25, "learning_rate": 1.3166084142394824e-05, "loss": 0.0773, "step": 131980 }, { "epoch": 51.26, "learning_rate": 1.3165566343042074e-05, "loss": 0.1832, "step": 131990 }, { "epoch": 51.26, "learning_rate": 1.316504854368932e-05, "loss": 0.0048, "step": 132000 }, { "epoch": 51.27, "learning_rate": 1.316453074433657e-05, "loss": 0.0996, "step": 132010 }, { "epoch": 51.27, "learning_rate": 1.316401294498382e-05, "loss": 0.1454, "step": 132020 }, { "epoch": 51.27, "learning_rate": 1.3163495145631069e-05, "loss": 0.0055, "step": 132030 }, { "epoch": 51.28, "learning_rate": 1.3162977346278319e-05, "loss": 0.1092, "step": 132040 }, { "epoch": 51.28, "learning_rate": 1.3162459546925568e-05, "loss": 0.1275, "step": 132050 }, { "epoch": 51.29, "learning_rate": 1.3161941747572818e-05, "loss": 0.0388, "step": 132060 }, { "epoch": 51.29, "learning_rate": 1.3161423948220068e-05, "loss": 0.1498, "step": 132070 }, { "epoch": 51.29, "learning_rate": 1.3160906148867314e-05, "loss": 0.1005, "step": 132080 }, { "epoch": 51.3, "learning_rate": 1.3160388349514563e-05, "loss": 0.0317, "step": 132090 }, { "epoch": 51.3, "learning_rate": 1.3159870550161813e-05, "loss": 0.0373, "step": 132100 }, { "epoch": 51.3, "learning_rate": 1.3159352750809063e-05, "loss": 0.0155, "step": 132110 }, { "epoch": 51.31, "learning_rate": 1.3158834951456312e-05, "loss": 0.021, "step": 132120 }, { "epoch": 51.31, "learning_rate": 1.3158317152103562e-05, "loss": 0.0356, "step": 132130 }, { "epoch": 51.32, "learning_rate": 1.3157799352750812e-05, "loss": 0.0901, "step": 132140 }, { "epoch": 51.32, "learning_rate": 1.315728155339806e-05, "loss": 0.0418, "step": 132150 }, { "epoch": 51.32, "learning_rate": 1.3156763754045307e-05, "loss": 0.156, "step": 132160 }, { "epoch": 51.33, "learning_rate": 1.3156245954692557e-05, "loss": 0.0461, "step": 132170 }, { "epoch": 51.33, "learning_rate": 1.3155728155339807e-05, "loss": 0.068, "step": 132180 }, { "epoch": 51.34, "learning_rate": 1.3155210355987056e-05, "loss": 0.0952, "step": 132190 }, { "epoch": 51.34, "learning_rate": 1.3154692556634306e-05, "loss": 0.2315, "step": 132200 }, { "epoch": 51.34, "learning_rate": 1.3154174757281555e-05, "loss": 0.0729, "step": 132210 }, { "epoch": 51.35, "learning_rate": 1.3153656957928805e-05, "loss": 0.0691, "step": 132220 }, { "epoch": 51.35, "learning_rate": 1.3153139158576053e-05, "loss": 0.1652, "step": 132230 }, { "epoch": 51.36, "learning_rate": 1.3152621359223301e-05, "loss": 0.1815, "step": 132240 }, { "epoch": 51.36, "learning_rate": 1.315210355987055e-05, "loss": 0.004, "step": 132250 }, { "epoch": 51.36, "learning_rate": 1.31515857605178e-05, "loss": 0.1538, "step": 132260 }, { "epoch": 51.37, "learning_rate": 1.315106796116505e-05, "loss": 0.1328, "step": 132270 }, { "epoch": 51.37, "learning_rate": 1.31505501618123e-05, "loss": 0.0499, "step": 132280 }, { "epoch": 51.37, "learning_rate": 1.3150032362459549e-05, "loss": 0.1438, "step": 132290 }, { "epoch": 51.38, "learning_rate": 1.3149514563106797e-05, "loss": 0.1341, "step": 132300 }, { "epoch": 51.38, "learning_rate": 1.3148996763754047e-05, "loss": 0.0881, "step": 132310 }, { "epoch": 51.39, "learning_rate": 1.3148478964401296e-05, "loss": 0.0463, "step": 132320 }, { "epoch": 51.39, "learning_rate": 1.3147961165048544e-05, "loss": 0.0585, "step": 132330 }, { "epoch": 51.39, "learning_rate": 1.3147443365695794e-05, "loss": 0.0521, "step": 132340 }, { "epoch": 51.4, "learning_rate": 1.3146925566343043e-05, "loss": 0.0936, "step": 132350 }, { "epoch": 51.4, "learning_rate": 1.3146407766990293e-05, "loss": 0.0562, "step": 132360 }, { "epoch": 51.41, "learning_rate": 1.3145889967637543e-05, "loss": 0.0016, "step": 132370 }, { "epoch": 51.41, "learning_rate": 1.314537216828479e-05, "loss": 0.0572, "step": 132380 }, { "epoch": 51.41, "learning_rate": 1.314485436893204e-05, "loss": 0.1511, "step": 132390 }, { "epoch": 51.42, "learning_rate": 1.314433656957929e-05, "loss": 0.0648, "step": 132400 }, { "epoch": 51.42, "learning_rate": 1.3143818770226538e-05, "loss": 0.1046, "step": 132410 }, { "epoch": 51.43, "learning_rate": 1.3143300970873787e-05, "loss": 0.0633, "step": 132420 }, { "epoch": 51.43, "learning_rate": 1.3142783171521037e-05, "loss": 0.1827, "step": 132430 }, { "epoch": 51.43, "learning_rate": 1.3142265372168286e-05, "loss": 0.0316, "step": 132440 }, { "epoch": 51.44, "learning_rate": 1.3141747572815534e-05, "loss": 0.0719, "step": 132450 }, { "epoch": 51.44, "learning_rate": 1.3141229773462784e-05, "loss": 0.1907, "step": 132460 }, { "epoch": 51.44, "learning_rate": 1.3140711974110034e-05, "loss": 0.1051, "step": 132470 }, { "epoch": 51.45, "learning_rate": 1.3140194174757283e-05, "loss": 0.1064, "step": 132480 }, { "epoch": 51.45, "learning_rate": 1.3139676375404531e-05, "loss": 0.0908, "step": 132490 }, { "epoch": 51.46, "learning_rate": 1.313915857605178e-05, "loss": 0.1072, "step": 132500 }, { "epoch": 51.46, "learning_rate": 1.313864077669903e-05, "loss": 0.1335, "step": 132510 }, { "epoch": 51.46, "learning_rate": 1.313812297734628e-05, "loss": 0.0015, "step": 132520 }, { "epoch": 51.47, "learning_rate": 1.3137605177993528e-05, "loss": 0.0283, "step": 132530 }, { "epoch": 51.47, "learning_rate": 1.3137087378640778e-05, "loss": 0.0435, "step": 132540 }, { "epoch": 51.48, "learning_rate": 1.3136569579288027e-05, "loss": 0.0554, "step": 132550 }, { "epoch": 51.48, "learning_rate": 1.3136051779935277e-05, "loss": 0.0455, "step": 132560 }, { "epoch": 51.48, "learning_rate": 1.3135533980582525e-05, "loss": 0.0911, "step": 132570 }, { "epoch": 51.49, "learning_rate": 1.3135016181229774e-05, "loss": 0.0772, "step": 132580 }, { "epoch": 51.49, "learning_rate": 1.3134498381877024e-05, "loss": 0.1302, "step": 132590 }, { "epoch": 51.5, "learning_rate": 1.3133980582524274e-05, "loss": 0.0146, "step": 132600 }, { "epoch": 51.5, "learning_rate": 1.3133462783171522e-05, "loss": 0.0343, "step": 132610 }, { "epoch": 51.5, "learning_rate": 1.3132944983818771e-05, "loss": 0.1143, "step": 132620 }, { "epoch": 51.51, "learning_rate": 1.313242718446602e-05, "loss": 0.0393, "step": 132630 }, { "epoch": 51.51, "learning_rate": 1.313190938511327e-05, "loss": 0.1122, "step": 132640 }, { "epoch": 51.51, "learning_rate": 1.3131391585760518e-05, "loss": 0.0471, "step": 132650 }, { "epoch": 51.52, "learning_rate": 1.3130873786407768e-05, "loss": 0.0488, "step": 132660 }, { "epoch": 51.52, "learning_rate": 1.3130355987055018e-05, "loss": 0.12, "step": 132670 }, { "epoch": 51.53, "learning_rate": 1.3129838187702265e-05, "loss": 0.0108, "step": 132680 }, { "epoch": 51.53, "learning_rate": 1.3129320388349515e-05, "loss": 0.0216, "step": 132690 }, { "epoch": 51.53, "learning_rate": 1.3128802588996765e-05, "loss": 0.0028, "step": 132700 }, { "epoch": 51.54, "learning_rate": 1.3128284789644014e-05, "loss": 0.128, "step": 132710 }, { "epoch": 51.54, "learning_rate": 1.3127766990291264e-05, "loss": 0.0446, "step": 132720 }, { "epoch": 51.55, "learning_rate": 1.3127249190938512e-05, "loss": 0.0452, "step": 132730 }, { "epoch": 51.55, "learning_rate": 1.3126731391585761e-05, "loss": 0.185, "step": 132740 }, { "epoch": 51.55, "learning_rate": 1.3126213592233011e-05, "loss": 0.1568, "step": 132750 }, { "epoch": 51.56, "learning_rate": 1.3125695792880259e-05, "loss": 0.1241, "step": 132760 }, { "epoch": 51.56, "learning_rate": 1.3125177993527509e-05, "loss": 0.1309, "step": 132770 }, { "epoch": 51.57, "learning_rate": 1.3124660194174758e-05, "loss": 0.0132, "step": 132780 }, { "epoch": 51.57, "learning_rate": 1.3124142394822008e-05, "loss": 0.0448, "step": 132790 }, { "epoch": 51.57, "learning_rate": 1.3123624595469258e-05, "loss": 0.0428, "step": 132800 }, { "epoch": 51.58, "learning_rate": 1.3123106796116505e-05, "loss": 0.1283, "step": 132810 }, { "epoch": 51.58, "learning_rate": 1.3122588996763755e-05, "loss": 0.0358, "step": 132820 }, { "epoch": 51.58, "learning_rate": 1.3122071197411003e-05, "loss": 0.0291, "step": 132830 }, { "epoch": 51.59, "learning_rate": 1.3121553398058253e-05, "loss": 0.0604, "step": 132840 }, { "epoch": 51.59, "learning_rate": 1.3121035598705502e-05, "loss": 0.0336, "step": 132850 }, { "epoch": 51.6, "learning_rate": 1.3120517799352752e-05, "loss": 0.009, "step": 132860 }, { "epoch": 51.6, "learning_rate": 1.3120000000000001e-05, "loss": 0.1414, "step": 132870 }, { "epoch": 51.6, "learning_rate": 1.3119482200647251e-05, "loss": 0.0239, "step": 132880 }, { "epoch": 51.61, "learning_rate": 1.31189644012945e-05, "loss": 0.0507, "step": 132890 }, { "epoch": 51.61, "learning_rate": 1.3118446601941749e-05, "loss": 0.1254, "step": 132900 }, { "epoch": 51.62, "learning_rate": 1.3117928802588997e-05, "loss": 0.1163, "step": 132910 }, { "epoch": 51.62, "learning_rate": 1.3117411003236246e-05, "loss": 0.1097, "step": 132920 }, { "epoch": 51.62, "learning_rate": 1.3116893203883496e-05, "loss": 0.1226, "step": 132930 }, { "epoch": 51.63, "learning_rate": 1.3116375404530745e-05, "loss": 0.0275, "step": 132940 }, { "epoch": 51.63, "learning_rate": 1.3115857605177995e-05, "loss": 0.0182, "step": 132950 }, { "epoch": 51.63, "learning_rate": 1.3115339805825245e-05, "loss": 0.0746, "step": 132960 }, { "epoch": 51.64, "learning_rate": 1.3114822006472494e-05, "loss": 0.1684, "step": 132970 }, { "epoch": 51.64, "learning_rate": 1.311430420711974e-05, "loss": 0.0561, "step": 132980 }, { "epoch": 51.65, "learning_rate": 1.311378640776699e-05, "loss": 0.0927, "step": 132990 }, { "epoch": 51.65, "learning_rate": 1.311326860841424e-05, "loss": 0.0289, "step": 133000 }, { "epoch": 51.65, "learning_rate": 1.311275080906149e-05, "loss": 0.0015, "step": 133010 }, { "epoch": 51.66, "learning_rate": 1.3112233009708739e-05, "loss": 0.0375, "step": 133020 }, { "epoch": 51.66, "learning_rate": 1.3111715210355989e-05, "loss": 0.1232, "step": 133030 }, { "epoch": 51.67, "learning_rate": 1.3111197411003238e-05, "loss": 0.0712, "step": 133040 }, { "epoch": 51.67, "learning_rate": 1.3110679611650488e-05, "loss": 0.0764, "step": 133050 }, { "epoch": 51.67, "learning_rate": 1.3110161812297734e-05, "loss": 0.0538, "step": 133060 }, { "epoch": 51.68, "learning_rate": 1.3109644012944984e-05, "loss": 0.0732, "step": 133070 }, { "epoch": 51.68, "learning_rate": 1.3109126213592233e-05, "loss": 0.1258, "step": 133080 }, { "epoch": 51.69, "learning_rate": 1.3108608414239483e-05, "loss": 0.1584, "step": 133090 }, { "epoch": 51.69, "learning_rate": 1.3108090614886732e-05, "loss": 0.125, "step": 133100 }, { "epoch": 51.69, "learning_rate": 1.3107572815533982e-05, "loss": 0.1582, "step": 133110 }, { "epoch": 51.7, "learning_rate": 1.3107055016181232e-05, "loss": 0.1028, "step": 133120 }, { "epoch": 51.7, "learning_rate": 1.3106537216828481e-05, "loss": 0.0389, "step": 133130 }, { "epoch": 51.7, "learning_rate": 1.3106019417475728e-05, "loss": 0.2008, "step": 133140 }, { "epoch": 51.71, "learning_rate": 1.3105501618122977e-05, "loss": 0.0244, "step": 133150 }, { "epoch": 51.71, "learning_rate": 1.3104983818770227e-05, "loss": 0.0497, "step": 133160 }, { "epoch": 51.72, "learning_rate": 1.3104466019417476e-05, "loss": 0.0126, "step": 133170 }, { "epoch": 51.72, "learning_rate": 1.3103948220064726e-05, "loss": 0.0309, "step": 133180 }, { "epoch": 51.72, "learning_rate": 1.3103430420711976e-05, "loss": 0.0224, "step": 133190 }, { "epoch": 51.73, "learning_rate": 1.3102912621359225e-05, "loss": 0.0729, "step": 133200 }, { "epoch": 51.73, "learning_rate": 1.3102394822006475e-05, "loss": 0.075, "step": 133210 }, { "epoch": 51.74, "learning_rate": 1.3101877022653721e-05, "loss": 0.0716, "step": 133220 }, { "epoch": 51.74, "learning_rate": 1.310135922330097e-05, "loss": 0.2125, "step": 133230 }, { "epoch": 51.74, "learning_rate": 1.310084142394822e-05, "loss": 0.0269, "step": 133240 }, { "epoch": 51.75, "learning_rate": 1.310032362459547e-05, "loss": 0.0534, "step": 133250 }, { "epoch": 51.75, "learning_rate": 1.309980582524272e-05, "loss": 0.028, "step": 133260 }, { "epoch": 51.76, "learning_rate": 1.309928802588997e-05, "loss": 0.1072, "step": 133270 }, { "epoch": 51.76, "learning_rate": 1.3098770226537219e-05, "loss": 0.0014, "step": 133280 }, { "epoch": 51.76, "learning_rate": 1.3098252427184468e-05, "loss": 0.1145, "step": 133290 }, { "epoch": 51.77, "learning_rate": 1.3097734627831715e-05, "loss": 0.0625, "step": 133300 }, { "epoch": 51.77, "learning_rate": 1.3097216828478964e-05, "loss": 0.1866, "step": 133310 }, { "epoch": 51.77, "learning_rate": 1.3096699029126214e-05, "loss": 0.0403, "step": 133320 }, { "epoch": 51.78, "learning_rate": 1.3096181229773464e-05, "loss": 0.0692, "step": 133330 }, { "epoch": 51.78, "learning_rate": 1.3095663430420713e-05, "loss": 0.0954, "step": 133340 }, { "epoch": 51.79, "learning_rate": 1.3095145631067963e-05, "loss": 0.1215, "step": 133350 }, { "epoch": 51.79, "learning_rate": 1.3094627831715212e-05, "loss": 0.0034, "step": 133360 }, { "epoch": 51.79, "learning_rate": 1.3094110032362462e-05, "loss": 0.0281, "step": 133370 }, { "epoch": 51.8, "learning_rate": 1.3093592233009708e-05, "loss": 0.0149, "step": 133380 }, { "epoch": 51.8, "learning_rate": 1.3093074433656958e-05, "loss": 0.0438, "step": 133390 }, { "epoch": 51.81, "learning_rate": 1.3092556634304207e-05, "loss": 0.0329, "step": 133400 }, { "epoch": 51.81, "learning_rate": 1.3092038834951457e-05, "loss": 0.0221, "step": 133410 }, { "epoch": 51.81, "learning_rate": 1.3091521035598707e-05, "loss": 0.0403, "step": 133420 }, { "epoch": 51.82, "learning_rate": 1.3091003236245956e-05, "loss": 0.1138, "step": 133430 }, { "epoch": 51.82, "learning_rate": 1.3090485436893206e-05, "loss": 0.1323, "step": 133440 }, { "epoch": 51.83, "learning_rate": 1.3089967637540456e-05, "loss": 0.0286, "step": 133450 }, { "epoch": 51.83, "learning_rate": 1.3089449838187705e-05, "loss": 0.244, "step": 133460 }, { "epoch": 51.83, "learning_rate": 1.3088932038834951e-05, "loss": 0.0514, "step": 133470 }, { "epoch": 51.84, "learning_rate": 1.3088414239482201e-05, "loss": 0.081, "step": 133480 }, { "epoch": 51.84, "learning_rate": 1.308789644012945e-05, "loss": 0.0937, "step": 133490 }, { "epoch": 51.84, "learning_rate": 1.30873786407767e-05, "loss": 0.0447, "step": 133500 }, { "epoch": 51.85, "learning_rate": 1.308686084142395e-05, "loss": 0.0224, "step": 133510 }, { "epoch": 51.85, "learning_rate": 1.30863430420712e-05, "loss": 0.072, "step": 133520 }, { "epoch": 51.86, "learning_rate": 1.3085825242718449e-05, "loss": 0.0106, "step": 133530 }, { "epoch": 51.86, "learning_rate": 1.3085307443365699e-05, "loss": 0.1252, "step": 133540 }, { "epoch": 51.86, "learning_rate": 1.3084789644012945e-05, "loss": 0.0642, "step": 133550 }, { "epoch": 51.87, "learning_rate": 1.3084271844660195e-05, "loss": 0.0136, "step": 133560 }, { "epoch": 51.87, "learning_rate": 1.3083754045307444e-05, "loss": 0.0917, "step": 133570 }, { "epoch": 51.88, "learning_rate": 1.3083236245954694e-05, "loss": 0.2719, "step": 133580 }, { "epoch": 51.88, "learning_rate": 1.3082718446601943e-05, "loss": 0.1144, "step": 133590 }, { "epoch": 51.88, "learning_rate": 1.3082200647249193e-05, "loss": 0.2071, "step": 133600 }, { "epoch": 51.89, "learning_rate": 1.3081682847896443e-05, "loss": 0.0344, "step": 133610 }, { "epoch": 51.89, "learning_rate": 1.308116504854369e-05, "loss": 0.1053, "step": 133620 }, { "epoch": 51.9, "learning_rate": 1.3080647249190939e-05, "loss": 0.0284, "step": 133630 }, { "epoch": 51.9, "learning_rate": 1.3080129449838188e-05, "loss": 0.122, "step": 133640 }, { "epoch": 51.9, "learning_rate": 1.3079611650485438e-05, "loss": 0.0847, "step": 133650 }, { "epoch": 51.91, "learning_rate": 1.3079093851132687e-05, "loss": 0.1117, "step": 133660 }, { "epoch": 51.91, "learning_rate": 1.3078576051779937e-05, "loss": 0.0894, "step": 133670 }, { "epoch": 51.91, "learning_rate": 1.3078058252427187e-05, "loss": 0.0666, "step": 133680 }, { "epoch": 51.92, "learning_rate": 1.3077540453074436e-05, "loss": 0.0029, "step": 133690 }, { "epoch": 51.92, "learning_rate": 1.3077022653721684e-05, "loss": 0.048, "step": 133700 }, { "epoch": 51.93, "learning_rate": 1.3076504854368932e-05, "loss": 0.0152, "step": 133710 }, { "epoch": 51.93, "learning_rate": 1.3075987055016182e-05, "loss": 0.1696, "step": 133720 }, { "epoch": 51.93, "learning_rate": 1.3075469255663431e-05, "loss": 0.2976, "step": 133730 }, { "epoch": 51.94, "learning_rate": 1.3074951456310681e-05, "loss": 0.0914, "step": 133740 }, { "epoch": 51.94, "learning_rate": 1.307443365695793e-05, "loss": 0.1302, "step": 133750 }, { "epoch": 51.95, "learning_rate": 1.307391585760518e-05, "loss": 0.0947, "step": 133760 }, { "epoch": 51.95, "learning_rate": 1.3073398058252428e-05, "loss": 0.1116, "step": 133770 }, { "epoch": 51.95, "learning_rate": 1.3072880258899678e-05, "loss": 0.0864, "step": 133780 }, { "epoch": 51.96, "learning_rate": 1.3072362459546926e-05, "loss": 0.1238, "step": 133790 }, { "epoch": 51.96, "learning_rate": 1.3071844660194175e-05, "loss": 0.1392, "step": 133800 }, { "epoch": 51.97, "learning_rate": 1.3071326860841425e-05, "loss": 0.0167, "step": 133810 }, { "epoch": 51.97, "learning_rate": 1.3070809061488674e-05, "loss": 0.1485, "step": 133820 }, { "epoch": 51.97, "learning_rate": 1.3070291262135924e-05, "loss": 0.0613, "step": 133830 }, { "epoch": 51.98, "learning_rate": 1.3069773462783174e-05, "loss": 0.0806, "step": 133840 }, { "epoch": 51.98, "learning_rate": 1.3069255663430422e-05, "loss": 0.0922, "step": 133850 }, { "epoch": 51.98, "learning_rate": 1.3068737864077671e-05, "loss": 0.3644, "step": 133860 }, { "epoch": 51.99, "learning_rate": 1.306822006472492e-05, "loss": 0.1268, "step": 133870 }, { "epoch": 51.99, "learning_rate": 1.3067702265372169e-05, "loss": 0.1273, "step": 133880 }, { "epoch": 52.0, "learning_rate": 1.3067184466019418e-05, "loss": 0.0745, "step": 133890 }, { "epoch": 52.0, "learning_rate": 1.3066666666666668e-05, "loss": 0.1039, "step": 133900 }, { "epoch": 52.0, "eval_accuracy": 0.951856946354883, "eval_loss": 0.31140270829200745, "eval_runtime": 8.2636, "eval_samples_per_second": 439.878, "eval_steps_per_second": 55.06, "step": 133900 }, { "epoch": 52.0, "learning_rate": 1.3066148867313918e-05, "loss": 0.0709, "step": 133910 }, { "epoch": 52.01, "learning_rate": 1.3065631067961166e-05, "loss": 0.1453, "step": 133920 }, { "epoch": 52.01, "learning_rate": 1.3065113268608415e-05, "loss": 0.0447, "step": 133930 }, { "epoch": 52.02, "learning_rate": 1.3064595469255665e-05, "loss": 0.0609, "step": 133940 }, { "epoch": 52.02, "learning_rate": 1.3064077669902913e-05, "loss": 0.061, "step": 133950 }, { "epoch": 52.02, "learning_rate": 1.3063559870550162e-05, "loss": 0.1363, "step": 133960 }, { "epoch": 52.03, "learning_rate": 1.3063042071197412e-05, "loss": 0.1477, "step": 133970 }, { "epoch": 52.03, "learning_rate": 1.3062524271844662e-05, "loss": 0.0627, "step": 133980 }, { "epoch": 52.03, "learning_rate": 1.3062006472491911e-05, "loss": 0.1729, "step": 133990 }, { "epoch": 52.04, "learning_rate": 1.3061488673139159e-05, "loss": 0.042, "step": 134000 }, { "epoch": 52.04, "learning_rate": 1.3060970873786409e-05, "loss": 0.0873, "step": 134010 }, { "epoch": 52.05, "learning_rate": 1.3060453074433658e-05, "loss": 0.0053, "step": 134020 }, { "epoch": 52.05, "learning_rate": 1.3059935275080908e-05, "loss": 0.0174, "step": 134030 }, { "epoch": 52.05, "learning_rate": 1.3059417475728156e-05, "loss": 0.2504, "step": 134040 }, { "epoch": 52.06, "learning_rate": 1.3058899676375406e-05, "loss": 0.1361, "step": 134050 }, { "epoch": 52.06, "learning_rate": 1.3058381877022655e-05, "loss": 0.029, "step": 134060 }, { "epoch": 52.07, "learning_rate": 1.3057864077669905e-05, "loss": 0.0981, "step": 134070 }, { "epoch": 52.07, "learning_rate": 1.3057346278317153e-05, "loss": 0.0981, "step": 134080 }, { "epoch": 52.07, "learning_rate": 1.3056828478964402e-05, "loss": 0.0369, "step": 134090 }, { "epoch": 52.08, "learning_rate": 1.3056310679611652e-05, "loss": 0.0695, "step": 134100 }, { "epoch": 52.08, "learning_rate": 1.3055792880258902e-05, "loss": 0.1289, "step": 134110 }, { "epoch": 52.09, "learning_rate": 1.305527508090615e-05, "loss": 0.1293, "step": 134120 }, { "epoch": 52.09, "learning_rate": 1.3054757281553399e-05, "loss": 0.0436, "step": 134130 }, { "epoch": 52.09, "learning_rate": 1.3054239482200649e-05, "loss": 0.0321, "step": 134140 }, { "epoch": 52.1, "learning_rate": 1.3053721682847897e-05, "loss": 0.0723, "step": 134150 }, { "epoch": 52.1, "learning_rate": 1.3053203883495146e-05, "loss": 0.0005, "step": 134160 }, { "epoch": 52.1, "learning_rate": 1.3052686084142396e-05, "loss": 0.0055, "step": 134170 }, { "epoch": 52.11, "learning_rate": 1.3052168284789645e-05, "loss": 0.0259, "step": 134180 }, { "epoch": 52.11, "learning_rate": 1.3051650485436895e-05, "loss": 0.0629, "step": 134190 }, { "epoch": 52.12, "learning_rate": 1.3051132686084143e-05, "loss": 0.1509, "step": 134200 }, { "epoch": 52.12, "learning_rate": 1.3050614886731393e-05, "loss": 0.1206, "step": 134210 }, { "epoch": 52.12, "learning_rate": 1.3050097087378642e-05, "loss": 0.0348, "step": 134220 }, { "epoch": 52.13, "learning_rate": 1.304957928802589e-05, "loss": 0.0726, "step": 134230 }, { "epoch": 52.13, "learning_rate": 1.304906148867314e-05, "loss": 0.0638, "step": 134240 }, { "epoch": 52.14, "learning_rate": 1.304854368932039e-05, "loss": 0.0088, "step": 134250 }, { "epoch": 52.14, "learning_rate": 1.3048025889967639e-05, "loss": 0.1524, "step": 134260 }, { "epoch": 52.14, "learning_rate": 1.3047508090614889e-05, "loss": 0.0724, "step": 134270 }, { "epoch": 52.15, "learning_rate": 1.3046990291262137e-05, "loss": 0.0572, "step": 134280 }, { "epoch": 52.15, "learning_rate": 1.3046472491909386e-05, "loss": 0.0822, "step": 134290 }, { "epoch": 52.16, "learning_rate": 1.3045954692556634e-05, "loss": 0.1829, "step": 134300 }, { "epoch": 52.16, "learning_rate": 1.3045436893203884e-05, "loss": 0.1707, "step": 134310 }, { "epoch": 52.16, "learning_rate": 1.3044919093851133e-05, "loss": 0.0641, "step": 134320 }, { "epoch": 52.17, "learning_rate": 1.3044401294498383e-05, "loss": 0.0833, "step": 134330 }, { "epoch": 52.17, "learning_rate": 1.3043883495145633e-05, "loss": 0.1369, "step": 134340 }, { "epoch": 52.17, "learning_rate": 1.3043365695792882e-05, "loss": 0.0254, "step": 134350 }, { "epoch": 52.18, "learning_rate": 1.304284789644013e-05, "loss": 0.1059, "step": 134360 }, { "epoch": 52.18, "learning_rate": 1.304233009708738e-05, "loss": 0.1016, "step": 134370 }, { "epoch": 52.19, "learning_rate": 1.3041812297734628e-05, "loss": 0.0109, "step": 134380 }, { "epoch": 52.19, "learning_rate": 1.3041294498381877e-05, "loss": 0.0261, "step": 134390 }, { "epoch": 52.19, "learning_rate": 1.3040776699029127e-05, "loss": 0.0029, "step": 134400 }, { "epoch": 52.2, "learning_rate": 1.3040258899676377e-05, "loss": 0.1253, "step": 134410 }, { "epoch": 52.2, "learning_rate": 1.3039741100323626e-05, "loss": 0.0159, "step": 134420 }, { "epoch": 52.21, "learning_rate": 1.3039223300970876e-05, "loss": 0.0919, "step": 134430 }, { "epoch": 52.21, "learning_rate": 1.3038705501618124e-05, "loss": 0.0694, "step": 134440 }, { "epoch": 52.21, "learning_rate": 1.3038187702265372e-05, "loss": 0.0674, "step": 134450 }, { "epoch": 52.22, "learning_rate": 1.3037669902912621e-05, "loss": 0.0359, "step": 134460 }, { "epoch": 52.22, "learning_rate": 1.3037152103559871e-05, "loss": 0.109, "step": 134470 }, { "epoch": 52.23, "learning_rate": 1.303663430420712e-05, "loss": 0.0058, "step": 134480 }, { "epoch": 52.23, "learning_rate": 1.303611650485437e-05, "loss": 0.1067, "step": 134490 }, { "epoch": 52.23, "learning_rate": 1.303559870550162e-05, "loss": 0.0375, "step": 134500 }, { "epoch": 52.24, "learning_rate": 1.303508090614887e-05, "loss": 0.0849, "step": 134510 }, { "epoch": 52.24, "learning_rate": 1.3034563106796117e-05, "loss": 0.0449, "step": 134520 }, { "epoch": 52.24, "learning_rate": 1.3034045307443365e-05, "loss": 0.0362, "step": 134530 }, { "epoch": 52.25, "learning_rate": 1.3033527508090615e-05, "loss": 0.0151, "step": 134540 }, { "epoch": 52.25, "learning_rate": 1.3033009708737864e-05, "loss": 0.1025, "step": 134550 }, { "epoch": 52.26, "learning_rate": 1.3032491909385114e-05, "loss": 0.0572, "step": 134560 }, { "epoch": 52.26, "learning_rate": 1.3031974110032364e-05, "loss": 0.0565, "step": 134570 }, { "epoch": 52.26, "learning_rate": 1.3031456310679613e-05, "loss": 0.1472, "step": 134580 }, { "epoch": 52.27, "learning_rate": 1.3030938511326863e-05, "loss": 0.0674, "step": 134590 }, { "epoch": 52.27, "learning_rate": 1.3030420711974113e-05, "loss": 0.0268, "step": 134600 }, { "epoch": 52.28, "learning_rate": 1.3029902912621359e-05, "loss": 0.0342, "step": 134610 }, { "epoch": 52.28, "learning_rate": 1.3029385113268608e-05, "loss": 0.0858, "step": 134620 }, { "epoch": 52.28, "learning_rate": 1.3028867313915858e-05, "loss": 0.1257, "step": 134630 }, { "epoch": 52.29, "learning_rate": 1.3028349514563108e-05, "loss": 0.0227, "step": 134640 }, { "epoch": 52.29, "learning_rate": 1.3027831715210357e-05, "loss": 0.0486, "step": 134650 }, { "epoch": 52.3, "learning_rate": 1.3027313915857607e-05, "loss": 0.1537, "step": 134660 }, { "epoch": 52.3, "learning_rate": 1.3026796116504856e-05, "loss": 0.0272, "step": 134670 }, { "epoch": 52.3, "learning_rate": 1.3026278317152106e-05, "loss": 0.0683, "step": 134680 }, { "epoch": 52.31, "learning_rate": 1.3025760517799352e-05, "loss": 0.0204, "step": 134690 }, { "epoch": 52.31, "learning_rate": 1.3025242718446602e-05, "loss": 0.0561, "step": 134700 }, { "epoch": 52.31, "learning_rate": 1.3024724919093852e-05, "loss": 0.135, "step": 134710 }, { "epoch": 52.32, "learning_rate": 1.3024207119741101e-05, "loss": 0.045, "step": 134720 }, { "epoch": 52.32, "learning_rate": 1.302368932038835e-05, "loss": 0.037, "step": 134730 }, { "epoch": 52.33, "learning_rate": 1.30231715210356e-05, "loss": 0.183, "step": 134740 }, { "epoch": 52.33, "learning_rate": 1.302265372168285e-05, "loss": 0.048, "step": 134750 }, { "epoch": 52.33, "learning_rate": 1.30221359223301e-05, "loss": 0.1867, "step": 134760 }, { "epoch": 52.34, "learning_rate": 1.3021618122977346e-05, "loss": 0.0485, "step": 134770 }, { "epoch": 52.34, "learning_rate": 1.3021100323624595e-05, "loss": 0.0764, "step": 134780 }, { "epoch": 52.35, "learning_rate": 1.3020582524271845e-05, "loss": 0.0285, "step": 134790 }, { "epoch": 52.35, "learning_rate": 1.3020064724919095e-05, "loss": 0.0161, "step": 134800 }, { "epoch": 52.35, "learning_rate": 1.3019546925566344e-05, "loss": 0.0769, "step": 134810 }, { "epoch": 52.36, "learning_rate": 1.3019029126213594e-05, "loss": 0.0502, "step": 134820 }, { "epoch": 52.36, "learning_rate": 1.3018511326860844e-05, "loss": 0.0967, "step": 134830 }, { "epoch": 52.37, "learning_rate": 1.3017993527508093e-05, "loss": 0.085, "step": 134840 }, { "epoch": 52.37, "learning_rate": 1.301747572815534e-05, "loss": 0.0798, "step": 134850 }, { "epoch": 52.37, "learning_rate": 1.3016957928802589e-05, "loss": 0.0719, "step": 134860 }, { "epoch": 52.38, "learning_rate": 1.3016440129449839e-05, "loss": 0.1229, "step": 134870 }, { "epoch": 52.38, "learning_rate": 1.3015922330097088e-05, "loss": 0.0452, "step": 134880 }, { "epoch": 52.38, "learning_rate": 1.3015404530744338e-05, "loss": 0.0512, "step": 134890 }, { "epoch": 52.39, "learning_rate": 1.3014886731391588e-05, "loss": 0.0996, "step": 134900 }, { "epoch": 52.39, "learning_rate": 1.3014368932038837e-05, "loss": 0.0081, "step": 134910 }, { "epoch": 52.4, "learning_rate": 1.3013851132686087e-05, "loss": 0.1026, "step": 134920 }, { "epoch": 52.4, "learning_rate": 1.3013333333333333e-05, "loss": 0.0745, "step": 134930 }, { "epoch": 52.4, "learning_rate": 1.3012815533980583e-05, "loss": 0.112, "step": 134940 }, { "epoch": 52.41, "learning_rate": 1.3012297734627832e-05, "loss": 0.0348, "step": 134950 }, { "epoch": 52.41, "learning_rate": 1.3011779935275082e-05, "loss": 0.0235, "step": 134960 }, { "epoch": 52.42, "learning_rate": 1.3011262135922331e-05, "loss": 0.1054, "step": 134970 }, { "epoch": 52.42, "learning_rate": 1.3010744336569581e-05, "loss": 0.1154, "step": 134980 }, { "epoch": 52.42, "learning_rate": 1.301022653721683e-05, "loss": 0.0577, "step": 134990 }, { "epoch": 52.43, "learning_rate": 1.300970873786408e-05, "loss": 0.0688, "step": 135000 }, { "epoch": 52.43, "learning_rate": 1.3009190938511327e-05, "loss": 0.0457, "step": 135010 }, { "epoch": 52.43, "learning_rate": 1.3008673139158576e-05, "loss": 0.1648, "step": 135020 }, { "epoch": 52.44, "learning_rate": 1.3008155339805826e-05, "loss": 0.0941, "step": 135030 }, { "epoch": 52.44, "learning_rate": 1.3007637540453075e-05, "loss": 0.0871, "step": 135040 }, { "epoch": 52.45, "learning_rate": 1.3007119741100325e-05, "loss": 0.1435, "step": 135050 }, { "epoch": 52.45, "learning_rate": 1.3006601941747575e-05, "loss": 0.0563, "step": 135060 }, { "epoch": 52.45, "learning_rate": 1.3006084142394824e-05, "loss": 0.2036, "step": 135070 }, { "epoch": 52.46, "learning_rate": 1.3005566343042074e-05, "loss": 0.1116, "step": 135080 }, { "epoch": 52.46, "learning_rate": 1.300504854368932e-05, "loss": 0.0559, "step": 135090 }, { "epoch": 52.47, "learning_rate": 1.300453074433657e-05, "loss": 0.02, "step": 135100 }, { "epoch": 52.47, "learning_rate": 1.300401294498382e-05, "loss": 0.0059, "step": 135110 }, { "epoch": 52.47, "learning_rate": 1.3003495145631069e-05, "loss": 0.0094, "step": 135120 }, { "epoch": 52.48, "learning_rate": 1.3002977346278319e-05, "loss": 0.0581, "step": 135130 }, { "epoch": 52.48, "learning_rate": 1.3002459546925568e-05, "loss": 0.0288, "step": 135140 }, { "epoch": 52.49, "learning_rate": 1.3001941747572818e-05, "loss": 0.0712, "step": 135150 }, { "epoch": 52.49, "learning_rate": 1.3001423948220067e-05, "loss": 0.2684, "step": 135160 }, { "epoch": 52.49, "learning_rate": 1.3000906148867315e-05, "loss": 0.1627, "step": 135170 }, { "epoch": 52.5, "learning_rate": 1.3000388349514563e-05, "loss": 0.0744, "step": 135180 }, { "epoch": 52.5, "learning_rate": 1.2999870550161813e-05, "loss": 0.2232, "step": 135190 }, { "epoch": 52.5, "learning_rate": 1.2999352750809062e-05, "loss": 0.0296, "step": 135200 }, { "epoch": 52.51, "learning_rate": 1.2998834951456312e-05, "loss": 0.018, "step": 135210 }, { "epoch": 52.51, "learning_rate": 1.2998317152103562e-05, "loss": 0.1778, "step": 135220 }, { "epoch": 52.52, "learning_rate": 1.2997799352750811e-05, "loss": 0.0768, "step": 135230 }, { "epoch": 52.52, "learning_rate": 1.299728155339806e-05, "loss": 0.0269, "step": 135240 }, { "epoch": 52.52, "learning_rate": 1.2996763754045309e-05, "loss": 0.1571, "step": 135250 }, { "epoch": 52.53, "learning_rate": 1.2996245954692557e-05, "loss": 0.0436, "step": 135260 }, { "epoch": 52.53, "learning_rate": 1.2995728155339806e-05, "loss": 0.1329, "step": 135270 }, { "epoch": 52.54, "learning_rate": 1.2995210355987056e-05, "loss": 0.032, "step": 135280 }, { "epoch": 52.54, "learning_rate": 1.2994692556634306e-05, "loss": 0.0495, "step": 135290 }, { "epoch": 52.54, "learning_rate": 1.2994174757281555e-05, "loss": 0.0663, "step": 135300 }, { "epoch": 52.55, "learning_rate": 1.2993656957928805e-05, "loss": 0.0295, "step": 135310 }, { "epoch": 52.55, "learning_rate": 1.2993139158576053e-05, "loss": 0.0639, "step": 135320 }, { "epoch": 52.56, "learning_rate": 1.2992621359223302e-05, "loss": 0.0107, "step": 135330 }, { "epoch": 52.56, "learning_rate": 1.299210355987055e-05, "loss": 0.057, "step": 135340 }, { "epoch": 52.56, "learning_rate": 1.29915857605178e-05, "loss": 0.1533, "step": 135350 }, { "epoch": 52.57, "learning_rate": 1.299106796116505e-05, "loss": 0.0812, "step": 135360 }, { "epoch": 52.57, "learning_rate": 1.29905501618123e-05, "loss": 0.0565, "step": 135370 }, { "epoch": 52.57, "learning_rate": 1.2990032362459549e-05, "loss": 0.0389, "step": 135380 }, { "epoch": 52.58, "learning_rate": 1.2989514563106797e-05, "loss": 0.1392, "step": 135390 }, { "epoch": 52.58, "learning_rate": 1.2988996763754046e-05, "loss": 0.1665, "step": 135400 }, { "epoch": 52.59, "learning_rate": 1.2988478964401296e-05, "loss": 0.0269, "step": 135410 }, { "epoch": 52.59, "learning_rate": 1.2987961165048544e-05, "loss": 0.062, "step": 135420 }, { "epoch": 52.59, "learning_rate": 1.2987443365695794e-05, "loss": 0.1622, "step": 135430 }, { "epoch": 52.6, "learning_rate": 1.2986925566343043e-05, "loss": 0.071, "step": 135440 }, { "epoch": 52.6, "learning_rate": 1.2986407766990293e-05, "loss": 0.0471, "step": 135450 }, { "epoch": 52.61, "learning_rate": 1.2985889967637542e-05, "loss": 0.0835, "step": 135460 }, { "epoch": 52.61, "learning_rate": 1.298537216828479e-05, "loss": 0.0589, "step": 135470 }, { "epoch": 52.61, "learning_rate": 1.298485436893204e-05, "loss": 0.0195, "step": 135480 }, { "epoch": 52.62, "learning_rate": 1.298433656957929e-05, "loss": 0.0301, "step": 135490 }, { "epoch": 52.62, "learning_rate": 1.2983818770226537e-05, "loss": 0.1107, "step": 135500 }, { "epoch": 52.63, "learning_rate": 1.2983300970873787e-05, "loss": 0.1227, "step": 135510 }, { "epoch": 52.63, "learning_rate": 1.2982783171521037e-05, "loss": 0.0078, "step": 135520 }, { "epoch": 52.63, "learning_rate": 1.2982265372168286e-05, "loss": 0.1769, "step": 135530 }, { "epoch": 52.64, "learning_rate": 1.2981747572815534e-05, "loss": 0.0219, "step": 135540 }, { "epoch": 52.64, "learning_rate": 1.2981229773462784e-05, "loss": 0.1567, "step": 135550 }, { "epoch": 52.64, "learning_rate": 1.2980711974110033e-05, "loss": 0.0388, "step": 135560 }, { "epoch": 52.65, "learning_rate": 1.2980194174757283e-05, "loss": 0.1052, "step": 135570 }, { "epoch": 52.65, "learning_rate": 1.2979676375404531e-05, "loss": 0.0117, "step": 135580 }, { "epoch": 52.66, "learning_rate": 1.297915857605178e-05, "loss": 0.0276, "step": 135590 }, { "epoch": 52.66, "learning_rate": 1.297864077669903e-05, "loss": 0.0919, "step": 135600 }, { "epoch": 52.66, "learning_rate": 1.297812297734628e-05, "loss": 0.176, "step": 135610 }, { "epoch": 52.67, "learning_rate": 1.2977605177993528e-05, "loss": 0.0898, "step": 135620 }, { "epoch": 52.67, "learning_rate": 1.2977087378640777e-05, "loss": 0.0351, "step": 135630 }, { "epoch": 52.68, "learning_rate": 1.2976569579288027e-05, "loss": 0.0745, "step": 135640 }, { "epoch": 52.68, "learning_rate": 1.2976051779935277e-05, "loss": 0.0573, "step": 135650 }, { "epoch": 52.68, "learning_rate": 1.2975533980582525e-05, "loss": 0.1479, "step": 135660 }, { "epoch": 52.69, "learning_rate": 1.2975016181229774e-05, "loss": 0.0567, "step": 135670 }, { "epoch": 52.69, "learning_rate": 1.2974498381877024e-05, "loss": 0.0771, "step": 135680 }, { "epoch": 52.7, "learning_rate": 1.2973980582524273e-05, "loss": 0.0267, "step": 135690 }, { "epoch": 52.7, "learning_rate": 1.2973462783171521e-05, "loss": 0.0445, "step": 135700 }, { "epoch": 52.7, "learning_rate": 1.2972944983818771e-05, "loss": 0.1099, "step": 135710 }, { "epoch": 52.71, "learning_rate": 1.297242718446602e-05, "loss": 0.0609, "step": 135720 }, { "epoch": 52.71, "learning_rate": 1.297190938511327e-05, "loss": 0.1361, "step": 135730 }, { "epoch": 52.71, "learning_rate": 1.297139158576052e-05, "loss": 0.0444, "step": 135740 }, { "epoch": 52.72, "learning_rate": 1.2970873786407768e-05, "loss": 0.075, "step": 135750 }, { "epoch": 52.72, "learning_rate": 1.2970355987055017e-05, "loss": 0.0034, "step": 135760 }, { "epoch": 52.73, "learning_rate": 1.2969838187702265e-05, "loss": 0.0464, "step": 135770 }, { "epoch": 52.73, "learning_rate": 1.2969320388349515e-05, "loss": 0.0674, "step": 135780 }, { "epoch": 52.73, "learning_rate": 1.2968802588996765e-05, "loss": 0.0419, "step": 135790 }, { "epoch": 52.74, "learning_rate": 1.2968284789644014e-05, "loss": 0.0325, "step": 135800 }, { "epoch": 52.74, "learning_rate": 1.2967766990291264e-05, "loss": 0.0758, "step": 135810 }, { "epoch": 52.75, "learning_rate": 1.2967249190938513e-05, "loss": 0.198, "step": 135820 }, { "epoch": 52.75, "learning_rate": 1.2966731391585761e-05, "loss": 0.1668, "step": 135830 }, { "epoch": 52.75, "learning_rate": 1.2966213592233011e-05, "loss": 0.108, "step": 135840 }, { "epoch": 52.76, "learning_rate": 1.2965695792880259e-05, "loss": 0.058, "step": 135850 }, { "epoch": 52.76, "learning_rate": 1.2965177993527508e-05, "loss": 0.1207, "step": 135860 }, { "epoch": 52.77, "learning_rate": 1.2964660194174758e-05, "loss": 0.1166, "step": 135870 }, { "epoch": 52.77, "learning_rate": 1.2964142394822008e-05, "loss": 0.0348, "step": 135880 }, { "epoch": 52.77, "learning_rate": 1.2963624595469257e-05, "loss": 0.0258, "step": 135890 }, { "epoch": 52.78, "learning_rate": 1.2963106796116507e-05, "loss": 0.1234, "step": 135900 }, { "epoch": 52.78, "learning_rate": 1.2962588996763755e-05, "loss": 0.1146, "step": 135910 }, { "epoch": 52.78, "learning_rate": 1.2962071197411003e-05, "loss": 0.0231, "step": 135920 }, { "epoch": 52.79, "learning_rate": 1.2961553398058252e-05, "loss": 0.1982, "step": 135930 }, { "epoch": 52.79, "learning_rate": 1.2961035598705502e-05, "loss": 0.128, "step": 135940 }, { "epoch": 52.8, "learning_rate": 1.2960517799352752e-05, "loss": 0.0755, "step": 135950 }, { "epoch": 52.8, "learning_rate": 1.2960000000000001e-05, "loss": 0.0276, "step": 135960 }, { "epoch": 52.8, "learning_rate": 1.2959482200647251e-05, "loss": 0.1435, "step": 135970 }, { "epoch": 52.81, "learning_rate": 1.29589644012945e-05, "loss": 0.1442, "step": 135980 }, { "epoch": 52.81, "learning_rate": 1.2958446601941748e-05, "loss": 0.2039, "step": 135990 }, { "epoch": 52.82, "learning_rate": 1.2957928802588996e-05, "loss": 0.1069, "step": 136000 }, { "epoch": 52.82, "learning_rate": 1.2957411003236246e-05, "loss": 0.1514, "step": 136010 }, { "epoch": 52.82, "learning_rate": 1.2956893203883496e-05, "loss": 0.1401, "step": 136020 }, { "epoch": 52.83, "learning_rate": 1.2956375404530745e-05, "loss": 0.0493, "step": 136030 }, { "epoch": 52.83, "learning_rate": 1.2955857605177995e-05, "loss": 0.0407, "step": 136040 }, { "epoch": 52.83, "learning_rate": 1.2955339805825244e-05, "loss": 0.0254, "step": 136050 }, { "epoch": 52.84, "learning_rate": 1.2954822006472494e-05, "loss": 0.0193, "step": 136060 }, { "epoch": 52.84, "learning_rate": 1.295430420711974e-05, "loss": 0.0551, "step": 136070 }, { "epoch": 52.85, "learning_rate": 1.295378640776699e-05, "loss": 0.1208, "step": 136080 }, { "epoch": 52.85, "learning_rate": 1.295326860841424e-05, "loss": 0.0861, "step": 136090 }, { "epoch": 52.85, "learning_rate": 1.295275080906149e-05, "loss": 0.1217, "step": 136100 }, { "epoch": 52.86, "learning_rate": 1.2952233009708739e-05, "loss": 0.209, "step": 136110 }, { "epoch": 52.86, "learning_rate": 1.2951715210355988e-05, "loss": 0.1053, "step": 136120 }, { "epoch": 52.87, "learning_rate": 1.2951197411003238e-05, "loss": 0.0066, "step": 136130 }, { "epoch": 52.87, "learning_rate": 1.2950679611650488e-05, "loss": 0.0042, "step": 136140 }, { "epoch": 52.87, "learning_rate": 1.2950161812297734e-05, "loss": 0.0414, "step": 136150 }, { "epoch": 52.88, "learning_rate": 1.2949644012944983e-05, "loss": 0.0471, "step": 136160 }, { "epoch": 52.88, "learning_rate": 1.2949126213592233e-05, "loss": 0.0758, "step": 136170 }, { "epoch": 52.89, "learning_rate": 1.2948608414239483e-05, "loss": 0.0038, "step": 136180 }, { "epoch": 52.89, "learning_rate": 1.2948090614886732e-05, "loss": 0.0928, "step": 136190 }, { "epoch": 52.89, "learning_rate": 1.2947572815533982e-05, "loss": 0.1305, "step": 136200 }, { "epoch": 52.9, "learning_rate": 1.2947055016181232e-05, "loss": 0.115, "step": 136210 }, { "epoch": 52.9, "learning_rate": 1.2946537216828481e-05, "loss": 0.2102, "step": 136220 }, { "epoch": 52.9, "learning_rate": 1.294601941747573e-05, "loss": 0.0857, "step": 136230 }, { "epoch": 52.91, "learning_rate": 1.2945501618122977e-05, "loss": 0.0118, "step": 136240 }, { "epoch": 52.91, "learning_rate": 1.2944983818770227e-05, "loss": 0.0741, "step": 136250 }, { "epoch": 52.92, "learning_rate": 1.2944466019417476e-05, "loss": 0.0125, "step": 136260 }, { "epoch": 52.92, "learning_rate": 1.2943948220064726e-05, "loss": 0.0862, "step": 136270 }, { "epoch": 52.92, "learning_rate": 1.2943430420711976e-05, "loss": 0.0965, "step": 136280 }, { "epoch": 52.93, "learning_rate": 1.2942912621359225e-05, "loss": 0.0906, "step": 136290 }, { "epoch": 52.93, "learning_rate": 1.2942394822006475e-05, "loss": 0.0609, "step": 136300 }, { "epoch": 52.94, "learning_rate": 1.2941877022653724e-05, "loss": 0.1302, "step": 136310 }, { "epoch": 52.94, "learning_rate": 1.294135922330097e-05, "loss": 0.0822, "step": 136320 }, { "epoch": 52.94, "learning_rate": 1.294084142394822e-05, "loss": 0.0235, "step": 136330 }, { "epoch": 52.95, "learning_rate": 1.294032362459547e-05, "loss": 0.0097, "step": 136340 }, { "epoch": 52.95, "learning_rate": 1.293980582524272e-05, "loss": 0.1538, "step": 136350 }, { "epoch": 52.96, "learning_rate": 1.2939288025889969e-05, "loss": 0.1384, "step": 136360 }, { "epoch": 52.96, "learning_rate": 1.2938770226537219e-05, "loss": 0.1116, "step": 136370 }, { "epoch": 52.96, "learning_rate": 1.2938252427184468e-05, "loss": 0.0358, "step": 136380 }, { "epoch": 52.97, "learning_rate": 1.2937734627831718e-05, "loss": 0.211, "step": 136390 }, { "epoch": 52.97, "learning_rate": 1.2937216828478964e-05, "loss": 0.071, "step": 136400 }, { "epoch": 52.97, "learning_rate": 1.2936699029126214e-05, "loss": 0.048, "step": 136410 }, { "epoch": 52.98, "learning_rate": 1.2936181229773463e-05, "loss": 0.1028, "step": 136420 }, { "epoch": 52.98, "learning_rate": 1.2935663430420713e-05, "loss": 0.1078, "step": 136430 }, { "epoch": 52.99, "learning_rate": 1.2935145631067963e-05, "loss": 0.0413, "step": 136440 }, { "epoch": 52.99, "learning_rate": 1.2934627831715212e-05, "loss": 0.1997, "step": 136450 }, { "epoch": 52.99, "learning_rate": 1.2934110032362462e-05, "loss": 0.0489, "step": 136460 }, { "epoch": 53.0, "learning_rate": 1.2933592233009711e-05, "loss": 0.0527, "step": 136470 }, { "epoch": 53.0, "eval_accuracy": 0.9477303988995873, "eval_loss": 0.3252100348472595, "eval_runtime": 8.3172, "eval_samples_per_second": 437.044, "eval_steps_per_second": 54.706, "step": 136475 }, { "epoch": 53.0, "learning_rate": 1.2933074433656958e-05, "loss": 0.0636, "step": 136480 }, { "epoch": 53.01, "learning_rate": 1.2932556634304207e-05, "loss": 0.0243, "step": 136490 }, { "epoch": 53.01, "learning_rate": 1.2932038834951457e-05, "loss": 0.0908, "step": 136500 }, { "epoch": 53.01, "learning_rate": 1.2931521035598707e-05, "loss": 0.0429, "step": 136510 }, { "epoch": 53.02, "learning_rate": 1.2931003236245956e-05, "loss": 0.2102, "step": 136520 }, { "epoch": 53.02, "learning_rate": 1.2930485436893206e-05, "loss": 0.0568, "step": 136530 }, { "epoch": 53.03, "learning_rate": 1.2929967637540455e-05, "loss": 0.0271, "step": 136540 }, { "epoch": 53.03, "learning_rate": 1.2929449838187705e-05, "loss": 0.0194, "step": 136550 }, { "epoch": 53.03, "learning_rate": 1.2928932038834951e-05, "loss": 0.072, "step": 136560 }, { "epoch": 53.04, "learning_rate": 1.2928414239482201e-05, "loss": 0.155, "step": 136570 }, { "epoch": 53.04, "learning_rate": 1.292789644012945e-05, "loss": 0.0545, "step": 136580 }, { "epoch": 53.04, "learning_rate": 1.29273786407767e-05, "loss": 0.075, "step": 136590 }, { "epoch": 53.05, "learning_rate": 1.292686084142395e-05, "loss": 0.1236, "step": 136600 }, { "epoch": 53.05, "learning_rate": 1.29263430420712e-05, "loss": 0.066, "step": 136610 }, { "epoch": 53.06, "learning_rate": 1.2925825242718449e-05, "loss": 0.066, "step": 136620 }, { "epoch": 53.06, "learning_rate": 1.2925307443365699e-05, "loss": 0.0057, "step": 136630 }, { "epoch": 53.06, "learning_rate": 1.2924789644012945e-05, "loss": 0.0389, "step": 136640 }, { "epoch": 53.07, "learning_rate": 1.2924271844660194e-05, "loss": 0.0229, "step": 136650 }, { "epoch": 53.07, "learning_rate": 1.2923754045307444e-05, "loss": 0.0346, "step": 136660 }, { "epoch": 53.08, "learning_rate": 1.2923236245954694e-05, "loss": 0.112, "step": 136670 }, { "epoch": 53.08, "learning_rate": 1.2922718446601943e-05, "loss": 0.0854, "step": 136680 }, { "epoch": 53.08, "learning_rate": 1.2922200647249193e-05, "loss": 0.0109, "step": 136690 }, { "epoch": 53.09, "learning_rate": 1.2921682847896443e-05, "loss": 0.0381, "step": 136700 }, { "epoch": 53.09, "learning_rate": 1.292116504854369e-05, "loss": 0.2415, "step": 136710 }, { "epoch": 53.1, "learning_rate": 1.2920647249190938e-05, "loss": 0.0098, "step": 136720 }, { "epoch": 53.1, "learning_rate": 1.2920129449838188e-05, "loss": 0.0601, "step": 136730 }, { "epoch": 53.1, "learning_rate": 1.2919611650485438e-05, "loss": 0.0733, "step": 136740 }, { "epoch": 53.11, "learning_rate": 1.2919093851132687e-05, "loss": 0.0714, "step": 136750 }, { "epoch": 53.11, "learning_rate": 1.2918576051779937e-05, "loss": 0.0189, "step": 136760 }, { "epoch": 53.11, "learning_rate": 1.2918058252427186e-05, "loss": 0.1456, "step": 136770 }, { "epoch": 53.12, "learning_rate": 1.2917540453074436e-05, "loss": 0.058, "step": 136780 }, { "epoch": 53.12, "learning_rate": 1.2917022653721684e-05, "loss": 0.0703, "step": 136790 }, { "epoch": 53.13, "learning_rate": 1.2916504854368934e-05, "loss": 0.0353, "step": 136800 }, { "epoch": 53.13, "learning_rate": 1.2915987055016182e-05, "loss": 0.0254, "step": 136810 }, { "epoch": 53.13, "learning_rate": 1.2915469255663431e-05, "loss": 0.0158, "step": 136820 }, { "epoch": 53.14, "learning_rate": 1.291495145631068e-05, "loss": 0.0568, "step": 136830 }, { "epoch": 53.14, "learning_rate": 1.291443365695793e-05, "loss": 0.1673, "step": 136840 }, { "epoch": 53.15, "learning_rate": 1.291391585760518e-05, "loss": 0.0253, "step": 136850 }, { "epoch": 53.15, "learning_rate": 1.2913398058252428e-05, "loss": 0.1077, "step": 136860 }, { "epoch": 53.15, "learning_rate": 1.2912880258899678e-05, "loss": 0.0519, "step": 136870 }, { "epoch": 53.16, "learning_rate": 1.2912362459546927e-05, "loss": 0.1228, "step": 136880 }, { "epoch": 53.16, "learning_rate": 1.2911844660194175e-05, "loss": 0.0619, "step": 136890 }, { "epoch": 53.17, "learning_rate": 1.2911326860841425e-05, "loss": 0.0405, "step": 136900 }, { "epoch": 53.17, "learning_rate": 1.2910809061488674e-05, "loss": 0.0855, "step": 136910 }, { "epoch": 53.17, "learning_rate": 1.2910291262135924e-05, "loss": 0.1281, "step": 136920 }, { "epoch": 53.18, "learning_rate": 1.2909773462783174e-05, "loss": 0.0456, "step": 136930 }, { "epoch": 53.18, "learning_rate": 1.2909255663430421e-05, "loss": 0.0005, "step": 136940 }, { "epoch": 53.18, "learning_rate": 1.2908737864077671e-05, "loss": 0.0208, "step": 136950 }, { "epoch": 53.19, "learning_rate": 1.290822006472492e-05, "loss": 0.0622, "step": 136960 }, { "epoch": 53.19, "learning_rate": 1.2907702265372169e-05, "loss": 0.0312, "step": 136970 }, { "epoch": 53.2, "learning_rate": 1.2907184466019418e-05, "loss": 0.0224, "step": 136980 }, { "epoch": 53.2, "learning_rate": 1.2906666666666668e-05, "loss": 0.075, "step": 136990 }, { "epoch": 53.2, "learning_rate": 1.2906148867313918e-05, "loss": 0.0815, "step": 137000 }, { "epoch": 53.21, "learning_rate": 1.2905631067961165e-05, "loss": 0.1268, "step": 137010 }, { "epoch": 53.21, "learning_rate": 1.2905113268608415e-05, "loss": 0.0266, "step": 137020 }, { "epoch": 53.22, "learning_rate": 1.2904595469255665e-05, "loss": 0.066, "step": 137030 }, { "epoch": 53.22, "learning_rate": 1.2904077669902914e-05, "loss": 0.0169, "step": 137040 }, { "epoch": 53.22, "learning_rate": 1.2903559870550162e-05, "loss": 0.0717, "step": 137050 }, { "epoch": 53.23, "learning_rate": 1.2903042071197412e-05, "loss": 0.1607, "step": 137060 }, { "epoch": 53.23, "learning_rate": 1.2902524271844661e-05, "loss": 0.2586, "step": 137070 }, { "epoch": 53.23, "learning_rate": 1.2902006472491911e-05, "loss": 0.043, "step": 137080 }, { "epoch": 53.24, "learning_rate": 1.2901488673139159e-05, "loss": 0.1147, "step": 137090 }, { "epoch": 53.24, "learning_rate": 1.2900970873786409e-05, "loss": 0.0608, "step": 137100 }, { "epoch": 53.25, "learning_rate": 1.2900453074433658e-05, "loss": 0.2575, "step": 137110 }, { "epoch": 53.25, "learning_rate": 1.2899935275080908e-05, "loss": 0.0694, "step": 137120 }, { "epoch": 53.25, "learning_rate": 1.2899417475728156e-05, "loss": 0.0396, "step": 137130 }, { "epoch": 53.26, "learning_rate": 1.2898899676375405e-05, "loss": 0.1162, "step": 137140 }, { "epoch": 53.26, "learning_rate": 1.2898381877022655e-05, "loss": 0.1035, "step": 137150 }, { "epoch": 53.27, "learning_rate": 1.2897864077669905e-05, "loss": 0.2275, "step": 137160 }, { "epoch": 53.27, "learning_rate": 1.2897346278317153e-05, "loss": 0.0011, "step": 137170 }, { "epoch": 53.27, "learning_rate": 1.2896828478964402e-05, "loss": 0.0429, "step": 137180 }, { "epoch": 53.28, "learning_rate": 1.2896310679611652e-05, "loss": 0.0545, "step": 137190 }, { "epoch": 53.28, "learning_rate": 1.2895792880258901e-05, "loss": 0.1625, "step": 137200 }, { "epoch": 53.29, "learning_rate": 1.289527508090615e-05, "loss": 0.1022, "step": 137210 }, { "epoch": 53.29, "learning_rate": 1.2894757281553399e-05, "loss": 0.0015, "step": 137220 }, { "epoch": 53.29, "learning_rate": 1.2894239482200649e-05, "loss": 0.0211, "step": 137230 }, { "epoch": 53.3, "learning_rate": 1.2893721682847896e-05, "loss": 0.0294, "step": 137240 }, { "epoch": 53.3, "learning_rate": 1.2893203883495146e-05, "loss": 0.0945, "step": 137250 }, { "epoch": 53.3, "learning_rate": 1.2892686084142396e-05, "loss": 0.288, "step": 137260 }, { "epoch": 53.31, "learning_rate": 1.2892168284789645e-05, "loss": 0.1213, "step": 137270 }, { "epoch": 53.31, "learning_rate": 1.2891650485436895e-05, "loss": 0.0177, "step": 137280 }, { "epoch": 53.32, "learning_rate": 1.2891132686084143e-05, "loss": 0.0422, "step": 137290 }, { "epoch": 53.32, "learning_rate": 1.2890614886731392e-05, "loss": 0.1525, "step": 137300 }, { "epoch": 53.32, "learning_rate": 1.2890097087378642e-05, "loss": 0.1499, "step": 137310 }, { "epoch": 53.33, "learning_rate": 1.288957928802589e-05, "loss": 0.0256, "step": 137320 }, { "epoch": 53.33, "learning_rate": 1.288906148867314e-05, "loss": 0.1243, "step": 137330 }, { "epoch": 53.34, "learning_rate": 1.288854368932039e-05, "loss": 0.0513, "step": 137340 }, { "epoch": 53.34, "learning_rate": 1.2888025889967639e-05, "loss": 0.0813, "step": 137350 }, { "epoch": 53.34, "learning_rate": 1.2887508090614889e-05, "loss": 0.013, "step": 137360 }, { "epoch": 53.35, "learning_rate": 1.2886990291262138e-05, "loss": 0.1021, "step": 137370 }, { "epoch": 53.35, "learning_rate": 1.2886472491909386e-05, "loss": 0.1033, "step": 137380 }, { "epoch": 53.36, "learning_rate": 1.2885954692556634e-05, "loss": 0.0038, "step": 137390 }, { "epoch": 53.36, "learning_rate": 1.2885436893203884e-05, "loss": 0.0751, "step": 137400 }, { "epoch": 53.36, "learning_rate": 1.2884919093851133e-05, "loss": 0.0566, "step": 137410 }, { "epoch": 53.37, "learning_rate": 1.2884401294498383e-05, "loss": 0.0543, "step": 137420 }, { "epoch": 53.37, "learning_rate": 1.2883883495145632e-05, "loss": 0.0441, "step": 137430 }, { "epoch": 53.37, "learning_rate": 1.2883365695792882e-05, "loss": 0.1596, "step": 137440 }, { "epoch": 53.38, "learning_rate": 1.2882847896440132e-05, "loss": 0.0862, "step": 137450 }, { "epoch": 53.38, "learning_rate": 1.288233009708738e-05, "loss": 0.0169, "step": 137460 }, { "epoch": 53.39, "learning_rate": 1.2881812297734628e-05, "loss": 0.0776, "step": 137470 }, { "epoch": 53.39, "learning_rate": 1.2881294498381877e-05, "loss": 0.0004, "step": 137480 }, { "epoch": 53.39, "learning_rate": 1.2880776699029127e-05, "loss": 0.0188, "step": 137490 }, { "epoch": 53.4, "learning_rate": 1.2880258899676376e-05, "loss": 0.0332, "step": 137500 }, { "epoch": 53.4, "learning_rate": 1.2879741100323626e-05, "loss": 0.0672, "step": 137510 }, { "epoch": 53.41, "learning_rate": 1.2879223300970876e-05, "loss": 0.2091, "step": 137520 }, { "epoch": 53.41, "learning_rate": 1.2878705501618125e-05, "loss": 0.0898, "step": 137530 }, { "epoch": 53.41, "learning_rate": 1.2878187702265371e-05, "loss": 0.0942, "step": 137540 }, { "epoch": 53.42, "learning_rate": 1.2877669902912621e-05, "loss": 0.0846, "step": 137550 }, { "epoch": 53.42, "learning_rate": 1.287715210355987e-05, "loss": 0.0284, "step": 137560 }, { "epoch": 53.43, "learning_rate": 1.287663430420712e-05, "loss": 0.0101, "step": 137570 }, { "epoch": 53.43, "learning_rate": 1.287611650485437e-05, "loss": 0.1005, "step": 137580 }, { "epoch": 53.43, "learning_rate": 1.287559870550162e-05, "loss": 0.2141, "step": 137590 }, { "epoch": 53.44, "learning_rate": 1.287508090614887e-05, "loss": 0.0702, "step": 137600 }, { "epoch": 53.44, "learning_rate": 1.2874563106796119e-05, "loss": 0.0529, "step": 137610 }, { "epoch": 53.44, "learning_rate": 1.2874045307443365e-05, "loss": 0.0543, "step": 137620 }, { "epoch": 53.45, "learning_rate": 1.2873527508090615e-05, "loss": 0.014, "step": 137630 }, { "epoch": 53.45, "learning_rate": 1.2873009708737864e-05, "loss": 0.011, "step": 137640 }, { "epoch": 53.46, "learning_rate": 1.2872491909385114e-05, "loss": 0.0161, "step": 137650 }, { "epoch": 53.46, "learning_rate": 1.2871974110032363e-05, "loss": 0.0159, "step": 137660 }, { "epoch": 53.46, "learning_rate": 1.2871456310679613e-05, "loss": 0.2819, "step": 137670 }, { "epoch": 53.47, "learning_rate": 1.2870938511326863e-05, "loss": 0.1407, "step": 137680 }, { "epoch": 53.47, "learning_rate": 1.2870420711974112e-05, "loss": 0.207, "step": 137690 }, { "epoch": 53.48, "learning_rate": 1.2869902912621359e-05, "loss": 0.0357, "step": 137700 }, { "epoch": 53.48, "learning_rate": 1.2869385113268608e-05, "loss": 0.1306, "step": 137710 }, { "epoch": 53.48, "learning_rate": 1.2868867313915858e-05, "loss": 0.0915, "step": 137720 }, { "epoch": 53.49, "learning_rate": 1.2868349514563107e-05, "loss": 0.0052, "step": 137730 }, { "epoch": 53.49, "learning_rate": 1.2867831715210357e-05, "loss": 0.0564, "step": 137740 }, { "epoch": 53.5, "learning_rate": 1.2867313915857607e-05, "loss": 0.1943, "step": 137750 }, { "epoch": 53.5, "learning_rate": 1.2866796116504856e-05, "loss": 0.0716, "step": 137760 }, { "epoch": 53.5, "learning_rate": 1.2866278317152106e-05, "loss": 0.0589, "step": 137770 }, { "epoch": 53.51, "learning_rate": 1.2865760517799352e-05, "loss": 0.1383, "step": 137780 }, { "epoch": 53.51, "learning_rate": 1.2865242718446602e-05, "loss": 0.1479, "step": 137790 }, { "epoch": 53.51, "learning_rate": 1.2864724919093851e-05, "loss": 0.0332, "step": 137800 }, { "epoch": 53.52, "learning_rate": 1.2864207119741101e-05, "loss": 0.024, "step": 137810 }, { "epoch": 53.52, "learning_rate": 1.286368932038835e-05, "loss": 0.1534, "step": 137820 }, { "epoch": 53.53, "learning_rate": 1.28631715210356e-05, "loss": 0.1334, "step": 137830 }, { "epoch": 53.53, "learning_rate": 1.286265372168285e-05, "loss": 0.0825, "step": 137840 }, { "epoch": 53.53, "learning_rate": 1.28621359223301e-05, "loss": 0.0999, "step": 137850 }, { "epoch": 53.54, "learning_rate": 1.2861618122977346e-05, "loss": 0.1139, "step": 137860 }, { "epoch": 53.54, "learning_rate": 1.2861100323624595e-05, "loss": 0.3255, "step": 137870 }, { "epoch": 53.55, "learning_rate": 1.2860582524271845e-05, "loss": 0.1478, "step": 137880 }, { "epoch": 53.55, "learning_rate": 1.2860064724919095e-05, "loss": 0.0392, "step": 137890 }, { "epoch": 53.55, "learning_rate": 1.2859546925566344e-05, "loss": 0.0998, "step": 137900 }, { "epoch": 53.56, "learning_rate": 1.2859029126213594e-05, "loss": 0.0295, "step": 137910 }, { "epoch": 53.56, "learning_rate": 1.2858511326860843e-05, "loss": 0.1597, "step": 137920 }, { "epoch": 53.57, "learning_rate": 1.2857993527508093e-05, "loss": 0.0248, "step": 137930 }, { "epoch": 53.57, "learning_rate": 1.2857475728155343e-05, "loss": 0.0317, "step": 137940 }, { "epoch": 53.57, "learning_rate": 1.2856957928802589e-05, "loss": 0.0681, "step": 137950 }, { "epoch": 53.58, "learning_rate": 1.2856440129449838e-05, "loss": 0.0184, "step": 137960 }, { "epoch": 53.58, "learning_rate": 1.2855922330097088e-05, "loss": 0.0427, "step": 137970 }, { "epoch": 53.58, "learning_rate": 1.2855404530744338e-05, "loss": 0.1294, "step": 137980 }, { "epoch": 53.59, "learning_rate": 1.2854886731391587e-05, "loss": 0.1119, "step": 137990 }, { "epoch": 53.59, "learning_rate": 1.2854368932038837e-05, "loss": 0.0793, "step": 138000 }, { "epoch": 53.6, "learning_rate": 1.2853851132686087e-05, "loss": 0.0185, "step": 138010 }, { "epoch": 53.6, "learning_rate": 1.2853333333333336e-05, "loss": 0.037, "step": 138020 }, { "epoch": 53.6, "learning_rate": 1.2852815533980582e-05, "loss": 0.2302, "step": 138030 }, { "epoch": 53.61, "learning_rate": 1.2852297734627832e-05, "loss": 0.0784, "step": 138040 }, { "epoch": 53.61, "learning_rate": 1.2851779935275082e-05, "loss": 0.0632, "step": 138050 }, { "epoch": 53.62, "learning_rate": 1.2851262135922331e-05, "loss": 0.0689, "step": 138060 }, { "epoch": 53.62, "learning_rate": 1.2850744336569581e-05, "loss": 0.1287, "step": 138070 }, { "epoch": 53.62, "learning_rate": 1.285022653721683e-05, "loss": 0.0173, "step": 138080 }, { "epoch": 53.63, "learning_rate": 1.284970873786408e-05, "loss": 0.0499, "step": 138090 }, { "epoch": 53.63, "learning_rate": 1.284919093851133e-05, "loss": 0.1056, "step": 138100 }, { "epoch": 53.63, "learning_rate": 1.2848673139158576e-05, "loss": 0.0581, "step": 138110 }, { "epoch": 53.64, "learning_rate": 1.2848155339805826e-05, "loss": 0.0633, "step": 138120 }, { "epoch": 53.64, "learning_rate": 1.2847637540453075e-05, "loss": 0.0296, "step": 138130 }, { "epoch": 53.65, "learning_rate": 1.2847119741100325e-05, "loss": 0.2311, "step": 138140 }, { "epoch": 53.65, "learning_rate": 1.2846601941747574e-05, "loss": 0.0488, "step": 138150 }, { "epoch": 53.65, "learning_rate": 1.2846084142394824e-05, "loss": 0.0266, "step": 138160 }, { "epoch": 53.66, "learning_rate": 1.2845566343042074e-05, "loss": 0.0371, "step": 138170 }, { "epoch": 53.66, "learning_rate": 1.2845048543689322e-05, "loss": 0.1079, "step": 138180 }, { "epoch": 53.67, "learning_rate": 1.284453074433657e-05, "loss": 0.0381, "step": 138190 }, { "epoch": 53.67, "learning_rate": 1.284401294498382e-05, "loss": 0.0437, "step": 138200 }, { "epoch": 53.67, "learning_rate": 1.2843495145631069e-05, "loss": 0.1006, "step": 138210 }, { "epoch": 53.68, "learning_rate": 1.2842977346278318e-05, "loss": 0.0646, "step": 138220 }, { "epoch": 53.68, "learning_rate": 1.2842459546925568e-05, "loss": 0.1939, "step": 138230 }, { "epoch": 53.69, "learning_rate": 1.2841941747572818e-05, "loss": 0.2113, "step": 138240 }, { "epoch": 53.69, "learning_rate": 1.2841423948220067e-05, "loss": 0.0041, "step": 138250 }, { "epoch": 53.69, "learning_rate": 1.2840906148867315e-05, "loss": 0.0953, "step": 138260 }, { "epoch": 53.7, "learning_rate": 1.2840388349514563e-05, "loss": 0.085, "step": 138270 }, { "epoch": 53.7, "learning_rate": 1.2839870550161813e-05, "loss": 0.133, "step": 138280 }, { "epoch": 53.7, "learning_rate": 1.2839352750809062e-05, "loss": 0.1631, "step": 138290 }, { "epoch": 53.71, "learning_rate": 1.2838834951456312e-05, "loss": 0.1075, "step": 138300 }, { "epoch": 53.71, "learning_rate": 1.2838317152103562e-05, "loss": 0.0586, "step": 138310 }, { "epoch": 53.72, "learning_rate": 1.2837799352750811e-05, "loss": 0.1057, "step": 138320 }, { "epoch": 53.72, "learning_rate": 1.2837281553398059e-05, "loss": 0.0934, "step": 138330 }, { "epoch": 53.72, "learning_rate": 1.2836763754045309e-05, "loss": 0.1591, "step": 138340 }, { "epoch": 53.73, "learning_rate": 1.2836245954692557e-05, "loss": 0.0974, "step": 138350 }, { "epoch": 53.73, "learning_rate": 1.2835728155339806e-05, "loss": 0.0527, "step": 138360 }, { "epoch": 53.74, "learning_rate": 1.2835210355987056e-05, "loss": 0.0047, "step": 138370 }, { "epoch": 53.74, "learning_rate": 1.2834692556634306e-05, "loss": 0.1886, "step": 138380 }, { "epoch": 53.74, "learning_rate": 1.2834174757281555e-05, "loss": 0.0817, "step": 138390 }, { "epoch": 53.75, "learning_rate": 1.2833656957928805e-05, "loss": 0.0402, "step": 138400 }, { "epoch": 53.75, "learning_rate": 1.2833139158576053e-05, "loss": 0.0728, "step": 138410 }, { "epoch": 53.76, "learning_rate": 1.2832621359223302e-05, "loss": 0.0454, "step": 138420 }, { "epoch": 53.76, "learning_rate": 1.283210355987055e-05, "loss": 0.1984, "step": 138430 }, { "epoch": 53.76, "learning_rate": 1.28315857605178e-05, "loss": 0.112, "step": 138440 }, { "epoch": 53.77, "learning_rate": 1.283106796116505e-05, "loss": 0.0947, "step": 138450 }, { "epoch": 53.77, "learning_rate": 1.2830550161812299e-05, "loss": 0.2117, "step": 138460 }, { "epoch": 53.77, "learning_rate": 1.2830032362459549e-05, "loss": 0.0085, "step": 138470 }, { "epoch": 53.78, "learning_rate": 1.2829514563106797e-05, "loss": 0.1602, "step": 138480 }, { "epoch": 53.78, "learning_rate": 1.2828996763754046e-05, "loss": 0.0759, "step": 138490 }, { "epoch": 53.79, "learning_rate": 1.2828478964401296e-05, "loss": 0.0106, "step": 138500 }, { "epoch": 53.79, "learning_rate": 1.2827961165048545e-05, "loss": 0.0638, "step": 138510 }, { "epoch": 53.79, "learning_rate": 1.2827443365695793e-05, "loss": 0.0874, "step": 138520 }, { "epoch": 53.8, "learning_rate": 1.2826925566343043e-05, "loss": 0.1208, "step": 138530 }, { "epoch": 53.8, "learning_rate": 1.2826407766990293e-05, "loss": 0.038, "step": 138540 }, { "epoch": 53.81, "learning_rate": 1.2825889967637542e-05, "loss": 0.2173, "step": 138550 }, { "epoch": 53.81, "learning_rate": 1.282537216828479e-05, "loss": 0.1583, "step": 138560 }, { "epoch": 53.81, "learning_rate": 1.282485436893204e-05, "loss": 0.0147, "step": 138570 }, { "epoch": 53.82, "learning_rate": 1.282433656957929e-05, "loss": 0.0653, "step": 138580 }, { "epoch": 53.82, "learning_rate": 1.2823818770226539e-05, "loss": 0.0883, "step": 138590 }, { "epoch": 53.83, "learning_rate": 1.2823300970873787e-05, "loss": 0.0655, "step": 138600 }, { "epoch": 53.83, "learning_rate": 1.2822783171521037e-05, "loss": 0.0746, "step": 138610 }, { "epoch": 53.83, "learning_rate": 1.2822265372168286e-05, "loss": 0.0538, "step": 138620 }, { "epoch": 53.84, "learning_rate": 1.2821747572815536e-05, "loss": 0.0221, "step": 138630 }, { "epoch": 53.84, "learning_rate": 1.2821229773462784e-05, "loss": 0.0628, "step": 138640 }, { "epoch": 53.84, "learning_rate": 1.2820711974110033e-05, "loss": 0.1168, "step": 138650 }, { "epoch": 53.85, "learning_rate": 1.2820194174757283e-05, "loss": 0.0828, "step": 138660 }, { "epoch": 53.85, "learning_rate": 1.2819676375404533e-05, "loss": 0.2454, "step": 138670 }, { "epoch": 53.86, "learning_rate": 1.281915857605178e-05, "loss": 0.013, "step": 138680 }, { "epoch": 53.86, "learning_rate": 1.281864077669903e-05, "loss": 0.1341, "step": 138690 }, { "epoch": 53.86, "learning_rate": 1.281812297734628e-05, "loss": 0.1506, "step": 138700 }, { "epoch": 53.87, "learning_rate": 1.2817605177993528e-05, "loss": 0.0043, "step": 138710 }, { "epoch": 53.87, "learning_rate": 1.2817087378640777e-05, "loss": 0.0026, "step": 138720 }, { "epoch": 53.88, "learning_rate": 1.2816569579288027e-05, "loss": 0.1118, "step": 138730 }, { "epoch": 53.88, "learning_rate": 1.2816051779935277e-05, "loss": 0.1669, "step": 138740 }, { "epoch": 53.88, "learning_rate": 1.2815533980582526e-05, "loss": 0.0814, "step": 138750 }, { "epoch": 53.89, "learning_rate": 1.2815016181229774e-05, "loss": 0.1544, "step": 138760 }, { "epoch": 53.89, "learning_rate": 1.2814498381877024e-05, "loss": 0.0078, "step": 138770 }, { "epoch": 53.9, "learning_rate": 1.2813980582524273e-05, "loss": 0.0608, "step": 138780 }, { "epoch": 53.9, "learning_rate": 1.2813462783171521e-05, "loss": 0.1009, "step": 138790 }, { "epoch": 53.9, "learning_rate": 1.281294498381877e-05, "loss": 0.1198, "step": 138800 }, { "epoch": 53.91, "learning_rate": 1.281242718446602e-05, "loss": 0.058, "step": 138810 }, { "epoch": 53.91, "learning_rate": 1.281190938511327e-05, "loss": 0.0128, "step": 138820 }, { "epoch": 53.91, "learning_rate": 1.281139158576052e-05, "loss": 0.0508, "step": 138830 }, { "epoch": 53.92, "learning_rate": 1.2810873786407768e-05, "loss": 0.026, "step": 138840 }, { "epoch": 53.92, "learning_rate": 1.2810355987055017e-05, "loss": 0.1573, "step": 138850 }, { "epoch": 53.93, "learning_rate": 1.2809838187702265e-05, "loss": 0.0369, "step": 138860 }, { "epoch": 53.93, "learning_rate": 1.2809320388349515e-05, "loss": 0.0576, "step": 138870 }, { "epoch": 53.93, "learning_rate": 1.2808802588996764e-05, "loss": 0.0335, "step": 138880 }, { "epoch": 53.94, "learning_rate": 1.2808284789644014e-05, "loss": 0.101, "step": 138890 }, { "epoch": 53.94, "learning_rate": 1.2807766990291264e-05, "loss": 0.1526, "step": 138900 }, { "epoch": 53.95, "learning_rate": 1.2807249190938513e-05, "loss": 0.1037, "step": 138910 }, { "epoch": 53.95, "learning_rate": 1.2806731391585761e-05, "loss": 0.1148, "step": 138920 }, { "epoch": 53.95, "learning_rate": 1.280621359223301e-05, "loss": 0.1128, "step": 138930 }, { "epoch": 53.96, "learning_rate": 1.2805695792880259e-05, "loss": 0.0868, "step": 138940 }, { "epoch": 53.96, "learning_rate": 1.2805177993527508e-05, "loss": 0.0383, "step": 138950 }, { "epoch": 53.97, "learning_rate": 1.2804660194174758e-05, "loss": 0.0125, "step": 138960 }, { "epoch": 53.97, "learning_rate": 1.2804142394822008e-05, "loss": 0.1462, "step": 138970 }, { "epoch": 53.97, "learning_rate": 1.2803624595469257e-05, "loss": 0.1112, "step": 138980 }, { "epoch": 53.98, "learning_rate": 1.2803106796116507e-05, "loss": 0.0183, "step": 138990 }, { "epoch": 53.98, "learning_rate": 1.2802588996763755e-05, "loss": 0.0661, "step": 139000 }, { "epoch": 53.98, "learning_rate": 1.2802071197411003e-05, "loss": 0.0343, "step": 139010 }, { "epoch": 53.99, "learning_rate": 1.2801553398058252e-05, "loss": 0.1815, "step": 139020 }, { "epoch": 53.99, "learning_rate": 1.2801035598705502e-05, "loss": 0.0294, "step": 139030 }, { "epoch": 54.0, "learning_rate": 1.2800517799352751e-05, "loss": 0.0196, "step": 139040 }, { "epoch": 54.0, "learning_rate": 1.2800000000000001e-05, "loss": 0.0584, "step": 139050 }, { "epoch": 54.0, "eval_accuracy": 0.951031636863824, "eval_loss": 0.319997102022171, "eval_runtime": 8.2173, "eval_samples_per_second": 442.361, "eval_steps_per_second": 55.371, "step": 139050 }, { "epoch": 54.0, "learning_rate": 1.279948220064725e-05, "loss": 0.0565, "step": 139060 }, { "epoch": 54.01, "learning_rate": 1.27989644012945e-05, "loss": 0.0385, "step": 139070 }, { "epoch": 54.01, "learning_rate": 1.279844660194175e-05, "loss": 0.0364, "step": 139080 }, { "epoch": 54.02, "learning_rate": 1.2797928802588996e-05, "loss": 0.0796, "step": 139090 }, { "epoch": 54.02, "learning_rate": 1.2797411003236246e-05, "loss": 0.0429, "step": 139100 }, { "epoch": 54.02, "learning_rate": 1.2796893203883495e-05, "loss": 0.0148, "step": 139110 }, { "epoch": 54.03, "learning_rate": 1.2796375404530745e-05, "loss": 0.1017, "step": 139120 }, { "epoch": 54.03, "learning_rate": 1.2795857605177995e-05, "loss": 0.1885, "step": 139130 }, { "epoch": 54.03, "learning_rate": 1.2795339805825244e-05, "loss": 0.1088, "step": 139140 }, { "epoch": 54.04, "learning_rate": 1.2794822006472494e-05, "loss": 0.0837, "step": 139150 }, { "epoch": 54.04, "learning_rate": 1.2794304207119744e-05, "loss": 0.0096, "step": 139160 }, { "epoch": 54.05, "learning_rate": 1.279378640776699e-05, "loss": 0.0856, "step": 139170 }, { "epoch": 54.05, "learning_rate": 1.279326860841424e-05, "loss": 0.0407, "step": 139180 }, { "epoch": 54.05, "learning_rate": 1.2792750809061489e-05, "loss": 0.0196, "step": 139190 }, { "epoch": 54.06, "learning_rate": 1.2792233009708739e-05, "loss": 0.015, "step": 139200 }, { "epoch": 54.06, "learning_rate": 1.2791715210355988e-05, "loss": 0.0247, "step": 139210 }, { "epoch": 54.07, "learning_rate": 1.2791197411003238e-05, "loss": 0.0002, "step": 139220 }, { "epoch": 54.07, "learning_rate": 1.2790679611650487e-05, "loss": 0.0002, "step": 139230 }, { "epoch": 54.07, "learning_rate": 1.2790161812297737e-05, "loss": 0.1272, "step": 139240 }, { "epoch": 54.08, "learning_rate": 1.2789644012944983e-05, "loss": 0.2859, "step": 139250 }, { "epoch": 54.08, "learning_rate": 1.2789126213592233e-05, "loss": 0.0682, "step": 139260 }, { "epoch": 54.09, "learning_rate": 1.2788608414239483e-05, "loss": 0.1397, "step": 139270 }, { "epoch": 54.09, "learning_rate": 1.2788090614886732e-05, "loss": 0.031, "step": 139280 }, { "epoch": 54.09, "learning_rate": 1.2787572815533982e-05, "loss": 0.0286, "step": 139290 }, { "epoch": 54.1, "learning_rate": 1.2787055016181231e-05, "loss": 0.1421, "step": 139300 }, { "epoch": 54.1, "learning_rate": 1.2786537216828481e-05, "loss": 0.0031, "step": 139310 }, { "epoch": 54.1, "learning_rate": 1.278601941747573e-05, "loss": 0.0591, "step": 139320 }, { "epoch": 54.11, "learning_rate": 1.2785501618122977e-05, "loss": 0.034, "step": 139330 }, { "epoch": 54.11, "learning_rate": 1.2784983818770226e-05, "loss": 0.0696, "step": 139340 }, { "epoch": 54.12, "learning_rate": 1.2784466019417476e-05, "loss": 0.102, "step": 139350 }, { "epoch": 54.12, "learning_rate": 1.2783948220064726e-05, "loss": 0.0585, "step": 139360 }, { "epoch": 54.12, "learning_rate": 1.2783430420711975e-05, "loss": 0.1187, "step": 139370 }, { "epoch": 54.13, "learning_rate": 1.2782912621359225e-05, "loss": 0.0645, "step": 139380 }, { "epoch": 54.13, "learning_rate": 1.2782394822006475e-05, "loss": 0.1194, "step": 139390 }, { "epoch": 54.14, "learning_rate": 1.2781877022653724e-05, "loss": 0.0702, "step": 139400 }, { "epoch": 54.14, "learning_rate": 1.278135922330097e-05, "loss": 0.0202, "step": 139410 }, { "epoch": 54.14, "learning_rate": 1.278084142394822e-05, "loss": 0.0138, "step": 139420 }, { "epoch": 54.15, "learning_rate": 1.278032362459547e-05, "loss": 0.0431, "step": 139430 }, { "epoch": 54.15, "learning_rate": 1.277980582524272e-05, "loss": 0.0698, "step": 139440 }, { "epoch": 54.16, "learning_rate": 1.2779288025889969e-05, "loss": 0.12, "step": 139450 }, { "epoch": 54.16, "learning_rate": 1.2778770226537219e-05, "loss": 0.1108, "step": 139460 }, { "epoch": 54.16, "learning_rate": 1.2778252427184468e-05, "loss": 0.0521, "step": 139470 }, { "epoch": 54.17, "learning_rate": 1.2777734627831718e-05, "loss": 0.0555, "step": 139480 }, { "epoch": 54.17, "learning_rate": 1.2777216828478964e-05, "loss": 0.0876, "step": 139490 }, { "epoch": 54.17, "learning_rate": 1.2776699029126214e-05, "loss": 0.0496, "step": 139500 }, { "epoch": 54.18, "learning_rate": 1.2776181229773463e-05, "loss": 0.114, "step": 139510 }, { "epoch": 54.18, "learning_rate": 1.2775663430420713e-05, "loss": 0.0356, "step": 139520 }, { "epoch": 54.19, "learning_rate": 1.2775145631067962e-05, "loss": 0.1419, "step": 139530 }, { "epoch": 54.19, "learning_rate": 1.2774627831715212e-05, "loss": 0.009, "step": 139540 }, { "epoch": 54.19, "learning_rate": 1.2774110032362462e-05, "loss": 0.0053, "step": 139550 }, { "epoch": 54.2, "learning_rate": 1.2773592233009711e-05, "loss": 0.0997, "step": 139560 }, { "epoch": 54.2, "learning_rate": 1.2773074433656958e-05, "loss": 0.1274, "step": 139570 }, { "epoch": 54.21, "learning_rate": 1.2772556634304207e-05, "loss": 0.0722, "step": 139580 }, { "epoch": 54.21, "learning_rate": 1.2772038834951457e-05, "loss": 0.126, "step": 139590 }, { "epoch": 54.21, "learning_rate": 1.2771521035598706e-05, "loss": 0.0219, "step": 139600 }, { "epoch": 54.22, "learning_rate": 1.2771003236245956e-05, "loss": 0.1736, "step": 139610 }, { "epoch": 54.22, "learning_rate": 1.2770485436893206e-05, "loss": 0.3487, "step": 139620 }, { "epoch": 54.23, "learning_rate": 1.2769967637540455e-05, "loss": 0.1613, "step": 139630 }, { "epoch": 54.23, "learning_rate": 1.2769449838187705e-05, "loss": 0.0798, "step": 139640 }, { "epoch": 54.23, "learning_rate": 1.2768932038834953e-05, "loss": 0.1508, "step": 139650 }, { "epoch": 54.24, "learning_rate": 1.27684142394822e-05, "loss": 0.162, "step": 139660 }, { "epoch": 54.24, "learning_rate": 1.276789644012945e-05, "loss": 0.0639, "step": 139670 }, { "epoch": 54.24, "learning_rate": 1.27673786407767e-05, "loss": 0.0984, "step": 139680 }, { "epoch": 54.25, "learning_rate": 1.276686084142395e-05, "loss": 0.0422, "step": 139690 }, { "epoch": 54.25, "learning_rate": 1.27663430420712e-05, "loss": 0.1928, "step": 139700 }, { "epoch": 54.26, "learning_rate": 1.2765825242718449e-05, "loss": 0.1314, "step": 139710 }, { "epoch": 54.26, "learning_rate": 1.2765307443365698e-05, "loss": 0.1061, "step": 139720 }, { "epoch": 54.26, "learning_rate": 1.2764789644012946e-05, "loss": 0.0188, "step": 139730 }, { "epoch": 54.27, "learning_rate": 1.2764271844660194e-05, "loss": 0.1819, "step": 139740 }, { "epoch": 54.27, "learning_rate": 1.2763754045307444e-05, "loss": 0.0697, "step": 139750 }, { "epoch": 54.28, "learning_rate": 1.2763236245954694e-05, "loss": 0.0286, "step": 139760 }, { "epoch": 54.28, "learning_rate": 1.2762718446601943e-05, "loss": 0.0755, "step": 139770 }, { "epoch": 54.28, "learning_rate": 1.2762200647249193e-05, "loss": 0.1376, "step": 139780 }, { "epoch": 54.29, "learning_rate": 1.2761682847896442e-05, "loss": 0.0138, "step": 139790 }, { "epoch": 54.29, "learning_rate": 1.276116504854369e-05, "loss": 0.0357, "step": 139800 }, { "epoch": 54.3, "learning_rate": 1.276064724919094e-05, "loss": 0.0296, "step": 139810 }, { "epoch": 54.3, "learning_rate": 1.2760129449838188e-05, "loss": 0.0656, "step": 139820 }, { "epoch": 54.3, "learning_rate": 1.2759611650485437e-05, "loss": 0.0273, "step": 139830 }, { "epoch": 54.31, "learning_rate": 1.2759093851132687e-05, "loss": 0.0342, "step": 139840 }, { "epoch": 54.31, "learning_rate": 1.2758576051779937e-05, "loss": 0.1261, "step": 139850 }, { "epoch": 54.31, "learning_rate": 1.2758058252427186e-05, "loss": 0.0469, "step": 139860 }, { "epoch": 54.32, "learning_rate": 1.2757540453074436e-05, "loss": 0.0543, "step": 139870 }, { "epoch": 54.32, "learning_rate": 1.2757022653721684e-05, "loss": 0.0231, "step": 139880 }, { "epoch": 54.33, "learning_rate": 1.2756504854368933e-05, "loss": 0.0739, "step": 139890 }, { "epoch": 54.33, "learning_rate": 1.2755987055016181e-05, "loss": 0.1235, "step": 139900 }, { "epoch": 54.33, "learning_rate": 1.2755469255663431e-05, "loss": 0.1258, "step": 139910 }, { "epoch": 54.34, "learning_rate": 1.275495145631068e-05, "loss": 0.0774, "step": 139920 }, { "epoch": 54.34, "learning_rate": 1.275443365695793e-05, "loss": 0.0745, "step": 139930 }, { "epoch": 54.35, "learning_rate": 1.275391585760518e-05, "loss": 0.0837, "step": 139940 }, { "epoch": 54.35, "learning_rate": 1.2753398058252428e-05, "loss": 0.1456, "step": 139950 }, { "epoch": 54.35, "learning_rate": 1.2752880258899677e-05, "loss": 0.0531, "step": 139960 }, { "epoch": 54.36, "learning_rate": 1.2752362459546927e-05, "loss": 0.0865, "step": 139970 }, { "epoch": 54.36, "learning_rate": 1.2751844660194175e-05, "loss": 0.1078, "step": 139980 }, { "epoch": 54.37, "learning_rate": 1.2751326860841425e-05, "loss": 0.0327, "step": 139990 }, { "epoch": 54.37, "learning_rate": 1.2750809061488674e-05, "loss": 0.1181, "step": 140000 }, { "epoch": 54.37, "learning_rate": 1.2750291262135924e-05, "loss": 0.1126, "step": 140010 }, { "epoch": 54.38, "learning_rate": 1.2749773462783173e-05, "loss": 0.1063, "step": 140020 }, { "epoch": 54.38, "learning_rate": 1.2749255663430421e-05, "loss": 0.1153, "step": 140030 }, { "epoch": 54.38, "learning_rate": 1.2748737864077671e-05, "loss": 0.022, "step": 140040 }, { "epoch": 54.39, "learning_rate": 1.274822006472492e-05, "loss": 0.0945, "step": 140050 }, { "epoch": 54.39, "learning_rate": 1.2747702265372168e-05, "loss": 0.126, "step": 140060 }, { "epoch": 54.4, "learning_rate": 1.2747184466019418e-05, "loss": 0.0052, "step": 140070 }, { "epoch": 54.4, "learning_rate": 1.2746666666666668e-05, "loss": 0.1314, "step": 140080 }, { "epoch": 54.4, "learning_rate": 1.2746148867313917e-05, "loss": 0.0936, "step": 140090 }, { "epoch": 54.41, "learning_rate": 1.2745631067961165e-05, "loss": 0.1739, "step": 140100 }, { "epoch": 54.41, "learning_rate": 1.2745113268608415e-05, "loss": 0.0916, "step": 140110 }, { "epoch": 54.42, "learning_rate": 1.2744595469255665e-05, "loss": 0.0411, "step": 140120 }, { "epoch": 54.42, "learning_rate": 1.2744077669902914e-05, "loss": 0.0302, "step": 140130 }, { "epoch": 54.42, "learning_rate": 1.2743559870550162e-05, "loss": 0.0617, "step": 140140 }, { "epoch": 54.43, "learning_rate": 1.2743042071197412e-05, "loss": 0.098, "step": 140150 }, { "epoch": 54.43, "learning_rate": 1.2742524271844661e-05, "loss": 0.0351, "step": 140160 }, { "epoch": 54.43, "learning_rate": 1.2742006472491911e-05, "loss": 0.007, "step": 140170 }, { "epoch": 54.44, "learning_rate": 1.2741488673139159e-05, "loss": 0.0153, "step": 140180 }, { "epoch": 54.44, "learning_rate": 1.2740970873786408e-05, "loss": 0.0605, "step": 140190 }, { "epoch": 54.45, "learning_rate": 1.2740453074433658e-05, "loss": 0.2399, "step": 140200 }, { "epoch": 54.45, "learning_rate": 1.2739935275080908e-05, "loss": 0.0809, "step": 140210 }, { "epoch": 54.45, "learning_rate": 1.2739417475728157e-05, "loss": 0.0195, "step": 140220 }, { "epoch": 54.46, "learning_rate": 1.2738899676375405e-05, "loss": 0.0131, "step": 140230 }, { "epoch": 54.46, "learning_rate": 1.2738381877022655e-05, "loss": 0.0004, "step": 140240 }, { "epoch": 54.47, "learning_rate": 1.2737864077669904e-05, "loss": 0.0117, "step": 140250 }, { "epoch": 54.47, "learning_rate": 1.2737346278317152e-05, "loss": 0.137, "step": 140260 }, { "epoch": 54.47, "learning_rate": 1.2736828478964402e-05, "loss": 0.0349, "step": 140270 }, { "epoch": 54.48, "learning_rate": 1.2736310679611652e-05, "loss": 0.0738, "step": 140280 }, { "epoch": 54.48, "learning_rate": 1.2735792880258901e-05, "loss": 0.217, "step": 140290 }, { "epoch": 54.49, "learning_rate": 1.2735275080906151e-05, "loss": 0.0869, "step": 140300 }, { "epoch": 54.49, "learning_rate": 1.2734757281553399e-05, "loss": 0.0386, "step": 140310 }, { "epoch": 54.49, "learning_rate": 1.2734239482200648e-05, "loss": 0.0277, "step": 140320 }, { "epoch": 54.5, "learning_rate": 1.2733721682847896e-05, "loss": 0.0235, "step": 140330 }, { "epoch": 54.5, "learning_rate": 1.2733203883495146e-05, "loss": 0.0168, "step": 140340 }, { "epoch": 54.5, "learning_rate": 1.2732686084142396e-05, "loss": 0.1017, "step": 140350 }, { "epoch": 54.51, "learning_rate": 1.2732168284789645e-05, "loss": 0.03, "step": 140360 }, { "epoch": 54.51, "learning_rate": 1.2731650485436895e-05, "loss": 0.105, "step": 140370 }, { "epoch": 54.52, "learning_rate": 1.2731132686084144e-05, "loss": 0.0876, "step": 140380 }, { "epoch": 54.52, "learning_rate": 1.2730614886731392e-05, "loss": 0.1319, "step": 140390 }, { "epoch": 54.52, "learning_rate": 1.2730097087378642e-05, "loss": 0.1311, "step": 140400 }, { "epoch": 54.53, "learning_rate": 1.272957928802589e-05, "loss": 0.0358, "step": 140410 }, { "epoch": 54.53, "learning_rate": 1.272906148867314e-05, "loss": 0.0035, "step": 140420 }, { "epoch": 54.54, "learning_rate": 1.2728543689320389e-05, "loss": 0.0556, "step": 140430 }, { "epoch": 54.54, "learning_rate": 1.2728025889967639e-05, "loss": 0.0696, "step": 140440 }, { "epoch": 54.54, "learning_rate": 1.2727508090614888e-05, "loss": 0.0655, "step": 140450 }, { "epoch": 54.55, "learning_rate": 1.2726990291262138e-05, "loss": 0.1188, "step": 140460 }, { "epoch": 54.55, "learning_rate": 1.2726472491909386e-05, "loss": 0.024, "step": 140470 }, { "epoch": 54.56, "learning_rate": 1.2725954692556634e-05, "loss": 0.2087, "step": 140480 }, { "epoch": 54.56, "learning_rate": 1.2725436893203883e-05, "loss": 0.1394, "step": 140490 }, { "epoch": 54.56, "learning_rate": 1.2724919093851133e-05, "loss": 0.0673, "step": 140500 }, { "epoch": 54.57, "learning_rate": 1.2724401294498383e-05, "loss": 0.0985, "step": 140510 }, { "epoch": 54.57, "learning_rate": 1.2723883495145632e-05, "loss": 0.1547, "step": 140520 }, { "epoch": 54.57, "learning_rate": 1.2723365695792882e-05, "loss": 0.0498, "step": 140530 }, { "epoch": 54.58, "learning_rate": 1.2722847896440132e-05, "loss": 0.0723, "step": 140540 }, { "epoch": 54.58, "learning_rate": 1.272233009708738e-05, "loss": 0.076, "step": 140550 }, { "epoch": 54.59, "learning_rate": 1.2721812297734627e-05, "loss": 0.0792, "step": 140560 }, { "epoch": 54.59, "learning_rate": 1.2721294498381877e-05, "loss": 0.0379, "step": 140570 }, { "epoch": 54.59, "learning_rate": 1.2720776699029127e-05, "loss": 0.1347, "step": 140580 }, { "epoch": 54.6, "learning_rate": 1.2720258899676376e-05, "loss": 0.1407, "step": 140590 }, { "epoch": 54.6, "learning_rate": 1.2719741100323626e-05, "loss": 0.0301, "step": 140600 }, { "epoch": 54.61, "learning_rate": 1.2719223300970875e-05, "loss": 0.0676, "step": 140610 }, { "epoch": 54.61, "learning_rate": 1.2718705501618125e-05, "loss": 0.0422, "step": 140620 }, { "epoch": 54.61, "learning_rate": 1.2718187702265371e-05, "loss": 0.0146, "step": 140630 }, { "epoch": 54.62, "learning_rate": 1.2717669902912621e-05, "loss": 0.0747, "step": 140640 }, { "epoch": 54.62, "learning_rate": 1.271715210355987e-05, "loss": 0.1522, "step": 140650 }, { "epoch": 54.63, "learning_rate": 1.271663430420712e-05, "loss": 0.0264, "step": 140660 }, { "epoch": 54.63, "learning_rate": 1.271611650485437e-05, "loss": 0.0095, "step": 140670 }, { "epoch": 54.63, "learning_rate": 1.271559870550162e-05, "loss": 0.0634, "step": 140680 }, { "epoch": 54.64, "learning_rate": 1.2715080906148869e-05, "loss": 0.1337, "step": 140690 }, { "epoch": 54.64, "learning_rate": 1.2714563106796119e-05, "loss": 0.0376, "step": 140700 }, { "epoch": 54.64, "learning_rate": 1.2714045307443368e-05, "loss": 0.0419, "step": 140710 }, { "epoch": 54.65, "learning_rate": 1.2713527508090614e-05, "loss": 0.1088, "step": 140720 }, { "epoch": 54.65, "learning_rate": 1.2713009708737864e-05, "loss": 0.0156, "step": 140730 }, { "epoch": 54.66, "learning_rate": 1.2712491909385114e-05, "loss": 0.0229, "step": 140740 }, { "epoch": 54.66, "learning_rate": 1.2711974110032363e-05, "loss": 0.1092, "step": 140750 }, { "epoch": 54.66, "learning_rate": 1.2711456310679613e-05, "loss": 0.2339, "step": 140760 }, { "epoch": 54.67, "learning_rate": 1.2710938511326863e-05, "loss": 0.1252, "step": 140770 }, { "epoch": 54.67, "learning_rate": 1.2710420711974112e-05, "loss": 0.1215, "step": 140780 }, { "epoch": 54.68, "learning_rate": 1.2709902912621362e-05, "loss": 0.1302, "step": 140790 }, { "epoch": 54.68, "learning_rate": 1.2709385113268608e-05, "loss": 0.0498, "step": 140800 }, { "epoch": 54.68, "learning_rate": 1.2708867313915858e-05, "loss": 0.115, "step": 140810 }, { "epoch": 54.69, "learning_rate": 1.2708349514563107e-05, "loss": 0.1193, "step": 140820 }, { "epoch": 54.69, "learning_rate": 1.2707831715210357e-05, "loss": 0.1008, "step": 140830 }, { "epoch": 54.7, "learning_rate": 1.2707313915857607e-05, "loss": 0.1112, "step": 140840 }, { "epoch": 54.7, "learning_rate": 1.2706796116504856e-05, "loss": 0.0363, "step": 140850 }, { "epoch": 54.7, "learning_rate": 1.2706278317152106e-05, "loss": 0.0921, "step": 140860 }, { "epoch": 54.71, "learning_rate": 1.2705760517799355e-05, "loss": 0.058, "step": 140870 }, { "epoch": 54.71, "learning_rate": 1.2705242718446602e-05, "loss": 0.0453, "step": 140880 }, { "epoch": 54.71, "learning_rate": 1.2704724919093851e-05, "loss": 0.2195, "step": 140890 }, { "epoch": 54.72, "learning_rate": 1.27042071197411e-05, "loss": 0.0253, "step": 140900 }, { "epoch": 54.72, "learning_rate": 1.270368932038835e-05, "loss": 0.1713, "step": 140910 }, { "epoch": 54.73, "learning_rate": 1.27031715210356e-05, "loss": 0.1513, "step": 140920 }, { "epoch": 54.73, "learning_rate": 1.270265372168285e-05, "loss": 0.0296, "step": 140930 }, { "epoch": 54.73, "learning_rate": 1.27021359223301e-05, "loss": 0.0789, "step": 140940 }, { "epoch": 54.74, "learning_rate": 1.2701618122977349e-05, "loss": 0.0412, "step": 140950 }, { "epoch": 54.74, "learning_rate": 1.2701100323624595e-05, "loss": 0.0529, "step": 140960 }, { "epoch": 54.75, "learning_rate": 1.2700582524271845e-05, "loss": 0.1453, "step": 140970 }, { "epoch": 54.75, "learning_rate": 1.2700064724919094e-05, "loss": 0.0293, "step": 140980 }, { "epoch": 54.75, "learning_rate": 1.2699546925566344e-05, "loss": 0.101, "step": 140990 }, { "epoch": 54.76, "learning_rate": 1.2699029126213594e-05, "loss": 0.0993, "step": 141000 }, { "epoch": 54.76, "learning_rate": 1.2698511326860843e-05, "loss": 0.1448, "step": 141010 }, { "epoch": 54.77, "learning_rate": 1.2697993527508093e-05, "loss": 0.0479, "step": 141020 }, { "epoch": 54.77, "learning_rate": 1.2697475728155342e-05, "loss": 0.0767, "step": 141030 }, { "epoch": 54.77, "learning_rate": 1.2696957928802589e-05, "loss": 0.0499, "step": 141040 }, { "epoch": 54.78, "learning_rate": 1.2696440129449838e-05, "loss": 0.0792, "step": 141050 }, { "epoch": 54.78, "learning_rate": 1.2695922330097088e-05, "loss": 0.0571, "step": 141060 }, { "epoch": 54.78, "learning_rate": 1.2695404530744338e-05, "loss": 0.0493, "step": 141070 }, { "epoch": 54.79, "learning_rate": 1.2694886731391587e-05, "loss": 0.0181, "step": 141080 }, { "epoch": 54.79, "learning_rate": 1.2694368932038837e-05, "loss": 0.0123, "step": 141090 }, { "epoch": 54.8, "learning_rate": 1.2693851132686086e-05, "loss": 0.1277, "step": 141100 }, { "epoch": 54.8, "learning_rate": 1.2693333333333336e-05, "loss": 0.0715, "step": 141110 }, { "epoch": 54.8, "learning_rate": 1.2692815533980582e-05, "loss": 0.1867, "step": 141120 }, { "epoch": 54.81, "learning_rate": 1.2692297734627832e-05, "loss": 0.0112, "step": 141130 }, { "epoch": 54.81, "learning_rate": 1.2691779935275082e-05, "loss": 0.2275, "step": 141140 }, { "epoch": 54.82, "learning_rate": 1.2691262135922331e-05, "loss": 0.008, "step": 141150 }, { "epoch": 54.82, "learning_rate": 1.269074433656958e-05, "loss": 0.1353, "step": 141160 }, { "epoch": 54.82, "learning_rate": 1.269022653721683e-05, "loss": 0.1227, "step": 141170 }, { "epoch": 54.83, "learning_rate": 1.268970873786408e-05, "loss": 0.111, "step": 141180 }, { "epoch": 54.83, "learning_rate": 1.268919093851133e-05, "loss": 0.127, "step": 141190 }, { "epoch": 54.83, "learning_rate": 1.2688673139158576e-05, "loss": 0.0555, "step": 141200 }, { "epoch": 54.84, "learning_rate": 1.2688155339805825e-05, "loss": 0.1882, "step": 141210 }, { "epoch": 54.84, "learning_rate": 1.2687637540453075e-05, "loss": 0.1245, "step": 141220 }, { "epoch": 54.85, "learning_rate": 1.2687119741100325e-05, "loss": 0.0603, "step": 141230 }, { "epoch": 54.85, "learning_rate": 1.2686601941747574e-05, "loss": 0.1147, "step": 141240 }, { "epoch": 54.85, "learning_rate": 1.2686084142394824e-05, "loss": 0.0644, "step": 141250 }, { "epoch": 54.86, "learning_rate": 1.2685566343042074e-05, "loss": 0.0033, "step": 141260 }, { "epoch": 54.86, "learning_rate": 1.2685048543689321e-05, "loss": 0.1203, "step": 141270 }, { "epoch": 54.87, "learning_rate": 1.2684530744336571e-05, "loss": 0.1065, "step": 141280 }, { "epoch": 54.87, "learning_rate": 1.2684012944983819e-05, "loss": 0.1231, "step": 141290 }, { "epoch": 54.87, "learning_rate": 1.2683495145631069e-05, "loss": 0.0532, "step": 141300 }, { "epoch": 54.88, "learning_rate": 1.2682977346278318e-05, "loss": 0.0542, "step": 141310 }, { "epoch": 54.88, "learning_rate": 1.2682459546925568e-05, "loss": 0.0032, "step": 141320 }, { "epoch": 54.89, "learning_rate": 1.2681941747572817e-05, "loss": 0.0489, "step": 141330 }, { "epoch": 54.89, "learning_rate": 1.2681423948220067e-05, "loss": 0.0491, "step": 141340 }, { "epoch": 54.89, "learning_rate": 1.2680906148867315e-05, "loss": 0.0578, "step": 141350 }, { "epoch": 54.9, "learning_rate": 1.2680388349514565e-05, "loss": 0.0166, "step": 141360 }, { "epoch": 54.9, "learning_rate": 1.2679870550161813e-05, "loss": 0.0068, "step": 141370 }, { "epoch": 54.9, "learning_rate": 1.2679352750809062e-05, "loss": 0.1781, "step": 141380 }, { "epoch": 54.91, "learning_rate": 1.2678834951456312e-05, "loss": 0.0955, "step": 141390 }, { "epoch": 54.91, "learning_rate": 1.2678317152103561e-05, "loss": 0.0367, "step": 141400 }, { "epoch": 54.92, "learning_rate": 1.2677799352750811e-05, "loss": 0.0531, "step": 141410 }, { "epoch": 54.92, "learning_rate": 1.2677281553398059e-05, "loss": 0.0801, "step": 141420 }, { "epoch": 54.92, "learning_rate": 1.2676763754045309e-05, "loss": 0.0368, "step": 141430 }, { "epoch": 54.93, "learning_rate": 1.2676245954692558e-05, "loss": 0.035, "step": 141440 }, { "epoch": 54.93, "learning_rate": 1.2675728155339806e-05, "loss": 0.0912, "step": 141450 }, { "epoch": 54.94, "learning_rate": 1.2675210355987056e-05, "loss": 0.0645, "step": 141460 }, { "epoch": 54.94, "learning_rate": 1.2674692556634305e-05, "loss": 0.0715, "step": 141470 }, { "epoch": 54.94, "learning_rate": 1.2674174757281555e-05, "loss": 0.1044, "step": 141480 }, { "epoch": 54.95, "learning_rate": 1.2673656957928805e-05, "loss": 0.0528, "step": 141490 }, { "epoch": 54.95, "learning_rate": 1.2673139158576053e-05, "loss": 0.0399, "step": 141500 }, { "epoch": 54.96, "learning_rate": 1.2672621359223302e-05, "loss": 0.0863, "step": 141510 }, { "epoch": 54.96, "learning_rate": 1.2672103559870552e-05, "loss": 0.0386, "step": 141520 }, { "epoch": 54.96, "learning_rate": 1.26715857605178e-05, "loss": 0.1754, "step": 141530 }, { "epoch": 54.97, "learning_rate": 1.267106796116505e-05, "loss": 0.0556, "step": 141540 }, { "epoch": 54.97, "learning_rate": 1.2670550161812299e-05, "loss": 0.0203, "step": 141550 }, { "epoch": 54.97, "learning_rate": 1.2670032362459549e-05, "loss": 0.0545, "step": 141560 }, { "epoch": 54.98, "learning_rate": 1.2669514563106796e-05, "loss": 0.0999, "step": 141570 }, { "epoch": 54.98, "learning_rate": 1.2668996763754046e-05, "loss": 0.0427, "step": 141580 }, { "epoch": 54.99, "learning_rate": 1.2668478964401296e-05, "loss": 0.0003, "step": 141590 }, { "epoch": 54.99, "learning_rate": 1.2667961165048545e-05, "loss": 0.0296, "step": 141600 }, { "epoch": 54.99, "learning_rate": 1.2667443365695793e-05, "loss": 0.0801, "step": 141610 }, { "epoch": 55.0, "learning_rate": 1.2666925566343043e-05, "loss": 0.1022, "step": 141620 }, { "epoch": 55.0, "eval_accuracy": 0.9491059147180193, "eval_loss": 0.32835954427719116, "eval_runtime": 8.2469, "eval_samples_per_second": 440.774, "eval_steps_per_second": 55.173, "step": 141625 }, { "epoch": 55.0, "learning_rate": 1.2666407766990292e-05, "loss": 0.0212, "step": 141630 }, { "epoch": 55.01, "learning_rate": 1.2665889967637542e-05, "loss": 0.1452, "step": 141640 }, { "epoch": 55.01, "learning_rate": 1.266537216828479e-05, "loss": 0.059, "step": 141650 }, { "epoch": 55.01, "learning_rate": 1.266485436893204e-05, "loss": 0.0005, "step": 141660 }, { "epoch": 55.02, "learning_rate": 1.266433656957929e-05, "loss": 0.0512, "step": 141670 }, { "epoch": 55.02, "learning_rate": 1.2663818770226539e-05, "loss": 0.0275, "step": 141680 }, { "epoch": 55.03, "learning_rate": 1.2663300970873787e-05, "loss": 0.1011, "step": 141690 }, { "epoch": 55.03, "learning_rate": 1.2662783171521036e-05, "loss": 0.1663, "step": 141700 }, { "epoch": 55.03, "learning_rate": 1.2662265372168286e-05, "loss": 0.1308, "step": 141710 }, { "epoch": 55.04, "learning_rate": 1.2661747572815536e-05, "loss": 0.1457, "step": 141720 }, { "epoch": 55.04, "learning_rate": 1.2661229773462784e-05, "loss": 0.2276, "step": 141730 }, { "epoch": 55.04, "learning_rate": 1.2660711974110033e-05, "loss": 0.0246, "step": 141740 }, { "epoch": 55.05, "learning_rate": 1.2660194174757283e-05, "loss": 0.0748, "step": 141750 }, { "epoch": 55.05, "learning_rate": 1.2659676375404532e-05, "loss": 0.0374, "step": 141760 }, { "epoch": 55.06, "learning_rate": 1.265915857605178e-05, "loss": 0.0643, "step": 141770 }, { "epoch": 55.06, "learning_rate": 1.265864077669903e-05, "loss": 0.0519, "step": 141780 }, { "epoch": 55.06, "learning_rate": 1.265812297734628e-05, "loss": 0.0642, "step": 141790 }, { "epoch": 55.07, "learning_rate": 1.2657605177993527e-05, "loss": 0.0108, "step": 141800 }, { "epoch": 55.07, "learning_rate": 1.2657087378640777e-05, "loss": 0.0523, "step": 141810 }, { "epoch": 55.08, "learning_rate": 1.2656569579288027e-05, "loss": 0.0748, "step": 141820 }, { "epoch": 55.08, "learning_rate": 1.2656051779935276e-05, "loss": 0.1278, "step": 141830 }, { "epoch": 55.08, "learning_rate": 1.2655533980582526e-05, "loss": 0.0479, "step": 141840 }, { "epoch": 55.09, "learning_rate": 1.2655016181229776e-05, "loss": 0.0662, "step": 141850 }, { "epoch": 55.09, "learning_rate": 1.2654498381877024e-05, "loss": 0.0935, "step": 141860 }, { "epoch": 55.1, "learning_rate": 1.2653980582524273e-05, "loss": 0.0086, "step": 141870 }, { "epoch": 55.1, "learning_rate": 1.2653462783171521e-05, "loss": 0.0973, "step": 141880 }, { "epoch": 55.1, "learning_rate": 1.265294498381877e-05, "loss": 0.029, "step": 141890 }, { "epoch": 55.11, "learning_rate": 1.265242718446602e-05, "loss": 0.0711, "step": 141900 }, { "epoch": 55.11, "learning_rate": 1.265190938511327e-05, "loss": 0.0005, "step": 141910 }, { "epoch": 55.11, "learning_rate": 1.265139158576052e-05, "loss": 0.0653, "step": 141920 }, { "epoch": 55.12, "learning_rate": 1.2650873786407769e-05, "loss": 0.0394, "step": 141930 }, { "epoch": 55.12, "learning_rate": 1.2650355987055017e-05, "loss": 0.0981, "step": 141940 }, { "epoch": 55.13, "learning_rate": 1.2649838187702265e-05, "loss": 0.1034, "step": 141950 }, { "epoch": 55.13, "learning_rate": 1.2649320388349515e-05, "loss": 0.141, "step": 141960 }, { "epoch": 55.13, "learning_rate": 1.2648802588996764e-05, "loss": 0.0611, "step": 141970 }, { "epoch": 55.14, "learning_rate": 1.2648284789644014e-05, "loss": 0.1003, "step": 141980 }, { "epoch": 55.14, "learning_rate": 1.2647766990291263e-05, "loss": 0.0392, "step": 141990 }, { "epoch": 55.15, "learning_rate": 1.2647249190938513e-05, "loss": 0.2226, "step": 142000 }, { "epoch": 55.15, "learning_rate": 1.2646731391585763e-05, "loss": 0.1075, "step": 142010 }, { "epoch": 55.15, "learning_rate": 1.264621359223301e-05, "loss": 0.0325, "step": 142020 }, { "epoch": 55.16, "learning_rate": 1.2645695792880259e-05, "loss": 0.012, "step": 142030 }, { "epoch": 55.16, "learning_rate": 1.2645177993527508e-05, "loss": 0.0156, "step": 142040 }, { "epoch": 55.17, "learning_rate": 1.2644660194174758e-05, "loss": 0.0314, "step": 142050 }, { "epoch": 55.17, "learning_rate": 1.2644142394822007e-05, "loss": 0.0201, "step": 142060 }, { "epoch": 55.17, "learning_rate": 1.2643624595469257e-05, "loss": 0.0745, "step": 142070 }, { "epoch": 55.18, "learning_rate": 1.2643106796116507e-05, "loss": 0.0379, "step": 142080 }, { "epoch": 55.18, "learning_rate": 1.2642588996763756e-05, "loss": 0.09, "step": 142090 }, { "epoch": 55.18, "learning_rate": 1.2642071197411002e-05, "loss": 0.0408, "step": 142100 }, { "epoch": 55.19, "learning_rate": 1.2641553398058252e-05, "loss": 0.0881, "step": 142110 }, { "epoch": 55.19, "learning_rate": 1.2641035598705502e-05, "loss": 0.1241, "step": 142120 }, { "epoch": 55.2, "learning_rate": 1.2640517799352751e-05, "loss": 0.0149, "step": 142130 }, { "epoch": 55.2, "learning_rate": 1.2640000000000001e-05, "loss": 0.0759, "step": 142140 }, { "epoch": 55.2, "learning_rate": 1.263948220064725e-05, "loss": 0.0313, "step": 142150 }, { "epoch": 55.21, "learning_rate": 1.26389644012945e-05, "loss": 0.1009, "step": 142160 }, { "epoch": 55.21, "learning_rate": 1.263844660194175e-05, "loss": 0.0117, "step": 142170 }, { "epoch": 55.22, "learning_rate": 1.2637928802588996e-05, "loss": 0.0327, "step": 142180 }, { "epoch": 55.22, "learning_rate": 1.2637411003236246e-05, "loss": 0.1146, "step": 142190 }, { "epoch": 55.22, "learning_rate": 1.2636893203883495e-05, "loss": 0.0685, "step": 142200 }, { "epoch": 55.23, "learning_rate": 1.2636375404530745e-05, "loss": 0.149, "step": 142210 }, { "epoch": 55.23, "learning_rate": 1.2635857605177995e-05, "loss": 0.0395, "step": 142220 }, { "epoch": 55.23, "learning_rate": 1.2635339805825244e-05, "loss": 0.0004, "step": 142230 }, { "epoch": 55.24, "learning_rate": 1.2634822006472494e-05, "loss": 0.0472, "step": 142240 }, { "epoch": 55.24, "learning_rate": 1.2634304207119743e-05, "loss": 0.1354, "step": 142250 }, { "epoch": 55.25, "learning_rate": 1.263378640776699e-05, "loss": 0.0797, "step": 142260 }, { "epoch": 55.25, "learning_rate": 1.263326860841424e-05, "loss": 0.0769, "step": 142270 }, { "epoch": 55.25, "learning_rate": 1.2632750809061489e-05, "loss": 0.0836, "step": 142280 }, { "epoch": 55.26, "learning_rate": 1.2632233009708738e-05, "loss": 0.0599, "step": 142290 }, { "epoch": 55.26, "learning_rate": 1.2631715210355988e-05, "loss": 0.0379, "step": 142300 }, { "epoch": 55.27, "learning_rate": 1.2631197411003238e-05, "loss": 0.0316, "step": 142310 }, { "epoch": 55.27, "learning_rate": 1.2630679611650487e-05, "loss": 0.0511, "step": 142320 }, { "epoch": 55.27, "learning_rate": 1.2630161812297737e-05, "loss": 0.0691, "step": 142330 }, { "epoch": 55.28, "learning_rate": 1.2629644012944983e-05, "loss": 0.2658, "step": 142340 }, { "epoch": 55.28, "learning_rate": 1.2629126213592233e-05, "loss": 0.0397, "step": 142350 }, { "epoch": 55.29, "learning_rate": 1.2628608414239482e-05, "loss": 0.0961, "step": 142360 }, { "epoch": 55.29, "learning_rate": 1.2628090614886732e-05, "loss": 0.1, "step": 142370 }, { "epoch": 55.29, "learning_rate": 1.2627572815533982e-05, "loss": 0.0812, "step": 142380 }, { "epoch": 55.3, "learning_rate": 1.2627055016181231e-05, "loss": 0.172, "step": 142390 }, { "epoch": 55.3, "learning_rate": 1.2626537216828481e-05, "loss": 0.0806, "step": 142400 }, { "epoch": 55.3, "learning_rate": 1.262601941747573e-05, "loss": 0.0725, "step": 142410 }, { "epoch": 55.31, "learning_rate": 1.262550161812298e-05, "loss": 0.0079, "step": 142420 }, { "epoch": 55.31, "learning_rate": 1.2624983818770226e-05, "loss": 0.0249, "step": 142430 }, { "epoch": 55.32, "learning_rate": 1.2624466019417476e-05, "loss": 0.1251, "step": 142440 }, { "epoch": 55.32, "learning_rate": 1.2623948220064726e-05, "loss": 0.191, "step": 142450 }, { "epoch": 55.32, "learning_rate": 1.2623430420711975e-05, "loss": 0.0546, "step": 142460 }, { "epoch": 55.33, "learning_rate": 1.2622912621359225e-05, "loss": 0.0935, "step": 142470 }, { "epoch": 55.33, "learning_rate": 1.2622394822006474e-05, "loss": 0.1182, "step": 142480 }, { "epoch": 55.34, "learning_rate": 1.2621877022653724e-05, "loss": 0.1465, "step": 142490 }, { "epoch": 55.34, "learning_rate": 1.2621359223300974e-05, "loss": 0.0326, "step": 142500 }, { "epoch": 55.34, "learning_rate": 1.262084142394822e-05, "loss": 0.0882, "step": 142510 }, { "epoch": 55.35, "learning_rate": 1.262032362459547e-05, "loss": 0.0772, "step": 142520 }, { "epoch": 55.35, "learning_rate": 1.2619805825242719e-05, "loss": 0.0016, "step": 142530 }, { "epoch": 55.36, "learning_rate": 1.2619288025889969e-05, "loss": 0.0372, "step": 142540 }, { "epoch": 55.36, "learning_rate": 1.2618770226537218e-05, "loss": 0.2161, "step": 142550 }, { "epoch": 55.36, "learning_rate": 1.2618252427184468e-05, "loss": 0.0167, "step": 142560 }, { "epoch": 55.37, "learning_rate": 1.2617734627831718e-05, "loss": 0.0316, "step": 142570 }, { "epoch": 55.37, "learning_rate": 1.2617216828478967e-05, "loss": 0.1418, "step": 142580 }, { "epoch": 55.37, "learning_rate": 1.2616699029126213e-05, "loss": 0.1451, "step": 142590 }, { "epoch": 55.38, "learning_rate": 1.2616181229773463e-05, "loss": 0.2044, "step": 142600 }, { "epoch": 55.38, "learning_rate": 1.2615663430420713e-05, "loss": 0.022, "step": 142610 }, { "epoch": 55.39, "learning_rate": 1.2615145631067962e-05, "loss": 0.1969, "step": 142620 }, { "epoch": 55.39, "learning_rate": 1.2614627831715212e-05, "loss": 0.068, "step": 142630 }, { "epoch": 55.39, "learning_rate": 1.2614110032362462e-05, "loss": 0.076, "step": 142640 }, { "epoch": 55.4, "learning_rate": 1.2613592233009711e-05, "loss": 0.0839, "step": 142650 }, { "epoch": 55.4, "learning_rate": 1.261307443365696e-05, "loss": 0.1118, "step": 142660 }, { "epoch": 55.41, "learning_rate": 1.2612556634304207e-05, "loss": 0.0017, "step": 142670 }, { "epoch": 55.41, "learning_rate": 1.2612038834951457e-05, "loss": 0.0308, "step": 142680 }, { "epoch": 55.41, "learning_rate": 1.2611521035598706e-05, "loss": 0.0594, "step": 142690 }, { "epoch": 55.42, "learning_rate": 1.2611003236245956e-05, "loss": 0.0373, "step": 142700 }, { "epoch": 55.42, "learning_rate": 1.2610485436893205e-05, "loss": 0.0305, "step": 142710 }, { "epoch": 55.43, "learning_rate": 1.2609967637540455e-05, "loss": 0.0738, "step": 142720 }, { "epoch": 55.43, "learning_rate": 1.2609449838187705e-05, "loss": 0.0896, "step": 142730 }, { "epoch": 55.43, "learning_rate": 1.2608932038834953e-05, "loss": 0.0013, "step": 142740 }, { "epoch": 55.44, "learning_rate": 1.26084142394822e-05, "loss": 0.1317, "step": 142750 }, { "epoch": 55.44, "learning_rate": 1.260789644012945e-05, "loss": 0.0854, "step": 142760 }, { "epoch": 55.44, "learning_rate": 1.26073786407767e-05, "loss": 0.1681, "step": 142770 }, { "epoch": 55.45, "learning_rate": 1.260686084142395e-05, "loss": 0.1384, "step": 142780 }, { "epoch": 55.45, "learning_rate": 1.2606343042071199e-05, "loss": 0.0398, "step": 142790 }, { "epoch": 55.46, "learning_rate": 1.2605825242718449e-05, "loss": 0.0539, "step": 142800 }, { "epoch": 55.46, "learning_rate": 1.2605307443365698e-05, "loss": 0.0559, "step": 142810 }, { "epoch": 55.46, "learning_rate": 1.2604789644012946e-05, "loss": 0.098, "step": 142820 }, { "epoch": 55.47, "learning_rate": 1.2604271844660194e-05, "loss": 0.037, "step": 142830 }, { "epoch": 55.47, "learning_rate": 1.2603754045307444e-05, "loss": 0.3, "step": 142840 }, { "epoch": 55.48, "learning_rate": 1.2603236245954693e-05, "loss": 0.0654, "step": 142850 }, { "epoch": 55.48, "learning_rate": 1.2602718446601943e-05, "loss": 0.0864, "step": 142860 }, { "epoch": 55.48, "learning_rate": 1.2602200647249193e-05, "loss": 0.0465, "step": 142870 }, { "epoch": 55.49, "learning_rate": 1.2601682847896442e-05, "loss": 0.021, "step": 142880 }, { "epoch": 55.49, "learning_rate": 1.260116504854369e-05, "loss": 0.1219, "step": 142890 }, { "epoch": 55.5, "learning_rate": 1.260064724919094e-05, "loss": 0.0222, "step": 142900 }, { "epoch": 55.5, "learning_rate": 1.2600129449838188e-05, "loss": 0.2815, "step": 142910 }, { "epoch": 55.5, "learning_rate": 1.2599611650485437e-05, "loss": 0.0393, "step": 142920 }, { "epoch": 55.51, "learning_rate": 1.2599093851132687e-05, "loss": 0.134, "step": 142930 }, { "epoch": 55.51, "learning_rate": 1.2598576051779937e-05, "loss": 0.0007, "step": 142940 }, { "epoch": 55.51, "learning_rate": 1.2598058252427186e-05, "loss": 0.0808, "step": 142950 }, { "epoch": 55.52, "learning_rate": 1.2597540453074436e-05, "loss": 0.0899, "step": 142960 }, { "epoch": 55.52, "learning_rate": 1.2597022653721684e-05, "loss": 0.0457, "step": 142970 }, { "epoch": 55.53, "learning_rate": 1.2596504854368933e-05, "loss": 0.1032, "step": 142980 }, { "epoch": 55.53, "learning_rate": 1.2595987055016183e-05, "loss": 0.0363, "step": 142990 }, { "epoch": 55.53, "learning_rate": 1.259546925566343e-05, "loss": 0.0206, "step": 143000 }, { "epoch": 55.54, "learning_rate": 1.259495145631068e-05, "loss": 0.0031, "step": 143010 }, { "epoch": 55.54, "learning_rate": 1.259443365695793e-05, "loss": 0.0826, "step": 143020 }, { "epoch": 55.55, "learning_rate": 1.259391585760518e-05, "loss": 0.0495, "step": 143030 }, { "epoch": 55.55, "learning_rate": 1.2593398058252428e-05, "loss": 0.0423, "step": 143040 }, { "epoch": 55.55, "learning_rate": 1.2592880258899677e-05, "loss": 0.0232, "step": 143050 }, { "epoch": 55.56, "learning_rate": 1.2592362459546927e-05, "loss": 0.0047, "step": 143060 }, { "epoch": 55.56, "learning_rate": 1.2591844660194176e-05, "loss": 0.0665, "step": 143070 }, { "epoch": 55.57, "learning_rate": 1.2591326860841424e-05, "loss": 0.0061, "step": 143080 }, { "epoch": 55.57, "learning_rate": 1.2590809061488674e-05, "loss": 0.0218, "step": 143090 }, { "epoch": 55.57, "learning_rate": 1.2590291262135924e-05, "loss": 0.077, "step": 143100 }, { "epoch": 55.58, "learning_rate": 1.2589773462783173e-05, "loss": 0.0003, "step": 143110 }, { "epoch": 55.58, "learning_rate": 1.2589255663430421e-05, "loss": 0.2818, "step": 143120 }, { "epoch": 55.58, "learning_rate": 1.258873786407767e-05, "loss": 0.035, "step": 143130 }, { "epoch": 55.59, "learning_rate": 1.258822006472492e-05, "loss": 0.003, "step": 143140 }, { "epoch": 55.59, "learning_rate": 1.258770226537217e-05, "loss": 0.1882, "step": 143150 }, { "epoch": 55.6, "learning_rate": 1.2587184466019418e-05, "loss": 0.1245, "step": 143160 }, { "epoch": 55.6, "learning_rate": 1.2586666666666668e-05, "loss": 0.0603, "step": 143170 }, { "epoch": 55.6, "learning_rate": 1.2586148867313917e-05, "loss": 0.0704, "step": 143180 }, { "epoch": 55.61, "learning_rate": 1.2585631067961167e-05, "loss": 0.0881, "step": 143190 }, { "epoch": 55.61, "learning_rate": 1.2585113268608415e-05, "loss": 0.1678, "step": 143200 }, { "epoch": 55.62, "learning_rate": 1.2584595469255664e-05, "loss": 0.0404, "step": 143210 }, { "epoch": 55.62, "learning_rate": 1.2584077669902914e-05, "loss": 0.082, "step": 143220 }, { "epoch": 55.62, "learning_rate": 1.2583559870550164e-05, "loss": 0.0728, "step": 143230 }, { "epoch": 55.63, "learning_rate": 1.2583042071197412e-05, "loss": 0.1003, "step": 143240 }, { "epoch": 55.63, "learning_rate": 1.2582524271844661e-05, "loss": 0.028, "step": 143250 }, { "epoch": 55.63, "learning_rate": 1.258200647249191e-05, "loss": 0.0754, "step": 143260 }, { "epoch": 55.64, "learning_rate": 1.2581488673139159e-05, "loss": 0.1603, "step": 143270 }, { "epoch": 55.64, "learning_rate": 1.2580970873786408e-05, "loss": 0.1196, "step": 143280 }, { "epoch": 55.65, "learning_rate": 1.2580453074433658e-05, "loss": 0.0241, "step": 143290 }, { "epoch": 55.65, "learning_rate": 1.2579935275080908e-05, "loss": 0.0053, "step": 143300 }, { "epoch": 55.65, "learning_rate": 1.2579417475728157e-05, "loss": 0.0304, "step": 143310 }, { "epoch": 55.66, "learning_rate": 1.2578899676375405e-05, "loss": 0.2298, "step": 143320 }, { "epoch": 55.66, "learning_rate": 1.2578381877022655e-05, "loss": 0.1454, "step": 143330 }, { "epoch": 55.67, "learning_rate": 1.2577864077669904e-05, "loss": 0.1176, "step": 143340 }, { "epoch": 55.67, "learning_rate": 1.2577346278317152e-05, "loss": 0.0732, "step": 143350 }, { "epoch": 55.67, "learning_rate": 1.2576828478964402e-05, "loss": 0.1048, "step": 143360 }, { "epoch": 55.68, "learning_rate": 1.2576310679611651e-05, "loss": 0.0972, "step": 143370 }, { "epoch": 55.68, "learning_rate": 1.2575792880258901e-05, "loss": 0.0582, "step": 143380 }, { "epoch": 55.69, "learning_rate": 1.257527508090615e-05, "loss": 0.0973, "step": 143390 }, { "epoch": 55.69, "learning_rate": 1.2574757281553399e-05, "loss": 0.0533, "step": 143400 }, { "epoch": 55.69, "learning_rate": 1.2574239482200648e-05, "loss": 0.0311, "step": 143410 }, { "epoch": 55.7, "learning_rate": 1.2573721682847896e-05, "loss": 0.058, "step": 143420 }, { "epoch": 55.7, "learning_rate": 1.2573203883495146e-05, "loss": 0.0019, "step": 143430 }, { "epoch": 55.7, "learning_rate": 1.2572686084142395e-05, "loss": 0.1991, "step": 143440 }, { "epoch": 55.71, "learning_rate": 1.2572168284789645e-05, "loss": 0.0046, "step": 143450 }, { "epoch": 55.71, "learning_rate": 1.2571650485436895e-05, "loss": 0.0184, "step": 143460 }, { "epoch": 55.72, "learning_rate": 1.2571132686084144e-05, "loss": 0.0566, "step": 143470 }, { "epoch": 55.72, "learning_rate": 1.2570614886731392e-05, "loss": 0.0828, "step": 143480 }, { "epoch": 55.72, "learning_rate": 1.2570097087378642e-05, "loss": 0.0894, "step": 143490 }, { "epoch": 55.73, "learning_rate": 1.256957928802589e-05, "loss": 0.0206, "step": 143500 }, { "epoch": 55.73, "learning_rate": 1.256906148867314e-05, "loss": 0.018, "step": 143510 }, { "epoch": 55.74, "learning_rate": 1.2568543689320389e-05, "loss": 0.0895, "step": 143520 }, { "epoch": 55.74, "learning_rate": 1.2568025889967639e-05, "loss": 0.0143, "step": 143530 }, { "epoch": 55.74, "learning_rate": 1.2567508090614888e-05, "loss": 0.0649, "step": 143540 }, { "epoch": 55.75, "learning_rate": 1.2566990291262138e-05, "loss": 0.0847, "step": 143550 }, { "epoch": 55.75, "learning_rate": 1.2566472491909387e-05, "loss": 0.0232, "step": 143560 }, { "epoch": 55.76, "learning_rate": 1.2565954692556634e-05, "loss": 0.0274, "step": 143570 }, { "epoch": 55.76, "learning_rate": 1.2565436893203883e-05, "loss": 0.0137, "step": 143580 }, { "epoch": 55.76, "learning_rate": 1.2564919093851133e-05, "loss": 0.0215, "step": 143590 }, { "epoch": 55.77, "learning_rate": 1.2564401294498383e-05, "loss": 0.0162, "step": 143600 }, { "epoch": 55.77, "learning_rate": 1.2563883495145632e-05, "loss": 0.0606, "step": 143610 }, { "epoch": 55.77, "learning_rate": 1.2563365695792882e-05, "loss": 0.2037, "step": 143620 }, { "epoch": 55.78, "learning_rate": 1.2562847896440131e-05, "loss": 0.0794, "step": 143630 }, { "epoch": 55.78, "learning_rate": 1.2562330097087381e-05, "loss": 0.2215, "step": 143640 }, { "epoch": 55.79, "learning_rate": 1.2561812297734627e-05, "loss": 0.0536, "step": 143650 }, { "epoch": 55.79, "learning_rate": 1.2561294498381877e-05, "loss": 0.0517, "step": 143660 }, { "epoch": 55.79, "learning_rate": 1.2560776699029126e-05, "loss": 0.0543, "step": 143670 }, { "epoch": 55.8, "learning_rate": 1.2560258899676376e-05, "loss": 0.0237, "step": 143680 }, { "epoch": 55.8, "learning_rate": 1.2559741100323626e-05, "loss": 0.0298, "step": 143690 }, { "epoch": 55.81, "learning_rate": 1.2559223300970875e-05, "loss": 0.0414, "step": 143700 }, { "epoch": 55.81, "learning_rate": 1.2558705501618125e-05, "loss": 0.0625, "step": 143710 }, { "epoch": 55.81, "learning_rate": 1.2558187702265375e-05, "loss": 0.1638, "step": 143720 }, { "epoch": 55.82, "learning_rate": 1.255766990291262e-05, "loss": 0.0212, "step": 143730 }, { "epoch": 55.82, "learning_rate": 1.255715210355987e-05, "loss": 0.0752, "step": 143740 }, { "epoch": 55.83, "learning_rate": 1.255663430420712e-05, "loss": 0.1783, "step": 143750 }, { "epoch": 55.83, "learning_rate": 1.255611650485437e-05, "loss": 0.1245, "step": 143760 }, { "epoch": 55.83, "learning_rate": 1.255559870550162e-05, "loss": 0.2137, "step": 143770 }, { "epoch": 55.84, "learning_rate": 1.2555080906148869e-05, "loss": 0.0915, "step": 143780 }, { "epoch": 55.84, "learning_rate": 1.2554563106796118e-05, "loss": 0.0804, "step": 143790 }, { "epoch": 55.84, "learning_rate": 1.2554045307443368e-05, "loss": 0.0305, "step": 143800 }, { "epoch": 55.85, "learning_rate": 1.2553527508090614e-05, "loss": 0.0047, "step": 143810 }, { "epoch": 55.85, "learning_rate": 1.2553009708737864e-05, "loss": 0.1661, "step": 143820 }, { "epoch": 55.86, "learning_rate": 1.2552491909385114e-05, "loss": 0.0009, "step": 143830 }, { "epoch": 55.86, "learning_rate": 1.2551974110032363e-05, "loss": 0.0589, "step": 143840 }, { "epoch": 55.86, "learning_rate": 1.2551456310679613e-05, "loss": 0.0319, "step": 143850 }, { "epoch": 55.87, "learning_rate": 1.2550938511326862e-05, "loss": 0.0194, "step": 143860 }, { "epoch": 55.87, "learning_rate": 1.2550420711974112e-05, "loss": 0.0272, "step": 143870 }, { "epoch": 55.88, "learning_rate": 1.2549902912621362e-05, "loss": 0.0946, "step": 143880 }, { "epoch": 55.88, "learning_rate": 1.2549385113268608e-05, "loss": 0.1634, "step": 143890 }, { "epoch": 55.88, "learning_rate": 1.2548867313915857e-05, "loss": 0.007, "step": 143900 }, { "epoch": 55.89, "learning_rate": 1.2548349514563107e-05, "loss": 0.0028, "step": 143910 }, { "epoch": 55.89, "learning_rate": 1.2547831715210357e-05, "loss": 0.0465, "step": 143920 }, { "epoch": 55.9, "learning_rate": 1.2547313915857606e-05, "loss": 0.1184, "step": 143930 }, { "epoch": 55.9, "learning_rate": 1.2546796116504856e-05, "loss": 0.0128, "step": 143940 }, { "epoch": 55.9, "learning_rate": 1.2546278317152106e-05, "loss": 0.0457, "step": 143950 }, { "epoch": 55.91, "learning_rate": 1.2545760517799355e-05, "loss": 0.1039, "step": 143960 }, { "epoch": 55.91, "learning_rate": 1.2545242718446601e-05, "loss": 0.044, "step": 143970 }, { "epoch": 55.91, "learning_rate": 1.2544724919093851e-05, "loss": 0.0427, "step": 143980 }, { "epoch": 55.92, "learning_rate": 1.25442071197411e-05, "loss": 0.0011, "step": 143990 }, { "epoch": 55.92, "learning_rate": 1.254368932038835e-05, "loss": 0.114, "step": 144000 }, { "epoch": 55.93, "learning_rate": 1.25431715210356e-05, "loss": 0.1906, "step": 144010 }, { "epoch": 55.93, "learning_rate": 1.254265372168285e-05, "loss": 0.2447, "step": 144020 }, { "epoch": 55.93, "learning_rate": 1.2542135922330099e-05, "loss": 0.068, "step": 144030 }, { "epoch": 55.94, "learning_rate": 1.2541618122977349e-05, "loss": 0.0058, "step": 144040 }, { "epoch": 55.94, "learning_rate": 1.2541100323624595e-05, "loss": 0.0769, "step": 144050 }, { "epoch": 55.95, "learning_rate": 1.2540582524271845e-05, "loss": 0.0694, "step": 144060 }, { "epoch": 55.95, "learning_rate": 1.2540064724919094e-05, "loss": 0.0312, "step": 144070 }, { "epoch": 55.95, "learning_rate": 1.2539546925566344e-05, "loss": 0.0215, "step": 144080 }, { "epoch": 55.96, "learning_rate": 1.2539029126213593e-05, "loss": 0.0828, "step": 144090 }, { "epoch": 55.96, "learning_rate": 1.2538511326860843e-05, "loss": 0.0159, "step": 144100 }, { "epoch": 55.97, "learning_rate": 1.2537993527508093e-05, "loss": 0.0943, "step": 144110 }, { "epoch": 55.97, "learning_rate": 1.2537475728155342e-05, "loss": 0.0696, "step": 144120 }, { "epoch": 55.97, "learning_rate": 1.2536957928802592e-05, "loss": 0.0791, "step": 144130 }, { "epoch": 55.98, "learning_rate": 1.2536440129449838e-05, "loss": 0.0547, "step": 144140 }, { "epoch": 55.98, "learning_rate": 1.2535922330097088e-05, "loss": 0.025, "step": 144150 }, { "epoch": 55.98, "learning_rate": 1.2535404530744337e-05, "loss": 0.2083, "step": 144160 }, { "epoch": 55.99, "learning_rate": 1.2534886731391587e-05, "loss": 0.0352, "step": 144170 }, { "epoch": 55.99, "learning_rate": 1.2534368932038837e-05, "loss": 0.0056, "step": 144180 }, { "epoch": 56.0, "learning_rate": 1.2533851132686086e-05, "loss": 0.0707, "step": 144190 }, { "epoch": 56.0, "learning_rate": 1.2533333333333336e-05, "loss": 0.013, "step": 144200 }, { "epoch": 56.0, "eval_accuracy": 0.9474552957359009, "eval_loss": 0.33862391114234924, "eval_runtime": 8.2896, "eval_samples_per_second": 438.504, "eval_steps_per_second": 54.888, "step": 144200 }, { "epoch": 56.0, "learning_rate": 1.2532815533980584e-05, "loss": 0.1405, "step": 144210 }, { "epoch": 56.01, "learning_rate": 1.2532297734627832e-05, "loss": 0.1296, "step": 144220 }, { "epoch": 56.01, "learning_rate": 1.2531779935275081e-05, "loss": 0.0601, "step": 144230 }, { "epoch": 56.02, "learning_rate": 1.2531262135922331e-05, "loss": 0.0043, "step": 144240 }, { "epoch": 56.02, "learning_rate": 1.253074433656958e-05, "loss": 0.0137, "step": 144250 }, { "epoch": 56.02, "learning_rate": 1.253022653721683e-05, "loss": 0.0477, "step": 144260 }, { "epoch": 56.03, "learning_rate": 1.252970873786408e-05, "loss": 0.0662, "step": 144270 }, { "epoch": 56.03, "learning_rate": 1.252919093851133e-05, "loss": 0.063, "step": 144280 }, { "epoch": 56.03, "learning_rate": 1.2528673139158577e-05, "loss": 0.0343, "step": 144290 }, { "epoch": 56.04, "learning_rate": 1.2528155339805825e-05, "loss": 0.2374, "step": 144300 }, { "epoch": 56.04, "learning_rate": 1.2527637540453075e-05, "loss": 0.0435, "step": 144310 }, { "epoch": 56.05, "learning_rate": 1.2527119741100325e-05, "loss": 0.0995, "step": 144320 }, { "epoch": 56.05, "learning_rate": 1.2526601941747574e-05, "loss": 0.0177, "step": 144330 }, { "epoch": 56.05, "learning_rate": 1.2526084142394824e-05, "loss": 0.1354, "step": 144340 }, { "epoch": 56.06, "learning_rate": 1.2525566343042073e-05, "loss": 0.0589, "step": 144350 }, { "epoch": 56.06, "learning_rate": 1.2525048543689321e-05, "loss": 0.0906, "step": 144360 }, { "epoch": 56.07, "learning_rate": 1.2524530744336571e-05, "loss": 0.0372, "step": 144370 }, { "epoch": 56.07, "learning_rate": 1.2524012944983819e-05, "loss": 0.0242, "step": 144380 }, { "epoch": 56.07, "learning_rate": 1.2523495145631068e-05, "loss": 0.0014, "step": 144390 }, { "epoch": 56.08, "learning_rate": 1.2522977346278318e-05, "loss": 0.0441, "step": 144400 }, { "epoch": 56.08, "learning_rate": 1.2522459546925568e-05, "loss": 0.0789, "step": 144410 }, { "epoch": 56.09, "learning_rate": 1.2521941747572817e-05, "loss": 0.0829, "step": 144420 }, { "epoch": 56.09, "learning_rate": 1.2521423948220067e-05, "loss": 0.1228, "step": 144430 }, { "epoch": 56.09, "learning_rate": 1.2520906148867315e-05, "loss": 0.0412, "step": 144440 }, { "epoch": 56.1, "learning_rate": 1.2520388349514564e-05, "loss": 0.0311, "step": 144450 }, { "epoch": 56.1, "learning_rate": 1.2519870550161812e-05, "loss": 0.2098, "step": 144460 }, { "epoch": 56.1, "learning_rate": 1.2519352750809062e-05, "loss": 0.1536, "step": 144470 }, { "epoch": 56.11, "learning_rate": 1.2518834951456312e-05, "loss": 0.0453, "step": 144480 }, { "epoch": 56.11, "learning_rate": 1.2518317152103561e-05, "loss": 0.0191, "step": 144490 }, { "epoch": 56.12, "learning_rate": 1.2517799352750811e-05, "loss": 0.098, "step": 144500 }, { "epoch": 56.12, "learning_rate": 1.2517281553398059e-05, "loss": 0.1214, "step": 144510 }, { "epoch": 56.12, "learning_rate": 1.2516763754045308e-05, "loss": 0.0882, "step": 144520 }, { "epoch": 56.13, "learning_rate": 1.2516245954692558e-05, "loss": 0.0774, "step": 144530 }, { "epoch": 56.13, "learning_rate": 1.2515728155339806e-05, "loss": 0.1478, "step": 144540 }, { "epoch": 56.14, "learning_rate": 1.2515210355987056e-05, "loss": 0.0037, "step": 144550 }, { "epoch": 56.14, "learning_rate": 1.2514692556634305e-05, "loss": 0.0398, "step": 144560 }, { "epoch": 56.14, "learning_rate": 1.2514174757281555e-05, "loss": 0.0476, "step": 144570 }, { "epoch": 56.15, "learning_rate": 1.2513656957928804e-05, "loss": 0.0338, "step": 144580 }, { "epoch": 56.15, "learning_rate": 1.2513139158576052e-05, "loss": 0.0517, "step": 144590 }, { "epoch": 56.16, "learning_rate": 1.2512621359223302e-05, "loss": 0.0303, "step": 144600 }, { "epoch": 56.16, "learning_rate": 1.2512103559870552e-05, "loss": 0.0328, "step": 144610 }, { "epoch": 56.16, "learning_rate": 1.25115857605178e-05, "loss": 0.0506, "step": 144620 }, { "epoch": 56.17, "learning_rate": 1.2511067961165049e-05, "loss": 0.0563, "step": 144630 }, { "epoch": 56.17, "learning_rate": 1.2510550161812299e-05, "loss": 0.0519, "step": 144640 }, { "epoch": 56.17, "learning_rate": 1.2510032362459548e-05, "loss": 0.1511, "step": 144650 }, { "epoch": 56.18, "learning_rate": 1.2509514563106798e-05, "loss": 0.1289, "step": 144660 }, { "epoch": 56.18, "learning_rate": 1.2508996763754046e-05, "loss": 0.001, "step": 144670 }, { "epoch": 56.19, "learning_rate": 1.2508478964401296e-05, "loss": 0.0323, "step": 144680 }, { "epoch": 56.19, "learning_rate": 1.2507961165048545e-05, "loss": 0.042, "step": 144690 }, { "epoch": 56.19, "learning_rate": 1.2507443365695795e-05, "loss": 0.0226, "step": 144700 }, { "epoch": 56.2, "learning_rate": 1.2506925566343043e-05, "loss": 0.001, "step": 144710 }, { "epoch": 56.2, "learning_rate": 1.2506407766990292e-05, "loss": 0.0659, "step": 144720 }, { "epoch": 56.21, "learning_rate": 1.2505889967637542e-05, "loss": 0.1402, "step": 144730 }, { "epoch": 56.21, "learning_rate": 1.250537216828479e-05, "loss": 0.1722, "step": 144740 }, { "epoch": 56.21, "learning_rate": 1.250485436893204e-05, "loss": 0.1299, "step": 144750 }, { "epoch": 56.22, "learning_rate": 1.2504336569579289e-05, "loss": 0.1568, "step": 144760 }, { "epoch": 56.22, "learning_rate": 1.2503818770226539e-05, "loss": 0.0164, "step": 144770 }, { "epoch": 56.23, "learning_rate": 1.2503300970873788e-05, "loss": 0.0609, "step": 144780 }, { "epoch": 56.23, "learning_rate": 1.2502783171521036e-05, "loss": 0.0121, "step": 144790 }, { "epoch": 56.23, "learning_rate": 1.2502265372168286e-05, "loss": 0.1821, "step": 144800 }, { "epoch": 56.24, "learning_rate": 1.2501747572815535e-05, "loss": 0.0383, "step": 144810 }, { "epoch": 56.24, "learning_rate": 1.2501229773462783e-05, "loss": 0.0706, "step": 144820 }, { "epoch": 56.24, "learning_rate": 1.2500711974110033e-05, "loss": 0.0097, "step": 144830 }, { "epoch": 56.25, "learning_rate": 1.2500194174757283e-05, "loss": 0.1106, "step": 144840 }, { "epoch": 56.25, "learning_rate": 1.2499676375404532e-05, "loss": 0.0537, "step": 144850 }, { "epoch": 56.26, "learning_rate": 1.2499158576051782e-05, "loss": 0.0929, "step": 144860 }, { "epoch": 56.26, "learning_rate": 1.249864077669903e-05, "loss": 0.0985, "step": 144870 }, { "epoch": 56.26, "learning_rate": 1.249812297734628e-05, "loss": 0.0469, "step": 144880 }, { "epoch": 56.27, "learning_rate": 1.2497605177993527e-05, "loss": 0.0009, "step": 144890 }, { "epoch": 56.27, "learning_rate": 1.2497087378640777e-05, "loss": 0.1414, "step": 144900 }, { "epoch": 56.28, "learning_rate": 1.2496569579288027e-05, "loss": 0.1001, "step": 144910 }, { "epoch": 56.28, "learning_rate": 1.2496051779935276e-05, "loss": 0.0518, "step": 144920 }, { "epoch": 56.28, "learning_rate": 1.2495533980582526e-05, "loss": 0.0066, "step": 144930 }, { "epoch": 56.29, "learning_rate": 1.2495016181229775e-05, "loss": 0.0523, "step": 144940 }, { "epoch": 56.29, "learning_rate": 1.2494498381877023e-05, "loss": 0.1074, "step": 144950 }, { "epoch": 56.3, "learning_rate": 1.2493980582524273e-05, "loss": 0.1033, "step": 144960 }, { "epoch": 56.3, "learning_rate": 1.2493462783171521e-05, "loss": 0.0064, "step": 144970 }, { "epoch": 56.3, "learning_rate": 1.249294498381877e-05, "loss": 0.1705, "step": 144980 }, { "epoch": 56.31, "learning_rate": 1.249242718446602e-05, "loss": 0.1462, "step": 144990 }, { "epoch": 56.31, "learning_rate": 1.249190938511327e-05, "loss": 0.0427, "step": 145000 }, { "epoch": 56.31, "learning_rate": 1.249139158576052e-05, "loss": 0.0257, "step": 145010 }, { "epoch": 56.32, "learning_rate": 1.2490873786407769e-05, "loss": 0.0991, "step": 145020 }, { "epoch": 56.32, "learning_rate": 1.2490355987055017e-05, "loss": 0.0151, "step": 145030 }, { "epoch": 56.33, "learning_rate": 1.2489838187702265e-05, "loss": 0.0776, "step": 145040 }, { "epoch": 56.33, "learning_rate": 1.2489320388349514e-05, "loss": 0.1153, "step": 145050 }, { "epoch": 56.33, "learning_rate": 1.2488802588996764e-05, "loss": 0.0734, "step": 145060 }, { "epoch": 56.34, "learning_rate": 1.2488284789644014e-05, "loss": 0.0585, "step": 145070 }, { "epoch": 56.34, "learning_rate": 1.2487766990291263e-05, "loss": 0.0278, "step": 145080 }, { "epoch": 56.35, "learning_rate": 1.2487249190938513e-05, "loss": 0.0529, "step": 145090 }, { "epoch": 56.35, "learning_rate": 1.2486731391585763e-05, "loss": 0.1558, "step": 145100 }, { "epoch": 56.35, "learning_rate": 1.248621359223301e-05, "loss": 0.1343, "step": 145110 }, { "epoch": 56.36, "learning_rate": 1.2485695792880258e-05, "loss": 0.023, "step": 145120 }, { "epoch": 56.36, "learning_rate": 1.2485177993527508e-05, "loss": 0.0238, "step": 145130 }, { "epoch": 56.37, "learning_rate": 1.2484660194174758e-05, "loss": 0.1609, "step": 145140 }, { "epoch": 56.37, "learning_rate": 1.2484142394822007e-05, "loss": 0.1522, "step": 145150 }, { "epoch": 56.37, "learning_rate": 1.2483624595469257e-05, "loss": 0.0528, "step": 145160 }, { "epoch": 56.38, "learning_rate": 1.2483106796116506e-05, "loss": 0.0823, "step": 145170 }, { "epoch": 56.38, "learning_rate": 1.2482588996763756e-05, "loss": 0.1223, "step": 145180 }, { "epoch": 56.38, "learning_rate": 1.2482071197411002e-05, "loss": 0.2386, "step": 145190 }, { "epoch": 56.39, "learning_rate": 1.2481553398058252e-05, "loss": 0.1902, "step": 145200 }, { "epoch": 56.39, "learning_rate": 1.2481035598705502e-05, "loss": 0.0928, "step": 145210 }, { "epoch": 56.4, "learning_rate": 1.2480517799352751e-05, "loss": 0.0782, "step": 145220 }, { "epoch": 56.4, "learning_rate": 1.248e-05, "loss": 0.0974, "step": 145230 }, { "epoch": 56.4, "learning_rate": 1.247948220064725e-05, "loss": 0.1363, "step": 145240 }, { "epoch": 56.41, "learning_rate": 1.24789644012945e-05, "loss": 0.0192, "step": 145250 }, { "epoch": 56.41, "learning_rate": 1.247844660194175e-05, "loss": 0.05, "step": 145260 }, { "epoch": 56.42, "learning_rate": 1.2477928802589e-05, "loss": 0.0335, "step": 145270 }, { "epoch": 56.42, "learning_rate": 1.2477411003236245e-05, "loss": 0.077, "step": 145280 }, { "epoch": 56.42, "learning_rate": 1.2476893203883495e-05, "loss": 0.0285, "step": 145290 }, { "epoch": 56.43, "learning_rate": 1.2476375404530745e-05, "loss": 0.1448, "step": 145300 }, { "epoch": 56.43, "learning_rate": 1.2475857605177994e-05, "loss": 0.0026, "step": 145310 }, { "epoch": 56.43, "learning_rate": 1.2475339805825244e-05, "loss": 0.0894, "step": 145320 }, { "epoch": 56.44, "learning_rate": 1.2474822006472494e-05, "loss": 0.125, "step": 145330 }, { "epoch": 56.44, "learning_rate": 1.2474304207119743e-05, "loss": 0.065, "step": 145340 }, { "epoch": 56.45, "learning_rate": 1.2473786407766993e-05, "loss": 0.0414, "step": 145350 }, { "epoch": 56.45, "learning_rate": 1.2473268608414239e-05, "loss": 0.0133, "step": 145360 }, { "epoch": 56.45, "learning_rate": 1.2472750809061489e-05, "loss": 0.1055, "step": 145370 }, { "epoch": 56.46, "learning_rate": 1.2472233009708738e-05, "loss": 0.0154, "step": 145380 }, { "epoch": 56.46, "learning_rate": 1.2471715210355988e-05, "loss": 0.0679, "step": 145390 }, { "epoch": 56.47, "learning_rate": 1.2471197411003238e-05, "loss": 0.0034, "step": 145400 }, { "epoch": 56.47, "learning_rate": 1.2470679611650487e-05, "loss": 0.0218, "step": 145410 }, { "epoch": 56.47, "learning_rate": 1.2470161812297737e-05, "loss": 0.2282, "step": 145420 }, { "epoch": 56.48, "learning_rate": 1.2469644012944986e-05, "loss": 0.1077, "step": 145430 }, { "epoch": 56.48, "learning_rate": 1.2469126213592233e-05, "loss": 0.0683, "step": 145440 }, { "epoch": 56.49, "learning_rate": 1.2468608414239482e-05, "loss": 0.112, "step": 145450 }, { "epoch": 56.49, "learning_rate": 1.2468090614886732e-05, "loss": 0.0016, "step": 145460 }, { "epoch": 56.49, "learning_rate": 1.2467572815533981e-05, "loss": 0.0407, "step": 145470 }, { "epoch": 56.5, "learning_rate": 1.2467055016181231e-05, "loss": 0.0764, "step": 145480 }, { "epoch": 56.5, "learning_rate": 1.246653721682848e-05, "loss": 0.0518, "step": 145490 }, { "epoch": 56.5, "learning_rate": 1.246601941747573e-05, "loss": 0.0757, "step": 145500 }, { "epoch": 56.51, "learning_rate": 1.246550161812298e-05, "loss": 0.1609, "step": 145510 }, { "epoch": 56.51, "learning_rate": 1.2464983818770226e-05, "loss": 0.0666, "step": 145520 }, { "epoch": 56.52, "learning_rate": 1.2464466019417476e-05, "loss": 0.1055, "step": 145530 }, { "epoch": 56.52, "learning_rate": 1.2463948220064725e-05, "loss": 0.0554, "step": 145540 }, { "epoch": 56.52, "learning_rate": 1.2463430420711975e-05, "loss": 0.0167, "step": 145550 }, { "epoch": 56.53, "learning_rate": 1.2462912621359225e-05, "loss": 0.0073, "step": 145560 }, { "epoch": 56.53, "learning_rate": 1.2462394822006474e-05, "loss": 0.1386, "step": 145570 }, { "epoch": 56.54, "learning_rate": 1.2461877022653724e-05, "loss": 0.1172, "step": 145580 }, { "epoch": 56.54, "learning_rate": 1.2461359223300973e-05, "loss": 0.0151, "step": 145590 }, { "epoch": 56.54, "learning_rate": 1.246084142394822e-05, "loss": 0.1412, "step": 145600 }, { "epoch": 56.55, "learning_rate": 1.246032362459547e-05, "loss": 0.005, "step": 145610 }, { "epoch": 56.55, "learning_rate": 1.2459805825242719e-05, "loss": 0.0171, "step": 145620 }, { "epoch": 56.56, "learning_rate": 1.2459288025889969e-05, "loss": 0.0075, "step": 145630 }, { "epoch": 56.56, "learning_rate": 1.2458770226537218e-05, "loss": 0.1801, "step": 145640 }, { "epoch": 56.56, "learning_rate": 1.2458252427184468e-05, "loss": 0.097, "step": 145650 }, { "epoch": 56.57, "learning_rate": 1.2457734627831717e-05, "loss": 0.0083, "step": 145660 }, { "epoch": 56.57, "learning_rate": 1.2457216828478967e-05, "loss": 0.0776, "step": 145670 }, { "epoch": 56.57, "learning_rate": 1.2456699029126213e-05, "loss": 0.0644, "step": 145680 }, { "epoch": 56.58, "learning_rate": 1.2456181229773463e-05, "loss": 0.0409, "step": 145690 }, { "epoch": 56.58, "learning_rate": 1.2455663430420713e-05, "loss": 0.0985, "step": 145700 }, { "epoch": 56.59, "learning_rate": 1.2455145631067962e-05, "loss": 0.1275, "step": 145710 }, { "epoch": 56.59, "learning_rate": 1.2454627831715212e-05, "loss": 0.1726, "step": 145720 }, { "epoch": 56.59, "learning_rate": 1.2454110032362461e-05, "loss": 0.0899, "step": 145730 }, { "epoch": 56.6, "learning_rate": 1.2453592233009711e-05, "loss": 0.1321, "step": 145740 }, { "epoch": 56.6, "learning_rate": 1.245307443365696e-05, "loss": 0.0628, "step": 145750 }, { "epoch": 56.61, "learning_rate": 1.2452556634304209e-05, "loss": 0.0192, "step": 145760 }, { "epoch": 56.61, "learning_rate": 1.2452038834951456e-05, "loss": 0.0215, "step": 145770 }, { "epoch": 56.61, "learning_rate": 1.2451521035598706e-05, "loss": 0.0792, "step": 145780 }, { "epoch": 56.62, "learning_rate": 1.2451003236245956e-05, "loss": 0.0886, "step": 145790 }, { "epoch": 56.62, "learning_rate": 1.2450485436893205e-05, "loss": 0.0013, "step": 145800 }, { "epoch": 56.63, "learning_rate": 1.2449967637540455e-05, "loss": 0.146, "step": 145810 }, { "epoch": 56.63, "learning_rate": 1.2449449838187705e-05, "loss": 0.025, "step": 145820 }, { "epoch": 56.63, "learning_rate": 1.2448932038834952e-05, "loss": 0.0378, "step": 145830 }, { "epoch": 56.64, "learning_rate": 1.2448414239482202e-05, "loss": 0.0779, "step": 145840 }, { "epoch": 56.64, "learning_rate": 1.244789644012945e-05, "loss": 0.0331, "step": 145850 }, { "epoch": 56.64, "learning_rate": 1.24473786407767e-05, "loss": 0.0143, "step": 145860 }, { "epoch": 56.65, "learning_rate": 1.244686084142395e-05, "loss": 0.1706, "step": 145870 }, { "epoch": 56.65, "learning_rate": 1.2446343042071199e-05, "loss": 0.1078, "step": 145880 }, { "epoch": 56.66, "learning_rate": 1.2445825242718448e-05, "loss": 0.0256, "step": 145890 }, { "epoch": 56.66, "learning_rate": 1.2445307443365698e-05, "loss": 0.0561, "step": 145900 }, { "epoch": 56.66, "learning_rate": 1.2444789644012946e-05, "loss": 0.0901, "step": 145910 }, { "epoch": 56.67, "learning_rate": 1.2444271844660196e-05, "loss": 0.0444, "step": 145920 }, { "epoch": 56.67, "learning_rate": 1.2443754045307444e-05, "loss": 0.0166, "step": 145930 }, { "epoch": 56.68, "learning_rate": 1.2443236245954693e-05, "loss": 0.0555, "step": 145940 }, { "epoch": 56.68, "learning_rate": 1.2442718446601943e-05, "loss": 0.1718, "step": 145950 }, { "epoch": 56.68, "learning_rate": 1.2442200647249192e-05, "loss": 0.0022, "step": 145960 }, { "epoch": 56.69, "learning_rate": 1.2441682847896442e-05, "loss": 0.1049, "step": 145970 }, { "epoch": 56.69, "learning_rate": 1.244116504854369e-05, "loss": 0.074, "step": 145980 }, { "epoch": 56.7, "learning_rate": 1.244064724919094e-05, "loss": 0.1199, "step": 145990 }, { "epoch": 56.7, "learning_rate": 1.244012944983819e-05, "loss": 0.0177, "step": 146000 }, { "epoch": 56.7, "learning_rate": 1.2439611650485437e-05, "loss": 0.0922, "step": 146010 }, { "epoch": 56.71, "learning_rate": 1.2439093851132687e-05, "loss": 0.1021, "step": 146020 }, { "epoch": 56.71, "learning_rate": 1.2438576051779936e-05, "loss": 0.1059, "step": 146030 }, { "epoch": 56.71, "learning_rate": 1.2438058252427186e-05, "loss": 0.0174, "step": 146040 }, { "epoch": 56.72, "learning_rate": 1.2437540453074436e-05, "loss": 0.0574, "step": 146050 }, { "epoch": 56.72, "learning_rate": 1.2437022653721684e-05, "loss": 0.0244, "step": 146060 }, { "epoch": 56.73, "learning_rate": 1.2436504854368933e-05, "loss": 0.0312, "step": 146070 }, { "epoch": 56.73, "learning_rate": 1.2435987055016183e-05, "loss": 0.2167, "step": 146080 }, { "epoch": 56.73, "learning_rate": 1.243546925566343e-05, "loss": 0.0303, "step": 146090 }, { "epoch": 56.74, "learning_rate": 1.243495145631068e-05, "loss": 0.0338, "step": 146100 }, { "epoch": 56.74, "learning_rate": 1.243443365695793e-05, "loss": 0.1458, "step": 146110 }, { "epoch": 56.75, "learning_rate": 1.243391585760518e-05, "loss": 0.0193, "step": 146120 }, { "epoch": 56.75, "learning_rate": 1.2433398058252427e-05, "loss": 0.052, "step": 146130 }, { "epoch": 56.75, "learning_rate": 1.2432880258899677e-05, "loss": 0.2126, "step": 146140 }, { "epoch": 56.76, "learning_rate": 1.2432362459546927e-05, "loss": 0.1479, "step": 146150 }, { "epoch": 56.76, "learning_rate": 1.2431844660194176e-05, "loss": 0.067, "step": 146160 }, { "epoch": 56.77, "learning_rate": 1.2431326860841424e-05, "loss": 0.0446, "step": 146170 }, { "epoch": 56.77, "learning_rate": 1.2430809061488674e-05, "loss": 0.0296, "step": 146180 }, { "epoch": 56.77, "learning_rate": 1.2430291262135923e-05, "loss": 0.0359, "step": 146190 }, { "epoch": 56.78, "learning_rate": 1.2429773462783173e-05, "loss": 0.0748, "step": 146200 }, { "epoch": 56.78, "learning_rate": 1.2429255663430421e-05, "loss": 0.0648, "step": 146210 }, { "epoch": 56.78, "learning_rate": 1.242873786407767e-05, "loss": 0.0905, "step": 146220 }, { "epoch": 56.79, "learning_rate": 1.242822006472492e-05, "loss": 0.1142, "step": 146230 }, { "epoch": 56.79, "learning_rate": 1.242770226537217e-05, "loss": 0.1133, "step": 146240 }, { "epoch": 56.8, "learning_rate": 1.2427184466019418e-05, "loss": 0.1787, "step": 146250 }, { "epoch": 56.8, "learning_rate": 1.2426666666666667e-05, "loss": 0.0878, "step": 146260 }, { "epoch": 56.8, "learning_rate": 1.2426148867313917e-05, "loss": 0.0912, "step": 146270 }, { "epoch": 56.81, "learning_rate": 1.2425631067961167e-05, "loss": 0.0152, "step": 146280 }, { "epoch": 56.81, "learning_rate": 1.2425113268608415e-05, "loss": 0.0843, "step": 146290 }, { "epoch": 56.82, "learning_rate": 1.2424595469255664e-05, "loss": 0.1392, "step": 146300 }, { "epoch": 56.82, "learning_rate": 1.2424077669902914e-05, "loss": 0.0503, "step": 146310 }, { "epoch": 56.82, "learning_rate": 1.2423559870550163e-05, "loss": 0.0029, "step": 146320 }, { "epoch": 56.83, "learning_rate": 1.2423042071197413e-05, "loss": 0.1003, "step": 146330 }, { "epoch": 56.83, "learning_rate": 1.2422524271844661e-05, "loss": 0.0598, "step": 146340 }, { "epoch": 56.83, "learning_rate": 1.242200647249191e-05, "loss": 0.1111, "step": 146350 }, { "epoch": 56.84, "learning_rate": 1.2421488673139159e-05, "loss": 0.0496, "step": 146360 }, { "epoch": 56.84, "learning_rate": 1.2420970873786408e-05, "loss": 0.1087, "step": 146370 }, { "epoch": 56.85, "learning_rate": 1.2420453074433658e-05, "loss": 0.2092, "step": 146380 }, { "epoch": 56.85, "learning_rate": 1.2419935275080907e-05, "loss": 0.1021, "step": 146390 }, { "epoch": 56.85, "learning_rate": 1.2419417475728157e-05, "loss": 0.0052, "step": 146400 }, { "epoch": 56.86, "learning_rate": 1.2418899676375407e-05, "loss": 0.0113, "step": 146410 }, { "epoch": 56.86, "learning_rate": 1.2418381877022655e-05, "loss": 0.27, "step": 146420 }, { "epoch": 56.87, "learning_rate": 1.2417864077669904e-05, "loss": 0.0077, "step": 146430 }, { "epoch": 56.87, "learning_rate": 1.2417346278317152e-05, "loss": 0.0268, "step": 146440 }, { "epoch": 56.87, "learning_rate": 1.2416828478964402e-05, "loss": 0.1367, "step": 146450 }, { "epoch": 56.88, "learning_rate": 1.2416310679611651e-05, "loss": 0.1242, "step": 146460 }, { "epoch": 56.88, "learning_rate": 1.2415792880258901e-05, "loss": 0.1266, "step": 146470 }, { "epoch": 56.89, "learning_rate": 1.241527508090615e-05, "loss": 0.2708, "step": 146480 }, { "epoch": 56.89, "learning_rate": 1.24147572815534e-05, "loss": 0.2052, "step": 146490 }, { "epoch": 56.89, "learning_rate": 1.2414239482200648e-05, "loss": 0.0753, "step": 146500 }, { "epoch": 56.9, "learning_rate": 1.2413721682847896e-05, "loss": 0.0331, "step": 146510 }, { "epoch": 56.9, "learning_rate": 1.2413203883495146e-05, "loss": 0.0424, "step": 146520 }, { "epoch": 56.9, "learning_rate": 1.2412686084142395e-05, "loss": 0.1217, "step": 146530 }, { "epoch": 56.91, "learning_rate": 1.2412168284789645e-05, "loss": 0.021, "step": 146540 }, { "epoch": 56.91, "learning_rate": 1.2411650485436894e-05, "loss": 0.0602, "step": 146550 }, { "epoch": 56.92, "learning_rate": 1.2411132686084144e-05, "loss": 0.0227, "step": 146560 }, { "epoch": 56.92, "learning_rate": 1.2410614886731394e-05, "loss": 0.0426, "step": 146570 }, { "epoch": 56.92, "learning_rate": 1.2410097087378642e-05, "loss": 0.0595, "step": 146580 }, { "epoch": 56.93, "learning_rate": 1.240957928802589e-05, "loss": 0.1152, "step": 146590 }, { "epoch": 56.93, "learning_rate": 1.240906148867314e-05, "loss": 0.0169, "step": 146600 }, { "epoch": 56.94, "learning_rate": 1.2408543689320389e-05, "loss": 0.0962, "step": 146610 }, { "epoch": 56.94, "learning_rate": 1.2408025889967638e-05, "loss": 0.1029, "step": 146620 }, { "epoch": 56.94, "learning_rate": 1.2407508090614888e-05, "loss": 0.0006, "step": 146630 }, { "epoch": 56.95, "learning_rate": 1.2406990291262138e-05, "loss": 0.0467, "step": 146640 }, { "epoch": 56.95, "learning_rate": 1.2406472491909387e-05, "loss": 0.0608, "step": 146650 }, { "epoch": 56.96, "learning_rate": 1.2405954692556633e-05, "loss": 0.0212, "step": 146660 }, { "epoch": 56.96, "learning_rate": 1.2405436893203883e-05, "loss": 0.1184, "step": 146670 }, { "epoch": 56.96, "learning_rate": 1.2404919093851133e-05, "loss": 0.1716, "step": 146680 }, { "epoch": 56.97, "learning_rate": 1.2404401294498382e-05, "loss": 0.1621, "step": 146690 }, { "epoch": 56.97, "learning_rate": 1.2403883495145632e-05, "loss": 0.0233, "step": 146700 }, { "epoch": 56.97, "learning_rate": 1.2403365695792882e-05, "loss": 0.011, "step": 146710 }, { "epoch": 56.98, "learning_rate": 1.2402847896440131e-05, "loss": 0.0376, "step": 146720 }, { "epoch": 56.98, "learning_rate": 1.240233009708738e-05, "loss": 0.1659, "step": 146730 }, { "epoch": 56.99, "learning_rate": 1.2401812297734627e-05, "loss": 0.0811, "step": 146740 }, { "epoch": 56.99, "learning_rate": 1.2401294498381877e-05, "loss": 0.1475, "step": 146750 }, { "epoch": 56.99, "learning_rate": 1.2400776699029126e-05, "loss": 0.0554, "step": 146760 }, { "epoch": 57.0, "learning_rate": 1.2400258899676376e-05, "loss": 0.0488, "step": 146770 }, { "epoch": 57.0, "eval_accuracy": 0.9504814305364512, "eval_loss": 0.32903438806533813, "eval_runtime": 8.2078, "eval_samples_per_second": 442.874, "eval_steps_per_second": 55.435, "step": 146775 }, { "epoch": 57.0, "learning_rate": 1.2399741100323626e-05, "loss": 0.0551, "step": 146780 }, { "epoch": 57.01, "learning_rate": 1.2399223300970875e-05, "loss": 0.0517, "step": 146790 }, { "epoch": 57.01, "learning_rate": 1.2398705501618125e-05, "loss": 0.0426, "step": 146800 }, { "epoch": 57.01, "learning_rate": 1.2398187702265374e-05, "loss": 0.0879, "step": 146810 }, { "epoch": 57.02, "learning_rate": 1.239766990291262e-05, "loss": 0.0139, "step": 146820 }, { "epoch": 57.02, "learning_rate": 1.239715210355987e-05, "loss": 0.0321, "step": 146830 }, { "epoch": 57.03, "learning_rate": 1.239663430420712e-05, "loss": 0.1205, "step": 146840 }, { "epoch": 57.03, "learning_rate": 1.239611650485437e-05, "loss": 0.0827, "step": 146850 }, { "epoch": 57.03, "learning_rate": 1.2395598705501619e-05, "loss": 0.128, "step": 146860 }, { "epoch": 57.04, "learning_rate": 1.2395080906148869e-05, "loss": 0.1091, "step": 146870 }, { "epoch": 57.04, "learning_rate": 1.2394563106796118e-05, "loss": 0.0685, "step": 146880 }, { "epoch": 57.04, "learning_rate": 1.2394045307443368e-05, "loss": 0.0735, "step": 146890 }, { "epoch": 57.05, "learning_rate": 1.2393527508090618e-05, "loss": 0.0916, "step": 146900 }, { "epoch": 57.05, "learning_rate": 1.2393009708737864e-05, "loss": 0.0854, "step": 146910 }, { "epoch": 57.06, "learning_rate": 1.2392491909385113e-05, "loss": 0.0436, "step": 146920 }, { "epoch": 57.06, "learning_rate": 1.2391974110032363e-05, "loss": 0.0356, "step": 146930 }, { "epoch": 57.06, "learning_rate": 1.2391456310679613e-05, "loss": 0.0068, "step": 146940 }, { "epoch": 57.07, "learning_rate": 1.2390938511326862e-05, "loss": 0.0888, "step": 146950 }, { "epoch": 57.07, "learning_rate": 1.2390420711974112e-05, "loss": 0.0386, "step": 146960 }, { "epoch": 57.08, "learning_rate": 1.2389902912621361e-05, "loss": 0.0307, "step": 146970 }, { "epoch": 57.08, "learning_rate": 1.2389385113268611e-05, "loss": 0.0005, "step": 146980 }, { "epoch": 57.08, "learning_rate": 1.2388867313915857e-05, "loss": 0.2216, "step": 146990 }, { "epoch": 57.09, "learning_rate": 1.2388349514563107e-05, "loss": 0.0588, "step": 147000 }, { "epoch": 57.09, "learning_rate": 1.2387831715210357e-05, "loss": 0.1029, "step": 147010 }, { "epoch": 57.1, "learning_rate": 1.2387313915857606e-05, "loss": 0.0778, "step": 147020 }, { "epoch": 57.1, "learning_rate": 1.2386796116504856e-05, "loss": 0.1957, "step": 147030 }, { "epoch": 57.1, "learning_rate": 1.2386278317152105e-05, "loss": 0.0219, "step": 147040 }, { "epoch": 57.11, "learning_rate": 1.2385760517799355e-05, "loss": 0.0901, "step": 147050 }, { "epoch": 57.11, "learning_rate": 1.2385242718446605e-05, "loss": 0.0261, "step": 147060 }, { "epoch": 57.11, "learning_rate": 1.2384724919093851e-05, "loss": 0.1307, "step": 147070 }, { "epoch": 57.12, "learning_rate": 1.23842071197411e-05, "loss": 0.0759, "step": 147080 }, { "epoch": 57.12, "learning_rate": 1.238368932038835e-05, "loss": 0.0764, "step": 147090 }, { "epoch": 57.13, "learning_rate": 1.23831715210356e-05, "loss": 0.0933, "step": 147100 }, { "epoch": 57.13, "learning_rate": 1.238265372168285e-05, "loss": 0.0311, "step": 147110 }, { "epoch": 57.13, "learning_rate": 1.2382135922330099e-05, "loss": 0.0746, "step": 147120 }, { "epoch": 57.14, "learning_rate": 1.2381618122977349e-05, "loss": 0.0524, "step": 147130 }, { "epoch": 57.14, "learning_rate": 1.2381100323624598e-05, "loss": 0.047, "step": 147140 }, { "epoch": 57.15, "learning_rate": 1.2380582524271844e-05, "loss": 0.0625, "step": 147150 }, { "epoch": 57.15, "learning_rate": 1.2380064724919094e-05, "loss": 0.0667, "step": 147160 }, { "epoch": 57.15, "learning_rate": 1.2379546925566344e-05, "loss": 0.0895, "step": 147170 }, { "epoch": 57.16, "learning_rate": 1.2379029126213593e-05, "loss": 0.1018, "step": 147180 }, { "epoch": 57.16, "learning_rate": 1.2378511326860843e-05, "loss": 0.099, "step": 147190 }, { "epoch": 57.17, "learning_rate": 1.2377993527508093e-05, "loss": 0.1472, "step": 147200 }, { "epoch": 57.17, "learning_rate": 1.2377475728155342e-05, "loss": 0.0174, "step": 147210 }, { "epoch": 57.17, "learning_rate": 1.2376957928802592e-05, "loss": 0.0945, "step": 147220 }, { "epoch": 57.18, "learning_rate": 1.2376440129449838e-05, "loss": 0.116, "step": 147230 }, { "epoch": 57.18, "learning_rate": 1.2375922330097088e-05, "loss": 0.0737, "step": 147240 }, { "epoch": 57.18, "learning_rate": 1.2375404530744337e-05, "loss": 0.1798, "step": 147250 }, { "epoch": 57.19, "learning_rate": 1.2374886731391587e-05, "loss": 0.0071, "step": 147260 }, { "epoch": 57.19, "learning_rate": 1.2374368932038836e-05, "loss": 0.0407, "step": 147270 }, { "epoch": 57.2, "learning_rate": 1.2373851132686086e-05, "loss": 0.0915, "step": 147280 }, { "epoch": 57.2, "learning_rate": 1.2373333333333336e-05, "loss": 0.0086, "step": 147290 }, { "epoch": 57.2, "learning_rate": 1.2372815533980584e-05, "loss": 0.0395, "step": 147300 }, { "epoch": 57.21, "learning_rate": 1.2372297734627832e-05, "loss": 0.1053, "step": 147310 }, { "epoch": 57.21, "learning_rate": 1.2371779935275081e-05, "loss": 0.0202, "step": 147320 }, { "epoch": 57.22, "learning_rate": 1.237126213592233e-05, "loss": 0.1365, "step": 147330 }, { "epoch": 57.22, "learning_rate": 1.237074433656958e-05, "loss": 0.0012, "step": 147340 }, { "epoch": 57.22, "learning_rate": 1.237022653721683e-05, "loss": 0.0707, "step": 147350 }, { "epoch": 57.23, "learning_rate": 1.236970873786408e-05, "loss": 0.1063, "step": 147360 }, { "epoch": 57.23, "learning_rate": 1.236919093851133e-05, "loss": 0.1231, "step": 147370 }, { "epoch": 57.23, "learning_rate": 1.2368673139158577e-05, "loss": 0.0537, "step": 147380 }, { "epoch": 57.24, "learning_rate": 1.2368155339805825e-05, "loss": 0.0207, "step": 147390 }, { "epoch": 57.24, "learning_rate": 1.2367637540453075e-05, "loss": 0.1228, "step": 147400 }, { "epoch": 57.25, "learning_rate": 1.2367119741100324e-05, "loss": 0.0431, "step": 147410 }, { "epoch": 57.25, "learning_rate": 1.2366601941747574e-05, "loss": 0.1329, "step": 147420 }, { "epoch": 57.25, "learning_rate": 1.2366084142394824e-05, "loss": 0.0296, "step": 147430 }, { "epoch": 57.26, "learning_rate": 1.2365566343042073e-05, "loss": 0.0105, "step": 147440 }, { "epoch": 57.26, "learning_rate": 1.2365048543689321e-05, "loss": 0.1017, "step": 147450 }, { "epoch": 57.27, "learning_rate": 1.236453074433657e-05, "loss": 0.0009, "step": 147460 }, { "epoch": 57.27, "learning_rate": 1.236401294498382e-05, "loss": 0.0672, "step": 147470 }, { "epoch": 57.27, "learning_rate": 1.2363495145631068e-05, "loss": 0.1194, "step": 147480 }, { "epoch": 57.28, "learning_rate": 1.2362977346278318e-05, "loss": 0.0188, "step": 147490 }, { "epoch": 57.28, "learning_rate": 1.2362459546925568e-05, "loss": 0.0401, "step": 147500 }, { "epoch": 57.29, "learning_rate": 1.2361941747572817e-05, "loss": 0.1037, "step": 147510 }, { "epoch": 57.29, "learning_rate": 1.2361423948220067e-05, "loss": 0.037, "step": 147520 }, { "epoch": 57.29, "learning_rate": 1.2360906148867315e-05, "loss": 0.0926, "step": 147530 }, { "epoch": 57.3, "learning_rate": 1.2360388349514564e-05, "loss": 0.0182, "step": 147540 }, { "epoch": 57.3, "learning_rate": 1.2359870550161814e-05, "loss": 0.0561, "step": 147550 }, { "epoch": 57.3, "learning_rate": 1.2359352750809062e-05, "loss": 0.024, "step": 147560 }, { "epoch": 57.31, "learning_rate": 1.2358834951456311e-05, "loss": 0.0125, "step": 147570 }, { "epoch": 57.31, "learning_rate": 1.2358317152103561e-05, "loss": 0.1062, "step": 147580 }, { "epoch": 57.32, "learning_rate": 1.235779935275081e-05, "loss": 0.0931, "step": 147590 }, { "epoch": 57.32, "learning_rate": 1.2357281553398059e-05, "loss": 0.0292, "step": 147600 }, { "epoch": 57.32, "learning_rate": 1.2356763754045308e-05, "loss": 0.0671, "step": 147610 }, { "epoch": 57.33, "learning_rate": 1.2356245954692558e-05, "loss": 0.0012, "step": 147620 }, { "epoch": 57.33, "learning_rate": 1.2355728155339807e-05, "loss": 0.0211, "step": 147630 }, { "epoch": 57.34, "learning_rate": 1.2355210355987055e-05, "loss": 0.0436, "step": 147640 }, { "epoch": 57.34, "learning_rate": 1.2354692556634305e-05, "loss": 0.0404, "step": 147650 }, { "epoch": 57.34, "learning_rate": 1.2354174757281555e-05, "loss": 0.1809, "step": 147660 }, { "epoch": 57.35, "learning_rate": 1.2353656957928804e-05, "loss": 0.1913, "step": 147670 }, { "epoch": 57.35, "learning_rate": 1.2353139158576052e-05, "loss": 0.1422, "step": 147680 }, { "epoch": 57.36, "learning_rate": 1.2352621359223302e-05, "loss": 0.1429, "step": 147690 }, { "epoch": 57.36, "learning_rate": 1.2352103559870551e-05, "loss": 0.0068, "step": 147700 }, { "epoch": 57.36, "learning_rate": 1.2351585760517801e-05, "loss": 0.1333, "step": 147710 }, { "epoch": 57.37, "learning_rate": 1.2351067961165049e-05, "loss": 0.0336, "step": 147720 }, { "epoch": 57.37, "learning_rate": 1.2350550161812299e-05, "loss": 0.0035, "step": 147730 }, { "epoch": 57.37, "learning_rate": 1.2350032362459548e-05, "loss": 0.046, "step": 147740 }, { "epoch": 57.38, "learning_rate": 1.2349514563106798e-05, "loss": 0.0635, "step": 147750 }, { "epoch": 57.38, "learning_rate": 1.2348996763754046e-05, "loss": 0.0425, "step": 147760 }, { "epoch": 57.39, "learning_rate": 1.2348478964401295e-05, "loss": 0.1155, "step": 147770 }, { "epoch": 57.39, "learning_rate": 1.2347961165048545e-05, "loss": 0.1112, "step": 147780 }, { "epoch": 57.39, "learning_rate": 1.2347443365695795e-05, "loss": 0.1296, "step": 147790 }, { "epoch": 57.4, "learning_rate": 1.2346925566343043e-05, "loss": 0.167, "step": 147800 }, { "epoch": 57.4, "learning_rate": 1.2346407766990292e-05, "loss": 0.0332, "step": 147810 }, { "epoch": 57.41, "learning_rate": 1.2345889967637542e-05, "loss": 0.0608, "step": 147820 }, { "epoch": 57.41, "learning_rate": 1.234537216828479e-05, "loss": 0.1475, "step": 147830 }, { "epoch": 57.41, "learning_rate": 1.234485436893204e-05, "loss": 0.0088, "step": 147840 }, { "epoch": 57.42, "learning_rate": 1.2344336569579289e-05, "loss": 0.0875, "step": 147850 }, { "epoch": 57.42, "learning_rate": 1.2343818770226539e-05, "loss": 0.084, "step": 147860 }, { "epoch": 57.43, "learning_rate": 1.2343300970873788e-05, "loss": 0.02, "step": 147870 }, { "epoch": 57.43, "learning_rate": 1.2342783171521036e-05, "loss": 0.0574, "step": 147880 }, { "epoch": 57.43, "learning_rate": 1.2342265372168286e-05, "loss": 0.0008, "step": 147890 }, { "epoch": 57.44, "learning_rate": 1.2341747572815535e-05, "loss": 0.0327, "step": 147900 }, { "epoch": 57.44, "learning_rate": 1.2341229773462783e-05, "loss": 0.0745, "step": 147910 }, { "epoch": 57.44, "learning_rate": 1.2340711974110033e-05, "loss": 0.1114, "step": 147920 }, { "epoch": 57.45, "learning_rate": 1.2340194174757282e-05, "loss": 0.0494, "step": 147930 }, { "epoch": 57.45, "learning_rate": 1.2339676375404532e-05, "loss": 0.1438, "step": 147940 }, { "epoch": 57.46, "learning_rate": 1.2339158576051782e-05, "loss": 0.089, "step": 147950 }, { "epoch": 57.46, "learning_rate": 1.233864077669903e-05, "loss": 0.0908, "step": 147960 }, { "epoch": 57.46, "learning_rate": 1.233812297734628e-05, "loss": 0.245, "step": 147970 }, { "epoch": 57.47, "learning_rate": 1.2337605177993527e-05, "loss": 0.1421, "step": 147980 }, { "epoch": 57.47, "learning_rate": 1.2337087378640777e-05, "loss": 0.0779, "step": 147990 }, { "epoch": 57.48, "learning_rate": 1.2336569579288026e-05, "loss": 0.1637, "step": 148000 }, { "epoch": 57.48, "learning_rate": 1.2336051779935276e-05, "loss": 0.1809, "step": 148010 }, { "epoch": 57.48, "learning_rate": 1.2335533980582526e-05, "loss": 0.06, "step": 148020 }, { "epoch": 57.49, "learning_rate": 1.2335016181229775e-05, "loss": 0.0515, "step": 148030 }, { "epoch": 57.49, "learning_rate": 1.2334498381877025e-05, "loss": 0.0923, "step": 148040 }, { "epoch": 57.5, "learning_rate": 1.2333980582524273e-05, "loss": 0.0811, "step": 148050 }, { "epoch": 57.5, "learning_rate": 1.233346278317152e-05, "loss": 0.0508, "step": 148060 }, { "epoch": 57.5, "learning_rate": 1.233294498381877e-05, "loss": 0.1234, "step": 148070 }, { "epoch": 57.51, "learning_rate": 1.233242718446602e-05, "loss": 0.0969, "step": 148080 }, { "epoch": 57.51, "learning_rate": 1.233190938511327e-05, "loss": 0.0428, "step": 148090 }, { "epoch": 57.51, "learning_rate": 1.233139158576052e-05, "loss": 0.0189, "step": 148100 }, { "epoch": 57.52, "learning_rate": 1.2330873786407769e-05, "loss": 0.1193, "step": 148110 }, { "epoch": 57.52, "learning_rate": 1.2330355987055018e-05, "loss": 0.1869, "step": 148120 }, { "epoch": 57.53, "learning_rate": 1.2329838187702265e-05, "loss": 0.1211, "step": 148130 }, { "epoch": 57.53, "learning_rate": 1.2329320388349514e-05, "loss": 0.1472, "step": 148140 }, { "epoch": 57.53, "learning_rate": 1.2328802588996764e-05, "loss": 0.0072, "step": 148150 }, { "epoch": 57.54, "learning_rate": 1.2328284789644014e-05, "loss": 0.0161, "step": 148160 }, { "epoch": 57.54, "learning_rate": 1.2327766990291263e-05, "loss": 0.104, "step": 148170 }, { "epoch": 57.55, "learning_rate": 1.2327249190938513e-05, "loss": 0.2126, "step": 148180 }, { "epoch": 57.55, "learning_rate": 1.2326731391585762e-05, "loss": 0.0521, "step": 148190 }, { "epoch": 57.55, "learning_rate": 1.2326213592233012e-05, "loss": 0.1855, "step": 148200 }, { "epoch": 57.56, "learning_rate": 1.2325695792880258e-05, "loss": 0.0114, "step": 148210 }, { "epoch": 57.56, "learning_rate": 1.2325177993527508e-05, "loss": 0.1272, "step": 148220 }, { "epoch": 57.57, "learning_rate": 1.2324660194174757e-05, "loss": 0.148, "step": 148230 }, { "epoch": 57.57, "learning_rate": 1.2324142394822007e-05, "loss": 0.0155, "step": 148240 }, { "epoch": 57.57, "learning_rate": 1.2323624595469257e-05, "loss": 0.0602, "step": 148250 }, { "epoch": 57.58, "learning_rate": 1.2323106796116506e-05, "loss": 0.1456, "step": 148260 }, { "epoch": 57.58, "learning_rate": 1.2322588996763756e-05, "loss": 0.0952, "step": 148270 }, { "epoch": 57.58, "learning_rate": 1.2322071197411006e-05, "loss": 0.0098, "step": 148280 }, { "epoch": 57.59, "learning_rate": 1.2321553398058252e-05, "loss": 0.1108, "step": 148290 }, { "epoch": 57.59, "learning_rate": 1.2321035598705501e-05, "loss": 0.1067, "step": 148300 }, { "epoch": 57.6, "learning_rate": 1.2320517799352751e-05, "loss": 0.0641, "step": 148310 }, { "epoch": 57.6, "learning_rate": 1.232e-05, "loss": 0.202, "step": 148320 }, { "epoch": 57.6, "learning_rate": 1.231948220064725e-05, "loss": 0.1799, "step": 148330 }, { "epoch": 57.61, "learning_rate": 1.23189644012945e-05, "loss": 0.1125, "step": 148340 }, { "epoch": 57.61, "learning_rate": 1.231844660194175e-05, "loss": 0.0217, "step": 148350 }, { "epoch": 57.62, "learning_rate": 1.2317928802588999e-05, "loss": 0.1807, "step": 148360 }, { "epoch": 57.62, "learning_rate": 1.2317411003236245e-05, "loss": 0.0004, "step": 148370 }, { "epoch": 57.62, "learning_rate": 1.2316893203883495e-05, "loss": 0.0039, "step": 148380 }, { "epoch": 57.63, "learning_rate": 1.2316375404530745e-05, "loss": 0.0318, "step": 148390 }, { "epoch": 57.63, "learning_rate": 1.2315857605177994e-05, "loss": 0.0737, "step": 148400 }, { "epoch": 57.63, "learning_rate": 1.2315339805825244e-05, "loss": 0.0192, "step": 148410 }, { "epoch": 57.64, "learning_rate": 1.2314822006472493e-05, "loss": 0.0611, "step": 148420 }, { "epoch": 57.64, "learning_rate": 1.2314304207119743e-05, "loss": 0.1463, "step": 148430 }, { "epoch": 57.65, "learning_rate": 1.2313786407766993e-05, "loss": 0.0191, "step": 148440 }, { "epoch": 57.65, "learning_rate": 1.2313268608414239e-05, "loss": 0.0878, "step": 148450 }, { "epoch": 57.65, "learning_rate": 1.2312750809061489e-05, "loss": 0.0754, "step": 148460 }, { "epoch": 57.66, "learning_rate": 1.2312233009708738e-05, "loss": 0.1074, "step": 148470 }, { "epoch": 57.66, "learning_rate": 1.2311715210355988e-05, "loss": 0.06, "step": 148480 }, { "epoch": 57.67, "learning_rate": 1.2311197411003237e-05, "loss": 0.0649, "step": 148490 }, { "epoch": 57.67, "learning_rate": 1.2310679611650487e-05, "loss": 0.0267, "step": 148500 }, { "epoch": 57.67, "learning_rate": 1.2310161812297737e-05, "loss": 0.0227, "step": 148510 }, { "epoch": 57.68, "learning_rate": 1.2309644012944986e-05, "loss": 0.1277, "step": 148520 }, { "epoch": 57.68, "learning_rate": 1.2309126213592232e-05, "loss": 0.0187, "step": 148530 }, { "epoch": 57.69, "learning_rate": 1.2308608414239482e-05, "loss": 0.0385, "step": 148540 }, { "epoch": 57.69, "learning_rate": 1.2308090614886732e-05, "loss": 0.1231, "step": 148550 }, { "epoch": 57.69, "learning_rate": 1.2307572815533981e-05, "loss": 0.0289, "step": 148560 }, { "epoch": 57.7, "learning_rate": 1.2307055016181231e-05, "loss": 0.0701, "step": 148570 }, { "epoch": 57.7, "learning_rate": 1.230653721682848e-05, "loss": 0.062, "step": 148580 }, { "epoch": 57.7, "learning_rate": 1.230601941747573e-05, "loss": 0.0124, "step": 148590 }, { "epoch": 57.71, "learning_rate": 1.230550161812298e-05, "loss": 0.0185, "step": 148600 }, { "epoch": 57.71, "learning_rate": 1.230498381877023e-05, "loss": 0.1391, "step": 148610 }, { "epoch": 57.72, "learning_rate": 1.2304466019417476e-05, "loss": 0.1234, "step": 148620 }, { "epoch": 57.72, "learning_rate": 1.2303948220064725e-05, "loss": 0.0124, "step": 148630 }, { "epoch": 57.72, "learning_rate": 1.2303430420711975e-05, "loss": 0.1425, "step": 148640 }, { "epoch": 57.73, "learning_rate": 1.2302912621359224e-05, "loss": 0.0748, "step": 148650 }, { "epoch": 57.73, "learning_rate": 1.2302394822006474e-05, "loss": 0.06, "step": 148660 }, { "epoch": 57.74, "learning_rate": 1.2301877022653724e-05, "loss": 0.1168, "step": 148670 }, { "epoch": 57.74, "learning_rate": 1.2301359223300973e-05, "loss": 0.2276, "step": 148680 }, { "epoch": 57.74, "learning_rate": 1.2300841423948223e-05, "loss": 0.1543, "step": 148690 }, { "epoch": 57.75, "learning_rate": 1.230032362459547e-05, "loss": 0.0416, "step": 148700 }, { "epoch": 57.75, "learning_rate": 1.2299805825242719e-05, "loss": 0.0442, "step": 148710 }, { "epoch": 57.76, "learning_rate": 1.2299288025889968e-05, "loss": 0.1606, "step": 148720 }, { "epoch": 57.76, "learning_rate": 1.2298770226537218e-05, "loss": 0.0773, "step": 148730 }, { "epoch": 57.76, "learning_rate": 1.2298252427184468e-05, "loss": 0.0958, "step": 148740 }, { "epoch": 57.77, "learning_rate": 1.2297734627831717e-05, "loss": 0.0862, "step": 148750 }, { "epoch": 57.77, "learning_rate": 1.2297216828478967e-05, "loss": 0.0232, "step": 148760 }, { "epoch": 57.77, "learning_rate": 1.2296699029126215e-05, "loss": 0.0635, "step": 148770 }, { "epoch": 57.78, "learning_rate": 1.2296181229773463e-05, "loss": 0.1017, "step": 148780 }, { "epoch": 57.78, "learning_rate": 1.2295663430420712e-05, "loss": 0.0833, "step": 148790 }, { "epoch": 57.79, "learning_rate": 1.2295145631067962e-05, "loss": 0.1616, "step": 148800 }, { "epoch": 57.79, "learning_rate": 1.2294627831715212e-05, "loss": 0.0854, "step": 148810 }, { "epoch": 57.79, "learning_rate": 1.2294110032362461e-05, "loss": 0.083, "step": 148820 }, { "epoch": 57.8, "learning_rate": 1.229359223300971e-05, "loss": 0.2582, "step": 148830 }, { "epoch": 57.8, "learning_rate": 1.229307443365696e-05, "loss": 0.0247, "step": 148840 }, { "epoch": 57.81, "learning_rate": 1.2292556634304208e-05, "loss": 0.202, "step": 148850 }, { "epoch": 57.81, "learning_rate": 1.2292038834951456e-05, "loss": 0.1219, "step": 148860 }, { "epoch": 57.81, "learning_rate": 1.2291521035598706e-05, "loss": 0.0037, "step": 148870 }, { "epoch": 57.82, "learning_rate": 1.2291003236245956e-05, "loss": 0.0724, "step": 148880 }, { "epoch": 57.82, "learning_rate": 1.2290485436893205e-05, "loss": 0.0477, "step": 148890 }, { "epoch": 57.83, "learning_rate": 1.2289967637540455e-05, "loss": 0.0879, "step": 148900 }, { "epoch": 57.83, "learning_rate": 1.2289449838187704e-05, "loss": 0.0295, "step": 148910 }, { "epoch": 57.83, "learning_rate": 1.2288932038834952e-05, "loss": 0.0703, "step": 148920 }, { "epoch": 57.84, "learning_rate": 1.2288414239482202e-05, "loss": 0.0243, "step": 148930 }, { "epoch": 57.84, "learning_rate": 1.228789644012945e-05, "loss": 0.0455, "step": 148940 }, { "epoch": 57.84, "learning_rate": 1.22873786407767e-05, "loss": 0.0457, "step": 148950 }, { "epoch": 57.85, "learning_rate": 1.2286860841423949e-05, "loss": 0.0269, "step": 148960 }, { "epoch": 57.85, "learning_rate": 1.2286343042071199e-05, "loss": 0.1555, "step": 148970 }, { "epoch": 57.86, "learning_rate": 1.2285825242718448e-05, "loss": 0.1088, "step": 148980 }, { "epoch": 57.86, "learning_rate": 1.2285307443365698e-05, "loss": 0.0012, "step": 148990 }, { "epoch": 57.86, "learning_rate": 1.2284789644012946e-05, "loss": 0.1071, "step": 149000 }, { "epoch": 57.87, "learning_rate": 1.2284271844660195e-05, "loss": 0.0409, "step": 149010 }, { "epoch": 57.87, "learning_rate": 1.2283754045307443e-05, "loss": 0.0181, "step": 149020 }, { "epoch": 57.88, "learning_rate": 1.2283236245954693e-05, "loss": 0.0016, "step": 149030 }, { "epoch": 57.88, "learning_rate": 1.2282718446601943e-05, "loss": 0.0277, "step": 149040 }, { "epoch": 57.88, "learning_rate": 1.2282200647249192e-05, "loss": 0.1081, "step": 149050 }, { "epoch": 57.89, "learning_rate": 1.2281682847896442e-05, "loss": 0.0503, "step": 149060 }, { "epoch": 57.89, "learning_rate": 1.228116504854369e-05, "loss": 0.0805, "step": 149070 }, { "epoch": 57.9, "learning_rate": 1.228064724919094e-05, "loss": 0.1656, "step": 149080 }, { "epoch": 57.9, "learning_rate": 1.2280129449838189e-05, "loss": 0.1282, "step": 149090 }, { "epoch": 57.9, "learning_rate": 1.2279611650485437e-05, "loss": 0.0959, "step": 149100 }, { "epoch": 57.91, "learning_rate": 1.2279093851132687e-05, "loss": 0.0473, "step": 149110 }, { "epoch": 57.91, "learning_rate": 1.2278576051779936e-05, "loss": 0.0146, "step": 149120 }, { "epoch": 57.91, "learning_rate": 1.2278058252427186e-05, "loss": 0.0029, "step": 149130 }, { "epoch": 57.92, "learning_rate": 1.2277540453074435e-05, "loss": 0.1864, "step": 149140 }, { "epoch": 57.92, "learning_rate": 1.2277022653721683e-05, "loss": 0.0448, "step": 149150 }, { "epoch": 57.93, "learning_rate": 1.2276504854368933e-05, "loss": 0.243, "step": 149160 }, { "epoch": 57.93, "learning_rate": 1.2275987055016183e-05, "loss": 0.0515, "step": 149170 }, { "epoch": 57.93, "learning_rate": 1.2275469255663432e-05, "loss": 0.0092, "step": 149180 }, { "epoch": 57.94, "learning_rate": 1.227495145631068e-05, "loss": 0.0487, "step": 149190 }, { "epoch": 57.94, "learning_rate": 1.227443365695793e-05, "loss": 0.0298, "step": 149200 }, { "epoch": 57.95, "learning_rate": 1.227391585760518e-05, "loss": 0.0452, "step": 149210 }, { "epoch": 57.95, "learning_rate": 1.2273398058252429e-05, "loss": 0.097, "step": 149220 }, { "epoch": 57.95, "learning_rate": 1.2272880258899677e-05, "loss": 0.1084, "step": 149230 }, { "epoch": 57.96, "learning_rate": 1.2272362459546927e-05, "loss": 0.116, "step": 149240 }, { "epoch": 57.96, "learning_rate": 1.2271844660194176e-05, "loss": 0.0763, "step": 149250 }, { "epoch": 57.97, "learning_rate": 1.2271326860841426e-05, "loss": 0.2194, "step": 149260 }, { "epoch": 57.97, "learning_rate": 1.2270809061488674e-05, "loss": 0.0764, "step": 149270 }, { "epoch": 57.97, "learning_rate": 1.2270291262135923e-05, "loss": 0.0758, "step": 149280 }, { "epoch": 57.98, "learning_rate": 1.2269773462783173e-05, "loss": 0.0531, "step": 149290 }, { "epoch": 57.98, "learning_rate": 1.226925566343042e-05, "loss": 0.0161, "step": 149300 }, { "epoch": 57.98, "learning_rate": 1.226873786407767e-05, "loss": 0.1997, "step": 149310 }, { "epoch": 57.99, "learning_rate": 1.226822006472492e-05, "loss": 0.2084, "step": 149320 }, { "epoch": 57.99, "learning_rate": 1.226770226537217e-05, "loss": 0.0213, "step": 149330 }, { "epoch": 58.0, "learning_rate": 1.226718446601942e-05, "loss": 0.011, "step": 149340 }, { "epoch": 58.0, "learning_rate": 1.2266666666666667e-05, "loss": 0.0514, "step": 149350 }, { "epoch": 58.0, "eval_accuracy": 0.9535075653370014, "eval_loss": 0.3125966489315033, "eval_runtime": 8.2099, "eval_samples_per_second": 442.757, "eval_steps_per_second": 55.421, "step": 149350 }, { "epoch": 58.0, "learning_rate": 1.2266148867313917e-05, "loss": 0.0969, "step": 149360 }, { "epoch": 58.01, "learning_rate": 1.2265631067961166e-05, "loss": 0.0924, "step": 149370 }, { "epoch": 58.01, "learning_rate": 1.2265113268608414e-05, "loss": 0.0071, "step": 149380 }, { "epoch": 58.02, "learning_rate": 1.2264595469255664e-05, "loss": 0.1166, "step": 149390 }, { "epoch": 58.02, "learning_rate": 1.2264077669902914e-05, "loss": 0.0257, "step": 149400 }, { "epoch": 58.02, "learning_rate": 1.2263559870550163e-05, "loss": 0.1404, "step": 149410 }, { "epoch": 58.03, "learning_rate": 1.2263042071197413e-05, "loss": 0.0689, "step": 149420 }, { "epoch": 58.03, "learning_rate": 1.226252427184466e-05, "loss": 0.0527, "step": 149430 }, { "epoch": 58.03, "learning_rate": 1.226200647249191e-05, "loss": 0.0676, "step": 149440 }, { "epoch": 58.04, "learning_rate": 1.2261488673139158e-05, "loss": 0.1099, "step": 149450 }, { "epoch": 58.04, "learning_rate": 1.2260970873786408e-05, "loss": 0.0875, "step": 149460 }, { "epoch": 58.05, "learning_rate": 1.2260453074433658e-05, "loss": 0.0969, "step": 149470 }, { "epoch": 58.05, "learning_rate": 1.2259935275080907e-05, "loss": 0.0574, "step": 149480 }, { "epoch": 58.05, "learning_rate": 1.2259417475728157e-05, "loss": 0.062, "step": 149490 }, { "epoch": 58.06, "learning_rate": 1.2258899676375406e-05, "loss": 0.0681, "step": 149500 }, { "epoch": 58.06, "learning_rate": 1.2258381877022654e-05, "loss": 0.0496, "step": 149510 }, { "epoch": 58.07, "learning_rate": 1.2257864077669904e-05, "loss": 0.0508, "step": 149520 }, { "epoch": 58.07, "learning_rate": 1.2257346278317152e-05, "loss": 0.0015, "step": 149530 }, { "epoch": 58.07, "learning_rate": 1.2256828478964402e-05, "loss": 0.2114, "step": 149540 }, { "epoch": 58.08, "learning_rate": 1.2256310679611651e-05, "loss": 0.0917, "step": 149550 }, { "epoch": 58.08, "learning_rate": 1.22557928802589e-05, "loss": 0.1207, "step": 149560 }, { "epoch": 58.09, "learning_rate": 1.225527508090615e-05, "loss": 0.1091, "step": 149570 }, { "epoch": 58.09, "learning_rate": 1.22547572815534e-05, "loss": 0.0165, "step": 149580 }, { "epoch": 58.09, "learning_rate": 1.2254239482200648e-05, "loss": 0.001, "step": 149590 }, { "epoch": 58.1, "learning_rate": 1.2253721682847896e-05, "loss": 0.0298, "step": 149600 }, { "epoch": 58.1, "learning_rate": 1.2253203883495145e-05, "loss": 0.0437, "step": 149610 }, { "epoch": 58.1, "learning_rate": 1.2252686084142395e-05, "loss": 0.0076, "step": 149620 }, { "epoch": 58.11, "learning_rate": 1.2252168284789645e-05, "loss": 0.0249, "step": 149630 }, { "epoch": 58.11, "learning_rate": 1.2251650485436894e-05, "loss": 0.1105, "step": 149640 }, { "epoch": 58.12, "learning_rate": 1.2251132686084144e-05, "loss": 0.0889, "step": 149650 }, { "epoch": 58.12, "learning_rate": 1.2250614886731394e-05, "loss": 0.02, "step": 149660 }, { "epoch": 58.12, "learning_rate": 1.2250097087378641e-05, "loss": 0.0506, "step": 149670 }, { "epoch": 58.13, "learning_rate": 1.224957928802589e-05, "loss": 0.0707, "step": 149680 }, { "epoch": 58.13, "learning_rate": 1.2249061488673139e-05, "loss": 0.0242, "step": 149690 }, { "epoch": 58.14, "learning_rate": 1.2248543689320389e-05, "loss": 0.0552, "step": 149700 }, { "epoch": 58.14, "learning_rate": 1.2248025889967638e-05, "loss": 0.019, "step": 149710 }, { "epoch": 58.14, "learning_rate": 1.2247508090614888e-05, "loss": 0.1608, "step": 149720 }, { "epoch": 58.15, "learning_rate": 1.2246990291262137e-05, "loss": 0.015, "step": 149730 }, { "epoch": 58.15, "learning_rate": 1.2246472491909387e-05, "loss": 0.0862, "step": 149740 }, { "epoch": 58.16, "learning_rate": 1.2245954692556637e-05, "loss": 0.0512, "step": 149750 }, { "epoch": 58.16, "learning_rate": 1.2245436893203883e-05, "loss": 0.0802, "step": 149760 }, { "epoch": 58.16, "learning_rate": 1.2244919093851133e-05, "loss": 0.1479, "step": 149770 }, { "epoch": 58.17, "learning_rate": 1.2244401294498382e-05, "loss": 0.0624, "step": 149780 }, { "epoch": 58.17, "learning_rate": 1.2243883495145632e-05, "loss": 0.0204, "step": 149790 }, { "epoch": 58.17, "learning_rate": 1.2243365695792881e-05, "loss": 0.0149, "step": 149800 }, { "epoch": 58.18, "learning_rate": 1.2242847896440131e-05, "loss": 0.0259, "step": 149810 }, { "epoch": 58.18, "learning_rate": 1.224233009708738e-05, "loss": 0.009, "step": 149820 }, { "epoch": 58.19, "learning_rate": 1.224181229773463e-05, "loss": 0.094, "step": 149830 }, { "epoch": 58.19, "learning_rate": 1.2241294498381877e-05, "loss": 0.1391, "step": 149840 }, { "epoch": 58.19, "learning_rate": 1.2240776699029126e-05, "loss": 0.0831, "step": 149850 }, { "epoch": 58.2, "learning_rate": 1.2240258899676376e-05, "loss": 0.0509, "step": 149860 }, { "epoch": 58.2, "learning_rate": 1.2239741100323625e-05, "loss": 0.1053, "step": 149870 }, { "epoch": 58.21, "learning_rate": 1.2239223300970875e-05, "loss": 0.0775, "step": 149880 }, { "epoch": 58.21, "learning_rate": 1.2238705501618125e-05, "loss": 0.2851, "step": 149890 }, { "epoch": 58.21, "learning_rate": 1.2238187702265374e-05, "loss": 0.0003, "step": 149900 }, { "epoch": 58.22, "learning_rate": 1.2237669902912624e-05, "loss": 0.251, "step": 149910 }, { "epoch": 58.22, "learning_rate": 1.223715210355987e-05, "loss": 0.1444, "step": 149920 }, { "epoch": 58.23, "learning_rate": 1.223663430420712e-05, "loss": 0.0164, "step": 149930 }, { "epoch": 58.23, "learning_rate": 1.223611650485437e-05, "loss": 0.0451, "step": 149940 }, { "epoch": 58.23, "learning_rate": 1.2235598705501619e-05, "loss": 0.0836, "step": 149950 }, { "epoch": 58.24, "learning_rate": 1.2235080906148869e-05, "loss": 0.0407, "step": 149960 }, { "epoch": 58.24, "learning_rate": 1.2234563106796118e-05, "loss": 0.0953, "step": 149970 }, { "epoch": 58.24, "learning_rate": 1.2234045307443368e-05, "loss": 0.1638, "step": 149980 }, { "epoch": 58.25, "learning_rate": 1.2233527508090617e-05, "loss": 0.0996, "step": 149990 }, { "epoch": 58.25, "learning_rate": 1.2233009708737864e-05, "loss": 0.0044, "step": 150000 }, { "epoch": 58.26, "learning_rate": 1.2232491909385113e-05, "loss": 0.0921, "step": 150010 }, { "epoch": 58.26, "learning_rate": 1.2231974110032363e-05, "loss": 0.0377, "step": 150020 }, { "epoch": 58.26, "learning_rate": 1.2231456310679612e-05, "loss": 0.0647, "step": 150030 }, { "epoch": 58.27, "learning_rate": 1.2230938511326862e-05, "loss": 0.0779, "step": 150040 }, { "epoch": 58.27, "learning_rate": 1.2230420711974112e-05, "loss": 0.1152, "step": 150050 }, { "epoch": 58.28, "learning_rate": 1.2229902912621361e-05, "loss": 0.0137, "step": 150060 }, { "epoch": 58.28, "learning_rate": 1.2229385113268611e-05, "loss": 0.0831, "step": 150070 }, { "epoch": 58.28, "learning_rate": 1.2228867313915857e-05, "loss": 0.1087, "step": 150080 }, { "epoch": 58.29, "learning_rate": 1.2228349514563107e-05, "loss": 0.0191, "step": 150090 }, { "epoch": 58.29, "learning_rate": 1.2227831715210356e-05, "loss": 0.0375, "step": 150100 }, { "epoch": 58.3, "learning_rate": 1.2227313915857606e-05, "loss": 0.1038, "step": 150110 }, { "epoch": 58.3, "learning_rate": 1.2226796116504856e-05, "loss": 0.0185, "step": 150120 }, { "epoch": 58.3, "learning_rate": 1.2226278317152105e-05, "loss": 0.1499, "step": 150130 }, { "epoch": 58.31, "learning_rate": 1.2225760517799355e-05, "loss": 0.0625, "step": 150140 }, { "epoch": 58.31, "learning_rate": 1.2225242718446604e-05, "loss": 0.0387, "step": 150150 }, { "epoch": 58.31, "learning_rate": 1.222472491909385e-05, "loss": 0.117, "step": 150160 }, { "epoch": 58.32, "learning_rate": 1.22242071197411e-05, "loss": 0.2011, "step": 150170 }, { "epoch": 58.32, "learning_rate": 1.222368932038835e-05, "loss": 0.0756, "step": 150180 }, { "epoch": 58.33, "learning_rate": 1.22231715210356e-05, "loss": 0.0006, "step": 150190 }, { "epoch": 58.33, "learning_rate": 1.222265372168285e-05, "loss": 0.1178, "step": 150200 }, { "epoch": 58.33, "learning_rate": 1.2222135922330099e-05, "loss": 0.1524, "step": 150210 }, { "epoch": 58.34, "learning_rate": 1.2221618122977348e-05, "loss": 0.0763, "step": 150220 }, { "epoch": 58.34, "learning_rate": 1.2221100323624598e-05, "loss": 0.0164, "step": 150230 }, { "epoch": 58.35, "learning_rate": 1.2220582524271846e-05, "loss": 0.1098, "step": 150240 }, { "epoch": 58.35, "learning_rate": 1.2220064724919094e-05, "loss": 0.0699, "step": 150250 }, { "epoch": 58.35, "learning_rate": 1.2219546925566344e-05, "loss": 0.1229, "step": 150260 }, { "epoch": 58.36, "learning_rate": 1.2219029126213593e-05, "loss": 0.1069, "step": 150270 }, { "epoch": 58.36, "learning_rate": 1.2218511326860843e-05, "loss": 0.0643, "step": 150280 }, { "epoch": 58.37, "learning_rate": 1.2217993527508092e-05, "loss": 0.0125, "step": 150290 }, { "epoch": 58.37, "learning_rate": 1.2217475728155342e-05, "loss": 0.031, "step": 150300 }, { "epoch": 58.37, "learning_rate": 1.2216957928802592e-05, "loss": 0.1094, "step": 150310 }, { "epoch": 58.38, "learning_rate": 1.221644012944984e-05, "loss": 0.0484, "step": 150320 }, { "epoch": 58.38, "learning_rate": 1.2215922330097087e-05, "loss": 0.0433, "step": 150330 }, { "epoch": 58.38, "learning_rate": 1.2215404530744337e-05, "loss": 0.0893, "step": 150340 }, { "epoch": 58.39, "learning_rate": 1.2214886731391587e-05, "loss": 0.0823, "step": 150350 }, { "epoch": 58.39, "learning_rate": 1.2214368932038836e-05, "loss": 0.0833, "step": 150360 }, { "epoch": 58.4, "learning_rate": 1.2213851132686086e-05, "loss": 0.2186, "step": 150370 }, { "epoch": 58.4, "learning_rate": 1.2213333333333336e-05, "loss": 0.1108, "step": 150380 }, { "epoch": 58.4, "learning_rate": 1.2212815533980583e-05, "loss": 0.0427, "step": 150390 }, { "epoch": 58.41, "learning_rate": 1.2212297734627833e-05, "loss": 0.0596, "step": 150400 }, { "epoch": 58.41, "learning_rate": 1.2211779935275081e-05, "loss": 0.0315, "step": 150410 }, { "epoch": 58.42, "learning_rate": 1.221126213592233e-05, "loss": 0.1382, "step": 150420 }, { "epoch": 58.42, "learning_rate": 1.221074433656958e-05, "loss": 0.0009, "step": 150430 }, { "epoch": 58.42, "learning_rate": 1.221022653721683e-05, "loss": 0.0443, "step": 150440 }, { "epoch": 58.43, "learning_rate": 1.220970873786408e-05, "loss": 0.1066, "step": 150450 }, { "epoch": 58.43, "learning_rate": 1.2209190938511329e-05, "loss": 0.1195, "step": 150460 }, { "epoch": 58.43, "learning_rate": 1.2208673139158577e-05, "loss": 0.069, "step": 150470 }, { "epoch": 58.44, "learning_rate": 1.2208155339805827e-05, "loss": 0.0019, "step": 150480 }, { "epoch": 58.44, "learning_rate": 1.2207637540453075e-05, "loss": 0.0159, "step": 150490 }, { "epoch": 58.45, "learning_rate": 1.2207119741100324e-05, "loss": 0.0405, "step": 150500 }, { "epoch": 58.45, "learning_rate": 1.2206601941747574e-05, "loss": 0.0597, "step": 150510 }, { "epoch": 58.45, "learning_rate": 1.2206084142394823e-05, "loss": 0.2137, "step": 150520 }, { "epoch": 58.46, "learning_rate": 1.2205566343042073e-05, "loss": 0.0517, "step": 150530 }, { "epoch": 58.46, "learning_rate": 1.2205048543689321e-05, "loss": 0.0664, "step": 150540 }, { "epoch": 58.47, "learning_rate": 1.220453074433657e-05, "loss": 0.0418, "step": 150550 }, { "epoch": 58.47, "learning_rate": 1.220401294498382e-05, "loss": 0.1804, "step": 150560 }, { "epoch": 58.47, "learning_rate": 1.2203495145631068e-05, "loss": 0.0673, "step": 150570 }, { "epoch": 58.48, "learning_rate": 1.2202977346278318e-05, "loss": 0.0698, "step": 150580 }, { "epoch": 58.48, "learning_rate": 1.2202459546925567e-05, "loss": 0.0622, "step": 150590 }, { "epoch": 58.49, "learning_rate": 1.2201941747572817e-05, "loss": 0.0381, "step": 150600 }, { "epoch": 58.49, "learning_rate": 1.2201423948220067e-05, "loss": 0.0189, "step": 150610 }, { "epoch": 58.49, "learning_rate": 1.2200906148867315e-05, "loss": 0.0909, "step": 150620 }, { "epoch": 58.5, "learning_rate": 1.2200388349514564e-05, "loss": 0.0834, "step": 150630 }, { "epoch": 58.5, "learning_rate": 1.2199870550161814e-05, "loss": 0.0279, "step": 150640 }, { "epoch": 58.5, "learning_rate": 1.2199352750809062e-05, "loss": 0.0199, "step": 150650 }, { "epoch": 58.51, "learning_rate": 1.2198834951456311e-05, "loss": 0.0279, "step": 150660 }, { "epoch": 58.51, "learning_rate": 1.2198317152103561e-05, "loss": 0.289, "step": 150670 }, { "epoch": 58.52, "learning_rate": 1.219779935275081e-05, "loss": 0.0265, "step": 150680 }, { "epoch": 58.52, "learning_rate": 1.2197281553398058e-05, "loss": 0.0402, "step": 150690 }, { "epoch": 58.52, "learning_rate": 1.2196763754045308e-05, "loss": 0.1009, "step": 150700 }, { "epoch": 58.53, "learning_rate": 1.2196245954692558e-05, "loss": 0.0015, "step": 150710 }, { "epoch": 58.53, "learning_rate": 1.2195728155339807e-05, "loss": 0.1013, "step": 150720 }, { "epoch": 58.54, "learning_rate": 1.2195210355987055e-05, "loss": 0.0879, "step": 150730 }, { "epoch": 58.54, "learning_rate": 1.2194692556634305e-05, "loss": 0.0666, "step": 150740 }, { "epoch": 58.54, "learning_rate": 1.2194174757281554e-05, "loss": 0.0329, "step": 150750 }, { "epoch": 58.55, "learning_rate": 1.2193656957928804e-05, "loss": 0.097, "step": 150760 }, { "epoch": 58.55, "learning_rate": 1.2193139158576052e-05, "loss": 0.0225, "step": 150770 }, { "epoch": 58.56, "learning_rate": 1.2192621359223302e-05, "loss": 0.0553, "step": 150780 }, { "epoch": 58.56, "learning_rate": 1.2192103559870551e-05, "loss": 0.1021, "step": 150790 }, { "epoch": 58.56, "learning_rate": 1.2191585760517801e-05, "loss": 0.0382, "step": 150800 }, { "epoch": 58.57, "learning_rate": 1.219106796116505e-05, "loss": 0.1127, "step": 150810 }, { "epoch": 58.57, "learning_rate": 1.2190550161812298e-05, "loss": 0.0589, "step": 150820 }, { "epoch": 58.57, "learning_rate": 1.2190032362459548e-05, "loss": 0.0648, "step": 150830 }, { "epoch": 58.58, "learning_rate": 1.2189514563106798e-05, "loss": 0.0057, "step": 150840 }, { "epoch": 58.58, "learning_rate": 1.2188996763754046e-05, "loss": 0.0047, "step": 150850 }, { "epoch": 58.59, "learning_rate": 1.2188478964401295e-05, "loss": 0.1292, "step": 150860 }, { "epoch": 58.59, "learning_rate": 1.2187961165048545e-05, "loss": 0.0095, "step": 150870 }, { "epoch": 58.59, "learning_rate": 1.2187443365695794e-05, "loss": 0.2074, "step": 150880 }, { "epoch": 58.6, "learning_rate": 1.2186925566343044e-05, "loss": 0.0556, "step": 150890 }, { "epoch": 58.6, "learning_rate": 1.2186407766990292e-05, "loss": 0.0695, "step": 150900 }, { "epoch": 58.61, "learning_rate": 1.2185889967637542e-05, "loss": 0.2049, "step": 150910 }, { "epoch": 58.61, "learning_rate": 1.218537216828479e-05, "loss": 0.1577, "step": 150920 }, { "epoch": 58.61, "learning_rate": 1.2184854368932039e-05, "loss": 0.0471, "step": 150930 }, { "epoch": 58.62, "learning_rate": 1.2184336569579289e-05, "loss": 0.1259, "step": 150940 }, { "epoch": 58.62, "learning_rate": 1.2183818770226538e-05, "loss": 0.0359, "step": 150950 }, { "epoch": 58.63, "learning_rate": 1.2183300970873788e-05, "loss": 0.1161, "step": 150960 }, { "epoch": 58.63, "learning_rate": 1.2182783171521038e-05, "loss": 0.1067, "step": 150970 }, { "epoch": 58.63, "learning_rate": 1.2182265372168286e-05, "loss": 0.0598, "step": 150980 }, { "epoch": 58.64, "learning_rate": 1.2181747572815535e-05, "loss": 0.0471, "step": 150990 }, { "epoch": 58.64, "learning_rate": 1.2181229773462783e-05, "loss": 0.0836, "step": 151000 }, { "epoch": 58.64, "learning_rate": 1.2180711974110033e-05, "loss": 0.0427, "step": 151010 }, { "epoch": 58.65, "learning_rate": 1.2180194174757282e-05, "loss": 0.1646, "step": 151020 }, { "epoch": 58.65, "learning_rate": 1.2179676375404532e-05, "loss": 0.0452, "step": 151030 }, { "epoch": 58.66, "learning_rate": 1.2179158576051782e-05, "loss": 0.0509, "step": 151040 }, { "epoch": 58.66, "learning_rate": 1.2178640776699031e-05, "loss": 0.0709, "step": 151050 }, { "epoch": 58.66, "learning_rate": 1.2178122977346279e-05, "loss": 0.1116, "step": 151060 }, { "epoch": 58.67, "learning_rate": 1.2177605177993527e-05, "loss": 0.0455, "step": 151070 }, { "epoch": 58.67, "learning_rate": 1.2177087378640777e-05, "loss": 0.038, "step": 151080 }, { "epoch": 58.68, "learning_rate": 1.2176569579288026e-05, "loss": 0.0334, "step": 151090 }, { "epoch": 58.68, "learning_rate": 1.2176051779935276e-05, "loss": 0.1564, "step": 151100 }, { "epoch": 58.68, "learning_rate": 1.2175533980582525e-05, "loss": 0.0217, "step": 151110 }, { "epoch": 58.69, "learning_rate": 1.2175016181229775e-05, "loss": 0.1302, "step": 151120 }, { "epoch": 58.69, "learning_rate": 1.2174498381877025e-05, "loss": 0.0406, "step": 151130 }, { "epoch": 58.7, "learning_rate": 1.2173980582524273e-05, "loss": 0.0766, "step": 151140 }, { "epoch": 58.7, "learning_rate": 1.217346278317152e-05, "loss": 0.0921, "step": 151150 }, { "epoch": 58.7, "learning_rate": 1.217294498381877e-05, "loss": 0.0572, "step": 151160 }, { "epoch": 58.71, "learning_rate": 1.217242718446602e-05, "loss": 0.0255, "step": 151170 }, { "epoch": 58.71, "learning_rate": 1.217190938511327e-05, "loss": 0.1565, "step": 151180 }, { "epoch": 58.71, "learning_rate": 1.2171391585760519e-05, "loss": 0.0427, "step": 151190 }, { "epoch": 58.72, "learning_rate": 1.2170873786407769e-05, "loss": 0.0621, "step": 151200 }, { "epoch": 58.72, "learning_rate": 1.2170355987055018e-05, "loss": 0.1852, "step": 151210 }, { "epoch": 58.73, "learning_rate": 1.2169838187702264e-05, "loss": 0.0323, "step": 151220 }, { "epoch": 58.73, "learning_rate": 1.2169320388349514e-05, "loss": 0.1399, "step": 151230 }, { "epoch": 58.73, "learning_rate": 1.2168802588996764e-05, "loss": 0.1172, "step": 151240 }, { "epoch": 58.74, "learning_rate": 1.2168284789644013e-05, "loss": 0.0956, "step": 151250 }, { "epoch": 58.74, "learning_rate": 1.2167766990291263e-05, "loss": 0.0557, "step": 151260 }, { "epoch": 58.75, "learning_rate": 1.2167249190938513e-05, "loss": 0.0569, "step": 151270 }, { "epoch": 58.75, "learning_rate": 1.2166731391585762e-05, "loss": 0.0167, "step": 151280 }, { "epoch": 58.75, "learning_rate": 1.2166213592233012e-05, "loss": 0.0582, "step": 151290 }, { "epoch": 58.76, "learning_rate": 1.2165695792880258e-05, "loss": 0.0549, "step": 151300 }, { "epoch": 58.76, "learning_rate": 1.2165177993527508e-05, "loss": 0.0249, "step": 151310 }, { "epoch": 58.77, "learning_rate": 1.2164660194174757e-05, "loss": 0.0724, "step": 151320 }, { "epoch": 58.77, "learning_rate": 1.2164142394822007e-05, "loss": 0.1405, "step": 151330 }, { "epoch": 58.77, "learning_rate": 1.2163624595469257e-05, "loss": 0.0411, "step": 151340 }, { "epoch": 58.78, "learning_rate": 1.2163106796116506e-05, "loss": 0.0612, "step": 151350 }, { "epoch": 58.78, "learning_rate": 1.2162588996763756e-05, "loss": 0.005, "step": 151360 }, { "epoch": 58.78, "learning_rate": 1.2162071197411005e-05, "loss": 0.0424, "step": 151370 }, { "epoch": 58.79, "learning_rate": 1.2161553398058255e-05, "loss": 0.1032, "step": 151380 }, { "epoch": 58.79, "learning_rate": 1.2161035598705501e-05, "loss": 0.1223, "step": 151390 }, { "epoch": 58.8, "learning_rate": 1.2160517799352751e-05, "loss": 0.0095, "step": 151400 }, { "epoch": 58.8, "learning_rate": 1.216e-05, "loss": 0.0519, "step": 151410 }, { "epoch": 58.8, "learning_rate": 1.215948220064725e-05, "loss": 0.0576, "step": 151420 }, { "epoch": 58.81, "learning_rate": 1.21589644012945e-05, "loss": 0.0095, "step": 151430 }, { "epoch": 58.81, "learning_rate": 1.215844660194175e-05, "loss": 0.1768, "step": 151440 }, { "epoch": 58.82, "learning_rate": 1.2157928802588999e-05, "loss": 0.0222, "step": 151450 }, { "epoch": 58.82, "learning_rate": 1.2157411003236249e-05, "loss": 0.0005, "step": 151460 }, { "epoch": 58.82, "learning_rate": 1.2156893203883495e-05, "loss": 0.0159, "step": 151470 }, { "epoch": 58.83, "learning_rate": 1.2156375404530744e-05, "loss": 0.0107, "step": 151480 }, { "epoch": 58.83, "learning_rate": 1.2155857605177994e-05, "loss": 0.0186, "step": 151490 }, { "epoch": 58.83, "learning_rate": 1.2155339805825244e-05, "loss": 0.1654, "step": 151500 }, { "epoch": 58.84, "learning_rate": 1.2154822006472493e-05, "loss": 0.0557, "step": 151510 }, { "epoch": 58.84, "learning_rate": 1.2154304207119743e-05, "loss": 0.1145, "step": 151520 }, { "epoch": 58.85, "learning_rate": 1.2153786407766992e-05, "loss": 0.1287, "step": 151530 }, { "epoch": 58.85, "learning_rate": 1.2153268608414242e-05, "loss": 0.0684, "step": 151540 }, { "epoch": 58.85, "learning_rate": 1.2152750809061488e-05, "loss": 0.0016, "step": 151550 }, { "epoch": 58.86, "learning_rate": 1.2152233009708738e-05, "loss": 0.0752, "step": 151560 }, { "epoch": 58.86, "learning_rate": 1.2151715210355988e-05, "loss": 0.0323, "step": 151570 }, { "epoch": 58.87, "learning_rate": 1.2151197411003237e-05, "loss": 0.0292, "step": 151580 }, { "epoch": 58.87, "learning_rate": 1.2150679611650487e-05, "loss": 0.1222, "step": 151590 }, { "epoch": 58.87, "learning_rate": 1.2150161812297736e-05, "loss": 0.1622, "step": 151600 }, { "epoch": 58.88, "learning_rate": 1.2149644012944986e-05, "loss": 0.1783, "step": 151610 }, { "epoch": 58.88, "learning_rate": 1.2149126213592236e-05, "loss": 0.0229, "step": 151620 }, { "epoch": 58.89, "learning_rate": 1.2148608414239482e-05, "loss": 0.1725, "step": 151630 }, { "epoch": 58.89, "learning_rate": 1.2148090614886732e-05, "loss": 0.0427, "step": 151640 }, { "epoch": 58.89, "learning_rate": 1.2147572815533981e-05, "loss": 0.1389, "step": 151650 }, { "epoch": 58.9, "learning_rate": 1.214705501618123e-05, "loss": 0.2184, "step": 151660 }, { "epoch": 58.9, "learning_rate": 1.214653721682848e-05, "loss": 0.1255, "step": 151670 }, { "epoch": 58.9, "learning_rate": 1.214601941747573e-05, "loss": 0.0273, "step": 151680 }, { "epoch": 58.91, "learning_rate": 1.214550161812298e-05, "loss": 0.0318, "step": 151690 }, { "epoch": 58.91, "learning_rate": 1.214498381877023e-05, "loss": 0.0353, "step": 151700 }, { "epoch": 58.92, "learning_rate": 1.2144466019417475e-05, "loss": 0.2038, "step": 151710 }, { "epoch": 58.92, "learning_rate": 1.2143948220064725e-05, "loss": 0.1579, "step": 151720 }, { "epoch": 58.92, "learning_rate": 1.2143430420711975e-05, "loss": 0.1848, "step": 151730 }, { "epoch": 58.93, "learning_rate": 1.2142912621359224e-05, "loss": 0.0869, "step": 151740 }, { "epoch": 58.93, "learning_rate": 1.2142394822006474e-05, "loss": 0.0465, "step": 151750 }, { "epoch": 58.94, "learning_rate": 1.2141877022653724e-05, "loss": 0.0352, "step": 151760 }, { "epoch": 58.94, "learning_rate": 1.2141359223300973e-05, "loss": 0.0352, "step": 151770 }, { "epoch": 58.94, "learning_rate": 1.2140841423948223e-05, "loss": 0.0038, "step": 151780 }, { "epoch": 58.95, "learning_rate": 1.2140323624595469e-05, "loss": 0.1958, "step": 151790 }, { "epoch": 58.95, "learning_rate": 1.2139805825242719e-05, "loss": 0.0096, "step": 151800 }, { "epoch": 58.96, "learning_rate": 1.2139288025889968e-05, "loss": 0.0044, "step": 151810 }, { "epoch": 58.96, "learning_rate": 1.2138770226537218e-05, "loss": 0.1161, "step": 151820 }, { "epoch": 58.96, "learning_rate": 1.2138252427184467e-05, "loss": 0.0186, "step": 151830 }, { "epoch": 58.97, "learning_rate": 1.2137734627831717e-05, "loss": 0.0021, "step": 151840 }, { "epoch": 58.97, "learning_rate": 1.2137216828478967e-05, "loss": 0.0089, "step": 151850 }, { "epoch": 58.97, "learning_rate": 1.2136699029126215e-05, "loss": 0.1239, "step": 151860 }, { "epoch": 58.98, "learning_rate": 1.2136181229773463e-05, "loss": 0.0552, "step": 151870 }, { "epoch": 58.98, "learning_rate": 1.2135663430420712e-05, "loss": 0.0093, "step": 151880 }, { "epoch": 58.99, "learning_rate": 1.2135145631067962e-05, "loss": 0.0455, "step": 151890 }, { "epoch": 58.99, "learning_rate": 1.2134627831715211e-05, "loss": 0.0009, "step": 151900 }, { "epoch": 58.99, "learning_rate": 1.2134110032362461e-05, "loss": 0.0319, "step": 151910 }, { "epoch": 59.0, "learning_rate": 1.213359223300971e-05, "loss": 0.0184, "step": 151920 }, { "epoch": 59.0, "eval_accuracy": 0.953232462173315, "eval_loss": 0.31959620118141174, "eval_runtime": 8.2236, "eval_samples_per_second": 442.019, "eval_steps_per_second": 55.328, "step": 151925 }, { "epoch": 59.0, "learning_rate": 1.213307443365696e-05, "loss": 0.0603, "step": 151930 }, { "epoch": 59.01, "learning_rate": 1.2132556634304208e-05, "loss": 0.0134, "step": 151940 }, { "epoch": 59.01, "learning_rate": 1.2132038834951458e-05, "loss": 0.0138, "step": 151950 }, { "epoch": 59.01, "learning_rate": 1.2131521035598706e-05, "loss": 0.0068, "step": 151960 }, { "epoch": 59.02, "learning_rate": 1.2131003236245955e-05, "loss": 0.0337, "step": 151970 }, { "epoch": 59.02, "learning_rate": 1.2130485436893205e-05, "loss": 0.031, "step": 151980 }, { "epoch": 59.03, "learning_rate": 1.2129967637540455e-05, "loss": 0.2306, "step": 151990 }, { "epoch": 59.03, "learning_rate": 1.2129449838187704e-05, "loss": 0.078, "step": 152000 }, { "epoch": 59.03, "learning_rate": 1.2128932038834952e-05, "loss": 0.0008, "step": 152010 }, { "epoch": 59.04, "learning_rate": 1.2128414239482202e-05, "loss": 0.2069, "step": 152020 }, { "epoch": 59.04, "learning_rate": 1.2127896440129451e-05, "loss": 0.0187, "step": 152030 }, { "epoch": 59.04, "learning_rate": 1.21273786407767e-05, "loss": 0.0983, "step": 152040 }, { "epoch": 59.05, "learning_rate": 1.2126860841423949e-05, "loss": 0.0943, "step": 152050 }, { "epoch": 59.05, "learning_rate": 1.2126343042071199e-05, "loss": 0.1395, "step": 152060 }, { "epoch": 59.06, "learning_rate": 1.2125825242718448e-05, "loss": 0.1371, "step": 152070 }, { "epoch": 59.06, "learning_rate": 1.2125307443365698e-05, "loss": 0.0354, "step": 152080 }, { "epoch": 59.06, "learning_rate": 1.2124789644012946e-05, "loss": 0.0757, "step": 152090 }, { "epoch": 59.07, "learning_rate": 1.2124271844660195e-05, "loss": 0.18, "step": 152100 }, { "epoch": 59.07, "learning_rate": 1.2123754045307445e-05, "loss": 0.0141, "step": 152110 }, { "epoch": 59.08, "learning_rate": 1.2123236245954693e-05, "loss": 0.0078, "step": 152120 }, { "epoch": 59.08, "learning_rate": 1.2122718446601942e-05, "loss": 0.2501, "step": 152130 }, { "epoch": 59.08, "learning_rate": 1.2122200647249192e-05, "loss": 0.0062, "step": 152140 }, { "epoch": 59.09, "learning_rate": 1.2121682847896442e-05, "loss": 0.0942, "step": 152150 }, { "epoch": 59.09, "learning_rate": 1.212116504854369e-05, "loss": 0.2062, "step": 152160 }, { "epoch": 59.1, "learning_rate": 1.212064724919094e-05, "loss": 0.0428, "step": 152170 }, { "epoch": 59.1, "learning_rate": 1.2120129449838189e-05, "loss": 0.0624, "step": 152180 }, { "epoch": 59.1, "learning_rate": 1.2119611650485438e-05, "loss": 0.1801, "step": 152190 }, { "epoch": 59.11, "learning_rate": 1.2119093851132686e-05, "loss": 0.074, "step": 152200 }, { "epoch": 59.11, "learning_rate": 1.2118576051779936e-05, "loss": 0.0187, "step": 152210 }, { "epoch": 59.11, "learning_rate": 1.2118058252427186e-05, "loss": 0.1719, "step": 152220 }, { "epoch": 59.12, "learning_rate": 1.2117540453074435e-05, "loss": 0.0298, "step": 152230 }, { "epoch": 59.12, "learning_rate": 1.2117022653721683e-05, "loss": 0.0461, "step": 152240 }, { "epoch": 59.13, "learning_rate": 1.2116504854368933e-05, "loss": 0.0421, "step": 152250 }, { "epoch": 59.13, "learning_rate": 1.2115987055016182e-05, "loss": 0.1055, "step": 152260 }, { "epoch": 59.13, "learning_rate": 1.2115469255663432e-05, "loss": 0.0572, "step": 152270 }, { "epoch": 59.14, "learning_rate": 1.211495145631068e-05, "loss": 0.008, "step": 152280 }, { "epoch": 59.14, "learning_rate": 1.211443365695793e-05, "loss": 0.0329, "step": 152290 }, { "epoch": 59.15, "learning_rate": 1.211391585760518e-05, "loss": 0.0147, "step": 152300 }, { "epoch": 59.15, "learning_rate": 1.2113398058252429e-05, "loss": 0.1041, "step": 152310 }, { "epoch": 59.15, "learning_rate": 1.2112880258899677e-05, "loss": 0.0217, "step": 152320 }, { "epoch": 59.16, "learning_rate": 1.2112362459546926e-05, "loss": 0.1349, "step": 152330 }, { "epoch": 59.16, "learning_rate": 1.2111844660194176e-05, "loss": 0.0035, "step": 152340 }, { "epoch": 59.17, "learning_rate": 1.2111326860841426e-05, "loss": 0.0319, "step": 152350 }, { "epoch": 59.17, "learning_rate": 1.2110809061488674e-05, "loss": 0.0734, "step": 152360 }, { "epoch": 59.17, "learning_rate": 1.2110291262135923e-05, "loss": 0.0145, "step": 152370 }, { "epoch": 59.18, "learning_rate": 1.2109773462783173e-05, "loss": 0.0219, "step": 152380 }, { "epoch": 59.18, "learning_rate": 1.210925566343042e-05, "loss": 0.0104, "step": 152390 }, { "epoch": 59.18, "learning_rate": 1.210873786407767e-05, "loss": 0.023, "step": 152400 }, { "epoch": 59.19, "learning_rate": 1.210822006472492e-05, "loss": 0.0227, "step": 152410 }, { "epoch": 59.19, "learning_rate": 1.210770226537217e-05, "loss": 0.0475, "step": 152420 }, { "epoch": 59.2, "learning_rate": 1.210718446601942e-05, "loss": 0.0388, "step": 152430 }, { "epoch": 59.2, "learning_rate": 1.2106666666666667e-05, "loss": 0.1069, "step": 152440 }, { "epoch": 59.2, "learning_rate": 1.2106148867313917e-05, "loss": 0.1984, "step": 152450 }, { "epoch": 59.21, "learning_rate": 1.2105631067961166e-05, "loss": 0.0755, "step": 152460 }, { "epoch": 59.21, "learning_rate": 1.2105113268608414e-05, "loss": 0.1003, "step": 152470 }, { "epoch": 59.22, "learning_rate": 1.2104595469255664e-05, "loss": 0.0474, "step": 152480 }, { "epoch": 59.22, "learning_rate": 1.2104077669902913e-05, "loss": 0.1109, "step": 152490 }, { "epoch": 59.22, "learning_rate": 1.2103559870550163e-05, "loss": 0.1048, "step": 152500 }, { "epoch": 59.23, "learning_rate": 1.2103042071197413e-05, "loss": 0.0873, "step": 152510 }, { "epoch": 59.23, "learning_rate": 1.2102524271844662e-05, "loss": 0.1442, "step": 152520 }, { "epoch": 59.23, "learning_rate": 1.210200647249191e-05, "loss": 0.0271, "step": 152530 }, { "epoch": 59.24, "learning_rate": 1.2101488673139158e-05, "loss": 0.0509, "step": 152540 }, { "epoch": 59.24, "learning_rate": 1.2100970873786408e-05, "loss": 0.1766, "step": 152550 }, { "epoch": 59.25, "learning_rate": 1.2100453074433657e-05, "loss": 0.0083, "step": 152560 }, { "epoch": 59.25, "learning_rate": 1.2099935275080907e-05, "loss": 0.0193, "step": 152570 }, { "epoch": 59.25, "learning_rate": 1.2099417475728157e-05, "loss": 0.0652, "step": 152580 }, { "epoch": 59.26, "learning_rate": 1.2098899676375406e-05, "loss": 0.0277, "step": 152590 }, { "epoch": 59.26, "learning_rate": 1.2098381877022656e-05, "loss": 0.1498, "step": 152600 }, { "epoch": 59.27, "learning_rate": 1.2097864077669904e-05, "loss": 0.0566, "step": 152610 }, { "epoch": 59.27, "learning_rate": 1.2097346278317152e-05, "loss": 0.0985, "step": 152620 }, { "epoch": 59.27, "learning_rate": 1.2096828478964401e-05, "loss": 0.081, "step": 152630 }, { "epoch": 59.28, "learning_rate": 1.2096310679611651e-05, "loss": 0.0307, "step": 152640 }, { "epoch": 59.28, "learning_rate": 1.20957928802589e-05, "loss": 0.0067, "step": 152650 }, { "epoch": 59.29, "learning_rate": 1.209527508090615e-05, "loss": 0.3483, "step": 152660 }, { "epoch": 59.29, "learning_rate": 1.20947572815534e-05, "loss": 0.1129, "step": 152670 }, { "epoch": 59.29, "learning_rate": 1.209423948220065e-05, "loss": 0.1295, "step": 152680 }, { "epoch": 59.3, "learning_rate": 1.2093721682847896e-05, "loss": 0.0596, "step": 152690 }, { "epoch": 59.3, "learning_rate": 1.2093203883495145e-05, "loss": 0.0333, "step": 152700 }, { "epoch": 59.3, "learning_rate": 1.2092686084142395e-05, "loss": 0.0018, "step": 152710 }, { "epoch": 59.31, "learning_rate": 1.2092168284789645e-05, "loss": 0.1021, "step": 152720 }, { "epoch": 59.31, "learning_rate": 1.2091650485436894e-05, "loss": 0.0783, "step": 152730 }, { "epoch": 59.32, "learning_rate": 1.2091132686084144e-05, "loss": 0.005, "step": 152740 }, { "epoch": 59.32, "learning_rate": 1.2090614886731393e-05, "loss": 0.0311, "step": 152750 }, { "epoch": 59.32, "learning_rate": 1.2090097087378643e-05, "loss": 0.0906, "step": 152760 }, { "epoch": 59.33, "learning_rate": 1.208957928802589e-05, "loss": 0.0768, "step": 152770 }, { "epoch": 59.33, "learning_rate": 1.2089061488673139e-05, "loss": 0.0922, "step": 152780 }, { "epoch": 59.34, "learning_rate": 1.2088543689320388e-05, "loss": 0.0687, "step": 152790 }, { "epoch": 59.34, "learning_rate": 1.2088025889967638e-05, "loss": 0.0565, "step": 152800 }, { "epoch": 59.34, "learning_rate": 1.2087508090614888e-05, "loss": 0.0081, "step": 152810 }, { "epoch": 59.35, "learning_rate": 1.2086990291262137e-05, "loss": 0.1294, "step": 152820 }, { "epoch": 59.35, "learning_rate": 1.2086472491909387e-05, "loss": 0.0948, "step": 152830 }, { "epoch": 59.36, "learning_rate": 1.2085954692556637e-05, "loss": 0.0596, "step": 152840 }, { "epoch": 59.36, "learning_rate": 1.2085436893203883e-05, "loss": 0.02, "step": 152850 }, { "epoch": 59.36, "learning_rate": 1.2084919093851132e-05, "loss": 0.0218, "step": 152860 }, { "epoch": 59.37, "learning_rate": 1.2084401294498382e-05, "loss": 0.1019, "step": 152870 }, { "epoch": 59.37, "learning_rate": 1.2083883495145632e-05, "loss": 0.0145, "step": 152880 }, { "epoch": 59.37, "learning_rate": 1.2083365695792881e-05, "loss": 0.0338, "step": 152890 }, { "epoch": 59.38, "learning_rate": 1.2082847896440131e-05, "loss": 0.182, "step": 152900 }, { "epoch": 59.38, "learning_rate": 1.208233009708738e-05, "loss": 0.1098, "step": 152910 }, { "epoch": 59.39, "learning_rate": 1.208181229773463e-05, "loss": 0.0614, "step": 152920 }, { "epoch": 59.39, "learning_rate": 1.2081294498381876e-05, "loss": 0.095, "step": 152930 }, { "epoch": 59.39, "learning_rate": 1.2080776699029126e-05, "loss": 0.0516, "step": 152940 }, { "epoch": 59.4, "learning_rate": 1.2080258899676376e-05, "loss": 0.0418, "step": 152950 }, { "epoch": 59.4, "learning_rate": 1.2079741100323625e-05, "loss": 0.1119, "step": 152960 }, { "epoch": 59.41, "learning_rate": 1.2079223300970875e-05, "loss": 0.081, "step": 152970 }, { "epoch": 59.41, "learning_rate": 1.2078705501618124e-05, "loss": 0.0806, "step": 152980 }, { "epoch": 59.41, "learning_rate": 1.2078187702265374e-05, "loss": 0.0879, "step": 152990 }, { "epoch": 59.42, "learning_rate": 1.2077669902912624e-05, "loss": 0.1427, "step": 153000 }, { "epoch": 59.42, "learning_rate": 1.207715210355987e-05, "loss": 0.1138, "step": 153010 }, { "epoch": 59.43, "learning_rate": 1.207663430420712e-05, "loss": 0.0969, "step": 153020 }, { "epoch": 59.43, "learning_rate": 1.2076116504854369e-05, "loss": 0.1263, "step": 153030 }, { "epoch": 59.43, "learning_rate": 1.2075598705501619e-05, "loss": 0.0225, "step": 153040 }, { "epoch": 59.44, "learning_rate": 1.2075080906148868e-05, "loss": 0.0341, "step": 153050 }, { "epoch": 59.44, "learning_rate": 1.2074563106796118e-05, "loss": 0.1899, "step": 153060 }, { "epoch": 59.44, "learning_rate": 1.2074045307443368e-05, "loss": 0.0341, "step": 153070 }, { "epoch": 59.45, "learning_rate": 1.2073527508090617e-05, "loss": 0.1021, "step": 153080 }, { "epoch": 59.45, "learning_rate": 1.2073009708737867e-05, "loss": 0.1404, "step": 153090 }, { "epoch": 59.46, "learning_rate": 1.2072491909385113e-05, "loss": 0.0061, "step": 153100 }, { "epoch": 59.46, "learning_rate": 1.2071974110032363e-05, "loss": 0.0319, "step": 153110 }, { "epoch": 59.46, "learning_rate": 1.2071456310679612e-05, "loss": 0.0422, "step": 153120 }, { "epoch": 59.47, "learning_rate": 1.2070938511326862e-05, "loss": 0.0264, "step": 153130 }, { "epoch": 59.47, "learning_rate": 1.2070420711974112e-05, "loss": 0.0556, "step": 153140 }, { "epoch": 59.48, "learning_rate": 1.2069902912621361e-05, "loss": 0.1722, "step": 153150 }, { "epoch": 59.48, "learning_rate": 1.206938511326861e-05, "loss": 0.0184, "step": 153160 }, { "epoch": 59.48, "learning_rate": 1.206886731391586e-05, "loss": 0.0783, "step": 153170 }, { "epoch": 59.49, "learning_rate": 1.2068349514563107e-05, "loss": 0.1757, "step": 153180 }, { "epoch": 59.49, "learning_rate": 1.2067831715210356e-05, "loss": 0.2306, "step": 153190 }, { "epoch": 59.5, "learning_rate": 1.2067313915857606e-05, "loss": 0.0476, "step": 153200 }, { "epoch": 59.5, "learning_rate": 1.2066796116504855e-05, "loss": 0.0105, "step": 153210 }, { "epoch": 59.5, "learning_rate": 1.2066278317152105e-05, "loss": 0.1132, "step": 153220 }, { "epoch": 59.51, "learning_rate": 1.2065760517799355e-05, "loss": 0.0639, "step": 153230 }, { "epoch": 59.51, "learning_rate": 1.2065242718446604e-05, "loss": 0.1108, "step": 153240 }, { "epoch": 59.51, "learning_rate": 1.2064724919093854e-05, "loss": 0.0825, "step": 153250 }, { "epoch": 59.52, "learning_rate": 1.20642071197411e-05, "loss": 0.0598, "step": 153260 }, { "epoch": 59.52, "learning_rate": 1.206368932038835e-05, "loss": 0.042, "step": 153270 }, { "epoch": 59.53, "learning_rate": 1.20631715210356e-05, "loss": 0.1506, "step": 153280 }, { "epoch": 59.53, "learning_rate": 1.2062653721682849e-05, "loss": 0.1066, "step": 153290 }, { "epoch": 59.53, "learning_rate": 1.2062135922330099e-05, "loss": 0.087, "step": 153300 }, { "epoch": 59.54, "learning_rate": 1.2061618122977348e-05, "loss": 0.0815, "step": 153310 }, { "epoch": 59.54, "learning_rate": 1.2061100323624598e-05, "loss": 0.0448, "step": 153320 }, { "epoch": 59.55, "learning_rate": 1.2060582524271846e-05, "loss": 0.0053, "step": 153330 }, { "epoch": 59.55, "learning_rate": 1.2060064724919094e-05, "loss": 0.015, "step": 153340 }, { "epoch": 59.55, "learning_rate": 1.2059546925566343e-05, "loss": 0.0288, "step": 153350 }, { "epoch": 59.56, "learning_rate": 1.2059029126213593e-05, "loss": 0.2195, "step": 153360 }, { "epoch": 59.56, "learning_rate": 1.2058511326860843e-05, "loss": 0.0525, "step": 153370 }, { "epoch": 59.57, "learning_rate": 1.2057993527508092e-05, "loss": 0.0339, "step": 153380 }, { "epoch": 59.57, "learning_rate": 1.2057475728155342e-05, "loss": 0.1352, "step": 153390 }, { "epoch": 59.57, "learning_rate": 1.2056957928802591e-05, "loss": 0.1025, "step": 153400 }, { "epoch": 59.58, "learning_rate": 1.205644012944984e-05, "loss": 0.0748, "step": 153410 }, { "epoch": 59.58, "learning_rate": 1.2055922330097087e-05, "loss": 0.0241, "step": 153420 }, { "epoch": 59.58, "learning_rate": 1.2055404530744337e-05, "loss": 0.0476, "step": 153430 }, { "epoch": 59.59, "learning_rate": 1.2054886731391587e-05, "loss": 0.0217, "step": 153440 }, { "epoch": 59.59, "learning_rate": 1.2054368932038836e-05, "loss": 0.0954, "step": 153450 }, { "epoch": 59.6, "learning_rate": 1.2053851132686086e-05, "loss": 0.0556, "step": 153460 }, { "epoch": 59.6, "learning_rate": 1.2053333333333335e-05, "loss": 0.0162, "step": 153470 }, { "epoch": 59.6, "learning_rate": 1.2052815533980583e-05, "loss": 0.0091, "step": 153480 }, { "epoch": 59.61, "learning_rate": 1.2052297734627833e-05, "loss": 0.1533, "step": 153490 }, { "epoch": 59.61, "learning_rate": 1.2051779935275081e-05, "loss": 0.1139, "step": 153500 }, { "epoch": 59.62, "learning_rate": 1.205126213592233e-05, "loss": 0.1378, "step": 153510 }, { "epoch": 59.62, "learning_rate": 1.205074433656958e-05, "loss": 0.0895, "step": 153520 }, { "epoch": 59.62, "learning_rate": 1.205022653721683e-05, "loss": 0.095, "step": 153530 }, { "epoch": 59.63, "learning_rate": 1.204970873786408e-05, "loss": 0.0676, "step": 153540 }, { "epoch": 59.63, "learning_rate": 1.2049190938511329e-05, "loss": 0.125, "step": 153550 }, { "epoch": 59.63, "learning_rate": 1.2048673139158577e-05, "loss": 0.1613, "step": 153560 }, { "epoch": 59.64, "learning_rate": 1.2048155339805826e-05, "loss": 0.0601, "step": 153570 }, { "epoch": 59.64, "learning_rate": 1.2047637540453074e-05, "loss": 0.1631, "step": 153580 }, { "epoch": 59.65, "learning_rate": 1.2047119741100324e-05, "loss": 0.1871, "step": 153590 }, { "epoch": 59.65, "learning_rate": 1.2046601941747574e-05, "loss": 0.1252, "step": 153600 }, { "epoch": 59.65, "learning_rate": 1.2046084142394823e-05, "loss": 0.197, "step": 153610 }, { "epoch": 59.66, "learning_rate": 1.2045566343042073e-05, "loss": 0.016, "step": 153620 }, { "epoch": 59.66, "learning_rate": 1.204504854368932e-05, "loss": 0.0026, "step": 153630 }, { "epoch": 59.67, "learning_rate": 1.204453074433657e-05, "loss": 0.0565, "step": 153640 }, { "epoch": 59.67, "learning_rate": 1.204401294498382e-05, "loss": 0.0334, "step": 153650 }, { "epoch": 59.67, "learning_rate": 1.204349514563107e-05, "loss": 0.2449, "step": 153660 }, { "epoch": 59.68, "learning_rate": 1.2042977346278318e-05, "loss": 0.0606, "step": 153670 }, { "epoch": 59.68, "learning_rate": 1.2042459546925567e-05, "loss": 0.1644, "step": 153680 }, { "epoch": 59.69, "learning_rate": 1.2041941747572817e-05, "loss": 0.0005, "step": 153690 }, { "epoch": 59.69, "learning_rate": 1.2041423948220066e-05, "loss": 0.2303, "step": 153700 }, { "epoch": 59.69, "learning_rate": 1.2040906148867314e-05, "loss": 0.1502, "step": 153710 }, { "epoch": 59.7, "learning_rate": 1.2040388349514564e-05, "loss": 0.2184, "step": 153720 }, { "epoch": 59.7, "learning_rate": 1.2039870550161814e-05, "loss": 0.0693, "step": 153730 }, { "epoch": 59.7, "learning_rate": 1.2039352750809063e-05, "loss": 0.0086, "step": 153740 }, { "epoch": 59.71, "learning_rate": 1.2038834951456311e-05, "loss": 0.1077, "step": 153750 }, { "epoch": 59.71, "learning_rate": 1.203831715210356e-05, "loss": 0.017, "step": 153760 }, { "epoch": 59.72, "learning_rate": 1.203779935275081e-05, "loss": 0.0162, "step": 153770 }, { "epoch": 59.72, "learning_rate": 1.203728155339806e-05, "loss": 0.0541, "step": 153780 }, { "epoch": 59.72, "learning_rate": 1.2036763754045308e-05, "loss": 0.1001, "step": 153790 }, { "epoch": 59.73, "learning_rate": 1.2036245954692558e-05, "loss": 0.0735, "step": 153800 }, { "epoch": 59.73, "learning_rate": 1.2035728155339807e-05, "loss": 0.0468, "step": 153810 }, { "epoch": 59.74, "learning_rate": 1.2035210355987057e-05, "loss": 0.1216, "step": 153820 }, { "epoch": 59.74, "learning_rate": 1.2034692556634305e-05, "loss": 0.0959, "step": 153830 }, { "epoch": 59.74, "learning_rate": 1.2034174757281554e-05, "loss": 0.094, "step": 153840 }, { "epoch": 59.75, "learning_rate": 1.2033656957928804e-05, "loss": 0.144, "step": 153850 }, { "epoch": 59.75, "learning_rate": 1.2033139158576052e-05, "loss": 0.0402, "step": 153860 }, { "epoch": 59.76, "learning_rate": 1.2032621359223301e-05, "loss": 0.0998, "step": 153870 }, { "epoch": 59.76, "learning_rate": 1.2032103559870551e-05, "loss": 0.1523, "step": 153880 }, { "epoch": 59.76, "learning_rate": 1.20315857605178e-05, "loss": 0.1147, "step": 153890 }, { "epoch": 59.77, "learning_rate": 1.203106796116505e-05, "loss": 0.1817, "step": 153900 }, { "epoch": 59.77, "learning_rate": 1.2030550161812298e-05, "loss": 0.0591, "step": 153910 }, { "epoch": 59.77, "learning_rate": 1.2030032362459548e-05, "loss": 0.0687, "step": 153920 }, { "epoch": 59.78, "learning_rate": 1.2029514563106797e-05, "loss": 0.0806, "step": 153930 }, { "epoch": 59.78, "learning_rate": 1.2028996763754045e-05, "loss": 0.0372, "step": 153940 }, { "epoch": 59.79, "learning_rate": 1.2028478964401295e-05, "loss": 0.111, "step": 153950 }, { "epoch": 59.79, "learning_rate": 1.2027961165048545e-05, "loss": 0.0025, "step": 153960 }, { "epoch": 59.79, "learning_rate": 1.2027443365695794e-05, "loss": 0.1125, "step": 153970 }, { "epoch": 59.8, "learning_rate": 1.2026925566343044e-05, "loss": 0.0806, "step": 153980 }, { "epoch": 59.8, "learning_rate": 1.2026407766990292e-05, "loss": 0.0795, "step": 153990 }, { "epoch": 59.81, "learning_rate": 1.2025889967637541e-05, "loss": 0.0424, "step": 154000 }, { "epoch": 59.81, "learning_rate": 1.202537216828479e-05, "loss": 0.0071, "step": 154010 }, { "epoch": 59.81, "learning_rate": 1.2024854368932039e-05, "loss": 0.1018, "step": 154020 }, { "epoch": 59.82, "learning_rate": 1.2024336569579289e-05, "loss": 0.1193, "step": 154030 }, { "epoch": 59.82, "learning_rate": 1.2023818770226538e-05, "loss": 0.0379, "step": 154040 }, { "epoch": 59.83, "learning_rate": 1.2023300970873788e-05, "loss": 0.0061, "step": 154050 }, { "epoch": 59.83, "learning_rate": 1.2022783171521037e-05, "loss": 0.1379, "step": 154060 }, { "epoch": 59.83, "learning_rate": 1.2022265372168285e-05, "loss": 0.056, "step": 154070 }, { "epoch": 59.84, "learning_rate": 1.2021747572815535e-05, "loss": 0.0314, "step": 154080 }, { "epoch": 59.84, "learning_rate": 1.2021229773462783e-05, "loss": 0.0153, "step": 154090 }, { "epoch": 59.84, "learning_rate": 1.2020711974110033e-05, "loss": 0.0044, "step": 154100 }, { "epoch": 59.85, "learning_rate": 1.2020194174757282e-05, "loss": 0.0209, "step": 154110 }, { "epoch": 59.85, "learning_rate": 1.2019676375404532e-05, "loss": 0.0162, "step": 154120 }, { "epoch": 59.86, "learning_rate": 1.2019158576051781e-05, "loss": 0.229, "step": 154130 }, { "epoch": 59.86, "learning_rate": 1.2018640776699031e-05, "loss": 0.0471, "step": 154140 }, { "epoch": 59.86, "learning_rate": 1.2018122977346279e-05, "loss": 0.104, "step": 154150 }, { "epoch": 59.87, "learning_rate": 1.2017605177993527e-05, "loss": 0.1885, "step": 154160 }, { "epoch": 59.87, "learning_rate": 1.2017087378640776e-05, "loss": 0.053, "step": 154170 }, { "epoch": 59.88, "learning_rate": 1.2016569579288026e-05, "loss": 0.072, "step": 154180 }, { "epoch": 59.88, "learning_rate": 1.2016051779935276e-05, "loss": 0.0128, "step": 154190 }, { "epoch": 59.88, "learning_rate": 1.2015533980582525e-05, "loss": 0.0919, "step": 154200 }, { "epoch": 59.89, "learning_rate": 1.2015016181229775e-05, "loss": 0.0651, "step": 154210 }, { "epoch": 59.89, "learning_rate": 1.2014498381877025e-05, "loss": 0.0318, "step": 154220 }, { "epoch": 59.9, "learning_rate": 1.2013980582524274e-05, "loss": 0.0431, "step": 154230 }, { "epoch": 59.9, "learning_rate": 1.201346278317152e-05, "loss": 0.0049, "step": 154240 }, { "epoch": 59.9, "learning_rate": 1.201294498381877e-05, "loss": 0.1265, "step": 154250 }, { "epoch": 59.91, "learning_rate": 1.201242718446602e-05, "loss": 0.1034, "step": 154260 }, { "epoch": 59.91, "learning_rate": 1.201190938511327e-05, "loss": 0.0238, "step": 154270 }, { "epoch": 59.91, "learning_rate": 1.2011391585760519e-05, "loss": 0.1014, "step": 154280 }, { "epoch": 59.92, "learning_rate": 1.2010873786407768e-05, "loss": 0.1089, "step": 154290 }, { "epoch": 59.92, "learning_rate": 1.2010355987055018e-05, "loss": 0.2796, "step": 154300 }, { "epoch": 59.93, "learning_rate": 1.2009838187702268e-05, "loss": 0.0582, "step": 154310 }, { "epoch": 59.93, "learning_rate": 1.2009320388349514e-05, "loss": 0.0254, "step": 154320 }, { "epoch": 59.93, "learning_rate": 1.2008802588996764e-05, "loss": 0.1634, "step": 154330 }, { "epoch": 59.94, "learning_rate": 1.2008284789644013e-05, "loss": 0.0128, "step": 154340 }, { "epoch": 59.94, "learning_rate": 1.2007766990291263e-05, "loss": 0.0981, "step": 154350 }, { "epoch": 59.95, "learning_rate": 1.2007249190938512e-05, "loss": 0.0414, "step": 154360 }, { "epoch": 59.95, "learning_rate": 1.2006731391585762e-05, "loss": 0.1022, "step": 154370 }, { "epoch": 59.95, "learning_rate": 1.2006213592233012e-05, "loss": 0.0026, "step": 154380 }, { "epoch": 59.96, "learning_rate": 1.2005695792880261e-05, "loss": 0.0261, "step": 154390 }, { "epoch": 59.96, "learning_rate": 1.2005177993527508e-05, "loss": 0.1075, "step": 154400 }, { "epoch": 59.97, "learning_rate": 1.2004660194174757e-05, "loss": 0.0374, "step": 154410 }, { "epoch": 59.97, "learning_rate": 1.2004142394822007e-05, "loss": 0.0159, "step": 154420 }, { "epoch": 59.97, "learning_rate": 1.2003624595469256e-05, "loss": 0.1148, "step": 154430 }, { "epoch": 59.98, "learning_rate": 1.2003106796116506e-05, "loss": 0.1067, "step": 154440 }, { "epoch": 59.98, "learning_rate": 1.2002588996763756e-05, "loss": 0.0956, "step": 154450 }, { "epoch": 59.98, "learning_rate": 1.2002071197411005e-05, "loss": 0.0622, "step": 154460 }, { "epoch": 59.99, "learning_rate": 1.2001553398058255e-05, "loss": 0.0504, "step": 154470 }, { "epoch": 59.99, "learning_rate": 1.2001035598705501e-05, "loss": 0.0511, "step": 154480 }, { "epoch": 60.0, "learning_rate": 1.200051779935275e-05, "loss": 0.1265, "step": 154490 }, { "epoch": 60.0, "learning_rate": 1.2e-05, "loss": 0.1233, "step": 154500 }, { "epoch": 60.0, "eval_accuracy": 0.9515818431911967, "eval_loss": 0.3270106315612793, "eval_runtime": 8.2702, "eval_samples_per_second": 439.527, "eval_steps_per_second": 55.016, "step": 154500 }, { "epoch": 60.0, "learning_rate": 1.199948220064725e-05, "loss": 0.0453, "step": 154510 }, { "epoch": 60.01, "learning_rate": 1.19989644012945e-05, "loss": 0.0006, "step": 154520 }, { "epoch": 60.01, "learning_rate": 1.199844660194175e-05, "loss": 0.0644, "step": 154530 }, { "epoch": 60.02, "learning_rate": 1.1997928802588999e-05, "loss": 0.1287, "step": 154540 }, { "epoch": 60.02, "learning_rate": 1.1997411003236248e-05, "loss": 0.043, "step": 154550 }, { "epoch": 60.02, "learning_rate": 1.1996893203883495e-05, "loss": 0.0771, "step": 154560 }, { "epoch": 60.03, "learning_rate": 1.1996375404530744e-05, "loss": 0.0051, "step": 154570 }, { "epoch": 60.03, "learning_rate": 1.1995857605177994e-05, "loss": 0.1302, "step": 154580 }, { "epoch": 60.03, "learning_rate": 1.1995339805825243e-05, "loss": 0.0181, "step": 154590 }, { "epoch": 60.04, "learning_rate": 1.1994822006472493e-05, "loss": 0.0152, "step": 154600 }, { "epoch": 60.04, "learning_rate": 1.1994304207119743e-05, "loss": 0.0528, "step": 154610 }, { "epoch": 60.05, "learning_rate": 1.1993786407766992e-05, "loss": 0.0407, "step": 154620 }, { "epoch": 60.05, "learning_rate": 1.1993268608414242e-05, "loss": 0.2022, "step": 154630 }, { "epoch": 60.05, "learning_rate": 1.1992750809061488e-05, "loss": 0.0917, "step": 154640 }, { "epoch": 60.06, "learning_rate": 1.1992233009708738e-05, "loss": 0.1391, "step": 154650 }, { "epoch": 60.06, "learning_rate": 1.1991715210355987e-05, "loss": 0.0237, "step": 154660 }, { "epoch": 60.07, "learning_rate": 1.1991197411003237e-05, "loss": 0.0566, "step": 154670 }, { "epoch": 60.07, "learning_rate": 1.1990679611650487e-05, "loss": 0.0641, "step": 154680 }, { "epoch": 60.07, "learning_rate": 1.1990161812297736e-05, "loss": 0.0027, "step": 154690 }, { "epoch": 60.08, "learning_rate": 1.1989644012944986e-05, "loss": 0.0512, "step": 154700 }, { "epoch": 60.08, "learning_rate": 1.1989126213592236e-05, "loss": 0.1369, "step": 154710 }, { "epoch": 60.09, "learning_rate": 1.1988608414239482e-05, "loss": 0.172, "step": 154720 }, { "epoch": 60.09, "learning_rate": 1.1988090614886731e-05, "loss": 0.0525, "step": 154730 }, { "epoch": 60.09, "learning_rate": 1.1987572815533981e-05, "loss": 0.104, "step": 154740 }, { "epoch": 60.1, "learning_rate": 1.198705501618123e-05, "loss": 0.0926, "step": 154750 }, { "epoch": 60.1, "learning_rate": 1.198653721682848e-05, "loss": 0.042, "step": 154760 }, { "epoch": 60.1, "learning_rate": 1.198601941747573e-05, "loss": 0.0003, "step": 154770 }, { "epoch": 60.11, "learning_rate": 1.198550161812298e-05, "loss": 0.0698, "step": 154780 }, { "epoch": 60.11, "learning_rate": 1.1984983818770229e-05, "loss": 0.1121, "step": 154790 }, { "epoch": 60.12, "learning_rate": 1.1984466019417477e-05, "loss": 0.0514, "step": 154800 }, { "epoch": 60.12, "learning_rate": 1.1983948220064725e-05, "loss": 0.0543, "step": 154810 }, { "epoch": 60.12, "learning_rate": 1.1983430420711975e-05, "loss": 0.1078, "step": 154820 }, { "epoch": 60.13, "learning_rate": 1.1982912621359224e-05, "loss": 0.0311, "step": 154830 }, { "epoch": 60.13, "learning_rate": 1.1982394822006474e-05, "loss": 0.0886, "step": 154840 }, { "epoch": 60.14, "learning_rate": 1.1981877022653723e-05, "loss": 0.2306, "step": 154850 }, { "epoch": 60.14, "learning_rate": 1.1981359223300973e-05, "loss": 0.0024, "step": 154860 }, { "epoch": 60.14, "learning_rate": 1.1980841423948223e-05, "loss": 0.0228, "step": 154870 }, { "epoch": 60.15, "learning_rate": 1.198032362459547e-05, "loss": 0.0768, "step": 154880 }, { "epoch": 60.15, "learning_rate": 1.1979805825242718e-05, "loss": 0.086, "step": 154890 }, { "epoch": 60.16, "learning_rate": 1.1979288025889968e-05, "loss": 0.019, "step": 154900 }, { "epoch": 60.16, "learning_rate": 1.1978770226537218e-05, "loss": 0.1197, "step": 154910 }, { "epoch": 60.16, "learning_rate": 1.1978252427184467e-05, "loss": 0.0945, "step": 154920 }, { "epoch": 60.17, "learning_rate": 1.1977734627831717e-05, "loss": 0.0324, "step": 154930 }, { "epoch": 60.17, "learning_rate": 1.1977216828478967e-05, "loss": 0.1092, "step": 154940 }, { "epoch": 60.17, "learning_rate": 1.1976699029126214e-05, "loss": 0.1088, "step": 154950 }, { "epoch": 60.18, "learning_rate": 1.1976181229773464e-05, "loss": 0.2394, "step": 154960 }, { "epoch": 60.18, "learning_rate": 1.1975663430420712e-05, "loss": 0.1018, "step": 154970 }, { "epoch": 60.19, "learning_rate": 1.1975145631067962e-05, "loss": 0.045, "step": 154980 }, { "epoch": 60.19, "learning_rate": 1.1974627831715211e-05, "loss": 0.1302, "step": 154990 }, { "epoch": 60.19, "learning_rate": 1.1974110032362461e-05, "loss": 0.0075, "step": 155000 }, { "epoch": 60.2, "learning_rate": 1.197359223300971e-05, "loss": 0.0149, "step": 155010 }, { "epoch": 60.2, "learning_rate": 1.197307443365696e-05, "loss": 0.0471, "step": 155020 }, { "epoch": 60.21, "learning_rate": 1.1972556634304208e-05, "loss": 0.0063, "step": 155030 }, { "epoch": 60.21, "learning_rate": 1.1972038834951458e-05, "loss": 0.0503, "step": 155040 }, { "epoch": 60.21, "learning_rate": 1.1971521035598706e-05, "loss": 0.0417, "step": 155050 }, { "epoch": 60.22, "learning_rate": 1.1971003236245955e-05, "loss": 0.0171, "step": 155060 }, { "epoch": 60.22, "learning_rate": 1.1970485436893205e-05, "loss": 0.0379, "step": 155070 }, { "epoch": 60.23, "learning_rate": 1.1969967637540454e-05, "loss": 0.0549, "step": 155080 }, { "epoch": 60.23, "learning_rate": 1.1969449838187704e-05, "loss": 0.0016, "step": 155090 }, { "epoch": 60.23, "learning_rate": 1.1968932038834952e-05, "loss": 0.1607, "step": 155100 }, { "epoch": 60.24, "learning_rate": 1.1968414239482202e-05, "loss": 0.0549, "step": 155110 }, { "epoch": 60.24, "learning_rate": 1.1967896440129451e-05, "loss": 0.0467, "step": 155120 }, { "epoch": 60.24, "learning_rate": 1.1967378640776699e-05, "loss": 0.0406, "step": 155130 }, { "epoch": 60.25, "learning_rate": 1.1966860841423949e-05, "loss": 0.1003, "step": 155140 }, { "epoch": 60.25, "learning_rate": 1.1966343042071198e-05, "loss": 0.0907, "step": 155150 }, { "epoch": 60.26, "learning_rate": 1.1965825242718448e-05, "loss": 0.0063, "step": 155160 }, { "epoch": 60.26, "learning_rate": 1.1965307443365698e-05, "loss": 0.0475, "step": 155170 }, { "epoch": 60.26, "learning_rate": 1.1964789644012946e-05, "loss": 0.2801, "step": 155180 }, { "epoch": 60.27, "learning_rate": 1.1964271844660195e-05, "loss": 0.0951, "step": 155190 }, { "epoch": 60.27, "learning_rate": 1.1963754045307445e-05, "loss": 0.0232, "step": 155200 }, { "epoch": 60.28, "learning_rate": 1.1963236245954693e-05, "loss": 0.05, "step": 155210 }, { "epoch": 60.28, "learning_rate": 1.1962718446601942e-05, "loss": 0.0019, "step": 155220 }, { "epoch": 60.28, "learning_rate": 1.1962200647249192e-05, "loss": 0.2717, "step": 155230 }, { "epoch": 60.29, "learning_rate": 1.1961682847896442e-05, "loss": 0.0993, "step": 155240 }, { "epoch": 60.29, "learning_rate": 1.196116504854369e-05, "loss": 0.0132, "step": 155250 }, { "epoch": 60.3, "learning_rate": 1.1960647249190939e-05, "loss": 0.1866, "step": 155260 }, { "epoch": 60.3, "learning_rate": 1.1960129449838189e-05, "loss": 0.0181, "step": 155270 }, { "epoch": 60.3, "learning_rate": 1.1959611650485438e-05, "loss": 0.1661, "step": 155280 }, { "epoch": 60.31, "learning_rate": 1.1959093851132688e-05, "loss": 0.2961, "step": 155290 }, { "epoch": 60.31, "learning_rate": 1.1958576051779936e-05, "loss": 0.0004, "step": 155300 }, { "epoch": 60.31, "learning_rate": 1.1958058252427185e-05, "loss": 0.0545, "step": 155310 }, { "epoch": 60.32, "learning_rate": 1.1957540453074435e-05, "loss": 0.0004, "step": 155320 }, { "epoch": 60.32, "learning_rate": 1.1957022653721683e-05, "loss": 0.1217, "step": 155330 }, { "epoch": 60.33, "learning_rate": 1.1956504854368933e-05, "loss": 0.0252, "step": 155340 }, { "epoch": 60.33, "learning_rate": 1.1955987055016182e-05, "loss": 0.0132, "step": 155350 }, { "epoch": 60.33, "learning_rate": 1.1955469255663432e-05, "loss": 0.0563, "step": 155360 }, { "epoch": 60.34, "learning_rate": 1.1954951456310681e-05, "loss": 0.0036, "step": 155370 }, { "epoch": 60.34, "learning_rate": 1.195443365695793e-05, "loss": 0.0909, "step": 155380 }, { "epoch": 60.35, "learning_rate": 1.1953915857605179e-05, "loss": 0.0221, "step": 155390 }, { "epoch": 60.35, "learning_rate": 1.1953398058252429e-05, "loss": 0.0862, "step": 155400 }, { "epoch": 60.35, "learning_rate": 1.1952880258899677e-05, "loss": 0.0288, "step": 155410 }, { "epoch": 60.36, "learning_rate": 1.1952362459546926e-05, "loss": 0.0198, "step": 155420 }, { "epoch": 60.36, "learning_rate": 1.1951844660194176e-05, "loss": 0.0052, "step": 155430 }, { "epoch": 60.37, "learning_rate": 1.1951326860841425e-05, "loss": 0.1604, "step": 155440 }, { "epoch": 60.37, "learning_rate": 1.1950809061488675e-05, "loss": 0.0219, "step": 155450 }, { "epoch": 60.37, "learning_rate": 1.1950291262135923e-05, "loss": 0.2094, "step": 155460 }, { "epoch": 60.38, "learning_rate": 1.1949773462783173e-05, "loss": 0.0231, "step": 155470 }, { "epoch": 60.38, "learning_rate": 1.194925566343042e-05, "loss": 0.0587, "step": 155480 }, { "epoch": 60.38, "learning_rate": 1.194873786407767e-05, "loss": 0.0771, "step": 155490 }, { "epoch": 60.39, "learning_rate": 1.194822006472492e-05, "loss": 0.0692, "step": 155500 }, { "epoch": 60.39, "learning_rate": 1.194770226537217e-05, "loss": 0.0096, "step": 155510 }, { "epoch": 60.4, "learning_rate": 1.1947184466019419e-05, "loss": 0.0981, "step": 155520 }, { "epoch": 60.4, "learning_rate": 1.1946666666666669e-05, "loss": 0.0483, "step": 155530 }, { "epoch": 60.4, "learning_rate": 1.1946148867313917e-05, "loss": 0.0623, "step": 155540 }, { "epoch": 60.41, "learning_rate": 1.1945631067961166e-05, "loss": 0.1517, "step": 155550 }, { "epoch": 60.41, "learning_rate": 1.1945113268608414e-05, "loss": 0.0877, "step": 155560 }, { "epoch": 60.42, "learning_rate": 1.1944595469255664e-05, "loss": 0.1436, "step": 155570 }, { "epoch": 60.42, "learning_rate": 1.1944077669902913e-05, "loss": 0.147, "step": 155580 }, { "epoch": 60.42, "learning_rate": 1.1943559870550163e-05, "loss": 0.0363, "step": 155590 }, { "epoch": 60.43, "learning_rate": 1.1943042071197413e-05, "loss": 0.0277, "step": 155600 }, { "epoch": 60.43, "learning_rate": 1.1942524271844662e-05, "loss": 0.0407, "step": 155610 }, { "epoch": 60.43, "learning_rate": 1.194200647249191e-05, "loss": 0.0183, "step": 155620 }, { "epoch": 60.44, "learning_rate": 1.1941488673139158e-05, "loss": 0.0699, "step": 155630 }, { "epoch": 60.44, "learning_rate": 1.1940970873786408e-05, "loss": 0.1696, "step": 155640 }, { "epoch": 60.45, "learning_rate": 1.1940453074433657e-05, "loss": 0.1493, "step": 155650 }, { "epoch": 60.45, "learning_rate": 1.1939935275080907e-05, "loss": 0.082, "step": 155660 }, { "epoch": 60.45, "learning_rate": 1.1939417475728156e-05, "loss": 0.0391, "step": 155670 }, { "epoch": 60.46, "learning_rate": 1.1938899676375406e-05, "loss": 0.0958, "step": 155680 }, { "epoch": 60.46, "learning_rate": 1.1938381877022656e-05, "loss": 0.0453, "step": 155690 }, { "epoch": 60.47, "learning_rate": 1.1937864077669904e-05, "loss": 0.0927, "step": 155700 }, { "epoch": 60.47, "learning_rate": 1.1937346278317152e-05, "loss": 0.0811, "step": 155710 }, { "epoch": 60.47, "learning_rate": 1.1936828478964401e-05, "loss": 0.0727, "step": 155720 }, { "epoch": 60.48, "learning_rate": 1.193631067961165e-05, "loss": 0.0226, "step": 155730 }, { "epoch": 60.48, "learning_rate": 1.19357928802589e-05, "loss": 0.0282, "step": 155740 }, { "epoch": 60.49, "learning_rate": 1.193527508090615e-05, "loss": 0.1121, "step": 155750 }, { "epoch": 60.49, "learning_rate": 1.19347572815534e-05, "loss": 0.0542, "step": 155760 }, { "epoch": 60.49, "learning_rate": 1.193423948220065e-05, "loss": 0.0732, "step": 155770 }, { "epoch": 60.5, "learning_rate": 1.1933721682847896e-05, "loss": 0.0282, "step": 155780 }, { "epoch": 60.5, "learning_rate": 1.1933203883495145e-05, "loss": 0.0186, "step": 155790 }, { "epoch": 60.5, "learning_rate": 1.1932686084142395e-05, "loss": 0.0301, "step": 155800 }, { "epoch": 60.51, "learning_rate": 1.1932168284789644e-05, "loss": 0.0706, "step": 155810 }, { "epoch": 60.51, "learning_rate": 1.1931650485436894e-05, "loss": 0.0203, "step": 155820 }, { "epoch": 60.52, "learning_rate": 1.1931132686084144e-05, "loss": 0.0767, "step": 155830 }, { "epoch": 60.52, "learning_rate": 1.1930614886731393e-05, "loss": 0.0311, "step": 155840 }, { "epoch": 60.52, "learning_rate": 1.1930097087378643e-05, "loss": 0.0315, "step": 155850 }, { "epoch": 60.53, "learning_rate": 1.1929579288025892e-05, "loss": 0.0253, "step": 155860 }, { "epoch": 60.53, "learning_rate": 1.1929061488673139e-05, "loss": 0.0602, "step": 155870 }, { "epoch": 60.54, "learning_rate": 1.1928543689320388e-05, "loss": 0.0861, "step": 155880 }, { "epoch": 60.54, "learning_rate": 1.1928025889967638e-05, "loss": 0.1734, "step": 155890 }, { "epoch": 60.54, "learning_rate": 1.1927508090614888e-05, "loss": 0.1657, "step": 155900 }, { "epoch": 60.55, "learning_rate": 1.1926990291262137e-05, "loss": 0.0209, "step": 155910 }, { "epoch": 60.55, "learning_rate": 1.1926472491909387e-05, "loss": 0.0264, "step": 155920 }, { "epoch": 60.56, "learning_rate": 1.1925954692556636e-05, "loss": 0.0187, "step": 155930 }, { "epoch": 60.56, "learning_rate": 1.1925436893203886e-05, "loss": 0.0072, "step": 155940 }, { "epoch": 60.56, "learning_rate": 1.1924919093851132e-05, "loss": 0.0009, "step": 155950 }, { "epoch": 60.57, "learning_rate": 1.1924401294498382e-05, "loss": 0.1241, "step": 155960 }, { "epoch": 60.57, "learning_rate": 1.1923883495145631e-05, "loss": 0.018, "step": 155970 }, { "epoch": 60.57, "learning_rate": 1.1923365695792881e-05, "loss": 0.0708, "step": 155980 }, { "epoch": 60.58, "learning_rate": 1.192284789644013e-05, "loss": 0.08, "step": 155990 }, { "epoch": 60.58, "learning_rate": 1.192233009708738e-05, "loss": 0.0178, "step": 156000 }, { "epoch": 60.59, "learning_rate": 1.192181229773463e-05, "loss": 0.1351, "step": 156010 }, { "epoch": 60.59, "learning_rate": 1.192129449838188e-05, "loss": 0.0056, "step": 156020 }, { "epoch": 60.59, "learning_rate": 1.1920776699029126e-05, "loss": 0.1967, "step": 156030 }, { "epoch": 60.6, "learning_rate": 1.1920258899676375e-05, "loss": 0.0755, "step": 156040 }, { "epoch": 60.6, "learning_rate": 1.1919741100323625e-05, "loss": 0.0042, "step": 156050 }, { "epoch": 60.61, "learning_rate": 1.1919223300970875e-05, "loss": 0.0967, "step": 156060 }, { "epoch": 60.61, "learning_rate": 1.1918705501618124e-05, "loss": 0.0829, "step": 156070 }, { "epoch": 60.61, "learning_rate": 1.1918187702265374e-05, "loss": 0.1516, "step": 156080 }, { "epoch": 60.62, "learning_rate": 1.1917669902912624e-05, "loss": 0.0673, "step": 156090 }, { "epoch": 60.62, "learning_rate": 1.1917152103559873e-05, "loss": 0.064, "step": 156100 }, { "epoch": 60.63, "learning_rate": 1.191663430420712e-05, "loss": 0.1235, "step": 156110 }, { "epoch": 60.63, "learning_rate": 1.1916116504854369e-05, "loss": 0.037, "step": 156120 }, { "epoch": 60.63, "learning_rate": 1.1915598705501619e-05, "loss": 0.2453, "step": 156130 }, { "epoch": 60.64, "learning_rate": 1.1915080906148868e-05, "loss": 0.2768, "step": 156140 }, { "epoch": 60.64, "learning_rate": 1.1914563106796118e-05, "loss": 0.1128, "step": 156150 }, { "epoch": 60.64, "learning_rate": 1.1914045307443367e-05, "loss": 0.0558, "step": 156160 }, { "epoch": 60.65, "learning_rate": 1.1913527508090617e-05, "loss": 0.0101, "step": 156170 }, { "epoch": 60.65, "learning_rate": 1.1913009708737867e-05, "loss": 0.0732, "step": 156180 }, { "epoch": 60.66, "learning_rate": 1.1912491909385113e-05, "loss": 0.0288, "step": 156190 }, { "epoch": 60.66, "learning_rate": 1.1911974110032363e-05, "loss": 0.0195, "step": 156200 }, { "epoch": 60.66, "learning_rate": 1.1911456310679612e-05, "loss": 0.0801, "step": 156210 }, { "epoch": 60.67, "learning_rate": 1.1910938511326862e-05, "loss": 0.0658, "step": 156220 }, { "epoch": 60.67, "learning_rate": 1.1910420711974111e-05, "loss": 0.1411, "step": 156230 }, { "epoch": 60.68, "learning_rate": 1.1909902912621361e-05, "loss": 0.0135, "step": 156240 }, { "epoch": 60.68, "learning_rate": 1.190938511326861e-05, "loss": 0.1086, "step": 156250 }, { "epoch": 60.68, "learning_rate": 1.190886731391586e-05, "loss": 0.1215, "step": 156260 }, { "epoch": 60.69, "learning_rate": 1.1908349514563106e-05, "loss": 0.1385, "step": 156270 }, { "epoch": 60.69, "learning_rate": 1.1907831715210356e-05, "loss": 0.0248, "step": 156280 }, { "epoch": 60.7, "learning_rate": 1.1907313915857606e-05, "loss": 0.0218, "step": 156290 }, { "epoch": 60.7, "learning_rate": 1.1906796116504855e-05, "loss": 0.0949, "step": 156300 }, { "epoch": 60.7, "learning_rate": 1.1906278317152105e-05, "loss": 0.1057, "step": 156310 }, { "epoch": 60.71, "learning_rate": 1.1905760517799355e-05, "loss": 0.0911, "step": 156320 }, { "epoch": 60.71, "learning_rate": 1.1905242718446604e-05, "loss": 0.0939, "step": 156330 }, { "epoch": 60.71, "learning_rate": 1.1904724919093854e-05, "loss": 0.0695, "step": 156340 }, { "epoch": 60.72, "learning_rate": 1.19042071197411e-05, "loss": 0.1471, "step": 156350 }, { "epoch": 60.72, "learning_rate": 1.190368932038835e-05, "loss": 0.005, "step": 156360 }, { "epoch": 60.73, "learning_rate": 1.19031715210356e-05, "loss": 0.1125, "step": 156370 }, { "epoch": 60.73, "learning_rate": 1.1902653721682849e-05, "loss": 0.0658, "step": 156380 }, { "epoch": 60.73, "learning_rate": 1.1902135922330098e-05, "loss": 0.1357, "step": 156390 }, { "epoch": 60.74, "learning_rate": 1.1901618122977348e-05, "loss": 0.0673, "step": 156400 }, { "epoch": 60.74, "learning_rate": 1.1901100323624598e-05, "loss": 0.0803, "step": 156410 }, { "epoch": 60.75, "learning_rate": 1.1900582524271846e-05, "loss": 0.049, "step": 156420 }, { "epoch": 60.75, "learning_rate": 1.1900064724919095e-05, "loss": 0.0329, "step": 156430 }, { "epoch": 60.75, "learning_rate": 1.1899546925566343e-05, "loss": 0.0598, "step": 156440 }, { "epoch": 60.76, "learning_rate": 1.1899029126213593e-05, "loss": 0.093, "step": 156450 }, { "epoch": 60.76, "learning_rate": 1.1898511326860842e-05, "loss": 0.1312, "step": 156460 }, { "epoch": 60.77, "learning_rate": 1.1897993527508092e-05, "loss": 0.0543, "step": 156470 }, { "epoch": 60.77, "learning_rate": 1.1897475728155342e-05, "loss": 0.0244, "step": 156480 }, { "epoch": 60.77, "learning_rate": 1.1896957928802591e-05, "loss": 0.0946, "step": 156490 }, { "epoch": 60.78, "learning_rate": 1.189644012944984e-05, "loss": 0.0711, "step": 156500 }, { "epoch": 60.78, "learning_rate": 1.1895922330097089e-05, "loss": 0.0575, "step": 156510 }, { "epoch": 60.78, "learning_rate": 1.1895404530744337e-05, "loss": 0.1781, "step": 156520 }, { "epoch": 60.79, "learning_rate": 1.1894886731391586e-05, "loss": 0.087, "step": 156530 }, { "epoch": 60.79, "learning_rate": 1.1894368932038836e-05, "loss": 0.0565, "step": 156540 }, { "epoch": 60.8, "learning_rate": 1.1893851132686086e-05, "loss": 0.1005, "step": 156550 }, { "epoch": 60.8, "learning_rate": 1.1893333333333335e-05, "loss": 0.0472, "step": 156560 }, { "epoch": 60.8, "learning_rate": 1.1892815533980583e-05, "loss": 0.0956, "step": 156570 }, { "epoch": 60.81, "learning_rate": 1.1892297734627833e-05, "loss": 0.0237, "step": 156580 }, { "epoch": 60.81, "learning_rate": 1.1891779935275082e-05, "loss": 0.2198, "step": 156590 }, { "epoch": 60.82, "learning_rate": 1.189126213592233e-05, "loss": 0.0244, "step": 156600 }, { "epoch": 60.82, "learning_rate": 1.189074433656958e-05, "loss": 0.0889, "step": 156610 }, { "epoch": 60.82, "learning_rate": 1.189022653721683e-05, "loss": 0.1039, "step": 156620 }, { "epoch": 60.83, "learning_rate": 1.188970873786408e-05, "loss": 0.0231, "step": 156630 }, { "epoch": 60.83, "learning_rate": 1.1889190938511329e-05, "loss": 0.2699, "step": 156640 }, { "epoch": 60.83, "learning_rate": 1.1888673139158577e-05, "loss": 0.0003, "step": 156650 }, { "epoch": 60.84, "learning_rate": 1.1888155339805826e-05, "loss": 0.0308, "step": 156660 }, { "epoch": 60.84, "learning_rate": 1.1887637540453076e-05, "loss": 0.0496, "step": 156670 }, { "epoch": 60.85, "learning_rate": 1.1887119741100324e-05, "loss": 0.0518, "step": 156680 }, { "epoch": 60.85, "learning_rate": 1.1886601941747573e-05, "loss": 0.0646, "step": 156690 }, { "epoch": 60.85, "learning_rate": 1.1886084142394823e-05, "loss": 0.0341, "step": 156700 }, { "epoch": 60.86, "learning_rate": 1.1885566343042073e-05, "loss": 0.1255, "step": 156710 }, { "epoch": 60.86, "learning_rate": 1.188504854368932e-05, "loss": 0.0772, "step": 156720 }, { "epoch": 60.87, "learning_rate": 1.188453074433657e-05, "loss": 0.2768, "step": 156730 }, { "epoch": 60.87, "learning_rate": 1.188401294498382e-05, "loss": 0.1339, "step": 156740 }, { "epoch": 60.87, "learning_rate": 1.188349514563107e-05, "loss": 0.0872, "step": 156750 }, { "epoch": 60.88, "learning_rate": 1.1882977346278317e-05, "loss": 0.1968, "step": 156760 }, { "epoch": 60.88, "learning_rate": 1.1882459546925567e-05, "loss": 0.0801, "step": 156770 }, { "epoch": 60.89, "learning_rate": 1.1881941747572817e-05, "loss": 0.0899, "step": 156780 }, { "epoch": 60.89, "learning_rate": 1.1881423948220066e-05, "loss": 0.2303, "step": 156790 }, { "epoch": 60.89, "learning_rate": 1.1880906148867314e-05, "loss": 0.1224, "step": 156800 }, { "epoch": 60.9, "learning_rate": 1.1880388349514564e-05, "loss": 0.0699, "step": 156810 }, { "epoch": 60.9, "learning_rate": 1.1879870550161813e-05, "loss": 0.0803, "step": 156820 }, { "epoch": 60.9, "learning_rate": 1.1879352750809063e-05, "loss": 0.0285, "step": 156830 }, { "epoch": 60.91, "learning_rate": 1.1878834951456311e-05, "loss": 0.0492, "step": 156840 }, { "epoch": 60.91, "learning_rate": 1.187831715210356e-05, "loss": 0.0337, "step": 156850 }, { "epoch": 60.92, "learning_rate": 1.187779935275081e-05, "loss": 0.162, "step": 156860 }, { "epoch": 60.92, "learning_rate": 1.187728155339806e-05, "loss": 0.055, "step": 156870 }, { "epoch": 60.92, "learning_rate": 1.1876763754045308e-05, "loss": 0.1634, "step": 156880 }, { "epoch": 60.93, "learning_rate": 1.1876245954692557e-05, "loss": 0.0333, "step": 156890 }, { "epoch": 60.93, "learning_rate": 1.1875728155339807e-05, "loss": 0.0619, "step": 156900 }, { "epoch": 60.94, "learning_rate": 1.1875210355987057e-05, "loss": 0.0288, "step": 156910 }, { "epoch": 60.94, "learning_rate": 1.1874692556634305e-05, "loss": 0.0368, "step": 156920 }, { "epoch": 60.94, "learning_rate": 1.1874174757281554e-05, "loss": 0.0874, "step": 156930 }, { "epoch": 60.95, "learning_rate": 1.1873656957928804e-05, "loss": 0.0169, "step": 156940 }, { "epoch": 60.95, "learning_rate": 1.1873139158576052e-05, "loss": 0.0823, "step": 156950 }, { "epoch": 60.96, "learning_rate": 1.1872621359223301e-05, "loss": 0.1826, "step": 156960 }, { "epoch": 60.96, "learning_rate": 1.1872103559870551e-05, "loss": 0.0922, "step": 156970 }, { "epoch": 60.96, "learning_rate": 1.18715857605178e-05, "loss": 0.1045, "step": 156980 }, { "epoch": 60.97, "learning_rate": 1.187106796116505e-05, "loss": 0.1131, "step": 156990 }, { "epoch": 60.97, "learning_rate": 1.18705501618123e-05, "loss": 0.2021, "step": 157000 }, { "epoch": 60.97, "learning_rate": 1.1870032362459548e-05, "loss": 0.0268, "step": 157010 }, { "epoch": 60.98, "learning_rate": 1.1869514563106797e-05, "loss": 0.168, "step": 157020 }, { "epoch": 60.98, "learning_rate": 1.1868996763754045e-05, "loss": 0.0073, "step": 157030 }, { "epoch": 60.99, "learning_rate": 1.1868478964401295e-05, "loss": 0.0758, "step": 157040 }, { "epoch": 60.99, "learning_rate": 1.1867961165048544e-05, "loss": 0.0572, "step": 157050 }, { "epoch": 60.99, "learning_rate": 1.1867443365695794e-05, "loss": 0.0617, "step": 157060 }, { "epoch": 61.0, "learning_rate": 1.1866925566343044e-05, "loss": 0.1667, "step": 157070 }, { "epoch": 61.0, "eval_accuracy": 0.9502063273727648, "eval_loss": 0.3249916434288025, "eval_runtime": 8.2057, "eval_samples_per_second": 442.986, "eval_steps_per_second": 55.449, "step": 157075 }, { "epoch": 61.0, "learning_rate": 1.1866407766990293e-05, "loss": 0.0288, "step": 157080 }, { "epoch": 61.01, "learning_rate": 1.1865889967637541e-05, "loss": 0.0709, "step": 157090 }, { "epoch": 61.01, "learning_rate": 1.186537216828479e-05, "loss": 0.1363, "step": 157100 }, { "epoch": 61.01, "learning_rate": 1.1864854368932039e-05, "loss": 0.0433, "step": 157110 }, { "epoch": 61.02, "learning_rate": 1.1864336569579288e-05, "loss": 0.032, "step": 157120 }, { "epoch": 61.02, "learning_rate": 1.1863818770226538e-05, "loss": 0.0378, "step": 157130 }, { "epoch": 61.03, "learning_rate": 1.1863300970873788e-05, "loss": 0.0315, "step": 157140 }, { "epoch": 61.03, "learning_rate": 1.1862783171521037e-05, "loss": 0.213, "step": 157150 }, { "epoch": 61.03, "learning_rate": 1.1862265372168287e-05, "loss": 0.014, "step": 157160 }, { "epoch": 61.04, "learning_rate": 1.1861747572815535e-05, "loss": 0.0972, "step": 157170 }, { "epoch": 61.04, "learning_rate": 1.1861229773462783e-05, "loss": 0.005, "step": 157180 }, { "epoch": 61.04, "learning_rate": 1.1860711974110032e-05, "loss": 0.0274, "step": 157190 }, { "epoch": 61.05, "learning_rate": 1.1860194174757282e-05, "loss": 0.0462, "step": 157200 }, { "epoch": 61.05, "learning_rate": 1.1859676375404532e-05, "loss": 0.1357, "step": 157210 }, { "epoch": 61.06, "learning_rate": 1.1859158576051781e-05, "loss": 0.0478, "step": 157220 }, { "epoch": 61.06, "learning_rate": 1.185864077669903e-05, "loss": 0.1394, "step": 157230 }, { "epoch": 61.06, "learning_rate": 1.185812297734628e-05, "loss": 0.0446, "step": 157240 }, { "epoch": 61.07, "learning_rate": 1.1857605177993527e-05, "loss": 0.0333, "step": 157250 }, { "epoch": 61.07, "learning_rate": 1.1857087378640776e-05, "loss": 0.024, "step": 157260 }, { "epoch": 61.08, "learning_rate": 1.1856569579288026e-05, "loss": 0.0106, "step": 157270 }, { "epoch": 61.08, "learning_rate": 1.1856051779935276e-05, "loss": 0.0757, "step": 157280 }, { "epoch": 61.08, "learning_rate": 1.1855533980582525e-05, "loss": 0.2639, "step": 157290 }, { "epoch": 61.09, "learning_rate": 1.1855016181229775e-05, "loss": 0.0791, "step": 157300 }, { "epoch": 61.09, "learning_rate": 1.1854498381877024e-05, "loss": 0.0975, "step": 157310 }, { "epoch": 61.1, "learning_rate": 1.1853980582524274e-05, "loss": 0.0919, "step": 157320 }, { "epoch": 61.1, "learning_rate": 1.185346278317152e-05, "loss": 0.0501, "step": 157330 }, { "epoch": 61.1, "learning_rate": 1.185294498381877e-05, "loss": 0.1761, "step": 157340 }, { "epoch": 61.11, "learning_rate": 1.185242718446602e-05, "loss": 0.1919, "step": 157350 }, { "epoch": 61.11, "learning_rate": 1.1851909385113269e-05, "loss": 0.0587, "step": 157360 }, { "epoch": 61.11, "learning_rate": 1.1851391585760519e-05, "loss": 0.0197, "step": 157370 }, { "epoch": 61.12, "learning_rate": 1.1850873786407768e-05, "loss": 0.0271, "step": 157380 }, { "epoch": 61.12, "learning_rate": 1.1850355987055018e-05, "loss": 0.0718, "step": 157390 }, { "epoch": 61.13, "learning_rate": 1.1849838187702268e-05, "loss": 0.002, "step": 157400 }, { "epoch": 61.13, "learning_rate": 1.1849320388349514e-05, "loss": 0.0826, "step": 157410 }, { "epoch": 61.13, "learning_rate": 1.1848802588996763e-05, "loss": 0.087, "step": 157420 }, { "epoch": 61.14, "learning_rate": 1.1848284789644013e-05, "loss": 0.0038, "step": 157430 }, { "epoch": 61.14, "learning_rate": 1.1847766990291263e-05, "loss": 0.0663, "step": 157440 }, { "epoch": 61.15, "learning_rate": 1.1847249190938512e-05, "loss": 0.0216, "step": 157450 }, { "epoch": 61.15, "learning_rate": 1.1846731391585762e-05, "loss": 0.2073, "step": 157460 }, { "epoch": 61.15, "learning_rate": 1.1846213592233012e-05, "loss": 0.1304, "step": 157470 }, { "epoch": 61.16, "learning_rate": 1.1845695792880261e-05, "loss": 0.058, "step": 157480 }, { "epoch": 61.16, "learning_rate": 1.1845177993527507e-05, "loss": 0.0581, "step": 157490 }, { "epoch": 61.17, "learning_rate": 1.1844660194174757e-05, "loss": 0.0346, "step": 157500 }, { "epoch": 61.17, "learning_rate": 1.1844142394822007e-05, "loss": 0.1615, "step": 157510 }, { "epoch": 61.17, "learning_rate": 1.1843624595469256e-05, "loss": 0.1667, "step": 157520 }, { "epoch": 61.18, "learning_rate": 1.1843106796116506e-05, "loss": 0.1097, "step": 157530 }, { "epoch": 61.18, "learning_rate": 1.1842588996763755e-05, "loss": 0.1375, "step": 157540 }, { "epoch": 61.18, "learning_rate": 1.1842071197411005e-05, "loss": 0.0032, "step": 157550 }, { "epoch": 61.19, "learning_rate": 1.1841553398058255e-05, "loss": 0.0765, "step": 157560 }, { "epoch": 61.19, "learning_rate": 1.1841035598705504e-05, "loss": 0.0458, "step": 157570 }, { "epoch": 61.2, "learning_rate": 1.184051779935275e-05, "loss": 0.0089, "step": 157580 }, { "epoch": 61.2, "learning_rate": 1.184e-05, "loss": 0.0025, "step": 157590 }, { "epoch": 61.2, "learning_rate": 1.183948220064725e-05, "loss": 0.0066, "step": 157600 }, { "epoch": 61.21, "learning_rate": 1.18389644012945e-05, "loss": 0.08, "step": 157610 }, { "epoch": 61.21, "learning_rate": 1.1838446601941749e-05, "loss": 0.0757, "step": 157620 }, { "epoch": 61.22, "learning_rate": 1.1837928802588999e-05, "loss": 0.064, "step": 157630 }, { "epoch": 61.22, "learning_rate": 1.1837411003236248e-05, "loss": 0.0586, "step": 157640 }, { "epoch": 61.22, "learning_rate": 1.1836893203883498e-05, "loss": 0.0556, "step": 157650 }, { "epoch": 61.23, "learning_rate": 1.1836375404530744e-05, "loss": 0.0383, "step": 157660 }, { "epoch": 61.23, "learning_rate": 1.1835857605177994e-05, "loss": 0.0171, "step": 157670 }, { "epoch": 61.23, "learning_rate": 1.1835339805825243e-05, "loss": 0.0281, "step": 157680 }, { "epoch": 61.24, "learning_rate": 1.1834822006472493e-05, "loss": 0.057, "step": 157690 }, { "epoch": 61.24, "learning_rate": 1.1834304207119743e-05, "loss": 0.1338, "step": 157700 }, { "epoch": 61.25, "learning_rate": 1.1833786407766992e-05, "loss": 0.107, "step": 157710 }, { "epoch": 61.25, "learning_rate": 1.1833268608414242e-05, "loss": 0.0363, "step": 157720 }, { "epoch": 61.25, "learning_rate": 1.1832750809061491e-05, "loss": 0.1414, "step": 157730 }, { "epoch": 61.26, "learning_rate": 1.1832233009708738e-05, "loss": 0.0879, "step": 157740 }, { "epoch": 61.26, "learning_rate": 1.1831715210355987e-05, "loss": 0.0163, "step": 157750 }, { "epoch": 61.27, "learning_rate": 1.1831197411003237e-05, "loss": 0.0648, "step": 157760 }, { "epoch": 61.27, "learning_rate": 1.1830679611650486e-05, "loss": 0.0132, "step": 157770 }, { "epoch": 61.27, "learning_rate": 1.1830161812297736e-05, "loss": 0.0466, "step": 157780 }, { "epoch": 61.28, "learning_rate": 1.1829644012944986e-05, "loss": 0.1285, "step": 157790 }, { "epoch": 61.28, "learning_rate": 1.1829126213592235e-05, "loss": 0.0887, "step": 157800 }, { "epoch": 61.29, "learning_rate": 1.1828608414239485e-05, "loss": 0.0757, "step": 157810 }, { "epoch": 61.29, "learning_rate": 1.1828090614886731e-05, "loss": 0.0273, "step": 157820 }, { "epoch": 61.29, "learning_rate": 1.182757281553398e-05, "loss": 0.0724, "step": 157830 }, { "epoch": 61.3, "learning_rate": 1.182705501618123e-05, "loss": 0.0182, "step": 157840 }, { "epoch": 61.3, "learning_rate": 1.182653721682848e-05, "loss": 0.0378, "step": 157850 }, { "epoch": 61.3, "learning_rate": 1.182601941747573e-05, "loss": 0.093, "step": 157860 }, { "epoch": 61.31, "learning_rate": 1.182550161812298e-05, "loss": 0.0498, "step": 157870 }, { "epoch": 61.31, "learning_rate": 1.1824983818770229e-05, "loss": 0.0231, "step": 157880 }, { "epoch": 61.32, "learning_rate": 1.1824466019417477e-05, "loss": 0.1971, "step": 157890 }, { "epoch": 61.32, "learning_rate": 1.1823948220064725e-05, "loss": 0.0105, "step": 157900 }, { "epoch": 61.32, "learning_rate": 1.1823430420711974e-05, "loss": 0.0936, "step": 157910 }, { "epoch": 61.33, "learning_rate": 1.1822912621359224e-05, "loss": 0.0533, "step": 157920 }, { "epoch": 61.33, "learning_rate": 1.1822394822006474e-05, "loss": 0.0817, "step": 157930 }, { "epoch": 61.34, "learning_rate": 1.1821877022653723e-05, "loss": 0.0551, "step": 157940 }, { "epoch": 61.34, "learning_rate": 1.1821359223300973e-05, "loss": 0.0352, "step": 157950 }, { "epoch": 61.34, "learning_rate": 1.1820841423948222e-05, "loss": 0.0033, "step": 157960 }, { "epoch": 61.35, "learning_rate": 1.182032362459547e-05, "loss": 0.0906, "step": 157970 }, { "epoch": 61.35, "learning_rate": 1.1819805825242718e-05, "loss": 0.0855, "step": 157980 }, { "epoch": 61.36, "learning_rate": 1.1819288025889968e-05, "loss": 0.0081, "step": 157990 }, { "epoch": 61.36, "learning_rate": 1.1818770226537218e-05, "loss": 0.1398, "step": 158000 }, { "epoch": 61.36, "learning_rate": 1.1818252427184467e-05, "loss": 0.0598, "step": 158010 }, { "epoch": 61.37, "learning_rate": 1.1817734627831717e-05, "loss": 0.1733, "step": 158020 }, { "epoch": 61.37, "learning_rate": 1.1817216828478966e-05, "loss": 0.1032, "step": 158030 }, { "epoch": 61.37, "learning_rate": 1.1816699029126214e-05, "loss": 0.1334, "step": 158040 }, { "epoch": 61.38, "learning_rate": 1.1816181229773464e-05, "loss": 0.1433, "step": 158050 }, { "epoch": 61.38, "learning_rate": 1.1815663430420712e-05, "loss": 0.0595, "step": 158060 }, { "epoch": 61.39, "learning_rate": 1.1815145631067961e-05, "loss": 0.0122, "step": 158070 }, { "epoch": 61.39, "learning_rate": 1.1814627831715211e-05, "loss": 0.1087, "step": 158080 }, { "epoch": 61.39, "learning_rate": 1.181411003236246e-05, "loss": 0.0835, "step": 158090 }, { "epoch": 61.4, "learning_rate": 1.181359223300971e-05, "loss": 0.1839, "step": 158100 }, { "epoch": 61.4, "learning_rate": 1.181307443365696e-05, "loss": 0.1435, "step": 158110 }, { "epoch": 61.41, "learning_rate": 1.1812556634304208e-05, "loss": 0.0221, "step": 158120 }, { "epoch": 61.41, "learning_rate": 1.1812038834951457e-05, "loss": 0.0722, "step": 158130 }, { "epoch": 61.41, "learning_rate": 1.1811521035598707e-05, "loss": 0.1012, "step": 158140 }, { "epoch": 61.42, "learning_rate": 1.1811003236245955e-05, "loss": 0.1024, "step": 158150 }, { "epoch": 61.42, "learning_rate": 1.1810485436893205e-05, "loss": 0.111, "step": 158160 }, { "epoch": 61.43, "learning_rate": 1.1809967637540454e-05, "loss": 0.0436, "step": 158170 }, { "epoch": 61.43, "learning_rate": 1.1809449838187704e-05, "loss": 0.0482, "step": 158180 }, { "epoch": 61.43, "learning_rate": 1.1808932038834952e-05, "loss": 0.0499, "step": 158190 }, { "epoch": 61.44, "learning_rate": 1.1808414239482201e-05, "loss": 0.0687, "step": 158200 }, { "epoch": 61.44, "learning_rate": 1.1807896440129451e-05, "loss": 0.0585, "step": 158210 }, { "epoch": 61.44, "learning_rate": 1.18073786407767e-05, "loss": 0.059, "step": 158220 }, { "epoch": 61.45, "learning_rate": 1.1806860841423949e-05, "loss": 0.2574, "step": 158230 }, { "epoch": 61.45, "learning_rate": 1.1806343042071198e-05, "loss": 0.0391, "step": 158240 }, { "epoch": 61.46, "learning_rate": 1.1805825242718448e-05, "loss": 0.0202, "step": 158250 }, { "epoch": 61.46, "learning_rate": 1.1805307443365697e-05, "loss": 0.1099, "step": 158260 }, { "epoch": 61.46, "learning_rate": 1.1804789644012945e-05, "loss": 0.0453, "step": 158270 }, { "epoch": 61.47, "learning_rate": 1.1804271844660195e-05, "loss": 0.1198, "step": 158280 }, { "epoch": 61.47, "learning_rate": 1.1803754045307445e-05, "loss": 0.0562, "step": 158290 }, { "epoch": 61.48, "learning_rate": 1.1803236245954694e-05, "loss": 0.0322, "step": 158300 }, { "epoch": 61.48, "learning_rate": 1.1802718446601942e-05, "loss": 0.129, "step": 158310 }, { "epoch": 61.48, "learning_rate": 1.1802200647249192e-05, "loss": 0.0377, "step": 158320 }, { "epoch": 61.49, "learning_rate": 1.1801682847896441e-05, "loss": 0.0701, "step": 158330 }, { "epoch": 61.49, "learning_rate": 1.1801165048543691e-05, "loss": 0.1655, "step": 158340 }, { "epoch": 61.5, "learning_rate": 1.1800647249190939e-05, "loss": 0.1036, "step": 158350 }, { "epoch": 61.5, "learning_rate": 1.1800129449838189e-05, "loss": 0.0812, "step": 158360 }, { "epoch": 61.5, "learning_rate": 1.1799611650485438e-05, "loss": 0.1851, "step": 158370 }, { "epoch": 61.51, "learning_rate": 1.1799093851132688e-05, "loss": 0.0596, "step": 158380 }, { "epoch": 61.51, "learning_rate": 1.1798576051779936e-05, "loss": 0.0159, "step": 158390 }, { "epoch": 61.51, "learning_rate": 1.1798058252427185e-05, "loss": 0.0634, "step": 158400 }, { "epoch": 61.52, "learning_rate": 1.1797540453074435e-05, "loss": 0.0545, "step": 158410 }, { "epoch": 61.52, "learning_rate": 1.1797022653721683e-05, "loss": 0.0134, "step": 158420 }, { "epoch": 61.53, "learning_rate": 1.1796504854368932e-05, "loss": 0.049, "step": 158430 }, { "epoch": 61.53, "learning_rate": 1.1795987055016182e-05, "loss": 0.073, "step": 158440 }, { "epoch": 61.53, "learning_rate": 1.1795469255663432e-05, "loss": 0.0162, "step": 158450 }, { "epoch": 61.54, "learning_rate": 1.1794951456310681e-05, "loss": 0.0304, "step": 158460 }, { "epoch": 61.54, "learning_rate": 1.179443365695793e-05, "loss": 0.0602, "step": 158470 }, { "epoch": 61.55, "learning_rate": 1.1793915857605179e-05, "loss": 0.0357, "step": 158480 }, { "epoch": 61.55, "learning_rate": 1.1793398058252428e-05, "loss": 0.1271, "step": 158490 }, { "epoch": 61.55, "learning_rate": 1.1792880258899676e-05, "loss": 0.0545, "step": 158500 }, { "epoch": 61.56, "learning_rate": 1.1792362459546926e-05, "loss": 0.0633, "step": 158510 }, { "epoch": 61.56, "learning_rate": 1.1791844660194176e-05, "loss": 0.117, "step": 158520 }, { "epoch": 61.57, "learning_rate": 1.1791326860841425e-05, "loss": 0.0111, "step": 158530 }, { "epoch": 61.57, "learning_rate": 1.1790809061488675e-05, "loss": 0.0027, "step": 158540 }, { "epoch": 61.57, "learning_rate": 1.1790291262135923e-05, "loss": 0.0838, "step": 158550 }, { "epoch": 61.58, "learning_rate": 1.1789773462783172e-05, "loss": 0.0713, "step": 158560 }, { "epoch": 61.58, "learning_rate": 1.178925566343042e-05, "loss": 0.0617, "step": 158570 }, { "epoch": 61.58, "learning_rate": 1.178873786407767e-05, "loss": 0.0539, "step": 158580 }, { "epoch": 61.59, "learning_rate": 1.178822006472492e-05, "loss": 0.0006, "step": 158590 }, { "epoch": 61.59, "learning_rate": 1.178770226537217e-05, "loss": 0.3234, "step": 158600 }, { "epoch": 61.6, "learning_rate": 1.1787184466019419e-05, "loss": 0.0705, "step": 158610 }, { "epoch": 61.6, "learning_rate": 1.1786666666666668e-05, "loss": 0.1577, "step": 158620 }, { "epoch": 61.6, "learning_rate": 1.1786148867313916e-05, "loss": 0.2519, "step": 158630 }, { "epoch": 61.61, "learning_rate": 1.1785631067961166e-05, "loss": 0.1127, "step": 158640 }, { "epoch": 61.61, "learning_rate": 1.1785113268608414e-05, "loss": 0.0696, "step": 158650 }, { "epoch": 61.62, "learning_rate": 1.1784595469255664e-05, "loss": 0.1386, "step": 158660 }, { "epoch": 61.62, "learning_rate": 1.1784077669902913e-05, "loss": 0.2308, "step": 158670 }, { "epoch": 61.62, "learning_rate": 1.1783559870550163e-05, "loss": 0.0256, "step": 158680 }, { "epoch": 61.63, "learning_rate": 1.1783042071197412e-05, "loss": 0.0578, "step": 158690 }, { "epoch": 61.63, "learning_rate": 1.1782524271844662e-05, "loss": 0.0738, "step": 158700 }, { "epoch": 61.63, "learning_rate": 1.1782006472491912e-05, "loss": 0.0882, "step": 158710 }, { "epoch": 61.64, "learning_rate": 1.1781488673139158e-05, "loss": 0.0499, "step": 158720 }, { "epoch": 61.64, "learning_rate": 1.1780970873786407e-05, "loss": 0.1312, "step": 158730 }, { "epoch": 61.65, "learning_rate": 1.1780453074433657e-05, "loss": 0.0989, "step": 158740 }, { "epoch": 61.65, "learning_rate": 1.1779935275080907e-05, "loss": 0.0963, "step": 158750 }, { "epoch": 61.65, "learning_rate": 1.1779417475728156e-05, "loss": 0.0188, "step": 158760 }, { "epoch": 61.66, "learning_rate": 1.1778899676375406e-05, "loss": 0.0343, "step": 158770 }, { "epoch": 61.66, "learning_rate": 1.1778381877022656e-05, "loss": 0.0876, "step": 158780 }, { "epoch": 61.67, "learning_rate": 1.1777864077669905e-05, "loss": 0.0933, "step": 158790 }, { "epoch": 61.67, "learning_rate": 1.1777346278317151e-05, "loss": 0.1486, "step": 158800 }, { "epoch": 61.67, "learning_rate": 1.1776828478964401e-05, "loss": 0.0827, "step": 158810 }, { "epoch": 61.68, "learning_rate": 1.177631067961165e-05, "loss": 0.099, "step": 158820 }, { "epoch": 61.68, "learning_rate": 1.17757928802589e-05, "loss": 0.0519, "step": 158830 }, { "epoch": 61.69, "learning_rate": 1.177527508090615e-05, "loss": 0.0313, "step": 158840 }, { "epoch": 61.69, "learning_rate": 1.17747572815534e-05, "loss": 0.019, "step": 158850 }, { "epoch": 61.69, "learning_rate": 1.1774239482200649e-05, "loss": 0.1131, "step": 158860 }, { "epoch": 61.7, "learning_rate": 1.1773721682847899e-05, "loss": 0.1469, "step": 158870 }, { "epoch": 61.7, "learning_rate": 1.1773203883495145e-05, "loss": 0.1762, "step": 158880 }, { "epoch": 61.7, "learning_rate": 1.1772686084142395e-05, "loss": 0.0182, "step": 158890 }, { "epoch": 61.71, "learning_rate": 1.1772168284789644e-05, "loss": 0.0268, "step": 158900 }, { "epoch": 61.71, "learning_rate": 1.1771650485436894e-05, "loss": 0.0158, "step": 158910 }, { "epoch": 61.72, "learning_rate": 1.1771132686084143e-05, "loss": 0.0685, "step": 158920 }, { "epoch": 61.72, "learning_rate": 1.1770614886731393e-05, "loss": 0.0268, "step": 158930 }, { "epoch": 61.72, "learning_rate": 1.1770097087378643e-05, "loss": 0.0462, "step": 158940 }, { "epoch": 61.73, "learning_rate": 1.1769579288025892e-05, "loss": 0.0371, "step": 158950 }, { "epoch": 61.73, "learning_rate": 1.1769061488673139e-05, "loss": 0.0247, "step": 158960 }, { "epoch": 61.74, "learning_rate": 1.1768543689320388e-05, "loss": 0.0549, "step": 158970 }, { "epoch": 61.74, "learning_rate": 1.1768025889967638e-05, "loss": 0.0905, "step": 158980 }, { "epoch": 61.74, "learning_rate": 1.1767508090614887e-05, "loss": 0.0003, "step": 158990 }, { "epoch": 61.75, "learning_rate": 1.1766990291262137e-05, "loss": 0.0618, "step": 159000 }, { "epoch": 61.75, "learning_rate": 1.1766472491909387e-05, "loss": 0.0971, "step": 159010 }, { "epoch": 61.76, "learning_rate": 1.1765954692556636e-05, "loss": 0.043, "step": 159020 }, { "epoch": 61.76, "learning_rate": 1.1765436893203886e-05, "loss": 0.0469, "step": 159030 }, { "epoch": 61.76, "learning_rate": 1.1764919093851132e-05, "loss": 0.148, "step": 159040 }, { "epoch": 61.77, "learning_rate": 1.1764401294498382e-05, "loss": 0.0253, "step": 159050 }, { "epoch": 61.77, "learning_rate": 1.1763883495145631e-05, "loss": 0.047, "step": 159060 }, { "epoch": 61.77, "learning_rate": 1.1763365695792881e-05, "loss": 0.0921, "step": 159070 }, { "epoch": 61.78, "learning_rate": 1.176284789644013e-05, "loss": 0.0656, "step": 159080 }, { "epoch": 61.78, "learning_rate": 1.176233009708738e-05, "loss": 0.031, "step": 159090 }, { "epoch": 61.79, "learning_rate": 1.176181229773463e-05, "loss": 0.1548, "step": 159100 }, { "epoch": 61.79, "learning_rate": 1.176129449838188e-05, "loss": 0.1277, "step": 159110 }, { "epoch": 61.79, "learning_rate": 1.1760776699029126e-05, "loss": 0.0554, "step": 159120 }, { "epoch": 61.8, "learning_rate": 1.1760258899676375e-05, "loss": 0.0493, "step": 159130 }, { "epoch": 61.8, "learning_rate": 1.1759741100323625e-05, "loss": 0.0346, "step": 159140 }, { "epoch": 61.81, "learning_rate": 1.1759223300970874e-05, "loss": 0.0332, "step": 159150 }, { "epoch": 61.81, "learning_rate": 1.1758705501618124e-05, "loss": 0.1455, "step": 159160 }, { "epoch": 61.81, "learning_rate": 1.1758187702265374e-05, "loss": 0.003, "step": 159170 }, { "epoch": 61.82, "learning_rate": 1.1757669902912623e-05, "loss": 0.0016, "step": 159180 }, { "epoch": 61.82, "learning_rate": 1.1757152103559873e-05, "loss": 0.0646, "step": 159190 }, { "epoch": 61.83, "learning_rate": 1.175663430420712e-05, "loss": 0.0492, "step": 159200 }, { "epoch": 61.83, "learning_rate": 1.1756116504854369e-05, "loss": 0.1667, "step": 159210 }, { "epoch": 61.83, "learning_rate": 1.1755598705501618e-05, "loss": 0.0378, "step": 159220 }, { "epoch": 61.84, "learning_rate": 1.1755080906148868e-05, "loss": 0.1507, "step": 159230 }, { "epoch": 61.84, "learning_rate": 1.1754563106796118e-05, "loss": 0.0409, "step": 159240 }, { "epoch": 61.84, "learning_rate": 1.1754045307443367e-05, "loss": 0.0048, "step": 159250 }, { "epoch": 61.85, "learning_rate": 1.1753527508090617e-05, "loss": 0.0677, "step": 159260 }, { "epoch": 61.85, "learning_rate": 1.1753009708737867e-05, "loss": 0.0637, "step": 159270 }, { "epoch": 61.86, "learning_rate": 1.1752491909385116e-05, "loss": 0.1037, "step": 159280 }, { "epoch": 61.86, "learning_rate": 1.1751974110032362e-05, "loss": 0.0724, "step": 159290 }, { "epoch": 61.86, "learning_rate": 1.1751456310679612e-05, "loss": 0.087, "step": 159300 }, { "epoch": 61.87, "learning_rate": 1.1750938511326862e-05, "loss": 0.1013, "step": 159310 }, { "epoch": 61.87, "learning_rate": 1.1750420711974111e-05, "loss": 0.0971, "step": 159320 }, { "epoch": 61.88, "learning_rate": 1.174990291262136e-05, "loss": 0.044, "step": 159330 }, { "epoch": 61.88, "learning_rate": 1.174938511326861e-05, "loss": 0.0407, "step": 159340 }, { "epoch": 61.88, "learning_rate": 1.174886731391586e-05, "loss": 0.0866, "step": 159350 }, { "epoch": 61.89, "learning_rate": 1.1748349514563108e-05, "loss": 0.0331, "step": 159360 }, { "epoch": 61.89, "learning_rate": 1.1747831715210356e-05, "loss": 0.0297, "step": 159370 }, { "epoch": 61.9, "learning_rate": 1.1747313915857606e-05, "loss": 0.1338, "step": 159380 }, { "epoch": 61.9, "learning_rate": 1.1746796116504855e-05, "loss": 0.0109, "step": 159390 }, { "epoch": 61.9, "learning_rate": 1.1746278317152105e-05, "loss": 0.0343, "step": 159400 }, { "epoch": 61.91, "learning_rate": 1.1745760517799354e-05, "loss": 0.0107, "step": 159410 }, { "epoch": 61.91, "learning_rate": 1.1745242718446604e-05, "loss": 0.0006, "step": 159420 }, { "epoch": 61.91, "learning_rate": 1.1744724919093854e-05, "loss": 0.1806, "step": 159430 }, { "epoch": 61.92, "learning_rate": 1.1744207119741102e-05, "loss": 0.1237, "step": 159440 }, { "epoch": 61.92, "learning_rate": 1.174368932038835e-05, "loss": 0.0542, "step": 159450 }, { "epoch": 61.93, "learning_rate": 1.1743171521035599e-05, "loss": 0.0347, "step": 159460 }, { "epoch": 61.93, "learning_rate": 1.1742653721682849e-05, "loss": 0.0871, "step": 159470 }, { "epoch": 61.93, "learning_rate": 1.1742135922330098e-05, "loss": 0.07, "step": 159480 }, { "epoch": 61.94, "learning_rate": 1.1741618122977348e-05, "loss": 0.0381, "step": 159490 }, { "epoch": 61.94, "learning_rate": 1.1741100323624598e-05, "loss": 0.1865, "step": 159500 }, { "epoch": 61.95, "learning_rate": 1.1740582524271845e-05, "loss": 0.1099, "step": 159510 }, { "epoch": 61.95, "learning_rate": 1.1740064724919095e-05, "loss": 0.1264, "step": 159520 }, { "epoch": 61.95, "learning_rate": 1.1739546925566343e-05, "loss": 0.059, "step": 159530 }, { "epoch": 61.96, "learning_rate": 1.1739029126213593e-05, "loss": 0.0208, "step": 159540 }, { "epoch": 61.96, "learning_rate": 1.1738511326860842e-05, "loss": 0.048, "step": 159550 }, { "epoch": 61.97, "learning_rate": 1.1737993527508092e-05, "loss": 0.0655, "step": 159560 }, { "epoch": 61.97, "learning_rate": 1.1737475728155342e-05, "loss": 0.0386, "step": 159570 }, { "epoch": 61.97, "learning_rate": 1.1736957928802591e-05, "loss": 0.034, "step": 159580 }, { "epoch": 61.98, "learning_rate": 1.1736440129449839e-05, "loss": 0.0499, "step": 159590 }, { "epoch": 61.98, "learning_rate": 1.1735922330097089e-05, "loss": 0.0906, "step": 159600 }, { "epoch": 61.98, "learning_rate": 1.1735404530744337e-05, "loss": 0.1151, "step": 159610 }, { "epoch": 61.99, "learning_rate": 1.1734886731391586e-05, "loss": 0.3572, "step": 159620 }, { "epoch": 61.99, "learning_rate": 1.1734368932038836e-05, "loss": 0.0317, "step": 159630 }, { "epoch": 62.0, "learning_rate": 1.1733851132686085e-05, "loss": 0.1341, "step": 159640 }, { "epoch": 62.0, "learning_rate": 1.1733333333333335e-05, "loss": 0.0497, "step": 159650 }, { "epoch": 62.0, "eval_accuracy": 0.9466299862448418, "eval_loss": 0.33754032850265503, "eval_runtime": 8.2233, "eval_samples_per_second": 442.039, "eval_steps_per_second": 55.331, "step": 159650 }, { "epoch": 62.0, "learning_rate": 1.1732815533980583e-05, "loss": 0.1406, "step": 159660 }, { "epoch": 62.01, "learning_rate": 1.1732297734627833e-05, "loss": 0.0233, "step": 159670 }, { "epoch": 62.01, "learning_rate": 1.1731779935275082e-05, "loss": 0.0234, "step": 159680 }, { "epoch": 62.02, "learning_rate": 1.173126213592233e-05, "loss": 0.041, "step": 159690 }, { "epoch": 62.02, "learning_rate": 1.173074433656958e-05, "loss": 0.1109, "step": 159700 }, { "epoch": 62.02, "learning_rate": 1.173022653721683e-05, "loss": 0.0554, "step": 159710 }, { "epoch": 62.03, "learning_rate": 1.1729708737864079e-05, "loss": 0.0368, "step": 159720 }, { "epoch": 62.03, "learning_rate": 1.1729190938511329e-05, "loss": 0.168, "step": 159730 }, { "epoch": 62.03, "learning_rate": 1.1728673139158577e-05, "loss": 0.025, "step": 159740 }, { "epoch": 62.04, "learning_rate": 1.1728155339805826e-05, "loss": 0.0005, "step": 159750 }, { "epoch": 62.04, "learning_rate": 1.1727637540453076e-05, "loss": 0.0546, "step": 159760 }, { "epoch": 62.05, "learning_rate": 1.1727119741100325e-05, "loss": 0.0047, "step": 159770 }, { "epoch": 62.05, "learning_rate": 1.1726601941747573e-05, "loss": 0.106, "step": 159780 }, { "epoch": 62.05, "learning_rate": 1.1726084142394823e-05, "loss": 0.0217, "step": 159790 }, { "epoch": 62.06, "learning_rate": 1.1725566343042073e-05, "loss": 0.0271, "step": 159800 }, { "epoch": 62.06, "learning_rate": 1.1725048543689322e-05, "loss": 0.0968, "step": 159810 }, { "epoch": 62.07, "learning_rate": 1.172453074433657e-05, "loss": 0.0494, "step": 159820 }, { "epoch": 62.07, "learning_rate": 1.172401294498382e-05, "loss": 0.3305, "step": 159830 }, { "epoch": 62.07, "learning_rate": 1.172349514563107e-05, "loss": 0.1541, "step": 159840 }, { "epoch": 62.08, "learning_rate": 1.1722977346278319e-05, "loss": 0.1962, "step": 159850 }, { "epoch": 62.08, "learning_rate": 1.1722459546925567e-05, "loss": 0.0081, "step": 159860 }, { "epoch": 62.09, "learning_rate": 1.1721941747572816e-05, "loss": 0.1279, "step": 159870 }, { "epoch": 62.09, "learning_rate": 1.1721423948220066e-05, "loss": 0.1206, "step": 159880 }, { "epoch": 62.09, "learning_rate": 1.1720906148867314e-05, "loss": 0.002, "step": 159890 }, { "epoch": 62.1, "learning_rate": 1.1720388349514564e-05, "loss": 0.0952, "step": 159900 }, { "epoch": 62.1, "learning_rate": 1.1719870550161813e-05, "loss": 0.2279, "step": 159910 }, { "epoch": 62.1, "learning_rate": 1.1719352750809063e-05, "loss": 0.2049, "step": 159920 }, { "epoch": 62.11, "learning_rate": 1.1718834951456313e-05, "loss": 0.0516, "step": 159930 }, { "epoch": 62.11, "learning_rate": 1.171831715210356e-05, "loss": 0.0086, "step": 159940 }, { "epoch": 62.12, "learning_rate": 1.171779935275081e-05, "loss": 0.0817, "step": 159950 }, { "epoch": 62.12, "learning_rate": 1.171728155339806e-05, "loss": 0.0617, "step": 159960 }, { "epoch": 62.12, "learning_rate": 1.1716763754045308e-05, "loss": 0.0484, "step": 159970 }, { "epoch": 62.13, "learning_rate": 1.1716245954692557e-05, "loss": 0.0197, "step": 159980 }, { "epoch": 62.13, "learning_rate": 1.1715728155339807e-05, "loss": 0.1033, "step": 159990 }, { "epoch": 62.14, "learning_rate": 1.1715210355987056e-05, "loss": 0.0264, "step": 160000 }, { "epoch": 62.14, "learning_rate": 1.1714692556634306e-05, "loss": 0.0557, "step": 160010 }, { "epoch": 62.14, "learning_rate": 1.1714174757281554e-05, "loss": 0.0809, "step": 160020 }, { "epoch": 62.15, "learning_rate": 1.1713656957928804e-05, "loss": 0.1431, "step": 160030 }, { "epoch": 62.15, "learning_rate": 1.1713139158576052e-05, "loss": 0.0326, "step": 160040 }, { "epoch": 62.16, "learning_rate": 1.1712621359223301e-05, "loss": 0.0503, "step": 160050 }, { "epoch": 62.16, "learning_rate": 1.171210355987055e-05, "loss": 0.0215, "step": 160060 }, { "epoch": 62.16, "learning_rate": 1.17115857605178e-05, "loss": 0.1465, "step": 160070 }, { "epoch": 62.17, "learning_rate": 1.171106796116505e-05, "loss": 0.0962, "step": 160080 }, { "epoch": 62.17, "learning_rate": 1.17105501618123e-05, "loss": 0.0178, "step": 160090 }, { "epoch": 62.17, "learning_rate": 1.1710032362459548e-05, "loss": 0.0257, "step": 160100 }, { "epoch": 62.18, "learning_rate": 1.1709514563106797e-05, "loss": 0.0532, "step": 160110 }, { "epoch": 62.18, "learning_rate": 1.1708996763754045e-05, "loss": 0.1192, "step": 160120 }, { "epoch": 62.19, "learning_rate": 1.1708478964401295e-05, "loss": 0.0424, "step": 160130 }, { "epoch": 62.19, "learning_rate": 1.1707961165048544e-05, "loss": 0.0758, "step": 160140 }, { "epoch": 62.19, "learning_rate": 1.1707443365695794e-05, "loss": 0.1262, "step": 160150 }, { "epoch": 62.2, "learning_rate": 1.1706925566343044e-05, "loss": 0.0393, "step": 160160 }, { "epoch": 62.2, "learning_rate": 1.1706407766990293e-05, "loss": 0.1037, "step": 160170 }, { "epoch": 62.21, "learning_rate": 1.1705889967637541e-05, "loss": 0.0362, "step": 160180 }, { "epoch": 62.21, "learning_rate": 1.1705372168284789e-05, "loss": 0.0867, "step": 160190 }, { "epoch": 62.21, "learning_rate": 1.1704854368932039e-05, "loss": 0.1144, "step": 160200 }, { "epoch": 62.22, "learning_rate": 1.1704336569579288e-05, "loss": 0.087, "step": 160210 }, { "epoch": 62.22, "learning_rate": 1.1703818770226538e-05, "loss": 0.0356, "step": 160220 }, { "epoch": 62.23, "learning_rate": 1.1703300970873787e-05, "loss": 0.0624, "step": 160230 }, { "epoch": 62.23, "learning_rate": 1.1702783171521037e-05, "loss": 0.0434, "step": 160240 }, { "epoch": 62.23, "learning_rate": 1.1702265372168287e-05, "loss": 0.0009, "step": 160250 }, { "epoch": 62.24, "learning_rate": 1.1701747572815535e-05, "loss": 0.0181, "step": 160260 }, { "epoch": 62.24, "learning_rate": 1.1701229773462783e-05, "loss": 0.123, "step": 160270 }, { "epoch": 62.24, "learning_rate": 1.1700711974110032e-05, "loss": 0.0634, "step": 160280 }, { "epoch": 62.25, "learning_rate": 1.1700194174757282e-05, "loss": 0.0777, "step": 160290 }, { "epoch": 62.25, "learning_rate": 1.1699676375404531e-05, "loss": 0.0181, "step": 160300 }, { "epoch": 62.26, "learning_rate": 1.1699158576051781e-05, "loss": 0.0769, "step": 160310 }, { "epoch": 62.26, "learning_rate": 1.169864077669903e-05, "loss": 0.0425, "step": 160320 }, { "epoch": 62.26, "learning_rate": 1.169812297734628e-05, "loss": 0.1036, "step": 160330 }, { "epoch": 62.27, "learning_rate": 1.169760517799353e-05, "loss": 0.0013, "step": 160340 }, { "epoch": 62.27, "learning_rate": 1.1697087378640776e-05, "loss": 0.0923, "step": 160350 }, { "epoch": 62.28, "learning_rate": 1.1696569579288026e-05, "loss": 0.053, "step": 160360 }, { "epoch": 62.28, "learning_rate": 1.1696051779935275e-05, "loss": 0.0307, "step": 160370 }, { "epoch": 62.28, "learning_rate": 1.1695533980582525e-05, "loss": 0.0002, "step": 160380 }, { "epoch": 62.29, "learning_rate": 1.1695016181229775e-05, "loss": 0.1323, "step": 160390 }, { "epoch": 62.29, "learning_rate": 1.1694498381877024e-05, "loss": 0.0331, "step": 160400 }, { "epoch": 62.3, "learning_rate": 1.1693980582524274e-05, "loss": 0.1057, "step": 160410 }, { "epoch": 62.3, "learning_rate": 1.1693462783171523e-05, "loss": 0.0138, "step": 160420 }, { "epoch": 62.3, "learning_rate": 1.169294498381877e-05, "loss": 0.0436, "step": 160430 }, { "epoch": 62.31, "learning_rate": 1.169242718446602e-05, "loss": 0.0245, "step": 160440 }, { "epoch": 62.31, "learning_rate": 1.1691909385113269e-05, "loss": 0.1289, "step": 160450 }, { "epoch": 62.31, "learning_rate": 1.1691391585760519e-05, "loss": 0.0144, "step": 160460 }, { "epoch": 62.32, "learning_rate": 1.1690873786407768e-05, "loss": 0.0941, "step": 160470 }, { "epoch": 62.32, "learning_rate": 1.1690355987055018e-05, "loss": 0.0437, "step": 160480 }, { "epoch": 62.33, "learning_rate": 1.1689838187702267e-05, "loss": 0.3188, "step": 160490 }, { "epoch": 62.33, "learning_rate": 1.1689320388349517e-05, "loss": 0.0183, "step": 160500 }, { "epoch": 62.33, "learning_rate": 1.1688802588996763e-05, "loss": 0.1731, "step": 160510 }, { "epoch": 62.34, "learning_rate": 1.1688284789644013e-05, "loss": 0.0832, "step": 160520 }, { "epoch": 62.34, "learning_rate": 1.1687766990291262e-05, "loss": 0.1293, "step": 160530 }, { "epoch": 62.35, "learning_rate": 1.1687249190938512e-05, "loss": 0.1393, "step": 160540 }, { "epoch": 62.35, "learning_rate": 1.1686731391585762e-05, "loss": 0.0709, "step": 160550 }, { "epoch": 62.35, "learning_rate": 1.1686213592233011e-05, "loss": 0.0011, "step": 160560 }, { "epoch": 62.36, "learning_rate": 1.1685695792880261e-05, "loss": 0.0051, "step": 160570 }, { "epoch": 62.36, "learning_rate": 1.168517799352751e-05, "loss": 0.0704, "step": 160580 }, { "epoch": 62.37, "learning_rate": 1.1684660194174757e-05, "loss": 0.0175, "step": 160590 }, { "epoch": 62.37, "learning_rate": 1.1684142394822006e-05, "loss": 0.057, "step": 160600 }, { "epoch": 62.37, "learning_rate": 1.1683624595469256e-05, "loss": 0.0788, "step": 160610 }, { "epoch": 62.38, "learning_rate": 1.1683106796116506e-05, "loss": 0.1073, "step": 160620 }, { "epoch": 62.38, "learning_rate": 1.1682588996763755e-05, "loss": 0.065, "step": 160630 }, { "epoch": 62.38, "learning_rate": 1.1682071197411005e-05, "loss": 0.0454, "step": 160640 }, { "epoch": 62.39, "learning_rate": 1.1681553398058255e-05, "loss": 0.0352, "step": 160650 }, { "epoch": 62.39, "learning_rate": 1.1681035598705504e-05, "loss": 0.2588, "step": 160660 }, { "epoch": 62.4, "learning_rate": 1.168051779935275e-05, "loss": 0.1417, "step": 160670 }, { "epoch": 62.4, "learning_rate": 1.168e-05, "loss": 0.0188, "step": 160680 }, { "epoch": 62.4, "learning_rate": 1.167948220064725e-05, "loss": 0.0936, "step": 160690 }, { "epoch": 62.41, "learning_rate": 1.16789644012945e-05, "loss": 0.0251, "step": 160700 }, { "epoch": 62.41, "learning_rate": 1.1678446601941749e-05, "loss": 0.0663, "step": 160710 }, { "epoch": 62.42, "learning_rate": 1.1677928802588998e-05, "loss": 0.0834, "step": 160720 }, { "epoch": 62.42, "learning_rate": 1.1677411003236248e-05, "loss": 0.0544, "step": 160730 }, { "epoch": 62.42, "learning_rate": 1.1676893203883498e-05, "loss": 0.0421, "step": 160740 }, { "epoch": 62.43, "learning_rate": 1.1676375404530744e-05, "loss": 0.0105, "step": 160750 }, { "epoch": 62.43, "learning_rate": 1.1675857605177994e-05, "loss": 0.0942, "step": 160760 }, { "epoch": 62.43, "learning_rate": 1.1675339805825243e-05, "loss": 0.0843, "step": 160770 }, { "epoch": 62.44, "learning_rate": 1.1674822006472493e-05, "loss": 0.0362, "step": 160780 }, { "epoch": 62.44, "learning_rate": 1.1674304207119742e-05, "loss": 0.0593, "step": 160790 }, { "epoch": 62.45, "learning_rate": 1.1673786407766992e-05, "loss": 0.1021, "step": 160800 }, { "epoch": 62.45, "learning_rate": 1.1673268608414242e-05, "loss": 0.0887, "step": 160810 }, { "epoch": 62.45, "learning_rate": 1.1672750809061491e-05, "loss": 0.0649, "step": 160820 }, { "epoch": 62.46, "learning_rate": 1.1672233009708737e-05, "loss": 0.1001, "step": 160830 }, { "epoch": 62.46, "learning_rate": 1.1671715210355987e-05, "loss": 0.122, "step": 160840 }, { "epoch": 62.47, "learning_rate": 1.1671197411003237e-05, "loss": 0.0802, "step": 160850 }, { "epoch": 62.47, "learning_rate": 1.1670679611650486e-05, "loss": 0.0879, "step": 160860 }, { "epoch": 62.47, "learning_rate": 1.1670161812297736e-05, "loss": 0.0866, "step": 160870 }, { "epoch": 62.48, "learning_rate": 1.1669644012944986e-05, "loss": 0.0098, "step": 160880 }, { "epoch": 62.48, "learning_rate": 1.1669126213592235e-05, "loss": 0.1141, "step": 160890 }, { "epoch": 62.49, "learning_rate": 1.1668608414239485e-05, "loss": 0.0715, "step": 160900 }, { "epoch": 62.49, "learning_rate": 1.1668090614886733e-05, "loss": 0.0002, "step": 160910 }, { "epoch": 62.49, "learning_rate": 1.166757281553398e-05, "loss": 0.1001, "step": 160920 }, { "epoch": 62.5, "learning_rate": 1.166705501618123e-05, "loss": 0.0102, "step": 160930 }, { "epoch": 62.5, "learning_rate": 1.166653721682848e-05, "loss": 0.3731, "step": 160940 }, { "epoch": 62.5, "learning_rate": 1.166601941747573e-05, "loss": 0.136, "step": 160950 }, { "epoch": 62.51, "learning_rate": 1.1665501618122979e-05, "loss": 0.0472, "step": 160960 }, { "epoch": 62.51, "learning_rate": 1.1664983818770229e-05, "loss": 0.0711, "step": 160970 }, { "epoch": 62.52, "learning_rate": 1.1664466019417477e-05, "loss": 0.067, "step": 160980 }, { "epoch": 62.52, "learning_rate": 1.1663948220064726e-05, "loss": 0.0764, "step": 160990 }, { "epoch": 62.52, "learning_rate": 1.1663430420711974e-05, "loss": 0.0366, "step": 161000 }, { "epoch": 62.53, "learning_rate": 1.1662912621359224e-05, "loss": 0.0207, "step": 161010 }, { "epoch": 62.53, "learning_rate": 1.1662394822006473e-05, "loss": 0.1033, "step": 161020 }, { "epoch": 62.54, "learning_rate": 1.1661877022653723e-05, "loss": 0.0594, "step": 161030 }, { "epoch": 62.54, "learning_rate": 1.1661359223300973e-05, "loss": 0.0033, "step": 161040 }, { "epoch": 62.54, "learning_rate": 1.1660841423948222e-05, "loss": 0.1152, "step": 161050 }, { "epoch": 62.55, "learning_rate": 1.166032362459547e-05, "loss": 0.1302, "step": 161060 }, { "epoch": 62.55, "learning_rate": 1.165980582524272e-05, "loss": 0.0229, "step": 161070 }, { "epoch": 62.56, "learning_rate": 1.1659288025889968e-05, "loss": 0.0514, "step": 161080 }, { "epoch": 62.56, "learning_rate": 1.1658770226537217e-05, "loss": 0.0692, "step": 161090 }, { "epoch": 62.56, "learning_rate": 1.1658252427184467e-05, "loss": 0.0272, "step": 161100 }, { "epoch": 62.57, "learning_rate": 1.1657734627831717e-05, "loss": 0.0605, "step": 161110 }, { "epoch": 62.57, "learning_rate": 1.1657216828478966e-05, "loss": 0.0227, "step": 161120 }, { "epoch": 62.57, "learning_rate": 1.1656699029126214e-05, "loss": 0.0598, "step": 161130 }, { "epoch": 62.58, "learning_rate": 1.1656181229773464e-05, "loss": 0.059, "step": 161140 }, { "epoch": 62.58, "learning_rate": 1.1655663430420713e-05, "loss": 0.1037, "step": 161150 }, { "epoch": 62.59, "learning_rate": 1.1655145631067961e-05, "loss": 0.1128, "step": 161160 }, { "epoch": 62.59, "learning_rate": 1.1654627831715211e-05, "loss": 0.1446, "step": 161170 }, { "epoch": 62.59, "learning_rate": 1.165411003236246e-05, "loss": 0.0477, "step": 161180 }, { "epoch": 62.6, "learning_rate": 1.165359223300971e-05, "loss": 0.2412, "step": 161190 }, { "epoch": 62.6, "learning_rate": 1.165307443365696e-05, "loss": 0.0159, "step": 161200 }, { "epoch": 62.61, "learning_rate": 1.1652556634304208e-05, "loss": 0.0736, "step": 161210 }, { "epoch": 62.61, "learning_rate": 1.1652038834951457e-05, "loss": 0.0741, "step": 161220 }, { "epoch": 62.61, "learning_rate": 1.1651521035598707e-05, "loss": 0.0131, "step": 161230 }, { "epoch": 62.62, "learning_rate": 1.1651003236245955e-05, "loss": 0.0037, "step": 161240 }, { "epoch": 62.62, "learning_rate": 1.1650485436893204e-05, "loss": 0.102, "step": 161250 }, { "epoch": 62.63, "learning_rate": 1.1649967637540454e-05, "loss": 0.1175, "step": 161260 }, { "epoch": 62.63, "learning_rate": 1.1649449838187704e-05, "loss": 0.0133, "step": 161270 }, { "epoch": 62.63, "learning_rate": 1.1648932038834952e-05, "loss": 0.0872, "step": 161280 }, { "epoch": 62.64, "learning_rate": 1.1648414239482201e-05, "loss": 0.0774, "step": 161290 }, { "epoch": 62.64, "learning_rate": 1.1647896440129451e-05, "loss": 0.1211, "step": 161300 }, { "epoch": 62.64, "learning_rate": 1.16473786407767e-05, "loss": 0.0042, "step": 161310 }, { "epoch": 62.65, "learning_rate": 1.1646860841423948e-05, "loss": 0.001, "step": 161320 }, { "epoch": 62.65, "learning_rate": 1.1646343042071198e-05, "loss": 0.1241, "step": 161330 }, { "epoch": 62.66, "learning_rate": 1.1645825242718448e-05, "loss": 0.0166, "step": 161340 }, { "epoch": 62.66, "learning_rate": 1.1645307443365697e-05, "loss": 0.1305, "step": 161350 }, { "epoch": 62.66, "learning_rate": 1.1644789644012945e-05, "loss": 0.0179, "step": 161360 }, { "epoch": 62.67, "learning_rate": 1.1644271844660195e-05, "loss": 0.0035, "step": 161370 }, { "epoch": 62.67, "learning_rate": 1.1643754045307444e-05, "loss": 0.0282, "step": 161380 }, { "epoch": 62.68, "learning_rate": 1.1643236245954694e-05, "loss": 0.1037, "step": 161390 }, { "epoch": 62.68, "learning_rate": 1.1642718446601942e-05, "loss": 0.1124, "step": 161400 }, { "epoch": 62.68, "learning_rate": 1.1642200647249192e-05, "loss": 0.0868, "step": 161410 }, { "epoch": 62.69, "learning_rate": 1.1641682847896441e-05, "loss": 0.1238, "step": 161420 }, { "epoch": 62.69, "learning_rate": 1.164116504854369e-05, "loss": 0.0396, "step": 161430 }, { "epoch": 62.7, "learning_rate": 1.1640647249190939e-05, "loss": 0.0723, "step": 161440 }, { "epoch": 62.7, "learning_rate": 1.1640129449838188e-05, "loss": 0.0699, "step": 161450 }, { "epoch": 62.7, "learning_rate": 1.1639611650485438e-05, "loss": 0.0002, "step": 161460 }, { "epoch": 62.71, "learning_rate": 1.1639093851132688e-05, "loss": 0.018, "step": 161470 }, { "epoch": 62.71, "learning_rate": 1.1638576051779937e-05, "loss": 0.0615, "step": 161480 }, { "epoch": 62.71, "learning_rate": 1.1638058252427185e-05, "loss": 0.0293, "step": 161490 }, { "epoch": 62.72, "learning_rate": 1.1637540453074435e-05, "loss": 0.1037, "step": 161500 }, { "epoch": 62.72, "learning_rate": 1.1637022653721683e-05, "loss": 0.0902, "step": 161510 }, { "epoch": 62.73, "learning_rate": 1.1636504854368932e-05, "loss": 0.0508, "step": 161520 }, { "epoch": 62.73, "learning_rate": 1.1635987055016182e-05, "loss": 0.1576, "step": 161530 }, { "epoch": 62.73, "learning_rate": 1.1635469255663432e-05, "loss": 0.1609, "step": 161540 }, { "epoch": 62.74, "learning_rate": 1.1634951456310681e-05, "loss": 0.0626, "step": 161550 }, { "epoch": 62.74, "learning_rate": 1.163443365695793e-05, "loss": 0.1047, "step": 161560 }, { "epoch": 62.75, "learning_rate": 1.1633915857605179e-05, "loss": 0.0038, "step": 161570 }, { "epoch": 62.75, "learning_rate": 1.1633398058252428e-05, "loss": 0.1391, "step": 161580 }, { "epoch": 62.75, "learning_rate": 1.1632880258899676e-05, "loss": 0.047, "step": 161590 }, { "epoch": 62.76, "learning_rate": 1.1632362459546926e-05, "loss": 0.1438, "step": 161600 }, { "epoch": 62.76, "learning_rate": 1.1631844660194175e-05, "loss": 0.1288, "step": 161610 }, { "epoch": 62.77, "learning_rate": 1.1631326860841425e-05, "loss": 0.0121, "step": 161620 }, { "epoch": 62.77, "learning_rate": 1.1630809061488675e-05, "loss": 0.1414, "step": 161630 }, { "epoch": 62.77, "learning_rate": 1.1630291262135924e-05, "loss": 0.1377, "step": 161640 }, { "epoch": 62.78, "learning_rate": 1.1629773462783172e-05, "loss": 0.0324, "step": 161650 }, { "epoch": 62.78, "learning_rate": 1.162925566343042e-05, "loss": 0.0624, "step": 161660 }, { "epoch": 62.78, "learning_rate": 1.162873786407767e-05, "loss": 0.0004, "step": 161670 }, { "epoch": 62.79, "learning_rate": 1.162822006472492e-05, "loss": 0.1327, "step": 161680 }, { "epoch": 62.79, "learning_rate": 1.1627702265372169e-05, "loss": 0.0141, "step": 161690 }, { "epoch": 62.8, "learning_rate": 1.1627184466019419e-05, "loss": 0.1224, "step": 161700 }, { "epoch": 62.8, "learning_rate": 1.1626666666666668e-05, "loss": 0.0706, "step": 161710 }, { "epoch": 62.8, "learning_rate": 1.1626148867313918e-05, "loss": 0.0458, "step": 161720 }, { "epoch": 62.81, "learning_rate": 1.1625631067961166e-05, "loss": 0.0183, "step": 161730 }, { "epoch": 62.81, "learning_rate": 1.1625113268608414e-05, "loss": 0.05, "step": 161740 }, { "epoch": 62.82, "learning_rate": 1.1624595469255663e-05, "loss": 0.0702, "step": 161750 }, { "epoch": 62.82, "learning_rate": 1.1624077669902913e-05, "loss": 0.101, "step": 161760 }, { "epoch": 62.82, "learning_rate": 1.1623559870550163e-05, "loss": 0.0455, "step": 161770 }, { "epoch": 62.83, "learning_rate": 1.1623042071197412e-05, "loss": 0.0074, "step": 161780 }, { "epoch": 62.83, "learning_rate": 1.1622524271844662e-05, "loss": 0.0237, "step": 161790 }, { "epoch": 62.83, "learning_rate": 1.1622006472491911e-05, "loss": 0.141, "step": 161800 }, { "epoch": 62.84, "learning_rate": 1.1621488673139158e-05, "loss": 0.0845, "step": 161810 }, { "epoch": 62.84, "learning_rate": 1.1620970873786407e-05, "loss": 0.0577, "step": 161820 }, { "epoch": 62.85, "learning_rate": 1.1620453074433657e-05, "loss": 0.0075, "step": 161830 }, { "epoch": 62.85, "learning_rate": 1.1619935275080907e-05, "loss": 0.0389, "step": 161840 }, { "epoch": 62.85, "learning_rate": 1.1619417475728156e-05, "loss": 0.0006, "step": 161850 }, { "epoch": 62.86, "learning_rate": 1.1618899676375406e-05, "loss": 0.0037, "step": 161860 }, { "epoch": 62.86, "learning_rate": 1.1618381877022655e-05, "loss": 0.0061, "step": 161870 }, { "epoch": 62.87, "learning_rate": 1.1617864077669905e-05, "loss": 0.215, "step": 161880 }, { "epoch": 62.87, "learning_rate": 1.1617346278317151e-05, "loss": 0.1077, "step": 161890 }, { "epoch": 62.87, "learning_rate": 1.1616828478964401e-05, "loss": 0.0727, "step": 161900 }, { "epoch": 62.88, "learning_rate": 1.161631067961165e-05, "loss": 0.1936, "step": 161910 }, { "epoch": 62.88, "learning_rate": 1.16157928802589e-05, "loss": 0.0056, "step": 161920 }, { "epoch": 62.89, "learning_rate": 1.161527508090615e-05, "loss": 0.069, "step": 161930 }, { "epoch": 62.89, "learning_rate": 1.16147572815534e-05, "loss": 0.1229, "step": 161940 }, { "epoch": 62.89, "learning_rate": 1.1614239482200649e-05, "loss": 0.16, "step": 161950 }, { "epoch": 62.9, "learning_rate": 1.1613721682847899e-05, "loss": 0.0868, "step": 161960 }, { "epoch": 62.9, "learning_rate": 1.1613203883495145e-05, "loss": 0.0934, "step": 161970 }, { "epoch": 62.9, "learning_rate": 1.1612686084142394e-05, "loss": 0.1242, "step": 161980 }, { "epoch": 62.91, "learning_rate": 1.1612168284789644e-05, "loss": 0.0063, "step": 161990 }, { "epoch": 62.91, "learning_rate": 1.1611650485436894e-05, "loss": 0.0335, "step": 162000 }, { "epoch": 62.92, "learning_rate": 1.1611132686084143e-05, "loss": 0.1096, "step": 162010 }, { "epoch": 62.92, "learning_rate": 1.1610614886731393e-05, "loss": 0.1381, "step": 162020 }, { "epoch": 62.92, "learning_rate": 1.1610097087378643e-05, "loss": 0.0962, "step": 162030 }, { "epoch": 62.93, "learning_rate": 1.1609579288025892e-05, "loss": 0.015, "step": 162040 }, { "epoch": 62.93, "learning_rate": 1.1609061488673142e-05, "loss": 0.0563, "step": 162050 }, { "epoch": 62.94, "learning_rate": 1.1608543689320388e-05, "loss": 0.0204, "step": 162060 }, { "epoch": 62.94, "learning_rate": 1.1608025889967638e-05, "loss": 0.1194, "step": 162070 }, { "epoch": 62.94, "learning_rate": 1.1607508090614887e-05, "loss": 0.0202, "step": 162080 }, { "epoch": 62.95, "learning_rate": 1.1606990291262137e-05, "loss": 0.0841, "step": 162090 }, { "epoch": 62.95, "learning_rate": 1.1606472491909386e-05, "loss": 0.0231, "step": 162100 }, { "epoch": 62.96, "learning_rate": 1.1605954692556636e-05, "loss": 0.0652, "step": 162110 }, { "epoch": 62.96, "learning_rate": 1.1605436893203886e-05, "loss": 0.0267, "step": 162120 }, { "epoch": 62.96, "learning_rate": 1.1604919093851135e-05, "loss": 0.0009, "step": 162130 }, { "epoch": 62.97, "learning_rate": 1.1604401294498382e-05, "loss": 0.0943, "step": 162140 }, { "epoch": 62.97, "learning_rate": 1.1603883495145631e-05, "loss": 0.0009, "step": 162150 }, { "epoch": 62.97, "learning_rate": 1.160336569579288e-05, "loss": 0.0972, "step": 162160 }, { "epoch": 62.98, "learning_rate": 1.160284789644013e-05, "loss": 0.0077, "step": 162170 }, { "epoch": 62.98, "learning_rate": 1.160233009708738e-05, "loss": 0.0875, "step": 162180 }, { "epoch": 62.99, "learning_rate": 1.160181229773463e-05, "loss": 0.0965, "step": 162190 }, { "epoch": 62.99, "learning_rate": 1.160129449838188e-05, "loss": 0.0094, "step": 162200 }, { "epoch": 62.99, "learning_rate": 1.1600776699029129e-05, "loss": 0.0729, "step": 162210 }, { "epoch": 63.0, "learning_rate": 1.1600258899676375e-05, "loss": 0.0445, "step": 162220 }, { "epoch": 63.0, "eval_accuracy": 0.9502063273727648, "eval_loss": 0.34933972358703613, "eval_runtime": 8.1937, "eval_samples_per_second": 443.635, "eval_steps_per_second": 55.531, "step": 162225 }, { "epoch": 63.0, "learning_rate": 1.1599741100323625e-05, "loss": 0.1234, "step": 162230 }, { "epoch": 63.01, "learning_rate": 1.1599223300970874e-05, "loss": 0.1836, "step": 162240 }, { "epoch": 63.01, "learning_rate": 1.1598705501618124e-05, "loss": 0.1387, "step": 162250 }, { "epoch": 63.01, "learning_rate": 1.1598187702265374e-05, "loss": 0.0033, "step": 162260 }, { "epoch": 63.02, "learning_rate": 1.1597669902912623e-05, "loss": 0.01, "step": 162270 }, { "epoch": 63.02, "learning_rate": 1.1597152103559873e-05, "loss": 0.0965, "step": 162280 }, { "epoch": 63.03, "learning_rate": 1.1596634304207122e-05, "loss": 0.0452, "step": 162290 }, { "epoch": 63.03, "learning_rate": 1.1596116504854369e-05, "loss": 0.0174, "step": 162300 }, { "epoch": 63.03, "learning_rate": 1.1595598705501618e-05, "loss": 0.0437, "step": 162310 }, { "epoch": 63.04, "learning_rate": 1.1595080906148868e-05, "loss": 0.0892, "step": 162320 }, { "epoch": 63.04, "learning_rate": 1.1594563106796117e-05, "loss": 0.0024, "step": 162330 }, { "epoch": 63.04, "learning_rate": 1.1594045307443367e-05, "loss": 0.3035, "step": 162340 }, { "epoch": 63.05, "learning_rate": 1.1593527508090617e-05, "loss": 0.007, "step": 162350 }, { "epoch": 63.05, "learning_rate": 1.1593009708737866e-05, "loss": 0.0165, "step": 162360 }, { "epoch": 63.06, "learning_rate": 1.1592491909385116e-05, "loss": 0.0145, "step": 162370 }, { "epoch": 63.06, "learning_rate": 1.1591974110032362e-05, "loss": 0.123, "step": 162380 }, { "epoch": 63.06, "learning_rate": 1.1591456310679612e-05, "loss": 0.013, "step": 162390 }, { "epoch": 63.07, "learning_rate": 1.1590938511326861e-05, "loss": 0.0853, "step": 162400 }, { "epoch": 63.07, "learning_rate": 1.1590420711974111e-05, "loss": 0.1146, "step": 162410 }, { "epoch": 63.08, "learning_rate": 1.158990291262136e-05, "loss": 0.0007, "step": 162420 }, { "epoch": 63.08, "learning_rate": 1.158938511326861e-05, "loss": 0.0025, "step": 162430 }, { "epoch": 63.08, "learning_rate": 1.158886731391586e-05, "loss": 0.0161, "step": 162440 }, { "epoch": 63.09, "learning_rate": 1.1588349514563108e-05, "loss": 0.0796, "step": 162450 }, { "epoch": 63.09, "learning_rate": 1.1587831715210356e-05, "loss": 0.0487, "step": 162460 }, { "epoch": 63.1, "learning_rate": 1.1587313915857605e-05, "loss": 0.0814, "step": 162470 }, { "epoch": 63.1, "learning_rate": 1.1586796116504855e-05, "loss": 0.1035, "step": 162480 }, { "epoch": 63.1, "learning_rate": 1.1586278317152105e-05, "loss": 0.1078, "step": 162490 }, { "epoch": 63.11, "learning_rate": 1.1585760517799354e-05, "loss": 0.0898, "step": 162500 }, { "epoch": 63.11, "learning_rate": 1.1585242718446604e-05, "loss": 0.0471, "step": 162510 }, { "epoch": 63.11, "learning_rate": 1.1584724919093853e-05, "loss": 0.0917, "step": 162520 }, { "epoch": 63.12, "learning_rate": 1.1584207119741101e-05, "loss": 0.0382, "step": 162530 }, { "epoch": 63.12, "learning_rate": 1.158368932038835e-05, "loss": 0.0087, "step": 162540 }, { "epoch": 63.13, "learning_rate": 1.1583171521035599e-05, "loss": 0.0337, "step": 162550 }, { "epoch": 63.13, "learning_rate": 1.1582653721682849e-05, "loss": 0.0576, "step": 162560 }, { "epoch": 63.13, "learning_rate": 1.1582135922330098e-05, "loss": 0.0011, "step": 162570 }, { "epoch": 63.14, "learning_rate": 1.1581618122977348e-05, "loss": 0.0369, "step": 162580 }, { "epoch": 63.14, "learning_rate": 1.1581100323624597e-05, "loss": 0.0984, "step": 162590 }, { "epoch": 63.15, "learning_rate": 1.1580582524271845e-05, "loss": 0.062, "step": 162600 }, { "epoch": 63.15, "learning_rate": 1.1580064724919095e-05, "loss": 0.0089, "step": 162610 }, { "epoch": 63.15, "learning_rate": 1.1579546925566345e-05, "loss": 0.0878, "step": 162620 }, { "epoch": 63.16, "learning_rate": 1.1579029126213592e-05, "loss": 0.0446, "step": 162630 }, { "epoch": 63.16, "learning_rate": 1.1578511326860842e-05, "loss": 0.0064, "step": 162640 }, { "epoch": 63.17, "learning_rate": 1.1577993527508092e-05, "loss": 0.1143, "step": 162650 }, { "epoch": 63.17, "learning_rate": 1.1577475728155341e-05, "loss": 0.1178, "step": 162660 }, { "epoch": 63.17, "learning_rate": 1.1576957928802591e-05, "loss": 0.0475, "step": 162670 }, { "epoch": 63.18, "learning_rate": 1.1576440129449839e-05, "loss": 0.156, "step": 162680 }, { "epoch": 63.18, "learning_rate": 1.1575922330097089e-05, "loss": 0.0269, "step": 162690 }, { "epoch": 63.18, "learning_rate": 1.1575404530744338e-05, "loss": 0.1417, "step": 162700 }, { "epoch": 63.19, "learning_rate": 1.1574886731391586e-05, "loss": 0.0492, "step": 162710 }, { "epoch": 63.19, "learning_rate": 1.1574368932038836e-05, "loss": 0.0785, "step": 162720 }, { "epoch": 63.2, "learning_rate": 1.1573851132686085e-05, "loss": 0.0663, "step": 162730 }, { "epoch": 63.2, "learning_rate": 1.1573333333333335e-05, "loss": 0.0844, "step": 162740 }, { "epoch": 63.2, "learning_rate": 1.1572815533980583e-05, "loss": 0.1656, "step": 162750 }, { "epoch": 63.21, "learning_rate": 1.1572297734627832e-05, "loss": 0.0459, "step": 162760 }, { "epoch": 63.21, "learning_rate": 1.1571779935275082e-05, "loss": 0.096, "step": 162770 }, { "epoch": 63.22, "learning_rate": 1.1571262135922332e-05, "loss": 0.1477, "step": 162780 }, { "epoch": 63.22, "learning_rate": 1.157074433656958e-05, "loss": 0.0711, "step": 162790 }, { "epoch": 63.22, "learning_rate": 1.157022653721683e-05, "loss": 0.0119, "step": 162800 }, { "epoch": 63.23, "learning_rate": 1.1569708737864079e-05, "loss": 0.116, "step": 162810 }, { "epoch": 63.23, "learning_rate": 1.1569190938511328e-05, "loss": 0.1007, "step": 162820 }, { "epoch": 63.23, "learning_rate": 1.1568673139158576e-05, "loss": 0.0507, "step": 162830 }, { "epoch": 63.24, "learning_rate": 1.1568155339805826e-05, "loss": 0.2073, "step": 162840 }, { "epoch": 63.24, "learning_rate": 1.1567637540453076e-05, "loss": 0.0518, "step": 162850 }, { "epoch": 63.25, "learning_rate": 1.1567119741100325e-05, "loss": 0.1393, "step": 162860 }, { "epoch": 63.25, "learning_rate": 1.1566601941747573e-05, "loss": 0.0166, "step": 162870 }, { "epoch": 63.25, "learning_rate": 1.1566084142394823e-05, "loss": 0.1809, "step": 162880 }, { "epoch": 63.26, "learning_rate": 1.1565566343042072e-05, "loss": 0.3335, "step": 162890 }, { "epoch": 63.26, "learning_rate": 1.1565048543689322e-05, "loss": 0.0297, "step": 162900 }, { "epoch": 63.27, "learning_rate": 1.156453074433657e-05, "loss": 0.0537, "step": 162910 }, { "epoch": 63.27, "learning_rate": 1.156401294498382e-05, "loss": 0.0898, "step": 162920 }, { "epoch": 63.27, "learning_rate": 1.156349514563107e-05, "loss": 0.0523, "step": 162930 }, { "epoch": 63.28, "learning_rate": 1.1562977346278319e-05, "loss": 0.1789, "step": 162940 }, { "epoch": 63.28, "learning_rate": 1.1562459546925567e-05, "loss": 0.158, "step": 162950 }, { "epoch": 63.29, "learning_rate": 1.1561941747572816e-05, "loss": 0.2038, "step": 162960 }, { "epoch": 63.29, "learning_rate": 1.1561423948220066e-05, "loss": 0.1665, "step": 162970 }, { "epoch": 63.29, "learning_rate": 1.1560906148867314e-05, "loss": 0.2001, "step": 162980 }, { "epoch": 63.3, "learning_rate": 1.1560388349514563e-05, "loss": 0.034, "step": 162990 }, { "epoch": 63.3, "learning_rate": 1.1559870550161813e-05, "loss": 0.09, "step": 163000 }, { "epoch": 63.3, "learning_rate": 1.1559352750809063e-05, "loss": 0.0228, "step": 163010 }, { "epoch": 63.31, "learning_rate": 1.1558834951456312e-05, "loss": 0.0875, "step": 163020 }, { "epoch": 63.31, "learning_rate": 1.155831715210356e-05, "loss": 0.0498, "step": 163030 }, { "epoch": 63.32, "learning_rate": 1.155779935275081e-05, "loss": 0.1129, "step": 163040 }, { "epoch": 63.32, "learning_rate": 1.155728155339806e-05, "loss": 0.1949, "step": 163050 }, { "epoch": 63.32, "learning_rate": 1.1556763754045307e-05, "loss": 0.1952, "step": 163060 }, { "epoch": 63.33, "learning_rate": 1.1556245954692557e-05, "loss": 0.0191, "step": 163070 }, { "epoch": 63.33, "learning_rate": 1.1555728155339807e-05, "loss": 0.0619, "step": 163080 }, { "epoch": 63.34, "learning_rate": 1.1555210355987056e-05, "loss": 0.0632, "step": 163090 }, { "epoch": 63.34, "learning_rate": 1.1554692556634306e-05, "loss": 0.1438, "step": 163100 }, { "epoch": 63.34, "learning_rate": 1.1554174757281554e-05, "loss": 0.0732, "step": 163110 }, { "epoch": 63.35, "learning_rate": 1.1553656957928803e-05, "loss": 0.1305, "step": 163120 }, { "epoch": 63.35, "learning_rate": 1.1553139158576051e-05, "loss": 0.0217, "step": 163130 }, { "epoch": 63.36, "learning_rate": 1.1552621359223301e-05, "loss": 0.0279, "step": 163140 }, { "epoch": 63.36, "learning_rate": 1.155210355987055e-05, "loss": 0.0151, "step": 163150 }, { "epoch": 63.36, "learning_rate": 1.15515857605178e-05, "loss": 0.0268, "step": 163160 }, { "epoch": 63.37, "learning_rate": 1.155106796116505e-05, "loss": 0.0455, "step": 163170 }, { "epoch": 63.37, "learning_rate": 1.15505501618123e-05, "loss": 0.0212, "step": 163180 }, { "epoch": 63.37, "learning_rate": 1.1550032362459549e-05, "loss": 0.0888, "step": 163190 }, { "epoch": 63.38, "learning_rate": 1.1549514563106797e-05, "loss": 0.0219, "step": 163200 }, { "epoch": 63.38, "learning_rate": 1.1548996763754045e-05, "loss": 0.0525, "step": 163210 }, { "epoch": 63.39, "learning_rate": 1.1548478964401295e-05, "loss": 0.029, "step": 163220 }, { "epoch": 63.39, "learning_rate": 1.1547961165048544e-05, "loss": 0.0825, "step": 163230 }, { "epoch": 63.39, "learning_rate": 1.1547443365695794e-05, "loss": 0.0557, "step": 163240 }, { "epoch": 63.4, "learning_rate": 1.1546925566343043e-05, "loss": 0.0108, "step": 163250 }, { "epoch": 63.4, "learning_rate": 1.1546407766990293e-05, "loss": 0.0684, "step": 163260 }, { "epoch": 63.41, "learning_rate": 1.1545889967637543e-05, "loss": 0.1003, "step": 163270 }, { "epoch": 63.41, "learning_rate": 1.1545372168284789e-05, "loss": 0.0638, "step": 163280 }, { "epoch": 63.41, "learning_rate": 1.1544854368932038e-05, "loss": 0.1211, "step": 163290 }, { "epoch": 63.42, "learning_rate": 1.1544336569579288e-05, "loss": 0.096, "step": 163300 }, { "epoch": 63.42, "learning_rate": 1.1543818770226538e-05, "loss": 0.0424, "step": 163310 }, { "epoch": 63.43, "learning_rate": 1.1543300970873787e-05, "loss": 0.1519, "step": 163320 }, { "epoch": 63.43, "learning_rate": 1.1542783171521037e-05, "loss": 0.0011, "step": 163330 }, { "epoch": 63.43, "learning_rate": 1.1542265372168287e-05, "loss": 0.1671, "step": 163340 }, { "epoch": 63.44, "learning_rate": 1.1541747572815536e-05, "loss": 0.1156, "step": 163350 }, { "epoch": 63.44, "learning_rate": 1.1541229773462782e-05, "loss": 0.0263, "step": 163360 }, { "epoch": 63.44, "learning_rate": 1.1540711974110032e-05, "loss": 0.0694, "step": 163370 }, { "epoch": 63.45, "learning_rate": 1.1540194174757282e-05, "loss": 0.1245, "step": 163380 }, { "epoch": 63.45, "learning_rate": 1.1539676375404531e-05, "loss": 0.0511, "step": 163390 }, { "epoch": 63.46, "learning_rate": 1.1539158576051781e-05, "loss": 0.1245, "step": 163400 }, { "epoch": 63.46, "learning_rate": 1.153864077669903e-05, "loss": 0.0038, "step": 163410 }, { "epoch": 63.46, "learning_rate": 1.153812297734628e-05, "loss": 0.1716, "step": 163420 }, { "epoch": 63.47, "learning_rate": 1.153760517799353e-05, "loss": 0.0747, "step": 163430 }, { "epoch": 63.47, "learning_rate": 1.1537087378640776e-05, "loss": 0.3062, "step": 163440 }, { "epoch": 63.48, "learning_rate": 1.1536569579288026e-05, "loss": 0.1054, "step": 163450 }, { "epoch": 63.48, "learning_rate": 1.1536051779935275e-05, "loss": 0.054, "step": 163460 }, { "epoch": 63.48, "learning_rate": 1.1535533980582525e-05, "loss": 0.0402, "step": 163470 }, { "epoch": 63.49, "learning_rate": 1.1535016181229774e-05, "loss": 0.1091, "step": 163480 }, { "epoch": 63.49, "learning_rate": 1.1534498381877024e-05, "loss": 0.0744, "step": 163490 }, { "epoch": 63.5, "learning_rate": 1.1533980582524274e-05, "loss": 0.0292, "step": 163500 }, { "epoch": 63.5, "learning_rate": 1.1533462783171523e-05, "loss": 0.0344, "step": 163510 }, { "epoch": 63.5, "learning_rate": 1.153294498381877e-05, "loss": 0.0831, "step": 163520 }, { "epoch": 63.51, "learning_rate": 1.1532427184466019e-05, "loss": 0.0082, "step": 163530 }, { "epoch": 63.51, "learning_rate": 1.1531909385113269e-05, "loss": 0.0046, "step": 163540 }, { "epoch": 63.51, "learning_rate": 1.1531391585760518e-05, "loss": 0.0526, "step": 163550 }, { "epoch": 63.52, "learning_rate": 1.1530873786407768e-05, "loss": 0.1092, "step": 163560 }, { "epoch": 63.52, "learning_rate": 1.1530355987055018e-05, "loss": 0.0216, "step": 163570 }, { "epoch": 63.53, "learning_rate": 1.1529838187702267e-05, "loss": 0.0196, "step": 163580 }, { "epoch": 63.53, "learning_rate": 1.1529320388349517e-05, "loss": 0.0276, "step": 163590 }, { "epoch": 63.53, "learning_rate": 1.1528802588996763e-05, "loss": 0.0088, "step": 163600 }, { "epoch": 63.54, "learning_rate": 1.1528284789644013e-05, "loss": 0.038, "step": 163610 }, { "epoch": 63.54, "learning_rate": 1.1527766990291262e-05, "loss": 0.0187, "step": 163620 }, { "epoch": 63.55, "learning_rate": 1.1527249190938512e-05, "loss": 0.0719, "step": 163630 }, { "epoch": 63.55, "learning_rate": 1.1526731391585762e-05, "loss": 0.0495, "step": 163640 }, { "epoch": 63.55, "learning_rate": 1.1526213592233011e-05, "loss": 0.0252, "step": 163650 }, { "epoch": 63.56, "learning_rate": 1.152569579288026e-05, "loss": 0.037, "step": 163660 }, { "epoch": 63.56, "learning_rate": 1.152517799352751e-05, "loss": 0.0388, "step": 163670 }, { "epoch": 63.57, "learning_rate": 1.1524660194174757e-05, "loss": 0.0324, "step": 163680 }, { "epoch": 63.57, "learning_rate": 1.1524142394822006e-05, "loss": 0.0243, "step": 163690 }, { "epoch": 63.57, "learning_rate": 1.1523624595469256e-05, "loss": 0.0017, "step": 163700 }, { "epoch": 63.58, "learning_rate": 1.1523106796116505e-05, "loss": 0.1349, "step": 163710 }, { "epoch": 63.58, "learning_rate": 1.1522588996763755e-05, "loss": 0.0003, "step": 163720 }, { "epoch": 63.58, "learning_rate": 1.1522071197411005e-05, "loss": 0.0208, "step": 163730 }, { "epoch": 63.59, "learning_rate": 1.1521553398058254e-05, "loss": 0.0356, "step": 163740 }, { "epoch": 63.59, "learning_rate": 1.1521035598705504e-05, "loss": 0.0214, "step": 163750 }, { "epoch": 63.6, "learning_rate": 1.1520517799352754e-05, "loss": 0.0548, "step": 163760 }, { "epoch": 63.6, "learning_rate": 1.152e-05, "loss": 0.1039, "step": 163770 }, { "epoch": 63.6, "learning_rate": 1.151948220064725e-05, "loss": 0.0434, "step": 163780 }, { "epoch": 63.61, "learning_rate": 1.1518964401294499e-05, "loss": 0.1026, "step": 163790 }, { "epoch": 63.61, "learning_rate": 1.1518446601941749e-05, "loss": 0.129, "step": 163800 }, { "epoch": 63.62, "learning_rate": 1.1517928802588998e-05, "loss": 0.0426, "step": 163810 }, { "epoch": 63.62, "learning_rate": 1.1517411003236248e-05, "loss": 0.0095, "step": 163820 }, { "epoch": 63.62, "learning_rate": 1.1516893203883498e-05, "loss": 0.0461, "step": 163830 }, { "epoch": 63.63, "learning_rate": 1.1516375404530747e-05, "loss": 0.0248, "step": 163840 }, { "epoch": 63.63, "learning_rate": 1.1515857605177993e-05, "loss": 0.1225, "step": 163850 }, { "epoch": 63.63, "learning_rate": 1.1515339805825243e-05, "loss": 0.0301, "step": 163860 }, { "epoch": 63.64, "learning_rate": 1.1514822006472493e-05, "loss": 0.0736, "step": 163870 }, { "epoch": 63.64, "learning_rate": 1.1514304207119742e-05, "loss": 0.1236, "step": 163880 }, { "epoch": 63.65, "learning_rate": 1.1513786407766992e-05, "loss": 0.0182, "step": 163890 }, { "epoch": 63.65, "learning_rate": 1.1513268608414241e-05, "loss": 0.0398, "step": 163900 }, { "epoch": 63.65, "learning_rate": 1.1512750809061491e-05, "loss": 0.1578, "step": 163910 }, { "epoch": 63.66, "learning_rate": 1.1512233009708739e-05, "loss": 0.0342, "step": 163920 }, { "epoch": 63.66, "learning_rate": 1.1511715210355987e-05, "loss": 0.0011, "step": 163930 }, { "epoch": 63.67, "learning_rate": 1.1511197411003237e-05, "loss": 0.1475, "step": 163940 }, { "epoch": 63.67, "learning_rate": 1.1510679611650486e-05, "loss": 0.1054, "step": 163950 }, { "epoch": 63.67, "learning_rate": 1.1510161812297736e-05, "loss": 0.0015, "step": 163960 }, { "epoch": 63.68, "learning_rate": 1.1509644012944985e-05, "loss": 0.1701, "step": 163970 }, { "epoch": 63.68, "learning_rate": 1.1509126213592235e-05, "loss": 0.1073, "step": 163980 }, { "epoch": 63.69, "learning_rate": 1.1508608414239485e-05, "loss": 0.0335, "step": 163990 }, { "epoch": 63.69, "learning_rate": 1.1508090614886733e-05, "loss": 0.0332, "step": 164000 }, { "epoch": 63.69, "learning_rate": 1.150757281553398e-05, "loss": 0.1068, "step": 164010 }, { "epoch": 63.7, "learning_rate": 1.150705501618123e-05, "loss": 0.0242, "step": 164020 }, { "epoch": 63.7, "learning_rate": 1.150653721682848e-05, "loss": 0.0551, "step": 164030 }, { "epoch": 63.7, "learning_rate": 1.150601941747573e-05, "loss": 0.3086, "step": 164040 }, { "epoch": 63.71, "learning_rate": 1.1505501618122979e-05, "loss": 0.0616, "step": 164050 }, { "epoch": 63.71, "learning_rate": 1.1504983818770229e-05, "loss": 0.0706, "step": 164060 }, { "epoch": 63.72, "learning_rate": 1.1504466019417477e-05, "loss": 0.1485, "step": 164070 }, { "epoch": 63.72, "learning_rate": 1.1503948220064726e-05, "loss": 0.0486, "step": 164080 }, { "epoch": 63.72, "learning_rate": 1.1503430420711974e-05, "loss": 0.0479, "step": 164090 }, { "epoch": 63.73, "learning_rate": 1.1502912621359224e-05, "loss": 0.0578, "step": 164100 }, { "epoch": 63.73, "learning_rate": 1.1502394822006473e-05, "loss": 0.0027, "step": 164110 }, { "epoch": 63.74, "learning_rate": 1.1501877022653723e-05, "loss": 0.2392, "step": 164120 }, { "epoch": 63.74, "learning_rate": 1.1501359223300973e-05, "loss": 0.0635, "step": 164130 }, { "epoch": 63.74, "learning_rate": 1.1500841423948222e-05, "loss": 0.0821, "step": 164140 }, { "epoch": 63.75, "learning_rate": 1.150032362459547e-05, "loss": 0.1873, "step": 164150 }, { "epoch": 63.75, "learning_rate": 1.149980582524272e-05, "loss": 0.0426, "step": 164160 }, { "epoch": 63.76, "learning_rate": 1.1499288025889968e-05, "loss": 0.1033, "step": 164170 }, { "epoch": 63.76, "learning_rate": 1.1498770226537217e-05, "loss": 0.0594, "step": 164180 }, { "epoch": 63.76, "learning_rate": 1.1498252427184467e-05, "loss": 0.0253, "step": 164190 }, { "epoch": 63.77, "learning_rate": 1.1497734627831716e-05, "loss": 0.0751, "step": 164200 }, { "epoch": 63.77, "learning_rate": 1.1497216828478966e-05, "loss": 0.0528, "step": 164210 }, { "epoch": 63.77, "learning_rate": 1.1496699029126214e-05, "loss": 0.1324, "step": 164220 }, { "epoch": 63.78, "learning_rate": 1.1496181229773464e-05, "loss": 0.0314, "step": 164230 }, { "epoch": 63.78, "learning_rate": 1.1495663430420713e-05, "loss": 0.0185, "step": 164240 }, { "epoch": 63.79, "learning_rate": 1.1495145631067961e-05, "loss": 0.0002, "step": 164250 }, { "epoch": 63.79, "learning_rate": 1.149462783171521e-05, "loss": 0.086, "step": 164260 }, { "epoch": 63.79, "learning_rate": 1.149411003236246e-05, "loss": 0.0896, "step": 164270 }, { "epoch": 63.8, "learning_rate": 1.149359223300971e-05, "loss": 0.0094, "step": 164280 }, { "epoch": 63.8, "learning_rate": 1.149307443365696e-05, "loss": 0.0852, "step": 164290 }, { "epoch": 63.81, "learning_rate": 1.1492556634304208e-05, "loss": 0.0319, "step": 164300 }, { "epoch": 63.81, "learning_rate": 1.1492038834951457e-05, "loss": 0.0714, "step": 164310 }, { "epoch": 63.81, "learning_rate": 1.1491521035598707e-05, "loss": 0.0075, "step": 164320 }, { "epoch": 63.82, "learning_rate": 1.1491003236245956e-05, "loss": 0.0401, "step": 164330 }, { "epoch": 63.82, "learning_rate": 1.1490485436893204e-05, "loss": 0.2353, "step": 164340 }, { "epoch": 63.83, "learning_rate": 1.1489967637540454e-05, "loss": 0.0856, "step": 164350 }, { "epoch": 63.83, "learning_rate": 1.1489449838187704e-05, "loss": 0.0727, "step": 164360 }, { "epoch": 63.83, "learning_rate": 1.1488932038834953e-05, "loss": 0.0368, "step": 164370 }, { "epoch": 63.84, "learning_rate": 1.1488414239482201e-05, "loss": 0.0117, "step": 164380 }, { "epoch": 63.84, "learning_rate": 1.148789644012945e-05, "loss": 0.0058, "step": 164390 }, { "epoch": 63.84, "learning_rate": 1.14873786407767e-05, "loss": 0.0671, "step": 164400 }, { "epoch": 63.85, "learning_rate": 1.148686084142395e-05, "loss": 0.0198, "step": 164410 }, { "epoch": 63.85, "learning_rate": 1.1486343042071198e-05, "loss": 0.1179, "step": 164420 }, { "epoch": 63.86, "learning_rate": 1.1485825242718448e-05, "loss": 0.1665, "step": 164430 }, { "epoch": 63.86, "learning_rate": 1.1485307443365697e-05, "loss": 0.1671, "step": 164440 }, { "epoch": 63.86, "learning_rate": 1.1484789644012945e-05, "loss": 0.0015, "step": 164450 }, { "epoch": 63.87, "learning_rate": 1.1484271844660195e-05, "loss": 0.0706, "step": 164460 }, { "epoch": 63.87, "learning_rate": 1.1483754045307444e-05, "loss": 0.0075, "step": 164470 }, { "epoch": 63.88, "learning_rate": 1.1483236245954694e-05, "loss": 0.0392, "step": 164480 }, { "epoch": 63.88, "learning_rate": 1.1482718446601944e-05, "loss": 0.0317, "step": 164490 }, { "epoch": 63.88, "learning_rate": 1.1482200647249191e-05, "loss": 0.1332, "step": 164500 }, { "epoch": 63.89, "learning_rate": 1.1481682847896441e-05, "loss": 0.0305, "step": 164510 }, { "epoch": 63.89, "learning_rate": 1.148116504854369e-05, "loss": 0.0688, "step": 164520 }, { "epoch": 63.9, "learning_rate": 1.1480647249190939e-05, "loss": 0.0588, "step": 164530 }, { "epoch": 63.9, "learning_rate": 1.1480129449838188e-05, "loss": 0.0558, "step": 164540 }, { "epoch": 63.9, "learning_rate": 1.1479611650485438e-05, "loss": 0.1488, "step": 164550 }, { "epoch": 63.91, "learning_rate": 1.1479093851132687e-05, "loss": 0.1101, "step": 164560 }, { "epoch": 63.91, "learning_rate": 1.1478576051779937e-05, "loss": 0.0942, "step": 164570 }, { "epoch": 63.91, "learning_rate": 1.1478058252427185e-05, "loss": 0.0881, "step": 164580 }, { "epoch": 63.92, "learning_rate": 1.1477540453074435e-05, "loss": 0.0563, "step": 164590 }, { "epoch": 63.92, "learning_rate": 1.1477022653721683e-05, "loss": 0.1307, "step": 164600 }, { "epoch": 63.93, "learning_rate": 1.1476504854368932e-05, "loss": 0.05, "step": 164610 }, { "epoch": 63.93, "learning_rate": 1.1475987055016182e-05, "loss": 0.1104, "step": 164620 }, { "epoch": 63.93, "learning_rate": 1.1475469255663431e-05, "loss": 0.0195, "step": 164630 }, { "epoch": 63.94, "learning_rate": 1.1474951456310681e-05, "loss": 0.1457, "step": 164640 }, { "epoch": 63.94, "learning_rate": 1.147443365695793e-05, "loss": 0.1289, "step": 164650 }, { "epoch": 63.95, "learning_rate": 1.1473915857605179e-05, "loss": 0.056, "step": 164660 }, { "epoch": 63.95, "learning_rate": 1.1473398058252428e-05, "loss": 0.0269, "step": 164670 }, { "epoch": 63.95, "learning_rate": 1.1472880258899676e-05, "loss": 0.246, "step": 164680 }, { "epoch": 63.96, "learning_rate": 1.1472362459546926e-05, "loss": 0.0964, "step": 164690 }, { "epoch": 63.96, "learning_rate": 1.1471844660194175e-05, "loss": 0.1809, "step": 164700 }, { "epoch": 63.97, "learning_rate": 1.1471326860841425e-05, "loss": 0.0892, "step": 164710 }, { "epoch": 63.97, "learning_rate": 1.1470809061488675e-05, "loss": 0.1482, "step": 164720 }, { "epoch": 63.97, "learning_rate": 1.1470291262135924e-05, "loss": 0.0147, "step": 164730 }, { "epoch": 63.98, "learning_rate": 1.1469773462783172e-05, "loss": 0.1311, "step": 164740 }, { "epoch": 63.98, "learning_rate": 1.146925566343042e-05, "loss": 0.1914, "step": 164750 }, { "epoch": 63.98, "learning_rate": 1.146873786407767e-05, "loss": 0.0791, "step": 164760 }, { "epoch": 63.99, "learning_rate": 1.146822006472492e-05, "loss": 0.1097, "step": 164770 }, { "epoch": 63.99, "learning_rate": 1.1467702265372169e-05, "loss": 0.0787, "step": 164780 }, { "epoch": 64.0, "learning_rate": 1.1467184466019419e-05, "loss": 0.0004, "step": 164790 }, { "epoch": 64.0, "learning_rate": 1.1466666666666668e-05, "loss": 0.114, "step": 164800 }, { "epoch": 64.0, "eval_accuracy": 0.9488308115543329, "eval_loss": 0.3368075489997864, "eval_runtime": 8.2161, "eval_samples_per_second": 442.424, "eval_steps_per_second": 55.379, "step": 164800 }, { "epoch": 64.0, "learning_rate": 1.1466148867313918e-05, "loss": 0.0828, "step": 164810 }, { "epoch": 64.01, "learning_rate": 1.1465631067961167e-05, "loss": 0.0741, "step": 164820 }, { "epoch": 64.01, "learning_rate": 1.1465113268608414e-05, "loss": 0.012, "step": 164830 }, { "epoch": 64.02, "learning_rate": 1.1464595469255663e-05, "loss": 0.0484, "step": 164840 }, { "epoch": 64.02, "learning_rate": 1.1464077669902913e-05, "loss": 0.0615, "step": 164850 }, { "epoch": 64.02, "learning_rate": 1.1463559870550162e-05, "loss": 0.0826, "step": 164860 }, { "epoch": 64.03, "learning_rate": 1.1463042071197412e-05, "loss": 0.0693, "step": 164870 }, { "epoch": 64.03, "learning_rate": 1.1462524271844662e-05, "loss": 0.054, "step": 164880 }, { "epoch": 64.03, "learning_rate": 1.1462006472491911e-05, "loss": 0.0352, "step": 164890 }, { "epoch": 64.04, "learning_rate": 1.1461488673139161e-05, "loss": 0.115, "step": 164900 }, { "epoch": 64.04, "learning_rate": 1.1460970873786407e-05, "loss": 0.0208, "step": 164910 }, { "epoch": 64.05, "learning_rate": 1.1460453074433657e-05, "loss": 0.1814, "step": 164920 }, { "epoch": 64.05, "learning_rate": 1.1459935275080906e-05, "loss": 0.2075, "step": 164930 }, { "epoch": 64.05, "learning_rate": 1.1459417475728156e-05, "loss": 0.0606, "step": 164940 }, { "epoch": 64.06, "learning_rate": 1.1458899676375406e-05, "loss": 0.0448, "step": 164950 }, { "epoch": 64.06, "learning_rate": 1.1458381877022655e-05, "loss": 0.0169, "step": 164960 }, { "epoch": 64.07, "learning_rate": 1.1457864077669905e-05, "loss": 0.0686, "step": 164970 }, { "epoch": 64.07, "learning_rate": 1.1457346278317154e-05, "loss": 0.0356, "step": 164980 }, { "epoch": 64.07, "learning_rate": 1.14568284789644e-05, "loss": 0.0005, "step": 164990 }, { "epoch": 64.08, "learning_rate": 1.145631067961165e-05, "loss": 0.0287, "step": 165000 }, { "epoch": 64.08, "learning_rate": 1.14557928802589e-05, "loss": 0.0384, "step": 165010 }, { "epoch": 64.09, "learning_rate": 1.145527508090615e-05, "loss": 0.087, "step": 165020 }, { "epoch": 64.09, "learning_rate": 1.14547572815534e-05, "loss": 0.0227, "step": 165030 }, { "epoch": 64.09, "learning_rate": 1.1454239482200649e-05, "loss": 0.1596, "step": 165040 }, { "epoch": 64.1, "learning_rate": 1.1453721682847898e-05, "loss": 0.0483, "step": 165050 }, { "epoch": 64.1, "learning_rate": 1.1453203883495148e-05, "loss": 0.0002, "step": 165060 }, { "epoch": 64.1, "learning_rate": 1.1452686084142394e-05, "loss": 0.0002, "step": 165070 }, { "epoch": 64.11, "learning_rate": 1.1452168284789644e-05, "loss": 0.0439, "step": 165080 }, { "epoch": 64.11, "learning_rate": 1.1451650485436893e-05, "loss": 0.0172, "step": 165090 }, { "epoch": 64.12, "learning_rate": 1.1451132686084143e-05, "loss": 0.0175, "step": 165100 }, { "epoch": 64.12, "learning_rate": 1.1450614886731393e-05, "loss": 0.1803, "step": 165110 }, { "epoch": 64.12, "learning_rate": 1.1450097087378642e-05, "loss": 0.0463, "step": 165120 }, { "epoch": 64.13, "learning_rate": 1.1449579288025892e-05, "loss": 0.1294, "step": 165130 }, { "epoch": 64.13, "learning_rate": 1.1449061488673142e-05, "loss": 0.1067, "step": 165140 }, { "epoch": 64.14, "learning_rate": 1.1448543689320388e-05, "loss": 0.0637, "step": 165150 }, { "epoch": 64.14, "learning_rate": 1.1448025889967637e-05, "loss": 0.1315, "step": 165160 }, { "epoch": 64.14, "learning_rate": 1.1447508090614887e-05, "loss": 0.0193, "step": 165170 }, { "epoch": 64.15, "learning_rate": 1.1446990291262137e-05, "loss": 0.0972, "step": 165180 }, { "epoch": 64.15, "learning_rate": 1.1446472491909386e-05, "loss": 0.0452, "step": 165190 }, { "epoch": 64.16, "learning_rate": 1.1445954692556636e-05, "loss": 0.055, "step": 165200 }, { "epoch": 64.16, "learning_rate": 1.1445436893203886e-05, "loss": 0.1493, "step": 165210 }, { "epoch": 64.16, "learning_rate": 1.1444919093851135e-05, "loss": 0.0295, "step": 165220 }, { "epoch": 64.17, "learning_rate": 1.1444401294498381e-05, "loss": 0.0511, "step": 165230 }, { "epoch": 64.17, "learning_rate": 1.1443883495145631e-05, "loss": 0.2345, "step": 165240 }, { "epoch": 64.17, "learning_rate": 1.144336569579288e-05, "loss": 0.079, "step": 165250 }, { "epoch": 64.18, "learning_rate": 1.144284789644013e-05, "loss": 0.0353, "step": 165260 }, { "epoch": 64.18, "learning_rate": 1.144233009708738e-05, "loss": 0.0608, "step": 165270 }, { "epoch": 64.19, "learning_rate": 1.144181229773463e-05, "loss": 0.0979, "step": 165280 }, { "epoch": 64.19, "learning_rate": 1.1441294498381879e-05, "loss": 0.0617, "step": 165290 }, { "epoch": 64.19, "learning_rate": 1.1440776699029129e-05, "loss": 0.1291, "step": 165300 }, { "epoch": 64.2, "learning_rate": 1.1440258899676375e-05, "loss": 0.0107, "step": 165310 }, { "epoch": 64.2, "learning_rate": 1.1439741100323625e-05, "loss": 0.0563, "step": 165320 }, { "epoch": 64.21, "learning_rate": 1.1439223300970874e-05, "loss": 0.04, "step": 165330 }, { "epoch": 64.21, "learning_rate": 1.1438705501618124e-05, "loss": 0.0829, "step": 165340 }, { "epoch": 64.21, "learning_rate": 1.1438187702265373e-05, "loss": 0.01, "step": 165350 }, { "epoch": 64.22, "learning_rate": 1.1437669902912623e-05, "loss": 0.0673, "step": 165360 }, { "epoch": 64.22, "learning_rate": 1.1437152103559873e-05, "loss": 0.0507, "step": 165370 }, { "epoch": 64.23, "learning_rate": 1.1436634304207122e-05, "loss": 0.0076, "step": 165380 }, { "epoch": 64.23, "learning_rate": 1.143611650485437e-05, "loss": 0.1252, "step": 165390 }, { "epoch": 64.23, "learning_rate": 1.1435598705501618e-05, "loss": 0.1142, "step": 165400 }, { "epoch": 64.24, "learning_rate": 1.1435080906148868e-05, "loss": 0.0003, "step": 165410 }, { "epoch": 64.24, "learning_rate": 1.1434563106796117e-05, "loss": 0.1224, "step": 165420 }, { "epoch": 64.24, "learning_rate": 1.1434045307443367e-05, "loss": 0.0138, "step": 165430 }, { "epoch": 64.25, "learning_rate": 1.1433527508090617e-05, "loss": 0.0143, "step": 165440 }, { "epoch": 64.25, "learning_rate": 1.1433009708737866e-05, "loss": 0.0583, "step": 165450 }, { "epoch": 64.26, "learning_rate": 1.1432491909385116e-05, "loss": 0.0723, "step": 165460 }, { "epoch": 64.26, "learning_rate": 1.1431974110032364e-05, "loss": 0.0148, "step": 165470 }, { "epoch": 64.26, "learning_rate": 1.1431456310679612e-05, "loss": 0.0609, "step": 165480 }, { "epoch": 64.27, "learning_rate": 1.1430938511326861e-05, "loss": 0.0139, "step": 165490 }, { "epoch": 64.27, "learning_rate": 1.1430420711974111e-05, "loss": 0.1054, "step": 165500 }, { "epoch": 64.28, "learning_rate": 1.142990291262136e-05, "loss": 0.0378, "step": 165510 }, { "epoch": 64.28, "learning_rate": 1.142938511326861e-05, "loss": 0.0131, "step": 165520 }, { "epoch": 64.28, "learning_rate": 1.142886731391586e-05, "loss": 0.0002, "step": 165530 }, { "epoch": 64.29, "learning_rate": 1.1428349514563108e-05, "loss": 0.1214, "step": 165540 }, { "epoch": 64.29, "learning_rate": 1.1427831715210357e-05, "loss": 0.1354, "step": 165550 }, { "epoch": 64.3, "learning_rate": 1.1427313915857605e-05, "loss": 0.1821, "step": 165560 }, { "epoch": 64.3, "learning_rate": 1.1426796116504855e-05, "loss": 0.038, "step": 165570 }, { "epoch": 64.3, "learning_rate": 1.1426278317152104e-05, "loss": 0.0587, "step": 165580 }, { "epoch": 64.31, "learning_rate": 1.1425760517799354e-05, "loss": 0.0083, "step": 165590 }, { "epoch": 64.31, "learning_rate": 1.1425242718446604e-05, "loss": 0.0288, "step": 165600 }, { "epoch": 64.31, "learning_rate": 1.1424724919093853e-05, "loss": 0.0261, "step": 165610 }, { "epoch": 64.32, "learning_rate": 1.1424207119741101e-05, "loss": 0.0451, "step": 165620 }, { "epoch": 64.32, "learning_rate": 1.142368932038835e-05, "loss": 0.0038, "step": 165630 }, { "epoch": 64.33, "learning_rate": 1.1423171521035599e-05, "loss": 0.1513, "step": 165640 }, { "epoch": 64.33, "learning_rate": 1.1422653721682848e-05, "loss": 0.1076, "step": 165650 }, { "epoch": 64.33, "learning_rate": 1.1422135922330098e-05, "loss": 0.0071, "step": 165660 }, { "epoch": 64.34, "learning_rate": 1.1421618122977348e-05, "loss": 0.0175, "step": 165670 }, { "epoch": 64.34, "learning_rate": 1.1421100323624597e-05, "loss": 0.0004, "step": 165680 }, { "epoch": 64.35, "learning_rate": 1.1420582524271845e-05, "loss": 0.0067, "step": 165690 }, { "epoch": 64.35, "learning_rate": 1.1420064724919095e-05, "loss": 0.0915, "step": 165700 }, { "epoch": 64.35, "learning_rate": 1.1419546925566344e-05, "loss": 0.0018, "step": 165710 }, { "epoch": 64.36, "learning_rate": 1.1419029126213592e-05, "loss": 0.1316, "step": 165720 }, { "epoch": 64.36, "learning_rate": 1.1418511326860842e-05, "loss": 0.1427, "step": 165730 }, { "epoch": 64.37, "learning_rate": 1.1417993527508092e-05, "loss": 0.0443, "step": 165740 }, { "epoch": 64.37, "learning_rate": 1.1417475728155341e-05, "loss": 0.0944, "step": 165750 }, { "epoch": 64.37, "learning_rate": 1.141695792880259e-05, "loss": 0.0527, "step": 165760 }, { "epoch": 64.38, "learning_rate": 1.1416440129449839e-05, "loss": 0.0845, "step": 165770 }, { "epoch": 64.38, "learning_rate": 1.1415922330097088e-05, "loss": 0.1426, "step": 165780 }, { "epoch": 64.38, "learning_rate": 1.1415404530744338e-05, "loss": 0.0723, "step": 165790 }, { "epoch": 64.39, "learning_rate": 1.1414886731391586e-05, "loss": 0.0658, "step": 165800 }, { "epoch": 64.39, "learning_rate": 1.1414368932038836e-05, "loss": 0.0018, "step": 165810 }, { "epoch": 64.4, "learning_rate": 1.1413851132686085e-05, "loss": 0.0397, "step": 165820 }, { "epoch": 64.4, "learning_rate": 1.1413333333333335e-05, "loss": 0.0643, "step": 165830 }, { "epoch": 64.4, "learning_rate": 1.1412815533980583e-05, "loss": 0.1348, "step": 165840 }, { "epoch": 64.41, "learning_rate": 1.1412297734627832e-05, "loss": 0.1665, "step": 165850 }, { "epoch": 64.41, "learning_rate": 1.1411779935275082e-05, "loss": 0.1429, "step": 165860 }, { "epoch": 64.42, "learning_rate": 1.1411262135922332e-05, "loss": 0.0631, "step": 165870 }, { "epoch": 64.42, "learning_rate": 1.141074433656958e-05, "loss": 0.0701, "step": 165880 }, { "epoch": 64.42, "learning_rate": 1.1410226537216829e-05, "loss": 0.1045, "step": 165890 }, { "epoch": 64.43, "learning_rate": 1.1409708737864079e-05, "loss": 0.0402, "step": 165900 }, { "epoch": 64.43, "learning_rate": 1.1409190938511328e-05, "loss": 0.0795, "step": 165910 }, { "epoch": 64.43, "learning_rate": 1.1408673139158576e-05, "loss": 0.2038, "step": 165920 }, { "epoch": 64.44, "learning_rate": 1.1408155339805826e-05, "loss": 0.0314, "step": 165930 }, { "epoch": 64.44, "learning_rate": 1.1407637540453075e-05, "loss": 0.0439, "step": 165940 }, { "epoch": 64.45, "learning_rate": 1.1407119741100325e-05, "loss": 0.001, "step": 165950 }, { "epoch": 64.45, "learning_rate": 1.1406601941747575e-05, "loss": 0.0451, "step": 165960 }, { "epoch": 64.45, "learning_rate": 1.1406084142394823e-05, "loss": 0.0467, "step": 165970 }, { "epoch": 64.46, "learning_rate": 1.1405566343042072e-05, "loss": 0.1373, "step": 165980 }, { "epoch": 64.46, "learning_rate": 1.1405048543689322e-05, "loss": 0.0186, "step": 165990 }, { "epoch": 64.47, "learning_rate": 1.140453074433657e-05, "loss": 0.1583, "step": 166000 }, { "epoch": 64.47, "learning_rate": 1.140401294498382e-05, "loss": 0.1107, "step": 166010 }, { "epoch": 64.47, "learning_rate": 1.1403495145631069e-05, "loss": 0.0367, "step": 166020 }, { "epoch": 64.48, "learning_rate": 1.1402977346278319e-05, "loss": 0.0342, "step": 166030 }, { "epoch": 64.48, "learning_rate": 1.1402459546925568e-05, "loss": 0.0225, "step": 166040 }, { "epoch": 64.49, "learning_rate": 1.1401941747572816e-05, "loss": 0.0561, "step": 166050 }, { "epoch": 64.49, "learning_rate": 1.1401423948220066e-05, "loss": 0.0461, "step": 166060 }, { "epoch": 64.49, "learning_rate": 1.1400906148867314e-05, "loss": 0.0572, "step": 166070 }, { "epoch": 64.5, "learning_rate": 1.1400388349514563e-05, "loss": 0.0205, "step": 166080 }, { "epoch": 64.5, "learning_rate": 1.1399870550161813e-05, "loss": 0.0258, "step": 166090 }, { "epoch": 64.5, "learning_rate": 1.1399352750809063e-05, "loss": 0.0257, "step": 166100 }, { "epoch": 64.51, "learning_rate": 1.1398834951456312e-05, "loss": 0.0105, "step": 166110 }, { "epoch": 64.51, "learning_rate": 1.1398317152103562e-05, "loss": 0.0554, "step": 166120 }, { "epoch": 64.52, "learning_rate": 1.139779935275081e-05, "loss": 0.0413, "step": 166130 }, { "epoch": 64.52, "learning_rate": 1.139728155339806e-05, "loss": 0.0516, "step": 166140 }, { "epoch": 64.52, "learning_rate": 1.1396763754045307e-05, "loss": 0.1315, "step": 166150 }, { "epoch": 64.53, "learning_rate": 1.1396245954692557e-05, "loss": 0.0008, "step": 166160 }, { "epoch": 64.53, "learning_rate": 1.1395728155339807e-05, "loss": 0.0623, "step": 166170 }, { "epoch": 64.54, "learning_rate": 1.1395210355987056e-05, "loss": 0.1403, "step": 166180 }, { "epoch": 64.54, "learning_rate": 1.1394692556634306e-05, "loss": 0.1247, "step": 166190 }, { "epoch": 64.54, "learning_rate": 1.1394174757281555e-05, "loss": 0.1378, "step": 166200 }, { "epoch": 64.55, "learning_rate": 1.1393656957928803e-05, "loss": 0.048, "step": 166210 }, { "epoch": 64.55, "learning_rate": 1.1393139158576051e-05, "loss": 0.1123, "step": 166220 }, { "epoch": 64.56, "learning_rate": 1.13926213592233e-05, "loss": 0.0461, "step": 166230 }, { "epoch": 64.56, "learning_rate": 1.139210355987055e-05, "loss": 0.072, "step": 166240 }, { "epoch": 64.56, "learning_rate": 1.13915857605178e-05, "loss": 0.1131, "step": 166250 }, { "epoch": 64.57, "learning_rate": 1.139106796116505e-05, "loss": 0.0385, "step": 166260 }, { "epoch": 64.57, "learning_rate": 1.13905501618123e-05, "loss": 0.0569, "step": 166270 }, { "epoch": 64.57, "learning_rate": 1.1390032362459549e-05, "loss": 0.0725, "step": 166280 }, { "epoch": 64.58, "learning_rate": 1.1389514563106797e-05, "loss": 0.0872, "step": 166290 }, { "epoch": 64.58, "learning_rate": 1.1388996763754045e-05, "loss": 0.0989, "step": 166300 }, { "epoch": 64.59, "learning_rate": 1.1388478964401294e-05, "loss": 0.0565, "step": 166310 }, { "epoch": 64.59, "learning_rate": 1.1387961165048544e-05, "loss": 0.0733, "step": 166320 }, { "epoch": 64.59, "learning_rate": 1.1387443365695794e-05, "loss": 0.1651, "step": 166330 }, { "epoch": 64.6, "learning_rate": 1.1386925566343043e-05, "loss": 0.1265, "step": 166340 }, { "epoch": 64.6, "learning_rate": 1.1386407766990293e-05, "loss": 0.058, "step": 166350 }, { "epoch": 64.61, "learning_rate": 1.1385889967637542e-05, "loss": 0.0014, "step": 166360 }, { "epoch": 64.61, "learning_rate": 1.1385372168284789e-05, "loss": 0.0742, "step": 166370 }, { "epoch": 64.61, "learning_rate": 1.1384854368932038e-05, "loss": 0.0413, "step": 166380 }, { "epoch": 64.62, "learning_rate": 1.1384336569579288e-05, "loss": 0.0889, "step": 166390 }, { "epoch": 64.62, "learning_rate": 1.1383818770226538e-05, "loss": 0.0754, "step": 166400 }, { "epoch": 64.63, "learning_rate": 1.1383300970873787e-05, "loss": 0.0931, "step": 166410 }, { "epoch": 64.63, "learning_rate": 1.1382783171521037e-05, "loss": 0.0624, "step": 166420 }, { "epoch": 64.63, "learning_rate": 1.1382265372168286e-05, "loss": 0.0697, "step": 166430 }, { "epoch": 64.64, "learning_rate": 1.1381747572815536e-05, "loss": 0.1209, "step": 166440 }, { "epoch": 64.64, "learning_rate": 1.1381229773462782e-05, "loss": 0.0231, "step": 166450 }, { "epoch": 64.64, "learning_rate": 1.1380711974110032e-05, "loss": 0.0667, "step": 166460 }, { "epoch": 64.65, "learning_rate": 1.1380194174757281e-05, "loss": 0.1146, "step": 166470 }, { "epoch": 64.65, "learning_rate": 1.1379676375404531e-05, "loss": 0.1356, "step": 166480 }, { "epoch": 64.66, "learning_rate": 1.137915857605178e-05, "loss": 0.1053, "step": 166490 }, { "epoch": 64.66, "learning_rate": 1.137864077669903e-05, "loss": 0.0141, "step": 166500 }, { "epoch": 64.66, "learning_rate": 1.137812297734628e-05, "loss": 0.1087, "step": 166510 }, { "epoch": 64.67, "learning_rate": 1.137760517799353e-05, "loss": 0.0185, "step": 166520 }, { "epoch": 64.67, "learning_rate": 1.137708737864078e-05, "loss": 0.0103, "step": 166530 }, { "epoch": 64.68, "learning_rate": 1.1376569579288025e-05, "loss": 0.0034, "step": 166540 }, { "epoch": 64.68, "learning_rate": 1.1376051779935275e-05, "loss": 0.0585, "step": 166550 }, { "epoch": 64.68, "learning_rate": 1.1375533980582525e-05, "loss": 0.1668, "step": 166560 }, { "epoch": 64.69, "learning_rate": 1.1375016181229774e-05, "loss": 0.0128, "step": 166570 }, { "epoch": 64.69, "learning_rate": 1.1374498381877024e-05, "loss": 0.1597, "step": 166580 }, { "epoch": 64.7, "learning_rate": 1.1373980582524274e-05, "loss": 0.072, "step": 166590 }, { "epoch": 64.7, "learning_rate": 1.1373462783171523e-05, "loss": 0.0415, "step": 166600 }, { "epoch": 64.7, "learning_rate": 1.1372944983818773e-05, "loss": 0.052, "step": 166610 }, { "epoch": 64.71, "learning_rate": 1.1372427184466019e-05, "loss": 0.1109, "step": 166620 }, { "epoch": 64.71, "learning_rate": 1.1371909385113269e-05, "loss": 0.1262, "step": 166630 }, { "epoch": 64.71, "learning_rate": 1.1371391585760518e-05, "loss": 0.1002, "step": 166640 }, { "epoch": 64.72, "learning_rate": 1.1370873786407768e-05, "loss": 0.1112, "step": 166650 }, { "epoch": 64.72, "learning_rate": 1.1370355987055017e-05, "loss": 0.0005, "step": 166660 }, { "epoch": 64.73, "learning_rate": 1.1369838187702267e-05, "loss": 0.1112, "step": 166670 }, { "epoch": 64.73, "learning_rate": 1.1369320388349517e-05, "loss": 0.1059, "step": 166680 }, { "epoch": 64.73, "learning_rate": 1.1368802588996766e-05, "loss": 0.0232, "step": 166690 }, { "epoch": 64.74, "learning_rate": 1.1368284789644013e-05, "loss": 0.0167, "step": 166700 }, { "epoch": 64.74, "learning_rate": 1.1367766990291262e-05, "loss": 0.0604, "step": 166710 }, { "epoch": 64.75, "learning_rate": 1.1367249190938512e-05, "loss": 0.0724, "step": 166720 }, { "epoch": 64.75, "learning_rate": 1.1366731391585761e-05, "loss": 0.0007, "step": 166730 }, { "epoch": 64.75, "learning_rate": 1.1366213592233011e-05, "loss": 0.038, "step": 166740 }, { "epoch": 64.76, "learning_rate": 1.136569579288026e-05, "loss": 0.007, "step": 166750 }, { "epoch": 64.76, "learning_rate": 1.136517799352751e-05, "loss": 0.0366, "step": 166760 }, { "epoch": 64.77, "learning_rate": 1.136466019417476e-05, "loss": 0.0085, "step": 166770 }, { "epoch": 64.77, "learning_rate": 1.1364142394822006e-05, "loss": 0.0601, "step": 166780 }, { "epoch": 64.77, "learning_rate": 1.1363624595469256e-05, "loss": 0.1358, "step": 166790 }, { "epoch": 64.78, "learning_rate": 1.1363106796116505e-05, "loss": 0.1318, "step": 166800 }, { "epoch": 64.78, "learning_rate": 1.1362588996763755e-05, "loss": 0.0495, "step": 166810 }, { "epoch": 64.78, "learning_rate": 1.1362071197411005e-05, "loss": 0.0338, "step": 166820 }, { "epoch": 64.79, "learning_rate": 1.1361553398058254e-05, "loss": 0.065, "step": 166830 }, { "epoch": 64.79, "learning_rate": 1.1361035598705504e-05, "loss": 0.1135, "step": 166840 }, { "epoch": 64.8, "learning_rate": 1.1360517799352753e-05, "loss": 0.0287, "step": 166850 }, { "epoch": 64.8, "learning_rate": 1.136e-05, "loss": 0.0541, "step": 166860 }, { "epoch": 64.8, "learning_rate": 1.135948220064725e-05, "loss": 0.0745, "step": 166870 }, { "epoch": 64.81, "learning_rate": 1.1358964401294499e-05, "loss": 0.032, "step": 166880 }, { "epoch": 64.81, "learning_rate": 1.1358446601941749e-05, "loss": 0.0895, "step": 166890 }, { "epoch": 64.82, "learning_rate": 1.1357928802588998e-05, "loss": 0.0477, "step": 166900 }, { "epoch": 64.82, "learning_rate": 1.1357411003236248e-05, "loss": 0.0468, "step": 166910 }, { "epoch": 64.82, "learning_rate": 1.1356893203883497e-05, "loss": 0.0288, "step": 166920 }, { "epoch": 64.83, "learning_rate": 1.1356375404530747e-05, "loss": 0.0034, "step": 166930 }, { "epoch": 64.83, "learning_rate": 1.1355857605177993e-05, "loss": 0.1734, "step": 166940 }, { "epoch": 64.83, "learning_rate": 1.1355339805825243e-05, "loss": 0.1708, "step": 166950 }, { "epoch": 64.84, "learning_rate": 1.1354822006472492e-05, "loss": 0.0479, "step": 166960 }, { "epoch": 64.84, "learning_rate": 1.1354304207119742e-05, "loss": 0.0997, "step": 166970 }, { "epoch": 64.85, "learning_rate": 1.1353786407766992e-05, "loss": 0.1767, "step": 166980 }, { "epoch": 64.85, "learning_rate": 1.1353268608414241e-05, "loss": 0.0436, "step": 166990 }, { "epoch": 64.85, "learning_rate": 1.1352750809061491e-05, "loss": 0.104, "step": 167000 }, { "epoch": 64.86, "learning_rate": 1.1352233009708739e-05, "loss": 0.0016, "step": 167010 }, { "epoch": 64.86, "learning_rate": 1.1351715210355987e-05, "loss": 0.0211, "step": 167020 }, { "epoch": 64.87, "learning_rate": 1.1351197411003236e-05, "loss": 0.1665, "step": 167030 }, { "epoch": 64.87, "learning_rate": 1.1350679611650486e-05, "loss": 0.1075, "step": 167040 }, { "epoch": 64.87, "learning_rate": 1.1350161812297736e-05, "loss": 0.067, "step": 167050 }, { "epoch": 64.88, "learning_rate": 1.1349644012944985e-05, "loss": 0.0363, "step": 167060 }, { "epoch": 64.88, "learning_rate": 1.1349126213592235e-05, "loss": 0.006, "step": 167070 }, { "epoch": 64.89, "learning_rate": 1.1348608414239484e-05, "loss": 0.0737, "step": 167080 }, { "epoch": 64.89, "learning_rate": 1.1348090614886732e-05, "loss": 0.2195, "step": 167090 }, { "epoch": 64.89, "learning_rate": 1.1347572815533982e-05, "loss": 0.0128, "step": 167100 }, { "epoch": 64.9, "learning_rate": 1.134705501618123e-05, "loss": 0.1553, "step": 167110 }, { "epoch": 64.9, "learning_rate": 1.134653721682848e-05, "loss": 0.03, "step": 167120 }, { "epoch": 64.9, "learning_rate": 1.134601941747573e-05, "loss": 0.1351, "step": 167130 }, { "epoch": 64.91, "learning_rate": 1.1345501618122979e-05, "loss": 0.0308, "step": 167140 }, { "epoch": 64.91, "learning_rate": 1.1344983818770228e-05, "loss": 0.1303, "step": 167150 }, { "epoch": 64.92, "learning_rate": 1.1344466019417476e-05, "loss": 0.1481, "step": 167160 }, { "epoch": 64.92, "learning_rate": 1.1343948220064726e-05, "loss": 0.0676, "step": 167170 }, { "epoch": 64.92, "learning_rate": 1.1343430420711976e-05, "loss": 0.1199, "step": 167180 }, { "epoch": 64.93, "learning_rate": 1.1342912621359223e-05, "loss": 0.1081, "step": 167190 }, { "epoch": 64.93, "learning_rate": 1.1342394822006473e-05, "loss": 0.0298, "step": 167200 }, { "epoch": 64.94, "learning_rate": 1.1341877022653723e-05, "loss": 0.0627, "step": 167210 }, { "epoch": 64.94, "learning_rate": 1.1341359223300972e-05, "loss": 0.0153, "step": 167220 }, { "epoch": 64.94, "learning_rate": 1.1340841423948222e-05, "loss": 0.1145, "step": 167230 }, { "epoch": 64.95, "learning_rate": 1.134032362459547e-05, "loss": 0.0292, "step": 167240 }, { "epoch": 64.95, "learning_rate": 1.133980582524272e-05, "loss": 0.0319, "step": 167250 }, { "epoch": 64.96, "learning_rate": 1.1339288025889969e-05, "loss": 0.0475, "step": 167260 }, { "epoch": 64.96, "learning_rate": 1.1338770226537217e-05, "loss": 0.0255, "step": 167270 }, { "epoch": 64.96, "learning_rate": 1.1338252427184467e-05, "loss": 0.0354, "step": 167280 }, { "epoch": 64.97, "learning_rate": 1.1337734627831716e-05, "loss": 0.0615, "step": 167290 }, { "epoch": 64.97, "learning_rate": 1.1337216828478966e-05, "loss": 0.0663, "step": 167300 }, { "epoch": 64.97, "learning_rate": 1.1336699029126214e-05, "loss": 0.0246, "step": 167310 }, { "epoch": 64.98, "learning_rate": 1.1336181229773463e-05, "loss": 0.008, "step": 167320 }, { "epoch": 64.98, "learning_rate": 1.1335663430420713e-05, "loss": 0.1021, "step": 167330 }, { "epoch": 64.99, "learning_rate": 1.1335145631067963e-05, "loss": 0.0746, "step": 167340 }, { "epoch": 64.99, "learning_rate": 1.133462783171521e-05, "loss": 0.0203, "step": 167350 }, { "epoch": 64.99, "learning_rate": 1.133411003236246e-05, "loss": 0.0834, "step": 167360 }, { "epoch": 65.0, "learning_rate": 1.133359223300971e-05, "loss": 0.048, "step": 167370 }, { "epoch": 65.0, "eval_accuracy": 0.951031636863824, "eval_loss": 0.335758239030838, "eval_runtime": 8.2116, "eval_samples_per_second": 442.667, "eval_steps_per_second": 55.41, "step": 167375 }, { "epoch": 65.0, "learning_rate": 1.133307443365696e-05, "loss": 0.0573, "step": 167380 }, { "epoch": 65.01, "learning_rate": 1.1332556634304207e-05, "loss": 0.0145, "step": 167390 }, { "epoch": 65.01, "learning_rate": 1.1332038834951457e-05, "loss": 0.0201, "step": 167400 }, { "epoch": 65.01, "learning_rate": 1.1331521035598707e-05, "loss": 0.0482, "step": 167410 }, { "epoch": 65.02, "learning_rate": 1.1331003236245956e-05, "loss": 0.1077, "step": 167420 }, { "epoch": 65.02, "learning_rate": 1.1330485436893204e-05, "loss": 0.0676, "step": 167430 }, { "epoch": 65.03, "learning_rate": 1.1329967637540454e-05, "loss": 0.1153, "step": 167440 }, { "epoch": 65.03, "learning_rate": 1.1329449838187703e-05, "loss": 0.2046, "step": 167450 }, { "epoch": 65.03, "learning_rate": 1.1328932038834953e-05, "loss": 0.052, "step": 167460 }, { "epoch": 65.04, "learning_rate": 1.1328414239482201e-05, "loss": 0.0173, "step": 167470 }, { "epoch": 65.04, "learning_rate": 1.132789644012945e-05, "loss": 0.1732, "step": 167480 }, { "epoch": 65.04, "learning_rate": 1.13273786407767e-05, "loss": 0.1, "step": 167490 }, { "epoch": 65.05, "learning_rate": 1.132686084142395e-05, "loss": 0.0024, "step": 167500 }, { "epoch": 65.05, "learning_rate": 1.1326343042071198e-05, "loss": 0.0454, "step": 167510 }, { "epoch": 65.06, "learning_rate": 1.1325825242718447e-05, "loss": 0.0184, "step": 167520 }, { "epoch": 65.06, "learning_rate": 1.1325307443365697e-05, "loss": 0.0399, "step": 167530 }, { "epoch": 65.06, "learning_rate": 1.1324789644012945e-05, "loss": 0.1152, "step": 167540 }, { "epoch": 65.07, "learning_rate": 1.1324271844660195e-05, "loss": 0.0153, "step": 167550 }, { "epoch": 65.07, "learning_rate": 1.1323754045307444e-05, "loss": 0.0308, "step": 167560 }, { "epoch": 65.08, "learning_rate": 1.1323236245954694e-05, "loss": 0.0821, "step": 167570 }, { "epoch": 65.08, "learning_rate": 1.1322718446601943e-05, "loss": 0.1258, "step": 167580 }, { "epoch": 65.08, "learning_rate": 1.1322200647249191e-05, "loss": 0.0155, "step": 167590 }, { "epoch": 65.09, "learning_rate": 1.1321682847896441e-05, "loss": 0.033, "step": 167600 }, { "epoch": 65.09, "learning_rate": 1.132116504854369e-05, "loss": 0.0125, "step": 167610 }, { "epoch": 65.1, "learning_rate": 1.1320647249190938e-05, "loss": 0.0879, "step": 167620 }, { "epoch": 65.1, "learning_rate": 1.1320129449838188e-05, "loss": 0.056, "step": 167630 }, { "epoch": 65.1, "learning_rate": 1.1319611650485438e-05, "loss": 0.0985, "step": 167640 }, { "epoch": 65.11, "learning_rate": 1.1319093851132687e-05, "loss": 0.0935, "step": 167650 }, { "epoch": 65.11, "learning_rate": 1.1318576051779937e-05, "loss": 0.0002, "step": 167660 }, { "epoch": 65.11, "learning_rate": 1.1318058252427187e-05, "loss": 0.1038, "step": 167670 }, { "epoch": 65.12, "learning_rate": 1.1317540453074434e-05, "loss": 0.0836, "step": 167680 }, { "epoch": 65.12, "learning_rate": 1.1317022653721682e-05, "loss": 0.0438, "step": 167690 }, { "epoch": 65.13, "learning_rate": 1.1316504854368932e-05, "loss": 0.045, "step": 167700 }, { "epoch": 65.13, "learning_rate": 1.1315987055016182e-05, "loss": 0.1049, "step": 167710 }, { "epoch": 65.13, "learning_rate": 1.1315469255663431e-05, "loss": 0.0325, "step": 167720 }, { "epoch": 65.14, "learning_rate": 1.1314951456310681e-05, "loss": 0.0329, "step": 167730 }, { "epoch": 65.14, "learning_rate": 1.131443365695793e-05, "loss": 0.0501, "step": 167740 }, { "epoch": 65.15, "learning_rate": 1.131391585760518e-05, "loss": 0.1619, "step": 167750 }, { "epoch": 65.15, "learning_rate": 1.1313398058252428e-05, "loss": 0.1382, "step": 167760 }, { "epoch": 65.15, "learning_rate": 1.1312880258899676e-05, "loss": 0.0668, "step": 167770 }, { "epoch": 65.16, "learning_rate": 1.1312362459546926e-05, "loss": 0.0045, "step": 167780 }, { "epoch": 65.16, "learning_rate": 1.1311844660194175e-05, "loss": 0.017, "step": 167790 }, { "epoch": 65.17, "learning_rate": 1.1311326860841425e-05, "loss": 0.0117, "step": 167800 }, { "epoch": 65.17, "learning_rate": 1.1310809061488674e-05, "loss": 0.0605, "step": 167810 }, { "epoch": 65.17, "learning_rate": 1.1310291262135924e-05, "loss": 0.0933, "step": 167820 }, { "epoch": 65.18, "learning_rate": 1.1309773462783174e-05, "loss": 0.2166, "step": 167830 }, { "epoch": 65.18, "learning_rate": 1.130925566343042e-05, "loss": 0.0865, "step": 167840 }, { "epoch": 65.18, "learning_rate": 1.130873786407767e-05, "loss": 0.024, "step": 167850 }, { "epoch": 65.19, "learning_rate": 1.1308220064724919e-05, "loss": 0.0007, "step": 167860 }, { "epoch": 65.19, "learning_rate": 1.1307702265372169e-05, "loss": 0.0484, "step": 167870 }, { "epoch": 65.2, "learning_rate": 1.1307184466019418e-05, "loss": 0.0372, "step": 167880 }, { "epoch": 65.2, "learning_rate": 1.1306666666666668e-05, "loss": 0.0549, "step": 167890 }, { "epoch": 65.2, "learning_rate": 1.1306148867313918e-05, "loss": 0.0207, "step": 167900 }, { "epoch": 65.21, "learning_rate": 1.1305631067961167e-05, "loss": 0.1246, "step": 167910 }, { "epoch": 65.21, "learning_rate": 1.1305113268608413e-05, "loss": 0.112, "step": 167920 }, { "epoch": 65.22, "learning_rate": 1.1304595469255663e-05, "loss": 0.068, "step": 167930 }, { "epoch": 65.22, "learning_rate": 1.1304077669902913e-05, "loss": 0.0906, "step": 167940 }, { "epoch": 65.22, "learning_rate": 1.1303559870550162e-05, "loss": 0.1214, "step": 167950 }, { "epoch": 65.23, "learning_rate": 1.1303042071197412e-05, "loss": 0.0113, "step": 167960 }, { "epoch": 65.23, "learning_rate": 1.1302524271844662e-05, "loss": 0.0307, "step": 167970 }, { "epoch": 65.23, "learning_rate": 1.1302006472491911e-05, "loss": 0.0507, "step": 167980 }, { "epoch": 65.24, "learning_rate": 1.130148867313916e-05, "loss": 0.0555, "step": 167990 }, { "epoch": 65.24, "learning_rate": 1.1300970873786407e-05, "loss": 0.0003, "step": 168000 }, { "epoch": 65.25, "learning_rate": 1.1300453074433657e-05, "loss": 0.0746, "step": 168010 }, { "epoch": 65.25, "learning_rate": 1.1299935275080906e-05, "loss": 0.0357, "step": 168020 }, { "epoch": 65.25, "learning_rate": 1.1299417475728156e-05, "loss": 0.0657, "step": 168030 }, { "epoch": 65.26, "learning_rate": 1.1298899676375405e-05, "loss": 0.0024, "step": 168040 }, { "epoch": 65.26, "learning_rate": 1.1298381877022655e-05, "loss": 0.0037, "step": 168050 }, { "epoch": 65.27, "learning_rate": 1.1297864077669905e-05, "loss": 0.0722, "step": 168060 }, { "epoch": 65.27, "learning_rate": 1.1297346278317154e-05, "loss": 0.0629, "step": 168070 }, { "epoch": 65.27, "learning_rate": 1.12968284789644e-05, "loss": 0.1243, "step": 168080 }, { "epoch": 65.28, "learning_rate": 1.129631067961165e-05, "loss": 0.1355, "step": 168090 }, { "epoch": 65.28, "learning_rate": 1.12957928802589e-05, "loss": 0.0599, "step": 168100 }, { "epoch": 65.29, "learning_rate": 1.129527508090615e-05, "loss": 0.044, "step": 168110 }, { "epoch": 65.29, "learning_rate": 1.1294757281553399e-05, "loss": 0.0071, "step": 168120 }, { "epoch": 65.29, "learning_rate": 1.1294239482200649e-05, "loss": 0.0547, "step": 168130 }, { "epoch": 65.3, "learning_rate": 1.1293721682847898e-05, "loss": 0.0762, "step": 168140 }, { "epoch": 65.3, "learning_rate": 1.1293203883495148e-05, "loss": 0.138, "step": 168150 }, { "epoch": 65.3, "learning_rate": 1.1292686084142394e-05, "loss": 0.0519, "step": 168160 }, { "epoch": 65.31, "learning_rate": 1.1292168284789644e-05, "loss": 0.111, "step": 168170 }, { "epoch": 65.31, "learning_rate": 1.1291650485436893e-05, "loss": 0.0014, "step": 168180 }, { "epoch": 65.32, "learning_rate": 1.1291132686084143e-05, "loss": 0.0133, "step": 168190 }, { "epoch": 65.32, "learning_rate": 1.1290614886731393e-05, "loss": 0.0237, "step": 168200 }, { "epoch": 65.32, "learning_rate": 1.1290097087378642e-05, "loss": 0.0907, "step": 168210 }, { "epoch": 65.33, "learning_rate": 1.1289579288025892e-05, "loss": 0.0739, "step": 168220 }, { "epoch": 65.33, "learning_rate": 1.1289061488673141e-05, "loss": 0.0824, "step": 168230 }, { "epoch": 65.34, "learning_rate": 1.1288543689320391e-05, "loss": 0.0269, "step": 168240 }, { "epoch": 65.34, "learning_rate": 1.1288025889967637e-05, "loss": 0.1133, "step": 168250 }, { "epoch": 65.34, "learning_rate": 1.1287508090614887e-05, "loss": 0.0316, "step": 168260 }, { "epoch": 65.35, "learning_rate": 1.1286990291262137e-05, "loss": 0.1447, "step": 168270 }, { "epoch": 65.35, "learning_rate": 1.1286472491909386e-05, "loss": 0.0954, "step": 168280 }, { "epoch": 65.36, "learning_rate": 1.1285954692556636e-05, "loss": 0.2526, "step": 168290 }, { "epoch": 65.36, "learning_rate": 1.1285436893203885e-05, "loss": 0.0695, "step": 168300 }, { "epoch": 65.36, "learning_rate": 1.1284919093851135e-05, "loss": 0.0094, "step": 168310 }, { "epoch": 65.37, "learning_rate": 1.1284401294498385e-05, "loss": 0.1178, "step": 168320 }, { "epoch": 65.37, "learning_rate": 1.128388349514563e-05, "loss": 0.0469, "step": 168330 }, { "epoch": 65.37, "learning_rate": 1.128336569579288e-05, "loss": 0.062, "step": 168340 }, { "epoch": 65.38, "learning_rate": 1.128284789644013e-05, "loss": 0.009, "step": 168350 }, { "epoch": 65.38, "learning_rate": 1.128233009708738e-05, "loss": 0.0542, "step": 168360 }, { "epoch": 65.39, "learning_rate": 1.128181229773463e-05, "loss": 0.018, "step": 168370 }, { "epoch": 65.39, "learning_rate": 1.1281294498381879e-05, "loss": 0.1299, "step": 168380 }, { "epoch": 65.39, "learning_rate": 1.1280776699029129e-05, "loss": 0.0741, "step": 168390 }, { "epoch": 65.4, "learning_rate": 1.1280258899676378e-05, "loss": 0.0339, "step": 168400 }, { "epoch": 65.4, "learning_rate": 1.1279741100323624e-05, "loss": 0.0088, "step": 168410 }, { "epoch": 65.41, "learning_rate": 1.1279223300970874e-05, "loss": 0.1032, "step": 168420 }, { "epoch": 65.41, "learning_rate": 1.1278705501618124e-05, "loss": 0.0219, "step": 168430 }, { "epoch": 65.41, "learning_rate": 1.1278187702265373e-05, "loss": 0.1528, "step": 168440 }, { "epoch": 65.42, "learning_rate": 1.1277669902912623e-05, "loss": 0.0576, "step": 168450 }, { "epoch": 65.42, "learning_rate": 1.1277152103559872e-05, "loss": 0.1169, "step": 168460 }, { "epoch": 65.43, "learning_rate": 1.1276634304207122e-05, "loss": 0.1258, "step": 168470 }, { "epoch": 65.43, "learning_rate": 1.127611650485437e-05, "loss": 0.0647, "step": 168480 }, { "epoch": 65.43, "learning_rate": 1.1275598705501618e-05, "loss": 0.1146, "step": 168490 }, { "epoch": 65.44, "learning_rate": 1.1275080906148868e-05, "loss": 0.0372, "step": 168500 }, { "epoch": 65.44, "learning_rate": 1.1274563106796117e-05, "loss": 0.0205, "step": 168510 }, { "epoch": 65.44, "learning_rate": 1.1274045307443367e-05, "loss": 0.0475, "step": 168520 }, { "epoch": 65.45, "learning_rate": 1.1273527508090616e-05, "loss": 0.1392, "step": 168530 }, { "epoch": 65.45, "learning_rate": 1.1273009708737866e-05, "loss": 0.114, "step": 168540 }, { "epoch": 65.46, "learning_rate": 1.1272491909385116e-05, "loss": 0.0761, "step": 168550 }, { "epoch": 65.46, "learning_rate": 1.1271974110032364e-05, "loss": 0.0006, "step": 168560 }, { "epoch": 65.46, "learning_rate": 1.1271456310679611e-05, "loss": 0.1811, "step": 168570 }, { "epoch": 65.47, "learning_rate": 1.1270938511326861e-05, "loss": 0.0123, "step": 168580 }, { "epoch": 65.47, "learning_rate": 1.127042071197411e-05, "loss": 0.148, "step": 168590 }, { "epoch": 65.48, "learning_rate": 1.126990291262136e-05, "loss": 0.1811, "step": 168600 }, { "epoch": 65.48, "learning_rate": 1.126938511326861e-05, "loss": 0.1067, "step": 168610 }, { "epoch": 65.48, "learning_rate": 1.126886731391586e-05, "loss": 0.0556, "step": 168620 }, { "epoch": 65.49, "learning_rate": 1.1268349514563108e-05, "loss": 0.1919, "step": 168630 }, { "epoch": 65.49, "learning_rate": 1.1267831715210357e-05, "loss": 0.0088, "step": 168640 }, { "epoch": 65.5, "learning_rate": 1.1267313915857605e-05, "loss": 0.0519, "step": 168650 }, { "epoch": 65.5, "learning_rate": 1.1266796116504855e-05, "loss": 0.0923, "step": 168660 }, { "epoch": 65.5, "learning_rate": 1.1266278317152104e-05, "loss": 0.0012, "step": 168670 }, { "epoch": 65.51, "learning_rate": 1.1265760517799354e-05, "loss": 0.0154, "step": 168680 }, { "epoch": 65.51, "learning_rate": 1.1265242718446604e-05, "loss": 0.0317, "step": 168690 }, { "epoch": 65.51, "learning_rate": 1.1264724919093853e-05, "loss": 0.0025, "step": 168700 }, { "epoch": 65.52, "learning_rate": 1.1264207119741101e-05, "loss": 0.0201, "step": 168710 }, { "epoch": 65.52, "learning_rate": 1.126368932038835e-05, "loss": 0.1575, "step": 168720 }, { "epoch": 65.53, "learning_rate": 1.1263171521035599e-05, "loss": 0.0148, "step": 168730 }, { "epoch": 65.53, "learning_rate": 1.1262653721682848e-05, "loss": 0.1472, "step": 168740 }, { "epoch": 65.53, "learning_rate": 1.1262135922330098e-05, "loss": 0.1445, "step": 168750 }, { "epoch": 65.54, "learning_rate": 1.1261618122977347e-05, "loss": 0.1112, "step": 168760 }, { "epoch": 65.54, "learning_rate": 1.1261100323624597e-05, "loss": 0.0839, "step": 168770 }, { "epoch": 65.55, "learning_rate": 1.1260582524271845e-05, "loss": 0.0653, "step": 168780 }, { "epoch": 65.55, "learning_rate": 1.1260064724919095e-05, "loss": 0.0879, "step": 168790 }, { "epoch": 65.55, "learning_rate": 1.1259546925566344e-05, "loss": 0.0429, "step": 168800 }, { "epoch": 65.56, "learning_rate": 1.1259029126213594e-05, "loss": 0.042, "step": 168810 }, { "epoch": 65.56, "learning_rate": 1.1258511326860842e-05, "loss": 0.0183, "step": 168820 }, { "epoch": 65.57, "learning_rate": 1.1257993527508091e-05, "loss": 0.0681, "step": 168830 }, { "epoch": 65.57, "learning_rate": 1.1257475728155341e-05, "loss": 0.2782, "step": 168840 }, { "epoch": 65.57, "learning_rate": 1.125695792880259e-05, "loss": 0.0346, "step": 168850 }, { "epoch": 65.58, "learning_rate": 1.1256440129449839e-05, "loss": 0.198, "step": 168860 }, { "epoch": 65.58, "learning_rate": 1.1255922330097088e-05, "loss": 0.1432, "step": 168870 }, { "epoch": 65.58, "learning_rate": 1.1255404530744338e-05, "loss": 0.0696, "step": 168880 }, { "epoch": 65.59, "learning_rate": 1.1254886731391587e-05, "loss": 0.0462, "step": 168890 }, { "epoch": 65.59, "learning_rate": 1.1254368932038835e-05, "loss": 0.0881, "step": 168900 }, { "epoch": 65.6, "learning_rate": 1.1253851132686085e-05, "loss": 0.182, "step": 168910 }, { "epoch": 65.6, "learning_rate": 1.1253333333333335e-05, "loss": 0.0385, "step": 168920 }, { "epoch": 65.6, "learning_rate": 1.1252815533980584e-05, "loss": 0.0313, "step": 168930 }, { "epoch": 65.61, "learning_rate": 1.1252297734627832e-05, "loss": 0.0632, "step": 168940 }, { "epoch": 65.61, "learning_rate": 1.1251779935275082e-05, "loss": 0.0622, "step": 168950 }, { "epoch": 65.62, "learning_rate": 1.1251262135922331e-05, "loss": 0.0925, "step": 168960 }, { "epoch": 65.62, "learning_rate": 1.1250744336569581e-05, "loss": 0.1, "step": 168970 }, { "epoch": 65.62, "learning_rate": 1.1250226537216829e-05, "loss": 0.1298, "step": 168980 }, { "epoch": 65.63, "learning_rate": 1.1249708737864079e-05, "loss": 0.0903, "step": 168990 }, { "epoch": 65.63, "learning_rate": 1.1249190938511328e-05, "loss": 0.1195, "step": 169000 }, { "epoch": 65.63, "learning_rate": 1.1248673139158576e-05, "loss": 0.0175, "step": 169010 }, { "epoch": 65.64, "learning_rate": 1.1248155339805826e-05, "loss": 0.0088, "step": 169020 }, { "epoch": 65.64, "learning_rate": 1.1247637540453075e-05, "loss": 0.0502, "step": 169030 }, { "epoch": 65.65, "learning_rate": 1.1247119741100325e-05, "loss": 0.0193, "step": 169040 }, { "epoch": 65.65, "learning_rate": 1.1246601941747575e-05, "loss": 0.0613, "step": 169050 }, { "epoch": 65.65, "learning_rate": 1.1246084142394822e-05, "loss": 0.0065, "step": 169060 }, { "epoch": 65.66, "learning_rate": 1.1245566343042072e-05, "loss": 0.0012, "step": 169070 }, { "epoch": 65.66, "learning_rate": 1.1245048543689322e-05, "loss": 0.0226, "step": 169080 }, { "epoch": 65.67, "learning_rate": 1.124453074433657e-05, "loss": 0.0369, "step": 169090 }, { "epoch": 65.67, "learning_rate": 1.124401294498382e-05, "loss": 0.0997, "step": 169100 }, { "epoch": 65.67, "learning_rate": 1.1243495145631069e-05, "loss": 0.121, "step": 169110 }, { "epoch": 65.68, "learning_rate": 1.1242977346278318e-05, "loss": 0.0911, "step": 169120 }, { "epoch": 65.68, "learning_rate": 1.1242459546925568e-05, "loss": 0.11, "step": 169130 }, { "epoch": 65.69, "learning_rate": 1.1241941747572816e-05, "loss": 0.0579, "step": 169140 }, { "epoch": 65.69, "learning_rate": 1.1241423948220066e-05, "loss": 0.0054, "step": 169150 }, { "epoch": 65.69, "learning_rate": 1.1240906148867314e-05, "loss": 0.1151, "step": 169160 }, { "epoch": 65.7, "learning_rate": 1.1240388349514563e-05, "loss": 0.0751, "step": 169170 }, { "epoch": 65.7, "learning_rate": 1.1239870550161813e-05, "loss": 0.0127, "step": 169180 }, { "epoch": 65.7, "learning_rate": 1.1239352750809062e-05, "loss": 0.0365, "step": 169190 }, { "epoch": 65.71, "learning_rate": 1.1238834951456312e-05, "loss": 0.1651, "step": 169200 }, { "epoch": 65.71, "learning_rate": 1.1238317152103562e-05, "loss": 0.0014, "step": 169210 }, { "epoch": 65.72, "learning_rate": 1.123779935275081e-05, "loss": 0.0186, "step": 169220 }, { "epoch": 65.72, "learning_rate": 1.123728155339806e-05, "loss": 0.0912, "step": 169230 }, { "epoch": 65.72, "learning_rate": 1.1236763754045307e-05, "loss": 0.1584, "step": 169240 }, { "epoch": 65.73, "learning_rate": 1.1236245954692557e-05, "loss": 0.2207, "step": 169250 }, { "epoch": 65.73, "learning_rate": 1.1235728155339806e-05, "loss": 0.0913, "step": 169260 }, { "epoch": 65.74, "learning_rate": 1.1235210355987056e-05, "loss": 0.0852, "step": 169270 }, { "epoch": 65.74, "learning_rate": 1.1234692556634306e-05, "loss": 0.0498, "step": 169280 }, { "epoch": 65.74, "learning_rate": 1.1234174757281555e-05, "loss": 0.0784, "step": 169290 }, { "epoch": 65.75, "learning_rate": 1.1233656957928805e-05, "loss": 0.0513, "step": 169300 }, { "epoch": 65.75, "learning_rate": 1.1233139158576051e-05, "loss": 0.0756, "step": 169310 }, { "epoch": 65.76, "learning_rate": 1.12326213592233e-05, "loss": 0.1284, "step": 169320 }, { "epoch": 65.76, "learning_rate": 1.123210355987055e-05, "loss": 0.0289, "step": 169330 }, { "epoch": 65.76, "learning_rate": 1.12315857605178e-05, "loss": 0.0618, "step": 169340 }, { "epoch": 65.77, "learning_rate": 1.123106796116505e-05, "loss": 0.0217, "step": 169350 }, { "epoch": 65.77, "learning_rate": 1.1230550161812299e-05, "loss": 0.0328, "step": 169360 }, { "epoch": 65.77, "learning_rate": 1.1230032362459549e-05, "loss": 0.0212, "step": 169370 }, { "epoch": 65.78, "learning_rate": 1.1229514563106798e-05, "loss": 0.0109, "step": 169380 }, { "epoch": 65.78, "learning_rate": 1.1228996763754045e-05, "loss": 0.0803, "step": 169390 }, { "epoch": 65.79, "learning_rate": 1.1228478964401294e-05, "loss": 0.0755, "step": 169400 }, { "epoch": 65.79, "learning_rate": 1.1227961165048544e-05, "loss": 0.2043, "step": 169410 }, { "epoch": 65.79, "learning_rate": 1.1227443365695793e-05, "loss": 0.0455, "step": 169420 }, { "epoch": 65.8, "learning_rate": 1.1226925566343043e-05, "loss": 0.0441, "step": 169430 }, { "epoch": 65.8, "learning_rate": 1.1226407766990293e-05, "loss": 0.0118, "step": 169440 }, { "epoch": 65.81, "learning_rate": 1.1225889967637542e-05, "loss": 0.0548, "step": 169450 }, { "epoch": 65.81, "learning_rate": 1.1225372168284792e-05, "loss": 0.0754, "step": 169460 }, { "epoch": 65.81, "learning_rate": 1.1224854368932038e-05, "loss": 0.0244, "step": 169470 }, { "epoch": 65.82, "learning_rate": 1.1224336569579288e-05, "loss": 0.0006, "step": 169480 }, { "epoch": 65.82, "learning_rate": 1.1223818770226537e-05, "loss": 0.0816, "step": 169490 }, { "epoch": 65.83, "learning_rate": 1.1223300970873787e-05, "loss": 0.0633, "step": 169500 }, { "epoch": 65.83, "learning_rate": 1.1222783171521037e-05, "loss": 0.1546, "step": 169510 }, { "epoch": 65.83, "learning_rate": 1.1222265372168286e-05, "loss": 0.0533, "step": 169520 }, { "epoch": 65.84, "learning_rate": 1.1221747572815536e-05, "loss": 0.0747, "step": 169530 }, { "epoch": 65.84, "learning_rate": 1.1221229773462785e-05, "loss": 0.1262, "step": 169540 }, { "epoch": 65.84, "learning_rate": 1.1220711974110032e-05, "loss": 0.2004, "step": 169550 }, { "epoch": 65.85, "learning_rate": 1.1220194174757281e-05, "loss": 0.2035, "step": 169560 }, { "epoch": 65.85, "learning_rate": 1.1219676375404531e-05, "loss": 0.0413, "step": 169570 }, { "epoch": 65.86, "learning_rate": 1.121915857605178e-05, "loss": 0.0593, "step": 169580 }, { "epoch": 65.86, "learning_rate": 1.121864077669903e-05, "loss": 0.0388, "step": 169590 }, { "epoch": 65.86, "learning_rate": 1.121812297734628e-05, "loss": 0.066, "step": 169600 }, { "epoch": 65.87, "learning_rate": 1.121760517799353e-05, "loss": 0.0605, "step": 169610 }, { "epoch": 65.87, "learning_rate": 1.1217087378640779e-05, "loss": 0.0115, "step": 169620 }, { "epoch": 65.88, "learning_rate": 1.1216569579288025e-05, "loss": 0.0758, "step": 169630 }, { "epoch": 65.88, "learning_rate": 1.1216051779935275e-05, "loss": 0.0616, "step": 169640 }, { "epoch": 65.88, "learning_rate": 1.1215533980582525e-05, "loss": 0.0375, "step": 169650 }, { "epoch": 65.89, "learning_rate": 1.1215016181229774e-05, "loss": 0.0406, "step": 169660 }, { "epoch": 65.89, "learning_rate": 1.1214498381877024e-05, "loss": 0.0152, "step": 169670 }, { "epoch": 65.9, "learning_rate": 1.1213980582524273e-05, "loss": 0.0099, "step": 169680 }, { "epoch": 65.9, "learning_rate": 1.1213462783171523e-05, "loss": 0.0812, "step": 169690 }, { "epoch": 65.9, "learning_rate": 1.1212944983818773e-05, "loss": 0.0689, "step": 169700 }, { "epoch": 65.91, "learning_rate": 1.1212427184466019e-05, "loss": 0.0514, "step": 169710 }, { "epoch": 65.91, "learning_rate": 1.1211909385113268e-05, "loss": 0.0185, "step": 169720 }, { "epoch": 65.91, "learning_rate": 1.1211391585760518e-05, "loss": 0.0466, "step": 169730 }, { "epoch": 65.92, "learning_rate": 1.1210873786407768e-05, "loss": 0.0966, "step": 169740 }, { "epoch": 65.92, "learning_rate": 1.1210355987055017e-05, "loss": 0.1537, "step": 169750 }, { "epoch": 65.93, "learning_rate": 1.1209838187702267e-05, "loss": 0.1197, "step": 169760 }, { "epoch": 65.93, "learning_rate": 1.1209320388349517e-05, "loss": 0.1824, "step": 169770 }, { "epoch": 65.93, "learning_rate": 1.1208802588996766e-05, "loss": 0.0434, "step": 169780 }, { "epoch": 65.94, "learning_rate": 1.1208284789644012e-05, "loss": 0.0166, "step": 169790 }, { "epoch": 65.94, "learning_rate": 1.1207766990291262e-05, "loss": 0.0263, "step": 169800 }, { "epoch": 65.95, "learning_rate": 1.1207249190938512e-05, "loss": 0.0941, "step": 169810 }, { "epoch": 65.95, "learning_rate": 1.1206731391585761e-05, "loss": 0.1069, "step": 169820 }, { "epoch": 65.95, "learning_rate": 1.1206213592233011e-05, "loss": 0.0086, "step": 169830 }, { "epoch": 65.96, "learning_rate": 1.120569579288026e-05, "loss": 0.0293, "step": 169840 }, { "epoch": 65.96, "learning_rate": 1.120517799352751e-05, "loss": 0.0317, "step": 169850 }, { "epoch": 65.97, "learning_rate": 1.120466019417476e-05, "loss": 0.0217, "step": 169860 }, { "epoch": 65.97, "learning_rate": 1.120414239482201e-05, "loss": 0.0499, "step": 169870 }, { "epoch": 65.97, "learning_rate": 1.1203624595469256e-05, "loss": 0.2496, "step": 169880 }, { "epoch": 65.98, "learning_rate": 1.1203106796116505e-05, "loss": 0.1562, "step": 169890 }, { "epoch": 65.98, "learning_rate": 1.1202588996763755e-05, "loss": 0.0027, "step": 169900 }, { "epoch": 65.98, "learning_rate": 1.1202071197411004e-05, "loss": 0.0026, "step": 169910 }, { "epoch": 65.99, "learning_rate": 1.1201553398058254e-05, "loss": 0.0693, "step": 169920 }, { "epoch": 65.99, "learning_rate": 1.1201035598705504e-05, "loss": 0.0842, "step": 169930 }, { "epoch": 66.0, "learning_rate": 1.1200517799352753e-05, "loss": 0.1062, "step": 169940 }, { "epoch": 66.0, "learning_rate": 1.1200000000000001e-05, "loss": 0.2337, "step": 169950 }, { "epoch": 66.0, "eval_accuracy": 0.951031636863824, "eval_loss": 0.3330360949039459, "eval_runtime": 8.1861, "eval_samples_per_second": 444.046, "eval_steps_per_second": 55.582, "step": 169950 }, { "epoch": 66.0, "learning_rate": 1.1199482200647249e-05, "loss": 0.0164, "step": 169960 }, { "epoch": 66.01, "learning_rate": 1.1198964401294499e-05, "loss": 0.0503, "step": 169970 }, { "epoch": 66.01, "learning_rate": 1.1198446601941748e-05, "loss": 0.0116, "step": 169980 }, { "epoch": 66.02, "learning_rate": 1.1197928802588998e-05, "loss": 0.0566, "step": 169990 }, { "epoch": 66.02, "learning_rate": 1.1197411003236248e-05, "loss": 0.0158, "step": 170000 }, { "epoch": 66.02, "learning_rate": 1.1196893203883497e-05, "loss": 0.0013, "step": 170010 }, { "epoch": 66.03, "learning_rate": 1.1196375404530747e-05, "loss": 0.0211, "step": 170020 }, { "epoch": 66.03, "learning_rate": 1.1195857605177995e-05, "loss": 0.0365, "step": 170030 }, { "epoch": 66.03, "learning_rate": 1.1195339805825243e-05, "loss": 0.063, "step": 170040 }, { "epoch": 66.04, "learning_rate": 1.1194822006472492e-05, "loss": 0.0729, "step": 170050 }, { "epoch": 66.04, "learning_rate": 1.1194304207119742e-05, "loss": 0.1178, "step": 170060 }, { "epoch": 66.05, "learning_rate": 1.1193786407766992e-05, "loss": 0.0285, "step": 170070 }, { "epoch": 66.05, "learning_rate": 1.1193268608414241e-05, "loss": 0.0782, "step": 170080 }, { "epoch": 66.05, "learning_rate": 1.119275080906149e-05, "loss": 0.0034, "step": 170090 }, { "epoch": 66.06, "learning_rate": 1.1192233009708739e-05, "loss": 0.143, "step": 170100 }, { "epoch": 66.06, "learning_rate": 1.1191715210355988e-05, "loss": 0.0576, "step": 170110 }, { "epoch": 66.07, "learning_rate": 1.1191197411003236e-05, "loss": 0.0518, "step": 170120 }, { "epoch": 66.07, "learning_rate": 1.1190679611650486e-05, "loss": 0.2071, "step": 170130 }, { "epoch": 66.07, "learning_rate": 1.1190161812297735e-05, "loss": 0.0009, "step": 170140 }, { "epoch": 66.08, "learning_rate": 1.1189644012944985e-05, "loss": 0.1267, "step": 170150 }, { "epoch": 66.08, "learning_rate": 1.1189126213592235e-05, "loss": 0.0587, "step": 170160 }, { "epoch": 66.09, "learning_rate": 1.1188608414239484e-05, "loss": 0.0269, "step": 170170 }, { "epoch": 66.09, "learning_rate": 1.1188090614886732e-05, "loss": 0.066, "step": 170180 }, { "epoch": 66.09, "learning_rate": 1.1187572815533982e-05, "loss": 0.0908, "step": 170190 }, { "epoch": 66.1, "learning_rate": 1.118705501618123e-05, "loss": 0.0442, "step": 170200 }, { "epoch": 66.1, "learning_rate": 1.118653721682848e-05, "loss": 0.0482, "step": 170210 }, { "epoch": 66.1, "learning_rate": 1.1186019417475729e-05, "loss": 0.0749, "step": 170220 }, { "epoch": 66.11, "learning_rate": 1.1185501618122979e-05, "loss": 0.0209, "step": 170230 }, { "epoch": 66.11, "learning_rate": 1.1184983818770228e-05, "loss": 0.0889, "step": 170240 }, { "epoch": 66.12, "learning_rate": 1.1184466019417476e-05, "loss": 0.0182, "step": 170250 }, { "epoch": 66.12, "learning_rate": 1.1183948220064726e-05, "loss": 0.002, "step": 170260 }, { "epoch": 66.12, "learning_rate": 1.1183430420711975e-05, "loss": 0.0438, "step": 170270 }, { "epoch": 66.13, "learning_rate": 1.1182912621359223e-05, "loss": 0.1083, "step": 170280 }, { "epoch": 66.13, "learning_rate": 1.1182394822006473e-05, "loss": 0.1071, "step": 170290 }, { "epoch": 66.14, "learning_rate": 1.1181877022653723e-05, "loss": 0.0628, "step": 170300 }, { "epoch": 66.14, "learning_rate": 1.1181359223300972e-05, "loss": 0.0773, "step": 170310 }, { "epoch": 66.14, "learning_rate": 1.1180841423948222e-05, "loss": 0.1106, "step": 170320 }, { "epoch": 66.15, "learning_rate": 1.118032362459547e-05, "loss": 0.0828, "step": 170330 }, { "epoch": 66.15, "learning_rate": 1.117980582524272e-05, "loss": 0.0522, "step": 170340 }, { "epoch": 66.16, "learning_rate": 1.1179288025889969e-05, "loss": 0.1227, "step": 170350 }, { "epoch": 66.16, "learning_rate": 1.1178770226537217e-05, "loss": 0.0621, "step": 170360 }, { "epoch": 66.16, "learning_rate": 1.1178252427184467e-05, "loss": 0.0111, "step": 170370 }, { "epoch": 66.17, "learning_rate": 1.1177734627831716e-05, "loss": 0.0042, "step": 170380 }, { "epoch": 66.17, "learning_rate": 1.1177216828478966e-05, "loss": 0.0007, "step": 170390 }, { "epoch": 66.17, "learning_rate": 1.1176699029126214e-05, "loss": 0.0772, "step": 170400 }, { "epoch": 66.18, "learning_rate": 1.1176181229773463e-05, "loss": 0.0739, "step": 170410 }, { "epoch": 66.18, "learning_rate": 1.1175663430420713e-05, "loss": 0.0437, "step": 170420 }, { "epoch": 66.19, "learning_rate": 1.1175145631067963e-05, "loss": 0.1578, "step": 170430 }, { "epoch": 66.19, "learning_rate": 1.1174627831715212e-05, "loss": 0.0933, "step": 170440 }, { "epoch": 66.19, "learning_rate": 1.117411003236246e-05, "loss": 0.077, "step": 170450 }, { "epoch": 66.2, "learning_rate": 1.117359223300971e-05, "loss": 0.0463, "step": 170460 }, { "epoch": 66.2, "learning_rate": 1.117307443365696e-05, "loss": 0.0957, "step": 170470 }, { "epoch": 66.21, "learning_rate": 1.1172556634304207e-05, "loss": 0.0933, "step": 170480 }, { "epoch": 66.21, "learning_rate": 1.1172038834951457e-05, "loss": 0.0436, "step": 170490 }, { "epoch": 66.21, "learning_rate": 1.1171521035598706e-05, "loss": 0.0162, "step": 170500 }, { "epoch": 66.22, "learning_rate": 1.1171003236245956e-05, "loss": 0.0407, "step": 170510 }, { "epoch": 66.22, "learning_rate": 1.1170485436893206e-05, "loss": 0.0451, "step": 170520 }, { "epoch": 66.23, "learning_rate": 1.1169967637540454e-05, "loss": 0.0996, "step": 170530 }, { "epoch": 66.23, "learning_rate": 1.1169449838187703e-05, "loss": 0.2134, "step": 170540 }, { "epoch": 66.23, "learning_rate": 1.1168932038834953e-05, "loss": 0.0433, "step": 170550 }, { "epoch": 66.24, "learning_rate": 1.11684142394822e-05, "loss": 0.0977, "step": 170560 }, { "epoch": 66.24, "learning_rate": 1.116789644012945e-05, "loss": 0.1031, "step": 170570 }, { "epoch": 66.24, "learning_rate": 1.11673786407767e-05, "loss": 0.0335, "step": 170580 }, { "epoch": 66.25, "learning_rate": 1.116686084142395e-05, "loss": 0.0237, "step": 170590 }, { "epoch": 66.25, "learning_rate": 1.11663430420712e-05, "loss": 0.0059, "step": 170600 }, { "epoch": 66.26, "learning_rate": 1.1165825242718447e-05, "loss": 0.0984, "step": 170610 }, { "epoch": 66.26, "learning_rate": 1.1165307443365697e-05, "loss": 0.0079, "step": 170620 }, { "epoch": 66.26, "learning_rate": 1.1164789644012945e-05, "loss": 0.0672, "step": 170630 }, { "epoch": 66.27, "learning_rate": 1.1164271844660194e-05, "loss": 0.0109, "step": 170640 }, { "epoch": 66.27, "learning_rate": 1.1163754045307444e-05, "loss": 0.0446, "step": 170650 }, { "epoch": 66.28, "learning_rate": 1.1163236245954694e-05, "loss": 0.0308, "step": 170660 }, { "epoch": 66.28, "learning_rate": 1.1162718446601943e-05, "loss": 0.048, "step": 170670 }, { "epoch": 66.28, "learning_rate": 1.1162200647249193e-05, "loss": 0.1441, "step": 170680 }, { "epoch": 66.29, "learning_rate": 1.116168284789644e-05, "loss": 0.0582, "step": 170690 }, { "epoch": 66.29, "learning_rate": 1.116116504854369e-05, "loss": 0.1504, "step": 170700 }, { "epoch": 66.3, "learning_rate": 1.1160647249190938e-05, "loss": 0.1974, "step": 170710 }, { "epoch": 66.3, "learning_rate": 1.1160129449838188e-05, "loss": 0.0586, "step": 170720 }, { "epoch": 66.3, "learning_rate": 1.1159611650485438e-05, "loss": 0.083, "step": 170730 }, { "epoch": 66.31, "learning_rate": 1.1159093851132687e-05, "loss": 0.1323, "step": 170740 }, { "epoch": 66.31, "learning_rate": 1.1158576051779937e-05, "loss": 0.0673, "step": 170750 }, { "epoch": 66.31, "learning_rate": 1.1158058252427186e-05, "loss": 0.0625, "step": 170760 }, { "epoch": 66.32, "learning_rate": 1.1157540453074434e-05, "loss": 0.0664, "step": 170770 }, { "epoch": 66.32, "learning_rate": 1.1157022653721682e-05, "loss": 0.0022, "step": 170780 }, { "epoch": 66.33, "learning_rate": 1.1156504854368932e-05, "loss": 0.1038, "step": 170790 }, { "epoch": 66.33, "learning_rate": 1.1155987055016181e-05, "loss": 0.1543, "step": 170800 }, { "epoch": 66.33, "learning_rate": 1.1155469255663431e-05, "loss": 0.0405, "step": 170810 }, { "epoch": 66.34, "learning_rate": 1.115495145631068e-05, "loss": 0.1025, "step": 170820 }, { "epoch": 66.34, "learning_rate": 1.115443365695793e-05, "loss": 0.0546, "step": 170830 }, { "epoch": 66.35, "learning_rate": 1.115391585760518e-05, "loss": 0.2046, "step": 170840 }, { "epoch": 66.35, "learning_rate": 1.1153398058252428e-05, "loss": 0.1409, "step": 170850 }, { "epoch": 66.35, "learning_rate": 1.1152880258899676e-05, "loss": 0.0124, "step": 170860 }, { "epoch": 66.36, "learning_rate": 1.1152362459546925e-05, "loss": 0.0861, "step": 170870 }, { "epoch": 66.36, "learning_rate": 1.1151844660194175e-05, "loss": 0.037, "step": 170880 }, { "epoch": 66.37, "learning_rate": 1.1151326860841425e-05, "loss": 0.0237, "step": 170890 }, { "epoch": 66.37, "learning_rate": 1.1150809061488674e-05, "loss": 0.07, "step": 170900 }, { "epoch": 66.37, "learning_rate": 1.1150291262135924e-05, "loss": 0.0118, "step": 170910 }, { "epoch": 66.38, "learning_rate": 1.1149773462783173e-05, "loss": 0.0501, "step": 170920 }, { "epoch": 66.38, "learning_rate": 1.114925566343042e-05, "loss": 0.0415, "step": 170930 }, { "epoch": 66.38, "learning_rate": 1.114873786407767e-05, "loss": 0.0208, "step": 170940 }, { "epoch": 66.39, "learning_rate": 1.1148220064724919e-05, "loss": 0.0374, "step": 170950 }, { "epoch": 66.39, "learning_rate": 1.1147702265372169e-05, "loss": 0.068, "step": 170960 }, { "epoch": 66.4, "learning_rate": 1.1147184466019418e-05, "loss": 0.0405, "step": 170970 }, { "epoch": 66.4, "learning_rate": 1.1146666666666668e-05, "loss": 0.0754, "step": 170980 }, { "epoch": 66.4, "learning_rate": 1.1146148867313917e-05, "loss": 0.1058, "step": 170990 }, { "epoch": 66.41, "learning_rate": 1.1145631067961167e-05, "loss": 0.0191, "step": 171000 }, { "epoch": 66.41, "learning_rate": 1.1145113268608417e-05, "loss": 0.1645, "step": 171010 }, { "epoch": 66.42, "learning_rate": 1.1144595469255663e-05, "loss": 0.0795, "step": 171020 }, { "epoch": 66.42, "learning_rate": 1.1144077669902913e-05, "loss": 0.1664, "step": 171030 }, { "epoch": 66.42, "learning_rate": 1.1143559870550162e-05, "loss": 0.0609, "step": 171040 }, { "epoch": 66.43, "learning_rate": 1.1143042071197412e-05, "loss": 0.1361, "step": 171050 }, { "epoch": 66.43, "learning_rate": 1.1142524271844661e-05, "loss": 0.061, "step": 171060 }, { "epoch": 66.43, "learning_rate": 1.1142006472491911e-05, "loss": 0.0413, "step": 171070 }, { "epoch": 66.44, "learning_rate": 1.114148867313916e-05, "loss": 0.0726, "step": 171080 }, { "epoch": 66.44, "learning_rate": 1.114097087378641e-05, "loss": 0.1758, "step": 171090 }, { "epoch": 66.45, "learning_rate": 1.1140453074433656e-05, "loss": 0.1055, "step": 171100 }, { "epoch": 66.45, "learning_rate": 1.1139935275080906e-05, "loss": 0.0907, "step": 171110 }, { "epoch": 66.45, "learning_rate": 1.1139417475728156e-05, "loss": 0.0158, "step": 171120 }, { "epoch": 66.46, "learning_rate": 1.1138899676375405e-05, "loss": 0.0445, "step": 171130 }, { "epoch": 66.46, "learning_rate": 1.1138381877022655e-05, "loss": 0.0049, "step": 171140 }, { "epoch": 66.47, "learning_rate": 1.1137864077669905e-05, "loss": 0.0002, "step": 171150 }, { "epoch": 66.47, "learning_rate": 1.1137346278317154e-05, "loss": 0.0015, "step": 171160 }, { "epoch": 66.47, "learning_rate": 1.1136828478964404e-05, "loss": 0.1283, "step": 171170 }, { "epoch": 66.48, "learning_rate": 1.113631067961165e-05, "loss": 0.2223, "step": 171180 }, { "epoch": 66.48, "learning_rate": 1.11357928802589e-05, "loss": 0.0186, "step": 171190 }, { "epoch": 66.49, "learning_rate": 1.113527508090615e-05, "loss": 0.0635, "step": 171200 }, { "epoch": 66.49, "learning_rate": 1.1134757281553399e-05, "loss": 0.0117, "step": 171210 }, { "epoch": 66.49, "learning_rate": 1.1134239482200648e-05, "loss": 0.0341, "step": 171220 }, { "epoch": 66.5, "learning_rate": 1.1133721682847898e-05, "loss": 0.1455, "step": 171230 }, { "epoch": 66.5, "learning_rate": 1.1133203883495148e-05, "loss": 0.2285, "step": 171240 }, { "epoch": 66.5, "learning_rate": 1.1132686084142397e-05, "loss": 0.2105, "step": 171250 }, { "epoch": 66.51, "learning_rate": 1.1132168284789644e-05, "loss": 0.0279, "step": 171260 }, { "epoch": 66.51, "learning_rate": 1.1131650485436893e-05, "loss": 0.1267, "step": 171270 }, { "epoch": 66.52, "learning_rate": 1.1131132686084143e-05, "loss": 0.0851, "step": 171280 }, { "epoch": 66.52, "learning_rate": 1.1130614886731392e-05, "loss": 0.2046, "step": 171290 }, { "epoch": 66.52, "learning_rate": 1.1130097087378642e-05, "loss": 0.0394, "step": 171300 }, { "epoch": 66.53, "learning_rate": 1.1129579288025892e-05, "loss": 0.1169, "step": 171310 }, { "epoch": 66.53, "learning_rate": 1.1129061488673141e-05, "loss": 0.0013, "step": 171320 }, { "epoch": 66.54, "learning_rate": 1.1128543689320391e-05, "loss": 0.0015, "step": 171330 }, { "epoch": 66.54, "learning_rate": 1.1128025889967637e-05, "loss": 0.1745, "step": 171340 }, { "epoch": 66.54, "learning_rate": 1.1127508090614887e-05, "loss": 0.1446, "step": 171350 }, { "epoch": 66.55, "learning_rate": 1.1126990291262136e-05, "loss": 0.1358, "step": 171360 }, { "epoch": 66.55, "learning_rate": 1.1126472491909386e-05, "loss": 0.0247, "step": 171370 }, { "epoch": 66.56, "learning_rate": 1.1125954692556636e-05, "loss": 0.0125, "step": 171380 }, { "epoch": 66.56, "learning_rate": 1.1125436893203885e-05, "loss": 0.0887, "step": 171390 }, { "epoch": 66.56, "learning_rate": 1.1124919093851135e-05, "loss": 0.0498, "step": 171400 }, { "epoch": 66.57, "learning_rate": 1.1124401294498384e-05, "loss": 0.0127, "step": 171410 }, { "epoch": 66.57, "learning_rate": 1.112388349514563e-05, "loss": 0.0069, "step": 171420 }, { "epoch": 66.57, "learning_rate": 1.112336569579288e-05, "loss": 0.0324, "step": 171430 }, { "epoch": 66.58, "learning_rate": 1.112284789644013e-05, "loss": 0.0845, "step": 171440 }, { "epoch": 66.58, "learning_rate": 1.112233009708738e-05, "loss": 0.1043, "step": 171450 }, { "epoch": 66.59, "learning_rate": 1.1121812297734629e-05, "loss": 0.0753, "step": 171460 }, { "epoch": 66.59, "learning_rate": 1.1121294498381879e-05, "loss": 0.0674, "step": 171470 }, { "epoch": 66.59, "learning_rate": 1.1120776699029128e-05, "loss": 0.025, "step": 171480 }, { "epoch": 66.6, "learning_rate": 1.1120258899676378e-05, "loss": 0.1231, "step": 171490 }, { "epoch": 66.6, "learning_rate": 1.1119741100323624e-05, "loss": 0.1318, "step": 171500 }, { "epoch": 66.61, "learning_rate": 1.1119223300970874e-05, "loss": 0.101, "step": 171510 }, { "epoch": 66.61, "learning_rate": 1.1118705501618123e-05, "loss": 0.2335, "step": 171520 }, { "epoch": 66.61, "learning_rate": 1.1118187702265373e-05, "loss": 0.0701, "step": 171530 }, { "epoch": 66.62, "learning_rate": 1.1117669902912623e-05, "loss": 0.0102, "step": 171540 }, { "epoch": 66.62, "learning_rate": 1.1117152103559872e-05, "loss": 0.1691, "step": 171550 }, { "epoch": 66.63, "learning_rate": 1.1116634304207122e-05, "loss": 0.1114, "step": 171560 }, { "epoch": 66.63, "learning_rate": 1.111611650485437e-05, "loss": 0.026, "step": 171570 }, { "epoch": 66.63, "learning_rate": 1.111559870550162e-05, "loss": 0.1072, "step": 171580 }, { "epoch": 66.64, "learning_rate": 1.1115080906148867e-05, "loss": 0.1495, "step": 171590 }, { "epoch": 66.64, "learning_rate": 1.1114563106796117e-05, "loss": 0.0911, "step": 171600 }, { "epoch": 66.64, "learning_rate": 1.1114045307443367e-05, "loss": 0.0192, "step": 171610 }, { "epoch": 66.65, "learning_rate": 1.1113527508090616e-05, "loss": 0.0081, "step": 171620 }, { "epoch": 66.65, "learning_rate": 1.1113009708737866e-05, "loss": 0.0451, "step": 171630 }, { "epoch": 66.66, "learning_rate": 1.1112491909385115e-05, "loss": 0.0809, "step": 171640 }, { "epoch": 66.66, "learning_rate": 1.1111974110032363e-05, "loss": 0.0974, "step": 171650 }, { "epoch": 66.66, "learning_rate": 1.1111456310679613e-05, "loss": 0.017, "step": 171660 }, { "epoch": 66.67, "learning_rate": 1.1110938511326861e-05, "loss": 0.0308, "step": 171670 }, { "epoch": 66.67, "learning_rate": 1.111042071197411e-05, "loss": 0.0975, "step": 171680 }, { "epoch": 66.68, "learning_rate": 1.110990291262136e-05, "loss": 0.0897, "step": 171690 }, { "epoch": 66.68, "learning_rate": 1.110938511326861e-05, "loss": 0.0152, "step": 171700 }, { "epoch": 66.68, "learning_rate": 1.110886731391586e-05, "loss": 0.0015, "step": 171710 }, { "epoch": 66.69, "learning_rate": 1.1108349514563107e-05, "loss": 0.0261, "step": 171720 }, { "epoch": 66.69, "learning_rate": 1.1107831715210357e-05, "loss": 0.0923, "step": 171730 }, { "epoch": 66.7, "learning_rate": 1.1107313915857607e-05, "loss": 0.0017, "step": 171740 }, { "epoch": 66.7, "learning_rate": 1.1106796116504855e-05, "loss": 0.122, "step": 171750 }, { "epoch": 66.7, "learning_rate": 1.1106278317152104e-05, "loss": 0.0738, "step": 171760 }, { "epoch": 66.71, "learning_rate": 1.1105760517799354e-05, "loss": 0.0724, "step": 171770 }, { "epoch": 66.71, "learning_rate": 1.1105242718446603e-05, "loss": 0.2447, "step": 171780 }, { "epoch": 66.71, "learning_rate": 1.1104724919093853e-05, "loss": 0.0292, "step": 171790 }, { "epoch": 66.72, "learning_rate": 1.1104207119741101e-05, "loss": 0.1115, "step": 171800 }, { "epoch": 66.72, "learning_rate": 1.110368932038835e-05, "loss": 0.2066, "step": 171810 }, { "epoch": 66.73, "learning_rate": 1.11031715210356e-05, "loss": 0.0004, "step": 171820 }, { "epoch": 66.73, "learning_rate": 1.1102653721682848e-05, "loss": 0.0207, "step": 171830 }, { "epoch": 66.73, "learning_rate": 1.1102135922330098e-05, "loss": 0.0908, "step": 171840 }, { "epoch": 66.74, "learning_rate": 1.1101618122977347e-05, "loss": 0.1119, "step": 171850 }, { "epoch": 66.74, "learning_rate": 1.1101100323624597e-05, "loss": 0.0711, "step": 171860 }, { "epoch": 66.75, "learning_rate": 1.1100582524271845e-05, "loss": 0.0339, "step": 171870 }, { "epoch": 66.75, "learning_rate": 1.1100064724919094e-05, "loss": 0.0448, "step": 171880 }, { "epoch": 66.75, "learning_rate": 1.1099546925566344e-05, "loss": 0.0539, "step": 171890 }, { "epoch": 66.76, "learning_rate": 1.1099029126213594e-05, "loss": 0.0219, "step": 171900 }, { "epoch": 66.76, "learning_rate": 1.1098511326860842e-05, "loss": 0.0474, "step": 171910 }, { "epoch": 66.77, "learning_rate": 1.1097993527508091e-05, "loss": 0.0536, "step": 171920 }, { "epoch": 66.77, "learning_rate": 1.1097475728155341e-05, "loss": 0.0206, "step": 171930 }, { "epoch": 66.77, "learning_rate": 1.109695792880259e-05, "loss": 0.0395, "step": 171940 }, { "epoch": 66.78, "learning_rate": 1.1096440129449838e-05, "loss": 0.0552, "step": 171950 }, { "epoch": 66.78, "learning_rate": 1.1095922330097088e-05, "loss": 0.0133, "step": 171960 }, { "epoch": 66.78, "learning_rate": 1.1095404530744338e-05, "loss": 0.1071, "step": 171970 }, { "epoch": 66.79, "learning_rate": 1.1094886731391587e-05, "loss": 0.1399, "step": 171980 }, { "epoch": 66.79, "learning_rate": 1.1094368932038835e-05, "loss": 0.0279, "step": 171990 }, { "epoch": 66.8, "learning_rate": 1.1093851132686085e-05, "loss": 0.0141, "step": 172000 }, { "epoch": 66.8, "learning_rate": 1.1093333333333334e-05, "loss": 0.0029, "step": 172010 }, { "epoch": 66.8, "learning_rate": 1.1092815533980584e-05, "loss": 0.1476, "step": 172020 }, { "epoch": 66.81, "learning_rate": 1.1092297734627832e-05, "loss": 0.098, "step": 172030 }, { "epoch": 66.81, "learning_rate": 1.1091779935275082e-05, "loss": 0.0175, "step": 172040 }, { "epoch": 66.82, "learning_rate": 1.1091262135922331e-05, "loss": 0.0053, "step": 172050 }, { "epoch": 66.82, "learning_rate": 1.109074433656958e-05, "loss": 0.0947, "step": 172060 }, { "epoch": 66.82, "learning_rate": 1.1090226537216829e-05, "loss": 0.015, "step": 172070 }, { "epoch": 66.83, "learning_rate": 1.1089708737864078e-05, "loss": 0.0501, "step": 172080 }, { "epoch": 66.83, "learning_rate": 1.1089190938511328e-05, "loss": 0.186, "step": 172090 }, { "epoch": 66.83, "learning_rate": 1.1088673139158576e-05, "loss": 0.1847, "step": 172100 }, { "epoch": 66.84, "learning_rate": 1.1088155339805826e-05, "loss": 0.002, "step": 172110 }, { "epoch": 66.84, "learning_rate": 1.1087637540453075e-05, "loss": 0.0267, "step": 172120 }, { "epoch": 66.85, "learning_rate": 1.1087119741100325e-05, "loss": 0.0082, "step": 172130 }, { "epoch": 66.85, "learning_rate": 1.1086601941747574e-05, "loss": 0.0771, "step": 172140 }, { "epoch": 66.85, "learning_rate": 1.1086084142394824e-05, "loss": 0.0003, "step": 172150 }, { "epoch": 66.86, "learning_rate": 1.1085566343042072e-05, "loss": 0.0011, "step": 172160 }, { "epoch": 66.86, "learning_rate": 1.1085048543689322e-05, "loss": 0.0365, "step": 172170 }, { "epoch": 66.87, "learning_rate": 1.108453074433657e-05, "loss": 0.0116, "step": 172180 }, { "epoch": 66.87, "learning_rate": 1.1084012944983819e-05, "loss": 0.0195, "step": 172190 }, { "epoch": 66.87, "learning_rate": 1.1083495145631069e-05, "loss": 0.1264, "step": 172200 }, { "epoch": 66.88, "learning_rate": 1.1082977346278318e-05, "loss": 0.1395, "step": 172210 }, { "epoch": 66.88, "learning_rate": 1.1082459546925568e-05, "loss": 0.0242, "step": 172220 }, { "epoch": 66.89, "learning_rate": 1.1081941747572818e-05, "loss": 0.1665, "step": 172230 }, { "epoch": 66.89, "learning_rate": 1.1081423948220065e-05, "loss": 0.076, "step": 172240 }, { "epoch": 66.89, "learning_rate": 1.1080906148867313e-05, "loss": 0.1804, "step": 172250 }, { "epoch": 66.9, "learning_rate": 1.1080388349514563e-05, "loss": 0.095, "step": 172260 }, { "epoch": 66.9, "learning_rate": 1.1079870550161813e-05, "loss": 0.2112, "step": 172270 }, { "epoch": 66.9, "learning_rate": 1.1079352750809062e-05, "loss": 0.0839, "step": 172280 }, { "epoch": 66.91, "learning_rate": 1.1078834951456312e-05, "loss": 0.2411, "step": 172290 }, { "epoch": 66.91, "learning_rate": 1.1078317152103561e-05, "loss": 0.1247, "step": 172300 }, { "epoch": 66.92, "learning_rate": 1.1077799352750811e-05, "loss": 0.0033, "step": 172310 }, { "epoch": 66.92, "learning_rate": 1.1077281553398059e-05, "loss": 0.067, "step": 172320 }, { "epoch": 66.92, "learning_rate": 1.1076763754045307e-05, "loss": 0.1136, "step": 172330 }, { "epoch": 66.93, "learning_rate": 1.1076245954692557e-05, "loss": 0.1222, "step": 172340 }, { "epoch": 66.93, "learning_rate": 1.1075728155339806e-05, "loss": 0.0321, "step": 172350 }, { "epoch": 66.94, "learning_rate": 1.1075210355987056e-05, "loss": 0.1564, "step": 172360 }, { "epoch": 66.94, "learning_rate": 1.1074692556634305e-05, "loss": 0.0377, "step": 172370 }, { "epoch": 66.94, "learning_rate": 1.1074174757281555e-05, "loss": 0.0049, "step": 172380 }, { "epoch": 66.95, "learning_rate": 1.1073656957928805e-05, "loss": 0.1062, "step": 172390 }, { "epoch": 66.95, "learning_rate": 1.1073139158576051e-05, "loss": 0.0274, "step": 172400 }, { "epoch": 66.96, "learning_rate": 1.10726213592233e-05, "loss": 0.0274, "step": 172410 }, { "epoch": 66.96, "learning_rate": 1.107210355987055e-05, "loss": 0.0852, "step": 172420 }, { "epoch": 66.96, "learning_rate": 1.10715857605178e-05, "loss": 0.0612, "step": 172430 }, { "epoch": 66.97, "learning_rate": 1.107106796116505e-05, "loss": 0.0247, "step": 172440 }, { "epoch": 66.97, "learning_rate": 1.1070550161812299e-05, "loss": 0.1255, "step": 172450 }, { "epoch": 66.97, "learning_rate": 1.1070032362459549e-05, "loss": 0.1527, "step": 172460 }, { "epoch": 66.98, "learning_rate": 1.1069514563106798e-05, "loss": 0.0109, "step": 172470 }, { "epoch": 66.98, "learning_rate": 1.1068996763754044e-05, "loss": 0.0006, "step": 172480 }, { "epoch": 66.99, "learning_rate": 1.1068478964401294e-05, "loss": 0.0131, "step": 172490 }, { "epoch": 66.99, "learning_rate": 1.1067961165048544e-05, "loss": 0.1492, "step": 172500 }, { "epoch": 66.99, "learning_rate": 1.1067443365695793e-05, "loss": 0.0331, "step": 172510 }, { "epoch": 67.0, "learning_rate": 1.1066925566343043e-05, "loss": 0.0705, "step": 172520 }, { "epoch": 67.0, "eval_accuracy": 0.951031636863824, "eval_loss": 0.3480420410633087, "eval_runtime": 8.2142, "eval_samples_per_second": 442.526, "eval_steps_per_second": 55.392, "step": 172525 }, { "epoch": 67.0, "learning_rate": 1.1066407766990293e-05, "loss": 0.0972, "step": 172530 }, { "epoch": 67.01, "learning_rate": 1.1065889967637542e-05, "loss": 0.0404, "step": 172540 }, { "epoch": 67.01, "learning_rate": 1.1065372168284792e-05, "loss": 0.0024, "step": 172550 }, { "epoch": 67.01, "learning_rate": 1.1064854368932038e-05, "loss": 0.0102, "step": 172560 }, { "epoch": 67.02, "learning_rate": 1.1064336569579288e-05, "loss": 0.0512, "step": 172570 }, { "epoch": 67.02, "learning_rate": 1.1063818770226537e-05, "loss": 0.0289, "step": 172580 }, { "epoch": 67.03, "learning_rate": 1.1063300970873787e-05, "loss": 0.0619, "step": 172590 }, { "epoch": 67.03, "learning_rate": 1.1062783171521036e-05, "loss": 0.0193, "step": 172600 }, { "epoch": 67.03, "learning_rate": 1.1062265372168286e-05, "loss": 0.0691, "step": 172610 }, { "epoch": 67.04, "learning_rate": 1.1061747572815536e-05, "loss": 0.0774, "step": 172620 }, { "epoch": 67.04, "learning_rate": 1.1061229773462785e-05, "loss": 0.2546, "step": 172630 }, { "epoch": 67.04, "learning_rate": 1.1060711974110032e-05, "loss": 0.1219, "step": 172640 }, { "epoch": 67.05, "learning_rate": 1.1060194174757281e-05, "loss": 0.0488, "step": 172650 }, { "epoch": 67.05, "learning_rate": 1.105967637540453e-05, "loss": 0.1328, "step": 172660 }, { "epoch": 67.06, "learning_rate": 1.105915857605178e-05, "loss": 0.0984, "step": 172670 }, { "epoch": 67.06, "learning_rate": 1.105864077669903e-05, "loss": 0.0712, "step": 172680 }, { "epoch": 67.06, "learning_rate": 1.105812297734628e-05, "loss": 0.0644, "step": 172690 }, { "epoch": 67.07, "learning_rate": 1.105760517799353e-05, "loss": 0.0639, "step": 172700 }, { "epoch": 67.07, "learning_rate": 1.1057087378640779e-05, "loss": 0.0782, "step": 172710 }, { "epoch": 67.08, "learning_rate": 1.1056569579288028e-05, "loss": 0.0248, "step": 172720 }, { "epoch": 67.08, "learning_rate": 1.1056051779935275e-05, "loss": 0.0133, "step": 172730 }, { "epoch": 67.08, "learning_rate": 1.1055533980582524e-05, "loss": 0.0332, "step": 172740 }, { "epoch": 67.09, "learning_rate": 1.1055016181229774e-05, "loss": 0.0132, "step": 172750 }, { "epoch": 67.09, "learning_rate": 1.1054498381877024e-05, "loss": 0.0821, "step": 172760 }, { "epoch": 67.1, "learning_rate": 1.1053980582524273e-05, "loss": 0.1764, "step": 172770 }, { "epoch": 67.1, "learning_rate": 1.1053462783171523e-05, "loss": 0.1969, "step": 172780 }, { "epoch": 67.1, "learning_rate": 1.1052944983818772e-05, "loss": 0.0243, "step": 172790 }, { "epoch": 67.11, "learning_rate": 1.1052427184466022e-05, "loss": 0.0519, "step": 172800 }, { "epoch": 67.11, "learning_rate": 1.1051909385113268e-05, "loss": 0.0094, "step": 172810 }, { "epoch": 67.11, "learning_rate": 1.1051391585760518e-05, "loss": 0.0488, "step": 172820 }, { "epoch": 67.12, "learning_rate": 1.1050873786407768e-05, "loss": 0.0763, "step": 172830 }, { "epoch": 67.12, "learning_rate": 1.1050355987055017e-05, "loss": 0.1604, "step": 172840 }, { "epoch": 67.13, "learning_rate": 1.1049838187702267e-05, "loss": 0.092, "step": 172850 }, { "epoch": 67.13, "learning_rate": 1.1049320388349516e-05, "loss": 0.001, "step": 172860 }, { "epoch": 67.13, "learning_rate": 1.1048802588996766e-05, "loss": 0.0004, "step": 172870 }, { "epoch": 67.14, "learning_rate": 1.1048284789644016e-05, "loss": 0.0261, "step": 172880 }, { "epoch": 67.14, "learning_rate": 1.1047766990291262e-05, "loss": 0.0539, "step": 172890 }, { "epoch": 67.15, "learning_rate": 1.1047249190938511e-05, "loss": 0.0009, "step": 172900 }, { "epoch": 67.15, "learning_rate": 1.1046731391585761e-05, "loss": 0.1068, "step": 172910 }, { "epoch": 67.15, "learning_rate": 1.104621359223301e-05, "loss": 0.012, "step": 172920 }, { "epoch": 67.16, "learning_rate": 1.104569579288026e-05, "loss": 0.0639, "step": 172930 }, { "epoch": 67.16, "learning_rate": 1.104517799352751e-05, "loss": 0.051, "step": 172940 }, { "epoch": 67.17, "learning_rate": 1.104466019417476e-05, "loss": 0.1596, "step": 172950 }, { "epoch": 67.17, "learning_rate": 1.104414239482201e-05, "loss": 0.1444, "step": 172960 }, { "epoch": 67.17, "learning_rate": 1.1043624595469255e-05, "loss": 0.0764, "step": 172970 }, { "epoch": 67.18, "learning_rate": 1.1043106796116505e-05, "loss": 0.0726, "step": 172980 }, { "epoch": 67.18, "learning_rate": 1.1042588996763755e-05, "loss": 0.0977, "step": 172990 }, { "epoch": 67.18, "learning_rate": 1.1042071197411004e-05, "loss": 0.1265, "step": 173000 }, { "epoch": 67.19, "learning_rate": 1.1041553398058254e-05, "loss": 0.0793, "step": 173010 }, { "epoch": 67.19, "learning_rate": 1.1041035598705503e-05, "loss": 0.1177, "step": 173020 }, { "epoch": 67.2, "learning_rate": 1.1040517799352753e-05, "loss": 0.0953, "step": 173030 }, { "epoch": 67.2, "learning_rate": 1.1040000000000001e-05, "loss": 0.0671, "step": 173040 }, { "epoch": 67.2, "learning_rate": 1.1039482200647249e-05, "loss": 0.1165, "step": 173050 }, { "epoch": 67.21, "learning_rate": 1.1038964401294499e-05, "loss": 0.0599, "step": 173060 }, { "epoch": 67.21, "learning_rate": 1.1038446601941748e-05, "loss": 0.0238, "step": 173070 }, { "epoch": 67.22, "learning_rate": 1.1037928802588998e-05, "loss": 0.1281, "step": 173080 }, { "epoch": 67.22, "learning_rate": 1.1037411003236247e-05, "loss": 0.1539, "step": 173090 }, { "epoch": 67.22, "learning_rate": 1.1036893203883497e-05, "loss": 0.1116, "step": 173100 }, { "epoch": 67.23, "learning_rate": 1.1036375404530747e-05, "loss": 0.0604, "step": 173110 }, { "epoch": 67.23, "learning_rate": 1.1035857605177995e-05, "loss": 0.0868, "step": 173120 }, { "epoch": 67.23, "learning_rate": 1.1035339805825243e-05, "loss": 0.0786, "step": 173130 }, { "epoch": 67.24, "learning_rate": 1.1034822006472492e-05, "loss": 0.0601, "step": 173140 }, { "epoch": 67.24, "learning_rate": 1.1034304207119742e-05, "loss": 0.0702, "step": 173150 }, { "epoch": 67.25, "learning_rate": 1.1033786407766991e-05, "loss": 0.0052, "step": 173160 }, { "epoch": 67.25, "learning_rate": 1.1033268608414241e-05, "loss": 0.1556, "step": 173170 }, { "epoch": 67.25, "learning_rate": 1.103275080906149e-05, "loss": 0.1069, "step": 173180 }, { "epoch": 67.26, "learning_rate": 1.1032233009708739e-05, "loss": 0.055, "step": 173190 }, { "epoch": 67.26, "learning_rate": 1.1031715210355988e-05, "loss": 0.0427, "step": 173200 }, { "epoch": 67.27, "learning_rate": 1.1031197411003236e-05, "loss": 0.0285, "step": 173210 }, { "epoch": 67.27, "learning_rate": 1.1030679611650486e-05, "loss": 0.1133, "step": 173220 }, { "epoch": 67.27, "learning_rate": 1.1030161812297735e-05, "loss": 0.0929, "step": 173230 }, { "epoch": 67.28, "learning_rate": 1.1029644012944985e-05, "loss": 0.1059, "step": 173240 }, { "epoch": 67.28, "learning_rate": 1.1029126213592235e-05, "loss": 0.0419, "step": 173250 }, { "epoch": 67.29, "learning_rate": 1.1028608414239484e-05, "loss": 0.0102, "step": 173260 }, { "epoch": 67.29, "learning_rate": 1.1028090614886732e-05, "loss": 0.0928, "step": 173270 }, { "epoch": 67.29, "learning_rate": 1.1027572815533982e-05, "loss": 0.0689, "step": 173280 }, { "epoch": 67.3, "learning_rate": 1.1027055016181231e-05, "loss": 0.0609, "step": 173290 }, { "epoch": 67.3, "learning_rate": 1.102653721682848e-05, "loss": 0.1797, "step": 173300 }, { "epoch": 67.3, "learning_rate": 1.1026019417475729e-05, "loss": 0.1429, "step": 173310 }, { "epoch": 67.31, "learning_rate": 1.1025501618122978e-05, "loss": 0.0004, "step": 173320 }, { "epoch": 67.31, "learning_rate": 1.1024983818770228e-05, "loss": 0.0121, "step": 173330 }, { "epoch": 67.32, "learning_rate": 1.1024466019417476e-05, "loss": 0.0663, "step": 173340 }, { "epoch": 67.32, "learning_rate": 1.1023948220064726e-05, "loss": 0.0226, "step": 173350 }, { "epoch": 67.32, "learning_rate": 1.1023430420711975e-05, "loss": 0.1278, "step": 173360 }, { "epoch": 67.33, "learning_rate": 1.1022912621359225e-05, "loss": 0.0542, "step": 173370 }, { "epoch": 67.33, "learning_rate": 1.1022394822006473e-05, "loss": 0.0536, "step": 173380 }, { "epoch": 67.34, "learning_rate": 1.1021877022653722e-05, "loss": 0.0638, "step": 173390 }, { "epoch": 67.34, "learning_rate": 1.1021359223300972e-05, "loss": 0.0365, "step": 173400 }, { "epoch": 67.34, "learning_rate": 1.1020841423948222e-05, "loss": 0.0777, "step": 173410 }, { "epoch": 67.35, "learning_rate": 1.102032362459547e-05, "loss": 0.082, "step": 173420 }, { "epoch": 67.35, "learning_rate": 1.101980582524272e-05, "loss": 0.0333, "step": 173430 }, { "epoch": 67.36, "learning_rate": 1.1019288025889969e-05, "loss": 0.0322, "step": 173440 }, { "epoch": 67.36, "learning_rate": 1.1018770226537218e-05, "loss": 0.0205, "step": 173450 }, { "epoch": 67.36, "learning_rate": 1.1018252427184466e-05, "loss": 0.0049, "step": 173460 }, { "epoch": 67.37, "learning_rate": 1.1017734627831716e-05, "loss": 0.0334, "step": 173470 }, { "epoch": 67.37, "learning_rate": 1.1017216828478966e-05, "loss": 0.0287, "step": 173480 }, { "epoch": 67.37, "learning_rate": 1.1016699029126215e-05, "loss": 0.1505, "step": 173490 }, { "epoch": 67.38, "learning_rate": 1.1016181229773463e-05, "loss": 0.1216, "step": 173500 }, { "epoch": 67.38, "learning_rate": 1.1015663430420713e-05, "loss": 0.0119, "step": 173510 }, { "epoch": 67.39, "learning_rate": 1.1015145631067962e-05, "loss": 0.0738, "step": 173520 }, { "epoch": 67.39, "learning_rate": 1.1014627831715212e-05, "loss": 0.001, "step": 173530 }, { "epoch": 67.39, "learning_rate": 1.101411003236246e-05, "loss": 0.0871, "step": 173540 }, { "epoch": 67.4, "learning_rate": 1.101359223300971e-05, "loss": 0.1601, "step": 173550 }, { "epoch": 67.4, "learning_rate": 1.1013074433656959e-05, "loss": 0.0548, "step": 173560 }, { "epoch": 67.41, "learning_rate": 1.1012556634304207e-05, "loss": 0.2329, "step": 173570 }, { "epoch": 67.41, "learning_rate": 1.1012038834951457e-05, "loss": 0.0693, "step": 173580 }, { "epoch": 67.41, "learning_rate": 1.1011521035598706e-05, "loss": 0.0354, "step": 173590 }, { "epoch": 67.42, "learning_rate": 1.1011003236245956e-05, "loss": 0.0194, "step": 173600 }, { "epoch": 67.42, "learning_rate": 1.1010485436893206e-05, "loss": 0.0887, "step": 173610 }, { "epoch": 67.43, "learning_rate": 1.1009967637540453e-05, "loss": 0.01, "step": 173620 }, { "epoch": 67.43, "learning_rate": 1.1009449838187703e-05, "loss": 0.0429, "step": 173630 }, { "epoch": 67.43, "learning_rate": 1.1008932038834953e-05, "loss": 0.0305, "step": 173640 }, { "epoch": 67.44, "learning_rate": 1.10084142394822e-05, "loss": 0.039, "step": 173650 }, { "epoch": 67.44, "learning_rate": 1.100789644012945e-05, "loss": 0.1512, "step": 173660 }, { "epoch": 67.44, "learning_rate": 1.10073786407767e-05, "loss": 0.0337, "step": 173670 }, { "epoch": 67.45, "learning_rate": 1.100686084142395e-05, "loss": 0.0353, "step": 173680 }, { "epoch": 67.45, "learning_rate": 1.1006343042071199e-05, "loss": 0.1227, "step": 173690 }, { "epoch": 67.46, "learning_rate": 1.1005825242718447e-05, "loss": 0.002, "step": 173700 }, { "epoch": 67.46, "learning_rate": 1.1005307443365697e-05, "loss": 0.0141, "step": 173710 }, { "epoch": 67.46, "learning_rate": 1.1004789644012945e-05, "loss": 0.1152, "step": 173720 }, { "epoch": 67.47, "learning_rate": 1.1004271844660194e-05, "loss": 0.0058, "step": 173730 }, { "epoch": 67.47, "learning_rate": 1.1003754045307444e-05, "loss": 0.0916, "step": 173740 }, { "epoch": 67.48, "learning_rate": 1.1003236245954693e-05, "loss": 0.0836, "step": 173750 }, { "epoch": 67.48, "learning_rate": 1.1002718446601943e-05, "loss": 0.0419, "step": 173760 }, { "epoch": 67.48, "learning_rate": 1.1002200647249193e-05, "loss": 0.0541, "step": 173770 }, { "epoch": 67.49, "learning_rate": 1.100168284789644e-05, "loss": 0.0057, "step": 173780 }, { "epoch": 67.49, "learning_rate": 1.100116504854369e-05, "loss": 0.2058, "step": 173790 }, { "epoch": 67.5, "learning_rate": 1.1000647249190938e-05, "loss": 0.0403, "step": 173800 }, { "epoch": 67.5, "learning_rate": 1.1000129449838188e-05, "loss": 0.0302, "step": 173810 }, { "epoch": 67.5, "learning_rate": 1.0999611650485437e-05, "loss": 0.0827, "step": 173820 }, { "epoch": 67.51, "learning_rate": 1.0999093851132687e-05, "loss": 0.1238, "step": 173830 }, { "epoch": 67.51, "learning_rate": 1.0998576051779937e-05, "loss": 0.0191, "step": 173840 }, { "epoch": 67.51, "learning_rate": 1.0998058252427186e-05, "loss": 0.0286, "step": 173850 }, { "epoch": 67.52, "learning_rate": 1.0997540453074436e-05, "loss": 0.0597, "step": 173860 }, { "epoch": 67.52, "learning_rate": 1.0997022653721682e-05, "loss": 0.225, "step": 173870 }, { "epoch": 67.53, "learning_rate": 1.0996504854368932e-05, "loss": 0.0507, "step": 173880 }, { "epoch": 67.53, "learning_rate": 1.0995987055016181e-05, "loss": 0.122, "step": 173890 }, { "epoch": 67.53, "learning_rate": 1.0995469255663431e-05, "loss": 0.0625, "step": 173900 }, { "epoch": 67.54, "learning_rate": 1.099495145631068e-05, "loss": 0.0768, "step": 173910 }, { "epoch": 67.54, "learning_rate": 1.099443365695793e-05, "loss": 0.1315, "step": 173920 }, { "epoch": 67.55, "learning_rate": 1.099391585760518e-05, "loss": 0.0007, "step": 173930 }, { "epoch": 67.55, "learning_rate": 1.099339805825243e-05, "loss": 0.105, "step": 173940 }, { "epoch": 67.55, "learning_rate": 1.0992880258899676e-05, "loss": 0.1466, "step": 173950 }, { "epoch": 67.56, "learning_rate": 1.0992362459546925e-05, "loss": 0.0134, "step": 173960 }, { "epoch": 67.56, "learning_rate": 1.0991844660194175e-05, "loss": 0.2258, "step": 173970 }, { "epoch": 67.57, "learning_rate": 1.0991326860841424e-05, "loss": 0.0361, "step": 173980 }, { "epoch": 67.57, "learning_rate": 1.0990809061488674e-05, "loss": 0.0927, "step": 173990 }, { "epoch": 67.57, "learning_rate": 1.0990291262135924e-05, "loss": 0.0536, "step": 174000 }, { "epoch": 67.58, "learning_rate": 1.0989773462783173e-05, "loss": 0.1109, "step": 174010 }, { "epoch": 67.58, "learning_rate": 1.0989255663430423e-05, "loss": 0.1175, "step": 174020 }, { "epoch": 67.58, "learning_rate": 1.098873786407767e-05, "loss": 0.0653, "step": 174030 }, { "epoch": 67.59, "learning_rate": 1.0988220064724919e-05, "loss": 0.0895, "step": 174040 }, { "epoch": 67.59, "learning_rate": 1.0987702265372168e-05, "loss": 0.0161, "step": 174050 }, { "epoch": 67.6, "learning_rate": 1.0987184466019418e-05, "loss": 0.0266, "step": 174060 }, { "epoch": 67.6, "learning_rate": 1.0986666666666668e-05, "loss": 0.0368, "step": 174070 }, { "epoch": 67.6, "learning_rate": 1.0986148867313917e-05, "loss": 0.041, "step": 174080 }, { "epoch": 67.61, "learning_rate": 1.0985631067961167e-05, "loss": 0.0173, "step": 174090 }, { "epoch": 67.61, "learning_rate": 1.0985113268608416e-05, "loss": 0.028, "step": 174100 }, { "epoch": 67.62, "learning_rate": 1.0984595469255663e-05, "loss": 0.105, "step": 174110 }, { "epoch": 67.62, "learning_rate": 1.0984077669902912e-05, "loss": 0.022, "step": 174120 }, { "epoch": 67.62, "learning_rate": 1.0983559870550162e-05, "loss": 0.0826, "step": 174130 }, { "epoch": 67.63, "learning_rate": 1.0983042071197412e-05, "loss": 0.0467, "step": 174140 }, { "epoch": 67.63, "learning_rate": 1.0982524271844661e-05, "loss": 0.0681, "step": 174150 }, { "epoch": 67.63, "learning_rate": 1.098200647249191e-05, "loss": 0.0338, "step": 174160 }, { "epoch": 67.64, "learning_rate": 1.098148867313916e-05, "loss": 0.068, "step": 174170 }, { "epoch": 67.64, "learning_rate": 1.098097087378641e-05, "loss": 0.0117, "step": 174180 }, { "epoch": 67.65, "learning_rate": 1.0980453074433656e-05, "loss": 0.0271, "step": 174190 }, { "epoch": 67.65, "learning_rate": 1.0979935275080906e-05, "loss": 0.0645, "step": 174200 }, { "epoch": 67.65, "learning_rate": 1.0979417475728156e-05, "loss": 0.0871, "step": 174210 }, { "epoch": 67.66, "learning_rate": 1.0978899676375405e-05, "loss": 0.0719, "step": 174220 }, { "epoch": 67.66, "learning_rate": 1.0978381877022655e-05, "loss": 0.0986, "step": 174230 }, { "epoch": 67.67, "learning_rate": 1.0977864077669904e-05, "loss": 0.0489, "step": 174240 }, { "epoch": 67.67, "learning_rate": 1.0977346278317154e-05, "loss": 0.0224, "step": 174250 }, { "epoch": 67.67, "learning_rate": 1.0976828478964404e-05, "loss": 0.0329, "step": 174260 }, { "epoch": 67.68, "learning_rate": 1.097631067961165e-05, "loss": 0.0007, "step": 174270 }, { "epoch": 67.68, "learning_rate": 1.09757928802589e-05, "loss": 0.0655, "step": 174280 }, { "epoch": 67.69, "learning_rate": 1.0975275080906149e-05, "loss": 0.1875, "step": 174290 }, { "epoch": 67.69, "learning_rate": 1.0974757281553399e-05, "loss": 0.0012, "step": 174300 }, { "epoch": 67.69, "learning_rate": 1.0974239482200648e-05, "loss": 0.0186, "step": 174310 }, { "epoch": 67.7, "learning_rate": 1.0973721682847898e-05, "loss": 0.0825, "step": 174320 }, { "epoch": 67.7, "learning_rate": 1.0973203883495148e-05, "loss": 0.0991, "step": 174330 }, { "epoch": 67.7, "learning_rate": 1.0972686084142397e-05, "loss": 0.1659, "step": 174340 }, { "epoch": 67.71, "learning_rate": 1.0972168284789647e-05, "loss": 0.0492, "step": 174350 }, { "epoch": 67.71, "learning_rate": 1.0971650485436893e-05, "loss": 0.1596, "step": 174360 }, { "epoch": 67.72, "learning_rate": 1.0971132686084143e-05, "loss": 0.0243, "step": 174370 }, { "epoch": 67.72, "learning_rate": 1.0970614886731392e-05, "loss": 0.0065, "step": 174380 }, { "epoch": 67.72, "learning_rate": 1.0970097087378642e-05, "loss": 0.1524, "step": 174390 }, { "epoch": 67.73, "learning_rate": 1.0969579288025891e-05, "loss": 0.1073, "step": 174400 }, { "epoch": 67.73, "learning_rate": 1.0969061488673141e-05, "loss": 0.1084, "step": 174410 }, { "epoch": 67.74, "learning_rate": 1.096854368932039e-05, "loss": 0.0105, "step": 174420 }, { "epoch": 67.74, "learning_rate": 1.096802588996764e-05, "loss": 0.0454, "step": 174430 }, { "epoch": 67.74, "learning_rate": 1.0967508090614887e-05, "loss": 0.0047, "step": 174440 }, { "epoch": 67.75, "learning_rate": 1.0966990291262136e-05, "loss": 0.0076, "step": 174450 }, { "epoch": 67.75, "learning_rate": 1.0966472491909386e-05, "loss": 0.0769, "step": 174460 }, { "epoch": 67.76, "learning_rate": 1.0965954692556635e-05, "loss": 0.0381, "step": 174470 }, { "epoch": 67.76, "learning_rate": 1.0965436893203885e-05, "loss": 0.0514, "step": 174480 }, { "epoch": 67.76, "learning_rate": 1.0964919093851135e-05, "loss": 0.0331, "step": 174490 }, { "epoch": 67.77, "learning_rate": 1.0964401294498384e-05, "loss": 0.0526, "step": 174500 }, { "epoch": 67.77, "learning_rate": 1.0963883495145632e-05, "loss": 0.0555, "step": 174510 }, { "epoch": 67.77, "learning_rate": 1.096336569579288e-05, "loss": 0.0015, "step": 174520 }, { "epoch": 67.78, "learning_rate": 1.096284789644013e-05, "loss": 0.1176, "step": 174530 }, { "epoch": 67.78, "learning_rate": 1.096233009708738e-05, "loss": 0.0623, "step": 174540 }, { "epoch": 67.79, "learning_rate": 1.0961812297734629e-05, "loss": 0.0521, "step": 174550 }, { "epoch": 67.79, "learning_rate": 1.0961294498381879e-05, "loss": 0.0197, "step": 174560 }, { "epoch": 67.79, "learning_rate": 1.0960776699029128e-05, "loss": 0.1512, "step": 174570 }, { "epoch": 67.8, "learning_rate": 1.0960258899676378e-05, "loss": 0.1197, "step": 174580 }, { "epoch": 67.8, "learning_rate": 1.0959741100323626e-05, "loss": 0.0872, "step": 174590 }, { "epoch": 67.81, "learning_rate": 1.0959223300970874e-05, "loss": 0.0945, "step": 174600 }, { "epoch": 67.81, "learning_rate": 1.0958705501618123e-05, "loss": 0.2249, "step": 174610 }, { "epoch": 67.81, "learning_rate": 1.0958187702265373e-05, "loss": 0.0177, "step": 174620 }, { "epoch": 67.82, "learning_rate": 1.0957669902912623e-05, "loss": 0.0366, "step": 174630 }, { "epoch": 67.82, "learning_rate": 1.0957152103559872e-05, "loss": 0.0075, "step": 174640 }, { "epoch": 67.83, "learning_rate": 1.0956634304207122e-05, "loss": 0.0324, "step": 174650 }, { "epoch": 67.83, "learning_rate": 1.095611650485437e-05, "loss": 0.0184, "step": 174660 }, { "epoch": 67.83, "learning_rate": 1.095559870550162e-05, "loss": 0.12, "step": 174670 }, { "epoch": 67.84, "learning_rate": 1.0955080906148867e-05, "loss": 0.1216, "step": 174680 }, { "epoch": 67.84, "learning_rate": 1.0954563106796117e-05, "loss": 0.1556, "step": 174690 }, { "epoch": 67.84, "learning_rate": 1.0954045307443366e-05, "loss": 0.1248, "step": 174700 }, { "epoch": 67.85, "learning_rate": 1.0953527508090616e-05, "loss": 0.1028, "step": 174710 }, { "epoch": 67.85, "learning_rate": 1.0953009708737866e-05, "loss": 0.1275, "step": 174720 }, { "epoch": 67.86, "learning_rate": 1.0952491909385115e-05, "loss": 0.0037, "step": 174730 }, { "epoch": 67.86, "learning_rate": 1.0951974110032363e-05, "loss": 0.1086, "step": 174740 }, { "epoch": 67.86, "learning_rate": 1.0951456310679613e-05, "loss": 0.0905, "step": 174750 }, { "epoch": 67.87, "learning_rate": 1.095093851132686e-05, "loss": 0.0035, "step": 174760 }, { "epoch": 67.87, "learning_rate": 1.095042071197411e-05, "loss": 0.1989, "step": 174770 }, { "epoch": 67.88, "learning_rate": 1.094990291262136e-05, "loss": 0.172, "step": 174780 }, { "epoch": 67.88, "learning_rate": 1.094938511326861e-05, "loss": 0.0627, "step": 174790 }, { "epoch": 67.88, "learning_rate": 1.094886731391586e-05, "loss": 0.0894, "step": 174800 }, { "epoch": 67.89, "learning_rate": 1.0948349514563107e-05, "loss": 0.0966, "step": 174810 }, { "epoch": 67.89, "learning_rate": 1.0947831715210357e-05, "loss": 0.0185, "step": 174820 }, { "epoch": 67.9, "learning_rate": 1.0947313915857606e-05, "loss": 0.0361, "step": 174830 }, { "epoch": 67.9, "learning_rate": 1.0946796116504854e-05, "loss": 0.0787, "step": 174840 }, { "epoch": 67.9, "learning_rate": 1.0946278317152104e-05, "loss": 0.0844, "step": 174850 }, { "epoch": 67.91, "learning_rate": 1.0945760517799354e-05, "loss": 0.0022, "step": 174860 }, { "epoch": 67.91, "learning_rate": 1.0945242718446603e-05, "loss": 0.0022, "step": 174870 }, { "epoch": 67.91, "learning_rate": 1.0944724919093853e-05, "loss": 0.0693, "step": 174880 }, { "epoch": 67.92, "learning_rate": 1.09442071197411e-05, "loss": 0.0325, "step": 174890 }, { "epoch": 67.92, "learning_rate": 1.094368932038835e-05, "loss": 0.0149, "step": 174900 }, { "epoch": 67.93, "learning_rate": 1.09431715210356e-05, "loss": 0.0519, "step": 174910 }, { "epoch": 67.93, "learning_rate": 1.094265372168285e-05, "loss": 0.0177, "step": 174920 }, { "epoch": 67.93, "learning_rate": 1.0942135922330098e-05, "loss": 0.1525, "step": 174930 }, { "epoch": 67.94, "learning_rate": 1.0941618122977347e-05, "loss": 0.0111, "step": 174940 }, { "epoch": 67.94, "learning_rate": 1.0941100323624597e-05, "loss": 0.0507, "step": 174950 }, { "epoch": 67.95, "learning_rate": 1.0940582524271846e-05, "loss": 0.0704, "step": 174960 }, { "epoch": 67.95, "learning_rate": 1.0940064724919094e-05, "loss": 0.0076, "step": 174970 }, { "epoch": 67.95, "learning_rate": 1.0939546925566344e-05, "loss": 0.043, "step": 174980 }, { "epoch": 67.96, "learning_rate": 1.0939029126213594e-05, "loss": 0.0614, "step": 174990 }, { "epoch": 67.96, "learning_rate": 1.0938511326860843e-05, "loss": 0.0258, "step": 175000 }, { "epoch": 67.97, "learning_rate": 1.0937993527508091e-05, "loss": 0.0131, "step": 175010 }, { "epoch": 67.97, "learning_rate": 1.093747572815534e-05, "loss": 0.1103, "step": 175020 }, { "epoch": 67.97, "learning_rate": 1.093695792880259e-05, "loss": 0.0285, "step": 175030 }, { "epoch": 67.98, "learning_rate": 1.0936440129449838e-05, "loss": 0.1335, "step": 175040 }, { "epoch": 67.98, "learning_rate": 1.0935922330097088e-05, "loss": 0.217, "step": 175050 }, { "epoch": 67.98, "learning_rate": 1.0935404530744337e-05, "loss": 0.0311, "step": 175060 }, { "epoch": 67.99, "learning_rate": 1.0934886731391587e-05, "loss": 0.0021, "step": 175070 }, { "epoch": 67.99, "learning_rate": 1.0934368932038837e-05, "loss": 0.0594, "step": 175080 }, { "epoch": 68.0, "learning_rate": 1.0933851132686085e-05, "loss": 0.1347, "step": 175090 }, { "epoch": 68.0, "learning_rate": 1.0933333333333334e-05, "loss": 0.094, "step": 175100 }, { "epoch": 68.0, "eval_accuracy": 0.949656121045392, "eval_loss": 0.3507804572582245, "eval_runtime": 8.2022, "eval_samples_per_second": 443.174, "eval_steps_per_second": 55.473, "step": 175100 }, { "epoch": 68.0, "learning_rate": 1.0932815533980584e-05, "loss": 0.1268, "step": 175110 }, { "epoch": 68.01, "learning_rate": 1.0932297734627832e-05, "loss": 0.023, "step": 175120 }, { "epoch": 68.01, "learning_rate": 1.0931779935275081e-05, "loss": 0.0272, "step": 175130 }, { "epoch": 68.02, "learning_rate": 1.0931262135922331e-05, "loss": 0.0036, "step": 175140 }, { "epoch": 68.02, "learning_rate": 1.093074433656958e-05, "loss": 0.0312, "step": 175150 }, { "epoch": 68.02, "learning_rate": 1.093022653721683e-05, "loss": 0.0873, "step": 175160 }, { "epoch": 68.03, "learning_rate": 1.0929708737864078e-05, "loss": 0.0882, "step": 175170 }, { "epoch": 68.03, "learning_rate": 1.0929190938511328e-05, "loss": 0.0077, "step": 175180 }, { "epoch": 68.03, "learning_rate": 1.0928673139158576e-05, "loss": 0.2152, "step": 175190 }, { "epoch": 68.04, "learning_rate": 1.0928155339805825e-05, "loss": 0.0742, "step": 175200 }, { "epoch": 68.04, "learning_rate": 1.0927637540453075e-05, "loss": 0.0224, "step": 175210 }, { "epoch": 68.05, "learning_rate": 1.0927119741100325e-05, "loss": 0.0432, "step": 175220 }, { "epoch": 68.05, "learning_rate": 1.0926601941747574e-05, "loss": 0.1055, "step": 175230 }, { "epoch": 68.05, "learning_rate": 1.0926084142394824e-05, "loss": 0.2127, "step": 175240 }, { "epoch": 68.06, "learning_rate": 1.0925566343042072e-05, "loss": 0.0594, "step": 175250 }, { "epoch": 68.06, "learning_rate": 1.0925048543689321e-05, "loss": 0.0412, "step": 175260 }, { "epoch": 68.07, "learning_rate": 1.092453074433657e-05, "loss": 0.1809, "step": 175270 }, { "epoch": 68.07, "learning_rate": 1.0924012944983819e-05, "loss": 0.0103, "step": 175280 }, { "epoch": 68.07, "learning_rate": 1.0923495145631069e-05, "loss": 0.0009, "step": 175290 }, { "epoch": 68.08, "learning_rate": 1.0922977346278318e-05, "loss": 0.0182, "step": 175300 }, { "epoch": 68.08, "learning_rate": 1.0922459546925568e-05, "loss": 0.0814, "step": 175310 }, { "epoch": 68.09, "learning_rate": 1.0921941747572817e-05, "loss": 0.0982, "step": 175320 }, { "epoch": 68.09, "learning_rate": 1.0921423948220065e-05, "loss": 0.1108, "step": 175330 }, { "epoch": 68.09, "learning_rate": 1.0920906148867313e-05, "loss": 0.0066, "step": 175340 }, { "epoch": 68.1, "learning_rate": 1.0920388349514563e-05, "loss": 0.1458, "step": 175350 }, { "epoch": 68.1, "learning_rate": 1.0919870550161812e-05, "loss": 0.0225, "step": 175360 }, { "epoch": 68.1, "learning_rate": 1.0919352750809062e-05, "loss": 0.1302, "step": 175370 }, { "epoch": 68.11, "learning_rate": 1.0918834951456312e-05, "loss": 0.0604, "step": 175380 }, { "epoch": 68.11, "learning_rate": 1.0918317152103561e-05, "loss": 0.0152, "step": 175390 }, { "epoch": 68.12, "learning_rate": 1.0917799352750811e-05, "loss": 0.0382, "step": 175400 }, { "epoch": 68.12, "learning_rate": 1.0917281553398059e-05, "loss": 0.0351, "step": 175410 }, { "epoch": 68.12, "learning_rate": 1.0916763754045307e-05, "loss": 0.0026, "step": 175420 }, { "epoch": 68.13, "learning_rate": 1.0916245954692556e-05, "loss": 0.0184, "step": 175430 }, { "epoch": 68.13, "learning_rate": 1.0915728155339806e-05, "loss": 0.0446, "step": 175440 }, { "epoch": 68.14, "learning_rate": 1.0915210355987056e-05, "loss": 0.0398, "step": 175450 }, { "epoch": 68.14, "learning_rate": 1.0914692556634305e-05, "loss": 0.1502, "step": 175460 }, { "epoch": 68.14, "learning_rate": 1.0914174757281555e-05, "loss": 0.0603, "step": 175470 }, { "epoch": 68.15, "learning_rate": 1.0913656957928804e-05, "loss": 0.0744, "step": 175480 }, { "epoch": 68.15, "learning_rate": 1.0913139158576054e-05, "loss": 0.0476, "step": 175490 }, { "epoch": 68.16, "learning_rate": 1.09126213592233e-05, "loss": 0.1489, "step": 175500 }, { "epoch": 68.16, "learning_rate": 1.091210355987055e-05, "loss": 0.0398, "step": 175510 }, { "epoch": 68.16, "learning_rate": 1.09115857605178e-05, "loss": 0.0674, "step": 175520 }, { "epoch": 68.17, "learning_rate": 1.091106796116505e-05, "loss": 0.0382, "step": 175530 }, { "epoch": 68.17, "learning_rate": 1.0910550161812299e-05, "loss": 0.0106, "step": 175540 }, { "epoch": 68.17, "learning_rate": 1.0910032362459548e-05, "loss": 0.1437, "step": 175550 }, { "epoch": 68.18, "learning_rate": 1.0909514563106798e-05, "loss": 0.0534, "step": 175560 }, { "epoch": 68.18, "learning_rate": 1.0908996763754048e-05, "loss": 0.1641, "step": 175570 }, { "epoch": 68.19, "learning_rate": 1.0908478964401294e-05, "loss": 0.0682, "step": 175580 }, { "epoch": 68.19, "learning_rate": 1.0907961165048544e-05, "loss": 0.0419, "step": 175590 }, { "epoch": 68.19, "learning_rate": 1.0907443365695793e-05, "loss": 0.0015, "step": 175600 }, { "epoch": 68.2, "learning_rate": 1.0906925566343043e-05, "loss": 0.0253, "step": 175610 }, { "epoch": 68.2, "learning_rate": 1.0906407766990292e-05, "loss": 0.0112, "step": 175620 }, { "epoch": 68.21, "learning_rate": 1.0905889967637542e-05, "loss": 0.0184, "step": 175630 }, { "epoch": 68.21, "learning_rate": 1.0905372168284792e-05, "loss": 0.1615, "step": 175640 }, { "epoch": 68.21, "learning_rate": 1.0904854368932041e-05, "loss": 0.0835, "step": 175650 }, { "epoch": 68.22, "learning_rate": 1.0904336569579287e-05, "loss": 0.0785, "step": 175660 }, { "epoch": 68.22, "learning_rate": 1.0903818770226537e-05, "loss": 0.1597, "step": 175670 }, { "epoch": 68.23, "learning_rate": 1.0903300970873787e-05, "loss": 0.0104, "step": 175680 }, { "epoch": 68.23, "learning_rate": 1.0902783171521036e-05, "loss": 0.0179, "step": 175690 }, { "epoch": 68.23, "learning_rate": 1.0902265372168286e-05, "loss": 0.0391, "step": 175700 }, { "epoch": 68.24, "learning_rate": 1.0901747572815536e-05, "loss": 0.1177, "step": 175710 }, { "epoch": 68.24, "learning_rate": 1.0901229773462785e-05, "loss": 0.0068, "step": 175720 }, { "epoch": 68.24, "learning_rate": 1.0900711974110035e-05, "loss": 0.0249, "step": 175730 }, { "epoch": 68.25, "learning_rate": 1.0900194174757281e-05, "loss": 0.0003, "step": 175740 }, { "epoch": 68.25, "learning_rate": 1.089967637540453e-05, "loss": 0.0835, "step": 175750 }, { "epoch": 68.26, "learning_rate": 1.089915857605178e-05, "loss": 0.2499, "step": 175760 }, { "epoch": 68.26, "learning_rate": 1.089864077669903e-05, "loss": 0.0268, "step": 175770 }, { "epoch": 68.26, "learning_rate": 1.089812297734628e-05, "loss": 0.04, "step": 175780 }, { "epoch": 68.27, "learning_rate": 1.0897605177993529e-05, "loss": 0.0974, "step": 175790 }, { "epoch": 68.27, "learning_rate": 1.0897087378640779e-05, "loss": 0.0668, "step": 175800 }, { "epoch": 68.28, "learning_rate": 1.0896569579288028e-05, "loss": 0.014, "step": 175810 }, { "epoch": 68.28, "learning_rate": 1.0896051779935275e-05, "loss": 0.1303, "step": 175820 }, { "epoch": 68.28, "learning_rate": 1.0895533980582524e-05, "loss": 0.0618, "step": 175830 }, { "epoch": 68.29, "learning_rate": 1.0895016181229774e-05, "loss": 0.1218, "step": 175840 }, { "epoch": 68.29, "learning_rate": 1.0894498381877023e-05, "loss": 0.0725, "step": 175850 }, { "epoch": 68.3, "learning_rate": 1.0893980582524273e-05, "loss": 0.0596, "step": 175860 }, { "epoch": 68.3, "learning_rate": 1.0893462783171523e-05, "loss": 0.0105, "step": 175870 }, { "epoch": 68.3, "learning_rate": 1.0892944983818772e-05, "loss": 0.1382, "step": 175880 }, { "epoch": 68.31, "learning_rate": 1.0892427184466022e-05, "loss": 0.0886, "step": 175890 }, { "epoch": 68.31, "learning_rate": 1.0891909385113268e-05, "loss": 0.0085, "step": 175900 }, { "epoch": 68.31, "learning_rate": 1.0891391585760518e-05, "loss": 0.1796, "step": 175910 }, { "epoch": 68.32, "learning_rate": 1.0890873786407767e-05, "loss": 0.0245, "step": 175920 }, { "epoch": 68.32, "learning_rate": 1.0890355987055017e-05, "loss": 0.0148, "step": 175930 }, { "epoch": 68.33, "learning_rate": 1.0889838187702267e-05, "loss": 0.1071, "step": 175940 }, { "epoch": 68.33, "learning_rate": 1.0889320388349516e-05, "loss": 0.0517, "step": 175950 }, { "epoch": 68.33, "learning_rate": 1.0888802588996766e-05, "loss": 0.0274, "step": 175960 }, { "epoch": 68.34, "learning_rate": 1.0888284789644015e-05, "loss": 0.0223, "step": 175970 }, { "epoch": 68.34, "learning_rate": 1.0887766990291262e-05, "loss": 0.0003, "step": 175980 }, { "epoch": 68.35, "learning_rate": 1.0887249190938511e-05, "loss": 0.0831, "step": 175990 }, { "epoch": 68.35, "learning_rate": 1.0886731391585761e-05, "loss": 0.0306, "step": 176000 }, { "epoch": 68.35, "learning_rate": 1.088621359223301e-05, "loss": 0.1181, "step": 176010 }, { "epoch": 68.36, "learning_rate": 1.088569579288026e-05, "loss": 0.1923, "step": 176020 }, { "epoch": 68.36, "learning_rate": 1.088517799352751e-05, "loss": 0.0445, "step": 176030 }, { "epoch": 68.37, "learning_rate": 1.088466019417476e-05, "loss": 0.0224, "step": 176040 }, { "epoch": 68.37, "learning_rate": 1.0884142394822009e-05, "loss": 0.2267, "step": 176050 }, { "epoch": 68.37, "learning_rate": 1.0883624595469257e-05, "loss": 0.12, "step": 176060 }, { "epoch": 68.38, "learning_rate": 1.0883106796116505e-05, "loss": 0.0119, "step": 176070 }, { "epoch": 68.38, "learning_rate": 1.0882588996763754e-05, "loss": 0.1754, "step": 176080 }, { "epoch": 68.38, "learning_rate": 1.0882071197411004e-05, "loss": 0.0499, "step": 176090 }, { "epoch": 68.39, "learning_rate": 1.0881553398058254e-05, "loss": 0.0342, "step": 176100 }, { "epoch": 68.39, "learning_rate": 1.0881035598705503e-05, "loss": 0.16, "step": 176110 }, { "epoch": 68.4, "learning_rate": 1.0880517799352753e-05, "loss": 0.1582, "step": 176120 }, { "epoch": 68.4, "learning_rate": 1.0880000000000001e-05, "loss": 0.1044, "step": 176130 }, { "epoch": 68.4, "learning_rate": 1.087948220064725e-05, "loss": 0.1002, "step": 176140 }, { "epoch": 68.41, "learning_rate": 1.0878964401294498e-05, "loss": 0.113, "step": 176150 }, { "epoch": 68.41, "learning_rate": 1.0878446601941748e-05, "loss": 0.1205, "step": 176160 }, { "epoch": 68.42, "learning_rate": 1.0877928802588998e-05, "loss": 0.0524, "step": 176170 }, { "epoch": 68.42, "learning_rate": 1.0877411003236247e-05, "loss": 0.0024, "step": 176180 }, { "epoch": 68.42, "learning_rate": 1.0876893203883497e-05, "loss": 0.0171, "step": 176190 }, { "epoch": 68.43, "learning_rate": 1.0876375404530746e-05, "loss": 0.1116, "step": 176200 }, { "epoch": 68.43, "learning_rate": 1.0875857605177994e-05, "loss": 0.1023, "step": 176210 }, { "epoch": 68.43, "learning_rate": 1.0875339805825244e-05, "loss": 0.006, "step": 176220 }, { "epoch": 68.44, "learning_rate": 1.0874822006472492e-05, "loss": 0.0794, "step": 176230 }, { "epoch": 68.44, "learning_rate": 1.0874304207119742e-05, "loss": 0.0415, "step": 176240 }, { "epoch": 68.45, "learning_rate": 1.0873786407766991e-05, "loss": 0.0509, "step": 176250 }, { "epoch": 68.45, "learning_rate": 1.087326860841424e-05, "loss": 0.0555, "step": 176260 }, { "epoch": 68.45, "learning_rate": 1.087275080906149e-05, "loss": 0.0165, "step": 176270 }, { "epoch": 68.46, "learning_rate": 1.0872233009708738e-05, "loss": 0.1446, "step": 176280 }, { "epoch": 68.46, "learning_rate": 1.0871715210355988e-05, "loss": 0.0943, "step": 176290 }, { "epoch": 68.47, "learning_rate": 1.0871197411003238e-05, "loss": 0.1089, "step": 176300 }, { "epoch": 68.47, "learning_rate": 1.0870679611650486e-05, "loss": 0.0963, "step": 176310 }, { "epoch": 68.47, "learning_rate": 1.0870161812297735e-05, "loss": 0.1373, "step": 176320 }, { "epoch": 68.48, "learning_rate": 1.0869644012944985e-05, "loss": 0.0358, "step": 176330 }, { "epoch": 68.48, "learning_rate": 1.0869126213592234e-05, "loss": 0.0195, "step": 176340 }, { "epoch": 68.49, "learning_rate": 1.0868608414239484e-05, "loss": 0.0173, "step": 176350 }, { "epoch": 68.49, "learning_rate": 1.0868090614886732e-05, "loss": 0.097, "step": 176360 }, { "epoch": 68.49, "learning_rate": 1.0867572815533982e-05, "loss": 0.037, "step": 176370 }, { "epoch": 68.5, "learning_rate": 1.0867055016181231e-05, "loss": 0.1286, "step": 176380 }, { "epoch": 68.5, "learning_rate": 1.0866537216828479e-05, "loss": 0.0545, "step": 176390 }, { "epoch": 68.5, "learning_rate": 1.0866019417475729e-05, "loss": 0.0145, "step": 176400 }, { "epoch": 68.51, "learning_rate": 1.0865501618122978e-05, "loss": 0.0301, "step": 176410 }, { "epoch": 68.51, "learning_rate": 1.0864983818770228e-05, "loss": 0.0827, "step": 176420 }, { "epoch": 68.52, "learning_rate": 1.0864466019417476e-05, "loss": 0.0323, "step": 176430 }, { "epoch": 68.52, "learning_rate": 1.0863948220064725e-05, "loss": 0.0335, "step": 176440 }, { "epoch": 68.52, "learning_rate": 1.0863430420711975e-05, "loss": 0.0515, "step": 176450 }, { "epoch": 68.53, "learning_rate": 1.0862912621359225e-05, "loss": 0.0087, "step": 176460 }, { "epoch": 68.53, "learning_rate": 1.0862394822006473e-05, "loss": 0.0513, "step": 176470 }, { "epoch": 68.54, "learning_rate": 1.0861877022653722e-05, "loss": 0.0603, "step": 176480 }, { "epoch": 68.54, "learning_rate": 1.0861359223300972e-05, "loss": 0.0283, "step": 176490 }, { "epoch": 68.54, "learning_rate": 1.0860841423948221e-05, "loss": 0.0754, "step": 176500 }, { "epoch": 68.55, "learning_rate": 1.086032362459547e-05, "loss": 0.1206, "step": 176510 }, { "epoch": 68.55, "learning_rate": 1.0859805825242719e-05, "loss": 0.0577, "step": 176520 }, { "epoch": 68.56, "learning_rate": 1.0859288025889969e-05, "loss": 0.1396, "step": 176530 }, { "epoch": 68.56, "learning_rate": 1.0858770226537218e-05, "loss": 0.0952, "step": 176540 }, { "epoch": 68.56, "learning_rate": 1.0858252427184466e-05, "loss": 0.0532, "step": 176550 }, { "epoch": 68.57, "learning_rate": 1.0857734627831716e-05, "loss": 0.2163, "step": 176560 }, { "epoch": 68.57, "learning_rate": 1.0857216828478965e-05, "loss": 0.0643, "step": 176570 }, { "epoch": 68.57, "learning_rate": 1.0856699029126215e-05, "loss": 0.127, "step": 176580 }, { "epoch": 68.58, "learning_rate": 1.0856181229773463e-05, "loss": 0.0464, "step": 176590 }, { "epoch": 68.58, "learning_rate": 1.0855663430420713e-05, "loss": 0.0584, "step": 176600 }, { "epoch": 68.59, "learning_rate": 1.0855145631067962e-05, "loss": 0.0207, "step": 176610 }, { "epoch": 68.59, "learning_rate": 1.0854627831715212e-05, "loss": 0.0003, "step": 176620 }, { "epoch": 68.59, "learning_rate": 1.0854110032362461e-05, "loss": 0.027, "step": 176630 }, { "epoch": 68.6, "learning_rate": 1.085359223300971e-05, "loss": 0.0681, "step": 176640 }, { "epoch": 68.6, "learning_rate": 1.0853074433656959e-05, "loss": 0.1082, "step": 176650 }, { "epoch": 68.61, "learning_rate": 1.0852556634304207e-05, "loss": 0.0022, "step": 176660 }, { "epoch": 68.61, "learning_rate": 1.0852038834951457e-05, "loss": 0.1848, "step": 176670 }, { "epoch": 68.61, "learning_rate": 1.0851521035598706e-05, "loss": 0.1273, "step": 176680 }, { "epoch": 68.62, "learning_rate": 1.0851003236245956e-05, "loss": 0.0643, "step": 176690 }, { "epoch": 68.62, "learning_rate": 1.0850485436893205e-05, "loss": 0.0011, "step": 176700 }, { "epoch": 68.63, "learning_rate": 1.0849967637540455e-05, "loss": 0.0879, "step": 176710 }, { "epoch": 68.63, "learning_rate": 1.0849449838187703e-05, "loss": 0.0869, "step": 176720 }, { "epoch": 68.63, "learning_rate": 1.0848932038834953e-05, "loss": 0.0364, "step": 176730 }, { "epoch": 68.64, "learning_rate": 1.08484142394822e-05, "loss": 0.1076, "step": 176740 }, { "epoch": 68.64, "learning_rate": 1.084789644012945e-05, "loss": 0.0308, "step": 176750 }, { "epoch": 68.64, "learning_rate": 1.08473786407767e-05, "loss": 0.0212, "step": 176760 }, { "epoch": 68.65, "learning_rate": 1.084686084142395e-05, "loss": 0.0497, "step": 176770 }, { "epoch": 68.65, "learning_rate": 1.0846343042071199e-05, "loss": 0.0671, "step": 176780 }, { "epoch": 68.66, "learning_rate": 1.0845825242718449e-05, "loss": 0.0386, "step": 176790 }, { "epoch": 68.66, "learning_rate": 1.0845307443365696e-05, "loss": 0.0953, "step": 176800 }, { "epoch": 68.66, "learning_rate": 1.0844789644012944e-05, "loss": 0.1142, "step": 176810 }, { "epoch": 68.67, "learning_rate": 1.0844271844660194e-05, "loss": 0.0146, "step": 176820 }, { "epoch": 68.67, "learning_rate": 1.0843754045307444e-05, "loss": 0.0471, "step": 176830 }, { "epoch": 68.68, "learning_rate": 1.0843236245954693e-05, "loss": 0.0383, "step": 176840 }, { "epoch": 68.68, "learning_rate": 1.0842718446601943e-05, "loss": 0.2526, "step": 176850 }, { "epoch": 68.68, "learning_rate": 1.0842200647249192e-05, "loss": 0.0973, "step": 176860 }, { "epoch": 68.69, "learning_rate": 1.0841682847896442e-05, "loss": 0.0801, "step": 176870 }, { "epoch": 68.69, "learning_rate": 1.084116504854369e-05, "loss": 0.0289, "step": 176880 }, { "epoch": 68.7, "learning_rate": 1.0840647249190938e-05, "loss": 0.0101, "step": 176890 }, { "epoch": 68.7, "learning_rate": 1.0840129449838188e-05, "loss": 0.029, "step": 176900 }, { "epoch": 68.7, "learning_rate": 1.0839611650485437e-05, "loss": 0.1888, "step": 176910 }, { "epoch": 68.71, "learning_rate": 1.0839093851132687e-05, "loss": 0.1239, "step": 176920 }, { "epoch": 68.71, "learning_rate": 1.0838576051779936e-05, "loss": 0.1638, "step": 176930 }, { "epoch": 68.71, "learning_rate": 1.0838058252427186e-05, "loss": 0.1342, "step": 176940 }, { "epoch": 68.72, "learning_rate": 1.0837540453074436e-05, "loss": 0.0663, "step": 176950 }, { "epoch": 68.72, "learning_rate": 1.0837022653721682e-05, "loss": 0.1194, "step": 176960 }, { "epoch": 68.73, "learning_rate": 1.0836504854368932e-05, "loss": 0.107, "step": 176970 }, { "epoch": 68.73, "learning_rate": 1.0835987055016181e-05, "loss": 0.1353, "step": 176980 }, { "epoch": 68.73, "learning_rate": 1.083546925566343e-05, "loss": 0.0566, "step": 176990 }, { "epoch": 68.74, "learning_rate": 1.083495145631068e-05, "loss": 0.0216, "step": 177000 }, { "epoch": 68.74, "learning_rate": 1.083443365695793e-05, "loss": 0.0767, "step": 177010 }, { "epoch": 68.75, "learning_rate": 1.083391585760518e-05, "loss": 0.0287, "step": 177020 }, { "epoch": 68.75, "learning_rate": 1.083339805825243e-05, "loss": 0.0925, "step": 177030 }, { "epoch": 68.75, "learning_rate": 1.0832880258899675e-05, "loss": 0.0998, "step": 177040 }, { "epoch": 68.76, "learning_rate": 1.0832362459546925e-05, "loss": 0.1122, "step": 177050 }, { "epoch": 68.76, "learning_rate": 1.0831844660194175e-05, "loss": 0.1447, "step": 177060 }, { "epoch": 68.77, "learning_rate": 1.0831326860841424e-05, "loss": 0.1263, "step": 177070 }, { "epoch": 68.77, "learning_rate": 1.0830809061488674e-05, "loss": 0.1273, "step": 177080 }, { "epoch": 68.77, "learning_rate": 1.0830291262135924e-05, "loss": 0.0712, "step": 177090 }, { "epoch": 68.78, "learning_rate": 1.0829773462783173e-05, "loss": 0.1277, "step": 177100 }, { "epoch": 68.78, "learning_rate": 1.0829255663430423e-05, "loss": 0.0065, "step": 177110 }, { "epoch": 68.78, "learning_rate": 1.0828737864077669e-05, "loss": 0.0494, "step": 177120 }, { "epoch": 68.79, "learning_rate": 1.0828220064724919e-05, "loss": 0.0305, "step": 177130 }, { "epoch": 68.79, "learning_rate": 1.0827702265372168e-05, "loss": 0.0432, "step": 177140 }, { "epoch": 68.8, "learning_rate": 1.0827184466019418e-05, "loss": 0.0539, "step": 177150 }, { "epoch": 68.8, "learning_rate": 1.0826666666666667e-05, "loss": 0.0641, "step": 177160 }, { "epoch": 68.8, "learning_rate": 1.0826148867313917e-05, "loss": 0.0672, "step": 177170 }, { "epoch": 68.81, "learning_rate": 1.0825631067961167e-05, "loss": 0.0319, "step": 177180 }, { "epoch": 68.81, "learning_rate": 1.0825113268608416e-05, "loss": 0.002, "step": 177190 }, { "epoch": 68.82, "learning_rate": 1.0824595469255666e-05, "loss": 0.1161, "step": 177200 }, { "epoch": 68.82, "learning_rate": 1.0824077669902912e-05, "loss": 0.0952, "step": 177210 }, { "epoch": 68.82, "learning_rate": 1.0823559870550162e-05, "loss": 0.1003, "step": 177220 }, { "epoch": 68.83, "learning_rate": 1.0823042071197411e-05, "loss": 0.0415, "step": 177230 }, { "epoch": 68.83, "learning_rate": 1.0822524271844661e-05, "loss": 0.025, "step": 177240 }, { "epoch": 68.83, "learning_rate": 1.082200647249191e-05, "loss": 0.0895, "step": 177250 }, { "epoch": 68.84, "learning_rate": 1.082148867313916e-05, "loss": 0.091, "step": 177260 }, { "epoch": 68.84, "learning_rate": 1.082097087378641e-05, "loss": 0.0838, "step": 177270 }, { "epoch": 68.85, "learning_rate": 1.082045307443366e-05, "loss": 0.019, "step": 177280 }, { "epoch": 68.85, "learning_rate": 1.0819935275080906e-05, "loss": 0.0676, "step": 177290 }, { "epoch": 68.85, "learning_rate": 1.0819417475728155e-05, "loss": 0.07, "step": 177300 }, { "epoch": 68.86, "learning_rate": 1.0818899676375405e-05, "loss": 0.117, "step": 177310 }, { "epoch": 68.86, "learning_rate": 1.0818381877022655e-05, "loss": 0.1152, "step": 177320 }, { "epoch": 68.87, "learning_rate": 1.0817864077669904e-05, "loss": 0.1645, "step": 177330 }, { "epoch": 68.87, "learning_rate": 1.0817346278317154e-05, "loss": 0.0091, "step": 177340 }, { "epoch": 68.87, "learning_rate": 1.0816828478964403e-05, "loss": 0.0685, "step": 177350 }, { "epoch": 68.88, "learning_rate": 1.0816310679611653e-05, "loss": 0.0752, "step": 177360 }, { "epoch": 68.88, "learning_rate": 1.08157928802589e-05, "loss": 0.245, "step": 177370 }, { "epoch": 68.89, "learning_rate": 1.0815275080906149e-05, "loss": 0.0709, "step": 177380 }, { "epoch": 68.89, "learning_rate": 1.0814757281553399e-05, "loss": 0.0201, "step": 177390 }, { "epoch": 68.89, "learning_rate": 1.0814239482200648e-05, "loss": 0.0722, "step": 177400 }, { "epoch": 68.9, "learning_rate": 1.0813721682847898e-05, "loss": 0.0664, "step": 177410 }, { "epoch": 68.9, "learning_rate": 1.0813203883495147e-05, "loss": 0.1465, "step": 177420 }, { "epoch": 68.9, "learning_rate": 1.0812686084142397e-05, "loss": 0.0293, "step": 177430 }, { "epoch": 68.91, "learning_rate": 1.0812168284789647e-05, "loss": 0.001, "step": 177440 }, { "epoch": 68.91, "learning_rate": 1.0811650485436893e-05, "loss": 0.1311, "step": 177450 }, { "epoch": 68.92, "learning_rate": 1.0811132686084142e-05, "loss": 0.0317, "step": 177460 }, { "epoch": 68.92, "learning_rate": 1.0810614886731392e-05, "loss": 0.0285, "step": 177470 }, { "epoch": 68.92, "learning_rate": 1.0810097087378642e-05, "loss": 0.0709, "step": 177480 }, { "epoch": 68.93, "learning_rate": 1.0809579288025891e-05, "loss": 0.0025, "step": 177490 }, { "epoch": 68.93, "learning_rate": 1.0809061488673141e-05, "loss": 0.0072, "step": 177500 }, { "epoch": 68.94, "learning_rate": 1.080854368932039e-05, "loss": 0.0954, "step": 177510 }, { "epoch": 68.94, "learning_rate": 1.080802588996764e-05, "loss": 0.1739, "step": 177520 }, { "epoch": 68.94, "learning_rate": 1.0807508090614886e-05, "loss": 0.028, "step": 177530 }, { "epoch": 68.95, "learning_rate": 1.0806990291262136e-05, "loss": 0.0358, "step": 177540 }, { "epoch": 68.95, "learning_rate": 1.0806472491909386e-05, "loss": 0.0596, "step": 177550 }, { "epoch": 68.96, "learning_rate": 1.0805954692556635e-05, "loss": 0.0302, "step": 177560 }, { "epoch": 68.96, "learning_rate": 1.0805436893203885e-05, "loss": 0.049, "step": 177570 }, { "epoch": 68.96, "learning_rate": 1.0804919093851134e-05, "loss": 0.0378, "step": 177580 }, { "epoch": 68.97, "learning_rate": 1.0804401294498384e-05, "loss": 0.1051, "step": 177590 }, { "epoch": 68.97, "learning_rate": 1.0803883495145632e-05, "loss": 0.0987, "step": 177600 }, { "epoch": 68.97, "learning_rate": 1.080336569579288e-05, "loss": 0.1052, "step": 177610 }, { "epoch": 68.98, "learning_rate": 1.080284789644013e-05, "loss": 0.0704, "step": 177620 }, { "epoch": 68.98, "learning_rate": 1.080233009708738e-05, "loss": 0.0278, "step": 177630 }, { "epoch": 68.99, "learning_rate": 1.0801812297734629e-05, "loss": 0.0106, "step": 177640 }, { "epoch": 68.99, "learning_rate": 1.0801294498381878e-05, "loss": 0.1588, "step": 177650 }, { "epoch": 68.99, "learning_rate": 1.0800776699029128e-05, "loss": 0.0632, "step": 177660 }, { "epoch": 69.0, "learning_rate": 1.0800258899676378e-05, "loss": 0.0498, "step": 177670 }, { "epoch": 69.0, "eval_accuracy": 0.9507565337001376, "eval_loss": 0.33282846212387085, "eval_runtime": 8.256, "eval_samples_per_second": 440.285, "eval_steps_per_second": 55.111, "step": 177675 }, { "epoch": 69.0, "learning_rate": 1.0799741100323626e-05, "loss": 0.0004, "step": 177680 }, { "epoch": 69.01, "learning_rate": 1.0799223300970874e-05, "loss": 0.1271, "step": 177690 }, { "epoch": 69.01, "learning_rate": 1.0798705501618123e-05, "loss": 0.0668, "step": 177700 }, { "epoch": 69.01, "learning_rate": 1.0798187702265373e-05, "loss": 0.0604, "step": 177710 }, { "epoch": 69.02, "learning_rate": 1.0797669902912622e-05, "loss": 0.1426, "step": 177720 }, { "epoch": 69.02, "learning_rate": 1.0797152103559872e-05, "loss": 0.1536, "step": 177730 }, { "epoch": 69.03, "learning_rate": 1.0796634304207122e-05, "loss": 0.1229, "step": 177740 }, { "epoch": 69.03, "learning_rate": 1.079611650485437e-05, "loss": 0.0915, "step": 177750 }, { "epoch": 69.03, "learning_rate": 1.0795598705501619e-05, "loss": 0.045, "step": 177760 }, { "epoch": 69.04, "learning_rate": 1.0795080906148869e-05, "loss": 0.1046, "step": 177770 }, { "epoch": 69.04, "learning_rate": 1.0794563106796117e-05, "loss": 0.1974, "step": 177780 }, { "epoch": 69.04, "learning_rate": 1.0794045307443366e-05, "loss": 0.0502, "step": 177790 }, { "epoch": 69.05, "learning_rate": 1.0793527508090616e-05, "loss": 0.0079, "step": 177800 }, { "epoch": 69.05, "learning_rate": 1.0793009708737866e-05, "loss": 0.1, "step": 177810 }, { "epoch": 69.06, "learning_rate": 1.0792491909385115e-05, "loss": 0.1038, "step": 177820 }, { "epoch": 69.06, "learning_rate": 1.0791974110032363e-05, "loss": 0.1187, "step": 177830 }, { "epoch": 69.06, "learning_rate": 1.0791456310679613e-05, "loss": 0.0792, "step": 177840 }, { "epoch": 69.07, "learning_rate": 1.0790938511326862e-05, "loss": 0.0016, "step": 177850 }, { "epoch": 69.07, "learning_rate": 1.079042071197411e-05, "loss": 0.0075, "step": 177860 }, { "epoch": 69.08, "learning_rate": 1.078990291262136e-05, "loss": 0.0954, "step": 177870 }, { "epoch": 69.08, "learning_rate": 1.078938511326861e-05, "loss": 0.0456, "step": 177880 }, { "epoch": 69.08, "learning_rate": 1.0788867313915859e-05, "loss": 0.153, "step": 177890 }, { "epoch": 69.09, "learning_rate": 1.0788349514563107e-05, "loss": 0.0574, "step": 177900 }, { "epoch": 69.09, "learning_rate": 1.0787831715210357e-05, "loss": 0.0272, "step": 177910 }, { "epoch": 69.1, "learning_rate": 1.0787313915857606e-05, "loss": 0.1048, "step": 177920 }, { "epoch": 69.1, "learning_rate": 1.0786796116504856e-05, "loss": 0.1543, "step": 177930 }, { "epoch": 69.1, "learning_rate": 1.0786278317152104e-05, "loss": 0.2584, "step": 177940 }, { "epoch": 69.11, "learning_rate": 1.0785760517799353e-05, "loss": 0.103, "step": 177950 }, { "epoch": 69.11, "learning_rate": 1.0785242718446603e-05, "loss": 0.0818, "step": 177960 }, { "epoch": 69.11, "learning_rate": 1.0784724919093853e-05, "loss": 0.1342, "step": 177970 }, { "epoch": 69.12, "learning_rate": 1.07842071197411e-05, "loss": 0.063, "step": 177980 }, { "epoch": 69.12, "learning_rate": 1.078368932038835e-05, "loss": 0.0202, "step": 177990 }, { "epoch": 69.13, "learning_rate": 1.07831715210356e-05, "loss": 0.0502, "step": 178000 }, { "epoch": 69.13, "learning_rate": 1.078265372168285e-05, "loss": 0.041, "step": 178010 }, { "epoch": 69.13, "learning_rate": 1.0782135922330097e-05, "loss": 0.0574, "step": 178020 }, { "epoch": 69.14, "learning_rate": 1.0781618122977347e-05, "loss": 0.1061, "step": 178030 }, { "epoch": 69.14, "learning_rate": 1.0781100323624597e-05, "loss": 0.0853, "step": 178040 }, { "epoch": 69.15, "learning_rate": 1.0780582524271846e-05, "loss": 0.0372, "step": 178050 }, { "epoch": 69.15, "learning_rate": 1.0780064724919094e-05, "loss": 0.0567, "step": 178060 }, { "epoch": 69.15, "learning_rate": 1.0779546925566344e-05, "loss": 0.032, "step": 178070 }, { "epoch": 69.16, "learning_rate": 1.0779029126213593e-05, "loss": 0.0958, "step": 178080 }, { "epoch": 69.16, "learning_rate": 1.0778511326860843e-05, "loss": 0.0527, "step": 178090 }, { "epoch": 69.17, "learning_rate": 1.0777993527508091e-05, "loss": 0.0767, "step": 178100 }, { "epoch": 69.17, "learning_rate": 1.077747572815534e-05, "loss": 0.0693, "step": 178110 }, { "epoch": 69.17, "learning_rate": 1.077695792880259e-05, "loss": 0.0463, "step": 178120 }, { "epoch": 69.18, "learning_rate": 1.0776440129449838e-05, "loss": 0.0519, "step": 178130 }, { "epoch": 69.18, "learning_rate": 1.0775922330097088e-05, "loss": 0.0942, "step": 178140 }, { "epoch": 69.18, "learning_rate": 1.0775404530744337e-05, "loss": 0.0674, "step": 178150 }, { "epoch": 69.19, "learning_rate": 1.0774886731391587e-05, "loss": 0.0598, "step": 178160 }, { "epoch": 69.19, "learning_rate": 1.0774368932038837e-05, "loss": 0.028, "step": 178170 }, { "epoch": 69.2, "learning_rate": 1.0773851132686084e-05, "loss": 0.0215, "step": 178180 }, { "epoch": 69.2, "learning_rate": 1.0773333333333334e-05, "loss": 0.0839, "step": 178190 }, { "epoch": 69.2, "learning_rate": 1.0772815533980584e-05, "loss": 0.3014, "step": 178200 }, { "epoch": 69.21, "learning_rate": 1.0772297734627832e-05, "loss": 0.236, "step": 178210 }, { "epoch": 69.21, "learning_rate": 1.0771779935275081e-05, "loss": 0.0307, "step": 178220 }, { "epoch": 69.22, "learning_rate": 1.0771262135922331e-05, "loss": 0.0519, "step": 178230 }, { "epoch": 69.22, "learning_rate": 1.077074433656958e-05, "loss": 0.0687, "step": 178240 }, { "epoch": 69.22, "learning_rate": 1.077022653721683e-05, "loss": 0.1202, "step": 178250 }, { "epoch": 69.23, "learning_rate": 1.0769708737864078e-05, "loss": 0.0461, "step": 178260 }, { "epoch": 69.23, "learning_rate": 1.0769190938511328e-05, "loss": 0.1181, "step": 178270 }, { "epoch": 69.23, "learning_rate": 1.0768673139158576e-05, "loss": 0.0549, "step": 178280 }, { "epoch": 69.24, "learning_rate": 1.0768155339805825e-05, "loss": 0.0559, "step": 178290 }, { "epoch": 69.24, "learning_rate": 1.0767637540453075e-05, "loss": 0.0522, "step": 178300 }, { "epoch": 69.25, "learning_rate": 1.0767119741100324e-05, "loss": 0.0243, "step": 178310 }, { "epoch": 69.25, "learning_rate": 1.0766601941747574e-05, "loss": 0.0987, "step": 178320 }, { "epoch": 69.25, "learning_rate": 1.0766084142394824e-05, "loss": 0.0872, "step": 178330 }, { "epoch": 69.26, "learning_rate": 1.0765566343042073e-05, "loss": 0.0602, "step": 178340 }, { "epoch": 69.26, "learning_rate": 1.0765048543689321e-05, "loss": 0.2384, "step": 178350 }, { "epoch": 69.27, "learning_rate": 1.0764530744336569e-05, "loss": 0.0138, "step": 178360 }, { "epoch": 69.27, "learning_rate": 1.0764012944983819e-05, "loss": 0.0438, "step": 178370 }, { "epoch": 69.27, "learning_rate": 1.0763495145631068e-05, "loss": 0.0398, "step": 178380 }, { "epoch": 69.28, "learning_rate": 1.0762977346278318e-05, "loss": 0.0626, "step": 178390 }, { "epoch": 69.28, "learning_rate": 1.0762459546925568e-05, "loss": 0.0913, "step": 178400 }, { "epoch": 69.29, "learning_rate": 1.0761941747572817e-05, "loss": 0.1047, "step": 178410 }, { "epoch": 69.29, "learning_rate": 1.0761423948220067e-05, "loss": 0.1467, "step": 178420 }, { "epoch": 69.29, "learning_rate": 1.0760906148867313e-05, "loss": 0.0673, "step": 178430 }, { "epoch": 69.3, "learning_rate": 1.0760388349514563e-05, "loss": 0.0432, "step": 178440 }, { "epoch": 69.3, "learning_rate": 1.0759870550161812e-05, "loss": 0.0514, "step": 178450 }, { "epoch": 69.3, "learning_rate": 1.0759352750809062e-05, "loss": 0.0801, "step": 178460 }, { "epoch": 69.31, "learning_rate": 1.0758834951456312e-05, "loss": 0.0207, "step": 178470 }, { "epoch": 69.31, "learning_rate": 1.0758317152103561e-05, "loss": 0.0197, "step": 178480 }, { "epoch": 69.32, "learning_rate": 1.075779935275081e-05, "loss": 0.0015, "step": 178490 }, { "epoch": 69.32, "learning_rate": 1.075728155339806e-05, "loss": 0.0506, "step": 178500 }, { "epoch": 69.32, "learning_rate": 1.0756763754045307e-05, "loss": 0.03, "step": 178510 }, { "epoch": 69.33, "learning_rate": 1.0756245954692556e-05, "loss": 0.0066, "step": 178520 }, { "epoch": 69.33, "learning_rate": 1.0755728155339806e-05, "loss": 0.1083, "step": 178530 }, { "epoch": 69.34, "learning_rate": 1.0755210355987055e-05, "loss": 0.0321, "step": 178540 }, { "epoch": 69.34, "learning_rate": 1.0754692556634305e-05, "loss": 0.0527, "step": 178550 }, { "epoch": 69.34, "learning_rate": 1.0754174757281555e-05, "loss": 0.1802, "step": 178560 }, { "epoch": 69.35, "learning_rate": 1.0753656957928804e-05, "loss": 0.0904, "step": 178570 }, { "epoch": 69.35, "learning_rate": 1.0753139158576054e-05, "loss": 0.0056, "step": 178580 }, { "epoch": 69.36, "learning_rate": 1.07526213592233e-05, "loss": 0.0635, "step": 178590 }, { "epoch": 69.36, "learning_rate": 1.075210355987055e-05, "loss": 0.0423, "step": 178600 }, { "epoch": 69.36, "learning_rate": 1.07515857605178e-05, "loss": 0.0275, "step": 178610 }, { "epoch": 69.37, "learning_rate": 1.0751067961165049e-05, "loss": 0.0482, "step": 178620 }, { "epoch": 69.37, "learning_rate": 1.0750550161812299e-05, "loss": 0.0796, "step": 178630 }, { "epoch": 69.37, "learning_rate": 1.0750032362459548e-05, "loss": 0.0749, "step": 178640 }, { "epoch": 69.38, "learning_rate": 1.0749514563106798e-05, "loss": 0.0885, "step": 178650 }, { "epoch": 69.38, "learning_rate": 1.0748996763754048e-05, "loss": 0.1116, "step": 178660 }, { "epoch": 69.39, "learning_rate": 1.0748478964401294e-05, "loss": 0.0176, "step": 178670 }, { "epoch": 69.39, "learning_rate": 1.0747961165048543e-05, "loss": 0.0166, "step": 178680 }, { "epoch": 69.39, "learning_rate": 1.0747443365695793e-05, "loss": 0.0036, "step": 178690 }, { "epoch": 69.4, "learning_rate": 1.0746925566343043e-05, "loss": 0.0031, "step": 178700 }, { "epoch": 69.4, "learning_rate": 1.0746407766990292e-05, "loss": 0.0665, "step": 178710 }, { "epoch": 69.41, "learning_rate": 1.0745889967637542e-05, "loss": 0.05, "step": 178720 }, { "epoch": 69.41, "learning_rate": 1.0745372168284791e-05, "loss": 0.0988, "step": 178730 }, { "epoch": 69.41, "learning_rate": 1.0744854368932041e-05, "loss": 0.1728, "step": 178740 }, { "epoch": 69.42, "learning_rate": 1.0744336569579287e-05, "loss": 0.0007, "step": 178750 }, { "epoch": 69.42, "learning_rate": 1.0743818770226537e-05, "loss": 0.0871, "step": 178760 }, { "epoch": 69.43, "learning_rate": 1.0743300970873787e-05, "loss": 0.041, "step": 178770 }, { "epoch": 69.43, "learning_rate": 1.0742783171521036e-05, "loss": 0.1185, "step": 178780 }, { "epoch": 69.43, "learning_rate": 1.0742265372168286e-05, "loss": 0.0338, "step": 178790 }, { "epoch": 69.44, "learning_rate": 1.0741747572815535e-05, "loss": 0.0459, "step": 178800 }, { "epoch": 69.44, "learning_rate": 1.0741229773462785e-05, "loss": 0.0442, "step": 178810 }, { "epoch": 69.44, "learning_rate": 1.0740711974110035e-05, "loss": 0.0116, "step": 178820 }, { "epoch": 69.45, "learning_rate": 1.0740194174757284e-05, "loss": 0.0857, "step": 178830 }, { "epoch": 69.45, "learning_rate": 1.073967637540453e-05, "loss": 0.0857, "step": 178840 }, { "epoch": 69.46, "learning_rate": 1.073915857605178e-05, "loss": 0.0068, "step": 178850 }, { "epoch": 69.46, "learning_rate": 1.073864077669903e-05, "loss": 0.0911, "step": 178860 }, { "epoch": 69.46, "learning_rate": 1.073812297734628e-05, "loss": 0.0927, "step": 178870 }, { "epoch": 69.47, "learning_rate": 1.0737605177993529e-05, "loss": 0.0317, "step": 178880 }, { "epoch": 69.47, "learning_rate": 1.0737087378640779e-05, "loss": 0.0077, "step": 178890 }, { "epoch": 69.48, "learning_rate": 1.0736569579288028e-05, "loss": 0.0222, "step": 178900 }, { "epoch": 69.48, "learning_rate": 1.0736051779935278e-05, "loss": 0.0424, "step": 178910 }, { "epoch": 69.48, "learning_rate": 1.0735533980582524e-05, "loss": 0.0542, "step": 178920 }, { "epoch": 69.49, "learning_rate": 1.0735016181229774e-05, "loss": 0.0371, "step": 178930 }, { "epoch": 69.49, "learning_rate": 1.0734498381877023e-05, "loss": 0.0099, "step": 178940 }, { "epoch": 69.5, "learning_rate": 1.0733980582524273e-05, "loss": 0.0036, "step": 178950 }, { "epoch": 69.5, "learning_rate": 1.0733462783171522e-05, "loss": 0.0405, "step": 178960 }, { "epoch": 69.5, "learning_rate": 1.0732944983818772e-05, "loss": 0.0015, "step": 178970 }, { "epoch": 69.51, "learning_rate": 1.0732427184466022e-05, "loss": 0.0234, "step": 178980 }, { "epoch": 69.51, "learning_rate": 1.0731909385113271e-05, "loss": 0.1003, "step": 178990 }, { "epoch": 69.51, "learning_rate": 1.0731391585760518e-05, "loss": 0.0623, "step": 179000 }, { "epoch": 69.52, "learning_rate": 1.0730873786407767e-05, "loss": 0.0625, "step": 179010 }, { "epoch": 69.52, "learning_rate": 1.0730355987055017e-05, "loss": 0.023, "step": 179020 }, { "epoch": 69.53, "learning_rate": 1.0729838187702266e-05, "loss": 0.0514, "step": 179030 }, { "epoch": 69.53, "learning_rate": 1.0729320388349516e-05, "loss": 0.0327, "step": 179040 }, { "epoch": 69.53, "learning_rate": 1.0728802588996766e-05, "loss": 0.0703, "step": 179050 }, { "epoch": 69.54, "learning_rate": 1.0728284789644015e-05, "loss": 0.1096, "step": 179060 }, { "epoch": 69.54, "learning_rate": 1.0727766990291263e-05, "loss": 0.0177, "step": 179070 }, { "epoch": 69.55, "learning_rate": 1.0727249190938511e-05, "loss": 0.0192, "step": 179080 }, { "epoch": 69.55, "learning_rate": 1.072673139158576e-05, "loss": 0.0768, "step": 179090 }, { "epoch": 69.55, "learning_rate": 1.072621359223301e-05, "loss": 0.0889, "step": 179100 }, { "epoch": 69.56, "learning_rate": 1.072569579288026e-05, "loss": 0.1635, "step": 179110 }, { "epoch": 69.56, "learning_rate": 1.072517799352751e-05, "loss": 0.0406, "step": 179120 }, { "epoch": 69.57, "learning_rate": 1.072466019417476e-05, "loss": 0.015, "step": 179130 }, { "epoch": 69.57, "learning_rate": 1.0724142394822009e-05, "loss": 0.0315, "step": 179140 }, { "epoch": 69.57, "learning_rate": 1.0723624595469257e-05, "loss": 0.0311, "step": 179150 }, { "epoch": 69.58, "learning_rate": 1.0723106796116505e-05, "loss": 0.1396, "step": 179160 }, { "epoch": 69.58, "learning_rate": 1.0722588996763754e-05, "loss": 0.0037, "step": 179170 }, { "epoch": 69.58, "learning_rate": 1.0722071197411004e-05, "loss": 0.0996, "step": 179180 }, { "epoch": 69.59, "learning_rate": 1.0721553398058254e-05, "loss": 0.1059, "step": 179190 }, { "epoch": 69.59, "learning_rate": 1.0721035598705503e-05, "loss": 0.0001, "step": 179200 }, { "epoch": 69.6, "learning_rate": 1.0720517799352753e-05, "loss": 0.0086, "step": 179210 }, { "epoch": 69.6, "learning_rate": 1.072e-05, "loss": 0.0029, "step": 179220 }, { "epoch": 69.6, "learning_rate": 1.071948220064725e-05, "loss": 0.0936, "step": 179230 }, { "epoch": 69.61, "learning_rate": 1.0718964401294498e-05, "loss": 0.0296, "step": 179240 }, { "epoch": 69.61, "learning_rate": 1.0718446601941748e-05, "loss": 0.0968, "step": 179250 }, { "epoch": 69.62, "learning_rate": 1.0717928802588997e-05, "loss": 0.0793, "step": 179260 }, { "epoch": 69.62, "learning_rate": 1.0717411003236247e-05, "loss": 0.2012, "step": 179270 }, { "epoch": 69.62, "learning_rate": 1.0716893203883497e-05, "loss": 0.0627, "step": 179280 }, { "epoch": 69.63, "learning_rate": 1.0716375404530746e-05, "loss": 0.0525, "step": 179290 }, { "epoch": 69.63, "learning_rate": 1.0715857605177994e-05, "loss": 0.0552, "step": 179300 }, { "epoch": 69.63, "learning_rate": 1.0715339805825244e-05, "loss": 0.1928, "step": 179310 }, { "epoch": 69.64, "learning_rate": 1.0714822006472492e-05, "loss": 0.0413, "step": 179320 }, { "epoch": 69.64, "learning_rate": 1.0714304207119741e-05, "loss": 0.0633, "step": 179330 }, { "epoch": 69.65, "learning_rate": 1.0713786407766991e-05, "loss": 0.0962, "step": 179340 }, { "epoch": 69.65, "learning_rate": 1.071326860841424e-05, "loss": 0.1175, "step": 179350 }, { "epoch": 69.65, "learning_rate": 1.071275080906149e-05, "loss": 0.1767, "step": 179360 }, { "epoch": 69.66, "learning_rate": 1.0712233009708738e-05, "loss": 0.0007, "step": 179370 }, { "epoch": 69.66, "learning_rate": 1.0711715210355988e-05, "loss": 0.0738, "step": 179380 }, { "epoch": 69.67, "learning_rate": 1.0711197411003237e-05, "loss": 0.128, "step": 179390 }, { "epoch": 69.67, "learning_rate": 1.0710679611650487e-05, "loss": 0.0399, "step": 179400 }, { "epoch": 69.67, "learning_rate": 1.0710161812297735e-05, "loss": 0.0863, "step": 179410 }, { "epoch": 69.68, "learning_rate": 1.0709644012944985e-05, "loss": 0.0779, "step": 179420 }, { "epoch": 69.68, "learning_rate": 1.0709126213592234e-05, "loss": 0.1155, "step": 179430 }, { "epoch": 69.69, "learning_rate": 1.0708608414239484e-05, "loss": 0.1061, "step": 179440 }, { "epoch": 69.69, "learning_rate": 1.0708090614886732e-05, "loss": 0.0009, "step": 179450 }, { "epoch": 69.69, "learning_rate": 1.0707572815533981e-05, "loss": 0.0429, "step": 179460 }, { "epoch": 69.7, "learning_rate": 1.0707055016181231e-05, "loss": 0.0769, "step": 179470 }, { "epoch": 69.7, "learning_rate": 1.070653721682848e-05, "loss": 0.0032, "step": 179480 }, { "epoch": 69.7, "learning_rate": 1.0706019417475729e-05, "loss": 0.1538, "step": 179490 }, { "epoch": 69.71, "learning_rate": 1.0705501618122978e-05, "loss": 0.2242, "step": 179500 }, { "epoch": 69.71, "learning_rate": 1.0704983818770228e-05, "loss": 0.0618, "step": 179510 }, { "epoch": 69.72, "learning_rate": 1.0704466019417477e-05, "loss": 0.0218, "step": 179520 }, { "epoch": 69.72, "learning_rate": 1.0703948220064725e-05, "loss": 0.0212, "step": 179530 }, { "epoch": 69.72, "learning_rate": 1.0703430420711975e-05, "loss": 0.049, "step": 179540 }, { "epoch": 69.73, "learning_rate": 1.0702912621359225e-05, "loss": 0.0338, "step": 179550 }, { "epoch": 69.73, "learning_rate": 1.0702394822006474e-05, "loss": 0.1114, "step": 179560 }, { "epoch": 69.74, "learning_rate": 1.0701877022653722e-05, "loss": 0.1169, "step": 179570 }, { "epoch": 69.74, "learning_rate": 1.0701359223300972e-05, "loss": 0.1006, "step": 179580 }, { "epoch": 69.74, "learning_rate": 1.0700841423948221e-05, "loss": 0.1587, "step": 179590 }, { "epoch": 69.75, "learning_rate": 1.070032362459547e-05, "loss": 0.169, "step": 179600 }, { "epoch": 69.75, "learning_rate": 1.0699805825242719e-05, "loss": 0.0215, "step": 179610 }, { "epoch": 69.76, "learning_rate": 1.0699288025889968e-05, "loss": 0.0094, "step": 179620 }, { "epoch": 69.76, "learning_rate": 1.0698770226537218e-05, "loss": 0.1559, "step": 179630 }, { "epoch": 69.76, "learning_rate": 1.0698252427184468e-05, "loss": 0.0109, "step": 179640 }, { "epoch": 69.77, "learning_rate": 1.0697734627831716e-05, "loss": 0.1409, "step": 179650 }, { "epoch": 69.77, "learning_rate": 1.0697216828478965e-05, "loss": 0.1355, "step": 179660 }, { "epoch": 69.77, "learning_rate": 1.0696699029126215e-05, "loss": 0.0298, "step": 179670 }, { "epoch": 69.78, "learning_rate": 1.0696181229773463e-05, "loss": 0.1295, "step": 179680 }, { "epoch": 69.78, "learning_rate": 1.0695663430420712e-05, "loss": 0.0692, "step": 179690 }, { "epoch": 69.79, "learning_rate": 1.0695145631067962e-05, "loss": 0.1102, "step": 179700 }, { "epoch": 69.79, "learning_rate": 1.0694627831715212e-05, "loss": 0.0398, "step": 179710 }, { "epoch": 69.79, "learning_rate": 1.0694110032362461e-05, "loss": 0.0611, "step": 179720 }, { "epoch": 69.8, "learning_rate": 1.069359223300971e-05, "loss": 0.1066, "step": 179730 }, { "epoch": 69.8, "learning_rate": 1.0693074433656959e-05, "loss": 0.0552, "step": 179740 }, { "epoch": 69.81, "learning_rate": 1.0692556634304207e-05, "loss": 0.0577, "step": 179750 }, { "epoch": 69.81, "learning_rate": 1.0692038834951456e-05, "loss": 0.0602, "step": 179760 }, { "epoch": 69.81, "learning_rate": 1.0691521035598706e-05, "loss": 0.0356, "step": 179770 }, { "epoch": 69.82, "learning_rate": 1.0691003236245956e-05, "loss": 0.1042, "step": 179780 }, { "epoch": 69.82, "learning_rate": 1.0690485436893205e-05, "loss": 0.0514, "step": 179790 }, { "epoch": 69.83, "learning_rate": 1.0689967637540455e-05, "loss": 0.057, "step": 179800 }, { "epoch": 69.83, "learning_rate": 1.0689449838187703e-05, "loss": 0.0294, "step": 179810 }, { "epoch": 69.83, "learning_rate": 1.0688932038834952e-05, "loss": 0.0002, "step": 179820 }, { "epoch": 69.84, "learning_rate": 1.06884142394822e-05, "loss": 0.0745, "step": 179830 }, { "epoch": 69.84, "learning_rate": 1.068789644012945e-05, "loss": 0.0042, "step": 179840 }, { "epoch": 69.84, "learning_rate": 1.06873786407767e-05, "loss": 0.167, "step": 179850 }, { "epoch": 69.85, "learning_rate": 1.0686860841423949e-05, "loss": 0.009, "step": 179860 }, { "epoch": 69.85, "learning_rate": 1.0686343042071199e-05, "loss": 0.193, "step": 179870 }, { "epoch": 69.86, "learning_rate": 1.0685825242718448e-05, "loss": 0.0712, "step": 179880 }, { "epoch": 69.86, "learning_rate": 1.0685307443365696e-05, "loss": 0.0619, "step": 179890 }, { "epoch": 69.86, "learning_rate": 1.0684789644012944e-05, "loss": 0.0177, "step": 179900 }, { "epoch": 69.87, "learning_rate": 1.0684271844660194e-05, "loss": 0.0033, "step": 179910 }, { "epoch": 69.87, "learning_rate": 1.0683754045307443e-05, "loss": 0.0528, "step": 179920 }, { "epoch": 69.88, "learning_rate": 1.0683236245954693e-05, "loss": 0.0754, "step": 179930 }, { "epoch": 69.88, "learning_rate": 1.0682718446601943e-05, "loss": 0.1956, "step": 179940 }, { "epoch": 69.88, "learning_rate": 1.0682200647249192e-05, "loss": 0.0623, "step": 179950 }, { "epoch": 69.89, "learning_rate": 1.0681682847896442e-05, "loss": 0.0167, "step": 179960 }, { "epoch": 69.89, "learning_rate": 1.0681165048543692e-05, "loss": 0.1324, "step": 179970 }, { "epoch": 69.9, "learning_rate": 1.0680647249190938e-05, "loss": 0.0345, "step": 179980 }, { "epoch": 69.9, "learning_rate": 1.0680129449838187e-05, "loss": 0.0711, "step": 179990 }, { "epoch": 69.9, "learning_rate": 1.0679611650485437e-05, "loss": 0.0772, "step": 180000 }, { "epoch": 69.91, "learning_rate": 1.0679093851132687e-05, "loss": 0.0465, "step": 180010 }, { "epoch": 69.91, "learning_rate": 1.0678576051779936e-05, "loss": 0.0376, "step": 180020 }, { "epoch": 69.91, "learning_rate": 1.0678058252427186e-05, "loss": 0.0316, "step": 180030 }, { "epoch": 69.92, "learning_rate": 1.0677540453074435e-05, "loss": 0.0507, "step": 180040 }, { "epoch": 69.92, "learning_rate": 1.0677022653721685e-05, "loss": 0.1756, "step": 180050 }, { "epoch": 69.93, "learning_rate": 1.0676504854368931e-05, "loss": 0.1412, "step": 180060 }, { "epoch": 69.93, "learning_rate": 1.0675987055016181e-05, "loss": 0.0138, "step": 180070 }, { "epoch": 69.93, "learning_rate": 1.067546925566343e-05, "loss": 0.0967, "step": 180080 }, { "epoch": 69.94, "learning_rate": 1.067495145631068e-05, "loss": 0.0026, "step": 180090 }, { "epoch": 69.94, "learning_rate": 1.067443365695793e-05, "loss": 0.0295, "step": 180100 }, { "epoch": 69.95, "learning_rate": 1.067391585760518e-05, "loss": 0.0279, "step": 180110 }, { "epoch": 69.95, "learning_rate": 1.0673398058252429e-05, "loss": 0.0883, "step": 180120 }, { "epoch": 69.95, "learning_rate": 1.0672880258899679e-05, "loss": 0.0196, "step": 180130 }, { "epoch": 69.96, "learning_rate": 1.0672362459546925e-05, "loss": 0.0159, "step": 180140 }, { "epoch": 69.96, "learning_rate": 1.0671844660194175e-05, "loss": 0.0602, "step": 180150 }, { "epoch": 69.97, "learning_rate": 1.0671326860841424e-05, "loss": 0.0699, "step": 180160 }, { "epoch": 69.97, "learning_rate": 1.0670809061488674e-05, "loss": 0.1291, "step": 180170 }, { "epoch": 69.97, "learning_rate": 1.0670291262135923e-05, "loss": 0.0144, "step": 180180 }, { "epoch": 69.98, "learning_rate": 1.0669773462783173e-05, "loss": 0.0077, "step": 180190 }, { "epoch": 69.98, "learning_rate": 1.0669255663430423e-05, "loss": 0.1981, "step": 180200 }, { "epoch": 69.98, "learning_rate": 1.0668737864077672e-05, "loss": 0.0833, "step": 180210 }, { "epoch": 69.99, "learning_rate": 1.0668220064724918e-05, "loss": 0.1865, "step": 180220 }, { "epoch": 69.99, "learning_rate": 1.0667702265372168e-05, "loss": 0.005, "step": 180230 }, { "epoch": 70.0, "learning_rate": 1.0667184466019418e-05, "loss": 0.2284, "step": 180240 }, { "epoch": 70.0, "learning_rate": 1.0666666666666667e-05, "loss": 0.0535, "step": 180250 }, { "epoch": 70.0, "eval_accuracy": 0.9499312242090784, "eval_loss": 0.3557586073875427, "eval_runtime": 8.2114, "eval_samples_per_second": 442.677, "eval_steps_per_second": 55.411, "step": 180250 }, { "epoch": 70.0, "learning_rate": 1.0666148867313917e-05, "loss": 0.0776, "step": 180260 }, { "epoch": 70.01, "learning_rate": 1.0665631067961167e-05, "loss": 0.0792, "step": 180270 }, { "epoch": 70.01, "learning_rate": 1.0665113268608416e-05, "loss": 0.0006, "step": 180280 }, { "epoch": 70.02, "learning_rate": 1.0664595469255666e-05, "loss": 0.0909, "step": 180290 }, { "epoch": 70.02, "learning_rate": 1.0664077669902912e-05, "loss": 0.044, "step": 180300 }, { "epoch": 70.02, "learning_rate": 1.0663559870550162e-05, "loss": 0.0509, "step": 180310 }, { "epoch": 70.03, "learning_rate": 1.0663042071197411e-05, "loss": 0.0522, "step": 180320 }, { "epoch": 70.03, "learning_rate": 1.0662524271844661e-05, "loss": 0.0662, "step": 180330 }, { "epoch": 70.03, "learning_rate": 1.066200647249191e-05, "loss": 0.1108, "step": 180340 }, { "epoch": 70.04, "learning_rate": 1.066148867313916e-05, "loss": 0.0607, "step": 180350 }, { "epoch": 70.04, "learning_rate": 1.066097087378641e-05, "loss": 0.0527, "step": 180360 }, { "epoch": 70.05, "learning_rate": 1.066045307443366e-05, "loss": 0.0929, "step": 180370 }, { "epoch": 70.05, "learning_rate": 1.0659935275080906e-05, "loss": 0.0645, "step": 180380 }, { "epoch": 70.05, "learning_rate": 1.0659417475728155e-05, "loss": 0.0156, "step": 180390 }, { "epoch": 70.06, "learning_rate": 1.0658899676375405e-05, "loss": 0.1432, "step": 180400 }, { "epoch": 70.06, "learning_rate": 1.0658381877022654e-05, "loss": 0.0607, "step": 180410 }, { "epoch": 70.07, "learning_rate": 1.0657864077669904e-05, "loss": 0.1211, "step": 180420 }, { "epoch": 70.07, "learning_rate": 1.0657346278317154e-05, "loss": 0.0047, "step": 180430 }, { "epoch": 70.07, "learning_rate": 1.0656828478964403e-05, "loss": 0.0951, "step": 180440 }, { "epoch": 70.08, "learning_rate": 1.0656310679611653e-05, "loss": 0.1895, "step": 180450 }, { "epoch": 70.08, "learning_rate": 1.0655792880258899e-05, "loss": 0.0253, "step": 180460 }, { "epoch": 70.09, "learning_rate": 1.0655275080906149e-05, "loss": 0.125, "step": 180470 }, { "epoch": 70.09, "learning_rate": 1.0654757281553398e-05, "loss": 0.0908, "step": 180480 }, { "epoch": 70.09, "learning_rate": 1.0654239482200648e-05, "loss": 0.0676, "step": 180490 }, { "epoch": 70.1, "learning_rate": 1.0653721682847898e-05, "loss": 0.1533, "step": 180500 }, { "epoch": 70.1, "learning_rate": 1.0653203883495147e-05, "loss": 0.0626, "step": 180510 }, { "epoch": 70.1, "learning_rate": 1.0652686084142397e-05, "loss": 0.0676, "step": 180520 }, { "epoch": 70.11, "learning_rate": 1.0652168284789646e-05, "loss": 0.0458, "step": 180530 }, { "epoch": 70.11, "learning_rate": 1.0651650485436894e-05, "loss": 0.0576, "step": 180540 }, { "epoch": 70.12, "learning_rate": 1.0651132686084142e-05, "loss": 0.0674, "step": 180550 }, { "epoch": 70.12, "learning_rate": 1.0650614886731392e-05, "loss": 0.0064, "step": 180560 }, { "epoch": 70.12, "learning_rate": 1.0650097087378642e-05, "loss": 0.0322, "step": 180570 }, { "epoch": 70.13, "learning_rate": 1.0649579288025891e-05, "loss": 0.1584, "step": 180580 }, { "epoch": 70.13, "learning_rate": 1.064906148867314e-05, "loss": 0.018, "step": 180590 }, { "epoch": 70.14, "learning_rate": 1.064854368932039e-05, "loss": 0.0612, "step": 180600 }, { "epoch": 70.14, "learning_rate": 1.064802588996764e-05, "loss": 0.0768, "step": 180610 }, { "epoch": 70.14, "learning_rate": 1.0647508090614888e-05, "loss": 0.0011, "step": 180620 }, { "epoch": 70.15, "learning_rate": 1.0646990291262136e-05, "loss": 0.0725, "step": 180630 }, { "epoch": 70.15, "learning_rate": 1.0646472491909385e-05, "loss": 0.033, "step": 180640 }, { "epoch": 70.16, "learning_rate": 1.0645954692556635e-05, "loss": 0.0915, "step": 180650 }, { "epoch": 70.16, "learning_rate": 1.0645436893203885e-05, "loss": 0.0632, "step": 180660 }, { "epoch": 70.16, "learning_rate": 1.0644919093851134e-05, "loss": 0.0752, "step": 180670 }, { "epoch": 70.17, "learning_rate": 1.0644401294498384e-05, "loss": 0.0688, "step": 180680 }, { "epoch": 70.17, "learning_rate": 1.0643883495145632e-05, "loss": 0.0155, "step": 180690 }, { "epoch": 70.17, "learning_rate": 1.0643365695792881e-05, "loss": 0.0709, "step": 180700 }, { "epoch": 70.18, "learning_rate": 1.064284789644013e-05, "loss": 0.1874, "step": 180710 }, { "epoch": 70.18, "learning_rate": 1.0642330097087379e-05, "loss": 0.0868, "step": 180720 }, { "epoch": 70.19, "learning_rate": 1.0641812297734629e-05, "loss": 0.1348, "step": 180730 }, { "epoch": 70.19, "learning_rate": 1.0641294498381878e-05, "loss": 0.039, "step": 180740 }, { "epoch": 70.19, "learning_rate": 1.0640776699029128e-05, "loss": 0.0343, "step": 180750 }, { "epoch": 70.2, "learning_rate": 1.0640258899676378e-05, "loss": 0.0025, "step": 180760 }, { "epoch": 70.2, "learning_rate": 1.0639741100323625e-05, "loss": 0.0929, "step": 180770 }, { "epoch": 70.21, "learning_rate": 1.0639223300970875e-05, "loss": 0.0931, "step": 180780 }, { "epoch": 70.21, "learning_rate": 1.0638705501618123e-05, "loss": 0.0076, "step": 180790 }, { "epoch": 70.21, "learning_rate": 1.0638187702265373e-05, "loss": 0.0563, "step": 180800 }, { "epoch": 70.22, "learning_rate": 1.0637669902912622e-05, "loss": 0.1776, "step": 180810 }, { "epoch": 70.22, "learning_rate": 1.0637152103559872e-05, "loss": 0.1674, "step": 180820 }, { "epoch": 70.23, "learning_rate": 1.0636634304207121e-05, "loss": 0.1463, "step": 180830 }, { "epoch": 70.23, "learning_rate": 1.063611650485437e-05, "loss": 0.0486, "step": 180840 }, { "epoch": 70.23, "learning_rate": 1.0635598705501619e-05, "loss": 0.0281, "step": 180850 }, { "epoch": 70.24, "learning_rate": 1.0635080906148869e-05, "loss": 0.0035, "step": 180860 }, { "epoch": 70.24, "learning_rate": 1.0634563106796117e-05, "loss": 0.0187, "step": 180870 }, { "epoch": 70.24, "learning_rate": 1.0634045307443366e-05, "loss": 0.091, "step": 180880 }, { "epoch": 70.25, "learning_rate": 1.0633527508090616e-05, "loss": 0.0641, "step": 180890 }, { "epoch": 70.25, "learning_rate": 1.0633009708737865e-05, "loss": 0.1093, "step": 180900 }, { "epoch": 70.26, "learning_rate": 1.0632491909385115e-05, "loss": 0.1469, "step": 180910 }, { "epoch": 70.26, "learning_rate": 1.0631974110032363e-05, "loss": 0.0112, "step": 180920 }, { "epoch": 70.26, "learning_rate": 1.0631456310679613e-05, "loss": 0.0728, "step": 180930 }, { "epoch": 70.27, "learning_rate": 1.0630938511326862e-05, "loss": 0.015, "step": 180940 }, { "epoch": 70.27, "learning_rate": 1.063042071197411e-05, "loss": 0.0371, "step": 180950 }, { "epoch": 70.28, "learning_rate": 1.062990291262136e-05, "loss": 0.0141, "step": 180960 }, { "epoch": 70.28, "learning_rate": 1.062938511326861e-05, "loss": 0.0523, "step": 180970 }, { "epoch": 70.28, "learning_rate": 1.0628867313915859e-05, "loss": 0.185, "step": 180980 }, { "epoch": 70.29, "learning_rate": 1.0628349514563107e-05, "loss": 0.0559, "step": 180990 }, { "epoch": 70.29, "learning_rate": 1.0627831715210356e-05, "loss": 0.1456, "step": 181000 }, { "epoch": 70.3, "learning_rate": 1.0627313915857606e-05, "loss": 0.1343, "step": 181010 }, { "epoch": 70.3, "learning_rate": 1.0626796116504856e-05, "loss": 0.0243, "step": 181020 }, { "epoch": 70.3, "learning_rate": 1.0626278317152104e-05, "loss": 0.0198, "step": 181030 }, { "epoch": 70.31, "learning_rate": 1.0625760517799353e-05, "loss": 0.0022, "step": 181040 }, { "epoch": 70.31, "learning_rate": 1.0625242718446603e-05, "loss": 0.0457, "step": 181050 }, { "epoch": 70.31, "learning_rate": 1.0624724919093852e-05, "loss": 0.1107, "step": 181060 }, { "epoch": 70.32, "learning_rate": 1.06242071197411e-05, "loss": 0.1717, "step": 181070 }, { "epoch": 70.32, "learning_rate": 1.062368932038835e-05, "loss": 0.0457, "step": 181080 }, { "epoch": 70.33, "learning_rate": 1.06231715210356e-05, "loss": 0.092, "step": 181090 }, { "epoch": 70.33, "learning_rate": 1.062265372168285e-05, "loss": 0.0513, "step": 181100 }, { "epoch": 70.33, "learning_rate": 1.0622135922330099e-05, "loss": 0.0384, "step": 181110 }, { "epoch": 70.34, "learning_rate": 1.0621618122977347e-05, "loss": 0.0658, "step": 181120 }, { "epoch": 70.34, "learning_rate": 1.0621100323624596e-05, "loss": 0.0604, "step": 181130 }, { "epoch": 70.35, "learning_rate": 1.0620582524271846e-05, "loss": 0.084, "step": 181140 }, { "epoch": 70.35, "learning_rate": 1.0620064724919094e-05, "loss": 0.0428, "step": 181150 }, { "epoch": 70.35, "learning_rate": 1.0619546925566344e-05, "loss": 0.0741, "step": 181160 }, { "epoch": 70.36, "learning_rate": 1.0619029126213593e-05, "loss": 0.0411, "step": 181170 }, { "epoch": 70.36, "learning_rate": 1.0618511326860843e-05, "loss": 0.0358, "step": 181180 }, { "epoch": 70.37, "learning_rate": 1.0617993527508092e-05, "loss": 0.1684, "step": 181190 }, { "epoch": 70.37, "learning_rate": 1.061747572815534e-05, "loss": 0.0372, "step": 181200 }, { "epoch": 70.37, "learning_rate": 1.061695792880259e-05, "loss": 0.0321, "step": 181210 }, { "epoch": 70.38, "learning_rate": 1.0616440129449838e-05, "loss": 0.1459, "step": 181220 }, { "epoch": 70.38, "learning_rate": 1.0615922330097088e-05, "loss": 0.0325, "step": 181230 }, { "epoch": 70.38, "learning_rate": 1.0615404530744337e-05, "loss": 0.023, "step": 181240 }, { "epoch": 70.39, "learning_rate": 1.0614886731391587e-05, "loss": 0.1089, "step": 181250 }, { "epoch": 70.39, "learning_rate": 1.0614368932038836e-05, "loss": 0.102, "step": 181260 }, { "epoch": 70.4, "learning_rate": 1.0613851132686086e-05, "loss": 0.064, "step": 181270 }, { "epoch": 70.4, "learning_rate": 1.0613333333333334e-05, "loss": 0.2323, "step": 181280 }, { "epoch": 70.4, "learning_rate": 1.0612815533980584e-05, "loss": 0.0232, "step": 181290 }, { "epoch": 70.41, "learning_rate": 1.0612297734627831e-05, "loss": 0.1379, "step": 181300 }, { "epoch": 70.41, "learning_rate": 1.0611779935275081e-05, "loss": 0.0997, "step": 181310 }, { "epoch": 70.42, "learning_rate": 1.061126213592233e-05, "loss": 0.0448, "step": 181320 }, { "epoch": 70.42, "learning_rate": 1.061074433656958e-05, "loss": 0.1354, "step": 181330 }, { "epoch": 70.42, "learning_rate": 1.061022653721683e-05, "loss": 0.0324, "step": 181340 }, { "epoch": 70.43, "learning_rate": 1.060970873786408e-05, "loss": 0.0649, "step": 181350 }, { "epoch": 70.43, "learning_rate": 1.0609190938511327e-05, "loss": 0.0132, "step": 181360 }, { "epoch": 70.43, "learning_rate": 1.0608673139158575e-05, "loss": 0.005, "step": 181370 }, { "epoch": 70.44, "learning_rate": 1.0608155339805825e-05, "loss": 0.0269, "step": 181380 }, { "epoch": 70.44, "learning_rate": 1.0607637540453075e-05, "loss": 0.0618, "step": 181390 }, { "epoch": 70.45, "learning_rate": 1.0607119741100324e-05, "loss": 0.0234, "step": 181400 }, { "epoch": 70.45, "learning_rate": 1.0606601941747574e-05, "loss": 0.0548, "step": 181410 }, { "epoch": 70.45, "learning_rate": 1.0606084142394823e-05, "loss": 0.0654, "step": 181420 }, { "epoch": 70.46, "learning_rate": 1.0605566343042073e-05, "loss": 0.0294, "step": 181430 }, { "epoch": 70.46, "learning_rate": 1.0605048543689321e-05, "loss": 0.0181, "step": 181440 }, { "epoch": 70.47, "learning_rate": 1.0604530744336569e-05, "loss": 0.0012, "step": 181450 }, { "epoch": 70.47, "learning_rate": 1.0604012944983819e-05, "loss": 0.1554, "step": 181460 }, { "epoch": 70.47, "learning_rate": 1.0603495145631068e-05, "loss": 0.0886, "step": 181470 }, { "epoch": 70.48, "learning_rate": 1.0602977346278318e-05, "loss": 0.0501, "step": 181480 }, { "epoch": 70.48, "learning_rate": 1.0602459546925567e-05, "loss": 0.0204, "step": 181490 }, { "epoch": 70.49, "learning_rate": 1.0601941747572817e-05, "loss": 0.0053, "step": 181500 }, { "epoch": 70.49, "learning_rate": 1.0601423948220067e-05, "loss": 0.0004, "step": 181510 }, { "epoch": 70.49, "learning_rate": 1.0600906148867313e-05, "loss": 0.0781, "step": 181520 }, { "epoch": 70.5, "learning_rate": 1.0600388349514563e-05, "loss": 0.0771, "step": 181530 }, { "epoch": 70.5, "learning_rate": 1.0599870550161812e-05, "loss": 0.0046, "step": 181540 }, { "epoch": 70.5, "learning_rate": 1.0599352750809062e-05, "loss": 0.1046, "step": 181550 }, { "epoch": 70.51, "learning_rate": 1.0598834951456311e-05, "loss": 0.21, "step": 181560 }, { "epoch": 70.51, "learning_rate": 1.0598317152103561e-05, "loss": 0.0298, "step": 181570 }, { "epoch": 70.52, "learning_rate": 1.059779935275081e-05, "loss": 0.0729, "step": 181580 }, { "epoch": 70.52, "learning_rate": 1.059728155339806e-05, "loss": 0.0074, "step": 181590 }, { "epoch": 70.52, "learning_rate": 1.0596763754045306e-05, "loss": 0.0262, "step": 181600 }, { "epoch": 70.53, "learning_rate": 1.0596245954692556e-05, "loss": 0.0331, "step": 181610 }, { "epoch": 70.53, "learning_rate": 1.0595728155339806e-05, "loss": 0.0796, "step": 181620 }, { "epoch": 70.54, "learning_rate": 1.0595210355987055e-05, "loss": 0.0408, "step": 181630 }, { "epoch": 70.54, "learning_rate": 1.0594692556634305e-05, "loss": 0.0032, "step": 181640 }, { "epoch": 70.54, "learning_rate": 1.0594174757281555e-05, "loss": 0.0888, "step": 181650 }, { "epoch": 70.55, "learning_rate": 1.0593656957928804e-05, "loss": 0.1528, "step": 181660 }, { "epoch": 70.55, "learning_rate": 1.0593139158576054e-05, "loss": 0.2742, "step": 181670 }, { "epoch": 70.56, "learning_rate": 1.0592621359223303e-05, "loss": 0.0995, "step": 181680 }, { "epoch": 70.56, "learning_rate": 1.059210355987055e-05, "loss": 0.0751, "step": 181690 }, { "epoch": 70.56, "learning_rate": 1.05915857605178e-05, "loss": 0.0818, "step": 181700 }, { "epoch": 70.57, "learning_rate": 1.0591067961165049e-05, "loss": 0.0878, "step": 181710 }, { "epoch": 70.57, "learning_rate": 1.0590550161812298e-05, "loss": 0.0602, "step": 181720 }, { "epoch": 70.57, "learning_rate": 1.0590032362459548e-05, "loss": 0.0711, "step": 181730 }, { "epoch": 70.58, "learning_rate": 1.0589514563106798e-05, "loss": 0.0392, "step": 181740 }, { "epoch": 70.58, "learning_rate": 1.0588996763754047e-05, "loss": 0.0599, "step": 181750 }, { "epoch": 70.59, "learning_rate": 1.0588478964401297e-05, "loss": 0.2649, "step": 181760 }, { "epoch": 70.59, "learning_rate": 1.0587961165048543e-05, "loss": 0.0328, "step": 181770 }, { "epoch": 70.59, "learning_rate": 1.0587443365695793e-05, "loss": 0.0075, "step": 181780 }, { "epoch": 70.6, "learning_rate": 1.0586925566343042e-05, "loss": 0.1296, "step": 181790 }, { "epoch": 70.6, "learning_rate": 1.0586407766990292e-05, "loss": 0.0169, "step": 181800 }, { "epoch": 70.61, "learning_rate": 1.0585889967637542e-05, "loss": 0.0536, "step": 181810 }, { "epoch": 70.61, "learning_rate": 1.0585372168284791e-05, "loss": 0.1202, "step": 181820 }, { "epoch": 70.61, "learning_rate": 1.0584854368932041e-05, "loss": 0.0007, "step": 181830 }, { "epoch": 70.62, "learning_rate": 1.058433656957929e-05, "loss": 0.1145, "step": 181840 }, { "epoch": 70.62, "learning_rate": 1.0583818770226537e-05, "loss": 0.0289, "step": 181850 }, { "epoch": 70.63, "learning_rate": 1.0583300970873786e-05, "loss": 0.0397, "step": 181860 }, { "epoch": 70.63, "learning_rate": 1.0582783171521036e-05, "loss": 0.202, "step": 181870 }, { "epoch": 70.63, "learning_rate": 1.0582265372168286e-05, "loss": 0.0678, "step": 181880 }, { "epoch": 70.64, "learning_rate": 1.0581747572815535e-05, "loss": 0.0312, "step": 181890 }, { "epoch": 70.64, "learning_rate": 1.0581229773462785e-05, "loss": 0.0941, "step": 181900 }, { "epoch": 70.64, "learning_rate": 1.0580711974110034e-05, "loss": 0.0384, "step": 181910 }, { "epoch": 70.65, "learning_rate": 1.0580194174757284e-05, "loss": 0.1796, "step": 181920 }, { "epoch": 70.65, "learning_rate": 1.057967637540453e-05, "loss": 0.0642, "step": 181930 }, { "epoch": 70.66, "learning_rate": 1.057915857605178e-05, "loss": 0.0766, "step": 181940 }, { "epoch": 70.66, "learning_rate": 1.057864077669903e-05, "loss": 0.0739, "step": 181950 }, { "epoch": 70.66, "learning_rate": 1.057812297734628e-05, "loss": 0.1495, "step": 181960 }, { "epoch": 70.67, "learning_rate": 1.0577605177993529e-05, "loss": 0.0088, "step": 181970 }, { "epoch": 70.67, "learning_rate": 1.0577087378640778e-05, "loss": 0.1097, "step": 181980 }, { "epoch": 70.68, "learning_rate": 1.0576569579288028e-05, "loss": 0.1385, "step": 181990 }, { "epoch": 70.68, "learning_rate": 1.0576051779935278e-05, "loss": 0.0446, "step": 182000 }, { "epoch": 70.68, "learning_rate": 1.0575533980582524e-05, "loss": 0.0658, "step": 182010 }, { "epoch": 70.69, "learning_rate": 1.0575016181229773e-05, "loss": 0.0334, "step": 182020 }, { "epoch": 70.69, "learning_rate": 1.0574498381877023e-05, "loss": 0.0145, "step": 182030 }, { "epoch": 70.7, "learning_rate": 1.0573980582524273e-05, "loss": 0.0007, "step": 182040 }, { "epoch": 70.7, "learning_rate": 1.0573462783171522e-05, "loss": 0.0015, "step": 182050 }, { "epoch": 70.7, "learning_rate": 1.0572944983818772e-05, "loss": 0.1117, "step": 182060 }, { "epoch": 70.71, "learning_rate": 1.0572427184466022e-05, "loss": 0.0466, "step": 182070 }, { "epoch": 70.71, "learning_rate": 1.0571909385113271e-05, "loss": 0.0556, "step": 182080 }, { "epoch": 70.71, "learning_rate": 1.0571391585760517e-05, "loss": 0.032, "step": 182090 }, { "epoch": 70.72, "learning_rate": 1.0570873786407767e-05, "loss": 0.1218, "step": 182100 }, { "epoch": 70.72, "learning_rate": 1.0570355987055017e-05, "loss": 0.1239, "step": 182110 }, { "epoch": 70.73, "learning_rate": 1.0569838187702266e-05, "loss": 0.1025, "step": 182120 }, { "epoch": 70.73, "learning_rate": 1.0569320388349516e-05, "loss": 0.0091, "step": 182130 }, { "epoch": 70.73, "learning_rate": 1.0568802588996766e-05, "loss": 0.0691, "step": 182140 }, { "epoch": 70.74, "learning_rate": 1.0568284789644015e-05, "loss": 0.0963, "step": 182150 }, { "epoch": 70.74, "learning_rate": 1.0567766990291263e-05, "loss": 0.1049, "step": 182160 }, { "epoch": 70.75, "learning_rate": 1.0567249190938511e-05, "loss": 0.1234, "step": 182170 }, { "epoch": 70.75, "learning_rate": 1.056673139158576e-05, "loss": 0.0564, "step": 182180 }, { "epoch": 70.75, "learning_rate": 1.056621359223301e-05, "loss": 0.1231, "step": 182190 }, { "epoch": 70.76, "learning_rate": 1.056569579288026e-05, "loss": 0.058, "step": 182200 }, { "epoch": 70.76, "learning_rate": 1.056517799352751e-05, "loss": 0.0496, "step": 182210 }, { "epoch": 70.77, "learning_rate": 1.0564660194174759e-05, "loss": 0.0152, "step": 182220 }, { "epoch": 70.77, "learning_rate": 1.0564142394822009e-05, "loss": 0.0679, "step": 182230 }, { "epoch": 70.77, "learning_rate": 1.0563624595469257e-05, "loss": 0.1545, "step": 182240 }, { "epoch": 70.78, "learning_rate": 1.0563106796116506e-05, "loss": 0.0072, "step": 182250 }, { "epoch": 70.78, "learning_rate": 1.0562588996763754e-05, "loss": 0.1236, "step": 182260 }, { "epoch": 70.78, "learning_rate": 1.0562071197411004e-05, "loss": 0.2174, "step": 182270 }, { "epoch": 70.79, "learning_rate": 1.0561553398058253e-05, "loss": 0.0418, "step": 182280 }, { "epoch": 70.79, "learning_rate": 1.0561035598705503e-05, "loss": 0.0607, "step": 182290 }, { "epoch": 70.8, "learning_rate": 1.0560517799352753e-05, "loss": 0.0709, "step": 182300 }, { "epoch": 70.8, "learning_rate": 1.056e-05, "loss": 0.1377, "step": 182310 }, { "epoch": 70.8, "learning_rate": 1.055948220064725e-05, "loss": 0.0465, "step": 182320 }, { "epoch": 70.81, "learning_rate": 1.05589644012945e-05, "loss": 0.0325, "step": 182330 }, { "epoch": 70.81, "learning_rate": 1.0558446601941748e-05, "loss": 0.065, "step": 182340 }, { "epoch": 70.82, "learning_rate": 1.0557928802588997e-05, "loss": 0.0127, "step": 182350 }, { "epoch": 70.82, "learning_rate": 1.0557411003236247e-05, "loss": 0.0454, "step": 182360 }, { "epoch": 70.82, "learning_rate": 1.0556893203883497e-05, "loss": 0.03, "step": 182370 }, { "epoch": 70.83, "learning_rate": 1.0556375404530746e-05, "loss": 0.0113, "step": 182380 }, { "epoch": 70.83, "learning_rate": 1.0555857605177994e-05, "loss": 0.0504, "step": 182390 }, { "epoch": 70.83, "learning_rate": 1.0555339805825244e-05, "loss": 0.0952, "step": 182400 }, { "epoch": 70.84, "learning_rate": 1.0554822006472493e-05, "loss": 0.0294, "step": 182410 }, { "epoch": 70.84, "learning_rate": 1.0554304207119741e-05, "loss": 0.0558, "step": 182420 }, { "epoch": 70.85, "learning_rate": 1.0553786407766991e-05, "loss": 0.1178, "step": 182430 }, { "epoch": 70.85, "learning_rate": 1.055326860841424e-05, "loss": 0.1503, "step": 182440 }, { "epoch": 70.85, "learning_rate": 1.055275080906149e-05, "loss": 0.1709, "step": 182450 }, { "epoch": 70.86, "learning_rate": 1.0552233009708738e-05, "loss": 0.0143, "step": 182460 }, { "epoch": 70.86, "learning_rate": 1.0551715210355988e-05, "loss": 0.0299, "step": 182470 }, { "epoch": 70.87, "learning_rate": 1.0551197411003237e-05, "loss": 0.0592, "step": 182480 }, { "epoch": 70.87, "learning_rate": 1.0550679611650487e-05, "loss": 0.0814, "step": 182490 }, { "epoch": 70.87, "learning_rate": 1.0550161812297735e-05, "loss": 0.0793, "step": 182500 }, { "epoch": 70.88, "learning_rate": 1.0549644012944984e-05, "loss": 0.0793, "step": 182510 }, { "epoch": 70.88, "learning_rate": 1.0549126213592234e-05, "loss": 0.0534, "step": 182520 }, { "epoch": 70.89, "learning_rate": 1.0548608414239484e-05, "loss": 0.0718, "step": 182530 }, { "epoch": 70.89, "learning_rate": 1.0548090614886732e-05, "loss": 0.0234, "step": 182540 }, { "epoch": 70.89, "learning_rate": 1.0547572815533981e-05, "loss": 0.1224, "step": 182550 }, { "epoch": 70.9, "learning_rate": 1.054705501618123e-05, "loss": 0.1524, "step": 182560 }, { "epoch": 70.9, "learning_rate": 1.054653721682848e-05, "loss": 0.0506, "step": 182570 }, { "epoch": 70.9, "learning_rate": 1.0546019417475728e-05, "loss": 0.0143, "step": 182580 }, { "epoch": 70.91, "learning_rate": 1.0545501618122978e-05, "loss": 0.1028, "step": 182590 }, { "epoch": 70.91, "learning_rate": 1.0544983818770228e-05, "loss": 0.0118, "step": 182600 }, { "epoch": 70.92, "learning_rate": 1.0544466019417477e-05, "loss": 0.0175, "step": 182610 }, { "epoch": 70.92, "learning_rate": 1.0543948220064725e-05, "loss": 0.0527, "step": 182620 }, { "epoch": 70.92, "learning_rate": 1.0543430420711975e-05, "loss": 0.0273, "step": 182630 }, { "epoch": 70.93, "learning_rate": 1.0542912621359224e-05, "loss": 0.0185, "step": 182640 }, { "epoch": 70.93, "learning_rate": 1.0542394822006474e-05, "loss": 0.1144, "step": 182650 }, { "epoch": 70.94, "learning_rate": 1.0541877022653722e-05, "loss": 0.1295, "step": 182660 }, { "epoch": 70.94, "learning_rate": 1.0541359223300972e-05, "loss": 0.0135, "step": 182670 }, { "epoch": 70.94, "learning_rate": 1.0540841423948221e-05, "loss": 0.0197, "step": 182680 }, { "epoch": 70.95, "learning_rate": 1.0540323624595469e-05, "loss": 0.2627, "step": 182690 }, { "epoch": 70.95, "learning_rate": 1.0539805825242719e-05, "loss": 0.1789, "step": 182700 }, { "epoch": 70.96, "learning_rate": 1.0539288025889968e-05, "loss": 0.1568, "step": 182710 }, { "epoch": 70.96, "learning_rate": 1.0538770226537218e-05, "loss": 0.0134, "step": 182720 }, { "epoch": 70.96, "learning_rate": 1.0538252427184468e-05, "loss": 0.0443, "step": 182730 }, { "epoch": 70.97, "learning_rate": 1.0537734627831715e-05, "loss": 0.0037, "step": 182740 }, { "epoch": 70.97, "learning_rate": 1.0537216828478965e-05, "loss": 0.1315, "step": 182750 }, { "epoch": 70.97, "learning_rate": 1.0536699029126215e-05, "loss": 0.0414, "step": 182760 }, { "epoch": 70.98, "learning_rate": 1.0536181229773463e-05, "loss": 0.0014, "step": 182770 }, { "epoch": 70.98, "learning_rate": 1.0535663430420712e-05, "loss": 0.2767, "step": 182780 }, { "epoch": 70.99, "learning_rate": 1.0535145631067962e-05, "loss": 0.131, "step": 182790 }, { "epoch": 70.99, "learning_rate": 1.0534627831715211e-05, "loss": 0.182, "step": 182800 }, { "epoch": 70.99, "learning_rate": 1.0534110032362461e-05, "loss": 0.0684, "step": 182810 }, { "epoch": 71.0, "learning_rate": 1.053359223300971e-05, "loss": 0.0217, "step": 182820 }, { "epoch": 71.0, "eval_accuracy": 0.9488308115543329, "eval_loss": 0.35828545689582825, "eval_runtime": 8.1754, "eval_samples_per_second": 444.628, "eval_steps_per_second": 55.655, "step": 182825 }, { "epoch": 71.0, "learning_rate": 1.0533074433656959e-05, "loss": 0.0338, "step": 182830 }, { "epoch": 71.01, "learning_rate": 1.0532556634304207e-05, "loss": 0.0241, "step": 182840 }, { "epoch": 71.01, "learning_rate": 1.0532038834951456e-05, "loss": 0.1219, "step": 182850 }, { "epoch": 71.01, "learning_rate": 1.0531521035598706e-05, "loss": 0.0288, "step": 182860 }, { "epoch": 71.02, "learning_rate": 1.0531003236245955e-05, "loss": 0.0197, "step": 182870 }, { "epoch": 71.02, "learning_rate": 1.0530485436893205e-05, "loss": 0.0403, "step": 182880 }, { "epoch": 71.03, "learning_rate": 1.0529967637540455e-05, "loss": 0.0246, "step": 182890 }, { "epoch": 71.03, "learning_rate": 1.0529449838187704e-05, "loss": 0.1104, "step": 182900 }, { "epoch": 71.03, "learning_rate": 1.0528932038834952e-05, "loss": 0.1113, "step": 182910 }, { "epoch": 71.04, "learning_rate": 1.05284142394822e-05, "loss": 0.0149, "step": 182920 }, { "epoch": 71.04, "learning_rate": 1.052789644012945e-05, "loss": 0.0733, "step": 182930 }, { "epoch": 71.04, "learning_rate": 1.05273786407767e-05, "loss": 0.0449, "step": 182940 }, { "epoch": 71.05, "learning_rate": 1.0526860841423949e-05, "loss": 0.0306, "step": 182950 }, { "epoch": 71.05, "learning_rate": 1.0526343042071199e-05, "loss": 0.0157, "step": 182960 }, { "epoch": 71.06, "learning_rate": 1.0525825242718448e-05, "loss": 0.0017, "step": 182970 }, { "epoch": 71.06, "learning_rate": 1.0525307443365698e-05, "loss": 0.009, "step": 182980 }, { "epoch": 71.06, "learning_rate": 1.0524789644012944e-05, "loss": 0.1288, "step": 182990 }, { "epoch": 71.07, "learning_rate": 1.0524271844660194e-05, "loss": 0.0353, "step": 183000 }, { "epoch": 71.07, "learning_rate": 1.0523754045307443e-05, "loss": 0.0372, "step": 183010 }, { "epoch": 71.08, "learning_rate": 1.0523236245954693e-05, "loss": 0.0913, "step": 183020 }, { "epoch": 71.08, "learning_rate": 1.0522718446601943e-05, "loss": 0.0389, "step": 183030 }, { "epoch": 71.08, "learning_rate": 1.0522200647249192e-05, "loss": 0.0641, "step": 183040 }, { "epoch": 71.09, "learning_rate": 1.0521682847896442e-05, "loss": 0.0197, "step": 183050 }, { "epoch": 71.09, "learning_rate": 1.0521165048543691e-05, "loss": 0.103, "step": 183060 }, { "epoch": 71.1, "learning_rate": 1.0520647249190938e-05, "loss": 0.1023, "step": 183070 }, { "epoch": 71.1, "learning_rate": 1.0520129449838187e-05, "loss": 0.0596, "step": 183080 }, { "epoch": 71.1, "learning_rate": 1.0519611650485437e-05, "loss": 0.0348, "step": 183090 }, { "epoch": 71.11, "learning_rate": 1.0519093851132686e-05, "loss": 0.0003, "step": 183100 }, { "epoch": 71.11, "learning_rate": 1.0518576051779936e-05, "loss": 0.0225, "step": 183110 }, { "epoch": 71.11, "learning_rate": 1.0518058252427186e-05, "loss": 0.0869, "step": 183120 }, { "epoch": 71.12, "learning_rate": 1.0517540453074435e-05, "loss": 0.1911, "step": 183130 }, { "epoch": 71.12, "learning_rate": 1.0517022653721685e-05, "loss": 0.0004, "step": 183140 }, { "epoch": 71.13, "learning_rate": 1.0516504854368931e-05, "loss": 0.0246, "step": 183150 }, { "epoch": 71.13, "learning_rate": 1.051598705501618e-05, "loss": 0.0969, "step": 183160 }, { "epoch": 71.13, "learning_rate": 1.051546925566343e-05, "loss": 0.0827, "step": 183170 }, { "epoch": 71.14, "learning_rate": 1.051495145631068e-05, "loss": 0.0343, "step": 183180 }, { "epoch": 71.14, "learning_rate": 1.051443365695793e-05, "loss": 0.0929, "step": 183190 }, { "epoch": 71.15, "learning_rate": 1.051391585760518e-05, "loss": 0.0964, "step": 183200 }, { "epoch": 71.15, "learning_rate": 1.0513398058252429e-05, "loss": 0.0972, "step": 183210 }, { "epoch": 71.15, "learning_rate": 1.0512880258899679e-05, "loss": 0.0383, "step": 183220 }, { "epoch": 71.16, "learning_rate": 1.0512362459546925e-05, "loss": 0.209, "step": 183230 }, { "epoch": 71.16, "learning_rate": 1.0511844660194174e-05, "loss": 0.0594, "step": 183240 }, { "epoch": 71.17, "learning_rate": 1.0511326860841424e-05, "loss": 0.0746, "step": 183250 }, { "epoch": 71.17, "learning_rate": 1.0510809061488674e-05, "loss": 0.0955, "step": 183260 }, { "epoch": 71.17, "learning_rate": 1.0510291262135923e-05, "loss": 0.0318, "step": 183270 }, { "epoch": 71.18, "learning_rate": 1.0509773462783173e-05, "loss": 0.1788, "step": 183280 }, { "epoch": 71.18, "learning_rate": 1.0509255663430422e-05, "loss": 0.103, "step": 183290 }, { "epoch": 71.18, "learning_rate": 1.0508737864077672e-05, "loss": 0.0325, "step": 183300 }, { "epoch": 71.19, "learning_rate": 1.0508220064724918e-05, "loss": 0.0011, "step": 183310 }, { "epoch": 71.19, "learning_rate": 1.0507702265372168e-05, "loss": 0.0914, "step": 183320 }, { "epoch": 71.2, "learning_rate": 1.0507184466019418e-05, "loss": 0.0532, "step": 183330 }, { "epoch": 71.2, "learning_rate": 1.0506666666666667e-05, "loss": 0.0183, "step": 183340 }, { "epoch": 71.2, "learning_rate": 1.0506148867313917e-05, "loss": 0.0173, "step": 183350 }, { "epoch": 71.21, "learning_rate": 1.0505631067961166e-05, "loss": 0.1288, "step": 183360 }, { "epoch": 71.21, "learning_rate": 1.0505113268608416e-05, "loss": 0.028, "step": 183370 }, { "epoch": 71.22, "learning_rate": 1.0504595469255666e-05, "loss": 0.0798, "step": 183380 }, { "epoch": 71.22, "learning_rate": 1.0504077669902915e-05, "loss": 0.0026, "step": 183390 }, { "epoch": 71.22, "learning_rate": 1.0503559870550161e-05, "loss": 0.1988, "step": 183400 }, { "epoch": 71.23, "learning_rate": 1.0503042071197411e-05, "loss": 0.0904, "step": 183410 }, { "epoch": 71.23, "learning_rate": 1.050252427184466e-05, "loss": 0.0959, "step": 183420 }, { "epoch": 71.23, "learning_rate": 1.050200647249191e-05, "loss": 0.0828, "step": 183430 }, { "epoch": 71.24, "learning_rate": 1.050148867313916e-05, "loss": 0.1064, "step": 183440 }, { "epoch": 71.24, "learning_rate": 1.050097087378641e-05, "loss": 0.1591, "step": 183450 }, { "epoch": 71.25, "learning_rate": 1.050045307443366e-05, "loss": 0.0582, "step": 183460 }, { "epoch": 71.25, "learning_rate": 1.0499935275080909e-05, "loss": 0.0207, "step": 183470 }, { "epoch": 71.25, "learning_rate": 1.0499417475728155e-05, "loss": 0.0148, "step": 183480 }, { "epoch": 71.26, "learning_rate": 1.0498899676375405e-05, "loss": 0.1408, "step": 183490 }, { "epoch": 71.26, "learning_rate": 1.0498381877022654e-05, "loss": 0.0765, "step": 183500 }, { "epoch": 71.27, "learning_rate": 1.0497864077669904e-05, "loss": 0.0649, "step": 183510 }, { "epoch": 71.27, "learning_rate": 1.0497346278317153e-05, "loss": 0.1207, "step": 183520 }, { "epoch": 71.27, "learning_rate": 1.0496828478964403e-05, "loss": 0.1496, "step": 183530 }, { "epoch": 71.28, "learning_rate": 1.0496310679611653e-05, "loss": 0.0654, "step": 183540 }, { "epoch": 71.28, "learning_rate": 1.0495792880258902e-05, "loss": 0.0734, "step": 183550 }, { "epoch": 71.29, "learning_rate": 1.0495275080906149e-05, "loss": 0.1172, "step": 183560 }, { "epoch": 71.29, "learning_rate": 1.0494757281553398e-05, "loss": 0.021, "step": 183570 }, { "epoch": 71.29, "learning_rate": 1.0494239482200648e-05, "loss": 0.1245, "step": 183580 }, { "epoch": 71.3, "learning_rate": 1.0493721682847897e-05, "loss": 0.0475, "step": 183590 }, { "epoch": 71.3, "learning_rate": 1.0493203883495147e-05, "loss": 0.0441, "step": 183600 }, { "epoch": 71.3, "learning_rate": 1.0492686084142397e-05, "loss": 0.0645, "step": 183610 }, { "epoch": 71.31, "learning_rate": 1.0492168284789646e-05, "loss": 0.0123, "step": 183620 }, { "epoch": 71.31, "learning_rate": 1.0491650485436894e-05, "loss": 0.1279, "step": 183630 }, { "epoch": 71.32, "learning_rate": 1.0491132686084142e-05, "loss": 0.0004, "step": 183640 }, { "epoch": 71.32, "learning_rate": 1.0490614886731392e-05, "loss": 0.0672, "step": 183650 }, { "epoch": 71.32, "learning_rate": 1.0490097087378641e-05, "loss": 0.0752, "step": 183660 }, { "epoch": 71.33, "learning_rate": 1.0489579288025891e-05, "loss": 0.0622, "step": 183670 }, { "epoch": 71.33, "learning_rate": 1.048906148867314e-05, "loss": 0.0165, "step": 183680 }, { "epoch": 71.34, "learning_rate": 1.048854368932039e-05, "loss": 0.0099, "step": 183690 }, { "epoch": 71.34, "learning_rate": 1.048802588996764e-05, "loss": 0.1289, "step": 183700 }, { "epoch": 71.34, "learning_rate": 1.0487508090614888e-05, "loss": 0.1947, "step": 183710 }, { "epoch": 71.35, "learning_rate": 1.0486990291262136e-05, "loss": 0.0011, "step": 183720 }, { "epoch": 71.35, "learning_rate": 1.0486472491909385e-05, "loss": 0.0617, "step": 183730 }, { "epoch": 71.36, "learning_rate": 1.0485954692556635e-05, "loss": 0.1271, "step": 183740 }, { "epoch": 71.36, "learning_rate": 1.0485436893203885e-05, "loss": 0.0866, "step": 183750 }, { "epoch": 71.36, "learning_rate": 1.0484919093851134e-05, "loss": 0.0946, "step": 183760 }, { "epoch": 71.37, "learning_rate": 1.0484401294498384e-05, "loss": 0.0961, "step": 183770 }, { "epoch": 71.37, "learning_rate": 1.0483883495145632e-05, "loss": 0.1469, "step": 183780 }, { "epoch": 71.37, "learning_rate": 1.0483365695792881e-05, "loss": 0.05, "step": 183790 }, { "epoch": 71.38, "learning_rate": 1.048284789644013e-05, "loss": 0.0723, "step": 183800 }, { "epoch": 71.38, "learning_rate": 1.0482330097087379e-05, "loss": 0.0046, "step": 183810 }, { "epoch": 71.39, "learning_rate": 1.0481812297734628e-05, "loss": 0.1058, "step": 183820 }, { "epoch": 71.39, "learning_rate": 1.0481294498381878e-05, "loss": 0.069, "step": 183830 }, { "epoch": 71.39, "learning_rate": 1.0480776699029128e-05, "loss": 0.0112, "step": 183840 }, { "epoch": 71.4, "learning_rate": 1.0480258899676377e-05, "loss": 0.0226, "step": 183850 }, { "epoch": 71.4, "learning_rate": 1.0479741100323625e-05, "loss": 0.0015, "step": 183860 }, { "epoch": 71.41, "learning_rate": 1.0479223300970875e-05, "loss": 0.0029, "step": 183870 }, { "epoch": 71.41, "learning_rate": 1.0478705501618125e-05, "loss": 0.0968, "step": 183880 }, { "epoch": 71.41, "learning_rate": 1.0478187702265372e-05, "loss": 0.0174, "step": 183890 }, { "epoch": 71.42, "learning_rate": 1.0477669902912622e-05, "loss": 0.0864, "step": 183900 }, { "epoch": 71.42, "learning_rate": 1.0477152103559872e-05, "loss": 0.0618, "step": 183910 }, { "epoch": 71.43, "learning_rate": 1.0476634304207121e-05, "loss": 0.0897, "step": 183920 }, { "epoch": 71.43, "learning_rate": 1.047611650485437e-05, "loss": 0.0644, "step": 183930 }, { "epoch": 71.43, "learning_rate": 1.0475598705501619e-05, "loss": 0.0337, "step": 183940 }, { "epoch": 71.44, "learning_rate": 1.0475080906148868e-05, "loss": 0.028, "step": 183950 }, { "epoch": 71.44, "learning_rate": 1.0474563106796118e-05, "loss": 0.0003, "step": 183960 }, { "epoch": 71.44, "learning_rate": 1.0474045307443366e-05, "loss": 0.0232, "step": 183970 }, { "epoch": 71.45, "learning_rate": 1.0473527508090616e-05, "loss": 0.0549, "step": 183980 }, { "epoch": 71.45, "learning_rate": 1.0473009708737865e-05, "loss": 0.0692, "step": 183990 }, { "epoch": 71.46, "learning_rate": 1.0472491909385115e-05, "loss": 0.0019, "step": 184000 }, { "epoch": 71.46, "learning_rate": 1.0471974110032363e-05, "loss": 0.0645, "step": 184010 }, { "epoch": 71.46, "learning_rate": 1.0471456310679612e-05, "loss": 0.095, "step": 184020 }, { "epoch": 71.47, "learning_rate": 1.0470938511326862e-05, "loss": 0.082, "step": 184030 }, { "epoch": 71.47, "learning_rate": 1.0470420711974112e-05, "loss": 0.0492, "step": 184040 }, { "epoch": 71.48, "learning_rate": 1.046990291262136e-05, "loss": 0.0232, "step": 184050 }, { "epoch": 71.48, "learning_rate": 1.046938511326861e-05, "loss": 0.0615, "step": 184060 }, { "epoch": 71.48, "learning_rate": 1.0468867313915859e-05, "loss": 0.0003, "step": 184070 }, { "epoch": 71.49, "learning_rate": 1.0468349514563108e-05, "loss": 0.0754, "step": 184080 }, { "epoch": 71.49, "learning_rate": 1.0467831715210356e-05, "loss": 0.1469, "step": 184090 }, { "epoch": 71.5, "learning_rate": 1.0467313915857606e-05, "loss": 0.0004, "step": 184100 }, { "epoch": 71.5, "learning_rate": 1.0466796116504856e-05, "loss": 0.0313, "step": 184110 }, { "epoch": 71.5, "learning_rate": 1.0466278317152105e-05, "loss": 0.0152, "step": 184120 }, { "epoch": 71.51, "learning_rate": 1.0465760517799353e-05, "loss": 0.0714, "step": 184130 }, { "epoch": 71.51, "learning_rate": 1.0465242718446603e-05, "loss": 0.0439, "step": 184140 }, { "epoch": 71.51, "learning_rate": 1.0464724919093852e-05, "loss": 0.0658, "step": 184150 }, { "epoch": 71.52, "learning_rate": 1.04642071197411e-05, "loss": 0.0524, "step": 184160 }, { "epoch": 71.52, "learning_rate": 1.046368932038835e-05, "loss": 0.046, "step": 184170 }, { "epoch": 71.53, "learning_rate": 1.04631715210356e-05, "loss": 0.0274, "step": 184180 }, { "epoch": 71.53, "learning_rate": 1.0462653721682849e-05, "loss": 0.021, "step": 184190 }, { "epoch": 71.53, "learning_rate": 1.0462135922330099e-05, "loss": 0.1134, "step": 184200 }, { "epoch": 71.54, "learning_rate": 1.0461618122977347e-05, "loss": 0.0833, "step": 184210 }, { "epoch": 71.54, "learning_rate": 1.0461100323624596e-05, "loss": 0.0137, "step": 184220 }, { "epoch": 71.55, "learning_rate": 1.0460582524271846e-05, "loss": 0.1086, "step": 184230 }, { "epoch": 71.55, "learning_rate": 1.0460064724919094e-05, "loss": 0.0032, "step": 184240 }, { "epoch": 71.55, "learning_rate": 1.0459546925566343e-05, "loss": 0.0111, "step": 184250 }, { "epoch": 71.56, "learning_rate": 1.0459029126213593e-05, "loss": 0.0029, "step": 184260 }, { "epoch": 71.56, "learning_rate": 1.0458511326860843e-05, "loss": 0.0754, "step": 184270 }, { "epoch": 71.57, "learning_rate": 1.0457993527508092e-05, "loss": 0.0852, "step": 184280 }, { "epoch": 71.57, "learning_rate": 1.045747572815534e-05, "loss": 0.1493, "step": 184290 }, { "epoch": 71.57, "learning_rate": 1.045695792880259e-05, "loss": 0.1759, "step": 184300 }, { "epoch": 71.58, "learning_rate": 1.0456440129449838e-05, "loss": 0.0454, "step": 184310 }, { "epoch": 71.58, "learning_rate": 1.0455922330097087e-05, "loss": 0.0738, "step": 184320 }, { "epoch": 71.58, "learning_rate": 1.0455404530744337e-05, "loss": 0.1595, "step": 184330 }, { "epoch": 71.59, "learning_rate": 1.0454886731391587e-05, "loss": 0.018, "step": 184340 }, { "epoch": 71.59, "learning_rate": 1.0454368932038836e-05, "loss": 0.1258, "step": 184350 }, { "epoch": 71.6, "learning_rate": 1.0453851132686086e-05, "loss": 0.0036, "step": 184360 }, { "epoch": 71.6, "learning_rate": 1.0453333333333334e-05, "loss": 0.1143, "step": 184370 }, { "epoch": 71.6, "learning_rate": 1.0452815533980583e-05, "loss": 0.0166, "step": 184380 }, { "epoch": 71.61, "learning_rate": 1.0452297734627831e-05, "loss": 0.0703, "step": 184390 }, { "epoch": 71.61, "learning_rate": 1.0451779935275081e-05, "loss": 0.0428, "step": 184400 }, { "epoch": 71.62, "learning_rate": 1.045126213592233e-05, "loss": 0.0933, "step": 184410 }, { "epoch": 71.62, "learning_rate": 1.045074433656958e-05, "loss": 0.0172, "step": 184420 }, { "epoch": 71.62, "learning_rate": 1.045022653721683e-05, "loss": 0.0409, "step": 184430 }, { "epoch": 71.63, "learning_rate": 1.044970873786408e-05, "loss": 0.081, "step": 184440 }, { "epoch": 71.63, "learning_rate": 1.0449190938511329e-05, "loss": 0.1183, "step": 184450 }, { "epoch": 71.63, "learning_rate": 1.0448673139158575e-05, "loss": 0.0407, "step": 184460 }, { "epoch": 71.64, "learning_rate": 1.0448155339805825e-05, "loss": 0.0443, "step": 184470 }, { "epoch": 71.64, "learning_rate": 1.0447637540453074e-05, "loss": 0.1215, "step": 184480 }, { "epoch": 71.65, "learning_rate": 1.0447119741100324e-05, "loss": 0.0415, "step": 184490 }, { "epoch": 71.65, "learning_rate": 1.0446601941747574e-05, "loss": 0.0436, "step": 184500 }, { "epoch": 71.65, "learning_rate": 1.0446084142394823e-05, "loss": 0.0307, "step": 184510 }, { "epoch": 71.66, "learning_rate": 1.0445566343042073e-05, "loss": 0.1242, "step": 184520 }, { "epoch": 71.66, "learning_rate": 1.0445048543689323e-05, "loss": 0.0444, "step": 184530 }, { "epoch": 71.67, "learning_rate": 1.0444530744336569e-05, "loss": 0.004, "step": 184540 }, { "epoch": 71.67, "learning_rate": 1.0444012944983818e-05, "loss": 0.1965, "step": 184550 }, { "epoch": 71.67, "learning_rate": 1.0443495145631068e-05, "loss": 0.0458, "step": 184560 }, { "epoch": 71.68, "learning_rate": 1.0442977346278318e-05, "loss": 0.0444, "step": 184570 }, { "epoch": 71.68, "learning_rate": 1.0442459546925567e-05, "loss": 0.0422, "step": 184580 }, { "epoch": 71.69, "learning_rate": 1.0441941747572817e-05, "loss": 0.1339, "step": 184590 }, { "epoch": 71.69, "learning_rate": 1.0441423948220067e-05, "loss": 0.0275, "step": 184600 }, { "epoch": 71.69, "learning_rate": 1.0440906148867316e-05, "loss": 0.0221, "step": 184610 }, { "epoch": 71.7, "learning_rate": 1.0440388349514562e-05, "loss": 0.006, "step": 184620 }, { "epoch": 71.7, "learning_rate": 1.0439870550161812e-05, "loss": 0.0135, "step": 184630 }, { "epoch": 71.7, "learning_rate": 1.0439352750809062e-05, "loss": 0.0111, "step": 184640 }, { "epoch": 71.71, "learning_rate": 1.0438834951456311e-05, "loss": 0.0343, "step": 184650 }, { "epoch": 71.71, "learning_rate": 1.043831715210356e-05, "loss": 0.0217, "step": 184660 }, { "epoch": 71.72, "learning_rate": 1.043779935275081e-05, "loss": 0.0053, "step": 184670 }, { "epoch": 71.72, "learning_rate": 1.043728155339806e-05, "loss": 0.0433, "step": 184680 }, { "epoch": 71.72, "learning_rate": 1.043676375404531e-05, "loss": 0.0014, "step": 184690 }, { "epoch": 71.73, "learning_rate": 1.0436245954692556e-05, "loss": 0.0664, "step": 184700 }, { "epoch": 71.73, "learning_rate": 1.0435728155339806e-05, "loss": 0.0137, "step": 184710 }, { "epoch": 71.74, "learning_rate": 1.0435210355987055e-05, "loss": 0.0012, "step": 184720 }, { "epoch": 71.74, "learning_rate": 1.0434692556634305e-05, "loss": 0.1119, "step": 184730 }, { "epoch": 71.74, "learning_rate": 1.0434174757281554e-05, "loss": 0.0428, "step": 184740 }, { "epoch": 71.75, "learning_rate": 1.0433656957928804e-05, "loss": 0.0241, "step": 184750 }, { "epoch": 71.75, "learning_rate": 1.0433139158576054e-05, "loss": 0.067, "step": 184760 }, { "epoch": 71.76, "learning_rate": 1.0432621359223303e-05, "loss": 0.1071, "step": 184770 }, { "epoch": 71.76, "learning_rate": 1.043210355987055e-05, "loss": 0.0458, "step": 184780 }, { "epoch": 71.76, "learning_rate": 1.0431585760517799e-05, "loss": 0.0056, "step": 184790 }, { "epoch": 71.77, "learning_rate": 1.0431067961165049e-05, "loss": 0.0578, "step": 184800 }, { "epoch": 71.77, "learning_rate": 1.0430550161812298e-05, "loss": 0.0326, "step": 184810 }, { "epoch": 71.77, "learning_rate": 1.0430032362459548e-05, "loss": 0.0734, "step": 184820 }, { "epoch": 71.78, "learning_rate": 1.0429514563106798e-05, "loss": 0.0234, "step": 184830 }, { "epoch": 71.78, "learning_rate": 1.0428996763754047e-05, "loss": 0.0108, "step": 184840 }, { "epoch": 71.79, "learning_rate": 1.0428478964401297e-05, "loss": 0.0396, "step": 184850 }, { "epoch": 71.79, "learning_rate": 1.0427961165048543e-05, "loss": 0.0657, "step": 184860 }, { "epoch": 71.79, "learning_rate": 1.0427443365695793e-05, "loss": 0.0724, "step": 184870 }, { "epoch": 71.8, "learning_rate": 1.0426925566343042e-05, "loss": 0.1535, "step": 184880 }, { "epoch": 71.8, "learning_rate": 1.0426407766990292e-05, "loss": 0.1264, "step": 184890 }, { "epoch": 71.81, "learning_rate": 1.0425889967637541e-05, "loss": 0.0574, "step": 184900 }, { "epoch": 71.81, "learning_rate": 1.0425372168284791e-05, "loss": 0.049, "step": 184910 }, { "epoch": 71.81, "learning_rate": 1.042485436893204e-05, "loss": 0.0585, "step": 184920 }, { "epoch": 71.82, "learning_rate": 1.042433656957929e-05, "loss": 0.1314, "step": 184930 }, { "epoch": 71.82, "learning_rate": 1.0423818770226537e-05, "loss": 0.2103, "step": 184940 }, { "epoch": 71.83, "learning_rate": 1.0423300970873786e-05, "loss": 0.1295, "step": 184950 }, { "epoch": 71.83, "learning_rate": 1.0422783171521036e-05, "loss": 0.0577, "step": 184960 }, { "epoch": 71.83, "learning_rate": 1.0422265372168285e-05, "loss": 0.0942, "step": 184970 }, { "epoch": 71.84, "learning_rate": 1.0421747572815535e-05, "loss": 0.1349, "step": 184980 }, { "epoch": 71.84, "learning_rate": 1.0421229773462785e-05, "loss": 0.2063, "step": 184990 }, { "epoch": 71.84, "learning_rate": 1.0420711974110034e-05, "loss": 0.0368, "step": 185000 }, { "epoch": 71.85, "learning_rate": 1.0420194174757284e-05, "loss": 0.0146, "step": 185010 }, { "epoch": 71.85, "learning_rate": 1.0419676375404534e-05, "loss": 0.0134, "step": 185020 }, { "epoch": 71.86, "learning_rate": 1.041915857605178e-05, "loss": 0.046, "step": 185030 }, { "epoch": 71.86, "learning_rate": 1.041864077669903e-05, "loss": 0.0403, "step": 185040 }, { "epoch": 71.86, "learning_rate": 1.0418122977346279e-05, "loss": 0.1825, "step": 185050 }, { "epoch": 71.87, "learning_rate": 1.0417605177993529e-05, "loss": 0.1602, "step": 185060 }, { "epoch": 71.87, "learning_rate": 1.0417087378640778e-05, "loss": 0.107, "step": 185070 }, { "epoch": 71.88, "learning_rate": 1.0416569579288028e-05, "loss": 0.0002, "step": 185080 }, { "epoch": 71.88, "learning_rate": 1.0416051779935277e-05, "loss": 0.3094, "step": 185090 }, { "epoch": 71.88, "learning_rate": 1.0415533980582525e-05, "loss": 0.0132, "step": 185100 }, { "epoch": 71.89, "learning_rate": 1.0415016181229773e-05, "loss": 0.0222, "step": 185110 }, { "epoch": 71.89, "learning_rate": 1.0414498381877023e-05, "loss": 0.0764, "step": 185120 }, { "epoch": 71.9, "learning_rate": 1.0413980582524273e-05, "loss": 0.0111, "step": 185130 }, { "epoch": 71.9, "learning_rate": 1.0413462783171522e-05, "loss": 0.0795, "step": 185140 }, { "epoch": 71.9, "learning_rate": 1.0412944983818772e-05, "loss": 0.011, "step": 185150 }, { "epoch": 71.91, "learning_rate": 1.0412427184466021e-05, "loss": 0.0429, "step": 185160 }, { "epoch": 71.91, "learning_rate": 1.0411909385113271e-05, "loss": 0.0396, "step": 185170 }, { "epoch": 71.91, "learning_rate": 1.0411391585760519e-05, "loss": 0.1042, "step": 185180 }, { "epoch": 71.92, "learning_rate": 1.0410873786407767e-05, "loss": 0.1204, "step": 185190 }, { "epoch": 71.92, "learning_rate": 1.0410355987055016e-05, "loss": 0.0679, "step": 185200 }, { "epoch": 71.93, "learning_rate": 1.0409838187702266e-05, "loss": 0.0514, "step": 185210 }, { "epoch": 71.93, "learning_rate": 1.0409320388349516e-05, "loss": 0.0622, "step": 185220 }, { "epoch": 71.93, "learning_rate": 1.0408802588996765e-05, "loss": 0.047, "step": 185230 }, { "epoch": 71.94, "learning_rate": 1.0408284789644015e-05, "loss": 0.0006, "step": 185240 }, { "epoch": 71.94, "learning_rate": 1.0407766990291263e-05, "loss": 0.004, "step": 185250 }, { "epoch": 71.95, "learning_rate": 1.0407249190938513e-05, "loss": 0.3037, "step": 185260 }, { "epoch": 71.95, "learning_rate": 1.040673139158576e-05, "loss": 0.1149, "step": 185270 }, { "epoch": 71.95, "learning_rate": 1.040621359223301e-05, "loss": 0.064, "step": 185280 }, { "epoch": 71.96, "learning_rate": 1.040569579288026e-05, "loss": 0.0004, "step": 185290 }, { "epoch": 71.96, "learning_rate": 1.040517799352751e-05, "loss": 0.0348, "step": 185300 }, { "epoch": 71.97, "learning_rate": 1.0404660194174759e-05, "loss": 0.063, "step": 185310 }, { "epoch": 71.97, "learning_rate": 1.0404142394822009e-05, "loss": 0.0506, "step": 185320 }, { "epoch": 71.97, "learning_rate": 1.0403624595469256e-05, "loss": 0.0897, "step": 185330 }, { "epoch": 71.98, "learning_rate": 1.0403106796116506e-05, "loss": 0.1156, "step": 185340 }, { "epoch": 71.98, "learning_rate": 1.0402588996763754e-05, "loss": 0.0018, "step": 185350 }, { "epoch": 71.98, "learning_rate": 1.0402071197411004e-05, "loss": 0.1471, "step": 185360 }, { "epoch": 71.99, "learning_rate": 1.0401553398058253e-05, "loss": 0.0025, "step": 185370 }, { "epoch": 71.99, "learning_rate": 1.0401035598705503e-05, "loss": 0.1567, "step": 185380 }, { "epoch": 72.0, "learning_rate": 1.0400517799352752e-05, "loss": 0.0831, "step": 185390 }, { "epoch": 72.0, "learning_rate": 1.04e-05, "loss": 0.0264, "step": 185400 }, { "epoch": 72.0, "eval_accuracy": 0.9477303988995873, "eval_loss": 0.35998329520225525, "eval_runtime": 8.2027, "eval_samples_per_second": 443.148, "eval_steps_per_second": 55.47, "step": 185400 }, { "epoch": 72.0, "learning_rate": 1.039948220064725e-05, "loss": 0.012, "step": 185410 }, { "epoch": 72.01, "learning_rate": 1.03989644012945e-05, "loss": 0.0154, "step": 185420 }, { "epoch": 72.01, "learning_rate": 1.0398446601941748e-05, "loss": 0.0256, "step": 185430 }, { "epoch": 72.02, "learning_rate": 1.0397928802588997e-05, "loss": 0.0205, "step": 185440 }, { "epoch": 72.02, "learning_rate": 1.0397411003236247e-05, "loss": 0.0003, "step": 185450 }, { "epoch": 72.02, "learning_rate": 1.0396893203883496e-05, "loss": 0.059, "step": 185460 }, { "epoch": 72.03, "learning_rate": 1.0396375404530746e-05, "loss": 0.2431, "step": 185470 }, { "epoch": 72.03, "learning_rate": 1.0395857605177994e-05, "loss": 0.1409, "step": 185480 }, { "epoch": 72.03, "learning_rate": 1.0395339805825244e-05, "loss": 0.053, "step": 185490 }, { "epoch": 72.04, "learning_rate": 1.0394822006472493e-05, "loss": 0.0015, "step": 185500 }, { "epoch": 72.04, "learning_rate": 1.0394304207119741e-05, "loss": 0.1503, "step": 185510 }, { "epoch": 72.05, "learning_rate": 1.039378640776699e-05, "loss": 0.1318, "step": 185520 }, { "epoch": 72.05, "learning_rate": 1.039326860841424e-05, "loss": 0.0409, "step": 185530 }, { "epoch": 72.05, "learning_rate": 1.039275080906149e-05, "loss": 0.1381, "step": 185540 }, { "epoch": 72.06, "learning_rate": 1.0392233009708738e-05, "loss": 0.0198, "step": 185550 }, { "epoch": 72.06, "learning_rate": 1.0391715210355987e-05, "loss": 0.1336, "step": 185560 }, { "epoch": 72.07, "learning_rate": 1.0391197411003237e-05, "loss": 0.0395, "step": 185570 }, { "epoch": 72.07, "learning_rate": 1.0390679611650487e-05, "loss": 0.0347, "step": 185580 }, { "epoch": 72.07, "learning_rate": 1.0390161812297736e-05, "loss": 0.1868, "step": 185590 }, { "epoch": 72.08, "learning_rate": 1.0389644012944984e-05, "loss": 0.1334, "step": 185600 }, { "epoch": 72.08, "learning_rate": 1.0389126213592234e-05, "loss": 0.0394, "step": 185610 }, { "epoch": 72.09, "learning_rate": 1.0388608414239484e-05, "loss": 0.001, "step": 185620 }, { "epoch": 72.09, "learning_rate": 1.0388090614886731e-05, "loss": 0.0937, "step": 185630 }, { "epoch": 72.09, "learning_rate": 1.0387572815533981e-05, "loss": 0.0803, "step": 185640 }, { "epoch": 72.1, "learning_rate": 1.038705501618123e-05, "loss": 0.0173, "step": 185650 }, { "epoch": 72.1, "learning_rate": 1.038653721682848e-05, "loss": 0.0562, "step": 185660 }, { "epoch": 72.1, "learning_rate": 1.038601941747573e-05, "loss": 0.0802, "step": 185670 }, { "epoch": 72.11, "learning_rate": 1.0385501618122978e-05, "loss": 0.0299, "step": 185680 }, { "epoch": 72.11, "learning_rate": 1.0384983818770227e-05, "loss": 0.0973, "step": 185690 }, { "epoch": 72.12, "learning_rate": 1.0384466019417477e-05, "loss": 0.0617, "step": 185700 }, { "epoch": 72.12, "learning_rate": 1.0383948220064725e-05, "loss": 0.0018, "step": 185710 }, { "epoch": 72.12, "learning_rate": 1.0383430420711975e-05, "loss": 0.0608, "step": 185720 }, { "epoch": 72.13, "learning_rate": 1.0382912621359224e-05, "loss": 0.1126, "step": 185730 }, { "epoch": 72.13, "learning_rate": 1.0382394822006474e-05, "loss": 0.0452, "step": 185740 }, { "epoch": 72.14, "learning_rate": 1.0381877022653723e-05, "loss": 0.0945, "step": 185750 }, { "epoch": 72.14, "learning_rate": 1.0381359223300971e-05, "loss": 0.0438, "step": 185760 }, { "epoch": 72.14, "learning_rate": 1.0380841423948221e-05, "loss": 0.0061, "step": 185770 }, { "epoch": 72.15, "learning_rate": 1.0380323624595469e-05, "loss": 0.0968, "step": 185780 }, { "epoch": 72.15, "learning_rate": 1.0379805825242719e-05, "loss": 0.0004, "step": 185790 }, { "epoch": 72.16, "learning_rate": 1.0379288025889968e-05, "loss": 0.0219, "step": 185800 }, { "epoch": 72.16, "learning_rate": 1.0378770226537218e-05, "loss": 0.0759, "step": 185810 }, { "epoch": 72.16, "learning_rate": 1.0378252427184467e-05, "loss": 0.062, "step": 185820 }, { "epoch": 72.17, "learning_rate": 1.0377734627831717e-05, "loss": 0.0478, "step": 185830 }, { "epoch": 72.17, "learning_rate": 1.0377216828478965e-05, "loss": 0.0992, "step": 185840 }, { "epoch": 72.17, "learning_rate": 1.0376699029126215e-05, "loss": 0.0819, "step": 185850 }, { "epoch": 72.18, "learning_rate": 1.0376181229773462e-05, "loss": 0.0184, "step": 185860 }, { "epoch": 72.18, "learning_rate": 1.0375663430420712e-05, "loss": 0.0745, "step": 185870 }, { "epoch": 72.19, "learning_rate": 1.0375145631067962e-05, "loss": 0.0498, "step": 185880 }, { "epoch": 72.19, "learning_rate": 1.0374627831715211e-05, "loss": 0.0868, "step": 185890 }, { "epoch": 72.19, "learning_rate": 1.0374110032362461e-05, "loss": 0.0203, "step": 185900 }, { "epoch": 72.2, "learning_rate": 1.037359223300971e-05, "loss": 0.0886, "step": 185910 }, { "epoch": 72.2, "learning_rate": 1.0373074433656958e-05, "loss": 0.019, "step": 185920 }, { "epoch": 72.21, "learning_rate": 1.0372556634304206e-05, "loss": 0.0766, "step": 185930 }, { "epoch": 72.21, "learning_rate": 1.0372038834951456e-05, "loss": 0.0652, "step": 185940 }, { "epoch": 72.21, "learning_rate": 1.0371521035598706e-05, "loss": 0.0824, "step": 185950 }, { "epoch": 72.22, "learning_rate": 1.0371003236245955e-05, "loss": 0.002, "step": 185960 }, { "epoch": 72.22, "learning_rate": 1.0370485436893205e-05, "loss": 0.0251, "step": 185970 }, { "epoch": 72.23, "learning_rate": 1.0369967637540455e-05, "loss": 0.0765, "step": 185980 }, { "epoch": 72.23, "learning_rate": 1.0369449838187704e-05, "loss": 0.021, "step": 185990 }, { "epoch": 72.23, "learning_rate": 1.0368932038834952e-05, "loss": 0.0004, "step": 186000 }, { "epoch": 72.24, "learning_rate": 1.03684142394822e-05, "loss": 0.0152, "step": 186010 }, { "epoch": 72.24, "learning_rate": 1.036789644012945e-05, "loss": 0.0196, "step": 186020 }, { "epoch": 72.24, "learning_rate": 1.03673786407767e-05, "loss": 0.1437, "step": 186030 }, { "epoch": 72.25, "learning_rate": 1.0366860841423949e-05, "loss": 0.1571, "step": 186040 }, { "epoch": 72.25, "learning_rate": 1.0366343042071198e-05, "loss": 0.0752, "step": 186050 }, { "epoch": 72.26, "learning_rate": 1.0365825242718448e-05, "loss": 0.174, "step": 186060 }, { "epoch": 72.26, "learning_rate": 1.0365307443365698e-05, "loss": 0.1298, "step": 186070 }, { "epoch": 72.26, "learning_rate": 1.0364789644012944e-05, "loss": 0.0466, "step": 186080 }, { "epoch": 72.27, "learning_rate": 1.0364271844660194e-05, "loss": 0.0999, "step": 186090 }, { "epoch": 72.27, "learning_rate": 1.0363754045307443e-05, "loss": 0.081, "step": 186100 }, { "epoch": 72.28, "learning_rate": 1.0363236245954693e-05, "loss": 0.0827, "step": 186110 }, { "epoch": 72.28, "learning_rate": 1.0362718446601942e-05, "loss": 0.0459, "step": 186120 }, { "epoch": 72.28, "learning_rate": 1.0362200647249192e-05, "loss": 0.1077, "step": 186130 }, { "epoch": 72.29, "learning_rate": 1.0361682847896442e-05, "loss": 0.1913, "step": 186140 }, { "epoch": 72.29, "learning_rate": 1.0361165048543691e-05, "loss": 0.1541, "step": 186150 }, { "epoch": 72.3, "learning_rate": 1.0360647249190941e-05, "loss": 0.0006, "step": 186160 }, { "epoch": 72.3, "learning_rate": 1.0360129449838187e-05, "loss": 0.1568, "step": 186170 }, { "epoch": 72.3, "learning_rate": 1.0359611650485437e-05, "loss": 0.0118, "step": 186180 }, { "epoch": 72.31, "learning_rate": 1.0359093851132686e-05, "loss": 0.0012, "step": 186190 }, { "epoch": 72.31, "learning_rate": 1.0358576051779936e-05, "loss": 0.1331, "step": 186200 }, { "epoch": 72.31, "learning_rate": 1.0358058252427186e-05, "loss": 0.0121, "step": 186210 }, { "epoch": 72.32, "learning_rate": 1.0357540453074435e-05, "loss": 0.0435, "step": 186220 }, { "epoch": 72.32, "learning_rate": 1.0357022653721685e-05, "loss": 0.0252, "step": 186230 }, { "epoch": 72.33, "learning_rate": 1.0356504854368934e-05, "loss": 0.025, "step": 186240 }, { "epoch": 72.33, "learning_rate": 1.035598705501618e-05, "loss": 0.0953, "step": 186250 }, { "epoch": 72.33, "learning_rate": 1.035546925566343e-05, "loss": 0.0324, "step": 186260 }, { "epoch": 72.34, "learning_rate": 1.035495145631068e-05, "loss": 0.0173, "step": 186270 }, { "epoch": 72.34, "learning_rate": 1.035443365695793e-05, "loss": 0.0352, "step": 186280 }, { "epoch": 72.35, "learning_rate": 1.0353915857605179e-05, "loss": 0.0012, "step": 186290 }, { "epoch": 72.35, "learning_rate": 1.0353398058252429e-05, "loss": 0.1352, "step": 186300 }, { "epoch": 72.35, "learning_rate": 1.0352880258899678e-05, "loss": 0.0297, "step": 186310 }, { "epoch": 72.36, "learning_rate": 1.0352362459546928e-05, "loss": 0.0697, "step": 186320 }, { "epoch": 72.36, "learning_rate": 1.0351844660194174e-05, "loss": 0.0503, "step": 186330 }, { "epoch": 72.37, "learning_rate": 1.0351326860841424e-05, "loss": 0.0168, "step": 186340 }, { "epoch": 72.37, "learning_rate": 1.0350809061488673e-05, "loss": 0.0059, "step": 186350 }, { "epoch": 72.37, "learning_rate": 1.0350291262135923e-05, "loss": 0.0308, "step": 186360 }, { "epoch": 72.38, "learning_rate": 1.0349773462783173e-05, "loss": 0.0633, "step": 186370 }, { "epoch": 72.38, "learning_rate": 1.0349255663430422e-05, "loss": 0.1008, "step": 186380 }, { "epoch": 72.38, "learning_rate": 1.0348737864077672e-05, "loss": 0.143, "step": 186390 }, { "epoch": 72.39, "learning_rate": 1.0348220064724922e-05, "loss": 0.0224, "step": 186400 }, { "epoch": 72.39, "learning_rate": 1.0347702265372168e-05, "loss": 0.0025, "step": 186410 }, { "epoch": 72.4, "learning_rate": 1.0347184466019417e-05, "loss": 0.0117, "step": 186420 }, { "epoch": 72.4, "learning_rate": 1.0346666666666667e-05, "loss": 0.0591, "step": 186430 }, { "epoch": 72.4, "learning_rate": 1.0346148867313917e-05, "loss": 0.0856, "step": 186440 }, { "epoch": 72.41, "learning_rate": 1.0345631067961166e-05, "loss": 0.0878, "step": 186450 }, { "epoch": 72.41, "learning_rate": 1.0345113268608416e-05, "loss": 0.0257, "step": 186460 }, { "epoch": 72.42, "learning_rate": 1.0344595469255665e-05, "loss": 0.0579, "step": 186470 }, { "epoch": 72.42, "learning_rate": 1.0344077669902915e-05, "loss": 0.0475, "step": 186480 }, { "epoch": 72.42, "learning_rate": 1.0343559870550161e-05, "loss": 0.0135, "step": 186490 }, { "epoch": 72.43, "learning_rate": 1.0343042071197411e-05, "loss": 0.1235, "step": 186500 }, { "epoch": 72.43, "learning_rate": 1.034252427184466e-05, "loss": 0.0621, "step": 186510 }, { "epoch": 72.43, "learning_rate": 1.034200647249191e-05, "loss": 0.0661, "step": 186520 }, { "epoch": 72.44, "learning_rate": 1.034148867313916e-05, "loss": 0.0105, "step": 186530 }, { "epoch": 72.44, "learning_rate": 1.034097087378641e-05, "loss": 0.0774, "step": 186540 }, { "epoch": 72.45, "learning_rate": 1.0340453074433659e-05, "loss": 0.1572, "step": 186550 }, { "epoch": 72.45, "learning_rate": 1.0339935275080909e-05, "loss": 0.0076, "step": 186560 }, { "epoch": 72.45, "learning_rate": 1.0339417475728155e-05, "loss": 0.0705, "step": 186570 }, { "epoch": 72.46, "learning_rate": 1.0338899676375404e-05, "loss": 0.1513, "step": 186580 }, { "epoch": 72.46, "learning_rate": 1.0338381877022654e-05, "loss": 0.0628, "step": 186590 }, { "epoch": 72.47, "learning_rate": 1.0337864077669904e-05, "loss": 0.0977, "step": 186600 }, { "epoch": 72.47, "learning_rate": 1.0337346278317153e-05, "loss": 0.0234, "step": 186610 }, { "epoch": 72.47, "learning_rate": 1.0336828478964403e-05, "loss": 0.1736, "step": 186620 }, { "epoch": 72.48, "learning_rate": 1.0336310679611653e-05, "loss": 0.0636, "step": 186630 }, { "epoch": 72.48, "learning_rate": 1.0335792880258902e-05, "loss": 0.09, "step": 186640 }, { "epoch": 72.49, "learning_rate": 1.0335275080906148e-05, "loss": 0.0268, "step": 186650 }, { "epoch": 72.49, "learning_rate": 1.0334757281553398e-05, "loss": 0.1216, "step": 186660 }, { "epoch": 72.49, "learning_rate": 1.0334239482200648e-05, "loss": 0.1387, "step": 186670 }, { "epoch": 72.5, "learning_rate": 1.0333721682847897e-05, "loss": 0.0194, "step": 186680 }, { "epoch": 72.5, "learning_rate": 1.0333203883495147e-05, "loss": 0.0673, "step": 186690 }, { "epoch": 72.5, "learning_rate": 1.0332686084142397e-05, "loss": 0.0588, "step": 186700 }, { "epoch": 72.51, "learning_rate": 1.0332168284789646e-05, "loss": 0.0056, "step": 186710 }, { "epoch": 72.51, "learning_rate": 1.0331650485436894e-05, "loss": 0.0968, "step": 186720 }, { "epoch": 72.52, "learning_rate": 1.0331132686084144e-05, "loss": 0.2472, "step": 186730 }, { "epoch": 72.52, "learning_rate": 1.0330614886731392e-05, "loss": 0.0999, "step": 186740 }, { "epoch": 72.52, "learning_rate": 1.0330097087378641e-05, "loss": 0.063, "step": 186750 }, { "epoch": 72.53, "learning_rate": 1.032957928802589e-05, "loss": 0.1234, "step": 186760 }, { "epoch": 72.53, "learning_rate": 1.032906148867314e-05, "loss": 0.0214, "step": 186770 }, { "epoch": 72.54, "learning_rate": 1.032854368932039e-05, "loss": 0.0634, "step": 186780 }, { "epoch": 72.54, "learning_rate": 1.032802588996764e-05, "loss": 0.0329, "step": 186790 }, { "epoch": 72.54, "learning_rate": 1.0327508090614888e-05, "loss": 0.0115, "step": 186800 }, { "epoch": 72.55, "learning_rate": 1.0326990291262137e-05, "loss": 0.0087, "step": 186810 }, { "epoch": 72.55, "learning_rate": 1.0326472491909385e-05, "loss": 0.0176, "step": 186820 }, { "epoch": 72.56, "learning_rate": 1.0325954692556635e-05, "loss": 0.0006, "step": 186830 }, { "epoch": 72.56, "learning_rate": 1.0325436893203884e-05, "loss": 0.0286, "step": 186840 }, { "epoch": 72.56, "learning_rate": 1.0324919093851134e-05, "loss": 0.028, "step": 186850 }, { "epoch": 72.57, "learning_rate": 1.0324401294498384e-05, "loss": 0.0523, "step": 186860 }, { "epoch": 72.57, "learning_rate": 1.0323883495145632e-05, "loss": 0.0101, "step": 186870 }, { "epoch": 72.57, "learning_rate": 1.0323365695792881e-05, "loss": 0.0007, "step": 186880 }, { "epoch": 72.58, "learning_rate": 1.032284789644013e-05, "loss": 0.0651, "step": 186890 }, { "epoch": 72.58, "learning_rate": 1.0322330097087379e-05, "loss": 0.1259, "step": 186900 }, { "epoch": 72.59, "learning_rate": 1.0321812297734628e-05, "loss": 0.1662, "step": 186910 }, { "epoch": 72.59, "learning_rate": 1.0321294498381878e-05, "loss": 0.1143, "step": 186920 }, { "epoch": 72.59, "learning_rate": 1.0320776699029128e-05, "loss": 0.1189, "step": 186930 }, { "epoch": 72.6, "learning_rate": 1.0320258899676377e-05, "loss": 0.1191, "step": 186940 }, { "epoch": 72.6, "learning_rate": 1.0319741100323625e-05, "loss": 0.0184, "step": 186950 }, { "epoch": 72.61, "learning_rate": 1.0319223300970875e-05, "loss": 0.0741, "step": 186960 }, { "epoch": 72.61, "learning_rate": 1.0318705501618124e-05, "loss": 0.0183, "step": 186970 }, { "epoch": 72.61, "learning_rate": 1.0318187702265372e-05, "loss": 0.0281, "step": 186980 }, { "epoch": 72.62, "learning_rate": 1.0317669902912622e-05, "loss": 0.1602, "step": 186990 }, { "epoch": 72.62, "learning_rate": 1.0317152103559872e-05, "loss": 0.0272, "step": 187000 }, { "epoch": 72.63, "learning_rate": 1.0316634304207121e-05, "loss": 0.0108, "step": 187010 }, { "epoch": 72.63, "learning_rate": 1.0316116504854369e-05, "loss": 0.0691, "step": 187020 }, { "epoch": 72.63, "learning_rate": 1.0315598705501619e-05, "loss": 0.0024, "step": 187030 }, { "epoch": 72.64, "learning_rate": 1.0315080906148868e-05, "loss": 0.06, "step": 187040 }, { "epoch": 72.64, "learning_rate": 1.0314563106796118e-05, "loss": 0.319, "step": 187050 }, { "epoch": 72.64, "learning_rate": 1.0314045307443366e-05, "loss": 0.1279, "step": 187060 }, { "epoch": 72.65, "learning_rate": 1.0313527508090615e-05, "loss": 0.0005, "step": 187070 }, { "epoch": 72.65, "learning_rate": 1.0313009708737865e-05, "loss": 0.1704, "step": 187080 }, { "epoch": 72.66, "learning_rate": 1.0312491909385115e-05, "loss": 0.0003, "step": 187090 }, { "epoch": 72.66, "learning_rate": 1.0311974110032363e-05, "loss": 0.0313, "step": 187100 }, { "epoch": 72.66, "learning_rate": 1.0311456310679612e-05, "loss": 0.0893, "step": 187110 }, { "epoch": 72.67, "learning_rate": 1.0310938511326862e-05, "loss": 0.0642, "step": 187120 }, { "epoch": 72.67, "learning_rate": 1.0310420711974111e-05, "loss": 0.0561, "step": 187130 }, { "epoch": 72.68, "learning_rate": 1.030990291262136e-05, "loss": 0.1443, "step": 187140 }, { "epoch": 72.68, "learning_rate": 1.0309385113268609e-05, "loss": 0.0937, "step": 187150 }, { "epoch": 72.68, "learning_rate": 1.0308867313915859e-05, "loss": 0.0704, "step": 187160 }, { "epoch": 72.69, "learning_rate": 1.0308349514563108e-05, "loss": 0.035, "step": 187170 }, { "epoch": 72.69, "learning_rate": 1.0307831715210356e-05, "loss": 0.002, "step": 187180 }, { "epoch": 72.7, "learning_rate": 1.0307313915857606e-05, "loss": 0.0466, "step": 187190 }, { "epoch": 72.7, "learning_rate": 1.0306796116504855e-05, "loss": 0.0887, "step": 187200 }, { "epoch": 72.7, "learning_rate": 1.0306278317152105e-05, "loss": 0.0885, "step": 187210 }, { "epoch": 72.71, "learning_rate": 1.0305760517799353e-05, "loss": 0.0317, "step": 187220 }, { "epoch": 72.71, "learning_rate": 1.0305242718446603e-05, "loss": 0.0175, "step": 187230 }, { "epoch": 72.71, "learning_rate": 1.0304724919093852e-05, "loss": 0.0332, "step": 187240 }, { "epoch": 72.72, "learning_rate": 1.03042071197411e-05, "loss": 0.0724, "step": 187250 }, { "epoch": 72.72, "learning_rate": 1.030368932038835e-05, "loss": 0.0011, "step": 187260 }, { "epoch": 72.73, "learning_rate": 1.03031715210356e-05, "loss": 0.0195, "step": 187270 }, { "epoch": 72.73, "learning_rate": 1.0302653721682849e-05, "loss": 0.0287, "step": 187280 }, { "epoch": 72.73, "learning_rate": 1.0302135922330099e-05, "loss": 0.0093, "step": 187290 }, { "epoch": 72.74, "learning_rate": 1.0301618122977348e-05, "loss": 0.0002, "step": 187300 }, { "epoch": 72.74, "learning_rate": 1.0301100323624596e-05, "loss": 0.0002, "step": 187310 }, { "epoch": 72.75, "learning_rate": 1.0300582524271846e-05, "loss": 0.017, "step": 187320 }, { "epoch": 72.75, "learning_rate": 1.0300064724919094e-05, "loss": 0.0954, "step": 187330 }, { "epoch": 72.75, "learning_rate": 1.0299546925566343e-05, "loss": 0.0691, "step": 187340 }, { "epoch": 72.76, "learning_rate": 1.0299029126213593e-05, "loss": 0.1751, "step": 187350 }, { "epoch": 72.76, "learning_rate": 1.0298511326860843e-05, "loss": 0.0569, "step": 187360 }, { "epoch": 72.77, "learning_rate": 1.0297993527508092e-05, "loss": 0.1162, "step": 187370 }, { "epoch": 72.77, "learning_rate": 1.0297475728155342e-05, "loss": 0.037, "step": 187380 }, { "epoch": 72.77, "learning_rate": 1.029695792880259e-05, "loss": 0.0659, "step": 187390 }, { "epoch": 72.78, "learning_rate": 1.0296440129449838e-05, "loss": 0.0379, "step": 187400 }, { "epoch": 72.78, "learning_rate": 1.0295922330097087e-05, "loss": 0.1346, "step": 187410 }, { "epoch": 72.78, "learning_rate": 1.0295404530744337e-05, "loss": 0.1368, "step": 187420 }, { "epoch": 72.79, "learning_rate": 1.0294886731391586e-05, "loss": 0.0507, "step": 187430 }, { "epoch": 72.79, "learning_rate": 1.0294368932038836e-05, "loss": 0.0462, "step": 187440 }, { "epoch": 72.8, "learning_rate": 1.0293851132686086e-05, "loss": 0.0005, "step": 187450 }, { "epoch": 72.8, "learning_rate": 1.0293333333333335e-05, "loss": 0.1229, "step": 187460 }, { "epoch": 72.8, "learning_rate": 1.0292815533980583e-05, "loss": 0.0613, "step": 187470 }, { "epoch": 72.81, "learning_rate": 1.0292297734627831e-05, "loss": 0.0976, "step": 187480 }, { "epoch": 72.81, "learning_rate": 1.029177993527508e-05, "loss": 0.0051, "step": 187490 }, { "epoch": 72.82, "learning_rate": 1.029126213592233e-05, "loss": 0.1282, "step": 187500 }, { "epoch": 72.82, "learning_rate": 1.029074433656958e-05, "loss": 0.0027, "step": 187510 }, { "epoch": 72.82, "learning_rate": 1.029022653721683e-05, "loss": 0.0386, "step": 187520 }, { "epoch": 72.83, "learning_rate": 1.028970873786408e-05, "loss": 0.0001, "step": 187530 }, { "epoch": 72.83, "learning_rate": 1.0289190938511329e-05, "loss": 0.021, "step": 187540 }, { "epoch": 72.83, "learning_rate": 1.0288673139158575e-05, "loss": 0.0176, "step": 187550 }, { "epoch": 72.84, "learning_rate": 1.0288155339805825e-05, "loss": 0.0138, "step": 187560 }, { "epoch": 72.84, "learning_rate": 1.0287637540453074e-05, "loss": 0.0177, "step": 187570 }, { "epoch": 72.85, "learning_rate": 1.0287119741100324e-05, "loss": 0.0177, "step": 187580 }, { "epoch": 72.85, "learning_rate": 1.0286601941747574e-05, "loss": 0.0856, "step": 187590 }, { "epoch": 72.85, "learning_rate": 1.0286084142394823e-05, "loss": 0.0533, "step": 187600 }, { "epoch": 72.86, "learning_rate": 1.0285566343042073e-05, "loss": 0.0249, "step": 187610 }, { "epoch": 72.86, "learning_rate": 1.0285048543689322e-05, "loss": 0.0519, "step": 187620 }, { "epoch": 72.87, "learning_rate": 1.0284530744336569e-05, "loss": 0.0756, "step": 187630 }, { "epoch": 72.87, "learning_rate": 1.0284012944983818e-05, "loss": 0.0193, "step": 187640 }, { "epoch": 72.87, "learning_rate": 1.0283495145631068e-05, "loss": 0.1042, "step": 187650 }, { "epoch": 72.88, "learning_rate": 1.0282977346278317e-05, "loss": 0.0434, "step": 187660 }, { "epoch": 72.88, "learning_rate": 1.0282459546925567e-05, "loss": 0.0923, "step": 187670 }, { "epoch": 72.89, "learning_rate": 1.0281941747572817e-05, "loss": 0.1582, "step": 187680 }, { "epoch": 72.89, "learning_rate": 1.0281423948220066e-05, "loss": 0.0108, "step": 187690 }, { "epoch": 72.89, "learning_rate": 1.0280906148867316e-05, "loss": 0.0255, "step": 187700 }, { "epoch": 72.9, "learning_rate": 1.0280388349514562e-05, "loss": 0.0204, "step": 187710 }, { "epoch": 72.9, "learning_rate": 1.0279870550161812e-05, "loss": 0.1345, "step": 187720 }, { "epoch": 72.9, "learning_rate": 1.0279352750809061e-05, "loss": 0.0376, "step": 187730 }, { "epoch": 72.91, "learning_rate": 1.0278834951456311e-05, "loss": 0.0522, "step": 187740 }, { "epoch": 72.91, "learning_rate": 1.027831715210356e-05, "loss": 0.0324, "step": 187750 }, { "epoch": 72.92, "learning_rate": 1.027779935275081e-05, "loss": 0.0936, "step": 187760 }, { "epoch": 72.92, "learning_rate": 1.027728155339806e-05, "loss": 0.2088, "step": 187770 }, { "epoch": 72.92, "learning_rate": 1.027676375404531e-05, "loss": 0.0494, "step": 187780 }, { "epoch": 72.93, "learning_rate": 1.0276245954692556e-05, "loss": 0.047, "step": 187790 }, { "epoch": 72.93, "learning_rate": 1.0275728155339805e-05, "loss": 0.0374, "step": 187800 }, { "epoch": 72.94, "learning_rate": 1.0275210355987055e-05, "loss": 0.0365, "step": 187810 }, { "epoch": 72.94, "learning_rate": 1.0274692556634305e-05, "loss": 0.0856, "step": 187820 }, { "epoch": 72.94, "learning_rate": 1.0274174757281554e-05, "loss": 0.0446, "step": 187830 }, { "epoch": 72.95, "learning_rate": 1.0273656957928804e-05, "loss": 0.048, "step": 187840 }, { "epoch": 72.95, "learning_rate": 1.0273139158576053e-05, "loss": 0.0062, "step": 187850 }, { "epoch": 72.96, "learning_rate": 1.0272621359223303e-05, "loss": 0.0405, "step": 187860 }, { "epoch": 72.96, "learning_rate": 1.0272103559870553e-05, "loss": 0.1084, "step": 187870 }, { "epoch": 72.96, "learning_rate": 1.0271585760517799e-05, "loss": 0.0416, "step": 187880 }, { "epoch": 72.97, "learning_rate": 1.0271067961165049e-05, "loss": 0.2291, "step": 187890 }, { "epoch": 72.97, "learning_rate": 1.0270550161812298e-05, "loss": 0.1103, "step": 187900 }, { "epoch": 72.97, "learning_rate": 1.0270032362459548e-05, "loss": 0.0067, "step": 187910 }, { "epoch": 72.98, "learning_rate": 1.0269514563106797e-05, "loss": 0.0453, "step": 187920 }, { "epoch": 72.98, "learning_rate": 1.0268996763754047e-05, "loss": 0.1366, "step": 187930 }, { "epoch": 72.99, "learning_rate": 1.0268478964401297e-05, "loss": 0.0481, "step": 187940 }, { "epoch": 72.99, "learning_rate": 1.0267961165048546e-05, "loss": 0.0473, "step": 187950 }, { "epoch": 72.99, "learning_rate": 1.0267443365695792e-05, "loss": 0.0945, "step": 187960 }, { "epoch": 73.0, "learning_rate": 1.0266925566343042e-05, "loss": 0.0108, "step": 187970 }, { "epoch": 73.0, "eval_accuracy": 0.9491059147180193, "eval_loss": 0.36288994550704956, "eval_runtime": 8.2044, "eval_samples_per_second": 443.055, "eval_steps_per_second": 55.458, "step": 187975 }, { "epoch": 73.0, "learning_rate": 1.0266407766990292e-05, "loss": 0.0536, "step": 187980 }, { "epoch": 73.01, "learning_rate": 1.0265889967637541e-05, "loss": 0.1152, "step": 187990 }, { "epoch": 73.01, "learning_rate": 1.0265372168284791e-05, "loss": 0.1347, "step": 188000 }, { "epoch": 73.01, "learning_rate": 1.026485436893204e-05, "loss": 0.1505, "step": 188010 }, { "epoch": 73.02, "learning_rate": 1.026433656957929e-05, "loss": 0.078, "step": 188020 }, { "epoch": 73.02, "learning_rate": 1.026381877022654e-05, "loss": 0.0423, "step": 188030 }, { "epoch": 73.03, "learning_rate": 1.0263300970873786e-05, "loss": 0.0243, "step": 188040 }, { "epoch": 73.03, "learning_rate": 1.0262783171521036e-05, "loss": 0.1491, "step": 188050 }, { "epoch": 73.03, "learning_rate": 1.0262265372168285e-05, "loss": 0.1088, "step": 188060 }, { "epoch": 73.04, "learning_rate": 1.0261747572815535e-05, "loss": 0.1759, "step": 188070 }, { "epoch": 73.04, "learning_rate": 1.0261229773462785e-05, "loss": 0.0036, "step": 188080 }, { "epoch": 73.04, "learning_rate": 1.0260711974110034e-05, "loss": 0.0294, "step": 188090 }, { "epoch": 73.05, "learning_rate": 1.0260194174757284e-05, "loss": 0.0936, "step": 188100 }, { "epoch": 73.05, "learning_rate": 1.0259676375404533e-05, "loss": 0.009, "step": 188110 }, { "epoch": 73.06, "learning_rate": 1.025915857605178e-05, "loss": 0.0592, "step": 188120 }, { "epoch": 73.06, "learning_rate": 1.025864077669903e-05, "loss": 0.066, "step": 188130 }, { "epoch": 73.06, "learning_rate": 1.0258122977346279e-05, "loss": 0.0091, "step": 188140 }, { "epoch": 73.07, "learning_rate": 1.0257605177993528e-05, "loss": 0.0588, "step": 188150 }, { "epoch": 73.07, "learning_rate": 1.0257087378640778e-05, "loss": 0.142, "step": 188160 }, { "epoch": 73.08, "learning_rate": 1.0256569579288028e-05, "loss": 0.0153, "step": 188170 }, { "epoch": 73.08, "learning_rate": 1.0256051779935277e-05, "loss": 0.0628, "step": 188180 }, { "epoch": 73.08, "learning_rate": 1.0255533980582525e-05, "loss": 0.096, "step": 188190 }, { "epoch": 73.09, "learning_rate": 1.0255016181229773e-05, "loss": 0.2315, "step": 188200 }, { "epoch": 73.09, "learning_rate": 1.0254498381877023e-05, "loss": 0.0139, "step": 188210 }, { "epoch": 73.1, "learning_rate": 1.0253980582524272e-05, "loss": 0.0328, "step": 188220 }, { "epoch": 73.1, "learning_rate": 1.0253462783171522e-05, "loss": 0.1674, "step": 188230 }, { "epoch": 73.1, "learning_rate": 1.0252944983818772e-05, "loss": 0.1621, "step": 188240 }, { "epoch": 73.11, "learning_rate": 1.0252427184466021e-05, "loss": 0.1289, "step": 188250 }, { "epoch": 73.11, "learning_rate": 1.0251909385113271e-05, "loss": 0.0508, "step": 188260 }, { "epoch": 73.11, "learning_rate": 1.0251391585760519e-05, "loss": 0.0602, "step": 188270 }, { "epoch": 73.12, "learning_rate": 1.0250873786407767e-05, "loss": 0.0443, "step": 188280 }, { "epoch": 73.12, "learning_rate": 1.0250355987055016e-05, "loss": 0.0119, "step": 188290 }, { "epoch": 73.13, "learning_rate": 1.0249838187702266e-05, "loss": 0.0991, "step": 188300 }, { "epoch": 73.13, "learning_rate": 1.0249320388349516e-05, "loss": 0.0339, "step": 188310 }, { "epoch": 73.13, "learning_rate": 1.0248802588996765e-05, "loss": 0.0285, "step": 188320 }, { "epoch": 73.14, "learning_rate": 1.0248284789644015e-05, "loss": 0.033, "step": 188330 }, { "epoch": 73.14, "learning_rate": 1.0247766990291263e-05, "loss": 0.1006, "step": 188340 }, { "epoch": 73.15, "learning_rate": 1.0247249190938512e-05, "loss": 0.0496, "step": 188350 }, { "epoch": 73.15, "learning_rate": 1.0246731391585762e-05, "loss": 0.0657, "step": 188360 }, { "epoch": 73.15, "learning_rate": 1.024621359223301e-05, "loss": 0.1309, "step": 188370 }, { "epoch": 73.16, "learning_rate": 1.024569579288026e-05, "loss": 0.2257, "step": 188380 }, { "epoch": 73.16, "learning_rate": 1.0245177993527509e-05, "loss": 0.0405, "step": 188390 }, { "epoch": 73.17, "learning_rate": 1.0244660194174759e-05, "loss": 0.0012, "step": 188400 }, { "epoch": 73.17, "learning_rate": 1.0244142394822008e-05, "loss": 0.1353, "step": 188410 }, { "epoch": 73.17, "learning_rate": 1.0243624595469256e-05, "loss": 0.0254, "step": 188420 }, { "epoch": 73.18, "learning_rate": 1.0243106796116506e-05, "loss": 0.0668, "step": 188430 }, { "epoch": 73.18, "learning_rate": 1.0242588996763756e-05, "loss": 0.0131, "step": 188440 }, { "epoch": 73.18, "learning_rate": 1.0242071197411003e-05, "loss": 0.1939, "step": 188450 }, { "epoch": 73.19, "learning_rate": 1.0241553398058253e-05, "loss": 0.1155, "step": 188460 }, { "epoch": 73.19, "learning_rate": 1.0241035598705503e-05, "loss": 0.0192, "step": 188470 }, { "epoch": 73.2, "learning_rate": 1.0240517799352752e-05, "loss": 0.0315, "step": 188480 }, { "epoch": 73.2, "learning_rate": 1.024e-05, "loss": 0.1691, "step": 188490 }, { "epoch": 73.2, "learning_rate": 1.023948220064725e-05, "loss": 0.0169, "step": 188500 }, { "epoch": 73.21, "learning_rate": 1.02389644012945e-05, "loss": 0.1071, "step": 188510 }, { "epoch": 73.21, "learning_rate": 1.0238446601941749e-05, "loss": 0.0521, "step": 188520 }, { "epoch": 73.22, "learning_rate": 1.0237928802588997e-05, "loss": 0.1492, "step": 188530 }, { "epoch": 73.22, "learning_rate": 1.0237411003236247e-05, "loss": 0.0101, "step": 188540 }, { "epoch": 73.22, "learning_rate": 1.0236893203883496e-05, "loss": 0.0385, "step": 188550 }, { "epoch": 73.23, "learning_rate": 1.0236375404530746e-05, "loss": 0.0073, "step": 188560 }, { "epoch": 73.23, "learning_rate": 1.0235857605177994e-05, "loss": 0.0256, "step": 188570 }, { "epoch": 73.23, "learning_rate": 1.0235339805825243e-05, "loss": 0.0137, "step": 188580 }, { "epoch": 73.24, "learning_rate": 1.0234822006472493e-05, "loss": 0.0545, "step": 188590 }, { "epoch": 73.24, "learning_rate": 1.0234304207119743e-05, "loss": 0.11, "step": 188600 }, { "epoch": 73.25, "learning_rate": 1.023378640776699e-05, "loss": 0.0748, "step": 188610 }, { "epoch": 73.25, "learning_rate": 1.023326860841424e-05, "loss": 0.0369, "step": 188620 }, { "epoch": 73.25, "learning_rate": 1.023275080906149e-05, "loss": 0.1298, "step": 188630 }, { "epoch": 73.26, "learning_rate": 1.023223300970874e-05, "loss": 0.0671, "step": 188640 }, { "epoch": 73.26, "learning_rate": 1.0231715210355987e-05, "loss": 0.1757, "step": 188650 }, { "epoch": 73.27, "learning_rate": 1.0231197411003237e-05, "loss": 0.0816, "step": 188660 }, { "epoch": 73.27, "learning_rate": 1.0230679611650487e-05, "loss": 0.1489, "step": 188670 }, { "epoch": 73.27, "learning_rate": 1.0230161812297736e-05, "loss": 0.1148, "step": 188680 }, { "epoch": 73.28, "learning_rate": 1.0229644012944984e-05, "loss": 0.1654, "step": 188690 }, { "epoch": 73.28, "learning_rate": 1.0229126213592234e-05, "loss": 0.0283, "step": 188700 }, { "epoch": 73.29, "learning_rate": 1.0228608414239483e-05, "loss": 0.0399, "step": 188710 }, { "epoch": 73.29, "learning_rate": 1.0228090614886731e-05, "loss": 0.001, "step": 188720 }, { "epoch": 73.29, "learning_rate": 1.0227572815533981e-05, "loss": 0.038, "step": 188730 }, { "epoch": 73.3, "learning_rate": 1.022705501618123e-05, "loss": 0.0202, "step": 188740 }, { "epoch": 73.3, "learning_rate": 1.022653721682848e-05, "loss": 0.0488, "step": 188750 }, { "epoch": 73.3, "learning_rate": 1.022601941747573e-05, "loss": 0.0873, "step": 188760 }, { "epoch": 73.31, "learning_rate": 1.0225501618122978e-05, "loss": 0.0607, "step": 188770 }, { "epoch": 73.31, "learning_rate": 1.0224983818770227e-05, "loss": 0.0098, "step": 188780 }, { "epoch": 73.32, "learning_rate": 1.0224466019417477e-05, "loss": 0.0308, "step": 188790 }, { "epoch": 73.32, "learning_rate": 1.0223948220064725e-05, "loss": 0.0015, "step": 188800 }, { "epoch": 73.32, "learning_rate": 1.0223430420711974e-05, "loss": 0.1135, "step": 188810 }, { "epoch": 73.33, "learning_rate": 1.0222912621359224e-05, "loss": 0.0787, "step": 188820 }, { "epoch": 73.33, "learning_rate": 1.0222394822006474e-05, "loss": 0.0562, "step": 188830 }, { "epoch": 73.34, "learning_rate": 1.0221877022653723e-05, "loss": 0.0948, "step": 188840 }, { "epoch": 73.34, "learning_rate": 1.0221359223300971e-05, "loss": 0.0903, "step": 188850 }, { "epoch": 73.34, "learning_rate": 1.022084142394822e-05, "loss": 0.0433, "step": 188860 }, { "epoch": 73.35, "learning_rate": 1.0220323624595469e-05, "loss": 0.0489, "step": 188870 }, { "epoch": 73.35, "learning_rate": 1.0219805825242718e-05, "loss": 0.0269, "step": 188880 }, { "epoch": 73.36, "learning_rate": 1.0219288025889968e-05, "loss": 0.0989, "step": 188890 }, { "epoch": 73.36, "learning_rate": 1.0218770226537218e-05, "loss": 0.0139, "step": 188900 }, { "epoch": 73.36, "learning_rate": 1.0218252427184467e-05, "loss": 0.0354, "step": 188910 }, { "epoch": 73.37, "learning_rate": 1.0217734627831717e-05, "loss": 0.0184, "step": 188920 }, { "epoch": 73.37, "learning_rate": 1.0217216828478966e-05, "loss": 0.0061, "step": 188930 }, { "epoch": 73.37, "learning_rate": 1.0216699029126214e-05, "loss": 0.0899, "step": 188940 }, { "epoch": 73.38, "learning_rate": 1.0216181229773462e-05, "loss": 0.0587, "step": 188950 }, { "epoch": 73.38, "learning_rate": 1.0215663430420712e-05, "loss": 0.0314, "step": 188960 }, { "epoch": 73.39, "learning_rate": 1.0215145631067962e-05, "loss": 0.0899, "step": 188970 }, { "epoch": 73.39, "learning_rate": 1.0214627831715211e-05, "loss": 0.2649, "step": 188980 }, { "epoch": 73.39, "learning_rate": 1.021411003236246e-05, "loss": 0.0978, "step": 188990 }, { "epoch": 73.4, "learning_rate": 1.021359223300971e-05, "loss": 0.0847, "step": 189000 }, { "epoch": 73.4, "learning_rate": 1.021307443365696e-05, "loss": 0.0598, "step": 189010 }, { "epoch": 73.41, "learning_rate": 1.0212556634304206e-05, "loss": 0.1057, "step": 189020 }, { "epoch": 73.41, "learning_rate": 1.0212038834951456e-05, "loss": 0.0484, "step": 189030 }, { "epoch": 73.41, "learning_rate": 1.0211521035598705e-05, "loss": 0.2021, "step": 189040 }, { "epoch": 73.42, "learning_rate": 1.0211003236245955e-05, "loss": 0.0065, "step": 189050 }, { "epoch": 73.42, "learning_rate": 1.0210485436893205e-05, "loss": 0.0083, "step": 189060 }, { "epoch": 73.43, "learning_rate": 1.0209967637540454e-05, "loss": 0.1749, "step": 189070 }, { "epoch": 73.43, "learning_rate": 1.0209449838187704e-05, "loss": 0.0435, "step": 189080 }, { "epoch": 73.43, "learning_rate": 1.0208932038834954e-05, "loss": 0.1296, "step": 189090 }, { "epoch": 73.44, "learning_rate": 1.02084142394822e-05, "loss": 0.0235, "step": 189100 }, { "epoch": 73.44, "learning_rate": 1.020789644012945e-05, "loss": 0.0297, "step": 189110 }, { "epoch": 73.44, "learning_rate": 1.0207378640776699e-05, "loss": 0.0133, "step": 189120 }, { "epoch": 73.45, "learning_rate": 1.0206860841423949e-05, "loss": 0.0768, "step": 189130 }, { "epoch": 73.45, "learning_rate": 1.0206343042071198e-05, "loss": 0.0839, "step": 189140 }, { "epoch": 73.46, "learning_rate": 1.0205825242718448e-05, "loss": 0.053, "step": 189150 }, { "epoch": 73.46, "learning_rate": 1.0205307443365698e-05, "loss": 0.0297, "step": 189160 }, { "epoch": 73.46, "learning_rate": 1.0204789644012947e-05, "loss": 0.0222, "step": 189170 }, { "epoch": 73.47, "learning_rate": 1.0204271844660193e-05, "loss": 0.0625, "step": 189180 }, { "epoch": 73.47, "learning_rate": 1.0203754045307443e-05, "loss": 0.0252, "step": 189190 }, { "epoch": 73.48, "learning_rate": 1.0203236245954693e-05, "loss": 0.0584, "step": 189200 }, { "epoch": 73.48, "learning_rate": 1.0202718446601942e-05, "loss": 0.2124, "step": 189210 }, { "epoch": 73.48, "learning_rate": 1.0202200647249192e-05, "loss": 0.0691, "step": 189220 }, { "epoch": 73.49, "learning_rate": 1.0201682847896441e-05, "loss": 0.0988, "step": 189230 }, { "epoch": 73.49, "learning_rate": 1.0201165048543691e-05, "loss": 0.0849, "step": 189240 }, { "epoch": 73.5, "learning_rate": 1.020064724919094e-05, "loss": 0.0811, "step": 189250 }, { "epoch": 73.5, "learning_rate": 1.0200129449838187e-05, "loss": 0.0638, "step": 189260 }, { "epoch": 73.5, "learning_rate": 1.0199611650485437e-05, "loss": 0.1045, "step": 189270 }, { "epoch": 73.51, "learning_rate": 1.0199093851132686e-05, "loss": 0.1435, "step": 189280 }, { "epoch": 73.51, "learning_rate": 1.0198576051779936e-05, "loss": 0.1562, "step": 189290 }, { "epoch": 73.51, "learning_rate": 1.0198058252427185e-05, "loss": 0.0218, "step": 189300 }, { "epoch": 73.52, "learning_rate": 1.0197540453074435e-05, "loss": 0.0666, "step": 189310 }, { "epoch": 73.52, "learning_rate": 1.0197022653721685e-05, "loss": 0.1, "step": 189320 }, { "epoch": 73.53, "learning_rate": 1.0196504854368934e-05, "loss": 0.1259, "step": 189330 }, { "epoch": 73.53, "learning_rate": 1.019598705501618e-05, "loss": 0.0013, "step": 189340 }, { "epoch": 73.53, "learning_rate": 1.019546925566343e-05, "loss": 0.0151, "step": 189350 }, { "epoch": 73.54, "learning_rate": 1.019495145631068e-05, "loss": 0.0712, "step": 189360 }, { "epoch": 73.54, "learning_rate": 1.019443365695793e-05, "loss": 0.0476, "step": 189370 }, { "epoch": 73.55, "learning_rate": 1.0193915857605179e-05, "loss": 0.1017, "step": 189380 }, { "epoch": 73.55, "learning_rate": 1.0193398058252429e-05, "loss": 0.045, "step": 189390 }, { "epoch": 73.55, "learning_rate": 1.0192880258899678e-05, "loss": 0.0204, "step": 189400 }, { "epoch": 73.56, "learning_rate": 1.0192362459546928e-05, "loss": 0.0201, "step": 189410 }, { "epoch": 73.56, "learning_rate": 1.0191844660194174e-05, "loss": 0.0592, "step": 189420 }, { "epoch": 73.57, "learning_rate": 1.0191326860841424e-05, "loss": 0.0077, "step": 189430 }, { "epoch": 73.57, "learning_rate": 1.0190809061488673e-05, "loss": 0.0233, "step": 189440 }, { "epoch": 73.57, "learning_rate": 1.0190291262135923e-05, "loss": 0.1624, "step": 189450 }, { "epoch": 73.58, "learning_rate": 1.0189773462783173e-05, "loss": 0.0003, "step": 189460 }, { "epoch": 73.58, "learning_rate": 1.0189255663430422e-05, "loss": 0.0088, "step": 189470 }, { "epoch": 73.58, "learning_rate": 1.0188737864077672e-05, "loss": 0.0235, "step": 189480 }, { "epoch": 73.59, "learning_rate": 1.0188220064724921e-05, "loss": 0.1561, "step": 189490 }, { "epoch": 73.59, "learning_rate": 1.0187702265372171e-05, "loss": 0.0148, "step": 189500 }, { "epoch": 73.6, "learning_rate": 1.0187184466019417e-05, "loss": 0.0328, "step": 189510 }, { "epoch": 73.6, "learning_rate": 1.0186666666666667e-05, "loss": 0.0043, "step": 189520 }, { "epoch": 73.6, "learning_rate": 1.0186148867313916e-05, "loss": 0.0403, "step": 189530 }, { "epoch": 73.61, "learning_rate": 1.0185631067961166e-05, "loss": 0.1375, "step": 189540 }, { "epoch": 73.61, "learning_rate": 1.0185113268608416e-05, "loss": 0.0293, "step": 189550 }, { "epoch": 73.62, "learning_rate": 1.0184595469255665e-05, "loss": 0.038, "step": 189560 }, { "epoch": 73.62, "learning_rate": 1.0184077669902915e-05, "loss": 0.0833, "step": 189570 }, { "epoch": 73.62, "learning_rate": 1.0183559870550165e-05, "loss": 0.0018, "step": 189580 }, { "epoch": 73.63, "learning_rate": 1.018304207119741e-05, "loss": 0.1619, "step": 189590 }, { "epoch": 73.63, "learning_rate": 1.018252427184466e-05, "loss": 0.0054, "step": 189600 }, { "epoch": 73.63, "learning_rate": 1.018200647249191e-05, "loss": 0.1262, "step": 189610 }, { "epoch": 73.64, "learning_rate": 1.018148867313916e-05, "loss": 0.2621, "step": 189620 }, { "epoch": 73.64, "learning_rate": 1.018097087378641e-05, "loss": 0.0981, "step": 189630 }, { "epoch": 73.65, "learning_rate": 1.0180453074433659e-05, "loss": 0.0227, "step": 189640 }, { "epoch": 73.65, "learning_rate": 1.0179935275080908e-05, "loss": 0.0764, "step": 189650 }, { "epoch": 73.65, "learning_rate": 1.0179417475728156e-05, "loss": 0.0143, "step": 189660 }, { "epoch": 73.66, "learning_rate": 1.0178899676375404e-05, "loss": 0.2687, "step": 189670 }, { "epoch": 73.66, "learning_rate": 1.0178381877022654e-05, "loss": 0.0533, "step": 189680 }, { "epoch": 73.67, "learning_rate": 1.0177864077669904e-05, "loss": 0.1215, "step": 189690 }, { "epoch": 73.67, "learning_rate": 1.0177346278317153e-05, "loss": 0.0849, "step": 189700 }, { "epoch": 73.67, "learning_rate": 1.0176828478964403e-05, "loss": 0.0544, "step": 189710 }, { "epoch": 73.68, "learning_rate": 1.0176310679611652e-05, "loss": 0.0253, "step": 189720 }, { "epoch": 73.68, "learning_rate": 1.0175792880258902e-05, "loss": 0.0282, "step": 189730 }, { "epoch": 73.69, "learning_rate": 1.017527508090615e-05, "loss": 0.0308, "step": 189740 }, { "epoch": 73.69, "learning_rate": 1.0174757281553398e-05, "loss": 0.1439, "step": 189750 }, { "epoch": 73.69, "learning_rate": 1.0174239482200647e-05, "loss": 0.1147, "step": 189760 }, { "epoch": 73.7, "learning_rate": 1.0173721682847897e-05, "loss": 0.0121, "step": 189770 }, { "epoch": 73.7, "learning_rate": 1.0173203883495147e-05, "loss": 0.0388, "step": 189780 }, { "epoch": 73.7, "learning_rate": 1.0172686084142396e-05, "loss": 0.1018, "step": 189790 }, { "epoch": 73.71, "learning_rate": 1.0172168284789646e-05, "loss": 0.0011, "step": 189800 }, { "epoch": 73.71, "learning_rate": 1.0171650485436894e-05, "loss": 0.0573, "step": 189810 }, { "epoch": 73.72, "learning_rate": 1.0171132686084144e-05, "loss": 0.0317, "step": 189820 }, { "epoch": 73.72, "learning_rate": 1.0170614886731391e-05, "loss": 0.184, "step": 189830 }, { "epoch": 73.72, "learning_rate": 1.0170097087378641e-05, "loss": 0.0597, "step": 189840 }, { "epoch": 73.73, "learning_rate": 1.016957928802589e-05, "loss": 0.0122, "step": 189850 }, { "epoch": 73.73, "learning_rate": 1.016906148867314e-05, "loss": 0.0445, "step": 189860 }, { "epoch": 73.74, "learning_rate": 1.016854368932039e-05, "loss": 0.0008, "step": 189870 }, { "epoch": 73.74, "learning_rate": 1.016802588996764e-05, "loss": 0.0436, "step": 189880 }, { "epoch": 73.74, "learning_rate": 1.0167508090614887e-05, "loss": 0.089, "step": 189890 }, { "epoch": 73.75, "learning_rate": 1.0166990291262137e-05, "loss": 0.0002, "step": 189900 }, { "epoch": 73.75, "learning_rate": 1.0166472491909385e-05, "loss": 0.1984, "step": 189910 }, { "epoch": 73.76, "learning_rate": 1.0165954692556635e-05, "loss": 0.1084, "step": 189920 }, { "epoch": 73.76, "learning_rate": 1.0165436893203884e-05, "loss": 0.0902, "step": 189930 }, { "epoch": 73.76, "learning_rate": 1.0164919093851134e-05, "loss": 0.0847, "step": 189940 }, { "epoch": 73.77, "learning_rate": 1.0164401294498383e-05, "loss": 0.0269, "step": 189950 }, { "epoch": 73.77, "learning_rate": 1.0163883495145631e-05, "loss": 0.0448, "step": 189960 }, { "epoch": 73.77, "learning_rate": 1.0163365695792881e-05, "loss": 0.0358, "step": 189970 }, { "epoch": 73.78, "learning_rate": 1.016284789644013e-05, "loss": 0.0623, "step": 189980 }, { "epoch": 73.78, "learning_rate": 1.0162330097087379e-05, "loss": 0.1839, "step": 189990 }, { "epoch": 73.79, "learning_rate": 1.0161812297734628e-05, "loss": 0.0624, "step": 190000 }, { "epoch": 73.79, "learning_rate": 1.0161294498381878e-05, "loss": 0.0843, "step": 190010 }, { "epoch": 73.79, "learning_rate": 1.0160776699029127e-05, "loss": 0.0168, "step": 190020 }, { "epoch": 73.8, "learning_rate": 1.0160258899676377e-05, "loss": 0.0706, "step": 190030 }, { "epoch": 73.8, "learning_rate": 1.0159741100323625e-05, "loss": 0.0418, "step": 190040 }, { "epoch": 73.81, "learning_rate": 1.0159223300970875e-05, "loss": 0.0647, "step": 190050 }, { "epoch": 73.81, "learning_rate": 1.0158705501618124e-05, "loss": 0.0327, "step": 190060 }, { "epoch": 73.81, "learning_rate": 1.0158187702265374e-05, "loss": 0.1731, "step": 190070 }, { "epoch": 73.82, "learning_rate": 1.0157669902912622e-05, "loss": 0.011, "step": 190080 }, { "epoch": 73.82, "learning_rate": 1.0157152103559871e-05, "loss": 0.1228, "step": 190090 }, { "epoch": 73.83, "learning_rate": 1.0156634304207121e-05, "loss": 0.0369, "step": 190100 }, { "epoch": 73.83, "learning_rate": 1.015611650485437e-05, "loss": 0.0699, "step": 190110 }, { "epoch": 73.83, "learning_rate": 1.0155598705501618e-05, "loss": 0.0437, "step": 190120 }, { "epoch": 73.84, "learning_rate": 1.0155080906148868e-05, "loss": 0.0428, "step": 190130 }, { "epoch": 73.84, "learning_rate": 1.0154563106796118e-05, "loss": 0.0012, "step": 190140 }, { "epoch": 73.84, "learning_rate": 1.0154045307443367e-05, "loss": 0.1259, "step": 190150 }, { "epoch": 73.85, "learning_rate": 1.0153527508090615e-05, "loss": 0.1472, "step": 190160 }, { "epoch": 73.85, "learning_rate": 1.0153009708737865e-05, "loss": 0.0323, "step": 190170 }, { "epoch": 73.86, "learning_rate": 1.0152491909385115e-05, "loss": 0.0016, "step": 190180 }, { "epoch": 73.86, "learning_rate": 1.0151974110032362e-05, "loss": 0.0308, "step": 190190 }, { "epoch": 73.86, "learning_rate": 1.0151456310679612e-05, "loss": 0.1118, "step": 190200 }, { "epoch": 73.87, "learning_rate": 1.0150938511326862e-05, "loss": 0.1181, "step": 190210 }, { "epoch": 73.87, "learning_rate": 1.0150420711974111e-05, "loss": 0.1537, "step": 190220 }, { "epoch": 73.88, "learning_rate": 1.0149902912621361e-05, "loss": 0.0007, "step": 190230 }, { "epoch": 73.88, "learning_rate": 1.0149385113268609e-05, "loss": 0.1252, "step": 190240 }, { "epoch": 73.88, "learning_rate": 1.0148867313915858e-05, "loss": 0.0044, "step": 190250 }, { "epoch": 73.89, "learning_rate": 1.0148349514563108e-05, "loss": 0.084, "step": 190260 }, { "epoch": 73.89, "learning_rate": 1.0147831715210356e-05, "loss": 0.0036, "step": 190270 }, { "epoch": 73.9, "learning_rate": 1.0147313915857606e-05, "loss": 0.0516, "step": 190280 }, { "epoch": 73.9, "learning_rate": 1.0146796116504855e-05, "loss": 0.0052, "step": 190290 }, { "epoch": 73.9, "learning_rate": 1.0146278317152105e-05, "loss": 0.137, "step": 190300 }, { "epoch": 73.91, "learning_rate": 1.0145760517799354e-05, "loss": 0.0509, "step": 190310 }, { "epoch": 73.91, "learning_rate": 1.0145242718446602e-05, "loss": 0.0288, "step": 190320 }, { "epoch": 73.91, "learning_rate": 1.0144724919093852e-05, "loss": 0.0786, "step": 190330 }, { "epoch": 73.92, "learning_rate": 1.01442071197411e-05, "loss": 0.035, "step": 190340 }, { "epoch": 73.92, "learning_rate": 1.014368932038835e-05, "loss": 0.0098, "step": 190350 }, { "epoch": 73.93, "learning_rate": 1.01431715210356e-05, "loss": 0.0757, "step": 190360 }, { "epoch": 73.93, "learning_rate": 1.0142653721682849e-05, "loss": 0.0334, "step": 190370 }, { "epoch": 73.93, "learning_rate": 1.0142135922330098e-05, "loss": 0.1023, "step": 190380 }, { "epoch": 73.94, "learning_rate": 1.0141618122977348e-05, "loss": 0.0236, "step": 190390 }, { "epoch": 73.94, "learning_rate": 1.0141100323624596e-05, "loss": 0.0585, "step": 190400 }, { "epoch": 73.95, "learning_rate": 1.0140582524271846e-05, "loss": 0.0615, "step": 190410 }, { "epoch": 73.95, "learning_rate": 1.0140064724919093e-05, "loss": 0.0525, "step": 190420 }, { "epoch": 73.95, "learning_rate": 1.0139546925566343e-05, "loss": 0.0601, "step": 190430 }, { "epoch": 73.96, "learning_rate": 1.0139029126213593e-05, "loss": 0.1023, "step": 190440 }, { "epoch": 73.96, "learning_rate": 1.0138511326860842e-05, "loss": 0.1699, "step": 190450 }, { "epoch": 73.97, "learning_rate": 1.0137993527508092e-05, "loss": 0.1158, "step": 190460 }, { "epoch": 73.97, "learning_rate": 1.0137475728155342e-05, "loss": 0.055, "step": 190470 }, { "epoch": 73.97, "learning_rate": 1.013695792880259e-05, "loss": 0.0509, "step": 190480 }, { "epoch": 73.98, "learning_rate": 1.0136440129449837e-05, "loss": 0.07, "step": 190490 }, { "epoch": 73.98, "learning_rate": 1.0135922330097087e-05, "loss": 0.0216, "step": 190500 }, { "epoch": 73.98, "learning_rate": 1.0135404530744337e-05, "loss": 0.0681, "step": 190510 }, { "epoch": 73.99, "learning_rate": 1.0134886731391586e-05, "loss": 0.0175, "step": 190520 }, { "epoch": 73.99, "learning_rate": 1.0134368932038836e-05, "loss": 0.0829, "step": 190530 }, { "epoch": 74.0, "learning_rate": 1.0133851132686086e-05, "loss": 0.1317, "step": 190540 }, { "epoch": 74.0, "learning_rate": 1.0133333333333335e-05, "loss": 0.0446, "step": 190550 }, { "epoch": 74.0, "eval_accuracy": 0.9507565337001376, "eval_loss": 0.35697141289711, "eval_runtime": 8.2637, "eval_samples_per_second": 439.875, "eval_steps_per_second": 55.06, "step": 190550 }, { "epoch": 74.0, "learning_rate": 1.0132815533980583e-05, "loss": 0.0938, "step": 190560 }, { "epoch": 74.01, "learning_rate": 1.0132297734627831e-05, "loss": 0.084, "step": 190570 }, { "epoch": 74.01, "learning_rate": 1.013177993527508e-05, "loss": 0.0003, "step": 190580 }, { "epoch": 74.02, "learning_rate": 1.013126213592233e-05, "loss": 0.0807, "step": 190590 }, { "epoch": 74.02, "learning_rate": 1.013074433656958e-05, "loss": 0.066, "step": 190600 }, { "epoch": 74.02, "learning_rate": 1.013022653721683e-05, "loss": 0.0293, "step": 190610 }, { "epoch": 74.03, "learning_rate": 1.0129708737864079e-05, "loss": 0.0082, "step": 190620 }, { "epoch": 74.03, "learning_rate": 1.0129190938511329e-05, "loss": 0.0405, "step": 190630 }, { "epoch": 74.03, "learning_rate": 1.0128673139158578e-05, "loss": 0.0277, "step": 190640 }, { "epoch": 74.04, "learning_rate": 1.0128155339805825e-05, "loss": 0.0017, "step": 190650 }, { "epoch": 74.04, "learning_rate": 1.0127637540453074e-05, "loss": 0.072, "step": 190660 }, { "epoch": 74.05, "learning_rate": 1.0127119741100324e-05, "loss": 0.1266, "step": 190670 }, { "epoch": 74.05, "learning_rate": 1.0126601941747573e-05, "loss": 0.1322, "step": 190680 }, { "epoch": 74.05, "learning_rate": 1.0126084142394823e-05, "loss": 0.1008, "step": 190690 }, { "epoch": 74.06, "learning_rate": 1.0125566343042073e-05, "loss": 0.0665, "step": 190700 }, { "epoch": 74.06, "learning_rate": 1.0125048543689322e-05, "loss": 0.0088, "step": 190710 }, { "epoch": 74.07, "learning_rate": 1.0124530744336572e-05, "loss": 0.0016, "step": 190720 }, { "epoch": 74.07, "learning_rate": 1.0124012944983818e-05, "loss": 0.3053, "step": 190730 }, { "epoch": 74.07, "learning_rate": 1.0123495145631068e-05, "loss": 0.0174, "step": 190740 }, { "epoch": 74.08, "learning_rate": 1.0122977346278317e-05, "loss": 0.0005, "step": 190750 }, { "epoch": 74.08, "learning_rate": 1.0122459546925567e-05, "loss": 0.0028, "step": 190760 }, { "epoch": 74.09, "learning_rate": 1.0121941747572817e-05, "loss": 0.0902, "step": 190770 }, { "epoch": 74.09, "learning_rate": 1.0121423948220066e-05, "loss": 0.1061, "step": 190780 }, { "epoch": 74.09, "learning_rate": 1.0120906148867316e-05, "loss": 0.0038, "step": 190790 }, { "epoch": 74.1, "learning_rate": 1.0120388349514565e-05, "loss": 0.0173, "step": 190800 }, { "epoch": 74.1, "learning_rate": 1.0119870550161812e-05, "loss": 0.0362, "step": 190810 }, { "epoch": 74.1, "learning_rate": 1.0119352750809061e-05, "loss": 0.0003, "step": 190820 }, { "epoch": 74.11, "learning_rate": 1.0118834951456311e-05, "loss": 0.197, "step": 190830 }, { "epoch": 74.11, "learning_rate": 1.011831715210356e-05, "loss": 0.0197, "step": 190840 }, { "epoch": 74.12, "learning_rate": 1.011779935275081e-05, "loss": 0.084, "step": 190850 }, { "epoch": 74.12, "learning_rate": 1.011728155339806e-05, "loss": 0.0616, "step": 190860 }, { "epoch": 74.12, "learning_rate": 1.011676375404531e-05, "loss": 0.0763, "step": 190870 }, { "epoch": 74.13, "learning_rate": 1.0116245954692559e-05, "loss": 0.1039, "step": 190880 }, { "epoch": 74.13, "learning_rate": 1.0115728155339805e-05, "loss": 0.1235, "step": 190890 }, { "epoch": 74.14, "learning_rate": 1.0115210355987055e-05, "loss": 0.0714, "step": 190900 }, { "epoch": 74.14, "learning_rate": 1.0114692556634304e-05, "loss": 0.0981, "step": 190910 }, { "epoch": 74.14, "learning_rate": 1.0114174757281554e-05, "loss": 0.1792, "step": 190920 }, { "epoch": 74.15, "learning_rate": 1.0113656957928804e-05, "loss": 0.1034, "step": 190930 }, { "epoch": 74.15, "learning_rate": 1.0113139158576053e-05, "loss": 0.0027, "step": 190940 }, { "epoch": 74.16, "learning_rate": 1.0112621359223303e-05, "loss": 0.0288, "step": 190950 }, { "epoch": 74.16, "learning_rate": 1.0112103559870553e-05, "loss": 0.0835, "step": 190960 }, { "epoch": 74.16, "learning_rate": 1.0111585760517799e-05, "loss": 0.0434, "step": 190970 }, { "epoch": 74.17, "learning_rate": 1.0111067961165048e-05, "loss": 0.1468, "step": 190980 }, { "epoch": 74.17, "learning_rate": 1.0110550161812298e-05, "loss": 0.0148, "step": 190990 }, { "epoch": 74.17, "learning_rate": 1.0110032362459548e-05, "loss": 0.084, "step": 191000 }, { "epoch": 74.18, "learning_rate": 1.0109514563106797e-05, "loss": 0.0941, "step": 191010 }, { "epoch": 74.18, "learning_rate": 1.0108996763754047e-05, "loss": 0.1014, "step": 191020 }, { "epoch": 74.19, "learning_rate": 1.0108478964401296e-05, "loss": 0.1619, "step": 191030 }, { "epoch": 74.19, "learning_rate": 1.0107961165048546e-05, "loss": 0.1233, "step": 191040 }, { "epoch": 74.19, "learning_rate": 1.0107443365695792e-05, "loss": 0.2314, "step": 191050 }, { "epoch": 74.2, "learning_rate": 1.0106925566343042e-05, "loss": 0.1445, "step": 191060 }, { "epoch": 74.2, "learning_rate": 1.0106407766990292e-05, "loss": 0.0533, "step": 191070 }, { "epoch": 74.21, "learning_rate": 1.0105889967637541e-05, "loss": 0.0044, "step": 191080 }, { "epoch": 74.21, "learning_rate": 1.010537216828479e-05, "loss": 0.0574, "step": 191090 }, { "epoch": 74.21, "learning_rate": 1.010485436893204e-05, "loss": 0.0498, "step": 191100 }, { "epoch": 74.22, "learning_rate": 1.010433656957929e-05, "loss": 0.0583, "step": 191110 }, { "epoch": 74.22, "learning_rate": 1.010381877022654e-05, "loss": 0.0413, "step": 191120 }, { "epoch": 74.23, "learning_rate": 1.0103300970873786e-05, "loss": 0.0446, "step": 191130 }, { "epoch": 74.23, "learning_rate": 1.0102783171521035e-05, "loss": 0.0003, "step": 191140 }, { "epoch": 74.23, "learning_rate": 1.0102265372168285e-05, "loss": 0.125, "step": 191150 }, { "epoch": 74.24, "learning_rate": 1.0101747572815535e-05, "loss": 0.1559, "step": 191160 }, { "epoch": 74.24, "learning_rate": 1.0101229773462784e-05, "loss": 0.1756, "step": 191170 }, { "epoch": 74.24, "learning_rate": 1.0100711974110034e-05, "loss": 0.0673, "step": 191180 }, { "epoch": 74.25, "learning_rate": 1.0100194174757284e-05, "loss": 0.0476, "step": 191190 }, { "epoch": 74.25, "learning_rate": 1.0099676375404533e-05, "loss": 0.1097, "step": 191200 }, { "epoch": 74.26, "learning_rate": 1.0099158576051781e-05, "loss": 0.0571, "step": 191210 }, { "epoch": 74.26, "learning_rate": 1.0098640776699029e-05, "loss": 0.0956, "step": 191220 }, { "epoch": 74.26, "learning_rate": 1.0098122977346279e-05, "loss": 0.0206, "step": 191230 }, { "epoch": 74.27, "learning_rate": 1.0097605177993528e-05, "loss": 0.0459, "step": 191240 }, { "epoch": 74.27, "learning_rate": 1.0097087378640778e-05, "loss": 0.0396, "step": 191250 }, { "epoch": 74.28, "learning_rate": 1.0096569579288028e-05, "loss": 0.0096, "step": 191260 }, { "epoch": 74.28, "learning_rate": 1.0096051779935277e-05, "loss": 0.0151, "step": 191270 }, { "epoch": 74.28, "learning_rate": 1.0095533980582525e-05, "loss": 0.135, "step": 191280 }, { "epoch": 74.29, "learning_rate": 1.0095016181229775e-05, "loss": 0.0193, "step": 191290 }, { "epoch": 74.29, "learning_rate": 1.0094498381877023e-05, "loss": 0.0202, "step": 191300 }, { "epoch": 74.3, "learning_rate": 1.0093980582524272e-05, "loss": 0.0003, "step": 191310 }, { "epoch": 74.3, "learning_rate": 1.0093462783171522e-05, "loss": 0.0669, "step": 191320 }, { "epoch": 74.3, "learning_rate": 1.0092944983818771e-05, "loss": 0.2528, "step": 191330 }, { "epoch": 74.31, "learning_rate": 1.0092427184466021e-05, "loss": 0.0043, "step": 191340 }, { "epoch": 74.31, "learning_rate": 1.009190938511327e-05, "loss": 0.0394, "step": 191350 }, { "epoch": 74.31, "learning_rate": 1.0091391585760519e-05, "loss": 0.0137, "step": 191360 }, { "epoch": 74.32, "learning_rate": 1.0090873786407768e-05, "loss": 0.2087, "step": 191370 }, { "epoch": 74.32, "learning_rate": 1.0090355987055016e-05, "loss": 0.0488, "step": 191380 }, { "epoch": 74.33, "learning_rate": 1.0089838187702266e-05, "loss": 0.0243, "step": 191390 }, { "epoch": 74.33, "learning_rate": 1.0089320388349515e-05, "loss": 0.0184, "step": 191400 }, { "epoch": 74.33, "learning_rate": 1.0088802588996765e-05, "loss": 0.0591, "step": 191410 }, { "epoch": 74.34, "learning_rate": 1.0088284789644015e-05, "loss": 0.0932, "step": 191420 }, { "epoch": 74.34, "learning_rate": 1.0087766990291263e-05, "loss": 0.004, "step": 191430 }, { "epoch": 74.35, "learning_rate": 1.0087249190938512e-05, "loss": 0.0245, "step": 191440 }, { "epoch": 74.35, "learning_rate": 1.0086731391585762e-05, "loss": 0.089, "step": 191450 }, { "epoch": 74.35, "learning_rate": 1.008621359223301e-05, "loss": 0.0212, "step": 191460 }, { "epoch": 74.36, "learning_rate": 1.008569579288026e-05, "loss": 0.0775, "step": 191470 }, { "epoch": 74.36, "learning_rate": 1.0085177993527509e-05, "loss": 0.0036, "step": 191480 }, { "epoch": 74.37, "learning_rate": 1.0084660194174759e-05, "loss": 0.1239, "step": 191490 }, { "epoch": 74.37, "learning_rate": 1.0084142394822008e-05, "loss": 0.0206, "step": 191500 }, { "epoch": 74.37, "learning_rate": 1.0083624595469256e-05, "loss": 0.1132, "step": 191510 }, { "epoch": 74.38, "learning_rate": 1.0083106796116506e-05, "loss": 0.1228, "step": 191520 }, { "epoch": 74.38, "learning_rate": 1.0082588996763755e-05, "loss": 0.038, "step": 191530 }, { "epoch": 74.38, "learning_rate": 1.0082071197411003e-05, "loss": 0.1063, "step": 191540 }, { "epoch": 74.39, "learning_rate": 1.0081553398058253e-05, "loss": 0.0733, "step": 191550 }, { "epoch": 74.39, "learning_rate": 1.0081035598705503e-05, "loss": 0.0902, "step": 191560 }, { "epoch": 74.4, "learning_rate": 1.0080517799352752e-05, "loss": 0.1961, "step": 191570 }, { "epoch": 74.4, "learning_rate": 1.008e-05, "loss": 0.0568, "step": 191580 }, { "epoch": 74.4, "learning_rate": 1.007948220064725e-05, "loss": 0.0267, "step": 191590 }, { "epoch": 74.41, "learning_rate": 1.00789644012945e-05, "loss": 0.072, "step": 191600 }, { "epoch": 74.41, "learning_rate": 1.0078446601941749e-05, "loss": 0.0076, "step": 191610 }, { "epoch": 74.42, "learning_rate": 1.0077928802588997e-05, "loss": 0.0024, "step": 191620 }, { "epoch": 74.42, "learning_rate": 1.0077411003236246e-05, "loss": 0.0711, "step": 191630 }, { "epoch": 74.42, "learning_rate": 1.0076893203883496e-05, "loss": 0.0035, "step": 191640 }, { "epoch": 74.43, "learning_rate": 1.0076375404530746e-05, "loss": 0.0646, "step": 191650 }, { "epoch": 74.43, "learning_rate": 1.0075857605177994e-05, "loss": 0.0236, "step": 191660 }, { "epoch": 74.43, "learning_rate": 1.0075339805825243e-05, "loss": 0.0776, "step": 191670 }, { "epoch": 74.44, "learning_rate": 1.0074822006472493e-05, "loss": 0.0407, "step": 191680 }, { "epoch": 74.44, "learning_rate": 1.0074304207119742e-05, "loss": 0.0167, "step": 191690 }, { "epoch": 74.45, "learning_rate": 1.007378640776699e-05, "loss": 0.1497, "step": 191700 }, { "epoch": 74.45, "learning_rate": 1.007326860841424e-05, "loss": 0.1452, "step": 191710 }, { "epoch": 74.45, "learning_rate": 1.007275080906149e-05, "loss": 0.1229, "step": 191720 }, { "epoch": 74.46, "learning_rate": 1.007223300970874e-05, "loss": 0.0764, "step": 191730 }, { "epoch": 74.46, "learning_rate": 1.0071715210355987e-05, "loss": 0.0957, "step": 191740 }, { "epoch": 74.47, "learning_rate": 1.0071197411003237e-05, "loss": 0.0017, "step": 191750 }, { "epoch": 74.47, "learning_rate": 1.0070679611650486e-05, "loss": 0.0541, "step": 191760 }, { "epoch": 74.47, "learning_rate": 1.0070161812297736e-05, "loss": 0.1046, "step": 191770 }, { "epoch": 74.48, "learning_rate": 1.0069644012944986e-05, "loss": 0.0064, "step": 191780 }, { "epoch": 74.48, "learning_rate": 1.0069126213592234e-05, "loss": 0.0713, "step": 191790 }, { "epoch": 74.49, "learning_rate": 1.0068608414239483e-05, "loss": 0.0763, "step": 191800 }, { "epoch": 74.49, "learning_rate": 1.0068090614886731e-05, "loss": 0.0825, "step": 191810 }, { "epoch": 74.49, "learning_rate": 1.006757281553398e-05, "loss": 0.0324, "step": 191820 }, { "epoch": 74.5, "learning_rate": 1.006705501618123e-05, "loss": 0.0007, "step": 191830 }, { "epoch": 74.5, "learning_rate": 1.006653721682848e-05, "loss": 0.0372, "step": 191840 }, { "epoch": 74.5, "learning_rate": 1.006601941747573e-05, "loss": 0.0357, "step": 191850 }, { "epoch": 74.51, "learning_rate": 1.006550161812298e-05, "loss": 0.0138, "step": 191860 }, { "epoch": 74.51, "learning_rate": 1.0064983818770227e-05, "loss": 0.0432, "step": 191870 }, { "epoch": 74.52, "learning_rate": 1.0064466019417477e-05, "loss": 0.0337, "step": 191880 }, { "epoch": 74.52, "learning_rate": 1.0063948220064725e-05, "loss": 0.0033, "step": 191890 }, { "epoch": 74.52, "learning_rate": 1.0063430420711974e-05, "loss": 0.1304, "step": 191900 }, { "epoch": 74.53, "learning_rate": 1.0062912621359224e-05, "loss": 0.0143, "step": 191910 }, { "epoch": 74.53, "learning_rate": 1.0062394822006474e-05, "loss": 0.0128, "step": 191920 }, { "epoch": 74.54, "learning_rate": 1.0061877022653723e-05, "loss": 0.0519, "step": 191930 }, { "epoch": 74.54, "learning_rate": 1.0061359223300973e-05, "loss": 0.0658, "step": 191940 }, { "epoch": 74.54, "learning_rate": 1.006084142394822e-05, "loss": 0.0775, "step": 191950 }, { "epoch": 74.55, "learning_rate": 1.0060323624595469e-05, "loss": 0.0057, "step": 191960 }, { "epoch": 74.55, "learning_rate": 1.0059805825242718e-05, "loss": 0.0482, "step": 191970 }, { "epoch": 74.56, "learning_rate": 1.0059288025889968e-05, "loss": 0.0225, "step": 191980 }, { "epoch": 74.56, "learning_rate": 1.0058770226537217e-05, "loss": 0.1523, "step": 191990 }, { "epoch": 74.56, "learning_rate": 1.0058252427184467e-05, "loss": 0.0159, "step": 192000 }, { "epoch": 74.57, "learning_rate": 1.0057734627831717e-05, "loss": 0.1058, "step": 192010 }, { "epoch": 74.57, "learning_rate": 1.0057216828478966e-05, "loss": 0.0588, "step": 192020 }, { "epoch": 74.57, "learning_rate": 1.0056699029126214e-05, "loss": 0.0363, "step": 192030 }, { "epoch": 74.58, "learning_rate": 1.0056181229773462e-05, "loss": 0.0124, "step": 192040 }, { "epoch": 74.58, "learning_rate": 1.0055663430420712e-05, "loss": 0.0079, "step": 192050 }, { "epoch": 74.59, "learning_rate": 1.0055145631067961e-05, "loss": 0.0935, "step": 192060 }, { "epoch": 74.59, "learning_rate": 1.0054627831715211e-05, "loss": 0.0016, "step": 192070 }, { "epoch": 74.59, "learning_rate": 1.005411003236246e-05, "loss": 0.0579, "step": 192080 }, { "epoch": 74.6, "learning_rate": 1.005359223300971e-05, "loss": 0.1021, "step": 192090 }, { "epoch": 74.6, "learning_rate": 1.005307443365696e-05, "loss": 0.0584, "step": 192100 }, { "epoch": 74.61, "learning_rate": 1.0052556634304206e-05, "loss": 0.1834, "step": 192110 }, { "epoch": 74.61, "learning_rate": 1.0052038834951456e-05, "loss": 0.1713, "step": 192120 }, { "epoch": 74.61, "learning_rate": 1.0051521035598705e-05, "loss": 0.0489, "step": 192130 }, { "epoch": 74.62, "learning_rate": 1.0051003236245955e-05, "loss": 0.0599, "step": 192140 }, { "epoch": 74.62, "learning_rate": 1.0050485436893205e-05, "loss": 0.0927, "step": 192150 }, { "epoch": 74.63, "learning_rate": 1.0049967637540454e-05, "loss": 0.0033, "step": 192160 }, { "epoch": 74.63, "learning_rate": 1.0049449838187704e-05, "loss": 0.1239, "step": 192170 }, { "epoch": 74.63, "learning_rate": 1.0048932038834953e-05, "loss": 0.0225, "step": 192180 }, { "epoch": 74.64, "learning_rate": 1.00484142394822e-05, "loss": 0.0005, "step": 192190 }, { "epoch": 74.64, "learning_rate": 1.004789644012945e-05, "loss": 0.0901, "step": 192200 }, { "epoch": 74.64, "learning_rate": 1.0047378640776699e-05, "loss": 0.1268, "step": 192210 }, { "epoch": 74.65, "learning_rate": 1.0046860841423949e-05, "loss": 0.1025, "step": 192220 }, { "epoch": 74.65, "learning_rate": 1.0046343042071198e-05, "loss": 0.0168, "step": 192230 }, { "epoch": 74.66, "learning_rate": 1.0045825242718448e-05, "loss": 0.1188, "step": 192240 }, { "epoch": 74.66, "learning_rate": 1.0045307443365697e-05, "loss": 0.0263, "step": 192250 }, { "epoch": 74.66, "learning_rate": 1.0044789644012947e-05, "loss": 0.0077, "step": 192260 }, { "epoch": 74.67, "learning_rate": 1.0044271844660193e-05, "loss": 0.0921, "step": 192270 }, { "epoch": 74.67, "learning_rate": 1.0043754045307443e-05, "loss": 0.009, "step": 192280 }, { "epoch": 74.68, "learning_rate": 1.0043236245954692e-05, "loss": 0.0143, "step": 192290 }, { "epoch": 74.68, "learning_rate": 1.0042718446601942e-05, "loss": 0.0253, "step": 192300 }, { "epoch": 74.68, "learning_rate": 1.0042200647249192e-05, "loss": 0.058, "step": 192310 }, { "epoch": 74.69, "learning_rate": 1.0041682847896441e-05, "loss": 0.05, "step": 192320 }, { "epoch": 74.69, "learning_rate": 1.0041165048543691e-05, "loss": 0.0147, "step": 192330 }, { "epoch": 74.7, "learning_rate": 1.004064724919094e-05, "loss": 0.0075, "step": 192340 }, { "epoch": 74.7, "learning_rate": 1.004012944983819e-05, "loss": 0.0398, "step": 192350 }, { "epoch": 74.7, "learning_rate": 1.0039611650485436e-05, "loss": 0.0471, "step": 192360 }, { "epoch": 74.71, "learning_rate": 1.0039093851132686e-05, "loss": 0.082, "step": 192370 }, { "epoch": 74.71, "learning_rate": 1.0038576051779936e-05, "loss": 0.0403, "step": 192380 }, { "epoch": 74.71, "learning_rate": 1.0038058252427185e-05, "loss": 0.0817, "step": 192390 }, { "epoch": 74.72, "learning_rate": 1.0037540453074435e-05, "loss": 0.136, "step": 192400 }, { "epoch": 74.72, "learning_rate": 1.0037022653721684e-05, "loss": 0.138, "step": 192410 }, { "epoch": 74.73, "learning_rate": 1.0036504854368934e-05, "loss": 0.1607, "step": 192420 }, { "epoch": 74.73, "learning_rate": 1.0035987055016184e-05, "loss": 0.1907, "step": 192430 }, { "epoch": 74.73, "learning_rate": 1.003546925566343e-05, "loss": 0.0185, "step": 192440 }, { "epoch": 74.74, "learning_rate": 1.003495145631068e-05, "loss": 0.0715, "step": 192450 }, { "epoch": 74.74, "learning_rate": 1.003443365695793e-05, "loss": 0.0025, "step": 192460 }, { "epoch": 74.75, "learning_rate": 1.0033915857605179e-05, "loss": 0.0311, "step": 192470 }, { "epoch": 74.75, "learning_rate": 1.0033398058252428e-05, "loss": 0.0642, "step": 192480 }, { "epoch": 74.75, "learning_rate": 1.0032880258899678e-05, "loss": 0.0819, "step": 192490 }, { "epoch": 74.76, "learning_rate": 1.0032362459546928e-05, "loss": 0.0041, "step": 192500 }, { "epoch": 74.76, "learning_rate": 1.0031844660194177e-05, "loss": 0.0309, "step": 192510 }, { "epoch": 74.77, "learning_rate": 1.0031326860841423e-05, "loss": 0.0757, "step": 192520 }, { "epoch": 74.77, "learning_rate": 1.0030809061488673e-05, "loss": 0.1517, "step": 192530 }, { "epoch": 74.77, "learning_rate": 1.0030291262135923e-05, "loss": 0.0057, "step": 192540 }, { "epoch": 74.78, "learning_rate": 1.0029773462783172e-05, "loss": 0.0709, "step": 192550 }, { "epoch": 74.78, "learning_rate": 1.0029255663430422e-05, "loss": 0.1319, "step": 192560 }, { "epoch": 74.78, "learning_rate": 1.0028737864077672e-05, "loss": 0.1234, "step": 192570 }, { "epoch": 74.79, "learning_rate": 1.0028220064724921e-05, "loss": 0.1415, "step": 192580 }, { "epoch": 74.79, "learning_rate": 1.002770226537217e-05, "loss": 0.0542, "step": 192590 }, { "epoch": 74.8, "learning_rate": 1.0027184466019417e-05, "loss": 0.055, "step": 192600 }, { "epoch": 74.8, "learning_rate": 1.0026666666666667e-05, "loss": 0.0143, "step": 192610 }, { "epoch": 74.8, "learning_rate": 1.0026148867313916e-05, "loss": 0.0928, "step": 192620 }, { "epoch": 74.81, "learning_rate": 1.0025631067961166e-05, "loss": 0.0971, "step": 192630 }, { "epoch": 74.81, "learning_rate": 1.0025113268608416e-05, "loss": 0.0688, "step": 192640 }, { "epoch": 74.82, "learning_rate": 1.0024595469255665e-05, "loss": 0.1474, "step": 192650 }, { "epoch": 74.82, "learning_rate": 1.0024077669902915e-05, "loss": 0.1121, "step": 192660 }, { "epoch": 74.82, "learning_rate": 1.0023559870550164e-05, "loss": 0.0431, "step": 192670 }, { "epoch": 74.83, "learning_rate": 1.002304207119741e-05, "loss": 0.0157, "step": 192680 }, { "epoch": 74.83, "learning_rate": 1.002252427184466e-05, "loss": 0.0427, "step": 192690 }, { "epoch": 74.83, "learning_rate": 1.002200647249191e-05, "loss": 0.0264, "step": 192700 }, { "epoch": 74.84, "learning_rate": 1.002148867313916e-05, "loss": 0.0338, "step": 192710 }, { "epoch": 74.84, "learning_rate": 1.0020970873786409e-05, "loss": 0.1123, "step": 192720 }, { "epoch": 74.85, "learning_rate": 1.0020453074433659e-05, "loss": 0.0161, "step": 192730 }, { "epoch": 74.85, "learning_rate": 1.0019935275080908e-05, "loss": 0.0025, "step": 192740 }, { "epoch": 74.85, "learning_rate": 1.0019417475728156e-05, "loss": 0.0187, "step": 192750 }, { "epoch": 74.86, "learning_rate": 1.0018899676375404e-05, "loss": 0.0003, "step": 192760 }, { "epoch": 74.86, "learning_rate": 1.0018381877022654e-05, "loss": 0.0763, "step": 192770 }, { "epoch": 74.87, "learning_rate": 1.0017864077669903e-05, "loss": 0.0877, "step": 192780 }, { "epoch": 74.87, "learning_rate": 1.0017346278317153e-05, "loss": 0.0623, "step": 192790 }, { "epoch": 74.87, "learning_rate": 1.0016828478964403e-05, "loss": 0.1048, "step": 192800 }, { "epoch": 74.88, "learning_rate": 1.0016310679611652e-05, "loss": 0.046, "step": 192810 }, { "epoch": 74.88, "learning_rate": 1.0015792880258902e-05, "loss": 0.1502, "step": 192820 }, { "epoch": 74.89, "learning_rate": 1.001527508090615e-05, "loss": 0.0348, "step": 192830 }, { "epoch": 74.89, "learning_rate": 1.0014757281553398e-05, "loss": 0.0539, "step": 192840 }, { "epoch": 74.89, "learning_rate": 1.0014239482200647e-05, "loss": 0.1101, "step": 192850 }, { "epoch": 74.9, "learning_rate": 1.0013721682847897e-05, "loss": 0.0726, "step": 192860 }, { "epoch": 74.9, "learning_rate": 1.0013203883495147e-05, "loss": 0.0696, "step": 192870 }, { "epoch": 74.9, "learning_rate": 1.0012686084142396e-05, "loss": 0.0402, "step": 192880 }, { "epoch": 74.91, "learning_rate": 1.0012168284789646e-05, "loss": 0.0519, "step": 192890 }, { "epoch": 74.91, "learning_rate": 1.0011650485436894e-05, "loss": 0.0362, "step": 192900 }, { "epoch": 74.92, "learning_rate": 1.0011132686084143e-05, "loss": 0.1967, "step": 192910 }, { "epoch": 74.92, "learning_rate": 1.0010614886731393e-05, "loss": 0.1408, "step": 192920 }, { "epoch": 74.92, "learning_rate": 1.0010097087378641e-05, "loss": 0.1049, "step": 192930 }, { "epoch": 74.93, "learning_rate": 1.000957928802589e-05, "loss": 0.0526, "step": 192940 }, { "epoch": 74.93, "learning_rate": 1.000906148867314e-05, "loss": 0.0089, "step": 192950 }, { "epoch": 74.94, "learning_rate": 1.000854368932039e-05, "loss": 0.0074, "step": 192960 }, { "epoch": 74.94, "learning_rate": 1.000802588996764e-05, "loss": 0.0969, "step": 192970 }, { "epoch": 74.94, "learning_rate": 1.0007508090614887e-05, "loss": 0.0211, "step": 192980 }, { "epoch": 74.95, "learning_rate": 1.0006990291262137e-05, "loss": 0.0663, "step": 192990 }, { "epoch": 74.95, "learning_rate": 1.0006472491909387e-05, "loss": 0.0741, "step": 193000 }, { "epoch": 74.96, "learning_rate": 1.0005954692556634e-05, "loss": 0.0683, "step": 193010 }, { "epoch": 74.96, "learning_rate": 1.0005436893203884e-05, "loss": 0.1097, "step": 193020 }, { "epoch": 74.96, "learning_rate": 1.0004919093851134e-05, "loss": 0.0691, "step": 193030 }, { "epoch": 74.97, "learning_rate": 1.0004401294498383e-05, "loss": 0.1019, "step": 193040 }, { "epoch": 74.97, "learning_rate": 1.0003883495145631e-05, "loss": 0.0875, "step": 193050 }, { "epoch": 74.97, "learning_rate": 1.000336569579288e-05, "loss": 0.0345, "step": 193060 }, { "epoch": 74.98, "learning_rate": 1.000284789644013e-05, "loss": 0.0373, "step": 193070 }, { "epoch": 74.98, "learning_rate": 1.000233009708738e-05, "loss": 0.1415, "step": 193080 }, { "epoch": 74.99, "learning_rate": 1.0001812297734628e-05, "loss": 0.0051, "step": 193090 }, { "epoch": 74.99, "learning_rate": 1.0001294498381878e-05, "loss": 0.028, "step": 193100 }, { "epoch": 74.99, "learning_rate": 1.0000776699029127e-05, "loss": 0.0484, "step": 193110 }, { "epoch": 75.0, "learning_rate": 1.0000258899676377e-05, "loss": 0.0702, "step": 193120 }, { "epoch": 75.0, "eval_accuracy": 0.9502063273727648, "eval_loss": 0.3600085973739624, "eval_runtime": 8.1812, "eval_samples_per_second": 444.312, "eval_steps_per_second": 55.615, "step": 193125 }, { "epoch": 75.0, "learning_rate": 9.999741100323625e-06, "loss": 0.0287, "step": 193130 }, { "epoch": 75.01, "learning_rate": 9.999223300970874e-06, "loss": 0.1373, "step": 193140 }, { "epoch": 75.01, "learning_rate": 9.998705501618124e-06, "loss": 0.0446, "step": 193150 }, { "epoch": 75.01, "learning_rate": 9.998187702265374e-06, "loss": 0.1161, "step": 193160 }, { "epoch": 75.02, "learning_rate": 9.997669902912622e-06, "loss": 0.1237, "step": 193170 }, { "epoch": 75.02, "learning_rate": 9.997152103559871e-06, "loss": 0.0003, "step": 193180 }, { "epoch": 75.03, "learning_rate": 9.99663430420712e-06, "loss": 0.0163, "step": 193190 }, { "epoch": 75.03, "learning_rate": 9.99611650485437e-06, "loss": 0.0835, "step": 193200 }, { "epoch": 75.03, "learning_rate": 9.995598705501618e-06, "loss": 0.0394, "step": 193210 }, { "epoch": 75.04, "learning_rate": 9.995080906148868e-06, "loss": 0.0698, "step": 193220 }, { "epoch": 75.04, "learning_rate": 9.994563106796118e-06, "loss": 0.052, "step": 193230 }, { "epoch": 75.04, "learning_rate": 9.994045307443365e-06, "loss": 0.055, "step": 193240 }, { "epoch": 75.05, "learning_rate": 9.993527508090615e-06, "loss": 0.177, "step": 193250 }, { "epoch": 75.05, "learning_rate": 9.993009708737865e-06, "loss": 0.0341, "step": 193260 }, { "epoch": 75.06, "learning_rate": 9.992491909385114e-06, "loss": 0.0431, "step": 193270 }, { "epoch": 75.06, "learning_rate": 9.991974110032362e-06, "loss": 0.0077, "step": 193280 }, { "epoch": 75.06, "learning_rate": 9.991456310679612e-06, "loss": 0.1538, "step": 193290 }, { "epoch": 75.07, "learning_rate": 9.990938511326862e-06, "loss": 0.0356, "step": 193300 }, { "epoch": 75.07, "learning_rate": 9.990420711974111e-06, "loss": 0.0008, "step": 193310 }, { "epoch": 75.08, "learning_rate": 9.989902912621359e-06, "loss": 0.067, "step": 193320 }, { "epoch": 75.08, "learning_rate": 9.989385113268609e-06, "loss": 0.0005, "step": 193330 }, { "epoch": 75.08, "learning_rate": 9.988867313915858e-06, "loss": 0.1441, "step": 193340 }, { "epoch": 75.09, "learning_rate": 9.988349514563108e-06, "loss": 0.0011, "step": 193350 }, { "epoch": 75.09, "learning_rate": 9.987831715210356e-06, "loss": 0.1236, "step": 193360 }, { "epoch": 75.1, "learning_rate": 9.987313915857605e-06, "loss": 0.1442, "step": 193370 }, { "epoch": 75.1, "learning_rate": 9.986796116504855e-06, "loss": 0.1062, "step": 193380 }, { "epoch": 75.1, "learning_rate": 9.986278317152103e-06, "loss": 0.1093, "step": 193390 }, { "epoch": 75.11, "learning_rate": 9.985760517799353e-06, "loss": 0.1529, "step": 193400 }, { "epoch": 75.11, "learning_rate": 9.985242718446602e-06, "loss": 0.0003, "step": 193410 }, { "epoch": 75.11, "learning_rate": 9.984724919093852e-06, "loss": 0.0171, "step": 193420 }, { "epoch": 75.12, "learning_rate": 9.984207119741101e-06, "loss": 0.0532, "step": 193430 }, { "epoch": 75.12, "learning_rate": 9.98368932038835e-06, "loss": 0.291, "step": 193440 }, { "epoch": 75.13, "learning_rate": 9.983171521035599e-06, "loss": 0.0603, "step": 193450 }, { "epoch": 75.13, "learning_rate": 9.982653721682849e-06, "loss": 0.062, "step": 193460 }, { "epoch": 75.13, "learning_rate": 9.982135922330098e-06, "loss": 0.0421, "step": 193470 }, { "epoch": 75.14, "learning_rate": 9.981618122977346e-06, "loss": 0.0061, "step": 193480 }, { "epoch": 75.14, "learning_rate": 9.981100323624596e-06, "loss": 0.1542, "step": 193490 }, { "epoch": 75.15, "learning_rate": 9.980582524271845e-06, "loss": 0.0034, "step": 193500 }, { "epoch": 75.15, "learning_rate": 9.980064724919095e-06, "loss": 0.0158, "step": 193510 }, { "epoch": 75.15, "learning_rate": 9.979546925566343e-06, "loss": 0.0565, "step": 193520 }, { "epoch": 75.16, "learning_rate": 9.979029126213593e-06, "loss": 0.1973, "step": 193530 }, { "epoch": 75.16, "learning_rate": 9.978511326860842e-06, "loss": 0.0283, "step": 193540 }, { "epoch": 75.17, "learning_rate": 9.977993527508092e-06, "loss": 0.2026, "step": 193550 }, { "epoch": 75.17, "learning_rate": 9.97747572815534e-06, "loss": 0.0131, "step": 193560 }, { "epoch": 75.17, "learning_rate": 9.97695792880259e-06, "loss": 0.0897, "step": 193570 }, { "epoch": 75.18, "learning_rate": 9.976440129449839e-06, "loss": 0.015, "step": 193580 }, { "epoch": 75.18, "learning_rate": 9.975922330097089e-06, "loss": 0.0649, "step": 193590 }, { "epoch": 75.18, "learning_rate": 9.975404530744336e-06, "loss": 0.085, "step": 193600 }, { "epoch": 75.19, "learning_rate": 9.974886731391586e-06, "loss": 0.1915, "step": 193610 }, { "epoch": 75.19, "learning_rate": 9.974368932038836e-06, "loss": 0.0079, "step": 193620 }, { "epoch": 75.2, "learning_rate": 9.973851132686085e-06, "loss": 0.0003, "step": 193630 }, { "epoch": 75.2, "learning_rate": 9.973333333333333e-06, "loss": 0.1474, "step": 193640 }, { "epoch": 75.2, "learning_rate": 9.972815533980583e-06, "loss": 0.1088, "step": 193650 }, { "epoch": 75.21, "learning_rate": 9.972297734627833e-06, "loss": 0.1359, "step": 193660 }, { "epoch": 75.21, "learning_rate": 9.971779935275082e-06, "loss": 0.0008, "step": 193670 }, { "epoch": 75.22, "learning_rate": 9.97126213592233e-06, "loss": 0.0642, "step": 193680 }, { "epoch": 75.22, "learning_rate": 9.97074433656958e-06, "loss": 0.0206, "step": 193690 }, { "epoch": 75.22, "learning_rate": 9.97022653721683e-06, "loss": 0.0676, "step": 193700 }, { "epoch": 75.23, "learning_rate": 9.969708737864079e-06, "loss": 0.0265, "step": 193710 }, { "epoch": 75.23, "learning_rate": 9.969190938511327e-06, "loss": 0.1958, "step": 193720 }, { "epoch": 75.23, "learning_rate": 9.968673139158576e-06, "loss": 0.0846, "step": 193730 }, { "epoch": 75.24, "learning_rate": 9.968155339805826e-06, "loss": 0.0315, "step": 193740 }, { "epoch": 75.24, "learning_rate": 9.967637540453076e-06, "loss": 0.1283, "step": 193750 }, { "epoch": 75.25, "learning_rate": 9.967119741100324e-06, "loss": 0.0015, "step": 193760 }, { "epoch": 75.25, "learning_rate": 9.966601941747573e-06, "loss": 0.0018, "step": 193770 }, { "epoch": 75.25, "learning_rate": 9.966084142394823e-06, "loss": 0.0222, "step": 193780 }, { "epoch": 75.26, "learning_rate": 9.965566343042072e-06, "loss": 0.0498, "step": 193790 }, { "epoch": 75.26, "learning_rate": 9.96504854368932e-06, "loss": 0.0078, "step": 193800 }, { "epoch": 75.27, "learning_rate": 9.96453074433657e-06, "loss": 0.1294, "step": 193810 }, { "epoch": 75.27, "learning_rate": 9.96401294498382e-06, "loss": 0.0547, "step": 193820 }, { "epoch": 75.27, "learning_rate": 9.96349514563107e-06, "loss": 0.0382, "step": 193830 }, { "epoch": 75.28, "learning_rate": 9.962977346278317e-06, "loss": 0.005, "step": 193840 }, { "epoch": 75.28, "learning_rate": 9.962459546925567e-06, "loss": 0.1343, "step": 193850 }, { "epoch": 75.29, "learning_rate": 9.961941747572816e-06, "loss": 0.0177, "step": 193860 }, { "epoch": 75.29, "learning_rate": 9.961423948220066e-06, "loss": 0.0295, "step": 193870 }, { "epoch": 75.29, "learning_rate": 9.960906148867314e-06, "loss": 0.0668, "step": 193880 }, { "epoch": 75.3, "learning_rate": 9.960388349514564e-06, "loss": 0.1177, "step": 193890 }, { "epoch": 75.3, "learning_rate": 9.959870550161813e-06, "loss": 0.035, "step": 193900 }, { "epoch": 75.3, "learning_rate": 9.959352750809063e-06, "loss": 0.1088, "step": 193910 }, { "epoch": 75.31, "learning_rate": 9.95883495145631e-06, "loss": 0.0635, "step": 193920 }, { "epoch": 75.31, "learning_rate": 9.95831715210356e-06, "loss": 0.0007, "step": 193930 }, { "epoch": 75.32, "learning_rate": 9.95779935275081e-06, "loss": 0.0242, "step": 193940 }, { "epoch": 75.32, "learning_rate": 9.95728155339806e-06, "loss": 0.0917, "step": 193950 }, { "epoch": 75.32, "learning_rate": 9.956763754045308e-06, "loss": 0.2047, "step": 193960 }, { "epoch": 75.33, "learning_rate": 9.956245954692557e-06, "loss": 0.1515, "step": 193970 }, { "epoch": 75.33, "learning_rate": 9.955728155339807e-06, "loss": 0.0192, "step": 193980 }, { "epoch": 75.34, "learning_rate": 9.955210355987056e-06, "loss": 0.0158, "step": 193990 }, { "epoch": 75.34, "learning_rate": 9.954692556634306e-06, "loss": 0.0196, "step": 194000 }, { "epoch": 75.34, "learning_rate": 9.954174757281554e-06, "loss": 0.0687, "step": 194010 }, { "epoch": 75.35, "learning_rate": 9.953656957928804e-06, "loss": 0.1105, "step": 194020 }, { "epoch": 75.35, "learning_rate": 9.953139158576053e-06, "loss": 0.1153, "step": 194030 }, { "epoch": 75.36, "learning_rate": 9.952621359223303e-06, "loss": 0.1458, "step": 194040 }, { "epoch": 75.36, "learning_rate": 9.95210355987055e-06, "loss": 0.2594, "step": 194050 }, { "epoch": 75.36, "learning_rate": 9.9515857605178e-06, "loss": 0.1193, "step": 194060 }, { "epoch": 75.37, "learning_rate": 9.95106796116505e-06, "loss": 0.0782, "step": 194070 }, { "epoch": 75.37, "learning_rate": 9.9505501618123e-06, "loss": 0.1256, "step": 194080 }, { "epoch": 75.37, "learning_rate": 9.950032362459547e-06, "loss": 0.0702, "step": 194090 }, { "epoch": 75.38, "learning_rate": 9.949514563106797e-06, "loss": 0.0441, "step": 194100 }, { "epoch": 75.38, "learning_rate": 9.948996763754047e-06, "loss": 0.055, "step": 194110 }, { "epoch": 75.39, "learning_rate": 9.948478964401296e-06, "loss": 0.0179, "step": 194120 }, { "epoch": 75.39, "learning_rate": 9.947961165048544e-06, "loss": 0.1141, "step": 194130 }, { "epoch": 75.39, "learning_rate": 9.947443365695794e-06, "loss": 0.0002, "step": 194140 }, { "epoch": 75.4, "learning_rate": 9.946925566343043e-06, "loss": 0.069, "step": 194150 }, { "epoch": 75.4, "learning_rate": 9.946407766990293e-06, "loss": 0.0577, "step": 194160 }, { "epoch": 75.41, "learning_rate": 9.945889967637541e-06, "loss": 0.0279, "step": 194170 }, { "epoch": 75.41, "learning_rate": 9.94537216828479e-06, "loss": 0.0652, "step": 194180 }, { "epoch": 75.41, "learning_rate": 9.94485436893204e-06, "loss": 0.0375, "step": 194190 }, { "epoch": 75.42, "learning_rate": 9.94433656957929e-06, "loss": 0.1226, "step": 194200 }, { "epoch": 75.42, "learning_rate": 9.943818770226538e-06, "loss": 0.1914, "step": 194210 }, { "epoch": 75.43, "learning_rate": 9.943300970873787e-06, "loss": 0.0542, "step": 194220 }, { "epoch": 75.43, "learning_rate": 9.942783171521037e-06, "loss": 0.1068, "step": 194230 }, { "epoch": 75.43, "learning_rate": 9.942265372168287e-06, "loss": 0.0315, "step": 194240 }, { "epoch": 75.44, "learning_rate": 9.941747572815535e-06, "loss": 0.1178, "step": 194250 }, { "epoch": 75.44, "learning_rate": 9.941229773462784e-06, "loss": 0.0582, "step": 194260 }, { "epoch": 75.44, "learning_rate": 9.940711974110034e-06, "loss": 0.0324, "step": 194270 }, { "epoch": 75.45, "learning_rate": 9.940194174757283e-06, "loss": 0.0895, "step": 194280 }, { "epoch": 75.45, "learning_rate": 9.939676375404531e-06, "loss": 0.0734, "step": 194290 }, { "epoch": 75.46, "learning_rate": 9.939158576051781e-06, "loss": 0.0325, "step": 194300 }, { "epoch": 75.46, "learning_rate": 9.93864077669903e-06, "loss": 0.0867, "step": 194310 }, { "epoch": 75.46, "learning_rate": 9.93812297734628e-06, "loss": 0.0884, "step": 194320 }, { "epoch": 75.47, "learning_rate": 9.937605177993528e-06, "loss": 0.088, "step": 194330 }, { "epoch": 75.47, "learning_rate": 9.937087378640778e-06, "loss": 0.0252, "step": 194340 }, { "epoch": 75.48, "learning_rate": 9.936569579288027e-06, "loss": 0.2104, "step": 194350 }, { "epoch": 75.48, "learning_rate": 9.936051779935277e-06, "loss": 0.0523, "step": 194360 }, { "epoch": 75.48, "learning_rate": 9.935533980582525e-06, "loss": 0.0376, "step": 194370 }, { "epoch": 75.49, "learning_rate": 9.935016181229775e-06, "loss": 0.1502, "step": 194380 }, { "epoch": 75.49, "learning_rate": 9.934498381877024e-06, "loss": 0.0684, "step": 194390 }, { "epoch": 75.5, "learning_rate": 9.933980582524274e-06, "loss": 0.0782, "step": 194400 }, { "epoch": 75.5, "learning_rate": 9.933462783171522e-06, "loss": 0.0573, "step": 194410 }, { "epoch": 75.5, "learning_rate": 9.932944983818771e-06, "loss": 0.0327, "step": 194420 }, { "epoch": 75.51, "learning_rate": 9.932427184466021e-06, "loss": 0.1462, "step": 194430 }, { "epoch": 75.51, "learning_rate": 9.93190938511327e-06, "loss": 0.0642, "step": 194440 }, { "epoch": 75.51, "learning_rate": 9.931391585760518e-06, "loss": 0.0702, "step": 194450 }, { "epoch": 75.52, "learning_rate": 9.930873786407768e-06, "loss": 0.0219, "step": 194460 }, { "epoch": 75.52, "learning_rate": 9.930355987055018e-06, "loss": 0.0025, "step": 194470 }, { "epoch": 75.53, "learning_rate": 9.929838187702267e-06, "loss": 0.0191, "step": 194480 }, { "epoch": 75.53, "learning_rate": 9.929320388349515e-06, "loss": 0.0806, "step": 194490 }, { "epoch": 75.53, "learning_rate": 9.928802588996765e-06, "loss": 0.0015, "step": 194500 }, { "epoch": 75.54, "learning_rate": 9.928284789644014e-06, "loss": 0.1148, "step": 194510 }, { "epoch": 75.54, "learning_rate": 9.927766990291262e-06, "loss": 0.0029, "step": 194520 }, { "epoch": 75.55, "learning_rate": 9.927249190938512e-06, "loss": 0.1723, "step": 194530 }, { "epoch": 75.55, "learning_rate": 9.926731391585762e-06, "loss": 0.0011, "step": 194540 }, { "epoch": 75.55, "learning_rate": 9.926213592233011e-06, "loss": 0.0182, "step": 194550 }, { "epoch": 75.56, "learning_rate": 9.92569579288026e-06, "loss": 0.0585, "step": 194560 }, { "epoch": 75.56, "learning_rate": 9.925177993527509e-06, "loss": 0.1335, "step": 194570 }, { "epoch": 75.57, "learning_rate": 9.924660194174758e-06, "loss": 0.0854, "step": 194580 }, { "epoch": 75.57, "learning_rate": 9.924142394822008e-06, "loss": 0.0595, "step": 194590 }, { "epoch": 75.57, "learning_rate": 9.923624595469256e-06, "loss": 0.1089, "step": 194600 }, { "epoch": 75.58, "learning_rate": 9.923106796116506e-06, "loss": 0.0672, "step": 194610 }, { "epoch": 75.58, "learning_rate": 9.922588996763755e-06, "loss": 0.0096, "step": 194620 }, { "epoch": 75.58, "learning_rate": 9.922071197411005e-06, "loss": 0.0307, "step": 194630 }, { "epoch": 75.59, "learning_rate": 9.921553398058253e-06, "loss": 0.0015, "step": 194640 }, { "epoch": 75.59, "learning_rate": 9.921035598705502e-06, "loss": 0.0319, "step": 194650 }, { "epoch": 75.6, "learning_rate": 9.920517799352752e-06, "loss": 0.081, "step": 194660 }, { "epoch": 75.6, "learning_rate": 9.920000000000002e-06, "loss": 0.0465, "step": 194670 }, { "epoch": 75.6, "learning_rate": 9.91948220064725e-06, "loss": 0.1223, "step": 194680 }, { "epoch": 75.61, "learning_rate": 9.918964401294499e-06, "loss": 0.04, "step": 194690 }, { "epoch": 75.61, "learning_rate": 9.918446601941749e-06, "loss": 0.0843, "step": 194700 }, { "epoch": 75.62, "learning_rate": 9.917928802588997e-06, "loss": 0.1177, "step": 194710 }, { "epoch": 75.62, "learning_rate": 9.917411003236246e-06, "loss": 0.0748, "step": 194720 }, { "epoch": 75.62, "learning_rate": 9.916893203883496e-06, "loss": 0.1224, "step": 194730 }, { "epoch": 75.63, "learning_rate": 9.916375404530746e-06, "loss": 0.0761, "step": 194740 }, { "epoch": 75.63, "learning_rate": 9.915857605177993e-06, "loss": 0.0806, "step": 194750 }, { "epoch": 75.63, "learning_rate": 9.915339805825243e-06, "loss": 0.1322, "step": 194760 }, { "epoch": 75.64, "learning_rate": 9.914822006472493e-06, "loss": 0.046, "step": 194770 }, { "epoch": 75.64, "learning_rate": 9.914304207119742e-06, "loss": 0.0004, "step": 194780 }, { "epoch": 75.65, "learning_rate": 9.91378640776699e-06, "loss": 0.0144, "step": 194790 }, { "epoch": 75.65, "learning_rate": 9.91326860841424e-06, "loss": 0.0777, "step": 194800 }, { "epoch": 75.65, "learning_rate": 9.91275080906149e-06, "loss": 0.0797, "step": 194810 }, { "epoch": 75.66, "learning_rate": 9.912233009708739e-06, "loss": 0.0244, "step": 194820 }, { "epoch": 75.66, "learning_rate": 9.911715210355987e-06, "loss": 0.0156, "step": 194830 }, { "epoch": 75.67, "learning_rate": 9.911197411003237e-06, "loss": 0.0033, "step": 194840 }, { "epoch": 75.67, "learning_rate": 9.910679611650486e-06, "loss": 0.0326, "step": 194850 }, { "epoch": 75.67, "learning_rate": 9.910161812297734e-06, "loss": 0.0025, "step": 194860 }, { "epoch": 75.68, "learning_rate": 9.909644012944984e-06, "loss": 0.0718, "step": 194870 }, { "epoch": 75.68, "learning_rate": 9.909126213592233e-06, "loss": 0.0814, "step": 194880 }, { "epoch": 75.69, "learning_rate": 9.908608414239483e-06, "loss": 0.0133, "step": 194890 }, { "epoch": 75.69, "learning_rate": 9.908090614886731e-06, "loss": 0.0566, "step": 194900 }, { "epoch": 75.69, "learning_rate": 9.90757281553398e-06, "loss": 0.0081, "step": 194910 }, { "epoch": 75.7, "learning_rate": 9.90705501618123e-06, "loss": 0.053, "step": 194920 }, { "epoch": 75.7, "learning_rate": 9.90653721682848e-06, "loss": 0.0095, "step": 194930 }, { "epoch": 75.7, "learning_rate": 9.906019417475728e-06, "loss": 0.0507, "step": 194940 }, { "epoch": 75.71, "learning_rate": 9.905501618122977e-06, "loss": 0.0016, "step": 194950 }, { "epoch": 75.71, "learning_rate": 9.904983818770227e-06, "loss": 0.1255, "step": 194960 }, { "epoch": 75.72, "learning_rate": 9.904466019417477e-06, "loss": 0.1314, "step": 194970 }, { "epoch": 75.72, "learning_rate": 9.903948220064724e-06, "loss": 0.0243, "step": 194980 }, { "epoch": 75.72, "learning_rate": 9.903430420711974e-06, "loss": 0.0262, "step": 194990 }, { "epoch": 75.73, "learning_rate": 9.902912621359224e-06, "loss": 0.0179, "step": 195000 }, { "epoch": 75.73, "learning_rate": 9.902394822006473e-06, "loss": 0.0155, "step": 195010 }, { "epoch": 75.74, "learning_rate": 9.901877022653721e-06, "loss": 0.0016, "step": 195020 }, { "epoch": 75.74, "learning_rate": 9.901359223300971e-06, "loss": 0.0128, "step": 195030 }, { "epoch": 75.74, "learning_rate": 9.90084142394822e-06, "loss": 0.1132, "step": 195040 }, { "epoch": 75.75, "learning_rate": 9.90032362459547e-06, "loss": 0.0045, "step": 195050 }, { "epoch": 75.75, "learning_rate": 9.899805825242718e-06, "loss": 0.0462, "step": 195060 }, { "epoch": 75.76, "learning_rate": 9.899288025889968e-06, "loss": 0.1335, "step": 195070 }, { "epoch": 75.76, "learning_rate": 9.898770226537217e-06, "loss": 0.1145, "step": 195080 }, { "epoch": 75.76, "learning_rate": 9.898252427184467e-06, "loss": 0.0346, "step": 195090 }, { "epoch": 75.77, "learning_rate": 9.897734627831717e-06, "loss": 0.1411, "step": 195100 }, { "epoch": 75.77, "learning_rate": 9.897216828478964e-06, "loss": 0.0594, "step": 195110 }, { "epoch": 75.77, "learning_rate": 9.896699029126214e-06, "loss": 0.0424, "step": 195120 }, { "epoch": 75.78, "learning_rate": 9.896181229773464e-06, "loss": 0.2705, "step": 195130 }, { "epoch": 75.78, "learning_rate": 9.895663430420713e-06, "loss": 0.0372, "step": 195140 }, { "epoch": 75.79, "learning_rate": 9.895145631067961e-06, "loss": 0.001, "step": 195150 }, { "epoch": 75.79, "learning_rate": 9.89462783171521e-06, "loss": 0.0495, "step": 195160 }, { "epoch": 75.79, "learning_rate": 9.89411003236246e-06, "loss": 0.1372, "step": 195170 }, { "epoch": 75.8, "learning_rate": 9.89359223300971e-06, "loss": 0.1401, "step": 195180 }, { "epoch": 75.8, "learning_rate": 9.893074433656958e-06, "loss": 0.1624, "step": 195190 }, { "epoch": 75.81, "learning_rate": 9.892556634304208e-06, "loss": 0.0783, "step": 195200 }, { "epoch": 75.81, "learning_rate": 9.892038834951457e-06, "loss": 0.0258, "step": 195210 }, { "epoch": 75.81, "learning_rate": 9.891521035598707e-06, "loss": 0.0629, "step": 195220 }, { "epoch": 75.82, "learning_rate": 9.891003236245955e-06, "loss": 0.0476, "step": 195230 }, { "epoch": 75.82, "learning_rate": 9.890485436893204e-06, "loss": 0.036, "step": 195240 }, { "epoch": 75.83, "learning_rate": 9.889967637540454e-06, "loss": 0.0972, "step": 195250 }, { "epoch": 75.83, "learning_rate": 9.889449838187704e-06, "loss": 0.0015, "step": 195260 }, { "epoch": 75.83, "learning_rate": 9.888932038834952e-06, "loss": 0.0171, "step": 195270 }, { "epoch": 75.84, "learning_rate": 9.888414239482201e-06, "loss": 0.025, "step": 195280 }, { "epoch": 75.84, "learning_rate": 9.88789644012945e-06, "loss": 0.0042, "step": 195290 }, { "epoch": 75.84, "learning_rate": 9.8873786407767e-06, "loss": 0.1762, "step": 195300 }, { "epoch": 75.85, "learning_rate": 9.886860841423948e-06, "loss": 0.0315, "step": 195310 }, { "epoch": 75.85, "learning_rate": 9.886343042071198e-06, "loss": 0.0551, "step": 195320 }, { "epoch": 75.86, "learning_rate": 9.885825242718448e-06, "loss": 0.0929, "step": 195330 }, { "epoch": 75.86, "learning_rate": 9.885307443365697e-06, "loss": 0.0393, "step": 195340 }, { "epoch": 75.86, "learning_rate": 9.884789644012945e-06, "loss": 0.0161, "step": 195350 }, { "epoch": 75.87, "learning_rate": 9.884271844660195e-06, "loss": 0.0289, "step": 195360 }, { "epoch": 75.87, "learning_rate": 9.883754045307444e-06, "loss": 0.1172, "step": 195370 }, { "epoch": 75.88, "learning_rate": 9.883236245954694e-06, "loss": 0.021, "step": 195380 }, { "epoch": 75.88, "learning_rate": 9.882718446601942e-06, "loss": 0.0103, "step": 195390 }, { "epoch": 75.88, "learning_rate": 9.882200647249192e-06, "loss": 0.0002, "step": 195400 }, { "epoch": 75.89, "learning_rate": 9.881682847896441e-06, "loss": 0.2606, "step": 195410 }, { "epoch": 75.89, "learning_rate": 9.88116504854369e-06, "loss": 0.0002, "step": 195420 }, { "epoch": 75.9, "learning_rate": 9.880647249190939e-06, "loss": 0.1573, "step": 195430 }, { "epoch": 75.9, "learning_rate": 9.880129449838188e-06, "loss": 0.1398, "step": 195440 }, { "epoch": 75.9, "learning_rate": 9.879611650485438e-06, "loss": 0.0479, "step": 195450 }, { "epoch": 75.91, "learning_rate": 9.879093851132688e-06, "loss": 0.0061, "step": 195460 }, { "epoch": 75.91, "learning_rate": 9.878576051779935e-06, "loss": 0.0036, "step": 195470 }, { "epoch": 75.91, "learning_rate": 9.878058252427185e-06, "loss": 0.0913, "step": 195480 }, { "epoch": 75.92, "learning_rate": 9.877540453074435e-06, "loss": 0.0651, "step": 195490 }, { "epoch": 75.92, "learning_rate": 9.877022653721684e-06, "loss": 0.2052, "step": 195500 }, { "epoch": 75.93, "learning_rate": 9.876504854368932e-06, "loss": 0.0023, "step": 195510 }, { "epoch": 75.93, "learning_rate": 9.875987055016182e-06, "loss": 0.0613, "step": 195520 }, { "epoch": 75.93, "learning_rate": 9.875469255663431e-06, "loss": 0.1094, "step": 195530 }, { "epoch": 75.94, "learning_rate": 9.874951456310681e-06, "loss": 0.1584, "step": 195540 }, { "epoch": 75.94, "learning_rate": 9.874433656957929e-06, "loss": 0.0037, "step": 195550 }, { "epoch": 75.95, "learning_rate": 9.873915857605179e-06, "loss": 0.0179, "step": 195560 }, { "epoch": 75.95, "learning_rate": 9.873398058252428e-06, "loss": 0.0281, "step": 195570 }, { "epoch": 75.95, "learning_rate": 9.872880258899678e-06, "loss": 0.1004, "step": 195580 }, { "epoch": 75.96, "learning_rate": 9.872362459546926e-06, "loss": 0.08, "step": 195590 }, { "epoch": 75.96, "learning_rate": 9.871844660194175e-06, "loss": 0.0575, "step": 195600 }, { "epoch": 75.97, "learning_rate": 9.871326860841425e-06, "loss": 0.0345, "step": 195610 }, { "epoch": 75.97, "learning_rate": 9.870809061488675e-06, "loss": 0.1059, "step": 195620 }, { "epoch": 75.97, "learning_rate": 9.870291262135923e-06, "loss": 0.0832, "step": 195630 }, { "epoch": 75.98, "learning_rate": 9.869773462783172e-06, "loss": 0.0513, "step": 195640 }, { "epoch": 75.98, "learning_rate": 9.869255663430422e-06, "loss": 0.1186, "step": 195650 }, { "epoch": 75.98, "learning_rate": 9.868737864077671e-06, "loss": 0.2331, "step": 195660 }, { "epoch": 75.99, "learning_rate": 9.868220064724921e-06, "loss": 0.0023, "step": 195670 }, { "epoch": 75.99, "learning_rate": 9.867702265372169e-06, "loss": 0.0786, "step": 195680 }, { "epoch": 76.0, "learning_rate": 9.867184466019419e-06, "loss": 0.0378, "step": 195690 }, { "epoch": 76.0, "learning_rate": 9.866666666666668e-06, "loss": 0.141, "step": 195700 }, { "epoch": 76.0, "eval_accuracy": 0.9526822558459422, "eval_loss": 0.34276077151298523, "eval_runtime": 8.1771, "eval_samples_per_second": 444.532, "eval_steps_per_second": 55.643, "step": 195700 }, { "epoch": 76.0, "learning_rate": 9.866148867313918e-06, "loss": 0.1148, "step": 195710 }, { "epoch": 76.01, "learning_rate": 9.865631067961166e-06, "loss": 0.0486, "step": 195720 }, { "epoch": 76.01, "learning_rate": 9.865113268608415e-06, "loss": 0.0333, "step": 195730 }, { "epoch": 76.02, "learning_rate": 9.864595469255665e-06, "loss": 0.1143, "step": 195740 }, { "epoch": 76.02, "learning_rate": 9.864077669902915e-06, "loss": 0.2066, "step": 195750 }, { "epoch": 76.02, "learning_rate": 9.863559870550163e-06, "loss": 0.1544, "step": 195760 }, { "epoch": 76.03, "learning_rate": 9.863042071197412e-06, "loss": 0.1269, "step": 195770 }, { "epoch": 76.03, "learning_rate": 9.862524271844662e-06, "loss": 0.0393, "step": 195780 }, { "epoch": 76.03, "learning_rate": 9.862006472491911e-06, "loss": 0.042, "step": 195790 }, { "epoch": 76.04, "learning_rate": 9.86148867313916e-06, "loss": 0.0617, "step": 195800 }, { "epoch": 76.04, "learning_rate": 9.860970873786409e-06, "loss": 0.0391, "step": 195810 }, { "epoch": 76.05, "learning_rate": 9.860453074433659e-06, "loss": 0.0324, "step": 195820 }, { "epoch": 76.05, "learning_rate": 9.859935275080908e-06, "loss": 0.0486, "step": 195830 }, { "epoch": 76.05, "learning_rate": 9.859417475728156e-06, "loss": 0.2899, "step": 195840 }, { "epoch": 76.06, "learning_rate": 9.858899676375406e-06, "loss": 0.0588, "step": 195850 }, { "epoch": 76.06, "learning_rate": 9.858381877022655e-06, "loss": 0.0443, "step": 195860 }, { "epoch": 76.07, "learning_rate": 9.857864077669905e-06, "loss": 0.0441, "step": 195870 }, { "epoch": 76.07, "learning_rate": 9.857346278317153e-06, "loss": 0.0779, "step": 195880 }, { "epoch": 76.07, "learning_rate": 9.856828478964402e-06, "loss": 0.0575, "step": 195890 }, { "epoch": 76.08, "learning_rate": 9.856310679611652e-06, "loss": 0.0153, "step": 195900 }, { "epoch": 76.08, "learning_rate": 9.855792880258902e-06, "loss": 0.0002, "step": 195910 }, { "epoch": 76.09, "learning_rate": 9.85527508090615e-06, "loss": 0.0722, "step": 195920 }, { "epoch": 76.09, "learning_rate": 9.8547572815534e-06, "loss": 0.0874, "step": 195930 }, { "epoch": 76.09, "learning_rate": 9.854239482200649e-06, "loss": 0.0715, "step": 195940 }, { "epoch": 76.1, "learning_rate": 9.853721682847898e-06, "loss": 0.0063, "step": 195950 }, { "epoch": 76.1, "learning_rate": 9.853203883495146e-06, "loss": 0.0321, "step": 195960 }, { "epoch": 76.1, "learning_rate": 9.852686084142396e-06, "loss": 0.1362, "step": 195970 }, { "epoch": 76.11, "learning_rate": 9.852168284789646e-06, "loss": 0.0636, "step": 195980 }, { "epoch": 76.11, "learning_rate": 9.851650485436894e-06, "loss": 0.0161, "step": 195990 }, { "epoch": 76.12, "learning_rate": 9.851132686084143e-06, "loss": 0.0111, "step": 196000 }, { "epoch": 76.12, "learning_rate": 9.850614886731393e-06, "loss": 0.0551, "step": 196010 }, { "epoch": 76.12, "learning_rate": 9.850097087378642e-06, "loss": 0.1109, "step": 196020 }, { "epoch": 76.13, "learning_rate": 9.84957928802589e-06, "loss": 0.142, "step": 196030 }, { "epoch": 76.13, "learning_rate": 9.84906148867314e-06, "loss": 0.0684, "step": 196040 }, { "epoch": 76.14, "learning_rate": 9.84854368932039e-06, "loss": 0.059, "step": 196050 }, { "epoch": 76.14, "learning_rate": 9.84802588996764e-06, "loss": 0.0179, "step": 196060 }, { "epoch": 76.14, "learning_rate": 9.847508090614887e-06, "loss": 0.0562, "step": 196070 }, { "epoch": 76.15, "learning_rate": 9.846990291262137e-06, "loss": 0.0885, "step": 196080 }, { "epoch": 76.15, "learning_rate": 9.846472491909386e-06, "loss": 0.051, "step": 196090 }, { "epoch": 76.16, "learning_rate": 9.845954692556636e-06, "loss": 0.0215, "step": 196100 }, { "epoch": 76.16, "learning_rate": 9.845436893203884e-06, "loss": 0.0078, "step": 196110 }, { "epoch": 76.16, "learning_rate": 9.844919093851134e-06, "loss": 0.0525, "step": 196120 }, { "epoch": 76.17, "learning_rate": 9.844401294498383e-06, "loss": 0.022, "step": 196130 }, { "epoch": 76.17, "learning_rate": 9.843883495145631e-06, "loss": 0.0417, "step": 196140 }, { "epoch": 76.17, "learning_rate": 9.84336569579288e-06, "loss": 0.0425, "step": 196150 }, { "epoch": 76.18, "learning_rate": 9.84284789644013e-06, "loss": 0.1461, "step": 196160 }, { "epoch": 76.18, "learning_rate": 9.84233009708738e-06, "loss": 0.0154, "step": 196170 }, { "epoch": 76.19, "learning_rate": 9.841812297734628e-06, "loss": 0.1002, "step": 196180 }, { "epoch": 76.19, "learning_rate": 9.841294498381877e-06, "loss": 0.0003, "step": 196190 }, { "epoch": 76.19, "learning_rate": 9.840776699029127e-06, "loss": 0.1564, "step": 196200 }, { "epoch": 76.2, "learning_rate": 9.840258899676377e-06, "loss": 0.0471, "step": 196210 }, { "epoch": 76.2, "learning_rate": 9.839741100323625e-06, "loss": 0.001, "step": 196220 }, { "epoch": 76.21, "learning_rate": 9.839223300970874e-06, "loss": 0.0002, "step": 196230 }, { "epoch": 76.21, "learning_rate": 9.838705501618124e-06, "loss": 0.0304, "step": 196240 }, { "epoch": 76.21, "learning_rate": 9.838187702265373e-06, "loss": 0.2077, "step": 196250 }, { "epoch": 76.22, "learning_rate": 9.837669902912621e-06, "loss": 0.0408, "step": 196260 }, { "epoch": 76.22, "learning_rate": 9.837152103559871e-06, "loss": 0.0533, "step": 196270 }, { "epoch": 76.23, "learning_rate": 9.83663430420712e-06, "loss": 0.1076, "step": 196280 }, { "epoch": 76.23, "learning_rate": 9.83611650485437e-06, "loss": 0.1653, "step": 196290 }, { "epoch": 76.23, "learning_rate": 9.835598705501618e-06, "loss": 0.1596, "step": 196300 }, { "epoch": 76.24, "learning_rate": 9.835080906148868e-06, "loss": 0.0463, "step": 196310 }, { "epoch": 76.24, "learning_rate": 9.834563106796117e-06, "loss": 0.0719, "step": 196320 }, { "epoch": 76.24, "learning_rate": 9.834045307443365e-06, "loss": 0.0864, "step": 196330 }, { "epoch": 76.25, "learning_rate": 9.833527508090615e-06, "loss": 0.0579, "step": 196340 }, { "epoch": 76.25, "learning_rate": 9.833009708737865e-06, "loss": 0.0018, "step": 196350 }, { "epoch": 76.26, "learning_rate": 9.832491909385114e-06, "loss": 0.0054, "step": 196360 }, { "epoch": 76.26, "learning_rate": 9.831974110032362e-06, "loss": 0.0877, "step": 196370 }, { "epoch": 76.26, "learning_rate": 9.831456310679612e-06, "loss": 0.0626, "step": 196380 }, { "epoch": 76.27, "learning_rate": 9.830938511326861e-06, "loss": 0.0508, "step": 196390 }, { "epoch": 76.27, "learning_rate": 9.830420711974111e-06, "loss": 0.032, "step": 196400 }, { "epoch": 76.28, "learning_rate": 9.829902912621359e-06, "loss": 0.1934, "step": 196410 }, { "epoch": 76.28, "learning_rate": 9.829385113268609e-06, "loss": 0.0775, "step": 196420 }, { "epoch": 76.28, "learning_rate": 9.828867313915858e-06, "loss": 0.1724, "step": 196430 }, { "epoch": 76.29, "learning_rate": 9.828349514563108e-06, "loss": 0.0489, "step": 196440 }, { "epoch": 76.29, "learning_rate": 9.827831715210356e-06, "loss": 0.0401, "step": 196450 }, { "epoch": 76.3, "learning_rate": 9.827313915857605e-06, "loss": 0.004, "step": 196460 }, { "epoch": 76.3, "learning_rate": 9.826796116504855e-06, "loss": 0.1798, "step": 196470 }, { "epoch": 76.3, "learning_rate": 9.826278317152105e-06, "loss": 0.0612, "step": 196480 }, { "epoch": 76.31, "learning_rate": 9.825760517799352e-06, "loss": 0.0044, "step": 196490 }, { "epoch": 76.31, "learning_rate": 9.825242718446602e-06, "loss": 0.142, "step": 196500 }, { "epoch": 76.31, "learning_rate": 9.824724919093852e-06, "loss": 0.0878, "step": 196510 }, { "epoch": 76.32, "learning_rate": 9.824207119741101e-06, "loss": 0.0078, "step": 196520 }, { "epoch": 76.32, "learning_rate": 9.82368932038835e-06, "loss": 0.0849, "step": 196530 }, { "epoch": 76.33, "learning_rate": 9.823171521035599e-06, "loss": 0.0321, "step": 196540 }, { "epoch": 76.33, "learning_rate": 9.822653721682848e-06, "loss": 0.0326, "step": 196550 }, { "epoch": 76.33, "learning_rate": 9.822135922330098e-06, "loss": 0.1192, "step": 196560 }, { "epoch": 76.34, "learning_rate": 9.821618122977346e-06, "loss": 0.1195, "step": 196570 }, { "epoch": 76.34, "learning_rate": 9.821100323624596e-06, "loss": 0.0583, "step": 196580 }, { "epoch": 76.35, "learning_rate": 9.820582524271845e-06, "loss": 0.132, "step": 196590 }, { "epoch": 76.35, "learning_rate": 9.820064724919095e-06, "loss": 0.053, "step": 196600 }, { "epoch": 76.35, "learning_rate": 9.819546925566343e-06, "loss": 0.011, "step": 196610 }, { "epoch": 76.36, "learning_rate": 9.819029126213592e-06, "loss": 0.0239, "step": 196620 }, { "epoch": 76.36, "learning_rate": 9.818511326860842e-06, "loss": 0.0176, "step": 196630 }, { "epoch": 76.37, "learning_rate": 9.817993527508092e-06, "loss": 0.1626, "step": 196640 }, { "epoch": 76.37, "learning_rate": 9.81747572815534e-06, "loss": 0.0172, "step": 196650 }, { "epoch": 76.37, "learning_rate": 9.81695792880259e-06, "loss": 0.0629, "step": 196660 }, { "epoch": 76.38, "learning_rate": 9.816440129449839e-06, "loss": 0.1979, "step": 196670 }, { "epoch": 76.38, "learning_rate": 9.815922330097088e-06, "loss": 0.0274, "step": 196680 }, { "epoch": 76.38, "learning_rate": 9.815404530744336e-06, "loss": 0.0239, "step": 196690 }, { "epoch": 76.39, "learning_rate": 9.814886731391586e-06, "loss": 0.0273, "step": 196700 }, { "epoch": 76.39, "learning_rate": 9.814368932038836e-06, "loss": 0.0758, "step": 196710 }, { "epoch": 76.4, "learning_rate": 9.813851132686085e-06, "loss": 0.0033, "step": 196720 }, { "epoch": 76.4, "learning_rate": 9.813333333333333e-06, "loss": 0.0177, "step": 196730 }, { "epoch": 76.4, "learning_rate": 9.812815533980583e-06, "loss": 0.0915, "step": 196740 }, { "epoch": 76.41, "learning_rate": 9.812297734627832e-06, "loss": 0.0634, "step": 196750 }, { "epoch": 76.41, "learning_rate": 9.811779935275082e-06, "loss": 0.0346, "step": 196760 }, { "epoch": 76.42, "learning_rate": 9.811262135922332e-06, "loss": 0.0476, "step": 196770 }, { "epoch": 76.42, "learning_rate": 9.81074433656958e-06, "loss": 0.0019, "step": 196780 }, { "epoch": 76.42, "learning_rate": 9.810226537216829e-06, "loss": 0.0657, "step": 196790 }, { "epoch": 76.43, "learning_rate": 9.809708737864079e-06, "loss": 0.2223, "step": 196800 }, { "epoch": 76.43, "learning_rate": 9.809190938511328e-06, "loss": 0.0219, "step": 196810 }, { "epoch": 76.43, "learning_rate": 9.808673139158576e-06, "loss": 0.0527, "step": 196820 }, { "epoch": 76.44, "learning_rate": 9.808155339805826e-06, "loss": 0.0032, "step": 196830 }, { "epoch": 76.44, "learning_rate": 9.807637540453076e-06, "loss": 0.0976, "step": 196840 }, { "epoch": 76.45, "learning_rate": 9.807119741100325e-06, "loss": 0.0436, "step": 196850 }, { "epoch": 76.45, "learning_rate": 9.806601941747573e-06, "loss": 0.014, "step": 196860 }, { "epoch": 76.45, "learning_rate": 9.806084142394823e-06, "loss": 0.0096, "step": 196870 }, { "epoch": 76.46, "learning_rate": 9.805566343042072e-06, "loss": 0.0143, "step": 196880 }, { "epoch": 76.46, "learning_rate": 9.805048543689322e-06, "loss": 0.0268, "step": 196890 }, { "epoch": 76.47, "learning_rate": 9.80453074433657e-06, "loss": 0.0147, "step": 196900 }, { "epoch": 76.47, "learning_rate": 9.80401294498382e-06, "loss": 0.1641, "step": 196910 }, { "epoch": 76.47, "learning_rate": 9.803495145631069e-06, "loss": 0.028, "step": 196920 }, { "epoch": 76.48, "learning_rate": 9.802977346278319e-06, "loss": 0.0855, "step": 196930 }, { "epoch": 76.48, "learning_rate": 9.802459546925567e-06, "loss": 0.1244, "step": 196940 }, { "epoch": 76.49, "learning_rate": 9.801941747572816e-06, "loss": 0.1262, "step": 196950 }, { "epoch": 76.49, "learning_rate": 9.801423948220066e-06, "loss": 0.0512, "step": 196960 }, { "epoch": 76.49, "learning_rate": 9.800906148867315e-06, "loss": 0.0005, "step": 196970 }, { "epoch": 76.5, "learning_rate": 9.800388349514563e-06, "loss": 0.0638, "step": 196980 }, { "epoch": 76.5, "learning_rate": 9.799870550161813e-06, "loss": 0.1059, "step": 196990 }, { "epoch": 76.5, "learning_rate": 9.799352750809063e-06, "loss": 0.136, "step": 197000 }, { "epoch": 76.51, "learning_rate": 9.798834951456312e-06, "loss": 0.0762, "step": 197010 }, { "epoch": 76.51, "learning_rate": 9.79831715210356e-06, "loss": 0.1048, "step": 197020 }, { "epoch": 76.52, "learning_rate": 9.79779935275081e-06, "loss": 0.0175, "step": 197030 }, { "epoch": 76.52, "learning_rate": 9.79728155339806e-06, "loss": 0.3074, "step": 197040 }, { "epoch": 76.52, "learning_rate": 9.796763754045309e-06, "loss": 0.0898, "step": 197050 }, { "epoch": 76.53, "learning_rate": 9.796245954692557e-06, "loss": 0.0318, "step": 197060 }, { "epoch": 76.53, "learning_rate": 9.795728155339807e-06, "loss": 0.0425, "step": 197070 }, { "epoch": 76.54, "learning_rate": 9.795210355987056e-06, "loss": 0.0209, "step": 197080 }, { "epoch": 76.54, "learning_rate": 9.794692556634306e-06, "loss": 0.0093, "step": 197090 }, { "epoch": 76.54, "learning_rate": 9.794174757281554e-06, "loss": 0.1393, "step": 197100 }, { "epoch": 76.55, "learning_rate": 9.793656957928803e-06, "loss": 0.0416, "step": 197110 }, { "epoch": 76.55, "learning_rate": 9.793139158576053e-06, "loss": 0.0225, "step": 197120 }, { "epoch": 76.56, "learning_rate": 9.792621359223303e-06, "loss": 0.0459, "step": 197130 }, { "epoch": 76.56, "learning_rate": 9.79210355987055e-06, "loss": 0.0555, "step": 197140 }, { "epoch": 76.56, "learning_rate": 9.7915857605178e-06, "loss": 0.136, "step": 197150 }, { "epoch": 76.57, "learning_rate": 9.79106796116505e-06, "loss": 0.1385, "step": 197160 }, { "epoch": 76.57, "learning_rate": 9.7905501618123e-06, "loss": 0.0381, "step": 197170 }, { "epoch": 76.57, "learning_rate": 9.790032362459547e-06, "loss": 0.1206, "step": 197180 }, { "epoch": 76.58, "learning_rate": 9.789514563106797e-06, "loss": 0.1444, "step": 197190 }, { "epoch": 76.58, "learning_rate": 9.788996763754047e-06, "loss": 0.0153, "step": 197200 }, { "epoch": 76.59, "learning_rate": 9.788478964401296e-06, "loss": 0.1742, "step": 197210 }, { "epoch": 76.59, "learning_rate": 9.787961165048544e-06, "loss": 0.2147, "step": 197220 }, { "epoch": 76.59, "learning_rate": 9.787443365695794e-06, "loss": 0.1913, "step": 197230 }, { "epoch": 76.6, "learning_rate": 9.786925566343043e-06, "loss": 0.0598, "step": 197240 }, { "epoch": 76.6, "learning_rate": 9.786407766990293e-06, "loss": 0.0689, "step": 197250 }, { "epoch": 76.61, "learning_rate": 9.785889967637541e-06, "loss": 0.065, "step": 197260 }, { "epoch": 76.61, "learning_rate": 9.78537216828479e-06, "loss": 0.0171, "step": 197270 }, { "epoch": 76.61, "learning_rate": 9.78485436893204e-06, "loss": 0.0893, "step": 197280 }, { "epoch": 76.62, "learning_rate": 9.78433656957929e-06, "loss": 0.032, "step": 197290 }, { "epoch": 76.62, "learning_rate": 9.783818770226538e-06, "loss": 0.0953, "step": 197300 }, { "epoch": 76.63, "learning_rate": 9.783300970873787e-06, "loss": 0.0576, "step": 197310 }, { "epoch": 76.63, "learning_rate": 9.782783171521037e-06, "loss": 0.0251, "step": 197320 }, { "epoch": 76.63, "learning_rate": 9.782265372168286e-06, "loss": 0.07, "step": 197330 }, { "epoch": 76.64, "learning_rate": 9.781747572815536e-06, "loss": 0.0647, "step": 197340 }, { "epoch": 76.64, "learning_rate": 9.781229773462784e-06, "loss": 0.0099, "step": 197350 }, { "epoch": 76.64, "learning_rate": 9.780711974110034e-06, "loss": 0.0395, "step": 197360 }, { "epoch": 76.65, "learning_rate": 9.780194174757283e-06, "loss": 0.085, "step": 197370 }, { "epoch": 76.65, "learning_rate": 9.779676375404533e-06, "loss": 0.0512, "step": 197380 }, { "epoch": 76.66, "learning_rate": 9.77915857605178e-06, "loss": 0.0021, "step": 197390 }, { "epoch": 76.66, "learning_rate": 9.77864077669903e-06, "loss": 0.0927, "step": 197400 }, { "epoch": 76.66, "learning_rate": 9.77812297734628e-06, "loss": 0.1181, "step": 197410 }, { "epoch": 76.67, "learning_rate": 9.77760517799353e-06, "loss": 0.0163, "step": 197420 }, { "epoch": 76.67, "learning_rate": 9.777087378640778e-06, "loss": 0.0871, "step": 197430 }, { "epoch": 76.68, "learning_rate": 9.776569579288027e-06, "loss": 0.1003, "step": 197440 }, { "epoch": 76.68, "learning_rate": 9.776051779935277e-06, "loss": 0.0062, "step": 197450 }, { "epoch": 76.68, "learning_rate": 9.775533980582525e-06, "loss": 0.0825, "step": 197460 }, { "epoch": 76.69, "learning_rate": 9.775016181229774e-06, "loss": 0.1042, "step": 197470 }, { "epoch": 76.69, "learning_rate": 9.774498381877024e-06, "loss": 0.0282, "step": 197480 }, { "epoch": 76.7, "learning_rate": 9.773980582524274e-06, "loss": 0.0675, "step": 197490 }, { "epoch": 76.7, "learning_rate": 9.773462783171522e-06, "loss": 0.0721, "step": 197500 }, { "epoch": 76.7, "learning_rate": 9.772944983818771e-06, "loss": 0.0066, "step": 197510 }, { "epoch": 76.71, "learning_rate": 9.77242718446602e-06, "loss": 0.026, "step": 197520 }, { "epoch": 76.71, "learning_rate": 9.77190938511327e-06, "loss": 0.0315, "step": 197530 }, { "epoch": 76.71, "learning_rate": 9.771391585760518e-06, "loss": 0.0723, "step": 197540 }, { "epoch": 76.72, "learning_rate": 9.770873786407768e-06, "loss": 0.0289, "step": 197550 }, { "epoch": 76.72, "learning_rate": 9.770355987055018e-06, "loss": 0.0149, "step": 197560 }, { "epoch": 76.73, "learning_rate": 9.769838187702267e-06, "loss": 0.0996, "step": 197570 }, { "epoch": 76.73, "learning_rate": 9.769320388349515e-06, "loss": 0.0892, "step": 197580 }, { "epoch": 76.73, "learning_rate": 9.768802588996765e-06, "loss": 0.1379, "step": 197590 }, { "epoch": 76.74, "learning_rate": 9.768284789644014e-06, "loss": 0.1011, "step": 197600 }, { "epoch": 76.74, "learning_rate": 9.767766990291262e-06, "loss": 0.0853, "step": 197610 }, { "epoch": 76.75, "learning_rate": 9.767249190938512e-06, "loss": 0.0152, "step": 197620 }, { "epoch": 76.75, "learning_rate": 9.766731391585761e-06, "loss": 0.1629, "step": 197630 }, { "epoch": 76.75, "learning_rate": 9.766213592233011e-06, "loss": 0.2364, "step": 197640 }, { "epoch": 76.76, "learning_rate": 9.765695792880259e-06, "loss": 0.2121, "step": 197650 }, { "epoch": 76.76, "learning_rate": 9.765177993527509e-06, "loss": 0.0667, "step": 197660 }, { "epoch": 76.77, "learning_rate": 9.764660194174758e-06, "loss": 0.0715, "step": 197670 }, { "epoch": 76.77, "learning_rate": 9.764142394822008e-06, "loss": 0.0689, "step": 197680 }, { "epoch": 76.77, "learning_rate": 9.763624595469256e-06, "loss": 0.0123, "step": 197690 }, { "epoch": 76.78, "learning_rate": 9.763106796116505e-06, "loss": 0.1291, "step": 197700 }, { "epoch": 76.78, "learning_rate": 9.762588996763755e-06, "loss": 0.014, "step": 197710 }, { "epoch": 76.78, "learning_rate": 9.762071197411005e-06, "loss": 0.0616, "step": 197720 }, { "epoch": 76.79, "learning_rate": 9.761553398058253e-06, "loss": 0.0455, "step": 197730 }, { "epoch": 76.79, "learning_rate": 9.761035598705502e-06, "loss": 0.0376, "step": 197740 }, { "epoch": 76.8, "learning_rate": 9.760517799352752e-06, "loss": 0.0637, "step": 197750 }, { "epoch": 76.8, "learning_rate": 9.760000000000001e-06, "loss": 0.0248, "step": 197760 }, { "epoch": 76.8, "learning_rate": 9.75948220064725e-06, "loss": 0.0662, "step": 197770 }, { "epoch": 76.81, "learning_rate": 9.758964401294499e-06, "loss": 0.0676, "step": 197780 }, { "epoch": 76.81, "learning_rate": 9.758446601941749e-06, "loss": 0.0091, "step": 197790 }, { "epoch": 76.82, "learning_rate": 9.757928802588997e-06, "loss": 0.0867, "step": 197800 }, { "epoch": 76.82, "learning_rate": 9.757411003236246e-06, "loss": 0.087, "step": 197810 }, { "epoch": 76.82, "learning_rate": 9.756893203883496e-06, "loss": 0.0652, "step": 197820 }, { "epoch": 76.83, "learning_rate": 9.756375404530745e-06, "loss": 0.0667, "step": 197830 }, { "epoch": 76.83, "learning_rate": 9.755857605177993e-06, "loss": 0.1265, "step": 197840 }, { "epoch": 76.83, "learning_rate": 9.755339805825243e-06, "loss": 0.0323, "step": 197850 }, { "epoch": 76.84, "learning_rate": 9.754822006472493e-06, "loss": 0.0003, "step": 197860 }, { "epoch": 76.84, "learning_rate": 9.754304207119742e-06, "loss": 0.0114, "step": 197870 }, { "epoch": 76.85, "learning_rate": 9.75378640776699e-06, "loss": 0.093, "step": 197880 }, { "epoch": 76.85, "learning_rate": 9.75326860841424e-06, "loss": 0.0008, "step": 197890 }, { "epoch": 76.85, "learning_rate": 9.75275080906149e-06, "loss": 0.0367, "step": 197900 }, { "epoch": 76.86, "learning_rate": 9.752233009708739e-06, "loss": 0.0519, "step": 197910 }, { "epoch": 76.86, "learning_rate": 9.751715210355987e-06, "loss": 0.0791, "step": 197920 }, { "epoch": 76.87, "learning_rate": 9.751197411003236e-06, "loss": 0.0088, "step": 197930 }, { "epoch": 76.87, "learning_rate": 9.750679611650486e-06, "loss": 0.072, "step": 197940 }, { "epoch": 76.87, "learning_rate": 9.750161812297736e-06, "loss": 0.0008, "step": 197950 }, { "epoch": 76.88, "learning_rate": 9.749644012944984e-06, "loss": 0.0484, "step": 197960 }, { "epoch": 76.88, "learning_rate": 9.749126213592233e-06, "loss": 0.0351, "step": 197970 }, { "epoch": 76.89, "learning_rate": 9.748608414239483e-06, "loss": 0.0134, "step": 197980 }, { "epoch": 76.89, "learning_rate": 9.748090614886732e-06, "loss": 0.0455, "step": 197990 }, { "epoch": 76.89, "learning_rate": 9.74757281553398e-06, "loss": 0.0054, "step": 198000 }, { "epoch": 76.9, "learning_rate": 9.74705501618123e-06, "loss": 0.0483, "step": 198010 }, { "epoch": 76.9, "learning_rate": 9.74653721682848e-06, "loss": 0.0007, "step": 198020 }, { "epoch": 76.9, "learning_rate": 9.74601941747573e-06, "loss": 0.0483, "step": 198030 }, { "epoch": 76.91, "learning_rate": 9.745501618122977e-06, "loss": 0.0378, "step": 198040 }, { "epoch": 76.91, "learning_rate": 9.744983818770227e-06, "loss": 0.0772, "step": 198050 }, { "epoch": 76.92, "learning_rate": 9.744466019417476e-06, "loss": 0.0543, "step": 198060 }, { "epoch": 76.92, "learning_rate": 9.743948220064726e-06, "loss": 0.0244, "step": 198070 }, { "epoch": 76.92, "learning_rate": 9.743430420711974e-06, "loss": 0.0822, "step": 198080 }, { "epoch": 76.93, "learning_rate": 9.742912621359224e-06, "loss": 0.0003, "step": 198090 }, { "epoch": 76.93, "learning_rate": 9.742394822006473e-06, "loss": 0.0641, "step": 198100 }, { "epoch": 76.94, "learning_rate": 9.741877022653723e-06, "loss": 0.103, "step": 198110 }, { "epoch": 76.94, "learning_rate": 9.74135922330097e-06, "loss": 0.0601, "step": 198120 }, { "epoch": 76.94, "learning_rate": 9.74084142394822e-06, "loss": 0.1037, "step": 198130 }, { "epoch": 76.95, "learning_rate": 9.74032362459547e-06, "loss": 0.0691, "step": 198140 }, { "epoch": 76.95, "learning_rate": 9.73980582524272e-06, "loss": 0.0171, "step": 198150 }, { "epoch": 76.96, "learning_rate": 9.739288025889968e-06, "loss": 0.0052, "step": 198160 }, { "epoch": 76.96, "learning_rate": 9.738770226537217e-06, "loss": 0.0667, "step": 198170 }, { "epoch": 76.96, "learning_rate": 9.738252427184467e-06, "loss": 0.0743, "step": 198180 }, { "epoch": 76.97, "learning_rate": 9.737734627831716e-06, "loss": 0.0731, "step": 198190 }, { "epoch": 76.97, "learning_rate": 9.737216828478964e-06, "loss": 0.0141, "step": 198200 }, { "epoch": 76.97, "learning_rate": 9.736699029126214e-06, "loss": 0.0012, "step": 198210 }, { "epoch": 76.98, "learning_rate": 9.736181229773464e-06, "loss": 0.0026, "step": 198220 }, { "epoch": 76.98, "learning_rate": 9.735663430420713e-06, "loss": 0.0692, "step": 198230 }, { "epoch": 76.99, "learning_rate": 9.735145631067961e-06, "loss": 0.1095, "step": 198240 }, { "epoch": 76.99, "learning_rate": 9.73462783171521e-06, "loss": 0.1161, "step": 198250 }, { "epoch": 76.99, "learning_rate": 9.73411003236246e-06, "loss": 0.1324, "step": 198260 }, { "epoch": 77.0, "learning_rate": 9.73359223300971e-06, "loss": 0.0226, "step": 198270 }, { "epoch": 77.0, "eval_accuracy": 0.9502063273727648, "eval_loss": 0.3594486117362976, "eval_runtime": 8.1373, "eval_samples_per_second": 446.707, "eval_steps_per_second": 55.915, "step": 198275 }, { "epoch": 77.0, "learning_rate": 9.733074433656958e-06, "loss": 0.0019, "step": 198280 }, { "epoch": 77.01, "learning_rate": 9.732556634304207e-06, "loss": 0.0467, "step": 198290 }, { "epoch": 77.01, "learning_rate": 9.732038834951457e-06, "loss": 0.0712, "step": 198300 }, { "epoch": 77.01, "learning_rate": 9.731521035598707e-06, "loss": 0.0859, "step": 198310 }, { "epoch": 77.02, "learning_rate": 9.731003236245955e-06, "loss": 0.2678, "step": 198320 }, { "epoch": 77.02, "learning_rate": 9.730485436893204e-06, "loss": 0.1927, "step": 198330 }, { "epoch": 77.03, "learning_rate": 9.729967637540454e-06, "loss": 0.0548, "step": 198340 }, { "epoch": 77.03, "learning_rate": 9.729449838187703e-06, "loss": 0.0046, "step": 198350 }, { "epoch": 77.03, "learning_rate": 9.728932038834951e-06, "loss": 0.1073, "step": 198360 }, { "epoch": 77.04, "learning_rate": 9.728414239482201e-06, "loss": 0.2912, "step": 198370 }, { "epoch": 77.04, "learning_rate": 9.72789644012945e-06, "loss": 0.0927, "step": 198380 }, { "epoch": 77.04, "learning_rate": 9.7273786407767e-06, "loss": 0.044, "step": 198390 }, { "epoch": 77.05, "learning_rate": 9.726860841423948e-06, "loss": 0.0503, "step": 198400 }, { "epoch": 77.05, "learning_rate": 9.726343042071198e-06, "loss": 0.1347, "step": 198410 }, { "epoch": 77.06, "learning_rate": 9.725825242718447e-06, "loss": 0.0156, "step": 198420 }, { "epoch": 77.06, "learning_rate": 9.725307443365697e-06, "loss": 0.0223, "step": 198430 }, { "epoch": 77.06, "learning_rate": 9.724789644012945e-06, "loss": 0.039, "step": 198440 }, { "epoch": 77.07, "learning_rate": 9.724271844660195e-06, "loss": 0.0259, "step": 198450 }, { "epoch": 77.07, "learning_rate": 9.723754045307444e-06, "loss": 0.0607, "step": 198460 }, { "epoch": 77.08, "learning_rate": 9.723236245954694e-06, "loss": 0.0177, "step": 198470 }, { "epoch": 77.08, "learning_rate": 9.722718446601943e-06, "loss": 0.0103, "step": 198480 }, { "epoch": 77.08, "learning_rate": 9.722200647249191e-06, "loss": 0.0412, "step": 198490 }, { "epoch": 77.09, "learning_rate": 9.721682847896441e-06, "loss": 0.0528, "step": 198500 }, { "epoch": 77.09, "learning_rate": 9.72116504854369e-06, "loss": 0.0233, "step": 198510 }, { "epoch": 77.1, "learning_rate": 9.72064724919094e-06, "loss": 0.0563, "step": 198520 }, { "epoch": 77.1, "learning_rate": 9.720129449838188e-06, "loss": 0.01, "step": 198530 }, { "epoch": 77.1, "learning_rate": 9.719611650485438e-06, "loss": 0.0697, "step": 198540 }, { "epoch": 77.11, "learning_rate": 9.719093851132687e-06, "loss": 0.0157, "step": 198550 }, { "epoch": 77.11, "learning_rate": 9.718576051779937e-06, "loss": 0.1156, "step": 198560 }, { "epoch": 77.11, "learning_rate": 9.718058252427185e-06, "loss": 0.0004, "step": 198570 }, { "epoch": 77.12, "learning_rate": 9.717540453074435e-06, "loss": 0.0143, "step": 198580 }, { "epoch": 77.12, "learning_rate": 9.717022653721684e-06, "loss": 0.2524, "step": 198590 }, { "epoch": 77.13, "learning_rate": 9.716504854368934e-06, "loss": 0.1034, "step": 198600 }, { "epoch": 77.13, "learning_rate": 9.715987055016182e-06, "loss": 0.1952, "step": 198610 }, { "epoch": 77.13, "learning_rate": 9.715469255663431e-06, "loss": 0.1269, "step": 198620 }, { "epoch": 77.14, "learning_rate": 9.714951456310681e-06, "loss": 0.001, "step": 198630 }, { "epoch": 77.14, "learning_rate": 9.71443365695793e-06, "loss": 0.0385, "step": 198640 }, { "epoch": 77.15, "learning_rate": 9.713915857605178e-06, "loss": 0.1623, "step": 198650 }, { "epoch": 77.15, "learning_rate": 9.713398058252428e-06, "loss": 0.0917, "step": 198660 }, { "epoch": 77.15, "learning_rate": 9.712880258899678e-06, "loss": 0.0223, "step": 198670 }, { "epoch": 77.16, "learning_rate": 9.712362459546927e-06, "loss": 0.0167, "step": 198680 }, { "epoch": 77.16, "learning_rate": 9.711844660194175e-06, "loss": 0.0674, "step": 198690 }, { "epoch": 77.17, "learning_rate": 9.711326860841425e-06, "loss": 0.1744, "step": 198700 }, { "epoch": 77.17, "learning_rate": 9.710809061488674e-06, "loss": 0.0611, "step": 198710 }, { "epoch": 77.17, "learning_rate": 9.710291262135924e-06, "loss": 0.0786, "step": 198720 }, { "epoch": 77.18, "learning_rate": 9.709773462783172e-06, "loss": 0.1056, "step": 198730 }, { "epoch": 77.18, "learning_rate": 9.709255663430422e-06, "loss": 0.1144, "step": 198740 }, { "epoch": 77.18, "learning_rate": 9.708737864077671e-06, "loss": 0.0326, "step": 198750 }, { "epoch": 77.19, "learning_rate": 9.708220064724921e-06, "loss": 0.0413, "step": 198760 }, { "epoch": 77.19, "learning_rate": 9.707702265372169e-06, "loss": 0.0236, "step": 198770 }, { "epoch": 77.2, "learning_rate": 9.707184466019418e-06, "loss": 0.0473, "step": 198780 }, { "epoch": 77.2, "learning_rate": 9.706666666666668e-06, "loss": 0.0739, "step": 198790 }, { "epoch": 77.2, "learning_rate": 9.706148867313918e-06, "loss": 0.1098, "step": 198800 }, { "epoch": 77.21, "learning_rate": 9.705631067961166e-06, "loss": 0.0559, "step": 198810 }, { "epoch": 77.21, "learning_rate": 9.705113268608415e-06, "loss": 0.071, "step": 198820 }, { "epoch": 77.22, "learning_rate": 9.704595469255665e-06, "loss": 0.0946, "step": 198830 }, { "epoch": 77.22, "learning_rate": 9.704077669902914e-06, "loss": 0.0001, "step": 198840 }, { "epoch": 77.22, "learning_rate": 9.703559870550162e-06, "loss": 0.0307, "step": 198850 }, { "epoch": 77.23, "learning_rate": 9.703042071197412e-06, "loss": 0.0551, "step": 198860 }, { "epoch": 77.23, "learning_rate": 9.702524271844662e-06, "loss": 0.1406, "step": 198870 }, { "epoch": 77.23, "learning_rate": 9.702006472491911e-06, "loss": 0.0295, "step": 198880 }, { "epoch": 77.24, "learning_rate": 9.701488673139159e-06, "loss": 0.1565, "step": 198890 }, { "epoch": 77.24, "learning_rate": 9.700970873786409e-06, "loss": 0.0718, "step": 198900 }, { "epoch": 77.25, "learning_rate": 9.700453074433658e-06, "loss": 0.1579, "step": 198910 }, { "epoch": 77.25, "learning_rate": 9.699935275080908e-06, "loss": 0.0779, "step": 198920 }, { "epoch": 77.25, "learning_rate": 9.699417475728156e-06, "loss": 0.0082, "step": 198930 }, { "epoch": 77.26, "learning_rate": 9.698899676375406e-06, "loss": 0.0255, "step": 198940 }, { "epoch": 77.26, "learning_rate": 9.698381877022655e-06, "loss": 0.0658, "step": 198950 }, { "epoch": 77.27, "learning_rate": 9.697864077669905e-06, "loss": 0.0192, "step": 198960 }, { "epoch": 77.27, "learning_rate": 9.697346278317153e-06, "loss": 0.0505, "step": 198970 }, { "epoch": 77.27, "learning_rate": 9.696828478964402e-06, "loss": 0.0876, "step": 198980 }, { "epoch": 77.28, "learning_rate": 9.696310679611652e-06, "loss": 0.066, "step": 198990 }, { "epoch": 77.28, "learning_rate": 9.695792880258902e-06, "loss": 0.0565, "step": 199000 }, { "epoch": 77.29, "learning_rate": 9.69527508090615e-06, "loss": 0.0728, "step": 199010 }, { "epoch": 77.29, "learning_rate": 9.694757281553399e-06, "loss": 0.0571, "step": 199020 }, { "epoch": 77.29, "learning_rate": 9.694239482200649e-06, "loss": 0.0979, "step": 199030 }, { "epoch": 77.3, "learning_rate": 9.693721682847898e-06, "loss": 0.0189, "step": 199040 }, { "epoch": 77.3, "learning_rate": 9.693203883495146e-06, "loss": 0.1149, "step": 199050 }, { "epoch": 77.3, "learning_rate": 9.692686084142396e-06, "loss": 0.0487, "step": 199060 }, { "epoch": 77.31, "learning_rate": 9.692168284789645e-06, "loss": 0.0173, "step": 199070 }, { "epoch": 77.31, "learning_rate": 9.691650485436893e-06, "loss": 0.0246, "step": 199080 }, { "epoch": 77.32, "learning_rate": 9.691132686084143e-06, "loss": 0.0014, "step": 199090 }, { "epoch": 77.32, "learning_rate": 9.690614886731393e-06, "loss": 0.0741, "step": 199100 }, { "epoch": 77.32, "learning_rate": 9.690097087378642e-06, "loss": 0.0707, "step": 199110 }, { "epoch": 77.33, "learning_rate": 9.68957928802589e-06, "loss": 0.1339, "step": 199120 }, { "epoch": 77.33, "learning_rate": 9.68906148867314e-06, "loss": 0.14, "step": 199130 }, { "epoch": 77.34, "learning_rate": 9.68854368932039e-06, "loss": 0.0592, "step": 199140 }, { "epoch": 77.34, "learning_rate": 9.688025889967639e-06, "loss": 0.0619, "step": 199150 }, { "epoch": 77.34, "learning_rate": 9.687508090614887e-06, "loss": 0.0477, "step": 199160 }, { "epoch": 77.35, "learning_rate": 9.686990291262137e-06, "loss": 0.0106, "step": 199170 }, { "epoch": 77.35, "learning_rate": 9.686472491909386e-06, "loss": 0.0233, "step": 199180 }, { "epoch": 77.36, "learning_rate": 9.685954692556636e-06, "loss": 0.0443, "step": 199190 }, { "epoch": 77.36, "learning_rate": 9.685436893203884e-06, "loss": 0.001, "step": 199200 }, { "epoch": 77.36, "learning_rate": 9.684919093851133e-06, "loss": 0.1172, "step": 199210 }, { "epoch": 77.37, "learning_rate": 9.684401294498383e-06, "loss": 0.0075, "step": 199220 }, { "epoch": 77.37, "learning_rate": 9.683883495145633e-06, "loss": 0.0785, "step": 199230 }, { "epoch": 77.37, "learning_rate": 9.68336569579288e-06, "loss": 0.0919, "step": 199240 }, { "epoch": 77.38, "learning_rate": 9.68284789644013e-06, "loss": 0.0428, "step": 199250 }, { "epoch": 77.38, "learning_rate": 9.68233009708738e-06, "loss": 0.1062, "step": 199260 }, { "epoch": 77.39, "learning_rate": 9.681812297734628e-06, "loss": 0.1699, "step": 199270 }, { "epoch": 77.39, "learning_rate": 9.681294498381877e-06, "loss": 0.06, "step": 199280 }, { "epoch": 77.39, "learning_rate": 9.680776699029127e-06, "loss": 0.1057, "step": 199290 }, { "epoch": 77.4, "learning_rate": 9.680258899676377e-06, "loss": 0.117, "step": 199300 }, { "epoch": 77.4, "learning_rate": 9.679741100323624e-06, "loss": 0.0008, "step": 199310 }, { "epoch": 77.41, "learning_rate": 9.679223300970874e-06, "loss": 0.0617, "step": 199320 }, { "epoch": 77.41, "learning_rate": 9.678705501618124e-06, "loss": 0.125, "step": 199330 }, { "epoch": 77.41, "learning_rate": 9.678187702265373e-06, "loss": 0.0043, "step": 199340 }, { "epoch": 77.42, "learning_rate": 9.677669902912621e-06, "loss": 0.0152, "step": 199350 }, { "epoch": 77.42, "learning_rate": 9.677152103559871e-06, "loss": 0.013, "step": 199360 }, { "epoch": 77.43, "learning_rate": 9.67663430420712e-06, "loss": 0.0389, "step": 199370 }, { "epoch": 77.43, "learning_rate": 9.67611650485437e-06, "loss": 0.0626, "step": 199380 }, { "epoch": 77.43, "learning_rate": 9.675598705501618e-06, "loss": 0.048, "step": 199390 }, { "epoch": 77.44, "learning_rate": 9.675080906148868e-06, "loss": 0.0734, "step": 199400 }, { "epoch": 77.44, "learning_rate": 9.674563106796117e-06, "loss": 0.0831, "step": 199410 }, { "epoch": 77.44, "learning_rate": 9.674045307443365e-06, "loss": 0.027, "step": 199420 }, { "epoch": 77.45, "learning_rate": 9.673527508090615e-06, "loss": 0.0654, "step": 199430 }, { "epoch": 77.45, "learning_rate": 9.673009708737864e-06, "loss": 0.0079, "step": 199440 }, { "epoch": 77.46, "learning_rate": 9.672491909385114e-06, "loss": 0.0019, "step": 199450 }, { "epoch": 77.46, "learning_rate": 9.671974110032362e-06, "loss": 0.0299, "step": 199460 }, { "epoch": 77.46, "learning_rate": 9.671456310679612e-06, "loss": 0.0796, "step": 199470 }, { "epoch": 77.47, "learning_rate": 9.670938511326861e-06, "loss": 0.0794, "step": 199480 }, { "epoch": 77.47, "learning_rate": 9.67042071197411e-06, "loss": 0.0915, "step": 199490 }, { "epoch": 77.48, "learning_rate": 9.669902912621359e-06, "loss": 0.1295, "step": 199500 }, { "epoch": 77.48, "learning_rate": 9.669385113268608e-06, "loss": 0.094, "step": 199510 }, { "epoch": 77.48, "learning_rate": 9.668867313915858e-06, "loss": 0.0131, "step": 199520 }, { "epoch": 77.49, "learning_rate": 9.668349514563108e-06, "loss": 0.1824, "step": 199530 }, { "epoch": 77.49, "learning_rate": 9.667831715210356e-06, "loss": 0.1163, "step": 199540 }, { "epoch": 77.5, "learning_rate": 9.667313915857605e-06, "loss": 0.0915, "step": 199550 }, { "epoch": 77.5, "learning_rate": 9.666796116504855e-06, "loss": 0.0969, "step": 199560 }, { "epoch": 77.5, "learning_rate": 9.666278317152104e-06, "loss": 0.0121, "step": 199570 }, { "epoch": 77.51, "learning_rate": 9.665760517799354e-06, "loss": 0.0083, "step": 199580 }, { "epoch": 77.51, "learning_rate": 9.665242718446602e-06, "loss": 0.0937, "step": 199590 }, { "epoch": 77.51, "learning_rate": 9.664724919093852e-06, "loss": 0.1614, "step": 199600 }, { "epoch": 77.52, "learning_rate": 9.664207119741101e-06, "loss": 0.0956, "step": 199610 }, { "epoch": 77.52, "learning_rate": 9.66368932038835e-06, "loss": 0.0034, "step": 199620 }, { "epoch": 77.53, "learning_rate": 9.663171521035599e-06, "loss": 0.0405, "step": 199630 }, { "epoch": 77.53, "learning_rate": 9.662653721682848e-06, "loss": 0.0703, "step": 199640 }, { "epoch": 77.53, "learning_rate": 9.662135922330098e-06, "loss": 0.029, "step": 199650 }, { "epoch": 77.54, "learning_rate": 9.661618122977348e-06, "loss": 0.0034, "step": 199660 }, { "epoch": 77.54, "learning_rate": 9.661100323624595e-06, "loss": 0.0246, "step": 199670 }, { "epoch": 77.55, "learning_rate": 9.660582524271845e-06, "loss": 0.0008, "step": 199680 }, { "epoch": 77.55, "learning_rate": 9.660064724919095e-06, "loss": 0.1044, "step": 199690 }, { "epoch": 77.55, "learning_rate": 9.659546925566344e-06, "loss": 0.0463, "step": 199700 }, { "epoch": 77.56, "learning_rate": 9.659029126213592e-06, "loss": 0.0096, "step": 199710 }, { "epoch": 77.56, "learning_rate": 9.658511326860842e-06, "loss": 0.045, "step": 199720 }, { "epoch": 77.57, "learning_rate": 9.657993527508091e-06, "loss": 0.0274, "step": 199730 }, { "epoch": 77.57, "learning_rate": 9.657475728155341e-06, "loss": 0.0695, "step": 199740 }, { "epoch": 77.57, "learning_rate": 9.656957928802589e-06, "loss": 0.1581, "step": 199750 }, { "epoch": 77.58, "learning_rate": 9.656440129449839e-06, "loss": 0.1469, "step": 199760 }, { "epoch": 77.58, "learning_rate": 9.655922330097088e-06, "loss": 0.0542, "step": 199770 }, { "epoch": 77.58, "learning_rate": 9.655404530744338e-06, "loss": 0.0658, "step": 199780 }, { "epoch": 77.59, "learning_rate": 9.654886731391586e-06, "loss": 0.0874, "step": 199790 }, { "epoch": 77.59, "learning_rate": 9.654368932038835e-06, "loss": 0.0307, "step": 199800 }, { "epoch": 77.6, "learning_rate": 9.653851132686085e-06, "loss": 0.0722, "step": 199810 }, { "epoch": 77.6, "learning_rate": 9.653333333333335e-06, "loss": 0.0349, "step": 199820 }, { "epoch": 77.6, "learning_rate": 9.652815533980583e-06, "loss": 0.0819, "step": 199830 }, { "epoch": 77.61, "learning_rate": 9.652297734627832e-06, "loss": 0.0938, "step": 199840 }, { "epoch": 77.61, "learning_rate": 9.651779935275082e-06, "loss": 0.0473, "step": 199850 }, { "epoch": 77.62, "learning_rate": 9.651262135922331e-06, "loss": 0.0405, "step": 199860 }, { "epoch": 77.62, "learning_rate": 9.65074433656958e-06, "loss": 0.0243, "step": 199870 }, { "epoch": 77.62, "learning_rate": 9.650226537216829e-06, "loss": 0.1544, "step": 199880 }, { "epoch": 77.63, "learning_rate": 9.649708737864079e-06, "loss": 0.0863, "step": 199890 }, { "epoch": 77.63, "learning_rate": 9.649190938511328e-06, "loss": 0.027, "step": 199900 }, { "epoch": 77.63, "learning_rate": 9.648673139158576e-06, "loss": 0.0396, "step": 199910 }, { "epoch": 77.64, "learning_rate": 9.648155339805826e-06, "loss": 0.0014, "step": 199920 }, { "epoch": 77.64, "learning_rate": 9.647637540453075e-06, "loss": 0.0248, "step": 199930 }, { "epoch": 77.65, "learning_rate": 9.647119741100325e-06, "loss": 0.1174, "step": 199940 }, { "epoch": 77.65, "learning_rate": 9.646601941747573e-06, "loss": 0.0102, "step": 199950 }, { "epoch": 77.65, "learning_rate": 9.646084142394823e-06, "loss": 0.1626, "step": 199960 }, { "epoch": 77.66, "learning_rate": 9.645566343042072e-06, "loss": 0.0371, "step": 199970 }, { "epoch": 77.66, "learning_rate": 9.645048543689322e-06, "loss": 0.1089, "step": 199980 }, { "epoch": 77.67, "learning_rate": 9.64453074433657e-06, "loss": 0.01, "step": 199990 }, { "epoch": 77.67, "learning_rate": 9.64401294498382e-06, "loss": 0.0995, "step": 200000 }, { "epoch": 77.67, "learning_rate": 9.643495145631069e-06, "loss": 0.0488, "step": 200010 }, { "epoch": 77.68, "learning_rate": 9.642977346278319e-06, "loss": 0.0027, "step": 200020 }, { "epoch": 77.68, "learning_rate": 9.642459546925566e-06, "loss": 0.0096, "step": 200030 }, { "epoch": 77.69, "learning_rate": 9.641941747572816e-06, "loss": 0.0822, "step": 200040 }, { "epoch": 77.69, "learning_rate": 9.641423948220066e-06, "loss": 0.0003, "step": 200050 }, { "epoch": 77.69, "learning_rate": 9.640906148867315e-06, "loss": 0.2158, "step": 200060 }, { "epoch": 77.7, "learning_rate": 9.640388349514563e-06, "loss": 0.0653, "step": 200070 }, { "epoch": 77.7, "learning_rate": 9.639870550161813e-06, "loss": 0.0304, "step": 200080 }, { "epoch": 77.7, "learning_rate": 9.639352750809062e-06, "loss": 0.0468, "step": 200090 }, { "epoch": 77.71, "learning_rate": 9.638834951456312e-06, "loss": 0.2144, "step": 200100 }, { "epoch": 77.71, "learning_rate": 9.63831715210356e-06, "loss": 0.0135, "step": 200110 }, { "epoch": 77.72, "learning_rate": 9.63779935275081e-06, "loss": 0.0188, "step": 200120 }, { "epoch": 77.72, "learning_rate": 9.63728155339806e-06, "loss": 0.0096, "step": 200130 }, { "epoch": 77.72, "learning_rate": 9.636763754045309e-06, "loss": 0.0403, "step": 200140 }, { "epoch": 77.73, "learning_rate": 9.636245954692558e-06, "loss": 0.1316, "step": 200150 }, { "epoch": 77.73, "learning_rate": 9.635728155339806e-06, "loss": 0.0192, "step": 200160 }, { "epoch": 77.74, "learning_rate": 9.635210355987056e-06, "loss": 0.0463, "step": 200170 }, { "epoch": 77.74, "learning_rate": 9.634692556634306e-06, "loss": 0.068, "step": 200180 }, { "epoch": 77.74, "learning_rate": 9.634174757281555e-06, "loss": 0.0271, "step": 200190 }, { "epoch": 77.75, "learning_rate": 9.633656957928803e-06, "loss": 0.0721, "step": 200200 }, { "epoch": 77.75, "learning_rate": 9.633139158576053e-06, "loss": 0.0834, "step": 200210 }, { "epoch": 77.76, "learning_rate": 9.632621359223302e-06, "loss": 0.0998, "step": 200220 }, { "epoch": 77.76, "learning_rate": 9.632103559870552e-06, "loss": 0.0534, "step": 200230 }, { "epoch": 77.76, "learning_rate": 9.6315857605178e-06, "loss": 0.0552, "step": 200240 }, { "epoch": 77.77, "learning_rate": 9.63106796116505e-06, "loss": 0.0207, "step": 200250 }, { "epoch": 77.77, "learning_rate": 9.6305501618123e-06, "loss": 0.0062, "step": 200260 }, { "epoch": 77.77, "learning_rate": 9.630032362459549e-06, "loss": 0.131, "step": 200270 }, { "epoch": 77.78, "learning_rate": 9.629514563106797e-06, "loss": 0.1376, "step": 200280 }, { "epoch": 77.78, "learning_rate": 9.628996763754046e-06, "loss": 0.0591, "step": 200290 }, { "epoch": 77.79, "learning_rate": 9.628478964401296e-06, "loss": 0.0789, "step": 200300 }, { "epoch": 77.79, "learning_rate": 9.627961165048546e-06, "loss": 0.0456, "step": 200310 }, { "epoch": 77.79, "learning_rate": 9.627443365695794e-06, "loss": 0.0428, "step": 200320 }, { "epoch": 77.8, "learning_rate": 9.626925566343043e-06, "loss": 0.0025, "step": 200330 }, { "epoch": 77.8, "learning_rate": 9.626407766990293e-06, "loss": 0.0831, "step": 200340 }, { "epoch": 77.81, "learning_rate": 9.625889967637542e-06, "loss": 0.0165, "step": 200350 }, { "epoch": 77.81, "learning_rate": 9.62537216828479e-06, "loss": 0.0713, "step": 200360 }, { "epoch": 77.81, "learning_rate": 9.62485436893204e-06, "loss": 0.1156, "step": 200370 }, { "epoch": 77.82, "learning_rate": 9.62433656957929e-06, "loss": 0.0204, "step": 200380 }, { "epoch": 77.82, "learning_rate": 9.62381877022654e-06, "loss": 0.0338, "step": 200390 }, { "epoch": 77.83, "learning_rate": 9.623300970873787e-06, "loss": 0.09, "step": 200400 }, { "epoch": 77.83, "learning_rate": 9.622783171521037e-06, "loss": 0.059, "step": 200410 }, { "epoch": 77.83, "learning_rate": 9.622265372168286e-06, "loss": 0.0646, "step": 200420 }, { "epoch": 77.84, "learning_rate": 9.621747572815536e-06, "loss": 0.0042, "step": 200430 }, { "epoch": 77.84, "learning_rate": 9.621229773462784e-06, "loss": 0.0767, "step": 200440 }, { "epoch": 77.84, "learning_rate": 9.620711974110033e-06, "loss": 0.1534, "step": 200450 }, { "epoch": 77.85, "learning_rate": 9.620194174757283e-06, "loss": 0.0959, "step": 200460 }, { "epoch": 77.85, "learning_rate": 9.619676375404533e-06, "loss": 0.1026, "step": 200470 }, { "epoch": 77.86, "learning_rate": 9.61915857605178e-06, "loss": 0.1404, "step": 200480 }, { "epoch": 77.86, "learning_rate": 9.61864077669903e-06, "loss": 0.1637, "step": 200490 }, { "epoch": 77.86, "learning_rate": 9.61812297734628e-06, "loss": 0.0012, "step": 200500 }, { "epoch": 77.87, "learning_rate": 9.61760517799353e-06, "loss": 0.0088, "step": 200510 }, { "epoch": 77.87, "learning_rate": 9.617087378640777e-06, "loss": 0.1094, "step": 200520 }, { "epoch": 77.88, "learning_rate": 9.616569579288027e-06, "loss": 0.0181, "step": 200530 }, { "epoch": 77.88, "learning_rate": 9.616051779935277e-06, "loss": 0.0517, "step": 200540 }, { "epoch": 77.88, "learning_rate": 9.615533980582525e-06, "loss": 0.0264, "step": 200550 }, { "epoch": 77.89, "learning_rate": 9.615016181229774e-06, "loss": 0.0147, "step": 200560 }, { "epoch": 77.89, "learning_rate": 9.614498381877024e-06, "loss": 0.0444, "step": 200570 }, { "epoch": 77.9, "learning_rate": 9.613980582524273e-06, "loss": 0.0784, "step": 200580 }, { "epoch": 77.9, "learning_rate": 9.613462783171521e-06, "loss": 0.02, "step": 200590 }, { "epoch": 77.9, "learning_rate": 9.612944983818771e-06, "loss": 0.0883, "step": 200600 }, { "epoch": 77.91, "learning_rate": 9.61242718446602e-06, "loss": 0.0587, "step": 200610 }, { "epoch": 77.91, "learning_rate": 9.61190938511327e-06, "loss": 0.0342, "step": 200620 }, { "epoch": 77.91, "learning_rate": 9.611391585760518e-06, "loss": 0.1098, "step": 200630 }, { "epoch": 77.92, "learning_rate": 9.610873786407768e-06, "loss": 0.0721, "step": 200640 }, { "epoch": 77.92, "learning_rate": 9.610355987055017e-06, "loss": 0.1282, "step": 200650 }, { "epoch": 77.93, "learning_rate": 9.609838187702267e-06, "loss": 0.0471, "step": 200660 }, { "epoch": 77.93, "learning_rate": 9.609320388349515e-06, "loss": 0.0335, "step": 200670 }, { "epoch": 77.93, "learning_rate": 9.608802588996765e-06, "loss": 0.0836, "step": 200680 }, { "epoch": 77.94, "learning_rate": 9.608284789644014e-06, "loss": 0.0967, "step": 200690 }, { "epoch": 77.94, "learning_rate": 9.607766990291264e-06, "loss": 0.0292, "step": 200700 }, { "epoch": 77.95, "learning_rate": 9.607249190938512e-06, "loss": 0.0132, "step": 200710 }, { "epoch": 77.95, "learning_rate": 9.606731391585761e-06, "loss": 0.021, "step": 200720 }, { "epoch": 77.95, "learning_rate": 9.606213592233011e-06, "loss": 0.1618, "step": 200730 }, { "epoch": 77.96, "learning_rate": 9.605695792880259e-06, "loss": 0.0706, "step": 200740 }, { "epoch": 77.96, "learning_rate": 9.605177993527508e-06, "loss": 0.1346, "step": 200750 }, { "epoch": 77.97, "learning_rate": 9.604660194174758e-06, "loss": 0.0009, "step": 200760 }, { "epoch": 77.97, "learning_rate": 9.604142394822008e-06, "loss": 0.0367, "step": 200770 }, { "epoch": 77.97, "learning_rate": 9.603624595469256e-06, "loss": 0.1164, "step": 200780 }, { "epoch": 77.98, "learning_rate": 9.603106796116505e-06, "loss": 0.012, "step": 200790 }, { "epoch": 77.98, "learning_rate": 9.602588996763755e-06, "loss": 0.0222, "step": 200800 }, { "epoch": 77.98, "learning_rate": 9.602071197411004e-06, "loss": 0.0746, "step": 200810 }, { "epoch": 77.99, "learning_rate": 9.601553398058252e-06, "loss": 0.0677, "step": 200820 }, { "epoch": 77.99, "learning_rate": 9.601035598705502e-06, "loss": 0.04, "step": 200830 }, { "epoch": 78.0, "learning_rate": 9.600517799352752e-06, "loss": 0.0946, "step": 200840 }, { "epoch": 78.0, "learning_rate": 9.600000000000001e-06, "loss": 0.0055, "step": 200850 }, { "epoch": 78.0, "eval_accuracy": 0.9507565337001376, "eval_loss": 0.36534062027931213, "eval_runtime": 8.2121, "eval_samples_per_second": 442.637, "eval_steps_per_second": 55.406, "step": 200850 }, { "epoch": 78.0, "learning_rate": 9.59948220064725e-06, "loss": 0.0779, "step": 200860 }, { "epoch": 78.01, "learning_rate": 9.598964401294499e-06, "loss": 0.095, "step": 200870 }, { "epoch": 78.01, "learning_rate": 9.598446601941748e-06, "loss": 0.107, "step": 200880 }, { "epoch": 78.02, "learning_rate": 9.597928802588996e-06, "loss": 0.1115, "step": 200890 }, { "epoch": 78.02, "learning_rate": 9.597411003236246e-06, "loss": 0.153, "step": 200900 }, { "epoch": 78.02, "learning_rate": 9.596893203883496e-06, "loss": 0.0514, "step": 200910 }, { "epoch": 78.03, "learning_rate": 9.596375404530745e-06, "loss": 0.0118, "step": 200920 }, { "epoch": 78.03, "learning_rate": 9.595857605177993e-06, "loss": 0.056, "step": 200930 }, { "epoch": 78.03, "learning_rate": 9.595339805825243e-06, "loss": 0.0336, "step": 200940 }, { "epoch": 78.04, "learning_rate": 9.594822006472492e-06, "loss": 0.0412, "step": 200950 }, { "epoch": 78.04, "learning_rate": 9.594304207119742e-06, "loss": 0.0638, "step": 200960 }, { "epoch": 78.05, "learning_rate": 9.59378640776699e-06, "loss": 0.0609, "step": 200970 }, { "epoch": 78.05, "learning_rate": 9.59326860841424e-06, "loss": 0.0672, "step": 200980 }, { "epoch": 78.05, "learning_rate": 9.592750809061489e-06, "loss": 0.1036, "step": 200990 }, { "epoch": 78.06, "learning_rate": 9.592233009708739e-06, "loss": 0.0163, "step": 201000 }, { "epoch": 78.06, "learning_rate": 9.591715210355987e-06, "loss": 0.0105, "step": 201010 }, { "epoch": 78.07, "learning_rate": 9.591197411003236e-06, "loss": 0.0137, "step": 201020 }, { "epoch": 78.07, "learning_rate": 9.590679611650486e-06, "loss": 0.0015, "step": 201030 }, { "epoch": 78.07, "learning_rate": 9.590161812297736e-06, "loss": 0.0531, "step": 201040 }, { "epoch": 78.08, "learning_rate": 9.589644012944983e-06, "loss": 0.1231, "step": 201050 }, { "epoch": 78.08, "learning_rate": 9.589126213592233e-06, "loss": 0.0237, "step": 201060 }, { "epoch": 78.09, "learning_rate": 9.588608414239483e-06, "loss": 0.0499, "step": 201070 }, { "epoch": 78.09, "learning_rate": 9.588090614886732e-06, "loss": 0.1048, "step": 201080 }, { "epoch": 78.09, "learning_rate": 9.58757281553398e-06, "loss": 0.1215, "step": 201090 }, { "epoch": 78.1, "learning_rate": 9.58705501618123e-06, "loss": 0.1036, "step": 201100 }, { "epoch": 78.1, "learning_rate": 9.58653721682848e-06, "loss": 0.0824, "step": 201110 }, { "epoch": 78.1, "learning_rate": 9.586019417475729e-06, "loss": 0.0212, "step": 201120 }, { "epoch": 78.11, "learning_rate": 9.585501618122977e-06, "loss": 0.0559, "step": 201130 }, { "epoch": 78.11, "learning_rate": 9.584983818770227e-06, "loss": 0.2121, "step": 201140 }, { "epoch": 78.12, "learning_rate": 9.584466019417476e-06, "loss": 0.0083, "step": 201150 }, { "epoch": 78.12, "learning_rate": 9.583948220064726e-06, "loss": 0.0503, "step": 201160 }, { "epoch": 78.12, "learning_rate": 9.583430420711974e-06, "loss": 0.051, "step": 201170 }, { "epoch": 78.13, "learning_rate": 9.582912621359223e-06, "loss": 0.1026, "step": 201180 }, { "epoch": 78.13, "learning_rate": 9.582394822006473e-06, "loss": 0.0143, "step": 201190 }, { "epoch": 78.14, "learning_rate": 9.581877022653723e-06, "loss": 0.0014, "step": 201200 }, { "epoch": 78.14, "learning_rate": 9.58135922330097e-06, "loss": 0.0295, "step": 201210 }, { "epoch": 78.14, "learning_rate": 9.58084142394822e-06, "loss": 0.0004, "step": 201220 }, { "epoch": 78.15, "learning_rate": 9.58032362459547e-06, "loss": 0.0008, "step": 201230 }, { "epoch": 78.15, "learning_rate": 9.57980582524272e-06, "loss": 0.0735, "step": 201240 }, { "epoch": 78.16, "learning_rate": 9.579288025889967e-06, "loss": 0.0074, "step": 201250 }, { "epoch": 78.16, "learning_rate": 9.578770226537217e-06, "loss": 0.0009, "step": 201260 }, { "epoch": 78.16, "learning_rate": 9.578252427184467e-06, "loss": 0.1236, "step": 201270 }, { "epoch": 78.17, "learning_rate": 9.577734627831716e-06, "loss": 0.0111, "step": 201280 }, { "epoch": 78.17, "learning_rate": 9.577216828478966e-06, "loss": 0.0358, "step": 201290 }, { "epoch": 78.17, "learning_rate": 9.576699029126214e-06, "loss": 0.0171, "step": 201300 }, { "epoch": 78.18, "learning_rate": 9.576181229773463e-06, "loss": 0.002, "step": 201310 }, { "epoch": 78.18, "learning_rate": 9.575663430420713e-06, "loss": 0.0341, "step": 201320 }, { "epoch": 78.19, "learning_rate": 9.575145631067963e-06, "loss": 0.1217, "step": 201330 }, { "epoch": 78.19, "learning_rate": 9.57462783171521e-06, "loss": 0.0121, "step": 201340 }, { "epoch": 78.19, "learning_rate": 9.57411003236246e-06, "loss": 0.0964, "step": 201350 }, { "epoch": 78.2, "learning_rate": 9.57359223300971e-06, "loss": 0.0011, "step": 201360 }, { "epoch": 78.2, "learning_rate": 9.57307443365696e-06, "loss": 0.0413, "step": 201370 }, { "epoch": 78.21, "learning_rate": 9.572556634304207e-06, "loss": 0.0321, "step": 201380 }, { "epoch": 78.21, "learning_rate": 9.572038834951457e-06, "loss": 0.0676, "step": 201390 }, { "epoch": 78.21, "learning_rate": 9.571521035598707e-06, "loss": 0.0256, "step": 201400 }, { "epoch": 78.22, "learning_rate": 9.571003236245956e-06, "loss": 0.0048, "step": 201410 }, { "epoch": 78.22, "learning_rate": 9.570485436893204e-06, "loss": 0.1065, "step": 201420 }, { "epoch": 78.23, "learning_rate": 9.569967637540454e-06, "loss": 0.0668, "step": 201430 }, { "epoch": 78.23, "learning_rate": 9.569449838187703e-06, "loss": 0.0308, "step": 201440 }, { "epoch": 78.23, "learning_rate": 9.568932038834953e-06, "loss": 0.0195, "step": 201450 }, { "epoch": 78.24, "learning_rate": 9.568414239482201e-06, "loss": 0.1277, "step": 201460 }, { "epoch": 78.24, "learning_rate": 9.56789644012945e-06, "loss": 0.0897, "step": 201470 }, { "epoch": 78.24, "learning_rate": 9.5673786407767e-06, "loss": 0.0873, "step": 201480 }, { "epoch": 78.25, "learning_rate": 9.56686084142395e-06, "loss": 0.0118, "step": 201490 }, { "epoch": 78.25, "learning_rate": 9.566343042071198e-06, "loss": 0.1278, "step": 201500 }, { "epoch": 78.26, "learning_rate": 9.565825242718447e-06, "loss": 0.1043, "step": 201510 }, { "epoch": 78.26, "learning_rate": 9.565307443365697e-06, "loss": 0.0449, "step": 201520 }, { "epoch": 78.26, "learning_rate": 9.564789644012946e-06, "loss": 0.0062, "step": 201530 }, { "epoch": 78.27, "learning_rate": 9.564271844660194e-06, "loss": 0.0303, "step": 201540 }, { "epoch": 78.27, "learning_rate": 9.563754045307444e-06, "loss": 0.0546, "step": 201550 }, { "epoch": 78.28, "learning_rate": 9.563236245954694e-06, "loss": 0.0175, "step": 201560 }, { "epoch": 78.28, "learning_rate": 9.562718446601943e-06, "loss": 0.0059, "step": 201570 }, { "epoch": 78.28, "learning_rate": 9.562200647249191e-06, "loss": 0.1638, "step": 201580 }, { "epoch": 78.29, "learning_rate": 9.56168284789644e-06, "loss": 0.0789, "step": 201590 }, { "epoch": 78.29, "learning_rate": 9.56116504854369e-06, "loss": 0.1419, "step": 201600 }, { "epoch": 78.3, "learning_rate": 9.56064724919094e-06, "loss": 0.056, "step": 201610 }, { "epoch": 78.3, "learning_rate": 9.560129449838188e-06, "loss": 0.0284, "step": 201620 }, { "epoch": 78.3, "learning_rate": 9.559611650485438e-06, "loss": 0.0095, "step": 201630 }, { "epoch": 78.31, "learning_rate": 9.559093851132687e-06, "loss": 0.067, "step": 201640 }, { "epoch": 78.31, "learning_rate": 9.558576051779937e-06, "loss": 0.1047, "step": 201650 }, { "epoch": 78.31, "learning_rate": 9.558058252427185e-06, "loss": 0.0016, "step": 201660 }, { "epoch": 78.32, "learning_rate": 9.557540453074434e-06, "loss": 0.0404, "step": 201670 }, { "epoch": 78.32, "learning_rate": 9.557022653721684e-06, "loss": 0.0386, "step": 201680 }, { "epoch": 78.33, "learning_rate": 9.556504854368934e-06, "loss": 0.0266, "step": 201690 }, { "epoch": 78.33, "learning_rate": 9.555987055016182e-06, "loss": 0.0586, "step": 201700 }, { "epoch": 78.33, "learning_rate": 9.555469255663431e-06, "loss": 0.0505, "step": 201710 }, { "epoch": 78.34, "learning_rate": 9.55495145631068e-06, "loss": 0.2207, "step": 201720 }, { "epoch": 78.34, "learning_rate": 9.55443365695793e-06, "loss": 0.021, "step": 201730 }, { "epoch": 78.35, "learning_rate": 9.553915857605178e-06, "loss": 0.0674, "step": 201740 }, { "epoch": 78.35, "learning_rate": 9.553398058252428e-06, "loss": 0.0953, "step": 201750 }, { "epoch": 78.35, "learning_rate": 9.552880258899678e-06, "loss": 0.0167, "step": 201760 }, { "epoch": 78.36, "learning_rate": 9.552362459546927e-06, "loss": 0.068, "step": 201770 }, { "epoch": 78.36, "learning_rate": 9.551844660194175e-06, "loss": 0.0209, "step": 201780 }, { "epoch": 78.37, "learning_rate": 9.551326860841425e-06, "loss": 0.0109, "step": 201790 }, { "epoch": 78.37, "learning_rate": 9.550809061488674e-06, "loss": 0.0478, "step": 201800 }, { "epoch": 78.37, "learning_rate": 9.550291262135924e-06, "loss": 0.0004, "step": 201810 }, { "epoch": 78.38, "learning_rate": 9.549773462783174e-06, "loss": 0.0771, "step": 201820 }, { "epoch": 78.38, "learning_rate": 9.549255663430421e-06, "loss": 0.1698, "step": 201830 }, { "epoch": 78.38, "learning_rate": 9.548737864077671e-06, "loss": 0.0782, "step": 201840 }, { "epoch": 78.39, "learning_rate": 9.54822006472492e-06, "loss": 0.0365, "step": 201850 }, { "epoch": 78.39, "learning_rate": 9.54770226537217e-06, "loss": 0.1406, "step": 201860 }, { "epoch": 78.4, "learning_rate": 9.547184466019418e-06, "loss": 0.1312, "step": 201870 }, { "epoch": 78.4, "learning_rate": 9.546666666666668e-06, "loss": 0.0169, "step": 201880 }, { "epoch": 78.4, "learning_rate": 9.546148867313917e-06, "loss": 0.0269, "step": 201890 }, { "epoch": 78.41, "learning_rate": 9.545631067961167e-06, "loss": 0.0433, "step": 201900 }, { "epoch": 78.41, "learning_rate": 9.545113268608415e-06, "loss": 0.3131, "step": 201910 }, { "epoch": 78.42, "learning_rate": 9.544595469255665e-06, "loss": 0.007, "step": 201920 }, { "epoch": 78.42, "learning_rate": 9.544077669902914e-06, "loss": 0.1338, "step": 201930 }, { "epoch": 78.42, "learning_rate": 9.543559870550164e-06, "loss": 0.0357, "step": 201940 }, { "epoch": 78.43, "learning_rate": 9.543042071197412e-06, "loss": 0.0385, "step": 201950 }, { "epoch": 78.43, "learning_rate": 9.542524271844661e-06, "loss": 0.0562, "step": 201960 }, { "epoch": 78.43, "learning_rate": 9.542006472491911e-06, "loss": 0.0997, "step": 201970 }, { "epoch": 78.44, "learning_rate": 9.54148867313916e-06, "loss": 0.088, "step": 201980 }, { "epoch": 78.44, "learning_rate": 9.540970873786409e-06, "loss": 0.0048, "step": 201990 }, { "epoch": 78.45, "learning_rate": 9.540453074433658e-06, "loss": 0.1222, "step": 202000 }, { "epoch": 78.45, "learning_rate": 9.539935275080908e-06, "loss": 0.0402, "step": 202010 }, { "epoch": 78.45, "learning_rate": 9.539417475728156e-06, "loss": 0.0481, "step": 202020 }, { "epoch": 78.46, "learning_rate": 9.538899676375405e-06, "loss": 0.026, "step": 202030 }, { "epoch": 78.46, "learning_rate": 9.538381877022655e-06, "loss": 0.02, "step": 202040 }, { "epoch": 78.47, "learning_rate": 9.537864077669905e-06, "loss": 0.03, "step": 202050 }, { "epoch": 78.47, "learning_rate": 9.537346278317153e-06, "loss": 0.1672, "step": 202060 }, { "epoch": 78.47, "learning_rate": 9.536828478964402e-06, "loss": 0.015, "step": 202070 }, { "epoch": 78.48, "learning_rate": 9.536310679611652e-06, "loss": 0.141, "step": 202080 }, { "epoch": 78.48, "learning_rate": 9.535792880258901e-06, "loss": 0.054, "step": 202090 }, { "epoch": 78.49, "learning_rate": 9.53527508090615e-06, "loss": 0.0587, "step": 202100 }, { "epoch": 78.49, "learning_rate": 9.534757281553399e-06, "loss": 0.1602, "step": 202110 }, { "epoch": 78.49, "learning_rate": 9.534239482200649e-06, "loss": 0.0943, "step": 202120 }, { "epoch": 78.5, "learning_rate": 9.533721682847898e-06, "loss": 0.0348, "step": 202130 }, { "epoch": 78.5, "learning_rate": 9.533203883495146e-06, "loss": 0.1064, "step": 202140 }, { "epoch": 78.5, "learning_rate": 9.532686084142396e-06, "loss": 0.0455, "step": 202150 }, { "epoch": 78.51, "learning_rate": 9.532168284789645e-06, "loss": 0.1528, "step": 202160 }, { "epoch": 78.51, "learning_rate": 9.531650485436893e-06, "loss": 0.0234, "step": 202170 }, { "epoch": 78.52, "learning_rate": 9.531132686084143e-06, "loss": 0.085, "step": 202180 }, { "epoch": 78.52, "learning_rate": 9.530614886731392e-06, "loss": 0.0751, "step": 202190 }, { "epoch": 78.52, "learning_rate": 9.530097087378642e-06, "loss": 0.0284, "step": 202200 }, { "epoch": 78.53, "learning_rate": 9.52957928802589e-06, "loss": 0.1681, "step": 202210 }, { "epoch": 78.53, "learning_rate": 9.52906148867314e-06, "loss": 0.0205, "step": 202220 }, { "epoch": 78.54, "learning_rate": 9.52854368932039e-06, "loss": 0.1132, "step": 202230 }, { "epoch": 78.54, "learning_rate": 9.528025889967639e-06, "loss": 0.0539, "step": 202240 }, { "epoch": 78.54, "learning_rate": 9.527508090614887e-06, "loss": 0.1909, "step": 202250 }, { "epoch": 78.55, "learning_rate": 9.526990291262136e-06, "loss": 0.0579, "step": 202260 }, { "epoch": 78.55, "learning_rate": 9.526472491909386e-06, "loss": 0.0678, "step": 202270 }, { "epoch": 78.56, "learning_rate": 9.525954692556636e-06, "loss": 0.0003, "step": 202280 }, { "epoch": 78.56, "learning_rate": 9.525436893203884e-06, "loss": 0.0228, "step": 202290 }, { "epoch": 78.56, "learning_rate": 9.524919093851133e-06, "loss": 0.0261, "step": 202300 }, { "epoch": 78.57, "learning_rate": 9.524401294498383e-06, "loss": 0.0643, "step": 202310 }, { "epoch": 78.57, "learning_rate": 9.523883495145632e-06, "loss": 0.1013, "step": 202320 }, { "epoch": 78.57, "learning_rate": 9.52336569579288e-06, "loss": 0.025, "step": 202330 }, { "epoch": 78.58, "learning_rate": 9.52284789644013e-06, "loss": 0.0003, "step": 202340 }, { "epoch": 78.58, "learning_rate": 9.52233009708738e-06, "loss": 0.075, "step": 202350 }, { "epoch": 78.59, "learning_rate": 9.521812297734628e-06, "loss": 0.0434, "step": 202360 }, { "epoch": 78.59, "learning_rate": 9.521294498381877e-06, "loss": 0.0556, "step": 202370 }, { "epoch": 78.59, "learning_rate": 9.520776699029127e-06, "loss": 0.0928, "step": 202380 }, { "epoch": 78.6, "learning_rate": 9.520258899676376e-06, "loss": 0.126, "step": 202390 }, { "epoch": 78.6, "learning_rate": 9.519741100323624e-06, "loss": 0.0267, "step": 202400 }, { "epoch": 78.61, "learning_rate": 9.519223300970874e-06, "loss": 0.0683, "step": 202410 }, { "epoch": 78.61, "learning_rate": 9.518705501618124e-06, "loss": 0.0825, "step": 202420 }, { "epoch": 78.61, "learning_rate": 9.518187702265373e-06, "loss": 0.0492, "step": 202430 }, { "epoch": 78.62, "learning_rate": 9.517669902912621e-06, "loss": 0.057, "step": 202440 }, { "epoch": 78.62, "learning_rate": 9.51715210355987e-06, "loss": 0.0022, "step": 202450 }, { "epoch": 78.63, "learning_rate": 9.51663430420712e-06, "loss": 0.0499, "step": 202460 }, { "epoch": 78.63, "learning_rate": 9.51611650485437e-06, "loss": 0.0014, "step": 202470 }, { "epoch": 78.63, "learning_rate": 9.515598705501618e-06, "loss": 0.0425, "step": 202480 }, { "epoch": 78.64, "learning_rate": 9.515080906148867e-06, "loss": 0.1058, "step": 202490 }, { "epoch": 78.64, "learning_rate": 9.514563106796117e-06, "loss": 0.0913, "step": 202500 }, { "epoch": 78.64, "learning_rate": 9.514045307443367e-06, "loss": 0.027, "step": 202510 }, { "epoch": 78.65, "learning_rate": 9.513527508090615e-06, "loss": 0.0787, "step": 202520 }, { "epoch": 78.65, "learning_rate": 9.513009708737864e-06, "loss": 0.1203, "step": 202530 }, { "epoch": 78.66, "learning_rate": 9.512491909385114e-06, "loss": 0.047, "step": 202540 }, { "epoch": 78.66, "learning_rate": 9.511974110032363e-06, "loss": 0.0011, "step": 202550 }, { "epoch": 78.66, "learning_rate": 9.511456310679611e-06, "loss": 0.0698, "step": 202560 }, { "epoch": 78.67, "learning_rate": 9.510938511326861e-06, "loss": 0.0651, "step": 202570 }, { "epoch": 78.67, "learning_rate": 9.51042071197411e-06, "loss": 0.0631, "step": 202580 }, { "epoch": 78.68, "learning_rate": 9.50990291262136e-06, "loss": 0.0101, "step": 202590 }, { "epoch": 78.68, "learning_rate": 9.509385113268608e-06, "loss": 0.0445, "step": 202600 }, { "epoch": 78.68, "learning_rate": 9.508867313915858e-06, "loss": 0.0536, "step": 202610 }, { "epoch": 78.69, "learning_rate": 9.508349514563107e-06, "loss": 0.035, "step": 202620 }, { "epoch": 78.69, "learning_rate": 9.507831715210357e-06, "loss": 0.0666, "step": 202630 }, { "epoch": 78.7, "learning_rate": 9.507313915857605e-06, "loss": 0.1132, "step": 202640 }, { "epoch": 78.7, "learning_rate": 9.506796116504855e-06, "loss": 0.0834, "step": 202650 }, { "epoch": 78.7, "learning_rate": 9.506278317152104e-06, "loss": 0.1332, "step": 202660 }, { "epoch": 78.71, "learning_rate": 9.505760517799354e-06, "loss": 0.0664, "step": 202670 }, { "epoch": 78.71, "learning_rate": 9.505242718446602e-06, "loss": 0.0582, "step": 202680 }, { "epoch": 78.71, "learning_rate": 9.504724919093851e-06, "loss": 0.1476, "step": 202690 }, { "epoch": 78.72, "learning_rate": 9.504207119741101e-06, "loss": 0.0935, "step": 202700 }, { "epoch": 78.72, "learning_rate": 9.50368932038835e-06, "loss": 0.1115, "step": 202710 }, { "epoch": 78.73, "learning_rate": 9.503171521035599e-06, "loss": 0.047, "step": 202720 }, { "epoch": 78.73, "learning_rate": 9.502653721682848e-06, "loss": 0.0031, "step": 202730 }, { "epoch": 78.73, "learning_rate": 9.502135922330098e-06, "loss": 0.0584, "step": 202740 }, { "epoch": 78.74, "learning_rate": 9.501618122977347e-06, "loss": 0.0966, "step": 202750 }, { "epoch": 78.74, "learning_rate": 9.501100323624595e-06, "loss": 0.1354, "step": 202760 }, { "epoch": 78.75, "learning_rate": 9.500582524271845e-06, "loss": 0.0788, "step": 202770 }, { "epoch": 78.75, "learning_rate": 9.500064724919095e-06, "loss": 0.0725, "step": 202780 }, { "epoch": 78.75, "learning_rate": 9.499546925566344e-06, "loss": 0.1238, "step": 202790 }, { "epoch": 78.76, "learning_rate": 9.499029126213592e-06, "loss": 0.0355, "step": 202800 }, { "epoch": 78.76, "learning_rate": 9.498511326860842e-06, "loss": 0.0259, "step": 202810 }, { "epoch": 78.77, "learning_rate": 9.497993527508091e-06, "loss": 0.0919, "step": 202820 }, { "epoch": 78.77, "learning_rate": 9.497475728155341e-06, "loss": 0.0402, "step": 202830 }, { "epoch": 78.77, "learning_rate": 9.496957928802589e-06, "loss": 0.1655, "step": 202840 }, { "epoch": 78.78, "learning_rate": 9.496440129449838e-06, "loss": 0.0887, "step": 202850 }, { "epoch": 78.78, "learning_rate": 9.495922330097088e-06, "loss": 0.1879, "step": 202860 }, { "epoch": 78.78, "learning_rate": 9.495404530744338e-06, "loss": 0.0686, "step": 202870 }, { "epoch": 78.79, "learning_rate": 9.494886731391586e-06, "loss": 0.0364, "step": 202880 }, { "epoch": 78.79, "learning_rate": 9.494368932038835e-06, "loss": 0.0234, "step": 202890 }, { "epoch": 78.8, "learning_rate": 9.493851132686085e-06, "loss": 0.1594, "step": 202900 }, { "epoch": 78.8, "learning_rate": 9.493333333333334e-06, "loss": 0.0551, "step": 202910 }, { "epoch": 78.8, "learning_rate": 9.492815533980582e-06, "loss": 0.0562, "step": 202920 }, { "epoch": 78.81, "learning_rate": 9.492297734627832e-06, "loss": 0.4693, "step": 202930 }, { "epoch": 78.81, "learning_rate": 9.491779935275082e-06, "loss": 0.0929, "step": 202940 }, { "epoch": 78.82, "learning_rate": 9.491262135922331e-06, "loss": 0.1009, "step": 202950 }, { "epoch": 78.82, "learning_rate": 9.490744336569581e-06, "loss": 0.1005, "step": 202960 }, { "epoch": 78.82, "learning_rate": 9.490226537216829e-06, "loss": 0.0393, "step": 202970 }, { "epoch": 78.83, "learning_rate": 9.489708737864078e-06, "loss": 0.0623, "step": 202980 }, { "epoch": 78.83, "learning_rate": 9.489190938511328e-06, "loss": 0.0271, "step": 202990 }, { "epoch": 78.83, "learning_rate": 9.488673139158578e-06, "loss": 0.0198, "step": 203000 }, { "epoch": 78.84, "learning_rate": 9.488155339805826e-06, "loss": 0.0465, "step": 203010 }, { "epoch": 78.84, "learning_rate": 9.487637540453075e-06, "loss": 0.0123, "step": 203020 }, { "epoch": 78.85, "learning_rate": 9.487119741100325e-06, "loss": 0.0394, "step": 203030 }, { "epoch": 78.85, "learning_rate": 9.486601941747574e-06, "loss": 0.1156, "step": 203040 }, { "epoch": 78.85, "learning_rate": 9.486084142394822e-06, "loss": 0.0494, "step": 203050 }, { "epoch": 78.86, "learning_rate": 9.485566343042072e-06, "loss": 0.2171, "step": 203060 }, { "epoch": 78.86, "learning_rate": 9.485048543689322e-06, "loss": 0.0489, "step": 203070 }, { "epoch": 78.87, "learning_rate": 9.484530744336571e-06, "loss": 0.0827, "step": 203080 }, { "epoch": 78.87, "learning_rate": 9.484012944983819e-06, "loss": 0.0045, "step": 203090 }, { "epoch": 78.87, "learning_rate": 9.483495145631069e-06, "loss": 0.0045, "step": 203100 }, { "epoch": 78.88, "learning_rate": 9.482977346278318e-06, "loss": 0.0674, "step": 203110 }, { "epoch": 78.88, "learning_rate": 9.482459546925568e-06, "loss": 0.0246, "step": 203120 }, { "epoch": 78.89, "learning_rate": 9.481941747572816e-06, "loss": 0.0093, "step": 203130 }, { "epoch": 78.89, "learning_rate": 9.481423948220066e-06, "loss": 0.09, "step": 203140 }, { "epoch": 78.89, "learning_rate": 9.480906148867315e-06, "loss": 0.0052, "step": 203150 }, { "epoch": 78.9, "learning_rate": 9.480388349514565e-06, "loss": 0.1577, "step": 203160 }, { "epoch": 78.9, "learning_rate": 9.479870550161813e-06, "loss": 0.1233, "step": 203170 }, { "epoch": 78.9, "learning_rate": 9.479352750809062e-06, "loss": 0.1578, "step": 203180 }, { "epoch": 78.91, "learning_rate": 9.478834951456312e-06, "loss": 0.064, "step": 203190 }, { "epoch": 78.91, "learning_rate": 9.478317152103562e-06, "loss": 0.031, "step": 203200 }, { "epoch": 78.92, "learning_rate": 9.47779935275081e-06, "loss": 0.0732, "step": 203210 }, { "epoch": 78.92, "learning_rate": 9.477281553398059e-06, "loss": 0.0991, "step": 203220 }, { "epoch": 78.92, "learning_rate": 9.476763754045309e-06, "loss": 0.0566, "step": 203230 }, { "epoch": 78.93, "learning_rate": 9.476245954692558e-06, "loss": 0.0271, "step": 203240 }, { "epoch": 78.93, "learning_rate": 9.475728155339806e-06, "loss": 0.0617, "step": 203250 }, { "epoch": 78.94, "learning_rate": 9.475210355987056e-06, "loss": 0.0155, "step": 203260 }, { "epoch": 78.94, "learning_rate": 9.474692556634305e-06, "loss": 0.0194, "step": 203270 }, { "epoch": 78.94, "learning_rate": 9.474174757281555e-06, "loss": 0.0711, "step": 203280 }, { "epoch": 78.95, "learning_rate": 9.473656957928803e-06, "loss": 0.0493, "step": 203290 }, { "epoch": 78.95, "learning_rate": 9.473139158576053e-06, "loss": 0.0831, "step": 203300 }, { "epoch": 78.96, "learning_rate": 9.472621359223302e-06, "loss": 0.002, "step": 203310 }, { "epoch": 78.96, "learning_rate": 9.472103559870552e-06, "loss": 0.0491, "step": 203320 }, { "epoch": 78.96, "learning_rate": 9.4715857605178e-06, "loss": 0.0478, "step": 203330 }, { "epoch": 78.97, "learning_rate": 9.47106796116505e-06, "loss": 0.0745, "step": 203340 }, { "epoch": 78.97, "learning_rate": 9.470550161812299e-06, "loss": 0.079, "step": 203350 }, { "epoch": 78.97, "learning_rate": 9.470032362459549e-06, "loss": 0.2838, "step": 203360 }, { "epoch": 78.98, "learning_rate": 9.469514563106797e-06, "loss": 0.2464, "step": 203370 }, { "epoch": 78.98, "learning_rate": 9.468996763754046e-06, "loss": 0.0393, "step": 203380 }, { "epoch": 78.99, "learning_rate": 9.468478964401296e-06, "loss": 0.0001, "step": 203390 }, { "epoch": 78.99, "learning_rate": 9.467961165048545e-06, "loss": 0.0093, "step": 203400 }, { "epoch": 78.99, "learning_rate": 9.467443365695793e-06, "loss": 0.0123, "step": 203410 }, { "epoch": 79.0, "learning_rate": 9.466925566343043e-06, "loss": 0.1442, "step": 203420 }, { "epoch": 79.0, "eval_accuracy": 0.9529573590096286, "eval_loss": 0.34370800852775574, "eval_runtime": 8.2133, "eval_samples_per_second": 442.572, "eval_steps_per_second": 55.398, "step": 203425 }, { "epoch": 79.0, "learning_rate": 9.466407766990293e-06, "loss": 0.0215, "step": 203430 }, { "epoch": 79.01, "learning_rate": 9.465889967637542e-06, "loss": 0.0243, "step": 203440 }, { "epoch": 79.01, "learning_rate": 9.46537216828479e-06, "loss": 0.0405, "step": 203450 }, { "epoch": 79.01, "learning_rate": 9.46485436893204e-06, "loss": 0.0826, "step": 203460 }, { "epoch": 79.02, "learning_rate": 9.46433656957929e-06, "loss": 0.0668, "step": 203470 }, { "epoch": 79.02, "learning_rate": 9.463818770226539e-06, "loss": 0.0086, "step": 203480 }, { "epoch": 79.03, "learning_rate": 9.463300970873787e-06, "loss": 0.0773, "step": 203490 }, { "epoch": 79.03, "learning_rate": 9.462783171521037e-06, "loss": 0.091, "step": 203500 }, { "epoch": 79.03, "learning_rate": 9.462265372168286e-06, "loss": 0.0131, "step": 203510 }, { "epoch": 79.04, "learning_rate": 9.461747572815536e-06, "loss": 0.0552, "step": 203520 }, { "epoch": 79.04, "learning_rate": 9.461229773462784e-06, "loss": 0.1489, "step": 203530 }, { "epoch": 79.04, "learning_rate": 9.460711974110033e-06, "loss": 0.2338, "step": 203540 }, { "epoch": 79.05, "learning_rate": 9.460194174757283e-06, "loss": 0.0773, "step": 203550 }, { "epoch": 79.05, "learning_rate": 9.459676375404533e-06, "loss": 0.0177, "step": 203560 }, { "epoch": 79.06, "learning_rate": 9.45915857605178e-06, "loss": 0.0946, "step": 203570 }, { "epoch": 79.06, "learning_rate": 9.45864077669903e-06, "loss": 0.0895, "step": 203580 }, { "epoch": 79.06, "learning_rate": 9.45812297734628e-06, "loss": 0.0428, "step": 203590 }, { "epoch": 79.07, "learning_rate": 9.45760517799353e-06, "loss": 0.012, "step": 203600 }, { "epoch": 79.07, "learning_rate": 9.457087378640777e-06, "loss": 0.0153, "step": 203610 }, { "epoch": 79.08, "learning_rate": 9.456569579288027e-06, "loss": 0.0332, "step": 203620 }, { "epoch": 79.08, "learning_rate": 9.456051779935276e-06, "loss": 0.0001, "step": 203630 }, { "epoch": 79.08, "learning_rate": 9.455533980582524e-06, "loss": 0.0975, "step": 203640 }, { "epoch": 79.09, "learning_rate": 9.455016181229774e-06, "loss": 0.0544, "step": 203650 }, { "epoch": 79.09, "learning_rate": 9.454498381877024e-06, "loss": 0.1182, "step": 203660 }, { "epoch": 79.1, "learning_rate": 9.453980582524273e-06, "loss": 0.0481, "step": 203670 }, { "epoch": 79.1, "learning_rate": 9.453462783171521e-06, "loss": 0.0123, "step": 203680 }, { "epoch": 79.1, "learning_rate": 9.45294498381877e-06, "loss": 0.1767, "step": 203690 }, { "epoch": 79.11, "learning_rate": 9.45242718446602e-06, "loss": 0.1113, "step": 203700 }, { "epoch": 79.11, "learning_rate": 9.45190938511327e-06, "loss": 0.0023, "step": 203710 }, { "epoch": 79.11, "learning_rate": 9.451391585760518e-06, "loss": 0.0117, "step": 203720 }, { "epoch": 79.12, "learning_rate": 9.450873786407768e-06, "loss": 0.0651, "step": 203730 }, { "epoch": 79.12, "learning_rate": 9.450355987055017e-06, "loss": 0.0793, "step": 203740 }, { "epoch": 79.13, "learning_rate": 9.449838187702267e-06, "loss": 0.0913, "step": 203750 }, { "epoch": 79.13, "learning_rate": 9.449320388349515e-06, "loss": 0.008, "step": 203760 }, { "epoch": 79.13, "learning_rate": 9.448802588996764e-06, "loss": 0.0823, "step": 203770 }, { "epoch": 79.14, "learning_rate": 9.448284789644014e-06, "loss": 0.1207, "step": 203780 }, { "epoch": 79.14, "learning_rate": 9.447766990291264e-06, "loss": 0.0821, "step": 203790 }, { "epoch": 79.15, "learning_rate": 9.447249190938512e-06, "loss": 0.0655, "step": 203800 }, { "epoch": 79.15, "learning_rate": 9.446731391585761e-06, "loss": 0.1306, "step": 203810 }, { "epoch": 79.15, "learning_rate": 9.44621359223301e-06, "loss": 0.0578, "step": 203820 }, { "epoch": 79.16, "learning_rate": 9.445695792880259e-06, "loss": 0.0612, "step": 203830 }, { "epoch": 79.16, "learning_rate": 9.445177993527508e-06, "loss": 0.0676, "step": 203840 }, { "epoch": 79.17, "learning_rate": 9.444660194174758e-06, "loss": 0.0574, "step": 203850 }, { "epoch": 79.17, "learning_rate": 9.444142394822008e-06, "loss": 0.0633, "step": 203860 }, { "epoch": 79.17, "learning_rate": 9.443624595469255e-06, "loss": 0.1024, "step": 203870 }, { "epoch": 79.18, "learning_rate": 9.443106796116505e-06, "loss": 0.0316, "step": 203880 }, { "epoch": 79.18, "learning_rate": 9.442588996763755e-06, "loss": 0.0098, "step": 203890 }, { "epoch": 79.18, "learning_rate": 9.442071197411004e-06, "loss": 0.0003, "step": 203900 }, { "epoch": 79.19, "learning_rate": 9.441553398058252e-06, "loss": 0.0588, "step": 203910 }, { "epoch": 79.19, "learning_rate": 9.441035598705502e-06, "loss": 0.0797, "step": 203920 }, { "epoch": 79.2, "learning_rate": 9.440517799352751e-06, "loss": 0.0092, "step": 203930 }, { "epoch": 79.2, "learning_rate": 9.440000000000001e-06, "loss": 0.0717, "step": 203940 }, { "epoch": 79.2, "learning_rate": 9.439482200647249e-06, "loss": 0.0446, "step": 203950 }, { "epoch": 79.21, "learning_rate": 9.438964401294499e-06, "loss": 0.0004, "step": 203960 }, { "epoch": 79.21, "learning_rate": 9.438446601941748e-06, "loss": 0.0452, "step": 203970 }, { "epoch": 79.22, "learning_rate": 9.437928802588996e-06, "loss": 0.0564, "step": 203980 }, { "epoch": 79.22, "learning_rate": 9.437411003236246e-06, "loss": 0.0034, "step": 203990 }, { "epoch": 79.22, "learning_rate": 9.436893203883495e-06, "loss": 0.092, "step": 204000 }, { "epoch": 79.23, "learning_rate": 9.436375404530745e-06, "loss": 0.1499, "step": 204010 }, { "epoch": 79.23, "learning_rate": 9.435857605177993e-06, "loss": 0.0012, "step": 204020 }, { "epoch": 79.23, "learning_rate": 9.435339805825243e-06, "loss": 0.0254, "step": 204030 }, { "epoch": 79.24, "learning_rate": 9.434822006472492e-06, "loss": 0.1441, "step": 204040 }, { "epoch": 79.24, "learning_rate": 9.434304207119742e-06, "loss": 0.0348, "step": 204050 }, { "epoch": 79.25, "learning_rate": 9.433786407766991e-06, "loss": 0.0188, "step": 204060 }, { "epoch": 79.25, "learning_rate": 9.43326860841424e-06, "loss": 0.0662, "step": 204070 }, { "epoch": 79.25, "learning_rate": 9.432750809061489e-06, "loss": 0.1723, "step": 204080 }, { "epoch": 79.26, "learning_rate": 9.432233009708739e-06, "loss": 0.0952, "step": 204090 }, { "epoch": 79.26, "learning_rate": 9.431715210355988e-06, "loss": 0.0452, "step": 204100 }, { "epoch": 79.27, "learning_rate": 9.431197411003236e-06, "loss": 0.0775, "step": 204110 }, { "epoch": 79.27, "learning_rate": 9.430679611650486e-06, "loss": 0.0424, "step": 204120 }, { "epoch": 79.27, "learning_rate": 9.430161812297735e-06, "loss": 0.1063, "step": 204130 }, { "epoch": 79.28, "learning_rate": 9.429644012944985e-06, "loss": 0.0951, "step": 204140 }, { "epoch": 79.28, "learning_rate": 9.429126213592233e-06, "loss": 0.0609, "step": 204150 }, { "epoch": 79.29, "learning_rate": 9.428608414239483e-06, "loss": 0.0816, "step": 204160 }, { "epoch": 79.29, "learning_rate": 9.428090614886732e-06, "loss": 0.2035, "step": 204170 }, { "epoch": 79.29, "learning_rate": 9.427572815533982e-06, "loss": 0.0802, "step": 204180 }, { "epoch": 79.3, "learning_rate": 9.42705501618123e-06, "loss": 0.0143, "step": 204190 }, { "epoch": 79.3, "learning_rate": 9.42653721682848e-06, "loss": 0.0109, "step": 204200 }, { "epoch": 79.3, "learning_rate": 9.426019417475729e-06, "loss": 0.0668, "step": 204210 }, { "epoch": 79.31, "learning_rate": 9.425501618122979e-06, "loss": 0.0482, "step": 204220 }, { "epoch": 79.31, "learning_rate": 9.424983818770226e-06, "loss": 0.1096, "step": 204230 }, { "epoch": 79.32, "learning_rate": 9.424466019417476e-06, "loss": 0.002, "step": 204240 }, { "epoch": 79.32, "learning_rate": 9.423948220064726e-06, "loss": 0.0594, "step": 204250 }, { "epoch": 79.32, "learning_rate": 9.423430420711975e-06, "loss": 0.0215, "step": 204260 }, { "epoch": 79.33, "learning_rate": 9.422912621359223e-06, "loss": 0.1454, "step": 204270 }, { "epoch": 79.33, "learning_rate": 9.422394822006473e-06, "loss": 0.0388, "step": 204280 }, { "epoch": 79.34, "learning_rate": 9.421877022653722e-06, "loss": 0.1316, "step": 204290 }, { "epoch": 79.34, "learning_rate": 9.421359223300972e-06, "loss": 0.0149, "step": 204300 }, { "epoch": 79.34, "learning_rate": 9.42084142394822e-06, "loss": 0.0736, "step": 204310 }, { "epoch": 79.35, "learning_rate": 9.42032362459547e-06, "loss": 0.0941, "step": 204320 }, { "epoch": 79.35, "learning_rate": 9.41980582524272e-06, "loss": 0.0467, "step": 204330 }, { "epoch": 79.36, "learning_rate": 9.419288025889969e-06, "loss": 0.0352, "step": 204340 }, { "epoch": 79.36, "learning_rate": 9.418770226537217e-06, "loss": 0.0983, "step": 204350 }, { "epoch": 79.36, "learning_rate": 9.418252427184466e-06, "loss": 0.0121, "step": 204360 }, { "epoch": 79.37, "learning_rate": 9.417734627831716e-06, "loss": 0.0323, "step": 204370 }, { "epoch": 79.37, "learning_rate": 9.417216828478966e-06, "loss": 0.026, "step": 204380 }, { "epoch": 79.37, "learning_rate": 9.416699029126214e-06, "loss": 0.0925, "step": 204390 }, { "epoch": 79.38, "learning_rate": 9.416181229773463e-06, "loss": 0.0121, "step": 204400 }, { "epoch": 79.38, "learning_rate": 9.415663430420713e-06, "loss": 0.1486, "step": 204410 }, { "epoch": 79.39, "learning_rate": 9.415145631067962e-06, "loss": 0.0494, "step": 204420 }, { "epoch": 79.39, "learning_rate": 9.41462783171521e-06, "loss": 0.1412, "step": 204430 }, { "epoch": 79.39, "learning_rate": 9.41411003236246e-06, "loss": 0.1564, "step": 204440 }, { "epoch": 79.4, "learning_rate": 9.41359223300971e-06, "loss": 0.0455, "step": 204450 }, { "epoch": 79.4, "learning_rate": 9.41307443365696e-06, "loss": 0.1792, "step": 204460 }, { "epoch": 79.41, "learning_rate": 9.412556634304207e-06, "loss": 0.119, "step": 204470 }, { "epoch": 79.41, "learning_rate": 9.412038834951457e-06, "loss": 0.0333, "step": 204480 }, { "epoch": 79.41, "learning_rate": 9.411521035598706e-06, "loss": 0.0799, "step": 204490 }, { "epoch": 79.42, "learning_rate": 9.411003236245956e-06, "loss": 0.1046, "step": 204500 }, { "epoch": 79.42, "learning_rate": 9.410485436893204e-06, "loss": 0.0444, "step": 204510 }, { "epoch": 79.43, "learning_rate": 9.409967637540454e-06, "loss": 0.0618, "step": 204520 }, { "epoch": 79.43, "learning_rate": 9.409449838187703e-06, "loss": 0.092, "step": 204530 }, { "epoch": 79.43, "learning_rate": 9.408932038834953e-06, "loss": 0.0721, "step": 204540 }, { "epoch": 79.44, "learning_rate": 9.4084142394822e-06, "loss": 0.0582, "step": 204550 }, { "epoch": 79.44, "learning_rate": 9.40789644012945e-06, "loss": 0.0133, "step": 204560 }, { "epoch": 79.44, "learning_rate": 9.4073786407767e-06, "loss": 0.2454, "step": 204570 }, { "epoch": 79.45, "learning_rate": 9.40686084142395e-06, "loss": 0.1449, "step": 204580 }, { "epoch": 79.45, "learning_rate": 9.406343042071197e-06, "loss": 0.0703, "step": 204590 }, { "epoch": 79.46, "learning_rate": 9.405825242718447e-06, "loss": 0.0587, "step": 204600 }, { "epoch": 79.46, "learning_rate": 9.405307443365697e-06, "loss": 0.0351, "step": 204610 }, { "epoch": 79.46, "learning_rate": 9.404789644012946e-06, "loss": 0.0488, "step": 204620 }, { "epoch": 79.47, "learning_rate": 9.404271844660196e-06, "loss": 0.0469, "step": 204630 }, { "epoch": 79.47, "learning_rate": 9.403754045307444e-06, "loss": 0.0325, "step": 204640 }, { "epoch": 79.48, "learning_rate": 9.403236245954693e-06, "loss": 0.1569, "step": 204650 }, { "epoch": 79.48, "learning_rate": 9.402718446601943e-06, "loss": 0.1724, "step": 204660 }, { "epoch": 79.48, "learning_rate": 9.402200647249193e-06, "loss": 0.0611, "step": 204670 }, { "epoch": 79.49, "learning_rate": 9.40168284789644e-06, "loss": 0.0665, "step": 204680 }, { "epoch": 79.49, "learning_rate": 9.40116504854369e-06, "loss": 0.0588, "step": 204690 }, { "epoch": 79.5, "learning_rate": 9.40064724919094e-06, "loss": 0.0473, "step": 204700 }, { "epoch": 79.5, "learning_rate": 9.40012944983819e-06, "loss": 0.1229, "step": 204710 }, { "epoch": 79.5, "learning_rate": 9.399611650485437e-06, "loss": 0.0701, "step": 204720 }, { "epoch": 79.51, "learning_rate": 9.399093851132687e-06, "loss": 0.0505, "step": 204730 }, { "epoch": 79.51, "learning_rate": 9.398576051779937e-06, "loss": 0.0267, "step": 204740 }, { "epoch": 79.51, "learning_rate": 9.398058252427186e-06, "loss": 0.059, "step": 204750 }, { "epoch": 79.52, "learning_rate": 9.397540453074434e-06, "loss": 0.0815, "step": 204760 }, { "epoch": 79.52, "learning_rate": 9.397022653721684e-06, "loss": 0.0207, "step": 204770 }, { "epoch": 79.53, "learning_rate": 9.396504854368933e-06, "loss": 0.0948, "step": 204780 }, { "epoch": 79.53, "learning_rate": 9.395987055016183e-06, "loss": 0.0263, "step": 204790 }, { "epoch": 79.53, "learning_rate": 9.395469255663431e-06, "loss": 0.0982, "step": 204800 }, { "epoch": 79.54, "learning_rate": 9.39495145631068e-06, "loss": 0.0094, "step": 204810 }, { "epoch": 79.54, "learning_rate": 9.39443365695793e-06, "loss": 0.0636, "step": 204820 }, { "epoch": 79.55, "learning_rate": 9.39391585760518e-06, "loss": 0.0495, "step": 204830 }, { "epoch": 79.55, "learning_rate": 9.393398058252428e-06, "loss": 0.0671, "step": 204840 }, { "epoch": 79.55, "learning_rate": 9.392880258899677e-06, "loss": 0.1453, "step": 204850 }, { "epoch": 79.56, "learning_rate": 9.392362459546927e-06, "loss": 0.0352, "step": 204860 }, { "epoch": 79.56, "learning_rate": 9.391844660194177e-06, "loss": 0.099, "step": 204870 }, { "epoch": 79.57, "learning_rate": 9.391326860841425e-06, "loss": 0.0908, "step": 204880 }, { "epoch": 79.57, "learning_rate": 9.390809061488674e-06, "loss": 0.017, "step": 204890 }, { "epoch": 79.57, "learning_rate": 9.390291262135924e-06, "loss": 0.05, "step": 204900 }, { "epoch": 79.58, "learning_rate": 9.389773462783173e-06, "loss": 0.0569, "step": 204910 }, { "epoch": 79.58, "learning_rate": 9.389255663430421e-06, "loss": 0.0199, "step": 204920 }, { "epoch": 79.58, "learning_rate": 9.388737864077671e-06, "loss": 0.1493, "step": 204930 }, { "epoch": 79.59, "learning_rate": 9.38822006472492e-06, "loss": 0.1191, "step": 204940 }, { "epoch": 79.59, "learning_rate": 9.38770226537217e-06, "loss": 0.0598, "step": 204950 }, { "epoch": 79.6, "learning_rate": 9.387184466019418e-06, "loss": 0.1, "step": 204960 }, { "epoch": 79.6, "learning_rate": 9.386666666666668e-06, "loss": 0.0244, "step": 204970 }, { "epoch": 79.6, "learning_rate": 9.386148867313917e-06, "loss": 0.0878, "step": 204980 }, { "epoch": 79.61, "learning_rate": 9.385631067961167e-06, "loss": 0.04, "step": 204990 }, { "epoch": 79.61, "learning_rate": 9.385113268608415e-06, "loss": 0.0689, "step": 205000 }, { "epoch": 79.62, "learning_rate": 9.384595469255664e-06, "loss": 0.0059, "step": 205010 }, { "epoch": 79.62, "learning_rate": 9.384077669902914e-06, "loss": 0.1311, "step": 205020 }, { "epoch": 79.62, "learning_rate": 9.383559870550164e-06, "loss": 0.0787, "step": 205030 }, { "epoch": 79.63, "learning_rate": 9.383042071197412e-06, "loss": 0.1991, "step": 205040 }, { "epoch": 79.63, "learning_rate": 9.382524271844661e-06, "loss": 0.0569, "step": 205050 }, { "epoch": 79.63, "learning_rate": 9.382006472491911e-06, "loss": 0.0109, "step": 205060 }, { "epoch": 79.64, "learning_rate": 9.38148867313916e-06, "loss": 0.0835, "step": 205070 }, { "epoch": 79.64, "learning_rate": 9.380970873786408e-06, "loss": 0.013, "step": 205080 }, { "epoch": 79.65, "learning_rate": 9.380453074433658e-06, "loss": 0.0883, "step": 205090 }, { "epoch": 79.65, "learning_rate": 9.379935275080908e-06, "loss": 0.0013, "step": 205100 }, { "epoch": 79.65, "learning_rate": 9.379417475728156e-06, "loss": 0.1321, "step": 205110 }, { "epoch": 79.66, "learning_rate": 9.378899676375405e-06, "loss": 0.0129, "step": 205120 }, { "epoch": 79.66, "learning_rate": 9.378381877022655e-06, "loss": 0.0003, "step": 205130 }, { "epoch": 79.67, "learning_rate": 9.377864077669904e-06, "loss": 0.033, "step": 205140 }, { "epoch": 79.67, "learning_rate": 9.377346278317152e-06, "loss": 0.084, "step": 205150 }, { "epoch": 79.67, "learning_rate": 9.376828478964402e-06, "loss": 0.0898, "step": 205160 }, { "epoch": 79.68, "learning_rate": 9.376310679611652e-06, "loss": 0.0446, "step": 205170 }, { "epoch": 79.68, "learning_rate": 9.375792880258901e-06, "loss": 0.0671, "step": 205180 }, { "epoch": 79.69, "learning_rate": 9.375275080906149e-06, "loss": 0.0332, "step": 205190 }, { "epoch": 79.69, "learning_rate": 9.374757281553399e-06, "loss": 0.074, "step": 205200 }, { "epoch": 79.69, "learning_rate": 9.374239482200648e-06, "loss": 0.0534, "step": 205210 }, { "epoch": 79.7, "learning_rate": 9.373721682847898e-06, "loss": 0.1099, "step": 205220 }, { "epoch": 79.7, "learning_rate": 9.373203883495146e-06, "loss": 0.1084, "step": 205230 }, { "epoch": 79.7, "learning_rate": 9.372686084142396e-06, "loss": 0.0561, "step": 205240 }, { "epoch": 79.71, "learning_rate": 9.372168284789645e-06, "loss": 0.0249, "step": 205250 }, { "epoch": 79.71, "learning_rate": 9.371650485436895e-06, "loss": 0.0369, "step": 205260 }, { "epoch": 79.72, "learning_rate": 9.371132686084143e-06, "loss": 0.0109, "step": 205270 }, { "epoch": 79.72, "learning_rate": 9.370614886731392e-06, "loss": 0.0589, "step": 205280 }, { "epoch": 79.72, "learning_rate": 9.370097087378642e-06, "loss": 0.1434, "step": 205290 }, { "epoch": 79.73, "learning_rate": 9.36957928802589e-06, "loss": 0.0061, "step": 205300 }, { "epoch": 79.73, "learning_rate": 9.36906148867314e-06, "loss": 0.0846, "step": 205310 }, { "epoch": 79.74, "learning_rate": 9.368543689320389e-06, "loss": 0.0393, "step": 205320 }, { "epoch": 79.74, "learning_rate": 9.368025889967639e-06, "loss": 0.0396, "step": 205330 }, { "epoch": 79.74, "learning_rate": 9.367508090614887e-06, "loss": 0.0806, "step": 205340 }, { "epoch": 79.75, "learning_rate": 9.366990291262136e-06, "loss": 0.0018, "step": 205350 }, { "epoch": 79.75, "learning_rate": 9.366472491909386e-06, "loss": 0.0677, "step": 205360 }, { "epoch": 79.76, "learning_rate": 9.365954692556635e-06, "loss": 0.0392, "step": 205370 }, { "epoch": 79.76, "learning_rate": 9.365436893203883e-06, "loss": 0.0529, "step": 205380 }, { "epoch": 79.76, "learning_rate": 9.364919093851133e-06, "loss": 0.0232, "step": 205390 }, { "epoch": 79.77, "learning_rate": 9.364401294498383e-06, "loss": 0.0162, "step": 205400 }, { "epoch": 79.77, "learning_rate": 9.363883495145632e-06, "loss": 0.0366, "step": 205410 }, { "epoch": 79.77, "learning_rate": 9.36336569579288e-06, "loss": 0.044, "step": 205420 }, { "epoch": 79.78, "learning_rate": 9.36284789644013e-06, "loss": 0.0997, "step": 205430 }, { "epoch": 79.78, "learning_rate": 9.36233009708738e-06, "loss": 0.0627, "step": 205440 }, { "epoch": 79.79, "learning_rate": 9.361812297734627e-06, "loss": 0.0558, "step": 205450 }, { "epoch": 79.79, "learning_rate": 9.361294498381877e-06, "loss": 0.0453, "step": 205460 }, { "epoch": 79.79, "learning_rate": 9.360776699029127e-06, "loss": 0.0619, "step": 205470 }, { "epoch": 79.8, "learning_rate": 9.360258899676376e-06, "loss": 0.0439, "step": 205480 }, { "epoch": 79.8, "learning_rate": 9.359741100323624e-06, "loss": 0.133, "step": 205490 }, { "epoch": 79.81, "learning_rate": 9.359223300970874e-06, "loss": 0.0712, "step": 205500 }, { "epoch": 79.81, "learning_rate": 9.358705501618123e-06, "loss": 0.1207, "step": 205510 }, { "epoch": 79.81, "learning_rate": 9.358187702265373e-06, "loss": 0.0508, "step": 205520 }, { "epoch": 79.82, "learning_rate": 9.357669902912621e-06, "loss": 0.1441, "step": 205530 }, { "epoch": 79.82, "learning_rate": 9.35715210355987e-06, "loss": 0.0574, "step": 205540 }, { "epoch": 79.83, "learning_rate": 9.35663430420712e-06, "loss": 0.0427, "step": 205550 }, { "epoch": 79.83, "learning_rate": 9.35611650485437e-06, "loss": 0.0061, "step": 205560 }, { "epoch": 79.83, "learning_rate": 9.355598705501618e-06, "loss": 0.0251, "step": 205570 }, { "epoch": 79.84, "learning_rate": 9.355080906148867e-06, "loss": 0.1727, "step": 205580 }, { "epoch": 79.84, "learning_rate": 9.354563106796117e-06, "loss": 0.0073, "step": 205590 }, { "epoch": 79.84, "learning_rate": 9.354045307443367e-06, "loss": 0.0162, "step": 205600 }, { "epoch": 79.85, "learning_rate": 9.353527508090614e-06, "loss": 0.0669, "step": 205610 }, { "epoch": 79.85, "learning_rate": 9.353009708737864e-06, "loss": 0.0325, "step": 205620 }, { "epoch": 79.86, "learning_rate": 9.352491909385114e-06, "loss": 0.0622, "step": 205630 }, { "epoch": 79.86, "learning_rate": 9.351974110032363e-06, "loss": 0.0537, "step": 205640 }, { "epoch": 79.86, "learning_rate": 9.351456310679611e-06, "loss": 0.021, "step": 205650 }, { "epoch": 79.87, "learning_rate": 9.350938511326861e-06, "loss": 0.0978, "step": 205660 }, { "epoch": 79.87, "learning_rate": 9.35042071197411e-06, "loss": 0.0339, "step": 205670 }, { "epoch": 79.88, "learning_rate": 9.34990291262136e-06, "loss": 0.0694, "step": 205680 }, { "epoch": 79.88, "learning_rate": 9.349385113268608e-06, "loss": 0.4598, "step": 205690 }, { "epoch": 79.88, "learning_rate": 9.348867313915858e-06, "loss": 0.0146, "step": 205700 }, { "epoch": 79.89, "learning_rate": 9.348349514563107e-06, "loss": 0.0211, "step": 205710 }, { "epoch": 79.89, "learning_rate": 9.347831715210357e-06, "loss": 0.0019, "step": 205720 }, { "epoch": 79.9, "learning_rate": 9.347313915857605e-06, "loss": 0.0669, "step": 205730 }, { "epoch": 79.9, "learning_rate": 9.346796116504854e-06, "loss": 0.0844, "step": 205740 }, { "epoch": 79.9, "learning_rate": 9.346278317152104e-06, "loss": 0.0593, "step": 205750 }, { "epoch": 79.91, "learning_rate": 9.345760517799354e-06, "loss": 0.0837, "step": 205760 }, { "epoch": 79.91, "learning_rate": 9.345242718446603e-06, "loss": 0.028, "step": 205770 }, { "epoch": 79.91, "learning_rate": 9.344724919093851e-06, "loss": 0.0122, "step": 205780 }, { "epoch": 79.92, "learning_rate": 9.3442071197411e-06, "loss": 0.0547, "step": 205790 }, { "epoch": 79.92, "learning_rate": 9.34368932038835e-06, "loss": 0.2912, "step": 205800 }, { "epoch": 79.93, "learning_rate": 9.3431715210356e-06, "loss": 0.0863, "step": 205810 }, { "epoch": 79.93, "learning_rate": 9.342653721682848e-06, "loss": 0.0759, "step": 205820 }, { "epoch": 79.93, "learning_rate": 9.342135922330098e-06, "loss": 0.1011, "step": 205830 }, { "epoch": 79.94, "learning_rate": 9.341618122977347e-06, "loss": 0.0452, "step": 205840 }, { "epoch": 79.94, "learning_rate": 9.341100323624597e-06, "loss": 0.034, "step": 205850 }, { "epoch": 79.95, "learning_rate": 9.340582524271845e-06, "loss": 0.1132, "step": 205860 }, { "epoch": 79.95, "learning_rate": 9.340064724919094e-06, "loss": 0.0465, "step": 205870 }, { "epoch": 79.95, "learning_rate": 9.339546925566344e-06, "loss": 0.003, "step": 205880 }, { "epoch": 79.96, "learning_rate": 9.339029126213594e-06, "loss": 0.1065, "step": 205890 }, { "epoch": 79.96, "learning_rate": 9.338511326860842e-06, "loss": 0.1086, "step": 205900 }, { "epoch": 79.97, "learning_rate": 9.337993527508091e-06, "loss": 0.1116, "step": 205910 }, { "epoch": 79.97, "learning_rate": 9.33747572815534e-06, "loss": 0.0332, "step": 205920 }, { "epoch": 79.97, "learning_rate": 9.33695792880259e-06, "loss": 0.0698, "step": 205930 }, { "epoch": 79.98, "learning_rate": 9.336440129449838e-06, "loss": 0.0369, "step": 205940 }, { "epoch": 79.98, "learning_rate": 9.335922330097088e-06, "loss": 0.0163, "step": 205950 }, { "epoch": 79.98, "learning_rate": 9.335404530744338e-06, "loss": 0.0516, "step": 205960 }, { "epoch": 79.99, "learning_rate": 9.334886731391587e-06, "loss": 0.0105, "step": 205970 }, { "epoch": 79.99, "learning_rate": 9.334368932038835e-06, "loss": 0.0852, "step": 205980 }, { "epoch": 80.0, "learning_rate": 9.333851132686085e-06, "loss": 0.1047, "step": 205990 }, { "epoch": 80.0, "learning_rate": 9.333333333333334e-06, "loss": 0.0834, "step": 206000 }, { "epoch": 80.0, "eval_accuracy": 0.9524071526822558, "eval_loss": 0.3430519104003906, "eval_runtime": 8.1895, "eval_samples_per_second": 443.859, "eval_steps_per_second": 55.559, "step": 206000 }, { "epoch": 80.0, "learning_rate": 9.332815533980584e-06, "loss": 0.0701, "step": 206010 }, { "epoch": 80.01, "learning_rate": 9.332297734627832e-06, "loss": 0.0704, "step": 206020 }, { "epoch": 80.01, "learning_rate": 9.331779935275081e-06, "loss": 0.0757, "step": 206030 }, { "epoch": 80.02, "learning_rate": 9.331262135922331e-06, "loss": 0.034, "step": 206040 }, { "epoch": 80.02, "learning_rate": 9.33074433656958e-06, "loss": 0.2888, "step": 206050 }, { "epoch": 80.02, "learning_rate": 9.330226537216829e-06, "loss": 0.0143, "step": 206060 }, { "epoch": 80.03, "learning_rate": 9.329708737864078e-06, "loss": 0.0434, "step": 206070 }, { "epoch": 80.03, "learning_rate": 9.329190938511328e-06, "loss": 0.0008, "step": 206080 }, { "epoch": 80.03, "learning_rate": 9.328673139158577e-06, "loss": 0.0093, "step": 206090 }, { "epoch": 80.04, "learning_rate": 9.328155339805825e-06, "loss": 0.0194, "step": 206100 }, { "epoch": 80.04, "learning_rate": 9.327637540453075e-06, "loss": 0.0629, "step": 206110 }, { "epoch": 80.05, "learning_rate": 9.327119741100325e-06, "loss": 0.007, "step": 206120 }, { "epoch": 80.05, "learning_rate": 9.326601941747574e-06, "loss": 0.0005, "step": 206130 }, { "epoch": 80.05, "learning_rate": 9.326084142394822e-06, "loss": 0.0686, "step": 206140 }, { "epoch": 80.06, "learning_rate": 9.325566343042072e-06, "loss": 0.0946, "step": 206150 }, { "epoch": 80.06, "learning_rate": 9.325048543689321e-06, "loss": 0.0264, "step": 206160 }, { "epoch": 80.07, "learning_rate": 9.324530744336571e-06, "loss": 0.1151, "step": 206170 }, { "epoch": 80.07, "learning_rate": 9.324012944983819e-06, "loss": 0.0529, "step": 206180 }, { "epoch": 80.07, "learning_rate": 9.323495145631069e-06, "loss": 0.0466, "step": 206190 }, { "epoch": 80.08, "learning_rate": 9.322977346278318e-06, "loss": 0.0387, "step": 206200 }, { "epoch": 80.08, "learning_rate": 9.322459546925568e-06, "loss": 0.0217, "step": 206210 }, { "epoch": 80.09, "learning_rate": 9.321941747572816e-06, "loss": 0.0031, "step": 206220 }, { "epoch": 80.09, "learning_rate": 9.321423948220065e-06, "loss": 0.092, "step": 206230 }, { "epoch": 80.09, "learning_rate": 9.320906148867315e-06, "loss": 0.0472, "step": 206240 }, { "epoch": 80.1, "learning_rate": 9.320388349514565e-06, "loss": 0.0616, "step": 206250 }, { "epoch": 80.1, "learning_rate": 9.319870550161813e-06, "loss": 0.0198, "step": 206260 }, { "epoch": 80.1, "learning_rate": 9.319352750809062e-06, "loss": 0.0416, "step": 206270 }, { "epoch": 80.11, "learning_rate": 9.318834951456312e-06, "loss": 0.0336, "step": 206280 }, { "epoch": 80.11, "learning_rate": 9.318317152103561e-06, "loss": 0.0587, "step": 206290 }, { "epoch": 80.12, "learning_rate": 9.317799352750811e-06, "loss": 0.0071, "step": 206300 }, { "epoch": 80.12, "learning_rate": 9.317281553398059e-06, "loss": 0.0108, "step": 206310 }, { "epoch": 80.12, "learning_rate": 9.316763754045309e-06, "loss": 0.0472, "step": 206320 }, { "epoch": 80.13, "learning_rate": 9.316245954692558e-06, "loss": 0.1397, "step": 206330 }, { "epoch": 80.13, "learning_rate": 9.315728155339808e-06, "loss": 0.036, "step": 206340 }, { "epoch": 80.14, "learning_rate": 9.315210355987056e-06, "loss": 0.0259, "step": 206350 }, { "epoch": 80.14, "learning_rate": 9.314692556634305e-06, "loss": 0.0474, "step": 206360 }, { "epoch": 80.14, "learning_rate": 9.314174757281555e-06, "loss": 0.0389, "step": 206370 }, { "epoch": 80.15, "learning_rate": 9.313656957928805e-06, "loss": 0.0001, "step": 206380 }, { "epoch": 80.15, "learning_rate": 9.313139158576052e-06, "loss": 0.0247, "step": 206390 }, { "epoch": 80.16, "learning_rate": 9.312621359223302e-06, "loss": 0.0372, "step": 206400 }, { "epoch": 80.16, "learning_rate": 9.312103559870552e-06, "loss": 0.0001, "step": 206410 }, { "epoch": 80.16, "learning_rate": 9.311585760517801e-06, "loss": 0.0214, "step": 206420 }, { "epoch": 80.17, "learning_rate": 9.31106796116505e-06, "loss": 0.0004, "step": 206430 }, { "epoch": 80.17, "learning_rate": 9.310550161812299e-06, "loss": 0.0835, "step": 206440 }, { "epoch": 80.17, "learning_rate": 9.310032362459548e-06, "loss": 0.0956, "step": 206450 }, { "epoch": 80.18, "learning_rate": 9.309514563106798e-06, "loss": 0.0154, "step": 206460 }, { "epoch": 80.18, "learning_rate": 9.308996763754046e-06, "loss": 0.0262, "step": 206470 }, { "epoch": 80.19, "learning_rate": 9.308478964401296e-06, "loss": 0.0639, "step": 206480 }, { "epoch": 80.19, "learning_rate": 9.307961165048545e-06, "loss": 0.0122, "step": 206490 }, { "epoch": 80.19, "learning_rate": 9.307443365695795e-06, "loss": 0.0215, "step": 206500 }, { "epoch": 80.2, "learning_rate": 9.306925566343043e-06, "loss": 0.0119, "step": 206510 }, { "epoch": 80.2, "learning_rate": 9.306407766990292e-06, "loss": 0.0823, "step": 206520 }, { "epoch": 80.21, "learning_rate": 9.305889967637542e-06, "loss": 0.0192, "step": 206530 }, { "epoch": 80.21, "learning_rate": 9.305372168284792e-06, "loss": 0.0344, "step": 206540 }, { "epoch": 80.21, "learning_rate": 9.30485436893204e-06, "loss": 0.0009, "step": 206550 }, { "epoch": 80.22, "learning_rate": 9.30433656957929e-06, "loss": 0.0768, "step": 206560 }, { "epoch": 80.22, "learning_rate": 9.303818770226539e-06, "loss": 0.0163, "step": 206570 }, { "epoch": 80.23, "learning_rate": 9.303300970873787e-06, "loss": 0.0747, "step": 206580 }, { "epoch": 80.23, "learning_rate": 9.302783171521036e-06, "loss": 0.0438, "step": 206590 }, { "epoch": 80.23, "learning_rate": 9.302265372168286e-06, "loss": 0.0003, "step": 206600 }, { "epoch": 80.24, "learning_rate": 9.301747572815536e-06, "loss": 0.0741, "step": 206610 }, { "epoch": 80.24, "learning_rate": 9.301229773462784e-06, "loss": 0.0271, "step": 206620 }, { "epoch": 80.24, "learning_rate": 9.300711974110033e-06, "loss": 0.0811, "step": 206630 }, { "epoch": 80.25, "learning_rate": 9.300194174757283e-06, "loss": 0.0252, "step": 206640 }, { "epoch": 80.25, "learning_rate": 9.299676375404532e-06, "loss": 0.0125, "step": 206650 }, { "epoch": 80.26, "learning_rate": 9.29915857605178e-06, "loss": 0.067, "step": 206660 }, { "epoch": 80.26, "learning_rate": 9.29864077669903e-06, "loss": 0.0631, "step": 206670 }, { "epoch": 80.26, "learning_rate": 9.29812297734628e-06, "loss": 0.0195, "step": 206680 }, { "epoch": 80.27, "learning_rate": 9.29760517799353e-06, "loss": 0.0368, "step": 206690 }, { "epoch": 80.27, "learning_rate": 9.297087378640777e-06, "loss": 0.0005, "step": 206700 }, { "epoch": 80.28, "learning_rate": 9.296569579288027e-06, "loss": 0.071, "step": 206710 }, { "epoch": 80.28, "learning_rate": 9.296051779935276e-06, "loss": 0.1663, "step": 206720 }, { "epoch": 80.28, "learning_rate": 9.295533980582524e-06, "loss": 0.105, "step": 206730 }, { "epoch": 80.29, "learning_rate": 9.295016181229774e-06, "loss": 0.0188, "step": 206740 }, { "epoch": 80.29, "learning_rate": 9.294498381877023e-06, "loss": 0.0592, "step": 206750 }, { "epoch": 80.3, "learning_rate": 9.293980582524273e-06, "loss": 0.0124, "step": 206760 }, { "epoch": 80.3, "learning_rate": 9.293462783171521e-06, "loss": 0.0478, "step": 206770 }, { "epoch": 80.3, "learning_rate": 9.29294498381877e-06, "loss": 0.133, "step": 206780 }, { "epoch": 80.31, "learning_rate": 9.29242718446602e-06, "loss": 0.0165, "step": 206790 }, { "epoch": 80.31, "learning_rate": 9.29190938511327e-06, "loss": 0.0259, "step": 206800 }, { "epoch": 80.31, "learning_rate": 9.291391585760518e-06, "loss": 0.0191, "step": 206810 }, { "epoch": 80.32, "learning_rate": 9.290873786407767e-06, "loss": 0.0486, "step": 206820 }, { "epoch": 80.32, "learning_rate": 9.290355987055017e-06, "loss": 0.0168, "step": 206830 }, { "epoch": 80.33, "learning_rate": 9.289838187702267e-06, "loss": 0.0132, "step": 206840 }, { "epoch": 80.33, "learning_rate": 9.289320388349515e-06, "loss": 0.0143, "step": 206850 }, { "epoch": 80.33, "learning_rate": 9.288802588996764e-06, "loss": 0.0639, "step": 206860 }, { "epoch": 80.34, "learning_rate": 9.288284789644014e-06, "loss": 0.1073, "step": 206870 }, { "epoch": 80.34, "learning_rate": 9.287766990291263e-06, "loss": 0.0424, "step": 206880 }, { "epoch": 80.35, "learning_rate": 9.287249190938511e-06, "loss": 0.0181, "step": 206890 }, { "epoch": 80.35, "learning_rate": 9.286731391585761e-06, "loss": 0.1596, "step": 206900 }, { "epoch": 80.35, "learning_rate": 9.28621359223301e-06, "loss": 0.0136, "step": 206910 }, { "epoch": 80.36, "learning_rate": 9.285695792880259e-06, "loss": 0.1907, "step": 206920 }, { "epoch": 80.36, "learning_rate": 9.285177993527508e-06, "loss": 0.1622, "step": 206930 }, { "epoch": 80.37, "learning_rate": 9.284660194174758e-06, "loss": 0.0721, "step": 206940 }, { "epoch": 80.37, "learning_rate": 9.284142394822007e-06, "loss": 0.04, "step": 206950 }, { "epoch": 80.37, "learning_rate": 9.283624595469255e-06, "loss": 0.0215, "step": 206960 }, { "epoch": 80.38, "learning_rate": 9.283106796116505e-06, "loss": 0.0804, "step": 206970 }, { "epoch": 80.38, "learning_rate": 9.282588996763755e-06, "loss": 0.149, "step": 206980 }, { "epoch": 80.38, "learning_rate": 9.282071197411004e-06, "loss": 0.2217, "step": 206990 }, { "epoch": 80.39, "learning_rate": 9.281553398058252e-06, "loss": 0.03, "step": 207000 }, { "epoch": 80.39, "learning_rate": 9.281035598705502e-06, "loss": 0.1564, "step": 207010 }, { "epoch": 80.4, "learning_rate": 9.280517799352751e-06, "loss": 0.0085, "step": 207020 }, { "epoch": 80.4, "learning_rate": 9.280000000000001e-06, "loss": 0.137, "step": 207030 }, { "epoch": 80.4, "learning_rate": 9.279482200647249e-06, "loss": 0.0545, "step": 207040 }, { "epoch": 80.41, "learning_rate": 9.278964401294498e-06, "loss": 0.0845, "step": 207050 }, { "epoch": 80.41, "learning_rate": 9.278446601941748e-06, "loss": 0.124, "step": 207060 }, { "epoch": 80.42, "learning_rate": 9.277928802588998e-06, "loss": 0.0863, "step": 207070 }, { "epoch": 80.42, "learning_rate": 9.277411003236246e-06, "loss": 0.0183, "step": 207080 }, { "epoch": 80.42, "learning_rate": 9.276893203883495e-06, "loss": 0.1179, "step": 207090 }, { "epoch": 80.43, "learning_rate": 9.276375404530745e-06, "loss": 0.0426, "step": 207100 }, { "epoch": 80.43, "learning_rate": 9.275857605177994e-06, "loss": 0.0661, "step": 207110 }, { "epoch": 80.43, "learning_rate": 9.275339805825242e-06, "loss": 0.0107, "step": 207120 }, { "epoch": 80.44, "learning_rate": 9.274822006472492e-06, "loss": 0.1471, "step": 207130 }, { "epoch": 80.44, "learning_rate": 9.274304207119742e-06, "loss": 0.0474, "step": 207140 }, { "epoch": 80.45, "learning_rate": 9.273786407766991e-06, "loss": 0.0199, "step": 207150 }, { "epoch": 80.45, "learning_rate": 9.27326860841424e-06, "loss": 0.0492, "step": 207160 }, { "epoch": 80.45, "learning_rate": 9.272750809061489e-06, "loss": 0.017, "step": 207170 }, { "epoch": 80.46, "learning_rate": 9.272233009708738e-06, "loss": 0.0448, "step": 207180 }, { "epoch": 80.46, "learning_rate": 9.271715210355988e-06, "loss": 0.0566, "step": 207190 }, { "epoch": 80.47, "learning_rate": 9.271197411003236e-06, "loss": 0.0049, "step": 207200 }, { "epoch": 80.47, "learning_rate": 9.270679611650486e-06, "loss": 0.0205, "step": 207210 }, { "epoch": 80.47, "learning_rate": 9.270161812297735e-06, "loss": 0.0346, "step": 207220 }, { "epoch": 80.48, "learning_rate": 9.269644012944985e-06, "loss": 0.0728, "step": 207230 }, { "epoch": 80.48, "learning_rate": 9.269126213592233e-06, "loss": 0.1606, "step": 207240 }, { "epoch": 80.49, "learning_rate": 9.268608414239482e-06, "loss": 0.0498, "step": 207250 }, { "epoch": 80.49, "learning_rate": 9.268090614886732e-06, "loss": 0.2175, "step": 207260 }, { "epoch": 80.49, "learning_rate": 9.267572815533982e-06, "loss": 0.0157, "step": 207270 }, { "epoch": 80.5, "learning_rate": 9.26705501618123e-06, "loss": 0.0827, "step": 207280 }, { "epoch": 80.5, "learning_rate": 9.266537216828479e-06, "loss": 0.0209, "step": 207290 }, { "epoch": 80.5, "learning_rate": 9.266019417475729e-06, "loss": 0.0094, "step": 207300 }, { "epoch": 80.51, "learning_rate": 9.265501618122978e-06, "loss": 0.0663, "step": 207310 }, { "epoch": 80.51, "learning_rate": 9.264983818770226e-06, "loss": 0.0755, "step": 207320 }, { "epoch": 80.52, "learning_rate": 9.264466019417476e-06, "loss": 0.0646, "step": 207330 }, { "epoch": 80.52, "learning_rate": 9.263948220064726e-06, "loss": 0.0281, "step": 207340 }, { "epoch": 80.52, "learning_rate": 9.263430420711975e-06, "loss": 0.0113, "step": 207350 }, { "epoch": 80.53, "learning_rate": 9.262912621359223e-06, "loss": 0.0719, "step": 207360 }, { "epoch": 80.53, "learning_rate": 9.262394822006473e-06, "loss": 0.0665, "step": 207370 }, { "epoch": 80.54, "learning_rate": 9.261877022653722e-06, "loss": 0.06, "step": 207380 }, { "epoch": 80.54, "learning_rate": 9.261359223300972e-06, "loss": 0.0889, "step": 207390 }, { "epoch": 80.54, "learning_rate": 9.26084142394822e-06, "loss": 0.0839, "step": 207400 }, { "epoch": 80.55, "learning_rate": 9.26032362459547e-06, "loss": 0.1781, "step": 207410 }, { "epoch": 80.55, "learning_rate": 9.259805825242719e-06, "loss": 0.1102, "step": 207420 }, { "epoch": 80.56, "learning_rate": 9.259288025889969e-06, "loss": 0.0007, "step": 207430 }, { "epoch": 80.56, "learning_rate": 9.258770226537218e-06, "loss": 0.0094, "step": 207440 }, { "epoch": 80.56, "learning_rate": 9.258252427184466e-06, "loss": 0.0667, "step": 207450 }, { "epoch": 80.57, "learning_rate": 9.257734627831716e-06, "loss": 0.0471, "step": 207460 }, { "epoch": 80.57, "learning_rate": 9.257216828478965e-06, "loss": 0.0168, "step": 207470 }, { "epoch": 80.57, "learning_rate": 9.256699029126215e-06, "loss": 0.0364, "step": 207480 }, { "epoch": 80.58, "learning_rate": 9.256181229773463e-06, "loss": 0.0962, "step": 207490 }, { "epoch": 80.58, "learning_rate": 9.255663430420713e-06, "loss": 0.0302, "step": 207500 }, { "epoch": 80.59, "learning_rate": 9.255145631067962e-06, "loss": 0.096, "step": 207510 }, { "epoch": 80.59, "learning_rate": 9.254627831715212e-06, "loss": 0.153, "step": 207520 }, { "epoch": 80.59, "learning_rate": 9.25411003236246e-06, "loss": 0.0833, "step": 207530 }, { "epoch": 80.6, "learning_rate": 9.25359223300971e-06, "loss": 0.0342, "step": 207540 }, { "epoch": 80.6, "learning_rate": 9.253074433656959e-06, "loss": 0.0393, "step": 207550 }, { "epoch": 80.61, "learning_rate": 9.252556634304209e-06, "loss": 0.0266, "step": 207560 }, { "epoch": 80.61, "learning_rate": 9.252038834951457e-06, "loss": 0.0884, "step": 207570 }, { "epoch": 80.61, "learning_rate": 9.251521035598706e-06, "loss": 0.1696, "step": 207580 }, { "epoch": 80.62, "learning_rate": 9.251003236245956e-06, "loss": 0.0737, "step": 207590 }, { "epoch": 80.62, "learning_rate": 9.250485436893205e-06, "loss": 0.0293, "step": 207600 }, { "epoch": 80.63, "learning_rate": 9.249967637540453e-06, "loss": 0.0542, "step": 207610 }, { "epoch": 80.63, "learning_rate": 9.249449838187703e-06, "loss": 0.1616, "step": 207620 }, { "epoch": 80.63, "learning_rate": 9.248932038834953e-06, "loss": 0.1097, "step": 207630 }, { "epoch": 80.64, "learning_rate": 9.248414239482202e-06, "loss": 0.1761, "step": 207640 }, { "epoch": 80.64, "learning_rate": 9.24789644012945e-06, "loss": 0.0247, "step": 207650 }, { "epoch": 80.64, "learning_rate": 9.2473786407767e-06, "loss": 0.0522, "step": 207660 }, { "epoch": 80.65, "learning_rate": 9.24686084142395e-06, "loss": 0.0081, "step": 207670 }, { "epoch": 80.65, "learning_rate": 9.246343042071199e-06, "loss": 0.082, "step": 207680 }, { "epoch": 80.66, "learning_rate": 9.245825242718447e-06, "loss": 0.1285, "step": 207690 }, { "epoch": 80.66, "learning_rate": 9.245307443365697e-06, "loss": 0.0021, "step": 207700 }, { "epoch": 80.66, "learning_rate": 9.244789644012946e-06, "loss": 0.0772, "step": 207710 }, { "epoch": 80.67, "learning_rate": 9.244271844660196e-06, "loss": 0.0161, "step": 207720 }, { "epoch": 80.67, "learning_rate": 9.243754045307444e-06, "loss": 0.0839, "step": 207730 }, { "epoch": 80.68, "learning_rate": 9.243236245954693e-06, "loss": 0.0367, "step": 207740 }, { "epoch": 80.68, "learning_rate": 9.242718446601943e-06, "loss": 0.0133, "step": 207750 }, { "epoch": 80.68, "learning_rate": 9.242200647249193e-06, "loss": 0.073, "step": 207760 }, { "epoch": 80.69, "learning_rate": 9.24168284789644e-06, "loss": 0.1004, "step": 207770 }, { "epoch": 80.69, "learning_rate": 9.24116504854369e-06, "loss": 0.02, "step": 207780 }, { "epoch": 80.7, "learning_rate": 9.24064724919094e-06, "loss": 0.0353, "step": 207790 }, { "epoch": 80.7, "learning_rate": 9.24012944983819e-06, "loss": 0.0485, "step": 207800 }, { "epoch": 80.7, "learning_rate": 9.239611650485437e-06, "loss": 0.1412, "step": 207810 }, { "epoch": 80.71, "learning_rate": 9.239093851132687e-06, "loss": 0.1189, "step": 207820 }, { "epoch": 80.71, "learning_rate": 9.238576051779936e-06, "loss": 0.1168, "step": 207830 }, { "epoch": 80.71, "learning_rate": 9.238058252427186e-06, "loss": 0.0429, "step": 207840 }, { "epoch": 80.72, "learning_rate": 9.237540453074434e-06, "loss": 0.0115, "step": 207850 }, { "epoch": 80.72, "learning_rate": 9.237022653721684e-06, "loss": 0.015, "step": 207860 }, { "epoch": 80.73, "learning_rate": 9.236504854368933e-06, "loss": 0.0372, "step": 207870 }, { "epoch": 80.73, "learning_rate": 9.235987055016183e-06, "loss": 0.0676, "step": 207880 }, { "epoch": 80.73, "learning_rate": 9.23546925566343e-06, "loss": 0.0002, "step": 207890 }, { "epoch": 80.74, "learning_rate": 9.23495145631068e-06, "loss": 0.0874, "step": 207900 }, { "epoch": 80.74, "learning_rate": 9.23443365695793e-06, "loss": 0.1168, "step": 207910 }, { "epoch": 80.75, "learning_rate": 9.23391585760518e-06, "loss": 0.0601, "step": 207920 }, { "epoch": 80.75, "learning_rate": 9.233398058252428e-06, "loss": 0.021, "step": 207930 }, { "epoch": 80.75, "learning_rate": 9.232880258899677e-06, "loss": 0.0731, "step": 207940 }, { "epoch": 80.76, "learning_rate": 9.232362459546927e-06, "loss": 0.0869, "step": 207950 }, { "epoch": 80.76, "learning_rate": 9.231844660194176e-06, "loss": 0.0092, "step": 207960 }, { "epoch": 80.77, "learning_rate": 9.231326860841424e-06, "loss": 0.0391, "step": 207970 }, { "epoch": 80.77, "learning_rate": 9.230809061488674e-06, "loss": 0.0067, "step": 207980 }, { "epoch": 80.77, "learning_rate": 9.230291262135924e-06, "loss": 0.0004, "step": 207990 }, { "epoch": 80.78, "learning_rate": 9.229773462783173e-06, "loss": 0.0276, "step": 208000 }, { "epoch": 80.78, "learning_rate": 9.229255663430423e-06, "loss": 0.0314, "step": 208010 }, { "epoch": 80.78, "learning_rate": 9.22873786407767e-06, "loss": 0.0753, "step": 208020 }, { "epoch": 80.79, "learning_rate": 9.22822006472492e-06, "loss": 0.1325, "step": 208030 }, { "epoch": 80.79, "learning_rate": 9.22770226537217e-06, "loss": 0.1429, "step": 208040 }, { "epoch": 80.8, "learning_rate": 9.227184466019418e-06, "loss": 0.0194, "step": 208050 }, { "epoch": 80.8, "learning_rate": 9.226666666666668e-06, "loss": 0.1394, "step": 208060 }, { "epoch": 80.8, "learning_rate": 9.226148867313917e-06, "loss": 0.044, "step": 208070 }, { "epoch": 80.81, "learning_rate": 9.225631067961167e-06, "loss": 0.0592, "step": 208080 }, { "epoch": 80.81, "learning_rate": 9.225113268608415e-06, "loss": 0.1318, "step": 208090 }, { "epoch": 80.82, "learning_rate": 9.224595469255664e-06, "loss": 0.0746, "step": 208100 }, { "epoch": 80.82, "learning_rate": 9.224077669902914e-06, "loss": 0.012, "step": 208110 }, { "epoch": 80.82, "learning_rate": 9.223559870550164e-06, "loss": 0.0265, "step": 208120 }, { "epoch": 80.83, "learning_rate": 9.223042071197411e-06, "loss": 0.0002, "step": 208130 }, { "epoch": 80.83, "learning_rate": 9.222524271844661e-06, "loss": 0.0733, "step": 208140 }, { "epoch": 80.83, "learning_rate": 9.22200647249191e-06, "loss": 0.0761, "step": 208150 }, { "epoch": 80.84, "learning_rate": 9.22148867313916e-06, "loss": 0.0024, "step": 208160 }, { "epoch": 80.84, "learning_rate": 9.220970873786408e-06, "loss": 0.0169, "step": 208170 }, { "epoch": 80.85, "learning_rate": 9.220453074433658e-06, "loss": 0.0029, "step": 208180 }, { "epoch": 80.85, "learning_rate": 9.219935275080908e-06, "loss": 0.1233, "step": 208190 }, { "epoch": 80.85, "learning_rate": 9.219417475728155e-06, "loss": 0.0875, "step": 208200 }, { "epoch": 80.86, "learning_rate": 9.218899676375405e-06, "loss": 0.0253, "step": 208210 }, { "epoch": 80.86, "learning_rate": 9.218381877022655e-06, "loss": 0.0405, "step": 208220 }, { "epoch": 80.87, "learning_rate": 9.217864077669904e-06, "loss": 0.0819, "step": 208230 }, { "epoch": 80.87, "learning_rate": 9.217346278317152e-06, "loss": 0.0119, "step": 208240 }, { "epoch": 80.87, "learning_rate": 9.216828478964402e-06, "loss": 0.0319, "step": 208250 }, { "epoch": 80.88, "learning_rate": 9.216310679611651e-06, "loss": 0.1023, "step": 208260 }, { "epoch": 80.88, "learning_rate": 9.215792880258901e-06, "loss": 0.0343, "step": 208270 }, { "epoch": 80.89, "learning_rate": 9.215275080906149e-06, "loss": 0.0745, "step": 208280 }, { "epoch": 80.89, "learning_rate": 9.214757281553399e-06, "loss": 0.0082, "step": 208290 }, { "epoch": 80.89, "learning_rate": 9.214239482200648e-06, "loss": 0.092, "step": 208300 }, { "epoch": 80.9, "learning_rate": 9.213721682847898e-06, "loss": 0.0317, "step": 208310 }, { "epoch": 80.9, "learning_rate": 9.213203883495146e-06, "loss": 0.0504, "step": 208320 }, { "epoch": 80.9, "learning_rate": 9.212686084142395e-06, "loss": 0.0709, "step": 208330 }, { "epoch": 80.91, "learning_rate": 9.212168284789645e-06, "loss": 0.0015, "step": 208340 }, { "epoch": 80.91, "learning_rate": 9.211650485436895e-06, "loss": 0.048, "step": 208350 }, { "epoch": 80.92, "learning_rate": 9.211132686084143e-06, "loss": 0.0651, "step": 208360 }, { "epoch": 80.92, "learning_rate": 9.210614886731392e-06, "loss": 0.0243, "step": 208370 }, { "epoch": 80.92, "learning_rate": 9.210097087378642e-06, "loss": 0.0051, "step": 208380 }, { "epoch": 80.93, "learning_rate": 9.20957928802589e-06, "loss": 0.0253, "step": 208390 }, { "epoch": 80.93, "learning_rate": 9.20906148867314e-06, "loss": 0.0173, "step": 208400 }, { "epoch": 80.94, "learning_rate": 9.208543689320389e-06, "loss": 0.1477, "step": 208410 }, { "epoch": 80.94, "learning_rate": 9.208025889967639e-06, "loss": 0.0066, "step": 208420 }, { "epoch": 80.94, "learning_rate": 9.207508090614886e-06, "loss": 0.1031, "step": 208430 }, { "epoch": 80.95, "learning_rate": 9.206990291262136e-06, "loss": 0.1412, "step": 208440 }, { "epoch": 80.95, "learning_rate": 9.206472491909386e-06, "loss": 0.042, "step": 208450 }, { "epoch": 80.96, "learning_rate": 9.205954692556635e-06, "loss": 0.078, "step": 208460 }, { "epoch": 80.96, "learning_rate": 9.205436893203883e-06, "loss": 0.177, "step": 208470 }, { "epoch": 80.96, "learning_rate": 9.204919093851133e-06, "loss": 0.0672, "step": 208480 }, { "epoch": 80.97, "learning_rate": 9.204401294498382e-06, "loss": 0.0305, "step": 208490 }, { "epoch": 80.97, "learning_rate": 9.203883495145632e-06, "loss": 0.0087, "step": 208500 }, { "epoch": 80.97, "learning_rate": 9.20336569579288e-06, "loss": 0.0501, "step": 208510 }, { "epoch": 80.98, "learning_rate": 9.20284789644013e-06, "loss": 0.036, "step": 208520 }, { "epoch": 80.98, "learning_rate": 9.20233009708738e-06, "loss": 0.0439, "step": 208530 }, { "epoch": 80.99, "learning_rate": 9.201812297734627e-06, "loss": 0.0631, "step": 208540 }, { "epoch": 80.99, "learning_rate": 9.201294498381877e-06, "loss": 0.0006, "step": 208550 }, { "epoch": 80.99, "learning_rate": 9.200776699029126e-06, "loss": 0.2947, "step": 208560 }, { "epoch": 81.0, "learning_rate": 9.200258899676376e-06, "loss": 0.0388, "step": 208570 }, { "epoch": 81.0, "eval_accuracy": 0.9521320495185694, "eval_loss": 0.34262311458587646, "eval_runtime": 8.2561, "eval_samples_per_second": 440.282, "eval_steps_per_second": 55.111, "step": 208575 }, { "epoch": 81.0, "learning_rate": 9.199741100323626e-06, "loss": 0.0242, "step": 208580 }, { "epoch": 81.01, "learning_rate": 9.199223300970874e-06, "loss": 0.0981, "step": 208590 }, { "epoch": 81.01, "learning_rate": 9.198705501618123e-06, "loss": 0.0065, "step": 208600 }, { "epoch": 81.01, "learning_rate": 9.198187702265373e-06, "loss": 0.0776, "step": 208610 }, { "epoch": 81.02, "learning_rate": 9.197669902912622e-06, "loss": 0.0646, "step": 208620 }, { "epoch": 81.02, "learning_rate": 9.19715210355987e-06, "loss": 0.0574, "step": 208630 }, { "epoch": 81.03, "learning_rate": 9.19663430420712e-06, "loss": 0.1101, "step": 208640 }, { "epoch": 81.03, "learning_rate": 9.19611650485437e-06, "loss": 0.0355, "step": 208650 }, { "epoch": 81.03, "learning_rate": 9.19559870550162e-06, "loss": 0.0132, "step": 208660 }, { "epoch": 81.04, "learning_rate": 9.195080906148867e-06, "loss": 0.02, "step": 208670 }, { "epoch": 81.04, "learning_rate": 9.194563106796117e-06, "loss": 0.0384, "step": 208680 }, { "epoch": 81.04, "learning_rate": 9.194045307443366e-06, "loss": 0.0798, "step": 208690 }, { "epoch": 81.05, "learning_rate": 9.193527508090616e-06, "loss": 0.0205, "step": 208700 }, { "epoch": 81.05, "learning_rate": 9.193009708737864e-06, "loss": 0.0153, "step": 208710 }, { "epoch": 81.06, "learning_rate": 9.192491909385114e-06, "loss": 0.0209, "step": 208720 }, { "epoch": 81.06, "learning_rate": 9.191974110032363e-06, "loss": 0.1864, "step": 208730 }, { "epoch": 81.06, "learning_rate": 9.191456310679613e-06, "loss": 0.1199, "step": 208740 }, { "epoch": 81.07, "learning_rate": 9.19093851132686e-06, "loss": 0.0086, "step": 208750 }, { "epoch": 81.07, "learning_rate": 9.19042071197411e-06, "loss": 0.0301, "step": 208760 }, { "epoch": 81.08, "learning_rate": 9.18990291262136e-06, "loss": 0.0916, "step": 208770 }, { "epoch": 81.08, "learning_rate": 9.18938511326861e-06, "loss": 0.0906, "step": 208780 }, { "epoch": 81.08, "learning_rate": 9.188867313915857e-06, "loss": 0.1145, "step": 208790 }, { "epoch": 81.09, "learning_rate": 9.188349514563107e-06, "loss": 0.0149, "step": 208800 }, { "epoch": 81.09, "learning_rate": 9.187831715210357e-06, "loss": 0.0699, "step": 208810 }, { "epoch": 81.1, "learning_rate": 9.187313915857606e-06, "loss": 0.0238, "step": 208820 }, { "epoch": 81.1, "learning_rate": 9.186796116504854e-06, "loss": 0.0318, "step": 208830 }, { "epoch": 81.1, "learning_rate": 9.186278317152104e-06, "loss": 0.0179, "step": 208840 }, { "epoch": 81.11, "learning_rate": 9.185760517799353e-06, "loss": 0.1295, "step": 208850 }, { "epoch": 81.11, "learning_rate": 9.185242718446603e-06, "loss": 0.0097, "step": 208860 }, { "epoch": 81.11, "learning_rate": 9.184724919093851e-06, "loss": 0.0127, "step": 208870 }, { "epoch": 81.12, "learning_rate": 9.1842071197411e-06, "loss": 0.0056, "step": 208880 }, { "epoch": 81.12, "learning_rate": 9.18368932038835e-06, "loss": 0.1129, "step": 208890 }, { "epoch": 81.13, "learning_rate": 9.1831715210356e-06, "loss": 0.0577, "step": 208900 }, { "epoch": 81.13, "learning_rate": 9.182653721682848e-06, "loss": 0.1776, "step": 208910 }, { "epoch": 81.13, "learning_rate": 9.182135922330097e-06, "loss": 0.0275, "step": 208920 }, { "epoch": 81.14, "learning_rate": 9.181618122977347e-06, "loss": 0.0813, "step": 208930 }, { "epoch": 81.14, "learning_rate": 9.181100323624597e-06, "loss": 0.0009, "step": 208940 }, { "epoch": 81.15, "learning_rate": 9.180582524271845e-06, "loss": 0.0358, "step": 208950 }, { "epoch": 81.15, "learning_rate": 9.180064724919094e-06, "loss": 0.0269, "step": 208960 }, { "epoch": 81.15, "learning_rate": 9.179546925566344e-06, "loss": 0.1834, "step": 208970 }, { "epoch": 81.16, "learning_rate": 9.179029126213593e-06, "loss": 0.0799, "step": 208980 }, { "epoch": 81.16, "learning_rate": 9.178511326860841e-06, "loss": 0.0396, "step": 208990 }, { "epoch": 81.17, "learning_rate": 9.177993527508091e-06, "loss": 0.0719, "step": 209000 }, { "epoch": 81.17, "learning_rate": 9.17747572815534e-06, "loss": 0.0841, "step": 209010 }, { "epoch": 81.17, "learning_rate": 9.17695792880259e-06, "loss": 0.1213, "step": 209020 }, { "epoch": 81.18, "learning_rate": 9.176440129449838e-06, "loss": 0.1153, "step": 209030 }, { "epoch": 81.18, "learning_rate": 9.175922330097088e-06, "loss": 0.1099, "step": 209040 }, { "epoch": 81.18, "learning_rate": 9.175404530744337e-06, "loss": 0.0229, "step": 209050 }, { "epoch": 81.19, "learning_rate": 9.174886731391587e-06, "loss": 0.0355, "step": 209060 }, { "epoch": 81.19, "learning_rate": 9.174368932038835e-06, "loss": 0.1444, "step": 209070 }, { "epoch": 81.2, "learning_rate": 9.173851132686085e-06, "loss": 0.0092, "step": 209080 }, { "epoch": 81.2, "learning_rate": 9.173333333333334e-06, "loss": 0.1225, "step": 209090 }, { "epoch": 81.2, "learning_rate": 9.172815533980584e-06, "loss": 0.0139, "step": 209100 }, { "epoch": 81.21, "learning_rate": 9.172297734627833e-06, "loss": 0.0443, "step": 209110 }, { "epoch": 81.21, "learning_rate": 9.171779935275081e-06, "loss": 0.0218, "step": 209120 }, { "epoch": 81.22, "learning_rate": 9.171262135922331e-06, "loss": 0.0003, "step": 209130 }, { "epoch": 81.22, "learning_rate": 9.17074433656958e-06, "loss": 0.0958, "step": 209140 }, { "epoch": 81.22, "learning_rate": 9.17022653721683e-06, "loss": 0.0502, "step": 209150 }, { "epoch": 81.23, "learning_rate": 9.169708737864078e-06, "loss": 0.0535, "step": 209160 }, { "epoch": 81.23, "learning_rate": 9.169190938511328e-06, "loss": 0.0429, "step": 209170 }, { "epoch": 81.23, "learning_rate": 9.168673139158577e-06, "loss": 0.0937, "step": 209180 }, { "epoch": 81.24, "learning_rate": 9.168155339805827e-06, "loss": 0.1716, "step": 209190 }, { "epoch": 81.24, "learning_rate": 9.167637540453075e-06, "loss": 0.0284, "step": 209200 }, { "epoch": 81.25, "learning_rate": 9.167119741100324e-06, "loss": 0.1401, "step": 209210 }, { "epoch": 81.25, "learning_rate": 9.166601941747574e-06, "loss": 0.0167, "step": 209220 }, { "epoch": 81.25, "learning_rate": 9.166084142394824e-06, "loss": 0.0184, "step": 209230 }, { "epoch": 81.26, "learning_rate": 9.165566343042072e-06, "loss": 0.0867, "step": 209240 }, { "epoch": 81.26, "learning_rate": 9.165048543689321e-06, "loss": 0.1053, "step": 209250 }, { "epoch": 81.27, "learning_rate": 9.164530744336571e-06, "loss": 0.0308, "step": 209260 }, { "epoch": 81.27, "learning_rate": 9.16401294498382e-06, "loss": 0.0209, "step": 209270 }, { "epoch": 81.27, "learning_rate": 9.163495145631068e-06, "loss": 0.1603, "step": 209280 }, { "epoch": 81.28, "learning_rate": 9.162977346278318e-06, "loss": 0.0241, "step": 209290 }, { "epoch": 81.28, "learning_rate": 9.162459546925568e-06, "loss": 0.0839, "step": 209300 }, { "epoch": 81.29, "learning_rate": 9.161941747572817e-06, "loss": 0.04, "step": 209310 }, { "epoch": 81.29, "learning_rate": 9.161423948220065e-06, "loss": 0.0767, "step": 209320 }, { "epoch": 81.29, "learning_rate": 9.160906148867315e-06, "loss": 0.0016, "step": 209330 }, { "epoch": 81.3, "learning_rate": 9.160388349514564e-06, "loss": 0.0458, "step": 209340 }, { "epoch": 81.3, "learning_rate": 9.159870550161814e-06, "loss": 0.0231, "step": 209350 }, { "epoch": 81.3, "learning_rate": 9.159352750809062e-06, "loss": 0.1401, "step": 209360 }, { "epoch": 81.31, "learning_rate": 9.158834951456312e-06, "loss": 0.047, "step": 209370 }, { "epoch": 81.31, "learning_rate": 9.158317152103561e-06, "loss": 0.0217, "step": 209380 }, { "epoch": 81.32, "learning_rate": 9.15779935275081e-06, "loss": 0.024, "step": 209390 }, { "epoch": 81.32, "learning_rate": 9.157281553398059e-06, "loss": 0.1227, "step": 209400 }, { "epoch": 81.32, "learning_rate": 9.156763754045308e-06, "loss": 0.0896, "step": 209410 }, { "epoch": 81.33, "learning_rate": 9.156245954692558e-06, "loss": 0.0342, "step": 209420 }, { "epoch": 81.33, "learning_rate": 9.155728155339808e-06, "loss": 0.0993, "step": 209430 }, { "epoch": 81.34, "learning_rate": 9.155210355987056e-06, "loss": 0.0824, "step": 209440 }, { "epoch": 81.34, "learning_rate": 9.154692556634305e-06, "loss": 0.0675, "step": 209450 }, { "epoch": 81.34, "learning_rate": 9.154174757281555e-06, "loss": 0.0412, "step": 209460 }, { "epoch": 81.35, "learning_rate": 9.153656957928804e-06, "loss": 0.0137, "step": 209470 }, { "epoch": 81.35, "learning_rate": 9.153139158576052e-06, "loss": 0.089, "step": 209480 }, { "epoch": 81.36, "learning_rate": 9.152621359223302e-06, "loss": 0.0012, "step": 209490 }, { "epoch": 81.36, "learning_rate": 9.152103559870552e-06, "loss": 0.0309, "step": 209500 }, { "epoch": 81.36, "learning_rate": 9.151585760517801e-06, "loss": 0.0457, "step": 209510 }, { "epoch": 81.37, "learning_rate": 9.151067961165049e-06, "loss": 0.047, "step": 209520 }, { "epoch": 81.37, "learning_rate": 9.150550161812299e-06, "loss": 0.1341, "step": 209530 }, { "epoch": 81.37, "learning_rate": 9.150032362459548e-06, "loss": 0.0799, "step": 209540 }, { "epoch": 81.38, "learning_rate": 9.149514563106798e-06, "loss": 0.0378, "step": 209550 }, { "epoch": 81.38, "learning_rate": 9.148996763754046e-06, "loss": 0.138, "step": 209560 }, { "epoch": 81.39, "learning_rate": 9.148478964401295e-06, "loss": 0.0597, "step": 209570 }, { "epoch": 81.39, "learning_rate": 9.147961165048545e-06, "loss": 0.0434, "step": 209580 }, { "epoch": 81.39, "learning_rate": 9.147443365695795e-06, "loss": 0.0991, "step": 209590 }, { "epoch": 81.4, "learning_rate": 9.146925566343043e-06, "loss": 0.0164, "step": 209600 }, { "epoch": 81.4, "learning_rate": 9.146407766990292e-06, "loss": 0.0872, "step": 209610 }, { "epoch": 81.41, "learning_rate": 9.145889967637542e-06, "loss": 0.0789, "step": 209620 }, { "epoch": 81.41, "learning_rate": 9.145372168284792e-06, "loss": 0.1037, "step": 209630 }, { "epoch": 81.41, "learning_rate": 9.14485436893204e-06, "loss": 0.0906, "step": 209640 }, { "epoch": 81.42, "learning_rate": 9.144336569579289e-06, "loss": 0.03, "step": 209650 }, { "epoch": 81.42, "learning_rate": 9.143818770226539e-06, "loss": 0.0242, "step": 209660 }, { "epoch": 81.43, "learning_rate": 9.143300970873787e-06, "loss": 0.0235, "step": 209670 }, { "epoch": 81.43, "learning_rate": 9.142783171521036e-06, "loss": 0.047, "step": 209680 }, { "epoch": 81.43, "learning_rate": 9.142265372168286e-06, "loss": 0.215, "step": 209690 }, { "epoch": 81.44, "learning_rate": 9.141747572815535e-06, "loss": 0.0194, "step": 209700 }, { "epoch": 81.44, "learning_rate": 9.141229773462783e-06, "loss": 0.0436, "step": 209710 }, { "epoch": 81.44, "learning_rate": 9.140711974110033e-06, "loss": 0.1308, "step": 209720 }, { "epoch": 81.45, "learning_rate": 9.140194174757283e-06, "loss": 0.0424, "step": 209730 }, { "epoch": 81.45, "learning_rate": 9.139676375404532e-06, "loss": 0.0585, "step": 209740 }, { "epoch": 81.46, "learning_rate": 9.13915857605178e-06, "loss": 0.028, "step": 209750 }, { "epoch": 81.46, "learning_rate": 9.13864077669903e-06, "loss": 0.0008, "step": 209760 }, { "epoch": 81.46, "learning_rate": 9.13812297734628e-06, "loss": 0.1022, "step": 209770 }, { "epoch": 81.47, "learning_rate": 9.137605177993529e-06, "loss": 0.0421, "step": 209780 }, { "epoch": 81.47, "learning_rate": 9.137087378640777e-06, "loss": 0.165, "step": 209790 }, { "epoch": 81.48, "learning_rate": 9.136569579288027e-06, "loss": 0.1437, "step": 209800 }, { "epoch": 81.48, "learning_rate": 9.136051779935276e-06, "loss": 0.0365, "step": 209810 }, { "epoch": 81.48, "learning_rate": 9.135533980582526e-06, "loss": 0.0082, "step": 209820 }, { "epoch": 81.49, "learning_rate": 9.135016181229774e-06, "loss": 0.0216, "step": 209830 }, { "epoch": 81.49, "learning_rate": 9.134498381877023e-06, "loss": 0.0339, "step": 209840 }, { "epoch": 81.5, "learning_rate": 9.133980582524273e-06, "loss": 0.0881, "step": 209850 }, { "epoch": 81.5, "learning_rate": 9.133462783171521e-06, "loss": 0.012, "step": 209860 }, { "epoch": 81.5, "learning_rate": 9.13294498381877e-06, "loss": 0.0115, "step": 209870 }, { "epoch": 81.51, "learning_rate": 9.13242718446602e-06, "loss": 0.046, "step": 209880 }, { "epoch": 81.51, "learning_rate": 9.13190938511327e-06, "loss": 0.1129, "step": 209890 }, { "epoch": 81.51, "learning_rate": 9.131391585760518e-06, "loss": 0.1034, "step": 209900 }, { "epoch": 81.52, "learning_rate": 9.130873786407767e-06, "loss": 0.0152, "step": 209910 }, { "epoch": 81.52, "learning_rate": 9.130355987055017e-06, "loss": 0.0189, "step": 209920 }, { "epoch": 81.53, "learning_rate": 9.129838187702267e-06, "loss": 0.0115, "step": 209930 }, { "epoch": 81.53, "learning_rate": 9.129320388349514e-06, "loss": 0.0454, "step": 209940 }, { "epoch": 81.53, "learning_rate": 9.128802588996764e-06, "loss": 0.1421, "step": 209950 }, { "epoch": 81.54, "learning_rate": 9.128284789644014e-06, "loss": 0.0506, "step": 209960 }, { "epoch": 81.54, "learning_rate": 9.127766990291263e-06, "loss": 0.1097, "step": 209970 }, { "epoch": 81.55, "learning_rate": 9.127249190938511e-06, "loss": 0.0647, "step": 209980 }, { "epoch": 81.55, "learning_rate": 9.12673139158576e-06, "loss": 0.0255, "step": 209990 }, { "epoch": 81.55, "learning_rate": 9.12621359223301e-06, "loss": 0.0072, "step": 210000 }, { "epoch": 81.56, "learning_rate": 9.125695792880258e-06, "loss": 0.0136, "step": 210010 }, { "epoch": 81.56, "learning_rate": 9.125177993527508e-06, "loss": 0.024, "step": 210020 }, { "epoch": 81.57, "learning_rate": 9.124660194174758e-06, "loss": 0.108, "step": 210030 }, { "epoch": 81.57, "learning_rate": 9.124142394822007e-06, "loss": 0.0754, "step": 210040 }, { "epoch": 81.57, "learning_rate": 9.123624595469255e-06, "loss": 0.0357, "step": 210050 }, { "epoch": 81.58, "learning_rate": 9.123106796116505e-06, "loss": 0.0921, "step": 210060 }, { "epoch": 81.58, "learning_rate": 9.122588996763754e-06, "loss": 0.0465, "step": 210070 }, { "epoch": 81.58, "learning_rate": 9.122071197411004e-06, "loss": 0.0185, "step": 210080 }, { "epoch": 81.59, "learning_rate": 9.121553398058252e-06, "loss": 0.0019, "step": 210090 }, { "epoch": 81.59, "learning_rate": 9.121035598705502e-06, "loss": 0.0135, "step": 210100 }, { "epoch": 81.6, "learning_rate": 9.120517799352751e-06, "loss": 0.1091, "step": 210110 }, { "epoch": 81.6, "learning_rate": 9.12e-06, "loss": 0.0403, "step": 210120 }, { "epoch": 81.6, "learning_rate": 9.119482200647249e-06, "loss": 0.005, "step": 210130 }, { "epoch": 81.61, "learning_rate": 9.118964401294498e-06, "loss": 0.0959, "step": 210140 }, { "epoch": 81.61, "learning_rate": 9.118446601941748e-06, "loss": 0.0873, "step": 210150 }, { "epoch": 81.62, "learning_rate": 9.117928802588998e-06, "loss": 0.087, "step": 210160 }, { "epoch": 81.62, "learning_rate": 9.117411003236245e-06, "loss": 0.1089, "step": 210170 }, { "epoch": 81.62, "learning_rate": 9.116893203883495e-06, "loss": 0.0338, "step": 210180 }, { "epoch": 81.63, "learning_rate": 9.116375404530745e-06, "loss": 0.0353, "step": 210190 }, { "epoch": 81.63, "learning_rate": 9.115857605177994e-06, "loss": 0.0346, "step": 210200 }, { "epoch": 81.63, "learning_rate": 9.115339805825242e-06, "loss": 0.0011, "step": 210210 }, { "epoch": 81.64, "learning_rate": 9.114822006472492e-06, "loss": 0.0371, "step": 210220 }, { "epoch": 81.64, "learning_rate": 9.114304207119741e-06, "loss": 0.0602, "step": 210230 }, { "epoch": 81.65, "learning_rate": 9.113786407766991e-06, "loss": 0.1011, "step": 210240 }, { "epoch": 81.65, "learning_rate": 9.11326860841424e-06, "loss": 0.0124, "step": 210250 }, { "epoch": 81.65, "learning_rate": 9.112750809061489e-06, "loss": 0.0372, "step": 210260 }, { "epoch": 81.66, "learning_rate": 9.112233009708738e-06, "loss": 0.01, "step": 210270 }, { "epoch": 81.66, "learning_rate": 9.111715210355988e-06, "loss": 0.0043, "step": 210280 }, { "epoch": 81.67, "learning_rate": 9.111197411003238e-06, "loss": 0.0772, "step": 210290 }, { "epoch": 81.67, "learning_rate": 9.110679611650485e-06, "loss": 0.1096, "step": 210300 }, { "epoch": 81.67, "learning_rate": 9.110161812297735e-06, "loss": 0.079, "step": 210310 }, { "epoch": 81.68, "learning_rate": 9.109644012944985e-06, "loss": 0.0135, "step": 210320 }, { "epoch": 81.68, "learning_rate": 9.109126213592234e-06, "loss": 0.0474, "step": 210330 }, { "epoch": 81.69, "learning_rate": 9.108608414239482e-06, "loss": 0.0581, "step": 210340 }, { "epoch": 81.69, "learning_rate": 9.108090614886732e-06, "loss": 0.0645, "step": 210350 }, { "epoch": 81.69, "learning_rate": 9.107572815533981e-06, "loss": 0.0177, "step": 210360 }, { "epoch": 81.7, "learning_rate": 9.107055016181231e-06, "loss": 0.1155, "step": 210370 }, { "epoch": 81.7, "learning_rate": 9.106537216828479e-06, "loss": 0.0374, "step": 210380 }, { "epoch": 81.7, "learning_rate": 9.106019417475729e-06, "loss": 0.0263, "step": 210390 }, { "epoch": 81.71, "learning_rate": 9.105501618122978e-06, "loss": 0.0069, "step": 210400 }, { "epoch": 81.71, "learning_rate": 9.104983818770228e-06, "loss": 0.0522, "step": 210410 }, { "epoch": 81.72, "learning_rate": 9.104466019417476e-06, "loss": 0.0276, "step": 210420 }, { "epoch": 81.72, "learning_rate": 9.103948220064725e-06, "loss": 0.0218, "step": 210430 }, { "epoch": 81.72, "learning_rate": 9.103430420711975e-06, "loss": 0.0411, "step": 210440 }, { "epoch": 81.73, "learning_rate": 9.102912621359225e-06, "loss": 0.002, "step": 210450 }, { "epoch": 81.73, "learning_rate": 9.102394822006473e-06, "loss": 0.1061, "step": 210460 }, { "epoch": 81.74, "learning_rate": 9.101877022653722e-06, "loss": 0.146, "step": 210470 }, { "epoch": 81.74, "learning_rate": 9.101359223300972e-06, "loss": 0.1651, "step": 210480 }, { "epoch": 81.74, "learning_rate": 9.100841423948221e-06, "loss": 0.1358, "step": 210490 }, { "epoch": 81.75, "learning_rate": 9.10032362459547e-06, "loss": 0.1671, "step": 210500 }, { "epoch": 81.75, "learning_rate": 9.099805825242719e-06, "loss": 0.1055, "step": 210510 }, { "epoch": 81.76, "learning_rate": 9.099288025889969e-06, "loss": 0.0268, "step": 210520 }, { "epoch": 81.76, "learning_rate": 9.098770226537218e-06, "loss": 0.0252, "step": 210530 }, { "epoch": 81.76, "learning_rate": 9.098252427184466e-06, "loss": 0.0029, "step": 210540 }, { "epoch": 81.77, "learning_rate": 9.097734627831716e-06, "loss": 0.0056, "step": 210550 }, { "epoch": 81.77, "learning_rate": 9.097216828478965e-06, "loss": 0.1349, "step": 210560 }, { "epoch": 81.77, "learning_rate": 9.096699029126215e-06, "loss": 0.1305, "step": 210570 }, { "epoch": 81.78, "learning_rate": 9.096181229773463e-06, "loss": 0.1104, "step": 210580 }, { "epoch": 81.78, "learning_rate": 9.095663430420712e-06, "loss": 0.1126, "step": 210590 }, { "epoch": 81.79, "learning_rate": 9.095145631067962e-06, "loss": 0.0817, "step": 210600 }, { "epoch": 81.79, "learning_rate": 9.094627831715212e-06, "loss": 0.0354, "step": 210610 }, { "epoch": 81.79, "learning_rate": 9.09411003236246e-06, "loss": 0.0133, "step": 210620 }, { "epoch": 81.8, "learning_rate": 9.09359223300971e-06, "loss": 0.0544, "step": 210630 }, { "epoch": 81.8, "learning_rate": 9.093074433656959e-06, "loss": 0.0407, "step": 210640 }, { "epoch": 81.81, "learning_rate": 9.092556634304209e-06, "loss": 0.0703, "step": 210650 }, { "epoch": 81.81, "learning_rate": 9.092038834951456e-06, "loss": 0.0258, "step": 210660 }, { "epoch": 81.81, "learning_rate": 9.091521035598706e-06, "loss": 0.0516, "step": 210670 }, { "epoch": 81.82, "learning_rate": 9.091003236245956e-06, "loss": 0.0009, "step": 210680 }, { "epoch": 81.82, "learning_rate": 9.090485436893205e-06, "loss": 0.0928, "step": 210690 }, { "epoch": 81.83, "learning_rate": 9.089967637540453e-06, "loss": 0.3581, "step": 210700 }, { "epoch": 81.83, "learning_rate": 9.089449838187703e-06, "loss": 0.0727, "step": 210710 }, { "epoch": 81.83, "learning_rate": 9.088932038834952e-06, "loss": 0.0618, "step": 210720 }, { "epoch": 81.84, "learning_rate": 9.088414239482202e-06, "loss": 0.1099, "step": 210730 }, { "epoch": 81.84, "learning_rate": 9.08789644012945e-06, "loss": 0.1867, "step": 210740 }, { "epoch": 81.84, "learning_rate": 9.0873786407767e-06, "loss": 0.1348, "step": 210750 }, { "epoch": 81.85, "learning_rate": 9.08686084142395e-06, "loss": 0.0294, "step": 210760 }, { "epoch": 81.85, "learning_rate": 9.086343042071199e-06, "loss": 0.0264, "step": 210770 }, { "epoch": 81.86, "learning_rate": 9.085825242718447e-06, "loss": 0.1793, "step": 210780 }, { "epoch": 81.86, "learning_rate": 9.085307443365696e-06, "loss": 0.1212, "step": 210790 }, { "epoch": 81.86, "learning_rate": 9.084789644012946e-06, "loss": 0.005, "step": 210800 }, { "epoch": 81.87, "learning_rate": 9.084271844660196e-06, "loss": 0.0122, "step": 210810 }, { "epoch": 81.87, "learning_rate": 9.083754045307445e-06, "loss": 0.0244, "step": 210820 }, { "epoch": 81.88, "learning_rate": 9.083236245954693e-06, "loss": 0.1112, "step": 210830 }, { "epoch": 81.88, "learning_rate": 9.082718446601943e-06, "loss": 0.1243, "step": 210840 }, { "epoch": 81.88, "learning_rate": 9.082200647249192e-06, "loss": 0.0241, "step": 210850 }, { "epoch": 81.89, "learning_rate": 9.081682847896442e-06, "loss": 0.027, "step": 210860 }, { "epoch": 81.89, "learning_rate": 9.08116504854369e-06, "loss": 0.0661, "step": 210870 }, { "epoch": 81.9, "learning_rate": 9.08064724919094e-06, "loss": 0.1182, "step": 210880 }, { "epoch": 81.9, "learning_rate": 9.08012944983819e-06, "loss": 0.0814, "step": 210890 }, { "epoch": 81.9, "learning_rate": 9.079611650485439e-06, "loss": 0.0671, "step": 210900 }, { "epoch": 81.91, "learning_rate": 9.079093851132687e-06, "loss": 0.0003, "step": 210910 }, { "epoch": 81.91, "learning_rate": 9.078576051779936e-06, "loss": 0.2047, "step": 210920 }, { "epoch": 81.91, "learning_rate": 9.078058252427186e-06, "loss": 0.0569, "step": 210930 }, { "epoch": 81.92, "learning_rate": 9.077540453074436e-06, "loss": 0.0635, "step": 210940 }, { "epoch": 81.92, "learning_rate": 9.077022653721683e-06, "loss": 0.0896, "step": 210950 }, { "epoch": 81.93, "learning_rate": 9.076504854368933e-06, "loss": 0.2268, "step": 210960 }, { "epoch": 81.93, "learning_rate": 9.075987055016183e-06, "loss": 0.0624, "step": 210970 }, { "epoch": 81.93, "learning_rate": 9.075469255663432e-06, "loss": 0.0749, "step": 210980 }, { "epoch": 81.94, "learning_rate": 9.07495145631068e-06, "loss": 0.072, "step": 210990 }, { "epoch": 81.94, "learning_rate": 9.07443365695793e-06, "loss": 0.0435, "step": 211000 }, { "epoch": 81.95, "learning_rate": 9.07391585760518e-06, "loss": 0.032, "step": 211010 }, { "epoch": 81.95, "learning_rate": 9.073398058252429e-06, "loss": 0.0183, "step": 211020 }, { "epoch": 81.95, "learning_rate": 9.072880258899677e-06, "loss": 0.0316, "step": 211030 }, { "epoch": 81.96, "learning_rate": 9.072362459546927e-06, "loss": 0.0575, "step": 211040 }, { "epoch": 81.96, "learning_rate": 9.071844660194176e-06, "loss": 0.013, "step": 211050 }, { "epoch": 81.97, "learning_rate": 9.071326860841426e-06, "loss": 0.0757, "step": 211060 }, { "epoch": 81.97, "learning_rate": 9.070809061488674e-06, "loss": 0.0686, "step": 211070 }, { "epoch": 81.97, "learning_rate": 9.070291262135923e-06, "loss": 0.017, "step": 211080 }, { "epoch": 81.98, "learning_rate": 9.069773462783173e-06, "loss": 0.0461, "step": 211090 }, { "epoch": 81.98, "learning_rate": 9.069255663430423e-06, "loss": 0.0391, "step": 211100 }, { "epoch": 81.98, "learning_rate": 9.06873786407767e-06, "loss": 0.024, "step": 211110 }, { "epoch": 81.99, "learning_rate": 9.06822006472492e-06, "loss": 0.0004, "step": 211120 }, { "epoch": 81.99, "learning_rate": 9.06770226537217e-06, "loss": 0.0019, "step": 211130 }, { "epoch": 82.0, "learning_rate": 9.067184466019418e-06, "loss": 0.0783, "step": 211140 }, { "epoch": 82.0, "learning_rate": 9.066666666666667e-06, "loss": 0.0321, "step": 211150 }, { "epoch": 82.0, "eval_accuracy": 0.949656121045392, "eval_loss": 0.35550665855407715, "eval_runtime": 8.1719, "eval_samples_per_second": 444.817, "eval_steps_per_second": 55.679, "step": 211150 }, { "epoch": 82.0, "learning_rate": 9.066148867313917e-06, "loss": 0.0241, "step": 211160 }, { "epoch": 82.01, "learning_rate": 9.065631067961167e-06, "loss": 0.1045, "step": 211170 }, { "epoch": 82.01, "learning_rate": 9.065113268608415e-06, "loss": 0.0207, "step": 211180 }, { "epoch": 82.02, "learning_rate": 9.064595469255664e-06, "loss": 0.0713, "step": 211190 }, { "epoch": 82.02, "learning_rate": 9.064077669902914e-06, "loss": 0.0396, "step": 211200 }, { "epoch": 82.02, "learning_rate": 9.063559870550163e-06, "loss": 0.0562, "step": 211210 }, { "epoch": 82.03, "learning_rate": 9.063042071197411e-06, "loss": 0.0004, "step": 211220 }, { "epoch": 82.03, "learning_rate": 9.062524271844661e-06, "loss": 0.0576, "step": 211230 }, { "epoch": 82.03, "learning_rate": 9.06200647249191e-06, "loss": 0.0888, "step": 211240 }, { "epoch": 82.04, "learning_rate": 9.06148867313916e-06, "loss": 0.0095, "step": 211250 }, { "epoch": 82.04, "learning_rate": 9.060970873786408e-06, "loss": 0.0886, "step": 211260 }, { "epoch": 82.05, "learning_rate": 9.060453074433658e-06, "loss": 0.0436, "step": 211270 }, { "epoch": 82.05, "learning_rate": 9.059935275080907e-06, "loss": 0.0707, "step": 211280 }, { "epoch": 82.05, "learning_rate": 9.059417475728155e-06, "loss": 0.0105, "step": 211290 }, { "epoch": 82.06, "learning_rate": 9.058899676375405e-06, "loss": 0.0677, "step": 211300 }, { "epoch": 82.06, "learning_rate": 9.058381877022654e-06, "loss": 0.0201, "step": 211310 }, { "epoch": 82.07, "learning_rate": 9.057864077669904e-06, "loss": 0.0574, "step": 211320 }, { "epoch": 82.07, "learning_rate": 9.057346278317152e-06, "loss": 0.035, "step": 211330 }, { "epoch": 82.07, "learning_rate": 9.056828478964402e-06, "loss": 0.0047, "step": 211340 }, { "epoch": 82.08, "learning_rate": 9.056310679611651e-06, "loss": 0.0218, "step": 211350 }, { "epoch": 82.08, "learning_rate": 9.055792880258901e-06, "loss": 0.097, "step": 211360 }, { "epoch": 82.09, "learning_rate": 9.055275080906149e-06, "loss": 0.0028, "step": 211370 }, { "epoch": 82.09, "learning_rate": 9.054757281553398e-06, "loss": 0.0154, "step": 211380 }, { "epoch": 82.09, "learning_rate": 9.054239482200648e-06, "loss": 0.0207, "step": 211390 }, { "epoch": 82.1, "learning_rate": 9.053721682847898e-06, "loss": 0.0379, "step": 211400 }, { "epoch": 82.1, "learning_rate": 9.053203883495146e-06, "loss": 0.1271, "step": 211410 }, { "epoch": 82.1, "learning_rate": 9.052686084142395e-06, "loss": 0.0581, "step": 211420 }, { "epoch": 82.11, "learning_rate": 9.052168284789645e-06, "loss": 0.0021, "step": 211430 }, { "epoch": 82.11, "learning_rate": 9.051650485436894e-06, "loss": 0.053, "step": 211440 }, { "epoch": 82.12, "learning_rate": 9.051132686084142e-06, "loss": 0.1199, "step": 211450 }, { "epoch": 82.12, "learning_rate": 9.050614886731392e-06, "loss": 0.0897, "step": 211460 }, { "epoch": 82.12, "learning_rate": 9.050097087378642e-06, "loss": 0.0248, "step": 211470 }, { "epoch": 82.13, "learning_rate": 9.04957928802589e-06, "loss": 0.0002, "step": 211480 }, { "epoch": 82.13, "learning_rate": 9.049061488673139e-06, "loss": 0.011, "step": 211490 }, { "epoch": 82.14, "learning_rate": 9.048543689320389e-06, "loss": 0.0079, "step": 211500 }, { "epoch": 82.14, "learning_rate": 9.048025889967638e-06, "loss": 0.0261, "step": 211510 }, { "epoch": 82.14, "learning_rate": 9.047508090614886e-06, "loss": 0.0934, "step": 211520 }, { "epoch": 82.15, "learning_rate": 9.046990291262136e-06, "loss": 0.0397, "step": 211530 }, { "epoch": 82.15, "learning_rate": 9.046472491909386e-06, "loss": 0.1331, "step": 211540 }, { "epoch": 82.16, "learning_rate": 9.045954692556635e-06, "loss": 0.1355, "step": 211550 }, { "epoch": 82.16, "learning_rate": 9.045436893203883e-06, "loss": 0.0405, "step": 211560 }, { "epoch": 82.16, "learning_rate": 9.044919093851133e-06, "loss": 0.0278, "step": 211570 }, { "epoch": 82.17, "learning_rate": 9.044401294498382e-06, "loss": 0.0489, "step": 211580 }, { "epoch": 82.17, "learning_rate": 9.043883495145632e-06, "loss": 0.0309, "step": 211590 }, { "epoch": 82.17, "learning_rate": 9.04336569579288e-06, "loss": 0.0204, "step": 211600 }, { "epoch": 82.18, "learning_rate": 9.04284789644013e-06, "loss": 0.0465, "step": 211610 }, { "epoch": 82.18, "learning_rate": 9.042330097087379e-06, "loss": 0.0363, "step": 211620 }, { "epoch": 82.19, "learning_rate": 9.041812297734629e-06, "loss": 0.0163, "step": 211630 }, { "epoch": 82.19, "learning_rate": 9.041294498381877e-06, "loss": 0.0312, "step": 211640 }, { "epoch": 82.19, "learning_rate": 9.040776699029126e-06, "loss": 0.0157, "step": 211650 }, { "epoch": 82.2, "learning_rate": 9.040258899676376e-06, "loss": 0.1325, "step": 211660 }, { "epoch": 82.2, "learning_rate": 9.039741100323626e-06, "loss": 0.0054, "step": 211670 }, { "epoch": 82.21, "learning_rate": 9.039223300970873e-06, "loss": 0.0016, "step": 211680 }, { "epoch": 82.21, "learning_rate": 9.038705501618123e-06, "loss": 0.1406, "step": 211690 }, { "epoch": 82.21, "learning_rate": 9.038187702265373e-06, "loss": 0.0277, "step": 211700 }, { "epoch": 82.22, "learning_rate": 9.037669902912622e-06, "loss": 0.0084, "step": 211710 }, { "epoch": 82.22, "learning_rate": 9.03715210355987e-06, "loss": 0.2167, "step": 211720 }, { "epoch": 82.23, "learning_rate": 9.03663430420712e-06, "loss": 0.0309, "step": 211730 }, { "epoch": 82.23, "learning_rate": 9.03611650485437e-06, "loss": 0.0438, "step": 211740 }, { "epoch": 82.23, "learning_rate": 9.035598705501619e-06, "loss": 0.0599, "step": 211750 }, { "epoch": 82.24, "learning_rate": 9.035080906148867e-06, "loss": 0.0358, "step": 211760 }, { "epoch": 82.24, "learning_rate": 9.034563106796117e-06, "loss": 0.113, "step": 211770 }, { "epoch": 82.24, "learning_rate": 9.034045307443366e-06, "loss": 0.1344, "step": 211780 }, { "epoch": 82.25, "learning_rate": 9.033527508090616e-06, "loss": 0.0623, "step": 211790 }, { "epoch": 82.25, "learning_rate": 9.033009708737864e-06, "loss": 0.0137, "step": 211800 }, { "epoch": 82.26, "learning_rate": 9.032491909385113e-06, "loss": 0.1124, "step": 211810 }, { "epoch": 82.26, "learning_rate": 9.031974110032363e-06, "loss": 0.0806, "step": 211820 }, { "epoch": 82.26, "learning_rate": 9.031456310679613e-06, "loss": 0.1408, "step": 211830 }, { "epoch": 82.27, "learning_rate": 9.03093851132686e-06, "loss": 0.0017, "step": 211840 }, { "epoch": 82.27, "learning_rate": 9.03042071197411e-06, "loss": 0.0675, "step": 211850 }, { "epoch": 82.28, "learning_rate": 9.02990291262136e-06, "loss": 0.1094, "step": 211860 }, { "epoch": 82.28, "learning_rate": 9.02938511326861e-06, "loss": 0.0353, "step": 211870 }, { "epoch": 82.28, "learning_rate": 9.028867313915857e-06, "loss": 0.0652, "step": 211880 }, { "epoch": 82.29, "learning_rate": 9.028349514563107e-06, "loss": 0.028, "step": 211890 }, { "epoch": 82.29, "learning_rate": 9.027831715210357e-06, "loss": 0.0708, "step": 211900 }, { "epoch": 82.3, "learning_rate": 9.027313915857606e-06, "loss": 0.0203, "step": 211910 }, { "epoch": 82.3, "learning_rate": 9.026796116504856e-06, "loss": 0.0826, "step": 211920 }, { "epoch": 82.3, "learning_rate": 9.026278317152104e-06, "loss": 0.0273, "step": 211930 }, { "epoch": 82.31, "learning_rate": 9.025760517799353e-06, "loss": 0.1086, "step": 211940 }, { "epoch": 82.31, "learning_rate": 9.025242718446603e-06, "loss": 0.0659, "step": 211950 }, { "epoch": 82.31, "learning_rate": 9.024724919093853e-06, "loss": 0.178, "step": 211960 }, { "epoch": 82.32, "learning_rate": 9.0242071197411e-06, "loss": 0.1838, "step": 211970 }, { "epoch": 82.32, "learning_rate": 9.02368932038835e-06, "loss": 0.0267, "step": 211980 }, { "epoch": 82.33, "learning_rate": 9.0231715210356e-06, "loss": 0.1512, "step": 211990 }, { "epoch": 82.33, "learning_rate": 9.02265372168285e-06, "loss": 0.0264, "step": 212000 }, { "epoch": 82.33, "learning_rate": 9.022135922330097e-06, "loss": 0.0568, "step": 212010 }, { "epoch": 82.34, "learning_rate": 9.021618122977347e-06, "loss": 0.0186, "step": 212020 }, { "epoch": 82.34, "learning_rate": 9.021100323624597e-06, "loss": 0.117, "step": 212030 }, { "epoch": 82.35, "learning_rate": 9.020582524271846e-06, "loss": 0.0709, "step": 212040 }, { "epoch": 82.35, "learning_rate": 9.020064724919094e-06, "loss": 0.0904, "step": 212050 }, { "epoch": 82.35, "learning_rate": 9.019546925566344e-06, "loss": 0.0468, "step": 212060 }, { "epoch": 82.36, "learning_rate": 9.019029126213593e-06, "loss": 0.0135, "step": 212070 }, { "epoch": 82.36, "learning_rate": 9.018511326860843e-06, "loss": 0.0223, "step": 212080 }, { "epoch": 82.37, "learning_rate": 9.01799352750809e-06, "loss": 0.0432, "step": 212090 }, { "epoch": 82.37, "learning_rate": 9.01747572815534e-06, "loss": 0.0593, "step": 212100 }, { "epoch": 82.37, "learning_rate": 9.01695792880259e-06, "loss": 0.1741, "step": 212110 }, { "epoch": 82.38, "learning_rate": 9.01644012944984e-06, "loss": 0.1354, "step": 212120 }, { "epoch": 82.38, "learning_rate": 9.015922330097088e-06, "loss": 0.0121, "step": 212130 }, { "epoch": 82.38, "learning_rate": 9.015404530744337e-06, "loss": 0.0837, "step": 212140 }, { "epoch": 82.39, "learning_rate": 9.014886731391587e-06, "loss": 0.0132, "step": 212150 }, { "epoch": 82.39, "learning_rate": 9.014368932038836e-06, "loss": 0.0204, "step": 212160 }, { "epoch": 82.4, "learning_rate": 9.013851132686084e-06, "loss": 0.0167, "step": 212170 }, { "epoch": 82.4, "learning_rate": 9.013333333333334e-06, "loss": 0.0189, "step": 212180 }, { "epoch": 82.4, "learning_rate": 9.012815533980584e-06, "loss": 0.1586, "step": 212190 }, { "epoch": 82.41, "learning_rate": 9.012297734627833e-06, "loss": 0.04, "step": 212200 }, { "epoch": 82.41, "learning_rate": 9.011779935275081e-06, "loss": 0.1638, "step": 212210 }, { "epoch": 82.42, "learning_rate": 9.01126213592233e-06, "loss": 0.0396, "step": 212220 }, { "epoch": 82.42, "learning_rate": 9.01074433656958e-06, "loss": 0.1615, "step": 212230 }, { "epoch": 82.42, "learning_rate": 9.01022653721683e-06, "loss": 0.0937, "step": 212240 }, { "epoch": 82.43, "learning_rate": 9.009708737864078e-06, "loss": 0.0645, "step": 212250 }, { "epoch": 82.43, "learning_rate": 9.009190938511328e-06, "loss": 0.001, "step": 212260 }, { "epoch": 82.43, "learning_rate": 9.008673139158577e-06, "loss": 0.0688, "step": 212270 }, { "epoch": 82.44, "learning_rate": 9.008155339805827e-06, "loss": 0.0239, "step": 212280 }, { "epoch": 82.44, "learning_rate": 9.007637540453075e-06, "loss": 0.0328, "step": 212290 }, { "epoch": 82.45, "learning_rate": 9.007119741100324e-06, "loss": 0.1198, "step": 212300 }, { "epoch": 82.45, "learning_rate": 9.006601941747574e-06, "loss": 0.0408, "step": 212310 }, { "epoch": 82.45, "learning_rate": 9.006084142394824e-06, "loss": 0.0184, "step": 212320 }, { "epoch": 82.46, "learning_rate": 9.005566343042071e-06, "loss": 0.0565, "step": 212330 }, { "epoch": 82.46, "learning_rate": 9.005048543689321e-06, "loss": 0.1639, "step": 212340 }, { "epoch": 82.47, "learning_rate": 9.00453074433657e-06, "loss": 0.0607, "step": 212350 }, { "epoch": 82.47, "learning_rate": 9.00401294498382e-06, "loss": 0.0212, "step": 212360 }, { "epoch": 82.47, "learning_rate": 9.003495145631068e-06, "loss": 0.0018, "step": 212370 }, { "epoch": 82.48, "learning_rate": 9.002977346278318e-06, "loss": 0.0323, "step": 212380 }, { "epoch": 82.48, "learning_rate": 9.002459546925568e-06, "loss": 0.0869, "step": 212390 }, { "epoch": 82.49, "learning_rate": 9.001941747572817e-06, "loss": 0.3047, "step": 212400 }, { "epoch": 82.49, "learning_rate": 9.001423948220065e-06, "loss": 0.0465, "step": 212410 }, { "epoch": 82.49, "learning_rate": 9.000906148867315e-06, "loss": 0.146, "step": 212420 }, { "epoch": 82.5, "learning_rate": 9.000388349514564e-06, "loss": 0.1513, "step": 212430 }, { "epoch": 82.5, "learning_rate": 8.999870550161814e-06, "loss": 0.0419, "step": 212440 }, { "epoch": 82.5, "learning_rate": 8.999352750809062e-06, "loss": 0.0806, "step": 212450 }, { "epoch": 82.51, "learning_rate": 8.998834951456311e-06, "loss": 0.0665, "step": 212460 }, { "epoch": 82.51, "learning_rate": 8.998317152103561e-06, "loss": 0.08, "step": 212470 }, { "epoch": 82.52, "learning_rate": 8.99779935275081e-06, "loss": 0.0198, "step": 212480 }, { "epoch": 82.52, "learning_rate": 8.99728155339806e-06, "loss": 0.0628, "step": 212490 }, { "epoch": 82.52, "learning_rate": 8.996763754045308e-06, "loss": 0.0426, "step": 212500 }, { "epoch": 82.53, "learning_rate": 8.996245954692558e-06, "loss": 0.0481, "step": 212510 }, { "epoch": 82.53, "learning_rate": 8.995728155339807e-06, "loss": 0.1077, "step": 212520 }, { "epoch": 82.54, "learning_rate": 8.995210355987057e-06, "loss": 0.0173, "step": 212530 }, { "epoch": 82.54, "learning_rate": 8.994692556634305e-06, "loss": 0.1043, "step": 212540 }, { "epoch": 82.54, "learning_rate": 8.994174757281555e-06, "loss": 0.0956, "step": 212550 }, { "epoch": 82.55, "learning_rate": 8.993656957928804e-06, "loss": 0.018, "step": 212560 }, { "epoch": 82.55, "learning_rate": 8.993139158576054e-06, "loss": 0.1036, "step": 212570 }, { "epoch": 82.56, "learning_rate": 8.992621359223302e-06, "loss": 0.0644, "step": 212580 }, { "epoch": 82.56, "learning_rate": 8.992103559870551e-06, "loss": 0.1074, "step": 212590 }, { "epoch": 82.56, "learning_rate": 8.991585760517801e-06, "loss": 0.0843, "step": 212600 }, { "epoch": 82.57, "learning_rate": 8.991067961165049e-06, "loss": 0.1225, "step": 212610 }, { "epoch": 82.57, "learning_rate": 8.990550161812299e-06, "loss": 0.1554, "step": 212620 }, { "epoch": 82.57, "learning_rate": 8.990032362459548e-06, "loss": 0.0372, "step": 212630 }, { "epoch": 82.58, "learning_rate": 8.989514563106798e-06, "loss": 0.078, "step": 212640 }, { "epoch": 82.58, "learning_rate": 8.988996763754046e-06, "loss": 0.0154, "step": 212650 }, { "epoch": 82.59, "learning_rate": 8.988478964401295e-06, "loss": 0.0019, "step": 212660 }, { "epoch": 82.59, "learning_rate": 8.987961165048545e-06, "loss": 0.157, "step": 212670 }, { "epoch": 82.59, "learning_rate": 8.987443365695795e-06, "loss": 0.0182, "step": 212680 }, { "epoch": 82.6, "learning_rate": 8.986925566343042e-06, "loss": 0.0191, "step": 212690 }, { "epoch": 82.6, "learning_rate": 8.986407766990292e-06, "loss": 0.103, "step": 212700 }, { "epoch": 82.61, "learning_rate": 8.985889967637542e-06, "loss": 0.0267, "step": 212710 }, { "epoch": 82.61, "learning_rate": 8.985372168284791e-06, "loss": 0.1026, "step": 212720 }, { "epoch": 82.61, "learning_rate": 8.98485436893204e-06, "loss": 0.0027, "step": 212730 }, { "epoch": 82.62, "learning_rate": 8.984336569579289e-06, "loss": 0.0478, "step": 212740 }, { "epoch": 82.62, "learning_rate": 8.983818770226539e-06, "loss": 0.0285, "step": 212750 }, { "epoch": 82.63, "learning_rate": 8.983300970873786e-06, "loss": 0.0001, "step": 212760 }, { "epoch": 82.63, "learning_rate": 8.982783171521036e-06, "loss": 0.0358, "step": 212770 }, { "epoch": 82.63, "learning_rate": 8.982265372168286e-06, "loss": 0.0189, "step": 212780 }, { "epoch": 82.64, "learning_rate": 8.981747572815535e-06, "loss": 0.2348, "step": 212790 }, { "epoch": 82.64, "learning_rate": 8.981229773462783e-06, "loss": 0.0319, "step": 212800 }, { "epoch": 82.64, "learning_rate": 8.980711974110033e-06, "loss": 0.2198, "step": 212810 }, { "epoch": 82.65, "learning_rate": 8.980194174757282e-06, "loss": 0.0057, "step": 212820 }, { "epoch": 82.65, "learning_rate": 8.979676375404532e-06, "loss": 0.0712, "step": 212830 }, { "epoch": 82.66, "learning_rate": 8.97915857605178e-06, "loss": 0.0627, "step": 212840 }, { "epoch": 82.66, "learning_rate": 8.97864077669903e-06, "loss": 0.1685, "step": 212850 }, { "epoch": 82.66, "learning_rate": 8.97812297734628e-06, "loss": 0.0003, "step": 212860 }, { "epoch": 82.67, "learning_rate": 8.977605177993529e-06, "loss": 0.0006, "step": 212870 }, { "epoch": 82.67, "learning_rate": 8.977087378640777e-06, "loss": 0.009, "step": 212880 }, { "epoch": 82.68, "learning_rate": 8.976569579288026e-06, "loss": 0.0667, "step": 212890 }, { "epoch": 82.68, "learning_rate": 8.976051779935276e-06, "loss": 0.1025, "step": 212900 }, { "epoch": 82.68, "learning_rate": 8.975533980582526e-06, "loss": 0.0291, "step": 212910 }, { "epoch": 82.69, "learning_rate": 8.975016181229774e-06, "loss": 0.0093, "step": 212920 }, { "epoch": 82.69, "learning_rate": 8.974498381877023e-06, "loss": 0.0136, "step": 212930 }, { "epoch": 82.7, "learning_rate": 8.973980582524273e-06, "loss": 0.0856, "step": 212940 }, { "epoch": 82.7, "learning_rate": 8.97346278317152e-06, "loss": 0.0352, "step": 212950 }, { "epoch": 82.7, "learning_rate": 8.97294498381877e-06, "loss": 0.1215, "step": 212960 }, { "epoch": 82.71, "learning_rate": 8.97242718446602e-06, "loss": 0.0463, "step": 212970 }, { "epoch": 82.71, "learning_rate": 8.97190938511327e-06, "loss": 0.135, "step": 212980 }, { "epoch": 82.71, "learning_rate": 8.971391585760517e-06, "loss": 0.0673, "step": 212990 }, { "epoch": 82.72, "learning_rate": 8.970873786407767e-06, "loss": 0.0139, "step": 213000 }, { "epoch": 82.72, "learning_rate": 8.970355987055017e-06, "loss": 0.0096, "step": 213010 }, { "epoch": 82.73, "learning_rate": 8.969838187702266e-06, "loss": 0.0751, "step": 213020 }, { "epoch": 82.73, "learning_rate": 8.969320388349514e-06, "loss": 0.0044, "step": 213030 }, { "epoch": 82.73, "learning_rate": 8.968802588996764e-06, "loss": 0.0649, "step": 213040 }, { "epoch": 82.74, "learning_rate": 8.968284789644013e-06, "loss": 0.044, "step": 213050 }, { "epoch": 82.74, "learning_rate": 8.967766990291263e-06, "loss": 0.0372, "step": 213060 }, { "epoch": 82.75, "learning_rate": 8.967249190938511e-06, "loss": 0.0583, "step": 213070 }, { "epoch": 82.75, "learning_rate": 8.96673139158576e-06, "loss": 0.03, "step": 213080 }, { "epoch": 82.75, "learning_rate": 8.96621359223301e-06, "loss": 0.0181, "step": 213090 }, { "epoch": 82.76, "learning_rate": 8.96569579288026e-06, "loss": 0.0073, "step": 213100 }, { "epoch": 82.76, "learning_rate": 8.965177993527508e-06, "loss": 0.0627, "step": 213110 }, { "epoch": 82.77, "learning_rate": 8.964660194174757e-06, "loss": 0.0673, "step": 213120 }, { "epoch": 82.77, "learning_rate": 8.964142394822007e-06, "loss": 0.0934, "step": 213130 }, { "epoch": 82.77, "learning_rate": 8.963624595469257e-06, "loss": 0.1125, "step": 213140 }, { "epoch": 82.78, "learning_rate": 8.963106796116505e-06, "loss": 0.1144, "step": 213150 }, { "epoch": 82.78, "learning_rate": 8.962588996763754e-06, "loss": 0.0726, "step": 213160 }, { "epoch": 82.78, "learning_rate": 8.962071197411004e-06, "loss": 0.0017, "step": 213170 }, { "epoch": 82.79, "learning_rate": 8.961553398058253e-06, "loss": 0.0803, "step": 213180 }, { "epoch": 82.79, "learning_rate": 8.961035598705501e-06, "loss": 0.0041, "step": 213190 }, { "epoch": 82.8, "learning_rate": 8.960517799352751e-06, "loss": 0.0138, "step": 213200 }, { "epoch": 82.8, "learning_rate": 8.96e-06, "loss": 0.0677, "step": 213210 }, { "epoch": 82.8, "learning_rate": 8.95948220064725e-06, "loss": 0.0248, "step": 213220 }, { "epoch": 82.81, "learning_rate": 8.958964401294498e-06, "loss": 0.0842, "step": 213230 }, { "epoch": 82.81, "learning_rate": 8.958446601941748e-06, "loss": 0.1126, "step": 213240 }, { "epoch": 82.82, "learning_rate": 8.957928802588997e-06, "loss": 0.0559, "step": 213250 }, { "epoch": 82.82, "learning_rate": 8.957411003236247e-06, "loss": 0.0081, "step": 213260 }, { "epoch": 82.82, "learning_rate": 8.956893203883495e-06, "loss": 0.0808, "step": 213270 }, { "epoch": 82.83, "learning_rate": 8.956375404530745e-06, "loss": 0.0314, "step": 213280 }, { "epoch": 82.83, "learning_rate": 8.955857605177994e-06, "loss": 0.1403, "step": 213290 }, { "epoch": 82.83, "learning_rate": 8.955339805825244e-06, "loss": 0.0942, "step": 213300 }, { "epoch": 82.84, "learning_rate": 8.954822006472492e-06, "loss": 0.0063, "step": 213310 }, { "epoch": 82.84, "learning_rate": 8.954304207119741e-06, "loss": 0.0713, "step": 213320 }, { "epoch": 82.85, "learning_rate": 8.953786407766991e-06, "loss": 0.0682, "step": 213330 }, { "epoch": 82.85, "learning_rate": 8.95326860841424e-06, "loss": 0.0189, "step": 213340 }, { "epoch": 82.85, "learning_rate": 8.952750809061488e-06, "loss": 0.1195, "step": 213350 }, { "epoch": 82.86, "learning_rate": 8.952233009708738e-06, "loss": 0.0127, "step": 213360 }, { "epoch": 82.86, "learning_rate": 8.951715210355988e-06, "loss": 0.0124, "step": 213370 }, { "epoch": 82.87, "learning_rate": 8.951197411003237e-06, "loss": 0.0196, "step": 213380 }, { "epoch": 82.87, "learning_rate": 8.950679611650485e-06, "loss": 0.1042, "step": 213390 }, { "epoch": 82.87, "learning_rate": 8.950161812297735e-06, "loss": 0.0088, "step": 213400 }, { "epoch": 82.88, "learning_rate": 8.949644012944985e-06, "loss": 0.1113, "step": 213410 }, { "epoch": 82.88, "learning_rate": 8.949126213592234e-06, "loss": 0.1065, "step": 213420 }, { "epoch": 82.89, "learning_rate": 8.948608414239482e-06, "loss": 0.1004, "step": 213430 }, { "epoch": 82.89, "learning_rate": 8.948090614886732e-06, "loss": 0.0067, "step": 213440 }, { "epoch": 82.89, "learning_rate": 8.947572815533981e-06, "loss": 0.0155, "step": 213450 }, { "epoch": 82.9, "learning_rate": 8.947055016181231e-06, "loss": 0.0392, "step": 213460 }, { "epoch": 82.9, "learning_rate": 8.946537216828479e-06, "loss": 0.0231, "step": 213470 }, { "epoch": 82.9, "learning_rate": 8.946019417475728e-06, "loss": 0.0122, "step": 213480 }, { "epoch": 82.91, "learning_rate": 8.945501618122978e-06, "loss": 0.0395, "step": 213490 }, { "epoch": 82.91, "learning_rate": 8.944983818770228e-06, "loss": 0.1326, "step": 213500 }, { "epoch": 82.92, "learning_rate": 8.944466019417476e-06, "loss": 0.0062, "step": 213510 }, { "epoch": 82.92, "learning_rate": 8.943948220064725e-06, "loss": 0.0466, "step": 213520 }, { "epoch": 82.92, "learning_rate": 8.943430420711975e-06, "loss": 0.0094, "step": 213530 }, { "epoch": 82.93, "learning_rate": 8.942912621359224e-06, "loss": 0.0109, "step": 213540 }, { "epoch": 82.93, "learning_rate": 8.942394822006472e-06, "loss": 0.0495, "step": 213550 }, { "epoch": 82.94, "learning_rate": 8.941877022653722e-06, "loss": 0.0615, "step": 213560 }, { "epoch": 82.94, "learning_rate": 8.941359223300972e-06, "loss": 0.0085, "step": 213570 }, { "epoch": 82.94, "learning_rate": 8.940841423948221e-06, "loss": 0.1322, "step": 213580 }, { "epoch": 82.95, "learning_rate": 8.940323624595471e-06, "loss": 0.0167, "step": 213590 }, { "epoch": 82.95, "learning_rate": 8.939805825242719e-06, "loss": 0.022, "step": 213600 }, { "epoch": 82.96, "learning_rate": 8.939288025889968e-06, "loss": 0.0616, "step": 213610 }, { "epoch": 82.96, "learning_rate": 8.938770226537218e-06, "loss": 0.1776, "step": 213620 }, { "epoch": 82.96, "learning_rate": 8.938252427184468e-06, "loss": 0.0204, "step": 213630 }, { "epoch": 82.97, "learning_rate": 8.937734627831716e-06, "loss": 0.0776, "step": 213640 }, { "epoch": 82.97, "learning_rate": 8.937216828478965e-06, "loss": 0.101, "step": 213650 }, { "epoch": 82.97, "learning_rate": 8.936699029126215e-06, "loss": 0.0555, "step": 213660 }, { "epoch": 82.98, "learning_rate": 8.936181229773464e-06, "loss": 0.0359, "step": 213670 }, { "epoch": 82.98, "learning_rate": 8.935663430420712e-06, "loss": 0.0002, "step": 213680 }, { "epoch": 82.99, "learning_rate": 8.935145631067962e-06, "loss": 0.1289, "step": 213690 }, { "epoch": 82.99, "learning_rate": 8.934627831715212e-06, "loss": 0.0442, "step": 213700 }, { "epoch": 82.99, "learning_rate": 8.934110032362461e-06, "loss": 0.014, "step": 213710 }, { "epoch": 83.0, "learning_rate": 8.933592233009709e-06, "loss": 0.051, "step": 213720 }, { "epoch": 83.0, "eval_accuracy": 0.9504814305364512, "eval_loss": 0.3729594647884369, "eval_runtime": 8.1912, "eval_samples_per_second": 443.767, "eval_steps_per_second": 55.547, "step": 213725 }, { "epoch": 83.0, "learning_rate": 8.933074433656959e-06, "loss": 0.1098, "step": 213730 }, { "epoch": 83.01, "learning_rate": 8.932556634304208e-06, "loss": 0.0499, "step": 213740 }, { "epoch": 83.01, "learning_rate": 8.932038834951458e-06, "loss": 0.0066, "step": 213750 }, { "epoch": 83.01, "learning_rate": 8.931521035598706e-06, "loss": 0.0163, "step": 213760 }, { "epoch": 83.02, "learning_rate": 8.931003236245956e-06, "loss": 0.0374, "step": 213770 }, { "epoch": 83.02, "learning_rate": 8.930485436893205e-06, "loss": 0.0599, "step": 213780 }, { "epoch": 83.03, "learning_rate": 8.929967637540455e-06, "loss": 0.0071, "step": 213790 }, { "epoch": 83.03, "learning_rate": 8.929449838187703e-06, "loss": 0.0014, "step": 213800 }, { "epoch": 83.03, "learning_rate": 8.928932038834952e-06, "loss": 0.0833, "step": 213810 }, { "epoch": 83.04, "learning_rate": 8.928414239482202e-06, "loss": 0.0328, "step": 213820 }, { "epoch": 83.04, "learning_rate": 8.927896440129452e-06, "loss": 0.1229, "step": 213830 }, { "epoch": 83.04, "learning_rate": 8.9273786407767e-06, "loss": 0.0003, "step": 213840 }, { "epoch": 83.05, "learning_rate": 8.926860841423949e-06, "loss": 0.0003, "step": 213850 }, { "epoch": 83.05, "learning_rate": 8.926343042071199e-06, "loss": 0.0988, "step": 213860 }, { "epoch": 83.06, "learning_rate": 8.925825242718448e-06, "loss": 0.0204, "step": 213870 }, { "epoch": 83.06, "learning_rate": 8.925307443365696e-06, "loss": 0.009, "step": 213880 }, { "epoch": 83.06, "learning_rate": 8.924789644012946e-06, "loss": 0.022, "step": 213890 }, { "epoch": 83.07, "learning_rate": 8.924271844660195e-06, "loss": 0.0489, "step": 213900 }, { "epoch": 83.07, "learning_rate": 8.923754045307445e-06, "loss": 0.0012, "step": 213910 }, { "epoch": 83.08, "learning_rate": 8.923236245954693e-06, "loss": 0.0163, "step": 213920 }, { "epoch": 83.08, "learning_rate": 8.922718446601943e-06, "loss": 0.134, "step": 213930 }, { "epoch": 83.08, "learning_rate": 8.922200647249192e-06, "loss": 0.0222, "step": 213940 }, { "epoch": 83.09, "learning_rate": 8.921682847896442e-06, "loss": 0.0367, "step": 213950 }, { "epoch": 83.09, "learning_rate": 8.92116504854369e-06, "loss": 0.0072, "step": 213960 }, { "epoch": 83.1, "learning_rate": 8.92064724919094e-06, "loss": 0.0987, "step": 213970 }, { "epoch": 83.1, "learning_rate": 8.920129449838189e-06, "loss": 0.045, "step": 213980 }, { "epoch": 83.1, "learning_rate": 8.919611650485439e-06, "loss": 0.0079, "step": 213990 }, { "epoch": 83.11, "learning_rate": 8.919093851132687e-06, "loss": 0.0264, "step": 214000 }, { "epoch": 83.11, "learning_rate": 8.918576051779936e-06, "loss": 0.0023, "step": 214010 }, { "epoch": 83.11, "learning_rate": 8.918058252427186e-06, "loss": 0.0706, "step": 214020 }, { "epoch": 83.12, "learning_rate": 8.917540453074435e-06, "loss": 0.1012, "step": 214030 }, { "epoch": 83.12, "learning_rate": 8.917022653721683e-06, "loss": 0.4475, "step": 214040 }, { "epoch": 83.13, "learning_rate": 8.916504854368933e-06, "loss": 0.0534, "step": 214050 }, { "epoch": 83.13, "learning_rate": 8.915987055016183e-06, "loss": 0.0013, "step": 214060 }, { "epoch": 83.13, "learning_rate": 8.915469255663432e-06, "loss": 0.0358, "step": 214070 }, { "epoch": 83.14, "learning_rate": 8.91495145631068e-06, "loss": 0.1854, "step": 214080 }, { "epoch": 83.14, "learning_rate": 8.91443365695793e-06, "loss": 0.1059, "step": 214090 }, { "epoch": 83.15, "learning_rate": 8.91391585760518e-06, "loss": 0.0113, "step": 214100 }, { "epoch": 83.15, "learning_rate": 8.913398058252429e-06, "loss": 0.0003, "step": 214110 }, { "epoch": 83.15, "learning_rate": 8.912880258899677e-06, "loss": 0.0219, "step": 214120 }, { "epoch": 83.16, "learning_rate": 8.912362459546927e-06, "loss": 0.0044, "step": 214130 }, { "epoch": 83.16, "learning_rate": 8.911844660194176e-06, "loss": 0.0986, "step": 214140 }, { "epoch": 83.17, "learning_rate": 8.911326860841426e-06, "loss": 0.0152, "step": 214150 }, { "epoch": 83.17, "learning_rate": 8.910809061488674e-06, "loss": 0.156, "step": 214160 }, { "epoch": 83.17, "learning_rate": 8.910291262135923e-06, "loss": 0.0292, "step": 214170 }, { "epoch": 83.18, "learning_rate": 8.909773462783173e-06, "loss": 0.0572, "step": 214180 }, { "epoch": 83.18, "learning_rate": 8.909255663430423e-06, "loss": 0.1479, "step": 214190 }, { "epoch": 83.18, "learning_rate": 8.90873786407767e-06, "loss": 0.1086, "step": 214200 }, { "epoch": 83.19, "learning_rate": 8.90822006472492e-06, "loss": 0.1411, "step": 214210 }, { "epoch": 83.19, "learning_rate": 8.90770226537217e-06, "loss": 0.0868, "step": 214220 }, { "epoch": 83.2, "learning_rate": 8.907184466019418e-06, "loss": 0.0156, "step": 214230 }, { "epoch": 83.2, "learning_rate": 8.906666666666667e-06, "loss": 0.0269, "step": 214240 }, { "epoch": 83.2, "learning_rate": 8.906148867313917e-06, "loss": 0.0622, "step": 214250 }, { "epoch": 83.21, "learning_rate": 8.905631067961166e-06, "loss": 0.0629, "step": 214260 }, { "epoch": 83.21, "learning_rate": 8.905113268608414e-06, "loss": 0.0429, "step": 214270 }, { "epoch": 83.22, "learning_rate": 8.904595469255664e-06, "loss": 0.0393, "step": 214280 }, { "epoch": 83.22, "learning_rate": 8.904077669902914e-06, "loss": 0.0764, "step": 214290 }, { "epoch": 83.22, "learning_rate": 8.903559870550163e-06, "loss": 0.0189, "step": 214300 }, { "epoch": 83.23, "learning_rate": 8.903042071197411e-06, "loss": 0.0022, "step": 214310 }, { "epoch": 83.23, "learning_rate": 8.90252427184466e-06, "loss": 0.1649, "step": 214320 }, { "epoch": 83.23, "learning_rate": 8.90200647249191e-06, "loss": 0.0724, "step": 214330 }, { "epoch": 83.24, "learning_rate": 8.90148867313916e-06, "loss": 0.054, "step": 214340 }, { "epoch": 83.24, "learning_rate": 8.900970873786408e-06, "loss": 0.0529, "step": 214350 }, { "epoch": 83.25, "learning_rate": 8.900453074433658e-06, "loss": 0.0237, "step": 214360 }, { "epoch": 83.25, "learning_rate": 8.899935275080907e-06, "loss": 0.0179, "step": 214370 }, { "epoch": 83.25, "learning_rate": 8.899417475728157e-06, "loss": 0.0168, "step": 214380 }, { "epoch": 83.26, "learning_rate": 8.898899676375405e-06, "loss": 0.0198, "step": 214390 }, { "epoch": 83.26, "learning_rate": 8.898381877022654e-06, "loss": 0.0487, "step": 214400 }, { "epoch": 83.27, "learning_rate": 8.897864077669904e-06, "loss": 0.0765, "step": 214410 }, { "epoch": 83.27, "learning_rate": 8.897346278317152e-06, "loss": 0.0168, "step": 214420 }, { "epoch": 83.27, "learning_rate": 8.896828478964401e-06, "loss": 0.0656, "step": 214430 }, { "epoch": 83.28, "learning_rate": 8.896310679611651e-06, "loss": 0.0444, "step": 214440 }, { "epoch": 83.28, "learning_rate": 8.8957928802589e-06, "loss": 0.053, "step": 214450 }, { "epoch": 83.29, "learning_rate": 8.895275080906149e-06, "loss": 0.0092, "step": 214460 }, { "epoch": 83.29, "learning_rate": 8.894757281553398e-06, "loss": 0.1552, "step": 214470 }, { "epoch": 83.29, "learning_rate": 8.894239482200648e-06, "loss": 0.0823, "step": 214480 }, { "epoch": 83.3, "learning_rate": 8.893721682847898e-06, "loss": 0.0751, "step": 214490 }, { "epoch": 83.3, "learning_rate": 8.893203883495145e-06, "loss": 0.0404, "step": 214500 }, { "epoch": 83.3, "learning_rate": 8.892686084142395e-06, "loss": 0.0222, "step": 214510 }, { "epoch": 83.31, "learning_rate": 8.892168284789645e-06, "loss": 0.0298, "step": 214520 }, { "epoch": 83.31, "learning_rate": 8.891650485436894e-06, "loss": 0.0313, "step": 214530 }, { "epoch": 83.32, "learning_rate": 8.891132686084142e-06, "loss": 0.1718, "step": 214540 }, { "epoch": 83.32, "learning_rate": 8.890614886731392e-06, "loss": 0.1025, "step": 214550 }, { "epoch": 83.32, "learning_rate": 8.890097087378641e-06, "loss": 0.0083, "step": 214560 }, { "epoch": 83.33, "learning_rate": 8.88957928802589e-06, "loss": 0.0212, "step": 214570 }, { "epoch": 83.33, "learning_rate": 8.889061488673139e-06, "loss": 0.0219, "step": 214580 }, { "epoch": 83.34, "learning_rate": 8.888543689320389e-06, "loss": 0.0744, "step": 214590 }, { "epoch": 83.34, "learning_rate": 8.888025889967638e-06, "loss": 0.1091, "step": 214600 }, { "epoch": 83.34, "learning_rate": 8.887508090614886e-06, "loss": 0.0032, "step": 214610 }, { "epoch": 83.35, "learning_rate": 8.886990291262136e-06, "loss": 0.0749, "step": 214620 }, { "epoch": 83.35, "learning_rate": 8.886472491909385e-06, "loss": 0.0822, "step": 214630 }, { "epoch": 83.36, "learning_rate": 8.885954692556635e-06, "loss": 0.0545, "step": 214640 }, { "epoch": 83.36, "learning_rate": 8.885436893203883e-06, "loss": 0.0108, "step": 214650 }, { "epoch": 83.36, "learning_rate": 8.884919093851133e-06, "loss": 0.0044, "step": 214660 }, { "epoch": 83.37, "learning_rate": 8.884401294498382e-06, "loss": 0.0693, "step": 214670 }, { "epoch": 83.37, "learning_rate": 8.883883495145632e-06, "loss": 0.0358, "step": 214680 }, { "epoch": 83.37, "learning_rate": 8.88336569579288e-06, "loss": 0.0135, "step": 214690 }, { "epoch": 83.38, "learning_rate": 8.88284789644013e-06, "loss": 0.0189, "step": 214700 }, { "epoch": 83.38, "learning_rate": 8.882330097087379e-06, "loss": 0.0468, "step": 214710 }, { "epoch": 83.39, "learning_rate": 8.881812297734629e-06, "loss": 0.0198, "step": 214720 }, { "epoch": 83.39, "learning_rate": 8.881294498381878e-06, "loss": 0.0001, "step": 214730 }, { "epoch": 83.39, "learning_rate": 8.880776699029126e-06, "loss": 0.1825, "step": 214740 }, { "epoch": 83.4, "learning_rate": 8.880258899676376e-06, "loss": 0.2035, "step": 214750 }, { "epoch": 83.4, "learning_rate": 8.879741100323625e-06, "loss": 0.066, "step": 214760 }, { "epoch": 83.41, "learning_rate": 8.879223300970875e-06, "loss": 0.1557, "step": 214770 }, { "epoch": 83.41, "learning_rate": 8.878705501618123e-06, "loss": 0.0627, "step": 214780 }, { "epoch": 83.41, "learning_rate": 8.878187702265372e-06, "loss": 0.0228, "step": 214790 }, { "epoch": 83.42, "learning_rate": 8.877669902912622e-06, "loss": 0.0078, "step": 214800 }, { "epoch": 83.42, "learning_rate": 8.877152103559872e-06, "loss": 0.0004, "step": 214810 }, { "epoch": 83.43, "learning_rate": 8.87663430420712e-06, "loss": 0.1188, "step": 214820 }, { "epoch": 83.43, "learning_rate": 8.87611650485437e-06, "loss": 0.0374, "step": 214830 }, { "epoch": 83.43, "learning_rate": 8.875598705501619e-06, "loss": 0.0693, "step": 214840 }, { "epoch": 83.44, "learning_rate": 8.875080906148869e-06, "loss": 0.0133, "step": 214850 }, { "epoch": 83.44, "learning_rate": 8.874563106796116e-06, "loss": 0.0125, "step": 214860 }, { "epoch": 83.44, "learning_rate": 8.874045307443366e-06, "loss": 0.0244, "step": 214870 }, { "epoch": 83.45, "learning_rate": 8.873527508090616e-06, "loss": 0.1499, "step": 214880 }, { "epoch": 83.45, "learning_rate": 8.873009708737865e-06, "loss": 0.0141, "step": 214890 }, { "epoch": 83.46, "learning_rate": 8.872491909385113e-06, "loss": 0.0682, "step": 214900 }, { "epoch": 83.46, "learning_rate": 8.871974110032363e-06, "loss": 0.0065, "step": 214910 }, { "epoch": 83.46, "learning_rate": 8.871456310679612e-06, "loss": 0.0311, "step": 214920 }, { "epoch": 83.47, "learning_rate": 8.870938511326862e-06, "loss": 0.0119, "step": 214930 }, { "epoch": 83.47, "learning_rate": 8.87042071197411e-06, "loss": 0.0419, "step": 214940 }, { "epoch": 83.48, "learning_rate": 8.86990291262136e-06, "loss": 0.2019, "step": 214950 }, { "epoch": 83.48, "learning_rate": 8.86938511326861e-06, "loss": 0.1012, "step": 214960 }, { "epoch": 83.48, "learning_rate": 8.868867313915859e-06, "loss": 0.0468, "step": 214970 }, { "epoch": 83.49, "learning_rate": 8.868349514563107e-06, "loss": 0.0715, "step": 214980 }, { "epoch": 83.49, "learning_rate": 8.867831715210356e-06, "loss": 0.0452, "step": 214990 }, { "epoch": 83.5, "learning_rate": 8.867313915857606e-06, "loss": 0.1162, "step": 215000 }, { "epoch": 83.5, "learning_rate": 8.866796116504856e-06, "loss": 0.063, "step": 215010 }, { "epoch": 83.5, "learning_rate": 8.866278317152104e-06, "loss": 0.2283, "step": 215020 }, { "epoch": 83.51, "learning_rate": 8.865760517799353e-06, "loss": 0.0829, "step": 215030 }, { "epoch": 83.51, "learning_rate": 8.865242718446603e-06, "loss": 0.0742, "step": 215040 }, { "epoch": 83.51, "learning_rate": 8.864724919093852e-06, "loss": 0.0088, "step": 215050 }, { "epoch": 83.52, "learning_rate": 8.8642071197411e-06, "loss": 0.0195, "step": 215060 }, { "epoch": 83.52, "learning_rate": 8.86368932038835e-06, "loss": 0.03, "step": 215070 }, { "epoch": 83.53, "learning_rate": 8.8631715210356e-06, "loss": 0.0994, "step": 215080 }, { "epoch": 83.53, "learning_rate": 8.86265372168285e-06, "loss": 0.1239, "step": 215090 }, { "epoch": 83.53, "learning_rate": 8.862135922330097e-06, "loss": 0.0786, "step": 215100 }, { "epoch": 83.54, "learning_rate": 8.861618122977347e-06, "loss": 0.1017, "step": 215110 }, { "epoch": 83.54, "learning_rate": 8.861100323624596e-06, "loss": 0.0781, "step": 215120 }, { "epoch": 83.55, "learning_rate": 8.860582524271846e-06, "loss": 0.0346, "step": 215130 }, { "epoch": 83.55, "learning_rate": 8.860064724919094e-06, "loss": 0.122, "step": 215140 }, { "epoch": 83.55, "learning_rate": 8.859546925566344e-06, "loss": 0.0039, "step": 215150 }, { "epoch": 83.56, "learning_rate": 8.859029126213593e-06, "loss": 0.1319, "step": 215160 }, { "epoch": 83.56, "learning_rate": 8.858511326860843e-06, "loss": 0.016, "step": 215170 }, { "epoch": 83.57, "learning_rate": 8.85799352750809e-06, "loss": 0.0245, "step": 215180 }, { "epoch": 83.57, "learning_rate": 8.85747572815534e-06, "loss": 0.047, "step": 215190 }, { "epoch": 83.57, "learning_rate": 8.85695792880259e-06, "loss": 0.0337, "step": 215200 }, { "epoch": 83.58, "learning_rate": 8.85644012944984e-06, "loss": 0.1295, "step": 215210 }, { "epoch": 83.58, "learning_rate": 8.855922330097087e-06, "loss": 0.0565, "step": 215220 }, { "epoch": 83.58, "learning_rate": 8.855404530744337e-06, "loss": 0.0267, "step": 215230 }, { "epoch": 83.59, "learning_rate": 8.854886731391587e-06, "loss": 0.016, "step": 215240 }, { "epoch": 83.59, "learning_rate": 8.854368932038836e-06, "loss": 0.0772, "step": 215250 }, { "epoch": 83.6, "learning_rate": 8.853851132686084e-06, "loss": 0.0591, "step": 215260 }, { "epoch": 83.6, "learning_rate": 8.853333333333334e-06, "loss": 0.1231, "step": 215270 }, { "epoch": 83.6, "learning_rate": 8.852815533980583e-06, "loss": 0.0927, "step": 215280 }, { "epoch": 83.61, "learning_rate": 8.852297734627833e-06, "loss": 0.0002, "step": 215290 }, { "epoch": 83.61, "learning_rate": 8.851779935275083e-06, "loss": 0.1272, "step": 215300 }, { "epoch": 83.62, "learning_rate": 8.85126213592233e-06, "loss": 0.0886, "step": 215310 }, { "epoch": 83.62, "learning_rate": 8.85074433656958e-06, "loss": 0.0854, "step": 215320 }, { "epoch": 83.62, "learning_rate": 8.85022653721683e-06, "loss": 0.0002, "step": 215330 }, { "epoch": 83.63, "learning_rate": 8.84970873786408e-06, "loss": 0.0048, "step": 215340 }, { "epoch": 83.63, "learning_rate": 8.849190938511327e-06, "loss": 0.0356, "step": 215350 }, { "epoch": 83.63, "learning_rate": 8.848673139158577e-06, "loss": 0.0718, "step": 215360 }, { "epoch": 83.64, "learning_rate": 8.848155339805827e-06, "loss": 0.0477, "step": 215370 }, { "epoch": 83.64, "learning_rate": 8.847637540453076e-06, "loss": 0.0518, "step": 215380 }, { "epoch": 83.65, "learning_rate": 8.847119741100324e-06, "loss": 0.0438, "step": 215390 }, { "epoch": 83.65, "learning_rate": 8.846601941747574e-06, "loss": 0.0002, "step": 215400 }, { "epoch": 83.65, "learning_rate": 8.846084142394823e-06, "loss": 0.0879, "step": 215410 }, { "epoch": 83.66, "learning_rate": 8.845566343042073e-06, "loss": 0.0007, "step": 215420 }, { "epoch": 83.66, "learning_rate": 8.845048543689321e-06, "loss": 0.1372, "step": 215430 }, { "epoch": 83.67, "learning_rate": 8.84453074433657e-06, "loss": 0.119, "step": 215440 }, { "epoch": 83.67, "learning_rate": 8.84401294498382e-06, "loss": 0.0847, "step": 215450 }, { "epoch": 83.67, "learning_rate": 8.84349514563107e-06, "loss": 0.0223, "step": 215460 }, { "epoch": 83.68, "learning_rate": 8.842977346278318e-06, "loss": 0.0241, "step": 215470 }, { "epoch": 83.68, "learning_rate": 8.842459546925567e-06, "loss": 0.0065, "step": 215480 }, { "epoch": 83.69, "learning_rate": 8.841941747572817e-06, "loss": 0.0808, "step": 215490 }, { "epoch": 83.69, "learning_rate": 8.841423948220067e-06, "loss": 0.0267, "step": 215500 }, { "epoch": 83.69, "learning_rate": 8.840906148867315e-06, "loss": 0.011, "step": 215510 }, { "epoch": 83.7, "learning_rate": 8.840388349514564e-06, "loss": 0.0317, "step": 215520 }, { "epoch": 83.7, "learning_rate": 8.839870550161814e-06, "loss": 0.1552, "step": 215530 }, { "epoch": 83.7, "learning_rate": 8.839352750809063e-06, "loss": 0.0489, "step": 215540 }, { "epoch": 83.71, "learning_rate": 8.838834951456311e-06, "loss": 0.0951, "step": 215550 }, { "epoch": 83.71, "learning_rate": 8.838317152103561e-06, "loss": 0.114, "step": 215560 }, { "epoch": 83.72, "learning_rate": 8.83779935275081e-06, "loss": 0.0991, "step": 215570 }, { "epoch": 83.72, "learning_rate": 8.83728155339806e-06, "loss": 0.0182, "step": 215580 }, { "epoch": 83.72, "learning_rate": 8.836763754045308e-06, "loss": 0.0762, "step": 215590 }, { "epoch": 83.73, "learning_rate": 8.836245954692558e-06, "loss": 0.0306, "step": 215600 }, { "epoch": 83.73, "learning_rate": 8.835728155339807e-06, "loss": 0.095, "step": 215610 }, { "epoch": 83.74, "learning_rate": 8.835210355987057e-06, "loss": 0.0215, "step": 215620 }, { "epoch": 83.74, "learning_rate": 8.834692556634305e-06, "loss": 0.1118, "step": 215630 }, { "epoch": 83.74, "learning_rate": 8.834174757281554e-06, "loss": 0.0015, "step": 215640 }, { "epoch": 83.75, "learning_rate": 8.833656957928804e-06, "loss": 0.0333, "step": 215650 }, { "epoch": 83.75, "learning_rate": 8.833139158576054e-06, "loss": 0.1615, "step": 215660 }, { "epoch": 83.76, "learning_rate": 8.832621359223302e-06, "loss": 0.0735, "step": 215670 }, { "epoch": 83.76, "learning_rate": 8.832103559870551e-06, "loss": 0.0251, "step": 215680 }, { "epoch": 83.76, "learning_rate": 8.831585760517801e-06, "loss": 0.1706, "step": 215690 }, { "epoch": 83.77, "learning_rate": 8.831067961165049e-06, "loss": 0.0211, "step": 215700 }, { "epoch": 83.77, "learning_rate": 8.830550161812298e-06, "loss": 0.0278, "step": 215710 }, { "epoch": 83.77, "learning_rate": 8.830032362459548e-06, "loss": 0.0661, "step": 215720 }, { "epoch": 83.78, "learning_rate": 8.829514563106798e-06, "loss": 0.1933, "step": 215730 }, { "epoch": 83.78, "learning_rate": 8.828996763754046e-06, "loss": 0.0851, "step": 215740 }, { "epoch": 83.79, "learning_rate": 8.828478964401295e-06, "loss": 0.0726, "step": 215750 }, { "epoch": 83.79, "learning_rate": 8.827961165048545e-06, "loss": 0.0646, "step": 215760 }, { "epoch": 83.79, "learning_rate": 8.827443365695794e-06, "loss": 0.0011, "step": 215770 }, { "epoch": 83.8, "learning_rate": 8.826925566343042e-06, "loss": 0.0132, "step": 215780 }, { "epoch": 83.8, "learning_rate": 8.826407766990292e-06, "loss": 0.0184, "step": 215790 }, { "epoch": 83.81, "learning_rate": 8.825889967637542e-06, "loss": 0.0629, "step": 215800 }, { "epoch": 83.81, "learning_rate": 8.825372168284791e-06, "loss": 0.1478, "step": 215810 }, { "epoch": 83.81, "learning_rate": 8.824854368932039e-06, "loss": 0.0122, "step": 215820 }, { "epoch": 83.82, "learning_rate": 8.824336569579289e-06, "loss": 0.0446, "step": 215830 }, { "epoch": 83.82, "learning_rate": 8.823818770226538e-06, "loss": 0.0767, "step": 215840 }, { "epoch": 83.83, "learning_rate": 8.823300970873788e-06, "loss": 0.0982, "step": 215850 }, { "epoch": 83.83, "learning_rate": 8.822783171521036e-06, "loss": 0.0168, "step": 215860 }, { "epoch": 83.83, "learning_rate": 8.822265372168286e-06, "loss": 0.0118, "step": 215870 }, { "epoch": 83.84, "learning_rate": 8.821747572815535e-06, "loss": 0.0273, "step": 215880 }, { "epoch": 83.84, "learning_rate": 8.821229773462783e-06, "loss": 0.0015, "step": 215890 }, { "epoch": 83.84, "learning_rate": 8.820711974110033e-06, "loss": 0.0967, "step": 215900 }, { "epoch": 83.85, "learning_rate": 8.820194174757282e-06, "loss": 0.013, "step": 215910 }, { "epoch": 83.85, "learning_rate": 8.819676375404532e-06, "loss": 0.0319, "step": 215920 }, { "epoch": 83.86, "learning_rate": 8.81915857605178e-06, "loss": 0.0492, "step": 215930 }, { "epoch": 83.86, "learning_rate": 8.81864077669903e-06, "loss": 0.0377, "step": 215940 }, { "epoch": 83.86, "learning_rate": 8.818122977346279e-06, "loss": 0.0511, "step": 215950 }, { "epoch": 83.87, "learning_rate": 8.817605177993529e-06, "loss": 0.0575, "step": 215960 }, { "epoch": 83.87, "learning_rate": 8.817087378640777e-06, "loss": 0.084, "step": 215970 }, { "epoch": 83.88, "learning_rate": 8.816569579288026e-06, "loss": 0.1589, "step": 215980 }, { "epoch": 83.88, "learning_rate": 8.816051779935276e-06, "loss": 0.0389, "step": 215990 }, { "epoch": 83.88, "learning_rate": 8.815533980582525e-06, "loss": 0.002, "step": 216000 }, { "epoch": 83.89, "learning_rate": 8.815016181229773e-06, "loss": 0.0727, "step": 216010 }, { "epoch": 83.89, "learning_rate": 8.814498381877023e-06, "loss": 0.0472, "step": 216020 }, { "epoch": 83.9, "learning_rate": 8.813980582524273e-06, "loss": 0.1136, "step": 216030 }, { "epoch": 83.9, "learning_rate": 8.81346278317152e-06, "loss": 0.0428, "step": 216040 }, { "epoch": 83.9, "learning_rate": 8.81294498381877e-06, "loss": 0.05, "step": 216050 }, { "epoch": 83.91, "learning_rate": 8.81242718446602e-06, "loss": 0.0922, "step": 216060 }, { "epoch": 83.91, "learning_rate": 8.81190938511327e-06, "loss": 0.2485, "step": 216070 }, { "epoch": 83.91, "learning_rate": 8.811391585760517e-06, "loss": 0.0204, "step": 216080 }, { "epoch": 83.92, "learning_rate": 8.810873786407767e-06, "loss": 0.1234, "step": 216090 }, { "epoch": 83.92, "learning_rate": 8.810355987055017e-06, "loss": 0.0004, "step": 216100 }, { "epoch": 83.93, "learning_rate": 8.809838187702266e-06, "loss": 0.0003, "step": 216110 }, { "epoch": 83.93, "learning_rate": 8.809320388349514e-06, "loss": 0.0002, "step": 216120 }, { "epoch": 83.93, "learning_rate": 8.808802588996764e-06, "loss": 0.018, "step": 216130 }, { "epoch": 83.94, "learning_rate": 8.808284789644013e-06, "loss": 0.0285, "step": 216140 }, { "epoch": 83.94, "learning_rate": 8.807766990291263e-06, "loss": 0.0996, "step": 216150 }, { "epoch": 83.95, "learning_rate": 8.807249190938511e-06, "loss": 0.0603, "step": 216160 }, { "epoch": 83.95, "learning_rate": 8.80673139158576e-06, "loss": 0.0576, "step": 216170 }, { "epoch": 83.95, "learning_rate": 8.80621359223301e-06, "loss": 0.0375, "step": 216180 }, { "epoch": 83.96, "learning_rate": 8.80569579288026e-06, "loss": 0.0823, "step": 216190 }, { "epoch": 83.96, "learning_rate": 8.805177993527508e-06, "loss": 0.0122, "step": 216200 }, { "epoch": 83.97, "learning_rate": 8.804660194174757e-06, "loss": 0.1507, "step": 216210 }, { "epoch": 83.97, "learning_rate": 8.804142394822007e-06, "loss": 0.2317, "step": 216220 }, { "epoch": 83.97, "learning_rate": 8.803624595469257e-06, "loss": 0.1536, "step": 216230 }, { "epoch": 83.98, "learning_rate": 8.803106796116504e-06, "loss": 0.0143, "step": 216240 }, { "epoch": 83.98, "learning_rate": 8.802588996763754e-06, "loss": 0.0279, "step": 216250 }, { "epoch": 83.98, "learning_rate": 8.802071197411004e-06, "loss": 0.1296, "step": 216260 }, { "epoch": 83.99, "learning_rate": 8.801553398058253e-06, "loss": 0.0577, "step": 216270 }, { "epoch": 83.99, "learning_rate": 8.801035598705501e-06, "loss": 0.0102, "step": 216280 }, { "epoch": 84.0, "learning_rate": 8.80051779935275e-06, "loss": 0.0372, "step": 216290 }, { "epoch": 84.0, "learning_rate": 8.8e-06, "loss": 0.0049, "step": 216300 }, { "epoch": 84.0, "eval_accuracy": 0.9526822558459422, "eval_loss": 0.35490280389785767, "eval_runtime": 8.159, "eval_samples_per_second": 445.52, "eval_steps_per_second": 55.767, "step": 216300 }, { "epoch": 84.0, "learning_rate": 8.79948220064725e-06, "loss": 0.1428, "step": 216310 }, { "epoch": 84.01, "learning_rate": 8.798964401294498e-06, "loss": 0.02, "step": 216320 }, { "epoch": 84.01, "learning_rate": 8.798446601941748e-06, "loss": 0.0963, "step": 216330 }, { "epoch": 84.02, "learning_rate": 8.797928802588997e-06, "loss": 0.1017, "step": 216340 }, { "epoch": 84.02, "learning_rate": 8.797411003236247e-06, "loss": 0.0261, "step": 216350 }, { "epoch": 84.02, "learning_rate": 8.796893203883495e-06, "loss": 0.0274, "step": 216360 }, { "epoch": 84.03, "learning_rate": 8.796375404530744e-06, "loss": 0.1114, "step": 216370 }, { "epoch": 84.03, "learning_rate": 8.795857605177994e-06, "loss": 0.0248, "step": 216380 }, { "epoch": 84.03, "learning_rate": 8.795339805825244e-06, "loss": 0.1006, "step": 216390 }, { "epoch": 84.04, "learning_rate": 8.794822006472493e-06, "loss": 0.0223, "step": 216400 }, { "epoch": 84.04, "learning_rate": 8.794304207119741e-06, "loss": 0.195, "step": 216410 }, { "epoch": 84.05, "learning_rate": 8.79378640776699e-06, "loss": 0.116, "step": 216420 }, { "epoch": 84.05, "learning_rate": 8.79326860841424e-06, "loss": 0.0439, "step": 216430 }, { "epoch": 84.05, "learning_rate": 8.79275080906149e-06, "loss": 0.038, "step": 216440 }, { "epoch": 84.06, "learning_rate": 8.792233009708738e-06, "loss": 0.1371, "step": 216450 }, { "epoch": 84.06, "learning_rate": 8.791715210355988e-06, "loss": 0.0164, "step": 216460 }, { "epoch": 84.07, "learning_rate": 8.791197411003237e-06, "loss": 0.0555, "step": 216470 }, { "epoch": 84.07, "learning_rate": 8.790679611650487e-06, "loss": 0.0181, "step": 216480 }, { "epoch": 84.07, "learning_rate": 8.790161812297735e-06, "loss": 0.1066, "step": 216490 }, { "epoch": 84.08, "learning_rate": 8.789644012944984e-06, "loss": 0.0909, "step": 216500 }, { "epoch": 84.08, "learning_rate": 8.789126213592234e-06, "loss": 0.1629, "step": 216510 }, { "epoch": 84.09, "learning_rate": 8.788608414239484e-06, "loss": 0.1455, "step": 216520 }, { "epoch": 84.09, "learning_rate": 8.788090614886731e-06, "loss": 0.0657, "step": 216530 }, { "epoch": 84.09, "learning_rate": 8.787572815533981e-06, "loss": 0.0439, "step": 216540 }, { "epoch": 84.1, "learning_rate": 8.78705501618123e-06, "loss": 0.1125, "step": 216550 }, { "epoch": 84.1, "learning_rate": 8.78653721682848e-06, "loss": 0.0748, "step": 216560 }, { "epoch": 84.1, "learning_rate": 8.786019417475728e-06, "loss": 0.045, "step": 216570 }, { "epoch": 84.11, "learning_rate": 8.785501618122978e-06, "loss": 0.1503, "step": 216580 }, { "epoch": 84.11, "learning_rate": 8.784983818770228e-06, "loss": 0.0876, "step": 216590 }, { "epoch": 84.12, "learning_rate": 8.784466019417477e-06, "loss": 0.0132, "step": 216600 }, { "epoch": 84.12, "learning_rate": 8.783948220064725e-06, "loss": 0.1259, "step": 216610 }, { "epoch": 84.12, "learning_rate": 8.783430420711975e-06, "loss": 0.0339, "step": 216620 }, { "epoch": 84.13, "learning_rate": 8.782912621359224e-06, "loss": 0.107, "step": 216630 }, { "epoch": 84.13, "learning_rate": 8.782394822006474e-06, "loss": 0.016, "step": 216640 }, { "epoch": 84.14, "learning_rate": 8.781877022653722e-06, "loss": 0.0445, "step": 216650 }, { "epoch": 84.14, "learning_rate": 8.781359223300971e-06, "loss": 0.0286, "step": 216660 }, { "epoch": 84.14, "learning_rate": 8.780841423948221e-06, "loss": 0.0399, "step": 216670 }, { "epoch": 84.15, "learning_rate": 8.78032362459547e-06, "loss": 0.031, "step": 216680 }, { "epoch": 84.15, "learning_rate": 8.779805825242719e-06, "loss": 0.0032, "step": 216690 }, { "epoch": 84.16, "learning_rate": 8.779288025889968e-06, "loss": 0.0718, "step": 216700 }, { "epoch": 84.16, "learning_rate": 8.778770226537218e-06, "loss": 0.081, "step": 216710 }, { "epoch": 84.16, "learning_rate": 8.778252427184467e-06, "loss": 0.0455, "step": 216720 }, { "epoch": 84.17, "learning_rate": 8.777734627831715e-06, "loss": 0.0001, "step": 216730 }, { "epoch": 84.17, "learning_rate": 8.777216828478965e-06, "loss": 0.0315, "step": 216740 }, { "epoch": 84.17, "learning_rate": 8.776699029126215e-06, "loss": 0.017, "step": 216750 }, { "epoch": 84.18, "learning_rate": 8.776181229773464e-06, "loss": 0.1011, "step": 216760 }, { "epoch": 84.18, "learning_rate": 8.775663430420712e-06, "loss": 0.0818, "step": 216770 }, { "epoch": 84.19, "learning_rate": 8.775145631067962e-06, "loss": 0.0232, "step": 216780 }, { "epoch": 84.19, "learning_rate": 8.774627831715211e-06, "loss": 0.0331, "step": 216790 }, { "epoch": 84.19, "learning_rate": 8.774110032362461e-06, "loss": 0.0793, "step": 216800 }, { "epoch": 84.2, "learning_rate": 8.773592233009709e-06, "loss": 0.0746, "step": 216810 }, { "epoch": 84.2, "learning_rate": 8.773074433656959e-06, "loss": 0.0976, "step": 216820 }, { "epoch": 84.21, "learning_rate": 8.772556634304208e-06, "loss": 0.067, "step": 216830 }, { "epoch": 84.21, "learning_rate": 8.772038834951458e-06, "loss": 0.0884, "step": 216840 }, { "epoch": 84.21, "learning_rate": 8.771521035598706e-06, "loss": 0.0868, "step": 216850 }, { "epoch": 84.22, "learning_rate": 8.771003236245955e-06, "loss": 0.001, "step": 216860 }, { "epoch": 84.22, "learning_rate": 8.770485436893205e-06, "loss": 0.002, "step": 216870 }, { "epoch": 84.23, "learning_rate": 8.769967637540455e-06, "loss": 0.0474, "step": 216880 }, { "epoch": 84.23, "learning_rate": 8.769449838187703e-06, "loss": 0.1686, "step": 216890 }, { "epoch": 84.23, "learning_rate": 8.768932038834952e-06, "loss": 0.0803, "step": 216900 }, { "epoch": 84.24, "learning_rate": 8.768414239482202e-06, "loss": 0.0061, "step": 216910 }, { "epoch": 84.24, "learning_rate": 8.767896440129451e-06, "loss": 0.0091, "step": 216920 }, { "epoch": 84.24, "learning_rate": 8.7673786407767e-06, "loss": 0.0074, "step": 216930 }, { "epoch": 84.25, "learning_rate": 8.766860841423949e-06, "loss": 0.0012, "step": 216940 }, { "epoch": 84.25, "learning_rate": 8.766343042071199e-06, "loss": 0.0125, "step": 216950 }, { "epoch": 84.26, "learning_rate": 8.765825242718448e-06, "loss": 0.0371, "step": 216960 }, { "epoch": 84.26, "learning_rate": 8.765307443365698e-06, "loss": 0.0866, "step": 216970 }, { "epoch": 84.26, "learning_rate": 8.764789644012946e-06, "loss": 0.0515, "step": 216980 }, { "epoch": 84.27, "learning_rate": 8.764271844660195e-06, "loss": 0.0427, "step": 216990 }, { "epoch": 84.27, "learning_rate": 8.763754045307445e-06, "loss": 0.0635, "step": 217000 }, { "epoch": 84.28, "learning_rate": 8.763236245954695e-06, "loss": 0.0636, "step": 217010 }, { "epoch": 84.28, "learning_rate": 8.762718446601942e-06, "loss": 0.0156, "step": 217020 }, { "epoch": 84.28, "learning_rate": 8.762200647249192e-06, "loss": 0.038, "step": 217030 }, { "epoch": 84.29, "learning_rate": 8.761682847896442e-06, "loss": 0.0449, "step": 217040 }, { "epoch": 84.29, "learning_rate": 8.761165048543691e-06, "loss": 0.0042, "step": 217050 }, { "epoch": 84.3, "learning_rate": 8.76064724919094e-06, "loss": 0.0721, "step": 217060 }, { "epoch": 84.3, "learning_rate": 8.760129449838189e-06, "loss": 0.0233, "step": 217070 }, { "epoch": 84.3, "learning_rate": 8.759611650485438e-06, "loss": 0.1353, "step": 217080 }, { "epoch": 84.31, "learning_rate": 8.759093851132688e-06, "loss": 0.0975, "step": 217090 }, { "epoch": 84.31, "learning_rate": 8.758576051779936e-06, "loss": 0.0681, "step": 217100 }, { "epoch": 84.31, "learning_rate": 8.758058252427186e-06, "loss": 0.0273, "step": 217110 }, { "epoch": 84.32, "learning_rate": 8.757540453074435e-06, "loss": 0.1793, "step": 217120 }, { "epoch": 84.32, "learning_rate": 8.757022653721685e-06, "loss": 0.1479, "step": 217130 }, { "epoch": 84.33, "learning_rate": 8.756504854368933e-06, "loss": 0.1144, "step": 217140 }, { "epoch": 84.33, "learning_rate": 8.755987055016182e-06, "loss": 0.0196, "step": 217150 }, { "epoch": 84.33, "learning_rate": 8.755469255663432e-06, "loss": 0.15, "step": 217160 }, { "epoch": 84.34, "learning_rate": 8.75495145631068e-06, "loss": 0.0434, "step": 217170 }, { "epoch": 84.34, "learning_rate": 8.75443365695793e-06, "loss": 0.1618, "step": 217180 }, { "epoch": 84.35, "learning_rate": 8.75391585760518e-06, "loss": 0.0708, "step": 217190 }, { "epoch": 84.35, "learning_rate": 8.753398058252429e-06, "loss": 0.0901, "step": 217200 }, { "epoch": 84.35, "learning_rate": 8.752880258899677e-06, "loss": 0.0628, "step": 217210 }, { "epoch": 84.36, "learning_rate": 8.752362459546926e-06, "loss": 0.1114, "step": 217220 }, { "epoch": 84.36, "learning_rate": 8.751844660194176e-06, "loss": 0.0763, "step": 217230 }, { "epoch": 84.37, "learning_rate": 8.751326860841426e-06, "loss": 0.0352, "step": 217240 }, { "epoch": 84.37, "learning_rate": 8.750809061488674e-06, "loss": 0.0816, "step": 217250 }, { "epoch": 84.37, "learning_rate": 8.750291262135923e-06, "loss": 0.0877, "step": 217260 }, { "epoch": 84.38, "learning_rate": 8.749773462783173e-06, "loss": 0.0328, "step": 217270 }, { "epoch": 84.38, "learning_rate": 8.749255663430422e-06, "loss": 0.1195, "step": 217280 }, { "epoch": 84.38, "learning_rate": 8.74873786407767e-06, "loss": 0.0867, "step": 217290 }, { "epoch": 84.39, "learning_rate": 8.74822006472492e-06, "loss": 0.0692, "step": 217300 }, { "epoch": 84.39, "learning_rate": 8.74770226537217e-06, "loss": 0.0863, "step": 217310 }, { "epoch": 84.4, "learning_rate": 8.747184466019417e-06, "loss": 0.1513, "step": 217320 }, { "epoch": 84.4, "learning_rate": 8.746666666666667e-06, "loss": 0.0147, "step": 217330 }, { "epoch": 84.4, "learning_rate": 8.746148867313917e-06, "loss": 0.0003, "step": 217340 }, { "epoch": 84.41, "learning_rate": 8.745631067961166e-06, "loss": 0.0639, "step": 217350 }, { "epoch": 84.41, "learning_rate": 8.745113268608414e-06, "loss": 0.0519, "step": 217360 }, { "epoch": 84.42, "learning_rate": 8.744595469255664e-06, "loss": 0.0527, "step": 217370 }, { "epoch": 84.42, "learning_rate": 8.744077669902913e-06, "loss": 0.0447, "step": 217380 }, { "epoch": 84.42, "learning_rate": 8.743559870550163e-06, "loss": 0.0295, "step": 217390 }, { "epoch": 84.43, "learning_rate": 8.743042071197411e-06, "loss": 0.1065, "step": 217400 }, { "epoch": 84.43, "learning_rate": 8.74252427184466e-06, "loss": 0.0124, "step": 217410 }, { "epoch": 84.43, "learning_rate": 8.74200647249191e-06, "loss": 0.0042, "step": 217420 }, { "epoch": 84.44, "learning_rate": 8.74148867313916e-06, "loss": 0.0754, "step": 217430 }, { "epoch": 84.44, "learning_rate": 8.740970873786408e-06, "loss": 0.0052, "step": 217440 }, { "epoch": 84.45, "learning_rate": 8.740453074433657e-06, "loss": 0.0923, "step": 217450 }, { "epoch": 84.45, "learning_rate": 8.739935275080907e-06, "loss": 0.0206, "step": 217460 }, { "epoch": 84.45, "learning_rate": 8.739417475728157e-06, "loss": 0.0666, "step": 217470 }, { "epoch": 84.46, "learning_rate": 8.738899676375405e-06, "loss": 0.0713, "step": 217480 }, { "epoch": 84.46, "learning_rate": 8.738381877022654e-06, "loss": 0.0126, "step": 217490 }, { "epoch": 84.47, "learning_rate": 8.737864077669904e-06, "loss": 0.0005, "step": 217500 }, { "epoch": 84.47, "learning_rate": 8.737346278317152e-06, "loss": 0.1545, "step": 217510 }, { "epoch": 84.47, "learning_rate": 8.736828478964401e-06, "loss": 0.0166, "step": 217520 }, { "epoch": 84.48, "learning_rate": 8.736310679611651e-06, "loss": 0.0358, "step": 217530 }, { "epoch": 84.48, "learning_rate": 8.7357928802589e-06, "loss": 0.0331, "step": 217540 }, { "epoch": 84.49, "learning_rate": 8.735275080906148e-06, "loss": 0.0097, "step": 217550 }, { "epoch": 84.49, "learning_rate": 8.734757281553398e-06, "loss": 0.1009, "step": 217560 }, { "epoch": 84.49, "learning_rate": 8.734239482200648e-06, "loss": 0.0648, "step": 217570 }, { "epoch": 84.5, "learning_rate": 8.733721682847897e-06, "loss": 0.0189, "step": 217580 }, { "epoch": 84.5, "learning_rate": 8.733203883495145e-06, "loss": 0.0449, "step": 217590 }, { "epoch": 84.5, "learning_rate": 8.732686084142395e-06, "loss": 0.056, "step": 217600 }, { "epoch": 84.51, "learning_rate": 8.732168284789645e-06, "loss": 0.0946, "step": 217610 }, { "epoch": 84.51, "learning_rate": 8.731650485436894e-06, "loss": 0.0687, "step": 217620 }, { "epoch": 84.52, "learning_rate": 8.731132686084142e-06, "loss": 0.0817, "step": 217630 }, { "epoch": 84.52, "learning_rate": 8.730614886731392e-06, "loss": 0.1189, "step": 217640 }, { "epoch": 84.52, "learning_rate": 8.730097087378641e-06, "loss": 0.0893, "step": 217650 }, { "epoch": 84.53, "learning_rate": 8.729579288025891e-06, "loss": 0.0001, "step": 217660 }, { "epoch": 84.53, "learning_rate": 8.729061488673139e-06, "loss": 0.0159, "step": 217670 }, { "epoch": 84.54, "learning_rate": 8.728543689320388e-06, "loss": 0.0485, "step": 217680 }, { "epoch": 84.54, "learning_rate": 8.728025889967638e-06, "loss": 0.0937, "step": 217690 }, { "epoch": 84.54, "learning_rate": 8.727508090614888e-06, "loss": 0.0894, "step": 217700 }, { "epoch": 84.55, "learning_rate": 8.726990291262136e-06, "loss": 0.1008, "step": 217710 }, { "epoch": 84.55, "learning_rate": 8.726472491909385e-06, "loss": 0.0005, "step": 217720 }, { "epoch": 84.56, "learning_rate": 8.725954692556635e-06, "loss": 0.0004, "step": 217730 }, { "epoch": 84.56, "learning_rate": 8.725436893203884e-06, "loss": 0.0662, "step": 217740 }, { "epoch": 84.56, "learning_rate": 8.724919093851132e-06, "loss": 0.1215, "step": 217750 }, { "epoch": 84.57, "learning_rate": 8.724401294498382e-06, "loss": 0.0011, "step": 217760 }, { "epoch": 84.57, "learning_rate": 8.723883495145632e-06, "loss": 0.0452, "step": 217770 }, { "epoch": 84.57, "learning_rate": 8.723365695792881e-06, "loss": 0.0338, "step": 217780 }, { "epoch": 84.58, "learning_rate": 8.72284789644013e-06, "loss": 0.0005, "step": 217790 }, { "epoch": 84.58, "learning_rate": 8.722330097087379e-06, "loss": 0.0741, "step": 217800 }, { "epoch": 84.59, "learning_rate": 8.721812297734628e-06, "loss": 0.0395, "step": 217810 }, { "epoch": 84.59, "learning_rate": 8.721294498381878e-06, "loss": 0.2081, "step": 217820 }, { "epoch": 84.59, "learning_rate": 8.720776699029126e-06, "loss": 0.0311, "step": 217830 }, { "epoch": 84.6, "learning_rate": 8.720258899676376e-06, "loss": 0.0512, "step": 217840 }, { "epoch": 84.6, "learning_rate": 8.719741100323625e-06, "loss": 0.1498, "step": 217850 }, { "epoch": 84.61, "learning_rate": 8.719223300970875e-06, "loss": 0.1302, "step": 217860 }, { "epoch": 84.61, "learning_rate": 8.718705501618123e-06, "loss": 0.0002, "step": 217870 }, { "epoch": 84.61, "learning_rate": 8.718187702265372e-06, "loss": 0.1226, "step": 217880 }, { "epoch": 84.62, "learning_rate": 8.717669902912622e-06, "loss": 0.0303, "step": 217890 }, { "epoch": 84.62, "learning_rate": 8.717152103559872e-06, "loss": 0.0673, "step": 217900 }, { "epoch": 84.63, "learning_rate": 8.71663430420712e-06, "loss": 0.1485, "step": 217910 }, { "epoch": 84.63, "learning_rate": 8.716116504854369e-06, "loss": 0.0125, "step": 217920 }, { "epoch": 84.63, "learning_rate": 8.715598705501619e-06, "loss": 0.1405, "step": 217930 }, { "epoch": 84.64, "learning_rate": 8.715080906148868e-06, "loss": 0.041, "step": 217940 }, { "epoch": 84.64, "learning_rate": 8.714563106796116e-06, "loss": 0.0159, "step": 217950 }, { "epoch": 84.64, "learning_rate": 8.714045307443366e-06, "loss": 0.1471, "step": 217960 }, { "epoch": 84.65, "learning_rate": 8.713527508090616e-06, "loss": 0.0051, "step": 217970 }, { "epoch": 84.65, "learning_rate": 8.713009708737865e-06, "loss": 0.0796, "step": 217980 }, { "epoch": 84.66, "learning_rate": 8.712491909385113e-06, "loss": 0.0603, "step": 217990 }, { "epoch": 84.66, "learning_rate": 8.711974110032363e-06, "loss": 0.0078, "step": 218000 }, { "epoch": 84.66, "learning_rate": 8.711456310679612e-06, "loss": 0.0374, "step": 218010 }, { "epoch": 84.67, "learning_rate": 8.710938511326862e-06, "loss": 0.0116, "step": 218020 }, { "epoch": 84.67, "learning_rate": 8.71042071197411e-06, "loss": 0.1142, "step": 218030 }, { "epoch": 84.68, "learning_rate": 8.70990291262136e-06, "loss": 0.1628, "step": 218040 }, { "epoch": 84.68, "learning_rate": 8.709385113268609e-06, "loss": 0.0564, "step": 218050 }, { "epoch": 84.68, "learning_rate": 8.708867313915859e-06, "loss": 0.0161, "step": 218060 }, { "epoch": 84.69, "learning_rate": 8.708349514563107e-06, "loss": 0.0096, "step": 218070 }, { "epoch": 84.69, "learning_rate": 8.707831715210356e-06, "loss": 0.0164, "step": 218080 }, { "epoch": 84.7, "learning_rate": 8.707313915857606e-06, "loss": 0.0659, "step": 218090 }, { "epoch": 84.7, "learning_rate": 8.706796116504855e-06, "loss": 0.0141, "step": 218100 }, { "epoch": 84.7, "learning_rate": 8.706278317152105e-06, "loss": 0.0036, "step": 218110 }, { "epoch": 84.71, "learning_rate": 8.705760517799353e-06, "loss": 0.0607, "step": 218120 }, { "epoch": 84.71, "learning_rate": 8.705242718446603e-06, "loss": 0.0737, "step": 218130 }, { "epoch": 84.71, "learning_rate": 8.704724919093852e-06, "loss": 0.0557, "step": 218140 }, { "epoch": 84.72, "learning_rate": 8.704207119741102e-06, "loss": 0.1873, "step": 218150 }, { "epoch": 84.72, "learning_rate": 8.70368932038835e-06, "loss": 0.0148, "step": 218160 }, { "epoch": 84.73, "learning_rate": 8.7031715210356e-06, "loss": 0.0011, "step": 218170 }, { "epoch": 84.73, "learning_rate": 8.702653721682849e-06, "loss": 0.1181, "step": 218180 }, { "epoch": 84.73, "learning_rate": 8.702135922330099e-06, "loss": 0.0616, "step": 218190 }, { "epoch": 84.74, "learning_rate": 8.701618122977347e-06, "loss": 0.0215, "step": 218200 }, { "epoch": 84.74, "learning_rate": 8.701100323624596e-06, "loss": 0.1044, "step": 218210 }, { "epoch": 84.75, "learning_rate": 8.700582524271846e-06, "loss": 0.0301, "step": 218220 }, { "epoch": 84.75, "learning_rate": 8.700064724919095e-06, "loss": 0.0488, "step": 218230 }, { "epoch": 84.75, "learning_rate": 8.699546925566343e-06, "loss": 0.0014, "step": 218240 }, { "epoch": 84.76, "learning_rate": 8.699029126213593e-06, "loss": 0.0617, "step": 218250 }, { "epoch": 84.76, "learning_rate": 8.698511326860843e-06, "loss": 0.0535, "step": 218260 }, { "epoch": 84.77, "learning_rate": 8.697993527508092e-06, "loss": 0.0036, "step": 218270 }, { "epoch": 84.77, "learning_rate": 8.69747572815534e-06, "loss": 0.0569, "step": 218280 }, { "epoch": 84.77, "learning_rate": 8.69695792880259e-06, "loss": 0.0016, "step": 218290 }, { "epoch": 84.78, "learning_rate": 8.69644012944984e-06, "loss": 0.101, "step": 218300 }, { "epoch": 84.78, "learning_rate": 8.695922330097089e-06, "loss": 0.0077, "step": 218310 }, { "epoch": 84.78, "learning_rate": 8.695404530744337e-06, "loss": 0.0393, "step": 218320 }, { "epoch": 84.79, "learning_rate": 8.694886731391587e-06, "loss": 0.0282, "step": 218330 }, { "epoch": 84.79, "learning_rate": 8.694368932038836e-06, "loss": 0.0006, "step": 218340 }, { "epoch": 84.8, "learning_rate": 8.693851132686086e-06, "loss": 0.0091, "step": 218350 }, { "epoch": 84.8, "learning_rate": 8.693333333333334e-06, "loss": 0.0381, "step": 218360 }, { "epoch": 84.8, "learning_rate": 8.692815533980583e-06, "loss": 0.0081, "step": 218370 }, { "epoch": 84.81, "learning_rate": 8.692297734627833e-06, "loss": 0.0486, "step": 218380 }, { "epoch": 84.81, "learning_rate": 8.691779935275083e-06, "loss": 0.2375, "step": 218390 }, { "epoch": 84.82, "learning_rate": 8.69126213592233e-06, "loss": 0.0454, "step": 218400 }, { "epoch": 84.82, "learning_rate": 8.69074433656958e-06, "loss": 0.0624, "step": 218410 }, { "epoch": 84.82, "learning_rate": 8.69022653721683e-06, "loss": 0.0011, "step": 218420 }, { "epoch": 84.83, "learning_rate": 8.68970873786408e-06, "loss": 0.0106, "step": 218430 }, { "epoch": 84.83, "learning_rate": 8.689190938511327e-06, "loss": 0.1086, "step": 218440 }, { "epoch": 84.83, "learning_rate": 8.688673139158577e-06, "loss": 0.0467, "step": 218450 }, { "epoch": 84.84, "learning_rate": 8.688155339805826e-06, "loss": 0.0002, "step": 218460 }, { "epoch": 84.84, "learning_rate": 8.687637540453076e-06, "loss": 0.0427, "step": 218470 }, { "epoch": 84.85, "learning_rate": 8.687119741100324e-06, "loss": 0.1414, "step": 218480 }, { "epoch": 84.85, "learning_rate": 8.686601941747574e-06, "loss": 0.1014, "step": 218490 }, { "epoch": 84.85, "learning_rate": 8.686084142394823e-06, "loss": 0.0795, "step": 218500 }, { "epoch": 84.86, "learning_rate": 8.685566343042073e-06, "loss": 0.1764, "step": 218510 }, { "epoch": 84.86, "learning_rate": 8.68504854368932e-06, "loss": 0.0545, "step": 218520 }, { "epoch": 84.87, "learning_rate": 8.68453074433657e-06, "loss": 0.0603, "step": 218530 }, { "epoch": 84.87, "learning_rate": 8.68401294498382e-06, "loss": 0.0208, "step": 218540 }, { "epoch": 84.87, "learning_rate": 8.68349514563107e-06, "loss": 0.0018, "step": 218550 }, { "epoch": 84.88, "learning_rate": 8.682977346278318e-06, "loss": 0.0552, "step": 218560 }, { "epoch": 84.88, "learning_rate": 8.682459546925567e-06, "loss": 0.1088, "step": 218570 }, { "epoch": 84.89, "learning_rate": 8.681941747572817e-06, "loss": 0.0162, "step": 218580 }, { "epoch": 84.89, "learning_rate": 8.681423948220066e-06, "loss": 0.1549, "step": 218590 }, { "epoch": 84.89, "learning_rate": 8.680906148867314e-06, "loss": 0.0232, "step": 218600 }, { "epoch": 84.9, "learning_rate": 8.680388349514564e-06, "loss": 0.031, "step": 218610 }, { "epoch": 84.9, "learning_rate": 8.679870550161814e-06, "loss": 0.0956, "step": 218620 }, { "epoch": 84.9, "learning_rate": 8.679352750809063e-06, "loss": 0.1014, "step": 218630 }, { "epoch": 84.91, "learning_rate": 8.678834951456311e-06, "loss": 0.1449, "step": 218640 }, { "epoch": 84.91, "learning_rate": 8.67831715210356e-06, "loss": 0.015, "step": 218650 }, { "epoch": 84.92, "learning_rate": 8.67779935275081e-06, "loss": 0.0103, "step": 218660 }, { "epoch": 84.92, "learning_rate": 8.67728155339806e-06, "loss": 0.0809, "step": 218670 }, { "epoch": 84.92, "learning_rate": 8.676763754045308e-06, "loss": 0.0363, "step": 218680 }, { "epoch": 84.93, "learning_rate": 8.676245954692558e-06, "loss": 0.0817, "step": 218690 }, { "epoch": 84.93, "learning_rate": 8.675728155339807e-06, "loss": 0.0588, "step": 218700 }, { "epoch": 84.94, "learning_rate": 8.675210355987057e-06, "loss": 0.1293, "step": 218710 }, { "epoch": 84.94, "learning_rate": 8.674692556634305e-06, "loss": 0.1119, "step": 218720 }, { "epoch": 84.94, "learning_rate": 8.674174757281554e-06, "loss": 0.0087, "step": 218730 }, { "epoch": 84.95, "learning_rate": 8.673656957928804e-06, "loss": 0.0328, "step": 218740 }, { "epoch": 84.95, "learning_rate": 8.673139158576054e-06, "loss": 0.0284, "step": 218750 }, { "epoch": 84.96, "learning_rate": 8.672621359223301e-06, "loss": 0.0258, "step": 218760 }, { "epoch": 84.96, "learning_rate": 8.672103559870551e-06, "loss": 0.0349, "step": 218770 }, { "epoch": 84.96, "learning_rate": 8.6715857605178e-06, "loss": 0.0191, "step": 218780 }, { "epoch": 84.97, "learning_rate": 8.671067961165049e-06, "loss": 0.0619, "step": 218790 }, { "epoch": 84.97, "learning_rate": 8.670550161812298e-06, "loss": 0.0293, "step": 218800 }, { "epoch": 84.97, "learning_rate": 8.670032362459548e-06, "loss": 0.0529, "step": 218810 }, { "epoch": 84.98, "learning_rate": 8.669514563106797e-06, "loss": 0.0748, "step": 218820 }, { "epoch": 84.98, "learning_rate": 8.668996763754045e-06, "loss": 0.0754, "step": 218830 }, { "epoch": 84.99, "learning_rate": 8.668478964401295e-06, "loss": 0.0671, "step": 218840 }, { "epoch": 84.99, "learning_rate": 8.667961165048545e-06, "loss": 0.0402, "step": 218850 }, { "epoch": 84.99, "learning_rate": 8.667443365695794e-06, "loss": 0.0009, "step": 218860 }, { "epoch": 85.0, "learning_rate": 8.666925566343042e-06, "loss": 0.043, "step": 218870 }, { "epoch": 85.0, "eval_accuracy": 0.9524071526822558, "eval_loss": 0.35918599367141724, "eval_runtime": 8.1935, "eval_samples_per_second": 443.646, "eval_steps_per_second": 55.532, "step": 218875 }, { "epoch": 85.0, "learning_rate": 8.666407766990292e-06, "loss": 0.001, "step": 218880 }, { "epoch": 85.01, "learning_rate": 8.665889967637541e-06, "loss": 0.1453, "step": 218890 }, { "epoch": 85.01, "learning_rate": 8.665372168284791e-06, "loss": 0.018, "step": 218900 }, { "epoch": 85.01, "learning_rate": 8.664854368932039e-06, "loss": 0.125, "step": 218910 }, { "epoch": 85.02, "learning_rate": 8.664336569579289e-06, "loss": 0.0006, "step": 218920 }, { "epoch": 85.02, "learning_rate": 8.663818770226538e-06, "loss": 0.1196, "step": 218930 }, { "epoch": 85.03, "learning_rate": 8.663300970873788e-06, "loss": 0.0656, "step": 218940 }, { "epoch": 85.03, "learning_rate": 8.662783171521036e-06, "loss": 0.014, "step": 218950 }, { "epoch": 85.03, "learning_rate": 8.662265372168285e-06, "loss": 0.0271, "step": 218960 }, { "epoch": 85.04, "learning_rate": 8.661747572815535e-06, "loss": 0.1233, "step": 218970 }, { "epoch": 85.04, "learning_rate": 8.661229773462783e-06, "loss": 0.0518, "step": 218980 }, { "epoch": 85.04, "learning_rate": 8.660711974110033e-06, "loss": 0.0131, "step": 218990 }, { "epoch": 85.05, "learning_rate": 8.660194174757282e-06, "loss": 0.1272, "step": 219000 }, { "epoch": 85.05, "learning_rate": 8.659676375404532e-06, "loss": 0.1033, "step": 219010 }, { "epoch": 85.06, "learning_rate": 8.65915857605178e-06, "loss": 0.0531, "step": 219020 }, { "epoch": 85.06, "learning_rate": 8.65864077669903e-06, "loss": 0.1653, "step": 219030 }, { "epoch": 85.06, "learning_rate": 8.658122977346279e-06, "loss": 0.0346, "step": 219040 }, { "epoch": 85.07, "learning_rate": 8.657605177993529e-06, "loss": 0.0261, "step": 219050 }, { "epoch": 85.07, "learning_rate": 8.657087378640776e-06, "loss": 0.1185, "step": 219060 }, { "epoch": 85.08, "learning_rate": 8.656569579288026e-06, "loss": 0.1434, "step": 219070 }, { "epoch": 85.08, "learning_rate": 8.656051779935276e-06, "loss": 0.0559, "step": 219080 }, { "epoch": 85.08, "learning_rate": 8.655533980582525e-06, "loss": 0.0001, "step": 219090 }, { "epoch": 85.09, "learning_rate": 8.655016181229773e-06, "loss": 0.0129, "step": 219100 }, { "epoch": 85.09, "learning_rate": 8.654498381877023e-06, "loss": 0.1186, "step": 219110 }, { "epoch": 85.1, "learning_rate": 8.653980582524272e-06, "loss": 0.0148, "step": 219120 }, { "epoch": 85.1, "learning_rate": 8.65346278317152e-06, "loss": 0.048, "step": 219130 }, { "epoch": 85.1, "learning_rate": 8.65294498381877e-06, "loss": 0.0062, "step": 219140 }, { "epoch": 85.11, "learning_rate": 8.65242718446602e-06, "loss": 0.0658, "step": 219150 }, { "epoch": 85.11, "learning_rate": 8.65190938511327e-06, "loss": 0.0089, "step": 219160 }, { "epoch": 85.11, "learning_rate": 8.651391585760517e-06, "loss": 0.0469, "step": 219170 }, { "epoch": 85.12, "learning_rate": 8.650873786407767e-06, "loss": 0.0822, "step": 219180 }, { "epoch": 85.12, "learning_rate": 8.650355987055016e-06, "loss": 0.0388, "step": 219190 }, { "epoch": 85.13, "learning_rate": 8.649838187702266e-06, "loss": 0.0624, "step": 219200 }, { "epoch": 85.13, "learning_rate": 8.649320388349516e-06, "loss": 0.0133, "step": 219210 }, { "epoch": 85.13, "learning_rate": 8.648802588996764e-06, "loss": 0.0049, "step": 219220 }, { "epoch": 85.14, "learning_rate": 8.648284789644013e-06, "loss": 0.0051, "step": 219230 }, { "epoch": 85.14, "learning_rate": 8.647766990291263e-06, "loss": 0.0692, "step": 219240 }, { "epoch": 85.15, "learning_rate": 8.647249190938512e-06, "loss": 0.0421, "step": 219250 }, { "epoch": 85.15, "learning_rate": 8.64673139158576e-06, "loss": 0.0335, "step": 219260 }, { "epoch": 85.15, "learning_rate": 8.64621359223301e-06, "loss": 0.0186, "step": 219270 }, { "epoch": 85.16, "learning_rate": 8.64569579288026e-06, "loss": 0.0055, "step": 219280 }, { "epoch": 85.16, "learning_rate": 8.64517799352751e-06, "loss": 0.0274, "step": 219290 }, { "epoch": 85.17, "learning_rate": 8.644660194174757e-06, "loss": 0.0169, "step": 219300 }, { "epoch": 85.17, "learning_rate": 8.644142394822007e-06, "loss": 0.0429, "step": 219310 }, { "epoch": 85.17, "learning_rate": 8.643624595469256e-06, "loss": 0.0432, "step": 219320 }, { "epoch": 85.18, "learning_rate": 8.643106796116506e-06, "loss": 0.0447, "step": 219330 }, { "epoch": 85.18, "learning_rate": 8.642588996763754e-06, "loss": 0.0067, "step": 219340 }, { "epoch": 85.18, "learning_rate": 8.642071197411004e-06, "loss": 0.04, "step": 219350 }, { "epoch": 85.19, "learning_rate": 8.641553398058253e-06, "loss": 0.0018, "step": 219360 }, { "epoch": 85.19, "learning_rate": 8.641035598705503e-06, "loss": 0.0073, "step": 219370 }, { "epoch": 85.2, "learning_rate": 8.64051779935275e-06, "loss": 0.1129, "step": 219380 }, { "epoch": 85.2, "learning_rate": 8.64e-06, "loss": 0.0565, "step": 219390 }, { "epoch": 85.2, "learning_rate": 8.63948220064725e-06, "loss": 0.0376, "step": 219400 }, { "epoch": 85.21, "learning_rate": 8.6389644012945e-06, "loss": 0.0082, "step": 219410 }, { "epoch": 85.21, "learning_rate": 8.638446601941747e-06, "loss": 0.0347, "step": 219420 }, { "epoch": 85.22, "learning_rate": 8.637928802588997e-06, "loss": 0.1035, "step": 219430 }, { "epoch": 85.22, "learning_rate": 8.637411003236247e-06, "loss": 0.1807, "step": 219440 }, { "epoch": 85.22, "learning_rate": 8.636893203883496e-06, "loss": 0.0125, "step": 219450 }, { "epoch": 85.23, "learning_rate": 8.636375404530744e-06, "loss": 0.0553, "step": 219460 }, { "epoch": 85.23, "learning_rate": 8.635857605177994e-06, "loss": 0.0963, "step": 219470 }, { "epoch": 85.23, "learning_rate": 8.635339805825243e-06, "loss": 0.0538, "step": 219480 }, { "epoch": 85.24, "learning_rate": 8.634822006472493e-06, "loss": 0.1437, "step": 219490 }, { "epoch": 85.24, "learning_rate": 8.634304207119741e-06, "loss": 0.0294, "step": 219500 }, { "epoch": 85.25, "learning_rate": 8.63378640776699e-06, "loss": 0.1208, "step": 219510 }, { "epoch": 85.25, "learning_rate": 8.63326860841424e-06, "loss": 0.0196, "step": 219520 }, { "epoch": 85.25, "learning_rate": 8.63275080906149e-06, "loss": 0.0951, "step": 219530 }, { "epoch": 85.26, "learning_rate": 8.632233009708738e-06, "loss": 0.1345, "step": 219540 }, { "epoch": 85.26, "learning_rate": 8.631715210355987e-06, "loss": 0.0938, "step": 219550 }, { "epoch": 85.27, "learning_rate": 8.631197411003237e-06, "loss": 0.0295, "step": 219560 }, { "epoch": 85.27, "learning_rate": 8.630679611650487e-06, "loss": 0.0015, "step": 219570 }, { "epoch": 85.27, "learning_rate": 8.630161812297735e-06, "loss": 0.0454, "step": 219580 }, { "epoch": 85.28, "learning_rate": 8.629644012944984e-06, "loss": 0.1038, "step": 219590 }, { "epoch": 85.28, "learning_rate": 8.629126213592234e-06, "loss": 0.0561, "step": 219600 }, { "epoch": 85.29, "learning_rate": 8.628608414239483e-06, "loss": 0.0002, "step": 219610 }, { "epoch": 85.29, "learning_rate": 8.628090614886731e-06, "loss": 0.1394, "step": 219620 }, { "epoch": 85.29, "learning_rate": 8.627572815533981e-06, "loss": 0.1044, "step": 219630 }, { "epoch": 85.3, "learning_rate": 8.62705501618123e-06, "loss": 0.0281, "step": 219640 }, { "epoch": 85.3, "learning_rate": 8.62653721682848e-06, "loss": 0.002, "step": 219650 }, { "epoch": 85.3, "learning_rate": 8.626019417475728e-06, "loss": 0.014, "step": 219660 }, { "epoch": 85.31, "learning_rate": 8.625501618122978e-06, "loss": 0.1707, "step": 219670 }, { "epoch": 85.31, "learning_rate": 8.624983818770227e-06, "loss": 0.0916, "step": 219680 }, { "epoch": 85.32, "learning_rate": 8.624466019417477e-06, "loss": 0.0069, "step": 219690 }, { "epoch": 85.32, "learning_rate": 8.623948220064725e-06, "loss": 0.1947, "step": 219700 }, { "epoch": 85.32, "learning_rate": 8.623430420711975e-06, "loss": 0.0025, "step": 219710 }, { "epoch": 85.33, "learning_rate": 8.622912621359224e-06, "loss": 0.073, "step": 219720 }, { "epoch": 85.33, "learning_rate": 8.622394822006474e-06, "loss": 0.1819, "step": 219730 }, { "epoch": 85.34, "learning_rate": 8.621877022653722e-06, "loss": 0.0635, "step": 219740 }, { "epoch": 85.34, "learning_rate": 8.621359223300971e-06, "loss": 0.0528, "step": 219750 }, { "epoch": 85.34, "learning_rate": 8.620841423948221e-06, "loss": 0.0619, "step": 219760 }, { "epoch": 85.35, "learning_rate": 8.62032362459547e-06, "loss": 0.0126, "step": 219770 }, { "epoch": 85.35, "learning_rate": 8.61980582524272e-06, "loss": 0.0033, "step": 219780 }, { "epoch": 85.36, "learning_rate": 8.619288025889968e-06, "loss": 0.1256, "step": 219790 }, { "epoch": 85.36, "learning_rate": 8.618770226537218e-06, "loss": 0.0075, "step": 219800 }, { "epoch": 85.36, "learning_rate": 8.618252427184467e-06, "loss": 0.0821, "step": 219810 }, { "epoch": 85.37, "learning_rate": 8.617734627831717e-06, "loss": 0.0203, "step": 219820 }, { "epoch": 85.37, "learning_rate": 8.617216828478965e-06, "loss": 0.0095, "step": 219830 }, { "epoch": 85.37, "learning_rate": 8.616699029126214e-06, "loss": 0.0187, "step": 219840 }, { "epoch": 85.38, "learning_rate": 8.616181229773464e-06, "loss": 0.122, "step": 219850 }, { "epoch": 85.38, "learning_rate": 8.615663430420714e-06, "loss": 0.021, "step": 219860 }, { "epoch": 85.39, "learning_rate": 8.615145631067962e-06, "loss": 0.03, "step": 219870 }, { "epoch": 85.39, "learning_rate": 8.614627831715211e-06, "loss": 0.0689, "step": 219880 }, { "epoch": 85.39, "learning_rate": 8.614110032362461e-06, "loss": 0.0569, "step": 219890 }, { "epoch": 85.4, "learning_rate": 8.61359223300971e-06, "loss": 0.0159, "step": 219900 }, { "epoch": 85.4, "learning_rate": 8.613074433656958e-06, "loss": 0.0094, "step": 219910 }, { "epoch": 85.41, "learning_rate": 8.612556634304208e-06, "loss": 0.1133, "step": 219920 }, { "epoch": 85.41, "learning_rate": 8.612038834951458e-06, "loss": 0.0361, "step": 219930 }, { "epoch": 85.41, "learning_rate": 8.611521035598707e-06, "loss": 0.0168, "step": 219940 }, { "epoch": 85.42, "learning_rate": 8.611003236245955e-06, "loss": 0.1061, "step": 219950 }, { "epoch": 85.42, "learning_rate": 8.610485436893205e-06, "loss": 0.0717, "step": 219960 }, { "epoch": 85.43, "learning_rate": 8.609967637540454e-06, "loss": 0.0782, "step": 219970 }, { "epoch": 85.43, "learning_rate": 8.609449838187704e-06, "loss": 0.0376, "step": 219980 }, { "epoch": 85.43, "learning_rate": 8.608932038834952e-06, "loss": 0.0114, "step": 219990 }, { "epoch": 85.44, "learning_rate": 8.608414239482202e-06, "loss": 0.0675, "step": 220000 }, { "epoch": 85.44, "learning_rate": 8.607896440129451e-06, "loss": 0.1431, "step": 220010 }, { "epoch": 85.44, "learning_rate": 8.6073786407767e-06, "loss": 0.1466, "step": 220020 }, { "epoch": 85.45, "learning_rate": 8.606860841423949e-06, "loss": 0.0331, "step": 220030 }, { "epoch": 85.45, "learning_rate": 8.606343042071198e-06, "loss": 0.0648, "step": 220040 }, { "epoch": 85.46, "learning_rate": 8.605825242718448e-06, "loss": 0.0406, "step": 220050 }, { "epoch": 85.46, "learning_rate": 8.605307443365698e-06, "loss": 0.0333, "step": 220060 }, { "epoch": 85.46, "learning_rate": 8.604789644012946e-06, "loss": 0.0449, "step": 220070 }, { "epoch": 85.47, "learning_rate": 8.604271844660195e-06, "loss": 0.0047, "step": 220080 }, { "epoch": 85.47, "learning_rate": 8.603754045307445e-06, "loss": 0.042, "step": 220090 }, { "epoch": 85.48, "learning_rate": 8.603236245954694e-06, "loss": 0.0001, "step": 220100 }, { "epoch": 85.48, "learning_rate": 8.602718446601942e-06, "loss": 0.0744, "step": 220110 }, { "epoch": 85.48, "learning_rate": 8.602200647249192e-06, "loss": 0.0337, "step": 220120 }, { "epoch": 85.49, "learning_rate": 8.601682847896442e-06, "loss": 0.0499, "step": 220130 }, { "epoch": 85.49, "learning_rate": 8.601165048543691e-06, "loss": 0.0336, "step": 220140 }, { "epoch": 85.5, "learning_rate": 8.600647249190939e-06, "loss": 0.0094, "step": 220150 }, { "epoch": 85.5, "learning_rate": 8.600129449838189e-06, "loss": 0.0003, "step": 220160 }, { "epoch": 85.5, "learning_rate": 8.599611650485438e-06, "loss": 0.1371, "step": 220170 }, { "epoch": 85.51, "learning_rate": 8.599093851132688e-06, "loss": 0.0814, "step": 220180 }, { "epoch": 85.51, "learning_rate": 8.598576051779936e-06, "loss": 0.0267, "step": 220190 }, { "epoch": 85.51, "learning_rate": 8.598058252427185e-06, "loss": 0.026, "step": 220200 }, { "epoch": 85.52, "learning_rate": 8.597540453074435e-06, "loss": 0.1749, "step": 220210 }, { "epoch": 85.52, "learning_rate": 8.597022653721685e-06, "loss": 0.1153, "step": 220220 }, { "epoch": 85.53, "learning_rate": 8.596504854368933e-06, "loss": 0.0095, "step": 220230 }, { "epoch": 85.53, "learning_rate": 8.595987055016182e-06, "loss": 0.0319, "step": 220240 }, { "epoch": 85.53, "learning_rate": 8.595469255663432e-06, "loss": 0.1418, "step": 220250 }, { "epoch": 85.54, "learning_rate": 8.59495145631068e-06, "loss": 0.26, "step": 220260 }, { "epoch": 85.54, "learning_rate": 8.59443365695793e-06, "loss": 0.0455, "step": 220270 }, { "epoch": 85.55, "learning_rate": 8.593915857605179e-06, "loss": 0.0354, "step": 220280 }, { "epoch": 85.55, "learning_rate": 8.593398058252429e-06, "loss": 0.1276, "step": 220290 }, { "epoch": 85.55, "learning_rate": 8.592880258899677e-06, "loss": 0.1517, "step": 220300 }, { "epoch": 85.56, "learning_rate": 8.592362459546926e-06, "loss": 0.0834, "step": 220310 }, { "epoch": 85.56, "learning_rate": 8.591844660194176e-06, "loss": 0.11, "step": 220320 }, { "epoch": 85.57, "learning_rate": 8.591326860841425e-06, "loss": 0.0065, "step": 220330 }, { "epoch": 85.57, "learning_rate": 8.590809061488673e-06, "loss": 0.013, "step": 220340 }, { "epoch": 85.57, "learning_rate": 8.590291262135923e-06, "loss": 0.0362, "step": 220350 }, { "epoch": 85.58, "learning_rate": 8.589773462783173e-06, "loss": 0.0298, "step": 220360 }, { "epoch": 85.58, "learning_rate": 8.589255663430422e-06, "loss": 0.0526, "step": 220370 }, { "epoch": 85.58, "learning_rate": 8.58873786407767e-06, "loss": 0.0418, "step": 220380 }, { "epoch": 85.59, "learning_rate": 8.58822006472492e-06, "loss": 0.0617, "step": 220390 }, { "epoch": 85.59, "learning_rate": 8.58770226537217e-06, "loss": 0.0404, "step": 220400 }, { "epoch": 85.6, "learning_rate": 8.587184466019419e-06, "loss": 0.0572, "step": 220410 }, { "epoch": 85.6, "learning_rate": 8.586666666666667e-06, "loss": 0.0831, "step": 220420 }, { "epoch": 85.6, "learning_rate": 8.586148867313917e-06, "loss": 0.0293, "step": 220430 }, { "epoch": 85.61, "learning_rate": 8.585631067961166e-06, "loss": 0.0021, "step": 220440 }, { "epoch": 85.61, "learning_rate": 8.585113268608414e-06, "loss": 0.1545, "step": 220450 }, { "epoch": 85.62, "learning_rate": 8.584595469255664e-06, "loss": 0.1351, "step": 220460 }, { "epoch": 85.62, "learning_rate": 8.584077669902913e-06, "loss": 0.0169, "step": 220470 }, { "epoch": 85.62, "learning_rate": 8.583559870550163e-06, "loss": 0.0927, "step": 220480 }, { "epoch": 85.63, "learning_rate": 8.58304207119741e-06, "loss": 0.1434, "step": 220490 }, { "epoch": 85.63, "learning_rate": 8.58252427184466e-06, "loss": 0.1431, "step": 220500 }, { "epoch": 85.63, "learning_rate": 8.58200647249191e-06, "loss": 0.0542, "step": 220510 }, { "epoch": 85.64, "learning_rate": 8.58148867313916e-06, "loss": 0.0451, "step": 220520 }, { "epoch": 85.64, "learning_rate": 8.580970873786408e-06, "loss": 0.2238, "step": 220530 }, { "epoch": 85.65, "learning_rate": 8.580453074433657e-06, "loss": 0.0109, "step": 220540 }, { "epoch": 85.65, "learning_rate": 8.579935275080907e-06, "loss": 0.0476, "step": 220550 }, { "epoch": 85.65, "learning_rate": 8.579417475728156e-06, "loss": 0.0437, "step": 220560 }, { "epoch": 85.66, "learning_rate": 8.578899676375404e-06, "loss": 0.1187, "step": 220570 }, { "epoch": 85.66, "learning_rate": 8.578381877022654e-06, "loss": 0.0348, "step": 220580 }, { "epoch": 85.67, "learning_rate": 8.577864077669904e-06, "loss": 0.013, "step": 220590 }, { "epoch": 85.67, "learning_rate": 8.577346278317152e-06, "loss": 0.0261, "step": 220600 }, { "epoch": 85.67, "learning_rate": 8.576828478964401e-06, "loss": 0.0108, "step": 220610 }, { "epoch": 85.68, "learning_rate": 8.57631067961165e-06, "loss": 0.1254, "step": 220620 }, { "epoch": 85.68, "learning_rate": 8.5757928802589e-06, "loss": 0.079, "step": 220630 }, { "epoch": 85.69, "learning_rate": 8.575275080906148e-06, "loss": 0.1107, "step": 220640 }, { "epoch": 85.69, "learning_rate": 8.574757281553398e-06, "loss": 0.017, "step": 220650 }, { "epoch": 85.69, "learning_rate": 8.574239482200648e-06, "loss": 0.0248, "step": 220660 }, { "epoch": 85.7, "learning_rate": 8.573721682847897e-06, "loss": 0.1514, "step": 220670 }, { "epoch": 85.7, "learning_rate": 8.573203883495145e-06, "loss": 0.2548, "step": 220680 }, { "epoch": 85.7, "learning_rate": 8.572686084142395e-06, "loss": 0.0486, "step": 220690 }, { "epoch": 85.71, "learning_rate": 8.572168284789644e-06, "loss": 0.0838, "step": 220700 }, { "epoch": 85.71, "learning_rate": 8.571650485436894e-06, "loss": 0.0164, "step": 220710 }, { "epoch": 85.72, "learning_rate": 8.571132686084142e-06, "loss": 0.0788, "step": 220720 }, { "epoch": 85.72, "learning_rate": 8.570614886731392e-06, "loss": 0.0829, "step": 220730 }, { "epoch": 85.72, "learning_rate": 8.570097087378641e-06, "loss": 0.0527, "step": 220740 }, { "epoch": 85.73, "learning_rate": 8.56957928802589e-06, "loss": 0.0417, "step": 220750 }, { "epoch": 85.73, "learning_rate": 8.569061488673139e-06, "loss": 0.0719, "step": 220760 }, { "epoch": 85.74, "learning_rate": 8.568543689320388e-06, "loss": 0.0735, "step": 220770 }, { "epoch": 85.74, "learning_rate": 8.568025889967638e-06, "loss": 0.1058, "step": 220780 }, { "epoch": 85.74, "learning_rate": 8.567508090614888e-06, "loss": 0.0548, "step": 220790 }, { "epoch": 85.75, "learning_rate": 8.566990291262135e-06, "loss": 0.0672, "step": 220800 }, { "epoch": 85.75, "learning_rate": 8.566472491909385e-06, "loss": 0.0378, "step": 220810 }, { "epoch": 85.76, "learning_rate": 8.565954692556635e-06, "loss": 0.0833, "step": 220820 }, { "epoch": 85.76, "learning_rate": 8.565436893203884e-06, "loss": 0.0024, "step": 220830 }, { "epoch": 85.76, "learning_rate": 8.564919093851132e-06, "loss": 0.1122, "step": 220840 }, { "epoch": 85.77, "learning_rate": 8.564401294498382e-06, "loss": 0.0341, "step": 220850 }, { "epoch": 85.77, "learning_rate": 8.563883495145631e-06, "loss": 0.0369, "step": 220860 }, { "epoch": 85.77, "learning_rate": 8.563365695792881e-06, "loss": 0.2044, "step": 220870 }, { "epoch": 85.78, "learning_rate": 8.56284789644013e-06, "loss": 0.0217, "step": 220880 }, { "epoch": 85.78, "learning_rate": 8.562330097087379e-06, "loss": 0.0396, "step": 220890 }, { "epoch": 85.79, "learning_rate": 8.561812297734628e-06, "loss": 0.0442, "step": 220900 }, { "epoch": 85.79, "learning_rate": 8.561294498381878e-06, "loss": 0.0037, "step": 220910 }, { "epoch": 85.79, "learning_rate": 8.560776699029127e-06, "loss": 0.1072, "step": 220920 }, { "epoch": 85.8, "learning_rate": 8.560258899676375e-06, "loss": 0.0335, "step": 220930 }, { "epoch": 85.8, "learning_rate": 8.559741100323625e-06, "loss": 0.035, "step": 220940 }, { "epoch": 85.81, "learning_rate": 8.559223300970875e-06, "loss": 0.0672, "step": 220950 }, { "epoch": 85.81, "learning_rate": 8.558705501618124e-06, "loss": 0.147, "step": 220960 }, { "epoch": 85.81, "learning_rate": 8.558187702265372e-06, "loss": 0.0317, "step": 220970 }, { "epoch": 85.82, "learning_rate": 8.557669902912622e-06, "loss": 0.0714, "step": 220980 }, { "epoch": 85.82, "learning_rate": 8.557152103559871e-06, "loss": 0.0846, "step": 220990 }, { "epoch": 85.83, "learning_rate": 8.556634304207121e-06, "loss": 0.0021, "step": 221000 }, { "epoch": 85.83, "learning_rate": 8.556116504854369e-06, "loss": 0.0935, "step": 221010 }, { "epoch": 85.83, "learning_rate": 8.555598705501619e-06, "loss": 0.0004, "step": 221020 }, { "epoch": 85.84, "learning_rate": 8.555080906148868e-06, "loss": 0.0868, "step": 221030 }, { "epoch": 85.84, "learning_rate": 8.554563106796118e-06, "loss": 0.0655, "step": 221040 }, { "epoch": 85.84, "learning_rate": 8.554045307443366e-06, "loss": 0.093, "step": 221050 }, { "epoch": 85.85, "learning_rate": 8.553527508090615e-06, "loss": 0.1758, "step": 221060 }, { "epoch": 85.85, "learning_rate": 8.553009708737865e-06, "loss": 0.0098, "step": 221070 }, { "epoch": 85.86, "learning_rate": 8.552491909385115e-06, "loss": 0.1004, "step": 221080 }, { "epoch": 85.86, "learning_rate": 8.551974110032363e-06, "loss": 0.0669, "step": 221090 }, { "epoch": 85.86, "learning_rate": 8.551456310679612e-06, "loss": 0.0028, "step": 221100 }, { "epoch": 85.87, "learning_rate": 8.550938511326862e-06, "loss": 0.0441, "step": 221110 }, { "epoch": 85.87, "learning_rate": 8.550420711974111e-06, "loss": 0.0128, "step": 221120 }, { "epoch": 85.88, "learning_rate": 8.54990291262136e-06, "loss": 0.1313, "step": 221130 }, { "epoch": 85.88, "learning_rate": 8.549385113268609e-06, "loss": 0.1093, "step": 221140 }, { "epoch": 85.88, "learning_rate": 8.548867313915859e-06, "loss": 0.0202, "step": 221150 }, { "epoch": 85.89, "learning_rate": 8.548349514563108e-06, "loss": 0.057, "step": 221160 }, { "epoch": 85.89, "learning_rate": 8.547831715210356e-06, "loss": 0.058, "step": 221170 }, { "epoch": 85.9, "learning_rate": 8.547313915857606e-06, "loss": 0.015, "step": 221180 }, { "epoch": 85.9, "learning_rate": 8.546796116504855e-06, "loss": 0.0333, "step": 221190 }, { "epoch": 85.9, "learning_rate": 8.546278317152105e-06, "loss": 0.0504, "step": 221200 }, { "epoch": 85.91, "learning_rate": 8.545760517799353e-06, "loss": 0.068, "step": 221210 }, { "epoch": 85.91, "learning_rate": 8.545242718446602e-06, "loss": 0.1287, "step": 221220 }, { "epoch": 85.91, "learning_rate": 8.544724919093852e-06, "loss": 0.0399, "step": 221230 }, { "epoch": 85.92, "learning_rate": 8.544207119741102e-06, "loss": 0.0175, "step": 221240 }, { "epoch": 85.92, "learning_rate": 8.54368932038835e-06, "loss": 0.009, "step": 221250 }, { "epoch": 85.93, "learning_rate": 8.5431715210356e-06, "loss": 0.0787, "step": 221260 }, { "epoch": 85.93, "learning_rate": 8.542653721682849e-06, "loss": 0.1314, "step": 221270 }, { "epoch": 85.93, "learning_rate": 8.542135922330098e-06, "loss": 0.0589, "step": 221280 }, { "epoch": 85.94, "learning_rate": 8.541618122977346e-06, "loss": 0.0115, "step": 221290 }, { "epoch": 85.94, "learning_rate": 8.541100323624596e-06, "loss": 0.0043, "step": 221300 }, { "epoch": 85.95, "learning_rate": 8.540582524271846e-06, "loss": 0.0636, "step": 221310 }, { "epoch": 85.95, "learning_rate": 8.540064724919095e-06, "loss": 0.0693, "step": 221320 }, { "epoch": 85.95, "learning_rate": 8.539546925566343e-06, "loss": 0.0021, "step": 221330 }, { "epoch": 85.96, "learning_rate": 8.539029126213593e-06, "loss": 0.0276, "step": 221340 }, { "epoch": 85.96, "learning_rate": 8.538511326860842e-06, "loss": 0.0411, "step": 221350 }, { "epoch": 85.97, "learning_rate": 8.537993527508092e-06, "loss": 0.0163, "step": 221360 }, { "epoch": 85.97, "learning_rate": 8.53747572815534e-06, "loss": 0.1259, "step": 221370 }, { "epoch": 85.97, "learning_rate": 8.53695792880259e-06, "loss": 0.014, "step": 221380 }, { "epoch": 85.98, "learning_rate": 8.53644012944984e-06, "loss": 0.1157, "step": 221390 }, { "epoch": 85.98, "learning_rate": 8.535922330097089e-06, "loss": 0.0116, "step": 221400 }, { "epoch": 85.98, "learning_rate": 8.535404530744337e-06, "loss": 0.165, "step": 221410 }, { "epoch": 85.99, "learning_rate": 8.534886731391586e-06, "loss": 0.0999, "step": 221420 }, { "epoch": 85.99, "learning_rate": 8.534368932038836e-06, "loss": 0.094, "step": 221430 }, { "epoch": 86.0, "learning_rate": 8.533851132686086e-06, "loss": 0.1481, "step": 221440 }, { "epoch": 86.0, "learning_rate": 8.533333333333335e-06, "loss": 0.0284, "step": 221450 }, { "epoch": 86.0, "eval_accuracy": 0.9499312242090784, "eval_loss": 0.3749178946018219, "eval_runtime": 8.1935, "eval_samples_per_second": 443.645, "eval_steps_per_second": 55.532, "step": 221450 }, { "epoch": 86.0, "learning_rate": 8.532815533980583e-06, "loss": 0.0116, "step": 221460 }, { "epoch": 86.01, "learning_rate": 8.532297734627833e-06, "loss": 0.1044, "step": 221470 }, { "epoch": 86.01, "learning_rate": 8.531779935275082e-06, "loss": 0.0464, "step": 221480 }, { "epoch": 86.02, "learning_rate": 8.531262135922332e-06, "loss": 0.011, "step": 221490 }, { "epoch": 86.02, "learning_rate": 8.53074433656958e-06, "loss": 0.0044, "step": 221500 }, { "epoch": 86.02, "learning_rate": 8.53022653721683e-06, "loss": 0.2006, "step": 221510 }, { "epoch": 86.03, "learning_rate": 8.529708737864079e-06, "loss": 0.053, "step": 221520 }, { "epoch": 86.03, "learning_rate": 8.529190938511329e-06, "loss": 0.0808, "step": 221530 }, { "epoch": 86.03, "learning_rate": 8.528673139158577e-06, "loss": 0.1504, "step": 221540 }, { "epoch": 86.04, "learning_rate": 8.528155339805826e-06, "loss": 0.1345, "step": 221550 }, { "epoch": 86.04, "learning_rate": 8.527637540453076e-06, "loss": 0.0276, "step": 221560 }, { "epoch": 86.05, "learning_rate": 8.527119741100326e-06, "loss": 0.0668, "step": 221570 }, { "epoch": 86.05, "learning_rate": 8.526601941747573e-06, "loss": 0.0452, "step": 221580 }, { "epoch": 86.05, "learning_rate": 8.526084142394823e-06, "loss": 0.0217, "step": 221590 }, { "epoch": 86.06, "learning_rate": 8.525566343042073e-06, "loss": 0.01, "step": 221600 }, { "epoch": 86.06, "learning_rate": 8.525048543689322e-06, "loss": 0.0358, "step": 221610 }, { "epoch": 86.07, "learning_rate": 8.52453074433657e-06, "loss": 0.0481, "step": 221620 }, { "epoch": 86.07, "learning_rate": 8.52401294498382e-06, "loss": 0.098, "step": 221630 }, { "epoch": 86.07, "learning_rate": 8.52349514563107e-06, "loss": 0.068, "step": 221640 }, { "epoch": 86.08, "learning_rate": 8.522977346278319e-06, "loss": 0.0619, "step": 221650 }, { "epoch": 86.08, "learning_rate": 8.522459546925567e-06, "loss": 0.0599, "step": 221660 }, { "epoch": 86.09, "learning_rate": 8.521941747572817e-06, "loss": 0.0086, "step": 221670 }, { "epoch": 86.09, "learning_rate": 8.521423948220066e-06, "loss": 0.0331, "step": 221680 }, { "epoch": 86.09, "learning_rate": 8.520906148867316e-06, "loss": 0.0763, "step": 221690 }, { "epoch": 86.1, "learning_rate": 8.520388349514564e-06, "loss": 0.1874, "step": 221700 }, { "epoch": 86.1, "learning_rate": 8.519870550161813e-06, "loss": 0.0007, "step": 221710 }, { "epoch": 86.1, "learning_rate": 8.519352750809063e-06, "loss": 0.013, "step": 221720 }, { "epoch": 86.11, "learning_rate": 8.518834951456311e-06, "loss": 0.0152, "step": 221730 }, { "epoch": 86.11, "learning_rate": 8.51831715210356e-06, "loss": 0.0543, "step": 221740 }, { "epoch": 86.12, "learning_rate": 8.51779935275081e-06, "loss": 0.023, "step": 221750 }, { "epoch": 86.12, "learning_rate": 8.51728155339806e-06, "loss": 0.1119, "step": 221760 }, { "epoch": 86.12, "learning_rate": 8.516763754045308e-06, "loss": 0.0116, "step": 221770 }, { "epoch": 86.13, "learning_rate": 8.516245954692557e-06, "loss": 0.0264, "step": 221780 }, { "epoch": 86.13, "learning_rate": 8.515728155339807e-06, "loss": 0.0003, "step": 221790 }, { "epoch": 86.14, "learning_rate": 8.515210355987057e-06, "loss": 0.0723, "step": 221800 }, { "epoch": 86.14, "learning_rate": 8.514692556634305e-06, "loss": 0.0229, "step": 221810 }, { "epoch": 86.14, "learning_rate": 8.514174757281554e-06, "loss": 0.0715, "step": 221820 }, { "epoch": 86.15, "learning_rate": 8.513656957928804e-06, "loss": 0.0554, "step": 221830 }, { "epoch": 86.15, "learning_rate": 8.513139158576053e-06, "loss": 0.0339, "step": 221840 }, { "epoch": 86.16, "learning_rate": 8.512621359223301e-06, "loss": 0.0086, "step": 221850 }, { "epoch": 86.16, "learning_rate": 8.512103559870551e-06, "loss": 0.0538, "step": 221860 }, { "epoch": 86.16, "learning_rate": 8.5115857605178e-06, "loss": 0.076, "step": 221870 }, { "epoch": 86.17, "learning_rate": 8.511067961165048e-06, "loss": 0.0174, "step": 221880 }, { "epoch": 86.17, "learning_rate": 8.510550161812298e-06, "loss": 0.0708, "step": 221890 }, { "epoch": 86.17, "learning_rate": 8.510032362459548e-06, "loss": 0.0091, "step": 221900 }, { "epoch": 86.18, "learning_rate": 8.509514563106797e-06, "loss": 0.0157, "step": 221910 }, { "epoch": 86.18, "learning_rate": 8.508996763754045e-06, "loss": 0.0733, "step": 221920 }, { "epoch": 86.19, "learning_rate": 8.508478964401295e-06, "loss": 0.1329, "step": 221930 }, { "epoch": 86.19, "learning_rate": 8.507961165048544e-06, "loss": 0.0348, "step": 221940 }, { "epoch": 86.19, "learning_rate": 8.507443365695794e-06, "loss": 0.0628, "step": 221950 }, { "epoch": 86.2, "learning_rate": 8.506925566343042e-06, "loss": 0.1106, "step": 221960 }, { "epoch": 86.2, "learning_rate": 8.506407766990292e-06, "loss": 0.0638, "step": 221970 }, { "epoch": 86.21, "learning_rate": 8.505889967637541e-06, "loss": 0.0795, "step": 221980 }, { "epoch": 86.21, "learning_rate": 8.505372168284791e-06, "loss": 0.0317, "step": 221990 }, { "epoch": 86.21, "learning_rate": 8.504854368932039e-06, "loss": 0.0663, "step": 222000 }, { "epoch": 86.22, "learning_rate": 8.504336569579288e-06, "loss": 0.0414, "step": 222010 }, { "epoch": 86.22, "learning_rate": 8.503818770226538e-06, "loss": 0.0403, "step": 222020 }, { "epoch": 86.23, "learning_rate": 8.503300970873788e-06, "loss": 0.0325, "step": 222030 }, { "epoch": 86.23, "learning_rate": 8.502783171521036e-06, "loss": 0.0086, "step": 222040 }, { "epoch": 86.23, "learning_rate": 8.502265372168285e-06, "loss": 0.01, "step": 222050 }, { "epoch": 86.24, "learning_rate": 8.501747572815535e-06, "loss": 0.0634, "step": 222060 }, { "epoch": 86.24, "learning_rate": 8.501229773462783e-06, "loss": 0.034, "step": 222070 }, { "epoch": 86.24, "learning_rate": 8.500711974110032e-06, "loss": 0.1227, "step": 222080 }, { "epoch": 86.25, "learning_rate": 8.500194174757282e-06, "loss": 0.0145, "step": 222090 }, { "epoch": 86.25, "learning_rate": 8.499676375404532e-06, "loss": 0.019, "step": 222100 }, { "epoch": 86.26, "learning_rate": 8.49915857605178e-06, "loss": 0.0522, "step": 222110 }, { "epoch": 86.26, "learning_rate": 8.498640776699029e-06, "loss": 0.048, "step": 222120 }, { "epoch": 86.26, "learning_rate": 8.498122977346279e-06, "loss": 0.01, "step": 222130 }, { "epoch": 86.27, "learning_rate": 8.497605177993528e-06, "loss": 0.0908, "step": 222140 }, { "epoch": 86.27, "learning_rate": 8.497087378640776e-06, "loss": 0.083, "step": 222150 }, { "epoch": 86.28, "learning_rate": 8.496569579288026e-06, "loss": 0.0309, "step": 222160 }, { "epoch": 86.28, "learning_rate": 8.496051779935276e-06, "loss": 0.0478, "step": 222170 }, { "epoch": 86.28, "learning_rate": 8.495533980582525e-06, "loss": 0.0396, "step": 222180 }, { "epoch": 86.29, "learning_rate": 8.495016181229773e-06, "loss": 0.0735, "step": 222190 }, { "epoch": 86.29, "learning_rate": 8.494498381877023e-06, "loss": 0.0202, "step": 222200 }, { "epoch": 86.3, "learning_rate": 8.493980582524272e-06, "loss": 0.0003, "step": 222210 }, { "epoch": 86.3, "learning_rate": 8.493462783171522e-06, "loss": 0.0214, "step": 222220 }, { "epoch": 86.3, "learning_rate": 8.49294498381877e-06, "loss": 0.091, "step": 222230 }, { "epoch": 86.31, "learning_rate": 8.49242718446602e-06, "loss": 0.0344, "step": 222240 }, { "epoch": 86.31, "learning_rate": 8.491909385113269e-06, "loss": 0.0398, "step": 222250 }, { "epoch": 86.31, "learning_rate": 8.491391585760519e-06, "loss": 0.0004, "step": 222260 }, { "epoch": 86.32, "learning_rate": 8.490873786407767e-06, "loss": 0.1373, "step": 222270 }, { "epoch": 86.32, "learning_rate": 8.490355987055016e-06, "loss": 0.014, "step": 222280 }, { "epoch": 86.33, "learning_rate": 8.489838187702266e-06, "loss": 0.2258, "step": 222290 }, { "epoch": 86.33, "learning_rate": 8.489320388349515e-06, "loss": 0.0074, "step": 222300 }, { "epoch": 86.33, "learning_rate": 8.488802588996763e-06, "loss": 0.0382, "step": 222310 }, { "epoch": 86.34, "learning_rate": 8.488284789644013e-06, "loss": 0.0773, "step": 222320 }, { "epoch": 86.34, "learning_rate": 8.487766990291263e-06, "loss": 0.0011, "step": 222330 }, { "epoch": 86.35, "learning_rate": 8.487249190938512e-06, "loss": 0.0087, "step": 222340 }, { "epoch": 86.35, "learning_rate": 8.48673139158576e-06, "loss": 0.0064, "step": 222350 }, { "epoch": 86.35, "learning_rate": 8.48621359223301e-06, "loss": 0.0292, "step": 222360 }, { "epoch": 86.36, "learning_rate": 8.48569579288026e-06, "loss": 0.0007, "step": 222370 }, { "epoch": 86.36, "learning_rate": 8.485177993527509e-06, "loss": 0.0105, "step": 222380 }, { "epoch": 86.37, "learning_rate": 8.484660194174757e-06, "loss": 0.1652, "step": 222390 }, { "epoch": 86.37, "learning_rate": 8.484142394822007e-06, "loss": 0.135, "step": 222400 }, { "epoch": 86.37, "learning_rate": 8.483624595469256e-06, "loss": 0.2559, "step": 222410 }, { "epoch": 86.38, "learning_rate": 8.483106796116506e-06, "loss": 0.2507, "step": 222420 }, { "epoch": 86.38, "learning_rate": 8.482588996763754e-06, "loss": 0.0002, "step": 222430 }, { "epoch": 86.38, "learning_rate": 8.482071197411003e-06, "loss": 0.0647, "step": 222440 }, { "epoch": 86.39, "learning_rate": 8.481553398058253e-06, "loss": 0.0118, "step": 222450 }, { "epoch": 86.39, "learning_rate": 8.481035598705503e-06, "loss": 0.0118, "step": 222460 }, { "epoch": 86.4, "learning_rate": 8.48051779935275e-06, "loss": 0.0465, "step": 222470 }, { "epoch": 86.4, "learning_rate": 8.48e-06, "loss": 0.0053, "step": 222480 }, { "epoch": 86.4, "learning_rate": 8.47948220064725e-06, "loss": 0.0015, "step": 222490 }, { "epoch": 86.41, "learning_rate": 8.4789644012945e-06, "loss": 0.0072, "step": 222500 }, { "epoch": 86.41, "learning_rate": 8.478446601941747e-06, "loss": 0.0215, "step": 222510 }, { "epoch": 86.42, "learning_rate": 8.477928802588997e-06, "loss": 0.0061, "step": 222520 }, { "epoch": 86.42, "learning_rate": 8.477411003236247e-06, "loss": 0.0518, "step": 222530 }, { "epoch": 86.42, "learning_rate": 8.476893203883496e-06, "loss": 0.0121, "step": 222540 }, { "epoch": 86.43, "learning_rate": 8.476375404530744e-06, "loss": 0.0146, "step": 222550 }, { "epoch": 86.43, "learning_rate": 8.475857605177994e-06, "loss": 0.1413, "step": 222560 }, { "epoch": 86.43, "learning_rate": 8.475339805825243e-06, "loss": 0.0014, "step": 222570 }, { "epoch": 86.44, "learning_rate": 8.474822006472493e-06, "loss": 0.0357, "step": 222580 }, { "epoch": 86.44, "learning_rate": 8.474304207119743e-06, "loss": 0.0593, "step": 222590 }, { "epoch": 86.45, "learning_rate": 8.47378640776699e-06, "loss": 0.0486, "step": 222600 }, { "epoch": 86.45, "learning_rate": 8.47326860841424e-06, "loss": 0.0722, "step": 222610 }, { "epoch": 86.45, "learning_rate": 8.47275080906149e-06, "loss": 0.0628, "step": 222620 }, { "epoch": 86.46, "learning_rate": 8.47223300970874e-06, "loss": 0.0345, "step": 222630 }, { "epoch": 86.46, "learning_rate": 8.471715210355987e-06, "loss": 0.0439, "step": 222640 }, { "epoch": 86.47, "learning_rate": 8.471197411003237e-06, "loss": 0.0288, "step": 222650 }, { "epoch": 86.47, "learning_rate": 8.470679611650486e-06, "loss": 0.0083, "step": 222660 }, { "epoch": 86.47, "learning_rate": 8.470161812297736e-06, "loss": 0.0409, "step": 222670 }, { "epoch": 86.48, "learning_rate": 8.469644012944984e-06, "loss": 0.0149, "step": 222680 }, { "epoch": 86.48, "learning_rate": 8.469126213592234e-06, "loss": 0.0847, "step": 222690 }, { "epoch": 86.49, "learning_rate": 8.468608414239483e-06, "loss": 0.1681, "step": 222700 }, { "epoch": 86.49, "learning_rate": 8.468090614886733e-06, "loss": 0.0229, "step": 222710 }, { "epoch": 86.49, "learning_rate": 8.46757281553398e-06, "loss": 0.0841, "step": 222720 }, { "epoch": 86.5, "learning_rate": 8.46705501618123e-06, "loss": 0.0253, "step": 222730 }, { "epoch": 86.5, "learning_rate": 8.46653721682848e-06, "loss": 0.0577, "step": 222740 }, { "epoch": 86.5, "learning_rate": 8.46601941747573e-06, "loss": 0.0145, "step": 222750 }, { "epoch": 86.51, "learning_rate": 8.465501618122978e-06, "loss": 0.0352, "step": 222760 }, { "epoch": 86.51, "learning_rate": 8.464983818770227e-06, "loss": 0.0545, "step": 222770 }, { "epoch": 86.52, "learning_rate": 8.464466019417477e-06, "loss": 0.032, "step": 222780 }, { "epoch": 86.52, "learning_rate": 8.463948220064726e-06, "loss": 0.0355, "step": 222790 }, { "epoch": 86.52, "learning_rate": 8.463430420711974e-06, "loss": 0.0819, "step": 222800 }, { "epoch": 86.53, "learning_rate": 8.462912621359224e-06, "loss": 0.0079, "step": 222810 }, { "epoch": 86.53, "learning_rate": 8.462394822006474e-06, "loss": 0.1389, "step": 222820 }, { "epoch": 86.54, "learning_rate": 8.461877022653723e-06, "loss": 0.0111, "step": 222830 }, { "epoch": 86.54, "learning_rate": 8.461359223300971e-06, "loss": 0.0041, "step": 222840 }, { "epoch": 86.54, "learning_rate": 8.46084142394822e-06, "loss": 0.1657, "step": 222850 }, { "epoch": 86.55, "learning_rate": 8.46032362459547e-06, "loss": 0.0967, "step": 222860 }, { "epoch": 86.55, "learning_rate": 8.45980582524272e-06, "loss": 0.0016, "step": 222870 }, { "epoch": 86.56, "learning_rate": 8.459288025889968e-06, "loss": 0.0011, "step": 222880 }, { "epoch": 86.56, "learning_rate": 8.458770226537218e-06, "loss": 0.0028, "step": 222890 }, { "epoch": 86.56, "learning_rate": 8.458252427184467e-06, "loss": 0.065, "step": 222900 }, { "epoch": 86.57, "learning_rate": 8.457734627831717e-06, "loss": 0.0802, "step": 222910 }, { "epoch": 86.57, "learning_rate": 8.457216828478965e-06, "loss": 0.0115, "step": 222920 }, { "epoch": 86.57, "learning_rate": 8.456699029126214e-06, "loss": 0.1321, "step": 222930 }, { "epoch": 86.58, "learning_rate": 8.456181229773464e-06, "loss": 0.105, "step": 222940 }, { "epoch": 86.58, "learning_rate": 8.455663430420714e-06, "loss": 0.0445, "step": 222950 }, { "epoch": 86.59, "learning_rate": 8.455145631067961e-06, "loss": 0.0478, "step": 222960 }, { "epoch": 86.59, "learning_rate": 8.454627831715211e-06, "loss": 0.2093, "step": 222970 }, { "epoch": 86.59, "learning_rate": 8.45411003236246e-06, "loss": 0.0468, "step": 222980 }, { "epoch": 86.6, "learning_rate": 8.45359223300971e-06, "loss": 0.0963, "step": 222990 }, { "epoch": 86.6, "learning_rate": 8.453074433656958e-06, "loss": 0.0207, "step": 223000 }, { "epoch": 86.61, "learning_rate": 8.452556634304208e-06, "loss": 0.0615, "step": 223010 }, { "epoch": 86.61, "learning_rate": 8.452038834951457e-06, "loss": 0.0899, "step": 223020 }, { "epoch": 86.61, "learning_rate": 8.451521035598707e-06, "loss": 0.0269, "step": 223030 }, { "epoch": 86.62, "learning_rate": 8.451003236245955e-06, "loss": 0.0119, "step": 223040 }, { "epoch": 86.62, "learning_rate": 8.450485436893205e-06, "loss": 0.0856, "step": 223050 }, { "epoch": 86.63, "learning_rate": 8.449967637540454e-06, "loss": 0.0132, "step": 223060 }, { "epoch": 86.63, "learning_rate": 8.449449838187704e-06, "loss": 0.0475, "step": 223070 }, { "epoch": 86.63, "learning_rate": 8.448932038834952e-06, "loss": 0.0422, "step": 223080 }, { "epoch": 86.64, "learning_rate": 8.448414239482201e-06, "loss": 0.0844, "step": 223090 }, { "epoch": 86.64, "learning_rate": 8.447896440129451e-06, "loss": 0.0971, "step": 223100 }, { "epoch": 86.64, "learning_rate": 8.4473786407767e-06, "loss": 0.0508, "step": 223110 }, { "epoch": 86.65, "learning_rate": 8.44686084142395e-06, "loss": 0.0832, "step": 223120 }, { "epoch": 86.65, "learning_rate": 8.446343042071198e-06, "loss": 0.0911, "step": 223130 }, { "epoch": 86.66, "learning_rate": 8.445825242718448e-06, "loss": 0.0476, "step": 223140 }, { "epoch": 86.66, "learning_rate": 8.445307443365697e-06, "loss": 0.0892, "step": 223150 }, { "epoch": 86.66, "learning_rate": 8.444789644012947e-06, "loss": 0.0012, "step": 223160 }, { "epoch": 86.67, "learning_rate": 8.444271844660195e-06, "loss": 0.1167, "step": 223170 }, { "epoch": 86.67, "learning_rate": 8.443754045307445e-06, "loss": 0.002, "step": 223180 }, { "epoch": 86.68, "learning_rate": 8.443236245954694e-06, "loss": 0.0595, "step": 223190 }, { "epoch": 86.68, "learning_rate": 8.442718446601942e-06, "loss": 0.002, "step": 223200 }, { "epoch": 86.68, "learning_rate": 8.442200647249192e-06, "loss": 0.1019, "step": 223210 }, { "epoch": 86.69, "learning_rate": 8.441682847896441e-06, "loss": 0.1163, "step": 223220 }, { "epoch": 86.69, "learning_rate": 8.441165048543691e-06, "loss": 0.0472, "step": 223230 }, { "epoch": 86.7, "learning_rate": 8.440647249190939e-06, "loss": 0.1311, "step": 223240 }, { "epoch": 86.7, "learning_rate": 8.440129449838189e-06, "loss": 0.2311, "step": 223250 }, { "epoch": 86.7, "learning_rate": 8.439611650485438e-06, "loss": 0.1632, "step": 223260 }, { "epoch": 86.71, "learning_rate": 8.439093851132688e-06, "loss": 0.0245, "step": 223270 }, { "epoch": 86.71, "learning_rate": 8.438576051779936e-06, "loss": 0.0516, "step": 223280 }, { "epoch": 86.71, "learning_rate": 8.438058252427185e-06, "loss": 0.0398, "step": 223290 }, { "epoch": 86.72, "learning_rate": 8.437540453074435e-06, "loss": 0.0357, "step": 223300 }, { "epoch": 86.72, "learning_rate": 8.437022653721685e-06, "loss": 0.1833, "step": 223310 }, { "epoch": 86.73, "learning_rate": 8.436504854368932e-06, "loss": 0.0093, "step": 223320 }, { "epoch": 86.73, "learning_rate": 8.435987055016182e-06, "loss": 0.0111, "step": 223330 }, { "epoch": 86.73, "learning_rate": 8.435469255663432e-06, "loss": 0.0628, "step": 223340 }, { "epoch": 86.74, "learning_rate": 8.43495145631068e-06, "loss": 0.0401, "step": 223350 }, { "epoch": 86.74, "learning_rate": 8.43443365695793e-06, "loss": 0.0446, "step": 223360 }, { "epoch": 86.75, "learning_rate": 8.433915857605179e-06, "loss": 0.0202, "step": 223370 }, { "epoch": 86.75, "learning_rate": 8.433398058252428e-06, "loss": 0.0106, "step": 223380 }, { "epoch": 86.75, "learning_rate": 8.432880258899676e-06, "loss": 0.0027, "step": 223390 }, { "epoch": 86.76, "learning_rate": 8.432362459546926e-06, "loss": 0.1113, "step": 223400 }, { "epoch": 86.76, "learning_rate": 8.431844660194176e-06, "loss": 0.0806, "step": 223410 }, { "epoch": 86.77, "learning_rate": 8.431326860841425e-06, "loss": 0.083, "step": 223420 }, { "epoch": 86.77, "learning_rate": 8.430809061488673e-06, "loss": 0.0755, "step": 223430 }, { "epoch": 86.77, "learning_rate": 8.430291262135923e-06, "loss": 0.0189, "step": 223440 }, { "epoch": 86.78, "learning_rate": 8.429773462783172e-06, "loss": 0.0725, "step": 223450 }, { "epoch": 86.78, "learning_rate": 8.429255663430422e-06, "loss": 0.024, "step": 223460 }, { "epoch": 86.78, "learning_rate": 8.42873786407767e-06, "loss": 0.0088, "step": 223470 }, { "epoch": 86.79, "learning_rate": 8.42822006472492e-06, "loss": 0.0364, "step": 223480 }, { "epoch": 86.79, "learning_rate": 8.42770226537217e-06, "loss": 0.0204, "step": 223490 }, { "epoch": 86.8, "learning_rate": 8.427184466019419e-06, "loss": 0.1066, "step": 223500 }, { "epoch": 86.8, "learning_rate": 8.426666666666667e-06, "loss": 0.0957, "step": 223510 }, { "epoch": 86.8, "learning_rate": 8.426148867313916e-06, "loss": 0.0838, "step": 223520 }, { "epoch": 86.81, "learning_rate": 8.425631067961166e-06, "loss": 0.1694, "step": 223530 }, { "epoch": 86.81, "learning_rate": 8.425113268608414e-06, "loss": 0.0624, "step": 223540 }, { "epoch": 86.82, "learning_rate": 8.424595469255664e-06, "loss": 0.0248, "step": 223550 }, { "epoch": 86.82, "learning_rate": 8.424077669902913e-06, "loss": 0.1287, "step": 223560 }, { "epoch": 86.82, "learning_rate": 8.423559870550163e-06, "loss": 0.0522, "step": 223570 }, { "epoch": 86.83, "learning_rate": 8.42304207119741e-06, "loss": 0.0191, "step": 223580 }, { "epoch": 86.83, "learning_rate": 8.42252427184466e-06, "loss": 0.0344, "step": 223590 }, { "epoch": 86.83, "learning_rate": 8.42200647249191e-06, "loss": 0.0106, "step": 223600 }, { "epoch": 86.84, "learning_rate": 8.42148867313916e-06, "loss": 0.0673, "step": 223610 }, { "epoch": 86.84, "learning_rate": 8.420970873786407e-06, "loss": 0.0428, "step": 223620 }, { "epoch": 86.85, "learning_rate": 8.420453074433657e-06, "loss": 0.1552, "step": 223630 }, { "epoch": 86.85, "learning_rate": 8.419935275080907e-06, "loss": 0.0835, "step": 223640 }, { "epoch": 86.85, "learning_rate": 8.419417475728156e-06, "loss": 0.0665, "step": 223650 }, { "epoch": 86.86, "learning_rate": 8.418899676375404e-06, "loss": 0.0489, "step": 223660 }, { "epoch": 86.86, "learning_rate": 8.418381877022654e-06, "loss": 0.0006, "step": 223670 }, { "epoch": 86.87, "learning_rate": 8.417864077669903e-06, "loss": 0.0769, "step": 223680 }, { "epoch": 86.87, "learning_rate": 8.417346278317153e-06, "loss": 0.1649, "step": 223690 }, { "epoch": 86.87, "learning_rate": 8.416828478964401e-06, "loss": 0.0712, "step": 223700 }, { "epoch": 86.88, "learning_rate": 8.41631067961165e-06, "loss": 0.0379, "step": 223710 }, { "epoch": 86.88, "learning_rate": 8.4157928802589e-06, "loss": 0.1122, "step": 223720 }, { "epoch": 86.89, "learning_rate": 8.41527508090615e-06, "loss": 0.0957, "step": 223730 }, { "epoch": 86.89, "learning_rate": 8.414757281553398e-06, "loss": 0.0081, "step": 223740 }, { "epoch": 86.89, "learning_rate": 8.414239482200647e-06, "loss": 0.0154, "step": 223750 }, { "epoch": 86.9, "learning_rate": 8.413721682847897e-06, "loss": 0.0979, "step": 223760 }, { "epoch": 86.9, "learning_rate": 8.413203883495147e-06, "loss": 0.0171, "step": 223770 }, { "epoch": 86.9, "learning_rate": 8.412686084142395e-06, "loss": 0.1473, "step": 223780 }, { "epoch": 86.91, "learning_rate": 8.412168284789644e-06, "loss": 0.1239, "step": 223790 }, { "epoch": 86.91, "learning_rate": 8.411650485436894e-06, "loss": 0.026, "step": 223800 }, { "epoch": 86.92, "learning_rate": 8.411132686084143e-06, "loss": 0.0664, "step": 223810 }, { "epoch": 86.92, "learning_rate": 8.410614886731391e-06, "loss": 0.0413, "step": 223820 }, { "epoch": 86.92, "learning_rate": 8.410097087378641e-06, "loss": 0.0655, "step": 223830 }, { "epoch": 86.93, "learning_rate": 8.40957928802589e-06, "loss": 0.0653, "step": 223840 }, { "epoch": 86.93, "learning_rate": 8.40906148867314e-06, "loss": 0.0003, "step": 223850 }, { "epoch": 86.94, "learning_rate": 8.408543689320388e-06, "loss": 0.008, "step": 223860 }, { "epoch": 86.94, "learning_rate": 8.408025889967638e-06, "loss": 0.0388, "step": 223870 }, { "epoch": 86.94, "learning_rate": 8.407508090614887e-06, "loss": 0.0762, "step": 223880 }, { "epoch": 86.95, "learning_rate": 8.406990291262137e-06, "loss": 0.0265, "step": 223890 }, { "epoch": 86.95, "learning_rate": 8.406472491909385e-06, "loss": 0.1949, "step": 223900 }, { "epoch": 86.96, "learning_rate": 8.405954692556635e-06, "loss": 0.0312, "step": 223910 }, { "epoch": 86.96, "learning_rate": 8.405436893203884e-06, "loss": 0.0711, "step": 223920 }, { "epoch": 86.96, "learning_rate": 8.404919093851134e-06, "loss": 0.0967, "step": 223930 }, { "epoch": 86.97, "learning_rate": 8.404401294498382e-06, "loss": 0.0721, "step": 223940 }, { "epoch": 86.97, "learning_rate": 8.403883495145631e-06, "loss": 0.03, "step": 223950 }, { "epoch": 86.97, "learning_rate": 8.403365695792881e-06, "loss": 0.022, "step": 223960 }, { "epoch": 86.98, "learning_rate": 8.40284789644013e-06, "loss": 0.002, "step": 223970 }, { "epoch": 86.98, "learning_rate": 8.402330097087378e-06, "loss": 0.0506, "step": 223980 }, { "epoch": 86.99, "learning_rate": 8.401812297734628e-06, "loss": 0.197, "step": 223990 }, { "epoch": 86.99, "learning_rate": 8.401294498381878e-06, "loss": 0.1186, "step": 224000 }, { "epoch": 86.99, "learning_rate": 8.400776699029127e-06, "loss": 0.03, "step": 224010 }, { "epoch": 87.0, "learning_rate": 8.400258899676375e-06, "loss": 0.0923, "step": 224020 }, { "epoch": 87.0, "eval_accuracy": 0.9513067400275104, "eval_loss": 0.35266244411468506, "eval_runtime": 8.1691, "eval_samples_per_second": 444.971, "eval_steps_per_second": 55.698, "step": 224025 }, { "epoch": 87.0, "learning_rate": 8.399741100323625e-06, "loss": 0.1982, "step": 224030 }, { "epoch": 87.01, "learning_rate": 8.399223300970874e-06, "loss": 0.0222, "step": 224040 }, { "epoch": 87.01, "learning_rate": 8.398705501618124e-06, "loss": 0.0012, "step": 224050 }, { "epoch": 87.01, "learning_rate": 8.398187702265372e-06, "loss": 0.1271, "step": 224060 }, { "epoch": 87.02, "learning_rate": 8.397669902912622e-06, "loss": 0.0492, "step": 224070 }, { "epoch": 87.02, "learning_rate": 8.397152103559871e-06, "loss": 0.0158, "step": 224080 }, { "epoch": 87.03, "learning_rate": 8.396634304207121e-06, "loss": 0.0628, "step": 224090 }, { "epoch": 87.03, "learning_rate": 8.396116504854369e-06, "loss": 0.0428, "step": 224100 }, { "epoch": 87.03, "learning_rate": 8.395598705501618e-06, "loss": 0.0486, "step": 224110 }, { "epoch": 87.04, "learning_rate": 8.395080906148868e-06, "loss": 0.0104, "step": 224120 }, { "epoch": 87.04, "learning_rate": 8.394563106796118e-06, "loss": 0.0591, "step": 224130 }, { "epoch": 87.04, "learning_rate": 8.394045307443366e-06, "loss": 0.0153, "step": 224140 }, { "epoch": 87.05, "learning_rate": 8.393527508090615e-06, "loss": 0.0597, "step": 224150 }, { "epoch": 87.05, "learning_rate": 8.393009708737865e-06, "loss": 0.0198, "step": 224160 }, { "epoch": 87.06, "learning_rate": 8.392491909385114e-06, "loss": 0.0007, "step": 224170 }, { "epoch": 87.06, "learning_rate": 8.391974110032362e-06, "loss": 0.049, "step": 224180 }, { "epoch": 87.06, "learning_rate": 8.391456310679612e-06, "loss": 0.068, "step": 224190 }, { "epoch": 87.07, "learning_rate": 8.390938511326862e-06, "loss": 0.0818, "step": 224200 }, { "epoch": 87.07, "learning_rate": 8.390420711974111e-06, "loss": 0.0041, "step": 224210 }, { "epoch": 87.08, "learning_rate": 8.389902912621359e-06, "loss": 0.0219, "step": 224220 }, { "epoch": 87.08, "learning_rate": 8.389385113268609e-06, "loss": 0.0581, "step": 224230 }, { "epoch": 87.08, "learning_rate": 8.388867313915858e-06, "loss": 0.0361, "step": 224240 }, { "epoch": 87.09, "learning_rate": 8.388349514563108e-06, "loss": 0.019, "step": 224250 }, { "epoch": 87.09, "learning_rate": 8.387831715210358e-06, "loss": 0.0736, "step": 224260 }, { "epoch": 87.1, "learning_rate": 8.387313915857606e-06, "loss": 0.0264, "step": 224270 }, { "epoch": 87.1, "learning_rate": 8.386796116504855e-06, "loss": 0.1067, "step": 224280 }, { "epoch": 87.1, "learning_rate": 8.386278317152105e-06, "loss": 0.1028, "step": 224290 }, { "epoch": 87.11, "learning_rate": 8.385760517799354e-06, "loss": 0.0497, "step": 224300 }, { "epoch": 87.11, "learning_rate": 8.385242718446602e-06, "loss": 0.0485, "step": 224310 }, { "epoch": 87.11, "learning_rate": 8.384724919093852e-06, "loss": 0.1011, "step": 224320 }, { "epoch": 87.12, "learning_rate": 8.384207119741102e-06, "loss": 0.0709, "step": 224330 }, { "epoch": 87.12, "learning_rate": 8.383689320388351e-06, "loss": 0.0383, "step": 224340 }, { "epoch": 87.13, "learning_rate": 8.383171521035599e-06, "loss": 0.0121, "step": 224350 }, { "epoch": 87.13, "learning_rate": 8.382653721682849e-06, "loss": 0.0617, "step": 224360 }, { "epoch": 87.13, "learning_rate": 8.382135922330098e-06, "loss": 0.158, "step": 224370 }, { "epoch": 87.14, "learning_rate": 8.381618122977348e-06, "loss": 0.0436, "step": 224380 }, { "epoch": 87.14, "learning_rate": 8.381100323624596e-06, "loss": 0.0524, "step": 224390 }, { "epoch": 87.15, "learning_rate": 8.380582524271845e-06, "loss": 0.0466, "step": 224400 }, { "epoch": 87.15, "learning_rate": 8.380064724919095e-06, "loss": 0.0789, "step": 224410 }, { "epoch": 87.15, "learning_rate": 8.379546925566345e-06, "loss": 0.1797, "step": 224420 }, { "epoch": 87.16, "learning_rate": 8.379029126213593e-06, "loss": 0.0976, "step": 224430 }, { "epoch": 87.16, "learning_rate": 8.378511326860842e-06, "loss": 0.0574, "step": 224440 }, { "epoch": 87.17, "learning_rate": 8.377993527508092e-06, "loss": 0.0044, "step": 224450 }, { "epoch": 87.17, "learning_rate": 8.377475728155341e-06, "loss": 0.1577, "step": 224460 }, { "epoch": 87.17, "learning_rate": 8.37695792880259e-06, "loss": 0.0241, "step": 224470 }, { "epoch": 87.18, "learning_rate": 8.376440129449839e-06, "loss": 0.024, "step": 224480 }, { "epoch": 87.18, "learning_rate": 8.375922330097089e-06, "loss": 0.1509, "step": 224490 }, { "epoch": 87.18, "learning_rate": 8.375404530744338e-06, "loss": 0.0006, "step": 224500 }, { "epoch": 87.19, "learning_rate": 8.374886731391586e-06, "loss": 0.0007, "step": 224510 }, { "epoch": 87.19, "learning_rate": 8.374368932038836e-06, "loss": 0.1691, "step": 224520 }, { "epoch": 87.2, "learning_rate": 8.373851132686085e-06, "loss": 0.0416, "step": 224530 }, { "epoch": 87.2, "learning_rate": 8.373333333333335e-06, "loss": 0.013, "step": 224540 }, { "epoch": 87.2, "learning_rate": 8.372815533980583e-06, "loss": 0.0501, "step": 224550 }, { "epoch": 87.21, "learning_rate": 8.372297734627833e-06, "loss": 0.0623, "step": 224560 }, { "epoch": 87.21, "learning_rate": 8.371779935275082e-06, "loss": 0.0011, "step": 224570 }, { "epoch": 87.22, "learning_rate": 8.371262135922332e-06, "loss": 0.1161, "step": 224580 }, { "epoch": 87.22, "learning_rate": 8.37074433656958e-06, "loss": 0.0697, "step": 224590 }, { "epoch": 87.22, "learning_rate": 8.37022653721683e-06, "loss": 0.0754, "step": 224600 }, { "epoch": 87.23, "learning_rate": 8.369708737864079e-06, "loss": 0.1084, "step": 224610 }, { "epoch": 87.23, "learning_rate": 8.369190938511329e-06, "loss": 0.159, "step": 224620 }, { "epoch": 87.23, "learning_rate": 8.368673139158577e-06, "loss": 0.0477, "step": 224630 }, { "epoch": 87.24, "learning_rate": 8.368155339805826e-06, "loss": 0.0673, "step": 224640 }, { "epoch": 87.24, "learning_rate": 8.367637540453076e-06, "loss": 0.0483, "step": 224650 }, { "epoch": 87.25, "learning_rate": 8.367119741100325e-06, "loss": 0.0701, "step": 224660 }, { "epoch": 87.25, "learning_rate": 8.366601941747573e-06, "loss": 0.0728, "step": 224670 }, { "epoch": 87.25, "learning_rate": 8.366084142394823e-06, "loss": 0.0538, "step": 224680 }, { "epoch": 87.26, "learning_rate": 8.365566343042073e-06, "loss": 0.0027, "step": 224690 }, { "epoch": 87.26, "learning_rate": 8.365048543689322e-06, "loss": 0.0203, "step": 224700 }, { "epoch": 87.27, "learning_rate": 8.36453074433657e-06, "loss": 0.0184, "step": 224710 }, { "epoch": 87.27, "learning_rate": 8.36401294498382e-06, "loss": 0.0022, "step": 224720 }, { "epoch": 87.27, "learning_rate": 8.36349514563107e-06, "loss": 0.0762, "step": 224730 }, { "epoch": 87.28, "learning_rate": 8.362977346278319e-06, "loss": 0.0505, "step": 224740 }, { "epoch": 87.28, "learning_rate": 8.362459546925567e-06, "loss": 0.0099, "step": 224750 }, { "epoch": 87.29, "learning_rate": 8.361941747572816e-06, "loss": 0.1424, "step": 224760 }, { "epoch": 87.29, "learning_rate": 8.361423948220066e-06, "loss": 0.0004, "step": 224770 }, { "epoch": 87.29, "learning_rate": 8.360906148867316e-06, "loss": 0.0136, "step": 224780 }, { "epoch": 87.3, "learning_rate": 8.360388349514564e-06, "loss": 0.0749, "step": 224790 }, { "epoch": 87.3, "learning_rate": 8.359870550161813e-06, "loss": 0.1446, "step": 224800 }, { "epoch": 87.3, "learning_rate": 8.359352750809063e-06, "loss": 0.0642, "step": 224810 }, { "epoch": 87.31, "learning_rate": 8.35883495145631e-06, "loss": 0.0085, "step": 224820 }, { "epoch": 87.31, "learning_rate": 8.35831715210356e-06, "loss": 0.0539, "step": 224830 }, { "epoch": 87.32, "learning_rate": 8.35779935275081e-06, "loss": 0.0871, "step": 224840 }, { "epoch": 87.32, "learning_rate": 8.35728155339806e-06, "loss": 0.0675, "step": 224850 }, { "epoch": 87.32, "learning_rate": 8.356763754045308e-06, "loss": 0.0614, "step": 224860 }, { "epoch": 87.33, "learning_rate": 8.356245954692557e-06, "loss": 0.0091, "step": 224870 }, { "epoch": 87.33, "learning_rate": 8.355728155339807e-06, "loss": 0.1359, "step": 224880 }, { "epoch": 87.34, "learning_rate": 8.355210355987056e-06, "loss": 0.0748, "step": 224890 }, { "epoch": 87.34, "learning_rate": 8.354692556634304e-06, "loss": 0.1367, "step": 224900 }, { "epoch": 87.34, "learning_rate": 8.354174757281554e-06, "loss": 0.0049, "step": 224910 }, { "epoch": 87.35, "learning_rate": 8.353656957928804e-06, "loss": 0.1359, "step": 224920 }, { "epoch": 87.35, "learning_rate": 8.353139158576053e-06, "loss": 0.2223, "step": 224930 }, { "epoch": 87.36, "learning_rate": 8.352621359223301e-06, "loss": 0.0086, "step": 224940 }, { "epoch": 87.36, "learning_rate": 8.35210355987055e-06, "loss": 0.188, "step": 224950 }, { "epoch": 87.36, "learning_rate": 8.3515857605178e-06, "loss": 0.2138, "step": 224960 }, { "epoch": 87.37, "learning_rate": 8.35106796116505e-06, "loss": 0.143, "step": 224970 }, { "epoch": 87.37, "learning_rate": 8.350550161812298e-06, "loss": 0.0142, "step": 224980 }, { "epoch": 87.37, "learning_rate": 8.350032362459548e-06, "loss": 0.0815, "step": 224990 }, { "epoch": 87.38, "learning_rate": 8.349514563106797e-06, "loss": 0.0159, "step": 225000 }, { "epoch": 87.38, "learning_rate": 8.348996763754045e-06, "loss": 0.0003, "step": 225010 }, { "epoch": 87.39, "learning_rate": 8.348478964401295e-06, "loss": 0.1051, "step": 225020 }, { "epoch": 87.39, "learning_rate": 8.347961165048544e-06, "loss": 0.0337, "step": 225030 }, { "epoch": 87.39, "learning_rate": 8.347443365695794e-06, "loss": 0.0109, "step": 225040 }, { "epoch": 87.4, "learning_rate": 8.346925566343042e-06, "loss": 0.0062, "step": 225050 }, { "epoch": 87.4, "learning_rate": 8.346407766990291e-06, "loss": 0.0165, "step": 225060 }, { "epoch": 87.41, "learning_rate": 8.345889967637541e-06, "loss": 0.0642, "step": 225070 }, { "epoch": 87.41, "learning_rate": 8.34537216828479e-06, "loss": 0.0335, "step": 225080 }, { "epoch": 87.41, "learning_rate": 8.344854368932039e-06, "loss": 0.0906, "step": 225090 }, { "epoch": 87.42, "learning_rate": 8.344336569579288e-06, "loss": 0.058, "step": 225100 }, { "epoch": 87.42, "learning_rate": 8.343818770226538e-06, "loss": 0.0254, "step": 225110 }, { "epoch": 87.43, "learning_rate": 8.343300970873787e-06, "loss": 0.1002, "step": 225120 }, { "epoch": 87.43, "learning_rate": 8.342783171521035e-06, "loss": 0.012, "step": 225130 }, { "epoch": 87.43, "learning_rate": 8.342265372168285e-06, "loss": 0.0162, "step": 225140 }, { "epoch": 87.44, "learning_rate": 8.341747572815535e-06, "loss": 0.0329, "step": 225150 }, { "epoch": 87.44, "learning_rate": 8.341229773462783e-06, "loss": 0.0448, "step": 225160 }, { "epoch": 87.44, "learning_rate": 8.340711974110032e-06, "loss": 0.0515, "step": 225170 }, { "epoch": 87.45, "learning_rate": 8.340194174757282e-06, "loss": 0.1003, "step": 225180 }, { "epoch": 87.45, "learning_rate": 8.339676375404531e-06, "loss": 0.0001, "step": 225190 }, { "epoch": 87.46, "learning_rate": 8.33915857605178e-06, "loss": 0.0031, "step": 225200 }, { "epoch": 87.46, "learning_rate": 8.338640776699029e-06, "loss": 0.0786, "step": 225210 }, { "epoch": 87.46, "learning_rate": 8.338122977346279e-06, "loss": 0.1756, "step": 225220 }, { "epoch": 87.47, "learning_rate": 8.337605177993528e-06, "loss": 0.0029, "step": 225230 }, { "epoch": 87.47, "learning_rate": 8.337087378640776e-06, "loss": 0.118, "step": 225240 }, { "epoch": 87.48, "learning_rate": 8.336569579288026e-06, "loss": 0.0257, "step": 225250 }, { "epoch": 87.48, "learning_rate": 8.336051779935275e-06, "loss": 0.0178, "step": 225260 }, { "epoch": 87.48, "learning_rate": 8.335533980582525e-06, "loss": 0.1261, "step": 225270 }, { "epoch": 87.49, "learning_rate": 8.335016181229773e-06, "loss": 0.0708, "step": 225280 }, { "epoch": 87.49, "learning_rate": 8.334498381877023e-06, "loss": 0.0226, "step": 225290 }, { "epoch": 87.5, "learning_rate": 8.333980582524272e-06, "loss": 0.021, "step": 225300 }, { "epoch": 87.5, "learning_rate": 8.333462783171522e-06, "loss": 0.1167, "step": 225310 }, { "epoch": 87.5, "learning_rate": 8.33294498381877e-06, "loss": 0.042, "step": 225320 }, { "epoch": 87.51, "learning_rate": 8.33242718446602e-06, "loss": 0.059, "step": 225330 }, { "epoch": 87.51, "learning_rate": 8.331909385113269e-06, "loss": 0.1111, "step": 225340 }, { "epoch": 87.51, "learning_rate": 8.331391585760519e-06, "loss": 0.1122, "step": 225350 }, { "epoch": 87.52, "learning_rate": 8.330873786407768e-06, "loss": 0.1031, "step": 225360 }, { "epoch": 87.52, "learning_rate": 8.330355987055016e-06, "loss": 0.0152, "step": 225370 }, { "epoch": 87.53, "learning_rate": 8.329838187702266e-06, "loss": 0.1022, "step": 225380 }, { "epoch": 87.53, "learning_rate": 8.329320388349515e-06, "loss": 0.084, "step": 225390 }, { "epoch": 87.53, "learning_rate": 8.328802588996765e-06, "loss": 0.1629, "step": 225400 }, { "epoch": 87.54, "learning_rate": 8.328284789644013e-06, "loss": 0.0147, "step": 225410 }, { "epoch": 87.54, "learning_rate": 8.327766990291262e-06, "loss": 0.0897, "step": 225420 }, { "epoch": 87.55, "learning_rate": 8.327249190938512e-06, "loss": 0.0568, "step": 225430 }, { "epoch": 87.55, "learning_rate": 8.326731391585762e-06, "loss": 0.0327, "step": 225440 }, { "epoch": 87.55, "learning_rate": 8.32621359223301e-06, "loss": 0.034, "step": 225450 }, { "epoch": 87.56, "learning_rate": 8.32569579288026e-06, "loss": 0.0574, "step": 225460 }, { "epoch": 87.56, "learning_rate": 8.325177993527509e-06, "loss": 0.0511, "step": 225470 }, { "epoch": 87.57, "learning_rate": 8.324660194174758e-06, "loss": 0.1085, "step": 225480 }, { "epoch": 87.57, "learning_rate": 8.324142394822006e-06, "loss": 0.0612, "step": 225490 }, { "epoch": 87.57, "learning_rate": 8.323624595469256e-06, "loss": 0.0952, "step": 225500 }, { "epoch": 87.58, "learning_rate": 8.323106796116506e-06, "loss": 0.0757, "step": 225510 }, { "epoch": 87.58, "learning_rate": 8.322588996763755e-06, "loss": 0.0769, "step": 225520 }, { "epoch": 87.58, "learning_rate": 8.322071197411003e-06, "loss": 0.0817, "step": 225530 }, { "epoch": 87.59, "learning_rate": 8.321553398058253e-06, "loss": 0.0426, "step": 225540 }, { "epoch": 87.59, "learning_rate": 8.321035598705502e-06, "loss": 0.0504, "step": 225550 }, { "epoch": 87.6, "learning_rate": 8.320517799352752e-06, "loss": 0.144, "step": 225560 }, { "epoch": 87.6, "learning_rate": 8.32e-06, "loss": 0.0893, "step": 225570 }, { "epoch": 87.6, "learning_rate": 8.31948220064725e-06, "loss": 0.0962, "step": 225580 }, { "epoch": 87.61, "learning_rate": 8.3189644012945e-06, "loss": 0.0004, "step": 225590 }, { "epoch": 87.61, "learning_rate": 8.318446601941749e-06, "loss": 0.017, "step": 225600 }, { "epoch": 87.62, "learning_rate": 8.317928802588997e-06, "loss": 0.0785, "step": 225610 }, { "epoch": 87.62, "learning_rate": 8.317411003236246e-06, "loss": 0.0021, "step": 225620 }, { "epoch": 87.62, "learning_rate": 8.316893203883496e-06, "loss": 0.0678, "step": 225630 }, { "epoch": 87.63, "learning_rate": 8.316375404530746e-06, "loss": 0.0246, "step": 225640 }, { "epoch": 87.63, "learning_rate": 8.315857605177994e-06, "loss": 0.0184, "step": 225650 }, { "epoch": 87.63, "learning_rate": 8.315339805825243e-06, "loss": 0.0039, "step": 225660 }, { "epoch": 87.64, "learning_rate": 8.314822006472493e-06, "loss": 0.0635, "step": 225670 }, { "epoch": 87.64, "learning_rate": 8.314304207119742e-06, "loss": 0.0339, "step": 225680 }, { "epoch": 87.65, "learning_rate": 8.31378640776699e-06, "loss": 0.0128, "step": 225690 }, { "epoch": 87.65, "learning_rate": 8.31326860841424e-06, "loss": 0.1059, "step": 225700 }, { "epoch": 87.65, "learning_rate": 8.31275080906149e-06, "loss": 0.0993, "step": 225710 }, { "epoch": 87.66, "learning_rate": 8.312233009708739e-06, "loss": 0.0884, "step": 225720 }, { "epoch": 87.66, "learning_rate": 8.311715210355987e-06, "loss": 0.0188, "step": 225730 }, { "epoch": 87.67, "learning_rate": 8.311197411003237e-06, "loss": 0.0034, "step": 225740 }, { "epoch": 87.67, "learning_rate": 8.310679611650486e-06, "loss": 0.1298, "step": 225750 }, { "epoch": 87.67, "learning_rate": 8.310161812297736e-06, "loss": 0.0149, "step": 225760 }, { "epoch": 87.68, "learning_rate": 8.309644012944984e-06, "loss": 0.0407, "step": 225770 }, { "epoch": 87.68, "learning_rate": 8.309126213592233e-06, "loss": 0.034, "step": 225780 }, { "epoch": 87.69, "learning_rate": 8.308608414239483e-06, "loss": 0.0374, "step": 225790 }, { "epoch": 87.69, "learning_rate": 8.308090614886733e-06, "loss": 0.0997, "step": 225800 }, { "epoch": 87.69, "learning_rate": 8.30757281553398e-06, "loss": 0.0132, "step": 225810 }, { "epoch": 87.7, "learning_rate": 8.30705501618123e-06, "loss": 0.0844, "step": 225820 }, { "epoch": 87.7, "learning_rate": 8.30653721682848e-06, "loss": 0.0579, "step": 225830 }, { "epoch": 87.7, "learning_rate": 8.30601941747573e-06, "loss": 0.0779, "step": 225840 }, { "epoch": 87.71, "learning_rate": 8.305501618122977e-06, "loss": 0.0445, "step": 225850 }, { "epoch": 87.71, "learning_rate": 8.304983818770227e-06, "loss": 0.2079, "step": 225860 }, { "epoch": 87.72, "learning_rate": 8.304466019417477e-06, "loss": 0.0104, "step": 225870 }, { "epoch": 87.72, "learning_rate": 8.303948220064726e-06, "loss": 0.0471, "step": 225880 }, { "epoch": 87.72, "learning_rate": 8.303430420711974e-06, "loss": 0.0148, "step": 225890 }, { "epoch": 87.73, "learning_rate": 8.302912621359224e-06, "loss": 0.0006, "step": 225900 }, { "epoch": 87.73, "learning_rate": 8.302394822006473e-06, "loss": 0.0014, "step": 225910 }, { "epoch": 87.74, "learning_rate": 8.301877022653723e-06, "loss": 0.0181, "step": 225920 }, { "epoch": 87.74, "learning_rate": 8.301359223300973e-06, "loss": 0.0363, "step": 225930 }, { "epoch": 87.74, "learning_rate": 8.30084142394822e-06, "loss": 0.0397, "step": 225940 }, { "epoch": 87.75, "learning_rate": 8.30032362459547e-06, "loss": 0.0868, "step": 225950 }, { "epoch": 87.75, "learning_rate": 8.29980582524272e-06, "loss": 0.0093, "step": 225960 }, { "epoch": 87.76, "learning_rate": 8.29928802588997e-06, "loss": 0.0196, "step": 225970 }, { "epoch": 87.76, "learning_rate": 8.298770226537217e-06, "loss": 0.0492, "step": 225980 }, { "epoch": 87.76, "learning_rate": 8.298252427184467e-06, "loss": 0.015, "step": 225990 }, { "epoch": 87.77, "learning_rate": 8.297734627831717e-06, "loss": 0.0349, "step": 226000 }, { "epoch": 87.77, "learning_rate": 8.297216828478966e-06, "loss": 0.0269, "step": 226010 }, { "epoch": 87.77, "learning_rate": 8.296699029126214e-06, "loss": 0.0849, "step": 226020 }, { "epoch": 87.78, "learning_rate": 8.296181229773464e-06, "loss": 0.044, "step": 226030 }, { "epoch": 87.78, "learning_rate": 8.295663430420713e-06, "loss": 0.0869, "step": 226040 }, { "epoch": 87.79, "learning_rate": 8.295145631067963e-06, "loss": 0.0504, "step": 226050 }, { "epoch": 87.79, "learning_rate": 8.294627831715211e-06, "loss": 0.1417, "step": 226060 }, { "epoch": 87.79, "learning_rate": 8.29411003236246e-06, "loss": 0.0126, "step": 226070 }, { "epoch": 87.8, "learning_rate": 8.29359223300971e-06, "loss": 0.0033, "step": 226080 }, { "epoch": 87.8, "learning_rate": 8.29307443365696e-06, "loss": 0.1094, "step": 226090 }, { "epoch": 87.81, "learning_rate": 8.292556634304208e-06, "loss": 0.0509, "step": 226100 }, { "epoch": 87.81, "learning_rate": 8.292038834951457e-06, "loss": 0.1086, "step": 226110 }, { "epoch": 87.81, "learning_rate": 8.291521035598707e-06, "loss": 0.0032, "step": 226120 }, { "epoch": 87.82, "learning_rate": 8.291003236245957e-06, "loss": 0.0006, "step": 226130 }, { "epoch": 87.82, "learning_rate": 8.290485436893204e-06, "loss": 0.0532, "step": 226140 }, { "epoch": 87.83, "learning_rate": 8.289967637540454e-06, "loss": 0.058, "step": 226150 }, { "epoch": 87.83, "learning_rate": 8.289449838187704e-06, "loss": 0.06, "step": 226160 }, { "epoch": 87.83, "learning_rate": 8.288932038834953e-06, "loss": 0.087, "step": 226170 }, { "epoch": 87.84, "learning_rate": 8.288414239482201e-06, "loss": 0.09, "step": 226180 }, { "epoch": 87.84, "learning_rate": 8.287896440129451e-06, "loss": 0.0272, "step": 226190 }, { "epoch": 87.84, "learning_rate": 8.2873786407767e-06, "loss": 0.0455, "step": 226200 }, { "epoch": 87.85, "learning_rate": 8.28686084142395e-06, "loss": 0.0259, "step": 226210 }, { "epoch": 87.85, "learning_rate": 8.286343042071198e-06, "loss": 0.0214, "step": 226220 }, { "epoch": 87.86, "learning_rate": 8.285825242718448e-06, "loss": 0.0333, "step": 226230 }, { "epoch": 87.86, "learning_rate": 8.285307443365697e-06, "loss": 0.0189, "step": 226240 }, { "epoch": 87.86, "learning_rate": 8.284789644012947e-06, "loss": 0.1101, "step": 226250 }, { "epoch": 87.87, "learning_rate": 8.284271844660195e-06, "loss": 0.0059, "step": 226260 }, { "epoch": 87.87, "learning_rate": 8.283754045307444e-06, "loss": 0.0162, "step": 226270 }, { "epoch": 87.88, "learning_rate": 8.283236245954694e-06, "loss": 0.2011, "step": 226280 }, { "epoch": 87.88, "learning_rate": 8.282718446601942e-06, "loss": 0.0198, "step": 226290 }, { "epoch": 87.88, "learning_rate": 8.282200647249192e-06, "loss": 0.0154, "step": 226300 }, { "epoch": 87.89, "learning_rate": 8.281682847896441e-06, "loss": 0.0074, "step": 226310 }, { "epoch": 87.89, "learning_rate": 8.28116504854369e-06, "loss": 0.3087, "step": 226320 }, { "epoch": 87.9, "learning_rate": 8.280647249190939e-06, "loss": 0.0541, "step": 226330 }, { "epoch": 87.9, "learning_rate": 8.280129449838188e-06, "loss": 0.0392, "step": 226340 }, { "epoch": 87.9, "learning_rate": 8.279611650485438e-06, "loss": 0.0144, "step": 226350 }, { "epoch": 87.91, "learning_rate": 8.279093851132688e-06, "loss": 0.0531, "step": 226360 }, { "epoch": 87.91, "learning_rate": 8.278576051779936e-06, "loss": 0.0218, "step": 226370 }, { "epoch": 87.91, "learning_rate": 8.278058252427185e-06, "loss": 0.0174, "step": 226380 }, { "epoch": 87.92, "learning_rate": 8.277540453074435e-06, "loss": 0.1345, "step": 226390 }, { "epoch": 87.92, "learning_rate": 8.277022653721684e-06, "loss": 0.0504, "step": 226400 }, { "epoch": 87.93, "learning_rate": 8.276504854368932e-06, "loss": 0.093, "step": 226410 }, { "epoch": 87.93, "learning_rate": 8.275987055016182e-06, "loss": 0.0386, "step": 226420 }, { "epoch": 87.93, "learning_rate": 8.275469255663432e-06, "loss": 0.0815, "step": 226430 }, { "epoch": 87.94, "learning_rate": 8.27495145631068e-06, "loss": 0.1022, "step": 226440 }, { "epoch": 87.94, "learning_rate": 8.274433656957929e-06, "loss": 0.1055, "step": 226450 }, { "epoch": 87.95, "learning_rate": 8.273915857605179e-06, "loss": 0.0004, "step": 226460 }, { "epoch": 87.95, "learning_rate": 8.273398058252428e-06, "loss": 0.1114, "step": 226470 }, { "epoch": 87.95, "learning_rate": 8.272880258899676e-06, "loss": 0.0086, "step": 226480 }, { "epoch": 87.96, "learning_rate": 8.272362459546926e-06, "loss": 0.0493, "step": 226490 }, { "epoch": 87.96, "learning_rate": 8.271844660194175e-06, "loss": 0.0995, "step": 226500 }, { "epoch": 87.97, "learning_rate": 8.271326860841425e-06, "loss": 0.0006, "step": 226510 }, { "epoch": 87.97, "learning_rate": 8.270809061488673e-06, "loss": 0.0074, "step": 226520 }, { "epoch": 87.97, "learning_rate": 8.270291262135923e-06, "loss": 0.0104, "step": 226530 }, { "epoch": 87.98, "learning_rate": 8.269773462783172e-06, "loss": 0.1458, "step": 226540 }, { "epoch": 87.98, "learning_rate": 8.269255663430422e-06, "loss": 0.0094, "step": 226550 }, { "epoch": 87.98, "learning_rate": 8.26873786407767e-06, "loss": 0.035, "step": 226560 }, { "epoch": 87.99, "learning_rate": 8.26822006472492e-06, "loss": 0.0944, "step": 226570 }, { "epoch": 87.99, "learning_rate": 8.267702265372169e-06, "loss": 0.0648, "step": 226580 }, { "epoch": 88.0, "learning_rate": 8.267184466019419e-06, "loss": 0.1342, "step": 226590 }, { "epoch": 88.0, "learning_rate": 8.266666666666667e-06, "loss": 0.1188, "step": 226600 }, { "epoch": 88.0, "eval_accuracy": 0.9485557083906465, "eval_loss": 0.37250131368637085, "eval_runtime": 8.1938, "eval_samples_per_second": 443.626, "eval_steps_per_second": 55.529, "step": 226600 }, { "epoch": 88.0, "learning_rate": 8.266148867313916e-06, "loss": 0.0775, "step": 226610 }, { "epoch": 88.01, "learning_rate": 8.265631067961166e-06, "loss": 0.024, "step": 226620 }, { "epoch": 88.01, "learning_rate": 8.265113268608414e-06, "loss": 0.0093, "step": 226630 }, { "epoch": 88.02, "learning_rate": 8.264595469255663e-06, "loss": 0.0931, "step": 226640 }, { "epoch": 88.02, "learning_rate": 8.264077669902913e-06, "loss": 0.1535, "step": 226650 }, { "epoch": 88.02, "learning_rate": 8.263559870550163e-06, "loss": 0.0054, "step": 226660 }, { "epoch": 88.03, "learning_rate": 8.26304207119741e-06, "loss": 0.1043, "step": 226670 }, { "epoch": 88.03, "learning_rate": 8.26252427184466e-06, "loss": 0.0002, "step": 226680 }, { "epoch": 88.03, "learning_rate": 8.26200647249191e-06, "loss": 0.0286, "step": 226690 }, { "epoch": 88.04, "learning_rate": 8.26148867313916e-06, "loss": 0.0088, "step": 226700 }, { "epoch": 88.04, "learning_rate": 8.260970873786407e-06, "loss": 0.05, "step": 226710 }, { "epoch": 88.05, "learning_rate": 8.260453074433657e-06, "loss": 0.057, "step": 226720 }, { "epoch": 88.05, "learning_rate": 8.259935275080907e-06, "loss": 0.045, "step": 226730 }, { "epoch": 88.05, "learning_rate": 8.259417475728156e-06, "loss": 0.001, "step": 226740 }, { "epoch": 88.06, "learning_rate": 8.258899676375404e-06, "loss": 0.0076, "step": 226750 }, { "epoch": 88.06, "learning_rate": 8.258381877022654e-06, "loss": 0.0288, "step": 226760 }, { "epoch": 88.07, "learning_rate": 8.257864077669903e-06, "loss": 0.0431, "step": 226770 }, { "epoch": 88.07, "learning_rate": 8.257346278317153e-06, "loss": 0.0211, "step": 226780 }, { "epoch": 88.07, "learning_rate": 8.256828478964401e-06, "loss": 0.0313, "step": 226790 }, { "epoch": 88.08, "learning_rate": 8.25631067961165e-06, "loss": 0.0515, "step": 226800 }, { "epoch": 88.08, "learning_rate": 8.2557928802589e-06, "loss": 0.0916, "step": 226810 }, { "epoch": 88.09, "learning_rate": 8.25527508090615e-06, "loss": 0.1283, "step": 226820 }, { "epoch": 88.09, "learning_rate": 8.254757281553398e-06, "loss": 0.1278, "step": 226830 }, { "epoch": 88.09, "learning_rate": 8.254239482200647e-06, "loss": 0.0133, "step": 226840 }, { "epoch": 88.1, "learning_rate": 8.253721682847897e-06, "loss": 0.1005, "step": 226850 }, { "epoch": 88.1, "learning_rate": 8.253203883495146e-06, "loss": 0.0509, "step": 226860 }, { "epoch": 88.1, "learning_rate": 8.252686084142394e-06, "loss": 0.1255, "step": 226870 }, { "epoch": 88.11, "learning_rate": 8.252168284789644e-06, "loss": 0.001, "step": 226880 }, { "epoch": 88.11, "learning_rate": 8.251650485436894e-06, "loss": 0.1471, "step": 226890 }, { "epoch": 88.12, "learning_rate": 8.251132686084143e-06, "loss": 0.002, "step": 226900 }, { "epoch": 88.12, "learning_rate": 8.250614886731391e-06, "loss": 0.1261, "step": 226910 }, { "epoch": 88.12, "learning_rate": 8.25009708737864e-06, "loss": 0.1305, "step": 226920 }, { "epoch": 88.13, "learning_rate": 8.24957928802589e-06, "loss": 0.1085, "step": 226930 }, { "epoch": 88.13, "learning_rate": 8.24906148867314e-06, "loss": 0.0133, "step": 226940 }, { "epoch": 88.14, "learning_rate": 8.248543689320388e-06, "loss": 0.0481, "step": 226950 }, { "epoch": 88.14, "learning_rate": 8.248025889967638e-06, "loss": 0.0506, "step": 226960 }, { "epoch": 88.14, "learning_rate": 8.247508090614887e-06, "loss": 0.0914, "step": 226970 }, { "epoch": 88.15, "learning_rate": 8.246990291262137e-06, "loss": 0.0583, "step": 226980 }, { "epoch": 88.15, "learning_rate": 8.246472491909385e-06, "loss": 0.0495, "step": 226990 }, { "epoch": 88.16, "learning_rate": 8.245954692556634e-06, "loss": 0.1721, "step": 227000 }, { "epoch": 88.16, "learning_rate": 8.245436893203884e-06, "loss": 0.1916, "step": 227010 }, { "epoch": 88.16, "learning_rate": 8.244919093851134e-06, "loss": 0.0168, "step": 227020 }, { "epoch": 88.17, "learning_rate": 8.244401294498382e-06, "loss": 0.0234, "step": 227030 }, { "epoch": 88.17, "learning_rate": 8.243883495145631e-06, "loss": 0.0539, "step": 227040 }, { "epoch": 88.17, "learning_rate": 8.24336569579288e-06, "loss": 0.0056, "step": 227050 }, { "epoch": 88.18, "learning_rate": 8.24284789644013e-06, "loss": 0.0752, "step": 227060 }, { "epoch": 88.18, "learning_rate": 8.24233009708738e-06, "loss": 0.0163, "step": 227070 }, { "epoch": 88.19, "learning_rate": 8.241812297734628e-06, "loss": 0.0029, "step": 227080 }, { "epoch": 88.19, "learning_rate": 8.241294498381878e-06, "loss": 0.1332, "step": 227090 }, { "epoch": 88.19, "learning_rate": 8.240776699029127e-06, "loss": 0.0012, "step": 227100 }, { "epoch": 88.2, "learning_rate": 8.240258899676377e-06, "loss": 0.0388, "step": 227110 }, { "epoch": 88.2, "learning_rate": 8.239741100323625e-06, "loss": 0.109, "step": 227120 }, { "epoch": 88.21, "learning_rate": 8.239223300970874e-06, "loss": 0.0824, "step": 227130 }, { "epoch": 88.21, "learning_rate": 8.238705501618124e-06, "loss": 0.0497, "step": 227140 }, { "epoch": 88.21, "learning_rate": 8.238187702265374e-06, "loss": 0.0262, "step": 227150 }, { "epoch": 88.22, "learning_rate": 8.237669902912621e-06, "loss": 0.052, "step": 227160 }, { "epoch": 88.22, "learning_rate": 8.237152103559871e-06, "loss": 0.0172, "step": 227170 }, { "epoch": 88.23, "learning_rate": 8.23663430420712e-06, "loss": 0.0013, "step": 227180 }, { "epoch": 88.23, "learning_rate": 8.23611650485437e-06, "loss": 0.0703, "step": 227190 }, { "epoch": 88.23, "learning_rate": 8.235598705501618e-06, "loss": 0.0171, "step": 227200 }, { "epoch": 88.24, "learning_rate": 8.235080906148868e-06, "loss": 0.0173, "step": 227210 }, { "epoch": 88.24, "learning_rate": 8.234563106796117e-06, "loss": 0.238, "step": 227220 }, { "epoch": 88.24, "learning_rate": 8.234045307443367e-06, "loss": 0.0215, "step": 227230 }, { "epoch": 88.25, "learning_rate": 8.233527508090615e-06, "loss": 0.0236, "step": 227240 }, { "epoch": 88.25, "learning_rate": 8.233009708737865e-06, "loss": 0.0332, "step": 227250 }, { "epoch": 88.26, "learning_rate": 8.232491909385114e-06, "loss": 0.1584, "step": 227260 }, { "epoch": 88.26, "learning_rate": 8.231974110032364e-06, "loss": 0.0264, "step": 227270 }, { "epoch": 88.26, "learning_rate": 8.231456310679612e-06, "loss": 0.04, "step": 227280 }, { "epoch": 88.27, "learning_rate": 8.230938511326861e-06, "loss": 0.1187, "step": 227290 }, { "epoch": 88.27, "learning_rate": 8.230420711974111e-06, "loss": 0.0107, "step": 227300 }, { "epoch": 88.28, "learning_rate": 8.22990291262136e-06, "loss": 0.061, "step": 227310 }, { "epoch": 88.28, "learning_rate": 8.229385113268609e-06, "loss": 0.0618, "step": 227320 }, { "epoch": 88.28, "learning_rate": 8.228867313915858e-06, "loss": 0.0749, "step": 227330 }, { "epoch": 88.29, "learning_rate": 8.228349514563108e-06, "loss": 0.0315, "step": 227340 }, { "epoch": 88.29, "learning_rate": 8.227831715210357e-06, "loss": 0.0405, "step": 227350 }, { "epoch": 88.3, "learning_rate": 8.227313915857605e-06, "loss": 0.0334, "step": 227360 }, { "epoch": 88.3, "learning_rate": 8.226796116504855e-06, "loss": 0.0011, "step": 227370 }, { "epoch": 88.3, "learning_rate": 8.226278317152105e-06, "loss": 0.0256, "step": 227380 }, { "epoch": 88.31, "learning_rate": 8.225760517799354e-06, "loss": 0.0459, "step": 227390 }, { "epoch": 88.31, "learning_rate": 8.225242718446602e-06, "loss": 0.067, "step": 227400 }, { "epoch": 88.31, "learning_rate": 8.224724919093852e-06, "loss": 0.0345, "step": 227410 }, { "epoch": 88.32, "learning_rate": 8.224207119741101e-06, "loss": 0.0107, "step": 227420 }, { "epoch": 88.32, "learning_rate": 8.223689320388351e-06, "loss": 0.1011, "step": 227430 }, { "epoch": 88.33, "learning_rate": 8.223171521035599e-06, "loss": 0.027, "step": 227440 }, { "epoch": 88.33, "learning_rate": 8.222653721682849e-06, "loss": 0.0809, "step": 227450 }, { "epoch": 88.33, "learning_rate": 8.222135922330098e-06, "loss": 0.0047, "step": 227460 }, { "epoch": 88.34, "learning_rate": 8.221618122977348e-06, "loss": 0.0671, "step": 227470 }, { "epoch": 88.34, "learning_rate": 8.221100323624596e-06, "loss": 0.0743, "step": 227480 }, { "epoch": 88.35, "learning_rate": 8.220582524271845e-06, "loss": 0.0168, "step": 227490 }, { "epoch": 88.35, "learning_rate": 8.220064724919095e-06, "loss": 0.0421, "step": 227500 }, { "epoch": 88.35, "learning_rate": 8.219546925566345e-06, "loss": 0.0094, "step": 227510 }, { "epoch": 88.36, "learning_rate": 8.219029126213592e-06, "loss": 0.0114, "step": 227520 }, { "epoch": 88.36, "learning_rate": 8.218511326860842e-06, "loss": 0.0833, "step": 227530 }, { "epoch": 88.37, "learning_rate": 8.217993527508092e-06, "loss": 0.0115, "step": 227540 }, { "epoch": 88.37, "learning_rate": 8.217475728155341e-06, "loss": 0.0239, "step": 227550 }, { "epoch": 88.37, "learning_rate": 8.21695792880259e-06, "loss": 0.0644, "step": 227560 }, { "epoch": 88.38, "learning_rate": 8.216440129449839e-06, "loss": 0.0289, "step": 227570 }, { "epoch": 88.38, "learning_rate": 8.215922330097088e-06, "loss": 0.0001, "step": 227580 }, { "epoch": 88.38, "learning_rate": 8.215404530744338e-06, "loss": 0.088, "step": 227590 }, { "epoch": 88.39, "learning_rate": 8.214886731391586e-06, "loss": 0.0602, "step": 227600 }, { "epoch": 88.39, "learning_rate": 8.214368932038836e-06, "loss": 0.0155, "step": 227610 }, { "epoch": 88.4, "learning_rate": 8.213851132686085e-06, "loss": 0.1322, "step": 227620 }, { "epoch": 88.4, "learning_rate": 8.213333333333335e-06, "loss": 0.0656, "step": 227630 }, { "epoch": 88.4, "learning_rate": 8.212815533980584e-06, "loss": 0.0035, "step": 227640 }, { "epoch": 88.41, "learning_rate": 8.212297734627832e-06, "loss": 0.0682, "step": 227650 }, { "epoch": 88.41, "learning_rate": 8.211779935275082e-06, "loss": 0.0331, "step": 227660 }, { "epoch": 88.42, "learning_rate": 8.211262135922332e-06, "loss": 0.0009, "step": 227670 }, { "epoch": 88.42, "learning_rate": 8.210744336569581e-06, "loss": 0.0396, "step": 227680 }, { "epoch": 88.42, "learning_rate": 8.21022653721683e-06, "loss": 0.0729, "step": 227690 }, { "epoch": 88.43, "learning_rate": 8.209708737864079e-06, "loss": 0.2013, "step": 227700 }, { "epoch": 88.43, "learning_rate": 8.209190938511328e-06, "loss": 0.0376, "step": 227710 }, { "epoch": 88.43, "learning_rate": 8.208673139158578e-06, "loss": 0.1394, "step": 227720 }, { "epoch": 88.44, "learning_rate": 8.208155339805826e-06, "loss": 0.1549, "step": 227730 }, { "epoch": 88.44, "learning_rate": 8.207637540453076e-06, "loss": 0.0201, "step": 227740 }, { "epoch": 88.45, "learning_rate": 8.207119741100325e-06, "loss": 0.1594, "step": 227750 }, { "epoch": 88.45, "learning_rate": 8.206601941747573e-06, "loss": 0.0761, "step": 227760 }, { "epoch": 88.45, "learning_rate": 8.206084142394823e-06, "loss": 0.0014, "step": 227770 }, { "epoch": 88.46, "learning_rate": 8.205566343042072e-06, "loss": 0.0688, "step": 227780 }, { "epoch": 88.46, "learning_rate": 8.205048543689322e-06, "loss": 0.0007, "step": 227790 }, { "epoch": 88.47, "learning_rate": 8.20453074433657e-06, "loss": 0.0562, "step": 227800 }, { "epoch": 88.47, "learning_rate": 8.20401294498382e-06, "loss": 0.0522, "step": 227810 }, { "epoch": 88.47, "learning_rate": 8.20349514563107e-06, "loss": 0.0765, "step": 227820 }, { "epoch": 88.48, "learning_rate": 8.202977346278319e-06, "loss": 0.0378, "step": 227830 }, { "epoch": 88.48, "learning_rate": 8.202459546925567e-06, "loss": 0.0191, "step": 227840 }, { "epoch": 88.49, "learning_rate": 8.201941747572816e-06, "loss": 0.0022, "step": 227850 }, { "epoch": 88.49, "learning_rate": 8.201423948220066e-06, "loss": 0.0521, "step": 227860 }, { "epoch": 88.49, "learning_rate": 8.200906148867316e-06, "loss": 0.0921, "step": 227870 }, { "epoch": 88.5, "learning_rate": 8.200388349514563e-06, "loss": 0.0787, "step": 227880 }, { "epoch": 88.5, "learning_rate": 8.199870550161813e-06, "loss": 0.0442, "step": 227890 }, { "epoch": 88.5, "learning_rate": 8.199352750809063e-06, "loss": 0.0375, "step": 227900 }, { "epoch": 88.51, "learning_rate": 8.19883495145631e-06, "loss": 0.0529, "step": 227910 }, { "epoch": 88.51, "learning_rate": 8.19831715210356e-06, "loss": 0.0384, "step": 227920 }, { "epoch": 88.52, "learning_rate": 8.19779935275081e-06, "loss": 0.0466, "step": 227930 }, { "epoch": 88.52, "learning_rate": 8.19728155339806e-06, "loss": 0.0002, "step": 227940 }, { "epoch": 88.52, "learning_rate": 8.196763754045307e-06, "loss": 0.0391, "step": 227950 }, { "epoch": 88.53, "learning_rate": 8.196245954692557e-06, "loss": 0.048, "step": 227960 }, { "epoch": 88.53, "learning_rate": 8.195728155339807e-06, "loss": 0.2114, "step": 227970 }, { "epoch": 88.54, "learning_rate": 8.195210355987056e-06, "loss": 0.0548, "step": 227980 }, { "epoch": 88.54, "learning_rate": 8.194692556634304e-06, "loss": 0.2075, "step": 227990 }, { "epoch": 88.54, "learning_rate": 8.194174757281554e-06, "loss": 0.0908, "step": 228000 }, { "epoch": 88.55, "learning_rate": 8.193656957928803e-06, "loss": 0.0008, "step": 228010 }, { "epoch": 88.55, "learning_rate": 8.193139158576053e-06, "loss": 0.0365, "step": 228020 }, { "epoch": 88.56, "learning_rate": 8.192621359223301e-06, "loss": 0.0332, "step": 228030 }, { "epoch": 88.56, "learning_rate": 8.19210355987055e-06, "loss": 0.0261, "step": 228040 }, { "epoch": 88.56, "learning_rate": 8.1915857605178e-06, "loss": 0.0102, "step": 228050 }, { "epoch": 88.57, "learning_rate": 8.19106796116505e-06, "loss": 0.0125, "step": 228060 }, { "epoch": 88.57, "learning_rate": 8.190550161812298e-06, "loss": 0.1339, "step": 228070 }, { "epoch": 88.57, "learning_rate": 8.190032362459547e-06, "loss": 0.0934, "step": 228080 }, { "epoch": 88.58, "learning_rate": 8.189514563106797e-06, "loss": 0.1305, "step": 228090 }, { "epoch": 88.58, "learning_rate": 8.188996763754045e-06, "loss": 0.0626, "step": 228100 }, { "epoch": 88.59, "learning_rate": 8.188478964401295e-06, "loss": 0.0111, "step": 228110 }, { "epoch": 88.59, "learning_rate": 8.187961165048544e-06, "loss": 0.0005, "step": 228120 }, { "epoch": 88.59, "learning_rate": 8.187443365695794e-06, "loss": 0.0001, "step": 228130 }, { "epoch": 88.6, "learning_rate": 8.186925566343042e-06, "loss": 0.0028, "step": 228140 }, { "epoch": 88.6, "learning_rate": 8.186407766990291e-06, "loss": 0.0215, "step": 228150 }, { "epoch": 88.61, "learning_rate": 8.185889967637541e-06, "loss": 0.0381, "step": 228160 }, { "epoch": 88.61, "learning_rate": 8.18537216828479e-06, "loss": 0.0151, "step": 228170 }, { "epoch": 88.61, "learning_rate": 8.184854368932038e-06, "loss": 0.1006, "step": 228180 }, { "epoch": 88.62, "learning_rate": 8.184336569579288e-06, "loss": 0.0393, "step": 228190 }, { "epoch": 88.62, "learning_rate": 8.183818770226538e-06, "loss": 0.0786, "step": 228200 }, { "epoch": 88.63, "learning_rate": 8.183300970873787e-06, "loss": 0.0358, "step": 228210 }, { "epoch": 88.63, "learning_rate": 8.182783171521035e-06, "loss": 0.0474, "step": 228220 }, { "epoch": 88.63, "learning_rate": 8.182265372168285e-06, "loss": 0.1346, "step": 228230 }, { "epoch": 88.64, "learning_rate": 8.181747572815534e-06, "loss": 0.0075, "step": 228240 }, { "epoch": 88.64, "learning_rate": 8.181229773462784e-06, "loss": 0.237, "step": 228250 }, { "epoch": 88.64, "learning_rate": 8.180711974110032e-06, "loss": 0.1597, "step": 228260 }, { "epoch": 88.65, "learning_rate": 8.180194174757282e-06, "loss": 0.0276, "step": 228270 }, { "epoch": 88.65, "learning_rate": 8.179676375404531e-06, "loss": 0.1664, "step": 228280 }, { "epoch": 88.66, "learning_rate": 8.179158576051781e-06, "loss": 0.0483, "step": 228290 }, { "epoch": 88.66, "learning_rate": 8.178640776699029e-06, "loss": 0.0003, "step": 228300 }, { "epoch": 88.66, "learning_rate": 8.178122977346278e-06, "loss": 0.0716, "step": 228310 }, { "epoch": 88.67, "learning_rate": 8.177605177993528e-06, "loss": 0.0861, "step": 228320 }, { "epoch": 88.67, "learning_rate": 8.177087378640778e-06, "loss": 0.0034, "step": 228330 }, { "epoch": 88.68, "learning_rate": 8.176569579288026e-06, "loss": 0.0412, "step": 228340 }, { "epoch": 88.68, "learning_rate": 8.176051779935275e-06, "loss": 0.0092, "step": 228350 }, { "epoch": 88.68, "learning_rate": 8.175533980582525e-06, "loss": 0.0397, "step": 228360 }, { "epoch": 88.69, "learning_rate": 8.175016181229774e-06, "loss": 0.0817, "step": 228370 }, { "epoch": 88.69, "learning_rate": 8.174498381877022e-06, "loss": 0.0264, "step": 228380 }, { "epoch": 88.7, "learning_rate": 8.173980582524272e-06, "loss": 0.0001, "step": 228390 }, { "epoch": 88.7, "learning_rate": 8.173462783171522e-06, "loss": 0.0535, "step": 228400 }, { "epoch": 88.7, "learning_rate": 8.172944983818771e-06, "loss": 0.0345, "step": 228410 }, { "epoch": 88.71, "learning_rate": 8.172427184466019e-06, "loss": 0.0075, "step": 228420 }, { "epoch": 88.71, "learning_rate": 8.171909385113269e-06, "loss": 0.0015, "step": 228430 }, { "epoch": 88.71, "learning_rate": 8.171391585760518e-06, "loss": 0.0196, "step": 228440 }, { "epoch": 88.72, "learning_rate": 8.170873786407768e-06, "loss": 0.0039, "step": 228450 }, { "epoch": 88.72, "learning_rate": 8.170355987055016e-06, "loss": 0.0111, "step": 228460 }, { "epoch": 88.73, "learning_rate": 8.169838187702266e-06, "loss": 0.0016, "step": 228470 }, { "epoch": 88.73, "learning_rate": 8.169320388349515e-06, "loss": 0.0073, "step": 228480 }, { "epoch": 88.73, "learning_rate": 8.168802588996765e-06, "loss": 0.0006, "step": 228490 }, { "epoch": 88.74, "learning_rate": 8.168284789644013e-06, "loss": 0.0633, "step": 228500 }, { "epoch": 88.74, "learning_rate": 8.167766990291262e-06, "loss": 0.1453, "step": 228510 }, { "epoch": 88.75, "learning_rate": 8.167249190938512e-06, "loss": 0.0979, "step": 228520 }, { "epoch": 88.75, "learning_rate": 8.166731391585762e-06, "loss": 0.1108, "step": 228530 }, { "epoch": 88.75, "learning_rate": 8.16621359223301e-06, "loss": 0.0001, "step": 228540 }, { "epoch": 88.76, "learning_rate": 8.165695792880259e-06, "loss": 0.1057, "step": 228550 }, { "epoch": 88.76, "learning_rate": 8.165177993527509e-06, "loss": 0.1027, "step": 228560 }, { "epoch": 88.77, "learning_rate": 8.164660194174758e-06, "loss": 0.0493, "step": 228570 }, { "epoch": 88.77, "learning_rate": 8.164142394822006e-06, "loss": 0.0286, "step": 228580 }, { "epoch": 88.77, "learning_rate": 8.163624595469256e-06, "loss": 0.0203, "step": 228590 }, { "epoch": 88.78, "learning_rate": 8.163106796116505e-06, "loss": 0.0174, "step": 228600 }, { "epoch": 88.78, "learning_rate": 8.162588996763755e-06, "loss": 0.0773, "step": 228610 }, { "epoch": 88.78, "learning_rate": 8.162071197411003e-06, "loss": 0.0336, "step": 228620 }, { "epoch": 88.79, "learning_rate": 8.161553398058253e-06, "loss": 0.0458, "step": 228630 }, { "epoch": 88.79, "learning_rate": 8.161035598705502e-06, "loss": 0.0001, "step": 228640 }, { "epoch": 88.8, "learning_rate": 8.160517799352752e-06, "loss": 0.0387, "step": 228650 }, { "epoch": 88.8, "learning_rate": 8.16e-06, "loss": 0.0024, "step": 228660 }, { "epoch": 88.8, "learning_rate": 8.15948220064725e-06, "loss": 0.0002, "step": 228670 }, { "epoch": 88.81, "learning_rate": 8.158964401294499e-06, "loss": 0.0098, "step": 228680 }, { "epoch": 88.81, "learning_rate": 8.158446601941749e-06, "loss": 0.0088, "step": 228690 }, { "epoch": 88.82, "learning_rate": 8.157928802588997e-06, "loss": 0.061, "step": 228700 }, { "epoch": 88.82, "learning_rate": 8.157411003236246e-06, "loss": 0.0195, "step": 228710 }, { "epoch": 88.82, "learning_rate": 8.156893203883496e-06, "loss": 0.0336, "step": 228720 }, { "epoch": 88.83, "learning_rate": 8.156375404530745e-06, "loss": 0.0084, "step": 228730 }, { "epoch": 88.83, "learning_rate": 8.155857605177995e-06, "loss": 0.0708, "step": 228740 }, { "epoch": 88.83, "learning_rate": 8.155339805825243e-06, "loss": 0.0942, "step": 228750 }, { "epoch": 88.84, "learning_rate": 8.154822006472493e-06, "loss": 0.0375, "step": 228760 }, { "epoch": 88.84, "learning_rate": 8.154304207119742e-06, "loss": 0.0162, "step": 228770 }, { "epoch": 88.85, "learning_rate": 8.153786407766992e-06, "loss": 0.0287, "step": 228780 }, { "epoch": 88.85, "learning_rate": 8.15326860841424e-06, "loss": 0.0004, "step": 228790 }, { "epoch": 88.85, "learning_rate": 8.15275080906149e-06, "loss": 0.0266, "step": 228800 }, { "epoch": 88.86, "learning_rate": 8.152233009708739e-06, "loss": 0.0165, "step": 228810 }, { "epoch": 88.86, "learning_rate": 8.151715210355989e-06, "loss": 0.0549, "step": 228820 }, { "epoch": 88.87, "learning_rate": 8.151197411003237e-06, "loss": 0.0189, "step": 228830 }, { "epoch": 88.87, "learning_rate": 8.150679611650486e-06, "loss": 0.0002, "step": 228840 }, { "epoch": 88.87, "learning_rate": 8.150161812297736e-06, "loss": 0.0346, "step": 228850 }, { "epoch": 88.88, "learning_rate": 8.149644012944985e-06, "loss": 0.0075, "step": 228860 }, { "epoch": 88.88, "learning_rate": 8.149126213592233e-06, "loss": 0.0613, "step": 228870 }, { "epoch": 88.89, "learning_rate": 8.148608414239483e-06, "loss": 0.0158, "step": 228880 }, { "epoch": 88.89, "learning_rate": 8.148090614886733e-06, "loss": 0.0141, "step": 228890 }, { "epoch": 88.89, "learning_rate": 8.147572815533982e-06, "loss": 0.0405, "step": 228900 }, { "epoch": 88.9, "learning_rate": 8.14705501618123e-06, "loss": 0.0288, "step": 228910 }, { "epoch": 88.9, "learning_rate": 8.14653721682848e-06, "loss": 0.0731, "step": 228920 }, { "epoch": 88.9, "learning_rate": 8.14601941747573e-06, "loss": 0.0477, "step": 228930 }, { "epoch": 88.91, "learning_rate": 8.145501618122979e-06, "loss": 0.0499, "step": 228940 }, { "epoch": 88.91, "learning_rate": 8.144983818770227e-06, "loss": 0.0216, "step": 228950 }, { "epoch": 88.92, "learning_rate": 8.144466019417476e-06, "loss": 0.0215, "step": 228960 }, { "epoch": 88.92, "learning_rate": 8.143948220064726e-06, "loss": 0.0891, "step": 228970 }, { "epoch": 88.92, "learning_rate": 8.143430420711976e-06, "loss": 0.038, "step": 228980 }, { "epoch": 88.93, "learning_rate": 8.142912621359224e-06, "loss": 0.0089, "step": 228990 }, { "epoch": 88.93, "learning_rate": 8.142394822006473e-06, "loss": 0.0777, "step": 229000 }, { "epoch": 88.94, "learning_rate": 8.141877022653723e-06, "loss": 0.1021, "step": 229010 }, { "epoch": 88.94, "learning_rate": 8.141359223300972e-06, "loss": 0.0446, "step": 229020 }, { "epoch": 88.94, "learning_rate": 8.14084142394822e-06, "loss": 0.0077, "step": 229030 }, { "epoch": 88.95, "learning_rate": 8.14032362459547e-06, "loss": 0.0108, "step": 229040 }, { "epoch": 88.95, "learning_rate": 8.13980582524272e-06, "loss": 0.0446, "step": 229050 }, { "epoch": 88.96, "learning_rate": 8.13928802588997e-06, "loss": 0.2494, "step": 229060 }, { "epoch": 88.96, "learning_rate": 8.138770226537217e-06, "loss": 0.1199, "step": 229070 }, { "epoch": 88.96, "learning_rate": 8.138252427184467e-06, "loss": 0.0352, "step": 229080 }, { "epoch": 88.97, "learning_rate": 8.137734627831716e-06, "loss": 0.0506, "step": 229090 }, { "epoch": 88.97, "learning_rate": 8.137216828478966e-06, "loss": 0.1272, "step": 229100 }, { "epoch": 88.97, "learning_rate": 8.136699029126214e-06, "loss": 0.0888, "step": 229110 }, { "epoch": 88.98, "learning_rate": 8.136181229773464e-06, "loss": 0.1335, "step": 229120 }, { "epoch": 88.98, "learning_rate": 8.135663430420713e-06, "loss": 0.0383, "step": 229130 }, { "epoch": 88.99, "learning_rate": 8.135145631067963e-06, "loss": 0.0239, "step": 229140 }, { "epoch": 88.99, "learning_rate": 8.13462783171521e-06, "loss": 0.0002, "step": 229150 }, { "epoch": 88.99, "learning_rate": 8.13411003236246e-06, "loss": 0.1077, "step": 229160 }, { "epoch": 89.0, "learning_rate": 8.13359223300971e-06, "loss": 0.1493, "step": 229170 }, { "epoch": 89.0, "eval_accuracy": 0.9521320495185694, "eval_loss": 0.3559674322605133, "eval_runtime": 8.2123, "eval_samples_per_second": 442.628, "eval_steps_per_second": 55.405, "step": 229175 }, { "epoch": 89.0, "learning_rate": 8.13307443365696e-06, "loss": 0.0005, "step": 229180 }, { "epoch": 89.01, "learning_rate": 8.132556634304208e-06, "loss": 0.0232, "step": 229190 }, { "epoch": 89.01, "learning_rate": 8.132038834951457e-06, "loss": 0.0862, "step": 229200 }, { "epoch": 89.01, "learning_rate": 8.131521035598707e-06, "loss": 0.0457, "step": 229210 }, { "epoch": 89.02, "learning_rate": 8.131003236245956e-06, "loss": 0.0373, "step": 229220 }, { "epoch": 89.02, "learning_rate": 8.130485436893204e-06, "loss": 0.1482, "step": 229230 }, { "epoch": 89.03, "learning_rate": 8.129967637540454e-06, "loss": 0.014, "step": 229240 }, { "epoch": 89.03, "learning_rate": 8.129449838187704e-06, "loss": 0.0172, "step": 229250 }, { "epoch": 89.03, "learning_rate": 8.128932038834953e-06, "loss": 0.0105, "step": 229260 }, { "epoch": 89.04, "learning_rate": 8.128414239482201e-06, "loss": 0.0966, "step": 229270 }, { "epoch": 89.04, "learning_rate": 8.12789644012945e-06, "loss": 0.0411, "step": 229280 }, { "epoch": 89.04, "learning_rate": 8.1273786407767e-06, "loss": 0.1419, "step": 229290 }, { "epoch": 89.05, "learning_rate": 8.12686084142395e-06, "loss": 0.1712, "step": 229300 }, { "epoch": 89.05, "learning_rate": 8.126343042071198e-06, "loss": 0.0085, "step": 229310 }, { "epoch": 89.06, "learning_rate": 8.125825242718447e-06, "loss": 0.0041, "step": 229320 }, { "epoch": 89.06, "learning_rate": 8.125307443365697e-06, "loss": 0.0279, "step": 229330 }, { "epoch": 89.06, "learning_rate": 8.124789644012947e-06, "loss": 0.1184, "step": 229340 }, { "epoch": 89.07, "learning_rate": 8.124271844660195e-06, "loss": 0.0003, "step": 229350 }, { "epoch": 89.07, "learning_rate": 8.123754045307444e-06, "loss": 0.13, "step": 229360 }, { "epoch": 89.08, "learning_rate": 8.123236245954694e-06, "loss": 0.1883, "step": 229370 }, { "epoch": 89.08, "learning_rate": 8.122718446601942e-06, "loss": 0.1589, "step": 229380 }, { "epoch": 89.08, "learning_rate": 8.122200647249191e-06, "loss": 0.0355, "step": 229390 }, { "epoch": 89.09, "learning_rate": 8.121682847896441e-06, "loss": 0.0522, "step": 229400 }, { "epoch": 89.09, "learning_rate": 8.12116504854369e-06, "loss": 0.1907, "step": 229410 }, { "epoch": 89.1, "learning_rate": 8.120647249190939e-06, "loss": 0.002, "step": 229420 }, { "epoch": 89.1, "learning_rate": 8.120129449838188e-06, "loss": 0.0042, "step": 229430 }, { "epoch": 89.1, "learning_rate": 8.119611650485438e-06, "loss": 0.0855, "step": 229440 }, { "epoch": 89.11, "learning_rate": 8.119093851132687e-06, "loss": 0.1521, "step": 229450 }, { "epoch": 89.11, "learning_rate": 8.118576051779935e-06, "loss": 0.0688, "step": 229460 }, { "epoch": 89.11, "learning_rate": 8.118058252427185e-06, "loss": 0.0799, "step": 229470 }, { "epoch": 89.12, "learning_rate": 8.117540453074435e-06, "loss": 0.1158, "step": 229480 }, { "epoch": 89.12, "learning_rate": 8.117022653721684e-06, "loss": 0.0566, "step": 229490 }, { "epoch": 89.13, "learning_rate": 8.116504854368932e-06, "loss": 0.0009, "step": 229500 }, { "epoch": 89.13, "learning_rate": 8.115987055016182e-06, "loss": 0.0012, "step": 229510 }, { "epoch": 89.13, "learning_rate": 8.115469255663431e-06, "loss": 0.0514, "step": 229520 }, { "epoch": 89.14, "learning_rate": 8.114951456310681e-06, "loss": 0.0026, "step": 229530 }, { "epoch": 89.14, "learning_rate": 8.114433656957929e-06, "loss": 0.0282, "step": 229540 }, { "epoch": 89.15, "learning_rate": 8.113915857605179e-06, "loss": 0.0271, "step": 229550 }, { "epoch": 89.15, "learning_rate": 8.113398058252428e-06, "loss": 0.0982, "step": 229560 }, { "epoch": 89.15, "learning_rate": 8.112880258899676e-06, "loss": 0.0202, "step": 229570 }, { "epoch": 89.16, "learning_rate": 8.112362459546926e-06, "loss": 0.0705, "step": 229580 }, { "epoch": 89.16, "learning_rate": 8.111844660194175e-06, "loss": 0.015, "step": 229590 }, { "epoch": 89.17, "learning_rate": 8.111326860841425e-06, "loss": 0.0684, "step": 229600 }, { "epoch": 89.17, "learning_rate": 8.110809061488673e-06, "loss": 0.1066, "step": 229610 }, { "epoch": 89.17, "learning_rate": 8.110291262135922e-06, "loss": 0.0008, "step": 229620 }, { "epoch": 89.18, "learning_rate": 8.109773462783172e-06, "loss": 0.001, "step": 229630 }, { "epoch": 89.18, "learning_rate": 8.109255663430422e-06, "loss": 0.0544, "step": 229640 }, { "epoch": 89.18, "learning_rate": 8.10873786407767e-06, "loss": 0.1639, "step": 229650 }, { "epoch": 89.19, "learning_rate": 8.10822006472492e-06, "loss": 0.0535, "step": 229660 }, { "epoch": 89.19, "learning_rate": 8.107702265372169e-06, "loss": 0.0448, "step": 229670 }, { "epoch": 89.2, "learning_rate": 8.107184466019418e-06, "loss": 0.0885, "step": 229680 }, { "epoch": 89.2, "learning_rate": 8.106666666666666e-06, "loss": 0.0829, "step": 229690 }, { "epoch": 89.2, "learning_rate": 8.106148867313916e-06, "loss": 0.0538, "step": 229700 }, { "epoch": 89.21, "learning_rate": 8.105631067961166e-06, "loss": 0.0467, "step": 229710 }, { "epoch": 89.21, "learning_rate": 8.105113268608414e-06, "loss": 0.19, "step": 229720 }, { "epoch": 89.22, "learning_rate": 8.104595469255663e-06, "loss": 0.0549, "step": 229730 }, { "epoch": 89.22, "learning_rate": 8.104077669902913e-06, "loss": 0.0375, "step": 229740 }, { "epoch": 89.22, "learning_rate": 8.103559870550162e-06, "loss": 0.0332, "step": 229750 }, { "epoch": 89.23, "learning_rate": 8.10304207119741e-06, "loss": 0.0358, "step": 229760 }, { "epoch": 89.23, "learning_rate": 8.10252427184466e-06, "loss": 0.0389, "step": 229770 }, { "epoch": 89.23, "learning_rate": 8.10200647249191e-06, "loss": 0.1136, "step": 229780 }, { "epoch": 89.24, "learning_rate": 8.10148867313916e-06, "loss": 0.0036, "step": 229790 }, { "epoch": 89.24, "learning_rate": 8.100970873786407e-06, "loss": 0.1126, "step": 229800 }, { "epoch": 89.25, "learning_rate": 8.100453074433657e-06, "loss": 0.0225, "step": 229810 }, { "epoch": 89.25, "learning_rate": 8.099935275080906e-06, "loss": 0.2919, "step": 229820 }, { "epoch": 89.25, "learning_rate": 8.099417475728156e-06, "loss": 0.0138, "step": 229830 }, { "epoch": 89.26, "learning_rate": 8.098899676375404e-06, "loss": 0.0413, "step": 229840 }, { "epoch": 89.26, "learning_rate": 8.098381877022654e-06, "loss": 0.0202, "step": 229850 }, { "epoch": 89.27, "learning_rate": 8.097864077669903e-06, "loss": 0.0173, "step": 229860 }, { "epoch": 89.27, "learning_rate": 8.097346278317153e-06, "loss": 0.055, "step": 229870 }, { "epoch": 89.27, "learning_rate": 8.096828478964402e-06, "loss": 0.1036, "step": 229880 }, { "epoch": 89.28, "learning_rate": 8.09631067961165e-06, "loss": 0.2198, "step": 229890 }, { "epoch": 89.28, "learning_rate": 8.0957928802589e-06, "loss": 0.036, "step": 229900 }, { "epoch": 89.29, "learning_rate": 8.09527508090615e-06, "loss": 0.1316, "step": 229910 }, { "epoch": 89.29, "learning_rate": 8.0947572815534e-06, "loss": 0.0079, "step": 229920 }, { "epoch": 89.29, "learning_rate": 8.094239482200647e-06, "loss": 0.109, "step": 229930 }, { "epoch": 89.3, "learning_rate": 8.093721682847897e-06, "loss": 0.0643, "step": 229940 }, { "epoch": 89.3, "learning_rate": 8.093203883495146e-06, "loss": 0.029, "step": 229950 }, { "epoch": 89.3, "learning_rate": 8.092686084142396e-06, "loss": 0.1209, "step": 229960 }, { "epoch": 89.31, "learning_rate": 8.092168284789644e-06, "loss": 0.0383, "step": 229970 }, { "epoch": 89.31, "learning_rate": 8.091650485436893e-06, "loss": 0.0404, "step": 229980 }, { "epoch": 89.32, "learning_rate": 8.091132686084143e-06, "loss": 0.1191, "step": 229990 }, { "epoch": 89.32, "learning_rate": 8.090614886731393e-06, "loss": 0.0105, "step": 230000 }, { "epoch": 89.32, "learning_rate": 8.09009708737864e-06, "loss": 0.016, "step": 230010 }, { "epoch": 89.33, "learning_rate": 8.08957928802589e-06, "loss": 0.0116, "step": 230020 }, { "epoch": 89.33, "learning_rate": 8.08906148867314e-06, "loss": 0.0169, "step": 230030 }, { "epoch": 89.34, "learning_rate": 8.08854368932039e-06, "loss": 0.029, "step": 230040 }, { "epoch": 89.34, "learning_rate": 8.088025889967637e-06, "loss": 0.0105, "step": 230050 }, { "epoch": 89.34, "learning_rate": 8.087508090614887e-06, "loss": 0.1755, "step": 230060 }, { "epoch": 89.35, "learning_rate": 8.086990291262137e-06, "loss": 0.0381, "step": 230070 }, { "epoch": 89.35, "learning_rate": 8.086472491909386e-06, "loss": 0.0255, "step": 230080 }, { "epoch": 89.36, "learning_rate": 8.085954692556634e-06, "loss": 0.0643, "step": 230090 }, { "epoch": 89.36, "learning_rate": 8.085436893203884e-06, "loss": 0.0535, "step": 230100 }, { "epoch": 89.36, "learning_rate": 8.084919093851133e-06, "loss": 0.0432, "step": 230110 }, { "epoch": 89.37, "learning_rate": 8.084401294498383e-06, "loss": 0.069, "step": 230120 }, { "epoch": 89.37, "learning_rate": 8.083883495145631e-06, "loss": 0.0134, "step": 230130 }, { "epoch": 89.37, "learning_rate": 8.08336569579288e-06, "loss": 0.0146, "step": 230140 }, { "epoch": 89.38, "learning_rate": 8.08284789644013e-06, "loss": 0.02, "step": 230150 }, { "epoch": 89.38, "learning_rate": 8.08233009708738e-06, "loss": 0.0707, "step": 230160 }, { "epoch": 89.39, "learning_rate": 8.081812297734628e-06, "loss": 0.0582, "step": 230170 }, { "epoch": 89.39, "learning_rate": 8.081294498381877e-06, "loss": 0.035, "step": 230180 }, { "epoch": 89.39, "learning_rate": 8.080776699029127e-06, "loss": 0.0697, "step": 230190 }, { "epoch": 89.4, "learning_rate": 8.080258899676377e-06, "loss": 0.0004, "step": 230200 }, { "epoch": 89.4, "learning_rate": 8.079741100323625e-06, "loss": 0.0178, "step": 230210 }, { "epoch": 89.41, "learning_rate": 8.079223300970874e-06, "loss": 0.0795, "step": 230220 }, { "epoch": 89.41, "learning_rate": 8.078705501618124e-06, "loss": 0.0311, "step": 230230 }, { "epoch": 89.41, "learning_rate": 8.078187702265373e-06, "loss": 0.0228, "step": 230240 }, { "epoch": 89.42, "learning_rate": 8.077669902912621e-06, "loss": 0.001, "step": 230250 }, { "epoch": 89.42, "learning_rate": 8.077152103559871e-06, "loss": 0.0514, "step": 230260 }, { "epoch": 89.43, "learning_rate": 8.07663430420712e-06, "loss": 0.0646, "step": 230270 }, { "epoch": 89.43, "learning_rate": 8.07611650485437e-06, "loss": 0.0129, "step": 230280 }, { "epoch": 89.43, "learning_rate": 8.075598705501618e-06, "loss": 0.0294, "step": 230290 }, { "epoch": 89.44, "learning_rate": 8.075080906148868e-06, "loss": 0.1105, "step": 230300 }, { "epoch": 89.44, "learning_rate": 8.074563106796117e-06, "loss": 0.1377, "step": 230310 }, { "epoch": 89.44, "learning_rate": 8.074045307443367e-06, "loss": 0.023, "step": 230320 }, { "epoch": 89.45, "learning_rate": 8.073527508090615e-06, "loss": 0.1767, "step": 230330 }, { "epoch": 89.45, "learning_rate": 8.073009708737864e-06, "loss": 0.0378, "step": 230340 }, { "epoch": 89.46, "learning_rate": 8.072491909385114e-06, "loss": 0.0732, "step": 230350 }, { "epoch": 89.46, "learning_rate": 8.071974110032364e-06, "loss": 0.0488, "step": 230360 }, { "epoch": 89.46, "learning_rate": 8.071456310679612e-06, "loss": 0.0525, "step": 230370 }, { "epoch": 89.47, "learning_rate": 8.070938511326861e-06, "loss": 0.0422, "step": 230380 }, { "epoch": 89.47, "learning_rate": 8.070420711974111e-06, "loss": 0.0311, "step": 230390 }, { "epoch": 89.48, "learning_rate": 8.06990291262136e-06, "loss": 0.0118, "step": 230400 }, { "epoch": 89.48, "learning_rate": 8.06938511326861e-06, "loss": 0.1679, "step": 230410 }, { "epoch": 89.48, "learning_rate": 8.068867313915858e-06, "loss": 0.0081, "step": 230420 }, { "epoch": 89.49, "learning_rate": 8.068349514563108e-06, "loss": 0.0216, "step": 230430 }, { "epoch": 89.49, "learning_rate": 8.067831715210357e-06, "loss": 0.0191, "step": 230440 }, { "epoch": 89.5, "learning_rate": 8.067313915857607e-06, "loss": 0.0337, "step": 230450 }, { "epoch": 89.5, "learning_rate": 8.066796116504855e-06, "loss": 0.029, "step": 230460 }, { "epoch": 89.5, "learning_rate": 8.066278317152104e-06, "loss": 0.1171, "step": 230470 }, { "epoch": 89.51, "learning_rate": 8.065760517799354e-06, "loss": 0.2098, "step": 230480 }, { "epoch": 89.51, "learning_rate": 8.065242718446604e-06, "loss": 0.0441, "step": 230490 }, { "epoch": 89.51, "learning_rate": 8.064724919093852e-06, "loss": 0.1055, "step": 230500 }, { "epoch": 89.52, "learning_rate": 8.064207119741101e-06, "loss": 0.0045, "step": 230510 }, { "epoch": 89.52, "learning_rate": 8.06368932038835e-06, "loss": 0.07, "step": 230520 }, { "epoch": 89.53, "learning_rate": 8.0631715210356e-06, "loss": 0.0558, "step": 230530 }, { "epoch": 89.53, "learning_rate": 8.062653721682848e-06, "loss": 0.1042, "step": 230540 }, { "epoch": 89.53, "learning_rate": 8.062135922330098e-06, "loss": 0.0074, "step": 230550 }, { "epoch": 89.54, "learning_rate": 8.061618122977348e-06, "loss": 0.1678, "step": 230560 }, { "epoch": 89.54, "learning_rate": 8.061100323624597e-06, "loss": 0.0022, "step": 230570 }, { "epoch": 89.55, "learning_rate": 8.060582524271845e-06, "loss": 0.0186, "step": 230580 }, { "epoch": 89.55, "learning_rate": 8.060064724919095e-06, "loss": 0.0299, "step": 230590 }, { "epoch": 89.55, "learning_rate": 8.059546925566344e-06, "loss": 0.0469, "step": 230600 }, { "epoch": 89.56, "learning_rate": 8.059029126213594e-06, "loss": 0.0068, "step": 230610 }, { "epoch": 89.56, "learning_rate": 8.058511326860842e-06, "loss": 0.0099, "step": 230620 }, { "epoch": 89.57, "learning_rate": 8.057993527508092e-06, "loss": 0.0715, "step": 230630 }, { "epoch": 89.57, "learning_rate": 8.057475728155341e-06, "loss": 0.0332, "step": 230640 }, { "epoch": 89.57, "learning_rate": 8.05695792880259e-06, "loss": 0.1022, "step": 230650 }, { "epoch": 89.58, "learning_rate": 8.056440129449839e-06, "loss": 0.1461, "step": 230660 }, { "epoch": 89.58, "learning_rate": 8.055922330097088e-06, "loss": 0.0069, "step": 230670 }, { "epoch": 89.58, "learning_rate": 8.055404530744338e-06, "loss": 0.0484, "step": 230680 }, { "epoch": 89.59, "learning_rate": 8.054886731391588e-06, "loss": 0.0896, "step": 230690 }, { "epoch": 89.59, "learning_rate": 8.054368932038835e-06, "loss": 0.002, "step": 230700 }, { "epoch": 89.6, "learning_rate": 8.053851132686085e-06, "loss": 0.0217, "step": 230710 }, { "epoch": 89.6, "learning_rate": 8.053333333333335e-06, "loss": 0.0505, "step": 230720 }, { "epoch": 89.6, "learning_rate": 8.052815533980584e-06, "loss": 0.2077, "step": 230730 }, { "epoch": 89.61, "learning_rate": 8.052297734627832e-06, "loss": 0.0226, "step": 230740 }, { "epoch": 89.61, "learning_rate": 8.051779935275082e-06, "loss": 0.1332, "step": 230750 }, { "epoch": 89.62, "learning_rate": 8.051262135922331e-06, "loss": 0.0896, "step": 230760 }, { "epoch": 89.62, "learning_rate": 8.050744336569581e-06, "loss": 0.0055, "step": 230770 }, { "epoch": 89.62, "learning_rate": 8.050226537216829e-06, "loss": 0.0425, "step": 230780 }, { "epoch": 89.63, "learning_rate": 8.049708737864079e-06, "loss": 0.0267, "step": 230790 }, { "epoch": 89.63, "learning_rate": 8.049190938511328e-06, "loss": 0.0618, "step": 230800 }, { "epoch": 89.63, "learning_rate": 8.048673139158578e-06, "loss": 0.0886, "step": 230810 }, { "epoch": 89.64, "learning_rate": 8.048155339805826e-06, "loss": 0.0014, "step": 230820 }, { "epoch": 89.64, "learning_rate": 8.047637540453075e-06, "loss": 0.0331, "step": 230830 }, { "epoch": 89.65, "learning_rate": 8.047119741100325e-06, "loss": 0.0288, "step": 230840 }, { "epoch": 89.65, "learning_rate": 8.046601941747573e-06, "loss": 0.0189, "step": 230850 }, { "epoch": 89.65, "learning_rate": 8.046084142394823e-06, "loss": 0.1266, "step": 230860 }, { "epoch": 89.66, "learning_rate": 8.045566343042072e-06, "loss": 0.0278, "step": 230870 }, { "epoch": 89.66, "learning_rate": 8.045048543689322e-06, "loss": 0.009, "step": 230880 }, { "epoch": 89.67, "learning_rate": 8.04453074433657e-06, "loss": 0.0035, "step": 230890 }, { "epoch": 89.67, "learning_rate": 8.04401294498382e-06, "loss": 0.1982, "step": 230900 }, { "epoch": 89.67, "learning_rate": 8.043495145631069e-06, "loss": 0.1187, "step": 230910 }, { "epoch": 89.68, "learning_rate": 8.042977346278319e-06, "loss": 0.0007, "step": 230920 }, { "epoch": 89.68, "learning_rate": 8.042459546925567e-06, "loss": 0.0466, "step": 230930 }, { "epoch": 89.69, "learning_rate": 8.041941747572816e-06, "loss": 0.0397, "step": 230940 }, { "epoch": 89.69, "learning_rate": 8.041423948220066e-06, "loss": 0.0801, "step": 230950 }, { "epoch": 89.69, "learning_rate": 8.040906148867315e-06, "loss": 0.0876, "step": 230960 }, { "epoch": 89.7, "learning_rate": 8.040388349514563e-06, "loss": 0.0012, "step": 230970 }, { "epoch": 89.7, "learning_rate": 8.039870550161813e-06, "loss": 0.0019, "step": 230980 }, { "epoch": 89.7, "learning_rate": 8.039352750809063e-06, "loss": 0.0329, "step": 230990 }, { "epoch": 89.71, "learning_rate": 8.038834951456312e-06, "loss": 0.0184, "step": 231000 }, { "epoch": 89.71, "learning_rate": 8.03831715210356e-06, "loss": 0.0722, "step": 231010 }, { "epoch": 89.72, "learning_rate": 8.03779935275081e-06, "loss": 0.2023, "step": 231020 }, { "epoch": 89.72, "learning_rate": 8.03728155339806e-06, "loss": 0.0179, "step": 231030 }, { "epoch": 89.72, "learning_rate": 8.036763754045307e-06, "loss": 0.0735, "step": 231040 }, { "epoch": 89.73, "learning_rate": 8.036245954692557e-06, "loss": 0.1068, "step": 231050 }, { "epoch": 89.73, "learning_rate": 8.035728155339806e-06, "loss": 0.0208, "step": 231060 }, { "epoch": 89.74, "learning_rate": 8.035210355987056e-06, "loss": 0.0097, "step": 231070 }, { "epoch": 89.74, "learning_rate": 8.034692556634304e-06, "loss": 0.0548, "step": 231080 }, { "epoch": 89.74, "learning_rate": 8.034174757281554e-06, "loss": 0.0436, "step": 231090 }, { "epoch": 89.75, "learning_rate": 8.033656957928803e-06, "loss": 0.022, "step": 231100 }, { "epoch": 89.75, "learning_rate": 8.033139158576053e-06, "loss": 0.135, "step": 231110 }, { "epoch": 89.76, "learning_rate": 8.0326213592233e-06, "loss": 0.0277, "step": 231120 }, { "epoch": 89.76, "learning_rate": 8.03210355987055e-06, "loss": 0.004, "step": 231130 }, { "epoch": 89.76, "learning_rate": 8.0315857605178e-06, "loss": 0.1233, "step": 231140 }, { "epoch": 89.77, "learning_rate": 8.03106796116505e-06, "loss": 0.129, "step": 231150 }, { "epoch": 89.77, "learning_rate": 8.030550161812298e-06, "loss": 0.0628, "step": 231160 }, { "epoch": 89.77, "learning_rate": 8.030032362459547e-06, "loss": 0.0831, "step": 231170 }, { "epoch": 89.78, "learning_rate": 8.029514563106797e-06, "loss": 0.0896, "step": 231180 }, { "epoch": 89.78, "learning_rate": 8.028996763754045e-06, "loss": 0.158, "step": 231190 }, { "epoch": 89.79, "learning_rate": 8.028478964401294e-06, "loss": 0.0874, "step": 231200 }, { "epoch": 89.79, "learning_rate": 8.027961165048544e-06, "loss": 0.0976, "step": 231210 }, { "epoch": 89.79, "learning_rate": 8.027443365695794e-06, "loss": 0.0494, "step": 231220 }, { "epoch": 89.8, "learning_rate": 8.026925566343042e-06, "loss": 0.0351, "step": 231230 }, { "epoch": 89.8, "learning_rate": 8.026407766990291e-06, "loss": 0.009, "step": 231240 }, { "epoch": 89.81, "learning_rate": 8.02588996763754e-06, "loss": 0.1917, "step": 231250 }, { "epoch": 89.81, "learning_rate": 8.02537216828479e-06, "loss": 0.0227, "step": 231260 }, { "epoch": 89.81, "learning_rate": 8.024854368932038e-06, "loss": 0.05, "step": 231270 }, { "epoch": 89.82, "learning_rate": 8.024336569579288e-06, "loss": 0.0341, "step": 231280 }, { "epoch": 89.82, "learning_rate": 8.023818770226538e-06, "loss": 0.1045, "step": 231290 }, { "epoch": 89.83, "learning_rate": 8.023300970873787e-06, "loss": 0.099, "step": 231300 }, { "epoch": 89.83, "learning_rate": 8.022783171521035e-06, "loss": 0.0581, "step": 231310 }, { "epoch": 89.83, "learning_rate": 8.022265372168285e-06, "loss": 0.0525, "step": 231320 }, { "epoch": 89.84, "learning_rate": 8.021747572815534e-06, "loss": 0.1286, "step": 231330 }, { "epoch": 89.84, "learning_rate": 8.021229773462784e-06, "loss": 0.0689, "step": 231340 }, { "epoch": 89.84, "learning_rate": 8.020711974110032e-06, "loss": 0.1125, "step": 231350 }, { "epoch": 89.85, "learning_rate": 8.020194174757281e-06, "loss": 0.0768, "step": 231360 }, { "epoch": 89.85, "learning_rate": 8.019676375404531e-06, "loss": 0.0663, "step": 231370 }, { "epoch": 89.86, "learning_rate": 8.01915857605178e-06, "loss": 0.0033, "step": 231380 }, { "epoch": 89.86, "learning_rate": 8.018640776699029e-06, "loss": 0.1355, "step": 231390 }, { "epoch": 89.86, "learning_rate": 8.018122977346278e-06, "loss": 0.158, "step": 231400 }, { "epoch": 89.87, "learning_rate": 8.017605177993528e-06, "loss": 0.0008, "step": 231410 }, { "epoch": 89.87, "learning_rate": 8.017087378640777e-06, "loss": 0.169, "step": 231420 }, { "epoch": 89.88, "learning_rate": 8.016569579288025e-06, "loss": 0.0593, "step": 231430 }, { "epoch": 89.88, "learning_rate": 8.016051779935275e-06, "loss": 0.0584, "step": 231440 }, { "epoch": 89.88, "learning_rate": 8.015533980582525e-06, "loss": 0.1459, "step": 231450 }, { "epoch": 89.89, "learning_rate": 8.015016181229774e-06, "loss": 0.1572, "step": 231460 }, { "epoch": 89.89, "learning_rate": 8.014498381877022e-06, "loss": 0.0076, "step": 231470 }, { "epoch": 89.9, "learning_rate": 8.013980582524272e-06, "loss": 0.0516, "step": 231480 }, { "epoch": 89.9, "learning_rate": 8.013462783171521e-06, "loss": 0.0315, "step": 231490 }, { "epoch": 89.9, "learning_rate": 8.012944983818771e-06, "loss": 0.1207, "step": 231500 }, { "epoch": 89.91, "learning_rate": 8.012427184466019e-06, "loss": 0.2, "step": 231510 }, { "epoch": 89.91, "learning_rate": 8.011909385113269e-06, "loss": 0.0008, "step": 231520 }, { "epoch": 89.91, "learning_rate": 8.011391585760518e-06, "loss": 0.0633, "step": 231530 }, { "epoch": 89.92, "learning_rate": 8.010873786407768e-06, "loss": 0.0126, "step": 231540 }, { "epoch": 89.92, "learning_rate": 8.010355987055017e-06, "loss": 0.0062, "step": 231550 }, { "epoch": 89.93, "learning_rate": 8.009838187702265e-06, "loss": 0.0946, "step": 231560 }, { "epoch": 89.93, "learning_rate": 8.009320388349515e-06, "loss": 0.2503, "step": 231570 }, { "epoch": 89.93, "learning_rate": 8.008802588996765e-06, "loss": 0.0033, "step": 231580 }, { "epoch": 89.94, "learning_rate": 8.008284789644014e-06, "loss": 0.0722, "step": 231590 }, { "epoch": 89.94, "learning_rate": 8.007766990291262e-06, "loss": 0.1168, "step": 231600 }, { "epoch": 89.95, "learning_rate": 8.007249190938512e-06, "loss": 0.0325, "step": 231610 }, { "epoch": 89.95, "learning_rate": 8.006731391585761e-06, "loss": 0.1396, "step": 231620 }, { "epoch": 89.95, "learning_rate": 8.006213592233011e-06, "loss": 0.1214, "step": 231630 }, { "epoch": 89.96, "learning_rate": 8.005695792880259e-06, "loss": 0.1364, "step": 231640 }, { "epoch": 89.96, "learning_rate": 8.005177993527509e-06, "loss": 0.1105, "step": 231650 }, { "epoch": 89.97, "learning_rate": 8.004660194174758e-06, "loss": 0.0365, "step": 231660 }, { "epoch": 89.97, "learning_rate": 8.004142394822008e-06, "loss": 0.1072, "step": 231670 }, { "epoch": 89.97, "learning_rate": 8.003624595469256e-06, "loss": 0.1143, "step": 231680 }, { "epoch": 89.98, "learning_rate": 8.003106796116505e-06, "loss": 0.0029, "step": 231690 }, { "epoch": 89.98, "learning_rate": 8.002588996763755e-06, "loss": 0.0909, "step": 231700 }, { "epoch": 89.98, "learning_rate": 8.002071197411005e-06, "loss": 0.0898, "step": 231710 }, { "epoch": 89.99, "learning_rate": 8.001553398058252e-06, "loss": 0.0676, "step": 231720 }, { "epoch": 89.99, "learning_rate": 8.001035598705502e-06, "loss": 0.0568, "step": 231730 }, { "epoch": 90.0, "learning_rate": 8.000517799352752e-06, "loss": 0.0094, "step": 231740 }, { "epoch": 90.0, "learning_rate": 8.000000000000001e-06, "loss": 0.0164, "step": 231750 }, { "epoch": 90.0, "eval_accuracy": 0.9507565337001376, "eval_loss": 0.3572826385498047, "eval_runtime": 8.1761, "eval_samples_per_second": 444.589, "eval_steps_per_second": 55.65, "step": 231750 }, { "epoch": 90.0, "learning_rate": 7.99948220064725e-06, "loss": 0.1009, "step": 231760 }, { "epoch": 90.01, "learning_rate": 7.998964401294499e-06, "loss": 0.114, "step": 231770 }, { "epoch": 90.01, "learning_rate": 7.998446601941748e-06, "loss": 0.0455, "step": 231780 }, { "epoch": 90.02, "learning_rate": 7.997928802588998e-06, "loss": 0.0555, "step": 231790 }, { "epoch": 90.02, "learning_rate": 7.997411003236246e-06, "loss": 0.0476, "step": 231800 }, { "epoch": 90.02, "learning_rate": 7.996893203883496e-06, "loss": 0.0476, "step": 231810 }, { "epoch": 90.03, "learning_rate": 7.996375404530745e-06, "loss": 0.1748, "step": 231820 }, { "epoch": 90.03, "learning_rate": 7.995857605177995e-06, "loss": 0.1309, "step": 231830 }, { "epoch": 90.03, "learning_rate": 7.995339805825243e-06, "loss": 0.0164, "step": 231840 }, { "epoch": 90.04, "learning_rate": 7.994822006472492e-06, "loss": 0.1648, "step": 231850 }, { "epoch": 90.04, "learning_rate": 7.994304207119742e-06, "loss": 0.0611, "step": 231860 }, { "epoch": 90.05, "learning_rate": 7.993786407766992e-06, "loss": 0.0111, "step": 231870 }, { "epoch": 90.05, "learning_rate": 7.99326860841424e-06, "loss": 0.0334, "step": 231880 }, { "epoch": 90.05, "learning_rate": 7.99275080906149e-06, "loss": 0.1384, "step": 231890 }, { "epoch": 90.06, "learning_rate": 7.992233009708739e-06, "loss": 0.0222, "step": 231900 }, { "epoch": 90.06, "learning_rate": 7.991715210355988e-06, "loss": 0.0058, "step": 231910 }, { "epoch": 90.07, "learning_rate": 7.991197411003236e-06, "loss": 0.2447, "step": 231920 }, { "epoch": 90.07, "learning_rate": 7.990679611650486e-06, "loss": 0.1321, "step": 231930 }, { "epoch": 90.07, "learning_rate": 7.990161812297736e-06, "loss": 0.1063, "step": 231940 }, { "epoch": 90.08, "learning_rate": 7.989644012944985e-06, "loss": 0.0582, "step": 231950 }, { "epoch": 90.08, "learning_rate": 7.989126213592233e-06, "loss": 0.0146, "step": 231960 }, { "epoch": 90.09, "learning_rate": 7.988608414239483e-06, "loss": 0.0015, "step": 231970 }, { "epoch": 90.09, "learning_rate": 7.988090614886732e-06, "loss": 0.1494, "step": 231980 }, { "epoch": 90.09, "learning_rate": 7.987572815533982e-06, "loss": 0.0556, "step": 231990 }, { "epoch": 90.1, "learning_rate": 7.98705501618123e-06, "loss": 0.1115, "step": 232000 }, { "epoch": 90.1, "learning_rate": 7.98653721682848e-06, "loss": 0.0179, "step": 232010 }, { "epoch": 90.1, "learning_rate": 7.98601941747573e-06, "loss": 0.046, "step": 232020 }, { "epoch": 90.11, "learning_rate": 7.985501618122979e-06, "loss": 0.0019, "step": 232030 }, { "epoch": 90.11, "learning_rate": 7.984983818770227e-06, "loss": 0.0489, "step": 232040 }, { "epoch": 90.12, "learning_rate": 7.984466019417476e-06, "loss": 0.051, "step": 232050 }, { "epoch": 90.12, "learning_rate": 7.983948220064726e-06, "loss": 0.0661, "step": 232060 }, { "epoch": 90.12, "learning_rate": 7.983430420711976e-06, "loss": 0.0083, "step": 232070 }, { "epoch": 90.13, "learning_rate": 7.982912621359223e-06, "loss": 0.002, "step": 232080 }, { "epoch": 90.13, "learning_rate": 7.982394822006473e-06, "loss": 0.0574, "step": 232090 }, { "epoch": 90.14, "learning_rate": 7.981877022653723e-06, "loss": 0.0386, "step": 232100 }, { "epoch": 90.14, "learning_rate": 7.981359223300972e-06, "loss": 0.0434, "step": 232110 }, { "epoch": 90.14, "learning_rate": 7.980841423948222e-06, "loss": 0.053, "step": 232120 }, { "epoch": 90.15, "learning_rate": 7.98032362459547e-06, "loss": 0.034, "step": 232130 }, { "epoch": 90.15, "learning_rate": 7.97980582524272e-06, "loss": 0.0287, "step": 232140 }, { "epoch": 90.16, "learning_rate": 7.979288025889969e-06, "loss": 0.0547, "step": 232150 }, { "epoch": 90.16, "learning_rate": 7.978770226537219e-06, "loss": 0.0163, "step": 232160 }, { "epoch": 90.16, "learning_rate": 7.978252427184467e-06, "loss": 0.0907, "step": 232170 }, { "epoch": 90.17, "learning_rate": 7.977734627831716e-06, "loss": 0.206, "step": 232180 }, { "epoch": 90.17, "learning_rate": 7.977216828478966e-06, "loss": 0.0978, "step": 232190 }, { "epoch": 90.17, "learning_rate": 7.976699029126216e-06, "loss": 0.0795, "step": 232200 }, { "epoch": 90.18, "learning_rate": 7.976181229773463e-06, "loss": 0.0347, "step": 232210 }, { "epoch": 90.18, "learning_rate": 7.975663430420713e-06, "loss": 0.0516, "step": 232220 }, { "epoch": 90.19, "learning_rate": 7.975145631067963e-06, "loss": 0.0487, "step": 232230 }, { "epoch": 90.19, "learning_rate": 7.974627831715212e-06, "loss": 0.0098, "step": 232240 }, { "epoch": 90.19, "learning_rate": 7.97411003236246e-06, "loss": 0.1611, "step": 232250 }, { "epoch": 90.2, "learning_rate": 7.97359223300971e-06, "loss": 0.0523, "step": 232260 }, { "epoch": 90.2, "learning_rate": 7.97307443365696e-06, "loss": 0.1814, "step": 232270 }, { "epoch": 90.21, "learning_rate": 7.972556634304209e-06, "loss": 0.1037, "step": 232280 }, { "epoch": 90.21, "learning_rate": 7.972038834951457e-06, "loss": 0.0167, "step": 232290 }, { "epoch": 90.21, "learning_rate": 7.971521035598707e-06, "loss": 0.0369, "step": 232300 }, { "epoch": 90.22, "learning_rate": 7.971003236245956e-06, "loss": 0.0041, "step": 232310 }, { "epoch": 90.22, "learning_rate": 7.970485436893204e-06, "loss": 0.0922, "step": 232320 }, { "epoch": 90.23, "learning_rate": 7.969967637540454e-06, "loss": 0.0347, "step": 232330 }, { "epoch": 90.23, "learning_rate": 7.969449838187703e-06, "loss": 0.0984, "step": 232340 }, { "epoch": 90.23, "learning_rate": 7.968932038834953e-06, "loss": 0.0091, "step": 232350 }, { "epoch": 90.24, "learning_rate": 7.968414239482201e-06, "loss": 0.0831, "step": 232360 }, { "epoch": 90.24, "learning_rate": 7.96789644012945e-06, "loss": 0.0019, "step": 232370 }, { "epoch": 90.24, "learning_rate": 7.9673786407767e-06, "loss": 0.0018, "step": 232380 }, { "epoch": 90.25, "learning_rate": 7.96686084142395e-06, "loss": 0.0483, "step": 232390 }, { "epoch": 90.25, "learning_rate": 7.966343042071198e-06, "loss": 0.0976, "step": 232400 }, { "epoch": 90.26, "learning_rate": 7.965825242718447e-06, "loss": 0.1454, "step": 232410 }, { "epoch": 90.26, "learning_rate": 7.965307443365697e-06, "loss": 0.0024, "step": 232420 }, { "epoch": 90.26, "learning_rate": 7.964789644012947e-06, "loss": 0.035, "step": 232430 }, { "epoch": 90.27, "learning_rate": 7.964271844660194e-06, "loss": 0.0155, "step": 232440 }, { "epoch": 90.27, "learning_rate": 7.963754045307444e-06, "loss": 0.0828, "step": 232450 }, { "epoch": 90.28, "learning_rate": 7.963236245954694e-06, "loss": 0.0098, "step": 232460 }, { "epoch": 90.28, "learning_rate": 7.962718446601942e-06, "loss": 0.0362, "step": 232470 }, { "epoch": 90.28, "learning_rate": 7.962200647249191e-06, "loss": 0.0009, "step": 232480 }, { "epoch": 90.29, "learning_rate": 7.961682847896441e-06, "loss": 0.0541, "step": 232490 }, { "epoch": 90.29, "learning_rate": 7.96116504854369e-06, "loss": 0.1354, "step": 232500 }, { "epoch": 90.3, "learning_rate": 7.960647249190938e-06, "loss": 0.1718, "step": 232510 }, { "epoch": 90.3, "learning_rate": 7.960129449838188e-06, "loss": 0.0984, "step": 232520 }, { "epoch": 90.3, "learning_rate": 7.959611650485438e-06, "loss": 0.0637, "step": 232530 }, { "epoch": 90.31, "learning_rate": 7.959093851132687e-06, "loss": 0.0285, "step": 232540 }, { "epoch": 90.31, "learning_rate": 7.958576051779935e-06, "loss": 0.056, "step": 232550 }, { "epoch": 90.31, "learning_rate": 7.958058252427185e-06, "loss": 0.0142, "step": 232560 }, { "epoch": 90.32, "learning_rate": 7.957540453074434e-06, "loss": 0.1653, "step": 232570 }, { "epoch": 90.32, "learning_rate": 7.957022653721684e-06, "loss": 0.0126, "step": 232580 }, { "epoch": 90.33, "learning_rate": 7.956504854368932e-06, "loss": 0.0867, "step": 232590 }, { "epoch": 90.33, "learning_rate": 7.955987055016182e-06, "loss": 0.0593, "step": 232600 }, { "epoch": 90.33, "learning_rate": 7.955469255663431e-06, "loss": 0.0466, "step": 232610 }, { "epoch": 90.34, "learning_rate": 7.95495145631068e-06, "loss": 0.0513, "step": 232620 }, { "epoch": 90.34, "learning_rate": 7.954433656957929e-06, "loss": 0.1138, "step": 232630 }, { "epoch": 90.35, "learning_rate": 7.953915857605178e-06, "loss": 0.1154, "step": 232640 }, { "epoch": 90.35, "learning_rate": 7.953398058252428e-06, "loss": 0.055, "step": 232650 }, { "epoch": 90.35, "learning_rate": 7.952880258899676e-06, "loss": 0.0941, "step": 232660 }, { "epoch": 90.36, "learning_rate": 7.952362459546926e-06, "loss": 0.0832, "step": 232670 }, { "epoch": 90.36, "learning_rate": 7.951844660194175e-06, "loss": 0.0283, "step": 232680 }, { "epoch": 90.37, "learning_rate": 7.951326860841425e-06, "loss": 0.0097, "step": 232690 }, { "epoch": 90.37, "learning_rate": 7.950809061488673e-06, "loss": 0.1393, "step": 232700 }, { "epoch": 90.37, "learning_rate": 7.950291262135922e-06, "loss": 0.0852, "step": 232710 }, { "epoch": 90.38, "learning_rate": 7.949773462783172e-06, "loss": 0.1222, "step": 232720 }, { "epoch": 90.38, "learning_rate": 7.949255663430422e-06, "loss": 0.0431, "step": 232730 }, { "epoch": 90.38, "learning_rate": 7.94873786407767e-06, "loss": 0.0396, "step": 232740 }, { "epoch": 90.39, "learning_rate": 7.948220064724919e-06, "loss": 0.0193, "step": 232750 }, { "epoch": 90.39, "learning_rate": 7.947702265372169e-06, "loss": 0.1236, "step": 232760 }, { "epoch": 90.4, "learning_rate": 7.947184466019418e-06, "loss": 0.0508, "step": 232770 }, { "epoch": 90.4, "learning_rate": 7.946666666666666e-06, "loss": 0.0868, "step": 232780 }, { "epoch": 90.4, "learning_rate": 7.946148867313916e-06, "loss": 0.0045, "step": 232790 }, { "epoch": 90.41, "learning_rate": 7.945631067961165e-06, "loss": 0.0512, "step": 232800 }, { "epoch": 90.41, "learning_rate": 7.945113268608415e-06, "loss": 0.092, "step": 232810 }, { "epoch": 90.42, "learning_rate": 7.944595469255663e-06, "loss": 0.0948, "step": 232820 }, { "epoch": 90.42, "learning_rate": 7.944077669902913e-06, "loss": 0.0693, "step": 232830 }, { "epoch": 90.42, "learning_rate": 7.943559870550162e-06, "loss": 0.1007, "step": 232840 }, { "epoch": 90.43, "learning_rate": 7.943042071197412e-06, "loss": 0.0309, "step": 232850 }, { "epoch": 90.43, "learning_rate": 7.94252427184466e-06, "loss": 0.0102, "step": 232860 }, { "epoch": 90.43, "learning_rate": 7.94200647249191e-06, "loss": 0.2096, "step": 232870 }, { "epoch": 90.44, "learning_rate": 7.941488673139159e-06, "loss": 0.0824, "step": 232880 }, { "epoch": 90.44, "learning_rate": 7.940970873786409e-06, "loss": 0.0004, "step": 232890 }, { "epoch": 90.45, "learning_rate": 7.940453074433657e-06, "loss": 0.0632, "step": 232900 }, { "epoch": 90.45, "learning_rate": 7.939935275080906e-06, "loss": 0.1391, "step": 232910 }, { "epoch": 90.45, "learning_rate": 7.939417475728156e-06, "loss": 0.1039, "step": 232920 }, { "epoch": 90.46, "learning_rate": 7.938899676375405e-06, "loss": 0.0213, "step": 232930 }, { "epoch": 90.46, "learning_rate": 7.938381877022653e-06, "loss": 0.0137, "step": 232940 }, { "epoch": 90.47, "learning_rate": 7.937864077669903e-06, "loss": 0.0243, "step": 232950 }, { "epoch": 90.47, "learning_rate": 7.937346278317153e-06, "loss": 0.0339, "step": 232960 }, { "epoch": 90.47, "learning_rate": 7.936828478964402e-06, "loss": 0.0369, "step": 232970 }, { "epoch": 90.48, "learning_rate": 7.93631067961165e-06, "loss": 0.065, "step": 232980 }, { "epoch": 90.48, "learning_rate": 7.9357928802589e-06, "loss": 0.1356, "step": 232990 }, { "epoch": 90.49, "learning_rate": 7.93527508090615e-06, "loss": 0.0599, "step": 233000 }, { "epoch": 90.49, "learning_rate": 7.934757281553399e-06, "loss": 0.0183, "step": 233010 }, { "epoch": 90.49, "learning_rate": 7.934239482200647e-06, "loss": 0.0926, "step": 233020 }, { "epoch": 90.5, "learning_rate": 7.933721682847897e-06, "loss": 0.024, "step": 233030 }, { "epoch": 90.5, "learning_rate": 7.933203883495146e-06, "loss": 0.0777, "step": 233040 }, { "epoch": 90.5, "learning_rate": 7.932686084142396e-06, "loss": 0.1844, "step": 233050 }, { "epoch": 90.51, "learning_rate": 7.932168284789644e-06, "loss": 0.0158, "step": 233060 }, { "epoch": 90.51, "learning_rate": 7.931650485436893e-06, "loss": 0.1348, "step": 233070 }, { "epoch": 90.52, "learning_rate": 7.931132686084143e-06, "loss": 0.0998, "step": 233080 }, { "epoch": 90.52, "learning_rate": 7.930614886731393e-06, "loss": 0.174, "step": 233090 }, { "epoch": 90.52, "learning_rate": 7.93009708737864e-06, "loss": 0.1779, "step": 233100 }, { "epoch": 90.53, "learning_rate": 7.92957928802589e-06, "loss": 0.065, "step": 233110 }, { "epoch": 90.53, "learning_rate": 7.92906148867314e-06, "loss": 0.0804, "step": 233120 }, { "epoch": 90.54, "learning_rate": 7.92854368932039e-06, "loss": 0.0002, "step": 233130 }, { "epoch": 90.54, "learning_rate": 7.928025889967637e-06, "loss": 0.0404, "step": 233140 }, { "epoch": 90.54, "learning_rate": 7.927508090614887e-06, "loss": 0.0141, "step": 233150 }, { "epoch": 90.55, "learning_rate": 7.926990291262136e-06, "loss": 0.0448, "step": 233160 }, { "epoch": 90.55, "learning_rate": 7.926472491909386e-06, "loss": 0.0111, "step": 233170 }, { "epoch": 90.56, "learning_rate": 7.925954692556634e-06, "loss": 0.0385, "step": 233180 }, { "epoch": 90.56, "learning_rate": 7.925436893203884e-06, "loss": 0.1138, "step": 233190 }, { "epoch": 90.56, "learning_rate": 7.924919093851133e-06, "loss": 0.0517, "step": 233200 }, { "epoch": 90.57, "learning_rate": 7.924401294498383e-06, "loss": 0.0461, "step": 233210 }, { "epoch": 90.57, "learning_rate": 7.923883495145633e-06, "loss": 0.0427, "step": 233220 }, { "epoch": 90.57, "learning_rate": 7.92336569579288e-06, "loss": 0.0125, "step": 233230 }, { "epoch": 90.58, "learning_rate": 7.92284789644013e-06, "loss": 0.011, "step": 233240 }, { "epoch": 90.58, "learning_rate": 7.92233009708738e-06, "loss": 0.0179, "step": 233250 }, { "epoch": 90.59, "learning_rate": 7.92181229773463e-06, "loss": 0.0629, "step": 233260 }, { "epoch": 90.59, "learning_rate": 7.921294498381877e-06, "loss": 0.093, "step": 233270 }, { "epoch": 90.59, "learning_rate": 7.920776699029127e-06, "loss": 0.0011, "step": 233280 }, { "epoch": 90.6, "learning_rate": 7.920258899676376e-06, "loss": 0.0062, "step": 233290 }, { "epoch": 90.6, "learning_rate": 7.919741100323626e-06, "loss": 0.1268, "step": 233300 }, { "epoch": 90.61, "learning_rate": 7.919223300970874e-06, "loss": 0.1453, "step": 233310 }, { "epoch": 90.61, "learning_rate": 7.918705501618124e-06, "loss": 0.0524, "step": 233320 }, { "epoch": 90.61, "learning_rate": 7.918187702265373e-06, "loss": 0.1196, "step": 233330 }, { "epoch": 90.62, "learning_rate": 7.917669902912623e-06, "loss": 0.1316, "step": 233340 }, { "epoch": 90.62, "learning_rate": 7.91715210355987e-06, "loss": 0.0514, "step": 233350 }, { "epoch": 90.63, "learning_rate": 7.91663430420712e-06, "loss": 0.0199, "step": 233360 }, { "epoch": 90.63, "learning_rate": 7.91611650485437e-06, "loss": 0.0511, "step": 233370 }, { "epoch": 90.63, "learning_rate": 7.91559870550162e-06, "loss": 0.0747, "step": 233380 }, { "epoch": 90.64, "learning_rate": 7.915080906148868e-06, "loss": 0.0072, "step": 233390 }, { "epoch": 90.64, "learning_rate": 7.914563106796117e-06, "loss": 0.0853, "step": 233400 }, { "epoch": 90.64, "learning_rate": 7.914045307443367e-06, "loss": 0.0454, "step": 233410 }, { "epoch": 90.65, "learning_rate": 7.913527508090616e-06, "loss": 0.0026, "step": 233420 }, { "epoch": 90.65, "learning_rate": 7.913009708737864e-06, "loss": 0.0604, "step": 233430 }, { "epoch": 90.66, "learning_rate": 7.912491909385114e-06, "loss": 0.0195, "step": 233440 }, { "epoch": 90.66, "learning_rate": 7.911974110032364e-06, "loss": 0.0524, "step": 233450 }, { "epoch": 90.66, "learning_rate": 7.911456310679613e-06, "loss": 0.0788, "step": 233460 }, { "epoch": 90.67, "learning_rate": 7.910938511326861e-06, "loss": 0.0387, "step": 233470 }, { "epoch": 90.67, "learning_rate": 7.91042071197411e-06, "loss": 0.0015, "step": 233480 }, { "epoch": 90.68, "learning_rate": 7.90990291262136e-06, "loss": 0.1358, "step": 233490 }, { "epoch": 90.68, "learning_rate": 7.90938511326861e-06, "loss": 0.0027, "step": 233500 }, { "epoch": 90.68, "learning_rate": 7.908867313915858e-06, "loss": 0.0066, "step": 233510 }, { "epoch": 90.69, "learning_rate": 7.908349514563107e-06, "loss": 0.0179, "step": 233520 }, { "epoch": 90.69, "learning_rate": 7.907831715210357e-06, "loss": 0.1197, "step": 233530 }, { "epoch": 90.7, "learning_rate": 7.907313915857607e-06, "loss": 0.0627, "step": 233540 }, { "epoch": 90.7, "learning_rate": 7.906796116504855e-06, "loss": 0.0825, "step": 233550 }, { "epoch": 90.7, "learning_rate": 7.906278317152104e-06, "loss": 0.0233, "step": 233560 }, { "epoch": 90.71, "learning_rate": 7.905760517799354e-06, "loss": 0.0152, "step": 233570 }, { "epoch": 90.71, "learning_rate": 7.905242718446604e-06, "loss": 0.0442, "step": 233580 }, { "epoch": 90.71, "learning_rate": 7.904724919093851e-06, "loss": 0.0142, "step": 233590 }, { "epoch": 90.72, "learning_rate": 7.904207119741101e-06, "loss": 0.0066, "step": 233600 }, { "epoch": 90.72, "learning_rate": 7.90368932038835e-06, "loss": 0.0029, "step": 233610 }, { "epoch": 90.73, "learning_rate": 7.9031715210356e-06, "loss": 0.0238, "step": 233620 }, { "epoch": 90.73, "learning_rate": 7.902653721682848e-06, "loss": 0.01, "step": 233630 }, { "epoch": 90.73, "learning_rate": 7.902135922330098e-06, "loss": 0.079, "step": 233640 }, { "epoch": 90.74, "learning_rate": 7.901618122977347e-06, "loss": 0.0138, "step": 233650 }, { "epoch": 90.74, "learning_rate": 7.901100323624597e-06, "loss": 0.0098, "step": 233660 }, { "epoch": 90.75, "learning_rate": 7.900582524271845e-06, "loss": 0.0187, "step": 233670 }, { "epoch": 90.75, "learning_rate": 7.900064724919095e-06, "loss": 0.0643, "step": 233680 }, { "epoch": 90.75, "learning_rate": 7.899546925566344e-06, "loss": 0.0178, "step": 233690 }, { "epoch": 90.76, "learning_rate": 7.899029126213594e-06, "loss": 0.0315, "step": 233700 }, { "epoch": 90.76, "learning_rate": 7.898511326860842e-06, "loss": 0.0776, "step": 233710 }, { "epoch": 90.77, "learning_rate": 7.897993527508091e-06, "loss": 0.0192, "step": 233720 }, { "epoch": 90.77, "learning_rate": 7.897475728155341e-06, "loss": 0.1483, "step": 233730 }, { "epoch": 90.77, "learning_rate": 7.89695792880259e-06, "loss": 0.0697, "step": 233740 }, { "epoch": 90.78, "learning_rate": 7.896440129449839e-06, "loss": 0.0947, "step": 233750 }, { "epoch": 90.78, "learning_rate": 7.895922330097088e-06, "loss": 0.0592, "step": 233760 }, { "epoch": 90.78, "learning_rate": 7.895404530744338e-06, "loss": 0.0155, "step": 233770 }, { "epoch": 90.79, "learning_rate": 7.894886731391587e-06, "loss": 0.0361, "step": 233780 }, { "epoch": 90.79, "learning_rate": 7.894368932038835e-06, "loss": 0.0222, "step": 233790 }, { "epoch": 90.8, "learning_rate": 7.893851132686085e-06, "loss": 0.0937, "step": 233800 }, { "epoch": 90.8, "learning_rate": 7.893333333333335e-06, "loss": 0.0578, "step": 233810 }, { "epoch": 90.8, "learning_rate": 7.892815533980584e-06, "loss": 0.1153, "step": 233820 }, { "epoch": 90.81, "learning_rate": 7.892297734627832e-06, "loss": 0.0719, "step": 233830 }, { "epoch": 90.81, "learning_rate": 7.891779935275082e-06, "loss": 0.0211, "step": 233840 }, { "epoch": 90.82, "learning_rate": 7.891262135922331e-06, "loss": 0.0097, "step": 233850 }, { "epoch": 90.82, "learning_rate": 7.890744336569581e-06, "loss": 0.0765, "step": 233860 }, { "epoch": 90.82, "learning_rate": 7.890226537216829e-06, "loss": 0.0325, "step": 233870 }, { "epoch": 90.83, "learning_rate": 7.889708737864078e-06, "loss": 0.0588, "step": 233880 }, { "epoch": 90.83, "learning_rate": 7.889190938511328e-06, "loss": 0.0109, "step": 233890 }, { "epoch": 90.83, "learning_rate": 7.888673139158578e-06, "loss": 0.0088, "step": 233900 }, { "epoch": 90.84, "learning_rate": 7.888155339805826e-06, "loss": 0.0055, "step": 233910 }, { "epoch": 90.84, "learning_rate": 7.887637540453075e-06, "loss": 0.1153, "step": 233920 }, { "epoch": 90.85, "learning_rate": 7.887119741100325e-06, "loss": 0.0407, "step": 233930 }, { "epoch": 90.85, "learning_rate": 7.886601941747573e-06, "loss": 0.0249, "step": 233940 }, { "epoch": 90.85, "learning_rate": 7.886084142394822e-06, "loss": 0.0638, "step": 233950 }, { "epoch": 90.86, "learning_rate": 7.885566343042072e-06, "loss": 0.1586, "step": 233960 }, { "epoch": 90.86, "learning_rate": 7.885048543689322e-06, "loss": 0.0523, "step": 233970 }, { "epoch": 90.87, "learning_rate": 7.88453074433657e-06, "loss": 0.0003, "step": 233980 }, { "epoch": 90.87, "learning_rate": 7.88401294498382e-06, "loss": 0.0433, "step": 233990 }, { "epoch": 90.87, "learning_rate": 7.883495145631069e-06, "loss": 0.009, "step": 234000 }, { "epoch": 90.88, "learning_rate": 7.882977346278318e-06, "loss": 0.0981, "step": 234010 }, { "epoch": 90.88, "learning_rate": 7.882459546925566e-06, "loss": 0.0218, "step": 234020 }, { "epoch": 90.89, "learning_rate": 7.881941747572816e-06, "loss": 0.0033, "step": 234030 }, { "epoch": 90.89, "learning_rate": 7.881423948220066e-06, "loss": 0.0044, "step": 234040 }, { "epoch": 90.89, "learning_rate": 7.880906148867315e-06, "loss": 0.093, "step": 234050 }, { "epoch": 90.9, "learning_rate": 7.880388349514563e-06, "loss": 0.0289, "step": 234060 }, { "epoch": 90.9, "learning_rate": 7.879870550161813e-06, "loss": 0.0832, "step": 234070 }, { "epoch": 90.9, "learning_rate": 7.879352750809062e-06, "loss": 0.0789, "step": 234080 }, { "epoch": 90.91, "learning_rate": 7.878834951456312e-06, "loss": 0.0197, "step": 234090 }, { "epoch": 90.91, "learning_rate": 7.87831715210356e-06, "loss": 0.0791, "step": 234100 }, { "epoch": 90.92, "learning_rate": 7.87779935275081e-06, "loss": 0.097, "step": 234110 }, { "epoch": 90.92, "learning_rate": 7.87728155339806e-06, "loss": 0.0159, "step": 234120 }, { "epoch": 90.92, "learning_rate": 7.876763754045307e-06, "loss": 0.0304, "step": 234130 }, { "epoch": 90.93, "learning_rate": 7.876245954692557e-06, "loss": 0.1623, "step": 234140 }, { "epoch": 90.93, "learning_rate": 7.875728155339806e-06, "loss": 0.0429, "step": 234150 }, { "epoch": 90.94, "learning_rate": 7.875210355987056e-06, "loss": 0.0159, "step": 234160 }, { "epoch": 90.94, "learning_rate": 7.874692556634304e-06, "loss": 0.0242, "step": 234170 }, { "epoch": 90.94, "learning_rate": 7.874174757281553e-06, "loss": 0.0353, "step": 234180 }, { "epoch": 90.95, "learning_rate": 7.873656957928803e-06, "loss": 0.1684, "step": 234190 }, { "epoch": 90.95, "learning_rate": 7.873139158576053e-06, "loss": 0.0683, "step": 234200 }, { "epoch": 90.96, "learning_rate": 7.8726213592233e-06, "loss": 0.0759, "step": 234210 }, { "epoch": 90.96, "learning_rate": 7.87210355987055e-06, "loss": 0.0514, "step": 234220 }, { "epoch": 90.96, "learning_rate": 7.8715857605178e-06, "loss": 0.1981, "step": 234230 }, { "epoch": 90.97, "learning_rate": 7.87106796116505e-06, "loss": 0.0504, "step": 234240 }, { "epoch": 90.97, "learning_rate": 7.870550161812297e-06, "loss": 0.1595, "step": 234250 }, { "epoch": 90.97, "learning_rate": 7.870032362459547e-06, "loss": 0.0851, "step": 234260 }, { "epoch": 90.98, "learning_rate": 7.869514563106797e-06, "loss": 0.002, "step": 234270 }, { "epoch": 90.98, "learning_rate": 7.868996763754045e-06, "loss": 0.0181, "step": 234280 }, { "epoch": 90.99, "learning_rate": 7.868478964401294e-06, "loss": 0.0005, "step": 234290 }, { "epoch": 90.99, "learning_rate": 7.867961165048544e-06, "loss": 0.0215, "step": 234300 }, { "epoch": 90.99, "learning_rate": 7.867443365695793e-06, "loss": 0.0274, "step": 234310 }, { "epoch": 91.0, "learning_rate": 7.866925566343041e-06, "loss": 0.0477, "step": 234320 }, { "epoch": 91.0, "eval_accuracy": 0.9502063273727648, "eval_loss": 0.3678571879863739, "eval_runtime": 8.2007, "eval_samples_per_second": 443.255, "eval_steps_per_second": 55.483, "step": 234325 }, { "epoch": 91.0, "learning_rate": 7.866407766990291e-06, "loss": 0.1098, "step": 234330 }, { "epoch": 91.01, "learning_rate": 7.86588996763754e-06, "loss": 0.0449, "step": 234340 }, { "epoch": 91.01, "learning_rate": 7.86537216828479e-06, "loss": 0.0774, "step": 234350 }, { "epoch": 91.01, "learning_rate": 7.86485436893204e-06, "loss": 0.0132, "step": 234360 }, { "epoch": 91.02, "learning_rate": 7.864336569579288e-06, "loss": 0.0546, "step": 234370 }, { "epoch": 91.02, "learning_rate": 7.863818770226537e-06, "loss": 0.0267, "step": 234380 }, { "epoch": 91.03, "learning_rate": 7.863300970873787e-06, "loss": 0.0015, "step": 234390 }, { "epoch": 91.03, "learning_rate": 7.862783171521037e-06, "loss": 0.0046, "step": 234400 }, { "epoch": 91.03, "learning_rate": 7.862265372168285e-06, "loss": 0.0043, "step": 234410 }, { "epoch": 91.04, "learning_rate": 7.861747572815534e-06, "loss": 0.041, "step": 234420 }, { "epoch": 91.04, "learning_rate": 7.861229773462784e-06, "loss": 0.058, "step": 234430 }, { "epoch": 91.04, "learning_rate": 7.860711974110033e-06, "loss": 0.0292, "step": 234440 }, { "epoch": 91.05, "learning_rate": 7.860194174757281e-06, "loss": 0.0813, "step": 234450 }, { "epoch": 91.05, "learning_rate": 7.859676375404531e-06, "loss": 0.0777, "step": 234460 }, { "epoch": 91.06, "learning_rate": 7.85915857605178e-06, "loss": 0.0607, "step": 234470 }, { "epoch": 91.06, "learning_rate": 7.85864077669903e-06, "loss": 0.0013, "step": 234480 }, { "epoch": 91.06, "learning_rate": 7.858122977346278e-06, "loss": 0.1373, "step": 234490 }, { "epoch": 91.07, "learning_rate": 7.857605177993528e-06, "loss": 0.0005, "step": 234500 }, { "epoch": 91.07, "learning_rate": 7.857087378640777e-06, "loss": 0.0388, "step": 234510 }, { "epoch": 91.08, "learning_rate": 7.856569579288027e-06, "loss": 0.0981, "step": 234520 }, { "epoch": 91.08, "learning_rate": 7.856051779935275e-06, "loss": 0.0509, "step": 234530 }, { "epoch": 91.08, "learning_rate": 7.855533980582524e-06, "loss": 0.1173, "step": 234540 }, { "epoch": 91.09, "learning_rate": 7.855016181229774e-06, "loss": 0.0609, "step": 234550 }, { "epoch": 91.09, "learning_rate": 7.854498381877024e-06, "loss": 0.0013, "step": 234560 }, { "epoch": 91.1, "learning_rate": 7.853980582524272e-06, "loss": 0.0639, "step": 234570 }, { "epoch": 91.1, "learning_rate": 7.853462783171521e-06, "loss": 0.0321, "step": 234580 }, { "epoch": 91.1, "learning_rate": 7.852944983818771e-06, "loss": 0.026, "step": 234590 }, { "epoch": 91.11, "learning_rate": 7.85242718446602e-06, "loss": 0.1283, "step": 234600 }, { "epoch": 91.11, "learning_rate": 7.851909385113268e-06, "loss": 0.0572, "step": 234610 }, { "epoch": 91.11, "learning_rate": 7.851391585760518e-06, "loss": 0.1822, "step": 234620 }, { "epoch": 91.12, "learning_rate": 7.850873786407768e-06, "loss": 0.065, "step": 234630 }, { "epoch": 91.12, "learning_rate": 7.850355987055017e-06, "loss": 0.0147, "step": 234640 }, { "epoch": 91.13, "learning_rate": 7.849838187702265e-06, "loss": 0.0398, "step": 234650 }, { "epoch": 91.13, "learning_rate": 7.849320388349515e-06, "loss": 0.0007, "step": 234660 }, { "epoch": 91.13, "learning_rate": 7.848802588996764e-06, "loss": 0.0755, "step": 234670 }, { "epoch": 91.14, "learning_rate": 7.848284789644014e-06, "loss": 0.0019, "step": 234680 }, { "epoch": 91.14, "learning_rate": 7.847766990291262e-06, "loss": 0.0407, "step": 234690 }, { "epoch": 91.15, "learning_rate": 7.847249190938512e-06, "loss": 0.1187, "step": 234700 }, { "epoch": 91.15, "learning_rate": 7.846731391585761e-06, "loss": 0.0034, "step": 234710 }, { "epoch": 91.15, "learning_rate": 7.84621359223301e-06, "loss": 0.0488, "step": 234720 }, { "epoch": 91.16, "learning_rate": 7.845695792880259e-06, "loss": 0.0922, "step": 234730 }, { "epoch": 91.16, "learning_rate": 7.845177993527508e-06, "loss": 0.0194, "step": 234740 }, { "epoch": 91.17, "learning_rate": 7.844660194174758e-06, "loss": 0.0493, "step": 234750 }, { "epoch": 91.17, "learning_rate": 7.844142394822008e-06, "loss": 0.1157, "step": 234760 }, { "epoch": 91.17, "learning_rate": 7.843624595469256e-06, "loss": 0.0457, "step": 234770 }, { "epoch": 91.18, "learning_rate": 7.843106796116505e-06, "loss": 0.0093, "step": 234780 }, { "epoch": 91.18, "learning_rate": 7.842588996763755e-06, "loss": 0.0649, "step": 234790 }, { "epoch": 91.18, "learning_rate": 7.842071197411004e-06, "loss": 0.0534, "step": 234800 }, { "epoch": 91.19, "learning_rate": 7.841553398058252e-06, "loss": 0.0053, "step": 234810 }, { "epoch": 91.19, "learning_rate": 7.841035598705502e-06, "loss": 0.0557, "step": 234820 }, { "epoch": 91.2, "learning_rate": 7.840517799352752e-06, "loss": 0.0488, "step": 234830 }, { "epoch": 91.2, "learning_rate": 7.840000000000001e-06, "loss": 0.0292, "step": 234840 }, { "epoch": 91.2, "learning_rate": 7.839482200647249e-06, "loss": 0.0364, "step": 234850 }, { "epoch": 91.21, "learning_rate": 7.838964401294499e-06, "loss": 0.008, "step": 234860 }, { "epoch": 91.21, "learning_rate": 7.838446601941748e-06, "loss": 0.0735, "step": 234870 }, { "epoch": 91.22, "learning_rate": 7.837928802588998e-06, "loss": 0.0082, "step": 234880 }, { "epoch": 91.22, "learning_rate": 7.837411003236248e-06, "loss": 0.0728, "step": 234890 }, { "epoch": 91.22, "learning_rate": 7.836893203883495e-06, "loss": 0.0533, "step": 234900 }, { "epoch": 91.23, "learning_rate": 7.836375404530745e-06, "loss": 0.0134, "step": 234910 }, { "epoch": 91.23, "learning_rate": 7.835857605177995e-06, "loss": 0.0008, "step": 234920 }, { "epoch": 91.23, "learning_rate": 7.835339805825244e-06, "loss": 0.1104, "step": 234930 }, { "epoch": 91.24, "learning_rate": 7.834822006472492e-06, "loss": 0.0227, "step": 234940 }, { "epoch": 91.24, "learning_rate": 7.834304207119742e-06, "loss": 0.1052, "step": 234950 }, { "epoch": 91.25, "learning_rate": 7.833786407766992e-06, "loss": 0.0317, "step": 234960 }, { "epoch": 91.25, "learning_rate": 7.833268608414241e-06, "loss": 0.0681, "step": 234970 }, { "epoch": 91.25, "learning_rate": 7.832750809061489e-06, "loss": 0.0239, "step": 234980 }, { "epoch": 91.26, "learning_rate": 7.832233009708739e-06, "loss": 0.0966, "step": 234990 }, { "epoch": 91.26, "learning_rate": 7.831715210355988e-06, "loss": 0.0844, "step": 235000 }, { "epoch": 91.27, "learning_rate": 7.831197411003238e-06, "loss": 0.0048, "step": 235010 }, { "epoch": 91.27, "learning_rate": 7.830679611650486e-06, "loss": 0.1476, "step": 235020 }, { "epoch": 91.27, "learning_rate": 7.830161812297735e-06, "loss": 0.0084, "step": 235030 }, { "epoch": 91.28, "learning_rate": 7.829644012944985e-06, "loss": 0.0074, "step": 235040 }, { "epoch": 91.28, "learning_rate": 7.829126213592235e-06, "loss": 0.111, "step": 235050 }, { "epoch": 91.29, "learning_rate": 7.828608414239483e-06, "loss": 0.0579, "step": 235060 }, { "epoch": 91.29, "learning_rate": 7.828090614886732e-06, "loss": 0.0514, "step": 235070 }, { "epoch": 91.29, "learning_rate": 7.827572815533982e-06, "loss": 0.0268, "step": 235080 }, { "epoch": 91.3, "learning_rate": 7.827055016181231e-06, "loss": 0.1087, "step": 235090 }, { "epoch": 91.3, "learning_rate": 7.82653721682848e-06, "loss": 0.0099, "step": 235100 }, { "epoch": 91.3, "learning_rate": 7.826019417475729e-06, "loss": 0.0011, "step": 235110 }, { "epoch": 91.31, "learning_rate": 7.825501618122979e-06, "loss": 0.0427, "step": 235120 }, { "epoch": 91.31, "learning_rate": 7.824983818770228e-06, "loss": 0.0061, "step": 235130 }, { "epoch": 91.32, "learning_rate": 7.824466019417476e-06, "loss": 0.0614, "step": 235140 }, { "epoch": 91.32, "learning_rate": 7.823948220064726e-06, "loss": 0.1145, "step": 235150 }, { "epoch": 91.32, "learning_rate": 7.823430420711975e-06, "loss": 0.1097, "step": 235160 }, { "epoch": 91.33, "learning_rate": 7.822912621359225e-06, "loss": 0.0351, "step": 235170 }, { "epoch": 91.33, "learning_rate": 7.822394822006473e-06, "loss": 0.0164, "step": 235180 }, { "epoch": 91.34, "learning_rate": 7.821877022653723e-06, "loss": 0.0591, "step": 235190 }, { "epoch": 91.34, "learning_rate": 7.821359223300972e-06, "loss": 0.088, "step": 235200 }, { "epoch": 91.34, "learning_rate": 7.820841423948222e-06, "loss": 0.0225, "step": 235210 }, { "epoch": 91.35, "learning_rate": 7.82032362459547e-06, "loss": 0.0832, "step": 235220 }, { "epoch": 91.35, "learning_rate": 7.81980582524272e-06, "loss": 0.0174, "step": 235230 }, { "epoch": 91.36, "learning_rate": 7.819288025889969e-06, "loss": 0.0251, "step": 235240 }, { "epoch": 91.36, "learning_rate": 7.818770226537219e-06, "loss": 0.1409, "step": 235250 }, { "epoch": 91.36, "learning_rate": 7.818252427184466e-06, "loss": 0.0008, "step": 235260 }, { "epoch": 91.37, "learning_rate": 7.817734627831716e-06, "loss": 0.0578, "step": 235270 }, { "epoch": 91.37, "learning_rate": 7.817216828478966e-06, "loss": 0.0366, "step": 235280 }, { "epoch": 91.37, "learning_rate": 7.816699029126215e-06, "loss": 0.0253, "step": 235290 }, { "epoch": 91.38, "learning_rate": 7.816181229773463e-06, "loss": 0.0719, "step": 235300 }, { "epoch": 91.38, "learning_rate": 7.815663430420713e-06, "loss": 0.0862, "step": 235310 }, { "epoch": 91.39, "learning_rate": 7.815145631067963e-06, "loss": 0.0604, "step": 235320 }, { "epoch": 91.39, "learning_rate": 7.814627831715212e-06, "loss": 0.0177, "step": 235330 }, { "epoch": 91.39, "learning_rate": 7.81411003236246e-06, "loss": 0.0961, "step": 235340 }, { "epoch": 91.4, "learning_rate": 7.81359223300971e-06, "loss": 0.0002, "step": 235350 }, { "epoch": 91.4, "learning_rate": 7.81307443365696e-06, "loss": 0.0099, "step": 235360 }, { "epoch": 91.41, "learning_rate": 7.812556634304209e-06, "loss": 0.2257, "step": 235370 }, { "epoch": 91.41, "learning_rate": 7.812038834951457e-06, "loss": 0.09, "step": 235380 }, { "epoch": 91.41, "learning_rate": 7.811521035598706e-06, "loss": 0.0096, "step": 235390 }, { "epoch": 91.42, "learning_rate": 7.811003236245956e-06, "loss": 0.0094, "step": 235400 }, { "epoch": 91.42, "learning_rate": 7.810485436893204e-06, "loss": 0.0909, "step": 235410 }, { "epoch": 91.43, "learning_rate": 7.809967637540454e-06, "loss": 0.1653, "step": 235420 }, { "epoch": 91.43, "learning_rate": 7.809449838187703e-06, "loss": 0.0482, "step": 235430 }, { "epoch": 91.43, "learning_rate": 7.808932038834953e-06, "loss": 0.1097, "step": 235440 }, { "epoch": 91.44, "learning_rate": 7.8084142394822e-06, "loss": 0.095, "step": 235450 }, { "epoch": 91.44, "learning_rate": 7.80789644012945e-06, "loss": 0.0581, "step": 235460 }, { "epoch": 91.44, "learning_rate": 7.8073786407767e-06, "loss": 0.0003, "step": 235470 }, { "epoch": 91.45, "learning_rate": 7.80686084142395e-06, "loss": 0.0029, "step": 235480 }, { "epoch": 91.45, "learning_rate": 7.806343042071198e-06, "loss": 0.0614, "step": 235490 }, { "epoch": 91.46, "learning_rate": 7.805825242718447e-06, "loss": 0.02, "step": 235500 }, { "epoch": 91.46, "learning_rate": 7.805307443365697e-06, "loss": 0.1414, "step": 235510 }, { "epoch": 91.46, "learning_rate": 7.804789644012946e-06, "loss": 0.1167, "step": 235520 }, { "epoch": 91.47, "learning_rate": 7.804271844660194e-06, "loss": 0.0198, "step": 235530 }, { "epoch": 91.47, "learning_rate": 7.803754045307444e-06, "loss": 0.052, "step": 235540 }, { "epoch": 91.48, "learning_rate": 7.803236245954694e-06, "loss": 0.0034, "step": 235550 }, { "epoch": 91.48, "learning_rate": 7.802718446601943e-06, "loss": 0.0084, "step": 235560 }, { "epoch": 91.48, "learning_rate": 7.802200647249191e-06, "loss": 0.0866, "step": 235570 }, { "epoch": 91.49, "learning_rate": 7.80168284789644e-06, "loss": 0.0012, "step": 235580 }, { "epoch": 91.49, "learning_rate": 7.80116504854369e-06, "loss": 0.2861, "step": 235590 }, { "epoch": 91.5, "learning_rate": 7.800647249190938e-06, "loss": 0.0184, "step": 235600 }, { "epoch": 91.5, "learning_rate": 7.800129449838188e-06, "loss": 0.037, "step": 235610 }, { "epoch": 91.5, "learning_rate": 7.799611650485437e-06, "loss": 0.0848, "step": 235620 }, { "epoch": 91.51, "learning_rate": 7.799093851132687e-06, "loss": 0.0222, "step": 235630 }, { "epoch": 91.51, "learning_rate": 7.798576051779935e-06, "loss": 0.0802, "step": 235640 }, { "epoch": 91.51, "learning_rate": 7.798058252427185e-06, "loss": 0.0039, "step": 235650 }, { "epoch": 91.52, "learning_rate": 7.797540453074434e-06, "loss": 0.069, "step": 235660 }, { "epoch": 91.52, "learning_rate": 7.797022653721684e-06, "loss": 0.0747, "step": 235670 }, { "epoch": 91.53, "learning_rate": 7.796504854368932e-06, "loss": 0.011, "step": 235680 }, { "epoch": 91.53, "learning_rate": 7.795987055016181e-06, "loss": 0.0031, "step": 235690 }, { "epoch": 91.53, "learning_rate": 7.795469255663431e-06, "loss": 0.0034, "step": 235700 }, { "epoch": 91.54, "learning_rate": 7.79495145631068e-06, "loss": 0.0794, "step": 235710 }, { "epoch": 91.54, "learning_rate": 7.794433656957929e-06, "loss": 0.0607, "step": 235720 }, { "epoch": 91.55, "learning_rate": 7.793915857605178e-06, "loss": 0.0003, "step": 235730 }, { "epoch": 91.55, "learning_rate": 7.793398058252428e-06, "loss": 0.0953, "step": 235740 }, { "epoch": 91.55, "learning_rate": 7.792880258899676e-06, "loss": 0.0462, "step": 235750 }, { "epoch": 91.56, "learning_rate": 7.792362459546925e-06, "loss": 0.0929, "step": 235760 }, { "epoch": 91.56, "learning_rate": 7.791844660194175e-06, "loss": 0.1277, "step": 235770 }, { "epoch": 91.57, "learning_rate": 7.791326860841425e-06, "loss": 0.1522, "step": 235780 }, { "epoch": 91.57, "learning_rate": 7.790809061488673e-06, "loss": 0.0239, "step": 235790 }, { "epoch": 91.57, "learning_rate": 7.790291262135922e-06, "loss": 0.0129, "step": 235800 }, { "epoch": 91.58, "learning_rate": 7.789773462783172e-06, "loss": 0.0741, "step": 235810 }, { "epoch": 91.58, "learning_rate": 7.789255663430421e-06, "loss": 0.0841, "step": 235820 }, { "epoch": 91.58, "learning_rate": 7.78873786407767e-06, "loss": 0.1555, "step": 235830 }, { "epoch": 91.59, "learning_rate": 7.788220064724919e-06, "loss": 0.0144, "step": 235840 }, { "epoch": 91.59, "learning_rate": 7.787702265372169e-06, "loss": 0.0493, "step": 235850 }, { "epoch": 91.6, "learning_rate": 7.787184466019418e-06, "loss": 0.1034, "step": 235860 }, { "epoch": 91.6, "learning_rate": 7.786666666666666e-06, "loss": 0.0092, "step": 235870 }, { "epoch": 91.6, "learning_rate": 7.786148867313916e-06, "loss": 0.0513, "step": 235880 }, { "epoch": 91.61, "learning_rate": 7.785631067961165e-06, "loss": 0.0242, "step": 235890 }, { "epoch": 91.61, "learning_rate": 7.785113268608415e-06, "loss": 0.0154, "step": 235900 }, { "epoch": 91.62, "learning_rate": 7.784595469255663e-06, "loss": 0.0001, "step": 235910 }, { "epoch": 91.62, "learning_rate": 7.784077669902912e-06, "loss": 0.0011, "step": 235920 }, { "epoch": 91.62, "learning_rate": 7.783559870550162e-06, "loss": 0.138, "step": 235930 }, { "epoch": 91.63, "learning_rate": 7.783042071197412e-06, "loss": 0.0243, "step": 235940 }, { "epoch": 91.63, "learning_rate": 7.78252427184466e-06, "loss": 0.1508, "step": 235950 }, { "epoch": 91.63, "learning_rate": 7.78200647249191e-06, "loss": 0.0194, "step": 235960 }, { "epoch": 91.64, "learning_rate": 7.781488673139159e-06, "loss": 0.0141, "step": 235970 }, { "epoch": 91.64, "learning_rate": 7.780970873786408e-06, "loss": 0.0293, "step": 235980 }, { "epoch": 91.65, "learning_rate": 7.780453074433656e-06, "loss": 0.0333, "step": 235990 }, { "epoch": 91.65, "learning_rate": 7.779935275080906e-06, "loss": 0.0841, "step": 236000 }, { "epoch": 91.65, "learning_rate": 7.779417475728156e-06, "loss": 0.0676, "step": 236010 }, { "epoch": 91.66, "learning_rate": 7.778899676375405e-06, "loss": 0.01, "step": 236020 }, { "epoch": 91.66, "learning_rate": 7.778381877022655e-06, "loss": 0.063, "step": 236030 }, { "epoch": 91.67, "learning_rate": 7.777864077669903e-06, "loss": 0.0484, "step": 236040 }, { "epoch": 91.67, "learning_rate": 7.777346278317152e-06, "loss": 0.0003, "step": 236050 }, { "epoch": 91.67, "learning_rate": 7.776828478964402e-06, "loss": 0.0007, "step": 236060 }, { "epoch": 91.68, "learning_rate": 7.776310679611652e-06, "loss": 0.0875, "step": 236070 }, { "epoch": 91.68, "learning_rate": 7.7757928802589e-06, "loss": 0.0304, "step": 236080 }, { "epoch": 91.69, "learning_rate": 7.77527508090615e-06, "loss": 0.0834, "step": 236090 }, { "epoch": 91.69, "learning_rate": 7.774757281553399e-06, "loss": 0.0927, "step": 236100 }, { "epoch": 91.69, "learning_rate": 7.774239482200648e-06, "loss": 0.0375, "step": 236110 }, { "epoch": 91.7, "learning_rate": 7.773721682847896e-06, "loss": 0.0039, "step": 236120 }, { "epoch": 91.7, "learning_rate": 7.773203883495146e-06, "loss": 0.0931, "step": 236130 }, { "epoch": 91.7, "learning_rate": 7.772686084142396e-06, "loss": 0.1058, "step": 236140 }, { "epoch": 91.71, "learning_rate": 7.772168284789645e-06, "loss": 0.0066, "step": 236150 }, { "epoch": 91.71, "learning_rate": 7.771650485436893e-06, "loss": 0.0017, "step": 236160 }, { "epoch": 91.72, "learning_rate": 7.771132686084143e-06, "loss": 0.0951, "step": 236170 }, { "epoch": 91.72, "learning_rate": 7.770614886731392e-06, "loss": 0.053, "step": 236180 }, { "epoch": 91.72, "learning_rate": 7.770097087378642e-06, "loss": 0.0583, "step": 236190 }, { "epoch": 91.73, "learning_rate": 7.76957928802589e-06, "loss": 0.0002, "step": 236200 }, { "epoch": 91.73, "learning_rate": 7.76906148867314e-06, "loss": 0.0504, "step": 236210 }, { "epoch": 91.74, "learning_rate": 7.76854368932039e-06, "loss": 0.0593, "step": 236220 }, { "epoch": 91.74, "learning_rate": 7.768025889967639e-06, "loss": 0.0285, "step": 236230 }, { "epoch": 91.74, "learning_rate": 7.767508090614887e-06, "loss": 0.0793, "step": 236240 }, { "epoch": 91.75, "learning_rate": 7.766990291262136e-06, "loss": 0.1508, "step": 236250 }, { "epoch": 91.75, "learning_rate": 7.766472491909386e-06, "loss": 0.0901, "step": 236260 }, { "epoch": 91.76, "learning_rate": 7.765954692556636e-06, "loss": 0.0599, "step": 236270 }, { "epoch": 91.76, "learning_rate": 7.765436893203883e-06, "loss": 0.0283, "step": 236280 }, { "epoch": 91.76, "learning_rate": 7.764919093851133e-06, "loss": 0.0022, "step": 236290 }, { "epoch": 91.77, "learning_rate": 7.764401294498383e-06, "loss": 0.0254, "step": 236300 }, { "epoch": 91.77, "learning_rate": 7.763883495145632e-06, "loss": 0.1088, "step": 236310 }, { "epoch": 91.77, "learning_rate": 7.76336569579288e-06, "loss": 0.1425, "step": 236320 }, { "epoch": 91.78, "learning_rate": 7.76284789644013e-06, "loss": 0.0146, "step": 236330 }, { "epoch": 91.78, "learning_rate": 7.76233009708738e-06, "loss": 0.0732, "step": 236340 }, { "epoch": 91.79, "learning_rate": 7.761812297734629e-06, "loss": 0.0636, "step": 236350 }, { "epoch": 91.79, "learning_rate": 7.761294498381877e-06, "loss": 0.0991, "step": 236360 }, { "epoch": 91.79, "learning_rate": 7.760776699029127e-06, "loss": 0.0554, "step": 236370 }, { "epoch": 91.8, "learning_rate": 7.760258899676376e-06, "loss": 0.0206, "step": 236380 }, { "epoch": 91.8, "learning_rate": 7.759741100323626e-06, "loss": 0.1408, "step": 236390 }, { "epoch": 91.81, "learning_rate": 7.759223300970874e-06, "loss": 0.0529, "step": 236400 }, { "epoch": 91.81, "learning_rate": 7.758705501618123e-06, "loss": 0.1708, "step": 236410 }, { "epoch": 91.81, "learning_rate": 7.758187702265373e-06, "loss": 0.0312, "step": 236420 }, { "epoch": 91.82, "learning_rate": 7.757669902912623e-06, "loss": 0.0168, "step": 236430 }, { "epoch": 91.82, "learning_rate": 7.75715210355987e-06, "loss": 0.084, "step": 236440 }, { "epoch": 91.83, "learning_rate": 7.75663430420712e-06, "loss": 0.1027, "step": 236450 }, { "epoch": 91.83, "learning_rate": 7.75611650485437e-06, "loss": 0.0483, "step": 236460 }, { "epoch": 91.83, "learning_rate": 7.75559870550162e-06, "loss": 0.1206, "step": 236470 }, { "epoch": 91.84, "learning_rate": 7.755080906148867e-06, "loss": 0.2143, "step": 236480 }, { "epoch": 91.84, "learning_rate": 7.754563106796117e-06, "loss": 0.0002, "step": 236490 }, { "epoch": 91.84, "learning_rate": 7.754045307443367e-06, "loss": 0.172, "step": 236500 }, { "epoch": 91.85, "learning_rate": 7.753527508090616e-06, "loss": 0.0004, "step": 236510 }, { "epoch": 91.85, "learning_rate": 7.753009708737864e-06, "loss": 0.0206, "step": 236520 }, { "epoch": 91.86, "learning_rate": 7.752491909385114e-06, "loss": 0.0508, "step": 236530 }, { "epoch": 91.86, "learning_rate": 7.751974110032363e-06, "loss": 0.0851, "step": 236540 }, { "epoch": 91.86, "learning_rate": 7.751456310679613e-06, "loss": 0.0373, "step": 236550 }, { "epoch": 91.87, "learning_rate": 7.750938511326861e-06, "loss": 0.0074, "step": 236560 }, { "epoch": 91.87, "learning_rate": 7.75042071197411e-06, "loss": 0.0874, "step": 236570 }, { "epoch": 91.88, "learning_rate": 7.74990291262136e-06, "loss": 0.0879, "step": 236580 }, { "epoch": 91.88, "learning_rate": 7.74938511326861e-06, "loss": 0.0507, "step": 236590 }, { "epoch": 91.88, "learning_rate": 7.74886731391586e-06, "loss": 0.1319, "step": 236600 }, { "epoch": 91.89, "learning_rate": 7.748349514563107e-06, "loss": 0.019, "step": 236610 }, { "epoch": 91.89, "learning_rate": 7.747831715210357e-06, "loss": 0.0448, "step": 236620 }, { "epoch": 91.9, "learning_rate": 7.747313915857607e-06, "loss": 0.0497, "step": 236630 }, { "epoch": 91.9, "learning_rate": 7.746796116504856e-06, "loss": 0.0381, "step": 236640 }, { "epoch": 91.9, "learning_rate": 7.746278317152104e-06, "loss": 0.058, "step": 236650 }, { "epoch": 91.91, "learning_rate": 7.745760517799354e-06, "loss": 0.0583, "step": 236660 }, { "epoch": 91.91, "learning_rate": 7.745242718446603e-06, "loss": 0.0566, "step": 236670 }, { "epoch": 91.91, "learning_rate": 7.744724919093853e-06, "loss": 0.1002, "step": 236680 }, { "epoch": 91.92, "learning_rate": 7.744207119741101e-06, "loss": 0.0182, "step": 236690 }, { "epoch": 91.92, "learning_rate": 7.74368932038835e-06, "loss": 0.0375, "step": 236700 }, { "epoch": 91.93, "learning_rate": 7.7431715210356e-06, "loss": 0.1384, "step": 236710 }, { "epoch": 91.93, "learning_rate": 7.74265372168285e-06, "loss": 0.2688, "step": 236720 }, { "epoch": 91.93, "learning_rate": 7.742135922330098e-06, "loss": 0.0798, "step": 236730 }, { "epoch": 91.94, "learning_rate": 7.741618122977347e-06, "loss": 0.0603, "step": 236740 }, { "epoch": 91.94, "learning_rate": 7.741100323624597e-06, "loss": 0.0607, "step": 236750 }, { "epoch": 91.95, "learning_rate": 7.740582524271847e-06, "loss": 0.0338, "step": 236760 }, { "epoch": 91.95, "learning_rate": 7.740064724919094e-06, "loss": 0.1021, "step": 236770 }, { "epoch": 91.95, "learning_rate": 7.739546925566344e-06, "loss": 0.0432, "step": 236780 }, { "epoch": 91.96, "learning_rate": 7.739029126213594e-06, "loss": 0.0145, "step": 236790 }, { "epoch": 91.96, "learning_rate": 7.738511326860843e-06, "loss": 0.0347, "step": 236800 }, { "epoch": 91.97, "learning_rate": 7.737993527508091e-06, "loss": 0.0034, "step": 236810 }, { "epoch": 91.97, "learning_rate": 7.73747572815534e-06, "loss": 0.0005, "step": 236820 }, { "epoch": 91.97, "learning_rate": 7.73695792880259e-06, "loss": 0.0709, "step": 236830 }, { "epoch": 91.98, "learning_rate": 7.73644012944984e-06, "loss": 0.0033, "step": 236840 }, { "epoch": 91.98, "learning_rate": 7.735922330097088e-06, "loss": 0.0981, "step": 236850 }, { "epoch": 91.98, "learning_rate": 7.735404530744338e-06, "loss": 0.1462, "step": 236860 }, { "epoch": 91.99, "learning_rate": 7.734886731391587e-06, "loss": 0.1284, "step": 236870 }, { "epoch": 91.99, "learning_rate": 7.734368932038835e-06, "loss": 0.0125, "step": 236880 }, { "epoch": 92.0, "learning_rate": 7.733851132686085e-06, "loss": 0.0002, "step": 236890 }, { "epoch": 92.0, "learning_rate": 7.733333333333334e-06, "loss": 0.0827, "step": 236900 }, { "epoch": 92.0, "eval_accuracy": 0.9485557083906465, "eval_loss": 0.36826103925704956, "eval_runtime": 8.2034, "eval_samples_per_second": 443.11, "eval_steps_per_second": 55.465, "step": 236900 }, { "epoch": 92.0, "learning_rate": 7.732815533980584e-06, "loss": 0.0974, "step": 236910 }, { "epoch": 92.01, "learning_rate": 7.732297734627832e-06, "loss": 0.0106, "step": 236920 }, { "epoch": 92.01, "learning_rate": 7.731779935275082e-06, "loss": 0.0321, "step": 236930 }, { "epoch": 92.02, "learning_rate": 7.731262135922331e-06, "loss": 0.0937, "step": 236940 }, { "epoch": 92.02, "learning_rate": 7.73074433656958e-06, "loss": 0.1883, "step": 236950 }, { "epoch": 92.02, "learning_rate": 7.730226537216829e-06, "loss": 0.1077, "step": 236960 }, { "epoch": 92.03, "learning_rate": 7.729708737864078e-06, "loss": 0.0939, "step": 236970 }, { "epoch": 92.03, "learning_rate": 7.729190938511328e-06, "loss": 0.0282, "step": 236980 }, { "epoch": 92.03, "learning_rate": 7.728673139158578e-06, "loss": 0.0423, "step": 236990 }, { "epoch": 92.04, "learning_rate": 7.728155339805825e-06, "loss": 0.0149, "step": 237000 }, { "epoch": 92.04, "learning_rate": 7.727637540453075e-06, "loss": 0.0824, "step": 237010 }, { "epoch": 92.05, "learning_rate": 7.727119741100325e-06, "loss": 0.2211, "step": 237020 }, { "epoch": 92.05, "learning_rate": 7.726601941747573e-06, "loss": 0.0225, "step": 237030 }, { "epoch": 92.05, "learning_rate": 7.726084142394822e-06, "loss": 0.0088, "step": 237040 }, { "epoch": 92.06, "learning_rate": 7.725566343042072e-06, "loss": 0.0897, "step": 237050 }, { "epoch": 92.06, "learning_rate": 7.725048543689322e-06, "loss": 0.001, "step": 237060 }, { "epoch": 92.07, "learning_rate": 7.72453074433657e-06, "loss": 0.0825, "step": 237070 }, { "epoch": 92.07, "learning_rate": 7.724012944983819e-06, "loss": 0.0379, "step": 237080 }, { "epoch": 92.07, "learning_rate": 7.723495145631069e-06, "loss": 0.1259, "step": 237090 }, { "epoch": 92.08, "learning_rate": 7.722977346278318e-06, "loss": 0.0326, "step": 237100 }, { "epoch": 92.08, "learning_rate": 7.722459546925566e-06, "loss": 0.0181, "step": 237110 }, { "epoch": 92.09, "learning_rate": 7.721941747572816e-06, "loss": 0.0864, "step": 237120 }, { "epoch": 92.09, "learning_rate": 7.721423948220065e-06, "loss": 0.0024, "step": 237130 }, { "epoch": 92.09, "learning_rate": 7.720906148867315e-06, "loss": 0.0174, "step": 237140 }, { "epoch": 92.1, "learning_rate": 7.720388349514563e-06, "loss": 0.0197, "step": 237150 }, { "epoch": 92.1, "learning_rate": 7.719870550161813e-06, "loss": 0.0565, "step": 237160 }, { "epoch": 92.1, "learning_rate": 7.719352750809062e-06, "loss": 0.0458, "step": 237170 }, { "epoch": 92.11, "learning_rate": 7.718834951456312e-06, "loss": 0.0213, "step": 237180 }, { "epoch": 92.11, "learning_rate": 7.71831715210356e-06, "loss": 0.1253, "step": 237190 }, { "epoch": 92.12, "learning_rate": 7.71779935275081e-06, "loss": 0.1019, "step": 237200 }, { "epoch": 92.12, "learning_rate": 7.717281553398059e-06, "loss": 0.0098, "step": 237210 }, { "epoch": 92.12, "learning_rate": 7.716763754045307e-06, "loss": 0.0894, "step": 237220 }, { "epoch": 92.13, "learning_rate": 7.716245954692557e-06, "loss": 0.0264, "step": 237230 }, { "epoch": 92.13, "learning_rate": 7.715728155339806e-06, "loss": 0.0449, "step": 237240 }, { "epoch": 92.14, "learning_rate": 7.715210355987056e-06, "loss": 0.0207, "step": 237250 }, { "epoch": 92.14, "learning_rate": 7.714692556634304e-06, "loss": 0.0313, "step": 237260 }, { "epoch": 92.14, "learning_rate": 7.714174757281553e-06, "loss": 0.0043, "step": 237270 }, { "epoch": 92.15, "learning_rate": 7.713656957928803e-06, "loss": 0.0114, "step": 237280 }, { "epoch": 92.15, "learning_rate": 7.713139158576053e-06, "loss": 0.0036, "step": 237290 }, { "epoch": 92.16, "learning_rate": 7.7126213592233e-06, "loss": 0.1209, "step": 237300 }, { "epoch": 92.16, "learning_rate": 7.71210355987055e-06, "loss": 0.0834, "step": 237310 }, { "epoch": 92.16, "learning_rate": 7.7115857605178e-06, "loss": 0.0004, "step": 237320 }, { "epoch": 92.17, "learning_rate": 7.71106796116505e-06, "loss": 0.0792, "step": 237330 }, { "epoch": 92.17, "learning_rate": 7.710550161812297e-06, "loss": 0.034, "step": 237340 }, { "epoch": 92.17, "learning_rate": 7.710032362459547e-06, "loss": 0.0698, "step": 237350 }, { "epoch": 92.18, "learning_rate": 7.709514563106796e-06, "loss": 0.001, "step": 237360 }, { "epoch": 92.18, "learning_rate": 7.708996763754046e-06, "loss": 0.0154, "step": 237370 }, { "epoch": 92.19, "learning_rate": 7.708478964401294e-06, "loss": 0.1349, "step": 237380 }, { "epoch": 92.19, "learning_rate": 7.707961165048544e-06, "loss": 0.0537, "step": 237390 }, { "epoch": 92.19, "learning_rate": 7.707443365695793e-06, "loss": 0.1664, "step": 237400 }, { "epoch": 92.2, "learning_rate": 7.706925566343043e-06, "loss": 0.0622, "step": 237410 }, { "epoch": 92.2, "learning_rate": 7.70640776699029e-06, "loss": 0.0015, "step": 237420 }, { "epoch": 92.21, "learning_rate": 7.70588996763754e-06, "loss": 0.0927, "step": 237430 }, { "epoch": 92.21, "learning_rate": 7.70537216828479e-06, "loss": 0.0009, "step": 237440 }, { "epoch": 92.21, "learning_rate": 7.70485436893204e-06, "loss": 0.092, "step": 237450 }, { "epoch": 92.22, "learning_rate": 7.704336569579288e-06, "loss": 0.1238, "step": 237460 }, { "epoch": 92.22, "learning_rate": 7.703818770226537e-06, "loss": 0.0139, "step": 237470 }, { "epoch": 92.23, "learning_rate": 7.703300970873787e-06, "loss": 0.0205, "step": 237480 }, { "epoch": 92.23, "learning_rate": 7.702783171521036e-06, "loss": 0.0041, "step": 237490 }, { "epoch": 92.23, "learning_rate": 7.702265372168284e-06, "loss": 0.0833, "step": 237500 }, { "epoch": 92.24, "learning_rate": 7.701747572815534e-06, "loss": 0.0208, "step": 237510 }, { "epoch": 92.24, "learning_rate": 7.701229773462784e-06, "loss": 0.0273, "step": 237520 }, { "epoch": 92.24, "learning_rate": 7.700711974110033e-06, "loss": 0.0794, "step": 237530 }, { "epoch": 92.25, "learning_rate": 7.700194174757281e-06, "loss": 0.0688, "step": 237540 }, { "epoch": 92.25, "learning_rate": 7.69967637540453e-06, "loss": 0.0875, "step": 237550 }, { "epoch": 92.26, "learning_rate": 7.69915857605178e-06, "loss": 0.0017, "step": 237560 }, { "epoch": 92.26, "learning_rate": 7.69864077669903e-06, "loss": 0.0368, "step": 237570 }, { "epoch": 92.26, "learning_rate": 7.698122977346278e-06, "loss": 0.0415, "step": 237580 }, { "epoch": 92.27, "learning_rate": 7.697605177993528e-06, "loss": 0.1454, "step": 237590 }, { "epoch": 92.27, "learning_rate": 7.697087378640777e-06, "loss": 0.0029, "step": 237600 }, { "epoch": 92.28, "learning_rate": 7.696569579288027e-06, "loss": 0.0267, "step": 237610 }, { "epoch": 92.28, "learning_rate": 7.696051779935275e-06, "loss": 0.0362, "step": 237620 }, { "epoch": 92.28, "learning_rate": 7.695533980582524e-06, "loss": 0.0457, "step": 237630 }, { "epoch": 92.29, "learning_rate": 7.695016181229774e-06, "loss": 0.0128, "step": 237640 }, { "epoch": 92.29, "learning_rate": 7.694498381877024e-06, "loss": 0.047, "step": 237650 }, { "epoch": 92.3, "learning_rate": 7.693980582524271e-06, "loss": 0.0306, "step": 237660 }, { "epoch": 92.3, "learning_rate": 7.693462783171521e-06, "loss": 0.2175, "step": 237670 }, { "epoch": 92.3, "learning_rate": 7.69294498381877e-06, "loss": 0.0173, "step": 237680 }, { "epoch": 92.31, "learning_rate": 7.69242718446602e-06, "loss": 0.0219, "step": 237690 }, { "epoch": 92.31, "learning_rate": 7.69190938511327e-06, "loss": 0.0889, "step": 237700 }, { "epoch": 92.31, "learning_rate": 7.691391585760518e-06, "loss": 0.0869, "step": 237710 }, { "epoch": 92.32, "learning_rate": 7.690873786407767e-06, "loss": 0.0708, "step": 237720 }, { "epoch": 92.32, "learning_rate": 7.690355987055017e-06, "loss": 0.1184, "step": 237730 }, { "epoch": 92.33, "learning_rate": 7.689838187702267e-06, "loss": 0.0523, "step": 237740 }, { "epoch": 92.33, "learning_rate": 7.689320388349515e-06, "loss": 0.1246, "step": 237750 }, { "epoch": 92.33, "learning_rate": 7.688802588996764e-06, "loss": 0.0025, "step": 237760 }, { "epoch": 92.34, "learning_rate": 7.688284789644014e-06, "loss": 0.0921, "step": 237770 }, { "epoch": 92.34, "learning_rate": 7.687766990291264e-06, "loss": 0.0325, "step": 237780 }, { "epoch": 92.35, "learning_rate": 7.687249190938511e-06, "loss": 0.0609, "step": 237790 }, { "epoch": 92.35, "learning_rate": 7.686731391585761e-06, "loss": 0.0473, "step": 237800 }, { "epoch": 92.35, "learning_rate": 7.68621359223301e-06, "loss": 0.0608, "step": 237810 }, { "epoch": 92.36, "learning_rate": 7.68569579288026e-06, "loss": 0.0321, "step": 237820 }, { "epoch": 92.36, "learning_rate": 7.685177993527508e-06, "loss": 0.0532, "step": 237830 }, { "epoch": 92.37, "learning_rate": 7.684660194174758e-06, "loss": 0.1558, "step": 237840 }, { "epoch": 92.37, "learning_rate": 7.684142394822007e-06, "loss": 0.0567, "step": 237850 }, { "epoch": 92.37, "learning_rate": 7.683624595469257e-06, "loss": 0.1126, "step": 237860 }, { "epoch": 92.38, "learning_rate": 7.683106796116505e-06, "loss": 0.011, "step": 237870 }, { "epoch": 92.38, "learning_rate": 7.682588996763755e-06, "loss": 0.005, "step": 237880 }, { "epoch": 92.38, "learning_rate": 7.682071197411004e-06, "loss": 0.0134, "step": 237890 }, { "epoch": 92.39, "learning_rate": 7.681553398058254e-06, "loss": 0.0311, "step": 237900 }, { "epoch": 92.39, "learning_rate": 7.681035598705502e-06, "loss": 0.1547, "step": 237910 }, { "epoch": 92.4, "learning_rate": 7.680517799352751e-06, "loss": 0.046, "step": 237920 }, { "epoch": 92.4, "learning_rate": 7.680000000000001e-06, "loss": 0.1096, "step": 237930 }, { "epoch": 92.4, "learning_rate": 7.67948220064725e-06, "loss": 0.0911, "step": 237940 }, { "epoch": 92.41, "learning_rate": 7.678964401294499e-06, "loss": 0.0146, "step": 237950 }, { "epoch": 92.41, "learning_rate": 7.678446601941748e-06, "loss": 0.0572, "step": 237960 }, { "epoch": 92.42, "learning_rate": 7.677928802588998e-06, "loss": 0.0707, "step": 237970 }, { "epoch": 92.42, "learning_rate": 7.677411003236247e-06, "loss": 0.0534, "step": 237980 }, { "epoch": 92.42, "learning_rate": 7.676893203883495e-06, "loss": 0.096, "step": 237990 }, { "epoch": 92.43, "learning_rate": 7.676375404530745e-06, "loss": 0.0756, "step": 238000 }, { "epoch": 92.43, "learning_rate": 7.675857605177995e-06, "loss": 0.1056, "step": 238010 }, { "epoch": 92.43, "learning_rate": 7.675339805825244e-06, "loss": 0.0636, "step": 238020 }, { "epoch": 92.44, "learning_rate": 7.674822006472492e-06, "loss": 0.1009, "step": 238030 }, { "epoch": 92.44, "learning_rate": 7.674304207119742e-06, "loss": 0.0944, "step": 238040 }, { "epoch": 92.45, "learning_rate": 7.673786407766991e-06, "loss": 0.1125, "step": 238050 }, { "epoch": 92.45, "learning_rate": 7.673268608414241e-06, "loss": 0.0968, "step": 238060 }, { "epoch": 92.45, "learning_rate": 7.672750809061489e-06, "loss": 0.039, "step": 238070 }, { "epoch": 92.46, "learning_rate": 7.672233009708739e-06, "loss": 0.0399, "step": 238080 }, { "epoch": 92.46, "learning_rate": 7.671715210355988e-06, "loss": 0.0108, "step": 238090 }, { "epoch": 92.47, "learning_rate": 7.671197411003238e-06, "loss": 0.1491, "step": 238100 }, { "epoch": 92.47, "learning_rate": 7.670679611650486e-06, "loss": 0.0402, "step": 238110 }, { "epoch": 92.47, "learning_rate": 7.670161812297735e-06, "loss": 0.0495, "step": 238120 }, { "epoch": 92.48, "learning_rate": 7.669644012944985e-06, "loss": 0.0005, "step": 238130 }, { "epoch": 92.48, "learning_rate": 7.669126213592235e-06, "loss": 0.0387, "step": 238140 }, { "epoch": 92.49, "learning_rate": 7.668608414239482e-06, "loss": 0.0668, "step": 238150 }, { "epoch": 92.49, "learning_rate": 7.668090614886732e-06, "loss": 0.0071, "step": 238160 }, { "epoch": 92.49, "learning_rate": 7.667572815533982e-06, "loss": 0.0795, "step": 238170 }, { "epoch": 92.5, "learning_rate": 7.667055016181231e-06, "loss": 0.0007, "step": 238180 }, { "epoch": 92.5, "learning_rate": 7.66653721682848e-06, "loss": 0.0419, "step": 238190 }, { "epoch": 92.5, "learning_rate": 7.666019417475729e-06, "loss": 0.0694, "step": 238200 }, { "epoch": 92.51, "learning_rate": 7.665501618122978e-06, "loss": 0.0203, "step": 238210 }, { "epoch": 92.51, "learning_rate": 7.664983818770228e-06, "loss": 0.0906, "step": 238220 }, { "epoch": 92.52, "learning_rate": 7.664466019417476e-06, "loss": 0.0057, "step": 238230 }, { "epoch": 92.52, "learning_rate": 7.663948220064726e-06, "loss": 0.0059, "step": 238240 }, { "epoch": 92.52, "learning_rate": 7.663430420711975e-06, "loss": 0.0015, "step": 238250 }, { "epoch": 92.53, "learning_rate": 7.662912621359225e-06, "loss": 0.1447, "step": 238260 }, { "epoch": 92.53, "learning_rate": 7.662394822006474e-06, "loss": 0.1366, "step": 238270 }, { "epoch": 92.54, "learning_rate": 7.661877022653722e-06, "loss": 0.061, "step": 238280 }, { "epoch": 92.54, "learning_rate": 7.661359223300972e-06, "loss": 0.0374, "step": 238290 }, { "epoch": 92.54, "learning_rate": 7.660841423948222e-06, "loss": 0.1163, "step": 238300 }, { "epoch": 92.55, "learning_rate": 7.660323624595471e-06, "loss": 0.1245, "step": 238310 }, { "epoch": 92.55, "learning_rate": 7.65980582524272e-06, "loss": 0.1311, "step": 238320 }, { "epoch": 92.56, "learning_rate": 7.659288025889969e-06, "loss": 0.1952, "step": 238330 }, { "epoch": 92.56, "learning_rate": 7.658770226537218e-06, "loss": 0.0376, "step": 238340 }, { "epoch": 92.56, "learning_rate": 7.658252427184466e-06, "loss": 0.0172, "step": 238350 }, { "epoch": 92.57, "learning_rate": 7.657734627831716e-06, "loss": 0.0108, "step": 238360 }, { "epoch": 92.57, "learning_rate": 7.657216828478966e-06, "loss": 0.0301, "step": 238370 }, { "epoch": 92.57, "learning_rate": 7.656699029126215e-06, "loss": 0.0289, "step": 238380 }, { "epoch": 92.58, "learning_rate": 7.656181229773463e-06, "loss": 0.0492, "step": 238390 }, { "epoch": 92.58, "learning_rate": 7.655663430420713e-06, "loss": 0.0005, "step": 238400 }, { "epoch": 92.59, "learning_rate": 7.655145631067962e-06, "loss": 0.0277, "step": 238410 }, { "epoch": 92.59, "learning_rate": 7.654627831715212e-06, "loss": 0.0508, "step": 238420 }, { "epoch": 92.59, "learning_rate": 7.65411003236246e-06, "loss": 0.106, "step": 238430 }, { "epoch": 92.6, "learning_rate": 7.65359223300971e-06, "loss": 0.0622, "step": 238440 }, { "epoch": 92.6, "learning_rate": 7.653074433656959e-06, "loss": 0.0002, "step": 238450 }, { "epoch": 92.61, "learning_rate": 7.652556634304209e-06, "loss": 0.0129, "step": 238460 }, { "epoch": 92.61, "learning_rate": 7.652038834951457e-06, "loss": 0.0739, "step": 238470 }, { "epoch": 92.61, "learning_rate": 7.651521035598706e-06, "loss": 0.0244, "step": 238480 }, { "epoch": 92.62, "learning_rate": 7.651003236245956e-06, "loss": 0.0608, "step": 238490 }, { "epoch": 92.62, "learning_rate": 7.650485436893204e-06, "loss": 0.0017, "step": 238500 }, { "epoch": 92.63, "learning_rate": 7.649967637540453e-06, "loss": 0.0221, "step": 238510 }, { "epoch": 92.63, "learning_rate": 7.649449838187703e-06, "loss": 0.0659, "step": 238520 }, { "epoch": 92.63, "learning_rate": 7.648932038834953e-06, "loss": 0.0827, "step": 238530 }, { "epoch": 92.64, "learning_rate": 7.6484142394822e-06, "loss": 0.0669, "step": 238540 }, { "epoch": 92.64, "learning_rate": 7.64789644012945e-06, "loss": 0.0288, "step": 238550 }, { "epoch": 92.64, "learning_rate": 7.6473786407767e-06, "loss": 0.0187, "step": 238560 }, { "epoch": 92.65, "learning_rate": 7.64686084142395e-06, "loss": 0.0985, "step": 238570 }, { "epoch": 92.65, "learning_rate": 7.646343042071197e-06, "loss": 0.0005, "step": 238580 }, { "epoch": 92.66, "learning_rate": 7.645825242718447e-06, "loss": 0.0323, "step": 238590 }, { "epoch": 92.66, "learning_rate": 7.645307443365697e-06, "loss": 0.0205, "step": 238600 }, { "epoch": 92.66, "learning_rate": 7.644789644012946e-06, "loss": 0.0677, "step": 238610 }, { "epoch": 92.67, "learning_rate": 7.644271844660194e-06, "loss": 0.0007, "step": 238620 }, { "epoch": 92.67, "learning_rate": 7.643754045307444e-06, "loss": 0.0554, "step": 238630 }, { "epoch": 92.68, "learning_rate": 7.643236245954693e-06, "loss": 0.0004, "step": 238640 }, { "epoch": 92.68, "learning_rate": 7.642718446601943e-06, "loss": 0.0195, "step": 238650 }, { "epoch": 92.68, "learning_rate": 7.642200647249191e-06, "loss": 0.1397, "step": 238660 }, { "epoch": 92.69, "learning_rate": 7.64168284789644e-06, "loss": 0.0809, "step": 238670 }, { "epoch": 92.69, "learning_rate": 7.64116504854369e-06, "loss": 0.0303, "step": 238680 }, { "epoch": 92.7, "learning_rate": 7.640647249190938e-06, "loss": 0.0091, "step": 238690 }, { "epoch": 92.7, "learning_rate": 7.640129449838188e-06, "loss": 0.0639, "step": 238700 }, { "epoch": 92.7, "learning_rate": 7.639611650485437e-06, "loss": 0.0002, "step": 238710 }, { "epoch": 92.71, "learning_rate": 7.639093851132687e-06, "loss": 0.0669, "step": 238720 }, { "epoch": 92.71, "learning_rate": 7.638576051779935e-06, "loss": 0.1333, "step": 238730 }, { "epoch": 92.71, "learning_rate": 7.638058252427184e-06, "loss": 0.003, "step": 238740 }, { "epoch": 92.72, "learning_rate": 7.637540453074434e-06, "loss": 0.0119, "step": 238750 }, { "epoch": 92.72, "learning_rate": 7.637022653721684e-06, "loss": 0.1363, "step": 238760 }, { "epoch": 92.73, "learning_rate": 7.636504854368932e-06, "loss": 0.1757, "step": 238770 }, { "epoch": 92.73, "learning_rate": 7.635987055016181e-06, "loss": 0.1835, "step": 238780 }, { "epoch": 92.73, "learning_rate": 7.635469255663431e-06, "loss": 0.1767, "step": 238790 }, { "epoch": 92.74, "learning_rate": 7.63495145631068e-06, "loss": 0.0853, "step": 238800 }, { "epoch": 92.74, "learning_rate": 7.634433656957928e-06, "loss": 0.0384, "step": 238810 }, { "epoch": 92.75, "learning_rate": 7.633915857605178e-06, "loss": 0.0187, "step": 238820 }, { "epoch": 92.75, "learning_rate": 7.633398058252428e-06, "loss": 0.0313, "step": 238830 }, { "epoch": 92.75, "learning_rate": 7.632880258899677e-06, "loss": 0.1071, "step": 238840 }, { "epoch": 92.76, "learning_rate": 7.632362459546925e-06, "loss": 0.0627, "step": 238850 }, { "epoch": 92.76, "learning_rate": 7.631844660194175e-06, "loss": 0.0168, "step": 238860 }, { "epoch": 92.77, "learning_rate": 7.631326860841424e-06, "loss": 0.0971, "step": 238870 }, { "epoch": 92.77, "learning_rate": 7.630809061488674e-06, "loss": 0.0023, "step": 238880 }, { "epoch": 92.77, "learning_rate": 7.630291262135922e-06, "loss": 0.106, "step": 238890 }, { "epoch": 92.78, "learning_rate": 7.629773462783172e-06, "loss": 0.0364, "step": 238900 }, { "epoch": 92.78, "learning_rate": 7.629255663430421e-06, "loss": 0.0457, "step": 238910 }, { "epoch": 92.78, "learning_rate": 7.628737864077671e-06, "loss": 0.1308, "step": 238920 }, { "epoch": 92.79, "learning_rate": 7.62822006472492e-06, "loss": 0.0657, "step": 238930 }, { "epoch": 92.79, "learning_rate": 7.627702265372169e-06, "loss": 0.0876, "step": 238940 }, { "epoch": 92.8, "learning_rate": 7.627184466019418e-06, "loss": 0.0063, "step": 238950 }, { "epoch": 92.8, "learning_rate": 7.626666666666668e-06, "loss": 0.1407, "step": 238960 }, { "epoch": 92.8, "learning_rate": 7.626148867313916e-06, "loss": 0.1009, "step": 238970 }, { "epoch": 92.81, "learning_rate": 7.625631067961166e-06, "loss": 0.0758, "step": 238980 }, { "epoch": 92.81, "learning_rate": 7.625113268608415e-06, "loss": 0.0229, "step": 238990 }, { "epoch": 92.82, "learning_rate": 7.624595469255664e-06, "loss": 0.0026, "step": 239000 }, { "epoch": 92.82, "learning_rate": 7.624077669902913e-06, "loss": 0.0155, "step": 239010 }, { "epoch": 92.82, "learning_rate": 7.623559870550162e-06, "loss": 0.0131, "step": 239020 }, { "epoch": 92.83, "learning_rate": 7.6230420711974116e-06, "loss": 0.0801, "step": 239030 }, { "epoch": 92.83, "learning_rate": 7.622524271844661e-06, "loss": 0.1329, "step": 239040 }, { "epoch": 92.83, "learning_rate": 7.62200647249191e-06, "loss": 0.1669, "step": 239050 }, { "epoch": 92.84, "learning_rate": 7.621488673139159e-06, "loss": 0.0315, "step": 239060 }, { "epoch": 92.84, "learning_rate": 7.620970873786408e-06, "loss": 0.0436, "step": 239070 }, { "epoch": 92.85, "learning_rate": 7.620453074433658e-06, "loss": 0.0561, "step": 239080 }, { "epoch": 92.85, "learning_rate": 7.619935275080907e-06, "loss": 0.0783, "step": 239090 }, { "epoch": 92.85, "learning_rate": 7.6194174757281555e-06, "loss": 0.0005, "step": 239100 }, { "epoch": 92.86, "learning_rate": 7.618899676375405e-06, "loss": 0.0796, "step": 239110 }, { "epoch": 92.86, "learning_rate": 7.618381877022655e-06, "loss": 0.0023, "step": 239120 }, { "epoch": 92.87, "learning_rate": 7.6178640776699035e-06, "loss": 0.0733, "step": 239130 }, { "epoch": 92.87, "learning_rate": 7.617346278317152e-06, "loss": 0.0119, "step": 239140 }, { "epoch": 92.87, "learning_rate": 7.616828478964402e-06, "loss": 0.0239, "step": 239150 }, { "epoch": 92.88, "learning_rate": 7.6163106796116515e-06, "loss": 0.0402, "step": 239160 }, { "epoch": 92.88, "learning_rate": 7.6157928802588994e-06, "loss": 0.0839, "step": 239170 }, { "epoch": 92.89, "learning_rate": 7.615275080906149e-06, "loss": 0.0443, "step": 239180 }, { "epoch": 92.89, "learning_rate": 7.614757281553399e-06, "loss": 0.1275, "step": 239190 }, { "epoch": 92.89, "learning_rate": 7.614239482200648e-06, "loss": 0.042, "step": 239200 }, { "epoch": 92.9, "learning_rate": 7.613721682847896e-06, "loss": 0.0322, "step": 239210 }, { "epoch": 92.9, "learning_rate": 7.613203883495146e-06, "loss": 0.0338, "step": 239220 }, { "epoch": 92.9, "learning_rate": 7.6126860841423954e-06, "loss": 0.0064, "step": 239230 }, { "epoch": 92.91, "learning_rate": 7.612168284789645e-06, "loss": 0.0434, "step": 239240 }, { "epoch": 92.91, "learning_rate": 7.611650485436893e-06, "loss": 0.0641, "step": 239250 }, { "epoch": 92.92, "learning_rate": 7.611132686084143e-06, "loss": 0.1536, "step": 239260 }, { "epoch": 92.92, "learning_rate": 7.610614886731392e-06, "loss": 0.0675, "step": 239270 }, { "epoch": 92.92, "learning_rate": 7.610097087378642e-06, "loss": 0.1626, "step": 239280 }, { "epoch": 92.93, "learning_rate": 7.60957928802589e-06, "loss": 0.017, "step": 239290 }, { "epoch": 92.93, "learning_rate": 7.609061488673139e-06, "loss": 0.0004, "step": 239300 }, { "epoch": 92.94, "learning_rate": 7.608543689320389e-06, "loss": 0.1239, "step": 239310 }, { "epoch": 92.94, "learning_rate": 7.608025889967639e-06, "loss": 0.1049, "step": 239320 }, { "epoch": 92.94, "learning_rate": 7.6075080906148865e-06, "loss": 0.0223, "step": 239330 }, { "epoch": 92.95, "learning_rate": 7.606990291262136e-06, "loss": 0.1044, "step": 239340 }, { "epoch": 92.95, "learning_rate": 7.606472491909386e-06, "loss": 0.0002, "step": 239350 }, { "epoch": 92.96, "learning_rate": 7.605954692556635e-06, "loss": 0.0946, "step": 239360 }, { "epoch": 92.96, "learning_rate": 7.605436893203883e-06, "loss": 0.0798, "step": 239370 }, { "epoch": 92.96, "learning_rate": 7.604919093851133e-06, "loss": 0.0269, "step": 239380 }, { "epoch": 92.97, "learning_rate": 7.6044012944983826e-06, "loss": 0.0575, "step": 239390 }, { "epoch": 92.97, "learning_rate": 7.603883495145632e-06, "loss": 0.1086, "step": 239400 }, { "epoch": 92.97, "learning_rate": 7.603365695792882e-06, "loss": 0.0727, "step": 239410 }, { "epoch": 92.98, "learning_rate": 7.60284789644013e-06, "loss": 0.0003, "step": 239420 }, { "epoch": 92.98, "learning_rate": 7.602330097087379e-06, "loss": 0.2123, "step": 239430 }, { "epoch": 92.99, "learning_rate": 7.601812297734629e-06, "loss": 0.0421, "step": 239440 }, { "epoch": 92.99, "learning_rate": 7.6012944983818786e-06, "loss": 0.0249, "step": 239450 }, { "epoch": 92.99, "learning_rate": 7.6007766990291265e-06, "loss": 0.0528, "step": 239460 }, { "epoch": 93.0, "learning_rate": 7.600258899676376e-06, "loss": 0.0799, "step": 239470 }, { "epoch": 93.0, "eval_accuracy": 0.951031636863824, "eval_loss": 0.3666875958442688, "eval_runtime": 8.1844, "eval_samples_per_second": 444.135, "eval_steps_per_second": 55.593, "step": 239475 }, { "epoch": 93.0, "learning_rate": 7.599741100323626e-06, "loss": 0.0847, "step": 239480 }, { "epoch": 93.01, "learning_rate": 7.5992233009708745e-06, "loss": 0.0177, "step": 239490 }, { "epoch": 93.01, "learning_rate": 7.598705501618123e-06, "loss": 0.0653, "step": 239500 }, { "epoch": 93.01, "learning_rate": 7.598187702265373e-06, "loss": 0.0104, "step": 239510 }, { "epoch": 93.02, "learning_rate": 7.5976699029126225e-06, "loss": 0.155, "step": 239520 }, { "epoch": 93.02, "learning_rate": 7.597152103559871e-06, "loss": 0.0309, "step": 239530 }, { "epoch": 93.03, "learning_rate": 7.59663430420712e-06, "loss": 0.022, "step": 239540 }, { "epoch": 93.03, "learning_rate": 7.59611650485437e-06, "loss": 0.0003, "step": 239550 }, { "epoch": 93.03, "learning_rate": 7.595598705501619e-06, "loss": 0.091, "step": 239560 }, { "epoch": 93.04, "learning_rate": 7.595080906148868e-06, "loss": 0.0241, "step": 239570 }, { "epoch": 93.04, "learning_rate": 7.594563106796117e-06, "loss": 0.0645, "step": 239580 }, { "epoch": 93.04, "learning_rate": 7.5940453074433665e-06, "loss": 0.0503, "step": 239590 }, { "epoch": 93.05, "learning_rate": 7.593527508090616e-06, "loss": 0.0121, "step": 239600 }, { "epoch": 93.05, "learning_rate": 7.593009708737865e-06, "loss": 0.0461, "step": 239610 }, { "epoch": 93.06, "learning_rate": 7.592491909385114e-06, "loss": 0.009, "step": 239620 }, { "epoch": 93.06, "learning_rate": 7.591974110032363e-06, "loss": 0.0636, "step": 239630 }, { "epoch": 93.06, "learning_rate": 7.591456310679612e-06, "loss": 0.0899, "step": 239640 }, { "epoch": 93.07, "learning_rate": 7.590938511326862e-06, "loss": 0.0886, "step": 239650 }, { "epoch": 93.07, "learning_rate": 7.59042071197411e-06, "loss": 0.0136, "step": 239660 }, { "epoch": 93.08, "learning_rate": 7.58990291262136e-06, "loss": 0.1004, "step": 239670 }, { "epoch": 93.08, "learning_rate": 7.589385113268609e-06, "loss": 0.0558, "step": 239680 }, { "epoch": 93.08, "learning_rate": 7.588867313915858e-06, "loss": 0.0874, "step": 239690 }, { "epoch": 93.09, "learning_rate": 7.588349514563107e-06, "loss": 0.0249, "step": 239700 }, { "epoch": 93.09, "learning_rate": 7.587831715210357e-06, "loss": 0.0273, "step": 239710 }, { "epoch": 93.1, "learning_rate": 7.5873139158576056e-06, "loss": 0.0765, "step": 239720 }, { "epoch": 93.1, "learning_rate": 7.586796116504855e-06, "loss": 0.0842, "step": 239730 }, { "epoch": 93.1, "learning_rate": 7.586278317152104e-06, "loss": 0.019, "step": 239740 }, { "epoch": 93.11, "learning_rate": 7.5857605177993536e-06, "loss": 0.0001, "step": 239750 }, { "epoch": 93.11, "learning_rate": 7.585242718446602e-06, "loss": 0.1205, "step": 239760 }, { "epoch": 93.11, "learning_rate": 7.584724919093852e-06, "loss": 0.0064, "step": 239770 }, { "epoch": 93.12, "learning_rate": 7.584207119741101e-06, "loss": 0.0874, "step": 239780 }, { "epoch": 93.12, "learning_rate": 7.58368932038835e-06, "loss": 0.0045, "step": 239790 }, { "epoch": 93.13, "learning_rate": 7.583171521035599e-06, "loss": 0.088, "step": 239800 }, { "epoch": 93.13, "learning_rate": 7.582653721682849e-06, "loss": 0.2661, "step": 239810 }, { "epoch": 93.13, "learning_rate": 7.5821359223300975e-06, "loss": 0.0294, "step": 239820 }, { "epoch": 93.14, "learning_rate": 7.581618122977346e-06, "loss": 0.0255, "step": 239830 }, { "epoch": 93.14, "learning_rate": 7.581100323624596e-06, "loss": 0.0162, "step": 239840 }, { "epoch": 93.15, "learning_rate": 7.5805825242718455e-06, "loss": 0.0377, "step": 239850 }, { "epoch": 93.15, "learning_rate": 7.580064724919094e-06, "loss": 0.0382, "step": 239860 }, { "epoch": 93.15, "learning_rate": 7.579546925566343e-06, "loss": 0.0008, "step": 239870 }, { "epoch": 93.16, "learning_rate": 7.579029126213593e-06, "loss": 0.0974, "step": 239880 }, { "epoch": 93.16, "learning_rate": 7.578511326860842e-06, "loss": 0.0823, "step": 239890 }, { "epoch": 93.17, "learning_rate": 7.577993527508091e-06, "loss": 0.1039, "step": 239900 }, { "epoch": 93.17, "learning_rate": 7.57747572815534e-06, "loss": 0.1412, "step": 239910 }, { "epoch": 93.17, "learning_rate": 7.5769579288025894e-06, "loss": 0.0023, "step": 239920 }, { "epoch": 93.18, "learning_rate": 7.576440129449839e-06, "loss": 0.1495, "step": 239930 }, { "epoch": 93.18, "learning_rate": 7.575922330097089e-06, "loss": 0.3411, "step": 239940 }, { "epoch": 93.18, "learning_rate": 7.575404530744337e-06, "loss": 0.0223, "step": 239950 }, { "epoch": 93.19, "learning_rate": 7.574886731391586e-06, "loss": 0.0142, "step": 239960 }, { "epoch": 93.19, "learning_rate": 7.574368932038836e-06, "loss": 0.0438, "step": 239970 }, { "epoch": 93.2, "learning_rate": 7.5738511326860855e-06, "loss": 0.1137, "step": 239980 }, { "epoch": 93.2, "learning_rate": 7.573333333333333e-06, "loss": 0.0114, "step": 239990 }, { "epoch": 93.2, "learning_rate": 7.572815533980583e-06, "loss": 0.1191, "step": 240000 }, { "epoch": 93.21, "learning_rate": 7.572297734627833e-06, "loss": 0.1699, "step": 240010 }, { "epoch": 93.21, "learning_rate": 7.571779935275082e-06, "loss": 0.0154, "step": 240020 }, { "epoch": 93.22, "learning_rate": 7.57126213592233e-06, "loss": 0.0317, "step": 240030 }, { "epoch": 93.22, "learning_rate": 7.57074433656958e-06, "loss": 0.1658, "step": 240040 }, { "epoch": 93.22, "learning_rate": 7.570226537216829e-06, "loss": 0.1383, "step": 240050 }, { "epoch": 93.23, "learning_rate": 7.569708737864079e-06, "loss": 0.0081, "step": 240060 }, { "epoch": 93.23, "learning_rate": 7.569190938511327e-06, "loss": 0.0856, "step": 240070 }, { "epoch": 93.23, "learning_rate": 7.5686731391585766e-06, "loss": 0.0685, "step": 240080 }, { "epoch": 93.24, "learning_rate": 7.568155339805826e-06, "loss": 0.098, "step": 240090 }, { "epoch": 93.24, "learning_rate": 7.567637540453076e-06, "loss": 0.0179, "step": 240100 }, { "epoch": 93.25, "learning_rate": 7.567119741100324e-06, "loss": 0.0124, "step": 240110 }, { "epoch": 93.25, "learning_rate": 7.566601941747573e-06, "loss": 0.0172, "step": 240120 }, { "epoch": 93.25, "learning_rate": 7.566084142394823e-06, "loss": 0.0002, "step": 240130 }, { "epoch": 93.26, "learning_rate": 7.5655663430420726e-06, "loss": 0.012, "step": 240140 }, { "epoch": 93.26, "learning_rate": 7.5650485436893205e-06, "loss": 0.0619, "step": 240150 }, { "epoch": 93.27, "learning_rate": 7.56453074433657e-06, "loss": 0.1086, "step": 240160 }, { "epoch": 93.27, "learning_rate": 7.56401294498382e-06, "loss": 0.0752, "step": 240170 }, { "epoch": 93.27, "learning_rate": 7.563495145631069e-06, "loss": 0.0002, "step": 240180 }, { "epoch": 93.28, "learning_rate": 7.562977346278317e-06, "loss": 0.1772, "step": 240190 }, { "epoch": 93.28, "learning_rate": 7.562459546925567e-06, "loss": 0.0747, "step": 240200 }, { "epoch": 93.29, "learning_rate": 7.5619417475728165e-06, "loss": 0.014, "step": 240210 }, { "epoch": 93.29, "learning_rate": 7.561423948220066e-06, "loss": 0.0308, "step": 240220 }, { "epoch": 93.29, "learning_rate": 7.560906148867314e-06, "loss": 0.0016, "step": 240230 }, { "epoch": 93.3, "learning_rate": 7.560388349514564e-06, "loss": 0.0352, "step": 240240 }, { "epoch": 93.3, "learning_rate": 7.559870550161813e-06, "loss": 0.1753, "step": 240250 }, { "epoch": 93.3, "learning_rate": 7.559352750809063e-06, "loss": 0.1696, "step": 240260 }, { "epoch": 93.31, "learning_rate": 7.558834951456311e-06, "loss": 0.015, "step": 240270 }, { "epoch": 93.31, "learning_rate": 7.5583171521035605e-06, "loss": 0.0003, "step": 240280 }, { "epoch": 93.32, "learning_rate": 7.55779935275081e-06, "loss": 0.0192, "step": 240290 }, { "epoch": 93.32, "learning_rate": 7.557281553398059e-06, "loss": 0.0031, "step": 240300 }, { "epoch": 93.32, "learning_rate": 7.556763754045308e-06, "loss": 0.0145, "step": 240310 }, { "epoch": 93.33, "learning_rate": 7.556245954692557e-06, "loss": 0.0267, "step": 240320 }, { "epoch": 93.33, "learning_rate": 7.555728155339807e-06, "loss": 0.1786, "step": 240330 }, { "epoch": 93.34, "learning_rate": 7.555210355987056e-06, "loss": 0.0021, "step": 240340 }, { "epoch": 93.34, "learning_rate": 7.554692556634304e-06, "loss": 0.0002, "step": 240350 }, { "epoch": 93.34, "learning_rate": 7.554174757281554e-06, "loss": 0.042, "step": 240360 }, { "epoch": 93.35, "learning_rate": 7.553656957928804e-06, "loss": 0.0099, "step": 240370 }, { "epoch": 93.35, "learning_rate": 7.553139158576052e-06, "loss": 0.0591, "step": 240380 }, { "epoch": 93.36, "learning_rate": 7.552621359223301e-06, "loss": 0.1944, "step": 240390 }, { "epoch": 93.36, "learning_rate": 7.552103559870551e-06, "loss": 0.0894, "step": 240400 }, { "epoch": 93.36, "learning_rate": 7.5515857605178e-06, "loss": 0.0043, "step": 240410 }, { "epoch": 93.37, "learning_rate": 7.551067961165049e-06, "loss": 0.0029, "step": 240420 }, { "epoch": 93.37, "learning_rate": 7.550550161812298e-06, "loss": 0.0022, "step": 240430 }, { "epoch": 93.37, "learning_rate": 7.5500323624595476e-06, "loss": 0.0932, "step": 240440 }, { "epoch": 93.38, "learning_rate": 7.549514563106796e-06, "loss": 0.0232, "step": 240450 }, { "epoch": 93.38, "learning_rate": 7.548996763754046e-06, "loss": 0.0162, "step": 240460 }, { "epoch": 93.39, "learning_rate": 7.548478964401295e-06, "loss": 0.1003, "step": 240470 }, { "epoch": 93.39, "learning_rate": 7.547961165048544e-06, "loss": 0.0134, "step": 240480 }, { "epoch": 93.39, "learning_rate": 7.547443365695793e-06, "loss": 0.0194, "step": 240490 }, { "epoch": 93.4, "learning_rate": 7.546925566343043e-06, "loss": 0.077, "step": 240500 }, { "epoch": 93.4, "learning_rate": 7.546407766990292e-06, "loss": 0.0186, "step": 240510 }, { "epoch": 93.41, "learning_rate": 7.545889967637541e-06, "loss": 0.0992, "step": 240520 }, { "epoch": 93.41, "learning_rate": 7.54537216828479e-06, "loss": 0.0529, "step": 240530 }, { "epoch": 93.41, "learning_rate": 7.5448543689320395e-06, "loss": 0.0242, "step": 240540 }, { "epoch": 93.42, "learning_rate": 7.544336569579289e-06, "loss": 0.0992, "step": 240550 }, { "epoch": 93.42, "learning_rate": 7.543818770226538e-06, "loss": 0.01, "step": 240560 }, { "epoch": 93.43, "learning_rate": 7.543300970873787e-06, "loss": 0.0235, "step": 240570 }, { "epoch": 93.43, "learning_rate": 7.542783171521036e-06, "loss": 0.0125, "step": 240580 }, { "epoch": 93.43, "learning_rate": 7.542265372168286e-06, "loss": 0.01, "step": 240590 }, { "epoch": 93.44, "learning_rate": 7.541747572815535e-06, "loss": 0.084, "step": 240600 }, { "epoch": 93.44, "learning_rate": 7.5412297734627834e-06, "loss": 0.2215, "step": 240610 }, { "epoch": 93.44, "learning_rate": 7.540711974110033e-06, "loss": 0.0557, "step": 240620 }, { "epoch": 93.45, "learning_rate": 7.540194174757283e-06, "loss": 0.0161, "step": 240630 }, { "epoch": 93.45, "learning_rate": 7.539676375404531e-06, "loss": 0.0151, "step": 240640 }, { "epoch": 93.46, "learning_rate": 7.53915857605178e-06, "loss": 0.0087, "step": 240650 }, { "epoch": 93.46, "learning_rate": 7.53864077669903e-06, "loss": 0.0725, "step": 240660 }, { "epoch": 93.46, "learning_rate": 7.5381229773462795e-06, "loss": 0.1285, "step": 240670 }, { "epoch": 93.47, "learning_rate": 7.537605177993527e-06, "loss": 0.0007, "step": 240680 }, { "epoch": 93.47, "learning_rate": 7.537087378640777e-06, "loss": 0.0937, "step": 240690 }, { "epoch": 93.48, "learning_rate": 7.536569579288027e-06, "loss": 0.0471, "step": 240700 }, { "epoch": 93.48, "learning_rate": 7.536051779935276e-06, "loss": 0.0323, "step": 240710 }, { "epoch": 93.48, "learning_rate": 7.535533980582524e-06, "loss": 0.0824, "step": 240720 }, { "epoch": 93.49, "learning_rate": 7.535016181229774e-06, "loss": 0.0074, "step": 240730 }, { "epoch": 93.49, "learning_rate": 7.534498381877023e-06, "loss": 0.0254, "step": 240740 }, { "epoch": 93.5, "learning_rate": 7.533980582524273e-06, "loss": 0.0317, "step": 240750 }, { "epoch": 93.5, "learning_rate": 7.533462783171521e-06, "loss": 0.1572, "step": 240760 }, { "epoch": 93.5, "learning_rate": 7.5329449838187706e-06, "loss": 0.0002, "step": 240770 }, { "epoch": 93.51, "learning_rate": 7.53242718446602e-06, "loss": 0.0099, "step": 240780 }, { "epoch": 93.51, "learning_rate": 7.53190938511327e-06, "loss": 0.0886, "step": 240790 }, { "epoch": 93.51, "learning_rate": 7.531391585760518e-06, "loss": 0.0727, "step": 240800 }, { "epoch": 93.52, "learning_rate": 7.530873786407767e-06, "loss": 0.0702, "step": 240810 }, { "epoch": 93.52, "learning_rate": 7.530355987055017e-06, "loss": 0.0111, "step": 240820 }, { "epoch": 93.53, "learning_rate": 7.5298381877022666e-06, "loss": 0.0014, "step": 240830 }, { "epoch": 93.53, "learning_rate": 7.5293203883495145e-06, "loss": 0.0437, "step": 240840 }, { "epoch": 93.53, "learning_rate": 7.528802588996764e-06, "loss": 0.035, "step": 240850 }, { "epoch": 93.54, "learning_rate": 7.528284789644014e-06, "loss": 0.0566, "step": 240860 }, { "epoch": 93.54, "learning_rate": 7.527766990291263e-06, "loss": 0.0729, "step": 240870 }, { "epoch": 93.55, "learning_rate": 7.527249190938511e-06, "loss": 0.091, "step": 240880 }, { "epoch": 93.55, "learning_rate": 7.526731391585761e-06, "loss": 0.045, "step": 240890 }, { "epoch": 93.55, "learning_rate": 7.5262135922330105e-06, "loss": 0.0162, "step": 240900 }, { "epoch": 93.56, "learning_rate": 7.52569579288026e-06, "loss": 0.1122, "step": 240910 }, { "epoch": 93.56, "learning_rate": 7.525177993527508e-06, "loss": 0.1686, "step": 240920 }, { "epoch": 93.57, "learning_rate": 7.524660194174758e-06, "loss": 0.1209, "step": 240930 }, { "epoch": 93.57, "learning_rate": 7.524142394822007e-06, "loss": 0.0456, "step": 240940 }, { "epoch": 93.57, "learning_rate": 7.523624595469257e-06, "loss": 0.0682, "step": 240950 }, { "epoch": 93.58, "learning_rate": 7.523106796116505e-06, "loss": 0.0096, "step": 240960 }, { "epoch": 93.58, "learning_rate": 7.5225889967637544e-06, "loss": 0.0973, "step": 240970 }, { "epoch": 93.58, "learning_rate": 7.522071197411004e-06, "loss": 0.0833, "step": 240980 }, { "epoch": 93.59, "learning_rate": 7.521553398058254e-06, "loss": 0.1033, "step": 240990 }, { "epoch": 93.59, "learning_rate": 7.521035598705502e-06, "loss": 0.0123, "step": 241000 }, { "epoch": 93.6, "learning_rate": 7.520517799352751e-06, "loss": 0.0023, "step": 241010 }, { "epoch": 93.6, "learning_rate": 7.520000000000001e-06, "loss": 0.0271, "step": 241020 }, { "epoch": 93.6, "learning_rate": 7.5194822006472505e-06, "loss": 0.0617, "step": 241030 }, { "epoch": 93.61, "learning_rate": 7.518964401294498e-06, "loss": 0.1628, "step": 241040 }, { "epoch": 93.61, "learning_rate": 7.518446601941748e-06, "loss": 0.1281, "step": 241050 }, { "epoch": 93.62, "learning_rate": 7.517928802588998e-06, "loss": 0.1009, "step": 241060 }, { "epoch": 93.62, "learning_rate": 7.517411003236247e-06, "loss": 0.1651, "step": 241070 }, { "epoch": 93.62, "learning_rate": 7.516893203883496e-06, "loss": 0.0052, "step": 241080 }, { "epoch": 93.63, "learning_rate": 7.516375404530745e-06, "loss": 0.1503, "step": 241090 }, { "epoch": 93.63, "learning_rate": 7.515857605177994e-06, "loss": 0.0387, "step": 241100 }, { "epoch": 93.63, "learning_rate": 7.515339805825243e-06, "loss": 0.0873, "step": 241110 }, { "epoch": 93.64, "learning_rate": 7.514822006472493e-06, "loss": 0.0743, "step": 241120 }, { "epoch": 93.64, "learning_rate": 7.5143042071197416e-06, "loss": 0.041, "step": 241130 }, { "epoch": 93.65, "learning_rate": 7.513786407766991e-06, "loss": 0.0272, "step": 241140 }, { "epoch": 93.65, "learning_rate": 7.51326860841424e-06, "loss": 0.0215, "step": 241150 }, { "epoch": 93.65, "learning_rate": 7.5127508090614896e-06, "loss": 0.0326, "step": 241160 }, { "epoch": 93.66, "learning_rate": 7.512233009708738e-06, "loss": 0.0009, "step": 241170 }, { "epoch": 93.66, "learning_rate": 7.511715210355988e-06, "loss": 0.0856, "step": 241180 }, { "epoch": 93.67, "learning_rate": 7.511197411003237e-06, "loss": 0.086, "step": 241190 }, { "epoch": 93.67, "learning_rate": 7.510679611650486e-06, "loss": 0.0112, "step": 241200 }, { "epoch": 93.67, "learning_rate": 7.510161812297735e-06, "loss": 0.0083, "step": 241210 }, { "epoch": 93.68, "learning_rate": 7.509644012944985e-06, "loss": 0.0375, "step": 241220 }, { "epoch": 93.68, "learning_rate": 7.5091262135922335e-06, "loss": 0.0962, "step": 241230 }, { "epoch": 93.69, "learning_rate": 7.508608414239483e-06, "loss": 0.0248, "step": 241240 }, { "epoch": 93.69, "learning_rate": 7.508090614886732e-06, "loss": 0.0005, "step": 241250 }, { "epoch": 93.69, "learning_rate": 7.5075728155339815e-06, "loss": 0.034, "step": 241260 }, { "epoch": 93.7, "learning_rate": 7.50705501618123e-06, "loss": 0.0133, "step": 241270 }, { "epoch": 93.7, "learning_rate": 7.50653721682848e-06, "loss": 0.0233, "step": 241280 }, { "epoch": 93.7, "learning_rate": 7.506019417475729e-06, "loss": 0.0687, "step": 241290 }, { "epoch": 93.71, "learning_rate": 7.5055016181229774e-06, "loss": 0.033, "step": 241300 }, { "epoch": 93.71, "learning_rate": 7.504983818770227e-06, "loss": 0.0888, "step": 241310 }, { "epoch": 93.72, "learning_rate": 7.504466019417477e-06, "loss": 0.1394, "step": 241320 }, { "epoch": 93.72, "learning_rate": 7.5039482200647255e-06, "loss": 0.105, "step": 241330 }, { "epoch": 93.72, "learning_rate": 7.503430420711974e-06, "loss": 0.0807, "step": 241340 }, { "epoch": 93.73, "learning_rate": 7.502912621359224e-06, "loss": 0.0185, "step": 241350 }, { "epoch": 93.73, "learning_rate": 7.5023948220064735e-06, "loss": 0.1281, "step": 241360 }, { "epoch": 93.74, "learning_rate": 7.501877022653722e-06, "loss": 0.1563, "step": 241370 }, { "epoch": 93.74, "learning_rate": 7.501359223300971e-06, "loss": 0.0643, "step": 241380 }, { "epoch": 93.74, "learning_rate": 7.500841423948221e-06, "loss": 0.0445, "step": 241390 }, { "epoch": 93.75, "learning_rate": 7.50032362459547e-06, "loss": 0.0522, "step": 241400 }, { "epoch": 93.75, "learning_rate": 7.499805825242719e-06, "loss": 0.1032, "step": 241410 }, { "epoch": 93.76, "learning_rate": 7.499288025889968e-06, "loss": 0.065, "step": 241420 }, { "epoch": 93.76, "learning_rate": 7.498770226537217e-06, "loss": 0.0091, "step": 241430 }, { "epoch": 93.76, "learning_rate": 7.498252427184467e-06, "loss": 0.0704, "step": 241440 }, { "epoch": 93.77, "learning_rate": 7.497734627831715e-06, "loss": 0.0997, "step": 241450 }, { "epoch": 93.77, "learning_rate": 7.4972168284789646e-06, "loss": 0.0282, "step": 241460 }, { "epoch": 93.77, "learning_rate": 7.496699029126214e-06, "loss": 0.0573, "step": 241470 }, { "epoch": 93.78, "learning_rate": 7.496181229773464e-06, "loss": 0.0088, "step": 241480 }, { "epoch": 93.78, "learning_rate": 7.495663430420712e-06, "loss": 0.0809, "step": 241490 }, { "epoch": 93.79, "learning_rate": 7.495145631067961e-06, "loss": 0.1551, "step": 241500 }, { "epoch": 93.79, "learning_rate": 7.494627831715211e-06, "loss": 0.0007, "step": 241510 }, { "epoch": 93.79, "learning_rate": 7.4941100323624606e-06, "loss": 0.2059, "step": 241520 }, { "epoch": 93.8, "learning_rate": 7.4935922330097085e-06, "loss": 0.0006, "step": 241530 }, { "epoch": 93.8, "learning_rate": 7.493074433656958e-06, "loss": 0.0767, "step": 241540 }, { "epoch": 93.81, "learning_rate": 7.492556634304208e-06, "loss": 0.0301, "step": 241550 }, { "epoch": 93.81, "learning_rate": 7.492038834951457e-06, "loss": 0.0008, "step": 241560 }, { "epoch": 93.81, "learning_rate": 7.491521035598705e-06, "loss": 0.027, "step": 241570 }, { "epoch": 93.82, "learning_rate": 7.491003236245955e-06, "loss": 0.0486, "step": 241580 }, { "epoch": 93.82, "learning_rate": 7.4904854368932045e-06, "loss": 0.0921, "step": 241590 }, { "epoch": 93.83, "learning_rate": 7.489967637540454e-06, "loss": 0.0381, "step": 241600 }, { "epoch": 93.83, "learning_rate": 7.489449838187702e-06, "loss": 0.1182, "step": 241610 }, { "epoch": 93.83, "learning_rate": 7.488932038834952e-06, "loss": 0.0598, "step": 241620 }, { "epoch": 93.84, "learning_rate": 7.488414239482201e-06, "loss": 0.0314, "step": 241630 }, { "epoch": 93.84, "learning_rate": 7.487896440129451e-06, "loss": 0.073, "step": 241640 }, { "epoch": 93.84, "learning_rate": 7.4873786407767005e-06, "loss": 0.2092, "step": 241650 }, { "epoch": 93.85, "learning_rate": 7.4868608414239484e-06, "loss": 0.0529, "step": 241660 }, { "epoch": 93.85, "learning_rate": 7.486343042071198e-06, "loss": 0.0385, "step": 241670 }, { "epoch": 93.86, "learning_rate": 7.485825242718448e-06, "loss": 0.024, "step": 241680 }, { "epoch": 93.86, "learning_rate": 7.485307443365697e-06, "loss": 0.0363, "step": 241690 }, { "epoch": 93.86, "learning_rate": 7.484789644012945e-06, "loss": 0.0069, "step": 241700 }, { "epoch": 93.87, "learning_rate": 7.484271844660195e-06, "loss": 0.115, "step": 241710 }, { "epoch": 93.87, "learning_rate": 7.4837540453074445e-06, "loss": 0.1405, "step": 241720 }, { "epoch": 93.88, "learning_rate": 7.483236245954694e-06, "loss": 0.1307, "step": 241730 }, { "epoch": 93.88, "learning_rate": 7.482718446601942e-06, "loss": 0.0536, "step": 241740 }, { "epoch": 93.88, "learning_rate": 7.482200647249192e-06, "loss": 0.0132, "step": 241750 }, { "epoch": 93.89, "learning_rate": 7.481682847896441e-06, "loss": 0.025, "step": 241760 }, { "epoch": 93.89, "learning_rate": 7.48116504854369e-06, "loss": 0.0211, "step": 241770 }, { "epoch": 93.9, "learning_rate": 7.480647249190939e-06, "loss": 0.1992, "step": 241780 }, { "epoch": 93.9, "learning_rate": 7.480129449838188e-06, "loss": 0.0371, "step": 241790 }, { "epoch": 93.9, "learning_rate": 7.479611650485438e-06, "loss": 0.0007, "step": 241800 }, { "epoch": 93.91, "learning_rate": 7.479093851132687e-06, "loss": 0.0184, "step": 241810 }, { "epoch": 93.91, "learning_rate": 7.4785760517799356e-06, "loss": 0.011, "step": 241820 }, { "epoch": 93.91, "learning_rate": 7.478058252427185e-06, "loss": 0.102, "step": 241830 }, { "epoch": 93.92, "learning_rate": 7.477540453074435e-06, "loss": 0.0038, "step": 241840 }, { "epoch": 93.92, "learning_rate": 7.4770226537216836e-06, "loss": 0.0261, "step": 241850 }, { "epoch": 93.93, "learning_rate": 7.476504854368932e-06, "loss": 0.0107, "step": 241860 }, { "epoch": 93.93, "learning_rate": 7.475987055016182e-06, "loss": 0.0162, "step": 241870 }, { "epoch": 93.93, "learning_rate": 7.4754692556634316e-06, "loss": 0.0137, "step": 241880 }, { "epoch": 93.94, "learning_rate": 7.47495145631068e-06, "loss": 0.0035, "step": 241890 }, { "epoch": 93.94, "learning_rate": 7.474433656957929e-06, "loss": 0.0058, "step": 241900 }, { "epoch": 93.95, "learning_rate": 7.473915857605179e-06, "loss": 0.0493, "step": 241910 }, { "epoch": 93.95, "learning_rate": 7.4733980582524275e-06, "loss": 0.1736, "step": 241920 }, { "epoch": 93.95, "learning_rate": 7.472880258899677e-06, "loss": 0.0522, "step": 241930 }, { "epoch": 93.96, "learning_rate": 7.472362459546926e-06, "loss": 0.1151, "step": 241940 }, { "epoch": 93.96, "learning_rate": 7.4718446601941755e-06, "loss": 0.0801, "step": 241950 }, { "epoch": 93.97, "learning_rate": 7.471326860841424e-06, "loss": 0.0097, "step": 241960 }, { "epoch": 93.97, "learning_rate": 7.470809061488674e-06, "loss": 0.0285, "step": 241970 }, { "epoch": 93.97, "learning_rate": 7.470291262135923e-06, "loss": 0.0894, "step": 241980 }, { "epoch": 93.98, "learning_rate": 7.469773462783172e-06, "loss": 0.092, "step": 241990 }, { "epoch": 93.98, "learning_rate": 7.469255663430421e-06, "loss": 0.0461, "step": 242000 }, { "epoch": 93.98, "learning_rate": 7.468737864077671e-06, "loss": 0.0457, "step": 242010 }, { "epoch": 93.99, "learning_rate": 7.4682200647249195e-06, "loss": 0.0706, "step": 242020 }, { "epoch": 93.99, "learning_rate": 7.467702265372169e-06, "loss": 0.218, "step": 242030 }, { "epoch": 94.0, "learning_rate": 7.467184466019418e-06, "loss": 0.0713, "step": 242040 }, { "epoch": 94.0, "learning_rate": 7.4666666666666675e-06, "loss": 0.0413, "step": 242050 }, { "epoch": 94.0, "eval_accuracy": 0.9515818431911967, "eval_loss": 0.3603576719760895, "eval_runtime": 8.2048, "eval_samples_per_second": 443.032, "eval_steps_per_second": 55.455, "step": 242050 }, { "epoch": 94.0, "learning_rate": 7.466148867313916e-06, "loss": 0.0198, "step": 242060 }, { "epoch": 94.01, "learning_rate": 7.465631067961166e-06, "loss": 0.0501, "step": 242070 }, { "epoch": 94.01, "learning_rate": 7.465113268608415e-06, "loss": 0.0263, "step": 242080 }, { "epoch": 94.02, "learning_rate": 7.464595469255664e-06, "loss": 0.0108, "step": 242090 }, { "epoch": 94.02, "learning_rate": 7.464077669902913e-06, "loss": 0.0484, "step": 242100 }, { "epoch": 94.02, "learning_rate": 7.463559870550162e-06, "loss": 0.0825, "step": 242110 }, { "epoch": 94.03, "learning_rate": 7.463042071197411e-06, "loss": 0.1299, "step": 242120 }, { "epoch": 94.03, "learning_rate": 7.462524271844661e-06, "loss": 0.0002, "step": 242130 }, { "epoch": 94.03, "learning_rate": 7.46200647249191e-06, "loss": 0.0347, "step": 242140 }, { "epoch": 94.04, "learning_rate": 7.4614886731391586e-06, "loss": 0.0961, "step": 242150 }, { "epoch": 94.04, "learning_rate": 7.460970873786408e-06, "loss": 0.0229, "step": 242160 }, { "epoch": 94.05, "learning_rate": 7.460453074433658e-06, "loss": 0.0746, "step": 242170 }, { "epoch": 94.05, "learning_rate": 7.459935275080907e-06, "loss": 0.0715, "step": 242180 }, { "epoch": 94.05, "learning_rate": 7.459417475728155e-06, "loss": 0.0558, "step": 242190 }, { "epoch": 94.06, "learning_rate": 7.458899676375405e-06, "loss": 0.0072, "step": 242200 }, { "epoch": 94.06, "learning_rate": 7.4583818770226546e-06, "loss": 0.0921, "step": 242210 }, { "epoch": 94.07, "learning_rate": 7.457864077669904e-06, "loss": 0.1365, "step": 242220 }, { "epoch": 94.07, "learning_rate": 7.457346278317152e-06, "loss": 0.0721, "step": 242230 }, { "epoch": 94.07, "learning_rate": 7.456828478964402e-06, "loss": 0.0445, "step": 242240 }, { "epoch": 94.08, "learning_rate": 7.456310679611651e-06, "loss": 0.0086, "step": 242250 }, { "epoch": 94.08, "learning_rate": 7.455792880258901e-06, "loss": 0.0115, "step": 242260 }, { "epoch": 94.09, "learning_rate": 7.455275080906149e-06, "loss": 0.2317, "step": 242270 }, { "epoch": 94.09, "learning_rate": 7.4547572815533985e-06, "loss": 0.1381, "step": 242280 }, { "epoch": 94.09, "learning_rate": 7.454239482200648e-06, "loss": 0.0487, "step": 242290 }, { "epoch": 94.1, "learning_rate": 7.453721682847898e-06, "loss": 0.0002, "step": 242300 }, { "epoch": 94.1, "learning_rate": 7.453203883495146e-06, "loss": 0.002, "step": 242310 }, { "epoch": 94.1, "learning_rate": 7.452686084142395e-06, "loss": 0.0118, "step": 242320 }, { "epoch": 94.11, "learning_rate": 7.452168284789645e-06, "loss": 0.0993, "step": 242330 }, { "epoch": 94.11, "learning_rate": 7.4516504854368945e-06, "loss": 0.0226, "step": 242340 }, { "epoch": 94.12, "learning_rate": 7.4511326860841424e-06, "loss": 0.0863, "step": 242350 }, { "epoch": 94.12, "learning_rate": 7.450614886731392e-06, "loss": 0.0113, "step": 242360 }, { "epoch": 94.12, "learning_rate": 7.450097087378642e-06, "loss": 0.0726, "step": 242370 }, { "epoch": 94.13, "learning_rate": 7.449579288025891e-06, "loss": 0.0101, "step": 242380 }, { "epoch": 94.13, "learning_rate": 7.449061488673139e-06, "loss": 0.0206, "step": 242390 }, { "epoch": 94.14, "learning_rate": 7.448543689320389e-06, "loss": 0.0318, "step": 242400 }, { "epoch": 94.14, "learning_rate": 7.4480258899676385e-06, "loss": 0.0396, "step": 242410 }, { "epoch": 94.14, "learning_rate": 7.447508090614888e-06, "loss": 0.0006, "step": 242420 }, { "epoch": 94.15, "learning_rate": 7.446990291262136e-06, "loss": 0.1187, "step": 242430 }, { "epoch": 94.15, "learning_rate": 7.446472491909386e-06, "loss": 0.0118, "step": 242440 }, { "epoch": 94.16, "learning_rate": 7.445954692556635e-06, "loss": 0.0432, "step": 242450 }, { "epoch": 94.16, "learning_rate": 7.445436893203885e-06, "loss": 0.0161, "step": 242460 }, { "epoch": 94.16, "learning_rate": 7.444919093851133e-06, "loss": 0.0688, "step": 242470 }, { "epoch": 94.17, "learning_rate": 7.444401294498382e-06, "loss": 0.0901, "step": 242480 }, { "epoch": 94.17, "learning_rate": 7.443883495145632e-06, "loss": 0.1486, "step": 242490 }, { "epoch": 94.17, "learning_rate": 7.443365695792882e-06, "loss": 0.0165, "step": 242500 }, { "epoch": 94.18, "learning_rate": 7.4428478964401296e-06, "loss": 0.0394, "step": 242510 }, { "epoch": 94.18, "learning_rate": 7.442330097087379e-06, "loss": 0.0128, "step": 242520 }, { "epoch": 94.19, "learning_rate": 7.441812297734629e-06, "loss": 0.0329, "step": 242530 }, { "epoch": 94.19, "learning_rate": 7.441294498381878e-06, "loss": 0.1185, "step": 242540 }, { "epoch": 94.19, "learning_rate": 7.440776699029126e-06, "loss": 0.0101, "step": 242550 }, { "epoch": 94.2, "learning_rate": 7.440258899676376e-06, "loss": 0.0752, "step": 242560 }, { "epoch": 94.2, "learning_rate": 7.4397411003236256e-06, "loss": 0.0668, "step": 242570 }, { "epoch": 94.21, "learning_rate": 7.439223300970874e-06, "loss": 0.068, "step": 242580 }, { "epoch": 94.21, "learning_rate": 7.438705501618123e-06, "loss": 0.0001, "step": 242590 }, { "epoch": 94.21, "learning_rate": 7.438187702265373e-06, "loss": 0.0212, "step": 242600 }, { "epoch": 94.22, "learning_rate": 7.437669902912622e-06, "loss": 0.0235, "step": 242610 }, { "epoch": 94.22, "learning_rate": 7.437152103559871e-06, "loss": 0.1304, "step": 242620 }, { "epoch": 94.23, "learning_rate": 7.43663430420712e-06, "loss": 0.115, "step": 242630 }, { "epoch": 94.23, "learning_rate": 7.4361165048543695e-06, "loss": 0.0859, "step": 242640 }, { "epoch": 94.23, "learning_rate": 7.435598705501619e-06, "loss": 0.0499, "step": 242650 }, { "epoch": 94.24, "learning_rate": 7.435080906148868e-06, "loss": 0.0459, "step": 242660 }, { "epoch": 94.24, "learning_rate": 7.434563106796117e-06, "loss": 0.0347, "step": 242670 }, { "epoch": 94.24, "learning_rate": 7.434045307443366e-06, "loss": 0.0438, "step": 242680 }, { "epoch": 94.25, "learning_rate": 7.433527508090616e-06, "loss": 0.0493, "step": 242690 }, { "epoch": 94.25, "learning_rate": 7.433009708737865e-06, "loss": 0.1844, "step": 242700 }, { "epoch": 94.26, "learning_rate": 7.4324919093851134e-06, "loss": 0.0648, "step": 242710 }, { "epoch": 94.26, "learning_rate": 7.431974110032363e-06, "loss": 0.0302, "step": 242720 }, { "epoch": 94.26, "learning_rate": 7.431456310679612e-06, "loss": 0.1572, "step": 242730 }, { "epoch": 94.27, "learning_rate": 7.4309385113268615e-06, "loss": 0.0766, "step": 242740 }, { "epoch": 94.27, "learning_rate": 7.430420711974111e-06, "loss": 0.1026, "step": 242750 }, { "epoch": 94.28, "learning_rate": 7.42990291262136e-06, "loss": 0.0133, "step": 242760 }, { "epoch": 94.28, "learning_rate": 7.429385113268609e-06, "loss": 0.0824, "step": 242770 }, { "epoch": 94.28, "learning_rate": 7.428867313915858e-06, "loss": 0.0358, "step": 242780 }, { "epoch": 94.29, "learning_rate": 7.428349514563108e-06, "loss": 0.0023, "step": 242790 }, { "epoch": 94.29, "learning_rate": 7.427831715210357e-06, "loss": 0.0188, "step": 242800 }, { "epoch": 94.3, "learning_rate": 7.427313915857605e-06, "loss": 0.0494, "step": 242810 }, { "epoch": 94.3, "learning_rate": 7.426796116504855e-06, "loss": 0.0173, "step": 242820 }, { "epoch": 94.3, "learning_rate": 7.426278317152105e-06, "loss": 0.0806, "step": 242830 }, { "epoch": 94.31, "learning_rate": 7.425760517799353e-06, "loss": 0.0699, "step": 242840 }, { "epoch": 94.31, "learning_rate": 7.425242718446602e-06, "loss": 0.1913, "step": 242850 }, { "epoch": 94.31, "learning_rate": 7.424724919093852e-06, "loss": 0.0362, "step": 242860 }, { "epoch": 94.32, "learning_rate": 7.424207119741101e-06, "loss": 0.1441, "step": 242870 }, { "epoch": 94.32, "learning_rate": 7.42368932038835e-06, "loss": 0.0524, "step": 242880 }, { "epoch": 94.33, "learning_rate": 7.423171521035599e-06, "loss": 0.2376, "step": 242890 }, { "epoch": 94.33, "learning_rate": 7.4226537216828486e-06, "loss": 0.1154, "step": 242900 }, { "epoch": 94.33, "learning_rate": 7.422135922330098e-06, "loss": 0.1111, "step": 242910 }, { "epoch": 94.34, "learning_rate": 7.421618122977346e-06, "loss": 0.0556, "step": 242920 }, { "epoch": 94.34, "learning_rate": 7.421100323624596e-06, "loss": 0.0924, "step": 242930 }, { "epoch": 94.35, "learning_rate": 7.420582524271845e-06, "loss": 0.033, "step": 242940 }, { "epoch": 94.35, "learning_rate": 7.420064724919095e-06, "loss": 0.0792, "step": 242950 }, { "epoch": 94.35, "learning_rate": 7.419546925566343e-06, "loss": 0.0021, "step": 242960 }, { "epoch": 94.36, "learning_rate": 7.4190291262135925e-06, "loss": 0.0366, "step": 242970 }, { "epoch": 94.36, "learning_rate": 7.418511326860842e-06, "loss": 0.0206, "step": 242980 }, { "epoch": 94.37, "learning_rate": 7.417993527508092e-06, "loss": 0.0444, "step": 242990 }, { "epoch": 94.37, "learning_rate": 7.41747572815534e-06, "loss": 0.0006, "step": 243000 }, { "epoch": 94.37, "learning_rate": 7.416957928802589e-06, "loss": 0.0547, "step": 243010 }, { "epoch": 94.38, "learning_rate": 7.416440129449839e-06, "loss": 0.0004, "step": 243020 }, { "epoch": 94.38, "learning_rate": 7.4159223300970885e-06, "loss": 0.0784, "step": 243030 }, { "epoch": 94.38, "learning_rate": 7.4154045307443364e-06, "loss": 0.1938, "step": 243040 }, { "epoch": 94.39, "learning_rate": 7.414886731391586e-06, "loss": 0.0177, "step": 243050 }, { "epoch": 94.39, "learning_rate": 7.414368932038836e-06, "loss": 0.0962, "step": 243060 }, { "epoch": 94.4, "learning_rate": 7.413851132686085e-06, "loss": 0.0455, "step": 243070 }, { "epoch": 94.4, "learning_rate": 7.413333333333333e-06, "loss": 0.0561, "step": 243080 }, { "epoch": 94.4, "learning_rate": 7.412815533980583e-06, "loss": 0.1252, "step": 243090 }, { "epoch": 94.41, "learning_rate": 7.4122977346278325e-06, "loss": 0.0772, "step": 243100 }, { "epoch": 94.41, "learning_rate": 7.411779935275082e-06, "loss": 0.0996, "step": 243110 }, { "epoch": 94.42, "learning_rate": 7.41126213592233e-06, "loss": 0.0301, "step": 243120 }, { "epoch": 94.42, "learning_rate": 7.41074433656958e-06, "loss": 0.0204, "step": 243130 }, { "epoch": 94.42, "learning_rate": 7.410226537216829e-06, "loss": 0.0887, "step": 243140 }, { "epoch": 94.43, "learning_rate": 7.409708737864079e-06, "loss": 0.0015, "step": 243150 }, { "epoch": 94.43, "learning_rate": 7.409190938511327e-06, "loss": 0.0666, "step": 243160 }, { "epoch": 94.43, "learning_rate": 7.408673139158576e-06, "loss": 0.047, "step": 243170 }, { "epoch": 94.44, "learning_rate": 7.408155339805826e-06, "loss": 0.0275, "step": 243180 }, { "epoch": 94.44, "learning_rate": 7.407637540453076e-06, "loss": 0.1671, "step": 243190 }, { "epoch": 94.45, "learning_rate": 7.4071197411003236e-06, "loss": 0.0111, "step": 243200 }, { "epoch": 94.45, "learning_rate": 7.406601941747573e-06, "loss": 0.0607, "step": 243210 }, { "epoch": 94.45, "learning_rate": 7.406084142394823e-06, "loss": 0.0274, "step": 243220 }, { "epoch": 94.46, "learning_rate": 7.405566343042072e-06, "loss": 0.0507, "step": 243230 }, { "epoch": 94.46, "learning_rate": 7.40504854368932e-06, "loss": 0.0096, "step": 243240 }, { "epoch": 94.47, "learning_rate": 7.40453074433657e-06, "loss": 0.0671, "step": 243250 }, { "epoch": 94.47, "learning_rate": 7.4040129449838196e-06, "loss": 0.0248, "step": 243260 }, { "epoch": 94.47, "learning_rate": 7.403495145631069e-06, "loss": 0.1207, "step": 243270 }, { "epoch": 94.48, "learning_rate": 7.402977346278317e-06, "loss": 0.1018, "step": 243280 }, { "epoch": 94.48, "learning_rate": 7.402459546925567e-06, "loss": 0.0452, "step": 243290 }, { "epoch": 94.49, "learning_rate": 7.401941747572816e-06, "loss": 0.0102, "step": 243300 }, { "epoch": 94.49, "learning_rate": 7.401423948220066e-06, "loss": 0.0067, "step": 243310 }, { "epoch": 94.49, "learning_rate": 7.400906148867315e-06, "loss": 0.0334, "step": 243320 }, { "epoch": 94.5, "learning_rate": 7.4003883495145635e-06, "loss": 0.0589, "step": 243330 }, { "epoch": 94.5, "learning_rate": 7.399870550161813e-06, "loss": 0.0195, "step": 243340 }, { "epoch": 94.5, "learning_rate": 7.399352750809063e-06, "loss": 0.0184, "step": 243350 }, { "epoch": 94.51, "learning_rate": 7.3988349514563115e-06, "loss": 0.045, "step": 243360 }, { "epoch": 94.51, "learning_rate": 7.39831715210356e-06, "loss": 0.0934, "step": 243370 }, { "epoch": 94.52, "learning_rate": 7.39779935275081e-06, "loss": 0.0564, "step": 243380 }, { "epoch": 94.52, "learning_rate": 7.397281553398059e-06, "loss": 0.037, "step": 243390 }, { "epoch": 94.52, "learning_rate": 7.396763754045308e-06, "loss": 0.0275, "step": 243400 }, { "epoch": 94.53, "learning_rate": 7.396245954692557e-06, "loss": 0.0267, "step": 243410 }, { "epoch": 94.53, "learning_rate": 7.395728155339807e-06, "loss": 0.0023, "step": 243420 }, { "epoch": 94.54, "learning_rate": 7.3952103559870555e-06, "loss": 0.0005, "step": 243430 }, { "epoch": 94.54, "learning_rate": 7.394692556634305e-06, "loss": 0.1122, "step": 243440 }, { "epoch": 94.54, "learning_rate": 7.394174757281554e-06, "loss": 0.1022, "step": 243450 }, { "epoch": 94.55, "learning_rate": 7.3936569579288035e-06, "loss": 0.0326, "step": 243460 }, { "epoch": 94.55, "learning_rate": 7.393139158576052e-06, "loss": 0.126, "step": 243470 }, { "epoch": 94.56, "learning_rate": 7.392621359223302e-06, "loss": 0.0749, "step": 243480 }, { "epoch": 94.56, "learning_rate": 7.392103559870551e-06, "loss": 0.0295, "step": 243490 }, { "epoch": 94.56, "learning_rate": 7.3915857605178e-06, "loss": 0.0006, "step": 243500 }, { "epoch": 94.57, "learning_rate": 7.391067961165049e-06, "loss": 0.0452, "step": 243510 }, { "epoch": 94.57, "learning_rate": 7.390550161812299e-06, "loss": 0.1028, "step": 243520 }, { "epoch": 94.57, "learning_rate": 7.390032362459547e-06, "loss": 0.0435, "step": 243530 }, { "epoch": 94.58, "learning_rate": 7.389514563106797e-06, "loss": 0.0138, "step": 243540 }, { "epoch": 94.58, "learning_rate": 7.388996763754046e-06, "loss": 0.0365, "step": 243550 }, { "epoch": 94.59, "learning_rate": 7.388478964401295e-06, "loss": 0.1402, "step": 243560 }, { "epoch": 94.59, "learning_rate": 7.387961165048544e-06, "loss": 0.0579, "step": 243570 }, { "epoch": 94.59, "learning_rate": 7.387443365695793e-06, "loss": 0.0315, "step": 243580 }, { "epoch": 94.6, "learning_rate": 7.3869255663430426e-06, "loss": 0.1554, "step": 243590 }, { "epoch": 94.6, "learning_rate": 7.386407766990292e-06, "loss": 0.0127, "step": 243600 }, { "epoch": 94.61, "learning_rate": 7.385889967637541e-06, "loss": 0.0429, "step": 243610 }, { "epoch": 94.61, "learning_rate": 7.38537216828479e-06, "loss": 0.1281, "step": 243620 }, { "epoch": 94.61, "learning_rate": 7.384854368932039e-06, "loss": 0.0954, "step": 243630 }, { "epoch": 94.62, "learning_rate": 7.384336569579289e-06, "loss": 0.0832, "step": 243640 }, { "epoch": 94.62, "learning_rate": 7.383818770226538e-06, "loss": 0.0323, "step": 243650 }, { "epoch": 94.63, "learning_rate": 7.3833009708737865e-06, "loss": 0.1274, "step": 243660 }, { "epoch": 94.63, "learning_rate": 7.382783171521036e-06, "loss": 0.0286, "step": 243670 }, { "epoch": 94.63, "learning_rate": 7.382265372168286e-06, "loss": 0.0102, "step": 243680 }, { "epoch": 94.64, "learning_rate": 7.3817475728155345e-06, "loss": 0.0469, "step": 243690 }, { "epoch": 94.64, "learning_rate": 7.381229773462783e-06, "loss": 0.0142, "step": 243700 }, { "epoch": 94.64, "learning_rate": 7.380711974110033e-06, "loss": 0.0975, "step": 243710 }, { "epoch": 94.65, "learning_rate": 7.3801941747572825e-06, "loss": 0.0021, "step": 243720 }, { "epoch": 94.65, "learning_rate": 7.3796763754045304e-06, "loss": 0.0935, "step": 243730 }, { "epoch": 94.66, "learning_rate": 7.37915857605178e-06, "loss": 0.0919, "step": 243740 }, { "epoch": 94.66, "learning_rate": 7.37864077669903e-06, "loss": 0.0516, "step": 243750 }, { "epoch": 94.66, "learning_rate": 7.378122977346279e-06, "loss": 0.0002, "step": 243760 }, { "epoch": 94.67, "learning_rate": 7.377605177993527e-06, "loss": 0.021, "step": 243770 }, { "epoch": 94.67, "learning_rate": 7.377087378640777e-06, "loss": 0.1205, "step": 243780 }, { "epoch": 94.68, "learning_rate": 7.3765695792880265e-06, "loss": 0.0206, "step": 243790 }, { "epoch": 94.68, "learning_rate": 7.376051779935276e-06, "loss": 0.0074, "step": 243800 }, { "epoch": 94.68, "learning_rate": 7.375533980582524e-06, "loss": 0.0939, "step": 243810 }, { "epoch": 94.69, "learning_rate": 7.375016181229774e-06, "loss": 0.0458, "step": 243820 }, { "epoch": 94.69, "learning_rate": 7.374498381877023e-06, "loss": 0.2001, "step": 243830 }, { "epoch": 94.7, "learning_rate": 7.373980582524273e-06, "loss": 0.0313, "step": 243840 }, { "epoch": 94.7, "learning_rate": 7.373462783171521e-06, "loss": 0.0236, "step": 243850 }, { "epoch": 94.7, "learning_rate": 7.37294498381877e-06, "loss": 0.0662, "step": 243860 }, { "epoch": 94.71, "learning_rate": 7.37242718446602e-06, "loss": 0.0725, "step": 243870 }, { "epoch": 94.71, "learning_rate": 7.37190938511327e-06, "loss": 0.0803, "step": 243880 }, { "epoch": 94.71, "learning_rate": 7.371391585760519e-06, "loss": 0.0198, "step": 243890 }, { "epoch": 94.72, "learning_rate": 7.370873786407767e-06, "loss": 0.036, "step": 243900 }, { "epoch": 94.72, "learning_rate": 7.370355987055017e-06, "loss": 0.0223, "step": 243910 }, { "epoch": 94.73, "learning_rate": 7.369838187702266e-06, "loss": 0.1279, "step": 243920 }, { "epoch": 94.73, "learning_rate": 7.369320388349516e-06, "loss": 0.1346, "step": 243930 }, { "epoch": 94.73, "learning_rate": 7.368802588996764e-06, "loss": 0.06, "step": 243940 }, { "epoch": 94.74, "learning_rate": 7.3682847896440136e-06, "loss": 0.0458, "step": 243950 }, { "epoch": 94.74, "learning_rate": 7.367766990291263e-06, "loss": 0.0377, "step": 243960 }, { "epoch": 94.75, "learning_rate": 7.367249190938513e-06, "loss": 0.0005, "step": 243970 }, { "epoch": 94.75, "learning_rate": 7.366731391585761e-06, "loss": 0.0806, "step": 243980 }, { "epoch": 94.75, "learning_rate": 7.36621359223301e-06, "loss": 0.0291, "step": 243990 }, { "epoch": 94.76, "learning_rate": 7.36569579288026e-06, "loss": 0.0002, "step": 244000 }, { "epoch": 94.76, "learning_rate": 7.36517799352751e-06, "loss": 0.0655, "step": 244010 }, { "epoch": 94.77, "learning_rate": 7.3646601941747575e-06, "loss": 0.0405, "step": 244020 }, { "epoch": 94.77, "learning_rate": 7.364142394822007e-06, "loss": 0.0513, "step": 244030 }, { "epoch": 94.77, "learning_rate": 7.363624595469257e-06, "loss": 0.1117, "step": 244040 }, { "epoch": 94.78, "learning_rate": 7.3631067961165055e-06, "loss": 0.0486, "step": 244050 }, { "epoch": 94.78, "learning_rate": 7.362588996763754e-06, "loss": 0.0505, "step": 244060 }, { "epoch": 94.78, "learning_rate": 7.362071197411004e-06, "loss": 0.0003, "step": 244070 }, { "epoch": 94.79, "learning_rate": 7.3615533980582535e-06, "loss": 0.0228, "step": 244080 }, { "epoch": 94.79, "learning_rate": 7.361035598705502e-06, "loss": 0.081, "step": 244090 }, { "epoch": 94.8, "learning_rate": 7.360517799352751e-06, "loss": 0.1203, "step": 244100 }, { "epoch": 94.8, "learning_rate": 7.360000000000001e-06, "loss": 0.0015, "step": 244110 }, { "epoch": 94.8, "learning_rate": 7.35948220064725e-06, "loss": 0.2574, "step": 244120 }, { "epoch": 94.81, "learning_rate": 7.358964401294499e-06, "loss": 0.0946, "step": 244130 }, { "epoch": 94.81, "learning_rate": 7.358446601941748e-06, "loss": 0.1121, "step": 244140 }, { "epoch": 94.82, "learning_rate": 7.3579288025889975e-06, "loss": 0.0139, "step": 244150 }, { "epoch": 94.82, "learning_rate": 7.357411003236247e-06, "loss": 0.0333, "step": 244160 }, { "epoch": 94.82, "learning_rate": 7.356893203883496e-06, "loss": 0.061, "step": 244170 }, { "epoch": 94.83, "learning_rate": 7.356375404530745e-06, "loss": 0.0609, "step": 244180 }, { "epoch": 94.83, "learning_rate": 7.355857605177994e-06, "loss": 0.1075, "step": 244190 }, { "epoch": 94.83, "learning_rate": 7.355339805825243e-06, "loss": 0.0027, "step": 244200 }, { "epoch": 94.84, "learning_rate": 7.354822006472493e-06, "loss": 0.0942, "step": 244210 }, { "epoch": 94.84, "learning_rate": 7.354304207119741e-06, "loss": 0.0492, "step": 244220 }, { "epoch": 94.85, "learning_rate": 7.353786407766991e-06, "loss": 0.1011, "step": 244230 }, { "epoch": 94.85, "learning_rate": 7.35326860841424e-06, "loss": 0.1131, "step": 244240 }, { "epoch": 94.85, "learning_rate": 7.352750809061489e-06, "loss": 0.0527, "step": 244250 }, { "epoch": 94.86, "learning_rate": 7.352233009708738e-06, "loss": 0.0057, "step": 244260 }, { "epoch": 94.86, "learning_rate": 7.351715210355988e-06, "loss": 0.0873, "step": 244270 }, { "epoch": 94.87, "learning_rate": 7.3511974110032366e-06, "loss": 0.0618, "step": 244280 }, { "epoch": 94.87, "learning_rate": 7.350679611650486e-06, "loss": 0.0265, "step": 244290 }, { "epoch": 94.87, "learning_rate": 7.350161812297735e-06, "loss": 0.0204, "step": 244300 }, { "epoch": 94.88, "learning_rate": 7.3496440129449846e-06, "loss": 0.0133, "step": 244310 }, { "epoch": 94.88, "learning_rate": 7.349126213592233e-06, "loss": 0.0059, "step": 244320 }, { "epoch": 94.89, "learning_rate": 7.348608414239483e-06, "loss": 0.1071, "step": 244330 }, { "epoch": 94.89, "learning_rate": 7.348090614886732e-06, "loss": 0.0071, "step": 244340 }, { "epoch": 94.89, "learning_rate": 7.347572815533981e-06, "loss": 0.0827, "step": 244350 }, { "epoch": 94.9, "learning_rate": 7.34705501618123e-06, "loss": 0.0307, "step": 244360 }, { "epoch": 94.9, "learning_rate": 7.34653721682848e-06, "loss": 0.1318, "step": 244370 }, { "epoch": 94.9, "learning_rate": 7.3460194174757285e-06, "loss": 0.0132, "step": 244380 }, { "epoch": 94.91, "learning_rate": 7.345501618122977e-06, "loss": 0.0182, "step": 244390 }, { "epoch": 94.91, "learning_rate": 7.344983818770227e-06, "loss": 0.0235, "step": 244400 }, { "epoch": 94.92, "learning_rate": 7.3444660194174765e-06, "loss": 0.0696, "step": 244410 }, { "epoch": 94.92, "learning_rate": 7.343948220064726e-06, "loss": 0.0419, "step": 244420 }, { "epoch": 94.92, "learning_rate": 7.343430420711974e-06, "loss": 0.0008, "step": 244430 }, { "epoch": 94.93, "learning_rate": 7.342912621359224e-06, "loss": 0.159, "step": 244440 }, { "epoch": 94.93, "learning_rate": 7.342394822006473e-06, "loss": 0.1616, "step": 244450 }, { "epoch": 94.94, "learning_rate": 7.341877022653723e-06, "loss": 0.0915, "step": 244460 }, { "epoch": 94.94, "learning_rate": 7.341359223300971e-06, "loss": 0.0336, "step": 244470 }, { "epoch": 94.94, "learning_rate": 7.3408414239482205e-06, "loss": 0.0266, "step": 244480 }, { "epoch": 94.95, "learning_rate": 7.34032362459547e-06, "loss": 0.1053, "step": 244490 }, { "epoch": 94.95, "learning_rate": 7.33980582524272e-06, "loss": 0.069, "step": 244500 }, { "epoch": 94.96, "learning_rate": 7.339288025889968e-06, "loss": 0.0234, "step": 244510 }, { "epoch": 94.96, "learning_rate": 7.338770226537217e-06, "loss": 0.0412, "step": 244520 }, { "epoch": 94.96, "learning_rate": 7.338252427184467e-06, "loss": 0.0917, "step": 244530 }, { "epoch": 94.97, "learning_rate": 7.3377346278317165e-06, "loss": 0.2571, "step": 244540 }, { "epoch": 94.97, "learning_rate": 7.337216828478964e-06, "loss": 0.1234, "step": 244550 }, { "epoch": 94.97, "learning_rate": 7.336699029126214e-06, "loss": 0.0302, "step": 244560 }, { "epoch": 94.98, "learning_rate": 7.336181229773464e-06, "loss": 0.0668, "step": 244570 }, { "epoch": 94.98, "learning_rate": 7.335663430420713e-06, "loss": 0.0854, "step": 244580 }, { "epoch": 94.99, "learning_rate": 7.335145631067961e-06, "loss": 0.0138, "step": 244590 }, { "epoch": 94.99, "learning_rate": 7.334627831715211e-06, "loss": 0.0103, "step": 244600 }, { "epoch": 94.99, "learning_rate": 7.33411003236246e-06, "loss": 0.0001, "step": 244610 }, { "epoch": 95.0, "learning_rate": 7.33359223300971e-06, "loss": 0.071, "step": 244620 }, { "epoch": 95.0, "eval_accuracy": 0.9482806052269601, "eval_loss": 0.3724736273288727, "eval_runtime": 8.291, "eval_samples_per_second": 438.427, "eval_steps_per_second": 54.879, "step": 244625 }, { "epoch": 95.0, "learning_rate": 7.333074433656958e-06, "loss": 0.0049, "step": 244630 }, { "epoch": 95.01, "learning_rate": 7.3325566343042076e-06, "loss": 0.0385, "step": 244640 }, { "epoch": 95.01, "learning_rate": 7.332038834951457e-06, "loss": 0.0183, "step": 244650 }, { "epoch": 95.01, "learning_rate": 7.331521035598707e-06, "loss": 0.0379, "step": 244660 }, { "epoch": 95.02, "learning_rate": 7.331003236245955e-06, "loss": 0.1306, "step": 244670 }, { "epoch": 95.02, "learning_rate": 7.330485436893204e-06, "loss": 0.0612, "step": 244680 }, { "epoch": 95.03, "learning_rate": 7.329967637540454e-06, "loss": 0.003, "step": 244690 }, { "epoch": 95.03, "learning_rate": 7.329449838187704e-06, "loss": 0.041, "step": 244700 }, { "epoch": 95.03, "learning_rate": 7.3289320388349515e-06, "loss": 0.1253, "step": 244710 }, { "epoch": 95.04, "learning_rate": 7.328414239482201e-06, "loss": 0.0237, "step": 244720 }, { "epoch": 95.04, "learning_rate": 7.327896440129451e-06, "loss": 0.0886, "step": 244730 }, { "epoch": 95.04, "learning_rate": 7.3273786407767e-06, "loss": 0.0273, "step": 244740 }, { "epoch": 95.05, "learning_rate": 7.326860841423948e-06, "loss": 0.0085, "step": 244750 }, { "epoch": 95.05, "learning_rate": 7.326343042071198e-06, "loss": 0.0156, "step": 244760 }, { "epoch": 95.06, "learning_rate": 7.3258252427184475e-06, "loss": 0.0104, "step": 244770 }, { "epoch": 95.06, "learning_rate": 7.325307443365697e-06, "loss": 0.0363, "step": 244780 }, { "epoch": 95.06, "learning_rate": 7.324789644012945e-06, "loss": 0.0601, "step": 244790 }, { "epoch": 95.07, "learning_rate": 7.324271844660195e-06, "loss": 0.0623, "step": 244800 }, { "epoch": 95.07, "learning_rate": 7.323754045307444e-06, "loss": 0.0008, "step": 244810 }, { "epoch": 95.08, "learning_rate": 7.323236245954694e-06, "loss": 0.1423, "step": 244820 }, { "epoch": 95.08, "learning_rate": 7.322718446601942e-06, "loss": 0.0314, "step": 244830 }, { "epoch": 95.08, "learning_rate": 7.3222006472491915e-06, "loss": 0.0365, "step": 244840 }, { "epoch": 95.09, "learning_rate": 7.321682847896441e-06, "loss": 0.0727, "step": 244850 }, { "epoch": 95.09, "learning_rate": 7.32116504854369e-06, "loss": 0.0521, "step": 244860 }, { "epoch": 95.1, "learning_rate": 7.320647249190939e-06, "loss": 0.0211, "step": 244870 }, { "epoch": 95.1, "learning_rate": 7.320129449838188e-06, "loss": 0.0272, "step": 244880 }, { "epoch": 95.1, "learning_rate": 7.319611650485438e-06, "loss": 0.0519, "step": 244890 }, { "epoch": 95.11, "learning_rate": 7.319093851132687e-06, "loss": 0.1658, "step": 244900 }, { "epoch": 95.11, "learning_rate": 7.318576051779935e-06, "loss": 0.2328, "step": 244910 }, { "epoch": 95.11, "learning_rate": 7.318058252427185e-06, "loss": 0.1275, "step": 244920 }, { "epoch": 95.12, "learning_rate": 7.317540453074435e-06, "loss": 0.1211, "step": 244930 }, { "epoch": 95.12, "learning_rate": 7.317022653721683e-06, "loss": 0.0023, "step": 244940 }, { "epoch": 95.13, "learning_rate": 7.316504854368932e-06, "loss": 0.042, "step": 244950 }, { "epoch": 95.13, "learning_rate": 7.315987055016182e-06, "loss": 0.0146, "step": 244960 }, { "epoch": 95.13, "learning_rate": 7.315469255663431e-06, "loss": 0.0147, "step": 244970 }, { "epoch": 95.14, "learning_rate": 7.31495145631068e-06, "loss": 0.1993, "step": 244980 }, { "epoch": 95.14, "learning_rate": 7.31443365695793e-06, "loss": 0.0432, "step": 244990 }, { "epoch": 95.15, "learning_rate": 7.3139158576051786e-06, "loss": 0.1412, "step": 245000 }, { "epoch": 95.15, "learning_rate": 7.313398058252428e-06, "loss": 0.0385, "step": 245010 }, { "epoch": 95.15, "learning_rate": 7.312880258899677e-06, "loss": 0.0292, "step": 245020 }, { "epoch": 95.16, "learning_rate": 7.312362459546927e-06, "loss": 0.0912, "step": 245030 }, { "epoch": 95.16, "learning_rate": 7.311844660194175e-06, "loss": 0.0002, "step": 245040 }, { "epoch": 95.17, "learning_rate": 7.311326860841424e-06, "loss": 0.0233, "step": 245050 }, { "epoch": 95.17, "learning_rate": 7.310809061488674e-06, "loss": 0.1104, "step": 245060 }, { "epoch": 95.17, "learning_rate": 7.310291262135923e-06, "loss": 0.0611, "step": 245070 }, { "epoch": 95.18, "learning_rate": 7.309773462783172e-06, "loss": 0.118, "step": 245080 }, { "epoch": 95.18, "learning_rate": 7.309255663430421e-06, "loss": 0.1542, "step": 245090 }, { "epoch": 95.18, "learning_rate": 7.3087378640776705e-06, "loss": 0.0146, "step": 245100 }, { "epoch": 95.19, "learning_rate": 7.30822006472492e-06, "loss": 0.0145, "step": 245110 }, { "epoch": 95.19, "learning_rate": 7.307702265372169e-06, "loss": 0.0113, "step": 245120 }, { "epoch": 95.2, "learning_rate": 7.307184466019418e-06, "loss": 0.0812, "step": 245130 }, { "epoch": 95.2, "learning_rate": 7.306666666666667e-06, "loss": 0.0333, "step": 245140 }, { "epoch": 95.2, "learning_rate": 7.306148867313917e-06, "loss": 0.0034, "step": 245150 }, { "epoch": 95.21, "learning_rate": 7.305631067961166e-06, "loss": 0.0264, "step": 245160 }, { "epoch": 95.21, "learning_rate": 7.3051132686084145e-06, "loss": 0.0714, "step": 245170 }, { "epoch": 95.22, "learning_rate": 7.304595469255664e-06, "loss": 0.0442, "step": 245180 }, { "epoch": 95.22, "learning_rate": 7.304077669902914e-06, "loss": 0.0357, "step": 245190 }, { "epoch": 95.22, "learning_rate": 7.303559870550162e-06, "loss": 0.0224, "step": 245200 }, { "epoch": 95.23, "learning_rate": 7.303042071197411e-06, "loss": 0.093, "step": 245210 }, { "epoch": 95.23, "learning_rate": 7.302524271844661e-06, "loss": 0.0163, "step": 245220 }, { "epoch": 95.23, "learning_rate": 7.3020064724919105e-06, "loss": 0.0381, "step": 245230 }, { "epoch": 95.24, "learning_rate": 7.301488673139158e-06, "loss": 0.0388, "step": 245240 }, { "epoch": 95.24, "learning_rate": 7.300970873786408e-06, "loss": 0.0002, "step": 245250 }, { "epoch": 95.25, "learning_rate": 7.300453074433658e-06, "loss": 0.0911, "step": 245260 }, { "epoch": 95.25, "learning_rate": 7.299935275080907e-06, "loss": 0.0089, "step": 245270 }, { "epoch": 95.25, "learning_rate": 7.299417475728155e-06, "loss": 0.023, "step": 245280 }, { "epoch": 95.26, "learning_rate": 7.298899676375405e-06, "loss": 0.1346, "step": 245290 }, { "epoch": 95.26, "learning_rate": 7.298381877022654e-06, "loss": 0.1675, "step": 245300 }, { "epoch": 95.27, "learning_rate": 7.297864077669904e-06, "loss": 0.0554, "step": 245310 }, { "epoch": 95.27, "learning_rate": 7.297346278317152e-06, "loss": 0.0379, "step": 245320 }, { "epoch": 95.27, "learning_rate": 7.2968284789644016e-06, "loss": 0.0337, "step": 245330 }, { "epoch": 95.28, "learning_rate": 7.296310679611651e-06, "loss": 0.1455, "step": 245340 }, { "epoch": 95.28, "learning_rate": 7.295792880258901e-06, "loss": 0.0911, "step": 245350 }, { "epoch": 95.29, "learning_rate": 7.295275080906149e-06, "loss": 0.0008, "step": 245360 }, { "epoch": 95.29, "learning_rate": 7.294757281553398e-06, "loss": 0.108, "step": 245370 }, { "epoch": 95.29, "learning_rate": 7.294239482200648e-06, "loss": 0.0589, "step": 245380 }, { "epoch": 95.3, "learning_rate": 7.293721682847898e-06, "loss": 0.0667, "step": 245390 }, { "epoch": 95.3, "learning_rate": 7.2932038834951455e-06, "loss": 0.2048, "step": 245400 }, { "epoch": 95.3, "learning_rate": 7.292686084142395e-06, "loss": 0.073, "step": 245410 }, { "epoch": 95.31, "learning_rate": 7.292168284789645e-06, "loss": 0.0331, "step": 245420 }, { "epoch": 95.31, "learning_rate": 7.291650485436894e-06, "loss": 0.0531, "step": 245430 }, { "epoch": 95.32, "learning_rate": 7.291132686084142e-06, "loss": 0.0003, "step": 245440 }, { "epoch": 95.32, "learning_rate": 7.290614886731392e-06, "loss": 0.0279, "step": 245450 }, { "epoch": 95.32, "learning_rate": 7.2900970873786415e-06, "loss": 0.0509, "step": 245460 }, { "epoch": 95.33, "learning_rate": 7.289579288025891e-06, "loss": 0.0225, "step": 245470 }, { "epoch": 95.33, "learning_rate": 7.289061488673139e-06, "loss": 0.0047, "step": 245480 }, { "epoch": 95.34, "learning_rate": 7.288543689320389e-06, "loss": 0.0204, "step": 245490 }, { "epoch": 95.34, "learning_rate": 7.288025889967638e-06, "loss": 0.0928, "step": 245500 }, { "epoch": 95.34, "learning_rate": 7.287508090614888e-06, "loss": 0.0571, "step": 245510 }, { "epoch": 95.35, "learning_rate": 7.286990291262136e-06, "loss": 0.0144, "step": 245520 }, { "epoch": 95.35, "learning_rate": 7.2864724919093855e-06, "loss": 0.1381, "step": 245530 }, { "epoch": 95.36, "learning_rate": 7.285954692556635e-06, "loss": 0.026, "step": 245540 }, { "epoch": 95.36, "learning_rate": 7.285436893203885e-06, "loss": 0.1009, "step": 245550 }, { "epoch": 95.36, "learning_rate": 7.2849190938511335e-06, "loss": 0.0314, "step": 245560 }, { "epoch": 95.37, "learning_rate": 7.284401294498382e-06, "loss": 0.1501, "step": 245570 }, { "epoch": 95.37, "learning_rate": 7.283883495145632e-06, "loss": 0.0393, "step": 245580 }, { "epoch": 95.37, "learning_rate": 7.2833656957928815e-06, "loss": 0.0725, "step": 245590 }, { "epoch": 95.38, "learning_rate": 7.28284789644013e-06, "loss": 0.172, "step": 245600 }, { "epoch": 95.38, "learning_rate": 7.282330097087379e-06, "loss": 0.0422, "step": 245610 }, { "epoch": 95.39, "learning_rate": 7.281812297734629e-06, "loss": 0.024, "step": 245620 }, { "epoch": 95.39, "learning_rate": 7.281294498381878e-06, "loss": 0.112, "step": 245630 }, { "epoch": 95.39, "learning_rate": 7.280776699029127e-06, "loss": 0.149, "step": 245640 }, { "epoch": 95.4, "learning_rate": 7.280258899676376e-06, "loss": 0.0339, "step": 245650 }, { "epoch": 95.4, "learning_rate": 7.279741100323625e-06, "loss": 0.0955, "step": 245660 }, { "epoch": 95.41, "learning_rate": 7.279223300970874e-06, "loss": 0.0897, "step": 245670 }, { "epoch": 95.41, "learning_rate": 7.278705501618124e-06, "loss": 0.043, "step": 245680 }, { "epoch": 95.41, "learning_rate": 7.2781877022653726e-06, "loss": 0.0914, "step": 245690 }, { "epoch": 95.42, "learning_rate": 7.277669902912622e-06, "loss": 0.1133, "step": 245700 }, { "epoch": 95.42, "learning_rate": 7.277152103559871e-06, "loss": 0.0971, "step": 245710 }, { "epoch": 95.43, "learning_rate": 7.276634304207121e-06, "loss": 0.033, "step": 245720 }, { "epoch": 95.43, "learning_rate": 7.276116504854369e-06, "loss": 0.047, "step": 245730 }, { "epoch": 95.43, "learning_rate": 7.275598705501619e-06, "loss": 0.0484, "step": 245740 }, { "epoch": 95.44, "learning_rate": 7.275080906148868e-06, "loss": 0.0011, "step": 245750 }, { "epoch": 95.44, "learning_rate": 7.274563106796117e-06, "loss": 0.1063, "step": 245760 }, { "epoch": 95.44, "learning_rate": 7.274045307443366e-06, "loss": 0.1546, "step": 245770 }, { "epoch": 95.45, "learning_rate": 7.273527508090616e-06, "loss": 0.0508, "step": 245780 }, { "epoch": 95.45, "learning_rate": 7.2730097087378645e-06, "loss": 0.1168, "step": 245790 }, { "epoch": 95.46, "learning_rate": 7.272491909385114e-06, "loss": 0.0242, "step": 245800 }, { "epoch": 95.46, "learning_rate": 7.271974110032363e-06, "loss": 0.0135, "step": 245810 }, { "epoch": 95.46, "learning_rate": 7.2714563106796125e-06, "loss": 0.0446, "step": 245820 }, { "epoch": 95.47, "learning_rate": 7.270938511326861e-06, "loss": 0.1222, "step": 245830 }, { "epoch": 95.47, "learning_rate": 7.270420711974111e-06, "loss": 0.0665, "step": 245840 }, { "epoch": 95.48, "learning_rate": 7.26990291262136e-06, "loss": 0.1829, "step": 245850 }, { "epoch": 95.48, "learning_rate": 7.2693851132686085e-06, "loss": 0.0298, "step": 245860 }, { "epoch": 95.48, "learning_rate": 7.268867313915858e-06, "loss": 0.1288, "step": 245870 }, { "epoch": 95.49, "learning_rate": 7.268349514563108e-06, "loss": 0.1544, "step": 245880 }, { "epoch": 95.49, "learning_rate": 7.2678317152103565e-06, "loss": 0.0296, "step": 245890 }, { "epoch": 95.5, "learning_rate": 7.267313915857605e-06, "loss": 0.0698, "step": 245900 }, { "epoch": 95.5, "learning_rate": 7.266796116504855e-06, "loss": 0.113, "step": 245910 }, { "epoch": 95.5, "learning_rate": 7.2662783171521045e-06, "loss": 0.0402, "step": 245920 }, { "epoch": 95.51, "learning_rate": 7.265760517799353e-06, "loss": 0.0471, "step": 245930 }, { "epoch": 95.51, "learning_rate": 7.265242718446602e-06, "loss": 0.0138, "step": 245940 }, { "epoch": 95.51, "learning_rate": 7.264724919093852e-06, "loss": 0.0555, "step": 245950 }, { "epoch": 95.52, "learning_rate": 7.264207119741101e-06, "loss": 0.0625, "step": 245960 }, { "epoch": 95.52, "learning_rate": 7.26368932038835e-06, "loss": 0.0003, "step": 245970 }, { "epoch": 95.53, "learning_rate": 7.263171521035599e-06, "loss": 0.1187, "step": 245980 }, { "epoch": 95.53, "learning_rate": 7.262653721682848e-06, "loss": 0.0399, "step": 245990 }, { "epoch": 95.53, "learning_rate": 7.262135922330098e-06, "loss": 0.0194, "step": 246000 }, { "epoch": 95.54, "learning_rate": 7.261618122977346e-06, "loss": 0.0762, "step": 246010 }, { "epoch": 95.54, "learning_rate": 7.2611003236245956e-06, "loss": 0.1149, "step": 246020 }, { "epoch": 95.55, "learning_rate": 7.260582524271845e-06, "loss": 0.0111, "step": 246030 }, { "epoch": 95.55, "learning_rate": 7.260064724919095e-06, "loss": 0.0216, "step": 246040 }, { "epoch": 95.55, "learning_rate": 7.259546925566343e-06, "loss": 0.1582, "step": 246050 }, { "epoch": 95.56, "learning_rate": 7.259029126213592e-06, "loss": 0.113, "step": 246060 }, { "epoch": 95.56, "learning_rate": 7.258511326860842e-06, "loss": 0.0207, "step": 246070 }, { "epoch": 95.57, "learning_rate": 7.257993527508092e-06, "loss": 0.0522, "step": 246080 }, { "epoch": 95.57, "learning_rate": 7.2574757281553395e-06, "loss": 0.1453, "step": 246090 }, { "epoch": 95.57, "learning_rate": 7.256957928802589e-06, "loss": 0.0619, "step": 246100 }, { "epoch": 95.58, "learning_rate": 7.256440129449839e-06, "loss": 0.0292, "step": 246110 }, { "epoch": 95.58, "learning_rate": 7.255922330097088e-06, "loss": 0.0181, "step": 246120 }, { "epoch": 95.58, "learning_rate": 7.255404530744338e-06, "loss": 0.0787, "step": 246130 }, { "epoch": 95.59, "learning_rate": 7.254886731391586e-06, "loss": 0.1276, "step": 246140 }, { "epoch": 95.59, "learning_rate": 7.2543689320388355e-06, "loss": 0.0256, "step": 246150 }, { "epoch": 95.6, "learning_rate": 7.253851132686085e-06, "loss": 0.0245, "step": 246160 }, { "epoch": 95.6, "learning_rate": 7.253333333333335e-06, "loss": 0.0542, "step": 246170 }, { "epoch": 95.6, "learning_rate": 7.252815533980583e-06, "loss": 0.1703, "step": 246180 }, { "epoch": 95.61, "learning_rate": 7.252297734627832e-06, "loss": 0.0797, "step": 246190 }, { "epoch": 95.61, "learning_rate": 7.251779935275082e-06, "loss": 0.0938, "step": 246200 }, { "epoch": 95.62, "learning_rate": 7.2512621359223315e-06, "loss": 0.0736, "step": 246210 }, { "epoch": 95.62, "learning_rate": 7.2507443365695795e-06, "loss": 0.0429, "step": 246220 }, { "epoch": 95.62, "learning_rate": 7.250226537216829e-06, "loss": 0.0833, "step": 246230 }, { "epoch": 95.63, "learning_rate": 7.249708737864079e-06, "loss": 0.174, "step": 246240 }, { "epoch": 95.63, "learning_rate": 7.249190938511328e-06, "loss": 0.0499, "step": 246250 }, { "epoch": 95.63, "learning_rate": 7.248673139158576e-06, "loss": 0.0458, "step": 246260 }, { "epoch": 95.64, "learning_rate": 7.248155339805826e-06, "loss": 0.061, "step": 246270 }, { "epoch": 95.64, "learning_rate": 7.2476375404530755e-06, "loss": 0.0004, "step": 246280 }, { "epoch": 95.65, "learning_rate": 7.247119741100325e-06, "loss": 0.1346, "step": 246290 }, { "epoch": 95.65, "learning_rate": 7.246601941747573e-06, "loss": 0.0189, "step": 246300 }, { "epoch": 95.65, "learning_rate": 7.246084142394823e-06, "loss": 0.0581, "step": 246310 }, { "epoch": 95.66, "learning_rate": 7.245566343042072e-06, "loss": 0.0969, "step": 246320 }, { "epoch": 95.66, "learning_rate": 7.245048543689321e-06, "loss": 0.0275, "step": 246330 }, { "epoch": 95.67, "learning_rate": 7.24453074433657e-06, "loss": 0.0323, "step": 246340 }, { "epoch": 95.67, "learning_rate": 7.244012944983819e-06, "loss": 0.0595, "step": 246350 }, { "epoch": 95.67, "learning_rate": 7.243495145631069e-06, "loss": 0.0963, "step": 246360 }, { "epoch": 95.68, "learning_rate": 7.242977346278318e-06, "loss": 0.0536, "step": 246370 }, { "epoch": 95.68, "learning_rate": 7.2424595469255666e-06, "loss": 0.0002, "step": 246380 }, { "epoch": 95.69, "learning_rate": 7.241941747572816e-06, "loss": 0.0885, "step": 246390 }, { "epoch": 95.69, "learning_rate": 7.241423948220066e-06, "loss": 0.0626, "step": 246400 }, { "epoch": 95.69, "learning_rate": 7.240906148867315e-06, "loss": 0.1123, "step": 246410 }, { "epoch": 95.7, "learning_rate": 7.240388349514563e-06, "loss": 0.1517, "step": 246420 }, { "epoch": 95.7, "learning_rate": 7.239870550161813e-06, "loss": 0.0767, "step": 246430 }, { "epoch": 95.7, "learning_rate": 7.239352750809063e-06, "loss": 0.0441, "step": 246440 }, { "epoch": 95.71, "learning_rate": 7.238834951456311e-06, "loss": 0.0603, "step": 246450 }, { "epoch": 95.71, "learning_rate": 7.23831715210356e-06, "loss": 0.131, "step": 246460 }, { "epoch": 95.72, "learning_rate": 7.23779935275081e-06, "loss": 0.0352, "step": 246470 }, { "epoch": 95.72, "learning_rate": 7.2372815533980585e-06, "loss": 0.0181, "step": 246480 }, { "epoch": 95.72, "learning_rate": 7.236763754045308e-06, "loss": 0.0187, "step": 246490 }, { "epoch": 95.73, "learning_rate": 7.236245954692557e-06, "loss": 0.0732, "step": 246500 }, { "epoch": 95.73, "learning_rate": 7.2357281553398065e-06, "loss": 0.0788, "step": 246510 }, { "epoch": 95.74, "learning_rate": 7.235210355987055e-06, "loss": 0.0341, "step": 246520 }, { "epoch": 95.74, "learning_rate": 7.234692556634305e-06, "loss": 0.1159, "step": 246530 }, { "epoch": 95.74, "learning_rate": 7.234174757281554e-06, "loss": 0.2356, "step": 246540 }, { "epoch": 95.75, "learning_rate": 7.233656957928803e-06, "loss": 0.0041, "step": 246550 }, { "epoch": 95.75, "learning_rate": 7.233139158576052e-06, "loss": 0.0111, "step": 246560 }, { "epoch": 95.76, "learning_rate": 7.232621359223302e-06, "loss": 0.0641, "step": 246570 }, { "epoch": 95.76, "learning_rate": 7.2321035598705505e-06, "loss": 0.1044, "step": 246580 }, { "epoch": 95.76, "learning_rate": 7.2315857605178e-06, "loss": 0.0005, "step": 246590 }, { "epoch": 95.77, "learning_rate": 7.231067961165049e-06, "loss": 0.0523, "step": 246600 }, { "epoch": 95.77, "learning_rate": 7.2305501618122985e-06, "loss": 0.0525, "step": 246610 }, { "epoch": 95.77, "learning_rate": 7.230032362459547e-06, "loss": 0.0911, "step": 246620 }, { "epoch": 95.78, "learning_rate": 7.229514563106797e-06, "loss": 0.1091, "step": 246630 }, { "epoch": 95.78, "learning_rate": 7.228996763754046e-06, "loss": 0.0457, "step": 246640 }, { "epoch": 95.79, "learning_rate": 7.228478964401295e-06, "loss": 0.0365, "step": 246650 }, { "epoch": 95.79, "learning_rate": 7.227961165048544e-06, "loss": 0.0369, "step": 246660 }, { "epoch": 95.79, "learning_rate": 7.227443365695793e-06, "loss": 0.0456, "step": 246670 }, { "epoch": 95.8, "learning_rate": 7.226925566343042e-06, "loss": 0.091, "step": 246680 }, { "epoch": 95.8, "learning_rate": 7.226407766990292e-06, "loss": 0.037, "step": 246690 }, { "epoch": 95.81, "learning_rate": 7.225889967637542e-06, "loss": 0.0381, "step": 246700 }, { "epoch": 95.81, "learning_rate": 7.2253721682847896e-06, "loss": 0.0078, "step": 246710 }, { "epoch": 95.81, "learning_rate": 7.224854368932039e-06, "loss": 0.0085, "step": 246720 }, { "epoch": 95.82, "learning_rate": 7.224336569579289e-06, "loss": 0.0524, "step": 246730 }, { "epoch": 95.82, "learning_rate": 7.223818770226538e-06, "loss": 0.0325, "step": 246740 }, { "epoch": 95.83, "learning_rate": 7.223300970873786e-06, "loss": 0.0107, "step": 246750 }, { "epoch": 95.83, "learning_rate": 7.222783171521036e-06, "loss": 0.0523, "step": 246760 }, { "epoch": 95.83, "learning_rate": 7.222265372168286e-06, "loss": 0.0529, "step": 246770 }, { "epoch": 95.84, "learning_rate": 7.221747572815535e-06, "loss": 0.0606, "step": 246780 }, { "epoch": 95.84, "learning_rate": 7.221229773462783e-06, "loss": 0.016, "step": 246790 }, { "epoch": 95.84, "learning_rate": 7.220711974110033e-06, "loss": 0.0627, "step": 246800 }, { "epoch": 95.85, "learning_rate": 7.220194174757282e-06, "loss": 0.0002, "step": 246810 }, { "epoch": 95.85, "learning_rate": 7.219676375404532e-06, "loss": 0.062, "step": 246820 }, { "epoch": 95.86, "learning_rate": 7.21915857605178e-06, "loss": 0.1087, "step": 246830 }, { "epoch": 95.86, "learning_rate": 7.2186407766990295e-06, "loss": 0.0852, "step": 246840 }, { "epoch": 95.86, "learning_rate": 7.218122977346279e-06, "loss": 0.0002, "step": 246850 }, { "epoch": 95.87, "learning_rate": 7.217605177993529e-06, "loss": 0.1184, "step": 246860 }, { "epoch": 95.87, "learning_rate": 7.217087378640777e-06, "loss": 0.0368, "step": 246870 }, { "epoch": 95.88, "learning_rate": 7.216569579288026e-06, "loss": 0.1051, "step": 246880 }, { "epoch": 95.88, "learning_rate": 7.216051779935276e-06, "loss": 0.1661, "step": 246890 }, { "epoch": 95.88, "learning_rate": 7.2155339805825255e-06, "loss": 0.0513, "step": 246900 }, { "epoch": 95.89, "learning_rate": 7.2150161812297735e-06, "loss": 0.0354, "step": 246910 }, { "epoch": 95.89, "learning_rate": 7.214498381877023e-06, "loss": 0.0887, "step": 246920 }, { "epoch": 95.9, "learning_rate": 7.213980582524273e-06, "loss": 0.0697, "step": 246930 }, { "epoch": 95.9, "learning_rate": 7.213462783171522e-06, "loss": 0.0464, "step": 246940 }, { "epoch": 95.9, "learning_rate": 7.21294498381877e-06, "loss": 0.0716, "step": 246950 }, { "epoch": 95.91, "learning_rate": 7.21242718446602e-06, "loss": 0.065, "step": 246960 }, { "epoch": 95.91, "learning_rate": 7.2119093851132695e-06, "loss": 0.0833, "step": 246970 }, { "epoch": 95.91, "learning_rate": 7.211391585760519e-06, "loss": 0.0228, "step": 246980 }, { "epoch": 95.92, "learning_rate": 7.210873786407767e-06, "loss": 0.0016, "step": 246990 }, { "epoch": 95.92, "learning_rate": 7.210355987055017e-06, "loss": 0.0003, "step": 247000 }, { "epoch": 95.93, "learning_rate": 7.209838187702266e-06, "loss": 0.0665, "step": 247010 }, { "epoch": 95.93, "learning_rate": 7.209320388349516e-06, "loss": 0.006, "step": 247020 }, { "epoch": 95.93, "learning_rate": 7.208802588996764e-06, "loss": 0.0606, "step": 247030 }, { "epoch": 95.94, "learning_rate": 7.208284789644013e-06, "loss": 0.0434, "step": 247040 }, { "epoch": 95.94, "learning_rate": 7.207766990291263e-06, "loss": 0.0156, "step": 247050 }, { "epoch": 95.95, "learning_rate": 7.207249190938513e-06, "loss": 0.0765, "step": 247060 }, { "epoch": 95.95, "learning_rate": 7.2067313915857606e-06, "loss": 0.0931, "step": 247070 }, { "epoch": 95.95, "learning_rate": 7.20621359223301e-06, "loss": 0.1202, "step": 247080 }, { "epoch": 95.96, "learning_rate": 7.20569579288026e-06, "loss": 0.0226, "step": 247090 }, { "epoch": 95.96, "learning_rate": 7.205177993527509e-06, "loss": 0.0421, "step": 247100 }, { "epoch": 95.97, "learning_rate": 7.204660194174757e-06, "loss": 0.0209, "step": 247110 }, { "epoch": 95.97, "learning_rate": 7.204142394822007e-06, "loss": 0.0313, "step": 247120 }, { "epoch": 95.97, "learning_rate": 7.203624595469257e-06, "loss": 0.0268, "step": 247130 }, { "epoch": 95.98, "learning_rate": 7.203106796116505e-06, "loss": 0.0032, "step": 247140 }, { "epoch": 95.98, "learning_rate": 7.202588996763754e-06, "loss": 0.0272, "step": 247150 }, { "epoch": 95.98, "learning_rate": 7.202071197411004e-06, "loss": 0.0433, "step": 247160 }, { "epoch": 95.99, "learning_rate": 7.201553398058253e-06, "loss": 0.0104, "step": 247170 }, { "epoch": 95.99, "learning_rate": 7.201035598705502e-06, "loss": 0.0582, "step": 247180 }, { "epoch": 96.0, "learning_rate": 7.200517799352751e-06, "loss": 0.0336, "step": 247190 }, { "epoch": 96.0, "learning_rate": 7.2000000000000005e-06, "loss": 0.2079, "step": 247200 }, { "epoch": 96.0, "eval_accuracy": 0.9482806052269601, "eval_loss": 0.36884090304374695, "eval_runtime": 8.2807, "eval_samples_per_second": 438.975, "eval_steps_per_second": 54.947, "step": 247200 }, { "epoch": 96.0, "learning_rate": 7.19948220064725e-06, "loss": 0.0115, "step": 247210 }, { "epoch": 96.01, "learning_rate": 7.198964401294499e-06, "loss": 0.0944, "step": 247220 }, { "epoch": 96.01, "learning_rate": 7.1984466019417485e-06, "loss": 0.0194, "step": 247230 }, { "epoch": 96.02, "learning_rate": 7.197928802588997e-06, "loss": 0.0206, "step": 247240 }, { "epoch": 96.02, "learning_rate": 7.197411003236247e-06, "loss": 0.0509, "step": 247250 }, { "epoch": 96.02, "learning_rate": 7.196893203883496e-06, "loss": 0.0372, "step": 247260 }, { "epoch": 96.03, "learning_rate": 7.196375404530745e-06, "loss": 0.0815, "step": 247270 }, { "epoch": 96.03, "learning_rate": 7.195857605177994e-06, "loss": 0.0019, "step": 247280 }, { "epoch": 96.03, "learning_rate": 7.195339805825244e-06, "loss": 0.095, "step": 247290 }, { "epoch": 96.04, "learning_rate": 7.1948220064724925e-06, "loss": 0.0135, "step": 247300 }, { "epoch": 96.04, "learning_rate": 7.194304207119742e-06, "loss": 0.01, "step": 247310 }, { "epoch": 96.05, "learning_rate": 7.193786407766991e-06, "loss": 0.0174, "step": 247320 }, { "epoch": 96.05, "learning_rate": 7.19326860841424e-06, "loss": 0.0552, "step": 247330 }, { "epoch": 96.05, "learning_rate": 7.192750809061489e-06, "loss": 0.1177, "step": 247340 }, { "epoch": 96.06, "learning_rate": 7.192233009708739e-06, "loss": 0.0865, "step": 247350 }, { "epoch": 96.06, "learning_rate": 7.191715210355988e-06, "loss": 0.0885, "step": 247360 }, { "epoch": 96.07, "learning_rate": 7.191197411003236e-06, "loss": 0.0114, "step": 247370 }, { "epoch": 96.07, "learning_rate": 7.190679611650486e-06, "loss": 0.0001, "step": 247380 }, { "epoch": 96.07, "learning_rate": 7.190161812297736e-06, "loss": 0.1163, "step": 247390 }, { "epoch": 96.08, "learning_rate": 7.189644012944984e-06, "loss": 0.047, "step": 247400 }, { "epoch": 96.08, "learning_rate": 7.189126213592233e-06, "loss": 0.1093, "step": 247410 }, { "epoch": 96.09, "learning_rate": 7.188608414239483e-06, "loss": 0.0845, "step": 247420 }, { "epoch": 96.09, "learning_rate": 7.188090614886732e-06, "loss": 0.0521, "step": 247430 }, { "epoch": 96.09, "learning_rate": 7.187572815533981e-06, "loss": 0.0514, "step": 247440 }, { "epoch": 96.1, "learning_rate": 7.18705501618123e-06, "loss": 0.0385, "step": 247450 }, { "epoch": 96.1, "learning_rate": 7.18653721682848e-06, "loss": 0.004, "step": 247460 }, { "epoch": 96.1, "learning_rate": 7.186019417475729e-06, "loss": 0.1586, "step": 247470 }, { "epoch": 96.11, "learning_rate": 7.185501618122977e-06, "loss": 0.0027, "step": 247480 }, { "epoch": 96.11, "learning_rate": 7.184983818770227e-06, "loss": 0.0364, "step": 247490 }, { "epoch": 96.12, "learning_rate": 7.184466019417476e-06, "loss": 0.0447, "step": 247500 }, { "epoch": 96.12, "learning_rate": 7.183948220064726e-06, "loss": 0.0228, "step": 247510 }, { "epoch": 96.12, "learning_rate": 7.183430420711974e-06, "loss": 0.0812, "step": 247520 }, { "epoch": 96.13, "learning_rate": 7.1829126213592235e-06, "loss": 0.054, "step": 247530 }, { "epoch": 96.13, "learning_rate": 7.182394822006473e-06, "loss": 0.0872, "step": 247540 }, { "epoch": 96.14, "learning_rate": 7.181877022653723e-06, "loss": 0.0234, "step": 247550 }, { "epoch": 96.14, "learning_rate": 7.181359223300971e-06, "loss": 0.0471, "step": 247560 }, { "epoch": 96.14, "learning_rate": 7.18084142394822e-06, "loss": 0.0147, "step": 247570 }, { "epoch": 96.15, "learning_rate": 7.18032362459547e-06, "loss": 0.082, "step": 247580 }, { "epoch": 96.15, "learning_rate": 7.1798058252427195e-06, "loss": 0.0611, "step": 247590 }, { "epoch": 96.16, "learning_rate": 7.1792880258899675e-06, "loss": 0.0309, "step": 247600 }, { "epoch": 96.16, "learning_rate": 7.178770226537217e-06, "loss": 0.0054, "step": 247610 }, { "epoch": 96.16, "learning_rate": 7.178252427184467e-06, "loss": 0.014, "step": 247620 }, { "epoch": 96.17, "learning_rate": 7.177734627831716e-06, "loss": 0.0001, "step": 247630 }, { "epoch": 96.17, "learning_rate": 7.177216828478964e-06, "loss": 0.0272, "step": 247640 }, { "epoch": 96.17, "learning_rate": 7.176699029126214e-06, "loss": 0.0316, "step": 247650 }, { "epoch": 96.18, "learning_rate": 7.1761812297734635e-06, "loss": 0.0974, "step": 247660 }, { "epoch": 96.18, "learning_rate": 7.175663430420713e-06, "loss": 0.0412, "step": 247670 }, { "epoch": 96.19, "learning_rate": 7.175145631067961e-06, "loss": 0.1019, "step": 247680 }, { "epoch": 96.19, "learning_rate": 7.174627831715211e-06, "loss": 0.0394, "step": 247690 }, { "epoch": 96.19, "learning_rate": 7.17411003236246e-06, "loss": 0.0074, "step": 247700 }, { "epoch": 96.2, "learning_rate": 7.17359223300971e-06, "loss": 0.1972, "step": 247710 }, { "epoch": 96.2, "learning_rate": 7.173074433656958e-06, "loss": 0.0999, "step": 247720 }, { "epoch": 96.21, "learning_rate": 7.172556634304207e-06, "loss": 0.0584, "step": 247730 }, { "epoch": 96.21, "learning_rate": 7.172038834951457e-06, "loss": 0.0002, "step": 247740 }, { "epoch": 96.21, "learning_rate": 7.171521035598707e-06, "loss": 0.0374, "step": 247750 }, { "epoch": 96.22, "learning_rate": 7.1710032362459546e-06, "loss": 0.1763, "step": 247760 }, { "epoch": 96.22, "learning_rate": 7.170485436893204e-06, "loss": 0.0472, "step": 247770 }, { "epoch": 96.23, "learning_rate": 7.169967637540454e-06, "loss": 0.0876, "step": 247780 }, { "epoch": 96.23, "learning_rate": 7.169449838187703e-06, "loss": 0.0608, "step": 247790 }, { "epoch": 96.23, "learning_rate": 7.168932038834952e-06, "loss": 0.0241, "step": 247800 }, { "epoch": 96.24, "learning_rate": 7.168414239482201e-06, "loss": 0.027, "step": 247810 }, { "epoch": 96.24, "learning_rate": 7.167896440129451e-06, "loss": 0.0006, "step": 247820 }, { "epoch": 96.24, "learning_rate": 7.1673786407767e-06, "loss": 0.0821, "step": 247830 }, { "epoch": 96.25, "learning_rate": 7.166860841423949e-06, "loss": 0.0501, "step": 247840 }, { "epoch": 96.25, "learning_rate": 7.166343042071198e-06, "loss": 0.0482, "step": 247850 }, { "epoch": 96.26, "learning_rate": 7.165825242718447e-06, "loss": 0.0002, "step": 247860 }, { "epoch": 96.26, "learning_rate": 7.165307443365697e-06, "loss": 0.0433, "step": 247870 }, { "epoch": 96.26, "learning_rate": 7.164789644012946e-06, "loss": 0.0376, "step": 247880 }, { "epoch": 96.27, "learning_rate": 7.1642718446601945e-06, "loss": 0.1897, "step": 247890 }, { "epoch": 96.27, "learning_rate": 7.163754045307444e-06, "loss": 0.045, "step": 247900 }, { "epoch": 96.28, "learning_rate": 7.163236245954694e-06, "loss": 0.0159, "step": 247910 }, { "epoch": 96.28, "learning_rate": 7.1627184466019425e-06, "loss": 0.1948, "step": 247920 }, { "epoch": 96.28, "learning_rate": 7.162200647249191e-06, "loss": 0.0099, "step": 247930 }, { "epoch": 96.29, "learning_rate": 7.161682847896441e-06, "loss": 0.0708, "step": 247940 }, { "epoch": 96.29, "learning_rate": 7.16116504854369e-06, "loss": 0.1057, "step": 247950 }, { "epoch": 96.3, "learning_rate": 7.160647249190939e-06, "loss": 0.0038, "step": 247960 }, { "epoch": 96.3, "learning_rate": 7.160129449838188e-06, "loss": 0.0236, "step": 247970 }, { "epoch": 96.3, "learning_rate": 7.159611650485438e-06, "loss": 0.0692, "step": 247980 }, { "epoch": 96.31, "learning_rate": 7.1590938511326865e-06, "loss": 0.1074, "step": 247990 }, { "epoch": 96.31, "learning_rate": 7.158576051779936e-06, "loss": 0.0571, "step": 248000 }, { "epoch": 96.31, "learning_rate": 7.158058252427185e-06, "loss": 0.0557, "step": 248010 }, { "epoch": 96.32, "learning_rate": 7.1575404530744345e-06, "loss": 0.1576, "step": 248020 }, { "epoch": 96.32, "learning_rate": 7.157022653721683e-06, "loss": 0.0176, "step": 248030 }, { "epoch": 96.33, "learning_rate": 7.156504854368933e-06, "loss": 0.0459, "step": 248040 }, { "epoch": 96.33, "learning_rate": 7.155987055016182e-06, "loss": 0.1444, "step": 248050 }, { "epoch": 96.33, "learning_rate": 7.155469255663431e-06, "loss": 0.0125, "step": 248060 }, { "epoch": 96.34, "learning_rate": 7.15495145631068e-06, "loss": 0.0667, "step": 248070 }, { "epoch": 96.34, "learning_rate": 7.15443365695793e-06, "loss": 0.0868, "step": 248080 }, { "epoch": 96.35, "learning_rate": 7.153915857605178e-06, "loss": 0.1282, "step": 248090 }, { "epoch": 96.35, "learning_rate": 7.153398058252428e-06, "loss": 0.0337, "step": 248100 }, { "epoch": 96.35, "learning_rate": 7.152880258899677e-06, "loss": 0.0128, "step": 248110 }, { "epoch": 96.36, "learning_rate": 7.152362459546926e-06, "loss": 0.0661, "step": 248120 }, { "epoch": 96.36, "learning_rate": 7.151844660194175e-06, "loss": 0.0992, "step": 248130 }, { "epoch": 96.37, "learning_rate": 7.151326860841424e-06, "loss": 0.1058, "step": 248140 }, { "epoch": 96.37, "learning_rate": 7.150809061488674e-06, "loss": 0.0173, "step": 248150 }, { "epoch": 96.37, "learning_rate": 7.150291262135923e-06, "loss": 0.0943, "step": 248160 }, { "epoch": 96.38, "learning_rate": 7.149773462783172e-06, "loss": 0.0996, "step": 248170 }, { "epoch": 96.38, "learning_rate": 7.149255663430421e-06, "loss": 0.0376, "step": 248180 }, { "epoch": 96.38, "learning_rate": 7.14873786407767e-06, "loss": 0.0226, "step": 248190 }, { "epoch": 96.39, "learning_rate": 7.14822006472492e-06, "loss": 0.0542, "step": 248200 }, { "epoch": 96.39, "learning_rate": 7.147702265372169e-06, "loss": 0.0413, "step": 248210 }, { "epoch": 96.4, "learning_rate": 7.1471844660194175e-06, "loss": 0.1502, "step": 248220 }, { "epoch": 96.4, "learning_rate": 7.146666666666667e-06, "loss": 0.0124, "step": 248230 }, { "epoch": 96.4, "learning_rate": 7.146148867313917e-06, "loss": 0.0535, "step": 248240 }, { "epoch": 96.41, "learning_rate": 7.1456310679611655e-06, "loss": 0.0032, "step": 248250 }, { "epoch": 96.41, "learning_rate": 7.145113268608414e-06, "loss": 0.022, "step": 248260 }, { "epoch": 96.42, "learning_rate": 7.144595469255664e-06, "loss": 0.0848, "step": 248270 }, { "epoch": 96.42, "learning_rate": 7.1440776699029135e-06, "loss": 0.0009, "step": 248280 }, { "epoch": 96.42, "learning_rate": 7.1435598705501615e-06, "loss": 0.0429, "step": 248290 }, { "epoch": 96.43, "learning_rate": 7.143042071197411e-06, "loss": 0.0245, "step": 248300 }, { "epoch": 96.43, "learning_rate": 7.142524271844661e-06, "loss": 0.0604, "step": 248310 }, { "epoch": 96.43, "learning_rate": 7.14200647249191e-06, "loss": 0.0618, "step": 248320 }, { "epoch": 96.44, "learning_rate": 7.141488673139158e-06, "loss": 0.0712, "step": 248330 }, { "epoch": 96.44, "learning_rate": 7.140970873786408e-06, "loss": 0.0482, "step": 248340 }, { "epoch": 96.45, "learning_rate": 7.1404530744336575e-06, "loss": 0.0775, "step": 248350 }, { "epoch": 96.45, "learning_rate": 7.139935275080907e-06, "loss": 0.0865, "step": 248360 }, { "epoch": 96.45, "learning_rate": 7.139417475728157e-06, "loss": 0.1499, "step": 248370 }, { "epoch": 96.46, "learning_rate": 7.138899676375405e-06, "loss": 0.0912, "step": 248380 }, { "epoch": 96.46, "learning_rate": 7.138381877022654e-06, "loss": 0.0621, "step": 248390 }, { "epoch": 96.47, "learning_rate": 7.137864077669904e-06, "loss": 0.1173, "step": 248400 }, { "epoch": 96.47, "learning_rate": 7.1373462783171535e-06, "loss": 0.0212, "step": 248410 }, { "epoch": 96.47, "learning_rate": 7.136828478964401e-06, "loss": 0.0707, "step": 248420 }, { "epoch": 96.48, "learning_rate": 7.136310679611651e-06, "loss": 0.0034, "step": 248430 }, { "epoch": 96.48, "learning_rate": 7.135792880258901e-06, "loss": 0.0112, "step": 248440 }, { "epoch": 96.49, "learning_rate": 7.13527508090615e-06, "loss": 0.0347, "step": 248450 }, { "epoch": 96.49, "learning_rate": 7.134757281553398e-06, "loss": 0.0198, "step": 248460 }, { "epoch": 96.49, "learning_rate": 7.134239482200648e-06, "loss": 0.0573, "step": 248470 }, { "epoch": 96.5, "learning_rate": 7.133721682847897e-06, "loss": 0.0111, "step": 248480 }, { "epoch": 96.5, "learning_rate": 7.133203883495147e-06, "loss": 0.0065, "step": 248490 }, { "epoch": 96.5, "learning_rate": 7.132686084142395e-06, "loss": 0.0243, "step": 248500 }, { "epoch": 96.51, "learning_rate": 7.132168284789645e-06, "loss": 0.0511, "step": 248510 }, { "epoch": 96.51, "learning_rate": 7.131650485436894e-06, "loss": 0.0274, "step": 248520 }, { "epoch": 96.52, "learning_rate": 7.131132686084144e-06, "loss": 0.0796, "step": 248530 }, { "epoch": 96.52, "learning_rate": 7.130614886731392e-06, "loss": 0.0351, "step": 248540 }, { "epoch": 96.52, "learning_rate": 7.130097087378641e-06, "loss": 0.0693, "step": 248550 }, { "epoch": 96.53, "learning_rate": 7.129579288025891e-06, "loss": 0.1469, "step": 248560 }, { "epoch": 96.53, "learning_rate": 7.129061488673141e-06, "loss": 0.0034, "step": 248570 }, { "epoch": 96.54, "learning_rate": 7.1285436893203885e-06, "loss": 0.0005, "step": 248580 }, { "epoch": 96.54, "learning_rate": 7.128025889967638e-06, "loss": 0.0283, "step": 248590 }, { "epoch": 96.54, "learning_rate": 7.127508090614888e-06, "loss": 0.0145, "step": 248600 }, { "epoch": 96.55, "learning_rate": 7.1269902912621365e-06, "loss": 0.0506, "step": 248610 }, { "epoch": 96.55, "learning_rate": 7.126472491909385e-06, "loss": 0.0526, "step": 248620 }, { "epoch": 96.56, "learning_rate": 7.125954692556635e-06, "loss": 0.0332, "step": 248630 }, { "epoch": 96.56, "learning_rate": 7.1254368932038845e-06, "loss": 0.0002, "step": 248640 }, { "epoch": 96.56, "learning_rate": 7.124919093851133e-06, "loss": 0.0231, "step": 248650 }, { "epoch": 96.57, "learning_rate": 7.124401294498382e-06, "loss": 0.0241, "step": 248660 }, { "epoch": 96.57, "learning_rate": 7.123883495145632e-06, "loss": 0.0655, "step": 248670 }, { "epoch": 96.57, "learning_rate": 7.123365695792881e-06, "loss": 0.0712, "step": 248680 }, { "epoch": 96.58, "learning_rate": 7.12284789644013e-06, "loss": 0.1203, "step": 248690 }, { "epoch": 96.58, "learning_rate": 7.122330097087379e-06, "loss": 0.0005, "step": 248700 }, { "epoch": 96.59, "learning_rate": 7.1218122977346285e-06, "loss": 0.0002, "step": 248710 }, { "epoch": 96.59, "learning_rate": 7.121294498381878e-06, "loss": 0.0439, "step": 248720 }, { "epoch": 96.59, "learning_rate": 7.120776699029127e-06, "loss": 0.0776, "step": 248730 }, { "epoch": 96.6, "learning_rate": 7.120258899676376e-06, "loss": 0.1602, "step": 248740 }, { "epoch": 96.6, "learning_rate": 7.119741100323625e-06, "loss": 0.0134, "step": 248750 }, { "epoch": 96.61, "learning_rate": 7.119223300970874e-06, "loss": 0.11, "step": 248760 }, { "epoch": 96.61, "learning_rate": 7.118705501618124e-06, "loss": 0.0118, "step": 248770 }, { "epoch": 96.61, "learning_rate": 7.118187702265372e-06, "loss": 0.0038, "step": 248780 }, { "epoch": 96.62, "learning_rate": 7.117669902912622e-06, "loss": 0.0061, "step": 248790 }, { "epoch": 96.62, "learning_rate": 7.117152103559871e-06, "loss": 0.0283, "step": 248800 }, { "epoch": 96.63, "learning_rate": 7.11663430420712e-06, "loss": 0.0005, "step": 248810 }, { "epoch": 96.63, "learning_rate": 7.116116504854369e-06, "loss": 0.1237, "step": 248820 }, { "epoch": 96.63, "learning_rate": 7.115598705501619e-06, "loss": 0.1111, "step": 248830 }, { "epoch": 96.64, "learning_rate": 7.1150809061488676e-06, "loss": 0.039, "step": 248840 }, { "epoch": 96.64, "learning_rate": 7.114563106796117e-06, "loss": 0.0541, "step": 248850 }, { "epoch": 96.64, "learning_rate": 7.114045307443366e-06, "loss": 0.083, "step": 248860 }, { "epoch": 96.65, "learning_rate": 7.113527508090616e-06, "loss": 0.0186, "step": 248870 }, { "epoch": 96.65, "learning_rate": 7.113009708737864e-06, "loss": 0.0012, "step": 248880 }, { "epoch": 96.66, "learning_rate": 7.112491909385114e-06, "loss": 0.0392, "step": 248890 }, { "epoch": 96.66, "learning_rate": 7.111974110032363e-06, "loss": 0.0093, "step": 248900 }, { "epoch": 96.66, "learning_rate": 7.111456310679612e-06, "loss": 0.1545, "step": 248910 }, { "epoch": 96.67, "learning_rate": 7.110938511326861e-06, "loss": 0.1075, "step": 248920 }, { "epoch": 96.67, "learning_rate": 7.110420711974111e-06, "loss": 0.0119, "step": 248930 }, { "epoch": 96.68, "learning_rate": 7.10990291262136e-06, "loss": 0.0307, "step": 248940 }, { "epoch": 96.68, "learning_rate": 7.109385113268608e-06, "loss": 0.0497, "step": 248950 }, { "epoch": 96.68, "learning_rate": 7.108867313915858e-06, "loss": 0.0252, "step": 248960 }, { "epoch": 96.69, "learning_rate": 7.1083495145631075e-06, "loss": 0.089, "step": 248970 }, { "epoch": 96.69, "learning_rate": 7.107831715210357e-06, "loss": 0.0501, "step": 248980 }, { "epoch": 96.7, "learning_rate": 7.107313915857605e-06, "loss": 0.0745, "step": 248990 }, { "epoch": 96.7, "learning_rate": 7.106796116504855e-06, "loss": 0.0375, "step": 249000 }, { "epoch": 96.7, "learning_rate": 7.106278317152104e-06, "loss": 0.1, "step": 249010 }, { "epoch": 96.71, "learning_rate": 7.105760517799354e-06, "loss": 0.043, "step": 249020 }, { "epoch": 96.71, "learning_rate": 7.105242718446602e-06, "loss": 0.0309, "step": 249030 }, { "epoch": 96.71, "learning_rate": 7.1047249190938515e-06, "loss": 0.0462, "step": 249040 }, { "epoch": 96.72, "learning_rate": 7.104207119741101e-06, "loss": 0.0824, "step": 249050 }, { "epoch": 96.72, "learning_rate": 7.103689320388351e-06, "loss": 0.0669, "step": 249060 }, { "epoch": 96.73, "learning_rate": 7.103171521035599e-06, "loss": 0.0759, "step": 249070 }, { "epoch": 96.73, "learning_rate": 7.102653721682848e-06, "loss": 0.0592, "step": 249080 }, { "epoch": 96.73, "learning_rate": 7.102135922330098e-06, "loss": 0.0317, "step": 249090 }, { "epoch": 96.74, "learning_rate": 7.1016181229773475e-06, "loss": 0.0005, "step": 249100 }, { "epoch": 96.74, "learning_rate": 7.101100323624595e-06, "loss": 0.0163, "step": 249110 }, { "epoch": 96.75, "learning_rate": 7.100582524271845e-06, "loss": 0.0443, "step": 249120 }, { "epoch": 96.75, "learning_rate": 7.100064724919095e-06, "loss": 0.1801, "step": 249130 }, { "epoch": 96.75, "learning_rate": 7.099546925566344e-06, "loss": 0.0523, "step": 249140 }, { "epoch": 96.76, "learning_rate": 7.099029126213592e-06, "loss": 0.0226, "step": 249150 }, { "epoch": 96.76, "learning_rate": 7.098511326860842e-06, "loss": 0.0277, "step": 249160 }, { "epoch": 96.77, "learning_rate": 7.097993527508091e-06, "loss": 0.0012, "step": 249170 }, { "epoch": 96.77, "learning_rate": 7.097475728155341e-06, "loss": 0.0077, "step": 249180 }, { "epoch": 96.77, "learning_rate": 7.096957928802589e-06, "loss": 0.0731, "step": 249190 }, { "epoch": 96.78, "learning_rate": 7.096440129449839e-06, "loss": 0.114, "step": 249200 }, { "epoch": 96.78, "learning_rate": 7.095922330097088e-06, "loss": 0.0102, "step": 249210 }, { "epoch": 96.78, "learning_rate": 7.095404530744338e-06, "loss": 0.0664, "step": 249220 }, { "epoch": 96.79, "learning_rate": 7.094886731391586e-06, "loss": 0.0995, "step": 249230 }, { "epoch": 96.79, "learning_rate": 7.094368932038835e-06, "loss": 0.0846, "step": 249240 }, { "epoch": 96.8, "learning_rate": 7.093851132686085e-06, "loss": 0.0125, "step": 249250 }, { "epoch": 96.8, "learning_rate": 7.093333333333335e-06, "loss": 0.0392, "step": 249260 }, { "epoch": 96.8, "learning_rate": 7.0928155339805825e-06, "loss": 0.0491, "step": 249270 }, { "epoch": 96.81, "learning_rate": 7.092297734627832e-06, "loss": 0.1044, "step": 249280 }, { "epoch": 96.81, "learning_rate": 7.091779935275082e-06, "loss": 0.0864, "step": 249290 }, { "epoch": 96.82, "learning_rate": 7.091262135922331e-06, "loss": 0.0788, "step": 249300 }, { "epoch": 96.82, "learning_rate": 7.090744336569579e-06, "loss": 0.0428, "step": 249310 }, { "epoch": 96.82, "learning_rate": 7.090226537216829e-06, "loss": 0.0228, "step": 249320 }, { "epoch": 96.83, "learning_rate": 7.0897087378640785e-06, "loss": 0.0301, "step": 249330 }, { "epoch": 96.83, "learning_rate": 7.089190938511328e-06, "loss": 0.0316, "step": 249340 }, { "epoch": 96.83, "learning_rate": 7.088673139158576e-06, "loss": 0.0596, "step": 249350 }, { "epoch": 96.84, "learning_rate": 7.088155339805826e-06, "loss": 0.0914, "step": 249360 }, { "epoch": 96.84, "learning_rate": 7.087637540453075e-06, "loss": 0.0754, "step": 249370 }, { "epoch": 96.85, "learning_rate": 7.087119741100325e-06, "loss": 0.0159, "step": 249380 }, { "epoch": 96.85, "learning_rate": 7.086601941747573e-06, "loss": 0.0839, "step": 249390 }, { "epoch": 96.85, "learning_rate": 7.0860841423948225e-06, "loss": 0.1048, "step": 249400 }, { "epoch": 96.86, "learning_rate": 7.085566343042072e-06, "loss": 0.0733, "step": 249410 }, { "epoch": 96.86, "learning_rate": 7.085048543689321e-06, "loss": 0.0001, "step": 249420 }, { "epoch": 96.87, "learning_rate": 7.08453074433657e-06, "loss": 0.0075, "step": 249430 }, { "epoch": 96.87, "learning_rate": 7.084012944983819e-06, "loss": 0.0707, "step": 249440 }, { "epoch": 96.87, "learning_rate": 7.083495145631069e-06, "loss": 0.0854, "step": 249450 }, { "epoch": 96.88, "learning_rate": 7.082977346278318e-06, "loss": 0.0242, "step": 249460 }, { "epoch": 96.88, "learning_rate": 7.082459546925567e-06, "loss": 0.0013, "step": 249470 }, { "epoch": 96.89, "learning_rate": 7.081941747572816e-06, "loss": 0.0659, "step": 249480 }, { "epoch": 96.89, "learning_rate": 7.081423948220066e-06, "loss": 0.1134, "step": 249490 }, { "epoch": 96.89, "learning_rate": 7.080906148867314e-06, "loss": 0.0451, "step": 249500 }, { "epoch": 96.9, "learning_rate": 7.080388349514564e-06, "loss": 0.0397, "step": 249510 }, { "epoch": 96.9, "learning_rate": 7.079870550161813e-06, "loss": 0.0898, "step": 249520 }, { "epoch": 96.9, "learning_rate": 7.079352750809062e-06, "loss": 0.1404, "step": 249530 }, { "epoch": 96.91, "learning_rate": 7.078834951456311e-06, "loss": 0.0727, "step": 249540 }, { "epoch": 96.91, "learning_rate": 7.078317152103561e-06, "loss": 0.03, "step": 249550 }, { "epoch": 96.92, "learning_rate": 7.07779935275081e-06, "loss": 0.1876, "step": 249560 }, { "epoch": 96.92, "learning_rate": 7.077281553398059e-06, "loss": 0.0037, "step": 249570 }, { "epoch": 96.92, "learning_rate": 7.076763754045308e-06, "loss": 0.0544, "step": 249580 }, { "epoch": 96.93, "learning_rate": 7.076245954692558e-06, "loss": 0.0016, "step": 249590 }, { "epoch": 96.93, "learning_rate": 7.075728155339806e-06, "loss": 0.0035, "step": 249600 }, { "epoch": 96.94, "learning_rate": 7.075210355987055e-06, "loss": 0.0667, "step": 249610 }, { "epoch": 96.94, "learning_rate": 7.074692556634305e-06, "loss": 0.0899, "step": 249620 }, { "epoch": 96.94, "learning_rate": 7.074174757281554e-06, "loss": 0.0006, "step": 249630 }, { "epoch": 96.95, "learning_rate": 7.073656957928803e-06, "loss": 0.0018, "step": 249640 }, { "epoch": 96.95, "learning_rate": 7.073139158576052e-06, "loss": 0.0411, "step": 249650 }, { "epoch": 96.96, "learning_rate": 7.0726213592233015e-06, "loss": 0.0527, "step": 249660 }, { "epoch": 96.96, "learning_rate": 7.072103559870551e-06, "loss": 0.0551, "step": 249670 }, { "epoch": 96.96, "learning_rate": 7.0715857605178e-06, "loss": 0.0623, "step": 249680 }, { "epoch": 96.97, "learning_rate": 7.071067961165049e-06, "loss": 0.1161, "step": 249690 }, { "epoch": 96.97, "learning_rate": 7.070550161812298e-06, "loss": 0.0724, "step": 249700 }, { "epoch": 96.97, "learning_rate": 7.070032362459548e-06, "loss": 0.0005, "step": 249710 }, { "epoch": 96.98, "learning_rate": 7.069514563106797e-06, "loss": 0.0003, "step": 249720 }, { "epoch": 96.98, "learning_rate": 7.0689967637540455e-06, "loss": 0.0628, "step": 249730 }, { "epoch": 96.99, "learning_rate": 7.068478964401295e-06, "loss": 0.0925, "step": 249740 }, { "epoch": 96.99, "learning_rate": 7.067961165048545e-06, "loss": 0.0772, "step": 249750 }, { "epoch": 96.99, "learning_rate": 7.067443365695793e-06, "loss": 0.0257, "step": 249760 }, { "epoch": 97.0, "learning_rate": 7.066925566343042e-06, "loss": 0.0665, "step": 249770 }, { "epoch": 97.0, "eval_accuracy": 0.9521320495185694, "eval_loss": 0.3576328158378601, "eval_runtime": 8.1723, "eval_samples_per_second": 444.796, "eval_steps_per_second": 55.676, "step": 249775 }, { "epoch": 97.0, "learning_rate": 7.066407766990292e-06, "loss": 0.0156, "step": 249780 }, { "epoch": 97.01, "learning_rate": 7.0658899676375415e-06, "loss": 0.015, "step": 249790 }, { "epoch": 97.01, "learning_rate": 7.065372168284789e-06, "loss": 0.2007, "step": 249800 }, { "epoch": 97.01, "learning_rate": 7.064854368932039e-06, "loss": 0.0234, "step": 249810 }, { "epoch": 97.02, "learning_rate": 7.064336569579289e-06, "loss": 0.0006, "step": 249820 }, { "epoch": 97.02, "learning_rate": 7.063818770226538e-06, "loss": 0.0446, "step": 249830 }, { "epoch": 97.03, "learning_rate": 7.063300970873786e-06, "loss": 0.0098, "step": 249840 }, { "epoch": 97.03, "learning_rate": 7.062783171521036e-06, "loss": 0.0435, "step": 249850 }, { "epoch": 97.03, "learning_rate": 7.062265372168285e-06, "loss": 0.0108, "step": 249860 }, { "epoch": 97.04, "learning_rate": 7.061747572815535e-06, "loss": 0.0628, "step": 249870 }, { "epoch": 97.04, "learning_rate": 7.061229773462783e-06, "loss": 0.0472, "step": 249880 }, { "epoch": 97.04, "learning_rate": 7.060711974110033e-06, "loss": 0.0185, "step": 249890 }, { "epoch": 97.05, "learning_rate": 7.060194174757282e-06, "loss": 0.1263, "step": 249900 }, { "epoch": 97.05, "learning_rate": 7.059676375404532e-06, "loss": 0.1439, "step": 249910 }, { "epoch": 97.06, "learning_rate": 7.05915857605178e-06, "loss": 0.0012, "step": 249920 }, { "epoch": 97.06, "learning_rate": 7.058640776699029e-06, "loss": 0.1363, "step": 249930 }, { "epoch": 97.06, "learning_rate": 7.058122977346279e-06, "loss": 0.1063, "step": 249940 }, { "epoch": 97.07, "learning_rate": 7.057605177993529e-06, "loss": 0.0229, "step": 249950 }, { "epoch": 97.07, "learning_rate": 7.0570873786407765e-06, "loss": 0.082, "step": 249960 }, { "epoch": 97.08, "learning_rate": 7.056569579288026e-06, "loss": 0.066, "step": 249970 }, { "epoch": 97.08, "learning_rate": 7.056051779935276e-06, "loss": 0.0678, "step": 249980 }, { "epoch": 97.08, "learning_rate": 7.055533980582525e-06, "loss": 0.0016, "step": 249990 }, { "epoch": 97.09, "learning_rate": 7.055016181229773e-06, "loss": 0.0535, "step": 250000 }, { "epoch": 97.09, "learning_rate": 7.054498381877023e-06, "loss": 0.1558, "step": 250010 }, { "epoch": 97.1, "learning_rate": 7.0539805825242725e-06, "loss": 0.0039, "step": 250020 }, { "epoch": 97.1, "learning_rate": 7.053462783171522e-06, "loss": 0.0698, "step": 250030 }, { "epoch": 97.1, "learning_rate": 7.052944983818772e-06, "loss": 0.0165, "step": 250040 }, { "epoch": 97.11, "learning_rate": 7.05242718446602e-06, "loss": 0.0476, "step": 250050 }, { "epoch": 97.11, "learning_rate": 7.051909385113269e-06, "loss": 0.0131, "step": 250060 }, { "epoch": 97.11, "learning_rate": 7.051391585760519e-06, "loss": 0.0001, "step": 250070 }, { "epoch": 97.12, "learning_rate": 7.050873786407768e-06, "loss": 0.0118, "step": 250080 }, { "epoch": 97.12, "learning_rate": 7.0503559870550165e-06, "loss": 0.0299, "step": 250090 }, { "epoch": 97.13, "learning_rate": 7.049838187702266e-06, "loss": 0.0807, "step": 250100 }, { "epoch": 97.13, "learning_rate": 7.049320388349516e-06, "loss": 0.0568, "step": 250110 }, { "epoch": 97.13, "learning_rate": 7.0488025889967645e-06, "loss": 0.0091, "step": 250120 }, { "epoch": 97.14, "learning_rate": 7.048284789644013e-06, "loss": 0.0004, "step": 250130 }, { "epoch": 97.14, "learning_rate": 7.047766990291263e-06, "loss": 0.0008, "step": 250140 }, { "epoch": 97.15, "learning_rate": 7.0472491909385125e-06, "loss": 0.033, "step": 250150 }, { "epoch": 97.15, "learning_rate": 7.046731391585761e-06, "loss": 0.0106, "step": 250160 }, { "epoch": 97.15, "learning_rate": 7.04621359223301e-06, "loss": 0.0193, "step": 250170 }, { "epoch": 97.16, "learning_rate": 7.04569579288026e-06, "loss": 0.0122, "step": 250180 }, { "epoch": 97.16, "learning_rate": 7.045177993527509e-06, "loss": 0.1215, "step": 250190 }, { "epoch": 97.17, "learning_rate": 7.044660194174758e-06, "loss": 0.0606, "step": 250200 }, { "epoch": 97.17, "learning_rate": 7.044142394822007e-06, "loss": 0.0117, "step": 250210 }, { "epoch": 97.17, "learning_rate": 7.043624595469256e-06, "loss": 0.1112, "step": 250220 }, { "epoch": 97.18, "learning_rate": 7.043106796116505e-06, "loss": 0.0239, "step": 250230 }, { "epoch": 97.18, "learning_rate": 7.042588996763755e-06, "loss": 0.065, "step": 250240 }, { "epoch": 97.18, "learning_rate": 7.042071197411004e-06, "loss": 0.06, "step": 250250 }, { "epoch": 97.19, "learning_rate": 7.041553398058253e-06, "loss": 0.1445, "step": 250260 }, { "epoch": 97.19, "learning_rate": 7.041035598705502e-06, "loss": 0.0003, "step": 250270 }, { "epoch": 97.2, "learning_rate": 7.040517799352752e-06, "loss": 0.0003, "step": 250280 }, { "epoch": 97.2, "learning_rate": 7.04e-06, "loss": 0.0394, "step": 250290 }, { "epoch": 97.2, "learning_rate": 7.03948220064725e-06, "loss": 0.0004, "step": 250300 }, { "epoch": 97.21, "learning_rate": 7.038964401294499e-06, "loss": 0.0149, "step": 250310 }, { "epoch": 97.21, "learning_rate": 7.038446601941748e-06, "loss": 0.0311, "step": 250320 }, { "epoch": 97.22, "learning_rate": 7.037928802588997e-06, "loss": 0.0439, "step": 250330 }, { "epoch": 97.22, "learning_rate": 7.037411003236247e-06, "loss": 0.0191, "step": 250340 }, { "epoch": 97.22, "learning_rate": 7.0368932038834955e-06, "loss": 0.0537, "step": 250350 }, { "epoch": 97.23, "learning_rate": 7.036375404530745e-06, "loss": 0.048, "step": 250360 }, { "epoch": 97.23, "learning_rate": 7.035857605177994e-06, "loss": 0.0298, "step": 250370 }, { "epoch": 97.23, "learning_rate": 7.0353398058252435e-06, "loss": 0.0105, "step": 250380 }, { "epoch": 97.24, "learning_rate": 7.034822006472492e-06, "loss": 0.0232, "step": 250390 }, { "epoch": 97.24, "learning_rate": 7.034304207119742e-06, "loss": 0.0721, "step": 250400 }, { "epoch": 97.25, "learning_rate": 7.033786407766991e-06, "loss": 0.0775, "step": 250410 }, { "epoch": 97.25, "learning_rate": 7.0332686084142395e-06, "loss": 0.0017, "step": 250420 }, { "epoch": 97.25, "learning_rate": 7.032750809061489e-06, "loss": 0.0354, "step": 250430 }, { "epoch": 97.26, "learning_rate": 7.032233009708739e-06, "loss": 0.003, "step": 250440 }, { "epoch": 97.26, "learning_rate": 7.0317152103559875e-06, "loss": 0.0004, "step": 250450 }, { "epoch": 97.27, "learning_rate": 7.031197411003236e-06, "loss": 0.0249, "step": 250460 }, { "epoch": 97.27, "learning_rate": 7.030679611650486e-06, "loss": 0.1807, "step": 250470 }, { "epoch": 97.27, "learning_rate": 7.0301618122977355e-06, "loss": 0.0047, "step": 250480 }, { "epoch": 97.28, "learning_rate": 7.029644012944984e-06, "loss": 0.1428, "step": 250490 }, { "epoch": 97.28, "learning_rate": 7.029126213592233e-06, "loss": 0.0692, "step": 250500 }, { "epoch": 97.29, "learning_rate": 7.028608414239483e-06, "loss": 0.1244, "step": 250510 }, { "epoch": 97.29, "learning_rate": 7.028090614886732e-06, "loss": 0.0133, "step": 250520 }, { "epoch": 97.29, "learning_rate": 7.027572815533981e-06, "loss": 0.0806, "step": 250530 }, { "epoch": 97.3, "learning_rate": 7.02705501618123e-06, "loss": 0.001, "step": 250540 }, { "epoch": 97.3, "learning_rate": 7.026537216828479e-06, "loss": 0.0094, "step": 250550 }, { "epoch": 97.3, "learning_rate": 7.026019417475729e-06, "loss": 0.0061, "step": 250560 }, { "epoch": 97.31, "learning_rate": 7.025501618122977e-06, "loss": 0.0187, "step": 250570 }, { "epoch": 97.31, "learning_rate": 7.024983818770227e-06, "loss": 0.1196, "step": 250580 }, { "epoch": 97.32, "learning_rate": 7.024466019417476e-06, "loss": 0.0028, "step": 250590 }, { "epoch": 97.32, "learning_rate": 7.023948220064726e-06, "loss": 0.0001, "step": 250600 }, { "epoch": 97.32, "learning_rate": 7.0234304207119754e-06, "loss": 0.0761, "step": 250610 }, { "epoch": 97.33, "learning_rate": 7.022912621359223e-06, "loss": 0.049, "step": 250620 }, { "epoch": 97.33, "learning_rate": 7.022394822006473e-06, "loss": 0.0236, "step": 250630 }, { "epoch": 97.34, "learning_rate": 7.021877022653723e-06, "loss": 0.0902, "step": 250640 }, { "epoch": 97.34, "learning_rate": 7.021359223300972e-06, "loss": 0.0924, "step": 250650 }, { "epoch": 97.34, "learning_rate": 7.02084142394822e-06, "loss": 0.0686, "step": 250660 }, { "epoch": 97.35, "learning_rate": 7.02032362459547e-06, "loss": 0.0575, "step": 250670 }, { "epoch": 97.35, "learning_rate": 7.019805825242719e-06, "loss": 0.0481, "step": 250680 }, { "epoch": 97.36, "learning_rate": 7.019288025889969e-06, "loss": 0.0108, "step": 250690 }, { "epoch": 97.36, "learning_rate": 7.018770226537217e-06, "loss": 0.0203, "step": 250700 }, { "epoch": 97.36, "learning_rate": 7.0182524271844665e-06, "loss": 0.1255, "step": 250710 }, { "epoch": 97.37, "learning_rate": 7.017734627831716e-06, "loss": 0.0781, "step": 250720 }, { "epoch": 97.37, "learning_rate": 7.017216828478966e-06, "loss": 0.016, "step": 250730 }, { "epoch": 97.37, "learning_rate": 7.016699029126214e-06, "loss": 0.0504, "step": 250740 }, { "epoch": 97.38, "learning_rate": 7.016181229773463e-06, "loss": 0.03, "step": 250750 }, { "epoch": 97.38, "learning_rate": 7.015663430420713e-06, "loss": 0.038, "step": 250760 }, { "epoch": 97.39, "learning_rate": 7.0151456310679625e-06, "loss": 0.1098, "step": 250770 }, { "epoch": 97.39, "learning_rate": 7.0146278317152105e-06, "loss": 0.1365, "step": 250780 }, { "epoch": 97.39, "learning_rate": 7.01411003236246e-06, "loss": 0.0931, "step": 250790 }, { "epoch": 97.4, "learning_rate": 7.01359223300971e-06, "loss": 0.0074, "step": 250800 }, { "epoch": 97.4, "learning_rate": 7.013074433656959e-06, "loss": 0.0635, "step": 250810 }, { "epoch": 97.41, "learning_rate": 7.012556634304207e-06, "loss": 0.0081, "step": 250820 }, { "epoch": 97.41, "learning_rate": 7.012038834951457e-06, "loss": 0.008, "step": 250830 }, { "epoch": 97.41, "learning_rate": 7.0115210355987065e-06, "loss": 0.0246, "step": 250840 }, { "epoch": 97.42, "learning_rate": 7.011003236245956e-06, "loss": 0.0563, "step": 250850 }, { "epoch": 97.42, "learning_rate": 7.010485436893204e-06, "loss": 0.0365, "step": 250860 }, { "epoch": 97.43, "learning_rate": 7.009967637540454e-06, "loss": 0.0092, "step": 250870 }, { "epoch": 97.43, "learning_rate": 7.009449838187703e-06, "loss": 0.007, "step": 250880 }, { "epoch": 97.43, "learning_rate": 7.008932038834952e-06, "loss": 0.0298, "step": 250890 }, { "epoch": 97.44, "learning_rate": 7.008414239482201e-06, "loss": 0.033, "step": 250900 }, { "epoch": 97.44, "learning_rate": 7.00789644012945e-06, "loss": 0.001, "step": 250910 }, { "epoch": 97.44, "learning_rate": 7.0073786407767e-06, "loss": 0.1066, "step": 250920 }, { "epoch": 97.45, "learning_rate": 7.006860841423949e-06, "loss": 0.0002, "step": 250930 }, { "epoch": 97.45, "learning_rate": 7.006343042071198e-06, "loss": 0.0495, "step": 250940 }, { "epoch": 97.46, "learning_rate": 7.005825242718447e-06, "loss": 0.0534, "step": 250950 }, { "epoch": 97.46, "learning_rate": 7.005307443365697e-06, "loss": 0.0172, "step": 250960 }, { "epoch": 97.46, "learning_rate": 7.004789644012946e-06, "loss": 0.0648, "step": 250970 }, { "epoch": 97.47, "learning_rate": 7.004271844660194e-06, "loss": 0.0451, "step": 250980 }, { "epoch": 97.47, "learning_rate": 7.003754045307444e-06, "loss": 0.0003, "step": 250990 }, { "epoch": 97.48, "learning_rate": 7.003236245954694e-06, "loss": 0.0095, "step": 251000 }, { "epoch": 97.48, "learning_rate": 7.002718446601942e-06, "loss": 0.1241, "step": 251010 }, { "epoch": 97.48, "learning_rate": 7.002200647249191e-06, "loss": 0.077, "step": 251020 }, { "epoch": 97.49, "learning_rate": 7.001682847896441e-06, "loss": 0.0632, "step": 251030 }, { "epoch": 97.49, "learning_rate": 7.0011650485436895e-06, "loss": 0.0038, "step": 251040 }, { "epoch": 97.5, "learning_rate": 7.000647249190939e-06, "loss": 0.0595, "step": 251050 }, { "epoch": 97.5, "learning_rate": 7.000129449838188e-06, "loss": 0.017, "step": 251060 }, { "epoch": 97.5, "learning_rate": 6.9996116504854375e-06, "loss": 0.0654, "step": 251070 }, { "epoch": 97.51, "learning_rate": 6.999093851132686e-06, "loss": 0.2574, "step": 251080 }, { "epoch": 97.51, "learning_rate": 6.998576051779936e-06, "loss": 0.0002, "step": 251090 }, { "epoch": 97.51, "learning_rate": 6.998058252427185e-06, "loss": 0.0753, "step": 251100 }, { "epoch": 97.52, "learning_rate": 6.997540453074434e-06, "loss": 0.0671, "step": 251110 }, { "epoch": 97.52, "learning_rate": 6.997022653721683e-06, "loss": 0.0595, "step": 251120 }, { "epoch": 97.53, "learning_rate": 6.996504854368933e-06, "loss": 0.0621, "step": 251130 }, { "epoch": 97.53, "learning_rate": 6.9959870550161815e-06, "loss": 0.049, "step": 251140 }, { "epoch": 97.53, "learning_rate": 6.995469255663431e-06, "loss": 0.0285, "step": 251150 }, { "epoch": 97.54, "learning_rate": 6.99495145631068e-06, "loss": 0.0642, "step": 251160 }, { "epoch": 97.54, "learning_rate": 6.9944336569579295e-06, "loss": 0.0081, "step": 251170 }, { "epoch": 97.55, "learning_rate": 6.993915857605179e-06, "loss": 0.0002, "step": 251180 }, { "epoch": 97.55, "learning_rate": 6.993398058252428e-06, "loss": 0.0044, "step": 251190 }, { "epoch": 97.55, "learning_rate": 6.992880258899677e-06, "loss": 0.037, "step": 251200 }, { "epoch": 97.56, "learning_rate": 6.992362459546926e-06, "loss": 0.041, "step": 251210 }, { "epoch": 97.56, "learning_rate": 6.991844660194176e-06, "loss": 0.0217, "step": 251220 }, { "epoch": 97.57, "learning_rate": 6.991326860841424e-06, "loss": 0.0007, "step": 251230 }, { "epoch": 97.57, "learning_rate": 6.990809061488673e-06, "loss": 0.0387, "step": 251240 }, { "epoch": 97.57, "learning_rate": 6.990291262135923e-06, "loss": 0.059, "step": 251250 }, { "epoch": 97.58, "learning_rate": 6.989773462783173e-06, "loss": 0.0001, "step": 251260 }, { "epoch": 97.58, "learning_rate": 6.9892556634304206e-06, "loss": 0.033, "step": 251270 }, { "epoch": 97.58, "learning_rate": 6.98873786407767e-06, "loss": 0.0031, "step": 251280 }, { "epoch": 97.59, "learning_rate": 6.98822006472492e-06, "loss": 0.0757, "step": 251290 }, { "epoch": 97.59, "learning_rate": 6.9877022653721694e-06, "loss": 0.0112, "step": 251300 }, { "epoch": 97.6, "learning_rate": 6.987184466019417e-06, "loss": 0.0805, "step": 251310 }, { "epoch": 97.6, "learning_rate": 6.986666666666667e-06, "loss": 0.0536, "step": 251320 }, { "epoch": 97.6, "learning_rate": 6.986148867313917e-06, "loss": 0.0482, "step": 251330 }, { "epoch": 97.61, "learning_rate": 6.985631067961166e-06, "loss": 0.1667, "step": 251340 }, { "epoch": 97.61, "learning_rate": 6.985113268608414e-06, "loss": 0.0453, "step": 251350 }, { "epoch": 97.62, "learning_rate": 6.984595469255664e-06, "loss": 0.0177, "step": 251360 }, { "epoch": 97.62, "learning_rate": 6.984077669902913e-06, "loss": 0.0851, "step": 251370 }, { "epoch": 97.62, "learning_rate": 6.983559870550163e-06, "loss": 0.0152, "step": 251380 }, { "epoch": 97.63, "learning_rate": 6.983042071197411e-06, "loss": 0.1217, "step": 251390 }, { "epoch": 97.63, "learning_rate": 6.9825242718446605e-06, "loss": 0.0635, "step": 251400 }, { "epoch": 97.63, "learning_rate": 6.98200647249191e-06, "loss": 0.1394, "step": 251410 }, { "epoch": 97.64, "learning_rate": 6.98148867313916e-06, "loss": 0.1144, "step": 251420 }, { "epoch": 97.64, "learning_rate": 6.980970873786408e-06, "loss": 0.0493, "step": 251430 }, { "epoch": 97.65, "learning_rate": 6.980453074433657e-06, "loss": 0.1289, "step": 251440 }, { "epoch": 97.65, "learning_rate": 6.979935275080907e-06, "loss": 0.042, "step": 251450 }, { "epoch": 97.65, "learning_rate": 6.9794174757281565e-06, "loss": 0.1034, "step": 251460 }, { "epoch": 97.66, "learning_rate": 6.9788996763754045e-06, "loss": 0.0789, "step": 251470 }, { "epoch": 97.66, "learning_rate": 6.978381877022654e-06, "loss": 0.0414, "step": 251480 }, { "epoch": 97.67, "learning_rate": 6.977864077669904e-06, "loss": 0.0195, "step": 251490 }, { "epoch": 97.67, "learning_rate": 6.977346278317153e-06, "loss": 0.0599, "step": 251500 }, { "epoch": 97.67, "learning_rate": 6.976828478964401e-06, "loss": 0.0519, "step": 251510 }, { "epoch": 97.68, "learning_rate": 6.976310679611651e-06, "loss": 0.1099, "step": 251520 }, { "epoch": 97.68, "learning_rate": 6.9757928802589005e-06, "loss": 0.069, "step": 251530 }, { "epoch": 97.69, "learning_rate": 6.97527508090615e-06, "loss": 0.0048, "step": 251540 }, { "epoch": 97.69, "learning_rate": 6.974757281553398e-06, "loss": 0.0852, "step": 251550 }, { "epoch": 97.69, "learning_rate": 6.974239482200648e-06, "loss": 0.1081, "step": 251560 }, { "epoch": 97.7, "learning_rate": 6.973721682847897e-06, "loss": 0.0165, "step": 251570 }, { "epoch": 97.7, "learning_rate": 6.973203883495147e-06, "loss": 0.0706, "step": 251580 }, { "epoch": 97.7, "learning_rate": 6.972686084142395e-06, "loss": 0.0773, "step": 251590 }, { "epoch": 97.71, "learning_rate": 6.972168284789644e-06, "loss": 0.1595, "step": 251600 }, { "epoch": 97.71, "learning_rate": 6.971650485436894e-06, "loss": 0.0441, "step": 251610 }, { "epoch": 97.72, "learning_rate": 6.971132686084144e-06, "loss": 0.1397, "step": 251620 }, { "epoch": 97.72, "learning_rate": 6.970614886731392e-06, "loss": 0.004, "step": 251630 }, { "epoch": 97.72, "learning_rate": 6.970097087378641e-06, "loss": 0.0118, "step": 251640 }, { "epoch": 97.73, "learning_rate": 6.969579288025891e-06, "loss": 0.1189, "step": 251650 }, { "epoch": 97.73, "learning_rate": 6.9690614886731404e-06, "loss": 0.0162, "step": 251660 }, { "epoch": 97.74, "learning_rate": 6.968543689320388e-06, "loss": 0.0036, "step": 251670 }, { "epoch": 97.74, "learning_rate": 6.968025889967638e-06, "loss": 0.0944, "step": 251680 }, { "epoch": 97.74, "learning_rate": 6.967508090614888e-06, "loss": 0.0285, "step": 251690 }, { "epoch": 97.75, "learning_rate": 6.966990291262136e-06, "loss": 0.1372, "step": 251700 }, { "epoch": 97.75, "learning_rate": 6.966472491909386e-06, "loss": 0.0716, "step": 251710 }, { "epoch": 97.76, "learning_rate": 6.965954692556635e-06, "loss": 0.0615, "step": 251720 }, { "epoch": 97.76, "learning_rate": 6.965436893203884e-06, "loss": 0.0783, "step": 251730 }, { "epoch": 97.76, "learning_rate": 6.964919093851133e-06, "loss": 0.0369, "step": 251740 }, { "epoch": 97.77, "learning_rate": 6.964401294498383e-06, "loss": 0.1647, "step": 251750 }, { "epoch": 97.77, "learning_rate": 6.9638834951456315e-06, "loss": 0.0095, "step": 251760 }, { "epoch": 97.77, "learning_rate": 6.963365695792881e-06, "loss": 0.1303, "step": 251770 }, { "epoch": 97.78, "learning_rate": 6.96284789644013e-06, "loss": 0.1212, "step": 251780 }, { "epoch": 97.78, "learning_rate": 6.9623300970873795e-06, "loss": 0.0557, "step": 251790 }, { "epoch": 97.79, "learning_rate": 6.961812297734628e-06, "loss": 0.0854, "step": 251800 }, { "epoch": 97.79, "learning_rate": 6.961294498381878e-06, "loss": 0.0233, "step": 251810 }, { "epoch": 97.79, "learning_rate": 6.960776699029127e-06, "loss": 0.0046, "step": 251820 }, { "epoch": 97.8, "learning_rate": 6.960258899676376e-06, "loss": 0.0312, "step": 251830 }, { "epoch": 97.8, "learning_rate": 6.959741100323625e-06, "loss": 0.1149, "step": 251840 }, { "epoch": 97.81, "learning_rate": 6.959223300970875e-06, "loss": 0.1044, "step": 251850 }, { "epoch": 97.81, "learning_rate": 6.9587055016181235e-06, "loss": 0.1299, "step": 251860 }, { "epoch": 97.81, "learning_rate": 6.958187702265373e-06, "loss": 0.032, "step": 251870 }, { "epoch": 97.82, "learning_rate": 6.957669902912622e-06, "loss": 0.0169, "step": 251880 }, { "epoch": 97.82, "learning_rate": 6.957152103559871e-06, "loss": 0.0344, "step": 251890 }, { "epoch": 97.83, "learning_rate": 6.95663430420712e-06, "loss": 0.0566, "step": 251900 }, { "epoch": 97.83, "learning_rate": 6.95611650485437e-06, "loss": 0.0093, "step": 251910 }, { "epoch": 97.83, "learning_rate": 6.955598705501619e-06, "loss": 0.0619, "step": 251920 }, { "epoch": 97.84, "learning_rate": 6.955080906148867e-06, "loss": 0.0225, "step": 251930 }, { "epoch": 97.84, "learning_rate": 6.954563106796117e-06, "loss": 0.0171, "step": 251940 }, { "epoch": 97.84, "learning_rate": 6.954045307443367e-06, "loss": 0.0098, "step": 251950 }, { "epoch": 97.85, "learning_rate": 6.953527508090615e-06, "loss": 0.0507, "step": 251960 }, { "epoch": 97.85, "learning_rate": 6.953009708737864e-06, "loss": 0.0036, "step": 251970 }, { "epoch": 97.86, "learning_rate": 6.952491909385114e-06, "loss": 0.0854, "step": 251980 }, { "epoch": 97.86, "learning_rate": 6.9519741100323634e-06, "loss": 0.0401, "step": 251990 }, { "epoch": 97.86, "learning_rate": 6.951456310679612e-06, "loss": 0.0539, "step": 252000 }, { "epoch": 97.87, "learning_rate": 6.950938511326861e-06, "loss": 0.069, "step": 252010 }, { "epoch": 97.87, "learning_rate": 6.950420711974111e-06, "loss": 0.0916, "step": 252020 }, { "epoch": 97.88, "learning_rate": 6.94990291262136e-06, "loss": 0.007, "step": 252030 }, { "epoch": 97.88, "learning_rate": 6.949385113268608e-06, "loss": 0.0043, "step": 252040 }, { "epoch": 97.88, "learning_rate": 6.948867313915858e-06, "loss": 0.0681, "step": 252050 }, { "epoch": 97.89, "learning_rate": 6.948349514563107e-06, "loss": 0.0323, "step": 252060 }, { "epoch": 97.89, "learning_rate": 6.947831715210357e-06, "loss": 0.0259, "step": 252070 }, { "epoch": 97.9, "learning_rate": 6.947313915857605e-06, "loss": 0.0898, "step": 252080 }, { "epoch": 97.9, "learning_rate": 6.9467961165048545e-06, "loss": 0.0221, "step": 252090 }, { "epoch": 97.9, "learning_rate": 6.946278317152104e-06, "loss": 0.0263, "step": 252100 }, { "epoch": 97.91, "learning_rate": 6.945760517799354e-06, "loss": 0.0403, "step": 252110 }, { "epoch": 97.91, "learning_rate": 6.945242718446602e-06, "loss": 0.0382, "step": 252120 }, { "epoch": 97.91, "learning_rate": 6.944724919093851e-06, "loss": 0.1629, "step": 252130 }, { "epoch": 97.92, "learning_rate": 6.944207119741101e-06, "loss": 0.0226, "step": 252140 }, { "epoch": 97.92, "learning_rate": 6.9436893203883505e-06, "loss": 0.0038, "step": 252150 }, { "epoch": 97.93, "learning_rate": 6.9431715210355985e-06, "loss": 0.0407, "step": 252160 }, { "epoch": 97.93, "learning_rate": 6.942653721682848e-06, "loss": 0.0835, "step": 252170 }, { "epoch": 97.93, "learning_rate": 6.942135922330098e-06, "loss": 0.0485, "step": 252180 }, { "epoch": 97.94, "learning_rate": 6.941618122977347e-06, "loss": 0.0392, "step": 252190 }, { "epoch": 97.94, "learning_rate": 6.941100323624595e-06, "loss": 0.0211, "step": 252200 }, { "epoch": 97.95, "learning_rate": 6.940582524271845e-06, "loss": 0.0569, "step": 252210 }, { "epoch": 97.95, "learning_rate": 6.9400647249190945e-06, "loss": 0.0287, "step": 252220 }, { "epoch": 97.95, "learning_rate": 6.939546925566344e-06, "loss": 0.0504, "step": 252230 }, { "epoch": 97.96, "learning_rate": 6.939029126213592e-06, "loss": 0.0388, "step": 252240 }, { "epoch": 97.96, "learning_rate": 6.938511326860842e-06, "loss": 0.0003, "step": 252250 }, { "epoch": 97.97, "learning_rate": 6.937993527508091e-06, "loss": 0.0617, "step": 252260 }, { "epoch": 97.97, "learning_rate": 6.937475728155341e-06, "loss": 0.0791, "step": 252270 }, { "epoch": 97.97, "learning_rate": 6.9369579288025905e-06, "loss": 0.1061, "step": 252280 }, { "epoch": 97.98, "learning_rate": 6.936440129449838e-06, "loss": 0.1122, "step": 252290 }, { "epoch": 97.98, "learning_rate": 6.935922330097088e-06, "loss": 0.0449, "step": 252300 }, { "epoch": 97.98, "learning_rate": 6.935404530744338e-06, "loss": 0.1041, "step": 252310 }, { "epoch": 97.99, "learning_rate": 6.934886731391587e-06, "loss": 0.054, "step": 252320 }, { "epoch": 97.99, "learning_rate": 6.934368932038835e-06, "loss": 0.0873, "step": 252330 }, { "epoch": 98.0, "learning_rate": 6.933851132686085e-06, "loss": 0.1139, "step": 252340 }, { "epoch": 98.0, "learning_rate": 6.9333333333333344e-06, "loss": 0.0673, "step": 252350 }, { "epoch": 98.0, "eval_accuracy": 0.9513067400275104, "eval_loss": 0.3636351227760315, "eval_runtime": 8.1971, "eval_samples_per_second": 443.452, "eval_steps_per_second": 55.508, "step": 252350 }, { "epoch": 98.0, "learning_rate": 6.932815533980583e-06, "loss": 0.0216, "step": 252360 }, { "epoch": 98.01, "learning_rate": 6.932297734627832e-06, "loss": 0.0353, "step": 252370 }, { "epoch": 98.01, "learning_rate": 6.931779935275082e-06, "loss": 0.2583, "step": 252380 }, { "epoch": 98.02, "learning_rate": 6.931262135922331e-06, "loss": 0.0064, "step": 252390 }, { "epoch": 98.02, "learning_rate": 6.93074433656958e-06, "loss": 0.0661, "step": 252400 }, { "epoch": 98.02, "learning_rate": 6.930226537216829e-06, "loss": 0.0232, "step": 252410 }, { "epoch": 98.03, "learning_rate": 6.929708737864078e-06, "loss": 0.092, "step": 252420 }, { "epoch": 98.03, "learning_rate": 6.929190938511328e-06, "loss": 0.0324, "step": 252430 }, { "epoch": 98.03, "learning_rate": 6.928673139158577e-06, "loss": 0.042, "step": 252440 }, { "epoch": 98.04, "learning_rate": 6.9281553398058255e-06, "loss": 0.1064, "step": 252450 }, { "epoch": 98.04, "learning_rate": 6.927637540453075e-06, "loss": 0.0626, "step": 252460 }, { "epoch": 98.05, "learning_rate": 6.927119741100325e-06, "loss": 0.0527, "step": 252470 }, { "epoch": 98.05, "learning_rate": 6.9266019417475735e-06, "loss": 0.1293, "step": 252480 }, { "epoch": 98.05, "learning_rate": 6.926084142394822e-06, "loss": 0.0301, "step": 252490 }, { "epoch": 98.06, "learning_rate": 6.925566343042072e-06, "loss": 0.0187, "step": 252500 }, { "epoch": 98.06, "learning_rate": 6.925048543689321e-06, "loss": 0.0149, "step": 252510 }, { "epoch": 98.07, "learning_rate": 6.92453074433657e-06, "loss": 0.0337, "step": 252520 }, { "epoch": 98.07, "learning_rate": 6.924012944983819e-06, "loss": 0.0069, "step": 252530 }, { "epoch": 98.07, "learning_rate": 6.923495145631069e-06, "loss": 0.0031, "step": 252540 }, { "epoch": 98.08, "learning_rate": 6.9229773462783175e-06, "loss": 0.1159, "step": 252550 }, { "epoch": 98.08, "learning_rate": 6.922459546925567e-06, "loss": 0.0222, "step": 252560 }, { "epoch": 98.09, "learning_rate": 6.921941747572816e-06, "loss": 0.0342, "step": 252570 }, { "epoch": 98.09, "learning_rate": 6.9214239482200655e-06, "loss": 0.016, "step": 252580 }, { "epoch": 98.09, "learning_rate": 6.920906148867314e-06, "loss": 0.1221, "step": 252590 }, { "epoch": 98.1, "learning_rate": 6.920388349514564e-06, "loss": 0.0344, "step": 252600 }, { "epoch": 98.1, "learning_rate": 6.919870550161813e-06, "loss": 0.0167, "step": 252610 }, { "epoch": 98.1, "learning_rate": 6.919352750809062e-06, "loss": 0.0573, "step": 252620 }, { "epoch": 98.11, "learning_rate": 6.918834951456311e-06, "loss": 0.0975, "step": 252630 }, { "epoch": 98.11, "learning_rate": 6.918317152103561e-06, "loss": 0.0049, "step": 252640 }, { "epoch": 98.12, "learning_rate": 6.917799352750809e-06, "loss": 0.059, "step": 252650 }, { "epoch": 98.12, "learning_rate": 6.917281553398059e-06, "loss": 0.0681, "step": 252660 }, { "epoch": 98.12, "learning_rate": 6.916763754045308e-06, "loss": 0.1316, "step": 252670 }, { "epoch": 98.13, "learning_rate": 6.9162459546925574e-06, "loss": 0.1364, "step": 252680 }, { "epoch": 98.13, "learning_rate": 6.915728155339806e-06, "loss": 0.0026, "step": 252690 }, { "epoch": 98.14, "learning_rate": 6.915210355987055e-06, "loss": 0.0425, "step": 252700 }, { "epoch": 98.14, "learning_rate": 6.914692556634305e-06, "loss": 0.1669, "step": 252710 }, { "epoch": 98.14, "learning_rate": 6.914174757281554e-06, "loss": 0.104, "step": 252720 }, { "epoch": 98.15, "learning_rate": 6.913656957928803e-06, "loss": 0.0103, "step": 252730 }, { "epoch": 98.15, "learning_rate": 6.913139158576052e-06, "loss": 0.0185, "step": 252740 }, { "epoch": 98.16, "learning_rate": 6.912621359223301e-06, "loss": 0.1722, "step": 252750 }, { "epoch": 98.16, "learning_rate": 6.912103559870551e-06, "loss": 0.0395, "step": 252760 }, { "epoch": 98.16, "learning_rate": 6.9115857605178e-06, "loss": 0.0053, "step": 252770 }, { "epoch": 98.17, "learning_rate": 6.9110679611650485e-06, "loss": 0.0776, "step": 252780 }, { "epoch": 98.17, "learning_rate": 6.910550161812298e-06, "loss": 0.0593, "step": 252790 }, { "epoch": 98.17, "learning_rate": 6.910032362459548e-06, "loss": 0.1146, "step": 252800 }, { "epoch": 98.18, "learning_rate": 6.9095145631067965e-06, "loss": 0.1055, "step": 252810 }, { "epoch": 98.18, "learning_rate": 6.908996763754045e-06, "loss": 0.0226, "step": 252820 }, { "epoch": 98.19, "learning_rate": 6.908478964401295e-06, "loss": 0.1461, "step": 252830 }, { "epoch": 98.19, "learning_rate": 6.9079611650485445e-06, "loss": 0.0618, "step": 252840 }, { "epoch": 98.19, "learning_rate": 6.907443365695794e-06, "loss": 0.0069, "step": 252850 }, { "epoch": 98.2, "learning_rate": 6.906925566343042e-06, "loss": 0.0136, "step": 252860 }, { "epoch": 98.2, "learning_rate": 6.906407766990292e-06, "loss": 0.0007, "step": 252870 }, { "epoch": 98.21, "learning_rate": 6.905889967637541e-06, "loss": 0.0054, "step": 252880 }, { "epoch": 98.21, "learning_rate": 6.905372168284791e-06, "loss": 0.0395, "step": 252890 }, { "epoch": 98.21, "learning_rate": 6.904854368932039e-06, "loss": 0.0943, "step": 252900 }, { "epoch": 98.22, "learning_rate": 6.9043365695792885e-06, "loss": 0.1115, "step": 252910 }, { "epoch": 98.22, "learning_rate": 6.903818770226538e-06, "loss": 0.0067, "step": 252920 }, { "epoch": 98.23, "learning_rate": 6.903300970873788e-06, "loss": 0.032, "step": 252930 }, { "epoch": 98.23, "learning_rate": 6.902783171521036e-06, "loss": 0.0187, "step": 252940 }, { "epoch": 98.23, "learning_rate": 6.902265372168285e-06, "loss": 0.1301, "step": 252950 }, { "epoch": 98.24, "learning_rate": 6.901747572815535e-06, "loss": 0.0596, "step": 252960 }, { "epoch": 98.24, "learning_rate": 6.9012297734627845e-06, "loss": 0.1034, "step": 252970 }, { "epoch": 98.24, "learning_rate": 6.900711974110032e-06, "loss": 0.0198, "step": 252980 }, { "epoch": 98.25, "learning_rate": 6.900194174757282e-06, "loss": 0.0238, "step": 252990 }, { "epoch": 98.25, "learning_rate": 6.899676375404532e-06, "loss": 0.0093, "step": 253000 }, { "epoch": 98.26, "learning_rate": 6.899158576051781e-06, "loss": 0.0553, "step": 253010 }, { "epoch": 98.26, "learning_rate": 6.898640776699029e-06, "loss": 0.0421, "step": 253020 }, { "epoch": 98.26, "learning_rate": 6.898122977346279e-06, "loss": 0.0963, "step": 253030 }, { "epoch": 98.27, "learning_rate": 6.8976051779935284e-06, "loss": 0.0003, "step": 253040 }, { "epoch": 98.27, "learning_rate": 6.897087378640778e-06, "loss": 0.001, "step": 253050 }, { "epoch": 98.28, "learning_rate": 6.896569579288026e-06, "loss": 0.0764, "step": 253060 }, { "epoch": 98.28, "learning_rate": 6.896051779935276e-06, "loss": 0.2061, "step": 253070 }, { "epoch": 98.28, "learning_rate": 6.895533980582525e-06, "loss": 0.079, "step": 253080 }, { "epoch": 98.29, "learning_rate": 6.895016181229775e-06, "loss": 0.0591, "step": 253090 }, { "epoch": 98.29, "learning_rate": 6.894498381877023e-06, "loss": 0.035, "step": 253100 }, { "epoch": 98.3, "learning_rate": 6.893980582524272e-06, "loss": 0.0336, "step": 253110 }, { "epoch": 98.3, "learning_rate": 6.893462783171522e-06, "loss": 0.072, "step": 253120 }, { "epoch": 98.3, "learning_rate": 6.892944983818772e-06, "loss": 0.1616, "step": 253130 }, { "epoch": 98.31, "learning_rate": 6.8924271844660195e-06, "loss": 0.0587, "step": 253140 }, { "epoch": 98.31, "learning_rate": 6.891909385113269e-06, "loss": 0.0543, "step": 253150 }, { "epoch": 98.31, "learning_rate": 6.891391585760519e-06, "loss": 0.0264, "step": 253160 }, { "epoch": 98.32, "learning_rate": 6.8908737864077675e-06, "loss": 0.015, "step": 253170 }, { "epoch": 98.32, "learning_rate": 6.890355987055016e-06, "loss": 0.1798, "step": 253180 }, { "epoch": 98.33, "learning_rate": 6.889838187702266e-06, "loss": 0.018, "step": 253190 }, { "epoch": 98.33, "learning_rate": 6.8893203883495155e-06, "loss": 0.0507, "step": 253200 }, { "epoch": 98.33, "learning_rate": 6.888802588996764e-06, "loss": 0.0054, "step": 253210 }, { "epoch": 98.34, "learning_rate": 6.888284789644013e-06, "loss": 0.0509, "step": 253220 }, { "epoch": 98.34, "learning_rate": 6.887766990291263e-06, "loss": 0.1517, "step": 253230 }, { "epoch": 98.35, "learning_rate": 6.887249190938512e-06, "loss": 0.2042, "step": 253240 }, { "epoch": 98.35, "learning_rate": 6.886731391585761e-06, "loss": 0.0294, "step": 253250 }, { "epoch": 98.35, "learning_rate": 6.88621359223301e-06, "loss": 0.2049, "step": 253260 }, { "epoch": 98.36, "learning_rate": 6.8856957928802595e-06, "loss": 0.0392, "step": 253270 }, { "epoch": 98.36, "learning_rate": 6.885177993527509e-06, "loss": 0.0026, "step": 253280 }, { "epoch": 98.37, "learning_rate": 6.884660194174758e-06, "loss": 0.0681, "step": 253290 }, { "epoch": 98.37, "learning_rate": 6.884142394822007e-06, "loss": 0.1597, "step": 253300 }, { "epoch": 98.37, "learning_rate": 6.883624595469256e-06, "loss": 0.0728, "step": 253310 }, { "epoch": 98.38, "learning_rate": 6.883106796116505e-06, "loss": 0.0452, "step": 253320 }, { "epoch": 98.38, "learning_rate": 6.882588996763755e-06, "loss": 0.0025, "step": 253330 }, { "epoch": 98.38, "learning_rate": 6.882071197411003e-06, "loss": 0.0163, "step": 253340 }, { "epoch": 98.39, "learning_rate": 6.881553398058253e-06, "loss": 0.0012, "step": 253350 }, { "epoch": 98.39, "learning_rate": 6.881035598705502e-06, "loss": 0.0263, "step": 253360 }, { "epoch": 98.4, "learning_rate": 6.8805177993527514e-06, "loss": 0.0202, "step": 253370 }, { "epoch": 98.4, "learning_rate": 6.88e-06, "loss": 0.0666, "step": 253380 }, { "epoch": 98.4, "learning_rate": 6.87948220064725e-06, "loss": 0.061, "step": 253390 }, { "epoch": 98.41, "learning_rate": 6.878964401294499e-06, "loss": 0.0227, "step": 253400 }, { "epoch": 98.41, "learning_rate": 6.878446601941748e-06, "loss": 0.0004, "step": 253410 }, { "epoch": 98.42, "learning_rate": 6.877928802588998e-06, "loss": 0.0354, "step": 253420 }, { "epoch": 98.42, "learning_rate": 6.877411003236247e-06, "loss": 0.0602, "step": 253430 }, { "epoch": 98.42, "learning_rate": 6.876893203883495e-06, "loss": 0.0545, "step": 253440 }, { "epoch": 98.43, "learning_rate": 6.876375404530745e-06, "loss": 0.0505, "step": 253450 }, { "epoch": 98.43, "learning_rate": 6.875857605177995e-06, "loss": 0.0194, "step": 253460 }, { "epoch": 98.43, "learning_rate": 6.875339805825243e-06, "loss": 0.149, "step": 253470 }, { "epoch": 98.44, "learning_rate": 6.874822006472492e-06, "loss": 0.0376, "step": 253480 }, { "epoch": 98.44, "learning_rate": 6.874304207119742e-06, "loss": 0.0354, "step": 253490 }, { "epoch": 98.45, "learning_rate": 6.873786407766991e-06, "loss": 0.0677, "step": 253500 }, { "epoch": 98.45, "learning_rate": 6.873268608414239e-06, "loss": 0.0812, "step": 253510 }, { "epoch": 98.45, "learning_rate": 6.872750809061489e-06, "loss": 0.0219, "step": 253520 }, { "epoch": 98.46, "learning_rate": 6.8722330097087385e-06, "loss": 0.0309, "step": 253530 }, { "epoch": 98.46, "learning_rate": 6.871715210355988e-06, "loss": 0.0037, "step": 253540 }, { "epoch": 98.47, "learning_rate": 6.871197411003236e-06, "loss": 0.0381, "step": 253550 }, { "epoch": 98.47, "learning_rate": 6.870679611650486e-06, "loss": 0.1144, "step": 253560 }, { "epoch": 98.47, "learning_rate": 6.870161812297735e-06, "loss": 0.0352, "step": 253570 }, { "epoch": 98.48, "learning_rate": 6.869644012944985e-06, "loss": 0.0003, "step": 253580 }, { "epoch": 98.48, "learning_rate": 6.869126213592233e-06, "loss": 0.0419, "step": 253590 }, { "epoch": 98.49, "learning_rate": 6.8686084142394825e-06, "loss": 0.0672, "step": 253600 }, { "epoch": 98.49, "learning_rate": 6.868090614886732e-06, "loss": 0.0335, "step": 253610 }, { "epoch": 98.49, "learning_rate": 6.867572815533982e-06, "loss": 0.0562, "step": 253620 }, { "epoch": 98.5, "learning_rate": 6.86705501618123e-06, "loss": 0.1066, "step": 253630 }, { "epoch": 98.5, "learning_rate": 6.866537216828479e-06, "loss": 0.124, "step": 253640 }, { "epoch": 98.5, "learning_rate": 6.866019417475729e-06, "loss": 0.0123, "step": 253650 }, { "epoch": 98.51, "learning_rate": 6.8655016181229785e-06, "loss": 0.0017, "step": 253660 }, { "epoch": 98.51, "learning_rate": 6.864983818770226e-06, "loss": 0.0341, "step": 253670 }, { "epoch": 98.52, "learning_rate": 6.864466019417476e-06, "loss": 0.0001, "step": 253680 }, { "epoch": 98.52, "learning_rate": 6.863948220064726e-06, "loss": 0.1197, "step": 253690 }, { "epoch": 98.52, "learning_rate": 6.863430420711975e-06, "loss": 0.0942, "step": 253700 }, { "epoch": 98.53, "learning_rate": 6.862912621359223e-06, "loss": 0.0001, "step": 253710 }, { "epoch": 98.53, "learning_rate": 6.862394822006473e-06, "loss": 0.0921, "step": 253720 }, { "epoch": 98.54, "learning_rate": 6.8618770226537224e-06, "loss": 0.0053, "step": 253730 }, { "epoch": 98.54, "learning_rate": 6.861359223300972e-06, "loss": 0.0088, "step": 253740 }, { "epoch": 98.54, "learning_rate": 6.86084142394822e-06, "loss": 0.0412, "step": 253750 }, { "epoch": 98.55, "learning_rate": 6.86032362459547e-06, "loss": 0.006, "step": 253760 }, { "epoch": 98.55, "learning_rate": 6.859805825242719e-06, "loss": 0.0816, "step": 253770 }, { "epoch": 98.56, "learning_rate": 6.859288025889969e-06, "loss": 0.0119, "step": 253780 }, { "epoch": 98.56, "learning_rate": 6.858770226537217e-06, "loss": 0.0229, "step": 253790 }, { "epoch": 98.56, "learning_rate": 6.858252427184466e-06, "loss": 0.0131, "step": 253800 }, { "epoch": 98.57, "learning_rate": 6.857734627831716e-06, "loss": 0.0907, "step": 253810 }, { "epoch": 98.57, "learning_rate": 6.857216828478966e-06, "loss": 0.0487, "step": 253820 }, { "epoch": 98.57, "learning_rate": 6.8566990291262135e-06, "loss": 0.0722, "step": 253830 }, { "epoch": 98.58, "learning_rate": 6.856181229773463e-06, "loss": 0.0148, "step": 253840 }, { "epoch": 98.58, "learning_rate": 6.855663430420713e-06, "loss": 0.0211, "step": 253850 }, { "epoch": 98.59, "learning_rate": 6.855145631067962e-06, "loss": 0.0144, "step": 253860 }, { "epoch": 98.59, "learning_rate": 6.85462783171521e-06, "loss": 0.0537, "step": 253870 }, { "epoch": 98.59, "learning_rate": 6.85411003236246e-06, "loss": 0.0018, "step": 253880 }, { "epoch": 98.6, "learning_rate": 6.8535922330097095e-06, "loss": 0.0536, "step": 253890 }, { "epoch": 98.6, "learning_rate": 6.853074433656959e-06, "loss": 0.0591, "step": 253900 }, { "epoch": 98.61, "learning_rate": 6.852556634304207e-06, "loss": 0.0305, "step": 253910 }, { "epoch": 98.61, "learning_rate": 6.852038834951457e-06, "loss": 0.0174, "step": 253920 }, { "epoch": 98.61, "learning_rate": 6.851521035598706e-06, "loss": 0.0554, "step": 253930 }, { "epoch": 98.62, "learning_rate": 6.851003236245956e-06, "loss": 0.044, "step": 253940 }, { "epoch": 98.62, "learning_rate": 6.850485436893205e-06, "loss": 0.0807, "step": 253950 }, { "epoch": 98.63, "learning_rate": 6.8499676375404535e-06, "loss": 0.0206, "step": 253960 }, { "epoch": 98.63, "learning_rate": 6.849449838187703e-06, "loss": 0.0345, "step": 253970 }, { "epoch": 98.63, "learning_rate": 6.848932038834952e-06, "loss": 0.0019, "step": 253980 }, { "epoch": 98.64, "learning_rate": 6.8484142394822015e-06, "loss": 0.0032, "step": 253990 }, { "epoch": 98.64, "learning_rate": 6.84789644012945e-06, "loss": 0.0617, "step": 254000 }, { "epoch": 98.64, "learning_rate": 6.8473786407767e-06, "loss": 0.0695, "step": 254010 }, { "epoch": 98.65, "learning_rate": 6.846860841423949e-06, "loss": 0.0784, "step": 254020 }, { "epoch": 98.65, "learning_rate": 6.846343042071198e-06, "loss": 0.072, "step": 254030 }, { "epoch": 98.66, "learning_rate": 6.845825242718447e-06, "loss": 0.023, "step": 254040 }, { "epoch": 98.66, "learning_rate": 6.845307443365697e-06, "loss": 0.087, "step": 254050 }, { "epoch": 98.66, "learning_rate": 6.8447896440129454e-06, "loss": 0.0322, "step": 254060 }, { "epoch": 98.67, "learning_rate": 6.844271844660195e-06, "loss": 0.0345, "step": 254070 }, { "epoch": 98.67, "learning_rate": 6.843754045307444e-06, "loss": 0.0844, "step": 254080 }, { "epoch": 98.68, "learning_rate": 6.8432362459546934e-06, "loss": 0.2505, "step": 254090 }, { "epoch": 98.68, "learning_rate": 6.842718446601942e-06, "loss": 0.0206, "step": 254100 }, { "epoch": 98.68, "learning_rate": 6.842200647249192e-06, "loss": 0.1007, "step": 254110 }, { "epoch": 98.69, "learning_rate": 6.841682847896441e-06, "loss": 0.0018, "step": 254120 }, { "epoch": 98.69, "learning_rate": 6.84116504854369e-06, "loss": 0.0575, "step": 254130 }, { "epoch": 98.7, "learning_rate": 6.840647249190939e-06, "loss": 0.0236, "step": 254140 }, { "epoch": 98.7, "learning_rate": 6.840129449838189e-06, "loss": 0.1194, "step": 254150 }, { "epoch": 98.7, "learning_rate": 6.839611650485437e-06, "loss": 0.1679, "step": 254160 }, { "epoch": 98.71, "learning_rate": 6.839093851132686e-06, "loss": 0.0507, "step": 254170 }, { "epoch": 98.71, "learning_rate": 6.838576051779936e-06, "loss": 0.0176, "step": 254180 }, { "epoch": 98.71, "learning_rate": 6.838058252427185e-06, "loss": 0.0009, "step": 254190 }, { "epoch": 98.72, "learning_rate": 6.837540453074434e-06, "loss": 0.0306, "step": 254200 }, { "epoch": 98.72, "learning_rate": 6.837022653721683e-06, "loss": 0.0346, "step": 254210 }, { "epoch": 98.73, "learning_rate": 6.8365048543689325e-06, "loss": 0.0002, "step": 254220 }, { "epoch": 98.73, "learning_rate": 6.835987055016182e-06, "loss": 0.1106, "step": 254230 }, { "epoch": 98.73, "learning_rate": 6.835469255663431e-06, "loss": 0.0108, "step": 254240 }, { "epoch": 98.74, "learning_rate": 6.83495145631068e-06, "loss": 0.0897, "step": 254250 }, { "epoch": 98.74, "learning_rate": 6.834433656957929e-06, "loss": 0.0035, "step": 254260 }, { "epoch": 98.75, "learning_rate": 6.833915857605179e-06, "loss": 0.0456, "step": 254270 }, { "epoch": 98.75, "learning_rate": 6.833398058252428e-06, "loss": 0.0176, "step": 254280 }, { "epoch": 98.75, "learning_rate": 6.8328802588996765e-06, "loss": 0.0188, "step": 254290 }, { "epoch": 98.76, "learning_rate": 6.832362459546926e-06, "loss": 0.0073, "step": 254300 }, { "epoch": 98.76, "learning_rate": 6.831844660194176e-06, "loss": 0.0375, "step": 254310 }, { "epoch": 98.77, "learning_rate": 6.831326860841424e-06, "loss": 0.0691, "step": 254320 }, { "epoch": 98.77, "learning_rate": 6.830809061488673e-06, "loss": 0.0881, "step": 254330 }, { "epoch": 98.77, "learning_rate": 6.830291262135923e-06, "loss": 0.0114, "step": 254340 }, { "epoch": 98.78, "learning_rate": 6.8297734627831725e-06, "loss": 0.0555, "step": 254350 }, { "epoch": 98.78, "learning_rate": 6.82925566343042e-06, "loss": 0.1366, "step": 254360 }, { "epoch": 98.78, "learning_rate": 6.82873786407767e-06, "loss": 0.0593, "step": 254370 }, { "epoch": 98.79, "learning_rate": 6.82822006472492e-06, "loss": 0.0777, "step": 254380 }, { "epoch": 98.79, "learning_rate": 6.827702265372169e-06, "loss": 0.0007, "step": 254390 }, { "epoch": 98.8, "learning_rate": 6.827184466019417e-06, "loss": 0.1249, "step": 254400 }, { "epoch": 98.8, "learning_rate": 6.826666666666667e-06, "loss": 0.0708, "step": 254410 }, { "epoch": 98.8, "learning_rate": 6.8261488673139164e-06, "loss": 0.0477, "step": 254420 }, { "epoch": 98.81, "learning_rate": 6.825631067961166e-06, "loss": 0.0974, "step": 254430 }, { "epoch": 98.81, "learning_rate": 6.825113268608414e-06, "loss": 0.0316, "step": 254440 }, { "epoch": 98.82, "learning_rate": 6.824595469255664e-06, "loss": 0.1103, "step": 254450 }, { "epoch": 98.82, "learning_rate": 6.824077669902913e-06, "loss": 0.0439, "step": 254460 }, { "epoch": 98.82, "learning_rate": 6.823559870550163e-06, "loss": 0.1616, "step": 254470 }, { "epoch": 98.83, "learning_rate": 6.823042071197411e-06, "loss": 0.0203, "step": 254480 }, { "epoch": 98.83, "learning_rate": 6.82252427184466e-06, "loss": 0.0641, "step": 254490 }, { "epoch": 98.83, "learning_rate": 6.82200647249191e-06, "loss": 0.0978, "step": 254500 }, { "epoch": 98.84, "learning_rate": 6.82148867313916e-06, "loss": 0.0452, "step": 254510 }, { "epoch": 98.84, "learning_rate": 6.820970873786409e-06, "loss": 0.0573, "step": 254520 }, { "epoch": 98.85, "learning_rate": 6.820453074433657e-06, "loss": 0.0465, "step": 254530 }, { "epoch": 98.85, "learning_rate": 6.819935275080907e-06, "loss": 0.1753, "step": 254540 }, { "epoch": 98.85, "learning_rate": 6.819417475728156e-06, "loss": 0.0354, "step": 254550 }, { "epoch": 98.86, "learning_rate": 6.818899676375406e-06, "loss": 0.0399, "step": 254560 }, { "epoch": 98.86, "learning_rate": 6.818381877022654e-06, "loss": 0.1956, "step": 254570 }, { "epoch": 98.87, "learning_rate": 6.8178640776699035e-06, "loss": 0.0479, "step": 254580 }, { "epoch": 98.87, "learning_rate": 6.817346278317153e-06, "loss": 0.1183, "step": 254590 }, { "epoch": 98.87, "learning_rate": 6.816828478964403e-06, "loss": 0.0677, "step": 254600 }, { "epoch": 98.88, "learning_rate": 6.816310679611651e-06, "loss": 0.0062, "step": 254610 }, { "epoch": 98.88, "learning_rate": 6.8157928802589e-06, "loss": 0.0783, "step": 254620 }, { "epoch": 98.89, "learning_rate": 6.81527508090615e-06, "loss": 0.1224, "step": 254630 }, { "epoch": 98.89, "learning_rate": 6.814757281553399e-06, "loss": 0.0381, "step": 254640 }, { "epoch": 98.89, "learning_rate": 6.8142394822006475e-06, "loss": 0.102, "step": 254650 }, { "epoch": 98.9, "learning_rate": 6.813721682847897e-06, "loss": 0.0047, "step": 254660 }, { "epoch": 98.9, "learning_rate": 6.813203883495147e-06, "loss": 0.0997, "step": 254670 }, { "epoch": 98.9, "learning_rate": 6.8126860841423955e-06, "loss": 0.0382, "step": 254680 }, { "epoch": 98.91, "learning_rate": 6.812168284789644e-06, "loss": 0.0012, "step": 254690 }, { "epoch": 98.91, "learning_rate": 6.811650485436894e-06, "loss": 0.0901, "step": 254700 }, { "epoch": 98.92, "learning_rate": 6.8111326860841435e-06, "loss": 0.0698, "step": 254710 }, { "epoch": 98.92, "learning_rate": 6.810614886731392e-06, "loss": 0.0578, "step": 254720 }, { "epoch": 98.92, "learning_rate": 6.810097087378641e-06, "loss": 0.0622, "step": 254730 }, { "epoch": 98.93, "learning_rate": 6.809579288025891e-06, "loss": 0.1823, "step": 254740 }, { "epoch": 98.93, "learning_rate": 6.80906148867314e-06, "loss": 0.064, "step": 254750 }, { "epoch": 98.94, "learning_rate": 6.808543689320389e-06, "loss": 0.0647, "step": 254760 }, { "epoch": 98.94, "learning_rate": 6.808025889967638e-06, "loss": 0.0456, "step": 254770 }, { "epoch": 98.94, "learning_rate": 6.8075080906148874e-06, "loss": 0.0335, "step": 254780 }, { "epoch": 98.95, "learning_rate": 6.806990291262136e-06, "loss": 0.1062, "step": 254790 }, { "epoch": 98.95, "learning_rate": 6.806472491909386e-06, "loss": 0.0281, "step": 254800 }, { "epoch": 98.96, "learning_rate": 6.805954692556635e-06, "loss": 0.0194, "step": 254810 }, { "epoch": 98.96, "learning_rate": 6.805436893203884e-06, "loss": 0.0797, "step": 254820 }, { "epoch": 98.96, "learning_rate": 6.804919093851133e-06, "loss": 0.0433, "step": 254830 }, { "epoch": 98.97, "learning_rate": 6.804401294498383e-06, "loss": 0.0509, "step": 254840 }, { "epoch": 98.97, "learning_rate": 6.803883495145631e-06, "loss": 0.0578, "step": 254850 }, { "epoch": 98.97, "learning_rate": 6.803365695792881e-06, "loss": 0.0162, "step": 254860 }, { "epoch": 98.98, "learning_rate": 6.80284789644013e-06, "loss": 0.0013, "step": 254870 }, { "epoch": 98.98, "learning_rate": 6.802330097087379e-06, "loss": 0.0004, "step": 254880 }, { "epoch": 98.99, "learning_rate": 6.801812297734628e-06, "loss": 0.017, "step": 254890 }, { "epoch": 98.99, "learning_rate": 6.801294498381878e-06, "loss": 0.0297, "step": 254900 }, { "epoch": 98.99, "learning_rate": 6.8007766990291265e-06, "loss": 0.1409, "step": 254910 }, { "epoch": 99.0, "learning_rate": 6.800258899676376e-06, "loss": 0.062, "step": 254920 }, { "epoch": 99.0, "eval_accuracy": 0.9513067400275104, "eval_loss": 0.3688491880893707, "eval_runtime": 8.2271, "eval_samples_per_second": 441.835, "eval_steps_per_second": 55.305, "step": 254925 }, { "epoch": 99.0, "learning_rate": 6.799741100323625e-06, "loss": 0.0874, "step": 254930 }, { "epoch": 99.01, "learning_rate": 6.7992233009708745e-06, "loss": 0.1264, "step": 254940 }, { "epoch": 99.01, "learning_rate": 6.798705501618123e-06, "loss": 0.0868, "step": 254950 }, { "epoch": 99.01, "learning_rate": 6.798187702265373e-06, "loss": 0.0552, "step": 254960 }, { "epoch": 99.02, "learning_rate": 6.797669902912622e-06, "loss": 0.0306, "step": 254970 }, { "epoch": 99.02, "learning_rate": 6.7971521035598705e-06, "loss": 0.0732, "step": 254980 }, { "epoch": 99.03, "learning_rate": 6.79663430420712e-06, "loss": 0.0319, "step": 254990 }, { "epoch": 99.03, "learning_rate": 6.79611650485437e-06, "loss": 0.0391, "step": 255000 }, { "epoch": 99.03, "learning_rate": 6.7955987055016185e-06, "loss": 0.0139, "step": 255010 }, { "epoch": 99.04, "learning_rate": 6.795080906148867e-06, "loss": 0.0267, "step": 255020 }, { "epoch": 99.04, "learning_rate": 6.794563106796117e-06, "loss": 0.0088, "step": 255030 }, { "epoch": 99.04, "learning_rate": 6.7940453074433665e-06, "loss": 0.0388, "step": 255040 }, { "epoch": 99.05, "learning_rate": 6.793527508090615e-06, "loss": 0.0369, "step": 255050 }, { "epoch": 99.05, "learning_rate": 6.793009708737864e-06, "loss": 0.1204, "step": 255060 }, { "epoch": 99.06, "learning_rate": 6.792491909385114e-06, "loss": 0.1541, "step": 255070 }, { "epoch": 99.06, "learning_rate": 6.791974110032363e-06, "loss": 0.1017, "step": 255080 }, { "epoch": 99.06, "learning_rate": 6.791456310679613e-06, "loss": 0.0132, "step": 255090 }, { "epoch": 99.07, "learning_rate": 6.790938511326861e-06, "loss": 0.0648, "step": 255100 }, { "epoch": 99.07, "learning_rate": 6.7904207119741104e-06, "loss": 0.0967, "step": 255110 }, { "epoch": 99.08, "learning_rate": 6.78990291262136e-06, "loss": 0.0359, "step": 255120 }, { "epoch": 99.08, "learning_rate": 6.78938511326861e-06, "loss": 0.0748, "step": 255130 }, { "epoch": 99.08, "learning_rate": 6.788867313915858e-06, "loss": 0.113, "step": 255140 }, { "epoch": 99.09, "learning_rate": 6.788349514563107e-06, "loss": 0.0062, "step": 255150 }, { "epoch": 99.09, "learning_rate": 6.787831715210357e-06, "loss": 0.0904, "step": 255160 }, { "epoch": 99.1, "learning_rate": 6.7873139158576064e-06, "loss": 0.1337, "step": 255170 }, { "epoch": 99.1, "learning_rate": 6.786796116504854e-06, "loss": 0.0975, "step": 255180 }, { "epoch": 99.1, "learning_rate": 6.786278317152104e-06, "loss": 0.0411, "step": 255190 }, { "epoch": 99.11, "learning_rate": 6.785760517799354e-06, "loss": 0.0049, "step": 255200 }, { "epoch": 99.11, "learning_rate": 6.785242718446603e-06, "loss": 0.0501, "step": 255210 }, { "epoch": 99.11, "learning_rate": 6.784724919093851e-06, "loss": 0.0684, "step": 255220 }, { "epoch": 99.12, "learning_rate": 6.784207119741101e-06, "loss": 0.0091, "step": 255230 }, { "epoch": 99.12, "learning_rate": 6.78368932038835e-06, "loss": 0.0039, "step": 255240 }, { "epoch": 99.13, "learning_rate": 6.7831715210356e-06, "loss": 0.1021, "step": 255250 }, { "epoch": 99.13, "learning_rate": 6.782653721682848e-06, "loss": 0.0027, "step": 255260 }, { "epoch": 99.13, "learning_rate": 6.7821359223300975e-06, "loss": 0.0012, "step": 255270 }, { "epoch": 99.14, "learning_rate": 6.781618122977347e-06, "loss": 0.0244, "step": 255280 }, { "epoch": 99.14, "learning_rate": 6.781100323624597e-06, "loss": 0.0103, "step": 255290 }, { "epoch": 99.15, "learning_rate": 6.780582524271845e-06, "loss": 0.0179, "step": 255300 }, { "epoch": 99.15, "learning_rate": 6.780064724919094e-06, "loss": 0.0008, "step": 255310 }, { "epoch": 99.15, "learning_rate": 6.779546925566344e-06, "loss": 0.1987, "step": 255320 }, { "epoch": 99.16, "learning_rate": 6.7790291262135936e-06, "loss": 0.005, "step": 255330 }, { "epoch": 99.16, "learning_rate": 6.7785113268608415e-06, "loss": 0.0417, "step": 255340 }, { "epoch": 99.17, "learning_rate": 6.777993527508091e-06, "loss": 0.0625, "step": 255350 }, { "epoch": 99.17, "learning_rate": 6.777475728155341e-06, "loss": 0.0335, "step": 255360 }, { "epoch": 99.17, "learning_rate": 6.77695792880259e-06, "loss": 0.1089, "step": 255370 }, { "epoch": 99.18, "learning_rate": 6.776440129449838e-06, "loss": 0.0567, "step": 255380 }, { "epoch": 99.18, "learning_rate": 6.775922330097088e-06, "loss": 0.0084, "step": 255390 }, { "epoch": 99.18, "learning_rate": 6.7754045307443375e-06, "loss": 0.1237, "step": 255400 }, { "epoch": 99.19, "learning_rate": 6.774886731391587e-06, "loss": 0.0139, "step": 255410 }, { "epoch": 99.19, "learning_rate": 6.774368932038835e-06, "loss": 0.0351, "step": 255420 }, { "epoch": 99.2, "learning_rate": 6.773851132686085e-06, "loss": 0.0104, "step": 255430 }, { "epoch": 99.2, "learning_rate": 6.773333333333334e-06, "loss": 0.0405, "step": 255440 }, { "epoch": 99.2, "learning_rate": 6.772815533980583e-06, "loss": 0.0534, "step": 255450 }, { "epoch": 99.21, "learning_rate": 6.772297734627832e-06, "loss": 0.0338, "step": 255460 }, { "epoch": 99.21, "learning_rate": 6.7717799352750814e-06, "loss": 0.0011, "step": 255470 }, { "epoch": 99.22, "learning_rate": 6.771262135922331e-06, "loss": 0.0167, "step": 255480 }, { "epoch": 99.22, "learning_rate": 6.77074433656958e-06, "loss": 0.1182, "step": 255490 }, { "epoch": 99.22, "learning_rate": 6.770226537216829e-06, "loss": 0.2223, "step": 255500 }, { "epoch": 99.23, "learning_rate": 6.769708737864078e-06, "loss": 0.0764, "step": 255510 }, { "epoch": 99.23, "learning_rate": 6.769190938511328e-06, "loss": 0.1797, "step": 255520 }, { "epoch": 99.23, "learning_rate": 6.768673139158577e-06, "loss": 0.0548, "step": 255530 }, { "epoch": 99.24, "learning_rate": 6.768155339805825e-06, "loss": 0.0853, "step": 255540 }, { "epoch": 99.24, "learning_rate": 6.767637540453075e-06, "loss": 0.0092, "step": 255550 }, { "epoch": 99.25, "learning_rate": 6.767119741100325e-06, "loss": 0.0895, "step": 255560 }, { "epoch": 99.25, "learning_rate": 6.766601941747573e-06, "loss": 0.0482, "step": 255570 }, { "epoch": 99.25, "learning_rate": 6.766084142394822e-06, "loss": 0.017, "step": 255580 }, { "epoch": 99.26, "learning_rate": 6.765566343042072e-06, "loss": 0.0415, "step": 255590 }, { "epoch": 99.26, "learning_rate": 6.7650485436893205e-06, "loss": 0.0215, "step": 255600 }, { "epoch": 99.27, "learning_rate": 6.76453074433657e-06, "loss": 0.083, "step": 255610 }, { "epoch": 99.27, "learning_rate": 6.764012944983819e-06, "loss": 0.0656, "step": 255620 }, { "epoch": 99.27, "learning_rate": 6.7634951456310685e-06, "loss": 0.0535, "step": 255630 }, { "epoch": 99.28, "learning_rate": 6.762977346278317e-06, "loss": 0.0928, "step": 255640 }, { "epoch": 99.28, "learning_rate": 6.762459546925567e-06, "loss": 0.0009, "step": 255650 }, { "epoch": 99.29, "learning_rate": 6.7619417475728165e-06, "loss": 0.1442, "step": 255660 }, { "epoch": 99.29, "learning_rate": 6.761423948220065e-06, "loss": 0.0214, "step": 255670 }, { "epoch": 99.29, "learning_rate": 6.760906148867314e-06, "loss": 0.0126, "step": 255680 }, { "epoch": 99.3, "learning_rate": 6.760388349514564e-06, "loss": 0.0817, "step": 255690 }, { "epoch": 99.3, "learning_rate": 6.759870550161813e-06, "loss": 0.1712, "step": 255700 }, { "epoch": 99.3, "learning_rate": 6.759352750809062e-06, "loss": 0.0095, "step": 255710 }, { "epoch": 99.31, "learning_rate": 6.758834951456311e-06, "loss": 0.0833, "step": 255720 }, { "epoch": 99.31, "learning_rate": 6.7583171521035605e-06, "loss": 0.0041, "step": 255730 }, { "epoch": 99.32, "learning_rate": 6.75779935275081e-06, "loss": 0.0093, "step": 255740 }, { "epoch": 99.32, "learning_rate": 6.757281553398059e-06, "loss": 0.1266, "step": 255750 }, { "epoch": 99.32, "learning_rate": 6.756763754045308e-06, "loss": 0.0112, "step": 255760 }, { "epoch": 99.33, "learning_rate": 6.756245954692557e-06, "loss": 0.0136, "step": 255770 }, { "epoch": 99.33, "learning_rate": 6.755728155339807e-06, "loss": 0.0805, "step": 255780 }, { "epoch": 99.34, "learning_rate": 6.755210355987055e-06, "loss": 0.0736, "step": 255790 }, { "epoch": 99.34, "learning_rate": 6.7546925566343044e-06, "loss": 0.1187, "step": 255800 }, { "epoch": 99.34, "learning_rate": 6.754174757281554e-06, "loss": 0.0606, "step": 255810 }, { "epoch": 99.35, "learning_rate": 6.753656957928804e-06, "loss": 0.0941, "step": 255820 }, { "epoch": 99.35, "learning_rate": 6.753139158576052e-06, "loss": 0.0115, "step": 255830 }, { "epoch": 99.36, "learning_rate": 6.752621359223301e-06, "loss": 0.0078, "step": 255840 }, { "epoch": 99.36, "learning_rate": 6.752103559870551e-06, "loss": 0.0927, "step": 255850 }, { "epoch": 99.36, "learning_rate": 6.7515857605178004e-06, "loss": 0.061, "step": 255860 }, { "epoch": 99.37, "learning_rate": 6.751067961165048e-06, "loss": 0.0228, "step": 255870 }, { "epoch": 99.37, "learning_rate": 6.750550161812298e-06, "loss": 0.1399, "step": 255880 }, { "epoch": 99.37, "learning_rate": 6.750032362459548e-06, "loss": 0.0118, "step": 255890 }, { "epoch": 99.38, "learning_rate": 6.749514563106797e-06, "loss": 0.0315, "step": 255900 }, { "epoch": 99.38, "learning_rate": 6.748996763754045e-06, "loss": 0.0168, "step": 255910 }, { "epoch": 99.39, "learning_rate": 6.748478964401295e-06, "loss": 0.0926, "step": 255920 }, { "epoch": 99.39, "learning_rate": 6.747961165048544e-06, "loss": 0.0147, "step": 255930 }, { "epoch": 99.39, "learning_rate": 6.747443365695794e-06, "loss": 0.0009, "step": 255940 }, { "epoch": 99.4, "learning_rate": 6.746925566343042e-06, "loss": 0.0346, "step": 255950 }, { "epoch": 99.4, "learning_rate": 6.7464077669902915e-06, "loss": 0.1183, "step": 255960 }, { "epoch": 99.41, "learning_rate": 6.745889967637541e-06, "loss": 0.0365, "step": 255970 }, { "epoch": 99.41, "learning_rate": 6.745372168284791e-06, "loss": 0.0001, "step": 255980 }, { "epoch": 99.41, "learning_rate": 6.744854368932039e-06, "loss": 0.0609, "step": 255990 }, { "epoch": 99.42, "learning_rate": 6.744336569579288e-06, "loss": 0.0173, "step": 256000 }, { "epoch": 99.42, "learning_rate": 6.743818770226538e-06, "loss": 0.066, "step": 256010 }, { "epoch": 99.43, "learning_rate": 6.7433009708737876e-06, "loss": 0.0849, "step": 256020 }, { "epoch": 99.43, "learning_rate": 6.7427831715210355e-06, "loss": 0.0719, "step": 256030 }, { "epoch": 99.43, "learning_rate": 6.742265372168285e-06, "loss": 0.0004, "step": 256040 }, { "epoch": 99.44, "learning_rate": 6.741747572815535e-06, "loss": 0.0261, "step": 256050 }, { "epoch": 99.44, "learning_rate": 6.741229773462784e-06, "loss": 0.0102, "step": 256060 }, { "epoch": 99.44, "learning_rate": 6.740711974110032e-06, "loss": 0.0363, "step": 256070 }, { "epoch": 99.45, "learning_rate": 6.740194174757282e-06, "loss": 0.1431, "step": 256080 }, { "epoch": 99.45, "learning_rate": 6.7396763754045315e-06, "loss": 0.0625, "step": 256090 }, { "epoch": 99.46, "learning_rate": 6.739158576051781e-06, "loss": 0.0199, "step": 256100 }, { "epoch": 99.46, "learning_rate": 6.738640776699029e-06, "loss": 0.0234, "step": 256110 }, { "epoch": 99.46, "learning_rate": 6.738122977346279e-06, "loss": 0.1127, "step": 256120 }, { "epoch": 99.47, "learning_rate": 6.737605177993528e-06, "loss": 0.0528, "step": 256130 }, { "epoch": 99.47, "learning_rate": 6.737087378640778e-06, "loss": 0.0465, "step": 256140 }, { "epoch": 99.48, "learning_rate": 6.736569579288026e-06, "loss": 0.0456, "step": 256150 }, { "epoch": 99.48, "learning_rate": 6.7360517799352754e-06, "loss": 0.0634, "step": 256160 }, { "epoch": 99.48, "learning_rate": 6.735533980582525e-06, "loss": 0.0434, "step": 256170 }, { "epoch": 99.49, "learning_rate": 6.735016181229775e-06, "loss": 0.0183, "step": 256180 }, { "epoch": 99.49, "learning_rate": 6.734498381877023e-06, "loss": 0.2522, "step": 256190 }, { "epoch": 99.5, "learning_rate": 6.733980582524272e-06, "loss": 0.0095, "step": 256200 }, { "epoch": 99.5, "learning_rate": 6.733462783171522e-06, "loss": 0.0443, "step": 256210 }, { "epoch": 99.5, "learning_rate": 6.7329449838187714e-06, "loss": 0.0476, "step": 256220 }, { "epoch": 99.51, "learning_rate": 6.73242718446602e-06, "loss": 0.0195, "step": 256230 }, { "epoch": 99.51, "learning_rate": 6.731909385113269e-06, "loss": 0.0305, "step": 256240 }, { "epoch": 99.51, "learning_rate": 6.731391585760519e-06, "loss": 0.079, "step": 256250 }, { "epoch": 99.52, "learning_rate": 6.730873786407767e-06, "loss": 0.0273, "step": 256260 }, { "epoch": 99.52, "learning_rate": 6.730355987055017e-06, "loss": 0.1401, "step": 256270 }, { "epoch": 99.53, "learning_rate": 6.729838187702266e-06, "loss": 0.0119, "step": 256280 }, { "epoch": 99.53, "learning_rate": 6.729320388349515e-06, "loss": 0.0367, "step": 256290 }, { "epoch": 99.53, "learning_rate": 6.728802588996764e-06, "loss": 0.0172, "step": 256300 }, { "epoch": 99.54, "learning_rate": 6.728284789644014e-06, "loss": 0.0008, "step": 256310 }, { "epoch": 99.54, "learning_rate": 6.7277669902912625e-06, "loss": 0.1096, "step": 256320 }, { "epoch": 99.55, "learning_rate": 6.727249190938512e-06, "loss": 0.017, "step": 256330 }, { "epoch": 99.55, "learning_rate": 6.726731391585761e-06, "loss": 0.0038, "step": 256340 }, { "epoch": 99.55, "learning_rate": 6.7262135922330105e-06, "loss": 0.0467, "step": 256350 }, { "epoch": 99.56, "learning_rate": 6.725695792880259e-06, "loss": 0.024, "step": 256360 }, { "epoch": 99.56, "learning_rate": 6.725177993527509e-06, "loss": 0.0238, "step": 256370 }, { "epoch": 99.57, "learning_rate": 6.724660194174758e-06, "loss": 0.0402, "step": 256380 }, { "epoch": 99.57, "learning_rate": 6.724142394822007e-06, "loss": 0.0012, "step": 256390 }, { "epoch": 99.57, "learning_rate": 6.723624595469256e-06, "loss": 0.0912, "step": 256400 }, { "epoch": 99.58, "learning_rate": 6.723106796116506e-06, "loss": 0.0849, "step": 256410 }, { "epoch": 99.58, "learning_rate": 6.7225889967637545e-06, "loss": 0.0165, "step": 256420 }, { "epoch": 99.58, "learning_rate": 6.722071197411004e-06, "loss": 0.0996, "step": 256430 }, { "epoch": 99.59, "learning_rate": 6.721553398058253e-06, "loss": 0.0385, "step": 256440 }, { "epoch": 99.59, "learning_rate": 6.721035598705502e-06, "loss": 0.0439, "step": 256450 }, { "epoch": 99.6, "learning_rate": 6.720517799352751e-06, "loss": 0.0555, "step": 256460 }, { "epoch": 99.6, "learning_rate": 6.720000000000001e-06, "loss": 0.0202, "step": 256470 }, { "epoch": 99.6, "learning_rate": 6.71948220064725e-06, "loss": 0.0203, "step": 256480 }, { "epoch": 99.61, "learning_rate": 6.7189644012944984e-06, "loss": 0.059, "step": 256490 }, { "epoch": 99.61, "learning_rate": 6.718446601941748e-06, "loss": 0.0323, "step": 256500 }, { "epoch": 99.62, "learning_rate": 6.717928802588998e-06, "loss": 0.012, "step": 256510 }, { "epoch": 99.62, "learning_rate": 6.7174110032362464e-06, "loss": 0.0259, "step": 256520 }, { "epoch": 99.62, "learning_rate": 6.716893203883495e-06, "loss": 0.087, "step": 256530 }, { "epoch": 99.63, "learning_rate": 6.716375404530745e-06, "loss": 0.0435, "step": 256540 }, { "epoch": 99.63, "learning_rate": 6.7158576051779944e-06, "loss": 0.0802, "step": 256550 }, { "epoch": 99.63, "learning_rate": 6.715339805825243e-06, "loss": 0.0464, "step": 256560 }, { "epoch": 99.64, "learning_rate": 6.714822006472492e-06, "loss": 0.0958, "step": 256570 }, { "epoch": 99.64, "learning_rate": 6.714304207119742e-06, "loss": 0.0571, "step": 256580 }, { "epoch": 99.65, "learning_rate": 6.713786407766991e-06, "loss": 0.005, "step": 256590 }, { "epoch": 99.65, "learning_rate": 6.713268608414239e-06, "loss": 0.044, "step": 256600 }, { "epoch": 99.65, "learning_rate": 6.712750809061489e-06, "loss": 0.0069, "step": 256610 }, { "epoch": 99.66, "learning_rate": 6.712233009708738e-06, "loss": 0.0103, "step": 256620 }, { "epoch": 99.66, "learning_rate": 6.711715210355988e-06, "loss": 0.0832, "step": 256630 }, { "epoch": 99.67, "learning_rate": 6.711197411003236e-06, "loss": 0.0996, "step": 256640 }, { "epoch": 99.67, "learning_rate": 6.7106796116504855e-06, "loss": 0.0666, "step": 256650 }, { "epoch": 99.67, "learning_rate": 6.710161812297735e-06, "loss": 0.0128, "step": 256660 }, { "epoch": 99.68, "learning_rate": 6.709644012944985e-06, "loss": 0.0069, "step": 256670 }, { "epoch": 99.68, "learning_rate": 6.709126213592233e-06, "loss": 0.0141, "step": 256680 }, { "epoch": 99.69, "learning_rate": 6.708608414239482e-06, "loss": 0.0277, "step": 256690 }, { "epoch": 99.69, "learning_rate": 6.708090614886732e-06, "loss": 0.1315, "step": 256700 }, { "epoch": 99.69, "learning_rate": 6.7075728155339816e-06, "loss": 0.0206, "step": 256710 }, { "epoch": 99.7, "learning_rate": 6.7070550161812295e-06, "loss": 0.0273, "step": 256720 }, { "epoch": 99.7, "learning_rate": 6.706537216828479e-06, "loss": 0.0003, "step": 256730 }, { "epoch": 99.7, "learning_rate": 6.706019417475729e-06, "loss": 0.0278, "step": 256740 }, { "epoch": 99.71, "learning_rate": 6.705501618122978e-06, "loss": 0.1183, "step": 256750 }, { "epoch": 99.71, "learning_rate": 6.704983818770228e-06, "loss": 0.0165, "step": 256760 }, { "epoch": 99.72, "learning_rate": 6.704466019417476e-06, "loss": 0.095, "step": 256770 }, { "epoch": 99.72, "learning_rate": 6.7039482200647255e-06, "loss": 0.0739, "step": 256780 }, { "epoch": 99.72, "learning_rate": 6.703430420711975e-06, "loss": 0.0528, "step": 256790 }, { "epoch": 99.73, "learning_rate": 6.702912621359225e-06, "loss": 0.0629, "step": 256800 }, { "epoch": 99.73, "learning_rate": 6.702394822006473e-06, "loss": 0.1046, "step": 256810 }, { "epoch": 99.74, "learning_rate": 6.701877022653722e-06, "loss": 0.0089, "step": 256820 }, { "epoch": 99.74, "learning_rate": 6.701359223300972e-06, "loss": 0.1678, "step": 256830 }, { "epoch": 99.74, "learning_rate": 6.7008414239482215e-06, "loss": 0.0192, "step": 256840 }, { "epoch": 99.75, "learning_rate": 6.7003236245954694e-06, "loss": 0.0008, "step": 256850 }, { "epoch": 99.75, "learning_rate": 6.699805825242719e-06, "loss": 0.0149, "step": 256860 }, { "epoch": 99.76, "learning_rate": 6.699288025889969e-06, "loss": 0.0098, "step": 256870 }, { "epoch": 99.76, "learning_rate": 6.698770226537218e-06, "loss": 0.0102, "step": 256880 }, { "epoch": 99.76, "learning_rate": 6.698252427184466e-06, "loss": 0.0503, "step": 256890 }, { "epoch": 99.77, "learning_rate": 6.697734627831716e-06, "loss": 0.0418, "step": 256900 }, { "epoch": 99.77, "learning_rate": 6.6972168284789654e-06, "loss": 0.0775, "step": 256910 }, { "epoch": 99.77, "learning_rate": 6.696699029126214e-06, "loss": 0.0031, "step": 256920 }, { "epoch": 99.78, "learning_rate": 6.696181229773463e-06, "loss": 0.0746, "step": 256930 }, { "epoch": 99.78, "learning_rate": 6.695663430420713e-06, "loss": 0.1522, "step": 256940 }, { "epoch": 99.79, "learning_rate": 6.695145631067962e-06, "loss": 0.0564, "step": 256950 }, { "epoch": 99.79, "learning_rate": 6.694627831715211e-06, "loss": 0.0284, "step": 256960 }, { "epoch": 99.79, "learning_rate": 6.69411003236246e-06, "loss": 0.059, "step": 256970 }, { "epoch": 99.8, "learning_rate": 6.693592233009709e-06, "loss": 0.0467, "step": 256980 }, { "epoch": 99.8, "learning_rate": 6.693074433656959e-06, "loss": 0.0558, "step": 256990 }, { "epoch": 99.81, "learning_rate": 6.692556634304208e-06, "loss": 0.0473, "step": 257000 }, { "epoch": 99.81, "learning_rate": 6.6920388349514565e-06, "loss": 0.215, "step": 257010 }, { "epoch": 99.81, "learning_rate": 6.691521035598706e-06, "loss": 0.0739, "step": 257020 }, { "epoch": 99.82, "learning_rate": 6.691003236245956e-06, "loss": 0.0324, "step": 257030 }, { "epoch": 99.82, "learning_rate": 6.6904854368932045e-06, "loss": 0.0297, "step": 257040 }, { "epoch": 99.83, "learning_rate": 6.689967637540453e-06, "loss": 0.0845, "step": 257050 }, { "epoch": 99.83, "learning_rate": 6.689449838187703e-06, "loss": 0.0119, "step": 257060 }, { "epoch": 99.83, "learning_rate": 6.688932038834952e-06, "loss": 0.0921, "step": 257070 }, { "epoch": 99.84, "learning_rate": 6.688414239482201e-06, "loss": 0.0497, "step": 257080 }, { "epoch": 99.84, "learning_rate": 6.68789644012945e-06, "loss": 0.0501, "step": 257090 }, { "epoch": 99.84, "learning_rate": 6.6873786407767e-06, "loss": 0.0595, "step": 257100 }, { "epoch": 99.85, "learning_rate": 6.6868608414239485e-06, "loss": 0.0022, "step": 257110 }, { "epoch": 99.85, "learning_rate": 6.686343042071198e-06, "loss": 0.0135, "step": 257120 }, { "epoch": 99.86, "learning_rate": 6.685825242718447e-06, "loss": 0.1788, "step": 257130 }, { "epoch": 99.86, "learning_rate": 6.6853074433656965e-06, "loss": 0.0391, "step": 257140 }, { "epoch": 99.86, "learning_rate": 6.684789644012945e-06, "loss": 0.0009, "step": 257150 }, { "epoch": 99.87, "learning_rate": 6.684271844660195e-06, "loss": 0.0333, "step": 257160 }, { "epoch": 99.87, "learning_rate": 6.683754045307444e-06, "loss": 0.0221, "step": 257170 }, { "epoch": 99.88, "learning_rate": 6.683236245954693e-06, "loss": 0.0489, "step": 257180 }, { "epoch": 99.88, "learning_rate": 6.682718446601942e-06, "loss": 0.0596, "step": 257190 }, { "epoch": 99.88, "learning_rate": 6.682200647249192e-06, "loss": 0.0201, "step": 257200 }, { "epoch": 99.89, "learning_rate": 6.6816828478964404e-06, "loss": 0.1118, "step": 257210 }, { "epoch": 99.89, "learning_rate": 6.68116504854369e-06, "loss": 0.0611, "step": 257220 }, { "epoch": 99.9, "learning_rate": 6.680647249190939e-06, "loss": 0.0209, "step": 257230 }, { "epoch": 99.9, "learning_rate": 6.6801294498381884e-06, "loss": 0.0784, "step": 257240 }, { "epoch": 99.9, "learning_rate": 6.679611650485437e-06, "loss": 0.0227, "step": 257250 }, { "epoch": 99.91, "learning_rate": 6.679093851132686e-06, "loss": 0.0104, "step": 257260 }, { "epoch": 99.91, "learning_rate": 6.678576051779936e-06, "loss": 0.045, "step": 257270 }, { "epoch": 99.91, "learning_rate": 6.678058252427185e-06, "loss": 0.0877, "step": 257280 }, { "epoch": 99.92, "learning_rate": 6.677540453074434e-06, "loss": 0.0299, "step": 257290 }, { "epoch": 99.92, "learning_rate": 6.677022653721683e-06, "loss": 0.0692, "step": 257300 }, { "epoch": 99.93, "learning_rate": 6.676504854368932e-06, "loss": 0.0588, "step": 257310 }, { "epoch": 99.93, "learning_rate": 6.675987055016182e-06, "loss": 0.0033, "step": 257320 }, { "epoch": 99.93, "learning_rate": 6.675469255663432e-06, "loss": 0.0009, "step": 257330 }, { "epoch": 99.94, "learning_rate": 6.6749514563106795e-06, "loss": 0.0257, "step": 257340 }, { "epoch": 99.94, "learning_rate": 6.674433656957929e-06, "loss": 0.0338, "step": 257350 }, { "epoch": 99.95, "learning_rate": 6.673915857605179e-06, "loss": 0.0657, "step": 257360 }, { "epoch": 99.95, "learning_rate": 6.673398058252428e-06, "loss": 0.046, "step": 257370 }, { "epoch": 99.95, "learning_rate": 6.672880258899676e-06, "loss": 0.0422, "step": 257380 }, { "epoch": 99.96, "learning_rate": 6.672362459546926e-06, "loss": 0.0733, "step": 257390 }, { "epoch": 99.96, "learning_rate": 6.6718446601941755e-06, "loss": 0.0702, "step": 257400 }, { "epoch": 99.97, "learning_rate": 6.671326860841425e-06, "loss": 0.0109, "step": 257410 }, { "epoch": 99.97, "learning_rate": 6.670809061488673e-06, "loss": 0.0084, "step": 257420 }, { "epoch": 99.97, "learning_rate": 6.670291262135923e-06, "loss": 0.0247, "step": 257430 }, { "epoch": 99.98, "learning_rate": 6.669773462783172e-06, "loss": 0.0815, "step": 257440 }, { "epoch": 99.98, "learning_rate": 6.669255663430422e-06, "loss": 0.0207, "step": 257450 }, { "epoch": 99.98, "learning_rate": 6.66873786407767e-06, "loss": 0.1644, "step": 257460 }, { "epoch": 99.99, "learning_rate": 6.6682200647249195e-06, "loss": 0.0304, "step": 257470 }, { "epoch": 99.99, "learning_rate": 6.667702265372169e-06, "loss": 0.054, "step": 257480 }, { "epoch": 100.0, "learning_rate": 6.667184466019419e-06, "loss": 0.0016, "step": 257490 }, { "epoch": 100.0, "learning_rate": 6.666666666666667e-06, "loss": 0.1217, "step": 257500 }, { "epoch": 100.0, "eval_accuracy": 0.9507565337001376, "eval_loss": 0.3741964101791382, "eval_runtime": 8.2071, "eval_samples_per_second": 442.91, "eval_steps_per_second": 55.44, "step": 257500 }, { "epoch": 100.0, "learning_rate": 6.666148867313916e-06, "loss": 0.0838, "step": 257510 }, { "epoch": 100.01, "learning_rate": 6.665631067961166e-06, "loss": 0.0515, "step": 257520 }, { "epoch": 100.01, "learning_rate": 6.6651132686084155e-06, "loss": 0.0096, "step": 257530 }, { "epoch": 100.02, "learning_rate": 6.6645954692556634e-06, "loss": 0.0482, "step": 257540 }, { "epoch": 100.02, "learning_rate": 6.664077669902913e-06, "loss": 0.0186, "step": 257550 }, { "epoch": 100.02, "learning_rate": 6.663559870550163e-06, "loss": 0.0087, "step": 257560 }, { "epoch": 100.03, "learning_rate": 6.663042071197412e-06, "loss": 0.0068, "step": 257570 }, { "epoch": 100.03, "learning_rate": 6.66252427184466e-06, "loss": 0.0458, "step": 257580 }, { "epoch": 100.03, "learning_rate": 6.66200647249191e-06, "loss": 0.0442, "step": 257590 }, { "epoch": 100.04, "learning_rate": 6.6614886731391594e-06, "loss": 0.0011, "step": 257600 }, { "epoch": 100.04, "learning_rate": 6.660970873786409e-06, "loss": 0.0644, "step": 257610 }, { "epoch": 100.05, "learning_rate": 6.660453074433657e-06, "loss": 0.1092, "step": 257620 }, { "epoch": 100.05, "learning_rate": 6.659935275080907e-06, "loss": 0.0176, "step": 257630 }, { "epoch": 100.05, "learning_rate": 6.659417475728156e-06, "loss": 0.1484, "step": 257640 }, { "epoch": 100.06, "learning_rate": 6.658899676375406e-06, "loss": 0.0423, "step": 257650 }, { "epoch": 100.06, "learning_rate": 6.658381877022654e-06, "loss": 0.032, "step": 257660 }, { "epoch": 100.07, "learning_rate": 6.657864077669903e-06, "loss": 0.0023, "step": 257670 }, { "epoch": 100.07, "learning_rate": 6.657346278317153e-06, "loss": 0.0026, "step": 257680 }, { "epoch": 100.07, "learning_rate": 6.656828478964403e-06, "loss": 0.0556, "step": 257690 }, { "epoch": 100.08, "learning_rate": 6.6563106796116505e-06, "loss": 0.0002, "step": 257700 }, { "epoch": 100.08, "learning_rate": 6.6557928802589e-06, "loss": 0.008, "step": 257710 }, { "epoch": 100.09, "learning_rate": 6.65527508090615e-06, "loss": 0.0809, "step": 257720 }, { "epoch": 100.09, "learning_rate": 6.6547572815533985e-06, "loss": 0.0174, "step": 257730 }, { "epoch": 100.09, "learning_rate": 6.654239482200647e-06, "loss": 0.0591, "step": 257740 }, { "epoch": 100.1, "learning_rate": 6.653721682847897e-06, "loss": 0.0094, "step": 257750 }, { "epoch": 100.1, "learning_rate": 6.6532038834951466e-06, "loss": 0.0723, "step": 257760 }, { "epoch": 100.1, "learning_rate": 6.652686084142395e-06, "loss": 0.0305, "step": 257770 }, { "epoch": 100.11, "learning_rate": 6.652168284789644e-06, "loss": 0.0409, "step": 257780 }, { "epoch": 100.11, "learning_rate": 6.651650485436894e-06, "loss": 0.1209, "step": 257790 }, { "epoch": 100.12, "learning_rate": 6.651132686084143e-06, "loss": 0.0308, "step": 257800 }, { "epoch": 100.12, "learning_rate": 6.650614886731392e-06, "loss": 0.1677, "step": 257810 }, { "epoch": 100.12, "learning_rate": 6.650097087378641e-06, "loss": 0.0111, "step": 257820 }, { "epoch": 100.13, "learning_rate": 6.6495792880258905e-06, "loss": 0.006, "step": 257830 }, { "epoch": 100.13, "learning_rate": 6.64906148867314e-06, "loss": 0.0495, "step": 257840 }, { "epoch": 100.14, "learning_rate": 6.648543689320389e-06, "loss": 0.0951, "step": 257850 }, { "epoch": 100.14, "learning_rate": 6.648025889967638e-06, "loss": 0.0869, "step": 257860 }, { "epoch": 100.14, "learning_rate": 6.647508090614887e-06, "loss": 0.0631, "step": 257870 }, { "epoch": 100.15, "learning_rate": 6.646990291262137e-06, "loss": 0.0146, "step": 257880 }, { "epoch": 100.15, "learning_rate": 6.646472491909386e-06, "loss": 0.1394, "step": 257890 }, { "epoch": 100.16, "learning_rate": 6.645954692556635e-06, "loss": 0.0651, "step": 257900 }, { "epoch": 100.16, "learning_rate": 6.645436893203884e-06, "loss": 0.0429, "step": 257910 }, { "epoch": 100.16, "learning_rate": 6.644919093851133e-06, "loss": 0.0071, "step": 257920 }, { "epoch": 100.17, "learning_rate": 6.6444012944983824e-06, "loss": 0.034, "step": 257930 }, { "epoch": 100.17, "learning_rate": 6.643883495145632e-06, "loss": 0.0249, "step": 257940 }, { "epoch": 100.17, "learning_rate": 6.643365695792881e-06, "loss": 0.0549, "step": 257950 }, { "epoch": 100.18, "learning_rate": 6.64284789644013e-06, "loss": 0.0049, "step": 257960 }, { "epoch": 100.18, "learning_rate": 6.642330097087379e-06, "loss": 0.0825, "step": 257970 }, { "epoch": 100.19, "learning_rate": 6.641812297734629e-06, "loss": 0.0631, "step": 257980 }, { "epoch": 100.19, "learning_rate": 6.641294498381878e-06, "loss": 0.0599, "step": 257990 }, { "epoch": 100.19, "learning_rate": 6.640776699029126e-06, "loss": 0.0703, "step": 258000 }, { "epoch": 100.2, "learning_rate": 6.640258899676376e-06, "loss": 0.0203, "step": 258010 }, { "epoch": 100.2, "learning_rate": 6.639741100323626e-06, "loss": 0.0309, "step": 258020 }, { "epoch": 100.21, "learning_rate": 6.639223300970874e-06, "loss": 0.1547, "step": 258030 }, { "epoch": 100.21, "learning_rate": 6.638705501618123e-06, "loss": 0.0331, "step": 258040 }, { "epoch": 100.21, "learning_rate": 6.638187702265373e-06, "loss": 0.1308, "step": 258050 }, { "epoch": 100.22, "learning_rate": 6.637669902912622e-06, "loss": 0.0087, "step": 258060 }, { "epoch": 100.22, "learning_rate": 6.63715210355987e-06, "loss": 0.0462, "step": 258070 }, { "epoch": 100.23, "learning_rate": 6.63663430420712e-06, "loss": 0.0108, "step": 258080 }, { "epoch": 100.23, "learning_rate": 6.6361165048543695e-06, "loss": 0.0013, "step": 258090 }, { "epoch": 100.23, "learning_rate": 6.635598705501619e-06, "loss": 0.0728, "step": 258100 }, { "epoch": 100.24, "learning_rate": 6.635080906148867e-06, "loss": 0.0741, "step": 258110 }, { "epoch": 100.24, "learning_rate": 6.634563106796117e-06, "loss": 0.072, "step": 258120 }, { "epoch": 100.24, "learning_rate": 6.634045307443366e-06, "loss": 0.0549, "step": 258130 }, { "epoch": 100.25, "learning_rate": 6.633527508090616e-06, "loss": 0.0001, "step": 258140 }, { "epoch": 100.25, "learning_rate": 6.633009708737864e-06, "loss": 0.0572, "step": 258150 }, { "epoch": 100.26, "learning_rate": 6.6324919093851135e-06, "loss": 0.0986, "step": 258160 }, { "epoch": 100.26, "learning_rate": 6.631974110032363e-06, "loss": 0.0581, "step": 258170 }, { "epoch": 100.26, "learning_rate": 6.631456310679613e-06, "loss": 0.1109, "step": 258180 }, { "epoch": 100.27, "learning_rate": 6.630938511326861e-06, "loss": 0.001, "step": 258190 }, { "epoch": 100.27, "learning_rate": 6.63042071197411e-06, "loss": 0.0699, "step": 258200 }, { "epoch": 100.28, "learning_rate": 6.62990291262136e-06, "loss": 0.0379, "step": 258210 }, { "epoch": 100.28, "learning_rate": 6.6293851132686095e-06, "loss": 0.0303, "step": 258220 }, { "epoch": 100.28, "learning_rate": 6.6288673139158574e-06, "loss": 0.0425, "step": 258230 }, { "epoch": 100.29, "learning_rate": 6.628349514563107e-06, "loss": 0.0006, "step": 258240 }, { "epoch": 100.29, "learning_rate": 6.627831715210357e-06, "loss": 0.0973, "step": 258250 }, { "epoch": 100.3, "learning_rate": 6.627313915857606e-06, "loss": 0.0348, "step": 258260 }, { "epoch": 100.3, "learning_rate": 6.626796116504854e-06, "loss": 0.0489, "step": 258270 }, { "epoch": 100.3, "learning_rate": 6.626278317152104e-06, "loss": 0.0321, "step": 258280 }, { "epoch": 100.31, "learning_rate": 6.6257605177993534e-06, "loss": 0.1461, "step": 258290 }, { "epoch": 100.31, "learning_rate": 6.625242718446603e-06, "loss": 0.074, "step": 258300 }, { "epoch": 100.31, "learning_rate": 6.624724919093851e-06, "loss": 0.0729, "step": 258310 }, { "epoch": 100.32, "learning_rate": 6.624207119741101e-06, "loss": 0.0245, "step": 258320 }, { "epoch": 100.32, "learning_rate": 6.62368932038835e-06, "loss": 0.0669, "step": 258330 }, { "epoch": 100.33, "learning_rate": 6.6231715210356e-06, "loss": 0.0654, "step": 258340 }, { "epoch": 100.33, "learning_rate": 6.622653721682848e-06, "loss": 0.0088, "step": 258350 }, { "epoch": 100.33, "learning_rate": 6.622135922330097e-06, "loss": 0.0343, "step": 258360 }, { "epoch": 100.34, "learning_rate": 6.621618122977347e-06, "loss": 0.0964, "step": 258370 }, { "epoch": 100.34, "learning_rate": 6.621100323624597e-06, "loss": 0.0271, "step": 258380 }, { "epoch": 100.35, "learning_rate": 6.6205825242718445e-06, "loss": 0.0734, "step": 258390 }, { "epoch": 100.35, "learning_rate": 6.620064724919094e-06, "loss": 0.125, "step": 258400 }, { "epoch": 100.35, "learning_rate": 6.619546925566344e-06, "loss": 0.0087, "step": 258410 }, { "epoch": 100.36, "learning_rate": 6.619029126213593e-06, "loss": 0.1169, "step": 258420 }, { "epoch": 100.36, "learning_rate": 6.618511326860841e-06, "loss": 0.0255, "step": 258430 }, { "epoch": 100.37, "learning_rate": 6.617993527508091e-06, "loss": 0.0347, "step": 258440 }, { "epoch": 100.37, "learning_rate": 6.6174757281553406e-06, "loss": 0.0844, "step": 258450 }, { "epoch": 100.37, "learning_rate": 6.61695792880259e-06, "loss": 0.014, "step": 258460 }, { "epoch": 100.38, "learning_rate": 6.616440129449839e-06, "loss": 0.0358, "step": 258470 }, { "epoch": 100.38, "learning_rate": 6.615922330097088e-06, "loss": 0.0538, "step": 258480 }, { "epoch": 100.38, "learning_rate": 6.615404530744337e-06, "loss": 0.0293, "step": 258490 }, { "epoch": 100.39, "learning_rate": 6.614886731391587e-06, "loss": 0.0487, "step": 258500 }, { "epoch": 100.39, "learning_rate": 6.614368932038836e-06, "loss": 0.05, "step": 258510 }, { "epoch": 100.4, "learning_rate": 6.6138511326860845e-06, "loss": 0.0174, "step": 258520 }, { "epoch": 100.4, "learning_rate": 6.613333333333334e-06, "loss": 0.0465, "step": 258530 }, { "epoch": 100.4, "learning_rate": 6.612815533980583e-06, "loss": 0.052, "step": 258540 }, { "epoch": 100.41, "learning_rate": 6.6122977346278325e-06, "loss": 0.0247, "step": 258550 }, { "epoch": 100.41, "learning_rate": 6.611779935275081e-06, "loss": 0.0581, "step": 258560 }, { "epoch": 100.42, "learning_rate": 6.611262135922331e-06, "loss": 0.0283, "step": 258570 }, { "epoch": 100.42, "learning_rate": 6.61074433656958e-06, "loss": 0.0264, "step": 258580 }, { "epoch": 100.42, "learning_rate": 6.610226537216829e-06, "loss": 0.0016, "step": 258590 }, { "epoch": 100.43, "learning_rate": 6.609708737864078e-06, "loss": 0.1041, "step": 258600 }, { "epoch": 100.43, "learning_rate": 6.609190938511328e-06, "loss": 0.1571, "step": 258610 }, { "epoch": 100.43, "learning_rate": 6.6086731391585764e-06, "loss": 0.0002, "step": 258620 }, { "epoch": 100.44, "learning_rate": 6.608155339805826e-06, "loss": 0.0557, "step": 258630 }, { "epoch": 100.44, "learning_rate": 6.607637540453075e-06, "loss": 0.1335, "step": 258640 }, { "epoch": 100.45, "learning_rate": 6.6071197411003244e-06, "loss": 0.0007, "step": 258650 }, { "epoch": 100.45, "learning_rate": 6.606601941747573e-06, "loss": 0.0814, "step": 258660 }, { "epoch": 100.45, "learning_rate": 6.606084142394823e-06, "loss": 0.0422, "step": 258670 }, { "epoch": 100.46, "learning_rate": 6.605566343042072e-06, "loss": 0.1398, "step": 258680 }, { "epoch": 100.46, "learning_rate": 6.605048543689321e-06, "loss": 0.0104, "step": 258690 }, { "epoch": 100.47, "learning_rate": 6.60453074433657e-06, "loss": 0.0812, "step": 258700 }, { "epoch": 100.47, "learning_rate": 6.60401294498382e-06, "loss": 0.0538, "step": 258710 }, { "epoch": 100.47, "learning_rate": 6.603495145631068e-06, "loss": 0.0881, "step": 258720 }, { "epoch": 100.48, "learning_rate": 6.602977346278317e-06, "loss": 0.1046, "step": 258730 }, { "epoch": 100.48, "learning_rate": 6.602459546925567e-06, "loss": 0.1103, "step": 258740 }, { "epoch": 100.49, "learning_rate": 6.601941747572816e-06, "loss": 0.0117, "step": 258750 }, { "epoch": 100.49, "learning_rate": 6.601423948220065e-06, "loss": 0.0934, "step": 258760 }, { "epoch": 100.49, "learning_rate": 6.600906148867314e-06, "loss": 0.0576, "step": 258770 }, { "epoch": 100.5, "learning_rate": 6.6003883495145635e-06, "loss": 0.0319, "step": 258780 }, { "epoch": 100.5, "learning_rate": 6.599870550161813e-06, "loss": 0.0234, "step": 258790 }, { "epoch": 100.5, "learning_rate": 6.599352750809062e-06, "loss": 0.0541, "step": 258800 }, { "epoch": 100.51, "learning_rate": 6.598834951456311e-06, "loss": 0.0048, "step": 258810 }, { "epoch": 100.51, "learning_rate": 6.59831715210356e-06, "loss": 0.0596, "step": 258820 }, { "epoch": 100.52, "learning_rate": 6.59779935275081e-06, "loss": 0.0295, "step": 258830 }, { "epoch": 100.52, "learning_rate": 6.597281553398059e-06, "loss": 0.0008, "step": 258840 }, { "epoch": 100.52, "learning_rate": 6.5967637540453075e-06, "loss": 0.0562, "step": 258850 }, { "epoch": 100.53, "learning_rate": 6.596245954692557e-06, "loss": 0.0185, "step": 258860 }, { "epoch": 100.53, "learning_rate": 6.595728155339807e-06, "loss": 0.0597, "step": 258870 }, { "epoch": 100.54, "learning_rate": 6.595210355987055e-06, "loss": 0.0002, "step": 258880 }, { "epoch": 100.54, "learning_rate": 6.594692556634304e-06, "loss": 0.0612, "step": 258890 }, { "epoch": 100.54, "learning_rate": 6.594174757281554e-06, "loss": 0.0883, "step": 258900 }, { "epoch": 100.55, "learning_rate": 6.5936569579288035e-06, "loss": 0.0046, "step": 258910 }, { "epoch": 100.55, "learning_rate": 6.593139158576051e-06, "loss": 0.0002, "step": 258920 }, { "epoch": 100.56, "learning_rate": 6.592621359223301e-06, "loss": 0.0116, "step": 258930 }, { "epoch": 100.56, "learning_rate": 6.592103559870551e-06, "loss": 0.1956, "step": 258940 }, { "epoch": 100.56, "learning_rate": 6.5915857605178e-06, "loss": 0.0337, "step": 258950 }, { "epoch": 100.57, "learning_rate": 6.591067961165048e-06, "loss": 0.111, "step": 258960 }, { "epoch": 100.57, "learning_rate": 6.590550161812298e-06, "loss": 0.003, "step": 258970 }, { "epoch": 100.57, "learning_rate": 6.5900323624595474e-06, "loss": 0.1574, "step": 258980 }, { "epoch": 100.58, "learning_rate": 6.589514563106797e-06, "loss": 0.0117, "step": 258990 }, { "epoch": 100.58, "learning_rate": 6.588996763754047e-06, "loss": 0.0059, "step": 259000 }, { "epoch": 100.59, "learning_rate": 6.588478964401295e-06, "loss": 0.0266, "step": 259010 }, { "epoch": 100.59, "learning_rate": 6.587961165048544e-06, "loss": 0.0228, "step": 259020 }, { "epoch": 100.59, "learning_rate": 6.587443365695794e-06, "loss": 0.1159, "step": 259030 }, { "epoch": 100.6, "learning_rate": 6.5869255663430435e-06, "loss": 0.1334, "step": 259040 }, { "epoch": 100.6, "learning_rate": 6.586407766990291e-06, "loss": 0.0112, "step": 259050 }, { "epoch": 100.61, "learning_rate": 6.585889967637541e-06, "loss": 0.0811, "step": 259060 }, { "epoch": 100.61, "learning_rate": 6.585372168284791e-06, "loss": 0.014, "step": 259070 }, { "epoch": 100.61, "learning_rate": 6.58485436893204e-06, "loss": 0.1534, "step": 259080 }, { "epoch": 100.62, "learning_rate": 6.584336569579288e-06, "loss": 0.0251, "step": 259090 }, { "epoch": 100.62, "learning_rate": 6.583818770226538e-06, "loss": 0.0001, "step": 259100 }, { "epoch": 100.63, "learning_rate": 6.583300970873787e-06, "loss": 0.068, "step": 259110 }, { "epoch": 100.63, "learning_rate": 6.582783171521037e-06, "loss": 0.0035, "step": 259120 }, { "epoch": 100.63, "learning_rate": 6.582265372168285e-06, "loss": 0.076, "step": 259130 }, { "epoch": 100.64, "learning_rate": 6.5817475728155345e-06, "loss": 0.0814, "step": 259140 }, { "epoch": 100.64, "learning_rate": 6.581229773462784e-06, "loss": 0.0164, "step": 259150 }, { "epoch": 100.64, "learning_rate": 6.580711974110034e-06, "loss": 0.0636, "step": 259160 }, { "epoch": 100.65, "learning_rate": 6.580194174757282e-06, "loss": 0.0896, "step": 259170 }, { "epoch": 100.65, "learning_rate": 6.579676375404531e-06, "loss": 0.1134, "step": 259180 }, { "epoch": 100.66, "learning_rate": 6.579158576051781e-06, "loss": 0.0006, "step": 259190 }, { "epoch": 100.66, "learning_rate": 6.57864077669903e-06, "loss": 0.0581, "step": 259200 }, { "epoch": 100.66, "learning_rate": 6.5781229773462785e-06, "loss": 0.0419, "step": 259210 }, { "epoch": 100.67, "learning_rate": 6.577605177993528e-06, "loss": 0.0681, "step": 259220 }, { "epoch": 100.67, "learning_rate": 6.577087378640778e-06, "loss": 0.1661, "step": 259230 }, { "epoch": 100.68, "learning_rate": 6.5765695792880265e-06, "loss": 0.0506, "step": 259240 }, { "epoch": 100.68, "learning_rate": 6.576051779935275e-06, "loss": 0.0099, "step": 259250 }, { "epoch": 100.68, "learning_rate": 6.575533980582525e-06, "loss": 0.0156, "step": 259260 }, { "epoch": 100.69, "learning_rate": 6.5750161812297745e-06, "loss": 0.0246, "step": 259270 }, { "epoch": 100.69, "learning_rate": 6.574498381877023e-06, "loss": 0.0429, "step": 259280 }, { "epoch": 100.7, "learning_rate": 6.573980582524272e-06, "loss": 0.1408, "step": 259290 }, { "epoch": 100.7, "learning_rate": 6.573462783171522e-06, "loss": 0.0536, "step": 259300 }, { "epoch": 100.7, "learning_rate": 6.572944983818771e-06, "loss": 0.1049, "step": 259310 }, { "epoch": 100.71, "learning_rate": 6.57242718446602e-06, "loss": 0.0055, "step": 259320 }, { "epoch": 100.71, "learning_rate": 6.571909385113269e-06, "loss": 0.0001, "step": 259330 }, { "epoch": 100.71, "learning_rate": 6.5713915857605184e-06, "loss": 0.0001, "step": 259340 }, { "epoch": 100.72, "learning_rate": 6.570873786407767e-06, "loss": 0.0684, "step": 259350 }, { "epoch": 100.72, "learning_rate": 6.570355987055017e-06, "loss": 0.0013, "step": 259360 }, { "epoch": 100.73, "learning_rate": 6.569838187702266e-06, "loss": 0.0286, "step": 259370 }, { "epoch": 100.73, "learning_rate": 6.569320388349515e-06, "loss": 0.0262, "step": 259380 }, { "epoch": 100.73, "learning_rate": 6.568802588996764e-06, "loss": 0.1304, "step": 259390 }, { "epoch": 100.74, "learning_rate": 6.568284789644014e-06, "loss": 0.0857, "step": 259400 }, { "epoch": 100.74, "learning_rate": 6.567766990291262e-06, "loss": 0.003, "step": 259410 }, { "epoch": 100.75, "learning_rate": 6.567249190938512e-06, "loss": 0.0319, "step": 259420 }, { "epoch": 100.75, "learning_rate": 6.566731391585761e-06, "loss": 0.1103, "step": 259430 }, { "epoch": 100.75, "learning_rate": 6.56621359223301e-06, "loss": 0.0027, "step": 259440 }, { "epoch": 100.76, "learning_rate": 6.565695792880259e-06, "loss": 0.0349, "step": 259450 }, { "epoch": 100.76, "learning_rate": 6.565177993527509e-06, "loss": 0.0458, "step": 259460 }, { "epoch": 100.77, "learning_rate": 6.5646601941747575e-06, "loss": 0.0272, "step": 259470 }, { "epoch": 100.77, "learning_rate": 6.564142394822007e-06, "loss": 0.0027, "step": 259480 }, { "epoch": 100.77, "learning_rate": 6.563624595469256e-06, "loss": 0.0578, "step": 259490 }, { "epoch": 100.78, "learning_rate": 6.5631067961165056e-06, "loss": 0.0938, "step": 259500 }, { "epoch": 100.78, "learning_rate": 6.562588996763754e-06, "loss": 0.0548, "step": 259510 }, { "epoch": 100.78, "learning_rate": 6.562071197411004e-06, "loss": 0.107, "step": 259520 }, { "epoch": 100.79, "learning_rate": 6.561553398058253e-06, "loss": 0.0128, "step": 259530 }, { "epoch": 100.79, "learning_rate": 6.5610355987055015e-06, "loss": 0.0516, "step": 259540 }, { "epoch": 100.8, "learning_rate": 6.560517799352751e-06, "loss": 0.1079, "step": 259550 }, { "epoch": 100.8, "learning_rate": 6.560000000000001e-06, "loss": 0.0758, "step": 259560 }, { "epoch": 100.8, "learning_rate": 6.55948220064725e-06, "loss": 0.0125, "step": 259570 }, { "epoch": 100.81, "learning_rate": 6.558964401294498e-06, "loss": 0.0321, "step": 259580 }, { "epoch": 100.81, "learning_rate": 6.558446601941748e-06, "loss": 0.0373, "step": 259590 }, { "epoch": 100.82, "learning_rate": 6.5579288025889975e-06, "loss": 0.0557, "step": 259600 }, { "epoch": 100.82, "learning_rate": 6.557411003236247e-06, "loss": 0.0451, "step": 259610 }, { "epoch": 100.82, "learning_rate": 6.556893203883495e-06, "loss": 0.0065, "step": 259620 }, { "epoch": 100.83, "learning_rate": 6.556375404530745e-06, "loss": 0.0052, "step": 259630 }, { "epoch": 100.83, "learning_rate": 6.555857605177994e-06, "loss": 0.0098, "step": 259640 }, { "epoch": 100.83, "learning_rate": 6.555339805825244e-06, "loss": 0.0198, "step": 259650 }, { "epoch": 100.84, "learning_rate": 6.554822006472492e-06, "loss": 0.0756, "step": 259660 }, { "epoch": 100.84, "learning_rate": 6.5543042071197414e-06, "loss": 0.0005, "step": 259670 }, { "epoch": 100.85, "learning_rate": 6.553786407766991e-06, "loss": 0.0655, "step": 259680 }, { "epoch": 100.85, "learning_rate": 6.553268608414241e-06, "loss": 0.0127, "step": 259690 }, { "epoch": 100.85, "learning_rate": 6.552750809061489e-06, "loss": 0.0925, "step": 259700 }, { "epoch": 100.86, "learning_rate": 6.552233009708738e-06, "loss": 0.0102, "step": 259710 }, { "epoch": 100.86, "learning_rate": 6.551715210355988e-06, "loss": 0.0392, "step": 259720 }, { "epoch": 100.87, "learning_rate": 6.5511974110032375e-06, "loss": 0.0585, "step": 259730 }, { "epoch": 100.87, "learning_rate": 6.550679611650485e-06, "loss": 0.219, "step": 259740 }, { "epoch": 100.87, "learning_rate": 6.550161812297735e-06, "loss": 0.0001, "step": 259750 }, { "epoch": 100.88, "learning_rate": 6.549644012944985e-06, "loss": 0.0187, "step": 259760 }, { "epoch": 100.88, "learning_rate": 6.549126213592234e-06, "loss": 0.0712, "step": 259770 }, { "epoch": 100.89, "learning_rate": 6.548608414239482e-06, "loss": 0.0862, "step": 259780 }, { "epoch": 100.89, "learning_rate": 6.548090614886732e-06, "loss": 0.0591, "step": 259790 }, { "epoch": 100.89, "learning_rate": 6.547572815533981e-06, "loss": 0.0105, "step": 259800 }, { "epoch": 100.9, "learning_rate": 6.547055016181231e-06, "loss": 0.0107, "step": 259810 }, { "epoch": 100.9, "learning_rate": 6.546537216828479e-06, "loss": 0.0348, "step": 259820 }, { "epoch": 100.9, "learning_rate": 6.5460194174757285e-06, "loss": 0.0984, "step": 259830 }, { "epoch": 100.91, "learning_rate": 6.545501618122978e-06, "loss": 0.0028, "step": 259840 }, { "epoch": 100.91, "learning_rate": 6.544983818770228e-06, "loss": 0.0656, "step": 259850 }, { "epoch": 100.92, "learning_rate": 6.544466019417476e-06, "loss": 0.1759, "step": 259860 }, { "epoch": 100.92, "learning_rate": 6.543948220064725e-06, "loss": 0.1096, "step": 259870 }, { "epoch": 100.92, "learning_rate": 6.543430420711975e-06, "loss": 0.0824, "step": 259880 }, { "epoch": 100.93, "learning_rate": 6.5429126213592246e-06, "loss": 0.0714, "step": 259890 }, { "epoch": 100.93, "learning_rate": 6.5423948220064725e-06, "loss": 0.0373, "step": 259900 }, { "epoch": 100.94, "learning_rate": 6.541877022653722e-06, "loss": 0.0671, "step": 259910 }, { "epoch": 100.94, "learning_rate": 6.541359223300972e-06, "loss": 0.0243, "step": 259920 }, { "epoch": 100.94, "learning_rate": 6.540841423948221e-06, "loss": 0.001, "step": 259930 }, { "epoch": 100.95, "learning_rate": 6.540323624595469e-06, "loss": 0.0847, "step": 259940 }, { "epoch": 100.95, "learning_rate": 6.539805825242719e-06, "loss": 0.0469, "step": 259950 }, { "epoch": 100.96, "learning_rate": 6.5392880258899685e-06, "loss": 0.039, "step": 259960 }, { "epoch": 100.96, "learning_rate": 6.538770226537218e-06, "loss": 0.0041, "step": 259970 }, { "epoch": 100.96, "learning_rate": 6.538252427184466e-06, "loss": 0.0717, "step": 259980 }, { "epoch": 100.97, "learning_rate": 6.537734627831716e-06, "loss": 0.0019, "step": 259990 }, { "epoch": 100.97, "learning_rate": 6.537216828478965e-06, "loss": 0.0039, "step": 260000 }, { "epoch": 100.97, "learning_rate": 6.536699029126214e-06, "loss": 0.0344, "step": 260010 }, { "epoch": 100.98, "learning_rate": 6.536181229773463e-06, "loss": 0.0943, "step": 260020 }, { "epoch": 100.98, "learning_rate": 6.5356634304207124e-06, "loss": 0.0077, "step": 260030 }, { "epoch": 100.99, "learning_rate": 6.535145631067962e-06, "loss": 0.1502, "step": 260040 }, { "epoch": 100.99, "learning_rate": 6.534627831715211e-06, "loss": 0.1233, "step": 260050 }, { "epoch": 100.99, "learning_rate": 6.53411003236246e-06, "loss": 0.0772, "step": 260060 }, { "epoch": 101.0, "learning_rate": 6.533592233009709e-06, "loss": 0.0951, "step": 260070 }, { "epoch": 101.0, "eval_accuracy": 0.9491059147180193, "eval_loss": 0.3718360364437103, "eval_runtime": 8.2256, "eval_samples_per_second": 441.912, "eval_steps_per_second": 55.315, "step": 260075 }, { "epoch": 101.0, "learning_rate": 6.533074433656959e-06, "loss": 0.0625, "step": 260080 }, { "epoch": 101.01, "learning_rate": 6.532556634304208e-06, "loss": 0.2079, "step": 260090 }, { "epoch": 101.01, "learning_rate": 6.532038834951456e-06, "loss": 0.0924, "step": 260100 }, { "epoch": 101.01, "learning_rate": 6.531521035598706e-06, "loss": 0.1342, "step": 260110 }, { "epoch": 101.02, "learning_rate": 6.531003236245956e-06, "loss": 0.0001, "step": 260120 }, { "epoch": 101.02, "learning_rate": 6.530485436893204e-06, "loss": 0.0028, "step": 260130 }, { "epoch": 101.03, "learning_rate": 6.529967637540454e-06, "loss": 0.0665, "step": 260140 }, { "epoch": 101.03, "learning_rate": 6.529449838187703e-06, "loss": 0.0387, "step": 260150 }, { "epoch": 101.03, "learning_rate": 6.528932038834952e-06, "loss": 0.0073, "step": 260160 }, { "epoch": 101.04, "learning_rate": 6.528414239482201e-06, "loss": 0.0611, "step": 260170 }, { "epoch": 101.04, "learning_rate": 6.527896440129451e-06, "loss": 0.0693, "step": 260180 }, { "epoch": 101.04, "learning_rate": 6.5273786407766996e-06, "loss": 0.0398, "step": 260190 }, { "epoch": 101.05, "learning_rate": 6.526860841423948e-06, "loss": 0.1098, "step": 260200 }, { "epoch": 101.05, "learning_rate": 6.526343042071198e-06, "loss": 0.0138, "step": 260210 }, { "epoch": 101.06, "learning_rate": 6.5258252427184476e-06, "loss": 0.0628, "step": 260220 }, { "epoch": 101.06, "learning_rate": 6.525307443365696e-06, "loss": 0.1302, "step": 260230 }, { "epoch": 101.06, "learning_rate": 6.524789644012945e-06, "loss": 0.0366, "step": 260240 }, { "epoch": 101.07, "learning_rate": 6.524271844660195e-06, "loss": 0.0537, "step": 260250 }, { "epoch": 101.07, "learning_rate": 6.523754045307444e-06, "loss": 0.01, "step": 260260 }, { "epoch": 101.08, "learning_rate": 6.523236245954693e-06, "loss": 0.082, "step": 260270 }, { "epoch": 101.08, "learning_rate": 6.522718446601942e-06, "loss": 0.0397, "step": 260280 }, { "epoch": 101.08, "learning_rate": 6.5222006472491915e-06, "loss": 0.011, "step": 260290 }, { "epoch": 101.09, "learning_rate": 6.521682847896441e-06, "loss": 0.0161, "step": 260300 }, { "epoch": 101.09, "learning_rate": 6.52116504854369e-06, "loss": 0.0405, "step": 260310 }, { "epoch": 101.1, "learning_rate": 6.520647249190939e-06, "loss": 0.0653, "step": 260320 }, { "epoch": 101.1, "learning_rate": 6.520129449838188e-06, "loss": 0.0371, "step": 260330 }, { "epoch": 101.1, "learning_rate": 6.519611650485438e-06, "loss": 0.1711, "step": 260340 }, { "epoch": 101.11, "learning_rate": 6.519093851132686e-06, "loss": 0.0118, "step": 260350 }, { "epoch": 101.11, "learning_rate": 6.5185760517799354e-06, "loss": 0.0007, "step": 260360 }, { "epoch": 101.11, "learning_rate": 6.518058252427185e-06, "loss": 0.06, "step": 260370 }, { "epoch": 101.12, "learning_rate": 6.517540453074435e-06, "loss": 0.0146, "step": 260380 }, { "epoch": 101.12, "learning_rate": 6.517022653721683e-06, "loss": 0.0032, "step": 260390 }, { "epoch": 101.13, "learning_rate": 6.516504854368932e-06, "loss": 0.0894, "step": 260400 }, { "epoch": 101.13, "learning_rate": 6.515987055016182e-06, "loss": 0.137, "step": 260410 }, { "epoch": 101.13, "learning_rate": 6.5154692556634314e-06, "loss": 0.1694, "step": 260420 }, { "epoch": 101.14, "learning_rate": 6.514951456310679e-06, "loss": 0.1914, "step": 260430 }, { "epoch": 101.14, "learning_rate": 6.514433656957929e-06, "loss": 0.0391, "step": 260440 }, { "epoch": 101.15, "learning_rate": 6.513915857605179e-06, "loss": 0.0759, "step": 260450 }, { "epoch": 101.15, "learning_rate": 6.513398058252428e-06, "loss": 0.0417, "step": 260460 }, { "epoch": 101.15, "learning_rate": 6.512880258899676e-06, "loss": 0.0577, "step": 260470 }, { "epoch": 101.16, "learning_rate": 6.512362459546926e-06, "loss": 0.0253, "step": 260480 }, { "epoch": 101.16, "learning_rate": 6.511844660194175e-06, "loss": 0.0267, "step": 260490 }, { "epoch": 101.17, "learning_rate": 6.511326860841425e-06, "loss": 0.0193, "step": 260500 }, { "epoch": 101.17, "learning_rate": 6.510809061488673e-06, "loss": 0.0241, "step": 260510 }, { "epoch": 101.17, "learning_rate": 6.5102912621359225e-06, "loss": 0.0926, "step": 260520 }, { "epoch": 101.18, "learning_rate": 6.509773462783172e-06, "loss": 0.2238, "step": 260530 }, { "epoch": 101.18, "learning_rate": 6.509255663430422e-06, "loss": 0.0012, "step": 260540 }, { "epoch": 101.18, "learning_rate": 6.50873786407767e-06, "loss": 0.0644, "step": 260550 }, { "epoch": 101.19, "learning_rate": 6.508220064724919e-06, "loss": 0.0732, "step": 260560 }, { "epoch": 101.19, "learning_rate": 6.507702265372169e-06, "loss": 0.0089, "step": 260570 }, { "epoch": 101.2, "learning_rate": 6.5071844660194186e-06, "loss": 0.0124, "step": 260580 }, { "epoch": 101.2, "learning_rate": 6.5066666666666665e-06, "loss": 0.0913, "step": 260590 }, { "epoch": 101.2, "learning_rate": 6.506148867313916e-06, "loss": 0.027, "step": 260600 }, { "epoch": 101.21, "learning_rate": 6.505631067961166e-06, "loss": 0.1625, "step": 260610 }, { "epoch": 101.21, "learning_rate": 6.505113268608415e-06, "loss": 0.0025, "step": 260620 }, { "epoch": 101.22, "learning_rate": 6.504595469255663e-06, "loss": 0.0857, "step": 260630 }, { "epoch": 101.22, "learning_rate": 6.504077669902913e-06, "loss": 0.0254, "step": 260640 }, { "epoch": 101.22, "learning_rate": 6.5035598705501625e-06, "loss": 0.002, "step": 260650 }, { "epoch": 101.23, "learning_rate": 6.503042071197412e-06, "loss": 0.0567, "step": 260660 }, { "epoch": 101.23, "learning_rate": 6.50252427184466e-06, "loss": 0.0062, "step": 260670 }, { "epoch": 101.23, "learning_rate": 6.50200647249191e-06, "loss": 0.0776, "step": 260680 }, { "epoch": 101.24, "learning_rate": 6.501488673139159e-06, "loss": 0.0815, "step": 260690 }, { "epoch": 101.24, "learning_rate": 6.500970873786409e-06, "loss": 0.069, "step": 260700 }, { "epoch": 101.25, "learning_rate": 6.500453074433658e-06, "loss": 0.2334, "step": 260710 }, { "epoch": 101.25, "learning_rate": 6.4999352750809064e-06, "loss": 0.0279, "step": 260720 }, { "epoch": 101.25, "learning_rate": 6.499417475728156e-06, "loss": 0.0022, "step": 260730 }, { "epoch": 101.26, "learning_rate": 6.498899676375406e-06, "loss": 0.013, "step": 260740 }, { "epoch": 101.26, "learning_rate": 6.4983818770226544e-06, "loss": 0.0742, "step": 260750 }, { "epoch": 101.27, "learning_rate": 6.497864077669903e-06, "loss": 0.075, "step": 260760 }, { "epoch": 101.27, "learning_rate": 6.497346278317153e-06, "loss": 0.139, "step": 260770 }, { "epoch": 101.27, "learning_rate": 6.4968284789644025e-06, "loss": 0.0309, "step": 260780 }, { "epoch": 101.28, "learning_rate": 6.496310679611651e-06, "loss": 0.0562, "step": 260790 }, { "epoch": 101.28, "learning_rate": 6.4957928802589e-06, "loss": 0.0088, "step": 260800 }, { "epoch": 101.29, "learning_rate": 6.49527508090615e-06, "loss": 0.0233, "step": 260810 }, { "epoch": 101.29, "learning_rate": 6.494757281553398e-06, "loss": 0.0003, "step": 260820 }, { "epoch": 101.29, "learning_rate": 6.494239482200648e-06, "loss": 0.0686, "step": 260830 }, { "epoch": 101.3, "learning_rate": 6.493721682847897e-06, "loss": 0.001, "step": 260840 }, { "epoch": 101.3, "learning_rate": 6.493203883495146e-06, "loss": 0.0002, "step": 260850 }, { "epoch": 101.3, "learning_rate": 6.492686084142395e-06, "loss": 0.0517, "step": 260860 }, { "epoch": 101.31, "learning_rate": 6.492168284789645e-06, "loss": 0.0898, "step": 260870 }, { "epoch": 101.31, "learning_rate": 6.4916504854368935e-06, "loss": 0.1048, "step": 260880 }, { "epoch": 101.32, "learning_rate": 6.491132686084143e-06, "loss": 0.0551, "step": 260890 }, { "epoch": 101.32, "learning_rate": 6.490614886731392e-06, "loss": 0.0155, "step": 260900 }, { "epoch": 101.32, "learning_rate": 6.4900970873786416e-06, "loss": 0.0742, "step": 260910 }, { "epoch": 101.33, "learning_rate": 6.48957928802589e-06, "loss": 0.0148, "step": 260920 }, { "epoch": 101.33, "learning_rate": 6.48906148867314e-06, "loss": 0.0128, "step": 260930 }, { "epoch": 101.34, "learning_rate": 6.488543689320389e-06, "loss": 0.064, "step": 260940 }, { "epoch": 101.34, "learning_rate": 6.488025889967638e-06, "loss": 0.1186, "step": 260950 }, { "epoch": 101.34, "learning_rate": 6.487508090614887e-06, "loss": 0.0344, "step": 260960 }, { "epoch": 101.35, "learning_rate": 6.486990291262137e-06, "loss": 0.0011, "step": 260970 }, { "epoch": 101.35, "learning_rate": 6.4864724919093855e-06, "loss": 0.1031, "step": 260980 }, { "epoch": 101.36, "learning_rate": 6.485954692556635e-06, "loss": 0.0837, "step": 260990 }, { "epoch": 101.36, "learning_rate": 6.485436893203884e-06, "loss": 0.031, "step": 261000 }, { "epoch": 101.36, "learning_rate": 6.484919093851133e-06, "loss": 0.0005, "step": 261010 }, { "epoch": 101.37, "learning_rate": 6.484401294498382e-06, "loss": 0.0142, "step": 261020 }, { "epoch": 101.37, "learning_rate": 6.483883495145632e-06, "loss": 0.0277, "step": 261030 }, { "epoch": 101.37, "learning_rate": 6.483365695792881e-06, "loss": 0.0152, "step": 261040 }, { "epoch": 101.38, "learning_rate": 6.4828478964401294e-06, "loss": 0.0053, "step": 261050 }, { "epoch": 101.38, "learning_rate": 6.482330097087379e-06, "loss": 0.0352, "step": 261060 }, { "epoch": 101.39, "learning_rate": 6.481812297734629e-06, "loss": 0.0566, "step": 261070 }, { "epoch": 101.39, "learning_rate": 6.4812944983818774e-06, "loss": 0.1071, "step": 261080 }, { "epoch": 101.39, "learning_rate": 6.480776699029126e-06, "loss": 0.0467, "step": 261090 }, { "epoch": 101.4, "learning_rate": 6.480258899676376e-06, "loss": 0.0385, "step": 261100 }, { "epoch": 101.4, "learning_rate": 6.4797411003236254e-06, "loss": 0.0001, "step": 261110 }, { "epoch": 101.41, "learning_rate": 6.479223300970874e-06, "loss": 0.0005, "step": 261120 }, { "epoch": 101.41, "learning_rate": 6.478705501618123e-06, "loss": 0.0871, "step": 261130 }, { "epoch": 101.41, "learning_rate": 6.478187702265373e-06, "loss": 0.0675, "step": 261140 }, { "epoch": 101.42, "learning_rate": 6.477669902912622e-06, "loss": 0.0334, "step": 261150 }, { "epoch": 101.42, "learning_rate": 6.47715210355987e-06, "loss": 0.1113, "step": 261160 }, { "epoch": 101.43, "learning_rate": 6.47663430420712e-06, "loss": 0.0889, "step": 261170 }, { "epoch": 101.43, "learning_rate": 6.476116504854369e-06, "loss": 0.0134, "step": 261180 }, { "epoch": 101.43, "learning_rate": 6.475598705501619e-06, "loss": 0.0553, "step": 261190 }, { "epoch": 101.44, "learning_rate": 6.475080906148867e-06, "loss": 0.0338, "step": 261200 }, { "epoch": 101.44, "learning_rate": 6.4745631067961165e-06, "loss": 0.0485, "step": 261210 }, { "epoch": 101.44, "learning_rate": 6.474045307443366e-06, "loss": 0.0078, "step": 261220 }, { "epoch": 101.45, "learning_rate": 6.473527508090616e-06, "loss": 0.0388, "step": 261230 }, { "epoch": 101.45, "learning_rate": 6.473009708737865e-06, "loss": 0.0339, "step": 261240 }, { "epoch": 101.46, "learning_rate": 6.472491909385113e-06, "loss": 0.0001, "step": 261250 }, { "epoch": 101.46, "learning_rate": 6.471974110032363e-06, "loss": 0.0613, "step": 261260 }, { "epoch": 101.46, "learning_rate": 6.4714563106796126e-06, "loss": 0.052, "step": 261270 }, { "epoch": 101.47, "learning_rate": 6.470938511326862e-06, "loss": 0.0125, "step": 261280 }, { "epoch": 101.47, "learning_rate": 6.47042071197411e-06, "loss": 0.0283, "step": 261290 }, { "epoch": 101.48, "learning_rate": 6.46990291262136e-06, "loss": 0.0268, "step": 261300 }, { "epoch": 101.48, "learning_rate": 6.469385113268609e-06, "loss": 0.0744, "step": 261310 }, { "epoch": 101.48, "learning_rate": 6.468867313915859e-06, "loss": 0.0133, "step": 261320 }, { "epoch": 101.49, "learning_rate": 6.468349514563107e-06, "loss": 0.0157, "step": 261330 }, { "epoch": 101.49, "learning_rate": 6.4678317152103565e-06, "loss": 0.0004, "step": 261340 }, { "epoch": 101.5, "learning_rate": 6.467313915857606e-06, "loss": 0.0636, "step": 261350 }, { "epoch": 101.5, "learning_rate": 6.466796116504856e-06, "loss": 0.0012, "step": 261360 }, { "epoch": 101.5, "learning_rate": 6.466278317152104e-06, "loss": 0.0099, "step": 261370 }, { "epoch": 101.51, "learning_rate": 6.465760517799353e-06, "loss": 0.0672, "step": 261380 }, { "epoch": 101.51, "learning_rate": 6.465242718446603e-06, "loss": 0.034, "step": 261390 }, { "epoch": 101.51, "learning_rate": 6.4647249190938525e-06, "loss": 0.0334, "step": 261400 }, { "epoch": 101.52, "learning_rate": 6.4642071197411004e-06, "loss": 0.0437, "step": 261410 }, { "epoch": 101.52, "learning_rate": 6.46368932038835e-06, "loss": 0.0195, "step": 261420 }, { "epoch": 101.53, "learning_rate": 6.4631715210356e-06, "loss": 0.1053, "step": 261430 }, { "epoch": 101.53, "learning_rate": 6.462653721682849e-06, "loss": 0.1218, "step": 261440 }, { "epoch": 101.53, "learning_rate": 6.462135922330097e-06, "loss": 0.1588, "step": 261450 }, { "epoch": 101.54, "learning_rate": 6.461618122977347e-06, "loss": 0.0456, "step": 261460 }, { "epoch": 101.54, "learning_rate": 6.4611003236245965e-06, "loss": 0.0834, "step": 261470 }, { "epoch": 101.55, "learning_rate": 6.460582524271845e-06, "loss": 0.0891, "step": 261480 }, { "epoch": 101.55, "learning_rate": 6.460064724919094e-06, "loss": 0.019, "step": 261490 }, { "epoch": 101.55, "learning_rate": 6.459546925566344e-06, "loss": 0.0843, "step": 261500 }, { "epoch": 101.56, "learning_rate": 6.459029126213593e-06, "loss": 0.0429, "step": 261510 }, { "epoch": 101.56, "learning_rate": 6.458511326860842e-06, "loss": 0.0432, "step": 261520 }, { "epoch": 101.57, "learning_rate": 6.457993527508091e-06, "loss": 0.0114, "step": 261530 }, { "epoch": 101.57, "learning_rate": 6.45747572815534e-06, "loss": 0.0212, "step": 261540 }, { "epoch": 101.57, "learning_rate": 6.45695792880259e-06, "loss": 0.0672, "step": 261550 }, { "epoch": 101.58, "learning_rate": 6.456440129449839e-06, "loss": 0.0461, "step": 261560 }, { "epoch": 101.58, "learning_rate": 6.4559223300970875e-06, "loss": 0.0417, "step": 261570 }, { "epoch": 101.58, "learning_rate": 6.455404530744337e-06, "loss": 0.0351, "step": 261580 }, { "epoch": 101.59, "learning_rate": 6.454886731391587e-06, "loss": 0.0339, "step": 261590 }, { "epoch": 101.59, "learning_rate": 6.4543689320388356e-06, "loss": 0.0456, "step": 261600 }, { "epoch": 101.6, "learning_rate": 6.453851132686084e-06, "loss": 0.0818, "step": 261610 }, { "epoch": 101.6, "learning_rate": 6.453333333333334e-06, "loss": 0.1812, "step": 261620 }, { "epoch": 101.6, "learning_rate": 6.452815533980583e-06, "loss": 0.0013, "step": 261630 }, { "epoch": 101.61, "learning_rate": 6.452297734627832e-06, "loss": 0.0102, "step": 261640 }, { "epoch": 101.61, "learning_rate": 6.451779935275081e-06, "loss": 0.054, "step": 261650 }, { "epoch": 101.62, "learning_rate": 6.451262135922331e-06, "loss": 0.0018, "step": 261660 }, { "epoch": 101.62, "learning_rate": 6.4507443365695795e-06, "loss": 0.0116, "step": 261670 }, { "epoch": 101.62, "learning_rate": 6.450226537216829e-06, "loss": 0.0269, "step": 261680 }, { "epoch": 101.63, "learning_rate": 6.449708737864078e-06, "loss": 0.0778, "step": 261690 }, { "epoch": 101.63, "learning_rate": 6.4491909385113275e-06, "loss": 0.1439, "step": 261700 }, { "epoch": 101.63, "learning_rate": 6.448673139158576e-06, "loss": 0.0009, "step": 261710 }, { "epoch": 101.64, "learning_rate": 6.448155339805826e-06, "loss": 0.0054, "step": 261720 }, { "epoch": 101.64, "learning_rate": 6.447637540453075e-06, "loss": 0.0003, "step": 261730 }, { "epoch": 101.65, "learning_rate": 6.447119741100324e-06, "loss": 0.0005, "step": 261740 }, { "epoch": 101.65, "learning_rate": 6.446601941747573e-06, "loss": 0.1327, "step": 261750 }, { "epoch": 101.65, "learning_rate": 6.446084142394823e-06, "loss": 0.1778, "step": 261760 }, { "epoch": 101.66, "learning_rate": 6.4455663430420714e-06, "loss": 0.0427, "step": 261770 }, { "epoch": 101.66, "learning_rate": 6.445048543689321e-06, "loss": 0.0199, "step": 261780 }, { "epoch": 101.67, "learning_rate": 6.44453074433657e-06, "loss": 0.0323, "step": 261790 }, { "epoch": 101.67, "learning_rate": 6.4440129449838194e-06, "loss": 0.0571, "step": 261800 }, { "epoch": 101.67, "learning_rate": 6.443495145631069e-06, "loss": 0.0635, "step": 261810 }, { "epoch": 101.68, "learning_rate": 6.442977346278317e-06, "loss": 0.0114, "step": 261820 }, { "epoch": 101.68, "learning_rate": 6.442459546925567e-06, "loss": 0.0149, "step": 261830 }, { "epoch": 101.69, "learning_rate": 6.441941747572816e-06, "loss": 0.0228, "step": 261840 }, { "epoch": 101.69, "learning_rate": 6.441423948220066e-06, "loss": 0.0149, "step": 261850 }, { "epoch": 101.69, "learning_rate": 6.440906148867314e-06, "loss": 0.0776, "step": 261860 }, { "epoch": 101.7, "learning_rate": 6.440388349514563e-06, "loss": 0.0004, "step": 261870 }, { "epoch": 101.7, "learning_rate": 6.439870550161813e-06, "loss": 0.0991, "step": 261880 }, { "epoch": 101.7, "learning_rate": 6.439352750809063e-06, "loss": 0.002, "step": 261890 }, { "epoch": 101.71, "learning_rate": 6.4388349514563105e-06, "loss": 0.0122, "step": 261900 }, { "epoch": 101.71, "learning_rate": 6.43831715210356e-06, "loss": 0.1291, "step": 261910 }, { "epoch": 101.72, "learning_rate": 6.43779935275081e-06, "loss": 0.0283, "step": 261920 }, { "epoch": 101.72, "learning_rate": 6.437281553398059e-06, "loss": 0.0799, "step": 261930 }, { "epoch": 101.72, "learning_rate": 6.436763754045307e-06, "loss": 0.0695, "step": 261940 }, { "epoch": 101.73, "learning_rate": 6.436245954692557e-06, "loss": 0.0316, "step": 261950 }, { "epoch": 101.73, "learning_rate": 6.4357281553398066e-06, "loss": 0.0452, "step": 261960 }, { "epoch": 101.74, "learning_rate": 6.435210355987056e-06, "loss": 0.0005, "step": 261970 }, { "epoch": 101.74, "learning_rate": 6.434692556634304e-06, "loss": 0.1686, "step": 261980 }, { "epoch": 101.74, "learning_rate": 6.434174757281554e-06, "loss": 0.2042, "step": 261990 }, { "epoch": 101.75, "learning_rate": 6.433656957928803e-06, "loss": 0.0458, "step": 262000 }, { "epoch": 101.75, "learning_rate": 6.433139158576053e-06, "loss": 0.0243, "step": 262010 }, { "epoch": 101.76, "learning_rate": 6.432621359223301e-06, "loss": 0.0002, "step": 262020 }, { "epoch": 101.76, "learning_rate": 6.4321035598705505e-06, "loss": 0.0117, "step": 262030 }, { "epoch": 101.76, "learning_rate": 6.4315857605178e-06, "loss": 0.1202, "step": 262040 }, { "epoch": 101.77, "learning_rate": 6.43106796116505e-06, "loss": 0.1342, "step": 262050 }, { "epoch": 101.77, "learning_rate": 6.430550161812298e-06, "loss": 0.0317, "step": 262060 }, { "epoch": 101.77, "learning_rate": 6.430032362459547e-06, "loss": 0.0523, "step": 262070 }, { "epoch": 101.78, "learning_rate": 6.429514563106797e-06, "loss": 0.0986, "step": 262080 }, { "epoch": 101.78, "learning_rate": 6.4289967637540465e-06, "loss": 0.0367, "step": 262090 }, { "epoch": 101.79, "learning_rate": 6.4284789644012944e-06, "loss": 0.0391, "step": 262100 }, { "epoch": 101.79, "learning_rate": 6.427961165048544e-06, "loss": 0.0331, "step": 262110 }, { "epoch": 101.79, "learning_rate": 6.427443365695794e-06, "loss": 0.0256, "step": 262120 }, { "epoch": 101.8, "learning_rate": 6.426925566343043e-06, "loss": 0.0633, "step": 262130 }, { "epoch": 101.8, "learning_rate": 6.426407766990291e-06, "loss": 0.0745, "step": 262140 }, { "epoch": 101.81, "learning_rate": 6.425889967637541e-06, "loss": 0.0193, "step": 262150 }, { "epoch": 101.81, "learning_rate": 6.4253721682847904e-06, "loss": 0.0756, "step": 262160 }, { "epoch": 101.81, "learning_rate": 6.42485436893204e-06, "loss": 0.0923, "step": 262170 }, { "epoch": 101.82, "learning_rate": 6.424336569579288e-06, "loss": 0.0213, "step": 262180 }, { "epoch": 101.82, "learning_rate": 6.423818770226538e-06, "loss": 0.0083, "step": 262190 }, { "epoch": 101.83, "learning_rate": 6.423300970873787e-06, "loss": 0.0084, "step": 262200 }, { "epoch": 101.83, "learning_rate": 6.422783171521037e-06, "loss": 0.0106, "step": 262210 }, { "epoch": 101.83, "learning_rate": 6.422265372168285e-06, "loss": 0.0002, "step": 262220 }, { "epoch": 101.84, "learning_rate": 6.421747572815534e-06, "loss": 0.0475, "step": 262230 }, { "epoch": 101.84, "learning_rate": 6.421229773462784e-06, "loss": 0.016, "step": 262240 }, { "epoch": 101.84, "learning_rate": 6.420711974110034e-06, "loss": 0.0883, "step": 262250 }, { "epoch": 101.85, "learning_rate": 6.4201941747572815e-06, "loss": 0.078, "step": 262260 }, { "epoch": 101.85, "learning_rate": 6.419676375404531e-06, "loss": 0.0736, "step": 262270 }, { "epoch": 101.86, "learning_rate": 6.419158576051781e-06, "loss": 0.015, "step": 262280 }, { "epoch": 101.86, "learning_rate": 6.4186407766990296e-06, "loss": 0.0547, "step": 262290 }, { "epoch": 101.86, "learning_rate": 6.418122977346278e-06, "loss": 0.1039, "step": 262300 }, { "epoch": 101.87, "learning_rate": 6.417605177993528e-06, "loss": 0.0781, "step": 262310 }, { "epoch": 101.87, "learning_rate": 6.4170873786407776e-06, "loss": 0.062, "step": 262320 }, { "epoch": 101.88, "learning_rate": 6.416569579288026e-06, "loss": 0.0351, "step": 262330 }, { "epoch": 101.88, "learning_rate": 6.416051779935275e-06, "loss": 0.0693, "step": 262340 }, { "epoch": 101.88, "learning_rate": 6.415533980582525e-06, "loss": 0.0009, "step": 262350 }, { "epoch": 101.89, "learning_rate": 6.415016181229774e-06, "loss": 0.0597, "step": 262360 }, { "epoch": 101.89, "learning_rate": 6.414498381877023e-06, "loss": 0.098, "step": 262370 }, { "epoch": 101.9, "learning_rate": 6.413980582524273e-06, "loss": 0.0441, "step": 262380 }, { "epoch": 101.9, "learning_rate": 6.4134627831715215e-06, "loss": 0.0322, "step": 262390 }, { "epoch": 101.9, "learning_rate": 6.412944983818771e-06, "loss": 0.0169, "step": 262400 }, { "epoch": 101.91, "learning_rate": 6.41242718446602e-06, "loss": 0.0944, "step": 262410 }, { "epoch": 101.91, "learning_rate": 6.4119093851132695e-06, "loss": 0.0981, "step": 262420 }, { "epoch": 101.91, "learning_rate": 6.411391585760518e-06, "loss": 0.0014, "step": 262430 }, { "epoch": 101.92, "learning_rate": 6.410873786407768e-06, "loss": 0.0002, "step": 262440 }, { "epoch": 101.92, "learning_rate": 6.410355987055017e-06, "loss": 0.2191, "step": 262450 }, { "epoch": 101.93, "learning_rate": 6.409838187702266e-06, "loss": 0.0951, "step": 262460 }, { "epoch": 101.93, "learning_rate": 6.409320388349515e-06, "loss": 0.0133, "step": 262470 }, { "epoch": 101.93, "learning_rate": 6.408802588996764e-06, "loss": 0.0292, "step": 262480 }, { "epoch": 101.94, "learning_rate": 6.4082847896440134e-06, "loss": 0.012, "step": 262490 }, { "epoch": 101.94, "learning_rate": 6.407766990291263e-06, "loss": 0.0276, "step": 262500 }, { "epoch": 101.95, "learning_rate": 6.407249190938512e-06, "loss": 0.012, "step": 262510 }, { "epoch": 101.95, "learning_rate": 6.406731391585761e-06, "loss": 0.0014, "step": 262520 }, { "epoch": 101.95, "learning_rate": 6.40621359223301e-06, "loss": 0.0783, "step": 262530 }, { "epoch": 101.96, "learning_rate": 6.40569579288026e-06, "loss": 0.0174, "step": 262540 }, { "epoch": 101.96, "learning_rate": 6.405177993527509e-06, "loss": 0.0165, "step": 262550 }, { "epoch": 101.97, "learning_rate": 6.404660194174757e-06, "loss": 0.0024, "step": 262560 }, { "epoch": 101.97, "learning_rate": 6.404142394822007e-06, "loss": 0.0139, "step": 262570 }, { "epoch": 101.97, "learning_rate": 6.403624595469257e-06, "loss": 0.0319, "step": 262580 }, { "epoch": 101.98, "learning_rate": 6.403106796116505e-06, "loss": 0.004, "step": 262590 }, { "epoch": 101.98, "learning_rate": 6.402588996763754e-06, "loss": 0.1765, "step": 262600 }, { "epoch": 101.98, "learning_rate": 6.402071197411004e-06, "loss": 0.0189, "step": 262610 }, { "epoch": 101.99, "learning_rate": 6.401553398058253e-06, "loss": 0.0384, "step": 262620 }, { "epoch": 101.99, "learning_rate": 6.401035598705501e-06, "loss": 0.0366, "step": 262630 }, { "epoch": 102.0, "learning_rate": 6.400517799352751e-06, "loss": 0.0207, "step": 262640 }, { "epoch": 102.0, "learning_rate": 6.4000000000000006e-06, "loss": 0.0118, "step": 262650 }, { "epoch": 102.0, "eval_accuracy": 0.9491059147180193, "eval_loss": 0.38485851883888245, "eval_runtime": 8.2149, "eval_samples_per_second": 442.49, "eval_steps_per_second": 55.387, "step": 262650 }, { "epoch": 102.0, "learning_rate": 6.39948220064725e-06, "loss": 0.0217, "step": 262660 }, { "epoch": 102.01, "learning_rate": 6.398964401294498e-06, "loss": 0.0995, "step": 262670 }, { "epoch": 102.01, "learning_rate": 6.398446601941748e-06, "loss": 0.0006, "step": 262680 }, { "epoch": 102.02, "learning_rate": 6.397928802588997e-06, "loss": 0.0928, "step": 262690 }, { "epoch": 102.02, "learning_rate": 6.397411003236247e-06, "loss": 0.042, "step": 262700 }, { "epoch": 102.02, "learning_rate": 6.396893203883495e-06, "loss": 0.0485, "step": 262710 }, { "epoch": 102.03, "learning_rate": 6.3963754045307445e-06, "loss": 0.0903, "step": 262720 }, { "epoch": 102.03, "learning_rate": 6.395857605177994e-06, "loss": 0.0601, "step": 262730 }, { "epoch": 102.03, "learning_rate": 6.395339805825244e-06, "loss": 0.0897, "step": 262740 }, { "epoch": 102.04, "learning_rate": 6.394822006472492e-06, "loss": 0.0518, "step": 262750 }, { "epoch": 102.04, "learning_rate": 6.394304207119741e-06, "loss": 0.0204, "step": 262760 }, { "epoch": 102.05, "learning_rate": 6.393786407766991e-06, "loss": 0.0749, "step": 262770 }, { "epoch": 102.05, "learning_rate": 6.3932686084142405e-06, "loss": 0.0199, "step": 262780 }, { "epoch": 102.05, "learning_rate": 6.3927508090614884e-06, "loss": 0.0088, "step": 262790 }, { "epoch": 102.06, "learning_rate": 6.392233009708738e-06, "loss": 0.0347, "step": 262800 }, { "epoch": 102.06, "learning_rate": 6.391715210355988e-06, "loss": 0.0467, "step": 262810 }, { "epoch": 102.07, "learning_rate": 6.391197411003237e-06, "loss": 0.1711, "step": 262820 }, { "epoch": 102.07, "learning_rate": 6.390679611650485e-06, "loss": 0.0022, "step": 262830 }, { "epoch": 102.07, "learning_rate": 6.390161812297735e-06, "loss": 0.049, "step": 262840 }, { "epoch": 102.08, "learning_rate": 6.3896440129449844e-06, "loss": 0.0572, "step": 262850 }, { "epoch": 102.08, "learning_rate": 6.389126213592234e-06, "loss": 0.0925, "step": 262860 }, { "epoch": 102.09, "learning_rate": 6.388608414239482e-06, "loss": 0.0001, "step": 262870 }, { "epoch": 102.09, "learning_rate": 6.388090614886732e-06, "loss": 0.085, "step": 262880 }, { "epoch": 102.09, "learning_rate": 6.387572815533981e-06, "loss": 0.0365, "step": 262890 }, { "epoch": 102.1, "learning_rate": 6.387055016181231e-06, "loss": 0.0596, "step": 262900 }, { "epoch": 102.1, "learning_rate": 6.386537216828479e-06, "loss": 0.1671, "step": 262910 }, { "epoch": 102.1, "learning_rate": 6.386019417475728e-06, "loss": 0.1103, "step": 262920 }, { "epoch": 102.11, "learning_rate": 6.385501618122978e-06, "loss": 0.0497, "step": 262930 }, { "epoch": 102.11, "learning_rate": 6.384983818770228e-06, "loss": 0.0673, "step": 262940 }, { "epoch": 102.12, "learning_rate": 6.384466019417476e-06, "loss": 0.0004, "step": 262950 }, { "epoch": 102.12, "learning_rate": 6.383948220064725e-06, "loss": 0.0525, "step": 262960 }, { "epoch": 102.12, "learning_rate": 6.383430420711975e-06, "loss": 0.0098, "step": 262970 }, { "epoch": 102.13, "learning_rate": 6.382912621359224e-06, "loss": 0.0015, "step": 262980 }, { "epoch": 102.13, "learning_rate": 6.382394822006473e-06, "loss": 0.0335, "step": 262990 }, { "epoch": 102.14, "learning_rate": 6.381877022653722e-06, "loss": 0.0611, "step": 263000 }, { "epoch": 102.14, "learning_rate": 6.3813592233009716e-06, "loss": 0.0027, "step": 263010 }, { "epoch": 102.14, "learning_rate": 6.380841423948221e-06, "loss": 0.0135, "step": 263020 }, { "epoch": 102.15, "learning_rate": 6.38032362459547e-06, "loss": 0.0563, "step": 263030 }, { "epoch": 102.15, "learning_rate": 6.379805825242719e-06, "loss": 0.1217, "step": 263040 }, { "epoch": 102.16, "learning_rate": 6.379288025889968e-06, "loss": 0.0859, "step": 263050 }, { "epoch": 102.16, "learning_rate": 6.378770226537218e-06, "loss": 0.0127, "step": 263060 }, { "epoch": 102.16, "learning_rate": 6.378252427184467e-06, "loss": 0.0133, "step": 263070 }, { "epoch": 102.17, "learning_rate": 6.3777346278317155e-06, "loss": 0.0639, "step": 263080 }, { "epoch": 102.17, "learning_rate": 6.377216828478965e-06, "loss": 0.0526, "step": 263090 }, { "epoch": 102.17, "learning_rate": 6.376699029126214e-06, "loss": 0.0923, "step": 263100 }, { "epoch": 102.18, "learning_rate": 6.3761812297734635e-06, "loss": 0.0905, "step": 263110 }, { "epoch": 102.18, "learning_rate": 6.375663430420712e-06, "loss": 0.0534, "step": 263120 }, { "epoch": 102.19, "learning_rate": 6.375145631067962e-06, "loss": 0.0043, "step": 263130 }, { "epoch": 102.19, "learning_rate": 6.374627831715211e-06, "loss": 0.0862, "step": 263140 }, { "epoch": 102.19, "learning_rate": 6.37411003236246e-06, "loss": 0.0605, "step": 263150 }, { "epoch": 102.2, "learning_rate": 6.373592233009709e-06, "loss": 0.0412, "step": 263160 }, { "epoch": 102.2, "learning_rate": 6.373074433656959e-06, "loss": 0.0682, "step": 263170 }, { "epoch": 102.21, "learning_rate": 6.3725566343042074e-06, "loss": 0.1837, "step": 263180 }, { "epoch": 102.21, "learning_rate": 6.372038834951457e-06, "loss": 0.0568, "step": 263190 }, { "epoch": 102.21, "learning_rate": 6.371521035598706e-06, "loss": 0.0499, "step": 263200 }, { "epoch": 102.22, "learning_rate": 6.3710032362459555e-06, "loss": 0.0094, "step": 263210 }, { "epoch": 102.22, "learning_rate": 6.370485436893204e-06, "loss": 0.0015, "step": 263220 }, { "epoch": 102.23, "learning_rate": 6.369967637540454e-06, "loss": 0.0256, "step": 263230 }, { "epoch": 102.23, "learning_rate": 6.369449838187703e-06, "loss": 0.0008, "step": 263240 }, { "epoch": 102.23, "learning_rate": 6.368932038834952e-06, "loss": 0.0418, "step": 263250 }, { "epoch": 102.24, "learning_rate": 6.368414239482201e-06, "loss": 0.026, "step": 263260 }, { "epoch": 102.24, "learning_rate": 6.367896440129451e-06, "loss": 0.0123, "step": 263270 }, { "epoch": 102.24, "learning_rate": 6.367378640776699e-06, "loss": 0.0264, "step": 263280 }, { "epoch": 102.25, "learning_rate": 6.366860841423948e-06, "loss": 0.0022, "step": 263290 }, { "epoch": 102.25, "learning_rate": 6.366343042071198e-06, "loss": 0.0811, "step": 263300 }, { "epoch": 102.26, "learning_rate": 6.365825242718447e-06, "loss": 0.088, "step": 263310 }, { "epoch": 102.26, "learning_rate": 6.365307443365696e-06, "loss": 0.0077, "step": 263320 }, { "epoch": 102.26, "learning_rate": 6.364789644012945e-06, "loss": 0.0309, "step": 263330 }, { "epoch": 102.27, "learning_rate": 6.3642718446601946e-06, "loss": 0.0011, "step": 263340 }, { "epoch": 102.27, "learning_rate": 6.363754045307444e-06, "loss": 0.0004, "step": 263350 }, { "epoch": 102.28, "learning_rate": 6.363236245954693e-06, "loss": 0.0259, "step": 263360 }, { "epoch": 102.28, "learning_rate": 6.362718446601942e-06, "loss": 0.0849, "step": 263370 }, { "epoch": 102.28, "learning_rate": 6.362200647249191e-06, "loss": 0.0216, "step": 263380 }, { "epoch": 102.29, "learning_rate": 6.361682847896441e-06, "loss": 0.0299, "step": 263390 }, { "epoch": 102.29, "learning_rate": 6.36116504854369e-06, "loss": 0.0305, "step": 263400 }, { "epoch": 102.3, "learning_rate": 6.3606472491909385e-06, "loss": 0.003, "step": 263410 }, { "epoch": 102.3, "learning_rate": 6.360129449838188e-06, "loss": 0.0785, "step": 263420 }, { "epoch": 102.3, "learning_rate": 6.359611650485438e-06, "loss": 0.0424, "step": 263430 }, { "epoch": 102.31, "learning_rate": 6.359093851132686e-06, "loss": 0.0231, "step": 263440 }, { "epoch": 102.31, "learning_rate": 6.358576051779935e-06, "loss": 0.0546, "step": 263450 }, { "epoch": 102.31, "learning_rate": 6.358058252427185e-06, "loss": 0.0648, "step": 263460 }, { "epoch": 102.32, "learning_rate": 6.3575404530744345e-06, "loss": 0.0195, "step": 263470 }, { "epoch": 102.32, "learning_rate": 6.357022653721684e-06, "loss": 0.0234, "step": 263480 }, { "epoch": 102.33, "learning_rate": 6.356504854368932e-06, "loss": 0.0219, "step": 263490 }, { "epoch": 102.33, "learning_rate": 6.355987055016182e-06, "loss": 0.0315, "step": 263500 }, { "epoch": 102.33, "learning_rate": 6.355469255663431e-06, "loss": 0.0113, "step": 263510 }, { "epoch": 102.34, "learning_rate": 6.354951456310681e-06, "loss": 0.029, "step": 263520 }, { "epoch": 102.34, "learning_rate": 6.354433656957929e-06, "loss": 0.0161, "step": 263530 }, { "epoch": 102.35, "learning_rate": 6.3539158576051784e-06, "loss": 0.0374, "step": 263540 }, { "epoch": 102.35, "learning_rate": 6.353398058252428e-06, "loss": 0.0032, "step": 263550 }, { "epoch": 102.35, "learning_rate": 6.352880258899678e-06, "loss": 0.0229, "step": 263560 }, { "epoch": 102.36, "learning_rate": 6.352362459546926e-06, "loss": 0.1049, "step": 263570 }, { "epoch": 102.36, "learning_rate": 6.351844660194175e-06, "loss": 0.0469, "step": 263580 }, { "epoch": 102.37, "learning_rate": 6.351326860841425e-06, "loss": 0.0148, "step": 263590 }, { "epoch": 102.37, "learning_rate": 6.3508090614886745e-06, "loss": 0.0719, "step": 263600 }, { "epoch": 102.37, "learning_rate": 6.350291262135922e-06, "loss": 0.1212, "step": 263610 }, { "epoch": 102.38, "learning_rate": 6.349773462783172e-06, "loss": 0.0213, "step": 263620 }, { "epoch": 102.38, "learning_rate": 6.349255663430422e-06, "loss": 0.1691, "step": 263630 }, { "epoch": 102.38, "learning_rate": 6.348737864077671e-06, "loss": 0.0019, "step": 263640 }, { "epoch": 102.39, "learning_rate": 6.348220064724919e-06, "loss": 0.1392, "step": 263650 }, { "epoch": 102.39, "learning_rate": 6.347702265372169e-06, "loss": 0.065, "step": 263660 }, { "epoch": 102.4, "learning_rate": 6.347184466019418e-06, "loss": 0.0668, "step": 263670 }, { "epoch": 102.4, "learning_rate": 6.346666666666668e-06, "loss": 0.0752, "step": 263680 }, { "epoch": 102.4, "learning_rate": 6.346148867313916e-06, "loss": 0.0275, "step": 263690 }, { "epoch": 102.41, "learning_rate": 6.3456310679611656e-06, "loss": 0.0333, "step": 263700 }, { "epoch": 102.41, "learning_rate": 6.345113268608415e-06, "loss": 0.0327, "step": 263710 }, { "epoch": 102.42, "learning_rate": 6.344595469255665e-06, "loss": 0.002, "step": 263720 }, { "epoch": 102.42, "learning_rate": 6.344077669902913e-06, "loss": 0.0768, "step": 263730 }, { "epoch": 102.42, "learning_rate": 6.343559870550162e-06, "loss": 0.0087, "step": 263740 }, { "epoch": 102.43, "learning_rate": 6.343042071197412e-06, "loss": 0.0497, "step": 263750 }, { "epoch": 102.43, "learning_rate": 6.342524271844661e-06, "loss": 0.0903, "step": 263760 }, { "epoch": 102.43, "learning_rate": 6.3420064724919095e-06, "loss": 0.0307, "step": 263770 }, { "epoch": 102.44, "learning_rate": 6.341488673139159e-06, "loss": 0.0004, "step": 263780 }, { "epoch": 102.44, "learning_rate": 6.340970873786409e-06, "loss": 0.0402, "step": 263790 }, { "epoch": 102.45, "learning_rate": 6.3404530744336575e-06, "loss": 0.0998, "step": 263800 }, { "epoch": 102.45, "learning_rate": 6.339935275080906e-06, "loss": 0.043, "step": 263810 }, { "epoch": 102.45, "learning_rate": 6.339417475728156e-06, "loss": 0.0657, "step": 263820 }, { "epoch": 102.46, "learning_rate": 6.3388996763754055e-06, "loss": 0.1726, "step": 263830 }, { "epoch": 102.46, "learning_rate": 6.338381877022654e-06, "loss": 0.0342, "step": 263840 }, { "epoch": 102.47, "learning_rate": 6.337864077669903e-06, "loss": 0.016, "step": 263850 }, { "epoch": 102.47, "learning_rate": 6.337346278317153e-06, "loss": 0.1193, "step": 263860 }, { "epoch": 102.47, "learning_rate": 6.336828478964402e-06, "loss": 0.0705, "step": 263870 }, { "epoch": 102.48, "learning_rate": 6.336310679611651e-06, "loss": 0.0197, "step": 263880 }, { "epoch": 102.48, "learning_rate": 6.3357928802589e-06, "loss": 0.104, "step": 263890 }, { "epoch": 102.49, "learning_rate": 6.3352750809061494e-06, "loss": 0.1033, "step": 263900 }, { "epoch": 102.49, "learning_rate": 6.334757281553398e-06, "loss": 0.0083, "step": 263910 }, { "epoch": 102.49, "learning_rate": 6.334239482200648e-06, "loss": 0.0005, "step": 263920 }, { "epoch": 102.5, "learning_rate": 6.333721682847897e-06, "loss": 0.0438, "step": 263930 }, { "epoch": 102.5, "learning_rate": 6.333203883495146e-06, "loss": 0.0121, "step": 263940 }, { "epoch": 102.5, "learning_rate": 6.332686084142395e-06, "loss": 0.0017, "step": 263950 }, { "epoch": 102.51, "learning_rate": 6.332168284789645e-06, "loss": 0.0487, "step": 263960 }, { "epoch": 102.51, "learning_rate": 6.331650485436893e-06, "loss": 0.0974, "step": 263970 }, { "epoch": 102.52, "learning_rate": 6.331132686084143e-06, "loss": 0.0623, "step": 263980 }, { "epoch": 102.52, "learning_rate": 6.330614886731392e-06, "loss": 0.0155, "step": 263990 }, { "epoch": 102.52, "learning_rate": 6.330097087378641e-06, "loss": 0.0133, "step": 264000 }, { "epoch": 102.53, "learning_rate": 6.32957928802589e-06, "loss": 0.0722, "step": 264010 }, { "epoch": 102.53, "learning_rate": 6.32906148867314e-06, "loss": 0.0654, "step": 264020 }, { "epoch": 102.54, "learning_rate": 6.3285436893203886e-06, "loss": 0.0009, "step": 264030 }, { "epoch": 102.54, "learning_rate": 6.328025889967638e-06, "loss": 0.062, "step": 264040 }, { "epoch": 102.54, "learning_rate": 6.327508090614888e-06, "loss": 0.0852, "step": 264050 }, { "epoch": 102.55, "learning_rate": 6.3269902912621366e-06, "loss": 0.0283, "step": 264060 }, { "epoch": 102.55, "learning_rate": 6.326472491909385e-06, "loss": 0.0563, "step": 264070 }, { "epoch": 102.56, "learning_rate": 6.325954692556635e-06, "loss": 0.051, "step": 264080 }, { "epoch": 102.56, "learning_rate": 6.3254368932038846e-06, "loss": 0.029, "step": 264090 }, { "epoch": 102.56, "learning_rate": 6.3249190938511325e-06, "loss": 0.0226, "step": 264100 }, { "epoch": 102.57, "learning_rate": 6.324401294498382e-06, "loss": 0.0522, "step": 264110 }, { "epoch": 102.57, "learning_rate": 6.323883495145632e-06, "loss": 0.0225, "step": 264120 }, { "epoch": 102.57, "learning_rate": 6.323365695792881e-06, "loss": 0.0049, "step": 264130 }, { "epoch": 102.58, "learning_rate": 6.322847896440129e-06, "loss": 0.1292, "step": 264140 }, { "epoch": 102.58, "learning_rate": 6.322330097087379e-06, "loss": 0.0728, "step": 264150 }, { "epoch": 102.59, "learning_rate": 6.3218122977346285e-06, "loss": 0.1119, "step": 264160 }, { "epoch": 102.59, "learning_rate": 6.321294498381878e-06, "loss": 0.081, "step": 264170 }, { "epoch": 102.59, "learning_rate": 6.320776699029126e-06, "loss": 0.0864, "step": 264180 }, { "epoch": 102.6, "learning_rate": 6.320258899676376e-06, "loss": 0.0084, "step": 264190 }, { "epoch": 102.6, "learning_rate": 6.319741100323625e-06, "loss": 0.0125, "step": 264200 }, { "epoch": 102.61, "learning_rate": 6.319223300970875e-06, "loss": 0.1179, "step": 264210 }, { "epoch": 102.61, "learning_rate": 6.318705501618123e-06, "loss": 0.0192, "step": 264220 }, { "epoch": 102.61, "learning_rate": 6.3181877022653724e-06, "loss": 0.0002, "step": 264230 }, { "epoch": 102.62, "learning_rate": 6.317669902912622e-06, "loss": 0.0091, "step": 264240 }, { "epoch": 102.62, "learning_rate": 6.317152103559872e-06, "loss": 0.0213, "step": 264250 }, { "epoch": 102.63, "learning_rate": 6.31663430420712e-06, "loss": 0.0125, "step": 264260 }, { "epoch": 102.63, "learning_rate": 6.316116504854369e-06, "loss": 0.0026, "step": 264270 }, { "epoch": 102.63, "learning_rate": 6.315598705501619e-06, "loss": 0.1897, "step": 264280 }, { "epoch": 102.64, "learning_rate": 6.3150809061488685e-06, "loss": 0.0181, "step": 264290 }, { "epoch": 102.64, "learning_rate": 6.314563106796116e-06, "loss": 0.0832, "step": 264300 }, { "epoch": 102.64, "learning_rate": 6.314045307443366e-06, "loss": 0.0177, "step": 264310 }, { "epoch": 102.65, "learning_rate": 6.313527508090616e-06, "loss": 0.0941, "step": 264320 }, { "epoch": 102.65, "learning_rate": 6.313009708737865e-06, "loss": 0.1718, "step": 264330 }, { "epoch": 102.66, "learning_rate": 6.312491909385113e-06, "loss": 0.0382, "step": 264340 }, { "epoch": 102.66, "learning_rate": 6.311974110032363e-06, "loss": 0.0025, "step": 264350 }, { "epoch": 102.66, "learning_rate": 6.311456310679612e-06, "loss": 0.0891, "step": 264360 }, { "epoch": 102.67, "learning_rate": 6.310938511326862e-06, "loss": 0.0984, "step": 264370 }, { "epoch": 102.67, "learning_rate": 6.31042071197411e-06, "loss": 0.1473, "step": 264380 }, { "epoch": 102.68, "learning_rate": 6.3099029126213596e-06, "loss": 0.0437, "step": 264390 }, { "epoch": 102.68, "learning_rate": 6.309385113268609e-06, "loss": 0.001, "step": 264400 }, { "epoch": 102.68, "learning_rate": 6.308867313915859e-06, "loss": 0.1302, "step": 264410 }, { "epoch": 102.69, "learning_rate": 6.308349514563107e-06, "loss": 0.0248, "step": 264420 }, { "epoch": 102.69, "learning_rate": 6.307831715210356e-06, "loss": 0.0131, "step": 264430 }, { "epoch": 102.7, "learning_rate": 6.307313915857606e-06, "loss": 0.0139, "step": 264440 }, { "epoch": 102.7, "learning_rate": 6.3067961165048556e-06, "loss": 0.0142, "step": 264450 }, { "epoch": 102.7, "learning_rate": 6.3062783171521035e-06, "loss": 0.0071, "step": 264460 }, { "epoch": 102.71, "learning_rate": 6.305760517799353e-06, "loss": 0.1486, "step": 264470 }, { "epoch": 102.71, "learning_rate": 6.305242718446603e-06, "loss": 0.1743, "step": 264480 }, { "epoch": 102.71, "learning_rate": 6.304724919093852e-06, "loss": 0.0938, "step": 264490 }, { "epoch": 102.72, "learning_rate": 6.3042071197411e-06, "loss": 0.0717, "step": 264500 }, { "epoch": 102.72, "learning_rate": 6.30368932038835e-06, "loss": 0.0386, "step": 264510 }, { "epoch": 102.73, "learning_rate": 6.3031715210355995e-06, "loss": 0.075, "step": 264520 }, { "epoch": 102.73, "learning_rate": 6.302653721682849e-06, "loss": 0.1006, "step": 264530 }, { "epoch": 102.73, "learning_rate": 6.302135922330097e-06, "loss": 0.1097, "step": 264540 }, { "epoch": 102.74, "learning_rate": 6.301618122977347e-06, "loss": 0.1012, "step": 264550 }, { "epoch": 102.74, "learning_rate": 6.301100323624596e-06, "loss": 0.0013, "step": 264560 }, { "epoch": 102.75, "learning_rate": 6.300582524271845e-06, "loss": 0.1417, "step": 264570 }, { "epoch": 102.75, "learning_rate": 6.300064724919094e-06, "loss": 0.0099, "step": 264580 }, { "epoch": 102.75, "learning_rate": 6.2995469255663434e-06, "loss": 0.0176, "step": 264590 }, { "epoch": 102.76, "learning_rate": 6.299029126213593e-06, "loss": 0.0889, "step": 264600 }, { "epoch": 102.76, "learning_rate": 6.298511326860842e-06, "loss": 0.0016, "step": 264610 }, { "epoch": 102.77, "learning_rate": 6.2979935275080915e-06, "loss": 0.0011, "step": 264620 }, { "epoch": 102.77, "learning_rate": 6.29747572815534e-06, "loss": 0.0178, "step": 264630 }, { "epoch": 102.77, "learning_rate": 6.29695792880259e-06, "loss": 0.0596, "step": 264640 }, { "epoch": 102.78, "learning_rate": 6.296440129449839e-06, "loss": 0.0566, "step": 264650 }, { "epoch": 102.78, "learning_rate": 6.295922330097088e-06, "loss": 0.1539, "step": 264660 }, { "epoch": 102.78, "learning_rate": 6.295404530744337e-06, "loss": 0.116, "step": 264670 }, { "epoch": 102.79, "learning_rate": 6.294886731391587e-06, "loss": 0.0462, "step": 264680 }, { "epoch": 102.79, "learning_rate": 6.294368932038835e-06, "loss": 0.0303, "step": 264690 }, { "epoch": 102.8, "learning_rate": 6.293851132686085e-06, "loss": 0.0365, "step": 264700 }, { "epoch": 102.8, "learning_rate": 6.293333333333334e-06, "loss": 0.0002, "step": 264710 }, { "epoch": 102.8, "learning_rate": 6.292815533980583e-06, "loss": 0.0269, "step": 264720 }, { "epoch": 102.81, "learning_rate": 6.292297734627832e-06, "loss": 0.0259, "step": 264730 }, { "epoch": 102.81, "learning_rate": 6.291779935275082e-06, "loss": 0.114, "step": 264740 }, { "epoch": 102.82, "learning_rate": 6.2912621359223306e-06, "loss": 0.0373, "step": 264750 }, { "epoch": 102.82, "learning_rate": 6.290744336569579e-06, "loss": 0.1162, "step": 264760 }, { "epoch": 102.82, "learning_rate": 6.290226537216829e-06, "loss": 0.0913, "step": 264770 }, { "epoch": 102.83, "learning_rate": 6.2897087378640786e-06, "loss": 0.1233, "step": 264780 }, { "epoch": 102.83, "learning_rate": 6.289190938511327e-06, "loss": 0.0677, "step": 264790 }, { "epoch": 102.83, "learning_rate": 6.288673139158576e-06, "loss": 0.1268, "step": 264800 }, { "epoch": 102.84, "learning_rate": 6.288155339805826e-06, "loss": 0.1093, "step": 264810 }, { "epoch": 102.84, "learning_rate": 6.287637540453075e-06, "loss": 0.0512, "step": 264820 }, { "epoch": 102.85, "learning_rate": 6.287119741100324e-06, "loss": 0.0002, "step": 264830 }, { "epoch": 102.85, "learning_rate": 6.286601941747573e-06, "loss": 0.0303, "step": 264840 }, { "epoch": 102.85, "learning_rate": 6.2860841423948225e-06, "loss": 0.0471, "step": 264850 }, { "epoch": 102.86, "learning_rate": 6.285566343042072e-06, "loss": 0.0197, "step": 264860 }, { "epoch": 102.86, "learning_rate": 6.285048543689321e-06, "loss": 0.0644, "step": 264870 }, { "epoch": 102.87, "learning_rate": 6.28453074433657e-06, "loss": 0.0294, "step": 264880 }, { "epoch": 102.87, "learning_rate": 6.284012944983819e-06, "loss": 0.0942, "step": 264890 }, { "epoch": 102.87, "learning_rate": 6.283495145631069e-06, "loss": 0.0267, "step": 264900 }, { "epoch": 102.88, "learning_rate": 6.282977346278317e-06, "loss": 0.0182, "step": 264910 }, { "epoch": 102.88, "learning_rate": 6.2824595469255664e-06, "loss": 0.0462, "step": 264920 }, { "epoch": 102.89, "learning_rate": 6.281941747572816e-06, "loss": 0.0004, "step": 264930 }, { "epoch": 102.89, "learning_rate": 6.281423948220066e-06, "loss": 0.0944, "step": 264940 }, { "epoch": 102.89, "learning_rate": 6.280906148867314e-06, "loss": 0.0089, "step": 264950 }, { "epoch": 102.9, "learning_rate": 6.280388349514563e-06, "loss": 0.1006, "step": 264960 }, { "epoch": 102.9, "learning_rate": 6.279870550161813e-06, "loss": 0.0004, "step": 264970 }, { "epoch": 102.9, "learning_rate": 6.2793527508090625e-06, "loss": 0.0814, "step": 264980 }, { "epoch": 102.91, "learning_rate": 6.27883495145631e-06, "loss": 0.0191, "step": 264990 }, { "epoch": 102.91, "learning_rate": 6.27831715210356e-06, "loss": 0.0127, "step": 265000 }, { "epoch": 102.92, "learning_rate": 6.27779935275081e-06, "loss": 0.0811, "step": 265010 }, { "epoch": 102.92, "learning_rate": 6.277281553398059e-06, "loss": 0.2498, "step": 265020 }, { "epoch": 102.92, "learning_rate": 6.276763754045307e-06, "loss": 0.0915, "step": 265030 }, { "epoch": 102.93, "learning_rate": 6.276245954692557e-06, "loss": 0.0143, "step": 265040 }, { "epoch": 102.93, "learning_rate": 6.275728155339806e-06, "loss": 0.1296, "step": 265050 }, { "epoch": 102.94, "learning_rate": 6.275210355987056e-06, "loss": 0.1452, "step": 265060 }, { "epoch": 102.94, "learning_rate": 6.274692556634304e-06, "loss": 0.0481, "step": 265070 }, { "epoch": 102.94, "learning_rate": 6.2741747572815536e-06, "loss": 0.0763, "step": 265080 }, { "epoch": 102.95, "learning_rate": 6.273656957928803e-06, "loss": 0.0006, "step": 265090 }, { "epoch": 102.95, "learning_rate": 6.273139158576053e-06, "loss": 0.0547, "step": 265100 }, { "epoch": 102.96, "learning_rate": 6.272621359223301e-06, "loss": 0.2334, "step": 265110 }, { "epoch": 102.96, "learning_rate": 6.27210355987055e-06, "loss": 0.0418, "step": 265120 }, { "epoch": 102.96, "learning_rate": 6.2715857605178e-06, "loss": 0.0317, "step": 265130 }, { "epoch": 102.97, "learning_rate": 6.2710679611650496e-06, "loss": 0.0739, "step": 265140 }, { "epoch": 102.97, "learning_rate": 6.2705501618122975e-06, "loss": 0.091, "step": 265150 }, { "epoch": 102.97, "learning_rate": 6.270032362459547e-06, "loss": 0.0142, "step": 265160 }, { "epoch": 102.98, "learning_rate": 6.269514563106797e-06, "loss": 0.101, "step": 265170 }, { "epoch": 102.98, "learning_rate": 6.268996763754046e-06, "loss": 0.1589, "step": 265180 }, { "epoch": 102.99, "learning_rate": 6.268478964401296e-06, "loss": 0.0397, "step": 265190 }, { "epoch": 102.99, "learning_rate": 6.267961165048544e-06, "loss": 0.0148, "step": 265200 }, { "epoch": 102.99, "learning_rate": 6.2674433656957935e-06, "loss": 0.0528, "step": 265210 }, { "epoch": 103.0, "learning_rate": 6.266925566343043e-06, "loss": 0.0307, "step": 265220 }, { "epoch": 103.0, "eval_accuracy": 0.9535075653370014, "eval_loss": 0.36436453461647034, "eval_runtime": 8.1887, "eval_samples_per_second": 443.903, "eval_steps_per_second": 55.564, "step": 265225 }, { "epoch": 103.0, "learning_rate": 6.266407766990292e-06, "loss": 0.0408, "step": 265230 }, { "epoch": 103.01, "learning_rate": 6.265889967637541e-06, "loss": 0.0114, "step": 265240 }, { "epoch": 103.01, "learning_rate": 6.26537216828479e-06, "loss": 0.0002, "step": 265250 }, { "epoch": 103.01, "learning_rate": 6.26485436893204e-06, "loss": 0.0371, "step": 265260 }, { "epoch": 103.02, "learning_rate": 6.264336569579289e-06, "loss": 0.1479, "step": 265270 }, { "epoch": 103.02, "learning_rate": 6.2638187702265374e-06, "loss": 0.0824, "step": 265280 }, { "epoch": 103.03, "learning_rate": 6.263300970873787e-06, "loss": 0.1319, "step": 265290 }, { "epoch": 103.03, "learning_rate": 6.262783171521037e-06, "loss": 0.1564, "step": 265300 }, { "epoch": 103.03, "learning_rate": 6.2622653721682855e-06, "loss": 0.0194, "step": 265310 }, { "epoch": 103.04, "learning_rate": 6.261747572815534e-06, "loss": 0.015, "step": 265320 }, { "epoch": 103.04, "learning_rate": 6.261229773462784e-06, "loss": 0.0448, "step": 265330 }, { "epoch": 103.04, "learning_rate": 6.2607119741100335e-06, "loss": 0.0535, "step": 265340 }, { "epoch": 103.05, "learning_rate": 6.260194174757282e-06, "loss": 0.0865, "step": 265350 }, { "epoch": 103.05, "learning_rate": 6.259676375404531e-06, "loss": 0.0313, "step": 265360 }, { "epoch": 103.06, "learning_rate": 6.259158576051781e-06, "loss": 0.2875, "step": 265370 }, { "epoch": 103.06, "learning_rate": 6.258640776699029e-06, "loss": 0.0088, "step": 265380 }, { "epoch": 103.06, "learning_rate": 6.258122977346279e-06, "loss": 0.0233, "step": 265390 }, { "epoch": 103.07, "learning_rate": 6.257605177993528e-06, "loss": 0.0228, "step": 265400 }, { "epoch": 103.07, "learning_rate": 6.257087378640777e-06, "loss": 0.028, "step": 265410 }, { "epoch": 103.08, "learning_rate": 6.256569579288026e-06, "loss": 0.0996, "step": 265420 }, { "epoch": 103.08, "learning_rate": 6.256051779935276e-06, "loss": 0.076, "step": 265430 }, { "epoch": 103.08, "learning_rate": 6.2555339805825246e-06, "loss": 0.1366, "step": 265440 }, { "epoch": 103.09, "learning_rate": 6.255016181229774e-06, "loss": 0.116, "step": 265450 }, { "epoch": 103.09, "learning_rate": 6.254498381877023e-06, "loss": 0.037, "step": 265460 }, { "epoch": 103.1, "learning_rate": 6.2539805825242726e-06, "loss": 0.0094, "step": 265470 }, { "epoch": 103.1, "learning_rate": 6.253462783171521e-06, "loss": 0.0154, "step": 265480 }, { "epoch": 103.1, "learning_rate": 6.252944983818771e-06, "loss": 0.0088, "step": 265490 }, { "epoch": 103.11, "learning_rate": 6.25242718446602e-06, "loss": 0.0597, "step": 265500 }, { "epoch": 103.11, "learning_rate": 6.251909385113269e-06, "loss": 0.2524, "step": 265510 }, { "epoch": 103.11, "learning_rate": 6.251391585760518e-06, "loss": 0.0892, "step": 265520 }, { "epoch": 103.12, "learning_rate": 6.250873786407768e-06, "loss": 0.013, "step": 265530 }, { "epoch": 103.12, "learning_rate": 6.2503559870550165e-06, "loss": 0.0528, "step": 265540 }, { "epoch": 103.13, "learning_rate": 6.249838187702266e-06, "loss": 0.0254, "step": 265550 }, { "epoch": 103.13, "learning_rate": 6.249320388349515e-06, "loss": 0.0505, "step": 265560 }, { "epoch": 103.13, "learning_rate": 6.248802588996764e-06, "loss": 0.0703, "step": 265570 }, { "epoch": 103.14, "learning_rate": 6.248284789644013e-06, "loss": 0.0516, "step": 265580 }, { "epoch": 103.14, "learning_rate": 6.247766990291263e-06, "loss": 0.0575, "step": 265590 }, { "epoch": 103.15, "learning_rate": 6.247249190938512e-06, "loss": 0.0234, "step": 265600 }, { "epoch": 103.15, "learning_rate": 6.2467313915857604e-06, "loss": 0.0026, "step": 265610 }, { "epoch": 103.15, "learning_rate": 6.24621359223301e-06, "loss": 0.0399, "step": 265620 }, { "epoch": 103.16, "learning_rate": 6.24569579288026e-06, "loss": 0.0188, "step": 265630 }, { "epoch": 103.16, "learning_rate": 6.2451779935275084e-06, "loss": 0.1378, "step": 265640 }, { "epoch": 103.17, "learning_rate": 6.244660194174757e-06, "loss": 0.1106, "step": 265650 }, { "epoch": 103.17, "learning_rate": 6.244142394822007e-06, "loss": 0.0564, "step": 265660 }, { "epoch": 103.17, "learning_rate": 6.2436245954692565e-06, "loss": 0.1288, "step": 265670 }, { "epoch": 103.18, "learning_rate": 6.243106796116505e-06, "loss": 0.0121, "step": 265680 }, { "epoch": 103.18, "learning_rate": 6.242588996763754e-06, "loss": 0.0427, "step": 265690 }, { "epoch": 103.18, "learning_rate": 6.242071197411004e-06, "loss": 0.034, "step": 265700 }, { "epoch": 103.19, "learning_rate": 6.241553398058253e-06, "loss": 0.0097, "step": 265710 }, { "epoch": 103.19, "learning_rate": 6.241035598705501e-06, "loss": 0.0836, "step": 265720 }, { "epoch": 103.2, "learning_rate": 6.240517799352751e-06, "loss": 0.0313, "step": 265730 }, { "epoch": 103.2, "learning_rate": 6.24e-06, "loss": 0.0329, "step": 265740 }, { "epoch": 103.2, "learning_rate": 6.23948220064725e-06, "loss": 0.0446, "step": 265750 }, { "epoch": 103.21, "learning_rate": 6.2389644012945e-06, "loss": 0.0269, "step": 265760 }, { "epoch": 103.21, "learning_rate": 6.2384466019417476e-06, "loss": 0.0457, "step": 265770 }, { "epoch": 103.22, "learning_rate": 6.237928802588997e-06, "loss": 0.0993, "step": 265780 }, { "epoch": 103.22, "learning_rate": 6.237411003236247e-06, "loss": 0.0063, "step": 265790 }, { "epoch": 103.22, "learning_rate": 6.236893203883496e-06, "loss": 0.0411, "step": 265800 }, { "epoch": 103.23, "learning_rate": 6.236375404530744e-06, "loss": 0.0334, "step": 265810 }, { "epoch": 103.23, "learning_rate": 6.235857605177994e-06, "loss": 0.0476, "step": 265820 }, { "epoch": 103.23, "learning_rate": 6.2353398058252436e-06, "loss": 0.09, "step": 265830 }, { "epoch": 103.24, "learning_rate": 6.234822006472493e-06, "loss": 0.0147, "step": 265840 }, { "epoch": 103.24, "learning_rate": 6.234304207119741e-06, "loss": 0.026, "step": 265850 }, { "epoch": 103.25, "learning_rate": 6.233786407766991e-06, "loss": 0.1581, "step": 265860 }, { "epoch": 103.25, "learning_rate": 6.23326860841424e-06, "loss": 0.0542, "step": 265870 }, { "epoch": 103.25, "learning_rate": 6.23275080906149e-06, "loss": 0.0244, "step": 265880 }, { "epoch": 103.26, "learning_rate": 6.232233009708738e-06, "loss": 0.1223, "step": 265890 }, { "epoch": 103.26, "learning_rate": 6.2317152103559875e-06, "loss": 0.0121, "step": 265900 }, { "epoch": 103.27, "learning_rate": 6.231197411003237e-06, "loss": 0.063, "step": 265910 }, { "epoch": 103.27, "learning_rate": 6.230679611650487e-06, "loss": 0.1375, "step": 265920 }, { "epoch": 103.27, "learning_rate": 6.230161812297735e-06, "loss": 0.0872, "step": 265930 }, { "epoch": 103.28, "learning_rate": 6.229644012944984e-06, "loss": 0.114, "step": 265940 }, { "epoch": 103.28, "learning_rate": 6.229126213592234e-06, "loss": 0.0104, "step": 265950 }, { "epoch": 103.29, "learning_rate": 6.2286084142394835e-06, "loss": 0.2218, "step": 265960 }, { "epoch": 103.29, "learning_rate": 6.2280906148867314e-06, "loss": 0.0227, "step": 265970 }, { "epoch": 103.29, "learning_rate": 6.227572815533981e-06, "loss": 0.0817, "step": 265980 }, { "epoch": 103.3, "learning_rate": 6.227055016181231e-06, "loss": 0.0381, "step": 265990 }, { "epoch": 103.3, "learning_rate": 6.22653721682848e-06, "loss": 0.0368, "step": 266000 }, { "epoch": 103.3, "learning_rate": 6.226019417475728e-06, "loss": 0.0333, "step": 266010 }, { "epoch": 103.31, "learning_rate": 6.225501618122978e-06, "loss": 0.009, "step": 266020 }, { "epoch": 103.31, "learning_rate": 6.2249838187702275e-06, "loss": 0.0549, "step": 266030 }, { "epoch": 103.32, "learning_rate": 6.224466019417476e-06, "loss": 0.0035, "step": 266040 }, { "epoch": 103.32, "learning_rate": 6.223948220064725e-06, "loss": 0.0497, "step": 266050 }, { "epoch": 103.32, "learning_rate": 6.223430420711975e-06, "loss": 0.0723, "step": 266060 }, { "epoch": 103.33, "learning_rate": 6.222912621359224e-06, "loss": 0.0272, "step": 266070 }, { "epoch": 103.33, "learning_rate": 6.222394822006473e-06, "loss": 0.0177, "step": 266080 }, { "epoch": 103.34, "learning_rate": 6.221877022653722e-06, "loss": 0.0184, "step": 266090 }, { "epoch": 103.34, "learning_rate": 6.221359223300971e-06, "loss": 0.0742, "step": 266100 }, { "epoch": 103.34, "learning_rate": 6.220841423948221e-06, "loss": 0.0107, "step": 266110 }, { "epoch": 103.35, "learning_rate": 6.22032362459547e-06, "loss": 0.1515, "step": 266120 }, { "epoch": 103.35, "learning_rate": 6.2198058252427186e-06, "loss": 0.0539, "step": 266130 }, { "epoch": 103.36, "learning_rate": 6.219288025889968e-06, "loss": 0.0698, "step": 266140 }, { "epoch": 103.36, "learning_rate": 6.218770226537218e-06, "loss": 0.1244, "step": 266150 }, { "epoch": 103.36, "learning_rate": 6.2182524271844666e-06, "loss": 0.0699, "step": 266160 }, { "epoch": 103.37, "learning_rate": 6.217734627831715e-06, "loss": 0.0369, "step": 266170 }, { "epoch": 103.37, "learning_rate": 6.217216828478965e-06, "loss": 0.1518, "step": 266180 }, { "epoch": 103.37, "learning_rate": 6.216699029126214e-06, "loss": 0.0382, "step": 266190 }, { "epoch": 103.38, "learning_rate": 6.216181229773463e-06, "loss": 0.0644, "step": 266200 }, { "epoch": 103.38, "learning_rate": 6.215663430420712e-06, "loss": 0.1128, "step": 266210 }, { "epoch": 103.39, "learning_rate": 6.215145631067962e-06, "loss": 0.0266, "step": 266220 }, { "epoch": 103.39, "learning_rate": 6.2146278317152105e-06, "loss": 0.0316, "step": 266230 }, { "epoch": 103.39, "learning_rate": 6.21411003236246e-06, "loss": 0.0277, "step": 266240 }, { "epoch": 103.4, "learning_rate": 6.213592233009709e-06, "loss": 0.0149, "step": 266250 }, { "epoch": 103.4, "learning_rate": 6.2130744336569585e-06, "loss": 0.0075, "step": 266260 }, { "epoch": 103.41, "learning_rate": 6.212556634304207e-06, "loss": 0.0409, "step": 266270 }, { "epoch": 103.41, "learning_rate": 6.212038834951457e-06, "loss": 0.0875, "step": 266280 }, { "epoch": 103.41, "learning_rate": 6.2115210355987065e-06, "loss": 0.1334, "step": 266290 }, { "epoch": 103.42, "learning_rate": 6.211003236245955e-06, "loss": 0.1269, "step": 266300 }, { "epoch": 103.42, "learning_rate": 6.210485436893204e-06, "loss": 0.016, "step": 266310 }, { "epoch": 103.43, "learning_rate": 6.209967637540454e-06, "loss": 0.0126, "step": 266320 }, { "epoch": 103.43, "learning_rate": 6.209449838187703e-06, "loss": 0.0497, "step": 266330 }, { "epoch": 103.43, "learning_rate": 6.208932038834952e-06, "loss": 0.1382, "step": 266340 }, { "epoch": 103.44, "learning_rate": 6.208414239482201e-06, "loss": 0.08, "step": 266350 }, { "epoch": 103.44, "learning_rate": 6.2078964401294505e-06, "loss": 0.0654, "step": 266360 }, { "epoch": 103.44, "learning_rate": 6.2073786407767e-06, "loss": 0.0025, "step": 266370 }, { "epoch": 103.45, "learning_rate": 6.206860841423948e-06, "loss": 0.0797, "step": 266380 }, { "epoch": 103.45, "learning_rate": 6.206343042071198e-06, "loss": 0.0645, "step": 266390 }, { "epoch": 103.46, "learning_rate": 6.205825242718447e-06, "loss": 0.0025, "step": 266400 }, { "epoch": 103.46, "learning_rate": 6.205307443365697e-06, "loss": 0.0769, "step": 266410 }, { "epoch": 103.46, "learning_rate": 6.204789644012945e-06, "loss": 0.0184, "step": 266420 }, { "epoch": 103.47, "learning_rate": 6.204271844660194e-06, "loss": 0.0203, "step": 266430 }, { "epoch": 103.47, "learning_rate": 6.203754045307444e-06, "loss": 0.1077, "step": 266440 }, { "epoch": 103.48, "learning_rate": 6.203236245954694e-06, "loss": 0.0496, "step": 266450 }, { "epoch": 103.48, "learning_rate": 6.2027184466019416e-06, "loss": 0.0251, "step": 266460 }, { "epoch": 103.48, "learning_rate": 6.202200647249191e-06, "loss": 0.0242, "step": 266470 }, { "epoch": 103.49, "learning_rate": 6.201682847896441e-06, "loss": 0.0443, "step": 266480 }, { "epoch": 103.49, "learning_rate": 6.20116504854369e-06, "loss": 0.0007, "step": 266490 }, { "epoch": 103.5, "learning_rate": 6.200647249190938e-06, "loss": 0.1417, "step": 266500 }, { "epoch": 103.5, "learning_rate": 6.200129449838188e-06, "loss": 0.1165, "step": 266510 }, { "epoch": 103.5, "learning_rate": 6.1996116504854376e-06, "loss": 0.0156, "step": 266520 }, { "epoch": 103.51, "learning_rate": 6.199093851132687e-06, "loss": 0.0253, "step": 266530 }, { "epoch": 103.51, "learning_rate": 6.198576051779935e-06, "loss": 0.0356, "step": 266540 }, { "epoch": 103.51, "learning_rate": 6.198058252427185e-06, "loss": 0.0093, "step": 266550 }, { "epoch": 103.52, "learning_rate": 6.197540453074434e-06, "loss": 0.0103, "step": 266560 }, { "epoch": 103.52, "learning_rate": 6.197022653721684e-06, "loss": 0.0811, "step": 266570 }, { "epoch": 103.53, "learning_rate": 6.196504854368932e-06, "loss": 0.0386, "step": 266580 }, { "epoch": 103.53, "learning_rate": 6.1959870550161815e-06, "loss": 0.0001, "step": 266590 }, { "epoch": 103.53, "learning_rate": 6.195469255663431e-06, "loss": 0.0825, "step": 266600 }, { "epoch": 103.54, "learning_rate": 6.194951456310681e-06, "loss": 0.0545, "step": 266610 }, { "epoch": 103.54, "learning_rate": 6.194433656957929e-06, "loss": 0.1119, "step": 266620 }, { "epoch": 103.55, "learning_rate": 6.193915857605178e-06, "loss": 0.041, "step": 266630 }, { "epoch": 103.55, "learning_rate": 6.193398058252428e-06, "loss": 0.1184, "step": 266640 }, { "epoch": 103.55, "learning_rate": 6.1928802588996775e-06, "loss": 0.0234, "step": 266650 }, { "epoch": 103.56, "learning_rate": 6.1923624595469254e-06, "loss": 0.0704, "step": 266660 }, { "epoch": 103.56, "learning_rate": 6.191844660194175e-06, "loss": 0.0045, "step": 266670 }, { "epoch": 103.57, "learning_rate": 6.191326860841425e-06, "loss": 0.005, "step": 266680 }, { "epoch": 103.57, "learning_rate": 6.190809061488674e-06, "loss": 0.0266, "step": 266690 }, { "epoch": 103.57, "learning_rate": 6.190291262135922e-06, "loss": 0.1506, "step": 266700 }, { "epoch": 103.58, "learning_rate": 6.189773462783172e-06, "loss": 0.0286, "step": 266710 }, { "epoch": 103.58, "learning_rate": 6.1892556634304215e-06, "loss": 0.0058, "step": 266720 }, { "epoch": 103.58, "learning_rate": 6.188737864077671e-06, "loss": 0.0373, "step": 266730 }, { "epoch": 103.59, "learning_rate": 6.188220064724919e-06, "loss": 0.0345, "step": 266740 }, { "epoch": 103.59, "learning_rate": 6.187702265372169e-06, "loss": 0.0022, "step": 266750 }, { "epoch": 103.6, "learning_rate": 6.187184466019418e-06, "loss": 0.0576, "step": 266760 }, { "epoch": 103.6, "learning_rate": 6.186666666666668e-06, "loss": 0.0247, "step": 266770 }, { "epoch": 103.6, "learning_rate": 6.186148867313916e-06, "loss": 0.1273, "step": 266780 }, { "epoch": 103.61, "learning_rate": 6.185631067961165e-06, "loss": 0.0133, "step": 266790 }, { "epoch": 103.61, "learning_rate": 6.185113268608415e-06, "loss": 0.2013, "step": 266800 }, { "epoch": 103.62, "learning_rate": 6.184595469255665e-06, "loss": 0.072, "step": 266810 }, { "epoch": 103.62, "learning_rate": 6.1840776699029126e-06, "loss": 0.0655, "step": 266820 }, { "epoch": 103.62, "learning_rate": 6.183559870550162e-06, "loss": 0.0227, "step": 266830 }, { "epoch": 103.63, "learning_rate": 6.183042071197412e-06, "loss": 0.0013, "step": 266840 }, { "epoch": 103.63, "learning_rate": 6.1825242718446606e-06, "loss": 0.0012, "step": 266850 }, { "epoch": 103.63, "learning_rate": 6.18200647249191e-06, "loss": 0.0584, "step": 266860 }, { "epoch": 103.64, "learning_rate": 6.181488673139159e-06, "loss": 0.0224, "step": 266870 }, { "epoch": 103.64, "learning_rate": 6.1809708737864086e-06, "loss": 0.1171, "step": 266880 }, { "epoch": 103.65, "learning_rate": 6.180453074433657e-06, "loss": 0.0066, "step": 266890 }, { "epoch": 103.65, "learning_rate": 6.179935275080907e-06, "loss": 0.1067, "step": 266900 }, { "epoch": 103.65, "learning_rate": 6.179417475728156e-06, "loss": 0.0375, "step": 266910 }, { "epoch": 103.66, "learning_rate": 6.178899676375405e-06, "loss": 0.0578, "step": 266920 }, { "epoch": 103.66, "learning_rate": 6.178381877022654e-06, "loss": 0.0424, "step": 266930 }, { "epoch": 103.67, "learning_rate": 6.177864077669904e-06, "loss": 0.114, "step": 266940 }, { "epoch": 103.67, "learning_rate": 6.1773462783171525e-06, "loss": 0.0249, "step": 266950 }, { "epoch": 103.67, "learning_rate": 6.176828478964402e-06, "loss": 0.0676, "step": 266960 }, { "epoch": 103.68, "learning_rate": 6.176310679611651e-06, "loss": 0.0262, "step": 266970 }, { "epoch": 103.68, "learning_rate": 6.1757928802589005e-06, "loss": 0.0196, "step": 266980 }, { "epoch": 103.69, "learning_rate": 6.175275080906149e-06, "loss": 0.0009, "step": 266990 }, { "epoch": 103.69, "learning_rate": 6.174757281553399e-06, "loss": 0.0012, "step": 267000 }, { "epoch": 103.69, "learning_rate": 6.174239482200648e-06, "loss": 0.0873, "step": 267010 }, { "epoch": 103.7, "learning_rate": 6.173721682847897e-06, "loss": 0.0407, "step": 267020 }, { "epoch": 103.7, "learning_rate": 6.173203883495146e-06, "loss": 0.0846, "step": 267030 }, { "epoch": 103.7, "learning_rate": 6.172686084142395e-06, "loss": 0.0321, "step": 267040 }, { "epoch": 103.71, "learning_rate": 6.1721682847896445e-06, "loss": 0.0137, "step": 267050 }, { "epoch": 103.71, "learning_rate": 6.171650485436894e-06, "loss": 0.0329, "step": 267060 }, { "epoch": 103.72, "learning_rate": 6.171132686084143e-06, "loss": 0.0003, "step": 267070 }, { "epoch": 103.72, "learning_rate": 6.170614886731392e-06, "loss": 0.0466, "step": 267080 }, { "epoch": 103.72, "learning_rate": 6.170097087378641e-06, "loss": 0.0132, "step": 267090 }, { "epoch": 103.73, "learning_rate": 6.169579288025891e-06, "loss": 0.0003, "step": 267100 }, { "epoch": 103.73, "learning_rate": 6.16906148867314e-06, "loss": 0.039, "step": 267110 }, { "epoch": 103.74, "learning_rate": 6.168543689320388e-06, "loss": 0.076, "step": 267120 }, { "epoch": 103.74, "learning_rate": 6.168025889967638e-06, "loss": 0.0854, "step": 267130 }, { "epoch": 103.74, "learning_rate": 6.167508090614888e-06, "loss": 0.0052, "step": 267140 }, { "epoch": 103.75, "learning_rate": 6.166990291262136e-06, "loss": 0.0942, "step": 267150 }, { "epoch": 103.75, "learning_rate": 6.166472491909385e-06, "loss": 0.0603, "step": 267160 }, { "epoch": 103.76, "learning_rate": 6.165954692556635e-06, "loss": 0.0295, "step": 267170 }, { "epoch": 103.76, "learning_rate": 6.165436893203884e-06, "loss": 0.1152, "step": 267180 }, { "epoch": 103.76, "learning_rate": 6.164919093851132e-06, "loss": 0.0175, "step": 267190 }, { "epoch": 103.77, "learning_rate": 6.164401294498382e-06, "loss": 0.0078, "step": 267200 }, { "epoch": 103.77, "learning_rate": 6.1638834951456316e-06, "loss": 0.001, "step": 267210 }, { "epoch": 103.77, "learning_rate": 6.163365695792881e-06, "loss": 0.1355, "step": 267220 }, { "epoch": 103.78, "learning_rate": 6.162847896440129e-06, "loss": 0.1037, "step": 267230 }, { "epoch": 103.78, "learning_rate": 6.162330097087379e-06, "loss": 0.034, "step": 267240 }, { "epoch": 103.79, "learning_rate": 6.161812297734628e-06, "loss": 0.031, "step": 267250 }, { "epoch": 103.79, "learning_rate": 6.161294498381878e-06, "loss": 0.0018, "step": 267260 }, { "epoch": 103.79, "learning_rate": 6.160776699029126e-06, "loss": 0.0008, "step": 267270 }, { "epoch": 103.8, "learning_rate": 6.1602588996763755e-06, "loss": 0.0283, "step": 267280 }, { "epoch": 103.8, "learning_rate": 6.159741100323625e-06, "loss": 0.0239, "step": 267290 }, { "epoch": 103.81, "learning_rate": 6.159223300970875e-06, "loss": 0.0541, "step": 267300 }, { "epoch": 103.81, "learning_rate": 6.158705501618123e-06, "loss": 0.103, "step": 267310 }, { "epoch": 103.81, "learning_rate": 6.158187702265372e-06, "loss": 0.0132, "step": 267320 }, { "epoch": 103.82, "learning_rate": 6.157669902912622e-06, "loss": 0.0013, "step": 267330 }, { "epoch": 103.82, "learning_rate": 6.1571521035598715e-06, "loss": 0.0215, "step": 267340 }, { "epoch": 103.83, "learning_rate": 6.1566343042071194e-06, "loss": 0.0156, "step": 267350 }, { "epoch": 103.83, "learning_rate": 6.156116504854369e-06, "loss": 0.0717, "step": 267360 }, { "epoch": 103.83, "learning_rate": 6.155598705501619e-06, "loss": 0.0678, "step": 267370 }, { "epoch": 103.84, "learning_rate": 6.155080906148868e-06, "loss": 0.0017, "step": 267380 }, { "epoch": 103.84, "learning_rate": 6.154563106796116e-06, "loss": 0.0133, "step": 267390 }, { "epoch": 103.84, "learning_rate": 6.154045307443366e-06, "loss": 0.008, "step": 267400 }, { "epoch": 103.85, "learning_rate": 6.1535275080906155e-06, "loss": 0.0408, "step": 267410 }, { "epoch": 103.85, "learning_rate": 6.153009708737865e-06, "loss": 0.003, "step": 267420 }, { "epoch": 103.86, "learning_rate": 6.152491909385115e-06, "loss": 0.0003, "step": 267430 }, { "epoch": 103.86, "learning_rate": 6.151974110032363e-06, "loss": 0.0006, "step": 267440 }, { "epoch": 103.86, "learning_rate": 6.151456310679612e-06, "loss": 0.0302, "step": 267450 }, { "epoch": 103.87, "learning_rate": 6.150938511326862e-06, "loss": 0.0019, "step": 267460 }, { "epoch": 103.87, "learning_rate": 6.1504207119741115e-06, "loss": 0.1092, "step": 267470 }, { "epoch": 103.88, "learning_rate": 6.149902912621359e-06, "loss": 0.0109, "step": 267480 }, { "epoch": 103.88, "learning_rate": 6.149385113268609e-06, "loss": 0.0042, "step": 267490 }, { "epoch": 103.88, "learning_rate": 6.148867313915859e-06, "loss": 0.0153, "step": 267500 }, { "epoch": 103.89, "learning_rate": 6.148349514563107e-06, "loss": 0.0308, "step": 267510 }, { "epoch": 103.89, "learning_rate": 6.147831715210356e-06, "loss": 0.078, "step": 267520 }, { "epoch": 103.9, "learning_rate": 6.147313915857606e-06, "loss": 0.0088, "step": 267530 }, { "epoch": 103.9, "learning_rate": 6.146796116504855e-06, "loss": 0.0702, "step": 267540 }, { "epoch": 103.9, "learning_rate": 6.146278317152104e-06, "loss": 0.02, "step": 267550 }, { "epoch": 103.91, "learning_rate": 6.145760517799353e-06, "loss": 0.0227, "step": 267560 }, { "epoch": 103.91, "learning_rate": 6.1452427184466026e-06, "loss": 0.1399, "step": 267570 }, { "epoch": 103.91, "learning_rate": 6.144724919093852e-06, "loss": 0.0578, "step": 267580 }, { "epoch": 103.92, "learning_rate": 6.144207119741101e-06, "loss": 0.1169, "step": 267590 }, { "epoch": 103.92, "learning_rate": 6.14368932038835e-06, "loss": 0.052, "step": 267600 }, { "epoch": 103.93, "learning_rate": 6.143171521035599e-06, "loss": 0.0868, "step": 267610 }, { "epoch": 103.93, "learning_rate": 6.142653721682849e-06, "loss": 0.1188, "step": 267620 }, { "epoch": 103.93, "learning_rate": 6.142135922330098e-06, "loss": 0.0574, "step": 267630 }, { "epoch": 103.94, "learning_rate": 6.1416181229773465e-06, "loss": 0.0194, "step": 267640 }, { "epoch": 103.94, "learning_rate": 6.141100323624596e-06, "loss": 0.0115, "step": 267650 }, { "epoch": 103.95, "learning_rate": 6.140582524271845e-06, "loss": 0.122, "step": 267660 }, { "epoch": 103.95, "learning_rate": 6.1400647249190945e-06, "loss": 0.0216, "step": 267670 }, { "epoch": 103.95, "learning_rate": 6.139546925566343e-06, "loss": 0.0182, "step": 267680 }, { "epoch": 103.96, "learning_rate": 6.139029126213593e-06, "loss": 0.0001, "step": 267690 }, { "epoch": 103.96, "learning_rate": 6.138511326860842e-06, "loss": 0.0764, "step": 267700 }, { "epoch": 103.97, "learning_rate": 6.137993527508091e-06, "loss": 0.1348, "step": 267710 }, { "epoch": 103.97, "learning_rate": 6.13747572815534e-06, "loss": 0.1209, "step": 267720 }, { "epoch": 103.97, "learning_rate": 6.13695792880259e-06, "loss": 0.0626, "step": 267730 }, { "epoch": 103.98, "learning_rate": 6.1364401294498385e-06, "loss": 0.0399, "step": 267740 }, { "epoch": 103.98, "learning_rate": 6.135922330097088e-06, "loss": 0.007, "step": 267750 }, { "epoch": 103.98, "learning_rate": 6.135404530744337e-06, "loss": 0.0003, "step": 267760 }, { "epoch": 103.99, "learning_rate": 6.1348867313915865e-06, "loss": 0.0613, "step": 267770 }, { "epoch": 103.99, "learning_rate": 6.134368932038835e-06, "loss": 0.0017, "step": 267780 }, { "epoch": 104.0, "learning_rate": 6.133851132686085e-06, "loss": 0.0982, "step": 267790 }, { "epoch": 104.0, "learning_rate": 6.133333333333334e-06, "loss": 0.0157, "step": 267800 }, { "epoch": 104.0, "eval_accuracy": 0.9524071526822558, "eval_loss": 0.3646901249885559, "eval_runtime": 8.2362, "eval_samples_per_second": 441.346, "eval_steps_per_second": 55.244, "step": 267800 }, { "epoch": 104.0, "learning_rate": 6.132815533980583e-06, "loss": 0.0003, "step": 267810 }, { "epoch": 104.01, "learning_rate": 6.132297734627832e-06, "loss": 0.0245, "step": 267820 }, { "epoch": 104.01, "learning_rate": 6.131779935275082e-06, "loss": 0.0006, "step": 267830 }, { "epoch": 104.02, "learning_rate": 6.13126213592233e-06, "loss": 0.0102, "step": 267840 }, { "epoch": 104.02, "learning_rate": 6.130744336569579e-06, "loss": 0.0646, "step": 267850 }, { "epoch": 104.02, "learning_rate": 6.130226537216829e-06, "loss": 0.0549, "step": 267860 }, { "epoch": 104.03, "learning_rate": 6.129708737864078e-06, "loss": 0.0832, "step": 267870 }, { "epoch": 104.03, "learning_rate": 6.129190938511327e-06, "loss": 0.0409, "step": 267880 }, { "epoch": 104.03, "learning_rate": 6.128673139158576e-06, "loss": 0.0947, "step": 267890 }, { "epoch": 104.04, "learning_rate": 6.1281553398058256e-06, "loss": 0.0167, "step": 267900 }, { "epoch": 104.04, "learning_rate": 6.127637540453075e-06, "loss": 0.0718, "step": 267910 }, { "epoch": 104.05, "learning_rate": 6.127119741100324e-06, "loss": 0.074, "step": 267920 }, { "epoch": 104.05, "learning_rate": 6.126601941747573e-06, "loss": 0.0537, "step": 267930 }, { "epoch": 104.05, "learning_rate": 6.126084142394822e-06, "loss": 0.0127, "step": 267940 }, { "epoch": 104.06, "learning_rate": 6.125566343042072e-06, "loss": 0.0994, "step": 267950 }, { "epoch": 104.06, "learning_rate": 6.125048543689321e-06, "loss": 0.0019, "step": 267960 }, { "epoch": 104.07, "learning_rate": 6.1245307443365695e-06, "loss": 0.1137, "step": 267970 }, { "epoch": 104.07, "learning_rate": 6.124012944983819e-06, "loss": 0.0628, "step": 267980 }, { "epoch": 104.07, "learning_rate": 6.123495145631069e-06, "loss": 0.0756, "step": 267990 }, { "epoch": 104.08, "learning_rate": 6.122977346278318e-06, "loss": 0.0524, "step": 268000 }, { "epoch": 104.08, "learning_rate": 6.122459546925566e-06, "loss": 0.0585, "step": 268010 }, { "epoch": 104.09, "learning_rate": 6.121941747572816e-06, "loss": 0.0701, "step": 268020 }, { "epoch": 104.09, "learning_rate": 6.1214239482200655e-06, "loss": 0.0101, "step": 268030 }, { "epoch": 104.09, "learning_rate": 6.120906148867315e-06, "loss": 0.0013, "step": 268040 }, { "epoch": 104.1, "learning_rate": 6.120388349514563e-06, "loss": 0.0127, "step": 268050 }, { "epoch": 104.1, "learning_rate": 6.119870550161813e-06, "loss": 0.1034, "step": 268060 }, { "epoch": 104.1, "learning_rate": 6.119352750809062e-06, "loss": 0.0002, "step": 268070 }, { "epoch": 104.11, "learning_rate": 6.118834951456312e-06, "loss": 0.0188, "step": 268080 }, { "epoch": 104.11, "learning_rate": 6.11831715210356e-06, "loss": 0.0002, "step": 268090 }, { "epoch": 104.12, "learning_rate": 6.1177993527508095e-06, "loss": 0.0032, "step": 268100 }, { "epoch": 104.12, "learning_rate": 6.117281553398059e-06, "loss": 0.1262, "step": 268110 }, { "epoch": 104.12, "learning_rate": 6.116763754045309e-06, "loss": 0.0109, "step": 268120 }, { "epoch": 104.13, "learning_rate": 6.116245954692557e-06, "loss": 0.0069, "step": 268130 }, { "epoch": 104.13, "learning_rate": 6.115728155339806e-06, "loss": 0.084, "step": 268140 }, { "epoch": 104.14, "learning_rate": 6.115210355987056e-06, "loss": 0.0152, "step": 268150 }, { "epoch": 104.14, "learning_rate": 6.1146925566343055e-06, "loss": 0.0561, "step": 268160 }, { "epoch": 104.14, "learning_rate": 6.114174757281553e-06, "loss": 0.0036, "step": 268170 }, { "epoch": 104.15, "learning_rate": 6.113656957928803e-06, "loss": 0.072, "step": 268180 }, { "epoch": 104.15, "learning_rate": 6.113139158576053e-06, "loss": 0.0097, "step": 268190 }, { "epoch": 104.16, "learning_rate": 6.112621359223302e-06, "loss": 0.1761, "step": 268200 }, { "epoch": 104.16, "learning_rate": 6.11210355987055e-06, "loss": 0.0627, "step": 268210 }, { "epoch": 104.16, "learning_rate": 6.1115857605178e-06, "loss": 0.092, "step": 268220 }, { "epoch": 104.17, "learning_rate": 6.111067961165049e-06, "loss": 0.031, "step": 268230 }, { "epoch": 104.17, "learning_rate": 6.110550161812299e-06, "loss": 0.0014, "step": 268240 }, { "epoch": 104.17, "learning_rate": 6.110032362459547e-06, "loss": 0.0924, "step": 268250 }, { "epoch": 104.18, "learning_rate": 6.1095145631067966e-06, "loss": 0.024, "step": 268260 }, { "epoch": 104.18, "learning_rate": 6.108996763754046e-06, "loss": 0.0005, "step": 268270 }, { "epoch": 104.19, "learning_rate": 6.108478964401296e-06, "loss": 0.0006, "step": 268280 }, { "epoch": 104.19, "learning_rate": 6.107961165048544e-06, "loss": 0.0813, "step": 268290 }, { "epoch": 104.19, "learning_rate": 6.107443365695793e-06, "loss": 0.0687, "step": 268300 }, { "epoch": 104.2, "learning_rate": 6.106925566343043e-06, "loss": 0.0471, "step": 268310 }, { "epoch": 104.2, "learning_rate": 6.106407766990292e-06, "loss": 0.0014, "step": 268320 }, { "epoch": 104.21, "learning_rate": 6.1058899676375405e-06, "loss": 0.0628, "step": 268330 }, { "epoch": 104.21, "learning_rate": 6.10537216828479e-06, "loss": 0.0967, "step": 268340 }, { "epoch": 104.21, "learning_rate": 6.10485436893204e-06, "loss": 0.0377, "step": 268350 }, { "epoch": 104.22, "learning_rate": 6.1043365695792885e-06, "loss": 0.1229, "step": 268360 }, { "epoch": 104.22, "learning_rate": 6.103818770226537e-06, "loss": 0.0575, "step": 268370 }, { "epoch": 104.23, "learning_rate": 6.103300970873787e-06, "loss": 0.0198, "step": 268380 }, { "epoch": 104.23, "learning_rate": 6.1027831715210365e-06, "loss": 0.1508, "step": 268390 }, { "epoch": 104.23, "learning_rate": 6.102265372168285e-06, "loss": 0.1169, "step": 268400 }, { "epoch": 104.24, "learning_rate": 6.101747572815534e-06, "loss": 0.1612, "step": 268410 }, { "epoch": 104.24, "learning_rate": 6.101229773462784e-06, "loss": 0.0838, "step": 268420 }, { "epoch": 104.24, "learning_rate": 6.100711974110033e-06, "loss": 0.0473, "step": 268430 }, { "epoch": 104.25, "learning_rate": 6.100194174757282e-06, "loss": 0.031, "step": 268440 }, { "epoch": 104.25, "learning_rate": 6.099676375404531e-06, "loss": 0.0432, "step": 268450 }, { "epoch": 104.26, "learning_rate": 6.0991585760517805e-06, "loss": 0.0467, "step": 268460 }, { "epoch": 104.26, "learning_rate": 6.098640776699029e-06, "loss": 0.0351, "step": 268470 }, { "epoch": 104.26, "learning_rate": 6.098122977346279e-06, "loss": 0.0004, "step": 268480 }, { "epoch": 104.27, "learning_rate": 6.097605177993528e-06, "loss": 0.0189, "step": 268490 }, { "epoch": 104.27, "learning_rate": 6.097087378640777e-06, "loss": 0.0009, "step": 268500 }, { "epoch": 104.28, "learning_rate": 6.096569579288026e-06, "loss": 0.0242, "step": 268510 }, { "epoch": 104.28, "learning_rate": 6.096051779935276e-06, "loss": 0.0018, "step": 268520 }, { "epoch": 104.28, "learning_rate": 6.095533980582525e-06, "loss": 0.0342, "step": 268530 }, { "epoch": 104.29, "learning_rate": 6.095016181229774e-06, "loss": 0.0869, "step": 268540 }, { "epoch": 104.29, "learning_rate": 6.094498381877023e-06, "loss": 0.0049, "step": 268550 }, { "epoch": 104.3, "learning_rate": 6.093980582524272e-06, "loss": 0.1104, "step": 268560 }, { "epoch": 104.3, "learning_rate": 6.093462783171522e-06, "loss": 0.0338, "step": 268570 }, { "epoch": 104.3, "learning_rate": 6.092944983818771e-06, "loss": 0.1001, "step": 268580 }, { "epoch": 104.31, "learning_rate": 6.0924271844660196e-06, "loss": 0.0393, "step": 268590 }, { "epoch": 104.31, "learning_rate": 6.091909385113269e-06, "loss": 0.0612, "step": 268600 }, { "epoch": 104.31, "learning_rate": 6.091391585760519e-06, "loss": 0.0667, "step": 268610 }, { "epoch": 104.32, "learning_rate": 6.0908737864077676e-06, "loss": 0.0224, "step": 268620 }, { "epoch": 104.32, "learning_rate": 6.090355987055016e-06, "loss": 0.1132, "step": 268630 }, { "epoch": 104.33, "learning_rate": 6.089838187702266e-06, "loss": 0.0145, "step": 268640 }, { "epoch": 104.33, "learning_rate": 6.089320388349516e-06, "loss": 0.0001, "step": 268650 }, { "epoch": 104.33, "learning_rate": 6.0888025889967635e-06, "loss": 0.0631, "step": 268660 }, { "epoch": 104.34, "learning_rate": 6.088284789644013e-06, "loss": 0.0137, "step": 268670 }, { "epoch": 104.34, "learning_rate": 6.087766990291263e-06, "loss": 0.0291, "step": 268680 }, { "epoch": 104.35, "learning_rate": 6.087249190938512e-06, "loss": 0.0918, "step": 268690 }, { "epoch": 104.35, "learning_rate": 6.08673139158576e-06, "loss": 0.0774, "step": 268700 }, { "epoch": 104.35, "learning_rate": 6.08621359223301e-06, "loss": 0.0225, "step": 268710 }, { "epoch": 104.36, "learning_rate": 6.0856957928802595e-06, "loss": 0.0912, "step": 268720 }, { "epoch": 104.36, "learning_rate": 6.085177993527509e-06, "loss": 0.0006, "step": 268730 }, { "epoch": 104.37, "learning_rate": 6.084660194174757e-06, "loss": 0.1603, "step": 268740 }, { "epoch": 104.37, "learning_rate": 6.084142394822007e-06, "loss": 0.029, "step": 268750 }, { "epoch": 104.37, "learning_rate": 6.083624595469256e-06, "loss": 0.0139, "step": 268760 }, { "epoch": 104.38, "learning_rate": 6.083106796116506e-06, "loss": 0.0131, "step": 268770 }, { "epoch": 104.38, "learning_rate": 6.082588996763754e-06, "loss": 0.0278, "step": 268780 }, { "epoch": 104.38, "learning_rate": 6.0820711974110035e-06, "loss": 0.0676, "step": 268790 }, { "epoch": 104.39, "learning_rate": 6.081553398058253e-06, "loss": 0.0346, "step": 268800 }, { "epoch": 104.39, "learning_rate": 6.081035598705503e-06, "loss": 0.1286, "step": 268810 }, { "epoch": 104.4, "learning_rate": 6.080517799352751e-06, "loss": 0.0223, "step": 268820 }, { "epoch": 104.4, "learning_rate": 6.08e-06, "loss": 0.0001, "step": 268830 }, { "epoch": 104.4, "learning_rate": 6.07948220064725e-06, "loss": 0.026, "step": 268840 }, { "epoch": 104.41, "learning_rate": 6.0789644012944995e-06, "loss": 0.0003, "step": 268850 }, { "epoch": 104.41, "learning_rate": 6.078446601941747e-06, "loss": 0.1449, "step": 268860 }, { "epoch": 104.42, "learning_rate": 6.077928802588997e-06, "loss": 0.1069, "step": 268870 }, { "epoch": 104.42, "learning_rate": 6.077411003236247e-06, "loss": 0.0004, "step": 268880 }, { "epoch": 104.42, "learning_rate": 6.076893203883496e-06, "loss": 0.0576, "step": 268890 }, { "epoch": 104.43, "learning_rate": 6.076375404530744e-06, "loss": 0.0002, "step": 268900 }, { "epoch": 104.43, "learning_rate": 6.075857605177994e-06, "loss": 0.0134, "step": 268910 }, { "epoch": 104.43, "learning_rate": 6.075339805825243e-06, "loss": 0.0346, "step": 268920 }, { "epoch": 104.44, "learning_rate": 6.074822006472493e-06, "loss": 0.0705, "step": 268930 }, { "epoch": 104.44, "learning_rate": 6.074304207119741e-06, "loss": 0.0755, "step": 268940 }, { "epoch": 104.45, "learning_rate": 6.0737864077669906e-06, "loss": 0.0001, "step": 268950 }, { "epoch": 104.45, "learning_rate": 6.07326860841424e-06, "loss": 0.0545, "step": 268960 }, { "epoch": 104.45, "learning_rate": 6.07275080906149e-06, "loss": 0.0069, "step": 268970 }, { "epoch": 104.46, "learning_rate": 6.072233009708738e-06, "loss": 0.0157, "step": 268980 }, { "epoch": 104.46, "learning_rate": 6.071715210355987e-06, "loss": 0.031, "step": 268990 }, { "epoch": 104.47, "learning_rate": 6.071197411003237e-06, "loss": 0.119, "step": 269000 }, { "epoch": 104.47, "learning_rate": 6.070679611650487e-06, "loss": 0.1231, "step": 269010 }, { "epoch": 104.47, "learning_rate": 6.0701618122977345e-06, "loss": 0.0721, "step": 269020 }, { "epoch": 104.48, "learning_rate": 6.069644012944984e-06, "loss": 0.0351, "step": 269030 }, { "epoch": 104.48, "learning_rate": 6.069126213592234e-06, "loss": 0.0268, "step": 269040 }, { "epoch": 104.49, "learning_rate": 6.068608414239483e-06, "loss": 0.0126, "step": 269050 }, { "epoch": 104.49, "learning_rate": 6.068090614886731e-06, "loss": 0.0281, "step": 269060 }, { "epoch": 104.49, "learning_rate": 6.067572815533981e-06, "loss": 0.0105, "step": 269070 }, { "epoch": 104.5, "learning_rate": 6.0670550161812305e-06, "loss": 0.1915, "step": 269080 }, { "epoch": 104.5, "learning_rate": 6.06653721682848e-06, "loss": 0.0002, "step": 269090 }, { "epoch": 104.5, "learning_rate": 6.066019417475729e-06, "loss": 0.0522, "step": 269100 }, { "epoch": 104.51, "learning_rate": 6.065501618122978e-06, "loss": 0.0284, "step": 269110 }, { "epoch": 104.51, "learning_rate": 6.064983818770227e-06, "loss": 0.0051, "step": 269120 }, { "epoch": 104.52, "learning_rate": 6.064466019417476e-06, "loss": 0.003, "step": 269130 }, { "epoch": 104.52, "learning_rate": 6.063948220064726e-06, "loss": 0.0448, "step": 269140 }, { "epoch": 104.52, "learning_rate": 6.0634304207119745e-06, "loss": 0.21, "step": 269150 }, { "epoch": 104.53, "learning_rate": 6.062912621359224e-06, "loss": 0.0845, "step": 269160 }, { "epoch": 104.53, "learning_rate": 6.062394822006473e-06, "loss": 0.0148, "step": 269170 }, { "epoch": 104.54, "learning_rate": 6.0618770226537225e-06, "loss": 0.0479, "step": 269180 }, { "epoch": 104.54, "learning_rate": 6.061359223300971e-06, "loss": 0.0297, "step": 269190 }, { "epoch": 104.54, "learning_rate": 6.060841423948221e-06, "loss": 0.0187, "step": 269200 }, { "epoch": 104.55, "learning_rate": 6.06032362459547e-06, "loss": 0.1585, "step": 269210 }, { "epoch": 104.55, "learning_rate": 6.059805825242719e-06, "loss": 0.0206, "step": 269220 }, { "epoch": 104.56, "learning_rate": 6.059288025889968e-06, "loss": 0.1669, "step": 269230 }, { "epoch": 104.56, "learning_rate": 6.058770226537218e-06, "loss": 0.0258, "step": 269240 }, { "epoch": 104.56, "learning_rate": 6.058252427184466e-06, "loss": 0.0225, "step": 269250 }, { "epoch": 104.57, "learning_rate": 6.057734627831716e-06, "loss": 0.1364, "step": 269260 }, { "epoch": 104.57, "learning_rate": 6.057216828478965e-06, "loss": 0.0076, "step": 269270 }, { "epoch": 104.57, "learning_rate": 6.056699029126214e-06, "loss": 0.0295, "step": 269280 }, { "epoch": 104.58, "learning_rate": 6.056181229773463e-06, "loss": 0.1768, "step": 269290 }, { "epoch": 104.58, "learning_rate": 6.055663430420713e-06, "loss": 0.0659, "step": 269300 }, { "epoch": 104.59, "learning_rate": 6.0551456310679616e-06, "loss": 0.0286, "step": 269310 }, { "epoch": 104.59, "learning_rate": 6.05462783171521e-06, "loss": 0.0096, "step": 269320 }, { "epoch": 104.59, "learning_rate": 6.05411003236246e-06, "loss": 0.1171, "step": 269330 }, { "epoch": 104.6, "learning_rate": 6.05359223300971e-06, "loss": 0.058, "step": 269340 }, { "epoch": 104.6, "learning_rate": 6.053074433656958e-06, "loss": 0.0981, "step": 269350 }, { "epoch": 104.61, "learning_rate": 6.052556634304207e-06, "loss": 0.0468, "step": 269360 }, { "epoch": 104.61, "learning_rate": 6.052038834951457e-06, "loss": 0.1009, "step": 269370 }, { "epoch": 104.61, "learning_rate": 6.051521035598706e-06, "loss": 0.0693, "step": 269380 }, { "epoch": 104.62, "learning_rate": 6.051003236245955e-06, "loss": 0.0743, "step": 269390 }, { "epoch": 104.62, "learning_rate": 6.050485436893204e-06, "loss": 0.0001, "step": 269400 }, { "epoch": 104.63, "learning_rate": 6.0499676375404535e-06, "loss": 0.0304, "step": 269410 }, { "epoch": 104.63, "learning_rate": 6.049449838187703e-06, "loss": 0.0011, "step": 269420 }, { "epoch": 104.63, "learning_rate": 6.048932038834952e-06, "loss": 0.0164, "step": 269430 }, { "epoch": 104.64, "learning_rate": 6.048414239482201e-06, "loss": 0.0417, "step": 269440 }, { "epoch": 104.64, "learning_rate": 6.04789644012945e-06, "loss": 0.0065, "step": 269450 }, { "epoch": 104.64, "learning_rate": 6.0473786407767e-06, "loss": 0.0256, "step": 269460 }, { "epoch": 104.65, "learning_rate": 6.046860841423948e-06, "loss": 0.0324, "step": 269470 }, { "epoch": 104.65, "learning_rate": 6.0463430420711975e-06, "loss": 0.0325, "step": 269480 }, { "epoch": 104.66, "learning_rate": 6.045825242718447e-06, "loss": 0.1281, "step": 269490 }, { "epoch": 104.66, "learning_rate": 6.045307443365697e-06, "loss": 0.1012, "step": 269500 }, { "epoch": 104.66, "learning_rate": 6.044789644012945e-06, "loss": 0.0005, "step": 269510 }, { "epoch": 104.67, "learning_rate": 6.044271844660194e-06, "loss": 0.0269, "step": 269520 }, { "epoch": 104.67, "learning_rate": 6.043754045307444e-06, "loss": 0.0152, "step": 269530 }, { "epoch": 104.68, "learning_rate": 6.0432362459546935e-06, "loss": 0.0217, "step": 269540 }, { "epoch": 104.68, "learning_rate": 6.042718446601941e-06, "loss": 0.0669, "step": 269550 }, { "epoch": 104.68, "learning_rate": 6.042200647249191e-06, "loss": 0.0047, "step": 269560 }, { "epoch": 104.69, "learning_rate": 6.041682847896441e-06, "loss": 0.0012, "step": 269570 }, { "epoch": 104.69, "learning_rate": 6.04116504854369e-06, "loss": 0.0502, "step": 269580 }, { "epoch": 104.7, "learning_rate": 6.040647249190938e-06, "loss": 0.0895, "step": 269590 }, { "epoch": 104.7, "learning_rate": 6.040129449838188e-06, "loss": 0.0814, "step": 269600 }, { "epoch": 104.7, "learning_rate": 6.039611650485437e-06, "loss": 0.0201, "step": 269610 }, { "epoch": 104.71, "learning_rate": 6.039093851132687e-06, "loss": 0.0133, "step": 269620 }, { "epoch": 104.71, "learning_rate": 6.038576051779935e-06, "loss": 0.0716, "step": 269630 }, { "epoch": 104.71, "learning_rate": 6.0380582524271846e-06, "loss": 0.0835, "step": 269640 }, { "epoch": 104.72, "learning_rate": 6.037540453074434e-06, "loss": 0.0299, "step": 269650 }, { "epoch": 104.72, "learning_rate": 6.037022653721684e-06, "loss": 0.0575, "step": 269660 }, { "epoch": 104.73, "learning_rate": 6.036504854368933e-06, "loss": 0.1117, "step": 269670 }, { "epoch": 104.73, "learning_rate": 6.035987055016181e-06, "loss": 0.0413, "step": 269680 }, { "epoch": 104.73, "learning_rate": 6.035469255663431e-06, "loss": 0.0157, "step": 269690 }, { "epoch": 104.74, "learning_rate": 6.034951456310681e-06, "loss": 0.0193, "step": 269700 }, { "epoch": 104.74, "learning_rate": 6.03443365695793e-06, "loss": 0.0114, "step": 269710 }, { "epoch": 104.75, "learning_rate": 6.033915857605178e-06, "loss": 0.0409, "step": 269720 }, { "epoch": 104.75, "learning_rate": 6.033398058252428e-06, "loss": 0.0337, "step": 269730 }, { "epoch": 104.75, "learning_rate": 6.032880258899677e-06, "loss": 0.0526, "step": 269740 }, { "epoch": 104.76, "learning_rate": 6.032362459546927e-06, "loss": 0.0171, "step": 269750 }, { "epoch": 104.76, "learning_rate": 6.031844660194175e-06, "loss": 0.0075, "step": 269760 }, { "epoch": 104.77, "learning_rate": 6.0313268608414245e-06, "loss": 0.0951, "step": 269770 }, { "epoch": 104.77, "learning_rate": 6.030809061488674e-06, "loss": 0.0903, "step": 269780 }, { "epoch": 104.77, "learning_rate": 6.030291262135923e-06, "loss": 0.0346, "step": 269790 }, { "epoch": 104.78, "learning_rate": 6.029773462783172e-06, "loss": 0.009, "step": 269800 }, { "epoch": 104.78, "learning_rate": 6.029255663430421e-06, "loss": 0.0765, "step": 269810 }, { "epoch": 104.78, "learning_rate": 6.028737864077671e-06, "loss": 0.0485, "step": 269820 }, { "epoch": 104.79, "learning_rate": 6.02822006472492e-06, "loss": 0.1918, "step": 269830 }, { "epoch": 104.79, "learning_rate": 6.0277022653721685e-06, "loss": 0.0007, "step": 269840 }, { "epoch": 104.8, "learning_rate": 6.027184466019418e-06, "loss": 0.0103, "step": 269850 }, { "epoch": 104.8, "learning_rate": 6.026666666666668e-06, "loss": 0.0211, "step": 269860 }, { "epoch": 104.8, "learning_rate": 6.0261488673139165e-06, "loss": 0.1003, "step": 269870 }, { "epoch": 104.81, "learning_rate": 6.025631067961165e-06, "loss": 0.0441, "step": 269880 }, { "epoch": 104.81, "learning_rate": 6.025113268608415e-06, "loss": 0.0006, "step": 269890 }, { "epoch": 104.82, "learning_rate": 6.0245954692556645e-06, "loss": 0.0205, "step": 269900 }, { "epoch": 104.82, "learning_rate": 6.024077669902913e-06, "loss": 0.0206, "step": 269910 }, { "epoch": 104.82, "learning_rate": 6.023559870550162e-06, "loss": 0.0288, "step": 269920 }, { "epoch": 104.83, "learning_rate": 6.023042071197412e-06, "loss": 0.0003, "step": 269930 }, { "epoch": 104.83, "learning_rate": 6.02252427184466e-06, "loss": 0.0109, "step": 269940 }, { "epoch": 104.83, "learning_rate": 6.02200647249191e-06, "loss": 0.0863, "step": 269950 }, { "epoch": 104.84, "learning_rate": 6.021488673139159e-06, "loss": 0.0008, "step": 269960 }, { "epoch": 104.84, "learning_rate": 6.020970873786408e-06, "loss": 0.0978, "step": 269970 }, { "epoch": 104.85, "learning_rate": 6.020453074433657e-06, "loss": 0.0231, "step": 269980 }, { "epoch": 104.85, "learning_rate": 6.019935275080907e-06, "loss": 0.0428, "step": 269990 }, { "epoch": 104.85, "learning_rate": 6.0194174757281556e-06, "loss": 0.039, "step": 270000 }, { "epoch": 104.86, "learning_rate": 6.018899676375405e-06, "loss": 0.0337, "step": 270010 }, { "epoch": 104.86, "learning_rate": 6.018381877022654e-06, "loss": 0.0373, "step": 270020 }, { "epoch": 104.87, "learning_rate": 6.0178640776699036e-06, "loss": 0.1072, "step": 270030 }, { "epoch": 104.87, "learning_rate": 6.017346278317152e-06, "loss": 0.0472, "step": 270040 }, { "epoch": 104.87, "learning_rate": 6.016828478964402e-06, "loss": 0.039, "step": 270050 }, { "epoch": 104.88, "learning_rate": 6.016310679611651e-06, "loss": 0.0234, "step": 270060 }, { "epoch": 104.88, "learning_rate": 6.0157928802589e-06, "loss": 0.133, "step": 270070 }, { "epoch": 104.89, "learning_rate": 6.015275080906149e-06, "loss": 0.0156, "step": 270080 }, { "epoch": 104.89, "learning_rate": 6.014757281553399e-06, "loss": 0.1553, "step": 270090 }, { "epoch": 104.89, "learning_rate": 6.0142394822006475e-06, "loss": 0.0503, "step": 270100 }, { "epoch": 104.9, "learning_rate": 6.013721682847897e-06, "loss": 0.0001, "step": 270110 }, { "epoch": 104.9, "learning_rate": 6.013203883495146e-06, "loss": 0.0001, "step": 270120 }, { "epoch": 104.9, "learning_rate": 6.012686084142395e-06, "loss": 0.0051, "step": 270130 }, { "epoch": 104.91, "learning_rate": 6.012168284789644e-06, "loss": 0.0116, "step": 270140 }, { "epoch": 104.91, "learning_rate": 6.011650485436894e-06, "loss": 0.0536, "step": 270150 }, { "epoch": 104.92, "learning_rate": 6.011132686084143e-06, "loss": 0.0404, "step": 270160 }, { "epoch": 104.92, "learning_rate": 6.0106148867313915e-06, "loss": 0.0428, "step": 270170 }, { "epoch": 104.92, "learning_rate": 6.010097087378641e-06, "loss": 0.0201, "step": 270180 }, { "epoch": 104.93, "learning_rate": 6.009579288025891e-06, "loss": 0.0663, "step": 270190 }, { "epoch": 104.93, "learning_rate": 6.0090614886731395e-06, "loss": 0.0001, "step": 270200 }, { "epoch": 104.94, "learning_rate": 6.008543689320388e-06, "loss": 0.0015, "step": 270210 }, { "epoch": 104.94, "learning_rate": 6.008025889967638e-06, "loss": 0.1012, "step": 270220 }, { "epoch": 104.94, "learning_rate": 6.0075080906148875e-06, "loss": 0.1549, "step": 270230 }, { "epoch": 104.95, "learning_rate": 6.006990291262137e-06, "loss": 0.0453, "step": 270240 }, { "epoch": 104.95, "learning_rate": 6.006472491909385e-06, "loss": 0.0104, "step": 270250 }, { "epoch": 104.96, "learning_rate": 6.005954692556635e-06, "loss": 0.0886, "step": 270260 }, { "epoch": 104.96, "learning_rate": 6.005436893203884e-06, "loss": 0.0421, "step": 270270 }, { "epoch": 104.96, "learning_rate": 6.004919093851134e-06, "loss": 0.0075, "step": 270280 }, { "epoch": 104.97, "learning_rate": 6.004401294498382e-06, "loss": 0.1413, "step": 270290 }, { "epoch": 104.97, "learning_rate": 6.003883495145631e-06, "loss": 0.0768, "step": 270300 }, { "epoch": 104.97, "learning_rate": 6.003365695792881e-06, "loss": 0.0026, "step": 270310 }, { "epoch": 104.98, "learning_rate": 6.002847896440131e-06, "loss": 0.0008, "step": 270320 }, { "epoch": 104.98, "learning_rate": 6.0023300970873786e-06, "loss": 0.0548, "step": 270330 }, { "epoch": 104.99, "learning_rate": 6.001812297734628e-06, "loss": 0.1933, "step": 270340 }, { "epoch": 104.99, "learning_rate": 6.001294498381878e-06, "loss": 0.0104, "step": 270350 }, { "epoch": 104.99, "learning_rate": 6.000776699029127e-06, "loss": 0.0119, "step": 270360 }, { "epoch": 105.0, "learning_rate": 6.000258899676375e-06, "loss": 0.0125, "step": 270370 }, { "epoch": 105.0, "eval_accuracy": 0.9485557083906465, "eval_loss": 0.3993532061576843, "eval_runtime": 8.1978, "eval_samples_per_second": 443.411, "eval_steps_per_second": 55.503, "step": 270375 }, { "epoch": 105.0, "learning_rate": 5.999741100323625e-06, "loss": 0.0015, "step": 270380 }, { "epoch": 105.01, "learning_rate": 5.999223300970875e-06, "loss": 0.0724, "step": 270390 }, { "epoch": 105.01, "learning_rate": 5.998705501618124e-06, "loss": 0.0099, "step": 270400 }, { "epoch": 105.01, "learning_rate": 5.998187702265372e-06, "loss": 0.0693, "step": 270410 }, { "epoch": 105.02, "learning_rate": 5.997669902912622e-06, "loss": 0.1983, "step": 270420 }, { "epoch": 105.02, "learning_rate": 5.997152103559871e-06, "loss": 0.0094, "step": 270430 }, { "epoch": 105.03, "learning_rate": 5.996634304207121e-06, "loss": 0.0005, "step": 270440 }, { "epoch": 105.03, "learning_rate": 5.996116504854369e-06, "loss": 0.0333, "step": 270450 }, { "epoch": 105.03, "learning_rate": 5.9955987055016185e-06, "loss": 0.0833, "step": 270460 }, { "epoch": 105.04, "learning_rate": 5.995080906148868e-06, "loss": 0.049, "step": 270470 }, { "epoch": 105.04, "learning_rate": 5.994563106796118e-06, "loss": 0.1508, "step": 270480 }, { "epoch": 105.04, "learning_rate": 5.994045307443366e-06, "loss": 0.0012, "step": 270490 }, { "epoch": 105.05, "learning_rate": 5.993527508090615e-06, "loss": 0.0438, "step": 270500 }, { "epoch": 105.05, "learning_rate": 5.993009708737865e-06, "loss": 0.0173, "step": 270510 }, { "epoch": 105.06, "learning_rate": 5.9924919093851145e-06, "loss": 0.0662, "step": 270520 }, { "epoch": 105.06, "learning_rate": 5.9919741100323625e-06, "loss": 0.0251, "step": 270530 }, { "epoch": 105.06, "learning_rate": 5.991456310679612e-06, "loss": 0.0108, "step": 270540 }, { "epoch": 105.07, "learning_rate": 5.990938511326862e-06, "loss": 0.2184, "step": 270550 }, { "epoch": 105.07, "learning_rate": 5.990420711974111e-06, "loss": 0.0107, "step": 270560 }, { "epoch": 105.08, "learning_rate": 5.989902912621359e-06, "loss": 0.0153, "step": 270570 }, { "epoch": 105.08, "learning_rate": 5.989385113268609e-06, "loss": 0.0742, "step": 270580 }, { "epoch": 105.08, "learning_rate": 5.9888673139158585e-06, "loss": 0.0103, "step": 270590 }, { "epoch": 105.09, "learning_rate": 5.988349514563107e-06, "loss": 0.0079, "step": 270600 }, { "epoch": 105.09, "learning_rate": 5.987831715210356e-06, "loss": 0.148, "step": 270610 }, { "epoch": 105.1, "learning_rate": 5.987313915857606e-06, "loss": 0.0589, "step": 270620 }, { "epoch": 105.1, "learning_rate": 5.986796116504855e-06, "loss": 0.0163, "step": 270630 }, { "epoch": 105.1, "learning_rate": 5.986278317152104e-06, "loss": 0.0448, "step": 270640 }, { "epoch": 105.11, "learning_rate": 5.985760517799353e-06, "loss": 0.1316, "step": 270650 }, { "epoch": 105.11, "learning_rate": 5.985242718446602e-06, "loss": 0.1383, "step": 270660 }, { "epoch": 105.11, "learning_rate": 5.984724919093852e-06, "loss": 0.0251, "step": 270670 }, { "epoch": 105.12, "learning_rate": 5.984207119741101e-06, "loss": 0.0178, "step": 270680 }, { "epoch": 105.12, "learning_rate": 5.9836893203883496e-06, "loss": 0.0083, "step": 270690 }, { "epoch": 105.13, "learning_rate": 5.983171521035599e-06, "loss": 0.0196, "step": 270700 }, { "epoch": 105.13, "learning_rate": 5.982653721682849e-06, "loss": 0.0414, "step": 270710 }, { "epoch": 105.13, "learning_rate": 5.9821359223300976e-06, "loss": 0.1205, "step": 270720 }, { "epoch": 105.14, "learning_rate": 5.981618122977346e-06, "loss": 0.0864, "step": 270730 }, { "epoch": 105.14, "learning_rate": 5.981100323624596e-06, "loss": 0.0264, "step": 270740 }, { "epoch": 105.15, "learning_rate": 5.980582524271845e-06, "loss": 0.0285, "step": 270750 }, { "epoch": 105.15, "learning_rate": 5.980064724919094e-06, "loss": 0.0002, "step": 270760 }, { "epoch": 105.15, "learning_rate": 5.979546925566344e-06, "loss": 0.0539, "step": 270770 }, { "epoch": 105.16, "learning_rate": 5.979029126213593e-06, "loss": 0.0475, "step": 270780 }, { "epoch": 105.16, "learning_rate": 5.9785113268608415e-06, "loss": 0.0084, "step": 270790 }, { "epoch": 105.17, "learning_rate": 5.977993527508091e-06, "loss": 0.0002, "step": 270800 }, { "epoch": 105.17, "learning_rate": 5.977475728155341e-06, "loss": 0.0116, "step": 270810 }, { "epoch": 105.17, "learning_rate": 5.9769579288025895e-06, "loss": 0.0298, "step": 270820 }, { "epoch": 105.18, "learning_rate": 5.976440129449838e-06, "loss": 0.0722, "step": 270830 }, { "epoch": 105.18, "learning_rate": 5.975922330097088e-06, "loss": 0.0825, "step": 270840 }, { "epoch": 105.18, "learning_rate": 5.9754045307443375e-06, "loss": 0.0294, "step": 270850 }, { "epoch": 105.19, "learning_rate": 5.974886731391586e-06, "loss": 0.1402, "step": 270860 }, { "epoch": 105.19, "learning_rate": 5.974368932038835e-06, "loss": 0.0156, "step": 270870 }, { "epoch": 105.2, "learning_rate": 5.973851132686085e-06, "loss": 0.0822, "step": 270880 }, { "epoch": 105.2, "learning_rate": 5.973333333333334e-06, "loss": 0.0227, "step": 270890 }, { "epoch": 105.2, "learning_rate": 5.972815533980583e-06, "loss": 0.0029, "step": 270900 }, { "epoch": 105.21, "learning_rate": 5.972297734627832e-06, "loss": 0.1005, "step": 270910 }, { "epoch": 105.21, "learning_rate": 5.9717799352750815e-06, "loss": 0.0476, "step": 270920 }, { "epoch": 105.22, "learning_rate": 5.971262135922331e-06, "loss": 0.0648, "step": 270930 }, { "epoch": 105.22, "learning_rate": 5.970744336569579e-06, "loss": 0.048, "step": 270940 }, { "epoch": 105.22, "learning_rate": 5.970226537216829e-06, "loss": 0.0043, "step": 270950 }, { "epoch": 105.23, "learning_rate": 5.969708737864078e-06, "loss": 0.0399, "step": 270960 }, { "epoch": 105.23, "learning_rate": 5.969190938511328e-06, "loss": 0.0639, "step": 270970 }, { "epoch": 105.23, "learning_rate": 5.968673139158576e-06, "loss": 0.1068, "step": 270980 }, { "epoch": 105.24, "learning_rate": 5.968155339805825e-06, "loss": 0.0916, "step": 270990 }, { "epoch": 105.24, "learning_rate": 5.967637540453075e-06, "loss": 0.0439, "step": 271000 }, { "epoch": 105.25, "learning_rate": 5.967119741100325e-06, "loss": 0.1391, "step": 271010 }, { "epoch": 105.25, "learning_rate": 5.9666019417475726e-06, "loss": 0.0145, "step": 271020 }, { "epoch": 105.25, "learning_rate": 5.966084142394822e-06, "loss": 0.0004, "step": 271030 }, { "epoch": 105.26, "learning_rate": 5.965566343042072e-06, "loss": 0.0007, "step": 271040 }, { "epoch": 105.26, "learning_rate": 5.965048543689321e-06, "loss": 0.0113, "step": 271050 }, { "epoch": 105.27, "learning_rate": 5.964530744336569e-06, "loss": 0.0044, "step": 271060 }, { "epoch": 105.27, "learning_rate": 5.964012944983819e-06, "loss": 0.0829, "step": 271070 }, { "epoch": 105.27, "learning_rate": 5.963495145631069e-06, "loss": 0.0572, "step": 271080 }, { "epoch": 105.28, "learning_rate": 5.962977346278318e-06, "loss": 0.1516, "step": 271090 }, { "epoch": 105.28, "learning_rate": 5.962459546925566e-06, "loss": 0.0617, "step": 271100 }, { "epoch": 105.29, "learning_rate": 5.961941747572816e-06, "loss": 0.0235, "step": 271110 }, { "epoch": 105.29, "learning_rate": 5.961423948220065e-06, "loss": 0.0089, "step": 271120 }, { "epoch": 105.29, "learning_rate": 5.960906148867315e-06, "loss": 0.0599, "step": 271130 }, { "epoch": 105.3, "learning_rate": 5.960388349514563e-06, "loss": 0.0769, "step": 271140 }, { "epoch": 105.3, "learning_rate": 5.9598705501618125e-06, "loss": 0.0143, "step": 271150 }, { "epoch": 105.3, "learning_rate": 5.959352750809062e-06, "loss": 0.0179, "step": 271160 }, { "epoch": 105.31, "learning_rate": 5.958834951456312e-06, "loss": 0.0179, "step": 271170 }, { "epoch": 105.31, "learning_rate": 5.95831715210356e-06, "loss": 0.099, "step": 271180 }, { "epoch": 105.32, "learning_rate": 5.957799352750809e-06, "loss": 0.0748, "step": 271190 }, { "epoch": 105.32, "learning_rate": 5.957281553398059e-06, "loss": 0.022, "step": 271200 }, { "epoch": 105.32, "learning_rate": 5.9567637540453085e-06, "loss": 0.0004, "step": 271210 }, { "epoch": 105.33, "learning_rate": 5.9562459546925565e-06, "loss": 0.0283, "step": 271220 }, { "epoch": 105.33, "learning_rate": 5.955728155339806e-06, "loss": 0.0993, "step": 271230 }, { "epoch": 105.34, "learning_rate": 5.955210355987056e-06, "loss": 0.0539, "step": 271240 }, { "epoch": 105.34, "learning_rate": 5.954692556634305e-06, "loss": 0.0199, "step": 271250 }, { "epoch": 105.34, "learning_rate": 5.954174757281553e-06, "loss": 0.126, "step": 271260 }, { "epoch": 105.35, "learning_rate": 5.953656957928803e-06, "loss": 0.0762, "step": 271270 }, { "epoch": 105.35, "learning_rate": 5.9531391585760525e-06, "loss": 0.0281, "step": 271280 }, { "epoch": 105.36, "learning_rate": 5.952621359223302e-06, "loss": 0.0268, "step": 271290 }, { "epoch": 105.36, "learning_rate": 5.95210355987055e-06, "loss": 0.0462, "step": 271300 }, { "epoch": 105.36, "learning_rate": 5.9515857605178e-06, "loss": 0.0201, "step": 271310 }, { "epoch": 105.37, "learning_rate": 5.951067961165049e-06, "loss": 0.0478, "step": 271320 }, { "epoch": 105.37, "learning_rate": 5.950550161812299e-06, "loss": 0.0017, "step": 271330 }, { "epoch": 105.37, "learning_rate": 5.950032362459548e-06, "loss": 0.0491, "step": 271340 }, { "epoch": 105.38, "learning_rate": 5.949514563106796e-06, "loss": 0.0007, "step": 271350 }, { "epoch": 105.38, "learning_rate": 5.948996763754046e-06, "loss": 0.0203, "step": 271360 }, { "epoch": 105.39, "learning_rate": 5.948478964401296e-06, "loss": 0.0087, "step": 271370 }, { "epoch": 105.39, "learning_rate": 5.947961165048544e-06, "loss": 0.0005, "step": 271380 }, { "epoch": 105.39, "learning_rate": 5.947443365695793e-06, "loss": 0.0149, "step": 271390 }, { "epoch": 105.4, "learning_rate": 5.946925566343043e-06, "loss": 0.004, "step": 271400 }, { "epoch": 105.4, "learning_rate": 5.9464077669902916e-06, "loss": 0.0001, "step": 271410 }, { "epoch": 105.41, "learning_rate": 5.945889967637541e-06, "loss": 0.0533, "step": 271420 }, { "epoch": 105.41, "learning_rate": 5.94537216828479e-06, "loss": 0.0327, "step": 271430 }, { "epoch": 105.41, "learning_rate": 5.94485436893204e-06, "loss": 0.0036, "step": 271440 }, { "epoch": 105.42, "learning_rate": 5.944336569579288e-06, "loss": 0.144, "step": 271450 }, { "epoch": 105.42, "learning_rate": 5.943818770226538e-06, "loss": 0.0671, "step": 271460 }, { "epoch": 105.43, "learning_rate": 5.943300970873787e-06, "loss": 0.1067, "step": 271470 }, { "epoch": 105.43, "learning_rate": 5.942783171521036e-06, "loss": 0.1026, "step": 271480 }, { "epoch": 105.43, "learning_rate": 5.942265372168285e-06, "loss": 0.0557, "step": 271490 }, { "epoch": 105.44, "learning_rate": 5.941747572815535e-06, "loss": 0.3014, "step": 271500 }, { "epoch": 105.44, "learning_rate": 5.9412297734627835e-06, "loss": 0.0746, "step": 271510 }, { "epoch": 105.44, "learning_rate": 5.940711974110033e-06, "loss": 0.0381, "step": 271520 }, { "epoch": 105.45, "learning_rate": 5.940194174757282e-06, "loss": 0.1234, "step": 271530 }, { "epoch": 105.45, "learning_rate": 5.9396763754045315e-06, "loss": 0.0197, "step": 271540 }, { "epoch": 105.46, "learning_rate": 5.93915857605178e-06, "loss": 0.0143, "step": 271550 }, { "epoch": 105.46, "learning_rate": 5.93864077669903e-06, "loss": 0.0858, "step": 271560 }, { "epoch": 105.46, "learning_rate": 5.938122977346279e-06, "loss": 0.0287, "step": 271570 }, { "epoch": 105.47, "learning_rate": 5.937605177993528e-06, "loss": 0.0866, "step": 271580 }, { "epoch": 105.47, "learning_rate": 5.937087378640777e-06, "loss": 0.0006, "step": 271590 }, { "epoch": 105.48, "learning_rate": 5.936569579288026e-06, "loss": 0.0727, "step": 271600 }, { "epoch": 105.48, "learning_rate": 5.9360517799352755e-06, "loss": 0.0374, "step": 271610 }, { "epoch": 105.48, "learning_rate": 5.935533980582525e-06, "loss": 0.0131, "step": 271620 }, { "epoch": 105.49, "learning_rate": 5.935016181229774e-06, "loss": 0.0307, "step": 271630 }, { "epoch": 105.49, "learning_rate": 5.934498381877023e-06, "loss": 0.0199, "step": 271640 }, { "epoch": 105.5, "learning_rate": 5.933980582524272e-06, "loss": 0.1011, "step": 271650 }, { "epoch": 105.5, "learning_rate": 5.933462783171522e-06, "loss": 0.0407, "step": 271660 }, { "epoch": 105.5, "learning_rate": 5.932944983818771e-06, "loss": 0.0006, "step": 271670 }, { "epoch": 105.51, "learning_rate": 5.932427184466019e-06, "loss": 0.0346, "step": 271680 }, { "epoch": 105.51, "learning_rate": 5.931909385113269e-06, "loss": 0.0684, "step": 271690 }, { "epoch": 105.51, "learning_rate": 5.931391585760519e-06, "loss": 0.0706, "step": 271700 }, { "epoch": 105.52, "learning_rate": 5.930873786407767e-06, "loss": 0.0576, "step": 271710 }, { "epoch": 105.52, "learning_rate": 5.930355987055016e-06, "loss": 0.1102, "step": 271720 }, { "epoch": 105.53, "learning_rate": 5.929838187702266e-06, "loss": 0.0047, "step": 271730 }, { "epoch": 105.53, "learning_rate": 5.929320388349515e-06, "loss": 0.0989, "step": 271740 }, { "epoch": 105.53, "learning_rate": 5.928802588996763e-06, "loss": 0.0658, "step": 271750 }, { "epoch": 105.54, "learning_rate": 5.928284789644013e-06, "loss": 0.0109, "step": 271760 }, { "epoch": 105.54, "learning_rate": 5.927766990291263e-06, "loss": 0.0346, "step": 271770 }, { "epoch": 105.55, "learning_rate": 5.927249190938512e-06, "loss": 0.0393, "step": 271780 }, { "epoch": 105.55, "learning_rate": 5.92673139158576e-06, "loss": 0.0323, "step": 271790 }, { "epoch": 105.55, "learning_rate": 5.92621359223301e-06, "loss": 0.0106, "step": 271800 }, { "epoch": 105.56, "learning_rate": 5.925695792880259e-06, "loss": 0.0472, "step": 271810 }, { "epoch": 105.56, "learning_rate": 5.925177993527509e-06, "loss": 0.092, "step": 271820 }, { "epoch": 105.57, "learning_rate": 5.924660194174757e-06, "loss": 0.0769, "step": 271830 }, { "epoch": 105.57, "learning_rate": 5.9241423948220065e-06, "loss": 0.1222, "step": 271840 }, { "epoch": 105.57, "learning_rate": 5.923624595469256e-06, "loss": 0.0804, "step": 271850 }, { "epoch": 105.58, "learning_rate": 5.923106796116506e-06, "loss": 0.0038, "step": 271860 }, { "epoch": 105.58, "learning_rate": 5.922588996763754e-06, "loss": 0.0244, "step": 271870 }, { "epoch": 105.58, "learning_rate": 5.922071197411003e-06, "loss": 0.0389, "step": 271880 }, { "epoch": 105.59, "learning_rate": 5.921553398058253e-06, "loss": 0.1394, "step": 271890 }, { "epoch": 105.59, "learning_rate": 5.9210355987055025e-06, "loss": 0.1384, "step": 271900 }, { "epoch": 105.6, "learning_rate": 5.920517799352752e-06, "loss": 0.0353, "step": 271910 }, { "epoch": 105.6, "learning_rate": 5.92e-06, "loss": 0.0038, "step": 271920 }, { "epoch": 105.6, "learning_rate": 5.91948220064725e-06, "loss": 0.2257, "step": 271930 }, { "epoch": 105.61, "learning_rate": 5.918964401294499e-06, "loss": 0.1107, "step": 271940 }, { "epoch": 105.61, "learning_rate": 5.918446601941749e-06, "loss": 0.1484, "step": 271950 }, { "epoch": 105.62, "learning_rate": 5.917928802588997e-06, "loss": 0.0121, "step": 271960 }, { "epoch": 105.62, "learning_rate": 5.9174110032362465e-06, "loss": 0.1342, "step": 271970 }, { "epoch": 105.62, "learning_rate": 5.916893203883496e-06, "loss": 0.0429, "step": 271980 }, { "epoch": 105.63, "learning_rate": 5.916375404530746e-06, "loss": 0.1303, "step": 271990 }, { "epoch": 105.63, "learning_rate": 5.915857605177994e-06, "loss": 0.2571, "step": 272000 }, { "epoch": 105.63, "learning_rate": 5.915339805825243e-06, "loss": 0.0565, "step": 272010 }, { "epoch": 105.64, "learning_rate": 5.914822006472493e-06, "loss": 0.0594, "step": 272020 }, { "epoch": 105.64, "learning_rate": 5.9143042071197425e-06, "loss": 0.2857, "step": 272030 }, { "epoch": 105.65, "learning_rate": 5.91378640776699e-06, "loss": 0.0024, "step": 272040 }, { "epoch": 105.65, "learning_rate": 5.91326860841424e-06, "loss": 0.1181, "step": 272050 }, { "epoch": 105.65, "learning_rate": 5.91275080906149e-06, "loss": 0.0624, "step": 272060 }, { "epoch": 105.66, "learning_rate": 5.912233009708738e-06, "loss": 0.0336, "step": 272070 }, { "epoch": 105.66, "learning_rate": 5.911715210355987e-06, "loss": 0.045, "step": 272080 }, { "epoch": 105.67, "learning_rate": 5.911197411003237e-06, "loss": 0.164, "step": 272090 }, { "epoch": 105.67, "learning_rate": 5.910679611650486e-06, "loss": 0.0536, "step": 272100 }, { "epoch": 105.67, "learning_rate": 5.910161812297735e-06, "loss": 0.0335, "step": 272110 }, { "epoch": 105.68, "learning_rate": 5.909644012944984e-06, "loss": 0.0377, "step": 272120 }, { "epoch": 105.68, "learning_rate": 5.909126213592234e-06, "loss": 0.1199, "step": 272130 }, { "epoch": 105.69, "learning_rate": 5.908608414239483e-06, "loss": 0.1187, "step": 272140 }, { "epoch": 105.69, "learning_rate": 5.908090614886732e-06, "loss": 0.0164, "step": 272150 }, { "epoch": 105.69, "learning_rate": 5.907572815533981e-06, "loss": 0.1135, "step": 272160 }, { "epoch": 105.7, "learning_rate": 5.90705501618123e-06, "loss": 0.0682, "step": 272170 }, { "epoch": 105.7, "learning_rate": 5.90653721682848e-06, "loss": 0.0919, "step": 272180 }, { "epoch": 105.7, "learning_rate": 5.906019417475729e-06, "loss": 0.0081, "step": 272190 }, { "epoch": 105.71, "learning_rate": 5.9055016181229775e-06, "loss": 0.0223, "step": 272200 }, { "epoch": 105.71, "learning_rate": 5.904983818770227e-06, "loss": 0.0456, "step": 272210 }, { "epoch": 105.72, "learning_rate": 5.904466019417476e-06, "loss": 0.1161, "step": 272220 }, { "epoch": 105.72, "learning_rate": 5.9039482200647255e-06, "loss": 0.0116, "step": 272230 }, { "epoch": 105.72, "learning_rate": 5.903430420711974e-06, "loss": 0.1161, "step": 272240 }, { "epoch": 105.73, "learning_rate": 5.902912621359224e-06, "loss": 0.0991, "step": 272250 }, { "epoch": 105.73, "learning_rate": 5.902394822006473e-06, "loss": 0.0209, "step": 272260 }, { "epoch": 105.74, "learning_rate": 5.901877022653722e-06, "loss": 0.0588, "step": 272270 }, { "epoch": 105.74, "learning_rate": 5.901359223300971e-06, "loss": 0.0203, "step": 272280 }, { "epoch": 105.74, "learning_rate": 5.900841423948221e-06, "loss": 0.009, "step": 272290 }, { "epoch": 105.75, "learning_rate": 5.9003236245954695e-06, "loss": 0.0295, "step": 272300 }, { "epoch": 105.75, "learning_rate": 5.899805825242719e-06, "loss": 0.1001, "step": 272310 }, { "epoch": 105.76, "learning_rate": 5.899288025889968e-06, "loss": 0.0644, "step": 272320 }, { "epoch": 105.76, "learning_rate": 5.8987702265372175e-06, "loss": 0.0004, "step": 272330 }, { "epoch": 105.76, "learning_rate": 5.898252427184466e-06, "loss": 0.0788, "step": 272340 }, { "epoch": 105.77, "learning_rate": 5.897734627831716e-06, "loss": 0.0407, "step": 272350 }, { "epoch": 105.77, "learning_rate": 5.897216828478965e-06, "loss": 0.0285, "step": 272360 }, { "epoch": 105.77, "learning_rate": 5.896699029126214e-06, "loss": 0.0291, "step": 272370 }, { "epoch": 105.78, "learning_rate": 5.896181229773463e-06, "loss": 0.0021, "step": 272380 }, { "epoch": 105.78, "learning_rate": 5.895663430420713e-06, "loss": 0.043, "step": 272390 }, { "epoch": 105.79, "learning_rate": 5.895145631067961e-06, "loss": 0.0227, "step": 272400 }, { "epoch": 105.79, "learning_rate": 5.89462783171521e-06, "loss": 0.0217, "step": 272410 }, { "epoch": 105.79, "learning_rate": 5.89411003236246e-06, "loss": 0.0138, "step": 272420 }, { "epoch": 105.8, "learning_rate": 5.893592233009709e-06, "loss": 0.0316, "step": 272430 }, { "epoch": 105.8, "learning_rate": 5.893074433656958e-06, "loss": 0.07, "step": 272440 }, { "epoch": 105.81, "learning_rate": 5.892556634304207e-06, "loss": 0.0355, "step": 272450 }, { "epoch": 105.81, "learning_rate": 5.8920388349514566e-06, "loss": 0.1566, "step": 272460 }, { "epoch": 105.81, "learning_rate": 5.891521035598706e-06, "loss": 0.0012, "step": 272470 }, { "epoch": 105.82, "learning_rate": 5.891003236245956e-06, "loss": 0.1116, "step": 272480 }, { "epoch": 105.82, "learning_rate": 5.890485436893204e-06, "loss": 0.049, "step": 272490 }, { "epoch": 105.83, "learning_rate": 5.889967637540453e-06, "loss": 0.1103, "step": 272500 }, { "epoch": 105.83, "learning_rate": 5.889449838187703e-06, "loss": 0.0399, "step": 272510 }, { "epoch": 105.83, "learning_rate": 5.888932038834953e-06, "loss": 0.1547, "step": 272520 }, { "epoch": 105.84, "learning_rate": 5.8884142394822005e-06, "loss": 0.0697, "step": 272530 }, { "epoch": 105.84, "learning_rate": 5.88789644012945e-06, "loss": 0.0122, "step": 272540 }, { "epoch": 105.84, "learning_rate": 5.8873786407767e-06, "loss": 0.0129, "step": 272550 }, { "epoch": 105.85, "learning_rate": 5.886860841423949e-06, "loss": 0.0442, "step": 272560 }, { "epoch": 105.85, "learning_rate": 5.886343042071197e-06, "loss": 0.0351, "step": 272570 }, { "epoch": 105.86, "learning_rate": 5.885825242718447e-06, "loss": 0.0015, "step": 272580 }, { "epoch": 105.86, "learning_rate": 5.8853074433656965e-06, "loss": 0.0755, "step": 272590 }, { "epoch": 105.86, "learning_rate": 5.884789644012946e-06, "loss": 0.0077, "step": 272600 }, { "epoch": 105.87, "learning_rate": 5.884271844660194e-06, "loss": 0.1655, "step": 272610 }, { "epoch": 105.87, "learning_rate": 5.883754045307444e-06, "loss": 0.0547, "step": 272620 }, { "epoch": 105.88, "learning_rate": 5.883236245954693e-06, "loss": 0.0238, "step": 272630 }, { "epoch": 105.88, "learning_rate": 5.882718446601943e-06, "loss": 0.0851, "step": 272640 }, { "epoch": 105.88, "learning_rate": 5.882200647249191e-06, "loss": 0.0032, "step": 272650 }, { "epoch": 105.89, "learning_rate": 5.8816828478964405e-06, "loss": 0.0742, "step": 272660 }, { "epoch": 105.89, "learning_rate": 5.88116504854369e-06, "loss": 0.0168, "step": 272670 }, { "epoch": 105.9, "learning_rate": 5.88064724919094e-06, "loss": 0.0475, "step": 272680 }, { "epoch": 105.9, "learning_rate": 5.880129449838188e-06, "loss": 0.0419, "step": 272690 }, { "epoch": 105.9, "learning_rate": 5.879611650485437e-06, "loss": 0.056, "step": 272700 }, { "epoch": 105.91, "learning_rate": 5.879093851132687e-06, "loss": 0.1533, "step": 272710 }, { "epoch": 105.91, "learning_rate": 5.8785760517799365e-06, "loss": 0.0983, "step": 272720 }, { "epoch": 105.91, "learning_rate": 5.878058252427184e-06, "loss": 0.0488, "step": 272730 }, { "epoch": 105.92, "learning_rate": 5.877540453074434e-06, "loss": 0.0621, "step": 272740 }, { "epoch": 105.92, "learning_rate": 5.877022653721684e-06, "loss": 0.0167, "step": 272750 }, { "epoch": 105.93, "learning_rate": 5.876504854368933e-06, "loss": 0.0842, "step": 272760 }, { "epoch": 105.93, "learning_rate": 5.875987055016181e-06, "loss": 0.0105, "step": 272770 }, { "epoch": 105.93, "learning_rate": 5.875469255663431e-06, "loss": 0.1693, "step": 272780 }, { "epoch": 105.94, "learning_rate": 5.87495145631068e-06, "loss": 0.0673, "step": 272790 }, { "epoch": 105.94, "learning_rate": 5.87443365695793e-06, "loss": 0.0284, "step": 272800 }, { "epoch": 105.95, "learning_rate": 5.873915857605178e-06, "loss": 0.0178, "step": 272810 }, { "epoch": 105.95, "learning_rate": 5.873398058252428e-06, "loss": 0.0757, "step": 272820 }, { "epoch": 105.95, "learning_rate": 5.872880258899677e-06, "loss": 0.0082, "step": 272830 }, { "epoch": 105.96, "learning_rate": 5.872362459546927e-06, "loss": 0.0099, "step": 272840 }, { "epoch": 105.96, "learning_rate": 5.871844660194175e-06, "loss": 0.071, "step": 272850 }, { "epoch": 105.97, "learning_rate": 5.871326860841424e-06, "loss": 0.0396, "step": 272860 }, { "epoch": 105.97, "learning_rate": 5.870809061488674e-06, "loss": 0.017, "step": 272870 }, { "epoch": 105.97, "learning_rate": 5.870291262135923e-06, "loss": 0.003, "step": 272880 }, { "epoch": 105.98, "learning_rate": 5.8697734627831715e-06, "loss": 0.0024, "step": 272890 }, { "epoch": 105.98, "learning_rate": 5.869255663430421e-06, "loss": 0.091, "step": 272900 }, { "epoch": 105.98, "learning_rate": 5.868737864077671e-06, "loss": 0.0246, "step": 272910 }, { "epoch": 105.99, "learning_rate": 5.8682200647249195e-06, "loss": 0.0993, "step": 272920 }, { "epoch": 105.99, "learning_rate": 5.867702265372168e-06, "loss": 0.0108, "step": 272930 }, { "epoch": 106.0, "learning_rate": 5.867184466019418e-06, "loss": 0.0105, "step": 272940 }, { "epoch": 106.0, "learning_rate": 5.8666666666666675e-06, "loss": 0.0213, "step": 272950 }, { "epoch": 106.0, "eval_accuracy": 0.9499312242090784, "eval_loss": 0.37747612595558167, "eval_runtime": 8.1704, "eval_samples_per_second": 444.896, "eval_steps_per_second": 55.689, "step": 272950 }, { "epoch": 106.0, "learning_rate": 5.866148867313916e-06, "loss": 0.0153, "step": 272960 }, { "epoch": 106.01, "learning_rate": 5.865631067961165e-06, "loss": 0.0341, "step": 272970 }, { "epoch": 106.01, "learning_rate": 5.865113268608415e-06, "loss": 0.1719, "step": 272980 }, { "epoch": 106.02, "learning_rate": 5.864595469255664e-06, "loss": 0.0515, "step": 272990 }, { "epoch": 106.02, "learning_rate": 5.864077669902913e-06, "loss": 0.0739, "step": 273000 }, { "epoch": 106.02, "learning_rate": 5.863559870550163e-06, "loss": 0.0181, "step": 273010 }, { "epoch": 106.03, "learning_rate": 5.8630420711974115e-06, "loss": 0.0836, "step": 273020 }, { "epoch": 106.03, "learning_rate": 5.862524271844661e-06, "loss": 0.1646, "step": 273030 }, { "epoch": 106.03, "learning_rate": 5.86200647249191e-06, "loss": 0.0002, "step": 273040 }, { "epoch": 106.04, "learning_rate": 5.8614886731391595e-06, "loss": 0.0922, "step": 273050 }, { "epoch": 106.04, "learning_rate": 5.860970873786408e-06, "loss": 0.001, "step": 273060 }, { "epoch": 106.05, "learning_rate": 5.860453074433657e-06, "loss": 0.0011, "step": 273070 }, { "epoch": 106.05, "learning_rate": 5.859935275080907e-06, "loss": 0.0795, "step": 273080 }, { "epoch": 106.05, "learning_rate": 5.859417475728156e-06, "loss": 0.0527, "step": 273090 }, { "epoch": 106.06, "learning_rate": 5.858899676375405e-06, "loss": 0.0473, "step": 273100 }, { "epoch": 106.06, "learning_rate": 5.858381877022654e-06, "loss": 0.0001, "step": 273110 }, { "epoch": 106.07, "learning_rate": 5.857864077669903e-06, "loss": 0.0065, "step": 273120 }, { "epoch": 106.07, "learning_rate": 5.857346278317153e-06, "loss": 0.0288, "step": 273130 }, { "epoch": 106.07, "learning_rate": 5.856828478964402e-06, "loss": 0.0127, "step": 273140 }, { "epoch": 106.08, "learning_rate": 5.8563106796116506e-06, "loss": 0.0336, "step": 273150 }, { "epoch": 106.08, "learning_rate": 5.8557928802589e-06, "loss": 0.0458, "step": 273160 }, { "epoch": 106.09, "learning_rate": 5.85527508090615e-06, "loss": 0.0151, "step": 273170 }, { "epoch": 106.09, "learning_rate": 5.854757281553399e-06, "loss": 0.0245, "step": 273180 }, { "epoch": 106.09, "learning_rate": 5.854239482200647e-06, "loss": 0.1152, "step": 273190 }, { "epoch": 106.1, "learning_rate": 5.853721682847897e-06, "loss": 0.1609, "step": 273200 }, { "epoch": 106.1, "learning_rate": 5.853203883495147e-06, "loss": 0.0096, "step": 273210 }, { "epoch": 106.1, "learning_rate": 5.8526860841423945e-06, "loss": 0.0283, "step": 273220 }, { "epoch": 106.11, "learning_rate": 5.852168284789644e-06, "loss": 0.0514, "step": 273230 }, { "epoch": 106.11, "learning_rate": 5.851650485436894e-06, "loss": 0.0126, "step": 273240 }, { "epoch": 106.12, "learning_rate": 5.851132686084143e-06, "loss": 0.0158, "step": 273250 }, { "epoch": 106.12, "learning_rate": 5.850614886731391e-06, "loss": 0.0014, "step": 273260 }, { "epoch": 106.12, "learning_rate": 5.850097087378641e-06, "loss": 0.1306, "step": 273270 }, { "epoch": 106.13, "learning_rate": 5.8495792880258905e-06, "loss": 0.0177, "step": 273280 }, { "epoch": 106.13, "learning_rate": 5.84906148867314e-06, "loss": 0.0005, "step": 273290 }, { "epoch": 106.14, "learning_rate": 5.848543689320388e-06, "loss": 0.0022, "step": 273300 }, { "epoch": 106.14, "learning_rate": 5.848025889967638e-06, "loss": 0.0113, "step": 273310 }, { "epoch": 106.14, "learning_rate": 5.847508090614887e-06, "loss": 0.0981, "step": 273320 }, { "epoch": 106.15, "learning_rate": 5.846990291262137e-06, "loss": 0.0215, "step": 273330 }, { "epoch": 106.15, "learning_rate": 5.846472491909385e-06, "loss": 0.0134, "step": 273340 }, { "epoch": 106.16, "learning_rate": 5.8459546925566345e-06, "loss": 0.0603, "step": 273350 }, { "epoch": 106.16, "learning_rate": 5.845436893203884e-06, "loss": 0.0726, "step": 273360 }, { "epoch": 106.16, "learning_rate": 5.844919093851134e-06, "loss": 0.0705, "step": 273370 }, { "epoch": 106.17, "learning_rate": 5.844401294498382e-06, "loss": 0.0002, "step": 273380 }, { "epoch": 106.17, "learning_rate": 5.843883495145631e-06, "loss": 0.0162, "step": 273390 }, { "epoch": 106.17, "learning_rate": 5.843365695792881e-06, "loss": 0.0177, "step": 273400 }, { "epoch": 106.18, "learning_rate": 5.8428478964401305e-06, "loss": 0.1124, "step": 273410 }, { "epoch": 106.18, "learning_rate": 5.842330097087378e-06, "loss": 0.1101, "step": 273420 }, { "epoch": 106.19, "learning_rate": 5.841812297734628e-06, "loss": 0.0001, "step": 273430 }, { "epoch": 106.19, "learning_rate": 5.841294498381878e-06, "loss": 0.0291, "step": 273440 }, { "epoch": 106.19, "learning_rate": 5.840776699029127e-06, "loss": 0.0722, "step": 273450 }, { "epoch": 106.2, "learning_rate": 5.840258899676375e-06, "loss": 0.0599, "step": 273460 }, { "epoch": 106.2, "learning_rate": 5.839741100323625e-06, "loss": 0.1002, "step": 273470 }, { "epoch": 106.21, "learning_rate": 5.839223300970874e-06, "loss": 0.0199, "step": 273480 }, { "epoch": 106.21, "learning_rate": 5.838705501618124e-06, "loss": 0.0055, "step": 273490 }, { "epoch": 106.21, "learning_rate": 5.838187702265372e-06, "loss": 0.004, "step": 273500 }, { "epoch": 106.22, "learning_rate": 5.837669902912622e-06, "loss": 0.0037, "step": 273510 }, { "epoch": 106.22, "learning_rate": 5.837152103559871e-06, "loss": 0.0128, "step": 273520 }, { "epoch": 106.23, "learning_rate": 5.836634304207121e-06, "loss": 0.01, "step": 273530 }, { "epoch": 106.23, "learning_rate": 5.836116504854369e-06, "loss": 0.0578, "step": 273540 }, { "epoch": 106.23, "learning_rate": 5.835598705501618e-06, "loss": 0.0425, "step": 273550 }, { "epoch": 106.24, "learning_rate": 5.835080906148868e-06, "loss": 0.0427, "step": 273560 }, { "epoch": 106.24, "learning_rate": 5.834563106796118e-06, "loss": 0.0098, "step": 273570 }, { "epoch": 106.24, "learning_rate": 5.834045307443366e-06, "loss": 0.049, "step": 273580 }, { "epoch": 106.25, "learning_rate": 5.833527508090615e-06, "loss": 0.0127, "step": 273590 }, { "epoch": 106.25, "learning_rate": 5.833009708737865e-06, "loss": 0.0182, "step": 273600 }, { "epoch": 106.26, "learning_rate": 5.832491909385114e-06, "loss": 0.0918, "step": 273610 }, { "epoch": 106.26, "learning_rate": 5.831974110032363e-06, "loss": 0.0012, "step": 273620 }, { "epoch": 106.26, "learning_rate": 5.831456310679612e-06, "loss": 0.175, "step": 273630 }, { "epoch": 106.27, "learning_rate": 5.8309385113268615e-06, "loss": 0.018, "step": 273640 }, { "epoch": 106.27, "learning_rate": 5.830420711974111e-06, "loss": 0.0216, "step": 273650 }, { "epoch": 106.28, "learning_rate": 5.82990291262136e-06, "loss": 0.0386, "step": 273660 }, { "epoch": 106.28, "learning_rate": 5.829385113268609e-06, "loss": 0.0437, "step": 273670 }, { "epoch": 106.28, "learning_rate": 5.828867313915858e-06, "loss": 0.0634, "step": 273680 }, { "epoch": 106.29, "learning_rate": 5.828349514563107e-06, "loss": 0.0257, "step": 273690 }, { "epoch": 106.29, "learning_rate": 5.827831715210357e-06, "loss": 0.0752, "step": 273700 }, { "epoch": 106.3, "learning_rate": 5.8273139158576055e-06, "loss": 0.0298, "step": 273710 }, { "epoch": 106.3, "learning_rate": 5.826796116504855e-06, "loss": 0.1016, "step": 273720 }, { "epoch": 106.3, "learning_rate": 5.826278317152104e-06, "loss": 0.0717, "step": 273730 }, { "epoch": 106.31, "learning_rate": 5.8257605177993535e-06, "loss": 0.0172, "step": 273740 }, { "epoch": 106.31, "learning_rate": 5.825242718446602e-06, "loss": 0.0165, "step": 273750 }, { "epoch": 106.31, "learning_rate": 5.824724919093852e-06, "loss": 0.1044, "step": 273760 }, { "epoch": 106.32, "learning_rate": 5.824207119741101e-06, "loss": 0.0013, "step": 273770 }, { "epoch": 106.32, "learning_rate": 5.82368932038835e-06, "loss": 0.0213, "step": 273780 }, { "epoch": 106.33, "learning_rate": 5.823171521035599e-06, "loss": 0.2549, "step": 273790 }, { "epoch": 106.33, "learning_rate": 5.822653721682849e-06, "loss": 0.0915, "step": 273800 }, { "epoch": 106.33, "learning_rate": 5.822135922330097e-06, "loss": 0.1249, "step": 273810 }, { "epoch": 106.34, "learning_rate": 5.821618122977347e-06, "loss": 0.0666, "step": 273820 }, { "epoch": 106.34, "learning_rate": 5.821100323624596e-06, "loss": 0.1315, "step": 273830 }, { "epoch": 106.35, "learning_rate": 5.820582524271845e-06, "loss": 0.0265, "step": 273840 }, { "epoch": 106.35, "learning_rate": 5.820064724919094e-06, "loss": 0.0526, "step": 273850 }, { "epoch": 106.35, "learning_rate": 5.819546925566344e-06, "loss": 0.089, "step": 273860 }, { "epoch": 106.36, "learning_rate": 5.819029126213593e-06, "loss": 0.0126, "step": 273870 }, { "epoch": 106.36, "learning_rate": 5.818511326860841e-06, "loss": 0.0098, "step": 273880 }, { "epoch": 106.37, "learning_rate": 5.817993527508091e-06, "loss": 0.0152, "step": 273890 }, { "epoch": 106.37, "learning_rate": 5.817475728155341e-06, "loss": 0.0042, "step": 273900 }, { "epoch": 106.37, "learning_rate": 5.816957928802589e-06, "loss": 0.035, "step": 273910 }, { "epoch": 106.38, "learning_rate": 5.816440129449838e-06, "loss": 0.0126, "step": 273920 }, { "epoch": 106.38, "learning_rate": 5.815922330097088e-06, "loss": 0.0232, "step": 273930 }, { "epoch": 106.38, "learning_rate": 5.815404530744337e-06, "loss": 0.0433, "step": 273940 }, { "epoch": 106.39, "learning_rate": 5.814886731391586e-06, "loss": 0.0616, "step": 273950 }, { "epoch": 106.39, "learning_rate": 5.814368932038835e-06, "loss": 0.0114, "step": 273960 }, { "epoch": 106.4, "learning_rate": 5.8138511326860845e-06, "loss": 0.0235, "step": 273970 }, { "epoch": 106.4, "learning_rate": 5.813333333333334e-06, "loss": 0.0257, "step": 273980 }, { "epoch": 106.4, "learning_rate": 5.812815533980583e-06, "loss": 0.1049, "step": 273990 }, { "epoch": 106.41, "learning_rate": 5.812297734627832e-06, "loss": 0.1609, "step": 274000 }, { "epoch": 106.41, "learning_rate": 5.811779935275081e-06, "loss": 0.0842, "step": 274010 }, { "epoch": 106.42, "learning_rate": 5.811262135922331e-06, "loss": 0.0116, "step": 274020 }, { "epoch": 106.42, "learning_rate": 5.810744336569579e-06, "loss": 0.0153, "step": 274030 }, { "epoch": 106.42, "learning_rate": 5.8102265372168285e-06, "loss": 0.0009, "step": 274040 }, { "epoch": 106.43, "learning_rate": 5.809708737864078e-06, "loss": 0.1575, "step": 274050 }, { "epoch": 106.43, "learning_rate": 5.809190938511328e-06, "loss": 0.0002, "step": 274060 }, { "epoch": 106.43, "learning_rate": 5.808673139158576e-06, "loss": 0.0104, "step": 274070 }, { "epoch": 106.44, "learning_rate": 5.808155339805825e-06, "loss": 0.0043, "step": 274080 }, { "epoch": 106.44, "learning_rate": 5.807637540453075e-06, "loss": 0.0311, "step": 274090 }, { "epoch": 106.45, "learning_rate": 5.8071197411003245e-06, "loss": 0.0811, "step": 274100 }, { "epoch": 106.45, "learning_rate": 5.806601941747572e-06, "loss": 0.0299, "step": 274110 }, { "epoch": 106.45, "learning_rate": 5.806084142394822e-06, "loss": 0.0002, "step": 274120 }, { "epoch": 106.46, "learning_rate": 5.805566343042072e-06, "loss": 0.0931, "step": 274130 }, { "epoch": 106.46, "learning_rate": 5.805048543689321e-06, "loss": 0.1359, "step": 274140 }, { "epoch": 106.47, "learning_rate": 5.804530744336571e-06, "loss": 0.1135, "step": 274150 }, { "epoch": 106.47, "learning_rate": 5.804012944983819e-06, "loss": 0.0621, "step": 274160 }, { "epoch": 106.47, "learning_rate": 5.803495145631068e-06, "loss": 0.1754, "step": 274170 }, { "epoch": 106.48, "learning_rate": 5.802977346278318e-06, "loss": 0.0443, "step": 274180 }, { "epoch": 106.48, "learning_rate": 5.802459546925568e-06, "loss": 0.0501, "step": 274190 }, { "epoch": 106.49, "learning_rate": 5.8019417475728156e-06, "loss": 0.0476, "step": 274200 }, { "epoch": 106.49, "learning_rate": 5.801423948220065e-06, "loss": 0.08, "step": 274210 }, { "epoch": 106.49, "learning_rate": 5.800906148867315e-06, "loss": 0.0191, "step": 274220 }, { "epoch": 106.5, "learning_rate": 5.8003883495145644e-06, "loss": 0.0776, "step": 274230 }, { "epoch": 106.5, "learning_rate": 5.799870550161812e-06, "loss": 0.1239, "step": 274240 }, { "epoch": 106.5, "learning_rate": 5.799352750809062e-06, "loss": 0.0601, "step": 274250 }, { "epoch": 106.51, "learning_rate": 5.798834951456312e-06, "loss": 0.0453, "step": 274260 }, { "epoch": 106.51, "learning_rate": 5.798317152103561e-06, "loss": 0.0714, "step": 274270 }, { "epoch": 106.52, "learning_rate": 5.797799352750809e-06, "loss": 0.0304, "step": 274280 }, { "epoch": 106.52, "learning_rate": 5.797281553398059e-06, "loss": 0.0192, "step": 274290 }, { "epoch": 106.52, "learning_rate": 5.796763754045308e-06, "loss": 0.0346, "step": 274300 }, { "epoch": 106.53, "learning_rate": 5.796245954692558e-06, "loss": 0.0143, "step": 274310 }, { "epoch": 106.53, "learning_rate": 5.795728155339806e-06, "loss": 0.1402, "step": 274320 }, { "epoch": 106.54, "learning_rate": 5.7952103559870555e-06, "loss": 0.0663, "step": 274330 }, { "epoch": 106.54, "learning_rate": 5.794692556634305e-06, "loss": 0.002, "step": 274340 }, { "epoch": 106.54, "learning_rate": 5.794174757281554e-06, "loss": 0.0006, "step": 274350 }, { "epoch": 106.55, "learning_rate": 5.793656957928803e-06, "loss": 0.0276, "step": 274360 }, { "epoch": 106.55, "learning_rate": 5.793139158576052e-06, "loss": 0.2616, "step": 274370 }, { "epoch": 106.56, "learning_rate": 5.792621359223302e-06, "loss": 0.0003, "step": 274380 }, { "epoch": 106.56, "learning_rate": 5.792103559870551e-06, "loss": 0.037, "step": 274390 }, { "epoch": 106.56, "learning_rate": 5.7915857605177995e-06, "loss": 0.0577, "step": 274400 }, { "epoch": 106.57, "learning_rate": 5.791067961165049e-06, "loss": 0.0614, "step": 274410 }, { "epoch": 106.57, "learning_rate": 5.790550161812299e-06, "loss": 0.0901, "step": 274420 }, { "epoch": 106.57, "learning_rate": 5.7900323624595475e-06, "loss": 0.0958, "step": 274430 }, { "epoch": 106.58, "learning_rate": 5.789514563106796e-06, "loss": 0.0501, "step": 274440 }, { "epoch": 106.58, "learning_rate": 5.788996763754046e-06, "loss": 0.0245, "step": 274450 }, { "epoch": 106.59, "learning_rate": 5.7884789644012955e-06, "loss": 0.0554, "step": 274460 }, { "epoch": 106.59, "learning_rate": 5.787961165048544e-06, "loss": 0.0921, "step": 274470 }, { "epoch": 106.59, "learning_rate": 5.787443365695793e-06, "loss": 0.0093, "step": 274480 }, { "epoch": 106.6, "learning_rate": 5.786925566343043e-06, "loss": 0.0196, "step": 274490 }, { "epoch": 106.6, "learning_rate": 5.786407766990291e-06, "loss": 0.0176, "step": 274500 }, { "epoch": 106.61, "learning_rate": 5.785889967637541e-06, "loss": 0.0619, "step": 274510 }, { "epoch": 106.61, "learning_rate": 5.78537216828479e-06, "loss": 0.0185, "step": 274520 }, { "epoch": 106.61, "learning_rate": 5.784854368932039e-06, "loss": 0.1206, "step": 274530 }, { "epoch": 106.62, "learning_rate": 5.784336569579288e-06, "loss": 0.1073, "step": 274540 }, { "epoch": 106.62, "learning_rate": 5.783818770226538e-06, "loss": 0.0949, "step": 274550 }, { "epoch": 106.63, "learning_rate": 5.783300970873787e-06, "loss": 0.0346, "step": 274560 }, { "epoch": 106.63, "learning_rate": 5.782783171521036e-06, "loss": 0.0045, "step": 274570 }, { "epoch": 106.63, "learning_rate": 5.782265372168285e-06, "loss": 0.0834, "step": 274580 }, { "epoch": 106.64, "learning_rate": 5.781747572815535e-06, "loss": 0.0589, "step": 274590 }, { "epoch": 106.64, "learning_rate": 5.781229773462783e-06, "loss": 0.0461, "step": 274600 }, { "epoch": 106.64, "learning_rate": 5.780711974110033e-06, "loss": 0.1046, "step": 274610 }, { "epoch": 106.65, "learning_rate": 5.780194174757282e-06, "loss": 0.1322, "step": 274620 }, { "epoch": 106.65, "learning_rate": 5.779676375404531e-06, "loss": 0.0296, "step": 274630 }, { "epoch": 106.66, "learning_rate": 5.77915857605178e-06, "loss": 0.0808, "step": 274640 }, { "epoch": 106.66, "learning_rate": 5.77864077669903e-06, "loss": 0.0054, "step": 274650 }, { "epoch": 106.66, "learning_rate": 5.7781229773462785e-06, "loss": 0.0472, "step": 274660 }, { "epoch": 106.67, "learning_rate": 5.777605177993528e-06, "loss": 0.169, "step": 274670 }, { "epoch": 106.67, "learning_rate": 5.777087378640777e-06, "loss": 0.1849, "step": 274680 }, { "epoch": 106.68, "learning_rate": 5.776569579288026e-06, "loss": 0.0914, "step": 274690 }, { "epoch": 106.68, "learning_rate": 5.776051779935275e-06, "loss": 0.0005, "step": 274700 }, { "epoch": 106.68, "learning_rate": 5.775533980582525e-06, "loss": 0.0453, "step": 274710 }, { "epoch": 106.69, "learning_rate": 5.7750161812297745e-06, "loss": 0.0968, "step": 274720 }, { "epoch": 106.69, "learning_rate": 5.7744983818770225e-06, "loss": 0.093, "step": 274730 }, { "epoch": 106.7, "learning_rate": 5.773980582524272e-06, "loss": 0.0758, "step": 274740 }, { "epoch": 106.7, "learning_rate": 5.773462783171522e-06, "loss": 0.0574, "step": 274750 }, { "epoch": 106.7, "learning_rate": 5.772944983818771e-06, "loss": 0.012, "step": 274760 }, { "epoch": 106.71, "learning_rate": 5.772427184466019e-06, "loss": 0.1484, "step": 274770 }, { "epoch": 106.71, "learning_rate": 5.771909385113269e-06, "loss": 0.0156, "step": 274780 }, { "epoch": 106.71, "learning_rate": 5.7713915857605185e-06, "loss": 0.0517, "step": 274790 }, { "epoch": 106.72, "learning_rate": 5.770873786407768e-06, "loss": 0.1327, "step": 274800 }, { "epoch": 106.72, "learning_rate": 5.770355987055016e-06, "loss": 0.1137, "step": 274810 }, { "epoch": 106.73, "learning_rate": 5.769838187702266e-06, "loss": 0.0518, "step": 274820 }, { "epoch": 106.73, "learning_rate": 5.769320388349515e-06, "loss": 0.0005, "step": 274830 }, { "epoch": 106.73, "learning_rate": 5.768802588996765e-06, "loss": 0.0745, "step": 274840 }, { "epoch": 106.74, "learning_rate": 5.768284789644013e-06, "loss": 0.0551, "step": 274850 }, { "epoch": 106.74, "learning_rate": 5.767766990291262e-06, "loss": 0.1052, "step": 274860 }, { "epoch": 106.75, "learning_rate": 5.767249190938512e-06, "loss": 0.0002, "step": 274870 }, { "epoch": 106.75, "learning_rate": 5.766731391585762e-06, "loss": 0.0146, "step": 274880 }, { "epoch": 106.75, "learning_rate": 5.7662135922330096e-06, "loss": 0.0156, "step": 274890 }, { "epoch": 106.76, "learning_rate": 5.765695792880259e-06, "loss": 0.0584, "step": 274900 }, { "epoch": 106.76, "learning_rate": 5.765177993527509e-06, "loss": 0.0962, "step": 274910 }, { "epoch": 106.77, "learning_rate": 5.7646601941747584e-06, "loss": 0.0647, "step": 274920 }, { "epoch": 106.77, "learning_rate": 5.764142394822006e-06, "loss": 0.0009, "step": 274930 }, { "epoch": 106.77, "learning_rate": 5.763624595469256e-06, "loss": 0.0763, "step": 274940 }, { "epoch": 106.78, "learning_rate": 5.763106796116506e-06, "loss": 0.0846, "step": 274950 }, { "epoch": 106.78, "learning_rate": 5.762588996763755e-06, "loss": 0.0258, "step": 274960 }, { "epoch": 106.78, "learning_rate": 5.762071197411003e-06, "loss": 0.0368, "step": 274970 }, { "epoch": 106.79, "learning_rate": 5.761553398058253e-06, "loss": 0.0175, "step": 274980 }, { "epoch": 106.79, "learning_rate": 5.761035598705502e-06, "loss": 0.0575, "step": 274990 }, { "epoch": 106.8, "learning_rate": 5.760517799352752e-06, "loss": 0.0379, "step": 275000 }, { "epoch": 106.8, "learning_rate": 5.76e-06, "loss": 0.0676, "step": 275010 }, { "epoch": 106.8, "learning_rate": 5.7594822006472495e-06, "loss": 0.0122, "step": 275020 }, { "epoch": 106.81, "learning_rate": 5.758964401294499e-06, "loss": 0.2, "step": 275030 }, { "epoch": 106.81, "learning_rate": 5.758446601941749e-06, "loss": 0.0607, "step": 275040 }, { "epoch": 106.82, "learning_rate": 5.757928802588997e-06, "loss": 0.0292, "step": 275050 }, { "epoch": 106.82, "learning_rate": 5.757411003236246e-06, "loss": 0.0334, "step": 275060 }, { "epoch": 106.82, "learning_rate": 5.756893203883496e-06, "loss": 0.0587, "step": 275070 }, { "epoch": 106.83, "learning_rate": 5.7563754045307455e-06, "loss": 0.0284, "step": 275080 }, { "epoch": 106.83, "learning_rate": 5.7558576051779935e-06, "loss": 0.0683, "step": 275090 }, { "epoch": 106.83, "learning_rate": 5.755339805825243e-06, "loss": 0.0305, "step": 275100 }, { "epoch": 106.84, "learning_rate": 5.754822006472493e-06, "loss": 0.0124, "step": 275110 }, { "epoch": 106.84, "learning_rate": 5.754304207119742e-06, "loss": 0.0339, "step": 275120 }, { "epoch": 106.85, "learning_rate": 5.75378640776699e-06, "loss": 0.0621, "step": 275130 }, { "epoch": 106.85, "learning_rate": 5.75326860841424e-06, "loss": 0.0059, "step": 275140 }, { "epoch": 106.85, "learning_rate": 5.7527508090614895e-06, "loss": 0.1002, "step": 275150 }, { "epoch": 106.86, "learning_rate": 5.752233009708738e-06, "loss": 0.0953, "step": 275160 }, { "epoch": 106.86, "learning_rate": 5.751715210355987e-06, "loss": 0.0945, "step": 275170 }, { "epoch": 106.87, "learning_rate": 5.751197411003237e-06, "loss": 0.0131, "step": 275180 }, { "epoch": 106.87, "learning_rate": 5.750679611650486e-06, "loss": 0.0578, "step": 275190 }, { "epoch": 106.87, "learning_rate": 5.750161812297735e-06, "loss": 0.0697, "step": 275200 }, { "epoch": 106.88, "learning_rate": 5.749644012944984e-06, "loss": 0.0245, "step": 275210 }, { "epoch": 106.88, "learning_rate": 5.749126213592233e-06, "loss": 0.0428, "step": 275220 }, { "epoch": 106.89, "learning_rate": 5.748608414239483e-06, "loss": 0.0644, "step": 275230 }, { "epoch": 106.89, "learning_rate": 5.748090614886732e-06, "loss": 0.0291, "step": 275240 }, { "epoch": 106.89, "learning_rate": 5.747572815533981e-06, "loss": 0.0001, "step": 275250 }, { "epoch": 106.9, "learning_rate": 5.74705501618123e-06, "loss": 0.0196, "step": 275260 }, { "epoch": 106.9, "learning_rate": 5.74653721682848e-06, "loss": 0.0559, "step": 275270 }, { "epoch": 106.9, "learning_rate": 5.746019417475729e-06, "loss": 0.0273, "step": 275280 }, { "epoch": 106.91, "learning_rate": 5.745501618122978e-06, "loss": 0.039, "step": 275290 }, { "epoch": 106.91, "learning_rate": 5.744983818770227e-06, "loss": 0.0355, "step": 275300 }, { "epoch": 106.92, "learning_rate": 5.744466019417477e-06, "loss": 0.074, "step": 275310 }, { "epoch": 106.92, "learning_rate": 5.743948220064725e-06, "loss": 0.1195, "step": 275320 }, { "epoch": 106.92, "learning_rate": 5.743430420711975e-06, "loss": 0.1055, "step": 275330 }, { "epoch": 106.93, "learning_rate": 5.742912621359224e-06, "loss": 0.0517, "step": 275340 }, { "epoch": 106.93, "learning_rate": 5.7423948220064725e-06, "loss": 0.0093, "step": 275350 }, { "epoch": 106.94, "learning_rate": 5.741877022653722e-06, "loss": 0.0061, "step": 275360 }, { "epoch": 106.94, "learning_rate": 5.741359223300972e-06, "loss": 0.067, "step": 275370 }, { "epoch": 106.94, "learning_rate": 5.7408414239482205e-06, "loss": 0.0308, "step": 275380 }, { "epoch": 106.95, "learning_rate": 5.740323624595469e-06, "loss": 0.0009, "step": 275390 }, { "epoch": 106.95, "learning_rate": 5.739805825242719e-06, "loss": 0.0651, "step": 275400 }, { "epoch": 106.96, "learning_rate": 5.7392880258899685e-06, "loss": 0.0015, "step": 275410 }, { "epoch": 106.96, "learning_rate": 5.738770226537217e-06, "loss": 0.0014, "step": 275420 }, { "epoch": 106.96, "learning_rate": 5.738252427184466e-06, "loss": 0.0114, "step": 275430 }, { "epoch": 106.97, "learning_rate": 5.737734627831716e-06, "loss": 0.0235, "step": 275440 }, { "epoch": 106.97, "learning_rate": 5.737216828478965e-06, "loss": 0.0888, "step": 275450 }, { "epoch": 106.97, "learning_rate": 5.736699029126214e-06, "loss": 0.1199, "step": 275460 }, { "epoch": 106.98, "learning_rate": 5.736181229773463e-06, "loss": 0.0066, "step": 275470 }, { "epoch": 106.98, "learning_rate": 5.7356634304207125e-06, "loss": 0.0316, "step": 275480 }, { "epoch": 106.99, "learning_rate": 5.735145631067962e-06, "loss": 0.0001, "step": 275490 }, { "epoch": 106.99, "learning_rate": 5.73462783171521e-06, "loss": 0.0423, "step": 275500 }, { "epoch": 106.99, "learning_rate": 5.73411003236246e-06, "loss": 0.0198, "step": 275510 }, { "epoch": 107.0, "learning_rate": 5.733592233009709e-06, "loss": 0.1249, "step": 275520 }, { "epoch": 107.0, "eval_accuracy": 0.9491059147180193, "eval_loss": 0.3901929259300232, "eval_runtime": 8.2173, "eval_samples_per_second": 442.361, "eval_steps_per_second": 55.371, "step": 275525 }, { "epoch": 107.0, "learning_rate": 5.733074433656959e-06, "loss": 0.1031, "step": 275530 }, { "epoch": 107.01, "learning_rate": 5.732556634304207e-06, "loss": 0.0002, "step": 275540 }, { "epoch": 107.01, "learning_rate": 5.732038834951456e-06, "loss": 0.1034, "step": 275550 }, { "epoch": 107.01, "learning_rate": 5.731521035598706e-06, "loss": 0.0122, "step": 275560 }, { "epoch": 107.02, "learning_rate": 5.731003236245956e-06, "loss": 0.0983, "step": 275570 }, { "epoch": 107.02, "learning_rate": 5.7304854368932036e-06, "loss": 0.0972, "step": 275580 }, { "epoch": 107.03, "learning_rate": 5.729967637540453e-06, "loss": 0.06, "step": 275590 }, { "epoch": 107.03, "learning_rate": 5.729449838187703e-06, "loss": 0.0017, "step": 275600 }, { "epoch": 107.03, "learning_rate": 5.7289320388349524e-06, "loss": 0.0626, "step": 275610 }, { "epoch": 107.04, "learning_rate": 5.7284142394822e-06, "loss": 0.0025, "step": 275620 }, { "epoch": 107.04, "learning_rate": 5.72789644012945e-06, "loss": 0.0115, "step": 275630 }, { "epoch": 107.04, "learning_rate": 5.7273786407767e-06, "loss": 0.0127, "step": 275640 }, { "epoch": 107.05, "learning_rate": 5.726860841423949e-06, "loss": 0.0001, "step": 275650 }, { "epoch": 107.05, "learning_rate": 5.726343042071197e-06, "loss": 0.0775, "step": 275660 }, { "epoch": 107.06, "learning_rate": 5.725825242718447e-06, "loss": 0.0543, "step": 275670 }, { "epoch": 107.06, "learning_rate": 5.725307443365696e-06, "loss": 0.0988, "step": 275680 }, { "epoch": 107.06, "learning_rate": 5.724789644012946e-06, "loss": 0.0936, "step": 275690 }, { "epoch": 107.07, "learning_rate": 5.724271844660194e-06, "loss": 0.0099, "step": 275700 }, { "epoch": 107.07, "learning_rate": 5.7237540453074435e-06, "loss": 0.0219, "step": 275710 }, { "epoch": 107.08, "learning_rate": 5.723236245954693e-06, "loss": 0.0326, "step": 275720 }, { "epoch": 107.08, "learning_rate": 5.722718446601943e-06, "loss": 0.0602, "step": 275730 }, { "epoch": 107.08, "learning_rate": 5.722200647249191e-06, "loss": 0.0398, "step": 275740 }, { "epoch": 107.09, "learning_rate": 5.72168284789644e-06, "loss": 0.0368, "step": 275750 }, { "epoch": 107.09, "learning_rate": 5.72116504854369e-06, "loss": 0.1156, "step": 275760 }, { "epoch": 107.1, "learning_rate": 5.7206472491909395e-06, "loss": 0.044, "step": 275770 }, { "epoch": 107.1, "learning_rate": 5.7201294498381875e-06, "loss": 0.0361, "step": 275780 }, { "epoch": 107.1, "learning_rate": 5.719611650485437e-06, "loss": 0.0524, "step": 275790 }, { "epoch": 107.11, "learning_rate": 5.719093851132687e-06, "loss": 0.0783, "step": 275800 }, { "epoch": 107.11, "learning_rate": 5.718576051779936e-06, "loss": 0.0582, "step": 275810 }, { "epoch": 107.11, "learning_rate": 5.718058252427185e-06, "loss": 0.0853, "step": 275820 }, { "epoch": 107.12, "learning_rate": 5.717540453074434e-06, "loss": 0.0739, "step": 275830 }, { "epoch": 107.12, "learning_rate": 5.7170226537216835e-06, "loss": 0.1153, "step": 275840 }, { "epoch": 107.13, "learning_rate": 5.716504854368933e-06, "loss": 0.0516, "step": 275850 }, { "epoch": 107.13, "learning_rate": 5.715987055016182e-06, "loss": 0.0087, "step": 275860 }, { "epoch": 107.13, "learning_rate": 5.715469255663431e-06, "loss": 0.0213, "step": 275870 }, { "epoch": 107.14, "learning_rate": 5.71495145631068e-06, "loss": 0.0374, "step": 275880 }, { "epoch": 107.14, "learning_rate": 5.71443365695793e-06, "loss": 0.0565, "step": 275890 }, { "epoch": 107.15, "learning_rate": 5.713915857605179e-06, "loss": 0.0995, "step": 275900 }, { "epoch": 107.15, "learning_rate": 5.713398058252427e-06, "loss": 0.0172, "step": 275910 }, { "epoch": 107.15, "learning_rate": 5.712880258899677e-06, "loss": 0.0748, "step": 275920 }, { "epoch": 107.16, "learning_rate": 5.712362459546927e-06, "loss": 0.0593, "step": 275930 }, { "epoch": 107.16, "learning_rate": 5.711844660194175e-06, "loss": 0.1275, "step": 275940 }, { "epoch": 107.17, "learning_rate": 5.711326860841424e-06, "loss": 0.077, "step": 275950 }, { "epoch": 107.17, "learning_rate": 5.710809061488674e-06, "loss": 0.0153, "step": 275960 }, { "epoch": 107.17, "learning_rate": 5.710291262135923e-06, "loss": 0.0007, "step": 275970 }, { "epoch": 107.18, "learning_rate": 5.709773462783172e-06, "loss": 0.047, "step": 275980 }, { "epoch": 107.18, "learning_rate": 5.709255663430421e-06, "loss": 0.1218, "step": 275990 }, { "epoch": 107.18, "learning_rate": 5.708737864077671e-06, "loss": 0.0706, "step": 276000 }, { "epoch": 107.19, "learning_rate": 5.708220064724919e-06, "loss": 0.0164, "step": 276010 }, { "epoch": 107.19, "learning_rate": 5.707702265372169e-06, "loss": 0.1985, "step": 276020 }, { "epoch": 107.2, "learning_rate": 5.707184466019418e-06, "loss": 0.0759, "step": 276030 }, { "epoch": 107.2, "learning_rate": 5.706666666666667e-06, "loss": 0.0004, "step": 276040 }, { "epoch": 107.2, "learning_rate": 5.706148867313916e-06, "loss": 0.0625, "step": 276050 }, { "epoch": 107.21, "learning_rate": 5.705631067961166e-06, "loss": 0.0443, "step": 276060 }, { "epoch": 107.21, "learning_rate": 5.7051132686084145e-06, "loss": 0.0085, "step": 276070 }, { "epoch": 107.22, "learning_rate": 5.704595469255664e-06, "loss": 0.0103, "step": 276080 }, { "epoch": 107.22, "learning_rate": 5.704077669902913e-06, "loss": 0.0721, "step": 276090 }, { "epoch": 107.22, "learning_rate": 5.7035598705501625e-06, "loss": 0.0015, "step": 276100 }, { "epoch": 107.23, "learning_rate": 5.703042071197411e-06, "loss": 0.2234, "step": 276110 }, { "epoch": 107.23, "learning_rate": 5.702524271844661e-06, "loss": 0.0001, "step": 276120 }, { "epoch": 107.23, "learning_rate": 5.70200647249191e-06, "loss": 0.0608, "step": 276130 }, { "epoch": 107.24, "learning_rate": 5.701488673139159e-06, "loss": 0.0651, "step": 276140 }, { "epoch": 107.24, "learning_rate": 5.700970873786408e-06, "loss": 0.1088, "step": 276150 }, { "epoch": 107.25, "learning_rate": 5.700453074433657e-06, "loss": 0.219, "step": 276160 }, { "epoch": 107.25, "learning_rate": 5.6999352750809065e-06, "loss": 0.0067, "step": 276170 }, { "epoch": 107.25, "learning_rate": 5.699417475728156e-06, "loss": 0.029, "step": 276180 }, { "epoch": 107.26, "learning_rate": 5.698899676375405e-06, "loss": 0.1035, "step": 276190 }, { "epoch": 107.26, "learning_rate": 5.698381877022654e-06, "loss": 0.0877, "step": 276200 }, { "epoch": 107.27, "learning_rate": 5.697864077669903e-06, "loss": 0.0328, "step": 276210 }, { "epoch": 107.27, "learning_rate": 5.697346278317153e-06, "loss": 0.0964, "step": 276220 }, { "epoch": 107.27, "learning_rate": 5.696828478964402e-06, "loss": 0.0526, "step": 276230 }, { "epoch": 107.28, "learning_rate": 5.69631067961165e-06, "loss": 0.0621, "step": 276240 }, { "epoch": 107.28, "learning_rate": 5.6957928802589e-06, "loss": 0.1284, "step": 276250 }, { "epoch": 107.29, "learning_rate": 5.69527508090615e-06, "loss": 0.0765, "step": 276260 }, { "epoch": 107.29, "learning_rate": 5.694757281553398e-06, "loss": 0.0132, "step": 276270 }, { "epoch": 107.29, "learning_rate": 5.694239482200647e-06, "loss": 0.0729, "step": 276280 }, { "epoch": 107.3, "learning_rate": 5.693721682847897e-06, "loss": 0.0313, "step": 276290 }, { "epoch": 107.3, "learning_rate": 5.6932038834951464e-06, "loss": 0.0072, "step": 276300 }, { "epoch": 107.3, "learning_rate": 5.692686084142394e-06, "loss": 0.0125, "step": 276310 }, { "epoch": 107.31, "learning_rate": 5.692168284789644e-06, "loss": 0.1654, "step": 276320 }, { "epoch": 107.31, "learning_rate": 5.691650485436894e-06, "loss": 0.1318, "step": 276330 }, { "epoch": 107.32, "learning_rate": 5.691132686084143e-06, "loss": 0.1228, "step": 276340 }, { "epoch": 107.32, "learning_rate": 5.690614886731391e-06, "loss": 0.0951, "step": 276350 }, { "epoch": 107.32, "learning_rate": 5.690097087378641e-06, "loss": 0.0165, "step": 276360 }, { "epoch": 107.33, "learning_rate": 5.68957928802589e-06, "loss": 0.0059, "step": 276370 }, { "epoch": 107.33, "learning_rate": 5.68906148867314e-06, "loss": 0.0078, "step": 276380 }, { "epoch": 107.34, "learning_rate": 5.68854368932039e-06, "loss": 0.0306, "step": 276390 }, { "epoch": 107.34, "learning_rate": 5.6880258899676375e-06, "loss": 0.0032, "step": 276400 }, { "epoch": 107.34, "learning_rate": 5.687508090614887e-06, "loss": 0.0805, "step": 276410 }, { "epoch": 107.35, "learning_rate": 5.686990291262137e-06, "loss": 0.0727, "step": 276420 }, { "epoch": 107.35, "learning_rate": 5.686472491909386e-06, "loss": 0.0218, "step": 276430 }, { "epoch": 107.36, "learning_rate": 5.685954692556634e-06, "loss": 0.0795, "step": 276440 }, { "epoch": 107.36, "learning_rate": 5.685436893203884e-06, "loss": 0.0482, "step": 276450 }, { "epoch": 107.36, "learning_rate": 5.6849190938511335e-06, "loss": 0.0965, "step": 276460 }, { "epoch": 107.37, "learning_rate": 5.684401294498383e-06, "loss": 0.0416, "step": 276470 }, { "epoch": 107.37, "learning_rate": 5.683883495145631e-06, "loss": 0.1095, "step": 276480 }, { "epoch": 107.37, "learning_rate": 5.683365695792881e-06, "loss": 0.0424, "step": 276490 }, { "epoch": 107.38, "learning_rate": 5.68284789644013e-06, "loss": 0.0561, "step": 276500 }, { "epoch": 107.38, "learning_rate": 5.68233009708738e-06, "loss": 0.0197, "step": 276510 }, { "epoch": 107.39, "learning_rate": 5.681812297734628e-06, "loss": 0.0268, "step": 276520 }, { "epoch": 107.39, "learning_rate": 5.6812944983818775e-06, "loss": 0.0159, "step": 276530 }, { "epoch": 107.39, "learning_rate": 5.680776699029127e-06, "loss": 0.0104, "step": 276540 }, { "epoch": 107.4, "learning_rate": 5.680258899676377e-06, "loss": 0.086, "step": 276550 }, { "epoch": 107.4, "learning_rate": 5.679741100323625e-06, "loss": 0.0143, "step": 276560 }, { "epoch": 107.41, "learning_rate": 5.679223300970874e-06, "loss": 0.0318, "step": 276570 }, { "epoch": 107.41, "learning_rate": 5.678705501618124e-06, "loss": 0.0745, "step": 276580 }, { "epoch": 107.41, "learning_rate": 5.6781877022653735e-06, "loss": 0.1146, "step": 276590 }, { "epoch": 107.42, "learning_rate": 5.677669902912621e-06, "loss": 0.0099, "step": 276600 }, { "epoch": 107.42, "learning_rate": 5.677152103559871e-06, "loss": 0.0993, "step": 276610 }, { "epoch": 107.43, "learning_rate": 5.676634304207121e-06, "loss": 0.1012, "step": 276620 }, { "epoch": 107.43, "learning_rate": 5.676116504854369e-06, "loss": 0.1315, "step": 276630 }, { "epoch": 107.43, "learning_rate": 5.675598705501618e-06, "loss": 0.0008, "step": 276640 }, { "epoch": 107.44, "learning_rate": 5.675080906148868e-06, "loss": 0.1136, "step": 276650 }, { "epoch": 107.44, "learning_rate": 5.6745631067961174e-06, "loss": 0.114, "step": 276660 }, { "epoch": 107.44, "learning_rate": 5.674045307443366e-06, "loss": 0.041, "step": 276670 }, { "epoch": 107.45, "learning_rate": 5.673527508090615e-06, "loss": 0.1168, "step": 276680 }, { "epoch": 107.45, "learning_rate": 5.673009708737865e-06, "loss": 0.098, "step": 276690 }, { "epoch": 107.46, "learning_rate": 5.672491909385114e-06, "loss": 0.1129, "step": 276700 }, { "epoch": 107.46, "learning_rate": 5.671974110032363e-06, "loss": 0.0003, "step": 276710 }, { "epoch": 107.46, "learning_rate": 5.671456310679612e-06, "loss": 0.0316, "step": 276720 }, { "epoch": 107.47, "learning_rate": 5.670938511326861e-06, "loss": 0.0149, "step": 276730 }, { "epoch": 107.47, "learning_rate": 5.670420711974111e-06, "loss": 0.0139, "step": 276740 }, { "epoch": 107.48, "learning_rate": 5.66990291262136e-06, "loss": 0.0054, "step": 276750 }, { "epoch": 107.48, "learning_rate": 5.6693851132686085e-06, "loss": 0.0363, "step": 276760 }, { "epoch": 107.48, "learning_rate": 5.668867313915858e-06, "loss": 0.0847, "step": 276770 }, { "epoch": 107.49, "learning_rate": 5.668349514563107e-06, "loss": 0.059, "step": 276780 }, { "epoch": 107.49, "learning_rate": 5.6678317152103565e-06, "loss": 0.0449, "step": 276790 }, { "epoch": 107.5, "learning_rate": 5.667313915857605e-06, "loss": 0.0992, "step": 276800 }, { "epoch": 107.5, "learning_rate": 5.666796116504855e-06, "loss": 0.1517, "step": 276810 }, { "epoch": 107.5, "learning_rate": 5.666278317152104e-06, "loss": 0.0948, "step": 276820 }, { "epoch": 107.51, "learning_rate": 5.665760517799353e-06, "loss": 0.0986, "step": 276830 }, { "epoch": 107.51, "learning_rate": 5.665242718446602e-06, "loss": 0.0343, "step": 276840 }, { "epoch": 107.51, "learning_rate": 5.664724919093852e-06, "loss": 0.0274, "step": 276850 }, { "epoch": 107.52, "learning_rate": 5.6642071197411005e-06, "loss": 0.0304, "step": 276860 }, { "epoch": 107.52, "learning_rate": 5.66368932038835e-06, "loss": 0.0955, "step": 276870 }, { "epoch": 107.53, "learning_rate": 5.663171521035599e-06, "loss": 0.0263, "step": 276880 }, { "epoch": 107.53, "learning_rate": 5.6626537216828485e-06, "loss": 0.0271, "step": 276890 }, { "epoch": 107.53, "learning_rate": 5.662135922330097e-06, "loss": 0.048, "step": 276900 }, { "epoch": 107.54, "learning_rate": 5.661618122977347e-06, "loss": 0.0344, "step": 276910 }, { "epoch": 107.54, "learning_rate": 5.661100323624596e-06, "loss": 0.0157, "step": 276920 }, { "epoch": 107.55, "learning_rate": 5.660582524271845e-06, "loss": 0.0006, "step": 276930 }, { "epoch": 107.55, "learning_rate": 5.660064724919094e-06, "loss": 0.1053, "step": 276940 }, { "epoch": 107.55, "learning_rate": 5.659546925566344e-06, "loss": 0.071, "step": 276950 }, { "epoch": 107.56, "learning_rate": 5.659029126213593e-06, "loss": 0.0201, "step": 276960 }, { "epoch": 107.56, "learning_rate": 5.658511326860841e-06, "loss": 0.0412, "step": 276970 }, { "epoch": 107.57, "learning_rate": 5.657993527508091e-06, "loss": 0.0136, "step": 276980 }, { "epoch": 107.57, "learning_rate": 5.6574757281553404e-06, "loss": 0.0251, "step": 276990 }, { "epoch": 107.57, "learning_rate": 5.65695792880259e-06, "loss": 0.0829, "step": 277000 }, { "epoch": 107.58, "learning_rate": 5.656440129449838e-06, "loss": 0.0422, "step": 277010 }, { "epoch": 107.58, "learning_rate": 5.655922330097088e-06, "loss": 0.0376, "step": 277020 }, { "epoch": 107.58, "learning_rate": 5.655404530744337e-06, "loss": 0.0142, "step": 277030 }, { "epoch": 107.59, "learning_rate": 5.654886731391587e-06, "loss": 0.0481, "step": 277040 }, { "epoch": 107.59, "learning_rate": 5.654368932038835e-06, "loss": 0.0032, "step": 277050 }, { "epoch": 107.6, "learning_rate": 5.653851132686084e-06, "loss": 0.06, "step": 277060 }, { "epoch": 107.6, "learning_rate": 5.653333333333334e-06, "loss": 0.0754, "step": 277070 }, { "epoch": 107.6, "learning_rate": 5.652815533980584e-06, "loss": 0.0003, "step": 277080 }, { "epoch": 107.61, "learning_rate": 5.6522977346278315e-06, "loss": 0.0099, "step": 277090 }, { "epoch": 107.61, "learning_rate": 5.651779935275081e-06, "loss": 0.0528, "step": 277100 }, { "epoch": 107.62, "learning_rate": 5.651262135922331e-06, "loss": 0.0074, "step": 277110 }, { "epoch": 107.62, "learning_rate": 5.65074433656958e-06, "loss": 0.0393, "step": 277120 }, { "epoch": 107.62, "learning_rate": 5.650226537216828e-06, "loss": 0.1259, "step": 277130 }, { "epoch": 107.63, "learning_rate": 5.649708737864078e-06, "loss": 0.0811, "step": 277140 }, { "epoch": 107.63, "learning_rate": 5.6491909385113275e-06, "loss": 0.201, "step": 277150 }, { "epoch": 107.63, "learning_rate": 5.648673139158577e-06, "loss": 0.1425, "step": 277160 }, { "epoch": 107.64, "learning_rate": 5.648155339805825e-06, "loss": 0.0097, "step": 277170 }, { "epoch": 107.64, "learning_rate": 5.647637540453075e-06, "loss": 0.0155, "step": 277180 }, { "epoch": 107.65, "learning_rate": 5.647119741100324e-06, "loss": 0.0714, "step": 277190 }, { "epoch": 107.65, "learning_rate": 5.646601941747574e-06, "loss": 0.0341, "step": 277200 }, { "epoch": 107.65, "learning_rate": 5.646084142394822e-06, "loss": 0.0086, "step": 277210 }, { "epoch": 107.66, "learning_rate": 5.6455663430420715e-06, "loss": 0.0325, "step": 277220 }, { "epoch": 107.66, "learning_rate": 5.645048543689321e-06, "loss": 0.1432, "step": 277230 }, { "epoch": 107.67, "learning_rate": 5.644530744336571e-06, "loss": 0.041, "step": 277240 }, { "epoch": 107.67, "learning_rate": 5.644012944983819e-06, "loss": 0.1665, "step": 277250 }, { "epoch": 107.67, "learning_rate": 5.643495145631068e-06, "loss": 0.0439, "step": 277260 }, { "epoch": 107.68, "learning_rate": 5.642977346278318e-06, "loss": 0.0683, "step": 277270 }, { "epoch": 107.68, "learning_rate": 5.6424595469255675e-06, "loss": 0.0277, "step": 277280 }, { "epoch": 107.69, "learning_rate": 5.641941747572815e-06, "loss": 0.0292, "step": 277290 }, { "epoch": 107.69, "learning_rate": 5.641423948220065e-06, "loss": 0.0226, "step": 277300 }, { "epoch": 107.69, "learning_rate": 5.640906148867315e-06, "loss": 0.0232, "step": 277310 }, { "epoch": 107.7, "learning_rate": 5.640388349514564e-06, "loss": 0.0602, "step": 277320 }, { "epoch": 107.7, "learning_rate": 5.639870550161812e-06, "loss": 0.0462, "step": 277330 }, { "epoch": 107.7, "learning_rate": 5.639352750809062e-06, "loss": 0.0719, "step": 277340 }, { "epoch": 107.71, "learning_rate": 5.6388349514563114e-06, "loss": 0.1397, "step": 277350 }, { "epoch": 107.71, "learning_rate": 5.638317152103561e-06, "loss": 0.0833, "step": 277360 }, { "epoch": 107.72, "learning_rate": 5.637799352750809e-06, "loss": 0.0243, "step": 277370 }, { "epoch": 107.72, "learning_rate": 5.637281553398059e-06, "loss": 0.1004, "step": 277380 }, { "epoch": 107.72, "learning_rate": 5.636763754045308e-06, "loss": 0.0097, "step": 277390 }, { "epoch": 107.73, "learning_rate": 5.636245954692558e-06, "loss": 0.033, "step": 277400 }, { "epoch": 107.73, "learning_rate": 5.635728155339806e-06, "loss": 0.0384, "step": 277410 }, { "epoch": 107.74, "learning_rate": 5.635210355987055e-06, "loss": 0.0001, "step": 277420 }, { "epoch": 107.74, "learning_rate": 5.634692556634305e-06, "loss": 0.0797, "step": 277430 }, { "epoch": 107.74, "learning_rate": 5.634174757281554e-06, "loss": 0.0223, "step": 277440 }, { "epoch": 107.75, "learning_rate": 5.6336569579288025e-06, "loss": 0.0571, "step": 277450 }, { "epoch": 107.75, "learning_rate": 5.633139158576052e-06, "loss": 0.0223, "step": 277460 }, { "epoch": 107.76, "learning_rate": 5.632621359223302e-06, "loss": 0.0219, "step": 277470 }, { "epoch": 107.76, "learning_rate": 5.6321035598705505e-06, "loss": 0.0005, "step": 277480 }, { "epoch": 107.76, "learning_rate": 5.631585760517799e-06, "loss": 0.0884, "step": 277490 }, { "epoch": 107.77, "learning_rate": 5.631067961165049e-06, "loss": 0.0001, "step": 277500 }, { "epoch": 107.77, "learning_rate": 5.6305501618122985e-06, "loss": 0.0372, "step": 277510 }, { "epoch": 107.77, "learning_rate": 5.630032362459547e-06, "loss": 0.0667, "step": 277520 }, { "epoch": 107.78, "learning_rate": 5.629514563106797e-06, "loss": 0.0417, "step": 277530 }, { "epoch": 107.78, "learning_rate": 5.628996763754046e-06, "loss": 0.0141, "step": 277540 }, { "epoch": 107.79, "learning_rate": 5.628478964401295e-06, "loss": 0.1067, "step": 277550 }, { "epoch": 107.79, "learning_rate": 5.627961165048544e-06, "loss": 0.0138, "step": 277560 }, { "epoch": 107.79, "learning_rate": 5.627443365695794e-06, "loss": 0.0059, "step": 277570 }, { "epoch": 107.8, "learning_rate": 5.6269255663430425e-06, "loss": 0.0781, "step": 277580 }, { "epoch": 107.8, "learning_rate": 5.626407766990292e-06, "loss": 0.0417, "step": 277590 }, { "epoch": 107.81, "learning_rate": 5.625889967637541e-06, "loss": 0.0474, "step": 277600 }, { "epoch": 107.81, "learning_rate": 5.6253721682847905e-06, "loss": 0.1032, "step": 277610 }, { "epoch": 107.81, "learning_rate": 5.624854368932039e-06, "loss": 0.0088, "step": 277620 }, { "epoch": 107.82, "learning_rate": 5.624336569579288e-06, "loss": 0.0123, "step": 277630 }, { "epoch": 107.82, "learning_rate": 5.623818770226538e-06, "loss": 0.034, "step": 277640 }, { "epoch": 107.83, "learning_rate": 5.623300970873787e-06, "loss": 0.0032, "step": 277650 }, { "epoch": 107.83, "learning_rate": 5.622783171521036e-06, "loss": 0.0338, "step": 277660 }, { "epoch": 107.83, "learning_rate": 5.622265372168285e-06, "loss": 0.1775, "step": 277670 }, { "epoch": 107.84, "learning_rate": 5.6217475728155344e-06, "loss": 0.0509, "step": 277680 }, { "epoch": 107.84, "learning_rate": 5.621229773462784e-06, "loss": 0.0002, "step": 277690 }, { "epoch": 107.84, "learning_rate": 5.620711974110033e-06, "loss": 0.1802, "step": 277700 }, { "epoch": 107.85, "learning_rate": 5.620194174757282e-06, "loss": 0.0453, "step": 277710 }, { "epoch": 107.85, "learning_rate": 5.619676375404531e-06, "loss": 0.0685, "step": 277720 }, { "epoch": 107.86, "learning_rate": 5.619158576051781e-06, "loss": 0.0058, "step": 277730 }, { "epoch": 107.86, "learning_rate": 5.61864077669903e-06, "loss": 0.068, "step": 277740 }, { "epoch": 107.86, "learning_rate": 5.618122977346278e-06, "loss": 0.0527, "step": 277750 }, { "epoch": 107.87, "learning_rate": 5.617605177993528e-06, "loss": 0.001, "step": 277760 }, { "epoch": 107.87, "learning_rate": 5.617087378640778e-06, "loss": 0.0245, "step": 277770 }, { "epoch": 107.88, "learning_rate": 5.6165695792880255e-06, "loss": 0.0836, "step": 277780 }, { "epoch": 107.88, "learning_rate": 5.616051779935275e-06, "loss": 0.047, "step": 277790 }, { "epoch": 107.88, "learning_rate": 5.615533980582525e-06, "loss": 0.0708, "step": 277800 }, { "epoch": 107.89, "learning_rate": 5.615016181229774e-06, "loss": 0.1361, "step": 277810 }, { "epoch": 107.89, "learning_rate": 5.614498381877022e-06, "loss": 0.0756, "step": 277820 }, { "epoch": 107.9, "learning_rate": 5.613980582524272e-06, "loss": 0.0389, "step": 277830 }, { "epoch": 107.9, "learning_rate": 5.6134627831715215e-06, "loss": 0.0359, "step": 277840 }, { "epoch": 107.9, "learning_rate": 5.612944983818771e-06, "loss": 0.0006, "step": 277850 }, { "epoch": 107.91, "learning_rate": 5.612427184466019e-06, "loss": 0.0663, "step": 277860 }, { "epoch": 107.91, "learning_rate": 5.611909385113269e-06, "loss": 0.0005, "step": 277870 }, { "epoch": 107.91, "learning_rate": 5.611391585760518e-06, "loss": 0.0024, "step": 277880 }, { "epoch": 107.92, "learning_rate": 5.610873786407768e-06, "loss": 0.0147, "step": 277890 }, { "epoch": 107.92, "learning_rate": 5.610355987055016e-06, "loss": 0.0155, "step": 277900 }, { "epoch": 107.93, "learning_rate": 5.6098381877022655e-06, "loss": 0.0794, "step": 277910 }, { "epoch": 107.93, "learning_rate": 5.609320388349515e-06, "loss": 0.1, "step": 277920 }, { "epoch": 107.93, "learning_rate": 5.608802588996765e-06, "loss": 0.0197, "step": 277930 }, { "epoch": 107.94, "learning_rate": 5.608284789644013e-06, "loss": 0.0745, "step": 277940 }, { "epoch": 107.94, "learning_rate": 5.607766990291262e-06, "loss": 0.0975, "step": 277950 }, { "epoch": 107.95, "learning_rate": 5.607249190938512e-06, "loss": 0.1889, "step": 277960 }, { "epoch": 107.95, "learning_rate": 5.6067313915857615e-06, "loss": 0.0482, "step": 277970 }, { "epoch": 107.95, "learning_rate": 5.606213592233009e-06, "loss": 0.0367, "step": 277980 }, { "epoch": 107.96, "learning_rate": 5.605695792880259e-06, "loss": 0.0176, "step": 277990 }, { "epoch": 107.96, "learning_rate": 5.605177993527509e-06, "loss": 0.0406, "step": 278000 }, { "epoch": 107.97, "learning_rate": 5.604660194174758e-06, "loss": 0.1117, "step": 278010 }, { "epoch": 107.97, "learning_rate": 5.604142394822006e-06, "loss": 0.014, "step": 278020 }, { "epoch": 107.97, "learning_rate": 5.603624595469256e-06, "loss": 0.2085, "step": 278030 }, { "epoch": 107.98, "learning_rate": 5.6031067961165054e-06, "loss": 0.0758, "step": 278040 }, { "epoch": 107.98, "learning_rate": 5.602588996763755e-06, "loss": 0.085, "step": 278050 }, { "epoch": 107.98, "learning_rate": 5.602071197411005e-06, "loss": 0.0017, "step": 278060 }, { "epoch": 107.99, "learning_rate": 5.601553398058253e-06, "loss": 0.1027, "step": 278070 }, { "epoch": 107.99, "learning_rate": 5.601035598705502e-06, "loss": 0.0336, "step": 278080 }, { "epoch": 108.0, "learning_rate": 5.600517799352752e-06, "loss": 0.0552, "step": 278090 }, { "epoch": 108.0, "learning_rate": 5.600000000000001e-06, "loss": 0.0333, "step": 278100 }, { "epoch": 108.0, "eval_accuracy": 0.9515818431911967, "eval_loss": 0.3637031316757202, "eval_runtime": 8.2062, "eval_samples_per_second": 442.96, "eval_steps_per_second": 55.446, "step": 278100 }, { "epoch": 108.0, "learning_rate": 5.599482200647249e-06, "loss": 0.0526, "step": 278110 }, { "epoch": 108.01, "learning_rate": 5.598964401294499e-06, "loss": 0.0057, "step": 278120 }, { "epoch": 108.01, "learning_rate": 5.598446601941749e-06, "loss": 0.0653, "step": 278130 }, { "epoch": 108.02, "learning_rate": 5.597928802588997e-06, "loss": 0.1102, "step": 278140 }, { "epoch": 108.02, "learning_rate": 5.597411003236246e-06, "loss": 0.1764, "step": 278150 }, { "epoch": 108.02, "learning_rate": 5.596893203883496e-06, "loss": 0.0548, "step": 278160 }, { "epoch": 108.03, "learning_rate": 5.596375404530745e-06, "loss": 0.0283, "step": 278170 }, { "epoch": 108.03, "learning_rate": 5.595857605177994e-06, "loss": 0.0037, "step": 278180 }, { "epoch": 108.03, "learning_rate": 5.595339805825243e-06, "loss": 0.0899, "step": 278190 }, { "epoch": 108.04, "learning_rate": 5.5948220064724925e-06, "loss": 0.0559, "step": 278200 }, { "epoch": 108.04, "learning_rate": 5.594304207119742e-06, "loss": 0.0007, "step": 278210 }, { "epoch": 108.05, "learning_rate": 5.593786407766991e-06, "loss": 0.0999, "step": 278220 }, { "epoch": 108.05, "learning_rate": 5.59326860841424e-06, "loss": 0.0477, "step": 278230 }, { "epoch": 108.05, "learning_rate": 5.592750809061489e-06, "loss": 0.0012, "step": 278240 }, { "epoch": 108.06, "learning_rate": 5.592233009708738e-06, "loss": 0.0673, "step": 278250 }, { "epoch": 108.06, "learning_rate": 5.591715210355988e-06, "loss": 0.001, "step": 278260 }, { "epoch": 108.07, "learning_rate": 5.5911974110032365e-06, "loss": 0.0303, "step": 278270 }, { "epoch": 108.07, "learning_rate": 5.590679611650486e-06, "loss": 0.0592, "step": 278280 }, { "epoch": 108.07, "learning_rate": 5.590161812297735e-06, "loss": 0.0001, "step": 278290 }, { "epoch": 108.08, "learning_rate": 5.5896440129449845e-06, "loss": 0.0358, "step": 278300 }, { "epoch": 108.08, "learning_rate": 5.589126213592233e-06, "loss": 0.0675, "step": 278310 }, { "epoch": 108.09, "learning_rate": 5.588608414239483e-06, "loss": 0.022, "step": 278320 }, { "epoch": 108.09, "learning_rate": 5.588090614886732e-06, "loss": 0.0866, "step": 278330 }, { "epoch": 108.09, "learning_rate": 5.587572815533981e-06, "loss": 0.0641, "step": 278340 }, { "epoch": 108.1, "learning_rate": 5.58705501618123e-06, "loss": 0.0385, "step": 278350 }, { "epoch": 108.1, "learning_rate": 5.58653721682848e-06, "loss": 0.042, "step": 278360 }, { "epoch": 108.1, "learning_rate": 5.586019417475728e-06, "loss": 0.0284, "step": 278370 }, { "epoch": 108.11, "learning_rate": 5.585501618122978e-06, "loss": 0.0183, "step": 278380 }, { "epoch": 108.11, "learning_rate": 5.584983818770227e-06, "loss": 0.0627, "step": 278390 }, { "epoch": 108.12, "learning_rate": 5.5844660194174764e-06, "loss": 0.0629, "step": 278400 }, { "epoch": 108.12, "learning_rate": 5.583948220064725e-06, "loss": 0.1208, "step": 278410 }, { "epoch": 108.12, "learning_rate": 5.583430420711975e-06, "loss": 0.0871, "step": 278420 }, { "epoch": 108.13, "learning_rate": 5.582912621359224e-06, "loss": 0.0002, "step": 278430 }, { "epoch": 108.13, "learning_rate": 5.582394822006472e-06, "loss": 0.0272, "step": 278440 }, { "epoch": 108.14, "learning_rate": 5.581877022653722e-06, "loss": 0.0598, "step": 278450 }, { "epoch": 108.14, "learning_rate": 5.581359223300972e-06, "loss": 0.0915, "step": 278460 }, { "epoch": 108.14, "learning_rate": 5.58084142394822e-06, "loss": 0.1158, "step": 278470 }, { "epoch": 108.15, "learning_rate": 5.580323624595469e-06, "loss": 0.037, "step": 278480 }, { "epoch": 108.15, "learning_rate": 5.579805825242719e-06, "loss": 0.0733, "step": 278490 }, { "epoch": 108.16, "learning_rate": 5.579288025889968e-06, "loss": 0.0008, "step": 278500 }, { "epoch": 108.16, "learning_rate": 5.578770226537217e-06, "loss": 0.0126, "step": 278510 }, { "epoch": 108.16, "learning_rate": 5.578252427184466e-06, "loss": 0.0304, "step": 278520 }, { "epoch": 108.17, "learning_rate": 5.5777346278317155e-06, "loss": 0.0022, "step": 278530 }, { "epoch": 108.17, "learning_rate": 5.577216828478965e-06, "loss": 0.0906, "step": 278540 }, { "epoch": 108.17, "learning_rate": 5.576699029126214e-06, "loss": 0.0607, "step": 278550 }, { "epoch": 108.18, "learning_rate": 5.576181229773463e-06, "loss": 0.101, "step": 278560 }, { "epoch": 108.18, "learning_rate": 5.575663430420712e-06, "loss": 0.0507, "step": 278570 }, { "epoch": 108.19, "learning_rate": 5.575145631067962e-06, "loss": 0.0013, "step": 278580 }, { "epoch": 108.19, "learning_rate": 5.57462783171521e-06, "loss": 0.14, "step": 278590 }, { "epoch": 108.19, "learning_rate": 5.5741100323624595e-06, "loss": 0.0035, "step": 278600 }, { "epoch": 108.2, "learning_rate": 5.573592233009709e-06, "loss": 0.0579, "step": 278610 }, { "epoch": 108.2, "learning_rate": 5.573074433656959e-06, "loss": 0.1343, "step": 278620 }, { "epoch": 108.21, "learning_rate": 5.572556634304208e-06, "loss": 0.0951, "step": 278630 }, { "epoch": 108.21, "learning_rate": 5.572038834951456e-06, "loss": 0.0058, "step": 278640 }, { "epoch": 108.21, "learning_rate": 5.571521035598706e-06, "loss": 0.0699, "step": 278650 }, { "epoch": 108.22, "learning_rate": 5.5710032362459555e-06, "loss": 0.009, "step": 278660 }, { "epoch": 108.22, "learning_rate": 5.570485436893205e-06, "loss": 0.0445, "step": 278670 }, { "epoch": 108.23, "learning_rate": 5.569967637540453e-06, "loss": 0.1145, "step": 278680 }, { "epoch": 108.23, "learning_rate": 5.569449838187703e-06, "loss": 0.0609, "step": 278690 }, { "epoch": 108.23, "learning_rate": 5.568932038834952e-06, "loss": 0.0561, "step": 278700 }, { "epoch": 108.24, "learning_rate": 5.568414239482202e-06, "loss": 0.1192, "step": 278710 }, { "epoch": 108.24, "learning_rate": 5.56789644012945e-06, "loss": 0.0007, "step": 278720 }, { "epoch": 108.24, "learning_rate": 5.5673786407766994e-06, "loss": 0.0233, "step": 278730 }, { "epoch": 108.25, "learning_rate": 5.566860841423949e-06, "loss": 0.0729, "step": 278740 }, { "epoch": 108.25, "learning_rate": 5.566343042071199e-06, "loss": 0.003, "step": 278750 }, { "epoch": 108.26, "learning_rate": 5.565825242718447e-06, "loss": 0.0328, "step": 278760 }, { "epoch": 108.26, "learning_rate": 5.565307443365696e-06, "loss": 0.0302, "step": 278770 }, { "epoch": 108.26, "learning_rate": 5.564789644012946e-06, "loss": 0.0388, "step": 278780 }, { "epoch": 108.27, "learning_rate": 5.5642718446601954e-06, "loss": 0.0003, "step": 278790 }, { "epoch": 108.27, "learning_rate": 5.563754045307443e-06, "loss": 0.1385, "step": 278800 }, { "epoch": 108.28, "learning_rate": 5.563236245954693e-06, "loss": 0.0084, "step": 278810 }, { "epoch": 108.28, "learning_rate": 5.562718446601943e-06, "loss": 0.0888, "step": 278820 }, { "epoch": 108.28, "learning_rate": 5.562200647249192e-06, "loss": 0.0464, "step": 278830 }, { "epoch": 108.29, "learning_rate": 5.56168284789644e-06, "loss": 0.0477, "step": 278840 }, { "epoch": 108.29, "learning_rate": 5.56116504854369e-06, "loss": 0.0017, "step": 278850 }, { "epoch": 108.3, "learning_rate": 5.560647249190939e-06, "loss": 0.1876, "step": 278860 }, { "epoch": 108.3, "learning_rate": 5.560129449838189e-06, "loss": 0.0828, "step": 278870 }, { "epoch": 108.3, "learning_rate": 5.559611650485437e-06, "loss": 0.0214, "step": 278880 }, { "epoch": 108.31, "learning_rate": 5.5590938511326865e-06, "loss": 0.0035, "step": 278890 }, { "epoch": 108.31, "learning_rate": 5.558576051779936e-06, "loss": 0.0157, "step": 278900 }, { "epoch": 108.31, "learning_rate": 5.558058252427185e-06, "loss": 0.0581, "step": 278910 }, { "epoch": 108.32, "learning_rate": 5.557540453074434e-06, "loss": 0.0659, "step": 278920 }, { "epoch": 108.32, "learning_rate": 5.557022653721683e-06, "loss": 0.0085, "step": 278930 }, { "epoch": 108.33, "learning_rate": 5.556504854368933e-06, "loss": 0.072, "step": 278940 }, { "epoch": 108.33, "learning_rate": 5.555987055016182e-06, "loss": 0.0018, "step": 278950 }, { "epoch": 108.33, "learning_rate": 5.5554692556634305e-06, "loss": 0.0369, "step": 278960 }, { "epoch": 108.34, "learning_rate": 5.55495145631068e-06, "loss": 0.0108, "step": 278970 }, { "epoch": 108.34, "learning_rate": 5.55443365695793e-06, "loss": 0.035, "step": 278980 }, { "epoch": 108.35, "learning_rate": 5.5539158576051785e-06, "loss": 0.0323, "step": 278990 }, { "epoch": 108.35, "learning_rate": 5.553398058252427e-06, "loss": 0.113, "step": 279000 }, { "epoch": 108.35, "learning_rate": 5.552880258899677e-06, "loss": 0.0266, "step": 279010 }, { "epoch": 108.36, "learning_rate": 5.5523624595469265e-06, "loss": 0.1914, "step": 279020 }, { "epoch": 108.36, "learning_rate": 5.551844660194175e-06, "loss": 0.044, "step": 279030 }, { "epoch": 108.37, "learning_rate": 5.551326860841424e-06, "loss": 0.0655, "step": 279040 }, { "epoch": 108.37, "learning_rate": 5.550809061488674e-06, "loss": 0.0674, "step": 279050 }, { "epoch": 108.37, "learning_rate": 5.550291262135922e-06, "loss": 0.0369, "step": 279060 }, { "epoch": 108.38, "learning_rate": 5.549773462783172e-06, "loss": 0.0595, "step": 279070 }, { "epoch": 108.38, "learning_rate": 5.549255663430421e-06, "loss": 0.0087, "step": 279080 }, { "epoch": 108.38, "learning_rate": 5.5487378640776704e-06, "loss": 0.249, "step": 279090 }, { "epoch": 108.39, "learning_rate": 5.548220064724919e-06, "loss": 0.0308, "step": 279100 }, { "epoch": 108.39, "learning_rate": 5.547702265372169e-06, "loss": 0.0633, "step": 279110 }, { "epoch": 108.4, "learning_rate": 5.547184466019418e-06, "loss": 0.0747, "step": 279120 }, { "epoch": 108.4, "learning_rate": 5.546666666666667e-06, "loss": 0.0003, "step": 279130 }, { "epoch": 108.4, "learning_rate": 5.546148867313916e-06, "loss": 0.0101, "step": 279140 }, { "epoch": 108.41, "learning_rate": 5.545631067961166e-06, "loss": 0.0192, "step": 279150 }, { "epoch": 108.41, "learning_rate": 5.545113268608414e-06, "loss": 0.0651, "step": 279160 }, { "epoch": 108.42, "learning_rate": 5.544595469255664e-06, "loss": 0.0374, "step": 279170 }, { "epoch": 108.42, "learning_rate": 5.544077669902913e-06, "loss": 0.2018, "step": 279180 }, { "epoch": 108.42, "learning_rate": 5.543559870550162e-06, "loss": 0.0151, "step": 279190 }, { "epoch": 108.43, "learning_rate": 5.543042071197412e-06, "loss": 0.0246, "step": 279200 }, { "epoch": 108.43, "learning_rate": 5.542524271844661e-06, "loss": 0.0001, "step": 279210 }, { "epoch": 108.43, "learning_rate": 5.5420064724919095e-06, "loss": 0.0153, "step": 279220 }, { "epoch": 108.44, "learning_rate": 5.541488673139159e-06, "loss": 0.0098, "step": 279230 }, { "epoch": 108.44, "learning_rate": 5.540970873786409e-06, "loss": 0.1474, "step": 279240 }, { "epoch": 108.45, "learning_rate": 5.540453074433657e-06, "loss": 0.0603, "step": 279250 }, { "epoch": 108.45, "learning_rate": 5.539935275080906e-06, "loss": 0.0165, "step": 279260 }, { "epoch": 108.45, "learning_rate": 5.539417475728156e-06, "loss": 0.0678, "step": 279270 }, { "epoch": 108.46, "learning_rate": 5.5388996763754055e-06, "loss": 0.0072, "step": 279280 }, { "epoch": 108.46, "learning_rate": 5.5383818770226535e-06, "loss": 0.0347, "step": 279290 }, { "epoch": 108.47, "learning_rate": 5.537864077669903e-06, "loss": 0.0388, "step": 279300 }, { "epoch": 108.47, "learning_rate": 5.537346278317153e-06, "loss": 0.014, "step": 279310 }, { "epoch": 108.47, "learning_rate": 5.536828478964402e-06, "loss": 0.0011, "step": 279320 }, { "epoch": 108.48, "learning_rate": 5.53631067961165e-06, "loss": 0.0663, "step": 279330 }, { "epoch": 108.48, "learning_rate": 5.5357928802589e-06, "loss": 0.1285, "step": 279340 }, { "epoch": 108.49, "learning_rate": 5.5352750809061495e-06, "loss": 0.0715, "step": 279350 }, { "epoch": 108.49, "learning_rate": 5.534757281553399e-06, "loss": 0.0329, "step": 279360 }, { "epoch": 108.49, "learning_rate": 5.534239482200647e-06, "loss": 0.0489, "step": 279370 }, { "epoch": 108.5, "learning_rate": 5.533721682847897e-06, "loss": 0.0233, "step": 279380 }, { "epoch": 108.5, "learning_rate": 5.533203883495146e-06, "loss": 0.0486, "step": 279390 }, { "epoch": 108.5, "learning_rate": 5.532686084142396e-06, "loss": 0.0014, "step": 279400 }, { "epoch": 108.51, "learning_rate": 5.532168284789644e-06, "loss": 0.0048, "step": 279410 }, { "epoch": 108.51, "learning_rate": 5.5316504854368934e-06, "loss": 0.1202, "step": 279420 }, { "epoch": 108.52, "learning_rate": 5.531132686084143e-06, "loss": 0.1714, "step": 279430 }, { "epoch": 108.52, "learning_rate": 5.530614886731393e-06, "loss": 0.0995, "step": 279440 }, { "epoch": 108.52, "learning_rate": 5.530097087378641e-06, "loss": 0.05, "step": 279450 }, { "epoch": 108.53, "learning_rate": 5.52957928802589e-06, "loss": 0.1, "step": 279460 }, { "epoch": 108.53, "learning_rate": 5.52906148867314e-06, "loss": 0.0763, "step": 279470 }, { "epoch": 108.54, "learning_rate": 5.5285436893203894e-06, "loss": 0.1219, "step": 279480 }, { "epoch": 108.54, "learning_rate": 5.528025889967637e-06, "loss": 0.102, "step": 279490 }, { "epoch": 108.54, "learning_rate": 5.527508090614887e-06, "loss": 0.0533, "step": 279500 }, { "epoch": 108.55, "learning_rate": 5.526990291262137e-06, "loss": 0.1294, "step": 279510 }, { "epoch": 108.55, "learning_rate": 5.526472491909386e-06, "loss": 0.0305, "step": 279520 }, { "epoch": 108.56, "learning_rate": 5.525954692556634e-06, "loss": 0.1597, "step": 279530 }, { "epoch": 108.56, "learning_rate": 5.525436893203884e-06, "loss": 0.0019, "step": 279540 }, { "epoch": 108.56, "learning_rate": 5.524919093851133e-06, "loss": 0.0848, "step": 279550 }, { "epoch": 108.57, "learning_rate": 5.524401294498383e-06, "loss": 0.0865, "step": 279560 }, { "epoch": 108.57, "learning_rate": 5.523883495145631e-06, "loss": 0.0173, "step": 279570 }, { "epoch": 108.57, "learning_rate": 5.5233656957928805e-06, "loss": 0.0056, "step": 279580 }, { "epoch": 108.58, "learning_rate": 5.52284789644013e-06, "loss": 0.0162, "step": 279590 }, { "epoch": 108.58, "learning_rate": 5.52233009708738e-06, "loss": 0.0524, "step": 279600 }, { "epoch": 108.59, "learning_rate": 5.521812297734628e-06, "loss": 0.1795, "step": 279610 }, { "epoch": 108.59, "learning_rate": 5.521294498381877e-06, "loss": 0.1228, "step": 279620 }, { "epoch": 108.59, "learning_rate": 5.520776699029127e-06, "loss": 0.0943, "step": 279630 }, { "epoch": 108.6, "learning_rate": 5.5202588996763766e-06, "loss": 0.0134, "step": 279640 }, { "epoch": 108.6, "learning_rate": 5.5197411003236245e-06, "loss": 0.0042, "step": 279650 }, { "epoch": 108.61, "learning_rate": 5.519223300970874e-06, "loss": 0.1291, "step": 279660 }, { "epoch": 108.61, "learning_rate": 5.518705501618124e-06, "loss": 0.0358, "step": 279670 }, { "epoch": 108.61, "learning_rate": 5.518187702265373e-06, "loss": 0.0547, "step": 279680 }, { "epoch": 108.62, "learning_rate": 5.517669902912621e-06, "loss": 0.0184, "step": 279690 }, { "epoch": 108.62, "learning_rate": 5.517152103559871e-06, "loss": 0.0075, "step": 279700 }, { "epoch": 108.63, "learning_rate": 5.5166343042071205e-06, "loss": 0.0571, "step": 279710 }, { "epoch": 108.63, "learning_rate": 5.516116504854369e-06, "loss": 0.0548, "step": 279720 }, { "epoch": 108.63, "learning_rate": 5.515598705501618e-06, "loss": 0.0965, "step": 279730 }, { "epoch": 108.64, "learning_rate": 5.515080906148868e-06, "loss": 0.0053, "step": 279740 }, { "epoch": 108.64, "learning_rate": 5.514563106796117e-06, "loss": 0.074, "step": 279750 }, { "epoch": 108.64, "learning_rate": 5.514045307443366e-06, "loss": 0.0044, "step": 279760 }, { "epoch": 108.65, "learning_rate": 5.513527508090616e-06, "loss": 0.0126, "step": 279770 }, { "epoch": 108.65, "learning_rate": 5.5130097087378644e-06, "loss": 0.0004, "step": 279780 }, { "epoch": 108.66, "learning_rate": 5.512491909385114e-06, "loss": 0.0084, "step": 279790 }, { "epoch": 108.66, "learning_rate": 5.511974110032363e-06, "loss": 0.1216, "step": 279800 }, { "epoch": 108.66, "learning_rate": 5.5114563106796124e-06, "loss": 0.0167, "step": 279810 }, { "epoch": 108.67, "learning_rate": 5.510938511326861e-06, "loss": 0.0007, "step": 279820 }, { "epoch": 108.67, "learning_rate": 5.510420711974111e-06, "loss": 0.059, "step": 279830 }, { "epoch": 108.68, "learning_rate": 5.50990291262136e-06, "loss": 0.0362, "step": 279840 }, { "epoch": 108.68, "learning_rate": 5.509385113268609e-06, "loss": 0.0004, "step": 279850 }, { "epoch": 108.68, "learning_rate": 5.508867313915858e-06, "loss": 0.0017, "step": 279860 }, { "epoch": 108.69, "learning_rate": 5.508349514563108e-06, "loss": 0.1115, "step": 279870 }, { "epoch": 108.69, "learning_rate": 5.507831715210356e-06, "loss": 0.0996, "step": 279880 }, { "epoch": 108.7, "learning_rate": 5.507313915857606e-06, "loss": 0.1305, "step": 279890 }, { "epoch": 108.7, "learning_rate": 5.506796116504855e-06, "loss": 0.1093, "step": 279900 }, { "epoch": 108.7, "learning_rate": 5.5062783171521035e-06, "loss": 0.0529, "step": 279910 }, { "epoch": 108.71, "learning_rate": 5.505760517799353e-06, "loss": 0.1208, "step": 279920 }, { "epoch": 108.71, "learning_rate": 5.505242718446603e-06, "loss": 0.0724, "step": 279930 }, { "epoch": 108.71, "learning_rate": 5.5047249190938515e-06, "loss": 0.0132, "step": 279940 }, { "epoch": 108.72, "learning_rate": 5.5042071197411e-06, "loss": 0.0109, "step": 279950 }, { "epoch": 108.72, "learning_rate": 5.50368932038835e-06, "loss": 0.0237, "step": 279960 }, { "epoch": 108.73, "learning_rate": 5.5031715210355995e-06, "loss": 0.0167, "step": 279970 }, { "epoch": 108.73, "learning_rate": 5.502653721682848e-06, "loss": 0.0003, "step": 279980 }, { "epoch": 108.73, "learning_rate": 5.502135922330097e-06, "loss": 0.1302, "step": 279990 }, { "epoch": 108.74, "learning_rate": 5.501618122977347e-06, "loss": 0.0131, "step": 280000 }, { "epoch": 108.74, "learning_rate": 5.501100323624596e-06, "loss": 0.0807, "step": 280010 }, { "epoch": 108.75, "learning_rate": 5.500582524271845e-06, "loss": 0.091, "step": 280020 }, { "epoch": 108.75, "learning_rate": 5.500064724919094e-06, "loss": 0.088, "step": 280030 }, { "epoch": 108.75, "learning_rate": 5.4995469255663435e-06, "loss": 0.0087, "step": 280040 }, { "epoch": 108.76, "learning_rate": 5.499029126213593e-06, "loss": 0.0014, "step": 280050 }, { "epoch": 108.76, "learning_rate": 5.498511326860841e-06, "loss": 0.0033, "step": 280060 }, { "epoch": 108.77, "learning_rate": 5.497993527508091e-06, "loss": 0.1208, "step": 280070 }, { "epoch": 108.77, "learning_rate": 5.49747572815534e-06, "loss": 0.1336, "step": 280080 }, { "epoch": 108.77, "learning_rate": 5.49695792880259e-06, "loss": 0.063, "step": 280090 }, { "epoch": 108.78, "learning_rate": 5.496440129449838e-06, "loss": 0.0146, "step": 280100 }, { "epoch": 108.78, "learning_rate": 5.495922330097087e-06, "loss": 0.0246, "step": 280110 }, { "epoch": 108.78, "learning_rate": 5.495404530744337e-06, "loss": 0.0262, "step": 280120 }, { "epoch": 108.79, "learning_rate": 5.494886731391587e-06, "loss": 0.0585, "step": 280130 }, { "epoch": 108.79, "learning_rate": 5.494368932038835e-06, "loss": 0.0002, "step": 280140 }, { "epoch": 108.8, "learning_rate": 5.493851132686084e-06, "loss": 0.0275, "step": 280150 }, { "epoch": 108.8, "learning_rate": 5.493333333333334e-06, "loss": 0.0731, "step": 280160 }, { "epoch": 108.8, "learning_rate": 5.4928155339805834e-06, "loss": 0.0963, "step": 280170 }, { "epoch": 108.81, "learning_rate": 5.492297734627831e-06, "loss": 0.1446, "step": 280180 }, { "epoch": 108.81, "learning_rate": 5.491779935275081e-06, "loss": 0.0248, "step": 280190 }, { "epoch": 108.82, "learning_rate": 5.491262135922331e-06, "loss": 0.1315, "step": 280200 }, { "epoch": 108.82, "learning_rate": 5.49074433656958e-06, "loss": 0.1841, "step": 280210 }, { "epoch": 108.82, "learning_rate": 5.490226537216828e-06, "loss": 0.0406, "step": 280220 }, { "epoch": 108.83, "learning_rate": 5.489708737864078e-06, "loss": 0.0111, "step": 280230 }, { "epoch": 108.83, "learning_rate": 5.489190938511327e-06, "loss": 0.0936, "step": 280240 }, { "epoch": 108.83, "learning_rate": 5.488673139158577e-06, "loss": 0.1842, "step": 280250 }, { "epoch": 108.84, "learning_rate": 5.488155339805825e-06, "loss": 0.0385, "step": 280260 }, { "epoch": 108.84, "learning_rate": 5.4876375404530745e-06, "loss": 0.0177, "step": 280270 }, { "epoch": 108.85, "learning_rate": 5.487119741100324e-06, "loss": 0.0424, "step": 280280 }, { "epoch": 108.85, "learning_rate": 5.486601941747574e-06, "loss": 0.0249, "step": 280290 }, { "epoch": 108.85, "learning_rate": 5.486084142394823e-06, "loss": 0.0393, "step": 280300 }, { "epoch": 108.86, "learning_rate": 5.485566343042071e-06, "loss": 0.0395, "step": 280310 }, { "epoch": 108.86, "learning_rate": 5.485048543689321e-06, "loss": 0.0101, "step": 280320 }, { "epoch": 108.87, "learning_rate": 5.4845307443365705e-06, "loss": 0.0013, "step": 280330 }, { "epoch": 108.87, "learning_rate": 5.48401294498382e-06, "loss": 0.0005, "step": 280340 }, { "epoch": 108.87, "learning_rate": 5.483495145631068e-06, "loss": 0.0238, "step": 280350 }, { "epoch": 108.88, "learning_rate": 5.482977346278318e-06, "loss": 0.0093, "step": 280360 }, { "epoch": 108.88, "learning_rate": 5.482459546925567e-06, "loss": 0.0556, "step": 280370 }, { "epoch": 108.89, "learning_rate": 5.481941747572816e-06, "loss": 0.1537, "step": 280380 }, { "epoch": 108.89, "learning_rate": 5.481423948220065e-06, "loss": 0.0973, "step": 280390 }, { "epoch": 108.89, "learning_rate": 5.4809061488673145e-06, "loss": 0.1341, "step": 280400 }, { "epoch": 108.9, "learning_rate": 5.480388349514564e-06, "loss": 0.0087, "step": 280410 }, { "epoch": 108.9, "learning_rate": 5.479870550161813e-06, "loss": 0.0449, "step": 280420 }, { "epoch": 108.9, "learning_rate": 5.479352750809062e-06, "loss": 0.0431, "step": 280430 }, { "epoch": 108.91, "learning_rate": 5.478834951456311e-06, "loss": 0.0204, "step": 280440 }, { "epoch": 108.91, "learning_rate": 5.478317152103561e-06, "loss": 0.0149, "step": 280450 }, { "epoch": 108.92, "learning_rate": 5.47779935275081e-06, "loss": 0.0068, "step": 280460 }, { "epoch": 108.92, "learning_rate": 5.4772815533980584e-06, "loss": 0.0661, "step": 280470 }, { "epoch": 108.92, "learning_rate": 5.476763754045308e-06, "loss": 0.0724, "step": 280480 }, { "epoch": 108.93, "learning_rate": 5.476245954692558e-06, "loss": 0.1044, "step": 280490 }, { "epoch": 108.93, "learning_rate": 5.4757281553398064e-06, "loss": 0.014, "step": 280500 }, { "epoch": 108.94, "learning_rate": 5.475210355987055e-06, "loss": 0.0778, "step": 280510 }, { "epoch": 108.94, "learning_rate": 5.474692556634305e-06, "loss": 0.0131, "step": 280520 }, { "epoch": 108.94, "learning_rate": 5.474174757281554e-06, "loss": 0.001, "step": 280530 }, { "epoch": 108.95, "learning_rate": 5.473656957928803e-06, "loss": 0.008, "step": 280540 }, { "epoch": 108.95, "learning_rate": 5.473139158576052e-06, "loss": 0.2071, "step": 280550 }, { "epoch": 108.96, "learning_rate": 5.472621359223302e-06, "loss": 0.0014, "step": 280560 }, { "epoch": 108.96, "learning_rate": 5.47210355987055e-06, "loss": 0.0398, "step": 280570 }, { "epoch": 108.96, "learning_rate": 5.4715857605178e-06, "loss": 0.0233, "step": 280580 }, { "epoch": 108.97, "learning_rate": 5.471067961165049e-06, "loss": 0.1256, "step": 280590 }, { "epoch": 108.97, "learning_rate": 5.470550161812298e-06, "loss": 0.1392, "step": 280600 }, { "epoch": 108.97, "learning_rate": 5.470032362459547e-06, "loss": 0.1028, "step": 280610 }, { "epoch": 108.98, "learning_rate": 5.469514563106797e-06, "loss": 0.0235, "step": 280620 }, { "epoch": 108.98, "learning_rate": 5.4689967637540455e-06, "loss": 0.141, "step": 280630 }, { "epoch": 108.99, "learning_rate": 5.468478964401295e-06, "loss": 0.0313, "step": 280640 }, { "epoch": 108.99, "learning_rate": 5.467961165048544e-06, "loss": 0.0323, "step": 280650 }, { "epoch": 108.99, "learning_rate": 5.4674433656957935e-06, "loss": 0.1213, "step": 280660 }, { "epoch": 109.0, "learning_rate": 5.466925566343042e-06, "loss": 0.0545, "step": 280670 }, { "epoch": 109.0, "eval_accuracy": 0.9521320495185694, "eval_loss": 0.3662911355495453, "eval_runtime": 8.2186, "eval_samples_per_second": 442.287, "eval_steps_per_second": 55.362, "step": 280675 }, { "epoch": 109.0, "learning_rate": 5.466407766990292e-06, "loss": 0.1973, "step": 280680 }, { "epoch": 109.01, "learning_rate": 5.465889967637541e-06, "loss": 0.0873, "step": 280690 }, { "epoch": 109.01, "learning_rate": 5.46537216828479e-06, "loss": 0.0596, "step": 280700 }, { "epoch": 109.01, "learning_rate": 5.464854368932039e-06, "loss": 0.0102, "step": 280710 }, { "epoch": 109.02, "learning_rate": 5.464336569579288e-06, "loss": 0.0003, "step": 280720 }, { "epoch": 109.02, "learning_rate": 5.4638187702265375e-06, "loss": 0.1827, "step": 280730 }, { "epoch": 109.03, "learning_rate": 5.463300970873787e-06, "loss": 0.0718, "step": 280740 }, { "epoch": 109.03, "learning_rate": 5.462783171521036e-06, "loss": 0.1124, "step": 280750 }, { "epoch": 109.03, "learning_rate": 5.462265372168285e-06, "loss": 0.0361, "step": 280760 }, { "epoch": 109.04, "learning_rate": 5.461747572815534e-06, "loss": 0.0814, "step": 280770 }, { "epoch": 109.04, "learning_rate": 5.461229773462784e-06, "loss": 0.035, "step": 280780 }, { "epoch": 109.04, "learning_rate": 5.460711974110033e-06, "loss": 0.0153, "step": 280790 }, { "epoch": 109.05, "learning_rate": 5.460194174757281e-06, "loss": 0.1077, "step": 280800 }, { "epoch": 109.05, "learning_rate": 5.459676375404531e-06, "loss": 0.0113, "step": 280810 }, { "epoch": 109.06, "learning_rate": 5.459158576051781e-06, "loss": 0.0018, "step": 280820 }, { "epoch": 109.06, "learning_rate": 5.4586407766990294e-06, "loss": 0.026, "step": 280830 }, { "epoch": 109.06, "learning_rate": 5.458122977346278e-06, "loss": 0.1534, "step": 280840 }, { "epoch": 109.07, "learning_rate": 5.457605177993528e-06, "loss": 0.0605, "step": 280850 }, { "epoch": 109.07, "learning_rate": 5.4570873786407774e-06, "loss": 0.0366, "step": 280860 }, { "epoch": 109.08, "learning_rate": 5.456569579288027e-06, "loss": 0.0001, "step": 280870 }, { "epoch": 109.08, "learning_rate": 5.456051779935275e-06, "loss": 0.0457, "step": 280880 }, { "epoch": 109.08, "learning_rate": 5.455533980582525e-06, "loss": 0.0748, "step": 280890 }, { "epoch": 109.09, "learning_rate": 5.455016181229774e-06, "loss": 0.0609, "step": 280900 }, { "epoch": 109.09, "learning_rate": 5.454498381877024e-06, "loss": 0.1361, "step": 280910 }, { "epoch": 109.1, "learning_rate": 5.453980582524272e-06, "loss": 0.0793, "step": 280920 }, { "epoch": 109.1, "learning_rate": 5.453462783171521e-06, "loss": 0.0328, "step": 280930 }, { "epoch": 109.1, "learning_rate": 5.452944983818771e-06, "loss": 0.0031, "step": 280940 }, { "epoch": 109.11, "learning_rate": 5.452427184466021e-06, "loss": 0.0442, "step": 280950 }, { "epoch": 109.11, "learning_rate": 5.4519093851132685e-06, "loss": 0.0848, "step": 280960 }, { "epoch": 109.11, "learning_rate": 5.451391585760518e-06, "loss": 0.0021, "step": 280970 }, { "epoch": 109.12, "learning_rate": 5.450873786407768e-06, "loss": 0.0457, "step": 280980 }, { "epoch": 109.12, "learning_rate": 5.450355987055017e-06, "loss": 0.0528, "step": 280990 }, { "epoch": 109.13, "learning_rate": 5.449838187702265e-06, "loss": 0.0869, "step": 281000 }, { "epoch": 109.13, "learning_rate": 5.449320388349515e-06, "loss": 0.0684, "step": 281010 }, { "epoch": 109.13, "learning_rate": 5.4488025889967645e-06, "loss": 0.0013, "step": 281020 }, { "epoch": 109.14, "learning_rate": 5.448284789644014e-06, "loss": 0.0818, "step": 281030 }, { "epoch": 109.14, "learning_rate": 5.447766990291262e-06, "loss": 0.0647, "step": 281040 }, { "epoch": 109.15, "learning_rate": 5.447249190938512e-06, "loss": 0.0285, "step": 281050 }, { "epoch": 109.15, "learning_rate": 5.446731391585761e-06, "loss": 0.0687, "step": 281060 }, { "epoch": 109.15, "learning_rate": 5.446213592233011e-06, "loss": 0.0105, "step": 281070 }, { "epoch": 109.16, "learning_rate": 5.445695792880259e-06, "loss": 0.1491, "step": 281080 }, { "epoch": 109.16, "learning_rate": 5.4451779935275085e-06, "loss": 0.0306, "step": 281090 }, { "epoch": 109.17, "learning_rate": 5.444660194174758e-06, "loss": 0.0425, "step": 281100 }, { "epoch": 109.17, "learning_rate": 5.444142394822008e-06, "loss": 0.1132, "step": 281110 }, { "epoch": 109.17, "learning_rate": 5.443624595469256e-06, "loss": 0.0001, "step": 281120 }, { "epoch": 109.18, "learning_rate": 5.443106796116505e-06, "loss": 0.0861, "step": 281130 }, { "epoch": 109.18, "learning_rate": 5.442588996763755e-06, "loss": 0.0379, "step": 281140 }, { "epoch": 109.18, "learning_rate": 5.4420711974110045e-06, "loss": 0.0359, "step": 281150 }, { "epoch": 109.19, "learning_rate": 5.4415533980582524e-06, "loss": 0.0862, "step": 281160 }, { "epoch": 109.19, "learning_rate": 5.441035598705502e-06, "loss": 0.085, "step": 281170 }, { "epoch": 109.2, "learning_rate": 5.440517799352752e-06, "loss": 0.0623, "step": 281180 }, { "epoch": 109.2, "learning_rate": 5.4400000000000004e-06, "loss": 0.0917, "step": 281190 }, { "epoch": 109.2, "learning_rate": 5.439482200647249e-06, "loss": 0.0242, "step": 281200 }, { "epoch": 109.21, "learning_rate": 5.438964401294499e-06, "loss": 0.0444, "step": 281210 }, { "epoch": 109.21, "learning_rate": 5.4384466019417484e-06, "loss": 0.1922, "step": 281220 }, { "epoch": 109.22, "learning_rate": 5.437928802588997e-06, "loss": 0.0431, "step": 281230 }, { "epoch": 109.22, "learning_rate": 5.437411003236246e-06, "loss": 0.0444, "step": 281240 }, { "epoch": 109.22, "learning_rate": 5.436893203883496e-06, "loss": 0.1075, "step": 281250 }, { "epoch": 109.23, "learning_rate": 5.436375404530745e-06, "loss": 0.0225, "step": 281260 }, { "epoch": 109.23, "learning_rate": 5.435857605177994e-06, "loss": 0.0703, "step": 281270 }, { "epoch": 109.23, "learning_rate": 5.435339805825243e-06, "loss": 0.0315, "step": 281280 }, { "epoch": 109.24, "learning_rate": 5.434822006472492e-06, "loss": 0.0429, "step": 281290 }, { "epoch": 109.24, "learning_rate": 5.434304207119742e-06, "loss": 0.0182, "step": 281300 }, { "epoch": 109.25, "learning_rate": 5.433786407766991e-06, "loss": 0.0963, "step": 281310 }, { "epoch": 109.25, "learning_rate": 5.4332686084142395e-06, "loss": 0.1197, "step": 281320 }, { "epoch": 109.25, "learning_rate": 5.432750809061489e-06, "loss": 0.0082, "step": 281330 }, { "epoch": 109.26, "learning_rate": 5.432233009708738e-06, "loss": 0.0096, "step": 281340 }, { "epoch": 109.26, "learning_rate": 5.4317152103559875e-06, "loss": 0.0141, "step": 281350 }, { "epoch": 109.27, "learning_rate": 5.431197411003236e-06, "loss": 0.1285, "step": 281360 }, { "epoch": 109.27, "learning_rate": 5.430679611650486e-06, "loss": 0.0442, "step": 281370 }, { "epoch": 109.27, "learning_rate": 5.430161812297735e-06, "loss": 0.0464, "step": 281380 }, { "epoch": 109.28, "learning_rate": 5.429644012944984e-06, "loss": 0.0131, "step": 281390 }, { "epoch": 109.28, "learning_rate": 5.429126213592233e-06, "loss": 0.029, "step": 281400 }, { "epoch": 109.29, "learning_rate": 5.428608414239483e-06, "loss": 0.0109, "step": 281410 }, { "epoch": 109.29, "learning_rate": 5.4280906148867315e-06, "loss": 0.0678, "step": 281420 }, { "epoch": 109.29, "learning_rate": 5.427572815533981e-06, "loss": 0.0208, "step": 281430 }, { "epoch": 109.3, "learning_rate": 5.427055016181231e-06, "loss": 0.093, "step": 281440 }, { "epoch": 109.3, "learning_rate": 5.4265372168284795e-06, "loss": 0.2169, "step": 281450 }, { "epoch": 109.3, "learning_rate": 5.426019417475728e-06, "loss": 0.098, "step": 281460 }, { "epoch": 109.31, "learning_rate": 5.425501618122978e-06, "loss": 0.0101, "step": 281470 }, { "epoch": 109.31, "learning_rate": 5.4249838187702275e-06, "loss": 0.0156, "step": 281480 }, { "epoch": 109.32, "learning_rate": 5.424466019417476e-06, "loss": 0.0062, "step": 281490 }, { "epoch": 109.32, "learning_rate": 5.423948220064725e-06, "loss": 0.0032, "step": 281500 }, { "epoch": 109.32, "learning_rate": 5.423430420711975e-06, "loss": 0.0173, "step": 281510 }, { "epoch": 109.33, "learning_rate": 5.422912621359224e-06, "loss": 0.0981, "step": 281520 }, { "epoch": 109.33, "learning_rate": 5.422394822006472e-06, "loss": 0.0546, "step": 281530 }, { "epoch": 109.34, "learning_rate": 5.421877022653722e-06, "loss": 0.019, "step": 281540 }, { "epoch": 109.34, "learning_rate": 5.4213592233009714e-06, "loss": 0.027, "step": 281550 }, { "epoch": 109.34, "learning_rate": 5.420841423948221e-06, "loss": 0.0787, "step": 281560 }, { "epoch": 109.35, "learning_rate": 5.420323624595469e-06, "loss": 0.0142, "step": 281570 }, { "epoch": 109.35, "learning_rate": 5.419805825242719e-06, "loss": 0.0794, "step": 281580 }, { "epoch": 109.36, "learning_rate": 5.419288025889968e-06, "loss": 0.0122, "step": 281590 }, { "epoch": 109.36, "learning_rate": 5.418770226537218e-06, "loss": 0.0297, "step": 281600 }, { "epoch": 109.36, "learning_rate": 5.418252427184466e-06, "loss": 0.2012, "step": 281610 }, { "epoch": 109.37, "learning_rate": 5.417734627831715e-06, "loss": 0.0507, "step": 281620 }, { "epoch": 109.37, "learning_rate": 5.417216828478965e-06, "loss": 0.1149, "step": 281630 }, { "epoch": 109.37, "learning_rate": 5.416699029126215e-06, "loss": 0.043, "step": 281640 }, { "epoch": 109.38, "learning_rate": 5.4161812297734625e-06, "loss": 0.0256, "step": 281650 }, { "epoch": 109.38, "learning_rate": 5.415663430420712e-06, "loss": 0.035, "step": 281660 }, { "epoch": 109.39, "learning_rate": 5.415145631067962e-06, "loss": 0.0607, "step": 281670 }, { "epoch": 109.39, "learning_rate": 5.414627831715211e-06, "loss": 0.0253, "step": 281680 }, { "epoch": 109.39, "learning_rate": 5.414110032362459e-06, "loss": 0.0421, "step": 281690 }, { "epoch": 109.4, "learning_rate": 5.413592233009709e-06, "loss": 0.0307, "step": 281700 }, { "epoch": 109.4, "learning_rate": 5.4130744336569585e-06, "loss": 0.0333, "step": 281710 }, { "epoch": 109.41, "learning_rate": 5.412556634304208e-06, "loss": 0.0232, "step": 281720 }, { "epoch": 109.41, "learning_rate": 5.412038834951456e-06, "loss": 0.0788, "step": 281730 }, { "epoch": 109.41, "learning_rate": 5.411521035598706e-06, "loss": 0.0786, "step": 281740 }, { "epoch": 109.42, "learning_rate": 5.411003236245955e-06, "loss": 0.011, "step": 281750 }, { "epoch": 109.42, "learning_rate": 5.410485436893205e-06, "loss": 0.0001, "step": 281760 }, { "epoch": 109.43, "learning_rate": 5.409967637540453e-06, "loss": 0.0026, "step": 281770 }, { "epoch": 109.43, "learning_rate": 5.4094498381877025e-06, "loss": 0.0948, "step": 281780 }, { "epoch": 109.43, "learning_rate": 5.408932038834952e-06, "loss": 0.0512, "step": 281790 }, { "epoch": 109.44, "learning_rate": 5.408414239482202e-06, "loss": 0.0117, "step": 281800 }, { "epoch": 109.44, "learning_rate": 5.40789644012945e-06, "loss": 0.0025, "step": 281810 }, { "epoch": 109.44, "learning_rate": 5.407378640776699e-06, "loss": 0.0991, "step": 281820 }, { "epoch": 109.45, "learning_rate": 5.406860841423949e-06, "loss": 0.1095, "step": 281830 }, { "epoch": 109.45, "learning_rate": 5.4063430420711985e-06, "loss": 0.1055, "step": 281840 }, { "epoch": 109.46, "learning_rate": 5.405825242718446e-06, "loss": 0.0288, "step": 281850 }, { "epoch": 109.46, "learning_rate": 5.405307443365696e-06, "loss": 0.0729, "step": 281860 }, { "epoch": 109.46, "learning_rate": 5.404789644012946e-06, "loss": 0.0002, "step": 281870 }, { "epoch": 109.47, "learning_rate": 5.404271844660195e-06, "loss": 0.0028, "step": 281880 }, { "epoch": 109.47, "learning_rate": 5.403754045307443e-06, "loss": 0.0689, "step": 281890 }, { "epoch": 109.48, "learning_rate": 5.403236245954693e-06, "loss": 0.0596, "step": 281900 }, { "epoch": 109.48, "learning_rate": 5.4027184466019424e-06, "loss": 0.0177, "step": 281910 }, { "epoch": 109.48, "learning_rate": 5.402200647249192e-06, "loss": 0.0201, "step": 281920 }, { "epoch": 109.49, "learning_rate": 5.40168284789644e-06, "loss": 0.0253, "step": 281930 }, { "epoch": 109.49, "learning_rate": 5.40116504854369e-06, "loss": 0.013, "step": 281940 }, { "epoch": 109.5, "learning_rate": 5.400647249190939e-06, "loss": 0.0042, "step": 281950 }, { "epoch": 109.5, "learning_rate": 5.400129449838189e-06, "loss": 0.0386, "step": 281960 }, { "epoch": 109.5, "learning_rate": 5.399611650485437e-06, "loss": 0.0324, "step": 281970 }, { "epoch": 109.51, "learning_rate": 5.399093851132686e-06, "loss": 0.0108, "step": 281980 }, { "epoch": 109.51, "learning_rate": 5.398576051779936e-06, "loss": 0.0928, "step": 281990 }, { "epoch": 109.51, "learning_rate": 5.398058252427185e-06, "loss": 0.0032, "step": 282000 }, { "epoch": 109.52, "learning_rate": 5.397540453074434e-06, "loss": 0.046, "step": 282010 }, { "epoch": 109.52, "learning_rate": 5.397022653721683e-06, "loss": 0.1248, "step": 282020 }, { "epoch": 109.53, "learning_rate": 5.396504854368933e-06, "loss": 0.0009, "step": 282030 }, { "epoch": 109.53, "learning_rate": 5.3959870550161815e-06, "loss": 0.0724, "step": 282040 }, { "epoch": 109.53, "learning_rate": 5.395469255663431e-06, "loss": 0.0554, "step": 282050 }, { "epoch": 109.54, "learning_rate": 5.39495145631068e-06, "loss": 0.0312, "step": 282060 }, { "epoch": 109.54, "learning_rate": 5.3944336569579295e-06, "loss": 0.1031, "step": 282070 }, { "epoch": 109.55, "learning_rate": 5.393915857605178e-06, "loss": 0.0612, "step": 282080 }, { "epoch": 109.55, "learning_rate": 5.393398058252428e-06, "loss": 0.0093, "step": 282090 }, { "epoch": 109.55, "learning_rate": 5.392880258899677e-06, "loss": 0.0296, "step": 282100 }, { "epoch": 109.56, "learning_rate": 5.392362459546926e-06, "loss": 0.0315, "step": 282110 }, { "epoch": 109.56, "learning_rate": 5.391844660194175e-06, "loss": 0.0466, "step": 282120 }, { "epoch": 109.57, "learning_rate": 5.391326860841425e-06, "loss": 0.1441, "step": 282130 }, { "epoch": 109.57, "learning_rate": 5.3908090614886735e-06, "loss": 0.1307, "step": 282140 }, { "epoch": 109.57, "learning_rate": 5.390291262135923e-06, "loss": 0.0151, "step": 282150 }, { "epoch": 109.58, "learning_rate": 5.389773462783172e-06, "loss": 0.0668, "step": 282160 }, { "epoch": 109.58, "learning_rate": 5.3892556634304215e-06, "loss": 0.0623, "step": 282170 }, { "epoch": 109.58, "learning_rate": 5.38873786407767e-06, "loss": 0.0718, "step": 282180 }, { "epoch": 109.59, "learning_rate": 5.388220064724919e-06, "loss": 0.071, "step": 282190 }, { "epoch": 109.59, "learning_rate": 5.387702265372169e-06, "loss": 0.0479, "step": 282200 }, { "epoch": 109.6, "learning_rate": 5.387184466019418e-06, "loss": 0.0448, "step": 282210 }, { "epoch": 109.6, "learning_rate": 5.386666666666667e-06, "loss": 0.0203, "step": 282220 }, { "epoch": 109.6, "learning_rate": 5.386148867313916e-06, "loss": 0.0577, "step": 282230 }, { "epoch": 109.61, "learning_rate": 5.3856310679611654e-06, "loss": 0.1391, "step": 282240 }, { "epoch": 109.61, "learning_rate": 5.385113268608415e-06, "loss": 0.0011, "step": 282250 }, { "epoch": 109.62, "learning_rate": 5.384595469255664e-06, "loss": 0.0898, "step": 282260 }, { "epoch": 109.62, "learning_rate": 5.384077669902913e-06, "loss": 0.0067, "step": 282270 }, { "epoch": 109.62, "learning_rate": 5.383559870550162e-06, "loss": 0.1435, "step": 282280 }, { "epoch": 109.63, "learning_rate": 5.383042071197412e-06, "loss": 0.0744, "step": 282290 }, { "epoch": 109.63, "learning_rate": 5.382524271844661e-06, "loss": 0.0009, "step": 282300 }, { "epoch": 109.63, "learning_rate": 5.382006472491909e-06, "loss": 0.0856, "step": 282310 }, { "epoch": 109.64, "learning_rate": 5.381488673139159e-06, "loss": 0.0538, "step": 282320 }, { "epoch": 109.64, "learning_rate": 5.380970873786409e-06, "loss": 0.0496, "step": 282330 }, { "epoch": 109.65, "learning_rate": 5.3804530744336565e-06, "loss": 0.0874, "step": 282340 }, { "epoch": 109.65, "learning_rate": 5.379935275080906e-06, "loss": 0.1401, "step": 282350 }, { "epoch": 109.65, "learning_rate": 5.379417475728156e-06, "loss": 0.0808, "step": 282360 }, { "epoch": 109.66, "learning_rate": 5.378899676375405e-06, "loss": 0.0252, "step": 282370 }, { "epoch": 109.66, "learning_rate": 5.378381877022653e-06, "loss": 0.04, "step": 282380 }, { "epoch": 109.67, "learning_rate": 5.377864077669903e-06, "loss": 0.0702, "step": 282390 }, { "epoch": 109.67, "learning_rate": 5.3773462783171525e-06, "loss": 0.0973, "step": 282400 }, { "epoch": 109.67, "learning_rate": 5.376828478964402e-06, "loss": 0.0259, "step": 282410 }, { "epoch": 109.68, "learning_rate": 5.37631067961165e-06, "loss": 0.0443, "step": 282420 }, { "epoch": 109.68, "learning_rate": 5.3757928802589e-06, "loss": 0.0402, "step": 282430 }, { "epoch": 109.69, "learning_rate": 5.375275080906149e-06, "loss": 0.0266, "step": 282440 }, { "epoch": 109.69, "learning_rate": 5.374757281553399e-06, "loss": 0.0082, "step": 282450 }, { "epoch": 109.69, "learning_rate": 5.374239482200647e-06, "loss": 0.0118, "step": 282460 }, { "epoch": 109.7, "learning_rate": 5.3737216828478965e-06, "loss": 0.1169, "step": 282470 }, { "epoch": 109.7, "learning_rate": 5.373203883495146e-06, "loss": 0.0197, "step": 282480 }, { "epoch": 109.7, "learning_rate": 5.372686084142396e-06, "loss": 0.0589, "step": 282490 }, { "epoch": 109.71, "learning_rate": 5.372168284789644e-06, "loss": 0.0075, "step": 282500 }, { "epoch": 109.71, "learning_rate": 5.371650485436893e-06, "loss": 0.1597, "step": 282510 }, { "epoch": 109.72, "learning_rate": 5.371132686084143e-06, "loss": 0.0642, "step": 282520 }, { "epoch": 109.72, "learning_rate": 5.3706148867313925e-06, "loss": 0.0602, "step": 282530 }, { "epoch": 109.72, "learning_rate": 5.370097087378642e-06, "loss": 0.0635, "step": 282540 }, { "epoch": 109.73, "learning_rate": 5.36957928802589e-06, "loss": 0.002, "step": 282550 }, { "epoch": 109.73, "learning_rate": 5.36906148867314e-06, "loss": 0.0715, "step": 282560 }, { "epoch": 109.74, "learning_rate": 5.368543689320389e-06, "loss": 0.1458, "step": 282570 }, { "epoch": 109.74, "learning_rate": 5.368025889967639e-06, "loss": 0.1386, "step": 282580 }, { "epoch": 109.74, "learning_rate": 5.367508090614887e-06, "loss": 0.0021, "step": 282590 }, { "epoch": 109.75, "learning_rate": 5.3669902912621364e-06, "loss": 0.0162, "step": 282600 }, { "epoch": 109.75, "learning_rate": 5.366472491909386e-06, "loss": 0.1106, "step": 282610 }, { "epoch": 109.76, "learning_rate": 5.365954692556636e-06, "loss": 0.0659, "step": 282620 }, { "epoch": 109.76, "learning_rate": 5.365436893203884e-06, "loss": 0.0108, "step": 282630 }, { "epoch": 109.76, "learning_rate": 5.364919093851133e-06, "loss": 0.0407, "step": 282640 }, { "epoch": 109.77, "learning_rate": 5.364401294498383e-06, "loss": 0.0944, "step": 282650 }, { "epoch": 109.77, "learning_rate": 5.363883495145632e-06, "loss": 0.1025, "step": 282660 }, { "epoch": 109.77, "learning_rate": 5.36336569579288e-06, "loss": 0.0065, "step": 282670 }, { "epoch": 109.78, "learning_rate": 5.36284789644013e-06, "loss": 0.0002, "step": 282680 }, { "epoch": 109.78, "learning_rate": 5.36233009708738e-06, "loss": 0.0485, "step": 282690 }, { "epoch": 109.79, "learning_rate": 5.361812297734628e-06, "loss": 0.0112, "step": 282700 }, { "epoch": 109.79, "learning_rate": 5.361294498381877e-06, "loss": 0.0133, "step": 282710 }, { "epoch": 109.79, "learning_rate": 5.360776699029127e-06, "loss": 0.0215, "step": 282720 }, { "epoch": 109.8, "learning_rate": 5.360258899676376e-06, "loss": 0.014, "step": 282730 }, { "epoch": 109.8, "learning_rate": 5.359741100323625e-06, "loss": 0.0325, "step": 282740 }, { "epoch": 109.81, "learning_rate": 5.359223300970874e-06, "loss": 0.0535, "step": 282750 }, { "epoch": 109.81, "learning_rate": 5.3587055016181235e-06, "loss": 0.0724, "step": 282760 }, { "epoch": 109.81, "learning_rate": 5.358187702265373e-06, "loss": 0.0195, "step": 282770 }, { "epoch": 109.82, "learning_rate": 5.357669902912622e-06, "loss": 0.0957, "step": 282780 }, { "epoch": 109.82, "learning_rate": 5.357152103559871e-06, "loss": 0.004, "step": 282790 }, { "epoch": 109.83, "learning_rate": 5.35663430420712e-06, "loss": 0.0461, "step": 282800 }, { "epoch": 109.83, "learning_rate": 5.356116504854369e-06, "loss": 0.0126, "step": 282810 }, { "epoch": 109.83, "learning_rate": 5.355598705501619e-06, "loss": 0.1068, "step": 282820 }, { "epoch": 109.84, "learning_rate": 5.3550809061488675e-06, "loss": 0.0786, "step": 282830 }, { "epoch": 109.84, "learning_rate": 5.354563106796117e-06, "loss": 0.0135, "step": 282840 }, { "epoch": 109.84, "learning_rate": 5.354045307443366e-06, "loss": 0.0485, "step": 282850 }, { "epoch": 109.85, "learning_rate": 5.3535275080906155e-06, "loss": 0.1129, "step": 282860 }, { "epoch": 109.85, "learning_rate": 5.353009708737864e-06, "loss": 0.04, "step": 282870 }, { "epoch": 109.86, "learning_rate": 5.352491909385114e-06, "loss": 0.0002, "step": 282880 }, { "epoch": 109.86, "learning_rate": 5.351974110032363e-06, "loss": 0.0904, "step": 282890 }, { "epoch": 109.86, "learning_rate": 5.351456310679612e-06, "loss": 0.0292, "step": 282900 }, { "epoch": 109.87, "learning_rate": 5.350938511326861e-06, "loss": 0.0643, "step": 282910 }, { "epoch": 109.87, "learning_rate": 5.350420711974111e-06, "loss": 0.098, "step": 282920 }, { "epoch": 109.88, "learning_rate": 5.3499029126213594e-06, "loss": 0.0082, "step": 282930 }, { "epoch": 109.88, "learning_rate": 5.349385113268609e-06, "loss": 0.0099, "step": 282940 }, { "epoch": 109.88, "learning_rate": 5.348867313915858e-06, "loss": 0.1181, "step": 282950 }, { "epoch": 109.89, "learning_rate": 5.3483495145631074e-06, "loss": 0.0973, "step": 282960 }, { "epoch": 109.89, "learning_rate": 5.347831715210356e-06, "loss": 0.1662, "step": 282970 }, { "epoch": 109.9, "learning_rate": 5.347313915857606e-06, "loss": 0.0387, "step": 282980 }, { "epoch": 109.9, "learning_rate": 5.346796116504855e-06, "loss": 0.0009, "step": 282990 }, { "epoch": 109.9, "learning_rate": 5.346278317152103e-06, "loss": 0.0601, "step": 283000 }, { "epoch": 109.91, "learning_rate": 5.345760517799353e-06, "loss": 0.0613, "step": 283010 }, { "epoch": 109.91, "learning_rate": 5.345242718446603e-06, "loss": 0.01, "step": 283020 }, { "epoch": 109.91, "learning_rate": 5.344724919093851e-06, "loss": 0.0155, "step": 283030 }, { "epoch": 109.92, "learning_rate": 5.3442071197411e-06, "loss": 0.1427, "step": 283040 }, { "epoch": 109.92, "learning_rate": 5.34368932038835e-06, "loss": 0.1307, "step": 283050 }, { "epoch": 109.93, "learning_rate": 5.343171521035599e-06, "loss": 0.0202, "step": 283060 }, { "epoch": 109.93, "learning_rate": 5.342653721682848e-06, "loss": 0.0657, "step": 283070 }, { "epoch": 109.93, "learning_rate": 5.342135922330097e-06, "loss": 0.1823, "step": 283080 }, { "epoch": 109.94, "learning_rate": 5.3416181229773465e-06, "loss": 0.0781, "step": 283090 }, { "epoch": 109.94, "learning_rate": 5.341100323624596e-06, "loss": 0.0411, "step": 283100 }, { "epoch": 109.95, "learning_rate": 5.340582524271846e-06, "loss": 0.0816, "step": 283110 }, { "epoch": 109.95, "learning_rate": 5.340064724919094e-06, "loss": 0.0325, "step": 283120 }, { "epoch": 109.95, "learning_rate": 5.339546925566343e-06, "loss": 0.0648, "step": 283130 }, { "epoch": 109.96, "learning_rate": 5.339029126213593e-06, "loss": 0.0428, "step": 283140 }, { "epoch": 109.96, "learning_rate": 5.3385113268608426e-06, "loss": 0.0683, "step": 283150 }, { "epoch": 109.97, "learning_rate": 5.3379935275080905e-06, "loss": 0.0418, "step": 283160 }, { "epoch": 109.97, "learning_rate": 5.33747572815534e-06, "loss": 0.1464, "step": 283170 }, { "epoch": 109.97, "learning_rate": 5.33695792880259e-06, "loss": 0.0247, "step": 283180 }, { "epoch": 109.98, "learning_rate": 5.336440129449839e-06, "loss": 0.0344, "step": 283190 }, { "epoch": 109.98, "learning_rate": 5.335922330097087e-06, "loss": 0.0831, "step": 283200 }, { "epoch": 109.98, "learning_rate": 5.335404530744337e-06, "loss": 0.0752, "step": 283210 }, { "epoch": 109.99, "learning_rate": 5.3348867313915865e-06, "loss": 0.0429, "step": 283220 }, { "epoch": 109.99, "learning_rate": 5.334368932038836e-06, "loss": 0.0018, "step": 283230 }, { "epoch": 110.0, "learning_rate": 5.333851132686084e-06, "loss": 0.0084, "step": 283240 }, { "epoch": 110.0, "learning_rate": 5.333333333333334e-06, "loss": 0.1136, "step": 283250 }, { "epoch": 110.0, "eval_accuracy": 0.9502063273727648, "eval_loss": 0.3846931755542755, "eval_runtime": 8.1817, "eval_samples_per_second": 444.286, "eval_steps_per_second": 55.612, "step": 283250 }, { "epoch": 110.0, "learning_rate": 5.332815533980583e-06, "loss": 0.0531, "step": 283260 }, { "epoch": 110.01, "learning_rate": 5.332297734627833e-06, "loss": 0.1532, "step": 283270 }, { "epoch": 110.01, "learning_rate": 5.331779935275081e-06, "loss": 0.0001, "step": 283280 }, { "epoch": 110.02, "learning_rate": 5.3312621359223304e-06, "loss": 0.1421, "step": 283290 }, { "epoch": 110.02, "learning_rate": 5.33074433656958e-06, "loss": 0.0315, "step": 283300 }, { "epoch": 110.02, "learning_rate": 5.33022653721683e-06, "loss": 0.0232, "step": 283310 }, { "epoch": 110.03, "learning_rate": 5.329708737864078e-06, "loss": 0.0896, "step": 283320 }, { "epoch": 110.03, "learning_rate": 5.329190938511327e-06, "loss": 0.0093, "step": 283330 }, { "epoch": 110.03, "learning_rate": 5.328673139158577e-06, "loss": 0.0633, "step": 283340 }, { "epoch": 110.04, "learning_rate": 5.3281553398058264e-06, "loss": 0.0128, "step": 283350 }, { "epoch": 110.04, "learning_rate": 5.327637540453074e-06, "loss": 0.161, "step": 283360 }, { "epoch": 110.05, "learning_rate": 5.327119741100324e-06, "loss": 0.0283, "step": 283370 }, { "epoch": 110.05, "learning_rate": 5.326601941747574e-06, "loss": 0.0351, "step": 283380 }, { "epoch": 110.05, "learning_rate": 5.326084142394823e-06, "loss": 0.0736, "step": 283390 }, { "epoch": 110.06, "learning_rate": 5.325566343042071e-06, "loss": 0.0346, "step": 283400 }, { "epoch": 110.06, "learning_rate": 5.325048543689321e-06, "loss": 0.1902, "step": 283410 }, { "epoch": 110.07, "learning_rate": 5.32453074433657e-06, "loss": 0.0757, "step": 283420 }, { "epoch": 110.07, "learning_rate": 5.32401294498382e-06, "loss": 0.0017, "step": 283430 }, { "epoch": 110.07, "learning_rate": 5.323495145631068e-06, "loss": 0.009, "step": 283440 }, { "epoch": 110.08, "learning_rate": 5.3229773462783175e-06, "loss": 0.0251, "step": 283450 }, { "epoch": 110.08, "learning_rate": 5.322459546925567e-06, "loss": 0.0045, "step": 283460 }, { "epoch": 110.09, "learning_rate": 5.321941747572816e-06, "loss": 0.0918, "step": 283470 }, { "epoch": 110.09, "learning_rate": 5.321423948220065e-06, "loss": 0.0636, "step": 283480 }, { "epoch": 110.09, "learning_rate": 5.320906148867314e-06, "loss": 0.1418, "step": 283490 }, { "epoch": 110.1, "learning_rate": 5.320388349514564e-06, "loss": 0.0135, "step": 283500 }, { "epoch": 110.1, "learning_rate": 5.319870550161813e-06, "loss": 0.1267, "step": 283510 }, { "epoch": 110.1, "learning_rate": 5.3193527508090615e-06, "loss": 0.0379, "step": 283520 }, { "epoch": 110.11, "learning_rate": 5.318834951456311e-06, "loss": 0.1027, "step": 283530 }, { "epoch": 110.11, "learning_rate": 5.318317152103561e-06, "loss": 0.014, "step": 283540 }, { "epoch": 110.12, "learning_rate": 5.3177993527508095e-06, "loss": 0.1096, "step": 283550 }, { "epoch": 110.12, "learning_rate": 5.317281553398058e-06, "loss": 0.0306, "step": 283560 }, { "epoch": 110.12, "learning_rate": 5.316763754045308e-06, "loss": 0.0003, "step": 283570 }, { "epoch": 110.13, "learning_rate": 5.3162459546925575e-06, "loss": 0.0464, "step": 283580 }, { "epoch": 110.13, "learning_rate": 5.315728155339806e-06, "loss": 0.0098, "step": 283590 }, { "epoch": 110.14, "learning_rate": 5.315210355987055e-06, "loss": 0.1423, "step": 283600 }, { "epoch": 110.14, "learning_rate": 5.314692556634305e-06, "loss": 0.0131, "step": 283610 }, { "epoch": 110.14, "learning_rate": 5.3141747572815534e-06, "loss": 0.0393, "step": 283620 }, { "epoch": 110.15, "learning_rate": 5.313656957928803e-06, "loss": 0.0003, "step": 283630 }, { "epoch": 110.15, "learning_rate": 5.313139158576052e-06, "loss": 0.0926, "step": 283640 }, { "epoch": 110.16, "learning_rate": 5.3126213592233014e-06, "loss": 0.0331, "step": 283650 }, { "epoch": 110.16, "learning_rate": 5.31210355987055e-06, "loss": 0.0344, "step": 283660 }, { "epoch": 110.16, "learning_rate": 5.3115857605178e-06, "loss": 0.0005, "step": 283670 }, { "epoch": 110.17, "learning_rate": 5.3110679611650494e-06, "loss": 0.0947, "step": 283680 }, { "epoch": 110.17, "learning_rate": 5.310550161812298e-06, "loss": 0.0344, "step": 283690 }, { "epoch": 110.17, "learning_rate": 5.310032362459547e-06, "loss": 0.0915, "step": 283700 }, { "epoch": 110.18, "learning_rate": 5.309514563106797e-06, "loss": 0.03, "step": 283710 }, { "epoch": 110.18, "learning_rate": 5.308996763754046e-06, "loss": 0.0163, "step": 283720 }, { "epoch": 110.19, "learning_rate": 5.308478964401295e-06, "loss": 0.0562, "step": 283730 }, { "epoch": 110.19, "learning_rate": 5.307961165048544e-06, "loss": 0.0087, "step": 283740 }, { "epoch": 110.19, "learning_rate": 5.307443365695793e-06, "loss": 0.1461, "step": 283750 }, { "epoch": 110.2, "learning_rate": 5.306925566343043e-06, "loss": 0.0196, "step": 283760 }, { "epoch": 110.2, "learning_rate": 5.306407766990292e-06, "loss": 0.0954, "step": 283770 }, { "epoch": 110.21, "learning_rate": 5.3058899676375405e-06, "loss": 0.129, "step": 283780 }, { "epoch": 110.21, "learning_rate": 5.30537216828479e-06, "loss": 0.2165, "step": 283790 }, { "epoch": 110.21, "learning_rate": 5.30485436893204e-06, "loss": 0.0196, "step": 283800 }, { "epoch": 110.22, "learning_rate": 5.304336569579288e-06, "loss": 0.0069, "step": 283810 }, { "epoch": 110.22, "learning_rate": 5.303818770226537e-06, "loss": 0.0255, "step": 283820 }, { "epoch": 110.23, "learning_rate": 5.303300970873787e-06, "loss": 0.0164, "step": 283830 }, { "epoch": 110.23, "learning_rate": 5.3027831715210366e-06, "loss": 0.0595, "step": 283840 }, { "epoch": 110.23, "learning_rate": 5.3022653721682845e-06, "loss": 0.018, "step": 283850 }, { "epoch": 110.24, "learning_rate": 5.301747572815534e-06, "loss": 0.0403, "step": 283860 }, { "epoch": 110.24, "learning_rate": 5.301229773462784e-06, "loss": 0.018, "step": 283870 }, { "epoch": 110.24, "learning_rate": 5.300711974110033e-06, "loss": 0.0187, "step": 283880 }, { "epoch": 110.25, "learning_rate": 5.300194174757281e-06, "loss": 0.0565, "step": 283890 }, { "epoch": 110.25, "learning_rate": 5.299676375404531e-06, "loss": 0.092, "step": 283900 }, { "epoch": 110.26, "learning_rate": 5.2991585760517805e-06, "loss": 0.0025, "step": 283910 }, { "epoch": 110.26, "learning_rate": 5.29864077669903e-06, "loss": 0.0131, "step": 283920 }, { "epoch": 110.26, "learning_rate": 5.298122977346278e-06, "loss": 0.0405, "step": 283930 }, { "epoch": 110.27, "learning_rate": 5.297605177993528e-06, "loss": 0.0371, "step": 283940 }, { "epoch": 110.27, "learning_rate": 5.297087378640777e-06, "loss": 0.0301, "step": 283950 }, { "epoch": 110.28, "learning_rate": 5.296569579288027e-06, "loss": 0.0178, "step": 283960 }, { "epoch": 110.28, "learning_rate": 5.296051779935275e-06, "loss": 0.0001, "step": 283970 }, { "epoch": 110.28, "learning_rate": 5.2955339805825244e-06, "loss": 0.074, "step": 283980 }, { "epoch": 110.29, "learning_rate": 5.295016181229774e-06, "loss": 0.0035, "step": 283990 }, { "epoch": 110.29, "learning_rate": 5.294498381877024e-06, "loss": 0.0173, "step": 284000 }, { "epoch": 110.3, "learning_rate": 5.293980582524272e-06, "loss": 0.0005, "step": 284010 }, { "epoch": 110.3, "learning_rate": 5.293462783171521e-06, "loss": 0.0749, "step": 284020 }, { "epoch": 110.3, "learning_rate": 5.292944983818771e-06, "loss": 0.0429, "step": 284030 }, { "epoch": 110.31, "learning_rate": 5.2924271844660204e-06, "loss": 0.1101, "step": 284040 }, { "epoch": 110.31, "learning_rate": 5.291909385113268e-06, "loss": 0.0165, "step": 284050 }, { "epoch": 110.31, "learning_rate": 5.291391585760518e-06, "loss": 0.0642, "step": 284060 }, { "epoch": 110.32, "learning_rate": 5.290873786407768e-06, "loss": 0.013, "step": 284070 }, { "epoch": 110.32, "learning_rate": 5.290355987055017e-06, "loss": 0.001, "step": 284080 }, { "epoch": 110.33, "learning_rate": 5.289838187702265e-06, "loss": 0.0712, "step": 284090 }, { "epoch": 110.33, "learning_rate": 5.289320388349515e-06, "loss": 0.0017, "step": 284100 }, { "epoch": 110.33, "learning_rate": 5.288802588996764e-06, "loss": 0.002, "step": 284110 }, { "epoch": 110.34, "learning_rate": 5.288284789644014e-06, "loss": 0.0944, "step": 284120 }, { "epoch": 110.34, "learning_rate": 5.287766990291262e-06, "loss": 0.0017, "step": 284130 }, { "epoch": 110.35, "learning_rate": 5.2872491909385115e-06, "loss": 0.0061, "step": 284140 }, { "epoch": 110.35, "learning_rate": 5.286731391585761e-06, "loss": 0.0008, "step": 284150 }, { "epoch": 110.35, "learning_rate": 5.286213592233011e-06, "loss": 0.0094, "step": 284160 }, { "epoch": 110.36, "learning_rate": 5.285695792880259e-06, "loss": 0.0458, "step": 284170 }, { "epoch": 110.36, "learning_rate": 5.285177993527508e-06, "loss": 0.0783, "step": 284180 }, { "epoch": 110.37, "learning_rate": 5.284660194174758e-06, "loss": 0.0065, "step": 284190 }, { "epoch": 110.37, "learning_rate": 5.2841423948220076e-06, "loss": 0.0143, "step": 284200 }, { "epoch": 110.37, "learning_rate": 5.2836245954692555e-06, "loss": 0.0281, "step": 284210 }, { "epoch": 110.38, "learning_rate": 5.283106796116505e-06, "loss": 0.0083, "step": 284220 }, { "epoch": 110.38, "learning_rate": 5.282588996763755e-06, "loss": 0.0987, "step": 284230 }, { "epoch": 110.38, "learning_rate": 5.282071197411004e-06, "loss": 0.0004, "step": 284240 }, { "epoch": 110.39, "learning_rate": 5.281553398058253e-06, "loss": 0.0622, "step": 284250 }, { "epoch": 110.39, "learning_rate": 5.281035598705502e-06, "loss": 0.0449, "step": 284260 }, { "epoch": 110.4, "learning_rate": 5.2805177993527515e-06, "loss": 0.0527, "step": 284270 }, { "epoch": 110.4, "learning_rate": 5.28e-06, "loss": 0.0001, "step": 284280 }, { "epoch": 110.4, "learning_rate": 5.27948220064725e-06, "loss": 0.0008, "step": 284290 }, { "epoch": 110.41, "learning_rate": 5.278964401294499e-06, "loss": 0.0385, "step": 284300 }, { "epoch": 110.41, "learning_rate": 5.278446601941748e-06, "loss": 0.09, "step": 284310 }, { "epoch": 110.42, "learning_rate": 5.277928802588997e-06, "loss": 0.027, "step": 284320 }, { "epoch": 110.42, "learning_rate": 5.277411003236247e-06, "loss": 0.0053, "step": 284330 }, { "epoch": 110.42, "learning_rate": 5.2768932038834954e-06, "loss": 0.0469, "step": 284340 }, { "epoch": 110.43, "learning_rate": 5.276375404530745e-06, "loss": 0.0259, "step": 284350 }, { "epoch": 110.43, "learning_rate": 5.275857605177994e-06, "loss": 0.1013, "step": 284360 }, { "epoch": 110.43, "learning_rate": 5.2753398058252434e-06, "loss": 0.1427, "step": 284370 }, { "epoch": 110.44, "learning_rate": 5.274822006472492e-06, "loss": 0.042, "step": 284380 }, { "epoch": 110.44, "learning_rate": 5.274304207119742e-06, "loss": 0.034, "step": 284390 }, { "epoch": 110.45, "learning_rate": 5.273786407766991e-06, "loss": 0.0459, "step": 284400 }, { "epoch": 110.45, "learning_rate": 5.27326860841424e-06, "loss": 0.0769, "step": 284410 }, { "epoch": 110.45, "learning_rate": 5.272750809061489e-06, "loss": 0.0006, "step": 284420 }, { "epoch": 110.46, "learning_rate": 5.272233009708739e-06, "loss": 0.0975, "step": 284430 }, { "epoch": 110.46, "learning_rate": 5.271715210355987e-06, "loss": 0.1041, "step": 284440 }, { "epoch": 110.47, "learning_rate": 5.271197411003237e-06, "loss": 0.0036, "step": 284450 }, { "epoch": 110.47, "learning_rate": 5.270679611650486e-06, "loss": 0.0015, "step": 284460 }, { "epoch": 110.47, "learning_rate": 5.2701618122977345e-06, "loss": 0.081, "step": 284470 }, { "epoch": 110.48, "learning_rate": 5.269644012944984e-06, "loss": 0.0456, "step": 284480 }, { "epoch": 110.48, "learning_rate": 5.269126213592234e-06, "loss": 0.0397, "step": 284490 }, { "epoch": 110.49, "learning_rate": 5.2686084142394825e-06, "loss": 0.0723, "step": 284500 }, { "epoch": 110.49, "learning_rate": 5.268090614886731e-06, "loss": 0.1289, "step": 284510 }, { "epoch": 110.49, "learning_rate": 5.267572815533981e-06, "loss": 0.0001, "step": 284520 }, { "epoch": 110.5, "learning_rate": 5.2670550161812306e-06, "loss": 0.0003, "step": 284530 }, { "epoch": 110.5, "learning_rate": 5.266537216828479e-06, "loss": 0.0713, "step": 284540 }, { "epoch": 110.5, "learning_rate": 5.266019417475728e-06, "loss": 0.001, "step": 284550 }, { "epoch": 110.51, "learning_rate": 5.265501618122978e-06, "loss": 0.0078, "step": 284560 }, { "epoch": 110.51, "learning_rate": 5.264983818770227e-06, "loss": 0.037, "step": 284570 }, { "epoch": 110.52, "learning_rate": 5.264466019417476e-06, "loss": 0.0599, "step": 284580 }, { "epoch": 110.52, "learning_rate": 5.263948220064725e-06, "loss": 0.1222, "step": 284590 }, { "epoch": 110.52, "learning_rate": 5.2634304207119745e-06, "loss": 0.0187, "step": 284600 }, { "epoch": 110.53, "learning_rate": 5.262912621359224e-06, "loss": 0.0423, "step": 284610 }, { "epoch": 110.53, "learning_rate": 5.262394822006472e-06, "loss": 0.0607, "step": 284620 }, { "epoch": 110.54, "learning_rate": 5.261877022653722e-06, "loss": 0.0028, "step": 284630 }, { "epoch": 110.54, "learning_rate": 5.261359223300971e-06, "loss": 0.059, "step": 284640 }, { "epoch": 110.54, "learning_rate": 5.260841423948221e-06, "loss": 0.0454, "step": 284650 }, { "epoch": 110.55, "learning_rate": 5.260323624595469e-06, "loss": 0.1094, "step": 284660 }, { "epoch": 110.55, "learning_rate": 5.2598058252427184e-06, "loss": 0.021, "step": 284670 }, { "epoch": 110.56, "learning_rate": 5.259288025889968e-06, "loss": 0.0366, "step": 284680 }, { "epoch": 110.56, "learning_rate": 5.258770226537218e-06, "loss": 0.1077, "step": 284690 }, { "epoch": 110.56, "learning_rate": 5.258252427184466e-06, "loss": 0.0973, "step": 284700 }, { "epoch": 110.57, "learning_rate": 5.257734627831715e-06, "loss": 0.0003, "step": 284710 }, { "epoch": 110.57, "learning_rate": 5.257216828478965e-06, "loss": 0.0431, "step": 284720 }, { "epoch": 110.57, "learning_rate": 5.2566990291262144e-06, "loss": 0.0975, "step": 284730 }, { "epoch": 110.58, "learning_rate": 5.256181229773462e-06, "loss": 0.0856, "step": 284740 }, { "epoch": 110.58, "learning_rate": 5.255663430420712e-06, "loss": 0.0964, "step": 284750 }, { "epoch": 110.59, "learning_rate": 5.255145631067962e-06, "loss": 0.0029, "step": 284760 }, { "epoch": 110.59, "learning_rate": 5.254627831715211e-06, "loss": 0.0445, "step": 284770 }, { "epoch": 110.59, "learning_rate": 5.254110032362459e-06, "loss": 0.0279, "step": 284780 }, { "epoch": 110.6, "learning_rate": 5.253592233009709e-06, "loss": 0.0669, "step": 284790 }, { "epoch": 110.6, "learning_rate": 5.253074433656958e-06, "loss": 0.0193, "step": 284800 }, { "epoch": 110.61, "learning_rate": 5.252556634304208e-06, "loss": 0.0876, "step": 284810 }, { "epoch": 110.61, "learning_rate": 5.252038834951458e-06, "loss": 0.0832, "step": 284820 }, { "epoch": 110.61, "learning_rate": 5.2515210355987055e-06, "loss": 0.1484, "step": 284830 }, { "epoch": 110.62, "learning_rate": 5.251003236245955e-06, "loss": 0.0008, "step": 284840 }, { "epoch": 110.62, "learning_rate": 5.250485436893205e-06, "loss": 0.0623, "step": 284850 }, { "epoch": 110.63, "learning_rate": 5.249967637540454e-06, "loss": 0.0441, "step": 284860 }, { "epoch": 110.63, "learning_rate": 5.249449838187702e-06, "loss": 0.0361, "step": 284870 }, { "epoch": 110.63, "learning_rate": 5.248932038834952e-06, "loss": 0.0148, "step": 284880 }, { "epoch": 110.64, "learning_rate": 5.2484142394822016e-06, "loss": 0.0557, "step": 284890 }, { "epoch": 110.64, "learning_rate": 5.247896440129451e-06, "loss": 0.1154, "step": 284900 }, { "epoch": 110.64, "learning_rate": 5.247378640776699e-06, "loss": 0.067, "step": 284910 }, { "epoch": 110.65, "learning_rate": 5.246860841423949e-06, "loss": 0.0976, "step": 284920 }, { "epoch": 110.65, "learning_rate": 5.246343042071198e-06, "loss": 0.1133, "step": 284930 }, { "epoch": 110.66, "learning_rate": 5.245825242718447e-06, "loss": 0.1607, "step": 284940 }, { "epoch": 110.66, "learning_rate": 5.245307443365696e-06, "loss": 0.1014, "step": 284950 }, { "epoch": 110.66, "learning_rate": 5.2447896440129455e-06, "loss": 0.0259, "step": 284960 }, { "epoch": 110.67, "learning_rate": 5.244271844660195e-06, "loss": 0.0323, "step": 284970 }, { "epoch": 110.67, "learning_rate": 5.243754045307444e-06, "loss": 0.0344, "step": 284980 }, { "epoch": 110.68, "learning_rate": 5.243236245954693e-06, "loss": 0.0017, "step": 284990 }, { "epoch": 110.68, "learning_rate": 5.242718446601942e-06, "loss": 0.0002, "step": 285000 }, { "epoch": 110.68, "learning_rate": 5.242200647249192e-06, "loss": 0.0704, "step": 285010 }, { "epoch": 110.69, "learning_rate": 5.241682847896441e-06, "loss": 0.0757, "step": 285020 }, { "epoch": 110.69, "learning_rate": 5.2411650485436894e-06, "loss": 0.0769, "step": 285030 }, { "epoch": 110.7, "learning_rate": 5.240647249190939e-06, "loss": 0.0098, "step": 285040 }, { "epoch": 110.7, "learning_rate": 5.240129449838189e-06, "loss": 0.0786, "step": 285050 }, { "epoch": 110.7, "learning_rate": 5.2396116504854374e-06, "loss": 0.3239, "step": 285060 }, { "epoch": 110.71, "learning_rate": 5.239093851132686e-06, "loss": 0.1818, "step": 285070 }, { "epoch": 110.71, "learning_rate": 5.238576051779936e-06, "loss": 0.0144, "step": 285080 }, { "epoch": 110.71, "learning_rate": 5.238058252427185e-06, "loss": 0.039, "step": 285090 }, { "epoch": 110.72, "learning_rate": 5.237540453074434e-06, "loss": 0.0244, "step": 285100 }, { "epoch": 110.72, "learning_rate": 5.237022653721683e-06, "loss": 0.0797, "step": 285110 }, { "epoch": 110.73, "learning_rate": 5.236504854368933e-06, "loss": 0.0837, "step": 285120 }, { "epoch": 110.73, "learning_rate": 5.235987055016181e-06, "loss": 0.0571, "step": 285130 }, { "epoch": 110.73, "learning_rate": 5.235469255663431e-06, "loss": 0.0517, "step": 285140 }, { "epoch": 110.74, "learning_rate": 5.23495145631068e-06, "loss": 0.0431, "step": 285150 }, { "epoch": 110.74, "learning_rate": 5.234433656957929e-06, "loss": 0.0501, "step": 285160 }, { "epoch": 110.75, "learning_rate": 5.233915857605178e-06, "loss": 0.0115, "step": 285170 }, { "epoch": 110.75, "learning_rate": 5.233398058252428e-06, "loss": 0.0012, "step": 285180 }, { "epoch": 110.75, "learning_rate": 5.2328802588996765e-06, "loss": 0.1219, "step": 285190 }, { "epoch": 110.76, "learning_rate": 5.232362459546926e-06, "loss": 0.0886, "step": 285200 }, { "epoch": 110.76, "learning_rate": 5.231844660194175e-06, "loss": 0.0206, "step": 285210 }, { "epoch": 110.77, "learning_rate": 5.2313268608414246e-06, "loss": 0.0433, "step": 285220 }, { "epoch": 110.77, "learning_rate": 5.230809061488673e-06, "loss": 0.0577, "step": 285230 }, { "epoch": 110.77, "learning_rate": 5.230291262135923e-06, "loss": 0.0042, "step": 285240 }, { "epoch": 110.78, "learning_rate": 5.229773462783172e-06, "loss": 0.2058, "step": 285250 }, { "epoch": 110.78, "learning_rate": 5.229255663430421e-06, "loss": 0.0187, "step": 285260 }, { "epoch": 110.78, "learning_rate": 5.22873786407767e-06, "loss": 0.066, "step": 285270 }, { "epoch": 110.79, "learning_rate": 5.228220064724919e-06, "loss": 0.0576, "step": 285280 }, { "epoch": 110.79, "learning_rate": 5.2277022653721685e-06, "loss": 0.1293, "step": 285290 }, { "epoch": 110.8, "learning_rate": 5.227184466019418e-06, "loss": 0.0888, "step": 285300 }, { "epoch": 110.8, "learning_rate": 5.226666666666667e-06, "loss": 0.0129, "step": 285310 }, { "epoch": 110.8, "learning_rate": 5.226148867313916e-06, "loss": 0.0002, "step": 285320 }, { "epoch": 110.81, "learning_rate": 5.225631067961165e-06, "loss": 0.007, "step": 285330 }, { "epoch": 110.81, "learning_rate": 5.225113268608415e-06, "loss": 0.0176, "step": 285340 }, { "epoch": 110.82, "learning_rate": 5.2245954692556645e-06, "loss": 0.0692, "step": 285350 }, { "epoch": 110.82, "learning_rate": 5.2240776699029124e-06, "loss": 0.0456, "step": 285360 }, { "epoch": 110.82, "learning_rate": 5.223559870550162e-06, "loss": 0.0436, "step": 285370 }, { "epoch": 110.83, "learning_rate": 5.223042071197412e-06, "loss": 0.1016, "step": 285380 }, { "epoch": 110.83, "learning_rate": 5.222524271844661e-06, "loss": 0.1118, "step": 285390 }, { "epoch": 110.83, "learning_rate": 5.222006472491909e-06, "loss": 0.0857, "step": 285400 }, { "epoch": 110.84, "learning_rate": 5.221488673139159e-06, "loss": 0.066, "step": 285410 }, { "epoch": 110.84, "learning_rate": 5.2209708737864084e-06, "loss": 0.0934, "step": 285420 }, { "epoch": 110.85, "learning_rate": 5.220453074433658e-06, "loss": 0.0498, "step": 285430 }, { "epoch": 110.85, "learning_rate": 5.219935275080906e-06, "loss": 0.0283, "step": 285440 }, { "epoch": 110.85, "learning_rate": 5.219417475728156e-06, "loss": 0.0634, "step": 285450 }, { "epoch": 110.86, "learning_rate": 5.218899676375405e-06, "loss": 0.0293, "step": 285460 }, { "epoch": 110.86, "learning_rate": 5.218381877022655e-06, "loss": 0.1879, "step": 285470 }, { "epoch": 110.87, "learning_rate": 5.217864077669903e-06, "loss": 0.0196, "step": 285480 }, { "epoch": 110.87, "learning_rate": 5.217346278317152e-06, "loss": 0.0089, "step": 285490 }, { "epoch": 110.87, "learning_rate": 5.216828478964402e-06, "loss": 0.0175, "step": 285500 }, { "epoch": 110.88, "learning_rate": 5.216310679611652e-06, "loss": 0.0282, "step": 285510 }, { "epoch": 110.88, "learning_rate": 5.2157928802588995e-06, "loss": 0.0507, "step": 285520 }, { "epoch": 110.89, "learning_rate": 5.215275080906149e-06, "loss": 0.0017, "step": 285530 }, { "epoch": 110.89, "learning_rate": 5.214757281553399e-06, "loss": 0.0379, "step": 285540 }, { "epoch": 110.89, "learning_rate": 5.214239482200648e-06, "loss": 0.0471, "step": 285550 }, { "epoch": 110.9, "learning_rate": 5.213721682847896e-06, "loss": 0.07, "step": 285560 }, { "epoch": 110.9, "learning_rate": 5.213203883495146e-06, "loss": 0.0257, "step": 285570 }, { "epoch": 110.9, "learning_rate": 5.2126860841423956e-06, "loss": 0.0388, "step": 285580 }, { "epoch": 110.91, "learning_rate": 5.212168284789645e-06, "loss": 0.0478, "step": 285590 }, { "epoch": 110.91, "learning_rate": 5.211650485436893e-06, "loss": 0.0438, "step": 285600 }, { "epoch": 110.92, "learning_rate": 5.211132686084143e-06, "loss": 0.004, "step": 285610 }, { "epoch": 110.92, "learning_rate": 5.210614886731392e-06, "loss": 0.0035, "step": 285620 }, { "epoch": 110.92, "learning_rate": 5.210097087378642e-06, "loss": 0.1101, "step": 285630 }, { "epoch": 110.93, "learning_rate": 5.20957928802589e-06, "loss": 0.0938, "step": 285640 }, { "epoch": 110.93, "learning_rate": 5.2090614886731395e-06, "loss": 0.0484, "step": 285650 }, { "epoch": 110.94, "learning_rate": 5.208543689320389e-06, "loss": 0.0389, "step": 285660 }, { "epoch": 110.94, "learning_rate": 5.208025889967639e-06, "loss": 0.0224, "step": 285670 }, { "epoch": 110.94, "learning_rate": 5.207508090614887e-06, "loss": 0.0525, "step": 285680 }, { "epoch": 110.95, "learning_rate": 5.206990291262136e-06, "loss": 0.0159, "step": 285690 }, { "epoch": 110.95, "learning_rate": 5.206472491909386e-06, "loss": 0.1414, "step": 285700 }, { "epoch": 110.96, "learning_rate": 5.2059546925566355e-06, "loss": 0.0184, "step": 285710 }, { "epoch": 110.96, "learning_rate": 5.2054368932038834e-06, "loss": 0.0124, "step": 285720 }, { "epoch": 110.96, "learning_rate": 5.204919093851133e-06, "loss": 0.1025, "step": 285730 }, { "epoch": 110.97, "learning_rate": 5.204401294498383e-06, "loss": 0.0951, "step": 285740 }, { "epoch": 110.97, "learning_rate": 5.2038834951456314e-06, "loss": 0.0387, "step": 285750 }, { "epoch": 110.97, "learning_rate": 5.20336569579288e-06, "loss": 0.0006, "step": 285760 }, { "epoch": 110.98, "learning_rate": 5.20284789644013e-06, "loss": 0.0101, "step": 285770 }, { "epoch": 110.98, "learning_rate": 5.2023300970873794e-06, "loss": 0.0035, "step": 285780 }, { "epoch": 110.99, "learning_rate": 5.201812297734628e-06, "loss": 0.1124, "step": 285790 }, { "epoch": 110.99, "learning_rate": 5.201294498381877e-06, "loss": 0.0001, "step": 285800 }, { "epoch": 110.99, "learning_rate": 5.200776699029127e-06, "loss": 0.01, "step": 285810 }, { "epoch": 111.0, "learning_rate": 5.200258899676376e-06, "loss": 0.0751, "step": 285820 }, { "epoch": 111.0, "eval_accuracy": 0.9513067400275104, "eval_loss": 0.38175803422927856, "eval_runtime": 8.1715, "eval_samples_per_second": 444.84, "eval_steps_per_second": 55.682, "step": 285825 }, { "epoch": 111.0, "learning_rate": 5.199741100323625e-06, "loss": 0.0085, "step": 285830 }, { "epoch": 111.01, "learning_rate": 5.199223300970874e-06, "loss": 0.1059, "step": 285840 }, { "epoch": 111.01, "learning_rate": 5.198705501618123e-06, "loss": 0.0087, "step": 285850 }, { "epoch": 111.01, "learning_rate": 5.198187702265373e-06, "loss": 0.0117, "step": 285860 }, { "epoch": 111.02, "learning_rate": 5.197669902912622e-06, "loss": 0.0482, "step": 285870 }, { "epoch": 111.02, "learning_rate": 5.1971521035598705e-06, "loss": 0.017, "step": 285880 }, { "epoch": 111.03, "learning_rate": 5.19663430420712e-06, "loss": 0.0399, "step": 285890 }, { "epoch": 111.03, "learning_rate": 5.196116504854369e-06, "loss": 0.0227, "step": 285900 }, { "epoch": 111.03, "learning_rate": 5.1955987055016186e-06, "loss": 0.0179, "step": 285910 }, { "epoch": 111.04, "learning_rate": 5.195080906148868e-06, "loss": 0.0356, "step": 285920 }, { "epoch": 111.04, "learning_rate": 5.194563106796117e-06, "loss": 0.0113, "step": 285930 }, { "epoch": 111.04, "learning_rate": 5.194045307443366e-06, "loss": 0.0112, "step": 285940 }, { "epoch": 111.05, "learning_rate": 5.193527508090615e-06, "loss": 0.1683, "step": 285950 }, { "epoch": 111.05, "learning_rate": 5.193009708737865e-06, "loss": 0.0212, "step": 285960 }, { "epoch": 111.06, "learning_rate": 5.192491909385114e-06, "loss": 0.0321, "step": 285970 }, { "epoch": 111.06, "learning_rate": 5.1919741100323625e-06, "loss": 0.0042, "step": 285980 }, { "epoch": 111.06, "learning_rate": 5.191456310679612e-06, "loss": 0.0076, "step": 285990 }, { "epoch": 111.07, "learning_rate": 5.190938511326862e-06, "loss": 0.0819, "step": 286000 }, { "epoch": 111.07, "learning_rate": 5.1904207119741105e-06, "loss": 0.0277, "step": 286010 }, { "epoch": 111.08, "learning_rate": 5.189902912621359e-06, "loss": 0.0044, "step": 286020 }, { "epoch": 111.08, "learning_rate": 5.189385113268609e-06, "loss": 0.0483, "step": 286030 }, { "epoch": 111.08, "learning_rate": 5.1888673139158585e-06, "loss": 0.0143, "step": 286040 }, { "epoch": 111.09, "learning_rate": 5.188349514563107e-06, "loss": 0.0428, "step": 286050 }, { "epoch": 111.09, "learning_rate": 5.187831715210356e-06, "loss": 0.0276, "step": 286060 }, { "epoch": 111.1, "learning_rate": 5.187313915857606e-06, "loss": 0.1004, "step": 286070 }, { "epoch": 111.1, "learning_rate": 5.186796116504855e-06, "loss": 0.0435, "step": 286080 }, { "epoch": 111.1, "learning_rate": 5.186278317152103e-06, "loss": 0.132, "step": 286090 }, { "epoch": 111.11, "learning_rate": 5.185760517799353e-06, "loss": 0.058, "step": 286100 }, { "epoch": 111.11, "learning_rate": 5.1852427184466024e-06, "loss": 0.0674, "step": 286110 }, { "epoch": 111.11, "learning_rate": 5.184724919093852e-06, "loss": 0.0171, "step": 286120 }, { "epoch": 111.12, "learning_rate": 5.1842071197411e-06, "loss": 0.0113, "step": 286130 }, { "epoch": 111.12, "learning_rate": 5.18368932038835e-06, "loss": 0.061, "step": 286140 }, { "epoch": 111.13, "learning_rate": 5.183171521035599e-06, "loss": 0.0561, "step": 286150 }, { "epoch": 111.13, "learning_rate": 5.182653721682849e-06, "loss": 0.0093, "step": 286160 }, { "epoch": 111.13, "learning_rate": 5.182135922330097e-06, "loss": 0.0827, "step": 286170 }, { "epoch": 111.14, "learning_rate": 5.181618122977346e-06, "loss": 0.0454, "step": 286180 }, { "epoch": 111.14, "learning_rate": 5.181100323624596e-06, "loss": 0.0156, "step": 286190 }, { "epoch": 111.15, "learning_rate": 5.180582524271846e-06, "loss": 0.0001, "step": 286200 }, { "epoch": 111.15, "learning_rate": 5.1800647249190935e-06, "loss": 0.1181, "step": 286210 }, { "epoch": 111.15, "learning_rate": 5.179546925566343e-06, "loss": 0.0779, "step": 286220 }, { "epoch": 111.16, "learning_rate": 5.179029126213593e-06, "loss": 0.1052, "step": 286230 }, { "epoch": 111.16, "learning_rate": 5.178511326860842e-06, "loss": 0.0289, "step": 286240 }, { "epoch": 111.17, "learning_rate": 5.17799352750809e-06, "loss": 0.0261, "step": 286250 }, { "epoch": 111.17, "learning_rate": 5.17747572815534e-06, "loss": 0.0188, "step": 286260 }, { "epoch": 111.17, "learning_rate": 5.1769579288025896e-06, "loss": 0.1851, "step": 286270 }, { "epoch": 111.18, "learning_rate": 5.176440129449839e-06, "loss": 0.004, "step": 286280 }, { "epoch": 111.18, "learning_rate": 5.175922330097087e-06, "loss": 0.0017, "step": 286290 }, { "epoch": 111.18, "learning_rate": 5.175404530744337e-06, "loss": 0.0029, "step": 286300 }, { "epoch": 111.19, "learning_rate": 5.174886731391586e-06, "loss": 0.0946, "step": 286310 }, { "epoch": 111.19, "learning_rate": 5.174368932038836e-06, "loss": 0.0679, "step": 286320 }, { "epoch": 111.2, "learning_rate": 5.173851132686084e-06, "loss": 0.0494, "step": 286330 }, { "epoch": 111.2, "learning_rate": 5.1733333333333335e-06, "loss": 0.0644, "step": 286340 }, { "epoch": 111.2, "learning_rate": 5.172815533980583e-06, "loss": 0.0266, "step": 286350 }, { "epoch": 111.21, "learning_rate": 5.172297734627833e-06, "loss": 0.069, "step": 286360 }, { "epoch": 111.21, "learning_rate": 5.171779935275081e-06, "loss": 0.1173, "step": 286370 }, { "epoch": 111.22, "learning_rate": 5.17126213592233e-06, "loss": 0.0001, "step": 286380 }, { "epoch": 111.22, "learning_rate": 5.17074433656958e-06, "loss": 0.0015, "step": 286390 }, { "epoch": 111.22, "learning_rate": 5.1702265372168295e-06, "loss": 0.0235, "step": 286400 }, { "epoch": 111.23, "learning_rate": 5.1697087378640774e-06, "loss": 0.0007, "step": 286410 }, { "epoch": 111.23, "learning_rate": 5.169190938511327e-06, "loss": 0.0682, "step": 286420 }, { "epoch": 111.23, "learning_rate": 5.168673139158577e-06, "loss": 0.0396, "step": 286430 }, { "epoch": 111.24, "learning_rate": 5.168155339805826e-06, "loss": 0.0187, "step": 286440 }, { "epoch": 111.24, "learning_rate": 5.167637540453074e-06, "loss": 0.0651, "step": 286450 }, { "epoch": 111.25, "learning_rate": 5.167119741100324e-06, "loss": 0.0253, "step": 286460 }, { "epoch": 111.25, "learning_rate": 5.1666019417475734e-06, "loss": 0.0149, "step": 286470 }, { "epoch": 111.25, "learning_rate": 5.166084142394823e-06, "loss": 0.0314, "step": 286480 }, { "epoch": 111.26, "learning_rate": 5.165566343042072e-06, "loss": 0.0591, "step": 286490 }, { "epoch": 111.26, "learning_rate": 5.165048543689321e-06, "loss": 0.025, "step": 286500 }, { "epoch": 111.27, "learning_rate": 5.16453074433657e-06, "loss": 0.0961, "step": 286510 }, { "epoch": 111.27, "learning_rate": 5.16401294498382e-06, "loss": 0.0323, "step": 286520 }, { "epoch": 111.27, "learning_rate": 5.163495145631069e-06, "loss": 0.2161, "step": 286530 }, { "epoch": 111.28, "learning_rate": 5.162977346278317e-06, "loss": 0.0004, "step": 286540 }, { "epoch": 111.28, "learning_rate": 5.162459546925567e-06, "loss": 0.0766, "step": 286550 }, { "epoch": 111.29, "learning_rate": 5.161941747572816e-06, "loss": 0.0387, "step": 286560 }, { "epoch": 111.29, "learning_rate": 5.161423948220065e-06, "loss": 0.0718, "step": 286570 }, { "epoch": 111.29, "learning_rate": 5.160906148867314e-06, "loss": 0.0019, "step": 286580 }, { "epoch": 111.3, "learning_rate": 5.160388349514564e-06, "loss": 0.0156, "step": 286590 }, { "epoch": 111.3, "learning_rate": 5.1598705501618126e-06, "loss": 0.0086, "step": 286600 }, { "epoch": 111.3, "learning_rate": 5.159352750809062e-06, "loss": 0.0412, "step": 286610 }, { "epoch": 111.31, "learning_rate": 5.158834951456311e-06, "loss": 0.0952, "step": 286620 }, { "epoch": 111.31, "learning_rate": 5.1583171521035606e-06, "loss": 0.0724, "step": 286630 }, { "epoch": 111.32, "learning_rate": 5.157799352750809e-06, "loss": 0.0547, "step": 286640 }, { "epoch": 111.32, "learning_rate": 5.157281553398059e-06, "loss": 0.0089, "step": 286650 }, { "epoch": 111.32, "learning_rate": 5.156763754045308e-06, "loss": 0.0953, "step": 286660 }, { "epoch": 111.33, "learning_rate": 5.156245954692557e-06, "loss": 0.0714, "step": 286670 }, { "epoch": 111.33, "learning_rate": 5.155728155339806e-06, "loss": 0.0487, "step": 286680 }, { "epoch": 111.34, "learning_rate": 5.155210355987056e-06, "loss": 0.0109, "step": 286690 }, { "epoch": 111.34, "learning_rate": 5.1546925566343045e-06, "loss": 0.0281, "step": 286700 }, { "epoch": 111.34, "learning_rate": 5.154174757281554e-06, "loss": 0.0397, "step": 286710 }, { "epoch": 111.35, "learning_rate": 5.153656957928803e-06, "loss": 0.0131, "step": 286720 }, { "epoch": 111.35, "learning_rate": 5.1531391585760525e-06, "loss": 0.0162, "step": 286730 }, { "epoch": 111.36, "learning_rate": 5.152621359223301e-06, "loss": 0.1855, "step": 286740 }, { "epoch": 111.36, "learning_rate": 5.15210355987055e-06, "loss": 0.0002, "step": 286750 }, { "epoch": 111.36, "learning_rate": 5.1515857605178e-06, "loss": 0.0288, "step": 286760 }, { "epoch": 111.37, "learning_rate": 5.151067961165049e-06, "loss": 0.0024, "step": 286770 }, { "epoch": 111.37, "learning_rate": 5.150550161812298e-06, "loss": 0.0003, "step": 286780 }, { "epoch": 111.37, "learning_rate": 5.150032362459547e-06, "loss": 0.0814, "step": 286790 }, { "epoch": 111.38, "learning_rate": 5.1495145631067964e-06, "loss": 0.0099, "step": 286800 }, { "epoch": 111.38, "learning_rate": 5.148996763754046e-06, "loss": 0.0005, "step": 286810 }, { "epoch": 111.39, "learning_rate": 5.148478964401295e-06, "loss": 0.0701, "step": 286820 }, { "epoch": 111.39, "learning_rate": 5.147961165048544e-06, "loss": 0.0477, "step": 286830 }, { "epoch": 111.39, "learning_rate": 5.147443365695793e-06, "loss": 0.0091, "step": 286840 }, { "epoch": 111.4, "learning_rate": 5.146925566343043e-06, "loss": 0.0204, "step": 286850 }, { "epoch": 111.4, "learning_rate": 5.146407766990292e-06, "loss": 0.0412, "step": 286860 }, { "epoch": 111.41, "learning_rate": 5.14588996763754e-06, "loss": 0.167, "step": 286870 }, { "epoch": 111.41, "learning_rate": 5.14537216828479e-06, "loss": 0.0688, "step": 286880 }, { "epoch": 111.41, "learning_rate": 5.14485436893204e-06, "loss": 0.0668, "step": 286890 }, { "epoch": 111.42, "learning_rate": 5.1443365695792875e-06, "loss": 0.1327, "step": 286900 }, { "epoch": 111.42, "learning_rate": 5.143818770226537e-06, "loss": 0.0214, "step": 286910 }, { "epoch": 111.43, "learning_rate": 5.143300970873787e-06, "loss": 0.0365, "step": 286920 }, { "epoch": 111.43, "learning_rate": 5.142783171521036e-06, "loss": 0.0838, "step": 286930 }, { "epoch": 111.43, "learning_rate": 5.142265372168284e-06, "loss": 0.1303, "step": 286940 }, { "epoch": 111.44, "learning_rate": 5.141747572815534e-06, "loss": 0.0333, "step": 286950 }, { "epoch": 111.44, "learning_rate": 5.1412297734627836e-06, "loss": 0.0152, "step": 286960 }, { "epoch": 111.44, "learning_rate": 5.140711974110033e-06, "loss": 0.01, "step": 286970 }, { "epoch": 111.45, "learning_rate": 5.140194174757281e-06, "loss": 0.0368, "step": 286980 }, { "epoch": 111.45, "learning_rate": 5.139676375404531e-06, "loss": 0.0748, "step": 286990 }, { "epoch": 111.46, "learning_rate": 5.13915857605178e-06, "loss": 0.0571, "step": 287000 }, { "epoch": 111.46, "learning_rate": 5.13864077669903e-06, "loss": 0.0008, "step": 287010 }, { "epoch": 111.46, "learning_rate": 5.138122977346278e-06, "loss": 0.0979, "step": 287020 }, { "epoch": 111.47, "learning_rate": 5.1376051779935275e-06, "loss": 0.0304, "step": 287030 }, { "epoch": 111.47, "learning_rate": 5.137087378640777e-06, "loss": 0.0792, "step": 287040 }, { "epoch": 111.48, "learning_rate": 5.136569579288027e-06, "loss": 0.0324, "step": 287050 }, { "epoch": 111.48, "learning_rate": 5.136051779935276e-06, "loss": 0.1361, "step": 287060 }, { "epoch": 111.48, "learning_rate": 5.135533980582524e-06, "loss": 0.0975, "step": 287070 }, { "epoch": 111.49, "learning_rate": 5.135016181229774e-06, "loss": 0.0126, "step": 287080 }, { "epoch": 111.49, "learning_rate": 5.1344983818770235e-06, "loss": 0.1261, "step": 287090 }, { "epoch": 111.5, "learning_rate": 5.133980582524273e-06, "loss": 0.0225, "step": 287100 }, { "epoch": 111.5, "learning_rate": 5.133462783171521e-06, "loss": 0.0047, "step": 287110 }, { "epoch": 111.5, "learning_rate": 5.132944983818771e-06, "loss": 0.0636, "step": 287120 }, { "epoch": 111.51, "learning_rate": 5.13242718446602e-06, "loss": 0.0115, "step": 287130 }, { "epoch": 111.51, "learning_rate": 5.13190938511327e-06, "loss": 0.0151, "step": 287140 }, { "epoch": 111.51, "learning_rate": 5.131391585760518e-06, "loss": 0.1257, "step": 287150 }, { "epoch": 111.52, "learning_rate": 5.1308737864077674e-06, "loss": 0.0146, "step": 287160 }, { "epoch": 111.52, "learning_rate": 5.130355987055017e-06, "loss": 0.0371, "step": 287170 }, { "epoch": 111.53, "learning_rate": 5.129838187702267e-06, "loss": 0.0603, "step": 287180 }, { "epoch": 111.53, "learning_rate": 5.129320388349515e-06, "loss": 0.0362, "step": 287190 }, { "epoch": 111.53, "learning_rate": 5.128802588996764e-06, "loss": 0.016, "step": 287200 }, { "epoch": 111.54, "learning_rate": 5.128284789644014e-06, "loss": 0.0008, "step": 287210 }, { "epoch": 111.54, "learning_rate": 5.127766990291263e-06, "loss": 0.0725, "step": 287220 }, { "epoch": 111.55, "learning_rate": 5.127249190938511e-06, "loss": 0.0352, "step": 287230 }, { "epoch": 111.55, "learning_rate": 5.126731391585761e-06, "loss": 0.0221, "step": 287240 }, { "epoch": 111.55, "learning_rate": 5.126213592233011e-06, "loss": 0.0594, "step": 287250 }, { "epoch": 111.56, "learning_rate": 5.125695792880259e-06, "loss": 0.0781, "step": 287260 }, { "epoch": 111.56, "learning_rate": 5.125177993527508e-06, "loss": 0.0001, "step": 287270 }, { "epoch": 111.57, "learning_rate": 5.124660194174758e-06, "loss": 0.0946, "step": 287280 }, { "epoch": 111.57, "learning_rate": 5.124142394822007e-06, "loss": 0.0085, "step": 287290 }, { "epoch": 111.57, "learning_rate": 5.123624595469256e-06, "loss": 0.0107, "step": 287300 }, { "epoch": 111.58, "learning_rate": 5.123106796116505e-06, "loss": 0.0859, "step": 287310 }, { "epoch": 111.58, "learning_rate": 5.1225889967637546e-06, "loss": 0.0331, "step": 287320 }, { "epoch": 111.58, "learning_rate": 5.122071197411004e-06, "loss": 0.0244, "step": 287330 }, { "epoch": 111.59, "learning_rate": 5.121553398058253e-06, "loss": 0.0319, "step": 287340 }, { "epoch": 111.59, "learning_rate": 5.121035598705502e-06, "loss": 0.0674, "step": 287350 }, { "epoch": 111.6, "learning_rate": 5.120517799352751e-06, "loss": 0.0857, "step": 287360 }, { "epoch": 111.6, "learning_rate": 5.12e-06, "loss": 0.0024, "step": 287370 }, { "epoch": 111.6, "learning_rate": 5.11948220064725e-06, "loss": 0.0001, "step": 287380 }, { "epoch": 111.61, "learning_rate": 5.1189644012944985e-06, "loss": 0.0297, "step": 287390 }, { "epoch": 111.61, "learning_rate": 5.118446601941748e-06, "loss": 0.0386, "step": 287400 }, { "epoch": 111.62, "learning_rate": 5.117928802588997e-06, "loss": 0.0091, "step": 287410 }, { "epoch": 111.62, "learning_rate": 5.1174110032362465e-06, "loss": 0.033, "step": 287420 }, { "epoch": 111.62, "learning_rate": 5.116893203883495e-06, "loss": 0.0396, "step": 287430 }, { "epoch": 111.63, "learning_rate": 5.116375404530745e-06, "loss": 0.0557, "step": 287440 }, { "epoch": 111.63, "learning_rate": 5.115857605177994e-06, "loss": 0.0808, "step": 287450 }, { "epoch": 111.63, "learning_rate": 5.115339805825243e-06, "loss": 0.0511, "step": 287460 }, { "epoch": 111.64, "learning_rate": 5.114822006472492e-06, "loss": 0.0001, "step": 287470 }, { "epoch": 111.64, "learning_rate": 5.114304207119742e-06, "loss": 0.031, "step": 287480 }, { "epoch": 111.65, "learning_rate": 5.1137864077669904e-06, "loss": 0.0023, "step": 287490 }, { "epoch": 111.65, "learning_rate": 5.11326860841424e-06, "loss": 0.046, "step": 287500 }, { "epoch": 111.65, "learning_rate": 5.112750809061489e-06, "loss": 0.0003, "step": 287510 }, { "epoch": 111.66, "learning_rate": 5.1122330097087384e-06, "loss": 0.0287, "step": 287520 }, { "epoch": 111.66, "learning_rate": 5.111715210355987e-06, "loss": 0.0443, "step": 287530 }, { "epoch": 111.67, "learning_rate": 5.111197411003237e-06, "loss": 0.0087, "step": 287540 }, { "epoch": 111.67, "learning_rate": 5.110679611650486e-06, "loss": 0.0764, "step": 287550 }, { "epoch": 111.67, "learning_rate": 5.110161812297734e-06, "loss": 0.0028, "step": 287560 }, { "epoch": 111.68, "learning_rate": 5.109644012944984e-06, "loss": 0.0001, "step": 287570 }, { "epoch": 111.68, "learning_rate": 5.109126213592234e-06, "loss": 0.011, "step": 287580 }, { "epoch": 111.69, "learning_rate": 5.108608414239483e-06, "loss": 0.0821, "step": 287590 }, { "epoch": 111.69, "learning_rate": 5.108090614886731e-06, "loss": 0.0745, "step": 287600 }, { "epoch": 111.69, "learning_rate": 5.107572815533981e-06, "loss": 0.1662, "step": 287610 }, { "epoch": 111.7, "learning_rate": 5.10705501618123e-06, "loss": 0.0483, "step": 287620 }, { "epoch": 111.7, "learning_rate": 5.10653721682848e-06, "loss": 0.0236, "step": 287630 }, { "epoch": 111.7, "learning_rate": 5.106019417475728e-06, "loss": 0.0129, "step": 287640 }, { "epoch": 111.71, "learning_rate": 5.1055016181229776e-06, "loss": 0.0002, "step": 287650 }, { "epoch": 111.71, "learning_rate": 5.104983818770227e-06, "loss": 0.0739, "step": 287660 }, { "epoch": 111.72, "learning_rate": 5.104466019417477e-06, "loss": 0.0644, "step": 287670 }, { "epoch": 111.72, "learning_rate": 5.103948220064725e-06, "loss": 0.0735, "step": 287680 }, { "epoch": 111.72, "learning_rate": 5.103430420711974e-06, "loss": 0.0028, "step": 287690 }, { "epoch": 111.73, "learning_rate": 5.102912621359224e-06, "loss": 0.0106, "step": 287700 }, { "epoch": 111.73, "learning_rate": 5.1023948220064736e-06, "loss": 0.0066, "step": 287710 }, { "epoch": 111.74, "learning_rate": 5.1018770226537215e-06, "loss": 0.022, "step": 287720 }, { "epoch": 111.74, "learning_rate": 5.101359223300971e-06, "loss": 0.0264, "step": 287730 }, { "epoch": 111.74, "learning_rate": 5.100841423948221e-06, "loss": 0.0464, "step": 287740 }, { "epoch": 111.75, "learning_rate": 5.10032362459547e-06, "loss": 0.0726, "step": 287750 }, { "epoch": 111.75, "learning_rate": 5.099805825242718e-06, "loss": 0.1451, "step": 287760 }, { "epoch": 111.76, "learning_rate": 5.099288025889968e-06, "loss": 0.034, "step": 287770 }, { "epoch": 111.76, "learning_rate": 5.0987702265372175e-06, "loss": 0.1463, "step": 287780 }, { "epoch": 111.76, "learning_rate": 5.098252427184467e-06, "loss": 0.089, "step": 287790 }, { "epoch": 111.77, "learning_rate": 5.097734627831715e-06, "loss": 0.0004, "step": 287800 }, { "epoch": 111.77, "learning_rate": 5.097216828478965e-06, "loss": 0.1094, "step": 287810 }, { "epoch": 111.77, "learning_rate": 5.096699029126214e-06, "loss": 0.0216, "step": 287820 }, { "epoch": 111.78, "learning_rate": 5.096181229773464e-06, "loss": 0.014, "step": 287830 }, { "epoch": 111.78, "learning_rate": 5.095663430420712e-06, "loss": 0.0018, "step": 287840 }, { "epoch": 111.79, "learning_rate": 5.0951456310679614e-06, "loss": 0.0271, "step": 287850 }, { "epoch": 111.79, "learning_rate": 5.094627831715211e-06, "loss": 0.0298, "step": 287860 }, { "epoch": 111.79, "learning_rate": 5.094110032362461e-06, "loss": 0.033, "step": 287870 }, { "epoch": 111.8, "learning_rate": 5.093592233009709e-06, "loss": 0.1338, "step": 287880 }, { "epoch": 111.8, "learning_rate": 5.093074433656958e-06, "loss": 0.0556, "step": 287890 }, { "epoch": 111.81, "learning_rate": 5.092556634304208e-06, "loss": 0.0237, "step": 287900 }, { "epoch": 111.81, "learning_rate": 5.0920388349514575e-06, "loss": 0.0402, "step": 287910 }, { "epoch": 111.81, "learning_rate": 5.091521035598705e-06, "loss": 0.0109, "step": 287920 }, { "epoch": 111.82, "learning_rate": 5.091003236245955e-06, "loss": 0.0312, "step": 287930 }, { "epoch": 111.82, "learning_rate": 5.090485436893205e-06, "loss": 0.0001, "step": 287940 }, { "epoch": 111.83, "learning_rate": 5.089967637540454e-06, "loss": 0.002, "step": 287950 }, { "epoch": 111.83, "learning_rate": 5.089449838187702e-06, "loss": 0.0896, "step": 287960 }, { "epoch": 111.83, "learning_rate": 5.088932038834952e-06, "loss": 0.116, "step": 287970 }, { "epoch": 111.84, "learning_rate": 5.088414239482201e-06, "loss": 0.0003, "step": 287980 }, { "epoch": 111.84, "learning_rate": 5.087896440129451e-06, "loss": 0.1049, "step": 287990 }, { "epoch": 111.84, "learning_rate": 5.087378640776699e-06, "loss": 0.1248, "step": 288000 }, { "epoch": 111.85, "learning_rate": 5.0868608414239486e-06, "loss": 0.0004, "step": 288010 }, { "epoch": 111.85, "learning_rate": 5.086343042071198e-06, "loss": 0.0917, "step": 288020 }, { "epoch": 111.86, "learning_rate": 5.085825242718447e-06, "loss": 0.0008, "step": 288030 }, { "epoch": 111.86, "learning_rate": 5.085307443365696e-06, "loss": 0.0945, "step": 288040 }, { "epoch": 111.86, "learning_rate": 5.084789644012945e-06, "loss": 0.0064, "step": 288050 }, { "epoch": 111.87, "learning_rate": 5.084271844660195e-06, "loss": 0.046, "step": 288060 }, { "epoch": 111.87, "learning_rate": 5.083754045307444e-06, "loss": 0.057, "step": 288070 }, { "epoch": 111.88, "learning_rate": 5.0832362459546925e-06, "loss": 0.04, "step": 288080 }, { "epoch": 111.88, "learning_rate": 5.082718446601942e-06, "loss": 0.0108, "step": 288090 }, { "epoch": 111.88, "learning_rate": 5.082200647249192e-06, "loss": 0.0409, "step": 288100 }, { "epoch": 111.89, "learning_rate": 5.0816828478964405e-06, "loss": 0.058, "step": 288110 }, { "epoch": 111.89, "learning_rate": 5.081165048543689e-06, "loss": 0.0098, "step": 288120 }, { "epoch": 111.9, "learning_rate": 5.080647249190939e-06, "loss": 0.1203, "step": 288130 }, { "epoch": 111.9, "learning_rate": 5.0801294498381885e-06, "loss": 0.0348, "step": 288140 }, { "epoch": 111.9, "learning_rate": 5.079611650485437e-06, "loss": 0.009, "step": 288150 }, { "epoch": 111.91, "learning_rate": 5.079093851132687e-06, "loss": 0.0128, "step": 288160 }, { "epoch": 111.91, "learning_rate": 5.078576051779936e-06, "loss": 0.0684, "step": 288170 }, { "epoch": 111.91, "learning_rate": 5.078058252427185e-06, "loss": 0.0464, "step": 288180 }, { "epoch": 111.92, "learning_rate": 5.077540453074434e-06, "loss": 0.0218, "step": 288190 }, { "epoch": 111.92, "learning_rate": 5.077022653721684e-06, "loss": 0.1688, "step": 288200 }, { "epoch": 111.93, "learning_rate": 5.0765048543689324e-06, "loss": 0.1379, "step": 288210 }, { "epoch": 111.93, "learning_rate": 5.075987055016181e-06, "loss": 0.0335, "step": 288220 }, { "epoch": 111.93, "learning_rate": 5.075469255663431e-06, "loss": 0.0135, "step": 288230 }, { "epoch": 111.94, "learning_rate": 5.0749514563106805e-06, "loss": 0.0147, "step": 288240 }, { "epoch": 111.94, "learning_rate": 5.074433656957929e-06, "loss": 0.1257, "step": 288250 }, { "epoch": 111.95, "learning_rate": 5.073915857605178e-06, "loss": 0.0036, "step": 288260 }, { "epoch": 111.95, "learning_rate": 5.073398058252428e-06, "loss": 0.0188, "step": 288270 }, { "epoch": 111.95, "learning_rate": 5.072880258899677e-06, "loss": 0.0911, "step": 288280 }, { "epoch": 111.96, "learning_rate": 5.072362459546926e-06, "loss": 0.0074, "step": 288290 }, { "epoch": 111.96, "learning_rate": 5.071844660194175e-06, "loss": 0.0001, "step": 288300 }, { "epoch": 111.97, "learning_rate": 5.071326860841424e-06, "loss": 0.0101, "step": 288310 }, { "epoch": 111.97, "learning_rate": 5.070809061488674e-06, "loss": 0.0009, "step": 288320 }, { "epoch": 111.97, "learning_rate": 5.070291262135923e-06, "loss": 0.1474, "step": 288330 }, { "epoch": 111.98, "learning_rate": 5.0697734627831716e-06, "loss": 0.161, "step": 288340 }, { "epoch": 111.98, "learning_rate": 5.069255663430421e-06, "loss": 0.088, "step": 288350 }, { "epoch": 111.98, "learning_rate": 5.068737864077671e-06, "loss": 0.0103, "step": 288360 }, { "epoch": 111.99, "learning_rate": 5.068220064724919e-06, "loss": 0.1057, "step": 288370 }, { "epoch": 111.99, "learning_rate": 5.067702265372168e-06, "loss": 0.0452, "step": 288380 }, { "epoch": 112.0, "learning_rate": 5.067184466019418e-06, "loss": 0.1511, "step": 288390 }, { "epoch": 112.0, "learning_rate": 5.0666666666666676e-06, "loss": 0.001, "step": 288400 }, { "epoch": 112.0, "eval_accuracy": 0.9521320495185694, "eval_loss": 0.3811458349227905, "eval_runtime": 8.2582, "eval_samples_per_second": 440.168, "eval_steps_per_second": 55.097, "step": 288400 }, { "epoch": 112.0, "learning_rate": 5.0661488673139155e-06, "loss": 0.0133, "step": 288410 }, { "epoch": 112.01, "learning_rate": 5.065631067961165e-06, "loss": 0.0474, "step": 288420 }, { "epoch": 112.01, "learning_rate": 5.065113268608415e-06, "loss": 0.0331, "step": 288430 }, { "epoch": 112.02, "learning_rate": 5.064595469255664e-06, "loss": 0.0031, "step": 288440 }, { "epoch": 112.02, "learning_rate": 5.064077669902912e-06, "loss": 0.0658, "step": 288450 }, { "epoch": 112.02, "learning_rate": 5.063559870550162e-06, "loss": 0.05, "step": 288460 }, { "epoch": 112.03, "learning_rate": 5.0630420711974115e-06, "loss": 0.0369, "step": 288470 }, { "epoch": 112.03, "learning_rate": 5.062524271844661e-06, "loss": 0.0539, "step": 288480 }, { "epoch": 112.03, "learning_rate": 5.062006472491909e-06, "loss": 0.0221, "step": 288490 }, { "epoch": 112.04, "learning_rate": 5.061488673139159e-06, "loss": 0.0006, "step": 288500 }, { "epoch": 112.04, "learning_rate": 5.060970873786408e-06, "loss": 0.0273, "step": 288510 }, { "epoch": 112.05, "learning_rate": 5.060453074433658e-06, "loss": 0.0001, "step": 288520 }, { "epoch": 112.05, "learning_rate": 5.059935275080906e-06, "loss": 0.0098, "step": 288530 }, { "epoch": 112.05, "learning_rate": 5.0594174757281554e-06, "loss": 0.0208, "step": 288540 }, { "epoch": 112.06, "learning_rate": 5.058899676375405e-06, "loss": 0.0106, "step": 288550 }, { "epoch": 112.06, "learning_rate": 5.058381877022655e-06, "loss": 0.0851, "step": 288560 }, { "epoch": 112.07, "learning_rate": 5.057864077669903e-06, "loss": 0.0597, "step": 288570 }, { "epoch": 112.07, "learning_rate": 5.057346278317152e-06, "loss": 0.0583, "step": 288580 }, { "epoch": 112.07, "learning_rate": 5.056828478964402e-06, "loss": 0.0226, "step": 288590 }, { "epoch": 112.08, "learning_rate": 5.0563106796116515e-06, "loss": 0.0003, "step": 288600 }, { "epoch": 112.08, "learning_rate": 5.055792880258899e-06, "loss": 0.1237, "step": 288610 }, { "epoch": 112.09, "learning_rate": 5.055275080906149e-06, "loss": 0.025, "step": 288620 }, { "epoch": 112.09, "learning_rate": 5.054757281553399e-06, "loss": 0.0011, "step": 288630 }, { "epoch": 112.09, "learning_rate": 5.054239482200648e-06, "loss": 0.0162, "step": 288640 }, { "epoch": 112.1, "learning_rate": 5.053721682847896e-06, "loss": 0.0002, "step": 288650 }, { "epoch": 112.1, "learning_rate": 5.053203883495146e-06, "loss": 0.0625, "step": 288660 }, { "epoch": 112.1, "learning_rate": 5.052686084142395e-06, "loss": 0.0004, "step": 288670 }, { "epoch": 112.11, "learning_rate": 5.052168284789645e-06, "loss": 0.1508, "step": 288680 }, { "epoch": 112.11, "learning_rate": 5.051650485436893e-06, "loss": 0.0418, "step": 288690 }, { "epoch": 112.12, "learning_rate": 5.0511326860841426e-06, "loss": 0.0716, "step": 288700 }, { "epoch": 112.12, "learning_rate": 5.050614886731392e-06, "loss": 0.0956, "step": 288710 }, { "epoch": 112.12, "learning_rate": 5.050097087378642e-06, "loss": 0.1314, "step": 288720 }, { "epoch": 112.13, "learning_rate": 5.0495792880258906e-06, "loss": 0.0369, "step": 288730 }, { "epoch": 112.13, "learning_rate": 5.049061488673139e-06, "loss": 0.0462, "step": 288740 }, { "epoch": 112.14, "learning_rate": 5.048543689320389e-06, "loss": 0.1149, "step": 288750 }, { "epoch": 112.14, "learning_rate": 5.0480258899676386e-06, "loss": 0.0179, "step": 288760 }, { "epoch": 112.14, "learning_rate": 5.047508090614887e-06, "loss": 0.0203, "step": 288770 }, { "epoch": 112.15, "learning_rate": 5.046990291262136e-06, "loss": 0.0496, "step": 288780 }, { "epoch": 112.15, "learning_rate": 5.046472491909386e-06, "loss": 0.1111, "step": 288790 }, { "epoch": 112.16, "learning_rate": 5.045954692556635e-06, "loss": 0.0163, "step": 288800 }, { "epoch": 112.16, "learning_rate": 5.045436893203884e-06, "loss": 0.1297, "step": 288810 }, { "epoch": 112.16, "learning_rate": 5.044919093851133e-06, "loss": 0.0171, "step": 288820 }, { "epoch": 112.17, "learning_rate": 5.0444012944983825e-06, "loss": 0.0001, "step": 288830 }, { "epoch": 112.17, "learning_rate": 5.043883495145631e-06, "loss": 0.0234, "step": 288840 }, { "epoch": 112.17, "learning_rate": 5.043365695792881e-06, "loss": 0.0283, "step": 288850 }, { "epoch": 112.18, "learning_rate": 5.04284789644013e-06, "loss": 0.0002, "step": 288860 }, { "epoch": 112.18, "learning_rate": 5.042330097087379e-06, "loss": 0.0565, "step": 288870 }, { "epoch": 112.19, "learning_rate": 5.041812297734628e-06, "loss": 0.0042, "step": 288880 }, { "epoch": 112.19, "learning_rate": 5.041294498381878e-06, "loss": 0.0011, "step": 288890 }, { "epoch": 112.19, "learning_rate": 5.0407766990291264e-06, "loss": 0.0486, "step": 288900 }, { "epoch": 112.2, "learning_rate": 5.040258899676376e-06, "loss": 0.0274, "step": 288910 }, { "epoch": 112.2, "learning_rate": 5.039741100323625e-06, "loss": 0.0574, "step": 288920 }, { "epoch": 112.21, "learning_rate": 5.0392233009708745e-06, "loss": 0.0129, "step": 288930 }, { "epoch": 112.21, "learning_rate": 5.038705501618123e-06, "loss": 0.0167, "step": 288940 }, { "epoch": 112.21, "learning_rate": 5.038187702265373e-06, "loss": 0.177, "step": 288950 }, { "epoch": 112.22, "learning_rate": 5.037669902912622e-06, "loss": 0.0167, "step": 288960 }, { "epoch": 112.22, "learning_rate": 5.037152103559871e-06, "loss": 0.1575, "step": 288970 }, { "epoch": 112.23, "learning_rate": 5.03663430420712e-06, "loss": 0.1229, "step": 288980 }, { "epoch": 112.23, "learning_rate": 5.03611650485437e-06, "loss": 0.0435, "step": 288990 }, { "epoch": 112.23, "learning_rate": 5.035598705501618e-06, "loss": 0.0266, "step": 289000 }, { "epoch": 112.24, "learning_rate": 5.035080906148868e-06, "loss": 0.0106, "step": 289010 }, { "epoch": 112.24, "learning_rate": 5.034563106796117e-06, "loss": 0.1688, "step": 289020 }, { "epoch": 112.24, "learning_rate": 5.0340453074433656e-06, "loss": 0.047, "step": 289030 }, { "epoch": 112.25, "learning_rate": 5.033527508090615e-06, "loss": 0.0088, "step": 289040 }, { "epoch": 112.25, "learning_rate": 5.033009708737865e-06, "loss": 0.0492, "step": 289050 }, { "epoch": 112.26, "learning_rate": 5.0324919093851136e-06, "loss": 0.0422, "step": 289060 }, { "epoch": 112.26, "learning_rate": 5.031974110032362e-06, "loss": 0.0142, "step": 289070 }, { "epoch": 112.26, "learning_rate": 5.031456310679612e-06, "loss": 0.0991, "step": 289080 }, { "epoch": 112.27, "learning_rate": 5.0309385113268616e-06, "loss": 0.0016, "step": 289090 }, { "epoch": 112.27, "learning_rate": 5.03042071197411e-06, "loss": 0.0006, "step": 289100 }, { "epoch": 112.28, "learning_rate": 5.029902912621359e-06, "loss": 0.043, "step": 289110 }, { "epoch": 112.28, "learning_rate": 5.029385113268609e-06, "loss": 0.0085, "step": 289120 }, { "epoch": 112.28, "learning_rate": 5.028867313915858e-06, "loss": 0.0834, "step": 289130 }, { "epoch": 112.29, "learning_rate": 5.028349514563107e-06, "loss": 0.0003, "step": 289140 }, { "epoch": 112.29, "learning_rate": 5.027831715210356e-06, "loss": 0.0511, "step": 289150 }, { "epoch": 112.3, "learning_rate": 5.0273139158576055e-06, "loss": 0.0735, "step": 289160 }, { "epoch": 112.3, "learning_rate": 5.026796116504855e-06, "loss": 0.0235, "step": 289170 }, { "epoch": 112.3, "learning_rate": 5.026278317152103e-06, "loss": 0.0138, "step": 289180 }, { "epoch": 112.31, "learning_rate": 5.025760517799353e-06, "loss": 0.0788, "step": 289190 }, { "epoch": 112.31, "learning_rate": 5.025242718446602e-06, "loss": 0.1815, "step": 289200 }, { "epoch": 112.31, "learning_rate": 5.024724919093852e-06, "loss": 0.0007, "step": 289210 }, { "epoch": 112.32, "learning_rate": 5.0242071197411e-06, "loss": 0.0832, "step": 289220 }, { "epoch": 112.32, "learning_rate": 5.0236893203883494e-06, "loss": 0.0569, "step": 289230 }, { "epoch": 112.33, "learning_rate": 5.023171521035599e-06, "loss": 0.0447, "step": 289240 }, { "epoch": 112.33, "learning_rate": 5.022653721682849e-06, "loss": 0.0797, "step": 289250 }, { "epoch": 112.33, "learning_rate": 5.022135922330097e-06, "loss": 0.0095, "step": 289260 }, { "epoch": 112.34, "learning_rate": 5.021618122977346e-06, "loss": 0.0486, "step": 289270 }, { "epoch": 112.34, "learning_rate": 5.021100323624596e-06, "loss": 0.0581, "step": 289280 }, { "epoch": 112.35, "learning_rate": 5.0205825242718455e-06, "loss": 0.1283, "step": 289290 }, { "epoch": 112.35, "learning_rate": 5.020064724919095e-06, "loss": 0.1163, "step": 289300 }, { "epoch": 112.35, "learning_rate": 5.019546925566343e-06, "loss": 0.0301, "step": 289310 }, { "epoch": 112.36, "learning_rate": 5.019029126213593e-06, "loss": 0.027, "step": 289320 }, { "epoch": 112.36, "learning_rate": 5.018511326860842e-06, "loss": 0.0116, "step": 289330 }, { "epoch": 112.37, "learning_rate": 5.017993527508092e-06, "loss": 0.0491, "step": 289340 }, { "epoch": 112.37, "learning_rate": 5.01747572815534e-06, "loss": 0.0254, "step": 289350 }, { "epoch": 112.37, "learning_rate": 5.016957928802589e-06, "loss": 0.0761, "step": 289360 }, { "epoch": 112.38, "learning_rate": 5.016440129449839e-06, "loss": 0.0255, "step": 289370 }, { "epoch": 112.38, "learning_rate": 5.015922330097089e-06, "loss": 0.143, "step": 289380 }, { "epoch": 112.38, "learning_rate": 5.0154045307443366e-06, "loss": 0.0269, "step": 289390 }, { "epoch": 112.39, "learning_rate": 5.014886731391586e-06, "loss": 0.0312, "step": 289400 }, { "epoch": 112.39, "learning_rate": 5.014368932038836e-06, "loss": 0.0473, "step": 289410 }, { "epoch": 112.4, "learning_rate": 5.013851132686085e-06, "loss": 0.0605, "step": 289420 }, { "epoch": 112.4, "learning_rate": 5.013333333333333e-06, "loss": 0.0208, "step": 289430 }, { "epoch": 112.4, "learning_rate": 5.012815533980583e-06, "loss": 0.0552, "step": 289440 }, { "epoch": 112.41, "learning_rate": 5.0122977346278326e-06, "loss": 0.0078, "step": 289450 }, { "epoch": 112.41, "learning_rate": 5.011779935275082e-06, "loss": 0.1286, "step": 289460 }, { "epoch": 112.42, "learning_rate": 5.01126213592233e-06, "loss": 0.0431, "step": 289470 }, { "epoch": 112.42, "learning_rate": 5.01074433656958e-06, "loss": 0.1587, "step": 289480 }, { "epoch": 112.42, "learning_rate": 5.010226537216829e-06, "loss": 0.0003, "step": 289490 }, { "epoch": 112.43, "learning_rate": 5.009708737864078e-06, "loss": 0.0243, "step": 289500 }, { "epoch": 112.43, "learning_rate": 5.009190938511327e-06, "loss": 0.0016, "step": 289510 }, { "epoch": 112.43, "learning_rate": 5.0086731391585765e-06, "loss": 0.0003, "step": 289520 }, { "epoch": 112.44, "learning_rate": 5.008155339805826e-06, "loss": 0.0001, "step": 289530 }, { "epoch": 112.44, "learning_rate": 5.007637540453075e-06, "loss": 0.0003, "step": 289540 }, { "epoch": 112.45, "learning_rate": 5.007119741100324e-06, "loss": 0.0108, "step": 289550 }, { "epoch": 112.45, "learning_rate": 5.006601941747573e-06, "loss": 0.0445, "step": 289560 }, { "epoch": 112.45, "learning_rate": 5.006084142394823e-06, "loss": 0.0437, "step": 289570 }, { "epoch": 112.46, "learning_rate": 5.005566343042072e-06, "loss": 0.0261, "step": 289580 }, { "epoch": 112.46, "learning_rate": 5.0050485436893204e-06, "loss": 0.014, "step": 289590 }, { "epoch": 112.47, "learning_rate": 5.00453074433657e-06, "loss": 0.0008, "step": 289600 }, { "epoch": 112.47, "learning_rate": 5.00401294498382e-06, "loss": 0.0669, "step": 289610 }, { "epoch": 112.47, "learning_rate": 5.0034951456310685e-06, "loss": 0.0015, "step": 289620 }, { "epoch": 112.48, "learning_rate": 5.002977346278317e-06, "loss": 0.0097, "step": 289630 }, { "epoch": 112.48, "learning_rate": 5.002459546925567e-06, "loss": 0.1313, "step": 289640 }, { "epoch": 112.49, "learning_rate": 5.001941747572816e-06, "loss": 0.134, "step": 289650 }, { "epoch": 112.49, "learning_rate": 5.001423948220065e-06, "loss": 0.0003, "step": 289660 }, { "epoch": 112.49, "learning_rate": 5.000906148867314e-06, "loss": 0.0664, "step": 289670 }, { "epoch": 112.5, "learning_rate": 5.000388349514564e-06, "loss": 0.0808, "step": 289680 }, { "epoch": 112.5, "learning_rate": 4.999870550161812e-06, "loss": 0.0217, "step": 289690 }, { "epoch": 112.5, "learning_rate": 4.999352750809062e-06, "loss": 0.0001, "step": 289700 }, { "epoch": 112.51, "learning_rate": 4.998834951456311e-06, "loss": 0.0254, "step": 289710 }, { "epoch": 112.51, "learning_rate": 4.99831715210356e-06, "loss": 0.1837, "step": 289720 }, { "epoch": 112.52, "learning_rate": 4.997799352750809e-06, "loss": 0.0852, "step": 289730 }, { "epoch": 112.52, "learning_rate": 4.997281553398059e-06, "loss": 0.0987, "step": 289740 }, { "epoch": 112.52, "learning_rate": 4.9967637540453076e-06, "loss": 0.0481, "step": 289750 }, { "epoch": 112.53, "learning_rate": 4.996245954692557e-06, "loss": 0.1156, "step": 289760 }, { "epoch": 112.53, "learning_rate": 4.995728155339806e-06, "loss": 0.0036, "step": 289770 }, { "epoch": 112.54, "learning_rate": 4.9952103559870556e-06, "loss": 0.1142, "step": 289780 }, { "epoch": 112.54, "learning_rate": 4.994692556634304e-06, "loss": 0.0588, "step": 289790 }, { "epoch": 112.54, "learning_rate": 4.994174757281554e-06, "loss": 0.0034, "step": 289800 }, { "epoch": 112.55, "learning_rate": 4.993656957928803e-06, "loss": 0.0021, "step": 289810 }, { "epoch": 112.55, "learning_rate": 4.9931391585760515e-06, "loss": 0.029, "step": 289820 }, { "epoch": 112.56, "learning_rate": 4.992621359223301e-06, "loss": 0.0025, "step": 289830 }, { "epoch": 112.56, "learning_rate": 4.992103559870551e-06, "loss": 0.0407, "step": 289840 }, { "epoch": 112.56, "learning_rate": 4.9915857605177995e-06, "loss": 0.0873, "step": 289850 }, { "epoch": 112.57, "learning_rate": 4.991067961165049e-06, "loss": 0.0117, "step": 289860 }, { "epoch": 112.57, "learning_rate": 4.990550161812298e-06, "loss": 0.0817, "step": 289870 }, { "epoch": 112.57, "learning_rate": 4.9900323624595475e-06, "loss": 0.0181, "step": 289880 }, { "epoch": 112.58, "learning_rate": 4.989514563106796e-06, "loss": 0.0002, "step": 289890 }, { "epoch": 112.58, "learning_rate": 4.988996763754046e-06, "loss": 0.0002, "step": 289900 }, { "epoch": 112.59, "learning_rate": 4.988478964401295e-06, "loss": 0.0601, "step": 289910 }, { "epoch": 112.59, "learning_rate": 4.987961165048544e-06, "loss": 0.1102, "step": 289920 }, { "epoch": 112.59, "learning_rate": 4.987443365695793e-06, "loss": 0.0599, "step": 289930 }, { "epoch": 112.6, "learning_rate": 4.986925566343043e-06, "loss": 0.1024, "step": 289940 }, { "epoch": 112.6, "learning_rate": 4.9864077669902914e-06, "loss": 0.0087, "step": 289950 }, { "epoch": 112.61, "learning_rate": 4.985889967637541e-06, "loss": 0.1288, "step": 289960 }, { "epoch": 112.61, "learning_rate": 4.98537216828479e-06, "loss": 0.0879, "step": 289970 }, { "epoch": 112.61, "learning_rate": 4.9848543689320395e-06, "loss": 0.0006, "step": 289980 }, { "epoch": 112.62, "learning_rate": 4.984336569579288e-06, "loss": 0.0578, "step": 289990 }, { "epoch": 112.62, "learning_rate": 4.983818770226538e-06, "loss": 0.1399, "step": 290000 }, { "epoch": 112.63, "learning_rate": 4.983300970873787e-06, "loss": 0.0993, "step": 290010 }, { "epoch": 112.63, "learning_rate": 4.982783171521036e-06, "loss": 0.0517, "step": 290020 }, { "epoch": 112.63, "learning_rate": 4.982265372168285e-06, "loss": 0.0197, "step": 290030 }, { "epoch": 112.64, "learning_rate": 4.981747572815535e-06, "loss": 0.0741, "step": 290040 }, { "epoch": 112.64, "learning_rate": 4.981229773462783e-06, "loss": 0.0003, "step": 290050 }, { "epoch": 112.64, "learning_rate": 4.980711974110033e-06, "loss": 0.0042, "step": 290060 }, { "epoch": 112.65, "learning_rate": 4.980194174757282e-06, "loss": 0.0589, "step": 290070 }, { "epoch": 112.65, "learning_rate": 4.979676375404531e-06, "loss": 0.0051, "step": 290080 }, { "epoch": 112.66, "learning_rate": 4.97915857605178e-06, "loss": 0.0536, "step": 290090 }, { "epoch": 112.66, "learning_rate": 4.97864077669903e-06, "loss": 0.0116, "step": 290100 }, { "epoch": 112.66, "learning_rate": 4.9781229773462786e-06, "loss": 0.0012, "step": 290110 }, { "epoch": 112.67, "learning_rate": 4.977605177993528e-06, "loss": 0.0978, "step": 290120 }, { "epoch": 112.67, "learning_rate": 4.977087378640777e-06, "loss": 0.0147, "step": 290130 }, { "epoch": 112.68, "learning_rate": 4.9765695792880266e-06, "loss": 0.1607, "step": 290140 }, { "epoch": 112.68, "learning_rate": 4.976051779935275e-06, "loss": 0.0756, "step": 290150 }, { "epoch": 112.68, "learning_rate": 4.975533980582525e-06, "loss": 0.0322, "step": 290160 }, { "epoch": 112.69, "learning_rate": 4.975016181229774e-06, "loss": 0.1384, "step": 290170 }, { "epoch": 112.69, "learning_rate": 4.974498381877023e-06, "loss": 0.087, "step": 290180 }, { "epoch": 112.7, "learning_rate": 4.973980582524272e-06, "loss": 0.0851, "step": 290190 }, { "epoch": 112.7, "learning_rate": 4.973462783171522e-06, "loss": 0.0231, "step": 290200 }, { "epoch": 112.7, "learning_rate": 4.9729449838187705e-06, "loss": 0.0818, "step": 290210 }, { "epoch": 112.71, "learning_rate": 4.97242718446602e-06, "loss": 0.0062, "step": 290220 }, { "epoch": 112.71, "learning_rate": 4.971909385113269e-06, "loss": 0.0266, "step": 290230 }, { "epoch": 112.71, "learning_rate": 4.9713915857605185e-06, "loss": 0.0014, "step": 290240 }, { "epoch": 112.72, "learning_rate": 4.970873786407767e-06, "loss": 0.0347, "step": 290250 }, { "epoch": 112.72, "learning_rate": 4.970355987055017e-06, "loss": 0.1288, "step": 290260 }, { "epoch": 112.73, "learning_rate": 4.969838187702266e-06, "loss": 0.0304, "step": 290270 }, { "epoch": 112.73, "learning_rate": 4.969320388349515e-06, "loss": 0.0325, "step": 290280 }, { "epoch": 112.73, "learning_rate": 4.968802588996764e-06, "loss": 0.1261, "step": 290290 }, { "epoch": 112.74, "learning_rate": 4.968284789644014e-06, "loss": 0.0967, "step": 290300 }, { "epoch": 112.74, "learning_rate": 4.9677669902912625e-06, "loss": 0.1161, "step": 290310 }, { "epoch": 112.75, "learning_rate": 4.967249190938512e-06, "loss": 0.0003, "step": 290320 }, { "epoch": 112.75, "learning_rate": 4.966731391585761e-06, "loss": 0.0001, "step": 290330 }, { "epoch": 112.75, "learning_rate": 4.9662135922330105e-06, "loss": 0.0284, "step": 290340 }, { "epoch": 112.76, "learning_rate": 4.965695792880259e-06, "loss": 0.1949, "step": 290350 }, { "epoch": 112.76, "learning_rate": 4.965177993527509e-06, "loss": 0.0066, "step": 290360 }, { "epoch": 112.77, "learning_rate": 4.964660194174758e-06, "loss": 0.0008, "step": 290370 }, { "epoch": 112.77, "learning_rate": 4.964142394822007e-06, "loss": 0.0578, "step": 290380 }, { "epoch": 112.77, "learning_rate": 4.963624595469256e-06, "loss": 0.1878, "step": 290390 }, { "epoch": 112.78, "learning_rate": 4.963106796116506e-06, "loss": 0.0329, "step": 290400 }, { "epoch": 112.78, "learning_rate": 4.962588996763754e-06, "loss": 0.0417, "step": 290410 }, { "epoch": 112.78, "learning_rate": 4.962071197411004e-06, "loss": 0.1084, "step": 290420 }, { "epoch": 112.79, "learning_rate": 4.961553398058253e-06, "loss": 0.0568, "step": 290430 }, { "epoch": 112.79, "learning_rate": 4.961035598705502e-06, "loss": 0.0232, "step": 290440 }, { "epoch": 112.8, "learning_rate": 4.960517799352751e-06, "loss": 0.0067, "step": 290450 }, { "epoch": 112.8, "learning_rate": 4.960000000000001e-06, "loss": 0.012, "step": 290460 }, { "epoch": 112.8, "learning_rate": 4.9594822006472496e-06, "loss": 0.0934, "step": 290470 }, { "epoch": 112.81, "learning_rate": 4.958964401294498e-06, "loss": 0.0194, "step": 290480 }, { "epoch": 112.81, "learning_rate": 4.958446601941748e-06, "loss": 0.0323, "step": 290490 }, { "epoch": 112.82, "learning_rate": 4.957928802588997e-06, "loss": 0.0228, "step": 290500 }, { "epoch": 112.82, "learning_rate": 4.957411003236246e-06, "loss": 0.0021, "step": 290510 }, { "epoch": 112.82, "learning_rate": 4.956893203883495e-06, "loss": 0.0508, "step": 290520 }, { "epoch": 112.83, "learning_rate": 4.956375404530745e-06, "loss": 0.05, "step": 290530 }, { "epoch": 112.83, "learning_rate": 4.9558576051779935e-06, "loss": 0.0445, "step": 290540 }, { "epoch": 112.83, "learning_rate": 4.955339805825243e-06, "loss": 0.1436, "step": 290550 }, { "epoch": 112.84, "learning_rate": 4.954822006472492e-06, "loss": 0.018, "step": 290560 }, { "epoch": 112.84, "learning_rate": 4.9543042071197415e-06, "loss": 0.0531, "step": 290570 }, { "epoch": 112.85, "learning_rate": 4.95378640776699e-06, "loss": 0.07, "step": 290580 }, { "epoch": 112.85, "learning_rate": 4.95326860841424e-06, "loss": 0.1176, "step": 290590 }, { "epoch": 112.85, "learning_rate": 4.952750809061489e-06, "loss": 0.0215, "step": 290600 }, { "epoch": 112.86, "learning_rate": 4.952233009708738e-06, "loss": 0.0001, "step": 290610 }, { "epoch": 112.86, "learning_rate": 4.951715210355987e-06, "loss": 0.0799, "step": 290620 }, { "epoch": 112.87, "learning_rate": 4.951197411003237e-06, "loss": 0.1248, "step": 290630 }, { "epoch": 112.87, "learning_rate": 4.9506796116504854e-06, "loss": 0.0028, "step": 290640 }, { "epoch": 112.87, "learning_rate": 4.950161812297735e-06, "loss": 0.0815, "step": 290650 }, { "epoch": 112.88, "learning_rate": 4.949644012944984e-06, "loss": 0.0527, "step": 290660 }, { "epoch": 112.88, "learning_rate": 4.9491262135922335e-06, "loss": 0.0317, "step": 290670 }, { "epoch": 112.89, "learning_rate": 4.948608414239482e-06, "loss": 0.0274, "step": 290680 }, { "epoch": 112.89, "learning_rate": 4.948090614886732e-06, "loss": 0.039, "step": 290690 }, { "epoch": 112.89, "learning_rate": 4.947572815533981e-06, "loss": 0.0859, "step": 290700 }, { "epoch": 112.9, "learning_rate": 4.94705501618123e-06, "loss": 0.0363, "step": 290710 }, { "epoch": 112.9, "learning_rate": 4.946537216828479e-06, "loss": 0.0733, "step": 290720 }, { "epoch": 112.9, "learning_rate": 4.946019417475729e-06, "loss": 0.0081, "step": 290730 }, { "epoch": 112.91, "learning_rate": 4.945501618122977e-06, "loss": 0.0224, "step": 290740 }, { "epoch": 112.91, "learning_rate": 4.944983818770227e-06, "loss": 0.0639, "step": 290750 }, { "epoch": 112.92, "learning_rate": 4.944466019417476e-06, "loss": 0.0366, "step": 290760 }, { "epoch": 112.92, "learning_rate": 4.943948220064725e-06, "loss": 0.0395, "step": 290770 }, { "epoch": 112.92, "learning_rate": 4.943430420711974e-06, "loss": 0.0396, "step": 290780 }, { "epoch": 112.93, "learning_rate": 4.942912621359224e-06, "loss": 0.0074, "step": 290790 }, { "epoch": 112.93, "learning_rate": 4.9423948220064726e-06, "loss": 0.0118, "step": 290800 }, { "epoch": 112.94, "learning_rate": 4.941877022653722e-06, "loss": 0.1131, "step": 290810 }, { "epoch": 112.94, "learning_rate": 4.941359223300971e-06, "loss": 0.0755, "step": 290820 }, { "epoch": 112.94, "learning_rate": 4.9408414239482206e-06, "loss": 0.0124, "step": 290830 }, { "epoch": 112.95, "learning_rate": 4.940323624595469e-06, "loss": 0.0125, "step": 290840 }, { "epoch": 112.95, "learning_rate": 4.939805825242719e-06, "loss": 0.014, "step": 290850 }, { "epoch": 112.96, "learning_rate": 4.939288025889968e-06, "loss": 0.0922, "step": 290860 }, { "epoch": 112.96, "learning_rate": 4.938770226537217e-06, "loss": 0.0247, "step": 290870 }, { "epoch": 112.96, "learning_rate": 4.938252427184466e-06, "loss": 0.0792, "step": 290880 }, { "epoch": 112.97, "learning_rate": 4.937734627831716e-06, "loss": 0.0608, "step": 290890 }, { "epoch": 112.97, "learning_rate": 4.9372168284789645e-06, "loss": 0.0791, "step": 290900 }, { "epoch": 112.97, "learning_rate": 4.936699029126214e-06, "loss": 0.0161, "step": 290910 }, { "epoch": 112.98, "learning_rate": 4.936181229773463e-06, "loss": 0.0962, "step": 290920 }, { "epoch": 112.98, "learning_rate": 4.9356634304207125e-06, "loss": 0.0445, "step": 290930 }, { "epoch": 112.99, "learning_rate": 4.935145631067961e-06, "loss": 0.0005, "step": 290940 }, { "epoch": 112.99, "learning_rate": 4.934627831715211e-06, "loss": 0.0416, "step": 290950 }, { "epoch": 112.99, "learning_rate": 4.9341100323624605e-06, "loss": 0.0347, "step": 290960 }, { "epoch": 113.0, "learning_rate": 4.933592233009709e-06, "loss": 0.0282, "step": 290970 }, { "epoch": 113.0, "eval_accuracy": 0.951031636863824, "eval_loss": 0.38432401418685913, "eval_runtime": 8.2159, "eval_samples_per_second": 442.437, "eval_steps_per_second": 55.381, "step": 290975 }, { "epoch": 113.0, "learning_rate": 4.933074433656959e-06, "loss": 0.0301, "step": 290980 }, { "epoch": 113.01, "learning_rate": 4.932556634304208e-06, "loss": 0.1055, "step": 290990 }, { "epoch": 113.01, "learning_rate": 4.932038834951457e-06, "loss": 0.0006, "step": 291000 }, { "epoch": 113.01, "learning_rate": 4.931521035598706e-06, "loss": 0.0918, "step": 291010 }, { "epoch": 113.02, "learning_rate": 4.931003236245956e-06, "loss": 0.1132, "step": 291020 }, { "epoch": 113.02, "learning_rate": 4.9304854368932045e-06, "loss": 0.0033, "step": 291030 }, { "epoch": 113.03, "learning_rate": 4.929967637540454e-06, "loss": 0.0304, "step": 291040 }, { "epoch": 113.03, "learning_rate": 4.929449838187703e-06, "loss": 0.081, "step": 291050 }, { "epoch": 113.03, "learning_rate": 4.9289320388349525e-06, "loss": 0.2264, "step": 291060 }, { "epoch": 113.04, "learning_rate": 4.928414239482201e-06, "loss": 0.0445, "step": 291070 }, { "epoch": 113.04, "learning_rate": 4.927896440129451e-06, "loss": 0.0697, "step": 291080 }, { "epoch": 113.04, "learning_rate": 4.9273786407767e-06, "loss": 0.0417, "step": 291090 }, { "epoch": 113.05, "learning_rate": 4.926860841423949e-06, "loss": 0.0043, "step": 291100 }, { "epoch": 113.05, "learning_rate": 4.926343042071198e-06, "loss": 0.0163, "step": 291110 }, { "epoch": 113.06, "learning_rate": 4.925825242718447e-06, "loss": 0.0691, "step": 291120 }, { "epoch": 113.06, "learning_rate": 4.925307443365696e-06, "loss": 0.0034, "step": 291130 }, { "epoch": 113.06, "learning_rate": 4.924789644012945e-06, "loss": 0.1025, "step": 291140 }, { "epoch": 113.07, "learning_rate": 4.924271844660195e-06, "loss": 0.0243, "step": 291150 }, { "epoch": 113.07, "learning_rate": 4.9237540453074436e-06, "loss": 0.1543, "step": 291160 }, { "epoch": 113.08, "learning_rate": 4.923236245954693e-06, "loss": 0.0021, "step": 291170 }, { "epoch": 113.08, "learning_rate": 4.922718446601942e-06, "loss": 0.0066, "step": 291180 }, { "epoch": 113.08, "learning_rate": 4.9222006472491916e-06, "loss": 0.0645, "step": 291190 }, { "epoch": 113.09, "learning_rate": 4.92168284789644e-06, "loss": 0.011, "step": 291200 }, { "epoch": 113.09, "learning_rate": 4.92116504854369e-06, "loss": 0.0248, "step": 291210 }, { "epoch": 113.1, "learning_rate": 4.920647249190939e-06, "loss": 0.1875, "step": 291220 }, { "epoch": 113.1, "learning_rate": 4.920129449838188e-06, "loss": 0.0269, "step": 291230 }, { "epoch": 113.1, "learning_rate": 4.919611650485437e-06, "loss": 0.0808, "step": 291240 }, { "epoch": 113.11, "learning_rate": 4.919093851132687e-06, "loss": 0.0259, "step": 291250 }, { "epoch": 113.11, "learning_rate": 4.9185760517799355e-06, "loss": 0.0738, "step": 291260 }, { "epoch": 113.11, "learning_rate": 4.918058252427185e-06, "loss": 0.0614, "step": 291270 }, { "epoch": 113.12, "learning_rate": 4.917540453074434e-06, "loss": 0.0081, "step": 291280 }, { "epoch": 113.12, "learning_rate": 4.917022653721683e-06, "loss": 0.0004, "step": 291290 }, { "epoch": 113.13, "learning_rate": 4.916504854368932e-06, "loss": 0.05, "step": 291300 }, { "epoch": 113.13, "learning_rate": 4.915987055016181e-06, "loss": 0.0805, "step": 291310 }, { "epoch": 113.13, "learning_rate": 4.915469255663431e-06, "loss": 0.0421, "step": 291320 }, { "epoch": 113.14, "learning_rate": 4.9149514563106794e-06, "loss": 0.0459, "step": 291330 }, { "epoch": 113.14, "learning_rate": 4.914433656957929e-06, "loss": 0.1232, "step": 291340 }, { "epoch": 113.15, "learning_rate": 4.913915857605178e-06, "loss": 0.0513, "step": 291350 }, { "epoch": 113.15, "learning_rate": 4.9133980582524275e-06, "loss": 0.0379, "step": 291360 }, { "epoch": 113.15, "learning_rate": 4.912880258899676e-06, "loss": 0.0222, "step": 291370 }, { "epoch": 113.16, "learning_rate": 4.912362459546926e-06, "loss": 0.0359, "step": 291380 }, { "epoch": 113.16, "learning_rate": 4.911844660194175e-06, "loss": 0.0156, "step": 291390 }, { "epoch": 113.17, "learning_rate": 4.911326860841424e-06, "loss": 0.0007, "step": 291400 }, { "epoch": 113.17, "learning_rate": 4.910809061488673e-06, "loss": 0.0118, "step": 291410 }, { "epoch": 113.17, "learning_rate": 4.910291262135923e-06, "loss": 0.001, "step": 291420 }, { "epoch": 113.18, "learning_rate": 4.909773462783171e-06, "loss": 0.0352, "step": 291430 }, { "epoch": 113.18, "learning_rate": 4.909255663430421e-06, "loss": 0.0461, "step": 291440 }, { "epoch": 113.18, "learning_rate": 4.90873786407767e-06, "loss": 0.0259, "step": 291450 }, { "epoch": 113.19, "learning_rate": 4.908220064724919e-06, "loss": 0.0162, "step": 291460 }, { "epoch": 113.19, "learning_rate": 4.907702265372168e-06, "loss": 0.0976, "step": 291470 }, { "epoch": 113.2, "learning_rate": 4.907184466019418e-06, "loss": 0.0192, "step": 291480 }, { "epoch": 113.2, "learning_rate": 4.9066666666666666e-06, "loss": 0.0177, "step": 291490 }, { "epoch": 113.2, "learning_rate": 4.906148867313916e-06, "loss": 0.114, "step": 291500 }, { "epoch": 113.21, "learning_rate": 4.905631067961166e-06, "loss": 0.0179, "step": 291510 }, { "epoch": 113.21, "learning_rate": 4.9051132686084146e-06, "loss": 0.0141, "step": 291520 }, { "epoch": 113.22, "learning_rate": 4.904595469255664e-06, "loss": 0.0362, "step": 291530 }, { "epoch": 113.22, "learning_rate": 4.904077669902913e-06, "loss": 0.1245, "step": 291540 }, { "epoch": 113.22, "learning_rate": 4.9035598705501626e-06, "loss": 0.0002, "step": 291550 }, { "epoch": 113.23, "learning_rate": 4.903042071197411e-06, "loss": 0.0173, "step": 291560 }, { "epoch": 113.23, "learning_rate": 4.902524271844661e-06, "loss": 0.015, "step": 291570 }, { "epoch": 113.23, "learning_rate": 4.90200647249191e-06, "loss": 0.0114, "step": 291580 }, { "epoch": 113.24, "learning_rate": 4.901488673139159e-06, "loss": 0.0218, "step": 291590 }, { "epoch": 113.24, "learning_rate": 4.900970873786408e-06, "loss": 0.0612, "step": 291600 }, { "epoch": 113.25, "learning_rate": 4.900453074433658e-06, "loss": 0.0798, "step": 291610 }, { "epoch": 113.25, "learning_rate": 4.8999352750809065e-06, "loss": 0.031, "step": 291620 }, { "epoch": 113.25, "learning_rate": 4.899417475728156e-06, "loss": 0.0233, "step": 291630 }, { "epoch": 113.26, "learning_rate": 4.898899676375405e-06, "loss": 0.0527, "step": 291640 }, { "epoch": 113.26, "learning_rate": 4.8983818770226545e-06, "loss": 0.0002, "step": 291650 }, { "epoch": 113.27, "learning_rate": 4.897864077669903e-06, "loss": 0.0926, "step": 291660 }, { "epoch": 113.27, "learning_rate": 4.897346278317153e-06, "loss": 0.0375, "step": 291670 }, { "epoch": 113.27, "learning_rate": 4.896828478964402e-06, "loss": 0.0468, "step": 291680 }, { "epoch": 113.28, "learning_rate": 4.896310679611651e-06, "loss": 0.0174, "step": 291690 }, { "epoch": 113.28, "learning_rate": 4.8957928802589e-06, "loss": 0.0627, "step": 291700 }, { "epoch": 113.29, "learning_rate": 4.89527508090615e-06, "loss": 0.1028, "step": 291710 }, { "epoch": 113.29, "learning_rate": 4.8947572815533985e-06, "loss": 0.0792, "step": 291720 }, { "epoch": 113.29, "learning_rate": 4.894239482200648e-06, "loss": 0.0078, "step": 291730 }, { "epoch": 113.3, "learning_rate": 4.893721682847897e-06, "loss": 0.0782, "step": 291740 }, { "epoch": 113.3, "learning_rate": 4.8932038834951465e-06, "loss": 0.0984, "step": 291750 }, { "epoch": 113.3, "learning_rate": 4.892686084142395e-06, "loss": 0.0111, "step": 291760 }, { "epoch": 113.31, "learning_rate": 4.892168284789645e-06, "loss": 0.0317, "step": 291770 }, { "epoch": 113.31, "learning_rate": 4.891650485436894e-06, "loss": 0.0317, "step": 291780 }, { "epoch": 113.32, "learning_rate": 4.891132686084143e-06, "loss": 0.0744, "step": 291790 }, { "epoch": 113.32, "learning_rate": 4.890614886731392e-06, "loss": 0.1261, "step": 291800 }, { "epoch": 113.32, "learning_rate": 4.890097087378642e-06, "loss": 0.0529, "step": 291810 }, { "epoch": 113.33, "learning_rate": 4.88957928802589e-06, "loss": 0.0414, "step": 291820 }, { "epoch": 113.33, "learning_rate": 4.88906148867314e-06, "loss": 0.0337, "step": 291830 }, { "epoch": 113.34, "learning_rate": 4.888543689320389e-06, "loss": 0.026, "step": 291840 }, { "epoch": 113.34, "learning_rate": 4.888025889967638e-06, "loss": 0.061, "step": 291850 }, { "epoch": 113.34, "learning_rate": 4.887508090614887e-06, "loss": 0.0325, "step": 291860 }, { "epoch": 113.35, "learning_rate": 4.886990291262137e-06, "loss": 0.0115, "step": 291870 }, { "epoch": 113.35, "learning_rate": 4.8864724919093856e-06, "loss": 0.1056, "step": 291880 }, { "epoch": 113.36, "learning_rate": 4.885954692556635e-06, "loss": 0.1041, "step": 291890 }, { "epoch": 113.36, "learning_rate": 4.885436893203884e-06, "loss": 0.0007, "step": 291900 }, { "epoch": 113.36, "learning_rate": 4.8849190938511336e-06, "loss": 0.0079, "step": 291910 }, { "epoch": 113.37, "learning_rate": 4.884401294498382e-06, "loss": 0.2211, "step": 291920 }, { "epoch": 113.37, "learning_rate": 4.883883495145631e-06, "loss": 0.0007, "step": 291930 }, { "epoch": 113.37, "learning_rate": 4.883365695792881e-06, "loss": 0.0087, "step": 291940 }, { "epoch": 113.38, "learning_rate": 4.8828478964401295e-06, "loss": 0.029, "step": 291950 }, { "epoch": 113.38, "learning_rate": 4.882330097087379e-06, "loss": 0.0925, "step": 291960 }, { "epoch": 113.39, "learning_rate": 4.881812297734628e-06, "loss": 0.0002, "step": 291970 }, { "epoch": 113.39, "learning_rate": 4.8812944983818775e-06, "loss": 0.0784, "step": 291980 }, { "epoch": 113.39, "learning_rate": 4.880776699029126e-06, "loss": 0.0496, "step": 291990 }, { "epoch": 113.4, "learning_rate": 4.880258899676376e-06, "loss": 0.0505, "step": 292000 }, { "epoch": 113.4, "learning_rate": 4.879741100323625e-06, "loss": 0.051, "step": 292010 }, { "epoch": 113.41, "learning_rate": 4.879223300970874e-06, "loss": 0.0275, "step": 292020 }, { "epoch": 113.41, "learning_rate": 4.878705501618123e-06, "loss": 0.0779, "step": 292030 }, { "epoch": 113.41, "learning_rate": 4.878187702265373e-06, "loss": 0.0763, "step": 292040 }, { "epoch": 113.42, "learning_rate": 4.8776699029126215e-06, "loss": 0.0238, "step": 292050 }, { "epoch": 113.42, "learning_rate": 4.877152103559871e-06, "loss": 0.0159, "step": 292060 }, { "epoch": 113.43, "learning_rate": 4.87663430420712e-06, "loss": 0.087, "step": 292070 }, { "epoch": 113.43, "learning_rate": 4.8761165048543695e-06, "loss": 0.0351, "step": 292080 }, { "epoch": 113.43, "learning_rate": 4.875598705501618e-06, "loss": 0.0097, "step": 292090 }, { "epoch": 113.44, "learning_rate": 4.875080906148868e-06, "loss": 0.0544, "step": 292100 }, { "epoch": 113.44, "learning_rate": 4.874563106796117e-06, "loss": 0.1541, "step": 292110 }, { "epoch": 113.44, "learning_rate": 4.874045307443366e-06, "loss": 0.0871, "step": 292120 }, { "epoch": 113.45, "learning_rate": 4.873527508090615e-06, "loss": 0.1023, "step": 292130 }, { "epoch": 113.45, "learning_rate": 4.873009708737865e-06, "loss": 0.1073, "step": 292140 }, { "epoch": 113.46, "learning_rate": 4.872491909385113e-06, "loss": 0.0174, "step": 292150 }, { "epoch": 113.46, "learning_rate": 4.871974110032363e-06, "loss": 0.0507, "step": 292160 }, { "epoch": 113.46, "learning_rate": 4.871456310679612e-06, "loss": 0.0056, "step": 292170 }, { "epoch": 113.47, "learning_rate": 4.870938511326861e-06, "loss": 0.0645, "step": 292180 }, { "epoch": 113.47, "learning_rate": 4.87042071197411e-06, "loss": 0.0494, "step": 292190 }, { "epoch": 113.48, "learning_rate": 4.86990291262136e-06, "loss": 0.023, "step": 292200 }, { "epoch": 113.48, "learning_rate": 4.8693851132686086e-06, "loss": 0.056, "step": 292210 }, { "epoch": 113.48, "learning_rate": 4.868867313915858e-06, "loss": 0.0168, "step": 292220 }, { "epoch": 113.49, "learning_rate": 4.868349514563107e-06, "loss": 0.0833, "step": 292230 }, { "epoch": 113.49, "learning_rate": 4.8678317152103566e-06, "loss": 0.0872, "step": 292240 }, { "epoch": 113.5, "learning_rate": 4.867313915857605e-06, "loss": 0.0181, "step": 292250 }, { "epoch": 113.5, "learning_rate": 4.866796116504855e-06, "loss": 0.0787, "step": 292260 }, { "epoch": 113.5, "learning_rate": 4.866278317152104e-06, "loss": 0.0094, "step": 292270 }, { "epoch": 113.51, "learning_rate": 4.865760517799353e-06, "loss": 0.0276, "step": 292280 }, { "epoch": 113.51, "learning_rate": 4.865242718446602e-06, "loss": 0.0616, "step": 292290 }, { "epoch": 113.51, "learning_rate": 4.864724919093852e-06, "loss": 0.0225, "step": 292300 }, { "epoch": 113.52, "learning_rate": 4.8642071197411005e-06, "loss": 0.1236, "step": 292310 }, { "epoch": 113.52, "learning_rate": 4.86368932038835e-06, "loss": 0.0044, "step": 292320 }, { "epoch": 113.53, "learning_rate": 4.863171521035599e-06, "loss": 0.0136, "step": 292330 }, { "epoch": 113.53, "learning_rate": 4.8626537216828485e-06, "loss": 0.059, "step": 292340 }, { "epoch": 113.53, "learning_rate": 4.862135922330097e-06, "loss": 0.0137, "step": 292350 }, { "epoch": 113.54, "learning_rate": 4.861618122977347e-06, "loss": 0.0286, "step": 292360 }, { "epoch": 113.54, "learning_rate": 4.861100323624596e-06, "loss": 0.1999, "step": 292370 }, { "epoch": 113.55, "learning_rate": 4.860582524271845e-06, "loss": 0.1766, "step": 292380 }, { "epoch": 113.55, "learning_rate": 4.860064724919094e-06, "loss": 0.0995, "step": 292390 }, { "epoch": 113.55, "learning_rate": 4.859546925566344e-06, "loss": 0.0136, "step": 292400 }, { "epoch": 113.56, "learning_rate": 4.8590291262135925e-06, "loss": 0.0839, "step": 292410 }, { "epoch": 113.56, "learning_rate": 4.858511326860842e-06, "loss": 0.0009, "step": 292420 }, { "epoch": 113.57, "learning_rate": 4.857993527508091e-06, "loss": 0.1132, "step": 292430 }, { "epoch": 113.57, "learning_rate": 4.8574757281553405e-06, "loss": 0.0565, "step": 292440 }, { "epoch": 113.57, "learning_rate": 4.856957928802589e-06, "loss": 0.0077, "step": 292450 }, { "epoch": 113.58, "learning_rate": 4.856440129449839e-06, "loss": 0.0229, "step": 292460 }, { "epoch": 113.58, "learning_rate": 4.855922330097088e-06, "loss": 0.0278, "step": 292470 }, { "epoch": 113.58, "learning_rate": 4.855404530744337e-06, "loss": 0.0001, "step": 292480 }, { "epoch": 113.59, "learning_rate": 4.854886731391586e-06, "loss": 0.0266, "step": 292490 }, { "epoch": 113.59, "learning_rate": 4.854368932038836e-06, "loss": 0.0178, "step": 292500 }, { "epoch": 113.6, "learning_rate": 4.853851132686084e-06, "loss": 0.1188, "step": 292510 }, { "epoch": 113.6, "learning_rate": 4.853333333333334e-06, "loss": 0.0654, "step": 292520 }, { "epoch": 113.6, "learning_rate": 4.852815533980583e-06, "loss": 0.1424, "step": 292530 }, { "epoch": 113.61, "learning_rate": 4.852297734627832e-06, "loss": 0.0099, "step": 292540 }, { "epoch": 113.61, "learning_rate": 4.851779935275081e-06, "loss": 0.0482, "step": 292550 }, { "epoch": 113.62, "learning_rate": 4.851262135922331e-06, "loss": 0.0162, "step": 292560 }, { "epoch": 113.62, "learning_rate": 4.8507443365695796e-06, "loss": 0.0725, "step": 292570 }, { "epoch": 113.62, "learning_rate": 4.850226537216829e-06, "loss": 0.0894, "step": 292580 }, { "epoch": 113.63, "learning_rate": 4.849708737864078e-06, "loss": 0.0584, "step": 292590 }, { "epoch": 113.63, "learning_rate": 4.8491909385113276e-06, "loss": 0.0537, "step": 292600 }, { "epoch": 113.63, "learning_rate": 4.848673139158576e-06, "loss": 0.0017, "step": 292610 }, { "epoch": 113.64, "learning_rate": 4.848155339805826e-06, "loss": 0.035, "step": 292620 }, { "epoch": 113.64, "learning_rate": 4.847637540453075e-06, "loss": 0.004, "step": 292630 }, { "epoch": 113.65, "learning_rate": 4.847119741100324e-06, "loss": 0.1286, "step": 292640 }, { "epoch": 113.65, "learning_rate": 4.846601941747573e-06, "loss": 0.1134, "step": 292650 }, { "epoch": 113.65, "learning_rate": 4.846084142394823e-06, "loss": 0.0107, "step": 292660 }, { "epoch": 113.66, "learning_rate": 4.8455663430420715e-06, "loss": 0.0265, "step": 292670 }, { "epoch": 113.66, "learning_rate": 4.845048543689321e-06, "loss": 0.0517, "step": 292680 }, { "epoch": 113.67, "learning_rate": 4.84453074433657e-06, "loss": 0.0934, "step": 292690 }, { "epoch": 113.67, "learning_rate": 4.8440129449838195e-06, "loss": 0.2096, "step": 292700 }, { "epoch": 113.67, "learning_rate": 4.843495145631068e-06, "loss": 0.1428, "step": 292710 }, { "epoch": 113.68, "learning_rate": 4.842977346278318e-06, "loss": 0.0763, "step": 292720 }, { "epoch": 113.68, "learning_rate": 4.842459546925567e-06, "loss": 0.0886, "step": 292730 }, { "epoch": 113.69, "learning_rate": 4.841941747572816e-06, "loss": 0.0816, "step": 292740 }, { "epoch": 113.69, "learning_rate": 4.841423948220065e-06, "loss": 0.0016, "step": 292750 }, { "epoch": 113.69, "learning_rate": 4.840906148867314e-06, "loss": 0.0567, "step": 292760 }, { "epoch": 113.7, "learning_rate": 4.8403883495145635e-06, "loss": 0.0002, "step": 292770 }, { "epoch": 113.7, "learning_rate": 4.839870550161812e-06, "loss": 0.0617, "step": 292780 }, { "epoch": 113.7, "learning_rate": 4.839352750809062e-06, "loss": 0.0778, "step": 292790 }, { "epoch": 113.71, "learning_rate": 4.838834951456311e-06, "loss": 0.0559, "step": 292800 }, { "epoch": 113.71, "learning_rate": 4.83831715210356e-06, "loss": 0.003, "step": 292810 }, { "epoch": 113.72, "learning_rate": 4.837799352750809e-06, "loss": 0.0805, "step": 292820 }, { "epoch": 113.72, "learning_rate": 4.837281553398059e-06, "loss": 0.1329, "step": 292830 }, { "epoch": 113.72, "learning_rate": 4.836763754045307e-06, "loss": 0.0389, "step": 292840 }, { "epoch": 113.73, "learning_rate": 4.836245954692557e-06, "loss": 0.0955, "step": 292850 }, { "epoch": 113.73, "learning_rate": 4.835728155339806e-06, "loss": 0.0201, "step": 292860 }, { "epoch": 113.74, "learning_rate": 4.835210355987055e-06, "loss": 0.0908, "step": 292870 }, { "epoch": 113.74, "learning_rate": 4.834692556634304e-06, "loss": 0.0007, "step": 292880 }, { "epoch": 113.74, "learning_rate": 4.834174757281554e-06, "loss": 0.0011, "step": 292890 }, { "epoch": 113.75, "learning_rate": 4.8336569579288026e-06, "loss": 0.0899, "step": 292900 }, { "epoch": 113.75, "learning_rate": 4.833139158576052e-06, "loss": 0.007, "step": 292910 }, { "epoch": 113.76, "learning_rate": 4.832621359223301e-06, "loss": 0.0214, "step": 292920 }, { "epoch": 113.76, "learning_rate": 4.8321035598705506e-06, "loss": 0.187, "step": 292930 }, { "epoch": 113.76, "learning_rate": 4.831585760517799e-06, "loss": 0.01, "step": 292940 }, { "epoch": 113.77, "learning_rate": 4.831067961165049e-06, "loss": 0.0255, "step": 292950 }, { "epoch": 113.77, "learning_rate": 4.830550161812298e-06, "loss": 0.0273, "step": 292960 }, { "epoch": 113.77, "learning_rate": 4.830032362459547e-06, "loss": 0.0419, "step": 292970 }, { "epoch": 113.78, "learning_rate": 4.829514563106796e-06, "loss": 0.0008, "step": 292980 }, { "epoch": 113.78, "learning_rate": 4.828996763754046e-06, "loss": 0.0256, "step": 292990 }, { "epoch": 113.79, "learning_rate": 4.8284789644012945e-06, "loss": 0.0813, "step": 293000 }, { "epoch": 113.79, "learning_rate": 4.827961165048544e-06, "loss": 0.0012, "step": 293010 }, { "epoch": 113.79, "learning_rate": 4.827443365695793e-06, "loss": 0.0149, "step": 293020 }, { "epoch": 113.8, "learning_rate": 4.8269255663430425e-06, "loss": 0.0186, "step": 293030 }, { "epoch": 113.8, "learning_rate": 4.826407766990291e-06, "loss": 0.0002, "step": 293040 }, { "epoch": 113.81, "learning_rate": 4.825889967637541e-06, "loss": 0.0242, "step": 293050 }, { "epoch": 113.81, "learning_rate": 4.82537216828479e-06, "loss": 0.0497, "step": 293060 }, { "epoch": 113.81, "learning_rate": 4.824854368932039e-06, "loss": 0.0607, "step": 293070 }, { "epoch": 113.82, "learning_rate": 4.824336569579288e-06, "loss": 0.0655, "step": 293080 }, { "epoch": 113.82, "learning_rate": 4.823818770226538e-06, "loss": 0.0004, "step": 293090 }, { "epoch": 113.83, "learning_rate": 4.8233009708737865e-06, "loss": 0.0599, "step": 293100 }, { "epoch": 113.83, "learning_rate": 4.822783171521036e-06, "loss": 0.026, "step": 293110 }, { "epoch": 113.83, "learning_rate": 4.822265372168285e-06, "loss": 0.0093, "step": 293120 }, { "epoch": 113.84, "learning_rate": 4.8217475728155345e-06, "loss": 0.0299, "step": 293130 }, { "epoch": 113.84, "learning_rate": 4.821229773462783e-06, "loss": 0.0252, "step": 293140 }, { "epoch": 113.84, "learning_rate": 4.820711974110033e-06, "loss": 0.0337, "step": 293150 }, { "epoch": 113.85, "learning_rate": 4.820194174757282e-06, "loss": 0.0171, "step": 293160 }, { "epoch": 113.85, "learning_rate": 4.819676375404531e-06, "loss": 0.0545, "step": 293170 }, { "epoch": 113.86, "learning_rate": 4.81915857605178e-06, "loss": 0.0236, "step": 293180 }, { "epoch": 113.86, "learning_rate": 4.81864077669903e-06, "loss": 0.2279, "step": 293190 }, { "epoch": 113.86, "learning_rate": 4.818122977346279e-06, "loss": 0.103, "step": 293200 }, { "epoch": 113.87, "learning_rate": 4.817605177993528e-06, "loss": 0.0013, "step": 293210 }, { "epoch": 113.87, "learning_rate": 4.817087378640778e-06, "loss": 0.0855, "step": 293220 }, { "epoch": 113.88, "learning_rate": 4.816569579288026e-06, "loss": 0.0008, "step": 293230 }, { "epoch": 113.88, "learning_rate": 4.816051779935276e-06, "loss": 0.0001, "step": 293240 }, { "epoch": 113.88, "learning_rate": 4.815533980582525e-06, "loss": 0.1217, "step": 293250 }, { "epoch": 113.89, "learning_rate": 4.815016181229774e-06, "loss": 0.0383, "step": 293260 }, { "epoch": 113.89, "learning_rate": 4.814498381877023e-06, "loss": 0.0222, "step": 293270 }, { "epoch": 113.9, "learning_rate": 4.813980582524273e-06, "loss": 0.0817, "step": 293280 }, { "epoch": 113.9, "learning_rate": 4.8134627831715216e-06, "loss": 0.0021, "step": 293290 }, { "epoch": 113.9, "learning_rate": 4.812944983818771e-06, "loss": 0.1562, "step": 293300 }, { "epoch": 113.91, "learning_rate": 4.81242718446602e-06, "loss": 0.068, "step": 293310 }, { "epoch": 113.91, "learning_rate": 4.81190938511327e-06, "loss": 0.0605, "step": 293320 }, { "epoch": 113.91, "learning_rate": 4.811391585760518e-06, "loss": 0.055, "step": 293330 }, { "epoch": 113.92, "learning_rate": 4.810873786407768e-06, "loss": 0.0085, "step": 293340 }, { "epoch": 113.92, "learning_rate": 4.810355987055017e-06, "loss": 0.0003, "step": 293350 }, { "epoch": 113.93, "learning_rate": 4.809838187702266e-06, "loss": 0.1281, "step": 293360 }, { "epoch": 113.93, "learning_rate": 4.809320388349515e-06, "loss": 0.252, "step": 293370 }, { "epoch": 113.93, "learning_rate": 4.808802588996765e-06, "loss": 0.0896, "step": 293380 }, { "epoch": 113.94, "learning_rate": 4.8082847896440135e-06, "loss": 0.1008, "step": 293390 }, { "epoch": 113.94, "learning_rate": 4.807766990291262e-06, "loss": 0.0029, "step": 293400 }, { "epoch": 113.95, "learning_rate": 4.807249190938512e-06, "loss": 0.01, "step": 293410 }, { "epoch": 113.95, "learning_rate": 4.806731391585761e-06, "loss": 0.001, "step": 293420 }, { "epoch": 113.95, "learning_rate": 4.80621359223301e-06, "loss": 0.0002, "step": 293430 }, { "epoch": 113.96, "learning_rate": 4.805695792880259e-06, "loss": 0.0353, "step": 293440 }, { "epoch": 113.96, "learning_rate": 4.805177993527509e-06, "loss": 0.0329, "step": 293450 }, { "epoch": 113.97, "learning_rate": 4.8046601941747575e-06, "loss": 0.0432, "step": 293460 }, { "epoch": 113.97, "learning_rate": 4.804142394822007e-06, "loss": 0.0234, "step": 293470 }, { "epoch": 113.97, "learning_rate": 4.803624595469256e-06, "loss": 0.0789, "step": 293480 }, { "epoch": 113.98, "learning_rate": 4.8031067961165055e-06, "loss": 0.0384, "step": 293490 }, { "epoch": 113.98, "learning_rate": 4.802588996763754e-06, "loss": 0.0238, "step": 293500 }, { "epoch": 113.98, "learning_rate": 4.802071197411004e-06, "loss": 0.0455, "step": 293510 }, { "epoch": 113.99, "learning_rate": 4.801553398058253e-06, "loss": 0.0003, "step": 293520 }, { "epoch": 113.99, "learning_rate": 4.801035598705502e-06, "loss": 0.0133, "step": 293530 }, { "epoch": 114.0, "learning_rate": 4.800517799352751e-06, "loss": 0.1137, "step": 293540 }, { "epoch": 114.0, "learning_rate": 4.800000000000001e-06, "loss": 0.1117, "step": 293550 }, { "epoch": 114.0, "eval_accuracy": 0.9521320495185694, "eval_loss": 0.3789585828781128, "eval_runtime": 8.1818, "eval_samples_per_second": 444.276, "eval_steps_per_second": 55.611, "step": 293550 }, { "epoch": 114.0, "learning_rate": 4.799482200647249e-06, "loss": 0.023, "step": 293560 }, { "epoch": 114.01, "learning_rate": 4.798964401294498e-06, "loss": 0.0116, "step": 293570 }, { "epoch": 114.01, "learning_rate": 4.798446601941748e-06, "loss": 0.0012, "step": 293580 }, { "epoch": 114.02, "learning_rate": 4.7979288025889966e-06, "loss": 0.0399, "step": 293590 }, { "epoch": 114.02, "learning_rate": 4.797411003236246e-06, "loss": 0.1014, "step": 293600 }, { "epoch": 114.02, "learning_rate": 4.796893203883495e-06, "loss": 0.0733, "step": 293610 }, { "epoch": 114.03, "learning_rate": 4.7963754045307446e-06, "loss": 0.0312, "step": 293620 }, { "epoch": 114.03, "learning_rate": 4.795857605177993e-06, "loss": 0.008, "step": 293630 }, { "epoch": 114.03, "learning_rate": 4.795339805825243e-06, "loss": 0.0444, "step": 293640 }, { "epoch": 114.04, "learning_rate": 4.794822006472492e-06, "loss": 0.0082, "step": 293650 }, { "epoch": 114.04, "learning_rate": 4.794304207119741e-06, "loss": 0.0784, "step": 293660 }, { "epoch": 114.05, "learning_rate": 4.79378640776699e-06, "loss": 0.0107, "step": 293670 }, { "epoch": 114.05, "learning_rate": 4.79326860841424e-06, "loss": 0.1249, "step": 293680 }, { "epoch": 114.05, "learning_rate": 4.7927508090614885e-06, "loss": 0.0275, "step": 293690 }, { "epoch": 114.06, "learning_rate": 4.792233009708738e-06, "loss": 0.0318, "step": 293700 }, { "epoch": 114.06, "learning_rate": 4.791715210355987e-06, "loss": 0.0181, "step": 293710 }, { "epoch": 114.07, "learning_rate": 4.7911974110032365e-06, "loss": 0.046, "step": 293720 }, { "epoch": 114.07, "learning_rate": 4.790679611650485e-06, "loss": 0.1164, "step": 293730 }, { "epoch": 114.07, "learning_rate": 4.790161812297735e-06, "loss": 0.0163, "step": 293740 }, { "epoch": 114.08, "learning_rate": 4.789644012944984e-06, "loss": 0.0534, "step": 293750 }, { "epoch": 114.08, "learning_rate": 4.789126213592233e-06, "loss": 0.0404, "step": 293760 }, { "epoch": 114.09, "learning_rate": 4.788608414239483e-06, "loss": 0.1018, "step": 293770 }, { "epoch": 114.09, "learning_rate": 4.788090614886732e-06, "loss": 0.0004, "step": 293780 }, { "epoch": 114.09, "learning_rate": 4.787572815533981e-06, "loss": 0.0583, "step": 293790 }, { "epoch": 114.1, "learning_rate": 4.78705501618123e-06, "loss": 0.0004, "step": 293800 }, { "epoch": 114.1, "learning_rate": 4.78653721682848e-06, "loss": 0.0088, "step": 293810 }, { "epoch": 114.1, "learning_rate": 4.7860194174757285e-06, "loss": 0.1356, "step": 293820 }, { "epoch": 114.11, "learning_rate": 4.785501618122978e-06, "loss": 0.0348, "step": 293830 }, { "epoch": 114.11, "learning_rate": 4.784983818770227e-06, "loss": 0.0173, "step": 293840 }, { "epoch": 114.12, "learning_rate": 4.7844660194174765e-06, "loss": 0.0703, "step": 293850 }, { "epoch": 114.12, "learning_rate": 4.783948220064725e-06, "loss": 0.0386, "step": 293860 }, { "epoch": 114.12, "learning_rate": 4.783430420711975e-06, "loss": 0.0969, "step": 293870 }, { "epoch": 114.13, "learning_rate": 4.782912621359224e-06, "loss": 0.1254, "step": 293880 }, { "epoch": 114.13, "learning_rate": 4.782394822006473e-06, "loss": 0.0827, "step": 293890 }, { "epoch": 114.14, "learning_rate": 4.781877022653722e-06, "loss": 0.1176, "step": 293900 }, { "epoch": 114.14, "learning_rate": 4.781359223300972e-06, "loss": 0.0529, "step": 293910 }, { "epoch": 114.14, "learning_rate": 4.78084142394822e-06, "loss": 0.0126, "step": 293920 }, { "epoch": 114.15, "learning_rate": 4.78032362459547e-06, "loss": 0.0452, "step": 293930 }, { "epoch": 114.15, "learning_rate": 4.779805825242719e-06, "loss": 0.0488, "step": 293940 }, { "epoch": 114.16, "learning_rate": 4.779288025889968e-06, "loss": 0.0831, "step": 293950 }, { "epoch": 114.16, "learning_rate": 4.778770226537217e-06, "loss": 0.0011, "step": 293960 }, { "epoch": 114.16, "learning_rate": 4.778252427184467e-06, "loss": 0.0692, "step": 293970 }, { "epoch": 114.17, "learning_rate": 4.7777346278317156e-06, "loss": 0.0318, "step": 293980 }, { "epoch": 114.17, "learning_rate": 4.777216828478965e-06, "loss": 0.0317, "step": 293990 }, { "epoch": 114.17, "learning_rate": 4.776699029126214e-06, "loss": 0.0665, "step": 294000 }, { "epoch": 114.18, "learning_rate": 4.776181229773464e-06, "loss": 0.0038, "step": 294010 }, { "epoch": 114.18, "learning_rate": 4.775663430420712e-06, "loss": 0.021, "step": 294020 }, { "epoch": 114.19, "learning_rate": 4.775145631067962e-06, "loss": 0.0312, "step": 294030 }, { "epoch": 114.19, "learning_rate": 4.774627831715211e-06, "loss": 0.07, "step": 294040 }, { "epoch": 114.19, "learning_rate": 4.77411003236246e-06, "loss": 0.0163, "step": 294050 }, { "epoch": 114.2, "learning_rate": 4.773592233009709e-06, "loss": 0.0175, "step": 294060 }, { "epoch": 114.2, "learning_rate": 4.773074433656959e-06, "loss": 0.0013, "step": 294070 }, { "epoch": 114.21, "learning_rate": 4.7725566343042075e-06, "loss": 0.2251, "step": 294080 }, { "epoch": 114.21, "learning_rate": 4.772038834951457e-06, "loss": 0.0122, "step": 294090 }, { "epoch": 114.21, "learning_rate": 4.771521035598706e-06, "loss": 0.0312, "step": 294100 }, { "epoch": 114.22, "learning_rate": 4.7710032362459555e-06, "loss": 0.0591, "step": 294110 }, { "epoch": 114.22, "learning_rate": 4.770485436893204e-06, "loss": 0.031, "step": 294120 }, { "epoch": 114.23, "learning_rate": 4.769967637540454e-06, "loss": 0.0259, "step": 294130 }, { "epoch": 114.23, "learning_rate": 4.769449838187703e-06, "loss": 0.0007, "step": 294140 }, { "epoch": 114.23, "learning_rate": 4.768932038834952e-06, "loss": 0.0915, "step": 294150 }, { "epoch": 114.24, "learning_rate": 4.768414239482201e-06, "loss": 0.0418, "step": 294160 }, { "epoch": 114.24, "learning_rate": 4.767896440129451e-06, "loss": 0.0825, "step": 294170 }, { "epoch": 114.24, "learning_rate": 4.7673786407766995e-06, "loss": 0.2062, "step": 294180 }, { "epoch": 114.25, "learning_rate": 4.766860841423949e-06, "loss": 0.1862, "step": 294190 }, { "epoch": 114.25, "learning_rate": 4.766343042071198e-06, "loss": 0.0013, "step": 294200 }, { "epoch": 114.26, "learning_rate": 4.765825242718447e-06, "loss": 0.0966, "step": 294210 }, { "epoch": 114.26, "learning_rate": 4.765307443365696e-06, "loss": 0.0148, "step": 294220 }, { "epoch": 114.26, "learning_rate": 4.764789644012945e-06, "loss": 0.0474, "step": 294230 }, { "epoch": 114.27, "learning_rate": 4.764271844660195e-06, "loss": 0.0479, "step": 294240 }, { "epoch": 114.27, "learning_rate": 4.763754045307443e-06, "loss": 0.0219, "step": 294250 }, { "epoch": 114.28, "learning_rate": 4.763236245954693e-06, "loss": 0.0003, "step": 294260 }, { "epoch": 114.28, "learning_rate": 4.762718446601942e-06, "loss": 0.0795, "step": 294270 }, { "epoch": 114.28, "learning_rate": 4.762200647249191e-06, "loss": 0.1411, "step": 294280 }, { "epoch": 114.29, "learning_rate": 4.76168284789644e-06, "loss": 0.0592, "step": 294290 }, { "epoch": 114.29, "learning_rate": 4.76116504854369e-06, "loss": 0.1206, "step": 294300 }, { "epoch": 114.3, "learning_rate": 4.7606472491909386e-06, "loss": 0.0001, "step": 294310 }, { "epoch": 114.3, "learning_rate": 4.760129449838188e-06, "loss": 0.0152, "step": 294320 }, { "epoch": 114.3, "learning_rate": 4.759611650485437e-06, "loss": 0.0589, "step": 294330 }, { "epoch": 114.31, "learning_rate": 4.7590938511326866e-06, "loss": 0.0092, "step": 294340 }, { "epoch": 114.31, "learning_rate": 4.758576051779935e-06, "loss": 0.0325, "step": 294350 }, { "epoch": 114.31, "learning_rate": 4.758058252427185e-06, "loss": 0.0856, "step": 294360 }, { "epoch": 114.32, "learning_rate": 4.757540453074434e-06, "loss": 0.0416, "step": 294370 }, { "epoch": 114.32, "learning_rate": 4.757022653721683e-06, "loss": 0.0443, "step": 294380 }, { "epoch": 114.33, "learning_rate": 4.756504854368932e-06, "loss": 0.0169, "step": 294390 }, { "epoch": 114.33, "learning_rate": 4.755987055016182e-06, "loss": 0.0004, "step": 294400 }, { "epoch": 114.33, "learning_rate": 4.7554692556634305e-06, "loss": 0.0243, "step": 294410 }, { "epoch": 114.34, "learning_rate": 4.75495145631068e-06, "loss": 0.1376, "step": 294420 }, { "epoch": 114.34, "learning_rate": 4.754433656957929e-06, "loss": 0.0178, "step": 294430 }, { "epoch": 114.35, "learning_rate": 4.7539158576051785e-06, "loss": 0.0895, "step": 294440 }, { "epoch": 114.35, "learning_rate": 4.753398058252427e-06, "loss": 0.0183, "step": 294450 }, { "epoch": 114.35, "learning_rate": 4.752880258899677e-06, "loss": 0.0148, "step": 294460 }, { "epoch": 114.36, "learning_rate": 4.752362459546926e-06, "loss": 0.0022, "step": 294470 }, { "epoch": 114.36, "learning_rate": 4.751844660194175e-06, "loss": 0.0096, "step": 294480 }, { "epoch": 114.37, "learning_rate": 4.751326860841424e-06, "loss": 0.0291, "step": 294490 }, { "epoch": 114.37, "learning_rate": 4.750809061488674e-06, "loss": 0.068, "step": 294500 }, { "epoch": 114.37, "learning_rate": 4.7502912621359225e-06, "loss": 0.0358, "step": 294510 }, { "epoch": 114.38, "learning_rate": 4.749773462783172e-06, "loss": 0.0273, "step": 294520 }, { "epoch": 114.38, "learning_rate": 4.749255663430421e-06, "loss": 0.0149, "step": 294530 }, { "epoch": 114.38, "learning_rate": 4.7487378640776705e-06, "loss": 0.0963, "step": 294540 }, { "epoch": 114.39, "learning_rate": 4.748220064724919e-06, "loss": 0.0681, "step": 294550 }, { "epoch": 114.39, "learning_rate": 4.747702265372169e-06, "loss": 0.0584, "step": 294560 }, { "epoch": 114.4, "learning_rate": 4.747184466019418e-06, "loss": 0.0326, "step": 294570 }, { "epoch": 114.4, "learning_rate": 4.746666666666667e-06, "loss": 0.0225, "step": 294580 }, { "epoch": 114.4, "learning_rate": 4.746148867313916e-06, "loss": 0.0147, "step": 294590 }, { "epoch": 114.41, "learning_rate": 4.745631067961166e-06, "loss": 0.0454, "step": 294600 }, { "epoch": 114.41, "learning_rate": 4.745113268608414e-06, "loss": 0.0019, "step": 294610 }, { "epoch": 114.42, "learning_rate": 4.744595469255664e-06, "loss": 0.0391, "step": 294620 }, { "epoch": 114.42, "learning_rate": 4.744077669902913e-06, "loss": 0.0505, "step": 294630 }, { "epoch": 114.42, "learning_rate": 4.743559870550162e-06, "loss": 0.0461, "step": 294640 }, { "epoch": 114.43, "learning_rate": 4.743042071197411e-06, "loss": 0.0099, "step": 294650 }, { "epoch": 114.43, "learning_rate": 4.742524271844661e-06, "loss": 0.1036, "step": 294660 }, { "epoch": 114.43, "learning_rate": 4.7420064724919096e-06, "loss": 0.0805, "step": 294670 }, { "epoch": 114.44, "learning_rate": 4.741488673139159e-06, "loss": 0.0001, "step": 294680 }, { "epoch": 114.44, "learning_rate": 4.740970873786408e-06, "loss": 0.0468, "step": 294690 }, { "epoch": 114.45, "learning_rate": 4.740453074433658e-06, "loss": 0.0229, "step": 294700 }, { "epoch": 114.45, "learning_rate": 4.739935275080906e-06, "loss": 0.1052, "step": 294710 }, { "epoch": 114.45, "learning_rate": 4.739417475728156e-06, "loss": 0.0077, "step": 294720 }, { "epoch": 114.46, "learning_rate": 4.738899676375405e-06, "loss": 0.0001, "step": 294730 }, { "epoch": 114.46, "learning_rate": 4.738381877022654e-06, "loss": 0.0865, "step": 294740 }, { "epoch": 114.47, "learning_rate": 4.737864077669903e-06, "loss": 0.1698, "step": 294750 }, { "epoch": 114.47, "learning_rate": 4.737346278317153e-06, "loss": 0.0091, "step": 294760 }, { "epoch": 114.47, "learning_rate": 4.7368284789644015e-06, "loss": 0.0912, "step": 294770 }, { "epoch": 114.48, "learning_rate": 4.736310679611651e-06, "loss": 0.0194, "step": 294780 }, { "epoch": 114.48, "learning_rate": 4.7357928802589e-06, "loss": 0.0841, "step": 294790 }, { "epoch": 114.49, "learning_rate": 4.7352750809061495e-06, "loss": 0.0001, "step": 294800 }, { "epoch": 114.49, "learning_rate": 4.734757281553398e-06, "loss": 0.0103, "step": 294810 }, { "epoch": 114.49, "learning_rate": 4.734239482200648e-06, "loss": 0.0666, "step": 294820 }, { "epoch": 114.5, "learning_rate": 4.733721682847897e-06, "loss": 0.0246, "step": 294830 }, { "epoch": 114.5, "learning_rate": 4.733203883495146e-06, "loss": 0.0638, "step": 294840 }, { "epoch": 114.5, "learning_rate": 4.732686084142395e-06, "loss": 0.1281, "step": 294850 }, { "epoch": 114.51, "learning_rate": 4.732168284789645e-06, "loss": 0.0123, "step": 294860 }, { "epoch": 114.51, "learning_rate": 4.7316504854368935e-06, "loss": 0.0667, "step": 294870 }, { "epoch": 114.52, "learning_rate": 4.731132686084143e-06, "loss": 0.0494, "step": 294880 }, { "epoch": 114.52, "learning_rate": 4.730614886731392e-06, "loss": 0.0007, "step": 294890 }, { "epoch": 114.52, "learning_rate": 4.7300970873786415e-06, "loss": 0.0156, "step": 294900 }, { "epoch": 114.53, "learning_rate": 4.72957928802589e-06, "loss": 0.0529, "step": 294910 }, { "epoch": 114.53, "learning_rate": 4.72906148867314e-06, "loss": 0.0186, "step": 294920 }, { "epoch": 114.54, "learning_rate": 4.728543689320389e-06, "loss": 0.0535, "step": 294930 }, { "epoch": 114.54, "learning_rate": 4.728025889967638e-06, "loss": 0.0084, "step": 294940 }, { "epoch": 114.54, "learning_rate": 4.727508090614887e-06, "loss": 0.0088, "step": 294950 }, { "epoch": 114.55, "learning_rate": 4.726990291262137e-06, "loss": 0.0172, "step": 294960 }, { "epoch": 114.55, "learning_rate": 4.726472491909385e-06, "loss": 0.0108, "step": 294970 }, { "epoch": 114.56, "learning_rate": 4.725954692556635e-06, "loss": 0.028, "step": 294980 }, { "epoch": 114.56, "learning_rate": 4.725436893203884e-06, "loss": 0.0621, "step": 294990 }, { "epoch": 114.56, "learning_rate": 4.724919093851133e-06, "loss": 0.2592, "step": 295000 }, { "epoch": 114.57, "learning_rate": 4.724401294498382e-06, "loss": 0.0002, "step": 295010 }, { "epoch": 114.57, "learning_rate": 4.723883495145632e-06, "loss": 0.0361, "step": 295020 }, { "epoch": 114.57, "learning_rate": 4.7233656957928806e-06, "loss": 0.0037, "step": 295030 }, { "epoch": 114.58, "learning_rate": 4.722847896440129e-06, "loss": 0.0174, "step": 295040 }, { "epoch": 114.58, "learning_rate": 4.722330097087379e-06, "loss": 0.0004, "step": 295050 }, { "epoch": 114.59, "learning_rate": 4.721812297734628e-06, "loss": 0.0193, "step": 295060 }, { "epoch": 114.59, "learning_rate": 4.721294498381877e-06, "loss": 0.1299, "step": 295070 }, { "epoch": 114.59, "learning_rate": 4.720776699029126e-06, "loss": 0.1156, "step": 295080 }, { "epoch": 114.6, "learning_rate": 4.720258899676376e-06, "loss": 0.0622, "step": 295090 }, { "epoch": 114.6, "learning_rate": 4.7197411003236245e-06, "loss": 0.001, "step": 295100 }, { "epoch": 114.61, "learning_rate": 4.719223300970874e-06, "loss": 0.027, "step": 295110 }, { "epoch": 114.61, "learning_rate": 4.718705501618123e-06, "loss": 0.0077, "step": 295120 }, { "epoch": 114.61, "learning_rate": 4.7181877022653725e-06, "loss": 0.0018, "step": 295130 }, { "epoch": 114.62, "learning_rate": 4.717669902912621e-06, "loss": 0.0012, "step": 295140 }, { "epoch": 114.62, "learning_rate": 4.717152103559871e-06, "loss": 0.0268, "step": 295150 }, { "epoch": 114.63, "learning_rate": 4.71663430420712e-06, "loss": 0.0088, "step": 295160 }, { "epoch": 114.63, "learning_rate": 4.716116504854369e-06, "loss": 0.0157, "step": 295170 }, { "epoch": 114.63, "learning_rate": 4.715598705501618e-06, "loss": 0.0164, "step": 295180 }, { "epoch": 114.64, "learning_rate": 4.715080906148868e-06, "loss": 0.0293, "step": 295190 }, { "epoch": 114.64, "learning_rate": 4.7145631067961165e-06, "loss": 0.0015, "step": 295200 }, { "epoch": 114.64, "learning_rate": 4.714045307443366e-06, "loss": 0.1605, "step": 295210 }, { "epoch": 114.65, "learning_rate": 4.713527508090615e-06, "loss": 0.1028, "step": 295220 }, { "epoch": 114.65, "learning_rate": 4.7130097087378645e-06, "loss": 0.0289, "step": 295230 }, { "epoch": 114.66, "learning_rate": 4.712491909385113e-06, "loss": 0.0378, "step": 295240 }, { "epoch": 114.66, "learning_rate": 4.711974110032363e-06, "loss": 0.0104, "step": 295250 }, { "epoch": 114.66, "learning_rate": 4.711456310679612e-06, "loss": 0.0066, "step": 295260 }, { "epoch": 114.67, "learning_rate": 4.710938511326861e-06, "loss": 0.1991, "step": 295270 }, { "epoch": 114.67, "learning_rate": 4.71042071197411e-06, "loss": 0.0999, "step": 295280 }, { "epoch": 114.68, "learning_rate": 4.70990291262136e-06, "loss": 0.0011, "step": 295290 }, { "epoch": 114.68, "learning_rate": 4.709385113268608e-06, "loss": 0.0629, "step": 295300 }, { "epoch": 114.68, "learning_rate": 4.708867313915858e-06, "loss": 0.0069, "step": 295310 }, { "epoch": 114.69, "learning_rate": 4.708349514563107e-06, "loss": 0.0023, "step": 295320 }, { "epoch": 114.69, "learning_rate": 4.707831715210356e-06, "loss": 0.0254, "step": 295330 }, { "epoch": 114.7, "learning_rate": 4.707313915857605e-06, "loss": 0.0228, "step": 295340 }, { "epoch": 114.7, "learning_rate": 4.706796116504855e-06, "loss": 0.0008, "step": 295350 }, { "epoch": 114.7, "learning_rate": 4.7062783171521036e-06, "loss": 0.0429, "step": 295360 }, { "epoch": 114.71, "learning_rate": 4.705760517799353e-06, "loss": 0.1593, "step": 295370 }, { "epoch": 114.71, "learning_rate": 4.705242718446602e-06, "loss": 0.1689, "step": 295380 }, { "epoch": 114.71, "learning_rate": 4.7047249190938516e-06, "loss": 0.0976, "step": 295390 }, { "epoch": 114.72, "learning_rate": 4.7042071197411e-06, "loss": 0.0002, "step": 295400 }, { "epoch": 114.72, "learning_rate": 4.70368932038835e-06, "loss": 0.1808, "step": 295410 }, { "epoch": 114.73, "learning_rate": 4.703171521035599e-06, "loss": 0.1095, "step": 295420 }, { "epoch": 114.73, "learning_rate": 4.702653721682848e-06, "loss": 0.0246, "step": 295430 }, { "epoch": 114.73, "learning_rate": 4.702135922330098e-06, "loss": 0.0002, "step": 295440 }, { "epoch": 114.74, "learning_rate": 4.701618122977347e-06, "loss": 0.0828, "step": 295450 }, { "epoch": 114.74, "learning_rate": 4.701100323624596e-06, "loss": 0.0964, "step": 295460 }, { "epoch": 114.75, "learning_rate": 4.700582524271845e-06, "loss": 0.0735, "step": 295470 }, { "epoch": 114.75, "learning_rate": 4.700064724919095e-06, "loss": 0.0354, "step": 295480 }, { "epoch": 114.75, "learning_rate": 4.6995469255663435e-06, "loss": 0.071, "step": 295490 }, { "epoch": 114.76, "learning_rate": 4.699029126213593e-06, "loss": 0.0064, "step": 295500 }, { "epoch": 114.76, "learning_rate": 4.698511326860842e-06, "loss": 0.011, "step": 295510 }, { "epoch": 114.77, "learning_rate": 4.6979935275080915e-06, "loss": 0.0458, "step": 295520 }, { "epoch": 114.77, "learning_rate": 4.69747572815534e-06, "loss": 0.0984, "step": 295530 }, { "epoch": 114.77, "learning_rate": 4.69695792880259e-06, "loss": 0.0137, "step": 295540 }, { "epoch": 114.78, "learning_rate": 4.696440129449839e-06, "loss": 0.0211, "step": 295550 }, { "epoch": 114.78, "learning_rate": 4.695922330097088e-06, "loss": 0.1298, "step": 295560 }, { "epoch": 114.78, "learning_rate": 4.695404530744337e-06, "loss": 0.1281, "step": 295570 }, { "epoch": 114.79, "learning_rate": 4.694886731391587e-06, "loss": 0.0364, "step": 295580 }, { "epoch": 114.79, "learning_rate": 4.6943689320388355e-06, "loss": 0.0004, "step": 295590 }, { "epoch": 114.8, "learning_rate": 4.693851132686085e-06, "loss": 0.0875, "step": 295600 }, { "epoch": 114.8, "learning_rate": 4.693333333333334e-06, "loss": 0.0214, "step": 295610 }, { "epoch": 114.8, "learning_rate": 4.6928155339805835e-06, "loss": 0.0206, "step": 295620 }, { "epoch": 114.81, "learning_rate": 4.692297734627832e-06, "loss": 0.0774, "step": 295630 }, { "epoch": 114.81, "learning_rate": 4.691779935275082e-06, "loss": 0.0472, "step": 295640 }, { "epoch": 114.82, "learning_rate": 4.691262135922331e-06, "loss": 0.0426, "step": 295650 }, { "epoch": 114.82, "learning_rate": 4.69074433656958e-06, "loss": 0.0703, "step": 295660 }, { "epoch": 114.82, "learning_rate": 4.690226537216829e-06, "loss": 0.0924, "step": 295670 }, { "epoch": 114.83, "learning_rate": 4.689708737864078e-06, "loss": 0.0677, "step": 295680 }, { "epoch": 114.83, "learning_rate": 4.689190938511327e-06, "loss": 0.0157, "step": 295690 }, { "epoch": 114.83, "learning_rate": 4.688673139158576e-06, "loss": 0.0123, "step": 295700 }, { "epoch": 114.84, "learning_rate": 4.688155339805826e-06, "loss": 0.0416, "step": 295710 }, { "epoch": 114.84, "learning_rate": 4.6876375404530746e-06, "loss": 0.1002, "step": 295720 }, { "epoch": 114.85, "learning_rate": 4.687119741100324e-06, "loss": 0.0422, "step": 295730 }, { "epoch": 114.85, "learning_rate": 4.686601941747573e-06, "loss": 0.0496, "step": 295740 }, { "epoch": 114.85, "learning_rate": 4.686084142394823e-06, "loss": 0.0495, "step": 295750 }, { "epoch": 114.86, "learning_rate": 4.685566343042071e-06, "loss": 0.018, "step": 295760 }, { "epoch": 114.86, "learning_rate": 4.685048543689321e-06, "loss": 0.0573, "step": 295770 }, { "epoch": 114.87, "learning_rate": 4.68453074433657e-06, "loss": 0.0236, "step": 295780 }, { "epoch": 114.87, "learning_rate": 4.684012944983819e-06, "loss": 0.0205, "step": 295790 }, { "epoch": 114.87, "learning_rate": 4.683495145631068e-06, "loss": 0.0643, "step": 295800 }, { "epoch": 114.88, "learning_rate": 4.682977346278318e-06, "loss": 0.0132, "step": 295810 }, { "epoch": 114.88, "learning_rate": 4.6824595469255665e-06, "loss": 0.0372, "step": 295820 }, { "epoch": 114.89, "learning_rate": 4.681941747572816e-06, "loss": 0.0016, "step": 295830 }, { "epoch": 114.89, "learning_rate": 4.681423948220065e-06, "loss": 0.1709, "step": 295840 }, { "epoch": 114.89, "learning_rate": 4.680906148867314e-06, "loss": 0.0696, "step": 295850 }, { "epoch": 114.9, "learning_rate": 4.680388349514563e-06, "loss": 0.0191, "step": 295860 }, { "epoch": 114.9, "learning_rate": 4.679870550161812e-06, "loss": 0.0197, "step": 295870 }, { "epoch": 114.9, "learning_rate": 4.679352750809062e-06, "loss": 0.0191, "step": 295880 }, { "epoch": 114.91, "learning_rate": 4.6788349514563105e-06, "loss": 0.079, "step": 295890 }, { "epoch": 114.91, "learning_rate": 4.67831715210356e-06, "loss": 0.1457, "step": 295900 }, { "epoch": 114.92, "learning_rate": 4.677799352750809e-06, "loss": 0.117, "step": 295910 }, { "epoch": 114.92, "learning_rate": 4.6772815533980585e-06, "loss": 0.0716, "step": 295920 }, { "epoch": 114.92, "learning_rate": 4.676763754045307e-06, "loss": 0.0001, "step": 295930 }, { "epoch": 114.93, "learning_rate": 4.676245954692557e-06, "loss": 0.0954, "step": 295940 }, { "epoch": 114.93, "learning_rate": 4.675728155339806e-06, "loss": 0.0225, "step": 295950 }, { "epoch": 114.94, "learning_rate": 4.675210355987055e-06, "loss": 0.0005, "step": 295960 }, { "epoch": 114.94, "learning_rate": 4.674692556634304e-06, "loss": 0.0025, "step": 295970 }, { "epoch": 114.94, "learning_rate": 4.674174757281554e-06, "loss": 0.0345, "step": 295980 }, { "epoch": 114.95, "learning_rate": 4.673656957928802e-06, "loss": 0.0486, "step": 295990 }, { "epoch": 114.95, "learning_rate": 4.673139158576052e-06, "loss": 0.0015, "step": 296000 }, { "epoch": 114.96, "learning_rate": 4.672621359223302e-06, "loss": 0.1916, "step": 296010 }, { "epoch": 114.96, "learning_rate": 4.67210355987055e-06, "loss": 0.022, "step": 296020 }, { "epoch": 114.96, "learning_rate": 4.6715857605178e-06, "loss": 0.0021, "step": 296030 }, { "epoch": 114.97, "learning_rate": 4.671067961165049e-06, "loss": 0.0294, "step": 296040 }, { "epoch": 114.97, "learning_rate": 4.670550161812298e-06, "loss": 0.0645, "step": 296050 }, { "epoch": 114.97, "learning_rate": 4.670032362459547e-06, "loss": 0.0255, "step": 296060 }, { "epoch": 114.98, "learning_rate": 4.669514563106797e-06, "loss": 0.0579, "step": 296070 }, { "epoch": 114.98, "learning_rate": 4.6689967637540456e-06, "loss": 0.0296, "step": 296080 }, { "epoch": 114.99, "learning_rate": 4.668478964401295e-06, "loss": 0.0821, "step": 296090 }, { "epoch": 114.99, "learning_rate": 4.667961165048544e-06, "loss": 0.0307, "step": 296100 }, { "epoch": 114.99, "learning_rate": 4.667443365695794e-06, "loss": 0.0338, "step": 296110 }, { "epoch": 115.0, "learning_rate": 4.666925566343042e-06, "loss": 0.0022, "step": 296120 }, { "epoch": 115.0, "eval_accuracy": 0.9521320495185694, "eval_loss": 0.3717191815376282, "eval_runtime": 8.1946, "eval_samples_per_second": 443.583, "eval_steps_per_second": 55.524, "step": 296125 }, { "epoch": 115.0, "learning_rate": 4.666407766990292e-06, "loss": 0.0316, "step": 296130 }, { "epoch": 115.01, "learning_rate": 4.665889967637541e-06, "loss": 0.0021, "step": 296140 }, { "epoch": 115.01, "learning_rate": 4.66537216828479e-06, "loss": 0.0237, "step": 296150 }, { "epoch": 115.01, "learning_rate": 4.664854368932039e-06, "loss": 0.0174, "step": 296160 }, { "epoch": 115.02, "learning_rate": 4.664336569579289e-06, "loss": 0.0662, "step": 296170 }, { "epoch": 115.02, "learning_rate": 4.6638187702265375e-06, "loss": 0.0727, "step": 296180 }, { "epoch": 115.03, "learning_rate": 4.663300970873787e-06, "loss": 0.21, "step": 296190 }, { "epoch": 115.03, "learning_rate": 4.662783171521036e-06, "loss": 0.0009, "step": 296200 }, { "epoch": 115.03, "learning_rate": 4.6622653721682855e-06, "loss": 0.0403, "step": 296210 }, { "epoch": 115.04, "learning_rate": 4.661747572815534e-06, "loss": 0.0164, "step": 296220 }, { "epoch": 115.04, "learning_rate": 4.661229773462784e-06, "loss": 0.0567, "step": 296230 }, { "epoch": 115.04, "learning_rate": 4.660711974110033e-06, "loss": 0.0011, "step": 296240 }, { "epoch": 115.05, "learning_rate": 4.660194174757282e-06, "loss": 0.0119, "step": 296250 }, { "epoch": 115.05, "learning_rate": 4.659676375404531e-06, "loss": 0.1424, "step": 296260 }, { "epoch": 115.06, "learning_rate": 4.659158576051781e-06, "loss": 0.0615, "step": 296270 }, { "epoch": 115.06, "learning_rate": 4.6586407766990295e-06, "loss": 0.0728, "step": 296280 }, { "epoch": 115.06, "learning_rate": 4.658122977346279e-06, "loss": 0.0114, "step": 296290 }, { "epoch": 115.07, "learning_rate": 4.657605177993528e-06, "loss": 0.144, "step": 296300 }, { "epoch": 115.07, "learning_rate": 4.6570873786407775e-06, "loss": 0.0411, "step": 296310 }, { "epoch": 115.08, "learning_rate": 4.656569579288026e-06, "loss": 0.0271, "step": 296320 }, { "epoch": 115.08, "learning_rate": 4.656051779935276e-06, "loss": 0.0009, "step": 296330 }, { "epoch": 115.08, "learning_rate": 4.655533980582525e-06, "loss": 0.089, "step": 296340 }, { "epoch": 115.09, "learning_rate": 4.655016181229774e-06, "loss": 0.0259, "step": 296350 }, { "epoch": 115.09, "learning_rate": 4.654498381877023e-06, "loss": 0.0016, "step": 296360 }, { "epoch": 115.1, "learning_rate": 4.653980582524273e-06, "loss": 0.0405, "step": 296370 }, { "epoch": 115.1, "learning_rate": 4.653462783171521e-06, "loss": 0.2202, "step": 296380 }, { "epoch": 115.1, "learning_rate": 4.652944983818771e-06, "loss": 0.0696, "step": 296390 }, { "epoch": 115.11, "learning_rate": 4.65242718446602e-06, "loss": 0.1528, "step": 296400 }, { "epoch": 115.11, "learning_rate": 4.651909385113269e-06, "loss": 0.0358, "step": 296410 }, { "epoch": 115.11, "learning_rate": 4.651391585760518e-06, "loss": 0.0894, "step": 296420 }, { "epoch": 115.12, "learning_rate": 4.650873786407768e-06, "loss": 0.0375, "step": 296430 }, { "epoch": 115.12, "learning_rate": 4.650355987055017e-06, "loss": 0.046, "step": 296440 }, { "epoch": 115.13, "learning_rate": 4.649838187702266e-06, "loss": 0.0101, "step": 296450 }, { "epoch": 115.13, "learning_rate": 4.649320388349515e-06, "loss": 0.0514, "step": 296460 }, { "epoch": 115.13, "learning_rate": 4.648802588996765e-06, "loss": 0.0014, "step": 296470 }, { "epoch": 115.14, "learning_rate": 4.648284789644013e-06, "loss": 0.0769, "step": 296480 }, { "epoch": 115.14, "learning_rate": 4.647766990291262e-06, "loss": 0.122, "step": 296490 }, { "epoch": 115.15, "learning_rate": 4.647249190938512e-06, "loss": 0.0601, "step": 296500 }, { "epoch": 115.15, "learning_rate": 4.6467313915857605e-06, "loss": 0.0459, "step": 296510 }, { "epoch": 115.15, "learning_rate": 4.64621359223301e-06, "loss": 0.0678, "step": 296520 }, { "epoch": 115.16, "learning_rate": 4.645695792880259e-06, "loss": 0.1038, "step": 296530 }, { "epoch": 115.16, "learning_rate": 4.6451779935275085e-06, "loss": 0.0489, "step": 296540 }, { "epoch": 115.17, "learning_rate": 4.644660194174757e-06, "loss": 0.0072, "step": 296550 }, { "epoch": 115.17, "learning_rate": 4.644142394822007e-06, "loss": 0.0119, "step": 296560 }, { "epoch": 115.17, "learning_rate": 4.643624595469256e-06, "loss": 0.0249, "step": 296570 }, { "epoch": 115.18, "learning_rate": 4.643106796116505e-06, "loss": 0.0001, "step": 296580 }, { "epoch": 115.18, "learning_rate": 4.642588996763754e-06, "loss": 0.1036, "step": 296590 }, { "epoch": 115.18, "learning_rate": 4.642071197411004e-06, "loss": 0.0181, "step": 296600 }, { "epoch": 115.19, "learning_rate": 4.6415533980582525e-06, "loss": 0.0103, "step": 296610 }, { "epoch": 115.19, "learning_rate": 4.641035598705502e-06, "loss": 0.0165, "step": 296620 }, { "epoch": 115.2, "learning_rate": 4.640517799352751e-06, "loss": 0.0567, "step": 296630 }, { "epoch": 115.2, "learning_rate": 4.6400000000000005e-06, "loss": 0.0092, "step": 296640 }, { "epoch": 115.2, "learning_rate": 4.639482200647249e-06, "loss": 0.0061, "step": 296650 }, { "epoch": 115.21, "learning_rate": 4.638964401294499e-06, "loss": 0.0005, "step": 296660 }, { "epoch": 115.21, "learning_rate": 4.638446601941748e-06, "loss": 0.0944, "step": 296670 }, { "epoch": 115.22, "learning_rate": 4.637928802588997e-06, "loss": 0.0444, "step": 296680 }, { "epoch": 115.22, "learning_rate": 4.637411003236246e-06, "loss": 0.0524, "step": 296690 }, { "epoch": 115.22, "learning_rate": 4.636893203883496e-06, "loss": 0.0115, "step": 296700 }, { "epoch": 115.23, "learning_rate": 4.636375404530744e-06, "loss": 0.0867, "step": 296710 }, { "epoch": 115.23, "learning_rate": 4.635857605177994e-06, "loss": 0.0007, "step": 296720 }, { "epoch": 115.23, "learning_rate": 4.635339805825243e-06, "loss": 0.0361, "step": 296730 }, { "epoch": 115.24, "learning_rate": 4.634822006472492e-06, "loss": 0.0815, "step": 296740 }, { "epoch": 115.24, "learning_rate": 4.634304207119741e-06, "loss": 0.0666, "step": 296750 }, { "epoch": 115.25, "learning_rate": 4.633786407766991e-06, "loss": 0.0026, "step": 296760 }, { "epoch": 115.25, "learning_rate": 4.6332686084142396e-06, "loss": 0.0697, "step": 296770 }, { "epoch": 115.25, "learning_rate": 4.632750809061489e-06, "loss": 0.0514, "step": 296780 }, { "epoch": 115.26, "learning_rate": 4.632233009708738e-06, "loss": 0.0808, "step": 296790 }, { "epoch": 115.26, "learning_rate": 4.631715210355988e-06, "loss": 0.1025, "step": 296800 }, { "epoch": 115.27, "learning_rate": 4.631197411003236e-06, "loss": 0.0193, "step": 296810 }, { "epoch": 115.27, "learning_rate": 4.630679611650486e-06, "loss": 0.0193, "step": 296820 }, { "epoch": 115.27, "learning_rate": 4.630161812297735e-06, "loss": 0.0478, "step": 296830 }, { "epoch": 115.28, "learning_rate": 4.629644012944984e-06, "loss": 0.0226, "step": 296840 }, { "epoch": 115.28, "learning_rate": 4.629126213592233e-06, "loss": 0.0626, "step": 296850 }, { "epoch": 115.29, "learning_rate": 4.628608414239483e-06, "loss": 0.0651, "step": 296860 }, { "epoch": 115.29, "learning_rate": 4.6280906148867315e-06, "loss": 0.0006, "step": 296870 }, { "epoch": 115.29, "learning_rate": 4.627572815533981e-06, "loss": 0.086, "step": 296880 }, { "epoch": 115.3, "learning_rate": 4.62705501618123e-06, "loss": 0.0006, "step": 296890 }, { "epoch": 115.3, "learning_rate": 4.6265372168284795e-06, "loss": 0.1143, "step": 296900 }, { "epoch": 115.3, "learning_rate": 4.626019417475728e-06, "loss": 0.0931, "step": 296910 }, { "epoch": 115.31, "learning_rate": 4.625501618122978e-06, "loss": 0.0201, "step": 296920 }, { "epoch": 115.31, "learning_rate": 4.624983818770227e-06, "loss": 0.0009, "step": 296930 }, { "epoch": 115.32, "learning_rate": 4.624466019417476e-06, "loss": 0.0024, "step": 296940 }, { "epoch": 115.32, "learning_rate": 4.623948220064725e-06, "loss": 0.0005, "step": 296950 }, { "epoch": 115.32, "learning_rate": 4.623430420711975e-06, "loss": 0.0454, "step": 296960 }, { "epoch": 115.33, "learning_rate": 4.6229126213592235e-06, "loss": 0.0231, "step": 296970 }, { "epoch": 115.33, "learning_rate": 4.622394822006473e-06, "loss": 0.0744, "step": 296980 }, { "epoch": 115.34, "learning_rate": 4.621877022653722e-06, "loss": 0.0008, "step": 296990 }, { "epoch": 115.34, "learning_rate": 4.6213592233009715e-06, "loss": 0.0001, "step": 297000 }, { "epoch": 115.34, "learning_rate": 4.62084142394822e-06, "loss": 0.0084, "step": 297010 }, { "epoch": 115.35, "learning_rate": 4.62032362459547e-06, "loss": 0.0711, "step": 297020 }, { "epoch": 115.35, "learning_rate": 4.619805825242719e-06, "loss": 0.1702, "step": 297030 }, { "epoch": 115.36, "learning_rate": 4.619288025889968e-06, "loss": 0.0207, "step": 297040 }, { "epoch": 115.36, "learning_rate": 4.618770226537217e-06, "loss": 0.0441, "step": 297050 }, { "epoch": 115.36, "learning_rate": 4.618252427184467e-06, "loss": 0.0308, "step": 297060 }, { "epoch": 115.37, "learning_rate": 4.617734627831715e-06, "loss": 0.0195, "step": 297070 }, { "epoch": 115.37, "learning_rate": 4.617216828478965e-06, "loss": 0.0003, "step": 297080 }, { "epoch": 115.37, "learning_rate": 4.616699029126214e-06, "loss": 0.001, "step": 297090 }, { "epoch": 115.38, "learning_rate": 4.616181229773463e-06, "loss": 0.0422, "step": 297100 }, { "epoch": 115.38, "learning_rate": 4.615663430420712e-06, "loss": 0.0113, "step": 297110 }, { "epoch": 115.39, "learning_rate": 4.615145631067962e-06, "loss": 0.0712, "step": 297120 }, { "epoch": 115.39, "learning_rate": 4.614627831715211e-06, "loss": 0.0136, "step": 297130 }, { "epoch": 115.39, "learning_rate": 4.61411003236246e-06, "loss": 0.03, "step": 297140 }, { "epoch": 115.4, "learning_rate": 4.613592233009709e-06, "loss": 0.0028, "step": 297150 }, { "epoch": 115.4, "learning_rate": 4.613074433656959e-06, "loss": 0.1817, "step": 297160 }, { "epoch": 115.41, "learning_rate": 4.612556634304207e-06, "loss": 0.0554, "step": 297170 }, { "epoch": 115.41, "learning_rate": 4.612038834951457e-06, "loss": 0.028, "step": 297180 }, { "epoch": 115.41, "learning_rate": 4.611521035598706e-06, "loss": 0.0084, "step": 297190 }, { "epoch": 115.42, "learning_rate": 4.611003236245955e-06, "loss": 0.0017, "step": 297200 }, { "epoch": 115.42, "learning_rate": 4.610485436893204e-06, "loss": 0.0435, "step": 297210 }, { "epoch": 115.43, "learning_rate": 4.609967637540454e-06, "loss": 0.0979, "step": 297220 }, { "epoch": 115.43, "learning_rate": 4.6094498381877025e-06, "loss": 0.1138, "step": 297230 }, { "epoch": 115.43, "learning_rate": 4.608932038834952e-06, "loss": 0.0479, "step": 297240 }, { "epoch": 115.44, "learning_rate": 4.608414239482201e-06, "loss": 0.112, "step": 297250 }, { "epoch": 115.44, "learning_rate": 4.6078964401294505e-06, "loss": 0.0191, "step": 297260 }, { "epoch": 115.44, "learning_rate": 4.607378640776699e-06, "loss": 0.0285, "step": 297270 }, { "epoch": 115.45, "learning_rate": 4.606860841423949e-06, "loss": 0.0153, "step": 297280 }, { "epoch": 115.45, "learning_rate": 4.606343042071198e-06, "loss": 0.2271, "step": 297290 }, { "epoch": 115.46, "learning_rate": 4.605825242718447e-06, "loss": 0.0792, "step": 297300 }, { "epoch": 115.46, "learning_rate": 4.605307443365696e-06, "loss": 0.0825, "step": 297310 }, { "epoch": 115.46, "learning_rate": 4.604789644012945e-06, "loss": 0.0061, "step": 297320 }, { "epoch": 115.47, "learning_rate": 4.6042718446601945e-06, "loss": 0.0669, "step": 297330 }, { "epoch": 115.47, "learning_rate": 4.603754045307443e-06, "loss": 0.0302, "step": 297340 }, { "epoch": 115.48, "learning_rate": 4.603236245954693e-06, "loss": 0.0008, "step": 297350 }, { "epoch": 115.48, "learning_rate": 4.602718446601942e-06, "loss": 0.0411, "step": 297360 }, { "epoch": 115.48, "learning_rate": 4.602200647249191e-06, "loss": 0.0275, "step": 297370 }, { "epoch": 115.49, "learning_rate": 4.60168284789644e-06, "loss": 0.0733, "step": 297380 }, { "epoch": 115.49, "learning_rate": 4.60116504854369e-06, "loss": 0.0249, "step": 297390 }, { "epoch": 115.5, "learning_rate": 4.600647249190938e-06, "loss": 0.0622, "step": 297400 }, { "epoch": 115.5, "learning_rate": 4.600129449838188e-06, "loss": 0.0164, "step": 297410 }, { "epoch": 115.5, "learning_rate": 4.599611650485437e-06, "loss": 0.051, "step": 297420 }, { "epoch": 115.51, "learning_rate": 4.599093851132686e-06, "loss": 0.1509, "step": 297430 }, { "epoch": 115.51, "learning_rate": 4.598576051779935e-06, "loss": 0.0602, "step": 297440 }, { "epoch": 115.51, "learning_rate": 4.598058252427185e-06, "loss": 0.3053, "step": 297450 }, { "epoch": 115.52, "learning_rate": 4.5975404530744336e-06, "loss": 0.0214, "step": 297460 }, { "epoch": 115.52, "learning_rate": 4.597022653721683e-06, "loss": 0.0753, "step": 297470 }, { "epoch": 115.53, "learning_rate": 4.596504854368932e-06, "loss": 0.0473, "step": 297480 }, { "epoch": 115.53, "learning_rate": 4.595987055016182e-06, "loss": 0.0245, "step": 297490 }, { "epoch": 115.53, "learning_rate": 4.59546925566343e-06, "loss": 0.037, "step": 297500 }, { "epoch": 115.54, "learning_rate": 4.59495145631068e-06, "loss": 0.0454, "step": 297510 }, { "epoch": 115.54, "learning_rate": 4.594433656957929e-06, "loss": 0.1222, "step": 297520 }, { "epoch": 115.55, "learning_rate": 4.593915857605178e-06, "loss": 0.0762, "step": 297530 }, { "epoch": 115.55, "learning_rate": 4.593398058252427e-06, "loss": 0.0007, "step": 297540 }, { "epoch": 115.55, "learning_rate": 4.592880258899677e-06, "loss": 0.1968, "step": 297550 }, { "epoch": 115.56, "learning_rate": 4.5923624595469255e-06, "loss": 0.0475, "step": 297560 }, { "epoch": 115.56, "learning_rate": 4.591844660194175e-06, "loss": 0.1319, "step": 297570 }, { "epoch": 115.57, "learning_rate": 4.591326860841424e-06, "loss": 0.0353, "step": 297580 }, { "epoch": 115.57, "learning_rate": 4.5908090614886735e-06, "loss": 0.0353, "step": 297590 }, { "epoch": 115.57, "learning_rate": 4.590291262135922e-06, "loss": 0.0479, "step": 297600 }, { "epoch": 115.58, "learning_rate": 4.589773462783172e-06, "loss": 0.0265, "step": 297610 }, { "epoch": 115.58, "learning_rate": 4.589255663430421e-06, "loss": 0.0007, "step": 297620 }, { "epoch": 115.58, "learning_rate": 4.58873786407767e-06, "loss": 0.0528, "step": 297630 }, { "epoch": 115.59, "learning_rate": 4.588220064724919e-06, "loss": 0.0183, "step": 297640 }, { "epoch": 115.59, "learning_rate": 4.587702265372169e-06, "loss": 0.0191, "step": 297650 }, { "epoch": 115.6, "learning_rate": 4.5871844660194175e-06, "loss": 0.1009, "step": 297660 }, { "epoch": 115.6, "learning_rate": 4.586666666666667e-06, "loss": 0.1474, "step": 297670 }, { "epoch": 115.6, "learning_rate": 4.586148867313917e-06, "loss": 0.0027, "step": 297680 }, { "epoch": 115.61, "learning_rate": 4.5856310679611655e-06, "loss": 0.0256, "step": 297690 }, { "epoch": 115.61, "learning_rate": 4.585113268608415e-06, "loss": 0.0004, "step": 297700 }, { "epoch": 115.62, "learning_rate": 4.584595469255664e-06, "loss": 0.0757, "step": 297710 }, { "epoch": 115.62, "learning_rate": 4.5840776699029135e-06, "loss": 0.0632, "step": 297720 }, { "epoch": 115.62, "learning_rate": 4.583559870550162e-06, "loss": 0.022, "step": 297730 }, { "epoch": 115.63, "learning_rate": 4.583042071197412e-06, "loss": 0.157, "step": 297740 }, { "epoch": 115.63, "learning_rate": 4.582524271844661e-06, "loss": 0.0114, "step": 297750 }, { "epoch": 115.63, "learning_rate": 4.58200647249191e-06, "loss": 0.0002, "step": 297760 }, { "epoch": 115.64, "learning_rate": 4.581488673139159e-06, "loss": 0.1108, "step": 297770 }, { "epoch": 115.64, "learning_rate": 4.580970873786409e-06, "loss": 0.001, "step": 297780 }, { "epoch": 115.65, "learning_rate": 4.580453074433657e-06, "loss": 0.0119, "step": 297790 }, { "epoch": 115.65, "learning_rate": 4.579935275080907e-06, "loss": 0.0092, "step": 297800 }, { "epoch": 115.65, "learning_rate": 4.579417475728156e-06, "loss": 0.0454, "step": 297810 }, { "epoch": 115.66, "learning_rate": 4.578899676375405e-06, "loss": 0.0329, "step": 297820 }, { "epoch": 115.66, "learning_rate": 4.578381877022654e-06, "loss": 0.1952, "step": 297830 }, { "epoch": 115.67, "learning_rate": 4.577864077669904e-06, "loss": 0.0328, "step": 297840 }, { "epoch": 115.67, "learning_rate": 4.577346278317153e-06, "loss": 0.1247, "step": 297850 }, { "epoch": 115.67, "learning_rate": 4.576828478964402e-06, "loss": 0.0004, "step": 297860 }, { "epoch": 115.68, "learning_rate": 4.576310679611651e-06, "loss": 0.0253, "step": 297870 }, { "epoch": 115.68, "learning_rate": 4.575792880258901e-06, "loss": 0.0011, "step": 297880 }, { "epoch": 115.69, "learning_rate": 4.575275080906149e-06, "loss": 0.1075, "step": 297890 }, { "epoch": 115.69, "learning_rate": 4.574757281553399e-06, "loss": 0.0002, "step": 297900 }, { "epoch": 115.69, "learning_rate": 4.574239482200648e-06, "loss": 0.0022, "step": 297910 }, { "epoch": 115.7, "learning_rate": 4.573721682847897e-06, "loss": 0.2034, "step": 297920 }, { "epoch": 115.7, "learning_rate": 4.573203883495146e-06, "loss": 0.1292, "step": 297930 }, { "epoch": 115.7, "learning_rate": 4.572686084142396e-06, "loss": 0.0522, "step": 297940 }, { "epoch": 115.71, "learning_rate": 4.5721682847896445e-06, "loss": 0.0063, "step": 297950 }, { "epoch": 115.71, "learning_rate": 4.571650485436893e-06, "loss": 0.0381, "step": 297960 }, { "epoch": 115.72, "learning_rate": 4.571132686084143e-06, "loss": 0.0001, "step": 297970 }, { "epoch": 115.72, "learning_rate": 4.570614886731392e-06, "loss": 0.0399, "step": 297980 }, { "epoch": 115.72, "learning_rate": 4.570097087378641e-06, "loss": 0.0206, "step": 297990 }, { "epoch": 115.73, "learning_rate": 4.56957928802589e-06, "loss": 0.0299, "step": 298000 }, { "epoch": 115.73, "learning_rate": 4.56906148867314e-06, "loss": 0.0667, "step": 298010 }, { "epoch": 115.74, "learning_rate": 4.5685436893203885e-06, "loss": 0.1092, "step": 298020 }, { "epoch": 115.74, "learning_rate": 4.568025889967638e-06, "loss": 0.0077, "step": 298030 }, { "epoch": 115.74, "learning_rate": 4.567508090614887e-06, "loss": 0.0606, "step": 298040 }, { "epoch": 115.75, "learning_rate": 4.5669902912621365e-06, "loss": 0.0344, "step": 298050 }, { "epoch": 115.75, "learning_rate": 4.566472491909385e-06, "loss": 0.0794, "step": 298060 }, { "epoch": 115.76, "learning_rate": 4.565954692556635e-06, "loss": 0.0781, "step": 298070 }, { "epoch": 115.76, "learning_rate": 4.565436893203884e-06, "loss": 0.0802, "step": 298080 }, { "epoch": 115.76, "learning_rate": 4.564919093851133e-06, "loss": 0.0362, "step": 298090 }, { "epoch": 115.77, "learning_rate": 4.564401294498382e-06, "loss": 0.0203, "step": 298100 }, { "epoch": 115.77, "learning_rate": 4.563883495145632e-06, "loss": 0.0002, "step": 298110 }, { "epoch": 115.77, "learning_rate": 4.56336569579288e-06, "loss": 0.073, "step": 298120 }, { "epoch": 115.78, "learning_rate": 4.562847896440129e-06, "loss": 0.1433, "step": 298130 }, { "epoch": 115.78, "learning_rate": 4.562330097087379e-06, "loss": 0.0003, "step": 298140 }, { "epoch": 115.79, "learning_rate": 4.5618122977346276e-06, "loss": 0.0232, "step": 298150 }, { "epoch": 115.79, "learning_rate": 4.561294498381877e-06, "loss": 0.0291, "step": 298160 }, { "epoch": 115.79, "learning_rate": 4.560776699029126e-06, "loss": 0.0171, "step": 298170 }, { "epoch": 115.8, "learning_rate": 4.560258899676376e-06, "loss": 0.0114, "step": 298180 }, { "epoch": 115.8, "learning_rate": 4.559741100323624e-06, "loss": 0.0002, "step": 298190 }, { "epoch": 115.81, "learning_rate": 4.559223300970874e-06, "loss": 0.0374, "step": 298200 }, { "epoch": 115.81, "learning_rate": 4.558705501618123e-06, "loss": 0.0869, "step": 298210 }, { "epoch": 115.81, "learning_rate": 4.558187702265372e-06, "loss": 0.0261, "step": 298220 }, { "epoch": 115.82, "learning_rate": 4.557669902912621e-06, "loss": 0.0007, "step": 298230 }, { "epoch": 115.82, "learning_rate": 4.557152103559871e-06, "loss": 0.0012, "step": 298240 }, { "epoch": 115.83, "learning_rate": 4.55663430420712e-06, "loss": 0.0001, "step": 298250 }, { "epoch": 115.83, "learning_rate": 4.556116504854369e-06, "loss": 0.0752, "step": 298260 }, { "epoch": 115.83, "learning_rate": 4.555598705501619e-06, "loss": 0.0882, "step": 298270 }, { "epoch": 115.84, "learning_rate": 4.5550809061488675e-06, "loss": 0.0173, "step": 298280 }, { "epoch": 115.84, "learning_rate": 4.554563106796117e-06, "loss": 0.1275, "step": 298290 }, { "epoch": 115.84, "learning_rate": 4.554045307443366e-06, "loss": 0.1937, "step": 298300 }, { "epoch": 115.85, "learning_rate": 4.5535275080906155e-06, "loss": 0.0159, "step": 298310 }, { "epoch": 115.85, "learning_rate": 4.553009708737864e-06, "loss": 0.1134, "step": 298320 }, { "epoch": 115.86, "learning_rate": 4.552491909385114e-06, "loss": 0.0878, "step": 298330 }, { "epoch": 115.86, "learning_rate": 4.551974110032363e-06, "loss": 0.0133, "step": 298340 }, { "epoch": 115.86, "learning_rate": 4.551456310679612e-06, "loss": 0.0228, "step": 298350 }, { "epoch": 115.87, "learning_rate": 4.550938511326861e-06, "loss": 0.1829, "step": 298360 }, { "epoch": 115.87, "learning_rate": 4.550420711974111e-06, "loss": 0.0829, "step": 298370 }, { "epoch": 115.88, "learning_rate": 4.5499029126213595e-06, "loss": 0.0125, "step": 298380 }, { "epoch": 115.88, "learning_rate": 4.549385113268609e-06, "loss": 0.0459, "step": 298390 }, { "epoch": 115.88, "learning_rate": 4.548867313915858e-06, "loss": 0.0296, "step": 298400 }, { "epoch": 115.89, "learning_rate": 4.5483495145631075e-06, "loss": 0.0515, "step": 298410 }, { "epoch": 115.89, "learning_rate": 4.547831715210356e-06, "loss": 0.0344, "step": 298420 }, { "epoch": 115.9, "learning_rate": 4.547313915857606e-06, "loss": 0.0566, "step": 298430 }, { "epoch": 115.9, "learning_rate": 4.546796116504855e-06, "loss": 0.0276, "step": 298440 }, { "epoch": 115.9, "learning_rate": 4.546278317152104e-06, "loss": 0.0206, "step": 298450 }, { "epoch": 115.91, "learning_rate": 4.545760517799353e-06, "loss": 0.0355, "step": 298460 }, { "epoch": 115.91, "learning_rate": 4.545242718446603e-06, "loss": 0.0126, "step": 298470 }, { "epoch": 115.91, "learning_rate": 4.544724919093851e-06, "loss": 0.0023, "step": 298480 }, { "epoch": 115.92, "learning_rate": 4.544207119741101e-06, "loss": 0.0583, "step": 298490 }, { "epoch": 115.92, "learning_rate": 4.54368932038835e-06, "loss": 0.0097, "step": 298500 }, { "epoch": 115.93, "learning_rate": 4.543171521035599e-06, "loss": 0.0279, "step": 298510 }, { "epoch": 115.93, "learning_rate": 4.542653721682848e-06, "loss": 0.021, "step": 298520 }, { "epoch": 115.93, "learning_rate": 4.542135922330098e-06, "loss": 0.1048, "step": 298530 }, { "epoch": 115.94, "learning_rate": 4.541618122977347e-06, "loss": 0.0803, "step": 298540 }, { "epoch": 115.94, "learning_rate": 4.541100323624596e-06, "loss": 0.0958, "step": 298550 }, { "epoch": 115.95, "learning_rate": 4.540582524271845e-06, "loss": 0.0017, "step": 298560 }, { "epoch": 115.95, "learning_rate": 4.540064724919095e-06, "loss": 0.0295, "step": 298570 }, { "epoch": 115.95, "learning_rate": 4.539546925566343e-06, "loss": 0.1096, "step": 298580 }, { "epoch": 115.96, "learning_rate": 4.539029126213593e-06, "loss": 0.0395, "step": 298590 }, { "epoch": 115.96, "learning_rate": 4.538511326860842e-06, "loss": 0.1657, "step": 298600 }, { "epoch": 115.97, "learning_rate": 4.537993527508091e-06, "loss": 0.0762, "step": 298610 }, { "epoch": 115.97, "learning_rate": 4.53747572815534e-06, "loss": 0.0083, "step": 298620 }, { "epoch": 115.97, "learning_rate": 4.53695792880259e-06, "loss": 0.1394, "step": 298630 }, { "epoch": 115.98, "learning_rate": 4.5364401294498385e-06, "loss": 0.0176, "step": 298640 }, { "epoch": 115.98, "learning_rate": 4.535922330097088e-06, "loss": 0.1013, "step": 298650 }, { "epoch": 115.98, "learning_rate": 4.535404530744337e-06, "loss": 0.0002, "step": 298660 }, { "epoch": 115.99, "learning_rate": 4.5348867313915865e-06, "loss": 0.0107, "step": 298670 }, { "epoch": 115.99, "learning_rate": 4.534368932038835e-06, "loss": 0.0096, "step": 298680 }, { "epoch": 116.0, "learning_rate": 4.533851132686085e-06, "loss": 0.0742, "step": 298690 }, { "epoch": 116.0, "learning_rate": 4.533333333333334e-06, "loss": 0.0203, "step": 298700 }, { "epoch": 116.0, "eval_accuracy": 0.9529573590096286, "eval_loss": 0.37935107946395874, "eval_runtime": 8.2562, "eval_samples_per_second": 440.273, "eval_steps_per_second": 55.11, "step": 298700 }, { "epoch": 116.0, "learning_rate": 4.532815533980583e-06, "loss": 0.0373, "step": 298710 }, { "epoch": 116.01, "learning_rate": 4.532297734627832e-06, "loss": 0.0795, "step": 298720 }, { "epoch": 116.01, "learning_rate": 4.531779935275082e-06, "loss": 0.0023, "step": 298730 }, { "epoch": 116.02, "learning_rate": 4.5312621359223305e-06, "loss": 0.0417, "step": 298740 }, { "epoch": 116.02, "learning_rate": 4.53074433656958e-06, "loss": 0.0005, "step": 298750 }, { "epoch": 116.02, "learning_rate": 4.530226537216829e-06, "loss": 0.019, "step": 298760 }, { "epoch": 116.03, "learning_rate": 4.529708737864078e-06, "loss": 0.017, "step": 298770 }, { "epoch": 116.03, "learning_rate": 4.529190938511327e-06, "loss": 0.0003, "step": 298780 }, { "epoch": 116.03, "learning_rate": 4.528673139158576e-06, "loss": 0.0016, "step": 298790 }, { "epoch": 116.04, "learning_rate": 4.528155339805826e-06, "loss": 0.0219, "step": 298800 }, { "epoch": 116.04, "learning_rate": 4.527637540453074e-06, "loss": 0.0092, "step": 298810 }, { "epoch": 116.05, "learning_rate": 4.527119741100324e-06, "loss": 0.0015, "step": 298820 }, { "epoch": 116.05, "learning_rate": 4.526601941747573e-06, "loss": 0.0542, "step": 298830 }, { "epoch": 116.05, "learning_rate": 4.526084142394822e-06, "loss": 0.0408, "step": 298840 }, { "epoch": 116.06, "learning_rate": 4.525566343042071e-06, "loss": 0.0858, "step": 298850 }, { "epoch": 116.06, "learning_rate": 4.525048543689321e-06, "loss": 0.0039, "step": 298860 }, { "epoch": 116.07, "learning_rate": 4.5245307443365696e-06, "loss": 0.07, "step": 298870 }, { "epoch": 116.07, "learning_rate": 4.524012944983819e-06, "loss": 0.0049, "step": 298880 }, { "epoch": 116.07, "learning_rate": 4.523495145631068e-06, "loss": 0.054, "step": 298890 }, { "epoch": 116.08, "learning_rate": 4.522977346278318e-06, "loss": 0.0335, "step": 298900 }, { "epoch": 116.08, "learning_rate": 4.522459546925566e-06, "loss": 0.0118, "step": 298910 }, { "epoch": 116.09, "learning_rate": 4.521941747572816e-06, "loss": 0.0534, "step": 298920 }, { "epoch": 116.09, "learning_rate": 4.521423948220065e-06, "loss": 0.008, "step": 298930 }, { "epoch": 116.09, "learning_rate": 4.520906148867314e-06, "loss": 0.0876, "step": 298940 }, { "epoch": 116.1, "learning_rate": 4.520388349514563e-06, "loss": 0.0794, "step": 298950 }, { "epoch": 116.1, "learning_rate": 4.519870550161813e-06, "loss": 0.1365, "step": 298960 }, { "epoch": 116.1, "learning_rate": 4.5193527508090615e-06, "loss": 0.0957, "step": 298970 }, { "epoch": 116.11, "learning_rate": 4.518834951456311e-06, "loss": 0.0628, "step": 298980 }, { "epoch": 116.11, "learning_rate": 4.51831715210356e-06, "loss": 0.0608, "step": 298990 }, { "epoch": 116.12, "learning_rate": 4.5177993527508095e-06, "loss": 0.0007, "step": 299000 }, { "epoch": 116.12, "learning_rate": 4.517281553398058e-06, "loss": 0.025, "step": 299010 }, { "epoch": 116.12, "learning_rate": 4.516763754045308e-06, "loss": 0.0491, "step": 299020 }, { "epoch": 116.13, "learning_rate": 4.516245954692557e-06, "loss": 0.0249, "step": 299030 }, { "epoch": 116.13, "learning_rate": 4.515728155339806e-06, "loss": 0.0203, "step": 299040 }, { "epoch": 116.14, "learning_rate": 4.515210355987055e-06, "loss": 0.0002, "step": 299050 }, { "epoch": 116.14, "learning_rate": 4.514692556634305e-06, "loss": 0.007, "step": 299060 }, { "epoch": 116.14, "learning_rate": 4.5141747572815535e-06, "loss": 0.0279, "step": 299070 }, { "epoch": 116.15, "learning_rate": 4.513656957928803e-06, "loss": 0.0917, "step": 299080 }, { "epoch": 116.15, "learning_rate": 4.513139158576052e-06, "loss": 0.0259, "step": 299090 }, { "epoch": 116.16, "learning_rate": 4.5126213592233015e-06, "loss": 0.025, "step": 299100 }, { "epoch": 116.16, "learning_rate": 4.51210355987055e-06, "loss": 0.0735, "step": 299110 }, { "epoch": 116.16, "learning_rate": 4.5115857605178e-06, "loss": 0.1106, "step": 299120 }, { "epoch": 116.17, "learning_rate": 4.511067961165049e-06, "loss": 0.0145, "step": 299130 }, { "epoch": 116.17, "learning_rate": 4.510550161812298e-06, "loss": 0.0374, "step": 299140 }, { "epoch": 116.17, "learning_rate": 4.510032362459547e-06, "loss": 0.1241, "step": 299150 }, { "epoch": 116.18, "learning_rate": 4.509514563106797e-06, "loss": 0.0017, "step": 299160 }, { "epoch": 116.18, "learning_rate": 4.508996763754045e-06, "loss": 0.0948, "step": 299170 }, { "epoch": 116.19, "learning_rate": 4.508478964401295e-06, "loss": 0.0465, "step": 299180 }, { "epoch": 116.19, "learning_rate": 4.507961165048544e-06, "loss": 0.2227, "step": 299190 }, { "epoch": 116.19, "learning_rate": 4.507443365695793e-06, "loss": 0.1159, "step": 299200 }, { "epoch": 116.2, "learning_rate": 4.506925566343042e-06, "loss": 0.0024, "step": 299210 }, { "epoch": 116.2, "learning_rate": 4.506407766990292e-06, "loss": 0.0709, "step": 299220 }, { "epoch": 116.21, "learning_rate": 4.505889967637541e-06, "loss": 0.0003, "step": 299230 }, { "epoch": 116.21, "learning_rate": 4.50537216828479e-06, "loss": 0.0331, "step": 299240 }, { "epoch": 116.21, "learning_rate": 4.504854368932039e-06, "loss": 0.0022, "step": 299250 }, { "epoch": 116.22, "learning_rate": 4.504336569579289e-06, "loss": 0.0866, "step": 299260 }, { "epoch": 116.22, "learning_rate": 4.503818770226537e-06, "loss": 0.1539, "step": 299270 }, { "epoch": 116.23, "learning_rate": 4.503300970873787e-06, "loss": 0.012, "step": 299280 }, { "epoch": 116.23, "learning_rate": 4.502783171521036e-06, "loss": 0.0919, "step": 299290 }, { "epoch": 116.23, "learning_rate": 4.502265372168285e-06, "loss": 0.0494, "step": 299300 }, { "epoch": 116.24, "learning_rate": 4.501747572815534e-06, "loss": 0.0771, "step": 299310 }, { "epoch": 116.24, "learning_rate": 4.501229773462784e-06, "loss": 0.0545, "step": 299320 }, { "epoch": 116.24, "learning_rate": 4.5007119741100325e-06, "loss": 0.0165, "step": 299330 }, { "epoch": 116.25, "learning_rate": 4.500194174757282e-06, "loss": 0.0463, "step": 299340 }, { "epoch": 116.25, "learning_rate": 4.499676375404531e-06, "loss": 0.0261, "step": 299350 }, { "epoch": 116.26, "learning_rate": 4.4991585760517805e-06, "loss": 0.1425, "step": 299360 }, { "epoch": 116.26, "learning_rate": 4.49864077669903e-06, "loss": 0.018, "step": 299370 }, { "epoch": 116.26, "learning_rate": 4.498122977346279e-06, "loss": 0.0809, "step": 299380 }, { "epoch": 116.27, "learning_rate": 4.4976051779935285e-06, "loss": 0.0059, "step": 299390 }, { "epoch": 116.27, "learning_rate": 4.497087378640777e-06, "loss": 0.0193, "step": 299400 }, { "epoch": 116.28, "learning_rate": 4.496569579288027e-06, "loss": 0.0055, "step": 299410 }, { "epoch": 116.28, "learning_rate": 4.496051779935276e-06, "loss": 0.0257, "step": 299420 }, { "epoch": 116.28, "learning_rate": 4.4955339805825245e-06, "loss": 0.0249, "step": 299430 }, { "epoch": 116.29, "learning_rate": 4.495016181229774e-06, "loss": 0.0552, "step": 299440 }, { "epoch": 116.29, "learning_rate": 4.494498381877023e-06, "loss": 0.015, "step": 299450 }, { "epoch": 116.3, "learning_rate": 4.4939805825242725e-06, "loss": 0.0667, "step": 299460 }, { "epoch": 116.3, "learning_rate": 4.493462783171521e-06, "loss": 0.0134, "step": 299470 }, { "epoch": 116.3, "learning_rate": 4.492944983818771e-06, "loss": 0.0671, "step": 299480 }, { "epoch": 116.31, "learning_rate": 4.49242718446602e-06, "loss": 0.0319, "step": 299490 }, { "epoch": 116.31, "learning_rate": 4.491909385113269e-06, "loss": 0.0468, "step": 299500 }, { "epoch": 116.31, "learning_rate": 4.491391585760518e-06, "loss": 0.0952, "step": 299510 }, { "epoch": 116.32, "learning_rate": 4.490873786407768e-06, "loss": 0.0319, "step": 299520 }, { "epoch": 116.32, "learning_rate": 4.490355987055016e-06, "loss": 0.0938, "step": 299530 }, { "epoch": 116.33, "learning_rate": 4.489838187702266e-06, "loss": 0.057, "step": 299540 }, { "epoch": 116.33, "learning_rate": 4.489320388349515e-06, "loss": 0.0547, "step": 299550 }, { "epoch": 116.33, "learning_rate": 4.488802588996764e-06, "loss": 0.0011, "step": 299560 }, { "epoch": 116.34, "learning_rate": 4.488284789644013e-06, "loss": 0.0766, "step": 299570 }, { "epoch": 116.34, "learning_rate": 4.487766990291263e-06, "loss": 0.0716, "step": 299580 }, { "epoch": 116.35, "learning_rate": 4.487249190938512e-06, "loss": 0.0984, "step": 299590 }, { "epoch": 116.35, "learning_rate": 4.48673139158576e-06, "loss": 0.1083, "step": 299600 }, { "epoch": 116.35, "learning_rate": 4.48621359223301e-06, "loss": 0.0217, "step": 299610 }, { "epoch": 116.36, "learning_rate": 4.485695792880259e-06, "loss": 0.0437, "step": 299620 }, { "epoch": 116.36, "learning_rate": 4.485177993527508e-06, "loss": 0.024, "step": 299630 }, { "epoch": 116.37, "learning_rate": 4.484660194174757e-06, "loss": 0.0812, "step": 299640 }, { "epoch": 116.37, "learning_rate": 4.484142394822007e-06, "loss": 0.0641, "step": 299650 }, { "epoch": 116.37, "learning_rate": 4.4836245954692555e-06, "loss": 0.035, "step": 299660 }, { "epoch": 116.38, "learning_rate": 4.483106796116505e-06, "loss": 0.1, "step": 299670 }, { "epoch": 116.38, "learning_rate": 4.482588996763754e-06, "loss": 0.0094, "step": 299680 }, { "epoch": 116.38, "learning_rate": 4.4820711974110035e-06, "loss": 0.0172, "step": 299690 }, { "epoch": 116.39, "learning_rate": 4.481553398058252e-06, "loss": 0.0897, "step": 299700 }, { "epoch": 116.39, "learning_rate": 4.481035598705502e-06, "loss": 0.0003, "step": 299710 }, { "epoch": 116.4, "learning_rate": 4.480517799352751e-06, "loss": 0.085, "step": 299720 }, { "epoch": 116.4, "learning_rate": 4.48e-06, "loss": 0.0052, "step": 299730 }, { "epoch": 116.4, "learning_rate": 4.479482200647249e-06, "loss": 0.0171, "step": 299740 }, { "epoch": 116.41, "learning_rate": 4.478964401294499e-06, "loss": 0.0936, "step": 299750 }, { "epoch": 116.41, "learning_rate": 4.4784466019417475e-06, "loss": 0.0084, "step": 299760 }, { "epoch": 116.42, "learning_rate": 4.477928802588997e-06, "loss": 0.0417, "step": 299770 }, { "epoch": 116.42, "learning_rate": 4.477411003236246e-06, "loss": 0.1116, "step": 299780 }, { "epoch": 116.42, "learning_rate": 4.4768932038834955e-06, "loss": 0.0171, "step": 299790 }, { "epoch": 116.43, "learning_rate": 4.476375404530744e-06, "loss": 0.0118, "step": 299800 }, { "epoch": 116.43, "learning_rate": 4.475857605177994e-06, "loss": 0.1502, "step": 299810 }, { "epoch": 116.43, "learning_rate": 4.475339805825243e-06, "loss": 0.0095, "step": 299820 }, { "epoch": 116.44, "learning_rate": 4.474822006472492e-06, "loss": 0.0266, "step": 299830 }, { "epoch": 116.44, "learning_rate": 4.474304207119741e-06, "loss": 0.1747, "step": 299840 }, { "epoch": 116.45, "learning_rate": 4.473786407766991e-06, "loss": 0.0322, "step": 299850 }, { "epoch": 116.45, "learning_rate": 4.473268608414239e-06, "loss": 0.0068, "step": 299860 }, { "epoch": 116.45, "learning_rate": 4.472750809061489e-06, "loss": 0.0007, "step": 299870 }, { "epoch": 116.46, "learning_rate": 4.472233009708738e-06, "loss": 0.2051, "step": 299880 }, { "epoch": 116.46, "learning_rate": 4.471715210355987e-06, "loss": 0.0329, "step": 299890 }, { "epoch": 116.47, "learning_rate": 4.471197411003236e-06, "loss": 0.0328, "step": 299900 }, { "epoch": 116.47, "learning_rate": 4.470679611650486e-06, "loss": 0.0478, "step": 299910 }, { "epoch": 116.47, "learning_rate": 4.4701618122977354e-06, "loss": 0.0165, "step": 299920 }, { "epoch": 116.48, "learning_rate": 4.469644012944984e-06, "loss": 0.0009, "step": 299930 }, { "epoch": 116.48, "learning_rate": 4.469126213592234e-06, "loss": 0.0435, "step": 299940 }, { "epoch": 116.49, "learning_rate": 4.468608414239483e-06, "loss": 0.1248, "step": 299950 }, { "epoch": 116.49, "learning_rate": 4.468090614886732e-06, "loss": 0.0867, "step": 299960 }, { "epoch": 116.49, "learning_rate": 4.467572815533981e-06, "loss": 0.0337, "step": 299970 }, { "epoch": 116.5, "learning_rate": 4.467055016181231e-06, "loss": 0.0196, "step": 299980 }, { "epoch": 116.5, "learning_rate": 4.466537216828479e-06, "loss": 0.0809, "step": 299990 }, { "epoch": 116.5, "learning_rate": 4.466019417475729e-06, "loss": 0.0393, "step": 300000 }, { "epoch": 116.51, "learning_rate": 4.465501618122978e-06, "loss": 0.0333, "step": 300010 }, { "epoch": 116.51, "learning_rate": 4.464983818770227e-06, "loss": 0.1105, "step": 300020 }, { "epoch": 116.52, "learning_rate": 4.464466019417476e-06, "loss": 0.0455, "step": 300030 }, { "epoch": 116.52, "learning_rate": 4.463948220064726e-06, "loss": 0.0004, "step": 300040 }, { "epoch": 116.52, "learning_rate": 4.4634304207119745e-06, "loss": 0.081, "step": 300050 }, { "epoch": 116.53, "learning_rate": 4.462912621359224e-06, "loss": 0.0003, "step": 300060 }, { "epoch": 116.53, "learning_rate": 4.462394822006473e-06, "loss": 0.0643, "step": 300070 }, { "epoch": 116.54, "learning_rate": 4.4618770226537225e-06, "loss": 0.0873, "step": 300080 }, { "epoch": 116.54, "learning_rate": 4.461359223300971e-06, "loss": 0.1652, "step": 300090 }, { "epoch": 116.54, "learning_rate": 4.460841423948221e-06, "loss": 0.0641, "step": 300100 }, { "epoch": 116.55, "learning_rate": 4.46032362459547e-06, "loss": 0.0024, "step": 300110 }, { "epoch": 116.55, "learning_rate": 4.459805825242719e-06, "loss": 0.1002, "step": 300120 }, { "epoch": 116.56, "learning_rate": 4.459288025889968e-06, "loss": 0.0002, "step": 300130 }, { "epoch": 116.56, "learning_rate": 4.458770226537218e-06, "loss": 0.0063, "step": 300140 }, { "epoch": 116.56, "learning_rate": 4.4582524271844665e-06, "loss": 0.0929, "step": 300150 }, { "epoch": 116.57, "learning_rate": 4.457734627831716e-06, "loss": 0.0004, "step": 300160 }, { "epoch": 116.57, "learning_rate": 4.457216828478965e-06, "loss": 0.0119, "step": 300170 }, { "epoch": 116.57, "learning_rate": 4.4566990291262145e-06, "loss": 0.0088, "step": 300180 }, { "epoch": 116.58, "learning_rate": 4.456181229773463e-06, "loss": 0.0131, "step": 300190 }, { "epoch": 116.58, "learning_rate": 4.455663430420713e-06, "loss": 0.0214, "step": 300200 }, { "epoch": 116.59, "learning_rate": 4.455145631067962e-06, "loss": 0.1053, "step": 300210 }, { "epoch": 116.59, "learning_rate": 4.454627831715211e-06, "loss": 0.1004, "step": 300220 }, { "epoch": 116.59, "learning_rate": 4.45411003236246e-06, "loss": 0.0315, "step": 300230 }, { "epoch": 116.6, "learning_rate": 4.453592233009709e-06, "loss": 0.0001, "step": 300240 }, { "epoch": 116.6, "learning_rate": 4.453074433656958e-06, "loss": 0.0111, "step": 300250 }, { "epoch": 116.61, "learning_rate": 4.452556634304207e-06, "loss": 0.0044, "step": 300260 }, { "epoch": 116.61, "learning_rate": 4.452038834951457e-06, "loss": 0.0078, "step": 300270 }, { "epoch": 116.61, "learning_rate": 4.451521035598706e-06, "loss": 0.0466, "step": 300280 }, { "epoch": 116.62, "learning_rate": 4.451003236245955e-06, "loss": 0.0928, "step": 300290 }, { "epoch": 116.62, "learning_rate": 4.450485436893204e-06, "loss": 0.102, "step": 300300 }, { "epoch": 116.63, "learning_rate": 4.449967637540454e-06, "loss": 0.0977, "step": 300310 }, { "epoch": 116.63, "learning_rate": 4.449449838187702e-06, "loss": 0.0213, "step": 300320 }, { "epoch": 116.63, "learning_rate": 4.448932038834952e-06, "loss": 0.0611, "step": 300330 }, { "epoch": 116.64, "learning_rate": 4.448414239482201e-06, "loss": 0.0184, "step": 300340 }, { "epoch": 116.64, "learning_rate": 4.44789644012945e-06, "loss": 0.039, "step": 300350 }, { "epoch": 116.64, "learning_rate": 4.447378640776699e-06, "loss": 0.0931, "step": 300360 }, { "epoch": 116.65, "learning_rate": 4.446860841423949e-06, "loss": 0.048, "step": 300370 }, { "epoch": 116.65, "learning_rate": 4.4463430420711975e-06, "loss": 0.0508, "step": 300380 }, { "epoch": 116.66, "learning_rate": 4.445825242718447e-06, "loss": 0.0772, "step": 300390 }, { "epoch": 116.66, "learning_rate": 4.445307443365696e-06, "loss": 0.0469, "step": 300400 }, { "epoch": 116.66, "learning_rate": 4.444789644012945e-06, "loss": 0.0045, "step": 300410 }, { "epoch": 116.67, "learning_rate": 4.444271844660194e-06, "loss": 0.101, "step": 300420 }, { "epoch": 116.67, "learning_rate": 4.443754045307443e-06, "loss": 0.0151, "step": 300430 }, { "epoch": 116.68, "learning_rate": 4.443236245954693e-06, "loss": 0.0022, "step": 300440 }, { "epoch": 116.68, "learning_rate": 4.4427184466019415e-06, "loss": 0.0115, "step": 300450 }, { "epoch": 116.68, "learning_rate": 4.442200647249191e-06, "loss": 0.0213, "step": 300460 }, { "epoch": 116.69, "learning_rate": 4.44168284789644e-06, "loss": 0.0008, "step": 300470 }, { "epoch": 116.69, "learning_rate": 4.4411650485436895e-06, "loss": 0.0124, "step": 300480 }, { "epoch": 116.7, "learning_rate": 4.440647249190939e-06, "loss": 0.0265, "step": 300490 }, { "epoch": 116.7, "learning_rate": 4.440129449838188e-06, "loss": 0.1345, "step": 300500 }, { "epoch": 116.7, "learning_rate": 4.4396116504854375e-06, "loss": 0.066, "step": 300510 }, { "epoch": 116.71, "learning_rate": 4.439093851132686e-06, "loss": 0.0521, "step": 300520 }, { "epoch": 116.71, "learning_rate": 4.438576051779936e-06, "loss": 0.0991, "step": 300530 }, { "epoch": 116.71, "learning_rate": 4.438058252427185e-06, "loss": 0.0079, "step": 300540 }, { "epoch": 116.72, "learning_rate": 4.437540453074434e-06, "loss": 0.042, "step": 300550 }, { "epoch": 116.72, "learning_rate": 4.437022653721683e-06, "loss": 0.0221, "step": 300560 }, { "epoch": 116.73, "learning_rate": 4.436504854368933e-06, "loss": 0.0079, "step": 300570 }, { "epoch": 116.73, "learning_rate": 4.435987055016181e-06, "loss": 0.071, "step": 300580 }, { "epoch": 116.73, "learning_rate": 4.435469255663431e-06, "loss": 0.0355, "step": 300590 }, { "epoch": 116.74, "learning_rate": 4.43495145631068e-06, "loss": 0.006, "step": 300600 }, { "epoch": 116.74, "learning_rate": 4.4344336569579294e-06, "loss": 0.1479, "step": 300610 }, { "epoch": 116.75, "learning_rate": 4.433915857605178e-06, "loss": 0.0105, "step": 300620 }, { "epoch": 116.75, "learning_rate": 4.433398058252428e-06, "loss": 0.001, "step": 300630 }, { "epoch": 116.75, "learning_rate": 4.432880258899677e-06, "loss": 0.0436, "step": 300640 }, { "epoch": 116.76, "learning_rate": 4.432362459546926e-06, "loss": 0.0315, "step": 300650 }, { "epoch": 116.76, "learning_rate": 4.431844660194175e-06, "loss": 0.002, "step": 300660 }, { "epoch": 116.77, "learning_rate": 4.431326860841425e-06, "loss": 0.0035, "step": 300670 }, { "epoch": 116.77, "learning_rate": 4.430809061488673e-06, "loss": 0.0511, "step": 300680 }, { "epoch": 116.77, "learning_rate": 4.430291262135923e-06, "loss": 0.2045, "step": 300690 }, { "epoch": 116.78, "learning_rate": 4.429773462783172e-06, "loss": 0.0799, "step": 300700 }, { "epoch": 116.78, "learning_rate": 4.429255663430421e-06, "loss": 0.0447, "step": 300710 }, { "epoch": 116.78, "learning_rate": 4.42873786407767e-06, "loss": 0.0187, "step": 300720 }, { "epoch": 116.79, "learning_rate": 4.42822006472492e-06, "loss": 0.0703, "step": 300730 }, { "epoch": 116.79, "learning_rate": 4.4277022653721685e-06, "loss": 0.0298, "step": 300740 }, { "epoch": 116.8, "learning_rate": 4.427184466019418e-06, "loss": 0.0878, "step": 300750 }, { "epoch": 116.8, "learning_rate": 4.426666666666667e-06, "loss": 0.0034, "step": 300760 }, { "epoch": 116.8, "learning_rate": 4.4261488673139165e-06, "loss": 0.009, "step": 300770 }, { "epoch": 116.81, "learning_rate": 4.425631067961165e-06, "loss": 0.1492, "step": 300780 }, { "epoch": 116.81, "learning_rate": 4.425113268608415e-06, "loss": 0.0201, "step": 300790 }, { "epoch": 116.82, "learning_rate": 4.424595469255664e-06, "loss": 0.0175, "step": 300800 }, { "epoch": 116.82, "learning_rate": 4.424077669902913e-06, "loss": 0.0004, "step": 300810 }, { "epoch": 116.82, "learning_rate": 4.423559870550162e-06, "loss": 0.0101, "step": 300820 }, { "epoch": 116.83, "learning_rate": 4.423042071197412e-06, "loss": 0.0948, "step": 300830 }, { "epoch": 116.83, "learning_rate": 4.4225242718446605e-06, "loss": 0.0405, "step": 300840 }, { "epoch": 116.83, "learning_rate": 4.42200647249191e-06, "loss": 0.1598, "step": 300850 }, { "epoch": 116.84, "learning_rate": 4.421488673139159e-06, "loss": 0.0317, "step": 300860 }, { "epoch": 116.84, "learning_rate": 4.4209708737864085e-06, "loss": 0.0252, "step": 300870 }, { "epoch": 116.85, "learning_rate": 4.420453074433657e-06, "loss": 0.0225, "step": 300880 }, { "epoch": 116.85, "learning_rate": 4.419935275080907e-06, "loss": 0.0956, "step": 300890 }, { "epoch": 116.85, "learning_rate": 4.419417475728156e-06, "loss": 0.0278, "step": 300900 }, { "epoch": 116.86, "learning_rate": 4.418899676375405e-06, "loss": 0.0208, "step": 300910 }, { "epoch": 116.86, "learning_rate": 4.418381877022654e-06, "loss": 0.021, "step": 300920 }, { "epoch": 116.87, "learning_rate": 4.417864077669904e-06, "loss": 0.121, "step": 300930 }, { "epoch": 116.87, "learning_rate": 4.417346278317152e-06, "loss": 0.0678, "step": 300940 }, { "epoch": 116.87, "learning_rate": 4.416828478964402e-06, "loss": 0.0988, "step": 300950 }, { "epoch": 116.88, "learning_rate": 4.416310679611651e-06, "loss": 0.0021, "step": 300960 }, { "epoch": 116.88, "learning_rate": 4.4157928802589004e-06, "loss": 0.094, "step": 300970 }, { "epoch": 116.89, "learning_rate": 4.415275080906149e-06, "loss": 0.0926, "step": 300980 }, { "epoch": 116.89, "learning_rate": 4.414757281553399e-06, "loss": 0.1016, "step": 300990 }, { "epoch": 116.89, "learning_rate": 4.414239482200648e-06, "loss": 0.0228, "step": 301000 }, { "epoch": 116.9, "learning_rate": 4.413721682847897e-06, "loss": 0.0078, "step": 301010 }, { "epoch": 116.9, "learning_rate": 4.413203883495146e-06, "loss": 0.0001, "step": 301020 }, { "epoch": 116.9, "learning_rate": 4.412686084142396e-06, "loss": 0.0257, "step": 301030 }, { "epoch": 116.91, "learning_rate": 4.412168284789644e-06, "loss": 0.0599, "step": 301040 }, { "epoch": 116.91, "learning_rate": 4.411650485436894e-06, "loss": 0.0008, "step": 301050 }, { "epoch": 116.92, "learning_rate": 4.411132686084143e-06, "loss": 0.0001, "step": 301060 }, { "epoch": 116.92, "learning_rate": 4.4106148867313915e-06, "loss": 0.039, "step": 301070 }, { "epoch": 116.92, "learning_rate": 4.410097087378641e-06, "loss": 0.0697, "step": 301080 }, { "epoch": 116.93, "learning_rate": 4.40957928802589e-06, "loss": 0.0142, "step": 301090 }, { "epoch": 116.93, "learning_rate": 4.4090614886731395e-06, "loss": 0.0325, "step": 301100 }, { "epoch": 116.94, "learning_rate": 4.408543689320388e-06, "loss": 0.0279, "step": 301110 }, { "epoch": 116.94, "learning_rate": 4.408025889967638e-06, "loss": 0.2032, "step": 301120 }, { "epoch": 116.94, "learning_rate": 4.407508090614887e-06, "loss": 0.0297, "step": 301130 }, { "epoch": 116.95, "learning_rate": 4.406990291262136e-06, "loss": 0.0002, "step": 301140 }, { "epoch": 116.95, "learning_rate": 4.406472491909385e-06, "loss": 0.1513, "step": 301150 }, { "epoch": 116.96, "learning_rate": 4.405954692556635e-06, "loss": 0.0395, "step": 301160 }, { "epoch": 116.96, "learning_rate": 4.4054368932038835e-06, "loss": 0.1121, "step": 301170 }, { "epoch": 116.96, "learning_rate": 4.404919093851133e-06, "loss": 0.0158, "step": 301180 }, { "epoch": 116.97, "learning_rate": 4.404401294498382e-06, "loss": 0.037, "step": 301190 }, { "epoch": 116.97, "learning_rate": 4.4038834951456315e-06, "loss": 0.0156, "step": 301200 }, { "epoch": 116.97, "learning_rate": 4.40336569579288e-06, "loss": 0.056, "step": 301210 }, { "epoch": 116.98, "learning_rate": 4.40284789644013e-06, "loss": 0.009, "step": 301220 }, { "epoch": 116.98, "learning_rate": 4.402330097087379e-06, "loss": 0.0031, "step": 301230 }, { "epoch": 116.99, "learning_rate": 4.401812297734628e-06, "loss": 0.0228, "step": 301240 }, { "epoch": 116.99, "learning_rate": 4.401294498381877e-06, "loss": 0.0005, "step": 301250 }, { "epoch": 116.99, "learning_rate": 4.400776699029127e-06, "loss": 0.1003, "step": 301260 }, { "epoch": 117.0, "learning_rate": 4.400258899676375e-06, "loss": 0.0437, "step": 301270 }, { "epoch": 117.0, "eval_accuracy": 0.9526822558459422, "eval_loss": 0.3807455897331238, "eval_runtime": 8.1742, "eval_samples_per_second": 444.693, "eval_steps_per_second": 55.663, "step": 301275 }, { "epoch": 117.0, "learning_rate": 4.399741100323625e-06, "loss": 0.0054, "step": 301280 }, { "epoch": 117.01, "learning_rate": 4.399223300970874e-06, "loss": 0.1195, "step": 301290 }, { "epoch": 117.01, "learning_rate": 4.398705501618123e-06, "loss": 0.0154, "step": 301300 }, { "epoch": 117.01, "learning_rate": 4.398187702265372e-06, "loss": 0.0301, "step": 301310 }, { "epoch": 117.02, "learning_rate": 4.397669902912622e-06, "loss": 0.0003, "step": 301320 }, { "epoch": 117.02, "learning_rate": 4.397152103559871e-06, "loss": 0.0659, "step": 301330 }, { "epoch": 117.03, "learning_rate": 4.39663430420712e-06, "loss": 0.0549, "step": 301340 }, { "epoch": 117.03, "learning_rate": 4.396116504854369e-06, "loss": 0.0354, "step": 301350 }, { "epoch": 117.03, "learning_rate": 4.395598705501619e-06, "loss": 0.0231, "step": 301360 }, { "epoch": 117.04, "learning_rate": 4.395080906148867e-06, "loss": 0.0231, "step": 301370 }, { "epoch": 117.04, "learning_rate": 4.394563106796117e-06, "loss": 0.0005, "step": 301380 }, { "epoch": 117.04, "learning_rate": 4.394045307443366e-06, "loss": 0.0208, "step": 301390 }, { "epoch": 117.05, "learning_rate": 4.393527508090615e-06, "loss": 0.0397, "step": 301400 }, { "epoch": 117.05, "learning_rate": 4.393009708737864e-06, "loss": 0.0398, "step": 301410 }, { "epoch": 117.06, "learning_rate": 4.392491909385114e-06, "loss": 0.1056, "step": 301420 }, { "epoch": 117.06, "learning_rate": 4.3919741100323625e-06, "loss": 0.0367, "step": 301430 }, { "epoch": 117.06, "learning_rate": 4.391456310679612e-06, "loss": 0.0201, "step": 301440 }, { "epoch": 117.07, "learning_rate": 4.390938511326861e-06, "loss": 0.0099, "step": 301450 }, { "epoch": 117.07, "learning_rate": 4.3904207119741105e-06, "loss": 0.021, "step": 301460 }, { "epoch": 117.08, "learning_rate": 4.389902912621359e-06, "loss": 0.0411, "step": 301470 }, { "epoch": 117.08, "learning_rate": 4.389385113268609e-06, "loss": 0.1234, "step": 301480 }, { "epoch": 117.08, "learning_rate": 4.388867313915858e-06, "loss": 0.0517, "step": 301490 }, { "epoch": 117.09, "learning_rate": 4.388349514563107e-06, "loss": 0.0117, "step": 301500 }, { "epoch": 117.09, "learning_rate": 4.387831715210356e-06, "loss": 0.0967, "step": 301510 }, { "epoch": 117.1, "learning_rate": 4.387313915857606e-06, "loss": 0.0081, "step": 301520 }, { "epoch": 117.1, "learning_rate": 4.3867961165048545e-06, "loss": 0.0413, "step": 301530 }, { "epoch": 117.1, "learning_rate": 4.386278317152104e-06, "loss": 0.0034, "step": 301540 }, { "epoch": 117.11, "learning_rate": 4.385760517799353e-06, "loss": 0.0025, "step": 301550 }, { "epoch": 117.11, "learning_rate": 4.3852427184466025e-06, "loss": 0.0951, "step": 301560 }, { "epoch": 117.11, "learning_rate": 4.384724919093851e-06, "loss": 0.0074, "step": 301570 }, { "epoch": 117.12, "learning_rate": 4.384207119741101e-06, "loss": 0.0869, "step": 301580 }, { "epoch": 117.12, "learning_rate": 4.38368932038835e-06, "loss": 0.0115, "step": 301590 }, { "epoch": 117.13, "learning_rate": 4.383171521035599e-06, "loss": 0.1069, "step": 301600 }, { "epoch": 117.13, "learning_rate": 4.382653721682849e-06, "loss": 0.0251, "step": 301610 }, { "epoch": 117.13, "learning_rate": 4.382135922330098e-06, "loss": 0.1173, "step": 301620 }, { "epoch": 117.14, "learning_rate": 4.381618122977347e-06, "loss": 0.0443, "step": 301630 }, { "epoch": 117.14, "learning_rate": 4.381100323624596e-06, "loss": 0.0547, "step": 301640 }, { "epoch": 117.15, "learning_rate": 4.380582524271846e-06, "loss": 0.0771, "step": 301650 }, { "epoch": 117.15, "learning_rate": 4.3800647249190944e-06, "loss": 0.0402, "step": 301660 }, { "epoch": 117.15, "learning_rate": 4.379546925566344e-06, "loss": 0.0627, "step": 301670 }, { "epoch": 117.16, "learning_rate": 4.379029126213593e-06, "loss": 0.0697, "step": 301680 }, { "epoch": 117.16, "learning_rate": 4.3785113268608424e-06, "loss": 0.0063, "step": 301690 }, { "epoch": 117.17, "learning_rate": 4.377993527508091e-06, "loss": 0.0157, "step": 301700 }, { "epoch": 117.17, "learning_rate": 4.37747572815534e-06, "loss": 0.0037, "step": 301710 }, { "epoch": 117.17, "learning_rate": 4.37695792880259e-06, "loss": 0.0842, "step": 301720 }, { "epoch": 117.18, "learning_rate": 4.376440129449838e-06, "loss": 0.0966, "step": 301730 }, { "epoch": 117.18, "learning_rate": 4.375922330097088e-06, "loss": 0.0132, "step": 301740 }, { "epoch": 117.18, "learning_rate": 4.375404530744337e-06, "loss": 0.0015, "step": 301750 }, { "epoch": 117.19, "learning_rate": 4.374886731391586e-06, "loss": 0.1428, "step": 301760 }, { "epoch": 117.19, "learning_rate": 4.374368932038835e-06, "loss": 0.0841, "step": 301770 }, { "epoch": 117.2, "learning_rate": 4.373851132686085e-06, "loss": 0.025, "step": 301780 }, { "epoch": 117.2, "learning_rate": 4.3733333333333335e-06, "loss": 0.0475, "step": 301790 }, { "epoch": 117.2, "learning_rate": 4.372815533980583e-06, "loss": 0.0235, "step": 301800 }, { "epoch": 117.21, "learning_rate": 4.372297734627832e-06, "loss": 0.0279, "step": 301810 }, { "epoch": 117.21, "learning_rate": 4.3717799352750815e-06, "loss": 0.0283, "step": 301820 }, { "epoch": 117.22, "learning_rate": 4.37126213592233e-06, "loss": 0.0282, "step": 301830 }, { "epoch": 117.22, "learning_rate": 4.37074433656958e-06, "loss": 0.248, "step": 301840 }, { "epoch": 117.22, "learning_rate": 4.370226537216829e-06, "loss": 0.0486, "step": 301850 }, { "epoch": 117.23, "learning_rate": 4.369708737864078e-06, "loss": 0.067, "step": 301860 }, { "epoch": 117.23, "learning_rate": 4.369190938511327e-06, "loss": 0.0785, "step": 301870 }, { "epoch": 117.23, "learning_rate": 4.368673139158576e-06, "loss": 0.0272, "step": 301880 }, { "epoch": 117.24, "learning_rate": 4.3681553398058255e-06, "loss": 0.0376, "step": 301890 }, { "epoch": 117.24, "learning_rate": 4.367637540453074e-06, "loss": 0.0713, "step": 301900 }, { "epoch": 117.25, "learning_rate": 4.367119741100324e-06, "loss": 0.0409, "step": 301910 }, { "epoch": 117.25, "learning_rate": 4.366601941747573e-06, "loss": 0.1008, "step": 301920 }, { "epoch": 117.25, "learning_rate": 4.366084142394822e-06, "loss": 0.0479, "step": 301930 }, { "epoch": 117.26, "learning_rate": 4.365566343042071e-06, "loss": 0.032, "step": 301940 }, { "epoch": 117.26, "learning_rate": 4.365048543689321e-06, "loss": 0.0823, "step": 301950 }, { "epoch": 117.27, "learning_rate": 4.364530744336569e-06, "loss": 0.0011, "step": 301960 }, { "epoch": 117.27, "learning_rate": 4.364012944983819e-06, "loss": 0.0039, "step": 301970 }, { "epoch": 117.27, "learning_rate": 4.363495145631068e-06, "loss": 0.0517, "step": 301980 }, { "epoch": 117.28, "learning_rate": 4.362977346278317e-06, "loss": 0.049, "step": 301990 }, { "epoch": 117.28, "learning_rate": 4.362459546925566e-06, "loss": 0.0004, "step": 302000 }, { "epoch": 117.29, "learning_rate": 4.361941747572816e-06, "loss": 0.0924, "step": 302010 }, { "epoch": 117.29, "learning_rate": 4.361423948220065e-06, "loss": 0.0334, "step": 302020 }, { "epoch": 117.29, "learning_rate": 4.360906148867314e-06, "loss": 0.1643, "step": 302030 }, { "epoch": 117.3, "learning_rate": 4.360388349514563e-06, "loss": 0.0246, "step": 302040 }, { "epoch": 117.3, "learning_rate": 4.359870550161813e-06, "loss": 0.0856, "step": 302050 }, { "epoch": 117.3, "learning_rate": 4.359352750809061e-06, "loss": 0.0771, "step": 302060 }, { "epoch": 117.31, "learning_rate": 4.358834951456311e-06, "loss": 0.0388, "step": 302070 }, { "epoch": 117.31, "learning_rate": 4.35831715210356e-06, "loss": 0.0006, "step": 302080 }, { "epoch": 117.32, "learning_rate": 4.357799352750809e-06, "loss": 0.0356, "step": 302090 }, { "epoch": 117.32, "learning_rate": 4.357281553398058e-06, "loss": 0.0816, "step": 302100 }, { "epoch": 117.32, "learning_rate": 4.356763754045308e-06, "loss": 0.0846, "step": 302110 }, { "epoch": 117.33, "learning_rate": 4.3562459546925565e-06, "loss": 0.0895, "step": 302120 }, { "epoch": 117.33, "learning_rate": 4.355728155339806e-06, "loss": 0.0012, "step": 302130 }, { "epoch": 117.34, "learning_rate": 4.355210355987055e-06, "loss": 0.02, "step": 302140 }, { "epoch": 117.34, "learning_rate": 4.3546925566343045e-06, "loss": 0.0529, "step": 302150 }, { "epoch": 117.34, "learning_rate": 4.354174757281553e-06, "loss": 0.0109, "step": 302160 }, { "epoch": 117.35, "learning_rate": 4.353656957928803e-06, "loss": 0.0008, "step": 302170 }, { "epoch": 117.35, "learning_rate": 4.3531391585760525e-06, "loss": 0.0337, "step": 302180 }, { "epoch": 117.36, "learning_rate": 4.352621359223301e-06, "loss": 0.0631, "step": 302190 }, { "epoch": 117.36, "learning_rate": 4.352103559870551e-06, "loss": 0.0298, "step": 302200 }, { "epoch": 117.36, "learning_rate": 4.3515857605178e-06, "loss": 0.0162, "step": 302210 }, { "epoch": 117.37, "learning_rate": 4.351067961165049e-06, "loss": 0.0149, "step": 302220 }, { "epoch": 117.37, "learning_rate": 4.350550161812298e-06, "loss": 0.2614, "step": 302230 }, { "epoch": 117.37, "learning_rate": 4.350032362459548e-06, "loss": 0.0691, "step": 302240 }, { "epoch": 117.38, "learning_rate": 4.3495145631067965e-06, "loss": 0.0073, "step": 302250 }, { "epoch": 117.38, "learning_rate": 4.348996763754046e-06, "loss": 0.0321, "step": 302260 }, { "epoch": 117.39, "learning_rate": 4.348478964401295e-06, "loss": 0.0465, "step": 302270 }, { "epoch": 117.39, "learning_rate": 4.3479611650485445e-06, "loss": 0.0006, "step": 302280 }, { "epoch": 117.39, "learning_rate": 4.347443365695793e-06, "loss": 0.0081, "step": 302290 }, { "epoch": 117.4, "learning_rate": 4.346925566343043e-06, "loss": 0.063, "step": 302300 }, { "epoch": 117.4, "learning_rate": 4.346407766990292e-06, "loss": 0.1007, "step": 302310 }, { "epoch": 117.41, "learning_rate": 4.345889967637541e-06, "loss": 0.001, "step": 302320 }, { "epoch": 117.41, "learning_rate": 4.34537216828479e-06, "loss": 0.1071, "step": 302330 }, { "epoch": 117.41, "learning_rate": 4.34485436893204e-06, "loss": 0.0412, "step": 302340 }, { "epoch": 117.42, "learning_rate": 4.3443365695792884e-06, "loss": 0.0971, "step": 302350 }, { "epoch": 117.42, "learning_rate": 4.343818770226538e-06, "loss": 0.1249, "step": 302360 }, { "epoch": 117.43, "learning_rate": 4.343300970873787e-06, "loss": 0.0729, "step": 302370 }, { "epoch": 117.43, "learning_rate": 4.3427831715210364e-06, "loss": 0.0291, "step": 302380 }, { "epoch": 117.43, "learning_rate": 4.342265372168285e-06, "loss": 0.0332, "step": 302390 }, { "epoch": 117.44, "learning_rate": 4.341747572815535e-06, "loss": 0.1386, "step": 302400 }, { "epoch": 117.44, "learning_rate": 4.341229773462784e-06, "loss": 0.0701, "step": 302410 }, { "epoch": 117.44, "learning_rate": 4.340711974110033e-06, "loss": 0.0271, "step": 302420 }, { "epoch": 117.45, "learning_rate": 4.340194174757282e-06, "loss": 0.0456, "step": 302430 }, { "epoch": 117.45, "learning_rate": 4.339676375404532e-06, "loss": 0.0307, "step": 302440 }, { "epoch": 117.46, "learning_rate": 4.33915857605178e-06, "loss": 0.0377, "step": 302450 }, { "epoch": 117.46, "learning_rate": 4.33864077669903e-06, "loss": 0.0522, "step": 302460 }, { "epoch": 117.46, "learning_rate": 4.338122977346279e-06, "loss": 0.065, "step": 302470 }, { "epoch": 117.47, "learning_rate": 4.337605177993528e-06, "loss": 0.0014, "step": 302480 }, { "epoch": 117.47, "learning_rate": 4.337087378640777e-06, "loss": 0.071, "step": 302490 }, { "epoch": 117.48, "learning_rate": 4.336569579288027e-06, "loss": 0.0925, "step": 302500 }, { "epoch": 117.48, "learning_rate": 4.3360517799352755e-06, "loss": 0.054, "step": 302510 }, { "epoch": 117.48, "learning_rate": 4.335533980582524e-06, "loss": 0.0582, "step": 302520 }, { "epoch": 117.49, "learning_rate": 4.335016181229774e-06, "loss": 0.0114, "step": 302530 }, { "epoch": 117.49, "learning_rate": 4.334498381877023e-06, "loss": 0.0934, "step": 302540 }, { "epoch": 117.5, "learning_rate": 4.333980582524272e-06, "loss": 0.0483, "step": 302550 }, { "epoch": 117.5, "learning_rate": 4.333462783171521e-06, "loss": 0.0022, "step": 302560 }, { "epoch": 117.5, "learning_rate": 4.332944983818771e-06, "loss": 0.0728, "step": 302570 }, { "epoch": 117.51, "learning_rate": 4.3324271844660195e-06, "loss": 0.0302, "step": 302580 }, { "epoch": 117.51, "learning_rate": 4.331909385113269e-06, "loss": 0.0155, "step": 302590 }, { "epoch": 117.51, "learning_rate": 4.331391585760518e-06, "loss": 0.0429, "step": 302600 }, { "epoch": 117.52, "learning_rate": 4.3308737864077675e-06, "loss": 0.0001, "step": 302610 }, { "epoch": 117.52, "learning_rate": 4.330355987055016e-06, "loss": 0.0121, "step": 302620 }, { "epoch": 117.53, "learning_rate": 4.329838187702266e-06, "loss": 0.0107, "step": 302630 }, { "epoch": 117.53, "learning_rate": 4.329320388349515e-06, "loss": 0.2105, "step": 302640 }, { "epoch": 117.53, "learning_rate": 4.328802588996764e-06, "loss": 0.0306, "step": 302650 }, { "epoch": 117.54, "learning_rate": 4.328284789644013e-06, "loss": 0.0732, "step": 302660 }, { "epoch": 117.54, "learning_rate": 4.327766990291263e-06, "loss": 0.0966, "step": 302670 }, { "epoch": 117.55, "learning_rate": 4.327249190938511e-06, "loss": 0.0426, "step": 302680 }, { "epoch": 117.55, "learning_rate": 4.32673139158576e-06, "loss": 0.0398, "step": 302690 }, { "epoch": 117.55, "learning_rate": 4.32621359223301e-06, "loss": 0.03, "step": 302700 }, { "epoch": 117.56, "learning_rate": 4.325695792880259e-06, "loss": 0.0699, "step": 302710 }, { "epoch": 117.56, "learning_rate": 4.325177993527508e-06, "loss": 0.0007, "step": 302720 }, { "epoch": 117.57, "learning_rate": 4.324660194174758e-06, "loss": 0.0135, "step": 302730 }, { "epoch": 117.57, "learning_rate": 4.324142394822007e-06, "loss": 0.0017, "step": 302740 }, { "epoch": 117.57, "learning_rate": 4.323624595469256e-06, "loss": 0.0003, "step": 302750 }, { "epoch": 117.58, "learning_rate": 4.323106796116505e-06, "loss": 0.0001, "step": 302760 }, { "epoch": 117.58, "learning_rate": 4.322588996763755e-06, "loss": 0.0085, "step": 302770 }, { "epoch": 117.58, "learning_rate": 4.322071197411003e-06, "loss": 0.0204, "step": 302780 }, { "epoch": 117.59, "learning_rate": 4.321553398058253e-06, "loss": 0.0956, "step": 302790 }, { "epoch": 117.59, "learning_rate": 4.321035598705502e-06, "loss": 0.0897, "step": 302800 }, { "epoch": 117.6, "learning_rate": 4.320517799352751e-06, "loss": 0.0286, "step": 302810 }, { "epoch": 117.6, "learning_rate": 4.32e-06, "loss": 0.0962, "step": 302820 }, { "epoch": 117.6, "learning_rate": 4.31948220064725e-06, "loss": 0.0867, "step": 302830 }, { "epoch": 117.61, "learning_rate": 4.3189644012944985e-06, "loss": 0.0093, "step": 302840 }, { "epoch": 117.61, "learning_rate": 4.318446601941748e-06, "loss": 0.028, "step": 302850 }, { "epoch": 117.62, "learning_rate": 4.317928802588997e-06, "loss": 0.037, "step": 302860 }, { "epoch": 117.62, "learning_rate": 4.3174110032362465e-06, "loss": 0.0125, "step": 302870 }, { "epoch": 117.62, "learning_rate": 4.316893203883495e-06, "loss": 0.01, "step": 302880 }, { "epoch": 117.63, "learning_rate": 4.316375404530745e-06, "loss": 0.0155, "step": 302890 }, { "epoch": 117.63, "learning_rate": 4.315857605177994e-06, "loss": 0.0109, "step": 302900 }, { "epoch": 117.63, "learning_rate": 4.315339805825243e-06, "loss": 0.0011, "step": 302910 }, { "epoch": 117.64, "learning_rate": 4.314822006472492e-06, "loss": 0.0204, "step": 302920 }, { "epoch": 117.64, "learning_rate": 4.314304207119742e-06, "loss": 0.0911, "step": 302930 }, { "epoch": 117.65, "learning_rate": 4.3137864077669905e-06, "loss": 0.0657, "step": 302940 }, { "epoch": 117.65, "learning_rate": 4.31326860841424e-06, "loss": 0.0379, "step": 302950 }, { "epoch": 117.65, "learning_rate": 4.312750809061489e-06, "loss": 0.0004, "step": 302960 }, { "epoch": 117.66, "learning_rate": 4.3122330097087385e-06, "loss": 0.0248, "step": 302970 }, { "epoch": 117.66, "learning_rate": 4.311715210355987e-06, "loss": 0.0341, "step": 302980 }, { "epoch": 117.67, "learning_rate": 4.311197411003237e-06, "loss": 0.1404, "step": 302990 }, { "epoch": 117.67, "learning_rate": 4.310679611650486e-06, "loss": 0.0374, "step": 303000 }, { "epoch": 117.67, "learning_rate": 4.310161812297735e-06, "loss": 0.0081, "step": 303010 }, { "epoch": 117.68, "learning_rate": 4.309644012944984e-06, "loss": 0.0086, "step": 303020 }, { "epoch": 117.68, "learning_rate": 4.309126213592234e-06, "loss": 0.0978, "step": 303030 }, { "epoch": 117.69, "learning_rate": 4.308608414239482e-06, "loss": 0.0009, "step": 303040 }, { "epoch": 117.69, "learning_rate": 4.308090614886732e-06, "loss": 0.0737, "step": 303050 }, { "epoch": 117.69, "learning_rate": 4.307572815533981e-06, "loss": 0.0632, "step": 303060 }, { "epoch": 117.7, "learning_rate": 4.3070550161812304e-06, "loss": 0.1006, "step": 303070 }, { "epoch": 117.7, "learning_rate": 4.306537216828479e-06, "loss": 0.0001, "step": 303080 }, { "epoch": 117.7, "learning_rate": 4.306019417475729e-06, "loss": 0.0162, "step": 303090 }, { "epoch": 117.71, "learning_rate": 4.305501618122978e-06, "loss": 0.0756, "step": 303100 }, { "epoch": 117.71, "learning_rate": 4.304983818770227e-06, "loss": 0.0004, "step": 303110 }, { "epoch": 117.72, "learning_rate": 4.304466019417476e-06, "loss": 0.0002, "step": 303120 }, { "epoch": 117.72, "learning_rate": 4.303948220064726e-06, "loss": 0.1409, "step": 303130 }, { "epoch": 117.72, "learning_rate": 4.303430420711974e-06, "loss": 0.0015, "step": 303140 }, { "epoch": 117.73, "learning_rate": 4.302912621359224e-06, "loss": 0.0766, "step": 303150 }, { "epoch": 117.73, "learning_rate": 4.302394822006473e-06, "loss": 0.0085, "step": 303160 }, { "epoch": 117.74, "learning_rate": 4.301877022653722e-06, "loss": 0.051, "step": 303170 }, { "epoch": 117.74, "learning_rate": 4.301359223300971e-06, "loss": 0.0129, "step": 303180 }, { "epoch": 117.74, "learning_rate": 4.300841423948221e-06, "loss": 0.0554, "step": 303190 }, { "epoch": 117.75, "learning_rate": 4.3003236245954695e-06, "loss": 0.0662, "step": 303200 }, { "epoch": 117.75, "learning_rate": 4.299805825242719e-06, "loss": 0.0523, "step": 303210 }, { "epoch": 117.76, "learning_rate": 4.299288025889968e-06, "loss": 0.0213, "step": 303220 }, { "epoch": 117.76, "learning_rate": 4.2987702265372175e-06, "loss": 0.0591, "step": 303230 }, { "epoch": 117.76, "learning_rate": 4.298252427184466e-06, "loss": 0.0151, "step": 303240 }, { "epoch": 117.77, "learning_rate": 4.297734627831716e-06, "loss": 0.0605, "step": 303250 }, { "epoch": 117.77, "learning_rate": 4.297216828478965e-06, "loss": 0.0586, "step": 303260 }, { "epoch": 117.77, "learning_rate": 4.296699029126214e-06, "loss": 0.1048, "step": 303270 }, { "epoch": 117.78, "learning_rate": 4.296181229773463e-06, "loss": 0.0563, "step": 303280 }, { "epoch": 117.78, "learning_rate": 4.295663430420713e-06, "loss": 0.0564, "step": 303290 }, { "epoch": 117.79, "learning_rate": 4.2951456310679615e-06, "loss": 0.0357, "step": 303300 }, { "epoch": 117.79, "learning_rate": 4.294627831715211e-06, "loss": 0.0383, "step": 303310 }, { "epoch": 117.79, "learning_rate": 4.29411003236246e-06, "loss": 0.0126, "step": 303320 }, { "epoch": 117.8, "learning_rate": 4.2935922330097095e-06, "loss": 0.0793, "step": 303330 }, { "epoch": 117.8, "learning_rate": 4.293074433656958e-06, "loss": 0.0254, "step": 303340 }, { "epoch": 117.81, "learning_rate": 4.292556634304207e-06, "loss": 0.0342, "step": 303350 }, { "epoch": 117.81, "learning_rate": 4.292038834951457e-06, "loss": 0.0966, "step": 303360 }, { "epoch": 117.81, "learning_rate": 4.291521035598705e-06, "loss": 0.0195, "step": 303370 }, { "epoch": 117.82, "learning_rate": 4.291003236245955e-06, "loss": 0.0106, "step": 303380 }, { "epoch": 117.82, "learning_rate": 4.290485436893204e-06, "loss": 0.044, "step": 303390 }, { "epoch": 117.83, "learning_rate": 4.2899676375404534e-06, "loss": 0.0039, "step": 303400 }, { "epoch": 117.83, "learning_rate": 4.289449838187702e-06, "loss": 0.0178, "step": 303410 }, { "epoch": 117.83, "learning_rate": 4.288932038834952e-06, "loss": 0.0066, "step": 303420 }, { "epoch": 117.84, "learning_rate": 4.288414239482201e-06, "loss": 0.0001, "step": 303430 }, { "epoch": 117.84, "learning_rate": 4.28789644012945e-06, "loss": 0.0142, "step": 303440 }, { "epoch": 117.84, "learning_rate": 4.287378640776699e-06, "loss": 0.0006, "step": 303450 }, { "epoch": 117.85, "learning_rate": 4.286860841423949e-06, "loss": 0.0003, "step": 303460 }, { "epoch": 117.85, "learning_rate": 4.286343042071197e-06, "loss": 0.0577, "step": 303470 }, { "epoch": 117.86, "learning_rate": 4.285825242718447e-06, "loss": 0.0182, "step": 303480 }, { "epoch": 117.86, "learning_rate": 4.285307443365696e-06, "loss": 0.0944, "step": 303490 }, { "epoch": 117.86, "learning_rate": 4.284789644012945e-06, "loss": 0.0002, "step": 303500 }, { "epoch": 117.87, "learning_rate": 4.284271844660194e-06, "loss": 0.0004, "step": 303510 }, { "epoch": 117.87, "learning_rate": 4.283754045307444e-06, "loss": 0.0996, "step": 303520 }, { "epoch": 117.88, "learning_rate": 4.2832362459546925e-06, "loss": 0.0091, "step": 303530 }, { "epoch": 117.88, "learning_rate": 4.282718446601942e-06, "loss": 0.0112, "step": 303540 }, { "epoch": 117.88, "learning_rate": 4.282200647249191e-06, "loss": 0.0318, "step": 303550 }, { "epoch": 117.89, "learning_rate": 4.2816828478964405e-06, "loss": 0.0203, "step": 303560 }, { "epoch": 117.89, "learning_rate": 4.281165048543689e-06, "loss": 0.032, "step": 303570 }, { "epoch": 117.9, "learning_rate": 4.280647249190939e-06, "loss": 0.0645, "step": 303580 }, { "epoch": 117.9, "learning_rate": 4.280129449838188e-06, "loss": 0.0208, "step": 303590 }, { "epoch": 117.9, "learning_rate": 4.279611650485437e-06, "loss": 0.0009, "step": 303600 }, { "epoch": 117.91, "learning_rate": 4.279093851132686e-06, "loss": 0.0002, "step": 303610 }, { "epoch": 117.91, "learning_rate": 4.278576051779936e-06, "loss": 0.1258, "step": 303620 }, { "epoch": 117.91, "learning_rate": 4.2780582524271845e-06, "loss": 0.0001, "step": 303630 }, { "epoch": 117.92, "learning_rate": 4.277540453074434e-06, "loss": 0.0218, "step": 303640 }, { "epoch": 117.92, "learning_rate": 4.277022653721683e-06, "loss": 0.1255, "step": 303650 }, { "epoch": 117.93, "learning_rate": 4.2765048543689325e-06, "loss": 0.0002, "step": 303660 }, { "epoch": 117.93, "learning_rate": 4.275987055016181e-06, "loss": 0.0048, "step": 303670 }, { "epoch": 117.93, "learning_rate": 4.275469255663431e-06, "loss": 0.0098, "step": 303680 }, { "epoch": 117.94, "learning_rate": 4.27495145631068e-06, "loss": 0.043, "step": 303690 }, { "epoch": 117.94, "learning_rate": 4.274433656957929e-06, "loss": 0.117, "step": 303700 }, { "epoch": 117.95, "learning_rate": 4.273915857605178e-06, "loss": 0.1114, "step": 303710 }, { "epoch": 117.95, "learning_rate": 4.273398058252428e-06, "loss": 0.0208, "step": 303720 }, { "epoch": 117.95, "learning_rate": 4.272880258899676e-06, "loss": 0.0094, "step": 303730 }, { "epoch": 117.96, "learning_rate": 4.272362459546926e-06, "loss": 0.0897, "step": 303740 }, { "epoch": 117.96, "learning_rate": 4.271844660194175e-06, "loss": 0.0774, "step": 303750 }, { "epoch": 117.97, "learning_rate": 4.2713268608414244e-06, "loss": 0.0479, "step": 303760 }, { "epoch": 117.97, "learning_rate": 4.270809061488673e-06, "loss": 0.0201, "step": 303770 }, { "epoch": 117.97, "learning_rate": 4.270291262135923e-06, "loss": 0.0931, "step": 303780 }, { "epoch": 117.98, "learning_rate": 4.269773462783172e-06, "loss": 0.0348, "step": 303790 }, { "epoch": 117.98, "learning_rate": 4.269255663430421e-06, "loss": 0.059, "step": 303800 }, { "epoch": 117.98, "learning_rate": 4.26873786407767e-06, "loss": 0.0446, "step": 303810 }, { "epoch": 117.99, "learning_rate": 4.26822006472492e-06, "loss": 0.0946, "step": 303820 }, { "epoch": 117.99, "learning_rate": 4.267702265372168e-06, "loss": 0.0402, "step": 303830 }, { "epoch": 118.0, "learning_rate": 4.267184466019418e-06, "loss": 0.0587, "step": 303840 }, { "epoch": 118.0, "learning_rate": 4.266666666666668e-06, "loss": 0.0045, "step": 303850 }, { "epoch": 118.0, "eval_accuracy": 0.9529573590096286, "eval_loss": 0.3821144700050354, "eval_runtime": 8.1852, "eval_samples_per_second": 444.092, "eval_steps_per_second": 55.588, "step": 303850 }, { "epoch": 118.0, "learning_rate": 4.266148867313916e-06, "loss": 0.0002, "step": 303860 }, { "epoch": 118.01, "learning_rate": 4.265631067961166e-06, "loss": 0.1503, "step": 303870 }, { "epoch": 118.01, "learning_rate": 4.265113268608415e-06, "loss": 0.0005, "step": 303880 }, { "epoch": 118.02, "learning_rate": 4.264595469255664e-06, "loss": 0.0208, "step": 303890 }, { "epoch": 118.02, "learning_rate": 4.264077669902913e-06, "loss": 0.1188, "step": 303900 }, { "epoch": 118.02, "learning_rate": 4.263559870550163e-06, "loss": 0.0119, "step": 303910 }, { "epoch": 118.03, "learning_rate": 4.2630420711974115e-06, "loss": 0.0114, "step": 303920 }, { "epoch": 118.03, "learning_rate": 4.262524271844661e-06, "loss": 0.0096, "step": 303930 }, { "epoch": 118.03, "learning_rate": 4.26200647249191e-06, "loss": 0.0455, "step": 303940 }, { "epoch": 118.04, "learning_rate": 4.2614886731391595e-06, "loss": 0.0003, "step": 303950 }, { "epoch": 118.04, "learning_rate": 4.260970873786408e-06, "loss": 0.0546, "step": 303960 }, { "epoch": 118.05, "learning_rate": 4.260453074433658e-06, "loss": 0.1516, "step": 303970 }, { "epoch": 118.05, "learning_rate": 4.259935275080907e-06, "loss": 0.1272, "step": 303980 }, { "epoch": 118.05, "learning_rate": 4.2594174757281555e-06, "loss": 0.0329, "step": 303990 }, { "epoch": 118.06, "learning_rate": 4.258899676375405e-06, "loss": 0.042, "step": 304000 }, { "epoch": 118.06, "learning_rate": 4.258381877022654e-06, "loss": 0.0538, "step": 304010 }, { "epoch": 118.07, "learning_rate": 4.2578640776699035e-06, "loss": 0.0007, "step": 304020 }, { "epoch": 118.07, "learning_rate": 4.257346278317152e-06, "loss": 0.0212, "step": 304030 }, { "epoch": 118.07, "learning_rate": 4.256828478964402e-06, "loss": 0.0011, "step": 304040 }, { "epoch": 118.08, "learning_rate": 4.256310679611651e-06, "loss": 0.0517, "step": 304050 }, { "epoch": 118.08, "learning_rate": 4.2557928802589e-06, "loss": 0.0417, "step": 304060 }, { "epoch": 118.09, "learning_rate": 4.255275080906149e-06, "loss": 0.0395, "step": 304070 }, { "epoch": 118.09, "learning_rate": 4.254757281553399e-06, "loss": 0.004, "step": 304080 }, { "epoch": 118.09, "learning_rate": 4.2542394822006474e-06, "loss": 0.0103, "step": 304090 }, { "epoch": 118.1, "learning_rate": 4.253721682847897e-06, "loss": 0.0593, "step": 304100 }, { "epoch": 118.1, "learning_rate": 4.253203883495146e-06, "loss": 0.0007, "step": 304110 }, { "epoch": 118.1, "learning_rate": 4.2526860841423954e-06, "loss": 0.0007, "step": 304120 }, { "epoch": 118.11, "learning_rate": 4.252168284789644e-06, "loss": 0.0326, "step": 304130 }, { "epoch": 118.11, "learning_rate": 4.251650485436894e-06, "loss": 0.0007, "step": 304140 }, { "epoch": 118.12, "learning_rate": 4.251132686084143e-06, "loss": 0.0466, "step": 304150 }, { "epoch": 118.12, "learning_rate": 4.250614886731391e-06, "loss": 0.0431, "step": 304160 }, { "epoch": 118.12, "learning_rate": 4.250097087378641e-06, "loss": 0.0285, "step": 304170 }, { "epoch": 118.13, "learning_rate": 4.24957928802589e-06, "loss": 0.0241, "step": 304180 }, { "epoch": 118.13, "learning_rate": 4.249061488673139e-06, "loss": 0.0015, "step": 304190 }, { "epoch": 118.14, "learning_rate": 4.248543689320388e-06, "loss": 0.023, "step": 304200 }, { "epoch": 118.14, "learning_rate": 4.248025889967638e-06, "loss": 0.0692, "step": 304210 }, { "epoch": 118.14, "learning_rate": 4.2475080906148865e-06, "loss": 0.0015, "step": 304220 }, { "epoch": 118.15, "learning_rate": 4.246990291262136e-06, "loss": 0.0541, "step": 304230 }, { "epoch": 118.15, "learning_rate": 4.246472491909385e-06, "loss": 0.0779, "step": 304240 }, { "epoch": 118.16, "learning_rate": 4.2459546925566345e-06, "loss": 0.0012, "step": 304250 }, { "epoch": 118.16, "learning_rate": 4.245436893203883e-06, "loss": 0.0217, "step": 304260 }, { "epoch": 118.16, "learning_rate": 4.244919093851133e-06, "loss": 0.0784, "step": 304270 }, { "epoch": 118.17, "learning_rate": 4.244401294498382e-06, "loss": 0.1067, "step": 304280 }, { "epoch": 118.17, "learning_rate": 4.243883495145631e-06, "loss": 0.0602, "step": 304290 }, { "epoch": 118.17, "learning_rate": 4.24336569579288e-06, "loss": 0.0045, "step": 304300 }, { "epoch": 118.18, "learning_rate": 4.24284789644013e-06, "loss": 0.0201, "step": 304310 }, { "epoch": 118.18, "learning_rate": 4.2423300970873785e-06, "loss": 0.0451, "step": 304320 }, { "epoch": 118.19, "learning_rate": 4.241812297734628e-06, "loss": 0.0361, "step": 304330 }, { "epoch": 118.19, "learning_rate": 4.241294498381877e-06, "loss": 0.0171, "step": 304340 }, { "epoch": 118.19, "learning_rate": 4.2407766990291265e-06, "loss": 0.1083, "step": 304350 }, { "epoch": 118.2, "learning_rate": 4.240258899676375e-06, "loss": 0.0357, "step": 304360 }, { "epoch": 118.2, "learning_rate": 4.239741100323625e-06, "loss": 0.065, "step": 304370 }, { "epoch": 118.21, "learning_rate": 4.239223300970874e-06, "loss": 0.0057, "step": 304380 }, { "epoch": 118.21, "learning_rate": 4.238705501618123e-06, "loss": 0.0747, "step": 304390 }, { "epoch": 118.21, "learning_rate": 4.238187702265372e-06, "loss": 0.0098, "step": 304400 }, { "epoch": 118.22, "learning_rate": 4.237669902912622e-06, "loss": 0.0465, "step": 304410 }, { "epoch": 118.22, "learning_rate": 4.237152103559871e-06, "loss": 0.0995, "step": 304420 }, { "epoch": 118.23, "learning_rate": 4.23663430420712e-06, "loss": 0.0428, "step": 304430 }, { "epoch": 118.23, "learning_rate": 4.23611650485437e-06, "loss": 0.0807, "step": 304440 }, { "epoch": 118.23, "learning_rate": 4.2355987055016184e-06, "loss": 0.0828, "step": 304450 }, { "epoch": 118.24, "learning_rate": 4.235080906148868e-06, "loss": 0.0266, "step": 304460 }, { "epoch": 118.24, "learning_rate": 4.234563106796117e-06, "loss": 0.0473, "step": 304470 }, { "epoch": 118.24, "learning_rate": 4.2340453074433664e-06, "loss": 0.0352, "step": 304480 }, { "epoch": 118.25, "learning_rate": 4.233527508090615e-06, "loss": 0.0162, "step": 304490 }, { "epoch": 118.25, "learning_rate": 4.233009708737865e-06, "loss": 0.0332, "step": 304500 }, { "epoch": 118.26, "learning_rate": 4.232491909385114e-06, "loss": 0.0091, "step": 304510 }, { "epoch": 118.26, "learning_rate": 4.231974110032363e-06, "loss": 0.0275, "step": 304520 }, { "epoch": 118.26, "learning_rate": 4.231456310679612e-06, "loss": 0.0921, "step": 304530 }, { "epoch": 118.27, "learning_rate": 4.230938511326862e-06, "loss": 0.0402, "step": 304540 }, { "epoch": 118.27, "learning_rate": 4.23042071197411e-06, "loss": 0.1986, "step": 304550 }, { "epoch": 118.28, "learning_rate": 4.22990291262136e-06, "loss": 0.0733, "step": 304560 }, { "epoch": 118.28, "learning_rate": 4.229385113268609e-06, "loss": 0.0523, "step": 304570 }, { "epoch": 118.28, "learning_rate": 4.228867313915858e-06, "loss": 0.1387, "step": 304580 }, { "epoch": 118.29, "learning_rate": 4.228349514563107e-06, "loss": 0.009, "step": 304590 }, { "epoch": 118.29, "learning_rate": 4.227831715210357e-06, "loss": 0.0562, "step": 304600 }, { "epoch": 118.3, "learning_rate": 4.2273139158576055e-06, "loss": 0.1713, "step": 304610 }, { "epoch": 118.3, "learning_rate": 4.226796116504855e-06, "loss": 0.1227, "step": 304620 }, { "epoch": 118.3, "learning_rate": 4.226278317152104e-06, "loss": 0.1221, "step": 304630 }, { "epoch": 118.31, "learning_rate": 4.2257605177993535e-06, "loss": 0.0108, "step": 304640 }, { "epoch": 118.31, "learning_rate": 4.225242718446602e-06, "loss": 0.1494, "step": 304650 }, { "epoch": 118.31, "learning_rate": 4.224724919093852e-06, "loss": 0.1221, "step": 304660 }, { "epoch": 118.32, "learning_rate": 4.224207119741101e-06, "loss": 0.0123, "step": 304670 }, { "epoch": 118.32, "learning_rate": 4.22368932038835e-06, "loss": 0.0788, "step": 304680 }, { "epoch": 118.33, "learning_rate": 4.223171521035599e-06, "loss": 0.0095, "step": 304690 }, { "epoch": 118.33, "learning_rate": 4.222653721682849e-06, "loss": 0.0616, "step": 304700 }, { "epoch": 118.33, "learning_rate": 4.2221359223300975e-06, "loss": 0.133, "step": 304710 }, { "epoch": 118.34, "learning_rate": 4.221618122977347e-06, "loss": 0.0174, "step": 304720 }, { "epoch": 118.34, "learning_rate": 4.221100323624596e-06, "loss": 0.0079, "step": 304730 }, { "epoch": 118.35, "learning_rate": 4.2205825242718455e-06, "loss": 0.0004, "step": 304740 }, { "epoch": 118.35, "learning_rate": 4.220064724919094e-06, "loss": 0.1355, "step": 304750 }, { "epoch": 118.35, "learning_rate": 4.219546925566344e-06, "loss": 0.0346, "step": 304760 }, { "epoch": 118.36, "learning_rate": 4.219029126213593e-06, "loss": 0.0341, "step": 304770 }, { "epoch": 118.36, "learning_rate": 4.218511326860842e-06, "loss": 0.0807, "step": 304780 }, { "epoch": 118.37, "learning_rate": 4.217993527508091e-06, "loss": 0.0427, "step": 304790 }, { "epoch": 118.37, "learning_rate": 4.21747572815534e-06, "loss": 0.0801, "step": 304800 }, { "epoch": 118.37, "learning_rate": 4.2169579288025894e-06, "loss": 0.0468, "step": 304810 }, { "epoch": 118.38, "learning_rate": 4.216440129449838e-06, "loss": 0.144, "step": 304820 }, { "epoch": 118.38, "learning_rate": 4.215922330097088e-06, "loss": 0.0357, "step": 304830 }, { "epoch": 118.38, "learning_rate": 4.215404530744337e-06, "loss": 0.0089, "step": 304840 }, { "epoch": 118.39, "learning_rate": 4.214886731391586e-06, "loss": 0.0089, "step": 304850 }, { "epoch": 118.39, "learning_rate": 4.214368932038835e-06, "loss": 0.1129, "step": 304860 }, { "epoch": 118.4, "learning_rate": 4.213851132686085e-06, "loss": 0.1442, "step": 304870 }, { "epoch": 118.4, "learning_rate": 4.213333333333333e-06, "loss": 0.0954, "step": 304880 }, { "epoch": 118.4, "learning_rate": 4.212815533980583e-06, "loss": 0.0475, "step": 304890 }, { "epoch": 118.41, "learning_rate": 4.212297734627832e-06, "loss": 0.0468, "step": 304900 }, { "epoch": 118.41, "learning_rate": 4.211779935275081e-06, "loss": 0.0051, "step": 304910 }, { "epoch": 118.42, "learning_rate": 4.21126213592233e-06, "loss": 0.0085, "step": 304920 }, { "epoch": 118.42, "learning_rate": 4.21074433656958e-06, "loss": 0.0608, "step": 304930 }, { "epoch": 118.42, "learning_rate": 4.2102265372168285e-06, "loss": 0.0959, "step": 304940 }, { "epoch": 118.43, "learning_rate": 4.209708737864078e-06, "loss": 0.0552, "step": 304950 }, { "epoch": 118.43, "learning_rate": 4.209190938511327e-06, "loss": 0.0216, "step": 304960 }, { "epoch": 118.43, "learning_rate": 4.2086731391585765e-06, "loss": 0.0514, "step": 304970 }, { "epoch": 118.44, "learning_rate": 4.208155339805825e-06, "loss": 0.0717, "step": 304980 }, { "epoch": 118.44, "learning_rate": 4.207637540453075e-06, "loss": 0.0279, "step": 304990 }, { "epoch": 118.45, "learning_rate": 4.207119741100324e-06, "loss": 0.0001, "step": 305000 }, { "epoch": 118.45, "learning_rate": 4.206601941747573e-06, "loss": 0.0001, "step": 305010 }, { "epoch": 118.45, "learning_rate": 4.206084142394822e-06, "loss": 0.0642, "step": 305020 }, { "epoch": 118.46, "learning_rate": 4.205566343042072e-06, "loss": 0.0986, "step": 305030 }, { "epoch": 118.46, "learning_rate": 4.2050485436893205e-06, "loss": 0.1188, "step": 305040 }, { "epoch": 118.47, "learning_rate": 4.20453074433657e-06, "loss": 0.0729, "step": 305050 }, { "epoch": 118.47, "learning_rate": 4.204012944983819e-06, "loss": 0.0003, "step": 305060 }, { "epoch": 118.47, "learning_rate": 4.2034951456310685e-06, "loss": 0.1166, "step": 305070 }, { "epoch": 118.48, "learning_rate": 4.202977346278317e-06, "loss": 0.0235, "step": 305080 }, { "epoch": 118.48, "learning_rate": 4.202459546925567e-06, "loss": 0.0001, "step": 305090 }, { "epoch": 118.49, "learning_rate": 4.201941747572816e-06, "loss": 0.1036, "step": 305100 }, { "epoch": 118.49, "learning_rate": 4.201423948220065e-06, "loss": 0.0062, "step": 305110 }, { "epoch": 118.49, "learning_rate": 4.200906148867314e-06, "loss": 0.0414, "step": 305120 }, { "epoch": 118.5, "learning_rate": 4.200388349514564e-06, "loss": 0.0174, "step": 305130 }, { "epoch": 118.5, "learning_rate": 4.1998705501618124e-06, "loss": 0.0274, "step": 305140 }, { "epoch": 118.5, "learning_rate": 4.199352750809062e-06, "loss": 0.0925, "step": 305150 }, { "epoch": 118.51, "learning_rate": 4.198834951456311e-06, "loss": 0.0006, "step": 305160 }, { "epoch": 118.51, "learning_rate": 4.1983171521035604e-06, "loss": 0.0119, "step": 305170 }, { "epoch": 118.52, "learning_rate": 4.197799352750809e-06, "loss": 0.0769, "step": 305180 }, { "epoch": 118.52, "learning_rate": 4.197281553398059e-06, "loss": 0.0295, "step": 305190 }, { "epoch": 118.52, "learning_rate": 4.196763754045308e-06, "loss": 0.0265, "step": 305200 }, { "epoch": 118.53, "learning_rate": 4.196245954692557e-06, "loss": 0.0002, "step": 305210 }, { "epoch": 118.53, "learning_rate": 4.195728155339806e-06, "loss": 0.0607, "step": 305220 }, { "epoch": 118.54, "learning_rate": 4.195210355987056e-06, "loss": 0.0715, "step": 305230 }, { "epoch": 118.54, "learning_rate": 4.194692556634304e-06, "loss": 0.0241, "step": 305240 }, { "epoch": 118.54, "learning_rate": 4.194174757281554e-06, "loss": 0.027, "step": 305250 }, { "epoch": 118.55, "learning_rate": 4.193656957928803e-06, "loss": 0.0843, "step": 305260 }, { "epoch": 118.55, "learning_rate": 4.193139158576052e-06, "loss": 0.0001, "step": 305270 }, { "epoch": 118.56, "learning_rate": 4.192621359223301e-06, "loss": 0.0227, "step": 305280 }, { "epoch": 118.56, "learning_rate": 4.192103559870551e-06, "loss": 0.0885, "step": 305290 }, { "epoch": 118.56, "learning_rate": 4.1915857605177995e-06, "loss": 0.003, "step": 305300 }, { "epoch": 118.57, "learning_rate": 4.191067961165049e-06, "loss": 0.0637, "step": 305310 }, { "epoch": 118.57, "learning_rate": 4.190550161812298e-06, "loss": 0.049, "step": 305320 }, { "epoch": 118.57, "learning_rate": 4.1900323624595475e-06, "loss": 0.0568, "step": 305330 }, { "epoch": 118.58, "learning_rate": 4.189514563106796e-06, "loss": 0.1109, "step": 305340 }, { "epoch": 118.58, "learning_rate": 4.188996763754046e-06, "loss": 0.0183, "step": 305350 }, { "epoch": 118.59, "learning_rate": 4.188478964401295e-06, "loss": 0.0189, "step": 305360 }, { "epoch": 118.59, "learning_rate": 4.187961165048544e-06, "loss": 0.0005, "step": 305370 }, { "epoch": 118.59, "learning_rate": 4.187443365695793e-06, "loss": 0.0545, "step": 305380 }, { "epoch": 118.6, "learning_rate": 4.186925566343043e-06, "loss": 0.0882, "step": 305390 }, { "epoch": 118.6, "learning_rate": 4.1864077669902915e-06, "loss": 0.0024, "step": 305400 }, { "epoch": 118.61, "learning_rate": 4.185889967637541e-06, "loss": 0.1319, "step": 305410 }, { "epoch": 118.61, "learning_rate": 4.18537216828479e-06, "loss": 0.0582, "step": 305420 }, { "epoch": 118.61, "learning_rate": 4.1848543689320395e-06, "loss": 0.0006, "step": 305430 }, { "epoch": 118.62, "learning_rate": 4.184336569579288e-06, "loss": 0.0554, "step": 305440 }, { "epoch": 118.62, "learning_rate": 4.183818770226538e-06, "loss": 0.0792, "step": 305450 }, { "epoch": 118.63, "learning_rate": 4.183300970873787e-06, "loss": 0.0244, "step": 305460 }, { "epoch": 118.63, "learning_rate": 4.182783171521036e-06, "loss": 0.1235, "step": 305470 }, { "epoch": 118.63, "learning_rate": 4.182265372168285e-06, "loss": 0.006, "step": 305480 }, { "epoch": 118.64, "learning_rate": 4.181747572815535e-06, "loss": 0.0041, "step": 305490 }, { "epoch": 118.64, "learning_rate": 4.1812297734627834e-06, "loss": 0.0005, "step": 305500 }, { "epoch": 118.64, "learning_rate": 4.180711974110033e-06, "loss": 0.0979, "step": 305510 }, { "epoch": 118.65, "learning_rate": 4.180194174757282e-06, "loss": 0.0397, "step": 305520 }, { "epoch": 118.65, "learning_rate": 4.1796763754045314e-06, "loss": 0.1189, "step": 305530 }, { "epoch": 118.66, "learning_rate": 4.17915857605178e-06, "loss": 0.0145, "step": 305540 }, { "epoch": 118.66, "learning_rate": 4.17864077669903e-06, "loss": 0.0725, "step": 305550 }, { "epoch": 118.66, "learning_rate": 4.178122977346279e-06, "loss": 0.0131, "step": 305560 }, { "epoch": 118.67, "learning_rate": 4.177605177993528e-06, "loss": 0.1086, "step": 305570 }, { "epoch": 118.67, "learning_rate": 4.177087378640777e-06, "loss": 0.1194, "step": 305580 }, { "epoch": 118.68, "learning_rate": 4.176569579288027e-06, "loss": 0.0221, "step": 305590 }, { "epoch": 118.68, "learning_rate": 4.176051779935275e-06, "loss": 0.0527, "step": 305600 }, { "epoch": 118.68, "learning_rate": 4.175533980582525e-06, "loss": 0.1023, "step": 305610 }, { "epoch": 118.69, "learning_rate": 4.175016181229774e-06, "loss": 0.1207, "step": 305620 }, { "epoch": 118.69, "learning_rate": 4.1744983818770225e-06, "loss": 0.0185, "step": 305630 }, { "epoch": 118.7, "learning_rate": 4.173980582524272e-06, "loss": 0.0212, "step": 305640 }, { "epoch": 118.7, "learning_rate": 4.173462783171521e-06, "loss": 0.0644, "step": 305650 }, { "epoch": 118.7, "learning_rate": 4.1729449838187705e-06, "loss": 0.083, "step": 305660 }, { "epoch": 118.71, "learning_rate": 4.172427184466019e-06, "loss": 0.0897, "step": 305670 }, { "epoch": 118.71, "learning_rate": 4.171909385113269e-06, "loss": 0.0191, "step": 305680 }, { "epoch": 118.71, "learning_rate": 4.171391585760518e-06, "loss": 0.157, "step": 305690 }, { "epoch": 118.72, "learning_rate": 4.170873786407767e-06, "loss": 0.1191, "step": 305700 }, { "epoch": 118.72, "learning_rate": 4.170355987055016e-06, "loss": 0.0805, "step": 305710 }, { "epoch": 118.73, "learning_rate": 4.169838187702266e-06, "loss": 0.0028, "step": 305720 }, { "epoch": 118.73, "learning_rate": 4.1693203883495145e-06, "loss": 0.0411, "step": 305730 }, { "epoch": 118.73, "learning_rate": 4.168802588996764e-06, "loss": 0.0039, "step": 305740 }, { "epoch": 118.74, "learning_rate": 4.168284789644013e-06, "loss": 0.0145, "step": 305750 }, { "epoch": 118.74, "learning_rate": 4.1677669902912625e-06, "loss": 0.0601, "step": 305760 }, { "epoch": 118.75, "learning_rate": 4.167249190938511e-06, "loss": 0.0033, "step": 305770 }, { "epoch": 118.75, "learning_rate": 4.166731391585761e-06, "loss": 0.0185, "step": 305780 }, { "epoch": 118.75, "learning_rate": 4.16621359223301e-06, "loss": 0.1451, "step": 305790 }, { "epoch": 118.76, "learning_rate": 4.165695792880259e-06, "loss": 0.0002, "step": 305800 }, { "epoch": 118.76, "learning_rate": 4.165177993527508e-06, "loss": 0.0005, "step": 305810 }, { "epoch": 118.77, "learning_rate": 4.164660194174758e-06, "loss": 0.0253, "step": 305820 }, { "epoch": 118.77, "learning_rate": 4.1641423948220064e-06, "loss": 0.0351, "step": 305830 }, { "epoch": 118.77, "learning_rate": 4.163624595469256e-06, "loss": 0.0942, "step": 305840 }, { "epoch": 118.78, "learning_rate": 4.163106796116505e-06, "loss": 0.1116, "step": 305850 }, { "epoch": 118.78, "learning_rate": 4.1625889967637544e-06, "loss": 0.0565, "step": 305860 }, { "epoch": 118.78, "learning_rate": 4.162071197411003e-06, "loss": 0.0381, "step": 305870 }, { "epoch": 118.79, "learning_rate": 4.161553398058253e-06, "loss": 0.0009, "step": 305880 }, { "epoch": 118.79, "learning_rate": 4.161035598705502e-06, "loss": 0.0158, "step": 305890 }, { "epoch": 118.8, "learning_rate": 4.160517799352751e-06, "loss": 0.1461, "step": 305900 }, { "epoch": 118.8, "learning_rate": 4.16e-06, "loss": 0.0218, "step": 305910 }, { "epoch": 118.8, "learning_rate": 4.15948220064725e-06, "loss": 0.0665, "step": 305920 }, { "epoch": 118.81, "learning_rate": 4.158964401294498e-06, "loss": 0.0784, "step": 305930 }, { "epoch": 118.81, "learning_rate": 4.158446601941748e-06, "loss": 0.0355, "step": 305940 }, { "epoch": 118.82, "learning_rate": 4.157928802588997e-06, "loss": 0.0138, "step": 305950 }, { "epoch": 118.82, "learning_rate": 4.157411003236246e-06, "loss": 0.0007, "step": 305960 }, { "epoch": 118.82, "learning_rate": 4.156893203883495e-06, "loss": 0.0271, "step": 305970 }, { "epoch": 118.83, "learning_rate": 4.156375404530745e-06, "loss": 0.0218, "step": 305980 }, { "epoch": 118.83, "learning_rate": 4.1558576051779935e-06, "loss": 0.1237, "step": 305990 }, { "epoch": 118.83, "learning_rate": 4.155339805825243e-06, "loss": 0.1401, "step": 306000 }, { "epoch": 118.84, "learning_rate": 4.154822006472492e-06, "loss": 0.0014, "step": 306010 }, { "epoch": 118.84, "learning_rate": 4.1543042071197415e-06, "loss": 0.0373, "step": 306020 }, { "epoch": 118.85, "learning_rate": 4.15378640776699e-06, "loss": 0.0186, "step": 306030 }, { "epoch": 118.85, "learning_rate": 4.15326860841424e-06, "loss": 0.1324, "step": 306040 }, { "epoch": 118.85, "learning_rate": 4.152750809061489e-06, "loss": 0.0242, "step": 306050 }, { "epoch": 118.86, "learning_rate": 4.152233009708738e-06, "loss": 0.0107, "step": 306060 }, { "epoch": 118.86, "learning_rate": 4.151715210355987e-06, "loss": 0.0234, "step": 306070 }, { "epoch": 118.87, "learning_rate": 4.151197411003237e-06, "loss": 0.0605, "step": 306080 }, { "epoch": 118.87, "learning_rate": 4.150679611650486e-06, "loss": 0.073, "step": 306090 }, { "epoch": 118.87, "learning_rate": 4.150161812297735e-06, "loss": 0.0009, "step": 306100 }, { "epoch": 118.88, "learning_rate": 4.149644012944985e-06, "loss": 0.0087, "step": 306110 }, { "epoch": 118.88, "learning_rate": 4.1491262135922335e-06, "loss": 0.1229, "step": 306120 }, { "epoch": 118.89, "learning_rate": 4.148608414239483e-06, "loss": 0.035, "step": 306130 }, { "epoch": 118.89, "learning_rate": 4.148090614886732e-06, "loss": 0.0116, "step": 306140 }, { "epoch": 118.89, "learning_rate": 4.1475728155339815e-06, "loss": 0.0346, "step": 306150 }, { "epoch": 118.9, "learning_rate": 4.14705501618123e-06, "loss": 0.0109, "step": 306160 }, { "epoch": 118.9, "learning_rate": 4.14653721682848e-06, "loss": 0.0556, "step": 306170 }, { "epoch": 118.9, "learning_rate": 4.146019417475729e-06, "loss": 0.0864, "step": 306180 }, { "epoch": 118.91, "learning_rate": 4.145501618122978e-06, "loss": 0.0704, "step": 306190 }, { "epoch": 118.91, "learning_rate": 4.144983818770227e-06, "loss": 0.0025, "step": 306200 }, { "epoch": 118.92, "learning_rate": 4.144466019417477e-06, "loss": 0.0759, "step": 306210 }, { "epoch": 118.92, "learning_rate": 4.1439482200647254e-06, "loss": 0.0472, "step": 306220 }, { "epoch": 118.92, "learning_rate": 4.143430420711975e-06, "loss": 0.0013, "step": 306230 }, { "epoch": 118.93, "learning_rate": 4.142912621359224e-06, "loss": 0.0202, "step": 306240 }, { "epoch": 118.93, "learning_rate": 4.1423948220064734e-06, "loss": 0.0166, "step": 306250 }, { "epoch": 118.94, "learning_rate": 4.141877022653722e-06, "loss": 0.0499, "step": 306260 }, { "epoch": 118.94, "learning_rate": 4.141359223300971e-06, "loss": 0.0176, "step": 306270 }, { "epoch": 118.94, "learning_rate": 4.140841423948221e-06, "loss": 0.0106, "step": 306280 }, { "epoch": 118.95, "learning_rate": 4.140323624595469e-06, "loss": 0.0687, "step": 306290 }, { "epoch": 118.95, "learning_rate": 4.139805825242719e-06, "loss": 0.0861, "step": 306300 }, { "epoch": 118.96, "learning_rate": 4.139288025889968e-06, "loss": 0.0203, "step": 306310 }, { "epoch": 118.96, "learning_rate": 4.138770226537217e-06, "loss": 0.0128, "step": 306320 }, { "epoch": 118.96, "learning_rate": 4.138252427184466e-06, "loss": 0.0001, "step": 306330 }, { "epoch": 118.97, "learning_rate": 4.137734627831716e-06, "loss": 0.0974, "step": 306340 }, { "epoch": 118.97, "learning_rate": 4.1372168284789645e-06, "loss": 0.0945, "step": 306350 }, { "epoch": 118.97, "learning_rate": 4.136699029126214e-06, "loss": 0.0162, "step": 306360 }, { "epoch": 118.98, "learning_rate": 4.136181229773463e-06, "loss": 0.0396, "step": 306370 }, { "epoch": 118.98, "learning_rate": 4.1356634304207125e-06, "loss": 0.028, "step": 306380 }, { "epoch": 118.99, "learning_rate": 4.135145631067961e-06, "loss": 0.0649, "step": 306390 }, { "epoch": 118.99, "learning_rate": 4.134627831715211e-06, "loss": 0.0676, "step": 306400 }, { "epoch": 118.99, "learning_rate": 4.13411003236246e-06, "loss": 0.0206, "step": 306410 }, { "epoch": 119.0, "learning_rate": 4.133592233009709e-06, "loss": 0.0015, "step": 306420 }, { "epoch": 119.0, "eval_accuracy": 0.9526822558459422, "eval_loss": 0.38671842217445374, "eval_runtime": 8.2613, "eval_samples_per_second": 440.001, "eval_steps_per_second": 55.076, "step": 306425 }, { "epoch": 119.0, "learning_rate": 4.133074433656958e-06, "loss": 0.1241, "step": 306430 }, { "epoch": 119.01, "learning_rate": 4.132556634304207e-06, "loss": 0.0918, "step": 306440 }, { "epoch": 119.01, "learning_rate": 4.1320388349514565e-06, "loss": 0.0206, "step": 306450 }, { "epoch": 119.01, "learning_rate": 4.131521035598705e-06, "loss": 0.017, "step": 306460 }, { "epoch": 119.02, "learning_rate": 4.131003236245955e-06, "loss": 0.104, "step": 306470 }, { "epoch": 119.02, "learning_rate": 4.130485436893204e-06, "loss": 0.0909, "step": 306480 }, { "epoch": 119.03, "learning_rate": 4.129967637540453e-06, "loss": 0.0111, "step": 306490 }, { "epoch": 119.03, "learning_rate": 4.129449838187702e-06, "loss": 0.0571, "step": 306500 }, { "epoch": 119.03, "learning_rate": 4.128932038834952e-06, "loss": 0.0859, "step": 306510 }, { "epoch": 119.04, "learning_rate": 4.1284142394822004e-06, "loss": 0.0287, "step": 306520 }, { "epoch": 119.04, "learning_rate": 4.12789644012945e-06, "loss": 0.0436, "step": 306530 }, { "epoch": 119.04, "learning_rate": 4.127378640776699e-06, "loss": 0.0402, "step": 306540 }, { "epoch": 119.05, "learning_rate": 4.1268608414239484e-06, "loss": 0.0194, "step": 306550 }, { "epoch": 119.05, "learning_rate": 4.126343042071197e-06, "loss": 0.0212, "step": 306560 }, { "epoch": 119.06, "learning_rate": 4.125825242718447e-06, "loss": 0.1084, "step": 306570 }, { "epoch": 119.06, "learning_rate": 4.125307443365696e-06, "loss": 0.0069, "step": 306580 }, { "epoch": 119.06, "learning_rate": 4.124789644012945e-06, "loss": 0.0594, "step": 306590 }, { "epoch": 119.07, "learning_rate": 4.124271844660194e-06, "loss": 0.0232, "step": 306600 }, { "epoch": 119.07, "learning_rate": 4.123754045307444e-06, "loss": 0.1606, "step": 306610 }, { "epoch": 119.08, "learning_rate": 4.123236245954692e-06, "loss": 0.0362, "step": 306620 }, { "epoch": 119.08, "learning_rate": 4.122718446601942e-06, "loss": 0.053, "step": 306630 }, { "epoch": 119.08, "learning_rate": 4.122200647249191e-06, "loss": 0.0511, "step": 306640 }, { "epoch": 119.09, "learning_rate": 4.12168284789644e-06, "loss": 0.0165, "step": 306650 }, { "epoch": 119.09, "learning_rate": 4.12116504854369e-06, "loss": 0.0001, "step": 306660 }, { "epoch": 119.1, "learning_rate": 4.120647249190939e-06, "loss": 0.0766, "step": 306670 }, { "epoch": 119.1, "learning_rate": 4.120129449838188e-06, "loss": 0.014, "step": 306680 }, { "epoch": 119.1, "learning_rate": 4.119611650485437e-06, "loss": 0.0073, "step": 306690 }, { "epoch": 119.11, "learning_rate": 4.119093851132687e-06, "loss": 0.0608, "step": 306700 }, { "epoch": 119.11, "learning_rate": 4.1185760517799355e-06, "loss": 0.0647, "step": 306710 }, { "epoch": 119.11, "learning_rate": 4.118058252427185e-06, "loss": 0.0724, "step": 306720 }, { "epoch": 119.12, "learning_rate": 4.117540453074434e-06, "loss": 0.0392, "step": 306730 }, { "epoch": 119.12, "learning_rate": 4.1170226537216836e-06, "loss": 0.0351, "step": 306740 }, { "epoch": 119.13, "learning_rate": 4.116504854368932e-06, "loss": 0.0262, "step": 306750 }, { "epoch": 119.13, "learning_rate": 4.115987055016182e-06, "loss": 0.069, "step": 306760 }, { "epoch": 119.13, "learning_rate": 4.115469255663431e-06, "loss": 0.0189, "step": 306770 }, { "epoch": 119.14, "learning_rate": 4.11495145631068e-06, "loss": 0.019, "step": 306780 }, { "epoch": 119.14, "learning_rate": 4.114433656957929e-06, "loss": 0.0055, "step": 306790 }, { "epoch": 119.15, "learning_rate": 4.113915857605179e-06, "loss": 0.0639, "step": 306800 }, { "epoch": 119.15, "learning_rate": 4.1133980582524275e-06, "loss": 0.0474, "step": 306810 }, { "epoch": 119.15, "learning_rate": 4.112880258899677e-06, "loss": 0.0548, "step": 306820 }, { "epoch": 119.16, "learning_rate": 4.112362459546926e-06, "loss": 0.0141, "step": 306830 }, { "epoch": 119.16, "learning_rate": 4.1118446601941755e-06, "loss": 0.0004, "step": 306840 }, { "epoch": 119.17, "learning_rate": 4.111326860841424e-06, "loss": 0.0454, "step": 306850 }, { "epoch": 119.17, "learning_rate": 4.110809061488674e-06, "loss": 0.0711, "step": 306860 }, { "epoch": 119.17, "learning_rate": 4.110291262135923e-06, "loss": 0.1555, "step": 306870 }, { "epoch": 119.18, "learning_rate": 4.109773462783172e-06, "loss": 0.0215, "step": 306880 }, { "epoch": 119.18, "learning_rate": 4.109255663430421e-06, "loss": 0.0926, "step": 306890 }, { "epoch": 119.18, "learning_rate": 4.108737864077671e-06, "loss": 0.0001, "step": 306900 }, { "epoch": 119.19, "learning_rate": 4.1082200647249194e-06, "loss": 0.0552, "step": 306910 }, { "epoch": 119.19, "learning_rate": 4.107702265372169e-06, "loss": 0.0724, "step": 306920 }, { "epoch": 119.2, "learning_rate": 4.107184466019418e-06, "loss": 0.0123, "step": 306930 }, { "epoch": 119.2, "learning_rate": 4.1066666666666674e-06, "loss": 0.0136, "step": 306940 }, { "epoch": 119.2, "learning_rate": 4.106148867313916e-06, "loss": 0.0829, "step": 306950 }, { "epoch": 119.21, "learning_rate": 4.105631067961166e-06, "loss": 0.0025, "step": 306960 }, { "epoch": 119.21, "learning_rate": 4.105113268608415e-06, "loss": 0.0334, "step": 306970 }, { "epoch": 119.22, "learning_rate": 4.104595469255664e-06, "loss": 0.0019, "step": 306980 }, { "epoch": 119.22, "learning_rate": 4.104077669902913e-06, "loss": 0.0009, "step": 306990 }, { "epoch": 119.22, "learning_rate": 4.103559870550163e-06, "loss": 0.0706, "step": 307000 }, { "epoch": 119.23, "learning_rate": 4.103042071197411e-06, "loss": 0.0726, "step": 307010 }, { "epoch": 119.23, "learning_rate": 4.102524271844661e-06, "loss": 0.0475, "step": 307020 }, { "epoch": 119.23, "learning_rate": 4.10200647249191e-06, "loss": 0.0171, "step": 307030 }, { "epoch": 119.24, "learning_rate": 4.101488673139159e-06, "loss": 0.0207, "step": 307040 }, { "epoch": 119.24, "learning_rate": 4.100970873786408e-06, "loss": 0.034, "step": 307050 }, { "epoch": 119.25, "learning_rate": 4.100453074433658e-06, "loss": 0.0997, "step": 307060 }, { "epoch": 119.25, "learning_rate": 4.0999352750809065e-06, "loss": 0.0283, "step": 307070 }, { "epoch": 119.25, "learning_rate": 4.099417475728155e-06, "loss": 0.0649, "step": 307080 }, { "epoch": 119.26, "learning_rate": 4.098899676375405e-06, "loss": 0.0914, "step": 307090 }, { "epoch": 119.26, "learning_rate": 4.098381877022654e-06, "loss": 0.0317, "step": 307100 }, { "epoch": 119.27, "learning_rate": 4.097864077669903e-06, "loss": 0.0287, "step": 307110 }, { "epoch": 119.27, "learning_rate": 4.097346278317152e-06, "loss": 0.0108, "step": 307120 }, { "epoch": 119.27, "learning_rate": 4.096828478964402e-06, "loss": 0.0003, "step": 307130 }, { "epoch": 119.28, "learning_rate": 4.0963106796116505e-06, "loss": 0.0789, "step": 307140 }, { "epoch": 119.28, "learning_rate": 4.0957928802589e-06, "loss": 0.0668, "step": 307150 }, { "epoch": 119.29, "learning_rate": 4.095275080906149e-06, "loss": 0.0091, "step": 307160 }, { "epoch": 119.29, "learning_rate": 4.0947572815533985e-06, "loss": 0.0027, "step": 307170 }, { "epoch": 119.29, "learning_rate": 4.094239482200647e-06, "loss": 0.0008, "step": 307180 }, { "epoch": 119.3, "learning_rate": 4.093721682847897e-06, "loss": 0.0078, "step": 307190 }, { "epoch": 119.3, "learning_rate": 4.093203883495146e-06, "loss": 0.0468, "step": 307200 }, { "epoch": 119.3, "learning_rate": 4.092686084142395e-06, "loss": 0.045, "step": 307210 }, { "epoch": 119.31, "learning_rate": 4.092168284789644e-06, "loss": 0.0067, "step": 307220 }, { "epoch": 119.31, "learning_rate": 4.091650485436894e-06, "loss": 0.0946, "step": 307230 }, { "epoch": 119.32, "learning_rate": 4.0911326860841424e-06, "loss": 0.1151, "step": 307240 }, { "epoch": 119.32, "learning_rate": 4.090614886731392e-06, "loss": 0.0557, "step": 307250 }, { "epoch": 119.32, "learning_rate": 4.090097087378641e-06, "loss": 0.0006, "step": 307260 }, { "epoch": 119.33, "learning_rate": 4.0895792880258904e-06, "loss": 0.0039, "step": 307270 }, { "epoch": 119.33, "learning_rate": 4.089061488673139e-06, "loss": 0.0289, "step": 307280 }, { "epoch": 119.34, "learning_rate": 4.088543689320389e-06, "loss": 0.0002, "step": 307290 }, { "epoch": 119.34, "learning_rate": 4.088025889967638e-06, "loss": 0.098, "step": 307300 }, { "epoch": 119.34, "learning_rate": 4.087508090614887e-06, "loss": 0.0009, "step": 307310 }, { "epoch": 119.35, "learning_rate": 4.086990291262136e-06, "loss": 0.0397, "step": 307320 }, { "epoch": 119.35, "learning_rate": 4.086472491909386e-06, "loss": 0.0319, "step": 307330 }, { "epoch": 119.36, "learning_rate": 4.085954692556634e-06, "loss": 0.0126, "step": 307340 }, { "epoch": 119.36, "learning_rate": 4.085436893203884e-06, "loss": 0.0163, "step": 307350 }, { "epoch": 119.36, "learning_rate": 4.084919093851133e-06, "loss": 0.1154, "step": 307360 }, { "epoch": 119.37, "learning_rate": 4.084401294498382e-06, "loss": 0.0076, "step": 307370 }, { "epoch": 119.37, "learning_rate": 4.083883495145631e-06, "loss": 0.0089, "step": 307380 }, { "epoch": 119.37, "learning_rate": 4.083365695792881e-06, "loss": 0.0822, "step": 307390 }, { "epoch": 119.38, "learning_rate": 4.0828478964401295e-06, "loss": 0.141, "step": 307400 }, { "epoch": 119.38, "learning_rate": 4.082330097087379e-06, "loss": 0.0561, "step": 307410 }, { "epoch": 119.39, "learning_rate": 4.081812297734628e-06, "loss": 0.0112, "step": 307420 }, { "epoch": 119.39, "learning_rate": 4.0812944983818775e-06, "loss": 0.0022, "step": 307430 }, { "epoch": 119.39, "learning_rate": 4.080776699029126e-06, "loss": 0.0573, "step": 307440 }, { "epoch": 119.4, "learning_rate": 4.080258899676376e-06, "loss": 0.0002, "step": 307450 }, { "epoch": 119.4, "learning_rate": 4.079741100323625e-06, "loss": 0.0968, "step": 307460 }, { "epoch": 119.41, "learning_rate": 4.079223300970874e-06, "loss": 0.0001, "step": 307470 }, { "epoch": 119.41, "learning_rate": 4.078705501618123e-06, "loss": 0.0195, "step": 307480 }, { "epoch": 119.41, "learning_rate": 4.078187702265373e-06, "loss": 0.0265, "step": 307490 }, { "epoch": 119.42, "learning_rate": 4.0776699029126215e-06, "loss": 0.0759, "step": 307500 }, { "epoch": 119.42, "learning_rate": 4.077152103559871e-06, "loss": 0.1338, "step": 307510 }, { "epoch": 119.43, "learning_rate": 4.07663430420712e-06, "loss": 0.0728, "step": 307520 }, { "epoch": 119.43, "learning_rate": 4.0761165048543695e-06, "loss": 0.0817, "step": 307530 }, { "epoch": 119.43, "learning_rate": 4.075598705501618e-06, "loss": 0.0916, "step": 307540 }, { "epoch": 119.44, "learning_rate": 4.075080906148868e-06, "loss": 0.0787, "step": 307550 }, { "epoch": 119.44, "learning_rate": 4.074563106796117e-06, "loss": 0.0087, "step": 307560 }, { "epoch": 119.44, "learning_rate": 4.074045307443366e-06, "loss": 0.0006, "step": 307570 }, { "epoch": 119.45, "learning_rate": 4.073527508090615e-06, "loss": 0.0456, "step": 307580 }, { "epoch": 119.45, "learning_rate": 4.073009708737865e-06, "loss": 0.01, "step": 307590 }, { "epoch": 119.46, "learning_rate": 4.0724919093851134e-06, "loss": 0.0018, "step": 307600 }, { "epoch": 119.46, "learning_rate": 4.071974110032363e-06, "loss": 0.0171, "step": 307610 }, { "epoch": 119.46, "learning_rate": 4.071456310679612e-06, "loss": 0.0175, "step": 307620 }, { "epoch": 119.47, "learning_rate": 4.0709385113268614e-06, "loss": 0.0936, "step": 307630 }, { "epoch": 119.47, "learning_rate": 4.07042071197411e-06, "loss": 0.0504, "step": 307640 }, { "epoch": 119.48, "learning_rate": 4.06990291262136e-06, "loss": 0.0891, "step": 307650 }, { "epoch": 119.48, "learning_rate": 4.069385113268609e-06, "loss": 0.0934, "step": 307660 }, { "epoch": 119.48, "learning_rate": 4.068867313915858e-06, "loss": 0.0326, "step": 307670 }, { "epoch": 119.49, "learning_rate": 4.068349514563107e-06, "loss": 0.0499, "step": 307680 }, { "epoch": 119.49, "learning_rate": 4.067831715210357e-06, "loss": 0.1323, "step": 307690 }, { "epoch": 119.5, "learning_rate": 4.067313915857605e-06, "loss": 0.1009, "step": 307700 }, { "epoch": 119.5, "learning_rate": 4.066796116504855e-06, "loss": 0.0562, "step": 307710 }, { "epoch": 119.5, "learning_rate": 4.066278317152104e-06, "loss": 0.0001, "step": 307720 }, { "epoch": 119.51, "learning_rate": 4.065760517799353e-06, "loss": 0.0347, "step": 307730 }, { "epoch": 119.51, "learning_rate": 4.065242718446602e-06, "loss": 0.0327, "step": 307740 }, { "epoch": 119.51, "learning_rate": 4.064724919093852e-06, "loss": 0.0072, "step": 307750 }, { "epoch": 119.52, "learning_rate": 4.0642071197411005e-06, "loss": 0.0722, "step": 307760 }, { "epoch": 119.52, "learning_rate": 4.06368932038835e-06, "loss": 0.0277, "step": 307770 }, { "epoch": 119.53, "learning_rate": 4.063171521035599e-06, "loss": 0.06, "step": 307780 }, { "epoch": 119.53, "learning_rate": 4.0626537216828486e-06, "loss": 0.1064, "step": 307790 }, { "epoch": 119.53, "learning_rate": 4.062135922330097e-06, "loss": 0.0053, "step": 307800 }, { "epoch": 119.54, "learning_rate": 4.061618122977347e-06, "loss": 0.0537, "step": 307810 }, { "epoch": 119.54, "learning_rate": 4.061100323624596e-06, "loss": 0.0127, "step": 307820 }, { "epoch": 119.55, "learning_rate": 4.060582524271845e-06, "loss": 0.0523, "step": 307830 }, { "epoch": 119.55, "learning_rate": 4.060064724919094e-06, "loss": 0.0243, "step": 307840 }, { "epoch": 119.55, "learning_rate": 4.059546925566344e-06, "loss": 0.0804, "step": 307850 }, { "epoch": 119.56, "learning_rate": 4.0590291262135925e-06, "loss": 0.0274, "step": 307860 }, { "epoch": 119.56, "learning_rate": 4.058511326860842e-06, "loss": 0.0016, "step": 307870 }, { "epoch": 119.57, "learning_rate": 4.057993527508091e-06, "loss": 0.0209, "step": 307880 }, { "epoch": 119.57, "learning_rate": 4.0574757281553405e-06, "loss": 0.0006, "step": 307890 }, { "epoch": 119.57, "learning_rate": 4.056957928802589e-06, "loss": 0.0877, "step": 307900 }, { "epoch": 119.58, "learning_rate": 4.056440129449838e-06, "loss": 0.0012, "step": 307910 }, { "epoch": 119.58, "learning_rate": 4.055922330097088e-06, "loss": 0.0294, "step": 307920 }, { "epoch": 119.58, "learning_rate": 4.0554045307443364e-06, "loss": 0.0107, "step": 307930 }, { "epoch": 119.59, "learning_rate": 4.054886731391586e-06, "loss": 0.0022, "step": 307940 }, { "epoch": 119.59, "learning_rate": 4.054368932038835e-06, "loss": 0.0536, "step": 307950 }, { "epoch": 119.6, "learning_rate": 4.0538511326860844e-06, "loss": 0.0256, "step": 307960 }, { "epoch": 119.6, "learning_rate": 4.053333333333333e-06, "loss": 0.0139, "step": 307970 }, { "epoch": 119.6, "learning_rate": 4.052815533980583e-06, "loss": 0.0283, "step": 307980 }, { "epoch": 119.61, "learning_rate": 4.052297734627832e-06, "loss": 0.0828, "step": 307990 }, { "epoch": 119.61, "learning_rate": 4.051779935275081e-06, "loss": 0.1688, "step": 308000 }, { "epoch": 119.62, "learning_rate": 4.05126213592233e-06, "loss": 0.0328, "step": 308010 }, { "epoch": 119.62, "learning_rate": 4.05074433656958e-06, "loss": 0.017, "step": 308020 }, { "epoch": 119.62, "learning_rate": 4.050226537216828e-06, "loss": 0.0001, "step": 308030 }, { "epoch": 119.63, "learning_rate": 4.049708737864078e-06, "loss": 0.0092, "step": 308040 }, { "epoch": 119.63, "learning_rate": 4.049190938511327e-06, "loss": 0.0212, "step": 308050 }, { "epoch": 119.63, "learning_rate": 4.048673139158576e-06, "loss": 0.0356, "step": 308060 }, { "epoch": 119.64, "learning_rate": 4.048155339805825e-06, "loss": 0.0104, "step": 308070 }, { "epoch": 119.64, "learning_rate": 4.047637540453075e-06, "loss": 0.0558, "step": 308080 }, { "epoch": 119.65, "learning_rate": 4.0471197411003235e-06, "loss": 0.0286, "step": 308090 }, { "epoch": 119.65, "learning_rate": 4.046601941747573e-06, "loss": 0.0076, "step": 308100 }, { "epoch": 119.65, "learning_rate": 4.046084142394822e-06, "loss": 0.1497, "step": 308110 }, { "epoch": 119.66, "learning_rate": 4.0455663430420715e-06, "loss": 0.0001, "step": 308120 }, { "epoch": 119.66, "learning_rate": 4.04504854368932e-06, "loss": 0.0376, "step": 308130 }, { "epoch": 119.67, "learning_rate": 4.04453074433657e-06, "loss": 0.0714, "step": 308140 }, { "epoch": 119.67, "learning_rate": 4.044012944983819e-06, "loss": 0.0292, "step": 308150 }, { "epoch": 119.67, "learning_rate": 4.043495145631068e-06, "loss": 0.001, "step": 308160 }, { "epoch": 119.68, "learning_rate": 4.042977346278317e-06, "loss": 0.001, "step": 308170 }, { "epoch": 119.68, "learning_rate": 4.042459546925567e-06, "loss": 0.0402, "step": 308180 }, { "epoch": 119.69, "learning_rate": 4.0419417475728155e-06, "loss": 0.002, "step": 308190 }, { "epoch": 119.69, "learning_rate": 4.041423948220065e-06, "loss": 0.0007, "step": 308200 }, { "epoch": 119.69, "learning_rate": 4.040906148867314e-06, "loss": 0.0417, "step": 308210 }, { "epoch": 119.7, "learning_rate": 4.0403883495145635e-06, "loss": 0.0249, "step": 308220 }, { "epoch": 119.7, "learning_rate": 4.039870550161812e-06, "loss": 0.022, "step": 308230 }, { "epoch": 119.7, "learning_rate": 4.039352750809062e-06, "loss": 0.0258, "step": 308240 }, { "epoch": 119.71, "learning_rate": 4.038834951456311e-06, "loss": 0.1113, "step": 308250 }, { "epoch": 119.71, "learning_rate": 4.03831715210356e-06, "loss": 0.1169, "step": 308260 }, { "epoch": 119.72, "learning_rate": 4.037799352750809e-06, "loss": 0.0002, "step": 308270 }, { "epoch": 119.72, "learning_rate": 4.037281553398059e-06, "loss": 0.0005, "step": 308280 }, { "epoch": 119.72, "learning_rate": 4.0367637540453074e-06, "loss": 0.0002, "step": 308290 }, { "epoch": 119.73, "learning_rate": 4.036245954692557e-06, "loss": 0.1028, "step": 308300 }, { "epoch": 119.73, "learning_rate": 4.035728155339806e-06, "loss": 0.0025, "step": 308310 }, { "epoch": 119.74, "learning_rate": 4.0352103559870554e-06, "loss": 0.0912, "step": 308320 }, { "epoch": 119.74, "learning_rate": 4.034692556634305e-06, "loss": 0.0242, "step": 308330 }, { "epoch": 119.74, "learning_rate": 4.034174757281554e-06, "loss": 0.0373, "step": 308340 }, { "epoch": 119.75, "learning_rate": 4.0336569579288034e-06, "loss": 0.0166, "step": 308350 }, { "epoch": 119.75, "learning_rate": 4.033139158576052e-06, "loss": 0.0517, "step": 308360 }, { "epoch": 119.76, "learning_rate": 4.032621359223302e-06, "loss": 0.0258, "step": 308370 }, { "epoch": 119.76, "learning_rate": 4.032103559870551e-06, "loss": 0.002, "step": 308380 }, { "epoch": 119.76, "learning_rate": 4.0315857605178e-06, "loss": 0.0318, "step": 308390 }, { "epoch": 119.77, "learning_rate": 4.031067961165049e-06, "loss": 0.0502, "step": 308400 }, { "epoch": 119.77, "learning_rate": 4.030550161812299e-06, "loss": 0.0826, "step": 308410 }, { "epoch": 119.77, "learning_rate": 4.030032362459547e-06, "loss": 0.0584, "step": 308420 }, { "epoch": 119.78, "learning_rate": 4.029514563106797e-06, "loss": 0.0171, "step": 308430 }, { "epoch": 119.78, "learning_rate": 4.028996763754046e-06, "loss": 0.1051, "step": 308440 }, { "epoch": 119.79, "learning_rate": 4.028478964401295e-06, "loss": 0.0499, "step": 308450 }, { "epoch": 119.79, "learning_rate": 4.027961165048544e-06, "loss": 0.0305, "step": 308460 }, { "epoch": 119.79, "learning_rate": 4.027443365695794e-06, "loss": 0.0124, "step": 308470 }, { "epoch": 119.8, "learning_rate": 4.0269255663430426e-06, "loss": 0.0804, "step": 308480 }, { "epoch": 119.8, "learning_rate": 4.026407766990292e-06, "loss": 0.0622, "step": 308490 }, { "epoch": 119.81, "learning_rate": 4.025889967637541e-06, "loss": 0.0432, "step": 308500 }, { "epoch": 119.81, "learning_rate": 4.0253721682847906e-06, "loss": 0.0081, "step": 308510 }, { "epoch": 119.81, "learning_rate": 4.024854368932039e-06, "loss": 0.0002, "step": 308520 }, { "epoch": 119.82, "learning_rate": 4.024336569579289e-06, "loss": 0.0312, "step": 308530 }, { "epoch": 119.82, "learning_rate": 4.023818770226538e-06, "loss": 0.1119, "step": 308540 }, { "epoch": 119.83, "learning_rate": 4.0233009708737865e-06, "loss": 0.0014, "step": 308550 }, { "epoch": 119.83, "learning_rate": 4.022783171521036e-06, "loss": 0.0232, "step": 308560 }, { "epoch": 119.83, "learning_rate": 4.022265372168285e-06, "loss": 0.0008, "step": 308570 }, { "epoch": 119.84, "learning_rate": 4.0217475728155345e-06, "loss": 0.0098, "step": 308580 }, { "epoch": 119.84, "learning_rate": 4.021229773462783e-06, "loss": 0.0531, "step": 308590 }, { "epoch": 119.84, "learning_rate": 4.020711974110033e-06, "loss": 0.0743, "step": 308600 }, { "epoch": 119.85, "learning_rate": 4.020194174757282e-06, "loss": 0.0759, "step": 308610 }, { "epoch": 119.85, "learning_rate": 4.019676375404531e-06, "loss": 0.0199, "step": 308620 }, { "epoch": 119.86, "learning_rate": 4.01915857605178e-06, "loss": 0.0176, "step": 308630 }, { "epoch": 119.86, "learning_rate": 4.01864077669903e-06, "loss": 0.0557, "step": 308640 }, { "epoch": 119.86, "learning_rate": 4.0181229773462784e-06, "loss": 0.001, "step": 308650 }, { "epoch": 119.87, "learning_rate": 4.017605177993528e-06, "loss": 0.0003, "step": 308660 }, { "epoch": 119.87, "learning_rate": 4.017087378640777e-06, "loss": 0.009, "step": 308670 }, { "epoch": 119.88, "learning_rate": 4.0165695792880264e-06, "loss": 0.0473, "step": 308680 }, { "epoch": 119.88, "learning_rate": 4.016051779935275e-06, "loss": 0.1472, "step": 308690 }, { "epoch": 119.88, "learning_rate": 4.015533980582525e-06, "loss": 0.0503, "step": 308700 }, { "epoch": 119.89, "learning_rate": 4.015016181229774e-06, "loss": 0.0244, "step": 308710 }, { "epoch": 119.89, "learning_rate": 4.014498381877022e-06, "loss": 0.0692, "step": 308720 }, { "epoch": 119.9, "learning_rate": 4.013980582524272e-06, "loss": 0.0827, "step": 308730 }, { "epoch": 119.9, "learning_rate": 4.013462783171521e-06, "loss": 0.0254, "step": 308740 }, { "epoch": 119.9, "learning_rate": 4.01294498381877e-06, "loss": 0.1048, "step": 308750 }, { "epoch": 119.91, "learning_rate": 4.012427184466019e-06, "loss": 0.0287, "step": 308760 }, { "epoch": 119.91, "learning_rate": 4.011909385113269e-06, "loss": 0.0825, "step": 308770 }, { "epoch": 119.91, "learning_rate": 4.0113915857605175e-06, "loss": 0.0207, "step": 308780 }, { "epoch": 119.92, "learning_rate": 4.010873786407767e-06, "loss": 0.0272, "step": 308790 }, { "epoch": 119.92, "learning_rate": 4.010355987055016e-06, "loss": 0.2165, "step": 308800 }, { "epoch": 119.93, "learning_rate": 4.0098381877022655e-06, "loss": 0.0034, "step": 308810 }, { "epoch": 119.93, "learning_rate": 4.009320388349514e-06, "loss": 0.0831, "step": 308820 }, { "epoch": 119.93, "learning_rate": 4.008802588996764e-06, "loss": 0.0905, "step": 308830 }, { "epoch": 119.94, "learning_rate": 4.008284789644013e-06, "loss": 0.0001, "step": 308840 }, { "epoch": 119.94, "learning_rate": 4.007766990291262e-06, "loss": 0.0549, "step": 308850 }, { "epoch": 119.95, "learning_rate": 4.007249190938511e-06, "loss": 0.0026, "step": 308860 }, { "epoch": 119.95, "learning_rate": 4.006731391585761e-06, "loss": 0.1377, "step": 308870 }, { "epoch": 119.95, "learning_rate": 4.0062135922330095e-06, "loss": 0.0348, "step": 308880 }, { "epoch": 119.96, "learning_rate": 4.005695792880259e-06, "loss": 0.0661, "step": 308890 }, { "epoch": 119.96, "learning_rate": 4.005177993527509e-06, "loss": 0.1015, "step": 308900 }, { "epoch": 119.97, "learning_rate": 4.0046601941747575e-06, "loss": 0.1233, "step": 308910 }, { "epoch": 119.97, "learning_rate": 4.004142394822007e-06, "loss": 0.044, "step": 308920 }, { "epoch": 119.97, "learning_rate": 4.003624595469256e-06, "loss": 0.0094, "step": 308930 }, { "epoch": 119.98, "learning_rate": 4.0031067961165055e-06, "loss": 0.0125, "step": 308940 }, { "epoch": 119.98, "learning_rate": 4.002588996763754e-06, "loss": 0.0628, "step": 308950 }, { "epoch": 119.98, "learning_rate": 4.002071197411004e-06, "loss": 0.0008, "step": 308960 }, { "epoch": 119.99, "learning_rate": 4.001553398058253e-06, "loss": 0.0512, "step": 308970 }, { "epoch": 119.99, "learning_rate": 4.001035598705502e-06, "loss": 0.0168, "step": 308980 }, { "epoch": 120.0, "learning_rate": 4.000517799352751e-06, "loss": 0.0195, "step": 308990 }, { "epoch": 120.0, "learning_rate": 4.000000000000001e-06, "loss": 0.1152, "step": 309000 }, { "epoch": 120.0, "eval_accuracy": 0.9521320495185694, "eval_loss": 0.38419002294540405, "eval_runtime": 8.3091, "eval_samples_per_second": 437.472, "eval_steps_per_second": 54.759, "step": 309000 }, { "epoch": 120.0, "learning_rate": 3.9994822006472494e-06, "loss": 0.0991, "step": 309010 }, { "epoch": 120.01, "learning_rate": 3.998964401294499e-06, "loss": 0.1227, "step": 309020 }, { "epoch": 120.01, "learning_rate": 3.998446601941748e-06, "loss": 0.0653, "step": 309030 }, { "epoch": 120.02, "learning_rate": 3.9979288025889974e-06, "loss": 0.139, "step": 309040 }, { "epoch": 120.02, "learning_rate": 3.997411003236246e-06, "loss": 0.1291, "step": 309050 }, { "epoch": 120.02, "learning_rate": 3.996893203883496e-06, "loss": 0.0001, "step": 309060 }, { "epoch": 120.03, "learning_rate": 3.996375404530745e-06, "loss": 0.0379, "step": 309070 }, { "epoch": 120.03, "learning_rate": 3.995857605177994e-06, "loss": 0.0552, "step": 309080 }, { "epoch": 120.03, "learning_rate": 3.995339805825243e-06, "loss": 0.0618, "step": 309090 }, { "epoch": 120.04, "learning_rate": 3.994822006472493e-06, "loss": 0.1623, "step": 309100 }, { "epoch": 120.04, "learning_rate": 3.994304207119741e-06, "loss": 0.0001, "step": 309110 }, { "epoch": 120.05, "learning_rate": 3.993786407766991e-06, "loss": 0.033, "step": 309120 }, { "epoch": 120.05, "learning_rate": 3.99326860841424e-06, "loss": 0.0223, "step": 309130 }, { "epoch": 120.05, "learning_rate": 3.992750809061489e-06, "loss": 0.0015, "step": 309140 }, { "epoch": 120.06, "learning_rate": 3.992233009708738e-06, "loss": 0.0372, "step": 309150 }, { "epoch": 120.06, "learning_rate": 3.991715210355988e-06, "loss": 0.1175, "step": 309160 }, { "epoch": 120.07, "learning_rate": 3.9911974110032365e-06, "loss": 0.1884, "step": 309170 }, { "epoch": 120.07, "learning_rate": 3.990679611650486e-06, "loss": 0.0226, "step": 309180 }, { "epoch": 120.07, "learning_rate": 3.990161812297735e-06, "loss": 0.0549, "step": 309190 }, { "epoch": 120.08, "learning_rate": 3.9896440129449846e-06, "loss": 0.0871, "step": 309200 }, { "epoch": 120.08, "learning_rate": 3.989126213592233e-06, "loss": 0.1964, "step": 309210 }, { "epoch": 120.09, "learning_rate": 3.988608414239483e-06, "loss": 0.0671, "step": 309220 }, { "epoch": 120.09, "learning_rate": 3.988090614886732e-06, "loss": 0.0345, "step": 309230 }, { "epoch": 120.09, "learning_rate": 3.987572815533981e-06, "loss": 0.0273, "step": 309240 }, { "epoch": 120.1, "learning_rate": 3.98705501618123e-06, "loss": 0.0198, "step": 309250 }, { "epoch": 120.1, "learning_rate": 3.98653721682848e-06, "loss": 0.0088, "step": 309260 }, { "epoch": 120.1, "learning_rate": 3.9860194174757285e-06, "loss": 0.063, "step": 309270 }, { "epoch": 120.11, "learning_rate": 3.985501618122978e-06, "loss": 0.047, "step": 309280 }, { "epoch": 120.11, "learning_rate": 3.984983818770227e-06, "loss": 0.0092, "step": 309290 }, { "epoch": 120.12, "learning_rate": 3.9844660194174765e-06, "loss": 0.0571, "step": 309300 }, { "epoch": 120.12, "learning_rate": 3.983948220064725e-06, "loss": 0.0746, "step": 309310 }, { "epoch": 120.12, "learning_rate": 3.983430420711975e-06, "loss": 0.025, "step": 309320 }, { "epoch": 120.13, "learning_rate": 3.982912621359224e-06, "loss": 0.0226, "step": 309330 }, { "epoch": 120.13, "learning_rate": 3.982394822006473e-06, "loss": 0.1043, "step": 309340 }, { "epoch": 120.14, "learning_rate": 3.981877022653722e-06, "loss": 0.0517, "step": 309350 }, { "epoch": 120.14, "learning_rate": 3.981359223300971e-06, "loss": 0.0497, "step": 309360 }, { "epoch": 120.14, "learning_rate": 3.9808414239482204e-06, "loss": 0.0075, "step": 309370 }, { "epoch": 120.15, "learning_rate": 3.980323624595469e-06, "loss": 0.0088, "step": 309380 }, { "epoch": 120.15, "learning_rate": 3.979805825242719e-06, "loss": 0.1232, "step": 309390 }, { "epoch": 120.16, "learning_rate": 3.979288025889968e-06, "loss": 0.0004, "step": 309400 }, { "epoch": 120.16, "learning_rate": 3.978770226537217e-06, "loss": 0.1045, "step": 309410 }, { "epoch": 120.16, "learning_rate": 3.978252427184466e-06, "loss": 0.0988, "step": 309420 }, { "epoch": 120.17, "learning_rate": 3.977734627831716e-06, "loss": 0.0009, "step": 309430 }, { "epoch": 120.17, "learning_rate": 3.977216828478964e-06, "loss": 0.0608, "step": 309440 }, { "epoch": 120.17, "learning_rate": 3.976699029126214e-06, "loss": 0.0651, "step": 309450 }, { "epoch": 120.18, "learning_rate": 3.976181229773463e-06, "loss": 0.004, "step": 309460 }, { "epoch": 120.18, "learning_rate": 3.975663430420712e-06, "loss": 0.0423, "step": 309470 }, { "epoch": 120.19, "learning_rate": 3.975145631067961e-06, "loss": 0.0643, "step": 309480 }, { "epoch": 120.19, "learning_rate": 3.974627831715211e-06, "loss": 0.042, "step": 309490 }, { "epoch": 120.19, "learning_rate": 3.9741100323624595e-06, "loss": 0.0447, "step": 309500 }, { "epoch": 120.2, "learning_rate": 3.973592233009709e-06, "loss": 0.0612, "step": 309510 }, { "epoch": 120.2, "learning_rate": 3.973074433656958e-06, "loss": 0.0008, "step": 309520 }, { "epoch": 120.21, "learning_rate": 3.9725566343042076e-06, "loss": 0.109, "step": 309530 }, { "epoch": 120.21, "learning_rate": 3.972038834951456e-06, "loss": 0.0013, "step": 309540 }, { "epoch": 120.21, "learning_rate": 3.971521035598706e-06, "loss": 0.0799, "step": 309550 }, { "epoch": 120.22, "learning_rate": 3.971003236245955e-06, "loss": 0.0152, "step": 309560 }, { "epoch": 120.22, "learning_rate": 3.970485436893204e-06, "loss": 0.0951, "step": 309570 }, { "epoch": 120.23, "learning_rate": 3.969967637540453e-06, "loss": 0.0104, "step": 309580 }, { "epoch": 120.23, "learning_rate": 3.969449838187703e-06, "loss": 0.0356, "step": 309590 }, { "epoch": 120.23, "learning_rate": 3.9689320388349515e-06, "loss": 0.0837, "step": 309600 }, { "epoch": 120.24, "learning_rate": 3.968414239482201e-06, "loss": 0.0182, "step": 309610 }, { "epoch": 120.24, "learning_rate": 3.96789644012945e-06, "loss": 0.1028, "step": 309620 }, { "epoch": 120.24, "learning_rate": 3.9673786407766995e-06, "loss": 0.1984, "step": 309630 }, { "epoch": 120.25, "learning_rate": 3.966860841423948e-06, "loss": 0.0561, "step": 309640 }, { "epoch": 120.25, "learning_rate": 3.966343042071198e-06, "loss": 0.019, "step": 309650 }, { "epoch": 120.26, "learning_rate": 3.965825242718447e-06, "loss": 0.115, "step": 309660 }, { "epoch": 120.26, "learning_rate": 3.965307443365696e-06, "loss": 0.0393, "step": 309670 }, { "epoch": 120.26, "learning_rate": 3.964789644012945e-06, "loss": 0.0822, "step": 309680 }, { "epoch": 120.27, "learning_rate": 3.964271844660195e-06, "loss": 0.0858, "step": 309690 }, { "epoch": 120.27, "learning_rate": 3.9637540453074434e-06, "loss": 0.0085, "step": 309700 }, { "epoch": 120.28, "learning_rate": 3.963236245954693e-06, "loss": 0.0644, "step": 309710 }, { "epoch": 120.28, "learning_rate": 3.962718446601942e-06, "loss": 0.1916, "step": 309720 }, { "epoch": 120.28, "learning_rate": 3.9622006472491914e-06, "loss": 0.1138, "step": 309730 }, { "epoch": 120.29, "learning_rate": 3.96168284789644e-06, "loss": 0.0336, "step": 309740 }, { "epoch": 120.29, "learning_rate": 3.96116504854369e-06, "loss": 0.0164, "step": 309750 }, { "epoch": 120.3, "learning_rate": 3.960647249190939e-06, "loss": 0.0296, "step": 309760 }, { "epoch": 120.3, "learning_rate": 3.960129449838188e-06, "loss": 0.1213, "step": 309770 }, { "epoch": 120.3, "learning_rate": 3.959611650485437e-06, "loss": 0.0393, "step": 309780 }, { "epoch": 120.31, "learning_rate": 3.959093851132687e-06, "loss": 0.0003, "step": 309790 }, { "epoch": 120.31, "learning_rate": 3.958576051779935e-06, "loss": 0.1484, "step": 309800 }, { "epoch": 120.31, "learning_rate": 3.958058252427185e-06, "loss": 0.0008, "step": 309810 }, { "epoch": 120.32, "learning_rate": 3.957540453074434e-06, "loss": 0.012, "step": 309820 }, { "epoch": 120.32, "learning_rate": 3.957022653721683e-06, "loss": 0.0438, "step": 309830 }, { "epoch": 120.33, "learning_rate": 3.956504854368932e-06, "loss": 0.0162, "step": 309840 }, { "epoch": 120.33, "learning_rate": 3.955987055016182e-06, "loss": 0.0646, "step": 309850 }, { "epoch": 120.33, "learning_rate": 3.9554692556634305e-06, "loss": 0.0738, "step": 309860 }, { "epoch": 120.34, "learning_rate": 3.95495145631068e-06, "loss": 0.0769, "step": 309870 }, { "epoch": 120.34, "learning_rate": 3.954433656957929e-06, "loss": 0.0524, "step": 309880 }, { "epoch": 120.35, "learning_rate": 3.9539158576051786e-06, "loss": 0.0555, "step": 309890 }, { "epoch": 120.35, "learning_rate": 3.953398058252427e-06, "loss": 0.0893, "step": 309900 }, { "epoch": 120.35, "learning_rate": 3.952880258899677e-06, "loss": 0.1085, "step": 309910 }, { "epoch": 120.36, "learning_rate": 3.952362459546926e-06, "loss": 0.1031, "step": 309920 }, { "epoch": 120.36, "learning_rate": 3.951844660194175e-06, "loss": 0.0242, "step": 309930 }, { "epoch": 120.37, "learning_rate": 3.951326860841424e-06, "loss": 0.0641, "step": 309940 }, { "epoch": 120.37, "learning_rate": 3.950809061488674e-06, "loss": 0.012, "step": 309950 }, { "epoch": 120.37, "learning_rate": 3.9502912621359225e-06, "loss": 0.009, "step": 309960 }, { "epoch": 120.38, "learning_rate": 3.949773462783172e-06, "loss": 0.0599, "step": 309970 }, { "epoch": 120.38, "learning_rate": 3.949255663430421e-06, "loss": 0.0095, "step": 309980 }, { "epoch": 120.38, "learning_rate": 3.9487378640776705e-06, "loss": 0.0274, "step": 309990 }, { "epoch": 120.39, "learning_rate": 3.948220064724919e-06, "loss": 0.0268, "step": 310000 }, { "epoch": 120.39, "learning_rate": 3.947702265372169e-06, "loss": 0.0111, "step": 310010 }, { "epoch": 120.4, "learning_rate": 3.947184466019418e-06, "loss": 0.0018, "step": 310020 }, { "epoch": 120.4, "learning_rate": 3.946666666666667e-06, "loss": 0.0581, "step": 310030 }, { "epoch": 120.4, "learning_rate": 3.946148867313916e-06, "loss": 0.0004, "step": 310040 }, { "epoch": 120.41, "learning_rate": 3.945631067961166e-06, "loss": 0.0353, "step": 310050 }, { "epoch": 120.41, "learning_rate": 3.9451132686084144e-06, "loss": 0.0691, "step": 310060 }, { "epoch": 120.42, "learning_rate": 3.944595469255664e-06, "loss": 0.0812, "step": 310070 }, { "epoch": 120.42, "learning_rate": 3.944077669902913e-06, "loss": 0.0009, "step": 310080 }, { "epoch": 120.42, "learning_rate": 3.9435598705501624e-06, "loss": 0.029, "step": 310090 }, { "epoch": 120.43, "learning_rate": 3.943042071197411e-06, "loss": 0.0476, "step": 310100 }, { "epoch": 120.43, "learning_rate": 3.942524271844661e-06, "loss": 0.1185, "step": 310110 }, { "epoch": 120.43, "learning_rate": 3.94200647249191e-06, "loss": 0.039, "step": 310120 }, { "epoch": 120.44, "learning_rate": 3.941488673139159e-06, "loss": 0.0474, "step": 310130 }, { "epoch": 120.44, "learning_rate": 3.940970873786408e-06, "loss": 0.0021, "step": 310140 }, { "epoch": 120.45, "learning_rate": 3.940453074433658e-06, "loss": 0.0267, "step": 310150 }, { "epoch": 120.45, "learning_rate": 3.939935275080906e-06, "loss": 0.0298, "step": 310160 }, { "epoch": 120.45, "learning_rate": 3.939417475728156e-06, "loss": 0.0253, "step": 310170 }, { "epoch": 120.46, "learning_rate": 3.938899676375405e-06, "loss": 0.0215, "step": 310180 }, { "epoch": 120.46, "learning_rate": 3.9383818770226535e-06, "loss": 0.1199, "step": 310190 }, { "epoch": 120.47, "learning_rate": 3.937864077669903e-06, "loss": 0.01, "step": 310200 }, { "epoch": 120.47, "learning_rate": 3.937346278317152e-06, "loss": 0.0228, "step": 310210 }, { "epoch": 120.47, "learning_rate": 3.9368284789644016e-06, "loss": 0.0005, "step": 310220 }, { "epoch": 120.48, "learning_rate": 3.93631067961165e-06, "loss": 0.0408, "step": 310230 }, { "epoch": 120.48, "learning_rate": 3.9357928802589e-06, "loss": 0.0137, "step": 310240 }, { "epoch": 120.49, "learning_rate": 3.935275080906149e-06, "loss": 0.072, "step": 310250 }, { "epoch": 120.49, "learning_rate": 3.934757281553398e-06, "loss": 0.0013, "step": 310260 }, { "epoch": 120.49, "learning_rate": 3.934239482200647e-06, "loss": 0.0001, "step": 310270 }, { "epoch": 120.5, "learning_rate": 3.933721682847897e-06, "loss": 0.0776, "step": 310280 }, { "epoch": 120.5, "learning_rate": 3.9332038834951455e-06, "loss": 0.0891, "step": 310290 }, { "epoch": 120.5, "learning_rate": 3.932686084142395e-06, "loss": 0.1022, "step": 310300 }, { "epoch": 120.51, "learning_rate": 3.932168284789644e-06, "loss": 0.0229, "step": 310310 }, { "epoch": 120.51, "learning_rate": 3.9316504854368935e-06, "loss": 0.0968, "step": 310320 }, { "epoch": 120.52, "learning_rate": 3.931132686084142e-06, "loss": 0.0639, "step": 310330 }, { "epoch": 120.52, "learning_rate": 3.930614886731392e-06, "loss": 0.0247, "step": 310340 }, { "epoch": 120.52, "learning_rate": 3.930097087378641e-06, "loss": 0.0444, "step": 310350 }, { "epoch": 120.53, "learning_rate": 3.92957928802589e-06, "loss": 0.0191, "step": 310360 }, { "epoch": 120.53, "learning_rate": 3.929061488673139e-06, "loss": 0.0067, "step": 310370 }, { "epoch": 120.54, "learning_rate": 3.928543689320389e-06, "loss": 0.028, "step": 310380 }, { "epoch": 120.54, "learning_rate": 3.9280258899676374e-06, "loss": 0.0686, "step": 310390 }, { "epoch": 120.54, "learning_rate": 3.927508090614887e-06, "loss": 0.0686, "step": 310400 }, { "epoch": 120.55, "learning_rate": 3.926990291262136e-06, "loss": 0.0642, "step": 310410 }, { "epoch": 120.55, "learning_rate": 3.9264724919093854e-06, "loss": 0.0103, "step": 310420 }, { "epoch": 120.56, "learning_rate": 3.925954692556634e-06, "loss": 0.0868, "step": 310430 }, { "epoch": 120.56, "learning_rate": 3.925436893203884e-06, "loss": 0.013, "step": 310440 }, { "epoch": 120.56, "learning_rate": 3.924919093851133e-06, "loss": 0.001, "step": 310450 }, { "epoch": 120.57, "learning_rate": 3.924401294498382e-06, "loss": 0.0443, "step": 310460 }, { "epoch": 120.57, "learning_rate": 3.923883495145631e-06, "loss": 0.0304, "step": 310470 }, { "epoch": 120.57, "learning_rate": 3.923365695792881e-06, "loss": 0.1446, "step": 310480 }, { "epoch": 120.58, "learning_rate": 3.922847896440129e-06, "loss": 0.0019, "step": 310490 }, { "epoch": 120.58, "learning_rate": 3.922330097087379e-06, "loss": 0.0017, "step": 310500 }, { "epoch": 120.59, "learning_rate": 3.921812297734628e-06, "loss": 0.0251, "step": 310510 }, { "epoch": 120.59, "learning_rate": 3.921294498381877e-06, "loss": 0.0001, "step": 310520 }, { "epoch": 120.59, "learning_rate": 3.920776699029126e-06, "loss": 0.0092, "step": 310530 }, { "epoch": 120.6, "learning_rate": 3.920258899676376e-06, "loss": 0.0511, "step": 310540 }, { "epoch": 120.6, "learning_rate": 3.9197411003236245e-06, "loss": 0.0777, "step": 310550 }, { "epoch": 120.61, "learning_rate": 3.919223300970874e-06, "loss": 0.002, "step": 310560 }, { "epoch": 120.61, "learning_rate": 3.918705501618124e-06, "loss": 0.074, "step": 310570 }, { "epoch": 120.61, "learning_rate": 3.9181877022653726e-06, "loss": 0.0773, "step": 310580 }, { "epoch": 120.62, "learning_rate": 3.917669902912622e-06, "loss": 0.0945, "step": 310590 }, { "epoch": 120.62, "learning_rate": 3.917152103559871e-06, "loss": 0.0564, "step": 310600 }, { "epoch": 120.63, "learning_rate": 3.9166343042071206e-06, "loss": 0.1149, "step": 310610 }, { "epoch": 120.63, "learning_rate": 3.916116504854369e-06, "loss": 0.0801, "step": 310620 }, { "epoch": 120.63, "learning_rate": 3.915598705501619e-06, "loss": 0.0393, "step": 310630 }, { "epoch": 120.64, "learning_rate": 3.915080906148868e-06, "loss": 0.0008, "step": 310640 }, { "epoch": 120.64, "learning_rate": 3.914563106796117e-06, "loss": 0.0004, "step": 310650 }, { "epoch": 120.64, "learning_rate": 3.914045307443366e-06, "loss": 0.0655, "step": 310660 }, { "epoch": 120.65, "learning_rate": 3.913527508090616e-06, "loss": 0.1668, "step": 310670 }, { "epoch": 120.65, "learning_rate": 3.9130097087378645e-06, "loss": 0.071, "step": 310680 }, { "epoch": 120.66, "learning_rate": 3.912491909385114e-06, "loss": 0.0205, "step": 310690 }, { "epoch": 120.66, "learning_rate": 3.911974110032363e-06, "loss": 0.0004, "step": 310700 }, { "epoch": 120.66, "learning_rate": 3.9114563106796125e-06, "loss": 0.0244, "step": 310710 }, { "epoch": 120.67, "learning_rate": 3.910938511326861e-06, "loss": 0.082, "step": 310720 }, { "epoch": 120.67, "learning_rate": 3.910420711974111e-06, "loss": 0.0556, "step": 310730 }, { "epoch": 120.68, "learning_rate": 3.90990291262136e-06, "loss": 0.0794, "step": 310740 }, { "epoch": 120.68, "learning_rate": 3.909385113268609e-06, "loss": 0.1021, "step": 310750 }, { "epoch": 120.68, "learning_rate": 3.908867313915858e-06, "loss": 0.0287, "step": 310760 }, { "epoch": 120.69, "learning_rate": 3.908349514563108e-06, "loss": 0.0104, "step": 310770 }, { "epoch": 120.69, "learning_rate": 3.9078317152103564e-06, "loss": 0.0119, "step": 310780 }, { "epoch": 120.7, "learning_rate": 3.907313915857606e-06, "loss": 0.0432, "step": 310790 }, { "epoch": 120.7, "learning_rate": 3.906796116504855e-06, "loss": 0.0329, "step": 310800 }, { "epoch": 120.7, "learning_rate": 3.9062783171521045e-06, "loss": 0.0483, "step": 310810 }, { "epoch": 120.71, "learning_rate": 3.905760517799353e-06, "loss": 0.0799, "step": 310820 }, { "epoch": 120.71, "learning_rate": 3.905242718446602e-06, "loss": 0.0445, "step": 310830 }, { "epoch": 120.71, "learning_rate": 3.904724919093852e-06, "loss": 0.0173, "step": 310840 }, { "epoch": 120.72, "learning_rate": 3.9042071197411e-06, "loss": 0.0164, "step": 310850 }, { "epoch": 120.72, "learning_rate": 3.90368932038835e-06, "loss": 0.0321, "step": 310860 }, { "epoch": 120.73, "learning_rate": 3.903171521035599e-06, "loss": 0.1408, "step": 310870 }, { "epoch": 120.73, "learning_rate": 3.902653721682848e-06, "loss": 0.0638, "step": 310880 }, { "epoch": 120.73, "learning_rate": 3.902135922330097e-06, "loss": 0.0544, "step": 310890 }, { "epoch": 120.74, "learning_rate": 3.901618122977347e-06, "loss": 0.0547, "step": 310900 }, { "epoch": 120.74, "learning_rate": 3.9011003236245955e-06, "loss": 0.0487, "step": 310910 }, { "epoch": 120.75, "learning_rate": 3.900582524271845e-06, "loss": 0.0001, "step": 310920 }, { "epoch": 120.75, "learning_rate": 3.900064724919094e-06, "loss": 0.0019, "step": 310930 }, { "epoch": 120.75, "learning_rate": 3.8995469255663436e-06, "loss": 0.0482, "step": 310940 }, { "epoch": 120.76, "learning_rate": 3.899029126213592e-06, "loss": 0.0643, "step": 310950 }, { "epoch": 120.76, "learning_rate": 3.898511326860842e-06, "loss": 0.0272, "step": 310960 }, { "epoch": 120.77, "learning_rate": 3.897993527508091e-06, "loss": 0.0316, "step": 310970 }, { "epoch": 120.77, "learning_rate": 3.89747572815534e-06, "loss": 0.0001, "step": 310980 }, { "epoch": 120.77, "learning_rate": 3.896957928802589e-06, "loss": 0.0393, "step": 310990 }, { "epoch": 120.78, "learning_rate": 3.896440129449838e-06, "loss": 0.0419, "step": 311000 }, { "epoch": 120.78, "learning_rate": 3.8959223300970875e-06, "loss": 0.0058, "step": 311010 }, { "epoch": 120.78, "learning_rate": 3.895404530744336e-06, "loss": 0.0799, "step": 311020 }, { "epoch": 120.79, "learning_rate": 3.894886731391586e-06, "loss": 0.0186, "step": 311030 }, { "epoch": 120.79, "learning_rate": 3.894368932038835e-06, "loss": 0.0195, "step": 311040 }, { "epoch": 120.8, "learning_rate": 3.893851132686084e-06, "loss": 0.0215, "step": 311050 }, { "epoch": 120.8, "learning_rate": 3.893333333333333e-06, "loss": 0.0001, "step": 311060 }, { "epoch": 120.8, "learning_rate": 3.892815533980583e-06, "loss": 0.0381, "step": 311070 }, { "epoch": 120.81, "learning_rate": 3.8922977346278314e-06, "loss": 0.0172, "step": 311080 }, { "epoch": 120.81, "learning_rate": 3.891779935275081e-06, "loss": 0.0374, "step": 311090 }, { "epoch": 120.82, "learning_rate": 3.89126213592233e-06, "loss": 0.0001, "step": 311100 }, { "epoch": 120.82, "learning_rate": 3.8907443365695794e-06, "loss": 0.0239, "step": 311110 }, { "epoch": 120.82, "learning_rate": 3.890226537216828e-06, "loss": 0.045, "step": 311120 }, { "epoch": 120.83, "learning_rate": 3.889708737864078e-06, "loss": 0.0087, "step": 311130 }, { "epoch": 120.83, "learning_rate": 3.8891909385113274e-06, "loss": 0.0076, "step": 311140 }, { "epoch": 120.83, "learning_rate": 3.888673139158576e-06, "loss": 0.0535, "step": 311150 }, { "epoch": 120.84, "learning_rate": 3.888155339805826e-06, "loss": 0.0083, "step": 311160 }, { "epoch": 120.84, "learning_rate": 3.887637540453075e-06, "loss": 0.0842, "step": 311170 }, { "epoch": 120.85, "learning_rate": 3.887119741100324e-06, "loss": 0.0441, "step": 311180 }, { "epoch": 120.85, "learning_rate": 3.886601941747573e-06, "loss": 0.0016, "step": 311190 }, { "epoch": 120.85, "learning_rate": 3.886084142394823e-06, "loss": 0.0099, "step": 311200 }, { "epoch": 120.86, "learning_rate": 3.885566343042071e-06, "loss": 0.0285, "step": 311210 }, { "epoch": 120.86, "learning_rate": 3.885048543689321e-06, "loss": 0.0004, "step": 311220 }, { "epoch": 120.87, "learning_rate": 3.88453074433657e-06, "loss": 0.0173, "step": 311230 }, { "epoch": 120.87, "learning_rate": 3.884012944983819e-06, "loss": 0.0033, "step": 311240 }, { "epoch": 120.87, "learning_rate": 3.883495145631068e-06, "loss": 0.048, "step": 311250 }, { "epoch": 120.88, "learning_rate": 3.882977346278318e-06, "loss": 0.0602, "step": 311260 }, { "epoch": 120.88, "learning_rate": 3.8824595469255666e-06, "loss": 0.1591, "step": 311270 }, { "epoch": 120.89, "learning_rate": 3.881941747572816e-06, "loss": 0.006, "step": 311280 }, { "epoch": 120.89, "learning_rate": 3.881423948220065e-06, "loss": 0.0416, "step": 311290 }, { "epoch": 120.89, "learning_rate": 3.8809061488673146e-06, "loss": 0.0435, "step": 311300 }, { "epoch": 120.9, "learning_rate": 3.880388349514563e-06, "loss": 0.0243, "step": 311310 }, { "epoch": 120.9, "learning_rate": 3.879870550161813e-06, "loss": 0.0627, "step": 311320 }, { "epoch": 120.9, "learning_rate": 3.879352750809062e-06, "loss": 0.0698, "step": 311330 }, { "epoch": 120.91, "learning_rate": 3.878834951456311e-06, "loss": 0.0004, "step": 311340 }, { "epoch": 120.91, "learning_rate": 3.87831715210356e-06, "loss": 0.1194, "step": 311350 }, { "epoch": 120.92, "learning_rate": 3.87779935275081e-06, "loss": 0.0171, "step": 311360 }, { "epoch": 120.92, "learning_rate": 3.8772815533980585e-06, "loss": 0.1213, "step": 311370 }, { "epoch": 120.92, "learning_rate": 3.876763754045308e-06, "loss": 0.1258, "step": 311380 }, { "epoch": 120.93, "learning_rate": 3.876245954692557e-06, "loss": 0.0001, "step": 311390 }, { "epoch": 120.93, "learning_rate": 3.8757281553398065e-06, "loss": 0.026, "step": 311400 }, { "epoch": 120.94, "learning_rate": 3.875210355987055e-06, "loss": 0.0632, "step": 311410 }, { "epoch": 120.94, "learning_rate": 3.874692556634305e-06, "loss": 0.0089, "step": 311420 }, { "epoch": 120.94, "learning_rate": 3.874174757281554e-06, "loss": 0.0113, "step": 311430 }, { "epoch": 120.95, "learning_rate": 3.873656957928803e-06, "loss": 0.0029, "step": 311440 }, { "epoch": 120.95, "learning_rate": 3.873139158576052e-06, "loss": 0.0659, "step": 311450 }, { "epoch": 120.96, "learning_rate": 3.872621359223302e-06, "loss": 0.0548, "step": 311460 }, { "epoch": 120.96, "learning_rate": 3.8721035598705504e-06, "loss": 0.0002, "step": 311470 }, { "epoch": 120.96, "learning_rate": 3.8715857605178e-06, "loss": 0.1502, "step": 311480 }, { "epoch": 120.97, "learning_rate": 3.871067961165049e-06, "loss": 0.0594, "step": 311490 }, { "epoch": 120.97, "learning_rate": 3.8705501618122985e-06, "loss": 0.0285, "step": 311500 }, { "epoch": 120.97, "learning_rate": 3.870032362459547e-06, "loss": 0.2019, "step": 311510 }, { "epoch": 120.98, "learning_rate": 3.869514563106797e-06, "loss": 0.0001, "step": 311520 }, { "epoch": 120.98, "learning_rate": 3.868996763754046e-06, "loss": 0.0158, "step": 311530 }, { "epoch": 120.99, "learning_rate": 3.868478964401295e-06, "loss": 0.0193, "step": 311540 }, { "epoch": 120.99, "learning_rate": 3.867961165048544e-06, "loss": 0.0493, "step": 311550 }, { "epoch": 120.99, "learning_rate": 3.867443365695794e-06, "loss": 0.0444, "step": 311560 }, { "epoch": 121.0, "learning_rate": 3.866925566343042e-06, "loss": 0.0748, "step": 311570 }, { "epoch": 121.0, "eval_accuracy": 0.9526822558459422, "eval_loss": 0.3838876187801361, "eval_runtime": 8.201, "eval_samples_per_second": 443.239, "eval_steps_per_second": 55.481, "step": 311575 }, { "epoch": 121.0, "learning_rate": 3.866407766990292e-06, "loss": 0.0002, "step": 311580 }, { "epoch": 121.01, "learning_rate": 3.865889967637541e-06, "loss": 0.0142, "step": 311590 }, { "epoch": 121.01, "learning_rate": 3.86537216828479e-06, "loss": 0.0024, "step": 311600 }, { "epoch": 121.01, "learning_rate": 3.864854368932039e-06, "loss": 0.084, "step": 311610 }, { "epoch": 121.02, "learning_rate": 3.864336569579289e-06, "loss": 0.0558, "step": 311620 }, { "epoch": 121.02, "learning_rate": 3.8638187702265376e-06, "loss": 0.0234, "step": 311630 }, { "epoch": 121.03, "learning_rate": 3.863300970873786e-06, "loss": 0.0405, "step": 311640 }, { "epoch": 121.03, "learning_rate": 3.862783171521036e-06, "loss": 0.0406, "step": 311650 }, { "epoch": 121.03, "learning_rate": 3.862265372168285e-06, "loss": 0.0001, "step": 311660 }, { "epoch": 121.04, "learning_rate": 3.861747572815534e-06, "loss": 0.0489, "step": 311670 }, { "epoch": 121.04, "learning_rate": 3.861229773462783e-06, "loss": 0.1208, "step": 311680 }, { "epoch": 121.04, "learning_rate": 3.860711974110033e-06, "loss": 0.0325, "step": 311690 }, { "epoch": 121.05, "learning_rate": 3.8601941747572815e-06, "loss": 0.1169, "step": 311700 }, { "epoch": 121.05, "learning_rate": 3.859676375404531e-06, "loss": 0.1447, "step": 311710 }, { "epoch": 121.06, "learning_rate": 3.85915857605178e-06, "loss": 0.0554, "step": 311720 }, { "epoch": 121.06, "learning_rate": 3.8586407766990295e-06, "loss": 0.0404, "step": 311730 }, { "epoch": 121.06, "learning_rate": 3.858122977346278e-06, "loss": 0.0324, "step": 311740 }, { "epoch": 121.07, "learning_rate": 3.857605177993528e-06, "loss": 0.0277, "step": 311750 }, { "epoch": 121.07, "learning_rate": 3.857087378640777e-06, "loss": 0.0289, "step": 311760 }, { "epoch": 121.08, "learning_rate": 3.856569579288026e-06, "loss": 0.0003, "step": 311770 }, { "epoch": 121.08, "learning_rate": 3.856051779935275e-06, "loss": 0.1404, "step": 311780 }, { "epoch": 121.08, "learning_rate": 3.855533980582525e-06, "loss": 0.0022, "step": 311790 }, { "epoch": 121.09, "learning_rate": 3.8550161812297734e-06, "loss": 0.0122, "step": 311800 }, { "epoch": 121.09, "learning_rate": 3.854498381877023e-06, "loss": 0.0081, "step": 311810 }, { "epoch": 121.1, "learning_rate": 3.853980582524272e-06, "loss": 0.0323, "step": 311820 }, { "epoch": 121.1, "learning_rate": 3.8534627831715214e-06, "loss": 0.0974, "step": 311830 }, { "epoch": 121.1, "learning_rate": 3.85294498381877e-06, "loss": 0.0529, "step": 311840 }, { "epoch": 121.11, "learning_rate": 3.85242718446602e-06, "loss": 0.0008, "step": 311850 }, { "epoch": 121.11, "learning_rate": 3.851909385113269e-06, "loss": 0.0886, "step": 311860 }, { "epoch": 121.11, "learning_rate": 3.851391585760518e-06, "loss": 0.1124, "step": 311870 }, { "epoch": 121.12, "learning_rate": 3.850873786407767e-06, "loss": 0.064, "step": 311880 }, { "epoch": 121.12, "learning_rate": 3.850355987055017e-06, "loss": 0.0087, "step": 311890 }, { "epoch": 121.13, "learning_rate": 3.849838187702265e-06, "loss": 0.0081, "step": 311900 }, { "epoch": 121.13, "learning_rate": 3.849320388349515e-06, "loss": 0.0128, "step": 311910 }, { "epoch": 121.13, "learning_rate": 3.848802588996764e-06, "loss": 0.108, "step": 311920 }, { "epoch": 121.14, "learning_rate": 3.848284789644013e-06, "loss": 0.0954, "step": 311930 }, { "epoch": 121.14, "learning_rate": 3.847766990291262e-06, "loss": 0.0102, "step": 311940 }, { "epoch": 121.15, "learning_rate": 3.847249190938512e-06, "loss": 0.1279, "step": 311950 }, { "epoch": 121.15, "learning_rate": 3.8467313915857606e-06, "loss": 0.0006, "step": 311960 }, { "epoch": 121.15, "learning_rate": 3.84621359223301e-06, "loss": 0.0137, "step": 311970 }, { "epoch": 121.16, "learning_rate": 3.845695792880259e-06, "loss": 0.0297, "step": 311980 }, { "epoch": 121.16, "learning_rate": 3.8451779935275086e-06, "loss": 0.0076, "step": 311990 }, { "epoch": 121.17, "learning_rate": 3.844660194174757e-06, "loss": 0.0064, "step": 312000 }, { "epoch": 121.17, "learning_rate": 3.844142394822007e-06, "loss": 0.0001, "step": 312010 }, { "epoch": 121.17, "learning_rate": 3.843624595469256e-06, "loss": 0.0403, "step": 312020 }, { "epoch": 121.18, "learning_rate": 3.843106796116505e-06, "loss": 0.0281, "step": 312030 }, { "epoch": 121.18, "learning_rate": 3.842588996763754e-06, "loss": 0.1573, "step": 312040 }, { "epoch": 121.18, "learning_rate": 3.842071197411004e-06, "loss": 0.0551, "step": 312050 }, { "epoch": 121.19, "learning_rate": 3.8415533980582525e-06, "loss": 0.0013, "step": 312060 }, { "epoch": 121.19, "learning_rate": 3.841035598705502e-06, "loss": 0.0531, "step": 312070 }, { "epoch": 121.2, "learning_rate": 3.840517799352751e-06, "loss": 0.0013, "step": 312080 }, { "epoch": 121.2, "learning_rate": 3.8400000000000005e-06, "loss": 0.0788, "step": 312090 }, { "epoch": 121.2, "learning_rate": 3.839482200647249e-06, "loss": 0.0213, "step": 312100 }, { "epoch": 121.21, "learning_rate": 3.838964401294499e-06, "loss": 0.0836, "step": 312110 }, { "epoch": 121.21, "learning_rate": 3.838446601941748e-06, "loss": 0.0077, "step": 312120 }, { "epoch": 121.22, "learning_rate": 3.837928802588997e-06, "loss": 0.0338, "step": 312130 }, { "epoch": 121.22, "learning_rate": 3.837411003236246e-06, "loss": 0.0035, "step": 312140 }, { "epoch": 121.22, "learning_rate": 3.836893203883496e-06, "loss": 0.0113, "step": 312150 }, { "epoch": 121.23, "learning_rate": 3.8363754045307444e-06, "loss": 0.1074, "step": 312160 }, { "epoch": 121.23, "learning_rate": 3.835857605177994e-06, "loss": 0.0491, "step": 312170 }, { "epoch": 121.23, "learning_rate": 3.835339805825243e-06, "loss": 0.0265, "step": 312180 }, { "epoch": 121.24, "learning_rate": 3.8348220064724924e-06, "loss": 0.0002, "step": 312190 }, { "epoch": 121.24, "learning_rate": 3.834304207119741e-06, "loss": 0.0009, "step": 312200 }, { "epoch": 121.25, "learning_rate": 3.833786407766991e-06, "loss": 0.0301, "step": 312210 }, { "epoch": 121.25, "learning_rate": 3.83326860841424e-06, "loss": 0.0196, "step": 312220 }, { "epoch": 121.25, "learning_rate": 3.832750809061489e-06, "loss": 0.0582, "step": 312230 }, { "epoch": 121.26, "learning_rate": 3.832233009708738e-06, "loss": 0.0118, "step": 312240 }, { "epoch": 121.26, "learning_rate": 3.831715210355988e-06, "loss": 0.1244, "step": 312250 }, { "epoch": 121.27, "learning_rate": 3.831197411003237e-06, "loss": 0.0004, "step": 312260 }, { "epoch": 121.27, "learning_rate": 3.830679611650486e-06, "loss": 0.0004, "step": 312270 }, { "epoch": 121.27, "learning_rate": 3.830161812297736e-06, "loss": 0.0595, "step": 312280 }, { "epoch": 121.28, "learning_rate": 3.829644012944984e-06, "loss": 0.1599, "step": 312290 }, { "epoch": 121.28, "learning_rate": 3.829126213592233e-06, "loss": 0.1076, "step": 312300 }, { "epoch": 121.29, "learning_rate": 3.828608414239483e-06, "loss": 0.0001, "step": 312310 }, { "epoch": 121.29, "learning_rate": 3.8280906148867316e-06, "loss": 0.0511, "step": 312320 }, { "epoch": 121.29, "learning_rate": 3.827572815533981e-06, "loss": 0.0387, "step": 312330 }, { "epoch": 121.3, "learning_rate": 3.82705501618123e-06, "loss": 0.0094, "step": 312340 }, { "epoch": 121.3, "learning_rate": 3.8265372168284796e-06, "loss": 0.0102, "step": 312350 }, { "epoch": 121.3, "learning_rate": 3.826019417475728e-06, "loss": 0.0162, "step": 312360 }, { "epoch": 121.31, "learning_rate": 3.825501618122978e-06, "loss": 0.0107, "step": 312370 }, { "epoch": 121.31, "learning_rate": 3.824983818770227e-06, "loss": 0.0249, "step": 312380 }, { "epoch": 121.32, "learning_rate": 3.824466019417476e-06, "loss": 0.0004, "step": 312390 }, { "epoch": 121.32, "learning_rate": 3.823948220064725e-06, "loss": 0.0006, "step": 312400 }, { "epoch": 121.32, "learning_rate": 3.823430420711975e-06, "loss": 0.0551, "step": 312410 }, { "epoch": 121.33, "learning_rate": 3.8229126213592235e-06, "loss": 0.0199, "step": 312420 }, { "epoch": 121.33, "learning_rate": 3.822394822006473e-06, "loss": 0.0777, "step": 312430 }, { "epoch": 121.34, "learning_rate": 3.821877022653722e-06, "loss": 0.1624, "step": 312440 }, { "epoch": 121.34, "learning_rate": 3.8213592233009715e-06, "loss": 0.1108, "step": 312450 }, { "epoch": 121.34, "learning_rate": 3.82084142394822e-06, "loss": 0.0894, "step": 312460 }, { "epoch": 121.35, "learning_rate": 3.820323624595469e-06, "loss": 0.0859, "step": 312470 }, { "epoch": 121.35, "learning_rate": 3.819805825242719e-06, "loss": 0.0796, "step": 312480 }, { "epoch": 121.36, "learning_rate": 3.8192880258899674e-06, "loss": 0.074, "step": 312490 }, { "epoch": 121.36, "learning_rate": 3.818770226537217e-06, "loss": 0.0426, "step": 312500 }, { "epoch": 121.36, "learning_rate": 3.818252427184466e-06, "loss": 0.001, "step": 312510 }, { "epoch": 121.37, "learning_rate": 3.8177346278317154e-06, "loss": 0.0255, "step": 312520 }, { "epoch": 121.37, "learning_rate": 3.817216828478964e-06, "loss": 0.0355, "step": 312530 }, { "epoch": 121.37, "learning_rate": 3.816699029126214e-06, "loss": 0.0838, "step": 312540 }, { "epoch": 121.38, "learning_rate": 3.816181229773463e-06, "loss": 0.016, "step": 312550 }, { "epoch": 121.38, "learning_rate": 3.815663430420712e-06, "loss": 0.019, "step": 312560 }, { "epoch": 121.39, "learning_rate": 3.815145631067961e-06, "loss": 0.0002, "step": 312570 }, { "epoch": 121.39, "learning_rate": 3.8146278317152106e-06, "loss": 0.0179, "step": 312580 }, { "epoch": 121.39, "learning_rate": 3.81411003236246e-06, "loss": 0.0198, "step": 312590 }, { "epoch": 121.4, "learning_rate": 3.813592233009709e-06, "loss": 0.1035, "step": 312600 }, { "epoch": 121.4, "learning_rate": 3.813074433656958e-06, "loss": 0.0647, "step": 312610 }, { "epoch": 121.41, "learning_rate": 3.8125566343042074e-06, "loss": 0.1397, "step": 312620 }, { "epoch": 121.41, "learning_rate": 3.8120388349514566e-06, "loss": 0.017, "step": 312630 }, { "epoch": 121.41, "learning_rate": 3.8115210355987058e-06, "loss": 0.042, "step": 312640 }, { "epoch": 121.42, "learning_rate": 3.811003236245955e-06, "loss": 0.0701, "step": 312650 }, { "epoch": 121.42, "learning_rate": 3.810485436893204e-06, "loss": 0.001, "step": 312660 }, { "epoch": 121.43, "learning_rate": 3.8099676375404534e-06, "loss": 0.1091, "step": 312670 }, { "epoch": 121.43, "learning_rate": 3.8094498381877026e-06, "loss": 0.0495, "step": 312680 }, { "epoch": 121.43, "learning_rate": 3.8089320388349518e-06, "loss": 0.0004, "step": 312690 }, { "epoch": 121.44, "learning_rate": 3.808414239482201e-06, "loss": 0.0744, "step": 312700 }, { "epoch": 121.44, "learning_rate": 3.8078964401294497e-06, "loss": 0.1477, "step": 312710 }, { "epoch": 121.44, "learning_rate": 3.8073786407766993e-06, "loss": 0.0485, "step": 312720 }, { "epoch": 121.45, "learning_rate": 3.806860841423948e-06, "loss": 0.046, "step": 312730 }, { "epoch": 121.45, "learning_rate": 3.8063430420711977e-06, "loss": 0.0363, "step": 312740 }, { "epoch": 121.46, "learning_rate": 3.8058252427184465e-06, "loss": 0.0413, "step": 312750 }, { "epoch": 121.46, "learning_rate": 3.805307443365696e-06, "loss": 0.0097, "step": 312760 }, { "epoch": 121.46, "learning_rate": 3.804789644012945e-06, "loss": 0.0837, "step": 312770 }, { "epoch": 121.47, "learning_rate": 3.8042718446601945e-06, "loss": 0.0011, "step": 312780 }, { "epoch": 121.47, "learning_rate": 3.8037540453074433e-06, "loss": 0.0081, "step": 312790 }, { "epoch": 121.48, "learning_rate": 3.803236245954693e-06, "loss": 0.0076, "step": 312800 }, { "epoch": 121.48, "learning_rate": 3.8027184466019417e-06, "loss": 0.0612, "step": 312810 }, { "epoch": 121.48, "learning_rate": 3.8022006472491913e-06, "loss": 0.0433, "step": 312820 }, { "epoch": 121.49, "learning_rate": 3.801682847896441e-06, "loss": 0.0575, "step": 312830 }, { "epoch": 121.49, "learning_rate": 3.8011650485436897e-06, "loss": 0.0051, "step": 312840 }, { "epoch": 121.5, "learning_rate": 3.8006472491909393e-06, "loss": 0.011, "step": 312850 }, { "epoch": 121.5, "learning_rate": 3.800129449838188e-06, "loss": 0.0315, "step": 312860 }, { "epoch": 121.5, "learning_rate": 3.7996116504854373e-06, "loss": 0.0004, "step": 312870 }, { "epoch": 121.51, "learning_rate": 3.7990938511326864e-06, "loss": 0.1247, "step": 312880 }, { "epoch": 121.51, "learning_rate": 3.7985760517799356e-06, "loss": 0.0018, "step": 312890 }, { "epoch": 121.51, "learning_rate": 3.798058252427185e-06, "loss": 0.0474, "step": 312900 }, { "epoch": 121.52, "learning_rate": 3.797540453074434e-06, "loss": 0.0657, "step": 312910 }, { "epoch": 121.52, "learning_rate": 3.7970226537216832e-06, "loss": 0.0042, "step": 312920 }, { "epoch": 121.53, "learning_rate": 3.7965048543689324e-06, "loss": 0.0664, "step": 312930 }, { "epoch": 121.53, "learning_rate": 3.7959870550161816e-06, "loss": 0.054, "step": 312940 }, { "epoch": 121.53, "learning_rate": 3.795469255663431e-06, "loss": 0.0787, "step": 312950 }, { "epoch": 121.54, "learning_rate": 3.79495145631068e-06, "loss": 0.1083, "step": 312960 }, { "epoch": 121.54, "learning_rate": 3.794433656957929e-06, "loss": 0.0003, "step": 312970 }, { "epoch": 121.55, "learning_rate": 3.7939158576051784e-06, "loss": 0.0401, "step": 312980 }, { "epoch": 121.55, "learning_rate": 3.7933980582524276e-06, "loss": 0.0585, "step": 312990 }, { "epoch": 121.55, "learning_rate": 3.7928802588996768e-06, "loss": 0.0298, "step": 313000 }, { "epoch": 121.56, "learning_rate": 3.792362459546926e-06, "loss": 0.0393, "step": 313010 }, { "epoch": 121.56, "learning_rate": 3.791844660194175e-06, "loss": 0.0814, "step": 313020 }, { "epoch": 121.57, "learning_rate": 3.7913268608414244e-06, "loss": 0.0156, "step": 313030 }, { "epoch": 121.57, "learning_rate": 3.790809061488673e-06, "loss": 0.0178, "step": 313040 }, { "epoch": 121.57, "learning_rate": 3.7902912621359228e-06, "loss": 0.0275, "step": 313050 }, { "epoch": 121.58, "learning_rate": 3.7897734627831715e-06, "loss": 0.0616, "step": 313060 }, { "epoch": 121.58, "learning_rate": 3.789255663430421e-06, "loss": 0.0121, "step": 313070 }, { "epoch": 121.58, "learning_rate": 3.78873786407767e-06, "loss": 0.0356, "step": 313080 }, { "epoch": 121.59, "learning_rate": 3.7882200647249195e-06, "loss": 0.0083, "step": 313090 }, { "epoch": 121.59, "learning_rate": 3.7877022653721683e-06, "loss": 0.0221, "step": 313100 }, { "epoch": 121.6, "learning_rate": 3.787184466019418e-06, "loss": 0.0156, "step": 313110 }, { "epoch": 121.6, "learning_rate": 3.7866666666666667e-06, "loss": 0.0021, "step": 313120 }, { "epoch": 121.6, "learning_rate": 3.7861488673139163e-06, "loss": 0.053, "step": 313130 }, { "epoch": 121.61, "learning_rate": 3.785631067961165e-06, "loss": 0.0529, "step": 313140 }, { "epoch": 121.61, "learning_rate": 3.7851132686084147e-06, "loss": 0.0696, "step": 313150 }, { "epoch": 121.62, "learning_rate": 3.7845954692556635e-06, "loss": 0.0089, "step": 313160 }, { "epoch": 121.62, "learning_rate": 3.784077669902913e-06, "loss": 0.0583, "step": 313170 }, { "epoch": 121.62, "learning_rate": 3.783559870550162e-06, "loss": 0.0364, "step": 313180 }, { "epoch": 121.63, "learning_rate": 3.7830420711974115e-06, "loss": 0.0001, "step": 313190 }, { "epoch": 121.63, "learning_rate": 3.7825242718446602e-06, "loss": 0.0387, "step": 313200 }, { "epoch": 121.63, "learning_rate": 3.78200647249191e-06, "loss": 0.0007, "step": 313210 }, { "epoch": 121.64, "learning_rate": 3.7814886731391586e-06, "loss": 0.0182, "step": 313220 }, { "epoch": 121.64, "learning_rate": 3.7809708737864083e-06, "loss": 0.0726, "step": 313230 }, { "epoch": 121.65, "learning_rate": 3.780453074433657e-06, "loss": 0.0391, "step": 313240 }, { "epoch": 121.65, "learning_rate": 3.7799352750809066e-06, "loss": 0.09, "step": 313250 }, { "epoch": 121.65, "learning_rate": 3.7794174757281554e-06, "loss": 0.015, "step": 313260 }, { "epoch": 121.66, "learning_rate": 3.778899676375405e-06, "loss": 0.0134, "step": 313270 }, { "epoch": 121.66, "learning_rate": 3.778381877022654e-06, "loss": 0.1042, "step": 313280 }, { "epoch": 121.67, "learning_rate": 3.7778640776699034e-06, "loss": 0.0907, "step": 313290 }, { "epoch": 121.67, "learning_rate": 3.777346278317152e-06, "loss": 0.0436, "step": 313300 }, { "epoch": 121.67, "learning_rate": 3.776828478964402e-06, "loss": 0.0378, "step": 313310 }, { "epoch": 121.68, "learning_rate": 3.7763106796116506e-06, "loss": 0.0005, "step": 313320 }, { "epoch": 121.68, "learning_rate": 3.7757928802589e-06, "loss": 0.0149, "step": 313330 }, { "epoch": 121.69, "learning_rate": 3.775275080906149e-06, "loss": 0.0002, "step": 313340 }, { "epoch": 121.69, "learning_rate": 3.774757281553398e-06, "loss": 0.047, "step": 313350 }, { "epoch": 121.69, "learning_rate": 3.7742394822006474e-06, "loss": 0.0093, "step": 313360 }, { "epoch": 121.7, "learning_rate": 3.7737216828478966e-06, "loss": 0.0391, "step": 313370 }, { "epoch": 121.7, "learning_rate": 3.773203883495146e-06, "loss": 0.0492, "step": 313380 }, { "epoch": 121.7, "learning_rate": 3.772686084142395e-06, "loss": 0.0921, "step": 313390 }, { "epoch": 121.71, "learning_rate": 3.7721682847896446e-06, "loss": 0.0002, "step": 313400 }, { "epoch": 121.71, "learning_rate": 3.7716504854368933e-06, "loss": 0.0374, "step": 313410 }, { "epoch": 121.72, "learning_rate": 3.771132686084143e-06, "loss": 0.0908, "step": 313420 }, { "epoch": 121.72, "learning_rate": 3.7706148867313917e-06, "loss": 0.0595, "step": 313430 }, { "epoch": 121.72, "learning_rate": 3.7700970873786413e-06, "loss": 0.0354, "step": 313440 }, { "epoch": 121.73, "learning_rate": 3.76957928802589e-06, "loss": 0.0136, "step": 313450 }, { "epoch": 121.73, "learning_rate": 3.7690614886731397e-06, "loss": 0.0003, "step": 313460 }, { "epoch": 121.74, "learning_rate": 3.7685436893203885e-06, "loss": 0.0075, "step": 313470 }, { "epoch": 121.74, "learning_rate": 3.768025889967638e-06, "loss": 0.0395, "step": 313480 }, { "epoch": 121.74, "learning_rate": 3.767508090614887e-06, "loss": 0.1485, "step": 313490 }, { "epoch": 121.75, "learning_rate": 3.7669902912621365e-06, "loss": 0.0746, "step": 313500 }, { "epoch": 121.75, "learning_rate": 3.7664724919093853e-06, "loss": 0.0644, "step": 313510 }, { "epoch": 121.76, "learning_rate": 3.765954692556635e-06, "loss": 0.0364, "step": 313520 }, { "epoch": 121.76, "learning_rate": 3.7654368932038837e-06, "loss": 0.0236, "step": 313530 }, { "epoch": 121.76, "learning_rate": 3.7649190938511333e-06, "loss": 0.1112, "step": 313540 }, { "epoch": 121.77, "learning_rate": 3.764401294498382e-06, "loss": 0.0011, "step": 313550 }, { "epoch": 121.77, "learning_rate": 3.7638834951456317e-06, "loss": 0.1617, "step": 313560 }, { "epoch": 121.77, "learning_rate": 3.7633656957928804e-06, "loss": 0.0152, "step": 313570 }, { "epoch": 121.78, "learning_rate": 3.76284789644013e-06, "loss": 0.1622, "step": 313580 }, { "epoch": 121.78, "learning_rate": 3.762330097087379e-06, "loss": 0.0268, "step": 313590 }, { "epoch": 121.79, "learning_rate": 3.7618122977346285e-06, "loss": 0.0093, "step": 313600 }, { "epoch": 121.79, "learning_rate": 3.7612944983818772e-06, "loss": 0.0243, "step": 313610 }, { "epoch": 121.79, "learning_rate": 3.760776699029127e-06, "loss": 0.0683, "step": 313620 }, { "epoch": 121.8, "learning_rate": 3.7602588996763756e-06, "loss": 0.0005, "step": 313630 }, { "epoch": 121.8, "learning_rate": 3.7597411003236252e-06, "loss": 0.0792, "step": 313640 }, { "epoch": 121.81, "learning_rate": 3.759223300970874e-06, "loss": 0.02, "step": 313650 }, { "epoch": 121.81, "learning_rate": 3.7587055016181236e-06, "loss": 0.0004, "step": 313660 }, { "epoch": 121.81, "learning_rate": 3.7581877022653724e-06, "loss": 0.0253, "step": 313670 }, { "epoch": 121.82, "learning_rate": 3.7576699029126216e-06, "loss": 0.0183, "step": 313680 }, { "epoch": 121.82, "learning_rate": 3.7571521035598708e-06, "loss": 0.0268, "step": 313690 }, { "epoch": 121.83, "learning_rate": 3.75663430420712e-06, "loss": 0.031, "step": 313700 }, { "epoch": 121.83, "learning_rate": 3.756116504854369e-06, "loss": 0.1488, "step": 313710 }, { "epoch": 121.83, "learning_rate": 3.7555987055016184e-06, "loss": 0.0647, "step": 313720 }, { "epoch": 121.84, "learning_rate": 3.7550809061488676e-06, "loss": 0.0426, "step": 313730 }, { "epoch": 121.84, "learning_rate": 3.7545631067961168e-06, "loss": 0.0298, "step": 313740 }, { "epoch": 121.84, "learning_rate": 3.754045307443366e-06, "loss": 0.0329, "step": 313750 }, { "epoch": 121.85, "learning_rate": 3.753527508090615e-06, "loss": 0.0573, "step": 313760 }, { "epoch": 121.85, "learning_rate": 3.7530097087378643e-06, "loss": 0.0605, "step": 313770 }, { "epoch": 121.86, "learning_rate": 3.7524919093851135e-06, "loss": 0.0073, "step": 313780 }, { "epoch": 121.86, "learning_rate": 3.7519741100323627e-06, "loss": 0.0178, "step": 313790 }, { "epoch": 121.86, "learning_rate": 3.751456310679612e-06, "loss": 0.0292, "step": 313800 }, { "epoch": 121.87, "learning_rate": 3.750938511326861e-06, "loss": 0.1087, "step": 313810 }, { "epoch": 121.87, "learning_rate": 3.7504207119741103e-06, "loss": 0.0143, "step": 313820 }, { "epoch": 121.88, "learning_rate": 3.7499029126213595e-06, "loss": 0.0114, "step": 313830 }, { "epoch": 121.88, "learning_rate": 3.7493851132686087e-06, "loss": 0.0307, "step": 313840 }, { "epoch": 121.88, "learning_rate": 3.7488673139158575e-06, "loss": 0.0015, "step": 313850 }, { "epoch": 121.89, "learning_rate": 3.748349514563107e-06, "loss": 0.0119, "step": 313860 }, { "epoch": 121.89, "learning_rate": 3.747831715210356e-06, "loss": 0.085, "step": 313870 }, { "epoch": 121.9, "learning_rate": 3.7473139158576055e-06, "loss": 0.044, "step": 313880 }, { "epoch": 121.9, "learning_rate": 3.7467961165048542e-06, "loss": 0.1774, "step": 313890 }, { "epoch": 121.9, "learning_rate": 3.746278317152104e-06, "loss": 0.0683, "step": 313900 }, { "epoch": 121.91, "learning_rate": 3.7457605177993526e-06, "loss": 0.0913, "step": 313910 }, { "epoch": 121.91, "learning_rate": 3.7452427184466023e-06, "loss": 0.0008, "step": 313920 }, { "epoch": 121.91, "learning_rate": 3.744724919093851e-06, "loss": 0.1803, "step": 313930 }, { "epoch": 121.92, "learning_rate": 3.7442071197411006e-06, "loss": 0.0058, "step": 313940 }, { "epoch": 121.92, "learning_rate": 3.7436893203883503e-06, "loss": 0.0633, "step": 313950 }, { "epoch": 121.93, "learning_rate": 3.743171521035599e-06, "loss": 0.049, "step": 313960 }, { "epoch": 121.93, "learning_rate": 3.7426537216828487e-06, "loss": 0.0003, "step": 313970 }, { "epoch": 121.93, "learning_rate": 3.7421359223300974e-06, "loss": 0.1282, "step": 313980 }, { "epoch": 121.94, "learning_rate": 3.741618122977347e-06, "loss": 0.0003, "step": 313990 }, { "epoch": 121.94, "learning_rate": 3.741100323624596e-06, "loss": 0.0357, "step": 314000 }, { "epoch": 121.95, "learning_rate": 3.740582524271845e-06, "loss": 0.042, "step": 314010 }, { "epoch": 121.95, "learning_rate": 3.740064724919094e-06, "loss": 0.1049, "step": 314020 }, { "epoch": 121.95, "learning_rate": 3.7395469255663434e-06, "loss": 0.0343, "step": 314030 }, { "epoch": 121.96, "learning_rate": 3.7390291262135926e-06, "loss": 0.0183, "step": 314040 }, { "epoch": 121.96, "learning_rate": 3.7385113268608418e-06, "loss": 0.0053, "step": 314050 }, { "epoch": 121.97, "learning_rate": 3.737993527508091e-06, "loss": 0.0527, "step": 314060 }, { "epoch": 121.97, "learning_rate": 3.73747572815534e-06, "loss": 0.0004, "step": 314070 }, { "epoch": 121.97, "learning_rate": 3.7369579288025894e-06, "loss": 0.0948, "step": 314080 }, { "epoch": 121.98, "learning_rate": 3.7364401294498386e-06, "loss": 0.0321, "step": 314090 }, { "epoch": 121.98, "learning_rate": 3.7359223300970878e-06, "loss": 0.0104, "step": 314100 }, { "epoch": 121.98, "learning_rate": 3.735404530744337e-06, "loss": 0.081, "step": 314110 }, { "epoch": 121.99, "learning_rate": 3.734886731391586e-06, "loss": 0.0154, "step": 314120 }, { "epoch": 121.99, "learning_rate": 3.7343689320388353e-06, "loss": 0.0444, "step": 314130 }, { "epoch": 122.0, "learning_rate": 3.7338511326860845e-06, "loss": 0.0241, "step": 314140 }, { "epoch": 122.0, "learning_rate": 3.7333333333333337e-06, "loss": 0.0955, "step": 314150 }, { "epoch": 122.0, "eval_accuracy": 0.9515818431911967, "eval_loss": 0.3805077075958252, "eval_runtime": 8.2196, "eval_samples_per_second": 442.234, "eval_steps_per_second": 55.355, "step": 314150 }, { "epoch": 122.0, "learning_rate": 3.732815533980583e-06, "loss": 0.0437, "step": 314160 }, { "epoch": 122.01, "learning_rate": 3.732297734627832e-06, "loss": 0.0447, "step": 314170 }, { "epoch": 122.01, "learning_rate": 3.731779935275081e-06, "loss": 0.0441, "step": 314180 }, { "epoch": 122.02, "learning_rate": 3.7312621359223305e-06, "loss": 0.0161, "step": 314190 }, { "epoch": 122.02, "learning_rate": 3.7307443365695793e-06, "loss": 0.1053, "step": 314200 }, { "epoch": 122.02, "learning_rate": 3.730226537216829e-06, "loss": 0.0645, "step": 314210 }, { "epoch": 122.03, "learning_rate": 3.7297087378640777e-06, "loss": 0.0135, "step": 314220 }, { "epoch": 122.03, "learning_rate": 3.7291909385113273e-06, "loss": 0.0034, "step": 314230 }, { "epoch": 122.03, "learning_rate": 3.728673139158576e-06, "loss": 0.0111, "step": 314240 }, { "epoch": 122.04, "learning_rate": 3.7281553398058257e-06, "loss": 0.0434, "step": 314250 }, { "epoch": 122.04, "learning_rate": 3.7276375404530744e-06, "loss": 0.0003, "step": 314260 }, { "epoch": 122.05, "learning_rate": 3.727119741100324e-06, "loss": 0.0075, "step": 314270 }, { "epoch": 122.05, "learning_rate": 3.726601941747573e-06, "loss": 0.1492, "step": 314280 }, { "epoch": 122.05, "learning_rate": 3.7260841423948225e-06, "loss": 0.137, "step": 314290 }, { "epoch": 122.06, "learning_rate": 3.7255663430420712e-06, "loss": 0.0186, "step": 314300 }, { "epoch": 122.06, "learning_rate": 3.725048543689321e-06, "loss": 0.0004, "step": 314310 }, { "epoch": 122.07, "learning_rate": 3.7245307443365696e-06, "loss": 0.0377, "step": 314320 }, { "epoch": 122.07, "learning_rate": 3.7240129449838192e-06, "loss": 0.0124, "step": 314330 }, { "epoch": 122.07, "learning_rate": 3.723495145631068e-06, "loss": 0.0119, "step": 314340 }, { "epoch": 122.08, "learning_rate": 3.7229773462783176e-06, "loss": 0.0562, "step": 314350 }, { "epoch": 122.08, "learning_rate": 3.7224595469255664e-06, "loss": 0.0171, "step": 314360 }, { "epoch": 122.09, "learning_rate": 3.721941747572816e-06, "loss": 0.1025, "step": 314370 }, { "epoch": 122.09, "learning_rate": 3.7214239482200648e-06, "loss": 0.0749, "step": 314380 }, { "epoch": 122.09, "learning_rate": 3.7209061488673144e-06, "loss": 0.0324, "step": 314390 }, { "epoch": 122.1, "learning_rate": 3.720388349514563e-06, "loss": 0.0401, "step": 314400 }, { "epoch": 122.1, "learning_rate": 3.7198705501618128e-06, "loss": 0.0263, "step": 314410 }, { "epoch": 122.1, "learning_rate": 3.7193527508090616e-06, "loss": 0.0204, "step": 314420 }, { "epoch": 122.11, "learning_rate": 3.718834951456311e-06, "loss": 0.0537, "step": 314430 }, { "epoch": 122.11, "learning_rate": 3.71831715210356e-06, "loss": 0.0003, "step": 314440 }, { "epoch": 122.12, "learning_rate": 3.7177993527508096e-06, "loss": 0.025, "step": 314450 }, { "epoch": 122.12, "learning_rate": 3.7172815533980583e-06, "loss": 0.0005, "step": 314460 }, { "epoch": 122.12, "learning_rate": 3.716763754045308e-06, "loss": 0.0219, "step": 314470 }, { "epoch": 122.13, "learning_rate": 3.7162459546925567e-06, "loss": 0.1092, "step": 314480 }, { "epoch": 122.13, "learning_rate": 3.715728155339806e-06, "loss": 0.0601, "step": 314490 }, { "epoch": 122.14, "learning_rate": 3.7152103559870555e-06, "loss": 0.0602, "step": 314500 }, { "epoch": 122.14, "learning_rate": 3.7146925566343043e-06, "loss": 0.0675, "step": 314510 }, { "epoch": 122.14, "learning_rate": 3.714174757281554e-06, "loss": 0.022, "step": 314520 }, { "epoch": 122.15, "learning_rate": 3.7136569579288027e-06, "loss": 0.1135, "step": 314530 }, { "epoch": 122.15, "learning_rate": 3.7131391585760523e-06, "loss": 0.0011, "step": 314540 }, { "epoch": 122.16, "learning_rate": 3.712621359223301e-06, "loss": 0.0002, "step": 314550 }, { "epoch": 122.16, "learning_rate": 3.7121035598705507e-06, "loss": 0.0395, "step": 314560 }, { "epoch": 122.16, "learning_rate": 3.7115857605177995e-06, "loss": 0.0773, "step": 314570 }, { "epoch": 122.17, "learning_rate": 3.711067961165049e-06, "loss": 0.1662, "step": 314580 }, { "epoch": 122.17, "learning_rate": 3.710550161812298e-06, "loss": 0.001, "step": 314590 }, { "epoch": 122.17, "learning_rate": 3.7100323624595475e-06, "loss": 0.0219, "step": 314600 }, { "epoch": 122.18, "learning_rate": 3.7095145631067963e-06, "loss": 0.0003, "step": 314610 }, { "epoch": 122.18, "learning_rate": 3.708996763754046e-06, "loss": 0.0001, "step": 314620 }, { "epoch": 122.19, "learning_rate": 3.7084789644012946e-06, "loss": 0.0872, "step": 314630 }, { "epoch": 122.19, "learning_rate": 3.7079611650485443e-06, "loss": 0.0691, "step": 314640 }, { "epoch": 122.19, "learning_rate": 3.707443365695793e-06, "loss": 0.0714, "step": 314650 }, { "epoch": 122.2, "learning_rate": 3.7069255663430426e-06, "loss": 0.1219, "step": 314660 }, { "epoch": 122.2, "learning_rate": 3.7064077669902914e-06, "loss": 0.1325, "step": 314670 }, { "epoch": 122.21, "learning_rate": 3.705889967637541e-06, "loss": 0.0277, "step": 314680 }, { "epoch": 122.21, "learning_rate": 3.70537216828479e-06, "loss": 0.1217, "step": 314690 }, { "epoch": 122.21, "learning_rate": 3.7048543689320394e-06, "loss": 0.0322, "step": 314700 }, { "epoch": 122.22, "learning_rate": 3.704336569579288e-06, "loss": 0.0578, "step": 314710 }, { "epoch": 122.22, "learning_rate": 3.703818770226538e-06, "loss": 0.0015, "step": 314720 }, { "epoch": 122.23, "learning_rate": 3.7033009708737866e-06, "loss": 0.0004, "step": 314730 }, { "epoch": 122.23, "learning_rate": 3.702783171521036e-06, "loss": 0.0317, "step": 314740 }, { "epoch": 122.23, "learning_rate": 3.702265372168285e-06, "loss": 0.0284, "step": 314750 }, { "epoch": 122.24, "learning_rate": 3.7017475728155346e-06, "loss": 0.0692, "step": 314760 }, { "epoch": 122.24, "learning_rate": 3.7012297734627834e-06, "loss": 0.0011, "step": 314770 }, { "epoch": 122.24, "learning_rate": 3.700711974110033e-06, "loss": 0.0002, "step": 314780 }, { "epoch": 122.25, "learning_rate": 3.7001941747572818e-06, "loss": 0.0002, "step": 314790 }, { "epoch": 122.25, "learning_rate": 3.6996763754045314e-06, "loss": 0.1122, "step": 314800 }, { "epoch": 122.26, "learning_rate": 3.69915857605178e-06, "loss": 0.023, "step": 314810 }, { "epoch": 122.26, "learning_rate": 3.6986407766990293e-06, "loss": 0.0146, "step": 314820 }, { "epoch": 122.26, "learning_rate": 3.6981229773462785e-06, "loss": 0.0645, "step": 314830 }, { "epoch": 122.27, "learning_rate": 3.6976051779935277e-06, "loss": 0.0003, "step": 314840 }, { "epoch": 122.27, "learning_rate": 3.697087378640777e-06, "loss": 0.069, "step": 314850 }, { "epoch": 122.28, "learning_rate": 3.696569579288026e-06, "loss": 0.0678, "step": 314860 }, { "epoch": 122.28, "learning_rate": 3.6960517799352753e-06, "loss": 0.1672, "step": 314870 }, { "epoch": 122.28, "learning_rate": 3.6955339805825245e-06, "loss": 0.0332, "step": 314880 }, { "epoch": 122.29, "learning_rate": 3.6950161812297737e-06, "loss": 0.0274, "step": 314890 }, { "epoch": 122.29, "learning_rate": 3.694498381877023e-06, "loss": 0.1067, "step": 314900 }, { "epoch": 122.3, "learning_rate": 3.693980582524272e-06, "loss": 0.0925, "step": 314910 }, { "epoch": 122.3, "learning_rate": 3.6934627831715213e-06, "loss": 0.0464, "step": 314920 }, { "epoch": 122.3, "learning_rate": 3.6929449838187705e-06, "loss": 0.081, "step": 314930 }, { "epoch": 122.31, "learning_rate": 3.6924271844660197e-06, "loss": 0.0229, "step": 314940 }, { "epoch": 122.31, "learning_rate": 3.691909385113269e-06, "loss": 0.022, "step": 314950 }, { "epoch": 122.31, "learning_rate": 3.691391585760518e-06, "loss": 0.0211, "step": 314960 }, { "epoch": 122.32, "learning_rate": 3.6908737864077673e-06, "loss": 0.0656, "step": 314970 }, { "epoch": 122.32, "learning_rate": 3.6903559870550165e-06, "loss": 0.0216, "step": 314980 }, { "epoch": 122.33, "learning_rate": 3.6898381877022652e-06, "loss": 0.0131, "step": 314990 }, { "epoch": 122.33, "learning_rate": 3.689320388349515e-06, "loss": 0.0005, "step": 315000 }, { "epoch": 122.33, "learning_rate": 3.6888025889967636e-06, "loss": 0.0001, "step": 315010 }, { "epoch": 122.34, "learning_rate": 3.6882847896440132e-06, "loss": 0.0409, "step": 315020 }, { "epoch": 122.34, "learning_rate": 3.687766990291262e-06, "loss": 0.009, "step": 315030 }, { "epoch": 122.35, "learning_rate": 3.6872491909385116e-06, "loss": 0.0101, "step": 315040 }, { "epoch": 122.35, "learning_rate": 3.6867313915857604e-06, "loss": 0.0114, "step": 315050 }, { "epoch": 122.35, "learning_rate": 3.68621359223301e-06, "loss": 0.0531, "step": 315060 }, { "epoch": 122.36, "learning_rate": 3.6856957928802596e-06, "loss": 0.0958, "step": 315070 }, { "epoch": 122.36, "learning_rate": 3.6851779935275084e-06, "loss": 0.0788, "step": 315080 }, { "epoch": 122.37, "learning_rate": 3.684660194174758e-06, "loss": 0.0453, "step": 315090 }, { "epoch": 122.37, "learning_rate": 3.6841423948220068e-06, "loss": 0.0031, "step": 315100 }, { "epoch": 122.37, "learning_rate": 3.6836245954692564e-06, "loss": 0.0551, "step": 315110 }, { "epoch": 122.38, "learning_rate": 3.683106796116505e-06, "loss": 0.1995, "step": 315120 }, { "epoch": 122.38, "learning_rate": 3.682588996763755e-06, "loss": 0.0732, "step": 315130 }, { "epoch": 122.38, "learning_rate": 3.6820711974110036e-06, "loss": 0.1225, "step": 315140 }, { "epoch": 122.39, "learning_rate": 3.6815533980582528e-06, "loss": 0.1026, "step": 315150 }, { "epoch": 122.39, "learning_rate": 3.681035598705502e-06, "loss": 0.0848, "step": 315160 }, { "epoch": 122.4, "learning_rate": 3.680517799352751e-06, "loss": 0.0222, "step": 315170 }, { "epoch": 122.4, "learning_rate": 3.6800000000000003e-06, "loss": 0.169, "step": 315180 }, { "epoch": 122.4, "learning_rate": 3.6794822006472495e-06, "loss": 0.0099, "step": 315190 }, { "epoch": 122.41, "learning_rate": 3.6789644012944987e-06, "loss": 0.0456, "step": 315200 }, { "epoch": 122.41, "learning_rate": 3.678446601941748e-06, "loss": 0.0657, "step": 315210 }, { "epoch": 122.42, "learning_rate": 3.677928802588997e-06, "loss": 0.0132, "step": 315220 }, { "epoch": 122.42, "learning_rate": 3.6774110032362463e-06, "loss": 0.0594, "step": 315230 }, { "epoch": 122.42, "learning_rate": 3.6768932038834955e-06, "loss": 0.0114, "step": 315240 }, { "epoch": 122.43, "learning_rate": 3.6763754045307447e-06, "loss": 0.07, "step": 315250 }, { "epoch": 122.43, "learning_rate": 3.675857605177994e-06, "loss": 0.0316, "step": 315260 }, { "epoch": 122.43, "learning_rate": 3.675339805825243e-06, "loss": 0.0004, "step": 315270 }, { "epoch": 122.44, "learning_rate": 3.6748220064724923e-06, "loss": 0.003, "step": 315280 }, { "epoch": 122.44, "learning_rate": 3.6743042071197415e-06, "loss": 0.0197, "step": 315290 }, { "epoch": 122.45, "learning_rate": 3.6737864077669907e-06, "loss": 0.0168, "step": 315300 }, { "epoch": 122.45, "learning_rate": 3.67326860841424e-06, "loss": 0.0448, "step": 315310 }, { "epoch": 122.45, "learning_rate": 3.6727508090614886e-06, "loss": 0.0162, "step": 315320 }, { "epoch": 122.46, "learning_rate": 3.6722330097087383e-06, "loss": 0.0592, "step": 315330 }, { "epoch": 122.46, "learning_rate": 3.671715210355987e-06, "loss": 0.0005, "step": 315340 }, { "epoch": 122.47, "learning_rate": 3.6711974110032366e-06, "loss": 0.0122, "step": 315350 }, { "epoch": 122.47, "learning_rate": 3.6706796116504854e-06, "loss": 0.0009, "step": 315360 }, { "epoch": 122.47, "learning_rate": 3.670161812297735e-06, "loss": 0.0472, "step": 315370 }, { "epoch": 122.48, "learning_rate": 3.669644012944984e-06, "loss": 0.0763, "step": 315380 }, { "epoch": 122.48, "learning_rate": 3.6691262135922334e-06, "loss": 0.0017, "step": 315390 }, { "epoch": 122.49, "learning_rate": 3.668608414239482e-06, "loss": 0.0742, "step": 315400 }, { "epoch": 122.49, "learning_rate": 3.668090614886732e-06, "loss": 0.0299, "step": 315410 }, { "epoch": 122.49, "learning_rate": 3.6675728155339806e-06, "loss": 0.0085, "step": 315420 }, { "epoch": 122.5, "learning_rate": 3.66705501618123e-06, "loss": 0.0438, "step": 315430 }, { "epoch": 122.5, "learning_rate": 3.666537216828479e-06, "loss": 0.02, "step": 315440 }, { "epoch": 122.5, "learning_rate": 3.6660194174757286e-06, "loss": 0.0155, "step": 315450 }, { "epoch": 122.51, "learning_rate": 3.6655016181229774e-06, "loss": 0.0003, "step": 315460 }, { "epoch": 122.51, "learning_rate": 3.664983818770227e-06, "loss": 0.0126, "step": 315470 }, { "epoch": 122.52, "learning_rate": 3.6644660194174758e-06, "loss": 0.0001, "step": 315480 }, { "epoch": 122.52, "learning_rate": 3.6639482200647254e-06, "loss": 0.1207, "step": 315490 }, { "epoch": 122.52, "learning_rate": 3.663430420711974e-06, "loss": 0.0055, "step": 315500 }, { "epoch": 122.53, "learning_rate": 3.6629126213592238e-06, "loss": 0.1579, "step": 315510 }, { "epoch": 122.53, "learning_rate": 3.6623948220064725e-06, "loss": 0.0225, "step": 315520 }, { "epoch": 122.54, "learning_rate": 3.661877022653722e-06, "loss": 0.0166, "step": 315530 }, { "epoch": 122.54, "learning_rate": 3.661359223300971e-06, "loss": 0.0608, "step": 315540 }, { "epoch": 122.54, "learning_rate": 3.6608414239482205e-06, "loss": 0.0687, "step": 315550 }, { "epoch": 122.55, "learning_rate": 3.6603236245954693e-06, "loss": 0.0029, "step": 315560 }, { "epoch": 122.55, "learning_rate": 3.659805825242719e-06, "loss": 0.0441, "step": 315570 }, { "epoch": 122.56, "learning_rate": 3.6592880258899677e-06, "loss": 0.2118, "step": 315580 }, { "epoch": 122.56, "learning_rate": 3.6587702265372173e-06, "loss": 0.0557, "step": 315590 }, { "epoch": 122.56, "learning_rate": 3.658252427184466e-06, "loss": 0.1214, "step": 315600 }, { "epoch": 122.57, "learning_rate": 3.6577346278317157e-06, "loss": 0.0969, "step": 315610 }, { "epoch": 122.57, "learning_rate": 3.657216828478965e-06, "loss": 0.062, "step": 315620 }, { "epoch": 122.57, "learning_rate": 3.656699029126214e-06, "loss": 0.052, "step": 315630 }, { "epoch": 122.58, "learning_rate": 3.6561812297734633e-06, "loss": 0.0002, "step": 315640 }, { "epoch": 122.58, "learning_rate": 3.655663430420712e-06, "loss": 0.0822, "step": 315650 }, { "epoch": 122.59, "learning_rate": 3.6551456310679617e-06, "loss": 0.0015, "step": 315660 }, { "epoch": 122.59, "learning_rate": 3.6546278317152104e-06, "loss": 0.0086, "step": 315670 }, { "epoch": 122.59, "learning_rate": 3.65411003236246e-06, "loss": 0.0505, "step": 315680 }, { "epoch": 122.6, "learning_rate": 3.653592233009709e-06, "loss": 0.0734, "step": 315690 }, { "epoch": 122.6, "learning_rate": 3.6530744336569585e-06, "loss": 0.0245, "step": 315700 }, { "epoch": 122.61, "learning_rate": 3.6525566343042072e-06, "loss": 0.0211, "step": 315710 }, { "epoch": 122.61, "learning_rate": 3.652038834951457e-06, "loss": 0.0208, "step": 315720 }, { "epoch": 122.61, "learning_rate": 3.6515210355987056e-06, "loss": 0.0602, "step": 315730 }, { "epoch": 122.62, "learning_rate": 3.6510032362459552e-06, "loss": 0.0009, "step": 315740 }, { "epoch": 122.62, "learning_rate": 3.650485436893204e-06, "loss": 0.0007, "step": 315750 }, { "epoch": 122.63, "learning_rate": 3.6499676375404536e-06, "loss": 0.0044, "step": 315760 }, { "epoch": 122.63, "learning_rate": 3.6494498381877024e-06, "loss": 0.086, "step": 315770 }, { "epoch": 122.63, "learning_rate": 3.648932038834952e-06, "loss": 0.1922, "step": 315780 }, { "epoch": 122.64, "learning_rate": 3.6484142394822008e-06, "loss": 0.1045, "step": 315790 }, { "epoch": 122.64, "learning_rate": 3.6478964401294504e-06, "loss": 0.0111, "step": 315800 }, { "epoch": 122.64, "learning_rate": 3.647378640776699e-06, "loss": 0.0616, "step": 315810 }, { "epoch": 122.65, "learning_rate": 3.646860841423949e-06, "loss": 0.0002, "step": 315820 }, { "epoch": 122.65, "learning_rate": 3.6463430420711976e-06, "loss": 0.0001, "step": 315830 }, { "epoch": 122.66, "learning_rate": 3.645825242718447e-06, "loss": 0.0161, "step": 315840 }, { "epoch": 122.66, "learning_rate": 3.645307443365696e-06, "loss": 0.0829, "step": 315850 }, { "epoch": 122.66, "learning_rate": 3.6447896440129456e-06, "loss": 0.0404, "step": 315860 }, { "epoch": 122.67, "learning_rate": 3.6442718446601943e-06, "loss": 0.0484, "step": 315870 }, { "epoch": 122.67, "learning_rate": 3.643754045307444e-06, "loss": 0.0137, "step": 315880 }, { "epoch": 122.68, "learning_rate": 3.6432362459546927e-06, "loss": 0.1349, "step": 315890 }, { "epoch": 122.68, "learning_rate": 3.6427184466019423e-06, "loss": 0.1004, "step": 315900 }, { "epoch": 122.68, "learning_rate": 3.642200647249191e-06, "loss": 0.0013, "step": 315910 }, { "epoch": 122.69, "learning_rate": 3.6416828478964407e-06, "loss": 0.0233, "step": 315920 }, { "epoch": 122.69, "learning_rate": 3.6411650485436895e-06, "loss": 0.1507, "step": 315930 }, { "epoch": 122.7, "learning_rate": 3.640647249190939e-06, "loss": 0.1468, "step": 315940 }, { "epoch": 122.7, "learning_rate": 3.640129449838188e-06, "loss": 0.0006, "step": 315950 }, { "epoch": 122.7, "learning_rate": 3.639611650485437e-06, "loss": 0.066, "step": 315960 }, { "epoch": 122.71, "learning_rate": 3.6390938511326863e-06, "loss": 0.0723, "step": 315970 }, { "epoch": 122.71, "learning_rate": 3.6385760517799355e-06, "loss": 0.0271, "step": 315980 }, { "epoch": 122.71, "learning_rate": 3.6380582524271847e-06, "loss": 0.0005, "step": 315990 }, { "epoch": 122.72, "learning_rate": 3.637540453074434e-06, "loss": 0.0013, "step": 316000 }, { "epoch": 122.72, "learning_rate": 3.637022653721683e-06, "loss": 0.0867, "step": 316010 }, { "epoch": 122.73, "learning_rate": 3.6365048543689323e-06, "loss": 0.032, "step": 316020 }, { "epoch": 122.73, "learning_rate": 3.6359870550161815e-06, "loss": 0.0561, "step": 316030 }, { "epoch": 122.73, "learning_rate": 3.6354692556634306e-06, "loss": 0.0805, "step": 316040 }, { "epoch": 122.74, "learning_rate": 3.63495145631068e-06, "loss": 0.1851, "step": 316050 }, { "epoch": 122.74, "learning_rate": 3.634433656957929e-06, "loss": 0.0716, "step": 316060 }, { "epoch": 122.75, "learning_rate": 3.6339158576051782e-06, "loss": 0.01, "step": 316070 }, { "epoch": 122.75, "learning_rate": 3.6333980582524274e-06, "loss": 0.0184, "step": 316080 }, { "epoch": 122.75, "learning_rate": 3.6328802588996766e-06, "loss": 0.0652, "step": 316090 }, { "epoch": 122.76, "learning_rate": 3.632362459546926e-06, "loss": 0.0083, "step": 316100 }, { "epoch": 122.76, "learning_rate": 3.631844660194175e-06, "loss": 0.0256, "step": 316110 }, { "epoch": 122.77, "learning_rate": 3.631326860841424e-06, "loss": 0.0157, "step": 316120 }, { "epoch": 122.77, "learning_rate": 3.630809061488673e-06, "loss": 0.0561, "step": 316130 }, { "epoch": 122.77, "learning_rate": 3.6302912621359226e-06, "loss": 0.1023, "step": 316140 }, { "epoch": 122.78, "learning_rate": 3.6297734627831714e-06, "loss": 0.0148, "step": 316150 }, { "epoch": 122.78, "learning_rate": 3.629255663430421e-06, "loss": 0.0517, "step": 316160 }, { "epoch": 122.78, "learning_rate": 3.6287378640776698e-06, "loss": 0.0254, "step": 316170 }, { "epoch": 122.79, "learning_rate": 3.6282200647249194e-06, "loss": 0.0352, "step": 316180 }, { "epoch": 122.79, "learning_rate": 3.627702265372169e-06, "loss": 0.1056, "step": 316190 }, { "epoch": 122.8, "learning_rate": 3.6271844660194178e-06, "loss": 0.1261, "step": 316200 }, { "epoch": 122.8, "learning_rate": 3.6266666666666674e-06, "loss": 0.0681, "step": 316210 }, { "epoch": 122.8, "learning_rate": 3.626148867313916e-06, "loss": 0.0101, "step": 316220 }, { "epoch": 122.81, "learning_rate": 3.6256310679611658e-06, "loss": 0.0126, "step": 316230 }, { "epoch": 122.81, "learning_rate": 3.6251132686084145e-06, "loss": 0.0291, "step": 316240 }, { "epoch": 122.82, "learning_rate": 3.624595469255664e-06, "loss": 0.0423, "step": 316250 }, { "epoch": 122.82, "learning_rate": 3.624077669902913e-06, "loss": 0.1383, "step": 316260 }, { "epoch": 122.82, "learning_rate": 3.6235598705501625e-06, "loss": 0.0015, "step": 316270 }, { "epoch": 122.83, "learning_rate": 3.6230420711974113e-06, "loss": 0.1236, "step": 316280 }, { "epoch": 122.83, "learning_rate": 3.6225242718446605e-06, "loss": 0.0288, "step": 316290 }, { "epoch": 122.83, "learning_rate": 3.6220064724919097e-06, "loss": 0.1618, "step": 316300 }, { "epoch": 122.84, "learning_rate": 3.621488673139159e-06, "loss": 0.0629, "step": 316310 }, { "epoch": 122.84, "learning_rate": 3.620970873786408e-06, "loss": 0.0969, "step": 316320 }, { "epoch": 122.85, "learning_rate": 3.6204530744336573e-06, "loss": 0.0496, "step": 316330 }, { "epoch": 122.85, "learning_rate": 3.6199352750809065e-06, "loss": 0.0074, "step": 316340 }, { "epoch": 122.85, "learning_rate": 3.6194174757281557e-06, "loss": 0.1352, "step": 316350 }, { "epoch": 122.86, "learning_rate": 3.618899676375405e-06, "loss": 0.0699, "step": 316360 }, { "epoch": 122.86, "learning_rate": 3.618381877022654e-06, "loss": 0.0249, "step": 316370 }, { "epoch": 122.87, "learning_rate": 3.6178640776699033e-06, "loss": 0.0229, "step": 316380 }, { "epoch": 122.87, "learning_rate": 3.6173462783171525e-06, "loss": 0.0704, "step": 316390 }, { "epoch": 122.87, "learning_rate": 3.6168284789644017e-06, "loss": 0.0155, "step": 316400 }, { "epoch": 122.88, "learning_rate": 3.616310679611651e-06, "loss": 0.082, "step": 316410 }, { "epoch": 122.88, "learning_rate": 3.6157928802589e-06, "loss": 0.0014, "step": 316420 }, { "epoch": 122.89, "learning_rate": 3.6152750809061492e-06, "loss": 0.0728, "step": 316430 }, { "epoch": 122.89, "learning_rate": 3.6147572815533984e-06, "loss": 0.0162, "step": 316440 }, { "epoch": 122.89, "learning_rate": 3.6142394822006476e-06, "loss": 0.0971, "step": 316450 }, { "epoch": 122.9, "learning_rate": 3.6137216828478964e-06, "loss": 0.0003, "step": 316460 }, { "epoch": 122.9, "learning_rate": 3.613203883495146e-06, "loss": 0.0138, "step": 316470 }, { "epoch": 122.9, "learning_rate": 3.6126860841423948e-06, "loss": 0.0263, "step": 316480 }, { "epoch": 122.91, "learning_rate": 3.6121682847896444e-06, "loss": 0.0003, "step": 316490 }, { "epoch": 122.91, "learning_rate": 3.611650485436893e-06, "loss": 0.0021, "step": 316500 }, { "epoch": 122.92, "learning_rate": 3.611132686084143e-06, "loss": 0.0902, "step": 316510 }, { "epoch": 122.92, "learning_rate": 3.6106148867313916e-06, "loss": 0.0503, "step": 316520 }, { "epoch": 122.92, "learning_rate": 3.610097087378641e-06, "loss": 0.0274, "step": 316530 }, { "epoch": 122.93, "learning_rate": 3.60957928802589e-06, "loss": 0.0596, "step": 316540 }, { "epoch": 122.93, "learning_rate": 3.6090614886731396e-06, "loss": 0.0686, "step": 316550 }, { "epoch": 122.94, "learning_rate": 3.6085436893203883e-06, "loss": 0.1064, "step": 316560 }, { "epoch": 122.94, "learning_rate": 3.608025889967638e-06, "loss": 0.0459, "step": 316570 }, { "epoch": 122.94, "learning_rate": 3.6075080906148867e-06, "loss": 0.0066, "step": 316580 }, { "epoch": 122.95, "learning_rate": 3.6069902912621363e-06, "loss": 0.0545, "step": 316590 }, { "epoch": 122.95, "learning_rate": 3.606472491909385e-06, "loss": 0.0027, "step": 316600 }, { "epoch": 122.96, "learning_rate": 3.6059546925566347e-06, "loss": 0.1454, "step": 316610 }, { "epoch": 122.96, "learning_rate": 3.6054368932038835e-06, "loss": 0.1015, "step": 316620 }, { "epoch": 122.96, "learning_rate": 3.604919093851133e-06, "loss": 0.0258, "step": 316630 }, { "epoch": 122.97, "learning_rate": 3.604401294498382e-06, "loss": 0.0193, "step": 316640 }, { "epoch": 122.97, "learning_rate": 3.6038834951456315e-06, "loss": 0.0588, "step": 316650 }, { "epoch": 122.97, "learning_rate": 3.6033656957928803e-06, "loss": 0.0415, "step": 316660 }, { "epoch": 122.98, "learning_rate": 3.60284789644013e-06, "loss": 0.0468, "step": 316670 }, { "epoch": 122.98, "learning_rate": 3.6023300970873787e-06, "loss": 0.0622, "step": 316680 }, { "epoch": 122.99, "learning_rate": 3.6018122977346283e-06, "loss": 0.0018, "step": 316690 }, { "epoch": 122.99, "learning_rate": 3.601294498381877e-06, "loss": 0.0142, "step": 316700 }, { "epoch": 122.99, "learning_rate": 3.6007766990291267e-06, "loss": 0.0004, "step": 316710 }, { "epoch": 123.0, "learning_rate": 3.6002588996763755e-06, "loss": 0.0043, "step": 316720 }, { "epoch": 123.0, "eval_accuracy": 0.9521320495185694, "eval_loss": 0.38330063223838806, "eval_runtime": 8.1818, "eval_samples_per_second": 444.276, "eval_steps_per_second": 55.611, "step": 316725 }, { "epoch": 123.0, "learning_rate": 3.599741100323625e-06, "loss": 0.0721, "step": 316730 }, { "epoch": 123.01, "learning_rate": 3.5992233009708743e-06, "loss": 0.0073, "step": 316740 }, { "epoch": 123.01, "learning_rate": 3.5987055016181235e-06, "loss": 0.011, "step": 316750 }, { "epoch": 123.01, "learning_rate": 3.5981877022653727e-06, "loss": 0.0015, "step": 316760 }, { "epoch": 123.02, "learning_rate": 3.597669902912622e-06, "loss": 0.0539, "step": 316770 }, { "epoch": 123.02, "learning_rate": 3.597152103559871e-06, "loss": 0.0008, "step": 316780 }, { "epoch": 123.03, "learning_rate": 3.59663430420712e-06, "loss": 0.0001, "step": 316790 }, { "epoch": 123.03, "learning_rate": 3.5961165048543694e-06, "loss": 0.0125, "step": 316800 }, { "epoch": 123.03, "learning_rate": 3.595598705501618e-06, "loss": 0.024, "step": 316810 }, { "epoch": 123.04, "learning_rate": 3.595080906148868e-06, "loss": 0.0291, "step": 316820 }, { "epoch": 123.04, "learning_rate": 3.5945631067961166e-06, "loss": 0.073, "step": 316830 }, { "epoch": 123.04, "learning_rate": 3.594045307443366e-06, "loss": 0.0812, "step": 316840 }, { "epoch": 123.05, "learning_rate": 3.593527508090615e-06, "loss": 0.0545, "step": 316850 }, { "epoch": 123.05, "learning_rate": 3.5930097087378646e-06, "loss": 0.0571, "step": 316860 }, { "epoch": 123.06, "learning_rate": 3.5924919093851134e-06, "loss": 0.1486, "step": 316870 }, { "epoch": 123.06, "learning_rate": 3.591974110032363e-06, "loss": 0.0172, "step": 316880 }, { "epoch": 123.06, "learning_rate": 3.5914563106796118e-06, "loss": 0.0299, "step": 316890 }, { "epoch": 123.07, "learning_rate": 3.5909385113268614e-06, "loss": 0.0067, "step": 316900 }, { "epoch": 123.07, "learning_rate": 3.59042071197411e-06, "loss": 0.0654, "step": 316910 }, { "epoch": 123.08, "learning_rate": 3.5899029126213598e-06, "loss": 0.0696, "step": 316920 }, { "epoch": 123.08, "learning_rate": 3.5893851132686085e-06, "loss": 0.068, "step": 316930 }, { "epoch": 123.08, "learning_rate": 3.588867313915858e-06, "loss": 0.0724, "step": 316940 }, { "epoch": 123.09, "learning_rate": 3.588349514563107e-06, "loss": 0.0825, "step": 316950 }, { "epoch": 123.09, "learning_rate": 3.5878317152103565e-06, "loss": 0.0002, "step": 316960 }, { "epoch": 123.1, "learning_rate": 3.5873139158576053e-06, "loss": 0.0161, "step": 316970 }, { "epoch": 123.1, "learning_rate": 3.586796116504855e-06, "loss": 0.0078, "step": 316980 }, { "epoch": 123.1, "learning_rate": 3.5862783171521037e-06, "loss": 0.0883, "step": 316990 }, { "epoch": 123.11, "learning_rate": 3.5857605177993533e-06, "loss": 0.1626, "step": 317000 }, { "epoch": 123.11, "learning_rate": 3.585242718446602e-06, "loss": 0.0917, "step": 317010 }, { "epoch": 123.11, "learning_rate": 3.5847249190938517e-06, "loss": 0.111, "step": 317020 }, { "epoch": 123.12, "learning_rate": 3.5842071197411005e-06, "loss": 0.0681, "step": 317030 }, { "epoch": 123.12, "learning_rate": 3.58368932038835e-06, "loss": 0.0001, "step": 317040 }, { "epoch": 123.13, "learning_rate": 3.583171521035599e-06, "loss": 0.0209, "step": 317050 }, { "epoch": 123.13, "learning_rate": 3.5826537216828485e-06, "loss": 0.1319, "step": 317060 }, { "epoch": 123.13, "learning_rate": 3.5821359223300973e-06, "loss": 0.0248, "step": 317070 }, { "epoch": 123.14, "learning_rate": 3.581618122977347e-06, "loss": 0.1271, "step": 317080 }, { "epoch": 123.14, "learning_rate": 3.5811003236245956e-06, "loss": 0.0589, "step": 317090 }, { "epoch": 123.15, "learning_rate": 3.580582524271845e-06, "loss": 0.0278, "step": 317100 }, { "epoch": 123.15, "learning_rate": 3.580064724919094e-06, "loss": 0.029, "step": 317110 }, { "epoch": 123.15, "learning_rate": 3.5795469255663432e-06, "loss": 0.0162, "step": 317120 }, { "epoch": 123.16, "learning_rate": 3.5790291262135924e-06, "loss": 0.0569, "step": 317130 }, { "epoch": 123.16, "learning_rate": 3.5785113268608416e-06, "loss": 0.0009, "step": 317140 }, { "epoch": 123.17, "learning_rate": 3.577993527508091e-06, "loss": 0.0897, "step": 317150 }, { "epoch": 123.17, "learning_rate": 3.57747572815534e-06, "loss": 0.0237, "step": 317160 }, { "epoch": 123.17, "learning_rate": 3.576957928802589e-06, "loss": 0.0752, "step": 317170 }, { "epoch": 123.18, "learning_rate": 3.5764401294498384e-06, "loss": 0.0108, "step": 317180 }, { "epoch": 123.18, "learning_rate": 3.5759223300970876e-06, "loss": 0.0335, "step": 317190 }, { "epoch": 123.18, "learning_rate": 3.575404530744337e-06, "loss": 0.0147, "step": 317200 }, { "epoch": 123.19, "learning_rate": 3.574886731391586e-06, "loss": 0.0361, "step": 317210 }, { "epoch": 123.19, "learning_rate": 3.574368932038835e-06, "loss": 0.0687, "step": 317220 }, { "epoch": 123.2, "learning_rate": 3.5738511326860844e-06, "loss": 0.0565, "step": 317230 }, { "epoch": 123.2, "learning_rate": 3.5733333333333336e-06, "loss": 0.0813, "step": 317240 }, { "epoch": 123.2, "learning_rate": 3.5728155339805828e-06, "loss": 0.0095, "step": 317250 }, { "epoch": 123.21, "learning_rate": 3.572297734627832e-06, "loss": 0.0221, "step": 317260 }, { "epoch": 123.21, "learning_rate": 3.5717799352750807e-06, "loss": 0.1134, "step": 317270 }, { "epoch": 123.22, "learning_rate": 3.5712621359223303e-06, "loss": 0.0021, "step": 317280 }, { "epoch": 123.22, "learning_rate": 3.570744336569579e-06, "loss": 0.0702, "step": 317290 }, { "epoch": 123.22, "learning_rate": 3.5702265372168287e-06, "loss": 0.0088, "step": 317300 }, { "epoch": 123.23, "learning_rate": 3.5697087378640784e-06, "loss": 0.0288, "step": 317310 }, { "epoch": 123.23, "learning_rate": 3.569190938511327e-06, "loss": 0.0342, "step": 317320 }, { "epoch": 123.23, "learning_rate": 3.5686731391585767e-06, "loss": 0.0116, "step": 317330 }, { "epoch": 123.24, "learning_rate": 3.5681553398058255e-06, "loss": 0.0238, "step": 317340 }, { "epoch": 123.24, "learning_rate": 3.567637540453075e-06, "loss": 0.0198, "step": 317350 }, { "epoch": 123.25, "learning_rate": 3.567119741100324e-06, "loss": 0.021, "step": 317360 }, { "epoch": 123.25, "learning_rate": 3.5666019417475735e-06, "loss": 0.1595, "step": 317370 }, { "epoch": 123.25, "learning_rate": 3.5660841423948223e-06, "loss": 0.0561, "step": 317380 }, { "epoch": 123.26, "learning_rate": 3.565566343042072e-06, "loss": 0.0111, "step": 317390 }, { "epoch": 123.26, "learning_rate": 3.5650485436893207e-06, "loss": 0.0437, "step": 317400 }, { "epoch": 123.27, "learning_rate": 3.5645307443365703e-06, "loss": 0.0625, "step": 317410 }, { "epoch": 123.27, "learning_rate": 3.564012944983819e-06, "loss": 0.0165, "step": 317420 }, { "epoch": 123.27, "learning_rate": 3.5634951456310683e-06, "loss": 0.0003, "step": 317430 }, { "epoch": 123.28, "learning_rate": 3.5629773462783175e-06, "loss": 0.0901, "step": 317440 }, { "epoch": 123.28, "learning_rate": 3.5624595469255667e-06, "loss": 0.0743, "step": 317450 }, { "epoch": 123.29, "learning_rate": 3.561941747572816e-06, "loss": 0.0276, "step": 317460 }, { "epoch": 123.29, "learning_rate": 3.561423948220065e-06, "loss": 0.0001, "step": 317470 }, { "epoch": 123.29, "learning_rate": 3.5609061488673142e-06, "loss": 0.0013, "step": 317480 }, { "epoch": 123.3, "learning_rate": 3.5603883495145634e-06, "loss": 0.111, "step": 317490 }, { "epoch": 123.3, "learning_rate": 3.5598705501618126e-06, "loss": 0.0225, "step": 317500 }, { "epoch": 123.3, "learning_rate": 3.559352750809062e-06, "loss": 0.097, "step": 317510 }, { "epoch": 123.31, "learning_rate": 3.558834951456311e-06, "loss": 0.1888, "step": 317520 }, { "epoch": 123.31, "learning_rate": 3.55831715210356e-06, "loss": 0.0449, "step": 317530 }, { "epoch": 123.32, "learning_rate": 3.5577993527508094e-06, "loss": 0.0109, "step": 317540 }, { "epoch": 123.32, "learning_rate": 3.5572815533980586e-06, "loss": 0.0004, "step": 317550 }, { "epoch": 123.32, "learning_rate": 3.556763754045308e-06, "loss": 0.0896, "step": 317560 }, { "epoch": 123.33, "learning_rate": 3.556245954692557e-06, "loss": 0.1412, "step": 317570 }, { "epoch": 123.33, "learning_rate": 3.555728155339806e-06, "loss": 0.0006, "step": 317580 }, { "epoch": 123.34, "learning_rate": 3.5552103559870554e-06, "loss": 0.0707, "step": 317590 }, { "epoch": 123.34, "learning_rate": 3.554692556634304e-06, "loss": 0.1362, "step": 317600 }, { "epoch": 123.34, "learning_rate": 3.5541747572815538e-06, "loss": 0.0007, "step": 317610 }, { "epoch": 123.35, "learning_rate": 3.5536569579288025e-06, "loss": 0.0328, "step": 317620 }, { "epoch": 123.35, "learning_rate": 3.553139158576052e-06, "loss": 0.0694, "step": 317630 }, { "epoch": 123.36, "learning_rate": 3.552621359223301e-06, "loss": 0.0547, "step": 317640 }, { "epoch": 123.36, "learning_rate": 3.5521035598705505e-06, "loss": 0.1061, "step": 317650 }, { "epoch": 123.36, "learning_rate": 3.5515857605177993e-06, "loss": 0.0155, "step": 317660 }, { "epoch": 123.37, "learning_rate": 3.551067961165049e-06, "loss": 0.0269, "step": 317670 }, { "epoch": 123.37, "learning_rate": 3.5505501618122977e-06, "loss": 0.0542, "step": 317680 }, { "epoch": 123.37, "learning_rate": 3.5500323624595473e-06, "loss": 0.0712, "step": 317690 }, { "epoch": 123.38, "learning_rate": 3.549514563106796e-06, "loss": 0.0159, "step": 317700 }, { "epoch": 123.38, "learning_rate": 3.5489967637540457e-06, "loss": 0.0453, "step": 317710 }, { "epoch": 123.39, "learning_rate": 3.5484789644012945e-06, "loss": 0.0125, "step": 317720 }, { "epoch": 123.39, "learning_rate": 3.547961165048544e-06, "loss": 0.0162, "step": 317730 }, { "epoch": 123.39, "learning_rate": 3.547443365695793e-06, "loss": 0.0676, "step": 317740 }, { "epoch": 123.4, "learning_rate": 3.5469255663430425e-06, "loss": 0.0096, "step": 317750 }, { "epoch": 123.4, "learning_rate": 3.5464077669902913e-06, "loss": 0.0646, "step": 317760 }, { "epoch": 123.41, "learning_rate": 3.545889967637541e-06, "loss": 0.0001, "step": 317770 }, { "epoch": 123.41, "learning_rate": 3.5453721682847896e-06, "loss": 0.0344, "step": 317780 }, { "epoch": 123.41, "learning_rate": 3.5448543689320393e-06, "loss": 0.0432, "step": 317790 }, { "epoch": 123.42, "learning_rate": 3.544336569579288e-06, "loss": 0.0728, "step": 317800 }, { "epoch": 123.42, "learning_rate": 3.5438187702265377e-06, "loss": 0.0018, "step": 317810 }, { "epoch": 123.43, "learning_rate": 3.5433009708737864e-06, "loss": 0.0442, "step": 317820 }, { "epoch": 123.43, "learning_rate": 3.542783171521036e-06, "loss": 0.0829, "step": 317830 }, { "epoch": 123.43, "learning_rate": 3.542265372168285e-06, "loss": 0.1556, "step": 317840 }, { "epoch": 123.44, "learning_rate": 3.5417475728155344e-06, "loss": 0.0182, "step": 317850 }, { "epoch": 123.44, "learning_rate": 3.5412297734627836e-06, "loss": 0.0107, "step": 317860 }, { "epoch": 123.44, "learning_rate": 3.540711974110033e-06, "loss": 0.0266, "step": 317870 }, { "epoch": 123.45, "learning_rate": 3.540194174757282e-06, "loss": 0.017, "step": 317880 }, { "epoch": 123.45, "learning_rate": 3.539676375404531e-06, "loss": 0.039, "step": 317890 }, { "epoch": 123.46, "learning_rate": 3.5391585760517804e-06, "loss": 0.0427, "step": 317900 }, { "epoch": 123.46, "learning_rate": 3.5386407766990296e-06, "loss": 0.0872, "step": 317910 }, { "epoch": 123.46, "learning_rate": 3.538122977346279e-06, "loss": 0.0233, "step": 317920 }, { "epoch": 123.47, "learning_rate": 3.5376051779935276e-06, "loss": 0.0078, "step": 317930 }, { "epoch": 123.47, "learning_rate": 3.537087378640777e-06, "loss": 0.084, "step": 317940 }, { "epoch": 123.48, "learning_rate": 3.536569579288026e-06, "loss": 0.015, "step": 317950 }, { "epoch": 123.48, "learning_rate": 3.5360517799352756e-06, "loss": 0.0328, "step": 317960 }, { "epoch": 123.48, "learning_rate": 3.5355339805825243e-06, "loss": 0.043, "step": 317970 }, { "epoch": 123.49, "learning_rate": 3.535016181229774e-06, "loss": 0.0176, "step": 317980 }, { "epoch": 123.49, "learning_rate": 3.5344983818770227e-06, "loss": 0.2075, "step": 317990 }, { "epoch": 123.5, "learning_rate": 3.5339805825242724e-06, "loss": 0.0588, "step": 318000 }, { "epoch": 123.5, "learning_rate": 3.533462783171521e-06, "loss": 0.0772, "step": 318010 }, { "epoch": 123.5, "learning_rate": 3.5329449838187707e-06, "loss": 0.0001, "step": 318020 }, { "epoch": 123.51, "learning_rate": 3.5324271844660195e-06, "loss": 0.0535, "step": 318030 }, { "epoch": 123.51, "learning_rate": 3.531909385113269e-06, "loss": 0.0003, "step": 318040 }, { "epoch": 123.51, "learning_rate": 3.531391585760518e-06, "loss": 0.0499, "step": 318050 }, { "epoch": 123.52, "learning_rate": 3.5308737864077675e-06, "loss": 0.0365, "step": 318060 }, { "epoch": 123.52, "learning_rate": 3.5303559870550163e-06, "loss": 0.0564, "step": 318070 }, { "epoch": 123.53, "learning_rate": 3.529838187702266e-06, "loss": 0.0973, "step": 318080 }, { "epoch": 123.53, "learning_rate": 3.5293203883495147e-06, "loss": 0.0005, "step": 318090 }, { "epoch": 123.53, "learning_rate": 3.5288025889967643e-06, "loss": 0.0489, "step": 318100 }, { "epoch": 123.54, "learning_rate": 3.528284789644013e-06, "loss": 0.0554, "step": 318110 }, { "epoch": 123.54, "learning_rate": 3.5277669902912627e-06, "loss": 0.0024, "step": 318120 }, { "epoch": 123.55, "learning_rate": 3.5272491909385115e-06, "loss": 0.0303, "step": 318130 }, { "epoch": 123.55, "learning_rate": 3.526731391585761e-06, "loss": 0.0162, "step": 318140 }, { "epoch": 123.55, "learning_rate": 3.52621359223301e-06, "loss": 0.0212, "step": 318150 }, { "epoch": 123.56, "learning_rate": 3.5256957928802595e-06, "loss": 0.0789, "step": 318160 }, { "epoch": 123.56, "learning_rate": 3.5251779935275082e-06, "loss": 0.0001, "step": 318170 }, { "epoch": 123.57, "learning_rate": 3.524660194174758e-06, "loss": 0.0026, "step": 318180 }, { "epoch": 123.57, "learning_rate": 3.5241423948220066e-06, "loss": 0.0527, "step": 318190 }, { "epoch": 123.57, "learning_rate": 3.5236245954692562e-06, "loss": 0.0264, "step": 318200 }, { "epoch": 123.58, "learning_rate": 3.523106796116505e-06, "loss": 0.1864, "step": 318210 }, { "epoch": 123.58, "learning_rate": 3.5225889967637546e-06, "loss": 0.0587, "step": 318220 }, { "epoch": 123.58, "learning_rate": 3.5220711974110034e-06, "loss": 0.0845, "step": 318230 }, { "epoch": 123.59, "learning_rate": 3.5215533980582526e-06, "loss": 0.0119, "step": 318240 }, { "epoch": 123.59, "learning_rate": 3.521035598705502e-06, "loss": 0.108, "step": 318250 }, { "epoch": 123.6, "learning_rate": 3.520517799352751e-06, "loss": 0.0413, "step": 318260 }, { "epoch": 123.6, "learning_rate": 3.52e-06, "loss": 0.0004, "step": 318270 }, { "epoch": 123.6, "learning_rate": 3.5194822006472494e-06, "loss": 0.0395, "step": 318280 }, { "epoch": 123.61, "learning_rate": 3.5189644012944986e-06, "loss": 0.0508, "step": 318290 }, { "epoch": 123.61, "learning_rate": 3.5184466019417478e-06, "loss": 0.142, "step": 318300 }, { "epoch": 123.62, "learning_rate": 3.517928802588997e-06, "loss": 0.0525, "step": 318310 }, { "epoch": 123.62, "learning_rate": 3.517411003236246e-06, "loss": 0.0096, "step": 318320 }, { "epoch": 123.62, "learning_rate": 3.5168932038834953e-06, "loss": 0.0316, "step": 318330 }, { "epoch": 123.63, "learning_rate": 3.5163754045307445e-06, "loss": 0.062, "step": 318340 }, { "epoch": 123.63, "learning_rate": 3.5158576051779937e-06, "loss": 0.0132, "step": 318350 }, { "epoch": 123.63, "learning_rate": 3.515339805825243e-06, "loss": 0.0699, "step": 318360 }, { "epoch": 123.64, "learning_rate": 3.514822006472492e-06, "loss": 0.0002, "step": 318370 }, { "epoch": 123.64, "learning_rate": 3.5143042071197413e-06, "loss": 0.1603, "step": 318380 }, { "epoch": 123.65, "learning_rate": 3.5137864077669905e-06, "loss": 0.0957, "step": 318390 }, { "epoch": 123.65, "learning_rate": 3.5132686084142397e-06, "loss": 0.0017, "step": 318400 }, { "epoch": 123.65, "learning_rate": 3.5127508090614885e-06, "loss": 0.0007, "step": 318410 }, { "epoch": 123.66, "learning_rate": 3.512233009708738e-06, "loss": 0.0312, "step": 318420 }, { "epoch": 123.66, "learning_rate": 3.5117152103559877e-06, "loss": 0.0444, "step": 318430 }, { "epoch": 123.67, "learning_rate": 3.5111974110032365e-06, "loss": 0.0668, "step": 318440 }, { "epoch": 123.67, "learning_rate": 3.510679611650486e-06, "loss": 0.0048, "step": 318450 }, { "epoch": 123.67, "learning_rate": 3.510161812297735e-06, "loss": 0.0094, "step": 318460 }, { "epoch": 123.68, "learning_rate": 3.5096440129449845e-06, "loss": 0.0759, "step": 318470 }, { "epoch": 123.68, "learning_rate": 3.5091262135922333e-06, "loss": 0.0036, "step": 318480 }, { "epoch": 123.69, "learning_rate": 3.508608414239483e-06, "loss": 0.0075, "step": 318490 }, { "epoch": 123.69, "learning_rate": 3.5080906148867317e-06, "loss": 0.0186, "step": 318500 }, { "epoch": 123.69, "learning_rate": 3.5075728155339813e-06, "loss": 0.0545, "step": 318510 }, { "epoch": 123.7, "learning_rate": 3.50705501618123e-06, "loss": 0.0018, "step": 318520 }, { "epoch": 123.7, "learning_rate": 3.5065372168284797e-06, "loss": 0.0163, "step": 318530 }, { "epoch": 123.7, "learning_rate": 3.5060194174757284e-06, "loss": 0.0001, "step": 318540 }, { "epoch": 123.71, "learning_rate": 3.505501618122978e-06, "loss": 0.0522, "step": 318550 }, { "epoch": 123.71, "learning_rate": 3.504983818770227e-06, "loss": 0.0415, "step": 318560 }, { "epoch": 123.72, "learning_rate": 3.504466019417476e-06, "loss": 0.0084, "step": 318570 }, { "epoch": 123.72, "learning_rate": 3.503948220064725e-06, "loss": 0.0713, "step": 318580 }, { "epoch": 123.72, "learning_rate": 3.5034304207119744e-06, "loss": 0.0753, "step": 318590 }, { "epoch": 123.73, "learning_rate": 3.5029126213592236e-06, "loss": 0.0001, "step": 318600 }, { "epoch": 123.73, "learning_rate": 3.502394822006473e-06, "loss": 0.0213, "step": 318610 }, { "epoch": 123.74, "learning_rate": 3.501877022653722e-06, "loss": 0.0455, "step": 318620 }, { "epoch": 123.74, "learning_rate": 3.501359223300971e-06, "loss": 0.0529, "step": 318630 }, { "epoch": 123.74, "learning_rate": 3.5008414239482204e-06, "loss": 0.0635, "step": 318640 }, { "epoch": 123.75, "learning_rate": 3.5003236245954696e-06, "loss": 0.0387, "step": 318650 }, { "epoch": 123.75, "learning_rate": 3.4998058252427188e-06, "loss": 0.026, "step": 318660 }, { "epoch": 123.76, "learning_rate": 3.499288025889968e-06, "loss": 0.0945, "step": 318670 }, { "epoch": 123.76, "learning_rate": 3.498770226537217e-06, "loss": 0.0741, "step": 318680 }, { "epoch": 123.76, "learning_rate": 3.4982524271844663e-06, "loss": 0.0677, "step": 318690 }, { "epoch": 123.77, "learning_rate": 3.4977346278317155e-06, "loss": 0.0444, "step": 318700 }, { "epoch": 123.77, "learning_rate": 3.4972168284789647e-06, "loss": 0.0035, "step": 318710 }, { "epoch": 123.77, "learning_rate": 3.496699029126214e-06, "loss": 0.0592, "step": 318720 }, { "epoch": 123.78, "learning_rate": 3.496181229773463e-06, "loss": 0.0028, "step": 318730 }, { "epoch": 123.78, "learning_rate": 3.495663430420712e-06, "loss": 0.0222, "step": 318740 }, { "epoch": 123.79, "learning_rate": 3.4951456310679615e-06, "loss": 0.0967, "step": 318750 }, { "epoch": 123.79, "learning_rate": 3.4946278317152103e-06, "loss": 0.0095, "step": 318760 }, { "epoch": 123.79, "learning_rate": 3.49411003236246e-06, "loss": 0.0759, "step": 318770 }, { "epoch": 123.8, "learning_rate": 3.4935922330097087e-06, "loss": 0.0267, "step": 318780 }, { "epoch": 123.8, "learning_rate": 3.4930744336569583e-06, "loss": 0.0633, "step": 318790 }, { "epoch": 123.81, "learning_rate": 3.492556634304207e-06, "loss": 0.0425, "step": 318800 }, { "epoch": 123.81, "learning_rate": 3.4920388349514567e-06, "loss": 0.0419, "step": 318810 }, { "epoch": 123.81, "learning_rate": 3.4915210355987055e-06, "loss": 0.1061, "step": 318820 }, { "epoch": 123.82, "learning_rate": 3.491003236245955e-06, "loss": 0.0313, "step": 318830 }, { "epoch": 123.82, "learning_rate": 3.490485436893204e-06, "loss": 0.0105, "step": 318840 }, { "epoch": 123.83, "learning_rate": 3.4899676375404535e-06, "loss": 0.0735, "step": 318850 }, { "epoch": 123.83, "learning_rate": 3.4894498381877022e-06, "loss": 0.0757, "step": 318860 }, { "epoch": 123.83, "learning_rate": 3.488932038834952e-06, "loss": 0.0098, "step": 318870 }, { "epoch": 123.84, "learning_rate": 3.4884142394822006e-06, "loss": 0.0125, "step": 318880 }, { "epoch": 123.84, "learning_rate": 3.4878964401294502e-06, "loss": 0.1016, "step": 318890 }, { "epoch": 123.84, "learning_rate": 3.487378640776699e-06, "loss": 0.0127, "step": 318900 }, { "epoch": 123.85, "learning_rate": 3.4868608414239486e-06, "loss": 0.0888, "step": 318910 }, { "epoch": 123.85, "learning_rate": 3.4863430420711974e-06, "loss": 0.0132, "step": 318920 }, { "epoch": 123.86, "learning_rate": 3.485825242718447e-06, "loss": 0.0527, "step": 318930 }, { "epoch": 123.86, "learning_rate": 3.485307443365696e-06, "loss": 0.0887, "step": 318940 }, { "epoch": 123.86, "learning_rate": 3.4847896440129454e-06, "loss": 0.0239, "step": 318950 }, { "epoch": 123.87, "learning_rate": 3.484271844660194e-06, "loss": 0.1479, "step": 318960 }, { "epoch": 123.87, "learning_rate": 3.483754045307444e-06, "loss": 0.0782, "step": 318970 }, { "epoch": 123.88, "learning_rate": 3.483236245954693e-06, "loss": 0.0683, "step": 318980 }, { "epoch": 123.88, "learning_rate": 3.482718446601942e-06, "loss": 0.0387, "step": 318990 }, { "epoch": 123.88, "learning_rate": 3.4822006472491914e-06, "loss": 0.1402, "step": 319000 }, { "epoch": 123.89, "learning_rate": 3.4816828478964406e-06, "loss": 0.0204, "step": 319010 }, { "epoch": 123.89, "learning_rate": 3.4811650485436898e-06, "loss": 0.0536, "step": 319020 }, { "epoch": 123.9, "learning_rate": 3.480647249190939e-06, "loss": 0.2267, "step": 319030 }, { "epoch": 123.9, "learning_rate": 3.480129449838188e-06, "loss": 0.1253, "step": 319040 }, { "epoch": 123.9, "learning_rate": 3.4796116504854374e-06, "loss": 0.062, "step": 319050 }, { "epoch": 123.91, "learning_rate": 3.4790938511326865e-06, "loss": 0.0487, "step": 319060 }, { "epoch": 123.91, "learning_rate": 3.4785760517799353e-06, "loss": 0.0463, "step": 319070 }, { "epoch": 123.91, "learning_rate": 3.478058252427185e-06, "loss": 0.1364, "step": 319080 }, { "epoch": 123.92, "learning_rate": 3.4775404530744337e-06, "loss": 0.0268, "step": 319090 }, { "epoch": 123.92, "learning_rate": 3.4770226537216833e-06, "loss": 0.1199, "step": 319100 }, { "epoch": 123.93, "learning_rate": 3.476504854368932e-06, "loss": 0.0428, "step": 319110 }, { "epoch": 123.93, "learning_rate": 3.4759870550161817e-06, "loss": 0.0361, "step": 319120 }, { "epoch": 123.93, "learning_rate": 3.4754692556634305e-06, "loss": 0.1658, "step": 319130 }, { "epoch": 123.94, "learning_rate": 3.47495145631068e-06, "loss": 0.0894, "step": 319140 }, { "epoch": 123.94, "learning_rate": 3.474433656957929e-06, "loss": 0.0214, "step": 319150 }, { "epoch": 123.95, "learning_rate": 3.4739158576051785e-06, "loss": 0.0004, "step": 319160 }, { "epoch": 123.95, "learning_rate": 3.4733980582524273e-06, "loss": 0.0109, "step": 319170 }, { "epoch": 123.95, "learning_rate": 3.472880258899677e-06, "loss": 0.1144, "step": 319180 }, { "epoch": 123.96, "learning_rate": 3.4723624595469257e-06, "loss": 0.0192, "step": 319190 }, { "epoch": 123.96, "learning_rate": 3.4718446601941753e-06, "loss": 0.0205, "step": 319200 }, { "epoch": 123.97, "learning_rate": 3.471326860841424e-06, "loss": 0.0123, "step": 319210 }, { "epoch": 123.97, "learning_rate": 3.4708090614886737e-06, "loss": 0.0533, "step": 319220 }, { "epoch": 123.97, "learning_rate": 3.4702912621359224e-06, "loss": 0.115, "step": 319230 }, { "epoch": 123.98, "learning_rate": 3.469773462783172e-06, "loss": 0.0296, "step": 319240 }, { "epoch": 123.98, "learning_rate": 3.469255663430421e-06, "loss": 0.0007, "step": 319250 }, { "epoch": 123.98, "learning_rate": 3.4687378640776704e-06, "loss": 0.0675, "step": 319260 }, { "epoch": 123.99, "learning_rate": 3.468220064724919e-06, "loss": 0.0077, "step": 319270 }, { "epoch": 123.99, "learning_rate": 3.467702265372169e-06, "loss": 0.0023, "step": 319280 }, { "epoch": 124.0, "learning_rate": 3.4671844660194176e-06, "loss": 0.0178, "step": 319290 }, { "epoch": 124.0, "learning_rate": 3.4666666666666672e-06, "loss": 0.0249, "step": 319300 }, { "epoch": 124.0, "eval_accuracy": 0.949656121045392, "eval_loss": 0.374478280544281, "eval_runtime": 8.2186, "eval_samples_per_second": 442.287, "eval_steps_per_second": 55.362, "step": 319300 }, { "epoch": 124.0, "learning_rate": 3.466148867313916e-06, "loss": 0.0547, "step": 319310 }, { "epoch": 124.01, "learning_rate": 3.4656310679611656e-06, "loss": 0.0917, "step": 319320 }, { "epoch": 124.01, "learning_rate": 3.4651132686084144e-06, "loss": 0.0216, "step": 319330 }, { "epoch": 124.02, "learning_rate": 3.464595469255664e-06, "loss": 0.0197, "step": 319340 }, { "epoch": 124.02, "learning_rate": 3.4640776699029128e-06, "loss": 0.0001, "step": 319350 }, { "epoch": 124.02, "learning_rate": 3.4635598705501624e-06, "loss": 0.0863, "step": 319360 }, { "epoch": 124.03, "learning_rate": 3.463042071197411e-06, "loss": 0.1096, "step": 319370 }, { "epoch": 124.03, "learning_rate": 3.4625242718446603e-06, "loss": 0.0249, "step": 319380 }, { "epoch": 124.03, "learning_rate": 3.4620064724919095e-06, "loss": 0.1369, "step": 319390 }, { "epoch": 124.04, "learning_rate": 3.4614886731391587e-06, "loss": 0.067, "step": 319400 }, { "epoch": 124.04, "learning_rate": 3.460970873786408e-06, "loss": 0.0003, "step": 319410 }, { "epoch": 124.05, "learning_rate": 3.460453074433657e-06, "loss": 0.0479, "step": 319420 }, { "epoch": 124.05, "learning_rate": 3.4599352750809063e-06, "loss": 0.019, "step": 319430 }, { "epoch": 124.05, "learning_rate": 3.4594174757281555e-06, "loss": 0.0188, "step": 319440 }, { "epoch": 124.06, "learning_rate": 3.4588996763754047e-06, "loss": 0.0267, "step": 319450 }, { "epoch": 124.06, "learning_rate": 3.458381877022654e-06, "loss": 0.0855, "step": 319460 }, { "epoch": 124.07, "learning_rate": 3.457864077669903e-06, "loss": 0.0107, "step": 319470 }, { "epoch": 124.07, "learning_rate": 3.4573462783171523e-06, "loss": 0.0086, "step": 319480 }, { "epoch": 124.07, "learning_rate": 3.4568284789644015e-06, "loss": 0.0535, "step": 319490 }, { "epoch": 124.08, "learning_rate": 3.4563106796116507e-06, "loss": 0.0216, "step": 319500 }, { "epoch": 124.08, "learning_rate": 3.4557928802589e-06, "loss": 0.1156, "step": 319510 }, { "epoch": 124.09, "learning_rate": 3.455275080906149e-06, "loss": 0.0081, "step": 319520 }, { "epoch": 124.09, "learning_rate": 3.4547572815533983e-06, "loss": 0.0687, "step": 319530 }, { "epoch": 124.09, "learning_rate": 3.4542394822006475e-06, "loss": 0.0574, "step": 319540 }, { "epoch": 124.1, "learning_rate": 3.453721682847897e-06, "loss": 0.0308, "step": 319550 }, { "epoch": 124.1, "learning_rate": 3.453203883495146e-06, "loss": 0.0266, "step": 319560 }, { "epoch": 124.1, "learning_rate": 3.4526860841423955e-06, "loss": 0.0805, "step": 319570 }, { "epoch": 124.11, "learning_rate": 3.4521682847896442e-06, "loss": 0.0002, "step": 319580 }, { "epoch": 124.11, "learning_rate": 3.451650485436894e-06, "loss": 0.0162, "step": 319590 }, { "epoch": 124.12, "learning_rate": 3.4511326860841426e-06, "loss": 0.1626, "step": 319600 }, { "epoch": 124.12, "learning_rate": 3.4506148867313922e-06, "loss": 0.0567, "step": 319610 }, { "epoch": 124.12, "learning_rate": 3.450097087378641e-06, "loss": 0.001, "step": 319620 }, { "epoch": 124.13, "learning_rate": 3.4495792880258906e-06, "loss": 0.0685, "step": 319630 }, { "epoch": 124.13, "learning_rate": 3.4490614886731394e-06, "loss": 0.0169, "step": 319640 }, { "epoch": 124.14, "learning_rate": 3.448543689320389e-06, "loss": 0.1052, "step": 319650 }, { "epoch": 124.14, "learning_rate": 3.448025889967638e-06, "loss": 0.0589, "step": 319660 }, { "epoch": 124.14, "learning_rate": 3.4475080906148874e-06, "loss": 0.0113, "step": 319670 }, { "epoch": 124.15, "learning_rate": 3.446990291262136e-06, "loss": 0.074, "step": 319680 }, { "epoch": 124.15, "learning_rate": 3.446472491909386e-06, "loss": 0.0452, "step": 319690 }, { "epoch": 124.16, "learning_rate": 3.4459546925566346e-06, "loss": 0.0344, "step": 319700 }, { "epoch": 124.16, "learning_rate": 3.4454368932038838e-06, "loss": 0.0136, "step": 319710 }, { "epoch": 124.16, "learning_rate": 3.444919093851133e-06, "loss": 0.0001, "step": 319720 }, { "epoch": 124.17, "learning_rate": 3.444401294498382e-06, "loss": 0.0251, "step": 319730 }, { "epoch": 124.17, "learning_rate": 3.4438834951456314e-06, "loss": 0.0336, "step": 319740 }, { "epoch": 124.17, "learning_rate": 3.4433656957928805e-06, "loss": 0.07, "step": 319750 }, { "epoch": 124.18, "learning_rate": 3.4428478964401297e-06, "loss": 0.02, "step": 319760 }, { "epoch": 124.18, "learning_rate": 3.442330097087379e-06, "loss": 0.0474, "step": 319770 }, { "epoch": 124.19, "learning_rate": 3.441812297734628e-06, "loss": 0.0027, "step": 319780 }, { "epoch": 124.19, "learning_rate": 3.4412944983818773e-06, "loss": 0.1075, "step": 319790 }, { "epoch": 124.19, "learning_rate": 3.4407766990291265e-06, "loss": 0.078, "step": 319800 }, { "epoch": 124.2, "learning_rate": 3.4402588996763757e-06, "loss": 0.0882, "step": 319810 }, { "epoch": 124.2, "learning_rate": 3.439741100323625e-06, "loss": 0.0886, "step": 319820 }, { "epoch": 124.21, "learning_rate": 3.439223300970874e-06, "loss": 0.0208, "step": 319830 }, { "epoch": 124.21, "learning_rate": 3.4387055016181233e-06, "loss": 0.0111, "step": 319840 }, { "epoch": 124.21, "learning_rate": 3.4381877022653725e-06, "loss": 0.0096, "step": 319850 }, { "epoch": 124.22, "learning_rate": 3.4376699029126217e-06, "loss": 0.009, "step": 319860 }, { "epoch": 124.22, "learning_rate": 3.437152103559871e-06, "loss": 0.0862, "step": 319870 }, { "epoch": 124.23, "learning_rate": 3.4366343042071197e-06, "loss": 0.0001, "step": 319880 }, { "epoch": 124.23, "learning_rate": 3.4361165048543693e-06, "loss": 0.0777, "step": 319890 }, { "epoch": 124.23, "learning_rate": 3.435598705501618e-06, "loss": 0.1183, "step": 319900 }, { "epoch": 124.24, "learning_rate": 3.4350809061488677e-06, "loss": 0.0005, "step": 319910 }, { "epoch": 124.24, "learning_rate": 3.4345631067961164e-06, "loss": 0.014, "step": 319920 }, { "epoch": 124.24, "learning_rate": 3.434045307443366e-06, "loss": 0.0024, "step": 319930 }, { "epoch": 124.25, "learning_rate": 3.433527508090615e-06, "loss": 0.0023, "step": 319940 }, { "epoch": 124.25, "learning_rate": 3.4330097087378644e-06, "loss": 0.0527, "step": 319950 }, { "epoch": 124.26, "learning_rate": 3.432491909385113e-06, "loss": 0.0414, "step": 319960 }, { "epoch": 124.26, "learning_rate": 3.431974110032363e-06, "loss": 0.014, "step": 319970 }, { "epoch": 124.26, "learning_rate": 3.4314563106796116e-06, "loss": 0.0456, "step": 319980 }, { "epoch": 124.27, "learning_rate": 3.4309385113268612e-06, "loss": 0.0304, "step": 319990 }, { "epoch": 124.27, "learning_rate": 3.43042071197411e-06, "loss": 0.033, "step": 320000 }, { "epoch": 124.28, "learning_rate": 3.4299029126213596e-06, "loss": 0.0101, "step": 320010 }, { "epoch": 124.28, "learning_rate": 3.4293851132686084e-06, "loss": 0.012, "step": 320020 }, { "epoch": 124.28, "learning_rate": 3.428867313915858e-06, "loss": 0.0148, "step": 320030 }, { "epoch": 124.29, "learning_rate": 3.4283495145631068e-06, "loss": 0.0297, "step": 320040 }, { "epoch": 124.29, "learning_rate": 3.4278317152103564e-06, "loss": 0.0123, "step": 320050 }, { "epoch": 124.3, "learning_rate": 3.427313915857605e-06, "loss": 0.0015, "step": 320060 }, { "epoch": 124.3, "learning_rate": 3.4267961165048548e-06, "loss": 0.0101, "step": 320070 }, { "epoch": 124.3, "learning_rate": 3.4262783171521035e-06, "loss": 0.0647, "step": 320080 }, { "epoch": 124.31, "learning_rate": 3.425760517799353e-06, "loss": 0.074, "step": 320090 }, { "epoch": 124.31, "learning_rate": 3.4252427184466024e-06, "loss": 0.115, "step": 320100 }, { "epoch": 124.31, "learning_rate": 3.4247249190938515e-06, "loss": 0.0588, "step": 320110 }, { "epoch": 124.32, "learning_rate": 3.4242071197411007e-06, "loss": 0.0013, "step": 320120 }, { "epoch": 124.32, "learning_rate": 3.42368932038835e-06, "loss": 0.0559, "step": 320130 }, { "epoch": 124.33, "learning_rate": 3.423171521035599e-06, "loss": 0.0351, "step": 320140 }, { "epoch": 124.33, "learning_rate": 3.4226537216828483e-06, "loss": 0.0612, "step": 320150 }, { "epoch": 124.33, "learning_rate": 3.4221359223300975e-06, "loss": 0.0215, "step": 320160 }, { "epoch": 124.34, "learning_rate": 3.4216181229773467e-06, "loss": 0.0764, "step": 320170 }, { "epoch": 124.34, "learning_rate": 3.421100323624596e-06, "loss": 0.0405, "step": 320180 }, { "epoch": 124.35, "learning_rate": 3.420582524271845e-06, "loss": 0.0486, "step": 320190 }, { "epoch": 124.35, "learning_rate": 3.4200647249190943e-06, "loss": 0.0172, "step": 320200 }, { "epoch": 124.35, "learning_rate": 3.419546925566343e-06, "loss": 0.0335, "step": 320210 }, { "epoch": 124.36, "learning_rate": 3.4190291262135927e-06, "loss": 0.0969, "step": 320220 }, { "epoch": 124.36, "learning_rate": 3.4185113268608415e-06, "loss": 0.0925, "step": 320230 }, { "epoch": 124.37, "learning_rate": 3.417993527508091e-06, "loss": 0.0152, "step": 320240 }, { "epoch": 124.37, "learning_rate": 3.41747572815534e-06, "loss": 0.0004, "step": 320250 }, { "epoch": 124.37, "learning_rate": 3.4169579288025895e-06, "loss": 0.0975, "step": 320260 }, { "epoch": 124.38, "learning_rate": 3.4164401294498382e-06, "loss": 0.1043, "step": 320270 }, { "epoch": 124.38, "learning_rate": 3.415922330097088e-06, "loss": 0.0034, "step": 320280 }, { "epoch": 124.38, "learning_rate": 3.4154045307443366e-06, "loss": 0.0053, "step": 320290 }, { "epoch": 124.39, "learning_rate": 3.4148867313915862e-06, "loss": 0.0576, "step": 320300 }, { "epoch": 124.39, "learning_rate": 3.414368932038835e-06, "loss": 0.1428, "step": 320310 }, { "epoch": 124.4, "learning_rate": 3.4138511326860846e-06, "loss": 0.0374, "step": 320320 }, { "epoch": 124.4, "learning_rate": 3.4133333333333334e-06, "loss": 0.0278, "step": 320330 }, { "epoch": 124.4, "learning_rate": 3.412815533980583e-06, "loss": 0.0231, "step": 320340 }, { "epoch": 124.41, "learning_rate": 3.412297734627832e-06, "loss": 0.0917, "step": 320350 }, { "epoch": 124.41, "learning_rate": 3.4117799352750814e-06, "loss": 0.0413, "step": 320360 }, { "epoch": 124.42, "learning_rate": 3.41126213592233e-06, "loss": 0.0244, "step": 320370 }, { "epoch": 124.42, "learning_rate": 3.41074433656958e-06, "loss": 0.0259, "step": 320380 }, { "epoch": 124.42, "learning_rate": 3.4102265372168286e-06, "loss": 0.0002, "step": 320390 }, { "epoch": 124.43, "learning_rate": 3.409708737864078e-06, "loss": 0.0015, "step": 320400 }, { "epoch": 124.43, "learning_rate": 3.409190938511327e-06, "loss": 0.1017, "step": 320410 }, { "epoch": 124.43, "learning_rate": 3.4086731391585766e-06, "loss": 0.0257, "step": 320420 }, { "epoch": 124.44, "learning_rate": 3.4081553398058254e-06, "loss": 0.0107, "step": 320430 }, { "epoch": 124.44, "learning_rate": 3.407637540453075e-06, "loss": 0.0607, "step": 320440 }, { "epoch": 124.45, "learning_rate": 3.4071197411003237e-06, "loss": 0.0398, "step": 320450 }, { "epoch": 124.45, "learning_rate": 3.4066019417475734e-06, "loss": 0.1649, "step": 320460 }, { "epoch": 124.45, "learning_rate": 3.406084142394822e-06, "loss": 0.0442, "step": 320470 }, { "epoch": 124.46, "learning_rate": 3.4055663430420717e-06, "loss": 0.0238, "step": 320480 }, { "epoch": 124.46, "learning_rate": 3.4050485436893205e-06, "loss": 0.0103, "step": 320490 }, { "epoch": 124.47, "learning_rate": 3.40453074433657e-06, "loss": 0.0001, "step": 320500 }, { "epoch": 124.47, "learning_rate": 3.404012944983819e-06, "loss": 0.0298, "step": 320510 }, { "epoch": 124.47, "learning_rate": 3.403495145631068e-06, "loss": 0.0389, "step": 320520 }, { "epoch": 124.48, "learning_rate": 3.4029773462783173e-06, "loss": 0.0818, "step": 320530 }, { "epoch": 124.48, "learning_rate": 3.4024595469255665e-06, "loss": 0.0299, "step": 320540 }, { "epoch": 124.49, "learning_rate": 3.4019417475728157e-06, "loss": 0.0004, "step": 320550 }, { "epoch": 124.49, "learning_rate": 3.401423948220065e-06, "loss": 0.1104, "step": 320560 }, { "epoch": 124.49, "learning_rate": 3.400906148867314e-06, "loss": 0.045, "step": 320570 }, { "epoch": 124.5, "learning_rate": 3.4003883495145633e-06, "loss": 0.014, "step": 320580 }, { "epoch": 124.5, "learning_rate": 3.3998705501618125e-06, "loss": 0.0135, "step": 320590 }, { "epoch": 124.5, "learning_rate": 3.3993527508090617e-06, "loss": 0.0342, "step": 320600 }, { "epoch": 124.51, "learning_rate": 3.398834951456311e-06, "loss": 0.0204, "step": 320610 }, { "epoch": 124.51, "learning_rate": 3.39831715210356e-06, "loss": 0.029, "step": 320620 }, { "epoch": 124.52, "learning_rate": 3.3977993527508092e-06, "loss": 0.0709, "step": 320630 }, { "epoch": 124.52, "learning_rate": 3.3972815533980584e-06, "loss": 0.0016, "step": 320640 }, { "epoch": 124.52, "learning_rate": 3.3967637540453076e-06, "loss": 0.0376, "step": 320650 }, { "epoch": 124.53, "learning_rate": 3.396245954692557e-06, "loss": 0.0155, "step": 320660 }, { "epoch": 124.53, "learning_rate": 3.3957281553398064e-06, "loss": 0.1189, "step": 320670 }, { "epoch": 124.54, "learning_rate": 3.3952103559870552e-06, "loss": 0.0385, "step": 320680 }, { "epoch": 124.54, "learning_rate": 3.394692556634305e-06, "loss": 0.01, "step": 320690 }, { "epoch": 124.54, "learning_rate": 3.3941747572815536e-06, "loss": 0.0827, "step": 320700 }, { "epoch": 124.55, "learning_rate": 3.3936569579288032e-06, "loss": 0.0003, "step": 320710 }, { "epoch": 124.55, "learning_rate": 3.393139158576052e-06, "loss": 0.0001, "step": 320720 }, { "epoch": 124.56, "learning_rate": 3.3926213592233016e-06, "loss": 0.0503, "step": 320730 }, { "epoch": 124.56, "learning_rate": 3.3921035598705504e-06, "loss": 0.059, "step": 320740 }, { "epoch": 124.56, "learning_rate": 3.3915857605178e-06, "loss": 0.0001, "step": 320750 }, { "epoch": 124.57, "learning_rate": 3.3910679611650488e-06, "loss": 0.0709, "step": 320760 }, { "epoch": 124.57, "learning_rate": 3.3905501618122984e-06, "loss": 0.0277, "step": 320770 }, { "epoch": 124.57, "learning_rate": 3.390032362459547e-06, "loss": 0.0226, "step": 320780 }, { "epoch": 124.58, "learning_rate": 3.3895145631067968e-06, "loss": 0.2468, "step": 320790 }, { "epoch": 124.58, "learning_rate": 3.3889967637540455e-06, "loss": 0.0101, "step": 320800 }, { "epoch": 124.59, "learning_rate": 3.388478964401295e-06, "loss": 0.0213, "step": 320810 }, { "epoch": 124.59, "learning_rate": 3.387961165048544e-06, "loss": 0.0903, "step": 320820 }, { "epoch": 124.59, "learning_rate": 3.3874433656957936e-06, "loss": 0.0518, "step": 320830 }, { "epoch": 124.6, "learning_rate": 3.3869255663430423e-06, "loss": 0.0121, "step": 320840 }, { "epoch": 124.6, "learning_rate": 3.3864077669902915e-06, "loss": 0.2143, "step": 320850 }, { "epoch": 124.61, "learning_rate": 3.3858899676375407e-06, "loss": 0.009, "step": 320860 }, { "epoch": 124.61, "learning_rate": 3.38537216828479e-06, "loss": 0.0498, "step": 320870 }, { "epoch": 124.61, "learning_rate": 3.384854368932039e-06, "loss": 0.0527, "step": 320880 }, { "epoch": 124.62, "learning_rate": 3.3843365695792883e-06, "loss": 0.0207, "step": 320890 }, { "epoch": 124.62, "learning_rate": 3.3838187702265375e-06, "loss": 0.0429, "step": 320900 }, { "epoch": 124.63, "learning_rate": 3.3833009708737867e-06, "loss": 0.0477, "step": 320910 }, { "epoch": 124.63, "learning_rate": 3.382783171521036e-06, "loss": 0.0296, "step": 320920 }, { "epoch": 124.63, "learning_rate": 3.382265372168285e-06, "loss": 0.0028, "step": 320930 }, { "epoch": 124.64, "learning_rate": 3.3817475728155343e-06, "loss": 0.1008, "step": 320940 }, { "epoch": 124.64, "learning_rate": 3.3812297734627835e-06, "loss": 0.0509, "step": 320950 }, { "epoch": 124.64, "learning_rate": 3.3807119741100327e-06, "loss": 0.0525, "step": 320960 }, { "epoch": 124.65, "learning_rate": 3.380194174757282e-06, "loss": 0.0145, "step": 320970 }, { "epoch": 124.65, "learning_rate": 3.379676375404531e-06, "loss": 0.0732, "step": 320980 }, { "epoch": 124.66, "learning_rate": 3.3791585760517802e-06, "loss": 0.0128, "step": 320990 }, { "epoch": 124.66, "learning_rate": 3.3786407766990294e-06, "loss": 0.0216, "step": 321000 }, { "epoch": 124.66, "learning_rate": 3.3781229773462786e-06, "loss": 0.0091, "step": 321010 }, { "epoch": 124.67, "learning_rate": 3.3776051779935274e-06, "loss": 0.0005, "step": 321020 }, { "epoch": 124.67, "learning_rate": 3.377087378640777e-06, "loss": 0.0364, "step": 321030 }, { "epoch": 124.68, "learning_rate": 3.376569579288026e-06, "loss": 0.0116, "step": 321040 }, { "epoch": 124.68, "learning_rate": 3.3760517799352754e-06, "loss": 0.025, "step": 321050 }, { "epoch": 124.68, "learning_rate": 3.375533980582524e-06, "loss": 0.0177, "step": 321060 }, { "epoch": 124.69, "learning_rate": 3.375016181229774e-06, "loss": 0.1125, "step": 321070 }, { "epoch": 124.69, "learning_rate": 3.3744983818770226e-06, "loss": 0.0901, "step": 321080 }, { "epoch": 124.7, "learning_rate": 3.373980582524272e-06, "loss": 0.1173, "step": 321090 }, { "epoch": 124.7, "learning_rate": 3.373462783171521e-06, "loss": 0.0187, "step": 321100 }, { "epoch": 124.7, "learning_rate": 3.3729449838187706e-06, "loss": 0.0001, "step": 321110 }, { "epoch": 124.71, "learning_rate": 3.3724271844660193e-06, "loss": 0.0948, "step": 321120 }, { "epoch": 124.71, "learning_rate": 3.371909385113269e-06, "loss": 0.0004, "step": 321130 }, { "epoch": 124.71, "learning_rate": 3.3713915857605177e-06, "loss": 0.012, "step": 321140 }, { "epoch": 124.72, "learning_rate": 3.3708737864077674e-06, "loss": 0.0324, "step": 321150 }, { "epoch": 124.72, "learning_rate": 3.370355987055016e-06, "loss": 0.0644, "step": 321160 }, { "epoch": 124.73, "learning_rate": 3.3698381877022657e-06, "loss": 0.0949, "step": 321170 }, { "epoch": 124.73, "learning_rate": 3.3693203883495145e-06, "loss": 0.0143, "step": 321180 }, { "epoch": 124.73, "learning_rate": 3.368802588996764e-06, "loss": 0.0183, "step": 321190 }, { "epoch": 124.74, "learning_rate": 3.368284789644013e-06, "loss": 0.1957, "step": 321200 }, { "epoch": 124.74, "learning_rate": 3.3677669902912625e-06, "loss": 0.0198, "step": 321210 }, { "epoch": 124.75, "learning_rate": 3.3672491909385113e-06, "loss": 0.0886, "step": 321220 }, { "epoch": 124.75, "learning_rate": 3.366731391585761e-06, "loss": 0.0116, "step": 321230 }, { "epoch": 124.75, "learning_rate": 3.36621359223301e-06, "loss": 0.0197, "step": 321240 }, { "epoch": 124.76, "learning_rate": 3.3656957928802593e-06, "loss": 0.1219, "step": 321250 }, { "epoch": 124.76, "learning_rate": 3.3651779935275085e-06, "loss": 0.0228, "step": 321260 }, { "epoch": 124.77, "learning_rate": 3.3646601941747577e-06, "loss": 0.1601, "step": 321270 }, { "epoch": 124.77, "learning_rate": 3.364142394822007e-06, "loss": 0.0879, "step": 321280 }, { "epoch": 124.77, "learning_rate": 3.363624595469256e-06, "loss": 0.0059, "step": 321290 }, { "epoch": 124.78, "learning_rate": 3.3631067961165053e-06, "loss": 0.0092, "step": 321300 }, { "epoch": 124.78, "learning_rate": 3.3625889967637545e-06, "loss": 0.0163, "step": 321310 }, { "epoch": 124.78, "learning_rate": 3.3620711974110037e-06, "loss": 0.1129, "step": 321320 }, { "epoch": 124.79, "learning_rate": 3.361553398058253e-06, "loss": 0.0921, "step": 321330 }, { "epoch": 124.79, "learning_rate": 3.361035598705502e-06, "loss": 0.0138, "step": 321340 }, { "epoch": 124.8, "learning_rate": 3.360517799352751e-06, "loss": 0.0241, "step": 321350 }, { "epoch": 124.8, "learning_rate": 3.3600000000000004e-06, "loss": 0.0897, "step": 321360 }, { "epoch": 124.8, "learning_rate": 3.3594822006472492e-06, "loss": 0.0926, "step": 321370 }, { "epoch": 124.81, "learning_rate": 3.358964401294499e-06, "loss": 0.0194, "step": 321380 }, { "epoch": 124.81, "learning_rate": 3.3584466019417476e-06, "loss": 0.0346, "step": 321390 }, { "epoch": 124.82, "learning_rate": 3.3579288025889972e-06, "loss": 0.013, "step": 321400 }, { "epoch": 124.82, "learning_rate": 3.357411003236246e-06, "loss": 0.0465, "step": 321410 }, { "epoch": 124.82, "learning_rate": 3.3568932038834956e-06, "loss": 0.0881, "step": 321420 }, { "epoch": 124.83, "learning_rate": 3.3563754045307444e-06, "loss": 0.0258, "step": 321430 }, { "epoch": 124.83, "learning_rate": 3.355857605177994e-06, "loss": 0.0103, "step": 321440 }, { "epoch": 124.83, "learning_rate": 3.3553398058252428e-06, "loss": 0.0001, "step": 321450 }, { "epoch": 124.84, "learning_rate": 3.3548220064724924e-06, "loss": 0.0014, "step": 321460 }, { "epoch": 124.84, "learning_rate": 3.354304207119741e-06, "loss": 0.0119, "step": 321470 }, { "epoch": 124.85, "learning_rate": 3.3537864077669908e-06, "loss": 0.0002, "step": 321480 }, { "epoch": 124.85, "learning_rate": 3.3532686084142395e-06, "loss": 0.0099, "step": 321490 }, { "epoch": 124.85, "learning_rate": 3.352750809061489e-06, "loss": 0.0825, "step": 321500 }, { "epoch": 124.86, "learning_rate": 3.352233009708738e-06, "loss": 0.0041, "step": 321510 }, { "epoch": 124.86, "learning_rate": 3.3517152103559876e-06, "loss": 0.0835, "step": 321520 }, { "epoch": 124.87, "learning_rate": 3.3511974110032363e-06, "loss": 0.0426, "step": 321530 }, { "epoch": 124.87, "learning_rate": 3.350679611650486e-06, "loss": 0.0388, "step": 321540 }, { "epoch": 124.87, "learning_rate": 3.3501618122977347e-06, "loss": 0.0327, "step": 321550 }, { "epoch": 124.88, "learning_rate": 3.3496440129449843e-06, "loss": 0.0848, "step": 321560 }, { "epoch": 124.88, "learning_rate": 3.349126213592233e-06, "loss": 0.0054, "step": 321570 }, { "epoch": 124.89, "learning_rate": 3.3486084142394827e-06, "loss": 0.0048, "step": 321580 }, { "epoch": 124.89, "learning_rate": 3.3480906148867315e-06, "loss": 0.0522, "step": 321590 }, { "epoch": 124.89, "learning_rate": 3.347572815533981e-06, "loss": 0.0559, "step": 321600 }, { "epoch": 124.9, "learning_rate": 3.34705501618123e-06, "loss": 0.0936, "step": 321610 }, { "epoch": 124.9, "learning_rate": 3.3465372168284795e-06, "loss": 0.0648, "step": 321620 }, { "epoch": 124.9, "learning_rate": 3.3460194174757283e-06, "loss": 0.0392, "step": 321630 }, { "epoch": 124.91, "learning_rate": 3.345501618122978e-06, "loss": 0.0803, "step": 321640 }, { "epoch": 124.91, "learning_rate": 3.3449838187702267e-06, "loss": 0.1087, "step": 321650 }, { "epoch": 124.92, "learning_rate": 3.344466019417476e-06, "loss": 0.0371, "step": 321660 }, { "epoch": 124.92, "learning_rate": 3.343948220064725e-06, "loss": 0.0482, "step": 321670 }, { "epoch": 124.92, "learning_rate": 3.3434304207119742e-06, "loss": 0.0185, "step": 321680 }, { "epoch": 124.93, "learning_rate": 3.3429126213592234e-06, "loss": 0.0186, "step": 321690 }, { "epoch": 124.93, "learning_rate": 3.3423948220064726e-06, "loss": 0.0579, "step": 321700 }, { "epoch": 124.94, "learning_rate": 3.341877022653722e-06, "loss": 0.0005, "step": 321710 }, { "epoch": 124.94, "learning_rate": 3.341359223300971e-06, "loss": 0.0598, "step": 321720 }, { "epoch": 124.94, "learning_rate": 3.3408414239482202e-06, "loss": 0.0558, "step": 321730 }, { "epoch": 124.95, "learning_rate": 3.3403236245954694e-06, "loss": 0.0115, "step": 321740 }, { "epoch": 124.95, "learning_rate": 3.3398058252427186e-06, "loss": 0.0104, "step": 321750 }, { "epoch": 124.96, "learning_rate": 3.339288025889968e-06, "loss": 0.0143, "step": 321760 }, { "epoch": 124.96, "learning_rate": 3.338770226537217e-06, "loss": 0.0218, "step": 321770 }, { "epoch": 124.96, "learning_rate": 3.338252427184466e-06, "loss": 0.0554, "step": 321780 }, { "epoch": 124.97, "learning_rate": 3.337734627831716e-06, "loss": 0.0744, "step": 321790 }, { "epoch": 124.97, "learning_rate": 3.3372168284789646e-06, "loss": 0.1341, "step": 321800 }, { "epoch": 124.97, "learning_rate": 3.336699029126214e-06, "loss": 0.2297, "step": 321810 }, { "epoch": 124.98, "learning_rate": 3.336181229773463e-06, "loss": 0.1113, "step": 321820 }, { "epoch": 124.98, "learning_rate": 3.3356634304207126e-06, "loss": 0.101, "step": 321830 }, { "epoch": 124.99, "learning_rate": 3.3351456310679614e-06, "loss": 0.0107, "step": 321840 }, { "epoch": 124.99, "learning_rate": 3.334627831715211e-06, "loss": 0.0276, "step": 321850 }, { "epoch": 124.99, "learning_rate": 3.3341100323624597e-06, "loss": 0.0107, "step": 321860 }, { "epoch": 125.0, "learning_rate": 3.3335922330097094e-06, "loss": 0.0002, "step": 321870 }, { "epoch": 125.0, "eval_accuracy": 0.951856946354883, "eval_loss": 0.37443777918815613, "eval_runtime": 8.2887, "eval_samples_per_second": 438.547, "eval_steps_per_second": 54.894, "step": 321875 }, { "epoch": 125.0, "learning_rate": 3.333074433656958e-06, "loss": 0.0311, "step": 321880 }, { "epoch": 125.01, "learning_rate": 3.3325566343042078e-06, "loss": 0.0095, "step": 321890 }, { "epoch": 125.01, "learning_rate": 3.3320388349514565e-06, "loss": 0.0511, "step": 321900 }, { "epoch": 125.01, "learning_rate": 3.331521035598706e-06, "loss": 0.0693, "step": 321910 }, { "epoch": 125.02, "learning_rate": 3.331003236245955e-06, "loss": 0.0426, "step": 321920 }, { "epoch": 125.02, "learning_rate": 3.3304854368932045e-06, "loss": 0.0984, "step": 321930 }, { "epoch": 125.03, "learning_rate": 3.3299676375404533e-06, "loss": 0.0761, "step": 321940 }, { "epoch": 125.03, "learning_rate": 3.329449838187703e-06, "loss": 0.0699, "step": 321950 }, { "epoch": 125.03, "learning_rate": 3.3289320388349517e-06, "loss": 0.0137, "step": 321960 }, { "epoch": 125.04, "learning_rate": 3.3284142394822013e-06, "loss": 0.009, "step": 321970 }, { "epoch": 125.04, "learning_rate": 3.32789644012945e-06, "loss": 0.0012, "step": 321980 }, { "epoch": 125.04, "learning_rate": 3.3273786407766993e-06, "loss": 0.1338, "step": 321990 }, { "epoch": 125.05, "learning_rate": 3.3268608414239485e-06, "loss": 0.0227, "step": 322000 }, { "epoch": 125.05, "learning_rate": 3.3263430420711977e-06, "loss": 0.101, "step": 322010 }, { "epoch": 125.06, "learning_rate": 3.325825242718447e-06, "loss": 0.0002, "step": 322020 }, { "epoch": 125.06, "learning_rate": 3.325307443365696e-06, "loss": 0.1301, "step": 322030 }, { "epoch": 125.06, "learning_rate": 3.3247896440129452e-06, "loss": 0.0492, "step": 322040 }, { "epoch": 125.07, "learning_rate": 3.3242718446601944e-06, "loss": 0.0068, "step": 322050 }, { "epoch": 125.07, "learning_rate": 3.3237540453074436e-06, "loss": 0.0001, "step": 322060 }, { "epoch": 125.08, "learning_rate": 3.323236245954693e-06, "loss": 0.0583, "step": 322070 }, { "epoch": 125.08, "learning_rate": 3.322718446601942e-06, "loss": 0.0497, "step": 322080 }, { "epoch": 125.08, "learning_rate": 3.3222006472491912e-06, "loss": 0.0299, "step": 322090 }, { "epoch": 125.09, "learning_rate": 3.3216828478964404e-06, "loss": 0.0039, "step": 322100 }, { "epoch": 125.09, "learning_rate": 3.3211650485436896e-06, "loss": 0.1666, "step": 322110 }, { "epoch": 125.1, "learning_rate": 3.320647249190939e-06, "loss": 0.0359, "step": 322120 }, { "epoch": 125.1, "learning_rate": 3.320129449838188e-06, "loss": 0.0209, "step": 322130 }, { "epoch": 125.1, "learning_rate": 3.319611650485437e-06, "loss": 0.0191, "step": 322140 }, { "epoch": 125.11, "learning_rate": 3.3190938511326864e-06, "loss": 0.0001, "step": 322150 }, { "epoch": 125.11, "learning_rate": 3.318576051779935e-06, "loss": 0.0005, "step": 322160 }, { "epoch": 125.11, "learning_rate": 3.3180582524271848e-06, "loss": 0.0688, "step": 322170 }, { "epoch": 125.12, "learning_rate": 3.3175404530744335e-06, "loss": 0.0696, "step": 322180 }, { "epoch": 125.12, "learning_rate": 3.317022653721683e-06, "loss": 0.0099, "step": 322190 }, { "epoch": 125.13, "learning_rate": 3.316504854368932e-06, "loss": 0.1323, "step": 322200 }, { "epoch": 125.13, "learning_rate": 3.3159870550161816e-06, "loss": 0.0144, "step": 322210 }, { "epoch": 125.13, "learning_rate": 3.3154692556634303e-06, "loss": 0.0017, "step": 322220 }, { "epoch": 125.14, "learning_rate": 3.31495145631068e-06, "loss": 0.0241, "step": 322230 }, { "epoch": 125.14, "learning_rate": 3.3144336569579287e-06, "loss": 0.0219, "step": 322240 }, { "epoch": 125.15, "learning_rate": 3.3139158576051783e-06, "loss": 0.0247, "step": 322250 }, { "epoch": 125.15, "learning_rate": 3.313398058252427e-06, "loss": 0.008, "step": 322260 }, { "epoch": 125.15, "learning_rate": 3.3128802588996767e-06, "loss": 0.0747, "step": 322270 }, { "epoch": 125.16, "learning_rate": 3.3123624595469255e-06, "loss": 0.0002, "step": 322280 }, { "epoch": 125.16, "learning_rate": 3.311844660194175e-06, "loss": 0.0892, "step": 322290 }, { "epoch": 125.17, "learning_rate": 3.311326860841424e-06, "loss": 0.0219, "step": 322300 }, { "epoch": 125.17, "learning_rate": 3.3108090614886735e-06, "loss": 0.1232, "step": 322310 }, { "epoch": 125.17, "learning_rate": 3.3102912621359223e-06, "loss": 0.0347, "step": 322320 }, { "epoch": 125.18, "learning_rate": 3.309773462783172e-06, "loss": 0.0192, "step": 322330 }, { "epoch": 125.18, "learning_rate": 3.3092556634304207e-06, "loss": 0.0221, "step": 322340 }, { "epoch": 125.18, "learning_rate": 3.3087378640776703e-06, "loss": 0.0831, "step": 322350 }, { "epoch": 125.19, "learning_rate": 3.3082200647249195e-06, "loss": 0.0622, "step": 322360 }, { "epoch": 125.19, "learning_rate": 3.3077022653721687e-06, "loss": 0.0713, "step": 322370 }, { "epoch": 125.2, "learning_rate": 3.307184466019418e-06, "loss": 0.1009, "step": 322380 }, { "epoch": 125.2, "learning_rate": 3.306666666666667e-06, "loss": 0.0388, "step": 322390 }, { "epoch": 125.2, "learning_rate": 3.3061488673139162e-06, "loss": 0.0733, "step": 322400 }, { "epoch": 125.21, "learning_rate": 3.3056310679611654e-06, "loss": 0.0374, "step": 322410 }, { "epoch": 125.21, "learning_rate": 3.3051132686084146e-06, "loss": 0.0921, "step": 322420 }, { "epoch": 125.22, "learning_rate": 3.304595469255664e-06, "loss": 0.0149, "step": 322430 }, { "epoch": 125.22, "learning_rate": 3.304077669902913e-06, "loss": 0.0023, "step": 322440 }, { "epoch": 125.22, "learning_rate": 3.3035598705501622e-06, "loss": 0.0305, "step": 322450 }, { "epoch": 125.23, "learning_rate": 3.3030420711974114e-06, "loss": 0.0217, "step": 322460 }, { "epoch": 125.23, "learning_rate": 3.3025242718446606e-06, "loss": 0.0772, "step": 322470 }, { "epoch": 125.23, "learning_rate": 3.30200647249191e-06, "loss": 0.0085, "step": 322480 }, { "epoch": 125.24, "learning_rate": 3.3014886731391586e-06, "loss": 0.0343, "step": 322490 }, { "epoch": 125.24, "learning_rate": 3.300970873786408e-06, "loss": 0.0383, "step": 322500 }, { "epoch": 125.25, "learning_rate": 3.300453074433657e-06, "loss": 0.0465, "step": 322510 }, { "epoch": 125.25, "learning_rate": 3.2999352750809066e-06, "loss": 0.0196, "step": 322520 }, { "epoch": 125.25, "learning_rate": 3.2994174757281554e-06, "loss": 0.0546, "step": 322530 }, { "epoch": 125.26, "learning_rate": 3.298899676375405e-06, "loss": 0.0006, "step": 322540 }, { "epoch": 125.26, "learning_rate": 3.2983818770226537e-06, "loss": 0.1354, "step": 322550 }, { "epoch": 125.27, "learning_rate": 3.2978640776699034e-06, "loss": 0.0154, "step": 322560 }, { "epoch": 125.27, "learning_rate": 3.297346278317152e-06, "loss": 0.1533, "step": 322570 }, { "epoch": 125.27, "learning_rate": 3.2968284789644017e-06, "loss": 0.0067, "step": 322580 }, { "epoch": 125.28, "learning_rate": 3.2963106796116505e-06, "loss": 0.0155, "step": 322590 }, { "epoch": 125.28, "learning_rate": 3.2957928802589e-06, "loss": 0.0001, "step": 322600 }, { "epoch": 125.29, "learning_rate": 3.295275080906149e-06, "loss": 0.0023, "step": 322610 }, { "epoch": 125.29, "learning_rate": 3.2947572815533985e-06, "loss": 0.0213, "step": 322620 }, { "epoch": 125.29, "learning_rate": 3.2942394822006473e-06, "loss": 0.0168, "step": 322630 }, { "epoch": 125.3, "learning_rate": 3.293721682847897e-06, "loss": 0.0305, "step": 322640 }, { "epoch": 125.3, "learning_rate": 3.2932038834951457e-06, "loss": 0.0766, "step": 322650 }, { "epoch": 125.3, "learning_rate": 3.2926860841423953e-06, "loss": 0.0482, "step": 322660 }, { "epoch": 125.31, "learning_rate": 3.292168284789644e-06, "loss": 0.0498, "step": 322670 }, { "epoch": 125.31, "learning_rate": 3.2916504854368937e-06, "loss": 0.0905, "step": 322680 }, { "epoch": 125.32, "learning_rate": 3.2911326860841425e-06, "loss": 0.0299, "step": 322690 }, { "epoch": 125.32, "learning_rate": 3.290614886731392e-06, "loss": 0.033, "step": 322700 }, { "epoch": 125.32, "learning_rate": 3.290097087378641e-06, "loss": 0.0786, "step": 322710 }, { "epoch": 125.33, "learning_rate": 3.2895792880258905e-06, "loss": 0.0746, "step": 322720 }, { "epoch": 125.33, "learning_rate": 3.2890614886731392e-06, "loss": 0.0629, "step": 322730 }, { "epoch": 125.34, "learning_rate": 3.288543689320389e-06, "loss": 0.0205, "step": 322740 }, { "epoch": 125.34, "learning_rate": 3.2880258899676376e-06, "loss": 0.0256, "step": 322750 }, { "epoch": 125.34, "learning_rate": 3.2875080906148873e-06, "loss": 0.0003, "step": 322760 }, { "epoch": 125.35, "learning_rate": 3.286990291262136e-06, "loss": 0.0028, "step": 322770 }, { "epoch": 125.35, "learning_rate": 3.2864724919093856e-06, "loss": 0.0375, "step": 322780 }, { "epoch": 125.36, "learning_rate": 3.2859546925566344e-06, "loss": 0.1092, "step": 322790 }, { "epoch": 125.36, "learning_rate": 3.2854368932038836e-06, "loss": 0.0797, "step": 322800 }, { "epoch": 125.36, "learning_rate": 3.284919093851133e-06, "loss": 0.0299, "step": 322810 }, { "epoch": 125.37, "learning_rate": 3.284401294498382e-06, "loss": 0.1172, "step": 322820 }, { "epoch": 125.37, "learning_rate": 3.283883495145631e-06, "loss": 0.0207, "step": 322830 }, { "epoch": 125.37, "learning_rate": 3.2833656957928804e-06, "loss": 0.0239, "step": 322840 }, { "epoch": 125.38, "learning_rate": 3.2828478964401296e-06, "loss": 0.0259, "step": 322850 }, { "epoch": 125.38, "learning_rate": 3.2823300970873788e-06, "loss": 0.0265, "step": 322860 }, { "epoch": 125.39, "learning_rate": 3.281812297734628e-06, "loss": 0.0913, "step": 322870 }, { "epoch": 125.39, "learning_rate": 3.281294498381877e-06, "loss": 0.0006, "step": 322880 }, { "epoch": 125.39, "learning_rate": 3.2807766990291264e-06, "loss": 0.0774, "step": 322890 }, { "epoch": 125.4, "learning_rate": 3.2802588996763756e-06, "loss": 0.0783, "step": 322900 }, { "epoch": 125.4, "learning_rate": 3.279741100323625e-06, "loss": 0.0057, "step": 322910 }, { "epoch": 125.41, "learning_rate": 3.279223300970874e-06, "loss": 0.1359, "step": 322920 }, { "epoch": 125.41, "learning_rate": 3.2787055016181236e-06, "loss": 0.036, "step": 322930 }, { "epoch": 125.41, "learning_rate": 3.2781877022653723e-06, "loss": 0.1364, "step": 322940 }, { "epoch": 125.42, "learning_rate": 3.277669902912622e-06, "loss": 0.0091, "step": 322950 }, { "epoch": 125.42, "learning_rate": 3.2771521035598707e-06, "loss": 0.0384, "step": 322960 }, { "epoch": 125.43, "learning_rate": 3.2766343042071203e-06, "loss": 0.1239, "step": 322970 }, { "epoch": 125.43, "learning_rate": 3.276116504854369e-06, "loss": 0.0007, "step": 322980 }, { "epoch": 125.43, "learning_rate": 3.2755987055016187e-06, "loss": 0.0073, "step": 322990 }, { "epoch": 125.44, "learning_rate": 3.2750809061488675e-06, "loss": 0.0464, "step": 323000 }, { "epoch": 125.44, "learning_rate": 3.274563106796117e-06, "loss": 0.0006, "step": 323010 }, { "epoch": 125.44, "learning_rate": 3.274045307443366e-06, "loss": 0.1693, "step": 323020 }, { "epoch": 125.45, "learning_rate": 3.2735275080906155e-06, "loss": 0.0003, "step": 323030 }, { "epoch": 125.45, "learning_rate": 3.2730097087378643e-06, "loss": 0.1203, "step": 323040 }, { "epoch": 125.46, "learning_rate": 3.272491909385114e-06, "loss": 0.0001, "step": 323050 }, { "epoch": 125.46, "learning_rate": 3.2719741100323627e-06, "loss": 0.0544, "step": 323060 }, { "epoch": 125.46, "learning_rate": 3.2714563106796123e-06, "loss": 0.1535, "step": 323070 }, { "epoch": 125.47, "learning_rate": 3.270938511326861e-06, "loss": 0.0001, "step": 323080 }, { "epoch": 125.47, "learning_rate": 3.2704207119741107e-06, "loss": 0.0948, "step": 323090 }, { "epoch": 125.48, "learning_rate": 3.2699029126213594e-06, "loss": 0.035, "step": 323100 }, { "epoch": 125.48, "learning_rate": 3.269385113268609e-06, "loss": 0.0005, "step": 323110 }, { "epoch": 125.48, "learning_rate": 3.268867313915858e-06, "loss": 0.0272, "step": 323120 }, { "epoch": 125.49, "learning_rate": 3.268349514563107e-06, "loss": 0.1522, "step": 323130 }, { "epoch": 125.49, "learning_rate": 3.2678317152103562e-06, "loss": 0.0283, "step": 323140 }, { "epoch": 125.5, "learning_rate": 3.2673139158576054e-06, "loss": 0.0539, "step": 323150 }, { "epoch": 125.5, "learning_rate": 3.2667961165048546e-06, "loss": 0.0314, "step": 323160 }, { "epoch": 125.5, "learning_rate": 3.266278317152104e-06, "loss": 0.1392, "step": 323170 }, { "epoch": 125.51, "learning_rate": 3.265760517799353e-06, "loss": 0.0142, "step": 323180 }, { "epoch": 125.51, "learning_rate": 3.265242718446602e-06, "loss": 0.0277, "step": 323190 }, { "epoch": 125.51, "learning_rate": 3.2647249190938514e-06, "loss": 0.0691, "step": 323200 }, { "epoch": 125.52, "learning_rate": 3.2642071197411006e-06, "loss": 0.0042, "step": 323210 }, { "epoch": 125.52, "learning_rate": 3.2636893203883498e-06, "loss": 0.0083, "step": 323220 }, { "epoch": 125.53, "learning_rate": 3.263171521035599e-06, "loss": 0.008, "step": 323230 }, { "epoch": 125.53, "learning_rate": 3.262653721682848e-06, "loss": 0.0298, "step": 323240 }, { "epoch": 125.53, "learning_rate": 3.2621359223300974e-06, "loss": 0.0326, "step": 323250 }, { "epoch": 125.54, "learning_rate": 3.2616181229773466e-06, "loss": 0.0096, "step": 323260 }, { "epoch": 125.54, "learning_rate": 3.2611003236245957e-06, "loss": 0.0194, "step": 323270 }, { "epoch": 125.55, "learning_rate": 3.260582524271845e-06, "loss": 0.0973, "step": 323280 }, { "epoch": 125.55, "learning_rate": 3.260064724919094e-06, "loss": 0.0001, "step": 323290 }, { "epoch": 125.55, "learning_rate": 3.259546925566343e-06, "loss": 0.0932, "step": 323300 }, { "epoch": 125.56, "learning_rate": 3.2590291262135925e-06, "loss": 0.0225, "step": 323310 }, { "epoch": 125.56, "learning_rate": 3.2585113268608413e-06, "loss": 0.1337, "step": 323320 }, { "epoch": 125.57, "learning_rate": 3.257993527508091e-06, "loss": 0.0657, "step": 323330 }, { "epoch": 125.57, "learning_rate": 3.2574757281553397e-06, "loss": 0.0006, "step": 323340 }, { "epoch": 125.57, "learning_rate": 3.2569579288025893e-06, "loss": 0.0318, "step": 323350 }, { "epoch": 125.58, "learning_rate": 3.256440129449838e-06, "loss": 0.0376, "step": 323360 }, { "epoch": 125.58, "learning_rate": 3.2559223300970877e-06, "loss": 0.041, "step": 323370 }, { "epoch": 125.58, "learning_rate": 3.2554045307443365e-06, "loss": 0.0667, "step": 323380 }, { "epoch": 125.59, "learning_rate": 3.254886731391586e-06, "loss": 0.0001, "step": 323390 }, { "epoch": 125.59, "learning_rate": 3.254368932038835e-06, "loss": 0.0079, "step": 323400 }, { "epoch": 125.6, "learning_rate": 3.2538511326860845e-06, "loss": 0.1019, "step": 323410 }, { "epoch": 125.6, "learning_rate": 3.2533333333333332e-06, "loss": 0.0812, "step": 323420 }, { "epoch": 125.6, "learning_rate": 3.252815533980583e-06, "loss": 0.022, "step": 323430 }, { "epoch": 125.61, "learning_rate": 3.2522977346278316e-06, "loss": 0.0002, "step": 323440 }, { "epoch": 125.61, "learning_rate": 3.2517799352750812e-06, "loss": 0.0229, "step": 323450 }, { "epoch": 125.62, "learning_rate": 3.25126213592233e-06, "loss": 0.0109, "step": 323460 }, { "epoch": 125.62, "learning_rate": 3.2507443365695796e-06, "loss": 0.0002, "step": 323470 }, { "epoch": 125.62, "learning_rate": 3.250226537216829e-06, "loss": 0.0003, "step": 323480 }, { "epoch": 125.63, "learning_rate": 3.249708737864078e-06, "loss": 0.0199, "step": 323490 }, { "epoch": 125.63, "learning_rate": 3.2491909385113272e-06, "loss": 0.1282, "step": 323500 }, { "epoch": 125.63, "learning_rate": 3.2486731391585764e-06, "loss": 0.0101, "step": 323510 }, { "epoch": 125.64, "learning_rate": 3.2481553398058256e-06, "loss": 0.0038, "step": 323520 }, { "epoch": 125.64, "learning_rate": 3.247637540453075e-06, "loss": 0.0297, "step": 323530 }, { "epoch": 125.65, "learning_rate": 3.247119741100324e-06, "loss": 0.0535, "step": 323540 }, { "epoch": 125.65, "learning_rate": 3.246601941747573e-06, "loss": 0.0698, "step": 323550 }, { "epoch": 125.65, "learning_rate": 3.2460841423948224e-06, "loss": 0.0283, "step": 323560 }, { "epoch": 125.66, "learning_rate": 3.2455663430420716e-06, "loss": 0.0977, "step": 323570 }, { "epoch": 125.66, "learning_rate": 3.2450485436893208e-06, "loss": 0.0287, "step": 323580 }, { "epoch": 125.67, "learning_rate": 3.24453074433657e-06, "loss": 0.0197, "step": 323590 }, { "epoch": 125.67, "learning_rate": 3.244012944983819e-06, "loss": 0.014, "step": 323600 }, { "epoch": 125.67, "learning_rate": 3.2434951456310684e-06, "loss": 0.007, "step": 323610 }, { "epoch": 125.68, "learning_rate": 3.2429773462783176e-06, "loss": 0.0354, "step": 323620 }, { "epoch": 125.68, "learning_rate": 3.2424595469255663e-06, "loss": 0.1047, "step": 323630 }, { "epoch": 125.69, "learning_rate": 3.241941747572816e-06, "loss": 0.0001, "step": 323640 }, { "epoch": 125.69, "learning_rate": 3.2414239482200647e-06, "loss": 0.0966, "step": 323650 }, { "epoch": 125.69, "learning_rate": 3.2409061488673143e-06, "loss": 0.0194, "step": 323660 }, { "epoch": 125.7, "learning_rate": 3.240388349514563e-06, "loss": 0.0423, "step": 323670 }, { "epoch": 125.7, "learning_rate": 3.2398705501618127e-06, "loss": 0.1132, "step": 323680 }, { "epoch": 125.7, "learning_rate": 3.2393527508090615e-06, "loss": 0.0565, "step": 323690 }, { "epoch": 125.71, "learning_rate": 3.238834951456311e-06, "loss": 0.003, "step": 323700 }, { "epoch": 125.71, "learning_rate": 3.23831715210356e-06, "loss": 0.0417, "step": 323710 }, { "epoch": 125.72, "learning_rate": 3.2377993527508095e-06, "loss": 0.0266, "step": 323720 }, { "epoch": 125.72, "learning_rate": 3.2372815533980583e-06, "loss": 0.198, "step": 323730 }, { "epoch": 125.72, "learning_rate": 3.236763754045308e-06, "loss": 0.0242, "step": 323740 }, { "epoch": 125.73, "learning_rate": 3.2362459546925567e-06, "loss": 0.0733, "step": 323750 }, { "epoch": 125.73, "learning_rate": 3.2357281553398063e-06, "loss": 0.0632, "step": 323760 }, { "epoch": 125.74, "learning_rate": 3.235210355987055e-06, "loss": 0.0994, "step": 323770 }, { "epoch": 125.74, "learning_rate": 3.2346925566343047e-06, "loss": 0.0661, "step": 323780 }, { "epoch": 125.74, "learning_rate": 3.2341747572815534e-06, "loss": 0.0193, "step": 323790 }, { "epoch": 125.75, "learning_rate": 3.233656957928803e-06, "loss": 0.0661, "step": 323800 }, { "epoch": 125.75, "learning_rate": 3.233139158576052e-06, "loss": 0.0005, "step": 323810 }, { "epoch": 125.76, "learning_rate": 3.2326213592233014e-06, "loss": 0.2176, "step": 323820 }, { "epoch": 125.76, "learning_rate": 3.2321035598705502e-06, "loss": 0.0089, "step": 323830 }, { "epoch": 125.76, "learning_rate": 3.2315857605178e-06, "loss": 0.0976, "step": 323840 }, { "epoch": 125.77, "learning_rate": 3.2310679611650486e-06, "loss": 0.0162, "step": 323850 }, { "epoch": 125.77, "learning_rate": 3.2305501618122982e-06, "loss": 0.1407, "step": 323860 }, { "epoch": 125.77, "learning_rate": 3.230032362459547e-06, "loss": 0.0002, "step": 323870 }, { "epoch": 125.78, "learning_rate": 3.2295145631067966e-06, "loss": 0.1029, "step": 323880 }, { "epoch": 125.78, "learning_rate": 3.2289967637540454e-06, "loss": 0.1826, "step": 323890 }, { "epoch": 125.79, "learning_rate": 3.228478964401295e-06, "loss": 0.0394, "step": 323900 }, { "epoch": 125.79, "learning_rate": 3.2279611650485438e-06, "loss": 0.0006, "step": 323910 }, { "epoch": 125.79, "learning_rate": 3.2274433656957934e-06, "loss": 0.1683, "step": 323920 }, { "epoch": 125.8, "learning_rate": 3.226925566343042e-06, "loss": 0.0235, "step": 323930 }, { "epoch": 125.8, "learning_rate": 3.2264077669902914e-06, "loss": 0.0003, "step": 323940 }, { "epoch": 125.81, "learning_rate": 3.2258899676375406e-06, "loss": 0.0387, "step": 323950 }, { "epoch": 125.81, "learning_rate": 3.2253721682847897e-06, "loss": 0.0579, "step": 323960 }, { "epoch": 125.81, "learning_rate": 3.224854368932039e-06, "loss": 0.0877, "step": 323970 }, { "epoch": 125.82, "learning_rate": 3.224336569579288e-06, "loss": 0.0006, "step": 323980 }, { "epoch": 125.82, "learning_rate": 3.2238187702265373e-06, "loss": 0.0683, "step": 323990 }, { "epoch": 125.83, "learning_rate": 3.2233009708737865e-06, "loss": 0.124, "step": 324000 }, { "epoch": 125.83, "learning_rate": 3.2227831715210357e-06, "loss": 0.0352, "step": 324010 }, { "epoch": 125.83, "learning_rate": 3.222265372168285e-06, "loss": 0.0737, "step": 324020 }, { "epoch": 125.84, "learning_rate": 3.2217475728155345e-06, "loss": 0.0362, "step": 324030 }, { "epoch": 125.84, "learning_rate": 3.2212297734627833e-06, "loss": 0.071, "step": 324040 }, { "epoch": 125.84, "learning_rate": 3.220711974110033e-06, "loss": 0.0612, "step": 324050 }, { "epoch": 125.85, "learning_rate": 3.2201941747572817e-06, "loss": 0.0244, "step": 324060 }, { "epoch": 125.85, "learning_rate": 3.2196763754045313e-06, "loss": 0.0348, "step": 324070 }, { "epoch": 125.86, "learning_rate": 3.21915857605178e-06, "loss": 0.0179, "step": 324080 }, { "epoch": 125.86, "learning_rate": 3.2186407766990297e-06, "loss": 0.0723, "step": 324090 }, { "epoch": 125.86, "learning_rate": 3.2181229773462785e-06, "loss": 0.0946, "step": 324100 }, { "epoch": 125.87, "learning_rate": 3.217605177993528e-06, "loss": 0.0001, "step": 324110 }, { "epoch": 125.87, "learning_rate": 3.217087378640777e-06, "loss": 0.0024, "step": 324120 }, { "epoch": 125.88, "learning_rate": 3.2165695792880265e-06, "loss": 0.0626, "step": 324130 }, { "epoch": 125.88, "learning_rate": 3.2160517799352752e-06, "loss": 0.0763, "step": 324140 }, { "epoch": 125.88, "learning_rate": 3.215533980582525e-06, "loss": 0.0093, "step": 324150 }, { "epoch": 125.89, "learning_rate": 3.2150161812297736e-06, "loss": 0.0001, "step": 324160 }, { "epoch": 125.89, "learning_rate": 3.2144983818770233e-06, "loss": 0.0232, "step": 324170 }, { "epoch": 125.9, "learning_rate": 3.213980582524272e-06, "loss": 0.1123, "step": 324180 }, { "epoch": 125.9, "learning_rate": 3.2134627831715216e-06, "loss": 0.0038, "step": 324190 }, { "epoch": 125.9, "learning_rate": 3.2129449838187704e-06, "loss": 0.0708, "step": 324200 }, { "epoch": 125.91, "learning_rate": 3.21242718446602e-06, "loss": 0.0677, "step": 324210 }, { "epoch": 125.91, "learning_rate": 3.211909385113269e-06, "loss": 0.0001, "step": 324220 }, { "epoch": 125.91, "learning_rate": 3.2113915857605184e-06, "loss": 0.0178, "step": 324230 }, { "epoch": 125.92, "learning_rate": 3.210873786407767e-06, "loss": 0.0977, "step": 324240 }, { "epoch": 125.92, "learning_rate": 3.210355987055017e-06, "loss": 0.0933, "step": 324250 }, { "epoch": 125.93, "learning_rate": 3.2098381877022656e-06, "loss": 0.0649, "step": 324260 }, { "epoch": 125.93, "learning_rate": 3.2093203883495148e-06, "loss": 0.0299, "step": 324270 }, { "epoch": 125.93, "learning_rate": 3.208802588996764e-06, "loss": 0.0358, "step": 324280 }, { "epoch": 125.94, "learning_rate": 3.208284789644013e-06, "loss": 0.0569, "step": 324290 }, { "epoch": 125.94, "learning_rate": 3.2077669902912624e-06, "loss": 0.0002, "step": 324300 }, { "epoch": 125.95, "learning_rate": 3.2072491909385116e-06, "loss": 0.0613, "step": 324310 }, { "epoch": 125.95, "learning_rate": 3.2067313915857607e-06, "loss": 0.0799, "step": 324320 }, { "epoch": 125.95, "learning_rate": 3.20621359223301e-06, "loss": 0.0006, "step": 324330 }, { "epoch": 125.96, "learning_rate": 3.205695792880259e-06, "loss": 0.0006, "step": 324340 }, { "epoch": 125.96, "learning_rate": 3.2051779935275083e-06, "loss": 0.1195, "step": 324350 }, { "epoch": 125.97, "learning_rate": 3.2046601941747575e-06, "loss": 0.0531, "step": 324360 }, { "epoch": 125.97, "learning_rate": 3.2041423948220067e-06, "loss": 0.0121, "step": 324370 }, { "epoch": 125.97, "learning_rate": 3.203624595469256e-06, "loss": 0.0394, "step": 324380 }, { "epoch": 125.98, "learning_rate": 3.203106796116505e-06, "loss": 0.0401, "step": 324390 }, { "epoch": 125.98, "learning_rate": 3.2025889967637543e-06, "loss": 0.0405, "step": 324400 }, { "epoch": 125.98, "learning_rate": 3.2020711974110035e-06, "loss": 0.0419, "step": 324410 }, { "epoch": 125.99, "learning_rate": 3.2015533980582527e-06, "loss": 0.0277, "step": 324420 }, { "epoch": 125.99, "learning_rate": 3.201035598705502e-06, "loss": 0.0519, "step": 324430 }, { "epoch": 126.0, "learning_rate": 3.2005177993527507e-06, "loss": 0.0133, "step": 324440 }, { "epoch": 126.0, "learning_rate": 3.2000000000000003e-06, "loss": 0.0169, "step": 324450 }, { "epoch": 126.0, "eval_accuracy": 0.951031636863824, "eval_loss": 0.380782812833786, "eval_runtime": 8.2806, "eval_samples_per_second": 438.977, "eval_steps_per_second": 54.948, "step": 324450 }, { "epoch": 126.0, "learning_rate": 3.199482200647249e-06, "loss": 0.0002, "step": 324460 }, { "epoch": 126.01, "learning_rate": 3.1989644012944987e-06, "loss": 0.0534, "step": 324470 }, { "epoch": 126.01, "learning_rate": 3.1984466019417474e-06, "loss": 0.029, "step": 324480 }, { "epoch": 126.02, "learning_rate": 3.197928802588997e-06, "loss": 0.0901, "step": 324490 }, { "epoch": 126.02, "learning_rate": 3.197411003236246e-06, "loss": 0.1226, "step": 324500 }, { "epoch": 126.02, "learning_rate": 3.1968932038834954e-06, "loss": 0.1409, "step": 324510 }, { "epoch": 126.03, "learning_rate": 3.1963754045307442e-06, "loss": 0.0419, "step": 324520 }, { "epoch": 126.03, "learning_rate": 3.195857605177994e-06, "loss": 0.0085, "step": 324530 }, { "epoch": 126.03, "learning_rate": 3.1953398058252426e-06, "loss": 0.0206, "step": 324540 }, { "epoch": 126.04, "learning_rate": 3.1948220064724922e-06, "loss": 0.0352, "step": 324550 }, { "epoch": 126.04, "learning_rate": 3.194304207119741e-06, "loss": 0.0369, "step": 324560 }, { "epoch": 126.05, "learning_rate": 3.1937864077669906e-06, "loss": 0.034, "step": 324570 }, { "epoch": 126.05, "learning_rate": 3.1932686084142394e-06, "loss": 0.1324, "step": 324580 }, { "epoch": 126.05, "learning_rate": 3.192750809061489e-06, "loss": 0.0399, "step": 324590 }, { "epoch": 126.06, "learning_rate": 3.192233009708738e-06, "loss": 0.1423, "step": 324600 }, { "epoch": 126.06, "learning_rate": 3.1917152103559874e-06, "loss": 0.0174, "step": 324610 }, { "epoch": 126.07, "learning_rate": 3.1911974110032366e-06, "loss": 0.1043, "step": 324620 }, { "epoch": 126.07, "learning_rate": 3.1906796116504858e-06, "loss": 0.0506, "step": 324630 }, { "epoch": 126.07, "learning_rate": 3.190161812297735e-06, "loss": 0.0074, "step": 324640 }, { "epoch": 126.08, "learning_rate": 3.189644012944984e-06, "loss": 0.0291, "step": 324650 }, { "epoch": 126.08, "learning_rate": 3.1891262135922334e-06, "loss": 0.0305, "step": 324660 }, { "epoch": 126.09, "learning_rate": 3.1886084142394826e-06, "loss": 0.0408, "step": 324670 }, { "epoch": 126.09, "learning_rate": 3.1880906148867318e-06, "loss": 0.0084, "step": 324680 }, { "epoch": 126.09, "learning_rate": 3.187572815533981e-06, "loss": 0.0361, "step": 324690 }, { "epoch": 126.1, "learning_rate": 3.18705501618123e-06, "loss": 0.0414, "step": 324700 }, { "epoch": 126.1, "learning_rate": 3.1865372168284793e-06, "loss": 0.0578, "step": 324710 }, { "epoch": 126.1, "learning_rate": 3.1860194174757285e-06, "loss": 0.0161, "step": 324720 }, { "epoch": 126.11, "learning_rate": 3.1855016181229777e-06, "loss": 0.1001, "step": 324730 }, { "epoch": 126.11, "learning_rate": 3.184983818770227e-06, "loss": 0.0905, "step": 324740 }, { "epoch": 126.12, "learning_rate": 3.184466019417476e-06, "loss": 0.0183, "step": 324750 }, { "epoch": 126.12, "learning_rate": 3.1839482200647253e-06, "loss": 0.045, "step": 324760 }, { "epoch": 126.12, "learning_rate": 3.183430420711974e-06, "loss": 0.0535, "step": 324770 }, { "epoch": 126.13, "learning_rate": 3.1829126213592237e-06, "loss": 0.0199, "step": 324780 }, { "epoch": 126.13, "learning_rate": 3.1823948220064725e-06, "loss": 0.0103, "step": 324790 }, { "epoch": 126.14, "learning_rate": 3.181877022653722e-06, "loss": 0.0105, "step": 324800 }, { "epoch": 126.14, "learning_rate": 3.181359223300971e-06, "loss": 0.0005, "step": 324810 }, { "epoch": 126.14, "learning_rate": 3.1808414239482205e-06, "loss": 0.1236, "step": 324820 }, { "epoch": 126.15, "learning_rate": 3.1803236245954692e-06, "loss": 0.0313, "step": 324830 }, { "epoch": 126.15, "learning_rate": 3.179805825242719e-06, "loss": 0.112, "step": 324840 }, { "epoch": 126.16, "learning_rate": 3.1792880258899676e-06, "loss": 0.0757, "step": 324850 }, { "epoch": 126.16, "learning_rate": 3.1787702265372173e-06, "loss": 0.0096, "step": 324860 }, { "epoch": 126.16, "learning_rate": 3.178252427184466e-06, "loss": 0.0165, "step": 324870 }, { "epoch": 126.17, "learning_rate": 3.1777346278317156e-06, "loss": 0.1481, "step": 324880 }, { "epoch": 126.17, "learning_rate": 3.1772168284789644e-06, "loss": 0.0278, "step": 324890 }, { "epoch": 126.17, "learning_rate": 3.176699029126214e-06, "loss": 0.0018, "step": 324900 }, { "epoch": 126.18, "learning_rate": 3.176181229773463e-06, "loss": 0.0096, "step": 324910 }, { "epoch": 126.18, "learning_rate": 3.1756634304207124e-06, "loss": 0.0002, "step": 324920 }, { "epoch": 126.19, "learning_rate": 3.175145631067961e-06, "loss": 0.1061, "step": 324930 }, { "epoch": 126.19, "learning_rate": 3.174627831715211e-06, "loss": 0.0973, "step": 324940 }, { "epoch": 126.19, "learning_rate": 3.1741100323624596e-06, "loss": 0.0567, "step": 324950 }, { "epoch": 126.2, "learning_rate": 3.173592233009709e-06, "loss": 0.0551, "step": 324960 }, { "epoch": 126.2, "learning_rate": 3.173074433656958e-06, "loss": 0.173, "step": 324970 }, { "epoch": 126.21, "learning_rate": 3.1725566343042076e-06, "loss": 0.0089, "step": 324980 }, { "epoch": 126.21, "learning_rate": 3.1720388349514564e-06, "loss": 0.034, "step": 324990 }, { "epoch": 126.21, "learning_rate": 3.171521035598706e-06, "loss": 0.0002, "step": 325000 }, { "epoch": 126.22, "learning_rate": 3.1710032362459547e-06, "loss": 0.0362, "step": 325010 }, { "epoch": 126.22, "learning_rate": 3.1704854368932044e-06, "loss": 0.0957, "step": 325020 }, { "epoch": 126.23, "learning_rate": 3.169967637540453e-06, "loss": 0.1369, "step": 325030 }, { "epoch": 126.23, "learning_rate": 3.1694498381877028e-06, "loss": 0.0481, "step": 325040 }, { "epoch": 126.23, "learning_rate": 3.1689320388349515e-06, "loss": 0.0001, "step": 325050 }, { "epoch": 126.24, "learning_rate": 3.168414239482201e-06, "loss": 0.0259, "step": 325060 }, { "epoch": 126.24, "learning_rate": 3.16789644012945e-06, "loss": 0.0091, "step": 325070 }, { "epoch": 126.24, "learning_rate": 3.167378640776699e-06, "loss": 0.089, "step": 325080 }, { "epoch": 126.25, "learning_rate": 3.1668608414239483e-06, "loss": 0.0291, "step": 325090 }, { "epoch": 126.25, "learning_rate": 3.1663430420711975e-06, "loss": 0.005, "step": 325100 }, { "epoch": 126.26, "learning_rate": 3.1658252427184467e-06, "loss": 0.0008, "step": 325110 }, { "epoch": 126.26, "learning_rate": 3.165307443365696e-06, "loss": 0.0818, "step": 325120 }, { "epoch": 126.26, "learning_rate": 3.164789644012945e-06, "loss": 0.0779, "step": 325130 }, { "epoch": 126.27, "learning_rate": 3.1642718446601943e-06, "loss": 0.0263, "step": 325140 }, { "epoch": 126.27, "learning_rate": 3.163754045307444e-06, "loss": 0.0625, "step": 325150 }, { "epoch": 126.28, "learning_rate": 3.1632362459546927e-06, "loss": 0.0383, "step": 325160 }, { "epoch": 126.28, "learning_rate": 3.1627184466019423e-06, "loss": 0.0004, "step": 325170 }, { "epoch": 126.28, "learning_rate": 3.162200647249191e-06, "loss": 0.0266, "step": 325180 }, { "epoch": 126.29, "learning_rate": 3.1616828478964407e-06, "loss": 0.0729, "step": 325190 }, { "epoch": 126.29, "learning_rate": 3.1611650485436894e-06, "loss": 0.0259, "step": 325200 }, { "epoch": 126.3, "learning_rate": 3.160647249190939e-06, "loss": 0.0476, "step": 325210 }, { "epoch": 126.3, "learning_rate": 3.160129449838188e-06, "loss": 0.0034, "step": 325220 }, { "epoch": 126.3, "learning_rate": 3.1596116504854375e-06, "loss": 0.1365, "step": 325230 }, { "epoch": 126.31, "learning_rate": 3.1590938511326862e-06, "loss": 0.0504, "step": 325240 }, { "epoch": 126.31, "learning_rate": 3.158576051779936e-06, "loss": 0.0014, "step": 325250 }, { "epoch": 126.31, "learning_rate": 3.1580582524271846e-06, "loss": 0.0542, "step": 325260 }, { "epoch": 126.32, "learning_rate": 3.1575404530744342e-06, "loss": 0.0721, "step": 325270 }, { "epoch": 126.32, "learning_rate": 3.157022653721683e-06, "loss": 0.0075, "step": 325280 }, { "epoch": 126.33, "learning_rate": 3.1565048543689326e-06, "loss": 0.078, "step": 325290 }, { "epoch": 126.33, "learning_rate": 3.1559870550161814e-06, "loss": 0.0699, "step": 325300 }, { "epoch": 126.33, "learning_rate": 3.155469255663431e-06, "loss": 0.0805, "step": 325310 }, { "epoch": 126.34, "learning_rate": 3.1549514563106798e-06, "loss": 0.0222, "step": 325320 }, { "epoch": 126.34, "learning_rate": 3.1544336569579294e-06, "loss": 0.0792, "step": 325330 }, { "epoch": 126.35, "learning_rate": 3.153915857605178e-06, "loss": 0.0415, "step": 325340 }, { "epoch": 126.35, "learning_rate": 3.1533980582524278e-06, "loss": 0.0761, "step": 325350 }, { "epoch": 126.35, "learning_rate": 3.1528802588996766e-06, "loss": 0.0658, "step": 325360 }, { "epoch": 126.36, "learning_rate": 3.152362459546926e-06, "loss": 0.1202, "step": 325370 }, { "epoch": 126.36, "learning_rate": 3.151844660194175e-06, "loss": 0.0206, "step": 325380 }, { "epoch": 126.37, "learning_rate": 3.1513268608414246e-06, "loss": 0.1212, "step": 325390 }, { "epoch": 126.37, "learning_rate": 3.1508090614886733e-06, "loss": 0.0683, "step": 325400 }, { "epoch": 126.37, "learning_rate": 3.1502912621359225e-06, "loss": 0.0301, "step": 325410 }, { "epoch": 126.38, "learning_rate": 3.1497734627831717e-06, "loss": 0.0082, "step": 325420 }, { "epoch": 126.38, "learning_rate": 3.149255663430421e-06, "loss": 0.0096, "step": 325430 }, { "epoch": 126.38, "learning_rate": 3.14873786407767e-06, "loss": 0.0686, "step": 325440 }, { "epoch": 126.39, "learning_rate": 3.1482200647249193e-06, "loss": 0.0699, "step": 325450 }, { "epoch": 126.39, "learning_rate": 3.1477022653721685e-06, "loss": 0.002, "step": 325460 }, { "epoch": 126.4, "learning_rate": 3.1471844660194177e-06, "loss": 0.0779, "step": 325470 }, { "epoch": 126.4, "learning_rate": 3.146666666666667e-06, "loss": 0.0002, "step": 325480 }, { "epoch": 126.4, "learning_rate": 3.146148867313916e-06, "loss": 0.1036, "step": 325490 }, { "epoch": 126.41, "learning_rate": 3.1456310679611653e-06, "loss": 0.0436, "step": 325500 }, { "epoch": 126.41, "learning_rate": 3.1451132686084145e-06, "loss": 0.1197, "step": 325510 }, { "epoch": 126.42, "learning_rate": 3.1445954692556637e-06, "loss": 0.0309, "step": 325520 }, { "epoch": 126.42, "learning_rate": 3.144077669902913e-06, "loss": 0.0165, "step": 325530 }, { "epoch": 126.42, "learning_rate": 3.143559870550162e-06, "loss": 0.0115, "step": 325540 }, { "epoch": 126.43, "learning_rate": 3.1430420711974113e-06, "loss": 0.0005, "step": 325550 }, { "epoch": 126.43, "learning_rate": 3.1425242718446604e-06, "loss": 0.0106, "step": 325560 }, { "epoch": 126.43, "learning_rate": 3.1420064724919096e-06, "loss": 0.0665, "step": 325570 }, { "epoch": 126.44, "learning_rate": 3.1414886731391584e-06, "loss": 0.0502, "step": 325580 }, { "epoch": 126.44, "learning_rate": 3.140970873786408e-06, "loss": 0.0939, "step": 325590 }, { "epoch": 126.45, "learning_rate": 3.140453074433657e-06, "loss": 0.0339, "step": 325600 }, { "epoch": 126.45, "learning_rate": 3.1399352750809064e-06, "loss": 0.0006, "step": 325610 }, { "epoch": 126.45, "learning_rate": 3.139417475728155e-06, "loss": 0.0196, "step": 325620 }, { "epoch": 126.46, "learning_rate": 3.138899676375405e-06, "loss": 0.0312, "step": 325630 }, { "epoch": 126.46, "learning_rate": 3.1383818770226536e-06, "loss": 0.0123, "step": 325640 }, { "epoch": 126.47, "learning_rate": 3.137864077669903e-06, "loss": 0.0247, "step": 325650 }, { "epoch": 126.47, "learning_rate": 3.137346278317152e-06, "loss": 0.0645, "step": 325660 }, { "epoch": 126.47, "learning_rate": 3.1368284789644016e-06, "loss": 0.0079, "step": 325670 }, { "epoch": 126.48, "learning_rate": 3.1363106796116504e-06, "loss": 0.0381, "step": 325680 }, { "epoch": 126.48, "learning_rate": 3.1357928802589e-06, "loss": 0.1424, "step": 325690 }, { "epoch": 126.49, "learning_rate": 3.1352750809061487e-06, "loss": 0.009, "step": 325700 }, { "epoch": 126.49, "learning_rate": 3.1347572815533984e-06, "loss": 0.1653, "step": 325710 }, { "epoch": 126.49, "learning_rate": 3.134239482200648e-06, "loss": 0.0964, "step": 325720 }, { "epoch": 126.5, "learning_rate": 3.1337216828478968e-06, "loss": 0.0361, "step": 325730 }, { "epoch": 126.5, "learning_rate": 3.133203883495146e-06, "loss": 0.0391, "step": 325740 }, { "epoch": 126.5, "learning_rate": 3.132686084142395e-06, "loss": 0.0239, "step": 325750 }, { "epoch": 126.51, "learning_rate": 3.1321682847896443e-06, "loss": 0.0197, "step": 325760 }, { "epoch": 126.51, "learning_rate": 3.1316504854368935e-06, "loss": 0.0153, "step": 325770 }, { "epoch": 126.52, "learning_rate": 3.1311326860841427e-06, "loss": 0.0255, "step": 325780 }, { "epoch": 126.52, "learning_rate": 3.130614886731392e-06, "loss": 0.0125, "step": 325790 }, { "epoch": 126.52, "learning_rate": 3.130097087378641e-06, "loss": 0.0116, "step": 325800 }, { "epoch": 126.53, "learning_rate": 3.1295792880258903e-06, "loss": 0.0262, "step": 325810 }, { "epoch": 126.53, "learning_rate": 3.1290614886731395e-06, "loss": 0.0146, "step": 325820 }, { "epoch": 126.54, "learning_rate": 3.1285436893203887e-06, "loss": 0.0344, "step": 325830 }, { "epoch": 126.54, "learning_rate": 3.128025889967638e-06, "loss": 0.0912, "step": 325840 }, { "epoch": 126.54, "learning_rate": 3.127508090614887e-06, "loss": 0.0158, "step": 325850 }, { "epoch": 126.55, "learning_rate": 3.1269902912621363e-06, "loss": 0.1793, "step": 325860 }, { "epoch": 126.55, "learning_rate": 3.1264724919093855e-06, "loss": 0.047, "step": 325870 }, { "epoch": 126.56, "learning_rate": 3.1259546925566347e-06, "loss": 0.0015, "step": 325880 }, { "epoch": 126.56, "learning_rate": 3.125436893203884e-06, "loss": 0.1297, "step": 325890 }, { "epoch": 126.56, "learning_rate": 3.124919093851133e-06, "loss": 0.0175, "step": 325900 }, { "epoch": 126.57, "learning_rate": 3.124401294498382e-06, "loss": 0.0012, "step": 325910 }, { "epoch": 126.57, "learning_rate": 3.1238834951456315e-06, "loss": 0.0795, "step": 325920 }, { "epoch": 126.57, "learning_rate": 3.1233656957928802e-06, "loss": 0.07, "step": 325930 }, { "epoch": 126.58, "learning_rate": 3.12284789644013e-06, "loss": 0.0328, "step": 325940 }, { "epoch": 126.58, "learning_rate": 3.1223300970873786e-06, "loss": 0.0292, "step": 325950 }, { "epoch": 126.59, "learning_rate": 3.1218122977346282e-06, "loss": 0.1527, "step": 325960 }, { "epoch": 126.59, "learning_rate": 3.121294498381877e-06, "loss": 0.065, "step": 325970 }, { "epoch": 126.59, "learning_rate": 3.1207766990291266e-06, "loss": 0.0679, "step": 325980 }, { "epoch": 126.6, "learning_rate": 3.1202588996763754e-06, "loss": 0.0182, "step": 325990 }, { "epoch": 126.6, "learning_rate": 3.119741100323625e-06, "loss": 0.0906, "step": 326000 }, { "epoch": 126.61, "learning_rate": 3.1192233009708738e-06, "loss": 0.0584, "step": 326010 }, { "epoch": 126.61, "learning_rate": 3.1187055016181234e-06, "loss": 0.036, "step": 326020 }, { "epoch": 126.61, "learning_rate": 3.118187702265372e-06, "loss": 0.0262, "step": 326030 }, { "epoch": 126.62, "learning_rate": 3.1176699029126218e-06, "loss": 0.0001, "step": 326040 }, { "epoch": 126.62, "learning_rate": 3.1171521035598706e-06, "loss": 0.0127, "step": 326050 }, { "epoch": 126.63, "learning_rate": 3.11663430420712e-06, "loss": 0.0547, "step": 326060 }, { "epoch": 126.63, "learning_rate": 3.116116504854369e-06, "loss": 0.089, "step": 326070 }, { "epoch": 126.63, "learning_rate": 3.1155987055016186e-06, "loss": 0.0423, "step": 326080 }, { "epoch": 126.64, "learning_rate": 3.1150809061488673e-06, "loss": 0.0285, "step": 326090 }, { "epoch": 126.64, "learning_rate": 3.114563106796117e-06, "loss": 0.0539, "step": 326100 }, { "epoch": 126.64, "learning_rate": 3.1140453074433657e-06, "loss": 0.0353, "step": 326110 }, { "epoch": 126.65, "learning_rate": 3.1135275080906153e-06, "loss": 0.0732, "step": 326120 }, { "epoch": 126.65, "learning_rate": 3.113009708737864e-06, "loss": 0.0139, "step": 326130 }, { "epoch": 126.66, "learning_rate": 3.1124919093851137e-06, "loss": 0.1133, "step": 326140 }, { "epoch": 126.66, "learning_rate": 3.1119741100323625e-06, "loss": 0.0213, "step": 326150 }, { "epoch": 126.66, "learning_rate": 3.111456310679612e-06, "loss": 0.0036, "step": 326160 }, { "epoch": 126.67, "learning_rate": 3.110938511326861e-06, "loss": 0.09, "step": 326170 }, { "epoch": 126.67, "learning_rate": 3.1104207119741105e-06, "loss": 0.007, "step": 326180 }, { "epoch": 126.68, "learning_rate": 3.1099029126213593e-06, "loss": 0.0567, "step": 326190 }, { "epoch": 126.68, "learning_rate": 3.109385113268609e-06, "loss": 0.0875, "step": 326200 }, { "epoch": 126.68, "learning_rate": 3.1088673139158577e-06, "loss": 0.0872, "step": 326210 }, { "epoch": 126.69, "learning_rate": 3.108349514563107e-06, "loss": 0.0815, "step": 326220 }, { "epoch": 126.69, "learning_rate": 3.107831715210356e-06, "loss": 0.0075, "step": 326230 }, { "epoch": 126.7, "learning_rate": 3.1073139158576053e-06, "loss": 0.0374, "step": 326240 }, { "epoch": 126.7, "learning_rate": 3.1067961165048544e-06, "loss": 0.0051, "step": 326250 }, { "epoch": 126.7, "learning_rate": 3.1062783171521036e-06, "loss": 0.0961, "step": 326260 }, { "epoch": 126.71, "learning_rate": 3.1057605177993533e-06, "loss": 0.1242, "step": 326270 }, { "epoch": 126.71, "learning_rate": 3.105242718446602e-06, "loss": 0.0205, "step": 326280 }, { "epoch": 126.71, "learning_rate": 3.1047249190938516e-06, "loss": 0.0587, "step": 326290 }, { "epoch": 126.72, "learning_rate": 3.1042071197411004e-06, "loss": 0.0122, "step": 326300 }, { "epoch": 126.72, "learning_rate": 3.10368932038835e-06, "loss": 0.0583, "step": 326310 }, { "epoch": 126.73, "learning_rate": 3.103171521035599e-06, "loss": 0.0122, "step": 326320 }, { "epoch": 126.73, "learning_rate": 3.1026537216828484e-06, "loss": 0.0355, "step": 326330 }, { "epoch": 126.73, "learning_rate": 3.102135922330097e-06, "loss": 0.0067, "step": 326340 }, { "epoch": 126.74, "learning_rate": 3.101618122977347e-06, "loss": 0.0969, "step": 326350 }, { "epoch": 126.74, "learning_rate": 3.1011003236245956e-06, "loss": 0.0803, "step": 326360 }, { "epoch": 126.75, "learning_rate": 3.100582524271845e-06, "loss": 0.0138, "step": 326370 }, { "epoch": 126.75, "learning_rate": 3.100064724919094e-06, "loss": 0.0747, "step": 326380 }, { "epoch": 126.75, "learning_rate": 3.0995469255663436e-06, "loss": 0.008, "step": 326390 }, { "epoch": 126.76, "learning_rate": 3.0990291262135924e-06, "loss": 0.0957, "step": 326400 }, { "epoch": 126.76, "learning_rate": 3.098511326860842e-06, "loss": 0.1213, "step": 326410 }, { "epoch": 126.77, "learning_rate": 3.0979935275080908e-06, "loss": 0.061, "step": 326420 }, { "epoch": 126.77, "learning_rate": 3.0974757281553404e-06, "loss": 0.0014, "step": 326430 }, { "epoch": 126.77, "learning_rate": 3.096957928802589e-06, "loss": 0.0093, "step": 326440 }, { "epoch": 126.78, "learning_rate": 3.0964401294498388e-06, "loss": 0.1183, "step": 326450 }, { "epoch": 126.78, "learning_rate": 3.0959223300970875e-06, "loss": 0.0303, "step": 326460 }, { "epoch": 126.78, "learning_rate": 3.095404530744337e-06, "loss": 0.059, "step": 326470 }, { "epoch": 126.79, "learning_rate": 3.094886731391586e-06, "loss": 0.0331, "step": 326480 }, { "epoch": 126.79, "learning_rate": 3.0943689320388355e-06, "loss": 0.0608, "step": 326490 }, { "epoch": 126.8, "learning_rate": 3.0938511326860843e-06, "loss": 0.0811, "step": 326500 }, { "epoch": 126.8, "learning_rate": 3.093333333333334e-06, "loss": 0.0081, "step": 326510 }, { "epoch": 126.8, "learning_rate": 3.0928155339805827e-06, "loss": 0.0261, "step": 326520 }, { "epoch": 126.81, "learning_rate": 3.0922977346278323e-06, "loss": 0.0389, "step": 326530 }, { "epoch": 126.81, "learning_rate": 3.091779935275081e-06, "loss": 0.0001, "step": 326540 }, { "epoch": 126.82, "learning_rate": 3.0912621359223303e-06, "loss": 0.0002, "step": 326550 }, { "epoch": 126.82, "learning_rate": 3.0907443365695795e-06, "loss": 0.0342, "step": 326560 }, { "epoch": 126.82, "learning_rate": 3.0902265372168287e-06, "loss": 0.0462, "step": 326570 }, { "epoch": 126.83, "learning_rate": 3.089708737864078e-06, "loss": 0.0087, "step": 326580 }, { "epoch": 126.83, "learning_rate": 3.089190938511327e-06, "loss": 0.0737, "step": 326590 }, { "epoch": 126.83, "learning_rate": 3.0886731391585763e-06, "loss": 0.0272, "step": 326600 }, { "epoch": 126.84, "learning_rate": 3.0881553398058254e-06, "loss": 0.0497, "step": 326610 }, { "epoch": 126.84, "learning_rate": 3.0876375404530746e-06, "loss": 0.0001, "step": 326620 }, { "epoch": 126.85, "learning_rate": 3.087119741100324e-06, "loss": 0.0561, "step": 326630 }, { "epoch": 126.85, "learning_rate": 3.086601941747573e-06, "loss": 0.0002, "step": 326640 }, { "epoch": 126.85, "learning_rate": 3.0860841423948222e-06, "loss": 0.0222, "step": 326650 }, { "epoch": 126.86, "learning_rate": 3.0855663430420714e-06, "loss": 0.0437, "step": 326660 }, { "epoch": 126.86, "learning_rate": 3.0850485436893206e-06, "loss": 0.045, "step": 326670 }, { "epoch": 126.87, "learning_rate": 3.08453074433657e-06, "loss": 0.0649, "step": 326680 }, { "epoch": 126.87, "learning_rate": 3.084012944983819e-06, "loss": 0.0124, "step": 326690 }, { "epoch": 126.87, "learning_rate": 3.083495145631068e-06, "loss": 0.0191, "step": 326700 }, { "epoch": 126.88, "learning_rate": 3.0829773462783174e-06, "loss": 0.1462, "step": 326710 }, { "epoch": 126.88, "learning_rate": 3.082459546925566e-06, "loss": 0.0989, "step": 326720 }, { "epoch": 126.89, "learning_rate": 3.0819417475728158e-06, "loss": 0.0766, "step": 326730 }, { "epoch": 126.89, "learning_rate": 3.0814239482200646e-06, "loss": 0.0612, "step": 326740 }, { "epoch": 126.89, "learning_rate": 3.080906148867314e-06, "loss": 0.1186, "step": 326750 }, { "epoch": 126.9, "learning_rate": 3.080388349514563e-06, "loss": 0.0226, "step": 326760 }, { "epoch": 126.9, "learning_rate": 3.0798705501618126e-06, "loss": 0.0314, "step": 326770 }, { "epoch": 126.9, "learning_rate": 3.0793527508090613e-06, "loss": 0.0636, "step": 326780 }, { "epoch": 126.91, "learning_rate": 3.078834951456311e-06, "loss": 0.0465, "step": 326790 }, { "epoch": 126.91, "learning_rate": 3.0783171521035597e-06, "loss": 0.0112, "step": 326800 }, { "epoch": 126.92, "learning_rate": 3.0777993527508093e-06, "loss": 0.044, "step": 326810 }, { "epoch": 126.92, "learning_rate": 3.077281553398058e-06, "loss": 0.0935, "step": 326820 }, { "epoch": 126.92, "learning_rate": 3.0767637540453077e-06, "loss": 0.1455, "step": 326830 }, { "epoch": 126.93, "learning_rate": 3.0762459546925573e-06, "loss": 0.0829, "step": 326840 }, { "epoch": 126.93, "learning_rate": 3.075728155339806e-06, "loss": 0.0588, "step": 326850 }, { "epoch": 126.94, "learning_rate": 3.0752103559870557e-06, "loss": 0.0549, "step": 326860 }, { "epoch": 126.94, "learning_rate": 3.0746925566343045e-06, "loss": 0.0762, "step": 326870 }, { "epoch": 126.94, "learning_rate": 3.0741747572815537e-06, "loss": 0.0384, "step": 326880 }, { "epoch": 126.95, "learning_rate": 3.073656957928803e-06, "loss": 0.0094, "step": 326890 }, { "epoch": 126.95, "learning_rate": 3.073139158576052e-06, "loss": 0.0179, "step": 326900 }, { "epoch": 126.96, "learning_rate": 3.0726213592233013e-06, "loss": 0.0004, "step": 326910 }, { "epoch": 126.96, "learning_rate": 3.0721035598705505e-06, "loss": 0.0232, "step": 326920 }, { "epoch": 126.96, "learning_rate": 3.0715857605177997e-06, "loss": 0.0346, "step": 326930 }, { "epoch": 126.97, "learning_rate": 3.071067961165049e-06, "loss": 0.1026, "step": 326940 }, { "epoch": 126.97, "learning_rate": 3.070550161812298e-06, "loss": 0.0186, "step": 326950 }, { "epoch": 126.97, "learning_rate": 3.0700323624595473e-06, "loss": 0.1791, "step": 326960 }, { "epoch": 126.98, "learning_rate": 3.0695145631067965e-06, "loss": 0.0802, "step": 326970 }, { "epoch": 126.98, "learning_rate": 3.0689967637540456e-06, "loss": 0.1251, "step": 326980 }, { "epoch": 126.99, "learning_rate": 3.068478964401295e-06, "loss": 0.0842, "step": 326990 }, { "epoch": 126.99, "learning_rate": 3.067961165048544e-06, "loss": 0.007, "step": 327000 }, { "epoch": 126.99, "learning_rate": 3.0674433656957932e-06, "loss": 0.0338, "step": 327010 }, { "epoch": 127.0, "learning_rate": 3.0669255663430424e-06, "loss": 0.0277, "step": 327020 }, { "epoch": 127.0, "eval_accuracy": 0.9524071526822558, "eval_loss": 0.37351882457733154, "eval_runtime": 8.1793, "eval_samples_per_second": 444.416, "eval_steps_per_second": 55.628, "step": 327025 }, { "epoch": 127.0, "learning_rate": 3.0664077669902916e-06, "loss": 0.0036, "step": 327030 }, { "epoch": 127.01, "learning_rate": 3.065889967637541e-06, "loss": 0.0449, "step": 327040 }, { "epoch": 127.01, "learning_rate": 3.0653721682847896e-06, "loss": 0.0688, "step": 327050 }, { "epoch": 127.01, "learning_rate": 3.064854368932039e-06, "loss": 0.0156, "step": 327060 }, { "epoch": 127.02, "learning_rate": 3.064336569579288e-06, "loss": 0.1342, "step": 327070 }, { "epoch": 127.02, "learning_rate": 3.0638187702265376e-06, "loss": 0.0374, "step": 327080 }, { "epoch": 127.03, "learning_rate": 3.0633009708737864e-06, "loss": 0.0581, "step": 327090 }, { "epoch": 127.03, "learning_rate": 3.062783171521036e-06, "loss": 0.1054, "step": 327100 }, { "epoch": 127.03, "learning_rate": 3.0622653721682848e-06, "loss": 0.0106, "step": 327110 }, { "epoch": 127.04, "learning_rate": 3.0617475728155344e-06, "loss": 0.0705, "step": 327120 }, { "epoch": 127.04, "learning_rate": 3.061229773462783e-06, "loss": 0.0865, "step": 327130 }, { "epoch": 127.04, "learning_rate": 3.0607119741100328e-06, "loss": 0.0066, "step": 327140 }, { "epoch": 127.05, "learning_rate": 3.0601941747572815e-06, "loss": 0.0187, "step": 327150 }, { "epoch": 127.05, "learning_rate": 3.059676375404531e-06, "loss": 0.0035, "step": 327160 }, { "epoch": 127.06, "learning_rate": 3.05915857605178e-06, "loss": 0.0743, "step": 327170 }, { "epoch": 127.06, "learning_rate": 3.0586407766990295e-06, "loss": 0.0003, "step": 327180 }, { "epoch": 127.06, "learning_rate": 3.0581229773462783e-06, "loss": 0.0983, "step": 327190 }, { "epoch": 127.07, "learning_rate": 3.057605177993528e-06, "loss": 0.0355, "step": 327200 }, { "epoch": 127.07, "learning_rate": 3.0570873786407767e-06, "loss": 0.0001, "step": 327210 }, { "epoch": 127.08, "learning_rate": 3.0565695792880263e-06, "loss": 0.0204, "step": 327220 }, { "epoch": 127.08, "learning_rate": 3.056051779935275e-06, "loss": 0.1041, "step": 327230 }, { "epoch": 127.08, "learning_rate": 3.0555339805825247e-06, "loss": 0.0009, "step": 327240 }, { "epoch": 127.09, "learning_rate": 3.0550161812297735e-06, "loss": 0.0124, "step": 327250 }, { "epoch": 127.09, "learning_rate": 3.054498381877023e-06, "loss": 0.0086, "step": 327260 }, { "epoch": 127.1, "learning_rate": 3.053980582524272e-06, "loss": 0.0013, "step": 327270 }, { "epoch": 127.1, "learning_rate": 3.0534627831715215e-06, "loss": 0.0056, "step": 327280 }, { "epoch": 127.1, "learning_rate": 3.0529449838187703e-06, "loss": 0.0095, "step": 327290 }, { "epoch": 127.11, "learning_rate": 3.05242718446602e-06, "loss": 0.0803, "step": 327300 }, { "epoch": 127.11, "learning_rate": 3.0519093851132686e-06, "loss": 0.0185, "step": 327310 }, { "epoch": 127.11, "learning_rate": 3.0513915857605183e-06, "loss": 0.091, "step": 327320 }, { "epoch": 127.12, "learning_rate": 3.050873786407767e-06, "loss": 0.0009, "step": 327330 }, { "epoch": 127.12, "learning_rate": 3.0503559870550166e-06, "loss": 0.0377, "step": 327340 }, { "epoch": 127.13, "learning_rate": 3.0498381877022654e-06, "loss": 0.0379, "step": 327350 }, { "epoch": 127.13, "learning_rate": 3.0493203883495146e-06, "loss": 0.0167, "step": 327360 }, { "epoch": 127.13, "learning_rate": 3.048802588996764e-06, "loss": 0.0296, "step": 327370 }, { "epoch": 127.14, "learning_rate": 3.048284789644013e-06, "loss": 0.0653, "step": 327380 }, { "epoch": 127.14, "learning_rate": 3.0477669902912626e-06, "loss": 0.0436, "step": 327390 }, { "epoch": 127.15, "learning_rate": 3.0472491909385114e-06, "loss": 0.036, "step": 327400 }, { "epoch": 127.15, "learning_rate": 3.046731391585761e-06, "loss": 0.0219, "step": 327410 }, { "epoch": 127.15, "learning_rate": 3.0462135922330098e-06, "loss": 0.0001, "step": 327420 }, { "epoch": 127.16, "learning_rate": 3.0456957928802594e-06, "loss": 0.0011, "step": 327430 }, { "epoch": 127.16, "learning_rate": 3.045177993527508e-06, "loss": 0.0801, "step": 327440 }, { "epoch": 127.17, "learning_rate": 3.044660194174758e-06, "loss": 0.0166, "step": 327450 }, { "epoch": 127.17, "learning_rate": 3.0441423948220066e-06, "loss": 0.0014, "step": 327460 }, { "epoch": 127.17, "learning_rate": 3.043624595469256e-06, "loss": 0.0551, "step": 327470 }, { "epoch": 127.18, "learning_rate": 3.043106796116505e-06, "loss": 0.0137, "step": 327480 }, { "epoch": 127.18, "learning_rate": 3.0425889967637546e-06, "loss": 0.0076, "step": 327490 }, { "epoch": 127.18, "learning_rate": 3.0420711974110033e-06, "loss": 0.1127, "step": 327500 }, { "epoch": 127.19, "learning_rate": 3.041553398058253e-06, "loss": 0.0311, "step": 327510 }, { "epoch": 127.19, "learning_rate": 3.0410355987055017e-06, "loss": 0.0004, "step": 327520 }, { "epoch": 127.2, "learning_rate": 3.0405177993527513e-06, "loss": 0.0147, "step": 327530 }, { "epoch": 127.2, "learning_rate": 3.04e-06, "loss": 0.1063, "step": 327540 }, { "epoch": 127.2, "learning_rate": 3.0394822006472497e-06, "loss": 0.0387, "step": 327550 }, { "epoch": 127.21, "learning_rate": 3.0389644012944985e-06, "loss": 0.0119, "step": 327560 }, { "epoch": 127.21, "learning_rate": 3.038446601941748e-06, "loss": 0.0754, "step": 327570 }, { "epoch": 127.22, "learning_rate": 3.037928802588997e-06, "loss": 0.073, "step": 327580 }, { "epoch": 127.22, "learning_rate": 3.0374110032362465e-06, "loss": 0.1298, "step": 327590 }, { "epoch": 127.22, "learning_rate": 3.0368932038834953e-06, "loss": 0.0533, "step": 327600 }, { "epoch": 127.23, "learning_rate": 3.036375404530745e-06, "loss": 0.2302, "step": 327610 }, { "epoch": 127.23, "learning_rate": 3.0358576051779937e-06, "loss": 0.1064, "step": 327620 }, { "epoch": 127.23, "learning_rate": 3.0353398058252433e-06, "loss": 0.0985, "step": 327630 }, { "epoch": 127.24, "learning_rate": 3.034822006472492e-06, "loss": 0.0162, "step": 327640 }, { "epoch": 127.24, "learning_rate": 3.0343042071197417e-06, "loss": 0.033, "step": 327650 }, { "epoch": 127.25, "learning_rate": 3.0337864077669905e-06, "loss": 0.0153, "step": 327660 }, { "epoch": 127.25, "learning_rate": 3.03326860841424e-06, "loss": 0.0326, "step": 327670 }, { "epoch": 127.25, "learning_rate": 3.032750809061489e-06, "loss": 0.0592, "step": 327680 }, { "epoch": 127.26, "learning_rate": 3.032233009708738e-06, "loss": 0.0005, "step": 327690 }, { "epoch": 127.26, "learning_rate": 3.0317152103559872e-06, "loss": 0.0785, "step": 327700 }, { "epoch": 127.27, "learning_rate": 3.0311974110032364e-06, "loss": 0.0124, "step": 327710 }, { "epoch": 127.27, "learning_rate": 3.0306796116504856e-06, "loss": 0.0421, "step": 327720 }, { "epoch": 127.27, "learning_rate": 3.030161812297735e-06, "loss": 0.0002, "step": 327730 }, { "epoch": 127.28, "learning_rate": 3.029644012944984e-06, "loss": 0.0575, "step": 327740 }, { "epoch": 127.28, "learning_rate": 3.029126213592233e-06, "loss": 0.0097, "step": 327750 }, { "epoch": 127.29, "learning_rate": 3.0286084142394824e-06, "loss": 0.0706, "step": 327760 }, { "epoch": 127.29, "learning_rate": 3.0280906148867316e-06, "loss": 0.0238, "step": 327770 }, { "epoch": 127.29, "learning_rate": 3.0275728155339808e-06, "loss": 0.0537, "step": 327780 }, { "epoch": 127.3, "learning_rate": 3.02705501618123e-06, "loss": 0.0015, "step": 327790 }, { "epoch": 127.3, "learning_rate": 3.026537216828479e-06, "loss": 0.0193, "step": 327800 }, { "epoch": 127.3, "learning_rate": 3.0260194174757284e-06, "loss": 0.11, "step": 327810 }, { "epoch": 127.31, "learning_rate": 3.0255016181229776e-06, "loss": 0.0103, "step": 327820 }, { "epoch": 127.31, "learning_rate": 3.0249838187702268e-06, "loss": 0.1914, "step": 327830 }, { "epoch": 127.32, "learning_rate": 3.024466019417476e-06, "loss": 0.092, "step": 327840 }, { "epoch": 127.32, "learning_rate": 3.023948220064725e-06, "loss": 0.0087, "step": 327850 }, { "epoch": 127.32, "learning_rate": 3.023430420711974e-06, "loss": 0.0002, "step": 327860 }, { "epoch": 127.33, "learning_rate": 3.0229126213592235e-06, "loss": 0.0001, "step": 327870 }, { "epoch": 127.33, "learning_rate": 3.0223948220064723e-06, "loss": 0.0397, "step": 327880 }, { "epoch": 127.34, "learning_rate": 3.021877022653722e-06, "loss": 0.0882, "step": 327890 }, { "epoch": 127.34, "learning_rate": 3.0213592233009707e-06, "loss": 0.0721, "step": 327900 }, { "epoch": 127.34, "learning_rate": 3.0208414239482203e-06, "loss": 0.0513, "step": 327910 }, { "epoch": 127.35, "learning_rate": 3.020323624595469e-06, "loss": 0.0003, "step": 327920 }, { "epoch": 127.35, "learning_rate": 3.0198058252427187e-06, "loss": 0.0352, "step": 327930 }, { "epoch": 127.36, "learning_rate": 3.0192880258899675e-06, "loss": 0.0211, "step": 327940 }, { "epoch": 127.36, "learning_rate": 3.018770226537217e-06, "loss": 0.1231, "step": 327950 }, { "epoch": 127.36, "learning_rate": 3.0182524271844667e-06, "loss": 0.0064, "step": 327960 }, { "epoch": 127.37, "learning_rate": 3.0177346278317155e-06, "loss": 0.0499, "step": 327970 }, { "epoch": 127.37, "learning_rate": 3.017216828478965e-06, "loss": 0.0885, "step": 327980 }, { "epoch": 127.37, "learning_rate": 3.016699029126214e-06, "loss": 0.0966, "step": 327990 }, { "epoch": 127.38, "learning_rate": 3.0161812297734635e-06, "loss": 0.0206, "step": 328000 }, { "epoch": 127.38, "learning_rate": 3.0156634304207123e-06, "loss": 0.082, "step": 328010 }, { "epoch": 127.39, "learning_rate": 3.0151456310679615e-06, "loss": 0.0005, "step": 328020 }, { "epoch": 127.39, "learning_rate": 3.0146278317152106e-06, "loss": 0.0306, "step": 328030 }, { "epoch": 127.39, "learning_rate": 3.01411003236246e-06, "loss": 0.0109, "step": 328040 }, { "epoch": 127.4, "learning_rate": 3.013592233009709e-06, "loss": 0.1121, "step": 328050 }, { "epoch": 127.4, "learning_rate": 3.0130744336569582e-06, "loss": 0.0011, "step": 328060 }, { "epoch": 127.41, "learning_rate": 3.0125566343042074e-06, "loss": 0.0084, "step": 328070 }, { "epoch": 127.41, "learning_rate": 3.0120388349514566e-06, "loss": 0.0345, "step": 328080 }, { "epoch": 127.41, "learning_rate": 3.011521035598706e-06, "loss": 0.0181, "step": 328090 }, { "epoch": 127.42, "learning_rate": 3.011003236245955e-06, "loss": 0.0377, "step": 328100 }, { "epoch": 127.42, "learning_rate": 3.010485436893204e-06, "loss": 0.1132, "step": 328110 }, { "epoch": 127.43, "learning_rate": 3.0099676375404534e-06, "loss": 0.0212, "step": 328120 }, { "epoch": 127.43, "learning_rate": 3.0094498381877026e-06, "loss": 0.0175, "step": 328130 }, { "epoch": 127.43, "learning_rate": 3.0089320388349518e-06, "loss": 0.1046, "step": 328140 }, { "epoch": 127.44, "learning_rate": 3.008414239482201e-06, "loss": 0.0283, "step": 328150 }, { "epoch": 127.44, "learning_rate": 3.00789644012945e-06, "loss": 0.0111, "step": 328160 }, { "epoch": 127.44, "learning_rate": 3.0073786407766994e-06, "loss": 0.0083, "step": 328170 }, { "epoch": 127.45, "learning_rate": 3.0068608414239486e-06, "loss": 0.001, "step": 328180 }, { "epoch": 127.45, "learning_rate": 3.0063430420711973e-06, "loss": 0.0363, "step": 328190 }, { "epoch": 127.46, "learning_rate": 3.005825242718447e-06, "loss": 0.1108, "step": 328200 }, { "epoch": 127.46, "learning_rate": 3.0053074433656957e-06, "loss": 0.0033, "step": 328210 }, { "epoch": 127.46, "learning_rate": 3.0047896440129453e-06, "loss": 0.0014, "step": 328220 }, { "epoch": 127.47, "learning_rate": 3.004271844660194e-06, "loss": 0.1015, "step": 328230 }, { "epoch": 127.47, "learning_rate": 3.0037540453074437e-06, "loss": 0.1147, "step": 328240 }, { "epoch": 127.48, "learning_rate": 3.0032362459546925e-06, "loss": 0.0747, "step": 328250 }, { "epoch": 127.48, "learning_rate": 3.002718446601942e-06, "loss": 0.089, "step": 328260 }, { "epoch": 127.48, "learning_rate": 3.002200647249191e-06, "loss": 0.011, "step": 328270 }, { "epoch": 127.49, "learning_rate": 3.0016828478964405e-06, "loss": 0.0092, "step": 328280 }, { "epoch": 127.49, "learning_rate": 3.0011650485436893e-06, "loss": 0.1088, "step": 328290 }, { "epoch": 127.5, "learning_rate": 3.000647249190939e-06, "loss": 0.0049, "step": 328300 }, { "epoch": 127.5, "learning_rate": 3.0001294498381877e-06, "loss": 0.053, "step": 328310 }, { "epoch": 127.5, "learning_rate": 2.9996116504854373e-06, "loss": 0.0026, "step": 328320 }, { "epoch": 127.51, "learning_rate": 2.999093851132686e-06, "loss": 0.0492, "step": 328330 }, { "epoch": 127.51, "learning_rate": 2.9985760517799357e-06, "loss": 0.0096, "step": 328340 }, { "epoch": 127.51, "learning_rate": 2.9980582524271844e-06, "loss": 0.0235, "step": 328350 }, { "epoch": 127.52, "learning_rate": 2.997540453074434e-06, "loss": 0.0358, "step": 328360 }, { "epoch": 127.52, "learning_rate": 2.997022653721683e-06, "loss": 0.058, "step": 328370 }, { "epoch": 127.53, "learning_rate": 2.9965048543689325e-06, "loss": 0.039, "step": 328380 }, { "epoch": 127.53, "learning_rate": 2.9959870550161812e-06, "loss": 0.0086, "step": 328390 }, { "epoch": 127.53, "learning_rate": 2.995469255663431e-06, "loss": 0.0938, "step": 328400 }, { "epoch": 127.54, "learning_rate": 2.9949514563106796e-06, "loss": 0.0029, "step": 328410 }, { "epoch": 127.54, "learning_rate": 2.9944336569579292e-06, "loss": 0.2031, "step": 328420 }, { "epoch": 127.55, "learning_rate": 2.993915857605178e-06, "loss": 0.056, "step": 328430 }, { "epoch": 127.55, "learning_rate": 2.9933980582524276e-06, "loss": 0.04, "step": 328440 }, { "epoch": 127.55, "learning_rate": 2.9928802588996764e-06, "loss": 0.0177, "step": 328450 }, { "epoch": 127.56, "learning_rate": 2.992362459546926e-06, "loss": 0.1095, "step": 328460 }, { "epoch": 127.56, "learning_rate": 2.9918446601941748e-06, "loss": 0.0094, "step": 328470 }, { "epoch": 127.57, "learning_rate": 2.9913268608414244e-06, "loss": 0.0299, "step": 328480 }, { "epoch": 127.57, "learning_rate": 2.990809061488673e-06, "loss": 0.0086, "step": 328490 }, { "epoch": 127.57, "learning_rate": 2.9902912621359224e-06, "loss": 0.0462, "step": 328500 }, { "epoch": 127.58, "learning_rate": 2.989773462783172e-06, "loss": 0.0109, "step": 328510 }, { "epoch": 127.58, "learning_rate": 2.9892556634304208e-06, "loss": 0.1009, "step": 328520 }, { "epoch": 127.58, "learning_rate": 2.9887378640776704e-06, "loss": 0.0268, "step": 328530 }, { "epoch": 127.59, "learning_rate": 2.988220064724919e-06, "loss": 0.0713, "step": 328540 }, { "epoch": 127.59, "learning_rate": 2.9877022653721688e-06, "loss": 0.0759, "step": 328550 }, { "epoch": 127.6, "learning_rate": 2.9871844660194175e-06, "loss": 0.0644, "step": 328560 }, { "epoch": 127.6, "learning_rate": 2.986666666666667e-06, "loss": 0.14, "step": 328570 }, { "epoch": 127.6, "learning_rate": 2.986148867313916e-06, "loss": 0.0029, "step": 328580 }, { "epoch": 127.61, "learning_rate": 2.9856310679611655e-06, "loss": 0.0025, "step": 328590 }, { "epoch": 127.61, "learning_rate": 2.9851132686084143e-06, "loss": 0.0734, "step": 328600 }, { "epoch": 127.62, "learning_rate": 2.984595469255664e-06, "loss": 0.0652, "step": 328610 }, { "epoch": 127.62, "learning_rate": 2.9840776699029127e-06, "loss": 0.0007, "step": 328620 }, { "epoch": 127.62, "learning_rate": 2.9835598705501623e-06, "loss": 0.0811, "step": 328630 }, { "epoch": 127.63, "learning_rate": 2.983042071197411e-06, "loss": 0.0849, "step": 328640 }, { "epoch": 127.63, "learning_rate": 2.9825242718446607e-06, "loss": 0.0351, "step": 328650 }, { "epoch": 127.63, "learning_rate": 2.9820064724919095e-06, "loss": 0.0079, "step": 328660 }, { "epoch": 127.64, "learning_rate": 2.981488673139159e-06, "loss": 0.0987, "step": 328670 }, { "epoch": 127.64, "learning_rate": 2.980970873786408e-06, "loss": 0.0017, "step": 328680 }, { "epoch": 127.65, "learning_rate": 2.9804530744336575e-06, "loss": 0.001, "step": 328690 }, { "epoch": 127.65, "learning_rate": 2.9799352750809063e-06, "loss": 0.0007, "step": 328700 }, { "epoch": 127.65, "learning_rate": 2.979417475728156e-06, "loss": 0.0974, "step": 328710 }, { "epoch": 127.66, "learning_rate": 2.9788996763754046e-06, "loss": 0.0094, "step": 328720 }, { "epoch": 127.66, "learning_rate": 2.9783818770226543e-06, "loss": 0.0128, "step": 328730 }, { "epoch": 127.67, "learning_rate": 2.977864077669903e-06, "loss": 0.009, "step": 328740 }, { "epoch": 127.67, "learning_rate": 2.9773462783171527e-06, "loss": 0.0668, "step": 328750 }, { "epoch": 127.67, "learning_rate": 2.9768284789644014e-06, "loss": 0.1018, "step": 328760 }, { "epoch": 127.68, "learning_rate": 2.976310679611651e-06, "loss": 0.0259, "step": 328770 }, { "epoch": 127.68, "learning_rate": 2.9757928802589e-06, "loss": 0.0001, "step": 328780 }, { "epoch": 127.69, "learning_rate": 2.9752750809061494e-06, "loss": 0.0491, "step": 328790 }, { "epoch": 127.69, "learning_rate": 2.974757281553398e-06, "loss": 0.0905, "step": 328800 }, { "epoch": 127.69, "learning_rate": 2.974239482200648e-06, "loss": 0.0001, "step": 328810 }, { "epoch": 127.7, "learning_rate": 2.9737216828478966e-06, "loss": 0.0841, "step": 328820 }, { "epoch": 127.7, "learning_rate": 2.9732038834951458e-06, "loss": 0.0006, "step": 328830 }, { "epoch": 127.7, "learning_rate": 2.972686084142395e-06, "loss": 0.0127, "step": 328840 }, { "epoch": 127.71, "learning_rate": 2.972168284789644e-06, "loss": 0.0159, "step": 328850 }, { "epoch": 127.71, "learning_rate": 2.9716504854368934e-06, "loss": 0.0345, "step": 328860 }, { "epoch": 127.72, "learning_rate": 2.9711326860841426e-06, "loss": 0.0006, "step": 328870 }, { "epoch": 127.72, "learning_rate": 2.9706148867313918e-06, "loss": 0.0035, "step": 328880 }, { "epoch": 127.72, "learning_rate": 2.970097087378641e-06, "loss": 0.109, "step": 328890 }, { "epoch": 127.73, "learning_rate": 2.96957928802589e-06, "loss": 0.1205, "step": 328900 }, { "epoch": 127.73, "learning_rate": 2.9690614886731393e-06, "loss": 0.0856, "step": 328910 }, { "epoch": 127.74, "learning_rate": 2.9685436893203885e-06, "loss": 0.0664, "step": 328920 }, { "epoch": 127.74, "learning_rate": 2.9680258899676377e-06, "loss": 0.0018, "step": 328930 }, { "epoch": 127.74, "learning_rate": 2.967508090614887e-06, "loss": 0.0367, "step": 328940 }, { "epoch": 127.75, "learning_rate": 2.966990291262136e-06, "loss": 0.067, "step": 328950 }, { "epoch": 127.75, "learning_rate": 2.9664724919093853e-06, "loss": 0.1275, "step": 328960 }, { "epoch": 127.76, "learning_rate": 2.9659546925566345e-06, "loss": 0.0263, "step": 328970 }, { "epoch": 127.76, "learning_rate": 2.9654368932038837e-06, "loss": 0.0182, "step": 328980 }, { "epoch": 127.76, "learning_rate": 2.964919093851133e-06, "loss": 0.0032, "step": 328990 }, { "epoch": 127.77, "learning_rate": 2.9644012944983817e-06, "loss": 0.0106, "step": 329000 }, { "epoch": 127.77, "learning_rate": 2.9638834951456313e-06, "loss": 0.0325, "step": 329010 }, { "epoch": 127.77, "learning_rate": 2.96336569579288e-06, "loss": 0.0005, "step": 329020 }, { "epoch": 127.78, "learning_rate": 2.9628478964401297e-06, "loss": 0.0836, "step": 329030 }, { "epoch": 127.78, "learning_rate": 2.9623300970873784e-06, "loss": 0.0444, "step": 329040 }, { "epoch": 127.79, "learning_rate": 2.961812297734628e-06, "loss": 0.1286, "step": 329050 }, { "epoch": 127.79, "learning_rate": 2.961294498381877e-06, "loss": 0.0012, "step": 329060 }, { "epoch": 127.79, "learning_rate": 2.9607766990291265e-06, "loss": 0.124, "step": 329070 }, { "epoch": 127.8, "learning_rate": 2.960258899676376e-06, "loss": 0.1073, "step": 329080 }, { "epoch": 127.8, "learning_rate": 2.959741100323625e-06, "loss": 0.0582, "step": 329090 }, { "epoch": 127.81, "learning_rate": 2.9592233009708745e-06, "loss": 0.0232, "step": 329100 }, { "epoch": 127.81, "learning_rate": 2.9587055016181232e-06, "loss": 0.0724, "step": 329110 }, { "epoch": 127.81, "learning_rate": 2.958187702265373e-06, "loss": 0.1391, "step": 329120 }, { "epoch": 127.82, "learning_rate": 2.9576699029126216e-06, "loss": 0.1063, "step": 329130 }, { "epoch": 127.82, "learning_rate": 2.9571521035598712e-06, "loss": 0.0152, "step": 329140 }, { "epoch": 127.83, "learning_rate": 2.95663430420712e-06, "loss": 0.0892, "step": 329150 }, { "epoch": 127.83, "learning_rate": 2.956116504854369e-06, "loss": 0.1256, "step": 329160 }, { "epoch": 127.83, "learning_rate": 2.9555987055016184e-06, "loss": 0.0795, "step": 329170 }, { "epoch": 127.84, "learning_rate": 2.9550809061488676e-06, "loss": 0.0511, "step": 329180 }, { "epoch": 127.84, "learning_rate": 2.954563106796117e-06, "loss": 0.0262, "step": 329190 }, { "epoch": 127.84, "learning_rate": 2.954045307443366e-06, "loss": 0.0559, "step": 329200 }, { "epoch": 127.85, "learning_rate": 2.953527508090615e-06, "loss": 0.0684, "step": 329210 }, { "epoch": 127.85, "learning_rate": 2.9530097087378644e-06, "loss": 0.099, "step": 329220 }, { "epoch": 127.86, "learning_rate": 2.9524919093851136e-06, "loss": 0.0006, "step": 329230 }, { "epoch": 127.86, "learning_rate": 2.9519741100323628e-06, "loss": 0.0133, "step": 329240 }, { "epoch": 127.86, "learning_rate": 2.951456310679612e-06, "loss": 0.0959, "step": 329250 }, { "epoch": 127.87, "learning_rate": 2.950938511326861e-06, "loss": 0.0003, "step": 329260 }, { "epoch": 127.87, "learning_rate": 2.9504207119741103e-06, "loss": 0.0285, "step": 329270 }, { "epoch": 127.88, "learning_rate": 2.9499029126213595e-06, "loss": 0.0589, "step": 329280 }, { "epoch": 127.88, "learning_rate": 2.9493851132686087e-06, "loss": 0.0004, "step": 329290 }, { "epoch": 127.88, "learning_rate": 2.948867313915858e-06, "loss": 0.0012, "step": 329300 }, { "epoch": 127.89, "learning_rate": 2.948349514563107e-06, "loss": 0.0093, "step": 329310 }, { "epoch": 127.89, "learning_rate": 2.9478317152103563e-06, "loss": 0.1011, "step": 329320 }, { "epoch": 127.9, "learning_rate": 2.947313915857605e-06, "loss": 0.0026, "step": 329330 }, { "epoch": 127.9, "learning_rate": 2.9467961165048547e-06, "loss": 0.0208, "step": 329340 }, { "epoch": 127.9, "learning_rate": 2.9462783171521035e-06, "loss": 0.0224, "step": 329350 }, { "epoch": 127.91, "learning_rate": 2.945760517799353e-06, "loss": 0.0102, "step": 329360 }, { "epoch": 127.91, "learning_rate": 2.945242718446602e-06, "loss": 0.0008, "step": 329370 }, { "epoch": 127.91, "learning_rate": 2.9447249190938515e-06, "loss": 0.0364, "step": 329380 }, { "epoch": 127.92, "learning_rate": 2.9442071197411003e-06, "loss": 0.0005, "step": 329390 }, { "epoch": 127.92, "learning_rate": 2.94368932038835e-06, "loss": 0.0218, "step": 329400 }, { "epoch": 127.93, "learning_rate": 2.9431715210355986e-06, "loss": 0.0002, "step": 329410 }, { "epoch": 127.93, "learning_rate": 2.9426537216828483e-06, "loss": 0.0924, "step": 329420 }, { "epoch": 127.93, "learning_rate": 2.942135922330097e-06, "loss": 0.0948, "step": 329430 }, { "epoch": 127.94, "learning_rate": 2.9416181229773467e-06, "loss": 0.0386, "step": 329440 }, { "epoch": 127.94, "learning_rate": 2.9411003236245954e-06, "loss": 0.0382, "step": 329450 }, { "epoch": 127.95, "learning_rate": 2.940582524271845e-06, "loss": 0.0759, "step": 329460 }, { "epoch": 127.95, "learning_rate": 2.940064724919094e-06, "loss": 0.0635, "step": 329470 }, { "epoch": 127.95, "learning_rate": 2.9395469255663434e-06, "loss": 0.0041, "step": 329480 }, { "epoch": 127.96, "learning_rate": 2.939029126213592e-06, "loss": 0.0309, "step": 329490 }, { "epoch": 127.96, "learning_rate": 2.938511326860842e-06, "loss": 0.0002, "step": 329500 }, { "epoch": 127.97, "learning_rate": 2.9379935275080906e-06, "loss": 0.0464, "step": 329510 }, { "epoch": 127.97, "learning_rate": 2.93747572815534e-06, "loss": 0.0762, "step": 329520 }, { "epoch": 127.97, "learning_rate": 2.936957928802589e-06, "loss": 0.0803, "step": 329530 }, { "epoch": 127.98, "learning_rate": 2.9364401294498386e-06, "loss": 0.0426, "step": 329540 }, { "epoch": 127.98, "learning_rate": 2.9359223300970874e-06, "loss": 0.0084, "step": 329550 }, { "epoch": 127.98, "learning_rate": 2.935404530744337e-06, "loss": 0.0002, "step": 329560 }, { "epoch": 127.99, "learning_rate": 2.9348867313915858e-06, "loss": 0.031, "step": 329570 }, { "epoch": 127.99, "learning_rate": 2.9343689320388354e-06, "loss": 0.0021, "step": 329580 }, { "epoch": 128.0, "learning_rate": 2.933851132686084e-06, "loss": 0.0001, "step": 329590 }, { "epoch": 128.0, "learning_rate": 2.9333333333333338e-06, "loss": 0.0082, "step": 329600 }, { "epoch": 128.0, "eval_accuracy": 0.9526822558459422, "eval_loss": 0.38309401273727417, "eval_runtime": 8.2374, "eval_samples_per_second": 441.28, "eval_steps_per_second": 55.236, "step": 329600 }, { "epoch": 128.0, "learning_rate": 2.9328155339805825e-06, "loss": 0.0029, "step": 329610 }, { "epoch": 128.01, "learning_rate": 2.932297734627832e-06, "loss": 0.0942, "step": 329620 }, { "epoch": 128.01, "learning_rate": 2.9317799352750813e-06, "loss": 0.072, "step": 329630 }, { "epoch": 128.02, "learning_rate": 2.9312621359223305e-06, "loss": 0.0001, "step": 329640 }, { "epoch": 128.02, "learning_rate": 2.9307443365695797e-06, "loss": 0.0159, "step": 329650 }, { "epoch": 128.02, "learning_rate": 2.9302265372168285e-06, "loss": 0.0234, "step": 329660 }, { "epoch": 128.03, "learning_rate": 2.929708737864078e-06, "loss": 0.0112, "step": 329670 }, { "epoch": 128.03, "learning_rate": 2.929190938511327e-06, "loss": 0.0306, "step": 329680 }, { "epoch": 128.03, "learning_rate": 2.9286731391585765e-06, "loss": 0.0192, "step": 329690 }, { "epoch": 128.04, "learning_rate": 2.9281553398058253e-06, "loss": 0.0645, "step": 329700 }, { "epoch": 128.04, "learning_rate": 2.927637540453075e-06, "loss": 0.1185, "step": 329710 }, { "epoch": 128.05, "learning_rate": 2.9271197411003237e-06, "loss": 0.0852, "step": 329720 }, { "epoch": 128.05, "learning_rate": 2.9266019417475733e-06, "loss": 0.0721, "step": 329730 }, { "epoch": 128.05, "learning_rate": 2.926084142394822e-06, "loss": 0.0273, "step": 329740 }, { "epoch": 128.06, "learning_rate": 2.9255663430420717e-06, "loss": 0.0336, "step": 329750 }, { "epoch": 128.06, "learning_rate": 2.9250485436893205e-06, "loss": 0.0727, "step": 329760 }, { "epoch": 128.07, "learning_rate": 2.92453074433657e-06, "loss": 0.0087, "step": 329770 }, { "epoch": 128.07, "learning_rate": 2.924012944983819e-06, "loss": 0.012, "step": 329780 }, { "epoch": 128.07, "learning_rate": 2.9234951456310685e-06, "loss": 0.0996, "step": 329790 }, { "epoch": 128.08, "learning_rate": 2.9229773462783172e-06, "loss": 0.0245, "step": 329800 }, { "epoch": 128.08, "learning_rate": 2.922459546925567e-06, "loss": 0.0943, "step": 329810 }, { "epoch": 128.09, "learning_rate": 2.9219417475728156e-06, "loss": 0.0467, "step": 329820 }, { "epoch": 128.09, "learning_rate": 2.9214239482200652e-06, "loss": 0.0002, "step": 329830 }, { "epoch": 128.09, "learning_rate": 2.920906148867314e-06, "loss": 0.022, "step": 329840 }, { "epoch": 128.1, "learning_rate": 2.9203883495145636e-06, "loss": 0.0284, "step": 329850 }, { "epoch": 128.1, "learning_rate": 2.9198705501618124e-06, "loss": 0.0521, "step": 329860 }, { "epoch": 128.1, "learning_rate": 2.919352750809062e-06, "loss": 0.0546, "step": 329870 }, { "epoch": 128.11, "learning_rate": 2.918834951456311e-06, "loss": 0.036, "step": 329880 }, { "epoch": 128.11, "learning_rate": 2.9183171521035604e-06, "loss": 0.081, "step": 329890 }, { "epoch": 128.12, "learning_rate": 2.917799352750809e-06, "loss": 0.1636, "step": 329900 }, { "epoch": 128.12, "learning_rate": 2.917281553398059e-06, "loss": 0.1028, "step": 329910 }, { "epoch": 128.12, "learning_rate": 2.9167637540453076e-06, "loss": 0.0591, "step": 329920 }, { "epoch": 128.13, "learning_rate": 2.916245954692557e-06, "loss": 0.0001, "step": 329930 }, { "epoch": 128.13, "learning_rate": 2.915728155339806e-06, "loss": 0.1116, "step": 329940 }, { "epoch": 128.14, "learning_rate": 2.9152103559870556e-06, "loss": 0.0413, "step": 329950 }, { "epoch": 128.14, "learning_rate": 2.9146925566343043e-06, "loss": 0.0104, "step": 329960 }, { "epoch": 128.14, "learning_rate": 2.9141747572815535e-06, "loss": 0.0197, "step": 329970 }, { "epoch": 128.15, "learning_rate": 2.9136569579288027e-06, "loss": 0.001, "step": 329980 }, { "epoch": 128.15, "learning_rate": 2.913139158576052e-06, "loss": 0.0148, "step": 329990 }, { "epoch": 128.16, "learning_rate": 2.912621359223301e-06, "loss": 0.0536, "step": 330000 }, { "epoch": 128.16, "learning_rate": 2.9121035598705503e-06, "loss": 0.0207, "step": 330010 }, { "epoch": 128.16, "learning_rate": 2.9115857605177995e-06, "loss": 0.037, "step": 330020 }, { "epoch": 128.17, "learning_rate": 2.9110679611650487e-06, "loss": 0.0012, "step": 330030 }, { "epoch": 128.17, "learning_rate": 2.910550161812298e-06, "loss": 0.03, "step": 330040 }, { "epoch": 128.17, "learning_rate": 2.910032362459547e-06, "loss": 0.0228, "step": 330050 }, { "epoch": 128.18, "learning_rate": 2.9095145631067963e-06, "loss": 0.0091, "step": 330060 }, { "epoch": 128.18, "learning_rate": 2.9089967637540455e-06, "loss": 0.0805, "step": 330070 }, { "epoch": 128.19, "learning_rate": 2.9084789644012947e-06, "loss": 0.0024, "step": 330080 }, { "epoch": 128.19, "learning_rate": 2.907961165048544e-06, "loss": 0.0461, "step": 330090 }, { "epoch": 128.19, "learning_rate": 2.907443365695793e-06, "loss": 0.0445, "step": 330100 }, { "epoch": 128.2, "learning_rate": 2.9069255663430423e-06, "loss": 0.0139, "step": 330110 }, { "epoch": 128.2, "learning_rate": 2.9064077669902915e-06, "loss": 0.0117, "step": 330120 }, { "epoch": 128.21, "learning_rate": 2.9058899676375407e-06, "loss": 0.0018, "step": 330130 }, { "epoch": 128.21, "learning_rate": 2.9053721682847894e-06, "loss": 0.0192, "step": 330140 }, { "epoch": 128.21, "learning_rate": 2.904854368932039e-06, "loss": 0.0269, "step": 330150 }, { "epoch": 128.22, "learning_rate": 2.904336569579288e-06, "loss": 0.0857, "step": 330160 }, { "epoch": 128.22, "learning_rate": 2.9038187702265374e-06, "loss": 0.0351, "step": 330170 }, { "epoch": 128.23, "learning_rate": 2.903300970873786e-06, "loss": 0.0812, "step": 330180 }, { "epoch": 128.23, "learning_rate": 2.902783171521036e-06, "loss": 0.1412, "step": 330190 }, { "epoch": 128.23, "learning_rate": 2.9022653721682854e-06, "loss": 0.0661, "step": 330200 }, { "epoch": 128.24, "learning_rate": 2.901747572815534e-06, "loss": 0.0599, "step": 330210 }, { "epoch": 128.24, "learning_rate": 2.901229773462784e-06, "loss": 0.0743, "step": 330220 }, { "epoch": 128.24, "learning_rate": 2.9007119741100326e-06, "loss": 0.0004, "step": 330230 }, { "epoch": 128.25, "learning_rate": 2.9001941747572822e-06, "loss": 0.0303, "step": 330240 }, { "epoch": 128.25, "learning_rate": 2.899676375404531e-06, "loss": 0.0615, "step": 330250 }, { "epoch": 128.26, "learning_rate": 2.8991585760517806e-06, "loss": 0.0512, "step": 330260 }, { "epoch": 128.26, "learning_rate": 2.8986407766990294e-06, "loss": 0.0014, "step": 330270 }, { "epoch": 128.26, "learning_rate": 2.898122977346279e-06, "loss": 0.04, "step": 330280 }, { "epoch": 128.27, "learning_rate": 2.8976051779935278e-06, "loss": 0.0303, "step": 330290 }, { "epoch": 128.27, "learning_rate": 2.897087378640777e-06, "loss": 0.1007, "step": 330300 }, { "epoch": 128.28, "learning_rate": 2.896569579288026e-06, "loss": 0.0216, "step": 330310 }, { "epoch": 128.28, "learning_rate": 2.8960517799352753e-06, "loss": 0.0196, "step": 330320 }, { "epoch": 128.28, "learning_rate": 2.8955339805825245e-06, "loss": 0.0671, "step": 330330 }, { "epoch": 128.29, "learning_rate": 2.8950161812297737e-06, "loss": 0.0006, "step": 330340 }, { "epoch": 128.29, "learning_rate": 2.894498381877023e-06, "loss": 0.0712, "step": 330350 }, { "epoch": 128.3, "learning_rate": 2.893980582524272e-06, "loss": 0.0003, "step": 330360 }, { "epoch": 128.3, "learning_rate": 2.8934627831715213e-06, "loss": 0.0075, "step": 330370 }, { "epoch": 128.3, "learning_rate": 2.8929449838187705e-06, "loss": 0.0275, "step": 330380 }, { "epoch": 128.31, "learning_rate": 2.8924271844660197e-06, "loss": 0.0002, "step": 330390 }, { "epoch": 128.31, "learning_rate": 2.891909385113269e-06, "loss": 0.103, "step": 330400 }, { "epoch": 128.31, "learning_rate": 2.891391585760518e-06, "loss": 0.0395, "step": 330410 }, { "epoch": 128.32, "learning_rate": 2.8908737864077673e-06, "loss": 0.0978, "step": 330420 }, { "epoch": 128.32, "learning_rate": 2.8903559870550165e-06, "loss": 0.1139, "step": 330430 }, { "epoch": 128.33, "learning_rate": 2.8898381877022657e-06, "loss": 0.0748, "step": 330440 }, { "epoch": 128.33, "learning_rate": 2.889320388349515e-06, "loss": 0.1188, "step": 330450 }, { "epoch": 128.33, "learning_rate": 2.888802588996764e-06, "loss": 0.0663, "step": 330460 }, { "epoch": 128.34, "learning_rate": 2.888284789644013e-06, "loss": 0.0121, "step": 330470 }, { "epoch": 128.34, "learning_rate": 2.8877669902912625e-06, "loss": 0.021, "step": 330480 }, { "epoch": 128.35, "learning_rate": 2.8872491909385112e-06, "loss": 0.0436, "step": 330490 }, { "epoch": 128.35, "learning_rate": 2.886731391585761e-06, "loss": 0.0001, "step": 330500 }, { "epoch": 128.35, "learning_rate": 2.8862135922330096e-06, "loss": 0.1789, "step": 330510 }, { "epoch": 128.36, "learning_rate": 2.8856957928802592e-06, "loss": 0.1138, "step": 330520 }, { "epoch": 128.36, "learning_rate": 2.885177993527508e-06, "loss": 0.0004, "step": 330530 }, { "epoch": 128.37, "learning_rate": 2.8846601941747576e-06, "loss": 0.0659, "step": 330540 }, { "epoch": 128.37, "learning_rate": 2.8841423948220064e-06, "loss": 0.0826, "step": 330550 }, { "epoch": 128.37, "learning_rate": 2.883624595469256e-06, "loss": 0.0008, "step": 330560 }, { "epoch": 128.38, "learning_rate": 2.8831067961165048e-06, "loss": 0.135, "step": 330570 }, { "epoch": 128.38, "learning_rate": 2.8825889967637544e-06, "loss": 0.0121, "step": 330580 }, { "epoch": 128.38, "learning_rate": 2.882071197411003e-06, "loss": 0.0694, "step": 330590 }, { "epoch": 128.39, "learning_rate": 2.881553398058253e-06, "loss": 0.0872, "step": 330600 }, { "epoch": 128.39, "learning_rate": 2.8810355987055016e-06, "loss": 0.0397, "step": 330610 }, { "epoch": 128.4, "learning_rate": 2.880517799352751e-06, "loss": 0.0425, "step": 330620 }, { "epoch": 128.4, "learning_rate": 2.88e-06, "loss": 0.0297, "step": 330630 }, { "epoch": 128.4, "learning_rate": 2.8794822006472496e-06, "loss": 0.1196, "step": 330640 }, { "epoch": 128.41, "learning_rate": 2.8789644012944983e-06, "loss": 0.0396, "step": 330650 }, { "epoch": 128.41, "learning_rate": 2.878446601941748e-06, "loss": 0.0086, "step": 330660 }, { "epoch": 128.42, "learning_rate": 2.8779288025889967e-06, "loss": 0.0249, "step": 330670 }, { "epoch": 128.42, "learning_rate": 2.8774110032362464e-06, "loss": 0.0408, "step": 330680 }, { "epoch": 128.42, "learning_rate": 2.876893203883495e-06, "loss": 0.0469, "step": 330690 }, { "epoch": 128.43, "learning_rate": 2.8763754045307447e-06, "loss": 0.0337, "step": 330700 }, { "epoch": 128.43, "learning_rate": 2.8758576051779935e-06, "loss": 0.1987, "step": 330710 }, { "epoch": 128.43, "learning_rate": 2.875339805825243e-06, "loss": 0.0498, "step": 330720 }, { "epoch": 128.44, "learning_rate": 2.874822006472492e-06, "loss": 0.0091, "step": 330730 }, { "epoch": 128.44, "learning_rate": 2.8743042071197415e-06, "loss": 0.0921, "step": 330740 }, { "epoch": 128.45, "learning_rate": 2.8737864077669903e-06, "loss": 0.0001, "step": 330750 }, { "epoch": 128.45, "learning_rate": 2.87326860841424e-06, "loss": 0.0165, "step": 330760 }, { "epoch": 128.45, "learning_rate": 2.872750809061489e-06, "loss": 0.002, "step": 330770 }, { "epoch": 128.46, "learning_rate": 2.8722330097087383e-06, "loss": 0.0018, "step": 330780 }, { "epoch": 128.46, "learning_rate": 2.8717152103559875e-06, "loss": 0.0677, "step": 330790 }, { "epoch": 128.47, "learning_rate": 2.8711974110032363e-06, "loss": 0.1144, "step": 330800 }, { "epoch": 128.47, "learning_rate": 2.870679611650486e-06, "loss": 0.0484, "step": 330810 }, { "epoch": 128.47, "learning_rate": 2.8701618122977347e-06, "loss": 0.0114, "step": 330820 }, { "epoch": 128.48, "learning_rate": 2.8696440129449843e-06, "loss": 0.0009, "step": 330830 }, { "epoch": 128.48, "learning_rate": 2.869126213592233e-06, "loss": 0.038, "step": 330840 }, { "epoch": 128.49, "learning_rate": 2.8686084142394827e-06, "loss": 0.067, "step": 330850 }, { "epoch": 128.49, "learning_rate": 2.8680906148867314e-06, "loss": 0.0416, "step": 330860 }, { "epoch": 128.49, "learning_rate": 2.867572815533981e-06, "loss": 0.0801, "step": 330870 }, { "epoch": 128.5, "learning_rate": 2.86705501618123e-06, "loss": 0.021, "step": 330880 }, { "epoch": 128.5, "learning_rate": 2.8665372168284794e-06, "loss": 0.0206, "step": 330890 }, { "epoch": 128.5, "learning_rate": 2.866019417475728e-06, "loss": 0.0375, "step": 330900 }, { "epoch": 128.51, "learning_rate": 2.865501618122978e-06, "loss": 0.0164, "step": 330910 }, { "epoch": 128.51, "learning_rate": 2.8649838187702266e-06, "loss": 0.0001, "step": 330920 }, { "epoch": 128.52, "learning_rate": 2.8644660194174762e-06, "loss": 0.0707, "step": 330930 }, { "epoch": 128.52, "learning_rate": 2.863948220064725e-06, "loss": 0.0008, "step": 330940 }, { "epoch": 128.52, "learning_rate": 2.8634304207119746e-06, "loss": 0.0149, "step": 330950 }, { "epoch": 128.53, "learning_rate": 2.8629126213592234e-06, "loss": 0.0392, "step": 330960 }, { "epoch": 128.53, "learning_rate": 2.862394822006473e-06, "loss": 0.0208, "step": 330970 }, { "epoch": 128.54, "learning_rate": 2.8618770226537218e-06, "loss": 0.0279, "step": 330980 }, { "epoch": 128.54, "learning_rate": 2.8613592233009714e-06, "loss": 0.0632, "step": 330990 }, { "epoch": 128.54, "learning_rate": 2.86084142394822e-06, "loss": 0.0109, "step": 331000 }, { "epoch": 128.55, "learning_rate": 2.8603236245954698e-06, "loss": 0.0185, "step": 331010 }, { "epoch": 128.55, "learning_rate": 2.8598058252427185e-06, "loss": 0.0508, "step": 331020 }, { "epoch": 128.56, "learning_rate": 2.859288025889968e-06, "loss": 0.0003, "step": 331030 }, { "epoch": 128.56, "learning_rate": 2.858770226537217e-06, "loss": 0.0519, "step": 331040 }, { "epoch": 128.56, "learning_rate": 2.8582524271844665e-06, "loss": 0.0127, "step": 331050 }, { "epoch": 128.57, "learning_rate": 2.8577346278317153e-06, "loss": 0.0099, "step": 331060 }, { "epoch": 128.57, "learning_rate": 2.857216828478965e-06, "loss": 0.0678, "step": 331070 }, { "epoch": 128.57, "learning_rate": 2.8566990291262137e-06, "loss": 0.0155, "step": 331080 }, { "epoch": 128.58, "learning_rate": 2.8561812297734633e-06, "loss": 0.1311, "step": 331090 }, { "epoch": 128.58, "learning_rate": 2.855663430420712e-06, "loss": 0.0006, "step": 331100 }, { "epoch": 128.59, "learning_rate": 2.8551456310679613e-06, "loss": 0.0395, "step": 331110 }, { "epoch": 128.59, "learning_rate": 2.8546278317152105e-06, "loss": 0.0148, "step": 331120 }, { "epoch": 128.59, "learning_rate": 2.8541100323624597e-06, "loss": 0.0239, "step": 331130 }, { "epoch": 128.6, "learning_rate": 2.853592233009709e-06, "loss": 0.0091, "step": 331140 }, { "epoch": 128.6, "learning_rate": 2.853074433656958e-06, "loss": 0.0093, "step": 331150 }, { "epoch": 128.61, "learning_rate": 2.8525566343042073e-06, "loss": 0.0943, "step": 331160 }, { "epoch": 128.61, "learning_rate": 2.8520388349514565e-06, "loss": 0.0168, "step": 331170 }, { "epoch": 128.61, "learning_rate": 2.8515210355987057e-06, "loss": 0.0303, "step": 331180 }, { "epoch": 128.62, "learning_rate": 2.851003236245955e-06, "loss": 0.205, "step": 331190 }, { "epoch": 128.62, "learning_rate": 2.850485436893204e-06, "loss": 0.0001, "step": 331200 }, { "epoch": 128.63, "learning_rate": 2.8499676375404532e-06, "loss": 0.0023, "step": 331210 }, { "epoch": 128.63, "learning_rate": 2.8494498381877024e-06, "loss": 0.0097, "step": 331220 }, { "epoch": 128.63, "learning_rate": 2.8489320388349516e-06, "loss": 0.0124, "step": 331230 }, { "epoch": 128.64, "learning_rate": 2.848414239482201e-06, "loss": 0.0302, "step": 331240 }, { "epoch": 128.64, "learning_rate": 2.84789644012945e-06, "loss": 0.0002, "step": 331250 }, { "epoch": 128.64, "learning_rate": 2.847378640776699e-06, "loss": 0.0201, "step": 331260 }, { "epoch": 128.65, "learning_rate": 2.8468608414239484e-06, "loss": 0.0579, "step": 331270 }, { "epoch": 128.65, "learning_rate": 2.846343042071197e-06, "loss": 0.0355, "step": 331280 }, { "epoch": 128.66, "learning_rate": 2.845825242718447e-06, "loss": 0.0594, "step": 331290 }, { "epoch": 128.66, "learning_rate": 2.8453074433656956e-06, "loss": 0.0417, "step": 331300 }, { "epoch": 128.66, "learning_rate": 2.844789644012945e-06, "loss": 0.0494, "step": 331310 }, { "epoch": 128.67, "learning_rate": 2.844271844660195e-06, "loss": 0.0028, "step": 331320 }, { "epoch": 128.67, "learning_rate": 2.8437540453074436e-06, "loss": 0.0001, "step": 331330 }, { "epoch": 128.68, "learning_rate": 2.843236245954693e-06, "loss": 0.0784, "step": 331340 }, { "epoch": 128.68, "learning_rate": 2.842718446601942e-06, "loss": 0.1356, "step": 331350 }, { "epoch": 128.68, "learning_rate": 2.8422006472491916e-06, "loss": 0.0018, "step": 331360 }, { "epoch": 128.69, "learning_rate": 2.8416828478964403e-06, "loss": 0.0715, "step": 331370 }, { "epoch": 128.69, "learning_rate": 2.84116504854369e-06, "loss": 0.0001, "step": 331380 }, { "epoch": 128.7, "learning_rate": 2.8406472491909387e-06, "loss": 0.1034, "step": 331390 }, { "epoch": 128.7, "learning_rate": 2.8401294498381884e-06, "loss": 0.0017, "step": 331400 }, { "epoch": 128.7, "learning_rate": 2.839611650485437e-06, "loss": 0.0066, "step": 331410 }, { "epoch": 128.71, "learning_rate": 2.8390938511326867e-06, "loss": 0.0432, "step": 331420 }, { "epoch": 128.71, "learning_rate": 2.8385760517799355e-06, "loss": 0.021, "step": 331430 }, { "epoch": 128.71, "learning_rate": 2.8380582524271847e-06, "loss": 0.0198, "step": 331440 }, { "epoch": 128.72, "learning_rate": 2.837540453074434e-06, "loss": 0.0292, "step": 331450 }, { "epoch": 128.72, "learning_rate": 2.837022653721683e-06, "loss": 0.0178, "step": 331460 }, { "epoch": 128.73, "learning_rate": 2.8365048543689323e-06, "loss": 0.0001, "step": 331470 }, { "epoch": 128.73, "learning_rate": 2.8359870550161815e-06, "loss": 0.0001, "step": 331480 }, { "epoch": 128.73, "learning_rate": 2.8354692556634307e-06, "loss": 0.0012, "step": 331490 }, { "epoch": 128.74, "learning_rate": 2.83495145631068e-06, "loss": 0.0101, "step": 331500 }, { "epoch": 128.74, "learning_rate": 2.834433656957929e-06, "loss": 0.0003, "step": 331510 }, { "epoch": 128.75, "learning_rate": 2.8339158576051783e-06, "loss": 0.028, "step": 331520 }, { "epoch": 128.75, "learning_rate": 2.8333980582524275e-06, "loss": 0.0124, "step": 331530 }, { "epoch": 128.75, "learning_rate": 2.8328802588996767e-06, "loss": 0.0366, "step": 331540 }, { "epoch": 128.76, "learning_rate": 2.832362459546926e-06, "loss": 0.0847, "step": 331550 }, { "epoch": 128.76, "learning_rate": 2.831844660194175e-06, "loss": 0.0001, "step": 331560 }, { "epoch": 128.77, "learning_rate": 2.8313268608414242e-06, "loss": 0.0248, "step": 331570 }, { "epoch": 128.77, "learning_rate": 2.8308090614886734e-06, "loss": 0.0566, "step": 331580 }, { "epoch": 128.77, "learning_rate": 2.8302912621359226e-06, "loss": 0.0112, "step": 331590 }, { "epoch": 128.78, "learning_rate": 2.829773462783172e-06, "loss": 0.0376, "step": 331600 }, { "epoch": 128.78, "learning_rate": 2.8292556634304206e-06, "loss": 0.0003, "step": 331610 }, { "epoch": 128.78, "learning_rate": 2.8287378640776702e-06, "loss": 0.0282, "step": 331620 }, { "epoch": 128.79, "learning_rate": 2.828220064724919e-06, "loss": 0.0475, "step": 331630 }, { "epoch": 128.79, "learning_rate": 2.8277022653721686e-06, "loss": 0.0207, "step": 331640 }, { "epoch": 128.8, "learning_rate": 2.8271844660194174e-06, "loss": 0.0338, "step": 331650 }, { "epoch": 128.8, "learning_rate": 2.826666666666667e-06, "loss": 0.0465, "step": 331660 }, { "epoch": 128.8, "learning_rate": 2.8261488673139158e-06, "loss": 0.0456, "step": 331670 }, { "epoch": 128.81, "learning_rate": 2.8256310679611654e-06, "loss": 0.1323, "step": 331680 }, { "epoch": 128.81, "learning_rate": 2.825113268608414e-06, "loss": 0.022, "step": 331690 }, { "epoch": 128.82, "learning_rate": 2.8245954692556638e-06, "loss": 0.0099, "step": 331700 }, { "epoch": 128.82, "learning_rate": 2.8240776699029125e-06, "loss": 0.1346, "step": 331710 }, { "epoch": 128.82, "learning_rate": 2.823559870550162e-06, "loss": 0.0221, "step": 331720 }, { "epoch": 128.83, "learning_rate": 2.823042071197411e-06, "loss": 0.0013, "step": 331730 }, { "epoch": 128.83, "learning_rate": 2.8225242718446605e-06, "loss": 0.0614, "step": 331740 }, { "epoch": 128.83, "learning_rate": 2.8220064724919093e-06, "loss": 0.0006, "step": 331750 }, { "epoch": 128.84, "learning_rate": 2.821488673139159e-06, "loss": 0.1223, "step": 331760 }, { "epoch": 128.84, "learning_rate": 2.8209708737864077e-06, "loss": 0.1119, "step": 331770 }, { "epoch": 128.85, "learning_rate": 2.8204530744336573e-06, "loss": 0.0587, "step": 331780 }, { "epoch": 128.85, "learning_rate": 2.819935275080906e-06, "loss": 0.0755, "step": 331790 }, { "epoch": 128.85, "learning_rate": 2.8194174757281557e-06, "loss": 0.0717, "step": 331800 }, { "epoch": 128.86, "learning_rate": 2.8188996763754045e-06, "loss": 0.0578, "step": 331810 }, { "epoch": 128.86, "learning_rate": 2.818381877022654e-06, "loss": 0.1022, "step": 331820 }, { "epoch": 128.87, "learning_rate": 2.817864077669903e-06, "loss": 0.0101, "step": 331830 }, { "epoch": 128.87, "learning_rate": 2.8173462783171525e-06, "loss": 0.0246, "step": 331840 }, { "epoch": 128.87, "learning_rate": 2.8168284789644013e-06, "loss": 0.1435, "step": 331850 }, { "epoch": 128.88, "learning_rate": 2.816310679611651e-06, "loss": 0.0075, "step": 331860 }, { "epoch": 128.88, "learning_rate": 2.8157928802588997e-06, "loss": 0.0084, "step": 331870 }, { "epoch": 128.89, "learning_rate": 2.8152750809061493e-06, "loss": 0.0356, "step": 331880 }, { "epoch": 128.89, "learning_rate": 2.8147572815533985e-06, "loss": 0.0474, "step": 331890 }, { "epoch": 128.89, "learning_rate": 2.8142394822006477e-06, "loss": 0.0001, "step": 331900 }, { "epoch": 128.9, "learning_rate": 2.813721682847897e-06, "loss": 0.0291, "step": 331910 }, { "epoch": 128.9, "learning_rate": 2.813203883495146e-06, "loss": 0.0108, "step": 331920 }, { "epoch": 128.9, "learning_rate": 2.8126860841423952e-06, "loss": 0.0186, "step": 331930 }, { "epoch": 128.91, "learning_rate": 2.812168284789644e-06, "loss": 0.0913, "step": 331940 }, { "epoch": 128.91, "learning_rate": 2.8116504854368936e-06, "loss": 0.0393, "step": 331950 }, { "epoch": 128.92, "learning_rate": 2.8111326860841424e-06, "loss": 0.0594, "step": 331960 }, { "epoch": 128.92, "learning_rate": 2.810614886731392e-06, "loss": 0.0465, "step": 331970 }, { "epoch": 128.92, "learning_rate": 2.810097087378641e-06, "loss": 0.0312, "step": 331980 }, { "epoch": 128.93, "learning_rate": 2.8095792880258904e-06, "loss": 0.0001, "step": 331990 }, { "epoch": 128.93, "learning_rate": 2.809061488673139e-06, "loss": 0.0037, "step": 332000 }, { "epoch": 128.94, "learning_rate": 2.808543689320389e-06, "loss": 0.0124, "step": 332010 }, { "epoch": 128.94, "learning_rate": 2.8080258899676376e-06, "loss": 0.0756, "step": 332020 }, { "epoch": 128.94, "learning_rate": 2.807508090614887e-06, "loss": 0.0006, "step": 332030 }, { "epoch": 128.95, "learning_rate": 2.806990291262136e-06, "loss": 0.1215, "step": 332040 }, { "epoch": 128.95, "learning_rate": 2.8064724919093856e-06, "loss": 0.0124, "step": 332050 }, { "epoch": 128.96, "learning_rate": 2.8059546925566343e-06, "loss": 0.0162, "step": 332060 }, { "epoch": 128.96, "learning_rate": 2.805436893203884e-06, "loss": 0.0734, "step": 332070 }, { "epoch": 128.96, "learning_rate": 2.8049190938511327e-06, "loss": 0.0257, "step": 332080 }, { "epoch": 128.97, "learning_rate": 2.8044012944983824e-06, "loss": 0.021, "step": 332090 }, { "epoch": 128.97, "learning_rate": 2.803883495145631e-06, "loss": 0.0037, "step": 332100 }, { "epoch": 128.97, "learning_rate": 2.8033656957928807e-06, "loss": 0.1202, "step": 332110 }, { "epoch": 128.98, "learning_rate": 2.8028478964401295e-06, "loss": 0.0494, "step": 332120 }, { "epoch": 128.98, "learning_rate": 2.802330097087379e-06, "loss": 0.0147, "step": 332130 }, { "epoch": 128.99, "learning_rate": 2.801812297734628e-06, "loss": 0.0496, "step": 332140 }, { "epoch": 128.99, "learning_rate": 2.8012944983818775e-06, "loss": 0.0618, "step": 332150 }, { "epoch": 128.99, "learning_rate": 2.8007766990291263e-06, "loss": 0.1299, "step": 332160 }, { "epoch": 129.0, "learning_rate": 2.800258899676376e-06, "loss": 0.0737, "step": 332170 }, { "epoch": 129.0, "eval_accuracy": 0.9524071526822558, "eval_loss": 0.3891226649284363, "eval_runtime": 8.3068, "eval_samples_per_second": 437.593, "eval_steps_per_second": 54.774, "step": 332175 }, { "epoch": 129.0, "learning_rate": 2.7997411003236247e-06, "loss": 0.0097, "step": 332180 }, { "epoch": 129.01, "learning_rate": 2.7992233009708743e-06, "loss": 0.0341, "step": 332190 }, { "epoch": 129.01, "learning_rate": 2.798705501618123e-06, "loss": 0.0363, "step": 332200 }, { "epoch": 129.01, "learning_rate": 2.7981877022653727e-06, "loss": 0.0202, "step": 332210 }, { "epoch": 129.02, "learning_rate": 2.7976699029126215e-06, "loss": 0.0275, "step": 332220 }, { "epoch": 129.02, "learning_rate": 2.797152103559871e-06, "loss": 0.0012, "step": 332230 }, { "epoch": 129.03, "learning_rate": 2.79663430420712e-06, "loss": 0.0092, "step": 332240 }, { "epoch": 129.03, "learning_rate": 2.796116504854369e-06, "loss": 0.0102, "step": 332250 }, { "epoch": 129.03, "learning_rate": 2.7955987055016182e-06, "loss": 0.0632, "step": 332260 }, { "epoch": 129.04, "learning_rate": 2.7950809061488674e-06, "loss": 0.0442, "step": 332270 }, { "epoch": 129.04, "learning_rate": 2.7945631067961166e-06, "loss": 0.0622, "step": 332280 }, { "epoch": 129.04, "learning_rate": 2.794045307443366e-06, "loss": 0.0132, "step": 332290 }, { "epoch": 129.05, "learning_rate": 2.793527508090615e-06, "loss": 0.038, "step": 332300 }, { "epoch": 129.05, "learning_rate": 2.793009708737864e-06, "loss": 0.0166, "step": 332310 }, { "epoch": 129.06, "learning_rate": 2.7924919093851134e-06, "loss": 0.0301, "step": 332320 }, { "epoch": 129.06, "learning_rate": 2.7919741100323626e-06, "loss": 0.0696, "step": 332330 }, { "epoch": 129.06, "learning_rate": 2.791456310679612e-06, "loss": 0.038, "step": 332340 }, { "epoch": 129.07, "learning_rate": 2.790938511326861e-06, "loss": 0.1081, "step": 332350 }, { "epoch": 129.07, "learning_rate": 2.79042071197411e-06, "loss": 0.0107, "step": 332360 }, { "epoch": 129.08, "learning_rate": 2.7899029126213594e-06, "loss": 0.0089, "step": 332370 }, { "epoch": 129.08, "learning_rate": 2.7893851132686086e-06, "loss": 0.0473, "step": 332380 }, { "epoch": 129.08, "learning_rate": 2.7888673139158578e-06, "loss": 0.1061, "step": 332390 }, { "epoch": 129.09, "learning_rate": 2.788349514563107e-06, "loss": 0.002, "step": 332400 }, { "epoch": 129.09, "learning_rate": 2.787831715210356e-06, "loss": 0.0514, "step": 332410 }, { "epoch": 129.1, "learning_rate": 2.787313915857605e-06, "loss": 0.0999, "step": 332420 }, { "epoch": 129.1, "learning_rate": 2.7867961165048545e-06, "loss": 0.0129, "step": 332430 }, { "epoch": 129.1, "learning_rate": 2.786278317152104e-06, "loss": 0.009, "step": 332440 }, { "epoch": 129.11, "learning_rate": 2.785760517799353e-06, "loss": 0.0443, "step": 332450 }, { "epoch": 129.11, "learning_rate": 2.7852427184466026e-06, "loss": 0.1636, "step": 332460 }, { "epoch": 129.11, "learning_rate": 2.7847249190938513e-06, "loss": 0.2318, "step": 332470 }, { "epoch": 129.12, "learning_rate": 2.784207119741101e-06, "loss": 0.0817, "step": 332480 }, { "epoch": 129.12, "learning_rate": 2.7836893203883497e-06, "loss": 0.0103, "step": 332490 }, { "epoch": 129.13, "learning_rate": 2.7831715210355993e-06, "loss": 0.1351, "step": 332500 }, { "epoch": 129.13, "learning_rate": 2.782653721682848e-06, "loss": 0.0015, "step": 332510 }, { "epoch": 129.13, "learning_rate": 2.7821359223300977e-06, "loss": 0.2429, "step": 332520 }, { "epoch": 129.14, "learning_rate": 2.7816181229773465e-06, "loss": 0.0102, "step": 332530 }, { "epoch": 129.14, "learning_rate": 2.781100323624596e-06, "loss": 0.0349, "step": 332540 }, { "epoch": 129.15, "learning_rate": 2.780582524271845e-06, "loss": 0.0002, "step": 332550 }, { "epoch": 129.15, "learning_rate": 2.7800647249190945e-06, "loss": 0.0004, "step": 332560 }, { "epoch": 129.15, "learning_rate": 2.7795469255663433e-06, "loss": 0.1209, "step": 332570 }, { "epoch": 129.16, "learning_rate": 2.7790291262135925e-06, "loss": 0.0657, "step": 332580 }, { "epoch": 129.16, "learning_rate": 2.7785113268608417e-06, "loss": 0.0337, "step": 332590 }, { "epoch": 129.17, "learning_rate": 2.777993527508091e-06, "loss": 0.0449, "step": 332600 }, { "epoch": 129.17, "learning_rate": 2.77747572815534e-06, "loss": 0.0607, "step": 332610 }, { "epoch": 129.17, "learning_rate": 2.7769579288025892e-06, "loss": 0.1453, "step": 332620 }, { "epoch": 129.18, "learning_rate": 2.7764401294498384e-06, "loss": 0.0315, "step": 332630 }, { "epoch": 129.18, "learning_rate": 2.7759223300970876e-06, "loss": 0.004, "step": 332640 }, { "epoch": 129.18, "learning_rate": 2.775404530744337e-06, "loss": 0.0079, "step": 332650 }, { "epoch": 129.19, "learning_rate": 2.774886731391586e-06, "loss": 0.0343, "step": 332660 }, { "epoch": 129.19, "learning_rate": 2.7743689320388352e-06, "loss": 0.0004, "step": 332670 }, { "epoch": 129.2, "learning_rate": 2.7738511326860844e-06, "loss": 0.1294, "step": 332680 }, { "epoch": 129.2, "learning_rate": 2.7733333333333336e-06, "loss": 0.0005, "step": 332690 }, { "epoch": 129.2, "learning_rate": 2.772815533980583e-06, "loss": 0.0404, "step": 332700 }, { "epoch": 129.21, "learning_rate": 2.772297734627832e-06, "loss": 0.0004, "step": 332710 }, { "epoch": 129.21, "learning_rate": 2.771779935275081e-06, "loss": 0.0186, "step": 332720 }, { "epoch": 129.22, "learning_rate": 2.7712621359223304e-06, "loss": 0.0523, "step": 332730 }, { "epoch": 129.22, "learning_rate": 2.7707443365695796e-06, "loss": 0.0233, "step": 332740 }, { "epoch": 129.22, "learning_rate": 2.7702265372168283e-06, "loss": 0.0116, "step": 332750 }, { "epoch": 129.23, "learning_rate": 2.769708737864078e-06, "loss": 0.034, "step": 332760 }, { "epoch": 129.23, "learning_rate": 2.7691909385113267e-06, "loss": 0.0007, "step": 332770 }, { "epoch": 129.23, "learning_rate": 2.7686731391585764e-06, "loss": 0.1027, "step": 332780 }, { "epoch": 129.24, "learning_rate": 2.768155339805825e-06, "loss": 0.0002, "step": 332790 }, { "epoch": 129.24, "learning_rate": 2.7676375404530747e-06, "loss": 0.0019, "step": 332800 }, { "epoch": 129.25, "learning_rate": 2.7671197411003235e-06, "loss": 0.0241, "step": 332810 }, { "epoch": 129.25, "learning_rate": 2.766601941747573e-06, "loss": 0.041, "step": 332820 }, { "epoch": 129.25, "learning_rate": 2.766084142394822e-06, "loss": 0.0931, "step": 332830 }, { "epoch": 129.26, "learning_rate": 2.7655663430420715e-06, "loss": 0.1435, "step": 332840 }, { "epoch": 129.26, "learning_rate": 2.7650485436893203e-06, "loss": 0.0001, "step": 332850 }, { "epoch": 129.27, "learning_rate": 2.76453074433657e-06, "loss": 0.0961, "step": 332860 }, { "epoch": 129.27, "learning_rate": 2.7640129449838187e-06, "loss": 0.034, "step": 332870 }, { "epoch": 129.27, "learning_rate": 2.7634951456310683e-06, "loss": 0.0096, "step": 332880 }, { "epoch": 129.28, "learning_rate": 2.762977346278317e-06, "loss": 0.0041, "step": 332890 }, { "epoch": 129.28, "learning_rate": 2.7624595469255667e-06, "loss": 0.019, "step": 332900 }, { "epoch": 129.29, "learning_rate": 2.7619417475728155e-06, "loss": 0.0069, "step": 332910 }, { "epoch": 129.29, "learning_rate": 2.761423948220065e-06, "loss": 0.0916, "step": 332920 }, { "epoch": 129.29, "learning_rate": 2.760906148867314e-06, "loss": 0.0132, "step": 332930 }, { "epoch": 129.3, "learning_rate": 2.7603883495145635e-06, "loss": 0.0106, "step": 332940 }, { "epoch": 129.3, "learning_rate": 2.7598705501618122e-06, "loss": 0.1662, "step": 332950 }, { "epoch": 129.3, "learning_rate": 2.759352750809062e-06, "loss": 0.0371, "step": 332960 }, { "epoch": 129.31, "learning_rate": 2.7588349514563106e-06, "loss": 0.0428, "step": 332970 }, { "epoch": 129.31, "learning_rate": 2.7583171521035602e-06, "loss": 0.0004, "step": 332980 }, { "epoch": 129.32, "learning_rate": 2.757799352750809e-06, "loss": 0.0846, "step": 332990 }, { "epoch": 129.32, "learning_rate": 2.7572815533980586e-06, "loss": 0.0683, "step": 333000 }, { "epoch": 129.32, "learning_rate": 2.756763754045308e-06, "loss": 0.0448, "step": 333010 }, { "epoch": 129.33, "learning_rate": 2.756245954692557e-06, "loss": 0.04, "step": 333020 }, { "epoch": 129.33, "learning_rate": 2.7557281553398062e-06, "loss": 0.0005, "step": 333030 }, { "epoch": 129.34, "learning_rate": 2.7552103559870554e-06, "loss": 0.0121, "step": 333040 }, { "epoch": 129.34, "learning_rate": 2.7546925566343046e-06, "loss": 0.0205, "step": 333050 }, { "epoch": 129.34, "learning_rate": 2.754174757281554e-06, "loss": 0.0301, "step": 333060 }, { "epoch": 129.35, "learning_rate": 2.753656957928803e-06, "loss": 0.0008, "step": 333070 }, { "epoch": 129.35, "learning_rate": 2.7531391585760518e-06, "loss": 0.1299, "step": 333080 }, { "epoch": 129.36, "learning_rate": 2.7526213592233014e-06, "loss": 0.071, "step": 333090 }, { "epoch": 129.36, "learning_rate": 2.75210355987055e-06, "loss": 0.0001, "step": 333100 }, { "epoch": 129.36, "learning_rate": 2.7515857605177998e-06, "loss": 0.1163, "step": 333110 }, { "epoch": 129.37, "learning_rate": 2.7510679611650485e-06, "loss": 0.0015, "step": 333120 }, { "epoch": 129.37, "learning_rate": 2.750550161812298e-06, "loss": 0.1002, "step": 333130 }, { "epoch": 129.37, "learning_rate": 2.750032362459547e-06, "loss": 0.0173, "step": 333140 }, { "epoch": 129.38, "learning_rate": 2.7495145631067966e-06, "loss": 0.069, "step": 333150 }, { "epoch": 129.38, "learning_rate": 2.7489967637540453e-06, "loss": 0.0124, "step": 333160 }, { "epoch": 129.39, "learning_rate": 2.748478964401295e-06, "loss": 0.0001, "step": 333170 }, { "epoch": 129.39, "learning_rate": 2.7479611650485437e-06, "loss": 0.0094, "step": 333180 }, { "epoch": 129.39, "learning_rate": 2.7474433656957933e-06, "loss": 0.0519, "step": 333190 }, { "epoch": 129.4, "learning_rate": 2.746925566343042e-06, "loss": 0.0108, "step": 333200 }, { "epoch": 129.4, "learning_rate": 2.7464077669902917e-06, "loss": 0.0603, "step": 333210 }, { "epoch": 129.41, "learning_rate": 2.7458899676375405e-06, "loss": 0.061, "step": 333220 }, { "epoch": 129.41, "learning_rate": 2.74537216828479e-06, "loss": 0.0075, "step": 333230 }, { "epoch": 129.41, "learning_rate": 2.744854368932039e-06, "loss": 0.0168, "step": 333240 }, { "epoch": 129.42, "learning_rate": 2.7443365695792885e-06, "loss": 0.0398, "step": 333250 }, { "epoch": 129.42, "learning_rate": 2.7438187702265373e-06, "loss": 0.0652, "step": 333260 }, { "epoch": 129.43, "learning_rate": 2.743300970873787e-06, "loss": 0.116, "step": 333270 }, { "epoch": 129.43, "learning_rate": 2.7427831715210357e-06, "loss": 0.018, "step": 333280 }, { "epoch": 129.43, "learning_rate": 2.7422653721682853e-06, "loss": 0.0627, "step": 333290 }, { "epoch": 129.44, "learning_rate": 2.741747572815534e-06, "loss": 0.0004, "step": 333300 }, { "epoch": 129.44, "learning_rate": 2.7412297734627837e-06, "loss": 0.0291, "step": 333310 }, { "epoch": 129.44, "learning_rate": 2.7407119741100324e-06, "loss": 0.029, "step": 333320 }, { "epoch": 129.45, "learning_rate": 2.740194174757282e-06, "loss": 0.0008, "step": 333330 }, { "epoch": 129.45, "learning_rate": 2.739676375404531e-06, "loss": 0.1041, "step": 333340 }, { "epoch": 129.46, "learning_rate": 2.7391585760517804e-06, "loss": 0.0066, "step": 333350 }, { "epoch": 129.46, "learning_rate": 2.7386407766990292e-06, "loss": 0.018, "step": 333360 }, { "epoch": 129.46, "learning_rate": 2.738122977346279e-06, "loss": 0.1292, "step": 333370 }, { "epoch": 129.47, "learning_rate": 2.7376051779935276e-06, "loss": 0.0138, "step": 333380 }, { "epoch": 129.47, "learning_rate": 2.737087378640777e-06, "loss": 0.0581, "step": 333390 }, { "epoch": 129.48, "learning_rate": 2.736569579288026e-06, "loss": 0.163, "step": 333400 }, { "epoch": 129.48, "learning_rate": 2.736051779935275e-06, "loss": 0.0142, "step": 333410 }, { "epoch": 129.48, "learning_rate": 2.7355339805825244e-06, "loss": 0.0677, "step": 333420 }, { "epoch": 129.49, "learning_rate": 2.7350161812297736e-06, "loss": 0.0201, "step": 333430 }, { "epoch": 129.49, "learning_rate": 2.7344983818770228e-06, "loss": 0.0148, "step": 333440 }, { "epoch": 129.5, "learning_rate": 2.733980582524272e-06, "loss": 0.0528, "step": 333450 }, { "epoch": 129.5, "learning_rate": 2.733462783171521e-06, "loss": 0.0157, "step": 333460 }, { "epoch": 129.5, "learning_rate": 2.7329449838187704e-06, "loss": 0.1195, "step": 333470 }, { "epoch": 129.51, "learning_rate": 2.7324271844660195e-06, "loss": 0.0104, "step": 333480 }, { "epoch": 129.51, "learning_rate": 2.7319093851132687e-06, "loss": 0.1421, "step": 333490 }, { "epoch": 129.51, "learning_rate": 2.731391585760518e-06, "loss": 0.0206, "step": 333500 }, { "epoch": 129.52, "learning_rate": 2.730873786407767e-06, "loss": 0.1186, "step": 333510 }, { "epoch": 129.52, "learning_rate": 2.7303559870550163e-06, "loss": 0.0002, "step": 333520 }, { "epoch": 129.53, "learning_rate": 2.7298381877022655e-06, "loss": 0.0528, "step": 333530 }, { "epoch": 129.53, "learning_rate": 2.7293203883495147e-06, "loss": 0.0531, "step": 333540 }, { "epoch": 129.53, "learning_rate": 2.728802588996764e-06, "loss": 0.0573, "step": 333550 }, { "epoch": 129.54, "learning_rate": 2.7282847896440135e-06, "loss": 0.0862, "step": 333560 }, { "epoch": 129.54, "learning_rate": 2.7277669902912623e-06, "loss": 0.0022, "step": 333570 }, { "epoch": 129.55, "learning_rate": 2.727249190938512e-06, "loss": 0.0037, "step": 333580 }, { "epoch": 129.55, "learning_rate": 2.7267313915857607e-06, "loss": 0.1027, "step": 333590 }, { "epoch": 129.55, "learning_rate": 2.7262135922330103e-06, "loss": 0.0556, "step": 333600 }, { "epoch": 129.56, "learning_rate": 2.725695792880259e-06, "loss": 0.1154, "step": 333610 }, { "epoch": 129.56, "learning_rate": 2.7251779935275087e-06, "loss": 0.1771, "step": 333620 }, { "epoch": 129.57, "learning_rate": 2.7246601941747575e-06, "loss": 0.0007, "step": 333630 }, { "epoch": 129.57, "learning_rate": 2.724142394822007e-06, "loss": 0.0653, "step": 333640 }, { "epoch": 129.57, "learning_rate": 2.723624595469256e-06, "loss": 0.0852, "step": 333650 }, { "epoch": 129.58, "learning_rate": 2.7231067961165055e-06, "loss": 0.0075, "step": 333660 }, { "epoch": 129.58, "learning_rate": 2.7225889967637542e-06, "loss": 0.0299, "step": 333670 }, { "epoch": 129.58, "learning_rate": 2.722071197411004e-06, "loss": 0.0001, "step": 333680 }, { "epoch": 129.59, "learning_rate": 2.7215533980582526e-06, "loss": 0.0554, "step": 333690 }, { "epoch": 129.59, "learning_rate": 2.7210355987055023e-06, "loss": 0.0223, "step": 333700 }, { "epoch": 129.6, "learning_rate": 2.720517799352751e-06, "loss": 0.0319, "step": 333710 }, { "epoch": 129.6, "learning_rate": 2.7200000000000002e-06, "loss": 0.1023, "step": 333720 }, { "epoch": 129.6, "learning_rate": 2.7194822006472494e-06, "loss": 0.0243, "step": 333730 }, { "epoch": 129.61, "learning_rate": 2.7189644012944986e-06, "loss": 0.0823, "step": 333740 }, { "epoch": 129.61, "learning_rate": 2.718446601941748e-06, "loss": 0.0553, "step": 333750 }, { "epoch": 129.62, "learning_rate": 2.717928802588997e-06, "loss": 0.0393, "step": 333760 }, { "epoch": 129.62, "learning_rate": 2.717411003236246e-06, "loss": 0.0595, "step": 333770 }, { "epoch": 129.62, "learning_rate": 2.7168932038834954e-06, "loss": 0.0631, "step": 333780 }, { "epoch": 129.63, "learning_rate": 2.7163754045307446e-06, "loss": 0.0002, "step": 333790 }, { "epoch": 129.63, "learning_rate": 2.7158576051779938e-06, "loss": 0.0173, "step": 333800 }, { "epoch": 129.63, "learning_rate": 2.715339805825243e-06, "loss": 0.0473, "step": 333810 }, { "epoch": 129.64, "learning_rate": 2.714822006472492e-06, "loss": 0.0325, "step": 333820 }, { "epoch": 129.64, "learning_rate": 2.7143042071197414e-06, "loss": 0.0671, "step": 333830 }, { "epoch": 129.65, "learning_rate": 2.7137864077669906e-06, "loss": 0.0062, "step": 333840 }, { "epoch": 129.65, "learning_rate": 2.7132686084142397e-06, "loss": 0.0805, "step": 333850 }, { "epoch": 129.65, "learning_rate": 2.712750809061489e-06, "loss": 0.0045, "step": 333860 }, { "epoch": 129.66, "learning_rate": 2.712233009708738e-06, "loss": 0.0325, "step": 333870 }, { "epoch": 129.66, "learning_rate": 2.7117152103559873e-06, "loss": 0.1343, "step": 333880 }, { "epoch": 129.67, "learning_rate": 2.711197411003236e-06, "loss": 0.0154, "step": 333890 }, { "epoch": 129.67, "learning_rate": 2.7106796116504857e-06, "loss": 0.1575, "step": 333900 }, { "epoch": 129.67, "learning_rate": 2.7101618122977345e-06, "loss": 0.0334, "step": 333910 }, { "epoch": 129.68, "learning_rate": 2.709644012944984e-06, "loss": 0.0289, "step": 333920 }, { "epoch": 129.68, "learning_rate": 2.709126213592233e-06, "loss": 0.0567, "step": 333930 }, { "epoch": 129.69, "learning_rate": 2.7086084142394825e-06, "loss": 0.081, "step": 333940 }, { "epoch": 129.69, "learning_rate": 2.7080906148867313e-06, "loss": 0.0316, "step": 333950 }, { "epoch": 129.69, "learning_rate": 2.707572815533981e-06, "loss": 0.0011, "step": 333960 }, { "epoch": 129.7, "learning_rate": 2.7070550161812297e-06, "loss": 0.0287, "step": 333970 }, { "epoch": 129.7, "learning_rate": 2.7065372168284793e-06, "loss": 0.0826, "step": 333980 }, { "epoch": 129.7, "learning_rate": 2.706019417475728e-06, "loss": 0.0098, "step": 333990 }, { "epoch": 129.71, "learning_rate": 2.7055016181229777e-06, "loss": 0.0096, "step": 334000 }, { "epoch": 129.71, "learning_rate": 2.7049838187702264e-06, "loss": 0.003, "step": 334010 }, { "epoch": 129.72, "learning_rate": 2.704466019417476e-06, "loss": 0.0202, "step": 334020 }, { "epoch": 129.72, "learning_rate": 2.703948220064725e-06, "loss": 0.0369, "step": 334030 }, { "epoch": 129.72, "learning_rate": 2.7034304207119744e-06, "loss": 0.0038, "step": 334040 }, { "epoch": 129.73, "learning_rate": 2.702912621359223e-06, "loss": 0.1625, "step": 334050 }, { "epoch": 129.73, "learning_rate": 2.702394822006473e-06, "loss": 0.0208, "step": 334060 }, { "epoch": 129.74, "learning_rate": 2.7018770226537216e-06, "loss": 0.0227, "step": 334070 }, { "epoch": 129.74, "learning_rate": 2.7013592233009712e-06, "loss": 0.0455, "step": 334080 }, { "epoch": 129.74, "learning_rate": 2.70084142394822e-06, "loss": 0.0972, "step": 334090 }, { "epoch": 129.75, "learning_rate": 2.7003236245954696e-06, "loss": 0.0288, "step": 334100 }, { "epoch": 129.75, "learning_rate": 2.6998058252427184e-06, "loss": 0.1005, "step": 334110 }, { "epoch": 129.76, "learning_rate": 2.699288025889968e-06, "loss": 0.0628, "step": 334120 }, { "epoch": 129.76, "learning_rate": 2.698770226537217e-06, "loss": 0.032, "step": 334130 }, { "epoch": 129.76, "learning_rate": 2.6982524271844664e-06, "loss": 0.0039, "step": 334140 }, { "epoch": 129.77, "learning_rate": 2.6977346278317156e-06, "loss": 0.015, "step": 334150 }, { "epoch": 129.77, "learning_rate": 2.6972168284789648e-06, "loss": 0.0103, "step": 334160 }, { "epoch": 129.77, "learning_rate": 2.696699029126214e-06, "loss": 0.0134, "step": 334170 }, { "epoch": 129.78, "learning_rate": 2.696181229773463e-06, "loss": 0.0969, "step": 334180 }, { "epoch": 129.78, "learning_rate": 2.6956634304207124e-06, "loss": 0.1614, "step": 334190 }, { "epoch": 129.79, "learning_rate": 2.6951456310679616e-06, "loss": 0.1065, "step": 334200 }, { "epoch": 129.79, "learning_rate": 2.6946278317152107e-06, "loss": 0.056, "step": 334210 }, { "epoch": 129.79, "learning_rate": 2.6941100323624595e-06, "loss": 0.0138, "step": 334220 }, { "epoch": 129.8, "learning_rate": 2.693592233009709e-06, "loss": 0.0233, "step": 334230 }, { "epoch": 129.8, "learning_rate": 2.693074433656958e-06, "loss": 0.0389, "step": 334240 }, { "epoch": 129.81, "learning_rate": 2.6925566343042075e-06, "loss": 0.0002, "step": 334250 }, { "epoch": 129.81, "learning_rate": 2.6920388349514563e-06, "loss": 0.0314, "step": 334260 }, { "epoch": 129.81, "learning_rate": 2.691521035598706e-06, "loss": 0.041, "step": 334270 }, { "epoch": 129.82, "learning_rate": 2.6910032362459547e-06, "loss": 0.0217, "step": 334280 }, { "epoch": 129.82, "learning_rate": 2.6904854368932043e-06, "loss": 0.1101, "step": 334290 }, { "epoch": 129.83, "learning_rate": 2.689967637540453e-06, "loss": 0.0088, "step": 334300 }, { "epoch": 129.83, "learning_rate": 2.6894498381877027e-06, "loss": 0.1055, "step": 334310 }, { "epoch": 129.83, "learning_rate": 2.6889320388349515e-06, "loss": 0.0628, "step": 334320 }, { "epoch": 129.84, "learning_rate": 2.688414239482201e-06, "loss": 0.0092, "step": 334330 }, { "epoch": 129.84, "learning_rate": 2.68789644012945e-06, "loss": 0.1199, "step": 334340 }, { "epoch": 129.84, "learning_rate": 2.6873786407766995e-06, "loss": 0.0001, "step": 334350 }, { "epoch": 129.85, "learning_rate": 2.6868608414239482e-06, "loss": 0.0139, "step": 334360 }, { "epoch": 129.85, "learning_rate": 2.686343042071198e-06, "loss": 0.0112, "step": 334370 }, { "epoch": 129.86, "learning_rate": 2.6858252427184466e-06, "loss": 0.0051, "step": 334380 }, { "epoch": 129.86, "learning_rate": 2.6853074433656962e-06, "loss": 0.0014, "step": 334390 }, { "epoch": 129.86, "learning_rate": 2.684789644012945e-06, "loss": 0.0172, "step": 334400 }, { "epoch": 129.87, "learning_rate": 2.6842718446601946e-06, "loss": 0.0686, "step": 334410 }, { "epoch": 129.87, "learning_rate": 2.6837540453074434e-06, "loss": 0.1174, "step": 334420 }, { "epoch": 129.88, "learning_rate": 2.683236245954693e-06, "loss": 0.0264, "step": 334430 }, { "epoch": 129.88, "learning_rate": 2.682718446601942e-06, "loss": 0.0979, "step": 334440 }, { "epoch": 129.88, "learning_rate": 2.6822006472491914e-06, "loss": 0.0112, "step": 334450 }, { "epoch": 129.89, "learning_rate": 2.68168284789644e-06, "loss": 0.0197, "step": 334460 }, { "epoch": 129.89, "learning_rate": 2.68116504854369e-06, "loss": 0.0001, "step": 334470 }, { "epoch": 129.9, "learning_rate": 2.6806472491909386e-06, "loss": 0.0571, "step": 334480 }, { "epoch": 129.9, "learning_rate": 2.680129449838188e-06, "loss": 0.0413, "step": 334490 }, { "epoch": 129.9, "learning_rate": 2.679611650485437e-06, "loss": 0.1219, "step": 334500 }, { "epoch": 129.91, "learning_rate": 2.6790938511326866e-06, "loss": 0.0281, "step": 334510 }, { "epoch": 129.91, "learning_rate": 2.6785760517799354e-06, "loss": 0.001, "step": 334520 }, { "epoch": 129.91, "learning_rate": 2.6780582524271845e-06, "loss": 0.0028, "step": 334530 }, { "epoch": 129.92, "learning_rate": 2.6775404530744337e-06, "loss": 0.098, "step": 334540 }, { "epoch": 129.92, "learning_rate": 2.677022653721683e-06, "loss": 0.0141, "step": 334550 }, { "epoch": 129.93, "learning_rate": 2.676504854368932e-06, "loss": 0.1808, "step": 334560 }, { "epoch": 129.93, "learning_rate": 2.6759870550161813e-06, "loss": 0.0095, "step": 334570 }, { "epoch": 129.93, "learning_rate": 2.6754692556634305e-06, "loss": 0.1653, "step": 334580 }, { "epoch": 129.94, "learning_rate": 2.6749514563106797e-06, "loss": 0.0088, "step": 334590 }, { "epoch": 129.94, "learning_rate": 2.674433656957929e-06, "loss": 0.1214, "step": 334600 }, { "epoch": 129.95, "learning_rate": 2.673915857605178e-06, "loss": 0.0099, "step": 334610 }, { "epoch": 129.95, "learning_rate": 2.6733980582524273e-06, "loss": 0.0419, "step": 334620 }, { "epoch": 129.95, "learning_rate": 2.6728802588996765e-06, "loss": 0.0176, "step": 334630 }, { "epoch": 129.96, "learning_rate": 2.6723624595469257e-06, "loss": 0.0131, "step": 334640 }, { "epoch": 129.96, "learning_rate": 2.671844660194175e-06, "loss": 0.0174, "step": 334650 }, { "epoch": 129.97, "learning_rate": 2.671326860841424e-06, "loss": 0.0061, "step": 334660 }, { "epoch": 129.97, "learning_rate": 2.6708090614886733e-06, "loss": 0.009, "step": 334670 }, { "epoch": 129.97, "learning_rate": 2.670291262135923e-06, "loss": 0.0221, "step": 334680 }, { "epoch": 129.98, "learning_rate": 2.6697734627831717e-06, "loss": 0.105, "step": 334690 }, { "epoch": 129.98, "learning_rate": 2.6692556634304213e-06, "loss": 0.0984, "step": 334700 }, { "epoch": 129.98, "learning_rate": 2.66873786407767e-06, "loss": 0.0098, "step": 334710 }, { "epoch": 129.99, "learning_rate": 2.6682200647249197e-06, "loss": 0.0608, "step": 334720 }, { "epoch": 129.99, "learning_rate": 2.6677022653721684e-06, "loss": 0.0326, "step": 334730 }, { "epoch": 130.0, "learning_rate": 2.667184466019418e-06, "loss": 0.0483, "step": 334740 }, { "epoch": 130.0, "learning_rate": 2.666666666666667e-06, "loss": 0.0517, "step": 334750 }, { "epoch": 130.0, "eval_accuracy": 0.9529573590096286, "eval_loss": 0.38386210799217224, "eval_runtime": 8.2239, "eval_samples_per_second": 442.005, "eval_steps_per_second": 55.327, "step": 334750 }, { "epoch": 130.0, "learning_rate": 2.6661488673139164e-06, "loss": 0.0329, "step": 334760 }, { "epoch": 130.01, "learning_rate": 2.6656310679611652e-06, "loss": 0.1286, "step": 334770 }, { "epoch": 130.01, "learning_rate": 2.665113268608415e-06, "loss": 0.0083, "step": 334780 }, { "epoch": 130.02, "learning_rate": 2.6645954692556636e-06, "loss": 0.0456, "step": 334790 }, { "epoch": 130.02, "learning_rate": 2.6640776699029132e-06, "loss": 0.0151, "step": 334800 }, { "epoch": 130.02, "learning_rate": 2.663559870550162e-06, "loss": 0.0001, "step": 334810 }, { "epoch": 130.03, "learning_rate": 2.6630420711974116e-06, "loss": 0.0518, "step": 334820 }, { "epoch": 130.03, "learning_rate": 2.6625242718446604e-06, "loss": 0.0088, "step": 334830 }, { "epoch": 130.03, "learning_rate": 2.66200647249191e-06, "loss": 0.0393, "step": 334840 }, { "epoch": 130.04, "learning_rate": 2.6614886731391588e-06, "loss": 0.02, "step": 334850 }, { "epoch": 130.04, "learning_rate": 2.660970873786408e-06, "loss": 0.0522, "step": 334860 }, { "epoch": 130.05, "learning_rate": 2.660453074433657e-06, "loss": 0.0083, "step": 334870 }, { "epoch": 130.05, "learning_rate": 2.6599352750809064e-06, "loss": 0.0482, "step": 334880 }, { "epoch": 130.05, "learning_rate": 2.6594174757281556e-06, "loss": 0.037, "step": 334890 }, { "epoch": 130.06, "learning_rate": 2.6588996763754047e-06, "loss": 0.0592, "step": 334900 }, { "epoch": 130.06, "learning_rate": 2.658381877022654e-06, "loss": 0.0001, "step": 334910 }, { "epoch": 130.07, "learning_rate": 2.657864077669903e-06, "loss": 0.0009, "step": 334920 }, { "epoch": 130.07, "learning_rate": 2.6573462783171523e-06, "loss": 0.1308, "step": 334930 }, { "epoch": 130.07, "learning_rate": 2.6568284789644015e-06, "loss": 0.0384, "step": 334940 }, { "epoch": 130.08, "learning_rate": 2.6563106796116507e-06, "loss": 0.0832, "step": 334950 }, { "epoch": 130.08, "learning_rate": 2.6557928802589e-06, "loss": 0.0793, "step": 334960 }, { "epoch": 130.09, "learning_rate": 2.655275080906149e-06, "loss": 0.0051, "step": 334970 }, { "epoch": 130.09, "learning_rate": 2.6547572815533983e-06, "loss": 0.0232, "step": 334980 }, { "epoch": 130.09, "learning_rate": 2.6542394822006475e-06, "loss": 0.1319, "step": 334990 }, { "epoch": 130.1, "learning_rate": 2.6537216828478967e-06, "loss": 0.0004, "step": 335000 }, { "epoch": 130.1, "learning_rate": 2.653203883495146e-06, "loss": 0.0248, "step": 335010 }, { "epoch": 130.1, "learning_rate": 2.652686084142395e-06, "loss": 0.0298, "step": 335020 }, { "epoch": 130.11, "learning_rate": 2.652168284789644e-06, "loss": 0.0462, "step": 335030 }, { "epoch": 130.11, "learning_rate": 2.6516504854368935e-06, "loss": 0.0063, "step": 335040 }, { "epoch": 130.12, "learning_rate": 2.6511326860841422e-06, "loss": 0.0452, "step": 335050 }, { "epoch": 130.12, "learning_rate": 2.650614886731392e-06, "loss": 0.0719, "step": 335060 }, { "epoch": 130.12, "learning_rate": 2.6500970873786406e-06, "loss": 0.017, "step": 335070 }, { "epoch": 130.13, "learning_rate": 2.6495792880258902e-06, "loss": 0.0245, "step": 335080 }, { "epoch": 130.13, "learning_rate": 2.649061488673139e-06, "loss": 0.0054, "step": 335090 }, { "epoch": 130.14, "learning_rate": 2.6485436893203886e-06, "loss": 0.0822, "step": 335100 }, { "epoch": 130.14, "learning_rate": 2.6480258899676374e-06, "loss": 0.0338, "step": 335110 }, { "epoch": 130.14, "learning_rate": 2.647508090614887e-06, "loss": 0.0332, "step": 335120 }, { "epoch": 130.15, "learning_rate": 2.646990291262136e-06, "loss": 0.1302, "step": 335130 }, { "epoch": 130.15, "learning_rate": 2.6464724919093854e-06, "loss": 0.0479, "step": 335140 }, { "epoch": 130.16, "learning_rate": 2.645954692556634e-06, "loss": 0.0499, "step": 335150 }, { "epoch": 130.16, "learning_rate": 2.645436893203884e-06, "loss": 0.031, "step": 335160 }, { "epoch": 130.16, "learning_rate": 2.6449190938511326e-06, "loss": 0.1055, "step": 335170 }, { "epoch": 130.17, "learning_rate": 2.644401294498382e-06, "loss": 0.0001, "step": 335180 }, { "epoch": 130.17, "learning_rate": 2.643883495145631e-06, "loss": 0.0348, "step": 335190 }, { "epoch": 130.17, "learning_rate": 2.6433656957928806e-06, "loss": 0.1096, "step": 335200 }, { "epoch": 130.18, "learning_rate": 2.6428478964401294e-06, "loss": 0.1239, "step": 335210 }, { "epoch": 130.18, "learning_rate": 2.642330097087379e-06, "loss": 0.0914, "step": 335220 }, { "epoch": 130.19, "learning_rate": 2.6418122977346277e-06, "loss": 0.0106, "step": 335230 }, { "epoch": 130.19, "learning_rate": 2.6412944983818774e-06, "loss": 0.0231, "step": 335240 }, { "epoch": 130.19, "learning_rate": 2.6407766990291266e-06, "loss": 0.0115, "step": 335250 }, { "epoch": 130.2, "learning_rate": 2.6402588996763757e-06, "loss": 0.0266, "step": 335260 }, { "epoch": 130.2, "learning_rate": 2.639741100323625e-06, "loss": 0.0041, "step": 335270 }, { "epoch": 130.21, "learning_rate": 2.639223300970874e-06, "loss": 0.1669, "step": 335280 }, { "epoch": 130.21, "learning_rate": 2.6387055016181233e-06, "loss": 0.0883, "step": 335290 }, { "epoch": 130.21, "learning_rate": 2.6381877022653725e-06, "loss": 0.0153, "step": 335300 }, { "epoch": 130.22, "learning_rate": 2.6376699029126217e-06, "loss": 0.0221, "step": 335310 }, { "epoch": 130.22, "learning_rate": 2.637152103559871e-06, "loss": 0.0596, "step": 335320 }, { "epoch": 130.23, "learning_rate": 2.63663430420712e-06, "loss": 0.0172, "step": 335330 }, { "epoch": 130.23, "learning_rate": 2.6361165048543693e-06, "loss": 0.0661, "step": 335340 }, { "epoch": 130.23, "learning_rate": 2.6355987055016185e-06, "loss": 0.0269, "step": 335350 }, { "epoch": 130.24, "learning_rate": 2.6350809061488673e-06, "loss": 0.011, "step": 335360 }, { "epoch": 130.24, "learning_rate": 2.634563106796117e-06, "loss": 0.0201, "step": 335370 }, { "epoch": 130.24, "learning_rate": 2.6340453074433657e-06, "loss": 0.1614, "step": 335380 }, { "epoch": 130.25, "learning_rate": 2.6335275080906153e-06, "loss": 0.017, "step": 335390 }, { "epoch": 130.25, "learning_rate": 2.633009708737864e-06, "loss": 0.1881, "step": 335400 }, { "epoch": 130.26, "learning_rate": 2.6324919093851137e-06, "loss": 0.1032, "step": 335410 }, { "epoch": 130.26, "learning_rate": 2.6319741100323624e-06, "loss": 0.0364, "step": 335420 }, { "epoch": 130.26, "learning_rate": 2.631456310679612e-06, "loss": 0.0456, "step": 335430 }, { "epoch": 130.27, "learning_rate": 2.630938511326861e-06, "loss": 0.0101, "step": 335440 }, { "epoch": 130.27, "learning_rate": 2.6304207119741104e-06, "loss": 0.0303, "step": 335450 }, { "epoch": 130.28, "learning_rate": 2.6299029126213592e-06, "loss": 0.0081, "step": 335460 }, { "epoch": 130.28, "learning_rate": 2.629385113268609e-06, "loss": 0.0448, "step": 335470 }, { "epoch": 130.28, "learning_rate": 2.6288673139158576e-06, "loss": 0.0465, "step": 335480 }, { "epoch": 130.29, "learning_rate": 2.6283495145631072e-06, "loss": 0.0195, "step": 335490 }, { "epoch": 130.29, "learning_rate": 2.627831715210356e-06, "loss": 0.0961, "step": 335500 }, { "epoch": 130.3, "learning_rate": 2.6273139158576056e-06, "loss": 0.0196, "step": 335510 }, { "epoch": 130.3, "learning_rate": 2.6267961165048544e-06, "loss": 0.0357, "step": 335520 }, { "epoch": 130.3, "learning_rate": 2.626278317152104e-06, "loss": 0.0241, "step": 335530 }, { "epoch": 130.31, "learning_rate": 2.6257605177993528e-06, "loss": 0.0077, "step": 335540 }, { "epoch": 130.31, "learning_rate": 2.6252427184466024e-06, "loss": 0.0495, "step": 335550 }, { "epoch": 130.31, "learning_rate": 2.624724919093851e-06, "loss": 0.0528, "step": 335560 }, { "epoch": 130.32, "learning_rate": 2.6242071197411008e-06, "loss": 0.0125, "step": 335570 }, { "epoch": 130.32, "learning_rate": 2.6236893203883496e-06, "loss": 0.1282, "step": 335580 }, { "epoch": 130.33, "learning_rate": 2.623171521035599e-06, "loss": 0.0001, "step": 335590 }, { "epoch": 130.33, "learning_rate": 2.622653721682848e-06, "loss": 0.0202, "step": 335600 }, { "epoch": 130.33, "learning_rate": 2.6221359223300976e-06, "loss": 0.0659, "step": 335610 }, { "epoch": 130.34, "learning_rate": 2.6216181229773463e-06, "loss": 0.0096, "step": 335620 }, { "epoch": 130.34, "learning_rate": 2.621100323624596e-06, "loss": 0.1821, "step": 335630 }, { "epoch": 130.35, "learning_rate": 2.6205825242718447e-06, "loss": 0.0003, "step": 335640 }, { "epoch": 130.35, "learning_rate": 2.6200647249190943e-06, "loss": 0.0181, "step": 335650 }, { "epoch": 130.35, "learning_rate": 2.619546925566343e-06, "loss": 0.1166, "step": 335660 }, { "epoch": 130.36, "learning_rate": 2.6190291262135923e-06, "loss": 0.0517, "step": 335670 }, { "epoch": 130.36, "learning_rate": 2.6185113268608415e-06, "loss": 0.1522, "step": 335680 }, { "epoch": 130.37, "learning_rate": 2.6179935275080907e-06, "loss": 0.004, "step": 335690 }, { "epoch": 130.37, "learning_rate": 2.61747572815534e-06, "loss": 0.0265, "step": 335700 }, { "epoch": 130.37, "learning_rate": 2.616957928802589e-06, "loss": 0.0275, "step": 335710 }, { "epoch": 130.38, "learning_rate": 2.6164401294498383e-06, "loss": 0.008, "step": 335720 }, { "epoch": 130.38, "learning_rate": 2.6159223300970875e-06, "loss": 0.0907, "step": 335730 }, { "epoch": 130.38, "learning_rate": 2.6154045307443367e-06, "loss": 0.0196, "step": 335740 }, { "epoch": 130.39, "learning_rate": 2.614886731391586e-06, "loss": 0.0011, "step": 335750 }, { "epoch": 130.39, "learning_rate": 2.614368932038835e-06, "loss": 0.0002, "step": 335760 }, { "epoch": 130.4, "learning_rate": 2.6138511326860842e-06, "loss": 0.0476, "step": 335770 }, { "epoch": 130.4, "learning_rate": 2.6133333333333334e-06, "loss": 0.0316, "step": 335780 }, { "epoch": 130.4, "learning_rate": 2.6128155339805826e-06, "loss": 0.037, "step": 335790 }, { "epoch": 130.41, "learning_rate": 2.6122977346278323e-06, "loss": 0.1021, "step": 335800 }, { "epoch": 130.41, "learning_rate": 2.611779935275081e-06, "loss": 0.047, "step": 335810 }, { "epoch": 130.42, "learning_rate": 2.6112621359223306e-06, "loss": 0.0099, "step": 335820 }, { "epoch": 130.42, "learning_rate": 2.6107443365695794e-06, "loss": 0.0136, "step": 335830 }, { "epoch": 130.42, "learning_rate": 2.610226537216829e-06, "loss": 0.1459, "step": 335840 }, { "epoch": 130.43, "learning_rate": 2.609708737864078e-06, "loss": 0.0108, "step": 335850 }, { "epoch": 130.43, "learning_rate": 2.6091909385113274e-06, "loss": 0.1189, "step": 335860 }, { "epoch": 130.43, "learning_rate": 2.608673139158576e-06, "loss": 0.0387, "step": 335870 }, { "epoch": 130.44, "learning_rate": 2.608155339805826e-06, "loss": 0.0427, "step": 335880 }, { "epoch": 130.44, "learning_rate": 2.6076375404530746e-06, "loss": 0.0099, "step": 335890 }, { "epoch": 130.45, "learning_rate": 2.607119741100324e-06, "loss": 0.1186, "step": 335900 }, { "epoch": 130.45, "learning_rate": 2.606601941747573e-06, "loss": 0.021, "step": 335910 }, { "epoch": 130.45, "learning_rate": 2.6060841423948226e-06, "loss": 0.0169, "step": 335920 }, { "epoch": 130.46, "learning_rate": 2.6055663430420714e-06, "loss": 0.046, "step": 335930 }, { "epoch": 130.46, "learning_rate": 2.605048543689321e-06, "loss": 0.0002, "step": 335940 }, { "epoch": 130.47, "learning_rate": 2.6045307443365697e-06, "loss": 0.0006, "step": 335950 }, { "epoch": 130.47, "learning_rate": 2.6040129449838194e-06, "loss": 0.0001, "step": 335960 }, { "epoch": 130.47, "learning_rate": 2.603495145631068e-06, "loss": 0.1051, "step": 335970 }, { "epoch": 130.48, "learning_rate": 2.6029773462783178e-06, "loss": 0.0006, "step": 335980 }, { "epoch": 130.48, "learning_rate": 2.6024595469255665e-06, "loss": 0.0508, "step": 335990 }, { "epoch": 130.49, "learning_rate": 2.6019417475728157e-06, "loss": 0.0169, "step": 336000 }, { "epoch": 130.49, "learning_rate": 2.601423948220065e-06, "loss": 0.1123, "step": 336010 }, { "epoch": 130.49, "learning_rate": 2.600906148867314e-06, "loss": 0.0237, "step": 336020 }, { "epoch": 130.5, "learning_rate": 2.6003883495145633e-06, "loss": 0.023, "step": 336030 }, { "epoch": 130.5, "learning_rate": 2.5998705501618125e-06, "loss": 0.01, "step": 336040 }, { "epoch": 130.5, "learning_rate": 2.5993527508090617e-06, "loss": 0.0452, "step": 336050 }, { "epoch": 130.51, "learning_rate": 2.598834951456311e-06, "loss": 0.0194, "step": 336060 }, { "epoch": 130.51, "learning_rate": 2.59831715210356e-06, "loss": 0.0088, "step": 336070 }, { "epoch": 130.52, "learning_rate": 2.5977993527508093e-06, "loss": 0.0533, "step": 336080 }, { "epoch": 130.52, "learning_rate": 2.5972815533980585e-06, "loss": 0.1349, "step": 336090 }, { "epoch": 130.52, "learning_rate": 2.5967637540453077e-06, "loss": 0.01, "step": 336100 }, { "epoch": 130.53, "learning_rate": 2.596245954692557e-06, "loss": 0.0755, "step": 336110 }, { "epoch": 130.53, "learning_rate": 2.595728155339806e-06, "loss": 0.0721, "step": 336120 }, { "epoch": 130.54, "learning_rate": 2.5952103559870552e-06, "loss": 0.0072, "step": 336130 }, { "epoch": 130.54, "learning_rate": 2.5946925566343044e-06, "loss": 0.1273, "step": 336140 }, { "epoch": 130.54, "learning_rate": 2.5941747572815536e-06, "loss": 0.0476, "step": 336150 }, { "epoch": 130.55, "learning_rate": 2.593656957928803e-06, "loss": 0.1116, "step": 336160 }, { "epoch": 130.55, "learning_rate": 2.5931391585760516e-06, "loss": 0.0224, "step": 336170 }, { "epoch": 130.56, "learning_rate": 2.5926213592233012e-06, "loss": 0.0361, "step": 336180 }, { "epoch": 130.56, "learning_rate": 2.59210355987055e-06, "loss": 0.0452, "step": 336190 }, { "epoch": 130.56, "learning_rate": 2.5915857605177996e-06, "loss": 0.0905, "step": 336200 }, { "epoch": 130.57, "learning_rate": 2.5910679611650484e-06, "loss": 0.0156, "step": 336210 }, { "epoch": 130.57, "learning_rate": 2.590550161812298e-06, "loss": 0.0319, "step": 336220 }, { "epoch": 130.57, "learning_rate": 2.5900323624595468e-06, "loss": 0.0549, "step": 336230 }, { "epoch": 130.58, "learning_rate": 2.5895145631067964e-06, "loss": 0.0316, "step": 336240 }, { "epoch": 130.58, "learning_rate": 2.588996763754045e-06, "loss": 0.0278, "step": 336250 }, { "epoch": 130.59, "learning_rate": 2.5884789644012948e-06, "loss": 0.0011, "step": 336260 }, { "epoch": 130.59, "learning_rate": 2.5879611650485435e-06, "loss": 0.0403, "step": 336270 }, { "epoch": 130.59, "learning_rate": 2.587443365695793e-06, "loss": 0.0031, "step": 336280 }, { "epoch": 130.6, "learning_rate": 2.586925566343042e-06, "loss": 0.0011, "step": 336290 }, { "epoch": 130.6, "learning_rate": 2.5864077669902916e-06, "loss": 0.0002, "step": 336300 }, { "epoch": 130.61, "learning_rate": 2.5858899676375403e-06, "loss": 0.0155, "step": 336310 }, { "epoch": 130.61, "learning_rate": 2.58537216828479e-06, "loss": 0.0797, "step": 336320 }, { "epoch": 130.61, "learning_rate": 2.5848543689320387e-06, "loss": 0.1185, "step": 336330 }, { "epoch": 130.62, "learning_rate": 2.5843365695792883e-06, "loss": 0.0304, "step": 336340 }, { "epoch": 130.62, "learning_rate": 2.583818770226537e-06, "loss": 0.1541, "step": 336350 }, { "epoch": 130.63, "learning_rate": 2.5833009708737867e-06, "loss": 0.0066, "step": 336360 }, { "epoch": 130.63, "learning_rate": 2.582783171521036e-06, "loss": 0.1098, "step": 336370 }, { "epoch": 130.63, "learning_rate": 2.582265372168285e-06, "loss": 0.1411, "step": 336380 }, { "epoch": 130.64, "learning_rate": 2.5817475728155343e-06, "loss": 0.0758, "step": 336390 }, { "epoch": 130.64, "learning_rate": 2.5812297734627835e-06, "loss": 0.0997, "step": 336400 }, { "epoch": 130.64, "learning_rate": 2.5807119741100327e-06, "loss": 0.1517, "step": 336410 }, { "epoch": 130.65, "learning_rate": 2.580194174757282e-06, "loss": 0.0641, "step": 336420 }, { "epoch": 130.65, "learning_rate": 2.579676375404531e-06, "loss": 0.1003, "step": 336430 }, { "epoch": 130.66, "learning_rate": 2.5791585760517803e-06, "loss": 0.1459, "step": 336440 }, { "epoch": 130.66, "learning_rate": 2.5786407766990295e-06, "loss": 0.0064, "step": 336450 }, { "epoch": 130.66, "learning_rate": 2.5781229773462787e-06, "loss": 0.0606, "step": 336460 }, { "epoch": 130.67, "learning_rate": 2.577605177993528e-06, "loss": 0.0093, "step": 336470 }, { "epoch": 130.67, "learning_rate": 2.577087378640777e-06, "loss": 0.0008, "step": 336480 }, { "epoch": 130.68, "learning_rate": 2.5765695792880263e-06, "loss": 0.0101, "step": 336490 }, { "epoch": 130.68, "learning_rate": 2.576051779935275e-06, "loss": 0.0081, "step": 336500 }, { "epoch": 130.68, "learning_rate": 2.5755339805825246e-06, "loss": 0.0163, "step": 336510 }, { "epoch": 130.69, "learning_rate": 2.5750161812297734e-06, "loss": 0.0132, "step": 336520 }, { "epoch": 130.69, "learning_rate": 2.574498381877023e-06, "loss": 0.0431, "step": 336530 }, { "epoch": 130.7, "learning_rate": 2.573980582524272e-06, "loss": 0.1605, "step": 336540 }, { "epoch": 130.7, "learning_rate": 2.5734627831715214e-06, "loss": 0.026, "step": 336550 }, { "epoch": 130.7, "learning_rate": 2.57294498381877e-06, "loss": 0.0069, "step": 336560 }, { "epoch": 130.71, "learning_rate": 2.57242718446602e-06, "loss": 0.0633, "step": 336570 }, { "epoch": 130.71, "learning_rate": 2.5719093851132686e-06, "loss": 0.0236, "step": 336580 }, { "epoch": 130.71, "learning_rate": 2.571391585760518e-06, "loss": 0.049, "step": 336590 }, { "epoch": 130.72, "learning_rate": 2.570873786407767e-06, "loss": 0.0647, "step": 336600 }, { "epoch": 130.72, "learning_rate": 2.5703559870550166e-06, "loss": 0.0461, "step": 336610 }, { "epoch": 130.73, "learning_rate": 2.5698381877022654e-06, "loss": 0.0303, "step": 336620 }, { "epoch": 130.73, "learning_rate": 2.569320388349515e-06, "loss": 0.076, "step": 336630 }, { "epoch": 130.73, "learning_rate": 2.5688025889967637e-06, "loss": 0.0599, "step": 336640 }, { "epoch": 130.74, "learning_rate": 2.5682847896440134e-06, "loss": 0.0077, "step": 336650 }, { "epoch": 130.74, "learning_rate": 2.567766990291262e-06, "loss": 0.0587, "step": 336660 }, { "epoch": 130.75, "learning_rate": 2.5672491909385118e-06, "loss": 0.0164, "step": 336670 }, { "epoch": 130.75, "learning_rate": 2.5667313915857605e-06, "loss": 0.0248, "step": 336680 }, { "epoch": 130.75, "learning_rate": 2.56621359223301e-06, "loss": 0.0779, "step": 336690 }, { "epoch": 130.76, "learning_rate": 2.565695792880259e-06, "loss": 0.1172, "step": 336700 }, { "epoch": 130.76, "learning_rate": 2.5651779935275085e-06, "loss": 0.0803, "step": 336710 }, { "epoch": 130.77, "learning_rate": 2.5646601941747573e-06, "loss": 0.0208, "step": 336720 }, { "epoch": 130.77, "learning_rate": 2.564142394822007e-06, "loss": 0.0193, "step": 336730 }, { "epoch": 130.77, "learning_rate": 2.5636245954692557e-06, "loss": 0.0507, "step": 336740 }, { "epoch": 130.78, "learning_rate": 2.5631067961165053e-06, "loss": 0.0102, "step": 336750 }, { "epoch": 130.78, "learning_rate": 2.562588996763754e-06, "loss": 0.011, "step": 336760 }, { "epoch": 130.78, "learning_rate": 2.5620711974110037e-06, "loss": 0.0111, "step": 336770 }, { "epoch": 130.79, "learning_rate": 2.5615533980582525e-06, "loss": 0.0263, "step": 336780 }, { "epoch": 130.79, "learning_rate": 2.561035598705502e-06, "loss": 0.0431, "step": 336790 }, { "epoch": 130.8, "learning_rate": 2.560517799352751e-06, "loss": 0.0028, "step": 336800 }, { "epoch": 130.8, "learning_rate": 2.56e-06, "loss": 0.0124, "step": 336810 }, { "epoch": 130.8, "learning_rate": 2.5594822006472492e-06, "loss": 0.0773, "step": 336820 }, { "epoch": 130.81, "learning_rate": 2.5589644012944984e-06, "loss": 0.0845, "step": 336830 }, { "epoch": 130.81, "learning_rate": 2.5584466019417476e-06, "loss": 0.0005, "step": 336840 }, { "epoch": 130.82, "learning_rate": 2.557928802588997e-06, "loss": 0.0103, "step": 336850 }, { "epoch": 130.82, "learning_rate": 2.557411003236246e-06, "loss": 0.0988, "step": 336860 }, { "epoch": 130.82, "learning_rate": 2.5568932038834952e-06, "loss": 0.0189, "step": 336870 }, { "epoch": 130.83, "learning_rate": 2.5563754045307444e-06, "loss": 0.0087, "step": 336880 }, { "epoch": 130.83, "learning_rate": 2.5558576051779936e-06, "loss": 0.0107, "step": 336890 }, { "epoch": 130.83, "learning_rate": 2.555339805825243e-06, "loss": 0.062, "step": 336900 }, { "epoch": 130.84, "learning_rate": 2.554822006472492e-06, "loss": 0.026, "step": 336910 }, { "epoch": 130.84, "learning_rate": 2.5543042071197416e-06, "loss": 0.0094, "step": 336920 }, { "epoch": 130.85, "learning_rate": 2.5537864077669904e-06, "loss": 0.0105, "step": 336930 }, { "epoch": 130.85, "learning_rate": 2.55326860841424e-06, "loss": 0.0153, "step": 336940 }, { "epoch": 130.85, "learning_rate": 2.5527508090614888e-06, "loss": 0.0205, "step": 336950 }, { "epoch": 130.86, "learning_rate": 2.5522330097087384e-06, "loss": 0.0191, "step": 336960 }, { "epoch": 130.86, "learning_rate": 2.551715210355987e-06, "loss": 0.0277, "step": 336970 }, { "epoch": 130.87, "learning_rate": 2.5511974110032368e-06, "loss": 0.0552, "step": 336980 }, { "epoch": 130.87, "learning_rate": 2.5506796116504856e-06, "loss": 0.0017, "step": 336990 }, { "epoch": 130.87, "learning_rate": 2.550161812297735e-06, "loss": 0.0004, "step": 337000 }, { "epoch": 130.88, "learning_rate": 2.549644012944984e-06, "loss": 0.0607, "step": 337010 }, { "epoch": 130.88, "learning_rate": 2.5491262135922336e-06, "loss": 0.1047, "step": 337020 }, { "epoch": 130.89, "learning_rate": 2.5486084142394823e-06, "loss": 0.0005, "step": 337030 }, { "epoch": 130.89, "learning_rate": 2.548090614886732e-06, "loss": 0.0005, "step": 337040 }, { "epoch": 130.89, "learning_rate": 2.5475728155339807e-06, "loss": 0.054, "step": 337050 }, { "epoch": 130.9, "learning_rate": 2.5470550161812303e-06, "loss": 0.1247, "step": 337060 }, { "epoch": 130.9, "learning_rate": 2.546537216828479e-06, "loss": 0.0187, "step": 337070 }, { "epoch": 130.9, "learning_rate": 2.5460194174757287e-06, "loss": 0.134, "step": 337080 }, { "epoch": 130.91, "learning_rate": 2.5455016181229775e-06, "loss": 0.1253, "step": 337090 }, { "epoch": 130.91, "learning_rate": 2.544983818770227e-06, "loss": 0.112, "step": 337100 }, { "epoch": 130.92, "learning_rate": 2.544466019417476e-06, "loss": 0.0753, "step": 337110 }, { "epoch": 130.92, "learning_rate": 2.5439482200647255e-06, "loss": 0.1241, "step": 337120 }, { "epoch": 130.92, "learning_rate": 2.5434304207119743e-06, "loss": 0.0898, "step": 337130 }, { "epoch": 130.93, "learning_rate": 2.5429126213592235e-06, "loss": 0.0383, "step": 337140 }, { "epoch": 130.93, "learning_rate": 2.5423948220064727e-06, "loss": 0.0459, "step": 337150 }, { "epoch": 130.94, "learning_rate": 2.541877022653722e-06, "loss": 0.0003, "step": 337160 }, { "epoch": 130.94, "learning_rate": 2.541359223300971e-06, "loss": 0.0135, "step": 337170 }, { "epoch": 130.94, "learning_rate": 2.5408414239482203e-06, "loss": 0.1643, "step": 337180 }, { "epoch": 130.95, "learning_rate": 2.5403236245954694e-06, "loss": 0.0108, "step": 337190 }, { "epoch": 130.95, "learning_rate": 2.5398058252427186e-06, "loss": 0.0298, "step": 337200 }, { "epoch": 130.96, "learning_rate": 2.539288025889968e-06, "loss": 0.0001, "step": 337210 }, { "epoch": 130.96, "learning_rate": 2.538770226537217e-06, "loss": 0.0001, "step": 337220 }, { "epoch": 130.96, "learning_rate": 2.5382524271844662e-06, "loss": 0.0044, "step": 337230 }, { "epoch": 130.97, "learning_rate": 2.5377346278317154e-06, "loss": 0.1353, "step": 337240 }, { "epoch": 130.97, "learning_rate": 2.5372168284789646e-06, "loss": 0.012, "step": 337250 }, { "epoch": 130.97, "learning_rate": 2.536699029126214e-06, "loss": 0.0034, "step": 337260 }, { "epoch": 130.98, "learning_rate": 2.536181229773463e-06, "loss": 0.0289, "step": 337270 }, { "epoch": 130.98, "learning_rate": 2.535663430420712e-06, "loss": 0.2499, "step": 337280 }, { "epoch": 130.99, "learning_rate": 2.5351456310679614e-06, "loss": 0.185, "step": 337290 }, { "epoch": 130.99, "learning_rate": 2.5346278317152106e-06, "loss": 0.0087, "step": 337300 }, { "epoch": 130.99, "learning_rate": 2.5341100323624594e-06, "loss": 0.0511, "step": 337310 }, { "epoch": 131.0, "learning_rate": 2.533592233009709e-06, "loss": 0.0218, "step": 337320 }, { "epoch": 131.0, "eval_accuracy": 0.9526822558459422, "eval_loss": 0.3862694203853607, "eval_runtime": 8.3054, "eval_samples_per_second": 437.664, "eval_steps_per_second": 54.783, "step": 337325 }, { "epoch": 131.0, "learning_rate": 2.5330744336569577e-06, "loss": 0.0046, "step": 337330 }, { "epoch": 131.01, "learning_rate": 2.5325566343042074e-06, "loss": 0.0612, "step": 337340 }, { "epoch": 131.01, "learning_rate": 2.532038834951456e-06, "loss": 0.1255, "step": 337350 }, { "epoch": 131.01, "learning_rate": 2.5315210355987058e-06, "loss": 0.0123, "step": 337360 }, { "epoch": 131.02, "learning_rate": 2.5310032362459545e-06, "loss": 0.0003, "step": 337370 }, { "epoch": 131.02, "learning_rate": 2.530485436893204e-06, "loss": 0.0012, "step": 337380 }, { "epoch": 131.03, "learning_rate": 2.529967637540453e-06, "loss": 0.1012, "step": 337390 }, { "epoch": 131.03, "learning_rate": 2.5294498381877025e-06, "loss": 0.0503, "step": 337400 }, { "epoch": 131.03, "learning_rate": 2.5289320388349513e-06, "loss": 0.0004, "step": 337410 }, { "epoch": 131.04, "learning_rate": 2.528414239482201e-06, "loss": 0.0011, "step": 337420 }, { "epoch": 131.04, "learning_rate": 2.5278964401294497e-06, "loss": 0.0012, "step": 337430 }, { "epoch": 131.04, "learning_rate": 2.5273786407766993e-06, "loss": 0.0661, "step": 337440 }, { "epoch": 131.05, "learning_rate": 2.526860841423948e-06, "loss": 0.0119, "step": 337450 }, { "epoch": 131.05, "learning_rate": 2.5263430420711977e-06, "loss": 0.0538, "step": 337460 }, { "epoch": 131.06, "learning_rate": 2.5258252427184465e-06, "loss": 0.076, "step": 337470 }, { "epoch": 131.06, "learning_rate": 2.525307443365696e-06, "loss": 0.0179, "step": 337480 }, { "epoch": 131.06, "learning_rate": 2.5247896440129453e-06, "loss": 0.0962, "step": 337490 }, { "epoch": 131.07, "learning_rate": 2.5242718446601945e-06, "loss": 0.0005, "step": 337500 }, { "epoch": 131.07, "learning_rate": 2.5237540453074437e-06, "loss": 0.0011, "step": 337510 }, { "epoch": 131.08, "learning_rate": 2.523236245954693e-06, "loss": 0.0182, "step": 337520 }, { "epoch": 131.08, "learning_rate": 2.522718446601942e-06, "loss": 0.0681, "step": 337530 }, { "epoch": 131.08, "learning_rate": 2.5222006472491913e-06, "loss": 0.1218, "step": 337540 }, { "epoch": 131.09, "learning_rate": 2.5216828478964404e-06, "loss": 0.1103, "step": 337550 }, { "epoch": 131.09, "learning_rate": 2.5211650485436896e-06, "loss": 0.0318, "step": 337560 }, { "epoch": 131.1, "learning_rate": 2.520647249190939e-06, "loss": 0.0518, "step": 337570 }, { "epoch": 131.1, "learning_rate": 2.520129449838188e-06, "loss": 0.1343, "step": 337580 }, { "epoch": 131.1, "learning_rate": 2.5196116504854372e-06, "loss": 0.001, "step": 337590 }, { "epoch": 131.11, "learning_rate": 2.5190938511326864e-06, "loss": 0.1164, "step": 337600 }, { "epoch": 131.11, "learning_rate": 2.5185760517799356e-06, "loss": 0.0904, "step": 337610 }, { "epoch": 131.11, "learning_rate": 2.518058252427185e-06, "loss": 0.0625, "step": 337620 }, { "epoch": 131.12, "learning_rate": 2.517540453074434e-06, "loss": 0.0657, "step": 337630 }, { "epoch": 131.12, "learning_rate": 2.5170226537216828e-06, "loss": 0.04, "step": 337640 }, { "epoch": 131.13, "learning_rate": 2.5165048543689324e-06, "loss": 0.0614, "step": 337650 }, { "epoch": 131.13, "learning_rate": 2.515987055016181e-06, "loss": 0.1342, "step": 337660 }, { "epoch": 131.13, "learning_rate": 2.5154692556634308e-06, "loss": 0.0141, "step": 337670 }, { "epoch": 131.14, "learning_rate": 2.5149514563106796e-06, "loss": 0.0442, "step": 337680 }, { "epoch": 131.14, "learning_rate": 2.514433656957929e-06, "loss": 0.0178, "step": 337690 }, { "epoch": 131.15, "learning_rate": 2.513915857605178e-06, "loss": 0.0712, "step": 337700 }, { "epoch": 131.15, "learning_rate": 2.5133980582524276e-06, "loss": 0.0919, "step": 337710 }, { "epoch": 131.15, "learning_rate": 2.5128802588996763e-06, "loss": 0.1116, "step": 337720 }, { "epoch": 131.16, "learning_rate": 2.512362459546926e-06, "loss": 0.0783, "step": 337730 }, { "epoch": 131.16, "learning_rate": 2.5118446601941747e-06, "loss": 0.1029, "step": 337740 }, { "epoch": 131.17, "learning_rate": 2.5113268608414243e-06, "loss": 0.0432, "step": 337750 }, { "epoch": 131.17, "learning_rate": 2.510809061488673e-06, "loss": 0.0284, "step": 337760 }, { "epoch": 131.17, "learning_rate": 2.5102912621359227e-06, "loss": 0.0424, "step": 337770 }, { "epoch": 131.18, "learning_rate": 2.5097734627831715e-06, "loss": 0.0526, "step": 337780 }, { "epoch": 131.18, "learning_rate": 2.509255663430421e-06, "loss": 0.1551, "step": 337790 }, { "epoch": 131.18, "learning_rate": 2.50873786407767e-06, "loss": 0.0099, "step": 337800 }, { "epoch": 131.19, "learning_rate": 2.5082200647249195e-06, "loss": 0.0898, "step": 337810 }, { "epoch": 131.19, "learning_rate": 2.5077022653721683e-06, "loss": 0.032, "step": 337820 }, { "epoch": 131.2, "learning_rate": 2.507184466019418e-06, "loss": 0.0643, "step": 337830 }, { "epoch": 131.2, "learning_rate": 2.5066666666666667e-06, "loss": 0.1159, "step": 337840 }, { "epoch": 131.2, "learning_rate": 2.5061488673139163e-06, "loss": 0.0085, "step": 337850 }, { "epoch": 131.21, "learning_rate": 2.505631067961165e-06, "loss": 0.035, "step": 337860 }, { "epoch": 131.21, "learning_rate": 2.5051132686084147e-06, "loss": 0.0465, "step": 337870 }, { "epoch": 131.22, "learning_rate": 2.5045954692556634e-06, "loss": 0.0175, "step": 337880 }, { "epoch": 131.22, "learning_rate": 2.504077669902913e-06, "loss": 0.0787, "step": 337890 }, { "epoch": 131.22, "learning_rate": 2.503559870550162e-06, "loss": 0.066, "step": 337900 }, { "epoch": 131.23, "learning_rate": 2.5030420711974115e-06, "loss": 0.0295, "step": 337910 }, { "epoch": 131.23, "learning_rate": 2.5025242718446602e-06, "loss": 0.0384, "step": 337920 }, { "epoch": 131.23, "learning_rate": 2.50200647249191e-06, "loss": 0.0881, "step": 337930 }, { "epoch": 131.24, "learning_rate": 2.5014886731391586e-06, "loss": 0.056, "step": 337940 }, { "epoch": 131.24, "learning_rate": 2.500970873786408e-06, "loss": 0.0096, "step": 337950 }, { "epoch": 131.25, "learning_rate": 2.500453074433657e-06, "loss": 0.0001, "step": 337960 }, { "epoch": 131.25, "learning_rate": 2.499935275080906e-06, "loss": 0.051, "step": 337970 }, { "epoch": 131.25, "learning_rate": 2.4994174757281554e-06, "loss": 0.0013, "step": 337980 }, { "epoch": 131.26, "learning_rate": 2.4988996763754046e-06, "loss": 0.0086, "step": 337990 }, { "epoch": 131.26, "learning_rate": 2.4983818770226538e-06, "loss": 0.074, "step": 338000 }, { "epoch": 131.27, "learning_rate": 2.497864077669903e-06, "loss": 0.0261, "step": 338010 }, { "epoch": 131.27, "learning_rate": 2.497346278317152e-06, "loss": 0.0095, "step": 338020 }, { "epoch": 131.27, "learning_rate": 2.4968284789644014e-06, "loss": 0.1082, "step": 338030 }, { "epoch": 131.28, "learning_rate": 2.4963106796116506e-06, "loss": 0.0484, "step": 338040 }, { "epoch": 131.28, "learning_rate": 2.4957928802588998e-06, "loss": 0.0217, "step": 338050 }, { "epoch": 131.29, "learning_rate": 2.495275080906149e-06, "loss": 0.0608, "step": 338060 }, { "epoch": 131.29, "learning_rate": 2.494757281553398e-06, "loss": 0.0019, "step": 338070 }, { "epoch": 131.29, "learning_rate": 2.4942394822006473e-06, "loss": 0.0374, "step": 338080 }, { "epoch": 131.3, "learning_rate": 2.4937216828478965e-06, "loss": 0.0004, "step": 338090 }, { "epoch": 131.3, "learning_rate": 2.4932038834951457e-06, "loss": 0.1095, "step": 338100 }, { "epoch": 131.3, "learning_rate": 2.492686084142395e-06, "loss": 0.1092, "step": 338110 }, { "epoch": 131.31, "learning_rate": 2.492168284789644e-06, "loss": 0.0338, "step": 338120 }, { "epoch": 131.31, "learning_rate": 2.4916504854368933e-06, "loss": 0.0321, "step": 338130 }, { "epoch": 131.32, "learning_rate": 2.4911326860841425e-06, "loss": 0.0078, "step": 338140 }, { "epoch": 131.32, "learning_rate": 2.4906148867313917e-06, "loss": 0.0264, "step": 338150 }, { "epoch": 131.32, "learning_rate": 2.490097087378641e-06, "loss": 0.0185, "step": 338160 }, { "epoch": 131.33, "learning_rate": 2.48957928802589e-06, "loss": 0.0232, "step": 338170 }, { "epoch": 131.33, "learning_rate": 2.4890614886731393e-06, "loss": 0.0264, "step": 338180 }, { "epoch": 131.34, "learning_rate": 2.4885436893203885e-06, "loss": 0.0819, "step": 338190 }, { "epoch": 131.34, "learning_rate": 2.4880258899676377e-06, "loss": 0.0044, "step": 338200 }, { "epoch": 131.34, "learning_rate": 2.487508090614887e-06, "loss": 0.0213, "step": 338210 }, { "epoch": 131.35, "learning_rate": 2.486990291262136e-06, "loss": 0.0272, "step": 338220 }, { "epoch": 131.35, "learning_rate": 2.4864724919093853e-06, "loss": 0.0277, "step": 338230 }, { "epoch": 131.36, "learning_rate": 2.4859546925566344e-06, "loss": 0.1436, "step": 338240 }, { "epoch": 131.36, "learning_rate": 2.4854368932038836e-06, "loss": 0.0886, "step": 338250 }, { "epoch": 131.36, "learning_rate": 2.484919093851133e-06, "loss": 0.0002, "step": 338260 }, { "epoch": 131.37, "learning_rate": 2.484401294498382e-06, "loss": 0.0365, "step": 338270 }, { "epoch": 131.37, "learning_rate": 2.4838834951456312e-06, "loss": 0.0274, "step": 338280 }, { "epoch": 131.37, "learning_rate": 2.4833656957928804e-06, "loss": 0.0605, "step": 338290 }, { "epoch": 131.38, "learning_rate": 2.4828478964401296e-06, "loss": 0.0373, "step": 338300 }, { "epoch": 131.38, "learning_rate": 2.482330097087379e-06, "loss": 0.0146, "step": 338310 }, { "epoch": 131.39, "learning_rate": 2.481812297734628e-06, "loss": 0.1114, "step": 338320 }, { "epoch": 131.39, "learning_rate": 2.481294498381877e-06, "loss": 0.0023, "step": 338330 }, { "epoch": 131.39, "learning_rate": 2.4807766990291264e-06, "loss": 0.0344, "step": 338340 }, { "epoch": 131.4, "learning_rate": 2.4802588996763756e-06, "loss": 0.205, "step": 338350 }, { "epoch": 131.4, "learning_rate": 2.4797411003236248e-06, "loss": 0.0074, "step": 338360 }, { "epoch": 131.41, "learning_rate": 2.479223300970874e-06, "loss": 0.1325, "step": 338370 }, { "epoch": 131.41, "learning_rate": 2.478705501618123e-06, "loss": 0.0462, "step": 338380 }, { "epoch": 131.41, "learning_rate": 2.4781877022653724e-06, "loss": 0.0326, "step": 338390 }, { "epoch": 131.42, "learning_rate": 2.4776699029126216e-06, "loss": 0.0073, "step": 338400 }, { "epoch": 131.42, "learning_rate": 2.4771521035598708e-06, "loss": 0.0438, "step": 338410 }, { "epoch": 131.43, "learning_rate": 2.47663430420712e-06, "loss": 0.0021, "step": 338420 }, { "epoch": 131.43, "learning_rate": 2.476116504854369e-06, "loss": 0.0385, "step": 338430 }, { "epoch": 131.43, "learning_rate": 2.4755987055016183e-06, "loss": 0.0542, "step": 338440 }, { "epoch": 131.44, "learning_rate": 2.4750809061488675e-06, "loss": 0.0418, "step": 338450 }, { "epoch": 131.44, "learning_rate": 2.4745631067961167e-06, "loss": 0.0178, "step": 338460 }, { "epoch": 131.44, "learning_rate": 2.474045307443366e-06, "loss": 0.0181, "step": 338470 }, { "epoch": 131.45, "learning_rate": 2.473527508090615e-06, "loss": 0.0154, "step": 338480 }, { "epoch": 131.45, "learning_rate": 2.4730097087378643e-06, "loss": 0.1979, "step": 338490 }, { "epoch": 131.46, "learning_rate": 2.4724919093851135e-06, "loss": 0.0346, "step": 338500 }, { "epoch": 131.46, "learning_rate": 2.4719741100323627e-06, "loss": 0.0665, "step": 338510 }, { "epoch": 131.46, "learning_rate": 2.471456310679612e-06, "loss": 0.0315, "step": 338520 }, { "epoch": 131.47, "learning_rate": 2.470938511326861e-06, "loss": 0.0001, "step": 338530 }, { "epoch": 131.47, "learning_rate": 2.4704207119741103e-06, "loss": 0.0288, "step": 338540 }, { "epoch": 131.48, "learning_rate": 2.4699029126213595e-06, "loss": 0.0165, "step": 338550 }, { "epoch": 131.48, "learning_rate": 2.4693851132686087e-06, "loss": 0.0007, "step": 338560 }, { "epoch": 131.48, "learning_rate": 2.468867313915858e-06, "loss": 0.0091, "step": 338570 }, { "epoch": 131.49, "learning_rate": 2.468349514563107e-06, "loss": 0.0033, "step": 338580 }, { "epoch": 131.49, "learning_rate": 2.4678317152103563e-06, "loss": 0.0323, "step": 338590 }, { "epoch": 131.5, "learning_rate": 2.4673139158576055e-06, "loss": 0.0313, "step": 338600 }, { "epoch": 131.5, "learning_rate": 2.4667961165048546e-06, "loss": 0.0002, "step": 338610 }, { "epoch": 131.5, "learning_rate": 2.466278317152104e-06, "loss": 0.0308, "step": 338620 }, { "epoch": 131.51, "learning_rate": 2.465760517799353e-06, "loss": 0.006, "step": 338630 }, { "epoch": 131.51, "learning_rate": 2.4652427184466022e-06, "loss": 0.0182, "step": 338640 }, { "epoch": 131.51, "learning_rate": 2.4647249190938514e-06, "loss": 0.0635, "step": 338650 }, { "epoch": 131.52, "learning_rate": 2.4642071197411006e-06, "loss": 0.0201, "step": 338660 }, { "epoch": 131.52, "learning_rate": 2.46368932038835e-06, "loss": 0.0003, "step": 338670 }, { "epoch": 131.53, "learning_rate": 2.463171521035599e-06, "loss": 0.0694, "step": 338680 }, { "epoch": 131.53, "learning_rate": 2.462653721682848e-06, "loss": 0.0001, "step": 338690 }, { "epoch": 131.53, "learning_rate": 2.4621359223300974e-06, "loss": 0.0412, "step": 338700 }, { "epoch": 131.54, "learning_rate": 2.4616181229773466e-06, "loss": 0.0394, "step": 338710 }, { "epoch": 131.54, "learning_rate": 2.4611003236245958e-06, "loss": 0.0094, "step": 338720 }, { "epoch": 131.55, "learning_rate": 2.460582524271845e-06, "loss": 0.0159, "step": 338730 }, { "epoch": 131.55, "learning_rate": 2.460064724919094e-06, "loss": 0.0157, "step": 338740 }, { "epoch": 131.55, "learning_rate": 2.4595469255663434e-06, "loss": 0.1193, "step": 338750 }, { "epoch": 131.56, "learning_rate": 2.4590291262135926e-06, "loss": 0.0485, "step": 338760 }, { "epoch": 131.56, "learning_rate": 2.4585113268608413e-06, "loss": 0.0586, "step": 338770 }, { "epoch": 131.57, "learning_rate": 2.4579935275080905e-06, "loss": 0.0478, "step": 338780 }, { "epoch": 131.57, "learning_rate": 2.4574757281553397e-06, "loss": 0.0126, "step": 338790 }, { "epoch": 131.57, "learning_rate": 2.456957928802589e-06, "loss": 0.0162, "step": 338800 }, { "epoch": 131.58, "learning_rate": 2.456440129449838e-06, "loss": 0.0608, "step": 338810 }, { "epoch": 131.58, "learning_rate": 2.4559223300970873e-06, "loss": 0.0311, "step": 338820 }, { "epoch": 131.58, "learning_rate": 2.4554045307443365e-06, "loss": 0.0516, "step": 338830 }, { "epoch": 131.59, "learning_rate": 2.4548867313915857e-06, "loss": 0.0166, "step": 338840 }, { "epoch": 131.59, "learning_rate": 2.454368932038835e-06, "loss": 0.0011, "step": 338850 }, { "epoch": 131.6, "learning_rate": 2.453851132686084e-06, "loss": 0.0387, "step": 338860 }, { "epoch": 131.6, "learning_rate": 2.4533333333333333e-06, "loss": 0.0486, "step": 338870 }, { "epoch": 131.6, "learning_rate": 2.452815533980583e-06, "loss": 0.0004, "step": 338880 }, { "epoch": 131.61, "learning_rate": 2.452297734627832e-06, "loss": 0.0206, "step": 338890 }, { "epoch": 131.61, "learning_rate": 2.4517799352750813e-06, "loss": 0.0364, "step": 338900 }, { "epoch": 131.62, "learning_rate": 2.4512621359223305e-06, "loss": 0.0339, "step": 338910 }, { "epoch": 131.62, "learning_rate": 2.4507443365695797e-06, "loss": 0.0471, "step": 338920 }, { "epoch": 131.62, "learning_rate": 2.450226537216829e-06, "loss": 0.0044, "step": 338930 }, { "epoch": 131.63, "learning_rate": 2.449708737864078e-06, "loss": 0.0104, "step": 338940 }, { "epoch": 131.63, "learning_rate": 2.4491909385113273e-06, "loss": 0.0856, "step": 338950 }, { "epoch": 131.63, "learning_rate": 2.4486731391585765e-06, "loss": 0.0365, "step": 338960 }, { "epoch": 131.64, "learning_rate": 2.4481553398058256e-06, "loss": 0.0368, "step": 338970 }, { "epoch": 131.64, "learning_rate": 2.447637540453075e-06, "loss": 0.0608, "step": 338980 }, { "epoch": 131.65, "learning_rate": 2.447119741100324e-06, "loss": 0.1869, "step": 338990 }, { "epoch": 131.65, "learning_rate": 2.4466019417475732e-06, "loss": 0.0622, "step": 339000 }, { "epoch": 131.65, "learning_rate": 2.4460841423948224e-06, "loss": 0.0011, "step": 339010 }, { "epoch": 131.66, "learning_rate": 2.4455663430420716e-06, "loss": 0.0457, "step": 339020 }, { "epoch": 131.66, "learning_rate": 2.445048543689321e-06, "loss": 0.0087, "step": 339030 }, { "epoch": 131.67, "learning_rate": 2.44453074433657e-06, "loss": 0.0145, "step": 339040 }, { "epoch": 131.67, "learning_rate": 2.444012944983819e-06, "loss": 0.0894, "step": 339050 }, { "epoch": 131.67, "learning_rate": 2.4434951456310684e-06, "loss": 0.0287, "step": 339060 }, { "epoch": 131.68, "learning_rate": 2.4429773462783176e-06, "loss": 0.0652, "step": 339070 }, { "epoch": 131.68, "learning_rate": 2.4424595469255668e-06, "loss": 0.0013, "step": 339080 }, { "epoch": 131.69, "learning_rate": 2.4419417475728156e-06, "loss": 0.0696, "step": 339090 }, { "epoch": 131.69, "learning_rate": 2.4414239482200648e-06, "loss": 0.021, "step": 339100 }, { "epoch": 131.69, "learning_rate": 2.440906148867314e-06, "loss": 0.0468, "step": 339110 }, { "epoch": 131.7, "learning_rate": 2.440388349514563e-06, "loss": 0.0402, "step": 339120 }, { "epoch": 131.7, "learning_rate": 2.4398705501618123e-06, "loss": 0.0467, "step": 339130 }, { "epoch": 131.7, "learning_rate": 2.4393527508090615e-06, "loss": 0.0973, "step": 339140 }, { "epoch": 131.71, "learning_rate": 2.4388349514563107e-06, "loss": 0.0347, "step": 339150 }, { "epoch": 131.71, "learning_rate": 2.43831715210356e-06, "loss": 0.0008, "step": 339160 }, { "epoch": 131.72, "learning_rate": 2.437799352750809e-06, "loss": 0.049, "step": 339170 }, { "epoch": 131.72, "learning_rate": 2.4372815533980583e-06, "loss": 0.0006, "step": 339180 }, { "epoch": 131.72, "learning_rate": 2.4367637540453075e-06, "loss": 0.031, "step": 339190 }, { "epoch": 131.73, "learning_rate": 2.4362459546925567e-06, "loss": 0.0001, "step": 339200 }, { "epoch": 131.73, "learning_rate": 2.435728155339806e-06, "loss": 0.0014, "step": 339210 }, { "epoch": 131.74, "learning_rate": 2.435210355987055e-06, "loss": 0.0671, "step": 339220 }, { "epoch": 131.74, "learning_rate": 2.4346925566343043e-06, "loss": 0.1417, "step": 339230 }, { "epoch": 131.74, "learning_rate": 2.4341747572815535e-06, "loss": 0.101, "step": 339240 }, { "epoch": 131.75, "learning_rate": 2.4336569579288027e-06, "loss": 0.0674, "step": 339250 }, { "epoch": 131.75, "learning_rate": 2.433139158576052e-06, "loss": 0.0024, "step": 339260 }, { "epoch": 131.76, "learning_rate": 2.432621359223301e-06, "loss": 0.0078, "step": 339270 }, { "epoch": 131.76, "learning_rate": 2.4321035598705503e-06, "loss": 0.0168, "step": 339280 }, { "epoch": 131.76, "learning_rate": 2.4315857605177994e-06, "loss": 0.0288, "step": 339290 }, { "epoch": 131.77, "learning_rate": 2.4310679611650486e-06, "loss": 0.0237, "step": 339300 }, { "epoch": 131.77, "learning_rate": 2.430550161812298e-06, "loss": 0.0595, "step": 339310 }, { "epoch": 131.77, "learning_rate": 2.430032362459547e-06, "loss": 0.123, "step": 339320 }, { "epoch": 131.78, "learning_rate": 2.4295145631067962e-06, "loss": 0.0015, "step": 339330 }, { "epoch": 131.78, "learning_rate": 2.4289967637540454e-06, "loss": 0.0215, "step": 339340 }, { "epoch": 131.79, "learning_rate": 2.4284789644012946e-06, "loss": 0.0011, "step": 339350 }, { "epoch": 131.79, "learning_rate": 2.427961165048544e-06, "loss": 0.0112, "step": 339360 }, { "epoch": 131.79, "learning_rate": 2.427443365695793e-06, "loss": 0.0476, "step": 339370 }, { "epoch": 131.8, "learning_rate": 2.426925566343042e-06, "loss": 0.0897, "step": 339380 }, { "epoch": 131.8, "learning_rate": 2.4264077669902914e-06, "loss": 0.0332, "step": 339390 }, { "epoch": 131.81, "learning_rate": 2.4258899676375406e-06, "loss": 0.0281, "step": 339400 }, { "epoch": 131.81, "learning_rate": 2.4253721682847898e-06, "loss": 0.0005, "step": 339410 }, { "epoch": 131.81, "learning_rate": 2.424854368932039e-06, "loss": 0.0897, "step": 339420 }, { "epoch": 131.82, "learning_rate": 2.424336569579288e-06, "loss": 0.0112, "step": 339430 }, { "epoch": 131.82, "learning_rate": 2.4238187702265374e-06, "loss": 0.0254, "step": 339440 }, { "epoch": 131.83, "learning_rate": 2.4233009708737866e-06, "loss": 0.0152, "step": 339450 }, { "epoch": 131.83, "learning_rate": 2.4227831715210358e-06, "loss": 0.0922, "step": 339460 }, { "epoch": 131.83, "learning_rate": 2.422265372168285e-06, "loss": 0.0236, "step": 339470 }, { "epoch": 131.84, "learning_rate": 2.421747572815534e-06, "loss": 0.0004, "step": 339480 }, { "epoch": 131.84, "learning_rate": 2.4212297734627833e-06, "loss": 0.014, "step": 339490 }, { "epoch": 131.84, "learning_rate": 2.4207119741100325e-06, "loss": 0.1082, "step": 339500 }, { "epoch": 131.85, "learning_rate": 2.4201941747572817e-06, "loss": 0.009, "step": 339510 }, { "epoch": 131.85, "learning_rate": 2.419676375404531e-06, "loss": 0.0671, "step": 339520 }, { "epoch": 131.86, "learning_rate": 2.41915857605178e-06, "loss": 0.0164, "step": 339530 }, { "epoch": 131.86, "learning_rate": 2.4186407766990293e-06, "loss": 0.0094, "step": 339540 }, { "epoch": 131.86, "learning_rate": 2.4181229773462785e-06, "loss": 0.0415, "step": 339550 }, { "epoch": 131.87, "learning_rate": 2.4176051779935277e-06, "loss": 0.0001, "step": 339560 }, { "epoch": 131.87, "learning_rate": 2.417087378640777e-06, "loss": 0.0007, "step": 339570 }, { "epoch": 131.88, "learning_rate": 2.416569579288026e-06, "loss": 0.071, "step": 339580 }, { "epoch": 131.88, "learning_rate": 2.4160517799352753e-06, "loss": 0.0168, "step": 339590 }, { "epoch": 131.88, "learning_rate": 2.4155339805825245e-06, "loss": 0.0984, "step": 339600 }, { "epoch": 131.89, "learning_rate": 2.4150161812297737e-06, "loss": 0.0105, "step": 339610 }, { "epoch": 131.89, "learning_rate": 2.414498381877023e-06, "loss": 0.0082, "step": 339620 }, { "epoch": 131.9, "learning_rate": 2.413980582524272e-06, "loss": 0.1208, "step": 339630 }, { "epoch": 131.9, "learning_rate": 2.4134627831715213e-06, "loss": 0.0219, "step": 339640 }, { "epoch": 131.9, "learning_rate": 2.4129449838187705e-06, "loss": 0.0764, "step": 339650 }, { "epoch": 131.91, "learning_rate": 2.4124271844660196e-06, "loss": 0.0204, "step": 339660 }, { "epoch": 131.91, "learning_rate": 2.411909385113269e-06, "loss": 0.1032, "step": 339670 }, { "epoch": 131.91, "learning_rate": 2.411391585760518e-06, "loss": 0.07, "step": 339680 }, { "epoch": 131.92, "learning_rate": 2.4108737864077672e-06, "loss": 0.0628, "step": 339690 }, { "epoch": 131.92, "learning_rate": 2.4103559870550164e-06, "loss": 0.0413, "step": 339700 }, { "epoch": 131.93, "learning_rate": 2.4098381877022656e-06, "loss": 0.0434, "step": 339710 }, { "epoch": 131.93, "learning_rate": 2.409320388349515e-06, "loss": 0.1248, "step": 339720 }, { "epoch": 131.93, "learning_rate": 2.408802588996764e-06, "loss": 0.0211, "step": 339730 }, { "epoch": 131.94, "learning_rate": 2.408284789644013e-06, "loss": 0.0893, "step": 339740 }, { "epoch": 131.94, "learning_rate": 2.4077669902912624e-06, "loss": 0.0002, "step": 339750 }, { "epoch": 131.95, "learning_rate": 2.4072491909385116e-06, "loss": 0.0067, "step": 339760 }, { "epoch": 131.95, "learning_rate": 2.4067313915857608e-06, "loss": 0.0008, "step": 339770 }, { "epoch": 131.95, "learning_rate": 2.40621359223301e-06, "loss": 0.011, "step": 339780 }, { "epoch": 131.96, "learning_rate": 2.405695792880259e-06, "loss": 0.0248, "step": 339790 }, { "epoch": 131.96, "learning_rate": 2.4051779935275084e-06, "loss": 0.0573, "step": 339800 }, { "epoch": 131.97, "learning_rate": 2.4046601941747576e-06, "loss": 0.0126, "step": 339810 }, { "epoch": 131.97, "learning_rate": 2.4041423948220068e-06, "loss": 0.0007, "step": 339820 }, { "epoch": 131.97, "learning_rate": 2.403624595469256e-06, "loss": 0.1148, "step": 339830 }, { "epoch": 131.98, "learning_rate": 2.403106796116505e-06, "loss": 0.0195, "step": 339840 }, { "epoch": 131.98, "learning_rate": 2.4025889967637543e-06, "loss": 0.1303, "step": 339850 }, { "epoch": 131.98, "learning_rate": 2.4020711974110035e-06, "loss": 0.0011, "step": 339860 }, { "epoch": 131.99, "learning_rate": 2.4015533980582527e-06, "loss": 0.0149, "step": 339870 }, { "epoch": 131.99, "learning_rate": 2.401035598705502e-06, "loss": 0.0605, "step": 339880 }, { "epoch": 132.0, "learning_rate": 2.400517799352751e-06, "loss": 0.04, "step": 339890 }, { "epoch": 132.0, "learning_rate": 2.4000000000000003e-06, "loss": 0.0228, "step": 339900 }, { "epoch": 132.0, "eval_accuracy": 0.951856946354883, "eval_loss": 0.3912859261035919, "eval_runtime": 8.2608, "eval_samples_per_second": 440.032, "eval_steps_per_second": 55.08, "step": 339900 }, { "epoch": 132.0, "learning_rate": 2.399482200647249e-06, "loss": 0.0232, "step": 339910 }, { "epoch": 132.01, "learning_rate": 2.3989644012944983e-06, "loss": 0.0073, "step": 339920 }, { "epoch": 132.01, "learning_rate": 2.3984466019417475e-06, "loss": 0.0469, "step": 339930 }, { "epoch": 132.02, "learning_rate": 2.3979288025889967e-06, "loss": 0.0579, "step": 339940 }, { "epoch": 132.02, "learning_rate": 2.397411003236246e-06, "loss": 0.0675, "step": 339950 }, { "epoch": 132.02, "learning_rate": 2.396893203883495e-06, "loss": 0.0001, "step": 339960 }, { "epoch": 132.03, "learning_rate": 2.3963754045307443e-06, "loss": 0.0351, "step": 339970 }, { "epoch": 132.03, "learning_rate": 2.3958576051779934e-06, "loss": 0.088, "step": 339980 }, { "epoch": 132.03, "learning_rate": 2.3953398058252426e-06, "loss": 0.0344, "step": 339990 }, { "epoch": 132.04, "learning_rate": 2.394822006472492e-06, "loss": 0.0282, "step": 340000 }, { "epoch": 132.04, "learning_rate": 2.3943042071197415e-06, "loss": 0.0367, "step": 340010 }, { "epoch": 132.05, "learning_rate": 2.3937864077669906e-06, "loss": 0.0486, "step": 340020 }, { "epoch": 132.05, "learning_rate": 2.39326860841424e-06, "loss": 0.0542, "step": 340030 }, { "epoch": 132.05, "learning_rate": 2.392750809061489e-06, "loss": 0.1233, "step": 340040 }, { "epoch": 132.06, "learning_rate": 2.3922330097087382e-06, "loss": 0.0047, "step": 340050 }, { "epoch": 132.06, "learning_rate": 2.3917152103559874e-06, "loss": 0.0011, "step": 340060 }, { "epoch": 132.07, "learning_rate": 2.3911974110032366e-06, "loss": 0.0991, "step": 340070 }, { "epoch": 132.07, "learning_rate": 2.390679611650486e-06, "loss": 0.0094, "step": 340080 }, { "epoch": 132.07, "learning_rate": 2.390161812297735e-06, "loss": 0.0092, "step": 340090 }, { "epoch": 132.08, "learning_rate": 2.389644012944984e-06, "loss": 0.1107, "step": 340100 }, { "epoch": 132.08, "learning_rate": 2.3891262135922334e-06, "loss": 0.1126, "step": 340110 }, { "epoch": 132.09, "learning_rate": 2.3886084142394826e-06, "loss": 0.0193, "step": 340120 }, { "epoch": 132.09, "learning_rate": 2.388090614886732e-06, "loss": 0.0391, "step": 340130 }, { "epoch": 132.09, "learning_rate": 2.387572815533981e-06, "loss": 0.0114, "step": 340140 }, { "epoch": 132.1, "learning_rate": 2.38705501618123e-06, "loss": 0.0327, "step": 340150 }, { "epoch": 132.1, "learning_rate": 2.3865372168284794e-06, "loss": 0.0094, "step": 340160 }, { "epoch": 132.1, "learning_rate": 2.3860194174757286e-06, "loss": 0.0088, "step": 340170 }, { "epoch": 132.11, "learning_rate": 2.3855016181229778e-06, "loss": 0.043, "step": 340180 }, { "epoch": 132.11, "learning_rate": 2.384983818770227e-06, "loss": 0.0205, "step": 340190 }, { "epoch": 132.12, "learning_rate": 2.384466019417476e-06, "loss": 0.0086, "step": 340200 }, { "epoch": 132.12, "learning_rate": 2.3839482200647253e-06, "loss": 0.0025, "step": 340210 }, { "epoch": 132.12, "learning_rate": 2.3834304207119745e-06, "loss": 0.0206, "step": 340220 }, { "epoch": 132.13, "learning_rate": 2.3829126213592233e-06, "loss": 0.0468, "step": 340230 }, { "epoch": 132.13, "learning_rate": 2.3823948220064725e-06, "loss": 0.0496, "step": 340240 }, { "epoch": 132.14, "learning_rate": 2.3818770226537217e-06, "loss": 0.019, "step": 340250 }, { "epoch": 132.14, "learning_rate": 2.381359223300971e-06, "loss": 0.0921, "step": 340260 }, { "epoch": 132.14, "learning_rate": 2.38084142394822e-06, "loss": 0.1295, "step": 340270 }, { "epoch": 132.15, "learning_rate": 2.3803236245954693e-06, "loss": 0.0147, "step": 340280 }, { "epoch": 132.15, "learning_rate": 2.3798058252427185e-06, "loss": 0.052, "step": 340290 }, { "epoch": 132.16, "learning_rate": 2.3792880258899677e-06, "loss": 0.0461, "step": 340300 }, { "epoch": 132.16, "learning_rate": 2.378770226537217e-06, "loss": 0.0696, "step": 340310 }, { "epoch": 132.16, "learning_rate": 2.378252427184466e-06, "loss": 0.0405, "step": 340320 }, { "epoch": 132.17, "learning_rate": 2.3777346278317153e-06, "loss": 0.0004, "step": 340330 }, { "epoch": 132.17, "learning_rate": 2.3772168284789645e-06, "loss": 0.0195, "step": 340340 }, { "epoch": 132.17, "learning_rate": 2.3766990291262136e-06, "loss": 0.1409, "step": 340350 }, { "epoch": 132.18, "learning_rate": 2.376181229773463e-06, "loss": 0.0007, "step": 340360 }, { "epoch": 132.18, "learning_rate": 2.375663430420712e-06, "loss": 0.001, "step": 340370 }, { "epoch": 132.19, "learning_rate": 2.3751456310679612e-06, "loss": 0.0257, "step": 340380 }, { "epoch": 132.19, "learning_rate": 2.3746278317152104e-06, "loss": 0.084, "step": 340390 }, { "epoch": 132.19, "learning_rate": 2.3741100323624596e-06, "loss": 0.0348, "step": 340400 }, { "epoch": 132.2, "learning_rate": 2.373592233009709e-06, "loss": 0.0971, "step": 340410 }, { "epoch": 132.2, "learning_rate": 2.373074433656958e-06, "loss": 0.0281, "step": 340420 }, { "epoch": 132.21, "learning_rate": 2.372556634304207e-06, "loss": 0.0701, "step": 340430 }, { "epoch": 132.21, "learning_rate": 2.3720388349514564e-06, "loss": 0.0025, "step": 340440 }, { "epoch": 132.21, "learning_rate": 2.3715210355987056e-06, "loss": 0.0373, "step": 340450 }, { "epoch": 132.22, "learning_rate": 2.3710032362459548e-06, "loss": 0.1042, "step": 340460 }, { "epoch": 132.22, "learning_rate": 2.370485436893204e-06, "loss": 0.069, "step": 340470 }, { "epoch": 132.23, "learning_rate": 2.369967637540453e-06, "loss": 0.0208, "step": 340480 }, { "epoch": 132.23, "learning_rate": 2.3694498381877024e-06, "loss": 0.0048, "step": 340490 }, { "epoch": 132.23, "learning_rate": 2.3689320388349516e-06, "loss": 0.022, "step": 340500 }, { "epoch": 132.24, "learning_rate": 2.3684142394822008e-06, "loss": 0.0131, "step": 340510 }, { "epoch": 132.24, "learning_rate": 2.36789644012945e-06, "loss": 0.1183, "step": 340520 }, { "epoch": 132.24, "learning_rate": 2.367378640776699e-06, "loss": 0.0328, "step": 340530 }, { "epoch": 132.25, "learning_rate": 2.3668608414239483e-06, "loss": 0.0539, "step": 340540 }, { "epoch": 132.25, "learning_rate": 2.3663430420711975e-06, "loss": 0.097, "step": 340550 }, { "epoch": 132.26, "learning_rate": 2.3658252427184467e-06, "loss": 0.0225, "step": 340560 }, { "epoch": 132.26, "learning_rate": 2.365307443365696e-06, "loss": 0.1175, "step": 340570 }, { "epoch": 132.26, "learning_rate": 2.364789644012945e-06, "loss": 0.0069, "step": 340580 }, { "epoch": 132.27, "learning_rate": 2.3642718446601943e-06, "loss": 0.0533, "step": 340590 }, { "epoch": 132.27, "learning_rate": 2.3637540453074435e-06, "loss": 0.0165, "step": 340600 }, { "epoch": 132.28, "learning_rate": 2.3632362459546927e-06, "loss": 0.0108, "step": 340610 }, { "epoch": 132.28, "learning_rate": 2.362718446601942e-06, "loss": 0.158, "step": 340620 }, { "epoch": 132.28, "learning_rate": 2.362200647249191e-06, "loss": 0.035, "step": 340630 }, { "epoch": 132.29, "learning_rate": 2.3616828478964403e-06, "loss": 0.0258, "step": 340640 }, { "epoch": 132.29, "learning_rate": 2.3611650485436895e-06, "loss": 0.0181, "step": 340650 }, { "epoch": 132.3, "learning_rate": 2.3606472491909387e-06, "loss": 0.0394, "step": 340660 }, { "epoch": 132.3, "learning_rate": 2.360129449838188e-06, "loss": 0.0735, "step": 340670 }, { "epoch": 132.3, "learning_rate": 2.359611650485437e-06, "loss": 0.0916, "step": 340680 }, { "epoch": 132.31, "learning_rate": 2.3590938511326863e-06, "loss": 0.0227, "step": 340690 }, { "epoch": 132.31, "learning_rate": 2.3585760517799355e-06, "loss": 0.0005, "step": 340700 }, { "epoch": 132.31, "learning_rate": 2.3580582524271846e-06, "loss": 0.046, "step": 340710 }, { "epoch": 132.32, "learning_rate": 2.357540453074434e-06, "loss": 0.0143, "step": 340720 }, { "epoch": 132.32, "learning_rate": 2.357022653721683e-06, "loss": 0.0694, "step": 340730 }, { "epoch": 132.33, "learning_rate": 2.3565048543689322e-06, "loss": 0.0042, "step": 340740 }, { "epoch": 132.33, "learning_rate": 2.3559870550161814e-06, "loss": 0.0102, "step": 340750 }, { "epoch": 132.33, "learning_rate": 2.3554692556634306e-06, "loss": 0.0503, "step": 340760 }, { "epoch": 132.34, "learning_rate": 2.35495145631068e-06, "loss": 0.094, "step": 340770 }, { "epoch": 132.34, "learning_rate": 2.354433656957929e-06, "loss": 0.0181, "step": 340780 }, { "epoch": 132.35, "learning_rate": 2.353915857605178e-06, "loss": 0.0071, "step": 340790 }, { "epoch": 132.35, "learning_rate": 2.3533980582524274e-06, "loss": 0.0535, "step": 340800 }, { "epoch": 132.35, "learning_rate": 2.3528802588996766e-06, "loss": 0.0394, "step": 340810 }, { "epoch": 132.36, "learning_rate": 2.3523624595469258e-06, "loss": 0.0227, "step": 340820 }, { "epoch": 132.36, "learning_rate": 2.351844660194175e-06, "loss": 0.0002, "step": 340830 }, { "epoch": 132.37, "learning_rate": 2.351326860841424e-06, "loss": 0.0176, "step": 340840 }, { "epoch": 132.37, "learning_rate": 2.3508090614886734e-06, "loss": 0.0158, "step": 340850 }, { "epoch": 132.37, "learning_rate": 2.3502912621359226e-06, "loss": 0.0647, "step": 340860 }, { "epoch": 132.38, "learning_rate": 2.3497734627831718e-06, "loss": 0.0205, "step": 340870 }, { "epoch": 132.38, "learning_rate": 2.349255663430421e-06, "loss": 0.0513, "step": 340880 }, { "epoch": 132.38, "learning_rate": 2.34873786407767e-06, "loss": 0.0583, "step": 340890 }, { "epoch": 132.39, "learning_rate": 2.3482200647249193e-06, "loss": 0.0665, "step": 340900 }, { "epoch": 132.39, "learning_rate": 2.3477022653721685e-06, "loss": 0.0005, "step": 340910 }, { "epoch": 132.4, "learning_rate": 2.3471844660194177e-06, "loss": 0.0002, "step": 340920 }, { "epoch": 132.4, "learning_rate": 2.346666666666667e-06, "loss": 0.0403, "step": 340930 }, { "epoch": 132.4, "learning_rate": 2.346148867313916e-06, "loss": 0.0048, "step": 340940 }, { "epoch": 132.41, "learning_rate": 2.3456310679611653e-06, "loss": 0.0076, "step": 340950 }, { "epoch": 132.41, "learning_rate": 2.3451132686084145e-06, "loss": 0.0947, "step": 340960 }, { "epoch": 132.42, "learning_rate": 2.3445954692556637e-06, "loss": 0.0677, "step": 340970 }, { "epoch": 132.42, "learning_rate": 2.344077669902913e-06, "loss": 0.073, "step": 340980 }, { "epoch": 132.42, "learning_rate": 2.343559870550162e-06, "loss": 0.0614, "step": 340990 }, { "epoch": 132.43, "learning_rate": 2.3430420711974113e-06, "loss": 0.0004, "step": 341000 }, { "epoch": 132.43, "learning_rate": 2.3425242718446605e-06, "loss": 0.0099, "step": 341010 }, { "epoch": 132.43, "learning_rate": 2.3420064724919097e-06, "loss": 0.0155, "step": 341020 }, { "epoch": 132.44, "learning_rate": 2.341488673139159e-06, "loss": 0.0098, "step": 341030 }, { "epoch": 132.44, "learning_rate": 2.340970873786408e-06, "loss": 0.0006, "step": 341040 }, { "epoch": 132.45, "learning_rate": 2.340453074433657e-06, "loss": 0.0004, "step": 341050 }, { "epoch": 132.45, "learning_rate": 2.339935275080906e-06, "loss": 0.0806, "step": 341060 }, { "epoch": 132.45, "learning_rate": 2.3394174757281552e-06, "loss": 0.0001, "step": 341070 }, { "epoch": 132.46, "learning_rate": 2.3388996763754044e-06, "loss": 0.0471, "step": 341080 }, { "epoch": 132.46, "learning_rate": 2.3383818770226536e-06, "loss": 0.0444, "step": 341090 }, { "epoch": 132.47, "learning_rate": 2.337864077669903e-06, "loss": 0.0006, "step": 341100 }, { "epoch": 132.47, "learning_rate": 2.337346278317152e-06, "loss": 0.0004, "step": 341110 }, { "epoch": 132.47, "learning_rate": 2.336828478964401e-06, "loss": 0.0367, "step": 341120 }, { "epoch": 132.48, "learning_rate": 2.336310679611651e-06, "loss": 0.0626, "step": 341130 }, { "epoch": 132.48, "learning_rate": 2.3357928802589e-06, "loss": 0.1194, "step": 341140 }, { "epoch": 132.49, "learning_rate": 2.335275080906149e-06, "loss": 0.0438, "step": 341150 }, { "epoch": 132.49, "learning_rate": 2.3347572815533984e-06, "loss": 0.0044, "step": 341160 }, { "epoch": 132.49, "learning_rate": 2.3342394822006476e-06, "loss": 0.0005, "step": 341170 }, { "epoch": 132.5, "learning_rate": 2.333721682847897e-06, "loss": 0.0566, "step": 341180 }, { "epoch": 132.5, "learning_rate": 2.333203883495146e-06, "loss": 0.0812, "step": 341190 }, { "epoch": 132.5, "learning_rate": 2.332686084142395e-06, "loss": 0.0156, "step": 341200 }, { "epoch": 132.51, "learning_rate": 2.3321682847896444e-06, "loss": 0.0528, "step": 341210 }, { "epoch": 132.51, "learning_rate": 2.3316504854368936e-06, "loss": 0.0019, "step": 341220 }, { "epoch": 132.52, "learning_rate": 2.3311326860841428e-06, "loss": 0.0102, "step": 341230 }, { "epoch": 132.52, "learning_rate": 2.330614886731392e-06, "loss": 0.0118, "step": 341240 }, { "epoch": 132.52, "learning_rate": 2.330097087378641e-06, "loss": 0.0005, "step": 341250 }, { "epoch": 132.53, "learning_rate": 2.3295792880258903e-06, "loss": 0.0304, "step": 341260 }, { "epoch": 132.53, "learning_rate": 2.3290614886731395e-06, "loss": 0.0275, "step": 341270 }, { "epoch": 132.54, "learning_rate": 2.3285436893203887e-06, "loss": 0.105, "step": 341280 }, { "epoch": 132.54, "learning_rate": 2.328025889967638e-06, "loss": 0.0344, "step": 341290 }, { "epoch": 132.54, "learning_rate": 2.327508090614887e-06, "loss": 0.0801, "step": 341300 }, { "epoch": 132.55, "learning_rate": 2.3269902912621363e-06, "loss": 0.0566, "step": 341310 }, { "epoch": 132.55, "learning_rate": 2.3264724919093855e-06, "loss": 0.0111, "step": 341320 }, { "epoch": 132.56, "learning_rate": 2.3259546925566347e-06, "loss": 0.0793, "step": 341330 }, { "epoch": 132.56, "learning_rate": 2.325436893203884e-06, "loss": 0.0002, "step": 341340 }, { "epoch": 132.56, "learning_rate": 2.324919093851133e-06, "loss": 0.0015, "step": 341350 }, { "epoch": 132.57, "learning_rate": 2.3244012944983823e-06, "loss": 0.0204, "step": 341360 }, { "epoch": 132.57, "learning_rate": 2.323883495145631e-06, "loss": 0.0029, "step": 341370 }, { "epoch": 132.57, "learning_rate": 2.3233656957928803e-06, "loss": 0.007, "step": 341380 }, { "epoch": 132.58, "learning_rate": 2.3228478964401295e-06, "loss": 0.0157, "step": 341390 }, { "epoch": 132.58, "learning_rate": 2.3223300970873786e-06, "loss": 0.0381, "step": 341400 }, { "epoch": 132.59, "learning_rate": 2.321812297734628e-06, "loss": 0.0182, "step": 341410 }, { "epoch": 132.59, "learning_rate": 2.321294498381877e-06, "loss": 0.0002, "step": 341420 }, { "epoch": 132.59, "learning_rate": 2.3207766990291262e-06, "loss": 0.1808, "step": 341430 }, { "epoch": 132.6, "learning_rate": 2.3202588996763754e-06, "loss": 0.0607, "step": 341440 }, { "epoch": 132.6, "learning_rate": 2.3197411003236246e-06, "loss": 0.0855, "step": 341450 }, { "epoch": 132.61, "learning_rate": 2.319223300970874e-06, "loss": 0.0539, "step": 341460 }, { "epoch": 132.61, "learning_rate": 2.318705501618123e-06, "loss": 0.0166, "step": 341470 }, { "epoch": 132.61, "learning_rate": 2.318187702265372e-06, "loss": 0.0206, "step": 341480 }, { "epoch": 132.62, "learning_rate": 2.3176699029126214e-06, "loss": 0.0002, "step": 341490 }, { "epoch": 132.62, "learning_rate": 2.3171521035598706e-06, "loss": 0.0275, "step": 341500 }, { "epoch": 132.63, "learning_rate": 2.3166343042071198e-06, "loss": 0.1134, "step": 341510 }, { "epoch": 132.63, "learning_rate": 2.316116504854369e-06, "loss": 0.1037, "step": 341520 }, { "epoch": 132.63, "learning_rate": 2.315598705501618e-06, "loss": 0.0552, "step": 341530 }, { "epoch": 132.64, "learning_rate": 2.3150809061488674e-06, "loss": 0.0548, "step": 341540 }, { "epoch": 132.64, "learning_rate": 2.3145631067961166e-06, "loss": 0.0787, "step": 341550 }, { "epoch": 132.64, "learning_rate": 2.3140453074433658e-06, "loss": 0.0408, "step": 341560 }, { "epoch": 132.65, "learning_rate": 2.313527508090615e-06, "loss": 0.0008, "step": 341570 }, { "epoch": 132.65, "learning_rate": 2.313009708737864e-06, "loss": 0.0303, "step": 341580 }, { "epoch": 132.66, "learning_rate": 2.3124919093851133e-06, "loss": 0.0009, "step": 341590 }, { "epoch": 132.66, "learning_rate": 2.3119741100323625e-06, "loss": 0.062, "step": 341600 }, { "epoch": 132.66, "learning_rate": 2.3114563106796117e-06, "loss": 0.0151, "step": 341610 }, { "epoch": 132.67, "learning_rate": 2.310938511326861e-06, "loss": 0.0168, "step": 341620 }, { "epoch": 132.67, "learning_rate": 2.31042071197411e-06, "loss": 0.0102, "step": 341630 }, { "epoch": 132.68, "learning_rate": 2.3099029126213593e-06, "loss": 0.0604, "step": 341640 }, { "epoch": 132.68, "learning_rate": 2.3093851132686085e-06, "loss": 0.0086, "step": 341650 }, { "epoch": 132.68, "learning_rate": 2.3088673139158577e-06, "loss": 0.0174, "step": 341660 }, { "epoch": 132.69, "learning_rate": 2.308349514563107e-06, "loss": 0.0188, "step": 341670 }, { "epoch": 132.69, "learning_rate": 2.307831715210356e-06, "loss": 0.0014, "step": 341680 }, { "epoch": 132.7, "learning_rate": 2.3073139158576057e-06, "loss": 0.0028, "step": 341690 }, { "epoch": 132.7, "learning_rate": 2.3067961165048545e-06, "loss": 0.025, "step": 341700 }, { "epoch": 132.7, "learning_rate": 2.3062783171521037e-06, "loss": 0.0637, "step": 341710 }, { "epoch": 132.71, "learning_rate": 2.305760517799353e-06, "loss": 0.0173, "step": 341720 }, { "epoch": 132.71, "learning_rate": 2.305242718446602e-06, "loss": 0.0102, "step": 341730 }, { "epoch": 132.71, "learning_rate": 2.3047249190938513e-06, "loss": 0.0927, "step": 341740 }, { "epoch": 132.72, "learning_rate": 2.3042071197411005e-06, "loss": 0.0576, "step": 341750 }, { "epoch": 132.72, "learning_rate": 2.3036893203883496e-06, "loss": 0.0098, "step": 341760 }, { "epoch": 132.73, "learning_rate": 2.303171521035599e-06, "loss": 0.01, "step": 341770 }, { "epoch": 132.73, "learning_rate": 2.302653721682848e-06, "loss": 0.0307, "step": 341780 }, { "epoch": 132.73, "learning_rate": 2.3021359223300972e-06, "loss": 0.0827, "step": 341790 }, { "epoch": 132.74, "learning_rate": 2.3016181229773464e-06, "loss": 0.0719, "step": 341800 }, { "epoch": 132.74, "learning_rate": 2.3011003236245956e-06, "loss": 0.0706, "step": 341810 }, { "epoch": 132.75, "learning_rate": 2.300582524271845e-06, "loss": 0.0528, "step": 341820 }, { "epoch": 132.75, "learning_rate": 2.300064724919094e-06, "loss": 0.0794, "step": 341830 }, { "epoch": 132.75, "learning_rate": 2.299546925566343e-06, "loss": 0.0377, "step": 341840 }, { "epoch": 132.76, "learning_rate": 2.2990291262135924e-06, "loss": 0.0726, "step": 341850 }, { "epoch": 132.76, "learning_rate": 2.2985113268608416e-06, "loss": 0.0111, "step": 341860 }, { "epoch": 132.77, "learning_rate": 2.297993527508091e-06, "loss": 0.0349, "step": 341870 }, { "epoch": 132.77, "learning_rate": 2.29747572815534e-06, "loss": 0.0249, "step": 341880 }, { "epoch": 132.77, "learning_rate": 2.296957928802589e-06, "loss": 0.0241, "step": 341890 }, { "epoch": 132.78, "learning_rate": 2.2964401294498384e-06, "loss": 0.0003, "step": 341900 }, { "epoch": 132.78, "learning_rate": 2.2959223300970876e-06, "loss": 0.0113, "step": 341910 }, { "epoch": 132.78, "learning_rate": 2.2954045307443368e-06, "loss": 0.0162, "step": 341920 }, { "epoch": 132.79, "learning_rate": 2.294886731391586e-06, "loss": 0.0322, "step": 341930 }, { "epoch": 132.79, "learning_rate": 2.294368932038835e-06, "loss": 0.0776, "step": 341940 }, { "epoch": 132.8, "learning_rate": 2.2938511326860843e-06, "loss": 0.0179, "step": 341950 }, { "epoch": 132.8, "learning_rate": 2.2933333333333335e-06, "loss": 0.0201, "step": 341960 }, { "epoch": 132.8, "learning_rate": 2.2928155339805827e-06, "loss": 0.0942, "step": 341970 }, { "epoch": 132.81, "learning_rate": 2.292297734627832e-06, "loss": 0.0383, "step": 341980 }, { "epoch": 132.81, "learning_rate": 2.291779935275081e-06, "loss": 0.0205, "step": 341990 }, { "epoch": 132.82, "learning_rate": 2.2912621359223303e-06, "loss": 0.0007, "step": 342000 }, { "epoch": 132.82, "learning_rate": 2.2907443365695795e-06, "loss": 0.0521, "step": 342010 }, { "epoch": 132.82, "learning_rate": 2.2902265372168287e-06, "loss": 0.001, "step": 342020 }, { "epoch": 132.83, "learning_rate": 2.289708737864078e-06, "loss": 0.0153, "step": 342030 }, { "epoch": 132.83, "learning_rate": 2.289190938511327e-06, "loss": 0.0873, "step": 342040 }, { "epoch": 132.83, "learning_rate": 2.2886731391585763e-06, "loss": 0.0119, "step": 342050 }, { "epoch": 132.84, "learning_rate": 2.2881553398058255e-06, "loss": 0.0006, "step": 342060 }, { "epoch": 132.84, "learning_rate": 2.2876375404530747e-06, "loss": 0.0002, "step": 342070 }, { "epoch": 132.85, "learning_rate": 2.287119741100324e-06, "loss": 0.0787, "step": 342080 }, { "epoch": 132.85, "learning_rate": 2.286601941747573e-06, "loss": 0.0345, "step": 342090 }, { "epoch": 132.85, "learning_rate": 2.2860841423948223e-06, "loss": 0.0206, "step": 342100 }, { "epoch": 132.86, "learning_rate": 2.2855663430420715e-06, "loss": 0.1171, "step": 342110 }, { "epoch": 132.86, "learning_rate": 2.2850485436893207e-06, "loss": 0.1066, "step": 342120 }, { "epoch": 132.87, "learning_rate": 2.28453074433657e-06, "loss": 0.0773, "step": 342130 }, { "epoch": 132.87, "learning_rate": 2.284012944983819e-06, "loss": 0.0014, "step": 342140 }, { "epoch": 132.87, "learning_rate": 2.2834951456310682e-06, "loss": 0.0536, "step": 342150 }, { "epoch": 132.88, "learning_rate": 2.2829773462783174e-06, "loss": 0.0604, "step": 342160 }, { "epoch": 132.88, "learning_rate": 2.2824595469255666e-06, "loss": 0.0102, "step": 342170 }, { "epoch": 132.89, "learning_rate": 2.281941747572816e-06, "loss": 0.1004, "step": 342180 }, { "epoch": 132.89, "learning_rate": 2.2814239482200646e-06, "loss": 0.0416, "step": 342190 }, { "epoch": 132.89, "learning_rate": 2.2809061488673138e-06, "loss": 0.0033, "step": 342200 }, { "epoch": 132.9, "learning_rate": 2.280388349514563e-06, "loss": 0.0341, "step": 342210 }, { "epoch": 132.9, "learning_rate": 2.279870550161812e-06, "loss": 0.0726, "step": 342220 }, { "epoch": 132.9, "learning_rate": 2.2793527508090614e-06, "loss": 0.0489, "step": 342230 }, { "epoch": 132.91, "learning_rate": 2.2788349514563106e-06, "loss": 0.0001, "step": 342240 }, { "epoch": 132.91, "learning_rate": 2.27831715210356e-06, "loss": 0.018, "step": 342250 }, { "epoch": 132.92, "learning_rate": 2.2777993527508094e-06, "loss": 0.055, "step": 342260 }, { "epoch": 132.92, "learning_rate": 2.2772815533980586e-06, "loss": 0.0085, "step": 342270 }, { "epoch": 132.92, "learning_rate": 2.2767637540453078e-06, "loss": 0.001, "step": 342280 }, { "epoch": 132.93, "learning_rate": 2.276245954692557e-06, "loss": 0.0002, "step": 342290 }, { "epoch": 132.93, "learning_rate": 2.275728155339806e-06, "loss": 0.0001, "step": 342300 }, { "epoch": 132.94, "learning_rate": 2.2752103559870553e-06, "loss": 0.0768, "step": 342310 }, { "epoch": 132.94, "learning_rate": 2.2746925566343045e-06, "loss": 0.0091, "step": 342320 }, { "epoch": 132.94, "learning_rate": 2.2741747572815537e-06, "loss": 0.0764, "step": 342330 }, { "epoch": 132.95, "learning_rate": 2.273656957928803e-06, "loss": 0.1267, "step": 342340 }, { "epoch": 132.95, "learning_rate": 2.273139158576052e-06, "loss": 0.0367, "step": 342350 }, { "epoch": 132.96, "learning_rate": 2.2726213592233013e-06, "loss": 0.0392, "step": 342360 }, { "epoch": 132.96, "learning_rate": 2.2721035598705505e-06, "loss": 0.0085, "step": 342370 }, { "epoch": 132.96, "learning_rate": 2.2715857605177997e-06, "loss": 0.0441, "step": 342380 }, { "epoch": 132.97, "learning_rate": 2.271067961165049e-06, "loss": 0.0056, "step": 342390 }, { "epoch": 132.97, "learning_rate": 2.270550161812298e-06, "loss": 0.0219, "step": 342400 }, { "epoch": 132.97, "learning_rate": 2.2700323624595473e-06, "loss": 0.0007, "step": 342410 }, { "epoch": 132.98, "learning_rate": 2.2695145631067965e-06, "loss": 0.0147, "step": 342420 }, { "epoch": 132.98, "learning_rate": 2.2689967637540457e-06, "loss": 0.0177, "step": 342430 }, { "epoch": 132.99, "learning_rate": 2.268478964401295e-06, "loss": 0.1327, "step": 342440 }, { "epoch": 132.99, "learning_rate": 2.267961165048544e-06, "loss": 0.011, "step": 342450 }, { "epoch": 132.99, "learning_rate": 2.2674433656957933e-06, "loss": 0.0011, "step": 342460 }, { "epoch": 133.0, "learning_rate": 2.2669255663430425e-06, "loss": 0.0094, "step": 342470 }, { "epoch": 133.0, "eval_accuracy": 0.9513067400275104, "eval_loss": 0.3967961072921753, "eval_runtime": 8.2847, "eval_samples_per_second": 438.76, "eval_steps_per_second": 54.92, "step": 342475 }, { "epoch": 133.0, "learning_rate": 2.2664077669902917e-06, "loss": 0.0257, "step": 342480 }, { "epoch": 133.01, "learning_rate": 2.265889967637541e-06, "loss": 0.0004, "step": 342490 }, { "epoch": 133.01, "learning_rate": 2.26537216828479e-06, "loss": 0.0972, "step": 342500 }, { "epoch": 133.01, "learning_rate": 2.264854368932039e-06, "loss": 0.0001, "step": 342510 }, { "epoch": 133.02, "learning_rate": 2.264336569579288e-06, "loss": 0.0053, "step": 342520 }, { "epoch": 133.02, "learning_rate": 2.263818770226537e-06, "loss": 0.0192, "step": 342530 }, { "epoch": 133.03, "learning_rate": 2.2633009708737864e-06, "loss": 0.0438, "step": 342540 }, { "epoch": 133.03, "learning_rate": 2.2627831715210356e-06, "loss": 0.068, "step": 342550 }, { "epoch": 133.03, "learning_rate": 2.2622653721682848e-06, "loss": 0.1022, "step": 342560 }, { "epoch": 133.04, "learning_rate": 2.261747572815534e-06, "loss": 0.0321, "step": 342570 }, { "epoch": 133.04, "learning_rate": 2.261229773462783e-06, "loss": 0.1201, "step": 342580 }, { "epoch": 133.04, "learning_rate": 2.2607119741100324e-06, "loss": 0.0472, "step": 342590 }, { "epoch": 133.05, "learning_rate": 2.2601941747572816e-06, "loss": 0.0723, "step": 342600 }, { "epoch": 133.05, "learning_rate": 2.2596763754045308e-06, "loss": 0.0667, "step": 342610 }, { "epoch": 133.06, "learning_rate": 2.25915857605178e-06, "loss": 0.061, "step": 342620 }, { "epoch": 133.06, "learning_rate": 2.258640776699029e-06, "loss": 0.0139, "step": 342630 }, { "epoch": 133.06, "learning_rate": 2.2581229773462783e-06, "loss": 0.0233, "step": 342640 }, { "epoch": 133.07, "learning_rate": 2.2576051779935275e-06, "loss": 0.062, "step": 342650 }, { "epoch": 133.07, "learning_rate": 2.2570873786407767e-06, "loss": 0.0049, "step": 342660 }, { "epoch": 133.08, "learning_rate": 2.256569579288026e-06, "loss": 0.0612, "step": 342670 }, { "epoch": 133.08, "learning_rate": 2.256051779935275e-06, "loss": 0.0785, "step": 342680 }, { "epoch": 133.08, "learning_rate": 2.2555339805825243e-06, "loss": 0.0529, "step": 342690 }, { "epoch": 133.09, "learning_rate": 2.2550161812297735e-06, "loss": 0.0183, "step": 342700 }, { "epoch": 133.09, "learning_rate": 2.2544983818770227e-06, "loss": 0.0563, "step": 342710 }, { "epoch": 133.1, "learning_rate": 2.253980582524272e-06, "loss": 0.0327, "step": 342720 }, { "epoch": 133.1, "learning_rate": 2.253462783171521e-06, "loss": 0.0003, "step": 342730 }, { "epoch": 133.1, "learning_rate": 2.2529449838187703e-06, "loss": 0.0172, "step": 342740 }, { "epoch": 133.11, "learning_rate": 2.2524271844660195e-06, "loss": 0.1251, "step": 342750 }, { "epoch": 133.11, "learning_rate": 2.2519093851132687e-06, "loss": 0.1382, "step": 342760 }, { "epoch": 133.11, "learning_rate": 2.251391585760518e-06, "loss": 0.1047, "step": 342770 }, { "epoch": 133.12, "learning_rate": 2.250873786407767e-06, "loss": 0.0169, "step": 342780 }, { "epoch": 133.12, "learning_rate": 2.2503559870550163e-06, "loss": 0.1114, "step": 342790 }, { "epoch": 133.13, "learning_rate": 2.2498381877022655e-06, "loss": 0.0519, "step": 342800 }, { "epoch": 133.13, "learning_rate": 2.249320388349515e-06, "loss": 0.0412, "step": 342810 }, { "epoch": 133.13, "learning_rate": 2.2488025889967643e-06, "loss": 0.1149, "step": 342820 }, { "epoch": 133.14, "learning_rate": 2.2482847896440135e-06, "loss": 0.0096, "step": 342830 }, { "epoch": 133.14, "learning_rate": 2.2477669902912622e-06, "loss": 0.061, "step": 342840 }, { "epoch": 133.15, "learning_rate": 2.2472491909385114e-06, "loss": 0.1242, "step": 342850 }, { "epoch": 133.15, "learning_rate": 2.2467313915857606e-06, "loss": 0.015, "step": 342860 }, { "epoch": 133.15, "learning_rate": 2.24621359223301e-06, "loss": 0.0006, "step": 342870 }, { "epoch": 133.16, "learning_rate": 2.245695792880259e-06, "loss": 0.0009, "step": 342880 }, { "epoch": 133.16, "learning_rate": 2.245177993527508e-06, "loss": 0.0706, "step": 342890 }, { "epoch": 133.17, "learning_rate": 2.2446601941747574e-06, "loss": 0.0005, "step": 342900 }, { "epoch": 133.17, "learning_rate": 2.2441423948220066e-06, "loss": 0.0264, "step": 342910 }, { "epoch": 133.17, "learning_rate": 2.243624595469256e-06, "loss": 0.0226, "step": 342920 }, { "epoch": 133.18, "learning_rate": 2.243106796116505e-06, "loss": 0.0252, "step": 342930 }, { "epoch": 133.18, "learning_rate": 2.242588996763754e-06, "loss": 0.0387, "step": 342940 }, { "epoch": 133.18, "learning_rate": 2.2420711974110034e-06, "loss": 0.1469, "step": 342950 }, { "epoch": 133.19, "learning_rate": 2.2415533980582526e-06, "loss": 0.0121, "step": 342960 }, { "epoch": 133.19, "learning_rate": 2.2410355987055018e-06, "loss": 0.0447, "step": 342970 }, { "epoch": 133.2, "learning_rate": 2.240517799352751e-06, "loss": 0.0383, "step": 342980 }, { "epoch": 133.2, "learning_rate": 2.24e-06, "loss": 0.0571, "step": 342990 }, { "epoch": 133.2, "learning_rate": 2.2394822006472493e-06, "loss": 0.0765, "step": 343000 }, { "epoch": 133.21, "learning_rate": 2.2389644012944985e-06, "loss": 0.0268, "step": 343010 }, { "epoch": 133.21, "learning_rate": 2.2384466019417477e-06, "loss": 0.0772, "step": 343020 }, { "epoch": 133.22, "learning_rate": 2.237928802588997e-06, "loss": 0.0008, "step": 343030 }, { "epoch": 133.22, "learning_rate": 2.237411003236246e-06, "loss": 0.0126, "step": 343040 }, { "epoch": 133.22, "learning_rate": 2.2368932038834953e-06, "loss": 0.0358, "step": 343050 }, { "epoch": 133.23, "learning_rate": 2.2363754045307445e-06, "loss": 0.0002, "step": 343060 }, { "epoch": 133.23, "learning_rate": 2.2358576051779937e-06, "loss": 0.0265, "step": 343070 }, { "epoch": 133.23, "learning_rate": 2.235339805825243e-06, "loss": 0.0189, "step": 343080 }, { "epoch": 133.24, "learning_rate": 2.234822006472492e-06, "loss": 0.0001, "step": 343090 }, { "epoch": 133.24, "learning_rate": 2.2343042071197413e-06, "loss": 0.0327, "step": 343100 }, { "epoch": 133.25, "learning_rate": 2.2337864077669905e-06, "loss": 0.1103, "step": 343110 }, { "epoch": 133.25, "learning_rate": 2.2332686084142397e-06, "loss": 0.0003, "step": 343120 }, { "epoch": 133.25, "learning_rate": 2.232750809061489e-06, "loss": 0.018, "step": 343130 }, { "epoch": 133.26, "learning_rate": 2.232233009708738e-06, "loss": 0.1129, "step": 343140 }, { "epoch": 133.26, "learning_rate": 2.2317152103559873e-06, "loss": 0.008, "step": 343150 }, { "epoch": 133.27, "learning_rate": 2.2311974110032365e-06, "loss": 0.0293, "step": 343160 }, { "epoch": 133.27, "learning_rate": 2.2306796116504857e-06, "loss": 0.0135, "step": 343170 }, { "epoch": 133.27, "learning_rate": 2.230161812297735e-06, "loss": 0.0001, "step": 343180 }, { "epoch": 133.28, "learning_rate": 2.229644012944984e-06, "loss": 0.1644, "step": 343190 }, { "epoch": 133.28, "learning_rate": 2.2291262135922332e-06, "loss": 0.0147, "step": 343200 }, { "epoch": 133.29, "learning_rate": 2.2286084142394824e-06, "loss": 0.0524, "step": 343210 }, { "epoch": 133.29, "learning_rate": 2.2280906148867316e-06, "loss": 0.0205, "step": 343220 }, { "epoch": 133.29, "learning_rate": 2.227572815533981e-06, "loss": 0.0053, "step": 343230 }, { "epoch": 133.3, "learning_rate": 2.22705501618123e-06, "loss": 0.0118, "step": 343240 }, { "epoch": 133.3, "learning_rate": 2.226537216828479e-06, "loss": 0.0732, "step": 343250 }, { "epoch": 133.3, "learning_rate": 2.2260194174757284e-06, "loss": 0.0483, "step": 343260 }, { "epoch": 133.31, "learning_rate": 2.2255016181229776e-06, "loss": 0.0208, "step": 343270 }, { "epoch": 133.31, "learning_rate": 2.224983818770227e-06, "loss": 0.0395, "step": 343280 }, { "epoch": 133.32, "learning_rate": 2.224466019417476e-06, "loss": 0.0051, "step": 343290 }, { "epoch": 133.32, "learning_rate": 2.223948220064725e-06, "loss": 0.042, "step": 343300 }, { "epoch": 133.32, "learning_rate": 2.2234304207119744e-06, "loss": 0.0035, "step": 343310 }, { "epoch": 133.33, "learning_rate": 2.2229126213592236e-06, "loss": 0.0688, "step": 343320 }, { "epoch": 133.33, "learning_rate": 2.2223948220064723e-06, "loss": 0.0383, "step": 343330 }, { "epoch": 133.34, "learning_rate": 2.2218770226537215e-06, "loss": 0.0009, "step": 343340 }, { "epoch": 133.34, "learning_rate": 2.2213592233009707e-06, "loss": 0.0399, "step": 343350 }, { "epoch": 133.34, "learning_rate": 2.22084142394822e-06, "loss": 0.0201, "step": 343360 }, { "epoch": 133.35, "learning_rate": 2.2203236245954695e-06, "loss": 0.0263, "step": 343370 }, { "epoch": 133.35, "learning_rate": 2.2198058252427187e-06, "loss": 0.0001, "step": 343380 }, { "epoch": 133.36, "learning_rate": 2.219288025889968e-06, "loss": 0.0818, "step": 343390 }, { "epoch": 133.36, "learning_rate": 2.218770226537217e-06, "loss": 0.092, "step": 343400 }, { "epoch": 133.36, "learning_rate": 2.2182524271844663e-06, "loss": 0.066, "step": 343410 }, { "epoch": 133.37, "learning_rate": 2.2177346278317155e-06, "loss": 0.0272, "step": 343420 }, { "epoch": 133.37, "learning_rate": 2.2172168284789647e-06, "loss": 0.0262, "step": 343430 }, { "epoch": 133.37, "learning_rate": 2.216699029126214e-06, "loss": 0.0089, "step": 343440 }, { "epoch": 133.38, "learning_rate": 2.216181229773463e-06, "loss": 0.0798, "step": 343450 }, { "epoch": 133.38, "learning_rate": 2.2156634304207123e-06, "loss": 0.0109, "step": 343460 }, { "epoch": 133.39, "learning_rate": 2.2151456310679615e-06, "loss": 0.0171, "step": 343470 }, { "epoch": 133.39, "learning_rate": 2.2146278317152107e-06, "loss": 0.0864, "step": 343480 }, { "epoch": 133.39, "learning_rate": 2.21411003236246e-06, "loss": 0.0095, "step": 343490 }, { "epoch": 133.4, "learning_rate": 2.213592233009709e-06, "loss": 0.0826, "step": 343500 }, { "epoch": 133.4, "learning_rate": 2.2130744336569583e-06, "loss": 0.0456, "step": 343510 }, { "epoch": 133.41, "learning_rate": 2.2125566343042075e-06, "loss": 0.0354, "step": 343520 }, { "epoch": 133.41, "learning_rate": 2.2120388349514567e-06, "loss": 0.1422, "step": 343530 }, { "epoch": 133.41, "learning_rate": 2.211521035598706e-06, "loss": 0.0278, "step": 343540 }, { "epoch": 133.42, "learning_rate": 2.211003236245955e-06, "loss": 0.0372, "step": 343550 }, { "epoch": 133.42, "learning_rate": 2.2104854368932042e-06, "loss": 0.0487, "step": 343560 }, { "epoch": 133.43, "learning_rate": 2.2099676375404534e-06, "loss": 0.0161, "step": 343570 }, { "epoch": 133.43, "learning_rate": 2.2094498381877026e-06, "loss": 0.2329, "step": 343580 }, { "epoch": 133.43, "learning_rate": 2.208932038834952e-06, "loss": 0.0107, "step": 343590 }, { "epoch": 133.44, "learning_rate": 2.208414239482201e-06, "loss": 0.0172, "step": 343600 }, { "epoch": 133.44, "learning_rate": 2.2078964401294502e-06, "loss": 0.0088, "step": 343610 }, { "epoch": 133.44, "learning_rate": 2.2073786407766994e-06, "loss": 0.0018, "step": 343620 }, { "epoch": 133.45, "learning_rate": 2.2068608414239486e-06, "loss": 0.1306, "step": 343630 }, { "epoch": 133.45, "learning_rate": 2.206343042071198e-06, "loss": 0.0721, "step": 343640 }, { "epoch": 133.46, "learning_rate": 2.205825242718447e-06, "loss": 0.0191, "step": 343650 }, { "epoch": 133.46, "learning_rate": 2.2053074433656958e-06, "loss": 0.0717, "step": 343660 }, { "epoch": 133.46, "learning_rate": 2.204789644012945e-06, "loss": 0.1604, "step": 343670 }, { "epoch": 133.47, "learning_rate": 2.204271844660194e-06, "loss": 0.1256, "step": 343680 }, { "epoch": 133.47, "learning_rate": 2.2037540453074433e-06, "loss": 0.035, "step": 343690 }, { "epoch": 133.48, "learning_rate": 2.2032362459546925e-06, "loss": 0.0129, "step": 343700 }, { "epoch": 133.48, "learning_rate": 2.2027184466019417e-06, "loss": 0.0441, "step": 343710 }, { "epoch": 133.48, "learning_rate": 2.202200647249191e-06, "loss": 0.0485, "step": 343720 }, { "epoch": 133.49, "learning_rate": 2.20168284789644e-06, "loss": 0.003, "step": 343730 }, { "epoch": 133.49, "learning_rate": 2.2011650485436893e-06, "loss": 0.059, "step": 343740 }, { "epoch": 133.5, "learning_rate": 2.2006472491909385e-06, "loss": 0.0404, "step": 343750 }, { "epoch": 133.5, "learning_rate": 2.2001294498381877e-06, "loss": 0.0699, "step": 343760 }, { "epoch": 133.5, "learning_rate": 2.199611650485437e-06, "loss": 0.0094, "step": 343770 }, { "epoch": 133.51, "learning_rate": 2.199093851132686e-06, "loss": 0.0039, "step": 343780 }, { "epoch": 133.51, "learning_rate": 2.1985760517799353e-06, "loss": 0.0911, "step": 343790 }, { "epoch": 133.51, "learning_rate": 2.1980582524271845e-06, "loss": 0.033, "step": 343800 }, { "epoch": 133.52, "learning_rate": 2.1975404530744337e-06, "loss": 0.1559, "step": 343810 }, { "epoch": 133.52, "learning_rate": 2.197022653721683e-06, "loss": 0.0193, "step": 343820 }, { "epoch": 133.53, "learning_rate": 2.196504854368932e-06, "loss": 0.0001, "step": 343830 }, { "epoch": 133.53, "learning_rate": 2.1959870550161813e-06, "loss": 0.0154, "step": 343840 }, { "epoch": 133.53, "learning_rate": 2.1954692556634305e-06, "loss": 0.0005, "step": 343850 }, { "epoch": 133.54, "learning_rate": 2.1949514563106797e-06, "loss": 0.0017, "step": 343860 }, { "epoch": 133.54, "learning_rate": 2.194433656957929e-06, "loss": 0.036, "step": 343870 }, { "epoch": 133.55, "learning_rate": 2.193915857605178e-06, "loss": 0.0451, "step": 343880 }, { "epoch": 133.55, "learning_rate": 2.1933980582524272e-06, "loss": 0.0348, "step": 343890 }, { "epoch": 133.55, "learning_rate": 2.1928802588996764e-06, "loss": 0.0675, "step": 343900 }, { "epoch": 133.56, "learning_rate": 2.1923624595469256e-06, "loss": 0.0004, "step": 343910 }, { "epoch": 133.56, "learning_rate": 2.191844660194175e-06, "loss": 0.0343, "step": 343920 }, { "epoch": 133.57, "learning_rate": 2.1913268608414244e-06, "loss": 0.0074, "step": 343930 }, { "epoch": 133.57, "learning_rate": 2.1908090614886736e-06, "loss": 0.1216, "step": 343940 }, { "epoch": 133.57, "learning_rate": 2.190291262135923e-06, "loss": 0.0001, "step": 343950 }, { "epoch": 133.58, "learning_rate": 2.189773462783172e-06, "loss": 0.0086, "step": 343960 }, { "epoch": 133.58, "learning_rate": 2.1892556634304212e-06, "loss": 0.0495, "step": 343970 }, { "epoch": 133.58, "learning_rate": 2.18873786407767e-06, "loss": 0.0014, "step": 343980 }, { "epoch": 133.59, "learning_rate": 2.188220064724919e-06, "loss": 0.0153, "step": 343990 }, { "epoch": 133.59, "learning_rate": 2.1877022653721684e-06, "loss": 0.0292, "step": 344000 }, { "epoch": 133.6, "learning_rate": 2.1871844660194176e-06, "loss": 0.0409, "step": 344010 }, { "epoch": 133.6, "learning_rate": 2.1866666666666668e-06, "loss": 0.0459, "step": 344020 }, { "epoch": 133.6, "learning_rate": 2.186148867313916e-06, "loss": 0.0671, "step": 344030 }, { "epoch": 133.61, "learning_rate": 2.185631067961165e-06, "loss": 0.0057, "step": 344040 }, { "epoch": 133.61, "learning_rate": 2.1851132686084143e-06, "loss": 0.0252, "step": 344050 }, { "epoch": 133.62, "learning_rate": 2.1845954692556635e-06, "loss": 0.0001, "step": 344060 }, { "epoch": 133.62, "learning_rate": 2.1840776699029127e-06, "loss": 0.0795, "step": 344070 }, { "epoch": 133.62, "learning_rate": 2.183559870550162e-06, "loss": 0.1801, "step": 344080 }, { "epoch": 133.63, "learning_rate": 2.183042071197411e-06, "loss": 0.0179, "step": 344090 }, { "epoch": 133.63, "learning_rate": 2.1825242718446603e-06, "loss": 0.0997, "step": 344100 }, { "epoch": 133.63, "learning_rate": 2.1820064724919095e-06, "loss": 0.0268, "step": 344110 }, { "epoch": 133.64, "learning_rate": 2.1814886731391587e-06, "loss": 0.0799, "step": 344120 }, { "epoch": 133.64, "learning_rate": 2.180970873786408e-06, "loss": 0.0697, "step": 344130 }, { "epoch": 133.65, "learning_rate": 2.180453074433657e-06, "loss": 0.0303, "step": 344140 }, { "epoch": 133.65, "learning_rate": 2.1799352750809063e-06, "loss": 0.0664, "step": 344150 }, { "epoch": 133.65, "learning_rate": 2.1794174757281555e-06, "loss": 0.0254, "step": 344160 }, { "epoch": 133.66, "learning_rate": 2.1788996763754047e-06, "loss": 0.1343, "step": 344170 }, { "epoch": 133.66, "learning_rate": 2.178381877022654e-06, "loss": 0.0141, "step": 344180 }, { "epoch": 133.67, "learning_rate": 2.177864077669903e-06, "loss": 0.0033, "step": 344190 }, { "epoch": 133.67, "learning_rate": 2.1773462783171523e-06, "loss": 0.0647, "step": 344200 }, { "epoch": 133.67, "learning_rate": 2.1768284789644015e-06, "loss": 0.0756, "step": 344210 }, { "epoch": 133.68, "learning_rate": 2.1763106796116507e-06, "loss": 0.0087, "step": 344220 }, { "epoch": 133.68, "learning_rate": 2.1757928802589e-06, "loss": 0.0508, "step": 344230 }, { "epoch": 133.69, "learning_rate": 2.175275080906149e-06, "loss": 0.0316, "step": 344240 }, { "epoch": 133.69, "learning_rate": 2.1747572815533982e-06, "loss": 0.0666, "step": 344250 }, { "epoch": 133.69, "learning_rate": 2.1742394822006474e-06, "loss": 0.0282, "step": 344260 }, { "epoch": 133.7, "learning_rate": 2.1737216828478966e-06, "loss": 0.0004, "step": 344270 }, { "epoch": 133.7, "learning_rate": 2.173203883495146e-06, "loss": 0.0186, "step": 344280 }, { "epoch": 133.7, "learning_rate": 2.172686084142395e-06, "loss": 0.0006, "step": 344290 }, { "epoch": 133.71, "learning_rate": 2.1721682847896442e-06, "loss": 0.0757, "step": 344300 }, { "epoch": 133.71, "learning_rate": 2.1716504854368934e-06, "loss": 0.0004, "step": 344310 }, { "epoch": 133.72, "learning_rate": 2.1711326860841426e-06, "loss": 0.0249, "step": 344320 }, { "epoch": 133.72, "learning_rate": 2.170614886731392e-06, "loss": 0.0001, "step": 344330 }, { "epoch": 133.72, "learning_rate": 2.170097087378641e-06, "loss": 0.0833, "step": 344340 }, { "epoch": 133.73, "learning_rate": 2.16957928802589e-06, "loss": 0.0127, "step": 344350 }, { "epoch": 133.73, "learning_rate": 2.1690614886731394e-06, "loss": 0.0167, "step": 344360 }, { "epoch": 133.74, "learning_rate": 2.1685436893203886e-06, "loss": 0.0086, "step": 344370 }, { "epoch": 133.74, "learning_rate": 2.1680258899676378e-06, "loss": 0.0008, "step": 344380 }, { "epoch": 133.74, "learning_rate": 2.167508090614887e-06, "loss": 0.0369, "step": 344390 }, { "epoch": 133.75, "learning_rate": 2.166990291262136e-06, "loss": 0.0772, "step": 344400 }, { "epoch": 133.75, "learning_rate": 2.1664724919093854e-06, "loss": 0.0686, "step": 344410 }, { "epoch": 133.76, "learning_rate": 2.1659546925566345e-06, "loss": 0.0624, "step": 344420 }, { "epoch": 133.76, "learning_rate": 2.1654368932038837e-06, "loss": 0.0072, "step": 344430 }, { "epoch": 133.76, "learning_rate": 2.164919093851133e-06, "loss": 0.0147, "step": 344440 }, { "epoch": 133.77, "learning_rate": 2.164401294498382e-06, "loss": 0.0555, "step": 344450 }, { "epoch": 133.77, "learning_rate": 2.1638834951456313e-06, "loss": 0.0009, "step": 344460 }, { "epoch": 133.77, "learning_rate": 2.16336569579288e-06, "loss": 0.0273, "step": 344470 }, { "epoch": 133.78, "learning_rate": 2.1628478964401293e-06, "loss": 0.0382, "step": 344480 }, { "epoch": 133.78, "learning_rate": 2.162330097087379e-06, "loss": 0.0438, "step": 344490 }, { "epoch": 133.79, "learning_rate": 2.161812297734628e-06, "loss": 0.0366, "step": 344500 }, { "epoch": 133.79, "learning_rate": 2.1612944983818773e-06, "loss": 0.0703, "step": 344510 }, { "epoch": 133.79, "learning_rate": 2.1607766990291265e-06, "loss": 0.0253, "step": 344520 }, { "epoch": 133.8, "learning_rate": 2.1602588996763757e-06, "loss": 0.0643, "step": 344530 }, { "epoch": 133.8, "learning_rate": 2.159741100323625e-06, "loss": 0.0909, "step": 344540 }, { "epoch": 133.81, "learning_rate": 2.159223300970874e-06, "loss": 0.0468, "step": 344550 }, { "epoch": 133.81, "learning_rate": 2.1587055016181233e-06, "loss": 0.118, "step": 344560 }, { "epoch": 133.81, "learning_rate": 2.1581877022653725e-06, "loss": 0.0001, "step": 344570 }, { "epoch": 133.82, "learning_rate": 2.1576699029126217e-06, "loss": 0.0004, "step": 344580 }, { "epoch": 133.82, "learning_rate": 2.157152103559871e-06, "loss": 0.0005, "step": 344590 }, { "epoch": 133.83, "learning_rate": 2.15663430420712e-06, "loss": 0.0941, "step": 344600 }, { "epoch": 133.83, "learning_rate": 2.1561165048543692e-06, "loss": 0.0844, "step": 344610 }, { "epoch": 133.83, "learning_rate": 2.1555987055016184e-06, "loss": 0.0857, "step": 344620 }, { "epoch": 133.84, "learning_rate": 2.1550809061488676e-06, "loss": 0.051, "step": 344630 }, { "epoch": 133.84, "learning_rate": 2.154563106796117e-06, "loss": 0.0304, "step": 344640 }, { "epoch": 133.84, "learning_rate": 2.154045307443366e-06, "loss": 0.0001, "step": 344650 }, { "epoch": 133.85, "learning_rate": 2.1535275080906152e-06, "loss": 0.036, "step": 344660 }, { "epoch": 133.85, "learning_rate": 2.1530097087378644e-06, "loss": 0.0616, "step": 344670 }, { "epoch": 133.86, "learning_rate": 2.1524919093851136e-06, "loss": 0.0126, "step": 344680 }, { "epoch": 133.86, "learning_rate": 2.151974110032363e-06, "loss": 0.0887, "step": 344690 }, { "epoch": 133.86, "learning_rate": 2.151456310679612e-06, "loss": 0.0332, "step": 344700 }, { "epoch": 133.87, "learning_rate": 2.150938511326861e-06, "loss": 0.0006, "step": 344710 }, { "epoch": 133.87, "learning_rate": 2.1504207119741104e-06, "loss": 0.0001, "step": 344720 }, { "epoch": 133.88, "learning_rate": 2.1499029126213596e-06, "loss": 0.0279, "step": 344730 }, { "epoch": 133.88, "learning_rate": 2.1493851132686088e-06, "loss": 0.05, "step": 344740 }, { "epoch": 133.88, "learning_rate": 2.148867313915858e-06, "loss": 0.1245, "step": 344750 }, { "epoch": 133.89, "learning_rate": 2.148349514563107e-06, "loss": 0.0454, "step": 344760 }, { "epoch": 133.89, "learning_rate": 2.1478317152103564e-06, "loss": 0.2008, "step": 344770 }, { "epoch": 133.9, "learning_rate": 2.1473139158576055e-06, "loss": 0.0164, "step": 344780 }, { "epoch": 133.9, "learning_rate": 2.1467961165048547e-06, "loss": 0.0645, "step": 344790 }, { "epoch": 133.9, "learning_rate": 2.1462783171521035e-06, "loss": 0.0128, "step": 344800 }, { "epoch": 133.91, "learning_rate": 2.1457605177993527e-06, "loss": 0.0311, "step": 344810 }, { "epoch": 133.91, "learning_rate": 2.145242718446602e-06, "loss": 0.0745, "step": 344820 }, { "epoch": 133.91, "learning_rate": 2.144724919093851e-06, "loss": 0.0093, "step": 344830 }, { "epoch": 133.92, "learning_rate": 2.1442071197411003e-06, "loss": 0.0634, "step": 344840 }, { "epoch": 133.92, "learning_rate": 2.1436893203883495e-06, "loss": 0.1034, "step": 344850 }, { "epoch": 133.93, "learning_rate": 2.1431715210355987e-06, "loss": 0.1079, "step": 344860 }, { "epoch": 133.93, "learning_rate": 2.142653721682848e-06, "loss": 0.0143, "step": 344870 }, { "epoch": 133.93, "learning_rate": 2.142135922330097e-06, "loss": 0.0176, "step": 344880 }, { "epoch": 133.94, "learning_rate": 2.1416181229773463e-06, "loss": 0.0005, "step": 344890 }, { "epoch": 133.94, "learning_rate": 2.1411003236245955e-06, "loss": 0.0286, "step": 344900 }, { "epoch": 133.95, "learning_rate": 2.1405825242718447e-06, "loss": 0.0662, "step": 344910 }, { "epoch": 133.95, "learning_rate": 2.140064724919094e-06, "loss": 0.0322, "step": 344920 }, { "epoch": 133.95, "learning_rate": 2.139546925566343e-06, "loss": 0.0759, "step": 344930 }, { "epoch": 133.96, "learning_rate": 2.1390291262135922e-06, "loss": 0.0002, "step": 344940 }, { "epoch": 133.96, "learning_rate": 2.1385113268608414e-06, "loss": 0.0484, "step": 344950 }, { "epoch": 133.97, "learning_rate": 2.1379935275080906e-06, "loss": 0.0027, "step": 344960 }, { "epoch": 133.97, "learning_rate": 2.13747572815534e-06, "loss": 0.0434, "step": 344970 }, { "epoch": 133.97, "learning_rate": 2.136957928802589e-06, "loss": 0.0495, "step": 344980 }, { "epoch": 133.98, "learning_rate": 2.136440129449838e-06, "loss": 0.0251, "step": 344990 }, { "epoch": 133.98, "learning_rate": 2.1359223300970874e-06, "loss": 0.0093, "step": 345000 }, { "epoch": 133.98, "learning_rate": 2.1354045307443366e-06, "loss": 0.0534, "step": 345010 }, { "epoch": 133.99, "learning_rate": 2.134886731391586e-06, "loss": 0.0067, "step": 345020 }, { "epoch": 133.99, "learning_rate": 2.134368932038835e-06, "loss": 0.0077, "step": 345030 }, { "epoch": 134.0, "learning_rate": 2.133851132686084e-06, "loss": 0.0521, "step": 345040 }, { "epoch": 134.0, "learning_rate": 2.133333333333334e-06, "loss": 0.0784, "step": 345050 }, { "epoch": 134.0, "eval_accuracy": 0.953232462173315, "eval_loss": 0.38705921173095703, "eval_runtime": 8.2715, "eval_samples_per_second": 439.46, "eval_steps_per_second": 55.008, "step": 345050 }, { "epoch": 134.0, "learning_rate": 2.132815533980583e-06, "loss": 0.001, "step": 345060 }, { "epoch": 134.01, "learning_rate": 2.132297734627832e-06, "loss": 0.0105, "step": 345070 }, { "epoch": 134.01, "learning_rate": 2.1317799352750814e-06, "loss": 0.0093, "step": 345080 }, { "epoch": 134.02, "learning_rate": 2.1312621359223306e-06, "loss": 0.0189, "step": 345090 }, { "epoch": 134.02, "learning_rate": 2.1307443365695798e-06, "loss": 0.0509, "step": 345100 }, { "epoch": 134.02, "learning_rate": 2.130226537216829e-06, "loss": 0.0418, "step": 345110 }, { "epoch": 134.03, "learning_rate": 2.1297087378640777e-06, "loss": 0.0662, "step": 345120 }, { "epoch": 134.03, "learning_rate": 2.129190938511327e-06, "loss": 0.0294, "step": 345130 }, { "epoch": 134.03, "learning_rate": 2.128673139158576e-06, "loss": 0.0208, "step": 345140 }, { "epoch": 134.04, "learning_rate": 2.1281553398058253e-06, "loss": 0.0701, "step": 345150 }, { "epoch": 134.04, "learning_rate": 2.1276375404530745e-06, "loss": 0.0538, "step": 345160 }, { "epoch": 134.05, "learning_rate": 2.1271197411003237e-06, "loss": 0.0096, "step": 345170 }, { "epoch": 134.05, "learning_rate": 2.126601941747573e-06, "loss": 0.1019, "step": 345180 }, { "epoch": 134.05, "learning_rate": 2.126084142394822e-06, "loss": 0.0009, "step": 345190 }, { "epoch": 134.06, "learning_rate": 2.1255663430420713e-06, "loss": 0.0252, "step": 345200 }, { "epoch": 134.06, "learning_rate": 2.1250485436893205e-06, "loss": 0.0336, "step": 345210 }, { "epoch": 134.07, "learning_rate": 2.1245307443365697e-06, "loss": 0.0021, "step": 345220 }, { "epoch": 134.07, "learning_rate": 2.124012944983819e-06, "loss": 0.0965, "step": 345230 }, { "epoch": 134.07, "learning_rate": 2.123495145631068e-06, "loss": 0.0106, "step": 345240 }, { "epoch": 134.08, "learning_rate": 2.1229773462783173e-06, "loss": 0.0348, "step": 345250 }, { "epoch": 134.08, "learning_rate": 2.1224595469255665e-06, "loss": 0.0821, "step": 345260 }, { "epoch": 134.09, "learning_rate": 2.1219417475728157e-06, "loss": 0.051, "step": 345270 }, { "epoch": 134.09, "learning_rate": 2.121423948220065e-06, "loss": 0.0276, "step": 345280 }, { "epoch": 134.09, "learning_rate": 2.120906148867314e-06, "loss": 0.0128, "step": 345290 }, { "epoch": 134.1, "learning_rate": 2.1203883495145632e-06, "loss": 0.0332, "step": 345300 }, { "epoch": 134.1, "learning_rate": 2.1198705501618124e-06, "loss": 0.1638, "step": 345310 }, { "epoch": 134.1, "learning_rate": 2.1193527508090616e-06, "loss": 0.2331, "step": 345320 }, { "epoch": 134.11, "learning_rate": 2.118834951456311e-06, "loss": 0.0499, "step": 345330 }, { "epoch": 134.11, "learning_rate": 2.11831715210356e-06, "loss": 0.0654, "step": 345340 }, { "epoch": 134.12, "learning_rate": 2.1177993527508092e-06, "loss": 0.0008, "step": 345350 }, { "epoch": 134.12, "learning_rate": 2.1172815533980584e-06, "loss": 0.0317, "step": 345360 }, { "epoch": 134.12, "learning_rate": 2.1167637540453076e-06, "loss": 0.1292, "step": 345370 }, { "epoch": 134.13, "learning_rate": 2.116245954692557e-06, "loss": 0.0019, "step": 345380 }, { "epoch": 134.13, "learning_rate": 2.115728155339806e-06, "loss": 0.0325, "step": 345390 }, { "epoch": 134.14, "learning_rate": 2.115210355987055e-06, "loss": 0.0933, "step": 345400 }, { "epoch": 134.14, "learning_rate": 2.1146925566343044e-06, "loss": 0.0025, "step": 345410 }, { "epoch": 134.14, "learning_rate": 2.1141747572815536e-06, "loss": 0.0102, "step": 345420 }, { "epoch": 134.15, "learning_rate": 2.1136569579288028e-06, "loss": 0.0241, "step": 345430 }, { "epoch": 134.15, "learning_rate": 2.113139158576052e-06, "loss": 0.0534, "step": 345440 }, { "epoch": 134.16, "learning_rate": 2.112621359223301e-06, "loss": 0.077, "step": 345450 }, { "epoch": 134.16, "learning_rate": 2.1121035598705504e-06, "loss": 0.0001, "step": 345460 }, { "epoch": 134.16, "learning_rate": 2.1115857605177995e-06, "loss": 0.0252, "step": 345470 }, { "epoch": 134.17, "learning_rate": 2.1110679611650487e-06, "loss": 0.0899, "step": 345480 }, { "epoch": 134.17, "learning_rate": 2.110550161812298e-06, "loss": 0.0904, "step": 345490 }, { "epoch": 134.17, "learning_rate": 2.110032362459547e-06, "loss": 0.0118, "step": 345500 }, { "epoch": 134.18, "learning_rate": 2.1095145631067963e-06, "loss": 0.0404, "step": 345510 }, { "epoch": 134.18, "learning_rate": 2.1089967637540455e-06, "loss": 0.1165, "step": 345520 }, { "epoch": 134.19, "learning_rate": 2.1084789644012947e-06, "loss": 0.0133, "step": 345530 }, { "epoch": 134.19, "learning_rate": 2.107961165048544e-06, "loss": 0.0022, "step": 345540 }, { "epoch": 134.19, "learning_rate": 2.107443365695793e-06, "loss": 0.0086, "step": 345550 }, { "epoch": 134.2, "learning_rate": 2.1069255663430423e-06, "loss": 0.0248, "step": 345560 }, { "epoch": 134.2, "learning_rate": 2.1064077669902915e-06, "loss": 0.0168, "step": 345570 }, { "epoch": 134.21, "learning_rate": 2.1058899676375407e-06, "loss": 0.0356, "step": 345580 }, { "epoch": 134.21, "learning_rate": 2.10537216828479e-06, "loss": 0.0141, "step": 345590 }, { "epoch": 134.21, "learning_rate": 2.104854368932039e-06, "loss": 0.0156, "step": 345600 }, { "epoch": 134.22, "learning_rate": 2.1043365695792883e-06, "loss": 0.0035, "step": 345610 }, { "epoch": 134.22, "learning_rate": 2.1038187702265375e-06, "loss": 0.0551, "step": 345620 }, { "epoch": 134.23, "learning_rate": 2.1033009708737867e-06, "loss": 0.042, "step": 345630 }, { "epoch": 134.23, "learning_rate": 2.102783171521036e-06, "loss": 0.0569, "step": 345640 }, { "epoch": 134.23, "learning_rate": 2.102265372168285e-06, "loss": 0.0001, "step": 345650 }, { "epoch": 134.24, "learning_rate": 2.1017475728155342e-06, "loss": 0.0003, "step": 345660 }, { "epoch": 134.24, "learning_rate": 2.1012297734627834e-06, "loss": 0.0245, "step": 345670 }, { "epoch": 134.24, "learning_rate": 2.1007119741100326e-06, "loss": 0.0406, "step": 345680 }, { "epoch": 134.25, "learning_rate": 2.100194174757282e-06, "loss": 0.0006, "step": 345690 }, { "epoch": 134.25, "learning_rate": 2.099676375404531e-06, "loss": 0.0567, "step": 345700 }, { "epoch": 134.26, "learning_rate": 2.0991585760517802e-06, "loss": 0.0255, "step": 345710 }, { "epoch": 134.26, "learning_rate": 2.0986407766990294e-06, "loss": 0.0724, "step": 345720 }, { "epoch": 134.26, "learning_rate": 2.0981229773462786e-06, "loss": 0.0002, "step": 345730 }, { "epoch": 134.27, "learning_rate": 2.097605177993528e-06, "loss": 0.0923, "step": 345740 }, { "epoch": 134.27, "learning_rate": 2.097087378640777e-06, "loss": 0.0777, "step": 345750 }, { "epoch": 134.28, "learning_rate": 2.096569579288026e-06, "loss": 0.1019, "step": 345760 }, { "epoch": 134.28, "learning_rate": 2.0960517799352754e-06, "loss": 0.0205, "step": 345770 }, { "epoch": 134.28, "learning_rate": 2.0955339805825246e-06, "loss": 0.0335, "step": 345780 }, { "epoch": 134.29, "learning_rate": 2.0950161812297738e-06, "loss": 0.0237, "step": 345790 }, { "epoch": 134.29, "learning_rate": 2.094498381877023e-06, "loss": 0.0886, "step": 345800 }, { "epoch": 134.3, "learning_rate": 2.093980582524272e-06, "loss": 0.0408, "step": 345810 }, { "epoch": 134.3, "learning_rate": 2.0934627831715214e-06, "loss": 0.0092, "step": 345820 }, { "epoch": 134.3, "learning_rate": 2.0929449838187706e-06, "loss": 0.0127, "step": 345830 }, { "epoch": 134.31, "learning_rate": 2.0924271844660197e-06, "loss": 0.038, "step": 345840 }, { "epoch": 134.31, "learning_rate": 2.091909385113269e-06, "loss": 0.0006, "step": 345850 }, { "epoch": 134.31, "learning_rate": 2.091391585760518e-06, "loss": 0.1537, "step": 345860 }, { "epoch": 134.32, "learning_rate": 2.0908737864077673e-06, "loss": 0.0533, "step": 345870 }, { "epoch": 134.32, "learning_rate": 2.0903559870550165e-06, "loss": 0.0007, "step": 345880 }, { "epoch": 134.33, "learning_rate": 2.0898381877022657e-06, "loss": 0.0141, "step": 345890 }, { "epoch": 134.33, "learning_rate": 2.089320388349515e-06, "loss": 0.0001, "step": 345900 }, { "epoch": 134.33, "learning_rate": 2.088802588996764e-06, "loss": 0.0001, "step": 345910 }, { "epoch": 134.34, "learning_rate": 2.0882847896440133e-06, "loss": 0.0398, "step": 345920 }, { "epoch": 134.34, "learning_rate": 2.0877669902912625e-06, "loss": 0.0384, "step": 345930 }, { "epoch": 134.35, "learning_rate": 2.0872491909385113e-06, "loss": 0.0002, "step": 345940 }, { "epoch": 134.35, "learning_rate": 2.0867313915857605e-06, "loss": 0.0435, "step": 345950 }, { "epoch": 134.35, "learning_rate": 2.0862135922330097e-06, "loss": 0.0669, "step": 345960 }, { "epoch": 134.36, "learning_rate": 2.085695792880259e-06, "loss": 0.0424, "step": 345970 }, { "epoch": 134.36, "learning_rate": 2.085177993527508e-06, "loss": 0.0533, "step": 345980 }, { "epoch": 134.37, "learning_rate": 2.0846601941747572e-06, "loss": 0.0212, "step": 345990 }, { "epoch": 134.37, "learning_rate": 2.0841423948220064e-06, "loss": 0.0741, "step": 346000 }, { "epoch": 134.37, "learning_rate": 2.0836245954692556e-06, "loss": 0.0094, "step": 346010 }, { "epoch": 134.38, "learning_rate": 2.083106796116505e-06, "loss": 0.0493, "step": 346020 }, { "epoch": 134.38, "learning_rate": 2.082588996763754e-06, "loss": 0.0003, "step": 346030 }, { "epoch": 134.38, "learning_rate": 2.0820711974110032e-06, "loss": 0.0164, "step": 346040 }, { "epoch": 134.39, "learning_rate": 2.0815533980582524e-06, "loss": 0.001, "step": 346050 }, { "epoch": 134.39, "learning_rate": 2.0810355987055016e-06, "loss": 0.0217, "step": 346060 }, { "epoch": 134.4, "learning_rate": 2.080517799352751e-06, "loss": 0.0001, "step": 346070 }, { "epoch": 134.4, "learning_rate": 2.08e-06, "loss": 0.0266, "step": 346080 }, { "epoch": 134.4, "learning_rate": 2.079482200647249e-06, "loss": 0.0101, "step": 346090 }, { "epoch": 134.41, "learning_rate": 2.0789644012944984e-06, "loss": 0.0001, "step": 346100 }, { "epoch": 134.41, "learning_rate": 2.0784466019417476e-06, "loss": 0.0001, "step": 346110 }, { "epoch": 134.42, "learning_rate": 2.0779288025889968e-06, "loss": 0.1427, "step": 346120 }, { "epoch": 134.42, "learning_rate": 2.077411003236246e-06, "loss": 0.0157, "step": 346130 }, { "epoch": 134.42, "learning_rate": 2.076893203883495e-06, "loss": 0.0573, "step": 346140 }, { "epoch": 134.43, "learning_rate": 2.0763754045307444e-06, "loss": 0.0329, "step": 346150 }, { "epoch": 134.43, "learning_rate": 2.0758576051779935e-06, "loss": 0.017, "step": 346160 }, { "epoch": 134.43, "learning_rate": 2.075339805825243e-06, "loss": 0.0319, "step": 346170 }, { "epoch": 134.44, "learning_rate": 2.0748220064724924e-06, "loss": 0.1339, "step": 346180 }, { "epoch": 134.44, "learning_rate": 2.0743042071197416e-06, "loss": 0.0182, "step": 346190 }, { "epoch": 134.45, "learning_rate": 2.0737864077669907e-06, "loss": 0.061, "step": 346200 }, { "epoch": 134.45, "learning_rate": 2.07326860841424e-06, "loss": 0.0383, "step": 346210 }, { "epoch": 134.45, "learning_rate": 2.072750809061489e-06, "loss": 0.1304, "step": 346220 }, { "epoch": 134.46, "learning_rate": 2.0722330097087383e-06, "loss": 0.0006, "step": 346230 }, { "epoch": 134.46, "learning_rate": 2.0717152103559875e-06, "loss": 0.0726, "step": 346240 }, { "epoch": 134.47, "learning_rate": 2.0711974110032367e-06, "loss": 0.0105, "step": 346250 }, { "epoch": 134.47, "learning_rate": 2.0706796116504855e-06, "loss": 0.0297, "step": 346260 }, { "epoch": 134.47, "learning_rate": 2.0701618122977347e-06, "loss": 0.0009, "step": 346270 }, { "epoch": 134.48, "learning_rate": 2.069644012944984e-06, "loss": 0.0908, "step": 346280 }, { "epoch": 134.48, "learning_rate": 2.069126213592233e-06, "loss": 0.0588, "step": 346290 }, { "epoch": 134.49, "learning_rate": 2.0686084142394823e-06, "loss": 0.0346, "step": 346300 }, { "epoch": 134.49, "learning_rate": 2.0680906148867315e-06, "loss": 0.0948, "step": 346310 }, { "epoch": 134.49, "learning_rate": 2.0675728155339807e-06, "loss": 0.0332, "step": 346320 }, { "epoch": 134.5, "learning_rate": 2.06705501618123e-06, "loss": 0.0098, "step": 346330 }, { "epoch": 134.5, "learning_rate": 2.066537216828479e-06, "loss": 0.0143, "step": 346340 }, { "epoch": 134.5, "learning_rate": 2.0660194174757282e-06, "loss": 0.052, "step": 346350 }, { "epoch": 134.51, "learning_rate": 2.0655016181229774e-06, "loss": 0.0002, "step": 346360 }, { "epoch": 134.51, "learning_rate": 2.0649838187702266e-06, "loss": 0.0505, "step": 346370 }, { "epoch": 134.52, "learning_rate": 2.064466019417476e-06, "loss": 0.1114, "step": 346380 }, { "epoch": 134.52, "learning_rate": 2.063948220064725e-06, "loss": 0.0333, "step": 346390 }, { "epoch": 134.52, "learning_rate": 2.0634304207119742e-06, "loss": 0.0612, "step": 346400 }, { "epoch": 134.53, "learning_rate": 2.0629126213592234e-06, "loss": 0.019, "step": 346410 }, { "epoch": 134.53, "learning_rate": 2.0623948220064726e-06, "loss": 0.0001, "step": 346420 }, { "epoch": 134.54, "learning_rate": 2.061877022653722e-06, "loss": 0.001, "step": 346430 }, { "epoch": 134.54, "learning_rate": 2.061359223300971e-06, "loss": 0.0511, "step": 346440 }, { "epoch": 134.54, "learning_rate": 2.06084142394822e-06, "loss": 0.0395, "step": 346450 }, { "epoch": 134.55, "learning_rate": 2.0603236245954694e-06, "loss": 0.0222, "step": 346460 }, { "epoch": 134.55, "learning_rate": 2.0598058252427186e-06, "loss": 0.0429, "step": 346470 }, { "epoch": 134.56, "learning_rate": 2.0592880258899678e-06, "loss": 0.0176, "step": 346480 }, { "epoch": 134.56, "learning_rate": 2.058770226537217e-06, "loss": 0.0492, "step": 346490 }, { "epoch": 134.56, "learning_rate": 2.058252427184466e-06, "loss": 0.0172, "step": 346500 }, { "epoch": 134.57, "learning_rate": 2.0577346278317154e-06, "loss": 0.0355, "step": 346510 }, { "epoch": 134.57, "learning_rate": 2.0572168284789645e-06, "loss": 0.0085, "step": 346520 }, { "epoch": 134.57, "learning_rate": 2.0566990291262137e-06, "loss": 0.0377, "step": 346530 }, { "epoch": 134.58, "learning_rate": 2.056181229773463e-06, "loss": 0.0708, "step": 346540 }, { "epoch": 134.58, "learning_rate": 2.055663430420712e-06, "loss": 0.0061, "step": 346550 }, { "epoch": 134.59, "learning_rate": 2.0551456310679613e-06, "loss": 0.0278, "step": 346560 }, { "epoch": 134.59, "learning_rate": 2.0546278317152105e-06, "loss": 0.0275, "step": 346570 }, { "epoch": 134.59, "learning_rate": 2.0541100323624597e-06, "loss": 0.1146, "step": 346580 }, { "epoch": 134.6, "learning_rate": 2.053592233009709e-06, "loss": 0.0211, "step": 346590 }, { "epoch": 134.6, "learning_rate": 2.053074433656958e-06, "loss": 0.0089, "step": 346600 }, { "epoch": 134.61, "learning_rate": 2.0525566343042073e-06, "loss": 0.0581, "step": 346610 }, { "epoch": 134.61, "learning_rate": 2.0520388349514565e-06, "loss": 0.0003, "step": 346620 }, { "epoch": 134.61, "learning_rate": 2.0515210355987057e-06, "loss": 0.0399, "step": 346630 }, { "epoch": 134.62, "learning_rate": 2.051003236245955e-06, "loss": 0.0089, "step": 346640 }, { "epoch": 134.62, "learning_rate": 2.050485436893204e-06, "loss": 0.0635, "step": 346650 }, { "epoch": 134.63, "learning_rate": 2.0499676375404533e-06, "loss": 0.0746, "step": 346660 }, { "epoch": 134.63, "learning_rate": 2.0494498381877025e-06, "loss": 0.0143, "step": 346670 }, { "epoch": 134.63, "learning_rate": 2.0489320388349517e-06, "loss": 0.0523, "step": 346680 }, { "epoch": 134.64, "learning_rate": 2.048414239482201e-06, "loss": 0.0141, "step": 346690 }, { "epoch": 134.64, "learning_rate": 2.04789644012945e-06, "loss": 0.0361, "step": 346700 }, { "epoch": 134.64, "learning_rate": 2.0473786407766992e-06, "loss": 0.0099, "step": 346710 }, { "epoch": 134.65, "learning_rate": 2.0468608414239484e-06, "loss": 0.0096, "step": 346720 }, { "epoch": 134.65, "learning_rate": 2.0463430420711976e-06, "loss": 0.0449, "step": 346730 }, { "epoch": 134.66, "learning_rate": 2.045825242718447e-06, "loss": 0.017, "step": 346740 }, { "epoch": 134.66, "learning_rate": 2.045307443365696e-06, "loss": 0.0846, "step": 346750 }, { "epoch": 134.66, "learning_rate": 2.0447896440129452e-06, "loss": 0.0258, "step": 346760 }, { "epoch": 134.67, "learning_rate": 2.0442718446601944e-06, "loss": 0.0644, "step": 346770 }, { "epoch": 134.67, "learning_rate": 2.0437540453074436e-06, "loss": 0.2867, "step": 346780 }, { "epoch": 134.68, "learning_rate": 2.043236245954693e-06, "loss": 0.105, "step": 346790 }, { "epoch": 134.68, "learning_rate": 2.042718446601942e-06, "loss": 0.0116, "step": 346800 }, { "epoch": 134.68, "learning_rate": 2.042200647249191e-06, "loss": 0.0109, "step": 346810 }, { "epoch": 134.69, "learning_rate": 2.0416828478964404e-06, "loss": 0.0352, "step": 346820 }, { "epoch": 134.69, "learning_rate": 2.0411650485436896e-06, "loss": 0.0027, "step": 346830 }, { "epoch": 134.7, "learning_rate": 2.0406472491909388e-06, "loss": 0.0112, "step": 346840 }, { "epoch": 134.7, "learning_rate": 2.040129449838188e-06, "loss": 0.0212, "step": 346850 }, { "epoch": 134.7, "learning_rate": 2.039611650485437e-06, "loss": 0.0325, "step": 346860 }, { "epoch": 134.71, "learning_rate": 2.0390938511326864e-06, "loss": 0.0117, "step": 346870 }, { "epoch": 134.71, "learning_rate": 2.0385760517799356e-06, "loss": 0.1116, "step": 346880 }, { "epoch": 134.71, "learning_rate": 2.0380582524271847e-06, "loss": 0.0021, "step": 346890 }, { "epoch": 134.72, "learning_rate": 2.037540453074434e-06, "loss": 0.0001, "step": 346900 }, { "epoch": 134.72, "learning_rate": 2.037022653721683e-06, "loss": 0.058, "step": 346910 }, { "epoch": 134.73, "learning_rate": 2.0365048543689323e-06, "loss": 0.0241, "step": 346920 }, { "epoch": 134.73, "learning_rate": 2.0359870550161815e-06, "loss": 0.0976, "step": 346930 }, { "epoch": 134.73, "learning_rate": 2.0354692556634307e-06, "loss": 0.0823, "step": 346940 }, { "epoch": 134.74, "learning_rate": 2.03495145631068e-06, "loss": 0.0048, "step": 346950 }, { "epoch": 134.74, "learning_rate": 2.034433656957929e-06, "loss": 0.0001, "step": 346960 }, { "epoch": 134.75, "learning_rate": 2.0339158576051783e-06, "loss": 0.0269, "step": 346970 }, { "epoch": 134.75, "learning_rate": 2.0333980582524275e-06, "loss": 0.0751, "step": 346980 }, { "epoch": 134.75, "learning_rate": 2.0328802588996767e-06, "loss": 0.0009, "step": 346990 }, { "epoch": 134.76, "learning_rate": 2.032362459546926e-06, "loss": 0.006, "step": 347000 }, { "epoch": 134.76, "learning_rate": 2.031844660194175e-06, "loss": 0.0015, "step": 347010 }, { "epoch": 134.77, "learning_rate": 2.0313268608414243e-06, "loss": 0.1033, "step": 347020 }, { "epoch": 134.77, "learning_rate": 2.0308090614886735e-06, "loss": 0.0552, "step": 347030 }, { "epoch": 134.77, "learning_rate": 2.0302912621359227e-06, "loss": 0.0004, "step": 347040 }, { "epoch": 134.78, "learning_rate": 2.029773462783172e-06, "loss": 0.0121, "step": 347050 }, { "epoch": 134.78, "learning_rate": 2.029255663430421e-06, "loss": 0.001, "step": 347060 }, { "epoch": 134.78, "learning_rate": 2.0287378640776702e-06, "loss": 0.0503, "step": 347070 }, { "epoch": 134.79, "learning_rate": 2.028220064724919e-06, "loss": 0.0183, "step": 347080 }, { "epoch": 134.79, "learning_rate": 2.0277022653721682e-06, "loss": 0.0523, "step": 347090 }, { "epoch": 134.8, "learning_rate": 2.0271844660194174e-06, "loss": 0.0176, "step": 347100 }, { "epoch": 134.8, "learning_rate": 2.0266666666666666e-06, "loss": 0.1015, "step": 347110 }, { "epoch": 134.8, "learning_rate": 2.026148867313916e-06, "loss": 0.0843, "step": 347120 }, { "epoch": 134.81, "learning_rate": 2.025631067961165e-06, "loss": 0.009, "step": 347130 }, { "epoch": 134.81, "learning_rate": 2.025113268608414e-06, "loss": 0.0933, "step": 347140 }, { "epoch": 134.82, "learning_rate": 2.0245954692556634e-06, "loss": 0.0229, "step": 347150 }, { "epoch": 134.82, "learning_rate": 2.0240776699029126e-06, "loss": 0.0639, "step": 347160 }, { "epoch": 134.82, "learning_rate": 2.0235598705501618e-06, "loss": 0.0394, "step": 347170 }, { "epoch": 134.83, "learning_rate": 2.023042071197411e-06, "loss": 0.0793, "step": 347180 }, { "epoch": 134.83, "learning_rate": 2.02252427184466e-06, "loss": 0.1003, "step": 347190 }, { "epoch": 134.83, "learning_rate": 2.0220064724919094e-06, "loss": 0.0012, "step": 347200 }, { "epoch": 134.84, "learning_rate": 2.0214886731391585e-06, "loss": 0.008, "step": 347210 }, { "epoch": 134.84, "learning_rate": 2.0209708737864077e-06, "loss": 0.0369, "step": 347220 }, { "epoch": 134.85, "learning_rate": 2.020453074433657e-06, "loss": 0.0137, "step": 347230 }, { "epoch": 134.85, "learning_rate": 2.019935275080906e-06, "loss": 0.1049, "step": 347240 }, { "epoch": 134.85, "learning_rate": 2.0194174757281553e-06, "loss": 0.0338, "step": 347250 }, { "epoch": 134.86, "learning_rate": 2.0188996763754045e-06, "loss": 0.019, "step": 347260 }, { "epoch": 134.86, "learning_rate": 2.0183818770226537e-06, "loss": 0.0156, "step": 347270 }, { "epoch": 134.87, "learning_rate": 2.017864077669903e-06, "loss": 0.0082, "step": 347280 }, { "epoch": 134.87, "learning_rate": 2.0173462783171525e-06, "loss": 0.0258, "step": 347290 }, { "epoch": 134.87, "learning_rate": 2.0168284789644017e-06, "loss": 0.0775, "step": 347300 }, { "epoch": 134.88, "learning_rate": 2.016310679611651e-06, "loss": 0.0546, "step": 347310 }, { "epoch": 134.88, "learning_rate": 2.0157928802589e-06, "loss": 0.0149, "step": 347320 }, { "epoch": 134.89, "learning_rate": 2.0152750809061493e-06, "loss": 0.13, "step": 347330 }, { "epoch": 134.89, "learning_rate": 2.0147572815533985e-06, "loss": 0.0165, "step": 347340 }, { "epoch": 134.89, "learning_rate": 2.0142394822006477e-06, "loss": 0.0504, "step": 347350 }, { "epoch": 134.9, "learning_rate": 2.013721682847897e-06, "loss": 0.0165, "step": 347360 }, { "epoch": 134.9, "learning_rate": 2.013203883495146e-06, "loss": 0.0939, "step": 347370 }, { "epoch": 134.9, "learning_rate": 2.0126860841423953e-06, "loss": 0.0256, "step": 347380 }, { "epoch": 134.91, "learning_rate": 2.0121682847896445e-06, "loss": 0.0161, "step": 347390 }, { "epoch": 134.91, "learning_rate": 2.0116504854368932e-06, "loss": 0.1366, "step": 347400 }, { "epoch": 134.92, "learning_rate": 2.0111326860841424e-06, "loss": 0.0786, "step": 347410 }, { "epoch": 134.92, "learning_rate": 2.0106148867313916e-06, "loss": 0.0139, "step": 347420 }, { "epoch": 134.92, "learning_rate": 2.010097087378641e-06, "loss": 0.0498, "step": 347430 }, { "epoch": 134.93, "learning_rate": 2.00957928802589e-06, "loss": 0.0301, "step": 347440 }, { "epoch": 134.93, "learning_rate": 2.0090614886731392e-06, "loss": 0.001, "step": 347450 }, { "epoch": 134.94, "learning_rate": 2.0085436893203884e-06, "loss": 0.0139, "step": 347460 }, { "epoch": 134.94, "learning_rate": 2.0080258899676376e-06, "loss": 0.0152, "step": 347470 }, { "epoch": 134.94, "learning_rate": 2.007508090614887e-06, "loss": 0.029, "step": 347480 }, { "epoch": 134.95, "learning_rate": 2.006990291262136e-06, "loss": 0.0201, "step": 347490 }, { "epoch": 134.95, "learning_rate": 2.006472491909385e-06, "loss": 0.1045, "step": 347500 }, { "epoch": 134.96, "learning_rate": 2.0059546925566344e-06, "loss": 0.1176, "step": 347510 }, { "epoch": 134.96, "learning_rate": 2.0054368932038836e-06, "loss": 0.0102, "step": 347520 }, { "epoch": 134.96, "learning_rate": 2.0049190938511328e-06, "loss": 0.0001, "step": 347530 }, { "epoch": 134.97, "learning_rate": 2.004401294498382e-06, "loss": 0.0064, "step": 347540 }, { "epoch": 134.97, "learning_rate": 2.003883495145631e-06, "loss": 0.0604, "step": 347550 }, { "epoch": 134.97, "learning_rate": 2.0033656957928804e-06, "loss": 0.0154, "step": 347560 }, { "epoch": 134.98, "learning_rate": 2.0028478964401296e-06, "loss": 0.0197, "step": 347570 }, { "epoch": 134.98, "learning_rate": 2.0023300970873787e-06, "loss": 0.0088, "step": 347580 }, { "epoch": 134.99, "learning_rate": 2.001812297734628e-06, "loss": 0.0053, "step": 347590 }, { "epoch": 134.99, "learning_rate": 2.001294498381877e-06, "loss": 0.0525, "step": 347600 }, { "epoch": 134.99, "learning_rate": 2.0007766990291263e-06, "loss": 0.0004, "step": 347610 }, { "epoch": 135.0, "learning_rate": 2.0002588996763755e-06, "loss": 0.0116, "step": 347620 }, { "epoch": 135.0, "eval_accuracy": 0.9537826685006877, "eval_loss": 0.3890344202518463, "eval_runtime": 8.1691, "eval_samples_per_second": 444.97, "eval_steps_per_second": 55.698, "step": 347625 }, { "epoch": 135.0, "learning_rate": 1.9997411003236247e-06, "loss": 0.1281, "step": 347630 }, { "epoch": 135.01, "learning_rate": 1.999223300970874e-06, "loss": 0.0068, "step": 347640 }, { "epoch": 135.01, "learning_rate": 1.998705501618123e-06, "loss": 0.076, "step": 347650 }, { "epoch": 135.01, "learning_rate": 1.9981877022653723e-06, "loss": 0.0405, "step": 347660 }, { "epoch": 135.02, "learning_rate": 1.9976699029126215e-06, "loss": 0.0598, "step": 347670 }, { "epoch": 135.02, "learning_rate": 1.9971521035598707e-06, "loss": 0.0281, "step": 347680 }, { "epoch": 135.03, "learning_rate": 1.99663430420712e-06, "loss": 0.1554, "step": 347690 }, { "epoch": 135.03, "learning_rate": 1.996116504854369e-06, "loss": 0.0095, "step": 347700 }, { "epoch": 135.03, "learning_rate": 1.9955987055016183e-06, "loss": 0.0112, "step": 347710 }, { "epoch": 135.04, "learning_rate": 1.9950809061488675e-06, "loss": 0.0647, "step": 347720 }, { "epoch": 135.04, "learning_rate": 1.9945631067961167e-06, "loss": 0.0671, "step": 347730 }, { "epoch": 135.04, "learning_rate": 1.994045307443366e-06, "loss": 0.0089, "step": 347740 }, { "epoch": 135.05, "learning_rate": 1.993527508090615e-06, "loss": 0.0016, "step": 347750 }, { "epoch": 135.05, "learning_rate": 1.9930097087378642e-06, "loss": 0.1162, "step": 347760 }, { "epoch": 135.06, "learning_rate": 1.9924919093851134e-06, "loss": 0.077, "step": 347770 }, { "epoch": 135.06, "learning_rate": 1.9919741100323626e-06, "loss": 0.0229, "step": 347780 }, { "epoch": 135.06, "learning_rate": 1.991456310679612e-06, "loss": 0.0156, "step": 347790 }, { "epoch": 135.07, "learning_rate": 1.990938511326861e-06, "loss": 0.0635, "step": 347800 }, { "epoch": 135.07, "learning_rate": 1.9904207119741102e-06, "loss": 0.067, "step": 347810 }, { "epoch": 135.08, "learning_rate": 1.9899029126213594e-06, "loss": 0.0107, "step": 347820 }, { "epoch": 135.08, "learning_rate": 1.9893851132686086e-06, "loss": 0.0128, "step": 347830 }, { "epoch": 135.08, "learning_rate": 1.988867313915858e-06, "loss": 0.0001, "step": 347840 }, { "epoch": 135.09, "learning_rate": 1.988349514563107e-06, "loss": 0.0427, "step": 347850 }, { "epoch": 135.09, "learning_rate": 1.987831715210356e-06, "loss": 0.0094, "step": 347860 }, { "epoch": 135.1, "learning_rate": 1.9873139158576054e-06, "loss": 0.025, "step": 347870 }, { "epoch": 135.1, "learning_rate": 1.9867961165048546e-06, "loss": 0.0802, "step": 347880 }, { "epoch": 135.1, "learning_rate": 1.9862783171521038e-06, "loss": 0.0253, "step": 347890 }, { "epoch": 135.11, "learning_rate": 1.985760517799353e-06, "loss": 0.0265, "step": 347900 }, { "epoch": 135.11, "learning_rate": 1.985242718446602e-06, "loss": 0.025, "step": 347910 }, { "epoch": 135.11, "learning_rate": 1.9847249190938514e-06, "loss": 0.0108, "step": 347920 }, { "epoch": 135.12, "learning_rate": 1.9842071197411006e-06, "loss": 0.0412, "step": 347930 }, { "epoch": 135.12, "learning_rate": 1.9836893203883497e-06, "loss": 0.073, "step": 347940 }, { "epoch": 135.13, "learning_rate": 1.983171521035599e-06, "loss": 0.0016, "step": 347950 }, { "epoch": 135.13, "learning_rate": 1.982653721682848e-06, "loss": 0.0093, "step": 347960 }, { "epoch": 135.13, "learning_rate": 1.9821359223300973e-06, "loss": 0.0018, "step": 347970 }, { "epoch": 135.14, "learning_rate": 1.9816181229773465e-06, "loss": 0.0687, "step": 347980 }, { "epoch": 135.14, "learning_rate": 1.9811003236245957e-06, "loss": 0.027, "step": 347990 }, { "epoch": 135.15, "learning_rate": 1.980582524271845e-06, "loss": 0.0017, "step": 348000 }, { "epoch": 135.15, "learning_rate": 1.980064724919094e-06, "loss": 0.0215, "step": 348010 }, { "epoch": 135.15, "learning_rate": 1.9795469255663433e-06, "loss": 0.0661, "step": 348020 }, { "epoch": 135.16, "learning_rate": 1.9790291262135925e-06, "loss": 0.0001, "step": 348030 }, { "epoch": 135.16, "learning_rate": 1.9785113268608417e-06, "loss": 0.0411, "step": 348040 }, { "epoch": 135.17, "learning_rate": 1.977993527508091e-06, "loss": 0.0366, "step": 348050 }, { "epoch": 135.17, "learning_rate": 1.97747572815534e-06, "loss": 0.0402, "step": 348060 }, { "epoch": 135.17, "learning_rate": 1.9769579288025893e-06, "loss": 0.0247, "step": 348070 }, { "epoch": 135.18, "learning_rate": 1.9764401294498385e-06, "loss": 0.0257, "step": 348080 }, { "epoch": 135.18, "learning_rate": 1.9759223300970877e-06, "loss": 0.0013, "step": 348090 }, { "epoch": 135.18, "learning_rate": 1.975404530744337e-06, "loss": 0.0522, "step": 348100 }, { "epoch": 135.19, "learning_rate": 1.974886731391586e-06, "loss": 0.0383, "step": 348110 }, { "epoch": 135.19, "learning_rate": 1.9743689320388353e-06, "loss": 0.0444, "step": 348120 }, { "epoch": 135.2, "learning_rate": 1.9738511326860844e-06, "loss": 0.0196, "step": 348130 }, { "epoch": 135.2, "learning_rate": 1.9733333333333336e-06, "loss": 0.0864, "step": 348140 }, { "epoch": 135.2, "learning_rate": 1.972815533980583e-06, "loss": 0.0151, "step": 348150 }, { "epoch": 135.21, "learning_rate": 1.972297734627832e-06, "loss": 0.0919, "step": 348160 }, { "epoch": 135.21, "learning_rate": 1.9717799352750812e-06, "loss": 0.0001, "step": 348170 }, { "epoch": 135.22, "learning_rate": 1.9712621359223304e-06, "loss": 0.0011, "step": 348180 }, { "epoch": 135.22, "learning_rate": 1.9707443365695796e-06, "loss": 0.0003, "step": 348190 }, { "epoch": 135.22, "learning_rate": 1.970226537216829e-06, "loss": 0.0346, "step": 348200 }, { "epoch": 135.23, "learning_rate": 1.969708737864078e-06, "loss": 0.0123, "step": 348210 }, { "epoch": 135.23, "learning_rate": 1.9691909385113268e-06, "loss": 0.0414, "step": 348220 }, { "epoch": 135.23, "learning_rate": 1.968673139158576e-06, "loss": 0.0793, "step": 348230 }, { "epoch": 135.24, "learning_rate": 1.968155339805825e-06, "loss": 0.1104, "step": 348240 }, { "epoch": 135.24, "learning_rate": 1.9676375404530744e-06, "loss": 0.0003, "step": 348250 }, { "epoch": 135.25, "learning_rate": 1.9671197411003236e-06, "loss": 0.0388, "step": 348260 }, { "epoch": 135.25, "learning_rate": 1.9666019417475727e-06, "loss": 0.0092, "step": 348270 }, { "epoch": 135.25, "learning_rate": 1.966084142394822e-06, "loss": 0.0001, "step": 348280 }, { "epoch": 135.26, "learning_rate": 1.965566343042071e-06, "loss": 0.0095, "step": 348290 }, { "epoch": 135.26, "learning_rate": 1.9650485436893203e-06, "loss": 0.0006, "step": 348300 }, { "epoch": 135.27, "learning_rate": 1.9645307443365695e-06, "loss": 0.1137, "step": 348310 }, { "epoch": 135.27, "learning_rate": 1.9640129449838187e-06, "loss": 0.0837, "step": 348320 }, { "epoch": 135.27, "learning_rate": 1.963495145631068e-06, "loss": 0.1381, "step": 348330 }, { "epoch": 135.28, "learning_rate": 1.962977346278317e-06, "loss": 0.1481, "step": 348340 }, { "epoch": 135.28, "learning_rate": 1.9624595469255663e-06, "loss": 0.1071, "step": 348350 }, { "epoch": 135.29, "learning_rate": 1.9619417475728155e-06, "loss": 0.0082, "step": 348360 }, { "epoch": 135.29, "learning_rate": 1.9614239482200647e-06, "loss": 0.0201, "step": 348370 }, { "epoch": 135.29, "learning_rate": 1.960906148867314e-06, "loss": 0.0093, "step": 348380 }, { "epoch": 135.3, "learning_rate": 1.960388349514563e-06, "loss": 0.053, "step": 348390 }, { "epoch": 135.3, "learning_rate": 1.9598705501618123e-06, "loss": 0.0809, "step": 348400 }, { "epoch": 135.3, "learning_rate": 1.959352750809062e-06, "loss": 0.1146, "step": 348410 }, { "epoch": 135.31, "learning_rate": 1.958834951456311e-06, "loss": 0.0772, "step": 348420 }, { "epoch": 135.31, "learning_rate": 1.9583171521035603e-06, "loss": 0.0121, "step": 348430 }, { "epoch": 135.32, "learning_rate": 1.9577993527508095e-06, "loss": 0.0801, "step": 348440 }, { "epoch": 135.32, "learning_rate": 1.9572815533980587e-06, "loss": 0.0643, "step": 348450 }, { "epoch": 135.32, "learning_rate": 1.956763754045308e-06, "loss": 0.0276, "step": 348460 }, { "epoch": 135.33, "learning_rate": 1.956245954692557e-06, "loss": 0.0443, "step": 348470 }, { "epoch": 135.33, "learning_rate": 1.9557281553398063e-06, "loss": 0.0088, "step": 348480 }, { "epoch": 135.34, "learning_rate": 1.9552103559870554e-06, "loss": 0.095, "step": 348490 }, { "epoch": 135.34, "learning_rate": 1.9546925566343046e-06, "loss": 0.0164, "step": 348500 }, { "epoch": 135.34, "learning_rate": 1.954174757281554e-06, "loss": 0.0601, "step": 348510 }, { "epoch": 135.35, "learning_rate": 1.953656957928803e-06, "loss": 0.0344, "step": 348520 }, { "epoch": 135.35, "learning_rate": 1.9531391585760522e-06, "loss": 0.0097, "step": 348530 }, { "epoch": 135.36, "learning_rate": 1.952621359223301e-06, "loss": 0.0221, "step": 348540 }, { "epoch": 135.36, "learning_rate": 1.95210355987055e-06, "loss": 0.0219, "step": 348550 }, { "epoch": 135.36, "learning_rate": 1.9515857605177994e-06, "loss": 0.0303, "step": 348560 }, { "epoch": 135.37, "learning_rate": 1.9510679611650486e-06, "loss": 0.0086, "step": 348570 }, { "epoch": 135.37, "learning_rate": 1.9505501618122978e-06, "loss": 0.0002, "step": 348580 }, { "epoch": 135.37, "learning_rate": 1.950032362459547e-06, "loss": 0.0358, "step": 348590 }, { "epoch": 135.38, "learning_rate": 1.949514563106796e-06, "loss": 0.0133, "step": 348600 }, { "epoch": 135.38, "learning_rate": 1.9489967637540454e-06, "loss": 0.0301, "step": 348610 }, { "epoch": 135.39, "learning_rate": 1.9484789644012946e-06, "loss": 0.027, "step": 348620 }, { "epoch": 135.39, "learning_rate": 1.9479611650485437e-06, "loss": 0.021, "step": 348630 }, { "epoch": 135.39, "learning_rate": 1.947443365695793e-06, "loss": 0.0179, "step": 348640 }, { "epoch": 135.4, "learning_rate": 1.946925566343042e-06, "loss": 0.0535, "step": 348650 }, { "epoch": 135.4, "learning_rate": 1.9464077669902913e-06, "loss": 0.0379, "step": 348660 }, { "epoch": 135.41, "learning_rate": 1.9458899676375405e-06, "loss": 0.0795, "step": 348670 }, { "epoch": 135.41, "learning_rate": 1.9453721682847897e-06, "loss": 0.0771, "step": 348680 }, { "epoch": 135.41, "learning_rate": 1.944854368932039e-06, "loss": 0.0989, "step": 348690 }, { "epoch": 135.42, "learning_rate": 1.944336569579288e-06, "loss": 0.0123, "step": 348700 }, { "epoch": 135.42, "learning_rate": 1.9438187702265373e-06, "loss": 0.1092, "step": 348710 }, { "epoch": 135.43, "learning_rate": 1.9433009708737865e-06, "loss": 0.0005, "step": 348720 }, { "epoch": 135.43, "learning_rate": 1.9427831715210357e-06, "loss": 0.0497, "step": 348730 }, { "epoch": 135.43, "learning_rate": 1.942265372168285e-06, "loss": 0.1345, "step": 348740 }, { "epoch": 135.44, "learning_rate": 1.941747572815534e-06, "loss": 0.0835, "step": 348750 }, { "epoch": 135.44, "learning_rate": 1.9412297734627833e-06, "loss": 0.0391, "step": 348760 }, { "epoch": 135.44, "learning_rate": 1.9407119741100325e-06, "loss": 0.0607, "step": 348770 }, { "epoch": 135.45, "learning_rate": 1.9401941747572817e-06, "loss": 0.0687, "step": 348780 }, { "epoch": 135.45, "learning_rate": 1.939676375404531e-06, "loss": 0.0835, "step": 348790 }, { "epoch": 135.46, "learning_rate": 1.93915857605178e-06, "loss": 0.0184, "step": 348800 }, { "epoch": 135.46, "learning_rate": 1.9386407766990292e-06, "loss": 0.0004, "step": 348810 }, { "epoch": 135.46, "learning_rate": 1.9381229773462784e-06, "loss": 0.0575, "step": 348820 }, { "epoch": 135.47, "learning_rate": 1.9376051779935276e-06, "loss": 0.0435, "step": 348830 }, { "epoch": 135.47, "learning_rate": 1.937087378640777e-06, "loss": 0.0004, "step": 348840 }, { "epoch": 135.48, "learning_rate": 1.936569579288026e-06, "loss": 0.0095, "step": 348850 }, { "epoch": 135.48, "learning_rate": 1.9360517799352752e-06, "loss": 0.06, "step": 348860 }, { "epoch": 135.48, "learning_rate": 1.9355339805825244e-06, "loss": 0.1928, "step": 348870 }, { "epoch": 135.49, "learning_rate": 1.9350161812297736e-06, "loss": 0.0006, "step": 348880 }, { "epoch": 135.49, "learning_rate": 1.934498381877023e-06, "loss": 0.0519, "step": 348890 }, { "epoch": 135.5, "learning_rate": 1.933980582524272e-06, "loss": 0.0175, "step": 348900 }, { "epoch": 135.5, "learning_rate": 1.933462783171521e-06, "loss": 0.0286, "step": 348910 }, { "epoch": 135.5, "learning_rate": 1.9329449838187704e-06, "loss": 0.0183, "step": 348920 }, { "epoch": 135.51, "learning_rate": 1.9324271844660196e-06, "loss": 0.0337, "step": 348930 }, { "epoch": 135.51, "learning_rate": 1.9319093851132688e-06, "loss": 0.0202, "step": 348940 }, { "epoch": 135.51, "learning_rate": 1.931391585760518e-06, "loss": 0.1068, "step": 348950 }, { "epoch": 135.52, "learning_rate": 1.930873786407767e-06, "loss": 0.008, "step": 348960 }, { "epoch": 135.52, "learning_rate": 1.9303559870550164e-06, "loss": 0.0308, "step": 348970 }, { "epoch": 135.53, "learning_rate": 1.9298381877022656e-06, "loss": 0.0019, "step": 348980 }, { "epoch": 135.53, "learning_rate": 1.9293203883495148e-06, "loss": 0.0225, "step": 348990 }, { "epoch": 135.53, "learning_rate": 1.928802588996764e-06, "loss": 0.1176, "step": 349000 }, { "epoch": 135.54, "learning_rate": 1.928284789644013e-06, "loss": 0.0003, "step": 349010 }, { "epoch": 135.54, "learning_rate": 1.9277669902912623e-06, "loss": 0.001, "step": 349020 }, { "epoch": 135.55, "learning_rate": 1.9272491909385115e-06, "loss": 0.0802, "step": 349030 }, { "epoch": 135.55, "learning_rate": 1.9267313915857607e-06, "loss": 0.0406, "step": 349040 }, { "epoch": 135.55, "learning_rate": 1.92621359223301e-06, "loss": 0.0267, "step": 349050 }, { "epoch": 135.56, "learning_rate": 1.925695792880259e-06, "loss": 0.1438, "step": 349060 }, { "epoch": 135.56, "learning_rate": 1.9251779935275083e-06, "loss": 0.0102, "step": 349070 }, { "epoch": 135.57, "learning_rate": 1.9246601941747575e-06, "loss": 0.0269, "step": 349080 }, { "epoch": 135.57, "learning_rate": 1.9241423948220067e-06, "loss": 0.0477, "step": 349090 }, { "epoch": 135.57, "learning_rate": 1.923624595469256e-06, "loss": 0.0878, "step": 349100 }, { "epoch": 135.58, "learning_rate": 1.923106796116505e-06, "loss": 0.0729, "step": 349110 }, { "epoch": 135.58, "learning_rate": 1.9225889967637543e-06, "loss": 0.0144, "step": 349120 }, { "epoch": 135.58, "learning_rate": 1.9220711974110035e-06, "loss": 0.0297, "step": 349130 }, { "epoch": 135.59, "learning_rate": 1.9215533980582527e-06, "loss": 0.0209, "step": 349140 }, { "epoch": 135.59, "learning_rate": 1.921035598705502e-06, "loss": 0.0001, "step": 349150 }, { "epoch": 135.6, "learning_rate": 1.920517799352751e-06, "loss": 0.0856, "step": 349160 }, { "epoch": 135.6, "learning_rate": 1.9200000000000003e-06, "loss": 0.1308, "step": 349170 }, { "epoch": 135.6, "learning_rate": 1.9194822006472494e-06, "loss": 0.1724, "step": 349180 }, { "epoch": 135.61, "learning_rate": 1.9189644012944986e-06, "loss": 0.0259, "step": 349190 }, { "epoch": 135.61, "learning_rate": 1.918446601941748e-06, "loss": 0.0001, "step": 349200 }, { "epoch": 135.62, "learning_rate": 1.917928802588997e-06, "loss": 0.0011, "step": 349210 }, { "epoch": 135.62, "learning_rate": 1.9174110032362462e-06, "loss": 0.0002, "step": 349220 }, { "epoch": 135.62, "learning_rate": 1.9168932038834954e-06, "loss": 0.0367, "step": 349230 }, { "epoch": 135.63, "learning_rate": 1.9163754045307446e-06, "loss": 0.072, "step": 349240 }, { "epoch": 135.63, "learning_rate": 1.915857605177994e-06, "loss": 0.0095, "step": 349250 }, { "epoch": 135.63, "learning_rate": 1.915339805825243e-06, "loss": 0.1022, "step": 349260 }, { "epoch": 135.64, "learning_rate": 1.914822006472492e-06, "loss": 0.0042, "step": 349270 }, { "epoch": 135.64, "learning_rate": 1.9143042071197414e-06, "loss": 0.0663, "step": 349280 }, { "epoch": 135.65, "learning_rate": 1.9137864077669906e-06, "loss": 0.0775, "step": 349290 }, { "epoch": 135.65, "learning_rate": 1.9132686084142398e-06, "loss": 0.0356, "step": 349300 }, { "epoch": 135.65, "learning_rate": 1.912750809061489e-06, "loss": 0.0005, "step": 349310 }, { "epoch": 135.66, "learning_rate": 1.912233009708738e-06, "loss": 0.0273, "step": 349320 }, { "epoch": 135.66, "learning_rate": 1.9117152103559874e-06, "loss": 0.0496, "step": 349330 }, { "epoch": 135.67, "learning_rate": 1.9111974110032366e-06, "loss": 0.0002, "step": 349340 }, { "epoch": 135.67, "learning_rate": 1.9106796116504858e-06, "loss": 0.0083, "step": 349350 }, { "epoch": 135.67, "learning_rate": 1.9101618122977345e-06, "loss": 0.0625, "step": 349360 }, { "epoch": 135.68, "learning_rate": 1.9096440129449837e-06, "loss": 0.1178, "step": 349370 }, { "epoch": 135.68, "learning_rate": 1.909126213592233e-06, "loss": 0.0615, "step": 349380 }, { "epoch": 135.69, "learning_rate": 1.908608414239482e-06, "loss": 0.0119, "step": 349390 }, { "epoch": 135.69, "learning_rate": 1.9080906148867313e-06, "loss": 0.0001, "step": 349400 }, { "epoch": 135.69, "learning_rate": 1.9075728155339805e-06, "loss": 0.0222, "step": 349410 }, { "epoch": 135.7, "learning_rate": 1.90705501618123e-06, "loss": 0.1245, "step": 349420 }, { "epoch": 135.7, "learning_rate": 1.906537216828479e-06, "loss": 0.027, "step": 349430 }, { "epoch": 135.7, "learning_rate": 1.9060194174757283e-06, "loss": 0.0753, "step": 349440 }, { "epoch": 135.71, "learning_rate": 1.9055016181229775e-06, "loss": 0.1084, "step": 349450 }, { "epoch": 135.71, "learning_rate": 1.9049838187702267e-06, "loss": 0.0091, "step": 349460 }, { "epoch": 135.72, "learning_rate": 1.9044660194174759e-06, "loss": 0.03, "step": 349470 }, { "epoch": 135.72, "learning_rate": 1.9039482200647249e-06, "loss": 0.0069, "step": 349480 }, { "epoch": 135.72, "learning_rate": 1.903430420711974e-06, "loss": 0.0004, "step": 349490 }, { "epoch": 135.73, "learning_rate": 1.9029126213592232e-06, "loss": 0.0595, "step": 349500 }, { "epoch": 135.73, "learning_rate": 1.9023948220064724e-06, "loss": 0.0662, "step": 349510 }, { "epoch": 135.74, "learning_rate": 1.9018770226537216e-06, "loss": 0.0317, "step": 349520 }, { "epoch": 135.74, "learning_rate": 1.9013592233009708e-06, "loss": 0.0577, "step": 349530 }, { "epoch": 135.74, "learning_rate": 1.9008414239482204e-06, "loss": 0.0085, "step": 349540 }, { "epoch": 135.75, "learning_rate": 1.9003236245954696e-06, "loss": 0.0823, "step": 349550 }, { "epoch": 135.75, "learning_rate": 1.8998058252427186e-06, "loss": 0.0002, "step": 349560 }, { "epoch": 135.76, "learning_rate": 1.8992880258899678e-06, "loss": 0.0329, "step": 349570 }, { "epoch": 135.76, "learning_rate": 1.898770226537217e-06, "loss": 0.0726, "step": 349580 }, { "epoch": 135.76, "learning_rate": 1.8982524271844662e-06, "loss": 0.0589, "step": 349590 }, { "epoch": 135.77, "learning_rate": 1.8977346278317154e-06, "loss": 0.0051, "step": 349600 }, { "epoch": 135.77, "learning_rate": 1.8972168284789646e-06, "loss": 0.0283, "step": 349610 }, { "epoch": 135.77, "learning_rate": 1.8966990291262138e-06, "loss": 0.0574, "step": 349620 }, { "epoch": 135.78, "learning_rate": 1.896181229773463e-06, "loss": 0.0185, "step": 349630 }, { "epoch": 135.78, "learning_rate": 1.8956634304207122e-06, "loss": 0.064, "step": 349640 }, { "epoch": 135.79, "learning_rate": 1.8951456310679614e-06, "loss": 0.0113, "step": 349650 }, { "epoch": 135.79, "learning_rate": 1.8946278317152106e-06, "loss": 0.0296, "step": 349660 }, { "epoch": 135.79, "learning_rate": 1.8941100323624598e-06, "loss": 0.0621, "step": 349670 }, { "epoch": 135.8, "learning_rate": 1.893592233009709e-06, "loss": 0.1406, "step": 349680 }, { "epoch": 135.8, "learning_rate": 1.8930744336569582e-06, "loss": 0.0761, "step": 349690 }, { "epoch": 135.81, "learning_rate": 1.8925566343042074e-06, "loss": 0.0693, "step": 349700 }, { "epoch": 135.81, "learning_rate": 1.8920388349514565e-06, "loss": 0.0006, "step": 349710 }, { "epoch": 135.81, "learning_rate": 1.8915210355987057e-06, "loss": 0.0521, "step": 349720 }, { "epoch": 135.82, "learning_rate": 1.891003236245955e-06, "loss": 0.0614, "step": 349730 }, { "epoch": 135.82, "learning_rate": 1.8904854368932041e-06, "loss": 0.0173, "step": 349740 }, { "epoch": 135.83, "learning_rate": 1.8899676375404533e-06, "loss": 0.0007, "step": 349750 }, { "epoch": 135.83, "learning_rate": 1.8894498381877025e-06, "loss": 0.0096, "step": 349760 }, { "epoch": 135.83, "learning_rate": 1.8889320388349517e-06, "loss": 0.0198, "step": 349770 }, { "epoch": 135.84, "learning_rate": 1.888414239482201e-06, "loss": 0.0405, "step": 349780 }, { "epoch": 135.84, "learning_rate": 1.88789644012945e-06, "loss": 0.0196, "step": 349790 }, { "epoch": 135.84, "learning_rate": 1.887378640776699e-06, "loss": 0.0085, "step": 349800 }, { "epoch": 135.85, "learning_rate": 1.8868608414239483e-06, "loss": 0.0487, "step": 349810 }, { "epoch": 135.85, "learning_rate": 1.8863430420711975e-06, "loss": 0.0835, "step": 349820 }, { "epoch": 135.86, "learning_rate": 1.8858252427184467e-06, "loss": 0.0807, "step": 349830 }, { "epoch": 135.86, "learning_rate": 1.8853074433656959e-06, "loss": 0.0009, "step": 349840 }, { "epoch": 135.86, "learning_rate": 1.884789644012945e-06, "loss": 0.0252, "step": 349850 }, { "epoch": 135.87, "learning_rate": 1.8842718446601943e-06, "loss": 0.0328, "step": 349860 }, { "epoch": 135.87, "learning_rate": 1.8837540453074434e-06, "loss": 0.0257, "step": 349870 }, { "epoch": 135.88, "learning_rate": 1.8832362459546926e-06, "loss": 0.0361, "step": 349880 }, { "epoch": 135.88, "learning_rate": 1.8827184466019418e-06, "loss": 0.0007, "step": 349890 }, { "epoch": 135.88, "learning_rate": 1.882200647249191e-06, "loss": 0.0715, "step": 349900 }, { "epoch": 135.89, "learning_rate": 1.8816828478964402e-06, "loss": 0.1279, "step": 349910 }, { "epoch": 135.89, "learning_rate": 1.8811650485436894e-06, "loss": 0.0352, "step": 349920 }, { "epoch": 135.9, "learning_rate": 1.8806472491909386e-06, "loss": 0.0564, "step": 349930 }, { "epoch": 135.9, "learning_rate": 1.8801294498381878e-06, "loss": 0.1019, "step": 349940 }, { "epoch": 135.9, "learning_rate": 1.879611650485437e-06, "loss": 0.0001, "step": 349950 }, { "epoch": 135.91, "learning_rate": 1.8790938511326862e-06, "loss": 0.0002, "step": 349960 }, { "epoch": 135.91, "learning_rate": 1.8785760517799354e-06, "loss": 0.0193, "step": 349970 }, { "epoch": 135.91, "learning_rate": 1.8780582524271846e-06, "loss": 0.0002, "step": 349980 }, { "epoch": 135.92, "learning_rate": 1.8775404530744338e-06, "loss": 0.0364, "step": 349990 }, { "epoch": 135.92, "learning_rate": 1.877022653721683e-06, "loss": 0.0558, "step": 350000 }, { "epoch": 135.93, "learning_rate": 1.8765048543689322e-06, "loss": 0.0086, "step": 350010 }, { "epoch": 135.93, "learning_rate": 1.8759870550161814e-06, "loss": 0.015, "step": 350020 }, { "epoch": 135.93, "learning_rate": 1.8754692556634306e-06, "loss": 0.0229, "step": 350030 }, { "epoch": 135.94, "learning_rate": 1.8749514563106798e-06, "loss": 0.1389, "step": 350040 }, { "epoch": 135.94, "learning_rate": 1.8744336569579287e-06, "loss": 0.0347, "step": 350050 }, { "epoch": 135.95, "learning_rate": 1.873915857605178e-06, "loss": 0.044, "step": 350060 }, { "epoch": 135.95, "learning_rate": 1.8733980582524271e-06, "loss": 0.0577, "step": 350070 }, { "epoch": 135.95, "learning_rate": 1.8728802588996763e-06, "loss": 0.0403, "step": 350080 }, { "epoch": 135.96, "learning_rate": 1.8723624595469255e-06, "loss": 0.0088, "step": 350090 }, { "epoch": 135.96, "learning_rate": 1.8718446601941751e-06, "loss": 0.0019, "step": 350100 }, { "epoch": 135.97, "learning_rate": 1.8713268608414243e-06, "loss": 0.0255, "step": 350110 }, { "epoch": 135.97, "learning_rate": 1.8708090614886735e-06, "loss": 0.0742, "step": 350120 }, { "epoch": 135.97, "learning_rate": 1.8702912621359225e-06, "loss": 0.0009, "step": 350130 }, { "epoch": 135.98, "learning_rate": 1.8697734627831717e-06, "loss": 0.0214, "step": 350140 }, { "epoch": 135.98, "learning_rate": 1.8692556634304209e-06, "loss": 0.0335, "step": 350150 }, { "epoch": 135.98, "learning_rate": 1.86873786407767e-06, "loss": 0.0029, "step": 350160 }, { "epoch": 135.99, "learning_rate": 1.8682200647249193e-06, "loss": 0.0541, "step": 350170 }, { "epoch": 135.99, "learning_rate": 1.8677022653721685e-06, "loss": 0.0318, "step": 350180 }, { "epoch": 136.0, "learning_rate": 1.8671844660194177e-06, "loss": 0.0468, "step": 350190 }, { "epoch": 136.0, "learning_rate": 1.8666666666666669e-06, "loss": 0.015, "step": 350200 }, { "epoch": 136.0, "eval_accuracy": 0.9529573590096286, "eval_loss": 0.3846341073513031, "eval_runtime": 8.1975, "eval_samples_per_second": 443.428, "eval_steps_per_second": 55.505, "step": 350200 }, { "epoch": 136.0, "learning_rate": 1.866148867313916e-06, "loss": 0.0001, "step": 350210 }, { "epoch": 136.01, "learning_rate": 1.8656310679611653e-06, "loss": 0.1259, "step": 350220 }, { "epoch": 136.01, "learning_rate": 1.8651132686084144e-06, "loss": 0.0715, "step": 350230 }, { "epoch": 136.02, "learning_rate": 1.8645954692556636e-06, "loss": 0.0381, "step": 350240 }, { "epoch": 136.02, "learning_rate": 1.8640776699029128e-06, "loss": 0.0181, "step": 350250 }, { "epoch": 136.02, "learning_rate": 1.863559870550162e-06, "loss": 0.0593, "step": 350260 }, { "epoch": 136.03, "learning_rate": 1.8630420711974112e-06, "loss": 0.0561, "step": 350270 }, { "epoch": 136.03, "learning_rate": 1.8625242718446604e-06, "loss": 0.0094, "step": 350280 }, { "epoch": 136.03, "learning_rate": 1.8620064724919096e-06, "loss": 0.0001, "step": 350290 }, { "epoch": 136.04, "learning_rate": 1.8614886731391588e-06, "loss": 0.0909, "step": 350300 }, { "epoch": 136.04, "learning_rate": 1.860970873786408e-06, "loss": 0.0155, "step": 350310 }, { "epoch": 136.05, "learning_rate": 1.8604530744336572e-06, "loss": 0.0398, "step": 350320 }, { "epoch": 136.05, "learning_rate": 1.8599352750809064e-06, "loss": 0.0733, "step": 350330 }, { "epoch": 136.05, "learning_rate": 1.8594174757281556e-06, "loss": 0.028, "step": 350340 }, { "epoch": 136.06, "learning_rate": 1.8588996763754048e-06, "loss": 0.1093, "step": 350350 }, { "epoch": 136.06, "learning_rate": 1.858381877022654e-06, "loss": 0.0181, "step": 350360 }, { "epoch": 136.07, "learning_rate": 1.857864077669903e-06, "loss": 0.0577, "step": 350370 }, { "epoch": 136.07, "learning_rate": 1.8573462783171522e-06, "loss": 0.0449, "step": 350380 }, { "epoch": 136.07, "learning_rate": 1.8568284789644013e-06, "loss": 0.0064, "step": 350390 }, { "epoch": 136.08, "learning_rate": 1.8563106796116505e-06, "loss": 0.0759, "step": 350400 }, { "epoch": 136.08, "learning_rate": 1.8557928802588997e-06, "loss": 0.0769, "step": 350410 }, { "epoch": 136.09, "learning_rate": 1.855275080906149e-06, "loss": 0.0001, "step": 350420 }, { "epoch": 136.09, "learning_rate": 1.8547572815533981e-06, "loss": 0.0614, "step": 350430 }, { "epoch": 136.09, "learning_rate": 1.8542394822006473e-06, "loss": 0.001, "step": 350440 }, { "epoch": 136.1, "learning_rate": 1.8537216828478965e-06, "loss": 0.0113, "step": 350450 }, { "epoch": 136.1, "learning_rate": 1.8532038834951457e-06, "loss": 0.0414, "step": 350460 }, { "epoch": 136.1, "learning_rate": 1.852686084142395e-06, "loss": 0.0427, "step": 350470 }, { "epoch": 136.11, "learning_rate": 1.852168284789644e-06, "loss": 0.0455, "step": 350480 }, { "epoch": 136.11, "learning_rate": 1.8516504854368933e-06, "loss": 0.0003, "step": 350490 }, { "epoch": 136.12, "learning_rate": 1.8511326860841425e-06, "loss": 0.017, "step": 350500 }, { "epoch": 136.12, "learning_rate": 1.8506148867313917e-06, "loss": 0.0003, "step": 350510 }, { "epoch": 136.12, "learning_rate": 1.8500970873786409e-06, "loss": 0.0008, "step": 350520 }, { "epoch": 136.13, "learning_rate": 1.84957928802589e-06, "loss": 0.0257, "step": 350530 }, { "epoch": 136.13, "learning_rate": 1.8490614886731393e-06, "loss": 0.0572, "step": 350540 }, { "epoch": 136.14, "learning_rate": 1.8485436893203885e-06, "loss": 0.0005, "step": 350550 }, { "epoch": 136.14, "learning_rate": 1.8480258899676377e-06, "loss": 0.0155, "step": 350560 }, { "epoch": 136.14, "learning_rate": 1.8475080906148869e-06, "loss": 0.0508, "step": 350570 }, { "epoch": 136.15, "learning_rate": 1.846990291262136e-06, "loss": 0.0002, "step": 350580 }, { "epoch": 136.15, "learning_rate": 1.8464724919093852e-06, "loss": 0.002, "step": 350590 }, { "epoch": 136.16, "learning_rate": 1.8459546925566344e-06, "loss": 0.0001, "step": 350600 }, { "epoch": 136.16, "learning_rate": 1.8454368932038836e-06, "loss": 0.0003, "step": 350610 }, { "epoch": 136.16, "learning_rate": 1.8449190938511326e-06, "loss": 0.038, "step": 350620 }, { "epoch": 136.17, "learning_rate": 1.8444012944983818e-06, "loss": 0.0411, "step": 350630 }, { "epoch": 136.17, "learning_rate": 1.843883495145631e-06, "loss": 0.0166, "step": 350640 }, { "epoch": 136.17, "learning_rate": 1.8433656957928802e-06, "loss": 0.0187, "step": 350650 }, { "epoch": 136.18, "learning_rate": 1.8428478964401298e-06, "loss": 0.0376, "step": 350660 }, { "epoch": 136.18, "learning_rate": 1.842330097087379e-06, "loss": 0.0758, "step": 350670 }, { "epoch": 136.19, "learning_rate": 1.8418122977346282e-06, "loss": 0.0797, "step": 350680 }, { "epoch": 136.19, "learning_rate": 1.8412944983818774e-06, "loss": 0.1447, "step": 350690 }, { "epoch": 136.19, "learning_rate": 1.8407766990291264e-06, "loss": 0.0611, "step": 350700 }, { "epoch": 136.2, "learning_rate": 1.8402588996763756e-06, "loss": 0.0143, "step": 350710 }, { "epoch": 136.2, "learning_rate": 1.8397411003236248e-06, "loss": 0.0616, "step": 350720 }, { "epoch": 136.21, "learning_rate": 1.839223300970874e-06, "loss": 0.0726, "step": 350730 }, { "epoch": 136.21, "learning_rate": 1.8387055016181232e-06, "loss": 0.0001, "step": 350740 }, { "epoch": 136.21, "learning_rate": 1.8381877022653724e-06, "loss": 0.0377, "step": 350750 }, { "epoch": 136.22, "learning_rate": 1.8376699029126215e-06, "loss": 0.018, "step": 350760 }, { "epoch": 136.22, "learning_rate": 1.8371521035598707e-06, "loss": 0.0002, "step": 350770 }, { "epoch": 136.23, "learning_rate": 1.83663430420712e-06, "loss": 0.0255, "step": 350780 }, { "epoch": 136.23, "learning_rate": 1.8361165048543691e-06, "loss": 0.0795, "step": 350790 }, { "epoch": 136.23, "learning_rate": 1.8355987055016183e-06, "loss": 0.009, "step": 350800 }, { "epoch": 136.24, "learning_rate": 1.8350809061488675e-06, "loss": 0.0007, "step": 350810 }, { "epoch": 136.24, "learning_rate": 1.8345631067961167e-06, "loss": 0.0243, "step": 350820 }, { "epoch": 136.24, "learning_rate": 1.834045307443366e-06, "loss": 0.0101, "step": 350830 }, { "epoch": 136.25, "learning_rate": 1.833527508090615e-06, "loss": 0.0054, "step": 350840 }, { "epoch": 136.25, "learning_rate": 1.8330097087378643e-06, "loss": 0.004, "step": 350850 }, { "epoch": 136.26, "learning_rate": 1.8324919093851135e-06, "loss": 0.0476, "step": 350860 }, { "epoch": 136.26, "learning_rate": 1.8319741100323627e-06, "loss": 0.0334, "step": 350870 }, { "epoch": 136.26, "learning_rate": 1.8314563106796119e-06, "loss": 0.0005, "step": 350880 }, { "epoch": 136.27, "learning_rate": 1.830938511326861e-06, "loss": 0.0062, "step": 350890 }, { "epoch": 136.27, "learning_rate": 1.8304207119741103e-06, "loss": 0.0001, "step": 350900 }, { "epoch": 136.28, "learning_rate": 1.8299029126213595e-06, "loss": 0.0229, "step": 350910 }, { "epoch": 136.28, "learning_rate": 1.8293851132686087e-06, "loss": 0.0454, "step": 350920 }, { "epoch": 136.28, "learning_rate": 1.8288673139158579e-06, "loss": 0.256, "step": 350930 }, { "epoch": 136.29, "learning_rate": 1.828349514563107e-06, "loss": 0.0413, "step": 350940 }, { "epoch": 136.29, "learning_rate": 1.827831715210356e-06, "loss": 0.0157, "step": 350950 }, { "epoch": 136.3, "learning_rate": 1.8273139158576052e-06, "loss": 0.0468, "step": 350960 }, { "epoch": 136.3, "learning_rate": 1.8267961165048544e-06, "loss": 0.0877, "step": 350970 }, { "epoch": 136.3, "learning_rate": 1.8262783171521036e-06, "loss": 0.1043, "step": 350980 }, { "epoch": 136.31, "learning_rate": 1.8257605177993528e-06, "loss": 0.0731, "step": 350990 }, { "epoch": 136.31, "learning_rate": 1.825242718446602e-06, "loss": 0.0005, "step": 351000 }, { "epoch": 136.31, "learning_rate": 1.8247249190938512e-06, "loss": 0.0174, "step": 351010 }, { "epoch": 136.32, "learning_rate": 1.8242071197411004e-06, "loss": 0.1207, "step": 351020 }, { "epoch": 136.32, "learning_rate": 1.8236893203883496e-06, "loss": 0.0281, "step": 351030 }, { "epoch": 136.33, "learning_rate": 1.8231715210355988e-06, "loss": 0.0994, "step": 351040 }, { "epoch": 136.33, "learning_rate": 1.822653721682848e-06, "loss": 0.0421, "step": 351050 }, { "epoch": 136.33, "learning_rate": 1.8221359223300972e-06, "loss": 0.0006, "step": 351060 }, { "epoch": 136.34, "learning_rate": 1.8216181229773464e-06, "loss": 0.0201, "step": 351070 }, { "epoch": 136.34, "learning_rate": 1.8211003236245956e-06, "loss": 0.0029, "step": 351080 }, { "epoch": 136.35, "learning_rate": 1.8205825242718448e-06, "loss": 0.0041, "step": 351090 }, { "epoch": 136.35, "learning_rate": 1.820064724919094e-06, "loss": 0.1635, "step": 351100 }, { "epoch": 136.35, "learning_rate": 1.8195469255663431e-06, "loss": 0.0198, "step": 351110 }, { "epoch": 136.36, "learning_rate": 1.8190291262135923e-06, "loss": 0.0089, "step": 351120 }, { "epoch": 136.36, "learning_rate": 1.8185113268608415e-06, "loss": 0.0094, "step": 351130 }, { "epoch": 136.37, "learning_rate": 1.8179935275080907e-06, "loss": 0.0002, "step": 351140 }, { "epoch": 136.37, "learning_rate": 1.81747572815534e-06, "loss": 0.0348, "step": 351150 }, { "epoch": 136.37, "learning_rate": 1.8169579288025891e-06, "loss": 0.0715, "step": 351160 }, { "epoch": 136.38, "learning_rate": 1.8164401294498383e-06, "loss": 0.0411, "step": 351170 }, { "epoch": 136.38, "learning_rate": 1.8159223300970875e-06, "loss": 0.0008, "step": 351180 }, { "epoch": 136.38, "learning_rate": 1.8154045307443365e-06, "loss": 0.0518, "step": 351190 }, { "epoch": 136.39, "learning_rate": 1.8148867313915857e-06, "loss": 0.1041, "step": 351200 }, { "epoch": 136.39, "learning_rate": 1.8143689320388349e-06, "loss": 0.0365, "step": 351210 }, { "epoch": 136.4, "learning_rate": 1.8138511326860845e-06, "loss": 0.0707, "step": 351220 }, { "epoch": 136.4, "learning_rate": 1.8133333333333337e-06, "loss": 0.0189, "step": 351230 }, { "epoch": 136.4, "learning_rate": 1.8128155339805829e-06, "loss": 0.0692, "step": 351240 }, { "epoch": 136.41, "learning_rate": 1.812297734627832e-06, "loss": 0.0313, "step": 351250 }, { "epoch": 136.41, "learning_rate": 1.8117799352750813e-06, "loss": 0.0679, "step": 351260 }, { "epoch": 136.42, "learning_rate": 1.8112621359223303e-06, "loss": 0.0001, "step": 351270 }, { "epoch": 136.42, "learning_rate": 1.8107443365695794e-06, "loss": 0.0214, "step": 351280 }, { "epoch": 136.42, "learning_rate": 1.8102265372168286e-06, "loss": 0.0094, "step": 351290 }, { "epoch": 136.43, "learning_rate": 1.8097087378640778e-06, "loss": 0.1306, "step": 351300 }, { "epoch": 136.43, "learning_rate": 1.809190938511327e-06, "loss": 0.0101, "step": 351310 }, { "epoch": 136.43, "learning_rate": 1.8086731391585762e-06, "loss": 0.0261, "step": 351320 }, { "epoch": 136.44, "learning_rate": 1.8081553398058254e-06, "loss": 0.0599, "step": 351330 }, { "epoch": 136.44, "learning_rate": 1.8076375404530746e-06, "loss": 0.0003, "step": 351340 }, { "epoch": 136.45, "learning_rate": 1.8071197411003238e-06, "loss": 0.0414, "step": 351350 }, { "epoch": 136.45, "learning_rate": 1.806601941747573e-06, "loss": 0.1387, "step": 351360 }, { "epoch": 136.45, "learning_rate": 1.8060841423948222e-06, "loss": 0.067, "step": 351370 }, { "epoch": 136.46, "learning_rate": 1.8055663430420714e-06, "loss": 0.0024, "step": 351380 }, { "epoch": 136.46, "learning_rate": 1.8050485436893206e-06, "loss": 0.0167, "step": 351390 }, { "epoch": 136.47, "learning_rate": 1.8045307443365698e-06, "loss": 0.0656, "step": 351400 }, { "epoch": 136.47, "learning_rate": 1.804012944983819e-06, "loss": 0.0157, "step": 351410 }, { "epoch": 136.47, "learning_rate": 1.8034951456310682e-06, "loss": 0.0313, "step": 351420 }, { "epoch": 136.48, "learning_rate": 1.8029773462783174e-06, "loss": 0.1502, "step": 351430 }, { "epoch": 136.48, "learning_rate": 1.8024595469255666e-06, "loss": 0.0565, "step": 351440 }, { "epoch": 136.49, "learning_rate": 1.8019417475728158e-06, "loss": 0.0114, "step": 351450 }, { "epoch": 136.49, "learning_rate": 1.801423948220065e-06, "loss": 0.0077, "step": 351460 }, { "epoch": 136.49, "learning_rate": 1.8009061488673141e-06, "loss": 0.0336, "step": 351470 }, { "epoch": 136.5, "learning_rate": 1.8003883495145633e-06, "loss": 0.0001, "step": 351480 }, { "epoch": 136.5, "learning_rate": 1.7998705501618125e-06, "loss": 0.1271, "step": 351490 }, { "epoch": 136.5, "learning_rate": 1.7993527508090617e-06, "loss": 0.0207, "step": 351500 }, { "epoch": 136.51, "learning_rate": 1.798834951456311e-06, "loss": 0.1185, "step": 351510 }, { "epoch": 136.51, "learning_rate": 1.79831715210356e-06, "loss": 0.0005, "step": 351520 }, { "epoch": 136.52, "learning_rate": 1.797799352750809e-06, "loss": 0.0506, "step": 351530 }, { "epoch": 136.52, "learning_rate": 1.7972815533980583e-06, "loss": 0.0149, "step": 351540 }, { "epoch": 136.52, "learning_rate": 1.7967637540453075e-06, "loss": 0.0865, "step": 351550 }, { "epoch": 136.53, "learning_rate": 1.7962459546925567e-06, "loss": 0.0014, "step": 351560 }, { "epoch": 136.53, "learning_rate": 1.7957281553398059e-06, "loss": 0.0012, "step": 351570 }, { "epoch": 136.54, "learning_rate": 1.795210355987055e-06, "loss": 0.0163, "step": 351580 }, { "epoch": 136.54, "learning_rate": 1.7946925566343043e-06, "loss": 0.0014, "step": 351590 }, { "epoch": 136.54, "learning_rate": 1.7941747572815535e-06, "loss": 0.0006, "step": 351600 }, { "epoch": 136.55, "learning_rate": 1.7936569579288027e-06, "loss": 0.001, "step": 351610 }, { "epoch": 136.55, "learning_rate": 1.7931391585760519e-06, "loss": 0.053, "step": 351620 }, { "epoch": 136.56, "learning_rate": 1.792621359223301e-06, "loss": 0.0802, "step": 351630 }, { "epoch": 136.56, "learning_rate": 1.7921035598705502e-06, "loss": 0.065, "step": 351640 }, { "epoch": 136.56, "learning_rate": 1.7915857605177994e-06, "loss": 0.0223, "step": 351650 }, { "epoch": 136.57, "learning_rate": 1.7910679611650486e-06, "loss": 0.0516, "step": 351660 }, { "epoch": 136.57, "learning_rate": 1.7905501618122978e-06, "loss": 0.0793, "step": 351670 }, { "epoch": 136.57, "learning_rate": 1.790032362459547e-06, "loss": 0.0048, "step": 351680 }, { "epoch": 136.58, "learning_rate": 1.7895145631067962e-06, "loss": 0.0522, "step": 351690 }, { "epoch": 136.58, "learning_rate": 1.7889967637540454e-06, "loss": 0.0392, "step": 351700 }, { "epoch": 136.59, "learning_rate": 1.7884789644012946e-06, "loss": 0.0169, "step": 351710 }, { "epoch": 136.59, "learning_rate": 1.7879611650485438e-06, "loss": 0.0023, "step": 351720 }, { "epoch": 136.59, "learning_rate": 1.787443365695793e-06, "loss": 0.1591, "step": 351730 }, { "epoch": 136.6, "learning_rate": 1.7869255663430422e-06, "loss": 0.0309, "step": 351740 }, { "epoch": 136.6, "learning_rate": 1.7864077669902914e-06, "loss": 0.0171, "step": 351750 }, { "epoch": 136.61, "learning_rate": 1.7858899676375404e-06, "loss": 0.078, "step": 351760 }, { "epoch": 136.61, "learning_rate": 1.7853721682847896e-06, "loss": 0.0004, "step": 351770 }, { "epoch": 136.61, "learning_rate": 1.7848543689320392e-06, "loss": 0.0631, "step": 351780 }, { "epoch": 136.62, "learning_rate": 1.7843365695792884e-06, "loss": 0.0341, "step": 351790 }, { "epoch": 136.62, "learning_rate": 1.7838187702265376e-06, "loss": 0.0214, "step": 351800 }, { "epoch": 136.63, "learning_rate": 1.7833009708737868e-06, "loss": 0.0612, "step": 351810 }, { "epoch": 136.63, "learning_rate": 1.782783171521036e-06, "loss": 0.0119, "step": 351820 }, { "epoch": 136.63, "learning_rate": 1.7822653721682851e-06, "loss": 0.0082, "step": 351830 }, { "epoch": 136.64, "learning_rate": 1.7817475728155341e-06, "loss": 0.0746, "step": 351840 }, { "epoch": 136.64, "learning_rate": 1.7812297734627833e-06, "loss": 0.0231, "step": 351850 }, { "epoch": 136.64, "learning_rate": 1.7807119741100325e-06, "loss": 0.0176, "step": 351860 }, { "epoch": 136.65, "learning_rate": 1.7801941747572817e-06, "loss": 0.0311, "step": 351870 }, { "epoch": 136.65, "learning_rate": 1.779676375404531e-06, "loss": 0.0011, "step": 351880 }, { "epoch": 136.66, "learning_rate": 1.77915857605178e-06, "loss": 0.0001, "step": 351890 }, { "epoch": 136.66, "learning_rate": 1.7786407766990293e-06, "loss": 0.0324, "step": 351900 }, { "epoch": 136.66, "learning_rate": 1.7781229773462785e-06, "loss": 0.0752, "step": 351910 }, { "epoch": 136.67, "learning_rate": 1.7776051779935277e-06, "loss": 0.0601, "step": 351920 }, { "epoch": 136.67, "learning_rate": 1.7770873786407769e-06, "loss": 0.0863, "step": 351930 }, { "epoch": 136.68, "learning_rate": 1.776569579288026e-06, "loss": 0.0935, "step": 351940 }, { "epoch": 136.68, "learning_rate": 1.7760517799352753e-06, "loss": 0.0537, "step": 351950 }, { "epoch": 136.68, "learning_rate": 1.7755339805825245e-06, "loss": 0.093, "step": 351960 }, { "epoch": 136.69, "learning_rate": 1.7750161812297737e-06, "loss": 0.0826, "step": 351970 }, { "epoch": 136.69, "learning_rate": 1.7744983818770229e-06, "loss": 0.0104, "step": 351980 }, { "epoch": 136.7, "learning_rate": 1.773980582524272e-06, "loss": 0.0718, "step": 351990 }, { "epoch": 136.7, "learning_rate": 1.7734627831715212e-06, "loss": 0.0507, "step": 352000 }, { "epoch": 136.7, "learning_rate": 1.7729449838187704e-06, "loss": 0.0005, "step": 352010 }, { "epoch": 136.71, "learning_rate": 1.7724271844660196e-06, "loss": 0.1083, "step": 352020 }, { "epoch": 136.71, "learning_rate": 1.7719093851132688e-06, "loss": 0.0169, "step": 352030 }, { "epoch": 136.71, "learning_rate": 1.771391585760518e-06, "loss": 0.0202, "step": 352040 }, { "epoch": 136.72, "learning_rate": 1.7708737864077672e-06, "loss": 0.0527, "step": 352050 }, { "epoch": 136.72, "learning_rate": 1.7703559870550164e-06, "loss": 0.0543, "step": 352060 }, { "epoch": 136.73, "learning_rate": 1.7698381877022656e-06, "loss": 0.0154, "step": 352070 }, { "epoch": 136.73, "learning_rate": 1.7693203883495148e-06, "loss": 0.0159, "step": 352080 }, { "epoch": 136.73, "learning_rate": 1.7688025889967638e-06, "loss": 0.0786, "step": 352090 }, { "epoch": 136.74, "learning_rate": 1.768284789644013e-06, "loss": 0.0104, "step": 352100 }, { "epoch": 136.74, "learning_rate": 1.7677669902912622e-06, "loss": 0.0662, "step": 352110 }, { "epoch": 136.75, "learning_rate": 1.7672491909385114e-06, "loss": 0.0548, "step": 352120 }, { "epoch": 136.75, "learning_rate": 1.7667313915857606e-06, "loss": 0.0133, "step": 352130 }, { "epoch": 136.75, "learning_rate": 1.7662135922330098e-06, "loss": 0.0104, "step": 352140 }, { "epoch": 136.76, "learning_rate": 1.765695792880259e-06, "loss": 0.0037, "step": 352150 }, { "epoch": 136.76, "learning_rate": 1.7651779935275081e-06, "loss": 0.085, "step": 352160 }, { "epoch": 136.77, "learning_rate": 1.7646601941747573e-06, "loss": 0.0001, "step": 352170 }, { "epoch": 136.77, "learning_rate": 1.7641423948220065e-06, "loss": 0.0739, "step": 352180 }, { "epoch": 136.77, "learning_rate": 1.7636245954692557e-06, "loss": 0.0589, "step": 352190 }, { "epoch": 136.78, "learning_rate": 1.763106796116505e-06, "loss": 0.0233, "step": 352200 }, { "epoch": 136.78, "learning_rate": 1.7625889967637541e-06, "loss": 0.0217, "step": 352210 }, { "epoch": 136.78, "learning_rate": 1.7620711974110033e-06, "loss": 0.0347, "step": 352220 }, { "epoch": 136.79, "learning_rate": 1.7615533980582525e-06, "loss": 0.05, "step": 352230 }, { "epoch": 136.79, "learning_rate": 1.7610355987055017e-06, "loss": 0.0313, "step": 352240 }, { "epoch": 136.8, "learning_rate": 1.760517799352751e-06, "loss": 0.0021, "step": 352250 }, { "epoch": 136.8, "learning_rate": 1.76e-06, "loss": 0.0716, "step": 352260 }, { "epoch": 136.8, "learning_rate": 1.7594822006472493e-06, "loss": 0.0652, "step": 352270 }, { "epoch": 136.81, "learning_rate": 1.7589644012944985e-06, "loss": 0.0455, "step": 352280 }, { "epoch": 136.81, "learning_rate": 1.7584466019417477e-06, "loss": 0.0334, "step": 352290 }, { "epoch": 136.82, "learning_rate": 1.7579288025889969e-06, "loss": 0.029, "step": 352300 }, { "epoch": 136.82, "learning_rate": 1.757411003236246e-06, "loss": 0.0005, "step": 352310 }, { "epoch": 136.82, "learning_rate": 1.7568932038834953e-06, "loss": 0.016, "step": 352320 }, { "epoch": 136.83, "learning_rate": 1.7563754045307442e-06, "loss": 0.04, "step": 352330 }, { "epoch": 136.83, "learning_rate": 1.7558576051779939e-06, "loss": 0.0359, "step": 352340 }, { "epoch": 136.83, "learning_rate": 1.755339805825243e-06, "loss": 0.1028, "step": 352350 }, { "epoch": 136.84, "learning_rate": 1.7548220064724922e-06, "loss": 0.0068, "step": 352360 }, { "epoch": 136.84, "learning_rate": 1.7543042071197414e-06, "loss": 0.017, "step": 352370 }, { "epoch": 136.85, "learning_rate": 1.7537864077669906e-06, "loss": 0.01, "step": 352380 }, { "epoch": 136.85, "learning_rate": 1.7532686084142398e-06, "loss": 0.0726, "step": 352390 }, { "epoch": 136.85, "learning_rate": 1.752750809061489e-06, "loss": 0.0878, "step": 352400 }, { "epoch": 136.86, "learning_rate": 1.752233009708738e-06, "loss": 0.0066, "step": 352410 }, { "epoch": 136.86, "learning_rate": 1.7517152103559872e-06, "loss": 0.0019, "step": 352420 }, { "epoch": 136.87, "learning_rate": 1.7511974110032364e-06, "loss": 0.0294, "step": 352430 }, { "epoch": 136.87, "learning_rate": 1.7506796116504856e-06, "loss": 0.089, "step": 352440 }, { "epoch": 136.87, "learning_rate": 1.7501618122977348e-06, "loss": 0.0341, "step": 352450 }, { "epoch": 136.88, "learning_rate": 1.749644012944984e-06, "loss": 0.0604, "step": 352460 }, { "epoch": 136.88, "learning_rate": 1.7491262135922332e-06, "loss": 0.0001, "step": 352470 }, { "epoch": 136.89, "learning_rate": 1.7486084142394824e-06, "loss": 0.0083, "step": 352480 }, { "epoch": 136.89, "learning_rate": 1.7480906148867316e-06, "loss": 0.1941, "step": 352490 }, { "epoch": 136.89, "learning_rate": 1.7475728155339808e-06, "loss": 0.0003, "step": 352500 }, { "epoch": 136.9, "learning_rate": 1.74705501618123e-06, "loss": 0.0507, "step": 352510 }, { "epoch": 136.9, "learning_rate": 1.7465372168284791e-06, "loss": 0.126, "step": 352520 }, { "epoch": 136.9, "learning_rate": 1.7460194174757283e-06, "loss": 0.1342, "step": 352530 }, { "epoch": 136.91, "learning_rate": 1.7455016181229775e-06, "loss": 0.0366, "step": 352540 }, { "epoch": 136.91, "learning_rate": 1.7449838187702267e-06, "loss": 0.0099, "step": 352550 }, { "epoch": 136.92, "learning_rate": 1.744466019417476e-06, "loss": 0.0387, "step": 352560 }, { "epoch": 136.92, "learning_rate": 1.7439482200647251e-06, "loss": 0.0822, "step": 352570 }, { "epoch": 136.92, "learning_rate": 1.7434304207119743e-06, "loss": 0.0804, "step": 352580 }, { "epoch": 136.93, "learning_rate": 1.7429126213592235e-06, "loss": 0.0865, "step": 352590 }, { "epoch": 136.93, "learning_rate": 1.7423948220064727e-06, "loss": 0.0401, "step": 352600 }, { "epoch": 136.94, "learning_rate": 1.741877022653722e-06, "loss": 0.0078, "step": 352610 }, { "epoch": 136.94, "learning_rate": 1.741359223300971e-06, "loss": 0.1411, "step": 352620 }, { "epoch": 136.94, "learning_rate": 1.7408414239482203e-06, "loss": 0.0197, "step": 352630 }, { "epoch": 136.95, "learning_rate": 1.7403236245954695e-06, "loss": 0.0096, "step": 352640 }, { "epoch": 136.95, "learning_rate": 1.7398058252427187e-06, "loss": 0.0853, "step": 352650 }, { "epoch": 136.96, "learning_rate": 1.7392880258899677e-06, "loss": 0.0235, "step": 352660 }, { "epoch": 136.96, "learning_rate": 1.7387702265372169e-06, "loss": 0.0772, "step": 352670 }, { "epoch": 136.96, "learning_rate": 1.738252427184466e-06, "loss": 0.0318, "step": 352680 }, { "epoch": 136.97, "learning_rate": 1.7377346278317152e-06, "loss": 0.0007, "step": 352690 }, { "epoch": 136.97, "learning_rate": 1.7372168284789644e-06, "loss": 0.0482, "step": 352700 }, { "epoch": 136.97, "learning_rate": 1.7366990291262136e-06, "loss": 0.0105, "step": 352710 }, { "epoch": 136.98, "learning_rate": 1.7361812297734628e-06, "loss": 0.0445, "step": 352720 }, { "epoch": 136.98, "learning_rate": 1.735663430420712e-06, "loss": 0.0354, "step": 352730 }, { "epoch": 136.99, "learning_rate": 1.7351456310679612e-06, "loss": 0.0185, "step": 352740 }, { "epoch": 136.99, "learning_rate": 1.7346278317152104e-06, "loss": 0.0873, "step": 352750 }, { "epoch": 136.99, "learning_rate": 1.7341100323624596e-06, "loss": 0.0061, "step": 352760 }, { "epoch": 137.0, "learning_rate": 1.7335922330097088e-06, "loss": 0.0307, "step": 352770 }, { "epoch": 137.0, "eval_accuracy": 0.9529573590096286, "eval_loss": 0.38503599166870117, "eval_runtime": 8.175, "eval_samples_per_second": 444.649, "eval_steps_per_second": 55.658, "step": 352775 }, { "epoch": 137.0, "learning_rate": 1.733074433656958e-06, "loss": 0.1058, "step": 352780 }, { "epoch": 137.01, "learning_rate": 1.7325566343042072e-06, "loss": 0.0063, "step": 352790 }, { "epoch": 137.01, "learning_rate": 1.7320388349514564e-06, "loss": 0.0502, "step": 352800 }, { "epoch": 137.01, "learning_rate": 1.7315210355987056e-06, "loss": 0.0092, "step": 352810 }, { "epoch": 137.02, "learning_rate": 1.7310032362459548e-06, "loss": 0.0067, "step": 352820 }, { "epoch": 137.02, "learning_rate": 1.730485436893204e-06, "loss": 0.0391, "step": 352830 }, { "epoch": 137.03, "learning_rate": 1.7299676375404532e-06, "loss": 0.0005, "step": 352840 }, { "epoch": 137.03, "learning_rate": 1.7294498381877024e-06, "loss": 0.0275, "step": 352850 }, { "epoch": 137.03, "learning_rate": 1.7289320388349515e-06, "loss": 0.0797, "step": 352860 }, { "epoch": 137.04, "learning_rate": 1.7284142394822007e-06, "loss": 0.0918, "step": 352870 }, { "epoch": 137.04, "learning_rate": 1.72789644012945e-06, "loss": 0.0828, "step": 352880 }, { "epoch": 137.04, "learning_rate": 1.7273786407766991e-06, "loss": 0.0777, "step": 352890 }, { "epoch": 137.05, "learning_rate": 1.7268608414239485e-06, "loss": 0.05, "step": 352900 }, { "epoch": 137.05, "learning_rate": 1.7263430420711977e-06, "loss": 0.0038, "step": 352910 }, { "epoch": 137.06, "learning_rate": 1.725825242718447e-06, "loss": 0.0389, "step": 352920 }, { "epoch": 137.06, "learning_rate": 1.7253074433656961e-06, "loss": 0.0149, "step": 352930 }, { "epoch": 137.06, "learning_rate": 1.7247896440129453e-06, "loss": 0.0168, "step": 352940 }, { "epoch": 137.07, "learning_rate": 1.7242718446601945e-06, "loss": 0.0909, "step": 352950 }, { "epoch": 137.07, "learning_rate": 1.7237540453074437e-06, "loss": 0.097, "step": 352960 }, { "epoch": 137.08, "learning_rate": 1.723236245954693e-06, "loss": 0.039, "step": 352970 }, { "epoch": 137.08, "learning_rate": 1.7227184466019419e-06, "loss": 0.1649, "step": 352980 }, { "epoch": 137.08, "learning_rate": 1.722200647249191e-06, "loss": 0.0298, "step": 352990 }, { "epoch": 137.09, "learning_rate": 1.7216828478964403e-06, "loss": 0.0667, "step": 353000 }, { "epoch": 137.09, "learning_rate": 1.7211650485436895e-06, "loss": 0.0188, "step": 353010 }, { "epoch": 137.1, "learning_rate": 1.7206472491909387e-06, "loss": 0.0002, "step": 353020 }, { "epoch": 137.1, "learning_rate": 1.7201294498381879e-06, "loss": 0.0942, "step": 353030 }, { "epoch": 137.1, "learning_rate": 1.719611650485437e-06, "loss": 0.0101, "step": 353040 }, { "epoch": 137.11, "learning_rate": 1.7190938511326862e-06, "loss": 0.0283, "step": 353050 }, { "epoch": 137.11, "learning_rate": 1.7185760517799354e-06, "loss": 0.0902, "step": 353060 }, { "epoch": 137.11, "learning_rate": 1.7180582524271846e-06, "loss": 0.0115, "step": 353070 }, { "epoch": 137.12, "learning_rate": 1.7175404530744338e-06, "loss": 0.0457, "step": 353080 }, { "epoch": 137.12, "learning_rate": 1.717022653721683e-06, "loss": 0.0333, "step": 353090 }, { "epoch": 137.13, "learning_rate": 1.7165048543689322e-06, "loss": 0.034, "step": 353100 }, { "epoch": 137.13, "learning_rate": 1.7159870550161814e-06, "loss": 0.0994, "step": 353110 }, { "epoch": 137.13, "learning_rate": 1.7154692556634306e-06, "loss": 0.1084, "step": 353120 }, { "epoch": 137.14, "learning_rate": 1.7149514563106798e-06, "loss": 0.1391, "step": 353130 }, { "epoch": 137.14, "learning_rate": 1.714433656957929e-06, "loss": 0.0931, "step": 353140 }, { "epoch": 137.15, "learning_rate": 1.7139158576051782e-06, "loss": 0.0367, "step": 353150 }, { "epoch": 137.15, "learning_rate": 1.7133980582524274e-06, "loss": 0.0001, "step": 353160 }, { "epoch": 137.15, "learning_rate": 1.7128802588996766e-06, "loss": 0.0005, "step": 353170 }, { "epoch": 137.16, "learning_rate": 1.7123624595469258e-06, "loss": 0.0247, "step": 353180 }, { "epoch": 137.16, "learning_rate": 1.711844660194175e-06, "loss": 0.0009, "step": 353190 }, { "epoch": 137.17, "learning_rate": 1.7113268608414242e-06, "loss": 0.1282, "step": 353200 }, { "epoch": 137.17, "learning_rate": 1.7108090614886734e-06, "loss": 0.0309, "step": 353210 }, { "epoch": 137.17, "learning_rate": 1.7102912621359226e-06, "loss": 0.0117, "step": 353220 }, { "epoch": 137.18, "learning_rate": 1.7097734627831715e-06, "loss": 0.0002, "step": 353230 }, { "epoch": 137.18, "learning_rate": 1.7092556634304207e-06, "loss": 0.0172, "step": 353240 }, { "epoch": 137.18, "learning_rate": 1.70873786407767e-06, "loss": 0.0001, "step": 353250 }, { "epoch": 137.19, "learning_rate": 1.7082200647249191e-06, "loss": 0.0299, "step": 353260 }, { "epoch": 137.19, "learning_rate": 1.7077022653721683e-06, "loss": 0.0001, "step": 353270 }, { "epoch": 137.2, "learning_rate": 1.7071844660194175e-06, "loss": 0.0318, "step": 353280 }, { "epoch": 137.2, "learning_rate": 1.7066666666666667e-06, "loss": 0.0582, "step": 353290 }, { "epoch": 137.2, "learning_rate": 1.706148867313916e-06, "loss": 0.0262, "step": 353300 }, { "epoch": 137.21, "learning_rate": 1.705631067961165e-06, "loss": 0.002, "step": 353310 }, { "epoch": 137.21, "learning_rate": 1.7051132686084143e-06, "loss": 0.0768, "step": 353320 }, { "epoch": 137.22, "learning_rate": 1.7045954692556635e-06, "loss": 0.0108, "step": 353330 }, { "epoch": 137.22, "learning_rate": 1.7040776699029127e-06, "loss": 0.0831, "step": 353340 }, { "epoch": 137.22, "learning_rate": 1.7035598705501619e-06, "loss": 0.0498, "step": 353350 }, { "epoch": 137.23, "learning_rate": 1.703042071197411e-06, "loss": 0.004, "step": 353360 }, { "epoch": 137.23, "learning_rate": 1.7025242718446603e-06, "loss": 0.0047, "step": 353370 }, { "epoch": 137.23, "learning_rate": 1.7020064724919095e-06, "loss": 0.0017, "step": 353380 }, { "epoch": 137.24, "learning_rate": 1.7014886731391586e-06, "loss": 0.0191, "step": 353390 }, { "epoch": 137.24, "learning_rate": 1.7009708737864078e-06, "loss": 0.0093, "step": 353400 }, { "epoch": 137.25, "learning_rate": 1.700453074433657e-06, "loss": 0.0607, "step": 353410 }, { "epoch": 137.25, "learning_rate": 1.6999352750809062e-06, "loss": 0.0103, "step": 353420 }, { "epoch": 137.25, "learning_rate": 1.6994174757281554e-06, "loss": 0.0485, "step": 353430 }, { "epoch": 137.26, "learning_rate": 1.6988996763754046e-06, "loss": 0.0124, "step": 353440 }, { "epoch": 137.26, "learning_rate": 1.6983818770226538e-06, "loss": 0.098, "step": 353450 }, { "epoch": 137.27, "learning_rate": 1.6978640776699032e-06, "loss": 0.0215, "step": 353460 }, { "epoch": 137.27, "learning_rate": 1.6973462783171524e-06, "loss": 0.0128, "step": 353470 }, { "epoch": 137.27, "learning_rate": 1.6968284789644016e-06, "loss": 0.0636, "step": 353480 }, { "epoch": 137.28, "learning_rate": 1.6963106796116508e-06, "loss": 0.001, "step": 353490 }, { "epoch": 137.28, "learning_rate": 1.6957928802589e-06, "loss": 0.0514, "step": 353500 }, { "epoch": 137.29, "learning_rate": 1.6952750809061492e-06, "loss": 0.0194, "step": 353510 }, { "epoch": 137.29, "learning_rate": 1.6947572815533984e-06, "loss": 0.0502, "step": 353520 }, { "epoch": 137.29, "learning_rate": 1.6942394822006476e-06, "loss": 0.0136, "step": 353530 }, { "epoch": 137.3, "learning_rate": 1.6937216828478968e-06, "loss": 0.0691, "step": 353540 }, { "epoch": 137.3, "learning_rate": 1.6932038834951458e-06, "loss": 0.0091, "step": 353550 }, { "epoch": 137.3, "learning_rate": 1.692686084142395e-06, "loss": 0.0219, "step": 353560 }, { "epoch": 137.31, "learning_rate": 1.6921682847896441e-06, "loss": 0.0059, "step": 353570 }, { "epoch": 137.31, "learning_rate": 1.6916504854368933e-06, "loss": 0.1721, "step": 353580 }, { "epoch": 137.32, "learning_rate": 1.6911326860841425e-06, "loss": 0.0335, "step": 353590 }, { "epoch": 137.32, "learning_rate": 1.6906148867313917e-06, "loss": 0.0772, "step": 353600 }, { "epoch": 137.32, "learning_rate": 1.690097087378641e-06, "loss": 0.0006, "step": 353610 }, { "epoch": 137.33, "learning_rate": 1.6895792880258901e-06, "loss": 0.0542, "step": 353620 }, { "epoch": 137.33, "learning_rate": 1.6890614886731393e-06, "loss": 0.0336, "step": 353630 }, { "epoch": 137.34, "learning_rate": 1.6885436893203885e-06, "loss": 0.042, "step": 353640 }, { "epoch": 137.34, "learning_rate": 1.6880258899676377e-06, "loss": 0.0472, "step": 353650 }, { "epoch": 137.34, "learning_rate": 1.687508090614887e-06, "loss": 0.0388, "step": 353660 }, { "epoch": 137.35, "learning_rate": 1.686990291262136e-06, "loss": 0.0002, "step": 353670 }, { "epoch": 137.35, "learning_rate": 1.6864724919093853e-06, "loss": 0.13, "step": 353680 }, { "epoch": 137.36, "learning_rate": 1.6859546925566345e-06, "loss": 0.0192, "step": 353690 }, { "epoch": 137.36, "learning_rate": 1.6854368932038837e-06, "loss": 0.033, "step": 353700 }, { "epoch": 137.36, "learning_rate": 1.6849190938511329e-06, "loss": 0.0911, "step": 353710 }, { "epoch": 137.37, "learning_rate": 1.684401294498382e-06, "loss": 0.0242, "step": 353720 }, { "epoch": 137.37, "learning_rate": 1.6838834951456313e-06, "loss": 0.0129, "step": 353730 }, { "epoch": 137.37, "learning_rate": 1.6833656957928805e-06, "loss": 0.0115, "step": 353740 }, { "epoch": 137.38, "learning_rate": 1.6828478964401297e-06, "loss": 0.0057, "step": 353750 }, { "epoch": 137.38, "learning_rate": 1.6823300970873788e-06, "loss": 0.1398, "step": 353760 }, { "epoch": 137.39, "learning_rate": 1.681812297734628e-06, "loss": 0.0177, "step": 353770 }, { "epoch": 137.39, "learning_rate": 1.6812944983818772e-06, "loss": 0.0288, "step": 353780 }, { "epoch": 137.39, "learning_rate": 1.6807766990291264e-06, "loss": 0.0125, "step": 353790 }, { "epoch": 137.4, "learning_rate": 1.6802588996763754e-06, "loss": 0.092, "step": 353800 }, { "epoch": 137.4, "learning_rate": 1.6797411003236246e-06, "loss": 0.0275, "step": 353810 }, { "epoch": 137.41, "learning_rate": 1.6792233009708738e-06, "loss": 0.0358, "step": 353820 }, { "epoch": 137.41, "learning_rate": 1.678705501618123e-06, "loss": 0.0021, "step": 353830 }, { "epoch": 137.41, "learning_rate": 1.6781877022653722e-06, "loss": 0.0095, "step": 353840 }, { "epoch": 137.42, "learning_rate": 1.6776699029126214e-06, "loss": 0.0248, "step": 353850 }, { "epoch": 137.42, "learning_rate": 1.6771521035598706e-06, "loss": 0.0747, "step": 353860 }, { "epoch": 137.43, "learning_rate": 1.6766343042071198e-06, "loss": 0.0087, "step": 353870 }, { "epoch": 137.43, "learning_rate": 1.676116504854369e-06, "loss": 0.0002, "step": 353880 }, { "epoch": 137.43, "learning_rate": 1.6755987055016182e-06, "loss": 0.0629, "step": 353890 }, { "epoch": 137.44, "learning_rate": 1.6750809061488674e-06, "loss": 0.0688, "step": 353900 }, { "epoch": 137.44, "learning_rate": 1.6745631067961166e-06, "loss": 0.0109, "step": 353910 }, { "epoch": 137.44, "learning_rate": 1.6740453074433657e-06, "loss": 0.01, "step": 353920 }, { "epoch": 137.45, "learning_rate": 1.673527508090615e-06, "loss": 0.0369, "step": 353930 }, { "epoch": 137.45, "learning_rate": 1.6730097087378641e-06, "loss": 0.1657, "step": 353940 }, { "epoch": 137.46, "learning_rate": 1.6724919093851133e-06, "loss": 0.0125, "step": 353950 }, { "epoch": 137.46, "learning_rate": 1.6719741100323625e-06, "loss": 0.0055, "step": 353960 }, { "epoch": 137.46, "learning_rate": 1.6714563106796117e-06, "loss": 0.0392, "step": 353970 }, { "epoch": 137.47, "learning_rate": 1.670938511326861e-06, "loss": 0.1075, "step": 353980 }, { "epoch": 137.47, "learning_rate": 1.6704207119741101e-06, "loss": 0.0123, "step": 353990 }, { "epoch": 137.48, "learning_rate": 1.6699029126213593e-06, "loss": 0.0689, "step": 354000 }, { "epoch": 137.48, "learning_rate": 1.6693851132686085e-06, "loss": 0.1215, "step": 354010 }, { "epoch": 137.48, "learning_rate": 1.668867313915858e-06, "loss": 0.0221, "step": 354020 }, { "epoch": 137.49, "learning_rate": 1.668349514563107e-06, "loss": 0.1155, "step": 354030 }, { "epoch": 137.49, "learning_rate": 1.6678317152103563e-06, "loss": 0.0557, "step": 354040 }, { "epoch": 137.5, "learning_rate": 1.6673139158576055e-06, "loss": 0.0004, "step": 354050 }, { "epoch": 137.5, "learning_rate": 1.6667961165048547e-06, "loss": 0.0191, "step": 354060 }, { "epoch": 137.5, "learning_rate": 1.6662783171521039e-06, "loss": 0.0504, "step": 354070 }, { "epoch": 137.51, "learning_rate": 1.665760517799353e-06, "loss": 0.1418, "step": 354080 }, { "epoch": 137.51, "learning_rate": 1.6652427184466023e-06, "loss": 0.0803, "step": 354090 }, { "epoch": 137.51, "learning_rate": 1.6647249190938515e-06, "loss": 0.0495, "step": 354100 }, { "epoch": 137.52, "learning_rate": 1.6642071197411007e-06, "loss": 0.0628, "step": 354110 }, { "epoch": 137.52, "learning_rate": 1.6636893203883496e-06, "loss": 0.0497, "step": 354120 }, { "epoch": 137.53, "learning_rate": 1.6631715210355988e-06, "loss": 0.0378, "step": 354130 }, { "epoch": 137.53, "learning_rate": 1.662653721682848e-06, "loss": 0.0971, "step": 354140 }, { "epoch": 137.53, "learning_rate": 1.6621359223300972e-06, "loss": 0.0151, "step": 354150 }, { "epoch": 137.54, "learning_rate": 1.6616181229773464e-06, "loss": 0.0343, "step": 354160 }, { "epoch": 137.54, "learning_rate": 1.6611003236245956e-06, "loss": 0.0244, "step": 354170 }, { "epoch": 137.55, "learning_rate": 1.6605825242718448e-06, "loss": 0.0514, "step": 354180 }, { "epoch": 137.55, "learning_rate": 1.660064724919094e-06, "loss": 0.0466, "step": 354190 }, { "epoch": 137.55, "learning_rate": 1.6595469255663432e-06, "loss": 0.0084, "step": 354200 }, { "epoch": 137.56, "learning_rate": 1.6590291262135924e-06, "loss": 0.0283, "step": 354210 }, { "epoch": 137.56, "learning_rate": 1.6585113268608416e-06, "loss": 0.1091, "step": 354220 }, { "epoch": 137.57, "learning_rate": 1.6579935275080908e-06, "loss": 0.0704, "step": 354230 }, { "epoch": 137.57, "learning_rate": 1.65747572815534e-06, "loss": 0.0004, "step": 354240 }, { "epoch": 137.57, "learning_rate": 1.6569579288025892e-06, "loss": 0.0138, "step": 354250 }, { "epoch": 137.58, "learning_rate": 1.6564401294498384e-06, "loss": 0.0172, "step": 354260 }, { "epoch": 137.58, "learning_rate": 1.6559223300970876e-06, "loss": 0.0151, "step": 354270 }, { "epoch": 137.58, "learning_rate": 1.6554045307443367e-06, "loss": 0.0616, "step": 354280 }, { "epoch": 137.59, "learning_rate": 1.654886731391586e-06, "loss": 0.0286, "step": 354290 }, { "epoch": 137.59, "learning_rate": 1.6543689320388351e-06, "loss": 0.0741, "step": 354300 }, { "epoch": 137.6, "learning_rate": 1.6538511326860843e-06, "loss": 0.0235, "step": 354310 }, { "epoch": 137.6, "learning_rate": 1.6533333333333335e-06, "loss": 0.0825, "step": 354320 }, { "epoch": 137.6, "learning_rate": 1.6528155339805827e-06, "loss": 0.0155, "step": 354330 }, { "epoch": 137.61, "learning_rate": 1.652297734627832e-06, "loss": 0.0007, "step": 354340 }, { "epoch": 137.61, "learning_rate": 1.6517799352750811e-06, "loss": 0.0134, "step": 354350 }, { "epoch": 137.62, "learning_rate": 1.6512621359223303e-06, "loss": 0.1164, "step": 354360 }, { "epoch": 137.62, "learning_rate": 1.6507443365695793e-06, "loss": 0.0002, "step": 354370 }, { "epoch": 137.62, "learning_rate": 1.6502265372168285e-06, "loss": 0.0301, "step": 354380 }, { "epoch": 137.63, "learning_rate": 1.6497087378640777e-06, "loss": 0.0452, "step": 354390 }, { "epoch": 137.63, "learning_rate": 1.6491909385113269e-06, "loss": 0.0095, "step": 354400 }, { "epoch": 137.63, "learning_rate": 1.648673139158576e-06, "loss": 0.0241, "step": 354410 }, { "epoch": 137.64, "learning_rate": 1.6481553398058253e-06, "loss": 0.0331, "step": 354420 }, { "epoch": 137.64, "learning_rate": 1.6476375404530745e-06, "loss": 0.0587, "step": 354430 }, { "epoch": 137.65, "learning_rate": 1.6471197411003236e-06, "loss": 0.0108, "step": 354440 }, { "epoch": 137.65, "learning_rate": 1.6466019417475728e-06, "loss": 0.0483, "step": 354450 }, { "epoch": 137.65, "learning_rate": 1.646084142394822e-06, "loss": 0.0013, "step": 354460 }, { "epoch": 137.66, "learning_rate": 1.6455663430420712e-06, "loss": 0.0146, "step": 354470 }, { "epoch": 137.66, "learning_rate": 1.6450485436893204e-06, "loss": 0.0351, "step": 354480 }, { "epoch": 137.67, "learning_rate": 1.6445307443365696e-06, "loss": 0.1314, "step": 354490 }, { "epoch": 137.67, "learning_rate": 1.6440129449838188e-06, "loss": 0.1066, "step": 354500 }, { "epoch": 137.67, "learning_rate": 1.643495145631068e-06, "loss": 0.0083, "step": 354510 }, { "epoch": 137.68, "learning_rate": 1.6429773462783172e-06, "loss": 0.0001, "step": 354520 }, { "epoch": 137.68, "learning_rate": 1.6424595469255664e-06, "loss": 0.0265, "step": 354530 }, { "epoch": 137.69, "learning_rate": 1.6419417475728156e-06, "loss": 0.0188, "step": 354540 }, { "epoch": 137.69, "learning_rate": 1.6414239482200648e-06, "loss": 0.0446, "step": 354550 }, { "epoch": 137.69, "learning_rate": 1.640906148867314e-06, "loss": 0.0003, "step": 354560 }, { "epoch": 137.7, "learning_rate": 1.6403883495145632e-06, "loss": 0.2866, "step": 354570 }, { "epoch": 137.7, "learning_rate": 1.6398705501618126e-06, "loss": 0.0371, "step": 354580 }, { "epoch": 137.7, "learning_rate": 1.6393527508090618e-06, "loss": 0.0723, "step": 354590 }, { "epoch": 137.71, "learning_rate": 1.638834951456311e-06, "loss": 0.0239, "step": 354600 }, { "epoch": 137.71, "learning_rate": 1.6383171521035602e-06, "loss": 0.025, "step": 354610 }, { "epoch": 137.72, "learning_rate": 1.6377993527508094e-06, "loss": 0.0934, "step": 354620 }, { "epoch": 137.72, "learning_rate": 1.6372815533980586e-06, "loss": 0.0503, "step": 354630 }, { "epoch": 137.72, "learning_rate": 1.6367637540453078e-06, "loss": 0.0259, "step": 354640 }, { "epoch": 137.73, "learning_rate": 1.636245954692557e-06, "loss": 0.1587, "step": 354650 }, { "epoch": 137.73, "learning_rate": 1.6357281553398061e-06, "loss": 0.004, "step": 354660 }, { "epoch": 137.74, "learning_rate": 1.6352103559870553e-06, "loss": 0.0014, "step": 354670 }, { "epoch": 137.74, "learning_rate": 1.6346925566343045e-06, "loss": 0.0634, "step": 354680 }, { "epoch": 137.74, "learning_rate": 1.6341747572815535e-06, "loss": 0.0014, "step": 354690 }, { "epoch": 137.75, "learning_rate": 1.6336569579288027e-06, "loss": 0.026, "step": 354700 }, { "epoch": 137.75, "learning_rate": 1.633139158576052e-06, "loss": 0.0138, "step": 354710 }, { "epoch": 137.76, "learning_rate": 1.632621359223301e-06, "loss": 0.1316, "step": 354720 }, { "epoch": 137.76, "learning_rate": 1.6321035598705503e-06, "loss": 0.0108, "step": 354730 }, { "epoch": 137.76, "learning_rate": 1.6315857605177995e-06, "loss": 0.0275, "step": 354740 }, { "epoch": 137.77, "learning_rate": 1.6310679611650487e-06, "loss": 0.0057, "step": 354750 }, { "epoch": 137.77, "learning_rate": 1.6305501618122979e-06, "loss": 0.0037, "step": 354760 }, { "epoch": 137.77, "learning_rate": 1.630032362459547e-06, "loss": 0.0002, "step": 354770 }, { "epoch": 137.78, "learning_rate": 1.6295145631067963e-06, "loss": 0.0098, "step": 354780 }, { "epoch": 137.78, "learning_rate": 1.6289967637540455e-06, "loss": 0.0882, "step": 354790 }, { "epoch": 137.79, "learning_rate": 1.6284789644012947e-06, "loss": 0.0617, "step": 354800 }, { "epoch": 137.79, "learning_rate": 1.6279611650485438e-06, "loss": 0.001, "step": 354810 }, { "epoch": 137.79, "learning_rate": 1.627443365695793e-06, "loss": 0.0305, "step": 354820 }, { "epoch": 137.8, "learning_rate": 1.6269255663430422e-06, "loss": 0.0301, "step": 354830 }, { "epoch": 137.8, "learning_rate": 1.6264077669902914e-06, "loss": 0.0246, "step": 354840 }, { "epoch": 137.81, "learning_rate": 1.6258899676375406e-06, "loss": 0.0212, "step": 354850 }, { "epoch": 137.81, "learning_rate": 1.6253721682847898e-06, "loss": 0.1337, "step": 354860 }, { "epoch": 137.81, "learning_rate": 1.624854368932039e-06, "loss": 0.0478, "step": 354870 }, { "epoch": 137.82, "learning_rate": 1.6243365695792882e-06, "loss": 0.0179, "step": 354880 }, { "epoch": 137.82, "learning_rate": 1.6238187702265374e-06, "loss": 0.0138, "step": 354890 }, { "epoch": 137.83, "learning_rate": 1.6233009708737866e-06, "loss": 0.0981, "step": 354900 }, { "epoch": 137.83, "learning_rate": 1.6227831715210358e-06, "loss": 0.0179, "step": 354910 }, { "epoch": 137.83, "learning_rate": 1.622265372168285e-06, "loss": 0.2243, "step": 354920 }, { "epoch": 137.84, "learning_rate": 1.6217475728155342e-06, "loss": 0.0331, "step": 354930 }, { "epoch": 137.84, "learning_rate": 1.6212297734627832e-06, "loss": 0.0623, "step": 354940 }, { "epoch": 137.84, "learning_rate": 1.6207119741100324e-06, "loss": 0.1719, "step": 354950 }, { "epoch": 137.85, "learning_rate": 1.6201941747572816e-06, "loss": 0.1306, "step": 354960 }, { "epoch": 137.85, "learning_rate": 1.6196763754045307e-06, "loss": 0.1103, "step": 354970 }, { "epoch": 137.86, "learning_rate": 1.61915857605178e-06, "loss": 0.0002, "step": 354980 }, { "epoch": 137.86, "learning_rate": 1.6186407766990291e-06, "loss": 0.1051, "step": 354990 }, { "epoch": 137.86, "learning_rate": 1.6181229773462783e-06, "loss": 0.0796, "step": 355000 }, { "epoch": 137.87, "learning_rate": 1.6176051779935275e-06, "loss": 0.0439, "step": 355010 }, { "epoch": 137.87, "learning_rate": 1.6170873786407767e-06, "loss": 0.0223, "step": 355020 }, { "epoch": 137.88, "learning_rate": 1.616569579288026e-06, "loss": 0.0347, "step": 355030 }, { "epoch": 137.88, "learning_rate": 1.6160517799352751e-06, "loss": 0.0113, "step": 355040 }, { "epoch": 137.88, "learning_rate": 1.6155339805825243e-06, "loss": 0.1329, "step": 355050 }, { "epoch": 137.89, "learning_rate": 1.6150161812297735e-06, "loss": 0.0516, "step": 355060 }, { "epoch": 137.89, "learning_rate": 1.6144983818770227e-06, "loss": 0.0176, "step": 355070 }, { "epoch": 137.9, "learning_rate": 1.6139805825242719e-06, "loss": 0.0303, "step": 355080 }, { "epoch": 137.9, "learning_rate": 1.613462783171521e-06, "loss": 0.0098, "step": 355090 }, { "epoch": 137.9, "learning_rate": 1.6129449838187703e-06, "loss": 0.0663, "step": 355100 }, { "epoch": 137.91, "learning_rate": 1.6124271844660195e-06, "loss": 0.0049, "step": 355110 }, { "epoch": 137.91, "learning_rate": 1.6119093851132687e-06, "loss": 0.0455, "step": 355120 }, { "epoch": 137.91, "learning_rate": 1.6113915857605179e-06, "loss": 0.1173, "step": 355130 }, { "epoch": 137.92, "learning_rate": 1.6108737864077673e-06, "loss": 0.0105, "step": 355140 }, { "epoch": 137.92, "learning_rate": 1.6103559870550165e-06, "loss": 0.0097, "step": 355150 }, { "epoch": 137.93, "learning_rate": 1.6098381877022657e-06, "loss": 0.0256, "step": 355160 }, { "epoch": 137.93, "learning_rate": 1.6093203883495148e-06, "loss": 0.0287, "step": 355170 }, { "epoch": 137.93, "learning_rate": 1.608802588996764e-06, "loss": 0.0303, "step": 355180 }, { "epoch": 137.94, "learning_rate": 1.6082847896440132e-06, "loss": 0.0638, "step": 355190 }, { "epoch": 137.94, "learning_rate": 1.6077669902912624e-06, "loss": 0.2044, "step": 355200 }, { "epoch": 137.95, "learning_rate": 1.6072491909385116e-06, "loss": 0.0576, "step": 355210 }, { "epoch": 137.95, "learning_rate": 1.6067313915857608e-06, "loss": 0.0651, "step": 355220 }, { "epoch": 137.95, "learning_rate": 1.60621359223301e-06, "loss": 0.0337, "step": 355230 }, { "epoch": 137.96, "learning_rate": 1.6056957928802592e-06, "loss": 0.0851, "step": 355240 }, { "epoch": 137.96, "learning_rate": 1.6051779935275084e-06, "loss": 0.0328, "step": 355250 }, { "epoch": 137.97, "learning_rate": 1.6046601941747574e-06, "loss": 0.0259, "step": 355260 }, { "epoch": 137.97, "learning_rate": 1.6041423948220066e-06, "loss": 0.0285, "step": 355270 }, { "epoch": 137.97, "learning_rate": 1.6036245954692558e-06, "loss": 0.0159, "step": 355280 }, { "epoch": 137.98, "learning_rate": 1.603106796116505e-06, "loss": 0.0013, "step": 355290 }, { "epoch": 137.98, "learning_rate": 1.6025889967637542e-06, "loss": 0.0983, "step": 355300 }, { "epoch": 137.98, "learning_rate": 1.6020711974110034e-06, "loss": 0.0032, "step": 355310 }, { "epoch": 137.99, "learning_rate": 1.6015533980582526e-06, "loss": 0.0138, "step": 355320 }, { "epoch": 137.99, "learning_rate": 1.6010355987055018e-06, "loss": 0.026, "step": 355330 }, { "epoch": 138.0, "learning_rate": 1.600517799352751e-06, "loss": 0.0337, "step": 355340 }, { "epoch": 138.0, "learning_rate": 1.6000000000000001e-06, "loss": 0.0081, "step": 355350 }, { "epoch": 138.0, "eval_accuracy": 0.953232462173315, "eval_loss": 0.3852311968803406, "eval_runtime": 8.1731, "eval_samples_per_second": 444.754, "eval_steps_per_second": 55.671, "step": 355350 }, { "epoch": 138.0, "learning_rate": 1.5994822006472493e-06, "loss": 0.0364, "step": 355360 }, { "epoch": 138.01, "learning_rate": 1.5989644012944985e-06, "loss": 0.016, "step": 355370 }, { "epoch": 138.01, "learning_rate": 1.5984466019417477e-06, "loss": 0.0079, "step": 355380 }, { "epoch": 138.02, "learning_rate": 1.597928802588997e-06, "loss": 0.0556, "step": 355390 }, { "epoch": 138.02, "learning_rate": 1.5974110032362461e-06, "loss": 0.01, "step": 355400 }, { "epoch": 138.02, "learning_rate": 1.5968932038834953e-06, "loss": 0.0161, "step": 355410 }, { "epoch": 138.03, "learning_rate": 1.5963754045307445e-06, "loss": 0.0109, "step": 355420 }, { "epoch": 138.03, "learning_rate": 1.5958576051779937e-06, "loss": 0.0532, "step": 355430 }, { "epoch": 138.03, "learning_rate": 1.5953398058252429e-06, "loss": 0.0353, "step": 355440 }, { "epoch": 138.04, "learning_rate": 1.594822006472492e-06, "loss": 0.1191, "step": 355450 }, { "epoch": 138.04, "learning_rate": 1.5943042071197413e-06, "loss": 0.0206, "step": 355460 }, { "epoch": 138.05, "learning_rate": 1.5937864077669905e-06, "loss": 0.0009, "step": 355470 }, { "epoch": 138.05, "learning_rate": 1.5932686084142397e-06, "loss": 0.0866, "step": 355480 }, { "epoch": 138.05, "learning_rate": 1.5927508090614889e-06, "loss": 0.002, "step": 355490 }, { "epoch": 138.06, "learning_rate": 1.592233009708738e-06, "loss": 0.0559, "step": 355500 }, { "epoch": 138.06, "learning_rate": 1.591715210355987e-06, "loss": 0.0689, "step": 355510 }, { "epoch": 138.07, "learning_rate": 1.5911974110032362e-06, "loss": 0.0411, "step": 355520 }, { "epoch": 138.07, "learning_rate": 1.5906796116504854e-06, "loss": 0.0352, "step": 355530 }, { "epoch": 138.07, "learning_rate": 1.5901618122977346e-06, "loss": 0.0901, "step": 355540 }, { "epoch": 138.08, "learning_rate": 1.5896440129449838e-06, "loss": 0.0441, "step": 355550 }, { "epoch": 138.08, "learning_rate": 1.589126213592233e-06, "loss": 0.121, "step": 355560 }, { "epoch": 138.09, "learning_rate": 1.5886084142394822e-06, "loss": 0.0689, "step": 355570 }, { "epoch": 138.09, "learning_rate": 1.5880906148867314e-06, "loss": 0.1046, "step": 355580 }, { "epoch": 138.09, "learning_rate": 1.5875728155339806e-06, "loss": 0.0276, "step": 355590 }, { "epoch": 138.1, "learning_rate": 1.5870550161812298e-06, "loss": 0.0586, "step": 355600 }, { "epoch": 138.1, "learning_rate": 1.586537216828479e-06, "loss": 0.0807, "step": 355610 }, { "epoch": 138.1, "learning_rate": 1.5860194174757282e-06, "loss": 0.0011, "step": 355620 }, { "epoch": 138.11, "learning_rate": 1.5855016181229774e-06, "loss": 0.0138, "step": 355630 }, { "epoch": 138.11, "learning_rate": 1.5849838187702266e-06, "loss": 0.026, "step": 355640 }, { "epoch": 138.12, "learning_rate": 1.5844660194174758e-06, "loss": 0.0748, "step": 355650 }, { "epoch": 138.12, "learning_rate": 1.583948220064725e-06, "loss": 0.001, "step": 355660 }, { "epoch": 138.12, "learning_rate": 1.5834304207119742e-06, "loss": 0.1211, "step": 355670 }, { "epoch": 138.13, "learning_rate": 1.5829126213592233e-06, "loss": 0.0131, "step": 355680 }, { "epoch": 138.13, "learning_rate": 1.5823948220064725e-06, "loss": 0.0098, "step": 355690 }, { "epoch": 138.14, "learning_rate": 1.581877022653722e-06, "loss": 0.0399, "step": 355700 }, { "epoch": 138.14, "learning_rate": 1.5813592233009711e-06, "loss": 0.0128, "step": 355710 }, { "epoch": 138.14, "learning_rate": 1.5808414239482203e-06, "loss": 0.0454, "step": 355720 }, { "epoch": 138.15, "learning_rate": 1.5803236245954695e-06, "loss": 0.0177, "step": 355730 }, { "epoch": 138.15, "learning_rate": 1.5798058252427187e-06, "loss": 0.0003, "step": 355740 }, { "epoch": 138.16, "learning_rate": 1.579288025889968e-06, "loss": 0.0207, "step": 355750 }, { "epoch": 138.16, "learning_rate": 1.5787702265372171e-06, "loss": 0.0628, "step": 355760 }, { "epoch": 138.16, "learning_rate": 1.5782524271844663e-06, "loss": 0.0442, "step": 355770 }, { "epoch": 138.17, "learning_rate": 1.5777346278317155e-06, "loss": 0.1086, "step": 355780 }, { "epoch": 138.17, "learning_rate": 1.5772168284789647e-06, "loss": 0.0061, "step": 355790 }, { "epoch": 138.17, "learning_rate": 1.5766990291262139e-06, "loss": 0.0019, "step": 355800 }, { "epoch": 138.18, "learning_rate": 1.576181229773463e-06, "loss": 0.0834, "step": 355810 }, { "epoch": 138.18, "learning_rate": 1.5756634304207123e-06, "loss": 0.0473, "step": 355820 }, { "epoch": 138.19, "learning_rate": 1.5751456310679613e-06, "loss": 0.0174, "step": 355830 }, { "epoch": 138.19, "learning_rate": 1.5746278317152105e-06, "loss": 0.0478, "step": 355840 }, { "epoch": 138.19, "learning_rate": 1.5741100323624597e-06, "loss": 0.0028, "step": 355850 }, { "epoch": 138.2, "learning_rate": 1.5735922330097088e-06, "loss": 0.0296, "step": 355860 }, { "epoch": 138.2, "learning_rate": 1.573074433656958e-06, "loss": 0.0004, "step": 355870 }, { "epoch": 138.21, "learning_rate": 1.5725566343042072e-06, "loss": 0.0013, "step": 355880 }, { "epoch": 138.21, "learning_rate": 1.5720388349514564e-06, "loss": 0.0433, "step": 355890 }, { "epoch": 138.21, "learning_rate": 1.5715210355987056e-06, "loss": 0.0148, "step": 355900 }, { "epoch": 138.22, "learning_rate": 1.5710032362459548e-06, "loss": 0.0256, "step": 355910 }, { "epoch": 138.22, "learning_rate": 1.570485436893204e-06, "loss": 0.0001, "step": 355920 }, { "epoch": 138.23, "learning_rate": 1.5699676375404532e-06, "loss": 0.0096, "step": 355930 }, { "epoch": 138.23, "learning_rate": 1.5694498381877024e-06, "loss": 0.0485, "step": 355940 }, { "epoch": 138.23, "learning_rate": 1.5689320388349516e-06, "loss": 0.066, "step": 355950 }, { "epoch": 138.24, "learning_rate": 1.5684142394822008e-06, "loss": 0.0216, "step": 355960 }, { "epoch": 138.24, "learning_rate": 1.56789644012945e-06, "loss": 0.0763, "step": 355970 }, { "epoch": 138.24, "learning_rate": 1.5673786407766992e-06, "loss": 0.0011, "step": 355980 }, { "epoch": 138.25, "learning_rate": 1.5668608414239484e-06, "loss": 0.0316, "step": 355990 }, { "epoch": 138.25, "learning_rate": 1.5663430420711976e-06, "loss": 0.0486, "step": 356000 }, { "epoch": 138.26, "learning_rate": 1.5658252427184468e-06, "loss": 0.0001, "step": 356010 }, { "epoch": 138.26, "learning_rate": 1.565307443365696e-06, "loss": 0.0494, "step": 356020 }, { "epoch": 138.26, "learning_rate": 1.5647896440129452e-06, "loss": 0.0106, "step": 356030 }, { "epoch": 138.27, "learning_rate": 1.5642718446601943e-06, "loss": 0.0001, "step": 356040 }, { "epoch": 138.27, "learning_rate": 1.5637540453074435e-06, "loss": 0.1834, "step": 356050 }, { "epoch": 138.28, "learning_rate": 1.5632362459546927e-06, "loss": 0.0006, "step": 356060 }, { "epoch": 138.28, "learning_rate": 1.562718446601942e-06, "loss": 0.0161, "step": 356070 }, { "epoch": 138.28, "learning_rate": 1.562200647249191e-06, "loss": 0.0005, "step": 356080 }, { "epoch": 138.29, "learning_rate": 1.5616828478964401e-06, "loss": 0.0007, "step": 356090 }, { "epoch": 138.29, "learning_rate": 1.5611650485436893e-06, "loss": 0.0001, "step": 356100 }, { "epoch": 138.3, "learning_rate": 1.5606472491909385e-06, "loss": 0.0162, "step": 356110 }, { "epoch": 138.3, "learning_rate": 1.5601294498381877e-06, "loss": 0.0087, "step": 356120 }, { "epoch": 138.3, "learning_rate": 1.5596116504854369e-06, "loss": 0.023, "step": 356130 }, { "epoch": 138.31, "learning_rate": 1.559093851132686e-06, "loss": 0.0104, "step": 356140 }, { "epoch": 138.31, "learning_rate": 1.5585760517799353e-06, "loss": 0.0424, "step": 356150 }, { "epoch": 138.31, "learning_rate": 1.5580582524271845e-06, "loss": 0.0256, "step": 356160 }, { "epoch": 138.32, "learning_rate": 1.5575404530744337e-06, "loss": 0.0639, "step": 356170 }, { "epoch": 138.32, "learning_rate": 1.5570226537216829e-06, "loss": 0.0601, "step": 356180 }, { "epoch": 138.33, "learning_rate": 1.556504854368932e-06, "loss": 0.058, "step": 356190 }, { "epoch": 138.33, "learning_rate": 1.5559870550161813e-06, "loss": 0.016, "step": 356200 }, { "epoch": 138.33, "learning_rate": 1.5554692556634304e-06, "loss": 0.1038, "step": 356210 }, { "epoch": 138.34, "learning_rate": 1.5549514563106796e-06, "loss": 0.0928, "step": 356220 }, { "epoch": 138.34, "learning_rate": 1.5544336569579288e-06, "loss": 0.0543, "step": 356230 }, { "epoch": 138.35, "learning_rate": 1.553915857605178e-06, "loss": 0.0666, "step": 356240 }, { "epoch": 138.35, "learning_rate": 1.5533980582524272e-06, "loss": 0.0157, "step": 356250 }, { "epoch": 138.35, "learning_rate": 1.5528802588996766e-06, "loss": 0.0368, "step": 356260 }, { "epoch": 138.36, "learning_rate": 1.5523624595469258e-06, "loss": 0.0908, "step": 356270 }, { "epoch": 138.36, "learning_rate": 1.551844660194175e-06, "loss": 0.0162, "step": 356280 }, { "epoch": 138.37, "learning_rate": 1.5513268608414242e-06, "loss": 0.0481, "step": 356290 }, { "epoch": 138.37, "learning_rate": 1.5508090614886734e-06, "loss": 0.0003, "step": 356300 }, { "epoch": 138.37, "learning_rate": 1.5502912621359226e-06, "loss": 0.0206, "step": 356310 }, { "epoch": 138.38, "learning_rate": 1.5497734627831718e-06, "loss": 0.0531, "step": 356320 }, { "epoch": 138.38, "learning_rate": 1.549255663430421e-06, "loss": 0.012, "step": 356330 }, { "epoch": 138.38, "learning_rate": 1.5487378640776702e-06, "loss": 0.0159, "step": 356340 }, { "epoch": 138.39, "learning_rate": 1.5482200647249194e-06, "loss": 0.1658, "step": 356350 }, { "epoch": 138.39, "learning_rate": 1.5477022653721686e-06, "loss": 0.0481, "step": 356360 }, { "epoch": 138.4, "learning_rate": 1.5471844660194178e-06, "loss": 0.0003, "step": 356370 }, { "epoch": 138.4, "learning_rate": 1.546666666666667e-06, "loss": 0.0001, "step": 356380 }, { "epoch": 138.4, "learning_rate": 1.5461488673139162e-06, "loss": 0.0044, "step": 356390 }, { "epoch": 138.41, "learning_rate": 1.5456310679611651e-06, "loss": 0.042, "step": 356400 }, { "epoch": 138.41, "learning_rate": 1.5451132686084143e-06, "loss": 0.0725, "step": 356410 }, { "epoch": 138.42, "learning_rate": 1.5445954692556635e-06, "loss": 0.0865, "step": 356420 }, { "epoch": 138.42, "learning_rate": 1.5440776699029127e-06, "loss": 0.0095, "step": 356430 }, { "epoch": 138.42, "learning_rate": 1.543559870550162e-06, "loss": 0.001, "step": 356440 }, { "epoch": 138.43, "learning_rate": 1.5430420711974111e-06, "loss": 0.0761, "step": 356450 }, { "epoch": 138.43, "learning_rate": 1.5425242718446603e-06, "loss": 0.0259, "step": 356460 }, { "epoch": 138.43, "learning_rate": 1.5420064724919095e-06, "loss": 0.1094, "step": 356470 }, { "epoch": 138.44, "learning_rate": 1.5414886731391587e-06, "loss": 0.0313, "step": 356480 }, { "epoch": 138.44, "learning_rate": 1.5409708737864079e-06, "loss": 0.0009, "step": 356490 }, { "epoch": 138.45, "learning_rate": 1.540453074433657e-06, "loss": 0.0761, "step": 356500 }, { "epoch": 138.45, "learning_rate": 1.5399352750809063e-06, "loss": 0.0001, "step": 356510 }, { "epoch": 138.45, "learning_rate": 1.5394174757281555e-06, "loss": 0.0001, "step": 356520 }, { "epoch": 138.46, "learning_rate": 1.5388996763754047e-06, "loss": 0.0165, "step": 356530 }, { "epoch": 138.46, "learning_rate": 1.5383818770226539e-06, "loss": 0.0534, "step": 356540 }, { "epoch": 138.47, "learning_rate": 1.537864077669903e-06, "loss": 0.0012, "step": 356550 }, { "epoch": 138.47, "learning_rate": 1.5373462783171523e-06, "loss": 0.0328, "step": 356560 }, { "epoch": 138.47, "learning_rate": 1.5368284789644014e-06, "loss": 0.0513, "step": 356570 }, { "epoch": 138.48, "learning_rate": 1.5363106796116506e-06, "loss": 0.0712, "step": 356580 }, { "epoch": 138.48, "learning_rate": 1.5357928802588998e-06, "loss": 0.0648, "step": 356590 }, { "epoch": 138.49, "learning_rate": 1.535275080906149e-06, "loss": 0.0004, "step": 356600 }, { "epoch": 138.49, "learning_rate": 1.5347572815533982e-06, "loss": 0.0656, "step": 356610 }, { "epoch": 138.49, "learning_rate": 1.5342394822006474e-06, "loss": 0.0773, "step": 356620 }, { "epoch": 138.5, "learning_rate": 1.5337216828478966e-06, "loss": 0.0255, "step": 356630 }, { "epoch": 138.5, "learning_rate": 1.5332038834951458e-06, "loss": 0.0098, "step": 356640 }, { "epoch": 138.5, "learning_rate": 1.5326860841423948e-06, "loss": 0.0774, "step": 356650 }, { "epoch": 138.51, "learning_rate": 1.532168284789644e-06, "loss": 0.0368, "step": 356660 }, { "epoch": 138.51, "learning_rate": 1.5316504854368932e-06, "loss": 0.0162, "step": 356670 }, { "epoch": 138.52, "learning_rate": 1.5311326860841424e-06, "loss": 0.0017, "step": 356680 }, { "epoch": 138.52, "learning_rate": 1.5306148867313916e-06, "loss": 0.0792, "step": 356690 }, { "epoch": 138.52, "learning_rate": 1.5300970873786408e-06, "loss": 0.0703, "step": 356700 }, { "epoch": 138.53, "learning_rate": 1.52957928802589e-06, "loss": 0.0025, "step": 356710 }, { "epoch": 138.53, "learning_rate": 1.5290614886731392e-06, "loss": 0.1126, "step": 356720 }, { "epoch": 138.54, "learning_rate": 1.5285436893203883e-06, "loss": 0.0388, "step": 356730 }, { "epoch": 138.54, "learning_rate": 1.5280258899676375e-06, "loss": 0.0087, "step": 356740 }, { "epoch": 138.54, "learning_rate": 1.5275080906148867e-06, "loss": 0.0055, "step": 356750 }, { "epoch": 138.55, "learning_rate": 1.526990291262136e-06, "loss": 0.0097, "step": 356760 }, { "epoch": 138.55, "learning_rate": 1.5264724919093851e-06, "loss": 0.0008, "step": 356770 }, { "epoch": 138.56, "learning_rate": 1.5259546925566343e-06, "loss": 0.032, "step": 356780 }, { "epoch": 138.56, "learning_rate": 1.5254368932038835e-06, "loss": 0.0102, "step": 356790 }, { "epoch": 138.56, "learning_rate": 1.5249190938511327e-06, "loss": 0.0768, "step": 356800 }, { "epoch": 138.57, "learning_rate": 1.524401294498382e-06, "loss": 0.0109, "step": 356810 }, { "epoch": 138.57, "learning_rate": 1.5238834951456313e-06, "loss": 0.0747, "step": 356820 }, { "epoch": 138.57, "learning_rate": 1.5233656957928805e-06, "loss": 0.1053, "step": 356830 }, { "epoch": 138.58, "learning_rate": 1.5228478964401297e-06, "loss": 0.0002, "step": 356840 }, { "epoch": 138.58, "learning_rate": 1.522330097087379e-06, "loss": 0.0578, "step": 356850 }, { "epoch": 138.59, "learning_rate": 1.521812297734628e-06, "loss": 0.0274, "step": 356860 }, { "epoch": 138.59, "learning_rate": 1.5212944983818773e-06, "loss": 0.1364, "step": 356870 }, { "epoch": 138.59, "learning_rate": 1.5207766990291265e-06, "loss": 0.0436, "step": 356880 }, { "epoch": 138.6, "learning_rate": 1.5202588996763757e-06, "loss": 0.0041, "step": 356890 }, { "epoch": 138.6, "learning_rate": 1.5197411003236249e-06, "loss": 0.0005, "step": 356900 }, { "epoch": 138.61, "learning_rate": 1.519223300970874e-06, "loss": 0.0923, "step": 356910 }, { "epoch": 138.61, "learning_rate": 1.5187055016181233e-06, "loss": 0.0158, "step": 356920 }, { "epoch": 138.61, "learning_rate": 1.5181877022653725e-06, "loss": 0.0505, "step": 356930 }, { "epoch": 138.62, "learning_rate": 1.5176699029126216e-06, "loss": 0.0442, "step": 356940 }, { "epoch": 138.62, "learning_rate": 1.5171521035598708e-06, "loss": 0.0092, "step": 356950 }, { "epoch": 138.63, "learning_rate": 1.51663430420712e-06, "loss": 0.021, "step": 356960 }, { "epoch": 138.63, "learning_rate": 1.516116504854369e-06, "loss": 0.015, "step": 356970 }, { "epoch": 138.63, "learning_rate": 1.5155987055016182e-06, "loss": 0.0107, "step": 356980 }, { "epoch": 138.64, "learning_rate": 1.5150809061488674e-06, "loss": 0.2272, "step": 356990 }, { "epoch": 138.64, "learning_rate": 1.5145631067961166e-06, "loss": 0.028, "step": 357000 }, { "epoch": 138.64, "learning_rate": 1.5140453074433658e-06, "loss": 0.0672, "step": 357010 }, { "epoch": 138.65, "learning_rate": 1.513527508090615e-06, "loss": 0.0621, "step": 357020 }, { "epoch": 138.65, "learning_rate": 1.5130097087378642e-06, "loss": 0.0251, "step": 357030 }, { "epoch": 138.66, "learning_rate": 1.5124919093851134e-06, "loss": 0.0439, "step": 357040 }, { "epoch": 138.66, "learning_rate": 1.5119741100323626e-06, "loss": 0.0421, "step": 357050 }, { "epoch": 138.66, "learning_rate": 1.5114563106796118e-06, "loss": 0.0078, "step": 357060 }, { "epoch": 138.67, "learning_rate": 1.510938511326861e-06, "loss": 0.0014, "step": 357070 }, { "epoch": 138.67, "learning_rate": 1.5104207119741102e-06, "loss": 0.0001, "step": 357080 }, { "epoch": 138.68, "learning_rate": 1.5099029126213594e-06, "loss": 0.0852, "step": 357090 }, { "epoch": 138.68, "learning_rate": 1.5093851132686085e-06, "loss": 0.0589, "step": 357100 }, { "epoch": 138.68, "learning_rate": 1.5088673139158577e-06, "loss": 0.0497, "step": 357110 }, { "epoch": 138.69, "learning_rate": 1.508349514563107e-06, "loss": 0.0028, "step": 357120 }, { "epoch": 138.69, "learning_rate": 1.5078317152103561e-06, "loss": 0.0714, "step": 357130 }, { "epoch": 138.7, "learning_rate": 1.5073139158576053e-06, "loss": 0.0453, "step": 357140 }, { "epoch": 138.7, "learning_rate": 1.5067961165048545e-06, "loss": 0.0007, "step": 357150 }, { "epoch": 138.7, "learning_rate": 1.5062783171521037e-06, "loss": 0.0422, "step": 357160 }, { "epoch": 138.71, "learning_rate": 1.505760517799353e-06, "loss": 0.0411, "step": 357170 }, { "epoch": 138.71, "learning_rate": 1.505242718446602e-06, "loss": 0.0083, "step": 357180 }, { "epoch": 138.71, "learning_rate": 1.5047249190938513e-06, "loss": 0.024, "step": 357190 }, { "epoch": 138.72, "learning_rate": 1.5042071197411005e-06, "loss": 0.0341, "step": 357200 }, { "epoch": 138.72, "learning_rate": 1.5036893203883497e-06, "loss": 0.0577, "step": 357210 }, { "epoch": 138.73, "learning_rate": 1.5031715210355987e-06, "loss": 0.0862, "step": 357220 }, { "epoch": 138.73, "learning_rate": 1.5026537216828479e-06, "loss": 0.0747, "step": 357230 }, { "epoch": 138.73, "learning_rate": 1.502135922330097e-06, "loss": 0.0769, "step": 357240 }, { "epoch": 138.74, "learning_rate": 1.5016181229773463e-06, "loss": 0.0183, "step": 357250 }, { "epoch": 138.74, "learning_rate": 1.5011003236245954e-06, "loss": 0.0014, "step": 357260 }, { "epoch": 138.75, "learning_rate": 1.5005825242718446e-06, "loss": 0.0159, "step": 357270 }, { "epoch": 138.75, "learning_rate": 1.5000647249190938e-06, "loss": 0.0853, "step": 357280 }, { "epoch": 138.75, "learning_rate": 1.499546925566343e-06, "loss": 0.047, "step": 357290 }, { "epoch": 138.76, "learning_rate": 1.4990291262135922e-06, "loss": 0.0161, "step": 357300 }, { "epoch": 138.76, "learning_rate": 1.4985113268608414e-06, "loss": 0.082, "step": 357310 }, { "epoch": 138.77, "learning_rate": 1.4979935275080906e-06, "loss": 0.0006, "step": 357320 }, { "epoch": 138.77, "learning_rate": 1.4974757281553398e-06, "loss": 0.0374, "step": 357330 }, { "epoch": 138.77, "learning_rate": 1.496957928802589e-06, "loss": 0.009, "step": 357340 }, { "epoch": 138.78, "learning_rate": 1.4964401294498382e-06, "loss": 0.0097, "step": 357350 }, { "epoch": 138.78, "learning_rate": 1.4959223300970874e-06, "loss": 0.0073, "step": 357360 }, { "epoch": 138.78, "learning_rate": 1.4954045307443366e-06, "loss": 0.1321, "step": 357370 }, { "epoch": 138.79, "learning_rate": 1.494886731391586e-06, "loss": 0.0783, "step": 357380 }, { "epoch": 138.79, "learning_rate": 1.4943689320388352e-06, "loss": 0.0184, "step": 357390 }, { "epoch": 138.8, "learning_rate": 1.4938511326860844e-06, "loss": 0.047, "step": 357400 }, { "epoch": 138.8, "learning_rate": 1.4933333333333336e-06, "loss": 0.0018, "step": 357410 }, { "epoch": 138.8, "learning_rate": 1.4928155339805828e-06, "loss": 0.0012, "step": 357420 }, { "epoch": 138.81, "learning_rate": 1.492297734627832e-06, "loss": 0.009, "step": 357430 }, { "epoch": 138.81, "learning_rate": 1.4917799352750812e-06, "loss": 0.0256, "step": 357440 }, { "epoch": 138.82, "learning_rate": 1.4912621359223304e-06, "loss": 0.0703, "step": 357450 }, { "epoch": 138.82, "learning_rate": 1.4907443365695795e-06, "loss": 0.0285, "step": 357460 }, { "epoch": 138.82, "learning_rate": 1.4902265372168287e-06, "loss": 0.0019, "step": 357470 }, { "epoch": 138.83, "learning_rate": 1.489708737864078e-06, "loss": 0.1281, "step": 357480 }, { "epoch": 138.83, "learning_rate": 1.4891909385113271e-06, "loss": 0.0204, "step": 357490 }, { "epoch": 138.83, "learning_rate": 1.4886731391585763e-06, "loss": 0.003, "step": 357500 }, { "epoch": 138.84, "learning_rate": 1.4881553398058255e-06, "loss": 0.0438, "step": 357510 }, { "epoch": 138.84, "learning_rate": 1.4876375404530747e-06, "loss": 0.0191, "step": 357520 }, { "epoch": 138.85, "learning_rate": 1.487119741100324e-06, "loss": 0.0244, "step": 357530 }, { "epoch": 138.85, "learning_rate": 1.4866019417475729e-06, "loss": 0.0996, "step": 357540 }, { "epoch": 138.85, "learning_rate": 1.486084142394822e-06, "loss": 0.0333, "step": 357550 }, { "epoch": 138.86, "learning_rate": 1.4855663430420713e-06, "loss": 0.0183, "step": 357560 }, { "epoch": 138.86, "learning_rate": 1.4850485436893205e-06, "loss": 0.0018, "step": 357570 }, { "epoch": 138.87, "learning_rate": 1.4845307443365697e-06, "loss": 0.0169, "step": 357580 }, { "epoch": 138.87, "learning_rate": 1.4840129449838189e-06, "loss": 0.0385, "step": 357590 }, { "epoch": 138.87, "learning_rate": 1.483495145631068e-06, "loss": 0.0217, "step": 357600 }, { "epoch": 138.88, "learning_rate": 1.4829773462783173e-06, "loss": 0.0063, "step": 357610 }, { "epoch": 138.88, "learning_rate": 1.4824595469255664e-06, "loss": 0.0496, "step": 357620 }, { "epoch": 138.89, "learning_rate": 1.4819417475728156e-06, "loss": 0.0607, "step": 357630 }, { "epoch": 138.89, "learning_rate": 1.4814239482200648e-06, "loss": 0.0002, "step": 357640 }, { "epoch": 138.89, "learning_rate": 1.480906148867314e-06, "loss": 0.0617, "step": 357650 }, { "epoch": 138.9, "learning_rate": 1.4803883495145632e-06, "loss": 0.011, "step": 357660 }, { "epoch": 138.9, "learning_rate": 1.4798705501618124e-06, "loss": 0.0096, "step": 357670 }, { "epoch": 138.9, "learning_rate": 1.4793527508090616e-06, "loss": 0.0476, "step": 357680 }, { "epoch": 138.91, "learning_rate": 1.4788349514563108e-06, "loss": 0.0601, "step": 357690 }, { "epoch": 138.91, "learning_rate": 1.47831715210356e-06, "loss": 0.0453, "step": 357700 }, { "epoch": 138.92, "learning_rate": 1.4777993527508092e-06, "loss": 0.0121, "step": 357710 }, { "epoch": 138.92, "learning_rate": 1.4772815533980584e-06, "loss": 0.0207, "step": 357720 }, { "epoch": 138.92, "learning_rate": 1.4767637540453076e-06, "loss": 0.0003, "step": 357730 }, { "epoch": 138.93, "learning_rate": 1.4762459546925568e-06, "loss": 0.0861, "step": 357740 }, { "epoch": 138.93, "learning_rate": 1.475728155339806e-06, "loss": 0.1142, "step": 357750 }, { "epoch": 138.94, "learning_rate": 1.4752103559870552e-06, "loss": 0.0296, "step": 357760 }, { "epoch": 138.94, "learning_rate": 1.4746925566343044e-06, "loss": 0.0009, "step": 357770 }, { "epoch": 138.94, "learning_rate": 1.4741747572815536e-06, "loss": 0.0881, "step": 357780 }, { "epoch": 138.95, "learning_rate": 1.4736569579288025e-06, "loss": 0.0243, "step": 357790 }, { "epoch": 138.95, "learning_rate": 1.4731391585760517e-06, "loss": 0.0855, "step": 357800 }, { "epoch": 138.96, "learning_rate": 1.472621359223301e-06, "loss": 0.0606, "step": 357810 }, { "epoch": 138.96, "learning_rate": 1.4721035598705501e-06, "loss": 0.1207, "step": 357820 }, { "epoch": 138.96, "learning_rate": 1.4715857605177993e-06, "loss": 0.0029, "step": 357830 }, { "epoch": 138.97, "learning_rate": 1.4710679611650485e-06, "loss": 0.0087, "step": 357840 }, { "epoch": 138.97, "learning_rate": 1.4705501618122977e-06, "loss": 0.0027, "step": 357850 }, { "epoch": 138.97, "learning_rate": 1.470032362459547e-06, "loss": 0.0969, "step": 357860 }, { "epoch": 138.98, "learning_rate": 1.469514563106796e-06, "loss": 0.0683, "step": 357870 }, { "epoch": 138.98, "learning_rate": 1.4689967637540453e-06, "loss": 0.0688, "step": 357880 }, { "epoch": 138.99, "learning_rate": 1.4684789644012945e-06, "loss": 0.0118, "step": 357890 }, { "epoch": 138.99, "learning_rate": 1.4679611650485437e-06, "loss": 0.0951, "step": 357900 }, { "epoch": 138.99, "learning_rate": 1.4674433656957929e-06, "loss": 0.0537, "step": 357910 }, { "epoch": 139.0, "learning_rate": 1.466925566343042e-06, "loss": 0.0705, "step": 357920 }, { "epoch": 139.0, "eval_accuracy": 0.9526822558459422, "eval_loss": 0.38591042160987854, "eval_runtime": 8.2344, "eval_samples_per_second": 441.439, "eval_steps_per_second": 55.256, "step": 357925 }, { "epoch": 139.0, "learning_rate": 1.4664077669902913e-06, "loss": 0.0276, "step": 357930 }, { "epoch": 139.01, "learning_rate": 1.4658899676375407e-06, "loss": 0.1208, "step": 357940 }, { "epoch": 139.01, "learning_rate": 1.4653721682847899e-06, "loss": 0.2903, "step": 357950 }, { "epoch": 139.01, "learning_rate": 1.464854368932039e-06, "loss": 0.1264, "step": 357960 }, { "epoch": 139.02, "learning_rate": 1.4643365695792883e-06, "loss": 0.0036, "step": 357970 }, { "epoch": 139.02, "learning_rate": 1.4638187702265375e-06, "loss": 0.0166, "step": 357980 }, { "epoch": 139.03, "learning_rate": 1.4633009708737866e-06, "loss": 0.0001, "step": 357990 }, { "epoch": 139.03, "learning_rate": 1.4627831715210358e-06, "loss": 0.0078, "step": 358000 }, { "epoch": 139.03, "learning_rate": 1.462265372168285e-06, "loss": 0.0108, "step": 358010 }, { "epoch": 139.04, "learning_rate": 1.4617475728155342e-06, "loss": 0.0659, "step": 358020 }, { "epoch": 139.04, "learning_rate": 1.4612297734627834e-06, "loss": 0.0258, "step": 358030 }, { "epoch": 139.04, "learning_rate": 1.4607119741100326e-06, "loss": 0.0542, "step": 358040 }, { "epoch": 139.05, "learning_rate": 1.4601941747572818e-06, "loss": 0.0226, "step": 358050 }, { "epoch": 139.05, "learning_rate": 1.459676375404531e-06, "loss": 0.0104, "step": 358060 }, { "epoch": 139.06, "learning_rate": 1.4591585760517802e-06, "loss": 0.082, "step": 358070 }, { "epoch": 139.06, "learning_rate": 1.4586407766990294e-06, "loss": 0.0002, "step": 358080 }, { "epoch": 139.06, "learning_rate": 1.4581229773462786e-06, "loss": 0.0782, "step": 358090 }, { "epoch": 139.07, "learning_rate": 1.4576051779935278e-06, "loss": 0.089, "step": 358100 }, { "epoch": 139.07, "learning_rate": 1.4570873786407768e-06, "loss": 0.2052, "step": 358110 }, { "epoch": 139.08, "learning_rate": 1.456569579288026e-06, "loss": 0.1633, "step": 358120 }, { "epoch": 139.08, "learning_rate": 1.4560517799352752e-06, "loss": 0.0809, "step": 358130 }, { "epoch": 139.08, "learning_rate": 1.4555339805825244e-06, "loss": 0.1189, "step": 358140 }, { "epoch": 139.09, "learning_rate": 1.4550161812297735e-06, "loss": 0.0309, "step": 358150 }, { "epoch": 139.09, "learning_rate": 1.4544983818770227e-06, "loss": 0.0925, "step": 358160 }, { "epoch": 139.1, "learning_rate": 1.453980582524272e-06, "loss": 0.0001, "step": 358170 }, { "epoch": 139.1, "learning_rate": 1.4534627831715211e-06, "loss": 0.0284, "step": 358180 }, { "epoch": 139.1, "learning_rate": 1.4529449838187703e-06, "loss": 0.0446, "step": 358190 }, { "epoch": 139.11, "learning_rate": 1.4524271844660195e-06, "loss": 0.121, "step": 358200 }, { "epoch": 139.11, "learning_rate": 1.4519093851132687e-06, "loss": 0.0519, "step": 358210 }, { "epoch": 139.11, "learning_rate": 1.451391585760518e-06, "loss": 0.0827, "step": 358220 }, { "epoch": 139.12, "learning_rate": 1.450873786407767e-06, "loss": 0.0002, "step": 358230 }, { "epoch": 139.12, "learning_rate": 1.4503559870550163e-06, "loss": 0.0007, "step": 358240 }, { "epoch": 139.13, "learning_rate": 1.4498381877022655e-06, "loss": 0.0289, "step": 358250 }, { "epoch": 139.13, "learning_rate": 1.4493203883495147e-06, "loss": 0.0197, "step": 358260 }, { "epoch": 139.13, "learning_rate": 1.4488025889967639e-06, "loss": 0.0681, "step": 358270 }, { "epoch": 139.14, "learning_rate": 1.448284789644013e-06, "loss": 0.0001, "step": 358280 }, { "epoch": 139.14, "learning_rate": 1.4477669902912623e-06, "loss": 0.0688, "step": 358290 }, { "epoch": 139.15, "learning_rate": 1.4472491909385115e-06, "loss": 0.0186, "step": 358300 }, { "epoch": 139.15, "learning_rate": 1.4467313915857607e-06, "loss": 0.074, "step": 358310 }, { "epoch": 139.15, "learning_rate": 1.4462135922330099e-06, "loss": 0.0103, "step": 358320 }, { "epoch": 139.16, "learning_rate": 1.445695792880259e-06, "loss": 0.1531, "step": 358330 }, { "epoch": 139.16, "learning_rate": 1.4451779935275082e-06, "loss": 0.0272, "step": 358340 }, { "epoch": 139.17, "learning_rate": 1.4446601941747574e-06, "loss": 0.0892, "step": 358350 }, { "epoch": 139.17, "learning_rate": 1.4441423948220064e-06, "loss": 0.0164, "step": 358360 }, { "epoch": 139.17, "learning_rate": 1.4436245954692556e-06, "loss": 0.132, "step": 358370 }, { "epoch": 139.18, "learning_rate": 1.4431067961165048e-06, "loss": 0.0544, "step": 358380 }, { "epoch": 139.18, "learning_rate": 1.442588996763754e-06, "loss": 0.0156, "step": 358390 }, { "epoch": 139.18, "learning_rate": 1.4420711974110032e-06, "loss": 0.0113, "step": 358400 }, { "epoch": 139.19, "learning_rate": 1.4415533980582524e-06, "loss": 0.0148, "step": 358410 }, { "epoch": 139.19, "learning_rate": 1.4410355987055016e-06, "loss": 0.0616, "step": 358420 }, { "epoch": 139.2, "learning_rate": 1.4405177993527508e-06, "loss": 0.0456, "step": 358430 }, { "epoch": 139.2, "learning_rate": 1.44e-06, "loss": 0.0156, "step": 358440 }, { "epoch": 139.2, "learning_rate": 1.4394822006472492e-06, "loss": 0.1591, "step": 358450 }, { "epoch": 139.21, "learning_rate": 1.4389644012944984e-06, "loss": 0.0206, "step": 358460 }, { "epoch": 139.21, "learning_rate": 1.4384466019417476e-06, "loss": 0.0004, "step": 358470 }, { "epoch": 139.22, "learning_rate": 1.4379288025889968e-06, "loss": 0.1253, "step": 358480 }, { "epoch": 139.22, "learning_rate": 1.437411003236246e-06, "loss": 0.0898, "step": 358490 }, { "epoch": 139.22, "learning_rate": 1.4368932038834951e-06, "loss": 0.0154, "step": 358500 }, { "epoch": 139.23, "learning_rate": 1.4363754045307446e-06, "loss": 0.0464, "step": 358510 }, { "epoch": 139.23, "learning_rate": 1.4358576051779937e-06, "loss": 0.016, "step": 358520 }, { "epoch": 139.23, "learning_rate": 1.435339805825243e-06, "loss": 0.2403, "step": 358530 }, { "epoch": 139.24, "learning_rate": 1.4348220064724921e-06, "loss": 0.018, "step": 358540 }, { "epoch": 139.24, "learning_rate": 1.4343042071197413e-06, "loss": 0.0691, "step": 358550 }, { "epoch": 139.25, "learning_rate": 1.4337864077669905e-06, "loss": 0.064, "step": 358560 }, { "epoch": 139.25, "learning_rate": 1.4332686084142397e-06, "loss": 0.0306, "step": 358570 }, { "epoch": 139.25, "learning_rate": 1.432750809061489e-06, "loss": 0.0499, "step": 358580 }, { "epoch": 139.26, "learning_rate": 1.4322330097087381e-06, "loss": 0.0187, "step": 358590 }, { "epoch": 139.26, "learning_rate": 1.4317152103559873e-06, "loss": 0.0226, "step": 358600 }, { "epoch": 139.27, "learning_rate": 1.4311974110032365e-06, "loss": 0.0006, "step": 358610 }, { "epoch": 139.27, "learning_rate": 1.4306796116504857e-06, "loss": 0.0001, "step": 358620 }, { "epoch": 139.27, "learning_rate": 1.4301618122977349e-06, "loss": 0.0472, "step": 358630 }, { "epoch": 139.28, "learning_rate": 1.429644012944984e-06, "loss": 0.0094, "step": 358640 }, { "epoch": 139.28, "learning_rate": 1.4291262135922333e-06, "loss": 0.1269, "step": 358650 }, { "epoch": 139.29, "learning_rate": 1.4286084142394825e-06, "loss": 0.001, "step": 358660 }, { "epoch": 139.29, "learning_rate": 1.4280906148867317e-06, "loss": 0.0696, "step": 358670 }, { "epoch": 139.29, "learning_rate": 1.4275728155339806e-06, "loss": 0.0093, "step": 358680 }, { "epoch": 139.3, "learning_rate": 1.4270550161812298e-06, "loss": 0.042, "step": 358690 }, { "epoch": 139.3, "learning_rate": 1.426537216828479e-06, "loss": 0.0488, "step": 358700 }, { "epoch": 139.3, "learning_rate": 1.4260194174757282e-06, "loss": 0.0228, "step": 358710 }, { "epoch": 139.31, "learning_rate": 1.4255016181229774e-06, "loss": 0.0143, "step": 358720 }, { "epoch": 139.31, "learning_rate": 1.4249838187702266e-06, "loss": 0.0002, "step": 358730 }, { "epoch": 139.32, "learning_rate": 1.4244660194174758e-06, "loss": 0.0938, "step": 358740 }, { "epoch": 139.32, "learning_rate": 1.423948220064725e-06, "loss": 0.0238, "step": 358750 }, { "epoch": 139.32, "learning_rate": 1.4234304207119742e-06, "loss": 0.0109, "step": 358760 }, { "epoch": 139.33, "learning_rate": 1.4229126213592234e-06, "loss": 0.064, "step": 358770 }, { "epoch": 139.33, "learning_rate": 1.4223948220064726e-06, "loss": 0.0028, "step": 358780 }, { "epoch": 139.34, "learning_rate": 1.4218770226537218e-06, "loss": 0.0102, "step": 358790 }, { "epoch": 139.34, "learning_rate": 1.421359223300971e-06, "loss": 0.0166, "step": 358800 }, { "epoch": 139.34, "learning_rate": 1.4208414239482202e-06, "loss": 0.0777, "step": 358810 }, { "epoch": 139.35, "learning_rate": 1.4203236245954694e-06, "loss": 0.0245, "step": 358820 }, { "epoch": 139.35, "learning_rate": 1.4198058252427186e-06, "loss": 0.0139, "step": 358830 }, { "epoch": 139.36, "learning_rate": 1.4192880258899678e-06, "loss": 0.0197, "step": 358840 }, { "epoch": 139.36, "learning_rate": 1.418770226537217e-06, "loss": 0.0275, "step": 358850 }, { "epoch": 139.36, "learning_rate": 1.4182524271844661e-06, "loss": 0.0092, "step": 358860 }, { "epoch": 139.37, "learning_rate": 1.4177346278317153e-06, "loss": 0.0892, "step": 358870 }, { "epoch": 139.37, "learning_rate": 1.4172168284789645e-06, "loss": 0.1016, "step": 358880 }, { "epoch": 139.37, "learning_rate": 1.4166990291262137e-06, "loss": 0.0087, "step": 358890 }, { "epoch": 139.38, "learning_rate": 1.416181229773463e-06, "loss": 0.0125, "step": 358900 }, { "epoch": 139.38, "learning_rate": 1.4156634304207121e-06, "loss": 0.0207, "step": 358910 }, { "epoch": 139.39, "learning_rate": 1.4151456310679613e-06, "loss": 0.0163, "step": 358920 }, { "epoch": 139.39, "learning_rate": 1.4146278317152103e-06, "loss": 0.0979, "step": 358930 }, { "epoch": 139.39, "learning_rate": 1.4141100323624595e-06, "loss": 0.0313, "step": 358940 }, { "epoch": 139.4, "learning_rate": 1.4135922330097087e-06, "loss": 0.0019, "step": 358950 }, { "epoch": 139.4, "learning_rate": 1.4130744336569579e-06, "loss": 0.0579, "step": 358960 }, { "epoch": 139.41, "learning_rate": 1.412556634304207e-06, "loss": 0.1496, "step": 358970 }, { "epoch": 139.41, "learning_rate": 1.4120388349514563e-06, "loss": 0.0754, "step": 358980 }, { "epoch": 139.41, "learning_rate": 1.4115210355987055e-06, "loss": 0.0077, "step": 358990 }, { "epoch": 139.42, "learning_rate": 1.4110032362459547e-06, "loss": 0.0007, "step": 359000 }, { "epoch": 139.42, "learning_rate": 1.4104854368932039e-06, "loss": 0.0029, "step": 359010 }, { "epoch": 139.43, "learning_rate": 1.409967637540453e-06, "loss": 0.0533, "step": 359020 }, { "epoch": 139.43, "learning_rate": 1.4094498381877022e-06, "loss": 0.0633, "step": 359030 }, { "epoch": 139.43, "learning_rate": 1.4089320388349514e-06, "loss": 0.0023, "step": 359040 }, { "epoch": 139.44, "learning_rate": 1.4084142394822006e-06, "loss": 0.0082, "step": 359050 }, { "epoch": 139.44, "learning_rate": 1.4078964401294498e-06, "loss": 0.0251, "step": 359060 }, { "epoch": 139.44, "learning_rate": 1.4073786407766992e-06, "loss": 0.0002, "step": 359070 }, { "epoch": 139.45, "learning_rate": 1.4068608414239484e-06, "loss": 0.0728, "step": 359080 }, { "epoch": 139.45, "learning_rate": 1.4063430420711976e-06, "loss": 0.052, "step": 359090 }, { "epoch": 139.46, "learning_rate": 1.4058252427184468e-06, "loss": 0.0082, "step": 359100 }, { "epoch": 139.46, "learning_rate": 1.405307443365696e-06, "loss": 0.0266, "step": 359110 }, { "epoch": 139.46, "learning_rate": 1.4047896440129452e-06, "loss": 0.0015, "step": 359120 }, { "epoch": 139.47, "learning_rate": 1.4042718446601944e-06, "loss": 0.0001, "step": 359130 }, { "epoch": 139.47, "learning_rate": 1.4037540453074436e-06, "loss": 0.0638, "step": 359140 }, { "epoch": 139.48, "learning_rate": 1.4032362459546928e-06, "loss": 0.0665, "step": 359150 }, { "epoch": 139.48, "learning_rate": 1.402718446601942e-06, "loss": 0.0964, "step": 359160 }, { "epoch": 139.48, "learning_rate": 1.4022006472491912e-06, "loss": 0.0143, "step": 359170 }, { "epoch": 139.49, "learning_rate": 1.4016828478964404e-06, "loss": 0.0819, "step": 359180 }, { "epoch": 139.49, "learning_rate": 1.4011650485436896e-06, "loss": 0.2471, "step": 359190 }, { "epoch": 139.5, "learning_rate": 1.4006472491909388e-06, "loss": 0.0301, "step": 359200 }, { "epoch": 139.5, "learning_rate": 1.400129449838188e-06, "loss": 0.1329, "step": 359210 }, { "epoch": 139.5, "learning_rate": 1.3996116504854372e-06, "loss": 0.0002, "step": 359220 }, { "epoch": 139.51, "learning_rate": 1.3990938511326863e-06, "loss": 0.0815, "step": 359230 }, { "epoch": 139.51, "learning_rate": 1.3985760517799355e-06, "loss": 0.0328, "step": 359240 }, { "epoch": 139.51, "learning_rate": 1.3980582524271845e-06, "loss": 0.0276, "step": 359250 }, { "epoch": 139.52, "learning_rate": 1.3975404530744337e-06, "loss": 0.0588, "step": 359260 }, { "epoch": 139.52, "learning_rate": 1.397022653721683e-06, "loss": 0.0009, "step": 359270 }, { "epoch": 139.53, "learning_rate": 1.396504854368932e-06, "loss": 0.0758, "step": 359280 }, { "epoch": 139.53, "learning_rate": 1.3959870550161813e-06, "loss": 0.0159, "step": 359290 }, { "epoch": 139.53, "learning_rate": 1.3954692556634305e-06, "loss": 0.0862, "step": 359300 }, { "epoch": 139.54, "learning_rate": 1.3949514563106797e-06, "loss": 0.0182, "step": 359310 }, { "epoch": 139.54, "learning_rate": 1.3944336569579289e-06, "loss": 0.0002, "step": 359320 }, { "epoch": 139.55, "learning_rate": 1.393915857605178e-06, "loss": 0.0008, "step": 359330 }, { "epoch": 139.55, "learning_rate": 1.3933980582524273e-06, "loss": 0.2292, "step": 359340 }, { "epoch": 139.55, "learning_rate": 1.3928802588996765e-06, "loss": 0.0315, "step": 359350 }, { "epoch": 139.56, "learning_rate": 1.3923624595469257e-06, "loss": 0.0048, "step": 359360 }, { "epoch": 139.56, "learning_rate": 1.3918446601941749e-06, "loss": 0.1813, "step": 359370 }, { "epoch": 139.57, "learning_rate": 1.391326860841424e-06, "loss": 0.0003, "step": 359380 }, { "epoch": 139.57, "learning_rate": 1.3908090614886732e-06, "loss": 0.0484, "step": 359390 }, { "epoch": 139.57, "learning_rate": 1.3902912621359224e-06, "loss": 0.0156, "step": 359400 }, { "epoch": 139.58, "learning_rate": 1.3897734627831716e-06, "loss": 0.0731, "step": 359410 }, { "epoch": 139.58, "learning_rate": 1.3892556634304208e-06, "loss": 0.0161, "step": 359420 }, { "epoch": 139.58, "learning_rate": 1.38873786407767e-06, "loss": 0.0599, "step": 359430 }, { "epoch": 139.59, "learning_rate": 1.3882200647249192e-06, "loss": 0.0191, "step": 359440 }, { "epoch": 139.59, "learning_rate": 1.3877022653721684e-06, "loss": 0.0104, "step": 359450 }, { "epoch": 139.6, "learning_rate": 1.3871844660194176e-06, "loss": 0.0532, "step": 359460 }, { "epoch": 139.6, "learning_rate": 1.3866666666666668e-06, "loss": 0.073, "step": 359470 }, { "epoch": 139.6, "learning_rate": 1.386148867313916e-06, "loss": 0.0922, "step": 359480 }, { "epoch": 139.61, "learning_rate": 1.3856310679611652e-06, "loss": 0.0166, "step": 359490 }, { "epoch": 139.61, "learning_rate": 1.3851132686084142e-06, "loss": 0.0235, "step": 359500 }, { "epoch": 139.62, "learning_rate": 1.3845954692556634e-06, "loss": 0.0178, "step": 359510 }, { "epoch": 139.62, "learning_rate": 1.3840776699029126e-06, "loss": 0.0336, "step": 359520 }, { "epoch": 139.62, "learning_rate": 1.3835598705501618e-06, "loss": 0.0191, "step": 359530 }, { "epoch": 139.63, "learning_rate": 1.383042071197411e-06, "loss": 0.0302, "step": 359540 }, { "epoch": 139.63, "learning_rate": 1.3825242718446601e-06, "loss": 0.0001, "step": 359550 }, { "epoch": 139.63, "learning_rate": 1.3820064724919093e-06, "loss": 0.0008, "step": 359560 }, { "epoch": 139.64, "learning_rate": 1.3814886731391585e-06, "loss": 0.0715, "step": 359570 }, { "epoch": 139.64, "learning_rate": 1.3809708737864077e-06, "loss": 0.0001, "step": 359580 }, { "epoch": 139.65, "learning_rate": 1.380453074433657e-06, "loss": 0.0118, "step": 359590 }, { "epoch": 139.65, "learning_rate": 1.3799352750809061e-06, "loss": 0.0251, "step": 359600 }, { "epoch": 139.65, "learning_rate": 1.3794174757281553e-06, "loss": 0.0266, "step": 359610 }, { "epoch": 139.66, "learning_rate": 1.3788996763754045e-06, "loss": 0.0256, "step": 359620 }, { "epoch": 139.66, "learning_rate": 1.378381877022654e-06, "loss": 0.1162, "step": 359630 }, { "epoch": 139.67, "learning_rate": 1.3778640776699031e-06, "loss": 0.0003, "step": 359640 }, { "epoch": 139.67, "learning_rate": 1.3773462783171523e-06, "loss": 0.0192, "step": 359650 }, { "epoch": 139.67, "learning_rate": 1.3768284789644015e-06, "loss": 0.0365, "step": 359660 }, { "epoch": 139.68, "learning_rate": 1.3763106796116507e-06, "loss": 0.0425, "step": 359670 }, { "epoch": 139.68, "learning_rate": 1.3757928802588999e-06, "loss": 0.012, "step": 359680 }, { "epoch": 139.69, "learning_rate": 1.375275080906149e-06, "loss": 0.0001, "step": 359690 }, { "epoch": 139.69, "learning_rate": 1.3747572815533983e-06, "loss": 0.0229, "step": 359700 }, { "epoch": 139.69, "learning_rate": 1.3742394822006475e-06, "loss": 0.1386, "step": 359710 }, { "epoch": 139.7, "learning_rate": 1.3737216828478967e-06, "loss": 0.0191, "step": 359720 }, { "epoch": 139.7, "learning_rate": 1.3732038834951459e-06, "loss": 0.0001, "step": 359730 }, { "epoch": 139.7, "learning_rate": 1.372686084142395e-06, "loss": 0.0321, "step": 359740 }, { "epoch": 139.71, "learning_rate": 1.3721682847896442e-06, "loss": 0.0077, "step": 359750 }, { "epoch": 139.71, "learning_rate": 1.3716504854368934e-06, "loss": 0.0003, "step": 359760 }, { "epoch": 139.72, "learning_rate": 1.3711326860841426e-06, "loss": 0.0112, "step": 359770 }, { "epoch": 139.72, "learning_rate": 1.3706148867313918e-06, "loss": 0.0148, "step": 359780 }, { "epoch": 139.72, "learning_rate": 1.370097087378641e-06, "loss": 0.0002, "step": 359790 }, { "epoch": 139.73, "learning_rate": 1.3695792880258902e-06, "loss": 0.0082, "step": 359800 }, { "epoch": 139.73, "learning_rate": 1.3690614886731394e-06, "loss": 0.0256, "step": 359810 }, { "epoch": 139.74, "learning_rate": 1.3685436893203884e-06, "loss": 0.0165, "step": 359820 }, { "epoch": 139.74, "learning_rate": 1.3680258899676376e-06, "loss": 0.0428, "step": 359830 }, { "epoch": 139.74, "learning_rate": 1.3675080906148868e-06, "loss": 0.0003, "step": 359840 }, { "epoch": 139.75, "learning_rate": 1.366990291262136e-06, "loss": 0.0916, "step": 359850 }, { "epoch": 139.75, "learning_rate": 1.3664724919093852e-06, "loss": 0.0833, "step": 359860 }, { "epoch": 139.76, "learning_rate": 1.3659546925566344e-06, "loss": 0.0182, "step": 359870 }, { "epoch": 139.76, "learning_rate": 1.3654368932038836e-06, "loss": 0.0599, "step": 359880 }, { "epoch": 139.76, "learning_rate": 1.3649190938511328e-06, "loss": 0.0634, "step": 359890 }, { "epoch": 139.77, "learning_rate": 1.364401294498382e-06, "loss": 0.0548, "step": 359900 }, { "epoch": 139.77, "learning_rate": 1.3638834951456311e-06, "loss": 0.0109, "step": 359910 }, { "epoch": 139.77, "learning_rate": 1.3633656957928803e-06, "loss": 0.0083, "step": 359920 }, { "epoch": 139.78, "learning_rate": 1.3628478964401295e-06, "loss": 0.002, "step": 359930 }, { "epoch": 139.78, "learning_rate": 1.3623300970873787e-06, "loss": 0.0787, "step": 359940 }, { "epoch": 139.79, "learning_rate": 1.361812297734628e-06, "loss": 0.0631, "step": 359950 }, { "epoch": 139.79, "learning_rate": 1.3612944983818771e-06, "loss": 0.0001, "step": 359960 }, { "epoch": 139.79, "learning_rate": 1.3607766990291263e-06, "loss": 0.0177, "step": 359970 }, { "epoch": 139.8, "learning_rate": 1.3602588996763755e-06, "loss": 0.0431, "step": 359980 }, { "epoch": 139.8, "learning_rate": 1.3597411003236247e-06, "loss": 0.0166, "step": 359990 }, { "epoch": 139.81, "learning_rate": 1.359223300970874e-06, "loss": 0.0409, "step": 360000 }, { "epoch": 139.81, "learning_rate": 1.358705501618123e-06, "loss": 0.0231, "step": 360010 }, { "epoch": 139.81, "learning_rate": 1.3581877022653723e-06, "loss": 0.0627, "step": 360020 }, { "epoch": 139.82, "learning_rate": 1.3576699029126215e-06, "loss": 0.0121, "step": 360030 }, { "epoch": 139.82, "learning_rate": 1.3571521035598707e-06, "loss": 0.0003, "step": 360040 }, { "epoch": 139.83, "learning_rate": 1.3566343042071199e-06, "loss": 0.0001, "step": 360050 }, { "epoch": 139.83, "learning_rate": 1.356116504854369e-06, "loss": 0.0663, "step": 360060 }, { "epoch": 139.83, "learning_rate": 1.355598705501618e-06, "loss": 0.0089, "step": 360070 }, { "epoch": 139.84, "learning_rate": 1.3550809061488672e-06, "loss": 0.0022, "step": 360080 }, { "epoch": 139.84, "learning_rate": 1.3545631067961164e-06, "loss": 0.0708, "step": 360090 }, { "epoch": 139.84, "learning_rate": 1.3540453074433656e-06, "loss": 0.0004, "step": 360100 }, { "epoch": 139.85, "learning_rate": 1.3535275080906148e-06, "loss": 0.0088, "step": 360110 }, { "epoch": 139.85, "learning_rate": 1.353009708737864e-06, "loss": 0.1098, "step": 360120 }, { "epoch": 139.86, "learning_rate": 1.3524919093851132e-06, "loss": 0.0698, "step": 360130 }, { "epoch": 139.86, "learning_rate": 1.3519741100323624e-06, "loss": 0.1153, "step": 360140 }, { "epoch": 139.86, "learning_rate": 1.3514563106796116e-06, "loss": 0.1187, "step": 360150 }, { "epoch": 139.87, "learning_rate": 1.3509385113268608e-06, "loss": 0.0379, "step": 360160 }, { "epoch": 139.87, "learning_rate": 1.35042071197411e-06, "loss": 0.001, "step": 360170 }, { "epoch": 139.88, "learning_rate": 1.3499029126213592e-06, "loss": 0.0693, "step": 360180 }, { "epoch": 139.88, "learning_rate": 1.3493851132686086e-06, "loss": 0.0854, "step": 360190 }, { "epoch": 139.88, "learning_rate": 1.3488673139158578e-06, "loss": 0.039, "step": 360200 }, { "epoch": 139.89, "learning_rate": 1.348349514563107e-06, "loss": 0.0022, "step": 360210 }, { "epoch": 139.89, "learning_rate": 1.3478317152103562e-06, "loss": 0.0061, "step": 360220 }, { "epoch": 139.9, "learning_rate": 1.3473139158576054e-06, "loss": 0.1372, "step": 360230 }, { "epoch": 139.9, "learning_rate": 1.3467961165048546e-06, "loss": 0.055, "step": 360240 }, { "epoch": 139.9, "learning_rate": 1.3462783171521038e-06, "loss": 0.111, "step": 360250 }, { "epoch": 139.91, "learning_rate": 1.345760517799353e-06, "loss": 0.0852, "step": 360260 }, { "epoch": 139.91, "learning_rate": 1.3452427184466022e-06, "loss": 0.0193, "step": 360270 }, { "epoch": 139.91, "learning_rate": 1.3447249190938513e-06, "loss": 0.1287, "step": 360280 }, { "epoch": 139.92, "learning_rate": 1.3442071197411005e-06, "loss": 0.0756, "step": 360290 }, { "epoch": 139.92, "learning_rate": 1.3436893203883497e-06, "loss": 0.0697, "step": 360300 }, { "epoch": 139.93, "learning_rate": 1.343171521035599e-06, "loss": 0.0006, "step": 360310 }, { "epoch": 139.93, "learning_rate": 1.3426537216828481e-06, "loss": 0.0001, "step": 360320 }, { "epoch": 139.93, "learning_rate": 1.3421359223300973e-06, "loss": 0.0953, "step": 360330 }, { "epoch": 139.94, "learning_rate": 1.3416181229773465e-06, "loss": 0.0192, "step": 360340 }, { "epoch": 139.94, "learning_rate": 1.3411003236245957e-06, "loss": 0.0185, "step": 360350 }, { "epoch": 139.95, "learning_rate": 1.340582524271845e-06, "loss": 0.0226, "step": 360360 }, { "epoch": 139.95, "learning_rate": 1.340064724919094e-06, "loss": 0.0007, "step": 360370 }, { "epoch": 139.95, "learning_rate": 1.3395469255663433e-06, "loss": 0.0541, "step": 360380 }, { "epoch": 139.96, "learning_rate": 1.3390291262135923e-06, "loss": 0.0101, "step": 360390 }, { "epoch": 139.96, "learning_rate": 1.3385113268608415e-06, "loss": 0.0765, "step": 360400 }, { "epoch": 139.97, "learning_rate": 1.3379935275080907e-06, "loss": 0.0017, "step": 360410 }, { "epoch": 139.97, "learning_rate": 1.3374757281553399e-06, "loss": 0.0091, "step": 360420 }, { "epoch": 139.97, "learning_rate": 1.336957928802589e-06, "loss": 0.0348, "step": 360430 }, { "epoch": 139.98, "learning_rate": 1.3364401294498382e-06, "loss": 0.0703, "step": 360440 }, { "epoch": 139.98, "learning_rate": 1.3359223300970874e-06, "loss": 0.1624, "step": 360450 }, { "epoch": 139.98, "learning_rate": 1.3354045307443366e-06, "loss": 0.0923, "step": 360460 }, { "epoch": 139.99, "learning_rate": 1.3348867313915858e-06, "loss": 0.0325, "step": 360470 }, { "epoch": 139.99, "learning_rate": 1.334368932038835e-06, "loss": 0.0917, "step": 360480 }, { "epoch": 140.0, "learning_rate": 1.3338511326860842e-06, "loss": 0.1865, "step": 360490 }, { "epoch": 140.0, "learning_rate": 1.3333333333333334e-06, "loss": 0.0442, "step": 360500 }, { "epoch": 140.0, "eval_accuracy": 0.9524071526822558, "eval_loss": 0.38710692524909973, "eval_runtime": 8.1858, "eval_samples_per_second": 444.063, "eval_steps_per_second": 55.584, "step": 360500 }, { "epoch": 140.0, "learning_rate": 1.3328155339805826e-06, "loss": 0.1342, "step": 360510 }, { "epoch": 140.01, "learning_rate": 1.3322977346278318e-06, "loss": 0.0503, "step": 360520 }, { "epoch": 140.01, "learning_rate": 1.331779935275081e-06, "loss": 0.0662, "step": 360530 }, { "epoch": 140.02, "learning_rate": 1.3312621359223302e-06, "loss": 0.0123, "step": 360540 }, { "epoch": 140.02, "learning_rate": 1.3307443365695794e-06, "loss": 0.0212, "step": 360550 }, { "epoch": 140.02, "learning_rate": 1.3302265372168286e-06, "loss": 0.0003, "step": 360560 }, { "epoch": 140.03, "learning_rate": 1.3297087378640778e-06, "loss": 0.1655, "step": 360570 }, { "epoch": 140.03, "learning_rate": 1.329190938511327e-06, "loss": 0.0595, "step": 360580 }, { "epoch": 140.03, "learning_rate": 1.3286731391585762e-06, "loss": 0.0414, "step": 360590 }, { "epoch": 140.04, "learning_rate": 1.3281553398058254e-06, "loss": 0.0883, "step": 360600 }, { "epoch": 140.04, "learning_rate": 1.3276375404530746e-06, "loss": 0.0147, "step": 360610 }, { "epoch": 140.05, "learning_rate": 1.3271197411003237e-06, "loss": 0.0324, "step": 360620 }, { "epoch": 140.05, "learning_rate": 1.326601941747573e-06, "loss": 0.0518, "step": 360630 }, { "epoch": 140.05, "learning_rate": 1.326084142394822e-06, "loss": 0.0126, "step": 360640 }, { "epoch": 140.06, "learning_rate": 1.3255663430420711e-06, "loss": 0.0254, "step": 360650 }, { "epoch": 140.06, "learning_rate": 1.3250485436893203e-06, "loss": 0.0087, "step": 360660 }, { "epoch": 140.07, "learning_rate": 1.3245307443365695e-06, "loss": 0.0253, "step": 360670 }, { "epoch": 140.07, "learning_rate": 1.3240129449838187e-06, "loss": 0.0141, "step": 360680 }, { "epoch": 140.07, "learning_rate": 1.323495145631068e-06, "loss": 0.0987, "step": 360690 }, { "epoch": 140.08, "learning_rate": 1.322977346278317e-06, "loss": 0.0002, "step": 360700 }, { "epoch": 140.08, "learning_rate": 1.3224595469255663e-06, "loss": 0.0263, "step": 360710 }, { "epoch": 140.09, "learning_rate": 1.3219417475728155e-06, "loss": 0.0308, "step": 360720 }, { "epoch": 140.09, "learning_rate": 1.3214239482200647e-06, "loss": 0.0338, "step": 360730 }, { "epoch": 140.09, "learning_rate": 1.3209061488673139e-06, "loss": 0.0905, "step": 360740 }, { "epoch": 140.1, "learning_rate": 1.3203883495145633e-06, "loss": 0.0241, "step": 360750 }, { "epoch": 140.1, "learning_rate": 1.3198705501618125e-06, "loss": 0.1399, "step": 360760 }, { "epoch": 140.1, "learning_rate": 1.3193527508090617e-06, "loss": 0.1117, "step": 360770 }, { "epoch": 140.11, "learning_rate": 1.3188349514563109e-06, "loss": 0.0607, "step": 360780 }, { "epoch": 140.11, "learning_rate": 1.31831715210356e-06, "loss": 0.0482, "step": 360790 }, { "epoch": 140.12, "learning_rate": 1.3177993527508092e-06, "loss": 0.0204, "step": 360800 }, { "epoch": 140.12, "learning_rate": 1.3172815533980584e-06, "loss": 0.0391, "step": 360810 }, { "epoch": 140.12, "learning_rate": 1.3167637540453076e-06, "loss": 0.0602, "step": 360820 }, { "epoch": 140.13, "learning_rate": 1.3162459546925568e-06, "loss": 0.0252, "step": 360830 }, { "epoch": 140.13, "learning_rate": 1.315728155339806e-06, "loss": 0.0442, "step": 360840 }, { "epoch": 140.14, "learning_rate": 1.3152103559870552e-06, "loss": 0.1645, "step": 360850 }, { "epoch": 140.14, "learning_rate": 1.3146925566343044e-06, "loss": 0.0169, "step": 360860 }, { "epoch": 140.14, "learning_rate": 1.3141747572815536e-06, "loss": 0.0003, "step": 360870 }, { "epoch": 140.15, "learning_rate": 1.3136569579288028e-06, "loss": 0.0645, "step": 360880 }, { "epoch": 140.15, "learning_rate": 1.313139158576052e-06, "loss": 0.0081, "step": 360890 }, { "epoch": 140.16, "learning_rate": 1.3126213592233012e-06, "loss": 0.0181, "step": 360900 }, { "epoch": 140.16, "learning_rate": 1.3121035598705504e-06, "loss": 0.0166, "step": 360910 }, { "epoch": 140.16, "learning_rate": 1.3115857605177996e-06, "loss": 0.0182, "step": 360920 }, { "epoch": 140.17, "learning_rate": 1.3110679611650488e-06, "loss": 0.0036, "step": 360930 }, { "epoch": 140.17, "learning_rate": 1.310550161812298e-06, "loss": 0.0003, "step": 360940 }, { "epoch": 140.17, "learning_rate": 1.3100323624595472e-06, "loss": 0.0003, "step": 360950 }, { "epoch": 140.18, "learning_rate": 1.3095145631067962e-06, "loss": 0.0006, "step": 360960 }, { "epoch": 140.18, "learning_rate": 1.3089967637540453e-06, "loss": 0.0519, "step": 360970 }, { "epoch": 140.19, "learning_rate": 1.3084789644012945e-06, "loss": 0.0001, "step": 360980 }, { "epoch": 140.19, "learning_rate": 1.3079611650485437e-06, "loss": 0.0004, "step": 360990 }, { "epoch": 140.19, "learning_rate": 1.307443365695793e-06, "loss": 0.0916, "step": 361000 }, { "epoch": 140.2, "learning_rate": 1.3069255663430421e-06, "loss": 0.0225, "step": 361010 }, { "epoch": 140.2, "learning_rate": 1.3064077669902913e-06, "loss": 0.0138, "step": 361020 }, { "epoch": 140.21, "learning_rate": 1.3058899676375405e-06, "loss": 0.0088, "step": 361030 }, { "epoch": 140.21, "learning_rate": 1.3053721682847897e-06, "loss": 0.0001, "step": 361040 }, { "epoch": 140.21, "learning_rate": 1.304854368932039e-06, "loss": 0.0258, "step": 361050 }, { "epoch": 140.22, "learning_rate": 1.304336569579288e-06, "loss": 0.0288, "step": 361060 }, { "epoch": 140.22, "learning_rate": 1.3038187702265373e-06, "loss": 0.0449, "step": 361070 }, { "epoch": 140.23, "learning_rate": 1.3033009708737865e-06, "loss": 0.0143, "step": 361080 }, { "epoch": 140.23, "learning_rate": 1.3027831715210357e-06, "loss": 0.0328, "step": 361090 }, { "epoch": 140.23, "learning_rate": 1.3022653721682849e-06, "loss": 0.0102, "step": 361100 }, { "epoch": 140.24, "learning_rate": 1.301747572815534e-06, "loss": 0.048, "step": 361110 }, { "epoch": 140.24, "learning_rate": 1.3012297734627833e-06, "loss": 0.0115, "step": 361120 }, { "epoch": 140.24, "learning_rate": 1.3007119741100325e-06, "loss": 0.0696, "step": 361130 }, { "epoch": 140.25, "learning_rate": 1.3001941747572817e-06, "loss": 0.0356, "step": 361140 }, { "epoch": 140.25, "learning_rate": 1.2996763754045308e-06, "loss": 0.0005, "step": 361150 }, { "epoch": 140.26, "learning_rate": 1.29915857605178e-06, "loss": 0.0006, "step": 361160 }, { "epoch": 140.26, "learning_rate": 1.2986407766990292e-06, "loss": 0.0395, "step": 361170 }, { "epoch": 140.26, "learning_rate": 1.2981229773462784e-06, "loss": 0.077, "step": 361180 }, { "epoch": 140.27, "learning_rate": 1.2976051779935276e-06, "loss": 0.012, "step": 361190 }, { "epoch": 140.27, "learning_rate": 1.2970873786407768e-06, "loss": 0.0472, "step": 361200 }, { "epoch": 140.28, "learning_rate": 1.2965695792880258e-06, "loss": 0.0243, "step": 361210 }, { "epoch": 140.28, "learning_rate": 1.296051779935275e-06, "loss": 0.1352, "step": 361220 }, { "epoch": 140.28, "learning_rate": 1.2955339805825242e-06, "loss": 0.0268, "step": 361230 }, { "epoch": 140.29, "learning_rate": 1.2950161812297734e-06, "loss": 0.0427, "step": 361240 }, { "epoch": 140.29, "learning_rate": 1.2944983818770226e-06, "loss": 0.0362, "step": 361250 }, { "epoch": 140.3, "learning_rate": 1.2939805825242718e-06, "loss": 0.0453, "step": 361260 }, { "epoch": 140.3, "learning_rate": 1.293462783171521e-06, "loss": 0.0109, "step": 361270 }, { "epoch": 140.3, "learning_rate": 1.2929449838187702e-06, "loss": 0.0838, "step": 361280 }, { "epoch": 140.31, "learning_rate": 1.2924271844660194e-06, "loss": 0.0757, "step": 361290 }, { "epoch": 140.31, "learning_rate": 1.2919093851132686e-06, "loss": 0.0401, "step": 361300 }, { "epoch": 140.31, "learning_rate": 1.291391585760518e-06, "loss": 0.0128, "step": 361310 }, { "epoch": 140.32, "learning_rate": 1.2908737864077672e-06, "loss": 0.0752, "step": 361320 }, { "epoch": 140.32, "learning_rate": 1.2903559870550163e-06, "loss": 0.0154, "step": 361330 }, { "epoch": 140.33, "learning_rate": 1.2898381877022655e-06, "loss": 0.0636, "step": 361340 }, { "epoch": 140.33, "learning_rate": 1.2893203883495147e-06, "loss": 0.0778, "step": 361350 }, { "epoch": 140.33, "learning_rate": 1.288802588996764e-06, "loss": 0.0851, "step": 361360 }, { "epoch": 140.34, "learning_rate": 1.2882847896440131e-06, "loss": 0.0217, "step": 361370 }, { "epoch": 140.34, "learning_rate": 1.2877669902912623e-06, "loss": 0.008, "step": 361380 }, { "epoch": 140.35, "learning_rate": 1.2872491909385115e-06, "loss": 0.0349, "step": 361390 }, { "epoch": 140.35, "learning_rate": 1.2867313915857607e-06, "loss": 0.0075, "step": 361400 }, { "epoch": 140.35, "learning_rate": 1.28621359223301e-06, "loss": 0.1626, "step": 361410 }, { "epoch": 140.36, "learning_rate": 1.285695792880259e-06, "loss": 0.0087, "step": 361420 }, { "epoch": 140.36, "learning_rate": 1.2851779935275083e-06, "loss": 0.0475, "step": 361430 }, { "epoch": 140.37, "learning_rate": 1.2846601941747575e-06, "loss": 0.046, "step": 361440 }, { "epoch": 140.37, "learning_rate": 1.2841423948220067e-06, "loss": 0.031, "step": 361450 }, { "epoch": 140.37, "learning_rate": 1.2836245954692559e-06, "loss": 0.0891, "step": 361460 }, { "epoch": 140.38, "learning_rate": 1.283106796116505e-06, "loss": 0.0132, "step": 361470 }, { "epoch": 140.38, "learning_rate": 1.2825889967637543e-06, "loss": 0.0424, "step": 361480 }, { "epoch": 140.38, "learning_rate": 1.2820711974110035e-06, "loss": 0.0005, "step": 361490 }, { "epoch": 140.39, "learning_rate": 1.2815533980582527e-06, "loss": 0.001, "step": 361500 }, { "epoch": 140.39, "learning_rate": 1.2810355987055018e-06, "loss": 0.0336, "step": 361510 }, { "epoch": 140.4, "learning_rate": 1.280517799352751e-06, "loss": 0.0625, "step": 361520 }, { "epoch": 140.4, "learning_rate": 1.28e-06, "loss": 0.0735, "step": 361530 }, { "epoch": 140.4, "learning_rate": 1.2794822006472492e-06, "loss": 0.067, "step": 361540 }, { "epoch": 140.41, "learning_rate": 1.2789644012944984e-06, "loss": 0.0094, "step": 361550 }, { "epoch": 140.41, "learning_rate": 1.2784466019417476e-06, "loss": 0.0289, "step": 361560 }, { "epoch": 140.42, "learning_rate": 1.2779288025889968e-06, "loss": 0.1281, "step": 361570 }, { "epoch": 140.42, "learning_rate": 1.277411003236246e-06, "loss": 0.0764, "step": 361580 }, { "epoch": 140.42, "learning_rate": 1.2768932038834952e-06, "loss": 0.0815, "step": 361590 }, { "epoch": 140.43, "learning_rate": 1.2763754045307444e-06, "loss": 0.1652, "step": 361600 }, { "epoch": 140.43, "learning_rate": 1.2758576051779936e-06, "loss": 0.1888, "step": 361610 }, { "epoch": 140.43, "learning_rate": 1.2753398058252428e-06, "loss": 0.0502, "step": 361620 }, { "epoch": 140.44, "learning_rate": 1.274822006472492e-06, "loss": 0.0751, "step": 361630 }, { "epoch": 140.44, "learning_rate": 1.2743042071197412e-06, "loss": 0.1014, "step": 361640 }, { "epoch": 140.45, "learning_rate": 1.2737864077669904e-06, "loss": 0.1135, "step": 361650 }, { "epoch": 140.45, "learning_rate": 1.2732686084142396e-06, "loss": 0.0263, "step": 361660 }, { "epoch": 140.45, "learning_rate": 1.2727508090614888e-06, "loss": 0.0003, "step": 361670 }, { "epoch": 140.46, "learning_rate": 1.272233009708738e-06, "loss": 0.001, "step": 361680 }, { "epoch": 140.46, "learning_rate": 1.2717152103559871e-06, "loss": 0.1085, "step": 361690 }, { "epoch": 140.47, "learning_rate": 1.2711974110032363e-06, "loss": 0.0733, "step": 361700 }, { "epoch": 140.47, "learning_rate": 1.2706796116504855e-06, "loss": 0.0006, "step": 361710 }, { "epoch": 140.47, "learning_rate": 1.2701618122977347e-06, "loss": 0.0554, "step": 361720 }, { "epoch": 140.48, "learning_rate": 1.269644012944984e-06, "loss": 0.043, "step": 361730 }, { "epoch": 140.48, "learning_rate": 1.2691262135922331e-06, "loss": 0.132, "step": 361740 }, { "epoch": 140.49, "learning_rate": 1.2686084142394823e-06, "loss": 0.1055, "step": 361750 }, { "epoch": 140.49, "learning_rate": 1.2680906148867315e-06, "loss": 0.0001, "step": 361760 }, { "epoch": 140.49, "learning_rate": 1.2675728155339807e-06, "loss": 0.0899, "step": 361770 }, { "epoch": 140.5, "learning_rate": 1.2670550161812297e-06, "loss": 0.1261, "step": 361780 }, { "epoch": 140.5, "learning_rate": 1.2665372168284789e-06, "loss": 0.0469, "step": 361790 }, { "epoch": 140.5, "learning_rate": 1.266019417475728e-06, "loss": 0.0078, "step": 361800 }, { "epoch": 140.51, "learning_rate": 1.2655016181229773e-06, "loss": 0.0934, "step": 361810 }, { "epoch": 140.51, "learning_rate": 1.2649838187702265e-06, "loss": 0.0014, "step": 361820 }, { "epoch": 140.52, "learning_rate": 1.2644660194174757e-06, "loss": 0.1355, "step": 361830 }, { "epoch": 140.52, "learning_rate": 1.2639482200647248e-06, "loss": 0.002, "step": 361840 }, { "epoch": 140.52, "learning_rate": 1.263430420711974e-06, "loss": 0.0513, "step": 361850 }, { "epoch": 140.53, "learning_rate": 1.2629126213592232e-06, "loss": 0.046, "step": 361860 }, { "epoch": 140.53, "learning_rate": 1.2623948220064726e-06, "loss": 0.0933, "step": 361870 }, { "epoch": 140.54, "learning_rate": 1.2618770226537218e-06, "loss": 0.1006, "step": 361880 }, { "epoch": 140.54, "learning_rate": 1.261359223300971e-06, "loss": 0.1502, "step": 361890 }, { "epoch": 140.54, "learning_rate": 1.2608414239482202e-06, "loss": 0.101, "step": 361900 }, { "epoch": 140.55, "learning_rate": 1.2603236245954694e-06, "loss": 0.0115, "step": 361910 }, { "epoch": 140.55, "learning_rate": 1.2598058252427186e-06, "loss": 0.0106, "step": 361920 }, { "epoch": 140.56, "learning_rate": 1.2592880258899678e-06, "loss": 0.0177, "step": 361930 }, { "epoch": 140.56, "learning_rate": 1.258770226537217e-06, "loss": 0.0001, "step": 361940 }, { "epoch": 140.56, "learning_rate": 1.2582524271844662e-06, "loss": 0.0003, "step": 361950 }, { "epoch": 140.57, "learning_rate": 1.2577346278317154e-06, "loss": 0.0954, "step": 361960 }, { "epoch": 140.57, "learning_rate": 1.2572168284789646e-06, "loss": 0.1693, "step": 361970 }, { "epoch": 140.57, "learning_rate": 1.2566990291262138e-06, "loss": 0.0409, "step": 361980 }, { "epoch": 140.58, "learning_rate": 1.256181229773463e-06, "loss": 0.0824, "step": 361990 }, { "epoch": 140.58, "learning_rate": 1.2556634304207122e-06, "loss": 0.0124, "step": 362000 }, { "epoch": 140.59, "learning_rate": 1.2551456310679614e-06, "loss": 0.0565, "step": 362010 }, { "epoch": 140.59, "learning_rate": 1.2546278317152106e-06, "loss": 0.0341, "step": 362020 }, { "epoch": 140.59, "learning_rate": 1.2541100323624598e-06, "loss": 0.0803, "step": 362030 }, { "epoch": 140.6, "learning_rate": 1.253592233009709e-06, "loss": 0.0274, "step": 362040 }, { "epoch": 140.6, "learning_rate": 1.2530744336569581e-06, "loss": 0.0041, "step": 362050 }, { "epoch": 140.61, "learning_rate": 1.2525566343042073e-06, "loss": 0.0082, "step": 362060 }, { "epoch": 140.61, "learning_rate": 1.2520388349514565e-06, "loss": 0.0203, "step": 362070 }, { "epoch": 140.61, "learning_rate": 1.2515210355987057e-06, "loss": 0.0294, "step": 362080 }, { "epoch": 140.62, "learning_rate": 1.251003236245955e-06, "loss": 0.0482, "step": 362090 }, { "epoch": 140.62, "learning_rate": 1.250485436893204e-06, "loss": 0.0001, "step": 362100 }, { "epoch": 140.63, "learning_rate": 1.249967637540453e-06, "loss": 0.0849, "step": 362110 }, { "epoch": 140.63, "learning_rate": 1.2494498381877023e-06, "loss": 0.0772, "step": 362120 }, { "epoch": 140.63, "learning_rate": 1.2489320388349515e-06, "loss": 0.0014, "step": 362130 }, { "epoch": 140.64, "learning_rate": 1.2484142394822007e-06, "loss": 0.0753, "step": 362140 }, { "epoch": 140.64, "learning_rate": 1.2478964401294499e-06, "loss": 0.0147, "step": 362150 }, { "epoch": 140.64, "learning_rate": 1.247378640776699e-06, "loss": 0.0631, "step": 362160 }, { "epoch": 140.65, "learning_rate": 1.2468608414239483e-06, "loss": 0.0179, "step": 362170 }, { "epoch": 140.65, "learning_rate": 1.2463430420711975e-06, "loss": 0.0126, "step": 362180 }, { "epoch": 140.66, "learning_rate": 1.2458252427184467e-06, "loss": 0.0613, "step": 362190 }, { "epoch": 140.66, "learning_rate": 1.2453074433656958e-06, "loss": 0.0178, "step": 362200 }, { "epoch": 140.66, "learning_rate": 1.244789644012945e-06, "loss": 0.0047, "step": 362210 }, { "epoch": 140.67, "learning_rate": 1.2442718446601942e-06, "loss": 0.0685, "step": 362220 }, { "epoch": 140.67, "learning_rate": 1.2437540453074434e-06, "loss": 0.0687, "step": 362230 }, { "epoch": 140.68, "learning_rate": 1.2432362459546926e-06, "loss": 0.0387, "step": 362240 }, { "epoch": 140.68, "learning_rate": 1.2427184466019418e-06, "loss": 0.0563, "step": 362250 }, { "epoch": 140.68, "learning_rate": 1.242200647249191e-06, "loss": 0.0179, "step": 362260 }, { "epoch": 140.69, "learning_rate": 1.2416828478964402e-06, "loss": 0.0318, "step": 362270 }, { "epoch": 140.69, "learning_rate": 1.2411650485436894e-06, "loss": 0.0002, "step": 362280 }, { "epoch": 140.7, "learning_rate": 1.2406472491909386e-06, "loss": 0.051, "step": 362290 }, { "epoch": 140.7, "learning_rate": 1.2401294498381878e-06, "loss": 0.0557, "step": 362300 }, { "epoch": 140.7, "learning_rate": 1.239611650485437e-06, "loss": 0.0348, "step": 362310 }, { "epoch": 140.71, "learning_rate": 1.2390938511326862e-06, "loss": 0.0368, "step": 362320 }, { "epoch": 140.71, "learning_rate": 1.2385760517799354e-06, "loss": 0.0169, "step": 362330 }, { "epoch": 140.71, "learning_rate": 1.2380582524271846e-06, "loss": 0.0926, "step": 362340 }, { "epoch": 140.72, "learning_rate": 1.2375404530744338e-06, "loss": 0.0001, "step": 362350 }, { "epoch": 140.72, "learning_rate": 1.237022653721683e-06, "loss": 0.0368, "step": 362360 }, { "epoch": 140.73, "learning_rate": 1.2365048543689322e-06, "loss": 0.0002, "step": 362370 }, { "epoch": 140.73, "learning_rate": 1.2359870550161813e-06, "loss": 0.0001, "step": 362380 }, { "epoch": 140.73, "learning_rate": 1.2354692556634305e-06, "loss": 0.0152, "step": 362390 }, { "epoch": 140.74, "learning_rate": 1.2349514563106797e-06, "loss": 0.0428, "step": 362400 }, { "epoch": 140.74, "learning_rate": 1.234433656957929e-06, "loss": 0.0027, "step": 362410 }, { "epoch": 140.75, "learning_rate": 1.2339158576051781e-06, "loss": 0.052, "step": 362420 }, { "epoch": 140.75, "learning_rate": 1.2333980582524273e-06, "loss": 0.1076, "step": 362430 }, { "epoch": 140.75, "learning_rate": 1.2328802588996765e-06, "loss": 0.0001, "step": 362440 }, { "epoch": 140.76, "learning_rate": 1.2323624595469257e-06, "loss": 0.0178, "step": 362450 }, { "epoch": 140.76, "learning_rate": 1.231844660194175e-06, "loss": 0.063, "step": 362460 }, { "epoch": 140.77, "learning_rate": 1.231326860841424e-06, "loss": 0.0003, "step": 362470 }, { "epoch": 140.77, "learning_rate": 1.2308090614886733e-06, "loss": 0.0088, "step": 362480 }, { "epoch": 140.77, "learning_rate": 1.2302912621359225e-06, "loss": 0.0039, "step": 362490 }, { "epoch": 140.78, "learning_rate": 1.2297734627831717e-06, "loss": 0.009, "step": 362500 }, { "epoch": 140.78, "learning_rate": 1.2292556634304207e-06, "loss": 0.0026, "step": 362510 }, { "epoch": 140.78, "learning_rate": 1.2287378640776699e-06, "loss": 0.1108, "step": 362520 }, { "epoch": 140.79, "learning_rate": 1.228220064724919e-06, "loss": 0.0237, "step": 362530 }, { "epoch": 140.79, "learning_rate": 1.2277022653721683e-06, "loss": 0.0356, "step": 362540 }, { "epoch": 140.8, "learning_rate": 1.2271844660194174e-06, "loss": 0.0031, "step": 362550 }, { "epoch": 140.8, "learning_rate": 1.2266666666666666e-06, "loss": 0.0709, "step": 362560 }, { "epoch": 140.8, "learning_rate": 1.226148867313916e-06, "loss": 0.0087, "step": 362570 }, { "epoch": 140.81, "learning_rate": 1.2256310679611652e-06, "loss": 0.068, "step": 362580 }, { "epoch": 140.81, "learning_rate": 1.2251132686084144e-06, "loss": 0.0816, "step": 362590 }, { "epoch": 140.82, "learning_rate": 1.2245954692556636e-06, "loss": 0.0003, "step": 362600 }, { "epoch": 140.82, "learning_rate": 1.2240776699029128e-06, "loss": 0.0602, "step": 362610 }, { "epoch": 140.82, "learning_rate": 1.223559870550162e-06, "loss": 0.014, "step": 362620 }, { "epoch": 140.83, "learning_rate": 1.2230420711974112e-06, "loss": 0.0016, "step": 362630 }, { "epoch": 140.83, "learning_rate": 1.2225242718446604e-06, "loss": 0.0191, "step": 362640 }, { "epoch": 140.83, "learning_rate": 1.2220064724919096e-06, "loss": 0.009, "step": 362650 }, { "epoch": 140.84, "learning_rate": 1.2214886731391588e-06, "loss": 0.0377, "step": 362660 }, { "epoch": 140.84, "learning_rate": 1.2209708737864078e-06, "loss": 0.0092, "step": 362670 }, { "epoch": 140.85, "learning_rate": 1.220453074433657e-06, "loss": 0.0206, "step": 362680 }, { "epoch": 140.85, "learning_rate": 1.2199352750809062e-06, "loss": 0.0002, "step": 362690 }, { "epoch": 140.85, "learning_rate": 1.2194174757281554e-06, "loss": 0.0678, "step": 362700 }, { "epoch": 140.86, "learning_rate": 1.2188996763754046e-06, "loss": 0.0638, "step": 362710 }, { "epoch": 140.86, "learning_rate": 1.2183818770226538e-06, "loss": 0.0763, "step": 362720 }, { "epoch": 140.87, "learning_rate": 1.217864077669903e-06, "loss": 0.0325, "step": 362730 }, { "epoch": 140.87, "learning_rate": 1.2173462783171521e-06, "loss": 0.0005, "step": 362740 }, { "epoch": 140.87, "learning_rate": 1.2168284789644013e-06, "loss": 0.0252, "step": 362750 }, { "epoch": 140.88, "learning_rate": 1.2163106796116505e-06, "loss": 0.0086, "step": 362760 }, { "epoch": 140.88, "learning_rate": 1.2157928802588997e-06, "loss": 0.0493, "step": 362770 }, { "epoch": 140.89, "learning_rate": 1.215275080906149e-06, "loss": 0.1083, "step": 362780 }, { "epoch": 140.89, "learning_rate": 1.2147572815533981e-06, "loss": 0.0352, "step": 362790 }, { "epoch": 140.89, "learning_rate": 1.2142394822006473e-06, "loss": 0.0471, "step": 362800 }, { "epoch": 140.9, "learning_rate": 1.2137216828478965e-06, "loss": 0.0637, "step": 362810 }, { "epoch": 140.9, "learning_rate": 1.2132038834951457e-06, "loss": 0.1292, "step": 362820 }, { "epoch": 140.9, "learning_rate": 1.2126860841423949e-06, "loss": 0.0153, "step": 362830 }, { "epoch": 140.91, "learning_rate": 1.212168284789644e-06, "loss": 0.0598, "step": 362840 }, { "epoch": 140.91, "learning_rate": 1.2116504854368933e-06, "loss": 0.0112, "step": 362850 }, { "epoch": 140.92, "learning_rate": 1.2111326860841425e-06, "loss": 0.0166, "step": 362860 }, { "epoch": 140.92, "learning_rate": 1.2106148867313917e-06, "loss": 0.0662, "step": 362870 }, { "epoch": 140.92, "learning_rate": 1.2100970873786409e-06, "loss": 0.0127, "step": 362880 }, { "epoch": 140.93, "learning_rate": 1.20957928802589e-06, "loss": 0.1013, "step": 362890 }, { "epoch": 140.93, "learning_rate": 1.2090614886731393e-06, "loss": 0.0419, "step": 362900 }, { "epoch": 140.94, "learning_rate": 1.2085436893203884e-06, "loss": 0.0099, "step": 362910 }, { "epoch": 140.94, "learning_rate": 1.2080258899676376e-06, "loss": 0.0005, "step": 362920 }, { "epoch": 140.94, "learning_rate": 1.2075080906148868e-06, "loss": 0.0915, "step": 362930 }, { "epoch": 140.95, "learning_rate": 1.206990291262136e-06, "loss": 0.085, "step": 362940 }, { "epoch": 140.95, "learning_rate": 1.2064724919093852e-06, "loss": 0.0449, "step": 362950 }, { "epoch": 140.96, "learning_rate": 1.2059546925566344e-06, "loss": 0.0273, "step": 362960 }, { "epoch": 140.96, "learning_rate": 1.2054368932038836e-06, "loss": 0.0001, "step": 362970 }, { "epoch": 140.96, "learning_rate": 1.2049190938511328e-06, "loss": 0.0194, "step": 362980 }, { "epoch": 140.97, "learning_rate": 1.204401294498382e-06, "loss": 0.0088, "step": 362990 }, { "epoch": 140.97, "learning_rate": 1.2038834951456312e-06, "loss": 0.047, "step": 363000 }, { "epoch": 140.97, "learning_rate": 1.2033656957928804e-06, "loss": 0.0019, "step": 363010 }, { "epoch": 140.98, "learning_rate": 1.2028478964401296e-06, "loss": 0.0154, "step": 363020 }, { "epoch": 140.98, "learning_rate": 1.2023300970873788e-06, "loss": 0.0556, "step": 363030 }, { "epoch": 140.99, "learning_rate": 1.201812297734628e-06, "loss": 0.0016, "step": 363040 }, { "epoch": 140.99, "learning_rate": 1.2012944983818772e-06, "loss": 0.0008, "step": 363050 }, { "epoch": 140.99, "learning_rate": 1.2007766990291264e-06, "loss": 0.1484, "step": 363060 }, { "epoch": 141.0, "learning_rate": 1.2002588996763756e-06, "loss": 0.0888, "step": 363070 }, { "epoch": 141.0, "eval_accuracy": 0.9535075653370014, "eval_loss": 0.3850953280925751, "eval_runtime": 8.289, "eval_samples_per_second": 438.535, "eval_steps_per_second": 54.892, "step": 363075 }, { "epoch": 141.0, "learning_rate": 1.1997411003236245e-06, "loss": 0.086, "step": 363080 }, { "epoch": 141.01, "learning_rate": 1.1992233009708737e-06, "loss": 0.0333, "step": 363090 }, { "epoch": 141.01, "learning_rate": 1.198705501618123e-06, "loss": 0.033, "step": 363100 }, { "epoch": 141.01, "learning_rate": 1.1981877022653721e-06, "loss": 0.0112, "step": 363110 }, { "epoch": 141.02, "learning_rate": 1.1976699029126213e-06, "loss": 0.1333, "step": 363120 }, { "epoch": 141.02, "learning_rate": 1.1971521035598707e-06, "loss": 0.0003, "step": 363130 }, { "epoch": 141.03, "learning_rate": 1.19663430420712e-06, "loss": 0.0507, "step": 363140 }, { "epoch": 141.03, "learning_rate": 1.1961165048543691e-06, "loss": 0.0301, "step": 363150 }, { "epoch": 141.03, "learning_rate": 1.1955987055016183e-06, "loss": 0.1031, "step": 363160 }, { "epoch": 141.04, "learning_rate": 1.1950809061488675e-06, "loss": 0.017, "step": 363170 }, { "epoch": 141.04, "learning_rate": 1.1945631067961167e-06, "loss": 0.0851, "step": 363180 }, { "epoch": 141.04, "learning_rate": 1.194045307443366e-06, "loss": 0.0165, "step": 363190 }, { "epoch": 141.05, "learning_rate": 1.193527508090615e-06, "loss": 0.0082, "step": 363200 }, { "epoch": 141.05, "learning_rate": 1.1930097087378643e-06, "loss": 0.036, "step": 363210 }, { "epoch": 141.06, "learning_rate": 1.1924919093851135e-06, "loss": 0.0986, "step": 363220 }, { "epoch": 141.06, "learning_rate": 1.1919741100323627e-06, "loss": 0.0351, "step": 363230 }, { "epoch": 141.06, "learning_rate": 1.1914563106796117e-06, "loss": 0.0572, "step": 363240 }, { "epoch": 141.07, "learning_rate": 1.1909385113268608e-06, "loss": 0.044, "step": 363250 }, { "epoch": 141.07, "learning_rate": 1.19042071197411e-06, "loss": 0.0277, "step": 363260 }, { "epoch": 141.08, "learning_rate": 1.1899029126213592e-06, "loss": 0.0215, "step": 363270 }, { "epoch": 141.08, "learning_rate": 1.1893851132686084e-06, "loss": 0.0196, "step": 363280 }, { "epoch": 141.08, "learning_rate": 1.1888673139158576e-06, "loss": 0.1493, "step": 363290 }, { "epoch": 141.09, "learning_rate": 1.1883495145631068e-06, "loss": 0.0732, "step": 363300 }, { "epoch": 141.09, "learning_rate": 1.187831715210356e-06, "loss": 0.0153, "step": 363310 }, { "epoch": 141.1, "learning_rate": 1.1873139158576052e-06, "loss": 0.0447, "step": 363320 }, { "epoch": 141.1, "learning_rate": 1.1867961165048544e-06, "loss": 0.0188, "step": 363330 }, { "epoch": 141.1, "learning_rate": 1.1862783171521036e-06, "loss": 0.0251, "step": 363340 }, { "epoch": 141.11, "learning_rate": 1.1857605177993528e-06, "loss": 0.012, "step": 363350 }, { "epoch": 141.11, "learning_rate": 1.185242718446602e-06, "loss": 0.0321, "step": 363360 }, { "epoch": 141.11, "learning_rate": 1.1847249190938512e-06, "loss": 0.1478, "step": 363370 }, { "epoch": 141.12, "learning_rate": 1.1842071197411004e-06, "loss": 0.005, "step": 363380 }, { "epoch": 141.12, "learning_rate": 1.1836893203883496e-06, "loss": 0.0637, "step": 363390 }, { "epoch": 141.13, "learning_rate": 1.1831715210355988e-06, "loss": 0.0405, "step": 363400 }, { "epoch": 141.13, "learning_rate": 1.182653721682848e-06, "loss": 0.0499, "step": 363410 }, { "epoch": 141.13, "learning_rate": 1.1821359223300972e-06, "loss": 0.1099, "step": 363420 }, { "epoch": 141.14, "learning_rate": 1.1816181229773464e-06, "loss": 0.0003, "step": 363430 }, { "epoch": 141.14, "learning_rate": 1.1811003236245955e-06, "loss": 0.0083, "step": 363440 }, { "epoch": 141.15, "learning_rate": 1.1805825242718447e-06, "loss": 0.1032, "step": 363450 }, { "epoch": 141.15, "learning_rate": 1.180064724919094e-06, "loss": 0.0365, "step": 363460 }, { "epoch": 141.15, "learning_rate": 1.1795469255663431e-06, "loss": 0.0298, "step": 363470 }, { "epoch": 141.16, "learning_rate": 1.1790291262135923e-06, "loss": 0.0226, "step": 363480 }, { "epoch": 141.16, "learning_rate": 1.1785113268608415e-06, "loss": 0.0603, "step": 363490 }, { "epoch": 141.17, "learning_rate": 1.1779935275080907e-06, "loss": 0.0072, "step": 363500 }, { "epoch": 141.17, "learning_rate": 1.17747572815534e-06, "loss": 0.1088, "step": 363510 }, { "epoch": 141.17, "learning_rate": 1.176957928802589e-06, "loss": 0.0006, "step": 363520 }, { "epoch": 141.18, "learning_rate": 1.1764401294498383e-06, "loss": 0.0202, "step": 363530 }, { "epoch": 141.18, "learning_rate": 1.1759223300970875e-06, "loss": 0.0638, "step": 363540 }, { "epoch": 141.18, "learning_rate": 1.1754045307443367e-06, "loss": 0.0617, "step": 363550 }, { "epoch": 141.19, "learning_rate": 1.1748867313915859e-06, "loss": 0.0103, "step": 363560 }, { "epoch": 141.19, "learning_rate": 1.174368932038835e-06, "loss": 0.1234, "step": 363570 }, { "epoch": 141.2, "learning_rate": 1.1738511326860843e-06, "loss": 0.0357, "step": 363580 }, { "epoch": 141.2, "learning_rate": 1.1733333333333335e-06, "loss": 0.0004, "step": 363590 }, { "epoch": 141.2, "learning_rate": 1.1728155339805827e-06, "loss": 0.015, "step": 363600 }, { "epoch": 141.21, "learning_rate": 1.1722977346278319e-06, "loss": 0.0003, "step": 363610 }, { "epoch": 141.21, "learning_rate": 1.171779935275081e-06, "loss": 0.0154, "step": 363620 }, { "epoch": 141.22, "learning_rate": 1.1712621359223302e-06, "loss": 0.0626, "step": 363630 }, { "epoch": 141.22, "learning_rate": 1.1707443365695794e-06, "loss": 0.0551, "step": 363640 }, { "epoch": 141.22, "learning_rate": 1.1702265372168284e-06, "loss": 0.0339, "step": 363650 }, { "epoch": 141.23, "learning_rate": 1.1697087378640776e-06, "loss": 0.0402, "step": 363660 }, { "epoch": 141.23, "learning_rate": 1.1691909385113268e-06, "loss": 0.027, "step": 363670 }, { "epoch": 141.23, "learning_rate": 1.168673139158576e-06, "loss": 0.0307, "step": 363680 }, { "epoch": 141.24, "learning_rate": 1.1681553398058254e-06, "loss": 0.0452, "step": 363690 }, { "epoch": 141.24, "learning_rate": 1.1676375404530746e-06, "loss": 0.0745, "step": 363700 }, { "epoch": 141.25, "learning_rate": 1.1671197411003238e-06, "loss": 0.0302, "step": 363710 }, { "epoch": 141.25, "learning_rate": 1.166601941747573e-06, "loss": 0.0003, "step": 363720 }, { "epoch": 141.25, "learning_rate": 1.1660841423948222e-06, "loss": 0.0137, "step": 363730 }, { "epoch": 141.26, "learning_rate": 1.1655663430420714e-06, "loss": 0.0001, "step": 363740 }, { "epoch": 141.26, "learning_rate": 1.1650485436893206e-06, "loss": 0.0001, "step": 363750 }, { "epoch": 141.27, "learning_rate": 1.1645307443365698e-06, "loss": 0.1645, "step": 363760 }, { "epoch": 141.27, "learning_rate": 1.164012944983819e-06, "loss": 0.0188, "step": 363770 }, { "epoch": 141.27, "learning_rate": 1.1634951456310682e-06, "loss": 0.0401, "step": 363780 }, { "epoch": 141.28, "learning_rate": 1.1629773462783174e-06, "loss": 0.0321, "step": 363790 }, { "epoch": 141.28, "learning_rate": 1.1624595469255665e-06, "loss": 0.0683, "step": 363800 }, { "epoch": 141.29, "learning_rate": 1.1619417475728155e-06, "loss": 0.01, "step": 363810 }, { "epoch": 141.29, "learning_rate": 1.1614239482200647e-06, "loss": 0.0096, "step": 363820 }, { "epoch": 141.29, "learning_rate": 1.160906148867314e-06, "loss": 0.0312, "step": 363830 }, { "epoch": 141.3, "learning_rate": 1.1603883495145631e-06, "loss": 0.0096, "step": 363840 }, { "epoch": 141.3, "learning_rate": 1.1598705501618123e-06, "loss": 0.0254, "step": 363850 }, { "epoch": 141.3, "learning_rate": 1.1593527508090615e-06, "loss": 0.0292, "step": 363860 }, { "epoch": 141.31, "learning_rate": 1.1588349514563107e-06, "loss": 0.0079, "step": 363870 }, { "epoch": 141.31, "learning_rate": 1.1583171521035599e-06, "loss": 0.0631, "step": 363880 }, { "epoch": 141.32, "learning_rate": 1.157799352750809e-06, "loss": 0.136, "step": 363890 }, { "epoch": 141.32, "learning_rate": 1.1572815533980583e-06, "loss": 0.0748, "step": 363900 }, { "epoch": 141.32, "learning_rate": 1.1567637540453075e-06, "loss": 0.0446, "step": 363910 }, { "epoch": 141.33, "learning_rate": 1.1562459546925567e-06, "loss": 0.0108, "step": 363920 }, { "epoch": 141.33, "learning_rate": 1.1557281553398059e-06, "loss": 0.0099, "step": 363930 }, { "epoch": 141.34, "learning_rate": 1.155210355987055e-06, "loss": 0.0152, "step": 363940 }, { "epoch": 141.34, "learning_rate": 1.1546925566343043e-06, "loss": 0.0003, "step": 363950 }, { "epoch": 141.34, "learning_rate": 1.1541747572815534e-06, "loss": 0.0505, "step": 363960 }, { "epoch": 141.35, "learning_rate": 1.1536569579288029e-06, "loss": 0.0001, "step": 363970 }, { "epoch": 141.35, "learning_rate": 1.1531391585760518e-06, "loss": 0.1009, "step": 363980 }, { "epoch": 141.36, "learning_rate": 1.152621359223301e-06, "loss": 0.027, "step": 363990 }, { "epoch": 141.36, "learning_rate": 1.1521035598705502e-06, "loss": 0.0746, "step": 364000 }, { "epoch": 141.36, "learning_rate": 1.1515857605177994e-06, "loss": 0.1233, "step": 364010 }, { "epoch": 141.37, "learning_rate": 1.1510679611650486e-06, "loss": 0.0111, "step": 364020 }, { "epoch": 141.37, "learning_rate": 1.1505501618122978e-06, "loss": 0.0023, "step": 364030 }, { "epoch": 141.37, "learning_rate": 1.150032362459547e-06, "loss": 0.0302, "step": 364040 }, { "epoch": 141.38, "learning_rate": 1.1495145631067962e-06, "loss": 0.0007, "step": 364050 }, { "epoch": 141.38, "learning_rate": 1.1489967637540454e-06, "loss": 0.0091, "step": 364060 }, { "epoch": 141.39, "learning_rate": 1.1484789644012946e-06, "loss": 0.0033, "step": 364070 }, { "epoch": 141.39, "learning_rate": 1.1479611650485438e-06, "loss": 0.0011, "step": 364080 }, { "epoch": 141.39, "learning_rate": 1.147443365695793e-06, "loss": 0.0003, "step": 364090 }, { "epoch": 141.4, "learning_rate": 1.1469255663430422e-06, "loss": 0.135, "step": 364100 }, { "epoch": 141.4, "learning_rate": 1.1464077669902914e-06, "loss": 0.0832, "step": 364110 }, { "epoch": 141.41, "learning_rate": 1.1458899676375406e-06, "loss": 0.009, "step": 364120 }, { "epoch": 141.41, "learning_rate": 1.1453721682847898e-06, "loss": 0.0401, "step": 364130 }, { "epoch": 141.41, "learning_rate": 1.144854368932039e-06, "loss": 0.0028, "step": 364140 }, { "epoch": 141.42, "learning_rate": 1.1443365695792881e-06, "loss": 0.0281, "step": 364150 }, { "epoch": 141.42, "learning_rate": 1.1438187702265373e-06, "loss": 0.0263, "step": 364160 }, { "epoch": 141.43, "learning_rate": 1.1433009708737865e-06, "loss": 0.1184, "step": 364170 }, { "epoch": 141.43, "learning_rate": 1.1427831715210357e-06, "loss": 0.0205, "step": 364180 }, { "epoch": 141.43, "learning_rate": 1.142265372168285e-06, "loss": 0.0296, "step": 364190 }, { "epoch": 141.44, "learning_rate": 1.1417475728155341e-06, "loss": 0.0001, "step": 364200 }, { "epoch": 141.44, "learning_rate": 1.1412297734627833e-06, "loss": 0.1048, "step": 364210 }, { "epoch": 141.44, "learning_rate": 1.1407119741100323e-06, "loss": 0.0939, "step": 364220 }, { "epoch": 141.45, "learning_rate": 1.1401941747572815e-06, "loss": 0.0512, "step": 364230 }, { "epoch": 141.45, "learning_rate": 1.1396763754045307e-06, "loss": 0.0359, "step": 364240 }, { "epoch": 141.46, "learning_rate": 1.13915857605178e-06, "loss": 0.0035, "step": 364250 }, { "epoch": 141.46, "learning_rate": 1.1386407766990293e-06, "loss": 0.0005, "step": 364260 }, { "epoch": 141.46, "learning_rate": 1.1381229773462785e-06, "loss": 0.0001, "step": 364270 }, { "epoch": 141.47, "learning_rate": 1.1376051779935277e-06, "loss": 0.0245, "step": 364280 }, { "epoch": 141.47, "learning_rate": 1.1370873786407769e-06, "loss": 0.0764, "step": 364290 }, { "epoch": 141.48, "learning_rate": 1.136569579288026e-06, "loss": 0.0912, "step": 364300 }, { "epoch": 141.48, "learning_rate": 1.1360517799352753e-06, "loss": 0.1404, "step": 364310 }, { "epoch": 141.48, "learning_rate": 1.1355339805825245e-06, "loss": 0.0004, "step": 364320 }, { "epoch": 141.49, "learning_rate": 1.1350161812297736e-06, "loss": 0.0198, "step": 364330 }, { "epoch": 141.49, "learning_rate": 1.1344983818770228e-06, "loss": 0.0856, "step": 364340 }, { "epoch": 141.5, "learning_rate": 1.133980582524272e-06, "loss": 0.0074, "step": 364350 }, { "epoch": 141.5, "learning_rate": 1.1334627831715212e-06, "loss": 0.0812, "step": 364360 }, { "epoch": 141.5, "learning_rate": 1.1329449838187704e-06, "loss": 0.0009, "step": 364370 }, { "epoch": 141.51, "learning_rate": 1.1324271844660194e-06, "loss": 0.035, "step": 364380 }, { "epoch": 141.51, "learning_rate": 1.1319093851132686e-06, "loss": 0.0003, "step": 364390 }, { "epoch": 141.51, "learning_rate": 1.1313915857605178e-06, "loss": 0.0268, "step": 364400 }, { "epoch": 141.52, "learning_rate": 1.130873786407767e-06, "loss": 0.0897, "step": 364410 }, { "epoch": 141.52, "learning_rate": 1.1303559870550162e-06, "loss": 0.0007, "step": 364420 }, { "epoch": 141.53, "learning_rate": 1.1298381877022654e-06, "loss": 0.1164, "step": 364430 }, { "epoch": 141.53, "learning_rate": 1.1293203883495146e-06, "loss": 0.0267, "step": 364440 }, { "epoch": 141.53, "learning_rate": 1.1288025889967638e-06, "loss": 0.0092, "step": 364450 }, { "epoch": 141.54, "learning_rate": 1.128284789644013e-06, "loss": 0.1292, "step": 364460 }, { "epoch": 141.54, "learning_rate": 1.1277669902912622e-06, "loss": 0.0776, "step": 364470 }, { "epoch": 141.55, "learning_rate": 1.1272491909385114e-06, "loss": 0.0579, "step": 364480 }, { "epoch": 141.55, "learning_rate": 1.1267313915857605e-06, "loss": 0.0452, "step": 364490 }, { "epoch": 141.55, "learning_rate": 1.1262135922330097e-06, "loss": 0.1305, "step": 364500 }, { "epoch": 141.56, "learning_rate": 1.125695792880259e-06, "loss": 0.0124, "step": 364510 }, { "epoch": 141.56, "learning_rate": 1.1251779935275081e-06, "loss": 0.0775, "step": 364520 }, { "epoch": 141.57, "learning_rate": 1.1246601941747575e-06, "loss": 0.1315, "step": 364530 }, { "epoch": 141.57, "learning_rate": 1.1241423948220067e-06, "loss": 0.0112, "step": 364540 }, { "epoch": 141.57, "learning_rate": 1.1236245954692557e-06, "loss": 0.0089, "step": 364550 }, { "epoch": 141.58, "learning_rate": 1.123106796116505e-06, "loss": 0.0145, "step": 364560 }, { "epoch": 141.58, "learning_rate": 1.122588996763754e-06, "loss": 0.04, "step": 364570 }, { "epoch": 141.58, "learning_rate": 1.1220711974110033e-06, "loss": 0.0512, "step": 364580 }, { "epoch": 141.59, "learning_rate": 1.1215533980582525e-06, "loss": 0.1498, "step": 364590 }, { "epoch": 141.59, "learning_rate": 1.1210355987055017e-06, "loss": 0.0307, "step": 364600 }, { "epoch": 141.6, "learning_rate": 1.1205177993527509e-06, "loss": 0.009, "step": 364610 }, { "epoch": 141.6, "learning_rate": 1.12e-06, "loss": 0.1615, "step": 364620 }, { "epoch": 141.6, "learning_rate": 1.1194822006472493e-06, "loss": 0.026, "step": 364630 }, { "epoch": 141.61, "learning_rate": 1.1189644012944985e-06, "loss": 0.0021, "step": 364640 }, { "epoch": 141.61, "learning_rate": 1.1184466019417477e-06, "loss": 0.0186, "step": 364650 }, { "epoch": 141.62, "learning_rate": 1.1179288025889969e-06, "loss": 0.0427, "step": 364660 }, { "epoch": 141.62, "learning_rate": 1.117411003236246e-06, "loss": 0.0285, "step": 364670 }, { "epoch": 141.62, "learning_rate": 1.1168932038834952e-06, "loss": 0.0473, "step": 364680 }, { "epoch": 141.63, "learning_rate": 1.1163754045307444e-06, "loss": 0.0169, "step": 364690 }, { "epoch": 141.63, "learning_rate": 1.1158576051779936e-06, "loss": 0.0176, "step": 364700 }, { "epoch": 141.63, "learning_rate": 1.1153398058252428e-06, "loss": 0.051, "step": 364710 }, { "epoch": 141.64, "learning_rate": 1.114822006472492e-06, "loss": 0.0591, "step": 364720 }, { "epoch": 141.64, "learning_rate": 1.1143042071197412e-06, "loss": 0.0192, "step": 364730 }, { "epoch": 141.65, "learning_rate": 1.1137864077669904e-06, "loss": 0.0561, "step": 364740 }, { "epoch": 141.65, "learning_rate": 1.1132686084142396e-06, "loss": 0.1368, "step": 364750 }, { "epoch": 141.65, "learning_rate": 1.1127508090614888e-06, "loss": 0.0396, "step": 364760 }, { "epoch": 141.66, "learning_rate": 1.112233009708738e-06, "loss": 0.0716, "step": 364770 }, { "epoch": 141.66, "learning_rate": 1.1117152103559872e-06, "loss": 0.0536, "step": 364780 }, { "epoch": 141.67, "learning_rate": 1.1111974110032362e-06, "loss": 0.0308, "step": 364790 }, { "epoch": 141.67, "learning_rate": 1.1106796116504854e-06, "loss": 0.0995, "step": 364800 }, { "epoch": 141.67, "learning_rate": 1.1101618122977348e-06, "loss": 0.0494, "step": 364810 }, { "epoch": 141.68, "learning_rate": 1.109644012944984e-06, "loss": 0.0559, "step": 364820 }, { "epoch": 141.68, "learning_rate": 1.1091262135922332e-06, "loss": 0.0409, "step": 364830 }, { "epoch": 141.69, "learning_rate": 1.1086084142394824e-06, "loss": 0.1285, "step": 364840 }, { "epoch": 141.69, "learning_rate": 1.1080906148867316e-06, "loss": 0.002, "step": 364850 }, { "epoch": 141.69, "learning_rate": 1.1075728155339807e-06, "loss": 0.0509, "step": 364860 }, { "epoch": 141.7, "learning_rate": 1.10705501618123e-06, "loss": 0.03, "step": 364870 }, { "epoch": 141.7, "learning_rate": 1.1065372168284791e-06, "loss": 0.001, "step": 364880 }, { "epoch": 141.7, "learning_rate": 1.1060194174757283e-06, "loss": 0.0906, "step": 364890 }, { "epoch": 141.71, "learning_rate": 1.1055016181229775e-06, "loss": 0.0805, "step": 364900 }, { "epoch": 141.71, "learning_rate": 1.1049838187702267e-06, "loss": 0.0004, "step": 364910 }, { "epoch": 141.72, "learning_rate": 1.104466019417476e-06, "loss": 0.0466, "step": 364920 }, { "epoch": 141.72, "learning_rate": 1.1039482200647251e-06, "loss": 0.0839, "step": 364930 }, { "epoch": 141.72, "learning_rate": 1.1034304207119743e-06, "loss": 0.1066, "step": 364940 }, { "epoch": 141.73, "learning_rate": 1.1029126213592235e-06, "loss": 0.1243, "step": 364950 }, { "epoch": 141.73, "learning_rate": 1.1023948220064725e-06, "loss": 0.0001, "step": 364960 }, { "epoch": 141.74, "learning_rate": 1.1018770226537217e-06, "loss": 0.0278, "step": 364970 }, { "epoch": 141.74, "learning_rate": 1.1013592233009709e-06, "loss": 0.0587, "step": 364980 }, { "epoch": 141.74, "learning_rate": 1.10084142394822e-06, "loss": 0.1067, "step": 364990 }, { "epoch": 141.75, "learning_rate": 1.1003236245954693e-06, "loss": 0.0716, "step": 365000 }, { "epoch": 141.75, "learning_rate": 1.0998058252427185e-06, "loss": 0.0093, "step": 365010 }, { "epoch": 141.76, "learning_rate": 1.0992880258899676e-06, "loss": 0.0167, "step": 365020 }, { "epoch": 141.76, "learning_rate": 1.0987702265372168e-06, "loss": 0.0092, "step": 365030 }, { "epoch": 141.76, "learning_rate": 1.098252427184466e-06, "loss": 0.1004, "step": 365040 }, { "epoch": 141.77, "learning_rate": 1.0977346278317152e-06, "loss": 0.0419, "step": 365050 }, { "epoch": 141.77, "learning_rate": 1.0972168284789644e-06, "loss": 0.0401, "step": 365060 }, { "epoch": 141.77, "learning_rate": 1.0966990291262136e-06, "loss": 0.0073, "step": 365070 }, { "epoch": 141.78, "learning_rate": 1.0961812297734628e-06, "loss": 0.0249, "step": 365080 }, { "epoch": 141.78, "learning_rate": 1.0956634304207122e-06, "loss": 0.0642, "step": 365090 }, { "epoch": 141.79, "learning_rate": 1.0951456310679614e-06, "loss": 0.0862, "step": 365100 }, { "epoch": 141.79, "learning_rate": 1.0946278317152106e-06, "loss": 0.0357, "step": 365110 }, { "epoch": 141.79, "learning_rate": 1.0941100323624596e-06, "loss": 0.0002, "step": 365120 }, { "epoch": 141.8, "learning_rate": 1.0935922330097088e-06, "loss": 0.0406, "step": 365130 }, { "epoch": 141.8, "learning_rate": 1.093074433656958e-06, "loss": 0.0232, "step": 365140 }, { "epoch": 141.81, "learning_rate": 1.0925566343042072e-06, "loss": 0.0087, "step": 365150 }, { "epoch": 141.81, "learning_rate": 1.0920388349514564e-06, "loss": 0.0173, "step": 365160 }, { "epoch": 141.81, "learning_rate": 1.0915210355987056e-06, "loss": 0.0005, "step": 365170 }, { "epoch": 141.82, "learning_rate": 1.0910032362459548e-06, "loss": 0.0201, "step": 365180 }, { "epoch": 141.82, "learning_rate": 1.090485436893204e-06, "loss": 0.0162, "step": 365190 }, { "epoch": 141.83, "learning_rate": 1.0899676375404531e-06, "loss": 0.0006, "step": 365200 }, { "epoch": 141.83, "learning_rate": 1.0894498381877023e-06, "loss": 0.0166, "step": 365210 }, { "epoch": 141.83, "learning_rate": 1.0889320388349515e-06, "loss": 0.0147, "step": 365220 }, { "epoch": 141.84, "learning_rate": 1.0884142394822007e-06, "loss": 0.1097, "step": 365230 }, { "epoch": 141.84, "learning_rate": 1.08789644012945e-06, "loss": 0.0305, "step": 365240 }, { "epoch": 141.84, "learning_rate": 1.0873786407766991e-06, "loss": 0.0107, "step": 365250 }, { "epoch": 141.85, "learning_rate": 1.0868608414239483e-06, "loss": 0.0179, "step": 365260 }, { "epoch": 141.85, "learning_rate": 1.0863430420711975e-06, "loss": 0.0467, "step": 365270 }, { "epoch": 141.86, "learning_rate": 1.0858252427184467e-06, "loss": 0.0001, "step": 365280 }, { "epoch": 141.86, "learning_rate": 1.085307443365696e-06, "loss": 0.002, "step": 365290 }, { "epoch": 141.86, "learning_rate": 1.084789644012945e-06, "loss": 0.0001, "step": 365300 }, { "epoch": 141.87, "learning_rate": 1.0842718446601943e-06, "loss": 0.0097, "step": 365310 }, { "epoch": 141.87, "learning_rate": 1.0837540453074435e-06, "loss": 0.0159, "step": 365320 }, { "epoch": 141.88, "learning_rate": 1.0832362459546927e-06, "loss": 0.0002, "step": 365330 }, { "epoch": 141.88, "learning_rate": 1.0827184466019419e-06, "loss": 0.0008, "step": 365340 }, { "epoch": 141.88, "learning_rate": 1.082200647249191e-06, "loss": 0.0814, "step": 365350 }, { "epoch": 141.89, "learning_rate": 1.08168284789644e-06, "loss": 0.0207, "step": 365360 }, { "epoch": 141.89, "learning_rate": 1.0811650485436895e-06, "loss": 0.0116, "step": 365370 }, { "epoch": 141.9, "learning_rate": 1.0806472491909386e-06, "loss": 0.0216, "step": 365380 }, { "epoch": 141.9, "learning_rate": 1.0801294498381878e-06, "loss": 0.0241, "step": 365390 }, { "epoch": 141.9, "learning_rate": 1.079611650485437e-06, "loss": 0.0068, "step": 365400 }, { "epoch": 141.91, "learning_rate": 1.0790938511326862e-06, "loss": 0.0317, "step": 365410 }, { "epoch": 141.91, "learning_rate": 1.0785760517799354e-06, "loss": 0.0239, "step": 365420 }, { "epoch": 141.91, "learning_rate": 1.0780582524271846e-06, "loss": 0.0089, "step": 365430 }, { "epoch": 141.92, "learning_rate": 1.0775404530744338e-06, "loss": 0.0139, "step": 365440 }, { "epoch": 141.92, "learning_rate": 1.077022653721683e-06, "loss": 0.0099, "step": 365450 }, { "epoch": 141.93, "learning_rate": 1.0765048543689322e-06, "loss": 0.0425, "step": 365460 }, { "epoch": 141.93, "learning_rate": 1.0759870550161814e-06, "loss": 0.0489, "step": 365470 }, { "epoch": 141.93, "learning_rate": 1.0754692556634306e-06, "loss": 0.0659, "step": 365480 }, { "epoch": 141.94, "learning_rate": 1.0749514563106798e-06, "loss": 0.0169, "step": 365490 }, { "epoch": 141.94, "learning_rate": 1.074433656957929e-06, "loss": 0.0096, "step": 365500 }, { "epoch": 141.95, "learning_rate": 1.0739158576051782e-06, "loss": 0.0249, "step": 365510 }, { "epoch": 141.95, "learning_rate": 1.0733980582524274e-06, "loss": 0.0152, "step": 365520 }, { "epoch": 141.95, "learning_rate": 1.0728802588996764e-06, "loss": 0.0188, "step": 365530 }, { "epoch": 141.96, "learning_rate": 1.0723624595469255e-06, "loss": 0.0651, "step": 365540 }, { "epoch": 141.96, "learning_rate": 1.0718446601941747e-06, "loss": 0.023, "step": 365550 }, { "epoch": 141.97, "learning_rate": 1.071326860841424e-06, "loss": 0.0393, "step": 365560 }, { "epoch": 141.97, "learning_rate": 1.0708090614886731e-06, "loss": 0.0842, "step": 365570 }, { "epoch": 141.97, "learning_rate": 1.0702912621359223e-06, "loss": 0.0324, "step": 365580 }, { "epoch": 141.98, "learning_rate": 1.0697734627831715e-06, "loss": 0.0597, "step": 365590 }, { "epoch": 141.98, "learning_rate": 1.0692556634304207e-06, "loss": 0.0232, "step": 365600 }, { "epoch": 141.98, "learning_rate": 1.06873786407767e-06, "loss": 0.0403, "step": 365610 }, { "epoch": 141.99, "learning_rate": 1.068220064724919e-06, "loss": 0.0003, "step": 365620 }, { "epoch": 141.99, "learning_rate": 1.0677022653721683e-06, "loss": 0.0381, "step": 365630 }, { "epoch": 142.0, "learning_rate": 1.0671844660194175e-06, "loss": 0.1106, "step": 365640 }, { "epoch": 142.0, "learning_rate": 1.066666666666667e-06, "loss": 0.0169, "step": 365650 }, { "epoch": 142.0, "eval_accuracy": 0.9526822558459422, "eval_loss": 0.3907836973667145, "eval_runtime": 8.2566, "eval_samples_per_second": 440.255, "eval_steps_per_second": 55.108, "step": 365650 }, { "epoch": 142.0, "learning_rate": 1.066148867313916e-06, "loss": 0.0112, "step": 365660 }, { "epoch": 142.01, "learning_rate": 1.0656310679611653e-06, "loss": 0.0668, "step": 365670 }, { "epoch": 142.01, "learning_rate": 1.0651132686084145e-06, "loss": 0.0004, "step": 365680 }, { "epoch": 142.02, "learning_rate": 1.0645954692556635e-06, "loss": 0.0881, "step": 365690 }, { "epoch": 142.02, "learning_rate": 1.0640776699029127e-06, "loss": 0.077, "step": 365700 }, { "epoch": 142.02, "learning_rate": 1.0635598705501619e-06, "loss": 0.061, "step": 365710 }, { "epoch": 142.03, "learning_rate": 1.063042071197411e-06, "loss": 0.0369, "step": 365720 }, { "epoch": 142.03, "learning_rate": 1.0625242718446602e-06, "loss": 0.0079, "step": 365730 }, { "epoch": 142.03, "learning_rate": 1.0620064724919094e-06, "loss": 0.0186, "step": 365740 }, { "epoch": 142.04, "learning_rate": 1.0614886731391586e-06, "loss": 0.0634, "step": 365750 }, { "epoch": 142.04, "learning_rate": 1.0609708737864078e-06, "loss": 0.1145, "step": 365760 }, { "epoch": 142.05, "learning_rate": 1.060453074433657e-06, "loss": 0.0702, "step": 365770 }, { "epoch": 142.05, "learning_rate": 1.0599352750809062e-06, "loss": 0.0935, "step": 365780 }, { "epoch": 142.05, "learning_rate": 1.0594174757281554e-06, "loss": 0.0536, "step": 365790 }, { "epoch": 142.06, "learning_rate": 1.0588996763754046e-06, "loss": 0.0755, "step": 365800 }, { "epoch": 142.06, "learning_rate": 1.0583818770226538e-06, "loss": 0.0888, "step": 365810 }, { "epoch": 142.07, "learning_rate": 1.057864077669903e-06, "loss": 0.0002, "step": 365820 }, { "epoch": 142.07, "learning_rate": 1.0573462783171522e-06, "loss": 0.0914, "step": 365830 }, { "epoch": 142.07, "learning_rate": 1.0568284789644014e-06, "loss": 0.0109, "step": 365840 }, { "epoch": 142.08, "learning_rate": 1.0563106796116506e-06, "loss": 0.018, "step": 365850 }, { "epoch": 142.08, "learning_rate": 1.0557928802588998e-06, "loss": 0.014, "step": 365860 }, { "epoch": 142.09, "learning_rate": 1.055275080906149e-06, "loss": 0.0299, "step": 365870 }, { "epoch": 142.09, "learning_rate": 1.0547572815533982e-06, "loss": 0.1437, "step": 365880 }, { "epoch": 142.09, "learning_rate": 1.0542394822006474e-06, "loss": 0.0289, "step": 365890 }, { "epoch": 142.1, "learning_rate": 1.0537216828478966e-06, "loss": 0.0551, "step": 365900 }, { "epoch": 142.1, "learning_rate": 1.0532038834951457e-06, "loss": 0.0376, "step": 365910 }, { "epoch": 142.1, "learning_rate": 1.052686084142395e-06, "loss": 0.0247, "step": 365920 }, { "epoch": 142.11, "learning_rate": 1.0521682847896441e-06, "loss": 0.0169, "step": 365930 }, { "epoch": 142.11, "learning_rate": 1.0516504854368933e-06, "loss": 0.0158, "step": 365940 }, { "epoch": 142.12, "learning_rate": 1.0511326860841425e-06, "loss": 0.0811, "step": 365950 }, { "epoch": 142.12, "learning_rate": 1.0506148867313917e-06, "loss": 0.0012, "step": 365960 }, { "epoch": 142.12, "learning_rate": 1.050097087378641e-06, "loss": 0.0226, "step": 365970 }, { "epoch": 142.13, "learning_rate": 1.0495792880258901e-06, "loss": 0.0007, "step": 365980 }, { "epoch": 142.13, "learning_rate": 1.0490614886731393e-06, "loss": 0.0118, "step": 365990 }, { "epoch": 142.14, "learning_rate": 1.0485436893203885e-06, "loss": 0.0424, "step": 366000 }, { "epoch": 142.14, "learning_rate": 1.0480258899676377e-06, "loss": 0.0331, "step": 366010 }, { "epoch": 142.14, "learning_rate": 1.0475080906148869e-06, "loss": 0.0341, "step": 366020 }, { "epoch": 142.15, "learning_rate": 1.046990291262136e-06, "loss": 0.014, "step": 366030 }, { "epoch": 142.15, "learning_rate": 1.0464724919093853e-06, "loss": 0.0245, "step": 366040 }, { "epoch": 142.16, "learning_rate": 1.0459546925566345e-06, "loss": 0.0263, "step": 366050 }, { "epoch": 142.16, "learning_rate": 1.0454368932038837e-06, "loss": 0.0491, "step": 366060 }, { "epoch": 142.16, "learning_rate": 1.0449190938511329e-06, "loss": 0.0307, "step": 366070 }, { "epoch": 142.17, "learning_rate": 1.044401294498382e-06, "loss": 0.0188, "step": 366080 }, { "epoch": 142.17, "learning_rate": 1.0438834951456312e-06, "loss": 0.0005, "step": 366090 }, { "epoch": 142.17, "learning_rate": 1.0433656957928802e-06, "loss": 0.0102, "step": 366100 }, { "epoch": 142.18, "learning_rate": 1.0428478964401294e-06, "loss": 0.0007, "step": 366110 }, { "epoch": 142.18, "learning_rate": 1.0423300970873786e-06, "loss": 0.0008, "step": 366120 }, { "epoch": 142.19, "learning_rate": 1.0418122977346278e-06, "loss": 0.0893, "step": 366130 }, { "epoch": 142.19, "learning_rate": 1.041294498381877e-06, "loss": 0.0001, "step": 366140 }, { "epoch": 142.19, "learning_rate": 1.0407766990291262e-06, "loss": 0.0257, "step": 366150 }, { "epoch": 142.2, "learning_rate": 1.0402588996763754e-06, "loss": 0.0485, "step": 366160 }, { "epoch": 142.2, "learning_rate": 1.0397411003236246e-06, "loss": 0.0576, "step": 366170 }, { "epoch": 142.21, "learning_rate": 1.0392233009708738e-06, "loss": 0.0055, "step": 366180 }, { "epoch": 142.21, "learning_rate": 1.038705501618123e-06, "loss": 0.0097, "step": 366190 }, { "epoch": 142.21, "learning_rate": 1.0381877022653722e-06, "loss": 0.0083, "step": 366200 }, { "epoch": 142.22, "learning_rate": 1.0376699029126216e-06, "loss": 0.0261, "step": 366210 }, { "epoch": 142.22, "learning_rate": 1.0371521035598708e-06, "loss": 0.0183, "step": 366220 }, { "epoch": 142.23, "learning_rate": 1.03663430420712e-06, "loss": 0.0428, "step": 366230 }, { "epoch": 142.23, "learning_rate": 1.0361165048543692e-06, "loss": 0.0008, "step": 366240 }, { "epoch": 142.23, "learning_rate": 1.0355987055016184e-06, "loss": 0.03, "step": 366250 }, { "epoch": 142.24, "learning_rate": 1.0350809061488673e-06, "loss": 0.0102, "step": 366260 }, { "epoch": 142.24, "learning_rate": 1.0345631067961165e-06, "loss": 0.0705, "step": 366270 }, { "epoch": 142.24, "learning_rate": 1.0340453074433657e-06, "loss": 0.0006, "step": 366280 }, { "epoch": 142.25, "learning_rate": 1.033527508090615e-06, "loss": 0.0166, "step": 366290 }, { "epoch": 142.25, "learning_rate": 1.0330097087378641e-06, "loss": 0.0625, "step": 366300 }, { "epoch": 142.26, "learning_rate": 1.0324919093851133e-06, "loss": 0.08, "step": 366310 }, { "epoch": 142.26, "learning_rate": 1.0319741100323625e-06, "loss": 0.0389, "step": 366320 }, { "epoch": 142.26, "learning_rate": 1.0314563106796117e-06, "loss": 0.0031, "step": 366330 }, { "epoch": 142.27, "learning_rate": 1.030938511326861e-06, "loss": 0.0396, "step": 366340 }, { "epoch": 142.27, "learning_rate": 1.03042071197411e-06, "loss": 0.0858, "step": 366350 }, { "epoch": 142.28, "learning_rate": 1.0299029126213593e-06, "loss": 0.0113, "step": 366360 }, { "epoch": 142.28, "learning_rate": 1.0293851132686085e-06, "loss": 0.0103, "step": 366370 }, { "epoch": 142.28, "learning_rate": 1.0288673139158577e-06, "loss": 0.0001, "step": 366380 }, { "epoch": 142.29, "learning_rate": 1.0283495145631069e-06, "loss": 0.0119, "step": 366390 }, { "epoch": 142.29, "learning_rate": 1.027831715210356e-06, "loss": 0.0485, "step": 366400 }, { "epoch": 142.3, "learning_rate": 1.0273139158576053e-06, "loss": 0.0164, "step": 366410 }, { "epoch": 142.3, "learning_rate": 1.0267961165048545e-06, "loss": 0.0099, "step": 366420 }, { "epoch": 142.3, "learning_rate": 1.0262783171521037e-06, "loss": 0.0928, "step": 366430 }, { "epoch": 142.31, "learning_rate": 1.0257605177993528e-06, "loss": 0.0634, "step": 366440 }, { "epoch": 142.31, "learning_rate": 1.025242718446602e-06, "loss": 0.0418, "step": 366450 }, { "epoch": 142.31, "learning_rate": 1.0247249190938512e-06, "loss": 0.1739, "step": 366460 }, { "epoch": 142.32, "learning_rate": 1.0242071197411004e-06, "loss": 0.0033, "step": 366470 }, { "epoch": 142.32, "learning_rate": 1.0236893203883496e-06, "loss": 0.0298, "step": 366480 }, { "epoch": 142.33, "learning_rate": 1.0231715210355988e-06, "loss": 0.0453, "step": 366490 }, { "epoch": 142.33, "learning_rate": 1.022653721682848e-06, "loss": 0.001, "step": 366500 }, { "epoch": 142.33, "learning_rate": 1.0221359223300972e-06, "loss": 0.0105, "step": 366510 }, { "epoch": 142.34, "learning_rate": 1.0216181229773464e-06, "loss": 0.0894, "step": 366520 }, { "epoch": 142.34, "learning_rate": 1.0211003236245956e-06, "loss": 0.0665, "step": 366530 }, { "epoch": 142.35, "learning_rate": 1.0205825242718448e-06, "loss": 0.0577, "step": 366540 }, { "epoch": 142.35, "learning_rate": 1.020064724919094e-06, "loss": 0.0298, "step": 366550 }, { "epoch": 142.35, "learning_rate": 1.0195469255663432e-06, "loss": 0.074, "step": 366560 }, { "epoch": 142.36, "learning_rate": 1.0190291262135924e-06, "loss": 0.0624, "step": 366570 }, { "epoch": 142.36, "learning_rate": 1.0185113268608416e-06, "loss": 0.0986, "step": 366580 }, { "epoch": 142.37, "learning_rate": 1.0179935275080908e-06, "loss": 0.0835, "step": 366590 }, { "epoch": 142.37, "learning_rate": 1.01747572815534e-06, "loss": 0.0009, "step": 366600 }, { "epoch": 142.37, "learning_rate": 1.0169579288025892e-06, "loss": 0.0005, "step": 366610 }, { "epoch": 142.38, "learning_rate": 1.0164401294498383e-06, "loss": 0.0117, "step": 366620 }, { "epoch": 142.38, "learning_rate": 1.0159223300970875e-06, "loss": 0.0545, "step": 366630 }, { "epoch": 142.38, "learning_rate": 1.0154045307443367e-06, "loss": 0.0269, "step": 366640 }, { "epoch": 142.39, "learning_rate": 1.014886731391586e-06, "loss": 0.0001, "step": 366650 }, { "epoch": 142.39, "learning_rate": 1.0143689320388351e-06, "loss": 0.0128, "step": 366660 }, { "epoch": 142.4, "learning_rate": 1.0138511326860841e-06, "loss": 0.1656, "step": 366670 }, { "epoch": 142.4, "learning_rate": 1.0133333333333333e-06, "loss": 0.0672, "step": 366680 }, { "epoch": 142.4, "learning_rate": 1.0128155339805825e-06, "loss": 0.0015, "step": 366690 }, { "epoch": 142.41, "learning_rate": 1.0122977346278317e-06, "loss": 0.0122, "step": 366700 }, { "epoch": 142.41, "learning_rate": 1.0117799352750809e-06, "loss": 0.0062, "step": 366710 }, { "epoch": 142.42, "learning_rate": 1.01126213592233e-06, "loss": 0.0842, "step": 366720 }, { "epoch": 142.42, "learning_rate": 1.0107443365695793e-06, "loss": 0.0174, "step": 366730 }, { "epoch": 142.42, "learning_rate": 1.0102265372168285e-06, "loss": 0.0467, "step": 366740 }, { "epoch": 142.43, "learning_rate": 1.0097087378640777e-06, "loss": 0.0075, "step": 366750 }, { "epoch": 142.43, "learning_rate": 1.0091909385113269e-06, "loss": 0.0778, "step": 366760 }, { "epoch": 142.43, "learning_rate": 1.0086731391585763e-06, "loss": 0.0014, "step": 366770 }, { "epoch": 142.44, "learning_rate": 1.0081553398058255e-06, "loss": 0.0282, "step": 366780 }, { "epoch": 142.44, "learning_rate": 1.0076375404530747e-06, "loss": 0.0003, "step": 366790 }, { "epoch": 142.45, "learning_rate": 1.0071197411003238e-06, "loss": 0.0128, "step": 366800 }, { "epoch": 142.45, "learning_rate": 1.006601941747573e-06, "loss": 0.023, "step": 366810 }, { "epoch": 142.45, "learning_rate": 1.0060841423948222e-06, "loss": 0.0309, "step": 366820 }, { "epoch": 142.46, "learning_rate": 1.0055663430420712e-06, "loss": 0.0145, "step": 366830 }, { "epoch": 142.46, "learning_rate": 1.0050485436893204e-06, "loss": 0.0007, "step": 366840 }, { "epoch": 142.47, "learning_rate": 1.0045307443365696e-06, "loss": 0.0855, "step": 366850 }, { "epoch": 142.47, "learning_rate": 1.0040129449838188e-06, "loss": 0.0113, "step": 366860 }, { "epoch": 142.47, "learning_rate": 1.003495145631068e-06, "loss": 0.0662, "step": 366870 }, { "epoch": 142.48, "learning_rate": 1.0029773462783172e-06, "loss": 0.0307, "step": 366880 }, { "epoch": 142.48, "learning_rate": 1.0024595469255664e-06, "loss": 0.1359, "step": 366890 }, { "epoch": 142.49, "learning_rate": 1.0019417475728156e-06, "loss": 0.0501, "step": 366900 }, { "epoch": 142.49, "learning_rate": 1.0014239482200648e-06, "loss": 0.0173, "step": 366910 }, { "epoch": 142.49, "learning_rate": 1.000906148867314e-06, "loss": 0.0087, "step": 366920 }, { "epoch": 142.5, "learning_rate": 1.0003883495145632e-06, "loss": 0.0012, "step": 366930 }, { "epoch": 142.5, "learning_rate": 9.998705501618124e-07, "loss": 0.0553, "step": 366940 }, { "epoch": 142.5, "learning_rate": 9.993527508090616e-07, "loss": 0.0084, "step": 366950 }, { "epoch": 142.51, "learning_rate": 9.988349514563107e-07, "loss": 0.0123, "step": 366960 }, { "epoch": 142.51, "learning_rate": 9.9831715210356e-07, "loss": 0.0332, "step": 366970 }, { "epoch": 142.52, "learning_rate": 9.977993527508091e-07, "loss": 0.0084, "step": 366980 }, { "epoch": 142.52, "learning_rate": 9.972815533980583e-07, "loss": 0.0608, "step": 366990 }, { "epoch": 142.52, "learning_rate": 9.967637540453075e-07, "loss": 0.0002, "step": 367000 }, { "epoch": 142.53, "learning_rate": 9.962459546925567e-07, "loss": 0.0003, "step": 367010 }, { "epoch": 142.53, "learning_rate": 9.95728155339806e-07, "loss": 0.0818, "step": 367020 }, { "epoch": 142.54, "learning_rate": 9.952103559870551e-07, "loss": 0.0895, "step": 367030 }, { "epoch": 142.54, "learning_rate": 9.946925566343043e-07, "loss": 0.0002, "step": 367040 }, { "epoch": 142.54, "learning_rate": 9.941747572815535e-07, "loss": 0.0227, "step": 367050 }, { "epoch": 142.55, "learning_rate": 9.936569579288027e-07, "loss": 0.02, "step": 367060 }, { "epoch": 142.55, "learning_rate": 9.931391585760519e-07, "loss": 0.0001, "step": 367070 }, { "epoch": 142.56, "learning_rate": 9.92621359223301e-07, "loss": 0.0366, "step": 367080 }, { "epoch": 142.56, "learning_rate": 9.921035598705503e-07, "loss": 0.1088, "step": 367090 }, { "epoch": 142.56, "learning_rate": 9.915857605177995e-07, "loss": 0.002, "step": 367100 }, { "epoch": 142.57, "learning_rate": 9.910679611650487e-07, "loss": 0.0356, "step": 367110 }, { "epoch": 142.57, "learning_rate": 9.905501618122979e-07, "loss": 0.0308, "step": 367120 }, { "epoch": 142.57, "learning_rate": 9.90032362459547e-07, "loss": 0.0006, "step": 367130 }, { "epoch": 142.58, "learning_rate": 9.895145631067962e-07, "loss": 0.0162, "step": 367140 }, { "epoch": 142.58, "learning_rate": 9.889967637540454e-07, "loss": 0.0548, "step": 367150 }, { "epoch": 142.59, "learning_rate": 9.884789644012946e-07, "loss": 0.0373, "step": 367160 }, { "epoch": 142.59, "learning_rate": 9.879611650485438e-07, "loss": 0.0232, "step": 367170 }, { "epoch": 142.59, "learning_rate": 9.87443365695793e-07, "loss": 0.0718, "step": 367180 }, { "epoch": 142.6, "learning_rate": 9.869255663430422e-07, "loss": 0.0853, "step": 367190 }, { "epoch": 142.6, "learning_rate": 9.864077669902914e-07, "loss": 0.0006, "step": 367200 }, { "epoch": 142.61, "learning_rate": 9.858899676375406e-07, "loss": 0.0982, "step": 367210 }, { "epoch": 142.61, "learning_rate": 9.853721682847898e-07, "loss": 0.0845, "step": 367220 }, { "epoch": 142.61, "learning_rate": 9.84854368932039e-07, "loss": 0.0214, "step": 367230 }, { "epoch": 142.62, "learning_rate": 9.84336569579288e-07, "loss": 0.0204, "step": 367240 }, { "epoch": 142.62, "learning_rate": 9.838187702265372e-07, "loss": 0.0074, "step": 367250 }, { "epoch": 142.63, "learning_rate": 9.833009708737864e-07, "loss": 0.0965, "step": 367260 }, { "epoch": 142.63, "learning_rate": 9.827831715210356e-07, "loss": 0.0411, "step": 367270 }, { "epoch": 142.63, "learning_rate": 9.822653721682848e-07, "loss": 0.0467, "step": 367280 }, { "epoch": 142.64, "learning_rate": 9.81747572815534e-07, "loss": 0.054, "step": 367290 }, { "epoch": 142.64, "learning_rate": 9.812297734627832e-07, "loss": 0.0795, "step": 367300 }, { "epoch": 142.64, "learning_rate": 9.807119741100323e-07, "loss": 0.0087, "step": 367310 }, { "epoch": 142.65, "learning_rate": 9.801941747572815e-07, "loss": 0.0215, "step": 367320 }, { "epoch": 142.65, "learning_rate": 9.79676375404531e-07, "loss": 0.0182, "step": 367330 }, { "epoch": 142.66, "learning_rate": 9.791585760517801e-07, "loss": 0.102, "step": 367340 }, { "epoch": 142.66, "learning_rate": 9.786407766990293e-07, "loss": 0.0095, "step": 367350 }, { "epoch": 142.66, "learning_rate": 9.781229773462785e-07, "loss": 0.0585, "step": 367360 }, { "epoch": 142.67, "learning_rate": 9.776051779935277e-07, "loss": 0.0411, "step": 367370 }, { "epoch": 142.67, "learning_rate": 9.77087378640777e-07, "loss": 0.0451, "step": 367380 }, { "epoch": 142.68, "learning_rate": 9.765695792880261e-07, "loss": 0.0198, "step": 367390 }, { "epoch": 142.68, "learning_rate": 9.76051779935275e-07, "loss": 0.0508, "step": 367400 }, { "epoch": 142.68, "learning_rate": 9.755339805825243e-07, "loss": 0.1014, "step": 367410 }, { "epoch": 142.69, "learning_rate": 9.750161812297735e-07, "loss": 0.0014, "step": 367420 }, { "epoch": 142.69, "learning_rate": 9.744983818770227e-07, "loss": 0.0006, "step": 367430 }, { "epoch": 142.7, "learning_rate": 9.739805825242719e-07, "loss": 0.0141, "step": 367440 }, { "epoch": 142.7, "learning_rate": 9.73462783171521e-07, "loss": 0.1458, "step": 367450 }, { "epoch": 142.7, "learning_rate": 9.729449838187703e-07, "loss": 0.0915, "step": 367460 }, { "epoch": 142.71, "learning_rate": 9.724271844660195e-07, "loss": 0.0604, "step": 367470 }, { "epoch": 142.71, "learning_rate": 9.719093851132687e-07, "loss": 0.0106, "step": 367480 }, { "epoch": 142.71, "learning_rate": 9.713915857605178e-07, "loss": 0.0247, "step": 367490 }, { "epoch": 142.72, "learning_rate": 9.70873786407767e-07, "loss": 0.0443, "step": 367500 }, { "epoch": 142.72, "learning_rate": 9.703559870550162e-07, "loss": 0.05, "step": 367510 }, { "epoch": 142.73, "learning_rate": 9.698381877022654e-07, "loss": 0.0994, "step": 367520 }, { "epoch": 142.73, "learning_rate": 9.693203883495146e-07, "loss": 0.0286, "step": 367530 }, { "epoch": 142.73, "learning_rate": 9.688025889967638e-07, "loss": 0.0123, "step": 367540 }, { "epoch": 142.74, "learning_rate": 9.68284789644013e-07, "loss": 0.0093, "step": 367550 }, { "epoch": 142.74, "learning_rate": 9.677669902912622e-07, "loss": 0.0095, "step": 367560 }, { "epoch": 142.75, "learning_rate": 9.672491909385114e-07, "loss": 0.0548, "step": 367570 }, { "epoch": 142.75, "learning_rate": 9.667313915857606e-07, "loss": 0.0957, "step": 367580 }, { "epoch": 142.75, "learning_rate": 9.662135922330098e-07, "loss": 0.0368, "step": 367590 }, { "epoch": 142.76, "learning_rate": 9.65695792880259e-07, "loss": 0.001, "step": 367600 }, { "epoch": 142.76, "learning_rate": 9.651779935275082e-07, "loss": 0.0719, "step": 367610 }, { "epoch": 142.77, "learning_rate": 9.646601941747574e-07, "loss": 0.087, "step": 367620 }, { "epoch": 142.77, "learning_rate": 9.641423948220066e-07, "loss": 0.0085, "step": 367630 }, { "epoch": 142.77, "learning_rate": 9.636245954692558e-07, "loss": 0.1278, "step": 367640 }, { "epoch": 142.78, "learning_rate": 9.63106796116505e-07, "loss": 0.0048, "step": 367650 }, { "epoch": 142.78, "learning_rate": 9.625889967637542e-07, "loss": 0.0007, "step": 367660 }, { "epoch": 142.78, "learning_rate": 9.620711974110033e-07, "loss": 0.0026, "step": 367670 }, { "epoch": 142.79, "learning_rate": 9.615533980582525e-07, "loss": 0.0743, "step": 367680 }, { "epoch": 142.79, "learning_rate": 9.610355987055017e-07, "loss": 0.0105, "step": 367690 }, { "epoch": 142.8, "learning_rate": 9.60517799352751e-07, "loss": 0.0147, "step": 367700 }, { "epoch": 142.8, "learning_rate": 9.600000000000001e-07, "loss": 0.0147, "step": 367710 }, { "epoch": 142.8, "learning_rate": 9.594822006472493e-07, "loss": 0.0436, "step": 367720 }, { "epoch": 142.81, "learning_rate": 9.589644012944985e-07, "loss": 0.0469, "step": 367730 }, { "epoch": 142.81, "learning_rate": 9.584466019417477e-07, "loss": 0.0713, "step": 367740 }, { "epoch": 142.82, "learning_rate": 9.57928802588997e-07, "loss": 0.0493, "step": 367750 }, { "epoch": 142.82, "learning_rate": 9.57411003236246e-07, "loss": 0.0259, "step": 367760 }, { "epoch": 142.82, "learning_rate": 9.568932038834953e-07, "loss": 0.0086, "step": 367770 }, { "epoch": 142.83, "learning_rate": 9.563754045307445e-07, "loss": 0.0773, "step": 367780 }, { "epoch": 142.83, "learning_rate": 9.558576051779937e-07, "loss": 0.0134, "step": 367790 }, { "epoch": 142.83, "learning_rate": 9.553398058252429e-07, "loss": 0.0002, "step": 367800 }, { "epoch": 142.84, "learning_rate": 9.548220064724919e-07, "loss": 0.0001, "step": 367810 }, { "epoch": 142.84, "learning_rate": 9.54304207119741e-07, "loss": 0.0278, "step": 367820 }, { "epoch": 142.85, "learning_rate": 9.537864077669902e-07, "loss": 0.0001, "step": 367830 }, { "epoch": 142.85, "learning_rate": 9.532686084142395e-07, "loss": 0.0419, "step": 367840 }, { "epoch": 142.85, "learning_rate": 9.527508090614887e-07, "loss": 0.0078, "step": 367850 }, { "epoch": 142.86, "learning_rate": 9.522330097087379e-07, "loss": 0.0161, "step": 367860 }, { "epoch": 142.86, "learning_rate": 9.51715210355987e-07, "loss": 0.0268, "step": 367870 }, { "epoch": 142.87, "learning_rate": 9.511974110032362e-07, "loss": 0.0172, "step": 367880 }, { "epoch": 142.87, "learning_rate": 9.506796116504854e-07, "loss": 0.0748, "step": 367890 }, { "epoch": 142.87, "learning_rate": 9.501618122977348e-07, "loss": 0.0152, "step": 367900 }, { "epoch": 142.88, "learning_rate": 9.496440129449839e-07, "loss": 0.035, "step": 367910 }, { "epoch": 142.88, "learning_rate": 9.491262135922331e-07, "loss": 0.0415, "step": 367920 }, { "epoch": 142.89, "learning_rate": 9.486084142394823e-07, "loss": 0.1204, "step": 367930 }, { "epoch": 142.89, "learning_rate": 9.480906148867315e-07, "loss": 0.0005, "step": 367940 }, { "epoch": 142.89, "learning_rate": 9.475728155339807e-07, "loss": 0.0537, "step": 367950 }, { "epoch": 142.9, "learning_rate": 9.470550161812299e-07, "loss": 0.0005, "step": 367960 }, { "epoch": 142.9, "learning_rate": 9.465372168284791e-07, "loss": 0.0136, "step": 367970 }, { "epoch": 142.9, "learning_rate": 9.460194174757283e-07, "loss": 0.0001, "step": 367980 }, { "epoch": 142.91, "learning_rate": 9.455016181229775e-07, "loss": 0.0565, "step": 367990 }, { "epoch": 142.91, "learning_rate": 9.449838187702267e-07, "loss": 0.0004, "step": 368000 }, { "epoch": 142.92, "learning_rate": 9.444660194174759e-07, "loss": 0.1031, "step": 368010 }, { "epoch": 142.92, "learning_rate": 9.43948220064725e-07, "loss": 0.0015, "step": 368020 }, { "epoch": 142.92, "learning_rate": 9.434304207119741e-07, "loss": 0.0002, "step": 368030 }, { "epoch": 142.93, "learning_rate": 9.429126213592233e-07, "loss": 0.0108, "step": 368040 }, { "epoch": 142.93, "learning_rate": 9.423948220064725e-07, "loss": 0.0089, "step": 368050 }, { "epoch": 142.94, "learning_rate": 9.418770226537217e-07, "loss": 0.0036, "step": 368060 }, { "epoch": 142.94, "learning_rate": 9.413592233009709e-07, "loss": 0.0098, "step": 368070 }, { "epoch": 142.94, "learning_rate": 9.408414239482201e-07, "loss": 0.0122, "step": 368080 }, { "epoch": 142.95, "learning_rate": 9.403236245954693e-07, "loss": 0.0003, "step": 368090 }, { "epoch": 142.95, "learning_rate": 9.398058252427185e-07, "loss": 0.0292, "step": 368100 }, { "epoch": 142.96, "learning_rate": 9.392880258899677e-07, "loss": 0.0333, "step": 368110 }, { "epoch": 142.96, "learning_rate": 9.387702265372169e-07, "loss": 0.0223, "step": 368120 }, { "epoch": 142.96, "learning_rate": 9.382524271844661e-07, "loss": 0.0264, "step": 368130 }, { "epoch": 142.97, "learning_rate": 9.377346278317153e-07, "loss": 0.1621, "step": 368140 }, { "epoch": 142.97, "learning_rate": 9.372168284789644e-07, "loss": 0.0086, "step": 368150 }, { "epoch": 142.97, "learning_rate": 9.366990291262136e-07, "loss": 0.0241, "step": 368160 }, { "epoch": 142.98, "learning_rate": 9.361812297734628e-07, "loss": 0.0832, "step": 368170 }, { "epoch": 142.98, "learning_rate": 9.356634304207122e-07, "loss": 0.0538, "step": 368180 }, { "epoch": 142.99, "learning_rate": 9.351456310679613e-07, "loss": 0.003, "step": 368190 }, { "epoch": 142.99, "learning_rate": 9.346278317152104e-07, "loss": 0.072, "step": 368200 }, { "epoch": 142.99, "learning_rate": 9.341100323624596e-07, "loss": 0.0783, "step": 368210 }, { "epoch": 143.0, "learning_rate": 9.335922330097088e-07, "loss": 0.0132, "step": 368220 }, { "epoch": 143.0, "eval_accuracy": 0.9526822558459422, "eval_loss": 0.39228132367134094, "eval_runtime": 8.1822, "eval_samples_per_second": 444.256, "eval_steps_per_second": 55.608, "step": 368225 }, { "epoch": 143.0, "learning_rate": 9.33074433656958e-07, "loss": 0.0849, "step": 368230 }, { "epoch": 143.01, "learning_rate": 9.325566343042072e-07, "loss": 0.0022, "step": 368240 }, { "epoch": 143.01, "learning_rate": 9.320388349514564e-07, "loss": 0.178, "step": 368250 }, { "epoch": 143.01, "learning_rate": 9.315210355987056e-07, "loss": 0.0013, "step": 368260 }, { "epoch": 143.02, "learning_rate": 9.310032362459548e-07, "loss": 0.0001, "step": 368270 }, { "epoch": 143.02, "learning_rate": 9.30485436893204e-07, "loss": 0.0168, "step": 368280 }, { "epoch": 143.03, "learning_rate": 9.299676375404532e-07, "loss": 0.0599, "step": 368290 }, { "epoch": 143.03, "learning_rate": 9.294498381877024e-07, "loss": 0.0266, "step": 368300 }, { "epoch": 143.03, "learning_rate": 9.289320388349515e-07, "loss": 0.1643, "step": 368310 }, { "epoch": 143.04, "learning_rate": 9.284142394822007e-07, "loss": 0.0003, "step": 368320 }, { "epoch": 143.04, "learning_rate": 9.278964401294499e-07, "loss": 0.1141, "step": 368330 }, { "epoch": 143.04, "learning_rate": 9.273786407766991e-07, "loss": 0.0286, "step": 368340 }, { "epoch": 143.05, "learning_rate": 9.268608414239483e-07, "loss": 0.0178, "step": 368350 }, { "epoch": 143.05, "learning_rate": 9.263430420711975e-07, "loss": 0.0728, "step": 368360 }, { "epoch": 143.06, "learning_rate": 9.258252427184466e-07, "loss": 0.0916, "step": 368370 }, { "epoch": 143.06, "learning_rate": 9.253074433656958e-07, "loss": 0.1091, "step": 368380 }, { "epoch": 143.06, "learning_rate": 9.24789644012945e-07, "loss": 0.0403, "step": 368390 }, { "epoch": 143.07, "learning_rate": 9.242718446601942e-07, "loss": 0.002, "step": 368400 }, { "epoch": 143.07, "learning_rate": 9.237540453074434e-07, "loss": 0.0026, "step": 368410 }, { "epoch": 143.08, "learning_rate": 9.232362459546926e-07, "loss": 0.1103, "step": 368420 }, { "epoch": 143.08, "learning_rate": 9.227184466019418e-07, "loss": 0.0004, "step": 368430 }, { "epoch": 143.08, "learning_rate": 9.222006472491909e-07, "loss": 0.0607, "step": 368440 }, { "epoch": 143.09, "learning_rate": 9.216828478964401e-07, "loss": 0.0093, "step": 368450 }, { "epoch": 143.09, "learning_rate": 9.211650485436895e-07, "loss": 0.0062, "step": 368460 }, { "epoch": 143.1, "learning_rate": 9.206472491909387e-07, "loss": 0.0864, "step": 368470 }, { "epoch": 143.1, "learning_rate": 9.201294498381878e-07, "loss": 0.027, "step": 368480 }, { "epoch": 143.1, "learning_rate": 9.19611650485437e-07, "loss": 0.0233, "step": 368490 }, { "epoch": 143.11, "learning_rate": 9.190938511326862e-07, "loss": 0.0207, "step": 368500 }, { "epoch": 143.11, "learning_rate": 9.185760517799354e-07, "loss": 0.0005, "step": 368510 }, { "epoch": 143.11, "learning_rate": 9.180582524271846e-07, "loss": 0.0682, "step": 368520 }, { "epoch": 143.12, "learning_rate": 9.175404530744338e-07, "loss": 0.0152, "step": 368530 }, { "epoch": 143.12, "learning_rate": 9.17022653721683e-07, "loss": 0.0147, "step": 368540 }, { "epoch": 143.13, "learning_rate": 9.165048543689321e-07, "loss": 0.0567, "step": 368550 }, { "epoch": 143.13, "learning_rate": 9.159870550161813e-07, "loss": 0.0173, "step": 368560 }, { "epoch": 143.13, "learning_rate": 9.154692556634305e-07, "loss": 0.0207, "step": 368570 }, { "epoch": 143.14, "learning_rate": 9.149514563106797e-07, "loss": 0.0529, "step": 368580 }, { "epoch": 143.14, "learning_rate": 9.144336569579289e-07, "loss": 0.0013, "step": 368590 }, { "epoch": 143.15, "learning_rate": 9.13915857605178e-07, "loss": 0.0389, "step": 368600 }, { "epoch": 143.15, "learning_rate": 9.133980582524272e-07, "loss": 0.0107, "step": 368610 }, { "epoch": 143.15, "learning_rate": 9.128802588996764e-07, "loss": 0.0143, "step": 368620 }, { "epoch": 143.16, "learning_rate": 9.123624595469256e-07, "loss": 0.0114, "step": 368630 }, { "epoch": 143.16, "learning_rate": 9.118446601941748e-07, "loss": 0.0182, "step": 368640 }, { "epoch": 143.17, "learning_rate": 9.11326860841424e-07, "loss": 0.0256, "step": 368650 }, { "epoch": 143.17, "learning_rate": 9.108090614886732e-07, "loss": 0.0143, "step": 368660 }, { "epoch": 143.17, "learning_rate": 9.102912621359224e-07, "loss": 0.0516, "step": 368670 }, { "epoch": 143.18, "learning_rate": 9.097734627831716e-07, "loss": 0.0236, "step": 368680 }, { "epoch": 143.18, "learning_rate": 9.092556634304208e-07, "loss": 0.0005, "step": 368690 }, { "epoch": 143.18, "learning_rate": 9.0873786407767e-07, "loss": 0.0497, "step": 368700 }, { "epoch": 143.19, "learning_rate": 9.082200647249192e-07, "loss": 0.0105, "step": 368710 }, { "epoch": 143.19, "learning_rate": 9.077022653721682e-07, "loss": 0.0295, "step": 368720 }, { "epoch": 143.2, "learning_rate": 9.071844660194174e-07, "loss": 0.0003, "step": 368730 }, { "epoch": 143.2, "learning_rate": 9.066666666666668e-07, "loss": 0.086, "step": 368740 }, { "epoch": 143.2, "learning_rate": 9.06148867313916e-07, "loss": 0.0023, "step": 368750 }, { "epoch": 143.21, "learning_rate": 9.056310679611651e-07, "loss": 0.0717, "step": 368760 }, { "epoch": 143.21, "learning_rate": 9.051132686084143e-07, "loss": 0.047, "step": 368770 }, { "epoch": 143.22, "learning_rate": 9.045954692556635e-07, "loss": 0.0005, "step": 368780 }, { "epoch": 143.22, "learning_rate": 9.040776699029127e-07, "loss": 0.0248, "step": 368790 }, { "epoch": 143.22, "learning_rate": 9.035598705501619e-07, "loss": 0.065, "step": 368800 }, { "epoch": 143.23, "learning_rate": 9.030420711974111e-07, "loss": 0.1755, "step": 368810 }, { "epoch": 143.23, "learning_rate": 9.025242718446603e-07, "loss": 0.0223, "step": 368820 }, { "epoch": 143.23, "learning_rate": 9.020064724919095e-07, "loss": 0.0013, "step": 368830 }, { "epoch": 143.24, "learning_rate": 9.014886731391587e-07, "loss": 0.1073, "step": 368840 }, { "epoch": 143.24, "learning_rate": 9.009708737864079e-07, "loss": 0.0045, "step": 368850 }, { "epoch": 143.25, "learning_rate": 9.004530744336571e-07, "loss": 0.0001, "step": 368860 }, { "epoch": 143.25, "learning_rate": 8.999352750809063e-07, "loss": 0.0723, "step": 368870 }, { "epoch": 143.25, "learning_rate": 8.994174757281555e-07, "loss": 0.1596, "step": 368880 }, { "epoch": 143.26, "learning_rate": 8.988996763754046e-07, "loss": 0.0445, "step": 368890 }, { "epoch": 143.26, "learning_rate": 8.983818770226537e-07, "loss": 0.0091, "step": 368900 }, { "epoch": 143.27, "learning_rate": 8.978640776699029e-07, "loss": 0.1099, "step": 368910 }, { "epoch": 143.27, "learning_rate": 8.973462783171521e-07, "loss": 0.0507, "step": 368920 }, { "epoch": 143.27, "learning_rate": 8.968284789644013e-07, "loss": 0.0092, "step": 368930 }, { "epoch": 143.28, "learning_rate": 8.963106796116505e-07, "loss": 0.0203, "step": 368940 }, { "epoch": 143.28, "learning_rate": 8.957928802588997e-07, "loss": 0.0096, "step": 368950 }, { "epoch": 143.29, "learning_rate": 8.952750809061489e-07, "loss": 0.0134, "step": 368960 }, { "epoch": 143.29, "learning_rate": 8.947572815533981e-07, "loss": 0.007, "step": 368970 }, { "epoch": 143.29, "learning_rate": 8.942394822006473e-07, "loss": 0.0298, "step": 368980 }, { "epoch": 143.3, "learning_rate": 8.937216828478965e-07, "loss": 0.0149, "step": 368990 }, { "epoch": 143.3, "learning_rate": 8.932038834951457e-07, "loss": 0.0153, "step": 369000 }, { "epoch": 143.3, "learning_rate": 8.926860841423948e-07, "loss": 0.0003, "step": 369010 }, { "epoch": 143.31, "learning_rate": 8.921682847896442e-07, "loss": 0.0864, "step": 369020 }, { "epoch": 143.31, "learning_rate": 8.916504854368934e-07, "loss": 0.0187, "step": 369030 }, { "epoch": 143.32, "learning_rate": 8.911326860841426e-07, "loss": 0.0278, "step": 369040 }, { "epoch": 143.32, "learning_rate": 8.906148867313917e-07, "loss": 0.0538, "step": 369050 }, { "epoch": 143.32, "learning_rate": 8.900970873786409e-07, "loss": 0.0005, "step": 369060 }, { "epoch": 143.33, "learning_rate": 8.8957928802589e-07, "loss": 0.0243, "step": 369070 }, { "epoch": 143.33, "learning_rate": 8.890614886731392e-07, "loss": 0.0001, "step": 369080 }, { "epoch": 143.34, "learning_rate": 8.885436893203884e-07, "loss": 0.0001, "step": 369090 }, { "epoch": 143.34, "learning_rate": 8.880258899676376e-07, "loss": 0.1447, "step": 369100 }, { "epoch": 143.34, "learning_rate": 8.875080906148868e-07, "loss": 0.0851, "step": 369110 }, { "epoch": 143.35, "learning_rate": 8.86990291262136e-07, "loss": 0.0281, "step": 369120 }, { "epoch": 143.35, "learning_rate": 8.864724919093852e-07, "loss": 0.1553, "step": 369130 }, { "epoch": 143.36, "learning_rate": 8.859546925566344e-07, "loss": 0.0095, "step": 369140 }, { "epoch": 143.36, "learning_rate": 8.854368932038836e-07, "loss": 0.1404, "step": 369150 }, { "epoch": 143.36, "learning_rate": 8.849190938511328e-07, "loss": 0.0002, "step": 369160 }, { "epoch": 143.37, "learning_rate": 8.844012944983819e-07, "loss": 0.0407, "step": 369170 }, { "epoch": 143.37, "learning_rate": 8.838834951456311e-07, "loss": 0.0026, "step": 369180 }, { "epoch": 143.37, "learning_rate": 8.833656957928803e-07, "loss": 0.0156, "step": 369190 }, { "epoch": 143.38, "learning_rate": 8.828478964401295e-07, "loss": 0.0092, "step": 369200 }, { "epoch": 143.38, "learning_rate": 8.823300970873787e-07, "loss": 0.0738, "step": 369210 }, { "epoch": 143.39, "learning_rate": 8.818122977346279e-07, "loss": 0.0705, "step": 369220 }, { "epoch": 143.39, "learning_rate": 8.812944983818771e-07, "loss": 0.1272, "step": 369230 }, { "epoch": 143.39, "learning_rate": 8.807766990291263e-07, "loss": 0.0231, "step": 369240 }, { "epoch": 143.4, "learning_rate": 8.802588996763754e-07, "loss": 0.0089, "step": 369250 }, { "epoch": 143.4, "learning_rate": 8.797411003236246e-07, "loss": 0.0046, "step": 369260 }, { "epoch": 143.41, "learning_rate": 8.792233009708738e-07, "loss": 0.0022, "step": 369270 }, { "epoch": 143.41, "learning_rate": 8.78705501618123e-07, "loss": 0.0002, "step": 369280 }, { "epoch": 143.41, "learning_rate": 8.781877022653721e-07, "loss": 0.0148, "step": 369290 }, { "epoch": 143.42, "learning_rate": 8.776699029126215e-07, "loss": 0.0104, "step": 369300 }, { "epoch": 143.42, "learning_rate": 8.771521035598707e-07, "loss": 0.0305, "step": 369310 }, { "epoch": 143.43, "learning_rate": 8.766343042071199e-07, "loss": 0.0014, "step": 369320 }, { "epoch": 143.43, "learning_rate": 8.76116504854369e-07, "loss": 0.1198, "step": 369330 }, { "epoch": 143.43, "learning_rate": 8.755987055016182e-07, "loss": 0.1378, "step": 369340 }, { "epoch": 143.44, "learning_rate": 8.750809061488674e-07, "loss": 0.1703, "step": 369350 }, { "epoch": 143.44, "learning_rate": 8.745631067961166e-07, "loss": 0.0528, "step": 369360 }, { "epoch": 143.44, "learning_rate": 8.740453074433658e-07, "loss": 0.0385, "step": 369370 }, { "epoch": 143.45, "learning_rate": 8.73527508090615e-07, "loss": 0.0005, "step": 369380 }, { "epoch": 143.45, "learning_rate": 8.730097087378642e-07, "loss": 0.0519, "step": 369390 }, { "epoch": 143.46, "learning_rate": 8.724919093851134e-07, "loss": 0.0184, "step": 369400 }, { "epoch": 143.46, "learning_rate": 8.719741100323626e-07, "loss": 0.0143, "step": 369410 }, { "epoch": 143.46, "learning_rate": 8.714563106796118e-07, "loss": 0.0635, "step": 369420 }, { "epoch": 143.47, "learning_rate": 8.70938511326861e-07, "loss": 0.0433, "step": 369430 }, { "epoch": 143.47, "learning_rate": 8.704207119741101e-07, "loss": 0.1202, "step": 369440 }, { "epoch": 143.48, "learning_rate": 8.699029126213593e-07, "loss": 0.0187, "step": 369450 }, { "epoch": 143.48, "learning_rate": 8.693851132686084e-07, "loss": 0.1338, "step": 369460 }, { "epoch": 143.48, "learning_rate": 8.688673139158576e-07, "loss": 0.0538, "step": 369470 }, { "epoch": 143.49, "learning_rate": 8.683495145631068e-07, "loss": 0.0112, "step": 369480 }, { "epoch": 143.49, "learning_rate": 8.67831715210356e-07, "loss": 0.111, "step": 369490 }, { "epoch": 143.5, "learning_rate": 8.673139158576052e-07, "loss": 0.0798, "step": 369500 }, { "epoch": 143.5, "learning_rate": 8.667961165048544e-07, "loss": 0.0499, "step": 369510 }, { "epoch": 143.5, "learning_rate": 8.662783171521036e-07, "loss": 0.0091, "step": 369520 }, { "epoch": 143.51, "learning_rate": 8.657605177993528e-07, "loss": 0.0077, "step": 369530 }, { "epoch": 143.51, "learning_rate": 8.65242718446602e-07, "loss": 0.0228, "step": 369540 }, { "epoch": 143.51, "learning_rate": 8.647249190938512e-07, "loss": 0.0218, "step": 369550 }, { "epoch": 143.52, "learning_rate": 8.642071197411004e-07, "loss": 0.008, "step": 369560 }, { "epoch": 143.52, "learning_rate": 8.636893203883496e-07, "loss": 0.1075, "step": 369570 }, { "epoch": 143.53, "learning_rate": 8.631715210355989e-07, "loss": 0.0093, "step": 369580 }, { "epoch": 143.53, "learning_rate": 8.626537216828481e-07, "loss": 0.0534, "step": 369590 }, { "epoch": 143.53, "learning_rate": 8.621359223300973e-07, "loss": 0.0388, "step": 369600 }, { "epoch": 143.54, "learning_rate": 8.616181229773465e-07, "loss": 0.0138, "step": 369610 }, { "epoch": 143.54, "learning_rate": 8.611003236245955e-07, "loss": 0.0543, "step": 369620 }, { "epoch": 143.55, "learning_rate": 8.605825242718447e-07, "loss": 0.0093, "step": 369630 }, { "epoch": 143.55, "learning_rate": 8.600647249190939e-07, "loss": 0.212, "step": 369640 }, { "epoch": 143.55, "learning_rate": 8.595469255663431e-07, "loss": 0.0105, "step": 369650 }, { "epoch": 143.56, "learning_rate": 8.590291262135923e-07, "loss": 0.0217, "step": 369660 }, { "epoch": 143.56, "learning_rate": 8.585113268608415e-07, "loss": 0.0016, "step": 369670 }, { "epoch": 143.57, "learning_rate": 8.579935275080907e-07, "loss": 0.0184, "step": 369680 }, { "epoch": 143.57, "learning_rate": 8.574757281553399e-07, "loss": 0.0767, "step": 369690 }, { "epoch": 143.57, "learning_rate": 8.569579288025891e-07, "loss": 0.1033, "step": 369700 }, { "epoch": 143.58, "learning_rate": 8.564401294498383e-07, "loss": 0.0094, "step": 369710 }, { "epoch": 143.58, "learning_rate": 8.559223300970875e-07, "loss": 0.0077, "step": 369720 }, { "epoch": 143.58, "learning_rate": 8.554045307443367e-07, "loss": 0.0243, "step": 369730 }, { "epoch": 143.59, "learning_rate": 8.548867313915858e-07, "loss": 0.0395, "step": 369740 }, { "epoch": 143.59, "learning_rate": 8.54368932038835e-07, "loss": 0.0108, "step": 369750 }, { "epoch": 143.6, "learning_rate": 8.538511326860842e-07, "loss": 0.1098, "step": 369760 }, { "epoch": 143.6, "learning_rate": 8.533333333333334e-07, "loss": 0.0734, "step": 369770 }, { "epoch": 143.6, "learning_rate": 8.528155339805825e-07, "loss": 0.0558, "step": 369780 }, { "epoch": 143.61, "learning_rate": 8.522977346278317e-07, "loss": 0.0315, "step": 369790 }, { "epoch": 143.61, "learning_rate": 8.517799352750809e-07, "loss": 0.0233, "step": 369800 }, { "epoch": 143.62, "learning_rate": 8.512621359223301e-07, "loss": 0.1292, "step": 369810 }, { "epoch": 143.62, "learning_rate": 8.507443365695793e-07, "loss": 0.0864, "step": 369820 }, { "epoch": 143.62, "learning_rate": 8.502265372168285e-07, "loss": 0.0001, "step": 369830 }, { "epoch": 143.63, "learning_rate": 8.497087378640777e-07, "loss": 0.009, "step": 369840 }, { "epoch": 143.63, "learning_rate": 8.491909385113269e-07, "loss": 0.0556, "step": 369850 }, { "epoch": 143.63, "learning_rate": 8.486731391585762e-07, "loss": 0.0083, "step": 369860 }, { "epoch": 143.64, "learning_rate": 8.481553398058254e-07, "loss": 0.0607, "step": 369870 }, { "epoch": 143.64, "learning_rate": 8.476375404530746e-07, "loss": 0.0004, "step": 369880 }, { "epoch": 143.65, "learning_rate": 8.471197411003238e-07, "loss": 0.0308, "step": 369890 }, { "epoch": 143.65, "learning_rate": 8.466019417475729e-07, "loss": 0.0719, "step": 369900 }, { "epoch": 143.65, "learning_rate": 8.460841423948221e-07, "loss": 0.0469, "step": 369910 }, { "epoch": 143.66, "learning_rate": 8.455663430420713e-07, "loss": 0.0096, "step": 369920 }, { "epoch": 143.66, "learning_rate": 8.450485436893205e-07, "loss": 0.0179, "step": 369930 }, { "epoch": 143.67, "learning_rate": 8.445307443365697e-07, "loss": 0.0088, "step": 369940 }, { "epoch": 143.67, "learning_rate": 8.440129449838189e-07, "loss": 0.0539, "step": 369950 }, { "epoch": 143.67, "learning_rate": 8.43495145631068e-07, "loss": 0.0001, "step": 369960 }, { "epoch": 143.68, "learning_rate": 8.429773462783172e-07, "loss": 0.2129, "step": 369970 }, { "epoch": 143.68, "learning_rate": 8.424595469255664e-07, "loss": 0.0047, "step": 369980 }, { "epoch": 143.69, "learning_rate": 8.419417475728156e-07, "loss": 0.0446, "step": 369990 }, { "epoch": 143.69, "learning_rate": 8.414239482200648e-07, "loss": 0.0435, "step": 370000 }, { "epoch": 143.69, "learning_rate": 8.40906148867314e-07, "loss": 0.0106, "step": 370010 }, { "epoch": 143.7, "learning_rate": 8.403883495145632e-07, "loss": 0.0299, "step": 370020 }, { "epoch": 143.7, "learning_rate": 8.398705501618123e-07, "loss": 0.0102, "step": 370030 }, { "epoch": 143.7, "learning_rate": 8.393527508090615e-07, "loss": 0.0096, "step": 370040 }, { "epoch": 143.71, "learning_rate": 8.388349514563107e-07, "loss": 0.0382, "step": 370050 }, { "epoch": 143.71, "learning_rate": 8.383171521035599e-07, "loss": 0.0629, "step": 370060 }, { "epoch": 143.72, "learning_rate": 8.377993527508091e-07, "loss": 0.113, "step": 370070 }, { "epoch": 143.72, "learning_rate": 8.372815533980583e-07, "loss": 0.0396, "step": 370080 }, { "epoch": 143.72, "learning_rate": 8.367637540453075e-07, "loss": 0.0342, "step": 370090 }, { "epoch": 143.73, "learning_rate": 8.362459546925567e-07, "loss": 0.0001, "step": 370100 }, { "epoch": 143.73, "learning_rate": 8.357281553398059e-07, "loss": 0.0899, "step": 370110 }, { "epoch": 143.74, "learning_rate": 8.352103559870551e-07, "loss": 0.097, "step": 370120 }, { "epoch": 143.74, "learning_rate": 8.346925566343042e-07, "loss": 0.0564, "step": 370130 }, { "epoch": 143.74, "learning_rate": 8.341747572815535e-07, "loss": 0.0667, "step": 370140 }, { "epoch": 143.75, "learning_rate": 8.336569579288027e-07, "loss": 0.1567, "step": 370150 }, { "epoch": 143.75, "learning_rate": 8.331391585760519e-07, "loss": 0.0219, "step": 370160 }, { "epoch": 143.76, "learning_rate": 8.326213592233011e-07, "loss": 0.01, "step": 370170 }, { "epoch": 143.76, "learning_rate": 8.321035598705503e-07, "loss": 0.0214, "step": 370180 }, { "epoch": 143.76, "learning_rate": 8.315857605177994e-07, "loss": 0.0785, "step": 370190 }, { "epoch": 143.77, "learning_rate": 8.310679611650486e-07, "loss": 0.0001, "step": 370200 }, { "epoch": 143.77, "learning_rate": 8.305501618122978e-07, "loss": 0.0166, "step": 370210 }, { "epoch": 143.77, "learning_rate": 8.30032362459547e-07, "loss": 0.0001, "step": 370220 }, { "epoch": 143.78, "learning_rate": 8.295145631067962e-07, "loss": 0.0878, "step": 370230 }, { "epoch": 143.78, "learning_rate": 8.289967637540454e-07, "loss": 0.0007, "step": 370240 }, { "epoch": 143.79, "learning_rate": 8.284789644012946e-07, "loss": 0.0262, "step": 370250 }, { "epoch": 143.79, "learning_rate": 8.279611650485438e-07, "loss": 0.0563, "step": 370260 }, { "epoch": 143.79, "learning_rate": 8.27443365695793e-07, "loss": 0.0031, "step": 370270 }, { "epoch": 143.8, "learning_rate": 8.269255663430422e-07, "loss": 0.0275, "step": 370280 }, { "epoch": 143.8, "learning_rate": 8.264077669902914e-07, "loss": 0.0265, "step": 370290 }, { "epoch": 143.81, "learning_rate": 8.258899676375406e-07, "loss": 0.0433, "step": 370300 }, { "epoch": 143.81, "learning_rate": 8.253721682847896e-07, "loss": 0.0133, "step": 370310 }, { "epoch": 143.81, "learning_rate": 8.248543689320388e-07, "loss": 0.0002, "step": 370320 }, { "epoch": 143.82, "learning_rate": 8.24336569579288e-07, "loss": 0.0664, "step": 370330 }, { "epoch": 143.82, "learning_rate": 8.238187702265372e-07, "loss": 0.0587, "step": 370340 }, { "epoch": 143.83, "learning_rate": 8.233009708737864e-07, "loss": 0.0165, "step": 370350 }, { "epoch": 143.83, "learning_rate": 8.227831715210356e-07, "loss": 0.01, "step": 370360 }, { "epoch": 143.83, "learning_rate": 8.222653721682848e-07, "loss": 0.0267, "step": 370370 }, { "epoch": 143.84, "learning_rate": 8.21747572815534e-07, "loss": 0.161, "step": 370380 }, { "epoch": 143.84, "learning_rate": 8.212297734627832e-07, "loss": 0.0739, "step": 370390 }, { "epoch": 143.84, "learning_rate": 8.207119741100324e-07, "loss": 0.0547, "step": 370400 }, { "epoch": 143.85, "learning_rate": 8.201941747572816e-07, "loss": 0.0001, "step": 370410 }, { "epoch": 143.85, "learning_rate": 8.196763754045309e-07, "loss": 0.0207, "step": 370420 }, { "epoch": 143.86, "learning_rate": 8.191585760517801e-07, "loss": 0.0877, "step": 370430 }, { "epoch": 143.86, "learning_rate": 8.186407766990293e-07, "loss": 0.0413, "step": 370440 }, { "epoch": 143.86, "learning_rate": 8.181229773462785e-07, "loss": 0.0298, "step": 370450 }, { "epoch": 143.87, "learning_rate": 8.176051779935277e-07, "loss": 0.0458, "step": 370460 }, { "epoch": 143.87, "learning_rate": 8.170873786407768e-07, "loss": 0.0002, "step": 370470 }, { "epoch": 143.88, "learning_rate": 8.16569579288026e-07, "loss": 0.1012, "step": 370480 }, { "epoch": 143.88, "learning_rate": 8.160517799352751e-07, "loss": 0.051, "step": 370490 }, { "epoch": 143.88, "learning_rate": 8.155339805825243e-07, "loss": 0.0486, "step": 370500 }, { "epoch": 143.89, "learning_rate": 8.150161812297735e-07, "loss": 0.0707, "step": 370510 }, { "epoch": 143.89, "learning_rate": 8.144983818770227e-07, "loss": 0.0079, "step": 370520 }, { "epoch": 143.9, "learning_rate": 8.139805825242719e-07, "loss": 0.0002, "step": 370530 }, { "epoch": 143.9, "learning_rate": 8.134627831715211e-07, "loss": 0.0204, "step": 370540 }, { "epoch": 143.9, "learning_rate": 8.129449838187703e-07, "loss": 0.0269, "step": 370550 }, { "epoch": 143.91, "learning_rate": 8.124271844660195e-07, "loss": 0.0079, "step": 370560 }, { "epoch": 143.91, "learning_rate": 8.119093851132687e-07, "loss": 0.0113, "step": 370570 }, { "epoch": 143.91, "learning_rate": 8.113915857605179e-07, "loss": 0.0143, "step": 370580 }, { "epoch": 143.92, "learning_rate": 8.108737864077671e-07, "loss": 0.0244, "step": 370590 }, { "epoch": 143.92, "learning_rate": 8.103559870550162e-07, "loss": 0.0002, "step": 370600 }, { "epoch": 143.93, "learning_rate": 8.098381877022654e-07, "loss": 0.0001, "step": 370610 }, { "epoch": 143.93, "learning_rate": 8.093203883495146e-07, "loss": 0.0023, "step": 370620 }, { "epoch": 143.93, "learning_rate": 8.088025889967638e-07, "loss": 0.0351, "step": 370630 }, { "epoch": 143.94, "learning_rate": 8.08284789644013e-07, "loss": 0.1355, "step": 370640 }, { "epoch": 143.94, "learning_rate": 8.077669902912622e-07, "loss": 0.0001, "step": 370650 }, { "epoch": 143.95, "learning_rate": 8.072491909385113e-07, "loss": 0.076, "step": 370660 }, { "epoch": 143.95, "learning_rate": 8.067313915857605e-07, "loss": 0.09, "step": 370670 }, { "epoch": 143.95, "learning_rate": 8.062135922330097e-07, "loss": 0.081, "step": 370680 }, { "epoch": 143.96, "learning_rate": 8.056957928802589e-07, "loss": 0.0451, "step": 370690 }, { "epoch": 143.96, "learning_rate": 8.051779935275082e-07, "loss": 0.0001, "step": 370700 }, { "epoch": 143.97, "learning_rate": 8.046601941747574e-07, "loss": 0.053, "step": 370710 }, { "epoch": 143.97, "learning_rate": 8.041423948220066e-07, "loss": 0.0001, "step": 370720 }, { "epoch": 143.97, "learning_rate": 8.036245954692558e-07, "loss": 0.0539, "step": 370730 }, { "epoch": 143.98, "learning_rate": 8.03106796116505e-07, "loss": 0.066, "step": 370740 }, { "epoch": 143.98, "learning_rate": 8.025889967637542e-07, "loss": 0.0076, "step": 370750 }, { "epoch": 143.98, "learning_rate": 8.020711974110033e-07, "loss": 0.0049, "step": 370760 }, { "epoch": 143.99, "learning_rate": 8.015533980582525e-07, "loss": 0.031, "step": 370770 }, { "epoch": 143.99, "learning_rate": 8.010355987055017e-07, "loss": 0.0134, "step": 370780 }, { "epoch": 144.0, "learning_rate": 8.005177993527509e-07, "loss": 0.0094, "step": 370790 }, { "epoch": 144.0, "learning_rate": 8.000000000000001e-07, "loss": 0.0349, "step": 370800 }, { "epoch": 144.0, "eval_accuracy": 0.9526822558459422, "eval_loss": 0.38799384236335754, "eval_runtime": 8.1817, "eval_samples_per_second": 444.282, "eval_steps_per_second": 55.612, "step": 370800 }, { "epoch": 144.0, "learning_rate": 7.994822006472493e-07, "loss": 0.0041, "step": 370810 }, { "epoch": 144.01, "learning_rate": 7.989644012944985e-07, "loss": 0.0408, "step": 370820 }, { "epoch": 144.01, "learning_rate": 7.984466019417477e-07, "loss": 0.0144, "step": 370830 }, { "epoch": 144.02, "learning_rate": 7.979288025889968e-07, "loss": 0.0184, "step": 370840 }, { "epoch": 144.02, "learning_rate": 7.97411003236246e-07, "loss": 0.0574, "step": 370850 }, { "epoch": 144.02, "learning_rate": 7.968932038834952e-07, "loss": 0.0096, "step": 370860 }, { "epoch": 144.03, "learning_rate": 7.963754045307444e-07, "loss": 0.0001, "step": 370870 }, { "epoch": 144.03, "learning_rate": 7.958576051779935e-07, "loss": 0.0095, "step": 370880 }, { "epoch": 144.03, "learning_rate": 7.953398058252427e-07, "loss": 0.0297, "step": 370890 }, { "epoch": 144.04, "learning_rate": 7.948220064724919e-07, "loss": 0.0091, "step": 370900 }, { "epoch": 144.04, "learning_rate": 7.943042071197411e-07, "loss": 0.0001, "step": 370910 }, { "epoch": 144.05, "learning_rate": 7.937864077669903e-07, "loss": 0.054, "step": 370920 }, { "epoch": 144.05, "learning_rate": 7.932686084142395e-07, "loss": 0.0666, "step": 370930 }, { "epoch": 144.05, "learning_rate": 7.927508090614887e-07, "loss": 0.0354, "step": 370940 }, { "epoch": 144.06, "learning_rate": 7.922330097087379e-07, "loss": 0.0402, "step": 370950 }, { "epoch": 144.06, "learning_rate": 7.917152103559871e-07, "loss": 0.0729, "step": 370960 }, { "epoch": 144.07, "learning_rate": 7.911974110032363e-07, "loss": 0.0603, "step": 370970 }, { "epoch": 144.07, "learning_rate": 7.906796116504856e-07, "loss": 0.0175, "step": 370980 }, { "epoch": 144.07, "learning_rate": 7.901618122977348e-07, "loss": 0.0002, "step": 370990 }, { "epoch": 144.08, "learning_rate": 7.89644012944984e-07, "loss": 0.0273, "step": 371000 }, { "epoch": 144.08, "learning_rate": 7.891262135922332e-07, "loss": 0.0005, "step": 371010 }, { "epoch": 144.09, "learning_rate": 7.886084142394823e-07, "loss": 0.124, "step": 371020 }, { "epoch": 144.09, "learning_rate": 7.880906148867315e-07, "loss": 0.1364, "step": 371030 }, { "epoch": 144.09, "learning_rate": 7.875728155339806e-07, "loss": 0.1187, "step": 371040 }, { "epoch": 144.1, "learning_rate": 7.870550161812298e-07, "loss": 0.0879, "step": 371050 }, { "epoch": 144.1, "learning_rate": 7.86537216828479e-07, "loss": 0.0673, "step": 371060 }, { "epoch": 144.1, "learning_rate": 7.860194174757282e-07, "loss": 0.015, "step": 371070 }, { "epoch": 144.11, "learning_rate": 7.855016181229774e-07, "loss": 0.0016, "step": 371080 }, { "epoch": 144.11, "learning_rate": 7.849838187702266e-07, "loss": 0.0378, "step": 371090 }, { "epoch": 144.12, "learning_rate": 7.844660194174758e-07, "loss": 0.0213, "step": 371100 }, { "epoch": 144.12, "learning_rate": 7.83948220064725e-07, "loss": 0.0286, "step": 371110 }, { "epoch": 144.12, "learning_rate": 7.834304207119742e-07, "loss": 0.0002, "step": 371120 }, { "epoch": 144.13, "learning_rate": 7.829126213592234e-07, "loss": 0.1241, "step": 371130 }, { "epoch": 144.13, "learning_rate": 7.823948220064726e-07, "loss": 0.0218, "step": 371140 }, { "epoch": 144.14, "learning_rate": 7.818770226537218e-07, "loss": 0.0247, "step": 371150 }, { "epoch": 144.14, "learning_rate": 7.81359223300971e-07, "loss": 0.002, "step": 371160 }, { "epoch": 144.14, "learning_rate": 7.808414239482201e-07, "loss": 0.1143, "step": 371170 }, { "epoch": 144.15, "learning_rate": 7.803236245954692e-07, "loss": 0.0568, "step": 371180 }, { "epoch": 144.15, "learning_rate": 7.798058252427184e-07, "loss": 0.0099, "step": 371190 }, { "epoch": 144.16, "learning_rate": 7.792880258899676e-07, "loss": 0.0192, "step": 371200 }, { "epoch": 144.16, "learning_rate": 7.787702265372168e-07, "loss": 0.0585, "step": 371210 }, { "epoch": 144.16, "learning_rate": 7.78252427184466e-07, "loss": 0.1407, "step": 371220 }, { "epoch": 144.17, "learning_rate": 7.777346278317152e-07, "loss": 0.0001, "step": 371230 }, { "epoch": 144.17, "learning_rate": 7.772168284789644e-07, "loss": 0.0788, "step": 371240 }, { "epoch": 144.17, "learning_rate": 7.766990291262136e-07, "loss": 0.0149, "step": 371250 }, { "epoch": 144.18, "learning_rate": 7.761812297734629e-07, "loss": 0.0176, "step": 371260 }, { "epoch": 144.18, "learning_rate": 7.756634304207121e-07, "loss": 0.0256, "step": 371270 }, { "epoch": 144.19, "learning_rate": 7.751456310679613e-07, "loss": 0.0002, "step": 371280 }, { "epoch": 144.19, "learning_rate": 7.746278317152105e-07, "loss": 0.0003, "step": 371290 }, { "epoch": 144.19, "learning_rate": 7.741100323624597e-07, "loss": 0.0184, "step": 371300 }, { "epoch": 144.2, "learning_rate": 7.735922330097089e-07, "loss": 0.0082, "step": 371310 }, { "epoch": 144.2, "learning_rate": 7.730744336569581e-07, "loss": 0.0699, "step": 371320 }, { "epoch": 144.21, "learning_rate": 7.725566343042072e-07, "loss": 0.036, "step": 371330 }, { "epoch": 144.21, "learning_rate": 7.720388349514564e-07, "loss": 0.1012, "step": 371340 }, { "epoch": 144.21, "learning_rate": 7.715210355987056e-07, "loss": 0.0097, "step": 371350 }, { "epoch": 144.22, "learning_rate": 7.710032362459548e-07, "loss": 0.0216, "step": 371360 }, { "epoch": 144.22, "learning_rate": 7.704854368932039e-07, "loss": 0.0453, "step": 371370 }, { "epoch": 144.23, "learning_rate": 7.699676375404531e-07, "loss": 0.0303, "step": 371380 }, { "epoch": 144.23, "learning_rate": 7.694498381877023e-07, "loss": 0.0217, "step": 371390 }, { "epoch": 144.23, "learning_rate": 7.689320388349515e-07, "loss": 0.0236, "step": 371400 }, { "epoch": 144.24, "learning_rate": 7.684142394822007e-07, "loss": 0.0082, "step": 371410 }, { "epoch": 144.24, "learning_rate": 7.678964401294499e-07, "loss": 0.0293, "step": 371420 }, { "epoch": 144.24, "learning_rate": 7.673786407766991e-07, "loss": 0.0133, "step": 371430 }, { "epoch": 144.25, "learning_rate": 7.668608414239483e-07, "loss": 0.0116, "step": 371440 }, { "epoch": 144.25, "learning_rate": 7.663430420711974e-07, "loss": 0.1064, "step": 371450 }, { "epoch": 144.26, "learning_rate": 7.658252427184466e-07, "loss": 0.0005, "step": 371460 }, { "epoch": 144.26, "learning_rate": 7.653074433656958e-07, "loss": 0.0077, "step": 371470 }, { "epoch": 144.26, "learning_rate": 7.64789644012945e-07, "loss": 0.0843, "step": 371480 }, { "epoch": 144.27, "learning_rate": 7.642718446601942e-07, "loss": 0.1686, "step": 371490 }, { "epoch": 144.27, "learning_rate": 7.637540453074434e-07, "loss": 0.0532, "step": 371500 }, { "epoch": 144.28, "learning_rate": 7.632362459546926e-07, "loss": 0.0528, "step": 371510 }, { "epoch": 144.28, "learning_rate": 7.627184466019418e-07, "loss": 0.0169, "step": 371520 }, { "epoch": 144.28, "learning_rate": 7.62200647249191e-07, "loss": 0.0696, "step": 371530 }, { "epoch": 144.29, "learning_rate": 7.616828478964403e-07, "loss": 0.0552, "step": 371540 }, { "epoch": 144.29, "learning_rate": 7.611650485436894e-07, "loss": 0.009, "step": 371550 }, { "epoch": 144.3, "learning_rate": 7.606472491909386e-07, "loss": 0.0011, "step": 371560 }, { "epoch": 144.3, "learning_rate": 7.601294498381878e-07, "loss": 0.0203, "step": 371570 }, { "epoch": 144.3, "learning_rate": 7.59611650485437e-07, "loss": 0.0163, "step": 371580 }, { "epoch": 144.31, "learning_rate": 7.590938511326862e-07, "loss": 0.0323, "step": 371590 }, { "epoch": 144.31, "learning_rate": 7.585760517799354e-07, "loss": 0.0742, "step": 371600 }, { "epoch": 144.31, "learning_rate": 7.580582524271845e-07, "loss": 0.0623, "step": 371610 }, { "epoch": 144.32, "learning_rate": 7.575404530744337e-07, "loss": 0.008, "step": 371620 }, { "epoch": 144.32, "learning_rate": 7.570226537216829e-07, "loss": 0.0578, "step": 371630 }, { "epoch": 144.33, "learning_rate": 7.565048543689321e-07, "loss": 0.0426, "step": 371640 }, { "epoch": 144.33, "learning_rate": 7.559870550161813e-07, "loss": 0.0091, "step": 371650 }, { "epoch": 144.33, "learning_rate": 7.554692556634305e-07, "loss": 0.0238, "step": 371660 }, { "epoch": 144.34, "learning_rate": 7.549514563106797e-07, "loss": 0.0077, "step": 371670 }, { "epoch": 144.34, "learning_rate": 7.544336569579289e-07, "loss": 0.011, "step": 371680 }, { "epoch": 144.35, "learning_rate": 7.539158576051781e-07, "loss": 0.0095, "step": 371690 }, { "epoch": 144.35, "learning_rate": 7.533980582524273e-07, "loss": 0.0106, "step": 371700 }, { "epoch": 144.35, "learning_rate": 7.528802588996765e-07, "loss": 0.0263, "step": 371710 }, { "epoch": 144.36, "learning_rate": 7.523624595469256e-07, "loss": 0.0085, "step": 371720 }, { "epoch": 144.36, "learning_rate": 7.518446601941748e-07, "loss": 0.0821, "step": 371730 }, { "epoch": 144.37, "learning_rate": 7.513268608414239e-07, "loss": 0.0046, "step": 371740 }, { "epoch": 144.37, "learning_rate": 7.508090614886731e-07, "loss": 0.0203, "step": 371750 }, { "epoch": 144.37, "learning_rate": 7.502912621359223e-07, "loss": 0.0141, "step": 371760 }, { "epoch": 144.38, "learning_rate": 7.497734627831715e-07, "loss": 0.0078, "step": 371770 }, { "epoch": 144.38, "learning_rate": 7.492556634304207e-07, "loss": 0.1114, "step": 371780 }, { "epoch": 144.38, "learning_rate": 7.487378640776699e-07, "loss": 0.0691, "step": 371790 }, { "epoch": 144.39, "learning_rate": 7.482200647249191e-07, "loss": 0.0721, "step": 371800 }, { "epoch": 144.39, "learning_rate": 7.477022653721683e-07, "loss": 0.0763, "step": 371810 }, { "epoch": 144.4, "learning_rate": 7.471844660194176e-07, "loss": 0.0001, "step": 371820 }, { "epoch": 144.4, "learning_rate": 7.466666666666668e-07, "loss": 0.0223, "step": 371830 }, { "epoch": 144.4, "learning_rate": 7.46148867313916e-07, "loss": 0.0406, "step": 371840 }, { "epoch": 144.41, "learning_rate": 7.456310679611652e-07, "loss": 0.0001, "step": 371850 }, { "epoch": 144.41, "learning_rate": 7.451132686084144e-07, "loss": 0.0325, "step": 371860 }, { "epoch": 144.42, "learning_rate": 7.445954692556636e-07, "loss": 0.1286, "step": 371870 }, { "epoch": 144.42, "learning_rate": 7.440776699029128e-07, "loss": 0.0006, "step": 371880 }, { "epoch": 144.42, "learning_rate": 7.43559870550162e-07, "loss": 0.0168, "step": 371890 }, { "epoch": 144.43, "learning_rate": 7.43042071197411e-07, "loss": 0.0247, "step": 371900 }, { "epoch": 144.43, "learning_rate": 7.425242718446602e-07, "loss": 0.0152, "step": 371910 }, { "epoch": 144.43, "learning_rate": 7.420064724919094e-07, "loss": 0.0985, "step": 371920 }, { "epoch": 144.44, "learning_rate": 7.414886731391586e-07, "loss": 0.0172, "step": 371930 }, { "epoch": 144.44, "learning_rate": 7.409708737864078e-07, "loss": 0.0752, "step": 371940 }, { "epoch": 144.45, "learning_rate": 7.40453074433657e-07, "loss": 0.0014, "step": 371950 }, { "epoch": 144.45, "learning_rate": 7.399352750809062e-07, "loss": 0.0893, "step": 371960 }, { "epoch": 144.45, "learning_rate": 7.394174757281554e-07, "loss": 0.0166, "step": 371970 }, { "epoch": 144.46, "learning_rate": 7.388996763754046e-07, "loss": 0.1515, "step": 371980 }, { "epoch": 144.46, "learning_rate": 7.383818770226538e-07, "loss": 0.0031, "step": 371990 }, { "epoch": 144.47, "learning_rate": 7.37864077669903e-07, "loss": 0.05, "step": 372000 }, { "epoch": 144.47, "learning_rate": 7.373462783171522e-07, "loss": 0.0795, "step": 372010 }, { "epoch": 144.47, "learning_rate": 7.368284789644013e-07, "loss": 0.0067, "step": 372020 }, { "epoch": 144.48, "learning_rate": 7.363106796116505e-07, "loss": 0.0203, "step": 372030 }, { "epoch": 144.48, "learning_rate": 7.357928802588997e-07, "loss": 0.0191, "step": 372040 }, { "epoch": 144.49, "learning_rate": 7.352750809061489e-07, "loss": 0.0004, "step": 372050 }, { "epoch": 144.49, "learning_rate": 7.34757281553398e-07, "loss": 0.0044, "step": 372060 }, { "epoch": 144.49, "learning_rate": 7.342394822006472e-07, "loss": 0.0876, "step": 372070 }, { "epoch": 144.5, "learning_rate": 7.337216828478964e-07, "loss": 0.0185, "step": 372080 }, { "epoch": 144.5, "learning_rate": 7.332038834951456e-07, "loss": 0.0236, "step": 372090 }, { "epoch": 144.5, "learning_rate": 7.326860841423949e-07, "loss": 0.0177, "step": 372100 }, { "epoch": 144.51, "learning_rate": 7.321682847896441e-07, "loss": 0.0558, "step": 372110 }, { "epoch": 144.51, "learning_rate": 7.316504854368933e-07, "loss": 0.0892, "step": 372120 }, { "epoch": 144.52, "learning_rate": 7.311326860841425e-07, "loss": 0.0194, "step": 372130 }, { "epoch": 144.52, "learning_rate": 7.306148867313917e-07, "loss": 0.0858, "step": 372140 }, { "epoch": 144.52, "learning_rate": 7.300970873786409e-07, "loss": 0.0463, "step": 372150 }, { "epoch": 144.53, "learning_rate": 7.295792880258901e-07, "loss": 0.1536, "step": 372160 }, { "epoch": 144.53, "learning_rate": 7.290614886731393e-07, "loss": 0.016, "step": 372170 }, { "epoch": 144.54, "learning_rate": 7.285436893203884e-07, "loss": 0.0091, "step": 372180 }, { "epoch": 144.54, "learning_rate": 7.280258899676376e-07, "loss": 0.0296, "step": 372190 }, { "epoch": 144.54, "learning_rate": 7.275080906148868e-07, "loss": 0.0079, "step": 372200 }, { "epoch": 144.55, "learning_rate": 7.26990291262136e-07, "loss": 0.0075, "step": 372210 }, { "epoch": 144.55, "learning_rate": 7.264724919093852e-07, "loss": 0.0001, "step": 372220 }, { "epoch": 144.56, "learning_rate": 7.259546925566344e-07, "loss": 0.0075, "step": 372230 }, { "epoch": 144.56, "learning_rate": 7.254368932038836e-07, "loss": 0.1023, "step": 372240 }, { "epoch": 144.56, "learning_rate": 7.249190938511327e-07, "loss": 0.0851, "step": 372250 }, { "epoch": 144.57, "learning_rate": 7.244012944983819e-07, "loss": 0.0407, "step": 372260 }, { "epoch": 144.57, "learning_rate": 7.238834951456311e-07, "loss": 0.0253, "step": 372270 }, { "epoch": 144.57, "learning_rate": 7.233656957928803e-07, "loss": 0.0301, "step": 372280 }, { "epoch": 144.58, "learning_rate": 7.228478964401295e-07, "loss": 0.1527, "step": 372290 }, { "epoch": 144.58, "learning_rate": 7.223300970873787e-07, "loss": 0.1019, "step": 372300 }, { "epoch": 144.59, "learning_rate": 7.218122977346278e-07, "loss": 0.0135, "step": 372310 }, { "epoch": 144.59, "learning_rate": 7.21294498381877e-07, "loss": 0.0017, "step": 372320 }, { "epoch": 144.59, "learning_rate": 7.207766990291262e-07, "loss": 0.0633, "step": 372330 }, { "epoch": 144.6, "learning_rate": 7.202588996763754e-07, "loss": 0.0185, "step": 372340 }, { "epoch": 144.6, "learning_rate": 7.197411003236246e-07, "loss": 0.0115, "step": 372350 }, { "epoch": 144.61, "learning_rate": 7.192233009708738e-07, "loss": 0.0176, "step": 372360 }, { "epoch": 144.61, "learning_rate": 7.18705501618123e-07, "loss": 0.0169, "step": 372370 }, { "epoch": 144.61, "learning_rate": 7.181877022653723e-07, "loss": 0.0106, "step": 372380 }, { "epoch": 144.62, "learning_rate": 7.176699029126215e-07, "loss": 0.103, "step": 372390 }, { "epoch": 144.62, "learning_rate": 7.171521035598707e-07, "loss": 0.1482, "step": 372400 }, { "epoch": 144.63, "learning_rate": 7.166343042071199e-07, "loss": 0.0683, "step": 372410 }, { "epoch": 144.63, "learning_rate": 7.161165048543691e-07, "loss": 0.0198, "step": 372420 }, { "epoch": 144.63, "learning_rate": 7.155987055016182e-07, "loss": 0.0004, "step": 372430 }, { "epoch": 144.64, "learning_rate": 7.150809061488674e-07, "loss": 0.0096, "step": 372440 }, { "epoch": 144.64, "learning_rate": 7.145631067961166e-07, "loss": 0.0017, "step": 372450 }, { "epoch": 144.64, "learning_rate": 7.140453074433658e-07, "loss": 0.0164, "step": 372460 }, { "epoch": 144.65, "learning_rate": 7.135275080906149e-07, "loss": 0.0719, "step": 372470 }, { "epoch": 144.65, "learning_rate": 7.130097087378641e-07, "loss": 0.0029, "step": 372480 }, { "epoch": 144.66, "learning_rate": 7.124919093851133e-07, "loss": 0.0149, "step": 372490 }, { "epoch": 144.66, "learning_rate": 7.119741100323625e-07, "loss": 0.0087, "step": 372500 }, { "epoch": 144.66, "learning_rate": 7.114563106796117e-07, "loss": 0.0193, "step": 372510 }, { "epoch": 144.67, "learning_rate": 7.109385113268609e-07, "loss": 0.0091, "step": 372520 }, { "epoch": 144.67, "learning_rate": 7.104207119741101e-07, "loss": 0.0012, "step": 372530 }, { "epoch": 144.68, "learning_rate": 7.099029126213593e-07, "loss": 0.0299, "step": 372540 }, { "epoch": 144.68, "learning_rate": 7.093851132686085e-07, "loss": 0.0246, "step": 372550 }, { "epoch": 144.68, "learning_rate": 7.088673139158577e-07, "loss": 0.0182, "step": 372560 }, { "epoch": 144.69, "learning_rate": 7.083495145631069e-07, "loss": 0.0322, "step": 372570 }, { "epoch": 144.69, "learning_rate": 7.078317152103561e-07, "loss": 0.0106, "step": 372580 }, { "epoch": 144.7, "learning_rate": 7.073139158576051e-07, "loss": 0.0562, "step": 372590 }, { "epoch": 144.7, "learning_rate": 7.067961165048543e-07, "loss": 0.1645, "step": 372600 }, { "epoch": 144.7, "learning_rate": 7.062783171521035e-07, "loss": 0.0887, "step": 372610 }, { "epoch": 144.71, "learning_rate": 7.057605177993527e-07, "loss": 0.0281, "step": 372620 }, { "epoch": 144.71, "learning_rate": 7.052427184466019e-07, "loss": 0.0299, "step": 372630 }, { "epoch": 144.71, "learning_rate": 7.047249190938511e-07, "loss": 0.1213, "step": 372640 }, { "epoch": 144.72, "learning_rate": 7.042071197411003e-07, "loss": 0.0381, "step": 372650 }, { "epoch": 144.72, "learning_rate": 7.036893203883496e-07, "loss": 0.072, "step": 372660 }, { "epoch": 144.73, "learning_rate": 7.031715210355988e-07, "loss": 0.1089, "step": 372670 }, { "epoch": 144.73, "learning_rate": 7.02653721682848e-07, "loss": 0.0513, "step": 372680 }, { "epoch": 144.73, "learning_rate": 7.021359223300972e-07, "loss": 0.0094, "step": 372690 }, { "epoch": 144.74, "learning_rate": 7.016181229773464e-07, "loss": 0.0434, "step": 372700 }, { "epoch": 144.74, "learning_rate": 7.011003236245956e-07, "loss": 0.1929, "step": 372710 }, { "epoch": 144.75, "learning_rate": 7.005825242718448e-07, "loss": 0.0001, "step": 372720 }, { "epoch": 144.75, "learning_rate": 7.00064724919094e-07, "loss": 0.0534, "step": 372730 }, { "epoch": 144.75, "learning_rate": 6.995469255663432e-07, "loss": 0.0132, "step": 372740 }, { "epoch": 144.76, "learning_rate": 6.990291262135923e-07, "loss": 0.0222, "step": 372750 }, { "epoch": 144.76, "learning_rate": 6.985113268608415e-07, "loss": 0.0002, "step": 372760 }, { "epoch": 144.77, "learning_rate": 6.979935275080907e-07, "loss": 0.1041, "step": 372770 }, { "epoch": 144.77, "learning_rate": 6.974757281553398e-07, "loss": 0.0334, "step": 372780 }, { "epoch": 144.77, "learning_rate": 6.96957928802589e-07, "loss": 0.1015, "step": 372790 }, { "epoch": 144.78, "learning_rate": 6.964401294498382e-07, "loss": 0.0002, "step": 372800 }, { "epoch": 144.78, "learning_rate": 6.959223300970874e-07, "loss": 0.0466, "step": 372810 }, { "epoch": 144.78, "learning_rate": 6.954045307443366e-07, "loss": 0.0547, "step": 372820 }, { "epoch": 144.79, "learning_rate": 6.948867313915858e-07, "loss": 0.0457, "step": 372830 }, { "epoch": 144.79, "learning_rate": 6.94368932038835e-07, "loss": 0.0903, "step": 372840 }, { "epoch": 144.8, "learning_rate": 6.938511326860842e-07, "loss": 0.0627, "step": 372850 }, { "epoch": 144.8, "learning_rate": 6.933333333333334e-07, "loss": 0.0544, "step": 372860 }, { "epoch": 144.8, "learning_rate": 6.928155339805826e-07, "loss": 0.0032, "step": 372870 }, { "epoch": 144.81, "learning_rate": 6.922977346278317e-07, "loss": 0.0001, "step": 372880 }, { "epoch": 144.81, "learning_rate": 6.917799352750809e-07, "loss": 0.0333, "step": 372890 }, { "epoch": 144.82, "learning_rate": 6.912621359223301e-07, "loss": 0.0916, "step": 372900 }, { "epoch": 144.82, "learning_rate": 6.907443365695793e-07, "loss": 0.0563, "step": 372910 }, { "epoch": 144.82, "learning_rate": 6.902265372168285e-07, "loss": 0.0567, "step": 372920 }, { "epoch": 144.83, "learning_rate": 6.897087378640777e-07, "loss": 0.0971, "step": 372930 }, { "epoch": 144.83, "learning_rate": 6.89190938511327e-07, "loss": 0.0342, "step": 372940 }, { "epoch": 144.83, "learning_rate": 6.886731391585762e-07, "loss": 0.0456, "step": 372950 }, { "epoch": 144.84, "learning_rate": 6.881553398058253e-07, "loss": 0.0187, "step": 372960 }, { "epoch": 144.84, "learning_rate": 6.876375404530745e-07, "loss": 0.0704, "step": 372970 }, { "epoch": 144.85, "learning_rate": 6.871197411003237e-07, "loss": 0.0108, "step": 372980 }, { "epoch": 144.85, "learning_rate": 6.866019417475729e-07, "loss": 0.0083, "step": 372990 }, { "epoch": 144.85, "learning_rate": 6.860841423948221e-07, "loss": 0.0341, "step": 373000 }, { "epoch": 144.86, "learning_rate": 6.855663430420713e-07, "loss": 0.0295, "step": 373010 }, { "epoch": 144.86, "learning_rate": 6.850485436893205e-07, "loss": 0.0107, "step": 373020 }, { "epoch": 144.87, "learning_rate": 6.845307443365697e-07, "loss": 0.0502, "step": 373030 }, { "epoch": 144.87, "learning_rate": 6.840129449838188e-07, "loss": 0.0004, "step": 373040 }, { "epoch": 144.87, "learning_rate": 6.83495145631068e-07, "loss": 0.0169, "step": 373050 }, { "epoch": 144.88, "learning_rate": 6.829773462783172e-07, "loss": 0.0465, "step": 373060 }, { "epoch": 144.88, "learning_rate": 6.824595469255664e-07, "loss": 0.0095, "step": 373070 }, { "epoch": 144.89, "learning_rate": 6.819417475728156e-07, "loss": 0.0006, "step": 373080 }, { "epoch": 144.89, "learning_rate": 6.814239482200648e-07, "loss": 0.0293, "step": 373090 }, { "epoch": 144.89, "learning_rate": 6.80906148867314e-07, "loss": 0.0177, "step": 373100 }, { "epoch": 144.9, "learning_rate": 6.803883495145632e-07, "loss": 0.009, "step": 373110 }, { "epoch": 144.9, "learning_rate": 6.798705501618124e-07, "loss": 0.0256, "step": 373120 }, { "epoch": 144.9, "learning_rate": 6.793527508090615e-07, "loss": 0.0015, "step": 373130 }, { "epoch": 144.91, "learning_rate": 6.788349514563107e-07, "loss": 0.0004, "step": 373140 }, { "epoch": 144.91, "learning_rate": 6.783171521035599e-07, "loss": 0.0001, "step": 373150 }, { "epoch": 144.92, "learning_rate": 6.77799352750809e-07, "loss": 0.0001, "step": 373160 }, { "epoch": 144.92, "learning_rate": 6.772815533980582e-07, "loss": 0.0689, "step": 373170 }, { "epoch": 144.92, "learning_rate": 6.767637540453074e-07, "loss": 0.1632, "step": 373180 }, { "epoch": 144.93, "learning_rate": 6.762459546925566e-07, "loss": 0.0231, "step": 373190 }, { "epoch": 144.93, "learning_rate": 6.757281553398058e-07, "loss": 0.0461, "step": 373200 }, { "epoch": 144.94, "learning_rate": 6.75210355987055e-07, "loss": 0.0329, "step": 373210 }, { "epoch": 144.94, "learning_rate": 6.746925566343043e-07, "loss": 0.0545, "step": 373220 }, { "epoch": 144.94, "learning_rate": 6.741747572815535e-07, "loss": 0.0377, "step": 373230 }, { "epoch": 144.95, "learning_rate": 6.736569579288027e-07, "loss": 0.0679, "step": 373240 }, { "epoch": 144.95, "learning_rate": 6.731391585760519e-07, "loss": 0.0012, "step": 373250 }, { "epoch": 144.96, "learning_rate": 6.726213592233011e-07, "loss": 0.001, "step": 373260 }, { "epoch": 144.96, "learning_rate": 6.721035598705503e-07, "loss": 0.0119, "step": 373270 }, { "epoch": 144.96, "learning_rate": 6.715857605177995e-07, "loss": 0.0305, "step": 373280 }, { "epoch": 144.97, "learning_rate": 6.710679611650487e-07, "loss": 0.1069, "step": 373290 }, { "epoch": 144.97, "learning_rate": 6.705501618122979e-07, "loss": 0.0665, "step": 373300 }, { "epoch": 144.97, "learning_rate": 6.70032362459547e-07, "loss": 0.0399, "step": 373310 }, { "epoch": 144.98, "learning_rate": 6.695145631067961e-07, "loss": 0.0485, "step": 373320 }, { "epoch": 144.98, "learning_rate": 6.689967637540453e-07, "loss": 0.009, "step": 373330 }, { "epoch": 144.99, "learning_rate": 6.684789644012945e-07, "loss": 0.0336, "step": 373340 }, { "epoch": 144.99, "learning_rate": 6.679611650485437e-07, "loss": 0.0221, "step": 373350 }, { "epoch": 144.99, "learning_rate": 6.674433656957929e-07, "loss": 0.0612, "step": 373360 }, { "epoch": 145.0, "learning_rate": 6.669255663430421e-07, "loss": 0.0014, "step": 373370 }, { "epoch": 145.0, "eval_accuracy": 0.9535075653370014, "eval_loss": 0.3874504864215851, "eval_runtime": 8.208, "eval_samples_per_second": 442.86, "eval_steps_per_second": 55.434, "step": 373375 }, { "epoch": 145.0, "learning_rate": 6.664077669902913e-07, "loss": 0.0255, "step": 373380 }, { "epoch": 145.01, "learning_rate": 6.658899676375405e-07, "loss": 0.0592, "step": 373390 }, { "epoch": 145.01, "learning_rate": 6.653721682847897e-07, "loss": 0.009, "step": 373400 }, { "epoch": 145.01, "learning_rate": 6.648543689320389e-07, "loss": 0.0057, "step": 373410 }, { "epoch": 145.02, "learning_rate": 6.643365695792881e-07, "loss": 0.0255, "step": 373420 }, { "epoch": 145.02, "learning_rate": 6.638187702265373e-07, "loss": 0.1079, "step": 373430 }, { "epoch": 145.03, "learning_rate": 6.633009708737865e-07, "loss": 0.0414, "step": 373440 }, { "epoch": 145.03, "learning_rate": 6.627831715210356e-07, "loss": 0.0142, "step": 373450 }, { "epoch": 145.03, "learning_rate": 6.622653721682848e-07, "loss": 0.0777, "step": 373460 }, { "epoch": 145.04, "learning_rate": 6.61747572815534e-07, "loss": 0.0102, "step": 373470 }, { "epoch": 145.04, "learning_rate": 6.612297734627831e-07, "loss": 0.0446, "step": 373480 }, { "epoch": 145.04, "learning_rate": 6.607119741100323e-07, "loss": 0.0009, "step": 373490 }, { "epoch": 145.05, "learning_rate": 6.601941747572816e-07, "loss": 0.086, "step": 373500 }, { "epoch": 145.05, "learning_rate": 6.596763754045308e-07, "loss": 0.0592, "step": 373510 }, { "epoch": 145.06, "learning_rate": 6.5915857605178e-07, "loss": 0.0039, "step": 373520 }, { "epoch": 145.06, "learning_rate": 6.586407766990292e-07, "loss": 0.0204, "step": 373530 }, { "epoch": 145.06, "learning_rate": 6.581229773462784e-07, "loss": 0.036, "step": 373540 }, { "epoch": 145.07, "learning_rate": 6.576051779935276e-07, "loss": 0.0024, "step": 373550 }, { "epoch": 145.07, "learning_rate": 6.570873786407768e-07, "loss": 0.0208, "step": 373560 }, { "epoch": 145.08, "learning_rate": 6.56569579288026e-07, "loss": 0.0087, "step": 373570 }, { "epoch": 145.08, "learning_rate": 6.560517799352752e-07, "loss": 0.0714, "step": 373580 }, { "epoch": 145.08, "learning_rate": 6.555339805825244e-07, "loss": 0.0212, "step": 373590 }, { "epoch": 145.09, "learning_rate": 6.550161812297736e-07, "loss": 0.0423, "step": 373600 }, { "epoch": 145.09, "learning_rate": 6.544983818770227e-07, "loss": 0.0537, "step": 373610 }, { "epoch": 145.1, "learning_rate": 6.539805825242719e-07, "loss": 0.0002, "step": 373620 }, { "epoch": 145.1, "learning_rate": 6.534627831715211e-07, "loss": 0.2241, "step": 373630 }, { "epoch": 145.1, "learning_rate": 6.529449838187703e-07, "loss": 0.0084, "step": 373640 }, { "epoch": 145.11, "learning_rate": 6.524271844660195e-07, "loss": 0.0247, "step": 373650 }, { "epoch": 145.11, "learning_rate": 6.519093851132686e-07, "loss": 0.1509, "step": 373660 }, { "epoch": 145.11, "learning_rate": 6.513915857605178e-07, "loss": 0.0991, "step": 373670 }, { "epoch": 145.12, "learning_rate": 6.50873786407767e-07, "loss": 0.01, "step": 373680 }, { "epoch": 145.12, "learning_rate": 6.503559870550162e-07, "loss": 0.0704, "step": 373690 }, { "epoch": 145.13, "learning_rate": 6.498381877022654e-07, "loss": 0.0017, "step": 373700 }, { "epoch": 145.13, "learning_rate": 6.493203883495146e-07, "loss": 0.071, "step": 373710 }, { "epoch": 145.13, "learning_rate": 6.488025889967638e-07, "loss": 0.0301, "step": 373720 }, { "epoch": 145.14, "learning_rate": 6.482847896440129e-07, "loss": 0.0067, "step": 373730 }, { "epoch": 145.14, "learning_rate": 6.477669902912621e-07, "loss": 0.1211, "step": 373740 }, { "epoch": 145.15, "learning_rate": 6.472491909385113e-07, "loss": 0.0202, "step": 373750 }, { "epoch": 145.15, "learning_rate": 6.467313915857605e-07, "loss": 0.0104, "step": 373760 }, { "epoch": 145.15, "learning_rate": 6.462135922330097e-07, "loss": 0.038, "step": 373770 }, { "epoch": 145.16, "learning_rate": 6.45695792880259e-07, "loss": 0.023, "step": 373780 }, { "epoch": 145.16, "learning_rate": 6.451779935275082e-07, "loss": 0.0811, "step": 373790 }, { "epoch": 145.17, "learning_rate": 6.446601941747574e-07, "loss": 0.0553, "step": 373800 }, { "epoch": 145.17, "learning_rate": 6.441423948220066e-07, "loss": 0.0002, "step": 373810 }, { "epoch": 145.17, "learning_rate": 6.436245954692558e-07, "loss": 0.0015, "step": 373820 }, { "epoch": 145.18, "learning_rate": 6.43106796116505e-07, "loss": 0.0292, "step": 373830 }, { "epoch": 145.18, "learning_rate": 6.425889967637541e-07, "loss": 0.0159, "step": 373840 }, { "epoch": 145.18, "learning_rate": 6.420711974110033e-07, "loss": 0.0059, "step": 373850 }, { "epoch": 145.19, "learning_rate": 6.415533980582525e-07, "loss": 0.0004, "step": 373860 }, { "epoch": 145.19, "learning_rate": 6.410355987055017e-07, "loss": 0.0383, "step": 373870 }, { "epoch": 145.2, "learning_rate": 6.405177993527509e-07, "loss": 0.0069, "step": 373880 }, { "epoch": 145.2, "learning_rate": 6.4e-07, "loss": 0.0514, "step": 373890 }, { "epoch": 145.2, "learning_rate": 6.394822006472492e-07, "loss": 0.0154, "step": 373900 }, { "epoch": 145.21, "learning_rate": 6.389644012944984e-07, "loss": 0.0086, "step": 373910 }, { "epoch": 145.21, "learning_rate": 6.384466019417476e-07, "loss": 0.0167, "step": 373920 }, { "epoch": 145.22, "learning_rate": 6.379288025889968e-07, "loss": 0.0347, "step": 373930 }, { "epoch": 145.22, "learning_rate": 6.37411003236246e-07, "loss": 0.0175, "step": 373940 }, { "epoch": 145.22, "learning_rate": 6.368932038834952e-07, "loss": 0.0495, "step": 373950 }, { "epoch": 145.23, "learning_rate": 6.363754045307444e-07, "loss": 0.0001, "step": 373960 }, { "epoch": 145.23, "learning_rate": 6.358576051779936e-07, "loss": 0.026, "step": 373970 }, { "epoch": 145.23, "learning_rate": 6.353398058252428e-07, "loss": 0.0088, "step": 373980 }, { "epoch": 145.24, "learning_rate": 6.34822006472492e-07, "loss": 0.042, "step": 373990 }, { "epoch": 145.24, "learning_rate": 6.343042071197412e-07, "loss": 0.0029, "step": 374000 }, { "epoch": 145.25, "learning_rate": 6.337864077669903e-07, "loss": 0.0821, "step": 374010 }, { "epoch": 145.25, "learning_rate": 6.332686084142394e-07, "loss": 0.0001, "step": 374020 }, { "epoch": 145.25, "learning_rate": 6.327508090614886e-07, "loss": 0.0016, "step": 374030 }, { "epoch": 145.26, "learning_rate": 6.322330097087378e-07, "loss": 0.0804, "step": 374040 }, { "epoch": 145.26, "learning_rate": 6.31715210355987e-07, "loss": 0.0493, "step": 374050 }, { "epoch": 145.27, "learning_rate": 6.311974110032363e-07, "loss": 0.0006, "step": 374060 }, { "epoch": 145.27, "learning_rate": 6.306796116504855e-07, "loss": 0.0974, "step": 374070 }, { "epoch": 145.27, "learning_rate": 6.301618122977347e-07, "loss": 0.0006, "step": 374080 }, { "epoch": 145.28, "learning_rate": 6.296440129449839e-07, "loss": 0.0043, "step": 374090 }, { "epoch": 145.28, "learning_rate": 6.291262135922331e-07, "loss": 0.0465, "step": 374100 }, { "epoch": 145.29, "learning_rate": 6.286084142394823e-07, "loss": 0.0007, "step": 374110 }, { "epoch": 145.29, "learning_rate": 6.280906148867315e-07, "loss": 0.0005, "step": 374120 }, { "epoch": 145.29, "learning_rate": 6.275728155339807e-07, "loss": 0.0134, "step": 374130 }, { "epoch": 145.3, "learning_rate": 6.270550161812299e-07, "loss": 0.1454, "step": 374140 }, { "epoch": 145.3, "learning_rate": 6.265372168284791e-07, "loss": 0.0277, "step": 374150 }, { "epoch": 145.3, "learning_rate": 6.260194174757283e-07, "loss": 0.1187, "step": 374160 }, { "epoch": 145.31, "learning_rate": 6.255016181229775e-07, "loss": 0.0145, "step": 374170 }, { "epoch": 145.31, "learning_rate": 6.249838187702265e-07, "loss": 0.0357, "step": 374180 }, { "epoch": 145.32, "learning_rate": 6.244660194174757e-07, "loss": 0.109, "step": 374190 }, { "epoch": 145.32, "learning_rate": 6.239482200647249e-07, "loss": 0.0336, "step": 374200 }, { "epoch": 145.32, "learning_rate": 6.234304207119741e-07, "loss": 0.0232, "step": 374210 }, { "epoch": 145.33, "learning_rate": 6.229126213592233e-07, "loss": 0.0084, "step": 374220 }, { "epoch": 145.33, "learning_rate": 6.223948220064725e-07, "loss": 0.0002, "step": 374230 }, { "epoch": 145.34, "learning_rate": 6.218770226537217e-07, "loss": 0.0001, "step": 374240 }, { "epoch": 145.34, "learning_rate": 6.213592233009709e-07, "loss": 0.0433, "step": 374250 }, { "epoch": 145.34, "learning_rate": 6.208414239482201e-07, "loss": 0.075, "step": 374260 }, { "epoch": 145.35, "learning_rate": 6.203236245954693e-07, "loss": 0.0088, "step": 374270 }, { "epoch": 145.35, "learning_rate": 6.198058252427185e-07, "loss": 0.0065, "step": 374280 }, { "epoch": 145.36, "learning_rate": 6.192880258899677e-07, "loss": 0.0258, "step": 374290 }, { "epoch": 145.36, "learning_rate": 6.187702265372169e-07, "loss": 0.0819, "step": 374300 }, { "epoch": 145.36, "learning_rate": 6.182524271844661e-07, "loss": 0.0661, "step": 374310 }, { "epoch": 145.37, "learning_rate": 6.177346278317153e-07, "loss": 0.0246, "step": 374320 }, { "epoch": 145.37, "learning_rate": 6.172168284789645e-07, "loss": 0.1516, "step": 374330 }, { "epoch": 145.37, "learning_rate": 6.166990291262137e-07, "loss": 0.0019, "step": 374340 }, { "epoch": 145.38, "learning_rate": 6.161812297734629e-07, "loss": 0.0123, "step": 374350 }, { "epoch": 145.38, "learning_rate": 6.15663430420712e-07, "loss": 0.0515, "step": 374360 }, { "epoch": 145.39, "learning_rate": 6.151456310679612e-07, "loss": 0.0118, "step": 374370 }, { "epoch": 145.39, "learning_rate": 6.146278317152103e-07, "loss": 0.008, "step": 374380 }, { "epoch": 145.39, "learning_rate": 6.141100323624595e-07, "loss": 0.0081, "step": 374390 }, { "epoch": 145.4, "learning_rate": 6.135922330097087e-07, "loss": 0.0519, "step": 374400 }, { "epoch": 145.4, "learning_rate": 6.13074433656958e-07, "loss": 0.036, "step": 374410 }, { "epoch": 145.41, "learning_rate": 6.125566343042072e-07, "loss": 0.0655, "step": 374420 }, { "epoch": 145.41, "learning_rate": 6.120388349514564e-07, "loss": 0.004, "step": 374430 }, { "epoch": 145.41, "learning_rate": 6.115210355987056e-07, "loss": 0.0135, "step": 374440 }, { "epoch": 145.42, "learning_rate": 6.110032362459548e-07, "loss": 0.0002, "step": 374450 }, { "epoch": 145.42, "learning_rate": 6.104854368932039e-07, "loss": 0.0452, "step": 374460 }, { "epoch": 145.43, "learning_rate": 6.099676375404531e-07, "loss": 0.0032, "step": 374470 }, { "epoch": 145.43, "learning_rate": 6.094498381877023e-07, "loss": 0.0001, "step": 374480 }, { "epoch": 145.43, "learning_rate": 6.089320388349515e-07, "loss": 0.0493, "step": 374490 }, { "epoch": 145.44, "learning_rate": 6.084142394822007e-07, "loss": 0.0873, "step": 374500 }, { "epoch": 145.44, "learning_rate": 6.078964401294499e-07, "loss": 0.0234, "step": 374510 }, { "epoch": 145.44, "learning_rate": 6.073786407766991e-07, "loss": 0.0005, "step": 374520 }, { "epoch": 145.45, "learning_rate": 6.068608414239483e-07, "loss": 0.0825, "step": 374530 }, { "epoch": 145.45, "learning_rate": 6.063430420711974e-07, "loss": 0.0032, "step": 374540 }, { "epoch": 145.46, "learning_rate": 6.058252427184466e-07, "loss": 0.0213, "step": 374550 }, { "epoch": 145.46, "learning_rate": 6.053074433656958e-07, "loss": 0.033, "step": 374560 }, { "epoch": 145.46, "learning_rate": 6.04789644012945e-07, "loss": 0.0083, "step": 374570 }, { "epoch": 145.47, "learning_rate": 6.042718446601942e-07, "loss": 0.0314, "step": 374580 }, { "epoch": 145.47, "learning_rate": 6.037540453074434e-07, "loss": 0.0186, "step": 374590 }, { "epoch": 145.48, "learning_rate": 6.032362459546926e-07, "loss": 0.0002, "step": 374600 }, { "epoch": 145.48, "learning_rate": 6.027184466019418e-07, "loss": 0.0307, "step": 374610 }, { "epoch": 145.48, "learning_rate": 6.02200647249191e-07, "loss": 0.0003, "step": 374620 }, { "epoch": 145.49, "learning_rate": 6.016828478964402e-07, "loss": 0.0017, "step": 374630 }, { "epoch": 145.49, "learning_rate": 6.011650485436894e-07, "loss": 0.0233, "step": 374640 }, { "epoch": 145.5, "learning_rate": 6.006472491909386e-07, "loss": 0.0142, "step": 374650 }, { "epoch": 145.5, "learning_rate": 6.001294498381878e-07, "loss": 0.0086, "step": 374660 }, { "epoch": 145.5, "learning_rate": 5.996116504854369e-07, "loss": 0.0007, "step": 374670 }, { "epoch": 145.51, "learning_rate": 5.990938511326861e-07, "loss": 0.0965, "step": 374680 }, { "epoch": 145.51, "learning_rate": 5.985760517799354e-07, "loss": 0.0281, "step": 374690 }, { "epoch": 145.51, "learning_rate": 5.980582524271846e-07, "loss": 0.0154, "step": 374700 }, { "epoch": 145.52, "learning_rate": 5.975404530744338e-07, "loss": 0.0093, "step": 374710 }, { "epoch": 145.52, "learning_rate": 5.97022653721683e-07, "loss": 0.099, "step": 374720 }, { "epoch": 145.53, "learning_rate": 5.965048543689321e-07, "loss": 0.0241, "step": 374730 }, { "epoch": 145.53, "learning_rate": 5.959870550161813e-07, "loss": 0.0072, "step": 374740 }, { "epoch": 145.53, "learning_rate": 5.954692556634304e-07, "loss": 0.0507, "step": 374750 }, { "epoch": 145.54, "learning_rate": 5.949514563106796e-07, "loss": 0.0125, "step": 374760 }, { "epoch": 145.54, "learning_rate": 5.944336569579288e-07, "loss": 0.0874, "step": 374770 }, { "epoch": 145.55, "learning_rate": 5.93915857605178e-07, "loss": 0.0591, "step": 374780 }, { "epoch": 145.55, "learning_rate": 5.933980582524272e-07, "loss": 0.01, "step": 374790 }, { "epoch": 145.55, "learning_rate": 5.928802588996764e-07, "loss": 0.0833, "step": 374800 }, { "epoch": 145.56, "learning_rate": 5.923624595469256e-07, "loss": 0.0219, "step": 374810 }, { "epoch": 145.56, "learning_rate": 5.918446601941748e-07, "loss": 0.0874, "step": 374820 }, { "epoch": 145.57, "learning_rate": 5.91326860841424e-07, "loss": 0.0163, "step": 374830 }, { "epoch": 145.57, "learning_rate": 5.908090614886732e-07, "loss": 0.0781, "step": 374840 }, { "epoch": 145.57, "learning_rate": 5.902912621359224e-07, "loss": 0.1512, "step": 374850 }, { "epoch": 145.58, "learning_rate": 5.897734627831716e-07, "loss": 0.0394, "step": 374860 }, { "epoch": 145.58, "learning_rate": 5.892556634304208e-07, "loss": 0.0497, "step": 374870 }, { "epoch": 145.58, "learning_rate": 5.8873786407767e-07, "loss": 0.0877, "step": 374880 }, { "epoch": 145.59, "learning_rate": 5.882200647249191e-07, "loss": 0.0005, "step": 374890 }, { "epoch": 145.59, "learning_rate": 5.877022653721683e-07, "loss": 0.1208, "step": 374900 }, { "epoch": 145.6, "learning_rate": 5.871844660194175e-07, "loss": 0.0576, "step": 374910 }, { "epoch": 145.6, "learning_rate": 5.866666666666667e-07, "loss": 0.0138, "step": 374920 }, { "epoch": 145.6, "learning_rate": 5.861488673139159e-07, "loss": 0.0004, "step": 374930 }, { "epoch": 145.61, "learning_rate": 5.856310679611651e-07, "loss": 0.0001, "step": 374940 }, { "epoch": 145.61, "learning_rate": 5.851132686084142e-07, "loss": 0.06, "step": 374950 }, { "epoch": 145.62, "learning_rate": 5.845954692556634e-07, "loss": 0.0735, "step": 374960 }, { "epoch": 145.62, "learning_rate": 5.840776699029127e-07, "loss": 0.0261, "step": 374970 }, { "epoch": 145.62, "learning_rate": 5.835598705501619e-07, "loss": 0.0102, "step": 374980 }, { "epoch": 145.63, "learning_rate": 5.830420711974111e-07, "loss": 0.0093, "step": 374990 }, { "epoch": 145.63, "learning_rate": 5.825242718446603e-07, "loss": 0.0378, "step": 375000 }, { "epoch": 145.63, "learning_rate": 5.820064724919095e-07, "loss": 0.0002, "step": 375010 }, { "epoch": 145.64, "learning_rate": 5.814886731391587e-07, "loss": 0.0032, "step": 375020 }, { "epoch": 145.64, "learning_rate": 5.809708737864078e-07, "loss": 0.0543, "step": 375030 }, { "epoch": 145.65, "learning_rate": 5.80453074433657e-07, "loss": 0.036, "step": 375040 }, { "epoch": 145.65, "learning_rate": 5.799352750809062e-07, "loss": 0.0544, "step": 375050 }, { "epoch": 145.65, "learning_rate": 5.794174757281553e-07, "loss": 0.0365, "step": 375060 }, { "epoch": 145.66, "learning_rate": 5.788996763754045e-07, "loss": 0.0007, "step": 375070 }, { "epoch": 145.66, "learning_rate": 5.783818770226537e-07, "loss": 0.0747, "step": 375080 }, { "epoch": 145.67, "learning_rate": 5.778640776699029e-07, "loss": 0.0001, "step": 375090 }, { "epoch": 145.67, "learning_rate": 5.773462783171521e-07, "loss": 0.0361, "step": 375100 }, { "epoch": 145.67, "learning_rate": 5.768284789644014e-07, "loss": 0.0003, "step": 375110 }, { "epoch": 145.68, "learning_rate": 5.763106796116505e-07, "loss": 0.0048, "step": 375120 }, { "epoch": 145.68, "learning_rate": 5.757928802588997e-07, "loss": 0.0236, "step": 375130 }, { "epoch": 145.69, "learning_rate": 5.752750809061489e-07, "loss": 0.039, "step": 375140 }, { "epoch": 145.69, "learning_rate": 5.747572815533981e-07, "loss": 0.0165, "step": 375150 }, { "epoch": 145.69, "learning_rate": 5.742394822006473e-07, "loss": 0.0789, "step": 375160 }, { "epoch": 145.7, "learning_rate": 5.737216828478965e-07, "loss": 0.0539, "step": 375170 }, { "epoch": 145.7, "learning_rate": 5.732038834951457e-07, "loss": 0.1107, "step": 375180 }, { "epoch": 145.7, "learning_rate": 5.726860841423949e-07, "loss": 0.0001, "step": 375190 }, { "epoch": 145.71, "learning_rate": 5.721682847896441e-07, "loss": 0.0718, "step": 375200 }, { "epoch": 145.71, "learning_rate": 5.716504854368933e-07, "loss": 0.014, "step": 375210 }, { "epoch": 145.72, "learning_rate": 5.711326860841425e-07, "loss": 0.0971, "step": 375220 }, { "epoch": 145.72, "learning_rate": 5.706148867313917e-07, "loss": 0.0102, "step": 375230 }, { "epoch": 145.72, "learning_rate": 5.700970873786407e-07, "loss": 0.1214, "step": 375240 }, { "epoch": 145.73, "learning_rate": 5.6957928802589e-07, "loss": 0.1091, "step": 375250 }, { "epoch": 145.73, "learning_rate": 5.690614886731392e-07, "loss": 0.0497, "step": 375260 }, { "epoch": 145.74, "learning_rate": 5.685436893203884e-07, "loss": 0.0106, "step": 375270 }, { "epoch": 145.74, "learning_rate": 5.680258899676376e-07, "loss": 0.0008, "step": 375280 }, { "epoch": 145.74, "learning_rate": 5.675080906148868e-07, "loss": 0.0022, "step": 375290 }, { "epoch": 145.75, "learning_rate": 5.66990291262136e-07, "loss": 0.0014, "step": 375300 }, { "epoch": 145.75, "learning_rate": 5.664724919093852e-07, "loss": 0.0354, "step": 375310 }, { "epoch": 145.76, "learning_rate": 5.659546925566343e-07, "loss": 0.0323, "step": 375320 }, { "epoch": 145.76, "learning_rate": 5.654368932038835e-07, "loss": 0.0458, "step": 375330 }, { "epoch": 145.76, "learning_rate": 5.649190938511327e-07, "loss": 0.0077, "step": 375340 }, { "epoch": 145.77, "learning_rate": 5.644012944983819e-07, "loss": 0.0699, "step": 375350 }, { "epoch": 145.77, "learning_rate": 5.638834951456311e-07, "loss": 0.0037, "step": 375360 }, { "epoch": 145.77, "learning_rate": 5.633656957928803e-07, "loss": 0.0866, "step": 375370 }, { "epoch": 145.78, "learning_rate": 5.628478964401295e-07, "loss": 0.0072, "step": 375380 }, { "epoch": 145.78, "learning_rate": 5.623300970873788e-07, "loss": 0.0008, "step": 375390 }, { "epoch": 145.79, "learning_rate": 5.618122977346279e-07, "loss": 0.0149, "step": 375400 }, { "epoch": 145.79, "learning_rate": 5.61294498381877e-07, "loss": 0.0286, "step": 375410 }, { "epoch": 145.79, "learning_rate": 5.607766990291262e-07, "loss": 0.018, "step": 375420 }, { "epoch": 145.8, "learning_rate": 5.602588996763754e-07, "loss": 0.1283, "step": 375430 }, { "epoch": 145.8, "learning_rate": 5.597411003236246e-07, "loss": 0.1025, "step": 375440 }, { "epoch": 145.81, "learning_rate": 5.592233009708738e-07, "loss": 0.0678, "step": 375450 }, { "epoch": 145.81, "learning_rate": 5.58705501618123e-07, "loss": 0.0684, "step": 375460 }, { "epoch": 145.81, "learning_rate": 5.581877022653722e-07, "loss": 0.0203, "step": 375470 }, { "epoch": 145.82, "learning_rate": 5.576699029126214e-07, "loss": 0.0669, "step": 375480 }, { "epoch": 145.82, "learning_rate": 5.571521035598706e-07, "loss": 0.0209, "step": 375490 }, { "epoch": 145.83, "learning_rate": 5.566343042071198e-07, "loss": 0.0271, "step": 375500 }, { "epoch": 145.83, "learning_rate": 5.56116504854369e-07, "loss": 0.0493, "step": 375510 }, { "epoch": 145.83, "learning_rate": 5.555987055016181e-07, "loss": 0.0299, "step": 375520 }, { "epoch": 145.84, "learning_rate": 5.550809061488674e-07, "loss": 0.0134, "step": 375530 }, { "epoch": 145.84, "learning_rate": 5.545631067961166e-07, "loss": 0.0003, "step": 375540 }, { "epoch": 145.84, "learning_rate": 5.540453074433658e-07, "loss": 0.0086, "step": 375550 }, { "epoch": 145.85, "learning_rate": 5.53527508090615e-07, "loss": 0.0077, "step": 375560 }, { "epoch": 145.85, "learning_rate": 5.530097087378642e-07, "loss": 0.0919, "step": 375570 }, { "epoch": 145.86, "learning_rate": 5.524919093851134e-07, "loss": 0.111, "step": 375580 }, { "epoch": 145.86, "learning_rate": 5.519741100323626e-07, "loss": 0.0123, "step": 375590 }, { "epoch": 145.86, "learning_rate": 5.514563106796117e-07, "loss": 0.0093, "step": 375600 }, { "epoch": 145.87, "learning_rate": 5.509385113268608e-07, "loss": 0.1048, "step": 375610 }, { "epoch": 145.87, "learning_rate": 5.5042071197411e-07, "loss": 0.0117, "step": 375620 }, { "epoch": 145.88, "learning_rate": 5.499029126213592e-07, "loss": 0.0005, "step": 375630 }, { "epoch": 145.88, "learning_rate": 5.493851132686084e-07, "loss": 0.045, "step": 375640 }, { "epoch": 145.88, "learning_rate": 5.488673139158576e-07, "loss": 0.2384, "step": 375650 }, { "epoch": 145.89, "learning_rate": 5.483495145631068e-07, "loss": 0.0492, "step": 375660 }, { "epoch": 145.89, "learning_rate": 5.478317152103561e-07, "loss": 0.0003, "step": 375670 }, { "epoch": 145.9, "learning_rate": 5.473139158576053e-07, "loss": 0.034, "step": 375680 }, { "epoch": 145.9, "learning_rate": 5.467961165048544e-07, "loss": 0.0001, "step": 375690 }, { "epoch": 145.9, "learning_rate": 5.462783171521036e-07, "loss": 0.0008, "step": 375700 }, { "epoch": 145.91, "learning_rate": 5.457605177993528e-07, "loss": 0.0157, "step": 375710 }, { "epoch": 145.91, "learning_rate": 5.45242718446602e-07, "loss": 0.0306, "step": 375720 }, { "epoch": 145.91, "learning_rate": 5.447249190938512e-07, "loss": 0.0004, "step": 375730 }, { "epoch": 145.92, "learning_rate": 5.442071197411004e-07, "loss": 0.1051, "step": 375740 }, { "epoch": 145.92, "learning_rate": 5.436893203883496e-07, "loss": 0.0285, "step": 375750 }, { "epoch": 145.93, "learning_rate": 5.431715210355988e-07, "loss": 0.1853, "step": 375760 }, { "epoch": 145.93, "learning_rate": 5.42653721682848e-07, "loss": 0.0013, "step": 375770 }, { "epoch": 145.93, "learning_rate": 5.421359223300971e-07, "loss": 0.0168, "step": 375780 }, { "epoch": 145.94, "learning_rate": 5.416181229773463e-07, "loss": 0.0158, "step": 375790 }, { "epoch": 145.94, "learning_rate": 5.411003236245955e-07, "loss": 0.0296, "step": 375800 }, { "epoch": 145.95, "learning_rate": 5.405825242718447e-07, "loss": 0.0146, "step": 375810 }, { "epoch": 145.95, "learning_rate": 5.400647249190939e-07, "loss": 0.0933, "step": 375820 }, { "epoch": 145.95, "learning_rate": 5.395469255663431e-07, "loss": 0.0241, "step": 375830 }, { "epoch": 145.96, "learning_rate": 5.390291262135923e-07, "loss": 0.0226, "step": 375840 }, { "epoch": 145.96, "learning_rate": 5.385113268608415e-07, "loss": 0.025, "step": 375850 }, { "epoch": 145.97, "learning_rate": 5.379935275080907e-07, "loss": 0.0164, "step": 375860 }, { "epoch": 145.97, "learning_rate": 5.374757281553399e-07, "loss": 0.0106, "step": 375870 }, { "epoch": 145.97, "learning_rate": 5.369579288025891e-07, "loss": 0.0064, "step": 375880 }, { "epoch": 145.98, "learning_rate": 5.364401294498382e-07, "loss": 0.0184, "step": 375890 }, { "epoch": 145.98, "learning_rate": 5.359223300970874e-07, "loss": 0.0189, "step": 375900 }, { "epoch": 145.98, "learning_rate": 5.354045307443366e-07, "loss": 0.0129, "step": 375910 }, { "epoch": 145.99, "learning_rate": 5.348867313915858e-07, "loss": 0.0336, "step": 375920 }, { "epoch": 145.99, "learning_rate": 5.34368932038835e-07, "loss": 0.0215, "step": 375930 }, { "epoch": 146.0, "learning_rate": 5.338511326860841e-07, "loss": 0.0196, "step": 375940 }, { "epoch": 146.0, "learning_rate": 5.333333333333335e-07, "loss": 0.0495, "step": 375950 }, { "epoch": 146.0, "eval_accuracy": 0.9535075653370014, "eval_loss": 0.38978180289268494, "eval_runtime": 8.2388, "eval_samples_per_second": 441.202, "eval_steps_per_second": 55.226, "step": 375950 }, { "epoch": 146.0, "learning_rate": 5.328155339805826e-07, "loss": 0.0003, "step": 375960 }, { "epoch": 146.01, "learning_rate": 5.322977346278317e-07, "loss": 0.041, "step": 375970 }, { "epoch": 146.01, "learning_rate": 5.317799352750809e-07, "loss": 0.018, "step": 375980 }, { "epoch": 146.02, "learning_rate": 5.312621359223301e-07, "loss": 0.0001, "step": 375990 }, { "epoch": 146.02, "learning_rate": 5.307443365695793e-07, "loss": 0.0193, "step": 376000 }, { "epoch": 146.02, "learning_rate": 5.302265372168285e-07, "loss": 0.0001, "step": 376010 }, { "epoch": 146.03, "learning_rate": 5.297087378640777e-07, "loss": 0.0089, "step": 376020 }, { "epoch": 146.03, "learning_rate": 5.291909385113269e-07, "loss": 0.0139, "step": 376030 }, { "epoch": 146.03, "learning_rate": 5.286731391585761e-07, "loss": 0.0334, "step": 376040 }, { "epoch": 146.04, "learning_rate": 5.281553398058253e-07, "loss": 0.0003, "step": 376050 }, { "epoch": 146.04, "learning_rate": 5.276375404530745e-07, "loss": 0.0215, "step": 376060 }, { "epoch": 146.05, "learning_rate": 5.271197411003237e-07, "loss": 0.0314, "step": 376070 }, { "epoch": 146.05, "learning_rate": 5.266019417475729e-07, "loss": 0.049, "step": 376080 }, { "epoch": 146.05, "learning_rate": 5.260841423948221e-07, "loss": 0.1313, "step": 376090 }, { "epoch": 146.06, "learning_rate": 5.255663430420713e-07, "loss": 0.0517, "step": 376100 }, { "epoch": 146.06, "learning_rate": 5.250485436893205e-07, "loss": 0.002, "step": 376110 }, { "epoch": 146.07, "learning_rate": 5.245307443365697e-07, "loss": 0.1678, "step": 376120 }, { "epoch": 146.07, "learning_rate": 5.240129449838188e-07, "loss": 0.0, "step": 376130 }, { "epoch": 146.07, "learning_rate": 5.23495145631068e-07, "loss": 0.0351, "step": 376140 }, { "epoch": 146.08, "learning_rate": 5.229773462783172e-07, "loss": 0.0229, "step": 376150 }, { "epoch": 146.08, "learning_rate": 5.224595469255664e-07, "loss": 0.0593, "step": 376160 }, { "epoch": 146.09, "learning_rate": 5.219417475728156e-07, "loss": 0.0844, "step": 376170 }, { "epoch": 146.09, "learning_rate": 5.214239482200647e-07, "loss": 0.0265, "step": 376180 }, { "epoch": 146.09, "learning_rate": 5.209061488673139e-07, "loss": 0.1203, "step": 376190 }, { "epoch": 146.1, "learning_rate": 5.203883495145631e-07, "loss": 0.0258, "step": 376200 }, { "epoch": 146.1, "learning_rate": 5.198705501618123e-07, "loss": 0.021, "step": 376210 }, { "epoch": 146.1, "learning_rate": 5.193527508090615e-07, "loss": 0.0001, "step": 376220 }, { "epoch": 146.11, "learning_rate": 5.188349514563108e-07, "loss": 0.1422, "step": 376230 }, { "epoch": 146.11, "learning_rate": 5.1831715210356e-07, "loss": 0.0332, "step": 376240 }, { "epoch": 146.12, "learning_rate": 5.177993527508092e-07, "loss": 0.1191, "step": 376250 }, { "epoch": 146.12, "learning_rate": 5.172815533980583e-07, "loss": 0.1056, "step": 376260 }, { "epoch": 146.12, "learning_rate": 5.167637540453075e-07, "loss": 0.0178, "step": 376270 }, { "epoch": 146.13, "learning_rate": 5.162459546925567e-07, "loss": 0.0513, "step": 376280 }, { "epoch": 146.13, "learning_rate": 5.157281553398059e-07, "loss": 0.0001, "step": 376290 }, { "epoch": 146.14, "learning_rate": 5.15210355987055e-07, "loss": 0.0085, "step": 376300 }, { "epoch": 146.14, "learning_rate": 5.146925566343042e-07, "loss": 0.0437, "step": 376310 }, { "epoch": 146.14, "learning_rate": 5.141747572815534e-07, "loss": 0.0001, "step": 376320 }, { "epoch": 146.15, "learning_rate": 5.136569579288026e-07, "loss": 0.0397, "step": 376330 }, { "epoch": 146.15, "learning_rate": 5.131391585760518e-07, "loss": 0.0452, "step": 376340 }, { "epoch": 146.16, "learning_rate": 5.12621359223301e-07, "loss": 0.0237, "step": 376350 }, { "epoch": 146.16, "learning_rate": 5.121035598705502e-07, "loss": 0.1014, "step": 376360 }, { "epoch": 146.16, "learning_rate": 5.115857605177994e-07, "loss": 0.074, "step": 376370 }, { "epoch": 146.17, "learning_rate": 5.110679611650486e-07, "loss": 0.0779, "step": 376380 }, { "epoch": 146.17, "learning_rate": 5.105501618122978e-07, "loss": 0.0419, "step": 376390 }, { "epoch": 146.17, "learning_rate": 5.10032362459547e-07, "loss": 0.0412, "step": 376400 }, { "epoch": 146.18, "learning_rate": 5.095145631067962e-07, "loss": 0.0849, "step": 376410 }, { "epoch": 146.18, "learning_rate": 5.089967637540454e-07, "loss": 0.098, "step": 376420 }, { "epoch": 146.19, "learning_rate": 5.084789644012946e-07, "loss": 0.0718, "step": 376430 }, { "epoch": 146.19, "learning_rate": 5.079611650485438e-07, "loss": 0.0872, "step": 376440 }, { "epoch": 146.19, "learning_rate": 5.07443365695793e-07, "loss": 0.0439, "step": 376450 }, { "epoch": 146.2, "learning_rate": 5.069255663430421e-07, "loss": 0.0573, "step": 376460 }, { "epoch": 146.2, "learning_rate": 5.064077669902912e-07, "loss": 0.005, "step": 376470 }, { "epoch": 146.21, "learning_rate": 5.058899676375404e-07, "loss": 0.0298, "step": 376480 }, { "epoch": 146.21, "learning_rate": 5.053721682847896e-07, "loss": 0.0601, "step": 376490 }, { "epoch": 146.21, "learning_rate": 5.048543689320388e-07, "loss": 0.0087, "step": 376500 }, { "epoch": 146.22, "learning_rate": 5.043365695792881e-07, "loss": 0.0262, "step": 376510 }, { "epoch": 146.22, "learning_rate": 5.038187702265373e-07, "loss": 0.0705, "step": 376520 }, { "epoch": 146.23, "learning_rate": 5.033009708737865e-07, "loss": 0.0772, "step": 376530 }, { "epoch": 146.23, "learning_rate": 5.027831715210356e-07, "loss": 0.0276, "step": 376540 }, { "epoch": 146.23, "learning_rate": 5.022653721682848e-07, "loss": 0.0861, "step": 376550 }, { "epoch": 146.24, "learning_rate": 5.01747572815534e-07, "loss": 0.0002, "step": 376560 }, { "epoch": 146.24, "learning_rate": 5.012297734627832e-07, "loss": 0.0489, "step": 376570 }, { "epoch": 146.24, "learning_rate": 5.007119741100324e-07, "loss": 0.0278, "step": 376580 }, { "epoch": 146.25, "learning_rate": 5.001941747572816e-07, "loss": 0.0268, "step": 376590 }, { "epoch": 146.25, "learning_rate": 4.996763754045308e-07, "loss": 0.0242, "step": 376600 }, { "epoch": 146.26, "learning_rate": 4.9915857605178e-07, "loss": 0.01, "step": 376610 }, { "epoch": 146.26, "learning_rate": 4.986407766990292e-07, "loss": 0.2157, "step": 376620 }, { "epoch": 146.26, "learning_rate": 4.981229773462784e-07, "loss": 0.0609, "step": 376630 }, { "epoch": 146.27, "learning_rate": 4.976051779935276e-07, "loss": 0.0952, "step": 376640 }, { "epoch": 146.27, "learning_rate": 4.970873786407767e-07, "loss": 0.0889, "step": 376650 }, { "epoch": 146.28, "learning_rate": 4.965695792880259e-07, "loss": 0.0002, "step": 376660 }, { "epoch": 146.28, "learning_rate": 4.960517799352751e-07, "loss": 0.0665, "step": 376670 }, { "epoch": 146.28, "learning_rate": 4.955339805825243e-07, "loss": 0.0453, "step": 376680 }, { "epoch": 146.29, "learning_rate": 4.950161812297735e-07, "loss": 0.0643, "step": 376690 }, { "epoch": 146.29, "learning_rate": 4.944983818770227e-07, "loss": 0.111, "step": 376700 }, { "epoch": 146.3, "learning_rate": 4.939805825242719e-07, "loss": 0.0116, "step": 376710 }, { "epoch": 146.3, "learning_rate": 4.934627831715211e-07, "loss": 0.0577, "step": 376720 }, { "epoch": 146.3, "learning_rate": 4.929449838187703e-07, "loss": 0.0488, "step": 376730 }, { "epoch": 146.31, "learning_rate": 4.924271844660195e-07, "loss": 0.0085, "step": 376740 }, { "epoch": 146.31, "learning_rate": 4.919093851132686e-07, "loss": 0.0008, "step": 376750 }, { "epoch": 146.31, "learning_rate": 4.913915857605178e-07, "loss": 0.1489, "step": 376760 }, { "epoch": 146.32, "learning_rate": 4.90873786407767e-07, "loss": 0.0416, "step": 376770 }, { "epoch": 146.32, "learning_rate": 4.903559870550162e-07, "loss": 0.0421, "step": 376780 }, { "epoch": 146.33, "learning_rate": 4.898381877022655e-07, "loss": 0.0172, "step": 376790 }, { "epoch": 146.33, "learning_rate": 4.893203883495147e-07, "loss": 0.0004, "step": 376800 }, { "epoch": 146.33, "learning_rate": 4.888025889967639e-07, "loss": 0.0339, "step": 376810 }, { "epoch": 146.34, "learning_rate": 4.882847896440131e-07, "loss": 0.0585, "step": 376820 }, { "epoch": 146.34, "learning_rate": 4.877669902912621e-07, "loss": 0.0006, "step": 376830 }, { "epoch": 146.35, "learning_rate": 4.872491909385113e-07, "loss": 0.0847, "step": 376840 }, { "epoch": 146.35, "learning_rate": 4.867313915857605e-07, "loss": 0.1234, "step": 376850 }, { "epoch": 146.35, "learning_rate": 4.862135922330097e-07, "loss": 0.0388, "step": 376860 }, { "epoch": 146.36, "learning_rate": 4.856957928802589e-07, "loss": 0.0077, "step": 376870 }, { "epoch": 146.36, "learning_rate": 4.851779935275081e-07, "loss": 0.1148, "step": 376880 }, { "epoch": 146.37, "learning_rate": 4.846601941747573e-07, "loss": 0.0001, "step": 376890 }, { "epoch": 146.37, "learning_rate": 4.841423948220065e-07, "loss": 0.0855, "step": 376900 }, { "epoch": 146.37, "learning_rate": 4.836245954692557e-07, "loss": 0.0022, "step": 376910 }, { "epoch": 146.38, "learning_rate": 4.831067961165049e-07, "loss": 0.0616, "step": 376920 }, { "epoch": 146.38, "learning_rate": 4.825889967637541e-07, "loss": 0.0231, "step": 376930 }, { "epoch": 146.38, "learning_rate": 4.820711974110033e-07, "loss": 0.0628, "step": 376940 }, { "epoch": 146.39, "learning_rate": 4.815533980582525e-07, "loss": 0.0081, "step": 376950 }, { "epoch": 146.39, "learning_rate": 4.810355987055017e-07, "loss": 0.036, "step": 376960 }, { "epoch": 146.4, "learning_rate": 4.805177993527509e-07, "loss": 0.0001, "step": 376970 }, { "epoch": 146.4, "learning_rate": 4.800000000000001e-07, "loss": 0.0087, "step": 376980 }, { "epoch": 146.4, "learning_rate": 4.794822006472493e-07, "loss": 0.0398, "step": 376990 }, { "epoch": 146.41, "learning_rate": 4.789644012944985e-07, "loss": 0.1106, "step": 377000 }, { "epoch": 146.41, "learning_rate": 4.784466019417476e-07, "loss": 0.0553, "step": 377010 }, { "epoch": 146.42, "learning_rate": 4.779288025889968e-07, "loss": 0.0261, "step": 377020 }, { "epoch": 146.42, "learning_rate": 4.774110032362459e-07, "loss": 0.0627, "step": 377030 }, { "epoch": 146.42, "learning_rate": 4.768932038834951e-07, "loss": 0.0461, "step": 377040 }, { "epoch": 146.43, "learning_rate": 4.7637540453074437e-07, "loss": 0.0635, "step": 377050 }, { "epoch": 146.43, "learning_rate": 4.758576051779935e-07, "loss": 0.0804, "step": 377060 }, { "epoch": 146.43, "learning_rate": 4.753398058252427e-07, "loss": 0.0646, "step": 377070 }, { "epoch": 146.44, "learning_rate": 4.7482200647249196e-07, "loss": 0.0001, "step": 377080 }, { "epoch": 146.44, "learning_rate": 4.7430420711974115e-07, "loss": 0.0073, "step": 377090 }, { "epoch": 146.45, "learning_rate": 4.7378640776699034e-07, "loss": 0.0164, "step": 377100 }, { "epoch": 146.45, "learning_rate": 4.7326860841423954e-07, "loss": 0.0339, "step": 377110 }, { "epoch": 146.45, "learning_rate": 4.7275080906148873e-07, "loss": 0.0402, "step": 377120 }, { "epoch": 146.46, "learning_rate": 4.7223300970873793e-07, "loss": 0.0346, "step": 377130 }, { "epoch": 146.46, "learning_rate": 4.7171521035598707e-07, "loss": 0.0931, "step": 377140 }, { "epoch": 146.47, "learning_rate": 4.7119741100323626e-07, "loss": 0.0054, "step": 377150 }, { "epoch": 146.47, "learning_rate": 4.7067961165048546e-07, "loss": 0.0105, "step": 377160 }, { "epoch": 146.47, "learning_rate": 4.7016181229773465e-07, "loss": 0.0522, "step": 377170 }, { "epoch": 146.48, "learning_rate": 4.6964401294498385e-07, "loss": 0.0039, "step": 377180 }, { "epoch": 146.48, "learning_rate": 4.6912621359223304e-07, "loss": 0.0185, "step": 377190 }, { "epoch": 146.49, "learning_rate": 4.686084142394822e-07, "loss": 0.033, "step": 377200 }, { "epoch": 146.49, "learning_rate": 4.680906148867314e-07, "loss": 0.0182, "step": 377210 }, { "epoch": 146.49, "learning_rate": 4.675728155339806e-07, "loss": 0.0032, "step": 377220 }, { "epoch": 146.5, "learning_rate": 4.670550161812298e-07, "loss": 0.0067, "step": 377230 }, { "epoch": 146.5, "learning_rate": 4.66537216828479e-07, "loss": 0.0725, "step": 377240 }, { "epoch": 146.5, "learning_rate": 4.660194174757282e-07, "loss": 0.0084, "step": 377250 }, { "epoch": 146.51, "learning_rate": 4.655016181229774e-07, "loss": 0.0245, "step": 377260 }, { "epoch": 146.51, "learning_rate": 4.649838187702266e-07, "loss": 0.0006, "step": 377270 }, { "epoch": 146.52, "learning_rate": 4.6446601941747574e-07, "loss": 0.0098, "step": 377280 }, { "epoch": 146.52, "learning_rate": 4.6394822006472493e-07, "loss": 0.0669, "step": 377290 }, { "epoch": 146.52, "learning_rate": 4.6343042071197413e-07, "loss": 0.0773, "step": 377300 }, { "epoch": 146.53, "learning_rate": 4.629126213592233e-07, "loss": 0.015, "step": 377310 }, { "epoch": 146.53, "learning_rate": 4.623948220064725e-07, "loss": 0.022, "step": 377320 }, { "epoch": 146.54, "learning_rate": 4.618770226537217e-07, "loss": 0.0493, "step": 377330 }, { "epoch": 146.54, "learning_rate": 4.613592233009709e-07, "loss": 0.1434, "step": 377340 }, { "epoch": 146.54, "learning_rate": 4.6084142394822005e-07, "loss": 0.0323, "step": 377350 }, { "epoch": 146.55, "learning_rate": 4.6032362459546935e-07, "loss": 0.0091, "step": 377360 }, { "epoch": 146.55, "learning_rate": 4.598058252427185e-07, "loss": 0.0573, "step": 377370 }, { "epoch": 146.56, "learning_rate": 4.592880258899677e-07, "loss": 0.1435, "step": 377380 }, { "epoch": 146.56, "learning_rate": 4.587702265372169e-07, "loss": 0.0747, "step": 377390 }, { "epoch": 146.56, "learning_rate": 4.582524271844661e-07, "loss": 0.0002, "step": 377400 }, { "epoch": 146.57, "learning_rate": 4.5773462783171527e-07, "loss": 0.0288, "step": 377410 }, { "epoch": 146.57, "learning_rate": 4.5721682847896446e-07, "loss": 0.0437, "step": 377420 }, { "epoch": 146.57, "learning_rate": 4.566990291262136e-07, "loss": 0.0587, "step": 377430 }, { "epoch": 146.58, "learning_rate": 4.561812297734628e-07, "loss": 0.0136, "step": 377440 }, { "epoch": 146.58, "learning_rate": 4.55663430420712e-07, "loss": 0.0527, "step": 377450 }, { "epoch": 146.59, "learning_rate": 4.551456310679612e-07, "loss": 0.0001, "step": 377460 }, { "epoch": 146.59, "learning_rate": 4.546278317152104e-07, "loss": 0.0001, "step": 377470 }, { "epoch": 146.59, "learning_rate": 4.541100323624596e-07, "loss": 0.0882, "step": 377480 }, { "epoch": 146.6, "learning_rate": 4.535922330097087e-07, "loss": 0.0419, "step": 377490 }, { "epoch": 146.6, "learning_rate": 4.53074433656958e-07, "loss": 0.0091, "step": 377500 }, { "epoch": 146.61, "learning_rate": 4.5255663430420716e-07, "loss": 0.0531, "step": 377510 }, { "epoch": 146.61, "learning_rate": 4.5203883495145636e-07, "loss": 0.0225, "step": 377520 }, { "epoch": 146.61, "learning_rate": 4.5152103559870555e-07, "loss": 0.0486, "step": 377530 }, { "epoch": 146.62, "learning_rate": 4.5100323624595474e-07, "loss": 0.0011, "step": 377540 }, { "epoch": 146.62, "learning_rate": 4.5048543689320394e-07, "loss": 0.0564, "step": 377550 }, { "epoch": 146.63, "learning_rate": 4.4996763754045313e-07, "loss": 0.0116, "step": 377560 }, { "epoch": 146.63, "learning_rate": 4.494498381877023e-07, "loss": 0.0287, "step": 377570 }, { "epoch": 146.63, "learning_rate": 4.4893203883495147e-07, "loss": 0.0082, "step": 377580 }, { "epoch": 146.64, "learning_rate": 4.4841423948220066e-07, "loss": 0.149, "step": 377590 }, { "epoch": 146.64, "learning_rate": 4.4789644012944986e-07, "loss": 0.0093, "step": 377600 }, { "epoch": 146.64, "learning_rate": 4.4737864077669905e-07, "loss": 0.0003, "step": 377610 }, { "epoch": 146.65, "learning_rate": 4.4686084142394825e-07, "loss": 0.0748, "step": 377620 }, { "epoch": 146.65, "learning_rate": 4.463430420711974e-07, "loss": 0.0252, "step": 377630 }, { "epoch": 146.66, "learning_rate": 4.458252427184467e-07, "loss": 0.0631, "step": 377640 }, { "epoch": 146.66, "learning_rate": 4.4530744336569583e-07, "loss": 0.1011, "step": 377650 }, { "epoch": 146.66, "learning_rate": 4.44789644012945e-07, "loss": 0.2093, "step": 377660 }, { "epoch": 146.67, "learning_rate": 4.442718446601942e-07, "loss": 0.041, "step": 377670 }, { "epoch": 146.67, "learning_rate": 4.437540453074434e-07, "loss": 0.0003, "step": 377680 }, { "epoch": 146.68, "learning_rate": 4.432362459546926e-07, "loss": 0.036, "step": 377690 }, { "epoch": 146.68, "learning_rate": 4.427184466019418e-07, "loss": 0.068, "step": 377700 }, { "epoch": 146.68, "learning_rate": 4.4220064724919095e-07, "loss": 0.0259, "step": 377710 }, { "epoch": 146.69, "learning_rate": 4.4168284789644014e-07, "loss": 0.0977, "step": 377720 }, { "epoch": 146.69, "learning_rate": 4.4116504854368933e-07, "loss": 0.0214, "step": 377730 }, { "epoch": 146.7, "learning_rate": 4.4064724919093853e-07, "loss": 0.0783, "step": 377740 }, { "epoch": 146.7, "learning_rate": 4.401294498381877e-07, "loss": 0.0028, "step": 377750 }, { "epoch": 146.7, "learning_rate": 4.396116504854369e-07, "loss": 0.1073, "step": 377760 }, { "epoch": 146.71, "learning_rate": 4.3909385113268606e-07, "loss": 0.0097, "step": 377770 }, { "epoch": 146.71, "learning_rate": 4.3857605177993536e-07, "loss": 0.0209, "step": 377780 }, { "epoch": 146.71, "learning_rate": 4.380582524271845e-07, "loss": 0.0543, "step": 377790 }, { "epoch": 146.72, "learning_rate": 4.375404530744337e-07, "loss": 0.0965, "step": 377800 }, { "epoch": 146.72, "learning_rate": 4.370226537216829e-07, "loss": 0.1859, "step": 377810 }, { "epoch": 146.73, "learning_rate": 4.365048543689321e-07, "loss": 0.0004, "step": 377820 }, { "epoch": 146.73, "learning_rate": 4.359870550161813e-07, "loss": 0.0001, "step": 377830 }, { "epoch": 146.73, "learning_rate": 4.354692556634305e-07, "loss": 0.0483, "step": 377840 }, { "epoch": 146.74, "learning_rate": 4.3495145631067967e-07, "loss": 0.0604, "step": 377850 }, { "epoch": 146.74, "learning_rate": 4.344336569579288e-07, "loss": 0.009, "step": 377860 }, { "epoch": 146.75, "learning_rate": 4.33915857605178e-07, "loss": 0.0093, "step": 377870 }, { "epoch": 146.75, "learning_rate": 4.333980582524272e-07, "loss": 0.0099, "step": 377880 }, { "epoch": 146.75, "learning_rate": 4.328802588996764e-07, "loss": 0.1267, "step": 377890 }, { "epoch": 146.76, "learning_rate": 4.323624595469256e-07, "loss": 0.0237, "step": 377900 }, { "epoch": 146.76, "learning_rate": 4.318446601941748e-07, "loss": 0.0248, "step": 377910 }, { "epoch": 146.77, "learning_rate": 4.3132686084142403e-07, "loss": 0.0006, "step": 377920 }, { "epoch": 146.77, "learning_rate": 4.308090614886732e-07, "loss": 0.0006, "step": 377930 }, { "epoch": 146.77, "learning_rate": 4.3029126213592237e-07, "loss": 0.0494, "step": 377940 }, { "epoch": 146.78, "learning_rate": 4.2977346278317156e-07, "loss": 0.0174, "step": 377950 }, { "epoch": 146.78, "learning_rate": 4.2925566343042076e-07, "loss": 0.0013, "step": 377960 }, { "epoch": 146.78, "learning_rate": 4.2873786407766995e-07, "loss": 0.091, "step": 377970 }, { "epoch": 146.79, "learning_rate": 4.2822006472491914e-07, "loss": 0.0001, "step": 377980 }, { "epoch": 146.79, "learning_rate": 4.2770226537216834e-07, "loss": 0.0066, "step": 377990 }, { "epoch": 146.8, "learning_rate": 4.271844660194175e-07, "loss": 0.0001, "step": 378000 }, { "epoch": 146.8, "learning_rate": 4.266666666666667e-07, "loss": 0.0412, "step": 378010 }, { "epoch": 146.8, "learning_rate": 4.2614886731391587e-07, "loss": 0.069, "step": 378020 }, { "epoch": 146.81, "learning_rate": 4.2563106796116506e-07, "loss": 0.0087, "step": 378030 }, { "epoch": 146.81, "learning_rate": 4.2511326860841426e-07, "loss": 0.0454, "step": 378040 }, { "epoch": 146.82, "learning_rate": 4.2459546925566345e-07, "loss": 0.0544, "step": 378050 }, { "epoch": 146.82, "learning_rate": 4.240776699029127e-07, "loss": 0.0598, "step": 378060 }, { "epoch": 146.82, "learning_rate": 4.235598705501619e-07, "loss": 0.0148, "step": 378070 }, { "epoch": 146.83, "learning_rate": 4.2304207119741104e-07, "loss": 0.0026, "step": 378080 }, { "epoch": 146.83, "learning_rate": 4.2252427184466023e-07, "loss": 0.0003, "step": 378090 }, { "epoch": 146.83, "learning_rate": 4.220064724919094e-07, "loss": 0.0251, "step": 378100 }, { "epoch": 146.84, "learning_rate": 4.214886731391586e-07, "loss": 0.0013, "step": 378110 }, { "epoch": 146.84, "learning_rate": 4.209708737864078e-07, "loss": 0.0085, "step": 378120 }, { "epoch": 146.85, "learning_rate": 4.20453074433657e-07, "loss": 0.016, "step": 378130 }, { "epoch": 146.85, "learning_rate": 4.1993527508090615e-07, "loss": 0.0831, "step": 378140 }, { "epoch": 146.85, "learning_rate": 4.1941747572815535e-07, "loss": 0.0649, "step": 378150 }, { "epoch": 146.86, "learning_rate": 4.1889967637540454e-07, "loss": 0.017, "step": 378160 }, { "epoch": 146.86, "learning_rate": 4.1838187702265374e-07, "loss": 0.0462, "step": 378170 }, { "epoch": 146.87, "learning_rate": 4.1786407766990293e-07, "loss": 0.0207, "step": 378180 }, { "epoch": 146.87, "learning_rate": 4.173462783171521e-07, "loss": 0.0826, "step": 378190 }, { "epoch": 146.87, "learning_rate": 4.1682847896440137e-07, "loss": 0.0163, "step": 378200 }, { "epoch": 146.88, "learning_rate": 4.1631067961165057e-07, "loss": 0.0362, "step": 378210 }, { "epoch": 146.88, "learning_rate": 4.157928802588997e-07, "loss": 0.1205, "step": 378220 }, { "epoch": 146.89, "learning_rate": 4.152750809061489e-07, "loss": 0.0238, "step": 378230 }, { "epoch": 146.89, "learning_rate": 4.147572815533981e-07, "loss": 0.0003, "step": 378240 }, { "epoch": 146.89, "learning_rate": 4.142394822006473e-07, "loss": 0.0001, "step": 378250 }, { "epoch": 146.9, "learning_rate": 4.137216828478965e-07, "loss": 0.0611, "step": 378260 }, { "epoch": 146.9, "learning_rate": 4.132038834951457e-07, "loss": 0.1566, "step": 378270 }, { "epoch": 146.9, "learning_rate": 4.126860841423948e-07, "loss": 0.2043, "step": 378280 }, { "epoch": 146.91, "learning_rate": 4.12168284789644e-07, "loss": 0.0352, "step": 378290 }, { "epoch": 146.91, "learning_rate": 4.116504854368932e-07, "loss": 0.1212, "step": 378300 }, { "epoch": 146.92, "learning_rate": 4.111326860841424e-07, "loss": 0.0748, "step": 378310 }, { "epoch": 146.92, "learning_rate": 4.106148867313916e-07, "loss": 0.0163, "step": 378320 }, { "epoch": 146.92, "learning_rate": 4.100970873786408e-07, "loss": 0.009, "step": 378330 }, { "epoch": 146.93, "learning_rate": 4.0957928802589004e-07, "loss": 0.1166, "step": 378340 }, { "epoch": 146.93, "learning_rate": 4.0906148867313924e-07, "loss": 0.0412, "step": 378350 }, { "epoch": 146.94, "learning_rate": 4.085436893203884e-07, "loss": 0.0014, "step": 378360 }, { "epoch": 146.94, "learning_rate": 4.0802588996763757e-07, "loss": 0.0269, "step": 378370 }, { "epoch": 146.94, "learning_rate": 4.0750809061488677e-07, "loss": 0.0718, "step": 378380 }, { "epoch": 146.95, "learning_rate": 4.0699029126213596e-07, "loss": 0.1028, "step": 378390 }, { "epoch": 146.95, "learning_rate": 4.0647249190938516e-07, "loss": 0.0003, "step": 378400 }, { "epoch": 146.96, "learning_rate": 4.0595469255663435e-07, "loss": 0.0333, "step": 378410 }, { "epoch": 146.96, "learning_rate": 4.0543689320388355e-07, "loss": 0.0644, "step": 378420 }, { "epoch": 146.96, "learning_rate": 4.049190938511327e-07, "loss": 0.0114, "step": 378430 }, { "epoch": 146.97, "learning_rate": 4.044012944983819e-07, "loss": 0.0736, "step": 378440 }, { "epoch": 146.97, "learning_rate": 4.038834951456311e-07, "loss": 0.0388, "step": 378450 }, { "epoch": 146.97, "learning_rate": 4.0336569579288027e-07, "loss": 0.063, "step": 378460 }, { "epoch": 146.98, "learning_rate": 4.0284789644012946e-07, "loss": 0.0228, "step": 378470 }, { "epoch": 146.98, "learning_rate": 4.023300970873787e-07, "loss": 0.0362, "step": 378480 }, { "epoch": 146.99, "learning_rate": 4.018122977346279e-07, "loss": 0.048, "step": 378490 }, { "epoch": 146.99, "learning_rate": 4.012944983818771e-07, "loss": 0.01, "step": 378500 }, { "epoch": 146.99, "learning_rate": 4.0077669902912624e-07, "loss": 0.0228, "step": 378510 }, { "epoch": 147.0, "learning_rate": 4.0025889967637544e-07, "loss": 0.0006, "step": 378520 }, { "epoch": 147.0, "eval_accuracy": 0.9529573590096286, "eval_loss": 0.3908064663410187, "eval_runtime": 8.3388, "eval_samples_per_second": 435.913, "eval_steps_per_second": 54.564, "step": 378525 }, { "epoch": 147.0, "learning_rate": 3.9974110032362463e-07, "loss": 0.0045, "step": 378530 }, { "epoch": 147.01, "learning_rate": 3.9922330097087383e-07, "loss": 0.011, "step": 378540 }, { "epoch": 147.01, "learning_rate": 3.98705501618123e-07, "loss": 0.1995, "step": 378550 }, { "epoch": 147.01, "learning_rate": 3.981877022653722e-07, "loss": 0.0307, "step": 378560 }, { "epoch": 147.02, "learning_rate": 3.9766990291262136e-07, "loss": 0.1278, "step": 378570 }, { "epoch": 147.02, "learning_rate": 3.9715210355987055e-07, "loss": 0.0226, "step": 378580 }, { "epoch": 147.03, "learning_rate": 3.9663430420711975e-07, "loss": 0.0592, "step": 378590 }, { "epoch": 147.03, "learning_rate": 3.9611650485436894e-07, "loss": 0.0611, "step": 378600 }, { "epoch": 147.03, "learning_rate": 3.9559870550161814e-07, "loss": 0.0165, "step": 378610 }, { "epoch": 147.04, "learning_rate": 3.950809061488674e-07, "loss": 0.1016, "step": 378620 }, { "epoch": 147.04, "learning_rate": 3.945631067961166e-07, "loss": 0.0138, "step": 378630 }, { "epoch": 147.04, "learning_rate": 3.9404530744336577e-07, "loss": 0.1123, "step": 378640 }, { "epoch": 147.05, "learning_rate": 3.935275080906149e-07, "loss": 0.0004, "step": 378650 }, { "epoch": 147.05, "learning_rate": 3.930097087378641e-07, "loss": 0.0002, "step": 378660 }, { "epoch": 147.06, "learning_rate": 3.924919093851133e-07, "loss": 0.0314, "step": 378670 }, { "epoch": 147.06, "learning_rate": 3.919741100323625e-07, "loss": 0.0025, "step": 378680 }, { "epoch": 147.06, "learning_rate": 3.914563106796117e-07, "loss": 0.0007, "step": 378690 }, { "epoch": 147.07, "learning_rate": 3.909385113268609e-07, "loss": 0.0242, "step": 378700 }, { "epoch": 147.07, "learning_rate": 3.9042071197411003e-07, "loss": 0.0309, "step": 378710 }, { "epoch": 147.08, "learning_rate": 3.899029126213592e-07, "loss": 0.0866, "step": 378720 }, { "epoch": 147.08, "learning_rate": 3.893851132686084e-07, "loss": 0.0098, "step": 378730 }, { "epoch": 147.08, "learning_rate": 3.888673139158576e-07, "loss": 0.0084, "step": 378740 }, { "epoch": 147.09, "learning_rate": 3.883495145631068e-07, "loss": 0.1268, "step": 378750 }, { "epoch": 147.09, "learning_rate": 3.8783171521035605e-07, "loss": 0.0004, "step": 378760 }, { "epoch": 147.1, "learning_rate": 3.8731391585760525e-07, "loss": 0.0625, "step": 378770 }, { "epoch": 147.1, "learning_rate": 3.8679611650485444e-07, "loss": 0.0202, "step": 378780 }, { "epoch": 147.1, "learning_rate": 3.862783171521036e-07, "loss": 0.0182, "step": 378790 }, { "epoch": 147.11, "learning_rate": 3.857605177993528e-07, "loss": 0.0414, "step": 378800 }, { "epoch": 147.11, "learning_rate": 3.8524271844660197e-07, "loss": 0.0006, "step": 378810 }, { "epoch": 147.11, "learning_rate": 3.8472491909385117e-07, "loss": 0.0184, "step": 378820 }, { "epoch": 147.12, "learning_rate": 3.8420711974110036e-07, "loss": 0.0717, "step": 378830 }, { "epoch": 147.12, "learning_rate": 3.8368932038834956e-07, "loss": 0.0001, "step": 378840 }, { "epoch": 147.13, "learning_rate": 3.831715210355987e-07, "loss": 0.0499, "step": 378850 }, { "epoch": 147.13, "learning_rate": 3.826537216828479e-07, "loss": 0.0002, "step": 378860 }, { "epoch": 147.13, "learning_rate": 3.821359223300971e-07, "loss": 0.0296, "step": 378870 }, { "epoch": 147.14, "learning_rate": 3.816181229773463e-07, "loss": 0.0222, "step": 378880 }, { "epoch": 147.14, "learning_rate": 3.811003236245955e-07, "loss": 0.0926, "step": 378890 }, { "epoch": 147.15, "learning_rate": 3.805825242718447e-07, "loss": 0.1034, "step": 378900 }, { "epoch": 147.15, "learning_rate": 3.800647249190939e-07, "loss": 0.0001, "step": 378910 }, { "epoch": 147.15, "learning_rate": 3.795469255663431e-07, "loss": 0.0016, "step": 378920 }, { "epoch": 147.16, "learning_rate": 3.7902912621359225e-07, "loss": 0.0086, "step": 378930 }, { "epoch": 147.16, "learning_rate": 3.7851132686084145e-07, "loss": 0.0827, "step": 378940 }, { "epoch": 147.17, "learning_rate": 3.7799352750809064e-07, "loss": 0.0068, "step": 378950 }, { "epoch": 147.17, "learning_rate": 3.7747572815533984e-07, "loss": 0.0091, "step": 378960 }, { "epoch": 147.17, "learning_rate": 3.7695792880258903e-07, "loss": 0.0076, "step": 378970 }, { "epoch": 147.18, "learning_rate": 3.7644012944983823e-07, "loss": 0.0033, "step": 378980 }, { "epoch": 147.18, "learning_rate": 3.759223300970874e-07, "loss": 0.0105, "step": 378990 }, { "epoch": 147.18, "learning_rate": 3.7540453074433656e-07, "loss": 0.0703, "step": 379000 }, { "epoch": 147.19, "learning_rate": 3.7488673139158576e-07, "loss": 0.0247, "step": 379010 }, { "epoch": 147.19, "learning_rate": 3.7436893203883495e-07, "loss": 0.0002, "step": 379020 }, { "epoch": 147.2, "learning_rate": 3.7385113268608415e-07, "loss": 0.0294, "step": 379030 }, { "epoch": 147.2, "learning_rate": 3.733333333333334e-07, "loss": 0.018, "step": 379040 }, { "epoch": 147.2, "learning_rate": 3.728155339805826e-07, "loss": 0.1245, "step": 379050 }, { "epoch": 147.21, "learning_rate": 3.722977346278318e-07, "loss": 0.0683, "step": 379060 }, { "epoch": 147.21, "learning_rate": 3.71779935275081e-07, "loss": 0.1195, "step": 379070 }, { "epoch": 147.22, "learning_rate": 3.712621359223301e-07, "loss": 0.1072, "step": 379080 }, { "epoch": 147.22, "learning_rate": 3.707443365695793e-07, "loss": 0.0022, "step": 379090 }, { "epoch": 147.22, "learning_rate": 3.702265372168285e-07, "loss": 0.0181, "step": 379100 }, { "epoch": 147.23, "learning_rate": 3.697087378640777e-07, "loss": 0.0268, "step": 379110 }, { "epoch": 147.23, "learning_rate": 3.691909385113269e-07, "loss": 0.0613, "step": 379120 }, { "epoch": 147.23, "learning_rate": 3.686731391585761e-07, "loss": 0.0798, "step": 379130 }, { "epoch": 147.24, "learning_rate": 3.6815533980582523e-07, "loss": 0.0011, "step": 379140 }, { "epoch": 147.24, "learning_rate": 3.6763754045307443e-07, "loss": 0.0006, "step": 379150 }, { "epoch": 147.25, "learning_rate": 3.671197411003236e-07, "loss": 0.0275, "step": 379160 }, { "epoch": 147.25, "learning_rate": 3.666019417475728e-07, "loss": 0.1002, "step": 379170 }, { "epoch": 147.25, "learning_rate": 3.6608414239482206e-07, "loss": 0.0112, "step": 379180 }, { "epoch": 147.26, "learning_rate": 3.6556634304207126e-07, "loss": 0.0696, "step": 379190 }, { "epoch": 147.26, "learning_rate": 3.6504854368932045e-07, "loss": 0.0004, "step": 379200 }, { "epoch": 147.27, "learning_rate": 3.6453074433656965e-07, "loss": 0.0077, "step": 379210 }, { "epoch": 147.27, "learning_rate": 3.640129449838188e-07, "loss": 0.037, "step": 379220 }, { "epoch": 147.27, "learning_rate": 3.63495145631068e-07, "loss": 0.0144, "step": 379230 }, { "epoch": 147.28, "learning_rate": 3.629773462783172e-07, "loss": 0.1069, "step": 379240 }, { "epoch": 147.28, "learning_rate": 3.6245954692556637e-07, "loss": 0.0865, "step": 379250 }, { "epoch": 147.29, "learning_rate": 3.6194174757281557e-07, "loss": 0.0009, "step": 379260 }, { "epoch": 147.29, "learning_rate": 3.6142394822006476e-07, "loss": 0.0519, "step": 379270 }, { "epoch": 147.29, "learning_rate": 3.609061488673139e-07, "loss": 0.0099, "step": 379280 }, { "epoch": 147.3, "learning_rate": 3.603883495145631e-07, "loss": 0.0585, "step": 379290 }, { "epoch": 147.3, "learning_rate": 3.598705501618123e-07, "loss": 0.053, "step": 379300 }, { "epoch": 147.3, "learning_rate": 3.593527508090615e-07, "loss": 0.0163, "step": 379310 }, { "epoch": 147.31, "learning_rate": 3.5883495145631073e-07, "loss": 0.0427, "step": 379320 }, { "epoch": 147.31, "learning_rate": 3.5831715210355993e-07, "loss": 0.0034, "step": 379330 }, { "epoch": 147.32, "learning_rate": 3.577993527508091e-07, "loss": 0.1451, "step": 379340 }, { "epoch": 147.32, "learning_rate": 3.572815533980583e-07, "loss": 0.0469, "step": 379350 }, { "epoch": 147.32, "learning_rate": 3.5676375404530746e-07, "loss": 0.0149, "step": 379360 }, { "epoch": 147.33, "learning_rate": 3.5624595469255665e-07, "loss": 0.0174, "step": 379370 }, { "epoch": 147.33, "learning_rate": 3.5572815533980585e-07, "loss": 0.0636, "step": 379380 }, { "epoch": 147.34, "learning_rate": 3.5521035598705504e-07, "loss": 0.0412, "step": 379390 }, { "epoch": 147.34, "learning_rate": 3.5469255663430424e-07, "loss": 0.0467, "step": 379400 }, { "epoch": 147.34, "learning_rate": 3.5417475728155343e-07, "loss": 0.002, "step": 379410 }, { "epoch": 147.35, "learning_rate": 3.536569579288026e-07, "loss": 0.0529, "step": 379420 }, { "epoch": 147.35, "learning_rate": 3.5313915857605177e-07, "loss": 0.0424, "step": 379430 }, { "epoch": 147.36, "learning_rate": 3.5262135922330096e-07, "loss": 0.0167, "step": 379440 }, { "epoch": 147.36, "learning_rate": 3.5210355987055016e-07, "loss": 0.0008, "step": 379450 }, { "epoch": 147.36, "learning_rate": 3.515857605177994e-07, "loss": 0.0004, "step": 379460 }, { "epoch": 147.37, "learning_rate": 3.510679611650486e-07, "loss": 0.059, "step": 379470 }, { "epoch": 147.37, "learning_rate": 3.505501618122978e-07, "loss": 0.0002, "step": 379480 }, { "epoch": 147.37, "learning_rate": 3.50032362459547e-07, "loss": 0.0432, "step": 379490 }, { "epoch": 147.38, "learning_rate": 3.4951456310679613e-07, "loss": 0.0694, "step": 379500 }, { "epoch": 147.38, "learning_rate": 3.489967637540453e-07, "loss": 0.0003, "step": 379510 }, { "epoch": 147.39, "learning_rate": 3.484789644012945e-07, "loss": 0.0003, "step": 379520 }, { "epoch": 147.39, "learning_rate": 3.479611650485437e-07, "loss": 0.0231, "step": 379530 }, { "epoch": 147.39, "learning_rate": 3.474433656957929e-07, "loss": 0.0002, "step": 379540 }, { "epoch": 147.4, "learning_rate": 3.469255663430421e-07, "loss": 0.0708, "step": 379550 }, { "epoch": 147.4, "learning_rate": 3.464077669902913e-07, "loss": 0.0003, "step": 379560 }, { "epoch": 147.41, "learning_rate": 3.4588996763754044e-07, "loss": 0.109, "step": 379570 }, { "epoch": 147.41, "learning_rate": 3.4537216828478963e-07, "loss": 0.0651, "step": 379580 }, { "epoch": 147.41, "learning_rate": 3.4485436893203883e-07, "loss": 0.1316, "step": 379590 }, { "epoch": 147.42, "learning_rate": 3.443365695792881e-07, "loss": 0.0138, "step": 379600 }, { "epoch": 147.42, "learning_rate": 3.4381877022653727e-07, "loss": 0.059, "step": 379610 }, { "epoch": 147.43, "learning_rate": 3.4330097087378646e-07, "loss": 0.0264, "step": 379620 }, { "epoch": 147.43, "learning_rate": 3.4278317152103566e-07, "loss": 0.0676, "step": 379630 }, { "epoch": 147.43, "learning_rate": 3.4226537216828485e-07, "loss": 0.0497, "step": 379640 }, { "epoch": 147.44, "learning_rate": 3.41747572815534e-07, "loss": 0.0102, "step": 379650 }, { "epoch": 147.44, "learning_rate": 3.412297734627832e-07, "loss": 0.019, "step": 379660 }, { "epoch": 147.44, "learning_rate": 3.407119741100324e-07, "loss": 0.0001, "step": 379670 }, { "epoch": 147.45, "learning_rate": 3.401941747572816e-07, "loss": 0.0241, "step": 379680 }, { "epoch": 147.45, "learning_rate": 3.396763754045308e-07, "loss": 0.0564, "step": 379690 }, { "epoch": 147.46, "learning_rate": 3.3915857605177997e-07, "loss": 0.0002, "step": 379700 }, { "epoch": 147.46, "learning_rate": 3.386407766990291e-07, "loss": 0.0671, "step": 379710 }, { "epoch": 147.46, "learning_rate": 3.381229773462783e-07, "loss": 0.0419, "step": 379720 }, { "epoch": 147.47, "learning_rate": 3.376051779935275e-07, "loss": 0.0215, "step": 379730 }, { "epoch": 147.47, "learning_rate": 3.3708737864077675e-07, "loss": 0.0112, "step": 379740 }, { "epoch": 147.48, "learning_rate": 3.3656957928802594e-07, "loss": 0.0173, "step": 379750 }, { "epoch": 147.48, "learning_rate": 3.3605177993527514e-07, "loss": 0.002, "step": 379760 }, { "epoch": 147.48, "learning_rate": 3.3553398058252433e-07, "loss": 0.0716, "step": 379770 }, { "epoch": 147.49, "learning_rate": 3.350161812297735e-07, "loss": 0.0166, "step": 379780 }, { "epoch": 147.49, "learning_rate": 3.3449838187702267e-07, "loss": 0.058, "step": 379790 }, { "epoch": 147.5, "learning_rate": 3.3398058252427186e-07, "loss": 0.1164, "step": 379800 }, { "epoch": 147.5, "learning_rate": 3.3346278317152105e-07, "loss": 0.0654, "step": 379810 }, { "epoch": 147.5, "learning_rate": 3.3294498381877025e-07, "loss": 0.0126, "step": 379820 }, { "epoch": 147.51, "learning_rate": 3.3242718446601944e-07, "loss": 0.0093, "step": 379830 }, { "epoch": 147.51, "learning_rate": 3.3190938511326864e-07, "loss": 0.0603, "step": 379840 }, { "epoch": 147.51, "learning_rate": 3.313915857605178e-07, "loss": 0.0174, "step": 379850 }, { "epoch": 147.52, "learning_rate": 3.30873786407767e-07, "loss": 0.0816, "step": 379860 }, { "epoch": 147.52, "learning_rate": 3.3035598705501617e-07, "loss": 0.0081, "step": 379870 }, { "epoch": 147.53, "learning_rate": 3.298381877022654e-07, "loss": 0.0052, "step": 379880 }, { "epoch": 147.53, "learning_rate": 3.293203883495146e-07, "loss": 0.0856, "step": 379890 }, { "epoch": 147.53, "learning_rate": 3.288025889967638e-07, "loss": 0.0581, "step": 379900 }, { "epoch": 147.54, "learning_rate": 3.28284789644013e-07, "loss": 0.1151, "step": 379910 }, { "epoch": 147.54, "learning_rate": 3.277669902912622e-07, "loss": 0.0251, "step": 379920 }, { "epoch": 147.55, "learning_rate": 3.2724919093851134e-07, "loss": 0.0384, "step": 379930 }, { "epoch": 147.55, "learning_rate": 3.2673139158576053e-07, "loss": 0.014, "step": 379940 }, { "epoch": 147.55, "learning_rate": 3.262135922330097e-07, "loss": 0.0615, "step": 379950 }, { "epoch": 147.56, "learning_rate": 3.256957928802589e-07, "loss": 0.0523, "step": 379960 }, { "epoch": 147.56, "learning_rate": 3.251779935275081e-07, "loss": 0.1021, "step": 379970 }, { "epoch": 147.57, "learning_rate": 3.246601941747573e-07, "loss": 0.0452, "step": 379980 }, { "epoch": 147.57, "learning_rate": 3.2414239482200645e-07, "loss": 0.07, "step": 379990 }, { "epoch": 147.57, "learning_rate": 3.2362459546925565e-07, "loss": 0.0258, "step": 380000 }, { "epoch": 147.58, "learning_rate": 3.2310679611650484e-07, "loss": 0.0666, "step": 380010 }, { "epoch": 147.58, "learning_rate": 3.225889967637541e-07, "loss": 0.0581, "step": 380020 }, { "epoch": 147.58, "learning_rate": 3.220711974110033e-07, "loss": 0.0005, "step": 380030 }, { "epoch": 147.59, "learning_rate": 3.215533980582525e-07, "loss": 0.13, "step": 380040 }, { "epoch": 147.59, "learning_rate": 3.2103559870550167e-07, "loss": 0.0737, "step": 380050 }, { "epoch": 147.6, "learning_rate": 3.2051779935275087e-07, "loss": 0.107, "step": 380060 }, { "epoch": 147.6, "learning_rate": 3.2e-07, "loss": 0.001, "step": 380070 }, { "epoch": 147.6, "learning_rate": 3.194822006472492e-07, "loss": 0.0024, "step": 380080 }, { "epoch": 147.61, "learning_rate": 3.189644012944984e-07, "loss": 0.0002, "step": 380090 }, { "epoch": 147.61, "learning_rate": 3.184466019417476e-07, "loss": 0.0278, "step": 380100 }, { "epoch": 147.62, "learning_rate": 3.179288025889968e-07, "loss": 0.179, "step": 380110 }, { "epoch": 147.62, "learning_rate": 3.17411003236246e-07, "loss": 0.0197, "step": 380120 }, { "epoch": 147.62, "learning_rate": 3.168932038834952e-07, "loss": 0.0384, "step": 380130 }, { "epoch": 147.63, "learning_rate": 3.163754045307443e-07, "loss": 0.0002, "step": 380140 }, { "epoch": 147.63, "learning_rate": 3.158576051779935e-07, "loss": 0.0558, "step": 380150 }, { "epoch": 147.63, "learning_rate": 3.1533980582524276e-07, "loss": 0.0001, "step": 380160 }, { "epoch": 147.64, "learning_rate": 3.1482200647249195e-07, "loss": 0.0983, "step": 380170 }, { "epoch": 147.64, "learning_rate": 3.1430420711974115e-07, "loss": 0.1535, "step": 380180 }, { "epoch": 147.65, "learning_rate": 3.1378640776699034e-07, "loss": 0.0104, "step": 380190 }, { "epoch": 147.65, "learning_rate": 3.1326860841423954e-07, "loss": 0.0098, "step": 380200 }, { "epoch": 147.65, "learning_rate": 3.1275080906148873e-07, "loss": 0.0039, "step": 380210 }, { "epoch": 147.66, "learning_rate": 3.1223300970873787e-07, "loss": 0.0645, "step": 380220 }, { "epoch": 147.66, "learning_rate": 3.1171521035598707e-07, "loss": 0.0452, "step": 380230 }, { "epoch": 147.67, "learning_rate": 3.1119741100323626e-07, "loss": 0.0388, "step": 380240 }, { "epoch": 147.67, "learning_rate": 3.1067961165048546e-07, "loss": 0.128, "step": 380250 }, { "epoch": 147.67, "learning_rate": 3.1016181229773465e-07, "loss": 0.024, "step": 380260 }, { "epoch": 147.68, "learning_rate": 3.0964401294498384e-07, "loss": 0.056, "step": 380270 }, { "epoch": 147.68, "learning_rate": 3.0912621359223304e-07, "loss": 0.0196, "step": 380280 }, { "epoch": 147.69, "learning_rate": 3.0860841423948223e-07, "loss": 0.0164, "step": 380290 }, { "epoch": 147.69, "learning_rate": 3.0809061488673143e-07, "loss": 0.1051, "step": 380300 }, { "epoch": 147.69, "learning_rate": 3.075728155339806e-07, "loss": 0.0015, "step": 380310 }, { "epoch": 147.7, "learning_rate": 3.0705501618122976e-07, "loss": 0.0005, "step": 380320 }, { "epoch": 147.7, "learning_rate": 3.06537216828479e-07, "loss": 0.1047, "step": 380330 }, { "epoch": 147.7, "learning_rate": 3.060194174757282e-07, "loss": 0.0672, "step": 380340 }, { "epoch": 147.71, "learning_rate": 3.055016181229774e-07, "loss": 0.0566, "step": 380350 }, { "epoch": 147.71, "learning_rate": 3.0498381877022654e-07, "loss": 0.0049, "step": 380360 }, { "epoch": 147.72, "learning_rate": 3.0446601941747574e-07, "loss": 0.0215, "step": 380370 }, { "epoch": 147.72, "learning_rate": 3.0394822006472493e-07, "loss": 0.0979, "step": 380380 }, { "epoch": 147.72, "learning_rate": 3.034304207119741e-07, "loss": 0.0389, "step": 380390 }, { "epoch": 147.73, "learning_rate": 3.029126213592233e-07, "loss": 0.0663, "step": 380400 }, { "epoch": 147.73, "learning_rate": 3.023948220064725e-07, "loss": 0.031, "step": 380410 }, { "epoch": 147.74, "learning_rate": 3.018770226537217e-07, "loss": 0.0002, "step": 380420 }, { "epoch": 147.74, "learning_rate": 3.013592233009709e-07, "loss": 0.0346, "step": 380430 }, { "epoch": 147.74, "learning_rate": 3.008414239482201e-07, "loss": 0.1019, "step": 380440 }, { "epoch": 147.75, "learning_rate": 3.003236245954693e-07, "loss": 0.0007, "step": 380450 }, { "epoch": 147.75, "learning_rate": 2.9980582524271843e-07, "loss": 0.0003, "step": 380460 }, { "epoch": 147.76, "learning_rate": 2.992880258899677e-07, "loss": 0.0001, "step": 380470 }, { "epoch": 147.76, "learning_rate": 2.987702265372169e-07, "loss": 0.0316, "step": 380480 }, { "epoch": 147.76, "learning_rate": 2.9825242718446607e-07, "loss": 0.0163, "step": 380490 }, { "epoch": 147.77, "learning_rate": 2.977346278317152e-07, "loss": 0.0252, "step": 380500 }, { "epoch": 147.77, "learning_rate": 2.972168284789644e-07, "loss": 0.0169, "step": 380510 }, { "epoch": 147.77, "learning_rate": 2.966990291262136e-07, "loss": 0.0027, "step": 380520 }, { "epoch": 147.78, "learning_rate": 2.961812297734628e-07, "loss": 0.014, "step": 380530 }, { "epoch": 147.78, "learning_rate": 2.95663430420712e-07, "loss": 0.0454, "step": 380540 }, { "epoch": 147.79, "learning_rate": 2.951456310679612e-07, "loss": 0.1332, "step": 380550 }, { "epoch": 147.79, "learning_rate": 2.946278317152104e-07, "loss": 0.0558, "step": 380560 }, { "epoch": 147.79, "learning_rate": 2.941100323624596e-07, "loss": 0.0453, "step": 380570 }, { "epoch": 147.8, "learning_rate": 2.9359223300970877e-07, "loss": 0.0072, "step": 380580 }, { "epoch": 147.8, "learning_rate": 2.9307443365695796e-07, "loss": 0.012, "step": 380590 }, { "epoch": 147.81, "learning_rate": 2.925566343042071e-07, "loss": 0.0198, "step": 380600 }, { "epoch": 147.81, "learning_rate": 2.9203883495145635e-07, "loss": 0.0943, "step": 380610 }, { "epoch": 147.81, "learning_rate": 2.9152103559870555e-07, "loss": 0.029, "step": 380620 }, { "epoch": 147.82, "learning_rate": 2.9100323624595474e-07, "loss": 0.0731, "step": 380630 }, { "epoch": 147.82, "learning_rate": 2.904854368932039e-07, "loss": 0.0008, "step": 380640 }, { "epoch": 147.83, "learning_rate": 2.899676375404531e-07, "loss": 0.0088, "step": 380650 }, { "epoch": 147.83, "learning_rate": 2.8944983818770227e-07, "loss": 0.1174, "step": 380660 }, { "epoch": 147.83, "learning_rate": 2.8893203883495147e-07, "loss": 0.0127, "step": 380670 }, { "epoch": 147.84, "learning_rate": 2.884142394822007e-07, "loss": 0.0565, "step": 380680 }, { "epoch": 147.84, "learning_rate": 2.8789644012944986e-07, "loss": 0.1208, "step": 380690 }, { "epoch": 147.84, "learning_rate": 2.8737864077669905e-07, "loss": 0.0415, "step": 380700 }, { "epoch": 147.85, "learning_rate": 2.8686084142394824e-07, "loss": 0.049, "step": 380710 }, { "epoch": 147.85, "learning_rate": 2.8634304207119744e-07, "loss": 0.027, "step": 380720 }, { "epoch": 147.86, "learning_rate": 2.8582524271844663e-07, "loss": 0.0001, "step": 380730 }, { "epoch": 147.86, "learning_rate": 2.8530744336569583e-07, "loss": 0.0044, "step": 380740 }, { "epoch": 147.86, "learning_rate": 2.84789644012945e-07, "loss": 0.0342, "step": 380750 }, { "epoch": 147.87, "learning_rate": 2.842718446601942e-07, "loss": 0.0006, "step": 380760 }, { "epoch": 147.87, "learning_rate": 2.837540453074434e-07, "loss": 0.0332, "step": 380770 }, { "epoch": 147.88, "learning_rate": 2.832362459546926e-07, "loss": 0.0017, "step": 380780 }, { "epoch": 147.88, "learning_rate": 2.8271844660194175e-07, "loss": 0.0782, "step": 380790 }, { "epoch": 147.88, "learning_rate": 2.8220064724919094e-07, "loss": 0.0321, "step": 380800 }, { "epoch": 147.89, "learning_rate": 2.8168284789644014e-07, "loss": 0.095, "step": 380810 }, { "epoch": 147.89, "learning_rate": 2.811650485436894e-07, "loss": 0.0004, "step": 380820 }, { "epoch": 147.9, "learning_rate": 2.806472491909385e-07, "loss": 0.0845, "step": 380830 }, { "epoch": 147.9, "learning_rate": 2.801294498381877e-07, "loss": 0.0089, "step": 380840 }, { "epoch": 147.9, "learning_rate": 2.796116504854369e-07, "loss": 0.0137, "step": 380850 }, { "epoch": 147.91, "learning_rate": 2.790938511326861e-07, "loss": 0.1203, "step": 380860 }, { "epoch": 147.91, "learning_rate": 2.785760517799353e-07, "loss": 0.0153, "step": 380870 }, { "epoch": 147.91, "learning_rate": 2.780582524271845e-07, "loss": 0.0166, "step": 380880 }, { "epoch": 147.92, "learning_rate": 2.775404530744337e-07, "loss": 0.0209, "step": 380890 }, { "epoch": 147.92, "learning_rate": 2.770226537216829e-07, "loss": 0.0012, "step": 380900 }, { "epoch": 147.93, "learning_rate": 2.765048543689321e-07, "loss": 0.2369, "step": 380910 }, { "epoch": 147.93, "learning_rate": 2.759870550161813e-07, "loss": 0.0001, "step": 380920 }, { "epoch": 147.93, "learning_rate": 2.754692556634304e-07, "loss": 0.0004, "step": 380930 }, { "epoch": 147.94, "learning_rate": 2.749514563106796e-07, "loss": 0.0253, "step": 380940 }, { "epoch": 147.94, "learning_rate": 2.744336569579288e-07, "loss": 0.0315, "step": 380950 }, { "epoch": 147.95, "learning_rate": 2.7391585760517805e-07, "loss": 0.0769, "step": 380960 }, { "epoch": 147.95, "learning_rate": 2.733980582524272e-07, "loss": 0.1122, "step": 380970 }, { "epoch": 147.95, "learning_rate": 2.728802588996764e-07, "loss": 0.0276, "step": 380980 }, { "epoch": 147.96, "learning_rate": 2.723624595469256e-07, "loss": 0.0138, "step": 380990 }, { "epoch": 147.96, "learning_rate": 2.718446601941748e-07, "loss": 0.0003, "step": 381000 }, { "epoch": 147.97, "learning_rate": 2.71326860841424e-07, "loss": 0.0616, "step": 381010 }, { "epoch": 147.97, "learning_rate": 2.7080906148867317e-07, "loss": 0.0491, "step": 381020 }, { "epoch": 147.97, "learning_rate": 2.7029126213592236e-07, "loss": 0.024, "step": 381030 }, { "epoch": 147.98, "learning_rate": 2.6977346278317156e-07, "loss": 0.0583, "step": 381040 }, { "epoch": 147.98, "learning_rate": 2.6925566343042075e-07, "loss": 0.0882, "step": 381050 }, { "epoch": 147.98, "learning_rate": 2.6873786407766995e-07, "loss": 0.0512, "step": 381060 }, { "epoch": 147.99, "learning_rate": 2.682200647249191e-07, "loss": 0.0046, "step": 381070 }, { "epoch": 147.99, "learning_rate": 2.677022653721683e-07, "loss": 0.0411, "step": 381080 }, { "epoch": 148.0, "learning_rate": 2.671844660194175e-07, "loss": 0.082, "step": 381090 }, { "epoch": 148.0, "learning_rate": 2.666666666666667e-07, "loss": 0.0226, "step": 381100 }, { "epoch": 148.0, "eval_accuracy": 0.9526822558459422, "eval_loss": 0.38993731141090393, "eval_runtime": 8.3283, "eval_samples_per_second": 436.465, "eval_steps_per_second": 54.633, "step": 381100 }, { "epoch": 148.0, "learning_rate": 2.6614886731391587e-07, "loss": 0.0181, "step": 381110 }, { "epoch": 148.01, "learning_rate": 2.6563106796116506e-07, "loss": 0.0486, "step": 381120 }, { "epoch": 148.01, "learning_rate": 2.6511326860841426e-07, "loss": 0.0245, "step": 381130 }, { "epoch": 148.02, "learning_rate": 2.6459546925566345e-07, "loss": 0.0749, "step": 381140 }, { "epoch": 148.02, "learning_rate": 2.6407766990291264e-07, "loss": 0.0853, "step": 381150 }, { "epoch": 148.02, "learning_rate": 2.6355987055016184e-07, "loss": 0.089, "step": 381160 }, { "epoch": 148.03, "learning_rate": 2.6304207119741103e-07, "loss": 0.016, "step": 381170 }, { "epoch": 148.03, "learning_rate": 2.6252427184466023e-07, "loss": 0.0264, "step": 381180 }, { "epoch": 148.03, "learning_rate": 2.620064724919094e-07, "loss": 0.0088, "step": 381190 }, { "epoch": 148.04, "learning_rate": 2.614886731391586e-07, "loss": 0.0151, "step": 381200 }, { "epoch": 148.04, "learning_rate": 2.609708737864078e-07, "loss": 0.0002, "step": 381210 }, { "epoch": 148.05, "learning_rate": 2.6045307443365695e-07, "loss": 0.0089, "step": 381220 }, { "epoch": 148.05, "learning_rate": 2.5993527508090615e-07, "loss": 0.0021, "step": 381230 }, { "epoch": 148.05, "learning_rate": 2.594174757281554e-07, "loss": 0.0141, "step": 381240 }, { "epoch": 148.06, "learning_rate": 2.588996763754046e-07, "loss": 0.1158, "step": 381250 }, { "epoch": 148.06, "learning_rate": 2.5838187702265373e-07, "loss": 0.0001, "step": 381260 }, { "epoch": 148.07, "learning_rate": 2.578640776699029e-07, "loss": 0.0525, "step": 381270 }, { "epoch": 148.07, "learning_rate": 2.573462783171521e-07, "loss": 0.0004, "step": 381280 }, { "epoch": 148.07, "learning_rate": 2.568284789644013e-07, "loss": 0.0318, "step": 381290 }, { "epoch": 148.08, "learning_rate": 2.563106796116505e-07, "loss": 0.0205, "step": 381300 }, { "epoch": 148.08, "learning_rate": 2.557928802588997e-07, "loss": 0.0492, "step": 381310 }, { "epoch": 148.09, "learning_rate": 2.552750809061489e-07, "loss": 0.0224, "step": 381320 }, { "epoch": 148.09, "learning_rate": 2.547572815533981e-07, "loss": 0.0089, "step": 381330 }, { "epoch": 148.09, "learning_rate": 2.542394822006473e-07, "loss": 0.0091, "step": 381340 }, { "epoch": 148.1, "learning_rate": 2.537216828478965e-07, "loss": 0.0022, "step": 381350 }, { "epoch": 148.1, "learning_rate": 2.532038834951456e-07, "loss": 0.0251, "step": 381360 }, { "epoch": 148.1, "learning_rate": 2.526860841423948e-07, "loss": 0.0098, "step": 381370 }, { "epoch": 148.11, "learning_rate": 2.5216828478964407e-07, "loss": 0.0565, "step": 381380 }, { "epoch": 148.11, "learning_rate": 2.5165048543689326e-07, "loss": 0.0444, "step": 381390 }, { "epoch": 148.12, "learning_rate": 2.511326860841424e-07, "loss": 0.0139, "step": 381400 }, { "epoch": 148.12, "learning_rate": 2.506148867313916e-07, "loss": 0.0683, "step": 381410 }, { "epoch": 148.12, "learning_rate": 2.500970873786408e-07, "loss": 0.0752, "step": 381420 }, { "epoch": 148.13, "learning_rate": 2.4957928802589e-07, "loss": 0.0094, "step": 381430 }, { "epoch": 148.13, "learning_rate": 2.490614886731392e-07, "loss": 0.0169, "step": 381440 }, { "epoch": 148.14, "learning_rate": 2.485436893203884e-07, "loss": 0.1743, "step": 381450 }, { "epoch": 148.14, "learning_rate": 2.4802588996763757e-07, "loss": 0.0002, "step": 381460 }, { "epoch": 148.14, "learning_rate": 2.4750809061488676e-07, "loss": 0.0085, "step": 381470 }, { "epoch": 148.15, "learning_rate": 2.4699029126213596e-07, "loss": 0.1197, "step": 381480 }, { "epoch": 148.15, "learning_rate": 2.4647249190938515e-07, "loss": 0.0002, "step": 381490 }, { "epoch": 148.16, "learning_rate": 2.459546925566343e-07, "loss": 0.0156, "step": 381500 }, { "epoch": 148.16, "learning_rate": 2.454368932038835e-07, "loss": 0.1005, "step": 381510 }, { "epoch": 148.16, "learning_rate": 2.4491909385113274e-07, "loss": 0.0002, "step": 381520 }, { "epoch": 148.17, "learning_rate": 2.4440129449838193e-07, "loss": 0.01, "step": 381530 }, { "epoch": 148.17, "learning_rate": 2.4388349514563107e-07, "loss": 0.0092, "step": 381540 }, { "epoch": 148.17, "learning_rate": 2.4336569579288027e-07, "loss": 0.0171, "step": 381550 }, { "epoch": 148.18, "learning_rate": 2.4284789644012946e-07, "loss": 0.0145, "step": 381560 }, { "epoch": 148.18, "learning_rate": 2.4233009708737866e-07, "loss": 0.1396, "step": 381570 }, { "epoch": 148.19, "learning_rate": 2.4181229773462785e-07, "loss": 0.0403, "step": 381580 }, { "epoch": 148.19, "learning_rate": 2.4129449838187705e-07, "loss": 0.0336, "step": 381590 }, { "epoch": 148.19, "learning_rate": 2.4077669902912624e-07, "loss": 0.0015, "step": 381600 }, { "epoch": 148.2, "learning_rate": 2.4025889967637543e-07, "loss": 0.0209, "step": 381610 }, { "epoch": 148.2, "learning_rate": 2.3974110032362463e-07, "loss": 0.0128, "step": 381620 }, { "epoch": 148.21, "learning_rate": 2.392233009708738e-07, "loss": 0.1755, "step": 381630 }, { "epoch": 148.21, "learning_rate": 2.3870550161812296e-07, "loss": 0.0804, "step": 381640 }, { "epoch": 148.21, "learning_rate": 2.3818770226537219e-07, "loss": 0.0611, "step": 381650 }, { "epoch": 148.22, "learning_rate": 2.3766990291262135e-07, "loss": 0.0425, "step": 381660 }, { "epoch": 148.22, "learning_rate": 2.3715210355987057e-07, "loss": 0.079, "step": 381670 }, { "epoch": 148.23, "learning_rate": 2.3663430420711977e-07, "loss": 0.0147, "step": 381680 }, { "epoch": 148.23, "learning_rate": 2.3611650485436896e-07, "loss": 0.0003, "step": 381690 }, { "epoch": 148.23, "learning_rate": 2.3559870550161813e-07, "loss": 0.0005, "step": 381700 }, { "epoch": 148.24, "learning_rate": 2.3508090614886733e-07, "loss": 0.0001, "step": 381710 }, { "epoch": 148.24, "learning_rate": 2.3456310679611652e-07, "loss": 0.0144, "step": 381720 }, { "epoch": 148.24, "learning_rate": 2.340453074433657e-07, "loss": 0.0006, "step": 381730 }, { "epoch": 148.25, "learning_rate": 2.335275080906149e-07, "loss": 0.0144, "step": 381740 }, { "epoch": 148.25, "learning_rate": 2.330097087378641e-07, "loss": 0.0082, "step": 381750 }, { "epoch": 148.26, "learning_rate": 2.324919093851133e-07, "loss": 0.0001, "step": 381760 }, { "epoch": 148.26, "learning_rate": 2.3197411003236247e-07, "loss": 0.0187, "step": 381770 }, { "epoch": 148.26, "learning_rate": 2.3145631067961166e-07, "loss": 0.0592, "step": 381780 }, { "epoch": 148.27, "learning_rate": 2.3093851132686086e-07, "loss": 0.0956, "step": 381790 }, { "epoch": 148.27, "learning_rate": 2.3042071197411002e-07, "loss": 0.0458, "step": 381800 }, { "epoch": 148.28, "learning_rate": 2.2990291262135925e-07, "loss": 0.0276, "step": 381810 }, { "epoch": 148.28, "learning_rate": 2.2938511326860844e-07, "loss": 0.0729, "step": 381820 }, { "epoch": 148.28, "learning_rate": 2.2886731391585763e-07, "loss": 0.0774, "step": 381830 }, { "epoch": 148.29, "learning_rate": 2.283495145631068e-07, "loss": 0.0017, "step": 381840 }, { "epoch": 148.29, "learning_rate": 2.27831715210356e-07, "loss": 0.064, "step": 381850 }, { "epoch": 148.3, "learning_rate": 2.273139158576052e-07, "loss": 0.0316, "step": 381860 }, { "epoch": 148.3, "learning_rate": 2.2679611650485436e-07, "loss": 0.0003, "step": 381870 }, { "epoch": 148.3, "learning_rate": 2.2627831715210358e-07, "loss": 0.0206, "step": 381880 }, { "epoch": 148.31, "learning_rate": 2.2576051779935278e-07, "loss": 0.0167, "step": 381890 }, { "epoch": 148.31, "learning_rate": 2.2524271844660197e-07, "loss": 0.0008, "step": 381900 }, { "epoch": 148.31, "learning_rate": 2.2472491909385114e-07, "loss": 0.0105, "step": 381910 }, { "epoch": 148.32, "learning_rate": 2.2420711974110033e-07, "loss": 0.0689, "step": 381920 }, { "epoch": 148.32, "learning_rate": 2.2368932038834953e-07, "loss": 0.0018, "step": 381930 }, { "epoch": 148.33, "learning_rate": 2.231715210355987e-07, "loss": 0.0462, "step": 381940 }, { "epoch": 148.33, "learning_rate": 2.2265372168284792e-07, "loss": 0.0084, "step": 381950 }, { "epoch": 148.33, "learning_rate": 2.221359223300971e-07, "loss": 0.0139, "step": 381960 }, { "epoch": 148.34, "learning_rate": 2.216181229773463e-07, "loss": 0.0026, "step": 381970 }, { "epoch": 148.34, "learning_rate": 2.2110032362459547e-07, "loss": 0.0074, "step": 381980 }, { "epoch": 148.35, "learning_rate": 2.2058252427184467e-07, "loss": 0.0006, "step": 381990 }, { "epoch": 148.35, "learning_rate": 2.2006472491909386e-07, "loss": 0.0324, "step": 382000 }, { "epoch": 148.35, "learning_rate": 2.1954692556634303e-07, "loss": 0.0121, "step": 382010 }, { "epoch": 148.36, "learning_rate": 2.1902912621359225e-07, "loss": 0.0786, "step": 382020 }, { "epoch": 148.36, "learning_rate": 2.1851132686084145e-07, "loss": 0.0245, "step": 382030 }, { "epoch": 148.37, "learning_rate": 2.1799352750809064e-07, "loss": 0.0389, "step": 382040 }, { "epoch": 148.37, "learning_rate": 2.1747572815533983e-07, "loss": 0.0758, "step": 382050 }, { "epoch": 148.37, "learning_rate": 2.16957928802589e-07, "loss": 0.0096, "step": 382060 }, { "epoch": 148.38, "learning_rate": 2.164401294498382e-07, "loss": 0.0107, "step": 382070 }, { "epoch": 148.38, "learning_rate": 2.159223300970874e-07, "loss": 0.0742, "step": 382080 }, { "epoch": 148.38, "learning_rate": 2.154045307443366e-07, "loss": 0.1248, "step": 382090 }, { "epoch": 148.39, "learning_rate": 2.1488673139158578e-07, "loss": 0.0189, "step": 382100 }, { "epoch": 148.39, "learning_rate": 2.1436893203883498e-07, "loss": 0.1885, "step": 382110 }, { "epoch": 148.4, "learning_rate": 2.1385113268608417e-07, "loss": 0.0098, "step": 382120 }, { "epoch": 148.4, "learning_rate": 2.1333333333333334e-07, "loss": 0.0089, "step": 382130 }, { "epoch": 148.4, "learning_rate": 2.1281553398058253e-07, "loss": 0.0076, "step": 382140 }, { "epoch": 148.41, "learning_rate": 2.1229773462783173e-07, "loss": 0.0675, "step": 382150 }, { "epoch": 148.41, "learning_rate": 2.1177993527508095e-07, "loss": 0.0841, "step": 382160 }, { "epoch": 148.42, "learning_rate": 2.1126213592233012e-07, "loss": 0.0697, "step": 382170 }, { "epoch": 148.42, "learning_rate": 2.107443365695793e-07, "loss": 0.0565, "step": 382180 }, { "epoch": 148.42, "learning_rate": 2.102265372168285e-07, "loss": 0.0007, "step": 382190 }, { "epoch": 148.43, "learning_rate": 2.0970873786407767e-07, "loss": 0.0011, "step": 382200 }, { "epoch": 148.43, "learning_rate": 2.0919093851132687e-07, "loss": 0.0093, "step": 382210 }, { "epoch": 148.43, "learning_rate": 2.0867313915857606e-07, "loss": 0.0303, "step": 382220 }, { "epoch": 148.44, "learning_rate": 2.0815533980582528e-07, "loss": 0.0189, "step": 382230 }, { "epoch": 148.44, "learning_rate": 2.0763754045307445e-07, "loss": 0.0215, "step": 382240 }, { "epoch": 148.45, "learning_rate": 2.0711974110032365e-07, "loss": 0.0296, "step": 382250 }, { "epoch": 148.45, "learning_rate": 2.0660194174757284e-07, "loss": 0.0462, "step": 382260 }, { "epoch": 148.45, "learning_rate": 2.06084142394822e-07, "loss": 0.0205, "step": 382270 }, { "epoch": 148.46, "learning_rate": 2.055663430420712e-07, "loss": 0.0523, "step": 382280 }, { "epoch": 148.46, "learning_rate": 2.050485436893204e-07, "loss": 0.006, "step": 382290 }, { "epoch": 148.47, "learning_rate": 2.0453074433656962e-07, "loss": 0.0964, "step": 382300 }, { "epoch": 148.47, "learning_rate": 2.0401294498381879e-07, "loss": 0.0002, "step": 382310 }, { "epoch": 148.47, "learning_rate": 2.0349514563106798e-07, "loss": 0.0992, "step": 382320 }, { "epoch": 148.48, "learning_rate": 2.0297734627831718e-07, "loss": 0.0007, "step": 382330 }, { "epoch": 148.48, "learning_rate": 2.0245954692556634e-07, "loss": 0.1296, "step": 382340 }, { "epoch": 148.49, "learning_rate": 2.0194174757281554e-07, "loss": 0.0038, "step": 382350 }, { "epoch": 148.49, "learning_rate": 2.0142394822006473e-07, "loss": 0.0286, "step": 382360 }, { "epoch": 148.49, "learning_rate": 2.0090614886731395e-07, "loss": 0.0093, "step": 382370 }, { "epoch": 148.5, "learning_rate": 2.0038834951456312e-07, "loss": 0.096, "step": 382380 }, { "epoch": 148.5, "learning_rate": 1.9987055016181232e-07, "loss": 0.0242, "step": 382390 }, { "epoch": 148.5, "learning_rate": 1.993527508090615e-07, "loss": 0.0189, "step": 382400 }, { "epoch": 148.51, "learning_rate": 1.9883495145631068e-07, "loss": 0.0589, "step": 382410 }, { "epoch": 148.51, "learning_rate": 1.9831715210355987e-07, "loss": 0.0676, "step": 382420 }, { "epoch": 148.52, "learning_rate": 1.9779935275080907e-07, "loss": 0.0209, "step": 382430 }, { "epoch": 148.52, "learning_rate": 1.972815533980583e-07, "loss": 0.0202, "step": 382440 }, { "epoch": 148.52, "learning_rate": 1.9676375404530746e-07, "loss": 0.0002, "step": 382450 }, { "epoch": 148.53, "learning_rate": 1.9624595469255665e-07, "loss": 0.0242, "step": 382460 }, { "epoch": 148.53, "learning_rate": 1.9572815533980585e-07, "loss": 0.0258, "step": 382470 }, { "epoch": 148.54, "learning_rate": 1.9521035598705501e-07, "loss": 0.1719, "step": 382480 }, { "epoch": 148.54, "learning_rate": 1.946925566343042e-07, "loss": 0.0001, "step": 382490 }, { "epoch": 148.54, "learning_rate": 1.941747572815534e-07, "loss": 0.0093, "step": 382500 }, { "epoch": 148.55, "learning_rate": 1.9365695792880262e-07, "loss": 0.078, "step": 382510 }, { "epoch": 148.55, "learning_rate": 1.931391585760518e-07, "loss": 0.0002, "step": 382520 }, { "epoch": 148.56, "learning_rate": 1.9262135922330099e-07, "loss": 0.0179, "step": 382530 }, { "epoch": 148.56, "learning_rate": 1.9210355987055018e-07, "loss": 0.0141, "step": 382540 }, { "epoch": 148.56, "learning_rate": 1.9158576051779935e-07, "loss": 0.0005, "step": 382550 }, { "epoch": 148.57, "learning_rate": 1.9106796116504854e-07, "loss": 0.0316, "step": 382560 }, { "epoch": 148.57, "learning_rate": 1.9055016181229774e-07, "loss": 0.0864, "step": 382570 }, { "epoch": 148.57, "learning_rate": 1.9003236245954696e-07, "loss": 0.0976, "step": 382580 }, { "epoch": 148.58, "learning_rate": 1.8951456310679613e-07, "loss": 0.0093, "step": 382590 }, { "epoch": 148.58, "learning_rate": 1.8899676375404532e-07, "loss": 0.0595, "step": 382600 }, { "epoch": 148.59, "learning_rate": 1.8847896440129452e-07, "loss": 0.1075, "step": 382610 }, { "epoch": 148.59, "learning_rate": 1.879611650485437e-07, "loss": 0.019, "step": 382620 }, { "epoch": 148.59, "learning_rate": 1.8744336569579288e-07, "loss": 0.064, "step": 382630 }, { "epoch": 148.6, "learning_rate": 1.8692556634304207e-07, "loss": 0.02, "step": 382640 }, { "epoch": 148.6, "learning_rate": 1.864077669902913e-07, "loss": 0.0001, "step": 382650 }, { "epoch": 148.61, "learning_rate": 1.858899676375405e-07, "loss": 0.0687, "step": 382660 }, { "epoch": 148.61, "learning_rate": 1.8537216828478966e-07, "loss": 0.0196, "step": 382670 }, { "epoch": 148.61, "learning_rate": 1.8485436893203885e-07, "loss": 0.0533, "step": 382680 }, { "epoch": 148.62, "learning_rate": 1.8433656957928805e-07, "loss": 0.0171, "step": 382690 }, { "epoch": 148.62, "learning_rate": 1.8381877022653721e-07, "loss": 0.0206, "step": 382700 }, { "epoch": 148.63, "learning_rate": 1.833009708737864e-07, "loss": 0.0002, "step": 382710 }, { "epoch": 148.63, "learning_rate": 1.8278317152103563e-07, "loss": 0.0528, "step": 382720 }, { "epoch": 148.63, "learning_rate": 1.8226537216828482e-07, "loss": 0.0976, "step": 382730 }, { "epoch": 148.64, "learning_rate": 1.81747572815534e-07, "loss": 0.0373, "step": 382740 }, { "epoch": 148.64, "learning_rate": 1.8122977346278319e-07, "loss": 0.0041, "step": 382750 }, { "epoch": 148.64, "learning_rate": 1.8071197411003238e-07, "loss": 0.008, "step": 382760 }, { "epoch": 148.65, "learning_rate": 1.8019417475728155e-07, "loss": 0.0583, "step": 382770 }, { "epoch": 148.65, "learning_rate": 1.7967637540453074e-07, "loss": 0.0013, "step": 382780 }, { "epoch": 148.66, "learning_rate": 1.7915857605177996e-07, "loss": 0.0003, "step": 382790 }, { "epoch": 148.66, "learning_rate": 1.7864077669902916e-07, "loss": 0.0191, "step": 382800 }, { "epoch": 148.66, "learning_rate": 1.7812297734627833e-07, "loss": 0.0003, "step": 382810 }, { "epoch": 148.67, "learning_rate": 1.7760517799352752e-07, "loss": 0.0262, "step": 382820 }, { "epoch": 148.67, "learning_rate": 1.7708737864077672e-07, "loss": 0.0561, "step": 382830 }, { "epoch": 148.68, "learning_rate": 1.7656957928802588e-07, "loss": 0.0004, "step": 382840 }, { "epoch": 148.68, "learning_rate": 1.7605177993527508e-07, "loss": 0.0523, "step": 382850 }, { "epoch": 148.68, "learning_rate": 1.755339805825243e-07, "loss": 0.0018, "step": 382860 }, { "epoch": 148.69, "learning_rate": 1.750161812297735e-07, "loss": 0.1185, "step": 382870 }, { "epoch": 148.69, "learning_rate": 1.7449838187702266e-07, "loss": 0.0004, "step": 382880 }, { "epoch": 148.7, "learning_rate": 1.7398058252427186e-07, "loss": 0.0086, "step": 382890 }, { "epoch": 148.7, "learning_rate": 1.7346278317152105e-07, "loss": 0.0994, "step": 382900 }, { "epoch": 148.7, "learning_rate": 1.7294498381877022e-07, "loss": 0.049, "step": 382910 }, { "epoch": 148.71, "learning_rate": 1.7242718446601941e-07, "loss": 0.0157, "step": 382920 }, { "epoch": 148.71, "learning_rate": 1.7190938511326864e-07, "loss": 0.1754, "step": 382930 }, { "epoch": 148.71, "learning_rate": 1.7139158576051783e-07, "loss": 0.0018, "step": 382940 }, { "epoch": 148.72, "learning_rate": 1.70873786407767e-07, "loss": 0.0006, "step": 382950 }, { "epoch": 148.72, "learning_rate": 1.703559870550162e-07, "loss": 0.0004, "step": 382960 }, { "epoch": 148.73, "learning_rate": 1.698381877022654e-07, "loss": 0.0663, "step": 382970 }, { "epoch": 148.73, "learning_rate": 1.6932038834951455e-07, "loss": 0.0083, "step": 382980 }, { "epoch": 148.73, "learning_rate": 1.6880258899676375e-07, "loss": 0.0176, "step": 382990 }, { "epoch": 148.74, "learning_rate": 1.6828478964401297e-07, "loss": 0.001, "step": 383000 }, { "epoch": 148.74, "learning_rate": 1.6776699029126216e-07, "loss": 0.0995, "step": 383010 }, { "epoch": 148.75, "learning_rate": 1.6724919093851133e-07, "loss": 0.0127, "step": 383020 }, { "epoch": 148.75, "learning_rate": 1.6673139158576053e-07, "loss": 0.1334, "step": 383030 }, { "epoch": 148.75, "learning_rate": 1.6621359223300972e-07, "loss": 0.0061, "step": 383040 }, { "epoch": 148.76, "learning_rate": 1.656957928802589e-07, "loss": 0.0115, "step": 383050 }, { "epoch": 148.76, "learning_rate": 1.6517799352750808e-07, "loss": 0.0948, "step": 383060 }, { "epoch": 148.77, "learning_rate": 1.646601941747573e-07, "loss": 0.0196, "step": 383070 }, { "epoch": 148.77, "learning_rate": 1.641423948220065e-07, "loss": 0.0234, "step": 383080 }, { "epoch": 148.77, "learning_rate": 1.6362459546925567e-07, "loss": 0.0668, "step": 383090 }, { "epoch": 148.78, "learning_rate": 1.6310679611650486e-07, "loss": 0.0667, "step": 383100 }, { "epoch": 148.78, "learning_rate": 1.6258899676375406e-07, "loss": 0.0426, "step": 383110 }, { "epoch": 148.78, "learning_rate": 1.6207119741100323e-07, "loss": 0.1047, "step": 383120 }, { "epoch": 148.79, "learning_rate": 1.6155339805825242e-07, "loss": 0.0021, "step": 383130 }, { "epoch": 148.79, "learning_rate": 1.6103559870550164e-07, "loss": 0.0086, "step": 383140 }, { "epoch": 148.8, "learning_rate": 1.6051779935275084e-07, "loss": 0.1403, "step": 383150 }, { "epoch": 148.8, "learning_rate": 1.6e-07, "loss": 0.032, "step": 383160 }, { "epoch": 148.8, "learning_rate": 1.594822006472492e-07, "loss": 0.0073, "step": 383170 }, { "epoch": 148.81, "learning_rate": 1.589644012944984e-07, "loss": 0.0005, "step": 383180 }, { "epoch": 148.81, "learning_rate": 1.584466019417476e-07, "loss": 0.0004, "step": 383190 }, { "epoch": 148.82, "learning_rate": 1.5792880258899675e-07, "loss": 0.1269, "step": 383200 }, { "epoch": 148.82, "learning_rate": 1.5741100323624598e-07, "loss": 0.0881, "step": 383210 }, { "epoch": 148.82, "learning_rate": 1.5689320388349517e-07, "loss": 0.0052, "step": 383220 }, { "epoch": 148.83, "learning_rate": 1.5637540453074437e-07, "loss": 0.1057, "step": 383230 }, { "epoch": 148.83, "learning_rate": 1.5585760517799353e-07, "loss": 0.0481, "step": 383240 }, { "epoch": 148.83, "learning_rate": 1.5533980582524273e-07, "loss": 0.0001, "step": 383250 }, { "epoch": 148.84, "learning_rate": 1.5482200647249192e-07, "loss": 0.0004, "step": 383260 }, { "epoch": 148.84, "learning_rate": 1.5430420711974112e-07, "loss": 0.0384, "step": 383270 }, { "epoch": 148.85, "learning_rate": 1.537864077669903e-07, "loss": 0.046, "step": 383280 }, { "epoch": 148.85, "learning_rate": 1.532686084142395e-07, "loss": 0.0151, "step": 383290 }, { "epoch": 148.85, "learning_rate": 1.527508090614887e-07, "loss": 0.0043, "step": 383300 }, { "epoch": 148.86, "learning_rate": 1.5223300970873787e-07, "loss": 0.0268, "step": 383310 }, { "epoch": 148.86, "learning_rate": 1.5171521035598706e-07, "loss": 0.0002, "step": 383320 }, { "epoch": 148.87, "learning_rate": 1.5119741100323626e-07, "loss": 0.0474, "step": 383330 }, { "epoch": 148.87, "learning_rate": 1.5067961165048545e-07, "loss": 0.0045, "step": 383340 }, { "epoch": 148.87, "learning_rate": 1.5016181229773465e-07, "loss": 0.0001, "step": 383350 }, { "epoch": 148.88, "learning_rate": 1.4964401294498384e-07, "loss": 0.0082, "step": 383360 }, { "epoch": 148.88, "learning_rate": 1.4912621359223304e-07, "loss": 0.1111, "step": 383370 }, { "epoch": 148.89, "learning_rate": 1.486084142394822e-07, "loss": 0.0223, "step": 383380 }, { "epoch": 148.89, "learning_rate": 1.480906148867314e-07, "loss": 0.0164, "step": 383390 }, { "epoch": 148.89, "learning_rate": 1.475728155339806e-07, "loss": 0.0011, "step": 383400 }, { "epoch": 148.9, "learning_rate": 1.470550161812298e-07, "loss": 0.0467, "step": 383410 }, { "epoch": 148.9, "learning_rate": 1.4653721682847898e-07, "loss": 0.0174, "step": 383420 }, { "epoch": 148.9, "learning_rate": 1.4601941747572818e-07, "loss": 0.0906, "step": 383430 }, { "epoch": 148.91, "learning_rate": 1.4550161812297737e-07, "loss": 0.0042, "step": 383440 }, { "epoch": 148.91, "learning_rate": 1.4498381877022654e-07, "loss": 0.0113, "step": 383450 }, { "epoch": 148.92, "learning_rate": 1.4446601941747573e-07, "loss": 0.0667, "step": 383460 }, { "epoch": 148.92, "learning_rate": 1.4394822006472493e-07, "loss": 0.1573, "step": 383470 }, { "epoch": 148.92, "learning_rate": 1.4343042071197412e-07, "loss": 0.0584, "step": 383480 }, { "epoch": 148.93, "learning_rate": 1.4291262135922332e-07, "loss": 0.002, "step": 383490 }, { "epoch": 148.93, "learning_rate": 1.423948220064725e-07, "loss": 0.0086, "step": 383500 }, { "epoch": 148.94, "learning_rate": 1.418770226537217e-07, "loss": 0.0396, "step": 383510 }, { "epoch": 148.94, "learning_rate": 1.4135922330097087e-07, "loss": 0.1335, "step": 383520 }, { "epoch": 148.94, "learning_rate": 1.4084142394822007e-07, "loss": 0.0043, "step": 383530 }, { "epoch": 148.95, "learning_rate": 1.4032362459546926e-07, "loss": 0.0777, "step": 383540 }, { "epoch": 148.95, "learning_rate": 1.3980582524271846e-07, "loss": 0.0278, "step": 383550 }, { "epoch": 148.96, "learning_rate": 1.3928802588996765e-07, "loss": 0.0299, "step": 383560 }, { "epoch": 148.96, "learning_rate": 1.3877022653721685e-07, "loss": 0.0251, "step": 383570 }, { "epoch": 148.96, "learning_rate": 1.3825242718446604e-07, "loss": 0.0501, "step": 383580 }, { "epoch": 148.97, "learning_rate": 1.377346278317152e-07, "loss": 0.1168, "step": 383590 }, { "epoch": 148.97, "learning_rate": 1.372168284789644e-07, "loss": 0.0291, "step": 383600 }, { "epoch": 148.97, "learning_rate": 1.366990291262136e-07, "loss": 0.0004, "step": 383610 }, { "epoch": 148.98, "learning_rate": 1.361812297734628e-07, "loss": 0.0239, "step": 383620 }, { "epoch": 148.98, "learning_rate": 1.35663430420712e-07, "loss": 0.0778, "step": 383630 }, { "epoch": 148.99, "learning_rate": 1.3514563106796118e-07, "loss": 0.1128, "step": 383640 }, { "epoch": 148.99, "learning_rate": 1.3462783171521038e-07, "loss": 0.001, "step": 383650 }, { "epoch": 148.99, "learning_rate": 1.3411003236245954e-07, "loss": 0.0071, "step": 383660 }, { "epoch": 149.0, "learning_rate": 1.3359223300970874e-07, "loss": 0.0927, "step": 383670 }, { "epoch": 149.0, "eval_accuracy": 0.9526822558459422, "eval_loss": 0.3894926905632019, "eval_runtime": 8.2902, "eval_samples_per_second": 438.468, "eval_steps_per_second": 54.884, "step": 383675 }, { "epoch": 149.0, "learning_rate": 1.3307443365695793e-07, "loss": 0.0071, "step": 383680 }, { "epoch": 149.01, "learning_rate": 1.3255663430420713e-07, "loss": 0.0002, "step": 383690 }, { "epoch": 149.01, "learning_rate": 1.3203883495145632e-07, "loss": 0.0561, "step": 383700 }, { "epoch": 149.01, "learning_rate": 1.3152103559870552e-07, "loss": 0.0316, "step": 383710 }, { "epoch": 149.02, "learning_rate": 1.310032362459547e-07, "loss": 0.0011, "step": 383720 }, { "epoch": 149.02, "learning_rate": 1.304854368932039e-07, "loss": 0.0542, "step": 383730 }, { "epoch": 149.03, "learning_rate": 1.2996763754045307e-07, "loss": 0.0003, "step": 383740 }, { "epoch": 149.03, "learning_rate": 1.294498381877023e-07, "loss": 0.0069, "step": 383750 }, { "epoch": 149.03, "learning_rate": 1.2893203883495146e-07, "loss": 0.039, "step": 383760 }, { "epoch": 149.04, "learning_rate": 1.2841423948220066e-07, "loss": 0.0189, "step": 383770 }, { "epoch": 149.04, "learning_rate": 1.2789644012944985e-07, "loss": 0.0062, "step": 383780 }, { "epoch": 149.04, "learning_rate": 1.2737864077669905e-07, "loss": 0.0702, "step": 383790 }, { "epoch": 149.05, "learning_rate": 1.2686084142394824e-07, "loss": 0.05, "step": 383800 }, { "epoch": 149.05, "learning_rate": 1.263430420711974e-07, "loss": 0.0028, "step": 383810 }, { "epoch": 149.06, "learning_rate": 1.2582524271844663e-07, "loss": 0.0087, "step": 383820 }, { "epoch": 149.06, "learning_rate": 1.253074433656958e-07, "loss": 0.0922, "step": 383830 }, { "epoch": 149.06, "learning_rate": 1.24789644012945e-07, "loss": 0.0092, "step": 383840 }, { "epoch": 149.07, "learning_rate": 1.242718446601942e-07, "loss": 0.1135, "step": 383850 }, { "epoch": 149.07, "learning_rate": 1.2375404530744338e-07, "loss": 0.0081, "step": 383860 }, { "epoch": 149.08, "learning_rate": 1.2323624595469258e-07, "loss": 0.0368, "step": 383870 }, { "epoch": 149.08, "learning_rate": 1.2271844660194174e-07, "loss": 0.0247, "step": 383880 }, { "epoch": 149.08, "learning_rate": 1.2220064724919097e-07, "loss": 0.0755, "step": 383890 }, { "epoch": 149.09, "learning_rate": 1.2168284789644013e-07, "loss": 0.0476, "step": 383900 }, { "epoch": 149.09, "learning_rate": 1.2116504854368933e-07, "loss": 0.0722, "step": 383910 }, { "epoch": 149.1, "learning_rate": 1.2064724919093852e-07, "loss": 0.0091, "step": 383920 }, { "epoch": 149.1, "learning_rate": 1.2012944983818772e-07, "loss": 0.0819, "step": 383930 }, { "epoch": 149.1, "learning_rate": 1.196116504854369e-07, "loss": 0.0855, "step": 383940 }, { "epoch": 149.11, "learning_rate": 1.1909385113268609e-07, "loss": 0.0012, "step": 383950 }, { "epoch": 149.11, "learning_rate": 1.1857605177993529e-07, "loss": 0.0069, "step": 383960 }, { "epoch": 149.11, "learning_rate": 1.1805825242718448e-07, "loss": 0.0384, "step": 383970 }, { "epoch": 149.12, "learning_rate": 1.1754045307443366e-07, "loss": 0.0369, "step": 383980 }, { "epoch": 149.12, "learning_rate": 1.1702265372168284e-07, "loss": 0.0289, "step": 383990 }, { "epoch": 149.13, "learning_rate": 1.1650485436893205e-07, "loss": 0.0401, "step": 384000 }, { "epoch": 149.13, "learning_rate": 1.1598705501618123e-07, "loss": 0.0492, "step": 384010 }, { "epoch": 149.13, "learning_rate": 1.1546925566343043e-07, "loss": 0.0115, "step": 384020 }, { "epoch": 149.14, "learning_rate": 1.1495145631067962e-07, "loss": 0.0217, "step": 384030 }, { "epoch": 149.14, "learning_rate": 1.1443365695792882e-07, "loss": 0.0003, "step": 384040 }, { "epoch": 149.15, "learning_rate": 1.13915857605178e-07, "loss": 0.013, "step": 384050 }, { "epoch": 149.15, "learning_rate": 1.1339805825242718e-07, "loss": 0.0089, "step": 384060 }, { "epoch": 149.15, "learning_rate": 1.1288025889967639e-07, "loss": 0.0876, "step": 384070 }, { "epoch": 149.16, "learning_rate": 1.1236245954692557e-07, "loss": 0.0049, "step": 384080 }, { "epoch": 149.16, "learning_rate": 1.1184466019417476e-07, "loss": 0.0454, "step": 384090 }, { "epoch": 149.17, "learning_rate": 1.1132686084142396e-07, "loss": 0.2019, "step": 384100 }, { "epoch": 149.17, "learning_rate": 1.1080906148867315e-07, "loss": 0.0144, "step": 384110 }, { "epoch": 149.17, "learning_rate": 1.1029126213592233e-07, "loss": 0.0885, "step": 384120 }, { "epoch": 149.18, "learning_rate": 1.0977346278317151e-07, "loss": 0.0213, "step": 384130 }, { "epoch": 149.18, "learning_rate": 1.0925566343042072e-07, "loss": 0.0161, "step": 384140 }, { "epoch": 149.18, "learning_rate": 1.0873786407766992e-07, "loss": 0.0266, "step": 384150 }, { "epoch": 149.19, "learning_rate": 1.082200647249191e-07, "loss": 0.0726, "step": 384160 }, { "epoch": 149.19, "learning_rate": 1.077022653721683e-07, "loss": 0.0188, "step": 384170 }, { "epoch": 149.2, "learning_rate": 1.0718446601941749e-07, "loss": 0.0638, "step": 384180 }, { "epoch": 149.2, "learning_rate": 1.0666666666666667e-07, "loss": 0.0919, "step": 384190 }, { "epoch": 149.2, "learning_rate": 1.0614886731391586e-07, "loss": 0.0624, "step": 384200 }, { "epoch": 149.21, "learning_rate": 1.0563106796116506e-07, "loss": 0.0261, "step": 384210 }, { "epoch": 149.21, "learning_rate": 1.0511326860841425e-07, "loss": 0.0323, "step": 384220 }, { "epoch": 149.22, "learning_rate": 1.0459546925566343e-07, "loss": 0.0532, "step": 384230 }, { "epoch": 149.22, "learning_rate": 1.0407766990291264e-07, "loss": 0.0402, "step": 384240 }, { "epoch": 149.22, "learning_rate": 1.0355987055016182e-07, "loss": 0.0184, "step": 384250 }, { "epoch": 149.23, "learning_rate": 1.03042071197411e-07, "loss": 0.0994, "step": 384260 }, { "epoch": 149.23, "learning_rate": 1.025242718446602e-07, "loss": 0.0279, "step": 384270 }, { "epoch": 149.23, "learning_rate": 1.0200647249190939e-07, "loss": 0.0001, "step": 384280 }, { "epoch": 149.24, "learning_rate": 1.0148867313915859e-07, "loss": 0.0239, "step": 384290 }, { "epoch": 149.24, "learning_rate": 1.0097087378640777e-07, "loss": 0.0526, "step": 384300 }, { "epoch": 149.25, "learning_rate": 1.0045307443365698e-07, "loss": 0.0683, "step": 384310 }, { "epoch": 149.25, "learning_rate": 9.993527508090616e-08, "loss": 0.0146, "step": 384320 }, { "epoch": 149.25, "learning_rate": 9.941747572815534e-08, "loss": 0.0891, "step": 384330 }, { "epoch": 149.26, "learning_rate": 9.889967637540453e-08, "loss": 0.0184, "step": 384340 }, { "epoch": 149.26, "learning_rate": 9.838187702265373e-08, "loss": 0.0872, "step": 384350 }, { "epoch": 149.27, "learning_rate": 9.786407766990292e-08, "loss": 0.0021, "step": 384360 }, { "epoch": 149.27, "learning_rate": 9.73462783171521e-08, "loss": 0.1018, "step": 384370 }, { "epoch": 149.27, "learning_rate": 9.682847896440131e-08, "loss": 0.0648, "step": 384380 }, { "epoch": 149.28, "learning_rate": 9.631067961165049e-08, "loss": 0.0001, "step": 384390 }, { "epoch": 149.28, "learning_rate": 9.579288025889967e-08, "loss": 0.0027, "step": 384400 }, { "epoch": 149.29, "learning_rate": 9.527508090614887e-08, "loss": 0.035, "step": 384410 }, { "epoch": 149.29, "learning_rate": 9.475728155339806e-08, "loss": 0.0249, "step": 384420 }, { "epoch": 149.29, "learning_rate": 9.423948220064726e-08, "loss": 0.0179, "step": 384430 }, { "epoch": 149.3, "learning_rate": 9.372168284789644e-08, "loss": 0.0224, "step": 384440 }, { "epoch": 149.3, "learning_rate": 9.320388349514565e-08, "loss": 0.0859, "step": 384450 }, { "epoch": 149.3, "learning_rate": 9.268608414239483e-08, "loss": 0.0819, "step": 384460 }, { "epoch": 149.31, "learning_rate": 9.216828478964402e-08, "loss": 0.0005, "step": 384470 }, { "epoch": 149.31, "learning_rate": 9.16504854368932e-08, "loss": 0.0016, "step": 384480 }, { "epoch": 149.32, "learning_rate": 9.113268608414241e-08, "loss": 0.0509, "step": 384490 }, { "epoch": 149.32, "learning_rate": 9.061488673139159e-08, "loss": 0.0106, "step": 384500 }, { "epoch": 149.32, "learning_rate": 9.009708737864077e-08, "loss": 0.0269, "step": 384510 }, { "epoch": 149.33, "learning_rate": 8.957928802588998e-08, "loss": 0.0981, "step": 384520 }, { "epoch": 149.33, "learning_rate": 8.906148867313916e-08, "loss": 0.0411, "step": 384530 }, { "epoch": 149.34, "learning_rate": 8.854368932038836e-08, "loss": 0.0137, "step": 384540 }, { "epoch": 149.34, "learning_rate": 8.802588996763754e-08, "loss": 0.0411, "step": 384550 }, { "epoch": 149.34, "learning_rate": 8.750809061488675e-08, "loss": 0.0108, "step": 384560 }, { "epoch": 149.35, "learning_rate": 8.699029126213593e-08, "loss": 0.022, "step": 384570 }, { "epoch": 149.35, "learning_rate": 8.647249190938511e-08, "loss": 0.01, "step": 384580 }, { "epoch": 149.36, "learning_rate": 8.595469255663432e-08, "loss": 0.02, "step": 384590 }, { "epoch": 149.36, "learning_rate": 8.54368932038835e-08, "loss": 0.077, "step": 384600 }, { "epoch": 149.36, "learning_rate": 8.49190938511327e-08, "loss": 0.051, "step": 384610 }, { "epoch": 149.37, "learning_rate": 8.440129449838187e-08, "loss": 0.034, "step": 384620 }, { "epoch": 149.37, "learning_rate": 8.388349514563108e-08, "loss": 0.1025, "step": 384630 }, { "epoch": 149.37, "learning_rate": 8.336569579288026e-08, "loss": 0.0199, "step": 384640 }, { "epoch": 149.38, "learning_rate": 8.284789644012945e-08, "loss": 0.0146, "step": 384650 }, { "epoch": 149.38, "learning_rate": 8.233009708737865e-08, "loss": 0.0195, "step": 384660 }, { "epoch": 149.39, "learning_rate": 8.181229773462783e-08, "loss": 0.0546, "step": 384670 }, { "epoch": 149.39, "learning_rate": 8.129449838187703e-08, "loss": 0.0981, "step": 384680 }, { "epoch": 149.39, "learning_rate": 8.077669902912621e-08, "loss": 0.0003, "step": 384690 }, { "epoch": 149.4, "learning_rate": 8.025889967637542e-08, "loss": 0.0019, "step": 384700 }, { "epoch": 149.4, "learning_rate": 7.97411003236246e-08, "loss": 0.0089, "step": 384710 }, { "epoch": 149.41, "learning_rate": 7.92233009708738e-08, "loss": 0.0088, "step": 384720 }, { "epoch": 149.41, "learning_rate": 7.870550161812299e-08, "loss": 0.0756, "step": 384730 }, { "epoch": 149.41, "learning_rate": 7.818770226537218e-08, "loss": 0.0001, "step": 384740 }, { "epoch": 149.42, "learning_rate": 7.766990291262136e-08, "loss": 0.0062, "step": 384750 }, { "epoch": 149.42, "learning_rate": 7.715210355987056e-08, "loss": 0.1209, "step": 384760 }, { "epoch": 149.43, "learning_rate": 7.663430420711975e-08, "loss": 0.0163, "step": 384770 }, { "epoch": 149.43, "learning_rate": 7.611650485436893e-08, "loss": 0.0443, "step": 384780 }, { "epoch": 149.43, "learning_rate": 7.559870550161813e-08, "loss": 0.0282, "step": 384790 }, { "epoch": 149.44, "learning_rate": 7.508090614886732e-08, "loss": 0.0002, "step": 384800 }, { "epoch": 149.44, "learning_rate": 7.456310679611652e-08, "loss": 0.0461, "step": 384810 }, { "epoch": 149.44, "learning_rate": 7.40453074433657e-08, "loss": 0.0383, "step": 384820 }, { "epoch": 149.45, "learning_rate": 7.35275080906149e-08, "loss": 0.093, "step": 384830 }, { "epoch": 149.45, "learning_rate": 7.300970873786409e-08, "loss": 0.0355, "step": 384840 }, { "epoch": 149.46, "learning_rate": 7.249190938511327e-08, "loss": 0.0733, "step": 384850 }, { "epoch": 149.46, "learning_rate": 7.197411003236246e-08, "loss": 0.0282, "step": 384860 }, { "epoch": 149.46, "learning_rate": 7.145631067961166e-08, "loss": 0.0019, "step": 384870 }, { "epoch": 149.47, "learning_rate": 7.093851132686085e-08, "loss": 0.0023, "step": 384880 }, { "epoch": 149.47, "learning_rate": 7.042071197411003e-08, "loss": 0.1254, "step": 384890 }, { "epoch": 149.48, "learning_rate": 6.990291262135923e-08, "loss": 0.0028, "step": 384900 }, { "epoch": 149.48, "learning_rate": 6.938511326860842e-08, "loss": 0.0228, "step": 384910 }, { "epoch": 149.48, "learning_rate": 6.88673139158576e-08, "loss": 0.0903, "step": 384920 }, { "epoch": 149.49, "learning_rate": 6.83495145631068e-08, "loss": 0.0107, "step": 384930 }, { "epoch": 149.49, "learning_rate": 6.7831715210356e-08, "loss": 0.0243, "step": 384940 }, { "epoch": 149.5, "learning_rate": 6.731391585760519e-08, "loss": 0.0263, "step": 384950 }, { "epoch": 149.5, "learning_rate": 6.679611650485437e-08, "loss": 0.0014, "step": 384960 }, { "epoch": 149.5, "learning_rate": 6.627831715210356e-08, "loss": 0.0949, "step": 384970 }, { "epoch": 149.51, "learning_rate": 6.576051779935276e-08, "loss": 0.1516, "step": 384980 }, { "epoch": 149.51, "learning_rate": 6.524271844660195e-08, "loss": 0.0726, "step": 384990 }, { "epoch": 149.51, "learning_rate": 6.472491909385115e-08, "loss": 0.0034, "step": 385000 }, { "epoch": 149.52, "learning_rate": 6.420711974110033e-08, "loss": 0.0615, "step": 385010 }, { "epoch": 149.52, "learning_rate": 6.368932038834952e-08, "loss": 0.0002, "step": 385020 }, { "epoch": 149.53, "learning_rate": 6.31715210355987e-08, "loss": 0.0001, "step": 385030 }, { "epoch": 149.53, "learning_rate": 6.26537216828479e-08, "loss": 0.0415, "step": 385040 }, { "epoch": 149.53, "learning_rate": 6.21359223300971e-08, "loss": 0.1139, "step": 385050 }, { "epoch": 149.54, "learning_rate": 6.161812297734629e-08, "loss": 0.0566, "step": 385060 }, { "epoch": 149.54, "learning_rate": 6.110032362459548e-08, "loss": 0.0781, "step": 385070 }, { "epoch": 149.55, "learning_rate": 6.058252427184466e-08, "loss": 0.0006, "step": 385080 }, { "epoch": 149.55, "learning_rate": 6.006472491909386e-08, "loss": 0.0261, "step": 385090 }, { "epoch": 149.55, "learning_rate": 5.9546925566343046e-08, "loss": 0.0139, "step": 385100 }, { "epoch": 149.56, "learning_rate": 5.902912621359224e-08, "loss": 0.0491, "step": 385110 }, { "epoch": 149.56, "learning_rate": 5.851132686084142e-08, "loss": 0.049, "step": 385120 }, { "epoch": 149.57, "learning_rate": 5.799352750809062e-08, "loss": 0.0214, "step": 385130 }, { "epoch": 149.57, "learning_rate": 5.747572815533981e-08, "loss": 0.0196, "step": 385140 }, { "epoch": 149.57, "learning_rate": 5.6957928802589e-08, "loss": 0.0187, "step": 385150 }, { "epoch": 149.58, "learning_rate": 5.6440129449838194e-08, "loss": 0.0301, "step": 385160 }, { "epoch": 149.58, "learning_rate": 5.592233009708738e-08, "loss": 0.0224, "step": 385170 }, { "epoch": 149.58, "learning_rate": 5.5404530744336576e-08, "loss": 0.0094, "step": 385180 }, { "epoch": 149.59, "learning_rate": 5.488673139158576e-08, "loss": 0.0006, "step": 385190 }, { "epoch": 149.59, "learning_rate": 5.436893203883496e-08, "loss": 0.0128, "step": 385200 }, { "epoch": 149.6, "learning_rate": 5.385113268608415e-08, "loss": 0.0717, "step": 385210 }, { "epoch": 149.6, "learning_rate": 5.3333333333333334e-08, "loss": 0.0061, "step": 385220 }, { "epoch": 149.6, "learning_rate": 5.281553398058253e-08, "loss": 0.0277, "step": 385230 }, { "epoch": 149.61, "learning_rate": 5.229773462783172e-08, "loss": 0.0217, "step": 385240 }, { "epoch": 149.61, "learning_rate": 5.177993527508091e-08, "loss": 0.0904, "step": 385250 }, { "epoch": 149.62, "learning_rate": 5.12621359223301e-08, "loss": 0.0011, "step": 385260 }, { "epoch": 149.62, "learning_rate": 5.0744336569579294e-08, "loss": 0.1023, "step": 385270 }, { "epoch": 149.62, "learning_rate": 5.022653721682849e-08, "loss": 0.0782, "step": 385280 }, { "epoch": 149.63, "learning_rate": 4.970873786407767e-08, "loss": 0.0149, "step": 385290 }, { "epoch": 149.63, "learning_rate": 4.9190938511326864e-08, "loss": 0.0322, "step": 385300 }, { "epoch": 149.63, "learning_rate": 4.867313915857605e-08, "loss": 0.0009, "step": 385310 }, { "epoch": 149.64, "learning_rate": 4.8155339805825247e-08, "loss": 0.0572, "step": 385320 }, { "epoch": 149.64, "learning_rate": 4.7637540453074435e-08, "loss": 0.1074, "step": 385330 }, { "epoch": 149.65, "learning_rate": 4.711974110032363e-08, "loss": 0.047, "step": 385340 }, { "epoch": 149.65, "learning_rate": 4.6601941747572824e-08, "loss": 0.1158, "step": 385350 }, { "epoch": 149.65, "learning_rate": 4.608414239482201e-08, "loss": 0.0077, "step": 385360 }, { "epoch": 149.66, "learning_rate": 4.5566343042071206e-08, "loss": 0.0008, "step": 385370 }, { "epoch": 149.66, "learning_rate": 4.504854368932039e-08, "loss": 0.061, "step": 385380 }, { "epoch": 149.67, "learning_rate": 4.453074433656958e-08, "loss": 0.0002, "step": 385390 }, { "epoch": 149.67, "learning_rate": 4.401294498381877e-08, "loss": 0.0389, "step": 385400 }, { "epoch": 149.67, "learning_rate": 4.3495145631067964e-08, "loss": 0.018, "step": 385410 }, { "epoch": 149.68, "learning_rate": 4.297734627831716e-08, "loss": 0.0214, "step": 385420 }, { "epoch": 149.68, "learning_rate": 4.245954692556635e-08, "loss": 0.0568, "step": 385430 }, { "epoch": 149.69, "learning_rate": 4.194174757281554e-08, "loss": 0.0089, "step": 385440 }, { "epoch": 149.69, "learning_rate": 4.142394822006472e-08, "loss": 0.0315, "step": 385450 }, { "epoch": 149.69, "learning_rate": 4.090614886731392e-08, "loss": 0.0376, "step": 385460 }, { "epoch": 149.7, "learning_rate": 4.0388349514563105e-08, "loss": 0.0099, "step": 385470 }, { "epoch": 149.7, "learning_rate": 3.98705501618123e-08, "loss": 0.0458, "step": 385480 }, { "epoch": 149.7, "learning_rate": 3.9352750809061494e-08, "loss": 0.144, "step": 385490 }, { "epoch": 149.71, "learning_rate": 3.883495145631068e-08, "loss": 0.0093, "step": 385500 }, { "epoch": 149.71, "learning_rate": 3.8317152103559876e-08, "loss": 0.0168, "step": 385510 }, { "epoch": 149.72, "learning_rate": 3.7799352750809064e-08, "loss": 0.0092, "step": 385520 }, { "epoch": 149.72, "learning_rate": 3.728155339805826e-08, "loss": 0.0076, "step": 385530 }, { "epoch": 149.72, "learning_rate": 3.676375404530745e-08, "loss": 0.0669, "step": 385540 }, { "epoch": 149.73, "learning_rate": 3.6245954692556635e-08, "loss": 0.0656, "step": 385550 }, { "epoch": 149.73, "learning_rate": 3.572815533980583e-08, "loss": 0.0634, "step": 385560 }, { "epoch": 149.74, "learning_rate": 3.521035598705502e-08, "loss": 0.0448, "step": 385570 }, { "epoch": 149.74, "learning_rate": 3.469255663430421e-08, "loss": 0.0249, "step": 385580 }, { "epoch": 149.74, "learning_rate": 3.41747572815534e-08, "loss": 0.0176, "step": 385590 }, { "epoch": 149.75, "learning_rate": 3.3656957928802594e-08, "loss": 0.0431, "step": 385600 }, { "epoch": 149.75, "learning_rate": 3.313915857605178e-08, "loss": 0.2028, "step": 385610 }, { "epoch": 149.76, "learning_rate": 3.2621359223300977e-08, "loss": 0.0553, "step": 385620 }, { "epoch": 149.76, "learning_rate": 3.2103559870550164e-08, "loss": 0.0001, "step": 385630 }, { "epoch": 149.76, "learning_rate": 3.158576051779935e-08, "loss": 0.0084, "step": 385640 }, { "epoch": 149.77, "learning_rate": 3.106796116504855e-08, "loss": 0.0864, "step": 385650 }, { "epoch": 149.77, "learning_rate": 3.055016181229774e-08, "loss": 0.0001, "step": 385660 }, { "epoch": 149.77, "learning_rate": 3.003236245954693e-08, "loss": 0.0291, "step": 385670 }, { "epoch": 149.78, "learning_rate": 2.951456310679612e-08, "loss": 0.0083, "step": 385680 }, { "epoch": 149.78, "learning_rate": 2.899676375404531e-08, "loss": 0.0109, "step": 385690 }, { "epoch": 149.79, "learning_rate": 2.84789644012945e-08, "loss": 0.1049, "step": 385700 }, { "epoch": 149.79, "learning_rate": 2.796116504854369e-08, "loss": 0.0001, "step": 385710 }, { "epoch": 149.79, "learning_rate": 2.744336569579288e-08, "loss": 0.0181, "step": 385720 }, { "epoch": 149.8, "learning_rate": 2.6925566343042077e-08, "loss": 0.0604, "step": 385730 }, { "epoch": 149.8, "learning_rate": 2.6407766990291264e-08, "loss": 0.0322, "step": 385740 }, { "epoch": 149.81, "learning_rate": 2.5889967637540456e-08, "loss": 0.0326, "step": 385750 }, { "epoch": 149.81, "learning_rate": 2.5372168284789647e-08, "loss": 0.1298, "step": 385760 }, { "epoch": 149.81, "learning_rate": 2.4854368932038835e-08, "loss": 0.0994, "step": 385770 }, { "epoch": 149.82, "learning_rate": 2.4336569579288026e-08, "loss": 0.0263, "step": 385780 }, { "epoch": 149.82, "learning_rate": 2.3818770226537217e-08, "loss": 0.0039, "step": 385790 }, { "epoch": 149.83, "learning_rate": 2.3300970873786412e-08, "loss": 0.0473, "step": 385800 }, { "epoch": 149.83, "learning_rate": 2.2783171521035603e-08, "loss": 0.0233, "step": 385810 }, { "epoch": 149.83, "learning_rate": 2.226537216828479e-08, "loss": 0.0328, "step": 385820 }, { "epoch": 149.84, "learning_rate": 2.1747572815533982e-08, "loss": 0.0278, "step": 385830 }, { "epoch": 149.84, "learning_rate": 2.1229773462783173e-08, "loss": 0.0304, "step": 385840 }, { "epoch": 149.84, "learning_rate": 2.071197411003236e-08, "loss": 0.0747, "step": 385850 }, { "epoch": 149.85, "learning_rate": 2.0194174757281552e-08, "loss": 0.0248, "step": 385860 }, { "epoch": 149.85, "learning_rate": 1.9676375404530747e-08, "loss": 0.0066, "step": 385870 }, { "epoch": 149.86, "learning_rate": 1.9158576051779938e-08, "loss": 0.0009, "step": 385880 }, { "epoch": 149.86, "learning_rate": 1.864077669902913e-08, "loss": 0.0613, "step": 385890 }, { "epoch": 149.86, "learning_rate": 1.8122977346278317e-08, "loss": 0.0418, "step": 385900 }, { "epoch": 149.87, "learning_rate": 1.760517799352751e-08, "loss": 0.0633, "step": 385910 }, { "epoch": 149.87, "learning_rate": 1.70873786407767e-08, "loss": 0.0337, "step": 385920 }, { "epoch": 149.88, "learning_rate": 1.656957928802589e-08, "loss": 0.0088, "step": 385930 }, { "epoch": 149.88, "learning_rate": 1.6051779935275082e-08, "loss": 0.0349, "step": 385940 }, { "epoch": 149.88, "learning_rate": 1.5533980582524273e-08, "loss": 0.0088, "step": 385950 }, { "epoch": 149.89, "learning_rate": 1.5016181229773465e-08, "loss": 0.0163, "step": 385960 }, { "epoch": 149.89, "learning_rate": 1.4498381877022654e-08, "loss": 0.06, "step": 385970 }, { "epoch": 149.9, "learning_rate": 1.3980582524271845e-08, "loss": 0.1068, "step": 385980 }, { "epoch": 149.9, "learning_rate": 1.3462783171521038e-08, "loss": 0.0158, "step": 385990 }, { "epoch": 149.9, "learning_rate": 1.2944983818770228e-08, "loss": 0.011, "step": 386000 }, { "epoch": 149.91, "learning_rate": 1.2427184466019417e-08, "loss": 0.0363, "step": 386010 }, { "epoch": 149.91, "learning_rate": 1.1909385113268609e-08, "loss": 0.0668, "step": 386020 }, { "epoch": 149.91, "learning_rate": 1.1391585760517802e-08, "loss": 0.0007, "step": 386030 }, { "epoch": 149.92, "learning_rate": 1.0873786407766991e-08, "loss": 0.0161, "step": 386040 }, { "epoch": 149.92, "learning_rate": 1.035598705501618e-08, "loss": 0.0881, "step": 386050 }, { "epoch": 149.93, "learning_rate": 9.838187702265373e-09, "loss": 0.0003, "step": 386060 }, { "epoch": 149.93, "learning_rate": 9.320388349514565e-09, "loss": 0.0034, "step": 386070 }, { "epoch": 149.93, "learning_rate": 8.802588996763754e-09, "loss": 0.017, "step": 386080 }, { "epoch": 149.94, "learning_rate": 8.284789644012945e-09, "loss": 0.0661, "step": 386090 }, { "epoch": 149.94, "learning_rate": 7.766990291262137e-09, "loss": 0.0339, "step": 386100 }, { "epoch": 149.95, "learning_rate": 7.249190938511327e-09, "loss": 0.0703, "step": 386110 }, { "epoch": 149.95, "learning_rate": 6.731391585760519e-09, "loss": 0.0009, "step": 386120 }, { "epoch": 149.95, "learning_rate": 6.213592233009709e-09, "loss": 0.0011, "step": 386130 }, { "epoch": 149.96, "learning_rate": 5.695792880258901e-09, "loss": 0.0504, "step": 386140 }, { "epoch": 149.96, "learning_rate": 5.17799352750809e-09, "loss": 0.0437, "step": 386150 }, { "epoch": 149.97, "learning_rate": 4.660194174757282e-09, "loss": 0.017, "step": 386160 }, { "epoch": 149.97, "learning_rate": 4.142394822006473e-09, "loss": 0.2117, "step": 386170 }, { "epoch": 149.97, "learning_rate": 3.6245954692556636e-09, "loss": 0.037, "step": 386180 }, { "epoch": 149.98, "learning_rate": 3.1067961165048544e-09, "loss": 0.0015, "step": 386190 }, { "epoch": 149.98, "learning_rate": 2.588996763754045e-09, "loss": 0.2308, "step": 386200 }, { "epoch": 149.98, "learning_rate": 2.0711974110032364e-09, "loss": 0.0001, "step": 386210 }, { "epoch": 149.99, "learning_rate": 1.5533980582524272e-09, "loss": 0.0456, "step": 386220 }, { "epoch": 149.99, "learning_rate": 1.0355987055016182e-09, "loss": 0.015, "step": 386230 }, { "epoch": 150.0, "learning_rate": 5.177993527508091e-10, "loss": 0.0411, "step": 386240 }, { "epoch": 150.0, "learning_rate": 0.0, "loss": 0.081, "step": 386250 }, { "epoch": 150.0, "eval_accuracy": 0.9526822558459422, "eval_loss": 0.38963595032691956, "eval_runtime": 8.2958, "eval_samples_per_second": 438.171, "eval_steps_per_second": 54.847, "step": 386250 }, { "epoch": 150.0, "step": 386250, "total_flos": 2.3944845939692818e+20, "train_loss": 0.08211426009329709, "train_runtime": 20002.7488, "train_samples_per_second": 154.464, "train_steps_per_second": 19.31 } ], "logging_steps": 10, "max_steps": 386250, "num_input_tokens_seen": 0, "num_train_epochs": 150, "save_steps": 500, "total_flos": 2.3944845939692818e+20, "trial_name": null, "trial_params": null }