diff --git "a/trainer_state.json" "b/trainer_state.json" --- "a/trainer_state.json" +++ "b/trainer_state.json" @@ -2,5418 +2,10806 @@ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, - "global_step": 899, + "global_step": 1797, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, - "learning_rate": 1.9977753058954393e-05, - "loss": 0.3597, + "learning_rate": 1.998887033945465e-05, + "loss": 0.672, "step": 1 }, { "epoch": 0.0, - "learning_rate": 1.995550611790879e-05, - "loss": 0.3028, + "learning_rate": 1.9977740678909294e-05, + "loss": 0.5685, "step": 2 }, { "epoch": 0.0, - "learning_rate": 1.9933259176863182e-05, - "loss": 0.2636, + "learning_rate": 1.996661101836394e-05, + "loss": 0.4998, "step": 3 }, { "epoch": 0.0, - "learning_rate": 1.9911012235817577e-05, - "loss": 0.2089, + "learning_rate": 1.995548135781859e-05, + "loss": 0.3927, "step": 4 }, { - "epoch": 0.01, - "learning_rate": 1.988876529477197e-05, - "loss": 0.2438, + "epoch": 0.0, + "learning_rate": 1.9944351697273234e-05, + "loss": 0.3488, "step": 5 }, { - "epoch": 0.01, - "learning_rate": 1.9866518353726366e-05, - "loss": 0.19, + "epoch": 0.0, + "learning_rate": 1.993322203672788e-05, + "loss": 0.3089, "step": 6 }, { - "epoch": 0.01, - "learning_rate": 1.9844271412680757e-05, - "loss": 0.1884, + "epoch": 0.0, + "learning_rate": 1.992209237618253e-05, + "loss": 0.2355, "step": 7 }, { - "epoch": 0.01, - "learning_rate": 1.982202447163515e-05, - "loss": 0.2393, + "epoch": 0.0, + "learning_rate": 1.9910962715637174e-05, + "loss": 0.2118, "step": 8 }, { "epoch": 0.01, - "learning_rate": 1.9799777530589546e-05, - "loss": 0.1862, + "learning_rate": 1.989983305509182e-05, + "loss": 0.2758, "step": 9 }, { "epoch": 0.01, - "learning_rate": 1.9777530589543937e-05, - "loss": 0.126, + "learning_rate": 1.9888703394546466e-05, + "loss": 0.2038, "step": 10 }, { "epoch": 0.01, - "learning_rate": 1.9755283648498332e-05, - "loss": 0.1401, + "learning_rate": 1.9877573734001114e-05, + "loss": 0.158, "step": 11 }, { "epoch": 0.01, - "learning_rate": 1.9733036707452726e-05, - "loss": 0.1846, + "learning_rate": 1.986644407345576e-05, + "loss": 0.2152, "step": 12 }, { "epoch": 0.01, - "learning_rate": 1.971078976640712e-05, - "loss": 0.1542, + "learning_rate": 1.9855314412910406e-05, + "loss": 0.2316, "step": 13 }, { - "epoch": 0.02, - "learning_rate": 1.9688542825361516e-05, - "loss": 0.1631, + "epoch": 0.01, + "learning_rate": 1.9844184752365054e-05, + "loss": 0.1348, "step": 14 }, { - "epoch": 0.02, - "learning_rate": 1.966629588431591e-05, - "loss": 0.1525, + "epoch": 0.01, + "learning_rate": 1.98330550918197e-05, + "loss": 0.2151, "step": 15 }, { - "epoch": 0.02, - "learning_rate": 1.96440489432703e-05, - "loss": 0.1558, + "epoch": 0.01, + "learning_rate": 1.9821925431274346e-05, + "loss": 0.2795, "step": 16 }, { - "epoch": 0.02, - "learning_rate": 1.9621802002224696e-05, - "loss": 0.201, + "epoch": 0.01, + "learning_rate": 1.9810795770728994e-05, + "loss": 0.2384, "step": 17 }, { - "epoch": 0.02, - "learning_rate": 1.959955506117909e-05, - "loss": 0.2093, + "epoch": 0.01, + "learning_rate": 1.979966611018364e-05, + "loss": 0.1468, "step": 18 }, { - "epoch": 0.02, - "learning_rate": 1.9577308120133482e-05, - "loss": 0.1792, + "epoch": 0.01, + "learning_rate": 1.9788536449638286e-05, + "loss": 0.076, "step": 19 }, { - "epoch": 0.02, - "learning_rate": 1.9555061179087876e-05, - "loss": 0.1571, + "epoch": 0.01, + "learning_rate": 1.9777406789092934e-05, + "loss": 0.1901, "step": 20 }, { - "epoch": 0.02, - "learning_rate": 1.953281423804227e-05, - "loss": 0.1716, + "epoch": 0.01, + "learning_rate": 1.976627712854758e-05, + "loss": 0.1171, "step": 21 }, { - "epoch": 0.02, - "learning_rate": 1.9510567296996666e-05, - "loss": 0.1548, + "epoch": 0.01, + "learning_rate": 1.9755147468002226e-05, + "loss": 0.1689, "step": 22 }, { - "epoch": 0.03, - "learning_rate": 1.9488320355951057e-05, - "loss": 0.0918, + "epoch": 0.01, + "learning_rate": 1.9744017807456874e-05, + "loss": 0.1939, "step": 23 }, { - "epoch": 0.03, - "learning_rate": 1.9466073414905455e-05, - "loss": 0.1617, + "epoch": 0.01, + "learning_rate": 1.973288814691152e-05, + "loss": 0.1632, "step": 24 }, { - "epoch": 0.03, - "learning_rate": 1.9443826473859846e-05, - "loss": 0.1641, + "epoch": 0.01, + "learning_rate": 1.9721758486366166e-05, + "loss": 0.1407, "step": 25 }, { - "epoch": 0.03, - "learning_rate": 1.9421579532814237e-05, - "loss": 0.1177, + "epoch": 0.01, + "learning_rate": 1.9710628825820814e-05, + "loss": 0.1643, "step": 26 }, { - "epoch": 0.03, - "learning_rate": 1.9399332591768635e-05, - "loss": 0.1971, + "epoch": 0.02, + "learning_rate": 1.969949916527546e-05, + "loss": 0.1675, "step": 27 }, { - "epoch": 0.03, - "learning_rate": 1.9377085650723026e-05, - "loss": 0.1493, + "epoch": 0.02, + "learning_rate": 1.9688369504730106e-05, + "loss": 0.166, "step": 28 }, { - "epoch": 0.03, - "learning_rate": 1.935483870967742e-05, - "loss": 0.174, + "epoch": 0.02, + "learning_rate": 1.9677239844184754e-05, + "loss": 0.1408, "step": 29 }, { - "epoch": 0.03, - "learning_rate": 1.9332591768631815e-05, - "loss": 0.1866, + "epoch": 0.02, + "learning_rate": 1.96661101836394e-05, + "loss": 0.1668, "step": 30 }, { - "epoch": 0.03, - "learning_rate": 1.931034482758621e-05, - "loss": 0.1819, + "epoch": 0.02, + "learning_rate": 1.9654980523094046e-05, + "loss": 0.1152, "step": 31 }, { - "epoch": 0.04, - "learning_rate": 1.92880978865406e-05, - "loss": 0.1585, + "epoch": 0.02, + "learning_rate": 1.9643850862548693e-05, + "loss": 0.1895, "step": 32 }, { - "epoch": 0.04, - "learning_rate": 1.9265850945494996e-05, - "loss": 0.1615, + "epoch": 0.02, + "learning_rate": 1.963272120200334e-05, + "loss": 0.218, "step": 33 }, { - "epoch": 0.04, - "learning_rate": 1.924360400444939e-05, - "loss": 0.1918, + "epoch": 0.02, + "learning_rate": 1.9621591541457986e-05, + "loss": 0.1892, "step": 34 }, { - "epoch": 0.04, - "learning_rate": 1.922135706340378e-05, - "loss": 0.233, + "epoch": 0.02, + "learning_rate": 1.9610461880912633e-05, + "loss": 0.1925, "step": 35 }, { - "epoch": 0.04, - "learning_rate": 1.9199110122358176e-05, - "loss": 0.1953, + "epoch": 0.02, + "learning_rate": 1.959933222036728e-05, + "loss": 0.2399, "step": 36 }, { - "epoch": 0.04, - "learning_rate": 1.917686318131257e-05, - "loss": 0.1619, + "epoch": 0.02, + "learning_rate": 1.9588202559821926e-05, + "loss": 0.1466, "step": 37 }, { - "epoch": 0.04, - "learning_rate": 1.9154616240266965e-05, - "loss": 0.1466, + "epoch": 0.02, + "learning_rate": 1.9577072899276573e-05, + "loss": 0.2122, "step": 38 }, { - "epoch": 0.04, - "learning_rate": 1.913236929922136e-05, - "loss": 0.2283, + "epoch": 0.02, + "learning_rate": 1.956594323873122e-05, + "loss": 0.1471, "step": 39 }, { - "epoch": 0.04, - "learning_rate": 1.9110122358175754e-05, - "loss": 0.141, + "epoch": 0.02, + "learning_rate": 1.9554813578185866e-05, + "loss": 0.163, "step": 40 }, { - "epoch": 0.05, - "learning_rate": 1.9087875417130146e-05, - "loss": 0.1253, + "epoch": 0.02, + "learning_rate": 1.9543683917640513e-05, + "loss": 0.1872, "step": 41 }, { - "epoch": 0.05, - "learning_rate": 1.906562847608454e-05, - "loss": 0.1218, + "epoch": 0.02, + "learning_rate": 1.953255425709516e-05, + "loss": 0.1625, "step": 42 }, { - "epoch": 0.05, - "learning_rate": 1.9043381535038935e-05, - "loss": 0.228, + "epoch": 0.02, + "learning_rate": 1.952142459654981e-05, + "loss": 0.1193, "step": 43 }, { - "epoch": 0.05, - "learning_rate": 1.9021134593993326e-05, - "loss": 0.1762, + "epoch": 0.02, + "learning_rate": 1.9510294936004453e-05, + "loss": 0.1918, "step": 44 }, { - "epoch": 0.05, - "learning_rate": 1.899888765294772e-05, - "loss": 0.146, + "epoch": 0.03, + "learning_rate": 1.94991652754591e-05, + "loss": 0.1135, "step": 45 }, { - "epoch": 0.05, - "learning_rate": 1.8976640711902115e-05, - "loss": 0.0873, + "epoch": 0.03, + "learning_rate": 1.948803561491375e-05, + "loss": 0.0658, "step": 46 }, { - "epoch": 0.05, - "learning_rate": 1.895439377085651e-05, - "loss": 0.2073, + "epoch": 0.03, + "learning_rate": 1.9476905954368393e-05, + "loss": 0.1668, "step": 47 }, { - "epoch": 0.05, - "learning_rate": 1.89321468298109e-05, - "loss": 0.1531, + "epoch": 0.03, + "learning_rate": 1.946577629382304e-05, + "loss": 0.1647, "step": 48 }, { - "epoch": 0.05, - "learning_rate": 1.89098998887653e-05, - "loss": 0.1897, + "epoch": 0.03, + "learning_rate": 1.945464663327769e-05, + "loss": 0.1928, "step": 49 }, { - "epoch": 0.06, - "learning_rate": 1.888765294771969e-05, - "loss": 0.1044, + "epoch": 0.03, + "learning_rate": 1.9443516972732333e-05, + "loss": 0.141, "step": 50 }, { - "epoch": 0.06, - "learning_rate": 1.886540600667408e-05, - "loss": 0.1517, + "epoch": 0.03, + "learning_rate": 1.943238731218698e-05, + "loss": 0.1412, "step": 51 }, { - "epoch": 0.06, - "learning_rate": 1.884315906562848e-05, - "loss": 0.1919, + "epoch": 0.03, + "learning_rate": 1.942125765164163e-05, + "loss": 0.0877, "step": 52 }, { - "epoch": 0.06, - "learning_rate": 1.882091212458287e-05, - "loss": 0.1601, + "epoch": 0.03, + "learning_rate": 1.9410127991096273e-05, + "loss": 0.1883, "step": 53 }, { - "epoch": 0.06, - "learning_rate": 1.8798665183537265e-05, - "loss": 0.1452, + "epoch": 0.03, + "learning_rate": 1.939899833055092e-05, + "loss": 0.2211, "step": 54 }, { - "epoch": 0.06, - "learning_rate": 1.877641824249166e-05, - "loss": 0.0979, + "epoch": 0.03, + "learning_rate": 1.938786867000557e-05, + "loss": 0.1698, "step": 55 }, { - "epoch": 0.06, - "learning_rate": 1.8754171301446054e-05, - "loss": 0.1448, + "epoch": 0.03, + "learning_rate": 1.9376739009460213e-05, + "loss": 0.1376, "step": 56 }, { - "epoch": 0.06, - "learning_rate": 1.8731924360400445e-05, - "loss": 0.1391, + "epoch": 0.03, + "learning_rate": 1.936560934891486e-05, + "loss": 0.1361, "step": 57 }, { - "epoch": 0.06, - "learning_rate": 1.870967741935484e-05, - "loss": 0.1886, + "epoch": 0.03, + "learning_rate": 1.935447968836951e-05, + "loss": 0.2193, "step": 58 }, { - "epoch": 0.07, - "learning_rate": 1.8687430478309235e-05, - "loss": 0.1399, + "epoch": 0.03, + "learning_rate": 1.9343350027824153e-05, + "loss": 0.2424, "step": 59 }, { - "epoch": 0.07, - "learning_rate": 1.8665183537263626e-05, - "loss": 0.1223, + "epoch": 0.03, + "learning_rate": 1.93322203672788e-05, + "loss": 0.1402, "step": 60 }, { - "epoch": 0.07, - "learning_rate": 1.864293659621802e-05, - "loss": 0.1544, + "epoch": 0.03, + "learning_rate": 1.9321090706733445e-05, + "loss": 0.2619, "step": 61 }, { - "epoch": 0.07, - "learning_rate": 1.8620689655172415e-05, - "loss": 0.1103, + "epoch": 0.03, + "learning_rate": 1.9309961046188093e-05, + "loss": 0.1199, "step": 62 }, { - "epoch": 0.07, - "learning_rate": 1.859844271412681e-05, - "loss": 0.1665, + "epoch": 0.04, + "learning_rate": 1.929883138564274e-05, + "loss": 0.1888, "step": 63 }, { - "epoch": 0.07, - "learning_rate": 1.8576195773081204e-05, - "loss": 0.0724, + "epoch": 0.04, + "learning_rate": 1.9287701725097385e-05, + "loss": 0.1406, "step": 64 }, { - "epoch": 0.07, - "learning_rate": 1.85539488320356e-05, - "loss": 0.0927, + "epoch": 0.04, + "learning_rate": 1.9276572064552033e-05, + "loss": 0.1217, "step": 65 }, { - "epoch": 0.07, - "learning_rate": 1.853170189098999e-05, - "loss": 0.1999, + "epoch": 0.04, + "learning_rate": 1.926544240400668e-05, + "loss": 0.2115, "step": 66 }, { - "epoch": 0.07, - "learning_rate": 1.8509454949944384e-05, - "loss": 0.318, + "epoch": 0.04, + "learning_rate": 1.9254312743461325e-05, + "loss": 0.2104, "step": 67 }, { - "epoch": 0.08, - "learning_rate": 1.848720800889878e-05, - "loss": 0.1065, + "epoch": 0.04, + "learning_rate": 1.9243183082915973e-05, + "loss": 0.1655, "step": 68 }, { - "epoch": 0.08, - "learning_rate": 1.846496106785317e-05, - "loss": 0.119, + "epoch": 0.04, + "learning_rate": 1.923205342237062e-05, + "loss": 0.2393, "step": 69 }, { - "epoch": 0.08, - "learning_rate": 1.8442714126807565e-05, - "loss": 0.2425, + "epoch": 0.04, + "learning_rate": 1.9220923761825265e-05, + "loss": 0.2369, "step": 70 }, { - "epoch": 0.08, - "learning_rate": 1.842046718576196e-05, - "loss": 0.0852, + "epoch": 0.04, + "learning_rate": 1.9209794101279913e-05, + "loss": 0.2062, "step": 71 }, { - "epoch": 0.08, - "learning_rate": 1.8398220244716354e-05, - "loss": 0.1443, + "epoch": 0.04, + "learning_rate": 1.919866444073456e-05, + "loss": 0.1879, "step": 72 }, { - "epoch": 0.08, - "learning_rate": 1.8375973303670745e-05, - "loss": 0.1345, + "epoch": 0.04, + "learning_rate": 1.9187534780189205e-05, + "loss": 0.1313, "step": 73 }, { - "epoch": 0.08, - "learning_rate": 1.8353726362625143e-05, - "loss": 0.1492, + "epoch": 0.04, + "learning_rate": 1.9176405119643853e-05, + "loss": 0.2118, "step": 74 }, { - "epoch": 0.08, - "learning_rate": 1.8331479421579534e-05, - "loss": 0.1651, + "epoch": 0.04, + "learning_rate": 1.9165275459098497e-05, + "loss": 0.0899, "step": 75 }, { - "epoch": 0.08, - "learning_rate": 1.8309232480533926e-05, - "loss": 0.2034, + "epoch": 0.04, + "learning_rate": 1.9154145798553145e-05, + "loss": 0.2171, "step": 76 }, { - "epoch": 0.09, - "learning_rate": 1.8286985539488324e-05, - "loss": 0.1517, + "epoch": 0.04, + "learning_rate": 1.9143016138007793e-05, + "loss": 0.1898, "step": 77 }, { - "epoch": 0.09, - "learning_rate": 1.8264738598442715e-05, - "loss": 0.1428, + "epoch": 0.04, + "learning_rate": 1.9131886477462437e-05, + "loss": 0.2668, "step": 78 }, { - "epoch": 0.09, - "learning_rate": 1.824249165739711e-05, - "loss": 0.1755, + "epoch": 0.04, + "learning_rate": 1.9120756816917085e-05, + "loss": 0.1138, "step": 79 }, { - "epoch": 0.09, - "learning_rate": 1.8220244716351504e-05, - "loss": 0.1029, + "epoch": 0.04, + "learning_rate": 1.9109627156371733e-05, + "loss": 0.1667, "step": 80 }, { - "epoch": 0.09, - "learning_rate": 1.81979977753059e-05, - "loss": 0.1242, + "epoch": 0.05, + "learning_rate": 1.9098497495826377e-05, + "loss": 0.1405, "step": 81 }, { - "epoch": 0.09, - "learning_rate": 1.817575083426029e-05, - "loss": 0.2067, + "epoch": 0.05, + "learning_rate": 1.9087367835281025e-05, + "loss": 0.1158, "step": 82 }, { - "epoch": 0.09, - "learning_rate": 1.8153503893214684e-05, - "loss": 0.1591, + "epoch": 0.05, + "learning_rate": 1.9076238174735673e-05, + "loss": 0.1124, "step": 83 }, { - "epoch": 0.09, - "learning_rate": 1.813125695216908e-05, - "loss": 0.1465, + "epoch": 0.05, + "learning_rate": 1.9065108514190317e-05, + "loss": 0.1418, "step": 84 }, { - "epoch": 0.09, - "learning_rate": 1.810901001112347e-05, - "loss": 0.1462, + "epoch": 0.05, + "learning_rate": 1.9053978853644965e-05, + "loss": 0.2222, "step": 85 }, { - "epoch": 0.1, - "learning_rate": 1.8086763070077865e-05, - "loss": 0.1751, + "epoch": 0.05, + "learning_rate": 1.9042849193099613e-05, + "loss": 0.2411, "step": 86 }, { - "epoch": 0.1, - "learning_rate": 1.806451612903226e-05, - "loss": 0.1788, + "epoch": 0.05, + "learning_rate": 1.9031719532554257e-05, + "loss": 0.2425, "step": 87 }, { - "epoch": 0.1, - "learning_rate": 1.8042269187986654e-05, - "loss": 0.0804, + "epoch": 0.05, + "learning_rate": 1.9020589872008905e-05, + "loss": 0.1151, "step": 88 }, { - "epoch": 0.1, - "learning_rate": 1.802002224694105e-05, - "loss": 0.1187, + "epoch": 0.05, + "learning_rate": 1.9009460211463553e-05, + "loss": 0.1854, "step": 89 }, { - "epoch": 0.1, - "learning_rate": 1.7997775305895443e-05, - "loss": 0.1467, + "epoch": 0.05, + "learning_rate": 1.8998330550918197e-05, + "loss": 0.1141, "step": 90 }, { - "epoch": 0.1, - "learning_rate": 1.7975528364849834e-05, - "loss": 0.0992, + "epoch": 0.05, + "learning_rate": 1.8987200890372845e-05, + "loss": 0.0904, "step": 91 }, { - "epoch": 0.1, - "learning_rate": 1.795328142380423e-05, - "loss": 0.1486, + "epoch": 0.05, + "learning_rate": 1.897607122982749e-05, + "loss": 0.0914, "step": 92 }, { - "epoch": 0.1, - "learning_rate": 1.7931034482758623e-05, - "loss": 0.1994, + "epoch": 0.05, + "learning_rate": 1.8964941569282137e-05, + "loss": 0.2434, "step": 93 }, { - "epoch": 0.1, - "learning_rate": 1.7908787541713015e-05, - "loss": 0.1112, + "epoch": 0.05, + "learning_rate": 1.8953811908736785e-05, + "loss": 0.1813, "step": 94 }, { - "epoch": 0.11, - "learning_rate": 1.788654060066741e-05, - "loss": 0.161, + "epoch": 0.05, + "learning_rate": 1.894268224819143e-05, + "loss": 0.2061, "step": 95 }, { - "epoch": 0.11, - "learning_rate": 1.7864293659621804e-05, - "loss": 0.1667, + "epoch": 0.05, + "learning_rate": 1.8931552587646077e-05, + "loss": 0.1254, "step": 96 }, { - "epoch": 0.11, - "learning_rate": 1.7842046718576198e-05, - "loss": 0.1416, + "epoch": 0.05, + "learning_rate": 1.8920422927100725e-05, + "loss": 0.2565, "step": 97 }, { - "epoch": 0.11, - "learning_rate": 1.781979977753059e-05, - "loss": 0.1986, + "epoch": 0.05, + "learning_rate": 1.890929326655537e-05, + "loss": 0.1283, "step": 98 }, { - "epoch": 0.11, - "learning_rate": 1.7797552836484984e-05, - "loss": 0.0908, + "epoch": 0.06, + "learning_rate": 1.8898163606010017e-05, + "loss": 0.0905, "step": 99 }, { - "epoch": 0.11, - "learning_rate": 1.777530589543938e-05, - "loss": 0.2254, + "epoch": 0.06, + "learning_rate": 1.8887033945464665e-05, + "loss": 0.1143, "step": 100 }, { - "epoch": 0.11, - "learning_rate": 1.775305895439377e-05, - "loss": 0.1893, + "epoch": 0.06, + "learning_rate": 1.887590428491931e-05, + "loss": 0.1378, "step": 101 }, { - "epoch": 0.11, - "learning_rate": 1.7730812013348168e-05, - "loss": 0.1303, + "epoch": 0.06, + "learning_rate": 1.8864774624373957e-05, + "loss": 0.1663, "step": 102 }, { - "epoch": 0.11, - "learning_rate": 1.770856507230256e-05, - "loss": 0.1742, + "epoch": 0.06, + "learning_rate": 1.8853644963828605e-05, + "loss": 0.2139, "step": 103 }, { - "epoch": 0.12, - "learning_rate": 1.7686318131256954e-05, - "loss": 0.1282, + "epoch": 0.06, + "learning_rate": 1.884251530328325e-05, + "loss": 0.1981, "step": 104 }, { - "epoch": 0.12, - "learning_rate": 1.7664071190211348e-05, - "loss": 0.1307, + "epoch": 0.06, + "learning_rate": 1.8831385642737897e-05, + "loss": 0.1944, "step": 105 }, { - "epoch": 0.12, - "learning_rate": 1.7641824249165743e-05, - "loss": 0.0995, + "epoch": 0.06, + "learning_rate": 1.8820255982192545e-05, + "loss": 0.1439, "step": 106 }, { - "epoch": 0.12, - "learning_rate": 1.7619577308120134e-05, - "loss": 0.1242, + "epoch": 0.06, + "learning_rate": 1.880912632164719e-05, + "loss": 0.1421, "step": 107 }, { - "epoch": 0.12, - "learning_rate": 1.759733036707453e-05, - "loss": 0.1959, + "epoch": 0.06, + "learning_rate": 1.8797996661101837e-05, + "loss": 0.1884, "step": 108 }, { - "epoch": 0.12, - "learning_rate": 1.7575083426028923e-05, - "loss": 0.1665, + "epoch": 0.06, + "learning_rate": 1.8786867000556485e-05, + "loss": 0.1123, "step": 109 }, { - "epoch": 0.12, - "learning_rate": 1.7552836484983314e-05, - "loss": 0.2246, + "epoch": 0.06, + "learning_rate": 1.8775737340011133e-05, + "loss": 0.0875, "step": 110 }, { - "epoch": 0.12, - "learning_rate": 1.753058954393771e-05, - "loss": 0.139, + "epoch": 0.06, + "learning_rate": 1.8764607679465777e-05, + "loss": 0.2113, "step": 111 }, { - "epoch": 0.12, - "learning_rate": 1.7508342602892103e-05, - "loss": 0.1272, + "epoch": 0.06, + "learning_rate": 1.8753478018920425e-05, + "loss": 0.0882, "step": 112 }, { - "epoch": 0.13, - "learning_rate": 1.7486095661846498e-05, - "loss": 0.0899, + "epoch": 0.06, + "learning_rate": 1.8742348358375073e-05, + "loss": 0.2431, "step": 113 }, { - "epoch": 0.13, - "learning_rate": 1.7463848720800893e-05, - "loss": 0.1121, + "epoch": 0.06, + "learning_rate": 1.8731218697829717e-05, + "loss": 0.0445, "step": 114 }, { - "epoch": 0.13, - "learning_rate": 1.7441601779755287e-05, - "loss": 0.2018, + "epoch": 0.06, + "learning_rate": 1.8720089037284365e-05, + "loss": 0.2365, "step": 115 }, { - "epoch": 0.13, - "learning_rate": 1.741935483870968e-05, - "loss": 0.1133, + "epoch": 0.06, + "learning_rate": 1.8708959376739012e-05, + "loss": 0.1377, "step": 116 }, { - "epoch": 0.13, - "learning_rate": 1.7397107897664073e-05, - "loss": 0.1117, + "epoch": 0.07, + "learning_rate": 1.8697829716193657e-05, + "loss": 0.2083, "step": 117 }, { - "epoch": 0.13, - "learning_rate": 1.7374860956618468e-05, - "loss": 0.1286, + "epoch": 0.07, + "learning_rate": 1.8686700055648305e-05, + "loss": 0.0626, "step": 118 }, { - "epoch": 0.13, - "learning_rate": 1.735261401557286e-05, - "loss": 0.1093, + "epoch": 0.07, + "learning_rate": 1.8675570395102952e-05, + "loss": 0.1394, "step": 119 }, { - "epoch": 0.13, - "learning_rate": 1.7330367074527253e-05, - "loss": 0.1698, + "epoch": 0.07, + "learning_rate": 1.8664440734557597e-05, + "loss": 0.1024, "step": 120 }, { - "epoch": 0.13, - "learning_rate": 1.7308120133481648e-05, - "loss": 0.1158, + "epoch": 0.07, + "learning_rate": 1.8653311074012245e-05, + "loss": 0.1855, "step": 121 }, { - "epoch": 0.14, - "learning_rate": 1.7285873192436043e-05, - "loss": 0.2543, + "epoch": 0.07, + "learning_rate": 1.8642181413466892e-05, + "loss": 0.116, "step": 122 }, { - "epoch": 0.14, - "learning_rate": 1.7263626251390434e-05, - "loss": 0.1264, + "epoch": 0.07, + "learning_rate": 1.8631051752921537e-05, + "loss": 0.1195, "step": 123 }, { - "epoch": 0.14, - "learning_rate": 1.7241379310344828e-05, - "loss": 0.1085, + "epoch": 0.07, + "learning_rate": 1.8619922092376185e-05, + "loss": 0.0912, "step": 124 }, { - "epoch": 0.14, - "learning_rate": 1.7219132369299223e-05, - "loss": 0.1683, + "epoch": 0.07, + "learning_rate": 1.8608792431830832e-05, + "loss": 0.1139, "step": 125 }, { - "epoch": 0.14, - "learning_rate": 1.7196885428253614e-05, - "loss": 0.1148, + "epoch": 0.07, + "learning_rate": 1.8597662771285477e-05, + "loss": 0.2218, "step": 126 }, { - "epoch": 0.14, - "learning_rate": 1.7174638487208012e-05, - "loss": 0.1611, + "epoch": 0.07, + "learning_rate": 1.8586533110740125e-05, + "loss": 0.0829, "step": 127 }, { - "epoch": 0.14, - "learning_rate": 1.7152391546162403e-05, - "loss": 0.219, + "epoch": 0.07, + "learning_rate": 1.8575403450194772e-05, + "loss": 0.0624, "step": 128 }, { - "epoch": 0.14, - "learning_rate": 1.7130144605116798e-05, - "loss": 0.246, + "epoch": 0.07, + "learning_rate": 1.8564273789649417e-05, + "loss": 0.0853, "step": 129 }, { - "epoch": 0.14, - "learning_rate": 1.7107897664071192e-05, - "loss": 0.1036, + "epoch": 0.07, + "learning_rate": 1.8553144129104065e-05, + "loss": 0.1109, "step": 130 }, { - "epoch": 0.15, - "learning_rate": 1.7085650723025587e-05, - "loss": 0.1537, + "epoch": 0.07, + "learning_rate": 1.8542014468558712e-05, + "loss": 0.12, "step": 131 }, { - "epoch": 0.15, - "learning_rate": 1.7063403781979978e-05, - "loss": 0.147, + "epoch": 0.07, + "learning_rate": 1.8530884808013357e-05, + "loss": 0.2633, "step": 132 }, { - "epoch": 0.15, - "learning_rate": 1.7041156840934373e-05, - "loss": 0.1204, + "epoch": 0.07, + "learning_rate": 1.8519755147468004e-05, + "loss": 0.2601, "step": 133 }, { - "epoch": 0.15, - "learning_rate": 1.7018909899888767e-05, - "loss": 0.1681, + "epoch": 0.07, + "learning_rate": 1.8508625486922652e-05, + "loss": 0.3669, "step": 134 }, { - "epoch": 0.15, - "learning_rate": 1.699666295884316e-05, - "loss": 0.1554, + "epoch": 0.08, + "learning_rate": 1.8497495826377297e-05, + "loss": 0.1213, "step": 135 }, { - "epoch": 0.15, - "learning_rate": 1.6974416017797553e-05, - "loss": 0.1023, + "epoch": 0.08, + "learning_rate": 1.8486366165831944e-05, + "loss": 0.0898, "step": 136 }, { - "epoch": 0.15, - "learning_rate": 1.6952169076751948e-05, - "loss": 0.1113, + "epoch": 0.08, + "learning_rate": 1.8475236505286592e-05, + "loss": 0.1421, "step": 137 }, { - "epoch": 0.15, - "learning_rate": 1.6929922135706342e-05, - "loss": 0.172, + "epoch": 0.08, + "learning_rate": 1.8464106844741237e-05, + "loss": 0.0914, "step": 138 }, { - "epoch": 0.15, - "learning_rate": 1.6907675194660737e-05, - "loss": 0.0839, + "epoch": 0.08, + "learning_rate": 1.8452977184195884e-05, + "loss": 0.1651, "step": 139 }, { - "epoch": 0.16, - "learning_rate": 1.688542825361513e-05, - "loss": 0.1646, + "epoch": 0.08, + "learning_rate": 1.8441847523650532e-05, + "loss": 0.3179, "step": 140 }, { - "epoch": 0.16, - "learning_rate": 1.6863181312569523e-05, - "loss": 0.1958, + "epoch": 0.08, + "learning_rate": 1.8430717863105177e-05, + "loss": 0.095, "step": 141 }, { - "epoch": 0.16, - "learning_rate": 1.6840934371523917e-05, - "loss": 0.0836, + "epoch": 0.08, + "learning_rate": 1.8419588202559824e-05, + "loss": 0.0657, "step": 142 }, { - "epoch": 0.16, - "learning_rate": 1.6818687430478312e-05, - "loss": 0.1434, + "epoch": 0.08, + "learning_rate": 1.840845854201447e-05, + "loss": 0.1159, "step": 143 }, { - "epoch": 0.16, - "learning_rate": 1.6796440489432703e-05, - "loss": 0.1552, + "epoch": 0.08, + "learning_rate": 1.8397328881469117e-05, + "loss": 0.1456, "step": 144 }, { - "epoch": 0.16, - "learning_rate": 1.6774193548387098e-05, - "loss": 0.1122, + "epoch": 0.08, + "learning_rate": 1.8386199220923764e-05, + "loss": 0.1654, "step": 145 }, { - "epoch": 0.16, - "learning_rate": 1.6751946607341492e-05, - "loss": 0.1263, + "epoch": 0.08, + "learning_rate": 1.837506956037841e-05, + "loss": 0.0861, "step": 146 }, { - "epoch": 0.16, - "learning_rate": 1.6729699666295887e-05, - "loss": 0.0977, + "epoch": 0.08, + "learning_rate": 1.8363939899833057e-05, + "loss": 0.0916, "step": 147 }, { - "epoch": 0.16, - "learning_rate": 1.6707452725250278e-05, - "loss": 0.1238, + "epoch": 0.08, + "learning_rate": 1.8352810239287704e-05, + "loss": 0.1923, "step": 148 }, { - "epoch": 0.17, - "learning_rate": 1.6685205784204673e-05, - "loss": 0.1962, + "epoch": 0.08, + "learning_rate": 1.834168057874235e-05, + "loss": 0.1719, "step": 149 }, { - "epoch": 0.17, - "learning_rate": 1.6662958843159067e-05, - "loss": 0.1497, + "epoch": 0.08, + "learning_rate": 1.8330550918196996e-05, + "loss": 0.14, "step": 150 }, { - "epoch": 0.17, - "learning_rate": 1.664071190211346e-05, - "loss": 0.1162, + "epoch": 0.08, + "learning_rate": 1.8319421257651644e-05, + "loss": 0.1924, "step": 151 }, { - "epoch": 0.17, - "learning_rate": 1.6618464961067856e-05, - "loss": 0.1397, + "epoch": 0.08, + "learning_rate": 1.830829159710629e-05, + "loss": 0.1968, "step": 152 }, { - "epoch": 0.17, - "learning_rate": 1.6596218020022247e-05, - "loss": 0.1656, + "epoch": 0.09, + "learning_rate": 1.8297161936560936e-05, + "loss": 0.1938, "step": 153 }, { - "epoch": 0.17, - "learning_rate": 1.6573971078976642e-05, - "loss": 0.2044, + "epoch": 0.09, + "learning_rate": 1.8286032276015584e-05, + "loss": 0.1132, "step": 154 }, { - "epoch": 0.17, - "learning_rate": 1.6551724137931037e-05, - "loss": 0.142, + "epoch": 0.09, + "learning_rate": 1.827490261547023e-05, + "loss": 0.165, "step": 155 }, { - "epoch": 0.17, - "learning_rate": 1.652947719688543e-05, - "loss": 0.1261, + "epoch": 0.09, + "learning_rate": 1.8263772954924876e-05, + "loss": 0.112, "step": 156 }, { - "epoch": 0.17, - "learning_rate": 1.6507230255839822e-05, - "loss": 0.1906, + "epoch": 0.09, + "learning_rate": 1.8252643294379524e-05, + "loss": 0.1897, "step": 157 }, { - "epoch": 0.18, - "learning_rate": 1.6484983314794217e-05, - "loss": 0.1451, + "epoch": 0.09, + "learning_rate": 1.824151363383417e-05, + "loss": 0.1394, "step": 158 }, { - "epoch": 0.18, - "learning_rate": 1.646273637374861e-05, - "loss": 0.1163, + "epoch": 0.09, + "learning_rate": 1.8230383973288816e-05, + "loss": 0.0672, "step": 159 }, { - "epoch": 0.18, - "learning_rate": 1.6440489432703003e-05, - "loss": 0.0779, + "epoch": 0.09, + "learning_rate": 1.821925431274346e-05, + "loss": 0.1412, "step": 160 }, { - "epoch": 0.18, - "learning_rate": 1.6418242491657397e-05, - "loss": 0.1511, + "epoch": 0.09, + "learning_rate": 1.820812465219811e-05, + "loss": 0.0675, "step": 161 }, { - "epoch": 0.18, - "learning_rate": 1.6395995550611792e-05, - "loss": 0.1401, + "epoch": 0.09, + "learning_rate": 1.8196994991652756e-05, + "loss": 0.1644, "step": 162 }, { - "epoch": 0.18, - "learning_rate": 1.6373748609566187e-05, - "loss": 0.1894, + "epoch": 0.09, + "learning_rate": 1.81858653311074e-05, + "loss": 0.3, "step": 163 }, { - "epoch": 0.18, - "learning_rate": 1.635150166852058e-05, - "loss": 0.1263, + "epoch": 0.09, + "learning_rate": 1.817473567056205e-05, + "loss": 0.1117, "step": 164 }, { - "epoch": 0.18, - "learning_rate": 1.6329254727474972e-05, - "loss": 0.1233, + "epoch": 0.09, + "learning_rate": 1.8163606010016696e-05, + "loss": 0.058, "step": 165 }, { - "epoch": 0.18, - "learning_rate": 1.6307007786429367e-05, - "loss": 0.1428, + "epoch": 0.09, + "learning_rate": 1.815247634947134e-05, + "loss": 0.2501, "step": 166 }, { - "epoch": 0.19, - "learning_rate": 1.628476084538376e-05, - "loss": 0.1615, + "epoch": 0.09, + "learning_rate": 1.814134668892599e-05, + "loss": 0.1408, "step": 167 }, { - "epoch": 0.19, - "learning_rate": 1.6262513904338156e-05, - "loss": 0.195, + "epoch": 0.09, + "learning_rate": 1.8130217028380636e-05, + "loss": 0.1373, "step": 168 }, { - "epoch": 0.19, - "learning_rate": 1.6240266963292547e-05, - "loss": 0.1727, + "epoch": 0.09, + "learning_rate": 1.811908736783528e-05, + "loss": 0.1658, "step": 169 }, { - "epoch": 0.19, - "learning_rate": 1.6218020022246942e-05, - "loss": 0.1086, + "epoch": 0.09, + "learning_rate": 1.810795770728993e-05, + "loss": 0.1117, "step": 170 }, { - "epoch": 0.19, - "learning_rate": 1.6195773081201336e-05, - "loss": 0.1692, + "epoch": 0.1, + "learning_rate": 1.8096828046744576e-05, + "loss": 0.2476, "step": 171 }, { - "epoch": 0.19, - "learning_rate": 1.617352614015573e-05, - "loss": 0.1243, + "epoch": 0.1, + "learning_rate": 1.808569838619922e-05, + "loss": 0.1137, "step": 172 }, { - "epoch": 0.19, - "learning_rate": 1.6151279199110122e-05, - "loss": 0.1263, + "epoch": 0.1, + "learning_rate": 1.807456872565387e-05, + "loss": 0.2162, "step": 173 }, { - "epoch": 0.19, - "learning_rate": 1.6129032258064517e-05, - "loss": 0.1632, + "epoch": 0.1, + "learning_rate": 1.8063439065108516e-05, + "loss": 0.1374, "step": 174 }, { - "epoch": 0.19, - "learning_rate": 1.610678531701891e-05, - "loss": 0.1465, + "epoch": 0.1, + "learning_rate": 1.805230940456316e-05, + "loss": 0.0894, "step": 175 }, { - "epoch": 0.2, - "learning_rate": 1.6084538375973303e-05, - "loss": 0.1518, + "epoch": 0.1, + "learning_rate": 1.804117974401781e-05, + "loss": 0.0645, "step": 176 }, { - "epoch": 0.2, - "learning_rate": 1.60622914349277e-05, - "loss": 0.1119, + "epoch": 0.1, + "learning_rate": 1.8030050083472456e-05, + "loss": 0.1127, "step": 177 }, { - "epoch": 0.2, - "learning_rate": 1.6040044493882092e-05, - "loss": 0.164, + "epoch": 0.1, + "learning_rate": 1.80189204229271e-05, + "loss": 0.1139, "step": 178 }, { - "epoch": 0.2, - "learning_rate": 1.6017797552836486e-05, - "loss": 0.1423, + "epoch": 0.1, + "learning_rate": 1.800779076238175e-05, + "loss": 0.1391, "step": 179 }, { - "epoch": 0.2, - "learning_rate": 1.599555061179088e-05, - "loss": 0.1921, + "epoch": 0.1, + "learning_rate": 1.7996661101836396e-05, + "loss": 0.1403, "step": 180 }, { - "epoch": 0.2, - "learning_rate": 1.5973303670745275e-05, - "loss": 0.0758, + "epoch": 0.1, + "learning_rate": 1.798553144129104e-05, + "loss": 0.0598, "step": 181 }, { - "epoch": 0.2, - "learning_rate": 1.5951056729699667e-05, - "loss": 0.158, + "epoch": 0.1, + "learning_rate": 1.797440178074569e-05, + "loss": 0.1412, "step": 182 }, { - "epoch": 0.2, - "learning_rate": 1.592880978865406e-05, - "loss": 0.0748, + "epoch": 0.1, + "learning_rate": 1.7963272120200336e-05, + "loss": 0.1958, "step": 183 }, { - "epoch": 0.2, - "learning_rate": 1.5906562847608456e-05, - "loss": 0.0855, + "epoch": 0.1, + "learning_rate": 1.795214245965498e-05, + "loss": 0.1107, "step": 184 }, { - "epoch": 0.21, - "learning_rate": 1.5884315906562847e-05, - "loss": 0.2064, + "epoch": 0.1, + "learning_rate": 1.7941012799109628e-05, + "loss": 0.2541, "step": 185 }, { - "epoch": 0.21, - "learning_rate": 1.586206896551724e-05, - "loss": 0.1268, + "epoch": 0.1, + "learning_rate": 1.7929883138564276e-05, + "loss": 0.1395, "step": 186 }, { - "epoch": 0.21, - "learning_rate": 1.5839822024471636e-05, - "loss": 0.1795, + "epoch": 0.1, + "learning_rate": 1.791875347801892e-05, + "loss": 0.1096, "step": 187 }, { - "epoch": 0.21, - "learning_rate": 1.581757508342603e-05, - "loss": 0.1271, + "epoch": 0.1, + "learning_rate": 1.7907623817473568e-05, + "loss": 0.1119, "step": 188 }, { - "epoch": 0.21, - "learning_rate": 1.5795328142380425e-05, - "loss": 0.1689, + "epoch": 0.11, + "learning_rate": 1.7896494156928216e-05, + "loss": 0.1679, "step": 189 }, { - "epoch": 0.21, - "learning_rate": 1.5773081201334817e-05, - "loss": 0.1817, + "epoch": 0.11, + "learning_rate": 1.788536449638286e-05, + "loss": 0.1388, "step": 190 }, { - "epoch": 0.21, - "learning_rate": 1.575083426028921e-05, - "loss": 0.1271, + "epoch": 0.11, + "learning_rate": 1.7874234835837508e-05, + "loss": 0.1121, "step": 191 }, { - "epoch": 0.21, - "learning_rate": 1.5728587319243606e-05, - "loss": 0.1685, + "epoch": 0.11, + "learning_rate": 1.7863105175292156e-05, + "loss": 0.2184, "step": 192 }, { - "epoch": 0.21, - "learning_rate": 1.5706340378198e-05, - "loss": 0.1368, + "epoch": 0.11, + "learning_rate": 1.7851975514746804e-05, + "loss": 0.1385, "step": 193 }, { - "epoch": 0.22, - "learning_rate": 1.568409343715239e-05, - "loss": 0.088, + "epoch": 0.11, + "learning_rate": 1.7840845854201448e-05, + "loss": 0.1402, "step": 194 }, { - "epoch": 0.22, - "learning_rate": 1.5661846496106786e-05, - "loss": 0.0971, + "epoch": 0.11, + "learning_rate": 1.7829716193656096e-05, + "loss": 0.2237, "step": 195 }, { - "epoch": 0.22, - "learning_rate": 1.563959955506118e-05, - "loss": 0.1124, + "epoch": 0.11, + "learning_rate": 1.7818586533110744e-05, + "loss": 0.1713, "step": 196 }, { - "epoch": 0.22, - "learning_rate": 1.5617352614015575e-05, - "loss": 0.1521, + "epoch": 0.11, + "learning_rate": 1.7807456872565388e-05, + "loss": 0.1193, "step": 197 }, { - "epoch": 0.22, - "learning_rate": 1.5595105672969966e-05, - "loss": 0.1711, + "epoch": 0.11, + "learning_rate": 1.7796327212020036e-05, + "loss": 0.0652, "step": 198 }, { - "epoch": 0.22, - "learning_rate": 1.557285873192436e-05, - "loss": 0.1651, + "epoch": 0.11, + "learning_rate": 1.7785197551474684e-05, + "loss": 0.2428, "step": 199 }, { - "epoch": 0.22, - "learning_rate": 1.5550611790878756e-05, - "loss": 0.167, + "epoch": 0.11, + "learning_rate": 1.7774067890929328e-05, + "loss": 0.2177, "step": 200 }, { - "epoch": 0.22, - "learning_rate": 1.552836484983315e-05, - "loss": 0.0826, + "epoch": 0.11, + "learning_rate": 1.7762938230383976e-05, + "loss": 0.216, "step": 201 }, { - "epoch": 0.22, - "learning_rate": 1.5506117908787545e-05, - "loss": 0.1001, + "epoch": 0.11, + "learning_rate": 1.7751808569838624e-05, + "loss": 0.1646, "step": 202 }, { - "epoch": 0.23, - "learning_rate": 1.5483870967741936e-05, - "loss": 0.1263, + "epoch": 0.11, + "learning_rate": 1.7740678909293268e-05, + "loss": 0.1242, "step": 203 }, { - "epoch": 0.23, - "learning_rate": 1.546162402669633e-05, - "loss": 0.0986, + "epoch": 0.11, + "learning_rate": 1.7729549248747916e-05, + "loss": 0.1404, "step": 204 }, { - "epoch": 0.23, - "learning_rate": 1.5439377085650725e-05, - "loss": 0.1578, + "epoch": 0.11, + "learning_rate": 1.7718419588202564e-05, + "loss": 0.1436, "step": 205 }, { - "epoch": 0.23, - "learning_rate": 1.541713014460512e-05, - "loss": 0.1387, + "epoch": 0.11, + "learning_rate": 1.7707289927657208e-05, + "loss": 0.1927, "step": 206 }, { - "epoch": 0.23, - "learning_rate": 1.539488320355951e-05, - "loss": 0.1384, + "epoch": 0.12, + "learning_rate": 1.7696160267111856e-05, + "loss": 0.1412, "step": 207 }, { - "epoch": 0.23, - "learning_rate": 1.5372636262513906e-05, - "loss": 0.1679, + "epoch": 0.12, + "learning_rate": 1.76850306065665e-05, + "loss": 0.1155, "step": 208 }, { - "epoch": 0.23, - "learning_rate": 1.53503893214683e-05, - "loss": 0.2111, + "epoch": 0.12, + "learning_rate": 1.7673900946021148e-05, + "loss": 0.1644, "step": 209 }, { - "epoch": 0.23, - "learning_rate": 1.532814238042269e-05, - "loss": 0.1777, + "epoch": 0.12, + "learning_rate": 1.7662771285475796e-05, + "loss": 0.0862, "step": 210 }, { - "epoch": 0.23, - "learning_rate": 1.5305895439377086e-05, - "loss": 0.1638, + "epoch": 0.12, + "learning_rate": 1.765164162493044e-05, + "loss": 0.0602, "step": 211 }, { - "epoch": 0.24, - "learning_rate": 1.528364849833148e-05, - "loss": 0.1473, + "epoch": 0.12, + "learning_rate": 1.7640511964385088e-05, + "loss": 0.136, "step": 212 }, { - "epoch": 0.24, - "learning_rate": 1.5261401557285875e-05, - "loss": 0.1365, + "epoch": 0.12, + "learning_rate": 1.7629382303839736e-05, + "loss": 0.1618, "step": 213 }, { - "epoch": 0.24, - "learning_rate": 1.5239154616240268e-05, - "loss": 0.1964, + "epoch": 0.12, + "learning_rate": 1.761825264329438e-05, + "loss": 0.0861, "step": 214 }, { - "epoch": 0.24, - "learning_rate": 1.521690767519466e-05, - "loss": 0.1425, + "epoch": 0.12, + "learning_rate": 1.7607122982749028e-05, + "loss": 0.1395, "step": 215 }, { - "epoch": 0.24, - "learning_rate": 1.5194660734149055e-05, - "loss": 0.1283, + "epoch": 0.12, + "learning_rate": 1.7595993322203676e-05, + "loss": 0.2494, "step": 216 }, { - "epoch": 0.24, - "learning_rate": 1.5172413793103448e-05, - "loss": 0.1776, + "epoch": 0.12, + "learning_rate": 1.758486366165832e-05, + "loss": 0.1391, "step": 217 }, { - "epoch": 0.24, - "learning_rate": 1.5150166852057845e-05, - "loss": 0.1678, + "epoch": 0.12, + "learning_rate": 1.7573734001112968e-05, + "loss": 0.1926, "step": 218 }, { - "epoch": 0.24, - "learning_rate": 1.5127919911012236e-05, - "loss": 0.104, + "epoch": 0.12, + "learning_rate": 1.7562604340567616e-05, + "loss": 0.2519, "step": 219 }, { - "epoch": 0.24, - "learning_rate": 1.5105672969966632e-05, - "loss": 0.1697, + "epoch": 0.12, + "learning_rate": 1.755147468002226e-05, + "loss": 0.19, "step": 220 }, { - "epoch": 0.25, - "learning_rate": 1.5083426028921025e-05, - "loss": 0.1292, + "epoch": 0.12, + "learning_rate": 1.7540345019476908e-05, + "loss": 0.1907, "step": 221 }, { - "epoch": 0.25, - "learning_rate": 1.506117908787542e-05, - "loss": 0.1151, + "epoch": 0.12, + "learning_rate": 1.7529215358931556e-05, + "loss": 0.0872, "step": 222 }, { - "epoch": 0.25, - "learning_rate": 1.5038932146829812e-05, - "loss": 0.1662, + "epoch": 0.12, + "learning_rate": 1.75180856983862e-05, + "loss": 0.1089, "step": 223 }, { - "epoch": 0.25, - "learning_rate": 1.5016685205784205e-05, - "loss": 0.1118, + "epoch": 0.12, + "learning_rate": 1.7506956037840848e-05, + "loss": 0.1385, "step": 224 }, { - "epoch": 0.25, - "learning_rate": 1.49944382647386e-05, - "loss": 0.1394, + "epoch": 0.13, + "learning_rate": 1.7495826377295492e-05, + "loss": 0.1379, "step": 225 }, { - "epoch": 0.25, - "learning_rate": 1.4972191323692993e-05, - "loss": 0.1814, + "epoch": 0.13, + "learning_rate": 1.748469671675014e-05, + "loss": 0.0389, "step": 226 }, { - "epoch": 0.25, - "learning_rate": 1.4949944382647387e-05, - "loss": 0.1291, + "epoch": 0.13, + "learning_rate": 1.7473567056204788e-05, + "loss": 0.1156, "step": 227 }, { - "epoch": 0.25, - "learning_rate": 1.492769744160178e-05, - "loss": 0.1509, + "epoch": 0.13, + "learning_rate": 1.7462437395659432e-05, + "loss": 0.1105, "step": 228 }, { - "epoch": 0.25, - "learning_rate": 1.4905450500556175e-05, - "loss": 0.124, + "epoch": 0.13, + "learning_rate": 1.745130773511408e-05, + "loss": 0.222, "step": 229 }, { - "epoch": 0.26, - "learning_rate": 1.4883203559510568e-05, - "loss": 0.1561, + "epoch": 0.13, + "learning_rate": 1.7440178074568728e-05, + "loss": 0.1877, "step": 230 }, { - "epoch": 0.26, - "learning_rate": 1.486095661846496e-05, - "loss": 0.1941, + "epoch": 0.13, + "learning_rate": 1.7429048414023372e-05, + "loss": 0.1134, "step": 231 }, { - "epoch": 0.26, - "learning_rate": 1.4838709677419357e-05, - "loss": 0.1522, + "epoch": 0.13, + "learning_rate": 1.741791875347802e-05, + "loss": 0.1127, "step": 232 }, { - "epoch": 0.26, - "learning_rate": 1.481646273637375e-05, - "loss": 0.1267, + "epoch": 0.13, + "learning_rate": 1.7406789092932668e-05, + "loss": 0.1092, "step": 233 }, { - "epoch": 0.26, - "learning_rate": 1.4794215795328144e-05, - "loss": 0.1505, + "epoch": 0.13, + "learning_rate": 1.7395659432387312e-05, + "loss": 0.1134, "step": 234 }, { - "epoch": 0.26, - "learning_rate": 1.4771968854282537e-05, - "loss": 0.127, + "epoch": 0.13, + "learning_rate": 1.738452977184196e-05, + "loss": 0.1145, "step": 235 }, { - "epoch": 0.26, - "learning_rate": 1.4749721913236932e-05, - "loss": 0.1281, + "epoch": 0.13, + "learning_rate": 1.7373400111296608e-05, + "loss": 0.1398, "step": 236 }, { - "epoch": 0.26, - "learning_rate": 1.4727474972191325e-05, - "loss": 0.1379, + "epoch": 0.13, + "learning_rate": 1.7362270450751252e-05, + "loss": 0.1118, "step": 237 }, { - "epoch": 0.26, - "learning_rate": 1.470522803114572e-05, - "loss": 0.1518, + "epoch": 0.13, + "learning_rate": 1.73511407902059e-05, + "loss": 0.1117, "step": 238 }, { - "epoch": 0.27, - "learning_rate": 1.4682981090100112e-05, - "loss": 0.1511, + "epoch": 0.13, + "learning_rate": 1.7340011129660548e-05, + "loss": 0.1405, "step": 239 }, { - "epoch": 0.27, - "learning_rate": 1.4660734149054505e-05, - "loss": 0.2012, + "epoch": 0.13, + "learning_rate": 1.7328881469115192e-05, + "loss": 0.1945, "step": 240 }, { - "epoch": 0.27, - "learning_rate": 1.46384872080089e-05, - "loss": 0.1492, + "epoch": 0.13, + "learning_rate": 1.731775180856984e-05, + "loss": 0.1145, "step": 241 }, { - "epoch": 0.27, - "learning_rate": 1.4616240266963293e-05, - "loss": 0.1444, + "epoch": 0.13, + "learning_rate": 1.7306622148024484e-05, + "loss": 0.1137, "step": 242 }, { - "epoch": 0.27, - "learning_rate": 1.4593993325917689e-05, - "loss": 0.0894, + "epoch": 0.14, + "learning_rate": 1.7295492487479132e-05, + "loss": 0.2834, "step": 243 }, { - "epoch": 0.27, - "learning_rate": 1.457174638487208e-05, - "loss": 0.115, + "epoch": 0.14, + "learning_rate": 1.728436282693378e-05, + "loss": 0.225, "step": 244 }, { - "epoch": 0.27, - "learning_rate": 1.4549499443826476e-05, - "loss": 0.155, + "epoch": 0.14, + "learning_rate": 1.7273233166388424e-05, + "loss": 0.0862, "step": 245 }, { - "epoch": 0.27, - "learning_rate": 1.452725250278087e-05, - "loss": 0.1707, + "epoch": 0.14, + "learning_rate": 1.7262103505843072e-05, + "loss": 0.1703, "step": 246 }, { - "epoch": 0.27, - "learning_rate": 1.4505005561735264e-05, - "loss": 0.1388, + "epoch": 0.14, + "learning_rate": 1.725097384529772e-05, + "loss": 0.1109, "step": 247 }, { - "epoch": 0.28, - "learning_rate": 1.4482758620689657e-05, - "loss": 0.1635, + "epoch": 0.14, + "learning_rate": 1.7239844184752364e-05, + "loss": 0.1122, "step": 248 }, { - "epoch": 0.28, - "learning_rate": 1.446051167964405e-05, - "loss": 0.126, + "epoch": 0.14, + "learning_rate": 1.7228714524207012e-05, + "loss": 0.1684, "step": 249 }, { - "epoch": 0.28, - "learning_rate": 1.4438264738598444e-05, - "loss": 0.0949, + "epoch": 0.14, + "learning_rate": 1.721758486366166e-05, + "loss": 0.1686, "step": 250 }, { - "epoch": 0.28, - "learning_rate": 1.4416017797552837e-05, - "loss": 0.0982, + "epoch": 0.14, + "learning_rate": 1.7206455203116304e-05, + "loss": 0.0882, "step": 251 }, { - "epoch": 0.28, - "learning_rate": 1.4393770856507232e-05, - "loss": 0.0994, + "epoch": 0.14, + "learning_rate": 1.7195325542570952e-05, + "loss": 0.1404, "step": 252 }, { - "epoch": 0.28, - "learning_rate": 1.4371523915461624e-05, - "loss": 0.1265, + "epoch": 0.14, + "learning_rate": 1.71841958820256e-05, + "loss": 0.1389, "step": 253 }, { - "epoch": 0.28, - "learning_rate": 1.4349276974416019e-05, - "loss": 0.1341, + "epoch": 0.14, + "learning_rate": 1.7173066221480244e-05, + "loss": 0.1669, "step": 254 }, { - "epoch": 0.28, - "learning_rate": 1.4327030033370412e-05, - "loss": 0.136, + "epoch": 0.14, + "learning_rate": 1.7161936560934892e-05, + "loss": 0.2524, "step": 255 }, { - "epoch": 0.28, - "learning_rate": 1.4304783092324805e-05, - "loss": 0.1367, + "epoch": 0.14, + "learning_rate": 1.715080690038954e-05, + "loss": 0.1907, "step": 256 }, { - "epoch": 0.29, - "learning_rate": 1.4282536151279201e-05, - "loss": 0.1139, + "epoch": 0.14, + "learning_rate": 1.7139677239844184e-05, + "loss": 0.2712, "step": 257 }, { - "epoch": 0.29, - "learning_rate": 1.4260289210233594e-05, - "loss": 0.125, + "epoch": 0.14, + "learning_rate": 1.7128547579298832e-05, + "loss": 0.2179, "step": 258 }, { - "epoch": 0.29, - "learning_rate": 1.4238042269187989e-05, - "loss": 0.1224, + "epoch": 0.14, + "learning_rate": 1.711741791875348e-05, + "loss": 0.1385, "step": 259 }, { - "epoch": 0.29, - "learning_rate": 1.4215795328142381e-05, - "loss": 0.1866, + "epoch": 0.14, + "learning_rate": 1.7106288258208127e-05, + "loss": 0.0652, "step": 260 }, { - "epoch": 0.29, - "learning_rate": 1.4193548387096776e-05, - "loss": 0.0824, + "epoch": 0.15, + "learning_rate": 1.7095158597662772e-05, + "loss": 0.1139, "step": 261 }, { - "epoch": 0.29, - "learning_rate": 1.4171301446051169e-05, - "loss": 0.1221, + "epoch": 0.15, + "learning_rate": 1.708402893711742e-05, + "loss": 0.1892, "step": 262 }, { - "epoch": 0.29, - "learning_rate": 1.4149054505005564e-05, - "loss": 0.1397, + "epoch": 0.15, + "learning_rate": 1.7072899276572067e-05, + "loss": 0.1193, "step": 263 }, { - "epoch": 0.29, - "learning_rate": 1.4126807563959956e-05, - "loss": 0.1498, + "epoch": 0.15, + "learning_rate": 1.7061769616026712e-05, + "loss": 0.1662, "step": 264 }, { - "epoch": 0.29, - "learning_rate": 1.410456062291435e-05, - "loss": 0.194, + "epoch": 0.15, + "learning_rate": 1.705063995548136e-05, + "loss": 0.1664, "step": 265 }, { - "epoch": 0.3, - "learning_rate": 1.4082313681868744e-05, - "loss": 0.2462, + "epoch": 0.15, + "learning_rate": 1.7039510294936007e-05, + "loss": 0.0675, "step": 266 }, { - "epoch": 0.3, - "learning_rate": 1.4060066740823137e-05, - "loss": 0.1401, + "epoch": 0.15, + "learning_rate": 1.7028380634390652e-05, + "loss": 0.1651, "step": 267 }, { - "epoch": 0.3, - "learning_rate": 1.4037819799777533e-05, - "loss": 0.1145, + "epoch": 0.15, + "learning_rate": 1.70172509738453e-05, + "loss": 0.167, "step": 268 }, { - "epoch": 0.3, - "learning_rate": 1.4015572858731924e-05, - "loss": 0.1187, + "epoch": 0.15, + "learning_rate": 1.7006121313299947e-05, + "loss": 0.0913, "step": 269 }, { - "epoch": 0.3, - "learning_rate": 1.399332591768632e-05, - "loss": 0.0934, + "epoch": 0.15, + "learning_rate": 1.6994991652754592e-05, + "loss": 0.2157, "step": 270 }, { - "epoch": 0.3, - "learning_rate": 1.3971078976640713e-05, - "loss": 0.1304, + "epoch": 0.15, + "learning_rate": 1.698386199220924e-05, + "loss": 0.0898, "step": 271 }, { - "epoch": 0.3, - "learning_rate": 1.3948832035595108e-05, - "loss": 0.1761, + "epoch": 0.15, + "learning_rate": 1.6972732331663887e-05, + "loss": 0.1117, "step": 272 }, { - "epoch": 0.3, - "learning_rate": 1.3926585094549501e-05, - "loss": 0.1373, + "epoch": 0.15, + "learning_rate": 1.696160267111853e-05, + "loss": 0.0836, "step": 273 }, { - "epoch": 0.3, - "learning_rate": 1.3904338153503894e-05, - "loss": 0.1008, + "epoch": 0.15, + "learning_rate": 1.695047301057318e-05, + "loss": 0.141, "step": 274 }, { - "epoch": 0.31, - "learning_rate": 1.3882091212458288e-05, - "loss": 0.1393, + "epoch": 0.15, + "learning_rate": 1.6939343350027827e-05, + "loss": 0.1727, "step": 275 }, { - "epoch": 0.31, - "learning_rate": 1.3859844271412681e-05, - "loss": 0.1058, + "epoch": 0.15, + "learning_rate": 1.692821368948247e-05, + "loss": 0.1669, "step": 276 }, { - "epoch": 0.31, - "learning_rate": 1.3837597330367076e-05, - "loss": 0.1129, + "epoch": 0.15, + "learning_rate": 1.691708402893712e-05, + "loss": 0.0853, "step": 277 }, { - "epoch": 0.31, - "learning_rate": 1.3815350389321469e-05, - "loss": 0.1152, + "epoch": 0.15, + "learning_rate": 1.6905954368391767e-05, + "loss": 0.0858, "step": 278 }, { - "epoch": 0.31, - "learning_rate": 1.3793103448275863e-05, - "loss": 0.1385, + "epoch": 0.16, + "learning_rate": 1.689482470784641e-05, + "loss": 0.1369, "step": 279 }, { - "epoch": 0.31, - "learning_rate": 1.3770856507230256e-05, - "loss": 0.1865, + "epoch": 0.16, + "learning_rate": 1.688369504730106e-05, + "loss": 0.1941, "step": 280 }, { - "epoch": 0.31, - "learning_rate": 1.3748609566184649e-05, - "loss": 0.1783, + "epoch": 0.16, + "learning_rate": 1.6872565386755707e-05, + "loss": 0.1979, "step": 281 }, { - "epoch": 0.31, - "learning_rate": 1.3726362625139045e-05, - "loss": 0.1278, + "epoch": 0.16, + "learning_rate": 1.686143572621035e-05, + "loss": 0.1968, "step": 282 }, { - "epoch": 0.31, - "learning_rate": 1.3704115684093438e-05, - "loss": 0.1797, + "epoch": 0.16, + "learning_rate": 1.6850306065665e-05, + "loss": 0.0579, "step": 283 }, { - "epoch": 0.32, - "learning_rate": 1.3681868743047833e-05, - "loss": 0.1101, + "epoch": 0.16, + "learning_rate": 1.6839176405119647e-05, + "loss": 0.1129, "step": 284 }, { - "epoch": 0.32, - "learning_rate": 1.3659621802002226e-05, - "loss": 0.1394, + "epoch": 0.16, + "learning_rate": 1.682804674457429e-05, + "loss": 0.0861, "step": 285 }, { - "epoch": 0.32, - "learning_rate": 1.363737486095662e-05, - "loss": 0.1645, + "epoch": 0.16, + "learning_rate": 1.681691708402894e-05, + "loss": 0.1907, "step": 286 }, { - "epoch": 0.32, - "learning_rate": 1.3615127919911013e-05, - "loss": 0.0854, + "epoch": 0.16, + "learning_rate": 1.6805787423483587e-05, + "loss": 0.1698, "step": 287 }, { - "epoch": 0.32, - "learning_rate": 1.3592880978865408e-05, - "loss": 0.0963, + "epoch": 0.16, + "learning_rate": 1.679465776293823e-05, + "loss": 0.1377, "step": 288 }, { - "epoch": 0.32, - "learning_rate": 1.35706340378198e-05, - "loss": 0.1229, + "epoch": 0.16, + "learning_rate": 1.678352810239288e-05, + "loss": 0.1127, "step": 289 }, { - "epoch": 0.32, - "learning_rate": 1.3548387096774194e-05, - "loss": 0.1717, + "epoch": 0.16, + "learning_rate": 1.6772398441847527e-05, + "loss": 0.1109, "step": 290 }, { - "epoch": 0.32, - "learning_rate": 1.3526140155728588e-05, - "loss": 0.1502, + "epoch": 0.16, + "learning_rate": 1.676126878130217e-05, + "loss": 0.1687, "step": 291 }, { - "epoch": 0.32, - "learning_rate": 1.3503893214682981e-05, - "loss": 0.1245, + "epoch": 0.16, + "learning_rate": 1.675013912075682e-05, + "loss": 0.0838, "step": 292 }, { - "epoch": 0.33, - "learning_rate": 1.3481646273637377e-05, - "loss": 0.0957, + "epoch": 0.16, + "learning_rate": 1.6739009460211464e-05, + "loss": 0.0848, "step": 293 }, { - "epoch": 0.33, - "learning_rate": 1.3459399332591769e-05, - "loss": 0.0829, + "epoch": 0.16, + "learning_rate": 1.672787979966611e-05, + "loss": 0.1128, "step": 294 }, { - "epoch": 0.33, - "learning_rate": 1.3437152391546165e-05, - "loss": 0.2046, + "epoch": 0.16, + "learning_rate": 1.671675013912076e-05, + "loss": 0.1356, "step": 295 }, { - "epoch": 0.33, - "learning_rate": 1.3414905450500558e-05, - "loss": 0.1544, + "epoch": 0.16, + "learning_rate": 1.6705620478575404e-05, + "loss": 0.1128, "step": 296 }, { - "epoch": 0.33, - "learning_rate": 1.339265850945495e-05, - "loss": 0.1204, + "epoch": 0.17, + "learning_rate": 1.669449081803005e-05, + "loss": 0.2552, "step": 297 }, { - "epoch": 0.33, - "learning_rate": 1.3370411568409345e-05, - "loss": 0.1099, + "epoch": 0.17, + "learning_rate": 1.66833611574847e-05, + "loss": 0.1335, "step": 298 }, { - "epoch": 0.33, - "learning_rate": 1.3348164627363738e-05, - "loss": 0.1369, + "epoch": 0.17, + "learning_rate": 1.6672231496939344e-05, + "loss": 0.1242, "step": 299 }, { - "epoch": 0.33, - "learning_rate": 1.3325917686318133e-05, - "loss": 0.0971, + "epoch": 0.17, + "learning_rate": 1.666110183639399e-05, + "loss": 0.1974, "step": 300 }, { - "epoch": 0.33, - "learning_rate": 1.3303670745272526e-05, - "loss": 0.2003, + "epoch": 0.17, + "learning_rate": 1.664997217584864e-05, + "loss": 0.165, "step": 301 }, { - "epoch": 0.34, - "learning_rate": 1.328142380422692e-05, - "loss": 0.1246, + "epoch": 0.17, + "learning_rate": 1.6638842515303284e-05, + "loss": 0.0581, "step": 302 }, { - "epoch": 0.34, - "learning_rate": 1.3259176863181313e-05, - "loss": 0.1783, + "epoch": 0.17, + "learning_rate": 1.662771285475793e-05, + "loss": 0.1391, "step": 303 }, { - "epoch": 0.34, - "learning_rate": 1.3236929922135708e-05, - "loss": 0.1114, + "epoch": 0.17, + "learning_rate": 1.661658319421258e-05, + "loss": 0.1391, "step": 304 }, { - "epoch": 0.34, - "learning_rate": 1.32146829810901e-05, - "loss": 0.1093, + "epoch": 0.17, + "learning_rate": 1.6605453533667224e-05, + "loss": 0.1407, "step": 305 }, { - "epoch": 0.34, - "learning_rate": 1.3192436040044493e-05, - "loss": 0.1327, + "epoch": 0.17, + "learning_rate": 1.659432387312187e-05, + "loss": 0.1913, "step": 306 }, { - "epoch": 0.34, - "learning_rate": 1.317018909899889e-05, - "loss": 0.1762, + "epoch": 0.17, + "learning_rate": 1.658319421257652e-05, + "loss": 0.2183, "step": 307 }, { - "epoch": 0.34, - "learning_rate": 1.3147942157953283e-05, - "loss": 0.1508, + "epoch": 0.17, + "learning_rate": 1.6572064552031163e-05, + "loss": 0.195, "step": 308 }, { - "epoch": 0.34, - "learning_rate": 1.3125695216907677e-05, - "loss": 0.1401, + "epoch": 0.17, + "learning_rate": 1.656093489148581e-05, + "loss": 0.138, "step": 309 }, { - "epoch": 0.34, - "learning_rate": 1.310344827586207e-05, - "loss": 0.1276, + "epoch": 0.17, + "learning_rate": 1.6549805230940456e-05, + "loss": 0.1424, "step": 310 }, { - "epoch": 0.35, - "learning_rate": 1.3081201334816465e-05, - "loss": 0.1106, + "epoch": 0.17, + "learning_rate": 1.6538675570395103e-05, + "loss": 0.1427, "step": 311 }, { - "epoch": 0.35, - "learning_rate": 1.3058954393770857e-05, - "loss": 0.1894, + "epoch": 0.17, + "learning_rate": 1.652754590984975e-05, + "loss": 0.116, "step": 312 }, { - "epoch": 0.35, - "learning_rate": 1.3036707452725252e-05, - "loss": 0.1045, + "epoch": 0.17, + "learning_rate": 1.6516416249304396e-05, + "loss": 0.1395, "step": 313 }, { - "epoch": 0.35, - "learning_rate": 1.3014460511679645e-05, - "loss": 0.162, + "epoch": 0.17, + "learning_rate": 1.6505286588759043e-05, + "loss": 0.2403, "step": 314 }, { - "epoch": 0.35, - "learning_rate": 1.2992213570634038e-05, - "loss": 0.1029, + "epoch": 0.18, + "learning_rate": 1.649415692821369e-05, + "loss": 0.1376, "step": 315 }, { - "epoch": 0.35, - "learning_rate": 1.2969966629588432e-05, - "loss": 0.1472, + "epoch": 0.18, + "learning_rate": 1.6483027267668336e-05, + "loss": 0.138, "step": 316 }, { - "epoch": 0.35, - "learning_rate": 1.2947719688542825e-05, - "loss": 0.1846, + "epoch": 0.18, + "learning_rate": 1.6471897607122983e-05, + "loss": 0.1156, "step": 317 }, { - "epoch": 0.35, - "learning_rate": 1.2925472747497222e-05, - "loss": 0.1557, + "epoch": 0.18, + "learning_rate": 1.646076794657763e-05, + "loss": 0.1135, "step": 318 }, { - "epoch": 0.35, - "learning_rate": 1.2903225806451613e-05, - "loss": 0.1099, + "epoch": 0.18, + "learning_rate": 1.6449638286032276e-05, + "loss": 0.0357, "step": 319 }, { - "epoch": 0.36, - "learning_rate": 1.2880978865406009e-05, - "loss": 0.1306, + "epoch": 0.18, + "learning_rate": 1.6438508625486923e-05, + "loss": 0.1139, "step": 320 }, { - "epoch": 0.36, - "learning_rate": 1.2858731924360402e-05, - "loss": 0.089, + "epoch": 0.18, + "learning_rate": 1.642737896494157e-05, + "loss": 0.1372, "step": 321 }, { - "epoch": 0.36, - "learning_rate": 1.2836484983314795e-05, - "loss": 0.1697, + "epoch": 0.18, + "learning_rate": 1.6416249304396216e-05, + "loss": 0.2193, "step": 322 }, { - "epoch": 0.36, - "learning_rate": 1.281423804226919e-05, - "loss": 0.1208, + "epoch": 0.18, + "learning_rate": 1.6405119643850863e-05, + "loss": 0.1694, "step": 323 }, { - "epoch": 0.36, - "learning_rate": 1.2791991101223582e-05, - "loss": 0.0941, + "epoch": 0.18, + "learning_rate": 1.639398998330551e-05, + "loss": 0.1085, "step": 324 }, { - "epoch": 0.36, - "learning_rate": 1.2769744160177977e-05, - "loss": 0.1103, + "epoch": 0.18, + "learning_rate": 1.6382860322760155e-05, + "loss": 0.2796, "step": 325 }, { - "epoch": 0.36, - "learning_rate": 1.274749721913237e-05, - "loss": 0.1815, + "epoch": 0.18, + "learning_rate": 1.6371730662214803e-05, + "loss": 0.1129, "step": 326 }, { - "epoch": 0.36, - "learning_rate": 1.2725250278086764e-05, - "loss": 0.174, + "epoch": 0.18, + "learning_rate": 1.636060100166945e-05, + "loss": 0.0844, "step": 327 }, { - "epoch": 0.36, - "learning_rate": 1.2703003337041157e-05, - "loss": 0.1094, + "epoch": 0.18, + "learning_rate": 1.6349471341124095e-05, + "loss": 0.1668, "step": 328 }, { - "epoch": 0.37, - "learning_rate": 1.2680756395995552e-05, - "loss": 0.1329, + "epoch": 0.18, + "learning_rate": 1.6338341680578743e-05, + "loss": 0.0855, "step": 329 }, { - "epoch": 0.37, - "learning_rate": 1.2658509454949945e-05, - "loss": 0.102, + "epoch": 0.18, + "learning_rate": 1.632721202003339e-05, + "loss": 0.1666, "step": 330 }, { - "epoch": 0.37, - "learning_rate": 1.2636262513904338e-05, - "loss": 0.1236, + "epoch": 0.18, + "learning_rate": 1.6316082359488035e-05, + "loss": 0.1695, "step": 331 }, { - "epoch": 0.37, - "learning_rate": 1.2614015572858734e-05, - "loss": 0.123, + "epoch": 0.18, + "learning_rate": 1.6304952698942683e-05, + "loss": 0.1102, "step": 332 }, { - "epoch": 0.37, - "learning_rate": 1.2591768631813127e-05, - "loss": 0.0853, + "epoch": 0.19, + "learning_rate": 1.629382303839733e-05, + "loss": 0.1102, "step": 333 }, { - "epoch": 0.37, - "learning_rate": 1.2569521690767521e-05, - "loss": 0.1865, + "epoch": 0.19, + "learning_rate": 1.6282693377851975e-05, + "loss": 0.221, "step": 334 }, { - "epoch": 0.37, - "learning_rate": 1.2547274749721914e-05, - "loss": 0.1145, + "epoch": 0.19, + "learning_rate": 1.6271563717306623e-05, + "loss": 0.165, "step": 335 }, { - "epoch": 0.37, - "learning_rate": 1.2525027808676309e-05, - "loss": 0.1631, + "epoch": 0.19, + "learning_rate": 1.626043405676127e-05, + "loss": 0.2215, "step": 336 }, { - "epoch": 0.37, - "learning_rate": 1.2502780867630702e-05, - "loss": 0.1286, + "epoch": 0.19, + "learning_rate": 1.6249304396215915e-05, + "loss": 0.1628, "step": 337 }, { - "epoch": 0.38, - "learning_rate": 1.2480533926585096e-05, - "loss": 0.1702, + "epoch": 0.19, + "learning_rate": 1.6238174735670563e-05, + "loss": 0.1661, "step": 338 }, { - "epoch": 0.38, - "learning_rate": 1.245828698553949e-05, - "loss": 0.1594, + "epoch": 0.19, + "learning_rate": 1.622704507512521e-05, + "loss": 0.1368, "step": 339 }, { - "epoch": 0.38, - "learning_rate": 1.2436040044493882e-05, - "loss": 0.1371, + "epoch": 0.19, + "learning_rate": 1.6215915414579855e-05, + "loss": 0.0875, "step": 340 }, { - "epoch": 0.38, - "learning_rate": 1.2413793103448277e-05, - "loss": 0.1514, + "epoch": 0.19, + "learning_rate": 1.6204785754034503e-05, + "loss": 0.1901, "step": 341 }, { - "epoch": 0.38, - "learning_rate": 1.239154616240267e-05, - "loss": 0.1147, + "epoch": 0.19, + "learning_rate": 1.619365609348915e-05, + "loss": 0.1403, "step": 342 }, { - "epoch": 0.38, - "learning_rate": 1.2369299221357066e-05, - "loss": 0.1334, + "epoch": 0.19, + "learning_rate": 1.61825264329438e-05, + "loss": 0.1373, "step": 343 }, { - "epoch": 0.38, - "learning_rate": 1.2347052280311459e-05, - "loss": 0.0993, + "epoch": 0.19, + "learning_rate": 1.6171396772398443e-05, + "loss": 0.1138, "step": 344 }, { - "epoch": 0.38, - "learning_rate": 1.2324805339265853e-05, - "loss": 0.1651, + "epoch": 0.19, + "learning_rate": 1.616026711185309e-05, + "loss": 0.1411, "step": 345 }, { - "epoch": 0.38, - "learning_rate": 1.2302558398220246e-05, - "loss": 0.1527, + "epoch": 0.19, + "learning_rate": 1.614913745130774e-05, + "loss": 0.1157, "step": 346 }, { - "epoch": 0.39, - "learning_rate": 1.2280311457174639e-05, - "loss": 0.1341, + "epoch": 0.19, + "learning_rate": 1.6138007790762383e-05, + "loss": 0.1898, "step": 347 }, { - "epoch": 0.39, - "learning_rate": 1.2258064516129034e-05, - "loss": 0.0667, + "epoch": 0.19, + "learning_rate": 1.612687813021703e-05, + "loss": 0.1348, "step": 348 }, { - "epoch": 0.39, - "learning_rate": 1.2235817575083427e-05, - "loss": 0.1488, + "epoch": 0.19, + "learning_rate": 1.611574846967168e-05, + "loss": 0.167, "step": 349 }, { - "epoch": 0.39, - "learning_rate": 1.2213570634037821e-05, - "loss": 0.1568, + "epoch": 0.19, + "learning_rate": 1.6104618809126323e-05, + "loss": 0.1403, "step": 350 }, { - "epoch": 0.39, - "learning_rate": 1.2191323692992214e-05, - "loss": 0.1596, + "epoch": 0.2, + "learning_rate": 1.609348914858097e-05, + "loss": 0.1629, "step": 351 }, { - "epoch": 0.39, - "learning_rate": 1.2169076751946609e-05, - "loss": 0.1998, + "epoch": 0.2, + "learning_rate": 1.608235948803562e-05, + "loss": 0.1429, "step": 352 }, { - "epoch": 0.39, - "learning_rate": 1.2146829810901001e-05, - "loss": 0.167, + "epoch": 0.2, + "learning_rate": 1.6071229827490263e-05, + "loss": 0.1416, "step": 353 }, { - "epoch": 0.39, - "learning_rate": 1.2124582869855396e-05, - "loss": 0.1583, + "epoch": 0.2, + "learning_rate": 1.606010016694491e-05, + "loss": 0.0884, "step": 354 }, { - "epoch": 0.39, - "learning_rate": 1.2102335928809789e-05, - "loss": 0.1266, + "epoch": 0.2, + "learning_rate": 1.604897050639956e-05, + "loss": 0.1638, "step": 355 }, { - "epoch": 0.4, - "learning_rate": 1.2080088987764182e-05, - "loss": 0.1191, + "epoch": 0.2, + "learning_rate": 1.6037840845854203e-05, + "loss": 0.162, "step": 356 }, { - "epoch": 0.4, - "learning_rate": 1.2057842046718578e-05, - "loss": 0.1002, + "epoch": 0.2, + "learning_rate": 1.602671118530885e-05, + "loss": 0.1694, "step": 357 }, { - "epoch": 0.4, - "learning_rate": 1.2035595105672971e-05, - "loss": 0.1033, + "epoch": 0.2, + "learning_rate": 1.6015581524763495e-05, + "loss": 0.1107, "step": 358 }, { - "epoch": 0.4, - "learning_rate": 1.2013348164627366e-05, - "loss": 0.0964, + "epoch": 0.2, + "learning_rate": 1.6004451864218143e-05, + "loss": 0.2696, "step": 359 }, { - "epoch": 0.4, - "learning_rate": 1.1991101223581758e-05, - "loss": 0.1612, + "epoch": 0.2, + "learning_rate": 1.599332220367279e-05, + "loss": 0.1129, "step": 360 }, { - "epoch": 0.4, - "learning_rate": 1.1968854282536153e-05, - "loss": 0.2018, + "epoch": 0.2, + "learning_rate": 1.5982192543127435e-05, + "loss": 0.1151, "step": 361 }, { - "epoch": 0.4, - "learning_rate": 1.1946607341490546e-05, - "loss": 0.1069, + "epoch": 0.2, + "learning_rate": 1.5971062882582083e-05, + "loss": 0.0351, "step": 362 }, { - "epoch": 0.4, - "learning_rate": 1.1924360400444939e-05, - "loss": 0.1268, + "epoch": 0.2, + "learning_rate": 1.595993322203673e-05, + "loss": 0.1139, "step": 363 }, { - "epoch": 0.4, - "learning_rate": 1.1902113459399333e-05, - "loss": 0.1485, + "epoch": 0.2, + "learning_rate": 1.5948803561491375e-05, + "loss": 0.2189, "step": 364 }, { - "epoch": 0.41, - "learning_rate": 1.1879866518353726e-05, - "loss": 0.1429, + "epoch": 0.2, + "learning_rate": 1.5937673900946023e-05, + "loss": 0.0583, "step": 365 }, { - "epoch": 0.41, - "learning_rate": 1.1857619577308121e-05, - "loss": 0.1374, + "epoch": 0.2, + "learning_rate": 1.592654424040067e-05, + "loss": 0.0849, "step": 366 }, { - "epoch": 0.41, - "learning_rate": 1.1835372636262514e-05, - "loss": 0.1323, + "epoch": 0.2, + "learning_rate": 1.5915414579855315e-05, + "loss": 0.0319, "step": 367 }, { - "epoch": 0.41, - "learning_rate": 1.181312569521691e-05, - "loss": 0.1134, + "epoch": 0.2, + "learning_rate": 1.5904284919309963e-05, + "loss": 0.1421, "step": 368 }, { - "epoch": 0.41, - "learning_rate": 1.1790878754171303e-05, - "loss": 0.149, + "epoch": 0.21, + "learning_rate": 1.589315525876461e-05, + "loss": 0.1913, "step": 369 }, { - "epoch": 0.41, - "learning_rate": 1.1768631813125698e-05, - "loss": 0.0928, + "epoch": 0.21, + "learning_rate": 1.5882025598219255e-05, + "loss": 0.2226, "step": 370 }, { - "epoch": 0.41, - "learning_rate": 1.174638487208009e-05, - "loss": 0.0988, + "epoch": 0.21, + "learning_rate": 1.5870895937673903e-05, + "loss": 0.0829, "step": 371 }, { - "epoch": 0.41, - "learning_rate": 1.1724137931034483e-05, - "loss": 0.1223, + "epoch": 0.21, + "learning_rate": 1.585976627712855e-05, + "loss": 0.1625, "step": 372 }, { - "epoch": 0.41, - "learning_rate": 1.1701890989988878e-05, - "loss": 0.1102, + "epoch": 0.21, + "learning_rate": 1.5848636616583195e-05, + "loss": 0.1934, "step": 373 }, { - "epoch": 0.42, - "learning_rate": 1.167964404894327e-05, - "loss": 0.2099, + "epoch": 0.21, + "learning_rate": 1.5837506956037843e-05, + "loss": 0.1667, "step": 374 }, { - "epoch": 0.42, - "learning_rate": 1.1657397107897665e-05, - "loss": 0.1, + "epoch": 0.21, + "learning_rate": 1.5826377295492487e-05, + "loss": 0.1116, "step": 375 }, { - "epoch": 0.42, - "learning_rate": 1.1635150166852058e-05, - "loss": 0.1355, + "epoch": 0.21, + "learning_rate": 1.5815247634947135e-05, + "loss": 0.1394, "step": 376 }, { - "epoch": 0.42, - "learning_rate": 1.1612903225806453e-05, - "loss": 0.1541, + "epoch": 0.21, + "learning_rate": 1.5804117974401783e-05, + "loss": 0.1121, "step": 377 }, { - "epoch": 0.42, - "learning_rate": 1.1590656284760846e-05, - "loss": 0.1624, + "epoch": 0.21, + "learning_rate": 1.5792988313856427e-05, + "loss": 0.2202, "step": 378 }, { - "epoch": 0.42, - "learning_rate": 1.1568409343715242e-05, - "loss": 0.1106, + "epoch": 0.21, + "learning_rate": 1.5781858653311075e-05, + "loss": 0.1657, "step": 379 }, { - "epoch": 0.42, - "learning_rate": 1.1546162402669633e-05, - "loss": 0.1526, + "epoch": 0.21, + "learning_rate": 1.5770728992765723e-05, + "loss": 0.1941, "step": 380 }, { - "epoch": 0.42, - "learning_rate": 1.1523915461624026e-05, - "loss": 0.1516, + "epoch": 0.21, + "learning_rate": 1.5759599332220367e-05, + "loss": 0.057, "step": 381 }, { - "epoch": 0.42, - "learning_rate": 1.1501668520578422e-05, - "loss": 0.1906, + "epoch": 0.21, + "learning_rate": 1.5748469671675015e-05, + "loss": 0.1964, "step": 382 }, { - "epoch": 0.43, - "learning_rate": 1.1479421579532815e-05, - "loss": 0.1259, + "epoch": 0.21, + "learning_rate": 1.5737340011129663e-05, + "loss": 0.1654, "step": 383 }, { - "epoch": 0.43, - "learning_rate": 1.145717463848721e-05, - "loss": 0.1446, + "epoch": 0.21, + "learning_rate": 1.5726210350584307e-05, + "loss": 0.1636, "step": 384 }, { - "epoch": 0.43, - "learning_rate": 1.1434927697441603e-05, - "loss": 0.0954, + "epoch": 0.21, + "learning_rate": 1.5715080690038955e-05, + "loss": 0.1127, "step": 385 }, { - "epoch": 0.43, - "learning_rate": 1.1412680756395997e-05, - "loss": 0.1493, + "epoch": 0.21, + "learning_rate": 1.5703951029493603e-05, + "loss": 0.1619, "step": 386 }, { - "epoch": 0.43, - "learning_rate": 1.139043381535039e-05, - "loss": 0.0875, + "epoch": 0.22, + "learning_rate": 1.5692821368948247e-05, + "loss": 0.0382, "step": 387 }, { - "epoch": 0.43, - "learning_rate": 1.1368186874304783e-05, - "loss": 0.0963, + "epoch": 0.22, + "learning_rate": 1.5681691708402895e-05, + "loss": 0.1385, "step": 388 }, { - "epoch": 0.43, - "learning_rate": 1.1345939933259178e-05, - "loss": 0.0652, + "epoch": 0.22, + "learning_rate": 1.5670562047857543e-05, + "loss": 0.1166, "step": 389 }, { - "epoch": 0.43, - "learning_rate": 1.132369299221357e-05, - "loss": 0.1669, + "epoch": 0.22, + "learning_rate": 1.5659432387312187e-05, + "loss": 0.09, "step": 390 }, { - "epoch": 0.43, - "learning_rate": 1.1301446051167965e-05, - "loss": 0.1409, + "epoch": 0.22, + "learning_rate": 1.5648302726766835e-05, + "loss": 0.0866, "step": 391 }, { - "epoch": 0.44, - "learning_rate": 1.1279199110122358e-05, - "loss": 0.1069, + "epoch": 0.22, + "learning_rate": 1.563717306622148e-05, + "loss": 0.1411, "step": 392 }, { - "epoch": 0.44, - "learning_rate": 1.1256952169076754e-05, - "loss": 0.1092, + "epoch": 0.22, + "learning_rate": 1.5626043405676127e-05, + "loss": 0.1425, "step": 393 }, { - "epoch": 0.44, - "learning_rate": 1.1234705228031147e-05, - "loss": 0.1222, + "epoch": 0.22, + "learning_rate": 1.5614913745130775e-05, + "loss": 0.166, "step": 394 }, { - "epoch": 0.44, - "learning_rate": 1.1212458286985542e-05, - "loss": 0.1234, + "epoch": 0.22, + "learning_rate": 1.560378408458542e-05, + "loss": 0.1683, "step": 395 }, { - "epoch": 0.44, - "learning_rate": 1.1190211345939935e-05, - "loss": 0.074, + "epoch": 0.22, + "learning_rate": 1.5592654424040067e-05, + "loss": 0.1932, "step": 396 }, { - "epoch": 0.44, - "learning_rate": 1.1167964404894328e-05, - "loss": 0.0612, + "epoch": 0.22, + "learning_rate": 1.5581524763494715e-05, + "loss": 0.1682, "step": 397 }, { - "epoch": 0.44, - "learning_rate": 1.1145717463848722e-05, - "loss": 0.1779, + "epoch": 0.22, + "learning_rate": 1.557039510294936e-05, + "loss": 0.1687, "step": 398 }, { - "epoch": 0.44, - "learning_rate": 1.1123470522803115e-05, - "loss": 0.1303, + "epoch": 0.22, + "learning_rate": 1.5559265442404007e-05, + "loss": 0.1134, "step": 399 }, { - "epoch": 0.44, - "learning_rate": 1.110122358175751e-05, - "loss": 0.1453, + "epoch": 0.22, + "learning_rate": 1.5548135781858655e-05, + "loss": 0.2225, "step": 400 }, { - "epoch": 0.45, - "learning_rate": 1.1078976640711903e-05, - "loss": 0.0602, + "epoch": 0.22, + "learning_rate": 1.55370061213133e-05, + "loss": 0.0587, "step": 401 }, { - "epoch": 0.45, - "learning_rate": 1.1056729699666297e-05, - "loss": 0.1564, + "epoch": 0.22, + "learning_rate": 1.5525876460767947e-05, + "loss": 0.1106, "step": 402 }, { - "epoch": 0.45, - "learning_rate": 1.103448275862069e-05, - "loss": 0.1226, + "epoch": 0.22, + "learning_rate": 1.5514746800222595e-05, + "loss": 0.1133, "step": 403 }, { - "epoch": 0.45, - "learning_rate": 1.1012235817575086e-05, - "loss": 0.1334, + "epoch": 0.22, + "learning_rate": 1.550361713967724e-05, + "loss": 0.0882, "step": 404 }, { - "epoch": 0.45, - "learning_rate": 1.0989988876529477e-05, - "loss": 0.1854, + "epoch": 0.23, + "learning_rate": 1.5492487479131887e-05, + "loss": 0.1411, "step": 405 }, { - "epoch": 0.45, - "learning_rate": 1.096774193548387e-05, - "loss": 0.1344, + "epoch": 0.23, + "learning_rate": 1.5481357818586535e-05, + "loss": 0.1122, "step": 406 }, { - "epoch": 0.45, - "learning_rate": 1.0945494994438267e-05, - "loss": 0.157, + "epoch": 0.23, + "learning_rate": 1.547022815804118e-05, + "loss": 0.1372, "step": 407 }, { - "epoch": 0.45, - "learning_rate": 1.092324805339266e-05, - "loss": 0.1419, + "epoch": 0.23, + "learning_rate": 1.5459098497495827e-05, + "loss": 0.0561, "step": 408 }, { - "epoch": 0.45, - "learning_rate": 1.0901001112347054e-05, - "loss": 0.1884, + "epoch": 0.23, + "learning_rate": 1.5447968836950474e-05, + "loss": 0.1115, "step": 409 }, { - "epoch": 0.46, - "learning_rate": 1.0878754171301447e-05, - "loss": 0.1676, + "epoch": 0.23, + "learning_rate": 1.5436839176405122e-05, + "loss": 0.1977, "step": 410 }, { - "epoch": 0.46, - "learning_rate": 1.0856507230255842e-05, - "loss": 0.1173, + "epoch": 0.23, + "learning_rate": 1.5425709515859767e-05, + "loss": 0.14, "step": 411 }, { - "epoch": 0.46, - "learning_rate": 1.0834260289210234e-05, - "loss": 0.0992, + "epoch": 0.23, + "learning_rate": 1.5414579855314414e-05, + "loss": 0.1397, "step": 412 }, { - "epoch": 0.46, - "learning_rate": 1.0812013348164627e-05, - "loss": 0.1187, + "epoch": 0.23, + "learning_rate": 1.5403450194769062e-05, + "loss": 0.2224, "step": 413 }, { - "epoch": 0.46, - "learning_rate": 1.0789766407119022e-05, - "loss": 0.1282, + "epoch": 0.23, + "learning_rate": 1.5392320534223707e-05, + "loss": 0.0571, "step": 414 }, { - "epoch": 0.46, - "learning_rate": 1.0767519466073415e-05, - "loss": 0.1508, + "epoch": 0.23, + "learning_rate": 1.5381190873678354e-05, + "loss": 0.2242, "step": 415 }, { - "epoch": 0.46, - "learning_rate": 1.074527252502781e-05, - "loss": 0.1056, + "epoch": 0.23, + "learning_rate": 1.5370061213133002e-05, + "loss": 0.1117, "step": 416 }, { - "epoch": 0.46, - "learning_rate": 1.0723025583982202e-05, - "loss": 0.1837, + "epoch": 0.23, + "learning_rate": 1.5358931552587647e-05, + "loss": 0.2245, "step": 417 }, { - "epoch": 0.46, - "learning_rate": 1.0700778642936599e-05, - "loss": 0.1114, + "epoch": 0.23, + "learning_rate": 1.5347801892042294e-05, + "loss": 0.1939, "step": 418 }, { - "epoch": 0.47, - "learning_rate": 1.0678531701890991e-05, - "loss": 0.1043, + "epoch": 0.23, + "learning_rate": 1.5336672231496942e-05, + "loss": 0.0584, "step": 419 }, { - "epoch": 0.47, - "learning_rate": 1.0656284760845386e-05, - "loss": 0.0863, + "epoch": 0.23, + "learning_rate": 1.5325542570951587e-05, + "loss": 0.3009, "step": 420 }, { - "epoch": 0.47, - "learning_rate": 1.0634037819799779e-05, - "loss": 0.1171, + "epoch": 0.23, + "learning_rate": 1.5314412910406234e-05, + "loss": 0.1116, "step": 421 }, { - "epoch": 0.47, - "learning_rate": 1.0611790878754172e-05, - "loss": 0.1687, + "epoch": 0.23, + "learning_rate": 1.5303283249860882e-05, + "loss": 0.2149, "step": 422 }, { - "epoch": 0.47, - "learning_rate": 1.0589543937708566e-05, - "loss": 0.1668, + "epoch": 0.24, + "learning_rate": 1.5292153589315527e-05, + "loss": 0.1678, "step": 423 }, { - "epoch": 0.47, - "learning_rate": 1.056729699666296e-05, - "loss": 0.1114, + "epoch": 0.24, + "learning_rate": 1.5281023928770174e-05, + "loss": 0.1429, "step": 424 }, { - "epoch": 0.47, - "learning_rate": 1.0545050055617354e-05, - "loss": 0.1236, + "epoch": 0.24, + "learning_rate": 1.5269894268224822e-05, + "loss": 0.1648, "step": 425 }, { - "epoch": 0.47, - "learning_rate": 1.0522803114571747e-05, - "loss": 0.0799, + "epoch": 0.24, + "learning_rate": 1.5258764607679466e-05, + "loss": 0.1169, "step": 426 }, { - "epoch": 0.47, - "learning_rate": 1.0500556173526141e-05, - "loss": 0.1358, + "epoch": 0.24, + "learning_rate": 1.5247634947134113e-05, + "loss": 0.1681, "step": 427 }, { - "epoch": 0.48, - "learning_rate": 1.0478309232480534e-05, - "loss": 0.0837, + "epoch": 0.24, + "learning_rate": 1.523650528658876e-05, + "loss": 0.2155, "step": 428 }, { - "epoch": 0.48, - "learning_rate": 1.0456062291434927e-05, - "loss": 0.1112, + "epoch": 0.24, + "learning_rate": 1.5225375626043406e-05, + "loss": 0.1664, "step": 429 }, { - "epoch": 0.48, - "learning_rate": 1.0433815350389322e-05, - "loss": 0.0838, + "epoch": 0.24, + "learning_rate": 1.5214245965498054e-05, + "loss": 0.1186, "step": 430 }, { - "epoch": 0.48, - "learning_rate": 1.0411568409343715e-05, - "loss": 0.1558, + "epoch": 0.24, + "learning_rate": 1.52031163049527e-05, + "loss": 0.1389, "step": 431 }, { - "epoch": 0.48, - "learning_rate": 1.0389321468298111e-05, - "loss": 0.1595, + "epoch": 0.24, + "learning_rate": 1.5191986644407346e-05, + "loss": 0.1182, "step": 432 }, { - "epoch": 0.48, - "learning_rate": 1.0367074527252504e-05, - "loss": 0.1729, + "epoch": 0.24, + "learning_rate": 1.5180856983861994e-05, + "loss": 0.1647, "step": 433 }, { - "epoch": 0.48, - "learning_rate": 1.0344827586206898e-05, - "loss": 0.0923, + "epoch": 0.24, + "learning_rate": 1.516972732331664e-05, + "loss": 0.1904, "step": 434 }, { - "epoch": 0.48, - "learning_rate": 1.0322580645161291e-05, - "loss": 0.1395, + "epoch": 0.24, + "learning_rate": 1.5158597662771286e-05, + "loss": 0.1151, "step": 435 }, { - "epoch": 0.48, - "learning_rate": 1.0300333704115686e-05, - "loss": 0.1475, + "epoch": 0.24, + "learning_rate": 1.5147468002225934e-05, + "loss": 0.2124, "step": 436 }, { - "epoch": 0.49, - "learning_rate": 1.0278086763070079e-05, - "loss": 0.104, + "epoch": 0.24, + "learning_rate": 1.513633834168058e-05, + "loss": 0.1143, "step": 437 }, { - "epoch": 0.49, - "learning_rate": 1.0255839822024472e-05, - "loss": 0.0751, + "epoch": 0.24, + "learning_rate": 1.5125208681135226e-05, + "loss": 0.0935, "step": 438 }, { - "epoch": 0.49, - "learning_rate": 1.0233592880978866e-05, - "loss": 0.1793, + "epoch": 0.24, + "learning_rate": 1.5114079020589874e-05, + "loss": 0.1883, "step": 439 }, { - "epoch": 0.49, - "learning_rate": 1.0211345939933259e-05, - "loss": 0.1233, + "epoch": 0.24, + "learning_rate": 1.510294936004452e-05, + "loss": 0.1384, "step": 440 }, { - "epoch": 0.49, - "learning_rate": 1.0189098998887654e-05, - "loss": 0.1875, + "epoch": 0.25, + "learning_rate": 1.5091819699499166e-05, + "loss": 0.1428, "step": 441 }, { - "epoch": 0.49, - "learning_rate": 1.0166852057842047e-05, - "loss": 0.0859, + "epoch": 0.25, + "learning_rate": 1.5080690038953814e-05, + "loss": 0.1118, "step": 442 }, { - "epoch": 0.49, - "learning_rate": 1.0144605116796443e-05, - "loss": 0.136, + "epoch": 0.25, + "learning_rate": 1.5069560378408459e-05, + "loss": 0.1435, "step": 443 }, { - "epoch": 0.49, - "learning_rate": 1.0122358175750836e-05, - "loss": 0.0792, + "epoch": 0.25, + "learning_rate": 1.5058430717863106e-05, + "loss": 0.089, "step": 444 }, { - "epoch": 0.49, - "learning_rate": 1.010011123470523e-05, - "loss": 0.1235, + "epoch": 0.25, + "learning_rate": 1.5047301057317754e-05, + "loss": 0.1391, "step": 445 }, { - "epoch": 0.5, - "learning_rate": 1.0077864293659623e-05, - "loss": 0.1159, + "epoch": 0.25, + "learning_rate": 1.5036171396772398e-05, + "loss": 0.189, "step": 446 }, { - "epoch": 0.5, - "learning_rate": 1.0055617352614016e-05, - "loss": 0.2034, + "epoch": 0.25, + "learning_rate": 1.5025041736227046e-05, + "loss": 0.1688, "step": 447 }, { - "epoch": 0.5, - "learning_rate": 1.003337041156841e-05, - "loss": 0.1197, + "epoch": 0.25, + "learning_rate": 1.5013912075681694e-05, + "loss": 0.0583, "step": 448 }, { - "epoch": 0.5, - "learning_rate": 1.0011123470522804e-05, - "loss": 0.1617, + "epoch": 0.25, + "learning_rate": 1.5002782415136338e-05, + "loss": 0.1426, "step": 449 }, { - "epoch": 0.5, - "learning_rate": 9.988876529477196e-06, - "loss": 0.1081, + "epoch": 0.25, + "learning_rate": 1.4991652754590986e-05, + "loss": 0.1388, "step": 450 }, { - "epoch": 0.5, - "learning_rate": 9.966629588431591e-06, - "loss": 0.1349, + "epoch": 0.25, + "learning_rate": 1.4980523094045634e-05, + "loss": 0.1649, "step": 451 }, { - "epoch": 0.5, - "learning_rate": 9.944382647385986e-06, - "loss": 0.1195, + "epoch": 0.25, + "learning_rate": 1.4969393433500278e-05, + "loss": 0.1987, "step": 452 }, { - "epoch": 0.5, - "learning_rate": 9.922135706340378e-06, - "loss": 0.1047, + "epoch": 0.25, + "learning_rate": 1.4958263772954926e-05, + "loss": 0.1146, "step": 453 }, { - "epoch": 0.51, - "learning_rate": 9.899888765294773e-06, - "loss": 0.1124, + "epoch": 0.25, + "learning_rate": 1.4947134112409574e-05, + "loss": 0.1369, "step": 454 }, { - "epoch": 0.51, - "learning_rate": 9.877641824249166e-06, - "loss": 0.2128, + "epoch": 0.25, + "learning_rate": 1.4936004451864218e-05, + "loss": 0.1647, "step": 455 }, { - "epoch": 0.51, - "learning_rate": 9.85539488320356e-06, - "loss": 0.0932, + "epoch": 0.25, + "learning_rate": 1.4924874791318866e-05, + "loss": 0.1414, "step": 456 }, { - "epoch": 0.51, - "learning_rate": 9.833147942157955e-06, - "loss": 0.174, + "epoch": 0.25, + "learning_rate": 1.4913745130773514e-05, + "loss": 0.1134, "step": 457 }, { - "epoch": 0.51, - "learning_rate": 9.810901001112348e-06, - "loss": 0.0757, + "epoch": 0.25, + "learning_rate": 1.4902615470228158e-05, + "loss": 0.1407, "step": 458 }, { - "epoch": 0.51, - "learning_rate": 9.788654060066741e-06, - "loss": 0.1388, + "epoch": 0.26, + "learning_rate": 1.4891485809682806e-05, + "loss": 0.0867, "step": 459 }, { - "epoch": 0.51, - "learning_rate": 9.766407119021135e-06, - "loss": 0.0941, + "epoch": 0.26, + "learning_rate": 1.4880356149137452e-05, + "loss": 0.2232, "step": 460 }, { - "epoch": 0.51, - "learning_rate": 9.744160177975528e-06, - "loss": 0.0583, + "epoch": 0.26, + "learning_rate": 1.4869226488592098e-05, + "loss": 0.1707, "step": 461 }, { - "epoch": 0.51, - "learning_rate": 9.721913236929923e-06, - "loss": 0.1737, + "epoch": 0.26, + "learning_rate": 1.4858096828046746e-05, + "loss": 0.222, "step": 462 }, { - "epoch": 0.52, - "learning_rate": 9.699666295884318e-06, - "loss": 0.1694, + "epoch": 0.26, + "learning_rate": 1.4846967167501392e-05, + "loss": 0.1671, "step": 463 }, { - "epoch": 0.52, - "learning_rate": 9.67741935483871e-06, - "loss": 0.1221, + "epoch": 0.26, + "learning_rate": 1.4835837506956038e-05, + "loss": 0.1391, "step": 464 }, { - "epoch": 0.52, - "learning_rate": 9.655172413793105e-06, - "loss": 0.1675, + "epoch": 0.26, + "learning_rate": 1.4824707846410686e-05, + "loss": 0.1137, "step": 465 }, { - "epoch": 0.52, - "learning_rate": 9.632925472747498e-06, - "loss": 0.168, + "epoch": 0.26, + "learning_rate": 1.4813578185865332e-05, + "loss": 0.139, "step": 466 }, { - "epoch": 0.52, - "learning_rate": 9.61067853170189e-06, - "loss": 0.1342, + "epoch": 0.26, + "learning_rate": 1.4802448525319978e-05, + "loss": 0.1936, "step": 467 }, { - "epoch": 0.52, - "learning_rate": 9.588431590656285e-06, - "loss": 0.1179, + "epoch": 0.26, + "learning_rate": 1.4791318864774626e-05, + "loss": 0.1112, "step": 468 }, { - "epoch": 0.52, - "learning_rate": 9.56618464961068e-06, - "loss": 0.1226, + "epoch": 0.26, + "learning_rate": 1.4780189204229272e-05, + "loss": 0.1392, "step": 469 }, { - "epoch": 0.52, - "learning_rate": 9.543937708565073e-06, - "loss": 0.1451, + "epoch": 0.26, + "learning_rate": 1.4769059543683918e-05, + "loss": 0.1125, "step": 470 }, { - "epoch": 0.52, - "learning_rate": 9.521690767519467e-06, - "loss": 0.0914, + "epoch": 0.26, + "learning_rate": 1.4757929883138566e-05, + "loss": 0.0883, "step": 471 }, { - "epoch": 0.53, - "learning_rate": 9.49944382647386e-06, - "loss": 0.1138, + "epoch": 0.26, + "learning_rate": 1.4746800222593212e-05, + "loss": 0.1633, "step": 472 }, { - "epoch": 0.53, - "learning_rate": 9.477196885428255e-06, - "loss": 0.2265, + "epoch": 0.26, + "learning_rate": 1.473567056204786e-05, + "loss": 0.1141, "step": 473 }, { - "epoch": 0.53, - "learning_rate": 9.45494994438265e-06, - "loss": 0.1218, + "epoch": 0.26, + "learning_rate": 1.4724540901502506e-05, + "loss": 0.1676, "step": 474 }, { - "epoch": 0.53, - "learning_rate": 9.43270300333704e-06, - "loss": 0.1603, + "epoch": 0.26, + "learning_rate": 1.4713411240957152e-05, + "loss": 0.1378, "step": 475 }, { - "epoch": 0.53, - "learning_rate": 9.410456062291435e-06, - "loss": 0.1883, + "epoch": 0.26, + "learning_rate": 1.47022815804118e-05, + "loss": 0.165, "step": 476 }, { - "epoch": 0.53, - "learning_rate": 9.38820912124583e-06, - "loss": 0.1457, + "epoch": 0.27, + "learning_rate": 1.4691151919866444e-05, + "loss": 0.0841, "step": 477 }, { - "epoch": 0.53, - "learning_rate": 9.365962180200223e-06, - "loss": 0.1743, + "epoch": 0.27, + "learning_rate": 1.4680022259321092e-05, + "loss": 0.2214, "step": 478 }, { - "epoch": 0.53, - "learning_rate": 9.343715239154617e-06, - "loss": 0.181, + "epoch": 0.27, + "learning_rate": 1.466889259877574e-05, + "loss": 0.2975, "step": 479 }, { - "epoch": 0.53, - "learning_rate": 9.32146829810901e-06, - "loss": 0.1338, + "epoch": 0.27, + "learning_rate": 1.4657762938230384e-05, + "loss": 0.1146, "step": 480 }, { - "epoch": 0.54, - "learning_rate": 9.299221357063405e-06, - "loss": 0.1293, + "epoch": 0.27, + "learning_rate": 1.4646633277685032e-05, + "loss": 0.2174, "step": 481 }, { - "epoch": 0.54, - "learning_rate": 9.2769744160178e-06, - "loss": 0.0753, + "epoch": 0.27, + "learning_rate": 1.463550361713968e-05, + "loss": 0.0883, "step": 482 }, { - "epoch": 0.54, - "learning_rate": 9.254727474972192e-06, - "loss": 0.0954, + "epoch": 0.27, + "learning_rate": 1.4624373956594324e-05, + "loss": 0.1438, "step": 483 }, { - "epoch": 0.54, - "learning_rate": 9.232480533926585e-06, - "loss": 0.1116, + "epoch": 0.27, + "learning_rate": 1.4613244296048972e-05, + "loss": 0.1387, "step": 484 }, { - "epoch": 0.54, - "learning_rate": 9.21023359288098e-06, - "loss": 0.163, + "epoch": 0.27, + "learning_rate": 1.460211463550362e-05, + "loss": 0.0864, "step": 485 }, { - "epoch": 0.54, - "learning_rate": 9.187986651835373e-06, - "loss": 0.1193, + "epoch": 0.27, + "learning_rate": 1.4590984974958264e-05, + "loss": 0.089, "step": 486 }, { - "epoch": 0.54, - "learning_rate": 9.165739710789767e-06, - "loss": 0.1505, + "epoch": 0.27, + "learning_rate": 1.4579855314412912e-05, + "loss": 0.0862, "step": 487 }, { - "epoch": 0.54, - "learning_rate": 9.143492769744162e-06, - "loss": 0.2265, + "epoch": 0.27, + "learning_rate": 1.456872565386756e-05, + "loss": 0.1413, "step": 488 }, { - "epoch": 0.54, - "learning_rate": 9.121245828698555e-06, - "loss": 0.0593, + "epoch": 0.27, + "learning_rate": 1.4557595993322204e-05, + "loss": 0.142, "step": 489 }, { - "epoch": 0.55, - "learning_rate": 9.09899888765295e-06, - "loss": 0.1985, + "epoch": 0.27, + "learning_rate": 1.4546466332776852e-05, + "loss": 0.1675, "step": 490 }, { - "epoch": 0.55, - "learning_rate": 9.076751946607342e-06, - "loss": 0.1006, + "epoch": 0.27, + "learning_rate": 1.4535336672231496e-05, + "loss": 0.085, "step": 491 }, { - "epoch": 0.55, - "learning_rate": 9.054505005561735e-06, - "loss": 0.1235, + "epoch": 0.27, + "learning_rate": 1.4524207011686144e-05, + "loss": 0.2425, "step": 492 }, { - "epoch": 0.55, - "learning_rate": 9.03225806451613e-06, - "loss": 0.0804, + "epoch": 0.27, + "learning_rate": 1.4513077351140792e-05, + "loss": 0.2228, "step": 493 }, { - "epoch": 0.55, - "learning_rate": 9.010011123470524e-06, - "loss": 0.146, + "epoch": 0.27, + "learning_rate": 1.4501947690595436e-05, + "loss": 0.0577, "step": 494 }, { - "epoch": 0.55, - "learning_rate": 8.987764182424917e-06, - "loss": 0.0699, + "epoch": 0.28, + "learning_rate": 1.4490818030050084e-05, + "loss": 0.1708, "step": 495 }, { - "epoch": 0.55, - "learning_rate": 8.965517241379312e-06, - "loss": 0.1299, + "epoch": 0.28, + "learning_rate": 1.4479688369504732e-05, + "loss": 0.165, "step": 496 }, { - "epoch": 0.55, - "learning_rate": 8.943270300333705e-06, - "loss": 0.1434, + "epoch": 0.28, + "learning_rate": 1.4468558708959378e-05, + "loss": 0.0857, "step": 497 }, { - "epoch": 0.55, - "learning_rate": 8.921023359288099e-06, - "loss": 0.1083, + "epoch": 0.28, + "learning_rate": 1.4457429048414024e-05, + "loss": 0.1637, "step": 498 }, { - "epoch": 0.56, - "learning_rate": 8.898776418242492e-06, - "loss": 0.1202, + "epoch": 0.28, + "learning_rate": 1.4446299387868672e-05, + "loss": 0.0581, "step": 499 }, { - "epoch": 0.56, - "learning_rate": 8.876529477196885e-06, - "loss": 0.16, + "epoch": 0.28, + "learning_rate": 1.4435169727323318e-05, + "loss": 0.1404, "step": 500 }, { - "epoch": 0.56, - "learning_rate": 8.85428253615128e-06, - "loss": 0.1725, + "epoch": 0.28, + "learning_rate": 1.4424040066777964e-05, + "loss": 0.084, "step": 501 }, { - "epoch": 0.56, - "learning_rate": 8.832035595105674e-06, - "loss": 0.084, + "epoch": 0.28, + "learning_rate": 1.4412910406232612e-05, + "loss": 0.1096, "step": 502 }, { - "epoch": 0.56, - "learning_rate": 8.809788654060067e-06, - "loss": 0.182, + "epoch": 0.28, + "learning_rate": 1.4401780745687258e-05, + "loss": 0.0574, "step": 503 }, { - "epoch": 0.56, - "learning_rate": 8.787541713014462e-06, - "loss": 0.0851, + "epoch": 0.28, + "learning_rate": 1.4390651085141904e-05, + "loss": 0.1419, "step": 504 }, { - "epoch": 0.56, - "learning_rate": 8.765294771968854e-06, - "loss": 0.1367, + "epoch": 0.28, + "learning_rate": 1.4379521424596552e-05, + "loss": 0.1142, "step": 505 }, { - "epoch": 0.56, - "learning_rate": 8.743047830923249e-06, - "loss": 0.1441, + "epoch": 0.28, + "learning_rate": 1.4368391764051198e-05, + "loss": 0.137, "step": 506 }, { - "epoch": 0.56, - "learning_rate": 8.720800889877644e-06, - "loss": 0.1773, + "epoch": 0.28, + "learning_rate": 1.4357262103505844e-05, + "loss": 0.1623, "step": 507 }, { - "epoch": 0.57, - "learning_rate": 8.698553948832036e-06, - "loss": 0.1206, + "epoch": 0.28, + "learning_rate": 1.434613244296049e-05, + "loss": 0.1122, "step": 508 }, { - "epoch": 0.57, - "learning_rate": 8.67630700778643e-06, - "loss": 0.1788, + "epoch": 0.28, + "learning_rate": 1.4335002782415138e-05, + "loss": 0.0552, "step": 509 }, { - "epoch": 0.57, - "learning_rate": 8.654060066740824e-06, - "loss": 0.1122, + "epoch": 0.28, + "learning_rate": 1.4323873121869784e-05, + "loss": 0.2233, "step": 510 }, { - "epoch": 0.57, - "learning_rate": 8.631813125695217e-06, - "loss": 0.1137, + "epoch": 0.28, + "learning_rate": 1.431274346132443e-05, + "loss": 0.1394, "step": 511 }, { - "epoch": 0.57, - "learning_rate": 8.609566184649611e-06, - "loss": 0.109, + "epoch": 0.28, + "learning_rate": 1.4301613800779078e-05, + "loss": 0.1377, "step": 512 }, { - "epoch": 0.57, - "learning_rate": 8.587319243604006e-06, - "loss": 0.1863, + "epoch": 0.29, + "learning_rate": 1.4290484140233725e-05, + "loss": 0.1389, "step": 513 }, { - "epoch": 0.57, - "learning_rate": 8.565072302558399e-06, - "loss": 0.1014, + "epoch": 0.29, + "learning_rate": 1.427935447968837e-05, + "loss": 0.0843, "step": 514 }, { - "epoch": 0.57, - "learning_rate": 8.542825361512793e-06, - "loss": 0.1367, + "epoch": 0.29, + "learning_rate": 1.4268224819143018e-05, + "loss": 0.1072, "step": 515 }, { - "epoch": 0.57, - "learning_rate": 8.520578420467186e-06, - "loss": 0.1078, + "epoch": 0.29, + "learning_rate": 1.4257095158597665e-05, + "loss": 0.143, "step": 516 }, { - "epoch": 0.58, - "learning_rate": 8.49833147942158e-06, - "loss": 0.1548, + "epoch": 0.29, + "learning_rate": 1.424596549805231e-05, + "loss": 0.193, "step": 517 }, { - "epoch": 0.58, - "learning_rate": 8.476084538375974e-06, - "loss": 0.1535, + "epoch": 0.29, + "learning_rate": 1.4234835837506958e-05, + "loss": 0.0558, "step": 518 }, { - "epoch": 0.58, - "learning_rate": 8.453837597330368e-06, - "loss": 0.0947, + "epoch": 0.29, + "learning_rate": 1.4223706176961605e-05, + "loss": 0.2175, "step": 519 }, { - "epoch": 0.58, - "learning_rate": 8.431590656284761e-06, - "loss": 0.1175, + "epoch": 0.29, + "learning_rate": 1.421257651641625e-05, + "loss": 0.1385, "step": 520 }, { - "epoch": 0.58, - "learning_rate": 8.409343715239156e-06, - "loss": 0.1945, + "epoch": 0.29, + "learning_rate": 1.4201446855870898e-05, + "loss": 0.0835, "step": 521 }, { - "epoch": 0.58, - "learning_rate": 8.387096774193549e-06, - "loss": 0.095, + "epoch": 0.29, + "learning_rate": 1.4190317195325545e-05, + "loss": 0.0859, "step": 522 }, { - "epoch": 0.58, - "learning_rate": 8.364849833147943e-06, - "loss": 0.0834, + "epoch": 0.29, + "learning_rate": 1.417918753478019e-05, + "loss": 0.1656, "step": 523 }, { - "epoch": 0.58, - "learning_rate": 8.342602892102336e-06, - "loss": 0.1149, + "epoch": 0.29, + "learning_rate": 1.4168057874234838e-05, + "loss": 0.0831, "step": 524 }, { - "epoch": 0.58, - "learning_rate": 8.32035595105673e-06, - "loss": 0.0997, + "epoch": 0.29, + "learning_rate": 1.4156928213689482e-05, + "loss": 0.1893, "step": 525 }, { - "epoch": 0.59, - "learning_rate": 8.298109010011124e-06, - "loss": 0.1504, + "epoch": 0.29, + "learning_rate": 1.414579855314413e-05, + "loss": 0.0832, "step": 526 }, { - "epoch": 0.59, - "learning_rate": 8.275862068965518e-06, - "loss": 0.1913, + "epoch": 0.29, + "learning_rate": 1.4134668892598778e-05, + "loss": 0.1694, "step": 527 }, { - "epoch": 0.59, - "learning_rate": 8.253615127919911e-06, - "loss": 0.0994, + "epoch": 0.29, + "learning_rate": 1.4123539232053422e-05, + "loss": 0.1336, "step": 528 }, { - "epoch": 0.59, - "learning_rate": 8.231368186874306e-06, - "loss": 0.0657, + "epoch": 0.29, + "learning_rate": 1.411240957150807e-05, + "loss": 0.1968, "step": 529 }, { - "epoch": 0.59, - "learning_rate": 8.209121245828699e-06, - "loss": 0.1146, + "epoch": 0.29, + "learning_rate": 1.4101279910962717e-05, + "loss": 0.1964, "step": 530 }, { - "epoch": 0.59, - "learning_rate": 8.186874304783093e-06, - "loss": 0.1536, + "epoch": 0.3, + "learning_rate": 1.4090150250417362e-05, + "loss": 0.1676, "step": 531 }, { - "epoch": 0.59, - "learning_rate": 8.164627363737486e-06, - "loss": 0.1366, + "epoch": 0.3, + "learning_rate": 1.407902058987201e-05, + "loss": 0.3213, "step": 532 }, { - "epoch": 0.59, - "learning_rate": 8.14238042269188e-06, - "loss": 0.1267, + "epoch": 0.3, + "learning_rate": 1.4067890929326657e-05, + "loss": 0.1679, "step": 533 }, { - "epoch": 0.59, - "learning_rate": 8.120133481646274e-06, - "loss": 0.1135, + "epoch": 0.3, + "learning_rate": 1.4056761268781302e-05, + "loss": 0.1139, "step": 534 }, { - "epoch": 0.6, - "learning_rate": 8.097886540600668e-06, - "loss": 0.0926, + "epoch": 0.3, + "learning_rate": 1.404563160823595e-05, + "loss": 0.0885, "step": 535 }, { - "epoch": 0.6, - "learning_rate": 8.075639599555061e-06, - "loss": 0.1566, + "epoch": 0.3, + "learning_rate": 1.4034501947690597e-05, + "loss": 0.141, "step": 536 }, { - "epoch": 0.6, - "learning_rate": 8.053392658509456e-06, - "loss": 0.124, + "epoch": 0.3, + "learning_rate": 1.4023372287145244e-05, + "loss": 0.0893, "step": 537 }, { - "epoch": 0.6, - "learning_rate": 8.03114571746385e-06, - "loss": 0.1865, + "epoch": 0.3, + "learning_rate": 1.401224262659989e-05, + "loss": 0.1421, "step": 538 }, { - "epoch": 0.6, - "learning_rate": 8.008898776418243e-06, - "loss": 0.1176, + "epoch": 0.3, + "learning_rate": 1.4001112966054537e-05, + "loss": 0.1145, "step": 539 }, { - "epoch": 0.6, - "learning_rate": 7.986651835372638e-06, - "loss": 0.1236, + "epoch": 0.3, + "learning_rate": 1.3989983305509183e-05, + "loss": 0.0645, "step": 540 }, { - "epoch": 0.6, - "learning_rate": 7.96440489432703e-06, - "loss": 0.142, + "epoch": 0.3, + "learning_rate": 1.397885364496383e-05, + "loss": 0.0879, "step": 541 }, { - "epoch": 0.6, - "learning_rate": 7.942157953281424e-06, - "loss": 0.1503, + "epoch": 0.3, + "learning_rate": 1.3967723984418476e-05, + "loss": 0.1646, "step": 542 }, { - "epoch": 0.6, - "learning_rate": 7.919911012235818e-06, - "loss": 0.1745, + "epoch": 0.3, + "learning_rate": 1.3956594323873123e-05, + "loss": 0.1638, "step": 543 }, { - "epoch": 0.61, - "learning_rate": 7.897664071190213e-06, - "loss": 0.1531, + "epoch": 0.3, + "learning_rate": 1.394546466332777e-05, + "loss": 0.1935, "step": 544 }, { - "epoch": 0.61, - "learning_rate": 7.875417130144606e-06, - "loss": 0.1299, + "epoch": 0.3, + "learning_rate": 1.3934335002782416e-05, + "loss": 0.1365, "step": 545 }, { - "epoch": 0.61, - "learning_rate": 7.853170189099e-06, - "loss": 0.1245, + "epoch": 0.3, + "learning_rate": 1.3923205342237063e-05, + "loss": 0.141, "step": 546 }, { - "epoch": 0.61, - "learning_rate": 7.830923248053393e-06, - "loss": 0.1316, + "epoch": 0.3, + "learning_rate": 1.391207568169171e-05, + "loss": 0.0592, "step": 547 }, { - "epoch": 0.61, - "learning_rate": 7.808676307007788e-06, - "loss": 0.123, + "epoch": 0.3, + "learning_rate": 1.3900946021146356e-05, + "loss": 0.1377, "step": 548 }, { - "epoch": 0.61, - "learning_rate": 7.78642936596218e-06, - "loss": 0.0601, + "epoch": 0.31, + "learning_rate": 1.3889816360601003e-05, + "loss": 0.1374, "step": 549 }, { - "epoch": 0.61, - "learning_rate": 7.764182424916575e-06, - "loss": 0.2135, + "epoch": 0.31, + "learning_rate": 1.387868670005565e-05, + "loss": 0.141, "step": 550 }, { - "epoch": 0.61, - "learning_rate": 7.741935483870968e-06, - "loss": 0.0962, + "epoch": 0.31, + "learning_rate": 1.3867557039510296e-05, + "loss": 0.1395, "step": 551 }, { - "epoch": 0.61, - "learning_rate": 7.719688542825363e-06, - "loss": 0.0931, + "epoch": 0.31, + "learning_rate": 1.3856427378964943e-05, + "loss": 0.084, "step": 552 }, { - "epoch": 0.62, - "learning_rate": 7.697441601779755e-06, - "loss": 0.1949, + "epoch": 0.31, + "learning_rate": 1.384529771841959e-05, + "loss": 0.1417, "step": 553 }, { - "epoch": 0.62, - "learning_rate": 7.67519466073415e-06, - "loss": 0.1218, + "epoch": 0.31, + "learning_rate": 1.3834168057874236e-05, + "loss": 0.0845, "step": 554 }, { - "epoch": 0.62, - "learning_rate": 7.652947719688543e-06, - "loss": 0.1151, + "epoch": 0.31, + "learning_rate": 1.3823038397328883e-05, + "loss": 0.113, "step": 555 }, { - "epoch": 0.62, - "learning_rate": 7.630700778642938e-06, - "loss": 0.1216, + "epoch": 0.31, + "learning_rate": 1.3811908736783531e-05, + "loss": 0.1141, "step": 556 }, { - "epoch": 0.62, - "learning_rate": 7.60845383759733e-06, - "loss": 0.0899, + "epoch": 0.31, + "learning_rate": 1.3800779076238175e-05, + "loss": 0.1399, "step": 557 }, { - "epoch": 0.62, - "learning_rate": 7.586206896551724e-06, - "loss": 0.1765, + "epoch": 0.31, + "learning_rate": 1.3789649415692823e-05, + "loss": 0.1375, "step": 558 }, { - "epoch": 0.62, - "learning_rate": 7.563959955506118e-06, - "loss": 0.117, + "epoch": 0.31, + "learning_rate": 1.3778519755147468e-05, + "loss": 0.2784, "step": 559 }, { - "epoch": 0.62, - "learning_rate": 7.5417130144605125e-06, - "loss": 0.1227, + "epoch": 0.31, + "learning_rate": 1.3767390094602115e-05, + "loss": 0.0856, "step": 560 }, { - "epoch": 0.62, - "learning_rate": 7.519466073414906e-06, - "loss": 0.1312, + "epoch": 0.31, + "learning_rate": 1.3756260434056763e-05, + "loss": 0.1094, "step": 561 }, { - "epoch": 0.63, - "learning_rate": 7.4972191323693e-06, - "loss": 0.1302, + "epoch": 0.31, + "learning_rate": 1.3745130773511408e-05, + "loss": 0.2491, "step": 562 }, { - "epoch": 0.63, - "learning_rate": 7.474972191323694e-06, - "loss": 0.0877, + "epoch": 0.31, + "learning_rate": 1.3734001112966055e-05, + "loss": 0.0838, "step": 563 }, { - "epoch": 0.63, - "learning_rate": 7.452725250278087e-06, - "loss": 0.1555, + "epoch": 0.31, + "learning_rate": 1.3722871452420703e-05, + "loss": 0.1667, "step": 564 }, { - "epoch": 0.63, - "learning_rate": 7.43047830923248e-06, - "loss": 0.1694, + "epoch": 0.31, + "learning_rate": 1.3711741791875348e-05, + "loss": 0.1652, "step": 565 }, { - "epoch": 0.63, - "learning_rate": 7.408231368186875e-06, - "loss": 0.1208, + "epoch": 0.31, + "learning_rate": 1.3700612131329995e-05, + "loss": 0.1934, "step": 566 }, { - "epoch": 0.63, - "learning_rate": 7.385984427141269e-06, - "loss": 0.158, + "epoch": 0.32, + "learning_rate": 1.3689482470784643e-05, + "loss": 0.0849, "step": 567 }, { - "epoch": 0.63, - "learning_rate": 7.363737486095662e-06, - "loss": 0.1357, + "epoch": 0.32, + "learning_rate": 1.3678352810239288e-05, + "loss": 0.1409, "step": 568 }, { - "epoch": 0.63, - "learning_rate": 7.341490545050056e-06, - "loss": 0.1652, + "epoch": 0.32, + "learning_rate": 1.3667223149693935e-05, + "loss": 0.1639, "step": 569 }, { - "epoch": 0.63, - "learning_rate": 7.31924360400445e-06, - "loss": 0.0939, + "epoch": 0.32, + "learning_rate": 1.3656093489148583e-05, + "loss": 0.1125, "step": 570 }, { - "epoch": 0.64, - "learning_rate": 7.296996662958844e-06, - "loss": 0.1393, + "epoch": 0.32, + "learning_rate": 1.3644963828603228e-05, + "loss": 0.1103, "step": 571 }, { - "epoch": 0.64, - "learning_rate": 7.274749721913238e-06, - "loss": 0.1975, + "epoch": 0.32, + "learning_rate": 1.3633834168057875e-05, + "loss": 0.2242, "step": 572 }, { - "epoch": 0.64, - "learning_rate": 7.252502780867632e-06, - "loss": 0.113, + "epoch": 0.32, + "learning_rate": 1.3622704507512523e-05, + "loss": 0.0885, "step": 573 }, { - "epoch": 0.64, - "learning_rate": 7.230255839822025e-06, - "loss": 0.1865, + "epoch": 0.32, + "learning_rate": 1.3611574846967167e-05, + "loss": 0.087, "step": 574 }, { - "epoch": 0.64, - "learning_rate": 7.2080088987764185e-06, - "loss": 0.1172, + "epoch": 0.32, + "learning_rate": 1.3600445186421815e-05, + "loss": 0.0336, "step": 575 }, { - "epoch": 0.64, - "learning_rate": 7.185761957730812e-06, - "loss": 0.1493, + "epoch": 0.32, + "learning_rate": 1.3589315525876461e-05, + "loss": 0.1635, "step": 576 }, { - "epoch": 0.64, - "learning_rate": 7.163515016685206e-06, - "loss": 0.0838, + "epoch": 0.32, + "learning_rate": 1.3578185865331107e-05, + "loss": 0.139, "step": 577 }, { - "epoch": 0.64, - "learning_rate": 7.1412680756396006e-06, - "loss": 0.1406, + "epoch": 0.32, + "learning_rate": 1.3567056204785755e-05, + "loss": 0.1134, "step": 578 }, { - "epoch": 0.64, - "learning_rate": 7.119021134593994e-06, - "loss": 0.0625, + "epoch": 0.32, + "learning_rate": 1.3555926544240401e-05, + "loss": 0.1409, "step": 579 }, { - "epoch": 0.65, - "learning_rate": 7.096774193548388e-06, - "loss": 0.0948, + "epoch": 0.32, + "learning_rate": 1.3544796883695049e-05, + "loss": 0.2204, "step": 580 }, { - "epoch": 0.65, - "learning_rate": 7.074527252502782e-06, - "loss": 0.1107, + "epoch": 0.32, + "learning_rate": 1.3533667223149695e-05, + "loss": 0.2198, "step": 581 }, { - "epoch": 0.65, - "learning_rate": 7.052280311457175e-06, - "loss": 0.1588, + "epoch": 0.32, + "learning_rate": 1.3522537562604341e-05, + "loss": 0.0863, "step": 582 }, { - "epoch": 0.65, - "learning_rate": 7.030033370411568e-06, - "loss": 0.0948, + "epoch": 0.32, + "learning_rate": 1.3511407902058989e-05, + "loss": 0.1117, "step": 583 }, { - "epoch": 0.65, - "learning_rate": 7.007786429365962e-06, - "loss": 0.1782, + "epoch": 0.32, + "learning_rate": 1.3500278241513635e-05, + "loss": 0.1355, "step": 584 }, { - "epoch": 0.65, - "learning_rate": 6.985539488320357e-06, - "loss": 0.1876, + "epoch": 0.33, + "learning_rate": 1.3489148580968281e-05, + "loss": 0.0308, "step": 585 }, { - "epoch": 0.65, - "learning_rate": 6.9632925472747504e-06, - "loss": 0.1564, + "epoch": 0.33, + "learning_rate": 1.3478018920422929e-05, + "loss": 0.1677, "step": 586 }, { - "epoch": 0.65, - "learning_rate": 6.941045606229144e-06, - "loss": 0.1393, + "epoch": 0.33, + "learning_rate": 1.3466889259877575e-05, + "loss": 0.058, "step": 587 }, { - "epoch": 0.65, - "learning_rate": 6.918798665183538e-06, - "loss": 0.0944, + "epoch": 0.33, + "learning_rate": 1.3455759599332221e-05, + "loss": 0.1105, "step": 588 }, { - "epoch": 0.66, - "learning_rate": 6.896551724137932e-06, - "loss": 0.1289, + "epoch": 0.33, + "learning_rate": 1.3444629938786869e-05, + "loss": 0.1391, "step": 589 }, { - "epoch": 0.66, - "learning_rate": 6.8743047830923245e-06, - "loss": 0.0833, + "epoch": 0.33, + "learning_rate": 1.3433500278241515e-05, + "loss": 0.2747, "step": 590 }, { - "epoch": 0.66, - "learning_rate": 6.852057842046719e-06, - "loss": 0.1976, + "epoch": 0.33, + "learning_rate": 1.3422370617696161e-05, + "loss": 0.1977, "step": 591 }, { - "epoch": 0.66, - "learning_rate": 6.829810901001113e-06, - "loss": 0.0764, + "epoch": 0.33, + "learning_rate": 1.3411240957150809e-05, + "loss": 0.114, "step": 592 }, { - "epoch": 0.66, - "learning_rate": 6.807563959955507e-06, - "loss": 0.1371, + "epoch": 0.33, + "learning_rate": 1.3400111296605453e-05, + "loss": 0.1663, "step": 593 }, { - "epoch": 0.66, - "learning_rate": 6.7853170189099e-06, - "loss": 0.1103, + "epoch": 0.33, + "learning_rate": 1.3388981636060101e-05, + "loss": 0.0842, "step": 594 }, { - "epoch": 0.66, - "learning_rate": 6.763070077864294e-06, - "loss": 0.1295, + "epoch": 0.33, + "learning_rate": 1.3377851975514749e-05, + "loss": 0.1149, "step": 595 }, { - "epoch": 0.66, - "learning_rate": 6.740823136818689e-06, - "loss": 0.1438, + "epoch": 0.33, + "learning_rate": 1.3366722314969393e-05, + "loss": 0.1123, "step": 596 }, { - "epoch": 0.66, - "learning_rate": 6.718576195773082e-06, - "loss": 0.1196, + "epoch": 0.33, + "learning_rate": 1.3355592654424041e-05, + "loss": 0.1978, "step": 597 }, { - "epoch": 0.67, - "learning_rate": 6.696329254727475e-06, - "loss": 0.1536, + "epoch": 0.33, + "learning_rate": 1.3344462993878689e-05, + "loss": 0.0836, "step": 598 }, { - "epoch": 0.67, - "learning_rate": 6.674082313681869e-06, - "loss": 0.1171, + "epoch": 0.33, + "learning_rate": 1.3333333333333333e-05, + "loss": 0.0862, "step": 599 }, { - "epoch": 0.67, - "learning_rate": 6.651835372636263e-06, - "loss": 0.0995, + "epoch": 0.33, + "learning_rate": 1.3322203672787981e-05, + "loss": 0.1144, "step": 600 }, { - "epoch": 0.67, - "learning_rate": 6.6295884315906565e-06, - "loss": 0.128, + "epoch": 0.33, + "learning_rate": 1.3311074012242629e-05, + "loss": 0.2512, "step": 601 }, { - "epoch": 0.67, - "learning_rate": 6.60734149054505e-06, - "loss": 0.1729, + "epoch": 0.34, + "learning_rate": 1.3299944351697273e-05, + "loss": 0.1697, "step": 602 }, { - "epoch": 0.67, - "learning_rate": 6.585094549499445e-06, - "loss": 0.1814, + "epoch": 0.34, + "learning_rate": 1.3288814691151921e-05, + "loss": 0.1628, "step": 603 }, { - "epoch": 0.67, - "learning_rate": 6.5628476084538385e-06, - "loss": 0.1303, + "epoch": 0.34, + "learning_rate": 1.3277685030606569e-05, + "loss": 0.0828, "step": 604 }, { - "epoch": 0.67, - "learning_rate": 6.540600667408232e-06, - "loss": 0.1543, + "epoch": 0.34, + "learning_rate": 1.3266555370061213e-05, + "loss": 0.2441, "step": 605 }, { - "epoch": 0.67, - "learning_rate": 6.518353726362626e-06, - "loss": 0.139, + "epoch": 0.34, + "learning_rate": 1.3255425709515861e-05, + "loss": 0.1126, "step": 606 }, { - "epoch": 0.68, - "learning_rate": 6.496106785317019e-06, - "loss": 0.0994, + "epoch": 0.34, + "learning_rate": 1.3244296048970509e-05, + "loss": 0.0843, "step": 607 }, { - "epoch": 0.68, - "learning_rate": 6.473859844271413e-06, - "loss": 0.1305, + "epoch": 0.34, + "learning_rate": 1.3233166388425153e-05, + "loss": 0.1385, "step": 608 }, { - "epoch": 0.68, - "learning_rate": 6.451612903225806e-06, - "loss": 0.1032, + "epoch": 0.34, + "learning_rate": 1.3222036727879801e-05, + "loss": 0.0557, "step": 609 }, { - "epoch": 0.68, - "learning_rate": 6.429365962180201e-06, - "loss": 0.1587, + "epoch": 0.34, + "learning_rate": 1.3210907067334447e-05, + "loss": 0.1664, "step": 610 }, { - "epoch": 0.68, - "learning_rate": 6.407119021134595e-06, - "loss": 0.0986, + "epoch": 0.34, + "learning_rate": 1.3199777406789093e-05, + "loss": 0.1388, "step": 611 }, { - "epoch": 0.68, - "learning_rate": 6.3848720800889884e-06, - "loss": 0.1344, + "epoch": 0.34, + "learning_rate": 1.3188647746243741e-05, + "loss": 0.1371, "step": 612 }, { - "epoch": 0.68, - "learning_rate": 6.362625139043382e-06, - "loss": 0.1832, + "epoch": 0.34, + "learning_rate": 1.3177518085698387e-05, + "loss": 0.1666, "step": 613 }, { - "epoch": 0.68, - "learning_rate": 6.340378197997776e-06, - "loss": 0.081, + "epoch": 0.34, + "learning_rate": 1.3166388425153033e-05, + "loss": 0.1959, "step": 614 }, { - "epoch": 0.68, - "learning_rate": 6.318131256952169e-06, - "loss": 0.1609, + "epoch": 0.34, + "learning_rate": 1.3155258764607681e-05, + "loss": 0.1155, "step": 615 }, { - "epoch": 0.69, - "learning_rate": 6.295884315906563e-06, - "loss": 0.1742, + "epoch": 0.34, + "learning_rate": 1.3144129104062327e-05, + "loss": 0.1886, "step": 616 }, { - "epoch": 0.69, - "learning_rate": 6.273637374860957e-06, - "loss": 0.1004, + "epoch": 0.34, + "learning_rate": 1.3132999443516973e-05, + "loss": 0.1129, "step": 617 }, { - "epoch": 0.69, - "learning_rate": 6.251390433815351e-06, - "loss": 0.1308, + "epoch": 0.34, + "learning_rate": 1.3121869782971621e-05, + "loss": 0.1699, "step": 618 }, { - "epoch": 0.69, - "learning_rate": 6.229143492769745e-06, - "loss": 0.1363, + "epoch": 0.34, + "learning_rate": 1.3110740122426267e-05, + "loss": 0.1147, "step": 619 }, { - "epoch": 0.69, - "learning_rate": 6.206896551724138e-06, - "loss": 0.1272, + "epoch": 0.35, + "learning_rate": 1.3099610461880915e-05, + "loss": 0.1417, "step": 620 }, { - "epoch": 0.69, - "learning_rate": 6.184649610678533e-06, - "loss": 0.1451, + "epoch": 0.35, + "learning_rate": 1.308848080133556e-05, + "loss": 0.1399, "step": 621 }, { - "epoch": 0.69, - "learning_rate": 6.162402669632927e-06, - "loss": 0.1717, + "epoch": 0.35, + "learning_rate": 1.3077351140790207e-05, + "loss": 0.0841, "step": 622 }, { - "epoch": 0.69, - "learning_rate": 6.1401557285873195e-06, - "loss": 0.0978, + "epoch": 0.35, + "learning_rate": 1.3066221480244855e-05, + "loss": 0.2192, "step": 623 }, { - "epoch": 0.69, - "learning_rate": 6.117908787541713e-06, - "loss": 0.0851, + "epoch": 0.35, + "learning_rate": 1.3055091819699499e-05, + "loss": 0.1657, "step": 624 }, { - "epoch": 0.7, - "learning_rate": 6.095661846496107e-06, - "loss": 0.1123, + "epoch": 0.35, + "learning_rate": 1.3043962159154147e-05, + "loss": 0.1138, "step": 625 }, { - "epoch": 0.7, - "learning_rate": 6.073414905450501e-06, - "loss": 0.1026, + "epoch": 0.35, + "learning_rate": 1.3032832498608795e-05, + "loss": 0.0878, "step": 626 }, { - "epoch": 0.7, - "learning_rate": 6.0511679644048945e-06, - "loss": 0.1699, + "epoch": 0.35, + "learning_rate": 1.3021702838063439e-05, + "loss": 0.1111, "step": 627 }, { - "epoch": 0.7, - "learning_rate": 6.028921023359289e-06, - "loss": 0.1389, + "epoch": 0.35, + "learning_rate": 1.3010573177518087e-05, + "loss": 0.2216, "step": 628 }, { - "epoch": 0.7, - "learning_rate": 6.006674082313683e-06, - "loss": 0.1814, + "epoch": 0.35, + "learning_rate": 1.2999443516972735e-05, + "loss": 0.0859, "step": 629 }, { - "epoch": 0.7, - "learning_rate": 5.9844271412680765e-06, - "loss": 0.0873, + "epoch": 0.35, + "learning_rate": 1.2988313856427379e-05, + "loss": 0.1156, "step": 630 }, { - "epoch": 0.7, - "learning_rate": 5.962180200222469e-06, - "loss": 0.1457, + "epoch": 0.35, + "learning_rate": 1.2977184195882027e-05, + "loss": 0.1142, "step": 631 }, { - "epoch": 0.7, - "learning_rate": 5.939933259176863e-06, - "loss": 0.1748, + "epoch": 0.35, + "learning_rate": 1.2966054535336675e-05, + "loss": 0.1962, "step": 632 }, { - "epoch": 0.7, - "learning_rate": 5.917686318131257e-06, - "loss": 0.0879, + "epoch": 0.35, + "learning_rate": 1.2954924874791319e-05, + "loss": 0.1369, "step": 633 }, { - "epoch": 0.71, - "learning_rate": 5.8954393770856515e-06, - "loss": 0.1342, + "epoch": 0.35, + "learning_rate": 1.2943795214245967e-05, + "loss": 0.2185, "step": 634 }, { - "epoch": 0.71, - "learning_rate": 5.873192436040045e-06, - "loss": 0.1325, + "epoch": 0.35, + "learning_rate": 1.2932665553700615e-05, + "loss": 0.1959, "step": 635 }, { - "epoch": 0.71, - "learning_rate": 5.850945494994439e-06, - "loss": 0.1546, + "epoch": 0.35, + "learning_rate": 1.2921535893155259e-05, + "loss": 0.1109, "step": 636 }, { - "epoch": 0.71, - "learning_rate": 5.828698553948833e-06, - "loss": 0.0895, + "epoch": 0.35, + "learning_rate": 1.2910406232609907e-05, + "loss": 0.1371, "step": 637 }, { - "epoch": 0.71, - "learning_rate": 5.806451612903226e-06, - "loss": 0.1967, + "epoch": 0.36, + "learning_rate": 1.2899276572064555e-05, + "loss": 0.0865, "step": 638 }, { - "epoch": 0.71, - "learning_rate": 5.784204671857621e-06, - "loss": 0.1346, - "step": 639 + "epoch": 0.36, + "learning_rate": 1.2888146911519199e-05, + "loss": 0.084, + "step": 639 }, { - "epoch": 0.71, - "learning_rate": 5.761957730812013e-06, - "loss": 0.1509, + "epoch": 0.36, + "learning_rate": 1.2877017250973847e-05, + "loss": 0.1941, "step": 640 }, { - "epoch": 0.71, - "learning_rate": 5.739710789766408e-06, - "loss": 0.0653, + "epoch": 0.36, + "learning_rate": 1.2865887590428491e-05, + "loss": 0.1132, "step": 641 }, { - "epoch": 0.71, - "learning_rate": 5.717463848720801e-06, - "loss": 0.1007, + "epoch": 0.36, + "learning_rate": 1.2854757929883139e-05, + "loss": 0.0608, "step": 642 }, { - "epoch": 0.72, - "learning_rate": 5.695216907675195e-06, - "loss": 0.1161, + "epoch": 0.36, + "learning_rate": 1.2843628269337787e-05, + "loss": 0.142, "step": 643 }, { - "epoch": 0.72, - "learning_rate": 5.672969966629589e-06, - "loss": 0.1221, + "epoch": 0.36, + "learning_rate": 1.2832498608792431e-05, + "loss": 0.2204, "step": 644 }, { - "epoch": 0.72, - "learning_rate": 5.6507230255839826e-06, - "loss": 0.1192, + "epoch": 0.36, + "learning_rate": 1.2821368948247079e-05, + "loss": 0.1125, "step": 645 }, { - "epoch": 0.72, - "learning_rate": 5.628476084538377e-06, - "loss": 0.1577, + "epoch": 0.36, + "learning_rate": 1.2810239287701727e-05, + "loss": 0.1383, "step": 646 }, { - "epoch": 0.72, - "learning_rate": 5.606229143492771e-06, - "loss": 0.121, + "epoch": 0.36, + "learning_rate": 1.2799109627156373e-05, + "loss": 0.115, "step": 647 }, { - "epoch": 0.72, - "learning_rate": 5.583982202447164e-06, - "loss": 0.1169, + "epoch": 0.36, + "learning_rate": 1.2787979966611019e-05, + "loss": 0.0871, "step": 648 }, { - "epoch": 0.72, - "learning_rate": 5.5617352614015575e-06, - "loss": 0.1256, + "epoch": 0.36, + "learning_rate": 1.2776850306065667e-05, + "loss": 0.1376, "step": 649 }, { - "epoch": 0.72, - "learning_rate": 5.539488320355951e-06, - "loss": 0.1681, + "epoch": 0.36, + "learning_rate": 1.2765720645520313e-05, + "loss": 0.0842, "step": 650 }, { - "epoch": 0.72, - "learning_rate": 5.517241379310345e-06, - "loss": 0.051, + "epoch": 0.36, + "learning_rate": 1.2754590984974959e-05, + "loss": 0.082, "step": 651 }, { - "epoch": 0.73, - "learning_rate": 5.494994438264739e-06, - "loss": 0.0876, + "epoch": 0.36, + "learning_rate": 1.2743461324429607e-05, + "loss": 0.2781, "step": 652 }, { - "epoch": 0.73, - "learning_rate": 5.472747497219133e-06, - "loss": 0.0998, + "epoch": 0.36, + "learning_rate": 1.2732331663884253e-05, + "loss": 0.2215, "step": 653 }, { - "epoch": 0.73, - "learning_rate": 5.450500556173527e-06, - "loss": 0.1544, + "epoch": 0.36, + "learning_rate": 1.2721202003338899e-05, + "loss": 0.1384, "step": 654 }, { - "epoch": 0.73, - "learning_rate": 5.428253615127921e-06, - "loss": 0.1637, + "epoch": 0.36, + "learning_rate": 1.2710072342793547e-05, + "loss": 0.1398, "step": 655 }, { - "epoch": 0.73, - "learning_rate": 5.406006674082314e-06, - "loss": 0.1538, + "epoch": 0.37, + "learning_rate": 1.2698942682248193e-05, + "loss": 0.0861, "step": 656 }, { - "epoch": 0.73, - "learning_rate": 5.383759733036707e-06, - "loss": 0.157, + "epoch": 0.37, + "learning_rate": 1.2687813021702839e-05, + "loss": 0.0839, "step": 657 }, { - "epoch": 0.73, - "learning_rate": 5.361512791991101e-06, - "loss": 0.081, + "epoch": 0.37, + "learning_rate": 1.2676683361157485e-05, + "loss": 0.1958, "step": 658 }, { - "epoch": 0.73, - "learning_rate": 5.339265850945496e-06, - "loss": 0.1512, + "epoch": 0.37, + "learning_rate": 1.2665553700612133e-05, + "loss": 0.0848, "step": 659 }, { - "epoch": 0.73, - "learning_rate": 5.3170189098998895e-06, - "loss": 0.1404, + "epoch": 0.37, + "learning_rate": 1.2654424040066779e-05, + "loss": 0.1379, "step": 660 }, { - "epoch": 0.74, - "learning_rate": 5.294771968854283e-06, - "loss": 0.1109, + "epoch": 0.37, + "learning_rate": 1.2643294379521425e-05, + "loss": 0.0854, "step": 661 }, { - "epoch": 0.74, - "learning_rate": 5.272525027808677e-06, - "loss": 0.1473, + "epoch": 0.37, + "learning_rate": 1.2632164718976073e-05, + "loss": 0.1893, "step": 662 }, { - "epoch": 0.74, - "learning_rate": 5.250278086763071e-06, - "loss": 0.0665, + "epoch": 0.37, + "learning_rate": 1.262103505843072e-05, + "loss": 0.223, "step": 663 }, { - "epoch": 0.74, - "learning_rate": 5.2280311457174636e-06, - "loss": 0.0949, + "epoch": 0.37, + "learning_rate": 1.2609905397885365e-05, + "loss": 0.0313, "step": 664 }, { - "epoch": 0.74, - "learning_rate": 5.205784204671857e-06, - "loss": 0.1562, + "epoch": 0.37, + "learning_rate": 1.2598775737340013e-05, + "loss": 0.0892, "step": 665 }, { - "epoch": 0.74, - "learning_rate": 5.183537263626252e-06, - "loss": 0.1436, + "epoch": 0.37, + "learning_rate": 1.258764607679466e-05, + "loss": 0.0848, "step": 666 }, { - "epoch": 0.74, - "learning_rate": 5.161290322580646e-06, - "loss": 0.1641, + "epoch": 0.37, + "learning_rate": 1.2576516416249305e-05, + "loss": 0.2189, "step": 667 }, { - "epoch": 0.74, - "learning_rate": 5.139043381535039e-06, - "loss": 0.1152, + "epoch": 0.37, + "learning_rate": 1.2565386755703952e-05, + "loss": 0.1673, "step": 668 }, { - "epoch": 0.74, - "learning_rate": 5.116796440489433e-06, - "loss": 0.1611, + "epoch": 0.37, + "learning_rate": 1.25542570951586e-05, + "loss": 0.0549, "step": 669 }, { - "epoch": 0.75, - "learning_rate": 5.094549499443827e-06, - "loss": 0.1225, + "epoch": 0.37, + "learning_rate": 1.2543127434613245e-05, + "loss": 0.164, "step": 670 }, { - "epoch": 0.75, - "learning_rate": 5.072302558398221e-06, - "loss": 0.2414, + "epoch": 0.37, + "learning_rate": 1.2531997774067892e-05, + "loss": 0.1434, "step": 671 }, { - "epoch": 0.75, - "learning_rate": 5.050055617352615e-06, - "loss": 0.1613, + "epoch": 0.37, + "learning_rate": 1.252086811352254e-05, + "loss": 0.1935, "step": 672 }, { - "epoch": 0.75, - "learning_rate": 5.027808676307008e-06, - "loss": 0.134, + "epoch": 0.37, + "learning_rate": 1.2509738452977185e-05, + "loss": 0.1137, "step": 673 }, { - "epoch": 0.75, - "learning_rate": 5.005561735261402e-06, - "loss": 0.0747, + "epoch": 0.38, + "learning_rate": 1.2498608792431832e-05, + "loss": 0.1658, "step": 674 }, { - "epoch": 0.75, - "learning_rate": 4.9833147942157955e-06, - "loss": 0.1797, + "epoch": 0.38, + "learning_rate": 1.2487479131886477e-05, + "loss": 0.1677, "step": 675 }, { - "epoch": 0.75, - "learning_rate": 4.961067853170189e-06, - "loss": 0.1408, + "epoch": 0.38, + "learning_rate": 1.2476349471341125e-05, + "loss": 0.193, "step": 676 }, { - "epoch": 0.75, - "learning_rate": 4.938820912124583e-06, - "loss": 0.0535, + "epoch": 0.38, + "learning_rate": 1.2465219810795772e-05, + "loss": 0.1641, "step": 677 }, { - "epoch": 0.75, - "learning_rate": 4.9165739710789776e-06, - "loss": 0.0862, + "epoch": 0.38, + "learning_rate": 1.2454090150250417e-05, + "loss": 0.1698, "step": 678 }, { - "epoch": 0.76, - "learning_rate": 4.8943270300333704e-06, - "loss": 0.0843, + "epoch": 0.38, + "learning_rate": 1.2442960489705065e-05, + "loss": 0.0863, "step": 679 }, { - "epoch": 0.76, - "learning_rate": 4.872080088987764e-06, - "loss": 0.0879, + "epoch": 0.38, + "learning_rate": 1.2431830829159712e-05, + "loss": 0.1959, "step": 680 }, { - "epoch": 0.76, - "learning_rate": 4.849833147942159e-06, - "loss": 0.1176, + "epoch": 0.38, + "learning_rate": 1.2420701168614357e-05, + "loss": 0.1372, "step": 681 }, { - "epoch": 0.76, - "learning_rate": 4.8275862068965525e-06, - "loss": 0.1298, + "epoch": 0.38, + "learning_rate": 1.2409571508069005e-05, + "loss": 0.1956, "step": 682 }, { - "epoch": 0.76, - "learning_rate": 4.805339265850945e-06, - "loss": 0.0729, + "epoch": 0.38, + "learning_rate": 1.2398441847523652e-05, + "loss": 0.1411, "step": 683 }, { - "epoch": 0.76, - "learning_rate": 4.78309232480534e-06, - "loss": 0.1257, + "epoch": 0.38, + "learning_rate": 1.2387312186978297e-05, + "loss": 0.0881, "step": 684 }, { - "epoch": 0.76, - "learning_rate": 4.760845383759734e-06, - "loss": 0.1531, + "epoch": 0.38, + "learning_rate": 1.2376182526432944e-05, + "loss": 0.1169, "step": 685 }, { - "epoch": 0.76, - "learning_rate": 4.7385984427141274e-06, - "loss": 0.1339, + "epoch": 0.38, + "learning_rate": 1.2365052865887592e-05, + "loss": 0.1664, "step": 686 }, { - "epoch": 0.76, - "learning_rate": 4.71635150166852e-06, - "loss": 0.0858, + "epoch": 0.38, + "learning_rate": 1.2353923205342238e-05, + "loss": 0.1114, "step": 687 }, { - "epoch": 0.77, - "learning_rate": 4.694104560622915e-06, - "loss": 0.1735, + "epoch": 0.38, + "learning_rate": 1.2342793544796884e-05, + "loss": 0.0863, "step": 688 }, { - "epoch": 0.77, - "learning_rate": 4.671857619577309e-06, - "loss": 0.1912, + "epoch": 0.38, + "learning_rate": 1.2331663884251532e-05, + "loss": 0.1391, "step": 689 }, { - "epoch": 0.77, - "learning_rate": 4.649610678531702e-06, - "loss": 0.1782, + "epoch": 0.38, + "learning_rate": 1.2320534223706178e-05, + "loss": 0.2137, "step": 690 }, { - "epoch": 0.77, - "learning_rate": 4.627363737486096e-06, - "loss": 0.0905, + "epoch": 0.38, + "learning_rate": 1.2309404563160824e-05, + "loss": 0.1907, "step": 691 }, { - "epoch": 0.77, - "learning_rate": 4.60511679644049e-06, - "loss": 0.1193, + "epoch": 0.39, + "learning_rate": 1.229827490261547e-05, + "loss": 0.1138, "step": 692 }, { - "epoch": 0.77, - "learning_rate": 4.582869855394884e-06, - "loss": 0.1422, + "epoch": 0.39, + "learning_rate": 1.2287145242070118e-05, + "loss": 0.1655, "step": 693 }, { - "epoch": 0.77, - "learning_rate": 4.560622914349277e-06, - "loss": 0.1194, + "epoch": 0.39, + "learning_rate": 1.2276015581524764e-05, + "loss": 0.113, "step": 694 }, { - "epoch": 0.77, - "learning_rate": 4.538375973303671e-06, - "loss": 0.1289, + "epoch": 0.39, + "learning_rate": 1.226488592097941e-05, + "loss": 0.0871, "step": 695 }, { - "epoch": 0.77, - "learning_rate": 4.516129032258065e-06, - "loss": 0.1367, + "epoch": 0.39, + "learning_rate": 1.2253756260434058e-05, + "loss": 0.0613, "step": 696 }, { - "epoch": 0.78, - "learning_rate": 4.4938820912124585e-06, - "loss": 0.1569, + "epoch": 0.39, + "learning_rate": 1.2242626599888704e-05, + "loss": 0.1128, "step": 697 }, { - "epoch": 0.78, - "learning_rate": 4.471635150166852e-06, - "loss": 0.0773, + "epoch": 0.39, + "learning_rate": 1.223149693934335e-05, + "loss": 0.1907, "step": 698 }, { - "epoch": 0.78, - "learning_rate": 4.449388209121246e-06, - "loss": 0.1164, + "epoch": 0.39, + "learning_rate": 1.2220367278797998e-05, + "loss": 0.1935, "step": 699 }, { - "epoch": 0.78, - "learning_rate": 4.42714126807564e-06, - "loss": 0.1482, + "epoch": 0.39, + "learning_rate": 1.2209237618252644e-05, + "loss": 0.1399, "step": 700 }, { - "epoch": 0.78, - "learning_rate": 4.4048943270300335e-06, - "loss": 0.0938, + "epoch": 0.39, + "learning_rate": 1.219810795770729e-05, + "loss": 0.1387, "step": 701 }, { - "epoch": 0.78, - "learning_rate": 4.382647385984427e-06, - "loss": 0.1266, + "epoch": 0.39, + "learning_rate": 1.2186978297161938e-05, + "loss": 0.1935, "step": 702 }, { - "epoch": 0.78, - "learning_rate": 4.360400444938822e-06, - "loss": 0.1223, + "epoch": 0.39, + "learning_rate": 1.2175848636616584e-05, + "loss": 0.2188, "step": 703 }, { - "epoch": 0.78, - "learning_rate": 4.338153503893215e-06, - "loss": 0.108, + "epoch": 0.39, + "learning_rate": 1.216471897607123e-05, + "loss": 0.2209, "step": 704 }, { - "epoch": 0.78, - "learning_rate": 4.3159065628476084e-06, - "loss": 0.0668, + "epoch": 0.39, + "learning_rate": 1.2153589315525878e-05, + "loss": 0.1935, "step": 705 }, { - "epoch": 0.79, - "learning_rate": 4.293659621802003e-06, - "loss": 0.1183, + "epoch": 0.39, + "learning_rate": 1.2142459654980526e-05, + "loss": 0.1392, "step": 706 }, { - "epoch": 0.79, - "learning_rate": 4.271412680756397e-06, - "loss": 0.1405, + "epoch": 0.39, + "learning_rate": 1.213132999443517e-05, + "loss": 0.1157, "step": 707 }, { - "epoch": 0.79, - "learning_rate": 4.24916573971079e-06, - "loss": 0.1986, + "epoch": 0.39, + "learning_rate": 1.2120200333889818e-05, + "loss": 0.1695, "step": 708 }, { - "epoch": 0.79, - "learning_rate": 4.226918798665184e-06, - "loss": 0.1068, + "epoch": 0.39, + "learning_rate": 1.2109070673344463e-05, + "loss": 0.1647, "step": 709 }, { - "epoch": 0.79, - "learning_rate": 4.204671857619578e-06, - "loss": 0.0765, + "epoch": 0.4, + "learning_rate": 1.209794101279911e-05, + "loss": 0.0892, "step": 710 }, { - "epoch": 0.79, - "learning_rate": 4.182424916573972e-06, - "loss": 0.1545, + "epoch": 0.4, + "learning_rate": 1.2086811352253758e-05, + "loss": 0.0883, "step": 711 }, { - "epoch": 0.79, - "learning_rate": 4.160177975528365e-06, - "loss": 0.1233, + "epoch": 0.4, + "learning_rate": 1.2075681691708402e-05, + "loss": 0.1418, "step": 712 }, { - "epoch": 0.79, - "learning_rate": 4.137931034482759e-06, - "loss": 0.0831, + "epoch": 0.4, + "learning_rate": 1.206455203116305e-05, + "loss": 0.112, "step": 713 }, { - "epoch": 0.79, - "learning_rate": 4.115684093437153e-06, - "loss": 0.1246, + "epoch": 0.4, + "learning_rate": 1.2053422370617698e-05, + "loss": 0.0891, "step": 714 }, { - "epoch": 0.8, - "learning_rate": 4.093437152391547e-06, - "loss": 0.1768, + "epoch": 0.4, + "learning_rate": 1.2042292710072342e-05, + "loss": 0.1379, "step": 715 }, { - "epoch": 0.8, - "learning_rate": 4.07119021134594e-06, - "loss": 0.1872, + "epoch": 0.4, + "learning_rate": 1.203116304952699e-05, + "loss": 0.0854, "step": 716 }, { - "epoch": 0.8, - "learning_rate": 4.048943270300334e-06, - "loss": 0.0938, + "epoch": 0.4, + "learning_rate": 1.2020033388981638e-05, + "loss": 0.0587, "step": 717 }, { - "epoch": 0.8, - "learning_rate": 4.026696329254728e-06, - "loss": 0.1394, + "epoch": 0.4, + "learning_rate": 1.2008903728436282e-05, + "loss": 0.1375, "step": 718 }, { - "epoch": 0.8, - "learning_rate": 4.004449388209122e-06, - "loss": 0.1576, + "epoch": 0.4, + "learning_rate": 1.199777406789093e-05, + "loss": 0.1618, "step": 719 }, { - "epoch": 0.8, - "learning_rate": 3.982202447163515e-06, - "loss": 0.182, + "epoch": 0.4, + "learning_rate": 1.1986644407345578e-05, + "loss": 0.1389, "step": 720 }, { - "epoch": 0.8, - "learning_rate": 3.959955506117909e-06, - "loss": 0.1301, + "epoch": 0.4, + "learning_rate": 1.1975514746800222e-05, + "loss": 0.2247, "step": 721 }, { - "epoch": 0.8, - "learning_rate": 3.937708565072303e-06, - "loss": 0.0663, + "epoch": 0.4, + "learning_rate": 1.196438508625487e-05, + "loss": 0.1697, "step": 722 }, { - "epoch": 0.8, - "learning_rate": 3.9154616240266965e-06, - "loss": 0.0798, + "epoch": 0.4, + "learning_rate": 1.1953255425709518e-05, + "loss": 0.0579, "step": 723 }, { - "epoch": 0.81, - "learning_rate": 3.89321468298109e-06, - "loss": 0.1204, + "epoch": 0.4, + "learning_rate": 1.1942125765164162e-05, + "loss": 0.1677, "step": 724 }, { - "epoch": 0.81, - "learning_rate": 3.870967741935484e-06, - "loss": 0.1104, + "epoch": 0.4, + "learning_rate": 1.193099610461881e-05, + "loss": 0.112, "step": 725 }, { - "epoch": 0.81, - "learning_rate": 3.848720800889878e-06, - "loss": 0.1855, + "epoch": 0.4, + "learning_rate": 1.1919866444073456e-05, + "loss": 0.1421, "step": 726 }, { - "epoch": 0.81, - "learning_rate": 3.8264738598442715e-06, - "loss": 0.0931, + "epoch": 0.4, + "learning_rate": 1.1908736783528102e-05, + "loss": 0.1678, "step": 727 }, { - "epoch": 0.81, - "learning_rate": 3.804226918798665e-06, - "loss": 0.1468, + "epoch": 0.41, + "learning_rate": 1.189760712298275e-05, + "loss": 0.1356, "step": 728 }, { - "epoch": 0.81, - "learning_rate": 3.781979977753059e-06, - "loss": 0.1151, + "epoch": 0.41, + "learning_rate": 1.1886477462437396e-05, + "loss": 0.1631, "step": 729 }, { - "epoch": 0.81, - "learning_rate": 3.759733036707453e-06, - "loss": 0.1115, + "epoch": 0.41, + "learning_rate": 1.1875347801892044e-05, + "loss": 0.113, "step": 730 }, { - "epoch": 0.81, - "learning_rate": 3.737486095661847e-06, - "loss": 0.0745, - "step": 731 + "epoch": 0.41, + "learning_rate": 1.186421814134669e-05, + "loss": 0.0575, + "step": 731 }, { - "epoch": 0.81, - "learning_rate": 3.71523915461624e-06, - "loss": 0.0635, + "epoch": 0.41, + "learning_rate": 1.1853088480801336e-05, + "loss": 0.2224, "step": 732 }, { - "epoch": 0.82, - "learning_rate": 3.6929922135706343e-06, - "loss": 0.0787, + "epoch": 0.41, + "learning_rate": 1.1841958820255984e-05, + "loss": 0.1677, "step": 733 }, { - "epoch": 0.82, - "learning_rate": 3.670745272525028e-06, - "loss": 0.1109, + "epoch": 0.41, + "learning_rate": 1.183082915971063e-05, + "loss": 0.1128, "step": 734 }, { - "epoch": 0.82, - "learning_rate": 3.648498331479422e-06, - "loss": 0.1109, + "epoch": 0.41, + "learning_rate": 1.1819699499165276e-05, + "loss": 0.1417, "step": 735 }, { - "epoch": 0.82, - "learning_rate": 3.626251390433816e-06, - "loss": 0.088, + "epoch": 0.41, + "learning_rate": 1.1808569838619924e-05, + "loss": 0.0842, "step": 736 }, { - "epoch": 0.82, - "learning_rate": 3.6040044493882093e-06, - "loss": 0.068, + "epoch": 0.41, + "learning_rate": 1.179744017807457e-05, + "loss": 0.1127, "step": 737 }, { - "epoch": 0.82, - "learning_rate": 3.581757508342603e-06, - "loss": 0.1372, + "epoch": 0.41, + "learning_rate": 1.1786310517529216e-05, + "loss": 0.1894, "step": 738 }, { - "epoch": 0.82, - "learning_rate": 3.559510567296997e-06, - "loss": 0.1909, + "epoch": 0.41, + "learning_rate": 1.1775180856983864e-05, + "loss": 0.084, "step": 739 }, { - "epoch": 0.82, - "learning_rate": 3.537263626251391e-06, - "loss": 0.1157, + "epoch": 0.41, + "learning_rate": 1.176405119643851e-05, + "loss": 0.1122, "step": 740 }, { - "epoch": 0.82, - "learning_rate": 3.515016685205784e-06, - "loss": 0.1359, + "epoch": 0.41, + "learning_rate": 1.1752921535893156e-05, + "loss": 0.11, "step": 741 }, { - "epoch": 0.83, - "learning_rate": 3.4927697441601784e-06, - "loss": 0.1684, + "epoch": 0.41, + "learning_rate": 1.1741791875347804e-05, + "loss": 0.0841, "step": 742 }, { - "epoch": 0.83, - "learning_rate": 3.470522803114572e-06, - "loss": 0.1227, + "epoch": 0.41, + "learning_rate": 1.1730662214802448e-05, + "loss": 0.1708, "step": 743 }, { - "epoch": 0.83, - "learning_rate": 3.448275862068966e-06, - "loss": 0.1014, + "epoch": 0.41, + "learning_rate": 1.1719532554257096e-05, + "loss": 0.0849, "step": 744 }, { - "epoch": 0.83, - "learning_rate": 3.4260289210233596e-06, - "loss": 0.0783, + "epoch": 0.41, + "learning_rate": 1.1708402893711744e-05, + "loss": 0.1653, "step": 745 }, { - "epoch": 0.83, - "learning_rate": 3.4037819799777533e-06, - "loss": 0.1381, + "epoch": 0.42, + "learning_rate": 1.1697273233166388e-05, + "loss": 0.0556, "step": 746 }, { - "epoch": 0.83, - "learning_rate": 3.381535038932147e-06, - "loss": 0.1601, + "epoch": 0.42, + "learning_rate": 1.1686143572621036e-05, + "loss": 0.1667, "step": 747 }, { - "epoch": 0.83, - "learning_rate": 3.359288097886541e-06, - "loss": 0.1679, + "epoch": 0.42, + "learning_rate": 1.1675013912075684e-05, + "loss": 0.2793, "step": 748 }, { - "epoch": 0.83, - "learning_rate": 3.3370411568409345e-06, - "loss": 0.1359, + "epoch": 0.42, + "learning_rate": 1.1663884251530328e-05, + "loss": 0.0569, "step": 749 }, { - "epoch": 0.83, - "learning_rate": 3.3147942157953282e-06, - "loss": 0.1307, + "epoch": 0.42, + "learning_rate": 1.1652754590984976e-05, + "loss": 0.1419, "step": 750 }, { - "epoch": 0.84, - "learning_rate": 3.2925472747497224e-06, - "loss": 0.1425, + "epoch": 0.42, + "learning_rate": 1.1641624930439624e-05, + "loss": 0.1936, "step": 751 }, { - "epoch": 0.84, - "learning_rate": 3.270300333704116e-06, - "loss": 0.1993, + "epoch": 0.42, + "learning_rate": 1.1630495269894268e-05, + "loss": 0.0844, "step": 752 }, { - "epoch": 0.84, - "learning_rate": 3.2480533926585095e-06, - "loss": 0.2172, + "epoch": 0.42, + "learning_rate": 1.1619365609348916e-05, + "loss": 0.1716, "step": 753 }, { - "epoch": 0.84, - "learning_rate": 3.225806451612903e-06, - "loss": 0.0741, + "epoch": 0.42, + "learning_rate": 1.1608235948803564e-05, + "loss": 0.1702, "step": 754 }, { - "epoch": 0.84, - "learning_rate": 3.2035595105672973e-06, - "loss": 0.1076, + "epoch": 0.42, + "learning_rate": 1.1597106288258208e-05, + "loss": 0.1112, "step": 755 }, { - "epoch": 0.84, - "learning_rate": 3.181312569521691e-06, - "loss": 0.1392, + "epoch": 0.42, + "learning_rate": 1.1585976627712856e-05, + "loss": 0.2161, "step": 756 }, { - "epoch": 0.84, - "learning_rate": 3.1590656284760844e-06, - "loss": 0.1159, + "epoch": 0.42, + "learning_rate": 1.1574846967167504e-05, + "loss": 0.086, "step": 757 }, { - "epoch": 0.84, - "learning_rate": 3.1368186874304786e-06, - "loss": 0.1352, + "epoch": 0.42, + "learning_rate": 1.1563717306622148e-05, + "loss": 0.1645, "step": 758 }, { - "epoch": 0.84, - "learning_rate": 3.1145717463848723e-06, - "loss": 0.1347, + "epoch": 0.42, + "learning_rate": 1.1552587646076796e-05, + "loss": 0.1423, "step": 759 }, { - "epoch": 0.85, - "learning_rate": 3.0923248053392665e-06, - "loss": 0.1168, + "epoch": 0.42, + "learning_rate": 1.1541457985531442e-05, + "loss": 0.1912, "step": 760 }, { - "epoch": 0.85, - "learning_rate": 3.0700778642936598e-06, - "loss": 0.1058, + "epoch": 0.42, + "learning_rate": 1.1530328324986088e-05, + "loss": 0.1678, "step": 761 }, { - "epoch": 0.85, - "learning_rate": 3.0478309232480535e-06, - "loss": 0.125, + "epoch": 0.42, + "learning_rate": 1.1519198664440736e-05, + "loss": 0.1652, "step": 762 }, { - "epoch": 0.85, - "learning_rate": 3.0255839822024472e-06, - "loss": 0.1155, + "epoch": 0.42, + "learning_rate": 1.1508069003895382e-05, + "loss": 0.2156, "step": 763 }, { - "epoch": 0.85, - "learning_rate": 3.0033370411568414e-06, - "loss": 0.1313, + "epoch": 0.43, + "learning_rate": 1.1496939343350028e-05, + "loss": 0.1911, "step": 764 }, { - "epoch": 0.85, - "learning_rate": 2.9810901001112347e-06, - "loss": 0.1777, + "epoch": 0.43, + "learning_rate": 1.1485809682804676e-05, + "loss": 0.1619, "step": 765 }, { - "epoch": 0.85, - "learning_rate": 2.9588431590656284e-06, - "loss": 0.1185, + "epoch": 0.43, + "learning_rate": 1.1474680022259322e-05, + "loss": 0.1147, "step": 766 }, { - "epoch": 0.85, - "learning_rate": 2.9365962180200226e-06, - "loss": 0.0861, + "epoch": 0.43, + "learning_rate": 1.1463550361713968e-05, + "loss": 0.09, "step": 767 }, { - "epoch": 0.85, - "learning_rate": 2.9143492769744163e-06, - "loss": 0.1932, + "epoch": 0.43, + "learning_rate": 1.1452420701168616e-05, + "loss": 0.2109, "step": 768 }, { - "epoch": 0.86, - "learning_rate": 2.8921023359288105e-06, - "loss": 0.1722, + "epoch": 0.43, + "learning_rate": 1.1441291040623262e-05, + "loss": 0.1161, "step": 769 }, { - "epoch": 0.86, - "learning_rate": 2.869855394883204e-06, - "loss": 0.1382, + "epoch": 0.43, + "learning_rate": 1.143016138007791e-05, + "loss": 0.0874, "step": 770 }, { - "epoch": 0.86, - "learning_rate": 2.8476084538375975e-06, - "loss": 0.1256, + "epoch": 0.43, + "learning_rate": 1.1419031719532556e-05, + "loss": 0.2165, "step": 771 }, { - "epoch": 0.86, - "learning_rate": 2.8253615127919913e-06, - "loss": 0.0563, + "epoch": 0.43, + "learning_rate": 1.1407902058987202e-05, + "loss": 0.115, "step": 772 }, { - "epoch": 0.86, - "learning_rate": 2.8031145717463854e-06, - "loss": 0.1465, + "epoch": 0.43, + "learning_rate": 1.139677239844185e-05, + "loss": 0.1136, "step": 773 }, { - "epoch": 0.86, - "learning_rate": 2.7808676307007788e-06, - "loss": 0.1009, + "epoch": 0.43, + "learning_rate": 1.1385642737896494e-05, + "loss": 0.09, "step": 774 }, { - "epoch": 0.86, - "learning_rate": 2.7586206896551725e-06, - "loss": 0.1098, + "epoch": 0.43, + "learning_rate": 1.1374513077351142e-05, + "loss": 0.1117, "step": 775 }, { - "epoch": 0.86, - "learning_rate": 2.7363737486095667e-06, - "loss": 0.0802, + "epoch": 0.43, + "learning_rate": 1.136338341680579e-05, + "loss": 0.0871, "step": 776 }, { - "epoch": 0.86, - "learning_rate": 2.7141268075639604e-06, - "loss": 0.1427, + "epoch": 0.43, + "learning_rate": 1.1352253756260434e-05, + "loss": 0.0872, "step": 777 }, { - "epoch": 0.87, - "learning_rate": 2.6918798665183537e-06, - "loss": 0.2159, + "epoch": 0.43, + "learning_rate": 1.1341124095715082e-05, + "loss": 0.0594, "step": 778 }, { - "epoch": 0.87, - "learning_rate": 2.669632925472748e-06, - "loss": 0.1411, + "epoch": 0.43, + "learning_rate": 1.132999443516973e-05, + "loss": 0.1676, "step": 779 }, { - "epoch": 0.87, - "learning_rate": 2.6473859844271416e-06, - "loss": 0.1076, + "epoch": 0.43, + "learning_rate": 1.1318864774624374e-05, + "loss": 0.1918, "step": 780 }, { - "epoch": 0.87, - "learning_rate": 2.6251390433815353e-06, - "loss": 0.0909, + "epoch": 0.43, + "learning_rate": 1.1307735114079022e-05, + "loss": 0.1666, "step": 781 }, { - "epoch": 0.87, - "learning_rate": 2.6028921023359286e-06, - "loss": 0.1406, + "epoch": 0.44, + "learning_rate": 1.129660545353367e-05, + "loss": 0.1113, "step": 782 }, { - "epoch": 0.87, - "learning_rate": 2.580645161290323e-06, - "loss": 0.1497, + "epoch": 0.44, + "learning_rate": 1.1285475792988314e-05, + "loss": 0.1399, "step": 783 }, { - "epoch": 0.87, - "learning_rate": 2.5583982202447165e-06, - "loss": 0.2158, + "epoch": 0.44, + "learning_rate": 1.1274346132442962e-05, + "loss": 0.0843, "step": 784 }, { - "epoch": 0.87, - "learning_rate": 2.5361512791991107e-06, - "loss": 0.1208, + "epoch": 0.44, + "learning_rate": 1.126321647189761e-05, + "loss": 0.0554, "step": 785 }, { - "epoch": 0.87, - "learning_rate": 2.513904338153504e-06, - "loss": 0.0859, + "epoch": 0.44, + "learning_rate": 1.1252086811352254e-05, + "loss": 0.1674, "step": 786 }, { - "epoch": 0.88, - "learning_rate": 2.4916573971078977e-06, - "loss": 0.1194, + "epoch": 0.44, + "learning_rate": 1.1240957150806902e-05, + "loss": 0.1379, "step": 787 }, { - "epoch": 0.88, - "learning_rate": 2.4694104560622915e-06, - "loss": 0.1062, + "epoch": 0.44, + "learning_rate": 1.122982749026155e-05, + "loss": 0.1133, "step": 788 }, { - "epoch": 0.88, - "learning_rate": 2.4471635150166852e-06, - "loss": 0.1352, + "epoch": 0.44, + "learning_rate": 1.1218697829716194e-05, + "loss": 0.1673, "step": 789 }, { - "epoch": 0.88, - "learning_rate": 2.4249165739710794e-06, - "loss": 0.1686, + "epoch": 0.44, + "learning_rate": 1.1207568169170842e-05, + "loss": 0.1133, "step": 790 }, { - "epoch": 0.88, - "learning_rate": 2.4026696329254727e-06, - "loss": 0.1032, + "epoch": 0.44, + "learning_rate": 1.1196438508625486e-05, + "loss": 0.1094, "step": 791 }, { - "epoch": 0.88, - "learning_rate": 2.380422691879867e-06, - "loss": 0.1714, + "epoch": 0.44, + "learning_rate": 1.1185308848080134e-05, + "loss": 0.0265, "step": 792 }, { - "epoch": 0.88, - "learning_rate": 2.35817575083426e-06, - "loss": 0.0859, + "epoch": 0.44, + "learning_rate": 1.1174179187534782e-05, + "loss": 0.0571, "step": 793 }, { - "epoch": 0.88, - "learning_rate": 2.3359288097886543e-06, - "loss": 0.1005, + "epoch": 0.44, + "learning_rate": 1.1163049526989428e-05, + "loss": 0.0819, "step": 794 }, { - "epoch": 0.88, - "learning_rate": 2.313681868743048e-06, - "loss": 0.1104, + "epoch": 0.44, + "learning_rate": 1.1151919866444074e-05, + "loss": 0.1688, "step": 795 }, { - "epoch": 0.89, - "learning_rate": 2.291434927697442e-06, - "loss": 0.1599, + "epoch": 0.44, + "learning_rate": 1.1140790205898722e-05, + "loss": 0.2231, "step": 796 }, { - "epoch": 0.89, - "learning_rate": 2.2691879866518355e-06, - "loss": 0.1361, + "epoch": 0.44, + "learning_rate": 1.1129660545353368e-05, + "loss": 0.1668, "step": 797 }, { - "epoch": 0.89, - "learning_rate": 2.2469410456062293e-06, - "loss": 0.146, + "epoch": 0.44, + "learning_rate": 1.1118530884808014e-05, + "loss": 0.1107, "step": 798 }, { - "epoch": 0.89, - "learning_rate": 2.224694104560623e-06, - "loss": 0.1488, + "epoch": 0.44, + "learning_rate": 1.1107401224262661e-05, + "loss": 0.1418, "step": 799 }, { - "epoch": 0.89, - "learning_rate": 2.2024471635150167e-06, - "loss": 0.1307, + "epoch": 0.45, + "learning_rate": 1.1096271563717308e-05, + "loss": 0.1737, "step": 800 }, { - "epoch": 0.89, - "learning_rate": 2.180200222469411e-06, - "loss": 0.0982, + "epoch": 0.45, + "learning_rate": 1.1085141903171954e-05, + "loss": 0.0537, "step": 801 }, { - "epoch": 0.89, - "learning_rate": 2.1579532814238042e-06, - "loss": 0.1512, + "epoch": 0.45, + "learning_rate": 1.1074012242626601e-05, + "loss": 0.0533, "step": 802 }, { - "epoch": 0.89, - "learning_rate": 2.1357063403781984e-06, - "loss": 0.1409, + "epoch": 0.45, + "learning_rate": 1.1062882582081248e-05, + "loss": 0.2301, "step": 803 }, { - "epoch": 0.89, - "learning_rate": 2.113459399332592e-06, - "loss": 0.1334, + "epoch": 0.45, + "learning_rate": 1.1051752921535894e-05, + "loss": 0.1147, "step": 804 }, { - "epoch": 0.9, - "learning_rate": 2.091212458286986e-06, - "loss": 0.126, + "epoch": 0.45, + "learning_rate": 1.1040623260990541e-05, + "loss": 0.1968, "step": 805 }, { - "epoch": 0.9, - "learning_rate": 2.0689655172413796e-06, - "loss": 0.1499, + "epoch": 0.45, + "learning_rate": 1.1029493600445187e-05, + "loss": 0.0817, "step": 806 }, { - "epoch": 0.9, - "learning_rate": 2.0467185761957733e-06, - "loss": 0.1211, + "epoch": 0.45, + "learning_rate": 1.1018363939899834e-05, + "loss": 0.1679, "step": 807 }, { - "epoch": 0.9, - "learning_rate": 2.024471635150167e-06, - "loss": 0.1054, + "epoch": 0.45, + "learning_rate": 1.100723427935448e-05, + "loss": 0.1122, "step": 808 }, { - "epoch": 0.9, - "learning_rate": 2.002224694104561e-06, - "loss": 0.2367, + "epoch": 0.45, + "learning_rate": 1.0996104618809127e-05, + "loss": 0.2804, "step": 809 }, { - "epoch": 0.9, - "learning_rate": 1.9799777530589545e-06, - "loss": 0.1302, + "epoch": 0.45, + "learning_rate": 1.0984974958263774e-05, + "loss": 0.1408, "step": 810 }, { - "epoch": 0.9, - "learning_rate": 1.9577308120133483e-06, - "loss": 0.1165, + "epoch": 0.45, + "learning_rate": 1.097384529771842e-05, + "loss": 0.1107, "step": 811 }, { - "epoch": 0.9, - "learning_rate": 1.935483870967742e-06, - "loss": 0.1599, + "epoch": 0.45, + "learning_rate": 1.0962715637173067e-05, + "loss": 0.1984, "step": 812 }, { - "epoch": 0.9, - "learning_rate": 1.9132369299221357e-06, - "loss": 0.1346, + "epoch": 0.45, + "learning_rate": 1.0951585976627715e-05, + "loss": 0.1665, "step": 813 }, { - "epoch": 0.91, - "learning_rate": 1.8909899888765295e-06, - "loss": 0.1285, + "epoch": 0.45, + "learning_rate": 1.094045631608236e-05, + "loss": 0.1398, "step": 814 }, { - "epoch": 0.91, - "learning_rate": 1.8687430478309234e-06, - "loss": 0.1147, + "epoch": 0.45, + "learning_rate": 1.0929326655537007e-05, + "loss": 0.1713, "step": 815 }, { - "epoch": 0.91, - "learning_rate": 1.8464961067853172e-06, - "loss": 0.0707, + "epoch": 0.45, + "learning_rate": 1.0918196994991655e-05, + "loss": 0.1406, "step": 816 }, { - "epoch": 0.91, - "learning_rate": 1.824249165739711e-06, - "loss": 0.1767, + "epoch": 0.45, + "learning_rate": 1.09070673344463e-05, + "loss": 0.191, "step": 817 }, { - "epoch": 0.91, - "learning_rate": 1.8020022246941046e-06, - "loss": 0.149, + "epoch": 0.46, + "learning_rate": 1.0895937673900947e-05, + "loss": 0.1955, "step": 818 }, { - "epoch": 0.91, - "learning_rate": 1.7797552836484986e-06, - "loss": 0.1751, + "epoch": 0.46, + "learning_rate": 1.0884808013355595e-05, + "loss": 0.2685, "step": 819 }, { - "epoch": 0.91, - "learning_rate": 1.757508342602892e-06, - "loss": 0.0821, + "epoch": 0.46, + "learning_rate": 1.087367835281024e-05, + "loss": 0.0861, "step": 820 }, { - "epoch": 0.91, - "learning_rate": 1.735261401557286e-06, - "loss": 0.1253, + "epoch": 0.46, + "learning_rate": 1.0862548692264887e-05, + "loss": 0.1366, "step": 821 }, { - "epoch": 0.91, - "learning_rate": 1.7130144605116798e-06, - "loss": 0.1504, + "epoch": 0.46, + "learning_rate": 1.0851419031719535e-05, + "loss": 0.1124, "step": 822 }, { - "epoch": 0.92, - "learning_rate": 1.6907675194660735e-06, - "loss": 0.1373, + "epoch": 0.46, + "learning_rate": 1.084028937117418e-05, + "loss": 0.1154, "step": 823 }, { - "epoch": 0.92, - "learning_rate": 1.6685205784204673e-06, - "loss": 0.1826, + "epoch": 0.46, + "learning_rate": 1.0829159710628827e-05, + "loss": 0.1135, "step": 824 }, { - "epoch": 0.92, - "learning_rate": 1.6462736373748612e-06, - "loss": 0.1581, + "epoch": 0.46, + "learning_rate": 1.0818030050083472e-05, + "loss": 0.1671, "step": 825 }, { - "epoch": 0.92, - "learning_rate": 1.6240266963292547e-06, - "loss": 0.1321, + "epoch": 0.46, + "learning_rate": 1.080690038953812e-05, + "loss": 0.089, "step": 826 }, { - "epoch": 0.92, - "learning_rate": 1.6017797552836487e-06, - "loss": 0.1322, + "epoch": 0.46, + "learning_rate": 1.0795770728992767e-05, + "loss": 0.1949, "step": 827 }, { - "epoch": 0.92, - "learning_rate": 1.5795328142380422e-06, - "loss": 0.1273, + "epoch": 0.46, + "learning_rate": 1.0784641068447412e-05, + "loss": 0.089, "step": 828 }, { - "epoch": 0.92, - "learning_rate": 1.5572858731924361e-06, - "loss": 0.111, + "epoch": 0.46, + "learning_rate": 1.077351140790206e-05, + "loss": 0.1905, "step": 829 }, { - "epoch": 0.92, - "learning_rate": 1.5350389321468299e-06, - "loss": 0.0755, + "epoch": 0.46, + "learning_rate": 1.0762381747356707e-05, + "loss": 0.1164, "step": 830 }, { - "epoch": 0.92, - "learning_rate": 1.5127919911012236e-06, - "loss": 0.0928, + "epoch": 0.46, + "learning_rate": 1.0751252086811352e-05, + "loss": 0.1662, "step": 831 }, { - "epoch": 0.93, - "learning_rate": 1.4905450500556174e-06, - "loss": 0.0978, + "epoch": 0.46, + "learning_rate": 1.0740122426266e-05, + "loss": 0.0638, "step": 832 }, { - "epoch": 0.93, - "learning_rate": 1.4682981090100113e-06, - "loss": 0.1427, + "epoch": 0.46, + "learning_rate": 1.0728992765720647e-05, + "loss": 0.1907, "step": 833 }, { - "epoch": 0.93, - "learning_rate": 1.4460511679644053e-06, - "loss": 0.0859, + "epoch": 0.46, + "learning_rate": 1.0717863105175292e-05, + "loss": 0.1937, "step": 834 }, { - "epoch": 0.93, - "learning_rate": 1.4238042269187988e-06, - "loss": 0.1575, + "epoch": 0.46, + "learning_rate": 1.070673344462994e-05, + "loss": 0.1126, "step": 835 }, { - "epoch": 0.93, - "learning_rate": 1.4015572858731927e-06, - "loss": 0.1221, + "epoch": 0.47, + "learning_rate": 1.0695603784084587e-05, + "loss": 0.113, "step": 836 }, { - "epoch": 0.93, - "learning_rate": 1.3793103448275862e-06, - "loss": 0.1623, + "epoch": 0.47, + "learning_rate": 1.0684474123539233e-05, + "loss": 0.0856, "step": 837 }, { - "epoch": 0.93, - "learning_rate": 1.3570634037819802e-06, - "loss": 0.1594, + "epoch": 0.47, + "learning_rate": 1.067334446299388e-05, + "loss": 0.139, "step": 838 }, { - "epoch": 0.93, - "learning_rate": 1.334816462736374e-06, - "loss": 0.1627, + "epoch": 0.47, + "learning_rate": 1.0662214802448527e-05, + "loss": 0.0844, "step": 839 }, { - "epoch": 0.93, - "learning_rate": 1.3125695216907677e-06, - "loss": 0.1423, + "epoch": 0.47, + "learning_rate": 1.0651085141903173e-05, + "loss": 0.1131, "step": 840 }, { - "epoch": 0.94, - "learning_rate": 1.2903225806451614e-06, - "loss": 0.1432, + "epoch": 0.47, + "learning_rate": 1.063995548135782e-05, + "loss": 0.1918, "step": 841 }, { - "epoch": 0.94, - "learning_rate": 1.2680756395995554e-06, - "loss": 0.133, + "epoch": 0.47, + "learning_rate": 1.0628825820812465e-05, + "loss": 0.0848, "step": 842 }, { - "epoch": 0.94, - "learning_rate": 1.2458286985539489e-06, - "loss": 0.0962, + "epoch": 0.47, + "learning_rate": 1.0617696160267113e-05, + "loss": 0.1403, "step": 843 }, { - "epoch": 0.94, - "learning_rate": 1.2235817575083426e-06, - "loss": 0.1366, + "epoch": 0.47, + "learning_rate": 1.060656649972176e-05, + "loss": 0.2482, "step": 844 }, { - "epoch": 0.94, - "learning_rate": 1.2013348164627363e-06, - "loss": 0.1297, + "epoch": 0.47, + "learning_rate": 1.0595436839176405e-05, + "loss": 0.1924, "step": 845 }, { - "epoch": 0.94, - "learning_rate": 1.17908787541713e-06, - "loss": 0.0794, + "epoch": 0.47, + "learning_rate": 1.0584307178631053e-05, + "loss": 0.1392, "step": 846 }, { - "epoch": 0.94, - "learning_rate": 1.156840934371524e-06, - "loss": 0.0892, + "epoch": 0.47, + "learning_rate": 1.05731775180857e-05, + "loss": 0.1677, "step": 847 }, { - "epoch": 0.94, - "learning_rate": 1.1345939933259178e-06, - "loss": 0.141, + "epoch": 0.47, + "learning_rate": 1.0562047857540345e-05, + "loss": 0.0596, "step": 848 }, { - "epoch": 0.94, - "learning_rate": 1.1123470522803115e-06, - "loss": 0.0575, + "epoch": 0.47, + "learning_rate": 1.0550918196994993e-05, + "loss": 0.2211, "step": 849 }, { - "epoch": 0.95, - "learning_rate": 1.0901001112347055e-06, - "loss": 0.1203, + "epoch": 0.47, + "learning_rate": 1.053978853644964e-05, + "loss": 0.0566, "step": 850 }, { - "epoch": 0.95, - "learning_rate": 1.0678531701890992e-06, - "loss": 0.0925, + "epoch": 0.47, + "learning_rate": 1.0528658875904285e-05, + "loss": 0.0861, "step": 851 }, { - "epoch": 0.95, - "learning_rate": 1.045606229143493e-06, - "loss": 0.1518, + "epoch": 0.47, + "learning_rate": 1.0517529215358933e-05, + "loss": 0.0837, "step": 852 }, { - "epoch": 0.95, - "learning_rate": 1.0233592880978867e-06, - "loss": 0.168, + "epoch": 0.47, + "learning_rate": 1.0506399554813579e-05, + "loss": 0.1415, "step": 853 }, { - "epoch": 0.95, - "learning_rate": 1.0011123470522804e-06, - "loss": 0.107, + "epoch": 0.48, + "learning_rate": 1.0495269894268225e-05, + "loss": 0.1645, "step": 854 }, { - "epoch": 0.95, - "learning_rate": 9.788654060066741e-07, - "loss": 0.1435, + "epoch": 0.48, + "learning_rate": 1.0484140233722873e-05, + "loss": 0.0869, "step": 855 }, { - "epoch": 0.95, - "learning_rate": 9.566184649610679e-07, - "loss": 0.1292, + "epoch": 0.48, + "learning_rate": 1.047301057317752e-05, + "loss": 0.1153, "step": 856 }, { - "epoch": 0.95, - "learning_rate": 9.343715239154617e-07, - "loss": 0.0748, + "epoch": 0.48, + "learning_rate": 1.0461880912632165e-05, + "loss": 0.109, "step": 857 }, { - "epoch": 0.95, - "learning_rate": 9.121245828698556e-07, - "loss": 0.1233, + "epoch": 0.48, + "learning_rate": 1.0450751252086813e-05, + "loss": 0.1388, "step": 858 }, { - "epoch": 0.96, - "learning_rate": 8.898776418242493e-07, - "loss": 0.1081, + "epoch": 0.48, + "learning_rate": 1.0439621591541457e-05, + "loss": 0.1111, "step": 859 }, { - "epoch": 0.96, - "learning_rate": 8.67630700778643e-07, - "loss": 0.1432, + "epoch": 0.48, + "learning_rate": 1.0428491930996105e-05, + "loss": 0.0856, "step": 860 }, { - "epoch": 0.96, - "learning_rate": 8.453837597330368e-07, - "loss": 0.2187, + "epoch": 0.48, + "learning_rate": 1.0417362270450753e-05, + "loss": 0.1922, "step": 861 }, { - "epoch": 0.96, - "learning_rate": 8.231368186874306e-07, - "loss": 0.1533, + "epoch": 0.48, + "learning_rate": 1.0406232609905397e-05, + "loss": 0.1123, "step": 862 }, { - "epoch": 0.96, - "learning_rate": 8.008898776418243e-07, - "loss": 0.1128, + "epoch": 0.48, + "learning_rate": 1.0395102949360045e-05, + "loss": 0.17, "step": 863 }, { - "epoch": 0.96, - "learning_rate": 7.786429365962181e-07, - "loss": 0.1407, + "epoch": 0.48, + "learning_rate": 1.0383973288814693e-05, + "loss": 0.1683, "step": 864 }, { - "epoch": 0.96, - "learning_rate": 7.563959955506118e-07, - "loss": 0.118, + "epoch": 0.48, + "learning_rate": 1.0372843628269337e-05, + "loss": 0.2205, "step": 865 }, { - "epoch": 0.96, - "learning_rate": 7.341490545050057e-07, - "loss": 0.1084, + "epoch": 0.48, + "learning_rate": 1.0361713967723985e-05, + "loss": 0.1397, "step": 866 }, { - "epoch": 0.96, - "learning_rate": 7.119021134593994e-07, - "loss": 0.0935, + "epoch": 0.48, + "learning_rate": 1.0350584307178633e-05, + "loss": 0.1124, "step": 867 }, { - "epoch": 0.97, - "learning_rate": 6.896551724137931e-07, - "loss": 0.1299, + "epoch": 0.48, + "learning_rate": 1.0339454646633277e-05, + "loss": 0.0824, "step": 868 }, { - "epoch": 0.97, - "learning_rate": 6.67408231368187e-07, - "loss": 0.1639, + "epoch": 0.48, + "learning_rate": 1.0328324986087925e-05, + "loss": 0.1698, "step": 869 }, { - "epoch": 0.97, - "learning_rate": 6.451612903225807e-07, - "loss": 0.1489, + "epoch": 0.48, + "learning_rate": 1.0317195325542573e-05, + "loss": 0.1127, "step": 870 }, { - "epoch": 0.97, - "learning_rate": 6.229143492769744e-07, - "loss": 0.1268, + "epoch": 0.48, + "learning_rate": 1.0306065664997217e-05, + "loss": 0.1401, "step": 871 }, { - "epoch": 0.97, - "learning_rate": 6.006674082313682e-07, - "loss": 0.1215, + "epoch": 0.49, + "learning_rate": 1.0294936004451865e-05, + "loss": 0.1651, "step": 872 }, { - "epoch": 0.97, - "learning_rate": 5.78420467185762e-07, - "loss": 0.1177, + "epoch": 0.49, + "learning_rate": 1.0283806343906513e-05, + "loss": 0.0556, "step": 873 }, { - "epoch": 0.97, - "learning_rate": 5.561735261401558e-07, - "loss": 0.1334, + "epoch": 0.49, + "learning_rate": 1.0272676683361157e-05, + "loss": 0.1394, "step": 874 }, { - "epoch": 0.97, - "learning_rate": 5.339265850945496e-07, - "loss": 0.105, + "epoch": 0.49, + "learning_rate": 1.0261547022815805e-05, + "loss": 0.1153, "step": 875 }, { - "epoch": 0.97, - "learning_rate": 5.116796440489433e-07, - "loss": 0.199, + "epoch": 0.49, + "learning_rate": 1.0250417362270451e-05, + "loss": 0.0307, "step": 876 }, { - "epoch": 0.98, - "learning_rate": 4.894327030033371e-07, - "loss": 0.1091, + "epoch": 0.49, + "learning_rate": 1.0239287701725097e-05, + "loss": 0.2201, "step": 877 }, { - "epoch": 0.98, - "learning_rate": 4.6718576195773085e-07, - "loss": 0.091, + "epoch": 0.49, + "learning_rate": 1.0228158041179745e-05, + "loss": 0.1946, "step": 878 }, { - "epoch": 0.98, - "learning_rate": 4.4493882091212464e-07, - "loss": 0.1135, + "epoch": 0.49, + "learning_rate": 1.0217028380634391e-05, + "loss": 0.0822, "step": 879 }, { - "epoch": 0.98, - "learning_rate": 4.226918798665184e-07, - "loss": 0.0831, + "epoch": 0.49, + "learning_rate": 1.0205898720089039e-05, + "loss": 0.1637, "step": 880 }, { - "epoch": 0.98, - "learning_rate": 4.0044493882091217e-07, - "loss": 0.1089, + "epoch": 0.49, + "learning_rate": 1.0194769059543685e-05, + "loss": 0.136, "step": 881 }, { - "epoch": 0.98, - "learning_rate": 3.781979977753059e-07, - "loss": 0.1288, + "epoch": 0.49, + "learning_rate": 1.0183639398998331e-05, + "loss": 0.276, "step": 882 }, { - "epoch": 0.98, - "learning_rate": 3.559510567296997e-07, - "loss": 0.0766, + "epoch": 0.49, + "learning_rate": 1.0172509738452979e-05, + "loss": 0.0836, "step": 883 }, { - "epoch": 0.98, - "learning_rate": 3.337041156840935e-07, - "loss": 0.1562, + "epoch": 0.49, + "learning_rate": 1.0161380077907625e-05, + "loss": 0.0855, "step": 884 }, { - "epoch": 0.98, - "learning_rate": 3.114571746384872e-07, - "loss": 0.1062, + "epoch": 0.49, + "learning_rate": 1.0150250417362271e-05, + "loss": 0.1148, "step": 885 }, { - "epoch": 0.99, - "learning_rate": 2.89210233592881e-07, - "loss": 0.076, + "epoch": 0.49, + "learning_rate": 1.0139120756816919e-05, + "loss": 0.1653, "step": 886 }, { - "epoch": 0.99, - "learning_rate": 2.669632925472748e-07, - "loss": 0.0614, + "epoch": 0.49, + "learning_rate": 1.0127991096271565e-05, + "loss": 0.1143, "step": 887 }, { - "epoch": 0.99, - "learning_rate": 2.4471635150166853e-07, - "loss": 0.1095, + "epoch": 0.49, + "learning_rate": 1.0116861435726211e-05, + "loss": 0.0585, "step": 888 }, { - "epoch": 0.99, - "learning_rate": 2.2246941045606232e-07, - "loss": 0.1276, + "epoch": 0.49, + "learning_rate": 1.0105731775180859e-05, + "loss": 0.2188, "step": 889 }, { - "epoch": 0.99, - "learning_rate": 2.0022246941045608e-07, - "loss": 0.1209, + "epoch": 0.5, + "learning_rate": 1.0094602114635505e-05, + "loss": 0.0597, "step": 890 }, { - "epoch": 0.99, - "learning_rate": 1.7797552836484985e-07, - "loss": 0.1023, + "epoch": 0.5, + "learning_rate": 1.0083472454090151e-05, + "loss": 0.1672, "step": 891 }, { - "epoch": 0.99, - "learning_rate": 1.557285873192436e-07, - "loss": 0.1354, + "epoch": 0.5, + "learning_rate": 1.0072342793544799e-05, + "loss": 0.0838, "step": 892 }, { - "epoch": 0.99, - "learning_rate": 1.334816462736374e-07, - "loss": 0.0939, + "epoch": 0.5, + "learning_rate": 1.0061213132999443e-05, + "loss": 0.1934, "step": 893 }, { - "epoch": 0.99, - "learning_rate": 1.1123470522803116e-07, - "loss": 0.1586, + "epoch": 0.5, + "learning_rate": 1.0050083472454091e-05, + "loss": 0.2483, "step": 894 }, { - "epoch": 1.0, - "learning_rate": 8.898776418242492e-08, - "loss": 0.1408, + "epoch": 0.5, + "learning_rate": 1.0038953811908739e-05, + "loss": 0.1652, "step": 895 }, { - "epoch": 1.0, - "learning_rate": 6.67408231368187e-08, - "loss": 0.1975, + "epoch": 0.5, + "learning_rate": 1.0027824151363383e-05, + "loss": 0.085, "step": 896 }, { - "epoch": 1.0, - "learning_rate": 4.449388209121246e-08, - "loss": 0.1062, + "epoch": 0.5, + "learning_rate": 1.001669449081803e-05, + "loss": 0.1385, "step": 897 }, { - "epoch": 1.0, - "learning_rate": 2.224694104560623e-08, - "loss": 0.1045, + "epoch": 0.5, + "learning_rate": 1.0005564830272679e-05, + "loss": 0.2188, "step": 898 }, { - "epoch": 1.0, - "learning_rate": 0.0, - "loss": 0.468, + "epoch": 0.5, + "learning_rate": 9.994435169727325e-06, + "loss": 0.0863, "step": 899 }, + { + "epoch": 0.5, + "learning_rate": 9.98330550918197e-06, + "loss": 0.1122, + "step": 900 + }, + { + "epoch": 0.5, + "learning_rate": 9.972175848636617e-06, + "loss": 0.2172, + "step": 901 + }, + { + "epoch": 0.5, + "learning_rate": 9.961046188091265e-06, + "loss": 0.0854, + "step": 902 + }, + { + "epoch": 0.5, + "learning_rate": 9.94991652754591e-06, + "loss": 0.1387, + "step": 903 + }, + { + "epoch": 0.5, + "learning_rate": 9.938786867000557e-06, + "loss": 0.1146, + "step": 904 + }, + { + "epoch": 0.5, + "learning_rate": 9.927657206455203e-06, + "loss": 0.1128, + "step": 905 + }, + { + "epoch": 0.5, + "learning_rate": 9.91652754590985e-06, + "loss": 0.0875, + "step": 906 + }, + { + "epoch": 0.5, + "learning_rate": 9.905397885364497e-06, + "loss": 0.1904, + "step": 907 + }, + { + "epoch": 0.51, + "learning_rate": 9.894268224819143e-06, + "loss": 0.0595, + "step": 908 + }, + { + "epoch": 0.51, + "learning_rate": 9.88313856427379e-06, + "loss": 0.248, + "step": 909 + }, + { + "epoch": 0.51, + "learning_rate": 9.872008903728437e-06, + "loss": 0.1934, + "step": 910 + }, + { + "epoch": 0.51, + "learning_rate": 9.860879243183083e-06, + "loss": 0.1118, + "step": 911 + }, + { + "epoch": 0.51, + "learning_rate": 9.84974958263773e-06, + "loss": 0.0589, + "step": 912 + }, + { + "epoch": 0.51, + "learning_rate": 9.838619922092377e-06, + "loss": 0.1682, + "step": 913 + }, + { + "epoch": 0.51, + "learning_rate": 9.827490261547023e-06, + "loss": 0.1949, + "step": 914 + }, + { + "epoch": 0.51, + "learning_rate": 9.81636060100167e-06, + "loss": 0.0599, + "step": 915 + }, + { + "epoch": 0.51, + "learning_rate": 9.805230940456317e-06, + "loss": 0.1117, + "step": 916 + }, + { + "epoch": 0.51, + "learning_rate": 9.794101279910963e-06, + "loss": 0.0835, + "step": 917 + }, + { + "epoch": 0.51, + "learning_rate": 9.78297161936561e-06, + "loss": 0.1927, + "step": 918 + }, + { + "epoch": 0.51, + "learning_rate": 9.771841958820257e-06, + "loss": 0.0852, + "step": 919 + }, + { + "epoch": 0.51, + "learning_rate": 9.760712298274904e-06, + "loss": 0.1109, + "step": 920 + }, + { + "epoch": 0.51, + "learning_rate": 9.74958263772955e-06, + "loss": 0.0304, + "step": 921 + }, + { + "epoch": 0.51, + "learning_rate": 9.738452977184197e-06, + "loss": 0.0852, + "step": 922 + }, + { + "epoch": 0.51, + "learning_rate": 9.727323316638844e-06, + "loss": 0.2498, + "step": 923 + }, + { + "epoch": 0.51, + "learning_rate": 9.71619365609349e-06, + "loss": 0.1364, + "step": 924 + }, + { + "epoch": 0.51, + "learning_rate": 9.705063995548137e-06, + "loss": 0.2225, + "step": 925 + }, + { + "epoch": 0.52, + "learning_rate": 9.693934335002784e-06, + "loss": 0.1388, + "step": 926 + }, + { + "epoch": 0.52, + "learning_rate": 9.68280467445743e-06, + "loss": 0.0857, + "step": 927 + }, + { + "epoch": 0.52, + "learning_rate": 9.671675013912077e-06, + "loss": 0.1657, + "step": 928 + }, + { + "epoch": 0.52, + "learning_rate": 9.660545353366723e-06, + "loss": 0.1395, + "step": 929 + }, + { + "epoch": 0.52, + "learning_rate": 9.64941569282137e-06, + "loss": 0.2232, + "step": 930 + }, + { + "epoch": 0.52, + "learning_rate": 9.638286032276017e-06, + "loss": 0.2225, + "step": 931 + }, + { + "epoch": 0.52, + "learning_rate": 9.627156371730663e-06, + "loss": 0.1411, + "step": 932 + }, + { + "epoch": 0.52, + "learning_rate": 9.61602671118531e-06, + "loss": 0.1653, + "step": 933 + }, + { + "epoch": 0.52, + "learning_rate": 9.604897050639957e-06, + "loss": 0.1148, + "step": 934 + }, + { + "epoch": 0.52, + "learning_rate": 9.593767390094603e-06, + "loss": 0.1674, + "step": 935 + }, + { + "epoch": 0.52, + "learning_rate": 9.582637729549249e-06, + "loss": 0.0862, + "step": 936 + }, + { + "epoch": 0.52, + "learning_rate": 9.571508069003896e-06, + "loss": 0.1415, + "step": 937 + }, + { + "epoch": 0.52, + "learning_rate": 9.560378408458543e-06, + "loss": 0.1121, + "step": 938 + }, + { + "epoch": 0.52, + "learning_rate": 9.549248747913189e-06, + "loss": 0.1656, + "step": 939 + }, + { + "epoch": 0.52, + "learning_rate": 9.538119087367836e-06, + "loss": 0.1403, + "step": 940 + }, + { + "epoch": 0.52, + "learning_rate": 9.526989426822483e-06, + "loss": 0.1146, + "step": 941 + }, + { + "epoch": 0.52, + "learning_rate": 9.515859766277129e-06, + "loss": 0.0878, + "step": 942 + }, + { + "epoch": 0.52, + "learning_rate": 9.504730105731776e-06, + "loss": 0.1398, + "step": 943 + }, + { + "epoch": 0.53, + "learning_rate": 9.493600445186422e-06, + "loss": 0.0874, + "step": 944 + }, + { + "epoch": 0.53, + "learning_rate": 9.482470784641069e-06, + "loss": 0.2217, + "step": 945 + }, + { + "epoch": 0.53, + "learning_rate": 9.471341124095715e-06, + "loss": 0.246, + "step": 946 + }, + { + "epoch": 0.53, + "learning_rate": 9.460211463550362e-06, + "loss": 0.1135, + "step": 947 + }, + { + "epoch": 0.53, + "learning_rate": 9.449081803005009e-06, + "loss": 0.1424, + "step": 948 + }, + { + "epoch": 0.53, + "learning_rate": 9.437952142459655e-06, + "loss": 0.1637, + "step": 949 + }, + { + "epoch": 0.53, + "learning_rate": 9.426822481914302e-06, + "loss": 0.1675, + "step": 950 + }, + { + "epoch": 0.53, + "learning_rate": 9.415692821368949e-06, + "loss": 0.1626, + "step": 951 + }, + { + "epoch": 0.53, + "learning_rate": 9.404563160823595e-06, + "loss": 0.2147, + "step": 952 + }, + { + "epoch": 0.53, + "learning_rate": 9.393433500278242e-06, + "loss": 0.139, + "step": 953 + }, + { + "epoch": 0.53, + "learning_rate": 9.382303839732888e-06, + "loss": 0.141, + "step": 954 + }, + { + "epoch": 0.53, + "learning_rate": 9.371174179187536e-06, + "loss": 0.168, + "step": 955 + }, + { + "epoch": 0.53, + "learning_rate": 9.360044518642182e-06, + "loss": 0.2174, + "step": 956 + }, + { + "epoch": 0.53, + "learning_rate": 9.348914858096828e-06, + "loss": 0.2146, + "step": 957 + }, + { + "epoch": 0.53, + "learning_rate": 9.337785197551476e-06, + "loss": 0.1383, + "step": 958 + }, + { + "epoch": 0.53, + "learning_rate": 9.326655537006122e-06, + "loss": 0.1662, + "step": 959 + }, + { + "epoch": 0.53, + "learning_rate": 9.315525876460768e-06, + "loss": 0.0889, + "step": 960 + }, + { + "epoch": 0.53, + "learning_rate": 9.304396215915416e-06, + "loss": 0.0918, + "step": 961 + }, + { + "epoch": 0.54, + "learning_rate": 9.293266555370062e-06, + "loss": 0.1934, + "step": 962 + }, + { + "epoch": 0.54, + "learning_rate": 9.282136894824708e-06, + "loss": 0.0417, + "step": 963 + }, + { + "epoch": 0.54, + "learning_rate": 9.271007234279356e-06, + "loss": 0.1144, + "step": 964 + }, + { + "epoch": 0.54, + "learning_rate": 9.259877573734002e-06, + "loss": 0.167, + "step": 965 + }, + { + "epoch": 0.54, + "learning_rate": 9.248747913188648e-06, + "loss": 0.0375, + "step": 966 + }, + { + "epoch": 0.54, + "learning_rate": 9.237618252643296e-06, + "loss": 0.1693, + "step": 967 + }, + { + "epoch": 0.54, + "learning_rate": 9.226488592097942e-06, + "loss": 0.0836, + "step": 968 + }, + { + "epoch": 0.54, + "learning_rate": 9.215358931552588e-06, + "loss": 0.1919, + "step": 969 + }, + { + "epoch": 0.54, + "learning_rate": 9.204229271007234e-06, + "loss": 0.1717, + "step": 970 + }, + { + "epoch": 0.54, + "learning_rate": 9.193099610461882e-06, + "loss": 0.112, + "step": 971 + }, + { + "epoch": 0.54, + "learning_rate": 9.181969949916528e-06, + "loss": 0.1406, + "step": 972 + }, + { + "epoch": 0.54, + "learning_rate": 9.170840289371174e-06, + "loss": 0.169, + "step": 973 + }, + { + "epoch": 0.54, + "learning_rate": 9.159710628825822e-06, + "loss": 0.1669, + "step": 974 + }, + { + "epoch": 0.54, + "learning_rate": 9.148580968280468e-06, + "loss": 0.2477, + "step": 975 + }, + { + "epoch": 0.54, + "learning_rate": 9.137451307735114e-06, + "loss": 0.2218, + "step": 976 + }, + { + "epoch": 0.54, + "learning_rate": 9.126321647189762e-06, + "loss": 0.0568, + "step": 977 + }, + { + "epoch": 0.54, + "learning_rate": 9.115191986644408e-06, + "loss": 0.0835, + "step": 978 + }, + { + "epoch": 0.54, + "learning_rate": 9.104062326099054e-06, + "loss": 0.2461, + "step": 979 + }, + { + "epoch": 0.55, + "learning_rate": 9.0929326655537e-06, + "loss": 0.1433, + "step": 980 + }, + { + "epoch": 0.55, + "learning_rate": 9.081803005008348e-06, + "loss": 0.0839, + "step": 981 + }, + { + "epoch": 0.55, + "learning_rate": 9.070673344462994e-06, + "loss": 0.1391, + "step": 982 + }, + { + "epoch": 0.55, + "learning_rate": 9.05954368391764e-06, + "loss": 0.1656, + "step": 983 + }, + { + "epoch": 0.55, + "learning_rate": 9.048414023372288e-06, + "loss": 0.0831, + "step": 984 + }, + { + "epoch": 0.55, + "learning_rate": 9.037284362826934e-06, + "loss": 0.0867, + "step": 985 + }, + { + "epoch": 0.55, + "learning_rate": 9.02615470228158e-06, + "loss": 0.0822, + "step": 986 + }, + { + "epoch": 0.55, + "learning_rate": 9.015025041736228e-06, + "loss": 0.1385, + "step": 987 + }, + { + "epoch": 0.55, + "learning_rate": 9.003895381190874e-06, + "loss": 0.1663, + "step": 988 + }, + { + "epoch": 0.55, + "learning_rate": 8.99276572064552e-06, + "loss": 0.0877, + "step": 989 + }, + { + "epoch": 0.55, + "learning_rate": 8.981636060100168e-06, + "loss": 0.0559, + "step": 990 + }, + { + "epoch": 0.55, + "learning_rate": 8.970506399554814e-06, + "loss": 0.1934, + "step": 991 + }, + { + "epoch": 0.55, + "learning_rate": 8.95937673900946e-06, + "loss": 0.083, + "step": 992 + }, + { + "epoch": 0.55, + "learning_rate": 8.948247078464108e-06, + "loss": 0.1094, + "step": 993 + }, + { + "epoch": 0.55, + "learning_rate": 8.937117417918754e-06, + "loss": 0.1965, + "step": 994 + }, + { + "epoch": 0.55, + "learning_rate": 8.925987757373402e-06, + "loss": 0.1106, + "step": 995 + }, + { + "epoch": 0.55, + "learning_rate": 8.914858096828048e-06, + "loss": 0.1122, + "step": 996 + }, + { + "epoch": 0.55, + "learning_rate": 8.903728436282694e-06, + "loss": 0.1349, + "step": 997 + }, + { + "epoch": 0.56, + "learning_rate": 8.892598775737342e-06, + "loss": 0.1407, + "step": 998 + }, + { + "epoch": 0.56, + "learning_rate": 8.881469115191988e-06, + "loss": 0.139, + "step": 999 + }, + { + "epoch": 0.56, + "learning_rate": 8.870339454646634e-06, + "loss": 0.1942, + "step": 1000 + }, + { + "epoch": 0.56, + "learning_rate": 8.859209794101282e-06, + "loss": 0.2223, + "step": 1001 + }, + { + "epoch": 0.56, + "learning_rate": 8.848080133555928e-06, + "loss": 0.1623, + "step": 1002 + }, + { + "epoch": 0.56, + "learning_rate": 8.836950473010574e-06, + "loss": 0.0842, + "step": 1003 + }, + { + "epoch": 0.56, + "learning_rate": 8.82582081246522e-06, + "loss": 0.0826, + "step": 1004 + }, + { + "epoch": 0.56, + "learning_rate": 8.814691151919868e-06, + "loss": 0.2264, + "step": 1005 + }, + { + "epoch": 0.56, + "learning_rate": 8.803561491374514e-06, + "loss": 0.1665, + "step": 1006 + }, + { + "epoch": 0.56, + "learning_rate": 8.79243183082916e-06, + "loss": 0.1074, + "step": 1007 + }, + { + "epoch": 0.56, + "learning_rate": 8.781302170283808e-06, + "loss": 0.0855, + "step": 1008 + }, + { + "epoch": 0.56, + "learning_rate": 8.770172509738454e-06, + "loss": 0.1912, + "step": 1009 + }, + { + "epoch": 0.56, + "learning_rate": 8.7590428491931e-06, + "loss": 0.0869, + "step": 1010 + }, + { + "epoch": 0.56, + "learning_rate": 8.747913188647746e-06, + "loss": 0.1385, + "step": 1011 + }, + { + "epoch": 0.56, + "learning_rate": 8.736783528102394e-06, + "loss": 0.1684, + "step": 1012 + }, + { + "epoch": 0.56, + "learning_rate": 8.72565386755704e-06, + "loss": 0.2747, + "step": 1013 + }, + { + "epoch": 0.56, + "learning_rate": 8.714524207011686e-06, + "loss": 0.1102, + "step": 1014 + }, + { + "epoch": 0.56, + "learning_rate": 8.703394546466334e-06, + "loss": 0.1103, + "step": 1015 + }, + { + "epoch": 0.57, + "learning_rate": 8.69226488592098e-06, + "loss": 0.1387, + "step": 1016 + }, + { + "epoch": 0.57, + "learning_rate": 8.681135225375626e-06, + "loss": 0.086, + "step": 1017 + }, + { + "epoch": 0.57, + "learning_rate": 8.670005564830274e-06, + "loss": 0.3244, + "step": 1018 + }, + { + "epoch": 0.57, + "learning_rate": 8.65887590428492e-06, + "loss": 0.1135, + "step": 1019 + }, + { + "epoch": 0.57, + "learning_rate": 8.647746243739566e-06, + "loss": 0.1373, + "step": 1020 + }, + { + "epoch": 0.57, + "learning_rate": 8.636616583194212e-06, + "loss": 0.1138, + "step": 1021 + }, + { + "epoch": 0.57, + "learning_rate": 8.62548692264886e-06, + "loss": 0.1126, + "step": 1022 + }, + { + "epoch": 0.57, + "learning_rate": 8.614357262103506e-06, + "loss": 0.1386, + "step": 1023 + }, + { + "epoch": 0.57, + "learning_rate": 8.603227601558152e-06, + "loss": 0.0847, + "step": 1024 + }, + { + "epoch": 0.57, + "learning_rate": 8.5920979410128e-06, + "loss": 0.0882, + "step": 1025 + }, + { + "epoch": 0.57, + "learning_rate": 8.580968280467446e-06, + "loss": 0.3012, + "step": 1026 + }, + { + "epoch": 0.57, + "learning_rate": 8.569838619922092e-06, + "loss": 0.1372, + "step": 1027 + }, + { + "epoch": 0.57, + "learning_rate": 8.55870895937674e-06, + "loss": 0.0596, + "step": 1028 + }, + { + "epoch": 0.57, + "learning_rate": 8.547579298831386e-06, + "loss": 0.1654, + "step": 1029 + }, + { + "epoch": 0.57, + "learning_rate": 8.536449638286034e-06, + "loss": 0.1149, + "step": 1030 + }, + { + "epoch": 0.57, + "learning_rate": 8.52531997774068e-06, + "loss": 0.0886, + "step": 1031 + }, + { + "epoch": 0.57, + "learning_rate": 8.514190317195326e-06, + "loss": 0.1369, + "step": 1032 + }, + { + "epoch": 0.57, + "learning_rate": 8.503060656649974e-06, + "loss": 0.2194, + "step": 1033 + }, + { + "epoch": 0.58, + "learning_rate": 8.49193099610462e-06, + "loss": 0.1129, + "step": 1034 + }, + { + "epoch": 0.58, + "learning_rate": 8.480801335559266e-06, + "loss": 0.1149, + "step": 1035 + }, + { + "epoch": 0.58, + "learning_rate": 8.469671675013914e-06, + "loss": 0.2173, + "step": 1036 + }, + { + "epoch": 0.58, + "learning_rate": 8.45854201446856e-06, + "loss": 0.0841, + "step": 1037 + }, + { + "epoch": 0.58, + "learning_rate": 8.447412353923206e-06, + "loss": 0.1399, + "step": 1038 + }, + { + "epoch": 0.58, + "learning_rate": 8.436282693377854e-06, + "loss": 0.1641, + "step": 1039 + }, + { + "epoch": 0.58, + "learning_rate": 8.4251530328325e-06, + "loss": 0.1097, + "step": 1040 + }, + { + "epoch": 0.58, + "learning_rate": 8.414023372287146e-06, + "loss": 0.1943, + "step": 1041 + }, + { + "epoch": 0.58, + "learning_rate": 8.402893711741794e-06, + "loss": 0.1921, + "step": 1042 + }, + { + "epoch": 0.58, + "learning_rate": 8.39176405119644e-06, + "loss": 0.1372, + "step": 1043 + }, + { + "epoch": 0.58, + "learning_rate": 8.380634390651086e-06, + "loss": 0.0582, + "step": 1044 + }, + { + "epoch": 0.58, + "learning_rate": 8.369504730105732e-06, + "loss": 0.0847, + "step": 1045 + }, + { + "epoch": 0.58, + "learning_rate": 8.35837506956038e-06, + "loss": 0.1124, + "step": 1046 + }, + { + "epoch": 0.58, + "learning_rate": 8.347245409015026e-06, + "loss": 0.1368, + "step": 1047 + }, + { + "epoch": 0.58, + "learning_rate": 8.336115748469672e-06, + "loss": 0.1105, + "step": 1048 + }, + { + "epoch": 0.58, + "learning_rate": 8.32498608792432e-06, + "loss": 0.0603, + "step": 1049 + }, + { + "epoch": 0.58, + "learning_rate": 8.313856427378966e-06, + "loss": 0.1644, + "step": 1050 + }, + { + "epoch": 0.58, + "learning_rate": 8.302726766833612e-06, + "loss": 0.1882, + "step": 1051 + }, + { + "epoch": 0.59, + "learning_rate": 8.29159710628826e-06, + "loss": 0.1364, + "step": 1052 + }, + { + "epoch": 0.59, + "learning_rate": 8.280467445742906e-06, + "loss": 0.2753, + "step": 1053 + }, + { + "epoch": 0.59, + "learning_rate": 8.269337785197552e-06, + "loss": 0.1413, + "step": 1054 + }, + { + "epoch": 0.59, + "learning_rate": 8.258208124652198e-06, + "loss": 0.0878, + "step": 1055 + }, + { + "epoch": 0.59, + "learning_rate": 8.247078464106846e-06, + "loss": 0.1132, + "step": 1056 + }, + { + "epoch": 0.59, + "learning_rate": 8.235948803561492e-06, + "loss": 0.0875, + "step": 1057 + }, + { + "epoch": 0.59, + "learning_rate": 8.224819143016138e-06, + "loss": 0.0593, + "step": 1058 + }, + { + "epoch": 0.59, + "learning_rate": 8.213689482470786e-06, + "loss": 0.1111, + "step": 1059 + }, + { + "epoch": 0.59, + "learning_rate": 8.202559821925432e-06, + "loss": 0.1125, + "step": 1060 + }, + { + "epoch": 0.59, + "learning_rate": 8.191430161380078e-06, + "loss": 0.2209, + "step": 1061 + }, + { + "epoch": 0.59, + "learning_rate": 8.180300500834726e-06, + "loss": 0.1414, + "step": 1062 + }, + { + "epoch": 0.59, + "learning_rate": 8.169170840289372e-06, + "loss": 0.1656, + "step": 1063 + }, + { + "epoch": 0.59, + "learning_rate": 8.158041179744018e-06, + "loss": 0.1408, + "step": 1064 + }, + { + "epoch": 0.59, + "learning_rate": 8.146911519198665e-06, + "loss": 0.165, + "step": 1065 + }, + { + "epoch": 0.59, + "learning_rate": 8.135781858653312e-06, + "loss": 0.1138, + "step": 1066 + }, + { + "epoch": 0.59, + "learning_rate": 8.124652198107958e-06, + "loss": 0.1912, + "step": 1067 + }, + { + "epoch": 0.59, + "learning_rate": 8.113522537562605e-06, + "loss": 0.0576, + "step": 1068 + }, + { + "epoch": 0.59, + "learning_rate": 8.102392877017252e-06, + "loss": 0.1122, + "step": 1069 + }, + { + "epoch": 0.6, + "learning_rate": 8.0912632164719e-06, + "loss": 0.0822, + "step": 1070 + }, + { + "epoch": 0.6, + "learning_rate": 8.080133555926545e-06, + "loss": 0.1682, + "step": 1071 + }, + { + "epoch": 0.6, + "learning_rate": 8.069003895381192e-06, + "loss": 0.1425, + "step": 1072 + }, + { + "epoch": 0.6, + "learning_rate": 8.05787423483584e-06, + "loss": 0.1373, + "step": 1073 + }, + { + "epoch": 0.6, + "learning_rate": 8.046744574290485e-06, + "loss": 0.1441, + "step": 1074 + }, + { + "epoch": 0.6, + "learning_rate": 8.035614913745131e-06, + "loss": 0.2467, + "step": 1075 + }, + { + "epoch": 0.6, + "learning_rate": 8.02448525319978e-06, + "loss": 0.1671, + "step": 1076 + }, + { + "epoch": 0.6, + "learning_rate": 8.013355592654425e-06, + "loss": 0.136, + "step": 1077 + }, + { + "epoch": 0.6, + "learning_rate": 8.002225932109071e-06, + "loss": 0.0827, + "step": 1078 + }, + { + "epoch": 0.6, + "learning_rate": 7.991096271563718e-06, + "loss": 0.1673, + "step": 1079 + }, + { + "epoch": 0.6, + "learning_rate": 7.979966611018365e-06, + "loss": 0.1151, + "step": 1080 + }, + { + "epoch": 0.6, + "learning_rate": 7.968836950473011e-06, + "loss": 0.1958, + "step": 1081 + }, + { + "epoch": 0.6, + "learning_rate": 7.957707289927657e-06, + "loss": 0.1109, + "step": 1082 + }, + { + "epoch": 0.6, + "learning_rate": 7.946577629382305e-06, + "loss": 0.1972, + "step": 1083 + }, + { + "epoch": 0.6, + "learning_rate": 7.935447968836951e-06, + "loss": 0.1124, + "step": 1084 + }, + { + "epoch": 0.6, + "learning_rate": 7.924318308291597e-06, + "loss": 0.2199, + "step": 1085 + }, + { + "epoch": 0.6, + "learning_rate": 7.913188647746244e-06, + "loss": 0.1403, + "step": 1086 + }, + { + "epoch": 0.6, + "learning_rate": 7.902058987200891e-06, + "loss": 0.1394, + "step": 1087 + }, + { + "epoch": 0.61, + "learning_rate": 7.890929326655537e-06, + "loss": 0.1647, + "step": 1088 + }, + { + "epoch": 0.61, + "learning_rate": 7.879799666110184e-06, + "loss": 0.1666, + "step": 1089 + }, + { + "epoch": 0.61, + "learning_rate": 7.868670005564831e-06, + "loss": 0.1684, + "step": 1090 + }, + { + "epoch": 0.61, + "learning_rate": 7.857540345019477e-06, + "loss": 0.1887, + "step": 1091 + }, + { + "epoch": 0.61, + "learning_rate": 7.846410684474123e-06, + "loss": 0.0879, + "step": 1092 + }, + { + "epoch": 0.61, + "learning_rate": 7.835281023928771e-06, + "loss": 0.1133, + "step": 1093 + }, + { + "epoch": 0.61, + "learning_rate": 7.824151363383417e-06, + "loss": 0.192, + "step": 1094 + }, + { + "epoch": 0.61, + "learning_rate": 7.813021702838063e-06, + "loss": 0.0859, + "step": 1095 + }, + { + "epoch": 0.61, + "learning_rate": 7.80189204229271e-06, + "loss": 0.1672, + "step": 1096 + }, + { + "epoch": 0.61, + "learning_rate": 7.790762381747357e-06, + "loss": 0.0367, + "step": 1097 + }, + { + "epoch": 0.61, + "learning_rate": 7.779632721202003e-06, + "loss": 0.0874, + "step": 1098 + }, + { + "epoch": 0.61, + "learning_rate": 7.76850306065665e-06, + "loss": 0.2145, + "step": 1099 + }, + { + "epoch": 0.61, + "learning_rate": 7.757373400111297e-06, + "loss": 0.2679, + "step": 1100 + }, + { + "epoch": 0.61, + "learning_rate": 7.746243739565943e-06, + "loss": 0.1161, + "step": 1101 + }, + { + "epoch": 0.61, + "learning_rate": 7.73511407902059e-06, + "loss": 0.116, + "step": 1102 + }, + { + "epoch": 0.61, + "learning_rate": 7.723984418475237e-06, + "loss": 0.0875, + "step": 1103 + }, + { + "epoch": 0.61, + "learning_rate": 7.712854757929883e-06, + "loss": 0.1121, + "step": 1104 + }, + { + "epoch": 0.61, + "learning_rate": 7.701725097384531e-06, + "loss": 0.166, + "step": 1105 + }, + { + "epoch": 0.62, + "learning_rate": 7.690595436839177e-06, + "loss": 0.246, + "step": 1106 + }, + { + "epoch": 0.62, + "learning_rate": 7.679465776293823e-06, + "loss": 0.0863, + "step": 1107 + }, + { + "epoch": 0.62, + "learning_rate": 7.668336115748471e-06, + "loss": 0.1894, + "step": 1108 + }, + { + "epoch": 0.62, + "learning_rate": 7.657206455203117e-06, + "loss": 0.0866, + "step": 1109 + }, + { + "epoch": 0.62, + "learning_rate": 7.646076794657763e-06, + "loss": 0.1342, + "step": 1110 + }, + { + "epoch": 0.62, + "learning_rate": 7.634947134112411e-06, + "loss": 0.1647, + "step": 1111 + }, + { + "epoch": 0.62, + "learning_rate": 7.623817473567056e-06, + "loss": 0.1148, + "step": 1112 + }, + { + "epoch": 0.62, + "learning_rate": 7.612687813021703e-06, + "loss": 0.1124, + "step": 1113 + }, + { + "epoch": 0.62, + "learning_rate": 7.60155815247635e-06, + "loss": 0.0868, + "step": 1114 + }, + { + "epoch": 0.62, + "learning_rate": 7.590428491930997e-06, + "loss": 0.1924, + "step": 1115 + }, + { + "epoch": 0.62, + "learning_rate": 7.579298831385643e-06, + "loss": 0.1977, + "step": 1116 + }, + { + "epoch": 0.62, + "learning_rate": 7.56816917084029e-06, + "loss": 0.1645, + "step": 1117 + }, + { + "epoch": 0.62, + "learning_rate": 7.557039510294937e-06, + "loss": 0.0883, + "step": 1118 + }, + { + "epoch": 0.62, + "learning_rate": 7.545909849749583e-06, + "loss": 0.1668, + "step": 1119 + }, + { + "epoch": 0.62, + "learning_rate": 7.534780189204229e-06, + "loss": 0.1142, + "step": 1120 + }, + { + "epoch": 0.62, + "learning_rate": 7.523650528658877e-06, + "loss": 0.1635, + "step": 1121 + }, + { + "epoch": 0.62, + "learning_rate": 7.512520868113523e-06, + "loss": 0.1392, + "step": 1122 + }, + { + "epoch": 0.62, + "learning_rate": 7.501391207568169e-06, + "loss": 0.1137, + "step": 1123 + }, + { + "epoch": 0.63, + "learning_rate": 7.490261547022817e-06, + "loss": 0.1653, + "step": 1124 + }, + { + "epoch": 0.63, + "learning_rate": 7.479131886477463e-06, + "loss": 0.113, + "step": 1125 + }, + { + "epoch": 0.63, + "learning_rate": 7.468002225932109e-06, + "loss": 0.0842, + "step": 1126 + }, + { + "epoch": 0.63, + "learning_rate": 7.456872565386757e-06, + "loss": 0.0862, + "step": 1127 + }, + { + "epoch": 0.63, + "learning_rate": 7.445742904841403e-06, + "loss": 0.2214, + "step": 1128 + }, + { + "epoch": 0.63, + "learning_rate": 7.434613244296049e-06, + "loss": 0.2465, + "step": 1129 + }, + { + "epoch": 0.63, + "learning_rate": 7.423483583750696e-06, + "loss": 0.1372, + "step": 1130 + }, + { + "epoch": 0.63, + "learning_rate": 7.412353923205343e-06, + "loss": 0.1944, + "step": 1131 + }, + { + "epoch": 0.63, + "learning_rate": 7.401224262659989e-06, + "loss": 0.1104, + "step": 1132 + }, + { + "epoch": 0.63, + "learning_rate": 7.390094602114636e-06, + "loss": 0.2434, + "step": 1133 + }, + { + "epoch": 0.63, + "learning_rate": 7.378964941569283e-06, + "loss": 0.0862, + "step": 1134 + }, + { + "epoch": 0.63, + "learning_rate": 7.36783528102393e-06, + "loss": 0.0329, + "step": 1135 + }, + { + "epoch": 0.63, + "learning_rate": 7.356705620478576e-06, + "loss": 0.2433, + "step": 1136 + }, + { + "epoch": 0.63, + "learning_rate": 7.345575959933222e-06, + "loss": 0.1924, + "step": 1137 + }, + { + "epoch": 0.63, + "learning_rate": 7.33444629938787e-06, + "loss": 0.1628, + "step": 1138 + }, + { + "epoch": 0.63, + "learning_rate": 7.323316638842516e-06, + "loss": 0.1131, + "step": 1139 + }, + { + "epoch": 0.63, + "learning_rate": 7.312186978297162e-06, + "loss": 0.0857, + "step": 1140 + }, + { + "epoch": 0.63, + "learning_rate": 7.30105731775181e-06, + "loss": 0.1393, + "step": 1141 + }, + { + "epoch": 0.64, + "learning_rate": 7.289927657206456e-06, + "loss": 0.1405, + "step": 1142 + }, + { + "epoch": 0.64, + "learning_rate": 7.278797996661102e-06, + "loss": 0.1867, + "step": 1143 + }, + { + "epoch": 0.64, + "learning_rate": 7.267668336115748e-06, + "loss": 0.2473, + "step": 1144 + }, + { + "epoch": 0.64, + "learning_rate": 7.256538675570396e-06, + "loss": 0.1122, + "step": 1145 + }, + { + "epoch": 0.64, + "learning_rate": 7.245409015025042e-06, + "loss": 0.1079, + "step": 1146 + }, + { + "epoch": 0.64, + "learning_rate": 7.234279354479689e-06, + "loss": 0.1655, + "step": 1147 + }, + { + "epoch": 0.64, + "learning_rate": 7.223149693934336e-06, + "loss": 0.2445, + "step": 1148 + }, + { + "epoch": 0.64, + "learning_rate": 7.212020033388982e-06, + "loss": 0.142, + "step": 1149 + }, + { + "epoch": 0.64, + "learning_rate": 7.200890372843629e-06, + "loss": 0.0879, + "step": 1150 + }, + { + "epoch": 0.64, + "learning_rate": 7.189760712298276e-06, + "loss": 0.1897, + "step": 1151 + }, + { + "epoch": 0.64, + "learning_rate": 7.178631051752922e-06, + "loss": 0.1628, + "step": 1152 + }, + { + "epoch": 0.64, + "learning_rate": 7.167501391207569e-06, + "loss": 0.062, + "step": 1153 + }, + { + "epoch": 0.64, + "learning_rate": 7.156371730662215e-06, + "loss": 0.0889, + "step": 1154 + }, + { + "epoch": 0.64, + "learning_rate": 7.145242070116863e-06, + "loss": 0.2473, + "step": 1155 + }, + { + "epoch": 0.64, + "learning_rate": 7.134112409571509e-06, + "loss": 0.0609, + "step": 1156 + }, + { + "epoch": 0.64, + "learning_rate": 7.122982749026155e-06, + "loss": 0.0874, + "step": 1157 + }, + { + "epoch": 0.64, + "learning_rate": 7.111853088480803e-06, + "loss": 0.0345, + "step": 1158 + }, + { + "epoch": 0.64, + "learning_rate": 7.100723427935449e-06, + "loss": 0.0853, + "step": 1159 + }, + { + "epoch": 0.65, + "learning_rate": 7.089593767390095e-06, + "loss": 0.1126, + "step": 1160 + }, + { + "epoch": 0.65, + "learning_rate": 7.078464106844741e-06, + "loss": 0.059, + "step": 1161 + }, + { + "epoch": 0.65, + "learning_rate": 7.067334446299389e-06, + "loss": 0.1948, + "step": 1162 + }, + { + "epoch": 0.65, + "learning_rate": 7.056204785754035e-06, + "loss": 0.2422, + "step": 1163 + }, + { + "epoch": 0.65, + "learning_rate": 7.045075125208681e-06, + "loss": 0.0828, + "step": 1164 + }, + { + "epoch": 0.65, + "learning_rate": 7.033945464663329e-06, + "loss": 0.0849, + "step": 1165 + }, + { + "epoch": 0.65, + "learning_rate": 7.022815804117975e-06, + "loss": 0.1383, + "step": 1166 + }, + { + "epoch": 0.65, + "learning_rate": 7.011686143572622e-06, + "loss": 0.2755, + "step": 1167 + }, + { + "epoch": 0.65, + "learning_rate": 7.000556483027269e-06, + "loss": 0.1105, + "step": 1168 + }, + { + "epoch": 0.65, + "learning_rate": 6.989426822481915e-06, + "loss": 0.2186, + "step": 1169 + }, + { + "epoch": 0.65, + "learning_rate": 6.978297161936562e-06, + "loss": 0.1672, + "step": 1170 + }, + { + "epoch": 0.65, + "learning_rate": 6.967167501391208e-06, + "loss": 0.2211, + "step": 1171 + }, + { + "epoch": 0.65, + "learning_rate": 6.956037840845855e-06, + "loss": 0.109, + "step": 1172 + }, + { + "epoch": 0.65, + "learning_rate": 6.944908180300502e-06, + "loss": 0.1695, + "step": 1173 + }, + { + "epoch": 0.65, + "learning_rate": 6.933778519755148e-06, + "loss": 0.1129, + "step": 1174 + }, + { + "epoch": 0.65, + "learning_rate": 6.922648859209795e-06, + "loss": 0.0583, + "step": 1175 + }, + { + "epoch": 0.65, + "learning_rate": 6.911519198664442e-06, + "loss": 0.1389, + "step": 1176 + }, + { + "epoch": 0.65, + "learning_rate": 6.900389538119088e-06, + "loss": 0.2199, + "step": 1177 + }, + { + "epoch": 0.66, + "learning_rate": 6.889259877573734e-06, + "loss": 0.0844, + "step": 1178 + }, + { + "epoch": 0.66, + "learning_rate": 6.878130217028382e-06, + "loss": 0.1431, + "step": 1179 + }, + { + "epoch": 0.66, + "learning_rate": 6.867000556483028e-06, + "loss": 0.0562, + "step": 1180 + }, + { + "epoch": 0.66, + "learning_rate": 6.855870895937674e-06, + "loss": 0.1665, + "step": 1181 + }, + { + "epoch": 0.66, + "learning_rate": 6.844741235392322e-06, + "loss": 0.2231, + "step": 1182 + }, + { + "epoch": 0.66, + "learning_rate": 6.833611574846968e-06, + "loss": 0.059, + "step": 1183 + }, + { + "epoch": 0.66, + "learning_rate": 6.822481914301614e-06, + "loss": 0.1103, + "step": 1184 + }, + { + "epoch": 0.66, + "learning_rate": 6.8113522537562615e-06, + "loss": 0.1646, + "step": 1185 + }, + { + "epoch": 0.66, + "learning_rate": 6.800222593210908e-06, + "loss": 0.138, + "step": 1186 + }, + { + "epoch": 0.66, + "learning_rate": 6.789092932665554e-06, + "loss": 0.0306, + "step": 1187 + }, + { + "epoch": 0.66, + "learning_rate": 6.777963272120201e-06, + "loss": 0.1911, + "step": 1188 + }, + { + "epoch": 0.66, + "learning_rate": 6.766833611574848e-06, + "loss": 0.1127, + "step": 1189 + }, + { + "epoch": 0.66, + "learning_rate": 6.7557039510294945e-06, + "loss": 0.195, + "step": 1190 + }, + { + "epoch": 0.66, + "learning_rate": 6.744574290484141e-06, + "loss": 0.1644, + "step": 1191 + }, + { + "epoch": 0.66, + "learning_rate": 6.7334446299387876e-06, + "loss": 0.1639, + "step": 1192 + }, + { + "epoch": 0.66, + "learning_rate": 6.7223149693934345e-06, + "loss": 0.1387, + "step": 1193 + }, + { + "epoch": 0.66, + "learning_rate": 6.711185308848081e-06, + "loss": 0.1121, + "step": 1194 + }, + { + "epoch": 0.66, + "learning_rate": 6.700055648302727e-06, + "loss": 0.1375, + "step": 1195 + }, + { + "epoch": 0.67, + "learning_rate": 6.6889259877573745e-06, + "loss": 0.1657, + "step": 1196 + }, + { + "epoch": 0.67, + "learning_rate": 6.6777963272120206e-06, + "loss": 0.1426, + "step": 1197 + }, + { + "epoch": 0.67, + "learning_rate": 6.666666666666667e-06, + "loss": 0.1107, + "step": 1198 + }, + { + "epoch": 0.67, + "learning_rate": 6.6555370061213144e-06, + "loss": 0.1418, + "step": 1199 + }, + { + "epoch": 0.67, + "learning_rate": 6.6444073455759605e-06, + "loss": 0.0877, + "step": 1200 + }, + { + "epoch": 0.67, + "learning_rate": 6.633277685030607e-06, + "loss": 0.1364, + "step": 1201 + }, + { + "epoch": 0.67, + "learning_rate": 6.622148024485254e-06, + "loss": 0.137, + "step": 1202 + }, + { + "epoch": 0.67, + "learning_rate": 6.6110183639399005e-06, + "loss": 0.1967, + "step": 1203 + }, + { + "epoch": 0.67, + "learning_rate": 6.599888703394547e-06, + "loss": 0.1967, + "step": 1204 + }, + { + "epoch": 0.67, + "learning_rate": 6.5887590428491935e-06, + "loss": 0.1146, + "step": 1205 + }, + { + "epoch": 0.67, + "learning_rate": 6.5776293823038405e-06, + "loss": 0.2453, + "step": 1206 + }, + { + "epoch": 0.67, + "learning_rate": 6.5664997217584865e-06, + "loss": 0.1641, + "step": 1207 + }, + { + "epoch": 0.67, + "learning_rate": 6.5553700612131335e-06, + "loss": 0.1156, + "step": 1208 + }, + { + "epoch": 0.67, + "learning_rate": 6.54424040066778e-06, + "loss": 0.1644, + "step": 1209 + }, + { + "epoch": 0.67, + "learning_rate": 6.533110740122427e-06, + "loss": 0.1675, + "step": 1210 + }, + { + "epoch": 0.67, + "learning_rate": 6.5219810795770734e-06, + "loss": 0.2192, + "step": 1211 + }, + { + "epoch": 0.67, + "learning_rate": 6.5108514190317195e-06, + "loss": 0.0607, + "step": 1212 + }, + { + "epoch": 0.68, + "learning_rate": 6.499721758486367e-06, + "loss": 0.0866, + "step": 1213 + }, + { + "epoch": 0.68, + "learning_rate": 6.488592097941013e-06, + "loss": 0.14, + "step": 1214 + }, + { + "epoch": 0.68, + "learning_rate": 6.4774624373956595e-06, + "loss": 0.1416, + "step": 1215 + }, + { + "epoch": 0.68, + "learning_rate": 6.466332776850307e-06, + "loss": 0.1385, + "step": 1216 + }, + { + "epoch": 0.68, + "learning_rate": 6.455203116304953e-06, + "loss": 0.0839, + "step": 1217 + }, + { + "epoch": 0.68, + "learning_rate": 6.4440734557595995e-06, + "loss": 0.1364, + "step": 1218 + }, + { + "epoch": 0.68, + "learning_rate": 6.4329437952142456e-06, + "loss": 0.1634, + "step": 1219 + }, + { + "epoch": 0.68, + "learning_rate": 6.421814134668893e-06, + "loss": 0.1392, + "step": 1220 + }, + { + "epoch": 0.68, + "learning_rate": 6.4106844741235394e-06, + "loss": 0.1638, + "step": 1221 + }, + { + "epoch": 0.68, + "learning_rate": 6.399554813578186e-06, + "loss": 0.0598, + "step": 1222 + }, + { + "epoch": 0.68, + "learning_rate": 6.388425153032833e-06, + "loss": 0.1131, + "step": 1223 + }, + { + "epoch": 0.68, + "learning_rate": 6.377295492487479e-06, + "loss": 0.1947, + "step": 1224 + }, + { + "epoch": 0.68, + "learning_rate": 6.366165831942126e-06, + "loss": 0.1681, + "step": 1225 + }, + { + "epoch": 0.68, + "learning_rate": 6.355036171396773e-06, + "loss": 0.2431, + "step": 1226 + }, + { + "epoch": 0.68, + "learning_rate": 6.343906510851419e-06, + "loss": 0.085, + "step": 1227 + }, + { + "epoch": 0.68, + "learning_rate": 6.332776850306066e-06, + "loss": 0.0873, + "step": 1228 + }, + { + "epoch": 0.68, + "learning_rate": 6.321647189760712e-06, + "loss": 0.2165, + "step": 1229 + }, + { + "epoch": 0.68, + "learning_rate": 6.31051752921536e-06, + "loss": 0.1118, + "step": 1230 + }, + { + "epoch": 0.69, + "learning_rate": 6.299387868670006e-06, + "loss": 0.2451, + "step": 1231 + }, + { + "epoch": 0.69, + "learning_rate": 6.288258208124652e-06, + "loss": 0.1424, + "step": 1232 + }, + { + "epoch": 0.69, + "learning_rate": 6.2771285475793e-06, + "loss": 0.0862, + "step": 1233 + }, + { + "epoch": 0.69, + "learning_rate": 6.265998887033946e-06, + "loss": 0.1163, + "step": 1234 + }, + { + "epoch": 0.69, + "learning_rate": 6.254869226488592e-06, + "loss": 0.1659, + "step": 1235 + }, + { + "epoch": 0.69, + "learning_rate": 6.243739565943238e-06, + "loss": 0.1419, + "step": 1236 + }, + { + "epoch": 0.69, + "learning_rate": 6.232609905397886e-06, + "loss": 0.1397, + "step": 1237 + }, + { + "epoch": 0.69, + "learning_rate": 6.221480244852532e-06, + "loss": 0.1379, + "step": 1238 + }, + { + "epoch": 0.69, + "learning_rate": 6.210350584307178e-06, + "loss": 0.0601, + "step": 1239 + }, + { + "epoch": 0.69, + "learning_rate": 6.199220923761826e-06, + "loss": 0.1911, + "step": 1240 + }, + { + "epoch": 0.69, + "learning_rate": 6.188091263216472e-06, + "loss": 0.1382, + "step": 1241 + }, + { + "epoch": 0.69, + "learning_rate": 6.176961602671119e-06, + "loss": 0.19, + "step": 1242 + }, + { + "epoch": 0.69, + "learning_rate": 6.165831942125766e-06, + "loss": 0.1116, + "step": 1243 + }, + { + "epoch": 0.69, + "learning_rate": 6.154702281580412e-06, + "loss": 0.242, + "step": 1244 + }, + { + "epoch": 0.69, + "learning_rate": 6.143572621035059e-06, + "loss": 0.1095, + "step": 1245 + }, + { + "epoch": 0.69, + "learning_rate": 6.132442960489705e-06, + "loss": 0.1138, + "step": 1246 + }, + { + "epoch": 0.69, + "learning_rate": 6.121313299944352e-06, + "loss": 0.0875, + "step": 1247 + }, + { + "epoch": 0.69, + "learning_rate": 6.110183639398999e-06, + "loss": 0.0853, + "step": 1248 + }, + { + "epoch": 0.7, + "learning_rate": 6.099053978853645e-06, + "loss": 0.088, + "step": 1249 + }, + { + "epoch": 0.7, + "learning_rate": 6.087924318308292e-06, + "loss": 0.1678, + "step": 1250 + }, + { + "epoch": 0.7, + "learning_rate": 6.076794657762939e-06, + "loss": 0.1121, + "step": 1251 + }, + { + "epoch": 0.7, + "learning_rate": 6.065664997217585e-06, + "loss": 0.1103, + "step": 1252 + }, + { + "epoch": 0.7, + "learning_rate": 6.054535336672231e-06, + "loss": 0.1889, + "step": 1253 + }, + { + "epoch": 0.7, + "learning_rate": 6.043405676126879e-06, + "loss": 0.17, + "step": 1254 + }, + { + "epoch": 0.7, + "learning_rate": 6.032276015581525e-06, + "loss": 0.1373, + "step": 1255 + }, + { + "epoch": 0.7, + "learning_rate": 6.021146355036171e-06, + "loss": 0.1405, + "step": 1256 + }, + { + "epoch": 0.7, + "learning_rate": 6.010016694490819e-06, + "loss": 0.2498, + "step": 1257 + }, + { + "epoch": 0.7, + "learning_rate": 5.998887033945465e-06, + "loss": 0.1388, + "step": 1258 + }, + { + "epoch": 0.7, + "learning_rate": 5.987757373400111e-06, + "loss": 0.166, + "step": 1259 + }, + { + "epoch": 0.7, + "learning_rate": 5.976627712854759e-06, + "loss": 0.0592, + "step": 1260 + }, + { + "epoch": 0.7, + "learning_rate": 5.965498052309405e-06, + "loss": 0.1693, + "step": 1261 + }, + { + "epoch": 0.7, + "learning_rate": 5.954368391764051e-06, + "loss": 0.1891, + "step": 1262 + }, + { + "epoch": 0.7, + "learning_rate": 5.943238731218698e-06, + "loss": 0.2735, + "step": 1263 + }, + { + "epoch": 0.7, + "learning_rate": 5.932109070673345e-06, + "loss": 0.1135, + "step": 1264 + }, + { + "epoch": 0.7, + "learning_rate": 5.920979410127992e-06, + "loss": 0.0612, + "step": 1265 + }, + { + "epoch": 0.7, + "learning_rate": 5.909849749582638e-06, + "loss": 0.1154, + "step": 1266 + }, + { + "epoch": 0.71, + "learning_rate": 5.898720089037285e-06, + "loss": 0.1648, + "step": 1267 + }, + { + "epoch": 0.71, + "learning_rate": 5.887590428491932e-06, + "loss": 0.1383, + "step": 1268 + }, + { + "epoch": 0.71, + "learning_rate": 5.876460767946578e-06, + "loss": 0.1666, + "step": 1269 + }, + { + "epoch": 0.71, + "learning_rate": 5.865331107401224e-06, + "loss": 0.1133, + "step": 1270 + }, + { + "epoch": 0.71, + "learning_rate": 5.854201446855872e-06, + "loss": 0.1657, + "step": 1271 + }, + { + "epoch": 0.71, + "learning_rate": 5.843071786310518e-06, + "loss": 0.1396, + "step": 1272 + }, + { + "epoch": 0.71, + "learning_rate": 5.831942125765164e-06, + "loss": 0.0854, + "step": 1273 + }, + { + "epoch": 0.71, + "learning_rate": 5.820812465219812e-06, + "loss": 0.113, + "step": 1274 + }, + { + "epoch": 0.71, + "learning_rate": 5.809682804674458e-06, + "loss": 0.2173, + "step": 1275 + }, + { + "epoch": 0.71, + "learning_rate": 5.798553144129104e-06, + "loss": 0.1913, + "step": 1276 + }, + { + "epoch": 0.71, + "learning_rate": 5.787423483583752e-06, + "loss": 0.1162, + "step": 1277 + }, + { + "epoch": 0.71, + "learning_rate": 5.776293823038398e-06, + "loss": 0.1626, + "step": 1278 + }, + { + "epoch": 0.71, + "learning_rate": 5.765164162493044e-06, + "loss": 0.2201, + "step": 1279 + }, + { + "epoch": 0.71, + "learning_rate": 5.754034501947691e-06, + "loss": 0.0852, + "step": 1280 + }, + { + "epoch": 0.71, + "learning_rate": 5.742904841402338e-06, + "loss": 0.0339, + "step": 1281 + }, + { + "epoch": 0.71, + "learning_rate": 5.731775180856984e-06, + "loss": 0.1148, + "step": 1282 + }, + { + "epoch": 0.71, + "learning_rate": 5.720645520311631e-06, + "loss": 0.1122, + "step": 1283 + }, + { + "epoch": 0.71, + "learning_rate": 5.709515859766278e-06, + "loss": 0.1152, + "step": 1284 + }, + { + "epoch": 0.72, + "learning_rate": 5.698386199220925e-06, + "loss": 0.0856, + "step": 1285 + }, + { + "epoch": 0.72, + "learning_rate": 5.687256538675571e-06, + "loss": 0.1662, + "step": 1286 + }, + { + "epoch": 0.72, + "learning_rate": 5.676126878130217e-06, + "loss": 0.1927, + "step": 1287 + }, + { + "epoch": 0.72, + "learning_rate": 5.664997217584865e-06, + "loss": 0.0852, + "step": 1288 + }, + { + "epoch": 0.72, + "learning_rate": 5.653867557039511e-06, + "loss": 0.1421, + "step": 1289 + }, + { + "epoch": 0.72, + "learning_rate": 5.642737896494157e-06, + "loss": 0.1126, + "step": 1290 + }, + { + "epoch": 0.72, + "learning_rate": 5.631608235948805e-06, + "loss": 0.1938, + "step": 1291 + }, + { + "epoch": 0.72, + "learning_rate": 5.620478575403451e-06, + "loss": 0.1392, + "step": 1292 + }, + { + "epoch": 0.72, + "learning_rate": 5.609348914858097e-06, + "loss": 0.2211, + "step": 1293 + }, + { + "epoch": 0.72, + "learning_rate": 5.598219254312743e-06, + "loss": 0.0318, + "step": 1294 + }, + { + "epoch": 0.72, + "learning_rate": 5.587089593767391e-06, + "loss": 0.0583, + "step": 1295 + }, + { + "epoch": 0.72, + "learning_rate": 5.575959933222037e-06, + "loss": 0.2215, + "step": 1296 + }, + { + "epoch": 0.72, + "learning_rate": 5.564830272676684e-06, + "loss": 0.1402, + "step": 1297 + }, + { + "epoch": 0.72, + "learning_rate": 5.553700612131331e-06, + "loss": 0.1423, + "step": 1298 + }, + { + "epoch": 0.72, + "learning_rate": 5.542570951585977e-06, + "loss": 0.1909, + "step": 1299 + }, + { + "epoch": 0.72, + "learning_rate": 5.531441291040624e-06, + "loss": 0.1672, + "step": 1300 + }, + { + "epoch": 0.72, + "learning_rate": 5.520311630495271e-06, + "loss": 0.0312, + "step": 1301 + }, + { + "epoch": 0.72, + "learning_rate": 5.509181969949917e-06, + "loss": 0.0857, + "step": 1302 + }, + { + "epoch": 0.73, + "learning_rate": 5.498052309404564e-06, + "loss": 0.1098, + "step": 1303 + }, + { + "epoch": 0.73, + "learning_rate": 5.48692264885921e-06, + "loss": 0.0833, + "step": 1304 + }, + { + "epoch": 0.73, + "learning_rate": 5.475792988313858e-06, + "loss": 0.1348, + "step": 1305 + }, + { + "epoch": 0.73, + "learning_rate": 5.464663327768504e-06, + "loss": 0.1106, + "step": 1306 + }, + { + "epoch": 0.73, + "learning_rate": 5.45353366722315e-06, + "loss": 0.1403, + "step": 1307 + }, + { + "epoch": 0.73, + "learning_rate": 5.4424040066777976e-06, + "loss": 0.2202, + "step": 1308 + }, + { + "epoch": 0.73, + "learning_rate": 5.431274346132444e-06, + "loss": 0.1674, + "step": 1309 + }, + { + "epoch": 0.73, + "learning_rate": 5.42014468558709e-06, + "loss": 0.1667, + "step": 1310 + }, + { + "epoch": 0.73, + "learning_rate": 5.409015025041736e-06, + "loss": 0.1115, + "step": 1311 + }, + { + "epoch": 0.73, + "learning_rate": 5.397885364496384e-06, + "loss": 0.2246, + "step": 1312 + }, + { + "epoch": 0.73, + "learning_rate": 5.38675570395103e-06, + "loss": 0.1943, + "step": 1313 + }, + { + "epoch": 0.73, + "learning_rate": 5.375626043405676e-06, + "loss": 0.1964, + "step": 1314 + }, + { + "epoch": 0.73, + "learning_rate": 5.364496382860324e-06, + "loss": 0.0837, + "step": 1315 + }, + { + "epoch": 0.73, + "learning_rate": 5.35336672231497e-06, + "loss": 0.0875, + "step": 1316 + }, + { + "epoch": 0.73, + "learning_rate": 5.342237061769617e-06, + "loss": 0.1886, + "step": 1317 + }, + { + "epoch": 0.73, + "learning_rate": 5.3311074012242636e-06, + "loss": 0.1664, + "step": 1318 + }, + { + "epoch": 0.73, + "learning_rate": 5.31997774067891e-06, + "loss": 0.1664, + "step": 1319 + }, + { + "epoch": 0.73, + "learning_rate": 5.308848080133557e-06, + "loss": 0.1653, + "step": 1320 + }, + { + "epoch": 0.74, + "learning_rate": 5.297718419588203e-06, + "loss": 0.1132, + "step": 1321 + }, + { + "epoch": 0.74, + "learning_rate": 5.28658875904285e-06, + "loss": 0.1127, + "step": 1322 + }, + { + "epoch": 0.74, + "learning_rate": 5.2754590984974965e-06, + "loss": 0.2193, + "step": 1323 + }, + { + "epoch": 0.74, + "learning_rate": 5.264329437952143e-06, + "loss": 0.0855, + "step": 1324 + }, + { + "epoch": 0.74, + "learning_rate": 5.2531997774067896e-06, + "loss": 0.117, + "step": 1325 + }, + { + "epoch": 0.74, + "learning_rate": 5.2420701168614365e-06, + "loss": 0.0342, + "step": 1326 + }, + { + "epoch": 0.74, + "learning_rate": 5.230940456316083e-06, + "loss": 0.1384, + "step": 1327 + }, + { + "epoch": 0.74, + "learning_rate": 5.219810795770729e-06, + "loss": 0.1125, + "step": 1328 + }, + { + "epoch": 0.74, + "learning_rate": 5.2086811352253765e-06, + "loss": 0.2211, + "step": 1329 + }, + { + "epoch": 0.74, + "learning_rate": 5.1975514746800226e-06, + "loss": 0.1123, + "step": 1330 + }, + { + "epoch": 0.74, + "learning_rate": 5.186421814134669e-06, + "loss": 0.1421, + "step": 1331 + }, + { + "epoch": 0.74, + "learning_rate": 5.1752921535893164e-06, + "loss": 0.1402, + "step": 1332 + }, + { + "epoch": 0.74, + "learning_rate": 5.1641624930439625e-06, + "loss": 0.1659, + "step": 1333 + }, + { + "epoch": 0.74, + "learning_rate": 5.153032832498609e-06, + "loss": 0.1933, + "step": 1334 + }, + { + "epoch": 0.74, + "learning_rate": 5.141903171953256e-06, + "loss": 0.1381, + "step": 1335 + }, + { + "epoch": 0.74, + "learning_rate": 5.1307735114079025e-06, + "loss": 0.1133, + "step": 1336 + }, + { + "epoch": 0.74, + "learning_rate": 5.119643850862549e-06, + "loss": 0.2489, + "step": 1337 + }, + { + "epoch": 0.74, + "learning_rate": 5.1085141903171955e-06, + "loss": 0.1691, + "step": 1338 + }, + { + "epoch": 0.75, + "learning_rate": 5.0973845297718425e-06, + "loss": 0.1644, + "step": 1339 + }, + { + "epoch": 0.75, + "learning_rate": 5.086254869226489e-06, + "loss": 0.1115, + "step": 1340 + }, + { + "epoch": 0.75, + "learning_rate": 5.0751252086811355e-06, + "loss": 0.2506, + "step": 1341 + }, + { + "epoch": 0.75, + "learning_rate": 5.0639955481357824e-06, + "loss": 0.2209, + "step": 1342 + }, + { + "epoch": 0.75, + "learning_rate": 5.052865887590429e-06, + "loss": 0.2452, + "step": 1343 + }, + { + "epoch": 0.75, + "learning_rate": 5.0417362270450755e-06, + "loss": 0.0906, + "step": 1344 + }, + { + "epoch": 0.75, + "learning_rate": 5.0306065664997215e-06, + "loss": 0.1112, + "step": 1345 + }, + { + "epoch": 0.75, + "learning_rate": 5.019476905954369e-06, + "loss": 0.1683, + "step": 1346 + }, + { + "epoch": 0.75, + "learning_rate": 5.008347245409015e-06, + "loss": 0.062, + "step": 1347 + }, + { + "epoch": 0.75, + "learning_rate": 4.997217584863662e-06, + "loss": 0.0617, + "step": 1348 + }, + { + "epoch": 0.75, + "learning_rate": 4.9860879243183084e-06, + "loss": 0.1653, + "step": 1349 + }, + { + "epoch": 0.75, + "learning_rate": 4.974958263772955e-06, + "loss": 0.1896, + "step": 1350 + }, + { + "epoch": 0.75, + "learning_rate": 4.9638286032276015e-06, + "loss": 0.1626, + "step": 1351 + }, + { + "epoch": 0.75, + "learning_rate": 4.952698942682248e-06, + "loss": 0.114, + "step": 1352 + }, + { + "epoch": 0.75, + "learning_rate": 4.941569282136895e-06, + "loss": 0.0624, + "step": 1353 + }, + { + "epoch": 0.75, + "learning_rate": 4.9304396215915414e-06, + "loss": 0.0607, + "step": 1354 + }, + { + "epoch": 0.75, + "learning_rate": 4.919309961046188e-06, + "loss": 0.0872, + "step": 1355 + }, + { + "epoch": 0.75, + "learning_rate": 4.908180300500835e-06, + "loss": 0.0869, + "step": 1356 + }, + { + "epoch": 0.76, + "learning_rate": 4.897050639955481e-06, + "loss": 0.085, + "step": 1357 + }, + { + "epoch": 0.76, + "learning_rate": 4.885920979410128e-06, + "loss": 0.1136, + "step": 1358 + }, + { + "epoch": 0.76, + "learning_rate": 4.874791318864775e-06, + "loss": 0.0331, + "step": 1359 + }, + { + "epoch": 0.76, + "learning_rate": 4.863661658319422e-06, + "loss": 0.1714, + "step": 1360 + }, + { + "epoch": 0.76, + "learning_rate": 4.852531997774068e-06, + "loss": 0.1622, + "step": 1361 + }, + { + "epoch": 0.76, + "learning_rate": 4.841402337228715e-06, + "loss": 0.111, + "step": 1362 + }, + { + "epoch": 0.76, + "learning_rate": 4.830272676683361e-06, + "loss": 0.1097, + "step": 1363 + }, + { + "epoch": 0.76, + "learning_rate": 4.819143016138008e-06, + "loss": 0.1661, + "step": 1364 + }, + { + "epoch": 0.76, + "learning_rate": 4.808013355592655e-06, + "loss": 0.0845, + "step": 1365 + }, + { + "epoch": 0.76, + "learning_rate": 4.796883695047301e-06, + "loss": 0.0566, + "step": 1366 + }, + { + "epoch": 0.76, + "learning_rate": 4.785754034501948e-06, + "loss": 0.1385, + "step": 1367 + }, + { + "epoch": 0.76, + "learning_rate": 4.774624373956594e-06, + "loss": 0.1133, + "step": 1368 + }, + { + "epoch": 0.76, + "learning_rate": 4.763494713411241e-06, + "loss": 0.195, + "step": 1369 + }, + { + "epoch": 0.76, + "learning_rate": 4.752365052865888e-06, + "loss": 0.1392, + "step": 1370 + }, + { + "epoch": 0.76, + "learning_rate": 4.741235392320534e-06, + "loss": 0.1677, + "step": 1371 + }, + { + "epoch": 0.76, + "learning_rate": 4.730105731775181e-06, + "loss": 0.1388, + "step": 1372 + }, + { + "epoch": 0.76, + "learning_rate": 4.718976071229827e-06, + "loss": 0.1109, + "step": 1373 + }, + { + "epoch": 0.76, + "learning_rate": 4.707846410684474e-06, + "loss": 0.0832, + "step": 1374 + }, + { + "epoch": 0.77, + "learning_rate": 4.696716750139121e-06, + "loss": 0.14, + "step": 1375 + }, + { + "epoch": 0.77, + "learning_rate": 4.685587089593768e-06, + "loss": 0.2526, + "step": 1376 + }, + { + "epoch": 0.77, + "learning_rate": 4.674457429048414e-06, + "loss": 0.2545, + "step": 1377 + }, + { + "epoch": 0.77, + "learning_rate": 4.663327768503061e-06, + "loss": 0.1376, + "step": 1378 + }, + { + "epoch": 0.77, + "learning_rate": 4.652198107957708e-06, + "loss": 0.1387, + "step": 1379 + }, + { + "epoch": 0.77, + "learning_rate": 4.641068447412354e-06, + "loss": 0.2471, + "step": 1380 + }, + { + "epoch": 0.77, + "learning_rate": 4.629938786867001e-06, + "loss": 0.0844, + "step": 1381 + }, + { + "epoch": 0.77, + "learning_rate": 4.618809126321648e-06, + "loss": 0.1385, + "step": 1382 + }, + { + "epoch": 0.77, + "learning_rate": 4.607679465776294e-06, + "loss": 0.1656, + "step": 1383 + }, + { + "epoch": 0.77, + "learning_rate": 4.596549805230941e-06, + "loss": 0.1151, + "step": 1384 + }, + { + "epoch": 0.77, + "learning_rate": 4.585420144685587e-06, + "loss": 0.1964, + "step": 1385 + }, + { + "epoch": 0.77, + "learning_rate": 4.574290484140234e-06, + "loss": 0.111, + "step": 1386 + }, + { + "epoch": 0.77, + "learning_rate": 4.563160823594881e-06, + "loss": 0.1949, + "step": 1387 + }, + { + "epoch": 0.77, + "learning_rate": 4.552031163049527e-06, + "loss": 0.0852, + "step": 1388 + }, + { + "epoch": 0.77, + "learning_rate": 4.540901502504174e-06, + "loss": 0.1105, + "step": 1389 + }, + { + "epoch": 0.77, + "learning_rate": 4.52977184195882e-06, + "loss": 0.1909, + "step": 1390 + }, + { + "epoch": 0.77, + "learning_rate": 4.518642181413467e-06, + "loss": 0.1384, + "step": 1391 + }, + { + "epoch": 0.77, + "learning_rate": 4.507512520868114e-06, + "loss": 0.1375, + "step": 1392 + }, + { + "epoch": 0.78, + "learning_rate": 4.49638286032276e-06, + "loss": 0.0863, + "step": 1393 + }, + { + "epoch": 0.78, + "learning_rate": 4.485253199777407e-06, + "loss": 0.2477, + "step": 1394 + }, + { + "epoch": 0.78, + "learning_rate": 4.474123539232054e-06, + "loss": 0.112, + "step": 1395 + }, + { + "epoch": 0.78, + "learning_rate": 4.462993878686701e-06, + "loss": 0.086, + "step": 1396 + }, + { + "epoch": 0.78, + "learning_rate": 4.451864218141347e-06, + "loss": 0.1116, + "step": 1397 + }, + { + "epoch": 0.78, + "learning_rate": 4.440734557595994e-06, + "loss": 0.1406, + "step": 1398 + }, + { + "epoch": 0.78, + "learning_rate": 4.429604897050641e-06, + "loss": 0.1658, + "step": 1399 + }, + { + "epoch": 0.78, + "learning_rate": 4.418475236505287e-06, + "loss": 0.1651, + "step": 1400 + }, + { + "epoch": 0.78, + "learning_rate": 4.407345575959934e-06, + "loss": 0.0825, + "step": 1401 + }, + { + "epoch": 0.78, + "learning_rate": 4.39621591541458e-06, + "loss": 0.1128, + "step": 1402 + }, + { + "epoch": 0.78, + "learning_rate": 4.385086254869227e-06, + "loss": 0.1959, + "step": 1403 + }, + { + "epoch": 0.78, + "learning_rate": 4.373956594323873e-06, + "loss": 0.1104, + "step": 1404 + }, + { + "epoch": 0.78, + "learning_rate": 4.36282693377852e-06, + "loss": 0.0828, + "step": 1405 + }, + { + "epoch": 0.78, + "learning_rate": 4.351697273233167e-06, + "loss": 0.1396, + "step": 1406 + }, + { + "epoch": 0.78, + "learning_rate": 4.340567612687813e-06, + "loss": 0.1674, + "step": 1407 + }, + { + "epoch": 0.78, + "learning_rate": 4.32943795214246e-06, + "loss": 0.1143, + "step": 1408 + }, + { + "epoch": 0.78, + "learning_rate": 4.318308291597106e-06, + "loss": 0.0851, + "step": 1409 + }, + { + "epoch": 0.78, + "learning_rate": 4.307178631051753e-06, + "loss": 0.0578, + "step": 1410 + }, + { + "epoch": 0.79, + "learning_rate": 4.2960489705064e-06, + "loss": 0.1147, + "step": 1411 + }, + { + "epoch": 0.79, + "learning_rate": 4.284919309961046e-06, + "loss": 0.166, + "step": 1412 + }, + { + "epoch": 0.79, + "learning_rate": 4.273789649415693e-06, + "loss": 0.1128, + "step": 1413 + }, + { + "epoch": 0.79, + "learning_rate": 4.26265998887034e-06, + "loss": 0.1952, + "step": 1414 + }, + { + "epoch": 0.79, + "learning_rate": 4.251530328324987e-06, + "loss": 0.1667, + "step": 1415 + }, + { + "epoch": 0.79, + "learning_rate": 4.240400667779633e-06, + "loss": 0.2227, + "step": 1416 + }, + { + "epoch": 0.79, + "learning_rate": 4.22927100723428e-06, + "loss": 0.0851, + "step": 1417 + }, + { + "epoch": 0.79, + "learning_rate": 4.218141346688927e-06, + "loss": 0.1666, + "step": 1418 + }, + { + "epoch": 0.79, + "learning_rate": 4.207011686143573e-06, + "loss": 0.0875, + "step": 1419 + }, + { + "epoch": 0.79, + "learning_rate": 4.19588202559822e-06, + "loss": 0.0888, + "step": 1420 + }, + { + "epoch": 0.79, + "learning_rate": 4.184752365052866e-06, + "loss": 0.2192, + "step": 1421 + }, + { + "epoch": 0.79, + "learning_rate": 4.173622704507513e-06, + "loss": 0.1124, + "step": 1422 + }, + { + "epoch": 0.79, + "learning_rate": 4.16249304396216e-06, + "loss": 0.1393, + "step": 1423 + }, + { + "epoch": 0.79, + "learning_rate": 4.151363383416806e-06, + "loss": 0.1405, + "step": 1424 + }, + { + "epoch": 0.79, + "learning_rate": 4.140233722871453e-06, + "loss": 0.1104, + "step": 1425 + }, + { + "epoch": 0.79, + "learning_rate": 4.129104062326099e-06, + "loss": 0.0592, + "step": 1426 + }, + { + "epoch": 0.79, + "learning_rate": 4.117974401780746e-06, + "loss": 0.0843, + "step": 1427 + }, + { + "epoch": 0.79, + "learning_rate": 4.106844741235393e-06, + "loss": 0.1906, + "step": 1428 + }, + { + "epoch": 0.8, + "learning_rate": 4.095715080690039e-06, + "loss": 0.1408, + "step": 1429 + }, + { + "epoch": 0.8, + "learning_rate": 4.084585420144686e-06, + "loss": 0.2232, + "step": 1430 + }, + { + "epoch": 0.8, + "learning_rate": 4.073455759599333e-06, + "loss": 0.1372, + "step": 1431 + }, + { + "epoch": 0.8, + "learning_rate": 4.062326099053979e-06, + "loss": 0.2751, + "step": 1432 + }, + { + "epoch": 0.8, + "learning_rate": 4.051196438508626e-06, + "loss": 0.141, + "step": 1433 + }, + { + "epoch": 0.8, + "learning_rate": 4.040066777963273e-06, + "loss": 0.0842, + "step": 1434 + }, + { + "epoch": 0.8, + "learning_rate": 4.02893711741792e-06, + "loss": 0.113, + "step": 1435 + }, + { + "epoch": 0.8, + "learning_rate": 4.017807456872566e-06, + "loss": 0.1908, + "step": 1436 + }, + { + "epoch": 0.8, + "learning_rate": 4.006677796327213e-06, + "loss": 0.1912, + "step": 1437 + }, + { + "epoch": 0.8, + "learning_rate": 3.995548135781859e-06, + "loss": 0.1383, + "step": 1438 + }, + { + "epoch": 0.8, + "learning_rate": 3.984418475236506e-06, + "loss": 0.244, + "step": 1439 + }, + { + "epoch": 0.8, + "learning_rate": 3.973288814691153e-06, + "loss": 0.139, + "step": 1440 + }, + { + "epoch": 0.8, + "learning_rate": 3.962159154145799e-06, + "loss": 0.1643, + "step": 1441 + }, + { + "epoch": 0.8, + "learning_rate": 3.951029493600446e-06, + "loss": 0.1386, + "step": 1442 + }, + { + "epoch": 0.8, + "learning_rate": 3.939899833055092e-06, + "loss": 0.1124, + "step": 1443 + }, + { + "epoch": 0.8, + "learning_rate": 3.928770172509739e-06, + "loss": 0.0593, + "step": 1444 + }, + { + "epoch": 0.8, + "learning_rate": 3.917640511964386e-06, + "loss": 0.1111, + "step": 1445 + }, + { + "epoch": 0.8, + "learning_rate": 3.906510851419032e-06, + "loss": 0.0831, + "step": 1446 + }, + { + "epoch": 0.81, + "learning_rate": 3.895381190873679e-06, + "loss": 0.1383, + "step": 1447 + }, + { + "epoch": 0.81, + "learning_rate": 3.884251530328325e-06, + "loss": 0.1422, + "step": 1448 + }, + { + "epoch": 0.81, + "learning_rate": 3.873121869782972e-06, + "loss": 0.0868, + "step": 1449 + }, + { + "epoch": 0.81, + "learning_rate": 3.861992209237619e-06, + "loss": 0.1393, + "step": 1450 + }, + { + "epoch": 0.81, + "learning_rate": 3.8508625486922656e-06, + "loss": 0.1115, + "step": 1451 + }, + { + "epoch": 0.81, + "learning_rate": 3.839732888146912e-06, + "loss": 0.2712, + "step": 1452 + }, + { + "epoch": 0.81, + "learning_rate": 3.828603227601559e-06, + "loss": 0.058, + "step": 1453 + }, + { + "epoch": 0.81, + "learning_rate": 3.8174735670562055e-06, + "loss": 0.1666, + "step": 1454 + }, + { + "epoch": 0.81, + "learning_rate": 3.8063439065108516e-06, + "loss": 0.1125, + "step": 1455 + }, + { + "epoch": 0.81, + "learning_rate": 3.7952142459654986e-06, + "loss": 0.222, + "step": 1456 + }, + { + "epoch": 0.81, + "learning_rate": 3.784084585420145e-06, + "loss": 0.1146, + "step": 1457 + }, + { + "epoch": 0.81, + "learning_rate": 3.7729549248747916e-06, + "loss": 0.166, + "step": 1458 + }, + { + "epoch": 0.81, + "learning_rate": 3.7618252643294385e-06, + "loss": 0.1123, + "step": 1459 + }, + { + "epoch": 0.81, + "learning_rate": 3.7506956037840846e-06, + "loss": 0.1123, + "step": 1460 + }, + { + "epoch": 0.81, + "learning_rate": 3.7395659432387315e-06, + "loss": 0.0585, + "step": 1461 + }, + { + "epoch": 0.81, + "learning_rate": 3.7284362826933785e-06, + "loss": 0.0838, + "step": 1462 + }, + { + "epoch": 0.81, + "learning_rate": 3.7173066221480246e-06, + "loss": 0.0315, + "step": 1463 + }, + { + "epoch": 0.81, + "learning_rate": 3.7061769616026715e-06, + "loss": 0.0858, + "step": 1464 + }, + { + "epoch": 0.82, + "learning_rate": 3.695047301057318e-06, + "loss": 0.0588, + "step": 1465 + }, + { + "epoch": 0.82, + "learning_rate": 3.683917640511965e-06, + "loss": 0.114, + "step": 1466 + }, + { + "epoch": 0.82, + "learning_rate": 3.672787979966611e-06, + "loss": 0.1384, + "step": 1467 + }, + { + "epoch": 0.82, + "learning_rate": 3.661658319421258e-06, + "loss": 0.1396, + "step": 1468 + }, + { + "epoch": 0.82, + "learning_rate": 3.650528658875905e-06, + "loss": 0.1414, + "step": 1469 + }, + { + "epoch": 0.82, + "learning_rate": 3.639398998330551e-06, + "loss": 0.0844, + "step": 1470 + }, + { + "epoch": 0.82, + "learning_rate": 3.628269337785198e-06, + "loss": 0.056, + "step": 1471 + }, + { + "epoch": 0.82, + "learning_rate": 3.6171396772398445e-06, + "loss": 0.1657, + "step": 1472 + }, + { + "epoch": 0.82, + "learning_rate": 3.606010016694491e-06, + "loss": 0.0836, + "step": 1473 + }, + { + "epoch": 0.82, + "learning_rate": 3.594880356149138e-06, + "loss": 0.0815, + "step": 1474 + }, + { + "epoch": 0.82, + "learning_rate": 3.5837506956037844e-06, + "loss": 0.1947, + "step": 1475 + }, + { + "epoch": 0.82, + "learning_rate": 3.5726210350584314e-06, + "loss": 0.1095, + "step": 1476 + }, + { + "epoch": 0.82, + "learning_rate": 3.5614913745130775e-06, + "loss": 0.1648, + "step": 1477 + }, + { + "epoch": 0.82, + "learning_rate": 3.5503617139677244e-06, + "loss": 0.2219, + "step": 1478 + }, + { + "epoch": 0.82, + "learning_rate": 3.5392320534223705e-06, + "loss": 0.1403, + "step": 1479 + }, + { + "epoch": 0.82, + "learning_rate": 3.5281023928770174e-06, + "loss": 0.0826, + "step": 1480 + }, + { + "epoch": 0.82, + "learning_rate": 3.5169727323316644e-06, + "loss": 0.1117, + "step": 1481 + }, + { + "epoch": 0.82, + "learning_rate": 3.505843071786311e-06, + "loss": 0.196, + "step": 1482 + }, + { + "epoch": 0.83, + "learning_rate": 3.4947134112409574e-06, + "loss": 0.1632, + "step": 1483 + }, + { + "epoch": 0.83, + "learning_rate": 3.483583750695604e-06, + "loss": 0.1935, + "step": 1484 + }, + { + "epoch": 0.83, + "learning_rate": 3.472454090150251e-06, + "loss": 0.2262, + "step": 1485 + }, + { + "epoch": 0.83, + "learning_rate": 3.4613244296048974e-06, + "loss": 0.081, + "step": 1486 + }, + { + "epoch": 0.83, + "learning_rate": 3.450194769059544e-06, + "loss": 0.1111, + "step": 1487 + }, + { + "epoch": 0.83, + "learning_rate": 3.439065108514191e-06, + "loss": 0.1099, + "step": 1488 + }, + { + "epoch": 0.83, + "learning_rate": 3.427935447968837e-06, + "loss": 0.0856, + "step": 1489 + }, + { + "epoch": 0.83, + "learning_rate": 3.416805787423484e-06, + "loss": 0.0845, + "step": 1490 + }, + { + "epoch": 0.83, + "learning_rate": 3.4056761268781308e-06, + "loss": 0.1113, + "step": 1491 + }, + { + "epoch": 0.83, + "learning_rate": 3.394546466332777e-06, + "loss": 0.1405, + "step": 1492 + }, + { + "epoch": 0.83, + "learning_rate": 3.383416805787424e-06, + "loss": 0.1125, + "step": 1493 + }, + { + "epoch": 0.83, + "learning_rate": 3.3722871452420703e-06, + "loss": 0.2241, + "step": 1494 + }, + { + "epoch": 0.83, + "learning_rate": 3.3611574846967173e-06, + "loss": 0.1729, + "step": 1495 + }, + { + "epoch": 0.83, + "learning_rate": 3.3500278241513633e-06, + "loss": 0.1678, + "step": 1496 + }, + { + "epoch": 0.83, + "learning_rate": 3.3388981636060103e-06, + "loss": 0.1119, + "step": 1497 + }, + { + "epoch": 0.83, + "learning_rate": 3.3277685030606572e-06, + "loss": 0.1953, + "step": 1498 + }, + { + "epoch": 0.83, + "learning_rate": 3.3166388425153033e-06, + "loss": 0.1912, + "step": 1499 + }, + { + "epoch": 0.83, + "learning_rate": 3.3055091819699502e-06, + "loss": 0.1125, + "step": 1500 + }, + { + "epoch": 0.84, + "learning_rate": 3.2943795214245968e-06, + "loss": 0.2174, + "step": 1501 + }, + { + "epoch": 0.84, + "learning_rate": 3.2832498608792433e-06, + "loss": 0.0863, + "step": 1502 + }, + { + "epoch": 0.84, + "learning_rate": 3.27212020033389e-06, + "loss": 0.1671, + "step": 1503 + }, + { + "epoch": 0.84, + "learning_rate": 3.2609905397885367e-06, + "loss": 0.2997, + "step": 1504 + }, + { + "epoch": 0.84, + "learning_rate": 3.2498608792431837e-06, + "loss": 0.2228, + "step": 1505 + }, + { + "epoch": 0.84, + "learning_rate": 3.2387312186978297e-06, + "loss": 0.2538, + "step": 1506 + }, + { + "epoch": 0.84, + "learning_rate": 3.2276015581524767e-06, + "loss": 0.0588, + "step": 1507 + }, + { + "epoch": 0.84, + "learning_rate": 3.2164718976071228e-06, + "loss": 0.0849, + "step": 1508 + }, + { + "epoch": 0.84, + "learning_rate": 3.2053422370617697e-06, + "loss": 0.0861, + "step": 1509 + }, + { + "epoch": 0.84, + "learning_rate": 3.1942125765164167e-06, + "loss": 0.1641, + "step": 1510 + }, + { + "epoch": 0.84, + "learning_rate": 3.183082915971063e-06, + "loss": 0.1115, + "step": 1511 + }, + { + "epoch": 0.84, + "learning_rate": 3.1719532554257097e-06, + "loss": 0.1914, + "step": 1512 + }, + { + "epoch": 0.84, + "learning_rate": 3.160823594880356e-06, + "loss": 0.1121, + "step": 1513 + }, + { + "epoch": 0.84, + "learning_rate": 3.149693934335003e-06, + "loss": 0.1159, + "step": 1514 + }, + { + "epoch": 0.84, + "learning_rate": 3.13856427378965e-06, + "loss": 0.1117, + "step": 1515 + }, + { + "epoch": 0.84, + "learning_rate": 3.127434613244296e-06, + "loss": 0.1961, + "step": 1516 + }, + { + "epoch": 0.84, + "learning_rate": 3.116304952698943e-06, + "loss": 0.1908, + "step": 1517 + }, + { + "epoch": 0.84, + "learning_rate": 3.105175292153589e-06, + "loss": 0.0866, + "step": 1518 + }, + { + "epoch": 0.85, + "learning_rate": 3.094045631608236e-06, + "loss": 0.1108, + "step": 1519 + }, + { + "epoch": 0.85, + "learning_rate": 3.082915971062883e-06, + "loss": 0.1638, + "step": 1520 + }, + { + "epoch": 0.85, + "learning_rate": 3.0717863105175296e-06, + "loss": 0.0859, + "step": 1521 + }, + { + "epoch": 0.85, + "learning_rate": 3.060656649972176e-06, + "loss": 0.1399, + "step": 1522 + }, + { + "epoch": 0.85, + "learning_rate": 3.0495269894268226e-06, + "loss": 0.1359, + "step": 1523 + }, + { + "epoch": 0.85, + "learning_rate": 3.0383973288814695e-06, + "loss": 0.1103, + "step": 1524 + }, + { + "epoch": 0.85, + "learning_rate": 3.0272676683361156e-06, + "loss": 0.1109, + "step": 1525 + }, + { + "epoch": 0.85, + "learning_rate": 3.0161380077907626e-06, + "loss": 0.1142, + "step": 1526 + }, + { + "epoch": 0.85, + "learning_rate": 3.0050083472454095e-06, + "loss": 0.1631, + "step": 1527 + }, + { + "epoch": 0.85, + "learning_rate": 2.9938786867000556e-06, + "loss": 0.1097, + "step": 1528 + }, + { + "epoch": 0.85, + "learning_rate": 2.9827490261547025e-06, + "loss": 0.2175, + "step": 1529 + }, + { + "epoch": 0.85, + "learning_rate": 2.971619365609349e-06, + "loss": 0.1672, + "step": 1530 + }, + { + "epoch": 0.85, + "learning_rate": 2.960489705063996e-06, + "loss": 0.058, + "step": 1531 + }, + { + "epoch": 0.85, + "learning_rate": 2.9493600445186425e-06, + "loss": 0.1935, + "step": 1532 + }, + { + "epoch": 0.85, + "learning_rate": 2.938230383973289e-06, + "loss": 0.1118, + "step": 1533 + }, + { + "epoch": 0.85, + "learning_rate": 2.927100723427936e-06, + "loss": 0.0599, + "step": 1534 + }, + { + "epoch": 0.85, + "learning_rate": 2.915971062882582e-06, + "loss": 0.2199, + "step": 1535 + }, + { + "epoch": 0.85, + "learning_rate": 2.904841402337229e-06, + "loss": 0.1907, + "step": 1536 + }, + { + "epoch": 0.86, + "learning_rate": 2.893711741791876e-06, + "loss": 0.1941, + "step": 1537 + }, + { + "epoch": 0.86, + "learning_rate": 2.882582081246522e-06, + "loss": 0.1624, + "step": 1538 + }, + { + "epoch": 0.86, + "learning_rate": 2.871452420701169e-06, + "loss": 0.1387, + "step": 1539 + }, + { + "epoch": 0.86, + "learning_rate": 2.8603227601558155e-06, + "loss": 0.1359, + "step": 1540 + }, + { + "epoch": 0.86, + "learning_rate": 2.8491930996104624e-06, + "loss": 0.2166, + "step": 1541 + }, + { + "epoch": 0.86, + "learning_rate": 2.8380634390651085e-06, + "loss": 0.0589, + "step": 1542 + }, + { + "epoch": 0.86, + "learning_rate": 2.8269337785197554e-06, + "loss": 0.0331, + "step": 1543 + }, + { + "epoch": 0.86, + "learning_rate": 2.8158041179744024e-06, + "loss": 0.0866, + "step": 1544 + }, + { + "epoch": 0.86, + "learning_rate": 2.8046744574290484e-06, + "loss": 0.1942, + "step": 1545 + }, + { + "epoch": 0.86, + "learning_rate": 2.7935447968836954e-06, + "loss": 0.1377, + "step": 1546 + }, + { + "epoch": 0.86, + "learning_rate": 2.782415136338342e-06, + "loss": 0.113, + "step": 1547 + }, + { + "epoch": 0.86, + "learning_rate": 2.7712854757929884e-06, + "loss": 0.1084, + "step": 1548 + }, + { + "epoch": 0.86, + "learning_rate": 2.7601558152476353e-06, + "loss": 0.085, + "step": 1549 + }, + { + "epoch": 0.86, + "learning_rate": 2.749026154702282e-06, + "loss": 0.1666, + "step": 1550 + }, + { + "epoch": 0.86, + "learning_rate": 2.737896494156929e-06, + "loss": 0.0583, + "step": 1551 + }, + { + "epoch": 0.86, + "learning_rate": 2.726766833611575e-06, + "loss": 0.1121, + "step": 1552 + }, + { + "epoch": 0.86, + "learning_rate": 2.715637173066222e-06, + "loss": 0.139, + "step": 1553 + }, + { + "epoch": 0.86, + "learning_rate": 2.704507512520868e-06, + "loss": 0.1868, + "step": 1554 + }, + { + "epoch": 0.87, + "learning_rate": 2.693377851975515e-06, + "loss": 0.2478, + "step": 1555 + }, + { + "epoch": 0.87, + "learning_rate": 2.682248191430162e-06, + "loss": 0.1671, + "step": 1556 + }, + { + "epoch": 0.87, + "learning_rate": 2.6711185308848083e-06, + "loss": 0.164, + "step": 1557 + }, + { + "epoch": 0.87, + "learning_rate": 2.659988870339455e-06, + "loss": 0.1645, + "step": 1558 + }, + { + "epoch": 0.87, + "learning_rate": 2.6488592097941013e-06, + "loss": 0.0856, + "step": 1559 + }, + { + "epoch": 0.87, + "learning_rate": 2.6377295492487483e-06, + "loss": 0.1369, + "step": 1560 + }, + { + "epoch": 0.87, + "learning_rate": 2.6265998887033948e-06, + "loss": 0.0864, + "step": 1561 + }, + { + "epoch": 0.87, + "learning_rate": 2.6154702281580413e-06, + "loss": 0.1128, + "step": 1562 + }, + { + "epoch": 0.87, + "learning_rate": 2.6043405676126882e-06, + "loss": 0.1962, + "step": 1563 + }, + { + "epoch": 0.87, + "learning_rate": 2.5932109070673343e-06, + "loss": 0.0826, + "step": 1564 + }, + { + "epoch": 0.87, + "learning_rate": 2.5820812465219813e-06, + "loss": 0.1397, + "step": 1565 + }, + { + "epoch": 0.87, + "learning_rate": 2.570951585976628e-06, + "loss": 0.195, + "step": 1566 + }, + { + "epoch": 0.87, + "learning_rate": 2.5598219254312743e-06, + "loss": 0.1927, + "step": 1567 + }, + { + "epoch": 0.87, + "learning_rate": 2.5486922648859212e-06, + "loss": 0.2486, + "step": 1568 + }, + { + "epoch": 0.87, + "learning_rate": 2.5375626043405677e-06, + "loss": 0.14, + "step": 1569 + }, + { + "epoch": 0.87, + "learning_rate": 2.5264329437952147e-06, + "loss": 0.1127, + "step": 1570 + }, + { + "epoch": 0.87, + "learning_rate": 2.5153032832498608e-06, + "loss": 0.0853, + "step": 1571 + }, + { + "epoch": 0.87, + "learning_rate": 2.5041736227045077e-06, + "loss": 0.0853, + "step": 1572 + }, + { + "epoch": 0.88, + "learning_rate": 2.4930439621591542e-06, + "loss": 0.1652, + "step": 1573 + }, + { + "epoch": 0.88, + "learning_rate": 2.4819143016138007e-06, + "loss": 0.0864, + "step": 1574 + }, + { + "epoch": 0.88, + "learning_rate": 2.4707846410684477e-06, + "loss": 0.085, + "step": 1575 + }, + { + "epoch": 0.88, + "learning_rate": 2.459654980523094e-06, + "loss": 0.1388, + "step": 1576 + }, + { + "epoch": 0.88, + "learning_rate": 2.4485253199777407e-06, + "loss": 0.1143, + "step": 1577 + }, + { + "epoch": 0.88, + "learning_rate": 2.4373956594323876e-06, + "loss": 0.2209, + "step": 1578 + }, + { + "epoch": 0.88, + "learning_rate": 2.426265998887034e-06, + "loss": 0.1143, + "step": 1579 + }, + { + "epoch": 0.88, + "learning_rate": 2.4151363383416807e-06, + "loss": 0.2156, + "step": 1580 + }, + { + "epoch": 0.88, + "learning_rate": 2.4040066777963276e-06, + "loss": 0.1366, + "step": 1581 + }, + { + "epoch": 0.88, + "learning_rate": 2.392877017250974e-06, + "loss": 0.0851, + "step": 1582 + }, + { + "epoch": 0.88, + "learning_rate": 2.3817473567056206e-06, + "loss": 0.1138, + "step": 1583 + }, + { + "epoch": 0.88, + "learning_rate": 2.370617696160267e-06, + "loss": 0.2757, + "step": 1584 + }, + { + "epoch": 0.88, + "learning_rate": 2.3594880356149137e-06, + "loss": 0.1119, + "step": 1585 + }, + { + "epoch": 0.88, + "learning_rate": 2.3483583750695606e-06, + "loss": 0.0583, + "step": 1586 + }, + { + "epoch": 0.88, + "learning_rate": 2.337228714524207e-06, + "loss": 0.0876, + "step": 1587 + }, + { + "epoch": 0.88, + "learning_rate": 2.326099053978854e-06, + "loss": 0.1106, + "step": 1588 + }, + { + "epoch": 0.88, + "learning_rate": 2.3149693934335006e-06, + "loss": 0.0858, + "step": 1589 + }, + { + "epoch": 0.88, + "learning_rate": 2.303839732888147e-06, + "loss": 0.1643, + "step": 1590 + }, + { + "epoch": 0.89, + "learning_rate": 2.2927100723427936e-06, + "loss": 0.1648, + "step": 1591 + }, + { + "epoch": 0.89, + "learning_rate": 2.2815804117974405e-06, + "loss": 0.1657, + "step": 1592 + }, + { + "epoch": 0.89, + "learning_rate": 2.270450751252087e-06, + "loss": 0.1956, + "step": 1593 + }, + { + "epoch": 0.89, + "learning_rate": 2.2593210907067336e-06, + "loss": 0.1115, + "step": 1594 + }, + { + "epoch": 0.89, + "learning_rate": 2.24819143016138e-06, + "loss": 0.1643, + "step": 1595 + }, + { + "epoch": 0.89, + "learning_rate": 2.237061769616027e-06, + "loss": 0.167, + "step": 1596 + }, + { + "epoch": 0.89, + "learning_rate": 2.2259321090706735e-06, + "loss": 0.1625, + "step": 1597 + }, + { + "epoch": 0.89, + "learning_rate": 2.2148024485253205e-06, + "loss": 0.1415, + "step": 1598 + }, + { + "epoch": 0.89, + "learning_rate": 2.203672787979967e-06, + "loss": 0.1904, + "step": 1599 + }, + { + "epoch": 0.89, + "learning_rate": 2.1925431274346135e-06, + "loss": 0.1129, + "step": 1600 + }, + { + "epoch": 0.89, + "learning_rate": 2.18141346688926e-06, + "loss": 0.0595, + "step": 1601 + }, + { + "epoch": 0.89, + "learning_rate": 2.1702838063439065e-06, + "loss": 0.1394, + "step": 1602 + }, + { + "epoch": 0.89, + "learning_rate": 2.159154145798553e-06, + "loss": 0.1643, + "step": 1603 + }, + { + "epoch": 0.89, + "learning_rate": 2.1480244852532e-06, + "loss": 0.1653, + "step": 1604 + }, + { + "epoch": 0.89, + "learning_rate": 2.1368948247078465e-06, + "loss": 0.0874, + "step": 1605 + }, + { + "epoch": 0.89, + "learning_rate": 2.1257651641624934e-06, + "loss": 0.2413, + "step": 1606 + }, + { + "epoch": 0.89, + "learning_rate": 2.11463550361714e-06, + "loss": 0.087, + "step": 1607 + }, + { + "epoch": 0.89, + "learning_rate": 2.1035058430717864e-06, + "loss": 0.1896, + "step": 1608 + }, + { + "epoch": 0.9, + "learning_rate": 2.092376182526433e-06, + "loss": 0.1098, + "step": 1609 + }, + { + "epoch": 0.9, + "learning_rate": 2.08124652198108e-06, + "loss": 0.1912, + "step": 1610 + }, + { + "epoch": 0.9, + "learning_rate": 2.0701168614357264e-06, + "loss": 0.1945, + "step": 1611 + }, + { + "epoch": 0.9, + "learning_rate": 2.058987200890373e-06, + "loss": 0.1389, + "step": 1612 + }, + { + "epoch": 0.9, + "learning_rate": 2.0478575403450194e-06, + "loss": 0.138, + "step": 1613 + }, + { + "epoch": 0.9, + "learning_rate": 2.0367278797996664e-06, + "loss": 0.1412, + "step": 1614 + }, + { + "epoch": 0.9, + "learning_rate": 2.025598219254313e-06, + "loss": 0.0615, + "step": 1615 + }, + { + "epoch": 0.9, + "learning_rate": 2.01446855870896e-06, + "loss": 0.1641, + "step": 1616 + }, + { + "epoch": 0.9, + "learning_rate": 2.0033388981636063e-06, + "loss": 0.1911, + "step": 1617 + }, + { + "epoch": 0.9, + "learning_rate": 1.992209237618253e-06, + "loss": 0.3495, + "step": 1618 + }, + { + "epoch": 0.9, + "learning_rate": 1.9810795770728994e-06, + "loss": 0.1651, + "step": 1619 + }, + { + "epoch": 0.9, + "learning_rate": 1.969949916527546e-06, + "loss": 0.0884, + "step": 1620 + }, + { + "epoch": 0.9, + "learning_rate": 1.958820255982193e-06, + "loss": 0.1697, + "step": 1621 + }, + { + "epoch": 0.9, + "learning_rate": 1.9476905954368393e-06, + "loss": 0.0879, + "step": 1622 + }, + { + "epoch": 0.9, + "learning_rate": 1.936560934891486e-06, + "loss": 0.2696, + "step": 1623 + }, + { + "epoch": 0.9, + "learning_rate": 1.9254312743461328e-06, + "loss": 0.0844, + "step": 1624 + }, + { + "epoch": 0.9, + "learning_rate": 1.9143016138007793e-06, + "loss": 0.1127, + "step": 1625 + }, + { + "epoch": 0.9, + "learning_rate": 1.9031719532554258e-06, + "loss": 0.1655, + "step": 1626 + }, + { + "epoch": 0.91, + "learning_rate": 1.8920422927100725e-06, + "loss": 0.1621, + "step": 1627 + }, + { + "epoch": 0.91, + "learning_rate": 1.8809126321647193e-06, + "loss": 0.145, + "step": 1628 + }, + { + "epoch": 0.91, + "learning_rate": 1.8697829716193658e-06, + "loss": 0.1663, + "step": 1629 + }, + { + "epoch": 0.91, + "learning_rate": 1.8586533110740123e-06, + "loss": 0.0893, + "step": 1630 + }, + { + "epoch": 0.91, + "learning_rate": 1.847523650528659e-06, + "loss": 0.0601, + "step": 1631 + }, + { + "epoch": 0.91, + "learning_rate": 1.8363939899833055e-06, + "loss": 0.087, + "step": 1632 + }, + { + "epoch": 0.91, + "learning_rate": 1.8252643294379525e-06, + "loss": 0.2169, + "step": 1633 + }, + { + "epoch": 0.91, + "learning_rate": 1.814134668892599e-06, + "loss": 0.1937, + "step": 1634 + }, + { + "epoch": 0.91, + "learning_rate": 1.8030050083472455e-06, + "loss": 0.1432, + "step": 1635 + }, + { + "epoch": 0.91, + "learning_rate": 1.7918753478018922e-06, + "loss": 0.1903, + "step": 1636 + }, + { + "epoch": 0.91, + "learning_rate": 1.7807456872565387e-06, + "loss": 0.1161, + "step": 1637 + }, + { + "epoch": 0.91, + "learning_rate": 1.7696160267111852e-06, + "loss": 0.2442, + "step": 1638 + }, + { + "epoch": 0.91, + "learning_rate": 1.7584863661658322e-06, + "loss": 0.1361, + "step": 1639 + }, + { + "epoch": 0.91, + "learning_rate": 1.7473567056204787e-06, + "loss": 0.06, + "step": 1640 + }, + { + "epoch": 0.91, + "learning_rate": 1.7362270450751254e-06, + "loss": 0.1141, + "step": 1641 + }, + { + "epoch": 0.91, + "learning_rate": 1.725097384529772e-06, + "loss": 0.1653, + "step": 1642 + }, + { + "epoch": 0.91, + "learning_rate": 1.7139677239844184e-06, + "loss": 0.1666, + "step": 1643 + }, + { + "epoch": 0.91, + "learning_rate": 1.7028380634390654e-06, + "loss": 0.1663, + "step": 1644 + }, + { + "epoch": 0.92, + "learning_rate": 1.691708402893712e-06, + "loss": 0.0611, + "step": 1645 + }, + { + "epoch": 0.92, + "learning_rate": 1.6805787423483586e-06, + "loss": 0.2442, + "step": 1646 + }, + { + "epoch": 0.92, + "learning_rate": 1.6694490818030051e-06, + "loss": 0.1931, + "step": 1647 + }, + { + "epoch": 0.92, + "learning_rate": 1.6583194212576517e-06, + "loss": 0.1664, + "step": 1648 + }, + { + "epoch": 0.92, + "learning_rate": 1.6471897607122984e-06, + "loss": 0.1661, + "step": 1649 + }, + { + "epoch": 0.92, + "learning_rate": 1.636060100166945e-06, + "loss": 0.1672, + "step": 1650 + }, + { + "epoch": 0.92, + "learning_rate": 1.6249304396215918e-06, + "loss": 0.1665, + "step": 1651 + }, + { + "epoch": 0.92, + "learning_rate": 1.6138007790762383e-06, + "loss": 0.1417, + "step": 1652 + }, + { + "epoch": 0.92, + "learning_rate": 1.6026711185308849e-06, + "loss": 0.1147, + "step": 1653 + }, + { + "epoch": 0.92, + "learning_rate": 1.5915414579855316e-06, + "loss": 0.137, + "step": 1654 + }, + { + "epoch": 0.92, + "learning_rate": 1.580411797440178e-06, + "loss": 0.0887, + "step": 1655 + }, + { + "epoch": 0.92, + "learning_rate": 1.569282136894825e-06, + "loss": 0.1939, + "step": 1656 + }, + { + "epoch": 0.92, + "learning_rate": 1.5581524763494715e-06, + "loss": 0.1676, + "step": 1657 + }, + { + "epoch": 0.92, + "learning_rate": 1.547022815804118e-06, + "loss": 0.0881, + "step": 1658 + }, + { + "epoch": 0.92, + "learning_rate": 1.5358931552587648e-06, + "loss": 0.0627, + "step": 1659 + }, + { + "epoch": 0.92, + "learning_rate": 1.5247634947134113e-06, + "loss": 0.1131, + "step": 1660 + }, + { + "epoch": 0.92, + "learning_rate": 1.5136338341680578e-06, + "loss": 0.0618, + "step": 1661 + }, + { + "epoch": 0.92, + "learning_rate": 1.5025041736227048e-06, + "loss": 0.1373, + "step": 1662 + }, + { + "epoch": 0.93, + "learning_rate": 1.4913745130773513e-06, + "loss": 0.0879, + "step": 1663 + }, + { + "epoch": 0.93, + "learning_rate": 1.480244852531998e-06, + "loss": 0.1127, + "step": 1664 + }, + { + "epoch": 0.93, + "learning_rate": 1.4691151919866445e-06, + "loss": 0.0348, + "step": 1665 + }, + { + "epoch": 0.93, + "learning_rate": 1.457985531441291e-06, + "loss": 0.2694, + "step": 1666 + }, + { + "epoch": 0.93, + "learning_rate": 1.446855870895938e-06, + "loss": 0.0881, + "step": 1667 + }, + { + "epoch": 0.93, + "learning_rate": 1.4357262103505845e-06, + "loss": 0.1131, + "step": 1668 + }, + { + "epoch": 0.93, + "learning_rate": 1.4245965498052312e-06, + "loss": 0.2455, + "step": 1669 + }, + { + "epoch": 0.93, + "learning_rate": 1.4134668892598777e-06, + "loss": 0.0872, + "step": 1670 + }, + { + "epoch": 0.93, + "learning_rate": 1.4023372287145242e-06, + "loss": 0.1409, + "step": 1671 + }, + { + "epoch": 0.93, + "learning_rate": 1.391207568169171e-06, + "loss": 0.0605, + "step": 1672 + }, + { + "epoch": 0.93, + "learning_rate": 1.3800779076238177e-06, + "loss": 0.2232, + "step": 1673 + }, + { + "epoch": 0.93, + "learning_rate": 1.3689482470784644e-06, + "loss": 0.1421, + "step": 1674 + }, + { + "epoch": 0.93, + "learning_rate": 1.357818586533111e-06, + "loss": 0.1401, + "step": 1675 + }, + { + "epoch": 0.93, + "learning_rate": 1.3466889259877574e-06, + "loss": 0.2136, + "step": 1676 + }, + { + "epoch": 0.93, + "learning_rate": 1.3355592654424042e-06, + "loss": 0.1954, + "step": 1677 + }, + { + "epoch": 0.93, + "learning_rate": 1.3244296048970507e-06, + "loss": 0.1405, + "step": 1678 + }, + { + "epoch": 0.93, + "learning_rate": 1.3132999443516974e-06, + "loss": 0.1406, + "step": 1679 + }, + { + "epoch": 0.93, + "learning_rate": 1.3021702838063441e-06, + "loss": 0.1386, + "step": 1680 + }, + { + "epoch": 0.94, + "learning_rate": 1.2910406232609906e-06, + "loss": 0.1138, + "step": 1681 + }, + { + "epoch": 0.94, + "learning_rate": 1.2799109627156371e-06, + "loss": 0.1675, + "step": 1682 + }, + { + "epoch": 0.94, + "learning_rate": 1.2687813021702839e-06, + "loss": 0.1947, + "step": 1683 + }, + { + "epoch": 0.94, + "learning_rate": 1.2576516416249304e-06, + "loss": 0.085, + "step": 1684 + }, + { + "epoch": 0.94, + "learning_rate": 1.2465219810795771e-06, + "loss": 0.1128, + "step": 1685 + }, + { + "epoch": 0.94, + "learning_rate": 1.2353923205342238e-06, + "loss": 0.0595, + "step": 1686 + }, + { + "epoch": 0.94, + "learning_rate": 1.2242626599888704e-06, + "loss": 0.169, + "step": 1687 + }, + { + "epoch": 0.94, + "learning_rate": 1.213132999443517e-06, + "loss": 0.1398, + "step": 1688 + }, + { + "epoch": 0.94, + "learning_rate": 1.2020033388981638e-06, + "loss": 0.1405, + "step": 1689 + }, + { + "epoch": 0.94, + "learning_rate": 1.1908736783528103e-06, + "loss": 0.1389, + "step": 1690 + }, + { + "epoch": 0.94, + "learning_rate": 1.1797440178074568e-06, + "loss": 0.0852, + "step": 1691 + }, + { + "epoch": 0.94, + "learning_rate": 1.1686143572621036e-06, + "loss": 0.0854, + "step": 1692 + }, + { + "epoch": 0.94, + "learning_rate": 1.1574846967167503e-06, + "loss": 0.1383, + "step": 1693 + }, + { + "epoch": 0.94, + "learning_rate": 1.1463550361713968e-06, + "loss": 0.0585, + "step": 1694 + }, + { + "epoch": 0.94, + "learning_rate": 1.1352253756260435e-06, + "loss": 0.1389, + "step": 1695 + }, + { + "epoch": 0.94, + "learning_rate": 1.12409571508069e-06, + "loss": 0.1962, + "step": 1696 + }, + { + "epoch": 0.94, + "learning_rate": 1.1129660545353368e-06, + "loss": 0.0598, + "step": 1697 + }, + { + "epoch": 0.94, + "learning_rate": 1.1018363939899835e-06, + "loss": 0.0867, + "step": 1698 + }, + { + "epoch": 0.95, + "learning_rate": 1.09070673344463e-06, + "loss": 0.14, + "step": 1699 + }, + { + "epoch": 0.95, + "learning_rate": 1.0795770728992765e-06, + "loss": 0.1377, + "step": 1700 + }, + { + "epoch": 0.95, + "learning_rate": 1.0684474123539232e-06, + "loss": 0.1392, + "step": 1701 + }, + { + "epoch": 0.95, + "learning_rate": 1.05731775180857e-06, + "loss": 0.0586, + "step": 1702 + }, + { + "epoch": 0.95, + "learning_rate": 1.0461880912632165e-06, + "loss": 0.191, + "step": 1703 + }, + { + "epoch": 0.95, + "learning_rate": 1.0350584307178632e-06, + "loss": 0.1387, + "step": 1704 + }, + { + "epoch": 0.95, + "learning_rate": 1.0239287701725097e-06, + "loss": 0.1665, + "step": 1705 + }, + { + "epoch": 0.95, + "learning_rate": 1.0127991096271564e-06, + "loss": 0.1641, + "step": 1706 + }, + { + "epoch": 0.95, + "learning_rate": 1.0016694490818032e-06, + "loss": 0.142, + "step": 1707 + }, + { + "epoch": 0.95, + "learning_rate": 9.905397885364497e-07, + "loss": 0.1116, + "step": 1708 + }, + { + "epoch": 0.95, + "learning_rate": 9.794101279910964e-07, + "loss": 0.1937, + "step": 1709 + }, + { + "epoch": 0.95, + "learning_rate": 9.68280467445743e-07, + "loss": 0.1139, + "step": 1710 + }, + { + "epoch": 0.95, + "learning_rate": 9.571508069003896e-07, + "loss": 0.1965, + "step": 1711 + }, + { + "epoch": 0.95, + "learning_rate": 9.460211463550363e-07, + "loss": 0.06, + "step": 1712 + }, + { + "epoch": 0.95, + "learning_rate": 9.348914858096829e-07, + "loss": 0.1106, + "step": 1713 + }, + { + "epoch": 0.95, + "learning_rate": 9.237618252643295e-07, + "loss": 0.0559, + "step": 1714 + }, + { + "epoch": 0.95, + "learning_rate": 9.126321647189762e-07, + "loss": 0.1644, + "step": 1715 + }, + { + "epoch": 0.95, + "learning_rate": 9.015025041736227e-07, + "loss": 0.1078, + "step": 1716 + }, + { + "epoch": 0.96, + "learning_rate": 8.903728436282694e-07, + "loss": 0.1149, + "step": 1717 + }, + { + "epoch": 0.96, + "learning_rate": 8.792431830829161e-07, + "loss": 0.1127, + "step": 1718 + }, + { + "epoch": 0.96, + "learning_rate": 8.681135225375627e-07, + "loss": 0.1636, + "step": 1719 + }, + { + "epoch": 0.96, + "learning_rate": 8.569838619922092e-07, + "loss": 0.1366, + "step": 1720 + }, + { + "epoch": 0.96, + "learning_rate": 8.45854201446856e-07, + "loss": 0.2227, + "step": 1721 + }, + { + "epoch": 0.96, + "learning_rate": 8.347245409015026e-07, + "loss": 0.2509, + "step": 1722 + }, + { + "epoch": 0.96, + "learning_rate": 8.235948803561492e-07, + "loss": 0.2482, + "step": 1723 + }, + { + "epoch": 0.96, + "learning_rate": 8.124652198107959e-07, + "loss": 0.085, + "step": 1724 + }, + { + "epoch": 0.96, + "learning_rate": 8.013355592654424e-07, + "loss": 0.1918, + "step": 1725 + }, + { + "epoch": 0.96, + "learning_rate": 7.90205898720089e-07, + "loss": 0.0829, + "step": 1726 + }, + { + "epoch": 0.96, + "learning_rate": 7.790762381747358e-07, + "loss": 0.1372, + "step": 1727 + }, + { + "epoch": 0.96, + "learning_rate": 7.679465776293824e-07, + "loss": 0.2195, + "step": 1728 + }, + { + "epoch": 0.96, + "learning_rate": 7.568169170840289e-07, + "loss": 0.112, + "step": 1729 + }, + { + "epoch": 0.96, + "learning_rate": 7.456872565386756e-07, + "loss": 0.1396, + "step": 1730 + }, + { + "epoch": 0.96, + "learning_rate": 7.345575959933223e-07, + "loss": 0.0577, + "step": 1731 + }, + { + "epoch": 0.96, + "learning_rate": 7.23427935447969e-07, + "loss": 0.1926, + "step": 1732 + }, + { + "epoch": 0.96, + "learning_rate": 7.122982749026156e-07, + "loss": 0.0591, + "step": 1733 + }, + { + "epoch": 0.96, + "learning_rate": 7.011686143572621e-07, + "loss": 0.1622, + "step": 1734 + }, + { + "epoch": 0.97, + "learning_rate": 6.900389538119088e-07, + "loss": 0.1679, + "step": 1735 + }, + { + "epoch": 0.97, + "learning_rate": 6.789092932665555e-07, + "loss": 0.1105, + "step": 1736 + }, + { + "epoch": 0.97, + "learning_rate": 6.677796327212021e-07, + "loss": 0.245, + "step": 1737 + }, + { + "epoch": 0.97, + "learning_rate": 6.566499721758487e-07, + "loss": 0.0861, + "step": 1738 + }, + { + "epoch": 0.97, + "learning_rate": 6.455203116304953e-07, + "loss": 0.1406, + "step": 1739 + }, + { + "epoch": 0.97, + "learning_rate": 6.343906510851419e-07, + "loss": 0.1927, + "step": 1740 + }, + { + "epoch": 0.97, + "learning_rate": 6.232609905397886e-07, + "loss": 0.1349, + "step": 1741 + }, + { + "epoch": 0.97, + "learning_rate": 6.121313299944352e-07, + "loss": 0.1133, + "step": 1742 + }, + { + "epoch": 0.97, + "learning_rate": 6.010016694490819e-07, + "loss": 0.1371, + "step": 1743 + }, + { + "epoch": 0.97, + "learning_rate": 5.898720089037284e-07, + "loss": 0.1385, + "step": 1744 + }, + { + "epoch": 0.97, + "learning_rate": 5.787423483583751e-07, + "loss": 0.1116, + "step": 1745 + }, + { + "epoch": 0.97, + "learning_rate": 5.676126878130218e-07, + "loss": 0.1654, + "step": 1746 + }, + { + "epoch": 0.97, + "learning_rate": 5.564830272676684e-07, + "loss": 0.0852, + "step": 1747 + }, + { + "epoch": 0.97, + "learning_rate": 5.45353366722315e-07, + "loss": 0.1911, + "step": 1748 + }, + { + "epoch": 0.97, + "learning_rate": 5.342237061769616e-07, + "loss": 0.1152, + "step": 1749 + }, + { + "epoch": 0.97, + "learning_rate": 5.230940456316082e-07, + "loss": 0.1126, + "step": 1750 + }, + { + "epoch": 0.97, + "learning_rate": 5.119643850862549e-07, + "loss": 0.2206, + "step": 1751 + }, + { + "epoch": 0.97, + "learning_rate": 5.008347245409016e-07, + "loss": 0.2448, + "step": 1752 + }, + { + "epoch": 0.98, + "learning_rate": 4.897050639955482e-07, + "loss": 0.084, + "step": 1753 + }, + { + "epoch": 0.98, + "learning_rate": 4.785754034501948e-07, + "loss": 0.1389, + "step": 1754 + }, + { + "epoch": 0.98, + "learning_rate": 4.6744574290484144e-07, + "loss": 0.1089, + "step": 1755 + }, + { + "epoch": 0.98, + "learning_rate": 4.563160823594881e-07, + "loss": 0.1106, + "step": 1756 + }, + { + "epoch": 0.98, + "learning_rate": 4.451864218141347e-07, + "loss": 0.1119, + "step": 1757 + }, + { + "epoch": 0.98, + "learning_rate": 4.3405676126878136e-07, + "loss": 0.1144, + "step": 1758 + }, + { + "epoch": 0.98, + "learning_rate": 4.22927100723428e-07, + "loss": 0.0595, + "step": 1759 + }, + { + "epoch": 0.98, + "learning_rate": 4.117974401780746e-07, + "loss": 0.1376, + "step": 1760 + }, + { + "epoch": 0.98, + "learning_rate": 4.006677796327212e-07, + "loss": 0.1668, + "step": 1761 + }, + { + "epoch": 0.98, + "learning_rate": 3.895381190873679e-07, + "loss": 0.0846, + "step": 1762 + }, + { + "epoch": 0.98, + "learning_rate": 3.7840845854201445e-07, + "loss": 0.1665, + "step": 1763 + }, + { + "epoch": 0.98, + "learning_rate": 3.672787979966611e-07, + "loss": 0.1139, + "step": 1764 + }, + { + "epoch": 0.98, + "learning_rate": 3.561491374513078e-07, + "loss": 0.0576, + "step": 1765 + }, + { + "epoch": 0.98, + "learning_rate": 3.450194769059544e-07, + "loss": 0.1108, + "step": 1766 + }, + { + "epoch": 0.98, + "learning_rate": 3.3388981636060104e-07, + "loss": 0.1691, + "step": 1767 + }, + { + "epoch": 0.98, + "learning_rate": 3.2276015581524766e-07, + "loss": 0.1664, + "step": 1768 + }, + { + "epoch": 0.98, + "learning_rate": 3.116304952698943e-07, + "loss": 0.1643, + "step": 1769 + }, + { + "epoch": 0.98, + "learning_rate": 3.0050083472454095e-07, + "loss": 0.0856, + "step": 1770 + }, + { + "epoch": 0.99, + "learning_rate": 2.8937117417918757e-07, + "loss": 0.0591, + "step": 1771 + }, + { + "epoch": 0.99, + "learning_rate": 2.782415136338342e-07, + "loss": 0.0856, + "step": 1772 + }, + { + "epoch": 0.99, + "learning_rate": 2.671118530884808e-07, + "loss": 0.0579, + "step": 1773 + }, + { + "epoch": 0.99, + "learning_rate": 2.5598219254312743e-07, + "loss": 0.0848, + "step": 1774 + }, + { + "epoch": 0.99, + "learning_rate": 2.448525319977741e-07, + "loss": 0.1138, + "step": 1775 + }, + { + "epoch": 0.99, + "learning_rate": 2.3372287145242072e-07, + "loss": 0.1109, + "step": 1776 + }, + { + "epoch": 0.99, + "learning_rate": 2.2259321090706734e-07, + "loss": 0.0583, + "step": 1777 + }, + { + "epoch": 0.99, + "learning_rate": 2.11463550361714e-07, + "loss": 0.2195, + "step": 1778 + }, + { + "epoch": 0.99, + "learning_rate": 2.003338898163606e-07, + "loss": 0.0852, + "step": 1779 + }, + { + "epoch": 0.99, + "learning_rate": 1.8920422927100723e-07, + "loss": 0.1632, + "step": 1780 + }, + { + "epoch": 0.99, + "learning_rate": 1.780745687256539e-07, + "loss": 0.1651, + "step": 1781 + }, + { + "epoch": 0.99, + "learning_rate": 1.6694490818030052e-07, + "loss": 0.0584, + "step": 1782 + }, + { + "epoch": 0.99, + "learning_rate": 1.5581524763494714e-07, + "loss": 0.1654, + "step": 1783 + }, + { + "epoch": 0.99, + "learning_rate": 1.4468558708959379e-07, + "loss": 0.1371, + "step": 1784 + }, + { + "epoch": 0.99, + "learning_rate": 1.335559265442404e-07, + "loss": 0.1111, + "step": 1785 + }, + { + "epoch": 0.99, + "learning_rate": 1.2242626599888705e-07, + "loss": 0.1129, + "step": 1786 + }, + { + "epoch": 0.99, + "learning_rate": 1.1129660545353367e-07, + "loss": 0.1911, + "step": 1787 + }, + { + "epoch": 0.99, + "learning_rate": 1.001669449081803e-07, + "loss": 0.1378, + "step": 1788 + }, + { + "epoch": 1.0, + "learning_rate": 8.903728436282695e-08, + "loss": 0.1394, + "step": 1789 + }, + { + "epoch": 1.0, + "learning_rate": 7.790762381747357e-08, + "loss": 0.1657, + "step": 1790 + }, + { + "epoch": 1.0, + "learning_rate": 6.67779632721202e-08, + "loss": 0.1097, + "step": 1791 + }, + { + "epoch": 1.0, + "learning_rate": 5.5648302726766835e-08, + "loss": 0.3042, + "step": 1792 + }, + { + "epoch": 1.0, + "learning_rate": 4.4518642181413475e-08, + "loss": 0.1669, + "step": 1793 + }, + { + "epoch": 1.0, + "learning_rate": 3.33889816360601e-08, + "loss": 0.0582, + "step": 1794 + }, + { + "epoch": 1.0, + "learning_rate": 2.2259321090706737e-08, + "loss": 0.1397, + "step": 1795 + }, + { + "epoch": 1.0, + "learning_rate": 1.1129660545353369e-08, + "loss": 0.1124, + "step": 1796 + }, + { + "epoch": 1.0, + "learning_rate": 0.0, + "loss": 0.4311, + "step": 1797 + }, { "epoch": 1.0, - "step": 899, - "total_flos": 1.512441279525888e+16, - "train_loss": 0.13692307832326586, - "train_runtime": 237.2102, - "train_samples_per_second": 969.317, - "train_steps_per_second": 3.79 + "step": 1797, + "total_flos": 1.887835811853312e+16, + "train_loss": 0.1435431697057082, + "train_runtime": 376.8652, + "train_samples_per_second": 610.117, + "train_steps_per_second": 4.768 } ], - "max_steps": 899, + "max_steps": 1797, "num_train_epochs": 1, - "total_flos": 1.512441279525888e+16, + "total_flos": 1.887835811853312e+16, "trial_name": null, "trial_params": null }