{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.2272680646532144, "global_step": 1340000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 9.997710320774901e-05, "loss": 8.4343, "step": 500 }, { "epoch": 0.0, "learning_rate": 9.995420641549801e-05, "loss": 7.6761, "step": 1000 }, { "epoch": 0.0, "learning_rate": 9.993130962324703e-05, "loss": 7.2953, "step": 1500 }, { "epoch": 0.0, "learning_rate": 9.990841283099604e-05, "loss": 7.1186, "step": 2000 }, { "epoch": 0.0, "learning_rate": 9.988551603874504e-05, "loss": 6.9563, "step": 2500 }, { "epoch": 0.0, "learning_rate": 9.986261924649405e-05, "loss": 6.8496, "step": 3000 }, { "epoch": 0.0, "learning_rate": 9.983972245424305e-05, "loss": 6.7726, "step": 3500 }, { "epoch": 0.0, "learning_rate": 9.981682566199206e-05, "loss": 6.6604, "step": 4000 }, { "epoch": 0.0, "learning_rate": 9.979392886974108e-05, "loss": 6.5838, "step": 4500 }, { "epoch": 0.0, "learning_rate": 9.977103207749008e-05, "loss": 6.5021, "step": 5000 }, { "epoch": 0.01, "learning_rate": 9.974813528523909e-05, "loss": 6.4388, "step": 5500 }, { "epoch": 0.01, "learning_rate": 9.972523849298809e-05, "loss": 6.3715, "step": 6000 }, { "epoch": 0.01, "learning_rate": 9.97023874943216e-05, "loss": 6.3208, "step": 6500 }, { "epoch": 0.01, "learning_rate": 9.967953649565512e-05, "loss": 6.2523, "step": 7000 }, { "epoch": 0.01, "learning_rate": 9.965663970340412e-05, "loss": 6.2069, "step": 7500 }, { "epoch": 0.01, "learning_rate": 9.963374291115313e-05, "loss": 6.1543, "step": 8000 }, { "epoch": 0.01, "learning_rate": 9.961084611890213e-05, "loss": 6.1284, "step": 8500 }, { "epoch": 0.01, "learning_rate": 9.958794932665114e-05, "loss": 6.0781, "step": 9000 }, { "epoch": 0.01, "learning_rate": 9.956509832798466e-05, "loss": 6.0411, "step": 9500 }, { "epoch": 0.01, "learning_rate": 9.954220153573366e-05, "loss": 5.9981, "step": 10000 }, { "epoch": 0.01, "learning_rate": 9.951930474348267e-05, "loss": 5.9761, "step": 10500 }, { "epoch": 0.01, "learning_rate": 9.949640795123167e-05, "loss": 5.9395, "step": 11000 }, { "epoch": 0.01, "learning_rate": 9.947351115898068e-05, "loss": 5.8878, "step": 11500 }, { "epoch": 0.01, "learning_rate": 9.94506143667297e-05, "loss": 5.894, "step": 12000 }, { "epoch": 0.01, "learning_rate": 9.94277175744787e-05, "loss": 5.8519, "step": 12500 }, { "epoch": 0.01, "learning_rate": 9.940486657581219e-05, "loss": 5.8273, "step": 13000 }, { "epoch": 0.01, "learning_rate": 9.93819697835612e-05, "loss": 5.8008, "step": 13500 }, { "epoch": 0.01, "learning_rate": 9.93590729913102e-05, "loss": 5.7704, "step": 14000 }, { "epoch": 0.01, "learning_rate": 9.933617619905922e-05, "loss": 5.7637, "step": 14500 }, { "epoch": 0.01, "learning_rate": 9.931327940680823e-05, "loss": 5.7453, "step": 15000 }, { "epoch": 0.01, "learning_rate": 9.929038261455723e-05, "loss": 5.7167, "step": 15500 }, { "epoch": 0.01, "learning_rate": 9.926748582230624e-05, "loss": 5.6919, "step": 16000 }, { "epoch": 0.02, "learning_rate": 9.924458903005524e-05, "loss": 5.6712, "step": 16500 }, { "epoch": 0.02, "learning_rate": 9.922173803138876e-05, "loss": 5.6498, "step": 17000 }, { "epoch": 0.02, "learning_rate": 9.919884123913777e-05, "loss": 5.62, "step": 17500 }, { "epoch": 0.02, "learning_rate": 9.917599024047127e-05, "loss": 5.61, "step": 18000 }, { "epoch": 0.02, "learning_rate": 9.915309344822028e-05, "loss": 5.5865, "step": 18500 }, { "epoch": 0.02, "learning_rate": 9.913019665596928e-05, "loss": 5.5761, "step": 19000 }, { "epoch": 0.02, "learning_rate": 9.91072998637183e-05, "loss": 5.5614, "step": 19500 }, { "epoch": 0.02, "learning_rate": 9.90844488650518e-05, "loss": 5.5512, "step": 20000 }, { "epoch": 0.02, "learning_rate": 9.906155207280081e-05, "loss": 5.5278, "step": 20500 }, { "epoch": 0.02, "learning_rate": 9.903865528054982e-05, "loss": 5.5045, "step": 21000 }, { "epoch": 0.02, "learning_rate": 9.901575848829882e-05, "loss": 5.4876, "step": 21500 }, { "epoch": 0.02, "learning_rate": 9.899286169604784e-05, "loss": 5.4824, "step": 22000 }, { "epoch": 0.02, "learning_rate": 9.896996490379684e-05, "loss": 5.4701, "step": 22500 }, { "epoch": 0.02, "learning_rate": 9.894706811154585e-05, "loss": 5.4463, "step": 23000 }, { "epoch": 0.02, "learning_rate": 9.892417131929485e-05, "loss": 5.4411, "step": 23500 }, { "epoch": 0.02, "learning_rate": 9.890127452704386e-05, "loss": 5.4202, "step": 24000 }, { "epoch": 0.02, "learning_rate": 9.887846932196188e-05, "loss": 5.4271, "step": 24500 }, { "epoch": 0.02, "learning_rate": 9.885557252971088e-05, "loss": 5.4054, "step": 25000 }, { "epoch": 0.02, "learning_rate": 9.883267573745989e-05, "loss": 5.3969, "step": 25500 }, { "epoch": 0.02, "learning_rate": 9.88097789452089e-05, "loss": 5.3873, "step": 26000 }, { "epoch": 0.02, "learning_rate": 9.87868821529579e-05, "loss": 5.378, "step": 26500 }, { "epoch": 0.02, "learning_rate": 9.876403115429142e-05, "loss": 5.3648, "step": 27000 }, { "epoch": 0.03, "learning_rate": 9.874113436204042e-05, "loss": 5.3609, "step": 27500 }, { "epoch": 0.03, "learning_rate": 9.871823756978943e-05, "loss": 5.3499, "step": 28000 }, { "epoch": 0.03, "learning_rate": 9.869534077753843e-05, "loss": 5.3344, "step": 28500 }, { "epoch": 0.03, "learning_rate": 9.867244398528744e-05, "loss": 5.3369, "step": 29000 }, { "epoch": 0.03, "learning_rate": 9.864954719303646e-05, "loss": 5.3206, "step": 29500 }, { "epoch": 0.03, "learning_rate": 9.862665040078546e-05, "loss": 5.3247, "step": 30000 }, { "epoch": 0.03, "learning_rate": 9.860375360853447e-05, "loss": 5.3145, "step": 30500 }, { "epoch": 0.03, "learning_rate": 9.858085681628347e-05, "loss": 5.298, "step": 31000 }, { "epoch": 0.03, "learning_rate": 9.855805161120148e-05, "loss": 5.2957, "step": 31500 }, { "epoch": 0.03, "learning_rate": 9.85351548189505e-05, "loss": 5.2907, "step": 32000 }, { "epoch": 0.03, "learning_rate": 9.85122580266995e-05, "loss": 5.2881, "step": 32500 }, { "epoch": 0.03, "learning_rate": 9.848936123444851e-05, "loss": 5.2615, "step": 33000 }, { "epoch": 0.03, "learning_rate": 9.846651023578201e-05, "loss": 5.2705, "step": 33500 }, { "epoch": 0.03, "learning_rate": 9.844361344353102e-05, "loss": 5.2556, "step": 34000 }, { "epoch": 0.03, "learning_rate": 9.842071665128004e-05, "loss": 5.2577, "step": 34500 }, { "epoch": 0.03, "learning_rate": 9.839781985902904e-05, "loss": 5.2384, "step": 35000 }, { "epoch": 0.03, "learning_rate": 9.837492306677805e-05, "loss": 5.2469, "step": 35500 }, { "epoch": 0.03, "learning_rate": 9.835202627452705e-05, "loss": 5.2298, "step": 36000 }, { "epoch": 0.03, "learning_rate": 9.832912948227606e-05, "loss": 5.2368, "step": 36500 }, { "epoch": 0.03, "learning_rate": 9.830623269002506e-05, "loss": 5.2303, "step": 37000 }, { "epoch": 0.03, "learning_rate": 9.828333589777407e-05, "loss": 5.2134, "step": 37500 }, { "epoch": 0.03, "learning_rate": 9.826048489910757e-05, "loss": 5.222, "step": 38000 }, { "epoch": 0.04, "learning_rate": 9.823767969402559e-05, "loss": 5.1964, "step": 38500 }, { "epoch": 0.04, "learning_rate": 9.82147829017746e-05, "loss": 5.2077, "step": 39000 }, { "epoch": 0.04, "learning_rate": 9.81918861095236e-05, "loss": 5.1936, "step": 39500 }, { "epoch": 0.04, "learning_rate": 9.816898931727261e-05, "loss": 5.2013, "step": 40000 }, { "epoch": 0.04, "learning_rate": 9.814609252502161e-05, "loss": 5.1999, "step": 40500 }, { "epoch": 0.04, "learning_rate": 9.812319573277062e-05, "loss": 5.1802, "step": 41000 }, { "epoch": 0.04, "learning_rate": 9.810029894051964e-05, "loss": 5.1736, "step": 41500 }, { "epoch": 0.04, "learning_rate": 9.807740214826864e-05, "loss": 5.1556, "step": 42000 }, { "epoch": 0.04, "learning_rate": 9.805450535601765e-05, "loss": 5.167, "step": 42500 }, { "epoch": 0.04, "learning_rate": 9.803165435735115e-05, "loss": 5.1708, "step": 43000 }, { "epoch": 0.04, "learning_rate": 9.800875756510016e-05, "loss": 5.1625, "step": 43500 }, { "epoch": 0.04, "learning_rate": 9.798586077284918e-05, "loss": 5.1446, "step": 44000 }, { "epoch": 0.04, "learning_rate": 9.796296398059818e-05, "loss": 5.1512, "step": 44500 }, { "epoch": 0.04, "learning_rate": 9.794006718834719e-05, "loss": 5.1609, "step": 45000 }, { "epoch": 0.04, "learning_rate": 9.791717039609619e-05, "loss": 5.1432, "step": 45500 }, { "epoch": 0.04, "learning_rate": 9.78943193974297e-05, "loss": 5.1449, "step": 46000 }, { "epoch": 0.04, "learning_rate": 9.787142260517871e-05, "loss": 5.1338, "step": 46500 }, { "epoch": 0.04, "learning_rate": 9.784852581292772e-05, "loss": 5.1284, "step": 47000 }, { "epoch": 0.04, "learning_rate": 9.782562902067672e-05, "loss": 5.1237, "step": 47500 }, { "epoch": 0.04, "learning_rate": 9.780273222842573e-05, "loss": 5.1232, "step": 48000 }, { "epoch": 0.04, "learning_rate": 9.777983543617473e-05, "loss": 5.1129, "step": 48500 }, { "epoch": 0.04, "learning_rate": 9.775693864392374e-05, "loss": 5.1148, "step": 49000 }, { "epoch": 0.05, "learning_rate": 9.773408764525726e-05, "loss": 5.1125, "step": 49500 }, { "epoch": 0.05, "learning_rate": 9.771119085300626e-05, "loss": 5.0908, "step": 50000 }, { "epoch": 0.05, "learning_rate": 9.768829406075527e-05, "loss": 5.1003, "step": 50500 }, { "epoch": 0.05, "learning_rate": 9.766539726850427e-05, "loss": 5.1157, "step": 51000 }, { "epoch": 0.05, "learning_rate": 9.764250047625328e-05, "loss": 5.0904, "step": 51500 }, { "epoch": 0.05, "learning_rate": 9.76196494775868e-05, "loss": 5.095, "step": 52000 }, { "epoch": 0.05, "learning_rate": 9.75967526853358e-05, "loss": 5.0809, "step": 52500 }, { "epoch": 0.05, "learning_rate": 9.757385589308481e-05, "loss": 5.0771, "step": 53000 }, { "epoch": 0.05, "learning_rate": 9.755100489441831e-05, "loss": 5.0863, "step": 53500 }, { "epoch": 0.05, "learning_rate": 9.752810810216733e-05, "loss": 5.081, "step": 54000 }, { "epoch": 0.05, "learning_rate": 9.750521130991634e-05, "loss": 5.0804, "step": 54500 }, { "epoch": 0.05, "learning_rate": 9.748231451766534e-05, "loss": 5.0933, "step": 55000 }, { "epoch": 0.05, "learning_rate": 9.745941772541435e-05, "loss": 5.0863, "step": 55500 }, { "epoch": 0.05, "learning_rate": 9.743652093316335e-05, "loss": 5.0636, "step": 56000 }, { "epoch": 0.05, "learning_rate": 9.741362414091236e-05, "loss": 5.0717, "step": 56500 }, { "epoch": 0.05, "learning_rate": 9.739072734866138e-05, "loss": 5.0522, "step": 57000 }, { "epoch": 0.05, "learning_rate": 9.736783055641038e-05, "loss": 5.0556, "step": 57500 }, { "epoch": 0.05, "learning_rate": 9.734497955774389e-05, "loss": 5.0544, "step": 58000 }, { "epoch": 0.05, "learning_rate": 9.732208276549289e-05, "loss": 5.0498, "step": 58500 }, { "epoch": 0.05, "learning_rate": 9.72991859732419e-05, "loss": 5.0535, "step": 59000 }, { "epoch": 0.05, "learning_rate": 9.727628918099092e-05, "loss": 5.0444, "step": 59500 }, { "epoch": 0.05, "learning_rate": 9.725339238873992e-05, "loss": 5.0377, "step": 60000 }, { "epoch": 0.06, "learning_rate": 9.723049559648893e-05, "loss": 5.0399, "step": 60500 }, { "epoch": 0.06, "learning_rate": 9.720759880423793e-05, "loss": 5.0243, "step": 61000 }, { "epoch": 0.06, "learning_rate": 9.718470201198692e-05, "loss": 5.0291, "step": 61500 }, { "epoch": 0.06, "learning_rate": 9.716180521973594e-05, "loss": 5.0342, "step": 62000 }, { "epoch": 0.06, "learning_rate": 9.713895422106945e-05, "loss": 5.035, "step": 62500 }, { "epoch": 0.06, "learning_rate": 9.711610322240295e-05, "loss": 5.0205, "step": 63000 }, { "epoch": 0.06, "learning_rate": 9.709320643015196e-05, "loss": 5.0258, "step": 63500 }, { "epoch": 0.06, "learning_rate": 9.707030963790096e-05, "loss": 5.0201, "step": 64000 }, { "epoch": 0.06, "learning_rate": 9.704741284564998e-05, "loss": 5.0222, "step": 64500 }, { "epoch": 0.06, "learning_rate": 9.702456184698349e-05, "loss": 5.0088, "step": 65000 }, { "epoch": 0.06, "learning_rate": 9.700166505473249e-05, "loss": 5.0124, "step": 65500 }, { "epoch": 0.06, "learning_rate": 9.69787682624815e-05, "loss": 5.0043, "step": 66000 }, { "epoch": 0.06, "learning_rate": 9.69558714702305e-05, "loss": 5.004, "step": 66500 }, { "epoch": 0.06, "learning_rate": 9.693297467797952e-05, "loss": 5.0045, "step": 67000 }, { "epoch": 0.06, "learning_rate": 9.691007788572853e-05, "loss": 4.9946, "step": 67500 }, { "epoch": 0.06, "learning_rate": 9.688718109347753e-05, "loss": 5.0021, "step": 68000 }, { "epoch": 0.06, "learning_rate": 9.686433009481104e-05, "loss": 5.0055, "step": 68500 }, { "epoch": 0.06, "learning_rate": 9.684143330256004e-05, "loss": 4.9827, "step": 69000 }, { "epoch": 0.06, "learning_rate": 9.681853651030906e-05, "loss": 4.9951, "step": 69500 }, { "epoch": 0.06, "learning_rate": 9.679563971805806e-05, "loss": 5.0007, "step": 70000 }, { "epoch": 0.06, "learning_rate": 9.677274292580707e-05, "loss": 4.9938, "step": 70500 }, { "epoch": 0.07, "learning_rate": 9.674984613355607e-05, "loss": 4.9775, "step": 71000 }, { "epoch": 0.07, "learning_rate": 9.672699513488958e-05, "loss": 4.9694, "step": 71500 }, { "epoch": 0.07, "learning_rate": 9.67040983426386e-05, "loss": 4.9858, "step": 72000 }, { "epoch": 0.07, "learning_rate": 9.66812015503876e-05, "loss": 4.9756, "step": 72500 }, { "epoch": 0.07, "learning_rate": 9.665830475813661e-05, "loss": 4.9821, "step": 73000 }, { "epoch": 0.07, "learning_rate": 9.663545375947011e-05, "loss": 4.9886, "step": 73500 }, { "epoch": 0.07, "learning_rate": 9.661260276080363e-05, "loss": 4.9627, "step": 74000 }, { "epoch": 0.07, "learning_rate": 9.658970596855264e-05, "loss": 4.9704, "step": 74500 }, { "epoch": 0.07, "learning_rate": 9.656680917630164e-05, "loss": 4.9523, "step": 75000 }, { "epoch": 0.07, "learning_rate": 9.654391238405065e-05, "loss": 4.9592, "step": 75500 }, { "epoch": 0.07, "learning_rate": 9.652101559179965e-05, "loss": 4.974, "step": 76000 }, { "epoch": 0.07, "learning_rate": 9.649811879954866e-05, "loss": 4.961, "step": 76500 }, { "epoch": 0.07, "learning_rate": 9.647522200729768e-05, "loss": 4.9724, "step": 77000 }, { "epoch": 0.07, "learning_rate": 9.645232521504668e-05, "loss": 4.9627, "step": 77500 }, { "epoch": 0.07, "learning_rate": 9.642942842279569e-05, "loss": 4.9562, "step": 78000 }, { "epoch": 0.07, "learning_rate": 9.640653163054469e-05, "loss": 4.9546, "step": 78500 }, { "epoch": 0.07, "learning_rate": 9.63836806318782e-05, "loss": 4.9543, "step": 79000 }, { "epoch": 0.07, "learning_rate": 9.636078383962722e-05, "loss": 4.9458, "step": 79500 }, { "epoch": 0.07, "learning_rate": 9.633788704737622e-05, "loss": 4.9604, "step": 80000 }, { "epoch": 0.07, "learning_rate": 9.631499025512523e-05, "loss": 4.9354, "step": 80500 }, { "epoch": 0.07, "learning_rate": 9.629209346287423e-05, "loss": 4.9336, "step": 81000 }, { "epoch": 0.07, "learning_rate": 9.626919667062324e-05, "loss": 4.9344, "step": 81500 }, { "epoch": 0.08, "learning_rate": 9.624634567195676e-05, "loss": 4.9516, "step": 82000 }, { "epoch": 0.08, "learning_rate": 9.622344887970576e-05, "loss": 4.9416, "step": 82500 }, { "epoch": 0.08, "learning_rate": 9.620055208745477e-05, "loss": 4.948, "step": 83000 }, { "epoch": 0.08, "learning_rate": 9.617765529520377e-05, "loss": 4.9374, "step": 83500 }, { "epoch": 0.08, "learning_rate": 9.615475850295278e-05, "loss": 4.9447, "step": 84000 }, { "epoch": 0.08, "learning_rate": 9.61319075042863e-05, "loss": 4.9397, "step": 84500 }, { "epoch": 0.08, "learning_rate": 9.61090107120353e-05, "loss": 4.9379, "step": 85000 }, { "epoch": 0.08, "learning_rate": 9.608611391978429e-05, "loss": 4.9344, "step": 85500 }, { "epoch": 0.08, "learning_rate": 9.60632171275333e-05, "loss": 4.9262, "step": 86000 }, { "epoch": 0.08, "learning_rate": 9.604036612886682e-05, "loss": 4.9254, "step": 86500 }, { "epoch": 0.08, "learning_rate": 9.601746933661582e-05, "loss": 4.9279, "step": 87000 }, { "epoch": 0.08, "learning_rate": 9.599457254436483e-05, "loss": 4.9221, "step": 87500 }, { "epoch": 0.08, "learning_rate": 9.597167575211383e-05, "loss": 4.9334, "step": 88000 }, { "epoch": 0.08, "learning_rate": 9.594877895986284e-05, "loss": 4.9259, "step": 88500 }, { "epoch": 0.08, "learning_rate": 9.592588216761184e-05, "loss": 4.9217, "step": 89000 }, { "epoch": 0.08, "learning_rate": 9.590298537536086e-05, "loss": 4.9246, "step": 89500 }, { "epoch": 0.08, "learning_rate": 9.588013437669437e-05, "loss": 4.9131, "step": 90000 }, { "epoch": 0.08, "learning_rate": 9.585723758444337e-05, "loss": 4.9184, "step": 90500 }, { "epoch": 0.08, "learning_rate": 9.583434079219238e-05, "loss": 4.8998, "step": 91000 }, { "epoch": 0.08, "learning_rate": 9.581144399994138e-05, "loss": 4.9117, "step": 91500 }, { "epoch": 0.08, "learning_rate": 9.57885472076904e-05, "loss": 4.9162, "step": 92000 }, { "epoch": 0.08, "learning_rate": 9.57656504154394e-05, "loss": 4.901, "step": 92500 }, { "epoch": 0.09, "learning_rate": 9.574275362318841e-05, "loss": 4.9029, "step": 93000 }, { "epoch": 0.09, "learning_rate": 9.571985683093741e-05, "loss": 4.9123, "step": 93500 }, { "epoch": 0.09, "learning_rate": 9.569696003868642e-05, "loss": 4.9024, "step": 94000 }, { "epoch": 0.09, "learning_rate": 9.567406324643542e-05, "loss": 4.8946, "step": 94500 }, { "epoch": 0.09, "learning_rate": 9.565121224776894e-05, "loss": 4.8973, "step": 95000 }, { "epoch": 0.09, "learning_rate": 9.562836124910245e-05, "loss": 4.9087, "step": 95500 }, { "epoch": 0.09, "learning_rate": 9.560546445685145e-05, "loss": 4.9024, "step": 96000 }, { "epoch": 0.09, "learning_rate": 9.558256766460046e-05, "loss": 4.906, "step": 96500 }, { "epoch": 0.09, "learning_rate": 9.555967087234946e-05, "loss": 4.9054, "step": 97000 }, { "epoch": 0.09, "learning_rate": 9.553677408009848e-05, "loss": 4.9136, "step": 97500 }, { "epoch": 0.09, "learning_rate": 9.551387728784749e-05, "loss": 4.9013, "step": 98000 }, { "epoch": 0.09, "learning_rate": 9.54909804955965e-05, "loss": 4.8808, "step": 98500 }, { "epoch": 0.09, "learning_rate": 9.54680837033455e-05, "loss": 4.8892, "step": 99000 }, { "epoch": 0.09, "learning_rate": 9.54451869110945e-05, "loss": 4.8987, "step": 99500 }, { "epoch": 0.09, "learning_rate": 9.542229011884352e-05, "loss": 4.89, "step": 100000 }, { "epoch": 0.09, "learning_rate": 9.539943912017703e-05, "loss": 4.8891, "step": 100500 }, { "epoch": 0.09, "learning_rate": 9.537654232792603e-05, "loss": 4.8871, "step": 101000 }, { "epoch": 0.09, "learning_rate": 9.535364553567504e-05, "loss": 4.881, "step": 101500 }, { "epoch": 0.09, "learning_rate": 9.533079453700854e-05, "loss": 4.8753, "step": 102000 }, { "epoch": 0.09, "learning_rate": 9.530789774475756e-05, "loss": 4.8809, "step": 102500 }, { "epoch": 0.09, "learning_rate": 9.528500095250657e-05, "loss": 4.8657, "step": 103000 }, { "epoch": 0.09, "learning_rate": 9.526210416025557e-05, "loss": 4.8868, "step": 103500 }, { "epoch": 0.1, "learning_rate": 9.523920736800458e-05, "loss": 4.8808, "step": 104000 }, { "epoch": 0.1, "learning_rate": 9.521635636933808e-05, "loss": 4.8774, "step": 104500 }, { "epoch": 0.1, "learning_rate": 9.51934595770871e-05, "loss": 4.8726, "step": 105000 }, { "epoch": 0.1, "learning_rate": 9.51705627848361e-05, "loss": 4.863, "step": 105500 }, { "epoch": 0.1, "learning_rate": 9.514766599258511e-05, "loss": 4.871, "step": 106000 }, { "epoch": 0.1, "learning_rate": 9.512476920033412e-05, "loss": 4.866, "step": 106500 }, { "epoch": 0.1, "learning_rate": 9.510187240808312e-05, "loss": 4.8688, "step": 107000 }, { "epoch": 0.1, "learning_rate": 9.507902140941664e-05, "loss": 4.8668, "step": 107500 }, { "epoch": 0.1, "learning_rate": 9.505612461716565e-05, "loss": 4.8611, "step": 108000 }, { "epoch": 0.1, "learning_rate": 9.503322782491465e-05, "loss": 4.8612, "step": 108500 }, { "epoch": 0.1, "learning_rate": 9.501033103266366e-05, "loss": 4.8643, "step": 109000 }, { "epoch": 0.1, "learning_rate": 9.498743424041266e-05, "loss": 4.8752, "step": 109500 }, { "epoch": 0.1, "learning_rate": 9.496453744816167e-05, "loss": 4.8657, "step": 110000 }, { "epoch": 0.1, "learning_rate": 9.494164065591067e-05, "loss": 4.8758, "step": 110500 }, { "epoch": 0.1, "learning_rate": 9.491874386365968e-05, "loss": 4.8615, "step": 111000 }, { "epoch": 0.1, "learning_rate": 9.489589286499318e-05, "loss": 4.864, "step": 111500 }, { "epoch": 0.1, "learning_rate": 9.487299607274219e-05, "loss": 4.8481, "step": 112000 }, { "epoch": 0.1, "learning_rate": 9.48500992804912e-05, "loss": 4.8423, "step": 112500 }, { "epoch": 0.1, "learning_rate": 9.482720248824021e-05, "loss": 4.8536, "step": 113000 }, { "epoch": 0.1, "learning_rate": 9.480430569598922e-05, "loss": 4.8663, "step": 113500 }, { "epoch": 0.1, "learning_rate": 9.478140890373822e-05, "loss": 4.8439, "step": 114000 }, { "epoch": 0.1, "learning_rate": 9.475851211148723e-05, "loss": 4.8723, "step": 114500 }, { "epoch": 0.11, "learning_rate": 9.473561531923623e-05, "loss": 4.8474, "step": 115000 }, { "epoch": 0.11, "learning_rate": 9.471276432056975e-05, "loss": 4.8478, "step": 115500 }, { "epoch": 0.11, "learning_rate": 9.468986752831875e-05, "loss": 4.8462, "step": 116000 }, { "epoch": 0.11, "learning_rate": 9.466697073606776e-05, "loss": 4.8397, "step": 116500 }, { "epoch": 0.11, "learning_rate": 9.464411973740127e-05, "loss": 4.8486, "step": 117000 }, { "epoch": 0.11, "learning_rate": 9.462126873873478e-05, "loss": 4.8517, "step": 117500 }, { "epoch": 0.11, "learning_rate": 9.459837194648379e-05, "loss": 4.8274, "step": 118000 }, { "epoch": 0.11, "learning_rate": 9.45754751542328e-05, "loss": 4.8447, "step": 118500 }, { "epoch": 0.11, "learning_rate": 9.45525783619818e-05, "loss": 4.8462, "step": 119000 }, { "epoch": 0.11, "learning_rate": 9.452972736331532e-05, "loss": 4.8347, "step": 119500 }, { "epoch": 0.11, "learning_rate": 9.450683057106432e-05, "loss": 4.8413, "step": 120000 }, { "epoch": 0.11, "learning_rate": 9.448393377881333e-05, "loss": 4.8332, "step": 120500 }, { "epoch": 0.11, "learning_rate": 9.446103698656233e-05, "loss": 4.829, "step": 121000 }, { "epoch": 0.11, "learning_rate": 9.443818598789584e-05, "loss": 4.8338, "step": 121500 }, { "epoch": 0.11, "learning_rate": 9.441528919564484e-05, "loss": 4.849, "step": 122000 }, { "epoch": 0.11, "learning_rate": 9.439239240339386e-05, "loss": 4.8397, "step": 122500 }, { "epoch": 0.11, "learning_rate": 9.436949561114287e-05, "loss": 4.8379, "step": 123000 }, { "epoch": 0.11, "learning_rate": 9.434659881889187e-05, "loss": 4.8388, "step": 123500 }, { "epoch": 0.11, "learning_rate": 9.432370202664088e-05, "loss": 4.8316, "step": 124000 }, { "epoch": 0.11, "learning_rate": 9.430080523438988e-05, "loss": 4.8345, "step": 124500 }, { "epoch": 0.11, "learning_rate": 9.42779084421389e-05, "loss": 4.8384, "step": 125000 }, { "epoch": 0.11, "learning_rate": 9.425501164988791e-05, "loss": 4.8245, "step": 125500 }, { "epoch": 0.12, "learning_rate": 9.423211485763691e-05, "loss": 4.8235, "step": 126000 }, { "epoch": 0.12, "learning_rate": 9.420921806538592e-05, "loss": 4.8223, "step": 126500 }, { "epoch": 0.12, "learning_rate": 9.418632127313492e-05, "loss": 4.8159, "step": 127000 }, { "epoch": 0.12, "learning_rate": 9.416342448088393e-05, "loss": 4.8264, "step": 127500 }, { "epoch": 0.12, "learning_rate": 9.414066506938645e-05, "loss": 4.819, "step": 128000 }, { "epoch": 0.12, "learning_rate": 9.411776827713545e-05, "loss": 4.8198, "step": 128500 }, { "epoch": 0.12, "learning_rate": 9.409487148488446e-05, "loss": 4.8202, "step": 129000 }, { "epoch": 0.12, "learning_rate": 9.407197469263346e-05, "loss": 4.8242, "step": 129500 }, { "epoch": 0.12, "learning_rate": 9.404907790038248e-05, "loss": 4.8091, "step": 130000 }, { "epoch": 0.12, "learning_rate": 9.402618110813149e-05, "loss": 4.8118, "step": 130500 }, { "epoch": 0.12, "learning_rate": 9.400328431588049e-05, "loss": 4.8133, "step": 131000 }, { "epoch": 0.12, "learning_rate": 9.39803875236295e-05, "loss": 4.8093, "step": 131500 }, { "epoch": 0.12, "learning_rate": 9.39574907313785e-05, "loss": 4.8075, "step": 132000 }, { "epoch": 0.12, "learning_rate": 9.39345939391275e-05, "loss": 4.8173, "step": 132500 }, { "epoch": 0.12, "learning_rate": 9.391169714687653e-05, "loss": 4.8075, "step": 133000 }, { "epoch": 0.12, "learning_rate": 9.388880035462553e-05, "loss": 4.8097, "step": 133500 }, { "epoch": 0.12, "learning_rate": 9.386590356237454e-05, "loss": 4.8085, "step": 134000 }, { "epoch": 0.12, "learning_rate": 9.384300677012354e-05, "loss": 4.819, "step": 134500 }, { "epoch": 0.12, "learning_rate": 9.382010997787255e-05, "loss": 4.8082, "step": 135000 }, { "epoch": 0.12, "learning_rate": 9.379721318562155e-05, "loss": 4.8021, "step": 135500 }, { "epoch": 0.12, "learning_rate": 9.377431639337056e-05, "loss": 4.8132, "step": 136000 }, { "epoch": 0.13, "learning_rate": 9.375146539470406e-05, "loss": 4.8171, "step": 136500 }, { "epoch": 0.13, "learning_rate": 9.372861439603757e-05, "loss": 4.8076, "step": 137000 }, { "epoch": 0.13, "learning_rate": 9.370571760378659e-05, "loss": 4.8095, "step": 137500 }, { "epoch": 0.13, "learning_rate": 9.368286660512009e-05, "loss": 4.8058, "step": 138000 }, { "epoch": 0.13, "learning_rate": 9.36599698128691e-05, "loss": 4.7895, "step": 138500 }, { "epoch": 0.13, "learning_rate": 9.36370730206181e-05, "loss": 4.8062, "step": 139000 }, { "epoch": 0.13, "learning_rate": 9.36141762283671e-05, "loss": 4.796, "step": 139500 }, { "epoch": 0.13, "learning_rate": 9.359127943611612e-05, "loss": 4.798, "step": 140000 }, { "epoch": 0.13, "learning_rate": 9.356838264386513e-05, "loss": 4.7977, "step": 140500 }, { "epoch": 0.13, "learning_rate": 9.354553164519864e-05, "loss": 4.799, "step": 141000 }, { "epoch": 0.13, "learning_rate": 9.352263485294764e-05, "loss": 4.8073, "step": 141500 }, { "epoch": 0.13, "learning_rate": 9.349973806069665e-05, "loss": 4.7919, "step": 142000 }, { "epoch": 0.13, "learning_rate": 9.347684126844566e-05, "loss": 4.7796, "step": 142500 }, { "epoch": 0.13, "learning_rate": 9.345394447619467e-05, "loss": 4.7973, "step": 143000 }, { "epoch": 0.13, "learning_rate": 9.343104768394367e-05, "loss": 4.7906, "step": 143500 }, { "epoch": 0.13, "learning_rate": 9.340815089169268e-05, "loss": 4.8024, "step": 144000 }, { "epoch": 0.13, "learning_rate": 9.338525409944168e-05, "loss": 4.7982, "step": 144500 }, { "epoch": 0.13, "learning_rate": 9.336235730719069e-05, "loss": 4.7862, "step": 145000 }, { "epoch": 0.13, "learning_rate": 9.333946051493971e-05, "loss": 4.7838, "step": 145500 }, { "epoch": 0.13, "learning_rate": 9.331656372268871e-05, "loss": 4.7872, "step": 146000 }, { "epoch": 0.13, "learning_rate": 9.329371272402222e-05, "loss": 4.7776, "step": 146500 }, { "epoch": 0.13, "learning_rate": 9.327081593177122e-05, "loss": 4.7827, "step": 147000 }, { "epoch": 0.14, "learning_rate": 9.324791913952023e-05, "loss": 4.7906, "step": 147500 }, { "epoch": 0.14, "learning_rate": 9.322502234726925e-05, "loss": 4.7795, "step": 148000 }, { "epoch": 0.14, "learning_rate": 9.320212555501825e-05, "loss": 4.7873, "step": 148500 }, { "epoch": 0.14, "learning_rate": 9.317932034993626e-05, "loss": 4.786, "step": 149000 }, { "epoch": 0.14, "learning_rate": 9.315642355768526e-05, "loss": 4.7833, "step": 149500 }, { "epoch": 0.14, "learning_rate": 9.313352676543428e-05, "loss": 4.7912, "step": 150000 }, { "epoch": 0.14, "learning_rate": 9.311062997318329e-05, "loss": 4.7711, "step": 150500 }, { "epoch": 0.14, "learning_rate": 9.308773318093229e-05, "loss": 4.775, "step": 151000 }, { "epoch": 0.14, "learning_rate": 9.30648363886813e-05, "loss": 4.7816, "step": 151500 }, { "epoch": 0.14, "learning_rate": 9.30420311835993e-05, "loss": 4.769, "step": 152000 }, { "epoch": 0.14, "learning_rate": 9.301913439134832e-05, "loss": 4.7854, "step": 152500 }, { "epoch": 0.14, "learning_rate": 9.299623759909733e-05, "loss": 4.7933, "step": 153000 }, { "epoch": 0.14, "learning_rate": 9.297334080684633e-05, "loss": 4.7785, "step": 153500 }, { "epoch": 0.14, "learning_rate": 9.295044401459534e-05, "loss": 4.765, "step": 154000 }, { "epoch": 0.14, "learning_rate": 9.292754722234434e-05, "loss": 4.7787, "step": 154500 }, { "epoch": 0.14, "learning_rate": 9.290465043009335e-05, "loss": 4.7839, "step": 155000 }, { "epoch": 0.14, "learning_rate": 9.288175363784237e-05, "loss": 4.7812, "step": 155500 }, { "epoch": 0.14, "learning_rate": 9.285885684559137e-05, "loss": 4.7723, "step": 156000 }, { "epoch": 0.14, "learning_rate": 9.283596005334038e-05, "loss": 4.787, "step": 156500 }, { "epoch": 0.14, "learning_rate": 9.281306326108938e-05, "loss": 4.7707, "step": 157000 }, { "epoch": 0.14, "learning_rate": 9.279016646883839e-05, "loss": 4.7779, "step": 157500 }, { "epoch": 0.14, "learning_rate": 9.27672696765874e-05, "loss": 4.7703, "step": 158000 }, { "epoch": 0.15, "learning_rate": 9.274441867792091e-05, "loss": 4.7606, "step": 158500 }, { "epoch": 0.15, "learning_rate": 9.27215218856699e-05, "loss": 4.7575, "step": 159000 }, { "epoch": 0.15, "learning_rate": 9.269862509341891e-05, "loss": 4.7708, "step": 159500 }, { "epoch": 0.15, "learning_rate": 9.267572830116793e-05, "loss": 4.7602, "step": 160000 }, { "epoch": 0.15, "learning_rate": 9.265287730250143e-05, "loss": 4.7595, "step": 160500 }, { "epoch": 0.15, "learning_rate": 9.262998051025044e-05, "loss": 4.7712, "step": 161000 }, { "epoch": 0.15, "learning_rate": 9.260708371799944e-05, "loss": 4.7699, "step": 161500 }, { "epoch": 0.15, "learning_rate": 9.258418692574845e-05, "loss": 4.7676, "step": 162000 }, { "epoch": 0.15, "learning_rate": 9.256133592708197e-05, "loss": 4.7723, "step": 162500 }, { "epoch": 0.15, "learning_rate": 9.253843913483097e-05, "loss": 4.7604, "step": 163000 }, { "epoch": 0.15, "learning_rate": 9.251558813616448e-05, "loss": 4.7622, "step": 163500 }, { "epoch": 0.15, "learning_rate": 9.249269134391348e-05, "loss": 4.774, "step": 164000 }, { "epoch": 0.15, "learning_rate": 9.246979455166249e-05, "loss": 4.7561, "step": 164500 }, { "epoch": 0.15, "learning_rate": 9.24468977594115e-05, "loss": 4.7561, "step": 165000 }, { "epoch": 0.15, "learning_rate": 9.242400096716051e-05, "loss": 4.755, "step": 165500 }, { "epoch": 0.15, "learning_rate": 9.240114996849402e-05, "loss": 4.7644, "step": 166000 }, { "epoch": 0.15, "learning_rate": 9.237825317624302e-05, "loss": 4.7578, "step": 166500 }, { "epoch": 0.15, "learning_rate": 9.235535638399203e-05, "loss": 4.7633, "step": 167000 }, { "epoch": 0.15, "learning_rate": 9.233250538532554e-05, "loss": 4.7604, "step": 167500 }, { "epoch": 0.15, "learning_rate": 9.230960859307455e-05, "loss": 4.7465, "step": 168000 }, { "epoch": 0.15, "learning_rate": 9.228671180082355e-05, "loss": 4.7608, "step": 168500 }, { "epoch": 0.15, "learning_rate": 9.226381500857256e-05, "loss": 4.7431, "step": 169000 }, { "epoch": 0.16, "learning_rate": 9.224096400990608e-05, "loss": 4.7622, "step": 169500 }, { "epoch": 0.16, "learning_rate": 9.221806721765508e-05, "loss": 4.7538, "step": 170000 }, { "epoch": 0.16, "learning_rate": 9.219521621898859e-05, "loss": 4.742, "step": 170500 }, { "epoch": 0.16, "learning_rate": 9.21723194267376e-05, "loss": 4.7637, "step": 171000 }, { "epoch": 0.16, "learning_rate": 9.21494226344866e-05, "loss": 4.7577, "step": 171500 }, { "epoch": 0.16, "learning_rate": 9.21265258422356e-05, "loss": 4.7585, "step": 172000 }, { "epoch": 0.16, "learning_rate": 9.210367484356912e-05, "loss": 4.7495, "step": 172500 }, { "epoch": 0.16, "learning_rate": 9.208077805131813e-05, "loss": 4.7448, "step": 173000 }, { "epoch": 0.16, "learning_rate": 9.205788125906713e-05, "loss": 4.7499, "step": 173500 }, { "epoch": 0.16, "learning_rate": 9.203498446681614e-05, "loss": 4.7594, "step": 174000 }, { "epoch": 0.16, "learning_rate": 9.201208767456514e-05, "loss": 4.735, "step": 174500 }, { "epoch": 0.16, "learning_rate": 9.198919088231416e-05, "loss": 4.7598, "step": 175000 }, { "epoch": 0.16, "learning_rate": 9.196629409006317e-05, "loss": 4.7507, "step": 175500 }, { "epoch": 0.16, "learning_rate": 9.194339729781217e-05, "loss": 4.7464, "step": 176000 }, { "epoch": 0.16, "learning_rate": 9.192050050556118e-05, "loss": 4.7455, "step": 176500 }, { "epoch": 0.16, "learning_rate": 9.189760371331018e-05, "loss": 4.7331, "step": 177000 }, { "epoch": 0.16, "learning_rate": 9.18747069210592e-05, "loss": 4.7467, "step": 177500 }, { "epoch": 0.16, "learning_rate": 9.18518101288082e-05, "loss": 4.7463, "step": 178000 }, { "epoch": 0.16, "learning_rate": 9.182891333655721e-05, "loss": 4.7451, "step": 178500 }, { "epoch": 0.16, "learning_rate": 9.180601654430622e-05, "loss": 4.748, "step": 179000 }, { "epoch": 0.16, "learning_rate": 9.178316554563972e-05, "loss": 4.7535, "step": 179500 }, { "epoch": 0.16, "learning_rate": 9.176026875338873e-05, "loss": 4.7316, "step": 180000 }, { "epoch": 0.17, "learning_rate": 9.173737196113775e-05, "loss": 4.7333, "step": 180500 }, { "epoch": 0.17, "learning_rate": 9.171452096247125e-05, "loss": 4.7415, "step": 181000 }, { "epoch": 0.17, "learning_rate": 9.169162417022026e-05, "loss": 4.7351, "step": 181500 }, { "epoch": 0.17, "learning_rate": 9.166872737796926e-05, "loss": 4.7343, "step": 182000 }, { "epoch": 0.17, "learning_rate": 9.164583058571827e-05, "loss": 4.7392, "step": 182500 }, { "epoch": 0.17, "learning_rate": 9.162293379346727e-05, "loss": 4.727, "step": 183000 }, { "epoch": 0.17, "learning_rate": 9.160003700121628e-05, "loss": 4.7467, "step": 183500 }, { "epoch": 0.17, "learning_rate": 9.157714020896528e-05, "loss": 4.7399, "step": 184000 }, { "epoch": 0.17, "learning_rate": 9.155424341671429e-05, "loss": 4.7443, "step": 184500 }, { "epoch": 0.17, "learning_rate": 9.15313466244633e-05, "loss": 4.7317, "step": 185000 }, { "epoch": 0.17, "learning_rate": 9.150844983221231e-05, "loss": 4.7463, "step": 185500 }, { "epoch": 0.17, "learning_rate": 9.148555303996132e-05, "loss": 4.7344, "step": 186000 }, { "epoch": 0.17, "learning_rate": 9.146265624771032e-05, "loss": 4.7399, "step": 186500 }, { "epoch": 0.17, "learning_rate": 9.143975945545933e-05, "loss": 4.7438, "step": 187000 }, { "epoch": 0.17, "learning_rate": 9.141690845679284e-05, "loss": 4.7464, "step": 187500 }, { "epoch": 0.17, "learning_rate": 9.139401166454185e-05, "loss": 4.7286, "step": 188000 }, { "epoch": 0.17, "learning_rate": 9.137116066587536e-05, "loss": 4.7342, "step": 188500 }, { "epoch": 0.17, "learning_rate": 9.134826387362436e-05, "loss": 4.7316, "step": 189000 }, { "epoch": 0.17, "learning_rate": 9.132536708137337e-05, "loss": 4.7212, "step": 189500 }, { "epoch": 0.17, "learning_rate": 9.130247028912237e-05, "loss": 4.7231, "step": 190000 }, { "epoch": 0.17, "learning_rate": 9.127957349687139e-05, "loss": 4.7126, "step": 190500 }, { "epoch": 0.17, "learning_rate": 9.12566767046204e-05, "loss": 4.7251, "step": 191000 }, { "epoch": 0.18, "learning_rate": 9.12337799123694e-05, "loss": 4.7373, "step": 191500 }, { "epoch": 0.18, "learning_rate": 9.12108831201184e-05, "loss": 4.7227, "step": 192000 }, { "epoch": 0.18, "learning_rate": 9.118798632786741e-05, "loss": 4.729, "step": 192500 }, { "epoch": 0.18, "learning_rate": 9.116518112278543e-05, "loss": 4.7204, "step": 193000 }, { "epoch": 0.18, "learning_rate": 9.114228433053443e-05, "loss": 4.7389, "step": 193500 }, { "epoch": 0.18, "learning_rate": 9.111938753828344e-05, "loss": 4.7267, "step": 194000 }, { "epoch": 0.18, "learning_rate": 9.109649074603244e-05, "loss": 4.7145, "step": 194500 }, { "epoch": 0.18, "learning_rate": 9.107359395378145e-05, "loss": 4.7421, "step": 195000 }, { "epoch": 0.18, "learning_rate": 9.105074295511497e-05, "loss": 4.727, "step": 195500 }, { "epoch": 0.18, "learning_rate": 9.102784616286397e-05, "loss": 4.7267, "step": 196000 }, { "epoch": 0.18, "learning_rate": 9.100494937061298e-05, "loss": 4.7183, "step": 196500 }, { "epoch": 0.18, "learning_rate": 9.098205257836198e-05, "loss": 4.7246, "step": 197000 }, { "epoch": 0.18, "learning_rate": 9.095915578611099e-05, "loss": 4.7332, "step": 197500 }, { "epoch": 0.18, "learning_rate": 9.093625899386001e-05, "loss": 4.7065, "step": 198000 }, { "epoch": 0.18, "learning_rate": 9.091336220160901e-05, "loss": 4.7115, "step": 198500 }, { "epoch": 0.18, "learning_rate": 9.089046540935802e-05, "loss": 4.7332, "step": 199000 }, { "epoch": 0.18, "learning_rate": 9.086756861710702e-05, "loss": 4.7145, "step": 199500 }, { "epoch": 0.18, "learning_rate": 9.084471761844053e-05, "loss": 4.7091, "step": 200000 }, { "epoch": 0.18, "learning_rate": 9.082186661977405e-05, "loss": 4.7069, "step": 200500 }, { "epoch": 0.18, "learning_rate": 9.079901562110755e-05, "loss": 4.7214, "step": 201000 }, { "epoch": 0.18, "learning_rate": 9.077611882885656e-05, "loss": 4.7218, "step": 201500 }, { "epoch": 0.19, "learning_rate": 9.075322203660556e-05, "loss": 4.7131, "step": 202000 }, { "epoch": 0.19, "learning_rate": 9.073032524435458e-05, "loss": 4.7241, "step": 202500 }, { "epoch": 0.19, "learning_rate": 9.070742845210359e-05, "loss": 4.7084, "step": 203000 }, { "epoch": 0.19, "learning_rate": 9.068457745343709e-05, "loss": 4.7187, "step": 203500 }, { "epoch": 0.19, "learning_rate": 9.06616806611861e-05, "loss": 4.7114, "step": 204000 }, { "epoch": 0.19, "learning_rate": 9.06387838689351e-05, "loss": 4.7137, "step": 204500 }, { "epoch": 0.19, "learning_rate": 9.061588707668411e-05, "loss": 4.7077, "step": 205000 }, { "epoch": 0.19, "learning_rate": 9.059299028443313e-05, "loss": 4.7174, "step": 205500 }, { "epoch": 0.19, "learning_rate": 9.057009349218213e-05, "loss": 4.7118, "step": 206000 }, { "epoch": 0.19, "learning_rate": 9.054719669993114e-05, "loss": 4.7051, "step": 206500 }, { "epoch": 0.19, "learning_rate": 9.052429990768014e-05, "loss": 4.712, "step": 207000 }, { "epoch": 0.19, "learning_rate": 9.050144890901365e-05, "loss": 4.7138, "step": 207500 }, { "epoch": 0.19, "learning_rate": 9.047855211676265e-05, "loss": 4.7041, "step": 208000 }, { "epoch": 0.19, "learning_rate": 9.045565532451166e-05, "loss": 4.7185, "step": 208500 }, { "epoch": 0.19, "learning_rate": 9.043275853226066e-05, "loss": 4.7227, "step": 209000 }, { "epoch": 0.19, "learning_rate": 9.040986174000967e-05, "loss": 4.7194, "step": 209500 }, { "epoch": 0.19, "learning_rate": 9.038696494775869e-05, "loss": 4.7187, "step": 210000 }, { "epoch": 0.19, "learning_rate": 9.036406815550769e-05, "loss": 4.7064, "step": 210500 }, { "epoch": 0.19, "learning_rate": 9.03411713632567e-05, "loss": 4.7025, "step": 211000 }, { "epoch": 0.19, "learning_rate": 9.03182745710057e-05, "loss": 4.7065, "step": 211500 }, { "epoch": 0.19, "learning_rate": 9.02954235723392e-05, "loss": 4.7148, "step": 212000 }, { "epoch": 0.19, "learning_rate": 9.027252678008822e-05, "loss": 4.7038, "step": 212500 }, { "epoch": 0.2, "learning_rate": 9.024962998783723e-05, "loss": 4.6974, "step": 213000 }, { "epoch": 0.2, "learning_rate": 9.022677898917074e-05, "loss": 4.7006, "step": 213500 }, { "epoch": 0.2, "learning_rate": 9.020388219691974e-05, "loss": 4.7143, "step": 214000 }, { "epoch": 0.2, "learning_rate": 9.018098540466875e-05, "loss": 4.7062, "step": 214500 }, { "epoch": 0.2, "learning_rate": 9.015808861241775e-05, "loss": 4.6983, "step": 215000 }, { "epoch": 0.2, "learning_rate": 9.013519182016677e-05, "loss": 4.7196, "step": 215500 }, { "epoch": 0.2, "learning_rate": 9.011229502791577e-05, "loss": 4.7019, "step": 216000 }, { "epoch": 0.2, "learning_rate": 9.008939823566478e-05, "loss": 4.6808, "step": 216500 }, { "epoch": 0.2, "learning_rate": 9.006650144341378e-05, "loss": 4.7091, "step": 217000 }, { "epoch": 0.2, "learning_rate": 9.00436962383318e-05, "loss": 4.6953, "step": 217500 }, { "epoch": 0.2, "learning_rate": 9.002079944608081e-05, "loss": 4.6965, "step": 218000 }, { "epoch": 0.2, "learning_rate": 8.999790265382981e-05, "loss": 4.7058, "step": 218500 }, { "epoch": 0.2, "learning_rate": 8.997500586157882e-05, "loss": 4.7001, "step": 219000 }, { "epoch": 0.2, "learning_rate": 8.995210906932782e-05, "loss": 4.7157, "step": 219500 }, { "epoch": 0.2, "learning_rate": 8.992921227707683e-05, "loss": 4.6924, "step": 220000 }, { "epoch": 0.2, "learning_rate": 8.990631548482585e-05, "loss": 4.6984, "step": 220500 }, { "epoch": 0.2, "learning_rate": 8.988341869257485e-05, "loss": 4.7011, "step": 221000 }, { "epoch": 0.2, "learning_rate": 8.986056769390836e-05, "loss": 4.6908, "step": 221500 }, { "epoch": 0.2, "learning_rate": 8.983767090165736e-05, "loss": 4.7007, "step": 222000 }, { "epoch": 0.2, "learning_rate": 8.981477410940637e-05, "loss": 4.6884, "step": 222500 }, { "epoch": 0.2, "learning_rate": 8.979187731715539e-05, "loss": 4.6917, "step": 223000 }, { "epoch": 0.2, "learning_rate": 8.976898052490439e-05, "loss": 4.6879, "step": 223500 }, { "epoch": 0.21, "learning_rate": 8.97460837326534e-05, "loss": 4.7008, "step": 224000 }, { "epoch": 0.21, "learning_rate": 8.97231869404024e-05, "loss": 4.6931, "step": 224500 }, { "epoch": 0.21, "learning_rate": 8.970029014815141e-05, "loss": 4.7007, "step": 225000 }, { "epoch": 0.21, "learning_rate": 8.967743914948493e-05, "loss": 4.6898, "step": 225500 }, { "epoch": 0.21, "learning_rate": 8.965454235723393e-05, "loss": 4.6936, "step": 226000 }, { "epoch": 0.21, "learning_rate": 8.963164556498294e-05, "loss": 4.6953, "step": 226500 }, { "epoch": 0.21, "learning_rate": 8.960879456631644e-05, "loss": 4.6686, "step": 227000 }, { "epoch": 0.21, "learning_rate": 8.958589777406545e-05, "loss": 4.6965, "step": 227500 }, { "epoch": 0.21, "learning_rate": 8.956300098181447e-05, "loss": 4.7081, "step": 228000 }, { "epoch": 0.21, "learning_rate": 8.954010418956347e-05, "loss": 4.69, "step": 228500 }, { "epoch": 0.21, "learning_rate": 8.951720739731248e-05, "loss": 4.6863, "step": 229000 }, { "epoch": 0.21, "learning_rate": 8.949431060506148e-05, "loss": 4.69, "step": 229500 }, { "epoch": 0.21, "learning_rate": 8.947141381281049e-05, "loss": 4.69, "step": 230000 }, { "epoch": 0.21, "learning_rate": 8.944851702055949e-05, "loss": 4.6866, "step": 230500 }, { "epoch": 0.21, "learning_rate": 8.942562022830851e-05, "loss": 4.6946, "step": 231000 }, { "epoch": 0.21, "learning_rate": 8.940276922964202e-05, "loss": 4.6808, "step": 231500 }, { "epoch": 0.21, "learning_rate": 8.9379872437391e-05, "loss": 4.6888, "step": 232000 }, { "epoch": 0.21, "learning_rate": 8.935697564514001e-05, "loss": 4.6793, "step": 232500 }, { "epoch": 0.21, "learning_rate": 8.933412464647353e-05, "loss": 4.6792, "step": 233000 }, { "epoch": 0.21, "learning_rate": 8.931122785422254e-05, "loss": 4.6837, "step": 233500 }, { "epoch": 0.21, "learning_rate": 8.928833106197154e-05, "loss": 4.7022, "step": 234000 }, { "epoch": 0.21, "learning_rate": 8.926543426972055e-05, "loss": 4.6798, "step": 234500 }, { "epoch": 0.22, "learning_rate": 8.924258327105407e-05, "loss": 4.6858, "step": 235000 }, { "epoch": 0.22, "learning_rate": 8.921968647880307e-05, "loss": 4.674, "step": 235500 }, { "epoch": 0.22, "learning_rate": 8.919683548013658e-05, "loss": 4.6769, "step": 236000 }, { "epoch": 0.22, "learning_rate": 8.917393868788558e-05, "loss": 4.688, "step": 236500 }, { "epoch": 0.22, "learning_rate": 8.915104189563459e-05, "loss": 4.6883, "step": 237000 }, { "epoch": 0.22, "learning_rate": 8.91281451033836e-05, "loss": 4.6925, "step": 237500 }, { "epoch": 0.22, "learning_rate": 8.910524831113261e-05, "loss": 4.6764, "step": 238000 }, { "epoch": 0.22, "learning_rate": 8.908235151888161e-05, "loss": 4.6804, "step": 238500 }, { "epoch": 0.22, "learning_rate": 8.905945472663062e-05, "loss": 4.6807, "step": 239000 }, { "epoch": 0.22, "learning_rate": 8.903660372796413e-05, "loss": 4.693, "step": 239500 }, { "epoch": 0.22, "learning_rate": 8.901370693571313e-05, "loss": 4.6767, "step": 240000 }, { "epoch": 0.22, "learning_rate": 8.899081014346215e-05, "loss": 4.6675, "step": 240500 }, { "epoch": 0.22, "learning_rate": 8.896791335121115e-05, "loss": 4.6842, "step": 241000 }, { "epoch": 0.22, "learning_rate": 8.894501655896016e-05, "loss": 4.6811, "step": 241500 }, { "epoch": 0.22, "learning_rate": 8.892211976670916e-05, "loss": 4.6867, "step": 242000 }, { "epoch": 0.22, "learning_rate": 8.889922297445817e-05, "loss": 4.6791, "step": 242500 }, { "epoch": 0.22, "learning_rate": 8.887637197579169e-05, "loss": 4.672, "step": 243000 }, { "epoch": 0.22, "learning_rate": 8.88534751835407e-05, "loss": 4.6772, "step": 243500 }, { "epoch": 0.22, "learning_rate": 8.88305783912897e-05, "loss": 4.6779, "step": 244000 }, { "epoch": 0.22, "learning_rate": 8.88076815990387e-05, "loss": 4.6865, "step": 244500 }, { "epoch": 0.22, "learning_rate": 8.878478480678771e-05, "loss": 4.6762, "step": 245000 }, { "epoch": 0.22, "learning_rate": 8.876188801453673e-05, "loss": 4.6774, "step": 245500 }, { "epoch": 0.23, "learning_rate": 8.873899122228573e-05, "loss": 4.6884, "step": 246000 }, { "epoch": 0.23, "learning_rate": 8.871614022361924e-05, "loss": 4.6745, "step": 246500 }, { "epoch": 0.23, "learning_rate": 8.869324343136824e-05, "loss": 4.6841, "step": 247000 }, { "epoch": 0.23, "learning_rate": 8.867034663911725e-05, "loss": 4.6687, "step": 247500 }, { "epoch": 0.23, "learning_rate": 8.864744984686625e-05, "loss": 4.6751, "step": 248000 }, { "epoch": 0.23, "learning_rate": 8.862459884819977e-05, "loss": 4.6713, "step": 248500 }, { "epoch": 0.23, "learning_rate": 8.860170205594878e-05, "loss": 4.6759, "step": 249000 }, { "epoch": 0.23, "learning_rate": 8.857880526369778e-05, "loss": 4.6654, "step": 249500 }, { "epoch": 0.23, "learning_rate": 8.855590847144679e-05, "loss": 4.6717, "step": 250000 }, { "epoch": 0.23, "learning_rate": 8.853301167919579e-05, "loss": 4.6809, "step": 250500 }, { "epoch": 0.23, "learning_rate": 8.851011488694481e-05, "loss": 4.6718, "step": 251000 }, { "epoch": 0.23, "learning_rate": 8.848721809469382e-05, "loss": 4.6737, "step": 251500 }, { "epoch": 0.23, "learning_rate": 8.846432130244282e-05, "loss": 4.6611, "step": 252000 }, { "epoch": 0.23, "learning_rate": 8.844142451019183e-05, "loss": 4.6653, "step": 252500 }, { "epoch": 0.23, "learning_rate": 8.841852771794083e-05, "loss": 4.6504, "step": 253000 }, { "epoch": 0.23, "learning_rate": 8.839563092568985e-05, "loss": 4.6703, "step": 253500 }, { "epoch": 0.23, "learning_rate": 8.837273413343885e-05, "loss": 4.6781, "step": 254000 }, { "epoch": 0.23, "learning_rate": 8.834988313477236e-05, "loss": 4.6634, "step": 254500 }, { "epoch": 0.23, "learning_rate": 8.832698634252137e-05, "loss": 4.6633, "step": 255000 }, { "epoch": 0.23, "learning_rate": 8.830408955027037e-05, "loss": 4.6825, "step": 255500 }, { "epoch": 0.23, "learning_rate": 8.828119275801938e-05, "loss": 4.6819, "step": 256000 }, { "epoch": 0.23, "learning_rate": 8.825834175935288e-05, "loss": 4.665, "step": 256500 }, { "epoch": 0.24, "learning_rate": 8.823544496710189e-05, "loss": 4.6716, "step": 257000 }, { "epoch": 0.24, "learning_rate": 8.821254817485089e-05, "loss": 4.666, "step": 257500 }, { "epoch": 0.24, "learning_rate": 8.81896513825999e-05, "loss": 4.6797, "step": 258000 }, { "epoch": 0.24, "learning_rate": 8.816675459034891e-05, "loss": 4.6855, "step": 258500 }, { "epoch": 0.24, "learning_rate": 8.814385779809792e-05, "loss": 4.6556, "step": 259000 }, { "epoch": 0.24, "learning_rate": 8.812100679943143e-05, "loss": 4.6546, "step": 259500 }, { "epoch": 0.24, "learning_rate": 8.809811000718043e-05, "loss": 4.6736, "step": 260000 }, { "epoch": 0.24, "learning_rate": 8.807521321492944e-05, "loss": 4.6548, "step": 260500 }, { "epoch": 0.24, "learning_rate": 8.805231642267845e-05, "loss": 4.6674, "step": 261000 }, { "epoch": 0.24, "learning_rate": 8.802941963042746e-05, "loss": 4.6686, "step": 261500 }, { "epoch": 0.24, "learning_rate": 8.800652283817646e-05, "loss": 4.654, "step": 262000 }, { "epoch": 0.24, "learning_rate": 8.798362604592547e-05, "loss": 4.6613, "step": 262500 }, { "epoch": 0.24, "learning_rate": 8.796072925367447e-05, "loss": 4.6579, "step": 263000 }, { "epoch": 0.24, "learning_rate": 8.793783246142348e-05, "loss": 4.6778, "step": 263500 }, { "epoch": 0.24, "learning_rate": 8.79149356691725e-05, "loss": 4.6662, "step": 264000 }, { "epoch": 0.24, "learning_rate": 8.78920388769215e-05, "loss": 4.6614, "step": 264500 }, { "epoch": 0.24, "learning_rate": 8.786914208467051e-05, "loss": 4.6636, "step": 265000 }, { "epoch": 0.24, "learning_rate": 8.784629108600401e-05, "loss": 4.6584, "step": 265500 }, { "epoch": 0.24, "learning_rate": 8.782339429375302e-05, "loss": 4.6654, "step": 266000 }, { "epoch": 0.24, "learning_rate": 8.780049750150204e-05, "loss": 4.6383, "step": 266500 }, { "epoch": 0.24, "learning_rate": 8.777760070925104e-05, "loss": 4.6584, "step": 267000 }, { "epoch": 0.24, "learning_rate": 8.775474971058455e-05, "loss": 4.6792, "step": 267500 }, { "epoch": 0.25, "learning_rate": 8.773185291833355e-05, "loss": 4.6607, "step": 268000 }, { "epoch": 0.25, "learning_rate": 8.770895612608256e-05, "loss": 4.6578, "step": 268500 }, { "epoch": 0.25, "learning_rate": 8.768605933383158e-05, "loss": 4.6616, "step": 269000 }, { "epoch": 0.25, "learning_rate": 8.766316254158058e-05, "loss": 4.6598, "step": 269500 }, { "epoch": 0.25, "learning_rate": 8.764031154291409e-05, "loss": 4.6522, "step": 270000 }, { "epoch": 0.25, "learning_rate": 8.761741475066309e-05, "loss": 4.6665, "step": 270500 }, { "epoch": 0.25, "learning_rate": 8.75945179584121e-05, "loss": 4.6521, "step": 271000 }, { "epoch": 0.25, "learning_rate": 8.757162116616112e-05, "loss": 4.6564, "step": 271500 }, { "epoch": 0.25, "learning_rate": 8.754872437391012e-05, "loss": 4.6587, "step": 272000 }, { "epoch": 0.25, "learning_rate": 8.752587337524363e-05, "loss": 4.6491, "step": 272500 }, { "epoch": 0.25, "learning_rate": 8.750297658299263e-05, "loss": 4.6492, "step": 273000 }, { "epoch": 0.25, "learning_rate": 8.748012558432615e-05, "loss": 4.6553, "step": 273500 }, { "epoch": 0.25, "learning_rate": 8.745722879207516e-05, "loss": 4.652, "step": 274000 }, { "epoch": 0.25, "learning_rate": 8.743433199982416e-05, "loss": 4.6655, "step": 274500 }, { "epoch": 0.25, "learning_rate": 8.741143520757317e-05, "loss": 4.6577, "step": 275000 }, { "epoch": 0.25, "learning_rate": 8.738853841532217e-05, "loss": 4.6514, "step": 275500 }, { "epoch": 0.25, "learning_rate": 8.736564162307118e-05, "loss": 4.6559, "step": 276000 }, { "epoch": 0.25, "learning_rate": 8.73427448308202e-05, "loss": 4.6471, "step": 276500 }, { "epoch": 0.25, "learning_rate": 8.73198480385692e-05, "loss": 4.6578, "step": 277000 }, { "epoch": 0.25, "learning_rate": 8.72969512463182e-05, "loss": 4.6565, "step": 277500 }, { "epoch": 0.25, "learning_rate": 8.727405445406721e-05, "loss": 4.6543, "step": 278000 }, { "epoch": 0.26, "learning_rate": 8.725120345540072e-05, "loss": 4.6495, "step": 278500 }, { "epoch": 0.26, "learning_rate": 8.722830666314973e-05, "loss": 4.6636, "step": 279000 }, { "epoch": 0.26, "learning_rate": 8.720540987089874e-05, "loss": 4.6437, "step": 279500 }, { "epoch": 0.26, "learning_rate": 8.718251307864774e-05, "loss": 4.6527, "step": 280000 }, { "epoch": 0.26, "learning_rate": 8.715966207998125e-05, "loss": 4.6434, "step": 280500 }, { "epoch": 0.26, "learning_rate": 8.713681108131476e-05, "loss": 4.6564, "step": 281000 }, { "epoch": 0.26, "learning_rate": 8.711391428906376e-05, "loss": 4.6476, "step": 281500 }, { "epoch": 0.26, "learning_rate": 8.709101749681277e-05, "loss": 4.6617, "step": 282000 }, { "epoch": 0.26, "learning_rate": 8.706812070456177e-05, "loss": 4.6642, "step": 282500 }, { "epoch": 0.26, "learning_rate": 8.704522391231078e-05, "loss": 4.646, "step": 283000 }, { "epoch": 0.26, "learning_rate": 8.70223271200598e-05, "loss": 4.6217, "step": 283500 }, { "epoch": 0.26, "learning_rate": 8.69994303278088e-05, "loss": 4.656, "step": 284000 }, { "epoch": 0.26, "learning_rate": 8.69765335355578e-05, "loss": 4.6464, "step": 284500 }, { "epoch": 0.26, "learning_rate": 8.695363674330681e-05, "loss": 4.6532, "step": 285000 }, { "epoch": 0.26, "learning_rate": 8.693073995105581e-05, "loss": 4.6377, "step": 285500 }, { "epoch": 0.26, "learning_rate": 8.690784315880482e-05, "loss": 4.6475, "step": 286000 }, { "epoch": 0.26, "learning_rate": 8.688494636655384e-05, "loss": 4.6538, "step": 286500 }, { "epoch": 0.26, "learning_rate": 8.686204957430284e-05, "loss": 4.6526, "step": 287000 }, { "epoch": 0.26, "learning_rate": 8.683915278205185e-05, "loss": 4.6565, "step": 287500 }, { "epoch": 0.26, "learning_rate": 8.681625598980085e-05, "loss": 4.6445, "step": 288000 }, { "epoch": 0.26, "learning_rate": 8.679340499113436e-05, "loss": 4.6463, "step": 288500 }, { "epoch": 0.26, "learning_rate": 8.677050819888338e-05, "loss": 4.6335, "step": 289000 }, { "epoch": 0.27, "learning_rate": 8.674761140663238e-05, "loss": 4.634, "step": 289500 }, { "epoch": 0.27, "learning_rate": 8.672471461438139e-05, "loss": 4.6498, "step": 290000 }, { "epoch": 0.27, "learning_rate": 8.670181782213039e-05, "loss": 4.6323, "step": 290500 }, { "epoch": 0.27, "learning_rate": 8.66789210298794e-05, "loss": 4.6375, "step": 291000 }, { "epoch": 0.27, "learning_rate": 8.665607003121292e-05, "loss": 4.6396, "step": 291500 }, { "epoch": 0.27, "learning_rate": 8.663317323896192e-05, "loss": 4.6407, "step": 292000 }, { "epoch": 0.27, "learning_rate": 8.661027644671093e-05, "loss": 4.6481, "step": 292500 }, { "epoch": 0.27, "learning_rate": 8.658737965445993e-05, "loss": 4.638, "step": 293000 }, { "epoch": 0.27, "learning_rate": 8.656448286220894e-05, "loss": 4.6296, "step": 293500 }, { "epoch": 0.27, "learning_rate": 8.654158606995794e-05, "loss": 4.6407, "step": 294000 }, { "epoch": 0.27, "learning_rate": 8.651868927770696e-05, "loss": 4.6418, "step": 294500 }, { "epoch": 0.27, "learning_rate": 8.649579248545597e-05, "loss": 4.6554, "step": 295000 }, { "epoch": 0.27, "learning_rate": 8.647289569320497e-05, "loss": 4.6521, "step": 295500 }, { "epoch": 0.27, "learning_rate": 8.645013628170748e-05, "loss": 4.653, "step": 296000 }, { "epoch": 0.27, "learning_rate": 8.64272394894565e-05, "loss": 4.6392, "step": 296500 }, { "epoch": 0.27, "learning_rate": 8.64043426972055e-05, "loss": 4.6322, "step": 297000 }, { "epoch": 0.27, "learning_rate": 8.63814459049545e-05, "loss": 4.6315, "step": 297500 }, { "epoch": 0.27, "learning_rate": 8.635854911270351e-05, "loss": 4.6496, "step": 298000 }, { "epoch": 0.27, "learning_rate": 8.633565232045252e-05, "loss": 4.6368, "step": 298500 }, { "epoch": 0.27, "learning_rate": 8.631275552820152e-05, "loss": 4.6261, "step": 299000 }, { "epoch": 0.27, "learning_rate": 8.628985873595054e-05, "loss": 4.6399, "step": 299500 }, { "epoch": 0.27, "learning_rate": 8.626696194369954e-05, "loss": 4.6411, "step": 300000 }, { "epoch": 0.28, "learning_rate": 8.624406515144855e-05, "loss": 4.6408, "step": 300500 }, { "epoch": 0.28, "learning_rate": 8.622121415278206e-05, "loss": 4.6405, "step": 301000 }, { "epoch": 0.28, "learning_rate": 8.619831736053106e-05, "loss": 4.6487, "step": 301500 }, { "epoch": 0.28, "learning_rate": 8.617542056828008e-05, "loss": 4.6273, "step": 302000 }, { "epoch": 0.28, "learning_rate": 8.615252377602908e-05, "loss": 4.6293, "step": 302500 }, { "epoch": 0.28, "learning_rate": 8.612962698377809e-05, "loss": 4.6174, "step": 303000 }, { "epoch": 0.28, "learning_rate": 8.61067301915271e-05, "loss": 4.6405, "step": 303500 }, { "epoch": 0.28, "learning_rate": 8.60838333992761e-05, "loss": 4.6293, "step": 304000 }, { "epoch": 0.28, "learning_rate": 8.60609366070251e-05, "loss": 4.6402, "step": 304500 }, { "epoch": 0.28, "learning_rate": 8.603808560835862e-05, "loss": 4.6362, "step": 305000 }, { "epoch": 0.28, "learning_rate": 8.601523460969213e-05, "loss": 4.6163, "step": 305500 }, { "epoch": 0.28, "learning_rate": 8.599233781744112e-05, "loss": 4.6326, "step": 306000 }, { "epoch": 0.28, "learning_rate": 8.596944102519014e-05, "loss": 4.6327, "step": 306500 }, { "epoch": 0.28, "learning_rate": 8.594654423293914e-05, "loss": 4.6481, "step": 307000 }, { "epoch": 0.28, "learning_rate": 8.592364744068815e-05, "loss": 4.6308, "step": 307500 }, { "epoch": 0.28, "learning_rate": 8.590075064843715e-05, "loss": 4.6336, "step": 308000 }, { "epoch": 0.28, "learning_rate": 8.587789964977066e-05, "loss": 4.6358, "step": 308500 }, { "epoch": 0.28, "learning_rate": 8.585500285751968e-05, "loss": 4.636, "step": 309000 }, { "epoch": 0.28, "learning_rate": 8.583210606526868e-05, "loss": 4.6205, "step": 309500 }, { "epoch": 0.28, "learning_rate": 8.580920927301769e-05, "loss": 4.6351, "step": 310000 }, { "epoch": 0.28, "learning_rate": 8.57863124807667e-05, "loss": 4.6474, "step": 310500 }, { "epoch": 0.28, "learning_rate": 8.57634614821002e-05, "loss": 4.6371, "step": 311000 }, { "epoch": 0.29, "learning_rate": 8.574056468984922e-05, "loss": 4.6363, "step": 311500 }, { "epoch": 0.29, "learning_rate": 8.571771369118272e-05, "loss": 4.6324, "step": 312000 }, { "epoch": 0.29, "learning_rate": 8.569481689893173e-05, "loss": 4.6251, "step": 312500 }, { "epoch": 0.29, "learning_rate": 8.567192010668073e-05, "loss": 4.626, "step": 313000 }, { "epoch": 0.29, "learning_rate": 8.564902331442974e-05, "loss": 4.6245, "step": 313500 }, { "epoch": 0.29, "learning_rate": 8.562612652217876e-05, "loss": 4.6382, "step": 314000 }, { "epoch": 0.29, "learning_rate": 8.560322972992776e-05, "loss": 4.6282, "step": 314500 }, { "epoch": 0.29, "learning_rate": 8.558037873126127e-05, "loss": 4.6226, "step": 315000 }, { "epoch": 0.29, "learning_rate": 8.555748193901027e-05, "loss": 4.6274, "step": 315500 }, { "epoch": 0.29, "learning_rate": 8.553463094034378e-05, "loss": 4.6222, "step": 316000 }, { "epoch": 0.29, "learning_rate": 8.55117341480928e-05, "loss": 4.6261, "step": 316500 }, { "epoch": 0.29, "learning_rate": 8.54888373558418e-05, "loss": 4.6291, "step": 317000 }, { "epoch": 0.29, "learning_rate": 8.546594056359081e-05, "loss": 4.6235, "step": 317500 }, { "epoch": 0.29, "learning_rate": 8.544304377133981e-05, "loss": 4.6252, "step": 318000 }, { "epoch": 0.29, "learning_rate": 8.542014697908882e-05, "loss": 4.6219, "step": 318500 }, { "epoch": 0.29, "learning_rate": 8.539725018683784e-05, "loss": 4.6212, "step": 319000 }, { "epoch": 0.29, "learning_rate": 8.537435339458684e-05, "loss": 4.6196, "step": 319500 }, { "epoch": 0.29, "learning_rate": 8.535145660233585e-05, "loss": 4.6138, "step": 320000 }, { "epoch": 0.29, "learning_rate": 8.532855981008485e-05, "loss": 4.6091, "step": 320500 }, { "epoch": 0.29, "learning_rate": 8.530570881141836e-05, "loss": 4.6195, "step": 321000 }, { "epoch": 0.29, "learning_rate": 8.528281201916736e-05, "loss": 4.6347, "step": 321500 }, { "epoch": 0.29, "learning_rate": 8.525991522691638e-05, "loss": 4.6343, "step": 322000 }, { "epoch": 0.3, "learning_rate": 8.523701843466539e-05, "loss": 4.6422, "step": 322500 }, { "epoch": 0.3, "learning_rate": 8.521412164241439e-05, "loss": 4.6266, "step": 323000 }, { "epoch": 0.3, "learning_rate": 8.51912248501634e-05, "loss": 4.6285, "step": 323500 }, { "epoch": 0.3, "learning_rate": 8.51683280579124e-05, "loss": 4.626, "step": 324000 }, { "epoch": 0.3, "learning_rate": 8.514543126566142e-05, "loss": 4.6172, "step": 324500 }, { "epoch": 0.3, "learning_rate": 8.512253447341042e-05, "loss": 4.6202, "step": 325000 }, { "epoch": 0.3, "learning_rate": 8.509963768115943e-05, "loss": 4.6116, "step": 325500 }, { "epoch": 0.3, "learning_rate": 8.507678668249293e-05, "loss": 4.6084, "step": 326000 }, { "epoch": 0.3, "learning_rate": 8.505393568382644e-05, "loss": 4.6178, "step": 326500 }, { "epoch": 0.3, "learning_rate": 8.503103889157546e-05, "loss": 4.6282, "step": 327000 }, { "epoch": 0.3, "learning_rate": 8.500814209932446e-05, "loss": 4.6111, "step": 327500 }, { "epoch": 0.3, "learning_rate": 8.498524530707347e-05, "loss": 4.618, "step": 328000 }, { "epoch": 0.3, "learning_rate": 8.496234851482247e-05, "loss": 4.6023, "step": 328500 }, { "epoch": 0.3, "learning_rate": 8.493945172257148e-05, "loss": 4.6387, "step": 329000 }, { "epoch": 0.3, "learning_rate": 8.491655493032048e-05, "loss": 4.6187, "step": 329500 }, { "epoch": 0.3, "learning_rate": 8.48936581380695e-05, "loss": 4.6279, "step": 330000 }, { "epoch": 0.3, "learning_rate": 8.4870807139403e-05, "loss": 4.6116, "step": 330500 }, { "epoch": 0.3, "learning_rate": 8.4847910347152e-05, "loss": 4.6325, "step": 331000 }, { "epoch": 0.3, "learning_rate": 8.4825013554901e-05, "loss": 4.6297, "step": 331500 }, { "epoch": 0.3, "learning_rate": 8.480211676265002e-05, "loss": 4.6239, "step": 332000 }, { "epoch": 0.3, "learning_rate": 8.477921997039903e-05, "loss": 4.6297, "step": 332500 }, { "epoch": 0.3, "learning_rate": 8.475632317814803e-05, "loss": 4.6087, "step": 333000 }, { "epoch": 0.31, "learning_rate": 8.473342638589704e-05, "loss": 4.615, "step": 333500 }, { "epoch": 0.31, "learning_rate": 8.471052959364604e-05, "loss": 4.6136, "step": 334000 }, { "epoch": 0.31, "learning_rate": 8.468767859497956e-05, "loss": 4.6153, "step": 334500 }, { "epoch": 0.31, "learning_rate": 8.466478180272857e-05, "loss": 4.6311, "step": 335000 }, { "epoch": 0.31, "learning_rate": 8.464188501047757e-05, "loss": 4.6269, "step": 335500 }, { "epoch": 0.31, "learning_rate": 8.461898821822658e-05, "loss": 4.6227, "step": 336000 }, { "epoch": 0.31, "learning_rate": 8.459609142597558e-05, "loss": 4.618, "step": 336500 }, { "epoch": 0.31, "learning_rate": 8.45731946337246e-05, "loss": 4.6152, "step": 337000 }, { "epoch": 0.31, "learning_rate": 8.455029784147361e-05, "loss": 4.6142, "step": 337500 }, { "epoch": 0.31, "learning_rate": 8.452740104922261e-05, "loss": 4.6308, "step": 338000 }, { "epoch": 0.31, "learning_rate": 8.450455005055612e-05, "loss": 4.6287, "step": 338500 }, { "epoch": 0.31, "learning_rate": 8.448165325830512e-05, "loss": 4.6283, "step": 339000 }, { "epoch": 0.31, "learning_rate": 8.445875646605413e-05, "loss": 4.619, "step": 339500 }, { "epoch": 0.31, "learning_rate": 8.443590546738765e-05, "loss": 4.6181, "step": 340000 }, { "epoch": 0.31, "learning_rate": 8.441300867513665e-05, "loss": 4.6102, "step": 340500 }, { "epoch": 0.31, "learning_rate": 8.439011188288566e-05, "loss": 4.607, "step": 341000 }, { "epoch": 0.31, "learning_rate": 8.436721509063466e-05, "loss": 4.6017, "step": 341500 }, { "epoch": 0.31, "learning_rate": 8.434431829838367e-05, "loss": 4.617, "step": 342000 }, { "epoch": 0.31, "learning_rate": 8.432146729971719e-05, "loss": 4.6041, "step": 342500 }, { "epoch": 0.31, "learning_rate": 8.429857050746619e-05, "loss": 4.6075, "step": 343000 }, { "epoch": 0.31, "learning_rate": 8.42756737152152e-05, "loss": 4.6157, "step": 343500 }, { "epoch": 0.32, "learning_rate": 8.42527769229642e-05, "loss": 4.6175, "step": 344000 }, { "epoch": 0.32, "learning_rate": 8.42298801307132e-05, "loss": 4.6139, "step": 344500 }, { "epoch": 0.32, "learning_rate": 8.420698333846222e-05, "loss": 4.6043, "step": 345000 }, { "epoch": 0.32, "learning_rate": 8.418408654621123e-05, "loss": 4.6386, "step": 345500 }, { "epoch": 0.32, "learning_rate": 8.416118975396023e-05, "loss": 4.6122, "step": 346000 }, { "epoch": 0.32, "learning_rate": 8.413833875529374e-05, "loss": 4.6106, "step": 346500 }, { "epoch": 0.32, "learning_rate": 8.411544196304275e-05, "loss": 4.6218, "step": 347000 }, { "epoch": 0.32, "learning_rate": 8.409254517079176e-05, "loss": 4.6195, "step": 347500 }, { "epoch": 0.32, "learning_rate": 8.406964837854077e-05, "loss": 4.628, "step": 348000 }, { "epoch": 0.32, "learning_rate": 8.404675158628977e-05, "loss": 4.608, "step": 348500 }, { "epoch": 0.32, "learning_rate": 8.402385479403878e-05, "loss": 4.6176, "step": 349000 }, { "epoch": 0.32, "learning_rate": 8.400095800178778e-05, "loss": 4.6119, "step": 349500 }, { "epoch": 0.32, "learning_rate": 8.397806120953679e-05, "loss": 4.6197, "step": 350000 }, { "epoch": 0.32, "learning_rate": 8.395521021087031e-05, "loss": 4.6084, "step": 350500 }, { "epoch": 0.32, "learning_rate": 8.393231341861931e-05, "loss": 4.6051, "step": 351000 }, { "epoch": 0.32, "learning_rate": 8.390946241995282e-05, "loss": 4.6159, "step": 351500 }, { "epoch": 0.32, "learning_rate": 8.388656562770182e-05, "loss": 4.6103, "step": 352000 }, { "epoch": 0.32, "learning_rate": 8.386366883545084e-05, "loss": 4.6261, "step": 352500 }, { "epoch": 0.32, "learning_rate": 8.384077204319985e-05, "loss": 4.6091, "step": 353000 }, { "epoch": 0.32, "learning_rate": 8.381787525094885e-05, "loss": 4.6031, "step": 353500 }, { "epoch": 0.32, "learning_rate": 8.379497845869786e-05, "loss": 4.6035, "step": 354000 }, { "epoch": 0.32, "learning_rate": 8.377208166644686e-05, "loss": 4.5985, "step": 354500 }, { "epoch": 0.33, "learning_rate": 8.374918487419587e-05, "loss": 4.622, "step": 355000 }, { "epoch": 0.33, "learning_rate": 8.372628808194489e-05, "loss": 4.6241, "step": 355500 }, { "epoch": 0.33, "learning_rate": 8.370339128969388e-05, "loss": 4.6098, "step": 356000 }, { "epoch": 0.33, "learning_rate": 8.368054029102738e-05, "loss": 4.6032, "step": 356500 }, { "epoch": 0.33, "learning_rate": 8.365764349877639e-05, "loss": 4.6162, "step": 357000 }, { "epoch": 0.33, "learning_rate": 8.363474670652541e-05, "loss": 4.6138, "step": 357500 }, { "epoch": 0.33, "learning_rate": 8.361184991427441e-05, "loss": 4.614, "step": 358000 }, { "epoch": 0.33, "learning_rate": 8.358895312202342e-05, "loss": 4.5968, "step": 358500 }, { "epoch": 0.33, "learning_rate": 8.356610212335692e-05, "loss": 4.5913, "step": 359000 }, { "epoch": 0.33, "learning_rate": 8.354320533110593e-05, "loss": 4.6181, "step": 359500 }, { "epoch": 0.33, "learning_rate": 8.352030853885495e-05, "loss": 4.6084, "step": 360000 }, { "epoch": 0.33, "learning_rate": 8.349741174660395e-05, "loss": 4.6179, "step": 360500 }, { "epoch": 0.33, "learning_rate": 8.347451495435296e-05, "loss": 4.6327, "step": 361000 }, { "epoch": 0.33, "learning_rate": 8.345166395568646e-05, "loss": 4.5855, "step": 361500 }, { "epoch": 0.33, "learning_rate": 8.342876716343547e-05, "loss": 4.6119, "step": 362000 }, { "epoch": 0.33, "learning_rate": 8.340587037118449e-05, "loss": 4.6073, "step": 362500 }, { "epoch": 0.33, "learning_rate": 8.338297357893349e-05, "loss": 4.617, "step": 363000 }, { "epoch": 0.33, "learning_rate": 8.33600767866825e-05, "loss": 4.5957, "step": 363500 }, { "epoch": 0.33, "learning_rate": 8.33371799944315e-05, "loss": 4.6044, "step": 364000 }, { "epoch": 0.33, "learning_rate": 8.33142832021805e-05, "loss": 4.6085, "step": 364500 }, { "epoch": 0.33, "learning_rate": 8.329138640992951e-05, "loss": 4.5961, "step": 365000 }, { "epoch": 0.33, "learning_rate": 8.326853541126303e-05, "loss": 4.5876, "step": 365500 }, { "epoch": 0.34, "learning_rate": 8.324563861901204e-05, "loss": 4.592, "step": 366000 }, { "epoch": 0.34, "learning_rate": 8.322274182676104e-05, "loss": 4.5889, "step": 366500 }, { "epoch": 0.34, "learning_rate": 8.319984503451005e-05, "loss": 4.5954, "step": 367000 }, { "epoch": 0.34, "learning_rate": 8.317694824225905e-05, "loss": 4.6036, "step": 367500 }, { "epoch": 0.34, "learning_rate": 8.315405145000807e-05, "loss": 4.5898, "step": 368000 }, { "epoch": 0.34, "learning_rate": 8.313120045134157e-05, "loss": 4.6, "step": 368500 }, { "epoch": 0.34, "learning_rate": 8.310834945267508e-05, "loss": 4.5979, "step": 369000 }, { "epoch": 0.34, "learning_rate": 8.308545266042409e-05, "loss": 4.5892, "step": 369500 }, { "epoch": 0.34, "learning_rate": 8.30625558681731e-05, "loss": 4.6034, "step": 370000 }, { "epoch": 0.34, "learning_rate": 8.303965907592211e-05, "loss": 4.5901, "step": 370500 }, { "epoch": 0.34, "learning_rate": 8.301676228367111e-05, "loss": 4.5885, "step": 371000 }, { "epoch": 0.34, "learning_rate": 8.299391128500462e-05, "loss": 4.5984, "step": 371500 }, { "epoch": 0.34, "learning_rate": 8.297101449275362e-05, "loss": 4.5994, "step": 372000 }, { "epoch": 0.34, "learning_rate": 8.294811770050263e-05, "loss": 4.6071, "step": 372500 }, { "epoch": 0.34, "learning_rate": 8.292522090825165e-05, "loss": 4.5987, "step": 373000 }, { "epoch": 0.34, "learning_rate": 8.290232411600065e-05, "loss": 4.5988, "step": 373500 }, { "epoch": 0.34, "learning_rate": 8.287942732374966e-05, "loss": 4.5998, "step": 374000 }, { "epoch": 0.34, "learning_rate": 8.285653053149866e-05, "loss": 4.5928, "step": 374500 }, { "epoch": 0.34, "learning_rate": 8.283363373924767e-05, "loss": 4.5908, "step": 375000 }, { "epoch": 0.34, "learning_rate": 8.281073694699669e-05, "loss": 4.6028, "step": 375500 }, { "epoch": 0.34, "learning_rate": 8.278784015474569e-05, "loss": 4.596, "step": 376000 }, { "epoch": 0.34, "learning_rate": 8.27649891560792e-05, "loss": 4.5929, "step": 376500 }, { "epoch": 0.35, "learning_rate": 8.27420923638282e-05, "loss": 4.5929, "step": 377000 }, { "epoch": 0.35, "learning_rate": 8.271919557157721e-05, "loss": 4.589, "step": 377500 }, { "epoch": 0.35, "learning_rate": 8.269634457291073e-05, "loss": 4.6042, "step": 378000 }, { "epoch": 0.35, "learning_rate": 8.267344778065973e-05, "loss": 4.5838, "step": 378500 }, { "epoch": 0.35, "learning_rate": 8.265055098840874e-05, "loss": 4.6129, "step": 379000 }, { "epoch": 0.35, "learning_rate": 8.262765419615774e-05, "loss": 4.5982, "step": 379500 }, { "epoch": 0.35, "learning_rate": 8.260475740390675e-05, "loss": 4.5797, "step": 380000 }, { "epoch": 0.35, "learning_rate": 8.258186061165575e-05, "loss": 4.588, "step": 380500 }, { "epoch": 0.35, "learning_rate": 8.255896381940476e-05, "loss": 4.5937, "step": 381000 }, { "epoch": 0.35, "learning_rate": 8.253606702715376e-05, "loss": 4.5889, "step": 381500 }, { "epoch": 0.35, "learning_rate": 8.251317023490277e-05, "loss": 4.5966, "step": 382000 }, { "epoch": 0.35, "learning_rate": 8.249027344265177e-05, "loss": 4.6009, "step": 382500 }, { "epoch": 0.35, "learning_rate": 8.246742244398529e-05, "loss": 4.5838, "step": 383000 }, { "epoch": 0.35, "learning_rate": 8.24445256517343e-05, "loss": 4.6107, "step": 383500 }, { "epoch": 0.35, "learning_rate": 8.24216288594833e-05, "loss": 4.6128, "step": 384000 }, { "epoch": 0.35, "learning_rate": 8.239873206723231e-05, "loss": 4.5935, "step": 384500 }, { "epoch": 0.35, "learning_rate": 8.237583527498131e-05, "loss": 4.5895, "step": 385000 }, { "epoch": 0.35, "learning_rate": 8.235298427631483e-05, "loss": 4.6011, "step": 385500 }, { "epoch": 0.35, "learning_rate": 8.233008748406384e-05, "loss": 4.5992, "step": 386000 }, { "epoch": 0.35, "learning_rate": 8.230719069181284e-05, "loss": 4.6073, "step": 386500 }, { "epoch": 0.35, "learning_rate": 8.228429389956185e-05, "loss": 4.5914, "step": 387000 }, { "epoch": 0.35, "learning_rate": 8.226139710731085e-05, "loss": 4.5839, "step": 387500 }, { "epoch": 0.36, "learning_rate": 8.223854610864437e-05, "loss": 4.5873, "step": 388000 }, { "epoch": 0.36, "learning_rate": 8.221569510997788e-05, "loss": 4.5894, "step": 388500 }, { "epoch": 0.36, "learning_rate": 8.219279831772688e-05, "loss": 4.5976, "step": 389000 }, { "epoch": 0.36, "learning_rate": 8.216990152547589e-05, "loss": 4.6015, "step": 389500 }, { "epoch": 0.36, "learning_rate": 8.214700473322489e-05, "loss": 4.5936, "step": 390000 }, { "epoch": 0.36, "learning_rate": 8.212410794097391e-05, "loss": 4.586, "step": 390500 }, { "epoch": 0.36, "learning_rate": 8.210125694230742e-05, "loss": 4.5947, "step": 391000 }, { "epoch": 0.36, "learning_rate": 8.207836015005642e-05, "loss": 4.5821, "step": 391500 }, { "epoch": 0.36, "learning_rate": 8.205546335780543e-05, "loss": 4.5971, "step": 392000 }, { "epoch": 0.36, "learning_rate": 8.203256656555443e-05, "loss": 4.5956, "step": 392500 }, { "epoch": 0.36, "learning_rate": 8.200966977330345e-05, "loss": 4.5958, "step": 393000 }, { "epoch": 0.36, "learning_rate": 8.198681877463695e-05, "loss": 4.604, "step": 393500 }, { "epoch": 0.36, "learning_rate": 8.196392198238596e-05, "loss": 4.5874, "step": 394000 }, { "epoch": 0.36, "learning_rate": 8.194102519013496e-05, "loss": 4.5954, "step": 394500 }, { "epoch": 0.36, "learning_rate": 8.191812839788397e-05, "loss": 4.5907, "step": 395000 }, { "epoch": 0.36, "learning_rate": 8.189523160563299e-05, "loss": 4.5896, "step": 395500 }, { "epoch": 0.36, "learning_rate": 8.1872334813382e-05, "loss": 4.5966, "step": 396000 }, { "epoch": 0.36, "learning_rate": 8.1849438021131e-05, "loss": 4.5887, "step": 396500 }, { "epoch": 0.36, "learning_rate": 8.182654122888e-05, "loss": 4.5907, "step": 397000 }, { "epoch": 0.36, "learning_rate": 8.180369023021351e-05, "loss": 4.5904, "step": 397500 }, { "epoch": 0.36, "learning_rate": 8.178079343796253e-05, "loss": 4.5911, "step": 398000 }, { "epoch": 0.36, "learning_rate": 8.175789664571153e-05, "loss": 4.5894, "step": 398500 }, { "epoch": 0.37, "learning_rate": 8.173499985346054e-05, "loss": 4.5895, "step": 399000 }, { "epoch": 0.37, "learning_rate": 8.171210306120954e-05, "loss": 4.5827, "step": 399500 }, { "epoch": 0.37, "learning_rate": 8.168925206254305e-05, "loss": 4.5808, "step": 400000 }, { "epoch": 0.37, "learning_rate": 8.166640106387657e-05, "loss": 4.5848, "step": 400500 }, { "epoch": 0.37, "learning_rate": 8.164350427162557e-05, "loss": 4.5932, "step": 401000 }, { "epoch": 0.37, "learning_rate": 8.162060747937458e-05, "loss": 4.5781, "step": 401500 }, { "epoch": 0.37, "learning_rate": 8.159771068712358e-05, "loss": 4.5793, "step": 402000 }, { "epoch": 0.37, "learning_rate": 8.157481389487259e-05, "loss": 4.5755, "step": 402500 }, { "epoch": 0.37, "learning_rate": 8.155196289620611e-05, "loss": 4.586, "step": 403000 }, { "epoch": 0.37, "learning_rate": 8.152906610395511e-05, "loss": 4.5719, "step": 403500 }, { "epoch": 0.37, "learning_rate": 8.15061693117041e-05, "loss": 4.5961, "step": 404000 }, { "epoch": 0.37, "learning_rate": 8.148327251945311e-05, "loss": 4.5871, "step": 404500 }, { "epoch": 0.37, "learning_rate": 8.146037572720211e-05, "loss": 4.5826, "step": 405000 }, { "epoch": 0.37, "learning_rate": 8.143747893495113e-05, "loss": 4.5868, "step": 405500 }, { "epoch": 0.37, "learning_rate": 8.141462793628464e-05, "loss": 4.5922, "step": 406000 }, { "epoch": 0.37, "learning_rate": 8.139173114403364e-05, "loss": 4.5829, "step": 406500 }, { "epoch": 0.37, "learning_rate": 8.136883435178265e-05, "loss": 4.5958, "step": 407000 }, { "epoch": 0.37, "learning_rate": 8.134593755953165e-05, "loss": 4.5815, "step": 407500 }, { "epoch": 0.37, "learning_rate": 8.132304076728067e-05, "loss": 4.5791, "step": 408000 }, { "epoch": 0.37, "learning_rate": 8.130014397502968e-05, "loss": 4.5957, "step": 408500 }, { "epoch": 0.37, "learning_rate": 8.127724718277868e-05, "loss": 4.5963, "step": 409000 }, { "epoch": 0.38, "learning_rate": 8.125435039052769e-05, "loss": 4.589, "step": 409500 }, { "epoch": 0.38, "learning_rate": 8.123145359827669e-05, "loss": 4.5776, "step": 410000 }, { "epoch": 0.38, "learning_rate": 8.120855680602571e-05, "loss": 4.58, "step": 410500 }, { "epoch": 0.38, "learning_rate": 8.118566001377472e-05, "loss": 4.5863, "step": 411000 }, { "epoch": 0.38, "learning_rate": 8.116276322152372e-05, "loss": 4.5816, "step": 411500 }, { "epoch": 0.38, "learning_rate": 8.113991222285723e-05, "loss": 4.5746, "step": 412000 }, { "epoch": 0.38, "learning_rate": 8.111706122419073e-05, "loss": 4.5765, "step": 412500 }, { "epoch": 0.38, "learning_rate": 8.109416443193975e-05, "loss": 4.5712, "step": 413000 }, { "epoch": 0.38, "learning_rate": 8.107126763968876e-05, "loss": 4.5776, "step": 413500 }, { "epoch": 0.38, "learning_rate": 8.104837084743776e-05, "loss": 4.584, "step": 414000 }, { "epoch": 0.38, "learning_rate": 8.102547405518677e-05, "loss": 4.5958, "step": 414500 }, { "epoch": 0.38, "learning_rate": 8.100257726293577e-05, "loss": 4.5776, "step": 415000 }, { "epoch": 0.38, "learning_rate": 8.097968047068478e-05, "loss": 4.5665, "step": 415500 }, { "epoch": 0.38, "learning_rate": 8.09567836784338e-05, "loss": 4.584, "step": 416000 }, { "epoch": 0.38, "learning_rate": 8.09339326797673e-05, "loss": 4.5678, "step": 416500 }, { "epoch": 0.38, "learning_rate": 8.09110358875163e-05, "loss": 4.5839, "step": 417000 }, { "epoch": 0.38, "learning_rate": 8.088813909526531e-05, "loss": 4.5762, "step": 417500 }, { "epoch": 0.38, "learning_rate": 8.086524230301432e-05, "loss": 4.5892, "step": 418000 }, { "epoch": 0.38, "learning_rate": 8.084234551076333e-05, "loss": 4.5718, "step": 418500 }, { "epoch": 0.38, "learning_rate": 8.081949451209684e-05, "loss": 4.574, "step": 419000 }, { "epoch": 0.38, "learning_rate": 8.079659771984584e-05, "loss": 4.5815, "step": 419500 }, { "epoch": 0.38, "learning_rate": 8.077370092759485e-05, "loss": 4.5789, "step": 420000 }, { "epoch": 0.39, "learning_rate": 8.075080413534385e-05, "loss": 4.5798, "step": 420500 }, { "epoch": 0.39, "learning_rate": 8.072795313667737e-05, "loss": 4.5852, "step": 421000 }, { "epoch": 0.39, "learning_rate": 8.070505634442638e-05, "loss": 4.5742, "step": 421500 }, { "epoch": 0.39, "learning_rate": 8.068215955217538e-05, "loss": 4.5804, "step": 422000 }, { "epoch": 0.39, "learning_rate": 8.065926275992439e-05, "loss": 4.5866, "step": 422500 }, { "epoch": 0.39, "learning_rate": 8.06363659676734e-05, "loss": 4.5731, "step": 423000 }, { "epoch": 0.39, "learning_rate": 8.061346917542241e-05, "loss": 4.5768, "step": 423500 }, { "epoch": 0.39, "learning_rate": 8.059066397034042e-05, "loss": 4.5762, "step": 424000 }, { "epoch": 0.39, "learning_rate": 8.056776717808942e-05, "loss": 4.5823, "step": 424500 }, { "epoch": 0.39, "learning_rate": 8.054487038583843e-05, "loss": 4.5757, "step": 425000 }, { "epoch": 0.39, "learning_rate": 8.052197359358745e-05, "loss": 4.5723, "step": 425500 }, { "epoch": 0.39, "learning_rate": 8.049907680133645e-05, "loss": 4.5979, "step": 426000 }, { "epoch": 0.39, "learning_rate": 8.047618000908546e-05, "loss": 4.5851, "step": 426500 }, { "epoch": 0.39, "learning_rate": 8.045328321683446e-05, "loss": 4.5632, "step": 427000 }, { "epoch": 0.39, "learning_rate": 8.043038642458347e-05, "loss": 4.5904, "step": 427500 }, { "epoch": 0.39, "learning_rate": 8.040753542591699e-05, "loss": 4.581, "step": 428000 }, { "epoch": 0.39, "learning_rate": 8.038463863366598e-05, "loss": 4.5808, "step": 428500 }, { "epoch": 0.39, "learning_rate": 8.036174184141498e-05, "loss": 4.5756, "step": 429000 }, { "epoch": 0.39, "learning_rate": 8.033884504916399e-05, "loss": 4.5658, "step": 429500 }, { "epoch": 0.39, "learning_rate": 8.03159940504975e-05, "loss": 4.5819, "step": 430000 }, { "epoch": 0.39, "learning_rate": 8.029309725824651e-05, "loss": 4.5888, "step": 430500 }, { "epoch": 0.39, "learning_rate": 8.027020046599552e-05, "loss": 4.5719, "step": 431000 }, { "epoch": 0.4, "learning_rate": 8.024734946732902e-05, "loss": 4.574, "step": 431500 }, { "epoch": 0.4, "learning_rate": 8.022445267507803e-05, "loss": 4.5736, "step": 432000 }, { "epoch": 0.4, "learning_rate": 8.020155588282703e-05, "loss": 4.5823, "step": 432500 }, { "epoch": 0.4, "learning_rate": 8.017865909057605e-05, "loss": 4.5895, "step": 433000 }, { "epoch": 0.4, "learning_rate": 8.015576229832506e-05, "loss": 4.5695, "step": 433500 }, { "epoch": 0.4, "learning_rate": 8.013286550607406e-05, "loss": 4.5843, "step": 434000 }, { "epoch": 0.4, "learning_rate": 8.010996871382307e-05, "loss": 4.5808, "step": 434500 }, { "epoch": 0.4, "learning_rate": 8.008707192157207e-05, "loss": 4.5852, "step": 435000 }, { "epoch": 0.4, "learning_rate": 8.006417512932109e-05, "loss": 4.5786, "step": 435500 }, { "epoch": 0.4, "learning_rate": 8.00412783370701e-05, "loss": 4.5881, "step": 436000 }, { "epoch": 0.4, "learning_rate": 8.00183815448191e-05, "loss": 4.5682, "step": 436500 }, { "epoch": 0.4, "learning_rate": 7.99954847525681e-05, "loss": 4.5615, "step": 437000 }, { "epoch": 0.4, "learning_rate": 7.997258796031711e-05, "loss": 4.5721, "step": 437500 }, { "epoch": 0.4, "learning_rate": 7.994973696165063e-05, "loss": 4.5786, "step": 438000 }, { "epoch": 0.4, "learning_rate": 7.992693175656864e-05, "loss": 4.575, "step": 438500 }, { "epoch": 0.4, "learning_rate": 7.990403496431764e-05, "loss": 4.5544, "step": 439000 }, { "epoch": 0.4, "learning_rate": 7.988113817206665e-05, "loss": 4.565, "step": 439500 }, { "epoch": 0.4, "learning_rate": 7.985824137981565e-05, "loss": 4.5568, "step": 440000 }, { "epoch": 0.4, "learning_rate": 7.983534458756467e-05, "loss": 4.5711, "step": 440500 }, { "epoch": 0.4, "learning_rate": 7.981244779531367e-05, "loss": 4.5771, "step": 441000 }, { "epoch": 0.4, "learning_rate": 7.978955100306268e-05, "loss": 4.5804, "step": 441500 }, { "epoch": 0.4, "learning_rate": 7.976665421081168e-05, "loss": 4.573, "step": 442000 }, { "epoch": 0.41, "learning_rate": 7.974375741856069e-05, "loss": 4.5795, "step": 442500 }, { "epoch": 0.41, "learning_rate": 7.97208606263097e-05, "loss": 4.5832, "step": 443000 }, { "epoch": 0.41, "learning_rate": 7.969800962764321e-05, "loss": 4.5672, "step": 443500 }, { "epoch": 0.41, "learning_rate": 7.967511283539222e-05, "loss": 4.5591, "step": 444000 }, { "epoch": 0.41, "learning_rate": 7.965221604314122e-05, "loss": 4.5673, "step": 444500 }, { "epoch": 0.41, "learning_rate": 7.962931925089023e-05, "loss": 4.5655, "step": 445000 }, { "epoch": 0.41, "learning_rate": 7.960642245863923e-05, "loss": 4.5721, "step": 445500 }, { "epoch": 0.41, "learning_rate": 7.958352566638825e-05, "loss": 4.5587, "step": 446000 }, { "epoch": 0.41, "learning_rate": 7.956062887413726e-05, "loss": 4.5627, "step": 446500 }, { "epoch": 0.41, "learning_rate": 7.953773208188626e-05, "loss": 4.5877, "step": 447000 }, { "epoch": 0.41, "learning_rate": 7.951483528963527e-05, "loss": 4.5733, "step": 447500 }, { "epoch": 0.41, "learning_rate": 7.949193849738427e-05, "loss": 4.5746, "step": 448000 }, { "epoch": 0.41, "learning_rate": 7.946904170513328e-05, "loss": 4.5715, "step": 448500 }, { "epoch": 0.41, "learning_rate": 7.94461449128823e-05, "loss": 4.5766, "step": 449000 }, { "epoch": 0.41, "learning_rate": 7.94232939142158e-05, "loss": 4.5699, "step": 449500 }, { "epoch": 0.41, "learning_rate": 7.940039712196481e-05, "loss": 4.5695, "step": 450000 }, { "epoch": 0.41, "learning_rate": 7.937750032971381e-05, "loss": 4.5614, "step": 450500 }, { "epoch": 0.41, "learning_rate": 7.935460353746282e-05, "loss": 4.5627, "step": 451000 }, { "epoch": 0.41, "learning_rate": 7.933175253879634e-05, "loss": 4.5673, "step": 451500 }, { "epoch": 0.41, "learning_rate": 7.930885574654534e-05, "loss": 4.569, "step": 452000 }, { "epoch": 0.41, "learning_rate": 7.928595895429435e-05, "loss": 4.5614, "step": 452500 }, { "epoch": 0.41, "learning_rate": 7.926310795562785e-05, "loss": 4.5561, "step": 453000 }, { "epoch": 0.42, "learning_rate": 7.924021116337686e-05, "loss": 4.5649, "step": 453500 }, { "epoch": 0.42, "learning_rate": 7.921731437112586e-05, "loss": 4.5602, "step": 454000 }, { "epoch": 0.42, "learning_rate": 7.919441757887487e-05, "loss": 4.5516, "step": 454500 }, { "epoch": 0.42, "learning_rate": 7.917152078662387e-05, "loss": 4.5816, "step": 455000 }, { "epoch": 0.42, "learning_rate": 7.914862399437288e-05, "loss": 4.575, "step": 455500 }, { "epoch": 0.42, "learning_rate": 7.91257272021219e-05, "loss": 4.5682, "step": 456000 }, { "epoch": 0.42, "learning_rate": 7.91028304098709e-05, "loss": 4.5717, "step": 456500 }, { "epoch": 0.42, "learning_rate": 7.907997941120441e-05, "loss": 4.5566, "step": 457000 }, { "epoch": 0.42, "learning_rate": 7.905708261895341e-05, "loss": 4.5753, "step": 457500 }, { "epoch": 0.42, "learning_rate": 7.903418582670242e-05, "loss": 4.5744, "step": 458000 }, { "epoch": 0.42, "learning_rate": 7.901133482803594e-05, "loss": 4.5645, "step": 458500 }, { "epoch": 0.42, "learning_rate": 7.898843803578494e-05, "loss": 4.5685, "step": 459000 }, { "epoch": 0.42, "learning_rate": 7.896554124353395e-05, "loss": 4.5628, "step": 459500 }, { "epoch": 0.42, "learning_rate": 7.894264445128295e-05, "loss": 4.5802, "step": 460000 }, { "epoch": 0.42, "learning_rate": 7.891979345261647e-05, "loss": 4.5691, "step": 460500 }, { "epoch": 0.42, "learning_rate": 7.889689666036548e-05, "loss": 4.5733, "step": 461000 }, { "epoch": 0.42, "learning_rate": 7.887399986811448e-05, "loss": 4.5671, "step": 461500 }, { "epoch": 0.42, "learning_rate": 7.885110307586349e-05, "loss": 4.5643, "step": 462000 }, { "epoch": 0.42, "learning_rate": 7.882820628361249e-05, "loss": 4.564, "step": 462500 }, { "epoch": 0.42, "learning_rate": 7.88053094913615e-05, "loss": 4.5507, "step": 463000 }, { "epoch": 0.42, "learning_rate": 7.878241269911051e-05, "loss": 4.5662, "step": 463500 }, { "epoch": 0.42, "learning_rate": 7.875951590685952e-05, "loss": 4.5726, "step": 464000 }, { "epoch": 0.43, "learning_rate": 7.873666490819302e-05, "loss": 4.5638, "step": 464500 }, { "epoch": 0.43, "learning_rate": 7.871376811594203e-05, "loss": 4.5496, "step": 465000 }, { "epoch": 0.43, "learning_rate": 7.869087132369104e-05, "loss": 4.5691, "step": 465500 }, { "epoch": 0.43, "learning_rate": 7.866797453144005e-05, "loss": 4.5621, "step": 466000 }, { "epoch": 0.43, "learning_rate": 7.864507773918906e-05, "loss": 4.5756, "step": 466500 }, { "epoch": 0.43, "learning_rate": 7.862222674052256e-05, "loss": 4.5697, "step": 467000 }, { "epoch": 0.43, "learning_rate": 7.859932994827157e-05, "loss": 4.5553, "step": 467500 }, { "epoch": 0.43, "learning_rate": 7.857643315602057e-05, "loss": 4.5675, "step": 468000 }, { "epoch": 0.43, "learning_rate": 7.855353636376959e-05, "loss": 4.5694, "step": 468500 }, { "epoch": 0.43, "learning_rate": 7.85306395715186e-05, "loss": 4.5648, "step": 469000 }, { "epoch": 0.43, "learning_rate": 7.85077427792676e-05, "loss": 4.5601, "step": 469500 }, { "epoch": 0.43, "learning_rate": 7.848489178060111e-05, "loss": 4.5576, "step": 470000 }, { "epoch": 0.43, "learning_rate": 7.846199498835011e-05, "loss": 4.557, "step": 470500 }, { "epoch": 0.43, "learning_rate": 7.843909819609913e-05, "loss": 4.5717, "step": 471000 }, { "epoch": 0.43, "learning_rate": 7.841624719743264e-05, "loss": 4.5558, "step": 471500 }, { "epoch": 0.43, "learning_rate": 7.839335040518164e-05, "loss": 4.5685, "step": 472000 }, { "epoch": 0.43, "learning_rate": 7.837045361293065e-05, "loss": 4.5667, "step": 472500 }, { "epoch": 0.43, "learning_rate": 7.834755682067965e-05, "loss": 4.5577, "step": 473000 }, { "epoch": 0.43, "learning_rate": 7.832466002842866e-05, "loss": 4.5643, "step": 473500 }, { "epoch": 0.43, "learning_rate": 7.830176323617768e-05, "loss": 4.5669, "step": 474000 }, { "epoch": 0.43, "learning_rate": 7.827886644392668e-05, "loss": 4.5641, "step": 474500 }, { "epoch": 0.44, "learning_rate": 7.825596965167569e-05, "loss": 4.5535, "step": 475000 }, { "epoch": 0.44, "learning_rate": 7.823307285942469e-05, "loss": 4.5644, "step": 475500 }, { "epoch": 0.44, "learning_rate": 7.82102218607582e-05, "loss": 4.5601, "step": 476000 }, { "epoch": 0.44, "learning_rate": 7.818732506850722e-05, "loss": 4.5687, "step": 476500 }, { "epoch": 0.44, "learning_rate": 7.816442827625622e-05, "loss": 4.5485, "step": 477000 }, { "epoch": 0.44, "learning_rate": 7.814153148400523e-05, "loss": 4.5558, "step": 477500 }, { "epoch": 0.44, "learning_rate": 7.811868048533872e-05, "loss": 4.5549, "step": 478000 }, { "epoch": 0.44, "learning_rate": 7.809578369308774e-05, "loss": 4.5604, "step": 478500 }, { "epoch": 0.44, "learning_rate": 7.807288690083674e-05, "loss": 4.55, "step": 479000 }, { "epoch": 0.44, "learning_rate": 7.804999010858575e-05, "loss": 4.5581, "step": 479500 }, { "epoch": 0.44, "learning_rate": 7.802709331633475e-05, "loss": 4.5707, "step": 480000 }, { "epoch": 0.44, "learning_rate": 7.800424231766826e-05, "loss": 4.5598, "step": 480500 }, { "epoch": 0.44, "learning_rate": 7.798134552541728e-05, "loss": 4.5585, "step": 481000 }, { "epoch": 0.44, "learning_rate": 7.795844873316628e-05, "loss": 4.5602, "step": 481500 }, { "epoch": 0.44, "learning_rate": 7.793555194091529e-05, "loss": 4.5559, "step": 482000 }, { "epoch": 0.44, "learning_rate": 7.791270094224879e-05, "loss": 4.5481, "step": 482500 }, { "epoch": 0.44, "learning_rate": 7.78898041499978e-05, "loss": 4.5692, "step": 483000 }, { "epoch": 0.44, "learning_rate": 7.786690735774682e-05, "loss": 4.5554, "step": 483500 }, { "epoch": 0.44, "learning_rate": 7.784401056549582e-05, "loss": 4.5558, "step": 484000 }, { "epoch": 0.44, "learning_rate": 7.782111377324483e-05, "loss": 4.5503, "step": 484500 }, { "epoch": 0.44, "learning_rate": 7.779826277457833e-05, "loss": 4.5467, "step": 485000 }, { "epoch": 0.44, "learning_rate": 7.777536598232734e-05, "loss": 4.5649, "step": 485500 }, { "epoch": 0.45, "learning_rate": 7.775246919007635e-05, "loss": 4.5644, "step": 486000 }, { "epoch": 0.45, "learning_rate": 7.772961819140986e-05, "loss": 4.5455, "step": 486500 }, { "epoch": 0.45, "learning_rate": 7.770672139915887e-05, "loss": 4.5709, "step": 487000 }, { "epoch": 0.45, "learning_rate": 7.768382460690787e-05, "loss": 4.5536, "step": 487500 }, { "epoch": 0.45, "learning_rate": 7.766097360824139e-05, "loss": 4.5614, "step": 488000 }, { "epoch": 0.45, "learning_rate": 7.76380768159904e-05, "loss": 4.5719, "step": 488500 }, { "epoch": 0.45, "learning_rate": 7.76151800237394e-05, "loss": 4.5571, "step": 489000 }, { "epoch": 0.45, "learning_rate": 7.75922832314884e-05, "loss": 4.5477, "step": 489500 }, { "epoch": 0.45, "learning_rate": 7.756938643923741e-05, "loss": 4.5465, "step": 490000 }, { "epoch": 0.45, "learning_rate": 7.754648964698641e-05, "loss": 4.5567, "step": 490500 }, { "epoch": 0.45, "learning_rate": 7.752359285473543e-05, "loss": 4.5757, "step": 491000 }, { "epoch": 0.45, "learning_rate": 7.750069606248444e-05, "loss": 4.5574, "step": 491500 }, { "epoch": 0.45, "learning_rate": 7.747779927023344e-05, "loss": 4.5572, "step": 492000 }, { "epoch": 0.45, "learning_rate": 7.745490247798245e-05, "loss": 4.5575, "step": 492500 }, { "epoch": 0.45, "learning_rate": 7.743200568573145e-05, "loss": 4.565, "step": 493000 }, { "epoch": 0.45, "learning_rate": 7.740910889348046e-05, "loss": 4.5519, "step": 493500 }, { "epoch": 0.45, "learning_rate": 7.738625789481398e-05, "loss": 4.5427, "step": 494000 }, { "epoch": 0.45, "learning_rate": 7.736336110256298e-05, "loss": 4.5671, "step": 494500 }, { "epoch": 0.45, "learning_rate": 7.734051010389649e-05, "loss": 4.5689, "step": 495000 }, { "epoch": 0.45, "learning_rate": 7.73176133116455e-05, "loss": 4.5583, "step": 495500 }, { "epoch": 0.45, "learning_rate": 7.729471651939451e-05, "loss": 4.549, "step": 496000 }, { "epoch": 0.45, "learning_rate": 7.727181972714352e-05, "loss": 4.5552, "step": 496500 }, { "epoch": 0.46, "learning_rate": 7.724892293489252e-05, "loss": 4.5654, "step": 497000 }, { "epoch": 0.46, "learning_rate": 7.722602614264153e-05, "loss": 4.5525, "step": 497500 }, { "epoch": 0.46, "learning_rate": 7.720312935039053e-05, "loss": 4.5559, "step": 498000 }, { "epoch": 0.46, "learning_rate": 7.718023255813954e-05, "loss": 4.5545, "step": 498500 }, { "epoch": 0.46, "learning_rate": 7.715733576588856e-05, "loss": 4.5493, "step": 499000 }, { "epoch": 0.46, "learning_rate": 7.713448476722206e-05, "loss": 4.5495, "step": 499500 }, { "epoch": 0.46, "learning_rate": 7.711158797497107e-05, "loss": 4.5593, "step": 500000 }, { "epoch": 0.46, "learning_rate": 7.708869118272007e-05, "loss": 4.5554, "step": 500500 }, { "epoch": 0.46, "learning_rate": 7.706579439046908e-05, "loss": 4.5548, "step": 501000 }, { "epoch": 0.46, "learning_rate": 7.70428975982181e-05, "loss": 4.5456, "step": 501500 }, { "epoch": 0.46, "learning_rate": 7.702004659955159e-05, "loss": 4.5564, "step": 502000 }, { "epoch": 0.46, "learning_rate": 7.699714980730059e-05, "loss": 4.5602, "step": 502500 }, { "epoch": 0.46, "learning_rate": 7.69742988086341e-05, "loss": 4.5634, "step": 503000 }, { "epoch": 0.46, "learning_rate": 7.695140201638312e-05, "loss": 4.563, "step": 503500 }, { "epoch": 0.46, "learning_rate": 7.692850522413212e-05, "loss": 4.5379, "step": 504000 }, { "epoch": 0.46, "learning_rate": 7.690560843188113e-05, "loss": 4.5451, "step": 504500 }, { "epoch": 0.46, "learning_rate": 7.688271163963013e-05, "loss": 4.5509, "step": 505000 }, { "epoch": 0.46, "learning_rate": 7.685981484737914e-05, "loss": 4.5551, "step": 505500 }, { "epoch": 0.46, "learning_rate": 7.683691805512814e-05, "loss": 4.5557, "step": 506000 }, { "epoch": 0.46, "learning_rate": 7.681406705646166e-05, "loss": 4.5332, "step": 506500 }, { "epoch": 0.46, "learning_rate": 7.679117026421067e-05, "loss": 4.5512, "step": 507000 }, { "epoch": 0.46, "learning_rate": 7.676827347195967e-05, "loss": 4.543, "step": 507500 }, { "epoch": 0.47, "learning_rate": 7.674537667970868e-05, "loss": 4.5435, "step": 508000 }, { "epoch": 0.47, "learning_rate": 7.672247988745768e-05, "loss": 4.543, "step": 508500 }, { "epoch": 0.47, "learning_rate": 7.66995830952067e-05, "loss": 4.5551, "step": 509000 }, { "epoch": 0.47, "learning_rate": 7.66766863029557e-05, "loss": 4.5523, "step": 509500 }, { "epoch": 0.47, "learning_rate": 7.665378951070471e-05, "loss": 4.5601, "step": 510000 }, { "epoch": 0.47, "learning_rate": 7.663089271845372e-05, "loss": 4.5537, "step": 510500 }, { "epoch": 0.47, "learning_rate": 7.660804171978722e-05, "loss": 4.5377, "step": 511000 }, { "epoch": 0.47, "learning_rate": 7.658514492753624e-05, "loss": 4.5348, "step": 511500 }, { "epoch": 0.47, "learning_rate": 7.656224813528524e-05, "loss": 4.5546, "step": 512000 }, { "epoch": 0.47, "learning_rate": 7.653935134303425e-05, "loss": 4.5421, "step": 512500 }, { "epoch": 0.47, "learning_rate": 7.651650034436776e-05, "loss": 4.5403, "step": 513000 }, { "epoch": 0.47, "learning_rate": 7.649364934570127e-05, "loss": 4.5399, "step": 513500 }, { "epoch": 0.47, "learning_rate": 7.647075255345028e-05, "loss": 4.5485, "step": 514000 }, { "epoch": 0.47, "learning_rate": 7.644785576119928e-05, "loss": 4.5399, "step": 514500 }, { "epoch": 0.47, "learning_rate": 7.642495896894829e-05, "loss": 4.5458, "step": 515000 }, { "epoch": 0.47, "learning_rate": 7.64020621766973e-05, "loss": 4.5466, "step": 515500 }, { "epoch": 0.47, "learning_rate": 7.63791653844463e-05, "loss": 4.558, "step": 516000 }, { "epoch": 0.47, "learning_rate": 7.635626859219532e-05, "loss": 4.5493, "step": 516500 }, { "epoch": 0.47, "learning_rate": 7.633337179994432e-05, "loss": 4.5411, "step": 517000 }, { "epoch": 0.47, "learning_rate": 7.631052080127783e-05, "loss": 4.5522, "step": 517500 }, { "epoch": 0.47, "learning_rate": 7.628762400902683e-05, "loss": 4.538, "step": 518000 }, { "epoch": 0.47, "learning_rate": 7.626472721677584e-05, "loss": 4.5591, "step": 518500 }, { "epoch": 0.48, "learning_rate": 7.624183042452486e-05, "loss": 4.538, "step": 519000 }, { "epoch": 0.48, "learning_rate": 7.621893363227386e-05, "loss": 4.5448, "step": 519500 }, { "epoch": 0.48, "learning_rate": 7.619603684002287e-05, "loss": 4.5493, "step": 520000 }, { "epoch": 0.48, "learning_rate": 7.617314004777187e-05, "loss": 4.5587, "step": 520500 }, { "epoch": 0.48, "learning_rate": 7.615024325552088e-05, "loss": 4.5315, "step": 521000 }, { "epoch": 0.48, "learning_rate": 7.61273922568544e-05, "loss": 4.5492, "step": 521500 }, { "epoch": 0.48, "learning_rate": 7.61044954646034e-05, "loss": 4.5423, "step": 522000 }, { "epoch": 0.48, "learning_rate": 7.608159867235241e-05, "loss": 4.555, "step": 522500 }, { "epoch": 0.48, "learning_rate": 7.605870188010141e-05, "loss": 4.539, "step": 523000 }, { "epoch": 0.48, "learning_rate": 7.603580508785042e-05, "loss": 4.5549, "step": 523500 }, { "epoch": 0.48, "learning_rate": 7.601290829559942e-05, "loss": 4.5437, "step": 524000 }, { "epoch": 0.48, "learning_rate": 7.599001150334844e-05, "loss": 4.5429, "step": 524500 }, { "epoch": 0.48, "learning_rate": 7.596716050468195e-05, "loss": 4.5537, "step": 525000 }, { "epoch": 0.48, "learning_rate": 7.594426371243095e-05, "loss": 4.5464, "step": 525500 }, { "epoch": 0.48, "learning_rate": 7.592136692017996e-05, "loss": 4.5455, "step": 526000 }, { "epoch": 0.48, "learning_rate": 7.589847012792896e-05, "loss": 4.55, "step": 526500 }, { "epoch": 0.48, "learning_rate": 7.587557333567797e-05, "loss": 4.5448, "step": 527000 }, { "epoch": 0.48, "learning_rate": 7.585267654342697e-05, "loss": 4.5453, "step": 527500 }, { "epoch": 0.48, "learning_rate": 7.582977975117598e-05, "loss": 4.5503, "step": 528000 }, { "epoch": 0.48, "learning_rate": 7.580688295892498e-05, "loss": 4.5302, "step": 528500 }, { "epoch": 0.48, "learning_rate": 7.578398616667399e-05, "loss": 4.5426, "step": 529000 }, { "epoch": 0.48, "learning_rate": 7.5761089374423e-05, "loss": 4.5538, "step": 529500 }, { "epoch": 0.49, "learning_rate": 7.573823837575651e-05, "loss": 4.5406, "step": 530000 }, { "epoch": 0.49, "learning_rate": 7.571534158350552e-05, "loss": 4.5467, "step": 530500 }, { "epoch": 0.49, "learning_rate": 7.569249058483902e-05, "loss": 4.5473, "step": 531000 }, { "epoch": 0.49, "learning_rate": 7.566959379258804e-05, "loss": 4.5513, "step": 531500 }, { "epoch": 0.49, "learning_rate": 7.564669700033705e-05, "loss": 4.5551, "step": 532000 }, { "epoch": 0.49, "learning_rate": 7.562380020808605e-05, "loss": 4.5501, "step": 532500 }, { "epoch": 0.49, "learning_rate": 7.560090341583506e-05, "loss": 4.5249, "step": 533000 }, { "epoch": 0.49, "learning_rate": 7.557800662358406e-05, "loss": 4.5449, "step": 533500 }, { "epoch": 0.49, "learning_rate": 7.555510983133307e-05, "loss": 4.5346, "step": 534000 }, { "epoch": 0.49, "learning_rate": 7.553221303908208e-05, "loss": 4.5552, "step": 534500 }, { "epoch": 0.49, "learning_rate": 7.550931624683109e-05, "loss": 4.5346, "step": 535000 }, { "epoch": 0.49, "learning_rate": 7.54864652481646e-05, "loss": 4.5524, "step": 535500 }, { "epoch": 0.49, "learning_rate": 7.54635684559136e-05, "loss": 4.5393, "step": 536000 }, { "epoch": 0.49, "learning_rate": 7.54406716636626e-05, "loss": 4.5409, "step": 536500 }, { "epoch": 0.49, "learning_rate": 7.541782066499612e-05, "loss": 4.5425, "step": 537000 }, { "epoch": 0.49, "learning_rate": 7.539492387274513e-05, "loss": 4.5398, "step": 537500 }, { "epoch": 0.49, "learning_rate": 7.537202708049413e-05, "loss": 4.5412, "step": 538000 }, { "epoch": 0.49, "learning_rate": 7.534913028824314e-05, "loss": 4.5556, "step": 538500 }, { "epoch": 0.49, "learning_rate": 7.532627928957664e-05, "loss": 4.5316, "step": 539000 }, { "epoch": 0.49, "learning_rate": 7.530338249732566e-05, "loss": 4.546, "step": 539500 }, { "epoch": 0.49, "learning_rate": 7.528048570507467e-05, "loss": 4.5482, "step": 540000 }, { "epoch": 0.5, "learning_rate": 7.525758891282367e-05, "loss": 4.5461, "step": 540500 }, { "epoch": 0.5, "learning_rate": 7.523469212057268e-05, "loss": 4.5496, "step": 541000 }, { "epoch": 0.5, "learning_rate": 7.521184112190618e-05, "loss": 4.5432, "step": 541500 }, { "epoch": 0.5, "learning_rate": 7.51889443296552e-05, "loss": 4.5257, "step": 542000 }, { "epoch": 0.5, "learning_rate": 7.516604753740421e-05, "loss": 4.5619, "step": 542500 }, { "epoch": 0.5, "learning_rate": 7.514315074515321e-05, "loss": 4.5253, "step": 543000 }, { "epoch": 0.5, "learning_rate": 7.512029974648672e-05, "loss": 4.538, "step": 543500 }, { "epoch": 0.5, "learning_rate": 7.509740295423572e-05, "loss": 4.5265, "step": 544000 }, { "epoch": 0.5, "learning_rate": 7.507450616198474e-05, "loss": 4.5397, "step": 544500 }, { "epoch": 0.5, "learning_rate": 7.505160936973375e-05, "loss": 4.538, "step": 545000 }, { "epoch": 0.5, "learning_rate": 7.502875837106725e-05, "loss": 4.5379, "step": 545500 }, { "epoch": 0.5, "learning_rate": 7.500586157881626e-05, "loss": 4.5465, "step": 546000 }, { "epoch": 0.5, "learning_rate": 7.498296478656526e-05, "loss": 4.5483, "step": 546500 }, { "epoch": 0.5, "learning_rate": 7.496006799431428e-05, "loss": 4.5478, "step": 547000 }, { "epoch": 0.5, "learning_rate": 7.493717120206329e-05, "loss": 4.5451, "step": 547500 }, { "epoch": 0.5, "learning_rate": 7.491427440981229e-05, "loss": 4.5371, "step": 548000 }, { "epoch": 0.5, "learning_rate": 7.48914234111458e-05, "loss": 4.5331, "step": 548500 }, { "epoch": 0.5, "learning_rate": 7.48685266188948e-05, "loss": 4.5357, "step": 549000 }, { "epoch": 0.5, "learning_rate": 7.484562982664382e-05, "loss": 4.5315, "step": 549500 }, { "epoch": 0.5, "learning_rate": 7.482273303439283e-05, "loss": 4.5457, "step": 550000 }, { "epoch": 0.5, "learning_rate": 7.479988203572633e-05, "loss": 4.536, "step": 550500 }, { "epoch": 0.5, "learning_rate": 7.477698524347532e-05, "loss": 4.538, "step": 551000 }, { "epoch": 0.51, "learning_rate": 7.475408845122434e-05, "loss": 4.5409, "step": 551500 }, { "epoch": 0.51, "learning_rate": 7.473119165897335e-05, "loss": 4.5533, "step": 552000 }, { "epoch": 0.51, "learning_rate": 7.470829486672235e-05, "loss": 4.5393, "step": 552500 }, { "epoch": 0.51, "learning_rate": 7.468539807447136e-05, "loss": 4.5363, "step": 553000 }, { "epoch": 0.51, "learning_rate": 7.466254707580486e-05, "loss": 4.5348, "step": 553500 }, { "epoch": 0.51, "learning_rate": 7.463965028355388e-05, "loss": 4.5322, "step": 554000 }, { "epoch": 0.51, "learning_rate": 7.461675349130289e-05, "loss": 4.533, "step": 554500 }, { "epoch": 0.51, "learning_rate": 7.459385669905189e-05, "loss": 4.5346, "step": 555000 }, { "epoch": 0.51, "learning_rate": 7.45709599068009e-05, "loss": 4.5383, "step": 555500 }, { "epoch": 0.51, "learning_rate": 7.45480631145499e-05, "loss": 4.5306, "step": 556000 }, { "epoch": 0.51, "learning_rate": 7.45251663222989e-05, "loss": 4.5262, "step": 556500 }, { "epoch": 0.51, "learning_rate": 7.450226953004792e-05, "loss": 4.5397, "step": 557000 }, { "epoch": 0.51, "learning_rate": 7.447941853138143e-05, "loss": 4.5211, "step": 557500 }, { "epoch": 0.51, "learning_rate": 7.445652173913044e-05, "loss": 4.5246, "step": 558000 }, { "epoch": 0.51, "learning_rate": 7.443362494687944e-05, "loss": 4.5353, "step": 558500 }, { "epoch": 0.51, "learning_rate": 7.441072815462845e-05, "loss": 4.5254, "step": 559000 }, { "epoch": 0.51, "learning_rate": 7.438783136237746e-05, "loss": 4.5233, "step": 559500 }, { "epoch": 0.51, "learning_rate": 7.436498036371097e-05, "loss": 4.5454, "step": 560000 }, { "epoch": 0.51, "learning_rate": 7.434208357145997e-05, "loss": 4.5399, "step": 560500 }, { "epoch": 0.51, "learning_rate": 7.431923257279348e-05, "loss": 4.5343, "step": 561000 }, { "epoch": 0.51, "learning_rate": 7.42963357805425e-05, "loss": 4.5307, "step": 561500 }, { "epoch": 0.51, "learning_rate": 7.42734389882915e-05, "loss": 4.5268, "step": 562000 }, { "epoch": 0.52, "learning_rate": 7.425054219604051e-05, "loss": 4.5428, "step": 562500 }, { "epoch": 0.52, "learning_rate": 7.422764540378951e-05, "loss": 4.5391, "step": 563000 }, { "epoch": 0.52, "learning_rate": 7.420474861153852e-05, "loss": 4.5445, "step": 563500 }, { "epoch": 0.52, "learning_rate": 7.418185181928752e-05, "loss": 4.5354, "step": 564000 }, { "epoch": 0.52, "learning_rate": 7.415895502703654e-05, "loss": 4.5395, "step": 564500 }, { "epoch": 0.52, "learning_rate": 7.413610402837005e-05, "loss": 4.5385, "step": 565000 }, { "epoch": 0.52, "learning_rate": 7.411320723611905e-05, "loss": 4.5325, "step": 565500 }, { "epoch": 0.52, "learning_rate": 7.409031044386806e-05, "loss": 4.5407, "step": 566000 }, { "epoch": 0.52, "learning_rate": 7.406741365161706e-05, "loss": 4.5347, "step": 566500 }, { "epoch": 0.52, "learning_rate": 7.404451685936608e-05, "loss": 4.538, "step": 567000 }, { "epoch": 0.52, "learning_rate": 7.402166586069959e-05, "loss": 4.5477, "step": 567500 }, { "epoch": 0.52, "learning_rate": 7.399876906844859e-05, "loss": 4.5279, "step": 568000 }, { "epoch": 0.52, "learning_rate": 7.39758722761976e-05, "loss": 4.5261, "step": 568500 }, { "epoch": 0.52, "learning_rate": 7.39529754839466e-05, "loss": 4.5406, "step": 569000 }, { "epoch": 0.52, "learning_rate": 7.393012448528012e-05, "loss": 4.5364, "step": 569500 }, { "epoch": 0.52, "learning_rate": 7.390722769302913e-05, "loss": 4.5423, "step": 570000 }, { "epoch": 0.52, "learning_rate": 7.388433090077813e-05, "loss": 4.5374, "step": 570500 }, { "epoch": 0.52, "learning_rate": 7.386143410852714e-05, "loss": 4.5263, "step": 571000 }, { "epoch": 0.52, "learning_rate": 7.383858310986064e-05, "loss": 4.5312, "step": 571500 }, { "epoch": 0.52, "learning_rate": 7.381568631760966e-05, "loss": 4.5237, "step": 572000 }, { "epoch": 0.52, "learning_rate": 7.379278952535867e-05, "loss": 4.5272, "step": 572500 }, { "epoch": 0.52, "learning_rate": 7.376989273310767e-05, "loss": 4.5512, "step": 573000 }, { "epoch": 0.53, "learning_rate": 7.374704173444118e-05, "loss": 4.543, "step": 573500 }, { "epoch": 0.53, "learning_rate": 7.372414494219018e-05, "loss": 4.5244, "step": 574000 }, { "epoch": 0.53, "learning_rate": 7.37012939435237e-05, "loss": 4.5209, "step": 574500 }, { "epoch": 0.53, "learning_rate": 7.367839715127269e-05, "loss": 4.5363, "step": 575000 }, { "epoch": 0.53, "learning_rate": 7.36555003590217e-05, "loss": 4.5286, "step": 575500 }, { "epoch": 0.53, "learning_rate": 7.36326035667707e-05, "loss": 4.54, "step": 576000 }, { "epoch": 0.53, "learning_rate": 7.360970677451972e-05, "loss": 4.5356, "step": 576500 }, { "epoch": 0.53, "learning_rate": 7.358680998226873e-05, "loss": 4.5347, "step": 577000 }, { "epoch": 0.53, "learning_rate": 7.356391319001773e-05, "loss": 4.5134, "step": 577500 }, { "epoch": 0.53, "learning_rate": 7.354101639776674e-05, "loss": 4.5245, "step": 578000 }, { "epoch": 0.53, "learning_rate": 7.351816539910024e-05, "loss": 4.5239, "step": 578500 }, { "epoch": 0.53, "learning_rate": 7.349526860684926e-05, "loss": 4.535, "step": 579000 }, { "epoch": 0.53, "learning_rate": 7.347237181459827e-05, "loss": 4.5285, "step": 579500 }, { "epoch": 0.53, "learning_rate": 7.344947502234727e-05, "loss": 4.541, "step": 580000 }, { "epoch": 0.53, "learning_rate": 7.342657823009628e-05, "loss": 4.5242, "step": 580500 }, { "epoch": 0.53, "learning_rate": 7.340368143784528e-05, "loss": 4.5236, "step": 581000 }, { "epoch": 0.53, "learning_rate": 7.338078464559429e-05, "loss": 4.5274, "step": 581500 }, { "epoch": 0.53, "learning_rate": 7.33578878533433e-05, "loss": 4.5321, "step": 582000 }, { "epoch": 0.53, "learning_rate": 7.333503685467681e-05, "loss": 4.5282, "step": 582500 }, { "epoch": 0.53, "learning_rate": 7.331214006242581e-05, "loss": 4.54, "step": 583000 }, { "epoch": 0.53, "learning_rate": 7.328928906375932e-05, "loss": 4.5283, "step": 583500 }, { "epoch": 0.53, "learning_rate": 7.326639227150834e-05, "loss": 4.5289, "step": 584000 }, { "epoch": 0.54, "learning_rate": 7.324349547925734e-05, "loss": 4.539, "step": 584500 }, { "epoch": 0.54, "learning_rate": 7.322059868700635e-05, "loss": 4.5127, "step": 585000 }, { "epoch": 0.54, "learning_rate": 7.319770189475535e-05, "loss": 4.5306, "step": 585500 }, { "epoch": 0.54, "learning_rate": 7.317480510250436e-05, "loss": 4.5276, "step": 586000 }, { "epoch": 0.54, "learning_rate": 7.315190831025336e-05, "loss": 4.5339, "step": 586500 }, { "epoch": 0.54, "learning_rate": 7.312901151800238e-05, "loss": 4.5273, "step": 587000 }, { "epoch": 0.54, "learning_rate": 7.310616051933589e-05, "loss": 4.5375, "step": 587500 }, { "epoch": 0.54, "learning_rate": 7.30832637270849e-05, "loss": 4.5251, "step": 588000 }, { "epoch": 0.54, "learning_rate": 7.30603669348339e-05, "loss": 4.5265, "step": 588500 }, { "epoch": 0.54, "learning_rate": 7.30374701425829e-05, "loss": 4.5367, "step": 589000 }, { "epoch": 0.54, "learning_rate": 7.301457335033192e-05, "loss": 4.5165, "step": 589500 }, { "epoch": 0.54, "learning_rate": 7.299176814524993e-05, "loss": 4.5261, "step": 590000 }, { "epoch": 0.54, "learning_rate": 7.296887135299893e-05, "loss": 4.531, "step": 590500 }, { "epoch": 0.54, "learning_rate": 7.294597456074794e-05, "loss": 4.5387, "step": 591000 }, { "epoch": 0.54, "learning_rate": 7.292307776849694e-05, "loss": 4.5228, "step": 591500 }, { "epoch": 0.54, "learning_rate": 7.290018097624596e-05, "loss": 4.5218, "step": 592000 }, { "epoch": 0.54, "learning_rate": 7.287728418399497e-05, "loss": 4.5279, "step": 592500 }, { "epoch": 0.54, "learning_rate": 7.285438739174397e-05, "loss": 4.5236, "step": 593000 }, { "epoch": 0.54, "learning_rate": 7.283149059949298e-05, "loss": 4.5273, "step": 593500 }, { "epoch": 0.54, "learning_rate": 7.280859380724198e-05, "loss": 4.5143, "step": 594000 }, { "epoch": 0.54, "learning_rate": 7.27857428085755e-05, "loss": 4.5206, "step": 594500 }, { "epoch": 0.54, "learning_rate": 7.27628460163245e-05, "loss": 4.5198, "step": 595000 }, { "epoch": 0.55, "learning_rate": 7.273999501765801e-05, "loss": 4.5313, "step": 595500 }, { "epoch": 0.55, "learning_rate": 7.271709822540702e-05, "loss": 4.5313, "step": 596000 }, { "epoch": 0.55, "learning_rate": 7.269420143315602e-05, "loss": 4.512, "step": 596500 }, { "epoch": 0.55, "learning_rate": 7.267130464090504e-05, "loss": 4.5199, "step": 597000 }, { "epoch": 0.55, "learning_rate": 7.264840784865405e-05, "loss": 4.5128, "step": 597500 }, { "epoch": 0.55, "learning_rate": 7.262551105640305e-05, "loss": 4.52, "step": 598000 }, { "epoch": 0.55, "learning_rate": 7.260261426415206e-05, "loss": 4.5229, "step": 598500 }, { "epoch": 0.55, "learning_rate": 7.257971747190106e-05, "loss": 4.5219, "step": 599000 }, { "epoch": 0.55, "learning_rate": 7.255682067965007e-05, "loss": 4.526, "step": 599500 }, { "epoch": 0.55, "learning_rate": 7.253396968098357e-05, "loss": 4.5289, "step": 600000 }, { "epoch": 0.55, "learning_rate": 7.251111868231708e-05, "loss": 4.5284, "step": 600500 }, { "epoch": 0.55, "learning_rate": 7.248822189006608e-05, "loss": 4.5254, "step": 601000 }, { "epoch": 0.55, "learning_rate": 7.24653250978151e-05, "loss": 4.5253, "step": 601500 }, { "epoch": 0.55, "learning_rate": 7.24424283055641e-05, "loss": 4.5205, "step": 602000 }, { "epoch": 0.55, "learning_rate": 7.241953151331311e-05, "loss": 4.5154, "step": 602500 }, { "epoch": 0.55, "learning_rate": 7.239668051464662e-05, "loss": 4.5257, "step": 603000 }, { "epoch": 0.55, "learning_rate": 7.237378372239562e-05, "loss": 4.5307, "step": 603500 }, { "epoch": 0.55, "learning_rate": 7.235088693014464e-05, "loss": 4.519, "step": 604000 }, { "epoch": 0.55, "learning_rate": 7.232799013789365e-05, "loss": 4.5236, "step": 604500 }, { "epoch": 0.55, "learning_rate": 7.230509334564265e-05, "loss": 4.5399, "step": 605000 }, { "epoch": 0.55, "learning_rate": 7.228219655339166e-05, "loss": 4.511, "step": 605500 }, { "epoch": 0.56, "learning_rate": 7.225929976114066e-05, "loss": 4.5265, "step": 606000 }, { "epoch": 0.56, "learning_rate": 7.223644876247418e-05, "loss": 4.5211, "step": 606500 }, { "epoch": 0.56, "learning_rate": 7.221355197022318e-05, "loss": 4.5213, "step": 607000 }, { "epoch": 0.56, "learning_rate": 7.219065517797219e-05, "loss": 4.5197, "step": 607500 }, { "epoch": 0.56, "learning_rate": 7.21677583857212e-05, "loss": 4.5213, "step": 608000 }, { "epoch": 0.56, "learning_rate": 7.21448615934702e-05, "loss": 4.52, "step": 608500 }, { "epoch": 0.56, "learning_rate": 7.21219648012192e-05, "loss": 4.5163, "step": 609000 }, { "epoch": 0.56, "learning_rate": 7.209906800896822e-05, "loss": 4.5028, "step": 609500 }, { "epoch": 0.56, "learning_rate": 7.207617121671723e-05, "loss": 4.5231, "step": 610000 }, { "epoch": 0.56, "learning_rate": 7.205332021805073e-05, "loss": 4.5349, "step": 610500 }, { "epoch": 0.56, "learning_rate": 7.203042342579974e-05, "loss": 4.5125, "step": 611000 }, { "epoch": 0.56, "learning_rate": 7.200752663354874e-05, "loss": 4.53, "step": 611500 }, { "epoch": 0.56, "learning_rate": 7.198462984129776e-05, "loss": 4.5175, "step": 612000 }, { "epoch": 0.56, "learning_rate": 7.196177884263127e-05, "loss": 4.5107, "step": 612500 }, { "epoch": 0.56, "learning_rate": 7.193888205038027e-05, "loss": 4.5227, "step": 613000 }, { "epoch": 0.56, "learning_rate": 7.191598525812928e-05, "loss": 4.5187, "step": 613500 }, { "epoch": 0.56, "learning_rate": 7.189308846587828e-05, "loss": 4.5076, "step": 614000 }, { "epoch": 0.56, "learning_rate": 7.18701916736273e-05, "loss": 4.5177, "step": 614500 }, { "epoch": 0.56, "learning_rate": 7.184729488137631e-05, "loss": 4.5217, "step": 615000 }, { "epoch": 0.56, "learning_rate": 7.182439808912531e-05, "loss": 4.5233, "step": 615500 }, { "epoch": 0.56, "learning_rate": 7.180150129687432e-05, "loss": 4.517, "step": 616000 }, { "epoch": 0.56, "learning_rate": 7.177865029820782e-05, "loss": 4.5305, "step": 616500 }, { "epoch": 0.57, "learning_rate": 7.175575350595684e-05, "loss": 4.5206, "step": 617000 }, { "epoch": 0.57, "learning_rate": 7.173285671370585e-05, "loss": 4.532, "step": 617500 }, { "epoch": 0.57, "learning_rate": 7.170995992145485e-05, "loss": 4.5226, "step": 618000 }, { "epoch": 0.57, "learning_rate": 7.168710892278836e-05, "loss": 4.5306, "step": 618500 }, { "epoch": 0.57, "learning_rate": 7.166421213053736e-05, "loss": 4.5267, "step": 619000 }, { "epoch": 0.57, "learning_rate": 7.164131533828638e-05, "loss": 4.5171, "step": 619500 }, { "epoch": 0.57, "learning_rate": 7.161841854603539e-05, "loss": 4.5136, "step": 620000 }, { "epoch": 0.57, "learning_rate": 7.159552175378439e-05, "loss": 4.5101, "step": 620500 }, { "epoch": 0.57, "learning_rate": 7.15726707551179e-05, "loss": 4.538, "step": 621000 }, { "epoch": 0.57, "learning_rate": 7.15497739628669e-05, "loss": 4.5257, "step": 621500 }, { "epoch": 0.57, "learning_rate": 7.152687717061591e-05, "loss": 4.5231, "step": 622000 }, { "epoch": 0.57, "learning_rate": 7.150398037836493e-05, "loss": 4.5128, "step": 622500 }, { "epoch": 0.57, "learning_rate": 7.148112937969843e-05, "loss": 4.5193, "step": 623000 }, { "epoch": 0.57, "learning_rate": 7.145823258744744e-05, "loss": 4.5111, "step": 623500 }, { "epoch": 0.57, "learning_rate": 7.143533579519643e-05, "loss": 4.5255, "step": 624000 }, { "epoch": 0.57, "learning_rate": 7.141243900294545e-05, "loss": 4.5311, "step": 624500 }, { "epoch": 0.57, "learning_rate": 7.138958800427895e-05, "loss": 4.5211, "step": 625000 }, { "epoch": 0.57, "learning_rate": 7.136669121202796e-05, "loss": 4.5225, "step": 625500 }, { "epoch": 0.57, "learning_rate": 7.134379441977696e-05, "loss": 4.5101, "step": 626000 }, { "epoch": 0.57, "learning_rate": 7.132089762752597e-05, "loss": 4.517, "step": 626500 }, { "epoch": 0.57, "learning_rate": 7.129800083527499e-05, "loss": 4.5263, "step": 627000 }, { "epoch": 0.57, "learning_rate": 7.127514983660849e-05, "loss": 4.5069, "step": 627500 }, { "epoch": 0.58, "learning_rate": 7.12522530443575e-05, "loss": 4.5215, "step": 628000 }, { "epoch": 0.58, "learning_rate": 7.12293562521065e-05, "loss": 4.543, "step": 628500 }, { "epoch": 0.58, "learning_rate": 7.12064594598555e-05, "loss": 4.5266, "step": 629000 }, { "epoch": 0.58, "learning_rate": 7.118356266760452e-05, "loss": 4.5198, "step": 629500 }, { "epoch": 0.58, "learning_rate": 7.116071166893803e-05, "loss": 4.504, "step": 630000 }, { "epoch": 0.58, "learning_rate": 7.113781487668704e-05, "loss": 4.5098, "step": 630500 }, { "epoch": 0.58, "learning_rate": 7.111491808443604e-05, "loss": 4.5104, "step": 631000 }, { "epoch": 0.58, "learning_rate": 7.109202129218505e-05, "loss": 4.5162, "step": 631500 }, { "epoch": 0.58, "learning_rate": 7.106912449993406e-05, "loss": 4.5249, "step": 632000 }, { "epoch": 0.58, "learning_rate": 7.104627350126757e-05, "loss": 4.5211, "step": 632500 }, { "epoch": 0.58, "learning_rate": 7.102337670901657e-05, "loss": 4.5093, "step": 633000 }, { "epoch": 0.58, "learning_rate": 7.100047991676558e-05, "loss": 4.5225, "step": 633500 }, { "epoch": 0.58, "learning_rate": 7.097758312451458e-05, "loss": 4.5136, "step": 634000 }, { "epoch": 0.58, "learning_rate": 7.09546863322636e-05, "loss": 4.5198, "step": 634500 }, { "epoch": 0.58, "learning_rate": 7.093178954001261e-05, "loss": 4.513, "step": 635000 }, { "epoch": 0.58, "learning_rate": 7.090893854134611e-05, "loss": 4.5039, "step": 635500 }, { "epoch": 0.58, "learning_rate": 7.088604174909512e-05, "loss": 4.5081, "step": 636000 }, { "epoch": 0.58, "learning_rate": 7.086314495684412e-05, "loss": 4.5174, "step": 636500 }, { "epoch": 0.58, "learning_rate": 7.084024816459314e-05, "loss": 4.5198, "step": 637000 }, { "epoch": 0.58, "learning_rate": 7.081739716592665e-05, "loss": 4.4917, "step": 637500 }, { "epoch": 0.58, "learning_rate": 7.079454616726015e-05, "loss": 4.5467, "step": 638000 }, { "epoch": 0.58, "learning_rate": 7.077164937500916e-05, "loss": 4.5143, "step": 638500 }, { "epoch": 0.59, "learning_rate": 7.074875258275816e-05, "loss": 4.5066, "step": 639000 }, { "epoch": 0.59, "learning_rate": 7.072585579050718e-05, "loss": 4.5082, "step": 639500 }, { "epoch": 0.59, "learning_rate": 7.070300479184069e-05, "loss": 4.5381, "step": 640000 }, { "epoch": 0.59, "learning_rate": 7.06801079995897e-05, "loss": 4.5149, "step": 640500 }, { "epoch": 0.59, "learning_rate": 7.06572112073387e-05, "loss": 4.5167, "step": 641000 }, { "epoch": 0.59, "learning_rate": 7.06343144150877e-05, "loss": 4.5193, "step": 641500 }, { "epoch": 0.59, "learning_rate": 7.061141762283672e-05, "loss": 4.512, "step": 642000 }, { "epoch": 0.59, "learning_rate": 7.058852083058573e-05, "loss": 4.5206, "step": 642500 }, { "epoch": 0.59, "learning_rate": 7.056566983191923e-05, "loss": 4.5208, "step": 643000 }, { "epoch": 0.59, "learning_rate": 7.054277303966824e-05, "loss": 4.5358, "step": 643500 }, { "epoch": 0.59, "learning_rate": 7.051987624741724e-05, "loss": 4.5192, "step": 644000 }, { "epoch": 0.59, "learning_rate": 7.049697945516626e-05, "loss": 4.5143, "step": 644500 }, { "epoch": 0.59, "learning_rate": 7.047408266291527e-05, "loss": 4.5162, "step": 645000 }, { "epoch": 0.59, "learning_rate": 7.045118587066427e-05, "loss": 4.5074, "step": 645500 }, { "epoch": 0.59, "learning_rate": 7.042828907841328e-05, "loss": 4.5297, "step": 646000 }, { "epoch": 0.59, "learning_rate": 7.040539228616228e-05, "loss": 4.5012, "step": 646500 }, { "epoch": 0.59, "learning_rate": 7.038249549391129e-05, "loss": 4.5117, "step": 647000 }, { "epoch": 0.59, "learning_rate": 7.03596444952448e-05, "loss": 4.5197, "step": 647500 }, { "epoch": 0.59, "learning_rate": 7.03367477029938e-05, "loss": 4.5106, "step": 648000 }, { "epoch": 0.59, "learning_rate": 7.03138967043273e-05, "loss": 4.4993, "step": 648500 }, { "epoch": 0.59, "learning_rate": 7.029099991207632e-05, "loss": 4.5277, "step": 649000 }, { "epoch": 0.59, "learning_rate": 7.026810311982533e-05, "loss": 4.5132, "step": 649500 }, { "epoch": 0.6, "learning_rate": 7.024520632757433e-05, "loss": 4.53, "step": 650000 }, { "epoch": 0.6, "learning_rate": 7.022230953532334e-05, "loss": 4.5219, "step": 650500 }, { "epoch": 0.6, "learning_rate": 7.019941274307234e-05, "loss": 4.5085, "step": 651000 }, { "epoch": 0.6, "learning_rate": 7.017651595082135e-05, "loss": 4.5004, "step": 651500 }, { "epoch": 0.6, "learning_rate": 7.015361915857037e-05, "loss": 4.5118, "step": 652000 }, { "epoch": 0.6, "learning_rate": 7.013072236631937e-05, "loss": 4.4977, "step": 652500 }, { "epoch": 0.6, "learning_rate": 7.010787136765288e-05, "loss": 4.5102, "step": 653000 }, { "epoch": 0.6, "learning_rate": 7.008502036898638e-05, "loss": 4.5065, "step": 653500 }, { "epoch": 0.6, "learning_rate": 7.00621235767354e-05, "loss": 4.5124, "step": 654000 }, { "epoch": 0.6, "learning_rate": 7.00392267844844e-05, "loss": 4.5077, "step": 654500 }, { "epoch": 0.6, "learning_rate": 7.001632999223341e-05, "loss": 4.5166, "step": 655000 }, { "epoch": 0.6, "learning_rate": 6.999343319998242e-05, "loss": 4.51, "step": 655500 }, { "epoch": 0.6, "learning_rate": 6.997053640773142e-05, "loss": 4.512, "step": 656000 }, { "epoch": 0.6, "learning_rate": 6.994763961548043e-05, "loss": 4.5031, "step": 656500 }, { "epoch": 0.6, "learning_rate": 6.992474282322944e-05, "loss": 4.5069, "step": 657000 }, { "epoch": 0.6, "learning_rate": 6.990184603097845e-05, "loss": 4.5101, "step": 657500 }, { "epoch": 0.6, "learning_rate": 6.987894923872745e-05, "loss": 4.5034, "step": 658000 }, { "epoch": 0.6, "learning_rate": 6.985605244647646e-05, "loss": 4.5065, "step": 658500 }, { "epoch": 0.6, "learning_rate": 6.983315565422546e-05, "loss": 4.5145, "step": 659000 }, { "epoch": 0.6, "learning_rate": 6.981030465555898e-05, "loss": 4.4976, "step": 659500 }, { "epoch": 0.6, "learning_rate": 6.978740786330799e-05, "loss": 4.5073, "step": 660000 }, { "epoch": 0.6, "learning_rate": 6.9764511071057e-05, "loss": 4.5103, "step": 660500 }, { "epoch": 0.61, "learning_rate": 6.97416600723905e-05, "loss": 4.5107, "step": 661000 }, { "epoch": 0.61, "learning_rate": 6.97187632801395e-05, "loss": 4.5172, "step": 661500 }, { "epoch": 0.61, "learning_rate": 6.969586648788852e-05, "loss": 4.5104, "step": 662000 }, { "epoch": 0.61, "learning_rate": 6.967296969563753e-05, "loss": 4.5057, "step": 662500 }, { "epoch": 0.61, "learning_rate": 6.965007290338653e-05, "loss": 4.5057, "step": 663000 }, { "epoch": 0.61, "learning_rate": 6.962717611113554e-05, "loss": 4.5067, "step": 663500 }, { "epoch": 0.61, "learning_rate": 6.960427931888454e-05, "loss": 4.5054, "step": 664000 }, { "epoch": 0.61, "learning_rate": 6.958138252663355e-05, "loss": 4.5127, "step": 664500 }, { "epoch": 0.61, "learning_rate": 6.955853152796707e-05, "loss": 4.5075, "step": 665000 }, { "epoch": 0.61, "learning_rate": 6.953563473571607e-05, "loss": 4.5255, "step": 665500 }, { "epoch": 0.61, "learning_rate": 6.951278373704958e-05, "loss": 4.5209, "step": 666000 }, { "epoch": 0.61, "learning_rate": 6.948988694479858e-05, "loss": 4.5176, "step": 666500 }, { "epoch": 0.61, "learning_rate": 6.94669901525476e-05, "loss": 4.5111, "step": 667000 }, { "epoch": 0.61, "learning_rate": 6.94440933602966e-05, "loss": 4.5078, "step": 667500 }, { "epoch": 0.61, "learning_rate": 6.942124236163011e-05, "loss": 4.5194, "step": 668000 }, { "epoch": 0.61, "learning_rate": 6.939843715654812e-05, "loss": 4.5099, "step": 668500 }, { "epoch": 0.61, "learning_rate": 6.937554036429714e-05, "loss": 4.5164, "step": 669000 }, { "epoch": 0.61, "learning_rate": 6.935264357204614e-05, "loss": 4.5084, "step": 669500 }, { "epoch": 0.61, "learning_rate": 6.932974677979515e-05, "loss": 4.5003, "step": 670000 }, { "epoch": 0.61, "learning_rate": 6.930684998754415e-05, "loss": 4.5066, "step": 670500 }, { "epoch": 0.61, "learning_rate": 6.928395319529316e-05, "loss": 4.5052, "step": 671000 }, { "epoch": 0.62, "learning_rate": 6.926105640304216e-05, "loss": 4.5154, "step": 671500 }, { "epoch": 0.62, "learning_rate": 6.923815961079117e-05, "loss": 4.5067, "step": 672000 }, { "epoch": 0.62, "learning_rate": 6.921526281854017e-05, "loss": 4.5109, "step": 672500 }, { "epoch": 0.62, "learning_rate": 6.919236602628918e-05, "loss": 4.5085, "step": 673000 }, { "epoch": 0.62, "learning_rate": 6.916946923403818e-05, "loss": 4.5122, "step": 673500 }, { "epoch": 0.62, "learning_rate": 6.914657244178719e-05, "loss": 4.5164, "step": 674000 }, { "epoch": 0.62, "learning_rate": 6.91236756495362e-05, "loss": 4.5074, "step": 674500 }, { "epoch": 0.62, "learning_rate": 6.910077885728521e-05, "loss": 4.5002, "step": 675000 }, { "epoch": 0.62, "learning_rate": 6.907792785861872e-05, "loss": 4.5122, "step": 675500 }, { "epoch": 0.62, "learning_rate": 6.905503106636772e-05, "loss": 4.5123, "step": 676000 }, { "epoch": 0.62, "learning_rate": 6.903213427411673e-05, "loss": 4.4983, "step": 676500 }, { "epoch": 0.62, "learning_rate": 6.900923748186575e-05, "loss": 4.4989, "step": 677000 }, { "epoch": 0.62, "learning_rate": 6.898634068961475e-05, "loss": 4.5122, "step": 677500 }, { "epoch": 0.62, "learning_rate": 6.896344389736376e-05, "loss": 4.5116, "step": 678000 }, { "epoch": 0.62, "learning_rate": 6.894054710511276e-05, "loss": 4.4961, "step": 678500 }, { "epoch": 0.62, "learning_rate": 6.891765031286177e-05, "loss": 4.5088, "step": 679000 }, { "epoch": 0.62, "learning_rate": 6.889475352061078e-05, "loss": 4.5033, "step": 679500 }, { "epoch": 0.62, "learning_rate": 6.887190252194429e-05, "loss": 4.5126, "step": 680000 }, { "epoch": 0.62, "learning_rate": 6.88490057296933e-05, "loss": 4.4891, "step": 680500 }, { "epoch": 0.62, "learning_rate": 6.88261089374423e-05, "loss": 4.5024, "step": 681000 }, { "epoch": 0.62, "learning_rate": 6.88032121451913e-05, "loss": 4.5105, "step": 681500 }, { "epoch": 0.62, "learning_rate": 6.878031535294031e-05, "loss": 4.5002, "step": 682000 }, { "epoch": 0.63, "learning_rate": 6.875746435427383e-05, "loss": 4.5085, "step": 682500 }, { "epoch": 0.63, "learning_rate": 6.873456756202283e-05, "loss": 4.5095, "step": 683000 }, { "epoch": 0.63, "learning_rate": 6.871171656335634e-05, "loss": 4.5004, "step": 683500 }, { "epoch": 0.63, "learning_rate": 6.868881977110534e-05, "loss": 4.4968, "step": 684000 }, { "epoch": 0.63, "learning_rate": 6.866592297885436e-05, "loss": 4.5241, "step": 684500 }, { "epoch": 0.63, "learning_rate": 6.864302618660337e-05, "loss": 4.5098, "step": 685000 }, { "epoch": 0.63, "learning_rate": 6.862012939435237e-05, "loss": 4.5173, "step": 685500 }, { "epoch": 0.63, "learning_rate": 6.859723260210138e-05, "loss": 4.5197, "step": 686000 }, { "epoch": 0.63, "learning_rate": 6.857433580985038e-05, "loss": 4.5042, "step": 686500 }, { "epoch": 0.63, "learning_rate": 6.85514848111839e-05, "loss": 4.4914, "step": 687000 }, { "epoch": 0.63, "learning_rate": 6.852858801893291e-05, "loss": 4.5056, "step": 687500 }, { "epoch": 0.63, "learning_rate": 6.850569122668191e-05, "loss": 4.5033, "step": 688000 }, { "epoch": 0.63, "learning_rate": 6.848279443443092e-05, "loss": 4.5088, "step": 688500 }, { "epoch": 0.63, "learning_rate": 6.845989764217992e-05, "loss": 4.5133, "step": 689000 }, { "epoch": 0.63, "learning_rate": 6.843704664351344e-05, "loss": 4.4881, "step": 689500 }, { "epoch": 0.63, "learning_rate": 6.841414985126245e-05, "loss": 4.5163, "step": 690000 }, { "epoch": 0.63, "learning_rate": 6.839125305901145e-05, "loss": 4.4941, "step": 690500 }, { "epoch": 0.63, "learning_rate": 6.836835626676046e-05, "loss": 4.5006, "step": 691000 }, { "epoch": 0.63, "learning_rate": 6.834545947450946e-05, "loss": 4.5095, "step": 691500 }, { "epoch": 0.63, "learning_rate": 6.832256268225847e-05, "loss": 4.5001, "step": 692000 }, { "epoch": 0.63, "learning_rate": 6.829966589000749e-05, "loss": 4.5061, "step": 692500 }, { "epoch": 0.63, "learning_rate": 6.827676909775649e-05, "loss": 4.4967, "step": 693000 }, { "epoch": 0.64, "learning_rate": 6.825391809909e-05, "loss": 4.4954, "step": 693500 }, { "epoch": 0.64, "learning_rate": 6.8231021306839e-05, "loss": 4.505, "step": 694000 }, { "epoch": 0.64, "learning_rate": 6.8208124514588e-05, "loss": 4.5094, "step": 694500 }, { "epoch": 0.64, "learning_rate": 6.818522772233703e-05, "loss": 4.5081, "step": 695000 }, { "epoch": 0.64, "learning_rate": 6.816233093008603e-05, "loss": 4.5247, "step": 695500 }, { "epoch": 0.64, "learning_rate": 6.813943413783504e-05, "loss": 4.5025, "step": 696000 }, { "epoch": 0.64, "learning_rate": 6.811653734558404e-05, "loss": 4.4861, "step": 696500 }, { "epoch": 0.64, "learning_rate": 6.809364055333305e-05, "loss": 4.5081, "step": 697000 }, { "epoch": 0.64, "learning_rate": 6.807074376108205e-05, "loss": 4.4994, "step": 697500 }, { "epoch": 0.64, "learning_rate": 6.804789276241556e-05, "loss": 4.4941, "step": 698000 }, { "epoch": 0.64, "learning_rate": 6.802499597016456e-05, "loss": 4.5034, "step": 698500 }, { "epoch": 0.64, "learning_rate": 6.800214497149807e-05, "loss": 4.4934, "step": 699000 }, { "epoch": 0.64, "learning_rate": 6.797924817924709e-05, "loss": 4.5041, "step": 699500 }, { "epoch": 0.64, "learning_rate": 6.795635138699609e-05, "loss": 4.5051, "step": 700000 }, { "epoch": 0.64, "learning_rate": 6.79334545947451e-05, "loss": 4.5111, "step": 700500 }, { "epoch": 0.64, "learning_rate": 6.79105578024941e-05, "loss": 4.4981, "step": 701000 }, { "epoch": 0.64, "learning_rate": 6.78876610102431e-05, "loss": 4.5036, "step": 701500 }, { "epoch": 0.64, "learning_rate": 6.786476421799211e-05, "loss": 4.5069, "step": 702000 }, { "epoch": 0.64, "learning_rate": 6.784191321932563e-05, "loss": 4.4999, "step": 702500 }, { "epoch": 0.64, "learning_rate": 6.781901642707463e-05, "loss": 4.5134, "step": 703000 }, { "epoch": 0.64, "learning_rate": 6.779611963482364e-05, "loss": 4.4886, "step": 703500 }, { "epoch": 0.64, "learning_rate": 6.777322284257264e-05, "loss": 4.5002, "step": 704000 }, { "epoch": 0.65, "learning_rate": 6.775032605032165e-05, "loss": 4.5067, "step": 704500 }, { "epoch": 0.65, "learning_rate": 6.772742925807067e-05, "loss": 4.5094, "step": 705000 }, { "epoch": 0.65, "learning_rate": 6.770453246581967e-05, "loss": 4.5038, "step": 705500 }, { "epoch": 0.65, "learning_rate": 6.768163567356868e-05, "loss": 4.5023, "step": 706000 }, { "epoch": 0.65, "learning_rate": 6.765873888131768e-05, "loss": 4.4943, "step": 706500 }, { "epoch": 0.65, "learning_rate": 6.763588788265119e-05, "loss": 4.5032, "step": 707000 }, { "epoch": 0.65, "learning_rate": 6.761303688398471e-05, "loss": 4.5062, "step": 707500 }, { "epoch": 0.65, "learning_rate": 6.759014009173371e-05, "loss": 4.5056, "step": 708000 }, { "epoch": 0.65, "learning_rate": 6.756724329948272e-05, "loss": 4.4967, "step": 708500 }, { "epoch": 0.65, "learning_rate": 6.754434650723172e-05, "loss": 4.4952, "step": 709000 }, { "epoch": 0.65, "learning_rate": 6.752144971498073e-05, "loss": 4.4906, "step": 709500 }, { "epoch": 0.65, "learning_rate": 6.749855292272975e-05, "loss": 4.4871, "step": 710000 }, { "epoch": 0.65, "learning_rate": 6.747565613047875e-05, "loss": 4.5065, "step": 710500 }, { "epoch": 0.65, "learning_rate": 6.745280513181226e-05, "loss": 4.5009, "step": 711000 }, { "epoch": 0.65, "learning_rate": 6.742990833956126e-05, "loss": 4.486, "step": 711500 }, { "epoch": 0.65, "learning_rate": 6.740701154731027e-05, "loss": 4.4875, "step": 712000 }, { "epoch": 0.65, "learning_rate": 6.738411475505929e-05, "loss": 4.5009, "step": 712500 }, { "epoch": 0.65, "learning_rate": 6.736121796280829e-05, "loss": 4.4956, "step": 713000 }, { "epoch": 0.65, "learning_rate": 6.73383211705573e-05, "loss": 4.5123, "step": 713500 }, { "epoch": 0.65, "learning_rate": 6.73154701718908e-05, "loss": 4.4769, "step": 714000 }, { "epoch": 0.65, "learning_rate": 6.729257337963981e-05, "loss": 4.4848, "step": 714500 }, { "epoch": 0.65, "learning_rate": 6.726967658738881e-05, "loss": 4.5009, "step": 715000 }, { "epoch": 0.66, "learning_rate": 6.724677979513783e-05, "loss": 4.5041, "step": 715500 }, { "epoch": 0.66, "learning_rate": 6.722388300288684e-05, "loss": 4.5043, "step": 716000 }, { "epoch": 0.66, "learning_rate": 6.720103200422034e-05, "loss": 4.5016, "step": 716500 }, { "epoch": 0.66, "learning_rate": 6.717818100555385e-05, "loss": 4.509, "step": 717000 }, { "epoch": 0.66, "learning_rate": 6.715528421330287e-05, "loss": 4.4817, "step": 717500 }, { "epoch": 0.66, "learning_rate": 6.713238742105187e-05, "loss": 4.4877, "step": 718000 }, { "epoch": 0.66, "learning_rate": 6.710949062880088e-05, "loss": 4.5078, "step": 718500 }, { "epoch": 0.66, "learning_rate": 6.708659383654988e-05, "loss": 4.5087, "step": 719000 }, { "epoch": 0.66, "learning_rate": 6.706369704429889e-05, "loss": 4.4927, "step": 719500 }, { "epoch": 0.66, "learning_rate": 6.704080025204789e-05, "loss": 4.4916, "step": 720000 }, { "epoch": 0.66, "learning_rate": 6.701790345979691e-05, "loss": 4.4967, "step": 720500 }, { "epoch": 0.66, "learning_rate": 6.699500666754591e-05, "loss": 4.5194, "step": 721000 }, { "epoch": 0.66, "learning_rate": 6.697210987529492e-05, "loss": 4.4931, "step": 721500 }, { "epoch": 0.66, "learning_rate": 6.694921308304391e-05, "loss": 4.5015, "step": 722000 }, { "epoch": 0.66, "learning_rate": 6.692631629079292e-05, "loss": 4.4921, "step": 722500 }, { "epoch": 0.66, "learning_rate": 6.690346529212644e-05, "loss": 4.4961, "step": 723000 }, { "epoch": 0.66, "learning_rate": 6.688056849987544e-05, "loss": 4.5106, "step": 723500 }, { "epoch": 0.66, "learning_rate": 6.685767170762445e-05, "loss": 4.5038, "step": 724000 }, { "epoch": 0.66, "learning_rate": 6.683477491537345e-05, "loss": 4.4953, "step": 724500 }, { "epoch": 0.66, "learning_rate": 6.681192391670697e-05, "loss": 4.5079, "step": 725000 }, { "epoch": 0.66, "learning_rate": 6.678907291804048e-05, "loss": 4.4942, "step": 725500 }, { "epoch": 0.66, "learning_rate": 6.676617612578948e-05, "loss": 4.4954, "step": 726000 }, { "epoch": 0.67, "learning_rate": 6.674327933353849e-05, "loss": 4.5005, "step": 726500 }, { "epoch": 0.67, "learning_rate": 6.672038254128749e-05, "loss": 4.4861, "step": 727000 }, { "epoch": 0.67, "learning_rate": 6.669748574903651e-05, "loss": 4.5135, "step": 727500 }, { "epoch": 0.67, "learning_rate": 6.667463475037001e-05, "loss": 4.5118, "step": 728000 }, { "epoch": 0.67, "learning_rate": 6.665173795811902e-05, "loss": 4.4936, "step": 728500 }, { "epoch": 0.67, "learning_rate": 6.662884116586802e-05, "loss": 4.4923, "step": 729000 }, { "epoch": 0.67, "learning_rate": 6.660594437361703e-05, "loss": 4.509, "step": 729500 }, { "epoch": 0.67, "learning_rate": 6.658304758136605e-05, "loss": 4.5036, "step": 730000 }, { "epoch": 0.67, "learning_rate": 6.656015078911505e-05, "loss": 4.4942, "step": 730500 }, { "epoch": 0.67, "learning_rate": 6.653725399686406e-05, "loss": 4.4916, "step": 731000 }, { "epoch": 0.67, "learning_rate": 6.651444879178206e-05, "loss": 4.4935, "step": 731500 }, { "epoch": 0.67, "learning_rate": 6.649155199953107e-05, "loss": 4.5004, "step": 732000 }, { "epoch": 0.67, "learning_rate": 6.646865520728009e-05, "loss": 4.4926, "step": 732500 }, { "epoch": 0.67, "learning_rate": 6.64457584150291e-05, "loss": 4.4886, "step": 733000 }, { "epoch": 0.67, "learning_rate": 6.64228616227781e-05, "loss": 4.4934, "step": 733500 }, { "epoch": 0.67, "learning_rate": 6.63999648305271e-05, "loss": 4.4819, "step": 734000 }, { "epoch": 0.67, "learning_rate": 6.637706803827611e-05, "loss": 4.4935, "step": 734500 }, { "epoch": 0.67, "learning_rate": 6.635417124602513e-05, "loss": 4.4962, "step": 735000 }, { "epoch": 0.67, "learning_rate": 6.633127445377413e-05, "loss": 4.4903, "step": 735500 }, { "epoch": 0.67, "learning_rate": 6.630837766152314e-05, "loss": 4.5081, "step": 736000 }, { "epoch": 0.67, "learning_rate": 6.628552666285664e-05, "loss": 4.4942, "step": 736500 }, { "epoch": 0.67, "learning_rate": 6.626262987060565e-05, "loss": 4.5001, "step": 737000 }, { "epoch": 0.68, "learning_rate": 6.623973307835467e-05, "loss": 4.4875, "step": 737500 }, { "epoch": 0.68, "learning_rate": 6.621683628610367e-05, "loss": 4.4911, "step": 738000 }, { "epoch": 0.68, "learning_rate": 6.619398528743718e-05, "loss": 4.4985, "step": 738500 }, { "epoch": 0.68, "learning_rate": 6.617108849518618e-05, "loss": 4.4968, "step": 739000 }, { "epoch": 0.68, "learning_rate": 6.614819170293519e-05, "loss": 4.4834, "step": 739500 }, { "epoch": 0.68, "learning_rate": 6.612529491068419e-05, "loss": 4.4906, "step": 740000 }, { "epoch": 0.68, "learning_rate": 6.610239811843321e-05, "loss": 4.4881, "step": 740500 }, { "epoch": 0.68, "learning_rate": 6.607950132618222e-05, "loss": 4.4965, "step": 741000 }, { "epoch": 0.68, "learning_rate": 6.605660453393122e-05, "loss": 4.5056, "step": 741500 }, { "epoch": 0.68, "learning_rate": 6.603370774168023e-05, "loss": 4.5031, "step": 742000 }, { "epoch": 0.68, "learning_rate": 6.601081094942923e-05, "loss": 4.509, "step": 742500 }, { "epoch": 0.68, "learning_rate": 6.598795995076275e-05, "loss": 4.5112, "step": 743000 }, { "epoch": 0.68, "learning_rate": 6.596506315851176e-05, "loss": 4.5029, "step": 743500 }, { "epoch": 0.68, "learning_rate": 6.594216636626076e-05, "loss": 4.4892, "step": 744000 }, { "epoch": 0.68, "learning_rate": 6.591926957400977e-05, "loss": 4.4848, "step": 744500 }, { "epoch": 0.68, "learning_rate": 6.589641857534327e-05, "loss": 4.4839, "step": 745000 }, { "epoch": 0.68, "learning_rate": 6.587352178309228e-05, "loss": 4.4962, "step": 745500 }, { "epoch": 0.68, "learning_rate": 6.585062499084128e-05, "loss": 4.4872, "step": 746000 }, { "epoch": 0.68, "learning_rate": 6.582772819859029e-05, "loss": 4.4931, "step": 746500 }, { "epoch": 0.68, "learning_rate": 6.580487719992379e-05, "loss": 4.4849, "step": 747000 }, { "epoch": 0.68, "learning_rate": 6.578202620125731e-05, "loss": 4.4843, "step": 747500 }, { "epoch": 0.69, "learning_rate": 6.575912940900632e-05, "loss": 4.4984, "step": 748000 }, { "epoch": 0.69, "learning_rate": 6.573623261675532e-05, "loss": 4.4924, "step": 748500 }, { "epoch": 0.69, "learning_rate": 6.571333582450433e-05, "loss": 4.4967, "step": 749000 }, { "epoch": 0.69, "learning_rate": 6.569043903225333e-05, "loss": 4.4844, "step": 749500 }, { "epoch": 0.69, "learning_rate": 6.566754224000235e-05, "loss": 4.4798, "step": 750000 }, { "epoch": 0.69, "learning_rate": 6.564464544775135e-05, "loss": 4.498, "step": 750500 }, { "epoch": 0.69, "learning_rate": 6.562174865550036e-05, "loss": 4.486, "step": 751000 }, { "epoch": 0.69, "learning_rate": 6.559885186324936e-05, "loss": 4.4853, "step": 751500 }, { "epoch": 0.69, "learning_rate": 6.557600086458287e-05, "loss": 4.4833, "step": 752000 }, { "epoch": 0.69, "learning_rate": 6.555310407233189e-05, "loss": 4.4927, "step": 752500 }, { "epoch": 0.69, "learning_rate": 6.55302072800809e-05, "loss": 4.4805, "step": 753000 }, { "epoch": 0.69, "learning_rate": 6.55073104878299e-05, "loss": 4.4906, "step": 753500 }, { "epoch": 0.69, "learning_rate": 6.54844136955789e-05, "loss": 4.487, "step": 754000 }, { "epoch": 0.69, "learning_rate": 6.546151690332791e-05, "loss": 4.4895, "step": 754500 }, { "epoch": 0.69, "learning_rate": 6.543862011107691e-05, "loss": 4.4913, "step": 755000 }, { "epoch": 0.69, "learning_rate": 6.541572331882593e-05, "loss": 4.5076, "step": 755500 }, { "epoch": 0.69, "learning_rate": 6.539282652657494e-05, "loss": 4.4928, "step": 756000 }, { "epoch": 0.69, "learning_rate": 6.536997552790844e-05, "loss": 4.4865, "step": 756500 }, { "epoch": 0.69, "learning_rate": 6.534707873565745e-05, "loss": 4.4695, "step": 757000 }, { "epoch": 0.69, "learning_rate": 6.532418194340645e-05, "loss": 4.4996, "step": 757500 }, { "epoch": 0.69, "learning_rate": 6.530128515115547e-05, "loss": 4.5003, "step": 758000 }, { "epoch": 0.69, "learning_rate": 6.527838835890448e-05, "loss": 4.4859, "step": 758500 }, { "epoch": 0.7, "learning_rate": 6.525553736023798e-05, "loss": 4.4938, "step": 759000 }, { "epoch": 0.7, "learning_rate": 6.523264056798699e-05, "loss": 4.4893, "step": 759500 }, { "epoch": 0.7, "learning_rate": 6.520974377573599e-05, "loss": 4.4752, "step": 760000 }, { "epoch": 0.7, "learning_rate": 6.518684698348501e-05, "loss": 4.4843, "step": 760500 }, { "epoch": 0.7, "learning_rate": 6.516404177840302e-05, "loss": 4.4878, "step": 761000 }, { "epoch": 0.7, "learning_rate": 6.514114498615202e-05, "loss": 4.4936, "step": 761500 }, { "epoch": 0.7, "learning_rate": 6.511824819390103e-05, "loss": 4.4863, "step": 762000 }, { "epoch": 0.7, "learning_rate": 6.509535140165005e-05, "loss": 4.4873, "step": 762500 }, { "epoch": 0.7, "learning_rate": 6.507245460939905e-05, "loss": 4.4913, "step": 763000 }, { "epoch": 0.7, "learning_rate": 6.504955781714806e-05, "loss": 4.4785, "step": 763500 }, { "epoch": 0.7, "learning_rate": 6.502670681848156e-05, "loss": 4.4888, "step": 764000 }, { "epoch": 0.7, "learning_rate": 6.500381002623057e-05, "loss": 4.4967, "step": 764500 }, { "epoch": 0.7, "learning_rate": 6.498091323397957e-05, "loss": 4.4912, "step": 765000 }, { "epoch": 0.7, "learning_rate": 6.495801644172859e-05, "loss": 4.4846, "step": 765500 }, { "epoch": 0.7, "learning_rate": 6.49351196494776e-05, "loss": 4.4954, "step": 766000 }, { "epoch": 0.7, "learning_rate": 6.49122228572266e-05, "loss": 4.5018, "step": 766500 }, { "epoch": 0.7, "learning_rate": 6.488941765214461e-05, "loss": 4.4878, "step": 767000 }, { "epoch": 0.7, "learning_rate": 6.486652085989363e-05, "loss": 4.4775, "step": 767500 }, { "epoch": 0.7, "learning_rate": 6.484362406764263e-05, "loss": 4.488, "step": 768000 }, { "epoch": 0.7, "learning_rate": 6.482072727539164e-05, "loss": 4.5008, "step": 768500 }, { "epoch": 0.7, "learning_rate": 6.479783048314064e-05, "loss": 4.4915, "step": 769000 }, { "epoch": 0.7, "learning_rate": 6.477493369088963e-05, "loss": 4.4881, "step": 769500 }, { "epoch": 0.71, "learning_rate": 6.475203689863865e-05, "loss": 4.4886, "step": 770000 }, { "epoch": 0.71, "learning_rate": 6.472914010638766e-05, "loss": 4.4732, "step": 770500 }, { "epoch": 0.71, "learning_rate": 6.470624331413666e-05, "loss": 4.4746, "step": 771000 }, { "epoch": 0.71, "learning_rate": 6.468334652188567e-05, "loss": 4.4896, "step": 771500 }, { "epoch": 0.71, "learning_rate": 6.466044972963467e-05, "loss": 4.4905, "step": 772000 }, { "epoch": 0.71, "learning_rate": 6.463755293738368e-05, "loss": 4.4831, "step": 772500 }, { "epoch": 0.71, "learning_rate": 6.46147019387172e-05, "loss": 4.4831, "step": 773000 }, { "epoch": 0.71, "learning_rate": 6.45918051464662e-05, "loss": 4.5024, "step": 773500 }, { "epoch": 0.71, "learning_rate": 6.45689083542152e-05, "loss": 4.4825, "step": 774000 }, { "epoch": 0.71, "learning_rate": 6.454601156196421e-05, "loss": 4.4903, "step": 774500 }, { "epoch": 0.71, "learning_rate": 6.452311476971322e-05, "loss": 4.4875, "step": 775000 }, { "epoch": 0.71, "learning_rate": 6.450021797746223e-05, "loss": 4.4994, "step": 775500 }, { "epoch": 0.71, "learning_rate": 6.447732118521124e-05, "loss": 4.4986, "step": 776000 }, { "epoch": 0.71, "learning_rate": 6.445442439296024e-05, "loss": 4.5064, "step": 776500 }, { "epoch": 0.71, "learning_rate": 6.443157339429375e-05, "loss": 4.4808, "step": 777000 }, { "epoch": 0.71, "learning_rate": 6.440867660204275e-05, "loss": 4.4892, "step": 777500 }, { "epoch": 0.71, "learning_rate": 6.438577980979177e-05, "loss": 4.4802, "step": 778000 }, { "epoch": 0.71, "learning_rate": 6.436288301754078e-05, "loss": 4.4764, "step": 778500 }, { "epoch": 0.71, "learning_rate": 6.433998622528978e-05, "loss": 4.5041, "step": 779000 }, { "epoch": 0.71, "learning_rate": 6.431713522662329e-05, "loss": 4.4847, "step": 779500 }, { "epoch": 0.71, "learning_rate": 6.42942384343723e-05, "loss": 4.4783, "step": 780000 }, { "epoch": 0.71, "learning_rate": 6.427134164212131e-05, "loss": 4.4806, "step": 780500 }, { "epoch": 0.72, "learning_rate": 6.424844484987032e-05, "loss": 4.4777, "step": 781000 }, { "epoch": 0.72, "learning_rate": 6.422554805761932e-05, "loss": 4.4846, "step": 781500 }, { "epoch": 0.72, "learning_rate": 6.420265126536833e-05, "loss": 4.477, "step": 782000 }, { "epoch": 0.72, "learning_rate": 6.417980026670183e-05, "loss": 4.4904, "step": 782500 }, { "epoch": 0.72, "learning_rate": 6.415690347445085e-05, "loss": 4.498, "step": 783000 }, { "epoch": 0.72, "learning_rate": 6.413400668219986e-05, "loss": 4.4892, "step": 783500 }, { "epoch": 0.72, "learning_rate": 6.411115568353336e-05, "loss": 4.4924, "step": 784000 }, { "epoch": 0.72, "learning_rate": 6.408825889128237e-05, "loss": 4.4845, "step": 784500 }, { "epoch": 0.72, "learning_rate": 6.406536209903137e-05, "loss": 4.4904, "step": 785000 }, { "epoch": 0.72, "learning_rate": 6.404246530678039e-05, "loss": 4.4758, "step": 785500 }, { "epoch": 0.72, "learning_rate": 6.40195685145294e-05, "loss": 4.4753, "step": 786000 }, { "epoch": 0.72, "learning_rate": 6.39967175158629e-05, "loss": 4.488, "step": 786500 }, { "epoch": 0.72, "learning_rate": 6.397382072361191e-05, "loss": 4.4859, "step": 787000 }, { "epoch": 0.72, "learning_rate": 6.395092393136091e-05, "loss": 4.4974, "step": 787500 }, { "epoch": 0.72, "learning_rate": 6.392802713910993e-05, "loss": 4.4878, "step": 788000 }, { "epoch": 0.72, "learning_rate": 6.390513034685894e-05, "loss": 4.4861, "step": 788500 }, { "epoch": 0.72, "learning_rate": 6.388223355460794e-05, "loss": 4.4854, "step": 789000 }, { "epoch": 0.72, "learning_rate": 6.385933676235695e-05, "loss": 4.4921, "step": 789500 }, { "epoch": 0.72, "learning_rate": 6.383643997010595e-05, "loss": 4.494, "step": 790000 }, { "epoch": 0.72, "learning_rate": 6.381358897143947e-05, "loss": 4.4677, "step": 790500 }, { "epoch": 0.72, "learning_rate": 6.379069217918848e-05, "loss": 4.475, "step": 791000 }, { "epoch": 0.72, "learning_rate": 6.376784118052198e-05, "loss": 4.4795, "step": 791500 }, { "epoch": 0.73, "learning_rate": 6.374494438827099e-05, "loss": 4.4806, "step": 792000 }, { "epoch": 0.73, "learning_rate": 6.372204759601999e-05, "loss": 4.4931, "step": 792500 }, { "epoch": 0.73, "learning_rate": 6.369915080376901e-05, "loss": 4.4876, "step": 793000 }, { "epoch": 0.73, "learning_rate": 6.367625401151801e-05, "loss": 4.4863, "step": 793500 }, { "epoch": 0.73, "learning_rate": 6.365335721926702e-05, "loss": 4.4915, "step": 794000 }, { "epoch": 0.73, "learning_rate": 6.363046042701601e-05, "loss": 4.4881, "step": 794500 }, { "epoch": 0.73, "learning_rate": 6.360756363476502e-05, "loss": 4.4988, "step": 795000 }, { "epoch": 0.73, "learning_rate": 6.358471263609854e-05, "loss": 4.4825, "step": 795500 }, { "epoch": 0.73, "learning_rate": 6.356181584384754e-05, "loss": 4.4789, "step": 796000 }, { "epoch": 0.73, "learning_rate": 6.353896484518105e-05, "loss": 4.4815, "step": 796500 }, { "epoch": 0.73, "learning_rate": 6.351606805293005e-05, "loss": 4.4787, "step": 797000 }, { "epoch": 0.73, "learning_rate": 6.349317126067906e-05, "loss": 4.4824, "step": 797500 }, { "epoch": 0.73, "learning_rate": 6.347027446842807e-05, "loss": 4.4764, "step": 798000 }, { "epoch": 0.73, "learning_rate": 6.344737767617708e-05, "loss": 4.4851, "step": 798500 }, { "epoch": 0.73, "learning_rate": 6.342448088392608e-05, "loss": 4.4698, "step": 799000 }, { "epoch": 0.73, "learning_rate": 6.340158409167509e-05, "loss": 4.493, "step": 799500 }, { "epoch": 0.73, "learning_rate": 6.33786872994241e-05, "loss": 4.4822, "step": 800000 }, { "epoch": 0.73, "learning_rate": 6.335583630075761e-05, "loss": 4.4791, "step": 800500 }, { "epoch": 0.73, "learning_rate": 6.333293950850662e-05, "loss": 4.5043, "step": 801000 }, { "epoch": 0.73, "learning_rate": 6.331004271625562e-05, "loss": 4.4898, "step": 801500 }, { "epoch": 0.73, "learning_rate": 6.328714592400463e-05, "loss": 4.4771, "step": 802000 }, { "epoch": 0.73, "learning_rate": 6.326429492533813e-05, "loss": 4.4837, "step": 802500 }, { "epoch": 0.74, "learning_rate": 6.324139813308715e-05, "loss": 4.482, "step": 803000 }, { "epoch": 0.74, "learning_rate": 6.321850134083616e-05, "loss": 4.4697, "step": 803500 }, { "epoch": 0.74, "learning_rate": 6.319560454858516e-05, "loss": 4.4837, "step": 804000 }, { "epoch": 0.74, "learning_rate": 6.317279934350317e-05, "loss": 4.482, "step": 804500 }, { "epoch": 0.74, "learning_rate": 6.314990255125219e-05, "loss": 4.4807, "step": 805000 }, { "epoch": 0.74, "learning_rate": 6.31270057590012e-05, "loss": 4.4791, "step": 805500 }, { "epoch": 0.74, "learning_rate": 6.31041089667502e-05, "loss": 4.4841, "step": 806000 }, { "epoch": 0.74, "learning_rate": 6.30812121744992e-05, "loss": 4.4914, "step": 806500 }, { "epoch": 0.74, "learning_rate": 6.305831538224821e-05, "loss": 4.4678, "step": 807000 }, { "epoch": 0.74, "learning_rate": 6.303541858999721e-05, "loss": 4.4876, "step": 807500 }, { "epoch": 0.74, "learning_rate": 6.301252179774623e-05, "loss": 4.4782, "step": 808000 }, { "epoch": 0.74, "learning_rate": 6.298962500549524e-05, "loss": 4.4799, "step": 808500 }, { "epoch": 0.74, "learning_rate": 6.296672821324424e-05, "loss": 4.4772, "step": 809000 }, { "epoch": 0.74, "learning_rate": 6.294383142099325e-05, "loss": 4.4773, "step": 809500 }, { "epoch": 0.74, "learning_rate": 6.292098042232675e-05, "loss": 4.4862, "step": 810000 }, { "epoch": 0.74, "learning_rate": 6.289812942366027e-05, "loss": 4.4821, "step": 810500 }, { "epoch": 0.74, "learning_rate": 6.287523263140928e-05, "loss": 4.481, "step": 811000 }, { "epoch": 0.74, "learning_rate": 6.285233583915828e-05, "loss": 4.4859, "step": 811500 }, { "epoch": 0.74, "learning_rate": 6.282943904690729e-05, "loss": 4.4867, "step": 812000 }, { "epoch": 0.74, "learning_rate": 6.280654225465629e-05, "loss": 4.4705, "step": 812500 }, { "epoch": 0.74, "learning_rate": 6.278364546240531e-05, "loss": 4.4807, "step": 813000 }, { "epoch": 0.75, "learning_rate": 6.276074867015432e-05, "loss": 4.498, "step": 813500 }, { "epoch": 0.75, "learning_rate": 6.273785187790332e-05, "loss": 4.49, "step": 814000 }, { "epoch": 0.75, "learning_rate": 6.271495508565233e-05, "loss": 4.4779, "step": 814500 }, { "epoch": 0.75, "learning_rate": 6.269210408698583e-05, "loss": 4.483, "step": 815000 }, { "epoch": 0.75, "learning_rate": 6.266920729473485e-05, "loss": 4.4744, "step": 815500 }, { "epoch": 0.75, "learning_rate": 6.264631050248386e-05, "loss": 4.4862, "step": 816000 }, { "epoch": 0.75, "learning_rate": 6.262341371023286e-05, "loss": 4.4767, "step": 816500 }, { "epoch": 0.75, "learning_rate": 6.260051691798187e-05, "loss": 4.4874, "step": 817000 }, { "epoch": 0.75, "learning_rate": 6.257762012573087e-05, "loss": 4.4852, "step": 817500 }, { "epoch": 0.75, "learning_rate": 6.255476912706439e-05, "loss": 4.4666, "step": 818000 }, { "epoch": 0.75, "learning_rate": 6.253191812839788e-05, "loss": 4.4738, "step": 818500 }, { "epoch": 0.75, "learning_rate": 6.250902133614689e-05, "loss": 4.473, "step": 819000 }, { "epoch": 0.75, "learning_rate": 6.248612454389589e-05, "loss": 4.48, "step": 819500 }, { "epoch": 0.75, "learning_rate": 6.246322775164491e-05, "loss": 4.4844, "step": 820000 }, { "epoch": 0.75, "learning_rate": 6.244033095939392e-05, "loss": 4.4782, "step": 820500 }, { "epoch": 0.75, "learning_rate": 6.241743416714292e-05, "loss": 4.485, "step": 821000 }, { "epoch": 0.75, "learning_rate": 6.239453737489193e-05, "loss": 4.4845, "step": 821500 }, { "epoch": 0.75, "learning_rate": 6.237168637622543e-05, "loss": 4.4637, "step": 822000 }, { "epoch": 0.75, "learning_rate": 6.234878958397445e-05, "loss": 4.495, "step": 822500 }, { "epoch": 0.75, "learning_rate": 6.232589279172345e-05, "loss": 4.4822, "step": 823000 }, { "epoch": 0.75, "learning_rate": 6.230299599947246e-05, "loss": 4.4852, "step": 823500 }, { "epoch": 0.75, "learning_rate": 6.228009920722146e-05, "loss": 4.4744, "step": 824000 }, { "epoch": 0.76, "learning_rate": 6.225724820855497e-05, "loss": 4.4668, "step": 824500 }, { "epoch": 0.76, "learning_rate": 6.223435141630398e-05, "loss": 4.4675, "step": 825000 }, { "epoch": 0.76, "learning_rate": 6.2211454624053e-05, "loss": 4.4824, "step": 825500 }, { "epoch": 0.76, "learning_rate": 6.2188557831802e-05, "loss": 4.4708, "step": 826000 }, { "epoch": 0.76, "learning_rate": 6.2165661039551e-05, "loss": 4.4755, "step": 826500 }, { "epoch": 0.76, "learning_rate": 6.214276424730001e-05, "loss": 4.4785, "step": 827000 }, { "epoch": 0.76, "learning_rate": 6.211986745504901e-05, "loss": 4.4769, "step": 827500 }, { "epoch": 0.76, "learning_rate": 6.209697066279803e-05, "loss": 4.4802, "step": 828000 }, { "epoch": 0.76, "learning_rate": 6.207407387054704e-05, "loss": 4.4965, "step": 828500 }, { "epoch": 0.76, "learning_rate": 6.205117707829604e-05, "loss": 4.4981, "step": 829000 }, { "epoch": 0.76, "learning_rate": 6.202828028604505e-05, "loss": 4.4855, "step": 829500 }, { "epoch": 0.76, "learning_rate": 6.200538349379405e-05, "loss": 4.4754, "step": 830000 }, { "epoch": 0.76, "learning_rate": 6.198248670154306e-05, "loss": 4.4844, "step": 830500 }, { "epoch": 0.76, "learning_rate": 6.195963570287658e-05, "loss": 4.4691, "step": 831000 }, { "epoch": 0.76, "learning_rate": 6.193673891062558e-05, "loss": 4.4681, "step": 831500 }, { "epoch": 0.76, "learning_rate": 6.191384211837459e-05, "loss": 4.4673, "step": 832000 }, { "epoch": 0.76, "learning_rate": 6.189099111970809e-05, "loss": 4.4818, "step": 832500 }, { "epoch": 0.76, "learning_rate": 6.18680943274571e-05, "loss": 4.4755, "step": 833000 }, { "epoch": 0.76, "learning_rate": 6.184519753520612e-05, "loss": 4.4854, "step": 833500 }, { "epoch": 0.76, "learning_rate": 6.182230074295512e-05, "loss": 4.4871, "step": 834000 }, { "epoch": 0.76, "learning_rate": 6.179940395070413e-05, "loss": 4.4703, "step": 834500 }, { "epoch": 0.76, "learning_rate": 6.177650715845313e-05, "loss": 4.496, "step": 835000 }, { "epoch": 0.77, "learning_rate": 6.175361036620214e-05, "loss": 4.4752, "step": 835500 }, { "epoch": 0.77, "learning_rate": 6.173071357395116e-05, "loss": 4.4765, "step": 836000 }, { "epoch": 0.77, "learning_rate": 6.170781678170016e-05, "loss": 4.4596, "step": 836500 }, { "epoch": 0.77, "learning_rate": 6.168501157661817e-05, "loss": 4.4809, "step": 837000 }, { "epoch": 0.77, "learning_rate": 6.166211478436717e-05, "loss": 4.4796, "step": 837500 }, { "epoch": 0.77, "learning_rate": 6.163921799211618e-05, "loss": 4.4686, "step": 838000 }, { "epoch": 0.77, "learning_rate": 6.16163211998652e-05, "loss": 4.4818, "step": 838500 }, { "epoch": 0.77, "learning_rate": 6.15934244076142e-05, "loss": 4.4796, "step": 839000 }, { "epoch": 0.77, "learning_rate": 6.15705276153632e-05, "loss": 4.4713, "step": 839500 }, { "epoch": 0.77, "learning_rate": 6.154763082311221e-05, "loss": 4.4912, "step": 840000 }, { "epoch": 0.77, "learning_rate": 6.152473403086122e-05, "loss": 4.4758, "step": 840500 }, { "epoch": 0.77, "learning_rate": 6.150183723861022e-05, "loss": 4.478, "step": 841000 }, { "epoch": 0.77, "learning_rate": 6.147898623994374e-05, "loss": 4.4693, "step": 841500 }, { "epoch": 0.77, "learning_rate": 6.145608944769274e-05, "loss": 4.4646, "step": 842000 }, { "epoch": 0.77, "learning_rate": 6.143319265544175e-05, "loss": 4.4822, "step": 842500 }, { "epoch": 0.77, "learning_rate": 6.141029586319075e-05, "loss": 4.4715, "step": 843000 }, { "epoch": 0.77, "learning_rate": 6.138739907093976e-05, "loss": 4.4706, "step": 843500 }, { "epoch": 0.77, "learning_rate": 6.136454807227327e-05, "loss": 4.4766, "step": 844000 }, { "epoch": 0.77, "learning_rate": 6.134165128002227e-05, "loss": 4.4681, "step": 844500 }, { "epoch": 0.77, "learning_rate": 6.131880028135578e-05, "loss": 4.4651, "step": 845000 }, { "epoch": 0.77, "learning_rate": 6.12959034891048e-05, "loss": 4.4819, "step": 845500 }, { "epoch": 0.77, "learning_rate": 6.12730524904383e-05, "loss": 4.4691, "step": 846000 }, { "epoch": 0.78, "learning_rate": 6.12501556981873e-05, "loss": 4.4718, "step": 846500 }, { "epoch": 0.78, "learning_rate": 6.122725890593631e-05, "loss": 4.4699, "step": 847000 }, { "epoch": 0.78, "learning_rate": 6.120436211368532e-05, "loss": 4.4549, "step": 847500 }, { "epoch": 0.78, "learning_rate": 6.118146532143433e-05, "loss": 4.4691, "step": 848000 }, { "epoch": 0.78, "learning_rate": 6.115856852918334e-05, "loss": 4.4604, "step": 848500 }, { "epoch": 0.78, "learning_rate": 6.113567173693234e-05, "loss": 4.4737, "step": 849000 }, { "epoch": 0.78, "learning_rate": 6.111277494468135e-05, "loss": 4.4751, "step": 849500 }, { "epoch": 0.78, "learning_rate": 6.108987815243035e-05, "loss": 4.4875, "step": 850000 }, { "epoch": 0.78, "learning_rate": 6.106698136017936e-05, "loss": 4.4808, "step": 850500 }, { "epoch": 0.78, "learning_rate": 6.104408456792838e-05, "loss": 4.4795, "step": 851000 }, { "epoch": 0.78, "learning_rate": 6.1021233569261884e-05, "loss": 4.4743, "step": 851500 }, { "epoch": 0.78, "learning_rate": 6.099833677701089e-05, "loss": 4.479, "step": 852000 }, { "epoch": 0.78, "learning_rate": 6.0975439984759894e-05, "loss": 4.4732, "step": 852500 }, { "epoch": 0.78, "learning_rate": 6.0952543192508905e-05, "loss": 4.4726, "step": 853000 }, { "epoch": 0.78, "learning_rate": 6.092964640025791e-05, "loss": 4.4756, "step": 853500 }, { "epoch": 0.78, "learning_rate": 6.0906749608006916e-05, "loss": 4.4693, "step": 854000 }, { "epoch": 0.78, "learning_rate": 6.088385281575593e-05, "loss": 4.4672, "step": 854500 }, { "epoch": 0.78, "learning_rate": 6.086095602350493e-05, "loss": 4.4678, "step": 855000 }, { "epoch": 0.78, "learning_rate": 6.0838105024838445e-05, "loss": 4.4858, "step": 855500 }, { "epoch": 0.78, "learning_rate": 6.081525402617195e-05, "loss": 4.4714, "step": 856000 }, { "epoch": 0.78, "learning_rate": 6.079235723392096e-05, "loss": 4.4786, "step": 856500 }, { "epoch": 0.78, "learning_rate": 6.076946044166997e-05, "loss": 4.4755, "step": 857000 }, { "epoch": 0.79, "learning_rate": 6.074656364941897e-05, "loss": 4.4815, "step": 857500 }, { "epoch": 0.79, "learning_rate": 6.0723712650752485e-05, "loss": 4.4703, "step": 858000 }, { "epoch": 0.79, "learning_rate": 6.070081585850149e-05, "loss": 4.4696, "step": 858500 }, { "epoch": 0.79, "learning_rate": 6.06779190662505e-05, "loss": 4.4768, "step": 859000 }, { "epoch": 0.79, "learning_rate": 6.065502227399951e-05, "loss": 4.4688, "step": 859500 }, { "epoch": 0.79, "learning_rate": 6.063212548174851e-05, "loss": 4.4697, "step": 860000 }, { "epoch": 0.79, "learning_rate": 6.0609228689497523e-05, "loss": 4.4655, "step": 860500 }, { "epoch": 0.79, "learning_rate": 6.058637769083103e-05, "loss": 4.4768, "step": 861000 }, { "epoch": 0.79, "learning_rate": 6.056348089858004e-05, "loss": 4.481, "step": 861500 }, { "epoch": 0.79, "learning_rate": 6.0540584106329046e-05, "loss": 4.4708, "step": 862000 }, { "epoch": 0.79, "learning_rate": 6.051768731407805e-05, "loss": 4.47, "step": 862500 }, { "epoch": 0.79, "learning_rate": 6.049479052182706e-05, "loss": 4.4781, "step": 863000 }, { "epoch": 0.79, "learning_rate": 6.047189372957607e-05, "loss": 4.4652, "step": 863500 }, { "epoch": 0.79, "learning_rate": 6.044899693732507e-05, "loss": 4.4646, "step": 864000 }, { "epoch": 0.79, "learning_rate": 6.0426145938658585e-05, "loss": 4.4735, "step": 864500 }, { "epoch": 0.79, "learning_rate": 6.040324914640759e-05, "loss": 4.4904, "step": 865000 }, { "epoch": 0.79, "learning_rate": 6.03803523541566e-05, "loss": 4.4646, "step": 865500 }, { "epoch": 0.79, "learning_rate": 6.035745556190561e-05, "loss": 4.4856, "step": 866000 }, { "epoch": 0.79, "learning_rate": 6.033455876965461e-05, "loss": 4.4743, "step": 866500 }, { "epoch": 0.79, "learning_rate": 6.0311661977403624e-05, "loss": 4.4914, "step": 867000 }, { "epoch": 0.79, "learning_rate": 6.028876518515263e-05, "loss": 4.4764, "step": 867500 }, { "epoch": 0.79, "learning_rate": 6.026591418648613e-05, "loss": 4.4819, "step": 868000 }, { "epoch": 0.8, "learning_rate": 6.024301739423513e-05, "loss": 4.4666, "step": 868500 }, { "epoch": 0.8, "learning_rate": 6.0220120601984145e-05, "loss": 4.4538, "step": 869000 }, { "epoch": 0.8, "learning_rate": 6.019722380973315e-05, "loss": 4.4857, "step": 869500 }, { "epoch": 0.8, "learning_rate": 6.0174327017482155e-05, "loss": 4.4711, "step": 870000 }, { "epoch": 0.8, "learning_rate": 6.015143022523117e-05, "loss": 4.4813, "step": 870500 }, { "epoch": 0.8, "learning_rate": 6.012853343298017e-05, "loss": 4.4773, "step": 871000 }, { "epoch": 0.8, "learning_rate": 6.010563664072918e-05, "loss": 4.4837, "step": 871500 }, { "epoch": 0.8, "learning_rate": 6.008278564206269e-05, "loss": 4.4824, "step": 872000 }, { "epoch": 0.8, "learning_rate": 6.00599804369807e-05, "loss": 4.4751, "step": 872500 }, { "epoch": 0.8, "learning_rate": 6.003708364472971e-05, "loss": 4.4766, "step": 873000 }, { "epoch": 0.8, "learning_rate": 6.001418685247871e-05, "loss": 4.4763, "step": 873500 }, { "epoch": 0.8, "learning_rate": 5.9991290060227724e-05, "loss": 4.4768, "step": 874000 }, { "epoch": 0.8, "learning_rate": 5.996839326797673e-05, "loss": 4.4807, "step": 874500 }, { "epoch": 0.8, "learning_rate": 5.9945496475725734e-05, "loss": 4.4645, "step": 875000 }, { "epoch": 0.8, "learning_rate": 5.9922599683474746e-05, "loss": 4.47, "step": 875500 }, { "epoch": 0.8, "learning_rate": 5.989970289122375e-05, "loss": 4.4806, "step": 876000 }, { "epoch": 0.8, "learning_rate": 5.987680609897276e-05, "loss": 4.4734, "step": 876500 }, { "epoch": 0.8, "learning_rate": 5.985390930672177e-05, "loss": 4.4678, "step": 877000 }, { "epoch": 0.8, "learning_rate": 5.983101251447077e-05, "loss": 4.4711, "step": 877500 }, { "epoch": 0.8, "learning_rate": 5.9808161515804285e-05, "loss": 4.461, "step": 878000 }, { "epoch": 0.8, "learning_rate": 5.978526472355329e-05, "loss": 4.4693, "step": 878500 }, { "epoch": 0.81, "learning_rate": 5.9762367931302295e-05, "loss": 4.4749, "step": 879000 }, { "epoch": 0.81, "learning_rate": 5.973947113905131e-05, "loss": 4.4531, "step": 879500 }, { "epoch": 0.81, "learning_rate": 5.971657434680031e-05, "loss": 4.481, "step": 880000 }, { "epoch": 0.81, "learning_rate": 5.9693723348133825e-05, "loss": 4.4606, "step": 880500 }, { "epoch": 0.81, "learning_rate": 5.967082655588283e-05, "loss": 4.4707, "step": 881000 }, { "epoch": 0.81, "learning_rate": 5.9647929763631835e-05, "loss": 4.4672, "step": 881500 }, { "epoch": 0.81, "learning_rate": 5.962503297138085e-05, "loss": 4.4826, "step": 882000 }, { "epoch": 0.81, "learning_rate": 5.960213617912985e-05, "loss": 4.4719, "step": 882500 }, { "epoch": 0.81, "learning_rate": 5.957923938687886e-05, "loss": 4.4696, "step": 883000 }, { "epoch": 0.81, "learning_rate": 5.955638838821237e-05, "loss": 4.4569, "step": 883500 }, { "epoch": 0.81, "learning_rate": 5.9533491595961374e-05, "loss": 4.4778, "step": 884000 }, { "epoch": 0.81, "learning_rate": 5.9510594803710386e-05, "loss": 4.4643, "step": 884500 }, { "epoch": 0.81, "learning_rate": 5.948769801145939e-05, "loss": 4.4609, "step": 885000 }, { "epoch": 0.81, "learning_rate": 5.9464801219208396e-05, "loss": 4.4698, "step": 885500 }, { "epoch": 0.81, "learning_rate": 5.944190442695741e-05, "loss": 4.4731, "step": 886000 }, { "epoch": 0.81, "learning_rate": 5.941900763470641e-05, "loss": 4.471, "step": 886500 }, { "epoch": 0.81, "learning_rate": 5.939611084245542e-05, "loss": 4.4805, "step": 887000 }, { "epoch": 0.81, "learning_rate": 5.937325984378893e-05, "loss": 4.4689, "step": 887500 }, { "epoch": 0.81, "learning_rate": 5.9350363051537935e-05, "loss": 4.4522, "step": 888000 }, { "epoch": 0.81, "learning_rate": 5.932746625928695e-05, "loss": 4.4632, "step": 888500 }, { "epoch": 0.81, "learning_rate": 5.930456946703595e-05, "loss": 4.4881, "step": 889000 }, { "epoch": 0.81, "learning_rate": 5.928167267478496e-05, "loss": 4.4617, "step": 889500 }, { "epoch": 0.82, "learning_rate": 5.925882167611847e-05, "loss": 4.4716, "step": 890000 }, { "epoch": 0.82, "learning_rate": 5.9235924883867475e-05, "loss": 4.4492, "step": 890500 }, { "epoch": 0.82, "learning_rate": 5.9213028091616487e-05, "loss": 4.4725, "step": 891000 }, { "epoch": 0.82, "learning_rate": 5.919013129936549e-05, "loss": 4.4615, "step": 891500 }, { "epoch": 0.82, "learning_rate": 5.916728030069899e-05, "loss": 4.4577, "step": 892000 }, { "epoch": 0.82, "learning_rate": 5.9144383508447996e-05, "loss": 4.4576, "step": 892500 }, { "epoch": 0.82, "learning_rate": 5.912148671619701e-05, "loss": 4.4684, "step": 893000 }, { "epoch": 0.82, "learning_rate": 5.909858992394601e-05, "loss": 4.4504, "step": 893500 }, { "epoch": 0.82, "learning_rate": 5.907569313169502e-05, "loss": 4.4747, "step": 894000 }, { "epoch": 0.82, "learning_rate": 5.905279633944403e-05, "loss": 4.4656, "step": 894500 }, { "epoch": 0.82, "learning_rate": 5.9029945340777535e-05, "loss": 4.4636, "step": 895000 }, { "epoch": 0.82, "learning_rate": 5.900704854852655e-05, "loss": 4.4705, "step": 895500 }, { "epoch": 0.82, "learning_rate": 5.898415175627555e-05, "loss": 4.4647, "step": 896000 }, { "epoch": 0.82, "learning_rate": 5.896125496402456e-05, "loss": 4.465, "step": 896500 }, { "epoch": 0.82, "learning_rate": 5.893840396535807e-05, "loss": 4.4671, "step": 897000 }, { "epoch": 0.82, "learning_rate": 5.891555296669158e-05, "loss": 4.4645, "step": 897500 }, { "epoch": 0.82, "learning_rate": 5.889270196802509e-05, "loss": 4.4556, "step": 898000 }, { "epoch": 0.82, "learning_rate": 5.886980517577409e-05, "loss": 4.4744, "step": 898500 }, { "epoch": 0.82, "learning_rate": 5.8846908383523104e-05, "loss": 4.4529, "step": 899000 }, { "epoch": 0.82, "learning_rate": 5.882401159127211e-05, "loss": 4.4646, "step": 899500 }, { "epoch": 0.82, "learning_rate": 5.8801114799021114e-05, "loss": 4.4742, "step": 900000 }, { "epoch": 0.82, "learning_rate": 5.8778218006770126e-05, "loss": 4.4663, "step": 900500 }, { "epoch": 0.83, "learning_rate": 5.875532121451913e-05, "loss": 4.4505, "step": 901000 }, { "epoch": 0.83, "learning_rate": 5.8732470215852643e-05, "loss": 4.4694, "step": 901500 }, { "epoch": 0.83, "learning_rate": 5.870957342360165e-05, "loss": 4.468, "step": 902000 }, { "epoch": 0.83, "learning_rate": 5.8686676631350654e-05, "loss": 4.4629, "step": 902500 }, { "epoch": 0.83, "learning_rate": 5.8663779839099665e-05, "loss": 4.4696, "step": 903000 }, { "epoch": 0.83, "learning_rate": 5.864088304684867e-05, "loss": 4.4589, "step": 903500 }, { "epoch": 0.83, "learning_rate": 5.8617986254597675e-05, "loss": 4.4719, "step": 904000 }, { "epoch": 0.83, "learning_rate": 5.859508946234669e-05, "loss": 4.4714, "step": 904500 }, { "epoch": 0.83, "learning_rate": 5.857219267009569e-05, "loss": 4.4543, "step": 905000 }, { "epoch": 0.83, "learning_rate": 5.8549341671429205e-05, "loss": 4.4537, "step": 905500 }, { "epoch": 0.83, "learning_rate": 5.852644487917821e-05, "loss": 4.4706, "step": 906000 }, { "epoch": 0.83, "learning_rate": 5.8503548086927215e-05, "loss": 4.4554, "step": 906500 }, { "epoch": 0.83, "learning_rate": 5.8480651294676227e-05, "loss": 4.4572, "step": 907000 }, { "epoch": 0.83, "learning_rate": 5.845775450242523e-05, "loss": 4.473, "step": 907500 }, { "epoch": 0.83, "learning_rate": 5.8434903503758744e-05, "loss": 4.4681, "step": 908000 }, { "epoch": 0.83, "learning_rate": 5.841200671150775e-05, "loss": 4.4623, "step": 908500 }, { "epoch": 0.83, "learning_rate": 5.8389109919256754e-05, "loss": 4.4483, "step": 909000 }, { "epoch": 0.83, "learning_rate": 5.8366213127005766e-05, "loss": 4.462, "step": 909500 }, { "epoch": 0.83, "learning_rate": 5.834331633475477e-05, "loss": 4.4732, "step": 910000 }, { "epoch": 0.83, "learning_rate": 5.8320419542503776e-05, "loss": 4.4515, "step": 910500 }, { "epoch": 0.83, "learning_rate": 5.829752275025279e-05, "loss": 4.4718, "step": 911000 }, { "epoch": 0.83, "learning_rate": 5.827462595800179e-05, "loss": 4.4965, "step": 911500 }, { "epoch": 0.84, "learning_rate": 5.8251774959335305e-05, "loss": 4.464, "step": 912000 }, { "epoch": 0.84, "learning_rate": 5.822887816708431e-05, "loss": 4.4782, "step": 912500 }, { "epoch": 0.84, "learning_rate": 5.8205981374833315e-05, "loss": 4.4587, "step": 913000 }, { "epoch": 0.84, "learning_rate": 5.818308458258233e-05, "loss": 4.4595, "step": 913500 }, { "epoch": 0.84, "learning_rate": 5.816023358391583e-05, "loss": 4.4743, "step": 914000 }, { "epoch": 0.84, "learning_rate": 5.8137336791664845e-05, "loss": 4.4763, "step": 914500 }, { "epoch": 0.84, "learning_rate": 5.811443999941385e-05, "loss": 4.4679, "step": 915000 }, { "epoch": 0.84, "learning_rate": 5.809158900074736e-05, "loss": 4.462, "step": 915500 }, { "epoch": 0.84, "learning_rate": 5.8068692208496354e-05, "loss": 4.4646, "step": 916000 }, { "epoch": 0.84, "learning_rate": 5.8045795416245365e-05, "loss": 4.4671, "step": 916500 }, { "epoch": 0.84, "learning_rate": 5.802289862399437e-05, "loss": 4.4587, "step": 917000 }, { "epoch": 0.84, "learning_rate": 5.8000001831743375e-05, "loss": 4.4642, "step": 917500 }, { "epoch": 0.84, "learning_rate": 5.797710503949239e-05, "loss": 4.4569, "step": 918000 }, { "epoch": 0.84, "learning_rate": 5.795420824724139e-05, "loss": 4.476, "step": 918500 }, { "epoch": 0.84, "learning_rate": 5.7931403042159405e-05, "loss": 4.4579, "step": 919000 }, { "epoch": 0.84, "learning_rate": 5.790850624990841e-05, "loss": 4.4603, "step": 919500 }, { "epoch": 0.84, "learning_rate": 5.788560945765742e-05, "loss": 4.4582, "step": 920000 }, { "epoch": 0.84, "learning_rate": 5.786271266540643e-05, "loss": 4.4747, "step": 920500 }, { "epoch": 0.84, "learning_rate": 5.783981587315543e-05, "loss": 4.47, "step": 921000 }, { "epoch": 0.84, "learning_rate": 5.7816919080904444e-05, "loss": 4.4554, "step": 921500 }, { "epoch": 0.84, "learning_rate": 5.779402228865345e-05, "loss": 4.4655, "step": 922000 }, { "epoch": 0.84, "learning_rate": 5.7771125496402454e-05, "loss": 4.4533, "step": 922500 }, { "epoch": 0.85, "learning_rate": 5.7748228704151466e-05, "loss": 4.4714, "step": 923000 }, { "epoch": 0.85, "learning_rate": 5.772533191190047e-05, "loss": 4.4669, "step": 923500 }, { "epoch": 0.85, "learning_rate": 5.7702435119649476e-05, "loss": 4.465, "step": 924000 }, { "epoch": 0.85, "learning_rate": 5.767958412098299e-05, "loss": 4.4707, "step": 924500 }, { "epoch": 0.85, "learning_rate": 5.7656687328731994e-05, "loss": 4.4746, "step": 925000 }, { "epoch": 0.85, "learning_rate": 5.7633790536481005e-05, "loss": 4.4606, "step": 925500 }, { "epoch": 0.85, "learning_rate": 5.761089374423001e-05, "loss": 4.458, "step": 926000 }, { "epoch": 0.85, "learning_rate": 5.7587996951979015e-05, "loss": 4.4583, "step": 926500 }, { "epoch": 0.85, "learning_rate": 5.756510015972803e-05, "loss": 4.4605, "step": 927000 }, { "epoch": 0.85, "learning_rate": 5.754220336747703e-05, "loss": 4.4662, "step": 927500 }, { "epoch": 0.85, "learning_rate": 5.7519352368810545e-05, "loss": 4.4753, "step": 928000 }, { "epoch": 0.85, "learning_rate": 5.749645557655955e-05, "loss": 4.462, "step": 928500 }, { "epoch": 0.85, "learning_rate": 5.7473558784308555e-05, "loss": 4.4548, "step": 929000 }, { "epoch": 0.85, "learning_rate": 5.7450661992057567e-05, "loss": 4.4704, "step": 929500 }, { "epoch": 0.85, "learning_rate": 5.742776519980657e-05, "loss": 4.4667, "step": 930000 }, { "epoch": 0.85, "learning_rate": 5.740486840755558e-05, "loss": 4.4559, "step": 930500 }, { "epoch": 0.85, "learning_rate": 5.738197161530459e-05, "loss": 4.4784, "step": 931000 }, { "epoch": 0.85, "learning_rate": 5.7359074823053594e-05, "loss": 4.4705, "step": 931500 }, { "epoch": 0.85, "learning_rate": 5.7336223824387106e-05, "loss": 4.4697, "step": 932000 }, { "epoch": 0.85, "learning_rate": 5.731332703213611e-05, "loss": 4.4533, "step": 932500 }, { "epoch": 0.85, "learning_rate": 5.7290430239885116e-05, "loss": 4.4718, "step": 933000 }, { "epoch": 0.85, "learning_rate": 5.726753344763413e-05, "loss": 4.4582, "step": 933500 }, { "epoch": 0.86, "learning_rate": 5.7244682448967633e-05, "loss": 4.4423, "step": 934000 }, { "epoch": 0.86, "learning_rate": 5.7221785656716645e-05, "loss": 4.4568, "step": 934500 }, { "epoch": 0.86, "learning_rate": 5.719888886446565e-05, "loss": 4.4628, "step": 935000 }, { "epoch": 0.86, "learning_rate": 5.7175992072214655e-05, "loss": 4.4638, "step": 935500 }, { "epoch": 0.86, "learning_rate": 5.715318686713267e-05, "loss": 4.4489, "step": 936000 }, { "epoch": 0.86, "learning_rate": 5.7130290074881673e-05, "loss": 4.4605, "step": 936500 }, { "epoch": 0.86, "learning_rate": 5.7107439076215186e-05, "loss": 4.4577, "step": 937000 }, { "epoch": 0.86, "learning_rate": 5.708454228396419e-05, "loss": 4.4657, "step": 937500 }, { "epoch": 0.86, "learning_rate": 5.70616454917132e-05, "loss": 4.4626, "step": 938000 }, { "epoch": 0.86, "learning_rate": 5.703874869946221e-05, "loss": 4.459, "step": 938500 }, { "epoch": 0.86, "learning_rate": 5.701589770079572e-05, "loss": 4.4595, "step": 939000 }, { "epoch": 0.86, "learning_rate": 5.6993000908544725e-05, "loss": 4.4684, "step": 939500 }, { "epoch": 0.86, "learning_rate": 5.6970149909878224e-05, "loss": 4.4629, "step": 940000 }, { "epoch": 0.86, "learning_rate": 5.694725311762723e-05, "loss": 4.4678, "step": 940500 }, { "epoch": 0.86, "learning_rate": 5.692435632537624e-05, "loss": 4.4511, "step": 941000 }, { "epoch": 0.86, "learning_rate": 5.6901459533125246e-05, "loss": 4.4574, "step": 941500 }, { "epoch": 0.86, "learning_rate": 5.687856274087425e-05, "loss": 4.4715, "step": 942000 }, { "epoch": 0.86, "learning_rate": 5.685566594862326e-05, "loss": 4.4559, "step": 942500 }, { "epoch": 0.86, "learning_rate": 5.683276915637227e-05, "loss": 4.4523, "step": 943000 }, { "epoch": 0.86, "learning_rate": 5.680987236412127e-05, "loss": 4.4599, "step": 943500 }, { "epoch": 0.86, "learning_rate": 5.6786975571870285e-05, "loss": 4.4656, "step": 944000 }, { "epoch": 0.87, "learning_rate": 5.676407877961929e-05, "loss": 4.4454, "step": 944500 }, { "epoch": 0.87, "learning_rate": 5.6741181987368295e-05, "loss": 4.462, "step": 945000 }, { "epoch": 0.87, "learning_rate": 5.6718285195117307e-05, "loss": 4.463, "step": 945500 }, { "epoch": 0.87, "learning_rate": 5.669538840286631e-05, "loss": 4.464, "step": 946000 }, { "epoch": 0.87, "learning_rate": 5.6672537404199824e-05, "loss": 4.4504, "step": 946500 }, { "epoch": 0.87, "learning_rate": 5.664964061194883e-05, "loss": 4.4638, "step": 947000 }, { "epoch": 0.87, "learning_rate": 5.6626743819697834e-05, "loss": 4.4574, "step": 947500 }, { "epoch": 0.87, "learning_rate": 5.6603847027446846e-05, "loss": 4.4484, "step": 948000 }, { "epoch": 0.87, "learning_rate": 5.658095023519585e-05, "loss": 4.4596, "step": 948500 }, { "epoch": 0.87, "learning_rate": 5.6558053442944856e-05, "loss": 4.4733, "step": 949000 }, { "epoch": 0.87, "learning_rate": 5.653515665069387e-05, "loss": 4.4529, "step": 949500 }, { "epoch": 0.87, "learning_rate": 5.6512305652027373e-05, "loss": 4.4576, "step": 950000 }, { "epoch": 0.87, "learning_rate": 5.6489408859776385e-05, "loss": 4.462, "step": 950500 }, { "epoch": 0.87, "learning_rate": 5.646651206752539e-05, "loss": 4.4558, "step": 951000 }, { "epoch": 0.87, "learning_rate": 5.6443615275274395e-05, "loss": 4.4614, "step": 951500 }, { "epoch": 0.87, "learning_rate": 5.642071848302341e-05, "loss": 4.4527, "step": 952000 }, { "epoch": 0.87, "learning_rate": 5.639782169077241e-05, "loss": 4.4569, "step": 952500 }, { "epoch": 0.87, "learning_rate": 5.637492489852142e-05, "loss": 4.4526, "step": 953000 }, { "epoch": 0.87, "learning_rate": 5.635202810627043e-05, "loss": 4.4473, "step": 953500 }, { "epoch": 0.87, "learning_rate": 5.6329177107603935e-05, "loss": 4.4534, "step": 954000 }, { "epoch": 0.87, "learning_rate": 5.6306280315352947e-05, "loss": 4.468, "step": 954500 }, { "epoch": 0.87, "learning_rate": 5.628338352310195e-05, "loss": 4.4482, "step": 955000 }, { "epoch": 0.88, "learning_rate": 5.626048673085096e-05, "loss": 4.4639, "step": 955500 }, { "epoch": 0.88, "learning_rate": 5.623758993859997e-05, "loss": 4.4607, "step": 956000 }, { "epoch": 0.88, "learning_rate": 5.6214784733517975e-05, "loss": 4.4634, "step": 956500 }, { "epoch": 0.88, "learning_rate": 5.6191887941266986e-05, "loss": 4.4818, "step": 957000 }, { "epoch": 0.88, "learning_rate": 5.616899114901599e-05, "loss": 4.456, "step": 957500 }, { "epoch": 0.88, "learning_rate": 5.6146094356764997e-05, "loss": 4.4769, "step": 958000 }, { "epoch": 0.88, "learning_rate": 5.612319756451401e-05, "loss": 4.4402, "step": 958500 }, { "epoch": 0.88, "learning_rate": 5.6100300772263013e-05, "loss": 4.4515, "step": 959000 }, { "epoch": 0.88, "learning_rate": 5.6077403980012025e-05, "loss": 4.4555, "step": 959500 }, { "epoch": 0.88, "learning_rate": 5.605450718776103e-05, "loss": 4.4631, "step": 960000 }, { "epoch": 0.88, "learning_rate": 5.603170198267904e-05, "loss": 4.4536, "step": 960500 }, { "epoch": 0.88, "learning_rate": 5.600880519042805e-05, "loss": 4.4558, "step": 961000 }, { "epoch": 0.88, "learning_rate": 5.598590839817705e-05, "loss": 4.4659, "step": 961500 }, { "epoch": 0.88, "learning_rate": 5.5963011605926065e-05, "loss": 4.4566, "step": 962000 }, { "epoch": 0.88, "learning_rate": 5.594011481367507e-05, "loss": 4.4548, "step": 962500 }, { "epoch": 0.88, "learning_rate": 5.5917218021424075e-05, "loss": 4.4508, "step": 963000 }, { "epoch": 0.88, "learning_rate": 5.589436702275759e-05, "loss": 4.4575, "step": 963500 }, { "epoch": 0.88, "learning_rate": 5.587147023050659e-05, "loss": 4.4541, "step": 964000 }, { "epoch": 0.88, "learning_rate": 5.584857343825559e-05, "loss": 4.4524, "step": 964500 }, { "epoch": 0.88, "learning_rate": 5.5825676646004596e-05, "loss": 4.4607, "step": 965000 }, { "epoch": 0.88, "learning_rate": 5.580277985375361e-05, "loss": 4.4457, "step": 965500 }, { "epoch": 0.88, "learning_rate": 5.577988306150261e-05, "loss": 4.4556, "step": 966000 }, { "epoch": 0.89, "learning_rate": 5.575698626925162e-05, "loss": 4.465, "step": 966500 }, { "epoch": 0.89, "learning_rate": 5.573408947700063e-05, "loss": 4.4595, "step": 967000 }, { "epoch": 0.89, "learning_rate": 5.5711192684749635e-05, "loss": 4.4535, "step": 967500 }, { "epoch": 0.89, "learning_rate": 5.568829589249864e-05, "loss": 4.4625, "step": 968000 }, { "epoch": 0.89, "learning_rate": 5.566539910024765e-05, "loss": 4.4656, "step": 968500 }, { "epoch": 0.89, "learning_rate": 5.564254810158116e-05, "loss": 4.4637, "step": 969000 }, { "epoch": 0.89, "learning_rate": 5.561965130933017e-05, "loss": 4.4502, "step": 969500 }, { "epoch": 0.89, "learning_rate": 5.5596754517079174e-05, "loss": 4.4569, "step": 970000 }, { "epoch": 0.89, "learning_rate": 5.557385772482818e-05, "loss": 4.4592, "step": 970500 }, { "epoch": 0.89, "learning_rate": 5.555096093257719e-05, "loss": 4.4659, "step": 971000 }, { "epoch": 0.89, "learning_rate": 5.55281099339107e-05, "loss": 4.4468, "step": 971500 }, { "epoch": 0.89, "learning_rate": 5.550521314165971e-05, "loss": 4.4562, "step": 972000 }, { "epoch": 0.89, "learning_rate": 5.5482316349408713e-05, "loss": 4.4603, "step": 972500 }, { "epoch": 0.89, "learning_rate": 5.545941955715772e-05, "loss": 4.4577, "step": 973000 }, { "epoch": 0.89, "learning_rate": 5.543652276490673e-05, "loss": 4.4514, "step": 973500 }, { "epoch": 0.89, "learning_rate": 5.5413671766240236e-05, "loss": 4.4476, "step": 974000 }, { "epoch": 0.89, "learning_rate": 5.539077497398925e-05, "loss": 4.4569, "step": 974500 }, { "epoch": 0.89, "learning_rate": 5.536787818173825e-05, "loss": 4.4554, "step": 975000 }, { "epoch": 0.89, "learning_rate": 5.5345027183071765e-05, "loss": 4.4596, "step": 975500 }, { "epoch": 0.89, "learning_rate": 5.532213039082077e-05, "loss": 4.4515, "step": 976000 }, { "epoch": 0.89, "learning_rate": 5.5299233598569775e-05, "loss": 4.4624, "step": 976500 }, { "epoch": 0.89, "learning_rate": 5.527633680631879e-05, "loss": 4.4495, "step": 977000 }, { "epoch": 0.9, "learning_rate": 5.525344001406779e-05, "loss": 4.4549, "step": 977500 }, { "epoch": 0.9, "learning_rate": 5.52305432218168e-05, "loss": 4.4513, "step": 978000 }, { "epoch": 0.9, "learning_rate": 5.520769222315031e-05, "loss": 4.4609, "step": 978500 }, { "epoch": 0.9, "learning_rate": 5.5184795430899315e-05, "loss": 4.4639, "step": 979000 }, { "epoch": 0.9, "learning_rate": 5.5161898638648327e-05, "loss": 4.4492, "step": 979500 }, { "epoch": 0.9, "learning_rate": 5.513904763998183e-05, "loss": 4.4472, "step": 980000 }, { "epoch": 0.9, "learning_rate": 5.5116150847730844e-05, "loss": 4.4602, "step": 980500 }, { "epoch": 0.9, "learning_rate": 5.509325405547985e-05, "loss": 4.4486, "step": 981000 }, { "epoch": 0.9, "learning_rate": 5.5070357263228854e-05, "loss": 4.4524, "step": 981500 }, { "epoch": 0.9, "learning_rate": 5.5047460470977866e-05, "loss": 4.4546, "step": 982000 }, { "epoch": 0.9, "learning_rate": 5.502456367872687e-05, "loss": 4.4671, "step": 982500 }, { "epoch": 0.9, "learning_rate": 5.5001666886475876e-05, "loss": 4.4674, "step": 983000 }, { "epoch": 0.9, "learning_rate": 5.497877009422489e-05, "loss": 4.4553, "step": 983500 }, { "epoch": 0.9, "learning_rate": 5.495587330197389e-05, "loss": 4.4482, "step": 984000 }, { "epoch": 0.9, "learning_rate": 5.49329765097229e-05, "loss": 4.4641, "step": 984500 }, { "epoch": 0.9, "learning_rate": 5.491007971747191e-05, "loss": 4.4395, "step": 985000 }, { "epoch": 0.9, "learning_rate": 5.4887228718805415e-05, "loss": 4.4431, "step": 985500 }, { "epoch": 0.9, "learning_rate": 5.486433192655443e-05, "loss": 4.4617, "step": 986000 }, { "epoch": 0.9, "learning_rate": 5.484143513430343e-05, "loss": 4.4611, "step": 986500 }, { "epoch": 0.9, "learning_rate": 5.481853834205244e-05, "loss": 4.4528, "step": 987000 }, { "epoch": 0.9, "learning_rate": 5.479564154980145e-05, "loss": 4.4518, "step": 987500 }, { "epoch": 0.9, "learning_rate": 5.4772790551134955e-05, "loss": 4.4576, "step": 988000 }, { "epoch": 0.91, "learning_rate": 5.4749893758883966e-05, "loss": 4.4528, "step": 988500 }, { "epoch": 0.91, "learning_rate": 5.472699696663297e-05, "loss": 4.4476, "step": 989000 }, { "epoch": 0.91, "learning_rate": 5.470410017438197e-05, "loss": 4.4697, "step": 989500 }, { "epoch": 0.91, "learning_rate": 5.4681203382130975e-05, "loss": 4.4576, "step": 990000 }, { "epoch": 0.91, "learning_rate": 5.465830658987998e-05, "loss": 4.446, "step": 990500 }, { "epoch": 0.91, "learning_rate": 5.463540979762899e-05, "loss": 4.4554, "step": 991000 }, { "epoch": 0.91, "learning_rate": 5.46125587989625e-05, "loss": 4.4598, "step": 991500 }, { "epoch": 0.91, "learning_rate": 5.458966200671151e-05, "loss": 4.4521, "step": 992000 }, { "epoch": 0.91, "learning_rate": 5.4566765214460514e-05, "loss": 4.4632, "step": 992500 }, { "epoch": 0.91, "learning_rate": 5.454386842220952e-05, "loss": 4.4533, "step": 993000 }, { "epoch": 0.91, "learning_rate": 5.452097162995853e-05, "loss": 4.4442, "step": 993500 }, { "epoch": 0.91, "learning_rate": 5.4498074837707536e-05, "loss": 4.462, "step": 994000 }, { "epoch": 0.91, "learning_rate": 5.447517804545654e-05, "loss": 4.4586, "step": 994500 }, { "epoch": 0.91, "learning_rate": 5.445228125320555e-05, "loss": 4.4369, "step": 995000 }, { "epoch": 0.91, "learning_rate": 5.442938446095456e-05, "loss": 4.4454, "step": 995500 }, { "epoch": 0.91, "learning_rate": 5.440657925587257e-05, "loss": 4.4423, "step": 996000 }, { "epoch": 0.91, "learning_rate": 5.4383682463621576e-05, "loss": 4.4569, "step": 996500 }, { "epoch": 0.91, "learning_rate": 5.436078567137058e-05, "loss": 4.4554, "step": 997000 }, { "epoch": 0.91, "learning_rate": 5.433788887911959e-05, "loss": 4.469, "step": 997500 }, { "epoch": 0.91, "learning_rate": 5.43149920868686e-05, "loss": 4.4432, "step": 998000 }, { "epoch": 0.91, "learning_rate": 5.429209529461761e-05, "loss": 4.4512, "step": 998500 }, { "epoch": 0.91, "learning_rate": 5.4269198502366615e-05, "loss": 4.4535, "step": 999000 }, { "epoch": 0.92, "learning_rate": 5.424634750370012e-05, "loss": 4.4543, "step": 999500 }, { "epoch": 0.92, "learning_rate": 5.422345071144913e-05, "loss": 4.4488, "step": 1000000 }, { "epoch": 0.92, "learning_rate": 5.420055391919814e-05, "loss": 4.4444, "step": 1000500 }, { "epoch": 0.92, "learning_rate": 5.417765712694714e-05, "loss": 4.4342, "step": 1001000 }, { "epoch": 0.92, "learning_rate": 5.4154760334696154e-05, "loss": 4.4604, "step": 1001500 }, { "epoch": 0.92, "learning_rate": 5.413190933602966e-05, "loss": 4.4585, "step": 1002000 }, { "epoch": 0.92, "learning_rate": 5.410901254377867e-05, "loss": 4.4475, "step": 1002500 }, { "epoch": 0.92, "learning_rate": 5.4086115751527677e-05, "loss": 4.464, "step": 1003000 }, { "epoch": 0.92, "learning_rate": 5.406321895927668e-05, "loss": 4.443, "step": 1003500 }, { "epoch": 0.92, "learning_rate": 5.4040322167025693e-05, "loss": 4.4428, "step": 1004000 }, { "epoch": 0.92, "learning_rate": 5.40174253747747e-05, "loss": 4.4498, "step": 1004500 }, { "epoch": 0.92, "learning_rate": 5.3994528582523704e-05, "loss": 4.4552, "step": 1005000 }, { "epoch": 0.92, "learning_rate": 5.3971631790272715e-05, "loss": 4.4661, "step": 1005500 }, { "epoch": 0.92, "learning_rate": 5.394878079160622e-05, "loss": 4.4469, "step": 1006000 }, { "epoch": 0.92, "learning_rate": 5.392588399935523e-05, "loss": 4.4539, "step": 1006500 }, { "epoch": 0.92, "learning_rate": 5.390298720710424e-05, "loss": 4.4516, "step": 1007000 }, { "epoch": 0.92, "learning_rate": 5.388009041485324e-05, "loss": 4.4669, "step": 1007500 }, { "epoch": 0.92, "learning_rate": 5.3857285209771256e-05, "loss": 4.4489, "step": 1008000 }, { "epoch": 0.92, "learning_rate": 5.383438841752027e-05, "loss": 4.4536, "step": 1008500 }, { "epoch": 0.92, "learning_rate": 5.381149162526927e-05, "loss": 4.444, "step": 1009000 }, { "epoch": 0.92, "learning_rate": 5.378859483301828e-05, "loss": 4.4644, "step": 1009500 }, { "epoch": 0.93, "learning_rate": 5.376574383435179e-05, "loss": 4.4426, "step": 1010000 }, { "epoch": 0.93, "learning_rate": 5.3742847042100795e-05, "loss": 4.462, "step": 1010500 }, { "epoch": 0.93, "learning_rate": 5.371999604343431e-05, "loss": 4.4525, "step": 1011000 }, { "epoch": 0.93, "learning_rate": 5.369709925118331e-05, "loss": 4.4567, "step": 1011500 }, { "epoch": 0.93, "learning_rate": 5.3674202458932325e-05, "loss": 4.4598, "step": 1012000 }, { "epoch": 0.93, "learning_rate": 5.365130566668133e-05, "loss": 4.4422, "step": 1012500 }, { "epoch": 0.93, "learning_rate": 5.3628408874430335e-05, "loss": 4.4479, "step": 1013000 }, { "epoch": 0.93, "learning_rate": 5.360551208217933e-05, "loss": 4.4411, "step": 1013500 }, { "epoch": 0.93, "learning_rate": 5.358261528992834e-05, "loss": 4.4426, "step": 1014000 }, { "epoch": 0.93, "learning_rate": 5.355971849767735e-05, "loss": 4.4391, "step": 1014500 }, { "epoch": 0.93, "learning_rate": 5.3536821705426355e-05, "loss": 4.4611, "step": 1015000 }, { "epoch": 0.93, "learning_rate": 5.351392491317536e-05, "loss": 4.4463, "step": 1015500 }, { "epoch": 0.93, "learning_rate": 5.349107391450887e-05, "loss": 4.4561, "step": 1016000 }, { "epoch": 0.93, "learning_rate": 5.346822291584238e-05, "loss": 4.4545, "step": 1016500 }, { "epoch": 0.93, "learning_rate": 5.344532612359139e-05, "loss": 4.4427, "step": 1017000 }, { "epoch": 0.93, "learning_rate": 5.3422429331340395e-05, "loss": 4.4508, "step": 1017500 }, { "epoch": 0.93, "learning_rate": 5.33995325390894e-05, "loss": 4.4424, "step": 1018000 }, { "epoch": 0.93, "learning_rate": 5.337663574683841e-05, "loss": 4.4367, "step": 1018500 }, { "epoch": 0.93, "learning_rate": 5.3353738954587417e-05, "loss": 4.4444, "step": 1019000 }, { "epoch": 0.93, "learning_rate": 5.333084216233643e-05, "loss": 4.4531, "step": 1019500 }, { "epoch": 0.93, "learning_rate": 5.3307945370085433e-05, "loss": 4.4539, "step": 1020000 }, { "epoch": 0.93, "learning_rate": 5.328504857783444e-05, "loss": 4.4498, "step": 1020500 }, { "epoch": 0.94, "learning_rate": 5.326215178558345e-05, "loss": 4.4505, "step": 1021000 }, { "epoch": 0.94, "learning_rate": 5.3239300786916956e-05, "loss": 4.4424, "step": 1021500 }, { "epoch": 0.94, "learning_rate": 5.321640399466596e-05, "loss": 4.4595, "step": 1022000 }, { "epoch": 0.94, "learning_rate": 5.319350720241497e-05, "loss": 4.4449, "step": 1022500 }, { "epoch": 0.94, "learning_rate": 5.317061041016398e-05, "loss": 4.4428, "step": 1023000 }, { "epoch": 0.94, "learning_rate": 5.314771361791299e-05, "loss": 4.4465, "step": 1023500 }, { "epoch": 0.94, "learning_rate": 5.3124816825661995e-05, "loss": 4.4509, "step": 1024000 }, { "epoch": 0.94, "learning_rate": 5.3101920033411e-05, "loss": 4.4414, "step": 1024500 }, { "epoch": 0.94, "learning_rate": 5.307906903474451e-05, "loss": 4.452, "step": 1025000 }, { "epoch": 0.94, "learning_rate": 5.305617224249352e-05, "loss": 4.4565, "step": 1025500 }, { "epoch": 0.94, "learning_rate": 5.303327545024252e-05, "loss": 4.4598, "step": 1026000 }, { "epoch": 0.94, "learning_rate": 5.3010378657991534e-05, "loss": 4.4436, "step": 1026500 }, { "epoch": 0.94, "learning_rate": 5.298748186574054e-05, "loss": 4.4554, "step": 1027000 }, { "epoch": 0.94, "learning_rate": 5.296458507348955e-05, "loss": 4.4454, "step": 1027500 }, { "epoch": 0.94, "learning_rate": 5.2941734074823057e-05, "loss": 4.4602, "step": 1028000 }, { "epoch": 0.94, "learning_rate": 5.291883728257206e-05, "loss": 4.4431, "step": 1028500 }, { "epoch": 0.94, "learning_rate": 5.2895940490321073e-05, "loss": 4.4464, "step": 1029000 }, { "epoch": 0.94, "learning_rate": 5.287304369807008e-05, "loss": 4.4425, "step": 1029500 }, { "epoch": 0.94, "learning_rate": 5.2850146905819084e-05, "loss": 4.4396, "step": 1030000 }, { "epoch": 0.94, "learning_rate": 5.2827250113568095e-05, "loss": 4.4515, "step": 1030500 }, { "epoch": 0.94, "learning_rate": 5.28043991149016e-05, "loss": 4.4395, "step": 1031000 }, { "epoch": 0.94, "learning_rate": 5.278150232265061e-05, "loss": 4.453, "step": 1031500 }, { "epoch": 0.95, "learning_rate": 5.275860553039962e-05, "loss": 4.4417, "step": 1032000 }, { "epoch": 0.95, "learning_rate": 5.273570873814862e-05, "loss": 4.4546, "step": 1032500 }, { "epoch": 0.95, "learning_rate": 5.2712811945897635e-05, "loss": 4.4428, "step": 1033000 }, { "epoch": 0.95, "learning_rate": 5.268996094723114e-05, "loss": 4.4452, "step": 1033500 }, { "epoch": 0.95, "learning_rate": 5.266706415498015e-05, "loss": 4.4428, "step": 1034000 }, { "epoch": 0.95, "learning_rate": 5.264416736272916e-05, "loss": 4.445, "step": 1034500 }, { "epoch": 0.95, "learning_rate": 5.262127057047816e-05, "loss": 4.4376, "step": 1035000 }, { "epoch": 0.95, "learning_rate": 5.2598373778227174e-05, "loss": 4.4404, "step": 1035500 }, { "epoch": 0.95, "learning_rate": 5.257547698597618e-05, "loss": 4.4561, "step": 1036000 }, { "epoch": 0.95, "learning_rate": 5.2552580193725184e-05, "loss": 4.4414, "step": 1036500 }, { "epoch": 0.95, "learning_rate": 5.2529683401474196e-05, "loss": 4.4452, "step": 1037000 }, { "epoch": 0.95, "learning_rate": 5.25067866092232e-05, "loss": 4.4396, "step": 1037500 }, { "epoch": 0.95, "learning_rate": 5.248393561055671e-05, "loss": 4.4573, "step": 1038000 }, { "epoch": 0.95, "learning_rate": 5.246108461189021e-05, "loss": 4.4485, "step": 1038500 }, { "epoch": 0.95, "learning_rate": 5.243818781963922e-05, "loss": 4.4586, "step": 1039000 }, { "epoch": 0.95, "learning_rate": 5.241529102738822e-05, "loss": 4.4462, "step": 1039500 }, { "epoch": 0.95, "learning_rate": 5.2392394235137234e-05, "loss": 4.4549, "step": 1040000 }, { "epoch": 0.95, "learning_rate": 5.236949744288624e-05, "loss": 4.4559, "step": 1040500 }, { "epoch": 0.95, "learning_rate": 5.2346600650635244e-05, "loss": 4.4475, "step": 1041000 }, { "epoch": 0.95, "learning_rate": 5.2323703858384256e-05, "loss": 4.438, "step": 1041500 }, { "epoch": 0.95, "learning_rate": 5.230080706613326e-05, "loss": 4.4259, "step": 1042000 }, { "epoch": 0.95, "learning_rate": 5.2277956067466773e-05, "loss": 4.4481, "step": 1042500 }, { "epoch": 0.96, "learning_rate": 5.225505927521578e-05, "loss": 4.4393, "step": 1043000 }, { "epoch": 0.96, "learning_rate": 5.2232162482964784e-05, "loss": 4.4439, "step": 1043500 }, { "epoch": 0.96, "learning_rate": 5.2209311484298296e-05, "loss": 4.4451, "step": 1044000 }, { "epoch": 0.96, "learning_rate": 5.21864146920473e-05, "loss": 4.46, "step": 1044500 }, { "epoch": 0.96, "learning_rate": 5.216351789979631e-05, "loss": 4.4622, "step": 1045000 }, { "epoch": 0.96, "learning_rate": 5.214062110754532e-05, "loss": 4.4422, "step": 1045500 }, { "epoch": 0.96, "learning_rate": 5.211777010887883e-05, "loss": 4.4416, "step": 1046000 }, { "epoch": 0.96, "learning_rate": 5.2094873316627835e-05, "loss": 4.4496, "step": 1046500 }, { "epoch": 0.96, "learning_rate": 5.207197652437684e-05, "loss": 4.4364, "step": 1047000 }, { "epoch": 0.96, "learning_rate": 5.204907973212585e-05, "loss": 4.4405, "step": 1047500 }, { "epoch": 0.96, "learning_rate": 5.202618293987486e-05, "loss": 4.4379, "step": 1048000 }, { "epoch": 0.96, "learning_rate": 5.200328614762386e-05, "loss": 4.4444, "step": 1048500 }, { "epoch": 0.96, "learning_rate": 5.1980389355372874e-05, "loss": 4.4372, "step": 1049000 }, { "epoch": 0.96, "learning_rate": 5.195749256312188e-05, "loss": 4.448, "step": 1049500 }, { "epoch": 0.96, "learning_rate": 5.1934595770870884e-05, "loss": 4.4378, "step": 1050000 }, { "epoch": 0.96, "learning_rate": 5.1911698978619896e-05, "loss": 4.4294, "step": 1050500 }, { "epoch": 0.96, "learning_rate": 5.18888479799534e-05, "loss": 4.4293, "step": 1051000 }, { "epoch": 0.96, "learning_rate": 5.1865951187702413e-05, "loss": 4.4448, "step": 1051500 }, { "epoch": 0.96, "learning_rate": 5.184305439545142e-05, "loss": 4.4359, "step": 1052000 }, { "epoch": 0.96, "learning_rate": 5.1820157603200424e-05, "loss": 4.4554, "step": 1052500 }, { "epoch": 0.96, "learning_rate": 5.1797260810949435e-05, "loss": 4.433, "step": 1053000 }, { "epoch": 0.96, "learning_rate": 5.177436401869844e-05, "loss": 4.434, "step": 1053500 }, { "epoch": 0.97, "learning_rate": 5.1751467226447445e-05, "loss": 4.4396, "step": 1054000 }, { "epoch": 0.97, "learning_rate": 5.172861622778096e-05, "loss": 4.4493, "step": 1054500 }, { "epoch": 0.97, "learning_rate": 5.170571943552996e-05, "loss": 4.4457, "step": 1055000 }, { "epoch": 0.97, "learning_rate": 5.1682822643278975e-05, "loss": 4.4411, "step": 1055500 }, { "epoch": 0.97, "learning_rate": 5.165992585102798e-05, "loss": 4.4528, "step": 1056000 }, { "epoch": 0.97, "learning_rate": 5.1637029058776985e-05, "loss": 4.4448, "step": 1056500 }, { "epoch": 0.97, "learning_rate": 5.16141780601105e-05, "loss": 4.4453, "step": 1057000 }, { "epoch": 0.97, "learning_rate": 5.15912812678595e-05, "loss": 4.4505, "step": 1057500 }, { "epoch": 0.97, "learning_rate": 5.1568384475608514e-05, "loss": 4.4449, "step": 1058000 }, { "epoch": 0.97, "learning_rate": 5.154548768335752e-05, "loss": 4.4395, "step": 1058500 }, { "epoch": 0.97, "learning_rate": 5.1522636684691025e-05, "loss": 4.4428, "step": 1059000 }, { "epoch": 0.97, "learning_rate": 5.1499739892440037e-05, "loss": 4.4515, "step": 1059500 }, { "epoch": 0.97, "learning_rate": 5.147684310018904e-05, "loss": 4.4485, "step": 1060000 }, { "epoch": 0.97, "learning_rate": 5.1453946307938053e-05, "loss": 4.4432, "step": 1060500 }, { "epoch": 0.97, "learning_rate": 5.143104951568706e-05, "loss": 4.4421, "step": 1061000 }, { "epoch": 0.97, "learning_rate": 5.1408152723436063e-05, "loss": 4.4543, "step": 1061500 }, { "epoch": 0.97, "learning_rate": 5.1385255931185075e-05, "loss": 4.4538, "step": 1062000 }, { "epoch": 0.97, "learning_rate": 5.136235913893408e-05, "loss": 4.4346, "step": 1062500 }, { "epoch": 0.97, "learning_rate": 5.133950814026758e-05, "loss": 4.44, "step": 1063000 }, { "epoch": 0.97, "learning_rate": 5.1316611348016584e-05, "loss": 4.4439, "step": 1063500 }, { "epoch": 0.97, "learning_rate": 5.1293714555765596e-05, "loss": 4.4431, "step": 1064000 }, { "epoch": 0.97, "learning_rate": 5.12708177635146e-05, "loss": 4.4528, "step": 1064500 }, { "epoch": 0.98, "learning_rate": 5.1247920971263606e-05, "loss": 4.4507, "step": 1065000 }, { "epoch": 0.98, "learning_rate": 5.122506997259712e-05, "loss": 4.4394, "step": 1065500 }, { "epoch": 0.98, "learning_rate": 5.1202173180346124e-05, "loss": 4.4437, "step": 1066000 }, { "epoch": 0.98, "learning_rate": 5.1179276388095135e-05, "loss": 4.4177, "step": 1066500 }, { "epoch": 0.98, "learning_rate": 5.115637959584414e-05, "loss": 4.4535, "step": 1067000 }, { "epoch": 0.98, "learning_rate": 5.1133482803593145e-05, "loss": 4.438, "step": 1067500 }, { "epoch": 0.98, "learning_rate": 5.111063180492666e-05, "loss": 4.4439, "step": 1068000 }, { "epoch": 0.98, "learning_rate": 5.108773501267566e-05, "loss": 4.4428, "step": 1068500 }, { "epoch": 0.98, "learning_rate": 5.106483822042467e-05, "loss": 4.4386, "step": 1069000 }, { "epoch": 0.98, "learning_rate": 5.104194142817368e-05, "loss": 4.4486, "step": 1069500 }, { "epoch": 0.98, "learning_rate": 5.1019044635922685e-05, "loss": 4.434, "step": 1070000 }, { "epoch": 0.98, "learning_rate": 5.09961478436717e-05, "loss": 4.4438, "step": 1070500 }, { "epoch": 0.98, "learning_rate": 5.09732510514207e-05, "loss": 4.4502, "step": 1071000 }, { "epoch": 0.98, "learning_rate": 5.095035425916971e-05, "loss": 4.4403, "step": 1071500 }, { "epoch": 0.98, "learning_rate": 5.092754905408772e-05, "loss": 4.4406, "step": 1072000 }, { "epoch": 0.98, "learning_rate": 5.0904652261836725e-05, "loss": 4.4258, "step": 1072500 }, { "epoch": 0.98, "learning_rate": 5.0881755469585737e-05, "loss": 4.4436, "step": 1073000 }, { "epoch": 0.98, "learning_rate": 5.085885867733474e-05, "loss": 4.4419, "step": 1073500 }, { "epoch": 0.98, "learning_rate": 5.083596188508375e-05, "loss": 4.439, "step": 1074000 }, { "epoch": 0.98, "learning_rate": 5.081311088641726e-05, "loss": 4.4464, "step": 1074500 }, { "epoch": 0.98, "learning_rate": 5.0790214094166264e-05, "loss": 4.4401, "step": 1075000 }, { "epoch": 0.99, "learning_rate": 5.0767317301915276e-05, "loss": 4.4437, "step": 1075500 }, { "epoch": 0.99, "learning_rate": 5.074442050966428e-05, "loss": 4.4422, "step": 1076000 }, { "epoch": 0.99, "learning_rate": 5.0721523717413286e-05, "loss": 4.4427, "step": 1076500 }, { "epoch": 0.99, "learning_rate": 5.06986269251623e-05, "loss": 4.4321, "step": 1077000 }, { "epoch": 0.99, "learning_rate": 5.06757301329113e-05, "loss": 4.4389, "step": 1077500 }, { "epoch": 0.99, "learning_rate": 5.065283334066031e-05, "loss": 4.445, "step": 1078000 }, { "epoch": 0.99, "learning_rate": 5.062993654840932e-05, "loss": 4.4274, "step": 1078500 }, { "epoch": 0.99, "learning_rate": 5.0607085549742825e-05, "loss": 4.4375, "step": 1079000 }, { "epoch": 0.99, "learning_rate": 5.058418875749184e-05, "loss": 4.4407, "step": 1079500 }, { "epoch": 0.99, "learning_rate": 5.056129196524084e-05, "loss": 4.4448, "step": 1080000 }, { "epoch": 0.99, "learning_rate": 5.0538440966574355e-05, "loss": 4.4387, "step": 1080500 }, { "epoch": 0.99, "learning_rate": 5.051554417432336e-05, "loss": 4.4379, "step": 1081000 }, { "epoch": 0.99, "learning_rate": 5.0492647382072365e-05, "loss": 4.429, "step": 1081500 }, { "epoch": 0.99, "learning_rate": 5.0469750589821377e-05, "loss": 4.4272, "step": 1082000 }, { "epoch": 0.99, "learning_rate": 5.044685379757038e-05, "loss": 4.4356, "step": 1082500 }, { "epoch": 0.99, "learning_rate": 5.042395700531939e-05, "loss": 4.4389, "step": 1083000 }, { "epoch": 0.99, "learning_rate": 5.04011060066529e-05, "loss": 4.4396, "step": 1083500 }, { "epoch": 0.99, "learning_rate": 5.0378209214401904e-05, "loss": 4.4335, "step": 1084000 }, { "epoch": 0.99, "learning_rate": 5.0355312422150916e-05, "loss": 4.4456, "step": 1084500 }, { "epoch": 0.99, "learning_rate": 5.033241562989992e-05, "loss": 4.4385, "step": 1085000 }, { "epoch": 0.99, "learning_rate": 5.0309518837648926e-05, "loss": 4.4353, "step": 1085500 }, { "epoch": 0.99, "learning_rate": 5.028666783898244e-05, "loss": 4.4481, "step": 1086000 }, { "epoch": 1.0, "learning_rate": 5.0263771046731443e-05, "loss": 4.4259, "step": 1086500 }, { "epoch": 1.0, "learning_rate": 5.0240874254480455e-05, "loss": 4.4314, "step": 1087000 }, { "epoch": 1.0, "learning_rate": 5.021797746222945e-05, "loss": 4.4511, "step": 1087500 }, { "epoch": 1.0, "learning_rate": 5.019508066997846e-05, "loss": 4.437, "step": 1088000 }, { "epoch": 1.0, "learning_rate": 5.0172183877727464e-05, "loss": 4.4348, "step": 1088500 }, { "epoch": 1.0, "learning_rate": 5.014928708547647e-05, "loss": 4.4439, "step": 1089000 }, { "epoch": 1.0, "learning_rate": 5.012639029322548e-05, "loss": 4.4313, "step": 1089500 }, { "epoch": 1.0, "learning_rate": 5.0103539294558986e-05, "loss": 4.4349, "step": 1090000 }, { "epoch": 1.0, "learning_rate": 5.0080642502308e-05, "loss": 4.4445, "step": 1090500 }, { "epoch": 1.0, "learning_rate": 5.0057745710057e-05, "loss": 4.4503, "step": 1091000 }, { "epoch": 1.0, "learning_rate": 5.003484891780601e-05, "loss": 4.4407, "step": 1091500 }, { "epoch": 1.0, "learning_rate": 5.001195212555502e-05, "loss": 4.4341, "step": 1092000 }, { "epoch": 1.0, "learning_rate": 4.998910112688853e-05, "loss": 4.4384, "step": 1092500 }, { "epoch": 1.0, "learning_rate": 4.996620433463754e-05, "loss": 4.4309, "step": 1093000 }, { "epoch": 1.0, "learning_rate": 4.994335333597104e-05, "loss": 4.426, "step": 1093500 }, { "epoch": 1.0, "learning_rate": 4.9920502337304555e-05, "loss": 4.4174, "step": 1094000 }, { "epoch": 1.0, "learning_rate": 4.989760554505356e-05, "loss": 4.4254, "step": 1094500 }, { "epoch": 1.0, "learning_rate": 4.9874708752802565e-05, "loss": 4.4383, "step": 1095000 }, { "epoch": 1.0, "learning_rate": 4.985181196055158e-05, "loss": 4.4323, "step": 1095500 }, { "epoch": 1.0, "learning_rate": 4.982891516830058e-05, "loss": 4.4192, "step": 1096000 }, { "epoch": 1.0, "learning_rate": 4.980601837604959e-05, "loss": 4.4205, "step": 1096500 }, { "epoch": 1.0, "learning_rate": 4.97831215837986e-05, "loss": 4.4331, "step": 1097000 }, { "epoch": 1.01, "learning_rate": 4.9760224791547604e-05, "loss": 4.4381, "step": 1097500 }, { "epoch": 1.01, "learning_rate": 4.973732799929661e-05, "loss": 4.4344, "step": 1098000 }, { "epoch": 1.01, "learning_rate": 4.971443120704562e-05, "loss": 4.42, "step": 1098500 }, { "epoch": 1.01, "learning_rate": 4.9691534414794626e-05, "loss": 4.4287, "step": 1099000 }, { "epoch": 1.01, "learning_rate": 4.966868341612814e-05, "loss": 4.4309, "step": 1099500 }, { "epoch": 1.01, "learning_rate": 4.9645832417461644e-05, "loss": 4.4361, "step": 1100000 }, { "epoch": 1.01, "learning_rate": 4.9622935625210656e-05, "loss": 4.4172, "step": 1100500 }, { "epoch": 1.01, "learning_rate": 4.960003883295966e-05, "loss": 4.4123, "step": 1101000 }, { "epoch": 1.01, "learning_rate": 4.9577142040708666e-05, "loss": 4.4322, "step": 1101500 }, { "epoch": 1.01, "learning_rate": 4.955424524845768e-05, "loss": 4.4322, "step": 1102000 }, { "epoch": 1.01, "learning_rate": 4.953134845620668e-05, "loss": 4.4305, "step": 1102500 }, { "epoch": 1.01, "learning_rate": 4.950845166395569e-05, "loss": 4.4269, "step": 1103000 }, { "epoch": 1.01, "learning_rate": 4.94855548717047e-05, "loss": 4.4299, "step": 1103500 }, { "epoch": 1.01, "learning_rate": 4.9462658079453705e-05, "loss": 4.4283, "step": 1104000 }, { "epoch": 1.01, "learning_rate": 4.943976128720271e-05, "loss": 4.4226, "step": 1104500 }, { "epoch": 1.01, "learning_rate": 4.941686449495172e-05, "loss": 4.4302, "step": 1105000 }, { "epoch": 1.01, "learning_rate": 4.939401349628523e-05, "loss": 4.4315, "step": 1105500 }, { "epoch": 1.01, "learning_rate": 4.937111670403423e-05, "loss": 4.4141, "step": 1106000 }, { "epoch": 1.01, "learning_rate": 4.934821991178324e-05, "loss": 4.4294, "step": 1106500 }, { "epoch": 1.01, "learning_rate": 4.932532311953225e-05, "loss": 4.4393, "step": 1107000 }, { "epoch": 1.01, "learning_rate": 4.9302426327281254e-05, "loss": 4.4281, "step": 1107500 }, { "epoch": 1.01, "learning_rate": 4.9279575328614767e-05, "loss": 4.4285, "step": 1108000 }, { "epoch": 1.02, "learning_rate": 4.925667853636377e-05, "loss": 4.4436, "step": 1108500 }, { "epoch": 1.02, "learning_rate": 4.923378174411278e-05, "loss": 4.4207, "step": 1109000 }, { "epoch": 1.02, "learning_rate": 4.921093074544629e-05, "loss": 4.4205, "step": 1109500 }, { "epoch": 1.02, "learning_rate": 4.9188033953195294e-05, "loss": 4.421, "step": 1110000 }, { "epoch": 1.02, "learning_rate": 4.91651371609443e-05, "loss": 4.4407, "step": 1110500 }, { "epoch": 1.02, "learning_rate": 4.914224036869331e-05, "loss": 4.429, "step": 1111000 }, { "epoch": 1.02, "learning_rate": 4.9119389370026817e-05, "loss": 4.4245, "step": 1111500 }, { "epoch": 1.02, "learning_rate": 4.909649257777583e-05, "loss": 4.4209, "step": 1112000 }, { "epoch": 1.02, "learning_rate": 4.9073595785524833e-05, "loss": 4.4226, "step": 1112500 }, { "epoch": 1.02, "learning_rate": 4.905069899327384e-05, "loss": 4.4327, "step": 1113000 }, { "epoch": 1.02, "learning_rate": 4.902780220102285e-05, "loss": 4.4178, "step": 1113500 }, { "epoch": 1.02, "learning_rate": 4.9004905408771855e-05, "loss": 4.428, "step": 1114000 }, { "epoch": 1.02, "learning_rate": 4.898200861652086e-05, "loss": 4.4205, "step": 1114500 }, { "epoch": 1.02, "learning_rate": 4.895911182426987e-05, "loss": 4.4159, "step": 1115000 }, { "epoch": 1.02, "learning_rate": 4.893621503201888e-05, "loss": 4.4191, "step": 1115500 }, { "epoch": 1.02, "learning_rate": 4.891331823976789e-05, "loss": 4.4299, "step": 1116000 }, { "epoch": 1.02, "learning_rate": 4.8890421447516894e-05, "loss": 4.4295, "step": 1116500 }, { "epoch": 1.02, "learning_rate": 4.88675246552659e-05, "loss": 4.4262, "step": 1117000 }, { "epoch": 1.02, "learning_rate": 4.884462786301491e-05, "loss": 4.43, "step": 1117500 }, { "epoch": 1.02, "learning_rate": 4.882182265793292e-05, "loss": 4.4329, "step": 1118000 }, { "epoch": 1.02, "learning_rate": 4.879892586568192e-05, "loss": 4.4202, "step": 1118500 }, { "epoch": 1.02, "learning_rate": 4.877607486701543e-05, "loss": 4.4186, "step": 1119000 }, { "epoch": 1.03, "learning_rate": 4.875317807476444e-05, "loss": 4.4274, "step": 1119500 }, { "epoch": 1.03, "learning_rate": 4.8730281282513445e-05, "loss": 4.4316, "step": 1120000 }, { "epoch": 1.03, "learning_rate": 4.8707384490262457e-05, "loss": 4.4219, "step": 1120500 }, { "epoch": 1.03, "learning_rate": 4.868448769801146e-05, "loss": 4.416, "step": 1121000 }, { "epoch": 1.03, "learning_rate": 4.866159090576047e-05, "loss": 4.405, "step": 1121500 }, { "epoch": 1.03, "learning_rate": 4.863869411350948e-05, "loss": 4.4241, "step": 1122000 }, { "epoch": 1.03, "learning_rate": 4.8615797321258484e-05, "loss": 4.4155, "step": 1122500 }, { "epoch": 1.03, "learning_rate": 4.859290052900749e-05, "loss": 4.4291, "step": 1123000 }, { "epoch": 1.03, "learning_rate": 4.85700953239255e-05, "loss": 4.4275, "step": 1123500 }, { "epoch": 1.03, "learning_rate": 4.8547198531674507e-05, "loss": 4.4237, "step": 1124000 }, { "epoch": 1.03, "learning_rate": 4.852430173942352e-05, "loss": 4.4343, "step": 1124500 }, { "epoch": 1.03, "learning_rate": 4.8501404947172523e-05, "loss": 4.4315, "step": 1125000 }, { "epoch": 1.03, "learning_rate": 4.847850815492153e-05, "loss": 4.4408, "step": 1125500 }, { "epoch": 1.03, "learning_rate": 4.845561136267054e-05, "loss": 4.4295, "step": 1126000 }, { "epoch": 1.03, "learning_rate": 4.8432714570419545e-05, "loss": 4.4234, "step": 1126500 }, { "epoch": 1.03, "learning_rate": 4.840981777816855e-05, "loss": 4.4427, "step": 1127000 }, { "epoch": 1.03, "learning_rate": 4.838692098591756e-05, "loss": 4.422, "step": 1127500 }, { "epoch": 1.03, "learning_rate": 4.836402419366657e-05, "loss": 4.4238, "step": 1128000 }, { "epoch": 1.03, "learning_rate": 4.834112740141558e-05, "loss": 4.416, "step": 1128500 }, { "epoch": 1.03, "learning_rate": 4.8318230609164584e-05, "loss": 4.4296, "step": 1129000 }, { "epoch": 1.03, "learning_rate": 4.829537961049809e-05, "loss": 4.4239, "step": 1129500 }, { "epoch": 1.03, "learning_rate": 4.8272482818247095e-05, "loss": 4.4246, "step": 1130000 }, { "epoch": 1.04, "learning_rate": 4.82495860259961e-05, "loss": 4.4209, "step": 1130500 }, { "epoch": 1.04, "learning_rate": 4.822668923374511e-05, "loss": 4.4298, "step": 1131000 }, { "epoch": 1.04, "learning_rate": 4.820379244149412e-05, "loss": 4.4386, "step": 1131500 }, { "epoch": 1.04, "learning_rate": 4.818089564924312e-05, "loss": 4.4331, "step": 1132000 }, { "epoch": 1.04, "learning_rate": 4.8157998856992134e-05, "loss": 4.4278, "step": 1132500 }, { "epoch": 1.04, "learning_rate": 4.813510206474114e-05, "loss": 4.413, "step": 1133000 }, { "epoch": 1.04, "learning_rate": 4.8112205272490144e-05, "loss": 4.4205, "step": 1133500 }, { "epoch": 1.04, "learning_rate": 4.8089354273823656e-05, "loss": 4.417, "step": 1134000 }, { "epoch": 1.04, "learning_rate": 4.806645748157266e-05, "loss": 4.4304, "step": 1134500 }, { "epoch": 1.04, "learning_rate": 4.804356068932167e-05, "loss": 4.4277, "step": 1135000 }, { "epoch": 1.04, "learning_rate": 4.802066389707068e-05, "loss": 4.4096, "step": 1135500 }, { "epoch": 1.04, "learning_rate": 4.799785869198869e-05, "loss": 4.42, "step": 1136000 }, { "epoch": 1.04, "learning_rate": 4.7974961899737696e-05, "loss": 4.4103, "step": 1136500 }, { "epoch": 1.04, "learning_rate": 4.795206510748671e-05, "loss": 4.4283, "step": 1137000 }, { "epoch": 1.04, "learning_rate": 4.792916831523571e-05, "loss": 4.4172, "step": 1137500 }, { "epoch": 1.04, "learning_rate": 4.790627152298472e-05, "loss": 4.4106, "step": 1138000 }, { "epoch": 1.04, "learning_rate": 4.788337473073373e-05, "loss": 4.4371, "step": 1138500 }, { "epoch": 1.04, "learning_rate": 4.7860477938482735e-05, "loss": 4.4183, "step": 1139000 }, { "epoch": 1.04, "learning_rate": 4.783758114623174e-05, "loss": 4.4177, "step": 1139500 }, { "epoch": 1.04, "learning_rate": 4.781468435398075e-05, "loss": 4.425, "step": 1140000 }, { "epoch": 1.04, "learning_rate": 4.779187914889876e-05, "loss": 4.4133, "step": 1140500 }, { "epoch": 1.05, "learning_rate": 4.776898235664777e-05, "loss": 4.4149, "step": 1141000 }, { "epoch": 1.05, "learning_rate": 4.7746085564396775e-05, "loss": 4.426, "step": 1141500 }, { "epoch": 1.05, "learning_rate": 4.772318877214578e-05, "loss": 4.4455, "step": 1142000 }, { "epoch": 1.05, "learning_rate": 4.7700291979894785e-05, "loss": 4.4368, "step": 1142500 }, { "epoch": 1.05, "learning_rate": 4.767739518764379e-05, "loss": 4.4111, "step": 1143000 }, { "epoch": 1.05, "learning_rate": 4.76544983953928e-05, "loss": 4.4284, "step": 1143500 }, { "epoch": 1.05, "learning_rate": 4.763160160314181e-05, "loss": 4.4216, "step": 1144000 }, { "epoch": 1.05, "learning_rate": 4.760870481089081e-05, "loss": 4.4241, "step": 1144500 }, { "epoch": 1.05, "learning_rate": 4.7585853812224324e-05, "loss": 4.4331, "step": 1145000 }, { "epoch": 1.05, "learning_rate": 4.756295701997333e-05, "loss": 4.4347, "step": 1145500 }, { "epoch": 1.05, "learning_rate": 4.754006022772234e-05, "loss": 4.4134, "step": 1146000 }, { "epoch": 1.05, "learning_rate": 4.7517163435471346e-05, "loss": 4.4208, "step": 1146500 }, { "epoch": 1.05, "learning_rate": 4.749426664322035e-05, "loss": 4.4185, "step": 1147000 }, { "epoch": 1.05, "learning_rate": 4.7471461438138364e-05, "loss": 4.4217, "step": 1147500 }, { "epoch": 1.05, "learning_rate": 4.744856464588737e-05, "loss": 4.4251, "step": 1148000 }, { "epoch": 1.05, "learning_rate": 4.742566785363638e-05, "loss": 4.4322, "step": 1148500 }, { "epoch": 1.05, "learning_rate": 4.7402771061385386e-05, "loss": 4.4324, "step": 1149000 }, { "epoch": 1.05, "learning_rate": 4.73798742691344e-05, "loss": 4.4213, "step": 1149500 }, { "epoch": 1.05, "learning_rate": 4.73570232704679e-05, "loss": 4.4069, "step": 1150000 }, { "epoch": 1.05, "learning_rate": 4.733412647821691e-05, "loss": 4.4303, "step": 1150500 }, { "epoch": 1.05, "learning_rate": 4.731122968596592e-05, "loss": 4.4337, "step": 1151000 }, { "epoch": 1.05, "learning_rate": 4.7288332893714925e-05, "loss": 4.4252, "step": 1151500 }, { "epoch": 1.06, "learning_rate": 4.726543610146393e-05, "loss": 4.4379, "step": 1152000 }, { "epoch": 1.06, "learning_rate": 4.724253930921294e-05, "loss": 4.4189, "step": 1152500 }, { "epoch": 1.06, "learning_rate": 4.721964251696195e-05, "loss": 4.4295, "step": 1153000 }, { "epoch": 1.06, "learning_rate": 4.719679151829546e-05, "loss": 4.4232, "step": 1153500 }, { "epoch": 1.06, "learning_rate": 4.717389472604446e-05, "loss": 4.4268, "step": 1154000 }, { "epoch": 1.06, "learning_rate": 4.715099793379347e-05, "loss": 4.4354, "step": 1154500 }, { "epoch": 1.06, "learning_rate": 4.7128101141542475e-05, "loss": 4.4245, "step": 1155000 }, { "epoch": 1.06, "learning_rate": 4.710520434929148e-05, "loss": 4.416, "step": 1155500 }, { "epoch": 1.06, "learning_rate": 4.708230755704049e-05, "loss": 4.4372, "step": 1156000 }, { "epoch": 1.06, "learning_rate": 4.70594107647895e-05, "loss": 4.4236, "step": 1156500 }, { "epoch": 1.06, "learning_rate": 4.70365139725385e-05, "loss": 4.4334, "step": 1157000 }, { "epoch": 1.06, "learning_rate": 4.7013617180287513e-05, "loss": 4.4148, "step": 1157500 }, { "epoch": 1.06, "learning_rate": 4.699076618162102e-05, "loss": 4.4291, "step": 1158000 }, { "epoch": 1.06, "learning_rate": 4.696786938937003e-05, "loss": 4.4202, "step": 1158500 }, { "epoch": 1.06, "learning_rate": 4.6944972597119036e-05, "loss": 4.4285, "step": 1159000 }, { "epoch": 1.06, "learning_rate": 4.692207580486804e-05, "loss": 4.434, "step": 1159500 }, { "epoch": 1.06, "learning_rate": 4.6899224806201553e-05, "loss": 4.4177, "step": 1160000 }, { "epoch": 1.06, "learning_rate": 4.687632801395056e-05, "loss": 4.4184, "step": 1160500 }, { "epoch": 1.06, "learning_rate": 4.685343122169957e-05, "loss": 4.428, "step": 1161000 }, { "epoch": 1.06, "learning_rate": 4.6830534429448575e-05, "loss": 4.4265, "step": 1161500 }, { "epoch": 1.06, "learning_rate": 4.680763763719758e-05, "loss": 4.4404, "step": 1162000 }, { "epoch": 1.06, "learning_rate": 4.678478663853109e-05, "loss": 4.4393, "step": 1162500 }, { "epoch": 1.07, "learning_rate": 4.67618898462801e-05, "loss": 4.4265, "step": 1163000 }, { "epoch": 1.07, "learning_rate": 4.673899305402911e-05, "loss": 4.4366, "step": 1163500 }, { "epoch": 1.07, "learning_rate": 4.6716096261778115e-05, "loss": 4.4259, "step": 1164000 }, { "epoch": 1.07, "learning_rate": 4.669324526311162e-05, "loss": 4.4267, "step": 1164500 }, { "epoch": 1.07, "learning_rate": 4.667034847086063e-05, "loss": 4.4295, "step": 1165000 }, { "epoch": 1.07, "learning_rate": 4.664745167860964e-05, "loss": 4.4271, "step": 1165500 }, { "epoch": 1.07, "learning_rate": 4.662455488635865e-05, "loss": 4.4142, "step": 1166000 }, { "epoch": 1.07, "learning_rate": 4.660165809410765e-05, "loss": 4.4077, "step": 1166500 }, { "epoch": 1.07, "learning_rate": 4.657876130185665e-05, "loss": 4.4272, "step": 1167000 }, { "epoch": 1.07, "learning_rate": 4.6555864509605664e-05, "loss": 4.4279, "step": 1167500 }, { "epoch": 1.07, "learning_rate": 4.653301351093917e-05, "loss": 4.4359, "step": 1168000 }, { "epoch": 1.07, "learning_rate": 4.651016251227268e-05, "loss": 4.4342, "step": 1168500 }, { "epoch": 1.07, "learning_rate": 4.648726572002169e-05, "loss": 4.4272, "step": 1169000 }, { "epoch": 1.07, "learning_rate": 4.64643689277707e-05, "loss": 4.4162, "step": 1169500 }, { "epoch": 1.07, "learning_rate": 4.6441472135519704e-05, "loss": 4.4308, "step": 1170000 }, { "epoch": 1.07, "learning_rate": 4.6418621136853216e-05, "loss": 4.4263, "step": 1170500 }, { "epoch": 1.07, "learning_rate": 4.639572434460222e-05, "loss": 4.4255, "step": 1171000 }, { "epoch": 1.07, "learning_rate": 4.6372827552351227e-05, "loss": 4.415, "step": 1171500 }, { "epoch": 1.07, "learning_rate": 4.634993076010024e-05, "loss": 4.4248, "step": 1172000 }, { "epoch": 1.07, "learning_rate": 4.6327033967849243e-05, "loss": 4.4267, "step": 1172500 }, { "epoch": 1.07, "learning_rate": 4.630413717559825e-05, "loss": 4.4303, "step": 1173000 }, { "epoch": 1.07, "learning_rate": 4.628124038334726e-05, "loss": 4.4196, "step": 1173500 }, { "epoch": 1.08, "learning_rate": 4.6258343591096265e-05, "loss": 4.4202, "step": 1174000 }, { "epoch": 1.08, "learning_rate": 4.623544679884527e-05, "loss": 4.4065, "step": 1174500 }, { "epoch": 1.08, "learning_rate": 4.621259580017878e-05, "loss": 4.4157, "step": 1175000 }, { "epoch": 1.08, "learning_rate": 4.618969900792779e-05, "loss": 4.4063, "step": 1175500 }, { "epoch": 1.08, "learning_rate": 4.61668022156768e-05, "loss": 4.4162, "step": 1176000 }, { "epoch": 1.08, "learning_rate": 4.6143905423425805e-05, "loss": 4.4216, "step": 1176500 }, { "epoch": 1.08, "learning_rate": 4.612100863117481e-05, "loss": 4.4339, "step": 1177000 }, { "epoch": 1.08, "learning_rate": 4.609811183892382e-05, "loss": 4.4183, "step": 1177500 }, { "epoch": 1.08, "learning_rate": 4.6075215046672827e-05, "loss": 4.4241, "step": 1178000 }, { "epoch": 1.08, "learning_rate": 4.605236404800633e-05, "loss": 4.419, "step": 1178500 }, { "epoch": 1.08, "learning_rate": 4.602946725575534e-05, "loss": 4.4324, "step": 1179000 }, { "epoch": 1.08, "learning_rate": 4.600657046350434e-05, "loss": 4.4204, "step": 1179500 }, { "epoch": 1.08, "learning_rate": 4.5983673671253354e-05, "loss": 4.408, "step": 1180000 }, { "epoch": 1.08, "learning_rate": 4.596077687900236e-05, "loss": 4.4356, "step": 1180500 }, { "epoch": 1.08, "learning_rate": 4.5937880086751364e-05, "loss": 4.4245, "step": 1181000 }, { "epoch": 1.08, "learning_rate": 4.5914983294500376e-05, "loss": 4.4294, "step": 1181500 }, { "epoch": 1.08, "learning_rate": 4.589213229583388e-05, "loss": 4.419, "step": 1182000 }, { "epoch": 1.08, "learning_rate": 4.5869235503582893e-05, "loss": 4.4198, "step": 1182500 }, { "epoch": 1.08, "learning_rate": 4.58463387113319e-05, "loss": 4.4133, "step": 1183000 }, { "epoch": 1.08, "learning_rate": 4.5823441919080904e-05, "loss": 4.4243, "step": 1183500 }, { "epoch": 1.08, "learning_rate": 4.5800590920414416e-05, "loss": 4.4237, "step": 1184000 }, { "epoch": 1.08, "learning_rate": 4.577769412816342e-05, "loss": 4.4182, "step": 1184500 }, { "epoch": 1.09, "learning_rate": 4.575479733591243e-05, "loss": 4.4384, "step": 1185000 }, { "epoch": 1.09, "learning_rate": 4.573190054366144e-05, "loss": 4.4233, "step": 1185500 }, { "epoch": 1.09, "learning_rate": 4.570900375141044e-05, "loss": 4.4167, "step": 1186000 }, { "epoch": 1.09, "learning_rate": 4.5686106959159455e-05, "loss": 4.4184, "step": 1186500 }, { "epoch": 1.09, "learning_rate": 4.566321016690846e-05, "loss": 4.4281, "step": 1187000 }, { "epoch": 1.09, "learning_rate": 4.5640313374657465e-05, "loss": 4.4333, "step": 1187500 }, { "epoch": 1.09, "learning_rate": 4.5617416582406477e-05, "loss": 4.4293, "step": 1188000 }, { "epoch": 1.09, "learning_rate": 4.559456558373998e-05, "loss": 4.4253, "step": 1188500 }, { "epoch": 1.09, "learning_rate": 4.5571668791488994e-05, "loss": 4.4119, "step": 1189000 }, { "epoch": 1.09, "learning_rate": 4.5548771999238e-05, "loss": 4.427, "step": 1189500 }, { "epoch": 1.09, "learning_rate": 4.5525875206987004e-05, "loss": 4.4202, "step": 1190000 }, { "epoch": 1.09, "learning_rate": 4.5502978414736016e-05, "loss": 4.4252, "step": 1190500 }, { "epoch": 1.09, "learning_rate": 4.5480127416069515e-05, "loss": 4.4156, "step": 1191000 }, { "epoch": 1.09, "learning_rate": 4.545723062381853e-05, "loss": 4.4146, "step": 1191500 }, { "epoch": 1.09, "learning_rate": 4.543437962515203e-05, "loss": 4.4198, "step": 1192000 }, { "epoch": 1.09, "learning_rate": 4.5411482832901044e-05, "loss": 4.4293, "step": 1192500 }, { "epoch": 1.09, "learning_rate": 4.538858604065005e-05, "loss": 4.4095, "step": 1193000 }, { "epoch": 1.09, "learning_rate": 4.5365689248399054e-05, "loss": 4.4179, "step": 1193500 }, { "epoch": 1.09, "learning_rate": 4.5342792456148066e-05, "loss": 4.4147, "step": 1194000 }, { "epoch": 1.09, "learning_rate": 4.531989566389707e-05, "loss": 4.4145, "step": 1194500 }, { "epoch": 1.09, "learning_rate": 4.5296998871646076e-05, "loss": 4.4324, "step": 1195000 }, { "epoch": 1.09, "learning_rate": 4.527410207939509e-05, "loss": 4.4262, "step": 1195500 }, { "epoch": 1.1, "learning_rate": 4.5251251080728594e-05, "loss": 4.4298, "step": 1196000 }, { "epoch": 1.1, "learning_rate": 4.5228354288477605e-05, "loss": 4.414, "step": 1196500 }, { "epoch": 1.1, "learning_rate": 4.520545749622661e-05, "loss": 4.4033, "step": 1197000 }, { "epoch": 1.1, "learning_rate": 4.5182560703975615e-05, "loss": 4.4217, "step": 1197500 }, { "epoch": 1.1, "learning_rate": 4.515966391172463e-05, "loss": 4.431, "step": 1198000 }, { "epoch": 1.1, "learning_rate": 4.513676711947363e-05, "loss": 4.4146, "step": 1198500 }, { "epoch": 1.1, "learning_rate": 4.511387032722264e-05, "loss": 4.4151, "step": 1199000 }, { "epoch": 1.1, "learning_rate": 4.509097353497165e-05, "loss": 4.4123, "step": 1199500 }, { "epoch": 1.1, "learning_rate": 4.5068122536305155e-05, "loss": 4.4217, "step": 1200000 }, { "epoch": 1.1, "learning_rate": 4.5045225744054167e-05, "loss": 4.416, "step": 1200500 }, { "epoch": 1.1, "learning_rate": 4.502232895180317e-05, "loss": 4.4282, "step": 1201000 }, { "epoch": 1.1, "learning_rate": 4.4999477953136684e-05, "loss": 4.4162, "step": 1201500 }, { "epoch": 1.1, "learning_rate": 4.497658116088569e-05, "loss": 4.4245, "step": 1202000 }, { "epoch": 1.1, "learning_rate": 4.4953684368634694e-05, "loss": 4.4191, "step": 1202500 }, { "epoch": 1.1, "learning_rate": 4.4930787576383706e-05, "loss": 4.4111, "step": 1203000 }, { "epoch": 1.1, "learning_rate": 4.4907890784132704e-05, "loss": 4.4138, "step": 1203500 }, { "epoch": 1.1, "learning_rate": 4.4884993991881716e-05, "loss": 4.4241, "step": 1204000 }, { "epoch": 1.1, "learning_rate": 4.486209719963072e-05, "loss": 4.4195, "step": 1204500 }, { "epoch": 1.1, "learning_rate": 4.4839200407379726e-05, "loss": 4.4175, "step": 1205000 }, { "epoch": 1.1, "learning_rate": 4.481634940871324e-05, "loss": 4.428, "step": 1205500 }, { "epoch": 1.1, "learning_rate": 4.4793452616462244e-05, "loss": 4.4139, "step": 1206000 }, { "epoch": 1.1, "learning_rate": 4.4770555824211255e-05, "loss": 4.4214, "step": 1206500 }, { "epoch": 1.11, "learning_rate": 4.474765903196026e-05, "loss": 4.4192, "step": 1207000 }, { "epoch": 1.11, "learning_rate": 4.4724762239709265e-05, "loss": 4.4175, "step": 1207500 }, { "epoch": 1.11, "learning_rate": 4.470186544745828e-05, "loss": 4.4212, "step": 1208000 }, { "epoch": 1.11, "learning_rate": 4.467896865520728e-05, "loss": 4.4198, "step": 1208500 }, { "epoch": 1.11, "learning_rate": 4.465607186295629e-05, "loss": 4.4215, "step": 1209000 }, { "epoch": 1.11, "learning_rate": 4.46332208642898e-05, "loss": 4.426, "step": 1209500 }, { "epoch": 1.11, "learning_rate": 4.4610324072038805e-05, "loss": 4.4308, "step": 1210000 }, { "epoch": 1.11, "learning_rate": 4.458742727978782e-05, "loss": 4.4127, "step": 1210500 }, { "epoch": 1.11, "learning_rate": 4.456457628112132e-05, "loss": 4.42, "step": 1211000 }, { "epoch": 1.11, "learning_rate": 4.454167948887033e-05, "loss": 4.4039, "step": 1211500 }, { "epoch": 1.11, "learning_rate": 4.451878269661934e-05, "loss": 4.4301, "step": 1212000 }, { "epoch": 1.11, "learning_rate": 4.4495885904368344e-05, "loss": 4.4123, "step": 1212500 }, { "epoch": 1.11, "learning_rate": 4.447298911211735e-05, "loss": 4.4219, "step": 1213000 }, { "epoch": 1.11, "learning_rate": 4.445013811345086e-05, "loss": 4.4147, "step": 1213500 }, { "epoch": 1.11, "learning_rate": 4.442724132119987e-05, "loss": 4.4074, "step": 1214000 }, { "epoch": 1.11, "learning_rate": 4.440434452894888e-05, "loss": 4.4272, "step": 1214500 }, { "epoch": 1.11, "learning_rate": 4.4381447736697883e-05, "loss": 4.4219, "step": 1215000 }, { "epoch": 1.11, "learning_rate": 4.435859673803139e-05, "loss": 4.4198, "step": 1215500 }, { "epoch": 1.11, "learning_rate": 4.4335699945780394e-05, "loss": 4.4101, "step": 1216000 }, { "epoch": 1.11, "learning_rate": 4.4312803153529406e-05, "loss": 4.4224, "step": 1216500 }, { "epoch": 1.11, "learning_rate": 4.428990636127841e-05, "loss": 4.4223, "step": 1217000 }, { "epoch": 1.12, "learning_rate": 4.4267009569027416e-05, "loss": 4.4283, "step": 1217500 }, { "epoch": 1.12, "learning_rate": 4.424411277677643e-05, "loss": 4.4193, "step": 1218000 }, { "epoch": 1.12, "learning_rate": 4.4221261778109934e-05, "loss": 4.4093, "step": 1218500 }, { "epoch": 1.12, "learning_rate": 4.4198364985858945e-05, "loss": 4.4228, "step": 1219000 }, { "epoch": 1.12, "learning_rate": 4.417546819360795e-05, "loss": 4.4155, "step": 1219500 }, { "epoch": 1.12, "learning_rate": 4.4152571401356955e-05, "loss": 4.4238, "step": 1220000 }, { "epoch": 1.12, "learning_rate": 4.412967460910597e-05, "loss": 4.4198, "step": 1220500 }, { "epoch": 1.12, "learning_rate": 4.410682361043947e-05, "loss": 4.4244, "step": 1221000 }, { "epoch": 1.12, "learning_rate": 4.408392681818848e-05, "loss": 4.4016, "step": 1221500 }, { "epoch": 1.12, "learning_rate": 4.406103002593749e-05, "loss": 4.4192, "step": 1222000 }, { "epoch": 1.12, "learning_rate": 4.4038133233686495e-05, "loss": 4.4114, "step": 1222500 }, { "epoch": 1.12, "learning_rate": 4.4015236441435507e-05, "loss": 4.4138, "step": 1223000 }, { "epoch": 1.12, "learning_rate": 4.399238544276901e-05, "loss": 4.4168, "step": 1223500 }, { "epoch": 1.12, "learning_rate": 4.396948865051802e-05, "loss": 4.4065, "step": 1224000 }, { "epoch": 1.12, "learning_rate": 4.394659185826703e-05, "loss": 4.4125, "step": 1224500 }, { "epoch": 1.12, "learning_rate": 4.3923695066016034e-05, "loss": 4.4133, "step": 1225000 }, { "epoch": 1.12, "learning_rate": 4.390079827376504e-05, "loss": 4.4053, "step": 1225500 }, { "epoch": 1.12, "learning_rate": 4.387790148151405e-05, "loss": 4.4275, "step": 1226000 }, { "epoch": 1.12, "learning_rate": 4.3855004689263056e-05, "loss": 4.4123, "step": 1226500 }, { "epoch": 1.12, "learning_rate": 4.383215369059657e-05, "loss": 4.4192, "step": 1227000 }, { "epoch": 1.12, "learning_rate": 4.3809256898345573e-05, "loss": 4.4068, "step": 1227500 }, { "epoch": 1.12, "learning_rate": 4.378636010609458e-05, "loss": 4.4228, "step": 1228000 }, { "epoch": 1.13, "learning_rate": 4.3763463313843584e-05, "loss": 4.4234, "step": 1228500 }, { "epoch": 1.13, "learning_rate": 4.374056652159259e-05, "loss": 4.4086, "step": 1229000 }, { "epoch": 1.13, "learning_rate": 4.37176697293416e-05, "loss": 4.4194, "step": 1229500 }, { "epoch": 1.13, "learning_rate": 4.3694772937090605e-05, "loss": 4.4046, "step": 1230000 }, { "epoch": 1.13, "learning_rate": 4.367187614483961e-05, "loss": 4.41, "step": 1230500 }, { "epoch": 1.13, "learning_rate": 4.364902514617312e-05, "loss": 4.4214, "step": 1231000 }, { "epoch": 1.13, "learning_rate": 4.3626174147506635e-05, "loss": 4.423, "step": 1231500 }, { "epoch": 1.13, "learning_rate": 4.360327735525564e-05, "loss": 4.4175, "step": 1232000 }, { "epoch": 1.13, "learning_rate": 4.3580380563004645e-05, "loss": 4.4147, "step": 1232500 }, { "epoch": 1.13, "learning_rate": 4.355748377075366e-05, "loss": 4.4287, "step": 1233000 }, { "epoch": 1.13, "learning_rate": 4.353458697850266e-05, "loss": 4.4051, "step": 1233500 }, { "epoch": 1.13, "learning_rate": 4.351169018625167e-05, "loss": 4.412, "step": 1234000 }, { "epoch": 1.13, "learning_rate": 4.348879339400068e-05, "loss": 4.4247, "step": 1234500 }, { "epoch": 1.13, "learning_rate": 4.3465896601749684e-05, "loss": 4.4078, "step": 1235000 }, { "epoch": 1.13, "learning_rate": 4.3443045603083197e-05, "loss": 4.417, "step": 1235500 }, { "epoch": 1.13, "learning_rate": 4.34201488108322e-05, "loss": 4.4091, "step": 1236000 }, { "epoch": 1.13, "learning_rate": 4.339725201858121e-05, "loss": 4.4144, "step": 1236500 }, { "epoch": 1.13, "learning_rate": 4.337435522633022e-05, "loss": 4.4227, "step": 1237000 }, { "epoch": 1.13, "learning_rate": 4.3351504227663724e-05, "loss": 4.4054, "step": 1237500 }, { "epoch": 1.13, "learning_rate": 4.332860743541273e-05, "loss": 4.4112, "step": 1238000 }, { "epoch": 1.13, "learning_rate": 4.330571064316174e-05, "loss": 4.4232, "step": 1238500 }, { "epoch": 1.13, "learning_rate": 4.3282813850910746e-05, "loss": 4.4275, "step": 1239000 }, { "epoch": 1.14, "learning_rate": 4.325996285224426e-05, "loss": 4.4174, "step": 1239500 }, { "epoch": 1.14, "learning_rate": 4.323706605999326e-05, "loss": 4.4184, "step": 1240000 }, { "epoch": 1.14, "learning_rate": 4.321416926774227e-05, "loss": 4.4228, "step": 1240500 }, { "epoch": 1.14, "learning_rate": 4.3191272475491274e-05, "loss": 4.411, "step": 1241000 }, { "epoch": 1.14, "learning_rate": 4.3168421476824786e-05, "loss": 4.4123, "step": 1241500 }, { "epoch": 1.14, "learning_rate": 4.314552468457379e-05, "loss": 4.4046, "step": 1242000 }, { "epoch": 1.14, "learning_rate": 4.3122627892322796e-05, "loss": 4.4148, "step": 1242500 }, { "epoch": 1.14, "learning_rate": 4.309973110007181e-05, "loss": 4.3986, "step": 1243000 }, { "epoch": 1.14, "learning_rate": 4.307683430782081e-05, "loss": 4.3993, "step": 1243500 }, { "epoch": 1.14, "learning_rate": 4.3053983309154325e-05, "loss": 4.4199, "step": 1244000 }, { "epoch": 1.14, "learning_rate": 4.303108651690333e-05, "loss": 4.4159, "step": 1244500 }, { "epoch": 1.14, "learning_rate": 4.3008189724652335e-05, "loss": 4.4054, "step": 1245000 }, { "epoch": 1.14, "learning_rate": 4.298529293240135e-05, "loss": 4.4143, "step": 1245500 }, { "epoch": 1.14, "learning_rate": 4.296244193373485e-05, "loss": 4.3965, "step": 1246000 }, { "epoch": 1.14, "learning_rate": 4.293954514148386e-05, "loss": 4.4208, "step": 1246500 }, { "epoch": 1.14, "learning_rate": 4.291664834923287e-05, "loss": 4.4093, "step": 1247000 }, { "epoch": 1.14, "learning_rate": 4.2893751556981875e-05, "loss": 4.419, "step": 1247500 }, { "epoch": 1.14, "learning_rate": 4.287090055831539e-05, "loss": 4.4177, "step": 1248000 }, { "epoch": 1.14, "learning_rate": 4.284804955964889e-05, "loss": 4.407, "step": 1248500 }, { "epoch": 1.14, "learning_rate": 4.2825152767397905e-05, "loss": 4.4258, "step": 1249000 }, { "epoch": 1.14, "learning_rate": 4.280225597514691e-05, "loss": 4.4172, "step": 1249500 }, { "epoch": 1.14, "learning_rate": 4.2779359182895915e-05, "loss": 4.4124, "step": 1250000 }, { "epoch": 1.15, "learning_rate": 4.2756462390644926e-05, "loss": 4.4203, "step": 1250500 }, { "epoch": 1.15, "learning_rate": 4.273356559839393e-05, "loss": 4.4225, "step": 1251000 }, { "epoch": 1.15, "learning_rate": 4.271071459972744e-05, "loss": 4.4185, "step": 1251500 }, { "epoch": 1.15, "learning_rate": 4.268781780747644e-05, "loss": 4.4053, "step": 1252000 }, { "epoch": 1.15, "learning_rate": 4.2664921015225454e-05, "loss": 4.3984, "step": 1252500 }, { "epoch": 1.15, "learning_rate": 4.264202422297446e-05, "loss": 4.4282, "step": 1253000 }, { "epoch": 1.15, "learning_rate": 4.2619173224307965e-05, "loss": 4.4156, "step": 1253500 }, { "epoch": 1.15, "learning_rate": 4.2596276432056976e-05, "loss": 4.4123, "step": 1254000 }, { "epoch": 1.15, "learning_rate": 4.257342543339048e-05, "loss": 4.423, "step": 1254500 }, { "epoch": 1.15, "learning_rate": 4.2550528641139494e-05, "loss": 4.4145, "step": 1255000 }, { "epoch": 1.15, "learning_rate": 4.25276318488885e-05, "loss": 4.4287, "step": 1255500 }, { "epoch": 1.15, "learning_rate": 4.2504735056637504e-05, "loss": 4.4119, "step": 1256000 }, { "epoch": 1.15, "learning_rate": 4.2481838264386516e-05, "loss": 4.4238, "step": 1256500 }, { "epoch": 1.15, "learning_rate": 4.245894147213552e-05, "loss": 4.4202, "step": 1257000 }, { "epoch": 1.15, "learning_rate": 4.2436044679884526e-05, "loss": 4.4131, "step": 1257500 }, { "epoch": 1.15, "learning_rate": 4.241314788763354e-05, "loss": 4.4012, "step": 1258000 }, { "epoch": 1.15, "learning_rate": 4.239025109538254e-05, "loss": 4.4177, "step": 1258500 }, { "epoch": 1.15, "learning_rate": 4.236735430313155e-05, "loss": 4.4116, "step": 1259000 }, { "epoch": 1.15, "learning_rate": 4.234445751088056e-05, "loss": 4.4183, "step": 1259500 }, { "epoch": 1.15, "learning_rate": 4.2321606512214065e-05, "loss": 4.4069, "step": 1260000 }, { "epoch": 1.15, "learning_rate": 4.229870971996308e-05, "loss": 4.4023, "step": 1260500 }, { "epoch": 1.15, "learning_rate": 4.227581292771208e-05, "loss": 4.4078, "step": 1261000 }, { "epoch": 1.16, "learning_rate": 4.225291613546109e-05, "loss": 4.4133, "step": 1261500 }, { "epoch": 1.16, "learning_rate": 4.22300193432101e-05, "loss": 4.413, "step": 1262000 }, { "epoch": 1.16, "learning_rate": 4.2207122550959104e-05, "loss": 4.4157, "step": 1262500 }, { "epoch": 1.16, "learning_rate": 4.2184271552292616e-05, "loss": 4.4108, "step": 1263000 }, { "epoch": 1.16, "learning_rate": 4.2161374760041615e-05, "loss": 4.414, "step": 1263500 }, { "epoch": 1.16, "learning_rate": 4.2138477967790627e-05, "loss": 4.4172, "step": 1264000 }, { "epoch": 1.16, "learning_rate": 4.211558117553963e-05, "loss": 4.4207, "step": 1264500 }, { "epoch": 1.16, "learning_rate": 4.209268438328864e-05, "loss": 4.4053, "step": 1265000 }, { "epoch": 1.16, "learning_rate": 4.206978759103765e-05, "loss": 4.39, "step": 1265500 }, { "epoch": 1.16, "learning_rate": 4.2046936592371154e-05, "loss": 4.4118, "step": 1266000 }, { "epoch": 1.16, "learning_rate": 4.2024039800120166e-05, "loss": 4.4236, "step": 1266500 }, { "epoch": 1.16, "learning_rate": 4.200114300786917e-05, "loss": 4.4064, "step": 1267000 }, { "epoch": 1.16, "learning_rate": 4.1978246215618176e-05, "loss": 4.4153, "step": 1267500 }, { "epoch": 1.16, "learning_rate": 4.195534942336719e-05, "loss": 4.4091, "step": 1268000 }, { "epoch": 1.16, "learning_rate": 4.1932498424700693e-05, "loss": 4.4176, "step": 1268500 }, { "epoch": 1.16, "learning_rate": 4.1909601632449705e-05, "loss": 4.423, "step": 1269000 }, { "epoch": 1.16, "learning_rate": 4.188675063378321e-05, "loss": 4.3981, "step": 1269500 }, { "epoch": 1.16, "learning_rate": 4.1863853841532216e-05, "loss": 4.4033, "step": 1270000 }, { "epoch": 1.16, "learning_rate": 4.184095704928123e-05, "loss": 4.414, "step": 1270500 }, { "epoch": 1.16, "learning_rate": 4.181806025703023e-05, "loss": 4.4281, "step": 1271000 }, { "epoch": 1.16, "learning_rate": 4.179516346477924e-05, "loss": 4.4202, "step": 1271500 }, { "epoch": 1.16, "learning_rate": 4.177226667252825e-05, "loss": 4.4161, "step": 1272000 }, { "epoch": 1.17, "learning_rate": 4.1749369880277255e-05, "loss": 4.4082, "step": 1272500 }, { "epoch": 1.17, "learning_rate": 4.1726473088026266e-05, "loss": 4.4117, "step": 1273000 }, { "epoch": 1.17, "learning_rate": 4.170357629577527e-05, "loss": 4.4115, "step": 1273500 }, { "epoch": 1.17, "learning_rate": 4.1680679503524277e-05, "loss": 4.4065, "step": 1274000 }, { "epoch": 1.17, "learning_rate": 4.165778271127329e-05, "loss": 4.4226, "step": 1274500 }, { "epoch": 1.17, "learning_rate": 4.1634931712606794e-05, "loss": 4.4186, "step": 1275000 }, { "epoch": 1.17, "learning_rate": 4.16120349203558e-05, "loss": 4.4263, "step": 1275500 }, { "epoch": 1.17, "learning_rate": 4.1589138128104804e-05, "loss": 4.4077, "step": 1276000 }, { "epoch": 1.17, "learning_rate": 4.156624133585381e-05, "loss": 4.4245, "step": 1276500 }, { "epoch": 1.17, "learning_rate": 4.154334454360282e-05, "loss": 4.4106, "step": 1277000 }, { "epoch": 1.17, "learning_rate": 4.1520447751351826e-05, "loss": 4.4088, "step": 1277500 }, { "epoch": 1.17, "learning_rate": 4.149759675268534e-05, "loss": 4.4037, "step": 1278000 }, { "epoch": 1.17, "learning_rate": 4.1474699960434343e-05, "loss": 4.4139, "step": 1278500 }, { "epoch": 1.17, "learning_rate": 4.145180316818335e-05, "loss": 4.4032, "step": 1279000 }, { "epoch": 1.17, "learning_rate": 4.142890637593236e-05, "loss": 4.406, "step": 1279500 }, { "epoch": 1.17, "learning_rate": 4.1406009583681365e-05, "loss": 4.4106, "step": 1280000 }, { "epoch": 1.17, "learning_rate": 4.138315858501488e-05, "loss": 4.4053, "step": 1280500 }, { "epoch": 1.17, "learning_rate": 4.136026179276388e-05, "loss": 4.4091, "step": 1281000 }, { "epoch": 1.17, "learning_rate": 4.133736500051289e-05, "loss": 4.4148, "step": 1281500 }, { "epoch": 1.17, "learning_rate": 4.13144682082619e-05, "loss": 4.4165, "step": 1282000 }, { "epoch": 1.17, "learning_rate": 4.1291617209595405e-05, "loss": 4.4131, "step": 1282500 }, { "epoch": 1.18, "learning_rate": 4.126872041734442e-05, "loss": 4.4066, "step": 1283000 }, { "epoch": 1.18, "learning_rate": 4.124582362509342e-05, "loss": 4.4173, "step": 1283500 }, { "epoch": 1.18, "learning_rate": 4.122292683284243e-05, "loss": 4.4218, "step": 1284000 }, { "epoch": 1.18, "learning_rate": 4.120003004059144e-05, "loss": 4.4075, "step": 1284500 }, { "epoch": 1.18, "learning_rate": 4.1177133248340444e-05, "loss": 4.4196, "step": 1285000 }, { "epoch": 1.18, "learning_rate": 4.115423645608945e-05, "loss": 4.3996, "step": 1285500 }, { "epoch": 1.18, "learning_rate": 4.113138545742296e-05, "loss": 4.4027, "step": 1286000 }, { "epoch": 1.18, "learning_rate": 4.1108488665171967e-05, "loss": 4.3836, "step": 1286500 }, { "epoch": 1.18, "learning_rate": 4.108559187292098e-05, "loss": 4.4282, "step": 1287000 }, { "epoch": 1.18, "learning_rate": 4.1062695080669983e-05, "loss": 4.4009, "step": 1287500 }, { "epoch": 1.18, "learning_rate": 4.103984408200349e-05, "loss": 4.4014, "step": 1288000 }, { "epoch": 1.18, "learning_rate": 4.1016947289752494e-05, "loss": 4.4169, "step": 1288500 }, { "epoch": 1.18, "learning_rate": 4.09940504975015e-05, "loss": 4.4006, "step": 1289000 }, { "epoch": 1.18, "learning_rate": 4.097115370525051e-05, "loss": 4.3996, "step": 1289500 }, { "epoch": 1.18, "learning_rate": 4.0948302706584017e-05, "loss": 4.4154, "step": 1290000 }, { "epoch": 1.18, "learning_rate": 4.092540591433303e-05, "loss": 4.4105, "step": 1290500 }, { "epoch": 1.18, "learning_rate": 4.0902509122082033e-05, "loss": 4.4156, "step": 1291000 }, { "epoch": 1.18, "learning_rate": 4.087961232983104e-05, "loss": 4.4038, "step": 1291500 }, { "epoch": 1.18, "learning_rate": 4.085671553758005e-05, "loss": 4.3958, "step": 1292000 }, { "epoch": 1.18, "learning_rate": 4.0833818745329055e-05, "loss": 4.3943, "step": 1292500 }, { "epoch": 1.18, "learning_rate": 4.081092195307806e-05, "loss": 4.3942, "step": 1293000 }, { "epoch": 1.18, "learning_rate": 4.078802516082707e-05, "loss": 4.4054, "step": 1293500 }, { "epoch": 1.19, "learning_rate": 4.076512836857608e-05, "loss": 4.4086, "step": 1294000 }, { "epoch": 1.19, "learning_rate": 4.074227736990959e-05, "loss": 4.4123, "step": 1294500 }, { "epoch": 1.19, "learning_rate": 4.0719380577658595e-05, "loss": 4.4005, "step": 1295000 }, { "epoch": 1.19, "learning_rate": 4.06964837854076e-05, "loss": 4.4144, "step": 1295500 }, { "epoch": 1.19, "learning_rate": 4.067358699315661e-05, "loss": 4.4121, "step": 1296000 }, { "epoch": 1.19, "learning_rate": 4.065073599449012e-05, "loss": 4.4154, "step": 1296500 }, { "epoch": 1.19, "learning_rate": 4.062788499582363e-05, "loss": 4.4156, "step": 1297000 }, { "epoch": 1.19, "learning_rate": 4.0604988203572635e-05, "loss": 4.401, "step": 1297500 }, { "epoch": 1.19, "learning_rate": 4.0582091411321646e-05, "loss": 4.4075, "step": 1298000 }, { "epoch": 1.19, "learning_rate": 4.055919461907065e-05, "loss": 4.4088, "step": 1298500 }, { "epoch": 1.19, "learning_rate": 4.053634362040416e-05, "loss": 4.4148, "step": 1299000 }, { "epoch": 1.19, "learning_rate": 4.051344682815317e-05, "loss": 4.4076, "step": 1299500 }, { "epoch": 1.19, "learning_rate": 4.049055003590217e-05, "loss": 4.4173, "step": 1300000 }, { "epoch": 1.19, "learning_rate": 4.046765324365118e-05, "loss": 4.4068, "step": 1300500 }, { "epoch": 1.19, "learning_rate": 4.0444756451400184e-05, "loss": 4.4093, "step": 1301000 }, { "epoch": 1.19, "learning_rate": 4.042185965914919e-05, "loss": 4.4133, "step": 1301500 }, { "epoch": 1.19, "learning_rate": 4.03989628668982e-05, "loss": 4.4225, "step": 1302000 }, { "epoch": 1.19, "learning_rate": 4.0376066074647206e-05, "loss": 4.4042, "step": 1302500 }, { "epoch": 1.19, "learning_rate": 4.035316928239621e-05, "loss": 4.4035, "step": 1303000 }, { "epoch": 1.19, "learning_rate": 4.033027249014522e-05, "loss": 4.4114, "step": 1303500 }, { "epoch": 1.19, "learning_rate": 4.030737569789423e-05, "loss": 4.4059, "step": 1304000 }, { "epoch": 1.19, "learning_rate": 4.028447890564323e-05, "loss": 4.4109, "step": 1304500 }, { "epoch": 1.2, "learning_rate": 4.0261582113392245e-05, "loss": 4.4062, "step": 1305000 }, { "epoch": 1.2, "learning_rate": 4.023868532114125e-05, "loss": 4.4088, "step": 1305500 }, { "epoch": 1.2, "learning_rate": 4.021583432247476e-05, "loss": 4.4145, "step": 1306000 }, { "epoch": 1.2, "learning_rate": 4.019293753022377e-05, "loss": 4.3966, "step": 1306500 }, { "epoch": 1.2, "learning_rate": 4.017004073797277e-05, "loss": 4.4004, "step": 1307000 }, { "epoch": 1.2, "learning_rate": 4.0147143945721784e-05, "loss": 4.4122, "step": 1307500 }, { "epoch": 1.2, "learning_rate": 4.012424715347079e-05, "loss": 4.4059, "step": 1308000 }, { "epoch": 1.2, "learning_rate": 4.0101350361219794e-05, "loss": 4.4172, "step": 1308500 }, { "epoch": 1.2, "learning_rate": 4.0078453568968806e-05, "loss": 4.4086, "step": 1309000 }, { "epoch": 1.2, "learning_rate": 4.005555677671781e-05, "loss": 4.4079, "step": 1309500 }, { "epoch": 1.2, "learning_rate": 4.0032705778051323e-05, "loss": 4.4064, "step": 1310000 }, { "epoch": 1.2, "learning_rate": 4.000985477938483e-05, "loss": 4.412, "step": 1310500 }, { "epoch": 1.2, "learning_rate": 3.998695798713384e-05, "loss": 4.4017, "step": 1311000 }, { "epoch": 1.2, "learning_rate": 3.9964061194882846e-05, "loss": 4.4107, "step": 1311500 }, { "epoch": 1.2, "learning_rate": 3.994116440263185e-05, "loss": 4.3993, "step": 1312000 }, { "epoch": 1.2, "learning_rate": 3.991826761038086e-05, "loss": 4.4027, "step": 1312500 }, { "epoch": 1.2, "learning_rate": 3.989537081812986e-05, "loss": 4.4156, "step": 1313000 }, { "epoch": 1.2, "learning_rate": 3.987247402587887e-05, "loss": 4.4105, "step": 1313500 }, { "epoch": 1.2, "learning_rate": 3.984957723362788e-05, "loss": 4.4152, "step": 1314000 }, { "epoch": 1.2, "learning_rate": 3.982668044137688e-05, "loss": 4.4081, "step": 1314500 }, { "epoch": 1.2, "learning_rate": 3.9803829442710395e-05, "loss": 4.4055, "step": 1315000 }, { "epoch": 1.2, "learning_rate": 3.97809326504594e-05, "loss": 4.414, "step": 1315500 }, { "epoch": 1.21, "learning_rate": 3.975808165179291e-05, "loss": 4.4089, "step": 1316000 }, { "epoch": 1.21, "learning_rate": 3.973518485954192e-05, "loss": 4.4154, "step": 1316500 }, { "epoch": 1.21, "learning_rate": 3.971228806729092e-05, "loss": 4.41, "step": 1317000 }, { "epoch": 1.21, "learning_rate": 3.9689391275039935e-05, "loss": 4.4175, "step": 1317500 }, { "epoch": 1.21, "learning_rate": 3.966654027637344e-05, "loss": 4.407, "step": 1318000 }, { "epoch": 1.21, "learning_rate": 3.964364348412245e-05, "loss": 4.407, "step": 1318500 }, { "epoch": 1.21, "learning_rate": 3.962074669187146e-05, "loss": 4.4015, "step": 1319000 }, { "epoch": 1.21, "learning_rate": 3.959784989962046e-05, "loss": 4.4082, "step": 1319500 }, { "epoch": 1.21, "learning_rate": 3.9574953107369474e-05, "loss": 4.4005, "step": 1320000 }, { "epoch": 1.21, "learning_rate": 3.955210210870298e-05, "loss": 4.3903, "step": 1320500 }, { "epoch": 1.21, "learning_rate": 3.952920531645199e-05, "loss": 4.4045, "step": 1321000 }, { "epoch": 1.21, "learning_rate": 3.9506308524200997e-05, "loss": 4.4022, "step": 1321500 }, { "epoch": 1.21, "learning_rate": 3.948341173195e-05, "loss": 4.3928, "step": 1322000 }, { "epoch": 1.21, "learning_rate": 3.9460514939699013e-05, "loss": 4.3919, "step": 1322500 }, { "epoch": 1.21, "learning_rate": 3.943761814744802e-05, "loss": 4.4096, "step": 1323000 }, { "epoch": 1.21, "learning_rate": 3.941476714878153e-05, "loss": 4.4024, "step": 1323500 }, { "epoch": 1.21, "learning_rate": 3.9391870356530536e-05, "loss": 4.4062, "step": 1324000 }, { "epoch": 1.21, "learning_rate": 3.936897356427954e-05, "loss": 4.4008, "step": 1324500 }, { "epoch": 1.21, "learning_rate": 3.9346076772028546e-05, "loss": 4.4064, "step": 1325000 }, { "epoch": 1.21, "learning_rate": 3.932317997977755e-05, "loss": 4.3958, "step": 1325500 }, { "epoch": 1.21, "learning_rate": 3.930028318752656e-05, "loss": 4.3997, "step": 1326000 }, { "epoch": 1.21, "learning_rate": 3.927738639527557e-05, "loss": 4.393, "step": 1326500 }, { "epoch": 1.22, "learning_rate": 3.9254535396609074e-05, "loss": 4.4047, "step": 1327000 }, { "epoch": 1.22, "learning_rate": 3.9231638604358085e-05, "loss": 4.4087, "step": 1327500 }, { "epoch": 1.22, "learning_rate": 3.920874181210709e-05, "loss": 4.4069, "step": 1328000 }, { "epoch": 1.22, "learning_rate": 3.91858908134406e-05, "loss": 4.3996, "step": 1328500 }, { "epoch": 1.22, "learning_rate": 3.916299402118961e-05, "loss": 4.4074, "step": 1329000 }, { "epoch": 1.22, "learning_rate": 3.914009722893861e-05, "loss": 4.4063, "step": 1329500 }, { "epoch": 1.22, "learning_rate": 3.9117200436687625e-05, "loss": 4.4098, "step": 1330000 }, { "epoch": 1.22, "learning_rate": 3.909434943802113e-05, "loss": 4.3964, "step": 1330500 }, { "epoch": 1.22, "learning_rate": 3.907145264577014e-05, "loss": 4.4158, "step": 1331000 }, { "epoch": 1.22, "learning_rate": 3.904855585351915e-05, "loss": 4.4161, "step": 1331500 }, { "epoch": 1.22, "learning_rate": 3.902565906126815e-05, "loss": 4.4159, "step": 1332000 }, { "epoch": 1.22, "learning_rate": 3.9002762269017164e-05, "loss": 4.3829, "step": 1332500 }, { "epoch": 1.22, "learning_rate": 3.897986547676617e-05, "loss": 4.3922, "step": 1333000 }, { "epoch": 1.22, "learning_rate": 3.8956968684515174e-05, "loss": 4.4018, "step": 1333500 }, { "epoch": 1.22, "learning_rate": 3.8934071892264186e-05, "loss": 4.4068, "step": 1334000 }, { "epoch": 1.22, "learning_rate": 3.891117510001319e-05, "loss": 4.3906, "step": 1334500 }, { "epoch": 1.22, "learning_rate": 3.8888278307762196e-05, "loss": 4.4004, "step": 1335000 }, { "epoch": 1.22, "learning_rate": 3.886542730909571e-05, "loss": 4.4055, "step": 1335500 }, { "epoch": 1.22, "learning_rate": 3.8842530516844713e-05, "loss": 4.4105, "step": 1336000 }, { "epoch": 1.22, "learning_rate": 3.8819633724593725e-05, "loss": 4.4105, "step": 1336500 }, { "epoch": 1.22, "learning_rate": 3.879673693234273e-05, "loss": 4.3913, "step": 1337000 }, { "epoch": 1.22, "learning_rate": 3.8773840140091735e-05, "loss": 4.4093, "step": 1337500 }, { "epoch": 1.23, "learning_rate": 3.875094334784074e-05, "loss": 4.4066, "step": 1338000 }, { "epoch": 1.23, "learning_rate": 3.8728046555589745e-05, "loss": 4.4003, "step": 1338500 }, { "epoch": 1.23, "learning_rate": 3.870514976333876e-05, "loss": 4.3933, "step": 1339000 }, { "epoch": 1.23, "learning_rate": 3.868229876467226e-05, "loss": 4.3956, "step": 1339500 }, { "epoch": 1.23, "learning_rate": 3.8659401972421275e-05, "loss": 4.4029, "step": 1340000 } ], "max_steps": 2183712, "num_train_epochs": 2, "total_flos": 2.801049015287808e+18, "trial_name": null, "trial_params": null }