| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 3.7007369822924607, | |
| "eval_steps": 500, | |
| "global_step": 1520000, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 4.9999270159623114e-05, | |
| "loss": 0.1271, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 4.999707892753202e-05, | |
| "loss": 0.0859, | |
| "step": 10000 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 4.9993426431557147e-05, | |
| "loss": 0.0768, | |
| "step": 15000 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 4.9988311715921104e-05, | |
| "loss": 0.0727, | |
| "step": 20000 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 4.9981738587426694e-05, | |
| "loss": 0.0693, | |
| "step": 25000 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 4.997370216884145e-05, | |
| "loss": 0.0677, | |
| "step": 30000 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 4.9964207314440955e-05, | |
| "loss": 0.0659, | |
| "step": 35000 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 4.995325311774524e-05, | |
| "loss": 0.0657, | |
| "step": 40000 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 4.994084284853358e-05, | |
| "loss": 0.0638, | |
| "step": 45000 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 4.992697226521413e-05, | |
| "loss": 0.0623, | |
| "step": 50000 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 4.991164130374091e-05, | |
| "loss": 0.0615, | |
| "step": 55000 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 4.9894856993903285e-05, | |
| "loss": 0.0608, | |
| "step": 60000 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 4.987661739580889e-05, | |
| "loss": 0.0604, | |
| "step": 65000 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 4.985691948992265e-05, | |
| "loss": 0.0599, | |
| "step": 70000 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 4.983577230843278e-05, | |
| "loss": 0.0591, | |
| "step": 75000 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 4.981317329262237e-05, | |
| "loss": 0.0586, | |
| "step": 80000 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 4.9789118806694247e-05, | |
| "loss": 0.0587, | |
| "step": 85000 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 4.976361988048301e-05, | |
| "loss": 0.0576, | |
| "step": 90000 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 4.9736673337793535e-05, | |
| "loss": 0.0576, | |
| "step": 95000 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 4.970828075385029e-05, | |
| "loss": 0.0566, | |
| "step": 100000 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "learning_rate": 4.967844378840947e-05, | |
| "loss": 0.0567, | |
| "step": 105000 | |
| }, | |
| { | |
| "epoch": 0.27, | |
| "learning_rate": 4.964715778300843e-05, | |
| "loss": 0.0567, | |
| "step": 110000 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "learning_rate": 4.961443708338981e-05, | |
| "loss": 0.0568, | |
| "step": 115000 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "learning_rate": 4.9580277488129266e-05, | |
| "loss": 0.057, | |
| "step": 120000 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "learning_rate": 4.954467372832107e-05, | |
| "loss": 0.0551, | |
| "step": 125000 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "learning_rate": 4.9507634576843017e-05, | |
| "loss": 0.0549, | |
| "step": 130000 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "learning_rate": 4.9469185717167566e-05, | |
| "loss": 0.0563, | |
| "step": 135000 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "learning_rate": 4.942928322246712e-05, | |
| "loss": 0.0546, | |
| "step": 140000 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "learning_rate": 4.938796049480949e-05, | |
| "loss": 0.0538, | |
| "step": 145000 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "learning_rate": 4.934520341515526e-05, | |
| "loss": 0.0548, | |
| "step": 150000 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "learning_rate": 4.930104056921508e-05, | |
| "loss": 0.0541, | |
| "step": 155000 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "learning_rate": 4.925543918835136e-05, | |
| "loss": 0.0543, | |
| "step": 160000 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "learning_rate": 4.920843842545973e-05, | |
| "loss": 0.0538, | |
| "step": 165000 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "learning_rate": 4.9160003406180696e-05, | |
| "loss": 0.0539, | |
| "step": 170000 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "learning_rate": 4.9110165603726345e-05, | |
| "loss": 0.0536, | |
| "step": 175000 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "learning_rate": 4.9058918384022446e-05, | |
| "loss": 0.0537, | |
| "step": 180000 | |
| }, | |
| { | |
| "epoch": 0.45, | |
| "learning_rate": 4.900625406738267e-05, | |
| "loss": 0.0535, | |
| "step": 185000 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "learning_rate": 4.895219680226333e-05, | |
| "loss": 0.0527, | |
| "step": 190000 | |
| }, | |
| { | |
| "epoch": 0.47, | |
| "learning_rate": 4.889673935433758e-05, | |
| "loss": 0.0538, | |
| "step": 195000 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "learning_rate": 4.8839896480432604e-05, | |
| "loss": 0.0531, | |
| "step": 200000 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "learning_rate": 4.878163695931812e-05, | |
| "loss": 0.0527, | |
| "step": 205000 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "learning_rate": 4.872198666951938e-05, | |
| "loss": 0.0531, | |
| "step": 210000 | |
| }, | |
| { | |
| "epoch": 0.52, | |
| "learning_rate": 4.8660961447879297e-05, | |
| "loss": 0.0524, | |
| "step": 215000 | |
| }, | |
| { | |
| "epoch": 0.54, | |
| "learning_rate": 4.859854044346287e-05, | |
| "loss": 0.0527, | |
| "step": 220000 | |
| }, | |
| { | |
| "epoch": 0.55, | |
| "learning_rate": 4.853475227851366e-05, | |
| "loss": 0.0523, | |
| "step": 225000 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "learning_rate": 4.8469588333473586e-05, | |
| "loss": 0.0524, | |
| "step": 230000 | |
| }, | |
| { | |
| "epoch": 0.57, | |
| "learning_rate": 4.8403052417656516e-05, | |
| "loss": 0.0521, | |
| "step": 235000 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "learning_rate": 4.833514842057813e-05, | |
| "loss": 0.0518, | |
| "step": 240000 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "learning_rate": 4.826586631633889e-05, | |
| "loss": 0.052, | |
| "step": 245000 | |
| }, | |
| { | |
| "epoch": 0.61, | |
| "learning_rate": 4.8195237873239866e-05, | |
| "loss": 0.0525, | |
| "step": 250000 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "learning_rate": 4.8123253497174505e-05, | |
| "loss": 0.0514, | |
| "step": 255000 | |
| }, | |
| { | |
| "epoch": 0.63, | |
| "learning_rate": 4.804991739616124e-05, | |
| "loss": 0.0515, | |
| "step": 260000 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "learning_rate": 4.7975233857236826e-05, | |
| "loss": 0.0524, | |
| "step": 265000 | |
| }, | |
| { | |
| "epoch": 0.66, | |
| "learning_rate": 4.789920724620569e-05, | |
| "loss": 0.0506, | |
| "step": 270000 | |
| }, | |
| { | |
| "epoch": 0.67, | |
| "learning_rate": 4.7821857620213536e-05, | |
| "loss": 0.0513, | |
| "step": 275000 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "learning_rate": 4.774314266334363e-05, | |
| "loss": 0.0513, | |
| "step": 280000 | |
| }, | |
| { | |
| "epoch": 0.69, | |
| "learning_rate": 4.7663097669746146e-05, | |
| "loss": 0.0512, | |
| "step": 285000 | |
| }, | |
| { | |
| "epoch": 0.71, | |
| "learning_rate": 4.75817437300597e-05, | |
| "loss": 0.0508, | |
| "step": 290000 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "learning_rate": 4.749905304748714e-05, | |
| "loss": 0.0515, | |
| "step": 295000 | |
| }, | |
| { | |
| "epoch": 0.73, | |
| "learning_rate": 4.741504660273818e-05, | |
| "loss": 0.0504, | |
| "step": 300000 | |
| }, | |
| { | |
| "epoch": 0.74, | |
| "learning_rate": 4.732974650620106e-05, | |
| "loss": 0.0504, | |
| "step": 305000 | |
| }, | |
| { | |
| "epoch": 0.75, | |
| "learning_rate": 4.724315852930218e-05, | |
| "loss": 0.0506, | |
| "step": 310000 | |
| }, | |
| { | |
| "epoch": 0.77, | |
| "learning_rate": 4.715523536004309e-05, | |
| "loss": 0.0496, | |
| "step": 315000 | |
| }, | |
| { | |
| "epoch": 0.78, | |
| "learning_rate": 4.706605248838939e-05, | |
| "loss": 0.0503, | |
| "step": 320000 | |
| }, | |
| { | |
| "epoch": 0.79, | |
| "learning_rate": 4.697554374104222e-05, | |
| "loss": 0.0504, | |
| "step": 325000 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "learning_rate": 4.6883749846751347e-05, | |
| "loss": 0.05, | |
| "step": 330000 | |
| }, | |
| { | |
| "epoch": 0.82, | |
| "learning_rate": 4.679069491976989e-05, | |
| "loss": 0.0497, | |
| "step": 335000 | |
| }, | |
| { | |
| "epoch": 0.83, | |
| "learning_rate": 4.6696347165338586e-05, | |
| "loss": 0.0503, | |
| "step": 340000 | |
| }, | |
| { | |
| "epoch": 0.84, | |
| "learning_rate": 4.660074984524773e-05, | |
| "loss": 0.0503, | |
| "step": 345000 | |
| }, | |
| { | |
| "epoch": 0.85, | |
| "learning_rate": 4.650388980179216e-05, | |
| "loss": 0.0493, | |
| "step": 350000 | |
| }, | |
| { | |
| "epoch": 0.86, | |
| "learning_rate": 4.6405752940483196e-05, | |
| "loss": 0.0503, | |
| "step": 355000 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "learning_rate": 4.6306384260551005e-05, | |
| "loss": 0.0502, | |
| "step": 360000 | |
| }, | |
| { | |
| "epoch": 0.89, | |
| "learning_rate": 4.620577006509662e-05, | |
| "loss": 0.0493, | |
| "step": 365000 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "learning_rate": 4.6103916235762854e-05, | |
| "loss": 0.05, | |
| "step": 370000 | |
| }, | |
| { | |
| "epoch": 0.91, | |
| "learning_rate": 4.600082872665831e-05, | |
| "loss": 0.0492, | |
| "step": 375000 | |
| }, | |
| { | |
| "epoch": 0.93, | |
| "learning_rate": 4.589651356400925e-05, | |
| "loss": 0.0499, | |
| "step": 380000 | |
| }, | |
| { | |
| "epoch": 0.94, | |
| "learning_rate": 4.5790976845807375e-05, | |
| "loss": 0.0492, | |
| "step": 385000 | |
| }, | |
| { | |
| "epoch": 0.95, | |
| "learning_rate": 4.568422474145333e-05, | |
| "loss": 0.0496, | |
| "step": 390000 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "learning_rate": 4.5576241769938385e-05, | |
| "loss": 0.0496, | |
| "step": 395000 | |
| }, | |
| { | |
| "epoch": 0.97, | |
| "learning_rate": 4.5467099406767963e-05, | |
| "loss": 0.0491, | |
| "step": 400000 | |
| }, | |
| { | |
| "epoch": 0.99, | |
| "learning_rate": 4.535671666878825e-05, | |
| "loss": 0.0492, | |
| "step": 405000 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "learning_rate": 4.52451883295289e-05, | |
| "loss": 0.049, | |
| "step": 410000 | |
| }, | |
| { | |
| "epoch": 1.01, | |
| "learning_rate": 4.513245430925934e-05, | |
| "loss": 0.048, | |
| "step": 415000 | |
| }, | |
| { | |
| "epoch": 1.02, | |
| "learning_rate": 4.5018543398343515e-05, | |
| "loss": 0.0472, | |
| "step": 420000 | |
| }, | |
| { | |
| "epoch": 1.03, | |
| "learning_rate": 4.4903415971499975e-05, | |
| "loss": 0.0463, | |
| "step": 425000 | |
| }, | |
| { | |
| "epoch": 1.05, | |
| "learning_rate": 4.47871708602701e-05, | |
| "loss": 0.047, | |
| "step": 430000 | |
| }, | |
| { | |
| "epoch": 1.06, | |
| "learning_rate": 4.46697926477584e-05, | |
| "loss": 0.0472, | |
| "step": 435000 | |
| }, | |
| { | |
| "epoch": 1.07, | |
| "learning_rate": 4.455119354756587e-05, | |
| "loss": 0.0466, | |
| "step": 440000 | |
| }, | |
| { | |
| "epoch": 1.08, | |
| "learning_rate": 4.443154687048235e-05, | |
| "loss": 0.047, | |
| "step": 445000 | |
| }, | |
| { | |
| "epoch": 1.1, | |
| "learning_rate": 4.4310692318277604e-05, | |
| "loss": 0.047, | |
| "step": 450000 | |
| }, | |
| { | |
| "epoch": 1.11, | |
| "learning_rate": 4.418870846171951e-05, | |
| "loss": 0.047, | |
| "step": 455000 | |
| }, | |
| { | |
| "epoch": 1.12, | |
| "learning_rate": 4.406562717238809e-05, | |
| "loss": 0.0462, | |
| "step": 460000 | |
| }, | |
| { | |
| "epoch": 1.13, | |
| "learning_rate": 4.394145631643063e-05, | |
| "loss": 0.0471, | |
| "step": 465000 | |
| }, | |
| { | |
| "epoch": 1.14, | |
| "learning_rate": 4.381615345463161e-05, | |
| "loss": 0.0475, | |
| "step": 470000 | |
| }, | |
| { | |
| "epoch": 1.16, | |
| "learning_rate": 4.368972524944734e-05, | |
| "loss": 0.0467, | |
| "step": 475000 | |
| }, | |
| { | |
| "epoch": 1.17, | |
| "learning_rate": 4.3562229672692154e-05, | |
| "loss": 0.0467, | |
| "step": 480000 | |
| }, | |
| { | |
| "epoch": 1.18, | |
| "learning_rate": 4.3433674830975235e-05, | |
| "loss": 0.0463, | |
| "step": 485000 | |
| }, | |
| { | |
| "epoch": 1.19, | |
| "learning_rate": 4.3303990737414704e-05, | |
| "loss": 0.0463, | |
| "step": 490000 | |
| }, | |
| { | |
| "epoch": 1.21, | |
| "learning_rate": 4.317326247486547e-05, | |
| "loss": 0.0466, | |
| "step": 495000 | |
| }, | |
| { | |
| "epoch": 1.22, | |
| "learning_rate": 4.304147185090266e-05, | |
| "loss": 0.0463, | |
| "step": 500000 | |
| }, | |
| { | |
| "epoch": 1.23, | |
| "learning_rate": 4.290859988496148e-05, | |
| "loss": 0.0466, | |
| "step": 505000 | |
| }, | |
| { | |
| "epoch": 1.24, | |
| "learning_rate": 4.277470750354905e-05, | |
| "loss": 0.0461, | |
| "step": 510000 | |
| }, | |
| { | |
| "epoch": 1.25, | |
| "learning_rate": 4.263980315969459e-05, | |
| "loss": 0.0463, | |
| "step": 515000 | |
| }, | |
| { | |
| "epoch": 1.27, | |
| "learning_rate": 4.250381343966794e-05, | |
| "loss": 0.0463, | |
| "step": 520000 | |
| }, | |
| { | |
| "epoch": 1.28, | |
| "learning_rate": 4.236680008297452e-05, | |
| "loss": 0.0467, | |
| "step": 525000 | |
| }, | |
| { | |
| "epoch": 1.29, | |
| "learning_rate": 4.222879881460605e-05, | |
| "loss": 0.046, | |
| "step": 530000 | |
| }, | |
| { | |
| "epoch": 1.3, | |
| "learning_rate": 4.2089790395700444e-05, | |
| "loss": 0.0465, | |
| "step": 535000 | |
| }, | |
| { | |
| "epoch": 1.31, | |
| "learning_rate": 4.194978295232646e-05, | |
| "loss": 0.0471, | |
| "step": 540000 | |
| }, | |
| { | |
| "epoch": 1.33, | |
| "learning_rate": 4.180875635941759e-05, | |
| "loss": 0.046, | |
| "step": 545000 | |
| }, | |
| { | |
| "epoch": 1.34, | |
| "learning_rate": 4.1666746777322316e-05, | |
| "loss": 0.0463, | |
| "step": 550000 | |
| }, | |
| { | |
| "epoch": 1.35, | |
| "learning_rate": 4.1523819909804684e-05, | |
| "loss": 0.0464, | |
| "step": 555000 | |
| }, | |
| { | |
| "epoch": 1.36, | |
| "learning_rate": 4.1379869705757123e-05, | |
| "loss": 0.0462, | |
| "step": 560000 | |
| }, | |
| { | |
| "epoch": 1.38, | |
| "learning_rate": 4.123501975940446e-05, | |
| "loss": 0.0466, | |
| "step": 565000 | |
| }, | |
| { | |
| "epoch": 1.39, | |
| "learning_rate": 4.108916259272307e-05, | |
| "loss": 0.0462, | |
| "step": 570000 | |
| }, | |
| { | |
| "epoch": 1.4, | |
| "learning_rate": 4.094239397826731e-05, | |
| "loss": 0.0471, | |
| "step": 575000 | |
| }, | |
| { | |
| "epoch": 1.41, | |
| "learning_rate": 4.079466376852837e-05, | |
| "loss": 0.0456, | |
| "step": 580000 | |
| }, | |
| { | |
| "epoch": 1.42, | |
| "learning_rate": 4.064606953152425e-05, | |
| "loss": 0.0459, | |
| "step": 585000 | |
| }, | |
| { | |
| "epoch": 1.44, | |
| "learning_rate": 4.049653102158943e-05, | |
| "loss": 0.0464, | |
| "step": 590000 | |
| }, | |
| { | |
| "epoch": 1.45, | |
| "learning_rate": 4.034608662496395e-05, | |
| "loss": 0.0453, | |
| "step": 595000 | |
| }, | |
| { | |
| "epoch": 1.46, | |
| "learning_rate": 4.019474513623393e-05, | |
| "loss": 0.0466, | |
| "step": 600000 | |
| }, | |
| { | |
| "epoch": 1.47, | |
| "learning_rate": 4.004248485601213e-05, | |
| "loss": 0.046, | |
| "step": 605000 | |
| }, | |
| { | |
| "epoch": 1.49, | |
| "learning_rate": 3.9889375601035114e-05, | |
| "loss": 0.0454, | |
| "step": 610000 | |
| }, | |
| { | |
| "epoch": 1.5, | |
| "learning_rate": 3.973536505734094e-05, | |
| "loss": 0.0456, | |
| "step": 615000 | |
| }, | |
| { | |
| "epoch": 1.51, | |
| "learning_rate": 3.958049277800142e-05, | |
| "loss": 0.045, | |
| "step": 620000 | |
| }, | |
| { | |
| "epoch": 1.52, | |
| "learning_rate": 3.94248302918014e-05, | |
| "loss": 0.0452, | |
| "step": 625000 | |
| }, | |
| { | |
| "epoch": 1.53, | |
| "learning_rate": 3.9268262097860184e-05, | |
| "loss": 0.0458, | |
| "step": 630000 | |
| }, | |
| { | |
| "epoch": 1.55, | |
| "learning_rate": 3.911089105446822e-05, | |
| "loss": 0.0455, | |
| "step": 635000 | |
| }, | |
| { | |
| "epoch": 1.56, | |
| "learning_rate": 3.895269512527556e-05, | |
| "loss": 0.0453, | |
| "step": 640000 | |
| }, | |
| { | |
| "epoch": 1.57, | |
| "learning_rate": 3.879368355800267e-05, | |
| "loss": 0.0459, | |
| "step": 645000 | |
| }, | |
| { | |
| "epoch": 1.58, | |
| "learning_rate": 3.863386564805006e-05, | |
| "loss": 0.0458, | |
| "step": 650000 | |
| }, | |
| { | |
| "epoch": 1.59, | |
| "learning_rate": 3.8473250737954924e-05, | |
| "loss": 0.0452, | |
| "step": 655000 | |
| }, | |
| { | |
| "epoch": 1.61, | |
| "learning_rate": 3.831181584524374e-05, | |
| "loss": 0.046, | |
| "step": 660000 | |
| }, | |
| { | |
| "epoch": 1.62, | |
| "learning_rate": 3.8149602467155784e-05, | |
| "loss": 0.0454, | |
| "step": 665000 | |
| }, | |
| { | |
| "epoch": 1.63, | |
| "learning_rate": 3.7986652769317283e-05, | |
| "loss": 0.0454, | |
| "step": 670000 | |
| }, | |
| { | |
| "epoch": 1.64, | |
| "learning_rate": 3.782291107551002e-05, | |
| "loss": 0.0455, | |
| "step": 675000 | |
| }, | |
| { | |
| "epoch": 1.66, | |
| "learning_rate": 3.7658452467105766e-05, | |
| "loss": 0.0453, | |
| "step": 680000 | |
| }, | |
| { | |
| "epoch": 1.67, | |
| "learning_rate": 3.7493287004972016e-05, | |
| "loss": 0.0461, | |
| "step": 685000 | |
| }, | |
| { | |
| "epoch": 1.68, | |
| "learning_rate": 3.7327324967260834e-05, | |
| "loss": 0.045, | |
| "step": 690000 | |
| }, | |
| { | |
| "epoch": 1.69, | |
| "learning_rate": 3.716067543266813e-05, | |
| "loss": 0.0453, | |
| "step": 695000 | |
| }, | |
| { | |
| "epoch": 1.7, | |
| "learning_rate": 3.6993281460825346e-05, | |
| "loss": 0.0454, | |
| "step": 700000 | |
| }, | |
| { | |
| "epoch": 1.72, | |
| "learning_rate": 3.682521980780319e-05, | |
| "loss": 0.0455, | |
| "step": 705000 | |
| }, | |
| { | |
| "epoch": 1.73, | |
| "learning_rate": 3.665646688334596e-05, | |
| "loss": 0.0445, | |
| "step": 710000 | |
| }, | |
| { | |
| "epoch": 1.74, | |
| "learning_rate": 3.648706652022939e-05, | |
| "loss": 0.0448, | |
| "step": 715000 | |
| }, | |
| { | |
| "epoch": 1.75, | |
| "learning_rate": 3.631692671937868e-05, | |
| "loss": 0.0444, | |
| "step": 720000 | |
| }, | |
| { | |
| "epoch": 1.77, | |
| "learning_rate": 3.6146125095823744e-05, | |
| "loss": 0.0449, | |
| "step": 725000 | |
| }, | |
| { | |
| "epoch": 1.78, | |
| "learning_rate": 3.597474036228449e-05, | |
| "loss": 0.0453, | |
| "step": 730000 | |
| }, | |
| { | |
| "epoch": 1.79, | |
| "learning_rate": 3.580264535204181e-05, | |
| "loss": 0.0447, | |
| "step": 735000 | |
| }, | |
| { | |
| "epoch": 1.8, | |
| "learning_rate": 3.562995320943442e-05, | |
| "loss": 0.0441, | |
| "step": 740000 | |
| }, | |
| { | |
| "epoch": 1.81, | |
| "learning_rate": 3.545660492947155e-05, | |
| "loss": 0.0448, | |
| "step": 745000 | |
| }, | |
| { | |
| "epoch": 1.83, | |
| "learning_rate": 3.52826799984853e-05, | |
| "loss": 0.0442, | |
| "step": 750000 | |
| }, | |
| { | |
| "epoch": 1.84, | |
| "learning_rate": 3.510818894752436e-05, | |
| "loss": 0.044, | |
| "step": 755000 | |
| }, | |
| { | |
| "epoch": 1.85, | |
| "learning_rate": 3.49330721388963e-05, | |
| "loss": 0.044, | |
| "step": 760000 | |
| }, | |
| { | |
| "epoch": 1.86, | |
| "learning_rate": 3.4757339457867414e-05, | |
| "loss": 0.0444, | |
| "step": 765000 | |
| }, | |
| { | |
| "epoch": 1.87, | |
| "learning_rate": 3.4581071484102234e-05, | |
| "loss": 0.0435, | |
| "step": 770000 | |
| }, | |
| { | |
| "epoch": 1.89, | |
| "learning_rate": 3.440424342594606e-05, | |
| "loss": 0.0436, | |
| "step": 775000 | |
| }, | |
| { | |
| "epoch": 1.9, | |
| "learning_rate": 3.4226865620304894e-05, | |
| "loss": 0.0444, | |
| "step": 780000 | |
| }, | |
| { | |
| "epoch": 1.91, | |
| "learning_rate": 3.404891278526875e-05, | |
| "loss": 0.0442, | |
| "step": 785000 | |
| }, | |
| { | |
| "epoch": 1.92, | |
| "learning_rate": 3.387046651852148e-05, | |
| "loss": 0.0446, | |
| "step": 790000 | |
| }, | |
| { | |
| "epoch": 1.94, | |
| "learning_rate": 3.369146584901852e-05, | |
| "loss": 0.045, | |
| "step": 795000 | |
| }, | |
| { | |
| "epoch": 1.95, | |
| "learning_rate": 3.3511992854912975e-05, | |
| "loss": 0.044, | |
| "step": 800000 | |
| }, | |
| { | |
| "epoch": 1.96, | |
| "learning_rate": 3.333205832954606e-05, | |
| "loss": 0.0439, | |
| "step": 805000 | |
| }, | |
| { | |
| "epoch": 1.97, | |
| "learning_rate": 3.315152846675856e-05, | |
| "loss": 0.0447, | |
| "step": 810000 | |
| }, | |
| { | |
| "epoch": 1.98, | |
| "learning_rate": 3.2970630450412766e-05, | |
| "loss": 0.0439, | |
| "step": 815000 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "learning_rate": 3.2789230337843214e-05, | |
| "loss": 0.0438, | |
| "step": 820000 | |
| }, | |
| { | |
| "epoch": 2.01, | |
| "learning_rate": 3.2607374887231645e-05, | |
| "loss": 0.0414, | |
| "step": 825000 | |
| }, | |
| { | |
| "epoch": 2.02, | |
| "learning_rate": 3.24251112477632e-05, | |
| "loss": 0.0411, | |
| "step": 830000 | |
| }, | |
| { | |
| "epoch": 2.03, | |
| "learning_rate": 3.22423405210557e-05, | |
| "loss": 0.0409, | |
| "step": 835000 | |
| }, | |
| { | |
| "epoch": 2.05, | |
| "learning_rate": 3.2059146256623924e-05, | |
| "loss": 0.0403, | |
| "step": 840000 | |
| }, | |
| { | |
| "epoch": 2.06, | |
| "learning_rate": 3.187561270645646e-05, | |
| "loss": 0.0407, | |
| "step": 845000 | |
| }, | |
| { | |
| "epoch": 2.07, | |
| "learning_rate": 3.169164053812308e-05, | |
| "loss": 0.0406, | |
| "step": 850000 | |
| }, | |
| { | |
| "epoch": 2.08, | |
| "learning_rate": 3.1507240268131666e-05, | |
| "loss": 0.0412, | |
| "step": 855000 | |
| }, | |
| { | |
| "epoch": 2.09, | |
| "learning_rate": 3.1322459449652304e-05, | |
| "loss": 0.0406, | |
| "step": 860000 | |
| }, | |
| { | |
| "epoch": 2.11, | |
| "learning_rate": 3.113734596258391e-05, | |
| "loss": 0.0407, | |
| "step": 865000 | |
| }, | |
| { | |
| "epoch": 2.12, | |
| "learning_rate": 3.095187370231551e-05, | |
| "loss": 0.0406, | |
| "step": 870000 | |
| }, | |
| { | |
| "epoch": 2.13, | |
| "learning_rate": 3.076605351107057e-05, | |
| "loss": 0.0412, | |
| "step": 875000 | |
| }, | |
| { | |
| "epoch": 2.14, | |
| "learning_rate": 3.057989625141167e-05, | |
| "loss": 0.0407, | |
| "step": 880000 | |
| }, | |
| { | |
| "epoch": 2.15, | |
| "learning_rate": 3.0393412805605544e-05, | |
| "loss": 0.0414, | |
| "step": 885000 | |
| }, | |
| { | |
| "epoch": 2.17, | |
| "learning_rate": 3.0206576669471674e-05, | |
| "loss": 0.0409, | |
| "step": 890000 | |
| }, | |
| { | |
| "epoch": 2.18, | |
| "learning_rate": 3.0019473514002417e-05, | |
| "loss": 0.0411, | |
| "step": 895000 | |
| }, | |
| { | |
| "epoch": 2.19, | |
| "learning_rate": 2.983207693323402e-05, | |
| "loss": 0.0405, | |
| "step": 900000 | |
| }, | |
| { | |
| "epoch": 2.2, | |
| "learning_rate": 2.9644397881880708e-05, | |
| "loss": 0.0409, | |
| "step": 905000 | |
| }, | |
| { | |
| "epoch": 2.22, | |
| "learning_rate": 2.9456447331169147e-05, | |
| "loss": 0.0408, | |
| "step": 910000 | |
| }, | |
| { | |
| "epoch": 2.23, | |
| "learning_rate": 2.9268236268197174e-05, | |
| "loss": 0.041, | |
| "step": 915000 | |
| }, | |
| { | |
| "epoch": 2.24, | |
| "learning_rate": 2.90797756952915e-05, | |
| "loss": 0.0413, | |
| "step": 920000 | |
| }, | |
| { | |
| "epoch": 2.25, | |
| "learning_rate": 2.8891038851321416e-05, | |
| "loss": 0.0412, | |
| "step": 925000 | |
| }, | |
| { | |
| "epoch": 2.26, | |
| "learning_rate": 2.8702150101270274e-05, | |
| "loss": 0.041, | |
| "step": 930000 | |
| }, | |
| { | |
| "epoch": 2.28, | |
| "learning_rate": 2.8512969290515646e-05, | |
| "loss": 0.0408, | |
| "step": 935000 | |
| }, | |
| { | |
| "epoch": 2.29, | |
| "learning_rate": 2.832358303857603e-05, | |
| "loss": 0.041, | |
| "step": 940000 | |
| }, | |
| { | |
| "epoch": 2.3, | |
| "learning_rate": 2.8134040363313168e-05, | |
| "loss": 0.0409, | |
| "step": 945000 | |
| }, | |
| { | |
| "epoch": 2.31, | |
| "learning_rate": 2.7944276502291256e-05, | |
| "loss": 0.041, | |
| "step": 950000 | |
| }, | |
| { | |
| "epoch": 2.33, | |
| "learning_rate": 2.775437846896827e-05, | |
| "loss": 0.04, | |
| "step": 955000 | |
| }, | |
| { | |
| "epoch": 2.34, | |
| "learning_rate": 2.756435746426503e-05, | |
| "loss": 0.04, | |
| "step": 960000 | |
| }, | |
| { | |
| "epoch": 2.35, | |
| "learning_rate": 2.73741104713839e-05, | |
| "loss": 0.0406, | |
| "step": 965000 | |
| }, | |
| { | |
| "epoch": 2.36, | |
| "learning_rate": 2.718376273659552e-05, | |
| "loss": 0.0401, | |
| "step": 970000 | |
| }, | |
| { | |
| "epoch": 2.37, | |
| "learning_rate": 2.699332546709509e-05, | |
| "loss": 0.0401, | |
| "step": 975000 | |
| }, | |
| { | |
| "epoch": 2.39, | |
| "learning_rate": 2.680269543053059e-05, | |
| "loss": 0.0391, | |
| "step": 980000 | |
| }, | |
| { | |
| "epoch": 2.4, | |
| "learning_rate": 2.6612036299970488e-05, | |
| "loss": 0.0402, | |
| "step": 985000 | |
| }, | |
| { | |
| "epoch": 2.41, | |
| "learning_rate": 2.6421168424917686e-05, | |
| "loss": 0.0403, | |
| "step": 990000 | |
| }, | |
| { | |
| "epoch": 2.42, | |
| "learning_rate": 2.623033199888022e-05, | |
| "loss": 0.0401, | |
| "step": 995000 | |
| }, | |
| { | |
| "epoch": 2.43, | |
| "learning_rate": 2.6039385468590504e-05, | |
| "loss": 0.0396, | |
| "step": 1000000 | |
| }, | |
| { | |
| "epoch": 2.45, | |
| "learning_rate": 2.5848339956464096e-05, | |
| "loss": 0.0398, | |
| "step": 1005000 | |
| }, | |
| { | |
| "epoch": 2.46, | |
| "learning_rate": 2.5657283063641474e-05, | |
| "loss": 0.0401, | |
| "step": 1010000 | |
| }, | |
| { | |
| "epoch": 2.47, | |
| "learning_rate": 2.5466225985224508e-05, | |
| "loss": 0.04, | |
| "step": 1015000 | |
| }, | |
| { | |
| "epoch": 2.48, | |
| "learning_rate": 2.5275103421570534e-05, | |
| "loss": 0.0398, | |
| "step": 1020000 | |
| }, | |
| { | |
| "epoch": 2.5, | |
| "learning_rate": 2.5083926532199688e-05, | |
| "loss": 0.0398, | |
| "step": 1025000 | |
| }, | |
| { | |
| "epoch": 2.51, | |
| "learning_rate": 2.4892821222247636e-05, | |
| "loss": 0.04, | |
| "step": 1030000 | |
| }, | |
| { | |
| "epoch": 2.52, | |
| "learning_rate": 2.4701607451032485e-05, | |
| "loss": 0.0402, | |
| "step": 1035000 | |
| }, | |
| { | |
| "epoch": 2.53, | |
| "learning_rate": 2.4510525847347732e-05, | |
| "loss": 0.0398, | |
| "step": 1040000 | |
| }, | |
| { | |
| "epoch": 2.54, | |
| "learning_rate": 2.4319434615660547e-05, | |
| "loss": 0.0404, | |
| "step": 1045000 | |
| }, | |
| { | |
| "epoch": 2.56, | |
| "learning_rate": 2.4128383168046513e-05, | |
| "loss": 0.0402, | |
| "step": 1050000 | |
| }, | |
| { | |
| "epoch": 2.57, | |
| "learning_rate": 2.3937306253792384e-05, | |
| "loss": 0.0397, | |
| "step": 1055000 | |
| }, | |
| { | |
| "epoch": 2.58, | |
| "learning_rate": 2.3746367903542062e-05, | |
| "loss": 0.0402, | |
| "step": 1060000 | |
| }, | |
| { | |
| "epoch": 2.59, | |
| "learning_rate": 2.355550283734671e-05, | |
| "loss": 0.0393, | |
| "step": 1065000 | |
| }, | |
| { | |
| "epoch": 2.61, | |
| "learning_rate": 2.3364684050478952e-05, | |
| "loss": 0.0397, | |
| "step": 1070000 | |
| }, | |
| { | |
| "epoch": 2.62, | |
| "learning_rate": 2.3173999040117696e-05, | |
| "loss": 0.0394, | |
| "step": 1075000 | |
| }, | |
| { | |
| "epoch": 2.63, | |
| "learning_rate": 2.29834207729988e-05, | |
| "loss": 0.039, | |
| "step": 1080000 | |
| }, | |
| { | |
| "epoch": 2.64, | |
| "learning_rate": 2.2792922295048335e-05, | |
| "loss": 0.0393, | |
| "step": 1085000 | |
| }, | |
| { | |
| "epoch": 2.65, | |
| "learning_rate": 2.2602629024425966e-05, | |
| "loss": 0.039, | |
| "step": 1090000 | |
| }, | |
| { | |
| "epoch": 2.67, | |
| "learning_rate": 2.2412437803062146e-05, | |
| "loss": 0.0394, | |
| "step": 1095000 | |
| }, | |
| { | |
| "epoch": 2.68, | |
| "learning_rate": 2.2222359836495102e-05, | |
| "loss": 0.0389, | |
| "step": 1100000 | |
| }, | |
| { | |
| "epoch": 2.69, | |
| "learning_rate": 2.2032520255937674e-05, | |
| "loss": 0.0392, | |
| "step": 1105000 | |
| }, | |
| { | |
| "epoch": 2.7, | |
| "learning_rate": 2.1842816139163587e-05, | |
| "loss": 0.039, | |
| "step": 1110000 | |
| }, | |
| { | |
| "epoch": 2.71, | |
| "learning_rate": 2.1653258683231724e-05, | |
| "loss": 0.0392, | |
| "step": 1115000 | |
| }, | |
| { | |
| "epoch": 2.73, | |
| "learning_rate": 2.1463934806956023e-05, | |
| "loss": 0.039, | |
| "step": 1120000 | |
| }, | |
| { | |
| "epoch": 2.74, | |
| "learning_rate": 2.1274817639807107e-05, | |
| "loss": 0.0393, | |
| "step": 1125000 | |
| }, | |
| { | |
| "epoch": 2.75, | |
| "learning_rate": 2.1085918237080158e-05, | |
| "loss": 0.0393, | |
| "step": 1130000 | |
| }, | |
| { | |
| "epoch": 2.76, | |
| "learning_rate": 2.0897285366938386e-05, | |
| "loss": 0.0389, | |
| "step": 1135000 | |
| }, | |
| { | |
| "epoch": 2.78, | |
| "learning_rate": 2.0708816881777654e-05, | |
| "loss": 0.0386, | |
| "step": 1140000 | |
| }, | |
| { | |
| "epoch": 2.79, | |
| "learning_rate": 2.052059934835373e-05, | |
| "loss": 0.0387, | |
| "step": 1145000 | |
| }, | |
| { | |
| "epoch": 2.8, | |
| "learning_rate": 2.0332718917197323e-05, | |
| "loss": 0.0387, | |
| "step": 1150000 | |
| }, | |
| { | |
| "epoch": 2.81, | |
| "learning_rate": 2.0145036181854185e-05, | |
| "loss": 0.0386, | |
| "step": 1155000 | |
| }, | |
| { | |
| "epoch": 2.82, | |
| "learning_rate": 1.9957712284869015e-05, | |
| "loss": 0.0388, | |
| "step": 1160000 | |
| }, | |
| { | |
| "epoch": 2.84, | |
| "learning_rate": 1.9770645630812195e-05, | |
| "loss": 0.0385, | |
| "step": 1165000 | |
| }, | |
| { | |
| "epoch": 2.85, | |
| "learning_rate": 1.958384733531826e-05, | |
| "loss": 0.0387, | |
| "step": 1170000 | |
| }, | |
| { | |
| "epoch": 2.86, | |
| "learning_rate": 1.939744032341169e-05, | |
| "loss": 0.038, | |
| "step": 1175000 | |
| }, | |
| { | |
| "epoch": 2.87, | |
| "learning_rate": 1.921132348667458e-05, | |
| "loss": 0.0389, | |
| "step": 1180000 | |
| }, | |
| { | |
| "epoch": 2.89, | |
| "learning_rate": 1.9025545040833008e-05, | |
| "loss": 0.0388, | |
| "step": 1185000 | |
| }, | |
| { | |
| "epoch": 2.9, | |
| "learning_rate": 1.8840078780999552e-05, | |
| "loss": 0.0383, | |
| "step": 1190000 | |
| }, | |
| { | |
| "epoch": 2.91, | |
| "learning_rate": 1.8655009750029695e-05, | |
| "loss": 0.0388, | |
| "step": 1195000 | |
| }, | |
| { | |
| "epoch": 2.92, | |
| "learning_rate": 1.8470311630602035e-05, | |
| "loss": 0.0379, | |
| "step": 1200000 | |
| }, | |
| { | |
| "epoch": 2.93, | |
| "learning_rate": 1.8285995219685757e-05, | |
| "loss": 0.0378, | |
| "step": 1205000 | |
| }, | |
| { | |
| "epoch": 2.95, | |
| "learning_rate": 1.8102071291936395e-05, | |
| "loss": 0.0381, | |
| "step": 1210000 | |
| }, | |
| { | |
| "epoch": 2.96, | |
| "learning_rate": 1.7918587276844793e-05, | |
| "loss": 0.0382, | |
| "step": 1215000 | |
| }, | |
| { | |
| "epoch": 2.97, | |
| "learning_rate": 1.7735443869214267e-05, | |
| "loss": 0.0385, | |
| "step": 1220000 | |
| }, | |
| { | |
| "epoch": 2.98, | |
| "learning_rate": 1.7552725298494208e-05, | |
| "loss": 0.038, | |
| "step": 1225000 | |
| }, | |
| { | |
| "epoch": 2.99, | |
| "learning_rate": 1.7370515089505386e-05, | |
| "loss": 0.0381, | |
| "step": 1230000 | |
| }, | |
| { | |
| "epoch": 3.01, | |
| "learning_rate": 1.718875070180597e-05, | |
| "loss": 0.0362, | |
| "step": 1235000 | |
| }, | |
| { | |
| "epoch": 3.02, | |
| "learning_rate": 1.7007370282692398e-05, | |
| "loss": 0.0339, | |
| "step": 1240000 | |
| }, | |
| { | |
| "epoch": 3.03, | |
| "learning_rate": 1.682649342119219e-05, | |
| "loss": 0.0349, | |
| "step": 1245000 | |
| }, | |
| { | |
| "epoch": 3.04, | |
| "learning_rate": 1.664609436151844e-05, | |
| "loss": 0.0345, | |
| "step": 1250000 | |
| }, | |
| { | |
| "epoch": 3.06, | |
| "learning_rate": 1.6466183649328544e-05, | |
| "loss": 0.0348, | |
| "step": 1255000 | |
| }, | |
| { | |
| "epoch": 3.07, | |
| "learning_rate": 1.628673595562077e-05, | |
| "loss": 0.0346, | |
| "step": 1260000 | |
| }, | |
| { | |
| "epoch": 3.08, | |
| "learning_rate": 1.6107833563524666e-05, | |
| "loss": 0.0348, | |
| "step": 1265000 | |
| }, | |
| { | |
| "epoch": 3.09, | |
| "learning_rate": 1.5929450984231475e-05, | |
| "loss": 0.0341, | |
| "step": 1270000 | |
| }, | |
| { | |
| "epoch": 3.1, | |
| "learning_rate": 1.575159864552045e-05, | |
| "loss": 0.0346, | |
| "step": 1275000 | |
| }, | |
| { | |
| "epoch": 3.12, | |
| "learning_rate": 1.5574286944174337e-05, | |
| "loss": 0.0348, | |
| "step": 1280000 | |
| }, | |
| { | |
| "epoch": 3.13, | |
| "learning_rate": 1.5397526245371656e-05, | |
| "loss": 0.0347, | |
| "step": 1285000 | |
| }, | |
| { | |
| "epoch": 3.14, | |
| "learning_rate": 1.5221362079222911e-05, | |
| "loss": 0.0346, | |
| "step": 1290000 | |
| }, | |
| { | |
| "epoch": 3.15, | |
| "learning_rate": 1.5045699154455748e-05, | |
| "loss": 0.035, | |
| "step": 1295000 | |
| }, | |
| { | |
| "epoch": 3.17, | |
| "learning_rate": 1.4870653329234462e-05, | |
| "loss": 0.0343, | |
| "step": 1300000 | |
| }, | |
| { | |
| "epoch": 3.18, | |
| "learning_rate": 1.469619963913822e-05, | |
| "loss": 0.0344, | |
| "step": 1305000 | |
| }, | |
| { | |
| "epoch": 3.19, | |
| "learning_rate": 1.4522348282273651e-05, | |
| "loss": 0.0346, | |
| "step": 1310000 | |
| }, | |
| { | |
| "epoch": 3.2, | |
| "learning_rate": 1.434910942153659e-05, | |
| "loss": 0.0341, | |
| "step": 1315000 | |
| }, | |
| { | |
| "epoch": 3.21, | |
| "learning_rate": 1.4176493184017924e-05, | |
| "loss": 0.0338, | |
| "step": 1320000 | |
| }, | |
| { | |
| "epoch": 3.23, | |
| "learning_rate": 1.4004509660411627e-05, | |
| "loss": 0.0345, | |
| "step": 1325000 | |
| }, | |
| { | |
| "epoch": 3.24, | |
| "learning_rate": 1.3833134687545127e-05, | |
| "loss": 0.0343, | |
| "step": 1330000 | |
| }, | |
| { | |
| "epoch": 3.25, | |
| "learning_rate": 1.3662480932190311e-05, | |
| "loss": 0.0341, | |
| "step": 1335000 | |
| }, | |
| { | |
| "epoch": 3.26, | |
| "learning_rate": 1.3492421770010699e-05, | |
| "loss": 0.0336, | |
| "step": 1340000 | |
| }, | |
| { | |
| "epoch": 3.27, | |
| "learning_rate": 1.3323069396041015e-05, | |
| "loss": 0.0343, | |
| "step": 1345000 | |
| }, | |
| { | |
| "epoch": 3.29, | |
| "learning_rate": 1.3154399624907232e-05, | |
| "loss": 0.0342, | |
| "step": 1350000 | |
| }, | |
| { | |
| "epoch": 3.3, | |
| "learning_rate": 1.2986422316603203e-05, | |
| "loss": 0.0346, | |
| "step": 1355000 | |
| }, | |
| { | |
| "epoch": 3.31, | |
| "learning_rate": 1.2819147290643238e-05, | |
| "loss": 0.0341, | |
| "step": 1360000 | |
| }, | |
| { | |
| "epoch": 3.32, | |
| "learning_rate": 1.2652584325488027e-05, | |
| "loss": 0.0339, | |
| "step": 1365000 | |
| }, | |
| { | |
| "epoch": 3.34, | |
| "learning_rate": 1.2486743157973069e-05, | |
| "loss": 0.0342, | |
| "step": 1370000 | |
| }, | |
| { | |
| "epoch": 3.35, | |
| "learning_rate": 1.2321666444080471e-05, | |
| "loss": 0.0344, | |
| "step": 1375000 | |
| }, | |
| { | |
| "epoch": 3.36, | |
| "learning_rate": 1.2157264951667166e-05, | |
| "loss": 0.0338, | |
| "step": 1380000 | |
| }, | |
| { | |
| "epoch": 3.37, | |
| "learning_rate": 1.1993647173310798e-05, | |
| "loss": 0.0338, | |
| "step": 1385000 | |
| }, | |
| { | |
| "epoch": 3.38, | |
| "learning_rate": 1.183078971233793e-05, | |
| "loss": 0.0334, | |
| "step": 1390000 | |
| }, | |
| { | |
| "epoch": 3.4, | |
| "learning_rate": 1.1668669736135962e-05, | |
| "loss": 0.0347, | |
| "step": 1395000 | |
| }, | |
| { | |
| "epoch": 3.41, | |
| "learning_rate": 1.1507361582461623e-05, | |
| "loss": 0.034, | |
| "step": 1400000 | |
| }, | |
| { | |
| "epoch": 3.42, | |
| "learning_rate": 1.134684217315512e-05, | |
| "loss": 0.0332, | |
| "step": 1405000 | |
| }, | |
| { | |
| "epoch": 3.43, | |
| "learning_rate": 1.1187089015206759e-05, | |
| "loss": 0.0336, | |
| "step": 1410000 | |
| }, | |
| { | |
| "epoch": 3.45, | |
| "learning_rate": 1.1028175361103207e-05, | |
| "loss": 0.0332, | |
| "step": 1415000 | |
| }, | |
| { | |
| "epoch": 3.46, | |
| "learning_rate": 1.0870078463341248e-05, | |
| "loss": 0.0336, | |
| "step": 1420000 | |
| }, | |
| { | |
| "epoch": 3.47, | |
| "learning_rate": 1.0712838947388687e-05, | |
| "loss": 0.0334, | |
| "step": 1425000 | |
| }, | |
| { | |
| "epoch": 3.48, | |
| "learning_rate": 1.0556371856281719e-05, | |
| "loss": 0.0338, | |
| "step": 1430000 | |
| }, | |
| { | |
| "epoch": 3.49, | |
| "learning_rate": 1.0400780485452265e-05, | |
| "loss": 0.0337, | |
| "step": 1435000 | |
| }, | |
| { | |
| "epoch": 3.51, | |
| "learning_rate": 1.0246042546828628e-05, | |
| "loss": 0.0335, | |
| "step": 1440000 | |
| }, | |
| { | |
| "epoch": 3.52, | |
| "learning_rate": 1.009216708598616e-05, | |
| "loss": 0.0332, | |
| "step": 1445000 | |
| }, | |
| { | |
| "epoch": 3.53, | |
| "learning_rate": 9.939163098082024e-06, | |
| "loss": 0.0324, | |
| "step": 1450000 | |
| }, | |
| { | |
| "epoch": 3.54, | |
| "learning_rate": 9.787039527329362e-06, | |
| "loss": 0.0333, | |
| "step": 1455000 | |
| }, | |
| { | |
| "epoch": 3.55, | |
| "learning_rate": 9.635805266474399e-06, | |
| "loss": 0.0333, | |
| "step": 1460000 | |
| }, | |
| { | |
| "epoch": 3.57, | |
| "learning_rate": 9.485439167479077e-06, | |
| "loss": 0.0335, | |
| "step": 1465000 | |
| }, | |
| { | |
| "epoch": 3.58, | |
| "learning_rate": 9.335980372105996e-06, | |
| "loss": 0.0333, | |
| "step": 1470000 | |
| }, | |
| { | |
| "epoch": 3.59, | |
| "learning_rate": 9.187496865476697e-06, | |
| "loss": 0.033, | |
| "step": 1475000 | |
| }, | |
| { | |
| "epoch": 3.6, | |
| "learning_rate": 9.03987847348179e-06, | |
| "loss": 0.033, | |
| "step": 1480000 | |
| }, | |
| { | |
| "epoch": 3.62, | |
| "learning_rate": 8.893222690812272e-06, | |
| "loss": 0.033, | |
| "step": 1485000 | |
| }, | |
| { | |
| "epoch": 3.63, | |
| "learning_rate": 8.747537527998633e-06, | |
| "loss": 0.0331, | |
| "step": 1490000 | |
| }, | |
| { | |
| "epoch": 3.64, | |
| "learning_rate": 8.602773192648179e-06, | |
| "loss": 0.0329, | |
| "step": 1495000 | |
| }, | |
| { | |
| "epoch": 3.65, | |
| "learning_rate": 8.45893872011418e-06, | |
| "loss": 0.0329, | |
| "step": 1500000 | |
| }, | |
| { | |
| "epoch": 3.66, | |
| "learning_rate": 8.316100063601678e-06, | |
| "loss": 0.0323, | |
| "step": 1505000 | |
| }, | |
| { | |
| "epoch": 3.68, | |
| "learning_rate": 8.174208418379433e-06, | |
| "loss": 0.0329, | |
| "step": 1510000 | |
| }, | |
| { | |
| "epoch": 3.69, | |
| "learning_rate": 8.03332884679727e-06, | |
| "loss": 0.033, | |
| "step": 1515000 | |
| }, | |
| { | |
| "epoch": 3.7, | |
| "learning_rate": 7.893441102454437e-06, | |
| "loss": 0.0328, | |
| "step": 1520000 | |
| } | |
| ], | |
| "logging_steps": 5000, | |
| "max_steps": 2053645, | |
| "num_train_epochs": 5, | |
| "save_steps": 40000, | |
| "total_flos": 3.8313184144529346e+20, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |