{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 10.0,
  "eval_steps": 500,
  "global_step": 570,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.08771929824561403,
      "grad_norm": 0.2171279639005661,
      "learning_rate": 1.3877192982456139e-05,
      "loss": 2.3087,
      "step": 5
    },
    {
      "epoch": 0.17543859649122806,
      "grad_norm": 0.21565403044223785,
      "learning_rate": 1.375438596491228e-05,
      "loss": 2.2512,
      "step": 10
    },
    {
      "epoch": 0.2631578947368421,
      "grad_norm": 0.24440069496631622,
      "learning_rate": 1.3631578947368421e-05,
      "loss": 2.1928,
      "step": 15
    },
    {
      "epoch": 0.3508771929824561,
      "grad_norm": 0.24185337126255035,
      "learning_rate": 1.350877192982456e-05,
      "loss": 2.0981,
      "step": 20
    },
    {
      "epoch": 0.43859649122807015,
      "grad_norm": 0.26988154649734497,
      "learning_rate": 1.3385964912280702e-05,
      "loss": 2.0544,
      "step": 25
    },
    {
      "epoch": 0.5263157894736842,
      "grad_norm": 0.2789638042449951,
      "learning_rate": 1.3263157894736841e-05,
      "loss": 1.9431,
      "step": 30
    },
    {
      "epoch": 0.6140350877192983,
      "grad_norm": 0.32065337896347046,
      "learning_rate": 1.3140350877192982e-05,
      "loss": 1.9116,
      "step": 35
    },
    {
      "epoch": 0.7017543859649122,
      "grad_norm": 0.34021827578544617,
      "learning_rate": 1.3017543859649123e-05,
      "loss": 1.8307,
      "step": 40
    },
    {
      "epoch": 0.7894736842105263,
      "grad_norm": 0.3664143681526184,
      "learning_rate": 1.2894736842105262e-05,
      "loss": 1.6933,
      "step": 45
    },
    {
      "epoch": 0.8771929824561403,
      "grad_norm": 0.33720481395721436,
      "learning_rate": 1.2771929824561402e-05,
      "loss": 1.5989,
      "step": 50
    },
    {
      "epoch": 0.9649122807017544,
      "grad_norm": 0.3138943910598755,
      "learning_rate": 1.2649122807017545e-05,
      "loss": 1.5041,
      "step": 55
    },
    {
      "epoch": 1.0526315789473684,
      "grad_norm": 0.31080183386802673,
      "learning_rate": 1.2526315789473684e-05,
      "loss": 1.4221,
      "step": 60
    },
    {
      "epoch": 1.1403508771929824,
      "grad_norm": 0.26811471581459045,
      "learning_rate": 1.2403508771929823e-05,
      "loss": 1.3467,
      "step": 65
    },
    {
      "epoch": 1.2280701754385965,
      "grad_norm": 0.25024041533470154,
      "learning_rate": 1.2280701754385964e-05,
      "loss": 1.2758,
      "step": 70
    },
    {
      "epoch": 1.3157894736842106,
      "grad_norm": 0.2651083171367645,
      "learning_rate": 1.2157894736842105e-05,
      "loss": 1.2279,
      "step": 75
    },
    {
      "epoch": 1.4035087719298245,
      "grad_norm": 0.25894564390182495,
      "learning_rate": 1.2035087719298245e-05,
      "loss": 1.173,
      "step": 80
    },
    {
      "epoch": 1.4912280701754386,
      "grad_norm": 0.2526499032974243,
      "learning_rate": 1.1912280701754386e-05,
      "loss": 1.1539,
      "step": 85
    },
    {
      "epoch": 1.5789473684210527,
      "grad_norm": 0.22998183965682983,
      "learning_rate": 1.1789473684210525e-05,
      "loss": 1.0704,
      "step": 90
    },
    {
      "epoch": 1.6666666666666665,
      "grad_norm": 0.23367486894130707,
      "learning_rate": 1.1666666666666666e-05,
      "loss": 1.0527,
      "step": 95
    },
    {
      "epoch": 1.7543859649122808,
      "grad_norm": 0.20629191398620605,
      "learning_rate": 1.1543859649122807e-05,
      "loss": 1.0167,
      "step": 100
    },
    {
      "epoch": 1.8421052631578947,
      "grad_norm": 0.19482989609241486,
      "learning_rate": 1.1421052631578947e-05,
      "loss": 1.0097,
      "step": 105
    },
    {
      "epoch": 1.9298245614035088,
      "grad_norm": 0.19629421830177307,
      "learning_rate": 1.1298245614035088e-05,
      "loss": 0.9724,
      "step": 110
    },
    {
      "epoch": 2.017543859649123,
      "grad_norm": 0.16687360405921936,
      "learning_rate": 1.1175438596491229e-05,
      "loss": 0.9819,
      "step": 115
    },
    {
      "epoch": 2.1052631578947367,
      "grad_norm": 0.17633700370788574,
      "learning_rate": 1.1052631578947368e-05,
      "loss": 0.9482,
      "step": 120
    },
    {
      "epoch": 2.192982456140351,
      "grad_norm": 0.18230170011520386,
      "learning_rate": 1.0929824561403509e-05,
      "loss": 0.9359,
      "step": 125
    },
    {
      "epoch": 2.280701754385965,
      "grad_norm": 0.1811186671257019,
      "learning_rate": 1.0807017543859648e-05,
      "loss": 0.9335,
      "step": 130
    },
    {
      "epoch": 2.3684210526315788,
      "grad_norm": 0.204745814204216,
      "learning_rate": 1.068421052631579e-05,
      "loss": 0.9152,
      "step": 135
    },
    {
      "epoch": 2.456140350877193,
      "grad_norm": 0.20484690368175507,
      "learning_rate": 1.056140350877193e-05,
      "loss": 0.9334,
      "step": 140
    },
    {
      "epoch": 2.543859649122807,
      "grad_norm": 0.1847849041223526,
      "learning_rate": 1.043859649122807e-05,
      "loss": 0.9078,
      "step": 145
    },
    {
      "epoch": 2.6315789473684212,
      "grad_norm": 0.19749857485294342,
      "learning_rate": 1.031578947368421e-05,
      "loss": 0.8918,
      "step": 150
    },
    {
      "epoch": 2.719298245614035,
      "grad_norm": 0.22192838788032532,
      "learning_rate": 1.0192982456140352e-05,
      "loss": 0.9108,
      "step": 155
    },
    {
      "epoch": 2.807017543859649,
      "grad_norm": 0.22351142764091492,
      "learning_rate": 1.0070175438596491e-05,
      "loss": 0.9176,
      "step": 160
    },
    {
      "epoch": 2.8947368421052633,
      "grad_norm": 0.23690944910049438,
      "learning_rate": 9.94736842105263e-06,
      "loss": 0.8665,
      "step": 165
    },
    {
      "epoch": 2.982456140350877,
      "grad_norm": 0.2534187436103821,
      "learning_rate": 9.824561403508772e-06,
      "loss": 0.9237,
      "step": 170
    },
    {
      "epoch": 3.0701754385964914,
      "grad_norm": 0.30020347237586975,
      "learning_rate": 9.701754385964913e-06,
      "loss": 0.8558,
      "step": 175
    },
    {
      "epoch": 3.1578947368421053,
      "grad_norm": 0.28877881169319153,
      "learning_rate": 9.578947368421052e-06,
      "loss": 0.8699,
      "step": 180
    },
    {
      "epoch": 3.245614035087719,
      "grad_norm": 0.3179704546928406,
      "learning_rate": 9.456140350877193e-06,
      "loss": 0.8773,
      "step": 185
    },
    {
      "epoch": 3.3333333333333335,
      "grad_norm": 0.36217424273490906,
      "learning_rate": 9.333333333333333e-06,
      "loss": 0.8581,
      "step": 190
    },
    {
      "epoch": 3.4210526315789473,
      "grad_norm": 0.45486509799957275,
      "learning_rate": 9.210526315789474e-06,
      "loss": 0.8315,
      "step": 195
    },
    {
      "epoch": 3.5087719298245617,
      "grad_norm": 0.6085723042488098,
      "learning_rate": 9.087719298245615e-06,
      "loss": 0.8414,
      "step": 200
    },
    {
      "epoch": 3.5964912280701755,
      "grad_norm": 0.7863444685935974,
      "learning_rate": 8.964912280701754e-06,
      "loss": 0.8202,
      "step": 205
    },
    {
      "epoch": 3.6842105263157894,
      "grad_norm": 0.32649222016334534,
      "learning_rate": 8.842105263157893e-06,
      "loss": 0.8005,
      "step": 210
    },
    {
      "epoch": 3.7719298245614032,
      "grad_norm": 0.215932235121727,
      "learning_rate": 8.719298245614036e-06,
      "loss": 0.7807,
      "step": 215
    },
    {
      "epoch": 3.8596491228070176,
      "grad_norm": 0.19240467250347137,
      "learning_rate": 8.596491228070176e-06,
      "loss": 0.788,
      "step": 220
    },
    {
      "epoch": 3.9473684210526314,
      "grad_norm": 0.19347311556339264,
      "learning_rate": 8.473684210526315e-06,
      "loss": 0.8133,
      "step": 225
    },
    {
      "epoch": 4.035087719298246,
      "grad_norm": 0.1866816133260727,
      "learning_rate": 8.350877192982456e-06,
      "loss": 0.8018,
      "step": 230
    },
    {
      "epoch": 4.12280701754386,
      "grad_norm": 0.22412796318531036,
      "learning_rate": 8.228070175438597e-06,
      "loss": 0.7928,
      "step": 235
    },
    {
      "epoch": 4.2105263157894735,
      "grad_norm": 0.19428610801696777,
      "learning_rate": 8.105263157894736e-06,
      "loss": 0.8002,
      "step": 240
    },
    {
      "epoch": 4.298245614035087,
      "grad_norm": 0.18602155148983002,
      "learning_rate": 7.982456140350877e-06,
      "loss": 0.7937,
      "step": 245
    },
    {
      "epoch": 4.385964912280702,
      "grad_norm": 0.18411681056022644,
      "learning_rate": 7.859649122807017e-06,
      "loss": 0.7788,
      "step": 250
    },
    {
      "epoch": 4.473684210526316,
      "grad_norm": 0.19007687270641327,
      "learning_rate": 7.736842105263158e-06,
      "loss": 0.7814,
      "step": 255
    },
    {
      "epoch": 4.56140350877193,
      "grad_norm": 0.19529704749584198,
      "learning_rate": 7.614035087719299e-06,
      "loss": 0.766,
      "step": 260
    },
    {
      "epoch": 4.649122807017544,
      "grad_norm": 0.2058393806219101,
      "learning_rate": 7.491228070175438e-06,
      "loss": 0.7654,
      "step": 265
    },
    {
      "epoch": 4.7368421052631575,
      "grad_norm": 0.2282346487045288,
      "learning_rate": 7.3684210526315784e-06,
      "loss": 0.7751,
      "step": 270
    },
    {
      "epoch": 4.824561403508772,
      "grad_norm": 0.20693573355674744,
      "learning_rate": 7.24561403508772e-06,
      "loss": 0.773,
      "step": 275
    },
    {
      "epoch": 4.912280701754386,
      "grad_norm": 0.2019735425710678,
      "learning_rate": 7.12280701754386e-06,
      "loss": 0.7645,
      "step": 280
    },
    {
      "epoch": 5.0,
      "grad_norm": 0.27219313383102417,
      "learning_rate": 7e-06,
      "loss": 0.7646,
      "step": 285
    },
    {
      "epoch": 5.087719298245614,
      "grad_norm": 0.19285519421100616,
      "learning_rate": 6.87719298245614e-06,
      "loss": 0.7764,
      "step": 290
    },
    {
      "epoch": 5.175438596491228,
      "grad_norm": 0.22160640358924866,
      "learning_rate": 6.75438596491228e-06,
      "loss": 0.7828,
      "step": 295
    },
    {
      "epoch": 5.2631578947368425,
      "grad_norm": 0.2303527593612671,
      "learning_rate": 6.6315789473684205e-06,
      "loss": 0.7415,
      "step": 300
    },
    {
      "epoch": 5.350877192982456,
      "grad_norm": 0.19970931112766266,
      "learning_rate": 6.5087719298245616e-06,
      "loss": 0.7689,
      "step": 305
    },
    {
      "epoch": 5.43859649122807,
      "grad_norm": 0.21704891324043274,
      "learning_rate": 6.385964912280701e-06,
      "loss": 0.7641,
      "step": 310
    },
    {
      "epoch": 5.526315789473684,
      "grad_norm": 0.2142358273267746,
      "learning_rate": 6.263157894736842e-06,
      "loss": 0.7555,
      "step": 315
    },
    {
      "epoch": 5.614035087719298,
      "grad_norm": 0.21201816201210022,
      "learning_rate": 6.140350877192982e-06,
      "loss": 0.7397,
      "step": 320
    },
    {
      "epoch": 5.701754385964913,
      "grad_norm": 0.2286573052406311,
      "learning_rate": 6.017543859649122e-06,
      "loss": 0.7548,
      "step": 325
    },
    {
      "epoch": 5.7894736842105265,
      "grad_norm": 0.2286461740732193,
      "learning_rate": 5.894736842105263e-06,
      "loss": 0.7421,
      "step": 330
    },
    {
      "epoch": 5.87719298245614,
      "grad_norm": 0.22108690440654755,
      "learning_rate": 5.771929824561404e-06,
      "loss": 0.7728,
      "step": 335
    },
    {
      "epoch": 5.964912280701754,
      "grad_norm": 0.21524551510810852,
      "learning_rate": 5.649122807017544e-06,
      "loss": 0.7384,
      "step": 340
    },
    {
      "epoch": 6.052631578947368,
      "grad_norm": 0.2278275340795517,
      "learning_rate": 5.526315789473684e-06,
      "loss": 0.7536,
      "step": 345
    },
    {
      "epoch": 6.140350877192983,
      "grad_norm": 0.2368898093700409,
      "learning_rate": 5.403508771929824e-06,
      "loss": 0.7463,
      "step": 350
    },
    {
      "epoch": 6.228070175438597,
      "grad_norm": 0.23636922240257263,
      "learning_rate": 5.280701754385965e-06,
      "loss": 0.7329,
      "step": 355
    },
    {
      "epoch": 6.315789473684211,
      "grad_norm": 0.21842700242996216,
      "learning_rate": 5.157894736842105e-06,
      "loss": 0.7334,
      "step": 360
    },
    {
      "epoch": 6.4035087719298245,
      "grad_norm": 0.24194689095020294,
      "learning_rate": 5.035087719298246e-06,
      "loss": 0.7589,
      "step": 365
    },
    {
      "epoch": 6.491228070175438,
      "grad_norm": 0.2541049122810364,
      "learning_rate": 4.912280701754386e-06,
      "loss": 0.7506,
      "step": 370
    },
    {
      "epoch": 6.578947368421053,
      "grad_norm": 0.24515019357204437,
      "learning_rate": 4.789473684210526e-06,
      "loss": 0.7553,
      "step": 375
    },
    {
      "epoch": 6.666666666666667,
      "grad_norm": 0.24450013041496277,
      "learning_rate": 4.666666666666666e-06,
      "loss": 0.7518,
      "step": 380
    },
    {
      "epoch": 6.754385964912281,
      "grad_norm": 0.2295515537261963,
      "learning_rate": 4.543859649122807e-06,
      "loss": 0.7316,
      "step": 385
    },
    {
      "epoch": 6.842105263157895,
      "grad_norm": 0.23279713094234467,
      "learning_rate": 4.421052631578947e-06,
      "loss": 0.7294,
      "step": 390
    },
    {
      "epoch": 6.9298245614035086,
      "grad_norm": 0.24888701736927032,
      "learning_rate": 4.298245614035088e-06,
      "loss": 0.731,
      "step": 395
    },
    {
      "epoch": 7.017543859649122,
      "grad_norm": 0.21917015314102173,
      "learning_rate": 4.175438596491228e-06,
      "loss": 0.7233,
      "step": 400
    },
    {
      "epoch": 7.105263157894737,
      "grad_norm": 0.2383662313222885,
      "learning_rate": 4.052631578947368e-06,
      "loss": 0.7471,
      "step": 405
    },
    {
      "epoch": 7.192982456140351,
      "grad_norm": 0.2388984113931656,
      "learning_rate": 3.929824561403508e-06,
      "loss": 0.7314,
      "step": 410
    },
    {
      "epoch": 7.280701754385965,
      "grad_norm": 0.23913495242595673,
      "learning_rate": 3.8070175438596494e-06,
      "loss": 0.7343,
      "step": 415
    },
    {
      "epoch": 7.368421052631579,
      "grad_norm": 0.27001267671585083,
      "learning_rate": 3.6842105263157892e-06,
      "loss": 0.7352,
      "step": 420
    },
    {
      "epoch": 7.456140350877193,
      "grad_norm": 0.25270870327949524,
      "learning_rate": 3.56140350877193e-06,
      "loss": 0.7189,
      "step": 425
    },
    {
      "epoch": 7.543859649122807,
      "grad_norm": 0.2609616816043854,
      "learning_rate": 3.43859649122807e-06,
      "loss": 0.708,
      "step": 430
    },
    {
      "epoch": 7.631578947368421,
      "grad_norm": 0.25190067291259766,
      "learning_rate": 3.3157894736842103e-06,
      "loss": 0.7236,
      "step": 435
    },
    {
      "epoch": 7.719298245614035,
      "grad_norm": 0.26472926139831543,
      "learning_rate": 3.1929824561403505e-06,
      "loss": 0.742,
      "step": 440
    },
    {
      "epoch": 7.807017543859649,
      "grad_norm": 0.2787232995033264,
      "learning_rate": 3.070175438596491e-06,
      "loss": 0.7349,
      "step": 445
    },
    {
      "epoch": 7.894736842105263,
      "grad_norm": 0.24300773441791534,
      "learning_rate": 2.9473684210526313e-06,
      "loss": 0.7234,
      "step": 450
    },
    {
      "epoch": 7.982456140350877,
      "grad_norm": 0.27598220109939575,
      "learning_rate": 2.824561403508772e-06,
      "loss": 0.7296,
      "step": 455
    },
    {
      "epoch": 8.070175438596491,
      "grad_norm": 0.2716473937034607,
      "learning_rate": 2.701754385964912e-06,
      "loss": 0.7042,
      "step": 460
    },
    {
      "epoch": 8.157894736842104,
      "grad_norm": 0.26843708753585815,
      "learning_rate": 2.5789473684210523e-06,
      "loss": 0.7237,
      "step": 465
    },
    {
      "epoch": 8.24561403508772,
      "grad_norm": 0.271550714969635,
      "learning_rate": 2.456140350877193e-06,
      "loss": 0.7153,
      "step": 470
    },
    {
      "epoch": 8.333333333333334,
      "grad_norm": 0.24845653772354126,
      "learning_rate": 2.333333333333333e-06,
      "loss": 0.7385,
      "step": 475
    },
    {
      "epoch": 8.421052631578947,
      "grad_norm": 0.30234792828559875,
      "learning_rate": 2.2105263157894734e-06,
      "loss": 0.7487,
      "step": 480
    },
    {
      "epoch": 8.508771929824562,
      "grad_norm": 0.27301862835884094,
      "learning_rate": 2.087719298245614e-06,
      "loss": 0.7316,
      "step": 485
    },
    {
      "epoch": 8.596491228070175,
      "grad_norm": 0.2653884291648865,
      "learning_rate": 1.964912280701754e-06,
      "loss": 0.719,
      "step": 490
    },
    {
      "epoch": 8.68421052631579,
      "grad_norm": 0.2817953824996948,
      "learning_rate": 1.8421052631578946e-06,
      "loss": 0.7163,
      "step": 495
    },
    {
      "epoch": 8.771929824561404,
      "grad_norm": 0.2780102491378784,
      "learning_rate": 1.719298245614035e-06,
      "loss": 0.7183,
      "step": 500
    },
    {
      "epoch": 8.859649122807017,
      "grad_norm": 0.25589415431022644,
      "learning_rate": 1.5964912280701752e-06,
      "loss": 0.7127,
      "step": 505
    },
    {
      "epoch": 8.947368421052632,
      "grad_norm": 0.25434452295303345,
      "learning_rate": 1.4736842105263156e-06,
      "loss": 0.6984,
      "step": 510
    },
    {
      "epoch": 9.035087719298245,
      "grad_norm": 0.26028621196746826,
      "learning_rate": 1.350877192982456e-06,
      "loss": 0.723,
      "step": 515
    },
    {
      "epoch": 9.12280701754386,
      "grad_norm": 0.2656114101409912,
      "learning_rate": 1.2280701754385965e-06,
      "loss": 0.7102,
      "step": 520
    },
    {
      "epoch": 9.210526315789474,
      "grad_norm": 0.25901249051094055,
      "learning_rate": 1.1052631578947367e-06,
      "loss": 0.7161,
      "step": 525
    },
    {
      "epoch": 9.298245614035087,
      "grad_norm": 0.27115657925605774,
      "learning_rate": 9.82456140350877e-07,
      "loss": 0.7114,
      "step": 530
    },
    {
      "epoch": 9.385964912280702,
      "grad_norm": 0.28459489345550537,
      "learning_rate": 8.596491228070175e-07,
      "loss": 0.7148,
      "step": 535
    },
    {
      "epoch": 9.473684210526315,
      "grad_norm": 0.2780962884426117,
      "learning_rate": 7.368421052631578e-07,
      "loss": 0.7157,
      "step": 540
    },
    {
      "epoch": 9.56140350877193,
      "grad_norm": 0.29147860407829285,
      "learning_rate": 6.140350877192982e-07,
      "loss": 0.7133,
      "step": 545
    },
    {
      "epoch": 9.649122807017545,
      "grad_norm": 0.2604060769081116,
      "learning_rate": 4.912280701754385e-07,
      "loss": 0.7097,
      "step": 550
    },
    {
      "epoch": 9.736842105263158,
      "grad_norm": 0.279658704996109,
      "learning_rate": 3.684210526315789e-07,
      "loss": 0.7337,
      "step": 555
    },
    {
      "epoch": 9.824561403508772,
      "grad_norm": 0.2638838589191437,
      "learning_rate": 2.456140350877193e-07,
      "loss": 0.7149,
      "step": 560
    },
    {
      "epoch": 9.912280701754385,
      "grad_norm": 0.2900523245334625,
      "learning_rate": 1.2280701754385964e-07,
      "loss": 0.7002,
      "step": 565
    },
    {
      "epoch": 10.0,
      "grad_norm": 0.3178514838218689,
      "learning_rate": 0.0,
      "loss": 0.7067,
      "step": 570
    }
  ],
  "logging_steps": 5,
  "max_steps": 570,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 10,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 1.1446214105760768e+17,
  "train_batch_size": 16,
  "trial_name": null,
  "trial_params": null
}