| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 1.9980582524271844, |
| "eval_steps": 500, |
| "global_step": 1544, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.012944983818770227, |
| "grad_norm": 15.61694845352194, |
| "learning_rate": 5.806451612903227e-07, |
| "loss": 0.9285, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.025889967637540454, |
| "grad_norm": 6.163649334499313, |
| "learning_rate": 1.2258064516129033e-06, |
| "loss": 0.8684, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.038834951456310676, |
| "grad_norm": 1.7865521352808105, |
| "learning_rate": 1.870967741935484e-06, |
| "loss": 0.6933, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.05177993527508091, |
| "grad_norm": 1.095428485943534, |
| "learning_rate": 2.5161290322580645e-06, |
| "loss": 0.6237, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.06472491909385113, |
| "grad_norm": 0.7674516500866138, |
| "learning_rate": 3.1612903225806453e-06, |
| "loss": 0.5203, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.07766990291262135, |
| "grad_norm": 0.9630457689752736, |
| "learning_rate": 3.8064516129032257e-06, |
| "loss": 0.4886, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.09061488673139159, |
| "grad_norm": 0.7702852585822165, |
| "learning_rate": 4.451612903225807e-06, |
| "loss": 0.4664, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.10355987055016182, |
| "grad_norm": 1.1750168365562916, |
| "learning_rate": 5.096774193548387e-06, |
| "loss": 0.4775, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.11650485436893204, |
| "grad_norm": 0.9263880445931769, |
| "learning_rate": 5.7419354838709685e-06, |
| "loss": 0.4551, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.12944983818770225, |
| "grad_norm": 0.7714118477811059, |
| "learning_rate": 6.3870967741935485e-06, |
| "loss": 0.466, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.1423948220064725, |
| "grad_norm": 1.1866615979991424, |
| "learning_rate": 7.03225806451613e-06, |
| "loss": 0.4584, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.1553398058252427, |
| "grad_norm": 0.9212312828699815, |
| "learning_rate": 7.67741935483871e-06, |
| "loss": 0.438, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.16828478964401294, |
| "grad_norm": 0.7898902772163053, |
| "learning_rate": 8.322580645161292e-06, |
| "loss": 0.444, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.18122977346278318, |
| "grad_norm": 0.6528162403920257, |
| "learning_rate": 8.967741935483871e-06, |
| "loss": 0.4084, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.1941747572815534, |
| "grad_norm": 1.1405911116056249, |
| "learning_rate": 9.612903225806453e-06, |
| "loss": 0.4399, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.20711974110032363, |
| "grad_norm": 0.7528331070534768, |
| "learning_rate": 9.999795378019876e-06, |
| "loss": 0.4147, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.22006472491909385, |
| "grad_norm": 0.7661476236923112, |
| "learning_rate": 9.997493573080172e-06, |
| "loss": 0.4423, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.23300970873786409, |
| "grad_norm": 1.1211801603378222, |
| "learning_rate": 9.992635367108505e-06, |
| "loss": 0.4239, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.2459546925566343, |
| "grad_norm": 0.7140655546022724, |
| "learning_rate": 9.985223245255192e-06, |
| "loss": 0.4311, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.2588996763754045, |
| "grad_norm": 0.6871932870114175, |
| "learning_rate": 9.975260999092083e-06, |
| "loss": 0.407, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.27184466019417475, |
| "grad_norm": 0.7810632722631204, |
| "learning_rate": 9.962753724673033e-06, |
| "loss": 0.4478, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.284789644012945, |
| "grad_norm": 0.7661773243067362, |
| "learning_rate": 9.947707819927075e-06, |
| "loss": 0.4204, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.2977346278317152, |
| "grad_norm": 1.0384473089256558, |
| "learning_rate": 9.930130981385653e-06, |
| "loss": 0.4333, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.3106796116504854, |
| "grad_norm": 0.6985582769526657, |
| "learning_rate": 9.910032200245559e-06, |
| "loss": 0.4259, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.32362459546925565, |
| "grad_norm": 0.699141638693493, |
| "learning_rate": 9.887421757769609e-06, |
| "loss": 0.4174, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.3365695792880259, |
| "grad_norm": 0.8592644909419284, |
| "learning_rate": 9.8623112200274e-06, |
| "loss": 0.4185, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.34951456310679613, |
| "grad_norm": 0.9421591672819436, |
| "learning_rate": 9.834713431978842e-06, |
| "loss": 0.4173, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.36245954692556637, |
| "grad_norm": 0.6685952560318938, |
| "learning_rate": 9.804642510903493e-06, |
| "loss": 0.3976, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.37540453074433655, |
| "grad_norm": 0.8540438332778423, |
| "learning_rate": 9.772113839179044e-06, |
| "loss": 0.4436, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.3883495145631068, |
| "grad_norm": 0.6783882339082041, |
| "learning_rate": 9.737144056412685e-06, |
| "loss": 0.4096, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.40129449838187703, |
| "grad_norm": 0.5523821720740434, |
| "learning_rate": 9.699751050929315e-06, |
| "loss": 0.4059, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.41423948220064727, |
| "grad_norm": 0.9452782745394067, |
| "learning_rate": 9.659953950621035e-06, |
| "loss": 0.4302, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.42718446601941745, |
| "grad_norm": 1.02042513260203, |
| "learning_rate": 9.617773113162505e-06, |
| "loss": 0.4451, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.4401294498381877, |
| "grad_norm": 0.8067558001024061, |
| "learning_rate": 9.573230115597263e-06, |
| "loss": 0.4007, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.45307443365695793, |
| "grad_norm": 0.6504637532458151, |
| "learning_rate": 9.526347743300276e-06, |
| "loss": 0.424, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.46601941747572817, |
| "grad_norm": 1.0298423934327736, |
| "learning_rate": 9.477149978322378e-06, |
| "loss": 0.4436, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.47896440129449835, |
| "grad_norm": 0.6601967120480519, |
| "learning_rate": 9.425661987122578e-06, |
| "loss": 0.4136, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.4919093851132686, |
| "grad_norm": 0.8128803380459234, |
| "learning_rate": 9.371910107694496e-06, |
| "loss": 0.4261, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.5048543689320388, |
| "grad_norm": 0.8827157163715996, |
| "learning_rate": 9.31592183609351e-06, |
| "loss": 0.4331, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.517799352750809, |
| "grad_norm": 0.6984667657039074, |
| "learning_rate": 9.257725812371522e-06, |
| "loss": 0.3943, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.5307443365695793, |
| "grad_norm": 24.36049975240688, |
| "learning_rate": 9.197351805926512e-06, |
| "loss": 0.4363, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.5436893203883495, |
| "grad_norm": 0.6800627472206026, |
| "learning_rate": 9.134830700274413e-06, |
| "loss": 0.4246, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.5566343042071198, |
| "grad_norm": 0.776400848559734, |
| "learning_rate": 9.070194477251051e-06, |
| "loss": 0.4308, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.56957928802589, |
| "grad_norm": 0.5674264388664758, |
| "learning_rate": 9.003476200652277e-06, |
| "loss": 0.4219, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.5825242718446602, |
| "grad_norm": 0.5410227713260688, |
| "learning_rate": 8.934709999320605e-06, |
| "loss": 0.4292, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.5954692556634305, |
| "grad_norm": 0.6340905923957859, |
| "learning_rate": 8.86393104968708e-06, |
| "loss": 0.4253, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.6084142394822006, |
| "grad_norm": 0.8726005993599312, |
| "learning_rate": 8.791175557777225e-06, |
| "loss": 0.4117, |
| "step": 470 |
| }, |
| { |
| "epoch": 0.6213592233009708, |
| "grad_norm": 0.5997403366828548, |
| "learning_rate": 8.716480740690353e-06, |
| "loss": 0.4303, |
| "step": 480 |
| }, |
| { |
| "epoch": 0.6343042071197411, |
| "grad_norm": 0.8012759255790656, |
| "learning_rate": 8.63988480756164e-06, |
| "loss": 0.4049, |
| "step": 490 |
| }, |
| { |
| "epoch": 0.6472491909385113, |
| "grad_norm": 0.7967092349686535, |
| "learning_rate": 8.56142694001676e-06, |
| "loss": 0.405, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.6601941747572816, |
| "grad_norm": 0.6625255669227388, |
| "learning_rate": 8.48114727212905e-06, |
| "loss": 0.4116, |
| "step": 510 |
| }, |
| { |
| "epoch": 0.6731391585760518, |
| "grad_norm": 0.6129524699744963, |
| "learning_rate": 8.399086869889456e-06, |
| "loss": 0.4077, |
| "step": 520 |
| }, |
| { |
| "epoch": 0.686084142394822, |
| "grad_norm": 0.709844580818479, |
| "learning_rate": 8.315287710199782e-06, |
| "loss": 0.3928, |
| "step": 530 |
| }, |
| { |
| "epoch": 0.6990291262135923, |
| "grad_norm": 0.7518034017239651, |
| "learning_rate": 8.229792659399957e-06, |
| "loss": 0.4132, |
| "step": 540 |
| }, |
| { |
| "epoch": 0.7119741100323624, |
| "grad_norm": 1.0217091942914303, |
| "learning_rate": 8.14264545134034e-06, |
| "loss": 0.4059, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.7249190938511327, |
| "grad_norm": 0.6324802279555936, |
| "learning_rate": 8.053890665010253e-06, |
| "loss": 0.3985, |
| "step": 560 |
| }, |
| { |
| "epoch": 0.7378640776699029, |
| "grad_norm": 0.8320222421244153, |
| "learning_rate": 7.963573701734185e-06, |
| "loss": 0.4089, |
| "step": 570 |
| }, |
| { |
| "epoch": 0.7508090614886731, |
| "grad_norm": 0.8470009426231043, |
| "learning_rate": 7.871740761947367e-06, |
| "loss": 0.4059, |
| "step": 580 |
| }, |
| { |
| "epoch": 0.7637540453074434, |
| "grad_norm": 0.6733069825526147, |
| "learning_rate": 7.778438821562543e-06, |
| "loss": 0.4181, |
| "step": 590 |
| }, |
| { |
| "epoch": 0.7766990291262136, |
| "grad_norm": 0.574483818969458, |
| "learning_rate": 7.683715607940078e-06, |
| "loss": 0.4024, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.7896440129449838, |
| "grad_norm": 0.8611372319649616, |
| "learning_rate": 7.587619575473667e-06, |
| "loss": 0.4029, |
| "step": 610 |
| }, |
| { |
| "epoch": 0.8025889967637541, |
| "grad_norm": 0.6396077006546321, |
| "learning_rate": 7.490199880804123e-06, |
| "loss": 0.3872, |
| "step": 620 |
| }, |
| { |
| "epoch": 0.8155339805825242, |
| "grad_norm": 0.6274640808836972, |
| "learning_rate": 7.391506357673972e-06, |
| "loss": 0.3855, |
| "step": 630 |
| }, |
| { |
| "epoch": 0.8284789644012945, |
| "grad_norm": 0.8869410726835968, |
| "learning_rate": 7.2915894914356586e-06, |
| "loss": 0.4144, |
| "step": 640 |
| }, |
| { |
| "epoch": 0.8414239482200647, |
| "grad_norm": 0.5680510071203487, |
| "learning_rate": 7.1905003932264425e-06, |
| "loss": 0.396, |
| "step": 650 |
| }, |
| { |
| "epoch": 0.8543689320388349, |
| "grad_norm": 0.6662378938973078, |
| "learning_rate": 7.088290773823177e-06, |
| "loss": 0.4139, |
| "step": 660 |
| }, |
| { |
| "epoch": 0.8673139158576052, |
| "grad_norm": 0.9275653743570225, |
| "learning_rate": 6.985012917190352e-06, |
| "loss": 0.4134, |
| "step": 670 |
| }, |
| { |
| "epoch": 0.8802588996763754, |
| "grad_norm": 0.6107284041645035, |
| "learning_rate": 6.880719653734934e-06, |
| "loss": 0.3953, |
| "step": 680 |
| }, |
| { |
| "epoch": 0.8932038834951457, |
| "grad_norm": 0.8969857251620925, |
| "learning_rate": 6.775464333281674e-06, |
| "loss": 0.4026, |
| "step": 690 |
| }, |
| { |
| "epoch": 0.9061488673139159, |
| "grad_norm": 0.6603689079997187, |
| "learning_rate": 6.669300797782724e-06, |
| "loss": 0.421, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.919093851132686, |
| "grad_norm": 0.6868234423877417, |
| "learning_rate": 6.562283353775496e-06, |
| "loss": 0.3924, |
| "step": 710 |
| }, |
| { |
| "epoch": 0.9320388349514563, |
| "grad_norm": 0.8131724869937513, |
| "learning_rate": 6.454466744602888e-06, |
| "loss": 0.3906, |
| "step": 720 |
| }, |
| { |
| "epoch": 0.9449838187702265, |
| "grad_norm": 0.8144333792001458, |
| "learning_rate": 6.345906122410055e-06, |
| "loss": 0.4045, |
| "step": 730 |
| }, |
| { |
| "epoch": 0.9579288025889967, |
| "grad_norm": 0.8218412907794177, |
| "learning_rate": 6.236657019932065e-06, |
| "loss": 0.3786, |
| "step": 740 |
| }, |
| { |
| "epoch": 0.970873786407767, |
| "grad_norm": 0.8175925841223551, |
| "learning_rate": 6.126775322086876e-06, |
| "loss": 0.4006, |
| "step": 750 |
| }, |
| { |
| "epoch": 0.9838187702265372, |
| "grad_norm": 0.9078031023881843, |
| "learning_rate": 6.016317237388147e-06, |
| "loss": 0.4024, |
| "step": 760 |
| }, |
| { |
| "epoch": 0.9967637540453075, |
| "grad_norm": 0.9148555859023916, |
| "learning_rate": 5.905339269192528e-06, |
| "loss": 0.4113, |
| "step": 770 |
| }, |
| { |
| "epoch": 1.009061488673139, |
| "grad_norm": 0.9087613181284451, |
| "learning_rate": 5.793898186796118e-06, |
| "loss": 0.3644, |
| "step": 780 |
| }, |
| { |
| "epoch": 1.0220064724919093, |
| "grad_norm": 0.6936875939569392, |
| "learning_rate": 5.682050996394902e-06, |
| "loss": 0.3476, |
| "step": 790 |
| }, |
| { |
| "epoch": 1.0349514563106796, |
| "grad_norm": 0.8269427535244058, |
| "learning_rate": 5.569854911923982e-06, |
| "loss": 0.3597, |
| "step": 800 |
| }, |
| { |
| "epoch": 1.04789644012945, |
| "grad_norm": 0.9206157155713364, |
| "learning_rate": 5.457367325790555e-06, |
| "loss": 0.3381, |
| "step": 810 |
| }, |
| { |
| "epoch": 1.06084142394822, |
| "grad_norm": 0.7751464430815477, |
| "learning_rate": 5.344645779515601e-06, |
| "loss": 0.323, |
| "step": 820 |
| }, |
| { |
| "epoch": 1.0737864077669903, |
| "grad_norm": 0.5059651976921438, |
| "learning_rate": 5.23174793429928e-06, |
| "loss": 0.346, |
| "step": 830 |
| }, |
| { |
| "epoch": 1.0867313915857606, |
| "grad_norm": 0.9479395615054366, |
| "learning_rate": 5.118731541525123e-06, |
| "loss": 0.3362, |
| "step": 840 |
| }, |
| { |
| "epoch": 1.0996763754045307, |
| "grad_norm": 0.7813567441931751, |
| "learning_rate": 5.005654413218073e-06, |
| "loss": 0.3423, |
| "step": 850 |
| }, |
| { |
| "epoch": 1.112621359223301, |
| "grad_norm": 0.7973108235246822, |
| "learning_rate": 4.8925743924715295e-06, |
| "loss": 0.3283, |
| "step": 860 |
| }, |
| { |
| "epoch": 1.1255663430420713, |
| "grad_norm": 0.8678030537547101, |
| "learning_rate": 4.779549323858476e-06, |
| "loss": 0.3602, |
| "step": 870 |
| }, |
| { |
| "epoch": 1.1385113268608413, |
| "grad_norm": 0.8519648961912265, |
| "learning_rate": 4.66663702384187e-06, |
| "loss": 0.3209, |
| "step": 880 |
| }, |
| { |
| "epoch": 1.1514563106796116, |
| "grad_norm": 0.9116476326022953, |
| "learning_rate": 4.553895251199397e-06, |
| "loss": 0.3538, |
| "step": 890 |
| }, |
| { |
| "epoch": 1.164401294498382, |
| "grad_norm": 0.756742601211622, |
| "learning_rate": 4.441381677477748e-06, |
| "loss": 0.3417, |
| "step": 900 |
| }, |
| { |
| "epoch": 1.177346278317152, |
| "grad_norm": 0.6550684463014615, |
| "learning_rate": 4.329153857491503e-06, |
| "loss": 0.3314, |
| "step": 910 |
| }, |
| { |
| "epoch": 1.1902912621359223, |
| "grad_norm": 0.726205824506148, |
| "learning_rate": 4.217269199881742e-06, |
| "loss": 0.3199, |
| "step": 920 |
| }, |
| { |
| "epoch": 1.2032362459546926, |
| "grad_norm": 0.7093362580269089, |
| "learning_rate": 4.105784937749419e-06, |
| "loss": 0.3277, |
| "step": 930 |
| }, |
| { |
| "epoch": 1.2161812297734629, |
| "grad_norm": 0.8202866545998011, |
| "learning_rate": 3.994758099378526e-06, |
| "loss": 0.3336, |
| "step": 940 |
| }, |
| { |
| "epoch": 1.229126213592233, |
| "grad_norm": 0.6743989658097767, |
| "learning_rate": 3.884245479064055e-06, |
| "loss": 0.3374, |
| "step": 950 |
| }, |
| { |
| "epoch": 1.2420711974110032, |
| "grad_norm": 0.8853306904041354, |
| "learning_rate": 3.7743036080596184e-06, |
| "loss": 0.3348, |
| "step": 960 |
| }, |
| { |
| "epoch": 1.2550161812297735, |
| "grad_norm": 0.7437644228370441, |
| "learning_rate": 3.6649887256596525e-06, |
| "loss": 0.3271, |
| "step": 970 |
| }, |
| { |
| "epoch": 1.2679611650485436, |
| "grad_norm": 0.960622460955667, |
| "learning_rate": 3.5563567504309517e-06, |
| "loss": 0.3321, |
| "step": 980 |
| }, |
| { |
| "epoch": 1.280906148867314, |
| "grad_norm": 0.6847427029357208, |
| "learning_rate": 3.4484632516082784e-06, |
| "loss": 0.3567, |
| "step": 990 |
| }, |
| { |
| "epoch": 1.2938511326860842, |
| "grad_norm": 0.644485218326109, |
| "learning_rate": 3.3413634206686573e-06, |
| "loss": 0.3387, |
| "step": 1000 |
| }, |
| { |
| "epoch": 1.3067961165048545, |
| "grad_norm": 0.5706300525934794, |
| "learning_rate": 3.2351120430989223e-06, |
| "loss": 0.3243, |
| "step": 1010 |
| }, |
| { |
| "epoch": 1.3197411003236246, |
| "grad_norm": 1.0411862393547737, |
| "learning_rate": 3.129763470370924e-06, |
| "loss": 0.3273, |
| "step": 1020 |
| }, |
| { |
| "epoch": 1.3326860841423949, |
| "grad_norm": 0.627806330087536, |
| "learning_rate": 3.025371592138766e-06, |
| "loss": 0.3331, |
| "step": 1030 |
| }, |
| { |
| "epoch": 1.3456310679611652, |
| "grad_norm": 0.9321171902037464, |
| "learning_rate": 2.921989808672272e-06, |
| "loss": 0.3328, |
| "step": 1040 |
| }, |
| { |
| "epoch": 1.3585760517799352, |
| "grad_norm": 0.6816968174278729, |
| "learning_rate": 2.8196710035407905e-06, |
| "loss": 0.3166, |
| "step": 1050 |
| }, |
| { |
| "epoch": 1.3715210355987055, |
| "grad_norm": 0.6746897103495911, |
| "learning_rate": 2.71846751656131e-06, |
| "loss": 0.331, |
| "step": 1060 |
| }, |
| { |
| "epoch": 1.3844660194174758, |
| "grad_norm": 0.7232366313330948, |
| "learning_rate": 2.6184311170247333e-06, |
| "loss": 0.3289, |
| "step": 1070 |
| }, |
| { |
| "epoch": 1.397411003236246, |
| "grad_norm": 0.6367511579996826, |
| "learning_rate": 2.5196129772139766e-06, |
| "loss": 0.3427, |
| "step": 1080 |
| }, |
| { |
| "epoch": 1.4103559870550162, |
| "grad_norm": 0.8281522457022666, |
| "learning_rate": 2.4220636462274856e-06, |
| "loss": 0.3327, |
| "step": 1090 |
| }, |
| { |
| "epoch": 1.4233009708737865, |
| "grad_norm": 0.8064122643894402, |
| "learning_rate": 2.3258330241215054e-06, |
| "loss": 0.3249, |
| "step": 1100 |
| }, |
| { |
| "epoch": 1.4362459546925566, |
| "grad_norm": 0.9962980212269915, |
| "learning_rate": 2.2309703363843893e-06, |
| "loss": 0.3501, |
| "step": 1110 |
| }, |
| { |
| "epoch": 1.4491909385113269, |
| "grad_norm": 0.6771637759804194, |
| "learning_rate": 2.1375241087559452e-06, |
| "loss": 0.3109, |
| "step": 1120 |
| }, |
| { |
| "epoch": 1.4621359223300971, |
| "grad_norm": 0.8924300131683629, |
| "learning_rate": 2.045542142404746e-06, |
| "loss": 0.3306, |
| "step": 1130 |
| }, |
| { |
| "epoch": 1.4750809061488672, |
| "grad_norm": 0.563922085674731, |
| "learning_rate": 1.9550714894760913e-06, |
| "loss": 0.3217, |
| "step": 1140 |
| }, |
| { |
| "epoch": 1.4880258899676375, |
| "grad_norm": 0.6176236359155347, |
| "learning_rate": 1.866158429023089e-06, |
| "loss": 0.3284, |
| "step": 1150 |
| }, |
| { |
| "epoch": 1.5009708737864078, |
| "grad_norm": 0.8142924491288118, |
| "learning_rate": 1.778848443333248e-06, |
| "loss": 0.3416, |
| "step": 1160 |
| }, |
| { |
| "epoch": 1.5139158576051779, |
| "grad_norm": 0.6716826679482402, |
| "learning_rate": 1.6931861946626066e-06, |
| "loss": 0.3474, |
| "step": 1170 |
| }, |
| { |
| "epoch": 1.5268608414239482, |
| "grad_norm": 0.8482814491570608, |
| "learning_rate": 1.6092155023893619e-06, |
| "loss": 0.3508, |
| "step": 1180 |
| }, |
| { |
| "epoch": 1.5398058252427185, |
| "grad_norm": 0.8271018016598762, |
| "learning_rate": 1.5269793205986417e-06, |
| "loss": 0.3217, |
| "step": 1190 |
| }, |
| { |
| "epoch": 1.5527508090614885, |
| "grad_norm": 0.9566979873604965, |
| "learning_rate": 1.4465197161099305e-06, |
| "loss": 0.341, |
| "step": 1200 |
| }, |
| { |
| "epoch": 1.565695792880259, |
| "grad_norm": 0.7981610759059783, |
| "learning_rate": 1.3678778469583354e-06, |
| "loss": 0.329, |
| "step": 1210 |
| }, |
| { |
| "epoch": 1.5786407766990291, |
| "grad_norm": 0.6990599663626785, |
| "learning_rate": 1.2910939413407575e-06, |
| "loss": 0.3235, |
| "step": 1220 |
| }, |
| { |
| "epoch": 1.5915857605177992, |
| "grad_norm": 0.7012642562949789, |
| "learning_rate": 1.2162072770376848e-06, |
| "loss": 0.3258, |
| "step": 1230 |
| }, |
| { |
| "epoch": 1.6045307443365697, |
| "grad_norm": 0.8634092286154025, |
| "learning_rate": 1.1432561613211723e-06, |
| "loss": 0.3177, |
| "step": 1240 |
| }, |
| { |
| "epoch": 1.6174757281553398, |
| "grad_norm": 0.7212038887761122, |
| "learning_rate": 1.0722779113592736e-06, |
| "loss": 0.3422, |
| "step": 1250 |
| }, |
| { |
| "epoch": 1.6304207119741099, |
| "grad_norm": 0.677359508157572, |
| "learning_rate": 1.0033088351269338e-06, |
| "loss": 0.3443, |
| "step": 1260 |
| }, |
| { |
| "epoch": 1.6433656957928804, |
| "grad_norm": 0.8848184406105248, |
| "learning_rate": 9.363842128331385e-07, |
| "loss": 0.3746, |
| "step": 1270 |
| }, |
| { |
| "epoch": 1.6563106796116505, |
| "grad_norm": 0.7104585379552231, |
| "learning_rate": 8.715382788737875e-07, |
| "loss": 0.3305, |
| "step": 1280 |
| }, |
| { |
| "epoch": 1.6692556634304208, |
| "grad_norm": 1.124914373741215, |
| "learning_rate": 8.088042043195538e-07, |
| "loss": 0.318, |
| "step": 1290 |
| }, |
| { |
| "epoch": 1.682200647249191, |
| "grad_norm": 0.7676297776942568, |
| "learning_rate": 7.482140799476606e-07, |
| "loss": 0.3307, |
| "step": 1300 |
| }, |
| { |
| "epoch": 1.6951456310679611, |
| "grad_norm": 0.8657530453975777, |
| "learning_rate": 6.897988998262733e-07, |
| "loss": 0.3489, |
| "step": 1310 |
| }, |
| { |
| "epoch": 1.7080906148867314, |
| "grad_norm": 0.6721790294147096, |
| "learning_rate": 6.335885454599061e-07, |
| "loss": 0.3218, |
| "step": 1320 |
| }, |
| { |
| "epoch": 1.7210355987055017, |
| "grad_norm": 0.9020294929871794, |
| "learning_rate": 5.796117705039245e-07, |
| "loss": 0.3324, |
| "step": 1330 |
| }, |
| { |
| "epoch": 1.7339805825242718, |
| "grad_norm": 0.9697435296109225, |
| "learning_rate": 5.278961860560116e-07, |
| "loss": 0.3231, |
| "step": 1340 |
| }, |
| { |
| "epoch": 1.746925566343042, |
| "grad_norm": 0.8030185525766447, |
| "learning_rate": 4.784682465320661e-07, |
| "loss": 0.3349, |
| "step": 1350 |
| }, |
| { |
| "epoch": 1.7598705501618124, |
| "grad_norm": 0.7878441809073381, |
| "learning_rate": 4.313532361338124e-07, |
| "loss": 0.335, |
| "step": 1360 |
| }, |
| { |
| "epoch": 1.7728155339805824, |
| "grad_norm": 0.6367374927128188, |
| "learning_rate": 3.86575255914996e-07, |
| "loss": 0.3221, |
| "step": 1370 |
| }, |
| { |
| "epoch": 1.7857605177993527, |
| "grad_norm": 0.6854482856153125, |
| "learning_rate": 3.4415721145281743e-07, |
| "loss": 0.3491, |
| "step": 1380 |
| }, |
| { |
| "epoch": 1.798705501618123, |
| "grad_norm": 0.7919655958035984, |
| "learning_rate": 3.041208011308855e-07, |
| "loss": 0.3099, |
| "step": 1390 |
| }, |
| { |
| "epoch": 1.811650485436893, |
| "grad_norm": 0.8580457856572622, |
| "learning_rate": 2.6648650503970364e-07, |
| "loss": 0.3452, |
| "step": 1400 |
| }, |
| { |
| "epoch": 1.8245954692556634, |
| "grad_norm": 0.671501429009108, |
| "learning_rate": 2.3127357450035025e-07, |
| "loss": 0.3182, |
| "step": 1410 |
| }, |
| { |
| "epoch": 1.8375404530744337, |
| "grad_norm": 0.6636856565668088, |
| "learning_rate": 1.9850002221672438e-07, |
| "loss": 0.3294, |
| "step": 1420 |
| }, |
| { |
| "epoch": 1.8504854368932038, |
| "grad_norm": 0.7799383682590583, |
| "learning_rate": 1.6818261306138727e-07, |
| "loss": 0.3283, |
| "step": 1430 |
| }, |
| { |
| "epoch": 1.863430420711974, |
| "grad_norm": 0.9010376579878301, |
| "learning_rate": 1.4033685549971643e-07, |
| "loss": 0.36, |
| "step": 1440 |
| }, |
| { |
| "epoch": 1.8763754045307444, |
| "grad_norm": 0.7888901011236897, |
| "learning_rate": 1.1497699365675741e-07, |
| "loss": 0.3057, |
| "step": 1450 |
| }, |
| { |
| "epoch": 1.8893203883495144, |
| "grad_norm": 0.6747885894056055, |
| "learning_rate": 9.211600003083022e-08, |
| "loss": 0.3237, |
| "step": 1460 |
| }, |
| { |
| "epoch": 1.902265372168285, |
| "grad_norm": 0.46814984596962683, |
| "learning_rate": 7.176556885762465e-08, |
| "loss": 0.3422, |
| "step": 1470 |
| }, |
| { |
| "epoch": 1.915210355987055, |
| "grad_norm": 0.7914917623182065, |
| "learning_rate": 5.3936110128166576e-08, |
| "loss": 0.3323, |
| "step": 1480 |
| }, |
| { |
| "epoch": 1.928155339805825, |
| "grad_norm": 0.5972397797918109, |
| "learning_rate": 3.863674426372921e-08, |
| "loss": 0.3368, |
| "step": 1490 |
| }, |
| { |
| "epoch": 1.9411003236245956, |
| "grad_norm": 0.9171858217474231, |
| "learning_rate": 2.5875297450402848e-08, |
| "loss": 0.3244, |
| "step": 1500 |
| }, |
| { |
| "epoch": 1.9540453074433657, |
| "grad_norm": 0.7480538443475298, |
| "learning_rate": 1.565829763571114e-08, |
| "loss": 0.3451, |
| "step": 1510 |
| }, |
| { |
| "epoch": 1.9669902912621358, |
| "grad_norm": 0.8045475541528871, |
| "learning_rate": 7.990971189332786e-09, |
| "loss": 0.3164, |
| "step": 1520 |
| }, |
| { |
| "epoch": 1.9799352750809063, |
| "grad_norm": 0.6705552484481664, |
| "learning_rate": 2.877240229614553e-09, |
| "loss": 0.3341, |
| "step": 1530 |
| }, |
| { |
| "epoch": 1.9928802588996763, |
| "grad_norm": 0.4604946128151892, |
| "learning_rate": 3.1972061727114644e-10, |
| "loss": 0.3571, |
| "step": 1540 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 1544, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 2, |
| "save_steps": 2000, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 136084214841344.0, |
| "train_batch_size": 1, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|