{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 4.9878183831672205,
  "eval_steps": 500,
  "global_step": 1125,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.004429678848283499,
      "grad_norm": 0.0,
      "learning_rate": 0.0,
      "loss": 2.1161,
      "step": 1
    },
    {
      "epoch": 0.008859357696566999,
      "grad_norm": 0.0,
      "learning_rate": 0.0,
      "loss": 2.6201,
      "step": 2
    },
    {
      "epoch": 0.013289036544850499,
      "grad_norm": 0.0,
      "learning_rate": 0.0,
      "loss": 2.8945,
      "step": 3
    },
    {
      "epoch": 0.017718715393133997,
      "grad_norm": 0.0,
      "learning_rate": 0.0,
      "loss": 2.1602,
      "step": 4
    },
    {
      "epoch": 0.0221483942414175,
      "grad_norm": 10.605673789978027,
      "learning_rate": 2.9411764705882355e-06,
      "loss": 3.0746,
      "step": 5
    },
    {
      "epoch": 0.026578073089700997,
      "grad_norm": 9.166430473327637,
      "learning_rate": 5.882352941176471e-06,
      "loss": 2.4977,
      "step": 6
    },
    {
      "epoch": 0.031007751937984496,
      "grad_norm": 8.228297233581543,
      "learning_rate": 8.823529411764707e-06,
      "loss": 2.4886,
      "step": 7
    },
    {
      "epoch": 0.035437430786267994,
      "grad_norm": 10.802596092224121,
      "learning_rate": 1.1764705882352942e-05,
      "loss": 3.153,
      "step": 8
    },
    {
      "epoch": 0.03986710963455149,
      "grad_norm": 7.030856132507324,
      "learning_rate": 1.4705882352941177e-05,
      "loss": 2.3531,
      "step": 9
    },
    {
      "epoch": 0.044296788482835,
      "grad_norm": 10.649585723876953,
      "learning_rate": 1.7647058823529414e-05,
      "loss": 2.3243,
      "step": 10
    },
    {
      "epoch": 0.048726467331118496,
      "grad_norm": 7.4812493324279785,
      "learning_rate": 2.058823529411765e-05,
      "loss": 2.4188,
      "step": 11
    },
    {
      "epoch": 0.053156146179401995,
      "grad_norm": 11.968286514282227,
      "learning_rate": 2.3529411764705884e-05,
      "loss": 2.5725,
      "step": 12
    },
    {
      "epoch": 0.05758582502768549,
      "grad_norm": 5.769841194152832,
      "learning_rate": 2.647058823529412e-05,
      "loss": 2.7926,
      "step": 13
    },
    {
      "epoch": 0.06201550387596899,
      "grad_norm": 12.024309158325195,
      "learning_rate": 2.9411764705882354e-05,
      "loss": 2.9558,
      "step": 14
    },
    {
      "epoch": 0.0664451827242525,
      "grad_norm": 13.791522026062012,
      "learning_rate": 3.235294117647059e-05,
      "loss": 1.9734,
      "step": 15
    },
    {
      "epoch": 0.07087486157253599,
      "grad_norm": 6.770407199859619,
      "learning_rate": 3.529411764705883e-05,
      "loss": 2.2124,
      "step": 16
    },
    {
      "epoch": 0.0753045404208195,
      "grad_norm": 6.50883150100708,
      "learning_rate": 3.8235294117647055e-05,
      "loss": 2.4441,
      "step": 17
    },
    {
      "epoch": 0.07973421926910298,
      "grad_norm": 7.501686096191406,
      "learning_rate": 4.11764705882353e-05,
      "loss": 2.2666,
      "step": 18
    },
    {
      "epoch": 0.08416389811738649,
      "grad_norm": 6.544267177581787,
      "learning_rate": 4.411764705882353e-05,
      "loss": 2.3097,
      "step": 19
    },
    {
      "epoch": 0.08859357696567,
      "grad_norm": 7.397747993469238,
      "learning_rate": 4.705882352941177e-05,
      "loss": 2.4401,
      "step": 20
    },
    {
      "epoch": 0.09302325581395349,
      "grad_norm": 7.936941623687744,
      "learning_rate": 5e-05,
      "loss": 2.5795,
      "step": 21
    },
    {
      "epoch": 0.09745293466223699,
      "grad_norm": 10.404101371765137,
      "learning_rate": 5.294117647058824e-05,
      "loss": 2.5834,
      "step": 22
    },
    {
      "epoch": 0.10188261351052048,
      "grad_norm": 9.431268692016602,
      "learning_rate": 5.588235294117647e-05,
      "loss": 2.5247,
      "step": 23
    },
    {
      "epoch": 0.10631229235880399,
      "grad_norm": 10.473112106323242,
      "learning_rate": 5.882352941176471e-05,
      "loss": 2.5272,
      "step": 24
    },
    {
      "epoch": 0.11074197120708748,
      "grad_norm": 6.828806400299072,
      "learning_rate": 6.176470588235295e-05,
      "loss": 2.1176,
      "step": 25
    },
    {
      "epoch": 0.11517165005537099,
      "grad_norm": 5.893914699554443,
      "learning_rate": 6.470588235294118e-05,
      "loss": 2.6795,
      "step": 26
    },
    {
      "epoch": 0.11960132890365449,
      "grad_norm": 6.441338539123535,
      "learning_rate": 6.764705882352942e-05,
      "loss": 2.301,
      "step": 27
    },
    {
      "epoch": 0.12403100775193798,
      "grad_norm": 6.272483825683594,
      "learning_rate": 7.058823529411765e-05,
      "loss": 2.1281,
      "step": 28
    },
    {
      "epoch": 0.12846068660022147,
      "grad_norm": 5.158710956573486,
      "learning_rate": 7.352941176470589e-05,
      "loss": 2.1881,
      "step": 29
    },
    {
      "epoch": 0.132890365448505,
      "grad_norm": 16.882787704467773,
      "learning_rate": 7.647058823529411e-05,
      "loss": 2.7238,
      "step": 30
    },
    {
      "epoch": 0.13732004429678848,
      "grad_norm": 12.249922752380371,
      "learning_rate": 7.941176470588235e-05,
      "loss": 2.2578,
      "step": 31
    },
    {
      "epoch": 0.14174972314507198,
      "grad_norm": 5.460890769958496,
      "learning_rate": 8.23529411764706e-05,
      "loss": 1.8949,
      "step": 32
    },
    {
      "epoch": 0.1461794019933555,
      "grad_norm": 6.217385768890381,
      "learning_rate": 8.529411764705883e-05,
      "loss": 2.0967,
      "step": 33
    },
    {
      "epoch": 0.150609080841639,
      "grad_norm": 10.28000259399414,
      "learning_rate": 8.823529411764706e-05,
      "loss": 2.5619,
      "step": 34
    },
    {
      "epoch": 0.15503875968992248,
      "grad_norm": 9.967302322387695,
      "learning_rate": 9.11764705882353e-05,
      "loss": 2.0039,
      "step": 35
    },
    {
      "epoch": 0.15946843853820597,
      "grad_norm": 8.877910614013672,
      "learning_rate": 9.411764705882353e-05,
      "loss": 2.3817,
      "step": 36
    },
    {
      "epoch": 0.1638981173864895,
      "grad_norm": 6.2244181632995605,
      "learning_rate": 9.705882352941177e-05,
      "loss": 1.9161,
      "step": 37
    },
    {
      "epoch": 0.16832779623477298,
      "grad_norm": 6.652865409851074,
      "learning_rate": 0.0001,
      "loss": 2.2661,
      "step": 38
    },
    {
      "epoch": 0.17275747508305647,
      "grad_norm": 5.198558330535889,
      "learning_rate": 9.999979270446262e-05,
      "loss": 2.2035,
      "step": 39
    },
    {
      "epoch": 0.17718715393134,
      "grad_norm": 7.7754950523376465,
      "learning_rate": 9.999917081956933e-05,
      "loss": 1.8432,
      "step": 40
    },
    {
      "epoch": 0.18161683277962348,
      "grad_norm": 7.527754306793213,
      "learning_rate": 9.999813435047667e-05,
      "loss": 2.2061,
      "step": 41
    },
    {
      "epoch": 0.18604651162790697,
      "grad_norm": 9.0021390914917,
      "learning_rate": 9.99966833057789e-05,
      "loss": 2.3816,
      "step": 42
    },
    {
      "epoch": 0.19047619047619047,
      "grad_norm": 6.2614827156066895,
      "learning_rate": 9.999481769750779e-05,
      "loss": 2.3224,
      "step": 43
    },
    {
      "epoch": 0.19490586932447398,
      "grad_norm": 9.527298927307129,
      "learning_rate": 9.999253754113262e-05,
      "loss": 2.6791,
      "step": 44
    },
    {
      "epoch": 0.19933554817275748,
      "grad_norm": 8.42602252960205,
      "learning_rate": 9.998984285556007e-05,
      "loss": 2.4413,
      "step": 45
    },
    {
      "epoch": 0.20376522702104097,
      "grad_norm": 11.76278018951416,
      "learning_rate": 9.998673366313398e-05,
      "loss": 2.2867,
      "step": 46
    },
    {
      "epoch": 0.2081949058693245,
      "grad_norm": 5.570592403411865,
      "learning_rate": 9.998320998963521e-05,
      "loss": 2.2111,
      "step": 47
    },
    {
      "epoch": 0.21262458471760798,
      "grad_norm": 9.666138648986816,
      "learning_rate": 9.997927186428145e-05,
      "loss": 2.2846,
      "step": 48
    },
    {
      "epoch": 0.21705426356589147,
      "grad_norm": 6.573619842529297,
      "learning_rate": 9.997491931972694e-05,
      "loss": 2.1636,
      "step": 49
    },
    {
      "epoch": 0.22148394241417496,
      "grad_norm": 8.646806716918945,
      "learning_rate": 9.997015239206215e-05,
      "loss": 2.0793,
      "step": 50
    },
    {
      "epoch": 0.22591362126245848,
      "grad_norm": 11.25146484375,
      "learning_rate": 9.996497112081364e-05,
      "loss": 2.523,
      "step": 51
    },
    {
      "epoch": 0.23034330011074197,
      "grad_norm": 7.36045503616333,
      "learning_rate": 9.99593755489436e-05,
      "loss": 2.2237,
      "step": 52
    },
    {
      "epoch": 0.23477297895902546,
      "grad_norm": 10.66434383392334,
      "learning_rate": 9.995336572284946e-05,
      "loss": 2.3307,
      "step": 53
    },
    {
      "epoch": 0.23920265780730898,
      "grad_norm": 8.106691360473633,
      "learning_rate": 9.994694169236365e-05,
      "loss": 1.9581,
      "step": 54
    },
    {
      "epoch": 0.24363233665559247,
      "grad_norm": 8.106691360473633,
      "learning_rate": 9.994694169236365e-05,
      "loss": 2.1817,
      "step": 55
    },
    {
      "epoch": 0.24806201550387597,
      "grad_norm": 9.079061508178711,
      "learning_rate": 9.994010351075309e-05,
      "loss": 2.1612,
      "step": 56
    },
    {
      "epoch": 0.25249169435215946,
      "grad_norm": 12.104523658752441,
      "learning_rate": 9.993285123471877e-05,
      "loss": 2.798,
      "step": 57
    },
    {
      "epoch": 0.25692137320044295,
      "grad_norm": 10.687878608703613,
      "learning_rate": 9.992518492439526e-05,
      "loss": 1.9192,
      "step": 58
    },
    {
      "epoch": 0.26135105204872644,
      "grad_norm": 13.860992431640625,
      "learning_rate": 9.991710464335022e-05,
      "loss": 2.0584,
      "step": 59
    },
    {
      "epoch": 0.26578073089701,
      "grad_norm": 9.031172752380371,
      "learning_rate": 9.990861045858392e-05,
      "loss": 2.3866,
      "step": 60
    },
    {
      "epoch": 0.2702104097452935,
      "grad_norm": 10.569039344787598,
      "learning_rate": 9.989970244052861e-05,
      "loss": 2.5264,
      "step": 61
    },
    {
      "epoch": 0.27464008859357697,
      "grad_norm": 10.764253616333008,
      "learning_rate": 9.989038066304799e-05,
      "loss": 1.9073,
      "step": 62
    },
    {
      "epoch": 0.27906976744186046,
      "grad_norm": 9.956701278686523,
      "learning_rate": 9.988064520343659e-05,
      "loss": 2.1363,
      "step": 63
    },
    {
      "epoch": 0.28349944629014395,
      "grad_norm": 9.418704986572266,
      "learning_rate": 9.987049614241905e-05,
      "loss": 2.1378,
      "step": 64
    },
    {
      "epoch": 0.28792912513842744,
      "grad_norm": 5.377594947814941,
      "learning_rate": 9.985993356414966e-05,
      "loss": 2.5805,
      "step": 65
    },
    {
      "epoch": 0.292358803986711,
      "grad_norm": 5.151512622833252,
      "learning_rate": 9.984895755621135e-05,
      "loss": 2.4775,
      "step": 66
    },
    {
      "epoch": 0.2967884828349945,
      "grad_norm": 7.1843743324279785,
      "learning_rate": 9.983756820961528e-05,
      "loss": 2.5418,
      "step": 67
    },
    {
      "epoch": 0.301218161683278,
      "grad_norm": 6.158773899078369,
      "learning_rate": 9.982576561879983e-05,
      "loss": 2.2148,
      "step": 68
    },
    {
      "epoch": 0.30564784053156147,
      "grad_norm": 6.614671230316162,
      "learning_rate": 9.981354988163e-05,
      "loss": 2.718,
      "step": 69
    },
    {
      "epoch": 0.31007751937984496,
      "grad_norm": 4.830013751983643,
      "learning_rate": 9.980092109939652e-05,
      "loss": 2.151,
      "step": 70
    },
    {
      "epoch": 0.31450719822812845,
      "grad_norm": 7.057981967926025,
      "learning_rate": 9.978787937681496e-05,
      "loss": 2.1143,
      "step": 71
    },
    {
      "epoch": 0.31893687707641194,
      "grad_norm": 7.486720085144043,
      "learning_rate": 9.977442482202498e-05,
      "loss": 2.0879,
      "step": 72
    },
    {
      "epoch": 0.3233665559246955,
      "grad_norm": 11.194393157958984,
      "learning_rate": 9.976055754658935e-05,
      "loss": 2.9186,
      "step": 73
    },
    {
      "epoch": 0.327796234772979,
      "grad_norm": 4.733929634094238,
      "learning_rate": 9.974627766549302e-05,
      "loss": 1.9993,
      "step": 74
    },
    {
      "epoch": 0.33222591362126247,
      "grad_norm": 9.492938995361328,
      "learning_rate": 9.973158529714224e-05,
      "loss": 2.3098,
      "step": 75
    },
    {
      "epoch": 0.33665559246954596,
      "grad_norm": 6.460459232330322,
      "learning_rate": 9.971648056336348e-05,
      "loss": 2.0154,
      "step": 76
    },
    {
      "epoch": 0.34108527131782945,
      "grad_norm": 6.839648246765137,
      "learning_rate": 9.970096358940251e-05,
      "loss": 2.8716,
      "step": 77
    },
    {
      "epoch": 0.34551495016611294,
      "grad_norm": 11.949552536010742,
      "learning_rate": 9.968503450392332e-05,
      "loss": 3.2224,
      "step": 78
    },
    {
      "epoch": 0.34994462901439644,
      "grad_norm": 11.671749114990234,
      "learning_rate": 9.966869343900702e-05,
      "loss": 2.4969,
      "step": 79
    },
    {
      "epoch": 0.35437430786268,
      "grad_norm": 7.032209396362305,
      "learning_rate": 9.965194053015083e-05,
      "loss": 2.0895,
      "step": 80
    },
    {
      "epoch": 0.3588039867109635,
      "grad_norm": 7.36803674697876,
      "learning_rate": 9.963477591626687e-05,
      "loss": 2.1803,
      "step": 81
    },
    {
      "epoch": 0.36323366555924697,
      "grad_norm": 9.958213806152344,
      "learning_rate": 9.961719973968102e-05,
      "loss": 2.482,
      "step": 82
    },
    {
      "epoch": 0.36766334440753046,
      "grad_norm": 8.602214813232422,
      "learning_rate": 9.959921214613186e-05,
      "loss": 1.9825,
      "step": 83
    },
    {
      "epoch": 0.37209302325581395,
      "grad_norm": 8.817967414855957,
      "learning_rate": 9.958081328476925e-05,
      "loss": 2.2188,
      "step": 84
    },
    {
      "epoch": 0.37652270210409744,
      "grad_norm": 6.849939823150635,
      "learning_rate": 9.956200330815329e-05,
      "loss": 2.2715,
      "step": 85
    },
    {
      "epoch": 0.38095238095238093,
      "grad_norm": 7.644628524780273,
      "learning_rate": 9.954278237225296e-05,
      "loss": 2.1561,
      "step": 86
    },
    {
      "epoch": 0.3853820598006645,
      "grad_norm": 10.270885467529297,
      "learning_rate": 9.95231506364448e-05,
      "loss": 2.3277,
      "step": 87
    },
    {
      "epoch": 0.38981173864894797,
      "grad_norm": 4.501772403717041,
      "learning_rate": 9.950310826351168e-05,
      "loss": 2.0594,
      "step": 88
    },
    {
      "epoch": 0.39424141749723146,
      "grad_norm": 6.094904899597168,
      "learning_rate": 9.948265541964136e-05,
      "loss": 2.5396,
      "step": 89
    },
    {
      "epoch": 0.39867109634551495,
      "grad_norm": 4.792986869812012,
      "learning_rate": 9.946179227442521e-05,
      "loss": 2.0759,
      "step": 90
    },
    {
      "epoch": 0.40310077519379844,
      "grad_norm": 11.445024490356445,
      "learning_rate": 9.944051900085667e-05,
      "loss": 1.8679,
      "step": 91
    },
    {
      "epoch": 0.40753045404208194,
      "grad_norm": 4.9725422859191895,
      "learning_rate": 9.941883577532993e-05,
      "loss": 1.9542,
      "step": 92
    },
    {
      "epoch": 0.4119601328903654,
      "grad_norm": 8.572590827941895,
      "learning_rate": 9.939674277763844e-05,
      "loss": 1.8557,
      "step": 93
    },
    {
      "epoch": 0.416389811738649,
      "grad_norm": 8.919875144958496,
      "learning_rate": 9.937424019097337e-05,
      "loss": 1.9008,
      "step": 94
    },
    {
      "epoch": 0.42081949058693247,
      "grad_norm": 6.352705955505371,
      "learning_rate": 9.935132820192217e-05,
      "loss": 2.1908,
      "step": 95
    },
    {
      "epoch": 0.42524916943521596,
      "grad_norm": 6.315227031707764,
      "learning_rate": 9.932800700046697e-05,
      "loss": 2.2764,
      "step": 96
    },
    {
      "epoch": 0.42967884828349945,
      "grad_norm": 8.06071949005127,
      "learning_rate": 9.9304276779983e-05,
      "loss": 2.0963,
      "step": 97
    },
    {
      "epoch": 0.43410852713178294,
      "grad_norm": 7.619894027709961,
      "learning_rate": 9.9280137737237e-05,
      "loss": 2.4399,
      "step": 98
    },
    {
      "epoch": 0.43853820598006643,
      "grad_norm": 13.512142181396484,
      "learning_rate": 9.925559007238563e-05,
      "loss": 2.3685,
      "step": 99
    },
    {
      "epoch": 0.4429678848283499,
      "grad_norm": 5.173255920410156,
      "learning_rate": 9.923063398897372e-05,
      "loss": 2.2099,
      "step": 100
    },
    {
      "epoch": 0.44739756367663347,
      "grad_norm": 10.244203567504883,
      "learning_rate": 9.920526969393267e-05,
      "loss": 2.2032,
      "step": 101
    },
    {
      "epoch": 0.45182724252491696,
      "grad_norm": 9.113722801208496,
      "learning_rate": 9.917949739757868e-05,
      "loss": 2.5239,
      "step": 102
    },
    {
      "epoch": 0.45625692137320045,
      "grad_norm": 6.485414505004883,
      "learning_rate": 9.915331731361103e-05,
      "loss": 2.3008,
      "step": 103
    },
    {
      "epoch": 0.46068660022148394,
      "grad_norm": 8.831535339355469,
      "learning_rate": 9.912672965911034e-05,
      "loss": 2.2914,
      "step": 104
    },
    {
      "epoch": 0.46511627906976744,
      "grad_norm": 8.91296100616455,
      "learning_rate": 9.909973465453666e-05,
      "loss": 2.4561,
      "step": 105
    },
    {
      "epoch": 0.4695459579180509,
      "grad_norm": 7.34066915512085,
      "learning_rate": 9.907233252372774e-05,
      "loss": 2.4614,
      "step": 106
    },
    {
      "epoch": 0.4739756367663344,
      "grad_norm": 5.878296375274658,
      "learning_rate": 9.904452349389717e-05,
      "loss": 2.0118,
      "step": 107
    },
    {
      "epoch": 0.47840531561461797,
      "grad_norm": 8.123944282531738,
      "learning_rate": 9.901630779563246e-05,
      "loss": 2.3756,
      "step": 108
    },
    {
      "epoch": 0.48283499446290146,
      "grad_norm": 11.739887237548828,
      "learning_rate": 9.898768566289315e-05,
      "loss": 2.365,
      "step": 109
    },
    {
      "epoch": 0.48726467331118495,
      "grad_norm": 5.946976184844971,
      "learning_rate": 9.895865733300886e-05,
      "loss": 2.2305,
      "step": 110
    },
    {
      "epoch": 0.49169435215946844,
      "grad_norm": 5.463192462921143,
      "learning_rate": 9.89292230466773e-05,
      "loss": 2.5732,
      "step": 111
    },
    {
      "epoch": 0.49612403100775193,
      "grad_norm": 4.646864891052246,
      "learning_rate": 9.889938304796236e-05,
      "loss": 2.1547,
      "step": 112
    },
    {
      "epoch": 0.5005537098560354,
      "grad_norm": 6.221797943115234,
      "learning_rate": 9.886913758429193e-05,
      "loss": 2.015,
      "step": 113
    },
    {
      "epoch": 0.5049833887043189,
      "grad_norm": 6.859386444091797,
      "learning_rate": 9.883848690645601e-05,
      "loss": 2.2811,
      "step": 114
    },
    {
      "epoch": 0.5094130675526024,
      "grad_norm": 10.671966552734375,
      "learning_rate": 9.880743126860457e-05,
      "loss": 1.9567,
      "step": 115
    },
    {
      "epoch": 0.5138427464008859,
      "grad_norm": 5.32224702835083,
      "learning_rate": 9.87759709282454e-05,
      "loss": 2.5827,
      "step": 116
    },
    {
      "epoch": 0.5182724252491694,
      "grad_norm": 8.69084644317627,
      "learning_rate": 9.874410614624203e-05,
      "loss": 1.81,
      "step": 117
    },
    {
      "epoch": 0.5227021040974529,
      "grad_norm": 9.261399269104004,
      "learning_rate": 9.871183718681153e-05,
      "loss": 2.3125,
      "step": 118
    },
    {
      "epoch": 0.5271317829457365,
      "grad_norm": 6.587903022766113,
      "learning_rate": 9.867916431752238e-05,
      "loss": 2.4587,
      "step": 119
    },
    {
      "epoch": 0.53156146179402,
      "grad_norm": 7.4397430419921875,
      "learning_rate": 9.864608780929216e-05,
      "loss": 2.5349,
      "step": 120
    },
    {
      "epoch": 0.5359911406423035,
      "grad_norm": 8.569662094116211,
      "learning_rate": 9.861260793638538e-05,
      "loss": 2.302,
      "step": 121
    },
    {
      "epoch": 0.540420819490587,
      "grad_norm": 8.569662094116211,
      "learning_rate": 9.861260793638538e-05,
      "loss": 2.3263,
      "step": 122
    },
    {
      "epoch": 0.5448504983388704,
      "grad_norm": 7.741957664489746,
      "learning_rate": 9.857872497641117e-05,
      "loss": 2.4619,
      "step": 123
    },
    {
      "epoch": 0.5492801771871539,
      "grad_norm": 13.403289794921875,
      "learning_rate": 9.854443921032097e-05,
      "loss": 2.4491,
      "step": 124
    },
    {
      "epoch": 0.5537098560354374,
      "grad_norm": 11.84448528289795,
      "learning_rate": 9.850975092240625e-05,
      "loss": 2.4692,
      "step": 125
    },
    {
      "epoch": 0.5581395348837209,
      "grad_norm": 6.203007698059082,
      "learning_rate": 9.847466040029609e-05,
      "loss": 2.2906,
      "step": 126
    },
    {
      "epoch": 0.5625692137320044,
      "grad_norm": 6.883812427520752,
      "learning_rate": 9.843916793495487e-05,
      "loss": 2.5539,
      "step": 127
    },
    {
      "epoch": 0.5669988925802879,
      "grad_norm": 11.461130142211914,
      "learning_rate": 9.840327382067973e-05,
      "loss": 1.9333,
      "step": 128
    },
    {
      "epoch": 0.5714285714285714,
      "grad_norm": 13.166450500488281,
      "learning_rate": 9.836697835509827e-05,
      "loss": 2.2345,
      "step": 129
    },
    {
      "epoch": 0.5758582502768549,
      "grad_norm": 8.314001083374023,
      "learning_rate": 9.833028183916602e-05,
      "loss": 2.6494,
      "step": 130
    },
    {
      "epoch": 0.5802879291251384,
      "grad_norm": 6.866865634918213,
      "learning_rate": 9.829318457716394e-05,
      "loss": 1.9577,
      "step": 131
    },
    {
      "epoch": 0.584717607973422,
      "grad_norm": 5.3526082038879395,
      "learning_rate": 9.82556868766959e-05,
      "loss": 2.3536,
      "step": 132
    },
    {
      "epoch": 0.5891472868217055,
      "grad_norm": 12.298832893371582,
      "learning_rate": 9.821778904868616e-05,
      "loss": 2.4454,
      "step": 133
    },
    {
      "epoch": 0.593576965669989,
      "grad_norm": 5.376310348510742,
      "learning_rate": 9.817949140737673e-05,
      "loss": 2.763,
      "step": 134
    },
    {
      "epoch": 0.5980066445182725,
      "grad_norm": 7.576231002807617,
      "learning_rate": 9.814079427032478e-05,
      "loss": 2.3395,
      "step": 135
    },
    {
      "epoch": 0.602436323366556,
      "grad_norm": 6.101162433624268,
      "learning_rate": 9.810169795840012e-05,
      "loss": 1.8174,
      "step": 136
    },
    {
      "epoch": 0.6068660022148394,
      "grad_norm": 9.156957626342773,
      "learning_rate": 9.806220279578235e-05,
      "loss": 2.5992,
      "step": 137
    },
    {
      "epoch": 0.6112956810631229,
      "grad_norm": 9.112500190734863,
      "learning_rate": 9.802230910995834e-05,
      "loss": 2.1072,
      "step": 138
    },
    {
      "epoch": 0.6157253599114064,
      "grad_norm": 8.444991111755371,
      "learning_rate": 9.798201723171939e-05,
      "loss": 2.2685,
      "step": 139
    },
    {
      "epoch": 0.6201550387596899,
      "grad_norm": 4.799725532531738,
      "learning_rate": 9.794132749515854e-05,
      "loss": 2.136,
      "step": 140
    },
    {
      "epoch": 0.6245847176079734,
      "grad_norm": 8.076310157775879,
      "learning_rate": 9.790024023766789e-05,
      "loss": 2.2673,
      "step": 141
    },
    {
      "epoch": 0.6290143964562569,
      "grad_norm": 4.653082847595215,
      "learning_rate": 9.785875579993557e-05,
      "loss": 1.8902,
      "step": 142
    },
    {
      "epoch": 0.6334440753045404,
      "grad_norm": 11.317476272583008,
      "learning_rate": 9.781687452594318e-05,
      "loss": 2.4088,
      "step": 143
    },
    {
      "epoch": 0.6378737541528239,
      "grad_norm": 7.295245170593262,
      "learning_rate": 9.777459676296275e-05,
      "loss": 2.2239,
      "step": 144
    },
    {
      "epoch": 0.6423034330011074,
      "grad_norm": 10.208337783813477,
      "learning_rate": 9.773192286155394e-05,
      "loss": 2.3007,
      "step": 145
    },
    {
      "epoch": 0.646733111849391,
      "grad_norm": 9.138147354125977,
      "learning_rate": 9.768885317556116e-05,
      "loss": 2.1123,
      "step": 146
    },
    {
      "epoch": 0.6511627906976745,
      "grad_norm": 8.081501960754395,
      "learning_rate": 9.764538806211051e-05,
      "loss": 2.1822,
      "step": 147
    },
    {
      "epoch": 0.655592469545958,
      "grad_norm": 10.28136920928955,
      "learning_rate": 9.760152788160697e-05,
      "loss": 2.0472,
      "step": 148
    },
    {
      "epoch": 0.6600221483942414,
      "grad_norm": 12.062602996826172,
      "learning_rate": 9.755727299773134e-05,
      "loss": 2.2228,
      "step": 149
    },
    {
      "epoch": 0.6644518272425249,
      "grad_norm": 8.189140319824219,
      "learning_rate": 9.75126237774372e-05,
      "loss": 2.1186,
      "step": 150
    },
    {
      "epoch": 0.6688815060908084,
      "grad_norm": 4.761727809906006,
      "learning_rate": 9.746758059094791e-05,
      "loss": 2.141,
      "step": 151
    },
    {
      "epoch": 0.6733111849390919,
      "grad_norm": 7.920103549957275,
      "learning_rate": 9.742214381175354e-05,
      "loss": 2.2799,
      "step": 152
    },
    {
      "epoch": 0.6777408637873754,
      "grad_norm": 7.213951587677002,
      "learning_rate": 9.737631381660776e-05,
      "loss": 2.1124,
      "step": 153
    },
    {
      "epoch": 0.6821705426356589,
      "grad_norm": 9.37542724609375,
      "learning_rate": 9.733009098552473e-05,
      "loss": 2.1971,
      "step": 154
    },
    {
      "epoch": 0.6866002214839424,
      "grad_norm": 8.1496000289917,
      "learning_rate": 9.728347570177586e-05,
      "loss": 2.0953,
      "step": 155
    },
    {
      "epoch": 0.6910299003322259,
      "grad_norm": 5.148806095123291,
      "learning_rate": 9.72364683518868e-05,
      "loss": 2.1246,
      "step": 156
    },
    {
      "epoch": 0.6954595791805094,
      "grad_norm": 5.193173885345459,
      "learning_rate": 9.71890693256341e-05,
      "loss": 1.9214,
      "step": 157
    },
    {
      "epoch": 0.6998892580287929,
      "grad_norm": 7.136893272399902,
      "learning_rate": 9.7141279016042e-05,
      "loss": 2.2632,
      "step": 158
    },
    {
      "epoch": 0.7043189368770764,
      "grad_norm": 7.360518455505371,
      "learning_rate": 9.709309781937925e-05,
      "loss": 2.4558,
      "step": 159
    },
    {
      "epoch": 0.70874861572536,
      "grad_norm": 9.435393333435059,
      "learning_rate": 9.704452613515571e-05,
      "loss": 2.1096,
      "step": 160
    },
    {
      "epoch": 0.7131782945736435,
      "grad_norm": 7.742511749267578,
      "learning_rate": 9.699556436611912e-05,
      "loss": 2.3128,
      "step": 161
    },
    {
      "epoch": 0.717607973421927,
      "grad_norm": 9.153509140014648,
      "learning_rate": 9.694621291825174e-05,
      "loss": 2.2575,
      "step": 162
    },
    {
      "epoch": 0.7220376522702104,
      "grad_norm": 8.447454452514648,
      "learning_rate": 9.689647220076696e-05,
      "loss": 1.9745,
      "step": 163
    },
    {
      "epoch": 0.7264673311184939,
      "grad_norm": 5.4174909591674805,
      "learning_rate": 9.684634262610593e-05,
      "loss": 2.4049,
      "step": 164
    },
    {
      "epoch": 0.7308970099667774,
      "grad_norm": 9.518255233764648,
      "learning_rate": 9.679582460993413e-05,
      "loss": 1.8485,
      "step": 165
    },
    {
      "epoch": 0.7353266888150609,
      "grad_norm": 8.27046012878418,
      "learning_rate": 9.674491857113792e-05,
      "loss": 2.665,
      "step": 166
    },
    {
      "epoch": 0.7397563676633444,
      "grad_norm": 6.9749836921691895,
      "learning_rate": 9.669362493182111e-05,
      "loss": 2.3234,
      "step": 167
    },
    {
      "epoch": 0.7441860465116279,
      "grad_norm": 5.377409934997559,
      "learning_rate": 9.664194411730141e-05,
      "loss": 1.8631,
      "step": 168
    },
    {
      "epoch": 0.7486157253599114,
      "grad_norm": 8.406085014343262,
      "learning_rate": 9.658987655610687e-05,
      "loss": 2.2899,
      "step": 169
    },
    {
      "epoch": 0.7530454042081949,
      "grad_norm": 4.888492584228516,
      "learning_rate": 9.653742267997246e-05,
      "loss": 2.0495,
      "step": 170
    },
    {
      "epoch": 0.7574750830564784,
      "grad_norm": 10.016488075256348,
      "learning_rate": 9.648458292383631e-05,
      "loss": 1.9254,
      "step": 171
    },
    {
      "epoch": 0.7619047619047619,
      "grad_norm": 5.880348205566406,
      "learning_rate": 9.643135772583627e-05,
      "loss": 1.9746,
      "step": 172
    },
    {
      "epoch": 0.7663344407530454,
      "grad_norm": 8.261670112609863,
      "learning_rate": 9.63777475273062e-05,
      "loss": 2.437,
      "step": 173
    },
    {
      "epoch": 0.770764119601329,
      "grad_norm": 15.291732788085938,
      "learning_rate": 9.632375277277226e-05,
      "loss": 2.6626,
      "step": 174
    },
    {
      "epoch": 0.7751937984496124,
      "grad_norm": 7.9028096199035645,
      "learning_rate": 9.626937390994932e-05,
      "loss": 2.2399,
      "step": 175
    },
    {
      "epoch": 0.7796234772978959,
      "grad_norm": 4.564765930175781,
      "learning_rate": 9.621461138973724e-05,
      "loss": 2.083,
      "step": 176
    },
    {
      "epoch": 0.7840531561461794,
      "grad_norm": 7.957129955291748,
      "learning_rate": 9.615946566621702e-05,
      "loss": 1.8967,
      "step": 177
    },
    {
      "epoch": 0.7884828349944629,
      "grad_norm": 16.633319854736328,
      "learning_rate": 9.610393719664719e-05,
      "loss": 1.9869,
      "step": 178
    },
    {
      "epoch": 0.7929125138427464,
      "grad_norm": 8.166046142578125,
      "learning_rate": 9.604802644145989e-05,
      "loss": 2.141,
      "step": 179
    },
    {
      "epoch": 0.7973421926910299,
      "grad_norm": 10.27008056640625,
      "learning_rate": 9.59917338642571e-05,
      "loss": 2.077,
      "step": 180
    },
    {
      "epoch": 0.8017718715393134,
      "grad_norm": 5.996699810028076,
      "learning_rate": 9.593505993180687e-05,
      "loss": 1.9777,
      "step": 181
    },
    {
      "epoch": 0.8062015503875969,
      "grad_norm": 8.765548706054688,
      "learning_rate": 9.58780051140393e-05,
      "loss": 1.9158,
      "step": 182
    },
    {
      "epoch": 0.8106312292358804,
      "grad_norm": 4.515958786010742,
      "learning_rate": 9.582056988404275e-05,
      "loss": 1.9419,
      "step": 183
    },
    {
      "epoch": 0.8150609080841639,
      "grad_norm": 10.045695304870605,
      "learning_rate": 9.576275471805993e-05,
      "loss": 2.2376,
      "step": 184
    },
    {
      "epoch": 0.8194905869324474,
      "grad_norm": 5.502155303955078,
      "learning_rate": 9.570456009548383e-05,
      "loss": 1.9097,
      "step": 185
    },
    {
      "epoch": 0.8239202657807309,
      "grad_norm": 6.540369987487793,
      "learning_rate": 9.56459864988539e-05,
      "loss": 2.1779,
      "step": 186
    },
    {
      "epoch": 0.8283499446290143,
      "grad_norm": 6.4399285316467285,
      "learning_rate": 9.558703441385194e-05,
      "loss": 2.0636,
      "step": 187
    },
    {
      "epoch": 0.832779623477298,
      "grad_norm": 17.52277946472168,
      "learning_rate": 9.552770432929811e-05,
      "loss": 2.0766,
      "step": 188
    },
    {
      "epoch": 0.8372093023255814,
      "grad_norm": 7.090964317321777,
      "learning_rate": 9.54679967371469e-05,
      "loss": 2.2198,
      "step": 189
    },
    {
      "epoch": 0.8416389811738649,
      "grad_norm": 4.9996490478515625,
      "learning_rate": 9.540791213248299e-05,
      "loss": 2.1353,
      "step": 190
    },
    {
      "epoch": 0.8460686600221484,
      "grad_norm": 10.12185001373291,
      "learning_rate": 9.534745101351719e-05,
      "loss": 2.4016,
      "step": 191
    },
    {
      "epoch": 0.8504983388704319,
      "grad_norm": 7.592304706573486,
      "learning_rate": 9.528661388158234e-05,
      "loss": 2.1985,
      "step": 192
    },
    {
      "epoch": 0.8549280177187154,
      "grad_norm": 6.609996318817139,
      "learning_rate": 9.522540124112902e-05,
      "loss": 2.1224,
| "step": 193 | |
| }, | |
| { | |
| "epoch": 0.8593576965669989, | |
| "grad_norm": 4.852092742919922, | |
| "learning_rate": 9.516381359972158e-05, | |
| "loss": 2.2335, | |
| "step": 194 | |
| }, | |
| { | |
| "epoch": 0.8637873754152824, | |
| "grad_norm": 6.790386199951172, | |
| "learning_rate": 9.51018514680337e-05, | |
| "loss": 1.7354, | |
| "step": 195 | |
| }, | |
| { | |
| "epoch": 0.8682170542635659, | |
| "grad_norm": 4.828433990478516, | |
| "learning_rate": 9.503951535984434e-05, | |
| "loss": 1.9872, | |
| "step": 196 | |
| }, | |
| { | |
| "epoch": 0.8726467331118494, | |
| "grad_norm": 4.812173843383789, | |
| "learning_rate": 9.497680579203338e-05, | |
| "loss": 2.0323, | |
| "step": 197 | |
| }, | |
| { | |
| "epoch": 0.8770764119601329, | |
| "grad_norm": 3.76886248588562, | |
| "learning_rate": 9.491372328457738e-05, | |
| "loss": 2.308, | |
| "step": 198 | |
| }, | |
| { | |
| "epoch": 0.8815060908084164, | |
| "grad_norm": 4.918368339538574, | |
| "learning_rate": 9.485026836054519e-05, | |
| "loss": 2.0527, | |
| "step": 199 | |
| }, | |
| { | |
| "epoch": 0.8859357696566998, | |
| "grad_norm": 5.6282639503479, | |
| "learning_rate": 9.478644154609372e-05, | |
| "loss": 2.4852, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.8903654485049833, | |
| "grad_norm": 8.104965209960938, | |
| "learning_rate": 9.472224337046357e-05, | |
| "loss": 1.6854, | |
| "step": 201 | |
| }, | |
| { | |
| "epoch": 0.8947951273532669, | |
| "grad_norm": 5.7505598068237305, | |
| "learning_rate": 9.46576743659745e-05, | |
| "loss": 2.1109, | |
| "step": 202 | |
| }, | |
| { | |
| "epoch": 0.8992248062015504, | |
| "grad_norm": 5.078502178192139, | |
| "learning_rate": 9.45927350680212e-05, | |
| "loss": 2.2926, | |
| "step": 203 | |
| }, | |
| { | |
| "epoch": 0.9036544850498339, | |
| "grad_norm": 6.620337009429932, | |
| "learning_rate": 9.452742601506872e-05, | |
| "loss": 2.1566, | |
| "step": 204 | |
| }, | |
| { | |
| "epoch": 0.9080841638981174, | |
| "grad_norm": 6.228148937225342, | |
| "learning_rate": 9.446174774864808e-05, | |
| "loss": 2.141, | |
| "step": 205 | |
| }, | |
| { | |
| "epoch": 0.9125138427464009, | |
| "grad_norm": 5.25639533996582, | |
| "learning_rate": 9.439570081335173e-05, | |
| "loss": 2.1721, | |
| "step": 206 | |
| }, | |
| { | |
| "epoch": 0.9169435215946844, | |
| "grad_norm": 7.61407470703125, | |
| "learning_rate": 9.432928575682907e-05, | |
| "loss": 1.9253, | |
| "step": 207 | |
| }, | |
| { | |
| "epoch": 0.9213732004429679, | |
| "grad_norm": 7.121610641479492, | |
| "learning_rate": 9.42625031297819e-05, | |
| "loss": 2.3093, | |
| "step": 208 | |
| }, | |
| { | |
| "epoch": 0.9258028792912514, | |
| "grad_norm": 8.246429443359375, | |
| "learning_rate": 9.419535348595985e-05, | |
| "loss": 2.321, | |
| "step": 209 | |
| }, | |
| { | |
| "epoch": 0.9302325581395349, | |
| "grad_norm": 8.831986427307129, | |
| "learning_rate": 9.412783738215575e-05, | |
| "loss": 2.3273, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.9346622369878184, | |
| "grad_norm": 7.420327186584473, | |
| "learning_rate": 9.405995537820111e-05, | |
| "loss": 2.2849, | |
| "step": 211 | |
| }, | |
| { | |
| "epoch": 0.9390919158361019, | |
| "grad_norm": 7.654839992523193, | |
| "learning_rate": 9.399170803696138e-05, | |
| "loss": 1.8333, | |
| "step": 212 | |
| }, | |
| { | |
| "epoch": 0.9435215946843853, | |
| "grad_norm": 7.557086944580078, | |
| "learning_rate": 9.392309592433133e-05, | |
| "loss": 2.1632, | |
| "step": 213 | |
| }, | |
| { | |
| "epoch": 0.9479512735326688, | |
| "grad_norm": 7.721208572387695, | |
| "learning_rate": 9.385411960923036e-05, | |
| "loss": 1.9735, | |
| "step": 214 | |
| }, | |
| { | |
| "epoch": 0.9523809523809523, | |
| "grad_norm": 7.448840618133545, | |
| "learning_rate": 9.378477966359773e-05, | |
| "loss": 2.4228, | |
| "step": 215 | |
| }, | |
| { | |
| "epoch": 0.9568106312292359, | |
| "grad_norm": 5.7559733390808105, | |
| "learning_rate": 9.371507666238794e-05, | |
| "loss": 2.3595, | |
| "step": 216 | |
| }, | |
| { | |
| "epoch": 0.9612403100775194, | |
| "grad_norm": 5.419656753540039, | |
| "learning_rate": 9.36450111835658e-05, | |
| "loss": 2.3156, | |
| "step": 217 | |
| }, | |
| { | |
| "epoch": 0.9656699889258029, | |
| "grad_norm": 8.596833229064941, | |
| "learning_rate": 9.357458380810175e-05, | |
| "loss": 2.3778, | |
| "step": 218 | |
| }, | |
| { | |
| "epoch": 0.9700996677740864, | |
| "grad_norm": 15.337946891784668, | |
| "learning_rate": 9.350379511996705e-05, | |
| "loss": 2.3743, | |
| "step": 219 | |
| }, | |
| { | |
| "epoch": 0.9745293466223699, | |
| "grad_norm": 6.938776016235352, | |
| "learning_rate": 9.343264570612883e-05, | |
| "loss": 2.2887, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.9789590254706534, | |
| "grad_norm": 8.320263862609863, | |
| "learning_rate": 9.336113615654534e-05, | |
| "loss": 1.9961, | |
| "step": 221 | |
| }, | |
| { | |
| "epoch": 0.9833887043189369, | |
| "grad_norm": 5.129944801330566, | |
| "learning_rate": 9.328926706416101e-05, | |
| "loss": 1.8434, | |
| "step": 222 | |
| }, | |
| { | |
| "epoch": 0.9878183831672204, | |
| "grad_norm": 6.840932369232178, | |
| "learning_rate": 9.321703902490151e-05, | |
| "loss": 2.0304, | |
| "step": 223 | |
| }, | |
| { | |
| "epoch": 0.9922480620155039, | |
| "grad_norm": 8.26124095916748, | |
| "learning_rate": 9.314445263766888e-05, | |
| "loss": 2.2058, | |
| "step": 224 | |
| }, | |
| { | |
| "epoch": 0.9966777408637874, | |
| "grad_norm": 9.003861427307129, | |
| "learning_rate": 9.307150850433643e-05, | |
| "loss": 2.3681, | |
| "step": 225 | |
| }, | |
| { | |
| "epoch": 1.0022148394241417, | |
| "grad_norm": 10.506808280944824, | |
| "learning_rate": 9.299820722974395e-05, | |
| "loss": 2.1082, | |
| "step": 226 | |
| }, | |
| { | |
| "epoch": 1.0066445182724253, | |
| "grad_norm": 13.937885284423828, | |
| "learning_rate": 9.29245494216925e-05, | |
| "loss": 2.1381, | |
| "step": 227 | |
| }, | |
| { | |
| "epoch": 1.0110741971207087, | |
| "grad_norm": 10.9959077835083, | |
| "learning_rate": 9.285053569093946e-05, | |
| "loss": 2.5619, | |
| "step": 228 | |
| }, | |
| { | |
| "epoch": 1.0155038759689923, | |
| "grad_norm": 8.583209991455078, | |
| "learning_rate": 9.27761666511935e-05, | |
| "loss": 2.4306, | |
| "step": 229 | |
| }, | |
| { | |
| "epoch": 1.0199335548172757, | |
| "grad_norm": 8.024839401245117, | |
| "learning_rate": 9.270144291910941e-05, | |
| "loss": 2.2235, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 1.0243632336655593, | |
| "grad_norm": 7.364706516265869, | |
| "learning_rate": 9.262636511428304e-05, | |
| "loss": 2.4616, | |
| "step": 231 | |
| }, | |
| { | |
| "epoch": 1.0287929125138426, | |
| "grad_norm": 8.752544403076172, | |
| "learning_rate": 9.255093385924614e-05, | |
| "loss": 2.127, | |
| "step": 232 | |
| }, | |
| { | |
| "epoch": 1.0332225913621262, | |
| "grad_norm": 6.984606742858887, | |
| "learning_rate": 9.247514977946124e-05, | |
| "loss": 2.1871, | |
| "step": 233 | |
| }, | |
| { | |
| "epoch": 1.0376522702104098, | |
| "grad_norm": 8.737010955810547, | |
| "learning_rate": 9.239901350331634e-05, | |
| "loss": 2.3309, | |
| "step": 234 | |
| }, | |
| { | |
| "epoch": 1.0420819490586932, | |
| "grad_norm": 6.438765525817871, | |
| "learning_rate": 9.232252566211991e-05, | |
| "loss": 2.1588, | |
| "step": 235 | |
| }, | |
| { | |
| "epoch": 1.0465116279069768, | |
| "grad_norm": 9.31252384185791, | |
| "learning_rate": 9.224568689009547e-05, | |
| "loss": 2.0311, | |
| "step": 236 | |
| }, | |
| { | |
| "epoch": 1.0509413067552602, | |
| "grad_norm": 11.200642585754395, | |
| "learning_rate": 9.216849782437637e-05, | |
| "loss": 1.8581, | |
| "step": 237 | |
| }, | |
| { | |
| "epoch": 1.0553709856035438, | |
| "grad_norm": 6.259405136108398, | |
| "learning_rate": 9.20909591050006e-05, | |
| "loss": 2.0763, | |
| "step": 238 | |
| }, | |
| { | |
| "epoch": 1.0598006644518272, | |
| "grad_norm": 7.228961944580078, | |
| "learning_rate": 9.201307137490536e-05, | |
| "loss": 2.5367, | |
| "step": 239 | |
| }, | |
| { | |
| "epoch": 1.0642303433001108, | |
| "grad_norm": 10.137297630310059, | |
| "learning_rate": 9.19348352799218e-05, | |
| "loss": 1.8874, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 1.0686600221483942, | |
| "grad_norm": 6.983479976654053, | |
| "learning_rate": 9.185625146876965e-05, | |
| "loss": 1.8221, | |
| "step": 241 | |
| }, | |
| { | |
| "epoch": 1.0730897009966778, | |
| "grad_norm": 5.6672444343566895, | |
| "learning_rate": 9.177732059305186e-05, | |
| "loss": 2.119, | |
| "step": 242 | |
| }, | |
| { | |
| "epoch": 1.0775193798449612, | |
| "grad_norm": 6.37617826461792, | |
| "learning_rate": 9.169804330724916e-05, | |
| "loss": 2.1792, | |
| "step": 243 | |
| }, | |
| { | |
| "epoch": 1.0819490586932448, | |
| "grad_norm": 7.426950931549072, | |
| "learning_rate": 9.161842026871466e-05, | |
| "loss": 1.8846, | |
| "step": 244 | |
| }, | |
| { | |
| "epoch": 1.0863787375415281, | |
| "grad_norm": 5.454375743865967, | |
| "learning_rate": 9.153845213766836e-05, | |
| "loss": 2.2066, | |
| "step": 245 | |
| }, | |
| { | |
| "epoch": 1.0908084163898117, | |
| "grad_norm": 8.249213218688965, | |
| "learning_rate": 9.145813957719174e-05, | |
| "loss": 2.2305, | |
| "step": 246 | |
| }, | |
| { | |
| "epoch": 1.0952380952380953, | |
| "grad_norm": 3.8901290893554688, | |
| "learning_rate": 9.137748325322222e-05, | |
| "loss": 2.2013, | |
| "step": 247 | |
| }, | |
| { | |
| "epoch": 1.0996677740863787, | |
| "grad_norm": 5.952533721923828, | |
| "learning_rate": 9.129648383454764e-05, | |
| "loss": 2.1404, | |
| "step": 248 | |
| }, | |
| { | |
| "epoch": 1.1040974529346623, | |
| "grad_norm": 9.447667121887207, | |
| "learning_rate": 9.121514199280071e-05, | |
| "loss": 2.1473, | |
| "step": 249 | |
| }, | |
| { | |
| "epoch": 1.1085271317829457, | |
| "grad_norm": 7.8357415199279785, | |
| "learning_rate": 9.113345840245347e-05, | |
| "loss": 2.1353, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 1.1129568106312293, | |
| "grad_norm": 5.7361860275268555, | |
| "learning_rate": 9.105143374081168e-05, | |
| "loss": 2.1992, | |
| "step": 251 | |
| }, | |
| { | |
| "epoch": 1.1173864894795127, | |
| "grad_norm": 10.3837251663208, | |
| "learning_rate": 9.096906868800917e-05, | |
| "loss": 2.3124, | |
| "step": 252 | |
| }, | |
| { | |
| "epoch": 1.1218161683277963, | |
| "grad_norm": 3.863896369934082, | |
| "learning_rate": 9.088636392700227e-05, | |
| "loss": 2.257, | |
| "step": 253 | |
| }, | |
| { | |
| "epoch": 1.1262458471760797, | |
| "grad_norm": 9.265199661254883, | |
| "learning_rate": 9.08033201435641e-05, | |
| "loss": 2.0167, | |
| "step": 254 | |
| }, | |
| { | |
| "epoch": 1.1306755260243633, | |
| "grad_norm": 4.3140177726745605, | |
| "learning_rate": 9.071993802627887e-05, | |
| "loss": 2.3914, | |
| "step": 255 | |
| }, | |
| { | |
| "epoch": 1.1351052048726467, | |
| "grad_norm": 14.065458297729492, | |
| "learning_rate": 9.063621826653624e-05, | |
| "loss": 2.7486, | |
| "step": 256 | |
| }, | |
| { | |
| "epoch": 1.1395348837209303, | |
| "grad_norm": 9.089619636535645, | |
| "learning_rate": 9.055216155852548e-05, | |
| "loss": 1.7132, | |
| "step": 257 | |
| }, | |
| { | |
| "epoch": 1.1439645625692136, | |
| "grad_norm": 6.716838836669922, | |
| "learning_rate": 9.046776859922983e-05, | |
| "loss": 1.8824, | |
| "step": 258 | |
| }, | |
| { | |
| "epoch": 1.1483942414174972, | |
| "grad_norm": 14.335271835327148, | |
| "learning_rate": 9.038304008842064e-05, | |
| "loss": 1.744, | |
| "step": 259 | |
| }, | |
| { | |
| "epoch": 1.1528239202657806, | |
| "grad_norm": 7.002466678619385, | |
| "learning_rate": 9.029797672865159e-05, | |
| "loss": 2.4813, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 1.1572535991140642, | |
| "grad_norm": 6.661802291870117, | |
| "learning_rate": 9.021257922525288e-05, | |
| "loss": 1.8213, | |
| "step": 261 | |
| }, | |
| { | |
| "epoch": 1.1616832779623478, | |
| "grad_norm": 7.363678455352783, | |
| "learning_rate": 9.012684828632538e-05, | |
| "loss": 1.7975, | |
| "step": 262 | |
| }, | |
| { | |
| "epoch": 1.1661129568106312, | |
| "grad_norm": 6.833836555480957, | |
| "learning_rate": 9.00407846227347e-05, | |
| "loss": 2.1189, | |
| "step": 263 | |
| }, | |
| { | |
| "epoch": 1.1705426356589148, | |
| "grad_norm": 6.624426364898682, | |
| "learning_rate": 8.995438894810541e-05, | |
| "loss": 1.9159, | |
| "step": 264 | |
| }, | |
| { | |
| "epoch": 1.1749723145071982, | |
| "grad_norm": 7.053401470184326, | |
| "learning_rate": 8.9867661978815e-05, | |
| "loss": 1.8237, | |
| "step": 265 | |
| }, | |
| { | |
| "epoch": 1.1794019933554818, | |
| "grad_norm": 6.560683250427246, | |
| "learning_rate": 8.978060443398802e-05, | |
| "loss": 1.9715, | |
| "step": 266 | |
| }, | |
| { | |
| "epoch": 1.1838316722037652, | |
| "grad_norm": 6.258054733276367, | |
| "learning_rate": 8.96932170354901e-05, | |
| "loss": 1.8396, | |
| "step": 267 | |
| }, | |
| { | |
| "epoch": 1.1882613510520488, | |
| "grad_norm": 9.02526569366455, | |
| "learning_rate": 8.960550050792194e-05, | |
| "loss": 2.1659, | |
| "step": 268 | |
| }, | |
| { | |
| "epoch": 1.1926910299003322, | |
| "grad_norm": 6.251518726348877, | |
| "learning_rate": 8.951745557861333e-05, | |
| "loss": 1.8894, | |
| "step": 269 | |
| }, | |
| { | |
| "epoch": 1.1971207087486158, | |
| "grad_norm": 8.956218719482422, | |
| "learning_rate": 8.942908297761711e-05, | |
| "loss": 2.5807, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 1.2015503875968991, | |
| "grad_norm": 6.6762237548828125, | |
| "learning_rate": 8.934038343770312e-05, | |
| "loss": 2.2264, | |
| "step": 271 | |
| }, | |
| { | |
| "epoch": 1.2059800664451827, | |
| "grad_norm": 4.275567531585693, | |
| "learning_rate": 8.925135769435211e-05, | |
| "loss": 1.6638, | |
| "step": 272 | |
| }, | |
| { | |
| "epoch": 1.2104097452934663, | |
| "grad_norm": 6.121718406677246, | |
| "learning_rate": 8.916200648574964e-05, | |
| "loss": 2.0015, | |
| "step": 273 | |
| }, | |
| { | |
| "epoch": 1.2148394241417497, | |
| "grad_norm": 5.744752883911133, | |
| "learning_rate": 8.907233055277999e-05, | |
| "loss": 1.9907, | |
| "step": 274 | |
| }, | |
| { | |
| "epoch": 1.219269102990033, | |
| "grad_norm": 6.730654239654541, | |
| "learning_rate": 8.898233063902e-05, | |
| "loss": 2.1793, | |
| "step": 275 | |
| }, | |
| { | |
| "epoch": 1.2236987818383167, | |
| "grad_norm": 14.31635856628418, | |
| "learning_rate": 8.889200749073285e-05, | |
| "loss": 2.0648, | |
| "step": 276 | |
| }, | |
| { | |
| "epoch": 1.2281284606866003, | |
| "grad_norm": 5.152917385101318, | |
| "learning_rate": 8.880136185686201e-05, | |
| "loss": 1.793, | |
| "step": 277 | |
| }, | |
| { | |
| "epoch": 1.2325581395348837, | |
| "grad_norm": 10.570582389831543, | |
| "learning_rate": 8.871039448902487e-05, | |
| "loss": 1.7875, | |
| "step": 278 | |
| }, | |
| { | |
| "epoch": 1.2369878183831673, | |
| "grad_norm": 9.247808456420898, | |
| "learning_rate": 8.861910614150661e-05, | |
| "loss": 1.7261, | |
| "step": 279 | |
| }, | |
| { | |
| "epoch": 1.2414174972314507, | |
| "grad_norm": 13.006294250488281, | |
| "learning_rate": 8.852749757125392e-05, | |
| "loss": 1.6614, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 1.2458471760797343, | |
| "grad_norm": 6.007842063903809, | |
| "learning_rate": 8.84355695378687e-05, | |
| "loss": 1.9225, | |
| "step": 281 | |
| }, | |
| { | |
| "epoch": 1.2502768549280177, | |
| "grad_norm": 7.5843658447265625, | |
| "learning_rate": 8.83433228036018e-05, | |
| "loss": 2.3472, | |
| "step": 282 | |
| }, | |
| { | |
| "epoch": 1.2547065337763013, | |
| "grad_norm": 8.273632049560547, | |
| "learning_rate": 8.825075813334669e-05, | |
| "loss": 2.0889, | |
| "step": 283 | |
| }, | |
| { | |
| "epoch": 1.2591362126245846, | |
| "grad_norm": 7.670727729797363, | |
| "learning_rate": 8.815787629463305e-05, | |
| "loss": 1.5007, | |
| "step": 284 | |
| }, | |
| { | |
| "epoch": 1.2635658914728682, | |
| "grad_norm": 10.460335731506348, | |
| "learning_rate": 8.806467805762055e-05, | |
| "loss": 1.9641, | |
| "step": 285 | |
| }, | |
| { | |
| "epoch": 1.2679955703211516, | |
| "grad_norm": 8.615412712097168, | |
| "learning_rate": 8.79711641950923e-05, | |
| "loss": 2.0096, | |
| "step": 286 | |
| }, | |
| { | |
| "epoch": 1.2724252491694352, | |
| "grad_norm": 8.69423770904541, | |
| "learning_rate": 8.787733548244858e-05, | |
| "loss": 1.8392, | |
| "step": 287 | |
| }, | |
| { | |
| "epoch": 1.2768549280177188, | |
| "grad_norm": 4.95367431640625, | |
| "learning_rate": 8.778319269770034e-05, | |
| "loss": 1.8004, | |
| "step": 288 | |
| }, | |
| { | |
| "epoch": 1.2812846068660022, | |
| "grad_norm": 8.5960693359375, | |
| "learning_rate": 8.768873662146271e-05, | |
| "loss": 2.2377, | |
| "step": 289 | |
| }, | |
| { | |
| "epoch": 1.2857142857142856, | |
| "grad_norm": 7.77866268157959, | |
| "learning_rate": 8.759396803694861e-05, | |
| "loss": 2.0425, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 1.2901439645625692, | |
| "grad_norm": 6.187042236328125, | |
| "learning_rate": 8.749888772996226e-05, | |
| "loss": 2.1688, | |
| "step": 291 | |
| }, | |
| { | |
| "epoch": 1.2945736434108528, | |
| "grad_norm": 8.12923812866211, | |
| "learning_rate": 8.740349648889259e-05, | |
| "loss": 2.4057, | |
| "step": 292 | |
| }, | |
| { | |
| "epoch": 1.2990033222591362, | |
| "grad_norm": 5.084444522857666, | |
| "learning_rate": 8.730779510470671e-05, | |
| "loss": 2.092, | |
| "step": 293 | |
| }, | |
| { | |
| "epoch": 1.3034330011074198, | |
| "grad_norm": 8.99492359161377, | |
| "learning_rate": 8.721178437094346e-05, | |
| "loss": 1.9337, | |
| "step": 294 | |
| }, | |
| { | |
| "epoch": 1.3078626799557032, | |
| "grad_norm": 4.3779706954956055, | |
| "learning_rate": 8.711546508370666e-05, | |
| "loss": 2.2644, | |
| "step": 295 | |
| }, | |
| { | |
| "epoch": 1.3122923588039868, | |
| "grad_norm": 5.100649356842041, | |
| "learning_rate": 8.701883804165866e-05, | |
| "loss": 1.7897, | |
| "step": 296 | |
| }, | |
| { | |
| "epoch": 1.3167220376522701, | |
| "grad_norm": 7.3021769523620605, | |
| "learning_rate": 8.692190404601369e-05, | |
| "loss": 1.8963, | |
| "step": 297 | |
| }, | |
| { | |
| "epoch": 1.3211517165005537, | |
| "grad_norm": 8.583118438720703, | |
| "learning_rate": 8.682466390053106e-05, | |
| "loss": 2.3528, | |
| "step": 298 | |
| }, | |
| { | |
| "epoch": 1.3255813953488373, | |
| "grad_norm": 9.092851638793945, | |
| "learning_rate": 8.672711841150877e-05, | |
| "loss": 2.2453, | |
| "step": 299 | |
| }, | |
| { | |
| "epoch": 1.3300110741971207, | |
| "grad_norm": 4.446629524230957, | |
| "learning_rate": 8.662926838777656e-05, | |
| "loss": 1.8826, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 1.334440753045404, | |
| "grad_norm": 8.508023262023926, | |
| "learning_rate": 8.653111464068937e-05, | |
| "loss": 1.7581, | |
| "step": 301 | |
| }, | |
| { | |
| "epoch": 1.3388704318936877, | |
| "grad_norm": 3.5442817211151123, | |
| "learning_rate": 8.643265798412056e-05, | |
| "loss": 2.124, | |
| "step": 302 | |
| }, | |
| { | |
| "epoch": 1.3433001107419713, | |
| "grad_norm": 10.445211410522461, | |
| "learning_rate": 8.633389923445514e-05, | |
| "loss": 2.6855, | |
| "step": 303 | |
| }, | |
| { | |
| "epoch": 1.3477297895902547, | |
| "grad_norm": 6.55064582824707, | |
| "learning_rate": 8.623483921058303e-05, | |
| "loss": 2.4239, | |
| "step": 304 | |
| }, | |
| { | |
| "epoch": 1.352159468438538, | |
| "grad_norm": 7.995059013366699, | |
| "learning_rate": 8.613547873389228e-05, | |
| "loss": 1.828, | |
| "step": 305 | |
| }, | |
| { | |
| "epoch": 1.3565891472868217, | |
| "grad_norm": 7.152670860290527, | |
| "learning_rate": 8.603581862826222e-05, | |
| "loss": 1.944, | |
| "step": 306 | |
| }, | |
| { | |
| "epoch": 1.3610188261351053, | |
| "grad_norm": 6.228992938995361, | |
| "learning_rate": 8.593585972005664e-05, | |
| "loss": 2.0547, | |
| "step": 307 | |
| }, | |
| { | |
| "epoch": 1.3654485049833887, | |
| "grad_norm": 6.894842624664307, | |
| "learning_rate": 8.5835602838117e-05, | |
| "loss": 1.7732, | |
| "step": 308 | |
| }, | |
| { | |
| "epoch": 1.3698781838316723, | |
| "grad_norm": 7.37748384475708, | |
| "learning_rate": 8.573504881375542e-05, | |
| "loss": 1.8529, | |
| "step": 309 | |
| }, | |
| { | |
| "epoch": 1.3743078626799556, | |
| "grad_norm": 7.970264911651611, | |
| "learning_rate": 8.563419848074798e-05, | |
| "loss": 1.9512, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 1.3787375415282392, | |
| "grad_norm": 7.616507530212402, | |
| "learning_rate": 8.553305267532758e-05, | |
| "loss": 1.9866, | |
| "step": 311 | |
| }, | |
| { | |
| "epoch": 1.3831672203765226, | |
| "grad_norm": 7.430023670196533, | |
| "learning_rate": 8.543161223617724e-05, | |
| "loss": 2.0003, | |
| "step": 312 | |
| }, | |
| { | |
| "epoch": 1.3875968992248062, | |
| "grad_norm": 7.171806812286377, | |
| "learning_rate": 8.532987800442292e-05, | |
| "loss": 1.9861, | |
| "step": 313 | |
| }, | |
| { | |
| "epoch": 1.3920265780730898, | |
| "grad_norm": 7.678527355194092, | |
| "learning_rate": 8.522785082362674e-05, | |
| "loss": 2.0038, | |
| "step": 314 | |
| }, | |
| { | |
| "epoch": 1.3964562569213732, | |
| "grad_norm": 7.096549034118652, | |
| "learning_rate": 8.512553153977987e-05, | |
| "loss": 2.0742, | |
| "step": 315 | |
| }, | |
| { | |
| "epoch": 1.4008859357696566, | |
| "grad_norm": 7.600286960601807, | |
| "learning_rate": 8.502292100129553e-05, | |
| "loss": 1.9543, | |
| "step": 316 | |
| }, | |
| { | |
| "epoch": 1.4053156146179402, | |
| "grad_norm": 8.969922065734863, | |
| "learning_rate": 8.492002005900202e-05, | |
| "loss": 1.6853, | |
| "step": 317 | |
| }, | |
| { | |
| "epoch": 1.4097452934662238, | |
| "grad_norm": 5.5180745124816895, | |
| "learning_rate": 8.481682956613555e-05, | |
| "loss": 1.7093, | |
| "step": 318 | |
| }, | |
| { | |
| "epoch": 1.4141749723145072, | |
| "grad_norm": 13.428471565246582, | |
| "learning_rate": 8.471335037833328e-05, | |
| "loss": 2.0241, | |
| "step": 319 | |
| }, | |
| { | |
| "epoch": 1.4186046511627908, | |
| "grad_norm": 16.033464431762695, | |
| "learning_rate": 8.460958335362616e-05, | |
| "loss": 1.788, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 1.4230343300110742, | |
| "grad_norm": 5.455560684204102, | |
| "learning_rate": 8.450552935243186e-05, | |
| "loss": 2.0701, | |
| "step": 321 | |
| }, | |
| { | |
| "epoch": 1.4274640088593578, | |
| "grad_norm": 7.2362189292907715, | |
| "learning_rate": 8.440118923754757e-05, | |
| "loss": 1.7665, | |
| "step": 322 | |
| }, | |
| { | |
| "epoch": 1.4318936877076411, | |
| "grad_norm": 6.421346664428711, | |
| "learning_rate": 8.429656387414289e-05, | |
| "loss": 1.843, | |
| "step": 323 | |
| }, | |
| { | |
| "epoch": 1.4363233665559247, | |
| "grad_norm": 16.439189910888672, | |
| "learning_rate": 8.419165412975265e-05, | |
| "loss": 2.2937, | |
| "step": 324 | |
| }, | |
| { | |
| "epoch": 1.4407530454042081, | |
| "grad_norm": 9.863225936889648, | |
| "learning_rate": 8.408646087426974e-05, | |
| "loss": 2.0961, | |
| "step": 325 | |
| }, | |
| { | |
| "epoch": 1.4451827242524917, | |
| "grad_norm": 5.041436195373535, | |
| "learning_rate": 8.398098497993786e-05, | |
| "loss": 2.1183, | |
| "step": 326 | |
| }, | |
| { | |
| "epoch": 1.449612403100775, | |
| "grad_norm": 8.520334243774414, | |
| "learning_rate": 8.387522732134428e-05, | |
| "loss": 1.7906, | |
| "step": 327 | |
| }, | |
| { | |
| "epoch": 1.4540420819490587, | |
| "grad_norm": 6.228928565979004, | |
| "learning_rate": 8.376918877541264e-05, | |
| "loss": 2.1264, | |
| "step": 328 | |
| }, | |
| { | |
| "epoch": 1.4584717607973423, | |
| "grad_norm": 8.996322631835938, | |
| "learning_rate": 8.366287022139561e-05, | |
| "loss": 2.1557, | |
| "step": 329 | |
| }, | |
| { | |
| "epoch": 1.4629014396456257, | |
| "grad_norm": 5.682254791259766, | |
| "learning_rate": 8.35562725408677e-05, | |
| "loss": 1.9131, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 1.467331118493909, | |
| "grad_norm": 8.119385719299316, | |
| "learning_rate": 8.344939661771783e-05, | |
| "loss": 2.4303, | |
| "step": 331 | |
| }, | |
| { | |
| "epoch": 1.4717607973421927, | |
| "grad_norm": 8.717931747436523, | |
| "learning_rate": 8.334224333814209e-05, | |
| "loss": 1.9083, | |
| "step": 332 | |
| }, | |
| { | |
| "epoch": 1.4761904761904763, | |
| "grad_norm": 7.539588928222656, | |
| "learning_rate": 8.323481359063632e-05, | |
| "loss": 1.7957, | |
| "step": 333 | |
| }, | |
| { | |
| "epoch": 1.4806201550387597, | |
| "grad_norm": 6.015201568603516, | |
| "learning_rate": 8.312710826598883e-05, | |
| "loss": 1.9132, | |
| "step": 334 | |
| }, | |
| { | |
| "epoch": 1.4850498338870433, | |
| "grad_norm": 8.548601150512695, | |
| "learning_rate": 8.301912825727294e-05, | |
| "loss": 1.9532, | |
| "step": 335 | |
| }, | |
| { | |
| "epoch": 1.4894795127353266, | |
| "grad_norm": 12.072663307189941, | |
| "learning_rate": 8.29108744598396e-05, | |
| "loss": 2.2549, | |
| "step": 336 | |
| }, | |
| { | |
| "epoch": 1.4939091915836102, | |
| "grad_norm": 4.701639175415039, | |
| "learning_rate": 8.280234777131e-05, | |
| "loss": 2.013, | |
| "step": 337 | |
| }, | |
| { | |
| "epoch": 1.4983388704318936, | |
| "grad_norm": 6.194016456604004, | |
| "learning_rate": 8.269354909156802e-05, | |
| "loss": 1.677, | |
| "step": 338 | |
| }, | |
| { | |
| "epoch": 1.5027685492801772, | |
| "grad_norm": 6.05522346496582, | |
| "learning_rate": 8.258447932275295e-05, | |
| "loss": 1.844, | |
| "step": 339 | |
| }, | |
| { | |
| "epoch": 1.5071982281284608, | |
| "grad_norm": 5.3696980476379395, | |
| "learning_rate": 8.24751393692518e-05, | |
| "loss": 1.9202, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 1.5116279069767442, | |
| "grad_norm": 4.895171165466309, | |
| "learning_rate": 8.236553013769197e-05, | |
| "loss": 1.8623, | |
| "step": 341 | |
| }, | |
| { | |
| "epoch": 1.5160575858250276, | |
| "grad_norm": 10.872806549072266, | |
| "learning_rate": 8.225565253693364e-05, | |
| "loss": 1.6916, | |
| "step": 342 | |
| }, | |
| { | |
| "epoch": 1.5204872646733112, | |
| "grad_norm": 6.46589994430542, | |
| "learning_rate": 8.214550747806227e-05, | |
| "loss": 2.0726, | |
| "step": 343 | |
| }, | |
| { | |
| "epoch": 1.5249169435215948, | |
| "grad_norm": 7.604471683502197, | |
| "learning_rate": 8.203509587438098e-05, | |
| "loss": 2.05, | |
| "step": 344 | |
| }, | |
| { | |
| "epoch": 1.5293466223698782, | |
| "grad_norm": 16.37729263305664, | |
| "learning_rate": 8.192441864140313e-05, | |
| "loss": 2.1448, | |
| "step": 345 | |
| }, | |
| { | |
| "epoch": 1.5337763012181616, | |
| "grad_norm": 11.779487609863281, | |
| "learning_rate": 8.181347669684457e-05, | |
| "loss": 1.8318, | |
| "step": 346 | |
| }, | |
| { | |
| "epoch": 1.5382059800664452, | |
| "grad_norm": 11.811964988708496, | |
| "learning_rate": 8.170227096061608e-05, | |
| "loss": 2.3381, | |
| "step": 347 | |
| }, | |
| { | |
| "epoch": 1.5426356589147288, | |
| "grad_norm": 6.4559645652771, | |
| "learning_rate": 8.159080235481579e-05, | |
| "loss": 2.2196, | |
| "step": 348 | |
| }, | |
| { | |
| "epoch": 1.5470653377630121, | |
| "grad_norm": 8.946720123291016, | |
| "learning_rate": 8.147907180372148e-05, | |
| "loss": 2.0286, | |
| "step": 349 | |
| }, | |
| { | |
| "epoch": 1.5514950166112955, | |
| "grad_norm": 13.929659843444824, | |
| "learning_rate": 8.136708023378292e-05, | |
| "loss": 2.3538, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 1.5559246954595793, | |
| "grad_norm": 5.848323345184326, | |
| "learning_rate": 8.125482857361425e-05, | |
| "loss": 1.943, | |
| "step": 351 | |
| }, | |
| { | |
| "epoch": 1.5603543743078627, | |
| "grad_norm": 5.630266189575195, | |
| "learning_rate": 8.114231775398617e-05, | |
| "loss": 2.2482, | |
| "step": 352 | |
| }, | |
| { | |
| "epoch": 1.564784053156146, | |
| "grad_norm": 10.885608673095703, | |
| "learning_rate": 8.102954870781832e-05, | |
| "loss": 1.8583, | |
| "step": 353 | |
| }, | |
| { | |
| "epoch": 1.5692137320044297, | |
| "grad_norm": 6.998108863830566, | |
| "learning_rate": 8.091652237017151e-05, | |
| "loss": 1.9425, | |
| "step": 354 | |
| }, | |
| { | |
| "epoch": 1.5736434108527133, | |
| "grad_norm": 8.657463073730469, | |
| "learning_rate": 8.080323967823993e-05, | |
| "loss": 1.9427, | |
| "step": 355 | |
| }, | |
| { | |
| "epoch": 1.5780730897009967, | |
| "grad_norm": 11.812101364135742, | |
| "learning_rate": 8.068970157134347e-05, | |
| "loss": 2.1204, | |
| "step": 356 | |
| }, | |
| { | |
| "epoch": 1.58250276854928, | |
| "grad_norm": 6.619724273681641, | |
| "learning_rate": 8.057590899091984e-05, | |
| "loss": 1.8593, | |
| "step": 357 | |
| }, | |
| { | |
| "epoch": 1.5869324473975637, | |
| "grad_norm": 8.670092582702637, | |
| "learning_rate": 8.046186288051681e-05, | |
| "loss": 2.001, | |
| "step": 358 | |
| }, | |
| { | |
| "epoch": 1.5913621262458473, | |
| "grad_norm": 8.840471267700195, | |
| "learning_rate": 8.034756418578434e-05, | |
| "loss": 1.852, | |
| "step": 359 | |
| }, | |
| { | |
| "epoch": 1.5957918050941307, | |
| "grad_norm": 4.436490535736084, | |
| "learning_rate": 8.023301385446682e-05, | |
| "loss": 2.3419, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 1.600221483942414, | |
| "grad_norm": 9.356131553649902, | |
| "learning_rate": 8.011821283639516e-05, | |
| "loss": 1.8724, | |
| "step": 361 | |
| }, | |
| { | |
| "epoch": 1.6046511627906976, | |
| "grad_norm": 9.546539306640625, | |
| "learning_rate": 8.000316208347891e-05, | |
| "loss": 1.8241, | |
| "step": 362 | |
| }, | |
| { | |
| "epoch": 1.6090808416389812, | |
| "grad_norm": 12.444693565368652, | |
| "learning_rate": 7.988786254969835e-05, | |
| "loss": 2.2428, | |
| "step": 363 | |
| }, | |
| { | |
| "epoch": 1.6135105204872646, | |
| "grad_norm": 8.764139175415039, | |
| "learning_rate": 7.977231519109665e-05, | |
| "loss": 1.7305, | |
| "step": 364 | |
| }, | |
| { | |
| "epoch": 1.6179401993355482, | |
| "grad_norm": 8.302329063415527, | |
| "learning_rate": 7.965652096577188e-05, | |
| "loss": 2.3451, | |
| "step": 365 | |
| }, | |
| { | |
| "epoch": 1.6223698781838318, | |
| "grad_norm": 13.382746696472168, | |
| "learning_rate": 7.954048083386909e-05, | |
| "loss": 1.8125, | |
| "step": 366 | |
| }, | |
| { | |
| "epoch": 1.6267995570321152, | |
| "grad_norm": 6.948853015899658, | |
| "learning_rate": 7.942419575757235e-05, | |
| "loss": 1.9318, | |
| "step": 367 | |
| }, | |
| { | |
| "epoch": 1.6312292358803986, | |
| "grad_norm": 6.62320613861084, | |
| "learning_rate": 7.930766670109673e-05, | |
| "loss": 1.9616, | |
| "step": 368 | |
| }, | |
| { | |
| "epoch": 1.6356589147286822, | |
| "grad_norm": 6.08026123046875, | |
| "learning_rate": 7.919089463068039e-05, | |
| "loss": 1.9908, | |
| "step": 369 | |
| }, | |
| { | |
| "epoch": 1.6400885935769658, | |
| "grad_norm": 9.736669540405273, | |
| "learning_rate": 7.907388051457647e-05, | |
| "loss": 1.9624, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 1.6445182724252492, | |
| "grad_norm": 7.714630126953125, | |
| "learning_rate": 7.895662532304516e-05, | |
| "loss": 1.8318, | |
| "step": 371 | |
| }, | |
| { | |
| "epoch": 1.6489479512735326, | |
| "grad_norm": 9.872337341308594, | |
| "learning_rate": 7.883913002834555e-05, | |
| "loss": 2.0592, | |
| "step": 372 | |
| }, | |
| { | |
| "epoch": 1.6533776301218162, | |
| "grad_norm": 10.687009811401367, | |
| "learning_rate": 7.872139560472767e-05, | |
| "loss": 1.9198, | |
| "step": 373 | |
| }, | |
| { | |
| "epoch": 1.6578073089700998, | |
| "grad_norm": 7.267500400543213, | |
| "learning_rate": 7.860342302842432e-05, | |
| "loss": 1.5929, | |
| "step": 374 | |
| }, | |
| { | |
| "epoch": 1.6622369878183831, | |
| "grad_norm": 9.017212867736816, | |
| "learning_rate": 7.848521327764308e-05, | |
| "loss": 1.6919, | |
| "step": 375 | |
| }, | |
| { | |
| "epoch": 1.6666666666666665, | |
| "grad_norm": 7.3636860847473145, | |
| "learning_rate": 7.836676733255809e-05, | |
| "loss": 2.0157, | |
| "step": 376 | |
| }, | |
| { | |
| "epoch": 1.6710963455149501, | |
| "grad_norm": 10.131976127624512, | |
| "learning_rate": 7.824808617530197e-05, | |
| "loss": 2.0387, | |
| "step": 377 | |
| }, | |
| { | |
| "epoch": 1.6755260243632337, | |
| "grad_norm": 10.72778606414795, | |
| "learning_rate": 7.81291707899577e-05, | |
| "loss": 1.9898, | |
| "step": 378 | |
| }, | |
| { | |
| "epoch": 1.679955703211517, | |
| "grad_norm": 8.190147399902344, | |
| "learning_rate": 7.801002216255042e-05, | |
| "loss": 1.9384, | |
| "step": 379 | |
| }, | |
| { | |
| "epoch": 1.6843853820598007, | |
| "grad_norm": 8.989834785461426, | |
| "learning_rate": 7.789064128103929e-05, | |
| "loss": 1.9706, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 1.6888150609080843, | |
| "grad_norm": 6.380321025848389, | |
| "learning_rate": 7.777102913530928e-05, | |
| "loss": 2.0304, | |
| "step": 381 | |
| }, | |
| { | |
| "epoch": 1.6932447397563677, | |
| "grad_norm": 6.1082258224487305, | |
| "learning_rate": 7.76511867171629e-05, | |
| "loss": 1.7976, | |
| "step": 382 | |
| }, | |
| { | |
| "epoch": 1.697674418604651, | |
| "grad_norm": 8.573156356811523, | |
| "learning_rate": 7.753111502031214e-05, | |
| "loss": 1.59, | |
| "step": 383 | |
| }, | |
| { | |
| "epoch": 1.7021040974529347, | |
| "grad_norm": 5.815869331359863, | |
| "learning_rate": 7.741081504037008e-05, | |
| "loss": 1.889, | |
| "step": 384 | |
| }, | |
| { | |
| "epoch": 1.7065337763012183, | |
| "grad_norm": 8.750470161437988, | |
| "learning_rate": 7.729028777484267e-05, | |
| "loss": 1.7533, | |
| "step": 385 | |
| }, | |
| { | |
| "epoch": 1.7109634551495017, | |
| "grad_norm": 12.4512300491333, | |
| "learning_rate": 7.716953422312045e-05, | |
| "loss": 2.0341, | |
| "step": 386 | |
| }, | |
| { | |
| "epoch": 1.715393133997785, | |
| "grad_norm": 7.664438247680664, | |
| "learning_rate": 7.704855538647033e-05, | |
| "loss": 2.2063, | |
| "step": 387 | |
| }, | |
| { | |
| "epoch": 1.7198228128460686, | |
| "grad_norm": 7.865803241729736, | |
| "learning_rate": 7.692735226802728e-05, | |
| "loss": 1.8626, | |
| "step": 388 | |
| }, | |
| { | |
| "epoch": 1.7242524916943522, | |
| "grad_norm": 9.542865753173828, | |
| "learning_rate": 7.680592587278585e-05, | |
| "loss": 2.1088, | |
| "step": 389 | |
| }, | |
| { | |
| "epoch": 1.7286821705426356, | |
| "grad_norm": 10.541374206542969, | |
| "learning_rate": 7.668427720759207e-05, | |
| "loss": 1.889, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 1.733111849390919, | |
| "grad_norm": 7.541441440582275, | |
| "learning_rate": 7.656240728113493e-05, | |
| "loss": 1.9824, | |
| "step": 391 | |
| }, | |
| { | |
| "epoch": 1.7375415282392026, | |
| "grad_norm": 15.032777786254883, | |
| "learning_rate": 7.644031710393815e-05, | |
| "loss": 2.2166, | |
| "step": 392 | |
| }, | |
| { | |
| "epoch": 1.7419712070874862, | |
| "grad_norm": 5.340550422668457, | |
| "learning_rate": 7.631800768835166e-05, | |
| "loss": 1.6734, | |
| "step": 393 | |
| }, | |
| { | |
| "epoch": 1.7464008859357696, | |
| "grad_norm": 4.782181739807129, | |
| "learning_rate": 7.619548004854333e-05, | |
| "loss": 1.9972, | |
| "step": 394 | |
| }, | |
| { | |
| "epoch": 1.7508305647840532, | |
| "grad_norm": 7.717755317687988, | |
| "learning_rate": 7.607273520049041e-05, | |
| "loss": 1.8174, | |
| "step": 395 | |
| }, | |
| { | |
| "epoch": 1.7552602436323368, | |
| "grad_norm": 4.189438819885254, | |
| "learning_rate": 7.594977416197133e-05, | |
| "loss": 1.8464, | |
| "step": 396 | |
| }, | |
| { | |
| "epoch": 1.7596899224806202, | |
| "grad_norm": 18.602073669433594, | |
| "learning_rate": 7.582659795255706e-05, | |
| "loss": 1.5871, | |
| "step": 397 | |
| }, | |
| { | |
| "epoch": 1.7641196013289036, | |
| "grad_norm": 7.6945481300354, | |
| "learning_rate": 7.570320759360273e-05, | |
| "loss": 1.9943, | |
| "step": 398 | |
| }, | |
| { | |
| "epoch": 1.7685492801771872, | |
| "grad_norm": 7.603806495666504, | |
| "learning_rate": 7.557960410823917e-05, | |
| "loss": 2.1524, | |
| "step": 399 | |
| }, | |
| { | |
| "epoch": 1.7729789590254708, | |
| "grad_norm": 6.585241794586182, | |
| "learning_rate": 7.545578852136443e-05, | |
| "loss": 1.9966, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 1.7774086378737541, | |
| "grad_norm": 8.787303924560547, | |
| "learning_rate": 7.533176185963523e-05, | |
| "loss": 1.7846, | |
| "step": 401 | |
| }, | |
| { | |
| "epoch": 1.7818383167220375, | |
| "grad_norm": 5.8807477951049805, | |
| "learning_rate": 7.520752515145855e-05, | |
| "loss": 1.7961, | |
| "step": 402 | |
| }, | |
| { | |
| "epoch": 1.7862679955703211, | |
| "grad_norm": 11.430944442749023, | |
| "learning_rate": 7.508307942698295e-05, | |
| "loss": 1.5721, | |
| "step": 403 | |
| }, | |
| { | |
| "epoch": 1.7906976744186047, | |
| "grad_norm": 9.947343826293945, | |
| "learning_rate": 7.495842571809021e-05, | |
| "loss": 1.8959, | |
| "step": 404 | |
| }, | |
| { | |
| "epoch": 1.795127353266888, | |
| "grad_norm": 8.460419654846191, | |
| "learning_rate": 7.483356505838659e-05, | |
| "loss": 1.9603, | |
| "step": 405 | |
| }, | |
| { | |
| "epoch": 1.7995570321151715, | |
| "grad_norm": 11.215185165405273, | |
| "learning_rate": 7.470849848319444e-05, | |
| "loss": 1.6616, | |
| "step": 406 | |
| }, | |
| { | |
| "epoch": 1.8039867109634553, | |
| "grad_norm": 6.264514446258545, | |
| "learning_rate": 7.458322702954341e-05, | |
| "loss": 1.8412, | |
| "step": 407 | |
| }, | |
| { | |
| "epoch": 1.8084163898117387, | |
| "grad_norm": 5.761272430419922, | |
| "learning_rate": 7.44577517361621e-05, | |
| "loss": 1.7221, | |
| "step": 408 | |
| }, | |
| { | |
| "epoch": 1.812846068660022, | |
| "grad_norm": 5.823096752166748, | |
| "learning_rate": 7.433207364346919e-05, | |
| "loss": 1.8469, | |
| "step": 409 | |
| }, | |
| { | |
| "epoch": 1.8172757475083057, | |
| "grad_norm": 7.28630256652832, | |
| "learning_rate": 7.420619379356503e-05, | |
| "loss": 1.8922, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 1.8217054263565893, | |
| "grad_norm": 5.539880275726318, | |
| "learning_rate": 7.408011323022286e-05, | |
| "loss": 1.9854, | |
| "step": 411 | |
| }, | |
| { | |
| "epoch": 1.8261351052048727, | |
| "grad_norm": 6.604762077331543, | |
| "learning_rate": 7.395383299888019e-05, | |
| "loss": 1.927, | |
| "step": 412 | |
| }, | |
| { | |
| "epoch": 1.830564784053156, | |
| "grad_norm": 6.813220977783203, | |
| "learning_rate": 7.382735414663016e-05, | |
| "loss": 1.9083, | |
| "step": 413 | |
| }, | |
| { | |
| "epoch": 1.8349944629014396, | |
| "grad_norm": 4.795647621154785, | |
| "learning_rate": 7.370067772221285e-05, | |
| "loss": 1.9524, | |
| "step": 414 | |
| }, | |
| { | |
| "epoch": 1.8394241417497232, | |
| "grad_norm": 7.515512466430664, | |
| "learning_rate": 7.357380477600653e-05, | |
| "loss": 2.0216, | |
| "step": 415 | |
| }, | |
| { | |
| "epoch": 1.8438538205980066, | |
| "grad_norm": 10.164600372314453, | |
| "learning_rate": 7.344673636001906e-05, | |
| "loss": 2.2258, | |
| "step": 416 | |
| }, | |
| { | |
| "epoch": 1.84828349944629, | |
| "grad_norm": 5.503859996795654, | |
| "learning_rate": 7.331947352787905e-05, | |
| "loss": 1.86, | |
| "step": 417 | |
| }, | |
| { | |
| "epoch": 1.8527131782945736, | |
| "grad_norm": 6.191671371459961, | |
| "learning_rate": 7.319201733482715e-05, | |
| "loss": 1.9093, | |
| "step": 418 | |
| }, | |
| { | |
| "epoch": 1.8571428571428572, | |
| "grad_norm": 7.223259925842285, | |
| "learning_rate": 7.306436883770742e-05, | |
| "loss": 2.1421, | |
| "step": 419 | |
| }, | |
| { | |
| "epoch": 1.8615725359911406, | |
| "grad_norm": 7.5998992919921875, | |
| "learning_rate": 7.293652909495836e-05, | |
| "loss": 1.9365, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 1.8660022148394242, | |
| "grad_norm": 7.336147308349609, | |
| "learning_rate": 7.280849916660434e-05, | |
| "loss": 1.7375, | |
| "step": 421 | |
| }, | |
| { | |
| "epoch": 1.8704318936877078, | |
| "grad_norm": 6.501189231872559, | |
| "learning_rate": 7.268028011424663e-05, | |
| "loss": 1.7906, | |
| "step": 422 | |
| }, | |
| { | |
| "epoch": 1.8748615725359912, | |
| "grad_norm": 6.481664180755615, | |
| "learning_rate": 7.255187300105476e-05, | |
| "loss": 1.8908, | |
| "step": 423 | |
| }, | |
| { | |
| "epoch": 1.8792912513842746, | |
| "grad_norm": 8.071866035461426, | |
| "learning_rate": 7.24232788917576e-05, | |
| "loss": 2.0077, | |
| "step": 424 | |
| }, | |
| { | |
| "epoch": 1.8837209302325582, | |
| "grad_norm": 5.397129535675049, | |
| "learning_rate": 7.22944988526345e-05, | |
| "loss": 1.9436, | |
| "step": 425 | |
| }, | |
| { | |
| "epoch": 1.8881506090808418, | |
| "grad_norm": 6.841665267944336, | |
| "learning_rate": 7.21655339515066e-05, | |
| "loss": 1.9098, | |
| "step": 426 | |
| }, | |
| { | |
| "epoch": 1.8925802879291251, | |
| "grad_norm": 5.7756195068359375, | |
| "learning_rate": 7.203638525772782e-05, | |
| "loss": 1.8002, | |
| "step": 427 | |
| }, | |
| { | |
| "epoch": 1.8970099667774085, | |
| "grad_norm": 5.458576679229736, | |
| "learning_rate": 7.190705384217608e-05, | |
| "loss": 1.6935, | |
| "step": 428 | |
| }, | |
| { | |
| "epoch": 1.9014396456256921, | |
| "grad_norm": 5.86073112487793, | |
| "learning_rate": 7.17775407772444e-05, | |
| "loss": 2.2811, | |
| "step": 429 | |
| }, | |
| { | |
| "epoch": 1.9058693244739757, | |
| "grad_norm": 6.207028388977051, | |
| "learning_rate": 7.164784713683197e-05, | |
| "loss": 1.7852, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 1.910299003322259, | |
| "grad_norm": 9.03230094909668, | |
| "learning_rate": 7.151797399633533e-05, | |
| "loss": 2.3004, | |
| "step": 431 | |
| }, | |
| { | |
| "epoch": 1.9147286821705425, | |
| "grad_norm": 10.938101768493652, | |
| "learning_rate": 7.138792243263936e-05, | |
| "loss": 1.9295, | |
| "step": 432 | |
| }, | |
| { | |
| "epoch": 1.919158361018826, | |
| "grad_norm": 4.8152995109558105, | |
| "learning_rate": 7.125769352410845e-05, | |
| "loss": 2.0202, | |
| "step": 433 | |
| }, | |
| { | |
| "epoch": 1.9235880398671097, | |
| "grad_norm": 7.909144878387451, | |
| "learning_rate": 7.112728835057741e-05, | |
| "loss": 1.7597, | |
| "step": 434 | |
| }, | |
| { | |
| "epoch": 1.928017718715393, | |
| "grad_norm": 7.012314319610596, | |
| "learning_rate": 7.099670799334269e-05, | |
| "loss": 2.0388, | |
| "step": 435 | |
| }, | |
| { | |
| "epoch": 1.9324473975636767, | |
| "grad_norm": 8.393206596374512, | |
| "learning_rate": 7.08659535351533e-05, | |
| "loss": 1.9736, | |
| "step": 436 | |
| }, | |
| { | |
| "epoch": 1.9368770764119603, | |
| "grad_norm": 10.293094635009766, | |
| "learning_rate": 7.073502606020187e-05, | |
| "loss": 2.1422, | |
| "step": 437 | |
| }, | |
| { | |
| "epoch": 1.9413067552602437, | |
| "grad_norm": 9.445409774780273, | |
| "learning_rate": 7.060392665411564e-05, | |
| "loss": 1.6767, | |
| "step": 438 | |
| }, | |
| { | |
| "epoch": 1.945736434108527, | |
| "grad_norm": 7.761300086975098, | |
| "learning_rate": 7.04726564039475e-05, | |
| "loss": 1.7759, | |
| "step": 439 | |
| }, | |
| { | |
| "epoch": 1.9501661129568106, | |
| "grad_norm": 8.283308982849121, | |
| "learning_rate": 7.03412163981669e-05, | |
| "loss": 1.9943, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 1.9545957918050942, | |
| "grad_norm": 5.032713413238525, | |
| "learning_rate": 7.020960772665096e-05, | |
| "loss": 2.0963, | |
| "step": 441 | |
| }, | |
| { | |
| "epoch": 1.9590254706533776, | |
| "grad_norm": 6.809930324554443, | |
| "learning_rate": 7.007783148067523e-05, | |
| "loss": 1.9811, | |
| "step": 442 | |
| }, | |
| { | |
| "epoch": 1.963455149501661, | |
| "grad_norm": 8.002426147460938, | |
| "learning_rate": 6.994588875290487e-05, | |
| "loss": 2.2668, | |
| "step": 443 | |
| }, | |
| { | |
| "epoch": 1.9678848283499446, | |
| "grad_norm": 11.3838529586792, | |
| "learning_rate": 6.981378063738539e-05, | |
| "loss": 2.0145, | |
| "step": 444 | |
| }, | |
| { | |
| "epoch": 1.9723145071982282, | |
| "grad_norm": 14.399958610534668, | |
| "learning_rate": 6.968150822953372e-05, | |
| "loss": 2.1096, | |
| "step": 445 | |
| }, | |
| { | |
| "epoch": 1.9767441860465116, | |
| "grad_norm": 4.975661277770996, | |
| "learning_rate": 6.954907262612905e-05, | |
| "loss": 1.6861, | |
| "step": 446 | |
| }, | |
| { | |
| "epoch": 1.981173864894795, | |
| "grad_norm": 11.665627479553223, | |
| "learning_rate": 6.941647492530377e-05, | |
| "loss": 1.861, | |
| "step": 447 | |
| }, | |
| { | |
| "epoch": 1.9856035437430788, | |
| "grad_norm": 5.3779706954956055, | |
| "learning_rate": 6.928371622653433e-05, | |
| "loss": 1.7886, | |
| "step": 448 | |
| }, | |
| { | |
| "epoch": 1.9900332225913622, | |
| "grad_norm": 6.151978492736816, | |
| "learning_rate": 6.91507976306322e-05, | |
| "loss": 1.8549, | |
| "step": 449 | |
| }, | |
| { | |
| "epoch": 1.9944629014396456, | |
| "grad_norm": 6.620397567749023, | |
| "learning_rate": 6.901772023973459e-05, | |
| "loss": 1.8999, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 1.9988925802879292, | |
| "grad_norm": 14.254983901977539, | |
| "learning_rate": 6.888448515729552e-05, | |
| "loss": 2.0419, | |
| "step": 451 | |
| }, | |
| { | |
| "epoch": 2.0044296788482834, | |
| "grad_norm": 5.509189605712891, | |
| "learning_rate": 6.875109348807649e-05, | |
| "loss": 2.1954, | |
| "step": 452 | |
| }, | |
| { | |
| "epoch": 2.008859357696567, | |
| "grad_norm": 5.627884864807129, | |
| "learning_rate": 6.861754633813739e-05, | |
| "loss": 1.8798, | |
| "step": 453 | |
| }, | |
| { | |
| "epoch": 2.0132890365448506, | |
| "grad_norm": 9.019150733947754, | |
| "learning_rate": 6.84838448148274e-05, | |
| "loss": 2.1152, | |
| "step": 454 | |
| }, | |
| { | |
| "epoch": 2.017718715393134, | |
| "grad_norm": 7.247636318206787, | |
| "learning_rate": 6.834999002677565e-05, | |
| "loss": 1.7317, | |
| "step": 455 | |
| }, | |
| { | |
| "epoch": 2.0221483942414173, | |
| "grad_norm": 7.318861961364746, | |
| "learning_rate": 6.821598308388216e-05, | |
| "loss": 1.6497, | |
| "step": 456 | |
| }, | |
| { | |
| "epoch": 2.026578073089701, | |
| "grad_norm": 7.973992347717285, | |
| "learning_rate": 6.808182509730858e-05, | |
| "loss": 2.0685, | |
| "step": 457 | |
| }, | |
| { | |
| "epoch": 2.0310077519379846, | |
| "grad_norm": 6.912788391113281, | |
| "learning_rate": 6.794751717946896e-05, | |
| "loss": 1.8776, | |
| "step": 458 | |
| }, | |
| { | |
| "epoch": 2.035437430786268, | |
| "grad_norm": 9.139986038208008, | |
| "learning_rate": 6.781306044402063e-05, | |
| "loss": 1.929, | |
| "step": 459 | |
| }, | |
| { | |
| "epoch": 2.0398671096345513, | |
| "grad_norm": 10.722004890441895, | |
| "learning_rate": 6.767845600585479e-05, | |
| "loss": 1.9654, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 2.044296788482835, | |
| "grad_norm": 6.275202751159668, | |
| "learning_rate": 6.754370498108746e-05, | |
| "loss": 1.8465, | |
| "step": 461 | |
| }, | |
| { | |
| "epoch": 2.0487264673311185, | |
| "grad_norm": 6.970125675201416, | |
| "learning_rate": 6.740880848705004e-05, | |
| "loss": 1.9397, | |
| "step": 462 | |
| }, | |
| { | |
| "epoch": 2.053156146179402, | |
| "grad_norm": 6.303622245788574, | |
| "learning_rate": 6.727376764228019e-05, | |
| "loss": 1.7633, | |
| "step": 463 | |
| }, | |
| { | |
| "epoch": 2.0575858250276853, | |
| "grad_norm": 6.273361682891846, | |
| "learning_rate": 6.713858356651253e-05, | |
| "loss": 1.748, | |
| "step": 464 | |
| }, | |
| { | |
| "epoch": 2.062015503875969, | |
| "grad_norm": 10.793755531311035, | |
| "learning_rate": 6.700325738066922e-05, | |
| "loss": 1.7963, | |
| "step": 465 | |
| }, | |
| { | |
| "epoch": 2.0664451827242525, | |
| "grad_norm": 6.142402172088623, | |
| "learning_rate": 6.686779020685089e-05, | |
| "loss": 1.7866, | |
| "step": 466 | |
| }, | |
| { | |
| "epoch": 2.070874861572536, | |
| "grad_norm": 8.846867561340332, | |
| "learning_rate": 6.673218316832714e-05, | |
| "loss": 1.7278, | |
| "step": 467 | |
| }, | |
| { | |
| "epoch": 2.0753045404208197, | |
| "grad_norm": 6.133031368255615, | |
| "learning_rate": 6.659643738952732e-05, | |
| "loss": 2.0824, | |
| "step": 468 | |
| }, | |
| { | |
| "epoch": 2.079734219269103, | |
| "grad_norm": 6.360471725463867, | |
| "learning_rate": 6.646055399603122e-05, | |
| "loss": 1.8149, | |
| "step": 469 | |
| }, | |
| { | |
| "epoch": 2.0841638981173864, | |
| "grad_norm": 10.420990943908691, | |
| "learning_rate": 6.632453411455965e-05, | |
| "loss": 1.7843, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 2.08859357696567, | |
| "grad_norm": 10.17164134979248, | |
| "learning_rate": 6.618837887296522e-05, | |
| "loss": 1.6479, | |
| "step": 471 | |
| }, | |
| { | |
| "epoch": 2.0930232558139537, | |
| "grad_norm": 6.589805603027344, | |
| "learning_rate": 6.60520894002229e-05, | |
| "loss": 1.7869, | |
| "step": 472 | |
| }, | |
| { | |
| "epoch": 2.097452934662237, | |
| "grad_norm": 15.679412841796875, | |
| "learning_rate": 6.591566682642061e-05, | |
| "loss": 1.9296, | |
| "step": 473 | |
| }, | |
| { | |
| "epoch": 2.1018826135105204, | |
| "grad_norm": 5.550188064575195, | |
| "learning_rate": 6.577911228275003e-05, | |
| "loss": 1.5573, | |
| "step": 474 | |
| }, | |
| { | |
| "epoch": 2.106312292358804, | |
| "grad_norm": 4.819809913635254, | |
| "learning_rate": 6.564242690149704e-05, | |
| "loss": 1.4723, | |
| "step": 475 | |
| }, | |
| { | |
| "epoch": 2.1107419712070876, | |
| "grad_norm": 7.867583274841309, | |
| "learning_rate": 6.550561181603244e-05, | |
| "loss": 1.9197, | |
| "step": 476 | |
| }, | |
| { | |
| "epoch": 2.115171650055371, | |
| "grad_norm": 7.505444526672363, | |
| "learning_rate": 6.536866816080247e-05, | |
| "loss": 1.96, | |
| "step": 477 | |
| }, | |
| { | |
| "epoch": 2.1196013289036544, | |
| "grad_norm": 9.75400447845459, | |
| "learning_rate": 6.523159707131951e-05, | |
| "loss": 1.6, | |
| "step": 478 | |
| }, | |
| { | |
| "epoch": 2.124031007751938, | |
| "grad_norm": 4.644130229949951, | |
| "learning_rate": 6.509439968415253e-05, | |
| "loss": 1.512, | |
| "step": 479 | |
| }, | |
| { | |
| "epoch": 2.1284606866002216, | |
| "grad_norm": 5.214409828186035, | |
| "learning_rate": 6.495707713691778e-05, | |
| "loss": 1.7648, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 2.132890365448505, | |
| "grad_norm": 10.711379051208496, | |
| "learning_rate": 6.481963056826932e-05, | |
| "loss": 1.7958, | |
| "step": 481 | |
| }, | |
| { | |
| "epoch": 2.1373200442967883, | |
| "grad_norm": 7.786211013793945, | |
| "learning_rate": 6.468206111788957e-05, | |
| "loss": 2.0541, | |
| "step": 482 | |
| }, | |
| { | |
| "epoch": 2.141749723145072, | |
| "grad_norm": 9.271305084228516, | |
| "learning_rate": 6.454436992647984e-05, | |
| "loss": 1.9129, | |
| "step": 483 | |
| }, | |
| { | |
| "epoch": 2.1461794019933556, | |
| "grad_norm": 9.571303367614746, | |
| "learning_rate": 6.440655813575093e-05, | |
| "loss": 1.6192, | |
| "step": 484 | |
| }, | |
| { | |
| "epoch": 2.150609080841639, | |
| "grad_norm": 6.501114368438721, | |
| "learning_rate": 6.426862688841359e-05, | |
| "loss": 1.5886, | |
| "step": 485 | |
| }, | |
| { | |
| "epoch": 2.1550387596899223, | |
| "grad_norm": 9.78244400024414, | |
| "learning_rate": 6.41305773281691e-05, | |
| "loss": 1.7534, | |
| "step": 486 | |
| }, | |
| { | |
| "epoch": 2.159468438538206, | |
| "grad_norm": 6.028672218322754, | |
| "learning_rate": 6.399241059969978e-05, | |
| "loss": 1.7561, | |
| "step": 487 | |
| }, | |
| { | |
| "epoch": 2.1638981173864895, | |
| "grad_norm": 6.525522708892822, | |
| "learning_rate": 6.385412784865948e-05, | |
| "loss": 1.9586, | |
| "step": 488 | |
| }, | |
| { | |
| "epoch": 2.168327796234773, | |
| "grad_norm": 13.112713813781738, | |
| "learning_rate": 6.371573022166408e-05, | |
| "loss": 1.8899, | |
| "step": 489 | |
| }, | |
| { | |
| "epoch": 2.1727574750830563, | |
| "grad_norm": 5.4037394523620605, | |
| "learning_rate": 6.357721886628201e-05, | |
| "loss": 1.7339, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 2.17718715393134, | |
| "grad_norm": 7.634363651275635, | |
| "learning_rate": 6.34385949310247e-05, | |
| "loss": 1.6131, | |
| "step": 491 | |
| }, | |
| { | |
| "epoch": 2.1816168327796235, | |
| "grad_norm": 6.406160354614258, | |
| "learning_rate": 6.329985956533707e-05, | |
| "loss": 1.8587, | |
| "step": 492 | |
| }, | |
| { | |
| "epoch": 2.186046511627907, | |
| "grad_norm": 6.028163909912109, | |
| "learning_rate": 6.3161013919588e-05, | |
| "loss": 1.6508, | |
| "step": 493 | |
| }, | |
| { | |
| "epoch": 2.1904761904761907, | |
| "grad_norm": 7.243360996246338, | |
| "learning_rate": 6.302205914506083e-05, | |
| "loss": 1.8309, | |
| "step": 494 | |
| }, | |
| { | |
| "epoch": 2.194905869324474, | |
| "grad_norm": 11.702513694763184, | |
| "learning_rate": 6.288299639394371e-05, | |
| "loss": 1.5447, | |
| "step": 495 | |
| }, | |
| { | |
| "epoch": 2.1993355481727574, | |
| "grad_norm": 6.540017604827881, | |
| "learning_rate": 6.274382681932018e-05, | |
| "loss": 1.9629, | |
| "step": 496 | |
| }, | |
| { | |
| "epoch": 2.203765227021041, | |
| "grad_norm": 7.083324432373047, | |
| "learning_rate": 6.260455157515947e-05, | |
| "loss": 1.8167, | |
| "step": 497 | |
| }, | |
| { | |
| "epoch": 2.2081949058693247, | |
| "grad_norm": 4.912618160247803, | |
| "learning_rate": 6.24651718163071e-05, | |
| "loss": 1.88, | |
| "step": 498 | |
| }, | |
| { | |
| "epoch": 2.212624584717608, | |
| "grad_norm": 7.98961067199707, | |
| "learning_rate": 6.23256886984751e-05, | |
| "loss": 1.9315, | |
| "step": 499 | |
| }, | |
| { | |
| "epoch": 2.2170542635658914, | |
| "grad_norm": 8.828598976135254, | |
| "learning_rate": 6.218610337823261e-05, | |
| "loss": 2.0435, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 2.221483942414175, | |
| "grad_norm": 4.62863826751709, | |
| "learning_rate": 6.204641701299619e-05, | |
| "loss": 1.92, | |
| "step": 501 | |
| }, | |
| { | |
| "epoch": 2.2259136212624586, | |
| "grad_norm": 6.152403354644775, | |
| "learning_rate": 6.190663076102025e-05, | |
| "loss": 1.8263, | |
| "step": 502 | |
| }, | |
| { | |
| "epoch": 2.230343300110742, | |
| "grad_norm": 4.635473728179932, | |
| "learning_rate": 6.17667457813874e-05, | |
| "loss": 1.8494, | |
| "step": 503 | |
| }, | |
| { | |
| "epoch": 2.2347729789590254, | |
| "grad_norm": 5.960703372955322, | |
| "learning_rate": 6.162676323399897e-05, | |
| "loss": 2.1242, | |
| "step": 504 | |
| }, | |
| { | |
| "epoch": 2.2392026578073088, | |
| "grad_norm": 7.865222454071045, | |
| "learning_rate": 6.148668427956524e-05, | |
| "loss": 1.9256, | |
| "step": 505 | |
| }, | |
| { | |
| "epoch": 2.2436323366555926, | |
| "grad_norm": 14.765120506286621, | |
| "learning_rate": 6.134651007959586e-05, | |
| "loss": 2.0467, | |
| "step": 506 | |
| }, | |
| { | |
| "epoch": 2.248062015503876, | |
| "grad_norm": 9.482008934020996, | |
| "learning_rate": 6.120624179639032e-05, | |
| "loss": 1.7785, | |
| "step": 507 | |
| }, | |
| { | |
| "epoch": 2.2524916943521593, | |
| "grad_norm": 5.620559215545654, | |
| "learning_rate": 6.106588059302818e-05, | |
| "loss": 1.9651, | |
| "step": 508 | |
| }, | |
| { | |
| "epoch": 2.256921373200443, | |
| "grad_norm": 5.738358497619629, | |
| "learning_rate": 6.0925427633359466e-05, | |
| "loss": 1.5501, | |
| "step": 509 | |
| }, | |
| { | |
| "epoch": 2.2613510520487266, | |
| "grad_norm": 8.628968238830566, | |
| "learning_rate": 6.0784884081995064e-05, | |
| "loss": 2.002, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 2.26578073089701, | |
| "grad_norm": 8.108263969421387, | |
| "learning_rate": 6.064425110429699e-05, | |
| "loss": 2.0662, | |
| "step": 511 | |
| }, | |
| { | |
| "epoch": 2.2702104097452933, | |
| "grad_norm": 10.603029251098633, | |
| "learning_rate": 6.0503529866368826e-05, | |
| "loss": 1.6147, | |
| "step": 512 | |
| }, | |
| { | |
| "epoch": 2.274640088593577, | |
| "grad_norm": 9.906684875488281, | |
| "learning_rate": 6.036272153504592e-05, | |
| "loss": 1.7281, | |
| "step": 513 | |
| }, | |
| { | |
| "epoch": 2.2790697674418605, | |
| "grad_norm": 10.202539443969727, | |
| "learning_rate": 6.0221827277885856e-05, | |
| "loss": 1.7101, | |
| "step": 514 | |
| }, | |
| { | |
| "epoch": 2.283499446290144, | |
| "grad_norm": 9.090194702148438, | |
| "learning_rate": 6.008084826315863e-05, | |
| "loss": 1.997, | |
| "step": 515 | |
| }, | |
| { | |
| "epoch": 2.2879291251384273, | |
| "grad_norm": 9.38204574584961, | |
| "learning_rate": 5.993978565983709e-05, | |
| "loss": 1.7423, | |
| "step": 516 | |
| }, | |
| { | |
| "epoch": 2.292358803986711, | |
| "grad_norm": 10.475700378417969, | |
| "learning_rate": 5.979864063758717e-05, | |
| "loss": 1.6178, | |
| "step": 517 | |
| }, | |
| { | |
| "epoch": 2.2967884828349945, | |
| "grad_norm": 11.60288143157959, | |
| "learning_rate": 5.965741436675816e-05, | |
| "loss": 1.6708, | |
| "step": 518 | |
| }, | |
| { | |
| "epoch": 2.301218161683278, | |
| "grad_norm": 4.961325168609619, | |
| "learning_rate": 5.951610801837314e-05, | |
| "loss": 1.585, | |
| "step": 519 | |
| }, | |
| { | |
| "epoch": 2.3056478405315612, | |
| "grad_norm": 7.948910236358643, | |
| "learning_rate": 5.937472276411909e-05, | |
| "loss": 1.9334, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 2.310077519379845, | |
| "grad_norm": 12.114509582519531, | |
| "learning_rate": 5.9233259776337316e-05, | |
| "loss": 1.6873, | |
| "step": 521 | |
| }, | |
| { | |
| "epoch": 2.3145071982281284, | |
| "grad_norm": 5.8998494148254395, | |
| "learning_rate": 5.9091720228013636e-05, | |
| "loss": 1.9225, | |
| "step": 522 | |
| }, | |
| { | |
| "epoch": 2.318936877076412, | |
| "grad_norm": 5.098598480224609, | |
| "learning_rate": 5.8950105292768756e-05, | |
| "loss": 1.6197, | |
| "step": 523 | |
| }, | |
| { | |
| "epoch": 2.3233665559246957, | |
| "grad_norm": 5.198586463928223, | |
| "learning_rate": 5.880841614484841e-05, | |
| "loss": 1.6607, | |
| "step": 524 | |
| }, | |
| { | |
| "epoch": 2.327796234772979, | |
| "grad_norm": 10.038392066955566, | |
| "learning_rate": 5.8666653959113746e-05, | |
| "loss": 1.9545, | |
| "step": 525 | |
| }, | |
| { | |
| "epoch": 2.3322259136212624, | |
| "grad_norm": 3.849628448486328, | |
| "learning_rate": 5.8524819911031495e-05, | |
| "loss": 1.6751, | |
| "step": 526 | |
| }, | |
| { | |
| "epoch": 2.336655592469546, | |
| "grad_norm": 11.999637603759766, | |
| "learning_rate": 5.838291517666427e-05, | |
| "loss": 2.0971, | |
| "step": 527 | |
| }, | |
| { | |
| "epoch": 2.3410852713178296, | |
| "grad_norm": 6.144494533538818, | |
| "learning_rate": 5.824094093266077e-05, | |
| "loss": 1.6829, | |
| "step": 528 | |
| }, | |
| { | |
| "epoch": 2.345514950166113, | |
| "grad_norm": 7.545333385467529, | |
| "learning_rate": 5.8098898356246115e-05, | |
| "loss": 1.7714, | |
| "step": 529 | |
| }, | |
| { | |
| "epoch": 2.3499446290143964, | |
| "grad_norm": 8.580190658569336, | |
| "learning_rate": 5.795678862521197e-05, | |
| "loss": 1.9958, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 2.35437430786268, | |
| "grad_norm": 7.868439197540283, | |
| "learning_rate": 5.781461291790688e-05, | |
| "loss": 2.0649, | |
| "step": 531 | |
| }, | |
| { | |
| "epoch": 2.3588039867109636, | |
| "grad_norm": 4.296184539794922, | |
| "learning_rate": 5.76723724132264e-05, | |
| "loss": 1.7261, | |
| "step": 532 | |
| }, | |
| { | |
| "epoch": 2.363233665559247, | |
| "grad_norm": 7.093977928161621, | |
| "learning_rate": 5.753006829060344e-05, | |
| "loss": 2.0423, | |
| "step": 533 | |
| }, | |
| { | |
| "epoch": 2.3676633444075303, | |
| "grad_norm": 8.066122055053711, | |
| "learning_rate": 5.7387701729998346e-05, | |
| "loss": 1.8569, | |
| "step": 534 | |
| }, | |
| { | |
| "epoch": 2.3720930232558137, | |
| "grad_norm": 9.265979766845703, | |
| "learning_rate": 5.7245273911889276e-05, | |
| "loss": 1.7729, | |
| "step": 535 | |
| }, | |
| { | |
| "epoch": 2.3765227021040976, | |
| "grad_norm": 9.756134033203125, | |
| "learning_rate": 5.710278601726222e-05, | |
| "loss": 1.9864, | |
| "step": 536 | |
| }, | |
| { | |
| "epoch": 2.380952380952381, | |
| "grad_norm": 9.198343276977539, | |
| "learning_rate": 5.696023922760141e-05, | |
| "loss": 1.7085, | |
| "step": 537 | |
| }, | |
| { | |
| "epoch": 2.3853820598006643, | |
| "grad_norm": 7.285330295562744, | |
| "learning_rate": 5.6817634724879333e-05, | |
| "loss": 2.2659, | |
| "step": 538 | |
| }, | |
| { | |
| "epoch": 2.389811738648948, | |
| "grad_norm": 11.868597030639648, | |
| "learning_rate": 5.6674973691547115e-05, | |
| "loss": 1.5843, | |
| "step": 539 | |
| }, | |
| { | |
| "epoch": 2.3942414174972315, | |
| "grad_norm": 5.195461750030518, | |
| "learning_rate": 5.653225731052456e-05, | |
| "loss": 1.4188, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 2.398671096345515, | |
| "grad_norm": 6.345346450805664, | |
| "learning_rate": 5.638948676519042e-05, | |
| "loss": 2.1584, | |
| "step": 541 | |
| }, | |
| { | |
| "epoch": 2.4031007751937983, | |
| "grad_norm": 10.57470703125, | |
| "learning_rate": 5.6246663239372565e-05, | |
| "loss": 1.6626, | |
| "step": 542 | |
| }, | |
| { | |
| "epoch": 2.407530454042082, | |
| "grad_norm": 11.943028450012207, | |
| "learning_rate": 5.6103787917338214e-05, | |
| "loss": 1.959, | |
| "step": 543 | |
| }, | |
| { | |
| "epoch": 2.4119601328903655, | |
| "grad_norm": 12.070340156555176, | |
| "learning_rate": 5.5960861983783985e-05, | |
| "loss": 2.0654, | |
| "step": 544 | |
| }, | |
| { | |
| "epoch": 2.416389811738649, | |
| "grad_norm": 5.627765655517578, | |
| "learning_rate": 5.581788662382624e-05, | |
| "loss": 2.0118, | |
| "step": 545 | |
| }, | |
| { | |
| "epoch": 2.4208194905869327, | |
| "grad_norm": 10.758605003356934, | |
| "learning_rate": 5.567486302299112e-05, | |
| "loss": 1.7541, | |
| "step": 546 | |
| }, | |
| { | |
| "epoch": 2.425249169435216, | |
| "grad_norm": 6.83001708984375, | |
| "learning_rate": 5.5531792367204816e-05, | |
| "loss": 2.1466, | |
| "step": 547 | |
| }, | |
| { | |
| "epoch": 2.4296788482834994, | |
| "grad_norm": 5.710721492767334, | |
| "learning_rate": 5.5388675842783646e-05, | |
| "loss": 1.6117, | |
| "step": 548 | |
| }, | |
| { | |
| "epoch": 2.434108527131783, | |
| "grad_norm": 6.419375896453857, | |
| "learning_rate": 5.5245514636424286e-05, | |
| "loss": 2.098, | |
| "step": 549 | |
| }, | |
| { | |
| "epoch": 2.438538205980066, | |
| "grad_norm": 6.893190860748291, | |
| "learning_rate": 5.5102309935193906e-05, | |
| "loss": 1.5716, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 2.44296788482835, | |
| "grad_norm": 10.09901237487793, | |
| "learning_rate": 5.4959062926520345e-05, | |
| "loss": 1.7729, | |
| "step": 551 | |
| }, | |
| { | |
| "epoch": 2.4473975636766334, | |
| "grad_norm": 10.772427558898926, | |
| "learning_rate": 5.481577479818219e-05, | |
| "loss": 2.0269, | |
| "step": 552 | |
| }, | |
| { | |
| "epoch": 2.451827242524917, | |
| "grad_norm": 10.259986877441406, | |
| "learning_rate": 5.467244673829908e-05, | |
| "loss": 2.1173, | |
| "step": 553 | |
| }, | |
| { | |
| "epoch": 2.4562569213732006, | |
| "grad_norm": 4.576322078704834, | |
| "learning_rate": 5.452907993532164e-05, | |
| "loss": 1.9707, | |
| "step": 554 | |
| }, | |
| { | |
| "epoch": 2.460686600221484, | |
| "grad_norm": 6.365279674530029, | |
| "learning_rate": 5.438567557802186e-05, | |
| "loss": 1.8728, | |
| "step": 555 | |
| }, | |
| { | |
| "epoch": 2.4651162790697674, | |
| "grad_norm": 14.015559196472168, | |
| "learning_rate": 5.424223485548303e-05, | |
| "loss": 1.8777, | |
| "step": 556 | |
| }, | |
| { | |
| "epoch": 2.4695459579180508, | |
| "grad_norm": 7.423595428466797, | |
| "learning_rate": 5.4098758957090055e-05, | |
| "loss": 1.7334, | |
| "step": 557 | |
| }, | |
| { | |
| "epoch": 2.4739756367663346, | |
| "grad_norm": 6.120731830596924, | |
| "learning_rate": 5.395524907251944e-05, | |
| "loss": 1.8194, | |
| "step": 558 | |
| }, | |
| { | |
| "epoch": 2.478405315614618, | |
| "grad_norm": 5.712774276733398, | |
| "learning_rate": 5.3811706391729545e-05, | |
| "loss": 1.8025, | |
| "step": 559 | |
| }, | |
| { | |
| "epoch": 2.4828349944629013, | |
| "grad_norm": 5.074977397918701, | |
| "learning_rate": 5.366813210495067e-05, | |
| "loss": 1.6992, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 2.487264673311185, | |
| "grad_norm": 5.497018814086914, | |
| "learning_rate": 5.352452740267515e-05, | |
| "loss": 1.843, | |
| "step": 561 | |
| }, | |
| { | |
| "epoch": 2.4916943521594686, | |
| "grad_norm": 6.377771854400635, | |
| "learning_rate": 5.338089347564757e-05, | |
| "loss": 1.7029, | |
| "step": 562 | |
| }, | |
| { | |
| "epoch": 2.496124031007752, | |
| "grad_norm": 5.825841426849365, | |
| "learning_rate": 5.323723151485477e-05, | |
| "loss": 2.1683, | |
| "step": 563 | |
| }, | |
| { | |
| "epoch": 2.5005537098560353, | |
| "grad_norm": 9.456023216247559, | |
| "learning_rate": 5.309354271151613e-05, | |
| "loss": 1.7453, | |
| "step": 564 | |
| }, | |
| { | |
| "epoch": 2.5049833887043187, | |
| "grad_norm": 9.619424819946289, | |
| "learning_rate": 5.294982825707352e-05, | |
| "loss": 1.6813, | |
| "step": 565 | |
| }, | |
| { | |
| "epoch": 2.5094130675526025, | |
| "grad_norm": 9.673927307128906, | |
| "learning_rate": 5.2806089343181564e-05, | |
| "loss": 1.5608, | |
| "step": 566 | |
| }, | |
| { | |
| "epoch": 2.513842746400886, | |
| "grad_norm": 8.455759048461914, | |
| "learning_rate": 5.266232716169769e-05, | |
| "loss": 2.1658, | |
| "step": 567 | |
| }, | |
| { | |
| "epoch": 2.5182724252491693, | |
| "grad_norm": 6.917920112609863, | |
| "learning_rate": 5.251854290467221e-05, | |
| "loss": 1.6381, | |
| "step": 568 | |
| }, | |
| { | |
| "epoch": 2.522702104097453, | |
| "grad_norm": 17.744230270385742, | |
| "learning_rate": 5.237473776433853e-05, | |
| "loss": 2.1334, | |
| "step": 569 | |
| }, | |
| { | |
| "epoch": 2.5271317829457365, | |
| "grad_norm": 8.317569732666016, | |
| "learning_rate": 5.223091293310324e-05, | |
| "loss": 1.6458, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 2.53156146179402, | |
| "grad_norm": 5.05155611038208, | |
| "learning_rate": 5.208706960353611e-05, | |
| "loss": 1.8317, | |
| "step": 571 | |
| }, | |
| { | |
| "epoch": 2.5359911406423032, | |
| "grad_norm": 9.146858215332031, | |
| "learning_rate": 5.194320896836039e-05, | |
| "loss": 1.7776, | |
| "step": 572 | |
| }, | |
| { | |
| "epoch": 2.540420819490587, | |
| "grad_norm": 8.478264808654785, | |
| "learning_rate": 5.1799332220442776e-05, | |
| "loss": 1.6552, | |
| "step": 573 | |
| }, | |
| { | |
| "epoch": 2.5448504983388704, | |
| "grad_norm": 11.424778938293457, | |
| "learning_rate": 5.1655440552783584e-05, | |
| "loss": 1.6622, | |
| "step": 574 | |
| }, | |
| { | |
| "epoch": 2.549280177187154, | |
| "grad_norm": 5.402507305145264, | |
| "learning_rate": 5.151153515850682e-05, | |
| "loss": 2.3321, | |
| "step": 575 | |
| }, | |
| { | |
| "epoch": 2.5537098560354377, | |
| "grad_norm": 11.693721771240234, | |
| "learning_rate": 5.136761723085035e-05, | |
| "loss": 1.5945, | |
| "step": 576 | |
| }, | |
| { | |
| "epoch": 2.558139534883721, | |
| "grad_norm": 7.056944847106934, | |
| "learning_rate": 5.1223687963155906e-05, | |
| "loss": 2.2514, | |
| "step": 577 | |
| }, | |
| { | |
| "epoch": 2.5625692137320044, | |
| "grad_norm": 5.904301643371582, | |
| "learning_rate": 5.107974854885933e-05, | |
| "loss": 1.5878, | |
| "step": 578 | |
| }, | |
| { | |
| "epoch": 2.566998892580288, | |
| "grad_norm": 10.586432456970215, | |
| "learning_rate": 5.093580018148052e-05, | |
| "loss": 1.5959, | |
| "step": 579 | |
| }, | |
| { | |
| "epoch": 2.571428571428571, | |
| "grad_norm": 8.07880973815918, | |
| "learning_rate": 5.0791844054613646e-05, | |
| "loss": 1.874, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 2.575858250276855, | |
| "grad_norm": 8.128579139709473, | |
| "learning_rate": 5.0647881361917224e-05, | |
| "loss": 1.8369, | |
| "step": 581 | |
| }, | |
| { | |
| "epoch": 2.5802879291251384, | |
| "grad_norm": 7.19783878326416, | |
| "learning_rate": 5.05039132971042e-05, | |
| "loss": 1.3947, | |
| "step": 582 | |
| }, | |
| { | |
| "epoch": 2.584717607973422, | |
| "grad_norm": 13.905142784118652, | |
| "learning_rate": 5.0359941053932056e-05, | |
| "loss": 2.0522, | |
| "step": 583 | |
| }, | |
| { | |
| "epoch": 2.5891472868217056, | |
| "grad_norm": 10.910123825073242, | |
| "learning_rate": 5.0215965826192954e-05, | |
| "loss": 2.1368, | |
| "step": 584 | |
| }, | |
| { | |
| "epoch": 2.593576965669989, | |
| "grad_norm": 4.5783867835998535, | |
| "learning_rate": 5.0071988807703776e-05, | |
| "loss": 1.7897, | |
| "step": 585 | |
| }, | |
| { | |
| "epoch": 2.5980066445182723, | |
| "grad_norm": 8.571553230285645, | |
| "learning_rate": 4.9928011192296236e-05, | |
| "loss": 2.1332, | |
| "step": 586 | |
| }, | |
| { | |
| "epoch": 2.6024363233665557, | |
| "grad_norm": 8.85322380065918, | |
| "learning_rate": 4.978403417380706e-05, | |
| "loss": 1.4058, | |
| "step": 587 | |
| }, | |
| { | |
| "epoch": 2.6068660022148396, | |
| "grad_norm": 8.1622896194458, | |
| "learning_rate": 4.964005894606795e-05, | |
| "loss": 1.6939, | |
| "step": 588 | |
| }, | |
| { | |
| "epoch": 2.611295681063123, | |
| "grad_norm": 9.46420669555664, | |
| "learning_rate": 4.949608670289582e-05, | |
| "loss": 2.0323, | |
| "step": 589 | |
| }, | |
| { | |
| "epoch": 2.6157253599114063, | |
| "grad_norm": 6.406886577606201, | |
| "learning_rate": 4.935211863808279e-05, | |
| "loss": 1.9075, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 2.62015503875969, | |
| "grad_norm": 5.835373878479004, | |
| "learning_rate": 4.9208155945386345e-05, | |
| "loss": 2.0062, | |
| "step": 591 | |
| }, | |
| { | |
| "epoch": 2.6245847176079735, | |
| "grad_norm": 6.667347431182861, | |
| "learning_rate": 4.9064199818519495e-05, | |
| "loss": 1.8224, | |
| "step": 592 | |
| }, | |
| { | |
| "epoch": 2.629014396456257, | |
| "grad_norm": 5.613856315612793, | |
| "learning_rate": 4.892025145114067e-05, | |
| "loss": 1.5333, | |
| "step": 593 | |
| }, | |
| { | |
| "epoch": 2.6334440753045403, | |
| "grad_norm": 4.702713489532471, | |
| "learning_rate": 4.87763120368441e-05, | |
| "loss": 1.4557, | |
| "step": 594 | |
| }, | |
| { | |
| "epoch": 2.6378737541528237, | |
| "grad_norm": 8.00310230255127, | |
| "learning_rate": 4.863238276914966e-05, | |
| "loss": 1.6452, | |
| "step": 595 | |
| }, | |
| { | |
| "epoch": 2.6423034330011075, | |
| "grad_norm": 15.650694847106934, | |
| "learning_rate": 4.84884648414932e-05, | |
| "loss": 1.8716, | |
| "step": 596 | |
| }, | |
| { | |
| "epoch": 2.646733111849391, | |
| "grad_norm": 9.945942878723145, | |
| "learning_rate": 4.834455944721643e-05, | |
| "loss": 1.9081, | |
| "step": 597 | |
| }, | |
| { | |
| "epoch": 2.6511627906976747, | |
| "grad_norm": 11.016633987426758, | |
| "learning_rate": 4.820066777955724e-05, | |
| "loss": 1.7934, | |
| "step": 598 | |
| }, | |
| { | |
| "epoch": 2.655592469545958, | |
| "grad_norm": 7.862950325012207, | |
| "learning_rate": 4.8056791031639614e-05, | |
| "loss": 1.9754, | |
| "step": 599 | |
| }, | |
| { | |
| "epoch": 2.6600221483942414, | |
| "grad_norm": 5.507509708404541, | |
| "learning_rate": 4.791293039646389e-05, | |
| "loss": 1.9389, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 2.664451827242525, | |
| "grad_norm": 3.8648977279663086, | |
| "learning_rate": 4.776908706689678e-05, | |
| "loss": 1.4915, | |
| "step": 601 | |
| }, | |
| { | |
| "epoch": 2.668881506090808, | |
| "grad_norm": 6.0287699699401855, | |
| "learning_rate": 4.762526223566147e-05, | |
| "loss": 1.8027, | |
| "step": 602 | |
| }, | |
| { | |
| "epoch": 2.673311184939092, | |
| "grad_norm": 7.0374226570129395, | |
| "learning_rate": 4.7481457095327804e-05, | |
| "loss": 1.7526, | |
| "step": 603 | |
| }, | |
| { | |
| "epoch": 2.6777408637873754, | |
| "grad_norm": 6.987399578094482, | |
| "learning_rate": 4.733767283830233e-05, | |
| "loss": 1.9017, | |
| "step": 604 | |
| }, | |
| { | |
| "epoch": 2.682170542635659, | |
| "grad_norm": 10.102921485900879, | |
| "learning_rate": 4.719391065681845e-05, | |
| "loss": 1.7473, | |
| "step": 605 | |
| }, | |
| { | |
| "epoch": 2.6866002214839426, | |
| "grad_norm": 5.184350967407227, | |
| "learning_rate": 4.705017174292649e-05, | |
| "loss": 1.5428, | |
| "step": 606 | |
| }, | |
| { | |
| "epoch": 2.691029900332226, | |
| "grad_norm": 6.327332973480225, | |
| "learning_rate": 4.6906457288483895e-05, | |
| "loss": 2.0073, | |
| "step": 607 | |
| }, | |
| { | |
| "epoch": 2.6954595791805094, | |
| "grad_norm": 4.9231672286987305, | |
| "learning_rate": 4.676276848514524e-05, | |
| "loss": 1.5135, | |
| "step": 608 | |
| }, | |
| { | |
| "epoch": 2.6998892580287928, | |
| "grad_norm": 4.648147106170654, | |
| "learning_rate": 4.6619106524352445e-05, | |
| "loss": 1.7111, | |
| "step": 609 | |
| }, | |
| { | |
| "epoch": 2.704318936877076, | |
| "grad_norm": 12.321041107177734, | |
| "learning_rate": 4.647547259732486e-05, | |
| "loss": 1.6586, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 2.70874861572536, | |
| "grad_norm": 10.489548683166504, | |
| "learning_rate": 4.633186789504933e-05, | |
| "loss": 1.9735, | |
| "step": 611 | |
| }, | |
| { | |
| "epoch": 2.7131782945736433, | |
| "grad_norm": 7.285097599029541, | |
| "learning_rate": 4.618829360827046e-05, | |
| "loss": 1.9382, | |
| "step": 612 | |
| }, | |
| { | |
| "epoch": 2.717607973421927, | |
| "grad_norm": 5.02518892288208, | |
| "learning_rate": 4.604475092748057e-05, | |
| "loss": 1.6625, | |
| "step": 613 | |
| }, | |
| { | |
| "epoch": 2.7220376522702106, | |
| "grad_norm": 6.852099895477295, | |
| "learning_rate": 4.590124104290997e-05, | |
| "loss": 2.0571, | |
| "step": 614 | |
| }, | |
| { | |
| "epoch": 2.726467331118494, | |
| "grad_norm": 15.236705780029297, | |
| "learning_rate": 4.575776514451698e-05, | |
| "loss": 1.7972, | |
| "step": 615 | |
| }, | |
| { | |
| "epoch": 2.7308970099667773, | |
| "grad_norm": 7.635733127593994, | |
| "learning_rate": 4.561432442197817e-05, | |
| "loss": 1.7937, | |
| "step": 616 | |
| }, | |
| { | |
| "epoch": 2.7353266888150607, | |
| "grad_norm": 7.20578670501709, | |
| "learning_rate": 4.547092006467837e-05, | |
| "loss": 1.8999, | |
| "step": 617 | |
| }, | |
| { | |
| "epoch": 2.7397563676633445, | |
| "grad_norm": 6.789028644561768, | |
| "learning_rate": 4.532755326170093e-05, | |
| "loss": 1.816, | |
| "step": 618 | |
| }, | |
| { | |
| "epoch": 2.744186046511628, | |
| "grad_norm": 5.359363555908203, | |
| "learning_rate": 4.518422520181781e-05, | |
| "loss": 1.9299, | |
| "step": 619 | |
| }, | |
| { | |
| "epoch": 2.7486157253599113, | |
| "grad_norm": 14.938844680786133, | |
| "learning_rate": 4.504093707347966e-05, | |
| "loss": 1.7996, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 2.753045404208195, | |
| "grad_norm": 11.183375358581543, | |
| "learning_rate": 4.4897690064806106e-05, | |
| "loss": 1.7149, | |
| "step": 621 | |
| }, | |
| { | |
| "epoch": 2.7574750830564785, | |
| "grad_norm": 6.135425567626953, | |
| "learning_rate": 4.475448536357572e-05, | |
| "loss": 1.8004, | |
| "step": 622 | |
| }, | |
| { | |
| "epoch": 2.761904761904762, | |
| "grad_norm": 4.227088451385498, | |
| "learning_rate": 4.461132415721637e-05, | |
| "loss": 2.0701, | |
| "step": 623 | |
| }, | |
| { | |
| "epoch": 2.7663344407530452, | |
| "grad_norm": 7.318582057952881, | |
| "learning_rate": 4.44682076327952e-05, | |
| "loss": 1.8482, | |
| "step": 624 | |
| }, | |
| { | |
| "epoch": 2.770764119601329, | |
| "grad_norm": 5.547973155975342, | |
| "learning_rate": 4.432513697700889e-05, | |
| "loss": 2.215, | |
| "step": 625 | |
| }, | |
| { | |
| "epoch": 2.7751937984496124, | |
| "grad_norm": 10.597505569458008, | |
| "learning_rate": 4.418211337617377e-05, | |
| "loss": 1.8472, | |
| "step": 626 | |
| }, | |
| { | |
| "epoch": 2.779623477297896, | |
| "grad_norm": 7.636361598968506, | |
| "learning_rate": 4.403913801621602e-05, | |
| "loss": 2.1119, | |
| "step": 627 | |
| }, | |
| { | |
| "epoch": 2.7840531561461797, | |
| "grad_norm": 5.91168737411499, | |
| "learning_rate": 4.3896212082661804e-05, | |
| "loss": 1.3616, | |
| "step": 628 | |
| }, | |
| { | |
| "epoch": 2.788482834994463, | |
| "grad_norm": 6.863846778869629, | |
| "learning_rate": 4.375333676062743e-05, | |
| "loss": 1.5778, | |
| "step": 629 | |
| }, | |
| { | |
| "epoch": 2.7929125138427464, | |
| "grad_norm": 5.835073471069336, | |
| "learning_rate": 4.3610513234809596e-05, | |
| "loss": 1.6205, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 2.79734219269103, | |
| "grad_norm": 7.397892475128174, | |
| "learning_rate": 4.346774268947545e-05, | |
| "loss": 1.8186, | |
| "step": 631 | |
| }, | |
| { | |
| "epoch": 2.801771871539313, | |
| "grad_norm": 5.960362434387207, | |
| "learning_rate": 4.3325026308452896e-05, | |
| "loss": 1.5533, | |
| "step": 632 | |
| }, | |
| { | |
| "epoch": 2.806201550387597, | |
| "grad_norm": 7.241276264190674, | |
| "learning_rate": 4.318236527512067e-05, | |
| "loss": 1.8679, | |
| "step": 633 | |
| }, | |
| { | |
| "epoch": 2.8106312292358804, | |
| "grad_norm": 5.476749420166016, | |
| "learning_rate": 4.3039760772398616e-05, | |
| "loss": 1.7967, | |
| "step": 634 | |
| }, | |
| { | |
| "epoch": 2.8150609080841638, | |
| "grad_norm": 7.25029993057251, | |
| "learning_rate": 4.289721398273779e-05, | |
| "loss": 1.8366, | |
| "step": 635 | |
| }, | |
| { | |
| "epoch": 2.8194905869324476, | |
| "grad_norm": 6.801474571228027, | |
| "learning_rate": 4.275472608811073e-05, | |
| "loss": 1.9466, | |
| "step": 636 | |
| }, | |
| { | |
| "epoch": 2.823920265780731, | |
| "grad_norm": 6.29557991027832, | |
| "learning_rate": 4.261229827000166e-05, | |
| "loss": 1.7434, | |
| "step": 637 | |
| }, | |
| { | |
| "epoch": 2.8283499446290143, | |
| "grad_norm": 5.536020755767822, | |
| "learning_rate": 4.246993170939657e-05, | |
| "loss": 1.793, | |
| "step": 638 | |
| }, | |
| { | |
| "epoch": 2.8327796234772977, | |
| "grad_norm": 8.520576477050781, | |
| "learning_rate": 4.232762758677362e-05, | |
| "loss": 1.6861, | |
| "step": 639 | |
| }, | |
| { | |
| "epoch": 2.8372093023255816, | |
| "grad_norm": 7.413863182067871, | |
| "learning_rate": 4.2185387082093134e-05, | |
| "loss": 1.9494, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 2.841638981173865, | |
| "grad_norm": 5.839072227478027, | |
| "learning_rate": 4.204321137478806e-05, | |
| "loss": 1.7803, | |
| "step": 641 | |
| }, | |
| { | |
| "epoch": 2.8460686600221483, | |
| "grad_norm": 4.603065490722656, | |
| "learning_rate": 4.1901101643753904e-05, | |
| "loss": 2.0012, | |
| "step": 642 | |
| }, | |
| { | |
| "epoch": 2.850498338870432, | |
| "grad_norm": 7.950796127319336, | |
| "learning_rate": 4.1759059067339254e-05, | |
| "loss": 1.7754, | |
| "step": 643 | |
| }, | |
| { | |
| "epoch": 2.8549280177187155, | |
| "grad_norm": 4.7786173820495605, | |
| "learning_rate": 4.161708482333575e-05, | |
| "loss": 1.8431, | |
| "step": 644 | |
| }, | |
| { | |
| "epoch": 2.859357696566999, | |
| "grad_norm": 8.72305679321289, | |
| "learning_rate": 4.147518008896851e-05, | |
| "loss": 1.3848, | |
| "step": 645 | |
| }, | |
| { | |
| "epoch": 2.8637873754152823, | |
| "grad_norm": 8.073031425476074, | |
| "learning_rate": 4.133334604088626e-05, | |
| "loss": 1.8088, | |
| "step": 646 | |
| }, | |
| { | |
| "epoch": 2.8682170542635657, | |
| "grad_norm": 7.937218189239502, | |
| "learning_rate": 4.119158385515159e-05, | |
| "loss": 1.5952, | |
| "step": 647 | |
| }, | |
| { | |
| "epoch": 2.8726467331118495, | |
| "grad_norm": 4.457007884979248, | |
| "learning_rate": 4.104989470723126e-05, | |
| "loss": 1.4987, | |
| "step": 648 | |
| }, | |
| { | |
| "epoch": 2.877076411960133, | |
| "grad_norm": 8.611004829406738, | |
| "learning_rate": 4.0908279771986376e-05, | |
| "loss": 1.7993, | |
| "step": 649 | |
| }, | |
| { | |
| "epoch": 2.8815060908084162, | |
| "grad_norm": 7.399827480316162, | |
| "learning_rate": 4.0766740223662716e-05, | |
| "loss": 1.6888, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 2.8859357696567, | |
| "grad_norm": 6.210876941680908, | |
| "learning_rate": 4.062527723588093e-05, | |
| "loss": 1.8918, | |
| "step": 651 | |
| }, | |
| { | |
| "epoch": 2.8903654485049834, | |
| "grad_norm": 8.73986530303955, | |
| "learning_rate": 4.0483891981626885e-05, | |
| "loss": 1.9802, | |
| "step": 652 | |
| }, | |
| { | |
| "epoch": 2.894795127353267, | |
| "grad_norm": 7.386151313781738, | |
| "learning_rate": 4.0342585633241846e-05, | |
| "loss": 1.791, | |
| "step": 653 | |
| }, | |
| { | |
| "epoch": 2.89922480620155, | |
| "grad_norm": 6.179415225982666, | |
| "learning_rate": 4.020135936241285e-05, | |
| "loss": 1.862, | |
| "step": 654 | |
| }, | |
| { | |
| "epoch": 2.903654485049834, | |
| "grad_norm": 5.072649002075195, | |
| "learning_rate": 4.006021434016293e-05, | |
| "loss": 1.7385, | |
| "step": 655 | |
| }, | |
| { | |
| "epoch": 2.9080841638981174, | |
| "grad_norm": 7.220780849456787, | |
| "learning_rate": 3.991915173684138e-05, | |
| "loss": 1.9256, | |
| "step": 656 | |
| }, | |
| { | |
| "epoch": 2.912513842746401, | |
| "grad_norm": 7.535258769989014, | |
| "learning_rate": 3.977817272211417e-05, | |
| "loss": 1.4881, | |
| "step": 657 | |
| }, | |
| { | |
| "epoch": 2.9169435215946846, | |
| "grad_norm": 8.532382011413574, | |
| "learning_rate": 3.963727846495409e-05, | |
| "loss": 1.8854, | |
| "step": 658 | |
| }, | |
| { | |
| "epoch": 2.921373200442968, | |
| "grad_norm": 4.557820796966553, | |
| "learning_rate": 3.9496470133631206e-05, | |
| "loss": 1.7547, | |
| "step": 659 | |
| }, | |
| { | |
| "epoch": 2.9258028792912514, | |
| "grad_norm": 7.472973346710205, | |
| "learning_rate": 3.935574889570302e-05, | |
| "loss": 1.594, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 2.9302325581395348, | |
| "grad_norm": 11.085735321044922, | |
| "learning_rate": 3.921511591800494e-05, | |
| "loss": 1.7801, | |
| "step": 661 | |
| }, | |
| { | |
| "epoch": 2.934662236987818, | |
| "grad_norm": 10.502659797668457, | |
| "learning_rate": 3.907457236664055e-05, | |
| "loss": 1.8752, | |
| "step": 662 | |
| }, | |
| { | |
| "epoch": 2.939091915836102, | |
| "grad_norm": 6.8742523193359375, | |
| "learning_rate": 3.893411940697182e-05, | |
| "loss": 1.9468, | |
| "step": 663 | |
| }, | |
| { | |
| "epoch": 2.9435215946843853, | |
| "grad_norm": 6.568576812744141, | |
| "learning_rate": 3.8793758203609685e-05, | |
| "loss": 1.9107, | |
| "step": 664 | |
| }, | |
| { | |
| "epoch": 2.9479512735326687, | |
| "grad_norm": 7.630648612976074, | |
| "learning_rate": 3.8653489920404136e-05, | |
| "loss": 1.8425, | |
| "step": 665 | |
| }, | |
| { | |
| "epoch": 2.9523809523809526, | |
| "grad_norm": 5.386789798736572, | |
| "learning_rate": 3.851331572043478e-05, | |
| "loss": 1.6793, | |
| "step": 666 | |
| }, | |
| { | |
| "epoch": 2.956810631229236, | |
| "grad_norm": 7.836945056915283, | |
| "learning_rate": 3.837323676600104e-05, | |
| "loss": 1.9951, | |
| "step": 667 | |
| }, | |
| { | |
| "epoch": 2.9612403100775193, | |
| "grad_norm": 6.634953498840332, | |
| "learning_rate": 3.82332542186126e-05, | |
| "loss": 1.6223, | |
| "step": 668 | |
| }, | |
| { | |
| "epoch": 2.9656699889258027, | |
| "grad_norm": 4.823019504547119, | |
| "learning_rate": 3.809336923897977e-05, | |
| "loss": 1.8021, | |
| "step": 669 | |
| }, | |
| { | |
| "epoch": 2.9700996677740865, | |
| "grad_norm": 5.209251880645752, | |
| "learning_rate": 3.7953582987003806e-05, | |
| "loss": 1.9003, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 2.97452934662237, | |
| "grad_norm": 6.102602481842041, | |
| "learning_rate": 3.7813896621767384e-05, | |
| "loss": 1.8635, | |
| "step": 671 | |
| }, | |
| { | |
| "epoch": 2.9789590254706533, | |
| "grad_norm": 5.569439888000488, | |
| "learning_rate": 3.76743113015249e-05, | |
| "loss": 2.2471, | |
| "step": 672 | |
| }, | |
| { | |
| "epoch": 2.983388704318937, | |
| "grad_norm": 7.8752827644348145, | |
| "learning_rate": 3.753482818369291e-05, | |
| "loss": 1.6478, | |
| "step": 673 | |
| }, | |
| { | |
| "epoch": 2.9878183831672205, | |
| "grad_norm": 10.240179061889648, | |
| "learning_rate": 3.739544842484053e-05, | |
| "loss": 1.8428, | |
| "step": 674 | |
| }, | |
| { | |
| "epoch": 2.992248062015504, | |
| "grad_norm": 6.250666618347168, | |
| "learning_rate": 3.725617318067984e-05, | |
| "loss": 1.5094, | |
| "step": 675 | |
| }, | |
| { | |
| "epoch": 2.9966777408637872, | |
| "grad_norm": 5.396643161773682, | |
| "learning_rate": 3.71170036060563e-05, | |
| "loss": 1.8349, | |
| "step": 676 | |
| }, | |
| { | |
| "epoch": 3.0022148394241417, | |
| "grad_norm": 6.2632269859313965, | |
| "learning_rate": 3.697794085493919e-05, | |
| "loss": 2.0916, | |
| "step": 677 | |
| }, | |
| { | |
| "epoch": 3.006644518272425, | |
| "grad_norm": 4.901583194732666, | |
| "learning_rate": 3.683898608041201e-05, | |
| "loss": 1.4515, | |
| "step": 678 | |
| }, | |
| { | |
| "epoch": 3.011074197120709, | |
| "grad_norm": 8.754711151123047, | |
| "learning_rate": 3.670014043466293e-05, | |
| "loss": 1.7908, | |
| "step": 679 | |
| }, | |
| { | |
| "epoch": 3.0155038759689923, | |
| "grad_norm": 7.272785186767578, | |
| "learning_rate": 3.656140506897532e-05, | |
| "loss": 1.5918, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 3.0199335548172757, | |
| "grad_norm": 5.400629997253418, | |
| "learning_rate": 3.642278113371799e-05, | |
| "loss": 1.8925, | |
| "step": 681 | |
| }, | |
| { | |
| "epoch": 3.024363233665559, | |
| "grad_norm": 5.312657833099365, | |
| "learning_rate": 3.6284269778335935e-05, | |
| "loss": 1.4915, | |
| "step": 682 | |
| }, | |
| { | |
| "epoch": 3.028792912513843, | |
| "grad_norm": 11.454095840454102, | |
| "learning_rate": 3.614587215134052e-05, | |
| "loss": 1.6244, | |
| "step": 683 | |
| }, | |
| { | |
| "epoch": 3.0332225913621262, | |
| "grad_norm": 9.896474838256836, | |
| "learning_rate": 3.600758940030024e-05, | |
| "loss": 1.7049, | |
| "step": 684 | |
| }, | |
| { | |
| "epoch": 3.0376522702104096, | |
| "grad_norm": 9.982049942016602, | |
| "learning_rate": 3.586942267183091e-05, | |
| "loss": 1.4014, | |
| "step": 685 | |
| }, | |
| { | |
| "epoch": 3.0420819490586934, | |
| "grad_norm": 4.595606327056885, | |
| "learning_rate": 3.573137311158644e-05, | |
| "loss": 1.7367, | |
| "step": 686 | |
| }, | |
| { | |
| "epoch": 3.046511627906977, | |
| "grad_norm": 7.622084617614746, | |
| "learning_rate": 3.559344186424908e-05, | |
| "loss": 1.6358, | |
| "step": 687 | |
| }, | |
| { | |
| "epoch": 3.05094130675526, | |
| "grad_norm": 4.9482645988464355, | |
| "learning_rate": 3.545563007352016e-05, | |
| "loss": 1.3935, | |
| "step": 688 | |
| }, | |
| { | |
| "epoch": 3.0553709856035436, | |
| "grad_norm": 6.924054145812988, | |
| "learning_rate": 3.531793888211044e-05, | |
| "loss": 1.522, | |
| "step": 689 | |
| }, | |
| { | |
| "epoch": 3.0598006644518274, | |
| "grad_norm": 6.1782636642456055, | |
| "learning_rate": 3.518036943173069e-05, | |
| "loss": 1.6191, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 3.064230343300111, | |
| "grad_norm": 5.369998455047607, | |
| "learning_rate": 3.504292286308224e-05, | |
| "loss": 1.7259, | |
| "step": 691 | |
| }, | |
| { | |
| "epoch": 3.068660022148394, | |
| "grad_norm": 6.008235454559326, | |
| "learning_rate": 3.490560031584749e-05, | |
| "loss": 1.7566, | |
| "step": 692 | |
| }, | |
| { | |
| "epoch": 3.0730897009966776, | |
| "grad_norm": 5.590559959411621, | |
| "learning_rate": 3.476840292868052e-05, | |
| "loss": 1.5846, | |
| "step": 693 | |
| }, | |
| { | |
| "epoch": 3.0775193798449614, | |
| "grad_norm": 5.604452133178711, | |
| "learning_rate": 3.463133183919754e-05, | |
| "loss": 1.7103, | |
| "step": 694 | |
| }, | |
| { | |
| "epoch": 3.0819490586932448, | |
| "grad_norm": 6.367408752441406, | |
| "learning_rate": 3.449438818396759e-05, | |
| "loss": 1.8149, | |
| "step": 695 | |
| }, | |
| { | |
| "epoch": 3.086378737541528, | |
| "grad_norm": 7.243832588195801, | |
| "learning_rate": 3.435757309850297e-05, | |
| "loss": 1.4385, | |
| "step": 696 | |
| }, | |
| { | |
| "epoch": 3.0908084163898115, | |
| "grad_norm": 11.65194034576416, | |
| "learning_rate": 3.4220887717249984e-05, | |
| "loss": 1.7527, | |
| "step": 697 | |
| }, | |
| { | |
| "epoch": 3.0952380952380953, | |
| "grad_norm": 6.606711387634277, | |
| "learning_rate": 3.4084333173579404e-05, | |
| "loss": 1.516, | |
| "step": 698 | |
| }, | |
| { | |
| "epoch": 3.0996677740863787, | |
| "grad_norm": 9.863051414489746, | |
| "learning_rate": 3.394791059977712e-05, | |
| "loss": 1.5221, | |
| "step": 699 | |
| }, | |
| { | |
| "epoch": 3.104097452934662, | |
| "grad_norm": 6.656291961669922, | |
| "learning_rate": 3.381162112703479e-05, | |
| "loss": 2.0494, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 3.108527131782946, | |
| "grad_norm": 8.231369018554688, | |
| "learning_rate": 3.367546588544035e-05, | |
| "loss": 1.7698, | |
| "step": 701 | |
| }, | |
| { | |
| "epoch": 3.1129568106312293, | |
| "grad_norm": 4.92387056350708, | |
| "learning_rate": 3.35394460039688e-05, | |
| "loss": 1.7323, | |
| "step": 702 | |
| }, | |
| { | |
| "epoch": 3.1173864894795127, | |
| "grad_norm": 5.547698497772217, | |
| "learning_rate": 3.340356261047269e-05, | |
| "loss": 1.9306, | |
| "step": 703 | |
| }, | |
| { | |
| "epoch": 3.121816168327796, | |
| "grad_norm": 12.515167236328125, | |
| "learning_rate": 3.326781683167288e-05, | |
| "loss": 1.6062, | |
| "step": 704 | |
| }, | |
| { | |
| "epoch": 3.12624584717608, | |
| "grad_norm": 8.807191848754883, | |
| "learning_rate": 3.313220979314913e-05, | |
| "loss": 2.1803, | |
| "step": 705 | |
| }, | |
| { | |
| "epoch": 3.1306755260243633, | |
| "grad_norm": 6.980557918548584, | |
| "learning_rate": 3.2996742619330776e-05, | |
| "loss": 1.6428, | |
| "step": 706 | |
| }, | |
| { | |
| "epoch": 3.1351052048726467, | |
| "grad_norm": 11.408295631408691, | |
| "learning_rate": 3.286141643348748e-05, | |
| "loss": 1.7191, | |
| "step": 707 | |
| }, | |
| { | |
| "epoch": 3.13953488372093, | |
| "grad_norm": 7.25669527053833, | |
| "learning_rate": 3.27262323577198e-05, | |
| "loss": 2.0474, | |
| "step": 708 | |
| }, | |
| { | |
| "epoch": 3.143964562569214, | |
| "grad_norm": 7.197902679443359, | |
| "learning_rate": 3.259119151294997e-05, | |
| "loss": 1.4791, | |
| "step": 709 | |
| }, | |
| { | |
| "epoch": 3.1483942414174972, | |
| "grad_norm": 7.758485317230225, | |
| "learning_rate": 3.2456295018912554e-05, | |
| "loss": 1.6882, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 3.1528239202657806, | |
| "grad_norm": 10.935338973999023, | |
| "learning_rate": 3.232154399414521e-05, | |
| "loss": 1.8307, | |
| "step": 711 | |
| }, | |
| { | |
| "epoch": 3.157253599114064, | |
| "grad_norm": 9.585394859313965, | |
| "learning_rate": 3.218693955597938e-05, | |
| "loss": 1.6335, | |
| "step": 712 | |
| }, | |
| { | |
| "epoch": 3.161683277962348, | |
| "grad_norm": 8.889345169067383, | |
| "learning_rate": 3.205248282053104e-05, | |
| "loss": 1.8018, | |
| "step": 713 | |
| }, | |
| { | |
| "epoch": 3.166112956810631, | |
| "grad_norm": 10.92686653137207, | |
| "learning_rate": 3.191817490269143e-05, | |
| "loss": 1.5411, | |
| "step": 714 | |
| }, | |
| { | |
| "epoch": 3.1705426356589146, | |
| "grad_norm": 5.509748935699463, | |
| "learning_rate": 3.1784016916117845e-05, | |
| "loss": 1.5074, | |
| "step": 715 | |
| }, | |
| { | |
| "epoch": 3.1749723145071984, | |
| "grad_norm": 8.162956237792969, | |
| "learning_rate": 3.165000997322435e-05, | |
| "loss": 1.5617, | |
| "step": 716 | |
| }, | |
| { | |
| "epoch": 3.179401993355482, | |
| "grad_norm": 7.924961566925049, | |
| "learning_rate": 3.15161551851726e-05, | |
| "loss": 1.8548, | |
| "step": 717 | |
| }, | |
| { | |
| "epoch": 3.183831672203765, | |
| "grad_norm": 6.5894269943237305, | |
| "learning_rate": 3.138245366186261e-05, | |
| "loss": 1.3462, | |
| "step": 718 | |
| }, | |
| { | |
| "epoch": 3.1882613510520486, | |
| "grad_norm": 5.109679698944092, | |
| "learning_rate": 3.1248906511923526e-05, | |
| "loss": 1.4762, | |
| "step": 719 | |
| }, | |
| { | |
| "epoch": 3.1926910299003324, | |
| "grad_norm": 8.769123077392578, | |
| "learning_rate": 3.11155148427045e-05, | |
| "loss": 1.543, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 3.1971207087486158, | |
| "grad_norm": 9.410780906677246, | |
| "learning_rate": 3.098227976026542e-05, | |
| "loss": 1.6442, | |
| "step": 721 | |
| }, | |
| { | |
| "epoch": 3.201550387596899, | |
| "grad_norm": 8.743534088134766, | |
| "learning_rate": 3.084920236936783e-05, | |
| "loss": 1.8122, | |
| "step": 722 | |
| }, | |
| { | |
| "epoch": 3.2059800664451825, | |
| "grad_norm": 8.16320514678955, | |
| "learning_rate": 3.071628377346567e-05, | |
| "loss": 1.5269, | |
| "step": 723 | |
| }, | |
| { | |
| "epoch": 3.2104097452934663, | |
| "grad_norm": 5.1464362144470215, | |
| "learning_rate": 3.058352507469623e-05, | |
| "loss": 1.8886, | |
| "step": 724 | |
| }, | |
| { | |
| "epoch": 3.2148394241417497, | |
| "grad_norm": 4.970855712890625, | |
| "learning_rate": 3.045092737387096e-05, | |
| "loss": 1.6344, | |
| "step": 725 | |
| }, | |
| { | |
| "epoch": 3.219269102990033, | |
| "grad_norm": 13.145638465881348, | |
| "learning_rate": 3.0318491770466275e-05, | |
| "loss": 2.1682, | |
| "step": 726 | |
| }, | |
| { | |
| "epoch": 3.223698781838317, | |
| "grad_norm": 4.9592084884643555, | |
| "learning_rate": 3.018621936261462e-05, | |
| "loss": 1.5418, | |
| "step": 727 | |
| }, | |
| { | |
| "epoch": 3.2281284606866003, | |
| "grad_norm": 6.089108467102051, | |
| "learning_rate": 3.0054111247095134e-05, | |
| "loss": 1.5348, | |
| "step": 728 | |
| }, | |
| { | |
| "epoch": 3.2325581395348837, | |
| "grad_norm": 6.222757816314697, | |
| "learning_rate": 2.992216851932478e-05, | |
| "loss": 1.6028, | |
| "step": 729 | |
| }, | |
| { | |
| "epoch": 3.236987818383167, | |
| "grad_norm": 9.714614868164062, | |
| "learning_rate": 2.979039227334905e-05, | |
| "loss": 1.5382, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 3.241417497231451, | |
| "grad_norm": 7.093183994293213, | |
| "learning_rate": 2.9658783601833106e-05, | |
| "loss": 1.9805, | |
| "step": 731 | |
| }, | |
| { | |
| "epoch": 3.2458471760797343, | |
| "grad_norm": 5.3288397789001465, | |
| "learning_rate": 2.9527343596052513e-05, | |
| "loss": 1.61, | |
| "step": 732 | |
| }, | |
| { | |
| "epoch": 3.2502768549280177, | |
| "grad_norm": 6.90171480178833, | |
| "learning_rate": 2.9396073345884363e-05, | |
| "loss": 1.5034, | |
| "step": 733 | |
| }, | |
| { | |
| "epoch": 3.254706533776301, | |
| "grad_norm": 15.343708992004395, | |
| "learning_rate": 2.926497393979814e-05, | |
| "loss": 1.7569, | |
| "step": 734 | |
| }, | |
| { | |
| "epoch": 3.259136212624585, | |
| "grad_norm": 6.383762836456299, | |
| "learning_rate": 2.9134046464846703e-05, | |
| "loss": 1.894, | |
| "step": 735 | |
| }, | |
| { | |
| "epoch": 3.2635658914728682, | |
| "grad_norm": 7.399294376373291, | |
| "learning_rate": 2.9003292006657335e-05, | |
| "loss": 1.6227, | |
| "step": 736 | |
| }, | |
| { | |
| "epoch": 3.2679955703211516, | |
| "grad_norm": 6.315852642059326, | |
| "learning_rate": 2.887271164942259e-05, | |
| "loss": 1.6429, | |
| "step": 737 | |
| }, | |
| { | |
| "epoch": 3.2724252491694354, | |
| "grad_norm": 6.891193389892578, | |
| "learning_rate": 2.8742306475891574e-05, | |
| "loss": 1.3061, | |
| "step": 738 | |
| }, | |
| { | |
| "epoch": 3.276854928017719, | |
| "grad_norm": 5.40971040725708, | |
| "learning_rate": 2.861207756736064e-05, | |
| "loss": 1.6944, | |
| "step": 739 | |
| }, | |
| { | |
| "epoch": 3.281284606866002, | |
| "grad_norm": 6.299607276916504, | |
| "learning_rate": 2.8482026003664695e-05, | |
| "loss": 1.9616, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 3.2857142857142856, | |
| "grad_norm": 5.829094886779785, | |
| "learning_rate": 2.8352152863168035e-05, | |
| "loss": 2.0074, | |
| "step": 741 | |
| }, | |
| { | |
| "epoch": 3.290143964562569, | |
| "grad_norm": 4.643031120300293, | |
| "learning_rate": 2.8222459222755605e-05, | |
| "loss": 1.2762, | |
| "step": 742 | |
| }, | |
| { | |
| "epoch": 3.294573643410853, | |
| "grad_norm": 8.120580673217773, | |
| "learning_rate": 2.8092946157823928e-05, | |
| "loss": 1.4793, | |
| "step": 743 | |
| }, | |
| { | |
| "epoch": 3.299003322259136, | |
| "grad_norm": 7.702285289764404, | |
| "learning_rate": 2.7963614742272186e-05, | |
| "loss": 1.7667, | |
| "step": 744 | |
| }, | |
| { | |
| "epoch": 3.3034330011074196, | |
| "grad_norm": 7.583436012268066, | |
| "learning_rate": 2.78344660484934e-05, | |
| "loss": 1.6145, | |
| "step": 745 | |
| }, | |
| { | |
| "epoch": 3.3078626799557034, | |
| "grad_norm": 8.428057670593262, | |
| "learning_rate": 2.7705501147365498e-05, | |
| "loss": 1.9334, | |
| "step": 746 | |
| }, | |
| { | |
| "epoch": 3.3122923588039868, | |
| "grad_norm": 9.74374771118164, | |
| "learning_rate": 2.757672110824242e-05, | |
| "loss": 1.7522, | |
| "step": 747 | |
| }, | |
| { | |
| "epoch": 3.31672203765227, | |
| "grad_norm": 5.473592281341553, | |
| "learning_rate": 2.744812699894524e-05, | |
| "loss": 1.8402, | |
| "step": 748 | |
| }, | |
| { | |
| "epoch": 3.3211517165005535, | |
| "grad_norm": 5.208759784698486, | |
| "learning_rate": 2.731971988575337e-05, | |
| "loss": 1.5117, | |
| "step": 749 | |
| }, | |
| { | |
| "epoch": 3.3255813953488373, | |
| "grad_norm": 5.262367248535156, | |
| "learning_rate": 2.719150083339567e-05, | |
| "loss": 1.7127, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 3.3300110741971207, | |
| "grad_norm": 5.584857940673828, | |
| "learning_rate": 2.7063470905041643e-05, | |
| "loss": 1.5979, | |
| "step": 751 | |
| }, | |
| { | |
| "epoch": 3.334440753045404, | |
| "grad_norm": 5.59777307510376, | |
| "learning_rate": 2.693563116229261e-05, | |
| "loss": 1.9831, | |
| "step": 752 | |
| }, | |
| { | |
| "epoch": 3.338870431893688, | |
| "grad_norm": 5.473875999450684, | |
| "learning_rate": 2.6807982665172858e-05, | |
| "loss": 1.6157, | |
| "step": 753 | |
| }, | |
| { | |
| "epoch": 3.3433001107419713, | |
| "grad_norm": 7.791520595550537, | |
| "learning_rate": 2.668052647212097e-05, | |
| "loss": 1.9269, | |
| "step": 754 | |
| }, | |
| { | |
| "epoch": 3.3477297895902547, | |
| "grad_norm": 8.027992248535156, | |
| "learning_rate": 2.655326363998094e-05, | |
| "loss": 1.5925, | |
| "step": 755 | |
| }, | |
| { | |
| "epoch": 3.352159468438538, | |
| "grad_norm": 6.83018684387207, | |
| "learning_rate": 2.6426195223993476e-05, | |
| "loss": 1.6024, | |
| "step": 756 | |
| }, | |
| { | |
| "epoch": 3.356589147286822, | |
| "grad_norm": 9.020490646362305, | |
| "learning_rate": 2.6299322277787163e-05, | |
| "loss": 1.8361, | |
| "step": 757 | |
| }, | |
| { | |
| "epoch": 3.3610188261351053, | |
| "grad_norm": 5.8339524269104, | |
| "learning_rate": 2.6172645853369842e-05, | |
| "loss": 1.6429, | |
| "step": 758 | |
| }, | |
| { | |
| "epoch": 3.3654485049833887, | |
| "grad_norm": 4.7356672286987305, | |
| "learning_rate": 2.6046167001119816e-05, | |
| "loss": 1.4711, | |
| "step": 759 | |
| }, | |
| { | |
| "epoch": 3.369878183831672, | |
| "grad_norm": 10.283108711242676, | |
| "learning_rate": 2.5919886769777136e-05, | |
| "loss": 1.5224, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 3.374307862679956, | |
| "grad_norm": 6.590269088745117, | |
| "learning_rate": 2.5793806206434977e-05, | |
| "loss": 1.7833, | |
| "step": 761 | |
| }, | |
| { | |
| "epoch": 3.3787375415282392, | |
| "grad_norm": 6.434593200683594, | |
| "learning_rate": 2.5667926356530797e-05, | |
| "loss": 1.8216, | |
| "step": 762 | |
| }, | |
| { | |
| "epoch": 3.3831672203765226, | |
| "grad_norm": 5.936555862426758, | |
| "learning_rate": 2.554224826383792e-05, | |
| "loss": 1.5334, | |
| "step": 763 | |
| }, | |
| { | |
| "epoch": 3.387596899224806, | |
| "grad_norm": 14.309653282165527, | |
| "learning_rate": 2.5416772970456592e-05, | |
| "loss": 1.4408, | |
| "step": 764 | |
| }, | |
| { | |
| "epoch": 3.39202657807309, | |
| "grad_norm": 4.926191806793213, | |
| "learning_rate": 2.5291501516805582e-05, | |
| "loss": 1.553, | |
| "step": 765 | |
| }, | |
| { | |
| "epoch": 3.396456256921373, | |
| "grad_norm": 6.072823524475098, | |
| "learning_rate": 2.5166434941613408e-05, | |
| "loss": 1.6255, | |
| "step": 766 | |
| }, | |
| { | |
| "epoch": 3.4008859357696566, | |
| "grad_norm": 5.073078632354736, | |
| "learning_rate": 2.5041574281909814e-05, | |
| "loss": 1.4512, | |
| "step": 767 | |
| }, | |
| { | |
| "epoch": 3.4053156146179404, | |
| "grad_norm": 5.974765300750732, | |
| "learning_rate": 2.491692057301706e-05, | |
| "loss": 1.5731, | |
| "step": 768 | |
| }, | |
| { | |
| "epoch": 3.409745293466224, | |
| "grad_norm": 7.9622015953063965, | |
| "learning_rate": 2.4792474848541468e-05, | |
| "loss": 1.9739, | |
| "step": 769 | |
| }, | |
| { | |
| "epoch": 3.414174972314507, | |
| "grad_norm": 6.868570327758789, | |
| "learning_rate": 2.466823814036477e-05, | |
| "loss": 1.8549, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 3.4186046511627906, | |
| "grad_norm": 5.171596527099609, | |
| "learning_rate": 2.4544211478635582e-05, | |
| "loss": 1.9368, | |
| "step": 771 | |
| }, | |
| { | |
| "epoch": 3.4230343300110744, | |
| "grad_norm": 5.299581527709961, | |
| "learning_rate": 2.442039589176085e-05, | |
| "loss": 1.6896, | |
| "step": 772 | |
| }, | |
| { | |
| "epoch": 3.4274640088593578, | |
| "grad_norm": 4.135156631469727, | |
| "learning_rate": 2.429679240639729e-05, | |
| "loss": 1.6126, | |
| "step": 773 | |
| }, | |
| { | |
| "epoch": 3.431893687707641, | |
| "grad_norm": 10.067416191101074, | |
| "learning_rate": 2.417340204744295e-05, | |
| "loss": 1.5858, | |
| "step": 774 | |
| }, | |
| { | |
| "epoch": 3.4363233665559245, | |
| "grad_norm": 6.024186611175537, | |
| "learning_rate": 2.4050225838028673e-05, | |
| "loss": 1.6173, | |
| "step": 775 | |
| }, | |
| { | |
| "epoch": 3.4407530454042083, | |
| "grad_norm": 5.533918380737305, | |
| "learning_rate": 2.3927264799509607e-05, | |
| "loss": 1.6362, | |
| "step": 776 | |
| }, | |
| { | |
| "epoch": 3.4451827242524917, | |
| "grad_norm": 11.559389114379883, | |
| "learning_rate": 2.38045199514567e-05, | |
| "loss": 1.7199, | |
| "step": 777 | |
| }, | |
| { | |
| "epoch": 3.449612403100775, | |
| "grad_norm": 18.32662010192871, | |
| "learning_rate": 2.368199231164832e-05, | |
| "loss": 1.6391, | |
| "step": 778 | |
| }, | |
| { | |
| "epoch": 3.4540420819490585, | |
| "grad_norm": 5.221166610717773, | |
| "learning_rate": 2.3559682896061845e-05, | |
| "loss": 1.9352, | |
| "step": 779 | |
| }, | |
| { | |
| "epoch": 3.4584717607973423, | |
| "grad_norm": 10.909178733825684, | |
| "learning_rate": 2.3437592718865064e-05, | |
| "loss": 1.951, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 3.4629014396456257, | |
| "grad_norm": 11.137706756591797, | |
| "learning_rate": 2.3315722792407963e-05, | |
| "loss": 1.7804, | |
| "step": 781 | |
| }, | |
| { | |
| "epoch": 3.467331118493909, | |
| "grad_norm": 5.704611301422119, | |
| "learning_rate": 2.319407412721416e-05, | |
| "loss": 1.7664, | |
| "step": 782 | |
| }, | |
| { | |
| "epoch": 3.471760797342193, | |
| "grad_norm": 6.683074951171875, | |
| "learning_rate": 2.3072647731972746e-05, | |
| "loss": 1.7184, | |
| "step": 783 | |
| }, | |
| { | |
| "epoch": 3.4761904761904763, | |
| "grad_norm": 5.341973781585693, | |
| "learning_rate": 2.2951444613529673e-05, | |
| "loss": 1.7536, | |
| "step": 784 | |
| }, | |
| { | |
| "epoch": 3.4806201550387597, | |
| "grad_norm": 7.4396443367004395, | |
| "learning_rate": 2.283046577687958e-05, | |
| "loss": 1.8698, | |
| "step": 785 | |
| }, | |
| { | |
| "epoch": 3.485049833887043, | |
| "grad_norm": 9.703819274902344, | |
| "learning_rate": 2.270971222515735e-05, | |
| "loss": 1.4894, | |
| "step": 786 | |
| }, | |
| { | |
| "epoch": 3.489479512735327, | |
| "grad_norm": 7.606261730194092, | |
| "learning_rate": 2.2589184959629918e-05, | |
| "loss": 1.5671, | |
| "step": 787 | |
| }, | |
| { | |
| "epoch": 3.4939091915836102, | |
| "grad_norm": 12.827896118164062, | |
| "learning_rate": 2.2468884979687865e-05, | |
| "loss": 1.8357, | |
| "step": 788 | |
| }, | |
| { | |
| "epoch": 3.4983388704318936, | |
| "grad_norm": 5.452163219451904, | |
| "learning_rate": 2.2348813282837112e-05, | |
| "loss": 1.6886, | |
| "step": 789 | |
| }, | |
| { | |
| "epoch": 3.5027685492801774, | |
| "grad_norm": 7.322004318237305, | |
| "learning_rate": 2.2228970864690746e-05, | |
| "loss": 1.4266, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 3.507198228128461, | |
| "grad_norm": 10.763130187988281, | |
| "learning_rate": 2.2109358718960723e-05, | |
| "loss": 1.861, | |
| "step": 791 | |
| }, | |
| { | |
| "epoch": 3.511627906976744, | |
| "grad_norm": 5.822150707244873, | |
| "learning_rate": 2.1989977837449605e-05, | |
| "loss": 2.3509, | |
| "step": 792 | |
| }, | |
| { | |
| "epoch": 3.5160575858250276, | |
| "grad_norm": 5.79690408706665, | |
| "learning_rate": 2.187082921004232e-05, | |
| "loss": 1.6493, | |
| "step": 793 | |
| }, | |
| { | |
| "epoch": 3.520487264673311, | |
| "grad_norm": 8.250429153442383, | |
| "learning_rate": 2.1751913824698043e-05, | |
| "loss": 1.7764, | |
| "step": 794 | |
| }, | |
| { | |
| "epoch": 3.524916943521595, | |
| "grad_norm": 8.250429153442383, | |
| "learning_rate": 2.1751913824698043e-05, | |
| "loss": 1.7552, | |
| "step": 795 | |
| }, | |
| { | |
| "epoch": 3.529346622369878, | |
| "grad_norm": 11.57303237915039, | |
| "learning_rate": 2.163323266744192e-05, | |
| "loss": 1.6983, | |
| "step": 796 | |
| }, | |
| { | |
| "epoch": 3.5337763012181616, | |
| "grad_norm": 11.181726455688477, | |
| "learning_rate": 2.1514786722356915e-05, | |
| "loss": 1.6178, | |
| "step": 797 | |
| }, | |
| { | |
| "epoch": 3.5382059800664454, | |
| "grad_norm": 8.835942268371582, | |
| "learning_rate": 2.1396576971575687e-05, | |
| "loss": 1.9932, | |
| "step": 798 | |
| }, | |
| { | |
| "epoch": 3.5426356589147288, | |
| "grad_norm": 9.03427505493164, | |
| "learning_rate": 2.1278604395272344e-05, | |
| "loss": 1.736, | |
| "step": 799 | |
| }, | |
| { | |
| "epoch": 3.547065337763012, | |
| "grad_norm": 8.337313652038574, | |
| "learning_rate": 2.1160869971654463e-05, | |
| "loss": 1.7021, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 3.5514950166112955, | |
| "grad_norm": 7.044549942016602, | |
| "learning_rate": 2.1043374676954852e-05, | |
| "loss": 1.277, | |
| "step": 801 | |
| }, | |
| { | |
| "epoch": 3.5559246954595793, | |
| "grad_norm": 7.318129062652588, | |
| "learning_rate": 2.0926119485423545e-05, | |
| "loss": 1.6063, | |
| "step": 802 | |
| }, | |
| { | |
| "epoch": 3.5603543743078627, | |
| "grad_norm": 5.060410022735596, | |
| "learning_rate": 2.0809105369319627e-05, | |
| "loss": 1.5807, | |
| "step": 803 | |
| }, | |
| { | |
| "epoch": 3.564784053156146, | |
| "grad_norm": 6.682000637054443, | |
| "learning_rate": 2.0692333298903262e-05, | |
| "loss": 1.5304, | |
| "step": 804 | |
| }, | |
| { | |
| "epoch": 3.56921373200443, | |
| "grad_norm": 6.671335220336914, | |
| "learning_rate": 2.057580424242766e-05, | |
| "loss": 1.4664, | |
| "step": 805 | |
| }, | |
| { | |
| "epoch": 3.5736434108527133, | |
| "grad_norm": 6.632824420928955, | |
| "learning_rate": 2.0459519166130908e-05, | |
| "loss": 1.6361, | |
| "step": 806 | |
| }, | |
| { | |
| "epoch": 3.5780730897009967, | |
| "grad_norm": 8.885310173034668, | |
| "learning_rate": 2.034347903422812e-05, | |
| "loss": 1.7954, | |
| "step": 807 | |
| }, | |
| { | |
| "epoch": 3.58250276854928, | |
| "grad_norm": 10.805225372314453, | |
| "learning_rate": 2.022768480890335e-05, | |
| "loss": 1.6574, | |
| "step": 808 | |
| }, | |
| { | |
| "epoch": 3.5869324473975635, | |
| "grad_norm": 9.142356872558594, | |
| "learning_rate": 2.0112137450301673e-05, | |
| "loss": 2.0787, | |
| "step": 809 | |
| }, | |
| { | |
| "epoch": 3.5913621262458473, | |
| "grad_norm": 6.482480525970459, | |
| "learning_rate": 1.999683791652111e-05, | |
| "loss": 1.6388, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 3.5957918050941307, | |
| "grad_norm": 5.05092191696167, | |
| "learning_rate": 1.9881787163604842e-05, | |
| "loss": 1.7928, | |
| "step": 811 | |
| }, | |
| { | |
| "epoch": 3.600221483942414, | |
| "grad_norm": 11.357190132141113, | |
| "learning_rate": 1.976698614553318e-05, | |
| "loss": 1.3903, | |
| "step": 812 | |
| }, | |
| { | |
| "epoch": 3.604651162790698, | |
| "grad_norm": 5.155529022216797, | |
| "learning_rate": 1.965243581421566e-05, | |
| "loss": 1.56, | |
| "step": 813 | |
| }, | |
| { | |
| "epoch": 3.6090808416389812, | |
| "grad_norm": 8.153807640075684, | |
| "learning_rate": 1.9538137119483206e-05, | |
| "loss": 1.5867, | |
| "step": 814 | |
| }, | |
| { | |
| "epoch": 3.6135105204872646, | |
| "grad_norm": 5.8331708908081055, | |
| "learning_rate": 1.9424091009080157e-05, | |
| "loss": 1.4878, | |
| "step": 815 | |
| }, | |
| { | |
| "epoch": 3.617940199335548, | |
| "grad_norm": 6.762333869934082, | |
| "learning_rate": 1.9310298428656522e-05, | |
| "loss": 1.6243, | |
| "step": 816 | |
| }, | |
| { | |
| "epoch": 3.622369878183832, | |
| "grad_norm": 5.154025077819824, | |
| "learning_rate": 1.919676032176006e-05, | |
| "loss": 1.7056, | |
| "step": 817 | |
| }, | |
| { | |
| "epoch": 3.626799557032115, | |
| "grad_norm": 8.215972900390625, | |
| "learning_rate": 1.908347762982851e-05, | |
| "loss": 1.6746, | |
| "step": 818 | |
| }, | |
| { | |
| "epoch": 3.6312292358803986, | |
| "grad_norm": 6.393702507019043, | |
| "learning_rate": 1.897045129218169e-05, | |
| "loss": 1.4001, | |
| "step": 819 | |
| }, | |
| { | |
| "epoch": 3.6356589147286824, | |
| "grad_norm": 6.989351272583008, | |
| "learning_rate": 1.885768224601384e-05, | |
| "loss": 1.7093, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 3.640088593576966, | |
| "grad_norm": 7.712499618530273, | |
| "learning_rate": 1.874517142638576e-05, | |
| "loss": 1.9538, | |
| "step": 821 | |
| }, | |
| { | |
| "epoch": 3.644518272425249, | |
| "grad_norm": 8.841022491455078, | |
| "learning_rate": 1.8632919766217077e-05, | |
| "loss": 1.9689, | |
| "step": 822 | |
| }, | |
| { | |
| "epoch": 3.6489479512735326, | |
| "grad_norm": 7.386549949645996, | |
| "learning_rate": 1.852092819627854e-05, | |
| "loss": 1.6141, | |
| "step": 823 | |
| }, | |
| { | |
| "epoch": 3.653377630121816, | |
| "grad_norm": 5.60568904876709, | |
| "learning_rate": 1.8409197645184205e-05, | |
| "loss": 1.4079, | |
| "step": 824 | |
| }, | |
| { | |
| "epoch": 3.6578073089700998, | |
| "grad_norm": 10.555825233459473, | |
| "learning_rate": 1.829772903938393e-05, | |
| "loss": 1.7793, | |
| "step": 825 | |
| }, | |
| { | |
| "epoch": 3.662236987818383, | |
| "grad_norm": 7.500188827514648, | |
| "learning_rate": 1.818652330315544e-05, | |
| "loss": 1.836, | |
| "step": 826 | |
| }, | |
| { | |
| "epoch": 3.6666666666666665, | |
| "grad_norm": 4.860312461853027, | |
| "learning_rate": 1.807558135859689e-05, | |
| "loss": 1.5209, | |
| "step": 827 | |
| }, | |
| { | |
| "epoch": 3.6710963455149503, | |
| "grad_norm": 5.605367183685303, | |
| "learning_rate": 1.7964904125619014e-05, | |
| "loss": 1.4998, | |
| "step": 828 | |
| }, | |
| { | |
| "epoch": 3.6755260243632337, | |
| "grad_norm": 4.448379993438721, | |
| "learning_rate": 1.7854492521937753e-05, | |
| "loss": 1.3537, | |
| "step": 829 | |
| }, | |
| { | |
| "epoch": 3.679955703211517, | |
| "grad_norm": 8.981001853942871, | |
| "learning_rate": 1.7744347463066364e-05, | |
| "loss": 1.7037, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 3.6843853820598005, | |
| "grad_norm": 5.554767608642578, | |
| "learning_rate": 1.7634469862308028e-05, | |
| "loss": 1.5133, | |
| "step": 831 | |
| }, | |
| { | |
| "epoch": 3.6888150609080843, | |
| "grad_norm": 6.952533721923828, | |
| "learning_rate": 1.7524860630748196e-05, | |
| "loss": 1.9494, | |
| "step": 832 | |
| }, | |
| { | |
| "epoch": 3.6932447397563677, | |
| "grad_norm": 5.131599426269531, | |
| "learning_rate": 1.741552067724705e-05, | |
| "loss": 1.7359, | |
| "step": 833 | |
| }, | |
| { | |
| "epoch": 3.697674418604651, | |
| "grad_norm": 5.388726711273193, | |
| "learning_rate": 1.7306450908431988e-05, | |
| "loss": 1.6177, | |
| "step": 834 | |
| }, | |
| { | |
| "epoch": 3.702104097452935, | |
| "grad_norm": 4.925945281982422, | |
| "learning_rate": 1.719765222869002e-05, | |
| "loss": 1.6502, | |
| "step": 835 | |
| }, | |
| { | |
| "epoch": 3.7065337763012183, | |
| "grad_norm": 6.9254679679870605, | |
| "learning_rate": 1.7089125540160405e-05, | |
| "loss": 1.4616, | |
| "step": 836 | |
| }, | |
| { | |
| "epoch": 3.7109634551495017, | |
| "grad_norm": 5.429500579833984, | |
| "learning_rate": 1.6980871742727067e-05, | |
| "loss": 1.9383, | |
| "step": 837 | |
| }, | |
| { | |
| "epoch": 3.715393133997785, | |
| "grad_norm": 6.535275936126709, | |
| "learning_rate": 1.6872891734011194e-05, | |
| "loss": 1.7912, | |
| "step": 838 | |
| }, | |
| { | |
| "epoch": 3.7198228128460684, | |
| "grad_norm": 10.143779754638672, | |
| "learning_rate": 1.6765186409363697e-05, | |
| "loss": 1.6256, | |
| "step": 839 | |
| }, | |
| { | |
| "epoch": 3.7242524916943522, | |
| "grad_norm": 5.600742340087891, | |
| "learning_rate": 1.6657756661857926e-05, | |
| "loss": 1.3974, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 3.7286821705426356, | |
| "grad_norm": 4.404951095581055, | |
| "learning_rate": 1.655060338228217e-05, | |
| "loss": 1.7132, | |
| "step": 841 | |
| }, | |
| { | |
| "epoch": 3.733111849390919, | |
| "grad_norm": 5.553696632385254, | |
| "learning_rate": 1.6443727459132295e-05, | |
| "loss": 1.6708, | |
| "step": 842 | |
| }, | |
| { | |
| "epoch": 3.737541528239203, | |
| "grad_norm": 5.281304359436035, | |
| "learning_rate": 1.6337129778604404e-05, | |
| "loss": 1.4539, | |
| "step": 843 | |
| }, | |
| { | |
| "epoch": 3.741971207087486, | |
| "grad_norm": 5.668034076690674, | |
| "learning_rate": 1.6230811224587388e-05, | |
| "loss": 1.7885, | |
| "step": 844 | |
| }, | |
| { | |
| "epoch": 3.7464008859357696, | |
| "grad_norm": 10.235897064208984, | |
| "learning_rate": 1.6124772678655742e-05, | |
| "loss": 1.4588, | |
| "step": 845 | |
| }, | |
| { | |
| "epoch": 3.750830564784053, | |
| "grad_norm": 5.400420188903809, | |
| "learning_rate": 1.6019015020062163e-05, | |
| "loss": 1.62, | |
| "step": 846 | |
| }, | |
| { | |
| "epoch": 3.755260243632337, | |
| "grad_norm": 6.550075054168701, | |
| "learning_rate": 1.591353912573028e-05, | |
| "loss": 1.975, | |
| "step": 847 | |
| }, | |
| { | |
| "epoch": 3.75968992248062, | |
| "grad_norm": 7.559651851654053, | |
| "learning_rate": 1.580834587024737e-05, | |
| "loss": 1.8536, | |
| "step": 848 | |
| }, | |
| { | |
| "epoch": 3.7641196013289036, | |
| "grad_norm": 7.971467971801758, | |
| "learning_rate": 1.5703436125857117e-05, | |
| "loss": 1.5991, | |
| "step": 849 | |
| }, | |
| { | |
| "epoch": 3.7685492801771874, | |
| "grad_norm": 5.560451507568359, | |
| "learning_rate": 1.5598810762452443e-05, | |
| "loss": 1.7564, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 3.7729789590254708, | |
| "grad_norm": 4.291793346405029, | |
| "learning_rate": 1.5494470647568144e-05, | |
| "loss": 1.5643, | |
| "step": 851 | |
| }, | |
| { | |
| "epoch": 3.777408637873754, | |
| "grad_norm": 4.999351978302002, | |
| "learning_rate": 1.5390416646373835e-05, | |
| "loss": 1.5744, | |
| "step": 852 | |
| }, | |
| { | |
| "epoch": 3.7818383167220375, | |
| "grad_norm": 9.698875427246094, | |
| "learning_rate": 1.5286649621666727e-05, | |
| "loss": 1.5056, | |
| "step": 853 | |
| }, | |
| { | |
| "epoch": 3.786267995570321, | |
| "grad_norm": 10.186306953430176, | |
| "learning_rate": 1.5183170433864474e-05, | |
| "loss": 1.8846, | |
| "step": 854 | |
| }, | |
| { | |
| "epoch": 3.7906976744186047, | |
| "grad_norm": 4.416024208068848, | |
| "learning_rate": 1.5079979940998e-05, | |
| "loss": 1.5333, | |
| "step": 855 | |
| }, | |
| { | |
| "epoch": 3.795127353266888, | |
| "grad_norm": 5.287939071655273, | |
| "learning_rate": 1.497707899870448e-05, | |
| "loss": 1.7457, | |
| "step": 856 | |
| }, | |
| { | |
| "epoch": 3.7995570321151715, | |
| "grad_norm": 7.013801574707031, | |
| "learning_rate": 1.4874468460220143e-05, | |
| "loss": 1.7936, | |
| "step": 857 | |
| }, | |
| { | |
| "epoch": 3.8039867109634553, | |
| "grad_norm": 5.881109714508057, | |
| "learning_rate": 1.4772149176373262e-05, | |
| "loss": 1.6287, | |
| "step": 858 | |
| }, | |
| { | |
| "epoch": 3.8084163898117387, | |
| "grad_norm": 6.294797420501709, | |
| "learning_rate": 1.4670121995577096e-05, | |
| "loss": 1.8849, | |
| "step": 859 | |
| }, | |
| { | |
| "epoch": 3.812846068660022, | |
| "grad_norm": 4.177390098571777, | |
| "learning_rate": 1.4568387763822777e-05, | |
| "loss": 1.5562, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 3.8172757475083055, | |
| "grad_norm": 6.2013678550720215, | |
| "learning_rate": 1.446694732467242e-05, | |
| "loss": 1.5161, | |
| "step": 861 | |
| }, | |
| { | |
| "epoch": 3.8217054263565893, | |
| "grad_norm": 11.345988273620605, | |
| "learning_rate": 1.4365801519252025e-05, | |
| "loss": 1.9987, | |
| "step": 862 | |
| }, | |
| { | |
| "epoch": 3.8261351052048727, | |
| "grad_norm": 7.59104061126709, | |
| "learning_rate": 1.4264951186244585e-05, | |
| "loss": 1.6589, | |
| "step": 863 | |
| }, | |
| { | |
| "epoch": 3.830564784053156, | |
| "grad_norm": 6.380128860473633, | |
| "learning_rate": 1.4164397161883019e-05, | |
| "loss": 1.4321, | |
| "step": 864 | |
| }, | |
| { | |
| "epoch": 3.83499446290144, | |
| "grad_norm": 7.260960578918457, | |
| "learning_rate": 1.4064140279943367e-05, | |
| "loss": 1.9097, | |
| "step": 865 | |
| }, | |
| { | |
| "epoch": 3.8394241417497232, | |
| "grad_norm": 4.328764915466309, | |
| "learning_rate": 1.3964181371737794e-05, | |
| "loss": 1.6674, | |
| "step": 866 | |
| }, | |
| { | |
| "epoch": 3.8438538205980066, | |
| "grad_norm": 10.491178512573242, | |
| "learning_rate": 1.3864521266107728e-05, | |
| "loss": 1.6915, | |
| "step": 867 | |
| }, | |
| { | |
| "epoch": 3.84828349944629, | |
| "grad_norm": 5.77324914932251, | |
| "learning_rate": 1.3765160789416986e-05, | |
| "loss": 2.1619, | |
| "step": 868 | |
| }, | |
| { | |
| "epoch": 3.8527131782945734, | |
| "grad_norm": 11.36082935333252, | |
| "learning_rate": 1.3666100765544865e-05, | |
| "loss": 1.9967, | |
| "step": 869 | |
| }, | |
| { | |
| "epoch": 3.857142857142857, | |
| "grad_norm": 4.28300666809082, | |
| "learning_rate": 1.3567342015879453e-05, | |
| "loss": 1.6779, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 3.8615725359911406, | |
| "grad_norm": 6.903509616851807, | |
| "learning_rate": 1.346888535931064e-05, | |
| "loss": 1.6562, | |
| "step": 871 | |
| }, | |
| { | |
| "epoch": 3.8660022148394244, | |
| "grad_norm": 6.6412672996521, | |
| "learning_rate": 1.3370731612223463e-05, | |
| "loss": 1.834, | |
| "step": 872 | |
| }, | |
| { | |
| "epoch": 3.870431893687708, | |
| "grad_norm": 5.490871429443359, | |
| "learning_rate": 1.3272881588491237e-05, | |
| "loss": 1.5606, | |
| "step": 873 | |
| }, | |
| { | |
| "epoch": 3.874861572535991, | |
| "grad_norm": 7.209272384643555, | |
| "learning_rate": 1.3175336099468943e-05, | |
| "loss": 1.4649, | |
| "step": 874 | |
| }, | |
| { | |
| "epoch": 3.8792912513842746, | |
| "grad_norm": 4.529653549194336, | |
| "learning_rate": 1.307809595398633e-05, | |
| "loss": 1.8369, | |
| "step": 875 | |
| }, | |
| { | |
| "epoch": 3.883720930232558, | |
| "grad_norm": 6.793308734893799, | |
| "learning_rate": 1.2981161958341337e-05, | |
| "loss": 1.6708, | |
| "step": 876 | |
| }, | |
| { | |
| "epoch": 3.8881506090808418, | |
| "grad_norm": 5.094226360321045, | |
| "learning_rate": 1.2884534916293345e-05, | |
| "loss": 1.8261, | |
| "step": 877 | |
| }, | |
| { | |
| "epoch": 3.892580287929125, | |
| "grad_norm": 5.952579021453857, | |
| "learning_rate": 1.2788215629056548e-05, | |
| "loss": 1.8971, | |
| "step": 878 | |
| }, | |
| { | |
| "epoch": 3.8970099667774085, | |
| "grad_norm": 4.1918745040893555, | |
| "learning_rate": 1.2692204895293298e-05, | |
| "loss": 1.5702, | |
| "step": 879 | |
| }, | |
| { | |
| "epoch": 3.9014396456256923, | |
| "grad_norm": 6.7971391677856445, | |
| "learning_rate": 1.2596503511107428e-05, | |
| "loss": 1.2105, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 3.9058693244739757, | |
| "grad_norm": 3.9530787467956543, | |
| "learning_rate": 1.250111227003774e-05, | |
| "loss": 1.9222, | |
| "step": 881 | |
| }, | |
| { | |
| "epoch": 3.910299003322259, | |
| "grad_norm": 5.808813095092773, | |
| "learning_rate": 1.240603196305139e-05, | |
| "loss": 1.5273, | |
| "step": 882 | |
| }, | |
| { | |
| "epoch": 3.9147286821705425, | |
| "grad_norm": 4.9905242919921875, | |
| "learning_rate": 1.2311263378537314e-05, | |
| "loss": 1.9195, | |
| "step": 883 | |
| }, | |
| { | |
| "epoch": 3.919158361018826, | |
| "grad_norm": 5.769536972045898, | |
| "learning_rate": 1.2216807302299683e-05, | |
| "loss": 1.8533, | |
| "step": 884 | |
| }, | |
| { | |
| "epoch": 3.9235880398671097, | |
| "grad_norm": 6.07841682434082, | |
| "learning_rate": 1.2122664517551418e-05, | |
| "loss": 1.4366, | |
| "step": 885 | |
| }, | |
| { | |
| "epoch": 3.928017718715393, | |
| "grad_norm": 5.433627128601074, | |
| "learning_rate": 1.2028835804907701e-05, | |
| "loss": 1.6119, | |
| "step": 886 | |
| }, | |
| { | |
| "epoch": 3.932447397563677, | |
| "grad_norm": 5.320455551147461, | |
| "learning_rate": 1.1935321942379456e-05, | |
| "loss": 1.555, | |
| "step": 887 | |
| }, | |
| { | |
| "epoch": 3.9368770764119603, | |
| "grad_norm": 5.33215856552124, | |
| "learning_rate": 1.1842123705366959e-05, | |
| "loss": 1.3726, | |
| "step": 888 | |
| }, | |
| { | |
| "epoch": 3.9413067552602437, | |
| "grad_norm": 5.91862678527832, | |
| "learning_rate": 1.1749241866653327e-05, | |
| "loss": 1.6878, | |
| "step": 889 | |
| }, | |
| { | |
| "epoch": 3.945736434108527, | |
| "grad_norm": 5.898477554321289, | |
| "learning_rate": 1.1656677196398197e-05, | |
| "loss": 2.099, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 3.9501661129568104, | |
| "grad_norm": 6.435544967651367, | |
| "learning_rate": 1.1564430462131299e-05, | |
| "loss": 1.7103, | |
| "step": 891 | |
| }, | |
| { | |
| "epoch": 3.9545957918050942, | |
| "grad_norm": 5.444723129272461, | |
| "learning_rate": 1.147250242874609e-05, | |
| "loss": 1.9926, | |
| "step": 892 | |
| }, | |
| { | |
| "epoch": 3.9590254706533776, | |
| "grad_norm": 7.465640068054199, | |
| "learning_rate": 1.138089385849338e-05, | |
| "loss": 1.473, | |
| "step": 893 | |
| }, | |
| { | |
| "epoch": 3.963455149501661, | |
| "grad_norm": 5.691059112548828, | |
| "learning_rate": 1.128960551097512e-05, | |
| "loss": 1.7822, | |
| "step": 894 | |
| }, | |
| { | |
| "epoch": 3.967884828349945, | |
| "grad_norm": 4.731316566467285, | |
| "learning_rate": 1.1198638143137996e-05, | |
| "loss": 1.4145, | |
| "step": 895 | |
| }, | |
| { | |
| "epoch": 3.972314507198228, | |
| "grad_norm": 7.516561031341553, | |
| "learning_rate": 1.110799250926715e-05, | |
| "loss": 1.5675, | |
| "step": 896 | |
| }, | |
| { | |
| "epoch": 3.9767441860465116, | |
| "grad_norm": 10.761414527893066, | |
| "learning_rate": 1.1017669360980016e-05, | |
| "loss": 1.6808, | |
| "step": 897 | |
| }, | |
| { | |
| "epoch": 3.981173864894795, | |
| "grad_norm": 8.457561492919922, | |
| "learning_rate": 1.0927669447220012e-05, | |
| "loss": 1.6556, | |
| "step": 898 | |
| }, | |
| { | |
| "epoch": 3.985603543743079, | |
| "grad_norm": 5.558786869049072, | |
| "learning_rate": 1.0837993514250371e-05, | |
| "loss": 1.5594, | |
| "step": 899 | |
| }, | |
| { | |
| "epoch": 3.990033222591362, | |
| "grad_norm": 7.166381359100342, | |
| "learning_rate": 1.07486423056479e-05, | |
| "loss": 1.279, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 3.9944629014396456, | |
| "grad_norm": 4.120462417602539, | |
| "learning_rate": 1.0659616562296882e-05, | |
| "loss": 1.6244, | |
| "step": 901 | |
| }, | |
| { | |
| "epoch": 3.9988925802879294, | |
| "grad_norm": 7.515431880950928, | |
| "learning_rate": 1.0570917022382887e-05, | |
| "loss": 1.4077, | |
| "step": 902 | |
| }, | |
| { | |
| "epoch": 4.004429678848283, | |
| "grad_norm": 3.734769821166992, | |
| "learning_rate": 1.0482544421386669e-05, | |
| "loss": 1.4449, | |
| "step": 903 | |
| }, | |
| { | |
| "epoch": 4.008859357696567, | |
| "grad_norm": 6.960366249084473, | |
| "learning_rate": 1.039449949207808e-05, | |
| "loss": 1.5066, | |
| "step": 904 | |
| }, | |
| { | |
| "epoch": 4.01328903654485, | |
| "grad_norm": 6.179312705993652, | |
| "learning_rate": 1.030678296450992e-05, | |
| "loss": 1.6181, | |
| "step": 905 | |
| }, | |
| { | |
| "epoch": 4.017718715393134, | |
| "grad_norm": 4.362559795379639, | |
| "learning_rate": 1.0219395566011992e-05, | |
| "loss": 1.6309, | |
| "step": 906 | |
| }, | |
| { | |
| "epoch": 4.022148394241418, | |
| "grad_norm": 5.700815200805664, | |
| "learning_rate": 1.0132338021185012e-05, | |
| "loss": 2.0716, | |
| "step": 907 | |
| }, | |
| { | |
| "epoch": 4.026578073089701, | |
| "grad_norm": 6.670535087585449, | |
| "learning_rate": 1.004561105189461e-05, | |
| "loss": 1.4746, | |
| "step": 908 | |
| }, | |
| { | |
| "epoch": 4.0310077519379846, | |
| "grad_norm": 5.524834632873535, | |
| "learning_rate": 9.959215377265312e-06, | |
| "loss": 1.324, | |
| "step": 909 | |
| }, | |
| { | |
| "epoch": 4.035437430786268, | |
| "grad_norm": 5.835739612579346, | |
| "learning_rate": 9.873151713674638e-06, | |
| "loss": 1.7052, | |
| "step": 910 | |
| }, | |
| { | |
| "epoch": 4.039867109634551, | |
| "grad_norm": 5.3091044425964355, | |
| "learning_rate": 9.78742077474713e-06, | |
| "loss": 1.6702, | |
| "step": 911 | |
| }, | |
| { | |
| "epoch": 4.044296788482835, | |
| "grad_norm": 6.9273762702941895, | |
| "learning_rate": 9.70202327134842e-06, | |
| "loss": 1.9085, | |
| "step": 912 | |
| }, | |
| { | |
| "epoch": 4.048726467331118, | |
| "grad_norm": 4.531795978546143, | |
| "learning_rate": 9.616959911579388e-06, | |
| "loss": 1.3284, | |
| "step": 913 | |
| }, | |
| { | |
| "epoch": 4.053156146179402, | |
| "grad_norm": 8.328673362731934, | |
| "learning_rate": 9.532231400770181e-06, | |
| "loss": 1.4044, | |
| "step": 914 | |
| }, | |
| { | |
| "epoch": 4.057585825027686, | |
| "grad_norm": 5.203913688659668, | |
| "learning_rate": 9.447838441474538e-06, | |
| "loss": 1.6471, | |
| "step": 915 | |
| }, | |
| { | |
| "epoch": 4.062015503875969, | |
| "grad_norm": 4.853526592254639, | |
| "learning_rate": 9.363781733463784e-06, | |
| "loss": 1.707, | |
| "step": 916 | |
| }, | |
| { | |
| "epoch": 4.0664451827242525, | |
| "grad_norm": 5.6800737380981445, | |
| "learning_rate": 9.280061973721149e-06, | |
| "loss": 1.7479, | |
| "step": 917 | |
| }, | |
| { | |
| "epoch": 4.070874861572536, | |
| "grad_norm": 4.587559700012207, | |
| "learning_rate": 9.196679856435908e-06, | |
| "loss": 1.589, | |
| "step": 918 | |
| }, | |
| { | |
| "epoch": 4.075304540420819, | |
| "grad_norm": 4.8105788230896, | |
| "learning_rate": 9.113636072997744e-06, | |
| "loss": 1.5716, | |
| "step": 919 | |
| }, | |
| { | |
| "epoch": 4.079734219269103, | |
| "grad_norm": 5.17236328125, | |
| "learning_rate": 9.030931311990842e-06, | |
| "loss": 1.499, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 4.084163898117387, | |
| "grad_norm": 6.545801162719727, | |
| "learning_rate": 8.948566259188334e-06, | |
| "loss": 1.7694, | |
| "step": 921 | |
| }, | |
| { | |
| "epoch": 4.08859357696567, | |
| "grad_norm": 5.068369388580322, | |
| "learning_rate": 8.866541597546529e-06, | |
| "loss": 1.8775, | |
| "step": 922 | |
| }, | |
| { | |
| "epoch": 4.093023255813954, | |
| "grad_norm": 6.716525554656982, | |
| "learning_rate": 8.784858007199292e-06, | |
| "loss": 1.6109, | |
| "step": 923 | |
| }, | |
| { | |
| "epoch": 4.097452934662237, | |
| "grad_norm": 6.743626594543457, | |
| "learning_rate": 8.703516165452375e-06, | |
| "loss": 1.4904, | |
| "step": 924 | |
| }, | |
| { | |
| "epoch": 4.10188261351052, | |
| "grad_norm": 7.2740983963012695, | |
| "learning_rate": 8.622516746777787e-06, | |
| "loss": 1.7105, | |
| "step": 925 | |
| }, | |
| { | |
| "epoch": 4.106312292358804, | |
| "grad_norm": 4.581962585449219, | |
| "learning_rate": 8.541860422808267e-06, | |
| "loss": 1.4977, | |
| "step": 926 | |
| }, | |
| { | |
| "epoch": 4.110741971207087, | |
| "grad_norm": 5.813825607299805, | |
| "learning_rate": 8.461547862331649e-06, | |
| "loss": 1.3004, | |
| "step": 927 | |
| }, | |
| { | |
| "epoch": 4.1151716500553706, | |
| "grad_norm": 4.484111785888672, | |
| "learning_rate": 8.38157973128535e-06, | |
| "loss": 1.3782, | |
| "step": 928 | |
| }, | |
| { | |
| "epoch": 4.119601328903655, | |
| "grad_norm": 4.56226110458374, | |
| "learning_rate": 8.30195669275085e-06, | |
| "loss": 1.6109, | |
| "step": 929 | |
| }, | |
| { | |
| "epoch": 4.124031007751938, | |
| "grad_norm": 7.081721305847168, | |
| "learning_rate": 8.222679406948148e-06, | |
| "loss": 1.7734, | |
| "step": 930 | |
| }, | |
| { | |
| "epoch": 4.128460686600222, | |
| "grad_norm": 7.574245452880859, | |
| "learning_rate": 8.143748531230361e-06, | |
| "loss": 1.6593, | |
| "step": 931 | |
| }, | |
| { | |
| "epoch": 4.132890365448505, | |
| "grad_norm": 9.047913551330566, | |
| "learning_rate": 8.065164720078216e-06, | |
| "loss": 1.5733, | |
| "step": 932 | |
| }, | |
| { | |
| "epoch": 4.137320044296788, | |
| "grad_norm": 4.532729625701904, | |
| "learning_rate": 7.986928625094663e-06, | |
| "loss": 1.6855, | |
| "step": 933 | |
| }, | |
| { | |
| "epoch": 4.141749723145072, | |
| "grad_norm": 6.263223648071289, | |
| "learning_rate": 7.909040894999403e-06, | |
| "loss": 1.7791, | |
| "step": 934 | |
| }, | |
| { | |
| "epoch": 4.146179401993355, | |
| "grad_norm": 6.938450813293457, | |
| "learning_rate": 7.831502175623628e-06, | |
| "loss": 1.6734, | |
| "step": 935 | |
| }, | |
| { | |
| "epoch": 4.150609080841639, | |
| "grad_norm": 5.863696098327637, | |
| "learning_rate": 7.754313109904533e-06, | |
| "loss": 1.8766, | |
| "step": 936 | |
| }, | |
| { | |
| "epoch": 4.155038759689923, | |
| "grad_norm": 7.904068470001221, | |
| "learning_rate": 7.67747433788008e-06, | |
| "loss": 1.5805, | |
| "step": 937 | |
| }, | |
| { | |
| "epoch": 4.159468438538206, | |
| "grad_norm": 6.500148773193359, | |
| "learning_rate": 7.6009864966836546e-06, | |
| "loss": 1.5432, | |
| "step": 938 | |
| }, | |
| { | |
| "epoch": 4.1638981173864895, | |
| "grad_norm": 5.123464584350586, | |
| "learning_rate": 7.5248502205387705e-06, | |
| "loss": 1.2146, | |
| "step": 939 | |
| }, | |
| { | |
| "epoch": 4.168327796234773, | |
| "grad_norm": 4.51666259765625, | |
| "learning_rate": 7.4490661407538635e-06, | |
| "loss": 1.4389, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 4.172757475083056, | |
| "grad_norm": 7.513207912445068, | |
| "learning_rate": 7.373634885716968e-06, | |
| "loss": 1.4889, | |
| "step": 941 | |
| }, | |
| { | |
| "epoch": 4.17718715393134, | |
| "grad_norm": 8.832489013671875, | |
| "learning_rate": 7.298557080890594e-06, | |
| "loss": 1.8399, | |
| "step": 942 | |
| }, | |
| { | |
| "epoch": 4.181616832779623, | |
| "grad_norm": 4.61073637008667, | |
| "learning_rate": 7.223833348806502e-06, | |
| "loss": 1.5412, | |
| "step": 943 | |
| }, | |
| { | |
| "epoch": 4.186046511627907, | |
| "grad_norm": 10.34188461303711, | |
| "learning_rate": 7.149464309060549e-06, | |
| "loss": 1.4855, | |
| "step": 944 | |
| }, | |
| { | |
| "epoch": 4.190476190476191, | |
| "grad_norm": 7.247960090637207, | |
| "learning_rate": 7.075450578307513e-06, | |
| "loss": 1.5399, | |
| "step": 945 | |
| }, | |
| { | |
| "epoch": 4.194905869324474, | |
| "grad_norm": 6.24155855178833, | |
| "learning_rate": 7.001792770256055e-06, | |
| "loss": 1.7671, | |
| "step": 946 | |
| }, | |
| { | |
| "epoch": 4.1993355481727574, | |
| "grad_norm": 4.38225793838501, | |
| "learning_rate": 6.928491495663564e-06, | |
| "loss": 1.8296, | |
| "step": 947 | |
| }, | |
| { | |
| "epoch": 4.203765227021041, | |
| "grad_norm": 6.093568801879883, | |
| "learning_rate": 6.8555473623311275e-06, | |
| "loss": 1.4753, | |
| "step": 948 | |
| }, | |
| { | |
| "epoch": 4.208194905869324, | |
| "grad_norm": 5.5349626541137695, | |
| "learning_rate": 6.78296097509849e-06, | |
| "loss": 1.5375, | |
| "step": 949 | |
| }, | |
| { | |
| "epoch": 4.212624584717608, | |
| "grad_norm": 6.367927074432373, | |
| "learning_rate": 6.710732935838998e-06, | |
| "loss": 1.7022, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 4.217054263565892, | |
| "grad_norm": 6.881635665893555, | |
| "learning_rate": 6.638863843454662e-06, | |
| "loss": 1.6797, | |
| "step": 951 | |
| }, | |
| { | |
| "epoch": 4.221483942414175, | |
| "grad_norm": 14.884693145751953, | |
| "learning_rate": 6.567354293871181e-06, | |
| "loss": 1.432, | |
| "step": 952 | |
| }, | |
| { | |
| "epoch": 4.225913621262459, | |
| "grad_norm": 8.686039924621582, | |
| "learning_rate": 6.496204880032969e-06, | |
| "loss": 1.6911, | |
| "step": 953 | |
| }, | |
| { | |
| "epoch": 4.230343300110742, | |
| "grad_norm": 5.975069999694824, | |
| "learning_rate": 6.425416191898259e-06, | |
| "loss": 1.5197, | |
| "step": 954 | |
| }, | |
| { | |
| "epoch": 4.234772978959025, | |
| "grad_norm": 7.568436622619629, | |
| "learning_rate": 6.3549888164342046e-06, | |
| "loss": 1.5966, | |
| "step": 955 | |
| }, | |
| { | |
| "epoch": 4.239202657807309, | |
| "grad_norm": 6.951284885406494, | |
| "learning_rate": 6.284923337612069e-06, | |
| "loss": 1.7411, | |
| "step": 956 | |
| }, | |
| { | |
| "epoch": 4.243632336655592, | |
| "grad_norm": 6.59275484085083, | |
| "learning_rate": 6.215220336402272e-06, | |
| "loss": 1.6294, | |
| "step": 957 | |
| }, | |
| { | |
| "epoch": 4.248062015503876, | |
| "grad_norm": 6.854479789733887, | |
| "learning_rate": 6.145880390769665e-06, | |
| "loss": 1.1773, | |
| "step": 958 | |
| }, | |
| { | |
| "epoch": 4.25249169435216, | |
| "grad_norm": 6.827851295471191, | |
| "learning_rate": 6.076904075668671e-06, | |
| "loss": 1.6421, | |
| "step": 959 | |
| }, | |
| { | |
| "epoch": 4.256921373200443, | |
| "grad_norm": 4.86449670791626, | |
| "learning_rate": 6.008291963038632e-06, | |
| "loss": 1.6358, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 4.2613510520487266, | |
| "grad_norm": 4.6412835121154785, | |
| "learning_rate": 5.940044621798896e-06, | |
| "loss": 1.5333, | |
| "step": 961 | |
| }, | |
| { | |
| "epoch": 4.26578073089701, | |
| "grad_norm": 4.715771198272705, | |
| "learning_rate": 5.872162617844268e-06, | |
| "loss": 1.7593, | |
| "step": 962 | |
| }, | |
| { | |
| "epoch": 4.270210409745293, | |
| "grad_norm": 4.89564323425293, | |
| "learning_rate": 5.804646514040163e-06, | |
| "loss": 1.7706, | |
| "step": 963 | |
| }, | |
| { | |
| "epoch": 4.274640088593577, | |
| "grad_norm": 6.569485187530518, | |
| "learning_rate": 5.737496870218101e-06, | |
| "loss": 1.4384, | |
| "step": 964 | |
| }, | |
| { | |
| "epoch": 4.27906976744186, | |
| "grad_norm": 7.028257846832275, | |
| "learning_rate": 5.670714243170938e-06, | |
| "loss": 1.6896, | |
| "step": 965 | |
| }, | |
| { | |
| "epoch": 4.283499446290144, | |
| "grad_norm": 5.102827072143555, | |
| "learning_rate": 5.604299186648282e-06, | |
| "loss": 1.8287, | |
| "step": 966 | |
| }, | |
| { | |
| "epoch": 4.287929125138428, | |
| "grad_norm": 8.853278160095215, | |
| "learning_rate": 5.538252251351933e-06, | |
| "loss": 2.0511, | |
| "step": 967 | |
| }, | |
| { | |
| "epoch": 4.292358803986711, | |
| "grad_norm": 5.434422016143799, | |
| "learning_rate": 5.472573984931284e-06, | |
| "loss": 1.3771, | |
| "step": 968 | |
| }, | |
| { | |
| "epoch": 4.2967884828349945, | |
| "grad_norm": 7.120089054107666, | |
| "learning_rate": 5.407264931978812e-06, | |
| "loss": 1.7419, | |
| "step": 969 | |
| }, | |
| { | |
| "epoch": 4.301218161683278, | |
| "grad_norm": 5.602823734283447, | |
| "learning_rate": 5.3423256340255026e-06, | |
| "loss": 1.6085, | |
| "step": 970 | |
| }, | |
| { | |
| "epoch": 4.305647840531561, | |
| "grad_norm": 8.626879692077637, | |
| "learning_rate": 5.277756629536434e-06, | |
| "loss": 1.549, | |
| "step": 971 | |
| }, | |
| { | |
| "epoch": 4.310077519379845, | |
| "grad_norm": 5.62658166885376, | |
| "learning_rate": 5.21355845390627e-06, | |
| "loss": 1.4692, | |
| "step": 972 | |
| }, | |
| { | |
| "epoch": 4.314507198228128, | |
| "grad_norm": 4.512428283691406, | |
| "learning_rate": 5.149731639454819e-06, | |
| "loss": 1.5082, | |
| "step": 973 | |
| }, | |
| { | |
| "epoch": 4.318936877076412, | |
| "grad_norm": 8.768449783325195, | |
| "learning_rate": 5.086276715422644e-06, | |
| "loss": 1.5685, | |
| "step": 974 | |
| }, | |
| { | |
| "epoch": 4.323366555924696, | |
| "grad_norm": 6.369165897369385, | |
| "learning_rate": 5.023194207966614e-06, | |
| "loss": 1.6781, | |
| "step": 975 | |
| }, | |
| { | |
| "epoch": 4.327796234772979, | |
| "grad_norm": 9.242612838745117, | |
| "learning_rate": 4.9604846401556656e-06, | |
| "loss": 1.7372, | |
| "step": 976 | |
| }, | |
| { | |
| "epoch": 4.332225913621262, | |
| "grad_norm": 7.073103427886963, | |
| "learning_rate": 4.898148531966307e-06, | |
| "loss": 1.7124, | |
| "step": 977 | |
| }, | |
| { | |
| "epoch": 4.336655592469546, | |
| "grad_norm": 7.333794116973877, | |
| "learning_rate": 4.836186400278442e-06, | |
| "loss": 1.742, | |
| "step": 978 | |
| }, | |
| { | |
| "epoch": 4.341085271317829, | |
| "grad_norm": 6.159682750701904, | |
| "learning_rate": 4.774598758870979e-06, | |
| "loss": 1.2728, | |
| "step": 979 | |
| }, | |
| { | |
| "epoch": 4.3455149501661126, | |
| "grad_norm": 6.649118423461914, | |
| "learning_rate": 4.713386118417684e-06, | |
| "loss": 1.5894, | |
| "step": 980 | |
| }, | |
| { | |
| "epoch": 4.349944629014397, | |
| "grad_norm": 7.892576694488525, | |
| "learning_rate": 4.652548986482813e-06, | |
| "loss": 1.8148, | |
| "step": 981 | |
| }, | |
| { | |
| "epoch": 4.35437430786268, | |
| "grad_norm": 4.917545795440674, | |
| "learning_rate": 4.59208786751702e-06, | |
| "loss": 1.5938, | |
| "step": 982 | |
| }, | |
| { | |
| "epoch": 4.358803986710964, | |
| "grad_norm": 5.065957546234131, | |
| "learning_rate": 4.532003262853107e-06, | |
| "loss": 1.4148, | |
| "step": 983 | |
| }, | |
| { | |
| "epoch": 4.363233665559247, | |
| "grad_norm": 5.68073034286499, | |
| "learning_rate": 4.472295670701887e-06, | |
| "loss": 1.4353, | |
| "step": 984 | |
| }, | |
| { | |
| "epoch": 4.36766334440753, | |
| "grad_norm": 6.360957145690918, | |
| "learning_rate": 4.412965586148071e-06, | |
| "loss": 1.4035, | |
| "step": 985 | |
| }, | |
| { | |
| "epoch": 4.372093023255814, | |
| "grad_norm": 4.0210981369018555, | |
| "learning_rate": 4.354013501146109e-06, | |
| "loss": 1.3468, | |
| "step": 986 | |
| }, | |
| { | |
| "epoch": 4.376522702104097, | |
| "grad_norm": 6.830559730529785, | |
| "learning_rate": 4.2954399045161746e-06, | |
| "loss": 1.5327, | |
| "step": 987 | |
| }, | |
| { | |
| "epoch": 4.380952380952381, | |
| "grad_norm": 4.727903842926025, | |
| "learning_rate": 4.237245281940078e-06, | |
| "loss": 1.4903, | |
| "step": 988 | |
| }, | |
| { | |
| "epoch": 4.385382059800665, | |
| "grad_norm": 7.276794910430908, | |
| "learning_rate": 4.17943011595725e-06, | |
| "loss": 1.9205, | |
| "step": 989 | |
| }, | |
| { | |
| "epoch": 4.389811738648948, | |
| "grad_norm": 4.359097003936768, | |
| "learning_rate": 4.121994885960706e-06, | |
| "loss": 1.6313, | |
| "step": 990 | |
| }, | |
| { | |
| "epoch": 4.3942414174972315, | |
| "grad_norm": 4.636504173278809, | |
| "learning_rate": 4.064940068193129e-06, | |
| "loss": 1.3116, | |
| "step": 991 | |
| }, | |
| { | |
| "epoch": 4.398671096345515, | |
| "grad_norm": 5.2092604637146, | |
| "learning_rate": 4.008266135742894e-06, | |
| "loss": 1.6403, | |
| "step": 992 | |
| }, | |
| { | |
| "epoch": 4.403100775193798, | |
| "grad_norm": 5.949525356292725, | |
| "learning_rate": 3.951973558540118e-06, | |
| "loss": 1.3996, | |
| "step": 993 | |
| }, | |
| { | |
| "epoch": 4.407530454042082, | |
| "grad_norm": 4.8397626876831055, | |
| "learning_rate": 3.896062803352818e-06, | |
| "loss": 1.8168, | |
| "step": 994 | |
| }, | |
| { | |
| "epoch": 4.411960132890365, | |
| "grad_norm": 5.553168773651123, | |
| "learning_rate": 3.84053433378298e-06, | |
| "loss": 1.2202, | |
| "step": 995 | |
| }, | |
| { | |
| "epoch": 4.416389811738649, | |
| "grad_norm": 5.277307987213135, | |
| "learning_rate": 3.785388610262769e-06, | |
| "loss": 1.3852, | |
| "step": 996 | |
| }, | |
| { | |
| "epoch": 4.420819490586933, | |
| "grad_norm": 5.157022953033447, | |
| "learning_rate": 3.730626090050676e-06, | |
| "loss": 1.2935, | |
| "step": 997 | |
| }, | |
| { | |
| "epoch": 4.425249169435216, | |
| "grad_norm": 6.345918655395508, | |
| "learning_rate": 3.6762472272277582e-06, | |
| "loss": 1.8446, | |
| "step": 998 | |
| }, | |
| { | |
| "epoch": 4.4296788482834994, | |
| "grad_norm": 6.063523769378662, | |
| "learning_rate": 3.6222524726938157e-06, | |
| "loss": 1.5922, | |
| "step": 999 | |
| }, | |
| { | |
| "epoch": 4.434108527131783, | |
| "grad_norm": 6.320123672485352, | |
| "learning_rate": 3.568642274163725e-06, | |
| "loss": 1.6638, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 4.438538205980066, | |
| "grad_norm": 4.40836238861084, | |
| "learning_rate": 3.5154170761636963e-06, | |
| "loss": 1.703, | |
| "step": 1001 | |
| }, | |
| { | |
| "epoch": 4.44296788482835, | |
| "grad_norm": 5.5650715827941895, | |
| "learning_rate": 3.462577320027555e-06, | |
| "loss": 1.6481, | |
| "step": 1002 | |
| }, | |
| { | |
| "epoch": 4.447397563676634, | |
| "grad_norm": 6.705456733703613, | |
| "learning_rate": 3.410123443893137e-06, | |
| "loss": 1.4962, | |
| "step": 1003 | |
| }, | |
| { | |
| "epoch": 4.451827242524917, | |
| "grad_norm": 6.810803413391113, | |
| "learning_rate": 3.3580558826985963e-06, | |
| "loss": 1.4737, | |
| "step": 1004 | |
| }, | |
| { | |
| "epoch": 4.456256921373201, | |
| "grad_norm": 5.0352091789245605, | |
| "learning_rate": 3.306375068178891e-06, | |
| "loss": 1.6533, | |
| "step": 1005 | |
| }, | |
| { | |
| "epoch": 4.460686600221484, | |
| "grad_norm": 5.479842185974121, | |
| "learning_rate": 3.2550814288620834e-06, | |
| "loss": 1.3461, | |
| "step": 1006 | |
| }, | |
| { | |
| "epoch": 4.465116279069767, | |
| "grad_norm": 4.528697967529297, | |
| "learning_rate": 3.204175390065889e-06, | |
| "loss": 1.6675, | |
| "step": 1007 | |
| }, | |
| { | |
| "epoch": 4.469545957918051, | |
| "grad_norm": 5.891976833343506, | |
| "learning_rate": 3.1536573738940812e-06, | |
| "loss": 1.6043, | |
| "step": 1008 | |
| }, | |
| { | |
| "epoch": 4.473975636766334, | |
| "grad_norm": 5.913291931152344, | |
| "learning_rate": 3.103527799233047e-06, | |
| "loss": 1.4799, | |
| "step": 1009 | |
| }, | |
| { | |
| "epoch": 4.4784053156146175, | |
| "grad_norm": 4.8424458503723145, | |
| "learning_rate": 3.0537870817482663e-06, | |
| "loss": 1.3448, | |
| "step": 1010 | |
| }, | |
| { | |
| "epoch": 4.482834994462902, | |
| "grad_norm": 4.321615695953369, | |
| "learning_rate": 3.004435633880881e-06, | |
| "loss": 1.8712, | |
| "step": 1011 | |
| }, | |
| { | |
| "epoch": 4.487264673311185, | |
| "grad_norm": 7.836488723754883, | |
| "learning_rate": 2.955473864844299e-06, | |
| "loss": 1.6831, | |
| "step": 1012 | |
| }, | |
| { | |
| "epoch": 4.4916943521594686, | |
| "grad_norm": 7.034804344177246, | |
| "learning_rate": 2.9069021806207573e-06, | |
| "loss": 1.7364, | |
| "step": 1013 | |
| }, | |
| { | |
| "epoch": 4.496124031007752, | |
| "grad_norm": 8.025514602661133, | |
| "learning_rate": 2.858720983958013e-06, | |
| "loss": 1.5019, | |
| "step": 1014 | |
| }, | |
| { | |
| "epoch": 4.500553709856035, | |
| "grad_norm": 3.540686845779419, | |
| "learning_rate": 2.8109306743659192e-06, | |
| "loss": 1.3985, | |
| "step": 1015 | |
| }, | |
| { | |
| "epoch": 4.504983388704319, | |
| "grad_norm": 10.38813591003418, | |
| "learning_rate": 2.76353164811321e-06, | |
| "loss": 1.8905, | |
| "step": 1016 | |
| }, | |
| { | |
| "epoch": 4.509413067552602, | |
| "grad_norm": 4.8591628074646, | |
| "learning_rate": 2.7165242982241436e-06, | |
| "loss": 1.7566, | |
| "step": 1017 | |
| }, | |
| { | |
| "epoch": 4.513842746400886, | |
| "grad_norm": 6.675249099731445, | |
| "learning_rate": 2.6699090144752803e-06, | |
| "loss": 1.5333, | |
| "step": 1018 | |
| }, | |
| { | |
| "epoch": 4.51827242524917, | |
| "grad_norm": 4.969045162200928, | |
| "learning_rate": 2.6236861833922376e-06, | |
| "loss": 1.1801, | |
| "step": 1019 | |
| }, | |
| { | |
| "epoch": 4.522702104097453, | |
| "grad_norm": 4.894344329833984, | |
| "learning_rate": 2.5778561882464525e-06, | |
| "loss": 1.629, | |
| "step": 1020 | |
| }, | |
| { | |
| "epoch": 4.5271317829457365, | |
| "grad_norm": 6.36023473739624, | |
| "learning_rate": 2.5324194090521002e-06, | |
| "loss": 1.59, | |
| "step": 1021 | |
| }, | |
| { | |
| "epoch": 4.53156146179402, | |
| "grad_norm": 8.083828926086426, | |
| "learning_rate": 2.487376222562815e-06, | |
| "loss": 1.3789, | |
| "step": 1022 | |
| }, | |
| { | |
| "epoch": 4.535991140642303, | |
| "grad_norm": 6.636145114898682, | |
| "learning_rate": 2.44272700226868e-06, | |
| "loss": 1.9007, | |
| "step": 1023 | |
| }, | |
| { | |
| "epoch": 4.540420819490587, | |
| "grad_norm": 6.143908500671387, | |
| "learning_rate": 2.3984721183930303e-06, | |
| "loss": 1.7806, | |
| "step": 1024 | |
| }, | |
| { | |
| "epoch": 4.544850498338871, | |
| "grad_norm": 5.793661594390869, | |
| "learning_rate": 2.3546119378895005e-06, | |
| "loss": 1.874, | |
| "step": 1025 | |
| }, | |
| { | |
| "epoch": 4.549280177187154, | |
| "grad_norm": 4.639335632324219, | |
| "learning_rate": 2.311146824438848e-06, | |
| "loss": 1.8634, | |
| "step": 1026 | |
| }, | |
| { | |
| "epoch": 4.553709856035438, | |
| "grad_norm": 12.957326889038086, | |
| "learning_rate": 2.2680771384460507e-06, | |
| "loss": 1.5008, | |
| "step": 1027 | |
| }, | |
| { | |
| "epoch": 4.558139534883721, | |
| "grad_norm": 5.100373268127441, | |
| "learning_rate": 2.2254032370372492e-06, | |
| "loss": 1.8479, | |
| "step": 1028 | |
| }, | |
| { | |
| "epoch": 4.562569213732004, | |
| "grad_norm": 4.1848015785217285, | |
| "learning_rate": 2.1831254740568242e-06, | |
| "loss": 1.4017, | |
| "step": 1029 | |
| }, | |
| { | |
| "epoch": 4.566998892580288, | |
| "grad_norm": 7.201414585113525, | |
| "learning_rate": 2.1412442000644352e-06, | |
| "loss": 1.756, | |
| "step": 1030 | |
| }, | |
| { | |
| "epoch": 4.571428571428571, | |
| "grad_norm": 7.138917446136475, | |
| "learning_rate": 2.0997597623321264e-06, | |
| "loss": 1.7556, | |
| "step": 1031 | |
| }, | |
| { | |
| "epoch": 4.5758582502768546, | |
| "grad_norm": 8.494391441345215, | |
| "learning_rate": 2.058672504841458e-06, | |
| "loss": 2.0976, | |
| "step": 1032 | |
| }, | |
| { | |
| "epoch": 4.580287929125138, | |
| "grad_norm": 9.561163902282715, | |
| "learning_rate": 2.017982768280624e-06, | |
| "loss": 1.1794, | |
| "step": 1033 | |
| }, | |
| { | |
| "epoch": 4.584717607973422, | |
| "grad_norm": 6.571170806884766, | |
| "learning_rate": 1.9776908900416777e-06, | |
| "loss": 1.4607, | |
| "step": 1034 | |
| }, | |
| { | |
| "epoch": 4.589147286821706, | |
| "grad_norm": 4.655641555786133, | |
| "learning_rate": 1.937797204217656e-06, | |
| "loss": 1.8607, | |
| "step": 1035 | |
| }, | |
| { | |
| "epoch": 4.593576965669989, | |
| "grad_norm": 4.481752872467041, | |
| "learning_rate": 1.898302041599892e-06, | |
| "loss": 1.6295, | |
| "step": 1036 | |
| }, | |
| { | |
| "epoch": 4.598006644518272, | |
| "grad_norm": 6.839178562164307, | |
| "learning_rate": 1.859205729675223e-06, | |
| "loss": 1.5424, | |
| "step": 1037 | |
| }, | |
| { | |
| "epoch": 4.602436323366556, | |
| "grad_norm": 5.645241737365723, | |
| "learning_rate": 1.820508592623288e-06, | |
| "loss": 1.2222, | |
| "step": 1038 | |
| }, | |
| { | |
| "epoch": 4.606866002214839, | |
| "grad_norm": 6.006126880645752, | |
| "learning_rate": 1.7822109513138453e-06, | |
| "loss": 1.6035, | |
| "step": 1039 | |
| }, | |
| { | |
| "epoch": 4.6112956810631225, | |
| "grad_norm": 7.534673690795898, | |
| "learning_rate": 1.744313123304092e-06, | |
| "loss": 1.4323, | |
| "step": 1040 | |
| }, | |
| { | |
| "epoch": 4.615725359911407, | |
| "grad_norm": 6.148129940032959, | |
| "learning_rate": 1.706815422836061e-06, | |
| "loss": 1.7501, | |
| "step": 1041 | |
| }, | |
| { | |
| "epoch": 4.62015503875969, | |
| "grad_norm": 4.745789051055908, | |
| "learning_rate": 1.6697181608339828e-06, | |
| "loss": 1.4226, | |
| "step": 1042 | |
| }, | |
| { | |
| "epoch": 4.6245847176079735, | |
| "grad_norm": 4.307113170623779, | |
| "learning_rate": 1.6330216449017443e-06, | |
| "loss": 1.6688, | |
| "step": 1043 | |
| }, | |
| { | |
| "epoch": 4.629014396456257, | |
| "grad_norm": 5.552404403686523, | |
| "learning_rate": 1.5967261793202904e-06, | |
| "loss": 1.5026, | |
| "step": 1044 | |
| }, | |
| { | |
| "epoch": 4.63344407530454, | |
| "grad_norm": 9.312074661254883, | |
| "learning_rate": 1.5608320650451425e-06, | |
| "loss": 1.3092, | |
| "step": 1045 | |
| }, | |
| { | |
| "epoch": 4.637873754152824, | |
| "grad_norm": 4.408802032470703, | |
| "learning_rate": 1.5253395997039066e-06, | |
| "loss": 1.4818, | |
| "step": 1046 | |
| }, | |
| { | |
| "epoch": 4.642303433001107, | |
| "grad_norm": 8.572196960449219, | |
| "learning_rate": 1.4902490775937527e-06, | |
| "loss": 1.4766, | |
| "step": 1047 | |
| }, | |
| { | |
| "epoch": 4.646733111849391, | |
| "grad_norm": 4.4483208656311035, | |
| "learning_rate": 1.4555607896790446e-06, | |
| "loss": 1.7217, | |
| "step": 1048 | |
| }, | |
| { | |
| "epoch": 4.651162790697675, | |
| "grad_norm": 5.438201904296875, | |
| "learning_rate": 1.4212750235888416e-06, | |
| "loss": 1.7277, | |
| "step": 1049 | |
| }, | |
| { | |
| "epoch": 4.655592469545958, | |
| "grad_norm": 4.61346960067749, | |
| "learning_rate": 1.3873920636146342e-06, | |
| "loss": 1.2524, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 4.6600221483942414, | |
| "grad_norm": 12.7802095413208, | |
| "learning_rate": 1.353912190707851e-06, | |
| "loss": 1.3947, | |
| "step": 1051 | |
| }, | |
| { | |
| "epoch": 4.664451827242525, | |
| "grad_norm": 9.374958992004395, | |
| "learning_rate": 1.320835682477628e-06, | |
| "loss": 1.8787, | |
| "step": 1052 | |
| }, | |
| { | |
| "epoch": 4.668881506090808, | |
| "grad_norm": 7.309841632843018, | |
| "learning_rate": 1.288162813188476e-06, | |
| "loss": 1.5597, | |
| "step": 1053 | |
| }, | |
| { | |
| "epoch": 4.673311184939092, | |
| "grad_norm": 7.758358478546143, | |
| "learning_rate": 1.2558938537579835e-06, | |
| "loss": 1.5036, | |
| "step": 1054 | |
| }, | |
| { | |
| "epoch": 4.677740863787376, | |
| "grad_norm": 5.403035640716553, | |
| "learning_rate": 1.2240290717546176e-06, | |
| "loss": 1.6294, | |
| "step": 1055 | |
| }, | |
| { | |
| "epoch": 4.682170542635659, | |
| "grad_norm": 4.8274102210998535, | |
| "learning_rate": 1.1925687313954437e-06, | |
| "loss": 1.5611, | |
| "step": 1056 | |
| }, | |
| { | |
| "epoch": 4.686600221483943, | |
| "grad_norm": 7.348108768463135, | |
| "learning_rate": 1.1615130935439978e-06, | |
| "loss": 1.504, | |
| "step": 1057 | |
| }, | |
| { | |
| "epoch": 4.691029900332226, | |
| "grad_norm": 7.85156774520874, | |
| "learning_rate": 1.1308624157080837e-06, | |
| "loss": 1.5926, | |
| "step": 1058 | |
| }, | |
| { | |
| "epoch": 4.695459579180509, | |
| "grad_norm": 7.822666168212891, | |
| "learning_rate": 1.1006169520376586e-06, | |
| "loss": 1.6488, | |
| "step": 1059 | |
| }, | |
| { | |
| "epoch": 4.699889258028793, | |
| "grad_norm": 13.061169624328613, | |
| "learning_rate": 1.070776953322694e-06, | |
| "loss": 1.2314, | |
| "step": 1060 | |
| }, | |
| { | |
| "epoch": 4.704318936877076, | |
| "grad_norm": 7.329505443572998, | |
| "learning_rate": 1.0413426669911408e-06, | |
| "loss": 1.7047, | |
| "step": 1061 | |
| }, | |
| { | |
| "epoch": 4.70874861572536, | |
| "grad_norm": 7.315627574920654, | |
| "learning_rate": 1.0123143371068456e-06, | |
| "loss": 1.4365, | |
| "step": 1062 | |
| }, | |
| { | |
| "epoch": 4.713178294573644, | |
| "grad_norm": 5.628126621246338, | |
| "learning_rate": 9.836922043675368e-07, | |
| "loss": 1.6823, | |
| "step": 1063 | |
| }, | |
| { | |
| "epoch": 4.717607973421927, | |
| "grad_norm": 6.389928340911865, | |
| "learning_rate": 9.554765061028371e-07, | |
| "loss": 1.4238, | |
| "step": 1064 | |
| }, | |
| { | |
| "epoch": 4.7220376522702106, | |
| "grad_norm": 5.262420177459717, | |
| "learning_rate": 9.276674762722704e-07, | |
| "loss": 1.486, | |
| "step": 1065 | |
| }, | |
| { | |
| "epoch": 4.726467331118494, | |
| "grad_norm": 4.576107025146484, | |
| "learning_rate": 9.002653454633581e-07, | |
| "loss": 1.5454, | |
| "step": 1066 | |
| }, | |
| { | |
| "epoch": 4.730897009966777, | |
| "grad_norm": 5.287619113922119, | |
| "learning_rate": 8.732703408896648e-07, | |
| "loss": 1.8704, | |
| "step": 1067 | |
| }, | |
| { | |
| "epoch": 4.735326688815061, | |
| "grad_norm": 4.985103607177734, | |
| "learning_rate": 8.46682686388961e-07, | |
| "loss": 1.3233, | |
| "step": 1068 | |
| }, | |
| { | |
| "epoch": 4.739756367663344, | |
| "grad_norm": 4.814030647277832, | |
| "learning_rate": 8.205026024213192e-07, | |
| "loss": 1.5228, | |
| "step": 1069 | |
| }, | |
| { | |
| "epoch": 4.7441860465116275, | |
| "grad_norm": 6.874385356903076, | |
| "learning_rate": 7.947303060673372e-07, | |
| "loss": 1.8679, | |
| "step": 1070 | |
| }, | |
| { | |
| "epoch": 4.748615725359912, | |
| "grad_norm": 5.1823225021362305, | |
| "learning_rate": 7.693660110262902e-07, | |
| "loss": 1.513, | |
| "step": 1071 | |
| }, | |
| { | |
| "epoch": 4.753045404208195, | |
| "grad_norm": 6.403878211975098, | |
| "learning_rate": 7.444099276143812e-07, | |
| "loss": 1.4745, | |
| "step": 1072 | |
| }, | |
| { | |
| "epoch": 4.7574750830564785, | |
| "grad_norm": 5.249782562255859, | |
| "learning_rate": 7.198622627630047e-07, | |
| "loss": 1.5437, | |
| "step": 1073 | |
| }, | |
| { | |
| "epoch": 4.761904761904762, | |
| "grad_norm": 11.559475898742676, | |
| "learning_rate": 6.957232200170083e-07, | |
| "loss": 1.7486, | |
| "step": 1074 | |
| }, | |
| { | |
| "epoch": 4.766334440753045, | |
| "grad_norm": 5.234459400177002, | |
| "learning_rate": 6.719929995330388e-07, | |
| "loss": 1.4821, | |
| "step": 1075 | |
| }, | |
| { | |
| "epoch": 4.770764119601329, | |
| "grad_norm": 6.172218322753906, | |
| "learning_rate": 6.486717980778323e-07, | |
| "loss": 1.6629, | |
| "step": 1076 | |
| }, | |
| { | |
| "epoch": 4.775193798449612, | |
| "grad_norm": 4.527020454406738, | |
| "learning_rate": 6.257598090266325e-07, | |
| "loss": 1.444, | |
| "step": 1077 | |
| }, | |
| { | |
| "epoch": 4.779623477297896, | |
| "grad_norm": 7.31265926361084, | |
| "learning_rate": 6.032572223615695e-07, | |
| "loss": 1.4468, | |
| "step": 1078 | |
| }, | |
| { | |
| "epoch": 4.78405315614618, | |
| "grad_norm": 5.2357988357543945, | |
| "learning_rate": 5.811642246700722e-07, | |
| "loss": 1.6563, | |
| "step": 1079 | |
| }, | |
| { | |
| "epoch": 4.788482834994463, | |
| "grad_norm": 4.705022811889648, | |
| "learning_rate": 5.594809991433414e-07, | |
| "loss": 1.6064, | |
| "step": 1080 | |
| }, | |
| { | |
| "epoch": 4.792912513842746, | |
| "grad_norm": 6.180661678314209, | |
| "learning_rate": 5.382077255747964e-07, | |
| "loss": 1.2207, | |
| "step": 1081 | |
| }, | |
| { | |
| "epoch": 4.79734219269103, | |
| "grad_norm": 6.87699031829834, | |
| "learning_rate": 5.173445803586307e-07, | |
| "loss": 1.7086, | |
| "step": 1082 | |
| }, | |
| { | |
| "epoch": 4.801771871539313, | |
| "grad_norm": 4.476713180541992, | |
| "learning_rate": 4.968917364883196e-07, | |
| "loss": 1.579, | |
| "step": 1083 | |
| }, | |
| { | |
| "epoch": 4.8062015503875966, | |
| "grad_norm": 7.013663291931152, | |
| "learning_rate": 4.76849363555204e-07, | |
| "loss": 1.3113, | |
| "step": 1084 | |
| }, | |
| { | |
| "epoch": 4.810631229235881, | |
| "grad_norm": 5.766423225402832, | |
| "learning_rate": 4.5721762774704747e-07, | |
| "loss": 1.646, | |
| "step": 1085 | |
| }, | |
| { | |
| "epoch": 4.815060908084164, | |
| "grad_norm": 5.878317832946777, | |
| "learning_rate": 4.379966918467093e-07, | |
| "loss": 1.4083, | |
| "step": 1086 | |
| }, | |
| { | |
| "epoch": 4.819490586932448, | |
| "grad_norm": 4.808867454528809, | |
| "learning_rate": 4.1918671523075716e-07, | |
| "loss": 1.5285, | |
| "step": 1087 | |
| }, | |
| { | |
| "epoch": 4.823920265780731, | |
| "grad_norm": 6.050039768218994, | |
| "learning_rate": 4.0078785386815644e-07, | |
| "loss": 1.8131, | |
| "step": 1088 | |
| }, | |
| { | |
| "epoch": 4.828349944629014, | |
| "grad_norm": 6.054554462432861, | |
| "learning_rate": 3.8280026031898287e-07, | |
| "loss": 1.2736, | |
| "step": 1089 | |
| }, | |
| { | |
| "epoch": 4.832779623477298, | |
| "grad_norm": 11.464051246643066, | |
| "learning_rate": 3.6522408373314553e-07, | |
| "loss": 1.8098, | |
| "step": 1090 | |
| }, | |
| { | |
| "epoch": 4.837209302325581, | |
| "grad_norm": 5.715487957000732, | |
| "learning_rate": 3.4805946984917683e-07, | |
| "loss": 1.7507, | |
| "step": 1091 | |
| }, | |
| { | |
| "epoch": 4.841638981173865, | |
| "grad_norm": 5.533278465270996, | |
| "learning_rate": 3.3130656099297777e-07, | |
| "loss": 1.6937, | |
| "step": 1092 | |
| }, | |
| { | |
| "epoch": 4.846068660022149, | |
| "grad_norm": 4.647676944732666, | |
| "learning_rate": 3.149654960766857e-07, | |
| "loss": 1.8874, | |
| "step": 1093 | |
| }, | |
| { | |
| "epoch": 4.850498338870432, | |
| "grad_norm": 6.739248752593994, | |
| "learning_rate": 2.9903641059749186e-07, | |
| "loss": 1.5665, | |
| "step": 1094 | |
| }, | |
| { | |
| "epoch": 4.8549280177187155, | |
| "grad_norm": 5.865329742431641, | |
| "learning_rate": 2.8351943663653124e-07, | |
| "loss": 1.4985, | |
| "step": 1095 | |
| }, | |
| { | |
| "epoch": 4.859357696566999, | |
| "grad_norm": 5.861200332641602, | |
| "learning_rate": 2.684147028577777e-07, | |
| "loss": 1.6865, | |
| "step": 1096 | |
| }, | |
| { | |
| "epoch": 4.863787375415282, | |
| "grad_norm": 5.523236274719238, | |
| "learning_rate": 2.537223345069895e-07, | |
| "loss": 1.5327, | |
| "step": 1097 | |
| }, | |
| { | |
| "epoch": 4.868217054263566, | |
| "grad_norm": 5.882645130157471, | |
| "learning_rate": 2.3944245341065987e-07, | |
| "loss": 1.6286, | |
| "step": 1098 | |
| }, | |
| { | |
| "epoch": 4.872646733111849, | |
| "grad_norm": 8.576865196228027, | |
| "learning_rate": 2.255751779750237e-07, | |
| "loss": 1.7435, | |
| "step": 1099 | |
| }, | |
| { | |
| "epoch": 4.877076411960132, | |
| "grad_norm": 5.604006767272949, | |
| "learning_rate": 2.12120623185047e-07, | |
| "loss": 1.4963, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 4.881506090808417, | |
| "grad_norm": 10.904509544372559, | |
| "learning_rate": 1.9907890060348878e-07, | |
| "loss": 1.636, | |
| "step": 1101 | |
| }, | |
| { | |
| "epoch": 4.8859357696567, | |
| "grad_norm": 5.717619895935059, | |
| "learning_rate": 1.8645011836999626e-07, | |
| "loss": 1.5698, | |
| "step": 1102 | |
| }, | |
| { | |
| "epoch": 4.8903654485049834, | |
| "grad_norm": 5.974473476409912, | |
| "learning_rate": 1.7423438120017234e-07, | |
| "loss": 1.7177, | |
| "step": 1103 | |
| }, | |
| { | |
| "epoch": 4.894795127353267, | |
| "grad_norm": 5.090288162231445, | |
| "learning_rate": 1.624317903847372e-07, | |
| "loss": 1.4201, | |
| "step": 1104 | |
| }, | |
| { | |
| "epoch": 4.89922480620155, | |
| "grad_norm": 4.930177688598633, | |
| "learning_rate": 1.5104244378865152e-07, | |
| "loss": 1.246, | |
| "step": 1105 | |
| }, | |
| { | |
| "epoch": 4.903654485049834, | |
| "grad_norm": 9.135576248168945, | |
| "learning_rate": 1.4006643585035006e-07, | |
| "loss": 1.6311, | |
| "step": 1106 | |
| }, | |
| { | |
| "epoch": 4.908084163898117, | |
| "grad_norm": 5.666680812835693, | |
| "learning_rate": 1.2950385758094263e-07, | |
| "loss": 1.513, | |
| "step": 1107 | |
| }, | |
| { | |
| "epoch": 4.912513842746401, | |
| "grad_norm": 6.164681434631348, | |
| "learning_rate": 1.1935479656342562e-07, | |
| "loss": 1.8122, | |
| "step": 1108 | |
| }, | |
| { | |
| "epoch": 4.916943521594685, | |
| "grad_norm": 8.394211769104004, | |
| "learning_rate": 1.0961933695201598e-07, | |
| "loss": 1.5239, | |
| "step": 1109 | |
| }, | |
| { | |
| "epoch": 4.921373200442968, | |
| "grad_norm": 6.864090919494629, | |
| "learning_rate": 1.0029755947139618e-07, | |
| "loss": 1.0922, | |
| "step": 1110 | |
| }, | |
| { | |
| "epoch": 4.925802879291251, | |
| "grad_norm": 5.1737751960754395, | |
| "learning_rate": 9.138954141608702e-08, | |
| "loss": 1.5132, | |
| "step": 1111 | |
| }, | |
| { | |
| "epoch": 4.930232558139535, | |
| "grad_norm": 4.374934673309326, | |
| "learning_rate": 8.289535664978698e-08, | |
| "loss": 1.495, | |
| "step": 1112 | |
| }, | |
| { | |
| "epoch": 4.934662236987818, | |
| "grad_norm": 4.535995960235596, | |
| "learning_rate": 7.481507560475053e-08, | |
| "loss": 1.787, | |
| "step": 1113 | |
| }, | |
| { | |
| "epoch": 4.9390919158361015, | |
| "grad_norm": 5.156623840332031, | |
| "learning_rate": 6.714876528123304e-08, | |
| "loss": 1.4265, | |
| "step": 1114 | |
| }, | |
| { | |
| "epoch": 4.943521594684386, | |
| "grad_norm": 5.1794328689575195, | |
| "learning_rate": 5.989648924690782e-08, | |
| "loss": 1.6533, | |
| "step": 1115 | |
| }, | |
| { | |
| "epoch": 4.947951273532669, | |
| "grad_norm": 6.805692672729492, | |
| "learning_rate": 5.305830763635555e-08, | |
| "loss": 1.4241, | |
| "step": 1116 | |
| }, | |
| { | |
| "epoch": 4.9523809523809526, | |
| "grad_norm": 4.86265230178833, | |
| "learning_rate": 4.663427715055346e-08, | |
| "loss": 1.4307, | |
| "step": 1117 | |
| }, | |
| { | |
| "epoch": 4.956810631229236, | |
| "grad_norm": 5.848089218139648, | |
| "learning_rate": 4.062445105642021e-08, | |
| "loss": 1.7906, | |
| "step": 1118 | |
| }, | |
| { | |
| "epoch": 4.961240310077519, | |
| "grad_norm": 6.41409969329834, | |
| "learning_rate": 3.502887918636066e-08, | |
| "loss": 1.5248, | |
| "step": 1119 | |
| }, | |
| { | |
| "epoch": 4.965669988925803, | |
| "grad_norm": 4.486702919006348, | |
| "learning_rate": 2.984760793785513e-08, | |
| "loss": 1.8782, | |
| "step": 1120 | |
| }, | |
| { | |
| "epoch": 4.970099667774086, | |
| "grad_norm": 10.109087944030762, | |
| "learning_rate": 2.508068027308186e-08, | |
| "loss": 1.5929, | |
| "step": 1121 | |
| }, | |
| { | |
| "epoch": 4.97452934662237, | |
| "grad_norm": 5.497599124908447, | |
| "learning_rate": 2.07281357185507e-08, | |
| "loss": 1.2086, | |
| "step": 1122 | |
| }, | |
| { | |
| "epoch": 4.978959025470654, | |
| "grad_norm": 5.0335001945495605, | |
| "learning_rate": 1.6790010364786667e-08, | |
| "loss": 1.5011, | |
| "step": 1123 | |
| }, | |
| { | |
| "epoch": 4.983388704318937, | |
| "grad_norm": 6.264035224914551, | |
| "learning_rate": 1.3266336866024631e-08, | |
| "loss": 1.5586, | |
| "step": 1124 | |
| }, | |
| { | |
| "epoch": 4.9878183831672205, | |
| "grad_norm": 4.8113274574279785, | |
| "learning_rate": 1.0157144439937317e-08, | |
| "loss": 1.4083, | |
| "step": 1125 | |
| }, | |
| { | |
| "epoch": 4.9878183831672205, | |
| "step": 1125, | |
| "total_flos": 1.8752222545942938e+18, | |
| "train_loss": 1.867327324125502, | |
| "train_runtime": 15337.9497, | |
| "train_samples_per_second": 2.353, | |
| "train_steps_per_second": 0.073 | |
| } | |
| ], | |
| "logging_steps": 1.0, | |
| "max_steps": 1125, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 5, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 1.8752222545942938e+18, | |
| "train_batch_size": 1, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |