{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 1.0,
  "eval_steps": 500,
  "global_step": 6885,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0014524328249818446,
      "grad_norm": 4.328955480739728,
      "learning_rate": 1.3062409288824383e-07,
      "loss": 0.9607,
      "step": 10
    },
    {
      "epoch": 0.002904865649963689,
      "grad_norm": 4.469323164876104,
      "learning_rate": 2.757619738751814e-07,
      "loss": 0.9859,
      "step": 20
    },
    {
      "epoch": 0.004357298474945534,
      "grad_norm": 4.000416594025176,
      "learning_rate": 4.2089985486211904e-07,
      "loss": 0.9872,
      "step": 30
    },
    {
      "epoch": 0.005809731299927378,
      "grad_norm": 3.1566001029759914,
      "learning_rate": 5.660377358490567e-07,
      "loss": 0.9191,
      "step": 40
    },
    {
      "epoch": 0.007262164124909223,
      "grad_norm": 2.000776925354802,
      "learning_rate": 7.111756168359943e-07,
      "loss": 0.866,
      "step": 50
    },
    {
      "epoch": 0.008714596949891068,
      "grad_norm": 2.03383269865318,
      "learning_rate": 8.563134978229319e-07,
      "loss": 0.8475,
      "step": 60
    },
    {
      "epoch": 0.010167029774872912,
      "grad_norm": 1.981671850063017,
      "learning_rate": 1.0014513788098695e-06,
      "loss": 0.8145,
      "step": 70
    },
    {
      "epoch": 0.011619462599854757,
      "grad_norm": 1.9935447101504142,
      "learning_rate": 1.146589259796807e-06,
      "loss": 0.7874,
      "step": 80
    },
    {
      "epoch": 0.013071895424836602,
      "grad_norm": 1.696794144473072,
      "learning_rate": 1.2917271407837448e-06,
      "loss": 0.7606,
      "step": 90
    },
    {
      "epoch": 0.014524328249818447,
      "grad_norm": 1.8441704167155635,
      "learning_rate": 1.4368650217706823e-06,
      "loss": 0.7505,
      "step": 100
    },
    {
      "epoch": 0.01597676107480029,
      "grad_norm": 1.6167640330505846,
      "learning_rate": 1.5820029027576197e-06,
      "loss": 0.7432,
      "step": 110
    },
    {
      "epoch": 0.017429193899782137,
      "grad_norm": 1.7310300613256226,
      "learning_rate": 1.7271407837445576e-06,
      "loss": 0.7502,
      "step": 120
    },
    {
      "epoch": 0.01888162672476398,
      "grad_norm": 1.5504171157690307,
      "learning_rate": 1.872278664731495e-06,
      "loss": 0.7075,
      "step": 130
    },
    {
      "epoch": 0.020334059549745823,
      "grad_norm": 1.5001595551333269,
      "learning_rate": 2.0174165457184327e-06,
      "loss": 0.7242,
      "step": 140
    },
    {
      "epoch": 0.02178649237472767,
      "grad_norm": 1.7680255328873922,
      "learning_rate": 2.1625544267053704e-06,
      "loss": 0.7299,
      "step": 150
    },
    {
      "epoch": 0.023238925199709513,
      "grad_norm": 1.9776874021989124,
      "learning_rate": 2.307692307692308e-06,
      "loss": 0.7074,
      "step": 160
    },
    {
      "epoch": 0.024691358024691357,
      "grad_norm": 1.645294675336186,
      "learning_rate": 2.4528301886792453e-06,
      "loss": 0.7003,
      "step": 170
    },
    {
      "epoch": 0.026143790849673203,
      "grad_norm": 1.903626800669526,
      "learning_rate": 2.597968069666183e-06,
      "loss": 0.6935,
      "step": 180
    },
    {
      "epoch": 0.027596223674655047,
      "grad_norm": 1.6296522016767983,
      "learning_rate": 2.7431059506531207e-06,
      "loss": 0.7099,
      "step": 190
    },
    {
      "epoch": 0.029048656499636893,
      "grad_norm": 1.5624745122869332,
      "learning_rate": 2.8882438316400583e-06,
      "loss": 0.7082,
      "step": 200
    },
    {
      "epoch": 0.030501089324618737,
      "grad_norm": 1.5327148829437787,
      "learning_rate": 3.033381712626996e-06,
      "loss": 0.6847,
      "step": 210
    },
    {
      "epoch": 0.03195352214960058,
      "grad_norm": 1.4217156007581908,
      "learning_rate": 3.1785195936139337e-06,
      "loss": 0.6997,
      "step": 220
    },
    {
      "epoch": 0.03340595497458242,
      "grad_norm": 1.678714535521671,
      "learning_rate": 3.323657474600871e-06,
      "loss": 0.6922,
      "step": 230
    },
    {
      "epoch": 0.034858387799564274,
      "grad_norm": 1.6893028132334575,
      "learning_rate": 3.4687953555878086e-06,
      "loss": 0.6764,
      "step": 240
    },
    {
      "epoch": 0.03631082062454612,
      "grad_norm": 1.6842923668045748,
      "learning_rate": 3.6139332365747467e-06,
      "loss": 0.6838,
      "step": 250
    },
    {
      "epoch": 0.03776325344952796,
      "grad_norm": 2.0758637079489306,
      "learning_rate": 3.759071117561684e-06,
      "loss": 0.6961,
      "step": 260
    },
    {
      "epoch": 0.0392156862745098,
      "grad_norm": 1.651886885559497,
      "learning_rate": 3.904208998548621e-06,
      "loss": 0.6619,
      "step": 270
    },
    {
      "epoch": 0.04066811909949165,
      "grad_norm": 1.6813735734416895,
      "learning_rate": 4.049346879535559e-06,
      "loss": 0.691,
      "step": 280
    },
    {
      "epoch": 0.04212055192447349,
      "grad_norm": 1.8001370749006687,
      "learning_rate": 4.194484760522497e-06,
      "loss": 0.6646,
      "step": 290
    },
    {
      "epoch": 0.04357298474945534,
      "grad_norm": 1.8255351447030483,
      "learning_rate": 4.339622641509435e-06,
      "loss": 0.6595,
      "step": 300
    },
    {
      "epoch": 0.04502541757443718,
      "grad_norm": 1.7918481140936697,
      "learning_rate": 4.484760522496372e-06,
      "loss": 0.6555,
      "step": 310
    },
    {
      "epoch": 0.04647785039941903,
      "grad_norm": 1.6697318257583398,
      "learning_rate": 4.629898403483309e-06,
      "loss": 0.6734,
      "step": 320
    },
    {
      "epoch": 0.04793028322440087,
      "grad_norm": 1.5656777878920214,
      "learning_rate": 4.775036284470247e-06,
      "loss": 0.6511,
      "step": 330
    },
    {
      "epoch": 0.04938271604938271,
      "grad_norm": 1.6515736055504289,
      "learning_rate": 4.920174165457185e-06,
      "loss": 0.6651,
      "step": 340
    },
    {
      "epoch": 0.050835148874364564,
      "grad_norm": 1.6517233906536315,
      "learning_rate": 5.065312046444122e-06,
      "loss": 0.665,
      "step": 350
    },
    {
      "epoch": 0.05228758169934641,
      "grad_norm": 1.6987223199576384,
      "learning_rate": 5.210449927431061e-06,
      "loss": 0.6632,
      "step": 360
    },
    {
      "epoch": 0.05374001452432825,
      "grad_norm": 1.578744968443496,
      "learning_rate": 5.355587808417998e-06,
      "loss": 0.665,
      "step": 370
    },
    {
      "epoch": 0.05519244734931009,
      "grad_norm": 1.4975426293081397,
      "learning_rate": 5.500725689404935e-06,
      "loss": 0.6511,
      "step": 380
    },
    {
      "epoch": 0.05664488017429194,
      "grad_norm": 1.7386717568110297,
      "learning_rate": 5.645863570391873e-06,
      "loss": 0.6676,
      "step": 390
    },
    {
      "epoch": 0.05809731299927379,
      "grad_norm": 1.5916583497500596,
      "learning_rate": 5.7910014513788105e-06,
      "loss": 0.6635,
      "step": 400
    },
    {
      "epoch": 0.05954974582425563,
      "grad_norm": 1.6931617934865184,
      "learning_rate": 5.936139332365748e-06,
      "loss": 0.6668,
      "step": 410
    },
    {
      "epoch": 0.06100217864923747,
      "grad_norm": 1.5616372247201953,
      "learning_rate": 6.081277213352685e-06,
      "loss": 0.6685,
      "step": 420
    },
    {
      "epoch": 0.06245461147421932,
      "grad_norm": 1.5424914283941253,
      "learning_rate": 6.226415094339623e-06,
      "loss": 0.659,
      "step": 430
    },
    {
      "epoch": 0.06390704429920116,
      "grad_norm": 1.6468311050594455,
      "learning_rate": 6.37155297532656e-06,
      "loss": 0.6453,
      "step": 440
    },
    {
      "epoch": 0.06535947712418301,
      "grad_norm": 1.5765402125957226,
      "learning_rate": 6.5166908563134976e-06,
      "loss": 0.6598,
      "step": 450
    },
    {
      "epoch": 0.06681190994916485,
      "grad_norm": 1.7349394887283642,
      "learning_rate": 6.6618287373004365e-06,
      "loss": 0.6619,
      "step": 460
    },
    {
      "epoch": 0.0682643427741467,
      "grad_norm": 1.6385635232751372,
      "learning_rate": 6.806966618287374e-06,
      "loss": 0.6692,
      "step": 470
    },
    {
      "epoch": 0.06971677559912855,
      "grad_norm": 1.4945507177883908,
      "learning_rate": 6.952104499274311e-06,
      "loss": 0.6484,
      "step": 480
    },
    {
      "epoch": 0.07116920842411038,
      "grad_norm": 1.583857774726375,
      "learning_rate": 7.097242380261249e-06,
      "loss": 0.657,
      "step": 490
    },
    {
      "epoch": 0.07262164124909223,
      "grad_norm": 1.8780189334850588,
      "learning_rate": 7.242380261248186e-06,
      "loss": 0.6601,
      "step": 500
    },
    {
      "epoch": 0.07407407407407407,
      "grad_norm": 1.5153409007972507,
      "learning_rate": 7.387518142235124e-06,
      "loss": 0.6542,
      "step": 510
    },
    {
      "epoch": 0.07552650689905592,
      "grad_norm": 1.5243833834622142,
      "learning_rate": 7.532656023222062e-06,
      "loss": 0.6476,
      "step": 520
    },
    {
      "epoch": 0.07697893972403776,
      "grad_norm": 1.6429693792028686,
      "learning_rate": 7.677793904208998e-06,
      "loss": 0.6451,
      "step": 530
    },
    {
      "epoch": 0.0784313725490196,
      "grad_norm": 1.802860360098263,
      "learning_rate": 7.822931785195936e-06,
      "loss": 0.6527,
      "step": 540
    },
    {
      "epoch": 0.07988380537400146,
      "grad_norm": 1.6594363957156038,
      "learning_rate": 7.968069666182874e-06,
      "loss": 0.661,
      "step": 550
    },
    {
      "epoch": 0.0813362381989833,
      "grad_norm": 1.5938255936259151,
      "learning_rate": 8.113207547169812e-06,
      "loss": 0.6547,
      "step": 560
    },
    {
      "epoch": 0.08278867102396514,
      "grad_norm": 1.3939924292770436,
      "learning_rate": 8.25834542815675e-06,
      "loss": 0.6609,
      "step": 570
    },
    {
      "epoch": 0.08424110384894698,
      "grad_norm": 1.5321796462771227,
      "learning_rate": 8.403483309143687e-06,
      "loss": 0.6419,
      "step": 580
    },
    {
      "epoch": 0.08569353667392883,
      "grad_norm": 1.5907007682060863,
      "learning_rate": 8.548621190130625e-06,
      "loss": 0.625,
      "step": 590
    },
    {
      "epoch": 0.08714596949891068,
      "grad_norm": 1.6048966671231157,
      "learning_rate": 8.693759071117563e-06,
      "loss": 0.658,
      "step": 600
    },
    {
      "epoch": 0.08859840232389252,
      "grad_norm": 1.457751877262412,
      "learning_rate": 8.8388969521045e-06,
      "loss": 0.6456,
      "step": 610
    },
    {
      "epoch": 0.09005083514887437,
      "grad_norm": 1.3925725985786772,
      "learning_rate": 8.984034833091438e-06,
      "loss": 0.6494,
      "step": 620
    },
    {
      "epoch": 0.0915032679738562,
      "grad_norm": 1.6476815627809678,
      "learning_rate": 9.129172714078376e-06,
      "loss": 0.6604,
      "step": 630
    },
    {
      "epoch": 0.09295570079883805,
      "grad_norm": 1.4844043302240553,
      "learning_rate": 9.274310595065312e-06,
      "loss": 0.6462,
      "step": 640
    },
    {
      "epoch": 0.0944081336238199,
      "grad_norm": 1.5541257847812342,
      "learning_rate": 9.41944847605225e-06,
      "loss": 0.6464,
      "step": 650
    },
    {
      "epoch": 0.09586056644880174,
      "grad_norm": 1.5339956751582804,
      "learning_rate": 9.564586357039188e-06,
      "loss": 0.6471,
      "step": 660
    },
    {
      "epoch": 0.09731299927378359,
      "grad_norm": 1.550006983868159,
      "learning_rate": 9.709724238026126e-06,
      "loss": 0.6519,
      "step": 670
    },
    {
      "epoch": 0.09876543209876543,
      "grad_norm": 1.298622779401985,
      "learning_rate": 9.854862119013063e-06,
      "loss": 0.6508,
      "step": 680
    },
    {
      "epoch": 0.10021786492374728,
      "grad_norm": 1.4545201677417376,
      "learning_rate": 1e-05,
      "loss": 0.6483,
      "step": 690
    },
    {
      "epoch": 0.10167029774872913,
      "grad_norm": 1.7514454450540817,
      "learning_rate": 9.999935728859667e-06,
      "loss": 0.6517,
      "step": 700
    },
    {
      "epoch": 0.10312273057371096,
      "grad_norm": 1.3010290416328456,
      "learning_rate": 9.999742917090981e-06,
      "loss": 0.6435,
      "step": 710
    },
    {
      "epoch": 0.10457516339869281,
      "grad_norm": 1.5222737445349914,
      "learning_rate": 9.999421569650833e-06,
      "loss": 0.6355,
      "step": 720
    },
    {
      "epoch": 0.10602759622367465,
      "grad_norm": 1.5758824439402839,
      "learning_rate": 9.99897169480057e-06,
      "loss": 0.6414,
      "step": 730
    },
    {
      "epoch": 0.1074800290486565,
      "grad_norm": 1.3245458819453462,
      "learning_rate": 9.99839330410578e-06,
      "loss": 0.6416,
      "step": 740
    },
    {
      "epoch": 0.10893246187363835,
      "grad_norm": 1.4753577499137038,
      "learning_rate": 9.997686412435996e-06,
      "loss": 0.6381,
      "step": 750
    },
    {
      "epoch": 0.11038489469862019,
      "grad_norm": 1.4578988593383,
      "learning_rate": 9.99685103796431e-06,
      "loss": 0.6369,
      "step": 760
    },
    {
      "epoch": 0.11183732752360204,
      "grad_norm": 1.389881220599468,
      "learning_rate": 9.99588720216691e-06,
      "loss": 0.6622,
      "step": 770
    },
    {
      "epoch": 0.11328976034858387,
      "grad_norm": 1.2318560606230133,
      "learning_rate": 9.994794929822527e-06,
      "loss": 0.6279,
      "step": 780
    },
    {
      "epoch": 0.11474219317356572,
      "grad_norm": 1.355472620629438,
      "learning_rate": 9.993574249011797e-06,
      "loss": 0.641,
      "step": 790
    },
    {
      "epoch": 0.11619462599854757,
      "grad_norm": 1.4379602146139996,
      "learning_rate": 9.992225191116538e-06,
      "loss": 0.6439,
      "step": 800
    },
    {
      "epoch": 0.11764705882352941,
      "grad_norm": 1.4777958226910466,
      "learning_rate": 9.990747790818946e-06,
      "loss": 0.6457,
      "step": 810
    },
    {
      "epoch": 0.11909949164851126,
      "grad_norm": 1.2895229336241503,
      "learning_rate": 9.989142086100703e-06,
      "loss": 0.6483,
      "step": 820
    },
    {
      "epoch": 0.1205519244734931,
      "grad_norm": 1.4811460587250382,
      "learning_rate": 9.987408118241995e-06,
      "loss": 0.6509,
      "step": 830
    },
    {
      "epoch": 0.12200435729847495,
      "grad_norm": 1.3189208191268318,
      "learning_rate": 9.985545931820463e-06,
      "loss": 0.6181,
      "step": 840
    },
    {
      "epoch": 0.12345679012345678,
      "grad_norm": 1.3731300368595278,
      "learning_rate": 9.983555574710043e-06,
      "loss": 0.6274,
      "step": 850
    },
    {
      "epoch": 0.12490922294843863,
      "grad_norm": 1.4055775942483093,
      "learning_rate": 9.981437098079743e-06,
      "loss": 0.6398,
      "step": 860
    },
    {
      "epoch": 0.12636165577342048,
      "grad_norm": 1.3307192435974602,
      "learning_rate": 9.979190556392326e-06,
      "loss": 0.6393,
      "step": 870
    },
    {
      "epoch": 0.12781408859840232,
      "grad_norm": 1.5622917958142868,
      "learning_rate": 9.976816007402912e-06,
      "loss": 0.6456,
      "step": 880
    },
    {
      "epoch": 0.12926652142338416,
      "grad_norm": 1.390636406480548,
      "learning_rate": 9.974313512157488e-06,
      "loss": 0.6288,
      "step": 890
    },
    {
      "epoch": 0.13071895424836602,
      "grad_norm": 1.4427250843896926,
      "learning_rate": 9.971683134991344e-06,
      "loss": 0.6266,
      "step": 900
    },
    {
      "epoch": 0.13217138707334786,
      "grad_norm": 1.4098179198178282,
      "learning_rate": 9.968924943527418e-06,
      "loss": 0.6411,
      "step": 910
    },
    {
      "epoch": 0.1336238198983297,
      "grad_norm": 1.4962238363929918,
      "learning_rate": 9.96603900867455e-06,
      "loss": 0.6315,
      "step": 920
    },
    {
      "epoch": 0.13507625272331156,
      "grad_norm": 1.3209044251278015,
      "learning_rate": 9.963025404625673e-06,
      "loss": 0.6423,
      "step": 930
    },
    {
      "epoch": 0.1365286855482934,
      "grad_norm": 1.39955503516968,
      "learning_rate": 9.959884208855893e-06,
      "loss": 0.6361,
      "step": 940
    },
    {
      "epoch": 0.13798111837327523,
      "grad_norm": 1.5348970475105241,
      "learning_rate": 9.956615502120504e-06,
      "loss": 0.6241,
      "step": 950
    },
    {
      "epoch": 0.1394335511982571,
      "grad_norm": 1.48874630945738,
      "learning_rate": 9.953219368452908e-06,
      "loss": 0.631,
      "step": 960
    },
    {
      "epoch": 0.14088598402323893,
      "grad_norm": 1.310857282598366,
      "learning_rate": 9.949695895162464e-06,
      "loss": 0.627,
      "step": 970
    },
    {
      "epoch": 0.14233841684822077,
      "grad_norm": 1.3619342578169393,
      "learning_rate": 9.946045172832224e-06,
      "loss": 0.6387,
      "step": 980
    },
    {
      "epoch": 0.1437908496732026,
      "grad_norm": 1.4936986486504984,
      "learning_rate": 9.942267295316625e-06,
      "loss": 0.6331,
      "step": 990
    },
    {
      "epoch": 0.14524328249818447,
      "grad_norm": 1.32511584393411,
      "learning_rate": 9.938362359739068e-06,
      "loss": 0.626,
      "step": 1000
    },
    {
      "epoch": 0.1466957153231663,
      "grad_norm": 1.3291454266011833,
      "learning_rate": 9.934330466489414e-06,
      "loss": 0.6451,
      "step": 1010
    },
    {
      "epoch": 0.14814814814814814,
      "grad_norm": 1.3289648153139675,
      "learning_rate": 9.930171719221418e-06,
      "loss": 0.6333,
      "step": 1020
    },
    {
      "epoch": 0.14960058097313,
      "grad_norm": 1.3388955314518605,
      "learning_rate": 9.925886224850047e-06,
      "loss": 0.6329,
      "step": 1030
    },
    {
      "epoch": 0.15105301379811184,
      "grad_norm": 1.3788458990043229,
      "learning_rate": 9.921474093548748e-06,
      "loss": 0.6308,
      "step": 1040
    },
    {
      "epoch": 0.15250544662309368,
      "grad_norm": 1.2630947233952987,
      "learning_rate": 9.916935438746604e-06,
      "loss": 0.6366,
      "step": 1050
    },
    {
      "epoch": 0.1539578794480755,
      "grad_norm": 1.2586848110727198,
      "learning_rate": 9.912270377125424e-06,
      "loss": 0.6224,
      "step": 1060
    },
    {
      "epoch": 0.15541031227305738,
      "grad_norm": 1.5648142512317709,
      "learning_rate": 9.90747902861674e-06,
      "loss": 0.6261,
      "step": 1070
    },
    {
      "epoch": 0.1568627450980392,
      "grad_norm": 1.477705850244199,
      "learning_rate": 9.902561516398723e-06,
      "loss": 0.6207,
      "step": 1080
    },
    {
      "epoch": 0.15831517792302105,
      "grad_norm": 1.2950681154644361,
      "learning_rate": 9.897517966893023e-06,
      "loss": 0.6218,
      "step": 1090
    },
    {
      "epoch": 0.15976761074800291,
      "grad_norm": 1.4613516139089748,
      "learning_rate": 9.892348509761509e-06,
      "loss": 0.6237,
      "step": 1100
    },
    {
      "epoch": 0.16122004357298475,
      "grad_norm": 1.2641419484176866,
      "learning_rate": 9.887053277902943e-06,
      "loss": 0.6425,
      "step": 1110
    },
    {
      "epoch": 0.1626724763979666,
      "grad_norm": 1.2419109246681843,
      "learning_rate": 9.881632407449561e-06,
      "loss": 0.6423,
      "step": 1120
    },
    {
      "epoch": 0.16412490922294845,
      "grad_norm": 1.4096648257937974,
      "learning_rate": 9.876086037763575e-06,
      "loss": 0.6383,
      "step": 1130
    },
    {
      "epoch": 0.1655773420479303,
      "grad_norm": 1.2574892255736747,
      "learning_rate": 9.870414311433585e-06,
      "loss": 0.6059,
      "step": 1140
    },
    {
      "epoch": 0.16702977487291212,
      "grad_norm": 1.2716145459010044,
      "learning_rate": 9.86461737427092e-06,
      "loss": 0.6098,
      "step": 1150
    },
    {
      "epoch": 0.16848220769789396,
      "grad_norm": 1.1998298755084313,
      "learning_rate": 9.858695375305885e-06,
      "loss": 0.6214,
      "step": 1160
    },
    {
      "epoch": 0.16993464052287582,
      "grad_norm": 1.4281449888166444,
      "learning_rate": 9.852648466783927e-06,
      "loss": 0.6241,
      "step": 1170
    },
    {
      "epoch": 0.17138707334785766,
      "grad_norm": 1.4071764477667867,
      "learning_rate": 9.84647680416173e-06,
      "loss": 0.6474,
      "step": 1180
    },
    {
      "epoch": 0.1728395061728395,
      "grad_norm": 1.2174453861834778,
      "learning_rate": 9.840180546103215e-06,
      "loss": 0.6326,
      "step": 1190
    },
    {
      "epoch": 0.17429193899782136,
      "grad_norm": 1.3029300772595094,
      "learning_rate": 9.833759854475453e-06,
      "loss": 0.6185,
      "step": 1200
    },
    {
      "epoch": 0.1757443718228032,
      "grad_norm": 1.271112016193465,
      "learning_rate": 9.827214894344514e-06,
      "loss": 0.6301,
      "step": 1210
    },
    {
      "epoch": 0.17719680464778503,
      "grad_norm": 1.2997276991719462,
      "learning_rate": 9.82054583397122e-06,
      "loss": 0.6317,
      "step": 1220
    },
    {
      "epoch": 0.1786492374727669,
      "grad_norm": 1.2096030387104992,
      "learning_rate": 9.813752844806814e-06,
      "loss": 0.6159,
      "step": 1230
    },
    {
      "epoch": 0.18010167029774873,
      "grad_norm": 1.2973416257944899,
      "learning_rate": 9.806836101488561e-06,
      "loss": 0.6289,
      "step": 1240
    },
    {
      "epoch": 0.18155410312273057,
      "grad_norm": 1.3197440048632956,
      "learning_rate": 9.799795781835253e-06,
      "loss": 0.6088,
      "step": 1250
    },
    {
      "epoch": 0.1830065359477124,
      "grad_norm": 1.2535036782710556,
      "learning_rate": 9.79263206684264e-06,
      "loss": 0.6206,
      "step": 1260
    },
    {
      "epoch": 0.18445896877269427,
      "grad_norm": 1.3190252094745194,
      "learning_rate": 9.785345140678775e-06,
      "loss": 0.6149,
      "step": 1270
    },
    {
      "epoch": 0.1859114015976761,
      "grad_norm": 1.3148617882447478,
      "learning_rate": 9.777935190679277e-06,
      "loss": 0.6134,
      "step": 1280
    },
    {
      "epoch": 0.18736383442265794,
      "grad_norm": 1.3368521794263946,
      "learning_rate": 9.770402407342524e-06,
      "loss": 0.6258,
      "step": 1290
    },
    {
      "epoch": 0.1888162672476398,
      "grad_norm": 1.3941700458180073,
      "learning_rate": 9.762746984324743e-06,
      "loss": 0.6191,
      "step": 1300
    },
    {
      "epoch": 0.19026870007262164,
      "grad_norm": 1.3152403546822757,
      "learning_rate": 9.754969118435043e-06,
      "loss": 0.6446,
      "step": 1310
    },
    {
      "epoch": 0.19172113289760348,
      "grad_norm": 1.3013626770341264,
      "learning_rate": 9.747069009630347e-06,
      "loss": 0.6312,
      "step": 1320
    },
    {
      "epoch": 0.19317356572258534,
      "grad_norm": 1.3966383885583535,
      "learning_rate": 9.739046861010255e-06,
      "loss": 0.6207,
      "step": 1330
    },
    {
      "epoch": 0.19462599854756718,
      "grad_norm": 1.1439991746974036,
      "learning_rate": 9.730902878811825e-06,
      "loss": 0.6144,
      "step": 1340
    },
    {
      "epoch": 0.19607843137254902,
      "grad_norm": 1.3540894709055364,
      "learning_rate": 9.722637272404263e-06,
      "loss": 0.6044,
      "step": 1350
    },
    {
      "epoch": 0.19753086419753085,
      "grad_norm": 1.100639588271217,
      "learning_rate": 9.71425025428355e-06,
      "loss": 0.6036,
      "step": 1360
    },
    {
      "epoch": 0.19898329702251272,
      "grad_norm": 1.1874319432290736,
      "learning_rate": 9.705742040066977e-06,
      "loss": 0.6039,
      "step": 1370
    },
    {
      "epoch": 0.20043572984749455,
      "grad_norm": 1.1767671647303808,
      "learning_rate": 9.697112848487591e-06,
      "loss": 0.6376,
      "step": 1380
    },
    {
      "epoch": 0.2018881626724764,
      "grad_norm": 1.135879944041461,
      "learning_rate": 9.688362901388586e-06,
      "loss": 0.6035,
      "step": 1390
    },
    {
      "epoch": 0.20334059549745825,
      "grad_norm": 1.2315910796359388,
      "learning_rate": 9.679492423717596e-06,
      "loss": 0.6098,
      "step": 1400
    },
    {
      "epoch": 0.2047930283224401,
      "grad_norm": 1.4949408462288012,
      "learning_rate": 9.670501643520904e-06,
      "loss": 0.6203,
      "step": 1410
    },
    {
      "epoch": 0.20624546114742193,
      "grad_norm": 1.3180181445795711,
      "learning_rate": 9.66139079193759e-06,
      "loss": 0.6286,
      "step": 1420
    },
    {
      "epoch": 0.20769789397240376,
      "grad_norm": 1.2616556885045909,
      "learning_rate": 9.652160103193583e-06,
      "loss": 0.6274,
      "step": 1430
    },
    {
      "epoch": 0.20915032679738563,
      "grad_norm": 1.3174449455574337,
      "learning_rate": 9.642809814595637e-06,
      "loss": 0.6136,
      "step": 1440
    },
    {
      "epoch": 0.21060275962236746,
      "grad_norm": 1.296735377133819,
      "learning_rate": 9.633340166525238e-06,
      "loss": 0.6145,
      "step": 1450
    },
    {
      "epoch": 0.2120551924473493,
      "grad_norm": 1.2502497833244608,
      "learning_rate": 9.62375140243242e-06,
      "loss": 0.6031,
      "step": 1460
    },
    {
      "epoch": 0.21350762527233116,
      "grad_norm": 1.2288830705505374,
      "learning_rate": 9.6140437688295e-06,
      "loss": 0.6128,
      "step": 1470
    },
    {
      "epoch": 0.214960058097313,
      "grad_norm": 1.1119473380240397,
      "learning_rate": 9.604217515284753e-06,
      "loss": 0.6171,
      "step": 1480
    },
    {
      "epoch": 0.21641249092229484,
      "grad_norm": 1.2070397164389806,
      "learning_rate": 9.594272894415986e-06,
      "loss": 0.6238,
      "step": 1490
    },
    {
      "epoch": 0.2178649237472767,
      "grad_norm": 1.3345637205372078,
      "learning_rate": 9.584210161884049e-06,
      "loss": 0.6163,
      "step": 1500
    },
    {
      "epoch": 0.21931735657225854,
      "grad_norm": 1.1385043759036517,
      "learning_rate": 9.57402957638626e-06,
      "loss": 0.6083,
      "step": 1510
    },
    {
      "epoch": 0.22076978939724037,
      "grad_norm": 1.1936988121465326,
      "learning_rate": 9.563731399649756e-06,
      "loss": 0.5992,
      "step": 1520
    },
    {
      "epoch": 0.2222222222222222,
      "grad_norm": 1.4103572503621762,
      "learning_rate": 9.553315896424758e-06,
      "loss": 0.6054,
      "step": 1530
    },
    {
      "epoch": 0.22367465504720407,
      "grad_norm": 1.3209719950503893,
      "learning_rate": 9.54278333447778e-06,
      "loss": 0.596,
      "step": 1540
    },
    {
      "epoch": 0.2251270878721859,
      "grad_norm": 1.1693016501696898,
      "learning_rate": 9.532133984584721e-06,
      "loss": 0.6323,
      "step": 1550
    },
    {
      "epoch": 0.22657952069716775,
      "grad_norm": 1.1691510921859125,
      "learning_rate": 9.521368120523931e-06,
      "loss": 0.6027,
      "step": 1560
    },
    {
      "epoch": 0.2280319535221496,
      "grad_norm": 1.2114364957172101,
      "learning_rate": 9.510486019069154e-06,
      "loss": 0.6245,
      "step": 1570
    },
    {
      "epoch": 0.22948438634713145,
      "grad_norm": 1.265123327235345,
      "learning_rate": 9.499487959982415e-06,
      "loss": 0.6189,
      "step": 1580
    },
    {
      "epoch": 0.23093681917211328,
      "grad_norm": 1.3773059483594046,
      "learning_rate": 9.488374226006836e-06,
      "loss": 0.6106,
      "step": 1590
    },
    {
      "epoch": 0.23238925199709515,
      "grad_norm": 1.2737618179619303,
      "learning_rate": 9.477145102859357e-06,
      "loss": 0.6115,
      "step": 1600
    },
    {
      "epoch": 0.23384168482207698,
      "grad_norm": 1.3066121502077,
      "learning_rate": 9.4658008792234e-06,
      "loss": 0.609,
      "step": 1610
    },
    {
      "epoch": 0.23529411764705882,
      "grad_norm": 1.242518893517758,
      "learning_rate": 9.45434184674144e-06,
      "loss": 0.6,
      "step": 1620
    },
    {
      "epoch": 0.23674655047204066,
      "grad_norm": 1.2493334973003818,
      "learning_rate": 9.442768300007511e-06,
      "loss": 0.6144,
      "step": 1630
    },
    {
      "epoch": 0.23819898329702252,
      "grad_norm": 1.2775874117960886,
      "learning_rate": 9.431080536559631e-06,
      "loss": 0.6245,
      "step": 1640
    },
    {
      "epoch": 0.23965141612200436,
      "grad_norm": 1.247039996382283,
      "learning_rate": 9.419278856872154e-06,
      "loss": 0.6279,
      "step": 1650
    },
    {
      "epoch": 0.2411038489469862,
      "grad_norm": 1.302601682600637,
      "learning_rate": 9.407363564348047e-06,
      "loss": 0.5933,
      "step": 1660
    },
    {
      "epoch": 0.24255628177196806,
      "grad_norm": 1.431347455463815,
      "learning_rate": 9.39533496531108e-06,
      "loss": 0.6171,
      "step": 1670
    },
    {
      "epoch": 0.2440087145969499,
      "grad_norm": 1.2527655662771335,
      "learning_rate": 9.38319336899797e-06,
      "loss": 0.6099,
      "step": 1680
    },
    {
      "epoch": 0.24546114742193173,
      "grad_norm": 1.205551788839019,
      "learning_rate": 9.370939087550407e-06,
      "loss": 0.6077,
      "step": 1690
    },
    {
      "epoch": 0.24691358024691357,
      "grad_norm": 1.332981320431861,
      "learning_rate": 9.358572436007052e-06,
      "loss": 0.6126,
      "step": 1700
    },
    {
      "epoch": 0.24836601307189543,
      "grad_norm": 1.2112905977700383,
      "learning_rate": 9.346093732295422e-06,
      "loss": 0.6141,
      "step": 1710
    },
    {
      "epoch": 0.24981844589687727,
      "grad_norm": 1.1741115783770129,
      "learning_rate": 9.333503297223725e-06,
      "loss": 0.5977,
      "step": 1720
    },
    {
      "epoch": 0.2512708787218591,
      "grad_norm": 1.2308239868942004,
      "learning_rate": 9.320801454472607e-06,
      "loss": 0.6213,
      "step": 1730
    },
    {
      "epoch": 0.25272331154684097,
      "grad_norm": 1.3933258283474292,
      "learning_rate": 9.30798853058684e-06,
      "loss": 0.6217,
      "step": 1740
    },
    {
      "epoch": 0.2541757443718228,
      "grad_norm": 1.2467959691205432,
      "learning_rate": 9.29506485496691e-06,
      "loss": 0.6089,
      "step": 1750
    },
    {
      "epoch": 0.25562817719680464,
      "grad_norm": 1.106847677662664,
      "learning_rate": 9.282030759860566e-06,
      "loss": 0.6113,
      "step": 1760
    },
    {
      "epoch": 0.2570806100217865,
      "grad_norm": 1.225606521070107,
      "learning_rate": 9.268886580354272e-06,
      "loss": 0.6041,
      "step": 1770
    },
    {
      "epoch": 0.2585330428467683,
      "grad_norm": 1.1249241718792773,
      "learning_rate": 9.255632654364591e-06,
      "loss": 0.6112,
      "step": 1780
    },
    {
      "epoch": 0.2599854756717502,
      "grad_norm": 1.2347205288363368,
      "learning_rate": 9.242269322629494e-06,
      "loss": 0.6003,
      "step": 1790
    },
    {
      "epoch": 0.26143790849673204,
      "grad_norm": 1.3040805105750026,
      "learning_rate": 9.228796928699613e-06,
      "loss": 0.6187,
      "step": 1800
    },
    {
      "epoch": 0.26289034132171385,
      "grad_norm": 1.4585670240799034,
      "learning_rate": 9.215215818929392e-06,
      "loss": 0.612,
      "step": 1810
    },
    {
      "epoch": 0.2643427741466957,
      "grad_norm": 1.0974130075617774,
      "learning_rate": 9.201526342468202e-06,
      "loss": 0.6124,
      "step": 1820
    },
    {
      "epoch": 0.2657952069716776,
      "grad_norm": 1.2918051377461068,
      "learning_rate": 9.18772885125134e-06,
      "loss": 0.6055,
      "step": 1830
    },
    {
      "epoch": 0.2672476397966594,
      "grad_norm": 1.199609927095931,
      "learning_rate": 9.17382369999101e-06,
      "loss": 0.6086,
      "step": 1840
    },
    {
      "epoch": 0.26870007262164125,
      "grad_norm": 1.2736244478450063,
      "learning_rate": 9.159811246167182e-06,
      "loss": 0.6111,
      "step": 1850
    },
    {
      "epoch": 0.2701525054466231,
      "grad_norm": 1.2484696326393374,
      "learning_rate": 9.14569185001841e-06,
      "loss": 0.5951,
      "step": 1860
    },
    {
      "epoch": 0.2716049382716049,
      "grad_norm": 1.3221301583704237,
      "learning_rate": 9.131465874532568e-06,
      "loss": 0.5861,
      "step": 1870
    },
    {
      "epoch": 0.2730573710965868,
      "grad_norm": 1.2578322361866867,
      "learning_rate": 9.117133685437524e-06,
      "loss": 0.6073,
      "step": 1880
    },
    {
      "epoch": 0.27450980392156865,
      "grad_norm": 1.3260698149158467,
      "learning_rate": 9.102695651191737e-06,
      "loss": 0.5838,
      "step": 1890
    },
    {
      "epoch": 0.27596223674655046,
      "grad_norm": 1.2373193794097532,
      "learning_rate": 9.088152142974771e-06,
      "loss": 0.6013,
      "step": 1900
    },
    {
      "epoch": 0.2774146695715323,
      "grad_norm": 1.1997047870357698,
      "learning_rate": 9.073503534677773e-06,
      "loss": 0.6219,
      "step": 1910
    },
    {
      "epoch": 0.2788671023965142,
      "grad_norm": 1.2769112952981858,
      "learning_rate": 9.058750202893844e-06,
      "loss": 0.6052,
      "step": 1920
    },
    {
      "epoch": 0.280319535221496,
      "grad_norm": 1.2302296498321919,
      "learning_rate": 9.04389252690837e-06,
      "loss": 0.6124,
      "step": 1930
    },
    {
      "epoch": 0.28177196804647786,
      "grad_norm": 1.2009594091858158,
      "learning_rate": 9.02893088868926e-06,
      "loss": 0.604,
      "step": 1940
    },
    {
      "epoch": 0.28322440087145967,
      "grad_norm": 1.0539872600155336,
      "learning_rate": 9.013865672877133e-06,
      "loss": 0.6052,
      "step": 1950
    },
    {
      "epoch": 0.28467683369644153,
      "grad_norm": 1.2561895098497668,
      "learning_rate": 8.998697266775433e-06,
      "loss": 0.6077,
      "step": 1960
    },
    {
      "epoch": 0.2861292665214234,
      "grad_norm": 1.2763583417414128,
      "learning_rate": 8.98342606034046e-06,
      "loss": 0.6059,
      "step": 1970
    },
    {
      "epoch": 0.2875816993464052,
      "grad_norm": 1.1463184995763767,
      "learning_rate": 8.96805244617135e-06,
      "loss": 0.6183,
      "step": 1980
    },
    {
      "epoch": 0.28903413217138707,
      "grad_norm": 1.1421597790792624,
      "learning_rate": 8.952576819499998e-06,
      "loss": 0.602,
      "step": 1990
    },
    {
      "epoch": 0.29048656499636893,
      "grad_norm": 1.3046866547593934,
      "learning_rate": 8.93699957818087e-06,
      "loss": 0.5925,
      "step": 2000
    },
    {
      "epoch": 0.29193899782135074,
      "grad_norm": 1.27239619384718,
      "learning_rate": 8.921321122680789e-06,
      "loss": 0.6037,
      "step": 2010
    },
    {
      "epoch": 0.2933914306463326,
      "grad_norm": 1.3073284462474046,
      "learning_rate": 8.905541856068641e-06,
      "loss": 0.6077,
      "step": 2020
    },
    {
      "epoch": 0.29484386347131447,
      "grad_norm": 1.2694028140938955,
      "learning_rate": 8.889662184005007e-06,
      "loss": 0.6076,
      "step": 2030
    },
    {
      "epoch": 0.2962962962962963,
      "grad_norm": 1.1075058528848678,
      "learning_rate": 8.873682514731746e-06,
      "loss": 0.5986,
      "step": 2040
    },
    {
      "epoch": 0.29774872912127814,
      "grad_norm": 1.25011183641691,
      "learning_rate": 8.85760325906148e-06,
      "loss": 0.5911,
      "step": 2050
    },
    {
      "epoch": 0.29920116194626,
      "grad_norm": 1.230690665069067,
      "learning_rate": 8.841424830367051e-06,
      "loss": 0.5918,
      "step": 2060
    },
    {
      "epoch": 0.3006535947712418,
      "grad_norm": 1.2143851276582127,
      "learning_rate": 8.82514764457088e-06,
      "loss": 0.6026,
      "step": 2070
    },
    {
      "epoch": 0.3021060275962237,
      "grad_norm": 1.1711415813258073,
      "learning_rate": 8.808772120134286e-06,
      "loss": 0.6208,
      "step": 2080
    },
    {
      "epoch": 0.30355846042120554,
      "grad_norm": 1.2105658122447378,
      "learning_rate": 8.79229867804672e-06,
      "loss": 0.6178,
      "step": 2090
    },
    {
      "epoch": 0.30501089324618735,
      "grad_norm": 1.260614604486508,
      "learning_rate": 8.775727741814945e-06,
      "loss": 0.6033,
      "step": 2100
    },
    {
      "epoch": 0.3064633260711692,
      "grad_norm": 1.1949196588242055,
      "learning_rate": 8.75905973745215e-06,
      "loss": 0.5954,
      "step": 2110
    },
    {
      "epoch": 0.307915758896151,
      "grad_norm": 1.2358431757504627,
      "learning_rate": 8.742295093466993e-06,
      "loss": 0.5929,
      "step": 2120
    },
    {
      "epoch": 0.3093681917211329,
      "grad_norm": 1.1788915626896657,
      "learning_rate": 8.725434240852586e-06,
      "loss": 0.6014,
      "step": 2130
    },
    {
      "epoch": 0.31082062454611475,
      "grad_norm": 1.2899429468502281,
      "learning_rate": 8.708477613075422e-06,
      "loss": 0.588,
      "step": 2140
    },
    {
      "epoch": 0.31227305737109656,
      "grad_norm": 1.0436767601630443,
      "learning_rate": 8.691425646064222e-06,
      "loss": 0.6128,
      "step": 2150
    },
    {
      "epoch": 0.3137254901960784,
      "grad_norm": 1.1823668694466984,
      "learning_rate": 8.674278778198731e-06,
      "loss": 0.5939,
      "step": 2160
    },
    {
      "epoch": 0.3151779230210603,
      "grad_norm": 1.2287777612088193,
      "learning_rate": 8.657037450298449e-06,
      "loss": 0.5942,
      "step": 2170
    },
    {
      "epoch": 0.3166303558460421,
      "grad_norm": 1.1210160142803036,
      "learning_rate": 8.6397021056113e-06,
      "loss": 0.6068,
      "step": 2180
    },
    {
      "epoch": 0.31808278867102396,
      "grad_norm": 1.176574092958882,
      "learning_rate": 8.622273189802231e-06,
      "loss": 0.6099,
      "step": 2190
    },
    {
      "epoch": 0.31953522149600583,
      "grad_norm": 1.2276623152067967,
      "learning_rate": 8.604751150941758e-06,
      "loss": 0.598,
      "step": 2200
    },
    {
      "epoch": 0.32098765432098764,
      "grad_norm": 1.2049029589388036,
      "learning_rate": 8.58713643949445e-06,
      "loss": 0.5934,
      "step": 2210
    },
    {
      "epoch": 0.3224400871459695,
      "grad_norm": 1.2650704032924422,
      "learning_rate": 8.569429508307345e-06,
      "loss": 0.6039,
      "step": 2220
    },
    {
      "epoch": 0.32389251997095136,
      "grad_norm": 1.088534753663297,
      "learning_rate": 8.551630812598303e-06,
      "loss": 0.6038,
      "step": 2230
    },
    {
      "epoch": 0.3253449527959332,
      "grad_norm": 1.1678210415173849,
      "learning_rate": 8.533740809944317e-06,
      "loss": 0.6084,
      "step": 2240
    },
    {
      "epoch": 0.32679738562091504,
      "grad_norm": 1.251355519441971,
      "learning_rate": 8.515759960269731e-06,
      "loss": 0.5975,
      "step": 2250
    },
    {
      "epoch": 0.3282498184458969,
      "grad_norm": 1.1662322522769242,
      "learning_rate": 8.497688725834432e-06,
      "loss": 0.6106,
      "step": 2260
    },
    {
      "epoch": 0.3297022512708787,
      "grad_norm": 1.336372713961502,
      "learning_rate": 8.479527571221957e-06,
      "loss": 0.6224,
      "step": 2270
    },
    {
      "epoch": 0.3311546840958606,
      "grad_norm": 1.148371532122775,
      "learning_rate": 8.461276963327555e-06,
      "loss": 0.607,
      "step": 2280
    },
    {
      "epoch": 0.33260711692084244,
      "grad_norm": 1.3691981401078914,
      "learning_rate": 8.442937371346174e-06,
      "loss": 0.6001,
      "step": 2290
    },
    {
      "epoch": 0.33405954974582425,
      "grad_norm": 1.3343569533197541,
      "learning_rate": 8.424509266760413e-06,
      "loss": 0.6009,
      "step": 2300
    },
    {
      "epoch": 0.3355119825708061,
      "grad_norm": 1.0903008241967769,
      "learning_rate": 8.405993123328388e-06,
      "loss": 0.5852,
      "step": 2310
    },
    {
      "epoch": 0.3369644153957879,
      "grad_norm": 1.2770798153391716,
      "learning_rate": 8.387389417071565e-06,
      "loss": 0.5967,
      "step": 2320
    },
    {
      "epoch": 0.3384168482207698,
      "grad_norm": 1.1893611624135727,
      "learning_rate": 8.368698626262506e-06,
      "loss": 0.5906,
      "step": 2330
    },
    {
      "epoch": 0.33986928104575165,
      "grad_norm": 1.1182656055274527,
      "learning_rate": 8.349921231412588e-06,
      "loss": 0.6144,
      "step": 2340
    },
    {
      "epoch": 0.34132171387073346,
      "grad_norm": 1.1569225334439495,
      "learning_rate": 8.331057715259643e-06,
      "loss": 0.5945,
      "step": 2350
    },
    {
      "epoch": 0.3427741466957153,
      "grad_norm": 1.0553585361032343,
      "learning_rate": 8.312108562755547e-06,
      "loss": 0.6012,
      "step": 2360
    },
    {
      "epoch": 0.3442265795206972,
      "grad_norm": 1.0429439932782214,
      "learning_rate": 8.29307426105376e-06,
      "loss": 0.602,
      "step": 2370
    },
    {
      "epoch": 0.345679012345679,
      "grad_norm": 1.0397368512389722,
      "learning_rate": 8.273955299496787e-06,
      "loss": 0.5932,
      "step": 2380
    },
    {
      "epoch": 0.34713144517066086,
      "grad_norm": 1.0989788243486265,
      "learning_rate": 8.254752169603614e-06,
      "loss": 0.5987,
      "step": 2390
    },
    {
      "epoch": 0.3485838779956427,
      "grad_norm": 1.2513128657031618,
      "learning_rate": 8.235465365057067e-06,
      "loss": 0.597,
      "step": 2400
    },
    {
      "epoch": 0.35003631082062453,
      "grad_norm": 1.2696804086094644,
      "learning_rate": 8.21609538169111e-06,
      "loss": 0.5962,
      "step": 2410
    },
    {
      "epoch": 0.3514887436456064,
      "grad_norm": 1.3765675743894579,
      "learning_rate": 8.196642717478113e-06,
      "loss": 0.6083,
      "step": 2420
    },
    {
      "epoch": 0.35294117647058826,
      "grad_norm": 1.1525716644685924,
      "learning_rate": 8.177107872516041e-06,
      "loss": 0.5912,
      "step": 2430
    },
    {
      "epoch": 0.35439360929557007,
      "grad_norm": 1.1930516036081553,
      "learning_rate": 8.157491349015599e-06,
      "loss": 0.601,
      "step": 2440
    },
    {
      "epoch": 0.35584604212055193,
      "grad_norm": 1.3453249916774477,
      "learning_rate": 8.137793651287317e-06,
      "loss": 0.62,
      "step": 2450
    },
    {
      "epoch": 0.3572984749455338,
      "grad_norm": 1.216543063547056,
      "learning_rate": 8.118015285728598e-06,
      "loss": 0.6037,
      "step": 2460
    },
    {
      "epoch": 0.3587509077705156,
      "grad_norm": 1.129394528084983,
      "learning_rate": 8.098156760810683e-06,
      "loss": 0.598,
      "step": 2470
    },
    {
      "epoch": 0.36020334059549747,
      "grad_norm": 1.124156367954234,
      "learning_rate": 8.078218587065589e-06,
      "loss": 0.5813,
      "step": 2480
    },
    {
      "epoch": 0.3616557734204793,
      "grad_norm": 1.2039082584679666,
      "learning_rate": 8.058201277072981e-06,
      "loss": 0.5876,
      "step": 2490
    },
    {
      "epoch": 0.36310820624546114,
      "grad_norm": 1.1919842026488203,
      "learning_rate": 8.038105345446994e-06,
      "loss": 0.6115,
      "step": 2500
    },
    {
      "epoch": 0.364560639070443,
      "grad_norm": 1.2851968482663827,
      "learning_rate": 8.017931308823006e-06,
      "loss": 0.592,
      "step": 2510
    },
    {
      "epoch": 0.3660130718954248,
      "grad_norm": 1.1538243634302991,
      "learning_rate": 7.997679685844353e-06,
      "loss": 0.5867,
      "step": 2520
    },
    {
      "epoch": 0.3674655047204067,
      "grad_norm": 1.0704432112589999,
      "learning_rate": 7.977350997148994e-06,
      "loss": 0.6007,
      "step": 2530
    },
    {
      "epoch": 0.36891793754538854,
      "grad_norm": 1.2707334756597408,
      "learning_rate": 7.956945765356133e-06,
      "loss": 0.5746,
      "step": 2540
    },
    {
      "epoch": 0.37037037037037035,
      "grad_norm": 1.2061421625898763,
      "learning_rate": 7.936464515052776e-06,
      "loss": 0.601,
      "step": 2550
    },
    {
      "epoch": 0.3718228031953522,
      "grad_norm": 1.318015728266432,
      "learning_rate": 7.915907772780244e-06,
      "loss": 0.6081,
      "step": 2560
    },
    {
      "epoch": 0.3732752360203341,
      "grad_norm": 1.253197445356757,
      "learning_rate": 7.89527606702065e-06,
      "loss": 0.6046,
      "step": 2570
    },
    {
      "epoch": 0.3747276688453159,
      "grad_norm": 1.190199765539676,
      "learning_rate": 7.87456992818329e-06,
      "loss": 0.5986,
      "step": 2580
    },
    {
      "epoch": 0.37618010167029775,
      "grad_norm": 1.193398450040499,
      "learning_rate": 7.853789888591032e-06,
      "loss": 0.5889,
      "step": 2590
    },
    {
      "epoch": 0.3776325344952796,
      "grad_norm": 1.035053671117003,
      "learning_rate": 7.832936482466612e-06,
      "loss": 0.5934,
      "step": 2600
    },
    {
      "epoch": 0.3790849673202614,
      "grad_norm": 1.1386993400574172,
      "learning_rate": 7.812010245918903e-06,
      "loss": 0.586,
      "step": 2610
    },
    {
      "epoch": 0.3805374001452433,
      "grad_norm": 1.1022458257608025,
      "learning_rate": 7.79101171692914e-06,
      "loss": 0.5806,
      "step": 2620
    },
    {
      "epoch": 0.38198983297022515,
      "grad_norm": 1.1758543851880188,
      "learning_rate": 7.769941435337083e-06,
      "loss": 0.5618,
      "step": 2630
    },
    {
      "epoch": 0.38344226579520696,
      "grad_norm": 1.2426818455480244,
      "learning_rate": 7.748799942827147e-06,
      "loss": 0.6012,
      "step": 2640
    },
    {
      "epoch": 0.3848946986201888,
      "grad_norm": 1.0718204571931684,
      "learning_rate": 7.72758778291446e-06,
      "loss": 0.5887,
      "step": 2650
    },
    {
      "epoch": 0.3863471314451707,
      "grad_norm": 1.0289005823465374,
      "learning_rate": 7.706305500930909e-06,
      "loss": 0.6037,
      "step": 2660
    },
    {
      "epoch": 0.3877995642701525,
      "grad_norm": 1.2478985029233107,
      "learning_rate": 7.684953644011103e-06,
      "loss": 0.584,
      "step": 2670
    },
    {
      "epoch": 0.38925199709513436,
      "grad_norm": 1.1066991243562059,
      "learning_rate": 7.66353276107832e-06,
      "loss": 0.6007,
      "step": 2680
    },
    {
      "epoch": 0.39070442992011617,
      "grad_norm": 1.2345614999374477,
      "learning_rate": 7.64204340283039e-06,
      "loss": 0.6033,
      "step": 2690
    },
    {
      "epoch": 0.39215686274509803,
      "grad_norm": 1.0798799696274017,
      "learning_rate": 7.620486121725536e-06,
      "loss": 0.59,
      "step": 2700
    },
    {
      "epoch": 0.3936092955700799,
      "grad_norm": 1.1600968806836478,
      "learning_rate": 7.598861471968174e-06,
      "loss": 0.5948,
      "step": 2710
    },
    {
      "epoch": 0.3950617283950617,
      "grad_norm": 1.1860847221048887,
      "learning_rate": 7.577170009494665e-06,
      "loss": 0.5981,
      "step": 2720
    },
    {
      "epoch": 0.39651416122004357,
      "grad_norm": 1.0670434364146835,
      "learning_rate": 7.555412291959018e-06,
      "loss": 0.5772,
      "step": 2730
    },
    {
      "epoch": 0.39796659404502543,
      "grad_norm": 1.1865817610815497,
      "learning_rate": 7.533588878718561e-06,
      "loss": 0.584,
      "step": 2740
    },
    {
      "epoch": 0.39941902687000724,
      "grad_norm": 1.2092053148497965,
      "learning_rate": 7.511700330819556e-06,
      "loss": 0.5832,
      "step": 2750
    },
    {
      "epoch": 0.4008714596949891,
      "grad_norm": 1.1770338237370501,
      "learning_rate": 7.489747210982777e-06,
      "loss": 0.5984,
      "step": 2760
    },
    {
      "epoch": 0.40232389251997097,
      "grad_norm": 1.1434774901575833,
      "learning_rate": 7.4677300835890424e-06,
      "loss": 0.5755,
      "step": 2770
    },
    {
      "epoch": 0.4037763253449528,
      "grad_norm": 1.0366368031771818,
      "learning_rate": 7.445649514664703e-06,
      "loss": 0.5886,
      "step": 2780
    },
    {
      "epoch": 0.40522875816993464,
      "grad_norm": 1.2729396302065998,
      "learning_rate": 7.423506071867101e-06,
      "loss": 0.6134,
      "step": 2790
    },
    {
      "epoch": 0.4066811909949165,
      "grad_norm": 1.0518352889412923,
      "learning_rate": 7.401300324469961e-06,
      "loss": 0.5737,
      "step": 2800
    },
    {
      "epoch": 0.4081336238198983,
      "grad_norm": 1.2001944481237583,
      "learning_rate": 7.3790328433487665e-06,
      "loss": 0.5874,
      "step": 2810
    },
    {
      "epoch": 0.4095860566448802,
      "grad_norm": 1.250231920993964,
      "learning_rate": 7.3567042009660786e-06,
      "loss": 0.5862,
      "step": 2820
    },
    {
      "epoch": 0.41103848946986205,
      "grad_norm": 1.1512872210708966,
      "learning_rate": 7.3343149713568215e-06,
      "loss": 0.593,
      "step": 2830
    },
    {
      "epoch": 0.41249092229484385,
      "grad_norm": 1.1605256860138091,
      "learning_rate": 7.311865730113525e-06,
      "loss": 0.5939,
      "step": 2840
    },
    {
      "epoch": 0.4139433551198257,
      "grad_norm": 1.3940208410225592,
      "learning_rate": 7.2893570543715174e-06,
      "loss": 0.6028,
      "step": 2850
    },
    {
      "epoch": 0.4153957879448075,
      "grad_norm": 1.1976078557092422,
      "learning_rate": 7.266789522794104e-06,
      "loss": 0.6065,
      "step": 2860
    },
    {
      "epoch": 0.4168482207697894,
      "grad_norm": 1.035110243445679,
      "learning_rate": 7.244163715557683e-06,
      "loss": 0.5915,
      "step": 2870
    },
    {
      "epoch": 0.41830065359477125,
      "grad_norm": 1.1865073190747897,
      "learning_rate": 7.2214802143368225e-06,
      "loss": 0.5961,
      "step": 2880
    },
    {
      "epoch": 0.41975308641975306,
      "grad_norm": 1.0991372561424138,
      "learning_rate": 7.1987396022893216e-06,
      "loss": 0.5857,
      "step": 2890
    },
    {
      "epoch": 0.4212055192447349,
      "grad_norm": 1.0801243737112538,
      "learning_rate": 7.175942464041209e-06,
      "loss": 0.5829,
      "step": 2900
    },
    {
      "epoch": 0.4226579520697168,
      "grad_norm": 1.3295568712189132,
      "learning_rate": 7.15308938567171e-06,
      "loss": 0.5869,
      "step": 2910
    },
    {
      "epoch": 0.4241103848946986,
      "grad_norm": 1.0402363831702612,
      "learning_rate": 7.130180954698187e-06,
      "loss": 0.5842,
      "step": 2920
    },
    {
      "epoch": 0.42556281771968046,
      "grad_norm": 1.1031276144488775,
      "learning_rate": 7.107217760061036e-06,
      "loss": 0.5923,
      "step": 2930
    },
    {
      "epoch": 0.42701525054466233,
      "grad_norm": 1.183086396688286,
      "learning_rate": 7.0842003921085376e-06,
      "loss": 0.6053,
      "step": 2940
    },
    {
      "epoch": 0.42846768336964414,
      "grad_norm": 1.244303339507363,
      "learning_rate": 7.061129442581685e-06,
      "loss": 0.5924,
      "step": 2950
    },
    {
      "epoch": 0.429920116194626,
      "grad_norm": 1.2478572360385807,
      "learning_rate": 7.038005504598975e-06,
      "loss": 0.5922,
      "step": 2960
    },
    {
      "epoch": 0.43137254901960786,
      "grad_norm": 1.0447681879549313,
      "learning_rate": 7.0148291726411486e-06,
      "loss": 0.5825,
      "step": 2970
    },
    {
      "epoch": 0.4328249818445897,
      "grad_norm": 1.1025428022026995,
      "learning_rate": 6.9916010425359214e-06,
      "loss": 0.5956,
      "step": 2980
    },
    {
      "epoch": 0.43427741466957154,
      "grad_norm": 1.329010163267056,
      "learning_rate": 6.968321711442658e-06,
      "loss": 0.5772,
      "step": 2990
    },
    {
      "epoch": 0.4357298474945534,
      "grad_norm": 1.2330587975332181,
      "learning_rate": 6.9449917778370216e-06,
      "loss": 0.5933,
      "step": 3000
    },
    {
      "epoch": 0.4371822803195352,
      "grad_norm": 1.1656344009683823,
      "learning_rate": 6.921611841495584e-06,
      "loss": 0.5922,
      "step": 3010
    },
    {
      "epoch": 0.4386347131445171,
      "grad_norm": 1.2709734185927093,
      "learning_rate": 6.898182503480414e-06,
      "loss": 0.5911,
      "step": 3020
    },
    {
      "epoch": 0.4400871459694989,
      "grad_norm": 1.269770194129687,
      "learning_rate": 6.8747043661236215e-06,
      "loss": 0.6103,
      "step": 3030
    },
    {
      "epoch": 0.44153957879448075,
      "grad_norm": 1.106713465551905,
      "learning_rate": 6.851178033011869e-06,
      "loss": 0.5997,
      "step": 3040
    },
    {
      "epoch": 0.4429920116194626,
      "grad_norm": 1.1985970638971495,
      "learning_rate": 6.82760410897086e-06,
      "loss": 0.5727,
      "step": 3050
    },
    {
      "epoch": 0.4444444444444444,
      "grad_norm": 1.1259472634689607,
      "learning_rate": 6.8039832000497865e-06,
      "loss": 0.5983,
      "step": 3060
    },
    {
      "epoch": 0.4458968772694263,
      "grad_norm": 1.212189906596056,
      "learning_rate": 6.78031591350575e-06,
      "loss": 0.5958,
      "step": 3070
    },
    {
      "epoch": 0.44734931009440815,
      "grad_norm": 1.0999728539824523,
      "learning_rate": 6.756602857788148e-06,
      "loss": 0.5717,
      "step": 3080
    },
    {
      "epoch": 0.44880174291938996,
      "grad_norm": 1.1130187014726358,
      "learning_rate": 6.732844642523032e-06,
      "loss": 0.5793,
      "step": 3090
    },
    {
      "epoch": 0.4502541757443718,
      "grad_norm": 1.075132513625087,
      "learning_rate": 6.70904187849744e-06,
      "loss": 0.562,
      "step": 3100
    },
    {
      "epoch": 0.4517066085693537,
      "grad_norm": 1.2147850552839328,
      "learning_rate": 6.685195177643684e-06,
      "loss": 0.5978,
      "step": 3110
    },
    {
      "epoch": 0.4531590413943355,
      "grad_norm": 1.2836246837826484,
      "learning_rate": 6.661305153023628e-06,
      "loss": 0.5912,
      "step": 3120
    },
    {
      "epoch": 0.45461147421931736,
      "grad_norm": 1.1766776836268427,
      "learning_rate": 6.637372418812921e-06,
      "loss": 0.586,
      "step": 3130
    },
    {
      "epoch": 0.4560639070442992,
      "grad_norm": 1.3613669267848012,
      "learning_rate": 6.613397590285211e-06,
      "loss": 0.5998,
      "step": 3140
    },
    {
      "epoch": 0.45751633986928103,
      "grad_norm": 1.2051701552338834,
      "learning_rate": 6.589381283796325e-06,
      "loss": 0.5812,
      "step": 3150
    },
    {
      "epoch": 0.4589687726942629,
      "grad_norm": 1.1519365736041338,
      "learning_rate": 6.565324116768428e-06,
      "loss": 0.583,
      "step": 3160
    },
    {
      "epoch": 0.46042120551924476,
      "grad_norm": 1.1475917123110242,
      "learning_rate": 6.54122670767414e-06,
      "loss": 0.5765,
      "step": 3170
    },
    {
      "epoch": 0.46187363834422657,
      "grad_norm": 1.088676956077236,
      "learning_rate": 6.517089676020648e-06,
      "loss": 0.5997,
      "step": 3180
    },
    {
      "epoch": 0.46332607116920843,
      "grad_norm": 1.1195203213303881,
      "learning_rate": 6.492913642333768e-06,
      "loss": 0.565,
      "step": 3190
    },
    {
      "epoch": 0.4647785039941903,
      "grad_norm": 1.0927178103796473,
      "learning_rate": 6.468699228142004e-06,
      "loss": 0.5988,
      "step": 3200
    },
    {
      "epoch": 0.4662309368191721,
      "grad_norm": 1.1180323598233408,
      "learning_rate": 6.444447055960559e-06,
      "loss": 0.6034,
      "step": 3210
    },
    {
      "epoch": 0.46768336964415397,
      "grad_norm": 1.1581218721076667,
      "learning_rate": 6.420157749275341e-06,
      "loss": 0.5792,
      "step": 3220
    },
    {
      "epoch": 0.4691358024691358,
      "grad_norm": 1.2355006071990586,
      "learning_rate": 6.395831932526924e-06,
      "loss": 0.5914,
      "step": 3230
    },
    {
      "epoch": 0.47058823529411764,
      "grad_norm": 1.2628642644632941,
      "learning_rate": 6.371470231094498e-06,
      "loss": 0.5972,
      "step": 3240
    },
    {
      "epoch": 0.4720406681190995,
      "grad_norm": 1.30372441555249,
      "learning_rate": 6.3470732712798e-06,
      "loss": 0.5943,
      "step": 3250
    },
    {
      "epoch": 0.4734931009440813,
      "grad_norm": 1.2732465621842586,
      "learning_rate": 6.322641680290997e-06,
      "loss": 0.59,
      "step": 3260
    },
    {
      "epoch": 0.4749455337690632,
      "grad_norm": 1.1957460012906904,
      "learning_rate": 6.298176086226577e-06,
      "loss": 0.5908,
      "step": 3270
    },
    {
      "epoch": 0.47639796659404504,
      "grad_norm": 1.2666436895215651,
      "learning_rate": 6.273677118059192e-06,
      "loss": 0.579,
      "step": 3280
    },
    {
      "epoch": 0.47785039941902685,
      "grad_norm": 1.1740612442844354,
      "learning_rate": 6.24914540561949e-06,
      "loss": 0.5849,
      "step": 3290
    },
    {
      "epoch": 0.4793028322440087,
      "grad_norm": 1.170368029656733,
      "learning_rate": 6.2245815795799235e-06,
      "loss": 0.5914,
      "step": 3300
    },
    {
      "epoch": 0.4807552650689906,
      "grad_norm": 1.060432274782722,
      "learning_rate": 6.199986271438536e-06,
      "loss": 0.5692,
      "step": 3310
    },
    {
      "epoch": 0.4822076978939724,
      "grad_norm": 1.133481629336483,
      "learning_rate": 6.17536011350273e-06,
      "loss": 0.5789,
      "step": 3320
    },
    {
      "epoch": 0.48366013071895425,
      "grad_norm": 1.0779584839433474,
      "learning_rate": 6.150703738873004e-06,
      "loss": 0.5815,
      "step": 3330
    },
    {
      "epoch": 0.4851125635439361,
      "grad_norm": 1.138478981177591,
      "learning_rate": 6.1260177814266855e-06,
      "loss": 0.5754,
      "step": 3340
    },
    {
      "epoch": 0.4865649963689179,
      "grad_norm": 1.1290987276585867,
      "learning_rate": 6.101302875801628e-06,
      "loss": 0.5778,
      "step": 3350
    },
    {
      "epoch": 0.4880174291938998,
      "grad_norm": 1.1468009205478524,
      "learning_rate": 6.0765596573798994e-06,
      "loss": 0.5689,
      "step": 3360
    },
    {
      "epoch": 0.48946986201888165,
      "grad_norm": 1.0683998313181482,
      "learning_rate": 6.051788762271442e-06,
      "loss": 0.5692,
      "step": 3370
    },
    {
      "epoch": 0.49092229484386346,
      "grad_norm": 1.1889646870467425,
      "learning_rate": 6.0269908272977295e-06,
      "loss": 0.5808,
      "step": 3380
    },
    {
      "epoch": 0.4923747276688453,
| "grad_norm": 1.2529890364621932, | |
| "learning_rate": 6.002166489975385e-06, | |
| "loss": 0.5772, | |
| "step": 3390 | |
| }, | |
| { | |
| "epoch": 0.49382716049382713, | |
| "grad_norm": 1.1925487080641164, | |
| "learning_rate": 5.977316388499794e-06, | |
| "loss": 0.5862, | |
| "step": 3400 | |
| }, | |
| { | |
| "epoch": 0.495279593318809, | |
| "grad_norm": 1.1372201366075154, | |
| "learning_rate": 5.952441161728701e-06, | |
| "loss": 0.5662, | |
| "step": 3410 | |
| }, | |
| { | |
| "epoch": 0.49673202614379086, | |
| "grad_norm": 1.2981299245914195, | |
| "learning_rate": 5.927541449165783e-06, | |
| "loss": 0.5682, | |
| "step": 3420 | |
| }, | |
| { | |
| "epoch": 0.49818445896877267, | |
| "grad_norm": 1.1198285033650917, | |
| "learning_rate": 5.902617890944207e-06, | |
| "loss": 0.5894, | |
| "step": 3430 | |
| }, | |
| { | |
| "epoch": 0.49963689179375453, | |
| "grad_norm": 1.1442459802118357, | |
| "learning_rate": 5.8776711278101765e-06, | |
| "loss": 0.5735, | |
| "step": 3440 | |
| }, | |
| { | |
| "epoch": 0.5010893246187363, | |
| "grad_norm": 1.10045421098352, | |
| "learning_rate": 5.852701801106458e-06, | |
| "loss": 0.5838, | |
| "step": 3450 | |
| }, | |
| { | |
| "epoch": 0.5025417574437182, | |
| "grad_norm": 1.1675311387395517, | |
| "learning_rate": 5.82771055275589e-06, | |
| "loss": 0.5847, | |
| "step": 3460 | |
| }, | |
| { | |
| "epoch": 0.5039941902687001, | |
| "grad_norm": 1.0028532762834719, | |
| "learning_rate": 5.802698025244886e-06, | |
| "loss": 0.5656, | |
| "step": 3470 | |
| }, | |
| { | |
| "epoch": 0.5054466230936819, | |
| "grad_norm": 1.028656973511835, | |
| "learning_rate": 5.777664861606912e-06, | |
| "loss": 0.5871, | |
| "step": 3480 | |
| }, | |
| { | |
| "epoch": 0.5068990559186638, | |
| "grad_norm": 1.2007383871296113, | |
| "learning_rate": 5.752611705405957e-06, | |
| "loss": 0.5895, | |
| "step": 3490 | |
| }, | |
| { | |
| "epoch": 0.5083514887436456, | |
| "grad_norm": 1.1281898149999334, | |
| "learning_rate": 5.7275392007199896e-06, | |
| "loss": 0.573, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 0.5098039215686274, | |
| "grad_norm": 1.282146433020574, | |
| "learning_rate": 5.702447992124394e-06, | |
| "loss": 0.57, | |
| "step": 3510 | |
| }, | |
| { | |
| "epoch": 0.5112563543936093, | |
| "grad_norm": 1.05801689608913, | |
| "learning_rate": 5.677338724675406e-06, | |
| "loss": 0.5751, | |
| "step": 3520 | |
| }, | |
| { | |
| "epoch": 0.5127087872185911, | |
| "grad_norm": 1.2511793245069922, | |
| "learning_rate": 5.652212043893528e-06, | |
| "loss": 0.5805, | |
| "step": 3530 | |
| }, | |
| { | |
| "epoch": 0.514161220043573, | |
| "grad_norm": 1.2496537928999953, | |
| "learning_rate": 5.627068595746931e-06, | |
| "loss": 0.5734, | |
| "step": 3540 | |
| }, | |
| { | |
| "epoch": 0.5156136528685549, | |
| "grad_norm": 1.0586939290192166, | |
| "learning_rate": 5.601909026634846e-06, | |
| "loss": 0.573, | |
| "step": 3550 | |
| }, | |
| { | |
| "epoch": 0.5170660856935366, | |
| "grad_norm": 1.2135072197108623, | |
| "learning_rate": 5.576733983370955e-06, | |
| "loss": 0.5696, | |
| "step": 3560 | |
| }, | |
| { | |
| "epoch": 0.5185185185185185, | |
| "grad_norm": 1.096951604322022, | |
| "learning_rate": 5.551544113166752e-06, | |
| "loss": 0.5764, | |
| "step": 3570 | |
| }, | |
| { | |
| "epoch": 0.5199709513435004, | |
| "grad_norm": 1.067656908278471, | |
| "learning_rate": 5.5263400636149104e-06, | |
| "loss": 0.5945, | |
| "step": 3580 | |
| }, | |
| { | |
| "epoch": 0.5214233841684822, | |
| "grad_norm": 1.2528345132805765, | |
| "learning_rate": 5.50112248267263e-06, | |
| "loss": 0.5698, | |
| "step": 3590 | |
| }, | |
| { | |
| "epoch": 0.5228758169934641, | |
| "grad_norm": 1.153586426579592, | |
| "learning_rate": 5.475892018644989e-06, | |
| "loss": 0.5939, | |
| "step": 3600 | |
| }, | |
| { | |
| "epoch": 0.524328249818446, | |
| "grad_norm": 1.321281822598792, | |
| "learning_rate": 5.450649320168263e-06, | |
| "loss": 0.5764, | |
| "step": 3610 | |
| }, | |
| { | |
| "epoch": 0.5257806826434277, | |
| "grad_norm": 1.1546247883125684, | |
| "learning_rate": 5.4253950361932565e-06, | |
| "loss": 0.5698, | |
| "step": 3620 | |
| }, | |
| { | |
| "epoch": 0.5272331154684096, | |
| "grad_norm": 1.3090075714265825, | |
| "learning_rate": 5.400129815968623e-06, | |
| "loss": 0.58, | |
| "step": 3630 | |
| }, | |
| { | |
| "epoch": 0.5286855482933914, | |
| "grad_norm": 1.3546772950978652, | |
| "learning_rate": 5.374854309024167e-06, | |
| "loss": 0.5906, | |
| "step": 3640 | |
| }, | |
| { | |
| "epoch": 0.5301379811183733, | |
| "grad_norm": 1.0728126839197956, | |
| "learning_rate": 5.349569165154153e-06, | |
| "loss": 0.5617, | |
| "step": 3650 | |
| }, | |
| { | |
| "epoch": 0.5315904139433552, | |
| "grad_norm": 1.0481388119854531, | |
| "learning_rate": 5.32427503440059e-06, | |
| "loss": 0.5752, | |
| "step": 3660 | |
| }, | |
| { | |
| "epoch": 0.533042846768337, | |
| "grad_norm": 1.251734474368655, | |
| "learning_rate": 5.29897256703653e-06, | |
| "loss": 0.577, | |
| "step": 3670 | |
| }, | |
| { | |
| "epoch": 0.5344952795933188, | |
| "grad_norm": 1.1273771235496188, | |
| "learning_rate": 5.2736624135493465e-06, | |
| "loss": 0.5604, | |
| "step": 3680 | |
| }, | |
| { | |
| "epoch": 0.5359477124183006, | |
| "grad_norm": 1.1728285082039356, | |
| "learning_rate": 5.248345224624007e-06, | |
| "loss": 0.5799, | |
| "step": 3690 | |
| }, | |
| { | |
| "epoch": 0.5374001452432825, | |
| "grad_norm": 1.1207082347004158, | |
| "learning_rate": 5.223021651126356e-06, | |
| "loss": 0.5792, | |
| "step": 3700 | |
| }, | |
| { | |
| "epoch": 0.5388525780682644, | |
| "grad_norm": 1.096111126610637, | |
| "learning_rate": 5.197692344086369e-06, | |
| "loss": 0.582, | |
| "step": 3710 | |
| }, | |
| { | |
| "epoch": 0.5403050108932462, | |
| "grad_norm": 1.1432895144261512, | |
| "learning_rate": 5.172357954681427e-06, | |
| "loss": 0.5669, | |
| "step": 3720 | |
| }, | |
| { | |
| "epoch": 0.541757443718228, | |
| "grad_norm": 1.2795186578480655, | |
| "learning_rate": 5.147019134219569e-06, | |
| "loss": 0.5727, | |
| "step": 3730 | |
| }, | |
| { | |
| "epoch": 0.5432098765432098, | |
| "grad_norm": 1.1497619263404009, | |
| "learning_rate": 5.121676534122746e-06, | |
| "loss": 0.5665, | |
| "step": 3740 | |
| }, | |
| { | |
| "epoch": 0.5446623093681917, | |
| "grad_norm": 1.053760679670929, | |
| "learning_rate": 5.096330805910085e-06, | |
| "loss": 0.5758, | |
| "step": 3750 | |
| }, | |
| { | |
| "epoch": 0.5461147421931736, | |
| "grad_norm": 1.2455461930319618, | |
| "learning_rate": 5.0709826011811246e-06, | |
| "loss": 0.5715, | |
| "step": 3760 | |
| }, | |
| { | |
| "epoch": 0.5475671750181554, | |
| "grad_norm": 1.2714142743729588, | |
| "learning_rate": 5.045632571599076e-06, | |
| "loss": 0.5764, | |
| "step": 3770 | |
| }, | |
| { | |
| "epoch": 0.5490196078431373, | |
| "grad_norm": 1.2596602396359573, | |
| "learning_rate": 5.020281368874063e-06, | |
| "loss": 0.5777, | |
| "step": 3780 | |
| }, | |
| { | |
| "epoch": 0.5504720406681191, | |
| "grad_norm": 1.096076072807335, | |
| "learning_rate": 4.994929644746366e-06, | |
| "loss": 0.5752, | |
| "step": 3790 | |
| }, | |
| { | |
| "epoch": 0.5519244734931009, | |
| "grad_norm": 1.1180419407959938, | |
| "learning_rate": 4.969578050969675e-06, | |
| "loss": 0.5783, | |
| "step": 3800 | |
| }, | |
| { | |
| "epoch": 0.5533769063180828, | |
| "grad_norm": 1.1457632992717688, | |
| "learning_rate": 4.944227239294327e-06, | |
| "loss": 0.5706, | |
| "step": 3810 | |
| }, | |
| { | |
| "epoch": 0.5548293391430646, | |
| "grad_norm": 1.0431686309314605, | |
| "learning_rate": 4.918877861450553e-06, | |
| "loss": 0.5629, | |
| "step": 3820 | |
| }, | |
| { | |
| "epoch": 0.5562817719680465, | |
| "grad_norm": 1.1033442319502207, | |
| "learning_rate": 4.893530569131716e-06, | |
| "loss": 0.5611, | |
| "step": 3830 | |
| }, | |
| { | |
| "epoch": 0.5577342047930284, | |
| "grad_norm": 1.1929600913303742, | |
| "learning_rate": 4.8681860139775745e-06, | |
| "loss": 0.568, | |
| "step": 3840 | |
| }, | |
| { | |
| "epoch": 0.5591866376180101, | |
| "grad_norm": 1.281488846532093, | |
| "learning_rate": 4.842844847557508e-06, | |
| "loss": 0.5882, | |
| "step": 3850 | |
| }, | |
| { | |
| "epoch": 0.560639070442992, | |
| "grad_norm": 1.1195048036816224, | |
| "learning_rate": 4.817507721353785e-06, | |
| "loss": 0.596, | |
| "step": 3860 | |
| }, | |
| { | |
| "epoch": 0.5620915032679739, | |
| "grad_norm": 1.1077419816516767, | |
| "learning_rate": 4.792175286744802e-06, | |
| "loss": 0.5747, | |
| "step": 3870 | |
| }, | |
| { | |
| "epoch": 0.5635439360929557, | |
| "grad_norm": 1.3502747193694702, | |
| "learning_rate": 4.766848194988344e-06, | |
| "loss": 0.5915, | |
| "step": 3880 | |
| }, | |
| { | |
| "epoch": 0.5649963689179376, | |
| "grad_norm": 1.001203957804234, | |
| "learning_rate": 4.741527097204837e-06, | |
| "loss": 0.5732, | |
| "step": 3890 | |
| }, | |
| { | |
| "epoch": 0.5664488017429193, | |
| "grad_norm": 1.1428305709772093, | |
| "learning_rate": 4.7162126443606145e-06, | |
| "loss": 0.5682, | |
| "step": 3900 | |
| }, | |
| { | |
| "epoch": 0.5679012345679012, | |
| "grad_norm": 1.220191866232699, | |
| "learning_rate": 4.690905487251174e-06, | |
| "loss": 0.5695, | |
| "step": 3910 | |
| }, | |
| { | |
| "epoch": 0.5693536673928831, | |
| "grad_norm": 1.0555952997249456, | |
| "learning_rate": 4.665606276484455e-06, | |
| "loss": 0.5684, | |
| "step": 3920 | |
| }, | |
| { | |
| "epoch": 0.5708061002178649, | |
| "grad_norm": 1.1675138439049109, | |
| "learning_rate": 4.6403156624641085e-06, | |
| "loss": 0.5876, | |
| "step": 3930 | |
| }, | |
| { | |
| "epoch": 0.5722585330428468, | |
| "grad_norm": 1.2418849374572543, | |
| "learning_rate": 4.615034295372777e-06, | |
| "loss": 0.5838, | |
| "step": 3940 | |
| }, | |
| { | |
| "epoch": 0.5737109658678287, | |
| "grad_norm": 1.0616817293128535, | |
| "learning_rate": 4.589762825155374e-06, | |
| "loss": 0.57, | |
| "step": 3950 | |
| }, | |
| { | |
| "epoch": 0.5751633986928104, | |
| "grad_norm": 1.2414737852232787, | |
| "learning_rate": 4.564501901502386e-06, | |
| "loss": 0.5521, | |
| "step": 3960 | |
| }, | |
| { | |
| "epoch": 0.5766158315177923, | |
| "grad_norm": 1.0962764476368352, | |
| "learning_rate": 4.5392521738331585e-06, | |
| "loss": 0.5761, | |
| "step": 3970 | |
| }, | |
| { | |
| "epoch": 0.5780682643427741, | |
| "grad_norm": 1.2445755051746221, | |
| "learning_rate": 4.514014291279208e-06, | |
| "loss": 0.5612, | |
| "step": 3980 | |
| }, | |
| { | |
| "epoch": 0.579520697167756, | |
| "grad_norm": 1.1248791169953434, | |
| "learning_rate": 4.488788902667534e-06, | |
| "loss": 0.5651, | |
| "step": 3990 | |
| }, | |
| { | |
| "epoch": 0.5809731299927379, | |
| "grad_norm": 1.1052395709597995, | |
| "learning_rate": 4.463576656503927e-06, | |
| "loss": 0.5624, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 0.5824255628177197, | |
| "grad_norm": 1.0979993545936089, | |
| "learning_rate": 4.438378200956318e-06, | |
| "loss": 0.5747, | |
| "step": 4010 | |
| }, | |
| { | |
| "epoch": 0.5838779956427015, | |
| "grad_norm": 1.1585156096079503, | |
| "learning_rate": 4.413194183838091e-06, | |
| "loss": 0.5757, | |
| "step": 4020 | |
| }, | |
| { | |
| "epoch": 0.5853304284676834, | |
| "grad_norm": 1.0657343307419072, | |
| "learning_rate": 4.388025252591448e-06, | |
| "loss": 0.5826, | |
| "step": 4030 | |
| }, | |
| { | |
| "epoch": 0.5867828612926652, | |
| "grad_norm": 1.1584399941372348, | |
| "learning_rate": 4.362872054270753e-06, | |
| "loss": 0.561, | |
| "step": 4040 | |
| }, | |
| { | |
| "epoch": 0.5882352941176471, | |
| "grad_norm": 1.1136815017444102, | |
| "learning_rate": 4.337735235525904e-06, | |
| "loss": 0.5801, | |
| "step": 4050 | |
| }, | |
| { | |
| "epoch": 0.5896877269426289, | |
| "grad_norm": 1.2048049573288624, | |
| "learning_rate": 4.312615442585699e-06, | |
| "loss": 0.5748, | |
| "step": 4060 | |
| }, | |
| { | |
| "epoch": 0.5911401597676107, | |
| "grad_norm": 1.106968794623351, | |
| "learning_rate": 4.287513321241237e-06, | |
| "loss": 0.5665, | |
| "step": 4070 | |
| }, | |
| { | |
| "epoch": 0.5925925925925926, | |
| "grad_norm": 1.0773536810915454, | |
| "learning_rate": 4.262429516829299e-06, | |
| "loss": 0.5739, | |
| "step": 4080 | |
| }, | |
| { | |
| "epoch": 0.5940450254175744, | |
| "grad_norm": 1.2780512286596586, | |
| "learning_rate": 4.237364674215774e-06, | |
| "loss": 0.573, | |
| "step": 4090 | |
| }, | |
| { | |
| "epoch": 0.5954974582425563, | |
| "grad_norm": 1.015175880325257, | |
| "learning_rate": 4.212319437779066e-06, | |
| "loss": 0.5637, | |
| "step": 4100 | |
| }, | |
| { | |
| "epoch": 0.5969498910675382, | |
| "grad_norm": 1.1403330329394572, | |
| "learning_rate": 4.187294451393541e-06, | |
| "loss": 0.5807, | |
| "step": 4110 | |
| }, | |
| { | |
| "epoch": 0.59840232389252, | |
| "grad_norm": 1.1083139371642667, | |
| "learning_rate": 4.162290358412962e-06, | |
| "loss": 0.5704, | |
| "step": 4120 | |
| }, | |
| { | |
| "epoch": 0.5998547567175018, | |
| "grad_norm": 1.1372343052927192, | |
| "learning_rate": 4.1373078016539535e-06, | |
| "loss": 0.5559, | |
| "step": 4130 | |
| }, | |
| { | |
| "epoch": 0.6013071895424836, | |
| "grad_norm": 1.2137905963682751, | |
| "learning_rate": 4.1123474233794845e-06, | |
| "loss": 0.5588, | |
| "step": 4140 | |
| }, | |
| { | |
| "epoch": 0.6027596223674655, | |
| "grad_norm": 1.2130103389722957, | |
| "learning_rate": 4.087409865282341e-06, | |
| "loss": 0.5776, | |
| "step": 4150 | |
| }, | |
| { | |
| "epoch": 0.6042120551924474, | |
| "grad_norm": 1.21914550825707, | |
| "learning_rate": 4.062495768468646e-06, | |
| "loss": 0.5618, | |
| "step": 4160 | |
| }, | |
| { | |
| "epoch": 0.6056644880174292, | |
| "grad_norm": 1.1540562248868875, | |
| "learning_rate": 4.03760577344136e-06, | |
| "loss": 0.5784, | |
| "step": 4170 | |
| }, | |
| { | |
| "epoch": 0.6071169208424111, | |
| "grad_norm": 1.214796762228358, | |
| "learning_rate": 4.012740520083832e-06, | |
| "loss": 0.5814, | |
| "step": 4180 | |
| }, | |
| { | |
| "epoch": 0.6085693536673928, | |
| "grad_norm": 1.157806370832285, | |
| "learning_rate": 3.987900647643334e-06, | |
| "loss": 0.5791, | |
| "step": 4190 | |
| }, | |
| { | |
| "epoch": 0.6100217864923747, | |
| "grad_norm": 1.1517956672556253, | |
| "learning_rate": 3.963086794714639e-06, | |
| "loss": 0.5652, | |
| "step": 4200 | |
| }, | |
| { | |
| "epoch": 0.6114742193173566, | |
| "grad_norm": 1.1605789001720612, | |
| "learning_rate": 3.9382995992235955e-06, | |
| "loss": 0.5728, | |
| "step": 4210 | |
| }, | |
| { | |
| "epoch": 0.6129266521423384, | |
| "grad_norm": 1.0630436480054268, | |
| "learning_rate": 3.913539698410734e-06, | |
| "loss": 0.5684, | |
| "step": 4220 | |
| }, | |
| { | |
| "epoch": 0.6143790849673203, | |
| "grad_norm": 1.175513347812724, | |
| "learning_rate": 3.888807728814874e-06, | |
| "loss": 0.5664, | |
| "step": 4230 | |
| }, | |
| { | |
| "epoch": 0.615831517792302, | |
| "grad_norm": 1.1583525329647688, | |
| "learning_rate": 3.864104326256775e-06, | |
| "loss": 0.5805, | |
| "step": 4240 | |
| }, | |
| { | |
| "epoch": 0.6172839506172839, | |
| "grad_norm": 1.1058170223844426, | |
| "learning_rate": 3.8394301258227756e-06, | |
| "loss": 0.5622, | |
| "step": 4250 | |
| }, | |
| { | |
| "epoch": 0.6187363834422658, | |
| "grad_norm": 1.2295319541574912, | |
| "learning_rate": 3.814785761848475e-06, | |
| "loss": 0.5583, | |
| "step": 4260 | |
| }, | |
| { | |
| "epoch": 0.6201888162672476, | |
| "grad_norm": 1.092280135001415, | |
| "learning_rate": 3.790171867902426e-06, | |
| "loss": 0.5755, | |
| "step": 4270 | |
| }, | |
| { | |
| "epoch": 0.6216412490922295, | |
| "grad_norm": 1.274653674496685, | |
| "learning_rate": 3.7655890767698384e-06, | |
| "loss": 0.5729, | |
| "step": 4280 | |
| }, | |
| { | |
| "epoch": 0.6230936819172114, | |
| "grad_norm": 1.2166924621577075, | |
| "learning_rate": 3.741038020436323e-06, | |
| "loss": 0.5572, | |
| "step": 4290 | |
| }, | |
| { | |
| "epoch": 0.6245461147421931, | |
| "grad_norm": 1.0296689666125658, | |
| "learning_rate": 3.7165193300716297e-06, | |
| "loss": 0.5664, | |
| "step": 4300 | |
| }, | |
| { | |
| "epoch": 0.625998547567175, | |
| "grad_norm": 1.0530929308425294, | |
| "learning_rate": 3.6920336360134378e-06, | |
| "loss": 0.5679, | |
| "step": 4310 | |
| }, | |
| { | |
| "epoch": 0.6274509803921569, | |
| "grad_norm": 1.1137539642969592, | |
| "learning_rate": 3.6675815677511382e-06, | |
| "loss": 0.5607, | |
| "step": 4320 | |
| }, | |
| { | |
| "epoch": 0.6289034132171387, | |
| "grad_norm": 1.0875536687719785, | |
| "learning_rate": 3.6431637539096565e-06, | |
| "loss": 0.5691, | |
| "step": 4330 | |
| }, | |
| { | |
| "epoch": 0.6303558460421206, | |
| "grad_norm": 1.1268225507247402, | |
| "learning_rate": 3.6187808222332852e-06, | |
| "loss": 0.5668, | |
| "step": 4340 | |
| }, | |
| { | |
| "epoch": 0.6318082788671024, | |
| "grad_norm": 1.1757316218974525, | |
| "learning_rate": 3.594433399569559e-06, | |
| "loss": 0.5551, | |
| "step": 4350 | |
| }, | |
| { | |
| "epoch": 0.6332607116920842, | |
| "grad_norm": 1.1554119314408926, | |
| "learning_rate": 3.5701221118531195e-06, | |
| "loss": 0.5785, | |
| "step": 4360 | |
| }, | |
| { | |
| "epoch": 0.6347131445170661, | |
| "grad_norm": 1.0947128171930913, | |
| "learning_rate": 3.5458475840896434e-06, | |
| "loss": 0.5677, | |
| "step": 4370 | |
| }, | |
| { | |
| "epoch": 0.6361655773420479, | |
| "grad_norm": 1.2477952532418557, | |
| "learning_rate": 3.5216104403397623e-06, | |
| "loss": 0.5504, | |
| "step": 4380 | |
| }, | |
| { | |
| "epoch": 0.6376180101670298, | |
| "grad_norm": 1.1149755483280817, | |
| "learning_rate": 3.4974113037030257e-06, | |
| "loss": 0.5753, | |
| "step": 4390 | |
| }, | |
| { | |
| "epoch": 0.6390704429920117, | |
| "grad_norm": 1.214526641921585, | |
| "learning_rate": 3.473250796301874e-06, | |
| "loss": 0.5669, | |
| "step": 4400 | |
| }, | |
| { | |
| "epoch": 0.6405228758169934, | |
| "grad_norm": 1.1149175312128623, | |
| "learning_rate": 3.4491295392656497e-06, | |
| "loss": 0.5604, | |
| "step": 4410 | |
| }, | |
| { | |
| "epoch": 0.6419753086419753, | |
| "grad_norm": 1.1763746140746527, | |
| "learning_rate": 3.425048152714635e-06, | |
| "loss": 0.5651, | |
| "step": 4420 | |
| }, | |
| { | |
| "epoch": 0.6434277414669571, | |
| "grad_norm": 1.169802661186734, | |
| "learning_rate": 3.4010072557440967e-06, | |
| "loss": 0.5685, | |
| "step": 4430 | |
| }, | |
| { | |
| "epoch": 0.644880174291939, | |
| "grad_norm": 1.1404701148865375, | |
| "learning_rate": 3.3770074664083827e-06, | |
| "loss": 0.577, | |
| "step": 4440 | |
| }, | |
| { | |
| "epoch": 0.6463326071169209, | |
| "grad_norm": 1.2951511455390947, | |
| "learning_rate": 3.353049401705022e-06, | |
| "loss": 0.5546, | |
| "step": 4450 | |
| }, | |
| { | |
| "epoch": 0.6477850399419027, | |
| "grad_norm": 1.2188858191779428, | |
| "learning_rate": 3.329133677558873e-06, | |
| "loss": 0.5697, | |
| "step": 4460 | |
| }, | |
| { | |
| "epoch": 0.6492374727668845, | |
| "grad_norm": 1.1239635889524127, | |
| "learning_rate": 3.3052609088062767e-06, | |
| "loss": 0.5901, | |
| "step": 4470 | |
| }, | |
| { | |
| "epoch": 0.6506899055918663, | |
| "grad_norm": 1.0931476283773633, | |
| "learning_rate": 3.281431709179264e-06, | |
| "loss": 0.566, | |
| "step": 4480 | |
| }, | |
| { | |
| "epoch": 0.6521423384168482, | |
| "grad_norm": 1.4718901865939953, | |
| "learning_rate": 3.2576466912897674e-06, | |
| "loss": 0.5761, | |
| "step": 4490 | |
| }, | |
| { | |
| "epoch": 0.6535947712418301, | |
| "grad_norm": 1.2062192465520678, | |
| "learning_rate": 3.2339064666138783e-06, | |
| "loss": 0.5757, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 0.6550472040668119, | |
| "grad_norm": 1.2732571104572175, | |
| "learning_rate": 3.2102116454761168e-06, | |
| "loss": 0.5615, | |
| "step": 4510 | |
| }, | |
| { | |
| "epoch": 0.6564996368917938, | |
| "grad_norm": 1.198522063919598, | |
| "learning_rate": 3.1865628370337575e-06, | |
| "loss": 0.5632, | |
| "step": 4520 | |
| }, | |
| { | |
| "epoch": 0.6579520697167756, | |
| "grad_norm": 1.208764455797361, | |
| "learning_rate": 3.162960649261152e-06, | |
| "loss": 0.5472, | |
| "step": 4530 | |
| }, | |
| { | |
| "epoch": 0.6594045025417574, | |
| "grad_norm": 1.2300085896818644, | |
| "learning_rate": 3.1394056889341086e-06, | |
| "loss": 0.5737, | |
| "step": 4540 | |
| }, | |
| { | |
| "epoch": 0.6608569353667393, | |
| "grad_norm": 1.2362227883984134, | |
| "learning_rate": 3.1158985616142944e-06, | |
| "loss": 0.5467, | |
| "step": 4550 | |
| }, | |
| { | |
| "epoch": 0.6623093681917211, | |
| "grad_norm": 1.2577141886691818, | |
| "learning_rate": 3.092439871633658e-06, | |
| "loss": 0.5652, | |
| "step": 4560 | |
| }, | |
| { | |
| "epoch": 0.663761801016703, | |
| "grad_norm": 1.2246719550977323, | |
| "learning_rate": 3.0690302220789036e-06, | |
| "loss": 0.564, | |
| "step": 4570 | |
| }, | |
| { | |
| "epoch": 0.6652142338416849, | |
| "grad_norm": 0.952770111510269, | |
| "learning_rate": 3.0456702147759797e-06, | |
| "loss": 0.5538, | |
| "step": 4580 | |
| }, | |
| { | |
| "epoch": 0.6666666666666666, | |
| "grad_norm": 1.2114290005968387, | |
| "learning_rate": 3.0223604502746097e-06, | |
| "loss": 0.5624, | |
| "step": 4590 | |
| }, | |
| { | |
| "epoch": 0.6681190994916485, | |
| "grad_norm": 1.2379634249474247, | |
| "learning_rate": 2.999101527832849e-06, | |
| "loss": 0.5581, | |
| "step": 4600 | |
| }, | |
| { | |
| "epoch": 0.6695715323166304, | |
| "grad_norm": 1.2432970361649818, | |
| "learning_rate": 2.9758940454016893e-06, | |
| "loss": 0.5519, | |
| "step": 4610 | |
| }, | |
| { | |
| "epoch": 0.6710239651416122, | |
| "grad_norm": 1.1827840525798392, | |
| "learning_rate": 2.9527385996096702e-06, | |
| "loss": 0.5512, | |
| "step": 4620 | |
| }, | |
| { | |
| "epoch": 0.6724763979665941, | |
| "grad_norm": 1.1313263342846276, | |
| "learning_rate": 2.929635785747558e-06, | |
| "loss": 0.5615, | |
| "step": 4630 | |
| }, | |
| { | |
| "epoch": 0.6739288307915758, | |
| "grad_norm": 1.0718626125088186, | |
| "learning_rate": 2.9065861977530263e-06, | |
| "loss": 0.5577, | |
| "step": 4640 | |
| }, | |
| { | |
| "epoch": 0.6753812636165577, | |
| "grad_norm": 1.2058366328226908, | |
| "learning_rate": 2.8835904281953984e-06, | |
| "loss": 0.5543, | |
| "step": 4650 | |
| }, | |
| { | |
| "epoch": 0.6768336964415396, | |
| "grad_norm": 1.2044090066060698, | |
| "learning_rate": 2.8606490682604083e-06, | |
| "loss": 0.563, | |
| "step": 4660 | |
| }, | |
| { | |
| "epoch": 0.6782861292665214, | |
| "grad_norm": 1.2440783490748353, | |
| "learning_rate": 2.837762707734999e-06, | |
| "loss": 0.5678, | |
| "step": 4670 | |
| }, | |
| { | |
| "epoch": 0.6797385620915033, | |
| "grad_norm": 1.1447619754452882, | |
| "learning_rate": 2.8149319349921678e-06, | |
| "loss": 0.5443, | |
| "step": 4680 | |
| }, | |
| { | |
| "epoch": 0.6811909949164852, | |
| "grad_norm": 1.0682059420594845, | |
| "learning_rate": 2.7921573369758344e-06, | |
| "loss": 0.5548, | |
| "step": 4690 | |
| }, | |
| { | |
| "epoch": 0.6826434277414669, | |
| "grad_norm": 1.0786981942796325, | |
| "learning_rate": 2.769439499185752e-06, | |
| "loss": 0.557, | |
| "step": 4700 | |
| }, | |
| { | |
| "epoch": 0.6840958605664488, | |
| "grad_norm": 1.1021974391300458, | |
| "learning_rate": 2.7467790056624565e-06, | |
| "loss": 0.5641, | |
| "step": 4710 | |
| }, | |
| { | |
| "epoch": 0.6855482933914306, | |
| "grad_norm": 1.172642324603278, | |
| "learning_rate": 2.7241764389722536e-06, | |
| "loss": 0.5579, | |
| "step": 4720 | |
| }, | |
| { | |
| "epoch": 0.6870007262164125, | |
| "grad_norm": 1.1739344769196898, | |
| "learning_rate": 2.7016323801922327e-06, | |
| "loss": 0.5426, | |
| "step": 4730 | |
| }, | |
| { | |
| "epoch": 0.6884531590413944, | |
| "grad_norm": 1.0908808031509236, | |
| "learning_rate": 2.679147408895349e-06, | |
| "loss": 0.5667, | |
| "step": 4740 | |
| }, | |
| { | |
| "epoch": 0.6899055918663762, | |
| "grad_norm": 1.1345661062696517, | |
| "learning_rate": 2.6567221031354907e-06, | |
| "loss": 0.5639, | |
| "step": 4750 | |
| }, | |
| { | |
| "epoch": 0.691358024691358, | |
| "grad_norm": 1.0249096917283105, | |
| "learning_rate": 2.634357039432656e-06, | |
| "loss": 0.5648, | |
| "step": 4760 | |
| }, | |
| { | |
| "epoch": 0.6928104575163399, | |
| "grad_norm": 1.1583880032183098, | |
| "learning_rate": 2.612052792758095e-06, | |
| "loss": 0.5651, | |
| "step": 4770 | |
| }, | |
| { | |
| "epoch": 0.6942628903413217, | |
| "grad_norm": 1.069684864764473, | |
| "learning_rate": 2.5898099365195626e-06, | |
| "loss": 0.5722, | |
| "step": 4780 | |
| }, | |
| { | |
| "epoch": 0.6957153231663036, | |
| "grad_norm": 1.0867414593247826, | |
| "learning_rate": 2.5676290425465496e-06, | |
| "loss": 0.5664, | |
| "step": 4790 | |
| }, | |
| { | |
| "epoch": 0.6971677559912854, | |
| "grad_norm": 1.1375716473128172, | |
| "learning_rate": 2.5455106810755957e-06, | |
| "loss": 0.5585, | |
| "step": 4800 | |
| }, | |
| { | |
| "epoch": 0.6986201888162672, | |
| "grad_norm": 1.034623153574018, | |
| "learning_rate": 2.5234554207356266e-06, | |
| "loss": 0.5722, | |
| "step": 4810 | |
| }, | |
| { | |
| "epoch": 0.7000726216412491, | |
| "grad_norm": 1.0654655922639538, | |
| "learning_rate": 2.5014638285333357e-06, | |
| "loss": 0.5643, | |
| "step": 4820 | |
| }, | |
| { | |
| "epoch": 0.7015250544662309, | |
| "grad_norm": 1.0988829596394427, | |
| "learning_rate": 2.479536469838606e-06, | |
| "loss": 0.5635, | |
| "step": 4830 | |
| }, | |
| { | |
| "epoch": 0.7029774872912128, | |
| "grad_norm": 1.050301540250255, | |
| "learning_rate": 2.4576739083699764e-06, | |
| "loss": 0.55, | |
| "step": 4840 | |
| }, | |
| { | |
| "epoch": 0.7044299201161947, | |
| "grad_norm": 1.3185971209726384, | |
| "learning_rate": 2.43587670618015e-06, | |
| "loss": 0.5686, | |
| "step": 4850 | |
| }, | |
| { | |
| "epoch": 0.7058823529411765, | |
| "grad_norm": 1.1036440984293434, | |
| "learning_rate": 2.4141454236415428e-06, | |
| "loss": 0.5617, | |
| "step": 4860 | |
| }, | |
| { | |
| "epoch": 0.7073347857661583, | |
| "grad_norm": 1.0669150287420783, | |
| "learning_rate": 2.392480619431879e-06, | |
| "loss": 0.5416, | |
| "step": 4870 | |
| }, | |
| { | |
| "epoch": 0.7087872185911401, | |
| "grad_norm": 1.0472161733755885, | |
| "learning_rate": 2.3708828505198265e-06, | |
| "loss": 0.5777, | |
| "step": 4880 | |
| }, | |
| { | |
| "epoch": 0.710239651416122, | |
| "grad_norm": 1.1252884484776227, | |
| "learning_rate": 2.349352672150681e-06, | |
| "loss": 0.5535, | |
| "step": 4890 | |
| }, | |
| { | |
| "epoch": 0.7116920842411039, | |
| "grad_norm": 1.1423409076437527, | |
| "learning_rate": 2.3278906378320854e-06, | |
| "loss": 0.5598, | |
| "step": 4900 | |
| }, | |
| { | |
| "epoch": 0.7131445170660857, | |
| "grad_norm": 0.9801237939355479, | |
| "learning_rate": 2.306497299319814e-06, | |
| "loss": 0.5551, | |
| "step": 4910 | |
| }, | |
| { | |
| "epoch": 0.7145969498910676, | |
| "grad_norm": 1.0526887175825372, | |
| "learning_rate": 2.285173206603564e-06, | |
| "loss": 0.5683, | |
| "step": 4920 | |
| }, | |
| { | |
| "epoch": 0.7160493827160493, | |
| "grad_norm": 1.1758853714133906, | |
| "learning_rate": 2.2639189078928453e-06, | |
| "loss": 0.5581, | |
| "step": 4930 | |
| }, | |
| { | |
| "epoch": 0.7175018155410312, | |
| "grad_norm": 1.107044757903735, | |
| "learning_rate": 2.242734949602856e-06, | |
| "loss": 0.5448, | |
| "step": 4940 | |
| }, | |
| { | |
| "epoch": 0.7189542483660131, | |
| "grad_norm": 1.2037164103649114, | |
| "learning_rate": 2.2216218763404647e-06, | |
| "loss": 0.5531, | |
| "step": 4950 | |
| }, | |
| { | |
| "epoch": 0.7204066811909949, | |
| "grad_norm": 1.0588992084011324, | |
| "learning_rate": 2.200580230890188e-06, | |
| "loss": 0.5501, | |
| "step": 4960 | |
| }, | |
| { | |
| "epoch": 0.7218591140159768, | |
| "grad_norm": 1.2543824405997601, | |
| "learning_rate": 2.17961055420024e-06, | |
| "loss": 0.5769, | |
| "step": 4970 | |
| }, | |
| { | |
| "epoch": 0.7233115468409586, | |
| "grad_norm": 1.1899069770329052, | |
| "learning_rate": 2.1587133853686422e-06, | |
| "loss": 0.5683, | |
| "step": 4980 | |
| }, | |
| { | |
| "epoch": 0.7247639796659404, | |
| "grad_norm": 1.144536370052011, | |
| "learning_rate": 2.137889261629334e-06, | |
| "loss": 0.5648, | |
| "step": 4990 | |
| }, | |
| { | |
| "epoch": 0.7262164124909223, | |
| "grad_norm": 1.1936078152653293, | |
| "learning_rate": 2.1171387183383936e-06, | |
| "loss": 0.5646, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 0.7276688453159041, | |
| "grad_norm": 1.26324013915445, | |
| "learning_rate": 2.096462288960251e-06, | |
| "loss": 0.5682, | |
| "step": 5010 | |
| }, | |
| { | |
| "epoch": 0.729121278140886, | |
| "grad_norm": 1.1381437228179463, | |
| "learning_rate": 2.0758605050539836e-06, | |
| "loss": 0.5571, | |
| "step": 5020 | |
| }, | |
| { | |
| "epoch": 0.7305737109658679, | |
| "grad_norm": 1.3500933515295954, | |
| "learning_rate": 2.0553338962596492e-06, | |
| "loss": 0.5716, | |
| "step": 5030 | |
| }, | |
| { | |
| "epoch": 0.7320261437908496, | |
| "grad_norm": 1.0940717331908218, | |
| "learning_rate": 2.03488299028467e-06, | |
| "loss": 0.5626, | |
| "step": 5040 | |
| }, | |
| { | |
| "epoch": 0.7334785766158315, | |
| "grad_norm": 1.1116999445105729, | |
| "learning_rate": 2.0145083128902647e-06, | |
| "loss": 0.5625, | |
| "step": 5050 | |
| }, | |
| { | |
| "epoch": 0.7349310094408134, | |
| "grad_norm": 1.144025480175903, | |
| "learning_rate": 1.9942103878779335e-06, | |
| "loss": 0.5601, | |
| "step": 5060 | |
| }, | |
| { | |
| "epoch": 0.7363834422657952, | |
| "grad_norm": 1.0557283567612936, | |
| "learning_rate": 1.9739897370759886e-06, | |
| "loss": 0.5523, | |
| "step": 5070 | |
| }, | |
| { | |
| "epoch": 0.7378358750907771, | |
| "grad_norm": 1.243995372081041, | |
| "learning_rate": 1.9538468803261514e-06, | |
| "loss": 0.5521, | |
| "step": 5080 | |
| }, | |
| { | |
| "epoch": 0.739288307915759, | |
| "grad_norm": 1.1122614530495916, | |
| "learning_rate": 1.9337823354701617e-06, | |
| "loss": 0.5615, | |
| "step": 5090 | |
| }, | |
| { | |
| "epoch": 0.7407407407407407, | |
| "grad_norm": 1.012804702506735, | |
| "learning_rate": 1.913796618336499e-06, | |
| "loss": 0.5514, | |
| "step": 5100 | |
| }, | |
| { | |
| "epoch": 0.7421931735657226, | |
| "grad_norm": 1.1487569184157758, | |
| "learning_rate": 1.8938902427270905e-06, | |
| "loss": 0.5595, | |
| "step": 5110 | |
| }, | |
| { | |
| "epoch": 0.7436456063907044, | |
| "grad_norm": 1.222308594990331, | |
| "learning_rate": 1.8740637204041195e-06, | |
| "loss": 0.5645, | |
| "step": 5120 | |
| }, | |
| { | |
| "epoch": 0.7450980392156863, | |
| "grad_norm": 1.1354476091482255, | |
| "learning_rate": 1.8543175610768715e-06, | |
| "loss": 0.5607, | |
| "step": 5130 | |
| }, | |
| { | |
| "epoch": 0.7465504720406682, | |
| "grad_norm": 1.2205544178436005, | |
| "learning_rate": 1.83465227238861e-06, | |
| "loss": 0.542, | |
| "step": 5140 | |
| }, | |
| { | |
| "epoch": 0.7480029048656499, | |
| "grad_norm": 1.2462160753237452, | |
| "learning_rate": 1.8150683599035517e-06, | |
| "loss": 0.5606, | |
| "step": 5150 | |
| }, | |
| { | |
| "epoch": 0.7494553376906318, | |
| "grad_norm": 1.1396860492016365, | |
| "learning_rate": 1.7955663270938501e-06, | |
| "loss": 0.5689, | |
| "step": 5160 | |
| }, | |
| { | |
| "epoch": 0.7509077705156136, | |
| "grad_norm": 1.1228524828818305, | |
| "learning_rate": 1.7761466753266598e-06, | |
| "loss": 0.5625, | |
| "step": 5170 | |
| }, | |
| { | |
| "epoch": 0.7523602033405955, | |
| "grad_norm": 1.1360291736903685, | |
| "learning_rate": 1.7568099038512466e-06, | |
| "loss": 0.5724, | |
| "step": 5180 | |
| }, | |
| { | |
| "epoch": 0.7538126361655774, | |
| "grad_norm": 1.226701284666325, | |
| "learning_rate": 1.7375565097861518e-06, | |
| "loss": 0.5653, | |
| "step": 5190 | |
| }, | |
| { | |
| "epoch": 0.7552650689905592, | |
| "grad_norm": 1.1971595467490777, | |
| "learning_rate": 1.7183869881064125e-06, | |
| "loss": 0.5681, | |
| "step": 5200 | |
| }, | |
| { | |
| "epoch": 0.756717501815541, | |
| "grad_norm": 1.003433379963408, | |
| "learning_rate": 1.6993018316308351e-06, | |
| "loss": 0.5497, | |
| "step": 5210 | |
| }, | |
| { | |
| "epoch": 0.7581699346405228, | |
| "grad_norm": 1.0677706687056256, | |
| "learning_rate": 1.6803015310093286e-06, | |
| "loss": 0.5663, | |
| "step": 5220 | |
| }, | |
| { | |
| "epoch": 0.7596223674655047, | |
| "grad_norm": 1.1960572257973088, | |
| "learning_rate": 1.6613865747102876e-06, | |
| "loss": 0.5566, | |
| "step": 5230 | |
| }, | |
| { | |
| "epoch": 0.7610748002904866, | |
| "grad_norm": 1.1110041512712467, | |
| "learning_rate": 1.6425574490080355e-06, | |
| "loss": 0.5474, | |
| "step": 5240 | |
| }, | |
| { | |
| "epoch": 0.7625272331154684, | |
| "grad_norm": 1.1953866183465143, | |
| "learning_rate": 1.6238146379703257e-06, | |
| "loss": 0.5602, | |
| "step": 5250 | |
| }, | |
| { | |
| "epoch": 0.7639796659404503, | |
| "grad_norm": 1.184221410195916, | |
| "learning_rate": 1.6051586234458932e-06, | |
| "loss": 0.558, | |
| "step": 5260 | |
| }, | |
| { | |
| "epoch": 0.7654320987654321, | |
| "grad_norm": 1.1917994670950118, | |
| "learning_rate": 1.5865898850520671e-06, | |
| "loss": 0.573, | |
| "step": 5270 | |
| }, | |
| { | |
| "epoch": 0.7668845315904139, | |
| "grad_norm": 1.205079091727242, | |
| "learning_rate": 1.5681089001624488e-06, | |
| "loss": 0.5565, | |
| "step": 5280 | |
| }, | |
| { | |
| "epoch": 0.7683369644153958, | |
| "grad_norm": 1.0590014592765518, | |
| "learning_rate": 1.5497161438946218e-06, | |
| "loss": 0.5537, | |
| "step": 5290 | |
| }, | |
| { | |
| "epoch": 0.7697893972403776, | |
| "grad_norm": 1.3045355829406655, | |
| "learning_rate": 1.5314120890979596e-06, | |
| "loss": 0.5608, | |
| "step": 5300 | |
| }, | |
| { | |
| "epoch": 0.7712418300653595, | |
| "grad_norm": 1.227226173650366, | |
| "learning_rate": 1.5131972063414451e-06, | |
| "loss": 0.563, | |
| "step": 5310 | |
| }, | |
| { | |
| "epoch": 0.7726942628903414, | |
| "grad_norm": 1.1505400844326525, | |
| "learning_rate": 1.4950719639015987e-06, | |
| "loss": 0.5618, | |
| "step": 5320 | |
| }, | |
| { | |
| "epoch": 0.7741466957153231, | |
| "grad_norm": 1.1971910791582392, | |
| "learning_rate": 1.4770368277504183e-06, | |
| "loss": 0.5559, | |
| "step": 5330 | |
| }, | |
| { | |
| "epoch": 0.775599128540305, | |
| "grad_norm": 1.1465426761189066, | |
| "learning_rate": 1.45909226154341e-06, | |
| "loss": 0.5757, | |
| "step": 5340 | |
| }, | |
| { | |
| "epoch": 0.7770515613652869, | |
| "grad_norm": 1.0530342043982832, | |
| "learning_rate": 1.4412387266076677e-06, | |
| "loss": 0.5699, | |
| "step": 5350 | |
| }, | |
| { | |
| "epoch": 0.7785039941902687, | |
| "grad_norm": 1.1921772808125664, | |
| "learning_rate": 1.4234766819300106e-06, | |
| "loss": 0.5592, | |
| "step": 5360 | |
| }, | |
| { | |
| "epoch": 0.7799564270152506, | |
| "grad_norm": 1.1969217401024441, | |
| "learning_rate": 1.4058065841451856e-06, | |
| "loss": 0.5658, | |
| "step": 5370 | |
| }, | |
| { | |
| "epoch": 0.7814088598402323, | |
| "grad_norm": 1.1371738180522346, | |
| "learning_rate": 1.3882288875241262e-06, | |
| "loss": 0.5523, | |
| "step": 5380 | |
| }, | |
| { | |
| "epoch": 0.7828612926652142, | |
| "grad_norm": 1.119312116230787, | |
| "learning_rate": 1.3707440439622754e-06, | |
| "loss": 0.5501, | |
| "step": 5390 | |
| }, | |
| { | |
| "epoch": 0.7843137254901961, | |
| "grad_norm": 1.200972988458609, | |
| "learning_rate": 1.353352502967966e-06, | |
| "loss": 0.5393, | |
| "step": 5400 | |
| }, | |
| { | |
| "epoch": 0.7857661583151779, | |
| "grad_norm": 1.005244568846047, | |
| "learning_rate": 1.336054711650867e-06, | |
| "loss": 0.5552, | |
| "step": 5410 | |
| }, | |
| { | |
| "epoch": 0.7872185911401598, | |
| "grad_norm": 0.9811514201367332, | |
| "learning_rate": 1.3188511147104882e-06, | |
| "loss": 0.5615, | |
| "step": 5420 | |
| }, | |
| { | |
| "epoch": 0.7886710239651417, | |
| "grad_norm": 1.2124333619418073, | |
| "learning_rate": 1.3017421544247466e-06, | |
| "loss": 0.5731, | |
| "step": 5430 | |
| }, | |
| { | |
| "epoch": 0.7901234567901234, | |
| "grad_norm": 1.0164638888045425, | |
| "learning_rate": 1.2847282706385962e-06, | |
| "loss": 0.5449, | |
| "step": 5440 | |
| }, | |
| { | |
| "epoch": 0.7915758896151053, | |
| "grad_norm": 1.0692055130184748, | |
| "learning_rate": 1.267809900752725e-06, | |
| "loss": 0.5581, | |
| "step": 5450 | |
| }, | |
| { | |
| "epoch": 0.7930283224400871, | |
| "grad_norm": 1.2243966381535343, | |
| "learning_rate": 1.2509874797122983e-06, | |
| "loss": 0.5694, | |
| "step": 5460 | |
| }, | |
| { | |
| "epoch": 0.794480755265069, | |
| "grad_norm": 1.1192058071022615, | |
| "learning_rate": 1.2342614399957952e-06, | |
| "loss": 0.5601, | |
| "step": 5470 | |
| }, | |
| { | |
| "epoch": 0.7959331880900509, | |
| "grad_norm": 1.210664779695526, | |
| "learning_rate": 1.217632211603868e-06, | |
| "loss": 0.5383, | |
| "step": 5480 | |
| }, | |
| { | |
| "epoch": 0.7973856209150327, | |
| "grad_norm": 1.2306429782422048, | |
| "learning_rate": 1.2011002220483099e-06, | |
| "loss": 0.5503, | |
| "step": 5490 | |
| }, | |
| { | |
| "epoch": 0.7988380537400145, | |
| "grad_norm": 1.1449496150562748, | |
| "learning_rate": 1.1846658963410472e-06, | |
| "loss": 0.561, | |
| "step": 5500 | |
| }, | |
| { | |
| "epoch": 0.8002904865649964, | |
| "grad_norm": 1.1809146975647171, | |
| "learning_rate": 1.168329656983222e-06, | |
| "loss": 0.5489, | |
| "step": 5510 | |
| }, | |
| { | |
| "epoch": 0.8017429193899782, | |
| "grad_norm": 1.1865786985653701, | |
| "learning_rate": 1.1520919239543272e-06, | |
| "loss": 0.5443, | |
| "step": 5520 | |
| }, | |
| { | |
| "epoch": 0.8031953522149601, | |
| "grad_norm": 1.2819514449232758, | |
| "learning_rate": 1.1359531147014102e-06, | |
| "loss": 0.5784, | |
| "step": 5530 | |
| }, | |
| { | |
| "epoch": 0.8046477850399419, | |
| "grad_norm": 1.140249494732679, | |
| "learning_rate": 1.11991364412834e-06, | |
| "loss": 0.5472, | |
| "step": 5540 | |
| }, | |
| { | |
| "epoch": 0.8061002178649237, | |
| "grad_norm": 1.0963574239357976, | |
| "learning_rate": 1.1039739245851426e-06, | |
| "loss": 0.5614, | |
| "step": 5550 | |
| }, | |
| { | |
| "epoch": 0.8075526506899056, | |
| "grad_norm": 1.1963836912036798, | |
| "learning_rate": 1.088134365857399e-06, | |
| "loss": 0.5516, | |
| "step": 5560 | |
| }, | |
| { | |
| "epoch": 0.8090050835148874, | |
| "grad_norm": 1.320400739555157, | |
| "learning_rate": 1.0723953751557098e-06, | |
| "loss": 0.5643, | |
| "step": 5570 | |
| }, | |
| { | |
| "epoch": 0.8104575163398693, | |
| "grad_norm": 1.2261172403861758, | |
| "learning_rate": 1.0567573571052265e-06, | |
| "loss": 0.545, | |
| "step": 5580 | |
| }, | |
| { | |
| "epoch": 0.8119099491648512, | |
| "grad_norm": 1.1363072652624087, | |
| "learning_rate": 1.0412207137352504e-06, | |
| "loss": 0.5562, | |
| "step": 5590 | |
| }, | |
| { | |
| "epoch": 0.813362381989833, | |
| "grad_norm": 1.0696753091917897, | |
| "learning_rate": 1.0257858444688968e-06, | |
| "loss": 0.5584, | |
| "step": 5600 | |
| }, | |
| { | |
| "epoch": 0.8148148148148148, | |
| "grad_norm": 1.092336652561905, | |
| "learning_rate": 1.0104531461128224e-06, | |
| "loss": 0.5509, | |
| "step": 5610 | |
| }, | |
| { | |
| "epoch": 0.8162672476397966, | |
| "grad_norm": 1.2190453226296554, | |
| "learning_rate": 9.952230128470358e-07, | |
| "loss": 0.5552, | |
| "step": 5620 | |
| }, | |
| { | |
| "epoch": 0.8177196804647785, | |
| "grad_norm": 1.1756174285580154, | |
| "learning_rate": 9.800958362147433e-07, | |
| "loss": 0.5611, | |
| "step": 5630 | |
| }, | |
| { | |
| "epoch": 0.8191721132897604, | |
| "grad_norm": 1.050298389841538, | |
| "learning_rate": 9.65072005112308e-07, | |
| "loss": 0.5536, | |
| "step": 5640 | |
| }, | |
| { | |
| "epoch": 0.8206245461147422, | |
| "grad_norm": 1.2990174959407426, | |
| "learning_rate": 9.501519057792275e-07, | |
| "loss": 0.5495, | |
| "step": 5650 | |
| }, | |
| { | |
| "epoch": 0.8220769789397241, | |
| "grad_norm": 1.1318695700100998, | |
| "learning_rate": 9.353359217882241e-07, | |
| "loss": 0.5557, | |
| "step": 5660 | |
| }, | |
| { | |
| "epoch": 0.8235294117647058, | |
| "grad_norm": 1.1818056539247317, | |
| "learning_rate": 9.206244340353732e-07, | |
| "loss": 0.5703, | |
| "step": 5670 | |
| }, | |
| { | |
| "epoch": 0.8249818445896877, | |
| "grad_norm": 1.191491253002993, | |
| "learning_rate": 9.060178207303077e-07, | |
| "loss": 0.5543, | |
| "step": 5680 | |
| }, | |
| { | |
| "epoch": 0.8264342774146696, | |
| "grad_norm": 1.2775803771232788, | |
| "learning_rate": 8.915164573865109e-07, | |
| "loss": 0.5673, | |
| "step": 5690 | |
| }, | |
| { | |
| "epoch": 0.8278867102396514, | |
| "grad_norm": 1.0993365384271814, | |
| "learning_rate": 8.771207168116407e-07, | |
| "loss": 0.5526, | |
| "step": 5700 | |
| }, | |
| { | |
| "epoch": 0.8293391430646333, | |
| "grad_norm": 1.2010857578242673, | |
| "learning_rate": 8.628309690979658e-07, | |
| "loss": 0.5465, | |
| "step": 5710 | |
| }, | |
| { | |
| "epoch": 0.830791575889615, | |
| "grad_norm": 1.1363204888828164, | |
| "learning_rate": 8.486475816128376e-07, | |
| "loss": 0.5522, | |
| "step": 5720 | |
| }, | |
| { | |
| "epoch": 0.8322440087145969, | |
| "grad_norm": 1.237168492535083, | |
| "learning_rate": 8.345709189892504e-07, | |
| "loss": 0.5377, | |
| "step": 5730 | |
| }, | |
| { | |
| "epoch": 0.8336964415395788, | |
| "grad_norm": 1.1890926723132464, | |
| "learning_rate": 8.206013431164683e-07, | |
| "loss": 0.5613, | |
| "step": 5740 | |
| }, | |
| { | |
| "epoch": 0.8351488743645606, | |
| "grad_norm": 1.2611972496063513, | |
| "learning_rate": 8.0673921313072e-07, | |
| "loss": 0.5562, | |
| "step": 5750 | |
| }, | |
| { | |
| "epoch": 0.8366013071895425, | |
| "grad_norm": 1.1453681982727373, | |
| "learning_rate": 7.929848854059663e-07, | |
| "loss": 0.5469, | |
| "step": 5760 | |
| }, | |
| { | |
| "epoch": 0.8380537400145244, | |
| "grad_norm": 1.1161546893459802, | |
| "learning_rate": 7.793387135447372e-07, | |
| "loss": 0.5688, | |
| "step": 5770 | |
| }, | |
| { | |
| "epoch": 0.8395061728395061, | |
| "grad_norm": 1.242951008236561, | |
| "learning_rate": 7.658010483690431e-07, | |
| "loss": 0.5516, | |
| "step": 5780 | |
| }, | |
| { | |
| "epoch": 0.840958605664488, | |
| "grad_norm": 1.1291848404892897, | |
| "learning_rate": 7.52372237911358e-07, | |
| "loss": 0.5558, | |
| "step": 5790 | |
| }, | |
| { | |
| "epoch": 0.8424110384894699, | |
| "grad_norm": 1.1344340429459099, | |
| "learning_rate": 7.390526274056625e-07, | |
| "loss": 0.5368, | |
| "step": 5800 | |
| }, | |
| { | |
| "epoch": 0.8438634713144517, | |
| "grad_norm": 1.2369341276497008, | |
| "learning_rate": 7.25842559278584e-07, | |
| "loss": 0.5438, | |
| "step": 5810 | |
| }, | |
| { | |
| "epoch": 0.8453159041394336, | |
| "grad_norm": 1.161564478717058, | |
| "learning_rate": 7.127423731405747e-07, | |
| "loss": 0.5524, | |
| "step": 5820 | |
| }, | |
| { | |
| "epoch": 0.8467683369644154, | |
| "grad_norm": 1.3389378618000198, | |
| "learning_rate": 6.997524057771964e-07, | |
| "loss": 0.5411, | |
| "step": 5830 | |
| }, | |
| { | |
| "epoch": 0.8482207697893972, | |
| "grad_norm": 1.2324708082947882, | |
| "learning_rate": 6.868729911404582e-07, | |
| "loss": 0.5594, | |
| "step": 5840 | |
| }, | |
| { | |
| "epoch": 0.8496732026143791, | |
| "grad_norm": 1.0931906751127958, | |
| "learning_rate": 6.741044603402214e-07, | |
| "loss": 0.5394, | |
| "step": 5850 | |
| }, | |
| { | |
| "epoch": 0.8511256354393609, | |
| "grad_norm": 1.1045798920330345, | |
| "learning_rate": 6.614471416357055e-07, | |
| "loss": 0.5517, | |
| "step": 5860 | |
| }, | |
| { | |
| "epoch": 0.8525780682643428, | |
| "grad_norm": 1.1003308882789462, | |
| "learning_rate": 6.489013604270277e-07, | |
| "loss": 0.5432, | |
| "step": 5870 | |
| }, | |
| { | |
| "epoch": 0.8540305010893247, | |
| "grad_norm": 1.1511825195957979, | |
| "learning_rate": 6.364674392468578e-07, | |
| "loss": 0.5543, | |
| "step": 5880 | |
| }, | |
| { | |
| "epoch": 0.8554829339143064, | |
| "grad_norm": 1.1016772920186344, | |
| "learning_rate": 6.241456977521115e-07, | |
| "loss": 0.5511, | |
| "step": 5890 | |
| }, | |
| { | |
| "epoch": 0.8569353667392883, | |
| "grad_norm": 1.2345711604547172, | |
| "learning_rate": 6.119364527157401e-07, | |
| "loss": 0.5546, | |
| "step": 5900 | |
| }, | |
| { | |
| "epoch": 0.8583877995642701, | |
| "grad_norm": 1.1026866190660687, | |
| "learning_rate": 5.998400180185838e-07, | |
| "loss": 0.5534, | |
| "step": 5910 | |
| }, | |
| { | |
| "epoch": 0.859840232389252, | |
| "grad_norm": 1.0696348901565953, | |
| "learning_rate": 5.878567046413025e-07, | |
| "loss": 0.5431, | |
| "step": 5920 | |
| }, | |
| { | |
| "epoch": 0.8612926652142339, | |
| "grad_norm": 1.074925388402079, | |
| "learning_rate": 5.759868206563834e-07, | |
| "loss": 0.5564, | |
| "step": 5930 | |
| }, | |
| { | |
| "epoch": 0.8627450980392157, | |
| "grad_norm": 1.1892355845709555, | |
| "learning_rate": 5.642306712202183e-07, | |
| "loss": 0.56, | |
| "step": 5940 | |
| }, | |
| { | |
| "epoch": 0.8641975308641975, | |
| "grad_norm": 1.1714018297678883, | |
| "learning_rate": 5.525885585652591e-07, | |
| "loss": 0.5477, | |
| "step": 5950 | |
| }, | |
| { | |
| "epoch": 0.8656499636891793, | |
| "grad_norm": 1.2243789216177572, | |
| "learning_rate": 5.410607819922481e-07, | |
| "loss": 0.5561, | |
| "step": 5960 | |
| }, | |
| { | |
| "epoch": 0.8671023965141612, | |
| "grad_norm": 1.158429282768604, | |
| "learning_rate": 5.296476378625237e-07, | |
| "loss": 0.5246, | |
| "step": 5970 | |
| }, | |
| { | |
| "epoch": 0.8685548293391431, | |
| "grad_norm": 1.2064879125921322, | |
| "learning_rate": 5.183494195904015e-07, | |
| "loss": 0.5434, | |
| "step": 5980 | |
| }, | |
| { | |
| "epoch": 0.8700072621641249, | |
| "grad_norm": 1.0370084252960212, | |
| "learning_rate": 5.071664176356294e-07, | |
| "loss": 0.556, | |
| "step": 5990 | |
| }, | |
| { | |
| "epoch": 0.8714596949891068, | |
| "grad_norm": 1.1529022886105922, | |
| "learning_rate": 4.960989194959225e-07, | |
| "loss": 0.5349, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 0.8729121278140886, | |
| "grad_norm": 1.0702466803229502, | |
| "learning_rate": 4.851472096995741e-07, | |
| "loss": 0.5641, | |
| "step": 6010 | |
| }, | |
| { | |
| "epoch": 0.8743645606390704, | |
| "grad_norm": 1.195504112892932, | |
| "learning_rate": 4.7431156979813097e-07, | |
| "loss": 0.5627, | |
| "step": 6020 | |
| }, | |
| { | |
| "epoch": 0.8758169934640523, | |
| "grad_norm": 1.0424744381436926, | |
| "learning_rate": 4.6359227835916954e-07, | |
| "loss": 0.5457, | |
| "step": 6030 | |
| }, | |
| { | |
| "epoch": 0.8772694262890341, | |
| "grad_norm": 1.136106426677912, | |
| "learning_rate": 4.529896109591203e-07, | |
| "loss": 0.5536, | |
| "step": 6040 | |
| }, | |
| { | |
| "epoch": 0.878721859114016, | |
| "grad_norm": 1.1941194023099557, | |
| "learning_rate": 4.425038401761961e-07, | |
| "loss": 0.5512, | |
| "step": 6050 | |
| }, | |
| { | |
| "epoch": 0.8801742919389978, | |
| "grad_norm": 1.1005592964409183, | |
| "learning_rate": 4.3213523558337354e-07, | |
| "loss": 0.5522, | |
| "step": 6060 | |
| }, | |
| { | |
| "epoch": 0.8816267247639796, | |
| "grad_norm": 1.3046172497671011, | |
| "learning_rate": 4.218840637414695e-07, | |
| "loss": 0.5389, | |
| "step": 6070 | |
| }, | |
| { | |
| "epoch": 0.8830791575889615, | |
| "grad_norm": 1.2050786337197097, | |
| "learning_rate": 4.117505881922856e-07, | |
| "loss": 0.5637, | |
| "step": 6080 | |
| }, | |
| { | |
| "epoch": 0.8845315904139434, | |
| "grad_norm": 1.1086711189663023, | |
| "learning_rate": 4.0173506945183295e-07, | |
| "loss": 0.5637, | |
| "step": 6090 | |
| }, | |
| { | |
| "epoch": 0.8859840232389252, | |
| "grad_norm": 1.142760086036647, | |
| "learning_rate": 3.9183776500363593e-07, | |
| "loss": 0.5639, | |
| "step": 6100 | |
| }, | |
| { | |
| "epoch": 0.8874364560639071, | |
| "grad_norm": 1.211597985547058, | |
| "learning_rate": 3.8205892929211175e-07, | |
| "loss": 0.5534, | |
| "step": 6110 | |
| }, | |
| { | |
| "epoch": 0.8888888888888888, | |
| "grad_norm": 1.125094111731544, | |
| "learning_rate": 3.7239881371603005e-07, | |
| "loss": 0.5514, | |
| "step": 6120 | |
| }, | |
| { | |
| "epoch": 0.8903413217138707, | |
| "grad_norm": 1.1253410539349802, | |
| "learning_rate": 3.6285766662204735e-07, | |
| "loss": 0.5593, | |
| "step": 6130 | |
| }, | |
| { | |
| "epoch": 0.8917937545388526, | |
| "grad_norm": 1.076054931723469, | |
| "learning_rate": 3.534357332983257e-07, | |
| "loss": 0.5494, | |
| "step": 6140 | |
| }, | |
| { | |
| "epoch": 0.8932461873638344, | |
| "grad_norm": 1.2433138382241562, | |
| "learning_rate": 3.441332559682242e-07, | |
| "loss": 0.5507, | |
| "step": 6150 | |
| }, | |
| { | |
| "epoch": 0.8946986201888163, | |
| "grad_norm": 1.172111145318429, | |
| "learning_rate": 3.349504737840742e-07, | |
| "loss": 0.5632, | |
| "step": 6160 | |
| }, | |
| { | |
| "epoch": 0.8961510530137982, | |
| "grad_norm": 1.2018077073853302, | |
| "learning_rate": 3.258876228210267e-07, | |
| "loss": 0.5381, | |
| "step": 6170 | |
| }, | |
| { | |
| "epoch": 0.8976034858387799, | |
| "grad_norm": 1.1218901853415595, | |
| "learning_rate": 3.169449360709914e-07, | |
| "loss": 0.5651, | |
| "step": 6180 | |
| }, | |
| { | |
| "epoch": 0.8990559186637618, | |
| "grad_norm": 1.075452696669577, | |
| "learning_rate": 3.0812264343663467e-07, | |
| "loss": 0.5518, | |
| "step": 6190 | |
| }, | |
| { | |
| "epoch": 0.9005083514887436, | |
| "grad_norm": 1.2898875627777047, | |
| "learning_rate": 2.99420971725482e-07, | |
| "loss": 0.5535, | |
| "step": 6200 | |
| }, | |
| { | |
| "epoch": 0.9019607843137255, | |
| "grad_norm": 1.064409341720963, | |
| "learning_rate": 2.9084014464407837e-07, | |
| "loss": 0.551, | |
| "step": 6210 | |
| }, | |
| { | |
| "epoch": 0.9034132171387074, | |
| "grad_norm": 1.1430289990560287, | |
| "learning_rate": 2.8238038279224e-07, | |
| "loss": 0.5351, | |
| "step": 6220 | |
| }, | |
| { | |
| "epoch": 0.9048656499636892, | |
| "grad_norm": 1.0942084433621513, | |
| "learning_rate": 2.740419036573844e-07, | |
| "loss": 0.5628, | |
| "step": 6230 | |
| }, | |
| { | |
| "epoch": 0.906318082788671, | |
| "grad_norm": 1.1827726416299507, | |
| "learning_rate": 2.6582492160893536e-07, | |
| "loss": 0.5698, | |
| "step": 6240 | |
| }, | |
| { | |
| "epoch": 0.9077705156136529, | |
| "grad_norm": 1.0512203056975564, | |
| "learning_rate": 2.5772964789281593e-07, | |
| "loss": 0.539, | |
| "step": 6250 | |
| }, | |
| { | |
| "epoch": 0.9092229484386347, | |
| "grad_norm": 1.177449766279641, | |
| "learning_rate": 2.4975629062601534e-07, | |
| "loss": 0.5475, | |
| "step": 6260 | |
| }, | |
| { | |
| "epoch": 0.9106753812636166, | |
| "grad_norm": 1.2124754199233574, | |
| "learning_rate": 2.419050547912388e-07, | |
| "loss": 0.541, | |
| "step": 6270 | |
| }, | |
| { | |
| "epoch": 0.9121278140885984, | |
| "grad_norm": 1.3580937630552576, | |
| "learning_rate": 2.3417614223163908e-07, | |
| "loss": 0.5588, | |
| "step": 6280 | |
| }, | |
| { | |
| "epoch": 0.9135802469135802, | |
| "grad_norm": 1.1170472146222037, | |
| "learning_rate": 2.26569751645625e-07, | |
| "loss": 0.5436, | |
| "step": 6290 | |
| }, | |
| { | |
| "epoch": 0.9150326797385621, | |
| "grad_norm": 1.1184802548299553, | |
| "learning_rate": 2.1908607858175612e-07, | |
| "loss": 0.5377, | |
| "step": 6300 | |
| }, | |
| { | |
| "epoch": 0.9164851125635439, | |
| "grad_norm": 1.1396702009546613, | |
| "learning_rate": 2.117253154337118e-07, | |
| "loss": 0.5683, | |
| "step": 6310 | |
| }, | |
| { | |
| "epoch": 0.9179375453885258, | |
| "grad_norm": 1.2119088736658123, | |
| "learning_rate": 2.0448765143534942e-07, | |
| "loss": 0.5668, | |
| "step": 6320 | |
| }, | |
| { | |
| "epoch": 0.9193899782135077, | |
| "grad_norm": 1.0448734314632342, | |
| "learning_rate": 1.973732726558364e-07, | |
| "loss": 0.5437, | |
| "step": 6330 | |
| }, | |
| { | |
| "epoch": 0.9208424110384895, | |
| "grad_norm": 1.2851112602098311, | |
| "learning_rate": 1.9038236199486693e-07, | |
| "loss": 0.5622, | |
| "step": 6340 | |
| }, | |
| { | |
| "epoch": 0.9222948438634713, | |
| "grad_norm": 1.1700640178574329, | |
| "learning_rate": 1.8351509917796218e-07, | |
| "loss": 0.542, | |
| "step": 6350 | |
| }, | |
| { | |
| "epoch": 0.9237472766884531, | |
| "grad_norm": 1.1416778336018678, | |
| "learning_rate": 1.7677166075184548e-07, | |
| "loss": 0.5529, | |
| "step": 6360 | |
| }, | |
| { | |
| "epoch": 0.925199709513435, | |
| "grad_norm": 1.1230308913216087, | |
| "learning_rate": 1.7015222007990883e-07, | |
| "loss": 0.5559, | |
| "step": 6370 | |
| }, | |
| { | |
| "epoch": 0.9266521423384169, | |
| "grad_norm": 1.1568250466964043, | |
| "learning_rate": 1.6365694733775305e-07, | |
| "loss": 0.5507, | |
| "step": 6380 | |
| }, | |
| { | |
| "epoch": 0.9281045751633987, | |
| "grad_norm": 1.1602815569402067, | |
| "learning_rate": 1.572860095088108e-07, | |
| "loss": 0.552, | |
| "step": 6390 | |
| }, | |
| { | |
| "epoch": 0.9295570079883806, | |
| "grad_norm": 1.0423401424679095, | |
| "learning_rate": 1.5103957038005935e-07, | |
| "loss": 0.5446, | |
| "step": 6400 | |
| }, | |
| { | |
| "epoch": 0.9310094408133623, | |
| "grad_norm": 1.1374874233890928, | |
| "learning_rate": 1.4491779053780298e-07, | |
| "loss": 0.5473, | |
| "step": 6410 | |
| }, | |
| { | |
| "epoch": 0.9324618736383442, | |
| "grad_norm": 1.1755709384042587, | |
| "learning_rate": 1.3892082736355283e-07, | |
| "loss": 0.5486, | |
| "step": 6420 | |
| }, | |
| { | |
| "epoch": 0.9339143064633261, | |
| "grad_norm": 1.1744643775241368, | |
| "learning_rate": 1.3304883502997133e-07, | |
| "loss": 0.5518, | |
| "step": 6430 | |
| }, | |
| { | |
| "epoch": 0.9353667392883079, | |
| "grad_norm": 1.1216236591765696, | |
| "learning_rate": 1.2730196449691756e-07, | |
| "loss": 0.5492, | |
| "step": 6440 | |
| }, | |
| { | |
| "epoch": 0.9368191721132898, | |
| "grad_norm": 1.1470393369010776, | |
| "learning_rate": 1.2168036350755975e-07, | |
| "loss": 0.5322, | |
| "step": 6450 | |
| }, | |
| { | |
| "epoch": 0.9382716049382716, | |
| "grad_norm": 1.1985354195876317, | |
| "learning_rate": 1.1618417658458003e-07, | |
| "loss": 0.5616, | |
| "step": 6460 | |
| }, | |
| { | |
| "epoch": 0.9397240377632534, | |
| "grad_norm": 1.1475497479759824, | |
| "learning_rate": 1.1081354502645913e-07, | |
| "loss": 0.5531, | |
| "step": 6470 | |
| }, | |
| { | |
| "epoch": 0.9411764705882353, | |
| "grad_norm": 1.1396353932104606, | |
| "learning_rate": 1.0556860690384252e-07, | |
| "loss": 0.5472, | |
| "step": 6480 | |
| }, | |
| { | |
| "epoch": 0.9426289034132171, | |
| "grad_norm": 1.1215848254083782, | |
| "learning_rate": 1.0044949705599216e-07, | |
| "loss": 0.5429, | |
| "step": 6490 | |
| }, | |
| { | |
| "epoch": 0.944081336238199, | |
| "grad_norm": 1.005591582016032, | |
| "learning_rate": 9.545634708731988e-08, | |
| "loss": 0.5418, | |
| "step": 6500 | |
| }, | |
| { | |
| "epoch": 0.9455337690631809, | |
| "grad_norm": 1.215225242394237, | |
| "learning_rate": 9.058928536400058e-08, | |
| "loss": 0.5578, | |
| "step": 6510 | |
| }, | |
| { | |
| "epoch": 0.9469862018881626, | |
| "grad_norm": 1.152537711229488, | |
| "learning_rate": 8.584843701067935e-08, | |
| "loss": 0.5404, | |
| "step": 6520 | |
| }, | |
| { | |
| "epoch": 0.9484386347131445, | |
| "grad_norm": 1.175848365037797, | |
| "learning_rate": 8.123392390724682e-08, | |
| "loss": 0.5522, | |
| "step": 6530 | |
| }, | |
| { | |
| "epoch": 0.9498910675381264, | |
| "grad_norm": 1.0183498527962453, | |
| "learning_rate": 7.674586468570999e-08, | |
| "loss": 0.5564, | |
| "step": 6540 | |
| }, | |
| { | |
| "epoch": 0.9513435003631082, | |
| "grad_norm": 1.2151729065782833, | |
| "learning_rate": 7.238437472714466e-08, | |
| "loss": 0.5561, | |
| "step": 6550 | |
| }, | |
| { | |
| "epoch": 0.9527959331880901, | |
| "grad_norm": 1.1402236462651618, | |
| "learning_rate": 6.81495661587217e-08, | |
| "loss": 0.5411, | |
| "step": 6560 | |
| }, | |
| { | |
| "epoch": 0.954248366013072, | |
| "grad_norm": 1.1521868862152016, | |
| "learning_rate": 6.404154785083383e-08, | |
| "loss": 0.5539, | |
| "step": 6570 | |
| }, | |
| { | |
| "epoch": 0.9557007988380537, | |
| "grad_norm": 1.1258302178296054, | |
| "learning_rate": 6.006042541428669e-08, | |
| "loss": 0.5532, | |
| "step": 6580 | |
| }, | |
| { | |
| "epoch": 0.9571532316630356, | |
| "grad_norm": 1.173412519187008, | |
| "learning_rate": 5.6206301197594404e-08, | |
| "loss": 0.5505, | |
| "step": 6590 | |
| }, | |
| { | |
| "epoch": 0.9586056644880174, | |
| "grad_norm": 1.136513704911577, | |
| "learning_rate": 5.247927428433885e-08, | |
| "loss": 0.5435, | |
| "step": 6600 | |
| }, | |
| { | |
| "epoch": 0.9600580973129993, | |
| "grad_norm": 1.1972723133655234, | |
| "learning_rate": 4.887944049062843e-08, | |
| "loss": 0.548, | |
| "step": 6610 | |
| }, | |
| { | |
| "epoch": 0.9615105301379812, | |
| "grad_norm": 1.240930781464282, | |
| "learning_rate": 4.5406892362632185e-08, | |
| "loss": 0.5538, | |
| "step": 6620 | |
| }, | |
| { | |
| "epoch": 0.9629629629629629, | |
| "grad_norm": 1.2645184421648727, | |
| "learning_rate": 4.206171917420121e-08, | |
| "loss": 0.5616, | |
| "step": 6630 | |
| }, | |
| { | |
| "epoch": 0.9644153957879448, | |
| "grad_norm": 1.1619344530688336, | |
| "learning_rate": 3.884400692457435e-08, | |
| "loss": 0.5578, | |
| "step": 6640 | |
| }, | |
| { | |
| "epoch": 0.9658678286129266, | |
| "grad_norm": 1.0415045949293107, | |
| "learning_rate": 3.575383833616497e-08, | |
| "loss": 0.536, | |
| "step": 6650 | |
| }, | |
| { | |
| "epoch": 0.9673202614379085, | |
| "grad_norm": 1.1707683296063809, | |
| "learning_rate": 3.2791292852437096e-08, | |
| "loss": 0.5444, | |
| "step": 6660 | |
| }, | |
| { | |
| "epoch": 0.9687726942628904, | |
| "grad_norm": 0.9579807050337852, | |
| "learning_rate": 2.99564466358615e-08, | |
| "loss": 0.5604, | |
| "step": 6670 | |
| }, | |
| { | |
| "epoch": 0.9702251270878722, | |
| "grad_norm": 1.155540906901066, | |
| "learning_rate": 2.7249372565957277e-08, | |
| "loss": 0.5495, | |
| "step": 6680 | |
| }, | |
| { | |
| "epoch": 0.971677559912854, | |
| "grad_norm": 1.0959456715901421, | |
| "learning_rate": 2.4670140237419428e-08, | |
| "loss": 0.5483, | |
| "step": 6690 | |
| }, | |
| { | |
| "epoch": 0.9731299927378358, | |
| "grad_norm": 1.0366185075689953, | |
| "learning_rate": 2.2218815958329754e-08, | |
| "loss": 0.5497, | |
| "step": 6700 | |
| }, | |
| { | |
| "epoch": 0.9745824255628177, | |
| "grad_norm": 1.0759294981597065, | |
| "learning_rate": 1.9895462748450444e-08, | |
| "loss": 0.5634, | |
| "step": 6710 | |
| }, | |
| { | |
| "epoch": 0.9760348583877996, | |
| "grad_norm": 1.1209995693338786, | |
| "learning_rate": 1.770014033760592e-08, | |
| "loss": 0.5508, | |
| "step": 6720 | |
| }, | |
| { | |
| "epoch": 0.9774872912127814, | |
| "grad_norm": 1.210238366549934, | |
| "learning_rate": 1.5632905164145173e-08, | |
| "loss": 0.5813, | |
| "step": 6730 | |
| }, | |
| { | |
| "epoch": 0.9789397240377633, | |
| "grad_norm": 1.15542524575641, | |
| "learning_rate": 1.3693810373494598e-08, | |
| "loss": 0.5421, | |
| "step": 6740 | |
| }, | |
| { | |
| "epoch": 0.9803921568627451, | |
| "grad_norm": 1.194050906215969, | |
| "learning_rate": 1.188290581678575e-08, | |
| "loss": 0.5586, | |
| "step": 6750 | |
| }, | |
| { | |
| "epoch": 0.9818445896877269, | |
| "grad_norm": 1.1566645017111077, | |
| "learning_rate": 1.0200238049580258e-08, | |
| "loss": 0.5632, | |
| "step": 6760 | |
| }, | |
| { | |
| "epoch": 0.9832970225127088, | |
| "grad_norm": 1.0710546930410338, | |
| "learning_rate": 8.645850330668559e-09, | |
| "loss": 0.5368, | |
| "step": 6770 | |
| }, | |
| { | |
| "epoch": 0.9847494553376906, | |
| "grad_norm": 1.175731861197897, | |
| "learning_rate": 7.219782620958571e-09, | |
| "loss": 0.5388, | |
| "step": 6780 | |
| }, | |
| { | |
| "epoch": 0.9862018881626725, | |
| "grad_norm": 1.0791848418311811, | |
| "learning_rate": 5.922071582449285e-09, | |
| "loss": 0.5585, | |
| "step": 6790 | |
| }, | |
| { | |
| "epoch": 0.9876543209876543, | |
| "grad_norm": 1.21651622954666, | |
| "learning_rate": 4.752750577288745e-09, | |
| "loss": 0.5603, | |
| "step": 6800 | |
| }, | |
| { | |
| "epoch": 0.9891067538126361, | |
| "grad_norm": 1.294701087862953, | |
| "learning_rate": 3.711849666914735e-09, | |
| "loss": 0.5713, | |
| "step": 6810 | |
| }, | |
| { | |
| "epoch": 0.990559186637618, | |
| "grad_norm": 1.100757408335571, | |
| "learning_rate": 2.799395611281508e-09, | |
| "loss": 0.5587, | |
| "step": 6820 | |
| }, | |
| { | |
| "epoch": 0.9920116194625999, | |
| "grad_norm": 1.282263624241459, | |
| "learning_rate": 2.0154118681753322e-09, | |
| "loss": 0.5588, | |
| "step": 6830 | |
| }, | |
| { | |
| "epoch": 0.9934640522875817, | |
| "grad_norm": 1.0975199346392859, | |
| "learning_rate": 1.3599185926072012e-09, | |
| "loss": 0.5724, | |
| "step": 6840 | |
| }, | |
| { | |
| "epoch": 0.9949164851125636, | |
| "grad_norm": 1.1620574281790235, | |
| "learning_rate": 8.329326362976897e-10, | |
| "loss": 0.5621, | |
| "step": 6850 | |
| }, | |
| { | |
| "epoch": 0.9963689179375453, | |
| "grad_norm": 1.1717561623715795, | |
| "learning_rate": 4.34467547242301e-10, | |
| "loss": 0.5506, | |
| "step": 6860 | |
| }, | |
| { | |
| "epoch": 0.9978213507625272, | |
| "grad_norm": 1.155270191238308, | |
| "learning_rate": 1.645335693623018e-10, | |
| "loss": 0.5533, | |
| "step": 6870 | |
| }, | |
| { | |
| "epoch": 0.9992737835875091, | |
| "grad_norm": 1.240301119345841, | |
| "learning_rate": 2.3137642244375202e-11, | |
| "loss": 0.5538, | |
| "step": 6880 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "step": 6885, | |
| "total_flos": 1942329112002560.0, | |
| "train_loss": 0.5927019230420812, | |
| "train_runtime": 56356.5973, | |
| "train_samples_per_second": 1.955, | |
| "train_steps_per_second": 0.122 | |
| } | |
| ], | |
| "logging_steps": 10, | |
| "max_steps": 6885, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 1, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 1942329112002560.0, | |
| "train_batch_size": 2, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |