| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 0.9999862446870104, |
| "eval_steps": 2500, |
| "global_step": 12495, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.0008003091193973672, |
| "grad_norm": 75.25, |
| "learning_rate": 9.999984368969842e-07, |
| "loss": 135.0496, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.0016006182387947345, |
| "grad_norm": 77.9375, |
| "learning_rate": 9.999968737939682e-07, |
| "loss": 134.7034, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.0024009273581921016, |
| "grad_norm": 78.8125, |
| "learning_rate": 9.999953106909524e-07, |
| "loss": 134.1727, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.003201236477589469, |
| "grad_norm": 74.125, |
| "learning_rate": 9.999937475879366e-07, |
| "loss": 134.3995, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.004001545596986836, |
| "grad_norm": 77.8125, |
| "learning_rate": 9.999921844849208e-07, |
| "loss": 133.854, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.004801854716384203, |
| "grad_norm": 73.1875, |
| "learning_rate": 9.999906213819048e-07, |
| "loss": 134.2893, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.005602163835781571, |
| "grad_norm": 74.0625, |
| "learning_rate": 9.99989058278889e-07, |
| "loss": 134.4156, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.006402472955178938, |
| "grad_norm": 72.5625, |
| "learning_rate": 9.999874951758733e-07, |
| "loss": 132.5738, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.007202782074576305, |
| "grad_norm": 80.8125, |
| "learning_rate": 9.999859320728575e-07, |
| "loss": 134.4381, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.008003091193973673, |
| "grad_norm": 78.875, |
| "learning_rate": 9.999843689698417e-07, |
| "loss": 133.9594, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.00880340031337104, |
| "grad_norm": 71.125, |
| "learning_rate": 9.999828058668257e-07, |
| "loss": 133.9086, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.009603709432768406, |
| "grad_norm": 72.0625, |
| "learning_rate": 9.9998124276381e-07, |
| "loss": 133.434, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.010404018552165774, |
| "grad_norm": 79.25, |
| "learning_rate": 9.999796796607942e-07, |
| "loss": 133.4989, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.011204327671563142, |
| "grad_norm": 79.1875, |
| "learning_rate": 9.999781165577784e-07, |
| "loss": 134.517, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.012004636790960508, |
| "grad_norm": 71.9375, |
| "learning_rate": 9.999765534547624e-07, |
| "loss": 133.5077, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.012804945910357876, |
| "grad_norm": 74.625, |
| "learning_rate": 9.999749903517466e-07, |
| "loss": 133.3733, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.013605255029755244, |
| "grad_norm": 78.6875, |
| "learning_rate": 9.999734272487308e-07, |
| "loss": 132.901, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.01440556414915261, |
| "grad_norm": 76.625, |
| "learning_rate": 9.999718641457148e-07, |
| "loss": 135.7891, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.015205873268549977, |
| "grad_norm": 72.6875, |
| "learning_rate": 9.99970301042699e-07, |
| "loss": 134.1953, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.016006182387947345, |
| "grad_norm": 72.1875, |
| "learning_rate": 9.999687379396833e-07, |
| "loss": 132.8302, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.01680649150734471, |
| "grad_norm": 76.375, |
| "learning_rate": 9.999671748366673e-07, |
| "loss": 133.097, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.01760680062674208, |
| "grad_norm": 81.25, |
| "learning_rate": 9.999656117336515e-07, |
| "loss": 133.4282, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.018407109746139447, |
| "grad_norm": 85.125, |
| "learning_rate": 9.999640486306357e-07, |
| "loss": 134.319, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.019207418865536813, |
| "grad_norm": 79.125, |
| "learning_rate": 9.9996248552762e-07, |
| "loss": 132.7719, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.020007727984934182, |
| "grad_norm": 78.5, |
| "learning_rate": 9.999609224246041e-07, |
| "loss": 133.3401, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.02080803710433155, |
| "grad_norm": 82.625, |
| "learning_rate": 9.999593593215884e-07, |
| "loss": 134.8742, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.021608346223728914, |
| "grad_norm": 76.875, |
| "learning_rate": 9.999577962185724e-07, |
| "loss": 135.1933, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.022408655343126284, |
| "grad_norm": 76.3125, |
| "learning_rate": 9.999562331155566e-07, |
| "loss": 134.0234, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.02320896446252365, |
| "grad_norm": 74.5, |
| "learning_rate": 9.999546700125408e-07, |
| "loss": 133.5756, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.024009273581921016, |
| "grad_norm": 81.5625, |
| "learning_rate": 9.99953106909525e-07, |
| "loss": 134.8582, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.024809582701318385, |
| "grad_norm": 73.25, |
| "learning_rate": 9.99951543806509e-07, |
| "loss": 132.5909, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.02560989182071575, |
| "grad_norm": 71.3125, |
| "learning_rate": 9.999499807034932e-07, |
| "loss": 133.9742, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.026410200940113118, |
| "grad_norm": 71.3125, |
| "learning_rate": 9.999484176004775e-07, |
| "loss": 134.9034, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.027210510059510487, |
| "grad_norm": 73.5, |
| "learning_rate": 9.999468544974615e-07, |
| "loss": 132.86, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.028010819178907853, |
| "grad_norm": 73.4375, |
| "learning_rate": 9.999452913944457e-07, |
| "loss": 134.602, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.02881112829830522, |
| "grad_norm": 76.6875, |
| "learning_rate": 9.9994372829143e-07, |
| "loss": 132.606, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.02961143741770259, |
| "grad_norm": 75.9375, |
| "learning_rate": 9.99942165188414e-07, |
| "loss": 134.4456, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.030411746537099955, |
| "grad_norm": 73.8125, |
| "learning_rate": 9.999406020853981e-07, |
| "loss": 132.9977, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.03121205565649732, |
| "grad_norm": 79.375, |
| "learning_rate": 9.999390389823824e-07, |
| "loss": 133.731, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.03201236477589469, |
| "grad_norm": 77.4375, |
| "learning_rate": 9.999374758793666e-07, |
| "loss": 133.9014, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.032812673895292056, |
| "grad_norm": 70.8125, |
| "learning_rate": 9.999359127763508e-07, |
| "loss": 135.4049, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.03361298301468942, |
| "grad_norm": 75.125, |
| "learning_rate": 9.999343496733348e-07, |
| "loss": 135.3231, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.03441329213408679, |
| "grad_norm": 79.6875, |
| "learning_rate": 9.99932786570319e-07, |
| "loss": 132.8583, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.03521360125348416, |
| "grad_norm": 84.0625, |
| "learning_rate": 9.999312234673032e-07, |
| "loss": 133.4561, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.03601391037288153, |
| "grad_norm": 75.125, |
| "learning_rate": 9.999296603642874e-07, |
| "loss": 133.2269, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.03681421949227889, |
| "grad_norm": 71.4375, |
| "learning_rate": 9.999280972612715e-07, |
| "loss": 134.1606, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.03761452861167626, |
| "grad_norm": 72.9375, |
| "learning_rate": 9.999265341582557e-07, |
| "loss": 134.7773, |
| "step": 470 |
| }, |
| { |
| "epoch": 0.038414837731073626, |
| "grad_norm": 76.875, |
| "learning_rate": 9.999249710552399e-07, |
| "loss": 134.0885, |
| "step": 480 |
| }, |
| { |
| "epoch": 0.03921514685047099, |
| "grad_norm": 76.8125, |
| "learning_rate": 9.999234079522241e-07, |
| "loss": 134.5257, |
| "step": 490 |
| }, |
| { |
| "epoch": 0.040015455969868365, |
| "grad_norm": 70.25, |
| "learning_rate": 9.999218448492081e-07, |
| "loss": 134.4442, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.04081576508926573, |
| "grad_norm": 79.875, |
| "learning_rate": 9.999202817461923e-07, |
| "loss": 134.2612, |
| "step": 510 |
| }, |
| { |
| "epoch": 0.0416160742086631, |
| "grad_norm": 75.375, |
| "learning_rate": 9.999187186431766e-07, |
| "loss": 135.1053, |
| "step": 520 |
| }, |
| { |
| "epoch": 0.04241638332806046, |
| "grad_norm": 75.4375, |
| "learning_rate": 9.999171555401606e-07, |
| "loss": 133.3491, |
| "step": 530 |
| }, |
| { |
| "epoch": 0.04321669244745783, |
| "grad_norm": 81.4375, |
| "learning_rate": 9.999155924371448e-07, |
| "loss": 133.6457, |
| "step": 540 |
| }, |
| { |
| "epoch": 0.044017001566855195, |
| "grad_norm": 76.5625, |
| "learning_rate": 9.99914029334129e-07, |
| "loss": 135.0537, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.04481731068625257, |
| "grad_norm": 77.375, |
| "learning_rate": 9.999124662311132e-07, |
| "loss": 134.9535, |
| "step": 560 |
| }, |
| { |
| "epoch": 0.045617619805649934, |
| "grad_norm": 77.75, |
| "learning_rate": 9.999109031280974e-07, |
| "loss": 134.2294, |
| "step": 570 |
| }, |
| { |
| "epoch": 0.0464179289250473, |
| "grad_norm": 82.5, |
| "learning_rate": 9.999093400250814e-07, |
| "loss": 133.7713, |
| "step": 580 |
| }, |
| { |
| "epoch": 0.047218238044444666, |
| "grad_norm": 79.125, |
| "learning_rate": 9.999077769220657e-07, |
| "loss": 133.3779, |
| "step": 590 |
| }, |
| { |
| "epoch": 0.04801854716384203, |
| "grad_norm": 78.4375, |
| "learning_rate": 9.999062138190499e-07, |
| "loss": 133.576, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.0488188562832394, |
| "grad_norm": 76.1875, |
| "learning_rate": 9.99904650716034e-07, |
| "loss": 136.3208, |
| "step": 610 |
| }, |
| { |
| "epoch": 0.04961916540263677, |
| "grad_norm": 76.3125, |
| "learning_rate": 9.99903087613018e-07, |
| "loss": 132.9463, |
| "step": 620 |
| }, |
| { |
| "epoch": 0.05041947452203414, |
| "grad_norm": 74.0, |
| "learning_rate": 9.999015245100023e-07, |
| "loss": 133.6666, |
| "step": 630 |
| }, |
| { |
| "epoch": 0.0512197836414315, |
| "grad_norm": 73.75, |
| "learning_rate": 9.998999614069865e-07, |
| "loss": 134.8779, |
| "step": 640 |
| }, |
| { |
| "epoch": 0.05202009276082887, |
| "grad_norm": 77.3125, |
| "learning_rate": 9.998983983039708e-07, |
| "loss": 133.0986, |
| "step": 650 |
| }, |
| { |
| "epoch": 0.052820401880226235, |
| "grad_norm": 78.5625, |
| "learning_rate": 9.998968352009548e-07, |
| "loss": 135.0677, |
| "step": 660 |
| }, |
| { |
| "epoch": 0.0536207109996236, |
| "grad_norm": 78.1875, |
| "learning_rate": 9.99895272097939e-07, |
| "loss": 133.2067, |
| "step": 670 |
| }, |
| { |
| "epoch": 0.054421020119020974, |
| "grad_norm": 70.0625, |
| "learning_rate": 9.998937089949232e-07, |
| "loss": 133.9511, |
| "step": 680 |
| }, |
| { |
| "epoch": 0.05522132923841834, |
| "grad_norm": 80.375, |
| "learning_rate": 9.998921458919072e-07, |
| "loss": 133.9503, |
| "step": 690 |
| }, |
| { |
| "epoch": 0.056021638357815706, |
| "grad_norm": 74.6875, |
| "learning_rate": 9.998905827888914e-07, |
| "loss": 134.0494, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.05682194747721307, |
| "grad_norm": 77.25, |
| "learning_rate": 9.998890196858756e-07, |
| "loss": 134.0122, |
| "step": 710 |
| }, |
| { |
| "epoch": 0.05762225659661044, |
| "grad_norm": 80.875, |
| "learning_rate": 9.998874565828599e-07, |
| "loss": 134.0668, |
| "step": 720 |
| }, |
| { |
| "epoch": 0.05842256571600781, |
| "grad_norm": 84.125, |
| "learning_rate": 9.99885893479844e-07, |
| "loss": 134.0675, |
| "step": 730 |
| }, |
| { |
| "epoch": 0.05922287483540518, |
| "grad_norm": 72.375, |
| "learning_rate": 9.99884330376828e-07, |
| "loss": 133.1081, |
| "step": 740 |
| }, |
| { |
| "epoch": 0.06002318395480254, |
| "grad_norm": 72.5625, |
| "learning_rate": 9.998827672738123e-07, |
| "loss": 134.6276, |
| "step": 750 |
| }, |
| { |
| "epoch": 0.06082349307419991, |
| "grad_norm": 78.3125, |
| "learning_rate": 9.998812041707965e-07, |
| "loss": 134.8289, |
| "step": 760 |
| }, |
| { |
| "epoch": 0.061623802193597275, |
| "grad_norm": 78.125, |
| "learning_rate": 9.998796410677807e-07, |
| "loss": 134.8663, |
| "step": 770 |
| }, |
| { |
| "epoch": 0.06242411131299464, |
| "grad_norm": 71.75, |
| "learning_rate": 9.998780779647647e-07, |
| "loss": 133.6727, |
| "step": 780 |
| }, |
| { |
| "epoch": 0.06322442043239201, |
| "grad_norm": 74.4375, |
| "learning_rate": 9.99876514861749e-07, |
| "loss": 133.9329, |
| "step": 790 |
| }, |
| { |
| "epoch": 0.06402472955178938, |
| "grad_norm": 74.625, |
| "learning_rate": 9.998749517587332e-07, |
| "loss": 133.3252, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.06482503867118675, |
| "grad_norm": 71.625, |
| "learning_rate": 9.998733886557172e-07, |
| "loss": 134.0405, |
| "step": 810 |
| }, |
| { |
| "epoch": 0.06562534779058411, |
| "grad_norm": 80.625, |
| "learning_rate": 9.998718255527014e-07, |
| "loss": 133.8837, |
| "step": 820 |
| }, |
| { |
| "epoch": 0.06642565690998148, |
| "grad_norm": 76.5625, |
| "learning_rate": 9.998702624496856e-07, |
| "loss": 134.2091, |
| "step": 830 |
| }, |
| { |
| "epoch": 0.06722596602937884, |
| "grad_norm": 71.75, |
| "learning_rate": 9.998686993466698e-07, |
| "loss": 133.8055, |
| "step": 840 |
| }, |
| { |
| "epoch": 0.06802627514877621, |
| "grad_norm": 78.9375, |
| "learning_rate": 9.998671362436539e-07, |
| "loss": 132.6678, |
| "step": 850 |
| }, |
| { |
| "epoch": 0.06882658426817358, |
| "grad_norm": 78.8125, |
| "learning_rate": 9.99865573140638e-07, |
| "loss": 134.7295, |
| "step": 860 |
| }, |
| { |
| "epoch": 0.06962689338757094, |
| "grad_norm": 77.375, |
| "learning_rate": 9.998640100376223e-07, |
| "loss": 134.0342, |
| "step": 870 |
| }, |
| { |
| "epoch": 0.07042720250696832, |
| "grad_norm": 76.0625, |
| "learning_rate": 9.998624469346065e-07, |
| "loss": 132.651, |
| "step": 880 |
| }, |
| { |
| "epoch": 0.07122751162636569, |
| "grad_norm": 75.0625, |
| "learning_rate": 9.998608838315905e-07, |
| "loss": 133.6062, |
| "step": 890 |
| }, |
| { |
| "epoch": 0.07202782074576305, |
| "grad_norm": 73.5, |
| "learning_rate": 9.998593207285747e-07, |
| "loss": 132.7781, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.07282812986516042, |
| "grad_norm": 82.4375, |
| "learning_rate": 9.99857757625559e-07, |
| "loss": 134.2361, |
| "step": 910 |
| }, |
| { |
| "epoch": 0.07362843898455779, |
| "grad_norm": 73.0625, |
| "learning_rate": 9.998561945225432e-07, |
| "loss": 133.2907, |
| "step": 920 |
| }, |
| { |
| "epoch": 0.07442874810395515, |
| "grad_norm": 80.6875, |
| "learning_rate": 9.998546314195274e-07, |
| "loss": 134.2265, |
| "step": 930 |
| }, |
| { |
| "epoch": 0.07522905722335252, |
| "grad_norm": 80.25, |
| "learning_rate": 9.998530683165114e-07, |
| "loss": 133.9985, |
| "step": 940 |
| }, |
| { |
| "epoch": 0.07602936634274989, |
| "grad_norm": 74.125, |
| "learning_rate": 9.998515052134956e-07, |
| "loss": 134.877, |
| "step": 950 |
| }, |
| { |
| "epoch": 0.07682967546214725, |
| "grad_norm": 75.8125, |
| "learning_rate": 9.998499421104798e-07, |
| "loss": 133.8007, |
| "step": 960 |
| }, |
| { |
| "epoch": 0.07762998458154462, |
| "grad_norm": 80.0625, |
| "learning_rate": 9.998483790074638e-07, |
| "loss": 134.6694, |
| "step": 970 |
| }, |
| { |
| "epoch": 0.07843029370094198, |
| "grad_norm": 76.125, |
| "learning_rate": 9.99846815904448e-07, |
| "loss": 133.3736, |
| "step": 980 |
| }, |
| { |
| "epoch": 0.07923060282033935, |
| "grad_norm": 81.9375, |
| "learning_rate": 9.998452528014323e-07, |
| "loss": 134.208, |
| "step": 990 |
| }, |
| { |
| "epoch": 0.08003091193973673, |
| "grad_norm": 75.4375, |
| "learning_rate": 9.998436896984163e-07, |
| "loss": 133.7086, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.0808312210591341, |
| "grad_norm": 79.4375, |
| "learning_rate": 9.998421265954005e-07, |
| "loss": 133.6408, |
| "step": 1010 |
| }, |
| { |
| "epoch": 0.08163153017853146, |
| "grad_norm": 77.8125, |
| "learning_rate": 9.998405634923847e-07, |
| "loss": 132.0228, |
| "step": 1020 |
| }, |
| { |
| "epoch": 0.08243183929792883, |
| "grad_norm": 79.75, |
| "learning_rate": 9.99839000389369e-07, |
| "loss": 133.7949, |
| "step": 1030 |
| }, |
| { |
| "epoch": 0.0832321484173262, |
| "grad_norm": 77.5, |
| "learning_rate": 9.998374372863532e-07, |
| "loss": 132.4473, |
| "step": 1040 |
| }, |
| { |
| "epoch": 0.08403245753672356, |
| "grad_norm": 78.625, |
| "learning_rate": 9.998358741833372e-07, |
| "loss": 133.509, |
| "step": 1050 |
| }, |
| { |
| "epoch": 0.08483276665612093, |
| "grad_norm": 74.625, |
| "learning_rate": 9.998343110803214e-07, |
| "loss": 134.2043, |
| "step": 1060 |
| }, |
| { |
| "epoch": 0.08563307577551829, |
| "grad_norm": 79.5, |
| "learning_rate": 9.998327479773056e-07, |
| "loss": 134.1369, |
| "step": 1070 |
| }, |
| { |
| "epoch": 0.08643338489491566, |
| "grad_norm": 81.3125, |
| "learning_rate": 9.998311848742898e-07, |
| "loss": 135.2469, |
| "step": 1080 |
| }, |
| { |
| "epoch": 0.08723369401431302, |
| "grad_norm": 76.8125, |
| "learning_rate": 9.99829621771274e-07, |
| "loss": 134.3792, |
| "step": 1090 |
| }, |
| { |
| "epoch": 0.08803400313371039, |
| "grad_norm": 83.0625, |
| "learning_rate": 9.99828058668258e-07, |
| "loss": 134.3031, |
| "step": 1100 |
| }, |
| { |
| "epoch": 0.08883431225310777, |
| "grad_norm": 77.5625, |
| "learning_rate": 9.998264955652423e-07, |
| "loss": 133.6508, |
| "step": 1110 |
| }, |
| { |
| "epoch": 0.08963462137250514, |
| "grad_norm": 72.5625, |
| "learning_rate": 9.998249324622265e-07, |
| "loss": 133.622, |
| "step": 1120 |
| }, |
| { |
| "epoch": 0.0904349304919025, |
| "grad_norm": 72.4375, |
| "learning_rate": 9.998233693592105e-07, |
| "loss": 133.585, |
| "step": 1130 |
| }, |
| { |
| "epoch": 0.09123523961129987, |
| "grad_norm": 83.8125, |
| "learning_rate": 9.998218062561947e-07, |
| "loss": 134.5366, |
| "step": 1140 |
| }, |
| { |
| "epoch": 0.09203554873069723, |
| "grad_norm": 75.6875, |
| "learning_rate": 9.99820243153179e-07, |
| "loss": 132.9767, |
| "step": 1150 |
| }, |
| { |
| "epoch": 0.0928358578500946, |
| "grad_norm": 75.6875, |
| "learning_rate": 9.99818680050163e-07, |
| "loss": 135.017, |
| "step": 1160 |
| }, |
| { |
| "epoch": 0.09363616696949197, |
| "grad_norm": 78.8125, |
| "learning_rate": 9.998171169471471e-07, |
| "loss": 133.7126, |
| "step": 1170 |
| }, |
| { |
| "epoch": 0.09443647608888933, |
| "grad_norm": 77.125, |
| "learning_rate": 9.998155538441314e-07, |
| "loss": 133.368, |
| "step": 1180 |
| }, |
| { |
| "epoch": 0.0952367852082867, |
| "grad_norm": 80.5625, |
| "learning_rate": 9.998139907411156e-07, |
| "loss": 134.3654, |
| "step": 1190 |
| }, |
| { |
| "epoch": 0.09603709432768406, |
| "grad_norm": 77.25, |
| "learning_rate": 9.998124276380998e-07, |
| "loss": 133.1469, |
| "step": 1200 |
| }, |
| { |
| "epoch": 0.09683740344708143, |
| "grad_norm": 71.0625, |
| "learning_rate": 9.998108645350838e-07, |
| "loss": 134.3933, |
| "step": 1210 |
| }, |
| { |
| "epoch": 0.0976377125664788, |
| "grad_norm": 71.125, |
| "learning_rate": 9.99809301432068e-07, |
| "loss": 132.187, |
| "step": 1220 |
| }, |
| { |
| "epoch": 0.09843802168587618, |
| "grad_norm": 73.0625, |
| "learning_rate": 9.998077383290522e-07, |
| "loss": 134.8376, |
| "step": 1230 |
| }, |
| { |
| "epoch": 0.09923833080527354, |
| "grad_norm": 72.6875, |
| "learning_rate": 9.998061752260365e-07, |
| "loss": 133.2062, |
| "step": 1240 |
| }, |
| { |
| "epoch": 0.10003863992467091, |
| "grad_norm": 82.9375, |
| "learning_rate": 9.998046121230207e-07, |
| "loss": 134.4847, |
| "step": 1250 |
| }, |
| { |
| "epoch": 0.10083894904406827, |
| "grad_norm": 80.5, |
| "learning_rate": 9.998030490200047e-07, |
| "loss": 133.9005, |
| "step": 1260 |
| }, |
| { |
| "epoch": 0.10163925816346564, |
| "grad_norm": 75.3125, |
| "learning_rate": 9.99801485916989e-07, |
| "loss": 132.473, |
| "step": 1270 |
| }, |
| { |
| "epoch": 0.102439567282863, |
| "grad_norm": 72.875, |
| "learning_rate": 9.997999228139731e-07, |
| "loss": 133.0457, |
| "step": 1280 |
| }, |
| { |
| "epoch": 0.10323987640226037, |
| "grad_norm": 75.875, |
| "learning_rate": 9.997983597109571e-07, |
| "loss": 132.4673, |
| "step": 1290 |
| }, |
| { |
| "epoch": 0.10404018552165774, |
| "grad_norm": 78.5625, |
| "learning_rate": 9.997967966079413e-07, |
| "loss": 133.7126, |
| "step": 1300 |
| }, |
| { |
| "epoch": 0.1048404946410551, |
| "grad_norm": 78.25, |
| "learning_rate": 9.997952335049256e-07, |
| "loss": 133.5371, |
| "step": 1310 |
| }, |
| { |
| "epoch": 0.10564080376045247, |
| "grad_norm": 76.1875, |
| "learning_rate": 9.997936704019096e-07, |
| "loss": 132.6477, |
| "step": 1320 |
| }, |
| { |
| "epoch": 0.10644111287984984, |
| "grad_norm": 73.875, |
| "learning_rate": 9.997921072988938e-07, |
| "loss": 131.6196, |
| "step": 1330 |
| }, |
| { |
| "epoch": 0.1072414219992472, |
| "grad_norm": 83.875, |
| "learning_rate": 9.99790544195878e-07, |
| "loss": 133.8508, |
| "step": 1340 |
| }, |
| { |
| "epoch": 0.10804173111864458, |
| "grad_norm": 72.25, |
| "learning_rate": 9.997889810928622e-07, |
| "loss": 133.3958, |
| "step": 1350 |
| }, |
| { |
| "epoch": 0.10884204023804195, |
| "grad_norm": 77.875, |
| "learning_rate": 9.997874179898464e-07, |
| "loss": 134.013, |
| "step": 1360 |
| }, |
| { |
| "epoch": 0.10964234935743931, |
| "grad_norm": 67.6875, |
| "learning_rate": 9.997858548868304e-07, |
| "loss": 133.0476, |
| "step": 1370 |
| }, |
| { |
| "epoch": 0.11044265847683668, |
| "grad_norm": 78.0, |
| "learning_rate": 9.997842917838147e-07, |
| "loss": 134.0779, |
| "step": 1380 |
| }, |
| { |
| "epoch": 0.11124296759623405, |
| "grad_norm": 75.25, |
| "learning_rate": 9.997827286807989e-07, |
| "loss": 135.0265, |
| "step": 1390 |
| }, |
| { |
| "epoch": 0.11204327671563141, |
| "grad_norm": 84.4375, |
| "learning_rate": 9.99781165577783e-07, |
| "loss": 132.7369, |
| "step": 1400 |
| }, |
| { |
| "epoch": 0.11284358583502878, |
| "grad_norm": 76.875, |
| "learning_rate": 9.997796024747673e-07, |
| "loss": 132.9652, |
| "step": 1410 |
| }, |
| { |
| "epoch": 0.11364389495442614, |
| "grad_norm": 75.8125, |
| "learning_rate": 9.997780393717513e-07, |
| "loss": 132.9404, |
| "step": 1420 |
| }, |
| { |
| "epoch": 0.11444420407382351, |
| "grad_norm": 81.5625, |
| "learning_rate": 9.997764762687355e-07, |
| "loss": 133.6693, |
| "step": 1430 |
| }, |
| { |
| "epoch": 0.11524451319322088, |
| "grad_norm": 79.0625, |
| "learning_rate": 9.997749131657198e-07, |
| "loss": 133.2776, |
| "step": 1440 |
| }, |
| { |
| "epoch": 0.11604482231261824, |
| "grad_norm": 72.6875, |
| "learning_rate": 9.997733500627038e-07, |
| "loss": 133.9213, |
| "step": 1450 |
| }, |
| { |
| "epoch": 0.11684513143201562, |
| "grad_norm": 77.0625, |
| "learning_rate": 9.99771786959688e-07, |
| "loss": 133.5122, |
| "step": 1460 |
| }, |
| { |
| "epoch": 0.11764544055141299, |
| "grad_norm": 78.125, |
| "learning_rate": 9.997702238566722e-07, |
| "loss": 134.5584, |
| "step": 1470 |
| }, |
| { |
| "epoch": 0.11844574967081035, |
| "grad_norm": 78.0, |
| "learning_rate": 9.997686607536562e-07, |
| "loss": 132.1062, |
| "step": 1480 |
| }, |
| { |
| "epoch": 0.11924605879020772, |
| "grad_norm": 81.4375, |
| "learning_rate": 9.997670976506404e-07, |
| "loss": 134.1707, |
| "step": 1490 |
| }, |
| { |
| "epoch": 0.12004636790960509, |
| "grad_norm": 80.375, |
| "learning_rate": 9.997655345476247e-07, |
| "loss": 134.8848, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.12084667702900245, |
| "grad_norm": 76.0625, |
| "learning_rate": 9.997639714446089e-07, |
| "loss": 131.9225, |
| "step": 1510 |
| }, |
| { |
| "epoch": 0.12164698614839982, |
| "grad_norm": 73.0, |
| "learning_rate": 9.997624083415929e-07, |
| "loss": 133.7994, |
| "step": 1520 |
| }, |
| { |
| "epoch": 0.12244729526779718, |
| "grad_norm": 78.3125, |
| "learning_rate": 9.99760845238577e-07, |
| "loss": 133.7843, |
| "step": 1530 |
| }, |
| { |
| "epoch": 0.12324760438719455, |
| "grad_norm": 69.4375, |
| "learning_rate": 9.997592821355613e-07, |
| "loss": 131.5354, |
| "step": 1540 |
| }, |
| { |
| "epoch": 0.12404791350659192, |
| "grad_norm": 75.0, |
| "learning_rate": 9.997577190325455e-07, |
| "loss": 133.611, |
| "step": 1550 |
| }, |
| { |
| "epoch": 0.12484822262598928, |
| "grad_norm": 75.625, |
| "learning_rate": 9.997561559295297e-07, |
| "loss": 132.3119, |
| "step": 1560 |
| }, |
| { |
| "epoch": 0.12564853174538665, |
| "grad_norm": 81.25, |
| "learning_rate": 9.997545928265138e-07, |
| "loss": 132.667, |
| "step": 1570 |
| }, |
| { |
| "epoch": 0.12644884086478403, |
| "grad_norm": 80.8125, |
| "learning_rate": 9.99753029723498e-07, |
| "loss": 133.3723, |
| "step": 1580 |
| }, |
| { |
| "epoch": 0.12724914998418138, |
| "grad_norm": 75.9375, |
| "learning_rate": 9.997514666204822e-07, |
| "loss": 131.5222, |
| "step": 1590 |
| }, |
| { |
| "epoch": 0.12804945910357876, |
| "grad_norm": 74.6875, |
| "learning_rate": 9.997499035174664e-07, |
| "loss": 133.6289, |
| "step": 1600 |
| }, |
| { |
| "epoch": 0.1288497682229761, |
| "grad_norm": 79.9375, |
| "learning_rate": 9.997483404144504e-07, |
| "loss": 133.2038, |
| "step": 1610 |
| }, |
| { |
| "epoch": 0.1296500773423735, |
| "grad_norm": 76.8125, |
| "learning_rate": 9.997467773114346e-07, |
| "loss": 133.7517, |
| "step": 1620 |
| }, |
| { |
| "epoch": 0.13045038646177085, |
| "grad_norm": 78.125, |
| "learning_rate": 9.997452142084189e-07, |
| "loss": 133.22, |
| "step": 1630 |
| }, |
| { |
| "epoch": 0.13125069558116823, |
| "grad_norm": 78.75, |
| "learning_rate": 9.997436511054029e-07, |
| "loss": 133.4411, |
| "step": 1640 |
| }, |
| { |
| "epoch": 0.1320510047005656, |
| "grad_norm": 75.125, |
| "learning_rate": 9.99742088002387e-07, |
| "loss": 132.7751, |
| "step": 1650 |
| }, |
| { |
| "epoch": 0.13285131381996296, |
| "grad_norm": 79.4375, |
| "learning_rate": 9.997405248993713e-07, |
| "loss": 133.3775, |
| "step": 1660 |
| }, |
| { |
| "epoch": 0.13365162293936034, |
| "grad_norm": 82.0, |
| "learning_rate": 9.997389617963555e-07, |
| "loss": 133.9474, |
| "step": 1670 |
| }, |
| { |
| "epoch": 0.1344519320587577, |
| "grad_norm": 76.4375, |
| "learning_rate": 9.997373986933395e-07, |
| "loss": 134.6912, |
| "step": 1680 |
| }, |
| { |
| "epoch": 0.13525224117815507, |
| "grad_norm": 75.875, |
| "learning_rate": 9.997358355903237e-07, |
| "loss": 135.1857, |
| "step": 1690 |
| }, |
| { |
| "epoch": 0.13605255029755242, |
| "grad_norm": 82.5625, |
| "learning_rate": 9.99734272487308e-07, |
| "loss": 135.5683, |
| "step": 1700 |
| }, |
| { |
| "epoch": 0.1368528594169498, |
| "grad_norm": 74.25, |
| "learning_rate": 9.997327093842922e-07, |
| "loss": 133.0515, |
| "step": 1710 |
| }, |
| { |
| "epoch": 0.13765316853634715, |
| "grad_norm": 75.6875, |
| "learning_rate": 9.997311462812764e-07, |
| "loss": 134.285, |
| "step": 1720 |
| }, |
| { |
| "epoch": 0.13845347765574453, |
| "grad_norm": 78.0, |
| "learning_rate": 9.997295831782604e-07, |
| "loss": 132.9074, |
| "step": 1730 |
| }, |
| { |
| "epoch": 0.13925378677514189, |
| "grad_norm": 77.0, |
| "learning_rate": 9.997280200752446e-07, |
| "loss": 133.4366, |
| "step": 1740 |
| }, |
| { |
| "epoch": 0.14005409589453927, |
| "grad_norm": 75.5625, |
| "learning_rate": 9.997264569722288e-07, |
| "loss": 133.7744, |
| "step": 1750 |
| }, |
| { |
| "epoch": 0.14085440501393665, |
| "grad_norm": 77.1875, |
| "learning_rate": 9.997248938692128e-07, |
| "loss": 133.4301, |
| "step": 1760 |
| }, |
| { |
| "epoch": 0.141654714133334, |
| "grad_norm": 76.5625, |
| "learning_rate": 9.99723330766197e-07, |
| "loss": 133.3505, |
| "step": 1770 |
| }, |
| { |
| "epoch": 0.14245502325273138, |
| "grad_norm": 81.25, |
| "learning_rate": 9.997217676631813e-07, |
| "loss": 133.3679, |
| "step": 1780 |
| }, |
| { |
| "epoch": 0.14325533237212873, |
| "grad_norm": 80.75, |
| "learning_rate": 9.997202045601655e-07, |
| "loss": 131.0644, |
| "step": 1790 |
| }, |
| { |
| "epoch": 0.1440556414915261, |
| "grad_norm": 78.6875, |
| "learning_rate": 9.997186414571495e-07, |
| "loss": 133.6262, |
| "step": 1800 |
| }, |
| { |
| "epoch": 0.14485595061092346, |
| "grad_norm": 71.6875, |
| "learning_rate": 9.997170783541337e-07, |
| "loss": 133.8133, |
| "step": 1810 |
| }, |
| { |
| "epoch": 0.14565625973032084, |
| "grad_norm": 73.875, |
| "learning_rate": 9.99715515251118e-07, |
| "loss": 133.2261, |
| "step": 1820 |
| }, |
| { |
| "epoch": 0.1464565688497182, |
| "grad_norm": 76.375, |
| "learning_rate": 9.997139521481022e-07, |
| "loss": 134.2764, |
| "step": 1830 |
| }, |
| { |
| "epoch": 0.14725687796911557, |
| "grad_norm": 80.0, |
| "learning_rate": 9.997123890450862e-07, |
| "loss": 133.0369, |
| "step": 1840 |
| }, |
| { |
| "epoch": 0.14805718708851293, |
| "grad_norm": 79.25, |
| "learning_rate": 9.997108259420704e-07, |
| "loss": 132.5432, |
| "step": 1850 |
| }, |
| { |
| "epoch": 0.1488574962079103, |
| "grad_norm": 76.4375, |
| "learning_rate": 9.997092628390546e-07, |
| "loss": 132.448, |
| "step": 1860 |
| }, |
| { |
| "epoch": 0.14965780532730769, |
| "grad_norm": 74.3125, |
| "learning_rate": 9.997076997360388e-07, |
| "loss": 134.039, |
| "step": 1870 |
| }, |
| { |
| "epoch": 0.15045811444670504, |
| "grad_norm": 76.625, |
| "learning_rate": 9.99706136633023e-07, |
| "loss": 133.9122, |
| "step": 1880 |
| }, |
| { |
| "epoch": 0.15125842356610242, |
| "grad_norm": 73.9375, |
| "learning_rate": 9.99704573530007e-07, |
| "loss": 133.0405, |
| "step": 1890 |
| }, |
| { |
| "epoch": 0.15205873268549977, |
| "grad_norm": 76.25, |
| "learning_rate": 9.997030104269913e-07, |
| "loss": 133.0878, |
| "step": 1900 |
| }, |
| { |
| "epoch": 0.15285904180489715, |
| "grad_norm": 74.0625, |
| "learning_rate": 9.997014473239755e-07, |
| "loss": 132.5003, |
| "step": 1910 |
| }, |
| { |
| "epoch": 0.1536593509242945, |
| "grad_norm": 76.8125, |
| "learning_rate": 9.996998842209595e-07, |
| "loss": 134.9118, |
| "step": 1920 |
| }, |
| { |
| "epoch": 0.15445966004369188, |
| "grad_norm": 86.4375, |
| "learning_rate": 9.996983211179437e-07, |
| "loss": 134.4519, |
| "step": 1930 |
| }, |
| { |
| "epoch": 0.15525996916308923, |
| "grad_norm": 76.8125, |
| "learning_rate": 9.99696758014928e-07, |
| "loss": 133.9741, |
| "step": 1940 |
| }, |
| { |
| "epoch": 0.15606027828248661, |
| "grad_norm": 76.1875, |
| "learning_rate": 9.996951949119121e-07, |
| "loss": 133.7216, |
| "step": 1950 |
| }, |
| { |
| "epoch": 0.15686058740188397, |
| "grad_norm": 72.125, |
| "learning_rate": 9.996936318088962e-07, |
| "loss": 132.1554, |
| "step": 1960 |
| }, |
| { |
| "epoch": 0.15766089652128135, |
| "grad_norm": 79.375, |
| "learning_rate": 9.996920687058804e-07, |
| "loss": 133.7076, |
| "step": 1970 |
| }, |
| { |
| "epoch": 0.1584612056406787, |
| "grad_norm": 79.375, |
| "learning_rate": 9.996905056028646e-07, |
| "loss": 133.1065, |
| "step": 1980 |
| }, |
| { |
| "epoch": 0.15926151476007608, |
| "grad_norm": 78.625, |
| "learning_rate": 9.996889424998486e-07, |
| "loss": 133.6056, |
| "step": 1990 |
| }, |
| { |
| "epoch": 0.16006182387947346, |
| "grad_norm": 79.9375, |
| "learning_rate": 9.996873793968328e-07, |
| "loss": 133.6693, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.1608621329988708, |
| "grad_norm": 73.5625, |
| "learning_rate": 9.99685816293817e-07, |
| "loss": 133.3085, |
| "step": 2010 |
| }, |
| { |
| "epoch": 0.1616624421182682, |
| "grad_norm": 72.625, |
| "learning_rate": 9.996842531908013e-07, |
| "loss": 132.2977, |
| "step": 2020 |
| }, |
| { |
| "epoch": 0.16246275123766554, |
| "grad_norm": 73.6875, |
| "learning_rate": 9.996826900877855e-07, |
| "loss": 133.0748, |
| "step": 2030 |
| }, |
| { |
| "epoch": 0.16326306035706292, |
| "grad_norm": 78.5625, |
| "learning_rate": 9.996811269847697e-07, |
| "loss": 134.3872, |
| "step": 2040 |
| }, |
| { |
| "epoch": 0.16406336947646027, |
| "grad_norm": 75.5625, |
| "learning_rate": 9.996795638817537e-07, |
| "loss": 130.7821, |
| "step": 2050 |
| }, |
| { |
| "epoch": 0.16486367859585765, |
| "grad_norm": 72.4375, |
| "learning_rate": 9.99678000778738e-07, |
| "loss": 132.3199, |
| "step": 2060 |
| }, |
| { |
| "epoch": 0.165663987715255, |
| "grad_norm": 77.1875, |
| "learning_rate": 9.996764376757221e-07, |
| "loss": 132.7069, |
| "step": 2070 |
| }, |
| { |
| "epoch": 0.1664642968346524, |
| "grad_norm": 78.75, |
| "learning_rate": 9.996748745727061e-07, |
| "loss": 134.6731, |
| "step": 2080 |
| }, |
| { |
| "epoch": 0.16726460595404974, |
| "grad_norm": 77.8125, |
| "learning_rate": 9.996733114696904e-07, |
| "loss": 132.4518, |
| "step": 2090 |
| }, |
| { |
| "epoch": 0.16806491507344712, |
| "grad_norm": 80.0625, |
| "learning_rate": 9.996717483666746e-07, |
| "loss": 131.4908, |
| "step": 2100 |
| }, |
| { |
| "epoch": 0.1688652241928445, |
| "grad_norm": 79.4375, |
| "learning_rate": 9.996701852636586e-07, |
| "loss": 132.5609, |
| "step": 2110 |
| }, |
| { |
| "epoch": 0.16966553331224185, |
| "grad_norm": 75.75, |
| "learning_rate": 9.996686221606428e-07, |
| "loss": 133.5363, |
| "step": 2120 |
| }, |
| { |
| "epoch": 0.17046584243163923, |
| "grad_norm": 74.8125, |
| "learning_rate": 9.99667059057627e-07, |
| "loss": 133.372, |
| "step": 2130 |
| }, |
| { |
| "epoch": 0.17126615155103658, |
| "grad_norm": 78.125, |
| "learning_rate": 9.996654959546112e-07, |
| "loss": 134.09, |
| "step": 2140 |
| }, |
| { |
| "epoch": 0.17206646067043396, |
| "grad_norm": 73.6875, |
| "learning_rate": 9.996639328515952e-07, |
| "loss": 134.2827, |
| "step": 2150 |
| }, |
| { |
| "epoch": 0.17286676978983131, |
| "grad_norm": 75.6875, |
| "learning_rate": 9.996623697485795e-07, |
| "loss": 133.4792, |
| "step": 2160 |
| }, |
| { |
| "epoch": 0.1736670789092287, |
| "grad_norm": 80.8125, |
| "learning_rate": 9.996608066455637e-07, |
| "loss": 133.7547, |
| "step": 2170 |
| }, |
| { |
| "epoch": 0.17446738802862605, |
| "grad_norm": 75.5625, |
| "learning_rate": 9.99659243542548e-07, |
| "loss": 132.9351, |
| "step": 2180 |
| }, |
| { |
| "epoch": 0.17526769714802343, |
| "grad_norm": 72.125, |
| "learning_rate": 9.996576804395321e-07, |
| "loss": 131.2331, |
| "step": 2190 |
| }, |
| { |
| "epoch": 0.17606800626742078, |
| "grad_norm": 79.4375, |
| "learning_rate": 9.996561173365161e-07, |
| "loss": 134.6388, |
| "step": 2200 |
| }, |
| { |
| "epoch": 0.17686831538681816, |
| "grad_norm": 77.125, |
| "learning_rate": 9.996545542335003e-07, |
| "loss": 133.2961, |
| "step": 2210 |
| }, |
| { |
| "epoch": 0.17766862450621554, |
| "grad_norm": 75.875, |
| "learning_rate": 9.996529911304846e-07, |
| "loss": 133.8811, |
| "step": 2220 |
| }, |
| { |
| "epoch": 0.1784689336256129, |
| "grad_norm": 73.5625, |
| "learning_rate": 9.996514280274688e-07, |
| "loss": 133.1757, |
| "step": 2230 |
| }, |
| { |
| "epoch": 0.17926924274501027, |
| "grad_norm": 78.875, |
| "learning_rate": 9.996498649244528e-07, |
| "loss": 132.6364, |
| "step": 2240 |
| }, |
| { |
| "epoch": 0.18006955186440762, |
| "grad_norm": 75.0, |
| "learning_rate": 9.99648301821437e-07, |
| "loss": 133.5787, |
| "step": 2250 |
| }, |
| { |
| "epoch": 0.180869860983805, |
| "grad_norm": 73.75, |
| "learning_rate": 9.996467387184212e-07, |
| "loss": 133.0472, |
| "step": 2260 |
| }, |
| { |
| "epoch": 0.18167017010320236, |
| "grad_norm": 73.5, |
| "learning_rate": 9.996451756154052e-07, |
| "loss": 132.8575, |
| "step": 2270 |
| }, |
| { |
| "epoch": 0.18247047922259974, |
| "grad_norm": 81.75, |
| "learning_rate": 9.996436125123894e-07, |
| "loss": 132.7385, |
| "step": 2280 |
| }, |
| { |
| "epoch": 0.1832707883419971, |
| "grad_norm": 71.5, |
| "learning_rate": 9.996420494093737e-07, |
| "loss": 132.8017, |
| "step": 2290 |
| }, |
| { |
| "epoch": 0.18407109746139447, |
| "grad_norm": 73.75, |
| "learning_rate": 9.996404863063579e-07, |
| "loss": 134.2902, |
| "step": 2300 |
| }, |
| { |
| "epoch": 0.18487140658079182, |
| "grad_norm": 72.375, |
| "learning_rate": 9.996389232033419e-07, |
| "loss": 133.3126, |
| "step": 2310 |
| }, |
| { |
| "epoch": 0.1856717157001892, |
| "grad_norm": 72.875, |
| "learning_rate": 9.99637360100326e-07, |
| "loss": 133.4326, |
| "step": 2320 |
| }, |
| { |
| "epoch": 0.18647202481958655, |
| "grad_norm": 79.875, |
| "learning_rate": 9.996357969973103e-07, |
| "loss": 135.3909, |
| "step": 2330 |
| }, |
| { |
| "epoch": 0.18727233393898393, |
| "grad_norm": 77.0625, |
| "learning_rate": 9.996342338942945e-07, |
| "loss": 133.5528, |
| "step": 2340 |
| }, |
| { |
| "epoch": 0.1880726430583813, |
| "grad_norm": 77.5, |
| "learning_rate": 9.996326707912788e-07, |
| "loss": 133.1468, |
| "step": 2350 |
| }, |
| { |
| "epoch": 0.18887295217777866, |
| "grad_norm": 79.0, |
| "learning_rate": 9.996311076882628e-07, |
| "loss": 132.3221, |
| "step": 2360 |
| }, |
| { |
| "epoch": 0.18967326129717604, |
| "grad_norm": 78.625, |
| "learning_rate": 9.99629544585247e-07, |
| "loss": 134.5704, |
| "step": 2370 |
| }, |
| { |
| "epoch": 0.1904735704165734, |
| "grad_norm": 75.0, |
| "learning_rate": 9.996279814822312e-07, |
| "loss": 133.3002, |
| "step": 2380 |
| }, |
| { |
| "epoch": 0.19127387953597078, |
| "grad_norm": 81.8125, |
| "learning_rate": 9.996264183792154e-07, |
| "loss": 133.6831, |
| "step": 2390 |
| }, |
| { |
| "epoch": 0.19207418865536813, |
| "grad_norm": 74.375, |
| "learning_rate": 9.996248552761994e-07, |
| "loss": 132.5035, |
| "step": 2400 |
| }, |
| { |
| "epoch": 0.1928744977747655, |
| "grad_norm": 83.8125, |
| "learning_rate": 9.996232921731836e-07, |
| "loss": 132.3072, |
| "step": 2410 |
| }, |
| { |
| "epoch": 0.19367480689416286, |
| "grad_norm": 76.6875, |
| "learning_rate": 9.996217290701679e-07, |
| "loss": 133.4728, |
| "step": 2420 |
| }, |
| { |
| "epoch": 0.19447511601356024, |
| "grad_norm": 80.25, |
| "learning_rate": 9.996201659671519e-07, |
| "loss": 132.2028, |
| "step": 2430 |
| }, |
| { |
| "epoch": 0.1952754251329576, |
| "grad_norm": 72.5625, |
| "learning_rate": 9.99618602864136e-07, |
| "loss": 133.2437, |
| "step": 2440 |
| }, |
| { |
| "epoch": 0.19607573425235497, |
| "grad_norm": 77.125, |
| "learning_rate": 9.996170397611203e-07, |
| "loss": 134.2502, |
| "step": 2450 |
| }, |
| { |
| "epoch": 0.19687604337175235, |
| "grad_norm": 78.0, |
| "learning_rate": 9.996154766581043e-07, |
| "loss": 134.4984, |
| "step": 2460 |
| }, |
| { |
| "epoch": 0.1976763524911497, |
| "grad_norm": 80.3125, |
| "learning_rate": 9.996139135550885e-07, |
| "loss": 132.5038, |
| "step": 2470 |
| }, |
| { |
| "epoch": 0.19847666161054708, |
| "grad_norm": 72.5, |
| "learning_rate": 9.996123504520728e-07, |
| "loss": 130.4979, |
| "step": 2480 |
| }, |
| { |
| "epoch": 0.19927697072994444, |
| "grad_norm": 76.3125, |
| "learning_rate": 9.99610787349057e-07, |
| "loss": 132.6546, |
| "step": 2490 |
| }, |
| { |
| "epoch": 0.20007727984934182, |
| "grad_norm": 74.5, |
| "learning_rate": 9.996092242460412e-07, |
| "loss": 132.9037, |
| "step": 2500 |
| }, |
| { |
| "epoch": 0.20007727984934182, |
| "eval_loss": 2.079555034637451, |
| "eval_runtime": 423.7834, |
| "eval_samples_per_second": 1548.333, |
| "eval_steps_per_second": 48.386, |
| "step": 2500 |
| }, |
| { |
| "epoch": 0.20087758896873917, |
| "grad_norm": 71.4375, |
| "learning_rate": 9.996076611430254e-07, |
| "loss": 132.1891, |
| "step": 2510 |
| }, |
| { |
| "epoch": 0.20167789808813655, |
| "grad_norm": 76.6875, |
| "learning_rate": 9.996060980400094e-07, |
| "loss": 133.4134, |
| "step": 2520 |
| }, |
| { |
| "epoch": 0.2024782072075339, |
| "grad_norm": 78.3125, |
| "learning_rate": 9.996045349369936e-07, |
| "loss": 134.7546, |
| "step": 2530 |
| }, |
| { |
| "epoch": 0.20327851632693128, |
| "grad_norm": 74.1875, |
| "learning_rate": 9.996029718339778e-07, |
| "loss": 132.944, |
| "step": 2540 |
| }, |
| { |
| "epoch": 0.20407882544632863, |
| "grad_norm": 73.125, |
| "learning_rate": 9.99601408730962e-07, |
| "loss": 132.8733, |
| "step": 2550 |
| }, |
| { |
| "epoch": 0.204879134565726, |
| "grad_norm": 77.1875, |
| "learning_rate": 9.99599845627946e-07, |
| "loss": 134.1901, |
| "step": 2560 |
| }, |
| { |
| "epoch": 0.2056794436851234, |
| "grad_norm": 79.9375, |
| "learning_rate": 9.995982825249303e-07, |
| "loss": 132.7988, |
| "step": 2570 |
| }, |
| { |
| "epoch": 0.20647975280452074, |
| "grad_norm": 76.75, |
| "learning_rate": 9.995967194219145e-07, |
| "loss": 133.0693, |
| "step": 2580 |
| }, |
| { |
| "epoch": 0.20728006192391812, |
| "grad_norm": 72.375, |
| "learning_rate": 9.995951563188985e-07, |
| "loss": 132.1166, |
| "step": 2590 |
| }, |
| { |
| "epoch": 0.20808037104331548, |
| "grad_norm": 80.0, |
| "learning_rate": 9.995935932158827e-07, |
| "loss": 133.8413, |
| "step": 2600 |
| }, |
| { |
| "epoch": 0.20888068016271286, |
| "grad_norm": 75.9375, |
| "learning_rate": 9.99592030112867e-07, |
| "loss": 134.1737, |
| "step": 2610 |
| }, |
| { |
| "epoch": 0.2096809892821102, |
| "grad_norm": 82.9375, |
| "learning_rate": 9.99590467009851e-07, |
| "loss": 131.8263, |
| "step": 2620 |
| }, |
| { |
| "epoch": 0.2104812984015076, |
| "grad_norm": 73.375, |
| "learning_rate": 9.995889039068352e-07, |
| "loss": 134.3057, |
| "step": 2630 |
| }, |
| { |
| "epoch": 0.21128160752090494, |
| "grad_norm": 79.9375, |
| "learning_rate": 9.995873408038194e-07, |
| "loss": 131.4325, |
| "step": 2640 |
| }, |
| { |
| "epoch": 0.21208191664030232, |
| "grad_norm": 71.9375, |
| "learning_rate": 9.995857777008036e-07, |
| "loss": 133.1537, |
| "step": 2650 |
| }, |
| { |
| "epoch": 0.21288222575969967, |
| "grad_norm": 74.875, |
| "learning_rate": 9.995842145977878e-07, |
| "loss": 131.8966, |
| "step": 2660 |
| }, |
| { |
| "epoch": 0.21368253487909705, |
| "grad_norm": 82.875, |
| "learning_rate": 9.995826514947718e-07, |
| "loss": 132.7061, |
| "step": 2670 |
| }, |
| { |
| "epoch": 0.2144828439984944, |
| "grad_norm": 75.4375, |
| "learning_rate": 9.99581088391756e-07, |
| "loss": 133.5048, |
| "step": 2680 |
| }, |
| { |
| "epoch": 0.21528315311789178, |
| "grad_norm": 78.875, |
| "learning_rate": 9.995795252887403e-07, |
| "loss": 133.0009, |
| "step": 2690 |
| }, |
| { |
| "epoch": 0.21608346223728916, |
| "grad_norm": 76.875, |
| "learning_rate": 9.995779621857245e-07, |
| "loss": 132.829, |
| "step": 2700 |
| }, |
| { |
| "epoch": 0.21688377135668652, |
| "grad_norm": 75.4375, |
| "learning_rate": 9.995763990827087e-07, |
| "loss": 131.0754, |
| "step": 2710 |
| }, |
| { |
| "epoch": 0.2176840804760839, |
| "grad_norm": 75.125, |
| "learning_rate": 9.995748359796927e-07, |
| "loss": 133.1994, |
| "step": 2720 |
| }, |
| { |
| "epoch": 0.21848438959548125, |
| "grad_norm": 77.25, |
| "learning_rate": 9.99573272876677e-07, |
| "loss": 133.7851, |
| "step": 2730 |
| }, |
| { |
| "epoch": 0.21928469871487863, |
| "grad_norm": 74.5, |
| "learning_rate": 9.995717097736612e-07, |
| "loss": 134.2786, |
| "step": 2740 |
| }, |
| { |
| "epoch": 0.22008500783427598, |
| "grad_norm": 78.1875, |
| "learning_rate": 9.995701466706452e-07, |
| "loss": 132.9056, |
| "step": 2750 |
| }, |
| { |
| "epoch": 0.22088531695367336, |
| "grad_norm": 73.0, |
| "learning_rate": 9.995685835676294e-07, |
| "loss": 133.8631, |
| "step": 2760 |
| }, |
| { |
| "epoch": 0.2216856260730707, |
| "grad_norm": 75.0, |
| "learning_rate": 9.995670204646136e-07, |
| "loss": 133.2654, |
| "step": 2770 |
| }, |
| { |
| "epoch": 0.2224859351924681, |
| "grad_norm": 72.25, |
| "learning_rate": 9.995654573615976e-07, |
| "loss": 133.3257, |
| "step": 2780 |
| }, |
| { |
| "epoch": 0.22328624431186544, |
| "grad_norm": 74.625, |
| "learning_rate": 9.995638942585818e-07, |
| "loss": 134.212, |
| "step": 2790 |
| }, |
| { |
| "epoch": 0.22408655343126282, |
| "grad_norm": 81.625, |
| "learning_rate": 9.99562331155566e-07, |
| "loss": 131.8705, |
| "step": 2800 |
| }, |
| { |
| "epoch": 0.2248868625506602, |
| "grad_norm": 74.5625, |
| "learning_rate": 9.995607680525503e-07, |
| "loss": 132.6001, |
| "step": 2810 |
| }, |
| { |
| "epoch": 0.22568717167005756, |
| "grad_norm": 81.6875, |
| "learning_rate": 9.995592049495345e-07, |
| "loss": 132.6342, |
| "step": 2820 |
| }, |
| { |
| "epoch": 0.22648748078945494, |
| "grad_norm": 74.75, |
| "learning_rate": 9.995576418465185e-07, |
| "loss": 133.3588, |
| "step": 2830 |
| }, |
| { |
| "epoch": 0.2272877899088523, |
| "grad_norm": 76.625, |
| "learning_rate": 9.995560787435027e-07, |
| "loss": 133.0776, |
| "step": 2840 |
| }, |
| { |
| "epoch": 0.22808809902824967, |
| "grad_norm": 75.375, |
| "learning_rate": 9.99554515640487e-07, |
| "loss": 132.3477, |
| "step": 2850 |
| }, |
| { |
| "epoch": 0.22888840814764702, |
| "grad_norm": 78.6875, |
| "learning_rate": 9.995529525374711e-07, |
| "loss": 133.229, |
| "step": 2860 |
| }, |
| { |
| "epoch": 0.2296887172670444, |
| "grad_norm": 74.1875, |
| "learning_rate": 9.995513894344551e-07, |
| "loss": 134.0937, |
| "step": 2870 |
| }, |
| { |
| "epoch": 0.23048902638644175, |
| "grad_norm": 74.625, |
| "learning_rate": 9.995498263314394e-07, |
| "loss": 133.6065, |
| "step": 2880 |
| }, |
| { |
| "epoch": 0.23128933550583913, |
| "grad_norm": 76.125, |
| "learning_rate": 9.995482632284236e-07, |
| "loss": 134.7819, |
| "step": 2890 |
| }, |
| { |
| "epoch": 0.23208964462523649, |
| "grad_norm": 70.0, |
| "learning_rate": 9.995467001254078e-07, |
| "loss": 131.2454, |
| "step": 2900 |
| }, |
| { |
| "epoch": 0.23288995374463387, |
| "grad_norm": 79.0625, |
| "learning_rate": 9.995451370223918e-07, |
| "loss": 132.6385, |
| "step": 2910 |
| }, |
| { |
| "epoch": 0.23369026286403125, |
| "grad_norm": 76.4375, |
| "learning_rate": 9.99543573919376e-07, |
| "loss": 132.4604, |
| "step": 2920 |
| }, |
| { |
| "epoch": 0.2344905719834286, |
| "grad_norm": 69.75, |
| "learning_rate": 9.995420108163602e-07, |
| "loss": 133.4625, |
| "step": 2930 |
| }, |
| { |
| "epoch": 0.23529088110282598, |
| "grad_norm": 73.75, |
| "learning_rate": 9.995404477133443e-07, |
| "loss": 133.4203, |
| "step": 2940 |
| }, |
| { |
| "epoch": 0.23609119022222333, |
| "grad_norm": 77.5, |
| "learning_rate": 9.995388846103285e-07, |
| "loss": 133.9867, |
| "step": 2950 |
| }, |
| { |
| "epoch": 0.2368914993416207, |
| "grad_norm": 73.0, |
| "learning_rate": 9.995373215073127e-07, |
| "loss": 134.0537, |
| "step": 2960 |
| }, |
| { |
| "epoch": 0.23769180846101806, |
| "grad_norm": 76.6875, |
| "learning_rate": 9.99535758404297e-07, |
| "loss": 133.4288, |
| "step": 2970 |
| }, |
| { |
| "epoch": 0.23849211758041544, |
| "grad_norm": 81.625, |
| "learning_rate": 9.995341953012811e-07, |
| "loss": 131.8527, |
| "step": 2980 |
| }, |
| { |
| "epoch": 0.2392924266998128, |
| "grad_norm": 75.8125, |
| "learning_rate": 9.995326321982651e-07, |
| "loss": 133.3051, |
| "step": 2990 |
| }, |
| { |
| "epoch": 0.24009273581921017, |
| "grad_norm": 77.4375, |
| "learning_rate": 9.995310690952493e-07, |
| "loss": 133.7738, |
| "step": 3000 |
| }, |
| { |
| "epoch": 0.24089304493860753, |
| "grad_norm": 77.5625, |
| "learning_rate": 9.995295059922336e-07, |
| "loss": 134.9874, |
| "step": 3010 |
| }, |
| { |
| "epoch": 0.2416933540580049, |
| "grad_norm": 74.0, |
| "learning_rate": 9.995279428892178e-07, |
| "loss": 133.7144, |
| "step": 3020 |
| }, |
| { |
| "epoch": 0.24249366317740229, |
| "grad_norm": 82.375, |
| "learning_rate": 9.995263797862018e-07, |
| "loss": 133.7562, |
| "step": 3030 |
| }, |
| { |
| "epoch": 0.24329397229679964, |
| "grad_norm": 79.0, |
| "learning_rate": 9.99524816683186e-07, |
| "loss": 133.503, |
| "step": 3040 |
| }, |
| { |
| "epoch": 0.24409428141619702, |
| "grad_norm": 76.375, |
| "learning_rate": 9.995232535801702e-07, |
| "loss": 134.0484, |
| "step": 3050 |
| }, |
| { |
| "epoch": 0.24489459053559437, |
| "grad_norm": 73.9375, |
| "learning_rate": 9.995216904771542e-07, |
| "loss": 132.5795, |
| "step": 3060 |
| }, |
| { |
| "epoch": 0.24569489965499175, |
| "grad_norm": 75.625, |
| "learning_rate": 9.995201273741385e-07, |
| "loss": 131.5031, |
| "step": 3070 |
| }, |
| { |
| "epoch": 0.2464952087743891, |
| "grad_norm": 74.875, |
| "learning_rate": 9.995185642711227e-07, |
| "loss": 132.9786, |
| "step": 3080 |
| }, |
| { |
| "epoch": 0.24729551789378648, |
| "grad_norm": 76.9375, |
| "learning_rate": 9.995170011681069e-07, |
| "loss": 132.8848, |
| "step": 3090 |
| }, |
| { |
| "epoch": 0.24809582701318383, |
| "grad_norm": 77.1875, |
| "learning_rate": 9.99515438065091e-07, |
| "loss": 133.2721, |
| "step": 3100 |
| }, |
| { |
| "epoch": 0.2488961361325812, |
| "grad_norm": 83.8125, |
| "learning_rate": 9.995138749620751e-07, |
| "loss": 132.621, |
| "step": 3110 |
| }, |
| { |
| "epoch": 0.24969644525197857, |
| "grad_norm": 77.5, |
| "learning_rate": 9.995123118590593e-07, |
| "loss": 133.5524, |
| "step": 3120 |
| }, |
| { |
| "epoch": 0.2504967543713759, |
| "grad_norm": 72.4375, |
| "learning_rate": 9.995107487560436e-07, |
| "loss": 133.8227, |
| "step": 3130 |
| }, |
| { |
| "epoch": 0.2512970634907733, |
| "grad_norm": 75.4375, |
| "learning_rate": 9.995091856530278e-07, |
| "loss": 133.7567, |
| "step": 3140 |
| }, |
| { |
| "epoch": 0.2520973726101707, |
| "grad_norm": 76.0, |
| "learning_rate": 9.995076225500118e-07, |
| "loss": 133.4744, |
| "step": 3150 |
| }, |
| { |
| "epoch": 0.25289768172956806, |
| "grad_norm": 77.6875, |
| "learning_rate": 9.99506059446996e-07, |
| "loss": 131.4135, |
| "step": 3160 |
| }, |
| { |
| "epoch": 0.25369799084896544, |
| "grad_norm": 77.4375, |
| "learning_rate": 9.995044963439802e-07, |
| "loss": 131.6956, |
| "step": 3170 |
| }, |
| { |
| "epoch": 0.25449829996836276, |
| "grad_norm": 74.625, |
| "learning_rate": 9.995029332409644e-07, |
| "loss": 133.5626, |
| "step": 3180 |
| }, |
| { |
| "epoch": 0.25529860908776014, |
| "grad_norm": 75.875, |
| "learning_rate": 9.995013701379484e-07, |
| "loss": 131.9732, |
| "step": 3190 |
| }, |
| { |
| "epoch": 0.2560989182071575, |
| "grad_norm": 73.0, |
| "learning_rate": 9.994998070349327e-07, |
| "loss": 132.5818, |
| "step": 3200 |
| }, |
| { |
| "epoch": 0.2568992273265549, |
| "grad_norm": 75.0625, |
| "learning_rate": 9.994982439319169e-07, |
| "loss": 133.6618, |
| "step": 3210 |
| }, |
| { |
| "epoch": 0.2576995364459522, |
| "grad_norm": 75.8125, |
| "learning_rate": 9.994966808289009e-07, |
| "loss": 132.3088, |
| "step": 3220 |
| }, |
| { |
| "epoch": 0.2584998455653496, |
| "grad_norm": 78.5, |
| "learning_rate": 9.99495117725885e-07, |
| "loss": 131.4488, |
| "step": 3230 |
| }, |
| { |
| "epoch": 0.259300154684747, |
| "grad_norm": 73.0, |
| "learning_rate": 9.994935546228693e-07, |
| "loss": 133.2048, |
| "step": 3240 |
| }, |
| { |
| "epoch": 0.26010046380414437, |
| "grad_norm": 74.3125, |
| "learning_rate": 9.994919915198535e-07, |
| "loss": 133.9069, |
| "step": 3250 |
| }, |
| { |
| "epoch": 0.2609007729235417, |
| "grad_norm": 73.8125, |
| "learning_rate": 9.994904284168375e-07, |
| "loss": 132.9612, |
| "step": 3260 |
| }, |
| { |
| "epoch": 0.26170108204293907, |
| "grad_norm": 75.5625, |
| "learning_rate": 9.994888653138218e-07, |
| "loss": 133.8464, |
| "step": 3270 |
| }, |
| { |
| "epoch": 0.26250139116233645, |
| "grad_norm": 75.625, |
| "learning_rate": 9.99487302210806e-07, |
| "loss": 132.2074, |
| "step": 3280 |
| }, |
| { |
| "epoch": 0.26330170028173383, |
| "grad_norm": 73.125, |
| "learning_rate": 9.994857391077902e-07, |
| "loss": 134.991, |
| "step": 3290 |
| }, |
| { |
| "epoch": 0.2641020094011312, |
| "grad_norm": 77.375, |
| "learning_rate": 9.994841760047742e-07, |
| "loss": 132.3946, |
| "step": 3300 |
| }, |
| { |
| "epoch": 0.26490231852052853, |
| "grad_norm": 77.8125, |
| "learning_rate": 9.994826129017584e-07, |
| "loss": 134.2822, |
| "step": 3310 |
| }, |
| { |
| "epoch": 0.2657026276399259, |
| "grad_norm": 76.5625, |
| "learning_rate": 9.994810497987426e-07, |
| "loss": 134.2205, |
| "step": 3320 |
| }, |
| { |
| "epoch": 0.2665029367593233, |
| "grad_norm": 80.75, |
| "learning_rate": 9.994794866957269e-07, |
| "loss": 133.8495, |
| "step": 3330 |
| }, |
| { |
| "epoch": 0.2673032458787207, |
| "grad_norm": 76.125, |
| "learning_rate": 9.99477923592711e-07, |
| "loss": 133.0968, |
| "step": 3340 |
| }, |
| { |
| "epoch": 0.268103554998118, |
| "grad_norm": 76.0625, |
| "learning_rate": 9.99476360489695e-07, |
| "loss": 132.9434, |
| "step": 3350 |
| }, |
| { |
| "epoch": 0.2689038641175154, |
| "grad_norm": 77.6875, |
| "learning_rate": 9.994747973866793e-07, |
| "loss": 131.3061, |
| "step": 3360 |
| }, |
| { |
| "epoch": 0.26970417323691276, |
| "grad_norm": 78.5625, |
| "learning_rate": 9.994732342836635e-07, |
| "loss": 133.879, |
| "step": 3370 |
| }, |
| { |
| "epoch": 0.27050448235631014, |
| "grad_norm": 77.0625, |
| "learning_rate": 9.994716711806475e-07, |
| "loss": 132.1836, |
| "step": 3380 |
| }, |
| { |
| "epoch": 0.2713047914757075, |
| "grad_norm": 78.625, |
| "learning_rate": 9.994701080776317e-07, |
| "loss": 133.099, |
| "step": 3390 |
| }, |
| { |
| "epoch": 0.27210510059510484, |
| "grad_norm": 70.3125, |
| "learning_rate": 9.99468544974616e-07, |
| "loss": 131.4613, |
| "step": 3400 |
| }, |
| { |
| "epoch": 0.2729054097145022, |
| "grad_norm": 79.3125, |
| "learning_rate": 9.994669818716e-07, |
| "loss": 133.228, |
| "step": 3410 |
| }, |
| { |
| "epoch": 0.2737057188338996, |
| "grad_norm": 74.875, |
| "learning_rate": 9.994654187685842e-07, |
| "loss": 133.2147, |
| "step": 3420 |
| }, |
| { |
| "epoch": 0.274506027953297, |
| "grad_norm": 80.75, |
| "learning_rate": 9.994638556655684e-07, |
| "loss": 132.7644, |
| "step": 3430 |
| }, |
| { |
| "epoch": 0.2753063370726943, |
| "grad_norm": 77.25, |
| "learning_rate": 9.994622925625526e-07, |
| "loss": 132.9558, |
| "step": 3440 |
| }, |
| { |
| "epoch": 0.2761066461920917, |
| "grad_norm": 77.8125, |
| "learning_rate": 9.994607294595368e-07, |
| "loss": 131.8311, |
| "step": 3450 |
| }, |
| { |
| "epoch": 0.27690695531148907, |
| "grad_norm": 74.8125, |
| "learning_rate": 9.994591663565208e-07, |
| "loss": 133.9894, |
| "step": 3460 |
| }, |
| { |
| "epoch": 0.27770726443088645, |
| "grad_norm": 73.5, |
| "learning_rate": 9.99457603253505e-07, |
| "loss": 133.7439, |
| "step": 3470 |
| }, |
| { |
| "epoch": 0.27850757355028377, |
| "grad_norm": 75.1875, |
| "learning_rate": 9.994560401504893e-07, |
| "loss": 133.1463, |
| "step": 3480 |
| }, |
| { |
| "epoch": 0.27930788266968115, |
| "grad_norm": 73.0625, |
| "learning_rate": 9.994544770474735e-07, |
| "loss": 133.7504, |
| "step": 3490 |
| }, |
| { |
| "epoch": 0.28010819178907853, |
| "grad_norm": 80.75, |
| "learning_rate": 9.994529139444577e-07, |
| "loss": 133.6309, |
| "step": 3500 |
| }, |
| { |
| "epoch": 0.2809085009084759, |
| "grad_norm": 72.8125, |
| "learning_rate": 9.994513508414417e-07, |
| "loss": 133.597, |
| "step": 3510 |
| }, |
| { |
| "epoch": 0.2817088100278733, |
| "grad_norm": 84.75, |
| "learning_rate": 9.99449787738426e-07, |
| "loss": 133.3475, |
| "step": 3520 |
| }, |
| { |
| "epoch": 0.2825091191472706, |
| "grad_norm": 81.3125, |
| "learning_rate": 9.994482246354102e-07, |
| "loss": 131.89, |
| "step": 3530 |
| }, |
| { |
| "epoch": 0.283309428266668, |
| "grad_norm": 76.25, |
| "learning_rate": 9.994466615323942e-07, |
| "loss": 134.1571, |
| "step": 3540 |
| }, |
| { |
| "epoch": 0.2841097373860654, |
| "grad_norm": 78.125, |
| "learning_rate": 9.994450984293784e-07, |
| "loss": 132.8667, |
| "step": 3550 |
| }, |
| { |
| "epoch": 0.28491004650546276, |
| "grad_norm": 77.3125, |
| "learning_rate": 9.994435353263626e-07, |
| "loss": 132.635, |
| "step": 3560 |
| }, |
| { |
| "epoch": 0.2857103556248601, |
| "grad_norm": 76.9375, |
| "learning_rate": 9.994419722233466e-07, |
| "loss": 132.5345, |
| "step": 3570 |
| }, |
| { |
| "epoch": 0.28651066474425746, |
| "grad_norm": 79.6875, |
| "learning_rate": 9.994404091203308e-07, |
| "loss": 133.5979, |
| "step": 3580 |
| }, |
| { |
| "epoch": 0.28731097386365484, |
| "grad_norm": 74.5625, |
| "learning_rate": 9.99438846017315e-07, |
| "loss": 133.0856, |
| "step": 3590 |
| }, |
| { |
| "epoch": 0.2881112829830522, |
| "grad_norm": 81.75, |
| "learning_rate": 9.994372829142993e-07, |
| "loss": 133.0596, |
| "step": 3600 |
| }, |
| { |
| "epoch": 0.28891159210244954, |
| "grad_norm": 79.25, |
| "learning_rate": 9.994357198112835e-07, |
| "loss": 133.8565, |
| "step": 3610 |
| }, |
| { |
| "epoch": 0.2897119012218469, |
| "grad_norm": 78.1875, |
| "learning_rate": 9.994341567082675e-07, |
| "loss": 133.0675, |
| "step": 3620 |
| }, |
| { |
| "epoch": 0.2905122103412443, |
| "grad_norm": 78.8125, |
| "learning_rate": 9.994325936052517e-07, |
| "loss": 132.4503, |
| "step": 3630 |
| }, |
| { |
| "epoch": 0.2913125194606417, |
| "grad_norm": 74.0, |
| "learning_rate": 9.99431030502236e-07, |
| "loss": 131.9785, |
| "step": 3640 |
| }, |
| { |
| "epoch": 0.29211282858003906, |
| "grad_norm": 81.25, |
| "learning_rate": 9.994294673992201e-07, |
| "loss": 131.6815, |
| "step": 3650 |
| }, |
| { |
| "epoch": 0.2929131376994364, |
| "grad_norm": 78.125, |
| "learning_rate": 9.994279042962044e-07, |
| "loss": 133.6587, |
| "step": 3660 |
| }, |
| { |
| "epoch": 0.29371344681883377, |
| "grad_norm": 82.8125, |
| "learning_rate": 9.994263411931884e-07, |
| "loss": 132.4716, |
| "step": 3670 |
| }, |
| { |
| "epoch": 0.29451375593823115, |
| "grad_norm": 77.875, |
| "learning_rate": 9.994247780901726e-07, |
| "loss": 134.3209, |
| "step": 3680 |
| }, |
| { |
| "epoch": 0.2953140650576285, |
| "grad_norm": 73.8125, |
| "learning_rate": 9.994232149871568e-07, |
| "loss": 132.6673, |
| "step": 3690 |
| }, |
| { |
| "epoch": 0.29611437417702585, |
| "grad_norm": 81.0, |
| "learning_rate": 9.994216518841408e-07, |
| "loss": 132.5012, |
| "step": 3700 |
| }, |
| { |
| "epoch": 0.29691468329642323, |
| "grad_norm": 71.125, |
| "learning_rate": 9.99420088781125e-07, |
| "loss": 133.0868, |
| "step": 3710 |
| }, |
| { |
| "epoch": 0.2977149924158206, |
| "grad_norm": 85.5625, |
| "learning_rate": 9.994185256781093e-07, |
| "loss": 133.206, |
| "step": 3720 |
| }, |
| { |
| "epoch": 0.298515301535218, |
| "grad_norm": 78.9375, |
| "learning_rate": 9.994169625750933e-07, |
| "loss": 132.3186, |
| "step": 3730 |
| }, |
| { |
| "epoch": 0.29931561065461537, |
| "grad_norm": 82.1875, |
| "learning_rate": 9.994153994720775e-07, |
| "loss": 132.2575, |
| "step": 3740 |
| }, |
| { |
| "epoch": 0.3001159197740127, |
| "grad_norm": 83.5625, |
| "learning_rate": 9.994138363690617e-07, |
| "loss": 133.2065, |
| "step": 3750 |
| }, |
| { |
| "epoch": 0.3009162288934101, |
| "grad_norm": 72.3125, |
| "learning_rate": 9.99412273266046e-07, |
| "loss": 133.6776, |
| "step": 3760 |
| }, |
| { |
| "epoch": 0.30171653801280746, |
| "grad_norm": 77.6875, |
| "learning_rate": 9.9941071016303e-07, |
| "loss": 132.2097, |
| "step": 3770 |
| }, |
| { |
| "epoch": 0.30251684713220484, |
| "grad_norm": 74.875, |
| "learning_rate": 9.994091470600141e-07, |
| "loss": 130.3959, |
| "step": 3780 |
| }, |
| { |
| "epoch": 0.30331715625160216, |
| "grad_norm": 74.375, |
| "learning_rate": 9.994075839569984e-07, |
| "loss": 131.0167, |
| "step": 3790 |
| }, |
| { |
| "epoch": 0.30411746537099954, |
| "grad_norm": 76.375, |
| "learning_rate": 9.994060208539826e-07, |
| "loss": 132.5872, |
| "step": 3800 |
| }, |
| { |
| "epoch": 0.3049177744903969, |
| "grad_norm": 75.6875, |
| "learning_rate": 9.994044577509668e-07, |
| "loss": 133.6776, |
| "step": 3810 |
| }, |
| { |
| "epoch": 0.3057180836097943, |
| "grad_norm": 75.125, |
| "learning_rate": 9.99402894647951e-07, |
| "loss": 133.0905, |
| "step": 3820 |
| }, |
| { |
| "epoch": 0.3065183927291916, |
| "grad_norm": 77.4375, |
| "learning_rate": 9.99401331544935e-07, |
| "loss": 133.0921, |
| "step": 3830 |
| }, |
| { |
| "epoch": 0.307318701848589, |
| "grad_norm": 80.9375, |
| "learning_rate": 9.993997684419192e-07, |
| "loss": 133.8795, |
| "step": 3840 |
| }, |
| { |
| "epoch": 0.3081190109679864, |
| "grad_norm": 73.75, |
| "learning_rate": 9.993982053389035e-07, |
| "loss": 133.65, |
| "step": 3850 |
| }, |
| { |
| "epoch": 0.30891932008738376, |
| "grad_norm": 76.6875, |
| "learning_rate": 9.993966422358875e-07, |
| "loss": 133.7787, |
| "step": 3860 |
| }, |
| { |
| "epoch": 0.30971962920678114, |
| "grad_norm": 76.6875, |
| "learning_rate": 9.993950791328717e-07, |
| "loss": 132.1005, |
| "step": 3870 |
| }, |
| { |
| "epoch": 0.31051993832617847, |
| "grad_norm": 70.75, |
| "learning_rate": 9.99393516029856e-07, |
| "loss": 131.8651, |
| "step": 3880 |
| }, |
| { |
| "epoch": 0.31132024744557585, |
| "grad_norm": 82.3125, |
| "learning_rate": 9.9939195292684e-07, |
| "loss": 133.0827, |
| "step": 3890 |
| }, |
| { |
| "epoch": 0.31212055656497323, |
| "grad_norm": 76.75, |
| "learning_rate": 9.993903898238241e-07, |
| "loss": 132.4445, |
| "step": 3900 |
| }, |
| { |
| "epoch": 0.3129208656843706, |
| "grad_norm": 83.1875, |
| "learning_rate": 9.993888267208083e-07, |
| "loss": 134.2083, |
| "step": 3910 |
| }, |
| { |
| "epoch": 0.31372117480376793, |
| "grad_norm": 78.9375, |
| "learning_rate": 9.993872636177926e-07, |
| "loss": 132.1415, |
| "step": 3920 |
| }, |
| { |
| "epoch": 0.3145214839231653, |
| "grad_norm": 77.625, |
| "learning_rate": 9.993857005147766e-07, |
| "loss": 133.3125, |
| "step": 3930 |
| }, |
| { |
| "epoch": 0.3153217930425627, |
| "grad_norm": 77.125, |
| "learning_rate": 9.993841374117608e-07, |
| "loss": 133.077, |
| "step": 3940 |
| }, |
| { |
| "epoch": 0.3161221021619601, |
| "grad_norm": 72.4375, |
| "learning_rate": 9.99382574308745e-07, |
| "loss": 132.9227, |
| "step": 3950 |
| }, |
| { |
| "epoch": 0.3169224112813574, |
| "grad_norm": 76.0625, |
| "learning_rate": 9.993810112057292e-07, |
| "loss": 131.9877, |
| "step": 3960 |
| }, |
| { |
| "epoch": 0.3177227204007548, |
| "grad_norm": 75.6875, |
| "learning_rate": 9.993794481027134e-07, |
| "loss": 134.1346, |
| "step": 3970 |
| }, |
| { |
| "epoch": 0.31852302952015216, |
| "grad_norm": 72.9375, |
| "learning_rate": 9.993778849996974e-07, |
| "loss": 133.5554, |
| "step": 3980 |
| }, |
| { |
| "epoch": 0.31932333863954954, |
| "grad_norm": 80.1875, |
| "learning_rate": 9.993763218966817e-07, |
| "loss": 132.0662, |
| "step": 3990 |
| }, |
| { |
| "epoch": 0.3201236477589469, |
| "grad_norm": 75.8125, |
| "learning_rate": 9.993747587936659e-07, |
| "loss": 132.8713, |
| "step": 4000 |
| }, |
| { |
| "epoch": 0.32092395687834424, |
| "grad_norm": 79.625, |
| "learning_rate": 9.9937319569065e-07, |
| "loss": 132.2075, |
| "step": 4010 |
| }, |
| { |
| "epoch": 0.3217242659977416, |
| "grad_norm": 79.25, |
| "learning_rate": 9.993716325876341e-07, |
| "loss": 133.7796, |
| "step": 4020 |
| }, |
| { |
| "epoch": 0.322524575117139, |
| "grad_norm": 73.0, |
| "learning_rate": 9.993700694846183e-07, |
| "loss": 132.6325, |
| "step": 4030 |
| }, |
| { |
| "epoch": 0.3233248842365364, |
| "grad_norm": 78.9375, |
| "learning_rate": 9.993685063816025e-07, |
| "loss": 132.7415, |
| "step": 4040 |
| }, |
| { |
| "epoch": 0.3241251933559337, |
| "grad_norm": 74.5625, |
| "learning_rate": 9.993669432785866e-07, |
| "loss": 132.9165, |
| "step": 4050 |
| }, |
| { |
| "epoch": 0.3249255024753311, |
| "grad_norm": 70.0, |
| "learning_rate": 9.993653801755708e-07, |
| "loss": 133.0353, |
| "step": 4060 |
| }, |
| { |
| "epoch": 0.32572581159472846, |
| "grad_norm": 85.625, |
| "learning_rate": 9.99363817072555e-07, |
| "loss": 131.7386, |
| "step": 4070 |
| }, |
| { |
| "epoch": 0.32652612071412584, |
| "grad_norm": 76.6875, |
| "learning_rate": 9.993622539695392e-07, |
| "loss": 132.1626, |
| "step": 4080 |
| }, |
| { |
| "epoch": 0.3273264298335232, |
| "grad_norm": 72.0625, |
| "learning_rate": 9.993606908665232e-07, |
| "loss": 132.8323, |
| "step": 4090 |
| }, |
| { |
| "epoch": 0.32812673895292055, |
| "grad_norm": 75.3125, |
| "learning_rate": 9.993591277635074e-07, |
| "loss": 132.1428, |
| "step": 4100 |
| }, |
| { |
| "epoch": 0.32892704807231793, |
| "grad_norm": 82.1875, |
| "learning_rate": 9.993575646604916e-07, |
| "loss": 133.5749, |
| "step": 4110 |
| }, |
| { |
| "epoch": 0.3297273571917153, |
| "grad_norm": 75.0, |
| "learning_rate": 9.993560015574759e-07, |
| "loss": 134.0208, |
| "step": 4120 |
| }, |
| { |
| "epoch": 0.3305276663111127, |
| "grad_norm": 78.875, |
| "learning_rate": 9.9935443845446e-07, |
| "loss": 133.0864, |
| "step": 4130 |
| }, |
| { |
| "epoch": 0.33132797543051, |
| "grad_norm": 81.3125, |
| "learning_rate": 9.99352875351444e-07, |
| "loss": 132.96, |
| "step": 4140 |
| }, |
| { |
| "epoch": 0.3321282845499074, |
| "grad_norm": 74.75, |
| "learning_rate": 9.993513122484283e-07, |
| "loss": 132.6293, |
| "step": 4150 |
| }, |
| { |
| "epoch": 0.3329285936693048, |
| "grad_norm": 75.9375, |
| "learning_rate": 9.993497491454125e-07, |
| "loss": 134.6045, |
| "step": 4160 |
| }, |
| { |
| "epoch": 0.33372890278870215, |
| "grad_norm": 78.8125, |
| "learning_rate": 9.993481860423965e-07, |
| "loss": 133.819, |
| "step": 4170 |
| }, |
| { |
| "epoch": 0.3345292119080995, |
| "grad_norm": 76.4375, |
| "learning_rate": 9.993466229393808e-07, |
| "loss": 130.7153, |
| "step": 4180 |
| }, |
| { |
| "epoch": 0.33532952102749686, |
| "grad_norm": 76.0, |
| "learning_rate": 9.99345059836365e-07, |
| "loss": 132.3732, |
| "step": 4190 |
| }, |
| { |
| "epoch": 0.33612983014689424, |
| "grad_norm": 77.1875, |
| "learning_rate": 9.993434967333492e-07, |
| "loss": 131.4807, |
| "step": 4200 |
| }, |
| { |
| "epoch": 0.3369301392662916, |
| "grad_norm": 77.3125, |
| "learning_rate": 9.993419336303332e-07, |
| "loss": 132.6123, |
| "step": 4210 |
| }, |
| { |
| "epoch": 0.337730448385689, |
| "grad_norm": 76.125, |
| "learning_rate": 9.993403705273174e-07, |
| "loss": 131.4161, |
| "step": 4220 |
| }, |
| { |
| "epoch": 0.3385307575050863, |
| "grad_norm": 74.6875, |
| "learning_rate": 9.993388074243016e-07, |
| "loss": 132.329, |
| "step": 4230 |
| }, |
| { |
| "epoch": 0.3393310666244837, |
| "grad_norm": 114.5, |
| "learning_rate": 9.993372443212856e-07, |
| "loss": 132.9315, |
| "step": 4240 |
| }, |
| { |
| "epoch": 0.3401313757438811, |
| "grad_norm": 83.125, |
| "learning_rate": 9.993356812182699e-07, |
| "loss": 133.2091, |
| "step": 4250 |
| }, |
| { |
| "epoch": 0.34093168486327846, |
| "grad_norm": 77.4375, |
| "learning_rate": 9.99334118115254e-07, |
| "loss": 133.3165, |
| "step": 4260 |
| }, |
| { |
| "epoch": 0.3417319939826758, |
| "grad_norm": 73.375, |
| "learning_rate": 9.993325550122383e-07, |
| "loss": 132.3926, |
| "step": 4270 |
| }, |
| { |
| "epoch": 0.34253230310207317, |
| "grad_norm": 81.8125, |
| "learning_rate": 9.993309919092225e-07, |
| "loss": 132.2934, |
| "step": 4280 |
| }, |
| { |
| "epoch": 0.34333261222147055, |
| "grad_norm": 75.0, |
| "learning_rate": 9.993294288062067e-07, |
| "loss": 131.8824, |
| "step": 4290 |
| }, |
| { |
| "epoch": 0.3441329213408679, |
| "grad_norm": 78.625, |
| "learning_rate": 9.993278657031907e-07, |
| "loss": 133.3519, |
| "step": 4300 |
| }, |
| { |
| "epoch": 0.34493323046026525, |
| "grad_norm": 78.8125, |
| "learning_rate": 9.99326302600175e-07, |
| "loss": 132.0518, |
| "step": 4310 |
| }, |
| { |
| "epoch": 0.34573353957966263, |
| "grad_norm": 82.25, |
| "learning_rate": 9.993247394971592e-07, |
| "loss": 133.0713, |
| "step": 4320 |
| }, |
| { |
| "epoch": 0.34653384869906, |
| "grad_norm": 71.6875, |
| "learning_rate": 9.993231763941432e-07, |
| "loss": 133.3473, |
| "step": 4330 |
| }, |
| { |
| "epoch": 0.3473341578184574, |
| "grad_norm": 69.3125, |
| "learning_rate": 9.993216132911274e-07, |
| "loss": 132.6307, |
| "step": 4340 |
| }, |
| { |
| "epoch": 0.34813446693785477, |
| "grad_norm": 71.125, |
| "learning_rate": 9.993200501881116e-07, |
| "loss": 133.2306, |
| "step": 4350 |
| }, |
| { |
| "epoch": 0.3489347760572521, |
| "grad_norm": 77.125, |
| "learning_rate": 9.993184870850958e-07, |
| "loss": 132.2587, |
| "step": 4360 |
| }, |
| { |
| "epoch": 0.3497350851766495, |
| "grad_norm": 72.4375, |
| "learning_rate": 9.993169239820798e-07, |
| "loss": 132.2048, |
| "step": 4370 |
| }, |
| { |
| "epoch": 0.35053539429604685, |
| "grad_norm": 74.6875, |
| "learning_rate": 9.99315360879064e-07, |
| "loss": 132.5754, |
| "step": 4380 |
| }, |
| { |
| "epoch": 0.35133570341544423, |
| "grad_norm": 80.75, |
| "learning_rate": 9.993137977760483e-07, |
| "loss": 133.0396, |
| "step": 4390 |
| }, |
| { |
| "epoch": 0.35213601253484156, |
| "grad_norm": 75.625, |
| "learning_rate": 9.993122346730323e-07, |
| "loss": 132.3304, |
| "step": 4400 |
| }, |
| { |
| "epoch": 0.35293632165423894, |
| "grad_norm": 74.9375, |
| "learning_rate": 9.993106715700165e-07, |
| "loss": 133.6866, |
| "step": 4410 |
| }, |
| { |
| "epoch": 0.3537366307736363, |
| "grad_norm": 76.5, |
| "learning_rate": 9.993091084670007e-07, |
| "loss": 131.9718, |
| "step": 4420 |
| }, |
| { |
| "epoch": 0.3545369398930337, |
| "grad_norm": 73.9375, |
| "learning_rate": 9.99307545363985e-07, |
| "loss": 132.9161, |
| "step": 4430 |
| }, |
| { |
| "epoch": 0.3553372490124311, |
| "grad_norm": 76.3125, |
| "learning_rate": 9.993059822609692e-07, |
| "loss": 133.0902, |
| "step": 4440 |
| }, |
| { |
| "epoch": 0.3561375581318284, |
| "grad_norm": 77.0625, |
| "learning_rate": 9.993044191579532e-07, |
| "loss": 132.3219, |
| "step": 4450 |
| }, |
| { |
| "epoch": 0.3569378672512258, |
| "grad_norm": 82.375, |
| "learning_rate": 9.993028560549374e-07, |
| "loss": 132.6951, |
| "step": 4460 |
| }, |
| { |
| "epoch": 0.35773817637062316, |
| "grad_norm": 79.375, |
| "learning_rate": 9.993012929519216e-07, |
| "loss": 133.4258, |
| "step": 4470 |
| }, |
| { |
| "epoch": 0.35853848549002054, |
| "grad_norm": 77.5625, |
| "learning_rate": 9.992997298489058e-07, |
| "loss": 132.2778, |
| "step": 4480 |
| }, |
| { |
| "epoch": 0.35933879460941787, |
| "grad_norm": 76.8125, |
| "learning_rate": 9.992981667458898e-07, |
| "loss": 132.1174, |
| "step": 4490 |
| }, |
| { |
| "epoch": 0.36013910372881525, |
| "grad_norm": 77.9375, |
| "learning_rate": 9.99296603642874e-07, |
| "loss": 131.4624, |
| "step": 4500 |
| }, |
| { |
| "epoch": 0.3609394128482126, |
| "grad_norm": 78.4375, |
| "learning_rate": 9.992950405398583e-07, |
| "loss": 132.2248, |
| "step": 4510 |
| }, |
| { |
| "epoch": 0.36173972196761, |
| "grad_norm": 77.75, |
| "learning_rate": 9.992934774368423e-07, |
| "loss": 132.4694, |
| "step": 4520 |
| }, |
| { |
| "epoch": 0.36254003108700733, |
| "grad_norm": 75.75, |
| "learning_rate": 9.992919143338265e-07, |
| "loss": 133.191, |
| "step": 4530 |
| }, |
| { |
| "epoch": 0.3633403402064047, |
| "grad_norm": 78.625, |
| "learning_rate": 9.992903512308107e-07, |
| "loss": 132.9599, |
| "step": 4540 |
| }, |
| { |
| "epoch": 0.3641406493258021, |
| "grad_norm": 72.25, |
| "learning_rate": 9.99288788127795e-07, |
| "loss": 133.3042, |
| "step": 4550 |
| }, |
| { |
| "epoch": 0.36494095844519947, |
| "grad_norm": 75.25, |
| "learning_rate": 9.99287225024779e-07, |
| "loss": 133.097, |
| "step": 4560 |
| }, |
| { |
| "epoch": 0.36574126756459685, |
| "grad_norm": 76.5, |
| "learning_rate": 9.992856619217632e-07, |
| "loss": 131.8935, |
| "step": 4570 |
| }, |
| { |
| "epoch": 0.3665415766839942, |
| "grad_norm": 77.3125, |
| "learning_rate": 9.992840988187474e-07, |
| "loss": 132.2206, |
| "step": 4580 |
| }, |
| { |
| "epoch": 0.36734188580339155, |
| "grad_norm": 78.25, |
| "learning_rate": 9.992825357157316e-07, |
| "loss": 132.6171, |
| "step": 4590 |
| }, |
| { |
| "epoch": 0.36814219492278893, |
| "grad_norm": 78.25, |
| "learning_rate": 9.992809726127158e-07, |
| "loss": 132.2898, |
| "step": 4600 |
| }, |
| { |
| "epoch": 0.3689425040421863, |
| "grad_norm": 78.5, |
| "learning_rate": 9.992794095096998e-07, |
| "loss": 133.2866, |
| "step": 4610 |
| }, |
| { |
| "epoch": 0.36974281316158364, |
| "grad_norm": 72.875, |
| "learning_rate": 9.99277846406684e-07, |
| "loss": 132.4634, |
| "step": 4620 |
| }, |
| { |
| "epoch": 0.370543122280981, |
| "grad_norm": 77.4375, |
| "learning_rate": 9.992762833036682e-07, |
| "loss": 132.7402, |
| "step": 4630 |
| }, |
| { |
| "epoch": 0.3713434314003784, |
| "grad_norm": 71.6875, |
| "learning_rate": 9.992747202006525e-07, |
| "loss": 132.4131, |
| "step": 4640 |
| }, |
| { |
| "epoch": 0.3721437405197758, |
| "grad_norm": 80.5, |
| "learning_rate": 9.992731570976365e-07, |
| "loss": 132.4793, |
| "step": 4650 |
| }, |
| { |
| "epoch": 0.3729440496391731, |
| "grad_norm": 76.4375, |
| "learning_rate": 9.992715939946207e-07, |
| "loss": 133.3043, |
| "step": 4660 |
| }, |
| { |
| "epoch": 0.3737443587585705, |
| "grad_norm": 76.1875, |
| "learning_rate": 9.99270030891605e-07, |
| "loss": 132.1431, |
| "step": 4670 |
| }, |
| { |
| "epoch": 0.37454466787796786, |
| "grad_norm": 81.4375, |
| "learning_rate": 9.99268467788589e-07, |
| "loss": 130.6737, |
| "step": 4680 |
| }, |
| { |
| "epoch": 0.37534497699736524, |
| "grad_norm": 77.1875, |
| "learning_rate": 9.992669046855731e-07, |
| "loss": 134.3274, |
| "step": 4690 |
| }, |
| { |
| "epoch": 0.3761452861167626, |
| "grad_norm": 74.8125, |
| "learning_rate": 9.992653415825574e-07, |
| "loss": 132.4399, |
| "step": 4700 |
| }, |
| { |
| "epoch": 0.37694559523615995, |
| "grad_norm": 72.6875, |
| "learning_rate": 9.992637784795414e-07, |
| "loss": 133.9349, |
| "step": 4710 |
| }, |
| { |
| "epoch": 0.3777459043555573, |
| "grad_norm": 76.3125, |
| "learning_rate": 9.992622153765256e-07, |
| "loss": 134.1812, |
| "step": 4720 |
| }, |
| { |
| "epoch": 0.3785462134749547, |
| "grad_norm": 74.5, |
| "learning_rate": 9.992606522735098e-07, |
| "loss": 131.5583, |
| "step": 4730 |
| }, |
| { |
| "epoch": 0.3793465225943521, |
| "grad_norm": 78.625, |
| "learning_rate": 9.99259089170494e-07, |
| "loss": 132.3389, |
| "step": 4740 |
| }, |
| { |
| "epoch": 0.3801468317137494, |
| "grad_norm": 77.0, |
| "learning_rate": 9.992575260674782e-07, |
| "loss": 133.7076, |
| "step": 4750 |
| }, |
| { |
| "epoch": 0.3809471408331468, |
| "grad_norm": 79.6875, |
| "learning_rate": 9.992559629644625e-07, |
| "loss": 133.8963, |
| "step": 4760 |
| }, |
| { |
| "epoch": 0.38174744995254417, |
| "grad_norm": 75.875, |
| "learning_rate": 9.992543998614465e-07, |
| "loss": 132.6479, |
| "step": 4770 |
| }, |
| { |
| "epoch": 0.38254775907194155, |
| "grad_norm": 78.0625, |
| "learning_rate": 9.992528367584307e-07, |
| "loss": 133.2928, |
| "step": 4780 |
| }, |
| { |
| "epoch": 0.38334806819133893, |
| "grad_norm": 77.6875, |
| "learning_rate": 9.992512736554149e-07, |
| "loss": 130.3455, |
| "step": 4790 |
| }, |
| { |
| "epoch": 0.38414837731073626, |
| "grad_norm": 78.5, |
| "learning_rate": 9.992497105523991e-07, |
| "loss": 131.2587, |
| "step": 4800 |
| }, |
| { |
| "epoch": 0.38494868643013364, |
| "grad_norm": 73.5, |
| "learning_rate": 9.992481474493831e-07, |
| "loss": 131.3443, |
| "step": 4810 |
| }, |
| { |
| "epoch": 0.385748995549531, |
| "grad_norm": 77.8125, |
| "learning_rate": 9.992465843463673e-07, |
| "loss": 133.4118, |
| "step": 4820 |
| }, |
| { |
| "epoch": 0.3865493046689284, |
| "grad_norm": 79.1875, |
| "learning_rate": 9.992450212433516e-07, |
| "loss": 131.2462, |
| "step": 4830 |
| }, |
| { |
| "epoch": 0.3873496137883257, |
| "grad_norm": 84.5, |
| "learning_rate": 9.992434581403356e-07, |
| "loss": 131.1748, |
| "step": 4840 |
| }, |
| { |
| "epoch": 0.3881499229077231, |
| "grad_norm": 75.125, |
| "learning_rate": 9.992418950373198e-07, |
| "loss": 132.5554, |
| "step": 4850 |
| }, |
| { |
| "epoch": 0.3889502320271205, |
| "grad_norm": 72.875, |
| "learning_rate": 9.99240331934304e-07, |
| "loss": 132.8411, |
| "step": 4860 |
| }, |
| { |
| "epoch": 0.38975054114651786, |
| "grad_norm": 77.8125, |
| "learning_rate": 9.99238768831288e-07, |
| "loss": 131.7171, |
| "step": 4870 |
| }, |
| { |
| "epoch": 0.3905508502659152, |
| "grad_norm": 74.625, |
| "learning_rate": 9.992372057282722e-07, |
| "loss": 132.1889, |
| "step": 4880 |
| }, |
| { |
| "epoch": 0.39135115938531256, |
| "grad_norm": 80.4375, |
| "learning_rate": 9.992356426252564e-07, |
| "loss": 131.4297, |
| "step": 4890 |
| }, |
| { |
| "epoch": 0.39215146850470994, |
| "grad_norm": 81.8125, |
| "learning_rate": 9.992340795222407e-07, |
| "loss": 132.5631, |
| "step": 4900 |
| }, |
| { |
| "epoch": 0.3929517776241073, |
| "grad_norm": 73.5, |
| "learning_rate": 9.992325164192249e-07, |
| "loss": 133.1258, |
| "step": 4910 |
| }, |
| { |
| "epoch": 0.3937520867435047, |
| "grad_norm": 74.0, |
| "learning_rate": 9.99230953316209e-07, |
| "loss": 132.7684, |
| "step": 4920 |
| }, |
| { |
| "epoch": 0.39455239586290203, |
| "grad_norm": 76.0, |
| "learning_rate": 9.99229390213193e-07, |
| "loss": 133.6151, |
| "step": 4930 |
| }, |
| { |
| "epoch": 0.3953527049822994, |
| "grad_norm": 85.1875, |
| "learning_rate": 9.992278271101773e-07, |
| "loss": 132.0966, |
| "step": 4940 |
| }, |
| { |
| "epoch": 0.3961530141016968, |
| "grad_norm": 76.75, |
| "learning_rate": 9.992262640071615e-07, |
| "loss": 133.1567, |
| "step": 4950 |
| }, |
| { |
| "epoch": 0.39695332322109417, |
| "grad_norm": 74.375, |
| "learning_rate": 9.992247009041458e-07, |
| "loss": 132.1937, |
| "step": 4960 |
| }, |
| { |
| "epoch": 0.3977536323404915, |
| "grad_norm": 79.75, |
| "learning_rate": 9.992231378011298e-07, |
| "loss": 131.0156, |
| "step": 4970 |
| }, |
| { |
| "epoch": 0.39855394145988887, |
| "grad_norm": 79.3125, |
| "learning_rate": 9.99221574698114e-07, |
| "loss": 132.8796, |
| "step": 4980 |
| }, |
| { |
| "epoch": 0.39935425057928625, |
| "grad_norm": 73.0, |
| "learning_rate": 9.992200115950982e-07, |
| "loss": 132.0237, |
| "step": 4990 |
| }, |
| { |
| "epoch": 0.40015455969868363, |
| "grad_norm": 75.625, |
| "learning_rate": 9.992184484920822e-07, |
| "loss": 133.5153, |
| "step": 5000 |
| }, |
| { |
| "epoch": 0.40015455969868363, |
| "eval_loss": 2.0742883682250977, |
| "eval_runtime": 418.6562, |
| "eval_samples_per_second": 1567.295, |
| "eval_steps_per_second": 48.978, |
| "step": 5000 |
| }, |
| { |
| "epoch": 0.40095486881808096, |
| "grad_norm": 72.4375, |
| "learning_rate": 9.992168853890664e-07, |
| "loss": 133.3489, |
| "step": 5010 |
| }, |
| { |
| "epoch": 0.40175517793747834, |
| "grad_norm": 74.5625, |
| "learning_rate": 9.992153222860506e-07, |
| "loss": 132.38, |
| "step": 5020 |
| }, |
| { |
| "epoch": 0.4025554870568757, |
| "grad_norm": 81.25, |
| "learning_rate": 9.992137591830347e-07, |
| "loss": 133.7667, |
| "step": 5030 |
| }, |
| { |
| "epoch": 0.4033557961762731, |
| "grad_norm": 74.5625, |
| "learning_rate": 9.992121960800189e-07, |
| "loss": 133.8293, |
| "step": 5040 |
| }, |
| { |
| "epoch": 0.4041561052956705, |
| "grad_norm": 71.25, |
| "learning_rate": 9.99210632977003e-07, |
| "loss": 133.3581, |
| "step": 5050 |
| }, |
| { |
| "epoch": 0.4049564144150678, |
| "grad_norm": 79.5625, |
| "learning_rate": 9.992090698739873e-07, |
| "loss": 132.9888, |
| "step": 5060 |
| }, |
| { |
| "epoch": 0.4057567235344652, |
| "grad_norm": 73.9375, |
| "learning_rate": 9.992075067709715e-07, |
| "loss": 132.4104, |
| "step": 5070 |
| }, |
| { |
| "epoch": 0.40655703265386256, |
| "grad_norm": 72.5, |
| "learning_rate": 9.992059436679555e-07, |
| "loss": 132.3801, |
| "step": 5080 |
| }, |
| { |
| "epoch": 0.40735734177325994, |
| "grad_norm": 76.3125, |
| "learning_rate": 9.992043805649397e-07, |
| "loss": 132.4045, |
| "step": 5090 |
| }, |
| { |
| "epoch": 0.40815765089265726, |
| "grad_norm": 76.5625, |
| "learning_rate": 9.99202817461924e-07, |
| "loss": 132.4824, |
| "step": 5100 |
| }, |
| { |
| "epoch": 0.40895796001205464, |
| "grad_norm": 74.125, |
| "learning_rate": 9.992012543589082e-07, |
| "loss": 132.0051, |
| "step": 5110 |
| }, |
| { |
| "epoch": 0.409758269131452, |
| "grad_norm": 72.9375, |
| "learning_rate": 9.991996912558924e-07, |
| "loss": 133.8627, |
| "step": 5120 |
| }, |
| { |
| "epoch": 0.4105585782508494, |
| "grad_norm": 78.9375, |
| "learning_rate": 9.991981281528764e-07, |
| "loss": 132.2818, |
| "step": 5130 |
| }, |
| { |
| "epoch": 0.4113588873702468, |
| "grad_norm": 75.375, |
| "learning_rate": 9.991965650498606e-07, |
| "loss": 131.2578, |
| "step": 5140 |
| }, |
| { |
| "epoch": 0.4121591964896441, |
| "grad_norm": 77.125, |
| "learning_rate": 9.991950019468448e-07, |
| "loss": 133.7901, |
| "step": 5150 |
| }, |
| { |
| "epoch": 0.4129595056090415, |
| "grad_norm": 80.9375, |
| "learning_rate": 9.991934388438289e-07, |
| "loss": 132.6137, |
| "step": 5160 |
| }, |
| { |
| "epoch": 0.41375981472843887, |
| "grad_norm": 81.875, |
| "learning_rate": 9.99191875740813e-07, |
| "loss": 132.9288, |
| "step": 5170 |
| }, |
| { |
| "epoch": 0.41456012384783625, |
| "grad_norm": 78.6875, |
| "learning_rate": 9.991903126377973e-07, |
| "loss": 132.9595, |
| "step": 5180 |
| }, |
| { |
| "epoch": 0.4153604329672336, |
| "grad_norm": 76.9375, |
| "learning_rate": 9.991887495347813e-07, |
| "loss": 132.3299, |
| "step": 5190 |
| }, |
| { |
| "epoch": 0.41616074208663095, |
| "grad_norm": 79.0, |
| "learning_rate": 9.991871864317655e-07, |
| "loss": 132.1807, |
| "step": 5200 |
| }, |
| { |
| "epoch": 0.41696105120602833, |
| "grad_norm": 75.4375, |
| "learning_rate": 9.991856233287497e-07, |
| "loss": 133.9722, |
| "step": 5210 |
| }, |
| { |
| "epoch": 0.4177613603254257, |
| "grad_norm": 73.4375, |
| "learning_rate": 9.99184060225734e-07, |
| "loss": 132.654, |
| "step": 5220 |
| }, |
| { |
| "epoch": 0.41856166944482304, |
| "grad_norm": 74.1875, |
| "learning_rate": 9.991824971227182e-07, |
| "loss": 133.0139, |
| "step": 5230 |
| }, |
| { |
| "epoch": 0.4193619785642204, |
| "grad_norm": 79.25, |
| "learning_rate": 9.991809340197022e-07, |
| "loss": 131.9049, |
| "step": 5240 |
| }, |
| { |
| "epoch": 0.4201622876836178, |
| "grad_norm": 75.8125, |
| "learning_rate": 9.991793709166864e-07, |
| "loss": 131.9633, |
| "step": 5250 |
| }, |
| { |
| "epoch": 0.4209625968030152, |
| "grad_norm": 74.25, |
| "learning_rate": 9.991778078136706e-07, |
| "loss": 132.2361, |
| "step": 5260 |
| }, |
| { |
| "epoch": 0.42176290592241256, |
| "grad_norm": 72.8125, |
| "learning_rate": 9.991762447106548e-07, |
| "loss": 131.267, |
| "step": 5270 |
| }, |
| { |
| "epoch": 0.4225632150418099, |
| "grad_norm": 77.5625, |
| "learning_rate": 9.991746816076388e-07, |
| "loss": 132.4257, |
| "step": 5280 |
| }, |
| { |
| "epoch": 0.42336352416120726, |
| "grad_norm": 79.3125, |
| "learning_rate": 9.99173118504623e-07, |
| "loss": 132.785, |
| "step": 5290 |
| }, |
| { |
| "epoch": 0.42416383328060464, |
| "grad_norm": 76.9375, |
| "learning_rate": 9.991715554016073e-07, |
| "loss": 131.3379, |
| "step": 5300 |
| }, |
| { |
| "epoch": 0.424964142400002, |
| "grad_norm": 75.25, |
| "learning_rate": 9.991699922985915e-07, |
| "loss": 133.1422, |
| "step": 5310 |
| }, |
| { |
| "epoch": 0.42576445151939935, |
| "grad_norm": 72.5625, |
| "learning_rate": 9.991684291955755e-07, |
| "loss": 132.1037, |
| "step": 5320 |
| }, |
| { |
| "epoch": 0.4265647606387967, |
| "grad_norm": 79.0, |
| "learning_rate": 9.991668660925597e-07, |
| "loss": 132.1573, |
| "step": 5330 |
| }, |
| { |
| "epoch": 0.4273650697581941, |
| "grad_norm": 75.0625, |
| "learning_rate": 9.99165302989544e-07, |
| "loss": 132.0645, |
| "step": 5340 |
| }, |
| { |
| "epoch": 0.4281653788775915, |
| "grad_norm": 72.3125, |
| "learning_rate": 9.99163739886528e-07, |
| "loss": 132.8727, |
| "step": 5350 |
| }, |
| { |
| "epoch": 0.4289656879969888, |
| "grad_norm": 77.4375, |
| "learning_rate": 9.991621767835122e-07, |
| "loss": 133.5671, |
| "step": 5360 |
| }, |
| { |
| "epoch": 0.4297659971163862, |
| "grad_norm": 74.3125, |
| "learning_rate": 9.991606136804964e-07, |
| "loss": 132.3078, |
| "step": 5370 |
| }, |
| { |
| "epoch": 0.43056630623578357, |
| "grad_norm": 81.375, |
| "learning_rate": 9.991590505774806e-07, |
| "loss": 133.0639, |
| "step": 5380 |
| }, |
| { |
| "epoch": 0.43136661535518095, |
| "grad_norm": 74.0625, |
| "learning_rate": 9.991574874744648e-07, |
| "loss": 131.2669, |
| "step": 5390 |
| }, |
| { |
| "epoch": 0.43216692447457833, |
| "grad_norm": 79.375, |
| "learning_rate": 9.991559243714488e-07, |
| "loss": 131.9368, |
| "step": 5400 |
| }, |
| { |
| "epoch": 0.43296723359397565, |
| "grad_norm": 77.9375, |
| "learning_rate": 9.99154361268433e-07, |
| "loss": 132.4038, |
| "step": 5410 |
| }, |
| { |
| "epoch": 0.43376754271337303, |
| "grad_norm": 83.375, |
| "learning_rate": 9.991527981654173e-07, |
| "loss": 132.8988, |
| "step": 5420 |
| }, |
| { |
| "epoch": 0.4345678518327704, |
| "grad_norm": 73.6875, |
| "learning_rate": 9.991512350624015e-07, |
| "loss": 133.3637, |
| "step": 5430 |
| }, |
| { |
| "epoch": 0.4353681609521678, |
| "grad_norm": 74.5625, |
| "learning_rate": 9.991496719593855e-07, |
| "loss": 133.3608, |
| "step": 5440 |
| }, |
| { |
| "epoch": 0.4361684700715651, |
| "grad_norm": 71.0, |
| "learning_rate": 9.991481088563697e-07, |
| "loss": 131.265, |
| "step": 5450 |
| }, |
| { |
| "epoch": 0.4369687791909625, |
| "grad_norm": 73.4375, |
| "learning_rate": 9.99146545753354e-07, |
| "loss": 132.0777, |
| "step": 5460 |
| }, |
| { |
| "epoch": 0.4377690883103599, |
| "grad_norm": 75.125, |
| "learning_rate": 9.99144982650338e-07, |
| "loss": 132.4698, |
| "step": 5470 |
| }, |
| { |
| "epoch": 0.43856939742975726, |
| "grad_norm": 73.875, |
| "learning_rate": 9.991434195473221e-07, |
| "loss": 131.6167, |
| "step": 5480 |
| }, |
| { |
| "epoch": 0.43936970654915464, |
| "grad_norm": 76.9375, |
| "learning_rate": 9.991418564443064e-07, |
| "loss": 131.9013, |
| "step": 5490 |
| }, |
| { |
| "epoch": 0.44017001566855196, |
| "grad_norm": 73.4375, |
| "learning_rate": 9.991402933412906e-07, |
| "loss": 131.2487, |
| "step": 5500 |
| }, |
| { |
| "epoch": 0.44097032478794934, |
| "grad_norm": 83.4375, |
| "learning_rate": 9.991387302382746e-07, |
| "loss": 133.703, |
| "step": 5510 |
| }, |
| { |
| "epoch": 0.4417706339073467, |
| "grad_norm": 78.375, |
| "learning_rate": 9.991371671352588e-07, |
| "loss": 133.2442, |
| "step": 5520 |
| }, |
| { |
| "epoch": 0.4425709430267441, |
| "grad_norm": 71.375, |
| "learning_rate": 9.99135604032243e-07, |
| "loss": 131.6987, |
| "step": 5530 |
| }, |
| { |
| "epoch": 0.4433712521461414, |
| "grad_norm": 76.1875, |
| "learning_rate": 9.991340409292272e-07, |
| "loss": 131.8815, |
| "step": 5540 |
| }, |
| { |
| "epoch": 0.4441715612655388, |
| "grad_norm": 80.8125, |
| "learning_rate": 9.991324778262112e-07, |
| "loss": 132.4904, |
| "step": 5550 |
| }, |
| { |
| "epoch": 0.4449718703849362, |
| "grad_norm": 75.1875, |
| "learning_rate": 9.991309147231955e-07, |
| "loss": 130.7195, |
| "step": 5560 |
| }, |
| { |
| "epoch": 0.44577217950433357, |
| "grad_norm": 74.5625, |
| "learning_rate": 9.991293516201797e-07, |
| "loss": 132.3294, |
| "step": 5570 |
| }, |
| { |
| "epoch": 0.4465724886237309, |
| "grad_norm": 77.5, |
| "learning_rate": 9.99127788517164e-07, |
| "loss": 132.4166, |
| "step": 5580 |
| }, |
| { |
| "epoch": 0.44737279774312827, |
| "grad_norm": 74.375, |
| "learning_rate": 9.991262254141481e-07, |
| "loss": 132.624, |
| "step": 5590 |
| }, |
| { |
| "epoch": 0.44817310686252565, |
| "grad_norm": 82.3125, |
| "learning_rate": 9.991246623111321e-07, |
| "loss": 131.5993, |
| "step": 5600 |
| }, |
| { |
| "epoch": 0.44897341598192303, |
| "grad_norm": 70.5, |
| "learning_rate": 9.991230992081163e-07, |
| "loss": 130.5851, |
| "step": 5610 |
| }, |
| { |
| "epoch": 0.4497737251013204, |
| "grad_norm": 78.9375, |
| "learning_rate": 9.991215361051006e-07, |
| "loss": 131.7671, |
| "step": 5620 |
| }, |
| { |
| "epoch": 0.45057403422071773, |
| "grad_norm": 78.9375, |
| "learning_rate": 9.991199730020846e-07, |
| "loss": 132.6647, |
| "step": 5630 |
| }, |
| { |
| "epoch": 0.4513743433401151, |
| "grad_norm": 77.5, |
| "learning_rate": 9.991184098990688e-07, |
| "loss": 131.5212, |
| "step": 5640 |
| }, |
| { |
| "epoch": 0.4521746524595125, |
| "grad_norm": 80.25, |
| "learning_rate": 9.99116846796053e-07, |
| "loss": 131.0642, |
| "step": 5650 |
| }, |
| { |
| "epoch": 0.4529749615789099, |
| "grad_norm": 80.875, |
| "learning_rate": 9.991152836930372e-07, |
| "loss": 131.6869, |
| "step": 5660 |
| }, |
| { |
| "epoch": 0.4537752706983072, |
| "grad_norm": 73.125, |
| "learning_rate": 9.991137205900212e-07, |
| "loss": 132.0529, |
| "step": 5670 |
| }, |
| { |
| "epoch": 0.4545755798177046, |
| "grad_norm": 77.8125, |
| "learning_rate": 9.991121574870055e-07, |
| "loss": 133.4353, |
| "step": 5680 |
| }, |
| { |
| "epoch": 0.45537588893710196, |
| "grad_norm": 78.9375, |
| "learning_rate": 9.991105943839897e-07, |
| "loss": 131.1083, |
| "step": 5690 |
| }, |
| { |
| "epoch": 0.45617619805649934, |
| "grad_norm": 81.6875, |
| "learning_rate": 9.991090312809739e-07, |
| "loss": 132.5374, |
| "step": 5700 |
| }, |
| { |
| "epoch": 0.45697650717589666, |
| "grad_norm": 75.8125, |
| "learning_rate": 9.99107468177958e-07, |
| "loss": 131.5541, |
| "step": 5710 |
| }, |
| { |
| "epoch": 0.45777681629529404, |
| "grad_norm": 77.875, |
| "learning_rate": 9.991059050749421e-07, |
| "loss": 133.4234, |
| "step": 5720 |
| }, |
| { |
| "epoch": 0.4585771254146914, |
| "grad_norm": 82.6875, |
| "learning_rate": 9.991043419719263e-07, |
| "loss": 132.8354, |
| "step": 5730 |
| }, |
| { |
| "epoch": 0.4593774345340888, |
| "grad_norm": 69.4375, |
| "learning_rate": 9.991027788689105e-07, |
| "loss": 130.9775, |
| "step": 5740 |
| }, |
| { |
| "epoch": 0.4601777436534862, |
| "grad_norm": 73.875, |
| "learning_rate": 9.991012157658948e-07, |
| "loss": 133.6315, |
| "step": 5750 |
| }, |
| { |
| "epoch": 0.4609780527728835, |
| "grad_norm": 77.125, |
| "learning_rate": 9.990996526628788e-07, |
| "loss": 133.1302, |
| "step": 5760 |
| }, |
| { |
| "epoch": 0.4617783618922809, |
| "grad_norm": 75.875, |
| "learning_rate": 9.99098089559863e-07, |
| "loss": 132.4343, |
| "step": 5770 |
| }, |
| { |
| "epoch": 0.46257867101167827, |
| "grad_norm": 74.8125, |
| "learning_rate": 9.990965264568472e-07, |
| "loss": 132.1773, |
| "step": 5780 |
| }, |
| { |
| "epoch": 0.46337898013107565, |
| "grad_norm": 74.5625, |
| "learning_rate": 9.990949633538312e-07, |
| "loss": 131.587, |
| "step": 5790 |
| }, |
| { |
| "epoch": 0.46417928925047297, |
| "grad_norm": 73.6875, |
| "learning_rate": 9.990934002508154e-07, |
| "loss": 132.3335, |
| "step": 5800 |
| }, |
| { |
| "epoch": 0.46497959836987035, |
| "grad_norm": 78.375, |
| "learning_rate": 9.990918371477997e-07, |
| "loss": 131.2027, |
| "step": 5810 |
| }, |
| { |
| "epoch": 0.46577990748926773, |
| "grad_norm": 74.875, |
| "learning_rate": 9.990902740447837e-07, |
| "loss": 132.1983, |
| "step": 5820 |
| }, |
| { |
| "epoch": 0.4665802166086651, |
| "grad_norm": 76.8125, |
| "learning_rate": 9.990887109417679e-07, |
| "loss": 130.0307, |
| "step": 5830 |
| }, |
| { |
| "epoch": 0.4673805257280625, |
| "grad_norm": 74.375, |
| "learning_rate": 9.99087147838752e-07, |
| "loss": 132.1818, |
| "step": 5840 |
| }, |
| { |
| "epoch": 0.4681808348474598, |
| "grad_norm": 80.25, |
| "learning_rate": 9.990855847357363e-07, |
| "loss": 131.9706, |
| "step": 5850 |
| }, |
| { |
| "epoch": 0.4689811439668572, |
| "grad_norm": 78.3125, |
| "learning_rate": 9.990840216327205e-07, |
| "loss": 132.3331, |
| "step": 5860 |
| }, |
| { |
| "epoch": 0.4697814530862546, |
| "grad_norm": 75.25, |
| "learning_rate": 9.990824585297045e-07, |
| "loss": 132.2483, |
| "step": 5870 |
| }, |
| { |
| "epoch": 0.47058176220565195, |
| "grad_norm": 73.125, |
| "learning_rate": 9.990808954266888e-07, |
| "loss": 132.6989, |
| "step": 5880 |
| }, |
| { |
| "epoch": 0.4713820713250493, |
| "grad_norm": 78.625, |
| "learning_rate": 9.99079332323673e-07, |
| "loss": 133.315, |
| "step": 5890 |
| }, |
| { |
| "epoch": 0.47218238044444666, |
| "grad_norm": 73.8125, |
| "learning_rate": 9.990777692206572e-07, |
| "loss": 131.2918, |
| "step": 5900 |
| }, |
| { |
| "epoch": 0.47298268956384404, |
| "grad_norm": 82.0625, |
| "learning_rate": 9.990762061176414e-07, |
| "loss": 132.8047, |
| "step": 5910 |
| }, |
| { |
| "epoch": 0.4737829986832414, |
| "grad_norm": 83.4375, |
| "learning_rate": 9.990746430146254e-07, |
| "loss": 133.7549, |
| "step": 5920 |
| }, |
| { |
| "epoch": 0.47458330780263874, |
| "grad_norm": 71.9375, |
| "learning_rate": 9.990730799116096e-07, |
| "loss": 132.3743, |
| "step": 5930 |
| }, |
| { |
| "epoch": 0.4753836169220361, |
| "grad_norm": 82.25, |
| "learning_rate": 9.990715168085939e-07, |
| "loss": 130.8257, |
| "step": 5940 |
| }, |
| { |
| "epoch": 0.4761839260414335, |
| "grad_norm": 75.8125, |
| "learning_rate": 9.990699537055779e-07, |
| "loss": 130.2371, |
| "step": 5950 |
| }, |
| { |
| "epoch": 0.4769842351608309, |
| "grad_norm": 78.1875, |
| "learning_rate": 9.99068390602562e-07, |
| "loss": 131.3828, |
| "step": 5960 |
| }, |
| { |
| "epoch": 0.47778454428022826, |
| "grad_norm": 75.9375, |
| "learning_rate": 9.990668274995463e-07, |
| "loss": 132.8423, |
| "step": 5970 |
| }, |
| { |
| "epoch": 0.4785848533996256, |
| "grad_norm": 77.3125, |
| "learning_rate": 9.990652643965303e-07, |
| "loss": 133.5517, |
| "step": 5980 |
| }, |
| { |
| "epoch": 0.47938516251902297, |
| "grad_norm": 72.5, |
| "learning_rate": 9.990637012935145e-07, |
| "loss": 132.8932, |
| "step": 5990 |
| }, |
| { |
| "epoch": 0.48018547163842035, |
| "grad_norm": 74.25, |
| "learning_rate": 9.990621381904987e-07, |
| "loss": 133.124, |
| "step": 6000 |
| }, |
| { |
| "epoch": 0.4809857807578177, |
| "grad_norm": 77.0625, |
| "learning_rate": 9.99060575087483e-07, |
| "loss": 132.1078, |
| "step": 6010 |
| }, |
| { |
| "epoch": 0.48178608987721505, |
| "grad_norm": 76.0625, |
| "learning_rate": 9.99059011984467e-07, |
| "loss": 132.8352, |
| "step": 6020 |
| }, |
| { |
| "epoch": 0.48258639899661243, |
| "grad_norm": 79.4375, |
| "learning_rate": 9.990574488814512e-07, |
| "loss": 132.3257, |
| "step": 6030 |
| }, |
| { |
| "epoch": 0.4833867081160098, |
| "grad_norm": 74.375, |
| "learning_rate": 9.990558857784354e-07, |
| "loss": 132.8362, |
| "step": 6040 |
| }, |
| { |
| "epoch": 0.4841870172354072, |
| "grad_norm": 74.375, |
| "learning_rate": 9.990543226754196e-07, |
| "loss": 131.3189, |
| "step": 6050 |
| }, |
| { |
| "epoch": 0.48498732635480457, |
| "grad_norm": 72.0625, |
| "learning_rate": 9.990527595724038e-07, |
| "loss": 131.8034, |
| "step": 6060 |
| }, |
| { |
| "epoch": 0.4857876354742019, |
| "grad_norm": 80.5625, |
| "learning_rate": 9.99051196469388e-07, |
| "loss": 131.7185, |
| "step": 6070 |
| }, |
| { |
| "epoch": 0.4865879445935993, |
| "grad_norm": 77.5625, |
| "learning_rate": 9.99049633366372e-07, |
| "loss": 133.0989, |
| "step": 6080 |
| }, |
| { |
| "epoch": 0.48738825371299666, |
| "grad_norm": 74.4375, |
| "learning_rate": 9.990480702633563e-07, |
| "loss": 131.1641, |
| "step": 6090 |
| }, |
| { |
| "epoch": 0.48818856283239404, |
| "grad_norm": 77.625, |
| "learning_rate": 9.990465071603405e-07, |
| "loss": 131.6567, |
| "step": 6100 |
| }, |
| { |
| "epoch": 0.48898887195179136, |
| "grad_norm": 80.25, |
| "learning_rate": 9.990449440573245e-07, |
| "loss": 132.3284, |
| "step": 6110 |
| }, |
| { |
| "epoch": 0.48978918107118874, |
| "grad_norm": 72.9375, |
| "learning_rate": 9.990433809543087e-07, |
| "loss": 131.856, |
| "step": 6120 |
| }, |
| { |
| "epoch": 0.4905894901905861, |
| "grad_norm": 72.6875, |
| "learning_rate": 9.99041817851293e-07, |
| "loss": 132.8506, |
| "step": 6130 |
| }, |
| { |
| "epoch": 0.4913897993099835, |
| "grad_norm": 76.0625, |
| "learning_rate": 9.99040254748277e-07, |
| "loss": 132.9671, |
| "step": 6140 |
| }, |
| { |
| "epoch": 0.4921901084293808, |
| "grad_norm": 81.875, |
| "learning_rate": 9.990386916452612e-07, |
| "loss": 133.4054, |
| "step": 6150 |
| }, |
| { |
| "epoch": 0.4929904175487782, |
| "grad_norm": 73.1875, |
| "learning_rate": 9.990371285422454e-07, |
| "loss": 132.2609, |
| "step": 6160 |
| }, |
| { |
| "epoch": 0.4937907266681756, |
| "grad_norm": 73.9375, |
| "learning_rate": 9.990355654392296e-07, |
| "loss": 133.1529, |
| "step": 6170 |
| }, |
| { |
| "epoch": 0.49459103578757296, |
| "grad_norm": 74.125, |
| "learning_rate": 9.990340023362136e-07, |
| "loss": 133.8595, |
| "step": 6180 |
| }, |
| { |
| "epoch": 0.49539134490697034, |
| "grad_norm": 72.0625, |
| "learning_rate": 9.990324392331978e-07, |
| "loss": 132.0925, |
| "step": 6190 |
| }, |
| { |
| "epoch": 0.49619165402636767, |
| "grad_norm": 72.5, |
| "learning_rate": 9.99030876130182e-07, |
| "loss": 133.2302, |
| "step": 6200 |
| }, |
| { |
| "epoch": 0.49699196314576505, |
| "grad_norm": 71.9375, |
| "learning_rate": 9.990293130271663e-07, |
| "loss": 132.7785, |
| "step": 6210 |
| }, |
| { |
| "epoch": 0.4977922722651624, |
| "grad_norm": 78.0625, |
| "learning_rate": 9.990277499241505e-07, |
| "loss": 133.0601, |
| "step": 6220 |
| }, |
| { |
| "epoch": 0.4985925813845598, |
| "grad_norm": 74.6875, |
| "learning_rate": 9.990261868211345e-07, |
| "loss": 132.1573, |
| "step": 6230 |
| }, |
| { |
| "epoch": 0.49939289050395713, |
| "grad_norm": 80.3125, |
| "learning_rate": 9.990246237181187e-07, |
| "loss": 132.7871, |
| "step": 6240 |
| }, |
| { |
| "epoch": 0.5001931996233545, |
| "grad_norm": 74.875, |
| "learning_rate": 9.99023060615103e-07, |
| "loss": 132.2812, |
| "step": 6250 |
| }, |
| { |
| "epoch": 0.5009935087427518, |
| "grad_norm": 80.875, |
| "learning_rate": 9.990214975120871e-07, |
| "loss": 130.9305, |
| "step": 6260 |
| }, |
| { |
| "epoch": 0.5017938178621493, |
| "grad_norm": 72.4375, |
| "learning_rate": 9.990199344090712e-07, |
| "loss": 133.3334, |
| "step": 6270 |
| }, |
| { |
| "epoch": 0.5025941269815466, |
| "grad_norm": 71.875, |
| "learning_rate": 9.990183713060554e-07, |
| "loss": 132.1767, |
| "step": 6280 |
| }, |
| { |
| "epoch": 0.503394436100944, |
| "grad_norm": 79.375, |
| "learning_rate": 9.990168082030396e-07, |
| "loss": 133.0485, |
| "step": 6290 |
| }, |
| { |
| "epoch": 0.5041947452203414, |
| "grad_norm": 72.8125, |
| "learning_rate": 9.990152451000236e-07, |
| "loss": 132.0657, |
| "step": 6300 |
| }, |
| { |
| "epoch": 0.5049950543397387, |
| "grad_norm": 82.5, |
| "learning_rate": 9.990136819970078e-07, |
| "loss": 130.4747, |
| "step": 6310 |
| }, |
| { |
| "epoch": 0.5057953634591361, |
| "grad_norm": 77.5, |
| "learning_rate": 9.99012118893992e-07, |
| "loss": 133.164, |
| "step": 6320 |
| }, |
| { |
| "epoch": 0.5065956725785334, |
| "grad_norm": 72.5625, |
| "learning_rate": 9.990105557909763e-07, |
| "loss": 131.2053, |
| "step": 6330 |
| }, |
| { |
| "epoch": 0.5073959816979309, |
| "grad_norm": 74.5, |
| "learning_rate": 9.990089926879603e-07, |
| "loss": 132.6195, |
| "step": 6340 |
| }, |
| { |
| "epoch": 0.5081962908173282, |
| "grad_norm": 78.5625, |
| "learning_rate": 9.990074295849445e-07, |
| "loss": 131.7617, |
| "step": 6350 |
| }, |
| { |
| "epoch": 0.5089965999367255, |
| "grad_norm": 73.5, |
| "learning_rate": 9.990058664819287e-07, |
| "loss": 131.7562, |
| "step": 6360 |
| }, |
| { |
| "epoch": 0.509796909056123, |
| "grad_norm": 79.4375, |
| "learning_rate": 9.99004303378913e-07, |
| "loss": 133.1088, |
| "step": 6370 |
| }, |
| { |
| "epoch": 0.5105972181755203, |
| "grad_norm": 79.3125, |
| "learning_rate": 9.990027402758971e-07, |
| "loss": 133.757, |
| "step": 6380 |
| }, |
| { |
| "epoch": 0.5113975272949176, |
| "grad_norm": 74.0625, |
| "learning_rate": 9.990011771728811e-07, |
| "loss": 133.9952, |
| "step": 6390 |
| }, |
| { |
| "epoch": 0.512197836414315, |
| "grad_norm": 72.5625, |
| "learning_rate": 9.989996140698654e-07, |
| "loss": 131.275, |
| "step": 6400 |
| }, |
| { |
| "epoch": 0.5129981455337124, |
| "grad_norm": 78.4375, |
| "learning_rate": 9.989980509668496e-07, |
| "loss": 131.3286, |
| "step": 6410 |
| }, |
| { |
| "epoch": 0.5137984546531098, |
| "grad_norm": 73.6875, |
| "learning_rate": 9.989964878638338e-07, |
| "loss": 131.1604, |
| "step": 6420 |
| }, |
| { |
| "epoch": 0.5145987637725071, |
| "grad_norm": 77.0, |
| "learning_rate": 9.989949247608178e-07, |
| "loss": 131.0352, |
| "step": 6430 |
| }, |
| { |
| "epoch": 0.5153990728919045, |
| "grad_norm": 77.8125, |
| "learning_rate": 9.98993361657802e-07, |
| "loss": 132.0469, |
| "step": 6440 |
| }, |
| { |
| "epoch": 0.5161993820113019, |
| "grad_norm": 76.1875, |
| "learning_rate": 9.989917985547862e-07, |
| "loss": 133.7251, |
| "step": 6450 |
| }, |
| { |
| "epoch": 0.5169996911306992, |
| "grad_norm": 70.6875, |
| "learning_rate": 9.989902354517702e-07, |
| "loss": 131.82, |
| "step": 6460 |
| }, |
| { |
| "epoch": 0.5178000002500966, |
| "grad_norm": 76.8125, |
| "learning_rate": 9.989886723487545e-07, |
| "loss": 131.712, |
| "step": 6470 |
| }, |
| { |
| "epoch": 0.518600309369494, |
| "grad_norm": 77.9375, |
| "learning_rate": 9.989871092457387e-07, |
| "loss": 132.1365, |
| "step": 6480 |
| }, |
| { |
| "epoch": 0.5194006184888913, |
| "grad_norm": 74.0, |
| "learning_rate": 9.989855461427227e-07, |
| "loss": 131.9362, |
| "step": 6490 |
| }, |
| { |
| "epoch": 0.5202009276082887, |
| "grad_norm": 77.3125, |
| "learning_rate": 9.98983983039707e-07, |
| "loss": 133.708, |
| "step": 6500 |
| }, |
| { |
| "epoch": 0.5210012367276861, |
| "grad_norm": 77.75, |
| "learning_rate": 9.989824199366911e-07, |
| "loss": 132.9561, |
| "step": 6510 |
| }, |
| { |
| "epoch": 0.5218015458470834, |
| "grad_norm": 76.8125, |
| "learning_rate": 9.989808568336753e-07, |
| "loss": 133.186, |
| "step": 6520 |
| }, |
| { |
| "epoch": 0.5226018549664808, |
| "grad_norm": 79.125, |
| "learning_rate": 9.989792937306596e-07, |
| "loss": 131.5064, |
| "step": 6530 |
| }, |
| { |
| "epoch": 0.5234021640858781, |
| "grad_norm": 74.5, |
| "learning_rate": 9.989777306276438e-07, |
| "loss": 131.7443, |
| "step": 6540 |
| }, |
| { |
| "epoch": 0.5242024732052756, |
| "grad_norm": 75.9375, |
| "learning_rate": 9.989761675246278e-07, |
| "loss": 132.0182, |
| "step": 6550 |
| }, |
| { |
| "epoch": 0.5250027823246729, |
| "grad_norm": 74.25, |
| "learning_rate": 9.98974604421612e-07, |
| "loss": 132.1322, |
| "step": 6560 |
| }, |
| { |
| "epoch": 0.5258030914440702, |
| "grad_norm": 77.25, |
| "learning_rate": 9.989730413185962e-07, |
| "loss": 131.4318, |
| "step": 6570 |
| }, |
| { |
| "epoch": 0.5266034005634677, |
| "grad_norm": 79.375, |
| "learning_rate": 9.989714782155802e-07, |
| "loss": 131.6486, |
| "step": 6580 |
| }, |
| { |
| "epoch": 0.527403709682865, |
| "grad_norm": 72.875, |
| "learning_rate": 9.989699151125644e-07, |
| "loss": 132.6251, |
| "step": 6590 |
| }, |
| { |
| "epoch": 0.5282040188022624, |
| "grad_norm": 77.75, |
| "learning_rate": 9.989683520095487e-07, |
| "loss": 130.9398, |
| "step": 6600 |
| }, |
| { |
| "epoch": 0.5290043279216597, |
| "grad_norm": 78.0625, |
| "learning_rate": 9.989667889065329e-07, |
| "loss": 132.2289, |
| "step": 6610 |
| }, |
| { |
| "epoch": 0.5298046370410571, |
| "grad_norm": 80.75, |
| "learning_rate": 9.989652258035169e-07, |
| "loss": 133.6902, |
| "step": 6620 |
| }, |
| { |
| "epoch": 0.5306049461604545, |
| "grad_norm": 77.0, |
| "learning_rate": 9.989636627005011e-07, |
| "loss": 131.2565, |
| "step": 6630 |
| }, |
| { |
| "epoch": 0.5314052552798518, |
| "grad_norm": 72.3125, |
| "learning_rate": 9.989620995974853e-07, |
| "loss": 133.0545, |
| "step": 6640 |
| }, |
| { |
| "epoch": 0.5322055643992493, |
| "grad_norm": 76.125, |
| "learning_rate": 9.989605364944693e-07, |
| "loss": 131.2922, |
| "step": 6650 |
| }, |
| { |
| "epoch": 0.5330058735186466, |
| "grad_norm": 70.8125, |
| "learning_rate": 9.989589733914536e-07, |
| "loss": 133.2325, |
| "step": 6660 |
| }, |
| { |
| "epoch": 0.5338061826380439, |
| "grad_norm": 83.5625, |
| "learning_rate": 9.989574102884378e-07, |
| "loss": 133.1627, |
| "step": 6670 |
| }, |
| { |
| "epoch": 0.5346064917574413, |
| "grad_norm": 73.4375, |
| "learning_rate": 9.98955847185422e-07, |
| "loss": 132.0812, |
| "step": 6680 |
| }, |
| { |
| "epoch": 0.5354068008768387, |
| "grad_norm": 84.8125, |
| "learning_rate": 9.989542840824062e-07, |
| "loss": 131.5462, |
| "step": 6690 |
| }, |
| { |
| "epoch": 0.536207109996236, |
| "grad_norm": 75.0, |
| "learning_rate": 9.989527209793904e-07, |
| "loss": 132.3298, |
| "step": 6700 |
| }, |
| { |
| "epoch": 0.5370074191156334, |
| "grad_norm": 82.0625, |
| "learning_rate": 9.989511578763744e-07, |
| "loss": 133.5979, |
| "step": 6710 |
| }, |
| { |
| "epoch": 0.5378077282350308, |
| "grad_norm": 73.8125, |
| "learning_rate": 9.989495947733586e-07, |
| "loss": 130.3924, |
| "step": 6720 |
| }, |
| { |
| "epoch": 0.5386080373544282, |
| "grad_norm": 85.1875, |
| "learning_rate": 9.989480316703429e-07, |
| "loss": 130.9538, |
| "step": 6730 |
| }, |
| { |
| "epoch": 0.5394083464738255, |
| "grad_norm": 77.4375, |
| "learning_rate": 9.989464685673269e-07, |
| "loss": 132.9888, |
| "step": 6740 |
| }, |
| { |
| "epoch": 0.5402086555932228, |
| "grad_norm": 78.75, |
| "learning_rate": 9.98944905464311e-07, |
| "loss": 132.472, |
| "step": 6750 |
| }, |
| { |
| "epoch": 0.5410089647126203, |
| "grad_norm": 73.6875, |
| "learning_rate": 9.989433423612953e-07, |
| "loss": 132.7828, |
| "step": 6760 |
| }, |
| { |
| "epoch": 0.5418092738320176, |
| "grad_norm": 74.9375, |
| "learning_rate": 9.989417792582795e-07, |
| "loss": 131.1772, |
| "step": 6770 |
| }, |
| { |
| "epoch": 0.542609582951415, |
| "grad_norm": 73.5625, |
| "learning_rate": 9.989402161552635e-07, |
| "loss": 132.5478, |
| "step": 6780 |
| }, |
| { |
| "epoch": 0.5434098920708124, |
| "grad_norm": 79.5, |
| "learning_rate": 9.989386530522478e-07, |
| "loss": 130.6843, |
| "step": 6790 |
| }, |
| { |
| "epoch": 0.5442102011902097, |
| "grad_norm": 73.5625, |
| "learning_rate": 9.98937089949232e-07, |
| "loss": 131.9659, |
| "step": 6800 |
| }, |
| { |
| "epoch": 0.5450105103096071, |
| "grad_norm": 73.3125, |
| "learning_rate": 9.98935526846216e-07, |
| "loss": 131.0417, |
| "step": 6810 |
| }, |
| { |
| "epoch": 0.5458108194290044, |
| "grad_norm": 74.3125, |
| "learning_rate": 9.989339637432002e-07, |
| "loss": 133.0612, |
| "step": 6820 |
| }, |
| { |
| "epoch": 0.5466111285484018, |
| "grad_norm": 75.25, |
| "learning_rate": 9.989324006401844e-07, |
| "loss": 132.5644, |
| "step": 6830 |
| }, |
| { |
| "epoch": 0.5474114376677992, |
| "grad_norm": 77.75, |
| "learning_rate": 9.989308375371686e-07, |
| "loss": 132.1365, |
| "step": 6840 |
| }, |
| { |
| "epoch": 0.5482117467871965, |
| "grad_norm": 77.1875, |
| "learning_rate": 9.989292744341529e-07, |
| "loss": 132.7418, |
| "step": 6850 |
| }, |
| { |
| "epoch": 0.549012055906594, |
| "grad_norm": 79.125, |
| "learning_rate": 9.989277113311369e-07, |
| "loss": 131.9871, |
| "step": 6860 |
| }, |
| { |
| "epoch": 0.5498123650259913, |
| "grad_norm": 75.0, |
| "learning_rate": 9.98926148228121e-07, |
| "loss": 131.313, |
| "step": 6870 |
| }, |
| { |
| "epoch": 0.5506126741453886, |
| "grad_norm": 74.6875, |
| "learning_rate": 9.989245851251053e-07, |
| "loss": 132.0703, |
| "step": 6880 |
| }, |
| { |
| "epoch": 0.551412983264786, |
| "grad_norm": 73.5625, |
| "learning_rate": 9.989230220220895e-07, |
| "loss": 130.9999, |
| "step": 6890 |
| }, |
| { |
| "epoch": 0.5522132923841834, |
| "grad_norm": 75.0625, |
| "learning_rate": 9.989214589190735e-07, |
| "loss": 132.2652, |
| "step": 6900 |
| }, |
| { |
| "epoch": 0.5530136015035808, |
| "grad_norm": 74.125, |
| "learning_rate": 9.989198958160577e-07, |
| "loss": 131.5572, |
| "step": 6910 |
| }, |
| { |
| "epoch": 0.5538139106229781, |
| "grad_norm": 75.8125, |
| "learning_rate": 9.98918332713042e-07, |
| "loss": 130.6713, |
| "step": 6920 |
| }, |
| { |
| "epoch": 0.5546142197423755, |
| "grad_norm": 75.0625, |
| "learning_rate": 9.98916769610026e-07, |
| "loss": 132.5074, |
| "step": 6930 |
| }, |
| { |
| "epoch": 0.5554145288617729, |
| "grad_norm": 83.5, |
| "learning_rate": 9.989152065070102e-07, |
| "loss": 132.7792, |
| "step": 6940 |
| }, |
| { |
| "epoch": 0.5562148379811702, |
| "grad_norm": 78.6875, |
| "learning_rate": 9.989136434039944e-07, |
| "loss": 132.4211, |
| "step": 6950 |
| }, |
| { |
| "epoch": 0.5570151471005675, |
| "grad_norm": 84.125, |
| "learning_rate": 9.989120803009786e-07, |
| "loss": 133.1216, |
| "step": 6960 |
| }, |
| { |
| "epoch": 0.557815456219965, |
| "grad_norm": 80.8125, |
| "learning_rate": 9.989105171979626e-07, |
| "loss": 131.8983, |
| "step": 6970 |
| }, |
| { |
| "epoch": 0.5586157653393623, |
| "grad_norm": 77.875, |
| "learning_rate": 9.989089540949468e-07, |
| "loss": 131.918, |
| "step": 6980 |
| }, |
| { |
| "epoch": 0.5594160744587597, |
| "grad_norm": 73.1875, |
| "learning_rate": 9.98907390991931e-07, |
| "loss": 131.1051, |
| "step": 6990 |
| }, |
| { |
| "epoch": 0.5602163835781571, |
| "grad_norm": 78.0625, |
| "learning_rate": 9.989058278889153e-07, |
| "loss": 132.776, |
| "step": 7000 |
| }, |
| { |
| "epoch": 0.5610166926975544, |
| "grad_norm": 82.875, |
| "learning_rate": 9.989042647858995e-07, |
| "loss": 131.2556, |
| "step": 7010 |
| }, |
| { |
| "epoch": 0.5618170018169518, |
| "grad_norm": 79.8125, |
| "learning_rate": 9.989027016828835e-07, |
| "loss": 131.6359, |
| "step": 7020 |
| }, |
| { |
| "epoch": 0.5626173109363491, |
| "grad_norm": 78.875, |
| "learning_rate": 9.989011385798677e-07, |
| "loss": 132.1506, |
| "step": 7030 |
| }, |
| { |
| "epoch": 0.5634176200557466, |
| "grad_norm": 74.5625, |
| "learning_rate": 9.98899575476852e-07, |
| "loss": 131.6759, |
| "step": 7040 |
| }, |
| { |
| "epoch": 0.5642179291751439, |
| "grad_norm": 76.5, |
| "learning_rate": 9.988980123738362e-07, |
| "loss": 131.1462, |
| "step": 7050 |
| }, |
| { |
| "epoch": 0.5650182382945412, |
| "grad_norm": 82.625, |
| "learning_rate": 9.988964492708202e-07, |
| "loss": 130.7009, |
| "step": 7060 |
| }, |
| { |
| "epoch": 0.5658185474139387, |
| "grad_norm": 84.875, |
| "learning_rate": 9.988948861678044e-07, |
| "loss": 134.6625, |
| "step": 7070 |
| }, |
| { |
| "epoch": 0.566618856533336, |
| "grad_norm": 77.1875, |
| "learning_rate": 9.988933230647886e-07, |
| "loss": 132.1597, |
| "step": 7080 |
| }, |
| { |
| "epoch": 0.5674191656527333, |
| "grad_norm": 75.6875, |
| "learning_rate": 9.988917599617726e-07, |
| "loss": 131.5931, |
| "step": 7090 |
| }, |
| { |
| "epoch": 0.5682194747721308, |
| "grad_norm": 73.6875, |
| "learning_rate": 9.988901968587568e-07, |
| "loss": 131.8727, |
| "step": 7100 |
| }, |
| { |
| "epoch": 0.5690197838915281, |
| "grad_norm": 73.25, |
| "learning_rate": 9.98888633755741e-07, |
| "loss": 132.6044, |
| "step": 7110 |
| }, |
| { |
| "epoch": 0.5698200930109255, |
| "grad_norm": 85.75, |
| "learning_rate": 9.98887070652725e-07, |
| "loss": 131.7462, |
| "step": 7120 |
| }, |
| { |
| "epoch": 0.5706204021303228, |
| "grad_norm": 74.0625, |
| "learning_rate": 9.988855075497093e-07, |
| "loss": 132.0296, |
| "step": 7130 |
| }, |
| { |
| "epoch": 0.5714207112497202, |
| "grad_norm": 80.6875, |
| "learning_rate": 9.988839444466935e-07, |
| "loss": 134.3069, |
| "step": 7140 |
| }, |
| { |
| "epoch": 0.5722210203691176, |
| "grad_norm": 76.0, |
| "learning_rate": 9.988823813436777e-07, |
| "loss": 131.401, |
| "step": 7150 |
| }, |
| { |
| "epoch": 0.5730213294885149, |
| "grad_norm": 81.5625, |
| "learning_rate": 9.98880818240662e-07, |
| "loss": 132.4782, |
| "step": 7160 |
| }, |
| { |
| "epoch": 0.5738216386079124, |
| "grad_norm": 76.4375, |
| "learning_rate": 9.988792551376461e-07, |
| "loss": 132.7547, |
| "step": 7170 |
| }, |
| { |
| "epoch": 0.5746219477273097, |
| "grad_norm": 72.9375, |
| "learning_rate": 9.988776920346301e-07, |
| "loss": 131.6157, |
| "step": 7180 |
| }, |
| { |
| "epoch": 0.575422256846707, |
| "grad_norm": 75.5625, |
| "learning_rate": 9.988761289316144e-07, |
| "loss": 132.9658, |
| "step": 7190 |
| }, |
| { |
| "epoch": 0.5762225659661044, |
| "grad_norm": 73.3125, |
| "learning_rate": 9.988745658285986e-07, |
| "loss": 131.4694, |
| "step": 7200 |
| }, |
| { |
| "epoch": 0.5770228750855018, |
| "grad_norm": 76.6875, |
| "learning_rate": 9.988730027255828e-07, |
| "loss": 132.546, |
| "step": 7210 |
| }, |
| { |
| "epoch": 0.5778231842048991, |
| "grad_norm": 77.6875, |
| "learning_rate": 9.988714396225668e-07, |
| "loss": 131.9255, |
| "step": 7220 |
| }, |
| { |
| "epoch": 0.5786234933242965, |
| "grad_norm": 82.0625, |
| "learning_rate": 9.98869876519551e-07, |
| "loss": 133.1269, |
| "step": 7230 |
| }, |
| { |
| "epoch": 0.5794238024436938, |
| "grad_norm": 75.5625, |
| "learning_rate": 9.988683134165352e-07, |
| "loss": 131.8465, |
| "step": 7240 |
| }, |
| { |
| "epoch": 0.5802241115630913, |
| "grad_norm": 78.125, |
| "learning_rate": 9.988667503135193e-07, |
| "loss": 131.8865, |
| "step": 7250 |
| }, |
| { |
| "epoch": 0.5810244206824886, |
| "grad_norm": 77.625, |
| "learning_rate": 9.988651872105035e-07, |
| "loss": 131.2077, |
| "step": 7260 |
| }, |
| { |
| "epoch": 0.5818247298018859, |
| "grad_norm": 73.0, |
| "learning_rate": 9.988636241074877e-07, |
| "loss": 132.2729, |
| "step": 7270 |
| }, |
| { |
| "epoch": 0.5826250389212834, |
| "grad_norm": 76.125, |
| "learning_rate": 9.988620610044717e-07, |
| "loss": 132.5173, |
| "step": 7280 |
| }, |
| { |
| "epoch": 0.5834253480406807, |
| "grad_norm": 72.875, |
| "learning_rate": 9.98860497901456e-07, |
| "loss": 130.8854, |
| "step": 7290 |
| }, |
| { |
| "epoch": 0.5842256571600781, |
| "grad_norm": 76.5625, |
| "learning_rate": 9.988589347984401e-07, |
| "loss": 130.3204, |
| "step": 7300 |
| }, |
| { |
| "epoch": 0.5850259662794755, |
| "grad_norm": 79.375, |
| "learning_rate": 9.988573716954244e-07, |
| "loss": 131.2047, |
| "step": 7310 |
| }, |
| { |
| "epoch": 0.5858262753988728, |
| "grad_norm": 76.6875, |
| "learning_rate": 9.988558085924086e-07, |
| "loss": 132.3948, |
| "step": 7320 |
| }, |
| { |
| "epoch": 0.5866265845182702, |
| "grad_norm": 75.0625, |
| "learning_rate": 9.988542454893926e-07, |
| "loss": 131.0941, |
| "step": 7330 |
| }, |
| { |
| "epoch": 0.5874268936376675, |
| "grad_norm": 80.375, |
| "learning_rate": 9.988526823863768e-07, |
| "loss": 132.392, |
| "step": 7340 |
| }, |
| { |
| "epoch": 0.588227202757065, |
| "grad_norm": 75.3125, |
| "learning_rate": 9.98851119283361e-07, |
| "loss": 132.1595, |
| "step": 7350 |
| }, |
| { |
| "epoch": 0.5890275118764623, |
| "grad_norm": 80.5, |
| "learning_rate": 9.988495561803452e-07, |
| "loss": 131.9138, |
| "step": 7360 |
| }, |
| { |
| "epoch": 0.5898278209958596, |
| "grad_norm": 74.5625, |
| "learning_rate": 9.988479930773294e-07, |
| "loss": 129.6595, |
| "step": 7370 |
| }, |
| { |
| "epoch": 0.590628130115257, |
| "grad_norm": 74.5625, |
| "learning_rate": 9.988464299743135e-07, |
| "loss": 130.2861, |
| "step": 7380 |
| }, |
| { |
| "epoch": 0.5914284392346544, |
| "grad_norm": 71.625, |
| "learning_rate": 9.988448668712977e-07, |
| "loss": 132.6865, |
| "step": 7390 |
| }, |
| { |
| "epoch": 0.5922287483540517, |
| "grad_norm": 80.25, |
| "learning_rate": 9.988433037682819e-07, |
| "loss": 132.5223, |
| "step": 7400 |
| }, |
| { |
| "epoch": 0.5930290574734491, |
| "grad_norm": 70.5, |
| "learning_rate": 9.98841740665266e-07, |
| "loss": 132.5188, |
| "step": 7410 |
| }, |
| { |
| "epoch": 0.5938293665928465, |
| "grad_norm": 71.4375, |
| "learning_rate": 9.988401775622501e-07, |
| "loss": 132.2418, |
| "step": 7420 |
| }, |
| { |
| "epoch": 0.5946296757122439, |
| "grad_norm": 75.25, |
| "learning_rate": 9.988386144592343e-07, |
| "loss": 130.8538, |
| "step": 7430 |
| }, |
| { |
| "epoch": 0.5954299848316412, |
| "grad_norm": 75.8125, |
| "learning_rate": 9.988370513562183e-07, |
| "loss": 131.213, |
| "step": 7440 |
| }, |
| { |
| "epoch": 0.5962302939510385, |
| "grad_norm": 74.0, |
| "learning_rate": 9.988354882532026e-07, |
| "loss": 132.8653, |
| "step": 7450 |
| }, |
| { |
| "epoch": 0.597030603070436, |
| "grad_norm": 74.25, |
| "learning_rate": 9.988339251501868e-07, |
| "loss": 131.1389, |
| "step": 7460 |
| }, |
| { |
| "epoch": 0.5978309121898333, |
| "grad_norm": 79.0625, |
| "learning_rate": 9.98832362047171e-07, |
| "loss": 131.8387, |
| "step": 7470 |
| }, |
| { |
| "epoch": 0.5986312213092307, |
| "grad_norm": 92.9375, |
| "learning_rate": 9.988307989441552e-07, |
| "loss": 132.9031, |
| "step": 7480 |
| }, |
| { |
| "epoch": 0.5994315304286281, |
| "grad_norm": 75.75, |
| "learning_rate": 9.988292358411392e-07, |
| "loss": 130.4166, |
| "step": 7490 |
| }, |
| { |
| "epoch": 0.6002318395480254, |
| "grad_norm": 75.25, |
| "learning_rate": 9.988276727381234e-07, |
| "loss": 131.6747, |
| "step": 7500 |
| }, |
| { |
| "epoch": 0.6002318395480254, |
| "eval_loss": 2.0600602626800537, |
| "eval_runtime": 418.6157, |
| "eval_samples_per_second": 1567.447, |
| "eval_steps_per_second": 48.983, |
| "step": 7500 |
| }, |
| { |
| "epoch": 0.6010321486674228, |
| "grad_norm": 79.875, |
| "learning_rate": 9.988261096351077e-07, |
| "loss": 131.0494, |
| "step": 7510 |
| }, |
| { |
| "epoch": 0.6018324577868202, |
| "grad_norm": 72.125, |
| "learning_rate": 9.988245465320919e-07, |
| "loss": 131.0795, |
| "step": 7520 |
| }, |
| { |
| "epoch": 0.6026327669062175, |
| "grad_norm": 73.4375, |
| "learning_rate": 9.98822983429076e-07, |
| "loss": 131.2448, |
| "step": 7530 |
| }, |
| { |
| "epoch": 0.6034330760256149, |
| "grad_norm": 78.125, |
| "learning_rate": 9.9882142032606e-07, |
| "loss": 132.0931, |
| "step": 7540 |
| }, |
| { |
| "epoch": 0.6042333851450122, |
| "grad_norm": 71.25, |
| "learning_rate": 9.988198572230443e-07, |
| "loss": 131.5498, |
| "step": 7550 |
| }, |
| { |
| "epoch": 0.6050336942644097, |
| "grad_norm": 77.5625, |
| "learning_rate": 9.988182941200285e-07, |
| "loss": 132.7485, |
| "step": 7560 |
| }, |
| { |
| "epoch": 0.605834003383807, |
| "grad_norm": 77.375, |
| "learning_rate": 9.988167310170125e-07, |
| "loss": 132.6295, |
| "step": 7570 |
| }, |
| { |
| "epoch": 0.6066343125032043, |
| "grad_norm": 78.1875, |
| "learning_rate": 9.988151679139968e-07, |
| "loss": 130.9077, |
| "step": 7580 |
| }, |
| { |
| "epoch": 0.6074346216226018, |
| "grad_norm": 79.9375, |
| "learning_rate": 9.98813604810981e-07, |
| "loss": 133.7221, |
| "step": 7590 |
| }, |
| { |
| "epoch": 0.6082349307419991, |
| "grad_norm": 77.9375, |
| "learning_rate": 9.98812041707965e-07, |
| "loss": 131.693, |
| "step": 7600 |
| }, |
| { |
| "epoch": 0.6090352398613965, |
| "grad_norm": 79.875, |
| "learning_rate": 9.988104786049492e-07, |
| "loss": 131.3416, |
| "step": 7610 |
| }, |
| { |
| "epoch": 0.6098355489807938, |
| "grad_norm": 73.0, |
| "learning_rate": 9.988089155019334e-07, |
| "loss": 132.1983, |
| "step": 7620 |
| }, |
| { |
| "epoch": 0.6106358581001912, |
| "grad_norm": 77.375, |
| "learning_rate": 9.988073523989176e-07, |
| "loss": 132.4808, |
| "step": 7630 |
| }, |
| { |
| "epoch": 0.6114361672195886, |
| "grad_norm": 76.5625, |
| "learning_rate": 9.988057892959019e-07, |
| "loss": 131.0641, |
| "step": 7640 |
| }, |
| { |
| "epoch": 0.6122364763389859, |
| "grad_norm": 84.75, |
| "learning_rate": 9.988042261928859e-07, |
| "loss": 132.1883, |
| "step": 7650 |
| }, |
| { |
| "epoch": 0.6130367854583832, |
| "grad_norm": 71.75, |
| "learning_rate": 9.9880266308987e-07, |
| "loss": 132.5537, |
| "step": 7660 |
| }, |
| { |
| "epoch": 0.6138370945777807, |
| "grad_norm": 83.5625, |
| "learning_rate": 9.988010999868543e-07, |
| "loss": 131.5181, |
| "step": 7670 |
| }, |
| { |
| "epoch": 0.614637403697178, |
| "grad_norm": 79.9375, |
| "learning_rate": 9.987995368838385e-07, |
| "loss": 132.7484, |
| "step": 7680 |
| }, |
| { |
| "epoch": 0.6154377128165754, |
| "grad_norm": 76.25, |
| "learning_rate": 9.987979737808225e-07, |
| "loss": 132.5646, |
| "step": 7690 |
| }, |
| { |
| "epoch": 0.6162380219359728, |
| "grad_norm": 77.75, |
| "learning_rate": 9.987964106778067e-07, |
| "loss": 132.2048, |
| "step": 7700 |
| }, |
| { |
| "epoch": 0.6170383310553701, |
| "grad_norm": 74.125, |
| "learning_rate": 9.98794847574791e-07, |
| "loss": 132.1626, |
| "step": 7710 |
| }, |
| { |
| "epoch": 0.6178386401747675, |
| "grad_norm": 79.0, |
| "learning_rate": 9.987932844717752e-07, |
| "loss": 131.6138, |
| "step": 7720 |
| }, |
| { |
| "epoch": 0.6186389492941649, |
| "grad_norm": 76.9375, |
| "learning_rate": 9.987917213687592e-07, |
| "loss": 130.9984, |
| "step": 7730 |
| }, |
| { |
| "epoch": 0.6194392584135623, |
| "grad_norm": 79.875, |
| "learning_rate": 9.987901582657434e-07, |
| "loss": 132.2128, |
| "step": 7740 |
| }, |
| { |
| "epoch": 0.6202395675329596, |
| "grad_norm": 75.5625, |
| "learning_rate": 9.987885951627276e-07, |
| "loss": 132.1526, |
| "step": 7750 |
| }, |
| { |
| "epoch": 0.6210398766523569, |
| "grad_norm": 71.75, |
| "learning_rate": 9.987870320597116e-07, |
| "loss": 130.6957, |
| "step": 7760 |
| }, |
| { |
| "epoch": 0.6218401857717544, |
| "grad_norm": 80.8125, |
| "learning_rate": 9.987854689566959e-07, |
| "loss": 131.2453, |
| "step": 7770 |
| }, |
| { |
| "epoch": 0.6226404948911517, |
| "grad_norm": 82.4375, |
| "learning_rate": 9.9878390585368e-07, |
| "loss": 130.4836, |
| "step": 7780 |
| }, |
| { |
| "epoch": 0.623440804010549, |
| "grad_norm": 72.0625, |
| "learning_rate": 9.987823427506643e-07, |
| "loss": 131.348, |
| "step": 7790 |
| }, |
| { |
| "epoch": 0.6242411131299465, |
| "grad_norm": 74.125, |
| "learning_rate": 9.987807796476483e-07, |
| "loss": 132.7037, |
| "step": 7800 |
| }, |
| { |
| "epoch": 0.6250414222493438, |
| "grad_norm": 71.625, |
| "learning_rate": 9.987792165446325e-07, |
| "loss": 132.4081, |
| "step": 7810 |
| }, |
| { |
| "epoch": 0.6258417313687412, |
| "grad_norm": 75.6875, |
| "learning_rate": 9.987776534416167e-07, |
| "loss": 130.35, |
| "step": 7820 |
| }, |
| { |
| "epoch": 0.6266420404881385, |
| "grad_norm": 72.8125, |
| "learning_rate": 9.98776090338601e-07, |
| "loss": 132.3228, |
| "step": 7830 |
| }, |
| { |
| "epoch": 0.6274423496075359, |
| "grad_norm": 80.6875, |
| "learning_rate": 9.987745272355852e-07, |
| "loss": 131.8622, |
| "step": 7840 |
| }, |
| { |
| "epoch": 0.6282426587269333, |
| "grad_norm": 81.4375, |
| "learning_rate": 9.987729641325692e-07, |
| "loss": 132.0134, |
| "step": 7850 |
| }, |
| { |
| "epoch": 0.6290429678463306, |
| "grad_norm": 78.9375, |
| "learning_rate": 9.987714010295534e-07, |
| "loss": 132.4322, |
| "step": 7860 |
| }, |
| { |
| "epoch": 0.6298432769657281, |
| "grad_norm": 76.5625, |
| "learning_rate": 9.987698379265376e-07, |
| "loss": 132.5075, |
| "step": 7870 |
| }, |
| { |
| "epoch": 0.6306435860851254, |
| "grad_norm": 77.75, |
| "learning_rate": 9.987682748235216e-07, |
| "loss": 131.9704, |
| "step": 7880 |
| }, |
| { |
| "epoch": 0.6314438952045227, |
| "grad_norm": 78.375, |
| "learning_rate": 9.987667117205058e-07, |
| "loss": 131.3642, |
| "step": 7890 |
| }, |
| { |
| "epoch": 0.6322442043239201, |
| "grad_norm": 75.9375, |
| "learning_rate": 9.9876514861749e-07, |
| "loss": 132.9338, |
| "step": 7900 |
| }, |
| { |
| "epoch": 0.6330445134433175, |
| "grad_norm": 73.25, |
| "learning_rate": 9.987635855144743e-07, |
| "loss": 131.7696, |
| "step": 7910 |
| }, |
| { |
| "epoch": 0.6338448225627148, |
| "grad_norm": 79.4375, |
| "learning_rate": 9.987620224114583e-07, |
| "loss": 132.7947, |
| "step": 7920 |
| }, |
| { |
| "epoch": 0.6346451316821122, |
| "grad_norm": 75.3125, |
| "learning_rate": 9.987604593084425e-07, |
| "loss": 131.7236, |
| "step": 7930 |
| }, |
| { |
| "epoch": 0.6354454408015096, |
| "grad_norm": 74.1875, |
| "learning_rate": 9.987588962054267e-07, |
| "loss": 131.487, |
| "step": 7940 |
| }, |
| { |
| "epoch": 0.636245749920907, |
| "grad_norm": 76.3125, |
| "learning_rate": 9.98757333102411e-07, |
| "loss": 132.3433, |
| "step": 7950 |
| }, |
| { |
| "epoch": 0.6370460590403043, |
| "grad_norm": 76.4375, |
| "learning_rate": 9.98755769999395e-07, |
| "loss": 132.2295, |
| "step": 7960 |
| }, |
| { |
| "epoch": 0.6378463681597016, |
| "grad_norm": 73.375, |
| "learning_rate": 9.987542068963792e-07, |
| "loss": 130.3616, |
| "step": 7970 |
| }, |
| { |
| "epoch": 0.6386466772790991, |
| "grad_norm": 71.75, |
| "learning_rate": 9.987526437933634e-07, |
| "loss": 133.0935, |
| "step": 7980 |
| }, |
| { |
| "epoch": 0.6394469863984964, |
| "grad_norm": 74.1875, |
| "learning_rate": 9.987510806903476e-07, |
| "loss": 131.3436, |
| "step": 7990 |
| }, |
| { |
| "epoch": 0.6402472955178938, |
| "grad_norm": 73.1875, |
| "learning_rate": 9.987495175873318e-07, |
| "loss": 132.8394, |
| "step": 8000 |
| }, |
| { |
| "epoch": 0.6410476046372912, |
| "grad_norm": 77.4375, |
| "learning_rate": 9.987479544843158e-07, |
| "loss": 131.8165, |
| "step": 8010 |
| }, |
| { |
| "epoch": 0.6418479137566885, |
| "grad_norm": 76.5, |
| "learning_rate": 9.987463913813e-07, |
| "loss": 131.6329, |
| "step": 8020 |
| }, |
| { |
| "epoch": 0.6426482228760859, |
| "grad_norm": 74.3125, |
| "learning_rate": 9.987448282782843e-07, |
| "loss": 132.5564, |
| "step": 8030 |
| }, |
| { |
| "epoch": 0.6434485319954832, |
| "grad_norm": 80.25, |
| "learning_rate": 9.987432651752683e-07, |
| "loss": 131.5118, |
| "step": 8040 |
| }, |
| { |
| "epoch": 0.6442488411148807, |
| "grad_norm": 81.4375, |
| "learning_rate": 9.987417020722525e-07, |
| "loss": 130.5542, |
| "step": 8050 |
| }, |
| { |
| "epoch": 0.645049150234278, |
| "grad_norm": 75.3125, |
| "learning_rate": 9.987401389692367e-07, |
| "loss": 132.7824, |
| "step": 8060 |
| }, |
| { |
| "epoch": 0.6458494593536753, |
| "grad_norm": 75.6875, |
| "learning_rate": 9.98738575866221e-07, |
| "loss": 131.6364, |
| "step": 8070 |
| }, |
| { |
| "epoch": 0.6466497684730728, |
| "grad_norm": 69.125, |
| "learning_rate": 9.98737012763205e-07, |
| "loss": 131.1746, |
| "step": 8080 |
| }, |
| { |
| "epoch": 0.6474500775924701, |
| "grad_norm": 79.6875, |
| "learning_rate": 9.987354496601891e-07, |
| "loss": 131.0759, |
| "step": 8090 |
| }, |
| { |
| "epoch": 0.6482503867118674, |
| "grad_norm": 74.0625, |
| "learning_rate": 9.987338865571734e-07, |
| "loss": 131.1551, |
| "step": 8100 |
| }, |
| { |
| "epoch": 0.6490506958312648, |
| "grad_norm": 81.8125, |
| "learning_rate": 9.987323234541576e-07, |
| "loss": 130.3787, |
| "step": 8110 |
| }, |
| { |
| "epoch": 0.6498510049506622, |
| "grad_norm": 76.125, |
| "learning_rate": 9.987307603511416e-07, |
| "loss": 132.5409, |
| "step": 8120 |
| }, |
| { |
| "epoch": 0.6506513140700596, |
| "grad_norm": 74.25, |
| "learning_rate": 9.987291972481258e-07, |
| "loss": 132.2028, |
| "step": 8130 |
| }, |
| { |
| "epoch": 0.6514516231894569, |
| "grad_norm": 79.9375, |
| "learning_rate": 9.9872763414511e-07, |
| "loss": 130.8431, |
| "step": 8140 |
| }, |
| { |
| "epoch": 0.6522519323088543, |
| "grad_norm": 71.0, |
| "learning_rate": 9.987260710420942e-07, |
| "loss": 132.5052, |
| "step": 8150 |
| }, |
| { |
| "epoch": 0.6530522414282517, |
| "grad_norm": 88.9375, |
| "learning_rate": 9.987245079390785e-07, |
| "loss": 131.6914, |
| "step": 8160 |
| }, |
| { |
| "epoch": 0.653852550547649, |
| "grad_norm": 71.75, |
| "learning_rate": 9.987229448360625e-07, |
| "loss": 131.5299, |
| "step": 8170 |
| }, |
| { |
| "epoch": 0.6546528596670464, |
| "grad_norm": 75.1875, |
| "learning_rate": 9.987213817330467e-07, |
| "loss": 131.2931, |
| "step": 8180 |
| }, |
| { |
| "epoch": 0.6554531687864438, |
| "grad_norm": 76.625, |
| "learning_rate": 9.98719818630031e-07, |
| "loss": 132.1339, |
| "step": 8190 |
| }, |
| { |
| "epoch": 0.6562534779058411, |
| "grad_norm": 76.5625, |
| "learning_rate": 9.98718255527015e-07, |
| "loss": 131.0958, |
| "step": 8200 |
| }, |
| { |
| "epoch": 0.6570537870252385, |
| "grad_norm": 72.6875, |
| "learning_rate": 9.987166924239991e-07, |
| "loss": 131.8033, |
| "step": 8210 |
| }, |
| { |
| "epoch": 0.6578540961446359, |
| "grad_norm": 77.4375, |
| "learning_rate": 9.987151293209833e-07, |
| "loss": 132.3042, |
| "step": 8220 |
| }, |
| { |
| "epoch": 0.6586544052640332, |
| "grad_norm": 72.0625, |
| "learning_rate": 9.987135662179674e-07, |
| "loss": 131.9262, |
| "step": 8230 |
| }, |
| { |
| "epoch": 0.6594547143834306, |
| "grad_norm": 81.125, |
| "learning_rate": 9.987120031149516e-07, |
| "loss": 131.4936, |
| "step": 8240 |
| }, |
| { |
| "epoch": 0.6602550235028279, |
| "grad_norm": 72.125, |
| "learning_rate": 9.987104400119358e-07, |
| "loss": 131.1532, |
| "step": 8250 |
| }, |
| { |
| "epoch": 0.6610553326222254, |
| "grad_norm": 76.5, |
| "learning_rate": 9.9870887690892e-07, |
| "loss": 132.0334, |
| "step": 8260 |
| }, |
| { |
| "epoch": 0.6618556417416227, |
| "grad_norm": 70.9375, |
| "learning_rate": 9.98707313805904e-07, |
| "loss": 131.7788, |
| "step": 8270 |
| }, |
| { |
| "epoch": 0.66265595086102, |
| "grad_norm": 78.5, |
| "learning_rate": 9.987057507028882e-07, |
| "loss": 131.3623, |
| "step": 8280 |
| }, |
| { |
| "epoch": 0.6634562599804175, |
| "grad_norm": 77.75, |
| "learning_rate": 9.987041875998724e-07, |
| "loss": 131.0159, |
| "step": 8290 |
| }, |
| { |
| "epoch": 0.6642565690998148, |
| "grad_norm": 75.0, |
| "learning_rate": 9.987026244968567e-07, |
| "loss": 131.802, |
| "step": 8300 |
| }, |
| { |
| "epoch": 0.6650568782192122, |
| "grad_norm": 74.3125, |
| "learning_rate": 9.987010613938409e-07, |
| "loss": 130.4846, |
| "step": 8310 |
| }, |
| { |
| "epoch": 0.6658571873386095, |
| "grad_norm": 78.0625, |
| "learning_rate": 9.98699498290825e-07, |
| "loss": 130.5051, |
| "step": 8320 |
| }, |
| { |
| "epoch": 0.6666574964580069, |
| "grad_norm": 71.625, |
| "learning_rate": 9.986979351878091e-07, |
| "loss": 131.5967, |
| "step": 8330 |
| }, |
| { |
| "epoch": 0.6674578055774043, |
| "grad_norm": 71.5, |
| "learning_rate": 9.986963720847933e-07, |
| "loss": 132.5512, |
| "step": 8340 |
| }, |
| { |
| "epoch": 0.6682581146968016, |
| "grad_norm": 78.375, |
| "learning_rate": 9.986948089817775e-07, |
| "loss": 131.1753, |
| "step": 8350 |
| }, |
| { |
| "epoch": 0.669058423816199, |
| "grad_norm": 74.1875, |
| "learning_rate": 9.986932458787616e-07, |
| "loss": 130.4604, |
| "step": 8360 |
| }, |
| { |
| "epoch": 0.6698587329355964, |
| "grad_norm": 73.4375, |
| "learning_rate": 9.986916827757458e-07, |
| "loss": 132.9316, |
| "step": 8370 |
| }, |
| { |
| "epoch": 0.6706590420549937, |
| "grad_norm": 74.875, |
| "learning_rate": 9.9869011967273e-07, |
| "loss": 131.4552, |
| "step": 8380 |
| }, |
| { |
| "epoch": 0.6714593511743912, |
| "grad_norm": 78.0, |
| "learning_rate": 9.98688556569714e-07, |
| "loss": 131.1365, |
| "step": 8390 |
| }, |
| { |
| "epoch": 0.6722596602937885, |
| "grad_norm": 72.8125, |
| "learning_rate": 9.986869934666982e-07, |
| "loss": 132.5655, |
| "step": 8400 |
| }, |
| { |
| "epoch": 0.6730599694131858, |
| "grad_norm": 75.3125, |
| "learning_rate": 9.986854303636824e-07, |
| "loss": 132.0653, |
| "step": 8410 |
| }, |
| { |
| "epoch": 0.6738602785325832, |
| "grad_norm": 80.0, |
| "learning_rate": 9.986838672606667e-07, |
| "loss": 130.1704, |
| "step": 8420 |
| }, |
| { |
| "epoch": 0.6746605876519806, |
| "grad_norm": 79.9375, |
| "learning_rate": 9.986823041576507e-07, |
| "loss": 131.86, |
| "step": 8430 |
| }, |
| { |
| "epoch": 0.675460896771378, |
| "grad_norm": 77.5, |
| "learning_rate": 9.986807410546349e-07, |
| "loss": 132.2835, |
| "step": 8440 |
| }, |
| { |
| "epoch": 0.6762612058907753, |
| "grad_norm": 76.125, |
| "learning_rate": 9.98679177951619e-07, |
| "loss": 133.6062, |
| "step": 8450 |
| }, |
| { |
| "epoch": 0.6770615150101726, |
| "grad_norm": 74.4375, |
| "learning_rate": 9.986776148486033e-07, |
| "loss": 133.1529, |
| "step": 8460 |
| }, |
| { |
| "epoch": 0.6778618241295701, |
| "grad_norm": 77.6875, |
| "learning_rate": 9.986760517455875e-07, |
| "loss": 130.7149, |
| "step": 8470 |
| }, |
| { |
| "epoch": 0.6786621332489674, |
| "grad_norm": 76.6875, |
| "learning_rate": 9.986744886425715e-07, |
| "loss": 131.2965, |
| "step": 8480 |
| }, |
| { |
| "epoch": 0.6794624423683647, |
| "grad_norm": 78.0625, |
| "learning_rate": 9.986729255395558e-07, |
| "loss": 131.7146, |
| "step": 8490 |
| }, |
| { |
| "epoch": 0.6802627514877622, |
| "grad_norm": 77.1875, |
| "learning_rate": 9.9867136243654e-07, |
| "loss": 131.4988, |
| "step": 8500 |
| }, |
| { |
| "epoch": 0.6810630606071595, |
| "grad_norm": 75.625, |
| "learning_rate": 9.986697993335242e-07, |
| "loss": 131.0816, |
| "step": 8510 |
| }, |
| { |
| "epoch": 0.6818633697265569, |
| "grad_norm": 73.625, |
| "learning_rate": 9.986682362305082e-07, |
| "loss": 131.8303, |
| "step": 8520 |
| }, |
| { |
| "epoch": 0.6826636788459542, |
| "grad_norm": 77.625, |
| "learning_rate": 9.986666731274924e-07, |
| "loss": 130.0348, |
| "step": 8530 |
| }, |
| { |
| "epoch": 0.6834639879653516, |
| "grad_norm": 79.0, |
| "learning_rate": 9.986651100244766e-07, |
| "loss": 132.8476, |
| "step": 8540 |
| }, |
| { |
| "epoch": 0.684264297084749, |
| "grad_norm": 74.625, |
| "learning_rate": 9.986635469214606e-07, |
| "loss": 131.6226, |
| "step": 8550 |
| }, |
| { |
| "epoch": 0.6850646062041463, |
| "grad_norm": 79.0, |
| "learning_rate": 9.986619838184449e-07, |
| "loss": 131.3241, |
| "step": 8560 |
| }, |
| { |
| "epoch": 0.6858649153235438, |
| "grad_norm": 76.9375, |
| "learning_rate": 9.98660420715429e-07, |
| "loss": 131.6735, |
| "step": 8570 |
| }, |
| { |
| "epoch": 0.6866652244429411, |
| "grad_norm": 70.8125, |
| "learning_rate": 9.986588576124133e-07, |
| "loss": 131.7806, |
| "step": 8580 |
| }, |
| { |
| "epoch": 0.6874655335623384, |
| "grad_norm": 83.25, |
| "learning_rate": 9.986572945093973e-07, |
| "loss": 132.0227, |
| "step": 8590 |
| }, |
| { |
| "epoch": 0.6882658426817359, |
| "grad_norm": 76.5625, |
| "learning_rate": 9.986557314063815e-07, |
| "loss": 131.6524, |
| "step": 8600 |
| }, |
| { |
| "epoch": 0.6890661518011332, |
| "grad_norm": 79.875, |
| "learning_rate": 9.986541683033657e-07, |
| "loss": 132.2215, |
| "step": 8610 |
| }, |
| { |
| "epoch": 0.6898664609205305, |
| "grad_norm": 85.125, |
| "learning_rate": 9.9865260520035e-07, |
| "loss": 132.894, |
| "step": 8620 |
| }, |
| { |
| "epoch": 0.6906667700399279, |
| "grad_norm": 78.625, |
| "learning_rate": 9.986510420973342e-07, |
| "loss": 131.7396, |
| "step": 8630 |
| }, |
| { |
| "epoch": 0.6914670791593253, |
| "grad_norm": 84.375, |
| "learning_rate": 9.986494789943182e-07, |
| "loss": 132.6336, |
| "step": 8640 |
| }, |
| { |
| "epoch": 0.6922673882787227, |
| "grad_norm": 80.4375, |
| "learning_rate": 9.986479158913024e-07, |
| "loss": 130.9795, |
| "step": 8650 |
| }, |
| { |
| "epoch": 0.69306769739812, |
| "grad_norm": 73.0625, |
| "learning_rate": 9.986463527882866e-07, |
| "loss": 132.994, |
| "step": 8660 |
| }, |
| { |
| "epoch": 0.6938680065175173, |
| "grad_norm": 77.75, |
| "learning_rate": 9.986447896852708e-07, |
| "loss": 131.1462, |
| "step": 8670 |
| }, |
| { |
| "epoch": 0.6946683156369148, |
| "grad_norm": 73.4375, |
| "learning_rate": 9.986432265822548e-07, |
| "loss": 132.2478, |
| "step": 8680 |
| }, |
| { |
| "epoch": 0.6954686247563121, |
| "grad_norm": 76.125, |
| "learning_rate": 9.98641663479239e-07, |
| "loss": 130.9919, |
| "step": 8690 |
| }, |
| { |
| "epoch": 0.6962689338757095, |
| "grad_norm": 76.0625, |
| "learning_rate": 9.986401003762233e-07, |
| "loss": 130.4246, |
| "step": 8700 |
| }, |
| { |
| "epoch": 0.6970692429951069, |
| "grad_norm": 75.25, |
| "learning_rate": 9.986385372732073e-07, |
| "loss": 132.2129, |
| "step": 8710 |
| }, |
| { |
| "epoch": 0.6978695521145042, |
| "grad_norm": 77.8125, |
| "learning_rate": 9.986369741701915e-07, |
| "loss": 132.2469, |
| "step": 8720 |
| }, |
| { |
| "epoch": 0.6986698612339016, |
| "grad_norm": 74.3125, |
| "learning_rate": 9.986354110671757e-07, |
| "loss": 131.4874, |
| "step": 8730 |
| }, |
| { |
| "epoch": 0.699470170353299, |
| "grad_norm": 74.6875, |
| "learning_rate": 9.986338479641597e-07, |
| "loss": 131.288, |
| "step": 8740 |
| }, |
| { |
| "epoch": 0.7002704794726964, |
| "grad_norm": 78.875, |
| "learning_rate": 9.98632284861144e-07, |
| "loss": 130.9051, |
| "step": 8750 |
| }, |
| { |
| "epoch": 0.7010707885920937, |
| "grad_norm": 70.1875, |
| "learning_rate": 9.986307217581282e-07, |
| "loss": 131.5563, |
| "step": 8760 |
| }, |
| { |
| "epoch": 0.701871097711491, |
| "grad_norm": 76.75, |
| "learning_rate": 9.986291586551124e-07, |
| "loss": 132.3225, |
| "step": 8770 |
| }, |
| { |
| "epoch": 0.7026714068308885, |
| "grad_norm": 84.9375, |
| "learning_rate": 9.986275955520966e-07, |
| "loss": 132.3153, |
| "step": 8780 |
| }, |
| { |
| "epoch": 0.7034717159502858, |
| "grad_norm": 79.0, |
| "learning_rate": 9.986260324490808e-07, |
| "loss": 131.7471, |
| "step": 8790 |
| }, |
| { |
| "epoch": 0.7042720250696831, |
| "grad_norm": 79.0, |
| "learning_rate": 9.986244693460648e-07, |
| "loss": 132.4772, |
| "step": 8800 |
| }, |
| { |
| "epoch": 0.7050723341890806, |
| "grad_norm": 74.5625, |
| "learning_rate": 9.98622906243049e-07, |
| "loss": 130.8647, |
| "step": 8810 |
| }, |
| { |
| "epoch": 0.7058726433084779, |
| "grad_norm": 75.9375, |
| "learning_rate": 9.986213431400333e-07, |
| "loss": 131.4361, |
| "step": 8820 |
| }, |
| { |
| "epoch": 0.7066729524278753, |
| "grad_norm": 75.3125, |
| "learning_rate": 9.986197800370175e-07, |
| "loss": 131.5961, |
| "step": 8830 |
| }, |
| { |
| "epoch": 0.7074732615472726, |
| "grad_norm": 74.5, |
| "learning_rate": 9.986182169340015e-07, |
| "loss": 131.5035, |
| "step": 8840 |
| }, |
| { |
| "epoch": 0.70827357066667, |
| "grad_norm": 77.125, |
| "learning_rate": 9.986166538309857e-07, |
| "loss": 131.417, |
| "step": 8850 |
| }, |
| { |
| "epoch": 0.7090738797860674, |
| "grad_norm": 71.4375, |
| "learning_rate": 9.9861509072797e-07, |
| "loss": 130.5734, |
| "step": 8860 |
| }, |
| { |
| "epoch": 0.7098741889054647, |
| "grad_norm": 79.75, |
| "learning_rate": 9.98613527624954e-07, |
| "loss": 130.9962, |
| "step": 8870 |
| }, |
| { |
| "epoch": 0.7106744980248622, |
| "grad_norm": 71.6875, |
| "learning_rate": 9.986119645219382e-07, |
| "loss": 131.3529, |
| "step": 8880 |
| }, |
| { |
| "epoch": 0.7114748071442595, |
| "grad_norm": 72.875, |
| "learning_rate": 9.986104014189224e-07, |
| "loss": 131.3156, |
| "step": 8890 |
| }, |
| { |
| "epoch": 0.7122751162636568, |
| "grad_norm": 73.0625, |
| "learning_rate": 9.986088383159064e-07, |
| "loss": 130.3418, |
| "step": 8900 |
| }, |
| { |
| "epoch": 0.7130754253830542, |
| "grad_norm": 79.1875, |
| "learning_rate": 9.986072752128906e-07, |
| "loss": 131.1646, |
| "step": 8910 |
| }, |
| { |
| "epoch": 0.7138757345024516, |
| "grad_norm": 81.6875, |
| "learning_rate": 9.986057121098748e-07, |
| "loss": 131.9824, |
| "step": 8920 |
| }, |
| { |
| "epoch": 0.7146760436218489, |
| "grad_norm": 81.8125, |
| "learning_rate": 9.98604149006859e-07, |
| "loss": 132.71, |
| "step": 8930 |
| }, |
| { |
| "epoch": 0.7154763527412463, |
| "grad_norm": 69.9375, |
| "learning_rate": 9.986025859038433e-07, |
| "loss": 131.8712, |
| "step": 8940 |
| }, |
| { |
| "epoch": 0.7162766618606436, |
| "grad_norm": 83.1875, |
| "learning_rate": 9.986010228008275e-07, |
| "loss": 131.4506, |
| "step": 8950 |
| }, |
| { |
| "epoch": 0.7170769709800411, |
| "grad_norm": 76.5625, |
| "learning_rate": 9.985994596978115e-07, |
| "loss": 131.5453, |
| "step": 8960 |
| }, |
| { |
| "epoch": 0.7178772800994384, |
| "grad_norm": 73.625, |
| "learning_rate": 9.985978965947957e-07, |
| "loss": 131.9996, |
| "step": 8970 |
| }, |
| { |
| "epoch": 0.7186775892188357, |
| "grad_norm": 78.75, |
| "learning_rate": 9.9859633349178e-07, |
| "loss": 132.0983, |
| "step": 8980 |
| }, |
| { |
| "epoch": 0.7194778983382332, |
| "grad_norm": 72.3125, |
| "learning_rate": 9.98594770388764e-07, |
| "loss": 131.87, |
| "step": 8990 |
| }, |
| { |
| "epoch": 0.7202782074576305, |
| "grad_norm": 73.625, |
| "learning_rate": 9.985932072857481e-07, |
| "loss": 131.6589, |
| "step": 9000 |
| }, |
| { |
| "epoch": 0.7210785165770279, |
| "grad_norm": 76.5, |
| "learning_rate": 9.985916441827324e-07, |
| "loss": 132.6549, |
| "step": 9010 |
| }, |
| { |
| "epoch": 0.7218788256964253, |
| "grad_norm": 80.75, |
| "learning_rate": 9.985900810797166e-07, |
| "loss": 131.3108, |
| "step": 9020 |
| }, |
| { |
| "epoch": 0.7226791348158226, |
| "grad_norm": 78.9375, |
| "learning_rate": 9.985885179767006e-07, |
| "loss": 132.2872, |
| "step": 9030 |
| }, |
| { |
| "epoch": 0.72347944393522, |
| "grad_norm": 76.8125, |
| "learning_rate": 9.985869548736848e-07, |
| "loss": 131.6559, |
| "step": 9040 |
| }, |
| { |
| "epoch": 0.7242797530546173, |
| "grad_norm": 80.1875, |
| "learning_rate": 9.98585391770669e-07, |
| "loss": 131.6873, |
| "step": 9050 |
| }, |
| { |
| "epoch": 0.7250800621740147, |
| "grad_norm": 76.875, |
| "learning_rate": 9.98583828667653e-07, |
| "loss": 131.4747, |
| "step": 9060 |
| }, |
| { |
| "epoch": 0.7258803712934121, |
| "grad_norm": 76.1875, |
| "learning_rate": 9.985822655646372e-07, |
| "loss": 131.7952, |
| "step": 9070 |
| }, |
| { |
| "epoch": 0.7266806804128094, |
| "grad_norm": 75.5, |
| "learning_rate": 9.985807024616215e-07, |
| "loss": 131.458, |
| "step": 9080 |
| }, |
| { |
| "epoch": 0.7274809895322069, |
| "grad_norm": 73.75, |
| "learning_rate": 9.985791393586057e-07, |
| "loss": 132.2186, |
| "step": 9090 |
| }, |
| { |
| "epoch": 0.7282812986516042, |
| "grad_norm": 79.5, |
| "learning_rate": 9.9857757625559e-07, |
| "loss": 133.5451, |
| "step": 9100 |
| }, |
| { |
| "epoch": 0.7290816077710015, |
| "grad_norm": 70.6875, |
| "learning_rate": 9.98576013152574e-07, |
| "loss": 131.524, |
| "step": 9110 |
| }, |
| { |
| "epoch": 0.7298819168903989, |
| "grad_norm": 78.25, |
| "learning_rate": 9.985744500495581e-07, |
| "loss": 132.1586, |
| "step": 9120 |
| }, |
| { |
| "epoch": 0.7306822260097963, |
| "grad_norm": 76.4375, |
| "learning_rate": 9.985728869465423e-07, |
| "loss": 131.0122, |
| "step": 9130 |
| }, |
| { |
| "epoch": 0.7314825351291937, |
| "grad_norm": 75.9375, |
| "learning_rate": 9.985713238435266e-07, |
| "loss": 132.5875, |
| "step": 9140 |
| }, |
| { |
| "epoch": 0.732282844248591, |
| "grad_norm": 77.8125, |
| "learning_rate": 9.985697607405106e-07, |
| "loss": 132.9425, |
| "step": 9150 |
| }, |
| { |
| "epoch": 0.7330831533679883, |
| "grad_norm": 71.6875, |
| "learning_rate": 9.985681976374948e-07, |
| "loss": 131.7688, |
| "step": 9160 |
| }, |
| { |
| "epoch": 0.7338834624873858, |
| "grad_norm": 78.0625, |
| "learning_rate": 9.98566634534479e-07, |
| "loss": 130.2091, |
| "step": 9170 |
| }, |
| { |
| "epoch": 0.7346837716067831, |
| "grad_norm": 73.375, |
| "learning_rate": 9.98565071431463e-07, |
| "loss": 132.0214, |
| "step": 9180 |
| }, |
| { |
| "epoch": 0.7354840807261804, |
| "grad_norm": 72.75, |
| "learning_rate": 9.985635083284472e-07, |
| "loss": 131.4804, |
| "step": 9190 |
| }, |
| { |
| "epoch": 0.7362843898455779, |
| "grad_norm": 72.8125, |
| "learning_rate": 9.985619452254314e-07, |
| "loss": 130.8607, |
| "step": 9200 |
| }, |
| { |
| "epoch": 0.7370846989649752, |
| "grad_norm": 75.4375, |
| "learning_rate": 9.985603821224157e-07, |
| "loss": 131.9145, |
| "step": 9210 |
| }, |
| { |
| "epoch": 0.7378850080843726, |
| "grad_norm": 73.875, |
| "learning_rate": 9.985588190193997e-07, |
| "loss": 130.5753, |
| "step": 9220 |
| }, |
| { |
| "epoch": 0.73868531720377, |
| "grad_norm": 81.0, |
| "learning_rate": 9.985572559163839e-07, |
| "loss": 132.0938, |
| "step": 9230 |
| }, |
| { |
| "epoch": 0.7394856263231673, |
| "grad_norm": 78.0625, |
| "learning_rate": 9.98555692813368e-07, |
| "loss": 132.836, |
| "step": 9240 |
| }, |
| { |
| "epoch": 0.7402859354425647, |
| "grad_norm": 76.25, |
| "learning_rate": 9.985541297103523e-07, |
| "loss": 131.6837, |
| "step": 9250 |
| }, |
| { |
| "epoch": 0.741086244561962, |
| "grad_norm": 74.125, |
| "learning_rate": 9.985525666073365e-07, |
| "loss": 132.7221, |
| "step": 9260 |
| }, |
| { |
| "epoch": 0.7418865536813595, |
| "grad_norm": 81.875, |
| "learning_rate": 9.985510035043205e-07, |
| "loss": 131.037, |
| "step": 9270 |
| }, |
| { |
| "epoch": 0.7426868628007568, |
| "grad_norm": 77.0, |
| "learning_rate": 9.985494404013048e-07, |
| "loss": 130.6028, |
| "step": 9280 |
| }, |
| { |
| "epoch": 0.7434871719201541, |
| "grad_norm": 76.4375, |
| "learning_rate": 9.98547877298289e-07, |
| "loss": 131.0622, |
| "step": 9290 |
| }, |
| { |
| "epoch": 0.7442874810395516, |
| "grad_norm": 72.3125, |
| "learning_rate": 9.985463141952732e-07, |
| "loss": 130.6438, |
| "step": 9300 |
| }, |
| { |
| "epoch": 0.7450877901589489, |
| "grad_norm": 73.75, |
| "learning_rate": 9.985447510922572e-07, |
| "loss": 132.5858, |
| "step": 9310 |
| }, |
| { |
| "epoch": 0.7458880992783462, |
| "grad_norm": 81.375, |
| "learning_rate": 9.985431879892414e-07, |
| "loss": 131.7563, |
| "step": 9320 |
| }, |
| { |
| "epoch": 0.7466884083977436, |
| "grad_norm": 75.0625, |
| "learning_rate": 9.985416248862256e-07, |
| "loss": 131.6322, |
| "step": 9330 |
| }, |
| { |
| "epoch": 0.747488717517141, |
| "grad_norm": 74.375, |
| "learning_rate": 9.985400617832097e-07, |
| "loss": 129.3306, |
| "step": 9340 |
| }, |
| { |
| "epoch": 0.7482890266365384, |
| "grad_norm": 77.25, |
| "learning_rate": 9.985384986801939e-07, |
| "loss": 131.3398, |
| "step": 9350 |
| }, |
| { |
| "epoch": 0.7490893357559357, |
| "grad_norm": 80.4375, |
| "learning_rate": 9.98536935577178e-07, |
| "loss": 131.5261, |
| "step": 9360 |
| }, |
| { |
| "epoch": 0.749889644875333, |
| "grad_norm": 76.625, |
| "learning_rate": 9.985353724741623e-07, |
| "loss": 130.4829, |
| "step": 9370 |
| }, |
| { |
| "epoch": 0.7506899539947305, |
| "grad_norm": 75.1875, |
| "learning_rate": 9.985338093711463e-07, |
| "loss": 130.9288, |
| "step": 9380 |
| }, |
| { |
| "epoch": 0.7514902631141278, |
| "grad_norm": 74.5, |
| "learning_rate": 9.985322462681305e-07, |
| "loss": 131.758, |
| "step": 9390 |
| }, |
| { |
| "epoch": 0.7522905722335252, |
| "grad_norm": 72.9375, |
| "learning_rate": 9.985306831651148e-07, |
| "loss": 131.2881, |
| "step": 9400 |
| }, |
| { |
| "epoch": 0.7530908813529226, |
| "grad_norm": 77.875, |
| "learning_rate": 9.98529120062099e-07, |
| "loss": 132.0472, |
| "step": 9410 |
| }, |
| { |
| "epoch": 0.7538911904723199, |
| "grad_norm": 81.875, |
| "learning_rate": 9.985275569590832e-07, |
| "loss": 133.1372, |
| "step": 9420 |
| }, |
| { |
| "epoch": 0.7546914995917173, |
| "grad_norm": 76.4375, |
| "learning_rate": 9.985259938560672e-07, |
| "loss": 132.0327, |
| "step": 9430 |
| }, |
| { |
| "epoch": 0.7554918087111147, |
| "grad_norm": 82.5, |
| "learning_rate": 9.985244307530514e-07, |
| "loss": 130.7069, |
| "step": 9440 |
| }, |
| { |
| "epoch": 0.7562921178305121, |
| "grad_norm": 79.9375, |
| "learning_rate": 9.985228676500356e-07, |
| "loss": 131.1774, |
| "step": 9450 |
| }, |
| { |
| "epoch": 0.7570924269499094, |
| "grad_norm": 76.9375, |
| "learning_rate": 9.985213045470198e-07, |
| "loss": 131.4195, |
| "step": 9460 |
| }, |
| { |
| "epoch": 0.7578927360693067, |
| "grad_norm": 83.25, |
| "learning_rate": 9.985197414440039e-07, |
| "loss": 130.8186, |
| "step": 9470 |
| }, |
| { |
| "epoch": 0.7586930451887042, |
| "grad_norm": 76.1875, |
| "learning_rate": 9.98518178340988e-07, |
| "loss": 129.8441, |
| "step": 9480 |
| }, |
| { |
| "epoch": 0.7594933543081015, |
| "grad_norm": 71.5, |
| "learning_rate": 9.985166152379723e-07, |
| "loss": 131.93, |
| "step": 9490 |
| }, |
| { |
| "epoch": 0.7602936634274988, |
| "grad_norm": 79.0625, |
| "learning_rate": 9.985150521349563e-07, |
| "loss": 131.2559, |
| "step": 9500 |
| }, |
| { |
| "epoch": 0.7610939725468963, |
| "grad_norm": 69.3125, |
| "learning_rate": 9.985134890319405e-07, |
| "loss": 131.3485, |
| "step": 9510 |
| }, |
| { |
| "epoch": 0.7618942816662936, |
| "grad_norm": 79.5, |
| "learning_rate": 9.985119259289247e-07, |
| "loss": 130.6342, |
| "step": 9520 |
| }, |
| { |
| "epoch": 0.762694590785691, |
| "grad_norm": 78.3125, |
| "learning_rate": 9.985103628259087e-07, |
| "loss": 132.0574, |
| "step": 9530 |
| }, |
| { |
| "epoch": 0.7634948999050883, |
| "grad_norm": 73.0, |
| "learning_rate": 9.98508799722893e-07, |
| "loss": 130.7042, |
| "step": 9540 |
| }, |
| { |
| "epoch": 0.7642952090244857, |
| "grad_norm": 87.75, |
| "learning_rate": 9.985072366198772e-07, |
| "loss": 131.7032, |
| "step": 9550 |
| }, |
| { |
| "epoch": 0.7650955181438831, |
| "grad_norm": 78.9375, |
| "learning_rate": 9.985056735168614e-07, |
| "loss": 132.2712, |
| "step": 9560 |
| }, |
| { |
| "epoch": 0.7658958272632804, |
| "grad_norm": 75.25, |
| "learning_rate": 9.985041104138456e-07, |
| "loss": 132.5755, |
| "step": 9570 |
| }, |
| { |
| "epoch": 0.7666961363826779, |
| "grad_norm": 74.9375, |
| "learning_rate": 9.985025473108296e-07, |
| "loss": 131.8575, |
| "step": 9580 |
| }, |
| { |
| "epoch": 0.7674964455020752, |
| "grad_norm": 74.3125, |
| "learning_rate": 9.985009842078138e-07, |
| "loss": 131.8135, |
| "step": 9590 |
| }, |
| { |
| "epoch": 0.7682967546214725, |
| "grad_norm": 74.5, |
| "learning_rate": 9.98499421104798e-07, |
| "loss": 130.349, |
| "step": 9600 |
| }, |
| { |
| "epoch": 0.76909706374087, |
| "grad_norm": 75.625, |
| "learning_rate": 9.984978580017823e-07, |
| "loss": 132.1411, |
| "step": 9610 |
| }, |
| { |
| "epoch": 0.7698973728602673, |
| "grad_norm": 78.4375, |
| "learning_rate": 9.984962948987665e-07, |
| "loss": 132.007, |
| "step": 9620 |
| }, |
| { |
| "epoch": 0.7706976819796646, |
| "grad_norm": 76.875, |
| "learning_rate": 9.984947317957505e-07, |
| "loss": 131.2099, |
| "step": 9630 |
| }, |
| { |
| "epoch": 0.771497991099062, |
| "grad_norm": 78.875, |
| "learning_rate": 9.984931686927347e-07, |
| "loss": 130.7354, |
| "step": 9640 |
| }, |
| { |
| "epoch": 0.7722983002184594, |
| "grad_norm": 79.875, |
| "learning_rate": 9.98491605589719e-07, |
| "loss": 132.1408, |
| "step": 9650 |
| }, |
| { |
| "epoch": 0.7730986093378568, |
| "grad_norm": 72.1875, |
| "learning_rate": 9.98490042486703e-07, |
| "loss": 130.8756, |
| "step": 9660 |
| }, |
| { |
| "epoch": 0.7738989184572541, |
| "grad_norm": 69.4375, |
| "learning_rate": 9.984884793836872e-07, |
| "loss": 131.2112, |
| "step": 9670 |
| }, |
| { |
| "epoch": 0.7746992275766514, |
| "grad_norm": 76.875, |
| "learning_rate": 9.984869162806714e-07, |
| "loss": 131.0196, |
| "step": 9680 |
| }, |
| { |
| "epoch": 0.7754995366960489, |
| "grad_norm": 85.0, |
| "learning_rate": 9.984853531776554e-07, |
| "loss": 131.2716, |
| "step": 9690 |
| }, |
| { |
| "epoch": 0.7762998458154462, |
| "grad_norm": 72.5, |
| "learning_rate": 9.984837900746396e-07, |
| "loss": 130.7754, |
| "step": 9700 |
| }, |
| { |
| "epoch": 0.7771001549348436, |
| "grad_norm": 75.0, |
| "learning_rate": 9.984822269716238e-07, |
| "loss": 131.3929, |
| "step": 9710 |
| }, |
| { |
| "epoch": 0.777900464054241, |
| "grad_norm": 75.6875, |
| "learning_rate": 9.98480663868608e-07, |
| "loss": 131.4491, |
| "step": 9720 |
| }, |
| { |
| "epoch": 0.7787007731736383, |
| "grad_norm": 77.8125, |
| "learning_rate": 9.984791007655923e-07, |
| "loss": 130.3078, |
| "step": 9730 |
| }, |
| { |
| "epoch": 0.7795010822930357, |
| "grad_norm": 79.375, |
| "learning_rate": 9.984775376625763e-07, |
| "loss": 132.3819, |
| "step": 9740 |
| }, |
| { |
| "epoch": 0.780301391412433, |
| "grad_norm": 77.875, |
| "learning_rate": 9.984759745595605e-07, |
| "loss": 132.8559, |
| "step": 9750 |
| }, |
| { |
| "epoch": 0.7811017005318304, |
| "grad_norm": 76.875, |
| "learning_rate": 9.984744114565447e-07, |
| "loss": 130.5132, |
| "step": 9760 |
| }, |
| { |
| "epoch": 0.7819020096512278, |
| "grad_norm": 80.625, |
| "learning_rate": 9.98472848353529e-07, |
| "loss": 130.5794, |
| "step": 9770 |
| }, |
| { |
| "epoch": 0.7827023187706251, |
| "grad_norm": 79.1875, |
| "learning_rate": 9.984712852505131e-07, |
| "loss": 132.3585, |
| "step": 9780 |
| }, |
| { |
| "epoch": 0.7835026278900226, |
| "grad_norm": 76.625, |
| "learning_rate": 9.984697221474971e-07, |
| "loss": 132.778, |
| "step": 9790 |
| }, |
| { |
| "epoch": 0.7843029370094199, |
| "grad_norm": 74.625, |
| "learning_rate": 9.984681590444814e-07, |
| "loss": 130.5637, |
| "step": 9800 |
| }, |
| { |
| "epoch": 0.7851032461288172, |
| "grad_norm": 82.9375, |
| "learning_rate": 9.984665959414656e-07, |
| "loss": 130.9602, |
| "step": 9810 |
| }, |
| { |
| "epoch": 0.7859035552482146, |
| "grad_norm": 78.5, |
| "learning_rate": 9.984650328384496e-07, |
| "loss": 131.8248, |
| "step": 9820 |
| }, |
| { |
| "epoch": 0.786703864367612, |
| "grad_norm": 71.875, |
| "learning_rate": 9.984634697354338e-07, |
| "loss": 132.0758, |
| "step": 9830 |
| }, |
| { |
| "epoch": 0.7875041734870094, |
| "grad_norm": 78.5625, |
| "learning_rate": 9.98461906632418e-07, |
| "loss": 131.6132, |
| "step": 9840 |
| }, |
| { |
| "epoch": 0.7883044826064067, |
| "grad_norm": 82.9375, |
| "learning_rate": 9.98460343529402e-07, |
| "loss": 130.1421, |
| "step": 9850 |
| }, |
| { |
| "epoch": 0.7891047917258041, |
| "grad_norm": 90.0625, |
| "learning_rate": 9.984587804263863e-07, |
| "loss": 130.7962, |
| "step": 9860 |
| }, |
| { |
| "epoch": 0.7899051008452015, |
| "grad_norm": 75.5, |
| "learning_rate": 9.984572173233705e-07, |
| "loss": 131.8376, |
| "step": 9870 |
| }, |
| { |
| "epoch": 0.7907054099645988, |
| "grad_norm": 75.4375, |
| "learning_rate": 9.984556542203547e-07, |
| "loss": 132.0439, |
| "step": 9880 |
| }, |
| { |
| "epoch": 0.7915057190839961, |
| "grad_norm": 86.25, |
| "learning_rate": 9.98454091117339e-07, |
| "loss": 130.9918, |
| "step": 9890 |
| }, |
| { |
| "epoch": 0.7923060282033936, |
| "grad_norm": 77.1875, |
| "learning_rate": 9.98452528014323e-07, |
| "loss": 132.0871, |
| "step": 9900 |
| }, |
| { |
| "epoch": 0.7931063373227909, |
| "grad_norm": 78.375, |
| "learning_rate": 9.984509649113071e-07, |
| "loss": 132.0267, |
| "step": 9910 |
| }, |
| { |
| "epoch": 0.7939066464421883, |
| "grad_norm": 75.125, |
| "learning_rate": 9.984494018082913e-07, |
| "loss": 131.7625, |
| "step": 9920 |
| }, |
| { |
| "epoch": 0.7947069555615857, |
| "grad_norm": 78.75, |
| "learning_rate": 9.984478387052756e-07, |
| "loss": 131.006, |
| "step": 9930 |
| }, |
| { |
| "epoch": 0.795507264680983, |
| "grad_norm": 72.375, |
| "learning_rate": 9.984462756022598e-07, |
| "loss": 130.9044, |
| "step": 9940 |
| }, |
| { |
| "epoch": 0.7963075738003804, |
| "grad_norm": 77.8125, |
| "learning_rate": 9.984447124992438e-07, |
| "loss": 131.8305, |
| "step": 9950 |
| }, |
| { |
| "epoch": 0.7971078829197777, |
| "grad_norm": 79.8125, |
| "learning_rate": 9.98443149396228e-07, |
| "loss": 129.9232, |
| "step": 9960 |
| }, |
| { |
| "epoch": 0.7979081920391752, |
| "grad_norm": 72.5625, |
| "learning_rate": 9.984415862932122e-07, |
| "loss": 131.7845, |
| "step": 9970 |
| }, |
| { |
| "epoch": 0.7987085011585725, |
| "grad_norm": 72.5, |
| "learning_rate": 9.984400231901962e-07, |
| "loss": 131.7831, |
| "step": 9980 |
| }, |
| { |
| "epoch": 0.7995088102779698, |
| "grad_norm": 77.125, |
| "learning_rate": 9.984384600871805e-07, |
| "loss": 133.024, |
| "step": 9990 |
| }, |
| { |
| "epoch": 0.8003091193973673, |
| "grad_norm": 81.375, |
| "learning_rate": 9.984368969841647e-07, |
| "loss": 131.0512, |
| "step": 10000 |
| }, |
| { |
| "epoch": 0.8003091193973673, |
| "eval_loss": 2.0527355670928955, |
| "eval_runtime": 416.7476, |
| "eval_samples_per_second": 1574.473, |
| "eval_steps_per_second": 49.202, |
| "step": 10000 |
| }, |
| { |
| "epoch": 0.8011094285167646, |
| "grad_norm": 77.1875, |
| "learning_rate": 9.984353338811487e-07, |
| "loss": 130.3064, |
| "step": 10010 |
| }, |
| { |
| "epoch": 0.8019097376361619, |
| "grad_norm": 74.0, |
| "learning_rate": 9.98433770778133e-07, |
| "loss": 132.0653, |
| "step": 10020 |
| }, |
| { |
| "epoch": 0.8027100467555593, |
| "grad_norm": 84.875, |
| "learning_rate": 9.984322076751171e-07, |
| "loss": 132.2227, |
| "step": 10030 |
| }, |
| { |
| "epoch": 0.8035103558749567, |
| "grad_norm": 76.625, |
| "learning_rate": 9.984306445721013e-07, |
| "loss": 130.7247, |
| "step": 10040 |
| }, |
| { |
| "epoch": 0.8043106649943541, |
| "grad_norm": 75.8125, |
| "learning_rate": 9.984290814690853e-07, |
| "loss": 131.7206, |
| "step": 10050 |
| }, |
| { |
| "epoch": 0.8051109741137514, |
| "grad_norm": 73.6875, |
| "learning_rate": 9.984275183660696e-07, |
| "loss": 131.013, |
| "step": 10060 |
| }, |
| { |
| "epoch": 0.8059112832331488, |
| "grad_norm": 72.625, |
| "learning_rate": 9.984259552630538e-07, |
| "loss": 132.8077, |
| "step": 10070 |
| }, |
| { |
| "epoch": 0.8067115923525462, |
| "grad_norm": 72.375, |
| "learning_rate": 9.98424392160038e-07, |
| "loss": 132.2377, |
| "step": 10080 |
| }, |
| { |
| "epoch": 0.8075119014719435, |
| "grad_norm": 82.5625, |
| "learning_rate": 9.984228290570222e-07, |
| "loss": 131.6139, |
| "step": 10090 |
| }, |
| { |
| "epoch": 0.808312210591341, |
| "grad_norm": 75.4375, |
| "learning_rate": 9.984212659540062e-07, |
| "loss": 130.3295, |
| "step": 10100 |
| }, |
| { |
| "epoch": 0.8091125197107383, |
| "grad_norm": 75.5, |
| "learning_rate": 9.984197028509904e-07, |
| "loss": 131.7549, |
| "step": 10110 |
| }, |
| { |
| "epoch": 0.8099128288301356, |
| "grad_norm": 77.875, |
| "learning_rate": 9.984181397479747e-07, |
| "loss": 131.4637, |
| "step": 10120 |
| }, |
| { |
| "epoch": 0.810713137949533, |
| "grad_norm": 74.8125, |
| "learning_rate": 9.984165766449589e-07, |
| "loss": 131.4822, |
| "step": 10130 |
| }, |
| { |
| "epoch": 0.8115134470689304, |
| "grad_norm": 78.6875, |
| "learning_rate": 9.984150135419429e-07, |
| "loss": 131.1819, |
| "step": 10140 |
| }, |
| { |
| "epoch": 0.8123137561883278, |
| "grad_norm": 78.0, |
| "learning_rate": 9.98413450438927e-07, |
| "loss": 131.0785, |
| "step": 10150 |
| }, |
| { |
| "epoch": 0.8131140653077251, |
| "grad_norm": 78.9375, |
| "learning_rate": 9.984118873359113e-07, |
| "loss": 130.286, |
| "step": 10160 |
| }, |
| { |
| "epoch": 0.8139143744271224, |
| "grad_norm": 78.0625, |
| "learning_rate": 9.984103242328953e-07, |
| "loss": 131.6022, |
| "step": 10170 |
| }, |
| { |
| "epoch": 0.8147146835465199, |
| "grad_norm": 78.375, |
| "learning_rate": 9.984087611298795e-07, |
| "loss": 131.5716, |
| "step": 10180 |
| }, |
| { |
| "epoch": 0.8155149926659172, |
| "grad_norm": 74.875, |
| "learning_rate": 9.984071980268638e-07, |
| "loss": 131.0095, |
| "step": 10190 |
| }, |
| { |
| "epoch": 0.8163153017853145, |
| "grad_norm": 77.875, |
| "learning_rate": 9.98405634923848e-07, |
| "loss": 131.2664, |
| "step": 10200 |
| }, |
| { |
| "epoch": 0.817115610904712, |
| "grad_norm": 73.5625, |
| "learning_rate": 9.98404071820832e-07, |
| "loss": 131.8767, |
| "step": 10210 |
| }, |
| { |
| "epoch": 0.8179159200241093, |
| "grad_norm": 79.8125, |
| "learning_rate": 9.984025087178162e-07, |
| "loss": 128.864, |
| "step": 10220 |
| }, |
| { |
| "epoch": 0.8187162291435067, |
| "grad_norm": 77.375, |
| "learning_rate": 9.984009456148004e-07, |
| "loss": 130.4429, |
| "step": 10230 |
| }, |
| { |
| "epoch": 0.819516538262904, |
| "grad_norm": 70.8125, |
| "learning_rate": 9.983993825117846e-07, |
| "loss": 128.6023, |
| "step": 10240 |
| }, |
| { |
| "epoch": 0.8203168473823014, |
| "grad_norm": 74.4375, |
| "learning_rate": 9.983978194087689e-07, |
| "loss": 130.9503, |
| "step": 10250 |
| }, |
| { |
| "epoch": 0.8211171565016988, |
| "grad_norm": 80.125, |
| "learning_rate": 9.983962563057529e-07, |
| "loss": 132.8031, |
| "step": 10260 |
| }, |
| { |
| "epoch": 0.8219174656210961, |
| "grad_norm": 73.8125, |
| "learning_rate": 9.98394693202737e-07, |
| "loss": 130.115, |
| "step": 10270 |
| }, |
| { |
| "epoch": 0.8227177747404936, |
| "grad_norm": 73.75, |
| "learning_rate": 9.983931300997213e-07, |
| "loss": 130.5055, |
| "step": 10280 |
| }, |
| { |
| "epoch": 0.8235180838598909, |
| "grad_norm": 74.0, |
| "learning_rate": 9.983915669967053e-07, |
| "loss": 130.6606, |
| "step": 10290 |
| }, |
| { |
| "epoch": 0.8243183929792882, |
| "grad_norm": 78.6875, |
| "learning_rate": 9.983900038936895e-07, |
| "loss": 130.8658, |
| "step": 10300 |
| }, |
| { |
| "epoch": 0.8251187020986857, |
| "grad_norm": 78.0, |
| "learning_rate": 9.983884407906737e-07, |
| "loss": 131.8639, |
| "step": 10310 |
| }, |
| { |
| "epoch": 0.825919011218083, |
| "grad_norm": 74.0, |
| "learning_rate": 9.98386877687658e-07, |
| "loss": 131.2057, |
| "step": 10320 |
| }, |
| { |
| "epoch": 0.8267193203374803, |
| "grad_norm": 78.0625, |
| "learning_rate": 9.98385314584642e-07, |
| "loss": 131.9255, |
| "step": 10330 |
| }, |
| { |
| "epoch": 0.8275196294568777, |
| "grad_norm": 78.75, |
| "learning_rate": 9.983837514816262e-07, |
| "loss": 131.7983, |
| "step": 10340 |
| }, |
| { |
| "epoch": 0.8283199385762751, |
| "grad_norm": 81.9375, |
| "learning_rate": 9.983821883786104e-07, |
| "loss": 132.316, |
| "step": 10350 |
| }, |
| { |
| "epoch": 0.8291202476956725, |
| "grad_norm": 80.875, |
| "learning_rate": 9.983806252755946e-07, |
| "loss": 131.1004, |
| "step": 10360 |
| }, |
| { |
| "epoch": 0.8299205568150698, |
| "grad_norm": 71.0, |
| "learning_rate": 9.983790621725786e-07, |
| "loss": 132.1643, |
| "step": 10370 |
| }, |
| { |
| "epoch": 0.8307208659344671, |
| "grad_norm": 74.375, |
| "learning_rate": 9.983774990695628e-07, |
| "loss": 130.8654, |
| "step": 10380 |
| }, |
| { |
| "epoch": 0.8315211750538646, |
| "grad_norm": 74.875, |
| "learning_rate": 9.98375935966547e-07, |
| "loss": 130.205, |
| "step": 10390 |
| }, |
| { |
| "epoch": 0.8323214841732619, |
| "grad_norm": 73.375, |
| "learning_rate": 9.983743728635313e-07, |
| "loss": 130.6661, |
| "step": 10400 |
| }, |
| { |
| "epoch": 0.8331217932926593, |
| "grad_norm": 73.4375, |
| "learning_rate": 9.983728097605155e-07, |
| "loss": 131.2944, |
| "step": 10410 |
| }, |
| { |
| "epoch": 0.8339221024120567, |
| "grad_norm": 75.875, |
| "learning_rate": 9.983712466574995e-07, |
| "loss": 132.2854, |
| "step": 10420 |
| }, |
| { |
| "epoch": 0.834722411531454, |
| "grad_norm": 80.625, |
| "learning_rate": 9.983696835544837e-07, |
| "loss": 130.7651, |
| "step": 10430 |
| }, |
| { |
| "epoch": 0.8355227206508514, |
| "grad_norm": 71.6875, |
| "learning_rate": 9.98368120451468e-07, |
| "loss": 130.8648, |
| "step": 10440 |
| }, |
| { |
| "epoch": 0.8363230297702487, |
| "grad_norm": 77.125, |
| "learning_rate": 9.98366557348452e-07, |
| "loss": 130.8581, |
| "step": 10450 |
| }, |
| { |
| "epoch": 0.8371233388896461, |
| "grad_norm": 73.625, |
| "learning_rate": 9.983649942454362e-07, |
| "loss": 130.3991, |
| "step": 10460 |
| }, |
| { |
| "epoch": 0.8379236480090435, |
| "grad_norm": 79.375, |
| "learning_rate": 9.983634311424204e-07, |
| "loss": 131.2294, |
| "step": 10470 |
| }, |
| { |
| "epoch": 0.8387239571284408, |
| "grad_norm": 78.3125, |
| "learning_rate": 9.983618680394046e-07, |
| "loss": 129.3754, |
| "step": 10480 |
| }, |
| { |
| "epoch": 0.8395242662478383, |
| "grad_norm": 81.75, |
| "learning_rate": 9.983603049363886e-07, |
| "loss": 131.7653, |
| "step": 10490 |
| }, |
| { |
| "epoch": 0.8403245753672356, |
| "grad_norm": 74.9375, |
| "learning_rate": 9.983587418333728e-07, |
| "loss": 131.5391, |
| "step": 10500 |
| }, |
| { |
| "epoch": 0.8411248844866329, |
| "grad_norm": 76.75, |
| "learning_rate": 9.98357178730357e-07, |
| "loss": 131.4889, |
| "step": 10510 |
| }, |
| { |
| "epoch": 0.8419251936060304, |
| "grad_norm": 72.1875, |
| "learning_rate": 9.98355615627341e-07, |
| "loss": 133.0458, |
| "step": 10520 |
| }, |
| { |
| "epoch": 0.8427255027254277, |
| "grad_norm": 81.3125, |
| "learning_rate": 9.983540525243253e-07, |
| "loss": 130.8461, |
| "step": 10530 |
| }, |
| { |
| "epoch": 0.8435258118448251, |
| "grad_norm": 74.75, |
| "learning_rate": 9.983524894213095e-07, |
| "loss": 131.7515, |
| "step": 10540 |
| }, |
| { |
| "epoch": 0.8443261209642224, |
| "grad_norm": 70.8125, |
| "learning_rate": 9.983509263182937e-07, |
| "loss": 132.1545, |
| "step": 10550 |
| }, |
| { |
| "epoch": 0.8451264300836198, |
| "grad_norm": 76.9375, |
| "learning_rate": 9.98349363215278e-07, |
| "loss": 130.9946, |
| "step": 10560 |
| }, |
| { |
| "epoch": 0.8459267392030172, |
| "grad_norm": 72.25, |
| "learning_rate": 9.983478001122621e-07, |
| "loss": 130.338, |
| "step": 10570 |
| }, |
| { |
| "epoch": 0.8467270483224145, |
| "grad_norm": 75.1875, |
| "learning_rate": 9.983462370092462e-07, |
| "loss": 131.5349, |
| "step": 10580 |
| }, |
| { |
| "epoch": 0.8475273574418118, |
| "grad_norm": 70.125, |
| "learning_rate": 9.983446739062304e-07, |
| "loss": 130.6219, |
| "step": 10590 |
| }, |
| { |
| "epoch": 0.8483276665612093, |
| "grad_norm": 78.0625, |
| "learning_rate": 9.983431108032146e-07, |
| "loss": 131.3493, |
| "step": 10600 |
| }, |
| { |
| "epoch": 0.8491279756806066, |
| "grad_norm": 74.0, |
| "learning_rate": 9.983415477001986e-07, |
| "loss": 131.0083, |
| "step": 10610 |
| }, |
| { |
| "epoch": 0.849928284800004, |
| "grad_norm": 80.75, |
| "learning_rate": 9.983399845971828e-07, |
| "loss": 131.9277, |
| "step": 10620 |
| }, |
| { |
| "epoch": 0.8507285939194014, |
| "grad_norm": 79.1875, |
| "learning_rate": 9.98338421494167e-07, |
| "loss": 130.602, |
| "step": 10630 |
| }, |
| { |
| "epoch": 0.8515289030387987, |
| "grad_norm": 77.5625, |
| "learning_rate": 9.98336858391151e-07, |
| "loss": 130.4636, |
| "step": 10640 |
| }, |
| { |
| "epoch": 0.8523292121581961, |
| "grad_norm": 77.75, |
| "learning_rate": 9.983352952881353e-07, |
| "loss": 131.2724, |
| "step": 10650 |
| }, |
| { |
| "epoch": 0.8531295212775935, |
| "grad_norm": 78.875, |
| "learning_rate": 9.983337321851195e-07, |
| "loss": 130.8466, |
| "step": 10660 |
| }, |
| { |
| "epoch": 0.8539298303969909, |
| "grad_norm": 77.375, |
| "learning_rate": 9.983321690821037e-07, |
| "loss": 130.6045, |
| "step": 10670 |
| }, |
| { |
| "epoch": 0.8547301395163882, |
| "grad_norm": 77.875, |
| "learning_rate": 9.983306059790877e-07, |
| "loss": 131.8468, |
| "step": 10680 |
| }, |
| { |
| "epoch": 0.8555304486357855, |
| "grad_norm": 75.25, |
| "learning_rate": 9.98329042876072e-07, |
| "loss": 130.7249, |
| "step": 10690 |
| }, |
| { |
| "epoch": 0.856330757755183, |
| "grad_norm": 79.25, |
| "learning_rate": 9.983274797730561e-07, |
| "loss": 130.1633, |
| "step": 10700 |
| }, |
| { |
| "epoch": 0.8571310668745803, |
| "grad_norm": 74.625, |
| "learning_rate": 9.983259166700404e-07, |
| "loss": 130.6425, |
| "step": 10710 |
| }, |
| { |
| "epoch": 0.8579313759939776, |
| "grad_norm": 75.25, |
| "learning_rate": 9.983243535670246e-07, |
| "loss": 130.9092, |
| "step": 10720 |
| }, |
| { |
| "epoch": 0.858731685113375, |
| "grad_norm": 77.625, |
| "learning_rate": 9.983227904640088e-07, |
| "loss": 129.9958, |
| "step": 10730 |
| }, |
| { |
| "epoch": 0.8595319942327724, |
| "grad_norm": 79.5, |
| "learning_rate": 9.983212273609928e-07, |
| "loss": 132.0485, |
| "step": 10740 |
| }, |
| { |
| "epoch": 0.8603323033521698, |
| "grad_norm": 77.0625, |
| "learning_rate": 9.98319664257977e-07, |
| "loss": 131.1308, |
| "step": 10750 |
| }, |
| { |
| "epoch": 0.8611326124715671, |
| "grad_norm": 82.4375, |
| "learning_rate": 9.983181011549612e-07, |
| "loss": 131.665, |
| "step": 10760 |
| }, |
| { |
| "epoch": 0.8619329215909645, |
| "grad_norm": 74.375, |
| "learning_rate": 9.983165380519452e-07, |
| "loss": 131.7367, |
| "step": 10770 |
| }, |
| { |
| "epoch": 0.8627332307103619, |
| "grad_norm": 77.9375, |
| "learning_rate": 9.983149749489295e-07, |
| "loss": 130.5749, |
| "step": 10780 |
| }, |
| { |
| "epoch": 0.8635335398297592, |
| "grad_norm": 72.1875, |
| "learning_rate": 9.983134118459137e-07, |
| "loss": 129.4679, |
| "step": 10790 |
| }, |
| { |
| "epoch": 0.8643338489491567, |
| "grad_norm": 79.1875, |
| "learning_rate": 9.983118487428977e-07, |
| "loss": 132.4236, |
| "step": 10800 |
| }, |
| { |
| "epoch": 0.865134158068554, |
| "grad_norm": 72.75, |
| "learning_rate": 9.98310285639882e-07, |
| "loss": 129.7827, |
| "step": 10810 |
| }, |
| { |
| "epoch": 0.8659344671879513, |
| "grad_norm": 73.5625, |
| "learning_rate": 9.983087225368661e-07, |
| "loss": 131.0587, |
| "step": 10820 |
| }, |
| { |
| "epoch": 0.8667347763073487, |
| "grad_norm": 75.375, |
| "learning_rate": 9.983071594338503e-07, |
| "loss": 131.5654, |
| "step": 10830 |
| }, |
| { |
| "epoch": 0.8675350854267461, |
| "grad_norm": 79.875, |
| "learning_rate": 9.983055963308343e-07, |
| "loss": 131.1106, |
| "step": 10840 |
| }, |
| { |
| "epoch": 0.8683353945461435, |
| "grad_norm": 74.0625, |
| "learning_rate": 9.983040332278186e-07, |
| "loss": 131.5627, |
| "step": 10850 |
| }, |
| { |
| "epoch": 0.8691357036655408, |
| "grad_norm": 75.0, |
| "learning_rate": 9.983024701248028e-07, |
| "loss": 130.7058, |
| "step": 10860 |
| }, |
| { |
| "epoch": 0.8699360127849382, |
| "grad_norm": 77.125, |
| "learning_rate": 9.98300907021787e-07, |
| "loss": 131.1618, |
| "step": 10870 |
| }, |
| { |
| "epoch": 0.8707363219043356, |
| "grad_norm": 78.5, |
| "learning_rate": 9.982993439187712e-07, |
| "loss": 131.6918, |
| "step": 10880 |
| }, |
| { |
| "epoch": 0.8715366310237329, |
| "grad_norm": 77.0, |
| "learning_rate": 9.982977808157552e-07, |
| "loss": 130.2754, |
| "step": 10890 |
| }, |
| { |
| "epoch": 0.8723369401431302, |
| "grad_norm": 76.375, |
| "learning_rate": 9.982962177127394e-07, |
| "loss": 130.6358, |
| "step": 10900 |
| }, |
| { |
| "epoch": 0.8731372492625277, |
| "grad_norm": 76.1875, |
| "learning_rate": 9.982946546097237e-07, |
| "loss": 131.4433, |
| "step": 10910 |
| }, |
| { |
| "epoch": 0.873937558381925, |
| "grad_norm": 76.8125, |
| "learning_rate": 9.982930915067079e-07, |
| "loss": 131.1567, |
| "step": 10920 |
| }, |
| { |
| "epoch": 0.8747378675013224, |
| "grad_norm": 75.0625, |
| "learning_rate": 9.982915284036919e-07, |
| "loss": 130.6782, |
| "step": 10930 |
| }, |
| { |
| "epoch": 0.8755381766207198, |
| "grad_norm": 73.4375, |
| "learning_rate": 9.982899653006761e-07, |
| "loss": 130.3121, |
| "step": 10940 |
| }, |
| { |
| "epoch": 0.8763384857401171, |
| "grad_norm": 75.25, |
| "learning_rate": 9.982884021976603e-07, |
| "loss": 131.139, |
| "step": 10950 |
| }, |
| { |
| "epoch": 0.8771387948595145, |
| "grad_norm": 73.4375, |
| "learning_rate": 9.982868390946443e-07, |
| "loss": 130.9144, |
| "step": 10960 |
| }, |
| { |
| "epoch": 0.8779391039789118, |
| "grad_norm": 79.5, |
| "learning_rate": 9.982852759916286e-07, |
| "loss": 129.6913, |
| "step": 10970 |
| }, |
| { |
| "epoch": 0.8787394130983093, |
| "grad_norm": 73.3125, |
| "learning_rate": 9.982837128886128e-07, |
| "loss": 129.9289, |
| "step": 10980 |
| }, |
| { |
| "epoch": 0.8795397222177066, |
| "grad_norm": 73.8125, |
| "learning_rate": 9.98282149785597e-07, |
| "loss": 132.2781, |
| "step": 10990 |
| }, |
| { |
| "epoch": 0.8803400313371039, |
| "grad_norm": 75.9375, |
| "learning_rate": 9.98280586682581e-07, |
| "loss": 130.8845, |
| "step": 11000 |
| }, |
| { |
| "epoch": 0.8811403404565014, |
| "grad_norm": 76.25, |
| "learning_rate": 9.982790235795652e-07, |
| "loss": 129.7611, |
| "step": 11010 |
| }, |
| { |
| "epoch": 0.8819406495758987, |
| "grad_norm": 83.375, |
| "learning_rate": 9.982774604765494e-07, |
| "loss": 129.9167, |
| "step": 11020 |
| }, |
| { |
| "epoch": 0.882740958695296, |
| "grad_norm": 76.3125, |
| "learning_rate": 9.982758973735337e-07, |
| "loss": 130.6375, |
| "step": 11030 |
| }, |
| { |
| "epoch": 0.8835412678146934, |
| "grad_norm": 77.4375, |
| "learning_rate": 9.982743342705179e-07, |
| "loss": 130.627, |
| "step": 11040 |
| }, |
| { |
| "epoch": 0.8843415769340908, |
| "grad_norm": 71.3125, |
| "learning_rate": 9.982727711675019e-07, |
| "loss": 130.6759, |
| "step": 11050 |
| }, |
| { |
| "epoch": 0.8851418860534882, |
| "grad_norm": 77.875, |
| "learning_rate": 9.98271208064486e-07, |
| "loss": 131.0551, |
| "step": 11060 |
| }, |
| { |
| "epoch": 0.8859421951728855, |
| "grad_norm": 73.8125, |
| "learning_rate": 9.982696449614703e-07, |
| "loss": 130.5227, |
| "step": 11070 |
| }, |
| { |
| "epoch": 0.8867425042922829, |
| "grad_norm": 77.25, |
| "learning_rate": 9.982680818584545e-07, |
| "loss": 132.4977, |
| "step": 11080 |
| }, |
| { |
| "epoch": 0.8875428134116803, |
| "grad_norm": 81.375, |
| "learning_rate": 9.982665187554385e-07, |
| "loss": 131.0946, |
| "step": 11090 |
| }, |
| { |
| "epoch": 0.8883431225310776, |
| "grad_norm": 74.375, |
| "learning_rate": 9.982649556524228e-07, |
| "loss": 130.3095, |
| "step": 11100 |
| }, |
| { |
| "epoch": 0.889143431650475, |
| "grad_norm": 76.3125, |
| "learning_rate": 9.98263392549407e-07, |
| "loss": 131.8633, |
| "step": 11110 |
| }, |
| { |
| "epoch": 0.8899437407698724, |
| "grad_norm": 83.125, |
| "learning_rate": 9.98261829446391e-07, |
| "loss": 131.3789, |
| "step": 11120 |
| }, |
| { |
| "epoch": 0.8907440498892697, |
| "grad_norm": 71.375, |
| "learning_rate": 9.982602663433752e-07, |
| "loss": 131.0635, |
| "step": 11130 |
| }, |
| { |
| "epoch": 0.8915443590086671, |
| "grad_norm": 76.0625, |
| "learning_rate": 9.982587032403594e-07, |
| "loss": 131.6485, |
| "step": 11140 |
| }, |
| { |
| "epoch": 0.8923446681280645, |
| "grad_norm": 84.5625, |
| "learning_rate": 9.982571401373434e-07, |
| "loss": 131.5727, |
| "step": 11150 |
| }, |
| { |
| "epoch": 0.8931449772474618, |
| "grad_norm": 76.3125, |
| "learning_rate": 9.982555770343276e-07, |
| "loss": 131.7047, |
| "step": 11160 |
| }, |
| { |
| "epoch": 0.8939452863668592, |
| "grad_norm": 78.25, |
| "learning_rate": 9.982540139313119e-07, |
| "loss": 132.1118, |
| "step": 11170 |
| }, |
| { |
| "epoch": 0.8947455954862565, |
| "grad_norm": 77.5, |
| "learning_rate": 9.98252450828296e-07, |
| "loss": 130.6895, |
| "step": 11180 |
| }, |
| { |
| "epoch": 0.895545904605654, |
| "grad_norm": 74.75, |
| "learning_rate": 9.982508877252803e-07, |
| "loss": 130.8122, |
| "step": 11190 |
| }, |
| { |
| "epoch": 0.8963462137250513, |
| "grad_norm": 75.0, |
| "learning_rate": 9.982493246222645e-07, |
| "loss": 130.9431, |
| "step": 11200 |
| }, |
| { |
| "epoch": 0.8971465228444486, |
| "grad_norm": 75.375, |
| "learning_rate": 9.982477615192485e-07, |
| "loss": 131.6024, |
| "step": 11210 |
| }, |
| { |
| "epoch": 0.8979468319638461, |
| "grad_norm": 75.25, |
| "learning_rate": 9.982461984162327e-07, |
| "loss": 130.6127, |
| "step": 11220 |
| }, |
| { |
| "epoch": 0.8987471410832434, |
| "grad_norm": 84.375, |
| "learning_rate": 9.98244635313217e-07, |
| "loss": 132.7165, |
| "step": 11230 |
| }, |
| { |
| "epoch": 0.8995474502026408, |
| "grad_norm": 76.75, |
| "learning_rate": 9.982430722102012e-07, |
| "loss": 129.617, |
| "step": 11240 |
| }, |
| { |
| "epoch": 0.9003477593220381, |
| "grad_norm": 84.6875, |
| "learning_rate": 9.982415091071852e-07, |
| "loss": 130.9968, |
| "step": 11250 |
| }, |
| { |
| "epoch": 0.9011480684414355, |
| "grad_norm": 74.1875, |
| "learning_rate": 9.982399460041694e-07, |
| "loss": 130.6432, |
| "step": 11260 |
| }, |
| { |
| "epoch": 0.9019483775608329, |
| "grad_norm": 77.9375, |
| "learning_rate": 9.982383829011536e-07, |
| "loss": 131.2904, |
| "step": 11270 |
| }, |
| { |
| "epoch": 0.9027486866802302, |
| "grad_norm": 71.9375, |
| "learning_rate": 9.982368197981376e-07, |
| "loss": 131.8605, |
| "step": 11280 |
| }, |
| { |
| "epoch": 0.9035489957996276, |
| "grad_norm": 76.75, |
| "learning_rate": 9.982352566951218e-07, |
| "loss": 132.6155, |
| "step": 11290 |
| }, |
| { |
| "epoch": 0.904349304919025, |
| "grad_norm": 74.8125, |
| "learning_rate": 9.98233693592106e-07, |
| "loss": 130.2238, |
| "step": 11300 |
| }, |
| { |
| "epoch": 0.9051496140384223, |
| "grad_norm": 78.4375, |
| "learning_rate": 9.9823213048909e-07, |
| "loss": 131.5677, |
| "step": 11310 |
| }, |
| { |
| "epoch": 0.9059499231578197, |
| "grad_norm": 72.0, |
| "learning_rate": 9.982305673860743e-07, |
| "loss": 130.6105, |
| "step": 11320 |
| }, |
| { |
| "epoch": 0.9067502322772171, |
| "grad_norm": 74.375, |
| "learning_rate": 9.982290042830585e-07, |
| "loss": 130.9643, |
| "step": 11330 |
| }, |
| { |
| "epoch": 0.9075505413966144, |
| "grad_norm": 80.125, |
| "learning_rate": 9.982274411800427e-07, |
| "loss": 131.6026, |
| "step": 11340 |
| }, |
| { |
| "epoch": 0.9083508505160118, |
| "grad_norm": 79.1875, |
| "learning_rate": 9.98225878077027e-07, |
| "loss": 131.4425, |
| "step": 11350 |
| }, |
| { |
| "epoch": 0.9091511596354092, |
| "grad_norm": 76.875, |
| "learning_rate": 9.98224314974011e-07, |
| "loss": 131.8976, |
| "step": 11360 |
| }, |
| { |
| "epoch": 0.9099514687548066, |
| "grad_norm": 80.0625, |
| "learning_rate": 9.982227518709952e-07, |
| "loss": 131.6046, |
| "step": 11370 |
| }, |
| { |
| "epoch": 0.9107517778742039, |
| "grad_norm": 80.0625, |
| "learning_rate": 9.982211887679794e-07, |
| "loss": 130.2627, |
| "step": 11380 |
| }, |
| { |
| "epoch": 0.9115520869936012, |
| "grad_norm": 73.6875, |
| "learning_rate": 9.982196256649636e-07, |
| "loss": 131.1418, |
| "step": 11390 |
| }, |
| { |
| "epoch": 0.9123523961129987, |
| "grad_norm": 75.3125, |
| "learning_rate": 9.982180625619476e-07, |
| "loss": 130.4075, |
| "step": 11400 |
| }, |
| { |
| "epoch": 0.913152705232396, |
| "grad_norm": 81.3125, |
| "learning_rate": 9.982164994589318e-07, |
| "loss": 131.0209, |
| "step": 11410 |
| }, |
| { |
| "epoch": 0.9139530143517933, |
| "grad_norm": 76.0625, |
| "learning_rate": 9.98214936355916e-07, |
| "loss": 130.3062, |
| "step": 11420 |
| }, |
| { |
| "epoch": 0.9147533234711908, |
| "grad_norm": 73.0, |
| "learning_rate": 9.982133732529003e-07, |
| "loss": 131.0498, |
| "step": 11430 |
| }, |
| { |
| "epoch": 0.9155536325905881, |
| "grad_norm": 80.6875, |
| "learning_rate": 9.982118101498843e-07, |
| "loss": 130.2114, |
| "step": 11440 |
| }, |
| { |
| "epoch": 0.9163539417099855, |
| "grad_norm": 75.25, |
| "learning_rate": 9.982102470468685e-07, |
| "loss": 130.6461, |
| "step": 11450 |
| }, |
| { |
| "epoch": 0.9171542508293828, |
| "grad_norm": 82.5, |
| "learning_rate": 9.982086839438527e-07, |
| "loss": 131.5808, |
| "step": 11460 |
| }, |
| { |
| "epoch": 0.9179545599487802, |
| "grad_norm": 75.0625, |
| "learning_rate": 9.982071208408367e-07, |
| "loss": 131.6145, |
| "step": 11470 |
| }, |
| { |
| "epoch": 0.9187548690681776, |
| "grad_norm": 77.375, |
| "learning_rate": 9.98205557737821e-07, |
| "loss": 131.2061, |
| "step": 11480 |
| }, |
| { |
| "epoch": 0.9195551781875749, |
| "grad_norm": 82.75, |
| "learning_rate": 9.982039946348052e-07, |
| "loss": 130.0431, |
| "step": 11490 |
| }, |
| { |
| "epoch": 0.9203554873069724, |
| "grad_norm": 73.8125, |
| "learning_rate": 9.982024315317894e-07, |
| "loss": 131.8762, |
| "step": 11500 |
| }, |
| { |
| "epoch": 0.9211557964263697, |
| "grad_norm": 71.25, |
| "learning_rate": 9.982008684287736e-07, |
| "loss": 131.6012, |
| "step": 11510 |
| }, |
| { |
| "epoch": 0.921956105545767, |
| "grad_norm": 77.5, |
| "learning_rate": 9.981993053257576e-07, |
| "loss": 131.3987, |
| "step": 11520 |
| }, |
| { |
| "epoch": 0.9227564146651644, |
| "grad_norm": 79.3125, |
| "learning_rate": 9.981977422227418e-07, |
| "loss": 132.0234, |
| "step": 11530 |
| }, |
| { |
| "epoch": 0.9235567237845618, |
| "grad_norm": 81.0, |
| "learning_rate": 9.98196179119726e-07, |
| "loss": 132.7055, |
| "step": 11540 |
| }, |
| { |
| "epoch": 0.9243570329039592, |
| "grad_norm": 81.5, |
| "learning_rate": 9.981946160167102e-07, |
| "loss": 131.5491, |
| "step": 11550 |
| }, |
| { |
| "epoch": 0.9251573420233565, |
| "grad_norm": 81.3125, |
| "learning_rate": 9.981930529136943e-07, |
| "loss": 129.9155, |
| "step": 11560 |
| }, |
| { |
| "epoch": 0.9259576511427539, |
| "grad_norm": 77.5, |
| "learning_rate": 9.981914898106785e-07, |
| "loss": 131.0387, |
| "step": 11570 |
| }, |
| { |
| "epoch": 0.9267579602621513, |
| "grad_norm": 74.5625, |
| "learning_rate": 9.981899267076627e-07, |
| "loss": 129.5762, |
| "step": 11580 |
| }, |
| { |
| "epoch": 0.9275582693815486, |
| "grad_norm": 72.5, |
| "learning_rate": 9.981883636046467e-07, |
| "loss": 129.8288, |
| "step": 11590 |
| }, |
| { |
| "epoch": 0.9283585785009459, |
| "grad_norm": 74.6875, |
| "learning_rate": 9.98186800501631e-07, |
| "loss": 130.526, |
| "step": 11600 |
| }, |
| { |
| "epoch": 0.9291588876203434, |
| "grad_norm": 79.3125, |
| "learning_rate": 9.981852373986151e-07, |
| "loss": 130.7928, |
| "step": 11610 |
| }, |
| { |
| "epoch": 0.9299591967397407, |
| "grad_norm": 74.5625, |
| "learning_rate": 9.981836742955994e-07, |
| "loss": 130.5095, |
| "step": 11620 |
| }, |
| { |
| "epoch": 0.9307595058591381, |
| "grad_norm": 78.6875, |
| "learning_rate": 9.981821111925834e-07, |
| "loss": 130.1216, |
| "step": 11630 |
| }, |
| { |
| "epoch": 0.9315598149785355, |
| "grad_norm": 73.9375, |
| "learning_rate": 9.981805480895676e-07, |
| "loss": 130.3142, |
| "step": 11640 |
| }, |
| { |
| "epoch": 0.9323601240979328, |
| "grad_norm": 80.25, |
| "learning_rate": 9.981789849865518e-07, |
| "loss": 129.8451, |
| "step": 11650 |
| }, |
| { |
| "epoch": 0.9331604332173302, |
| "grad_norm": 77.75, |
| "learning_rate": 9.98177421883536e-07, |
| "loss": 131.5875, |
| "step": 11660 |
| }, |
| { |
| "epoch": 0.9339607423367275, |
| "grad_norm": 75.9375, |
| "learning_rate": 9.981758587805202e-07, |
| "loss": 131.1091, |
| "step": 11670 |
| }, |
| { |
| "epoch": 0.934761051456125, |
| "grad_norm": 79.4375, |
| "learning_rate": 9.981742956775042e-07, |
| "loss": 129.5825, |
| "step": 11680 |
| }, |
| { |
| "epoch": 0.9355613605755223, |
| "grad_norm": 78.375, |
| "learning_rate": 9.981727325744885e-07, |
| "loss": 131.5865, |
| "step": 11690 |
| }, |
| { |
| "epoch": 0.9363616696949196, |
| "grad_norm": 74.25, |
| "learning_rate": 9.981711694714727e-07, |
| "loss": 131.3969, |
| "step": 11700 |
| }, |
| { |
| "epoch": 0.9371619788143171, |
| "grad_norm": 82.5625, |
| "learning_rate": 9.98169606368457e-07, |
| "loss": 131.5427, |
| "step": 11710 |
| }, |
| { |
| "epoch": 0.9379622879337144, |
| "grad_norm": 81.125, |
| "learning_rate": 9.98168043265441e-07, |
| "loss": 131.2085, |
| "step": 11720 |
| }, |
| { |
| "epoch": 0.9387625970531117, |
| "grad_norm": 76.25, |
| "learning_rate": 9.981664801624251e-07, |
| "loss": 130.8618, |
| "step": 11730 |
| }, |
| { |
| "epoch": 0.9395629061725091, |
| "grad_norm": 75.0625, |
| "learning_rate": 9.981649170594093e-07, |
| "loss": 130.0715, |
| "step": 11740 |
| }, |
| { |
| "epoch": 0.9403632152919065, |
| "grad_norm": 76.4375, |
| "learning_rate": 9.981633539563933e-07, |
| "loss": 131.8718, |
| "step": 11750 |
| }, |
| { |
| "epoch": 0.9411635244113039, |
| "grad_norm": 78.1875, |
| "learning_rate": 9.981617908533776e-07, |
| "loss": 131.3843, |
| "step": 11760 |
| }, |
| { |
| "epoch": 0.9419638335307012, |
| "grad_norm": 75.3125, |
| "learning_rate": 9.981602277503618e-07, |
| "loss": 130.1011, |
| "step": 11770 |
| }, |
| { |
| "epoch": 0.9427641426500986, |
| "grad_norm": 78.0625, |
| "learning_rate": 9.98158664647346e-07, |
| "loss": 131.4609, |
| "step": 11780 |
| }, |
| { |
| "epoch": 0.943564451769496, |
| "grad_norm": 73.75, |
| "learning_rate": 9.9815710154433e-07, |
| "loss": 130.214, |
| "step": 11790 |
| }, |
| { |
| "epoch": 0.9443647608888933, |
| "grad_norm": 81.3125, |
| "learning_rate": 9.981555384413142e-07, |
| "loss": 132.272, |
| "step": 11800 |
| }, |
| { |
| "epoch": 0.9451650700082908, |
| "grad_norm": 76.0625, |
| "learning_rate": 9.981539753382984e-07, |
| "loss": 130.5801, |
| "step": 11810 |
| }, |
| { |
| "epoch": 0.9459653791276881, |
| "grad_norm": 71.3125, |
| "learning_rate": 9.981524122352827e-07, |
| "loss": 130.7302, |
| "step": 11820 |
| }, |
| { |
| "epoch": 0.9467656882470854, |
| "grad_norm": 84.0625, |
| "learning_rate": 9.981508491322667e-07, |
| "loss": 130.839, |
| "step": 11830 |
| }, |
| { |
| "epoch": 0.9475659973664828, |
| "grad_norm": 78.5, |
| "learning_rate": 9.981492860292509e-07, |
| "loss": 130.5891, |
| "step": 11840 |
| }, |
| { |
| "epoch": 0.9483663064858802, |
| "grad_norm": 78.4375, |
| "learning_rate": 9.98147722926235e-07, |
| "loss": 130.0701, |
| "step": 11850 |
| }, |
| { |
| "epoch": 0.9491666156052775, |
| "grad_norm": 76.1875, |
| "learning_rate": 9.981461598232193e-07, |
| "loss": 130.6237, |
| "step": 11860 |
| }, |
| { |
| "epoch": 0.9499669247246749, |
| "grad_norm": 79.625, |
| "learning_rate": 9.981445967202035e-07, |
| "loss": 131.0294, |
| "step": 11870 |
| }, |
| { |
| "epoch": 0.9507672338440722, |
| "grad_norm": 79.3125, |
| "learning_rate": 9.981430336171875e-07, |
| "loss": 132.2408, |
| "step": 11880 |
| }, |
| { |
| "epoch": 0.9515675429634697, |
| "grad_norm": 72.0625, |
| "learning_rate": 9.981414705141718e-07, |
| "loss": 131.3878, |
| "step": 11890 |
| }, |
| { |
| "epoch": 0.952367852082867, |
| "grad_norm": 75.5625, |
| "learning_rate": 9.98139907411156e-07, |
| "loss": 131.9279, |
| "step": 11900 |
| }, |
| { |
| "epoch": 0.9531681612022643, |
| "grad_norm": 86.5, |
| "learning_rate": 9.9813834430814e-07, |
| "loss": 130.3583, |
| "step": 11910 |
| }, |
| { |
| "epoch": 0.9539684703216618, |
| "grad_norm": 74.4375, |
| "learning_rate": 9.981367812051242e-07, |
| "loss": 130.4393, |
| "step": 11920 |
| }, |
| { |
| "epoch": 0.9547687794410591, |
| "grad_norm": 74.6875, |
| "learning_rate": 9.981352181021084e-07, |
| "loss": 129.8773, |
| "step": 11930 |
| }, |
| { |
| "epoch": 0.9555690885604565, |
| "grad_norm": 77.6875, |
| "learning_rate": 9.981336549990924e-07, |
| "loss": 130.8676, |
| "step": 11940 |
| }, |
| { |
| "epoch": 0.9563693976798538, |
| "grad_norm": 80.5, |
| "learning_rate": 9.981320918960767e-07, |
| "loss": 131.1644, |
| "step": 11950 |
| }, |
| { |
| "epoch": 0.9571697067992512, |
| "grad_norm": 81.125, |
| "learning_rate": 9.981305287930609e-07, |
| "loss": 131.0869, |
| "step": 11960 |
| }, |
| { |
| "epoch": 0.9579700159186486, |
| "grad_norm": 85.0, |
| "learning_rate": 9.98128965690045e-07, |
| "loss": 130.5896, |
| "step": 11970 |
| }, |
| { |
| "epoch": 0.9587703250380459, |
| "grad_norm": 72.625, |
| "learning_rate": 9.981274025870293e-07, |
| "loss": 131.133, |
| "step": 11980 |
| }, |
| { |
| "epoch": 0.9595706341574433, |
| "grad_norm": 77.625, |
| "learning_rate": 9.981258394840133e-07, |
| "loss": 131.9948, |
| "step": 11990 |
| }, |
| { |
| "epoch": 0.9603709432768407, |
| "grad_norm": 78.25, |
| "learning_rate": 9.981242763809975e-07, |
| "loss": 130.3504, |
| "step": 12000 |
| }, |
| { |
| "epoch": 0.961171252396238, |
| "grad_norm": 69.0625, |
| "learning_rate": 9.981227132779817e-07, |
| "loss": 131.8812, |
| "step": 12010 |
| }, |
| { |
| "epoch": 0.9619715615156355, |
| "grad_norm": 76.125, |
| "learning_rate": 9.98121150174966e-07, |
| "loss": 131.7717, |
| "step": 12020 |
| }, |
| { |
| "epoch": 0.9627718706350328, |
| "grad_norm": 76.6875, |
| "learning_rate": 9.981195870719502e-07, |
| "loss": 131.0739, |
| "step": 12030 |
| }, |
| { |
| "epoch": 0.9635721797544301, |
| "grad_norm": 79.5, |
| "learning_rate": 9.981180239689342e-07, |
| "loss": 132.3484, |
| "step": 12040 |
| }, |
| { |
| "epoch": 0.9643724888738275, |
| "grad_norm": 81.25, |
| "learning_rate": 9.981164608659184e-07, |
| "loss": 131.6032, |
| "step": 12050 |
| }, |
| { |
| "epoch": 0.9651727979932249, |
| "grad_norm": 80.375, |
| "learning_rate": 9.981148977629026e-07, |
| "loss": 131.8261, |
| "step": 12060 |
| }, |
| { |
| "epoch": 0.9659731071126223, |
| "grad_norm": 80.9375, |
| "learning_rate": 9.981133346598866e-07, |
| "loss": 129.7895, |
| "step": 12070 |
| }, |
| { |
| "epoch": 0.9667734162320196, |
| "grad_norm": 77.75, |
| "learning_rate": 9.981117715568709e-07, |
| "loss": 129.8964, |
| "step": 12080 |
| }, |
| { |
| "epoch": 0.967573725351417, |
| "grad_norm": 72.5, |
| "learning_rate": 9.98110208453855e-07, |
| "loss": 130.2092, |
| "step": 12090 |
| }, |
| { |
| "epoch": 0.9683740344708144, |
| "grad_norm": 70.75, |
| "learning_rate": 9.98108645350839e-07, |
| "loss": 130.064, |
| "step": 12100 |
| }, |
| { |
| "epoch": 0.9691743435902117, |
| "grad_norm": 82.3125, |
| "learning_rate": 9.981070822478233e-07, |
| "loss": 133.1195, |
| "step": 12110 |
| }, |
| { |
| "epoch": 0.9699746527096091, |
| "grad_norm": 73.375, |
| "learning_rate": 9.981055191448075e-07, |
| "loss": 131.6047, |
| "step": 12120 |
| }, |
| { |
| "epoch": 0.9707749618290065, |
| "grad_norm": 78.0625, |
| "learning_rate": 9.981039560417917e-07, |
| "loss": 131.0189, |
| "step": 12130 |
| }, |
| { |
| "epoch": 0.9715752709484038, |
| "grad_norm": 80.375, |
| "learning_rate": 9.98102392938776e-07, |
| "loss": 131.1126, |
| "step": 12140 |
| }, |
| { |
| "epoch": 0.9723755800678012, |
| "grad_norm": 74.4375, |
| "learning_rate": 9.9810082983576e-07, |
| "loss": 131.885, |
| "step": 12150 |
| }, |
| { |
| "epoch": 0.9731758891871986, |
| "grad_norm": 78.25, |
| "learning_rate": 9.980992667327442e-07, |
| "loss": 130.9126, |
| "step": 12160 |
| }, |
| { |
| "epoch": 0.9739761983065959, |
| "grad_norm": 83.875, |
| "learning_rate": 9.980977036297284e-07, |
| "loss": 131.2764, |
| "step": 12170 |
| }, |
| { |
| "epoch": 0.9747765074259933, |
| "grad_norm": 72.875, |
| "learning_rate": 9.980961405267126e-07, |
| "loss": 132.7158, |
| "step": 12180 |
| }, |
| { |
| "epoch": 0.9755768165453906, |
| "grad_norm": 77.25, |
| "learning_rate": 9.980945774236968e-07, |
| "loss": 131.1642, |
| "step": 12190 |
| }, |
| { |
| "epoch": 0.9763771256647881, |
| "grad_norm": 77.125, |
| "learning_rate": 9.980930143206808e-07, |
| "loss": 130.3661, |
| "step": 12200 |
| }, |
| { |
| "epoch": 0.9771774347841854, |
| "grad_norm": 81.25, |
| "learning_rate": 9.98091451217665e-07, |
| "loss": 132.4058, |
| "step": 12210 |
| }, |
| { |
| "epoch": 0.9779777439035827, |
| "grad_norm": 77.1875, |
| "learning_rate": 9.980898881146493e-07, |
| "loss": 131.1993, |
| "step": 12220 |
| }, |
| { |
| "epoch": 0.9787780530229802, |
| "grad_norm": 78.375, |
| "learning_rate": 9.980883250116333e-07, |
| "loss": 129.8341, |
| "step": 12230 |
| }, |
| { |
| "epoch": 0.9795783621423775, |
| "grad_norm": 82.3125, |
| "learning_rate": 9.980867619086175e-07, |
| "loss": 130.408, |
| "step": 12240 |
| }, |
| { |
| "epoch": 0.9803786712617749, |
| "grad_norm": 79.375, |
| "learning_rate": 9.980851988056017e-07, |
| "loss": 132.0676, |
| "step": 12250 |
| }, |
| { |
| "epoch": 0.9811789803811722, |
| "grad_norm": 75.4375, |
| "learning_rate": 9.980836357025857e-07, |
| "loss": 130.5135, |
| "step": 12260 |
| }, |
| { |
| "epoch": 0.9819792895005696, |
| "grad_norm": 79.75, |
| "learning_rate": 9.9808207259957e-07, |
| "loss": 129.7958, |
| "step": 12270 |
| }, |
| { |
| "epoch": 0.982779598619967, |
| "grad_norm": 78.8125, |
| "learning_rate": 9.980805094965542e-07, |
| "loss": 131.8359, |
| "step": 12280 |
| }, |
| { |
| "epoch": 0.9835799077393643, |
| "grad_norm": 73.75, |
| "learning_rate": 9.980789463935384e-07, |
| "loss": 130.6134, |
| "step": 12290 |
| }, |
| { |
| "epoch": 0.9843802168587616, |
| "grad_norm": 79.0, |
| "learning_rate": 9.980773832905224e-07, |
| "loss": 130.2782, |
| "step": 12300 |
| }, |
| { |
| "epoch": 0.9851805259781591, |
| "grad_norm": 74.0, |
| "learning_rate": 9.980758201875066e-07, |
| "loss": 130.808, |
| "step": 12310 |
| }, |
| { |
| "epoch": 0.9859808350975564, |
| "grad_norm": 73.0, |
| "learning_rate": 9.980742570844908e-07, |
| "loss": 130.4844, |
| "step": 12320 |
| }, |
| { |
| "epoch": 0.9867811442169538, |
| "grad_norm": 72.4375, |
| "learning_rate": 9.98072693981475e-07, |
| "loss": 131.225, |
| "step": 12330 |
| }, |
| { |
| "epoch": 0.9875814533363512, |
| "grad_norm": 78.875, |
| "learning_rate": 9.980711308784593e-07, |
| "loss": 132.9991, |
| "step": 12340 |
| }, |
| { |
| "epoch": 0.9883817624557485, |
| "grad_norm": 75.9375, |
| "learning_rate": 9.980695677754435e-07, |
| "loss": 131.7026, |
| "step": 12350 |
| }, |
| { |
| "epoch": 0.9891820715751459, |
| "grad_norm": 74.125, |
| "learning_rate": 9.980680046724275e-07, |
| "loss": 130.6302, |
| "step": 12360 |
| }, |
| { |
| "epoch": 0.9899823806945433, |
| "grad_norm": 80.75, |
| "learning_rate": 9.980664415694117e-07, |
| "loss": 129.955, |
| "step": 12370 |
| }, |
| { |
| "epoch": 0.9907826898139407, |
| "grad_norm": 70.125, |
| "learning_rate": 9.98064878466396e-07, |
| "loss": 130.9879, |
| "step": 12380 |
| }, |
| { |
| "epoch": 0.991582998933338, |
| "grad_norm": 79.75, |
| "learning_rate": 9.9806331536338e-07, |
| "loss": 130.061, |
| "step": 12390 |
| }, |
| { |
| "epoch": 0.9923833080527353, |
| "grad_norm": 75.375, |
| "learning_rate": 9.980617522603641e-07, |
| "loss": 129.8558, |
| "step": 12400 |
| }, |
| { |
| "epoch": 0.9931836171721328, |
| "grad_norm": 78.4375, |
| "learning_rate": 9.980601891573484e-07, |
| "loss": 129.8251, |
| "step": 12410 |
| }, |
| { |
| "epoch": 0.9939839262915301, |
| "grad_norm": 70.1875, |
| "learning_rate": 9.980586260543324e-07, |
| "loss": 130.4507, |
| "step": 12420 |
| }, |
| { |
| "epoch": 0.9947842354109274, |
| "grad_norm": 74.4375, |
| "learning_rate": 9.980570629513166e-07, |
| "loss": 131.6167, |
| "step": 12430 |
| }, |
| { |
| "epoch": 0.9955845445303249, |
| "grad_norm": 76.1875, |
| "learning_rate": 9.980554998483008e-07, |
| "loss": 132.0188, |
| "step": 12440 |
| }, |
| { |
| "epoch": 0.9963848536497222, |
| "grad_norm": 79.6875, |
| "learning_rate": 9.98053936745285e-07, |
| "loss": 130.7771, |
| "step": 12450 |
| }, |
| { |
| "epoch": 0.9971851627691196, |
| "grad_norm": 74.25, |
| "learning_rate": 9.98052373642269e-07, |
| "loss": 130.4612, |
| "step": 12460 |
| }, |
| { |
| "epoch": 0.9979854718885169, |
| "grad_norm": 78.0625, |
| "learning_rate": 9.980508105392532e-07, |
| "loss": 129.6428, |
| "step": 12470 |
| }, |
| { |
| "epoch": 0.9987857810079143, |
| "grad_norm": 77.6875, |
| "learning_rate": 9.980492474362375e-07, |
| "loss": 130.6133, |
| "step": 12480 |
| }, |
| { |
| "epoch": 0.9995860901273117, |
| "grad_norm": 77.75, |
| "learning_rate": 9.980476843332217e-07, |
| "loss": 131.8606, |
| "step": 12490 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 12495, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 1, |
| "save_steps": 2500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 3.450015163893783e+19, |
| "train_batch_size": 4, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|