{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 7.0, "eval_steps": 500, "global_step": 735, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.04792332268370607, "grad_norm": 8.982871202392836, "learning_rate": 2.1621621621621623e-06, "loss": 0.4936, "loss_nan_ranks": 0, "loss_rank_avg": 0.1592181921005249, "step": 5, "valid_targets_mean": 8838.0, "valid_targets_min": 2815 }, { "epoch": 0.09584664536741214, "grad_norm": 4.869040341382149, "learning_rate": 4.864864864864866e-06, "loss": 0.4591, "loss_nan_ranks": 0, "loss_rank_avg": 0.14799830317497253, "step": 10, "valid_targets_mean": 8579.1, "valid_targets_min": 2498 }, { "epoch": 0.14376996805111822, "grad_norm": 1.7556018862586495, "learning_rate": 7.567567567567569e-06, "loss": 0.4137, "loss_nan_ranks": 0, "loss_rank_avg": 0.13692015409469604, "step": 15, "valid_targets_mean": 8986.4, "valid_targets_min": 3372 }, { "epoch": 0.19169329073482427, "grad_norm": 1.0418919912451101, "learning_rate": 1.027027027027027e-05, "loss": 0.3792, "loss_nan_ranks": 0, "loss_rank_avg": 0.11497873812913895, "step": 20, "valid_targets_mean": 8493.1, "valid_targets_min": 1758 }, { "epoch": 0.23961661341853036, "grad_norm": 0.621330940276866, "learning_rate": 1.2972972972972975e-05, "loss": 0.3418, "loss_nan_ranks": 0, "loss_rank_avg": 0.10854463279247284, "step": 25, "valid_targets_mean": 7958.0, "valid_targets_min": 2019 }, { "epoch": 0.28753993610223644, "grad_norm": 0.4964510949959199, "learning_rate": 1.5675675675675676e-05, "loss": 0.3153, "loss_nan_ranks": 0, "loss_rank_avg": 0.11034172028303146, "step": 30, "valid_targets_mean": 9250.5, "valid_targets_min": 1615 }, { "epoch": 0.3354632587859425, "grad_norm": 0.3600917256903157, "learning_rate": 1.8378378378378383e-05, "loss": 0.2935, "loss_nan_ranks": 0, "loss_rank_avg": 0.09477648138999939, "step": 35, "valid_targets_mean": 8652.3, "valid_targets_min": 3949 }, { "epoch": 0.38338658146964855, "grad_norm": 0.30301686045879966, "learning_rate": 2.1081081081081082e-05, "loss": 0.2636, "loss_nan_ranks": 0, "loss_rank_avg": 0.07665778696537018, "step": 40, "valid_targets_mean": 8658.3, "valid_targets_min": 2634 }, { "epoch": 0.43130990415335463, "grad_norm": 0.2505991304110098, "learning_rate": 2.378378378378379e-05, "loss": 0.242, "loss_nan_ranks": 0, "loss_rank_avg": 0.07965461909770966, "step": 45, "valid_targets_mean": 8387.6, "valid_targets_min": 1267 }, { "epoch": 0.4792332268370607, "grad_norm": 0.2026378613321421, "learning_rate": 2.6486486486486488e-05, "loss": 0.2276, "loss_nan_ranks": 0, "loss_rank_avg": 0.06772664189338684, "step": 50, "valid_targets_mean": 8567.6, "valid_targets_min": 2215 }, { "epoch": 0.5271565495207667, "grad_norm": 0.18063483795315363, "learning_rate": 2.918918918918919e-05, "loss": 0.2204, "loss_nan_ranks": 0, "loss_rank_avg": 0.07574020326137543, "step": 55, "valid_targets_mean": 9203.6, "valid_targets_min": 2349 }, { "epoch": 0.5750798722044729, "grad_norm": 0.18260057468719235, "learning_rate": 3.1891891891891894e-05, "loss": 0.2114, "loss_nan_ranks": 0, "loss_rank_avg": 0.0646074041724205, "step": 60, "valid_targets_mean": 7918.9, "valid_targets_min": 1966 }, { "epoch": 0.6230031948881789, "grad_norm": 0.1952944182498782, "learning_rate": 3.45945945945946e-05, "loss": 0.2049, "loss_nan_ranks": 0, "loss_rank_avg": 0.06607531756162643, "step": 65, "valid_targets_mean": 8708.2, "valid_targets_min": 4730 }, { "epoch": 0.670926517571885, "grad_norm": 0.1596272851527542, "learning_rate": 3.72972972972973e-05, "loss": 0.195, "loss_nan_ranks": 0, "loss_rank_avg": 0.06611350178718567, "step": 70, "valid_targets_mean": 9198.4, "valid_targets_min": 2913 }, { "epoch": 0.7188498402555911, "grad_norm": 0.18104797378120346, "learning_rate": 4e-05, "loss": 0.1905, "loss_nan_ranks": 0, "loss_rank_avg": 0.06400163471698761, "step": 75, "valid_targets_mean": 7796.8, "valid_targets_min": 2499 }, { "epoch": 0.7667731629392971, "grad_norm": 0.16498793730423283, "learning_rate": 3.999435301808432e-05, "loss": 0.192, "loss_nan_ranks": 0, "loss_rank_avg": 0.05754277855157852, "step": 80, "valid_targets_mean": 7951.4, "valid_targets_min": 3044 }, { "epoch": 0.8146964856230032, "grad_norm": 0.16971530607334578, "learning_rate": 3.997741526117775e-05, "loss": 0.1862, "loss_nan_ranks": 0, "loss_rank_avg": 0.06384151428937912, "step": 85, "valid_targets_mean": 8073.7, "valid_targets_min": 1933 }, { "epoch": 0.8626198083067093, "grad_norm": 0.16016325817445584, "learning_rate": 3.994919629400098e-05, "loss": 0.1776, "loss_nan_ranks": 0, "loss_rank_avg": 0.06105092912912369, "step": 90, "valid_targets_mean": 8430.1, "valid_targets_min": 3147 }, { "epoch": 0.9105431309904153, "grad_norm": 0.15777170003961027, "learning_rate": 3.990971205175375e-05, "loss": 0.1806, "loss_nan_ranks": 0, "loss_rank_avg": 0.057581670582294464, "step": 95, "valid_targets_mean": 8585.2, "valid_targets_min": 2840 }, { "epoch": 0.9584664536741214, "grad_norm": 0.18646622741641677, "learning_rate": 3.985898483111624e-05, "loss": 0.1755, "loss_nan_ranks": 0, "loss_rank_avg": 0.05864211171865463, "step": 100, "valid_targets_mean": 8463.8, "valid_targets_min": 3838 }, { "epoch": 1.0, "grad_norm": 0.2511356329717353, "learning_rate": 3.979704327765823e-05, "loss": 0.1729, "loss_nan_ranks": 0, "loss_rank_avg": 0.16539210081100464, "step": 105, "valid_targets_mean": 8758.9, "valid_targets_min": 3781 }, { "epoch": 1.0479233226837061, "grad_norm": 0.16279705622118554, "learning_rate": 3.972392236966291e-05, "loss": 0.1706, "loss_nan_ranks": 0, "loss_rank_avg": 0.053829021751880646, "step": 110, "valid_targets_mean": 8902.6, "valid_targets_min": 1913 }, { "epoch": 1.095846645367412, "grad_norm": 0.17163665454737814, "learning_rate": 3.963966339837482e-05, "loss": 0.1714, "loss_nan_ranks": 0, "loss_rank_avg": 0.05452179163694382, "step": 115, "valid_targets_mean": 8963.6, "valid_targets_min": 4027 }, { "epoch": 1.1437699680511182, "grad_norm": 0.15502231795052268, "learning_rate": 3.954431394468266e-05, "loss": 0.1709, "loss_nan_ranks": 0, "loss_rank_avg": 0.061408303678035736, "step": 120, "valid_targets_mean": 9384.2, "valid_targets_min": 4126 }, { "epoch": 1.1916932907348243, "grad_norm": 0.26911312131506865, "learning_rate": 3.943792785225049e-05, "loss": 0.1666, "loss_nan_ranks": 0, "loss_rank_avg": 0.053546734154224396, "step": 125, "valid_targets_mean": 8582.1, "valid_targets_min": 1267 }, { "epoch": 1.2396166134185305, "grad_norm": 0.1632755315156534, "learning_rate": 3.932056519711232e-05, "loss": 0.1631, "loss_nan_ranks": 0, "loss_rank_avg": 0.052455998957157135, "step": 130, "valid_targets_mean": 9007.6, "valid_targets_min": 1489 }, { "epoch": 1.2875399361022364, "grad_norm": 0.17105174576530605, "learning_rate": 3.919229225374726e-05, "loss": 0.1656, "loss_nan_ranks": 0, "loss_rank_avg": 0.04397791996598244, "step": 135, "valid_targets_mean": 7291.8, "valid_targets_min": 941 }, { "epoch": 1.3354632587859425, "grad_norm": 0.17198756870105245, "learning_rate": 3.9053181457654465e-05, "loss": 0.1625, "loss_nan_ranks": 0, "loss_rank_avg": 0.05939662456512451, "step": 140, "valid_targets_mean": 9653.4, "valid_targets_min": 2045 }, { "epoch": 1.3833865814696487, "grad_norm": 0.15757402041255247, "learning_rate": 3.89033113644489e-05, "loss": 0.1584, "loss_nan_ranks": 0, "loss_rank_avg": 0.05102524161338806, "step": 145, "valid_targets_mean": 9247.4, "valid_targets_min": 2404 }, { "epoch": 1.4313099041533546, "grad_norm": 0.1669340893696013, "learning_rate": 3.874276660550119e-05, "loss": 0.1621, "loss_nan_ranks": 0, "loss_rank_avg": 0.054257169365882874, "step": 150, "valid_targets_mean": 8428.7, "valid_targets_min": 1817 }, { "epoch": 1.4792332268370607, "grad_norm": 0.17930744302420296, "learning_rate": 3.857163784014636e-05, "loss": 0.1632, "loss_nan_ranks": 0, "loss_rank_avg": 0.05627815052866936, "step": 155, "valid_targets_mean": 9194.1, "valid_targets_min": 1771 }, { "epoch": 1.5271565495207668, "grad_norm": 0.16977270176183656, "learning_rate": 3.8390021704488735e-05, "loss": 0.1613, "loss_nan_ranks": 0, "loss_rank_avg": 0.05593028664588928, "step": 160, "valid_targets_mean": 8621.9, "valid_targets_min": 3475 }, { "epoch": 1.5750798722044728, "grad_norm": 0.17445879345229784, "learning_rate": 3.8198020756831694e-05, "loss": 0.1595, "loss_nan_ranks": 0, "loss_rank_avg": 0.055247336626052856, "step": 165, "valid_targets_mean": 8989.5, "valid_targets_min": 3428 }, { "epoch": 1.623003194888179, "grad_norm": 0.17465115639380263, "learning_rate": 3.799574341976314e-05, "loss": 0.1546, "loss_nan_ranks": 0, "loss_rank_avg": 0.05567849799990654, "step": 170, "valid_targets_mean": 9782.6, "valid_targets_min": 2450 }, { "epoch": 1.670926517571885, "grad_norm": 0.1734995913932172, "learning_rate": 3.778330391892952e-05, "loss": 0.1593, "loss_nan_ranks": 0, "loss_rank_avg": 0.053123295307159424, "step": 175, "valid_targets_mean": 8914.3, "valid_targets_min": 2891 }, { "epoch": 1.718849840255591, "grad_norm": 0.15804349256301092, "learning_rate": 3.7560822218532774e-05, "loss": 0.1572, "loss_nan_ranks": 0, "loss_rank_avg": 0.050238993018865585, "step": 180, "valid_targets_mean": 8918.0, "valid_targets_min": 4581 }, { "epoch": 1.766773162939297, "grad_norm": 0.18475759093632027, "learning_rate": 3.732842395358677e-05, "loss": 0.1562, "loss_nan_ranks": 0, "loss_rank_avg": 0.050228629261255264, "step": 185, "valid_targets_mean": 8194.7, "valid_targets_min": 3799 }, { "epoch": 1.8146964856230032, "grad_norm": 0.17207806449259455, "learning_rate": 3.708624035897144e-05, "loss": 0.1561, "loss_nan_ranks": 0, "loss_rank_avg": 0.05894997715950012, "step": 190, "valid_targets_mean": 8873.7, "valid_targets_min": 3497 }, { "epoch": 1.8626198083067091, "grad_norm": 0.160383974551531, "learning_rate": 3.68344081953247e-05, "loss": 0.1558, "loss_nan_ranks": 0, "loss_rank_avg": 0.05507740378379822, "step": 195, "valid_targets_mean": 9449.2, "valid_targets_min": 2927 }, { "epoch": 1.9105431309904153, "grad_norm": 0.1688923848526839, "learning_rate": 3.657306967181394e-05, "loss": 0.1574, "loss_nan_ranks": 0, "loss_rank_avg": 0.051344458013772964, "step": 200, "valid_targets_mean": 7713.0, "valid_targets_min": 2578 }, { "epoch": 1.9584664536741214, "grad_norm": 0.17407301357558894, "learning_rate": 3.630237236583077e-05, "loss": 0.153, "loss_nan_ranks": 0, "loss_rank_avg": 0.056261416524648666, "step": 205, "valid_targets_mean": 8383.5, "valid_targets_min": 4391 }, { "epoch": 2.0, "grad_norm": 0.24633784795849772, "learning_rate": 3.6022469139654345e-05, "loss": 0.157, "loss_nan_ranks": 0, "loss_rank_avg": 0.15247748792171478, "step": 210, "valid_targets_mean": 8627.0, "valid_targets_min": 2270 }, { "epoch": 2.047923322683706, "grad_norm": 0.17399949855938723, "learning_rate": 3.57335180541303e-05, "loss": 0.1475, "loss_nan_ranks": 0, "loss_rank_avg": 0.04970664530992508, "step": 215, "valid_targets_mean": 9049.2, "valid_targets_min": 2971 }, { "epoch": 2.0958466453674123, "grad_norm": 0.22794184274320198, "learning_rate": 3.543568227941408e-05, "loss": 0.1454, "loss_nan_ranks": 0, "loss_rank_avg": 0.05050533637404442, "step": 220, "valid_targets_mean": 8644.0, "valid_targets_min": 3723 }, { "epoch": 2.143769968051118, "grad_norm": 0.18806892774190778, "learning_rate": 3.512913000282905e-05, "loss": 0.1489, "loss_nan_ranks": 0, "loss_rank_avg": 0.05213526636362076, "step": 225, "valid_targets_mean": 8811.9, "valid_targets_min": 1989 }, { "epoch": 2.191693290734824, "grad_norm": 0.1555212602218601, "learning_rate": 3.481403433389142e-05, "loss": 0.1477, "loss_nan_ranks": 0, "loss_rank_avg": 0.04308829829096794, "step": 230, "valid_targets_mean": 8940.4, "valid_targets_min": 1986 }, { "epoch": 2.2396166134185305, "grad_norm": 0.15895262335449095, "learning_rate": 3.449057320655561e-05, "loss": 0.1477, "loss_nan_ranks": 0, "loss_rank_avg": 0.048327021300792694, "step": 235, "valid_targets_mean": 9288.6, "valid_targets_min": 2617 }, { "epoch": 2.2875399361022364, "grad_norm": 0.15793093041554393, "learning_rate": 3.415892927873527e-05, "loss": 0.1515, "loss_nan_ranks": 0, "loss_rank_avg": 0.04683384671807289, "step": 240, "valid_targets_mean": 8370.8, "valid_targets_min": 1504 }, { "epoch": 2.3354632587859427, "grad_norm": 0.14678394997302097, "learning_rate": 3.381928982915668e-05, "loss": 0.1453, "loss_nan_ranks": 0, "loss_rank_avg": 0.04515903443098068, "step": 245, "valid_targets_mean": 9194.8, "valid_targets_min": 4400 }, { "epoch": 2.3833865814696487, "grad_norm": 0.1603781979087474, "learning_rate": 3.3471846651602815e-05, "loss": 0.1474, "loss_nan_ranks": 0, "loss_rank_avg": 0.05253840982913971, "step": 250, "valid_targets_mean": 8766.1, "valid_targets_min": 3267 }, { "epoch": 2.4313099041533546, "grad_norm": 0.15879085539353155, "learning_rate": 3.31167959466077e-05, "loss": 0.1473, "loss_nan_ranks": 0, "loss_rank_avg": 0.05009625852108002, "step": 255, "valid_targets_mean": 8309.4, "valid_targets_min": 4013 }, { "epoch": 2.479233226837061, "grad_norm": 0.14424018717194137, "learning_rate": 3.275433821066237e-05, "loss": 0.1461, "loss_nan_ranks": 0, "loss_rank_avg": 0.04807288199663162, "step": 260, "valid_targets_mean": 9051.2, "valid_targets_min": 2047 }, { "epoch": 2.527156549520767, "grad_norm": 0.159320759658969, "learning_rate": 3.238467812299483e-05, "loss": 0.1491, "loss_nan_ranks": 0, "loss_rank_avg": 0.04604367911815643, "step": 265, "valid_targets_mean": 8164.8, "valid_targets_min": 1963 }, { "epoch": 2.5750798722044728, "grad_norm": 0.16789223040102477, "learning_rate": 3.200802442998807e-05, "loss": 0.1459, "loss_nan_ranks": 0, "loss_rank_avg": 0.048190124332904816, "step": 270, "valid_targets_mean": 9103.1, "valid_targets_min": 1210 }, { "epoch": 2.623003194888179, "grad_norm": 0.17379236030377787, "learning_rate": 3.1624589827301395e-05, "loss": 0.1463, "loss_nan_ranks": 0, "loss_rank_avg": 0.05079440772533417, "step": 275, "valid_targets_mean": 7991.2, "valid_targets_min": 3517 }, { "epoch": 2.670926517571885, "grad_norm": 0.18305013542111226, "learning_rate": 3.123459083976152e-05, "loss": 0.1463, "loss_nan_ranks": 0, "loss_rank_avg": 0.04407097026705742, "step": 280, "valid_targets_mean": 8356.0, "valid_targets_min": 2326 }, { "epoch": 2.718849840255591, "grad_norm": 0.1664728450197544, "learning_rate": 3.083824769909142e-05, "loss": 0.1482, "loss_nan_ranks": 0, "loss_rank_avg": 0.04852219298481941, "step": 285, "valid_targets_mean": 9024.8, "valid_targets_min": 2931 }, { "epoch": 2.7667731629392973, "grad_norm": 0.1578734610961178, "learning_rate": 3.0435784219545872e-05, "loss": 0.1493, "loss_nan_ranks": 0, "loss_rank_avg": 0.047358766198158264, "step": 290, "valid_targets_mean": 8802.1, "valid_targets_min": 2204 }, { "epoch": 2.8146964856230032, "grad_norm": 0.22755109653603342, "learning_rate": 3.0027427671523957e-05, "loss": 0.1465, "loss_nan_ranks": 0, "loss_rank_avg": 0.0490507110953331, "step": 295, "valid_targets_mean": 8245.7, "valid_targets_min": 3666 }, { "epoch": 2.862619808306709, "grad_norm": 0.16269867408736244, "learning_rate": 2.961340865322984e-05, "loss": 0.1459, "loss_nan_ranks": 0, "loss_rank_avg": 0.04793532192707062, "step": 300, "valid_targets_mean": 8419.0, "valid_targets_min": 973 }, { "epoch": 2.9105431309904155, "grad_norm": 0.1528249668702955, "learning_rate": 2.9193960960454446e-05, "loss": 0.1467, "loss_nan_ranks": 0, "loss_rank_avg": 0.043026067316532135, "step": 305, "valid_targets_mean": 8449.1, "valid_targets_min": 2645 }, { "epoch": 2.9584664536741214, "grad_norm": 0.18558974597394937, "learning_rate": 2.8769321454551327e-05, "loss": 0.147, "loss_nan_ranks": 0, "loss_rank_avg": 0.044257231056690216, "step": 310, "valid_targets_mean": 8645.2, "valid_targets_min": 3405 }, { "epoch": 3.0, "grad_norm": 0.2546650444205805, "learning_rate": 2.833972992868154e-05, "loss": 0.1413, "loss_nan_ranks": 0, "loss_rank_avg": 0.13152235746383667, "step": 315, "valid_targets_mean": 9198.5, "valid_targets_min": 2013 }, { "epoch": 3.047923322683706, "grad_norm": 0.16032522655906664, "learning_rate": 2.7905428972402872e-05, "loss": 0.1403, "loss_nan_ranks": 0, "loss_rank_avg": 0.04698067158460617, "step": 320, "valid_targets_mean": 8638.6, "valid_targets_min": 2123 }, { "epoch": 3.0958466453674123, "grad_norm": 0.15952495144596782, "learning_rate": 2.7466663834679905e-05, "loss": 0.141, "loss_nan_ranks": 0, "loss_rank_avg": 0.05140436440706253, "step": 325, "valid_targets_mean": 9252.7, "valid_targets_min": 4005 }, { "epoch": 3.143769968051118, "grad_norm": 0.17042242626456597, "learning_rate": 2.7023682285392445e-05, "loss": 0.1389, "loss_nan_ranks": 0, "loss_rank_avg": 0.047651275992393494, "step": 330, "valid_targets_mean": 8656.8, "valid_targets_min": 2922 }, { "epoch": 3.191693290734824, "grad_norm": 0.16125983283276313, "learning_rate": 2.657673447542028e-05, "loss": 0.1428, "loss_nan_ranks": 0, "loss_rank_avg": 0.048372238874435425, "step": 335, "valid_targets_mean": 9040.1, "valid_targets_min": 3603 }, { "epoch": 3.2396166134185305, "grad_norm": 0.15799040110371987, "learning_rate": 2.6126072795383416e-05, "loss": 0.1367, "loss_nan_ranks": 0, "loss_rank_avg": 0.047681644558906555, "step": 340, "valid_targets_mean": 9583.6, "valid_targets_min": 3097 }, { "epoch": 3.2875399361022364, "grad_norm": 0.14805208338883794, "learning_rate": 2.5671951733117587e-05, "loss": 0.1415, "loss_nan_ranks": 0, "loss_rank_avg": 0.050001755356788635, "step": 345, "valid_targets_mean": 9660.5, "valid_targets_min": 4266 }, { "epoch": 3.3354632587859427, "grad_norm": 0.16843665921293052, "learning_rate": 2.5214627729965396e-05, "loss": 0.1398, "loss_nan_ranks": 0, "loss_rank_avg": 0.04726799577474594, "step": 350, "valid_targets_mean": 9226.8, "valid_targets_min": 5487 }, { "epoch": 3.3833865814696487, "grad_norm": 0.19266516280316792, "learning_rate": 2.47543590359644e-05, "loss": 0.1386, "loss_nan_ranks": 0, "loss_rank_avg": 0.04282751679420471, "step": 355, "valid_targets_mean": 7584.4, "valid_targets_min": 2293 }, { "epoch": 3.4313099041533546, "grad_norm": 0.16156169603076698, "learning_rate": 2.4291405564013727e-05, "loss": 0.1398, "loss_nan_ranks": 0, "loss_rank_avg": 0.04496710002422333, "step": 360, "valid_targets_mean": 9012.9, "valid_targets_min": 2869 }, { "epoch": 3.479233226837061, "grad_norm": 0.16770901380790357, "learning_rate": 2.3826028743101763e-05, "loss": 0.1415, "loss_nan_ranks": 0, "loss_rank_avg": 0.043900176882743835, "step": 365, "valid_targets_mean": 7966.8, "valid_targets_min": 2050 }, { "epoch": 3.527156549520767, "grad_norm": 0.14984841759414239, "learning_rate": 2.3358491370677693e-05, "loss": 0.1386, "loss_nan_ranks": 0, "loss_rank_avg": 0.04852374643087387, "step": 370, "valid_targets_mean": 9445.3, "valid_targets_min": 4423 }, { "epoch": 3.5750798722044728, "grad_norm": 0.1599146740589782, "learning_rate": 2.2889057464250196e-05, "loss": 0.1384, "loss_nan_ranks": 0, "loss_rank_avg": 0.04656771942973137, "step": 375, "valid_targets_mean": 9080.2, "valid_targets_min": 1597 }, { "epoch": 3.623003194888179, "grad_norm": 0.15143561107067025, "learning_rate": 2.2417992112297293e-05, "loss": 0.1407, "loss_nan_ranks": 0, "loss_rank_avg": 0.04388592019677162, "step": 380, "valid_targets_mean": 8641.7, "valid_targets_min": 3392 }, { "epoch": 3.670926517571885, "grad_norm": 0.15149756386513208, "learning_rate": 2.1945561324571366e-05, "loss": 0.1379, "loss_nan_ranks": 0, "loss_rank_avg": 0.05352652445435524, "step": 385, "valid_targets_mean": 9100.2, "valid_targets_min": 2539 }, { "epoch": 3.718849840255591, "grad_norm": 0.1665088787853794, "learning_rate": 2.1472031881883856e-05, "loss": 0.1394, "loss_nan_ranks": 0, "loss_rank_avg": 0.05265359953045845, "step": 390, "valid_targets_mean": 9276.7, "valid_targets_min": 3839 }, { "epoch": 3.7667731629392973, "grad_norm": 0.14824283461917753, "learning_rate": 2.0997671185454714e-05, "loss": 0.1396, "loss_nan_ranks": 0, "loss_rank_avg": 0.046166472136974335, "step": 395, "valid_targets_mean": 8789.5, "valid_targets_min": 3494 }, { "epoch": 3.8146964856230032, "grad_norm": 0.15960953666875605, "learning_rate": 2.0522747105911378e-05, "loss": 0.1381, "loss_nan_ranks": 0, "loss_rank_avg": 0.04279767721891403, "step": 400, "valid_targets_mean": 7870.9, "valid_targets_min": 2682 }, { "epoch": 3.862619808306709, "grad_norm": 0.16843602717950168, "learning_rate": 2.0047527832022674e-05, "loss": 0.1378, "loss_nan_ranks": 0, "loss_rank_avg": 0.0413513258099556, "step": 405, "valid_targets_mean": 8123.9, "valid_targets_min": 1989 }, { "epoch": 3.9105431309904155, "grad_norm": 0.16005031955715257, "learning_rate": 1.9572281719253186e-05, "loss": 0.1373, "loss_nan_ranks": 0, "loss_rank_avg": 0.04267306998372078, "step": 410, "valid_targets_mean": 8465.4, "valid_targets_min": 2045 }, { "epoch": 3.9584664536741214, "grad_norm": 0.15809782385997903, "learning_rate": 1.909727713822333e-05, "loss": 0.1379, "loss_nan_ranks": 0, "loss_rank_avg": 0.04258957877755165, "step": 415, "valid_targets_mean": 7733.6, "valid_targets_min": 2233 }, { "epoch": 4.0, "grad_norm": 0.24269592632341946, "learning_rate": 1.8622782323161014e-05, "loss": 0.1343, "loss_nan_ranks": 0, "loss_rank_avg": 0.1285635381937027, "step": 420, "valid_targets_mean": 8452.1, "valid_targets_min": 2045 }, { "epoch": 4.047923322683706, "grad_norm": 0.15907993684353738, "learning_rate": 1.8149065220430197e-05, "loss": 0.1358, "loss_nan_ranks": 0, "loss_rank_avg": 0.04193146899342537, "step": 425, "valid_targets_mean": 8624.6, "valid_targets_min": 4420 }, { "epoch": 4.095846645367412, "grad_norm": 0.18925345564072582, "learning_rate": 1.7676393337222115e-05, "loss": 0.1355, "loss_nan_ranks": 0, "loss_rank_avg": 0.04186321049928665, "step": 430, "valid_targets_mean": 7756.4, "valid_targets_min": 1221 }, { "epoch": 4.143769968051118, "grad_norm": 0.16711423931591982, "learning_rate": 1.7205033590494426e-05, "loss": 0.1363, "loss_nan_ranks": 0, "loss_rank_avg": 0.047112271189689636, "step": 435, "valid_targets_mean": 9224.7, "valid_targets_min": 1487 }, { "epoch": 4.1916932907348246, "grad_norm": 0.17036236145352987, "learning_rate": 1.6735252156243675e-05, "loss": 0.1337, "loss_nan_ranks": 0, "loss_rank_avg": 0.04689479246735573, "step": 440, "valid_targets_mean": 8922.7, "valid_targets_min": 3916 }, { "epoch": 4.23961661341853, "grad_norm": 0.1608190220970628, "learning_rate": 1.6267314319196215e-05, "loss": 0.1324, "loss_nan_ranks": 0, "loss_rank_avg": 0.04334461688995361, "step": 445, "valid_targets_mean": 8931.1, "valid_targets_min": 3494 }, { "epoch": 4.287539936102236, "grad_norm": 0.15484652532421975, "learning_rate": 1.580148432300241e-05, "loss": 0.1363, "loss_nan_ranks": 0, "loss_rank_avg": 0.04521436244249344, "step": 450, "valid_targets_mean": 8411.7, "valid_targets_min": 2459 }, { "epoch": 4.335463258785943, "grad_norm": 0.15640888798523264, "learning_rate": 1.5338025221018668e-05, "loss": 0.1356, "loss_nan_ranks": 0, "loss_rank_avg": 0.04313844442367554, "step": 455, "valid_targets_mean": 7779.2, "valid_targets_min": 2404 }, { "epoch": 4.383386581469648, "grad_norm": 0.17735167228744403, "learning_rate": 1.4877198727761748e-05, "loss": 0.1337, "loss_nan_ranks": 0, "loss_rank_avg": 0.04428839683532715, "step": 460, "valid_targets_mean": 8799.6, "valid_targets_min": 3400 }, { "epoch": 4.431309904153355, "grad_norm": 0.15431579302592524, "learning_rate": 1.4419265071119038e-05, "loss": 0.1379, "loss_nan_ranks": 0, "loss_rank_avg": 0.04533102735877037, "step": 465, "valid_targets_mean": 8351.8, "valid_targets_min": 1615 }, { "epoch": 4.479233226837061, "grad_norm": 0.1569404780015899, "learning_rate": 1.3964482845398281e-05, "loss": 0.1331, "loss_nan_ranks": 0, "loss_rank_avg": 0.04367567598819733, "step": 470, "valid_targets_mean": 8522.4, "valid_targets_min": 2404 }, { "epoch": 4.527156549520766, "grad_norm": 0.1780710965354458, "learning_rate": 1.3513108865299907e-05, "loss": 0.1326, "loss_nan_ranks": 0, "loss_rank_avg": 0.043544650077819824, "step": 475, "valid_targets_mean": 9276.5, "valid_targets_min": 1840 }, { "epoch": 4.575079872204473, "grad_norm": 0.15959810688800943, "learning_rate": 1.3065398020894202e-05, "loss": 0.1337, "loss_nan_ranks": 0, "loss_rank_avg": 0.04500932991504669, "step": 480, "valid_targets_mean": 8126.5, "valid_targets_min": 2047 }, { "epoch": 4.623003194888179, "grad_norm": 0.17116807313960172, "learning_rate": 1.2621603133685343e-05, "loss": 0.1333, "loss_nan_ranks": 0, "loss_rank_avg": 0.04575769975781441, "step": 485, "valid_targets_mean": 8706.4, "valid_targets_min": 1597 }, { "epoch": 4.6709265175718855, "grad_norm": 0.17715013972093857, "learning_rate": 1.218197481384356e-05, "loss": 0.134, "loss_nan_ranks": 0, "loss_rank_avg": 0.048376478254795074, "step": 490, "valid_targets_mean": 8893.8, "valid_targets_min": 4343 }, { "epoch": 4.718849840255591, "grad_norm": 0.1570645103662247, "learning_rate": 1.1746761318686044e-05, "loss": 0.1355, "loss_nan_ranks": 0, "loss_rank_avg": 0.044359538704156876, "step": 495, "valid_targets_mean": 9139.7, "valid_targets_min": 3056 }, { "epoch": 4.766773162939297, "grad_norm": 0.15423551749596906, "learning_rate": 1.1316208412486443e-05, "loss": 0.1313, "loss_nan_ranks": 0, "loss_rank_avg": 0.042892444878816605, "step": 500, "valid_targets_mean": 8499.4, "valid_targets_min": 1884 }, { "epoch": 4.814696485623003, "grad_norm": 0.16453480642681878, "learning_rate": 1.0890559227692265e-05, "loss": 0.1364, "loss_nan_ranks": 0, "loss_rank_avg": 0.04749216139316559, "step": 505, "valid_targets_mean": 8283.1, "valid_targets_min": 2513 }, { "epoch": 4.862619808306709, "grad_norm": 0.1664740998586081, "learning_rate": 1.0470054127628411e-05, "loss": 0.1322, "loss_nan_ranks": 0, "loss_rank_avg": 0.04286997765302658, "step": 510, "valid_targets_mean": 8387.6, "valid_targets_min": 1679 }, { "epoch": 4.9105431309904155, "grad_norm": 0.15516781277136965, "learning_rate": 1.0054930570764427e-05, "loss": 0.1327, "loss_nan_ranks": 0, "loss_rank_avg": 0.045819588005542755, "step": 515, "valid_targets_mean": 9220.8, "valid_targets_min": 3475 }, { "epoch": 4.958466453674122, "grad_norm": 0.1452771278632104, "learning_rate": 9.645422976622154e-06, "loss": 0.1313, "loss_nan_ranks": 0, "loss_rank_avg": 0.04815547540783882, "step": 520, "valid_targets_mean": 9208.9, "valid_targets_min": 3189 }, { "epoch": 5.0, "grad_norm": 0.2436229369532187, "learning_rate": 9.241762593399437e-06, "loss": 0.1322, "loss_nan_ranks": 0, "loss_rank_avg": 0.1331784427165985, "step": 525, "valid_targets_mean": 8954.4, "valid_targets_min": 2293 }, { "epoch": 5.047923322683706, "grad_norm": 0.1569691647374951, "learning_rate": 8.844177367384689e-06, "loss": 0.1287, "loss_nan_ranks": 0, "loss_rank_avg": 0.04233275726437569, "step": 530, "valid_targets_mean": 9024.9, "valid_targets_min": 3046 }, { "epoch": 5.095846645367412, "grad_norm": 0.1516535037265207, "learning_rate": 8.452891814236037e-06, "loss": 0.1319, "loss_nan_ranks": 0, "loss_rank_avg": 0.04785648733377457, "step": 535, "valid_targets_mean": 8453.7, "valid_targets_min": 1823 }, { "epoch": 5.143769968051118, "grad_norm": 0.1545110306606347, "learning_rate": 8.068126892197728e-06, "loss": 0.1327, "loss_nan_ranks": 0, "loss_rank_avg": 0.04464545473456383, "step": 540, "valid_targets_mean": 8877.0, "valid_targets_min": 4783 }, { "epoch": 5.1916932907348246, "grad_norm": 0.15588257021266194, "learning_rate": 7.690099877325419e-06, "loss": 0.1317, "loss_nan_ranks": 0, "loss_rank_avg": 0.04059495031833649, "step": 545, "valid_targets_mean": 8231.1, "valid_targets_min": 4328 }, { "epoch": 5.23961661341853, "grad_norm": 0.16029539285883082, "learning_rate": 7.319024240790768e-06, "loss": 0.1323, "loss_nan_ranks": 0, "loss_rank_avg": 0.04444342479109764, "step": 550, "valid_targets_mean": 8885.3, "valid_targets_min": 1621 }, { "epoch": 5.287539936102236, "grad_norm": 0.15541690395010793, "learning_rate": 6.955109528334667e-06, "loss": 0.1295, "loss_nan_ranks": 0, "loss_rank_avg": 0.04558786749839783, "step": 555, "valid_targets_mean": 8818.6, "valid_targets_min": 4804 }, { "epoch": 5.335463258785943, "grad_norm": 0.1632813232184168, "learning_rate": 6.59856124193712e-06, "loss": 0.1307, "loss_nan_ranks": 0, "loss_rank_avg": 0.04429105669260025, "step": 560, "valid_targets_mean": 8736.4, "valid_targets_min": 3477 }, { "epoch": 5.383386581469648, "grad_norm": 0.15319164306426114, "learning_rate": 6.249580723770665e-06, "loss": 0.1295, "loss_nan_ranks": 0, "loss_rank_avg": 0.040787823498249054, "step": 565, "valid_targets_mean": 8843.0, "valid_targets_min": 2326 }, { "epoch": 5.431309904153355, "grad_norm": 0.15092410861061442, "learning_rate": 5.908365042502801e-06, "loss": 0.1314, "loss_nan_ranks": 0, "loss_rank_avg": 0.04534231498837471, "step": 570, "valid_targets_mean": 9223.4, "valid_targets_min": 3669 }, { "epoch": 5.479233226837061, "grad_norm": 0.16472723067001716, "learning_rate": 5.5751068820116784e-06, "loss": 0.1307, "loss_nan_ranks": 0, "loss_rank_avg": 0.04310306906700134, "step": 575, "valid_targets_mean": 8187.3, "valid_targets_min": 3898 }, { "epoch": 5.527156549520766, "grad_norm": 0.15749687446681768, "learning_rate": 5.24999443257785e-06, "loss": 0.128, "loss_nan_ranks": 0, "loss_rank_avg": 0.04516970366239548, "step": 580, "valid_targets_mean": 8733.0, "valid_targets_min": 4982 }, { "epoch": 5.575079872204473, "grad_norm": 0.16061448332897996, "learning_rate": 4.9332112846135664e-06, "loss": 0.1329, "loss_nan_ranks": 0, "loss_rank_avg": 0.04158013314008713, "step": 585, "valid_targets_mean": 8391.5, "valid_targets_min": 2404 }, { "epoch": 5.623003194888179, "grad_norm": 0.1521419599103487, "learning_rate": 4.624936324989602e-06, "loss": 0.1321, "loss_nan_ranks": 0, "loss_rank_avg": 0.04744524508714676, "step": 590, "valid_targets_mean": 8968.4, "valid_targets_min": 3648 }, { "epoch": 5.6709265175718855, "grad_norm": 0.1570326671883014, "learning_rate": 4.325343636018165e-06, "loss": 0.129, "loss_nan_ranks": 0, "loss_rank_avg": 0.04405239224433899, "step": 595, "valid_targets_mean": 8272.5, "valid_targets_min": 2861 }, { "epoch": 5.718849840255591, "grad_norm": 0.1553455532119064, "learning_rate": 4.0346023971489215e-06, "loss": 0.1348, "loss_nan_ranks": 0, "loss_rank_avg": 0.040806423872709274, "step": 600, "valid_targets_mean": 7626.4, "valid_targets_min": 2787 }, { "epoch": 5.766773162939297, "grad_norm": 0.16542431175580907, "learning_rate": 3.752876789433677e-06, "loss": 0.1289, "loss_nan_ranks": 0, "loss_rank_avg": 0.041984084993600845, "step": 605, "valid_targets_mean": 7940.9, "valid_targets_min": 1813 }, { "epoch": 5.814696485623003, "grad_norm": 0.15600260550531508, "learning_rate": 3.480325902813624e-06, "loss": 0.1301, "loss_nan_ranks": 0, "loss_rank_avg": 0.042893338948488235, "step": 610, "valid_targets_mean": 9183.9, "valid_targets_min": 2349 }, { "epoch": 5.862619808306709, "grad_norm": 0.14503907630842894, "learning_rate": 3.2171036462815563e-06, "loss": 0.1296, "loss_nan_ranks": 0, "loss_rank_avg": 0.04696850851178169, "step": 615, "valid_targets_mean": 9935.0, "valid_targets_min": 3645 }, { "epoch": 5.9105431309904155, "grad_norm": 0.1460369617623546, "learning_rate": 2.9633586609697086e-06, "loss": 0.133, "loss_nan_ranks": 0, "loss_rank_avg": 0.04132133349776268, "step": 620, "valid_targets_mean": 8354.1, "valid_targets_min": 1863 }, { "epoch": 5.958466453674122, "grad_norm": 0.14429256697612128, "learning_rate": 2.7192342362124048e-06, "loss": 0.1318, "loss_nan_ranks": 0, "loss_rank_avg": 0.04253305494785309, "step": 625, "valid_targets_mean": 8797.8, "valid_targets_min": 2888 }, { "epoch": 6.0, "grad_norm": 0.24017089407840048, "learning_rate": 2.4848682286308346e-06, "loss": 0.1291, "loss_nan_ranks": 0, "loss_rank_avg": 0.11856501549482346, "step": 630, "valid_targets_mean": 9350.8, "valid_targets_min": 2204 }, { "epoch": 6.047923322683706, "grad_norm": 0.14604381915322243, "learning_rate": 2.260392984285633e-06, "loss": 0.1264, "loss_nan_ranks": 0, "loss_rank_avg": 0.04177611321210861, "step": 635, "valid_targets_mean": 8590.9, "valid_targets_min": 1520 }, { "epoch": 6.095846645367412, "grad_norm": 0.15105207764952777, "learning_rate": 2.0459352639413343e-06, "loss": 0.1307, "loss_nan_ranks": 0, "loss_rank_avg": 0.036490269005298615, "step": 640, "valid_targets_mean": 8303.5, "valid_targets_min": 983 }, { "epoch": 6.143769968051118, "grad_norm": 0.14365857813801858, "learning_rate": 1.841616171484797e-06, "loss": 0.1289, "loss_nan_ranks": 0, "loss_rank_avg": 0.03588128834962845, "step": 645, "valid_targets_mean": 7843.5, "valid_targets_min": 2037 }, { "epoch": 6.1916932907348246, "grad_norm": 0.17040842356447042, "learning_rate": 1.6475510855380195e-06, "loss": 0.1281, "loss_nan_ranks": 0, "loss_rank_avg": 0.03724232316017151, "step": 650, "valid_targets_mean": 8363.8, "valid_targets_min": 2404 }, { "epoch": 6.23961661341853, "grad_norm": 0.14563485589355804, "learning_rate": 1.4638495943040854e-06, "loss": 0.1319, "loss_nan_ranks": 0, "loss_rank_avg": 0.0440392792224884, "step": 655, "valid_targets_mean": 9282.5, "valid_targets_min": 1989 }, { "epoch": 6.287539936102236, "grad_norm": 0.15135495829792323, "learning_rate": 1.2906154336828913e-06, "loss": 0.1294, "loss_nan_ranks": 0, "loss_rank_avg": 0.044929858297109604, "step": 660, "valid_targets_mean": 9016.3, "valid_targets_min": 3426 }, { "epoch": 6.335463258785943, "grad_norm": 0.1512474724189636, "learning_rate": 1.1279464286916508e-06, "loss": 0.1272, "loss_nan_ranks": 0, "loss_rank_avg": 0.043425947427749634, "step": 665, "valid_targets_mean": 8946.5, "valid_targets_min": 1626 }, { "epoch": 6.383386581469648, "grad_norm": 0.16216185730198185, "learning_rate": 9.759344382233048e-07, "loss": 0.1299, "loss_nan_ranks": 0, "loss_rank_avg": 0.046256884932518005, "step": 670, "valid_targets_mean": 7957.6, "valid_targets_min": 2969 }, { "epoch": 6.431309904153355, "grad_norm": 0.15052277362683217, "learning_rate": 8.34665303173976e-07, "loss": 0.1296, "loss_nan_ranks": 0, "loss_rank_avg": 0.04201345890760422, "step": 675, "valid_targets_mean": 8329.0, "valid_targets_min": 2717 }, { "epoch": 6.479233226837061, "grad_norm": 0.1465602776227053, "learning_rate": 7.042187979687432e-07, "loss": 0.1302, "loss_nan_ranks": 0, "loss_rank_avg": 0.03988049924373627, "step": 680, "valid_targets_mean": 8283.2, "valid_targets_min": 2349 }, { "epoch": 6.527156549520766, "grad_norm": 0.14185496391585614, "learning_rate": 5.846685855131929e-07, "loss": 0.1301, "loss_nan_ranks": 0, "loss_rank_avg": 0.040278732776641846, "step": 685, "valid_targets_mean": 9115.4, "valid_targets_min": 3979 }, { "epoch": 6.575079872204473, "grad_norm": 0.1561692679171069, "learning_rate": 4.760821755961065e-07, "loss": 0.131, "loss_nan_ranks": 0, "loss_rank_avg": 0.04538102447986603, "step": 690, "valid_targets_mean": 8077.0, "valid_targets_min": 3672 }, { "epoch": 6.623003194888179, "grad_norm": 0.1505424403302318, "learning_rate": 3.7852088676678665e-07, "loss": 0.1305, "loss_nan_ranks": 0, "loss_rank_avg": 0.04282670468091965, "step": 695, "valid_targets_mean": 8324.1, "valid_targets_min": 2674 }, { "epoch": 6.6709265175718855, "grad_norm": 0.1530107403327867, "learning_rate": 2.920398117086043e-07, "loss": 0.1277, "loss_nan_ranks": 0, "loss_rank_avg": 0.04476252198219299, "step": 700, "valid_targets_mean": 8553.5, "valid_targets_min": 3077 }, { "epoch": 6.718849840255591, "grad_norm": 0.1451392180151871, "learning_rate": 2.1668778612825347e-07, "loss": 0.1289, "loss_nan_ranks": 0, "loss_rank_avg": 0.04205950349569321, "step": 705, "valid_targets_mean": 8100.0, "valid_targets_min": 2628 }, { "epoch": 6.766773162939297, "grad_norm": 0.1540835400434045, "learning_rate": 1.5250736117830455e-07, "loss": 0.1307, "loss_nan_ranks": 0, "loss_rank_avg": 0.04908519983291626, "step": 710, "valid_targets_mean": 8852.7, "valid_targets_min": 1823 }, { "epoch": 6.814696485623003, "grad_norm": 0.1511755346652257, "learning_rate": 9.953477942866052e-08, "loss": 0.1336, "loss_nan_ranks": 0, "loss_rank_avg": 0.047512397170066833, "step": 715, "valid_targets_mean": 8784.2, "valid_targets_min": 2941 }, { "epoch": 6.862619808306709, "grad_norm": 0.14155551457486815, "learning_rate": 5.779995440044594e-08, "loss": 0.1285, "loss_nan_ranks": 0, "loss_rank_avg": 0.04104035720229149, "step": 720, "valid_targets_mean": 9481.8, "valid_targets_min": 3244 }, { "epoch": 6.9105431309904155, "grad_norm": 0.13812752928008018, "learning_rate": 2.7326453673872653e-08, "loss": 0.1312, "loss_nan_ranks": 0, "loss_rank_avg": 0.04090867191553116, "step": 725, "valid_targets_mean": 8866.8, "valid_targets_min": 3547 }, { "epoch": 6.958466453674122, "grad_norm": 0.1527802388811017, "learning_rate": 8.131485579692121e-09, "loss": 0.1275, "loss_nan_ranks": 0, "loss_rank_avg": 0.04462620988488197, "step": 730, "valid_targets_mean": 9477.2, "valid_targets_min": 4703 }, { "epoch": 7.0, "grad_norm": 0.27520263662765626, "learning_rate": 2.2588948167756586e-10, "loss": 0.1299, "loss_nan_ranks": 0, "loss_rank_avg": 0.13444499671459198, "step": 735, "valid_targets_mean": 8868.2, "valid_targets_min": 4152 }, { "epoch": 7.0, "step": 735, "total_flos": 5.441373116175483e+18, "train_loss": 0.0, "train_runtime": 1.1873, "train_samples_per_second": 58956.554, "train_steps_per_second": 619.044 } ], "logging_steps": 5, "max_steps": 735, "num_input_tokens_seen": 0, "num_train_epochs": 7, "save_steps": 300, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 5.441373116175483e+18, "train_batch_size": 1, "trial_name": null, "trial_params": null }