coderforge-10000__Qwen3-8B / trainer_state.json
penfever's picture
Add files using upload-large-folder tool
63d1e43 verified
{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 7.0,
"eval_steps": 500,
"global_step": 735,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.04792332268370607,
"grad_norm": 8.982871202392836,
"learning_rate": 2.1621621621621623e-06,
"loss": 0.4936,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1592181921005249,
"step": 5,
"valid_targets_mean": 8838.0,
"valid_targets_min": 2815
},
{
"epoch": 0.09584664536741214,
"grad_norm": 4.869040341382149,
"learning_rate": 4.864864864864866e-06,
"loss": 0.4591,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.14799830317497253,
"step": 10,
"valid_targets_mean": 8579.1,
"valid_targets_min": 2498
},
{
"epoch": 0.14376996805111822,
"grad_norm": 1.7556018862586495,
"learning_rate": 7.567567567567569e-06,
"loss": 0.4137,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13692015409469604,
"step": 15,
"valid_targets_mean": 8986.4,
"valid_targets_min": 3372
},
{
"epoch": 0.19169329073482427,
"grad_norm": 1.0418919912451101,
"learning_rate": 1.027027027027027e-05,
"loss": 0.3792,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11497873812913895,
"step": 20,
"valid_targets_mean": 8493.1,
"valid_targets_min": 1758
},
{
"epoch": 0.23961661341853036,
"grad_norm": 0.621330940276866,
"learning_rate": 1.2972972972972975e-05,
"loss": 0.3418,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10854463279247284,
"step": 25,
"valid_targets_mean": 7958.0,
"valid_targets_min": 2019
},
{
"epoch": 0.28753993610223644,
"grad_norm": 0.4964510949959199,
"learning_rate": 1.5675675675675676e-05,
"loss": 0.3153,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11034172028303146,
"step": 30,
"valid_targets_mean": 9250.5,
"valid_targets_min": 1615
},
{
"epoch": 0.3354632587859425,
"grad_norm": 0.3600917256903157,
"learning_rate": 1.8378378378378383e-05,
"loss": 0.2935,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09477648138999939,
"step": 35,
"valid_targets_mean": 8652.3,
"valid_targets_min": 3949
},
{
"epoch": 0.38338658146964855,
"grad_norm": 0.30301686045879966,
"learning_rate": 2.1081081081081082e-05,
"loss": 0.2636,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.07665778696537018,
"step": 40,
"valid_targets_mean": 8658.3,
"valid_targets_min": 2634
},
{
"epoch": 0.43130990415335463,
"grad_norm": 0.2505991304110098,
"learning_rate": 2.378378378378379e-05,
"loss": 0.242,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.07965461909770966,
"step": 45,
"valid_targets_mean": 8387.6,
"valid_targets_min": 1267
},
{
"epoch": 0.4792332268370607,
"grad_norm": 0.2026378613321421,
"learning_rate": 2.6486486486486488e-05,
"loss": 0.2276,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.06772664189338684,
"step": 50,
"valid_targets_mean": 8567.6,
"valid_targets_min": 2215
},
{
"epoch": 0.5271565495207667,
"grad_norm": 0.18063483795315363,
"learning_rate": 2.918918918918919e-05,
"loss": 0.2204,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.07574020326137543,
"step": 55,
"valid_targets_mean": 9203.6,
"valid_targets_min": 2349
},
{
"epoch": 0.5750798722044729,
"grad_norm": 0.18260057468719235,
"learning_rate": 3.1891891891891894e-05,
"loss": 0.2114,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.0646074041724205,
"step": 60,
"valid_targets_mean": 7918.9,
"valid_targets_min": 1966
},
{
"epoch": 0.6230031948881789,
"grad_norm": 0.1952944182498782,
"learning_rate": 3.45945945945946e-05,
"loss": 0.2049,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.06607531756162643,
"step": 65,
"valid_targets_mean": 8708.2,
"valid_targets_min": 4730
},
{
"epoch": 0.670926517571885,
"grad_norm": 0.1596272851527542,
"learning_rate": 3.72972972972973e-05,
"loss": 0.195,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.06611350178718567,
"step": 70,
"valid_targets_mean": 9198.4,
"valid_targets_min": 2913
},
{
"epoch": 0.7188498402555911,
"grad_norm": 0.18104797378120346,
"learning_rate": 4e-05,
"loss": 0.1905,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.06400163471698761,
"step": 75,
"valid_targets_mean": 7796.8,
"valid_targets_min": 2499
},
{
"epoch": 0.7667731629392971,
"grad_norm": 0.16498793730423283,
"learning_rate": 3.999435301808432e-05,
"loss": 0.192,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.05754277855157852,
"step": 80,
"valid_targets_mean": 7951.4,
"valid_targets_min": 3044
},
{
"epoch": 0.8146964856230032,
"grad_norm": 0.16971530607334578,
"learning_rate": 3.997741526117775e-05,
"loss": 0.1862,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.06384151428937912,
"step": 85,
"valid_targets_mean": 8073.7,
"valid_targets_min": 1933
},
{
"epoch": 0.8626198083067093,
"grad_norm": 0.16016325817445584,
"learning_rate": 3.994919629400098e-05,
"loss": 0.1776,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.06105092912912369,
"step": 90,
"valid_targets_mean": 8430.1,
"valid_targets_min": 3147
},
{
"epoch": 0.9105431309904153,
"grad_norm": 0.15777170003961027,
"learning_rate": 3.990971205175375e-05,
"loss": 0.1806,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.057581670582294464,
"step": 95,
"valid_targets_mean": 8585.2,
"valid_targets_min": 2840
},
{
"epoch": 0.9584664536741214,
"grad_norm": 0.18646622741641677,
"learning_rate": 3.985898483111624e-05,
"loss": 0.1755,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.05864211171865463,
"step": 100,
"valid_targets_mean": 8463.8,
"valid_targets_min": 3838
},
{
"epoch": 1.0,
"grad_norm": 0.2511356329717353,
"learning_rate": 3.979704327765823e-05,
"loss": 0.1729,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.16539210081100464,
"step": 105,
"valid_targets_mean": 8758.9,
"valid_targets_min": 3781
},
{
"epoch": 1.0479233226837061,
"grad_norm": 0.16279705622118554,
"learning_rate": 3.972392236966291e-05,
"loss": 0.1706,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.053829021751880646,
"step": 110,
"valid_targets_mean": 8902.6,
"valid_targets_min": 1913
},
{
"epoch": 1.095846645367412,
"grad_norm": 0.17163665454737814,
"learning_rate": 3.963966339837482e-05,
"loss": 0.1714,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.05452179163694382,
"step": 115,
"valid_targets_mean": 8963.6,
"valid_targets_min": 4027
},
{
"epoch": 1.1437699680511182,
"grad_norm": 0.15502231795052268,
"learning_rate": 3.954431394468266e-05,
"loss": 0.1709,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.061408303678035736,
"step": 120,
"valid_targets_mean": 9384.2,
"valid_targets_min": 4126
},
{
"epoch": 1.1916932907348243,
"grad_norm": 0.26911312131506865,
"learning_rate": 3.943792785225049e-05,
"loss": 0.1666,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.053546734154224396,
"step": 125,
"valid_targets_mean": 8582.1,
"valid_targets_min": 1267
},
{
"epoch": 1.2396166134185305,
"grad_norm": 0.1632755315156534,
"learning_rate": 3.932056519711232e-05,
"loss": 0.1631,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.052455998957157135,
"step": 130,
"valid_targets_mean": 9007.6,
"valid_targets_min": 1489
},
{
"epoch": 1.2875399361022364,
"grad_norm": 0.17105174576530605,
"learning_rate": 3.919229225374726e-05,
"loss": 0.1656,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.04397791996598244,
"step": 135,
"valid_targets_mean": 7291.8,
"valid_targets_min": 941
},
{
"epoch": 1.3354632587859425,
"grad_norm": 0.17198756870105245,
"learning_rate": 3.9053181457654465e-05,
"loss": 0.1625,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.05939662456512451,
"step": 140,
"valid_targets_mean": 9653.4,
"valid_targets_min": 2045
},
{
"epoch": 1.3833865814696487,
"grad_norm": 0.15757402041255247,
"learning_rate": 3.89033113644489e-05,
"loss": 0.1584,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.05102524161338806,
"step": 145,
"valid_targets_mean": 9247.4,
"valid_targets_min": 2404
},
{
"epoch": 1.4313099041533546,
"grad_norm": 0.1669340893696013,
"learning_rate": 3.874276660550119e-05,
"loss": 0.1621,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.054257169365882874,
"step": 150,
"valid_targets_mean": 8428.7,
"valid_targets_min": 1817
},
{
"epoch": 1.4792332268370607,
"grad_norm": 0.17930744302420296,
"learning_rate": 3.857163784014636e-05,
"loss": 0.1632,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.05627815052866936,
"step": 155,
"valid_targets_mean": 9194.1,
"valid_targets_min": 1771
},
{
"epoch": 1.5271565495207668,
"grad_norm": 0.16977270176183656,
"learning_rate": 3.8390021704488735e-05,
"loss": 0.1613,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.05593028664588928,
"step": 160,
"valid_targets_mean": 8621.9,
"valid_targets_min": 3475
},
{
"epoch": 1.5750798722044728,
"grad_norm": 0.17445879345229784,
"learning_rate": 3.8198020756831694e-05,
"loss": 0.1595,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.055247336626052856,
"step": 165,
"valid_targets_mean": 8989.5,
"valid_targets_min": 3428
},
{
"epoch": 1.623003194888179,
"grad_norm": 0.17465115639380263,
"learning_rate": 3.799574341976314e-05,
"loss": 0.1546,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.05567849799990654,
"step": 170,
"valid_targets_mean": 9782.6,
"valid_targets_min": 2450
},
{
"epoch": 1.670926517571885,
"grad_norm": 0.1734995913932172,
"learning_rate": 3.778330391892952e-05,
"loss": 0.1593,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.053123295307159424,
"step": 175,
"valid_targets_mean": 8914.3,
"valid_targets_min": 2891
},
{
"epoch": 1.718849840255591,
"grad_norm": 0.15804349256301092,
"learning_rate": 3.7560822218532774e-05,
"loss": 0.1572,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.050238993018865585,
"step": 180,
"valid_targets_mean": 8918.0,
"valid_targets_min": 4581
},
{
"epoch": 1.766773162939297,
"grad_norm": 0.18475759093632027,
"learning_rate": 3.732842395358677e-05,
"loss": 0.1562,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.050228629261255264,
"step": 185,
"valid_targets_mean": 8194.7,
"valid_targets_min": 3799
},
{
"epoch": 1.8146964856230032,
"grad_norm": 0.17207806449259455,
"learning_rate": 3.708624035897144e-05,
"loss": 0.1561,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.05894997715950012,
"step": 190,
"valid_targets_mean": 8873.7,
"valid_targets_min": 3497
},
{
"epoch": 1.8626198083067091,
"grad_norm": 0.160383974551531,
"learning_rate": 3.68344081953247e-05,
"loss": 0.1558,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.05507740378379822,
"step": 195,
"valid_targets_mean": 9449.2,
"valid_targets_min": 2927
},
{
"epoch": 1.9105431309904153,
"grad_norm": 0.1688923848526839,
"learning_rate": 3.657306967181394e-05,
"loss": 0.1574,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.051344458013772964,
"step": 200,
"valid_targets_mean": 7713.0,
"valid_targets_min": 2578
},
{
"epoch": 1.9584664536741214,
"grad_norm": 0.17407301357558894,
"learning_rate": 3.630237236583077e-05,
"loss": 0.153,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.056261416524648666,
"step": 205,
"valid_targets_mean": 8383.5,
"valid_targets_min": 4391
},
{
"epoch": 2.0,
"grad_norm": 0.24633784795849772,
"learning_rate": 3.6022469139654345e-05,
"loss": 0.157,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.15247748792171478,
"step": 210,
"valid_targets_mean": 8627.0,
"valid_targets_min": 2270
},
{
"epoch": 2.047923322683706,
"grad_norm": 0.17399949855938723,
"learning_rate": 3.57335180541303e-05,
"loss": 0.1475,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.04970664530992508,
"step": 215,
"valid_targets_mean": 9049.2,
"valid_targets_min": 2971
},
{
"epoch": 2.0958466453674123,
"grad_norm": 0.22794184274320198,
"learning_rate": 3.543568227941408e-05,
"loss": 0.1454,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.05050533637404442,
"step": 220,
"valid_targets_mean": 8644.0,
"valid_targets_min": 3723
},
{
"epoch": 2.143769968051118,
"grad_norm": 0.18806892774190778,
"learning_rate": 3.512913000282905e-05,
"loss": 0.1489,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.05213526636362076,
"step": 225,
"valid_targets_mean": 8811.9,
"valid_targets_min": 1989
},
{
"epoch": 2.191693290734824,
"grad_norm": 0.1555212602218601,
"learning_rate": 3.481403433389142e-05,
"loss": 0.1477,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.04308829829096794,
"step": 230,
"valid_targets_mean": 8940.4,
"valid_targets_min": 1986
},
{
"epoch": 2.2396166134185305,
"grad_norm": 0.15895262335449095,
"learning_rate": 3.449057320655561e-05,
"loss": 0.1477,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.048327021300792694,
"step": 235,
"valid_targets_mean": 9288.6,
"valid_targets_min": 2617
},
{
"epoch": 2.2875399361022364,
"grad_norm": 0.15793093041554393,
"learning_rate": 3.415892927873527e-05,
"loss": 0.1515,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.04683384671807289,
"step": 240,
"valid_targets_mean": 8370.8,
"valid_targets_min": 1504
},
{
"epoch": 2.3354632587859427,
"grad_norm": 0.14678394997302097,
"learning_rate": 3.381928982915668e-05,
"loss": 0.1453,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.04515903443098068,
"step": 245,
"valid_targets_mean": 9194.8,
"valid_targets_min": 4400
},
{
"epoch": 2.3833865814696487,
"grad_norm": 0.1603781979087474,
"learning_rate": 3.3471846651602815e-05,
"loss": 0.1474,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.05253840982913971,
"step": 250,
"valid_targets_mean": 8766.1,
"valid_targets_min": 3267
},
{
"epoch": 2.4313099041533546,
"grad_norm": 0.15879085539353155,
"learning_rate": 3.31167959466077e-05,
"loss": 0.1473,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.05009625852108002,
"step": 255,
"valid_targets_mean": 8309.4,
"valid_targets_min": 4013
},
{
"epoch": 2.479233226837061,
"grad_norm": 0.14424018717194137,
"learning_rate": 3.275433821066237e-05,
"loss": 0.1461,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.04807288199663162,
"step": 260,
"valid_targets_mean": 9051.2,
"valid_targets_min": 2047
},
{
"epoch": 2.527156549520767,
"grad_norm": 0.159320759658969,
"learning_rate": 3.238467812299483e-05,
"loss": 0.1491,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.04604367911815643,
"step": 265,
"valid_targets_mean": 8164.8,
"valid_targets_min": 1963
},
{
"epoch": 2.5750798722044728,
"grad_norm": 0.16789223040102477,
"learning_rate": 3.200802442998807e-05,
"loss": 0.1459,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.048190124332904816,
"step": 270,
"valid_targets_mean": 9103.1,
"valid_targets_min": 1210
},
{
"epoch": 2.623003194888179,
"grad_norm": 0.17379236030377787,
"learning_rate": 3.1624589827301395e-05,
"loss": 0.1463,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.05079440772533417,
"step": 275,
"valid_targets_mean": 7991.2,
"valid_targets_min": 3517
},
{
"epoch": 2.670926517571885,
"grad_norm": 0.18305013542111226,
"learning_rate": 3.123459083976152e-05,
"loss": 0.1463,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.04407097026705742,
"step": 280,
"valid_targets_mean": 8356.0,
"valid_targets_min": 2326
},
{
"epoch": 2.718849840255591,
"grad_norm": 0.1664728450197544,
"learning_rate": 3.083824769909142e-05,
"loss": 0.1482,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.04852219298481941,
"step": 285,
"valid_targets_mean": 9024.8,
"valid_targets_min": 2931
},
{
"epoch": 2.7667731629392973,
"grad_norm": 0.1578734610961178,
"learning_rate": 3.0435784219545872e-05,
"loss": 0.1493,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.047358766198158264,
"step": 290,
"valid_targets_mean": 8802.1,
"valid_targets_min": 2204
},
{
"epoch": 2.8146964856230032,
"grad_norm": 0.22755109653603342,
"learning_rate": 3.0027427671523957e-05,
"loss": 0.1465,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.0490507110953331,
"step": 295,
"valid_targets_mean": 8245.7,
"valid_targets_min": 3666
},
{
"epoch": 2.862619808306709,
"grad_norm": 0.16269867408736244,
"learning_rate": 2.961340865322984e-05,
"loss": 0.1459,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.04793532192707062,
"step": 300,
"valid_targets_mean": 8419.0,
"valid_targets_min": 973
},
{
"epoch": 2.9105431309904155,
"grad_norm": 0.1528249668702955,
"learning_rate": 2.9193960960454446e-05,
"loss": 0.1467,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.043026067316532135,
"step": 305,
"valid_targets_mean": 8449.1,
"valid_targets_min": 2645
},
{
"epoch": 2.9584664536741214,
"grad_norm": 0.18558974597394937,
"learning_rate": 2.8769321454551327e-05,
"loss": 0.147,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.044257231056690216,
"step": 310,
"valid_targets_mean": 8645.2,
"valid_targets_min": 3405
},
{
"epoch": 3.0,
"grad_norm": 0.2546650444205805,
"learning_rate": 2.833972992868154e-05,
"loss": 0.1413,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13152235746383667,
"step": 315,
"valid_targets_mean": 9198.5,
"valid_targets_min": 2013
},
{
"epoch": 3.047923322683706,
"grad_norm": 0.16032522655906664,
"learning_rate": 2.7905428972402872e-05,
"loss": 0.1403,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.04698067158460617,
"step": 320,
"valid_targets_mean": 8638.6,
"valid_targets_min": 2123
},
{
"epoch": 3.0958466453674123,
"grad_norm": 0.15952495144596782,
"learning_rate": 2.7466663834679905e-05,
"loss": 0.141,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.05140436440706253,
"step": 325,
"valid_targets_mean": 9252.7,
"valid_targets_min": 4005
},
{
"epoch": 3.143769968051118,
"grad_norm": 0.17042242626456597,
"learning_rate": 2.7023682285392445e-05,
"loss": 0.1389,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.047651275992393494,
"step": 330,
"valid_targets_mean": 8656.8,
"valid_targets_min": 2922
},
{
"epoch": 3.191693290734824,
"grad_norm": 0.16125983283276313,
"learning_rate": 2.657673447542028e-05,
"loss": 0.1428,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.048372238874435425,
"step": 335,
"valid_targets_mean": 9040.1,
"valid_targets_min": 3603
},
{
"epoch": 3.2396166134185305,
"grad_norm": 0.15799040110371987,
"learning_rate": 2.6126072795383416e-05,
"loss": 0.1367,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.047681644558906555,
"step": 340,
"valid_targets_mean": 9583.6,
"valid_targets_min": 3097
},
{
"epoch": 3.2875399361022364,
"grad_norm": 0.14805208338883794,
"learning_rate": 2.5671951733117587e-05,
"loss": 0.1415,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.050001755356788635,
"step": 345,
"valid_targets_mean": 9660.5,
"valid_targets_min": 4266
},
{
"epoch": 3.3354632587859427,
"grad_norm": 0.16843665921293052,
"learning_rate": 2.5214627729965396e-05,
"loss": 0.1398,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.04726799577474594,
"step": 350,
"valid_targets_mean": 9226.8,
"valid_targets_min": 5487
},
{
"epoch": 3.3833865814696487,
"grad_norm": 0.19266516280316792,
"learning_rate": 2.47543590359644e-05,
"loss": 0.1386,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.04282751679420471,
"step": 355,
"valid_targets_mean": 7584.4,
"valid_targets_min": 2293
},
{
"epoch": 3.4313099041533546,
"grad_norm": 0.16156169603076698,
"learning_rate": 2.4291405564013727e-05,
"loss": 0.1398,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.04496710002422333,
"step": 360,
"valid_targets_mean": 9012.9,
"valid_targets_min": 2869
},
{
"epoch": 3.479233226837061,
"grad_norm": 0.16770901380790357,
"learning_rate": 2.3826028743101763e-05,
"loss": 0.1415,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.043900176882743835,
"step": 365,
"valid_targets_mean": 7966.8,
"valid_targets_min": 2050
},
{
"epoch": 3.527156549520767,
"grad_norm": 0.14984841759414239,
"learning_rate": 2.3358491370677693e-05,
"loss": 0.1386,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.04852374643087387,
"step": 370,
"valid_targets_mean": 9445.3,
"valid_targets_min": 4423
},
{
"epoch": 3.5750798722044728,
"grad_norm": 0.1599146740589782,
"learning_rate": 2.2889057464250196e-05,
"loss": 0.1384,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.04656771942973137,
"step": 375,
"valid_targets_mean": 9080.2,
"valid_targets_min": 1597
},
{
"epoch": 3.623003194888179,
"grad_norm": 0.15143561107067025,
"learning_rate": 2.2417992112297293e-05,
"loss": 0.1407,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.04388592019677162,
"step": 380,
"valid_targets_mean": 8641.7,
"valid_targets_min": 3392
},
{
"epoch": 3.670926517571885,
"grad_norm": 0.15149756386513208,
"learning_rate": 2.1945561324571366e-05,
"loss": 0.1379,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.05352652445435524,
"step": 385,
"valid_targets_mean": 9100.2,
"valid_targets_min": 2539
},
{
"epoch": 3.718849840255591,
"grad_norm": 0.1665088787853794,
"learning_rate": 2.1472031881883856e-05,
"loss": 0.1394,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.05265359953045845,
"step": 390,
"valid_targets_mean": 9276.7,
"valid_targets_min": 3839
},
{
"epoch": 3.7667731629392973,
"grad_norm": 0.14824283461917753,
"learning_rate": 2.0997671185454714e-05,
"loss": 0.1396,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.046166472136974335,
"step": 395,
"valid_targets_mean": 8789.5,
"valid_targets_min": 3494
},
{
"epoch": 3.8146964856230032,
"grad_norm": 0.15960953666875605,
"learning_rate": 2.0522747105911378e-05,
"loss": 0.1381,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.04279767721891403,
"step": 400,
"valid_targets_mean": 7870.9,
"valid_targets_min": 2682
},
{
"epoch": 3.862619808306709,
"grad_norm": 0.16843602717950168,
"learning_rate": 2.0047527832022674e-05,
"loss": 0.1378,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.0413513258099556,
"step": 405,
"valid_targets_mean": 8123.9,
"valid_targets_min": 1989
},
{
"epoch": 3.9105431309904155,
"grad_norm": 0.16005031955715257,
"learning_rate": 1.9572281719253186e-05,
"loss": 0.1373,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.04267306998372078,
"step": 410,
"valid_targets_mean": 8465.4,
"valid_targets_min": 2045
},
{
"epoch": 3.9584664536741214,
"grad_norm": 0.15809782385997903,
"learning_rate": 1.909727713822333e-05,
"loss": 0.1379,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.04258957877755165,
"step": 415,
"valid_targets_mean": 7733.6,
"valid_targets_min": 2233
},
{
"epoch": 4.0,
"grad_norm": 0.24269592632341946,
"learning_rate": 1.8622782323161014e-05,
"loss": 0.1343,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1285635381937027,
"step": 420,
"valid_targets_mean": 8452.1,
"valid_targets_min": 2045
},
{
"epoch": 4.047923322683706,
"grad_norm": 0.15907993684353738,
"learning_rate": 1.8149065220430197e-05,
"loss": 0.1358,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.04193146899342537,
"step": 425,
"valid_targets_mean": 8624.6,
"valid_targets_min": 4420
},
{
"epoch": 4.095846645367412,
"grad_norm": 0.18925345564072582,
"learning_rate": 1.7676393337222115e-05,
"loss": 0.1355,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.04186321049928665,
"step": 430,
"valid_targets_mean": 7756.4,
"valid_targets_min": 1221
},
{
"epoch": 4.143769968051118,
"grad_norm": 0.16711423931591982,
"learning_rate": 1.7205033590494426e-05,
"loss": 0.1363,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.047112271189689636,
"step": 435,
"valid_targets_mean": 9224.7,
"valid_targets_min": 1487
},
{
"epoch": 4.1916932907348246,
"grad_norm": 0.17036236145352987,
"learning_rate": 1.6735252156243675e-05,
"loss": 0.1337,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.04689479246735573,
"step": 440,
"valid_targets_mean": 8922.7,
"valid_targets_min": 3916
},
{
"epoch": 4.23961661341853,
"grad_norm": 0.1608190220970628,
"learning_rate": 1.6267314319196215e-05,
"loss": 0.1324,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.04334461688995361,
"step": 445,
"valid_targets_mean": 8931.1,
"valid_targets_min": 3494
},
{
"epoch": 4.287539936102236,
"grad_norm": 0.15484652532421975,
"learning_rate": 1.580148432300241e-05,
"loss": 0.1363,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.04521436244249344,
"step": 450,
"valid_targets_mean": 8411.7,
"valid_targets_min": 2459
},
{
"epoch": 4.335463258785943,
"grad_norm": 0.15640888798523264,
"learning_rate": 1.5338025221018668e-05,
"loss": 0.1356,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.04313844442367554,
"step": 455,
"valid_targets_mean": 7779.2,
"valid_targets_min": 2404
},
{
"epoch": 4.383386581469648,
"grad_norm": 0.17735167228744403,
"learning_rate": 1.4877198727761748e-05,
"loss": 0.1337,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.04428839683532715,
"step": 460,
"valid_targets_mean": 8799.6,
"valid_targets_min": 3400
},
{
"epoch": 4.431309904153355,
"grad_norm": 0.15431579302592524,
"learning_rate": 1.4419265071119038e-05,
"loss": 0.1379,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.04533102735877037,
"step": 465,
"valid_targets_mean": 8351.8,
"valid_targets_min": 1615
},
{
"epoch": 4.479233226837061,
"grad_norm": 0.1569404780015899,
"learning_rate": 1.3964482845398281e-05,
"loss": 0.1331,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.04367567598819733,
"step": 470,
"valid_targets_mean": 8522.4,
"valid_targets_min": 2404
},
{
"epoch": 4.527156549520766,
"grad_norm": 0.1780710965354458,
"learning_rate": 1.3513108865299907e-05,
"loss": 0.1326,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.043544650077819824,
"step": 475,
"valid_targets_mean": 9276.5,
"valid_targets_min": 1840
},
{
"epoch": 4.575079872204473,
"grad_norm": 0.15959810688800943,
"learning_rate": 1.3065398020894202e-05,
"loss": 0.1337,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.04500932991504669,
"step": 480,
"valid_targets_mean": 8126.5,
"valid_targets_min": 2047
},
{
"epoch": 4.623003194888179,
"grad_norm": 0.17116807313960172,
"learning_rate": 1.2621603133685343e-05,
"loss": 0.1333,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.04575769975781441,
"step": 485,
"valid_targets_mean": 8706.4,
"valid_targets_min": 1597
},
{
"epoch": 4.6709265175718855,
"grad_norm": 0.17715013972093857,
"learning_rate": 1.218197481384356e-05,
"loss": 0.134,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.048376478254795074,
"step": 490,
"valid_targets_mean": 8893.8,
"valid_targets_min": 4343
},
{
"epoch": 4.718849840255591,
"grad_norm": 0.1570645103662247,
"learning_rate": 1.1746761318686044e-05,
"loss": 0.1355,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.044359538704156876,
"step": 495,
"valid_targets_mean": 9139.7,
"valid_targets_min": 3056
},
{
"epoch": 4.766773162939297,
"grad_norm": 0.15423551749596906,
"learning_rate": 1.1316208412486443e-05,
"loss": 0.1313,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.042892444878816605,
"step": 500,
"valid_targets_mean": 8499.4,
"valid_targets_min": 1884
},
{
"epoch": 4.814696485623003,
"grad_norm": 0.16453480642681878,
"learning_rate": 1.0890559227692265e-05,
"loss": 0.1364,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.04749216139316559,
"step": 505,
"valid_targets_mean": 8283.1,
"valid_targets_min": 2513
},
{
"epoch": 4.862619808306709,
"grad_norm": 0.1664740998586081,
"learning_rate": 1.0470054127628411e-05,
"loss": 0.1322,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.04286997765302658,
"step": 510,
"valid_targets_mean": 8387.6,
"valid_targets_min": 1679
},
{
"epoch": 4.9105431309904155,
"grad_norm": 0.15516781277136965,
"learning_rate": 1.0054930570764427e-05,
"loss": 0.1327,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.045819588005542755,
"step": 515,
"valid_targets_mean": 9220.8,
"valid_targets_min": 3475
},
{
"epoch": 4.958466453674122,
"grad_norm": 0.1452771278632104,
"learning_rate": 9.645422976622154e-06,
"loss": 0.1313,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.04815547540783882,
"step": 520,
"valid_targets_mean": 9208.9,
"valid_targets_min": 3189
},
{
"epoch": 5.0,
"grad_norm": 0.2436229369532187,
"learning_rate": 9.241762593399437e-06,
"loss": 0.1322,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1331784427165985,
"step": 525,
"valid_targets_mean": 8954.4,
"valid_targets_min": 2293
},
{
"epoch": 5.047923322683706,
"grad_norm": 0.1569691647374951,
"learning_rate": 8.844177367384689e-06,
"loss": 0.1287,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.04233275726437569,
"step": 530,
"valid_targets_mean": 9024.9,
"valid_targets_min": 3046
},
{
"epoch": 5.095846645367412,
"grad_norm": 0.1516535037265207,
"learning_rate": 8.452891814236037e-06,
"loss": 0.1319,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.04785648733377457,
"step": 535,
"valid_targets_mean": 8453.7,
"valid_targets_min": 1823
},
{
"epoch": 5.143769968051118,
"grad_norm": 0.1545110306606347,
"learning_rate": 8.068126892197728e-06,
"loss": 0.1327,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.04464545473456383,
"step": 540,
"valid_targets_mean": 8877.0,
"valid_targets_min": 4783
},
{
"epoch": 5.1916932907348246,
"grad_norm": 0.15588257021266194,
"learning_rate": 7.690099877325419e-06,
"loss": 0.1317,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.04059495031833649,
"step": 545,
"valid_targets_mean": 8231.1,
"valid_targets_min": 4328
},
{
"epoch": 5.23961661341853,
"grad_norm": 0.16029539285883082,
"learning_rate": 7.319024240790768e-06,
"loss": 0.1323,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.04444342479109764,
"step": 550,
"valid_targets_mean": 8885.3,
"valid_targets_min": 1621
},
{
"epoch": 5.287539936102236,
"grad_norm": 0.15541690395010793,
"learning_rate": 6.955109528334667e-06,
"loss": 0.1295,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.04558786749839783,
"step": 555,
"valid_targets_mean": 8818.6,
"valid_targets_min": 4804
},
{
"epoch": 5.335463258785943,
"grad_norm": 0.1632813232184168,
"learning_rate": 6.59856124193712e-06,
"loss": 0.1307,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.04429105669260025,
"step": 560,
"valid_targets_mean": 8736.4,
"valid_targets_min": 3477
},
{
"epoch": 5.383386581469648,
"grad_norm": 0.15319164306426114,
"learning_rate": 6.249580723770665e-06,
"loss": 0.1295,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.040787823498249054,
"step": 565,
"valid_targets_mean": 8843.0,
"valid_targets_min": 2326
},
{
"epoch": 5.431309904153355,
"grad_norm": 0.15092410861061442,
"learning_rate": 5.908365042502801e-06,
"loss": 0.1314,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.04534231498837471,
"step": 570,
"valid_targets_mean": 9223.4,
"valid_targets_min": 3669
},
{
"epoch": 5.479233226837061,
"grad_norm": 0.16472723067001716,
"learning_rate": 5.5751068820116784e-06,
"loss": 0.1307,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.04310306906700134,
"step": 575,
"valid_targets_mean": 8187.3,
"valid_targets_min": 3898
},
{
"epoch": 5.527156549520766,
"grad_norm": 0.15749687446681768,
"learning_rate": 5.24999443257785e-06,
"loss": 0.128,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.04516970366239548,
"step": 580,
"valid_targets_mean": 8733.0,
"valid_targets_min": 4982
},
{
"epoch": 5.575079872204473,
"grad_norm": 0.16061448332897996,
"learning_rate": 4.9332112846135664e-06,
"loss": 0.1329,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.04158013314008713,
"step": 585,
"valid_targets_mean": 8391.5,
"valid_targets_min": 2404
},
{
"epoch": 5.623003194888179,
"grad_norm": 0.1521419599103487,
"learning_rate": 4.624936324989602e-06,
"loss": 0.1321,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.04744524508714676,
"step": 590,
"valid_targets_mean": 8968.4,
"valid_targets_min": 3648
},
{
"epoch": 5.6709265175718855,
"grad_norm": 0.1570326671883014,
"learning_rate": 4.325343636018165e-06,
"loss": 0.129,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.04405239224433899,
"step": 595,
"valid_targets_mean": 8272.5,
"valid_targets_min": 2861
},
{
"epoch": 5.718849840255591,
"grad_norm": 0.1553455532119064,
"learning_rate": 4.0346023971489215e-06,
"loss": 0.1348,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.040806423872709274,
"step": 600,
"valid_targets_mean": 7626.4,
"valid_targets_min": 2787
},
{
"epoch": 5.766773162939297,
"grad_norm": 0.16542431175580907,
"learning_rate": 3.752876789433677e-06,
"loss": 0.1289,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.041984084993600845,
"step": 605,
"valid_targets_mean": 7940.9,
"valid_targets_min": 1813
},
{
"epoch": 5.814696485623003,
"grad_norm": 0.15600260550531508,
"learning_rate": 3.480325902813624e-06,
"loss": 0.1301,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.042893338948488235,
"step": 610,
"valid_targets_mean": 9183.9,
"valid_targets_min": 2349
},
{
"epoch": 5.862619808306709,
"grad_norm": 0.14503907630842894,
"learning_rate": 3.2171036462815563e-06,
"loss": 0.1296,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.04696850851178169,
"step": 615,
"valid_targets_mean": 9935.0,
"valid_targets_min": 3645
},
{
"epoch": 5.9105431309904155,
"grad_norm": 0.1460369617623546,
"learning_rate": 2.9633586609697086e-06,
"loss": 0.133,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.04132133349776268,
"step": 620,
"valid_targets_mean": 8354.1,
"valid_targets_min": 1863
},
{
"epoch": 5.958466453674122,
"grad_norm": 0.14429256697612128,
"learning_rate": 2.7192342362124048e-06,
"loss": 0.1318,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.04253305494785309,
"step": 625,
"valid_targets_mean": 8797.8,
"valid_targets_min": 2888
},
{
"epoch": 6.0,
"grad_norm": 0.24017089407840048,
"learning_rate": 2.4848682286308346e-06,
"loss": 0.1291,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11856501549482346,
"step": 630,
"valid_targets_mean": 9350.8,
"valid_targets_min": 2204
},
{
"epoch": 6.047923322683706,
"grad_norm": 0.14604381915322243,
"learning_rate": 2.260392984285633e-06,
"loss": 0.1264,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.04177611321210861,
"step": 635,
"valid_targets_mean": 8590.9,
"valid_targets_min": 1520
},
{
"epoch": 6.095846645367412,
"grad_norm": 0.15105207764952777,
"learning_rate": 2.0459352639413343e-06,
"loss": 0.1307,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.036490269005298615,
"step": 640,
"valid_targets_mean": 8303.5,
"valid_targets_min": 983
},
{
"epoch": 6.143769968051118,
"grad_norm": 0.14365857813801858,
"learning_rate": 1.841616171484797e-06,
"loss": 0.1289,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.03588128834962845,
"step": 645,
"valid_targets_mean": 7843.5,
"valid_targets_min": 2037
},
{
"epoch": 6.1916932907348246,
"grad_norm": 0.17040842356447042,
"learning_rate": 1.6475510855380195e-06,
"loss": 0.1281,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.03724232316017151,
"step": 650,
"valid_targets_mean": 8363.8,
"valid_targets_min": 2404
},
{
"epoch": 6.23961661341853,
"grad_norm": 0.14563485589355804,
"learning_rate": 1.4638495943040854e-06,
"loss": 0.1319,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.0440392792224884,
"step": 655,
"valid_targets_mean": 9282.5,
"valid_targets_min": 1989
},
{
"epoch": 6.287539936102236,
"grad_norm": 0.15135495829792323,
"learning_rate": 1.2906154336828913e-06,
"loss": 0.1294,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.044929858297109604,
"step": 660,
"valid_targets_mean": 9016.3,
"valid_targets_min": 3426
},
{
"epoch": 6.335463258785943,
"grad_norm": 0.1512474724189636,
"learning_rate": 1.1279464286916508e-06,
"loss": 0.1272,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.043425947427749634,
"step": 665,
"valid_targets_mean": 8946.5,
"valid_targets_min": 1626
},
{
"epoch": 6.383386581469648,
"grad_norm": 0.16216185730198185,
"learning_rate": 9.759344382233048e-07,
"loss": 0.1299,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.046256884932518005,
"step": 670,
"valid_targets_mean": 7957.6,
"valid_targets_min": 2969
},
{
"epoch": 6.431309904153355,
"grad_norm": 0.15052277362683217,
"learning_rate": 8.34665303173976e-07,
"loss": 0.1296,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.04201345890760422,
"step": 675,
"valid_targets_mean": 8329.0,
"valid_targets_min": 2717
},
{
"epoch": 6.479233226837061,
"grad_norm": 0.1465602776227053,
"learning_rate": 7.042187979687432e-07,
"loss": 0.1302,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.03988049924373627,
"step": 680,
"valid_targets_mean": 8283.2,
"valid_targets_min": 2349
},
{
"epoch": 6.527156549520766,
"grad_norm": 0.14185496391585614,
"learning_rate": 5.846685855131929e-07,
"loss": 0.1301,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.040278732776641846,
"step": 685,
"valid_targets_mean": 9115.4,
"valid_targets_min": 3979
},
{
"epoch": 6.575079872204473,
"grad_norm": 0.1561692679171069,
"learning_rate": 4.760821755961065e-07,
"loss": 0.131,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.04538102447986603,
"step": 690,
"valid_targets_mean": 8077.0,
"valid_targets_min": 3672
},
{
"epoch": 6.623003194888179,
"grad_norm": 0.1505424403302318,
"learning_rate": 3.7852088676678665e-07,
"loss": 0.1305,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.04282670468091965,
"step": 695,
"valid_targets_mean": 8324.1,
"valid_targets_min": 2674
},
{
"epoch": 6.6709265175718855,
"grad_norm": 0.1530107403327867,
"learning_rate": 2.920398117086043e-07,
"loss": 0.1277,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.04476252198219299,
"step": 700,
"valid_targets_mean": 8553.5,
"valid_targets_min": 3077
},
{
"epoch": 6.718849840255591,
"grad_norm": 0.1451392180151871,
"learning_rate": 2.1668778612825347e-07,
"loss": 0.1289,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.04205950349569321,
"step": 705,
"valid_targets_mean": 8100.0,
"valid_targets_min": 2628
},
{
"epoch": 6.766773162939297,
"grad_norm": 0.1540835400434045,
"learning_rate": 1.5250736117830455e-07,
"loss": 0.1307,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.04908519983291626,
"step": 710,
"valid_targets_mean": 8852.7,
"valid_targets_min": 1823
},
{
"epoch": 6.814696485623003,
"grad_norm": 0.1511755346652257,
"learning_rate": 9.953477942866052e-08,
"loss": 0.1336,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.047512397170066833,
"step": 715,
"valid_targets_mean": 8784.2,
"valid_targets_min": 2941
},
{
"epoch": 6.862619808306709,
"grad_norm": 0.14155551457486815,
"learning_rate": 5.779995440044594e-08,
"loss": 0.1285,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.04104035720229149,
"step": 720,
"valid_targets_mean": 9481.8,
"valid_targets_min": 3244
},
{
"epoch": 6.9105431309904155,
"grad_norm": 0.13812752928008018,
"learning_rate": 2.7326453673872653e-08,
"loss": 0.1312,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.04090867191553116,
"step": 725,
"valid_targets_mean": 8866.8,
"valid_targets_min": 3547
},
{
"epoch": 6.958466453674122,
"grad_norm": 0.1527802388811017,
"learning_rate": 8.131485579692121e-09,
"loss": 0.1275,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.04462620988488197,
"step": 730,
"valid_targets_mean": 9477.2,
"valid_targets_min": 4703
},
{
"epoch": 7.0,
"grad_norm": 0.27520263662765626,
"learning_rate": 2.2588948167756586e-10,
"loss": 0.1299,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13444499671459198,
"step": 735,
"valid_targets_mean": 8868.2,
"valid_targets_min": 4152
},
{
"epoch": 7.0,
"step": 735,
"total_flos": 5.441373116175483e+18,
"train_loss": 0.0,
"train_runtime": 1.1873,
"train_samples_per_second": 58956.554,
"train_steps_per_second": 619.044
}
],
"logging_steps": 5,
"max_steps": 735,
"num_input_tokens_seen": 0,
"num_train_epochs": 7,
"save_steps": 300,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 5.441373116175483e+18,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}