penfever's picture
End of training
ba75d58 verified
{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 5.0,
"eval_steps": 500,
"global_step": 1560,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.016025641025641024,
"grad_norm": 8.270559284395055,
"learning_rate": 1.0256410256410257e-06,
"loss": 0.8831,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.5231465101242065,
"step": 5,
"valid_targets_mean": 2855.3,
"valid_targets_min": 310
},
{
"epoch": 0.03205128205128205,
"grad_norm": 6.156057008347062,
"learning_rate": 2.307692307692308e-06,
"loss": 0.8578,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.3894450068473816,
"step": 10,
"valid_targets_mean": 2836.0,
"valid_targets_min": 343
},
{
"epoch": 0.04807692307692308,
"grad_norm": 4.055588313810187,
"learning_rate": 3.58974358974359e-06,
"loss": 0.8181,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.34461548924446106,
"step": 15,
"valid_targets_mean": 2279.3,
"valid_targets_min": 397
},
{
"epoch": 0.0641025641025641,
"grad_norm": 2.9358204191090445,
"learning_rate": 4.871794871794872e-06,
"loss": 0.7542,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.37403905391693115,
"step": 20,
"valid_targets_mean": 2386.4,
"valid_targets_min": 501
},
{
"epoch": 0.08012820512820513,
"grad_norm": 1.3469933311632771,
"learning_rate": 6.153846153846155e-06,
"loss": 0.7428,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.3458966016769409,
"step": 25,
"valid_targets_mean": 2740.5,
"valid_targets_min": 447
},
{
"epoch": 0.09615384615384616,
"grad_norm": 1.1608353294901592,
"learning_rate": 7.435897435897437e-06,
"loss": 0.7218,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.31971484422683716,
"step": 30,
"valid_targets_mean": 1962.6,
"valid_targets_min": 279
},
{
"epoch": 0.11217948717948718,
"grad_norm": 1.067171533736777,
"learning_rate": 8.717948717948719e-06,
"loss": 0.6814,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.4454614520072937,
"step": 35,
"valid_targets_mean": 2451.4,
"valid_targets_min": 416
},
{
"epoch": 0.1282051282051282,
"grad_norm": 0.9774891251489538,
"learning_rate": 1e-05,
"loss": 0.7264,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.3378103971481323,
"step": 40,
"valid_targets_mean": 1912.1,
"valid_targets_min": 609
},
{
"epoch": 0.14423076923076922,
"grad_norm": 0.8317361111005913,
"learning_rate": 1.1282051282051283e-05,
"loss": 0.6642,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.37407323718070984,
"step": 45,
"valid_targets_mean": 1732.8,
"valid_targets_min": 474
},
{
"epoch": 0.16025641025641027,
"grad_norm": 0.70420941452197,
"learning_rate": 1.2564102564102565e-05,
"loss": 0.6433,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.3607226610183716,
"step": 50,
"valid_targets_mean": 2446.6,
"valid_targets_min": 396
},
{
"epoch": 0.1762820512820513,
"grad_norm": 0.640275050687205,
"learning_rate": 1.3846153846153847e-05,
"loss": 0.6505,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2584564685821533,
"step": 55,
"valid_targets_mean": 1938.5,
"valid_targets_min": 350
},
{
"epoch": 0.19230769230769232,
"grad_norm": 0.6130742352183888,
"learning_rate": 1.5128205128205129e-05,
"loss": 0.5794,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.29696235060691833,
"step": 60,
"valid_targets_mean": 1760.7,
"valid_targets_min": 705
},
{
"epoch": 0.20833333333333334,
"grad_norm": 0.49753658846261495,
"learning_rate": 1.641025641025641e-05,
"loss": 0.5949,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.23245546221733093,
"step": 65,
"valid_targets_mean": 2182.3,
"valid_targets_min": 495
},
{
"epoch": 0.22435897435897437,
"grad_norm": 0.5248371244445572,
"learning_rate": 1.7692307692307694e-05,
"loss": 0.5511,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.27561211585998535,
"step": 70,
"valid_targets_mean": 2518.8,
"valid_targets_min": 537
},
{
"epoch": 0.2403846153846154,
"grad_norm": 0.5696118966316656,
"learning_rate": 1.8974358974358975e-05,
"loss": 0.6166,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.27792054414749146,
"step": 75,
"valid_targets_mean": 2193.9,
"valid_targets_min": 320
},
{
"epoch": 0.2564102564102564,
"grad_norm": 0.6067800362816257,
"learning_rate": 2.025641025641026e-05,
"loss": 0.5518,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.3141236901283264,
"step": 80,
"valid_targets_mean": 1968.6,
"valid_targets_min": 524
},
{
"epoch": 0.2724358974358974,
"grad_norm": 0.5486307014154258,
"learning_rate": 2.153846153846154e-05,
"loss": 0.5859,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.39914655685424805,
"step": 85,
"valid_targets_mean": 2844.7,
"valid_targets_min": 652
},
{
"epoch": 0.28846153846153844,
"grad_norm": 0.5467022186742057,
"learning_rate": 2.2820512820512822e-05,
"loss": 0.5494,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.30601420998573303,
"step": 90,
"valid_targets_mean": 2242.3,
"valid_targets_min": 287
},
{
"epoch": 0.30448717948717946,
"grad_norm": 0.5073453583636481,
"learning_rate": 2.4102564102564103e-05,
"loss": 0.5672,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.27047592401504517,
"step": 95,
"valid_targets_mean": 2702.0,
"valid_targets_min": 623
},
{
"epoch": 0.32051282051282054,
"grad_norm": 0.6374453094826855,
"learning_rate": 2.5384615384615386e-05,
"loss": 0.5258,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.34897756576538086,
"step": 100,
"valid_targets_mean": 2111.6,
"valid_targets_min": 632
},
{
"epoch": 0.33653846153846156,
"grad_norm": 0.4904714398710548,
"learning_rate": 2.6666666666666667e-05,
"loss": 0.5275,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2545839548110962,
"step": 105,
"valid_targets_mean": 2280.2,
"valid_targets_min": 528
},
{
"epoch": 0.3525641025641026,
"grad_norm": 0.5374741002067597,
"learning_rate": 2.794871794871795e-05,
"loss": 0.5159,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.27961328625679016,
"step": 110,
"valid_targets_mean": 1705.3,
"valid_targets_min": 483
},
{
"epoch": 0.3685897435897436,
"grad_norm": 0.5401678923394666,
"learning_rate": 2.923076923076923e-05,
"loss": 0.5378,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.28303176164627075,
"step": 115,
"valid_targets_mean": 2115.9,
"valid_targets_min": 460
},
{
"epoch": 0.38461538461538464,
"grad_norm": 0.5697964720982368,
"learning_rate": 3.0512820512820514e-05,
"loss": 0.5136,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2926599979400635,
"step": 120,
"valid_targets_mean": 2276.1,
"valid_targets_min": 491
},
{
"epoch": 0.40064102564102566,
"grad_norm": 0.5080731305686881,
"learning_rate": 3.1794871794871795e-05,
"loss": 0.5455,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.21579426527023315,
"step": 125,
"valid_targets_mean": 1966.7,
"valid_targets_min": 455
},
{
"epoch": 0.4166666666666667,
"grad_norm": 0.5193806958702459,
"learning_rate": 3.307692307692308e-05,
"loss": 0.5503,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.23866981267929077,
"step": 130,
"valid_targets_mean": 1884.6,
"valid_targets_min": 653
},
{
"epoch": 0.4326923076923077,
"grad_norm": 0.5215890997634938,
"learning_rate": 3.435897435897436e-05,
"loss": 0.5113,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.22024506330490112,
"step": 135,
"valid_targets_mean": 2176.9,
"valid_targets_min": 551
},
{
"epoch": 0.44871794871794873,
"grad_norm": 0.5428844999735067,
"learning_rate": 3.5641025641025646e-05,
"loss": 0.5327,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2842090129852295,
"step": 140,
"valid_targets_mean": 2108.4,
"valid_targets_min": 395
},
{
"epoch": 0.46474358974358976,
"grad_norm": 0.5658357256109985,
"learning_rate": 3.692307692307693e-05,
"loss": 0.5012,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.23009580373764038,
"step": 145,
"valid_targets_mean": 1416.6,
"valid_targets_min": 319
},
{
"epoch": 0.4807692307692308,
"grad_norm": 0.5287699494547455,
"learning_rate": 3.820512820512821e-05,
"loss": 0.5442,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.25618451833724976,
"step": 150,
"valid_targets_mean": 2366.4,
"valid_targets_min": 555
},
{
"epoch": 0.4967948717948718,
"grad_norm": 0.5510710484739612,
"learning_rate": 3.948717948717949e-05,
"loss": 0.5202,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.261417031288147,
"step": 155,
"valid_targets_mean": 1912.0,
"valid_targets_min": 427
},
{
"epoch": 0.5128205128205128,
"grad_norm": 0.5223782503607328,
"learning_rate": 3.999954938420724e-05,
"loss": 0.538,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2376648485660553,
"step": 160,
"valid_targets_mean": 2203.6,
"valid_targets_min": 520
},
{
"epoch": 0.5288461538461539,
"grad_norm": 0.5639669943753522,
"learning_rate": 3.9996795694563096e-05,
"loss": 0.5519,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.26910513639450073,
"step": 165,
"valid_targets_mean": 1978.4,
"valid_targets_min": 452
},
{
"epoch": 0.5448717948717948,
"grad_norm": 0.5934805718745008,
"learning_rate": 3.9991539001644015e-05,
"loss": 0.4864,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.21558544039726257,
"step": 170,
"valid_targets_mean": 1433.1,
"valid_targets_min": 291
},
{
"epoch": 0.5608974358974359,
"grad_norm": 0.5444507017809859,
"learning_rate": 3.998377996343139e-05,
"loss": 0.5402,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.22888971865177155,
"step": 175,
"valid_targets_mean": 2303.6,
"valid_targets_min": 665
},
{
"epoch": 0.5769230769230769,
"grad_norm": 0.5993323841226175,
"learning_rate": 3.9973519551125746e-05,
"loss": 0.5472,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2046593725681305,
"step": 180,
"valid_targets_mean": 1417.7,
"valid_targets_min": 614
},
{
"epoch": 0.592948717948718,
"grad_norm": 0.552244150619254,
"learning_rate": 3.99607590490251e-05,
"loss": 0.5254,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.240033358335495,
"step": 185,
"valid_targets_mean": 1792.0,
"valid_targets_min": 356
},
{
"epoch": 0.6089743589743589,
"grad_norm": 0.5676943862267904,
"learning_rate": 3.994550005436431e-05,
"loss": 0.5232,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2360553741455078,
"step": 190,
"valid_targets_mean": 1990.2,
"valid_targets_min": 571
},
{
"epoch": 0.625,
"grad_norm": 0.5686816790686919,
"learning_rate": 3.992774447711503e-05,
"loss": 0.5712,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.3569658696651459,
"step": 195,
"valid_targets_mean": 2023.1,
"valid_targets_min": 425
},
{
"epoch": 0.6410256410256411,
"grad_norm": 0.593578700387874,
"learning_rate": 3.990749453974676e-05,
"loss": 0.5151,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2537527084350586,
"step": 200,
"valid_targets_mean": 1794.6,
"valid_targets_min": 397
},
{
"epoch": 0.657051282051282,
"grad_norm": 0.626123448487498,
"learning_rate": 3.9884752776948564e-05,
"loss": 0.5111,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.20010629296302795,
"step": 205,
"valid_targets_mean": 1342.0,
"valid_targets_min": 328
},
{
"epoch": 0.6730769230769231,
"grad_norm": 0.5180554845628587,
"learning_rate": 3.985952203531184e-05,
"loss": 0.5206,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.31396254897117615,
"step": 210,
"valid_targets_mean": 2744.9,
"valid_targets_min": 1089
},
{
"epoch": 0.6891025641025641,
"grad_norm": 0.6345163141192973,
"learning_rate": 3.983180547297404e-05,
"loss": 0.5026,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24182447791099548,
"step": 215,
"valid_targets_mean": 1557.4,
"valid_targets_min": 461
},
{
"epoch": 0.7051282051282052,
"grad_norm": 0.6054353969461813,
"learning_rate": 3.9801606559223286e-05,
"loss": 0.5125,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2920263409614563,
"step": 220,
"valid_targets_mean": 2164.1,
"valid_targets_min": 542
},
{
"epoch": 0.7211538461538461,
"grad_norm": 0.5253840091115919,
"learning_rate": 3.9768929074064206e-05,
"loss": 0.5073,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.26178407669067383,
"step": 225,
"valid_targets_mean": 2276.9,
"valid_targets_min": 508
},
{
"epoch": 0.7371794871794872,
"grad_norm": 0.6036771997042879,
"learning_rate": 3.973377710774474e-05,
"loss": 0.5416,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.30116069316864014,
"step": 230,
"valid_targets_mean": 1931.5,
"valid_targets_min": 465
},
{
"epoch": 0.7532051282051282,
"grad_norm": 0.5341073955982537,
"learning_rate": 3.9696155060244166e-05,
"loss": 0.4925,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2869190275669098,
"step": 235,
"valid_targets_mean": 2469.9,
"valid_targets_min": 390
},
{
"epoch": 0.7692307692307693,
"grad_norm": 0.5391946249539421,
"learning_rate": 3.965606764072237e-05,
"loss": 0.5149,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.23443245887756348,
"step": 240,
"valid_targets_mean": 2517.3,
"valid_targets_min": 458
},
{
"epoch": 0.7852564102564102,
"grad_norm": 0.48726365599779214,
"learning_rate": 3.96135198669304e-05,
"loss": 0.5111,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2194036841392517,
"step": 245,
"valid_targets_mean": 2148.2,
"valid_targets_min": 284
},
{
"epoch": 0.8012820512820513,
"grad_norm": 0.4546312777762876,
"learning_rate": 3.956851706458236e-05,
"loss": 0.4951,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2413400411605835,
"step": 250,
"valid_targets_mean": 3277.6,
"valid_targets_min": 401
},
{
"epoch": 0.8173076923076923,
"grad_norm": 0.43195116515643417,
"learning_rate": 3.952106486668884e-05,
"loss": 0.4941,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24353349208831787,
"step": 255,
"valid_targets_mean": 2430.8,
"valid_targets_min": 354
},
{
"epoch": 0.8333333333333334,
"grad_norm": 0.5431449210079052,
"learning_rate": 3.9471169212851774e-05,
"loss": 0.4859,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.3241618275642395,
"step": 260,
"valid_targets_mean": 2396.5,
"valid_targets_min": 559
},
{
"epoch": 0.8493589743589743,
"grad_norm": 0.4983128230175159,
"learning_rate": 3.9418836348521045e-05,
"loss": 0.507,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.28434205055236816,
"step": 265,
"valid_targets_mean": 2714.4,
"valid_targets_min": 589
},
{
"epoch": 0.8653846153846154,
"grad_norm": 0.4433055547977104,
"learning_rate": 3.936407282421267e-05,
"loss": 0.489,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2021905481815338,
"step": 270,
"valid_targets_mean": 2192.3,
"valid_targets_min": 420
},
{
"epoch": 0.8814102564102564,
"grad_norm": 0.5372540899641901,
"learning_rate": 3.930688549468894e-05,
"loss": 0.4962,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.3100685477256775,
"step": 275,
"valid_targets_mean": 2431.0,
"valid_targets_min": 523
},
{
"epoch": 0.8974358974358975,
"grad_norm": 0.48553038032931456,
"learning_rate": 3.924728151810034e-05,
"loss": 0.5118,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.27802667021751404,
"step": 280,
"valid_targets_mean": 2424.8,
"valid_targets_min": 596
},
{
"epoch": 0.9134615384615384,
"grad_norm": 0.5494166379111899,
"learning_rate": 3.9185268355089606e-05,
"loss": 0.4939,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2717832922935486,
"step": 285,
"valid_targets_mean": 1919.1,
"valid_targets_min": 500
},
{
"epoch": 0.9294871794871795,
"grad_norm": 0.4690629180894029,
"learning_rate": 3.912085376785788e-05,
"loss": 0.4929,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.20420989394187927,
"step": 290,
"valid_targets_mean": 1990.7,
"valid_targets_min": 498
},
{
"epoch": 0.9455128205128205,
"grad_norm": 0.47364731395403387,
"learning_rate": 3.9054045819193074e-05,
"loss": 0.4596,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.283137708902359,
"step": 295,
"valid_targets_mean": 3114.4,
"valid_targets_min": 389
},
{
"epoch": 0.9615384615384616,
"grad_norm": 0.43834286352294405,
"learning_rate": 3.898485287146068e-05,
"loss": 0.4846,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.22700203955173492,
"step": 300,
"valid_targets_mean": 3102.8,
"valid_targets_min": 674
},
{
"epoch": 0.9775641025641025,
"grad_norm": 0.544832909701008,
"learning_rate": 3.8913283585557054e-05,
"loss": 0.488,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24065542221069336,
"step": 305,
"valid_targets_mean": 2371.9,
"valid_targets_min": 342
},
{
"epoch": 0.9935897435897436,
"grad_norm": 0.6034996255357719,
"learning_rate": 3.8839346919825304e-05,
"loss": 0.5011,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.252093642950058,
"step": 310,
"valid_targets_mean": 1962.2,
"valid_targets_min": 339
},
{
"epoch": 1.0096153846153846,
"grad_norm": 0.48322361076674575,
"learning_rate": 3.876305212893399e-05,
"loss": 0.4662,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.25063151121139526,
"step": 315,
"valid_targets_mean": 2658.9,
"valid_targets_min": 507
},
{
"epoch": 1.0256410256410255,
"grad_norm": 0.6133158850042538,
"learning_rate": 3.868440876271871e-05,
"loss": 0.4777,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.29740214347839355,
"step": 320,
"valid_targets_mean": 1867.5,
"valid_targets_min": 392
},
{
"epoch": 1.0416666666666667,
"grad_norm": 0.541900985104166,
"learning_rate": 3.860342666498677e-05,
"loss": 0.4866,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.21024353802204132,
"step": 325,
"valid_targets_mean": 1411.2,
"valid_targets_min": 322
},
{
"epoch": 1.0576923076923077,
"grad_norm": 0.6020609614604076,
"learning_rate": 3.8520115972284975e-05,
"loss": 0.4639,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24874147772789001,
"step": 330,
"valid_targets_mean": 1505.4,
"valid_targets_min": 372
},
{
"epoch": 1.0737179487179487,
"grad_norm": 0.5637623048449515,
"learning_rate": 3.843448711263089e-05,
"loss": 0.5009,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.3167960047721863,
"step": 335,
"valid_targets_mean": 2400.1,
"valid_targets_min": 471
},
{
"epoch": 1.0897435897435896,
"grad_norm": 0.482423724630125,
"learning_rate": 3.8346550804207544e-05,
"loss": 0.453,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2796696722507477,
"step": 340,
"valid_targets_mean": 2607.9,
"valid_targets_min": 358
},
{
"epoch": 1.1057692307692308,
"grad_norm": 0.42709049123064025,
"learning_rate": 3.825631805402182e-05,
"loss": 0.4675,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.18846507370471954,
"step": 345,
"valid_targets_mean": 3064.7,
"valid_targets_min": 484
},
{
"epoch": 1.1217948717948718,
"grad_norm": 0.45654149738060995,
"learning_rate": 3.816380015652672e-05,
"loss": 0.4711,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.22047562897205353,
"step": 350,
"valid_targets_mean": 2670.4,
"valid_targets_min": 497
},
{
"epoch": 1.1378205128205128,
"grad_norm": 0.46259303685534825,
"learning_rate": 3.806900869220765e-05,
"loss": 0.4289,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.21656641364097595,
"step": 355,
"valid_targets_mean": 2637.6,
"valid_targets_min": 371
},
{
"epoch": 1.1538461538461537,
"grad_norm": 0.4795462410657758,
"learning_rate": 3.797195552613284e-05,
"loss": 0.4347,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.28732848167419434,
"step": 360,
"valid_targets_mean": 2804.9,
"valid_targets_min": 972
},
{
"epoch": 1.169871794871795,
"grad_norm": 0.544805070309915,
"learning_rate": 3.787265280646825e-05,
"loss": 0.4612,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.20876461267471313,
"step": 365,
"valid_targets_mean": 1629.6,
"valid_targets_min": 470
},
{
"epoch": 1.185897435897436,
"grad_norm": 0.6219497217509353,
"learning_rate": 3.7771112962956936e-05,
"loss": 0.4925,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24284450709819794,
"step": 370,
"valid_targets_mean": 1362.9,
"valid_targets_min": 362
},
{
"epoch": 1.2019230769230769,
"grad_norm": 0.6219562430857214,
"learning_rate": 3.7667348705363227e-05,
"loss": 0.4846,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.26131337881088257,
"step": 375,
"valid_targets_mean": 2042.5,
"valid_targets_min": 538
},
{
"epoch": 1.217948717948718,
"grad_norm": 0.5273423780621191,
"learning_rate": 3.7561373021881885e-05,
"loss": 0.4764,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.20557957887649536,
"step": 380,
"valid_targets_mean": 2154.4,
"valid_targets_min": 482
},
{
"epoch": 1.233974358974359,
"grad_norm": 0.5940055633638739,
"learning_rate": 3.745319917751229e-05,
"loss": 0.458,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.19809526205062866,
"step": 385,
"valid_targets_mean": 1356.8,
"valid_targets_min": 496
},
{
"epoch": 1.25,
"grad_norm": 0.604986566337123,
"learning_rate": 3.734284071239811e-05,
"loss": 0.4753,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.3080754280090332,
"step": 390,
"valid_targets_mean": 2211.4,
"valid_targets_min": 405
},
{
"epoch": 1.266025641025641,
"grad_norm": 0.6686876845094236,
"learning_rate": 3.7230311440132494e-05,
"loss": 0.4605,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2461152821779251,
"step": 395,
"valid_targets_mean": 1261.0,
"valid_targets_min": 384
},
{
"epoch": 1.282051282051282,
"grad_norm": 0.5383774605862381,
"learning_rate": 3.711562544602895e-05,
"loss": 0.47,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2216894030570984,
"step": 400,
"valid_targets_mean": 1972.8,
"valid_targets_min": 572
},
{
"epoch": 1.2980769230769231,
"grad_norm": 0.6237423049196263,
"learning_rate": 3.699879708535838e-05,
"loss": 0.4964,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.3525855839252472,
"step": 405,
"valid_targets_mean": 2608.8,
"valid_targets_min": 553
},
{
"epoch": 1.314102564102564,
"grad_norm": 0.47346493238221604,
"learning_rate": 3.687984098155212e-05,
"loss": 0.4721,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.23883157968521118,
"step": 410,
"valid_targets_mean": 2648.7,
"valid_targets_min": 559
},
{
"epoch": 1.330128205128205,
"grad_norm": 0.6009852407301991,
"learning_rate": 3.6758772024371626e-05,
"loss": 0.4985,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.31522154808044434,
"step": 415,
"valid_targets_mean": 1891.1,
"valid_targets_min": 369
},
{
"epoch": 1.3461538461538463,
"grad_norm": 0.5221802033027123,
"learning_rate": 3.663560536804465e-05,
"loss": 0.4619,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2520959973335266,
"step": 420,
"valid_targets_mean": 2271.7,
"valid_targets_min": 479
},
{
"epoch": 1.3621794871794872,
"grad_norm": 0.5462328875847934,
"learning_rate": 3.65103564293684e-05,
"loss": 0.4923,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.28057146072387695,
"step": 425,
"valid_targets_mean": 1829.6,
"valid_targets_min": 349
},
{
"epoch": 1.3782051282051282,
"grad_norm": 0.5230511743019115,
"learning_rate": 3.638304088577984e-05,
"loss": 0.4479,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.20385369658470154,
"step": 430,
"valid_targets_mean": 2099.4,
"valid_targets_min": 450
},
{
"epoch": 1.3942307692307692,
"grad_norm": 0.5260439150355632,
"learning_rate": 3.625367467339329e-05,
"loss": 0.4409,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.25196361541748047,
"step": 435,
"valid_targets_mean": 1848.4,
"valid_targets_min": 374
},
{
"epoch": 1.4102564102564101,
"grad_norm": 0.542530019050112,
"learning_rate": 3.612227398500575e-05,
"loss": 0.4625,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2730902433395386,
"step": 440,
"valid_targets_mean": 1855.6,
"valid_targets_min": 539
},
{
"epoch": 1.4262820512820513,
"grad_norm": 0.5362598043278441,
"learning_rate": 3.598885526807003e-05,
"loss": 0.4645,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.20158651471138,
"step": 445,
"valid_targets_mean": 1960.3,
"valid_targets_min": 330
},
{
"epoch": 1.4423076923076923,
"grad_norm": 0.5038481547905878,
"learning_rate": 3.585343522263599e-05,
"loss": 0.4576,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.23150920867919922,
"step": 450,
"valid_targets_mean": 2077.7,
"valid_targets_min": 685
},
{
"epoch": 1.4583333333333333,
"grad_norm": 0.6465319041135404,
"learning_rate": 3.571603079926024e-05,
"loss": 0.4734,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2389899045228958,
"step": 455,
"valid_targets_mean": 1801.0,
"valid_targets_min": 435
},
{
"epoch": 1.4743589743589745,
"grad_norm": 0.5472386327122035,
"learning_rate": 3.5576659196884395e-05,
"loss": 0.5179,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24516533315181732,
"step": 460,
"valid_targets_mean": 1980.1,
"valid_targets_min": 436
},
{
"epoch": 1.4903846153846154,
"grad_norm": 0.5414486709041462,
"learning_rate": 3.5435337860682304e-05,
"loss": 0.464,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2550250291824341,
"step": 465,
"valid_targets_mean": 1814.8,
"valid_targets_min": 343
},
{
"epoch": 1.5064102564102564,
"grad_norm": 0.4560866540685229,
"learning_rate": 3.529208447987641e-05,
"loss": 0.4928,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1900494247674942,
"step": 470,
"valid_targets_mean": 2029.3,
"valid_targets_min": 353
},
{
"epoch": 1.5224358974358974,
"grad_norm": 0.5257829172034205,
"learning_rate": 3.5146916985523604e-05,
"loss": 0.5122,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.25625720620155334,
"step": 475,
"valid_targets_mean": 2358.8,
"valid_targets_min": 708
},
{
"epoch": 1.5384615384615383,
"grad_norm": 0.5000203832116034,
"learning_rate": 3.499985354827079e-05,
"loss": 0.4661,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.19569867849349976,
"step": 480,
"valid_targets_mean": 1793.0,
"valid_targets_min": 407
},
{
"epoch": 1.5544871794871795,
"grad_norm": 0.5311956681183098,
"learning_rate": 3.485091257608047e-05,
"loss": 0.4946,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2879672348499298,
"step": 485,
"valid_targets_mean": 2306.1,
"valid_targets_min": 383
},
{
"epoch": 1.5705128205128205,
"grad_norm": 0.48828567970956843,
"learning_rate": 3.4700112711926574e-05,
"loss": 0.4632,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24961577355861664,
"step": 490,
"valid_targets_mean": 2378.9,
"valid_targets_min": 494
},
{
"epoch": 1.5865384615384617,
"grad_norm": 0.6003720701146319,
"learning_rate": 3.4547472831460976e-05,
"loss": 0.4662,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2577670216560364,
"step": 495,
"valid_targets_mean": 1839.1,
"valid_targets_min": 463
},
{
"epoch": 1.6025641025641026,
"grad_norm": 0.556322591943896,
"learning_rate": 3.439301204065077e-05,
"loss": 0.468,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.20165708661079407,
"step": 500,
"valid_targets_mean": 1302.6,
"valid_targets_min": 365
},
{
"epoch": 1.6185897435897436,
"grad_norm": 0.4863359326950338,
"learning_rate": 3.423674967338681e-05,
"loss": 0.496,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24409791827201843,
"step": 505,
"valid_targets_mean": 2180.1,
"valid_targets_min": 564
},
{
"epoch": 1.6346153846153846,
"grad_norm": 0.5132971252924721,
"learning_rate": 3.407870528906366e-05,
"loss": 0.4911,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2089085876941681,
"step": 510,
"valid_targets_mean": 1815.9,
"valid_targets_min": 376
},
{
"epoch": 1.6506410256410255,
"grad_norm": 0.44374872733889337,
"learning_rate": 3.391889867013134e-05,
"loss": 0.4728,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.25746238231658936,
"step": 515,
"valid_targets_mean": 3330.2,
"valid_targets_min": 535
},
{
"epoch": 1.6666666666666665,
"grad_norm": 0.41541671583956447,
"learning_rate": 3.375734981961918e-05,
"loss": 0.4579,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.19313707947731018,
"step": 520,
"valid_targets_mean": 2933.1,
"valid_targets_min": 641
},
{
"epoch": 1.6826923076923077,
"grad_norm": 0.5025527033239986,
"learning_rate": 3.359407895863199e-05,
"loss": 0.4777,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2544252574443817,
"step": 525,
"valid_targets_mean": 2657.9,
"valid_targets_min": 634
},
{
"epoch": 1.6987179487179487,
"grad_norm": 0.5010762849646863,
"learning_rate": 3.342910652381902e-05,
"loss": 0.4582,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.253082811832428,
"step": 530,
"valid_targets_mean": 2349.6,
"valid_targets_min": 444
},
{
"epoch": 1.7147435897435899,
"grad_norm": 0.5050510280029894,
"learning_rate": 3.326245316481591e-05,
"loss": 0.4707,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2477722316980362,
"step": 535,
"valid_targets_mean": 1996.2,
"valid_targets_min": 477
},
{
"epoch": 1.7307692307692308,
"grad_norm": 0.5765193223693661,
"learning_rate": 3.30941397416599e-05,
"loss": 0.4785,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.23156100511550903,
"step": 540,
"valid_targets_mean": 1675.4,
"valid_targets_min": 481
},
{
"epoch": 1.7467948717948718,
"grad_norm": 0.4560642161468493,
"learning_rate": 3.2924187322178865e-05,
"loss": 0.4794,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.23049210011959076,
"step": 545,
"valid_targets_mean": 2340.4,
"valid_targets_min": 392
},
{
"epoch": 1.7628205128205128,
"grad_norm": 0.4922377750065325,
"learning_rate": 3.275261717935417e-05,
"loss": 0.4764,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24752816557884216,
"step": 550,
"valid_targets_mean": 2367.2,
"valid_targets_min": 476
},
{
"epoch": 1.7788461538461537,
"grad_norm": 0.4426230788977972,
"learning_rate": 3.2579450788657997e-05,
"loss": 0.4677,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.21346285939216614,
"step": 555,
"valid_targets_mean": 2292.7,
"valid_targets_min": 432
},
{
"epoch": 1.7948717948717947,
"grad_norm": 0.624429526299482,
"learning_rate": 3.2404709825365204e-05,
"loss": 0.4717,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.22740063071250916,
"step": 560,
"valid_targets_mean": 1412.7,
"valid_targets_min": 517
},
{
"epoch": 1.810897435897436,
"grad_norm": 0.555277859130487,
"learning_rate": 3.222841616184025e-05,
"loss": 0.4877,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.32376670837402344,
"step": 565,
"valid_targets_mean": 2239.8,
"valid_targets_min": 373
},
{
"epoch": 1.8269230769230769,
"grad_norm": 0.6868507053050372,
"learning_rate": 3.2050591864799406e-05,
"loss": 0.454,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2284863293170929,
"step": 570,
"valid_targets_mean": 2318.9,
"valid_targets_min": 465
},
{
"epoch": 1.842948717948718,
"grad_norm": 0.5636785309744979,
"learning_rate": 3.187125919254869e-05,
"loss": 0.5021,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.20378993451595306,
"step": 575,
"valid_targets_mean": 1406.4,
"valid_targets_min": 382
},
{
"epoch": 1.858974358974359,
"grad_norm": 0.5138362567576862,
"learning_rate": 3.169044059219778e-05,
"loss": 0.472,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24885600805282593,
"step": 580,
"valid_targets_mean": 2108.1,
"valid_targets_min": 297
},
{
"epoch": 1.875,
"grad_norm": 0.4788276150250492,
"learning_rate": 3.1508158696850275e-05,
"loss": 0.4439,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2000913918018341,
"step": 585,
"valid_targets_mean": 1723.1,
"valid_targets_min": 532
},
{
"epoch": 1.891025641025641,
"grad_norm": 0.5199878797975349,
"learning_rate": 3.132443632277075e-05,
"loss": 0.4341,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1967633068561554,
"step": 590,
"valid_targets_mean": 1640.8,
"valid_targets_min": 336
},
{
"epoch": 1.907051282051282,
"grad_norm": 0.5120839295430387,
"learning_rate": 3.113929646652879e-05,
"loss": 0.4752,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.25105994939804077,
"step": 595,
"valid_targets_mean": 2039.7,
"valid_targets_min": 647
},
{
"epoch": 1.9230769230769231,
"grad_norm": 0.4639112950762158,
"learning_rate": 3.095276230212056e-05,
"loss": 0.4696,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2098376452922821,
"step": 600,
"valid_targets_mean": 2136.6,
"valid_targets_min": 495
},
{
"epoch": 1.939102564102564,
"grad_norm": 0.5166631970577467,
"learning_rate": 3.076485717806808e-05,
"loss": 0.4703,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.18611370027065277,
"step": 605,
"valid_targets_mean": 1350.6,
"valid_targets_min": 575
},
{
"epoch": 1.9551282051282053,
"grad_norm": 0.4745285139732179,
"learning_rate": 3.057560461449665e-05,
"loss": 0.457,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.252684623003006,
"step": 610,
"valid_targets_mean": 2834.0,
"valid_targets_min": 460
},
{
"epoch": 1.9711538461538463,
"grad_norm": 0.48419329402472827,
"learning_rate": 3.038502830019092e-05,
"loss": 0.4735,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.25076764822006226,
"step": 615,
"valid_targets_mean": 2311.8,
"valid_targets_min": 535
},
{
"epoch": 1.9871794871794872,
"grad_norm": 0.4856357542202215,
"learning_rate": 3.019315208962968e-05,
"loss": 0.4564,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.22748295962810516,
"step": 620,
"valid_targets_mean": 1729.2,
"valid_targets_min": 404
},
{
"epoch": 2.003205128205128,
"grad_norm": 0.5415603488319026,
"learning_rate": 3.0000000000000004e-05,
"loss": 0.4663,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.25027239322662354,
"step": 625,
"valid_targets_mean": 2599.8,
"valid_targets_min": 446
},
{
"epoch": 2.019230769230769,
"grad_norm": 0.5616447815364394,
"learning_rate": 2.9805596208191056e-05,
"loss": 0.4626,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.31486913561820984,
"step": 630,
"valid_targets_mean": 2342.2,
"valid_targets_min": 481
},
{
"epoch": 2.03525641025641,
"grad_norm": 0.4616279441242081,
"learning_rate": 2.960996504776783e-05,
"loss": 0.4494,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2557230293750763,
"step": 635,
"valid_targets_mean": 3251.1,
"valid_targets_min": 374
},
{
"epoch": 2.051282051282051,
"grad_norm": 0.5503041087281749,
"learning_rate": 2.9413131005925296e-05,
"loss": 0.4178,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.18771244585514069,
"step": 640,
"valid_targets_mean": 1542.1,
"valid_targets_min": 405
},
{
"epoch": 2.0673076923076925,
"grad_norm": 0.5145185224875077,
"learning_rate": 2.9215118720423375e-05,
"loss": 0.4356,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.20002877712249756,
"step": 645,
"valid_targets_mean": 2503.4,
"valid_targets_min": 679
},
{
"epoch": 2.0833333333333335,
"grad_norm": 0.5259222795664407,
"learning_rate": 2.9015952976502994e-05,
"loss": 0.4819,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.22057735919952393,
"step": 650,
"valid_targets_mean": 2547.9,
"valid_targets_min": 329
},
{
"epoch": 2.0993589743589745,
"grad_norm": 0.49973317567273595,
"learning_rate": 2.8815658703783715e-05,
"loss": 0.4179,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.20730602741241455,
"step": 655,
"valid_targets_mean": 2237.2,
"valid_targets_min": 603
},
{
"epoch": 2.1153846153846154,
"grad_norm": 0.468034186388587,
"learning_rate": 2.8614260973143318e-05,
"loss": 0.4579,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2634885311126709,
"step": 660,
"valid_targets_mean": 2994.0,
"valid_targets_min": 613
},
{
"epoch": 2.1314102564102564,
"grad_norm": 0.5188728303293624,
"learning_rate": 2.8411784993579633e-05,
"loss": 0.4465,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.32055070996284485,
"step": 665,
"valid_targets_mean": 2799.9,
"valid_targets_min": 464
},
{
"epoch": 2.1474358974358974,
"grad_norm": 0.5727755387928921,
"learning_rate": 2.820825610905514e-05,
"loss": 0.4517,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.21268504858016968,
"step": 670,
"valid_targets_mean": 1740.4,
"valid_targets_min": 376
},
{
"epoch": 2.1634615384615383,
"grad_norm": 0.5102342820268265,
"learning_rate": 2.8003699795324674e-05,
"loss": 0.4343,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1873595267534256,
"step": 675,
"valid_targets_mean": 1921.6,
"valid_targets_min": 393
},
{
"epoch": 2.1794871794871793,
"grad_norm": 0.5212794879302061,
"learning_rate": 2.7798141656746606e-05,
"loss": 0.447,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2737618684768677,
"step": 680,
"valid_targets_mean": 2370.6,
"valid_targets_min": 346
},
{
"epoch": 2.1955128205128207,
"grad_norm": 0.4882810001947318,
"learning_rate": 2.7591607423077932e-05,
"loss": 0.481,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13172554969787598,
"step": 685,
"valid_targets_mean": 1362.9,
"valid_targets_min": 382
},
{
"epoch": 2.2115384615384617,
"grad_norm": 0.6207677882628246,
"learning_rate": 2.738412294625369e-05,
"loss": 0.4491,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.23348423838615417,
"step": 690,
"valid_targets_mean": 2125.4,
"valid_targets_min": 544
},
{
"epoch": 2.2275641025641026,
"grad_norm": 0.5134803204787327,
"learning_rate": 2.717571419715107e-05,
"loss": 0.4205,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.21478433907032013,
"step": 695,
"valid_targets_mean": 1724.8,
"valid_targets_min": 372
},
{
"epoch": 2.2435897435897436,
"grad_norm": 0.49066453024135814,
"learning_rate": 2.69664072623386e-05,
"loss": 0.4644,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.23817721009254456,
"step": 700,
"valid_targets_mean": 2259.7,
"valid_targets_min": 408
},
{
"epoch": 2.2596153846153846,
"grad_norm": 0.5896960527239582,
"learning_rate": 2.6756228340810946e-05,
"loss": 0.4499,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2211766242980957,
"step": 705,
"valid_targets_mean": 1644.1,
"valid_targets_min": 303
},
{
"epoch": 2.2756410256410255,
"grad_norm": 0.47096626497110644,
"learning_rate": 2.6545203740709502e-05,
"loss": 0.444,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24633574485778809,
"step": 710,
"valid_targets_mean": 2245.6,
"valid_targets_min": 496
},
{
"epoch": 2.2916666666666665,
"grad_norm": 0.4898735632976753,
"learning_rate": 2.6333359876029455e-05,
"loss": 0.4469,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2112981081008911,
"step": 715,
"valid_targets_mean": 1969.1,
"valid_targets_min": 414
},
{
"epoch": 2.3076923076923075,
"grad_norm": 0.501391834341433,
"learning_rate": 2.612072326331351e-05,
"loss": 0.4178,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2247483730316162,
"step": 720,
"valid_targets_mean": 2423.8,
"valid_targets_min": 393
},
{
"epoch": 2.323717948717949,
"grad_norm": 0.4217215747925792,
"learning_rate": 2.5907320518332827e-05,
"loss": 0.4313,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.19624283909797668,
"step": 725,
"valid_targets_mean": 2825.5,
"valid_targets_min": 544
},
{
"epoch": 2.33974358974359,
"grad_norm": 0.49601599026984694,
"learning_rate": 2.5693178352755497e-05,
"loss": 0.4074,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.202076256275177,
"step": 730,
"valid_targets_mean": 2533.4,
"valid_targets_min": 617
},
{
"epoch": 2.355769230769231,
"grad_norm": 0.5353940844579022,
"learning_rate": 2.547832357080305e-05,
"loss": 0.4227,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2339232861995697,
"step": 735,
"valid_targets_mean": 1994.9,
"valid_targets_min": 524
},
{
"epoch": 2.371794871794872,
"grad_norm": 0.5722824667248534,
"learning_rate": 2.5262783065895377e-05,
"loss": 0.4452,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2249128520488739,
"step": 740,
"valid_targets_mean": 1781.4,
"valid_targets_min": 326
},
{
"epoch": 2.3878205128205128,
"grad_norm": 0.5007302300195212,
"learning_rate": 2.5046583817284437e-05,
"loss": 0.4542,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.17838139832019806,
"step": 745,
"valid_targets_mean": 1831.7,
"valid_targets_min": 347
},
{
"epoch": 2.4038461538461537,
"grad_norm": 0.4472396981588018,
"learning_rate": 2.48297528866773e-05,
"loss": 0.4087,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1966041922569275,
"step": 750,
"valid_targets_mean": 2101.4,
"valid_targets_min": 507
},
{
"epoch": 2.4198717948717947,
"grad_norm": 0.5475486438741652,
"learning_rate": 2.4612317414848804e-05,
"loss": 0.4367,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2544914484024048,
"step": 755,
"valid_targets_mean": 2436.7,
"valid_targets_min": 548
},
{
"epoch": 2.435897435897436,
"grad_norm": 0.47978909370549155,
"learning_rate": 2.4394304618244346e-05,
"loss": 0.4467,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2271379828453064,
"step": 760,
"valid_targets_mean": 2447.1,
"valid_targets_min": 399
},
{
"epoch": 2.451923076923077,
"grad_norm": 0.6270651047991183,
"learning_rate": 2.4175741785573177e-05,
"loss": 0.4528,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2507140636444092,
"step": 765,
"valid_targets_mean": 1775.8,
"valid_targets_min": 287
},
{
"epoch": 2.467948717948718,
"grad_norm": 0.3929435528764122,
"learning_rate": 2.39566562743927e-05,
"loss": 0.4189,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.19187305867671967,
"step": 770,
"valid_targets_mean": 3374.7,
"valid_targets_min": 597
},
{
"epoch": 2.483974358974359,
"grad_norm": 0.527375808149042,
"learning_rate": 2.3737075507684103e-05,
"loss": 0.4494,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.22173798084259033,
"step": 775,
"valid_targets_mean": 1856.5,
"valid_targets_min": 414
},
{
"epoch": 2.5,
"grad_norm": 0.5997723972038063,
"learning_rate": 2.3517026970419786e-05,
"loss": 0.4365,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24415044486522675,
"step": 780,
"valid_targets_mean": 1901.2,
"valid_targets_min": 779
},
{
"epoch": 2.516025641025641,
"grad_norm": 0.48377772022532683,
"learning_rate": 2.3296538206123134e-05,
"loss": 0.4155,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.21287669241428375,
"step": 785,
"valid_targets_mean": 2185.6,
"valid_targets_min": 399
},
{
"epoch": 2.532051282051282,
"grad_norm": 0.5468759021698991,
"learning_rate": 2.307563681342081e-05,
"loss": 0.4634,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2165958732366562,
"step": 790,
"valid_targets_mean": 2127.9,
"valid_targets_min": 592
},
{
"epoch": 2.5480769230769234,
"grad_norm": 0.47406985778934707,
"learning_rate": 2.285435044258829e-05,
"loss": 0.4235,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.20566615462303162,
"step": 795,
"valid_targets_mean": 2176.4,
"valid_targets_min": 353
},
{
"epoch": 2.564102564102564,
"grad_norm": 0.5531693361265784,
"learning_rate": 2.263270679208883e-05,
"loss": 0.4375,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.20676007866859436,
"step": 800,
"valid_targets_mean": 1450.8,
"valid_targets_min": 343
},
{
"epoch": 2.5801282051282053,
"grad_norm": 0.5471762527126571,
"learning_rate": 2.2410733605106462e-05,
"loss": 0.4427,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2094150185585022,
"step": 805,
"valid_targets_mean": 1916.5,
"valid_targets_min": 657
},
{
"epoch": 2.5961538461538463,
"grad_norm": 0.457621920491644,
"learning_rate": 2.2188458666073382e-05,
"loss": 0.4447,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.21128109097480774,
"step": 810,
"valid_targets_mean": 2713.3,
"valid_targets_min": 336
},
{
"epoch": 2.6121794871794872,
"grad_norm": 0.5354268147970072,
"learning_rate": 2.1965909797192143e-05,
"loss": 0.4372,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.18181543052196503,
"step": 815,
"valid_targets_mean": 1601.1,
"valid_targets_min": 422
},
{
"epoch": 2.628205128205128,
"grad_norm": 0.5148256202815014,
"learning_rate": 2.174311485495317e-05,
"loss": 0.4303,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.20850974321365356,
"step": 820,
"valid_targets_mean": 2054.1,
"valid_targets_min": 358
},
{
"epoch": 2.644230769230769,
"grad_norm": 0.5117934823979483,
"learning_rate": 2.1520101726647922e-05,
"loss": 0.4482,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24301588535308838,
"step": 825,
"valid_targets_mean": 2009.7,
"valid_targets_min": 548
},
{
"epoch": 2.66025641025641,
"grad_norm": 0.5932725780271455,
"learning_rate": 2.1296898326878282e-05,
"loss": 0.4368,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.20315149426460266,
"step": 830,
"valid_targets_mean": 1422.9,
"valid_targets_min": 382
},
{
"epoch": 2.676282051282051,
"grad_norm": 0.5511240954228953,
"learning_rate": 2.1073532594062432e-05,
"loss": 0.4287,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.20835387706756592,
"step": 835,
"valid_targets_mean": 1824.1,
"valid_targets_min": 406
},
{
"epoch": 2.6923076923076925,
"grad_norm": 0.48108009842119964,
"learning_rate": 2.0850032486937838e-05,
"loss": 0.413,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.23764106631278992,
"step": 840,
"valid_targets_mean": 2332.3,
"valid_targets_min": 311
},
{
"epoch": 2.7083333333333335,
"grad_norm": 0.45578124236246004,
"learning_rate": 2.0626425981061608e-05,
"loss": 0.4221,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.16820326447486877,
"step": 845,
"valid_targets_mean": 1958.2,
"valid_targets_min": 427
},
{
"epoch": 2.7243589743589745,
"grad_norm": 0.527649928455185,
"learning_rate": 2.0402741065308808e-05,
"loss": 0.4325,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2456822693347931,
"step": 850,
"valid_targets_mean": 1982.1,
"valid_targets_min": 447
},
{
"epoch": 2.7403846153846154,
"grad_norm": 0.545838162469139,
"learning_rate": 2.0179005738369098e-05,
"loss": 0.4479,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.19371215999126434,
"step": 855,
"valid_targets_mean": 1953.6,
"valid_targets_min": 404
},
{
"epoch": 2.7564102564102564,
"grad_norm": 0.5137794607606895,
"learning_rate": 1.995524800524211e-05,
"loss": 0.4463,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.17609572410583496,
"step": 860,
"valid_targets_mean": 1501.8,
"valid_targets_min": 397
},
{
"epoch": 2.7724358974358974,
"grad_norm": 0.5022662147440222,
"learning_rate": 1.9731495873732055e-05,
"loss": 0.4475,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.26271939277648926,
"step": 865,
"valid_targets_mean": 2227.0,
"valid_targets_min": 380
},
{
"epoch": 2.7884615384615383,
"grad_norm": 0.44907950593239604,
"learning_rate": 1.9507777350941996e-05,
"loss": 0.417,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.20136427879333496,
"step": 870,
"valid_targets_mean": 3125.4,
"valid_targets_min": 477
},
{
"epoch": 2.8044871794871797,
"grad_norm": 0.48568732874510356,
"learning_rate": 1.9284120439768192e-05,
"loss": 0.4371,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1716795265674591,
"step": 875,
"valid_targets_mean": 1739.9,
"valid_targets_min": 556
},
{
"epoch": 2.8205128205128203,
"grad_norm": 0.560681495754532,
"learning_rate": 1.9060553135394957e-05,
"loss": 0.4403,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.25348377227783203,
"step": 880,
"valid_targets_mean": 2066.1,
"valid_targets_min": 481
},
{
"epoch": 2.8365384615384617,
"grad_norm": 0.5208153414018862,
"learning_rate": 1.8837103421790486e-05,
"loss": 0.4099,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.18841373920440674,
"step": 885,
"valid_targets_mean": 2157.9,
"valid_targets_min": 500
},
{
"epoch": 2.8525641025641026,
"grad_norm": 0.532992574302052,
"learning_rate": 1.861379926820414e-05,
"loss": 0.4324,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2288801670074463,
"step": 890,
"valid_targets_mean": 1894.5,
"valid_targets_min": 394
},
{
"epoch": 2.8685897435897436,
"grad_norm": 0.49754798570296427,
"learning_rate": 1.8390668625665483e-05,
"loss": 0.4173,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.25023743510246277,
"step": 895,
"valid_targets_mean": 2521.0,
"valid_targets_min": 615
},
{
"epoch": 2.8846153846153846,
"grad_norm": 0.5090535372637991,
"learning_rate": 1.8167739423485668e-05,
"loss": 0.4547,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.20940105617046356,
"step": 900,
"valid_targets_mean": 2438.8,
"valid_targets_min": 464
},
{
"epoch": 2.9006410256410255,
"grad_norm": 0.4245436673848715,
"learning_rate": 1.794503956576152e-05,
"loss": 0.4422,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.21880042552947998,
"step": 905,
"valid_targets_mean": 3292.9,
"valid_targets_min": 483
},
{
"epoch": 2.9166666666666665,
"grad_norm": 0.512498398614729,
"learning_rate": 1.7722596927882758e-05,
"loss": 0.456,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1915343850851059,
"step": 910,
"valid_targets_mean": 2097.7,
"valid_targets_min": 423
},
{
"epoch": 2.9326923076923075,
"grad_norm": 0.5002179520743993,
"learning_rate": 1.7500439353042834e-05,
"loss": 0.428,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.19558550417423248,
"step": 915,
"valid_targets_mean": 1932.4,
"valid_targets_min": 352
},
{
"epoch": 2.948717948717949,
"grad_norm": 0.5271342144848477,
"learning_rate": 1.727859464875381e-05,
"loss": 0.4672,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.23308855295181274,
"step": 920,
"valid_targets_mean": 2075.1,
"valid_targets_min": 458
},
{
"epoch": 2.96474358974359,
"grad_norm": 0.9339459055765652,
"learning_rate": 1.7057090583365678e-05,
"loss": 0.4491,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.20995834469795227,
"step": 925,
"valid_targets_mean": 1836.2,
"valid_targets_min": 369
},
{
"epoch": 2.980769230769231,
"grad_norm": 0.4858744121036798,
"learning_rate": 1.6835954882590567e-05,
"loss": 0.4596,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.21504682302474976,
"step": 930,
"valid_targets_mean": 2529.9,
"valid_targets_min": 390
},
{
"epoch": 2.996794871794872,
"grad_norm": 0.5223177985055565,
"learning_rate": 1.6615215226032332e-05,
"loss": 0.3969,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2861439883708954,
"step": 935,
"valid_targets_mean": 2334.1,
"valid_targets_min": 556
},
{
"epoch": 3.0128205128205128,
"grad_norm": 0.5081953568971541,
"learning_rate": 1.6394899243721887e-05,
"loss": 0.4444,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.18385818600654602,
"step": 940,
"valid_targets_mean": 1744.4,
"valid_targets_min": 456
},
{
"epoch": 3.0288461538461537,
"grad_norm": 0.41196760326087184,
"learning_rate": 1.6175034512658753e-05,
"loss": 0.41,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1653943955898285,
"step": 945,
"valid_targets_mean": 2961.9,
"valid_targets_min": 464
},
{
"epoch": 3.0448717948717947,
"grad_norm": 0.46276766410726833,
"learning_rate": 1.5955648553359247e-05,
"loss": 0.4061,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1766689419746399,
"step": 950,
"valid_targets_mean": 2314.1,
"valid_targets_min": 519
},
{
"epoch": 3.0608974358974357,
"grad_norm": 0.56466145662721,
"learning_rate": 1.5736768826411683e-05,
"loss": 0.3975,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.17233410477638245,
"step": 955,
"valid_targets_mean": 1736.1,
"valid_targets_min": 368
},
{
"epoch": 3.076923076923077,
"grad_norm": 0.6184183203209048,
"learning_rate": 1.5518422729039188e-05,
"loss": 0.4366,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2544904947280884,
"step": 960,
"valid_targets_mean": 1785.6,
"valid_targets_min": 453
},
{
"epoch": 3.092948717948718,
"grad_norm": 0.46138818996251113,
"learning_rate": 1.5300637591670357e-05,
"loss": 0.3877,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2141857147216797,
"step": 965,
"valid_targets_mean": 3189.5,
"valid_targets_min": 635
},
{
"epoch": 3.108974358974359,
"grad_norm": 0.5024028000700136,
"learning_rate": 1.5083440674518302e-05,
"loss": 0.4141,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.20946665108203888,
"step": 970,
"valid_targets_mean": 2281.5,
"valid_targets_min": 477
},
{
"epoch": 3.125,
"grad_norm": 0.6617512350367286,
"learning_rate": 1.4866859164168466e-05,
"loss": 0.4462,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.27546098828315735,
"step": 975,
"valid_targets_mean": 1994.2,
"valid_targets_min": 291
},
{
"epoch": 3.141025641025641,
"grad_norm": 0.5778578694843474,
"learning_rate": 1.4650920170175704e-05,
"loss": 0.431,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.19753442704677582,
"step": 980,
"valid_targets_mean": 1745.9,
"valid_targets_min": 326
},
{
"epoch": 3.157051282051282,
"grad_norm": 0.4876046302233207,
"learning_rate": 1.443565072167095e-05,
"loss": 0.4193,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1774245798587799,
"step": 985,
"valid_targets_mean": 1624.2,
"valid_targets_min": 572
},
{
"epoch": 3.173076923076923,
"grad_norm": 0.5738704159928016,
"learning_rate": 1.4221077763977984e-05,
"loss": 0.4254,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2392444908618927,
"step": 990,
"valid_targets_mean": 1980.4,
"valid_targets_min": 784
},
{
"epoch": 3.189102564102564,
"grad_norm": 0.4850487138995122,
"learning_rate": 1.4007228155240696e-05,
"loss": 0.4209,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2174842357635498,
"step": 995,
"valid_targets_mean": 2492.1,
"valid_targets_min": 459
},
{
"epoch": 3.2051282051282053,
"grad_norm": 0.529762108647159,
"learning_rate": 1.37941286630612e-05,
"loss": 0.4147,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.22387659549713135,
"step": 1000,
"valid_targets_mean": 1882.1,
"valid_targets_min": 355
},
{
"epoch": 3.2211538461538463,
"grad_norm": 0.4709252470917212,
"learning_rate": 1.3581805961149371e-05,
"loss": 0.4275,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.17616716027259827,
"step": 1005,
"valid_targets_mean": 2260.7,
"valid_targets_min": 481
},
{
"epoch": 3.2371794871794872,
"grad_norm": 0.4920991562633389,
"learning_rate": 1.3370286625984089e-05,
"loss": 0.4291,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1874414086341858,
"step": 1010,
"valid_targets_mean": 1999.1,
"valid_targets_min": 389
},
{
"epoch": 3.253205128205128,
"grad_norm": 0.4366015680605223,
"learning_rate": 1.3159597133486628e-05,
"loss": 0.4132,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1775241196155548,
"step": 1015,
"valid_targets_mean": 2691.2,
"valid_targets_min": 492
},
{
"epoch": 3.269230769230769,
"grad_norm": 0.5159579418833546,
"learning_rate": 1.2949763855706678e-05,
"loss": 0.3984,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.20066994428634644,
"step": 1020,
"valid_targets_mean": 2245.2,
"valid_targets_min": 414
},
{
"epoch": 3.28525641025641,
"grad_norm": 0.5876671407131282,
"learning_rate": 1.274081305752135e-05,
"loss": 0.4516,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1973981410264969,
"step": 1025,
"valid_targets_mean": 1791.3,
"valid_targets_min": 398
},
{
"epoch": 3.301282051282051,
"grad_norm": 0.6456536801741808,
"learning_rate": 1.2532770893347582e-05,
"loss": 0.427,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.3110905885696411,
"step": 1030,
"valid_targets_mean": 1891.8,
"valid_targets_min": 324
},
{
"epoch": 3.3173076923076925,
"grad_norm": 0.5293106844776568,
"learning_rate": 1.2325663403868406e-05,
"loss": 0.4164,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.230901837348938,
"step": 1035,
"valid_targets_mean": 2292.3,
"valid_targets_min": 355
},
{
"epoch": 3.3333333333333335,
"grad_norm": 0.5522757760846092,
"learning_rate": 1.2119516512773424e-05,
"loss": 0.3895,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24217258393764496,
"step": 1040,
"valid_targets_mean": 2009.1,
"valid_targets_min": 408
},
{
"epoch": 3.3493589743589745,
"grad_norm": 0.5560321667957103,
"learning_rate": 1.1914356023513904e-05,
"loss": 0.4459,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.23398807644844055,
"step": 1045,
"valid_targets_mean": 2263.5,
"valid_targets_min": 539
},
{
"epoch": 3.3653846153846154,
"grad_norm": 0.4825967281763318,
"learning_rate": 1.1710207616073001e-05,
"loss": 0.3927,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.21472863852977753,
"step": 1050,
"valid_targets_mean": 2439.7,
"valid_targets_min": 286
},
{
"epoch": 3.3814102564102564,
"grad_norm": 0.5322770350466977,
"learning_rate": 1.1507096843751372e-05,
"loss": 0.4218,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.17266938090324402,
"step": 1055,
"valid_targets_mean": 1598.1,
"valid_targets_min": 387
},
{
"epoch": 3.3974358974358974,
"grad_norm": 0.7827125593311665,
"learning_rate": 1.1305049129968637e-05,
"loss": 0.4349,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1991385519504547,
"step": 1060,
"valid_targets_mean": 2439.3,
"valid_targets_min": 608
},
{
"epoch": 3.4134615384615383,
"grad_norm": 0.44528916459778456,
"learning_rate": 1.110408976508118e-05,
"loss": 0.3907,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1545448899269104,
"step": 1065,
"valid_targets_mean": 2074.4,
"valid_targets_min": 396
},
{
"epoch": 3.4294871794871793,
"grad_norm": 0.48384890001970277,
"learning_rate": 1.090424390321648e-05,
"loss": 0.4352,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.18344886600971222,
"step": 1070,
"valid_targets_mean": 2136.0,
"valid_targets_min": 406
},
{
"epoch": 3.4455128205128207,
"grad_norm": 0.46732111222809075,
"learning_rate": 1.070553655912463e-05,
"loss": 0.3894,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13502533733844757,
"step": 1075,
"valid_targets_mean": 1907.0,
"valid_targets_min": 400
},
{
"epoch": 3.4615384615384617,
"grad_norm": 0.46066416755356465,
"learning_rate": 1.0507992605047193e-05,
"loss": 0.4055,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1755571961402893,
"step": 1080,
"valid_targets_mean": 2637.7,
"valid_targets_min": 457
},
{
"epoch": 3.4775641025641026,
"grad_norm": 0.3719838719731155,
"learning_rate": 1.0311636767603952e-05,
"loss": 0.3946,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.14000138640403748,
"step": 1085,
"valid_targets_mean": 2861.4,
"valid_targets_min": 495
},
{
"epoch": 3.4935897435897436,
"grad_norm": 0.5828556028812071,
"learning_rate": 1.0116493624697862e-05,
"loss": 0.4203,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.3252260684967041,
"step": 1090,
"valid_targets_mean": 2756.8,
"valid_targets_min": 533
},
{
"epoch": 3.5096153846153846,
"grad_norm": 0.43638255215532396,
"learning_rate": 9.922587602438657e-06,
"loss": 0.3941,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.18718764185905457,
"step": 1095,
"valid_targets_mean": 2623.4,
"valid_targets_min": 420
},
{
"epoch": 3.5256410256410255,
"grad_norm": 0.5594326378178172,
"learning_rate": 9.729942972085401e-06,
"loss": 0.4182,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.20950523018836975,
"step": 1100,
"valid_targets_mean": 1888.2,
"valid_targets_min": 398
},
{
"epoch": 3.5416666666666665,
"grad_norm": 0.6153082072669694,
"learning_rate": 9.538583847008452e-06,
"loss": 0.4508,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24810105562210083,
"step": 1105,
"valid_targets_mean": 1652.6,
"valid_targets_min": 487
},
{
"epoch": 3.5576923076923075,
"grad_norm": 0.7347949249771237,
"learning_rate": 9.348534179671202e-06,
"loss": 0.4446,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1828898787498474,
"step": 1110,
"valid_targets_mean": 1291.9,
"valid_targets_min": 384
},
{
"epoch": 3.573717948717949,
"grad_norm": 0.4885273908189287,
"learning_rate": 9.159817758631923e-06,
"loss": 0.4148,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.21009644865989685,
"step": 1115,
"valid_targets_mean": 2135.0,
"valid_targets_min": 345
},
{
"epoch": 3.58974358974359,
"grad_norm": 0.4973124736202378,
"learning_rate": 8.972458205566168e-06,
"loss": 0.4288,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.15964004397392273,
"step": 1120,
"valid_targets_mean": 1893.4,
"valid_targets_min": 460
},
{
"epoch": 3.605769230769231,
"grad_norm": 0.5441599943681932,
"learning_rate": 8.786478972310023e-06,
"loss": 0.4375,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2124439775943756,
"step": 1125,
"valid_targets_mean": 2145.6,
"valid_targets_min": 336
},
{
"epoch": 3.621794871794872,
"grad_norm": 0.5226367162201916,
"learning_rate": 8.601903337924646e-06,
"loss": 0.3906,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.19349414110183716,
"step": 1130,
"valid_targets_mean": 2012.8,
"valid_targets_min": 509
},
{
"epoch": 3.6378205128205128,
"grad_norm": 0.5145848515018033,
"learning_rate": 8.418754405782423e-06,
"loss": 0.4056,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2273569405078888,
"step": 1135,
"valid_targets_mean": 2077.1,
"valid_targets_min": 426
},
{
"epoch": 3.6538461538461537,
"grad_norm": 0.5175398660560526,
"learning_rate": 8.237055100675092e-06,
"loss": 0.4016,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.22114655375480652,
"step": 1140,
"valid_targets_mean": 2420.2,
"valid_targets_min": 386
},
{
"epoch": 3.6698717948717947,
"grad_norm": 0.6277817059082492,
"learning_rate": 8.056828165944282e-06,
"loss": 0.4462,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.26035845279693604,
"step": 1145,
"valid_targets_mean": 1739.6,
"valid_targets_min": 324
},
{
"epoch": 3.685897435897436,
"grad_norm": 0.5854299128793944,
"learning_rate": 7.878096160634675e-06,
"loss": 0.4342,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.22393161058425903,
"step": 1150,
"valid_targets_mean": 1604.9,
"valid_targets_min": 517
},
{
"epoch": 3.7019230769230766,
"grad_norm": 0.6521482846929779,
"learning_rate": 7.700881456670342e-06,
"loss": 0.4199,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.21592406928539276,
"step": 1155,
"valid_targets_mean": 1704.6,
"valid_targets_min": 372
},
{
"epoch": 3.717948717948718,
"grad_norm": 0.5205545018221379,
"learning_rate": 7.525206236054385e-06,
"loss": 0.4159,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.19648584723472595,
"step": 1160,
"valid_targets_mean": 1921.7,
"valid_targets_min": 393
},
{
"epoch": 3.733974358974359,
"grad_norm": 0.5195989305298412,
"learning_rate": 7.3510924880924575e-06,
"loss": 0.4233,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.17861494421958923,
"step": 1165,
"valid_targets_mean": 1717.1,
"valid_targets_min": 479
},
{
"epoch": 3.75,
"grad_norm": 0.6033763727537506,
"learning_rate": 7.178562006640337e-06,
"loss": 0.4242,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.20282906293869019,
"step": 1170,
"valid_targets_mean": 1704.9,
"valid_targets_min": 409
},
{
"epoch": 3.766025641025641,
"grad_norm": 0.8520137624165722,
"learning_rate": 7.0076363873759865e-06,
"loss": 0.3928,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.21843525767326355,
"step": 1175,
"valid_targets_mean": 2615.6,
"valid_targets_min": 500
},
{
"epoch": 3.782051282051282,
"grad_norm": 0.5354434243394129,
"learning_rate": 6.838337025096424e-06,
"loss": 0.3922,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2174426019191742,
"step": 1180,
"valid_targets_mean": 2017.8,
"valid_targets_min": 482
},
{
"epoch": 3.7980769230769234,
"grad_norm": 0.5105220829406444,
"learning_rate": 6.67068511103971e-06,
"loss": 0.4288,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.18573346734046936,
"step": 1185,
"valid_targets_mean": 2131.7,
"valid_targets_min": 499
},
{
"epoch": 3.814102564102564,
"grad_norm": 0.46857007935338224,
"learning_rate": 6.504701630232475e-06,
"loss": 0.4107,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.17006590962409973,
"step": 1190,
"valid_targets_mean": 1914.7,
"valid_targets_min": 476
},
{
"epoch": 3.8301282051282053,
"grad_norm": 0.5407396992987427,
"learning_rate": 6.340407358863167e-06,
"loss": 0.3835,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.16990578174591064,
"step": 1195,
"valid_targets_mean": 1394.6,
"valid_targets_min": 363
},
{
"epoch": 3.8461538461538463,
"grad_norm": 0.47292976742071363,
"learning_rate": 6.177822861681557e-06,
"loss": 0.4242,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1876613199710846,
"step": 1200,
"valid_targets_mean": 2349.9,
"valid_targets_min": 393
},
{
"epoch": 3.8621794871794872,
"grad_norm": 0.4841137938946458,
"learning_rate": 6.016968489424572e-06,
"loss": 0.4346,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.14386187493801117,
"step": 1205,
"valid_targets_mean": 1697.8,
"valid_targets_min": 220
},
{
"epoch": 3.878205128205128,
"grad_norm": 0.5708595597284355,
"learning_rate": 5.857864376269051e-06,
"loss": 0.4374,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.23375602066516876,
"step": 1210,
"valid_targets_mean": 1680.6,
"valid_targets_min": 381
},
{
"epoch": 3.894230769230769,
"grad_norm": 0.5084621641793788,
"learning_rate": 5.700530437311509e-06,
"loss": 0.4229,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.18736353516578674,
"step": 1215,
"valid_targets_mean": 1901.4,
"valid_targets_min": 381
},
{
"epoch": 3.91025641025641,
"grad_norm": 0.5301004661796407,
"learning_rate": 5.544986366075371e-06,
"loss": 0.4391,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1670445203781128,
"step": 1220,
"valid_targets_mean": 1648.1,
"valid_targets_min": 401
},
{
"epoch": 3.926282051282051,
"grad_norm": 0.6134172232948031,
"learning_rate": 5.39125163204594e-06,
"loss": 0.4133,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.21446377038955688,
"step": 1225,
"valid_targets_mean": 1942.4,
"valid_targets_min": 483
},
{
"epoch": 3.9423076923076925,
"grad_norm": 0.5974202428194099,
"learning_rate": 5.239345478233364e-06,
"loss": 0.4258,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.17354455590248108,
"step": 1230,
"valid_targets_mean": 1358.0,
"valid_targets_min": 369
},
{
"epoch": 3.9583333333333335,
"grad_norm": 0.4483178567987285,
"learning_rate": 5.089286918764031e-06,
"loss": 0.3961,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.19412055611610413,
"step": 1235,
"valid_targets_mean": 2878.3,
"valid_targets_min": 803
},
{
"epoch": 3.9743589743589745,
"grad_norm": 0.4507178360947217,
"learning_rate": 4.941094736500522e-06,
"loss": 0.4006,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.16912227869033813,
"step": 1240,
"valid_targets_mean": 2105.1,
"valid_targets_min": 357
},
{
"epoch": 3.9903846153846154,
"grad_norm": 0.5411117764458047,
"learning_rate": 4.794787480690597e-06,
"loss": 0.4147,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.18049368262290955,
"step": 1245,
"valid_targets_mean": 1542.9,
"valid_targets_min": 366
},
{
"epoch": 4.006410256410256,
"grad_norm": 0.5020314686782774,
"learning_rate": 4.650383464645338e-06,
"loss": 0.4072,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.17812328040599823,
"step": 1250,
"valid_targets_mean": 2127.0,
"valid_targets_min": 558
},
{
"epoch": 4.022435897435898,
"grad_norm": 0.8897516856466173,
"learning_rate": 4.507900763446911e-06,
"loss": 0.4323,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.17299619317054749,
"step": 1255,
"valid_targets_mean": 1772.4,
"valid_targets_min": 401
},
{
"epoch": 4.038461538461538,
"grad_norm": 0.5155047635305318,
"learning_rate": 4.367357211686072e-06,
"loss": 0.4405,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2016928344964981,
"step": 1260,
"valid_targets_mean": 1984.0,
"valid_targets_min": 314
},
{
"epoch": 4.05448717948718,
"grad_norm": 0.5575252186312881,
"learning_rate": 4.228770401229824e-06,
"loss": 0.4146,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1600431501865387,
"step": 1265,
"valid_targets_mean": 1631.5,
"valid_targets_min": 422
},
{
"epoch": 4.07051282051282,
"grad_norm": 0.4739201323516146,
"learning_rate": 4.092157679019442e-06,
"loss": 0.4126,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2213498055934906,
"step": 1270,
"valid_targets_mean": 2722.4,
"valid_targets_min": 596
},
{
"epoch": 4.086538461538462,
"grad_norm": 0.5093029435546993,
"learning_rate": 3.957536144899123e-06,
"loss": 0.4053,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.16781838238239288,
"step": 1275,
"valid_targets_mean": 2022.2,
"valid_targets_min": 454
},
{
"epoch": 4.102564102564102,
"grad_norm": 0.46573698307127626,
"learning_rate": 3.8249226494756445e-06,
"loss": 0.3686,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1653728187084198,
"step": 1280,
"valid_targets_mean": 2144.4,
"valid_targets_min": 370
},
{
"epoch": 4.118589743589744,
"grad_norm": 0.49547650926816716,
"learning_rate": 3.694333792009115e-06,
"loss": 0.4204,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.16737449169158936,
"step": 1285,
"valid_targets_mean": 2180.3,
"valid_targets_min": 390
},
{
"epoch": 4.134615384615385,
"grad_norm": 0.4959235045064883,
"learning_rate": 3.565785918335292e-06,
"loss": 0.4121,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.17612406611442566,
"step": 1290,
"valid_targets_mean": 2229.8,
"valid_targets_min": 415
},
{
"epoch": 4.1506410256410255,
"grad_norm": 0.5497305685489215,
"learning_rate": 3.43929511881953e-06,
"loss": 0.4143,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.21826569736003876,
"step": 1295,
"valid_targets_mean": 2172.8,
"valid_targets_min": 378
},
{
"epoch": 4.166666666666667,
"grad_norm": 0.49016495730669646,
"learning_rate": 3.3148772263427743e-06,
"loss": 0.3962,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2120068520307541,
"step": 1300,
"valid_targets_mean": 2636.4,
"valid_targets_min": 502
},
{
"epoch": 4.1826923076923075,
"grad_norm": 0.5410817941449617,
"learning_rate": 3.1925478143197418e-06,
"loss": 0.4026,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1488579511642456,
"step": 1305,
"valid_targets_mean": 1622.9,
"valid_targets_min": 408
},
{
"epoch": 4.198717948717949,
"grad_norm": 0.5167531330373021,
"learning_rate": 3.0723221947495907e-06,
"loss": 0.4189,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.16238239407539368,
"step": 1310,
"valid_targets_mean": 2115.7,
"valid_targets_min": 470
},
{
"epoch": 4.214743589743589,
"grad_norm": 0.5065500271116625,
"learning_rate": 2.954215416299331e-06,
"loss": 0.4129,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.18277542293071747,
"step": 1315,
"valid_targets_mean": 2185.4,
"valid_targets_min": 358
},
{
"epoch": 4.230769230769231,
"grad_norm": 0.5004798136718417,
"learning_rate": 2.838242262420148e-06,
"loss": 0.4176,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.22836771607398987,
"step": 1320,
"valid_targets_mean": 2597.8,
"valid_targets_min": 331
},
{
"epoch": 4.246794871794872,
"grad_norm": 0.5184618845461352,
"learning_rate": 2.7244172494969978e-06,
"loss": 0.4176,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.18959830701351166,
"step": 1325,
"valid_targets_mean": 2182.0,
"valid_targets_min": 523
},
{
"epoch": 4.262820512820513,
"grad_norm": 0.4544775548224198,
"learning_rate": 2.6127546250315438e-06,
"loss": 0.3865,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.16856324672698975,
"step": 1330,
"valid_targets_mean": 2052.1,
"valid_targets_min": 363
},
{
"epoch": 4.278846153846154,
"grad_norm": 0.6718829730259701,
"learning_rate": 2.503268365858831e-06,
"loss": 0.4097,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2597959339618683,
"step": 1335,
"valid_targets_mean": 1643.8,
"valid_targets_min": 521
},
{
"epoch": 4.294871794871795,
"grad_norm": 0.6447551224819277,
"learning_rate": 2.3959721763977805e-06,
"loss": 0.431,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.22334489226341248,
"step": 1340,
"valid_targets_mean": 1838.8,
"valid_targets_min": 447
},
{
"epoch": 4.310897435897436,
"grad_norm": 0.47106359697416067,
"learning_rate": 2.2908794869358044e-06,
"loss": 0.4107,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1753031313419342,
"step": 1345,
"valid_targets_mean": 2324.8,
"valid_targets_min": 382
},
{
"epoch": 4.326923076923077,
"grad_norm": 0.4797964740997395,
"learning_rate": 2.188003451947747e-06,
"loss": 0.3904,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13902968168258667,
"step": 1350,
"valid_targets_mean": 1571.4,
"valid_targets_min": 365
},
{
"epoch": 4.342948717948718,
"grad_norm": 0.4974988695652104,
"learning_rate": 2.0873569484493305e-06,
"loss": 0.3924,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.20952798426151276,
"step": 1355,
"valid_targets_mean": 2615.0,
"valid_targets_min": 499
},
{
"epoch": 4.358974358974359,
"grad_norm": 0.5841534150892278,
"learning_rate": 1.9889525743853323e-06,
"loss": 0.3984,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.21221312880516052,
"step": 1360,
"valid_targets_mean": 1916.5,
"valid_targets_min": 199
},
{
"epoch": 4.375,
"grad_norm": 0.5200681440214403,
"learning_rate": 1.8928026470526917e-06,
"loss": 0.4282,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.21217067539691925,
"step": 1365,
"valid_targets_mean": 1990.6,
"valid_targets_min": 398
},
{
"epoch": 4.391025641025641,
"grad_norm": 0.5225656986310022,
"learning_rate": 1.7989192015587776e-06,
"loss": 0.4046,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.21198615431785583,
"step": 1370,
"valid_targets_mean": 2139.8,
"valid_targets_min": 495
},
{
"epoch": 4.407051282051282,
"grad_norm": 0.5718894420905976,
"learning_rate": 1.7073139893149092e-06,
"loss": 0.4265,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2715427577495575,
"step": 1375,
"valid_targets_mean": 2201.8,
"valid_targets_min": 620
},
{
"epoch": 4.423076923076923,
"grad_norm": 0.4832349361730212,
"learning_rate": 1.6179984765654743e-06,
"loss": 0.3997,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.18295009434223175,
"step": 1380,
"valid_targets_mean": 2338.6,
"valid_targets_min": 333
},
{
"epoch": 4.439102564102564,
"grad_norm": 0.5009262761860173,
"learning_rate": 1.5309838429526714e-06,
"loss": 0.4229,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.19573353230953217,
"step": 1385,
"valid_targets_mean": 2377.6,
"valid_targets_min": 359
},
{
"epoch": 4.455128205128205,
"grad_norm": 0.4626576145917675,
"learning_rate": 1.4462809801171428e-06,
"loss": 0.4182,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.16738732159137726,
"step": 1390,
"valid_targets_mean": 2413.1,
"valid_targets_min": 632
},
{
"epoch": 4.471153846153846,
"grad_norm": 0.5619638360015893,
"learning_rate": 1.3639004903346954e-06,
"loss": 0.4131,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.17360472679138184,
"step": 1395,
"valid_targets_mean": 1610.5,
"valid_targets_min": 454
},
{
"epoch": 4.487179487179487,
"grad_norm": 0.5089087979706497,
"learning_rate": 1.2838526851891864e-06,
"loss": 0.4082,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.23789802193641663,
"step": 1400,
"valid_targets_mean": 2443.3,
"valid_targets_min": 541
},
{
"epoch": 4.503205128205128,
"grad_norm": 0.5942691756087387,
"learning_rate": 1.2061475842818337e-06,
"loss": 0.4333,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.21138739585876465,
"step": 1405,
"valid_targets_mean": 1548.1,
"valid_targets_min": 474
},
{
"epoch": 4.519230769230769,
"grad_norm": 0.5181839449829251,
"learning_rate": 1.1307949139770446e-06,
"loss": 0.3959,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.18168583512306213,
"step": 1410,
"valid_targets_mean": 1700.7,
"valid_targets_min": 316
},
{
"epoch": 4.535256410256411,
"grad_norm": 0.5691599181706188,
"learning_rate": 1.057804106184992e-06,
"loss": 0.4124,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.20509378612041473,
"step": 1415,
"valid_targets_mean": 2220.6,
"valid_targets_min": 390
},
{
"epoch": 4.551282051282051,
"grad_norm": 0.5362388152801542,
"learning_rate": 9.871842971809853e-07,
"loss": 0.3719,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.18971163034439087,
"step": 1420,
"valid_targets_mean": 2453.0,
"valid_targets_min": 438
},
{
"epoch": 4.5673076923076925,
"grad_norm": 0.4217117551642382,
"learning_rate": 9.189443264619102e-07,
"loss": 0.3898,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.19224503636360168,
"step": 1425,
"valid_targets_mean": 3036.1,
"valid_targets_min": 491
},
{
"epoch": 4.583333333333333,
"grad_norm": 0.50137069774833,
"learning_rate": 8.530927356397778e-07,
"loss": 0.4182,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.17977871000766754,
"step": 1430,
"valid_targets_mean": 1959.3,
"valid_targets_min": 462
},
{
"epoch": 4.5993589743589745,
"grad_norm": 0.5147630870730896,
"learning_rate": 7.896377673725553e-07,
"loss": 0.4009,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.190871000289917,
"step": 1435,
"valid_targets_mean": 1864.3,
"valid_targets_min": 436
},
{
"epoch": 4.615384615384615,
"grad_norm": 0.48363223511421033,
"learning_rate": 7.285873643324514e-07,
"loss": 0.4017,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.21705955266952515,
"step": 1440,
"valid_targets_mean": 2462.3,
"valid_targets_min": 621
},
{
"epoch": 4.631410256410256,
"grad_norm": 0.547275826405447,
"learning_rate": 6.69949168211721e-07,
"loss": 0.395,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2083073854446411,
"step": 1445,
"valid_targets_mean": 1937.9,
"valid_targets_min": 472
},
{
"epoch": 4.647435897435898,
"grad_norm": 0.5515440239397343,
"learning_rate": 6.137305187661513e-07,
"loss": 0.4123,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.19437691569328308,
"step": 1450,
"valid_targets_mean": 1709.7,
"valid_targets_min": 496
},
{
"epoch": 4.663461538461538,
"grad_norm": 0.5541400713883889,
"learning_rate": 5.599384528963425e-07,
"loss": 0.4069,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.17162460088729858,
"step": 1455,
"valid_targets_mean": 1923.9,
"valid_targets_min": 338
},
{
"epoch": 4.67948717948718,
"grad_norm": 0.46459204192014386,
"learning_rate": 5.085797037669072e-07,
"loss": 0.3773,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.19353806972503662,
"step": 1460,
"valid_targets_mean": 2262.0,
"valid_targets_min": 368
},
{
"epoch": 4.69551282051282,
"grad_norm": 0.5043482862484802,
"learning_rate": 4.5966069996365993e-07,
"loss": 0.3826,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.18038004636764526,
"step": 1465,
"valid_targets_mean": 2006.8,
"valid_targets_min": 401
},
{
"epoch": 4.711538461538462,
"grad_norm": 0.47468830861646916,
"learning_rate": 4.1318756468897047e-07,
"loss": 0.4266,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.21844631433486938,
"step": 1470,
"valid_targets_mean": 2434.2,
"valid_targets_min": 499
},
{
"epoch": 4.727564102564102,
"grad_norm": 0.5712016674577056,
"learning_rate": 3.691661149953096e-07,
"loss": 0.4055,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2230275571346283,
"step": 1475,
"valid_targets_mean": 2119.4,
"valid_targets_min": 454
},
{
"epoch": 4.743589743589744,
"grad_norm": 0.5286171152229552,
"learning_rate": 3.2760186105712964e-07,
"loss": 0.3816,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.19953539967536926,
"step": 1480,
"valid_targets_mean": 2190.5,
"valid_targets_min": 390
},
{
"epoch": 4.759615384615385,
"grad_norm": 0.5355340610321968,
"learning_rate": 2.8850000548115155e-07,
"loss": 0.4263,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24080535769462585,
"step": 1485,
"valid_targets_mean": 1879.4,
"valid_targets_min": 488
},
{
"epoch": 4.7756410256410255,
"grad_norm": 0.5704120262786105,
"learning_rate": 2.518654426551592e-07,
"loss": 0.4061,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2327512800693512,
"step": 1490,
"valid_targets_mean": 1950.7,
"valid_targets_min": 424
},
{
"epoch": 4.791666666666667,
"grad_norm": 0.5892101716728328,
"learning_rate": 2.1770275813536746e-07,
"loss": 0.3985,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.19074735045433044,
"step": 1495,
"valid_targets_mean": 1708.5,
"valid_targets_min": 530
},
{
"epoch": 4.8076923076923075,
"grad_norm": 0.5211300578683025,
"learning_rate": 1.8601622807244312e-07,
"loss": 0.4217,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.21352173388004303,
"step": 1500,
"valid_targets_mean": 2224.9,
"valid_targets_min": 466
},
{
"epoch": 4.823717948717949,
"grad_norm": 0.5950739817226455,
"learning_rate": 1.5680981867625566e-07,
"loss": 0.4065,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.21843212842941284,
"step": 1505,
"valid_targets_mean": 1907.6,
"valid_targets_min": 477
},
{
"epoch": 4.839743589743589,
"grad_norm": 0.4982907259604603,
"learning_rate": 1.3008718571943636e-07,
"loss": 0.4145,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2082642912864685,
"step": 1510,
"valid_targets_mean": 2288.1,
"valid_targets_min": 494
},
{
"epoch": 4.855769230769231,
"grad_norm": 0.7118715744452784,
"learning_rate": 1.058516740797777e-07,
"loss": 0.3914,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.20714955031871796,
"step": 1515,
"valid_targets_mean": 1673.6,
"valid_targets_min": 453
},
{
"epoch": 4.871794871794872,
"grad_norm": 0.636093382876181,
"learning_rate": 8.410631732155062e-08,
"loss": 0.4275,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24439842998981476,
"step": 1520,
"valid_targets_mean": 1889.8,
"valid_targets_min": 369
},
{
"epoch": 4.887820512820513,
"grad_norm": 0.4919592475036989,
"learning_rate": 6.485383731580142e-08,
"loss": 0.3889,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.14140692353248596,
"step": 1525,
"valid_targets_mean": 1333.6,
"valid_targets_min": 329
},
{
"epoch": 4.903846153846154,
"grad_norm": 0.46488274884652137,
"learning_rate": 4.809664389964441e-08,
"loss": 0.4254,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.21400034427642822,
"step": 1530,
"valid_targets_mean": 2693.9,
"valid_targets_min": 523
},
{
"epoch": 4.919871794871795,
"grad_norm": 0.5040958449228231,
"learning_rate": 3.383683457463649e-08,
"loss": 0.3597,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1598944067955017,
"step": 1535,
"valid_targets_mean": 1648.6,
"valid_targets_min": 371
},
{
"epoch": 4.935897435897436,
"grad_norm": 0.525773314679964,
"learning_rate": 2.207619424421381e-08,
"loss": 0.4026,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.20952820777893066,
"step": 1540,
"valid_targets_mean": 2557.2,
"valid_targets_min": 543
},
{
"epoch": 4.951923076923077,
"grad_norm": 0.5186226211951818,
"learning_rate": 1.281619499029274e-08,
"loss": 0.4238,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.23216500878334045,
"step": 1545,
"valid_targets_mean": 2368.4,
"valid_targets_min": 554
},
{
"epoch": 4.967948717948718,
"grad_norm": 0.5881562216015847,
"learning_rate": 6.057995888997248e-09,
"loss": 0.384,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.20564129948616028,
"step": 1550,
"valid_targets_mean": 1534.4,
"valid_targets_min": 530
},
{
"epoch": 4.983974358974359,
"grad_norm": 0.5078947883853017,
"learning_rate": 1.8024428655794012e-09,
"loss": 0.3829,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.21099376678466797,
"step": 1555,
"valid_targets_mean": 2262.2,
"valid_targets_min": 342
},
{
"epoch": 5.0,
"grad_norm": 0.5570674341743406,
"learning_rate": 5.00685885418406e-11,
"loss": 0.4035,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24144230782985687,
"step": 1560,
"valid_targets_mean": 2344.4,
"valid_targets_min": 827
},
{
"epoch": 5.0,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24144230782985687,
"step": 1560,
"total_flos": 4.577212452957061e+17,
"train_loss": 0.45931496092906365,
"train_runtime": 11477.9832,
"train_samples_per_second": 4.348,
"train_steps_per_second": 0.136,
"valid_targets_mean": 2344.4,
"valid_targets_min": 827
}
],
"logging_steps": 5,
"max_steps": 1560,
"num_input_tokens_seen": 0,
"num_train_epochs": 5,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": false,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 4.577212452957061e+17,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}