{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 5.0, "eval_steps": 500, "global_step": 1560, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.016025641025641024, "grad_norm": 8.270559284395055, "learning_rate": 1.0256410256410257e-06, "loss": 0.8831, "loss_nan_ranks": 0, "loss_rank_avg": 0.5231465101242065, "step": 5, "valid_targets_mean": 2855.3, "valid_targets_min": 310 }, { "epoch": 0.03205128205128205, "grad_norm": 6.156057008347062, "learning_rate": 2.307692307692308e-06, "loss": 0.8578, "loss_nan_ranks": 0, "loss_rank_avg": 0.3894450068473816, "step": 10, "valid_targets_mean": 2836.0, "valid_targets_min": 343 }, { "epoch": 0.04807692307692308, "grad_norm": 4.055588313810187, "learning_rate": 3.58974358974359e-06, "loss": 0.8181, "loss_nan_ranks": 0, "loss_rank_avg": 0.34461548924446106, "step": 15, "valid_targets_mean": 2279.3, "valid_targets_min": 397 }, { "epoch": 0.0641025641025641, "grad_norm": 2.9358204191090445, "learning_rate": 4.871794871794872e-06, "loss": 0.7542, "loss_nan_ranks": 0, "loss_rank_avg": 0.37403905391693115, "step": 20, "valid_targets_mean": 2386.4, "valid_targets_min": 501 }, { "epoch": 0.08012820512820513, "grad_norm": 1.3469933311632771, "learning_rate": 6.153846153846155e-06, "loss": 0.7428, "loss_nan_ranks": 0, "loss_rank_avg": 0.3458966016769409, "step": 25, "valid_targets_mean": 2740.5, "valid_targets_min": 447 }, { "epoch": 0.09615384615384616, "grad_norm": 1.1608353294901592, "learning_rate": 7.435897435897437e-06, "loss": 0.7218, "loss_nan_ranks": 0, "loss_rank_avg": 0.31971484422683716, "step": 30, "valid_targets_mean": 1962.6, "valid_targets_min": 279 }, { "epoch": 0.11217948717948718, "grad_norm": 1.067171533736777, "learning_rate": 8.717948717948719e-06, "loss": 0.6814, "loss_nan_ranks": 0, "loss_rank_avg": 0.4454614520072937, "step": 35, "valid_targets_mean": 2451.4, "valid_targets_min": 416 }, { "epoch": 0.1282051282051282, "grad_norm": 0.9774891251489538, "learning_rate": 1e-05, "loss": 0.7264, "loss_nan_ranks": 0, "loss_rank_avg": 0.3378103971481323, "step": 40, "valid_targets_mean": 1912.1, "valid_targets_min": 609 }, { "epoch": 0.14423076923076922, "grad_norm": 0.8317361111005913, "learning_rate": 1.1282051282051283e-05, "loss": 0.6642, "loss_nan_ranks": 0, "loss_rank_avg": 0.37407323718070984, "step": 45, "valid_targets_mean": 1732.8, "valid_targets_min": 474 }, { "epoch": 0.16025641025641027, "grad_norm": 0.70420941452197, "learning_rate": 1.2564102564102565e-05, "loss": 0.6433, "loss_nan_ranks": 0, "loss_rank_avg": 0.3607226610183716, "step": 50, "valid_targets_mean": 2446.6, "valid_targets_min": 396 }, { "epoch": 0.1762820512820513, "grad_norm": 0.640275050687205, "learning_rate": 1.3846153846153847e-05, "loss": 0.6505, "loss_nan_ranks": 0, "loss_rank_avg": 0.2584564685821533, "step": 55, "valid_targets_mean": 1938.5, "valid_targets_min": 350 }, { "epoch": 0.19230769230769232, "grad_norm": 0.6130742352183888, "learning_rate": 1.5128205128205129e-05, "loss": 0.5794, "loss_nan_ranks": 0, "loss_rank_avg": 0.29696235060691833, "step": 60, "valid_targets_mean": 1760.7, "valid_targets_min": 705 }, { "epoch": 0.20833333333333334, "grad_norm": 0.49753658846261495, "learning_rate": 1.641025641025641e-05, "loss": 0.5949, "loss_nan_ranks": 0, "loss_rank_avg": 0.23245546221733093, "step": 65, "valid_targets_mean": 2182.3, "valid_targets_min": 495 }, { "epoch": 0.22435897435897437, "grad_norm": 0.5248371244445572, "learning_rate": 1.7692307692307694e-05, "loss": 0.5511, "loss_nan_ranks": 0, "loss_rank_avg": 0.27561211585998535, "step": 70, "valid_targets_mean": 2518.8, "valid_targets_min": 537 }, { "epoch": 0.2403846153846154, "grad_norm": 0.5696118966316656, "learning_rate": 1.8974358974358975e-05, "loss": 0.6166, "loss_nan_ranks": 0, "loss_rank_avg": 0.27792054414749146, "step": 75, "valid_targets_mean": 2193.9, "valid_targets_min": 320 }, { "epoch": 0.2564102564102564, "grad_norm": 0.6067800362816257, "learning_rate": 2.025641025641026e-05, "loss": 0.5518, "loss_nan_ranks": 0, "loss_rank_avg": 0.3141236901283264, "step": 80, "valid_targets_mean": 1968.6, "valid_targets_min": 524 }, { "epoch": 0.2724358974358974, "grad_norm": 0.5486307014154258, "learning_rate": 2.153846153846154e-05, "loss": 0.5859, "loss_nan_ranks": 0, "loss_rank_avg": 0.39914655685424805, "step": 85, "valid_targets_mean": 2844.7, "valid_targets_min": 652 }, { "epoch": 0.28846153846153844, "grad_norm": 0.5467022186742057, "learning_rate": 2.2820512820512822e-05, "loss": 0.5494, "loss_nan_ranks": 0, "loss_rank_avg": 0.30601420998573303, "step": 90, "valid_targets_mean": 2242.3, "valid_targets_min": 287 }, { "epoch": 0.30448717948717946, "grad_norm": 0.5073453583636481, "learning_rate": 2.4102564102564103e-05, "loss": 0.5672, "loss_nan_ranks": 0, "loss_rank_avg": 0.27047592401504517, "step": 95, "valid_targets_mean": 2702.0, "valid_targets_min": 623 }, { "epoch": 0.32051282051282054, "grad_norm": 0.6374453094826855, "learning_rate": 2.5384615384615386e-05, "loss": 0.5258, "loss_nan_ranks": 0, "loss_rank_avg": 0.34897756576538086, "step": 100, "valid_targets_mean": 2111.6, "valid_targets_min": 632 }, { "epoch": 0.33653846153846156, "grad_norm": 0.4904714398710548, "learning_rate": 2.6666666666666667e-05, "loss": 0.5275, "loss_nan_ranks": 0, "loss_rank_avg": 0.2545839548110962, "step": 105, "valid_targets_mean": 2280.2, "valid_targets_min": 528 }, { "epoch": 0.3525641025641026, "grad_norm": 0.5374741002067597, "learning_rate": 2.794871794871795e-05, "loss": 0.5159, "loss_nan_ranks": 0, "loss_rank_avg": 0.27961328625679016, "step": 110, "valid_targets_mean": 1705.3, "valid_targets_min": 483 }, { "epoch": 0.3685897435897436, "grad_norm": 0.5401678923394666, "learning_rate": 2.923076923076923e-05, "loss": 0.5378, "loss_nan_ranks": 0, "loss_rank_avg": 0.28303176164627075, "step": 115, "valid_targets_mean": 2115.9, "valid_targets_min": 460 }, { "epoch": 0.38461538461538464, "grad_norm": 0.5697964720982368, "learning_rate": 3.0512820512820514e-05, "loss": 0.5136, "loss_nan_ranks": 0, "loss_rank_avg": 0.2926599979400635, "step": 120, "valid_targets_mean": 2276.1, "valid_targets_min": 491 }, { "epoch": 0.40064102564102566, "grad_norm": 0.5080731305686881, "learning_rate": 3.1794871794871795e-05, "loss": 0.5455, "loss_nan_ranks": 0, "loss_rank_avg": 0.21579426527023315, "step": 125, "valid_targets_mean": 1966.7, "valid_targets_min": 455 }, { "epoch": 0.4166666666666667, "grad_norm": 0.5193806958702459, "learning_rate": 3.307692307692308e-05, "loss": 0.5503, "loss_nan_ranks": 0, "loss_rank_avg": 0.23866981267929077, "step": 130, "valid_targets_mean": 1884.6, "valid_targets_min": 653 }, { "epoch": 0.4326923076923077, "grad_norm": 0.5215890997634938, "learning_rate": 3.435897435897436e-05, "loss": 0.5113, "loss_nan_ranks": 0, "loss_rank_avg": 0.22024506330490112, "step": 135, "valid_targets_mean": 2176.9, "valid_targets_min": 551 }, { "epoch": 0.44871794871794873, "grad_norm": 0.5428844999735067, "learning_rate": 3.5641025641025646e-05, "loss": 0.5327, "loss_nan_ranks": 0, "loss_rank_avg": 0.2842090129852295, "step": 140, "valid_targets_mean": 2108.4, "valid_targets_min": 395 }, { "epoch": 0.46474358974358976, "grad_norm": 0.5658357256109985, "learning_rate": 3.692307692307693e-05, "loss": 0.5012, "loss_nan_ranks": 0, "loss_rank_avg": 0.23009580373764038, "step": 145, "valid_targets_mean": 1416.6, "valid_targets_min": 319 }, { "epoch": 0.4807692307692308, "grad_norm": 0.5287699494547455, "learning_rate": 3.820512820512821e-05, "loss": 0.5442, "loss_nan_ranks": 0, "loss_rank_avg": 0.25618451833724976, "step": 150, "valid_targets_mean": 2366.4, "valid_targets_min": 555 }, { "epoch": 0.4967948717948718, "grad_norm": 0.5510710484739612, "learning_rate": 3.948717948717949e-05, "loss": 0.5202, "loss_nan_ranks": 0, "loss_rank_avg": 0.261417031288147, "step": 155, "valid_targets_mean": 1912.0, "valid_targets_min": 427 }, { "epoch": 0.5128205128205128, "grad_norm": 0.5223782503607328, "learning_rate": 3.999954938420724e-05, "loss": 0.538, "loss_nan_ranks": 0, "loss_rank_avg": 0.2376648485660553, "step": 160, "valid_targets_mean": 2203.6, "valid_targets_min": 520 }, { "epoch": 0.5288461538461539, "grad_norm": 0.5639669943753522, "learning_rate": 3.9996795694563096e-05, "loss": 0.5519, "loss_nan_ranks": 0, "loss_rank_avg": 0.26910513639450073, "step": 165, "valid_targets_mean": 1978.4, "valid_targets_min": 452 }, { "epoch": 0.5448717948717948, "grad_norm": 0.5934805718745008, "learning_rate": 3.9991539001644015e-05, "loss": 0.4864, "loss_nan_ranks": 0, "loss_rank_avg": 0.21558544039726257, "step": 170, "valid_targets_mean": 1433.1, "valid_targets_min": 291 }, { "epoch": 0.5608974358974359, "grad_norm": 0.5444507017809859, "learning_rate": 3.998377996343139e-05, "loss": 0.5402, "loss_nan_ranks": 0, "loss_rank_avg": 0.22888971865177155, "step": 175, "valid_targets_mean": 2303.6, "valid_targets_min": 665 }, { "epoch": 0.5769230769230769, "grad_norm": 0.5993323841226175, "learning_rate": 3.9973519551125746e-05, "loss": 0.5472, "loss_nan_ranks": 0, "loss_rank_avg": 0.2046593725681305, "step": 180, "valid_targets_mean": 1417.7, "valid_targets_min": 614 }, { "epoch": 0.592948717948718, "grad_norm": 0.552244150619254, "learning_rate": 3.99607590490251e-05, "loss": 0.5254, "loss_nan_ranks": 0, "loss_rank_avg": 0.240033358335495, "step": 185, "valid_targets_mean": 1792.0, "valid_targets_min": 356 }, { "epoch": 0.6089743589743589, "grad_norm": 0.5676943862267904, "learning_rate": 3.994550005436431e-05, "loss": 0.5232, "loss_nan_ranks": 0, "loss_rank_avg": 0.2360553741455078, "step": 190, "valid_targets_mean": 1990.2, "valid_targets_min": 571 }, { "epoch": 0.625, "grad_norm": 0.5686816790686919, "learning_rate": 3.992774447711503e-05, "loss": 0.5712, "loss_nan_ranks": 0, "loss_rank_avg": 0.3569658696651459, "step": 195, "valid_targets_mean": 2023.1, "valid_targets_min": 425 }, { "epoch": 0.6410256410256411, "grad_norm": 0.593578700387874, "learning_rate": 3.990749453974676e-05, "loss": 0.5151, "loss_nan_ranks": 0, "loss_rank_avg": 0.2537527084350586, "step": 200, "valid_targets_mean": 1794.6, "valid_targets_min": 397 }, { "epoch": 0.657051282051282, "grad_norm": 0.626123448487498, "learning_rate": 3.9884752776948564e-05, "loss": 0.5111, "loss_nan_ranks": 0, "loss_rank_avg": 0.20010629296302795, "step": 205, "valid_targets_mean": 1342.0, "valid_targets_min": 328 }, { "epoch": 0.6730769230769231, "grad_norm": 0.5180554845628587, "learning_rate": 3.985952203531184e-05, "loss": 0.5206, "loss_nan_ranks": 0, "loss_rank_avg": 0.31396254897117615, "step": 210, "valid_targets_mean": 2744.9, "valid_targets_min": 1089 }, { "epoch": 0.6891025641025641, "grad_norm": 0.6345163141192973, "learning_rate": 3.983180547297404e-05, "loss": 0.5026, "loss_nan_ranks": 0, "loss_rank_avg": 0.24182447791099548, "step": 215, "valid_targets_mean": 1557.4, "valid_targets_min": 461 }, { "epoch": 0.7051282051282052, "grad_norm": 0.6054353969461813, "learning_rate": 3.9801606559223286e-05, "loss": 0.5125, "loss_nan_ranks": 0, "loss_rank_avg": 0.2920263409614563, "step": 220, "valid_targets_mean": 2164.1, "valid_targets_min": 542 }, { "epoch": 0.7211538461538461, "grad_norm": 0.5253840091115919, "learning_rate": 3.9768929074064206e-05, "loss": 0.5073, "loss_nan_ranks": 0, "loss_rank_avg": 0.26178407669067383, "step": 225, "valid_targets_mean": 2276.9, "valid_targets_min": 508 }, { "epoch": 0.7371794871794872, "grad_norm": 0.6036771997042879, "learning_rate": 3.973377710774474e-05, "loss": 0.5416, "loss_nan_ranks": 0, "loss_rank_avg": 0.30116069316864014, "step": 230, "valid_targets_mean": 1931.5, "valid_targets_min": 465 }, { "epoch": 0.7532051282051282, "grad_norm": 0.5341073955982537, "learning_rate": 3.9696155060244166e-05, "loss": 0.4925, "loss_nan_ranks": 0, "loss_rank_avg": 0.2869190275669098, "step": 235, "valid_targets_mean": 2469.9, "valid_targets_min": 390 }, { "epoch": 0.7692307692307693, "grad_norm": 0.5391946249539421, "learning_rate": 3.965606764072237e-05, "loss": 0.5149, "loss_nan_ranks": 0, "loss_rank_avg": 0.23443245887756348, "step": 240, "valid_targets_mean": 2517.3, "valid_targets_min": 458 }, { "epoch": 0.7852564102564102, "grad_norm": 0.48726365599779214, "learning_rate": 3.96135198669304e-05, "loss": 0.5111, "loss_nan_ranks": 0, "loss_rank_avg": 0.2194036841392517, "step": 245, "valid_targets_mean": 2148.2, "valid_targets_min": 284 }, { "epoch": 0.8012820512820513, "grad_norm": 0.4546312777762876, "learning_rate": 3.956851706458236e-05, "loss": 0.4951, "loss_nan_ranks": 0, "loss_rank_avg": 0.2413400411605835, "step": 250, "valid_targets_mean": 3277.6, "valid_targets_min": 401 }, { "epoch": 0.8173076923076923, "grad_norm": 0.43195116515643417, "learning_rate": 3.952106486668884e-05, "loss": 0.4941, "loss_nan_ranks": 0, "loss_rank_avg": 0.24353349208831787, "step": 255, "valid_targets_mean": 2430.8, "valid_targets_min": 354 }, { "epoch": 0.8333333333333334, "grad_norm": 0.5431449210079052, "learning_rate": 3.9471169212851774e-05, "loss": 0.4859, "loss_nan_ranks": 0, "loss_rank_avg": 0.3241618275642395, "step": 260, "valid_targets_mean": 2396.5, "valid_targets_min": 559 }, { "epoch": 0.8493589743589743, "grad_norm": 0.4983128230175159, "learning_rate": 3.9418836348521045e-05, "loss": 0.507, "loss_nan_ranks": 0, "loss_rank_avg": 0.28434205055236816, "step": 265, "valid_targets_mean": 2714.4, "valid_targets_min": 589 }, { "epoch": 0.8653846153846154, "grad_norm": 0.4433055547977104, "learning_rate": 3.936407282421267e-05, "loss": 0.489, "loss_nan_ranks": 0, "loss_rank_avg": 0.2021905481815338, "step": 270, "valid_targets_mean": 2192.3, "valid_targets_min": 420 }, { "epoch": 0.8814102564102564, "grad_norm": 0.5372540899641901, "learning_rate": 3.930688549468894e-05, "loss": 0.4962, "loss_nan_ranks": 0, "loss_rank_avg": 0.3100685477256775, "step": 275, "valid_targets_mean": 2431.0, "valid_targets_min": 523 }, { "epoch": 0.8974358974358975, "grad_norm": 0.48553038032931456, "learning_rate": 3.924728151810034e-05, "loss": 0.5118, "loss_nan_ranks": 0, "loss_rank_avg": 0.27802667021751404, "step": 280, "valid_targets_mean": 2424.8, "valid_targets_min": 596 }, { "epoch": 0.9134615384615384, "grad_norm": 0.5494166379111899, "learning_rate": 3.9185268355089606e-05, "loss": 0.4939, "loss_nan_ranks": 0, "loss_rank_avg": 0.2717832922935486, "step": 285, "valid_targets_mean": 1919.1, "valid_targets_min": 500 }, { "epoch": 0.9294871794871795, "grad_norm": 0.4690629180894029, "learning_rate": 3.912085376785788e-05, "loss": 0.4929, "loss_nan_ranks": 0, "loss_rank_avg": 0.20420989394187927, "step": 290, "valid_targets_mean": 1990.7, "valid_targets_min": 498 }, { "epoch": 0.9455128205128205, "grad_norm": 0.47364731395403387, "learning_rate": 3.9054045819193074e-05, "loss": 0.4596, "loss_nan_ranks": 0, "loss_rank_avg": 0.283137708902359, "step": 295, "valid_targets_mean": 3114.4, "valid_targets_min": 389 }, { "epoch": 0.9615384615384616, "grad_norm": 0.43834286352294405, "learning_rate": 3.898485287146068e-05, "loss": 0.4846, "loss_nan_ranks": 0, "loss_rank_avg": 0.22700203955173492, "step": 300, "valid_targets_mean": 3102.8, "valid_targets_min": 674 }, { "epoch": 0.9775641025641025, "grad_norm": 0.544832909701008, "learning_rate": 3.8913283585557054e-05, "loss": 0.488, "loss_nan_ranks": 0, "loss_rank_avg": 0.24065542221069336, "step": 305, "valid_targets_mean": 2371.9, "valid_targets_min": 342 }, { "epoch": 0.9935897435897436, "grad_norm": 0.6034996255357719, "learning_rate": 3.8839346919825304e-05, "loss": 0.5011, "loss_nan_ranks": 0, "loss_rank_avg": 0.252093642950058, "step": 310, "valid_targets_mean": 1962.2, "valid_targets_min": 339 }, { "epoch": 1.0096153846153846, "grad_norm": 0.48322361076674575, "learning_rate": 3.876305212893399e-05, "loss": 0.4662, "loss_nan_ranks": 0, "loss_rank_avg": 0.25063151121139526, "step": 315, "valid_targets_mean": 2658.9, "valid_targets_min": 507 }, { "epoch": 1.0256410256410255, "grad_norm": 0.6133158850042538, "learning_rate": 3.868440876271871e-05, "loss": 0.4777, "loss_nan_ranks": 0, "loss_rank_avg": 0.29740214347839355, "step": 320, "valid_targets_mean": 1867.5, "valid_targets_min": 392 }, { "epoch": 1.0416666666666667, "grad_norm": 0.541900985104166, "learning_rate": 3.860342666498677e-05, "loss": 0.4866, "loss_nan_ranks": 0, "loss_rank_avg": 0.21024353802204132, "step": 325, "valid_targets_mean": 1411.2, "valid_targets_min": 322 }, { "epoch": 1.0576923076923077, "grad_norm": 0.6020609614604076, "learning_rate": 3.8520115972284975e-05, "loss": 0.4639, "loss_nan_ranks": 0, "loss_rank_avg": 0.24874147772789001, "step": 330, "valid_targets_mean": 1505.4, "valid_targets_min": 372 }, { "epoch": 1.0737179487179487, "grad_norm": 0.5637623048449515, "learning_rate": 3.843448711263089e-05, "loss": 0.5009, "loss_nan_ranks": 0, "loss_rank_avg": 0.3167960047721863, "step": 335, "valid_targets_mean": 2400.1, "valid_targets_min": 471 }, { "epoch": 1.0897435897435896, "grad_norm": 0.482423724630125, "learning_rate": 3.8346550804207544e-05, "loss": 0.453, "loss_nan_ranks": 0, "loss_rank_avg": 0.2796696722507477, "step": 340, "valid_targets_mean": 2607.9, "valid_targets_min": 358 }, { "epoch": 1.1057692307692308, "grad_norm": 0.42709049123064025, "learning_rate": 3.825631805402182e-05, "loss": 0.4675, "loss_nan_ranks": 0, "loss_rank_avg": 0.18846507370471954, "step": 345, "valid_targets_mean": 3064.7, "valid_targets_min": 484 }, { "epoch": 1.1217948717948718, "grad_norm": 0.45654149738060995, "learning_rate": 3.816380015652672e-05, "loss": 0.4711, "loss_nan_ranks": 0, "loss_rank_avg": 0.22047562897205353, "step": 350, "valid_targets_mean": 2670.4, "valid_targets_min": 497 }, { "epoch": 1.1378205128205128, "grad_norm": 0.46259303685534825, "learning_rate": 3.806900869220765e-05, "loss": 0.4289, "loss_nan_ranks": 0, "loss_rank_avg": 0.21656641364097595, "step": 355, "valid_targets_mean": 2637.6, "valid_targets_min": 371 }, { "epoch": 1.1538461538461537, "grad_norm": 0.4795462410657758, "learning_rate": 3.797195552613284e-05, "loss": 0.4347, "loss_nan_ranks": 0, "loss_rank_avg": 0.28732848167419434, "step": 360, "valid_targets_mean": 2804.9, "valid_targets_min": 972 }, { "epoch": 1.169871794871795, "grad_norm": 0.544805070309915, "learning_rate": 3.787265280646825e-05, "loss": 0.4612, "loss_nan_ranks": 0, "loss_rank_avg": 0.20876461267471313, "step": 365, "valid_targets_mean": 1629.6, "valid_targets_min": 470 }, { "epoch": 1.185897435897436, "grad_norm": 0.6219497217509353, "learning_rate": 3.7771112962956936e-05, "loss": 0.4925, "loss_nan_ranks": 0, "loss_rank_avg": 0.24284450709819794, "step": 370, "valid_targets_mean": 1362.9, "valid_targets_min": 362 }, { "epoch": 1.2019230769230769, "grad_norm": 0.6219562430857214, "learning_rate": 3.7667348705363227e-05, "loss": 0.4846, "loss_nan_ranks": 0, "loss_rank_avg": 0.26131337881088257, "step": 375, "valid_targets_mean": 2042.5, "valid_targets_min": 538 }, { "epoch": 1.217948717948718, "grad_norm": 0.5273423780621191, "learning_rate": 3.7561373021881885e-05, "loss": 0.4764, "loss_nan_ranks": 0, "loss_rank_avg": 0.20557957887649536, "step": 380, "valid_targets_mean": 2154.4, "valid_targets_min": 482 }, { "epoch": 1.233974358974359, "grad_norm": 0.5940055633638739, "learning_rate": 3.745319917751229e-05, "loss": 0.458, "loss_nan_ranks": 0, "loss_rank_avg": 0.19809526205062866, "step": 385, "valid_targets_mean": 1356.8, "valid_targets_min": 496 }, { "epoch": 1.25, "grad_norm": 0.604986566337123, "learning_rate": 3.734284071239811e-05, "loss": 0.4753, "loss_nan_ranks": 0, "loss_rank_avg": 0.3080754280090332, "step": 390, "valid_targets_mean": 2211.4, "valid_targets_min": 405 }, { "epoch": 1.266025641025641, "grad_norm": 0.6686876845094236, "learning_rate": 3.7230311440132494e-05, "loss": 0.4605, "loss_nan_ranks": 0, "loss_rank_avg": 0.2461152821779251, "step": 395, "valid_targets_mean": 1261.0, "valid_targets_min": 384 }, { "epoch": 1.282051282051282, "grad_norm": 0.5383774605862381, "learning_rate": 3.711562544602895e-05, "loss": 0.47, "loss_nan_ranks": 0, "loss_rank_avg": 0.2216894030570984, "step": 400, "valid_targets_mean": 1972.8, "valid_targets_min": 572 }, { "epoch": 1.2980769230769231, "grad_norm": 0.6237423049196263, "learning_rate": 3.699879708535838e-05, "loss": 0.4964, "loss_nan_ranks": 0, "loss_rank_avg": 0.3525855839252472, "step": 405, "valid_targets_mean": 2608.8, "valid_targets_min": 553 }, { "epoch": 1.314102564102564, "grad_norm": 0.47346493238221604, "learning_rate": 3.687984098155212e-05, "loss": 0.4721, "loss_nan_ranks": 0, "loss_rank_avg": 0.23883157968521118, "step": 410, "valid_targets_mean": 2648.7, "valid_targets_min": 559 }, { "epoch": 1.330128205128205, "grad_norm": 0.6009852407301991, "learning_rate": 3.6758772024371626e-05, "loss": 0.4985, "loss_nan_ranks": 0, "loss_rank_avg": 0.31522154808044434, "step": 415, "valid_targets_mean": 1891.1, "valid_targets_min": 369 }, { "epoch": 1.3461538461538463, "grad_norm": 0.5221802033027123, "learning_rate": 3.663560536804465e-05, "loss": 0.4619, "loss_nan_ranks": 0, "loss_rank_avg": 0.2520959973335266, "step": 420, "valid_targets_mean": 2271.7, "valid_targets_min": 479 }, { "epoch": 1.3621794871794872, "grad_norm": 0.5462328875847934, "learning_rate": 3.65103564293684e-05, "loss": 0.4923, "loss_nan_ranks": 0, "loss_rank_avg": 0.28057146072387695, "step": 425, "valid_targets_mean": 1829.6, "valid_targets_min": 349 }, { "epoch": 1.3782051282051282, "grad_norm": 0.5230511743019115, "learning_rate": 3.638304088577984e-05, "loss": 0.4479, "loss_nan_ranks": 0, "loss_rank_avg": 0.20385369658470154, "step": 430, "valid_targets_mean": 2099.4, "valid_targets_min": 450 }, { "epoch": 1.3942307692307692, "grad_norm": 0.5260439150355632, "learning_rate": 3.625367467339329e-05, "loss": 0.4409, "loss_nan_ranks": 0, "loss_rank_avg": 0.25196361541748047, "step": 435, "valid_targets_mean": 1848.4, "valid_targets_min": 374 }, { "epoch": 1.4102564102564101, "grad_norm": 0.542530019050112, "learning_rate": 3.612227398500575e-05, "loss": 0.4625, "loss_nan_ranks": 0, "loss_rank_avg": 0.2730902433395386, "step": 440, "valid_targets_mean": 1855.6, "valid_targets_min": 539 }, { "epoch": 1.4262820512820513, "grad_norm": 0.5362598043278441, "learning_rate": 3.598885526807003e-05, "loss": 0.4645, "loss_nan_ranks": 0, "loss_rank_avg": 0.20158651471138, "step": 445, "valid_targets_mean": 1960.3, "valid_targets_min": 330 }, { "epoch": 1.4423076923076923, "grad_norm": 0.5038481547905878, "learning_rate": 3.585343522263599e-05, "loss": 0.4576, "loss_nan_ranks": 0, "loss_rank_avg": 0.23150920867919922, "step": 450, "valid_targets_mean": 2077.7, "valid_targets_min": 685 }, { "epoch": 1.4583333333333333, "grad_norm": 0.6465319041135404, "learning_rate": 3.571603079926024e-05, "loss": 0.4734, "loss_nan_ranks": 0, "loss_rank_avg": 0.2389899045228958, "step": 455, "valid_targets_mean": 1801.0, "valid_targets_min": 435 }, { "epoch": 1.4743589743589745, "grad_norm": 0.5472386327122035, "learning_rate": 3.5576659196884395e-05, "loss": 0.5179, "loss_nan_ranks": 0, "loss_rank_avg": 0.24516533315181732, "step": 460, "valid_targets_mean": 1980.1, "valid_targets_min": 436 }, { "epoch": 1.4903846153846154, "grad_norm": 0.5414486709041462, "learning_rate": 3.5435337860682304e-05, "loss": 0.464, "loss_nan_ranks": 0, "loss_rank_avg": 0.2550250291824341, "step": 465, "valid_targets_mean": 1814.8, "valid_targets_min": 343 }, { "epoch": 1.5064102564102564, "grad_norm": 0.4560866540685229, "learning_rate": 3.529208447987641e-05, "loss": 0.4928, "loss_nan_ranks": 0, "loss_rank_avg": 0.1900494247674942, "step": 470, "valid_targets_mean": 2029.3, "valid_targets_min": 353 }, { "epoch": 1.5224358974358974, "grad_norm": 0.5257829172034205, "learning_rate": 3.5146916985523604e-05, "loss": 0.5122, "loss_nan_ranks": 0, "loss_rank_avg": 0.25625720620155334, "step": 475, "valid_targets_mean": 2358.8, "valid_targets_min": 708 }, { "epoch": 1.5384615384615383, "grad_norm": 0.5000203832116034, "learning_rate": 3.499985354827079e-05, "loss": 0.4661, "loss_nan_ranks": 0, "loss_rank_avg": 0.19569867849349976, "step": 480, "valid_targets_mean": 1793.0, "valid_targets_min": 407 }, { "epoch": 1.5544871794871795, "grad_norm": 0.5311956681183098, "learning_rate": 3.485091257608047e-05, "loss": 0.4946, "loss_nan_ranks": 0, "loss_rank_avg": 0.2879672348499298, "step": 485, "valid_targets_mean": 2306.1, "valid_targets_min": 383 }, { "epoch": 1.5705128205128205, "grad_norm": 0.48828567970956843, "learning_rate": 3.4700112711926574e-05, "loss": 0.4632, "loss_nan_ranks": 0, "loss_rank_avg": 0.24961577355861664, "step": 490, "valid_targets_mean": 2378.9, "valid_targets_min": 494 }, { "epoch": 1.5865384615384617, "grad_norm": 0.6003720701146319, "learning_rate": 3.4547472831460976e-05, "loss": 0.4662, "loss_nan_ranks": 0, "loss_rank_avg": 0.2577670216560364, "step": 495, "valid_targets_mean": 1839.1, "valid_targets_min": 463 }, { "epoch": 1.6025641025641026, "grad_norm": 0.556322591943896, "learning_rate": 3.439301204065077e-05, "loss": 0.468, "loss_nan_ranks": 0, "loss_rank_avg": 0.20165708661079407, "step": 500, "valid_targets_mean": 1302.6, "valid_targets_min": 365 }, { "epoch": 1.6185897435897436, "grad_norm": 0.4863359326950338, "learning_rate": 3.423674967338681e-05, "loss": 0.496, "loss_nan_ranks": 0, "loss_rank_avg": 0.24409791827201843, "step": 505, "valid_targets_mean": 2180.1, "valid_targets_min": 564 }, { "epoch": 1.6346153846153846, "grad_norm": 0.5132971252924721, "learning_rate": 3.407870528906366e-05, "loss": 0.4911, "loss_nan_ranks": 0, "loss_rank_avg": 0.2089085876941681, "step": 510, "valid_targets_mean": 1815.9, "valid_targets_min": 376 }, { "epoch": 1.6506410256410255, "grad_norm": 0.44374872733889337, "learning_rate": 3.391889867013134e-05, "loss": 0.4728, "loss_nan_ranks": 0, "loss_rank_avg": 0.25746238231658936, "step": 515, "valid_targets_mean": 3330.2, "valid_targets_min": 535 }, { "epoch": 1.6666666666666665, "grad_norm": 0.41541671583956447, "learning_rate": 3.375734981961918e-05, "loss": 0.4579, "loss_nan_ranks": 0, "loss_rank_avg": 0.19313707947731018, "step": 520, "valid_targets_mean": 2933.1, "valid_targets_min": 641 }, { "epoch": 1.6826923076923077, "grad_norm": 0.5025527033239986, "learning_rate": 3.359407895863199e-05, "loss": 0.4777, "loss_nan_ranks": 0, "loss_rank_avg": 0.2544252574443817, "step": 525, "valid_targets_mean": 2657.9, "valid_targets_min": 634 }, { "epoch": 1.6987179487179487, "grad_norm": 0.5010762849646863, "learning_rate": 3.342910652381902e-05, "loss": 0.4582, "loss_nan_ranks": 0, "loss_rank_avg": 0.253082811832428, "step": 530, "valid_targets_mean": 2349.6, "valid_targets_min": 444 }, { "epoch": 1.7147435897435899, "grad_norm": 0.5050510280029894, "learning_rate": 3.326245316481591e-05, "loss": 0.4707, "loss_nan_ranks": 0, "loss_rank_avg": 0.2477722316980362, "step": 535, "valid_targets_mean": 1996.2, "valid_targets_min": 477 }, { "epoch": 1.7307692307692308, "grad_norm": 0.5765193223693661, "learning_rate": 3.30941397416599e-05, "loss": 0.4785, "loss_nan_ranks": 0, "loss_rank_avg": 0.23156100511550903, "step": 540, "valid_targets_mean": 1675.4, "valid_targets_min": 481 }, { "epoch": 1.7467948717948718, "grad_norm": 0.4560642161468493, "learning_rate": 3.2924187322178865e-05, "loss": 0.4794, "loss_nan_ranks": 0, "loss_rank_avg": 0.23049210011959076, "step": 545, "valid_targets_mean": 2340.4, "valid_targets_min": 392 }, { "epoch": 1.7628205128205128, "grad_norm": 0.4922377750065325, "learning_rate": 3.275261717935417e-05, "loss": 0.4764, "loss_nan_ranks": 0, "loss_rank_avg": 0.24752816557884216, "step": 550, "valid_targets_mean": 2367.2, "valid_targets_min": 476 }, { "epoch": 1.7788461538461537, "grad_norm": 0.4426230788977972, "learning_rate": 3.2579450788657997e-05, "loss": 0.4677, "loss_nan_ranks": 0, "loss_rank_avg": 0.21346285939216614, "step": 555, "valid_targets_mean": 2292.7, "valid_targets_min": 432 }, { "epoch": 1.7948717948717947, "grad_norm": 0.624429526299482, "learning_rate": 3.2404709825365204e-05, "loss": 0.4717, "loss_nan_ranks": 0, "loss_rank_avg": 0.22740063071250916, "step": 560, "valid_targets_mean": 1412.7, "valid_targets_min": 517 }, { "epoch": 1.810897435897436, "grad_norm": 0.555277859130487, "learning_rate": 3.222841616184025e-05, "loss": 0.4877, "loss_nan_ranks": 0, "loss_rank_avg": 0.32376670837402344, "step": 565, "valid_targets_mean": 2239.8, "valid_targets_min": 373 }, { "epoch": 1.8269230769230769, "grad_norm": 0.6868507053050372, "learning_rate": 3.2050591864799406e-05, "loss": 0.454, "loss_nan_ranks": 0, "loss_rank_avg": 0.2284863293170929, "step": 570, "valid_targets_mean": 2318.9, "valid_targets_min": 465 }, { "epoch": 1.842948717948718, "grad_norm": 0.5636785309744979, "learning_rate": 3.187125919254869e-05, "loss": 0.5021, "loss_nan_ranks": 0, "loss_rank_avg": 0.20378993451595306, "step": 575, "valid_targets_mean": 1406.4, "valid_targets_min": 382 }, { "epoch": 1.858974358974359, "grad_norm": 0.5138362567576862, "learning_rate": 3.169044059219778e-05, "loss": 0.472, "loss_nan_ranks": 0, "loss_rank_avg": 0.24885600805282593, "step": 580, "valid_targets_mean": 2108.1, "valid_targets_min": 297 }, { "epoch": 1.875, "grad_norm": 0.4788276150250492, "learning_rate": 3.1508158696850275e-05, "loss": 0.4439, "loss_nan_ranks": 0, "loss_rank_avg": 0.2000913918018341, "step": 585, "valid_targets_mean": 1723.1, "valid_targets_min": 532 }, { "epoch": 1.891025641025641, "grad_norm": 0.5199878797975349, "learning_rate": 3.132443632277075e-05, "loss": 0.4341, "loss_nan_ranks": 0, "loss_rank_avg": 0.1967633068561554, "step": 590, "valid_targets_mean": 1640.8, "valid_targets_min": 336 }, { "epoch": 1.907051282051282, "grad_norm": 0.5120839295430387, "learning_rate": 3.113929646652879e-05, "loss": 0.4752, "loss_nan_ranks": 0, "loss_rank_avg": 0.25105994939804077, "step": 595, "valid_targets_mean": 2039.7, "valid_targets_min": 647 }, { "epoch": 1.9230769230769231, "grad_norm": 0.4639112950762158, "learning_rate": 3.095276230212056e-05, "loss": 0.4696, "loss_nan_ranks": 0, "loss_rank_avg": 0.2098376452922821, "step": 600, "valid_targets_mean": 2136.6, "valid_targets_min": 495 }, { "epoch": 1.939102564102564, "grad_norm": 0.5166631970577467, "learning_rate": 3.076485717806808e-05, "loss": 0.4703, "loss_nan_ranks": 0, "loss_rank_avg": 0.18611370027065277, "step": 605, "valid_targets_mean": 1350.6, "valid_targets_min": 575 }, { "epoch": 1.9551282051282053, "grad_norm": 0.4745285139732179, "learning_rate": 3.057560461449665e-05, "loss": 0.457, "loss_nan_ranks": 0, "loss_rank_avg": 0.252684623003006, "step": 610, "valid_targets_mean": 2834.0, "valid_targets_min": 460 }, { "epoch": 1.9711538461538463, "grad_norm": 0.48419329402472827, "learning_rate": 3.038502830019092e-05, "loss": 0.4735, "loss_nan_ranks": 0, "loss_rank_avg": 0.25076764822006226, "step": 615, "valid_targets_mean": 2311.8, "valid_targets_min": 535 }, { "epoch": 1.9871794871794872, "grad_norm": 0.4856357542202215, "learning_rate": 3.019315208962968e-05, "loss": 0.4564, "loss_nan_ranks": 0, "loss_rank_avg": 0.22748295962810516, "step": 620, "valid_targets_mean": 1729.2, "valid_targets_min": 404 }, { "epoch": 2.003205128205128, "grad_norm": 0.5415603488319026, "learning_rate": 3.0000000000000004e-05, "loss": 0.4663, "loss_nan_ranks": 0, "loss_rank_avg": 0.25027239322662354, "step": 625, "valid_targets_mean": 2599.8, "valid_targets_min": 446 }, { "epoch": 2.019230769230769, "grad_norm": 0.5616447815364394, "learning_rate": 2.9805596208191056e-05, "loss": 0.4626, "loss_nan_ranks": 0, "loss_rank_avg": 0.31486913561820984, "step": 630, "valid_targets_mean": 2342.2, "valid_targets_min": 481 }, { "epoch": 2.03525641025641, "grad_norm": 0.4616279441242081, "learning_rate": 2.960996504776783e-05, "loss": 0.4494, "loss_nan_ranks": 0, "loss_rank_avg": 0.2557230293750763, "step": 635, "valid_targets_mean": 3251.1, "valid_targets_min": 374 }, { "epoch": 2.051282051282051, "grad_norm": 0.5503041087281749, "learning_rate": 2.9413131005925296e-05, "loss": 0.4178, "loss_nan_ranks": 0, "loss_rank_avg": 0.18771244585514069, "step": 640, "valid_targets_mean": 1542.1, "valid_targets_min": 405 }, { "epoch": 2.0673076923076925, "grad_norm": 0.5145185224875077, "learning_rate": 2.9215118720423375e-05, "loss": 0.4356, "loss_nan_ranks": 0, "loss_rank_avg": 0.20002877712249756, "step": 645, "valid_targets_mean": 2503.4, "valid_targets_min": 679 }, { "epoch": 2.0833333333333335, "grad_norm": 0.5259222795664407, "learning_rate": 2.9015952976502994e-05, "loss": 0.4819, "loss_nan_ranks": 0, "loss_rank_avg": 0.22057735919952393, "step": 650, "valid_targets_mean": 2547.9, "valid_targets_min": 329 }, { "epoch": 2.0993589743589745, "grad_norm": 0.49973317567273595, "learning_rate": 2.8815658703783715e-05, "loss": 0.4179, "loss_nan_ranks": 0, "loss_rank_avg": 0.20730602741241455, "step": 655, "valid_targets_mean": 2237.2, "valid_targets_min": 603 }, { "epoch": 2.1153846153846154, "grad_norm": 0.468034186388587, "learning_rate": 2.8614260973143318e-05, "loss": 0.4579, "loss_nan_ranks": 0, "loss_rank_avg": 0.2634885311126709, "step": 660, "valid_targets_mean": 2994.0, "valid_targets_min": 613 }, { "epoch": 2.1314102564102564, "grad_norm": 0.5188728303293624, "learning_rate": 2.8411784993579633e-05, "loss": 0.4465, "loss_nan_ranks": 0, "loss_rank_avg": 0.32055070996284485, "step": 665, "valid_targets_mean": 2799.9, "valid_targets_min": 464 }, { "epoch": 2.1474358974358974, "grad_norm": 0.5727755387928921, "learning_rate": 2.820825610905514e-05, "loss": 0.4517, "loss_nan_ranks": 0, "loss_rank_avg": 0.21268504858016968, "step": 670, "valid_targets_mean": 1740.4, "valid_targets_min": 376 }, { "epoch": 2.1634615384615383, "grad_norm": 0.5102342820268265, "learning_rate": 2.8003699795324674e-05, "loss": 0.4343, "loss_nan_ranks": 0, "loss_rank_avg": 0.1873595267534256, "step": 675, "valid_targets_mean": 1921.6, "valid_targets_min": 393 }, { "epoch": 2.1794871794871793, "grad_norm": 0.5212794879302061, "learning_rate": 2.7798141656746606e-05, "loss": 0.447, "loss_nan_ranks": 0, "loss_rank_avg": 0.2737618684768677, "step": 680, "valid_targets_mean": 2370.6, "valid_targets_min": 346 }, { "epoch": 2.1955128205128207, "grad_norm": 0.4882810001947318, "learning_rate": 2.7591607423077932e-05, "loss": 0.481, "loss_nan_ranks": 0, "loss_rank_avg": 0.13172554969787598, "step": 685, "valid_targets_mean": 1362.9, "valid_targets_min": 382 }, { "epoch": 2.2115384615384617, "grad_norm": 0.6207677882628246, "learning_rate": 2.738412294625369e-05, "loss": 0.4491, "loss_nan_ranks": 0, "loss_rank_avg": 0.23348423838615417, "step": 690, "valid_targets_mean": 2125.4, "valid_targets_min": 544 }, { "epoch": 2.2275641025641026, "grad_norm": 0.5134803204787327, "learning_rate": 2.717571419715107e-05, "loss": 0.4205, "loss_nan_ranks": 0, "loss_rank_avg": 0.21478433907032013, "step": 695, "valid_targets_mean": 1724.8, "valid_targets_min": 372 }, { "epoch": 2.2435897435897436, "grad_norm": 0.49066453024135814, "learning_rate": 2.69664072623386e-05, "loss": 0.4644, "loss_nan_ranks": 0, "loss_rank_avg": 0.23817721009254456, "step": 700, "valid_targets_mean": 2259.7, "valid_targets_min": 408 }, { "epoch": 2.2596153846153846, "grad_norm": 0.5896960527239582, "learning_rate": 2.6756228340810946e-05, "loss": 0.4499, "loss_nan_ranks": 0, "loss_rank_avg": 0.2211766242980957, "step": 705, "valid_targets_mean": 1644.1, "valid_targets_min": 303 }, { "epoch": 2.2756410256410255, "grad_norm": 0.47096626497110644, "learning_rate": 2.6545203740709502e-05, "loss": 0.444, "loss_nan_ranks": 0, "loss_rank_avg": 0.24633574485778809, "step": 710, "valid_targets_mean": 2245.6, "valid_targets_min": 496 }, { "epoch": 2.2916666666666665, "grad_norm": 0.4898735632976753, "learning_rate": 2.6333359876029455e-05, "loss": 0.4469, "loss_nan_ranks": 0, "loss_rank_avg": 0.2112981081008911, "step": 715, "valid_targets_mean": 1969.1, "valid_targets_min": 414 }, { "epoch": 2.3076923076923075, "grad_norm": 0.501391834341433, "learning_rate": 2.612072326331351e-05, "loss": 0.4178, "loss_nan_ranks": 0, "loss_rank_avg": 0.2247483730316162, "step": 720, "valid_targets_mean": 2423.8, "valid_targets_min": 393 }, { "epoch": 2.323717948717949, "grad_norm": 0.4217215747925792, "learning_rate": 2.5907320518332827e-05, "loss": 0.4313, "loss_nan_ranks": 0, "loss_rank_avg": 0.19624283909797668, "step": 725, "valid_targets_mean": 2825.5, "valid_targets_min": 544 }, { "epoch": 2.33974358974359, "grad_norm": 0.49601599026984694, "learning_rate": 2.5693178352755497e-05, "loss": 0.4074, "loss_nan_ranks": 0, "loss_rank_avg": 0.202076256275177, "step": 730, "valid_targets_mean": 2533.4, "valid_targets_min": 617 }, { "epoch": 2.355769230769231, "grad_norm": 0.5353940844579022, "learning_rate": 2.547832357080305e-05, "loss": 0.4227, "loss_nan_ranks": 0, "loss_rank_avg": 0.2339232861995697, "step": 735, "valid_targets_mean": 1994.9, "valid_targets_min": 524 }, { "epoch": 2.371794871794872, "grad_norm": 0.5722824667248534, "learning_rate": 2.5262783065895377e-05, "loss": 0.4452, "loss_nan_ranks": 0, "loss_rank_avg": 0.2249128520488739, "step": 740, "valid_targets_mean": 1781.4, "valid_targets_min": 326 }, { "epoch": 2.3878205128205128, "grad_norm": 0.5007302300195212, "learning_rate": 2.5046583817284437e-05, "loss": 0.4542, "loss_nan_ranks": 0, "loss_rank_avg": 0.17838139832019806, "step": 745, "valid_targets_mean": 1831.7, "valid_targets_min": 347 }, { "epoch": 2.4038461538461537, "grad_norm": 0.4472396981588018, "learning_rate": 2.48297528866773e-05, "loss": 0.4087, "loss_nan_ranks": 0, "loss_rank_avg": 0.1966041922569275, "step": 750, "valid_targets_mean": 2101.4, "valid_targets_min": 507 }, { "epoch": 2.4198717948717947, "grad_norm": 0.5475486438741652, "learning_rate": 2.4612317414848804e-05, "loss": 0.4367, "loss_nan_ranks": 0, "loss_rank_avg": 0.2544914484024048, "step": 755, "valid_targets_mean": 2436.7, "valid_targets_min": 548 }, { "epoch": 2.435897435897436, "grad_norm": 0.47978909370549155, "learning_rate": 2.4394304618244346e-05, "loss": 0.4467, "loss_nan_ranks": 0, "loss_rank_avg": 0.2271379828453064, "step": 760, "valid_targets_mean": 2447.1, "valid_targets_min": 399 }, { "epoch": 2.451923076923077, "grad_norm": 0.6270651047991183, "learning_rate": 2.4175741785573177e-05, "loss": 0.4528, "loss_nan_ranks": 0, "loss_rank_avg": 0.2507140636444092, "step": 765, "valid_targets_mean": 1775.8, "valid_targets_min": 287 }, { "epoch": 2.467948717948718, "grad_norm": 0.3929435528764122, "learning_rate": 2.39566562743927e-05, "loss": 0.4189, "loss_nan_ranks": 0, "loss_rank_avg": 0.19187305867671967, "step": 770, "valid_targets_mean": 3374.7, "valid_targets_min": 597 }, { "epoch": 2.483974358974359, "grad_norm": 0.527375808149042, "learning_rate": 2.3737075507684103e-05, "loss": 0.4494, "loss_nan_ranks": 0, "loss_rank_avg": 0.22173798084259033, "step": 775, "valid_targets_mean": 1856.5, "valid_targets_min": 414 }, { "epoch": 2.5, "grad_norm": 0.5997723972038063, "learning_rate": 2.3517026970419786e-05, "loss": 0.4365, "loss_nan_ranks": 0, "loss_rank_avg": 0.24415044486522675, "step": 780, "valid_targets_mean": 1901.2, "valid_targets_min": 779 }, { "epoch": 2.516025641025641, "grad_norm": 0.48377772022532683, "learning_rate": 2.3296538206123134e-05, "loss": 0.4155, "loss_nan_ranks": 0, "loss_rank_avg": 0.21287669241428375, "step": 785, "valid_targets_mean": 2185.6, "valid_targets_min": 399 }, { "epoch": 2.532051282051282, "grad_norm": 0.5468759021698991, "learning_rate": 2.307563681342081e-05, "loss": 0.4634, "loss_nan_ranks": 0, "loss_rank_avg": 0.2165958732366562, "step": 790, "valid_targets_mean": 2127.9, "valid_targets_min": 592 }, { "epoch": 2.5480769230769234, "grad_norm": 0.47406985778934707, "learning_rate": 2.285435044258829e-05, "loss": 0.4235, "loss_nan_ranks": 0, "loss_rank_avg": 0.20566615462303162, "step": 795, "valid_targets_mean": 2176.4, "valid_targets_min": 353 }, { "epoch": 2.564102564102564, "grad_norm": 0.5531693361265784, "learning_rate": 2.263270679208883e-05, "loss": 0.4375, "loss_nan_ranks": 0, "loss_rank_avg": 0.20676007866859436, "step": 800, "valid_targets_mean": 1450.8, "valid_targets_min": 343 }, { "epoch": 2.5801282051282053, "grad_norm": 0.5471762527126571, "learning_rate": 2.2410733605106462e-05, "loss": 0.4427, "loss_nan_ranks": 0, "loss_rank_avg": 0.2094150185585022, "step": 805, "valid_targets_mean": 1916.5, "valid_targets_min": 657 }, { "epoch": 2.5961538461538463, "grad_norm": 0.457621920491644, "learning_rate": 2.2188458666073382e-05, "loss": 0.4447, "loss_nan_ranks": 0, "loss_rank_avg": 0.21128109097480774, "step": 810, "valid_targets_mean": 2713.3, "valid_targets_min": 336 }, { "epoch": 2.6121794871794872, "grad_norm": 0.5354268147970072, "learning_rate": 2.1965909797192143e-05, "loss": 0.4372, "loss_nan_ranks": 0, "loss_rank_avg": 0.18181543052196503, "step": 815, "valid_targets_mean": 1601.1, "valid_targets_min": 422 }, { "epoch": 2.628205128205128, "grad_norm": 0.5148256202815014, "learning_rate": 2.174311485495317e-05, "loss": 0.4303, "loss_nan_ranks": 0, "loss_rank_avg": 0.20850974321365356, "step": 820, "valid_targets_mean": 2054.1, "valid_targets_min": 358 }, { "epoch": 2.644230769230769, "grad_norm": 0.5117934823979483, "learning_rate": 2.1520101726647922e-05, "loss": 0.4482, "loss_nan_ranks": 0, "loss_rank_avg": 0.24301588535308838, "step": 825, "valid_targets_mean": 2009.7, "valid_targets_min": 548 }, { "epoch": 2.66025641025641, "grad_norm": 0.5932725780271455, "learning_rate": 2.1296898326878282e-05, "loss": 0.4368, "loss_nan_ranks": 0, "loss_rank_avg": 0.20315149426460266, "step": 830, "valid_targets_mean": 1422.9, "valid_targets_min": 382 }, { "epoch": 2.676282051282051, "grad_norm": 0.5511240954228953, "learning_rate": 2.1073532594062432e-05, "loss": 0.4287, "loss_nan_ranks": 0, "loss_rank_avg": 0.20835387706756592, "step": 835, "valid_targets_mean": 1824.1, "valid_targets_min": 406 }, { "epoch": 2.6923076923076925, "grad_norm": 0.48108009842119964, "learning_rate": 2.0850032486937838e-05, "loss": 0.413, "loss_nan_ranks": 0, "loss_rank_avg": 0.23764106631278992, "step": 840, "valid_targets_mean": 2332.3, "valid_targets_min": 311 }, { "epoch": 2.7083333333333335, "grad_norm": 0.45578124236246004, "learning_rate": 2.0626425981061608e-05, "loss": 0.4221, "loss_nan_ranks": 0, "loss_rank_avg": 0.16820326447486877, "step": 845, "valid_targets_mean": 1958.2, "valid_targets_min": 427 }, { "epoch": 2.7243589743589745, "grad_norm": 0.527649928455185, "learning_rate": 2.0402741065308808e-05, "loss": 0.4325, "loss_nan_ranks": 0, "loss_rank_avg": 0.2456822693347931, "step": 850, "valid_targets_mean": 1982.1, "valid_targets_min": 447 }, { "epoch": 2.7403846153846154, "grad_norm": 0.545838162469139, "learning_rate": 2.0179005738369098e-05, "loss": 0.4479, "loss_nan_ranks": 0, "loss_rank_avg": 0.19371215999126434, "step": 855, "valid_targets_mean": 1953.6, "valid_targets_min": 404 }, { "epoch": 2.7564102564102564, "grad_norm": 0.5137794607606895, "learning_rate": 1.995524800524211e-05, "loss": 0.4463, "loss_nan_ranks": 0, "loss_rank_avg": 0.17609572410583496, "step": 860, "valid_targets_mean": 1501.8, "valid_targets_min": 397 }, { "epoch": 2.7724358974358974, "grad_norm": 0.5022662147440222, "learning_rate": 1.9731495873732055e-05, "loss": 0.4475, "loss_nan_ranks": 0, "loss_rank_avg": 0.26271939277648926, "step": 865, "valid_targets_mean": 2227.0, "valid_targets_min": 380 }, { "epoch": 2.7884615384615383, "grad_norm": 0.44907950593239604, "learning_rate": 1.9507777350941996e-05, "loss": 0.417, "loss_nan_ranks": 0, "loss_rank_avg": 0.20136427879333496, "step": 870, "valid_targets_mean": 3125.4, "valid_targets_min": 477 }, { "epoch": 2.8044871794871797, "grad_norm": 0.48568732874510356, "learning_rate": 1.9284120439768192e-05, "loss": 0.4371, "loss_nan_ranks": 0, "loss_rank_avg": 0.1716795265674591, "step": 875, "valid_targets_mean": 1739.9, "valid_targets_min": 556 }, { "epoch": 2.8205128205128203, "grad_norm": 0.560681495754532, "learning_rate": 1.9060553135394957e-05, "loss": 0.4403, "loss_nan_ranks": 0, "loss_rank_avg": 0.25348377227783203, "step": 880, "valid_targets_mean": 2066.1, "valid_targets_min": 481 }, { "epoch": 2.8365384615384617, "grad_norm": 0.5208153414018862, "learning_rate": 1.8837103421790486e-05, "loss": 0.4099, "loss_nan_ranks": 0, "loss_rank_avg": 0.18841373920440674, "step": 885, "valid_targets_mean": 2157.9, "valid_targets_min": 500 }, { "epoch": 2.8525641025641026, "grad_norm": 0.532992574302052, "learning_rate": 1.861379926820414e-05, "loss": 0.4324, "loss_nan_ranks": 0, "loss_rank_avg": 0.2288801670074463, "step": 890, "valid_targets_mean": 1894.5, "valid_targets_min": 394 }, { "epoch": 2.8685897435897436, "grad_norm": 0.49754798570296427, "learning_rate": 1.8390668625665483e-05, "loss": 0.4173, "loss_nan_ranks": 0, "loss_rank_avg": 0.25023743510246277, "step": 895, "valid_targets_mean": 2521.0, "valid_targets_min": 615 }, { "epoch": 2.8846153846153846, "grad_norm": 0.5090535372637991, "learning_rate": 1.8167739423485668e-05, "loss": 0.4547, "loss_nan_ranks": 0, "loss_rank_avg": 0.20940105617046356, "step": 900, "valid_targets_mean": 2438.8, "valid_targets_min": 464 }, { "epoch": 2.9006410256410255, "grad_norm": 0.4245436673848715, "learning_rate": 1.794503956576152e-05, "loss": 0.4422, "loss_nan_ranks": 0, "loss_rank_avg": 0.21880042552947998, "step": 905, "valid_targets_mean": 3292.9, "valid_targets_min": 483 }, { "epoch": 2.9166666666666665, "grad_norm": 0.512498398614729, "learning_rate": 1.7722596927882758e-05, "loss": 0.456, "loss_nan_ranks": 0, "loss_rank_avg": 0.1915343850851059, "step": 910, "valid_targets_mean": 2097.7, "valid_targets_min": 423 }, { "epoch": 2.9326923076923075, "grad_norm": 0.5002179520743993, "learning_rate": 1.7500439353042834e-05, "loss": 0.428, "loss_nan_ranks": 0, "loss_rank_avg": 0.19558550417423248, "step": 915, "valid_targets_mean": 1932.4, "valid_targets_min": 352 }, { "epoch": 2.948717948717949, "grad_norm": 0.5271342144848477, "learning_rate": 1.727859464875381e-05, "loss": 0.4672, "loss_nan_ranks": 0, "loss_rank_avg": 0.23308855295181274, "step": 920, "valid_targets_mean": 2075.1, "valid_targets_min": 458 }, { "epoch": 2.96474358974359, "grad_norm": 0.9339459055765652, "learning_rate": 1.7057090583365678e-05, "loss": 0.4491, "loss_nan_ranks": 0, "loss_rank_avg": 0.20995834469795227, "step": 925, "valid_targets_mean": 1836.2, "valid_targets_min": 369 }, { "epoch": 2.980769230769231, "grad_norm": 0.4858744121036798, "learning_rate": 1.6835954882590567e-05, "loss": 0.4596, "loss_nan_ranks": 0, "loss_rank_avg": 0.21504682302474976, "step": 930, "valid_targets_mean": 2529.9, "valid_targets_min": 390 }, { "epoch": 2.996794871794872, "grad_norm": 0.5223177985055565, "learning_rate": 1.6615215226032332e-05, "loss": 0.3969, "loss_nan_ranks": 0, "loss_rank_avg": 0.2861439883708954, "step": 935, "valid_targets_mean": 2334.1, "valid_targets_min": 556 }, { "epoch": 3.0128205128205128, "grad_norm": 0.5081953568971541, "learning_rate": 1.6394899243721887e-05, "loss": 0.4444, "loss_nan_ranks": 0, "loss_rank_avg": 0.18385818600654602, "step": 940, "valid_targets_mean": 1744.4, "valid_targets_min": 456 }, { "epoch": 3.0288461538461537, "grad_norm": 0.41196760326087184, "learning_rate": 1.6175034512658753e-05, "loss": 0.41, "loss_nan_ranks": 0, "loss_rank_avg": 0.1653943955898285, "step": 945, "valid_targets_mean": 2961.9, "valid_targets_min": 464 }, { "epoch": 3.0448717948717947, "grad_norm": 0.46276766410726833, "learning_rate": 1.5955648553359247e-05, "loss": 0.4061, "loss_nan_ranks": 0, "loss_rank_avg": 0.1766689419746399, "step": 950, "valid_targets_mean": 2314.1, "valid_targets_min": 519 }, { "epoch": 3.0608974358974357, "grad_norm": 0.56466145662721, "learning_rate": 1.5736768826411683e-05, "loss": 0.3975, "loss_nan_ranks": 0, "loss_rank_avg": 0.17233410477638245, "step": 955, "valid_targets_mean": 1736.1, "valid_targets_min": 368 }, { "epoch": 3.076923076923077, "grad_norm": 0.6184183203209048, "learning_rate": 1.5518422729039188e-05, "loss": 0.4366, "loss_nan_ranks": 0, "loss_rank_avg": 0.2544904947280884, "step": 960, "valid_targets_mean": 1785.6, "valid_targets_min": 453 }, { "epoch": 3.092948717948718, "grad_norm": 0.46138818996251113, "learning_rate": 1.5300637591670357e-05, "loss": 0.3877, "loss_nan_ranks": 0, "loss_rank_avg": 0.2141857147216797, "step": 965, "valid_targets_mean": 3189.5, "valid_targets_min": 635 }, { "epoch": 3.108974358974359, "grad_norm": 0.5024028000700136, "learning_rate": 1.5083440674518302e-05, "loss": 0.4141, "loss_nan_ranks": 0, "loss_rank_avg": 0.20946665108203888, "step": 970, "valid_targets_mean": 2281.5, "valid_targets_min": 477 }, { "epoch": 3.125, "grad_norm": 0.6617512350367286, "learning_rate": 1.4866859164168466e-05, "loss": 0.4462, "loss_nan_ranks": 0, "loss_rank_avg": 0.27546098828315735, "step": 975, "valid_targets_mean": 1994.2, "valid_targets_min": 291 }, { "epoch": 3.141025641025641, "grad_norm": 0.5778578694843474, "learning_rate": 1.4650920170175704e-05, "loss": 0.431, "loss_nan_ranks": 0, "loss_rank_avg": 0.19753442704677582, "step": 980, "valid_targets_mean": 1745.9, "valid_targets_min": 326 }, { "epoch": 3.157051282051282, "grad_norm": 0.4876046302233207, "learning_rate": 1.443565072167095e-05, "loss": 0.4193, "loss_nan_ranks": 0, "loss_rank_avg": 0.1774245798587799, "step": 985, "valid_targets_mean": 1624.2, "valid_targets_min": 572 }, { "epoch": 3.173076923076923, "grad_norm": 0.5738704159928016, "learning_rate": 1.4221077763977984e-05, "loss": 0.4254, "loss_nan_ranks": 0, "loss_rank_avg": 0.2392444908618927, "step": 990, "valid_targets_mean": 1980.4, "valid_targets_min": 784 }, { "epoch": 3.189102564102564, "grad_norm": 0.4850487138995122, "learning_rate": 1.4007228155240696e-05, "loss": 0.4209, "loss_nan_ranks": 0, "loss_rank_avg": 0.2174842357635498, "step": 995, "valid_targets_mean": 2492.1, "valid_targets_min": 459 }, { "epoch": 3.2051282051282053, "grad_norm": 0.529762108647159, "learning_rate": 1.37941286630612e-05, "loss": 0.4147, "loss_nan_ranks": 0, "loss_rank_avg": 0.22387659549713135, "step": 1000, "valid_targets_mean": 1882.1, "valid_targets_min": 355 }, { "epoch": 3.2211538461538463, "grad_norm": 0.4709252470917212, "learning_rate": 1.3581805961149371e-05, "loss": 0.4275, "loss_nan_ranks": 0, "loss_rank_avg": 0.17616716027259827, "step": 1005, "valid_targets_mean": 2260.7, "valid_targets_min": 481 }, { "epoch": 3.2371794871794872, "grad_norm": 0.4920991562633389, "learning_rate": 1.3370286625984089e-05, "loss": 0.4291, "loss_nan_ranks": 0, "loss_rank_avg": 0.1874414086341858, "step": 1010, "valid_targets_mean": 1999.1, "valid_targets_min": 389 }, { "epoch": 3.253205128205128, "grad_norm": 0.4366015680605223, "learning_rate": 1.3159597133486628e-05, "loss": 0.4132, "loss_nan_ranks": 0, "loss_rank_avg": 0.1775241196155548, "step": 1015, "valid_targets_mean": 2691.2, "valid_targets_min": 492 }, { "epoch": 3.269230769230769, "grad_norm": 0.5159579418833546, "learning_rate": 1.2949763855706678e-05, "loss": 0.3984, "loss_nan_ranks": 0, "loss_rank_avg": 0.20066994428634644, "step": 1020, "valid_targets_mean": 2245.2, "valid_targets_min": 414 }, { "epoch": 3.28525641025641, "grad_norm": 0.5876671407131282, "learning_rate": 1.274081305752135e-05, "loss": 0.4516, "loss_nan_ranks": 0, "loss_rank_avg": 0.1973981410264969, "step": 1025, "valid_targets_mean": 1791.3, "valid_targets_min": 398 }, { "epoch": 3.301282051282051, "grad_norm": 0.6456536801741808, "learning_rate": 1.2532770893347582e-05, "loss": 0.427, "loss_nan_ranks": 0, "loss_rank_avg": 0.3110905885696411, "step": 1030, "valid_targets_mean": 1891.8, "valid_targets_min": 324 }, { "epoch": 3.3173076923076925, "grad_norm": 0.5293106844776568, "learning_rate": 1.2325663403868406e-05, "loss": 0.4164, "loss_nan_ranks": 0, "loss_rank_avg": 0.230901837348938, "step": 1035, "valid_targets_mean": 2292.3, "valid_targets_min": 355 }, { "epoch": 3.3333333333333335, "grad_norm": 0.5522757760846092, "learning_rate": 1.2119516512773424e-05, "loss": 0.3895, "loss_nan_ranks": 0, "loss_rank_avg": 0.24217258393764496, "step": 1040, "valid_targets_mean": 2009.1, "valid_targets_min": 408 }, { "epoch": 3.3493589743589745, "grad_norm": 0.5560321667957103, "learning_rate": 1.1914356023513904e-05, "loss": 0.4459, "loss_nan_ranks": 0, "loss_rank_avg": 0.23398807644844055, "step": 1045, "valid_targets_mean": 2263.5, "valid_targets_min": 539 }, { "epoch": 3.3653846153846154, "grad_norm": 0.4825967281763318, "learning_rate": 1.1710207616073001e-05, "loss": 0.3927, "loss_nan_ranks": 0, "loss_rank_avg": 0.21472863852977753, "step": 1050, "valid_targets_mean": 2439.7, "valid_targets_min": 286 }, { "epoch": 3.3814102564102564, "grad_norm": 0.5322770350466977, "learning_rate": 1.1507096843751372e-05, "loss": 0.4218, "loss_nan_ranks": 0, "loss_rank_avg": 0.17266938090324402, "step": 1055, "valid_targets_mean": 1598.1, "valid_targets_min": 387 }, { "epoch": 3.3974358974358974, "grad_norm": 0.7827125593311665, "learning_rate": 1.1305049129968637e-05, "loss": 0.4349, "loss_nan_ranks": 0, "loss_rank_avg": 0.1991385519504547, "step": 1060, "valid_targets_mean": 2439.3, "valid_targets_min": 608 }, { "epoch": 3.4134615384615383, "grad_norm": 0.44528916459778456, "learning_rate": 1.110408976508118e-05, "loss": 0.3907, "loss_nan_ranks": 0, "loss_rank_avg": 0.1545448899269104, "step": 1065, "valid_targets_mean": 2074.4, "valid_targets_min": 396 }, { "epoch": 3.4294871794871793, "grad_norm": 0.48384890001970277, "learning_rate": 1.090424390321648e-05, "loss": 0.4352, "loss_nan_ranks": 0, "loss_rank_avg": 0.18344886600971222, "step": 1070, "valid_targets_mean": 2136.0, "valid_targets_min": 406 }, { "epoch": 3.4455128205128207, "grad_norm": 0.46732111222809075, "learning_rate": 1.070553655912463e-05, "loss": 0.3894, "loss_nan_ranks": 0, "loss_rank_avg": 0.13502533733844757, "step": 1075, "valid_targets_mean": 1907.0, "valid_targets_min": 400 }, { "epoch": 3.4615384615384617, "grad_norm": 0.46066416755356465, "learning_rate": 1.0507992605047193e-05, "loss": 0.4055, "loss_nan_ranks": 0, "loss_rank_avg": 0.1755571961402893, "step": 1080, "valid_targets_mean": 2637.7, "valid_targets_min": 457 }, { "epoch": 3.4775641025641026, "grad_norm": 0.3719838719731155, "learning_rate": 1.0311636767603952e-05, "loss": 0.3946, "loss_nan_ranks": 0, "loss_rank_avg": 0.14000138640403748, "step": 1085, "valid_targets_mean": 2861.4, "valid_targets_min": 495 }, { "epoch": 3.4935897435897436, "grad_norm": 0.5828556028812071, "learning_rate": 1.0116493624697862e-05, "loss": 0.4203, "loss_nan_ranks": 0, "loss_rank_avg": 0.3252260684967041, "step": 1090, "valid_targets_mean": 2756.8, "valid_targets_min": 533 }, { "epoch": 3.5096153846153846, "grad_norm": 0.43638255215532396, "learning_rate": 9.922587602438657e-06, "loss": 0.3941, "loss_nan_ranks": 0, "loss_rank_avg": 0.18718764185905457, "step": 1095, "valid_targets_mean": 2623.4, "valid_targets_min": 420 }, { "epoch": 3.5256410256410255, "grad_norm": 0.5594326378178172, "learning_rate": 9.729942972085401e-06, "loss": 0.4182, "loss_nan_ranks": 0, "loss_rank_avg": 0.20950523018836975, "step": 1100, "valid_targets_mean": 1888.2, "valid_targets_min": 398 }, { "epoch": 3.5416666666666665, "grad_norm": 0.6153082072669694, "learning_rate": 9.538583847008452e-06, "loss": 0.4508, "loss_nan_ranks": 0, "loss_rank_avg": 0.24810105562210083, "step": 1105, "valid_targets_mean": 1652.6, "valid_targets_min": 487 }, { "epoch": 3.5576923076923075, "grad_norm": 0.7347949249771237, "learning_rate": 9.348534179671202e-06, "loss": 0.4446, "loss_nan_ranks": 0, "loss_rank_avg": 0.1828898787498474, "step": 1110, "valid_targets_mean": 1291.9, "valid_targets_min": 384 }, { "epoch": 3.573717948717949, "grad_norm": 0.4885273908189287, "learning_rate": 9.159817758631923e-06, "loss": 0.4148, "loss_nan_ranks": 0, "loss_rank_avg": 0.21009644865989685, "step": 1115, "valid_targets_mean": 2135.0, "valid_targets_min": 345 }, { "epoch": 3.58974358974359, "grad_norm": 0.4973124736202378, "learning_rate": 8.972458205566168e-06, "loss": 0.4288, "loss_nan_ranks": 0, "loss_rank_avg": 0.15964004397392273, "step": 1120, "valid_targets_mean": 1893.4, "valid_targets_min": 460 }, { "epoch": 3.605769230769231, "grad_norm": 0.5441599943681932, "learning_rate": 8.786478972310023e-06, "loss": 0.4375, "loss_nan_ranks": 0, "loss_rank_avg": 0.2124439775943756, "step": 1125, "valid_targets_mean": 2145.6, "valid_targets_min": 336 }, { "epoch": 3.621794871794872, "grad_norm": 0.5226367162201916, "learning_rate": 8.601903337924646e-06, "loss": 0.3906, "loss_nan_ranks": 0, "loss_rank_avg": 0.19349414110183716, "step": 1130, "valid_targets_mean": 2012.8, "valid_targets_min": 509 }, { "epoch": 3.6378205128205128, "grad_norm": 0.5145848515018033, "learning_rate": 8.418754405782423e-06, "loss": 0.4056, "loss_nan_ranks": 0, "loss_rank_avg": 0.2273569405078888, "step": 1135, "valid_targets_mean": 2077.1, "valid_targets_min": 426 }, { "epoch": 3.6538461538461537, "grad_norm": 0.5175398660560526, "learning_rate": 8.237055100675092e-06, "loss": 0.4016, "loss_nan_ranks": 0, "loss_rank_avg": 0.22114655375480652, "step": 1140, "valid_targets_mean": 2420.2, "valid_targets_min": 386 }, { "epoch": 3.6698717948717947, "grad_norm": 0.6277817059082492, "learning_rate": 8.056828165944282e-06, "loss": 0.4462, "loss_nan_ranks": 0, "loss_rank_avg": 0.26035845279693604, "step": 1145, "valid_targets_mean": 1739.6, "valid_targets_min": 324 }, { "epoch": 3.685897435897436, "grad_norm": 0.5854299128793944, "learning_rate": 7.878096160634675e-06, "loss": 0.4342, "loss_nan_ranks": 0, "loss_rank_avg": 0.22393161058425903, "step": 1150, "valid_targets_mean": 1604.9, "valid_targets_min": 517 }, { "epoch": 3.7019230769230766, "grad_norm": 0.6521482846929779, "learning_rate": 7.700881456670342e-06, "loss": 0.4199, "loss_nan_ranks": 0, "loss_rank_avg": 0.21592406928539276, "step": 1155, "valid_targets_mean": 1704.6, "valid_targets_min": 372 }, { "epoch": 3.717948717948718, "grad_norm": 0.5205545018221379, "learning_rate": 7.525206236054385e-06, "loss": 0.4159, "loss_nan_ranks": 0, "loss_rank_avg": 0.19648584723472595, "step": 1160, "valid_targets_mean": 1921.7, "valid_targets_min": 393 }, { "epoch": 3.733974358974359, "grad_norm": 0.5195989305298412, "learning_rate": 7.3510924880924575e-06, "loss": 0.4233, "loss_nan_ranks": 0, "loss_rank_avg": 0.17861494421958923, "step": 1165, "valid_targets_mean": 1717.1, "valid_targets_min": 479 }, { "epoch": 3.75, "grad_norm": 0.6033763727537506, "learning_rate": 7.178562006640337e-06, "loss": 0.4242, "loss_nan_ranks": 0, "loss_rank_avg": 0.20282906293869019, "step": 1170, "valid_targets_mean": 1704.9, "valid_targets_min": 409 }, { "epoch": 3.766025641025641, "grad_norm": 0.8520137624165722, "learning_rate": 7.0076363873759865e-06, "loss": 0.3928, "loss_nan_ranks": 0, "loss_rank_avg": 0.21843525767326355, "step": 1175, "valid_targets_mean": 2615.6, "valid_targets_min": 500 }, { "epoch": 3.782051282051282, "grad_norm": 0.5354434243394129, "learning_rate": 6.838337025096424e-06, "loss": 0.3922, "loss_nan_ranks": 0, "loss_rank_avg": 0.2174426019191742, "step": 1180, "valid_targets_mean": 2017.8, "valid_targets_min": 482 }, { "epoch": 3.7980769230769234, "grad_norm": 0.5105220829406444, "learning_rate": 6.67068511103971e-06, "loss": 0.4288, "loss_nan_ranks": 0, "loss_rank_avg": 0.18573346734046936, "step": 1185, "valid_targets_mean": 2131.7, "valid_targets_min": 499 }, { "epoch": 3.814102564102564, "grad_norm": 0.46857007935338224, "learning_rate": 6.504701630232475e-06, "loss": 0.4107, "loss_nan_ranks": 0, "loss_rank_avg": 0.17006590962409973, "step": 1190, "valid_targets_mean": 1914.7, "valid_targets_min": 476 }, { "epoch": 3.8301282051282053, "grad_norm": 0.5407396992987427, "learning_rate": 6.340407358863167e-06, "loss": 0.3835, "loss_nan_ranks": 0, "loss_rank_avg": 0.16990578174591064, "step": 1195, "valid_targets_mean": 1394.6, "valid_targets_min": 363 }, { "epoch": 3.8461538461538463, "grad_norm": 0.47292976742071363, "learning_rate": 6.177822861681557e-06, "loss": 0.4242, "loss_nan_ranks": 0, "loss_rank_avg": 0.1876613199710846, "step": 1200, "valid_targets_mean": 2349.9, "valid_targets_min": 393 }, { "epoch": 3.8621794871794872, "grad_norm": 0.4841137938946458, "learning_rate": 6.016968489424572e-06, "loss": 0.4346, "loss_nan_ranks": 0, "loss_rank_avg": 0.14386187493801117, "step": 1205, "valid_targets_mean": 1697.8, "valid_targets_min": 220 }, { "epoch": 3.878205128205128, "grad_norm": 0.5708595597284355, "learning_rate": 5.857864376269051e-06, "loss": 0.4374, "loss_nan_ranks": 0, "loss_rank_avg": 0.23375602066516876, "step": 1210, "valid_targets_mean": 1680.6, "valid_targets_min": 381 }, { "epoch": 3.894230769230769, "grad_norm": 0.5084621641793788, "learning_rate": 5.700530437311509e-06, "loss": 0.4229, "loss_nan_ranks": 0, "loss_rank_avg": 0.18736353516578674, "step": 1215, "valid_targets_mean": 1901.4, "valid_targets_min": 381 }, { "epoch": 3.91025641025641, "grad_norm": 0.5301004661796407, "learning_rate": 5.544986366075371e-06, "loss": 0.4391, "loss_nan_ranks": 0, "loss_rank_avg": 0.1670445203781128, "step": 1220, "valid_targets_mean": 1648.1, "valid_targets_min": 401 }, { "epoch": 3.926282051282051, "grad_norm": 0.6134172232948031, "learning_rate": 5.39125163204594e-06, "loss": 0.4133, "loss_nan_ranks": 0, "loss_rank_avg": 0.21446377038955688, "step": 1225, "valid_targets_mean": 1942.4, "valid_targets_min": 483 }, { "epoch": 3.9423076923076925, "grad_norm": 0.5974202428194099, "learning_rate": 5.239345478233364e-06, "loss": 0.4258, "loss_nan_ranks": 0, "loss_rank_avg": 0.17354455590248108, "step": 1230, "valid_targets_mean": 1358.0, "valid_targets_min": 369 }, { "epoch": 3.9583333333333335, "grad_norm": 0.4483178567987285, "learning_rate": 5.089286918764031e-06, "loss": 0.3961, "loss_nan_ranks": 0, "loss_rank_avg": 0.19412055611610413, "step": 1235, "valid_targets_mean": 2878.3, "valid_targets_min": 803 }, { "epoch": 3.9743589743589745, "grad_norm": 0.4507178360947217, "learning_rate": 4.941094736500522e-06, "loss": 0.4006, "loss_nan_ranks": 0, "loss_rank_avg": 0.16912227869033813, "step": 1240, "valid_targets_mean": 2105.1, "valid_targets_min": 357 }, { "epoch": 3.9903846153846154, "grad_norm": 0.5411117764458047, "learning_rate": 4.794787480690597e-06, "loss": 0.4147, "loss_nan_ranks": 0, "loss_rank_avg": 0.18049368262290955, "step": 1245, "valid_targets_mean": 1542.9, "valid_targets_min": 366 }, { "epoch": 4.006410256410256, "grad_norm": 0.5020314686782774, "learning_rate": 4.650383464645338e-06, "loss": 0.4072, "loss_nan_ranks": 0, "loss_rank_avg": 0.17812328040599823, "step": 1250, "valid_targets_mean": 2127.0, "valid_targets_min": 558 }, { "epoch": 4.022435897435898, "grad_norm": 0.8897516856466173, "learning_rate": 4.507900763446911e-06, "loss": 0.4323, "loss_nan_ranks": 0, "loss_rank_avg": 0.17299619317054749, "step": 1255, "valid_targets_mean": 1772.4, "valid_targets_min": 401 }, { "epoch": 4.038461538461538, "grad_norm": 0.5155047635305318, "learning_rate": 4.367357211686072e-06, "loss": 0.4405, "loss_nan_ranks": 0, "loss_rank_avg": 0.2016928344964981, "step": 1260, "valid_targets_mean": 1984.0, "valid_targets_min": 314 }, { "epoch": 4.05448717948718, "grad_norm": 0.5575252186312881, "learning_rate": 4.228770401229824e-06, "loss": 0.4146, "loss_nan_ranks": 0, "loss_rank_avg": 0.1600431501865387, "step": 1265, "valid_targets_mean": 1631.5, "valid_targets_min": 422 }, { "epoch": 4.07051282051282, "grad_norm": 0.4739201323516146, "learning_rate": 4.092157679019442e-06, "loss": 0.4126, "loss_nan_ranks": 0, "loss_rank_avg": 0.2213498055934906, "step": 1270, "valid_targets_mean": 2722.4, "valid_targets_min": 596 }, { "epoch": 4.086538461538462, "grad_norm": 0.5093029435546993, "learning_rate": 3.957536144899123e-06, "loss": 0.4053, "loss_nan_ranks": 0, "loss_rank_avg": 0.16781838238239288, "step": 1275, "valid_targets_mean": 2022.2, "valid_targets_min": 454 }, { "epoch": 4.102564102564102, "grad_norm": 0.46573698307127626, "learning_rate": 3.8249226494756445e-06, "loss": 0.3686, "loss_nan_ranks": 0, "loss_rank_avg": 0.1653728187084198, "step": 1280, "valid_targets_mean": 2144.4, "valid_targets_min": 370 }, { "epoch": 4.118589743589744, "grad_norm": 0.49547650926816716, "learning_rate": 3.694333792009115e-06, "loss": 0.4204, "loss_nan_ranks": 0, "loss_rank_avg": 0.16737449169158936, "step": 1285, "valid_targets_mean": 2180.3, "valid_targets_min": 390 }, { "epoch": 4.134615384615385, "grad_norm": 0.4959235045064883, "learning_rate": 3.565785918335292e-06, "loss": 0.4121, "loss_nan_ranks": 0, "loss_rank_avg": 0.17612406611442566, "step": 1290, "valid_targets_mean": 2229.8, "valid_targets_min": 415 }, { "epoch": 4.1506410256410255, "grad_norm": 0.5497305685489215, "learning_rate": 3.43929511881953e-06, "loss": 0.4143, "loss_nan_ranks": 0, "loss_rank_avg": 0.21826569736003876, "step": 1295, "valid_targets_mean": 2172.8, "valid_targets_min": 378 }, { "epoch": 4.166666666666667, "grad_norm": 0.49016495730669646, "learning_rate": 3.3148772263427743e-06, "loss": 0.3962, "loss_nan_ranks": 0, "loss_rank_avg": 0.2120068520307541, "step": 1300, "valid_targets_mean": 2636.4, "valid_targets_min": 502 }, { "epoch": 4.1826923076923075, "grad_norm": 0.5410817941449617, "learning_rate": 3.1925478143197418e-06, "loss": 0.4026, "loss_nan_ranks": 0, "loss_rank_avg": 0.1488579511642456, "step": 1305, "valid_targets_mean": 1622.9, "valid_targets_min": 408 }, { "epoch": 4.198717948717949, "grad_norm": 0.5167531330373021, "learning_rate": 3.0723221947495907e-06, "loss": 0.4189, "loss_nan_ranks": 0, "loss_rank_avg": 0.16238239407539368, "step": 1310, "valid_targets_mean": 2115.7, "valid_targets_min": 470 }, { "epoch": 4.214743589743589, "grad_norm": 0.5065500271116625, "learning_rate": 2.954215416299331e-06, "loss": 0.4129, "loss_nan_ranks": 0, "loss_rank_avg": 0.18277542293071747, "step": 1315, "valid_targets_mean": 2185.4, "valid_targets_min": 358 }, { "epoch": 4.230769230769231, "grad_norm": 0.5004798136718417, "learning_rate": 2.838242262420148e-06, "loss": 0.4176, "loss_nan_ranks": 0, "loss_rank_avg": 0.22836771607398987, "step": 1320, "valid_targets_mean": 2597.8, "valid_targets_min": 331 }, { "epoch": 4.246794871794872, "grad_norm": 0.5184618845461352, "learning_rate": 2.7244172494969978e-06, "loss": 0.4176, "loss_nan_ranks": 0, "loss_rank_avg": 0.18959830701351166, "step": 1325, "valid_targets_mean": 2182.0, "valid_targets_min": 523 }, { "epoch": 4.262820512820513, "grad_norm": 0.4544775548224198, "learning_rate": 2.6127546250315438e-06, "loss": 0.3865, "loss_nan_ranks": 0, "loss_rank_avg": 0.16856324672698975, "step": 1330, "valid_targets_mean": 2052.1, "valid_targets_min": 363 }, { "epoch": 4.278846153846154, "grad_norm": 0.6718829730259701, "learning_rate": 2.503268365858831e-06, "loss": 0.4097, "loss_nan_ranks": 0, "loss_rank_avg": 0.2597959339618683, "step": 1335, "valid_targets_mean": 1643.8, "valid_targets_min": 521 }, { "epoch": 4.294871794871795, "grad_norm": 0.6447551224819277, "learning_rate": 2.3959721763977805e-06, "loss": 0.431, "loss_nan_ranks": 0, "loss_rank_avg": 0.22334489226341248, "step": 1340, "valid_targets_mean": 1838.8, "valid_targets_min": 447 }, { "epoch": 4.310897435897436, "grad_norm": 0.47106359697416067, "learning_rate": 2.2908794869358044e-06, "loss": 0.4107, "loss_nan_ranks": 0, "loss_rank_avg": 0.1753031313419342, "step": 1345, "valid_targets_mean": 2324.8, "valid_targets_min": 382 }, { "epoch": 4.326923076923077, "grad_norm": 0.4797964740997395, "learning_rate": 2.188003451947747e-06, "loss": 0.3904, "loss_nan_ranks": 0, "loss_rank_avg": 0.13902968168258667, "step": 1350, "valid_targets_mean": 1571.4, "valid_targets_min": 365 }, { "epoch": 4.342948717948718, "grad_norm": 0.4974988695652104, "learning_rate": 2.0873569484493305e-06, "loss": 0.3924, "loss_nan_ranks": 0, "loss_rank_avg": 0.20952798426151276, "step": 1355, "valid_targets_mean": 2615.0, "valid_targets_min": 499 }, { "epoch": 4.358974358974359, "grad_norm": 0.5841534150892278, "learning_rate": 1.9889525743853323e-06, "loss": 0.3984, "loss_nan_ranks": 0, "loss_rank_avg": 0.21221312880516052, "step": 1360, "valid_targets_mean": 1916.5, "valid_targets_min": 199 }, { "epoch": 4.375, "grad_norm": 0.5200681440214403, "learning_rate": 1.8928026470526917e-06, "loss": 0.4282, "loss_nan_ranks": 0, "loss_rank_avg": 0.21217067539691925, "step": 1365, "valid_targets_mean": 1990.6, "valid_targets_min": 398 }, { "epoch": 4.391025641025641, "grad_norm": 0.5225656986310022, "learning_rate": 1.7989192015587776e-06, "loss": 0.4046, "loss_nan_ranks": 0, "loss_rank_avg": 0.21198615431785583, "step": 1370, "valid_targets_mean": 2139.8, "valid_targets_min": 495 }, { "epoch": 4.407051282051282, "grad_norm": 0.5718894420905976, "learning_rate": 1.7073139893149092e-06, "loss": 0.4265, "loss_nan_ranks": 0, "loss_rank_avg": 0.2715427577495575, "step": 1375, "valid_targets_mean": 2201.8, "valid_targets_min": 620 }, { "epoch": 4.423076923076923, "grad_norm": 0.4832349361730212, "learning_rate": 1.6179984765654743e-06, "loss": 0.3997, "loss_nan_ranks": 0, "loss_rank_avg": 0.18295009434223175, "step": 1380, "valid_targets_mean": 2338.6, "valid_targets_min": 333 }, { "epoch": 4.439102564102564, "grad_norm": 0.5009262761860173, "learning_rate": 1.5309838429526714e-06, "loss": 0.4229, "loss_nan_ranks": 0, "loss_rank_avg": 0.19573353230953217, "step": 1385, "valid_targets_mean": 2377.6, "valid_targets_min": 359 }, { "epoch": 4.455128205128205, "grad_norm": 0.4626576145917675, "learning_rate": 1.4462809801171428e-06, "loss": 0.4182, "loss_nan_ranks": 0, "loss_rank_avg": 0.16738732159137726, "step": 1390, "valid_targets_mean": 2413.1, "valid_targets_min": 632 }, { "epoch": 4.471153846153846, "grad_norm": 0.5619638360015893, "learning_rate": 1.3639004903346954e-06, "loss": 0.4131, "loss_nan_ranks": 0, "loss_rank_avg": 0.17360472679138184, "step": 1395, "valid_targets_mean": 1610.5, "valid_targets_min": 454 }, { "epoch": 4.487179487179487, "grad_norm": 0.5089087979706497, "learning_rate": 1.2838526851891864e-06, "loss": 0.4082, "loss_nan_ranks": 0, "loss_rank_avg": 0.23789802193641663, "step": 1400, "valid_targets_mean": 2443.3, "valid_targets_min": 541 }, { "epoch": 4.503205128205128, "grad_norm": 0.5942691756087387, "learning_rate": 1.2061475842818337e-06, "loss": 0.4333, "loss_nan_ranks": 0, "loss_rank_avg": 0.21138739585876465, "step": 1405, "valid_targets_mean": 1548.1, "valid_targets_min": 474 }, { "epoch": 4.519230769230769, "grad_norm": 0.5181839449829251, "learning_rate": 1.1307949139770446e-06, "loss": 0.3959, "loss_nan_ranks": 0, "loss_rank_avg": 0.18168583512306213, "step": 1410, "valid_targets_mean": 1700.7, "valid_targets_min": 316 }, { "epoch": 4.535256410256411, "grad_norm": 0.5691599181706188, "learning_rate": 1.057804106184992e-06, "loss": 0.4124, "loss_nan_ranks": 0, "loss_rank_avg": 0.20509378612041473, "step": 1415, "valid_targets_mean": 2220.6, "valid_targets_min": 390 }, { "epoch": 4.551282051282051, "grad_norm": 0.5362388152801542, "learning_rate": 9.871842971809853e-07, "loss": 0.3719, "loss_nan_ranks": 0, "loss_rank_avg": 0.18971163034439087, "step": 1420, "valid_targets_mean": 2453.0, "valid_targets_min": 438 }, { "epoch": 4.5673076923076925, "grad_norm": 0.4217117551642382, "learning_rate": 9.189443264619102e-07, "loss": 0.3898, "loss_nan_ranks": 0, "loss_rank_avg": 0.19224503636360168, "step": 1425, "valid_targets_mean": 3036.1, "valid_targets_min": 491 }, { "epoch": 4.583333333333333, "grad_norm": 0.50137069774833, "learning_rate": 8.530927356397778e-07, "loss": 0.4182, "loss_nan_ranks": 0, "loss_rank_avg": 0.17977871000766754, "step": 1430, "valid_targets_mean": 1959.3, "valid_targets_min": 462 }, { "epoch": 4.5993589743589745, "grad_norm": 0.5147630870730896, "learning_rate": 7.896377673725553e-07, "loss": 0.4009, "loss_nan_ranks": 0, "loss_rank_avg": 0.190871000289917, "step": 1435, "valid_targets_mean": 1864.3, "valid_targets_min": 436 }, { "epoch": 4.615384615384615, "grad_norm": 0.48363223511421033, "learning_rate": 7.285873643324514e-07, "loss": 0.4017, "loss_nan_ranks": 0, "loss_rank_avg": 0.21705955266952515, "step": 1440, "valid_targets_mean": 2462.3, "valid_targets_min": 621 }, { "epoch": 4.631410256410256, "grad_norm": 0.547275826405447, "learning_rate": 6.69949168211721e-07, "loss": 0.395, "loss_nan_ranks": 0, "loss_rank_avg": 0.2083073854446411, "step": 1445, "valid_targets_mean": 1937.9, "valid_targets_min": 472 }, { "epoch": 4.647435897435898, "grad_norm": 0.5515440239397343, "learning_rate": 6.137305187661513e-07, "loss": 0.4123, "loss_nan_ranks": 0, "loss_rank_avg": 0.19437691569328308, "step": 1450, "valid_targets_mean": 1709.7, "valid_targets_min": 496 }, { "epoch": 4.663461538461538, "grad_norm": 0.5541400713883889, "learning_rate": 5.599384528963425e-07, "loss": 0.4069, "loss_nan_ranks": 0, "loss_rank_avg": 0.17162460088729858, "step": 1455, "valid_targets_mean": 1923.9, "valid_targets_min": 338 }, { "epoch": 4.67948717948718, "grad_norm": 0.46459204192014386, "learning_rate": 5.085797037669072e-07, "loss": 0.3773, "loss_nan_ranks": 0, "loss_rank_avg": 0.19353806972503662, "step": 1460, "valid_targets_mean": 2262.0, "valid_targets_min": 368 }, { "epoch": 4.69551282051282, "grad_norm": 0.5043482862484802, "learning_rate": 4.5966069996365993e-07, "loss": 0.3826, "loss_nan_ranks": 0, "loss_rank_avg": 0.18038004636764526, "step": 1465, "valid_targets_mean": 2006.8, "valid_targets_min": 401 }, { "epoch": 4.711538461538462, "grad_norm": 0.47468830861646916, "learning_rate": 4.1318756468897047e-07, "loss": 0.4266, "loss_nan_ranks": 0, "loss_rank_avg": 0.21844631433486938, "step": 1470, "valid_targets_mean": 2434.2, "valid_targets_min": 499 }, { "epoch": 4.727564102564102, "grad_norm": 0.5712016674577056, "learning_rate": 3.691661149953096e-07, "loss": 0.4055, "loss_nan_ranks": 0, "loss_rank_avg": 0.2230275571346283, "step": 1475, "valid_targets_mean": 2119.4, "valid_targets_min": 454 }, { "epoch": 4.743589743589744, "grad_norm": 0.5286171152229552, "learning_rate": 3.2760186105712964e-07, "loss": 0.3816, "loss_nan_ranks": 0, "loss_rank_avg": 0.19953539967536926, "step": 1480, "valid_targets_mean": 2190.5, "valid_targets_min": 390 }, { "epoch": 4.759615384615385, "grad_norm": 0.5355340610321968, "learning_rate": 2.8850000548115155e-07, "loss": 0.4263, "loss_nan_ranks": 0, "loss_rank_avg": 0.24080535769462585, "step": 1485, "valid_targets_mean": 1879.4, "valid_targets_min": 488 }, { "epoch": 4.7756410256410255, "grad_norm": 0.5704120262786105, "learning_rate": 2.518654426551592e-07, "loss": 0.4061, "loss_nan_ranks": 0, "loss_rank_avg": 0.2327512800693512, "step": 1490, "valid_targets_mean": 1950.7, "valid_targets_min": 424 }, { "epoch": 4.791666666666667, "grad_norm": 0.5892101716728328, "learning_rate": 2.1770275813536746e-07, "loss": 0.3985, "loss_nan_ranks": 0, "loss_rank_avg": 0.19074735045433044, "step": 1495, "valid_targets_mean": 1708.5, "valid_targets_min": 530 }, { "epoch": 4.8076923076923075, "grad_norm": 0.5211300578683025, "learning_rate": 1.8601622807244312e-07, "loss": 0.4217, "loss_nan_ranks": 0, "loss_rank_avg": 0.21352173388004303, "step": 1500, "valid_targets_mean": 2224.9, "valid_targets_min": 466 }, { "epoch": 4.823717948717949, "grad_norm": 0.5950739817226455, "learning_rate": 1.5680981867625566e-07, "loss": 0.4065, "loss_nan_ranks": 0, "loss_rank_avg": 0.21843212842941284, "step": 1505, "valid_targets_mean": 1907.6, "valid_targets_min": 477 }, { "epoch": 4.839743589743589, "grad_norm": 0.4982907259604603, "learning_rate": 1.3008718571943636e-07, "loss": 0.4145, "loss_nan_ranks": 0, "loss_rank_avg": 0.2082642912864685, "step": 1510, "valid_targets_mean": 2288.1, "valid_targets_min": 494 }, { "epoch": 4.855769230769231, "grad_norm": 0.7118715744452784, "learning_rate": 1.058516740797777e-07, "loss": 0.3914, "loss_nan_ranks": 0, "loss_rank_avg": 0.20714955031871796, "step": 1515, "valid_targets_mean": 1673.6, "valid_targets_min": 453 }, { "epoch": 4.871794871794872, "grad_norm": 0.636093382876181, "learning_rate": 8.410631732155062e-08, "loss": 0.4275, "loss_nan_ranks": 0, "loss_rank_avg": 0.24439842998981476, "step": 1520, "valid_targets_mean": 1889.8, "valid_targets_min": 369 }, { "epoch": 4.887820512820513, "grad_norm": 0.4919592475036989, "learning_rate": 6.485383731580142e-08, "loss": 0.3889, "loss_nan_ranks": 0, "loss_rank_avg": 0.14140692353248596, "step": 1525, "valid_targets_mean": 1333.6, "valid_targets_min": 329 }, { "epoch": 4.903846153846154, "grad_norm": 0.46488274884652137, "learning_rate": 4.809664389964441e-08, "loss": 0.4254, "loss_nan_ranks": 0, "loss_rank_avg": 0.21400034427642822, "step": 1530, "valid_targets_mean": 2693.9, "valid_targets_min": 523 }, { "epoch": 4.919871794871795, "grad_norm": 0.5040958449228231, "learning_rate": 3.383683457463649e-08, "loss": 0.3597, "loss_nan_ranks": 0, "loss_rank_avg": 0.1598944067955017, "step": 1535, "valid_targets_mean": 1648.6, "valid_targets_min": 371 }, { "epoch": 4.935897435897436, "grad_norm": 0.525773314679964, "learning_rate": 2.207619424421381e-08, "loss": 0.4026, "loss_nan_ranks": 0, "loss_rank_avg": 0.20952820777893066, "step": 1540, "valid_targets_mean": 2557.2, "valid_targets_min": 543 }, { "epoch": 4.951923076923077, "grad_norm": 0.5186226211951818, "learning_rate": 1.281619499029274e-08, "loss": 0.4238, "loss_nan_ranks": 0, "loss_rank_avg": 0.23216500878334045, "step": 1545, "valid_targets_mean": 2368.4, "valid_targets_min": 554 }, { "epoch": 4.967948717948718, "grad_norm": 0.5881562216015847, "learning_rate": 6.057995888997248e-09, "loss": 0.384, "loss_nan_ranks": 0, "loss_rank_avg": 0.20564129948616028, "step": 1550, "valid_targets_mean": 1534.4, "valid_targets_min": 530 }, { "epoch": 4.983974358974359, "grad_norm": 0.5078947883853017, "learning_rate": 1.8024428655794012e-09, "loss": 0.3829, "loss_nan_ranks": 0, "loss_rank_avg": 0.21099376678466797, "step": 1555, "valid_targets_mean": 2262.2, "valid_targets_min": 342 }, { "epoch": 5.0, "grad_norm": 0.5570674341743406, "learning_rate": 5.00685885418406e-11, "loss": 0.4035, "loss_nan_ranks": 0, "loss_rank_avg": 0.24144230782985687, "step": 1560, "valid_targets_mean": 2344.4, "valid_targets_min": 827 }, { "epoch": 5.0, "loss_nan_ranks": 0, "loss_rank_avg": 0.24144230782985687, "step": 1560, "total_flos": 4.577212452957061e+17, "train_loss": 0.45931496092906365, "train_runtime": 11477.9832, "train_samples_per_second": 4.348, "train_steps_per_second": 0.136, "valid_targets_mean": 2344.4, "valid_targets_min": 827 } ], "logging_steps": 5, "max_steps": 1560, "num_input_tokens_seen": 0, "num_train_epochs": 5, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": false, "should_training_stop": false }, "attributes": {} } }, "total_flos": 4.577212452957061e+17, "train_batch_size": 1, "trial_name": null, "trial_params": null }