magicoder-evol-instruct-110k-sandboxes-traces-terminus-2_overwrite-output-dir_True / trainer_state.json
| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 5.0, | |
| "eval_steps": 500, | |
| "global_step": 1560, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.016025641025641024, | |
| "grad_norm": 8.270559284395055, | |
| "learning_rate": 1.0256410256410257e-06, | |
| "loss": 0.8831, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.5231465101242065, | |
| "step": 5, | |
| "valid_targets_mean": 2855.3, | |
| "valid_targets_min": 310 | |
| }, | |
| { | |
| "epoch": 0.03205128205128205, | |
| "grad_norm": 6.156057008347062, | |
| "learning_rate": 2.307692307692308e-06, | |
| "loss": 0.8578, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3894450068473816, | |
| "step": 10, | |
| "valid_targets_mean": 2836.0, | |
| "valid_targets_min": 343 | |
| }, | |
| { | |
| "epoch": 0.04807692307692308, | |
| "grad_norm": 4.055588313810187, | |
| "learning_rate": 3.58974358974359e-06, | |
| "loss": 0.8181, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.34461548924446106, | |
| "step": 15, | |
| "valid_targets_mean": 2279.3, | |
| "valid_targets_min": 397 | |
| }, | |
| { | |
| "epoch": 0.0641025641025641, | |
| "grad_norm": 2.9358204191090445, | |
| "learning_rate": 4.871794871794872e-06, | |
| "loss": 0.7542, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.37403905391693115, | |
| "step": 20, | |
| "valid_targets_mean": 2386.4, | |
| "valid_targets_min": 501 | |
| }, | |
| { | |
| "epoch": 0.08012820512820513, | |
| "grad_norm": 1.3469933311632771, | |
| "learning_rate": 6.153846153846155e-06, | |
| "loss": 0.7428, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3458966016769409, | |
| "step": 25, | |
| "valid_targets_mean": 2740.5, | |
| "valid_targets_min": 447 | |
| }, | |
| { | |
| "epoch": 0.09615384615384616, | |
| "grad_norm": 1.1608353294901592, | |
| "learning_rate": 7.435897435897437e-06, | |
| "loss": 0.7218, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.31971484422683716, | |
| "step": 30, | |
| "valid_targets_mean": 1962.6, | |
| "valid_targets_min": 279 | |
| }, | |
| { | |
| "epoch": 0.11217948717948718, | |
| "grad_norm": 1.067171533736777, | |
| "learning_rate": 8.717948717948719e-06, | |
| "loss": 0.6814, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4454614520072937, | |
| "step": 35, | |
| "valid_targets_mean": 2451.4, | |
| "valid_targets_min": 416 | |
| }, | |
| { | |
| "epoch": 0.1282051282051282, | |
| "grad_norm": 0.9774891251489538, | |
| "learning_rate": 1e-05, | |
| "loss": 0.7264, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3378103971481323, | |
| "step": 40, | |
| "valid_targets_mean": 1912.1, | |
| "valid_targets_min": 609 | |
| }, | |
| { | |
| "epoch": 0.14423076923076922, | |
| "grad_norm": 0.8317361111005913, | |
| "learning_rate": 1.1282051282051283e-05, | |
| "loss": 0.6642, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.37407323718070984, | |
| "step": 45, | |
| "valid_targets_mean": 1732.8, | |
| "valid_targets_min": 474 | |
| }, | |
| { | |
| "epoch": 0.16025641025641027, | |
| "grad_norm": 0.70420941452197, | |
| "learning_rate": 1.2564102564102565e-05, | |
| "loss": 0.6433, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3607226610183716, | |
| "step": 50, | |
| "valid_targets_mean": 2446.6, | |
| "valid_targets_min": 396 | |
| }, | |
| { | |
| "epoch": 0.1762820512820513, | |
| "grad_norm": 0.640275050687205, | |
| "learning_rate": 1.3846153846153847e-05, | |
| "loss": 0.6505, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2584564685821533, | |
| "step": 55, | |
| "valid_targets_mean": 1938.5, | |
| "valid_targets_min": 350 | |
| }, | |
| { | |
| "epoch": 0.19230769230769232, | |
| "grad_norm": 0.6130742352183888, | |
| "learning_rate": 1.5128205128205129e-05, | |
| "loss": 0.5794, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.29696235060691833, | |
| "step": 60, | |
| "valid_targets_mean": 1760.7, | |
| "valid_targets_min": 705 | |
| }, | |
| { | |
| "epoch": 0.20833333333333334, | |
| "grad_norm": 0.49753658846261495, | |
| "learning_rate": 1.641025641025641e-05, | |
| "loss": 0.5949, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23245546221733093, | |
| "step": 65, | |
| "valid_targets_mean": 2182.3, | |
| "valid_targets_min": 495 | |
| }, | |
| { | |
| "epoch": 0.22435897435897437, | |
| "grad_norm": 0.5248371244445572, | |
| "learning_rate": 1.7692307692307694e-05, | |
| "loss": 0.5511, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.27561211585998535, | |
| "step": 70, | |
| "valid_targets_mean": 2518.8, | |
| "valid_targets_min": 537 | |
| }, | |
| { | |
| "epoch": 0.2403846153846154, | |
| "grad_norm": 0.5696118966316656, | |
| "learning_rate": 1.8974358974358975e-05, | |
| "loss": 0.6166, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.27792054414749146, | |
| "step": 75, | |
| "valid_targets_mean": 2193.9, | |
| "valid_targets_min": 320 | |
| }, | |
| { | |
| "epoch": 0.2564102564102564, | |
| "grad_norm": 0.6067800362816257, | |
| "learning_rate": 2.025641025641026e-05, | |
| "loss": 0.5518, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3141236901283264, | |
| "step": 80, | |
| "valid_targets_mean": 1968.6, | |
| "valid_targets_min": 524 | |
| }, | |
| { | |
| "epoch": 0.2724358974358974, | |
| "grad_norm": 0.5486307014154258, | |
| "learning_rate": 2.153846153846154e-05, | |
| "loss": 0.5859, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.39914655685424805, | |
| "step": 85, | |
| "valid_targets_mean": 2844.7, | |
| "valid_targets_min": 652 | |
| }, | |
| { | |
| "epoch": 0.28846153846153844, | |
| "grad_norm": 0.5467022186742057, | |
| "learning_rate": 2.2820512820512822e-05, | |
| "loss": 0.5494, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.30601420998573303, | |
| "step": 90, | |
| "valid_targets_mean": 2242.3, | |
| "valid_targets_min": 287 | |
| }, | |
| { | |
| "epoch": 0.30448717948717946, | |
| "grad_norm": 0.5073453583636481, | |
| "learning_rate": 2.4102564102564103e-05, | |
| "loss": 0.5672, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.27047592401504517, | |
| "step": 95, | |
| "valid_targets_mean": 2702.0, | |
| "valid_targets_min": 623 | |
| }, | |
| { | |
| "epoch": 0.32051282051282054, | |
| "grad_norm": 0.6374453094826855, | |
| "learning_rate": 2.5384615384615386e-05, | |
| "loss": 0.5258, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.34897756576538086, | |
| "step": 100, | |
| "valid_targets_mean": 2111.6, | |
| "valid_targets_min": 632 | |
| }, | |
| { | |
| "epoch": 0.33653846153846156, | |
| "grad_norm": 0.4904714398710548, | |
| "learning_rate": 2.6666666666666667e-05, | |
| "loss": 0.5275, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2545839548110962, | |
| "step": 105, | |
| "valid_targets_mean": 2280.2, | |
| "valid_targets_min": 528 | |
| }, | |
| { | |
| "epoch": 0.3525641025641026, | |
| "grad_norm": 0.5374741002067597, | |
| "learning_rate": 2.794871794871795e-05, | |
| "loss": 0.5159, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.27961328625679016, | |
| "step": 110, | |
| "valid_targets_mean": 1705.3, | |
| "valid_targets_min": 483 | |
| }, | |
| { | |
| "epoch": 0.3685897435897436, | |
| "grad_norm": 0.5401678923394666, | |
| "learning_rate": 2.923076923076923e-05, | |
| "loss": 0.5378, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.28303176164627075, | |
| "step": 115, | |
| "valid_targets_mean": 2115.9, | |
| "valid_targets_min": 460 | |
| }, | |
| { | |
| "epoch": 0.38461538461538464, | |
| "grad_norm": 0.5697964720982368, | |
| "learning_rate": 3.0512820512820514e-05, | |
| "loss": 0.5136, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2926599979400635, | |
| "step": 120, | |
| "valid_targets_mean": 2276.1, | |
| "valid_targets_min": 491 | |
| }, | |
| { | |
| "epoch": 0.40064102564102566, | |
| "grad_norm": 0.5080731305686881, | |
| "learning_rate": 3.1794871794871795e-05, | |
| "loss": 0.5455, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21579426527023315, | |
| "step": 125, | |
| "valid_targets_mean": 1966.7, | |
| "valid_targets_min": 455 | |
| }, | |
| { | |
| "epoch": 0.4166666666666667, | |
| "grad_norm": 0.5193806958702459, | |
| "learning_rate": 3.307692307692308e-05, | |
| "loss": 0.5503, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23866981267929077, | |
| "step": 130, | |
| "valid_targets_mean": 1884.6, | |
| "valid_targets_min": 653 | |
| }, | |
| { | |
| "epoch": 0.4326923076923077, | |
| "grad_norm": 0.5215890997634938, | |
| "learning_rate": 3.435897435897436e-05, | |
| "loss": 0.5113, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22024506330490112, | |
| "step": 135, | |
| "valid_targets_mean": 2176.9, | |
| "valid_targets_min": 551 | |
| }, | |
| { | |
| "epoch": 0.44871794871794873, | |
| "grad_norm": 0.5428844999735067, | |
| "learning_rate": 3.5641025641025646e-05, | |
| "loss": 0.5327, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2842090129852295, | |
| "step": 140, | |
| "valid_targets_mean": 2108.4, | |
| "valid_targets_min": 395 | |
| }, | |
| { | |
| "epoch": 0.46474358974358976, | |
| "grad_norm": 0.5658357256109985, | |
| "learning_rate": 3.692307692307693e-05, | |
| "loss": 0.5012, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23009580373764038, | |
| "step": 145, | |
| "valid_targets_mean": 1416.6, | |
| "valid_targets_min": 319 | |
| }, | |
| { | |
| "epoch": 0.4807692307692308, | |
| "grad_norm": 0.5287699494547455, | |
| "learning_rate": 3.820512820512821e-05, | |
| "loss": 0.5442, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.25618451833724976, | |
| "step": 150, | |
| "valid_targets_mean": 2366.4, | |
| "valid_targets_min": 555 | |
| }, | |
| { | |
| "epoch": 0.4967948717948718, | |
| "grad_norm": 0.5510710484739612, | |
| "learning_rate": 3.948717948717949e-05, | |
| "loss": 0.5202, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.261417031288147, | |
| "step": 155, | |
| "valid_targets_mean": 1912.0, | |
| "valid_targets_min": 427 | |
| }, | |
| { | |
| "epoch": 0.5128205128205128, | |
| "grad_norm": 0.5223782503607328, | |
| "learning_rate": 3.999954938420724e-05, | |
| "loss": 0.538, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2376648485660553, | |
| "step": 160, | |
| "valid_targets_mean": 2203.6, | |
| "valid_targets_min": 520 | |
| }, | |
| { | |
| "epoch": 0.5288461538461539, | |
| "grad_norm": 0.5639669943753522, | |
| "learning_rate": 3.9996795694563096e-05, | |
| "loss": 0.5519, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.26910513639450073, | |
| "step": 165, | |
| "valid_targets_mean": 1978.4, | |
| "valid_targets_min": 452 | |
| }, | |
| { | |
| "epoch": 0.5448717948717948, | |
| "grad_norm": 0.5934805718745008, | |
| "learning_rate": 3.9991539001644015e-05, | |
| "loss": 0.4864, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21558544039726257, | |
| "step": 170, | |
| "valid_targets_mean": 1433.1, | |
| "valid_targets_min": 291 | |
| }, | |
| { | |
| "epoch": 0.5608974358974359, | |
| "grad_norm": 0.5444507017809859, | |
| "learning_rate": 3.998377996343139e-05, | |
| "loss": 0.5402, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22888971865177155, | |
| "step": 175, | |
| "valid_targets_mean": 2303.6, | |
| "valid_targets_min": 665 | |
| }, | |
| { | |
| "epoch": 0.5769230769230769, | |
| "grad_norm": 0.5993323841226175, | |
| "learning_rate": 3.9973519551125746e-05, | |
| "loss": 0.5472, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2046593725681305, | |
| "step": 180, | |
| "valid_targets_mean": 1417.7, | |
| "valid_targets_min": 614 | |
| }, | |
| { | |
| "epoch": 0.592948717948718, | |
| "grad_norm": 0.552244150619254, | |
| "learning_rate": 3.99607590490251e-05, | |
| "loss": 0.5254, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.240033358335495, | |
| "step": 185, | |
| "valid_targets_mean": 1792.0, | |
| "valid_targets_min": 356 | |
| }, | |
| { | |
| "epoch": 0.6089743589743589, | |
| "grad_norm": 0.5676943862267904, | |
| "learning_rate": 3.994550005436431e-05, | |
| "loss": 0.5232, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2360553741455078, | |
| "step": 190, | |
| "valid_targets_mean": 1990.2, | |
| "valid_targets_min": 571 | |
| }, | |
| { | |
| "epoch": 0.625, | |
| "grad_norm": 0.5686816790686919, | |
| "learning_rate": 3.992774447711503e-05, | |
| "loss": 0.5712, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3569658696651459, | |
| "step": 195, | |
| "valid_targets_mean": 2023.1, | |
| "valid_targets_min": 425 | |
| }, | |
| { | |
| "epoch": 0.6410256410256411, | |
| "grad_norm": 0.593578700387874, | |
| "learning_rate": 3.990749453974676e-05, | |
| "loss": 0.5151, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2537527084350586, | |
| "step": 200, | |
| "valid_targets_mean": 1794.6, | |
| "valid_targets_min": 397 | |
| }, | |
| { | |
| "epoch": 0.657051282051282, | |
| "grad_norm": 0.626123448487498, | |
| "learning_rate": 3.9884752776948564e-05, | |
| "loss": 0.5111, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20010629296302795, | |
| "step": 205, | |
| "valid_targets_mean": 1342.0, | |
| "valid_targets_min": 328 | |
| }, | |
| { | |
| "epoch": 0.6730769230769231, | |
| "grad_norm": 0.5180554845628587, | |
| "learning_rate": 3.985952203531184e-05, | |
| "loss": 0.5206, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.31396254897117615, | |
| "step": 210, | |
| "valid_targets_mean": 2744.9, | |
| "valid_targets_min": 1089 | |
| }, | |
| { | |
| "epoch": 0.6891025641025641, | |
| "grad_norm": 0.6345163141192973, | |
| "learning_rate": 3.983180547297404e-05, | |
| "loss": 0.5026, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24182447791099548, | |
| "step": 215, | |
| "valid_targets_mean": 1557.4, | |
| "valid_targets_min": 461 | |
| }, | |
| { | |
| "epoch": 0.7051282051282052, | |
| "grad_norm": 0.6054353969461813, | |
| "learning_rate": 3.9801606559223286e-05, | |
| "loss": 0.5125, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2920263409614563, | |
| "step": 220, | |
| "valid_targets_mean": 2164.1, | |
| "valid_targets_min": 542 | |
| }, | |
| { | |
| "epoch": 0.7211538461538461, | |
| "grad_norm": 0.5253840091115919, | |
| "learning_rate": 3.9768929074064206e-05, | |
| "loss": 0.5073, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.26178407669067383, | |
| "step": 225, | |
| "valid_targets_mean": 2276.9, | |
| "valid_targets_min": 508 | |
| }, | |
| { | |
| "epoch": 0.7371794871794872, | |
| "grad_norm": 0.6036771997042879, | |
| "learning_rate": 3.973377710774474e-05, | |
| "loss": 0.5416, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.30116069316864014, | |
| "step": 230, | |
| "valid_targets_mean": 1931.5, | |
| "valid_targets_min": 465 | |
| }, | |
| { | |
| "epoch": 0.7532051282051282, | |
| "grad_norm": 0.5341073955982537, | |
| "learning_rate": 3.9696155060244166e-05, | |
| "loss": 0.4925, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2869190275669098, | |
| "step": 235, | |
| "valid_targets_mean": 2469.9, | |
| "valid_targets_min": 390 | |
| }, | |
| { | |
| "epoch": 0.7692307692307693, | |
| "grad_norm": 0.5391946249539421, | |
| "learning_rate": 3.965606764072237e-05, | |
| "loss": 0.5149, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23443245887756348, | |
| "step": 240, | |
| "valid_targets_mean": 2517.3, | |
| "valid_targets_min": 458 | |
| }, | |
| { | |
| "epoch": 0.7852564102564102, | |
| "grad_norm": 0.48726365599779214, | |
| "learning_rate": 3.96135198669304e-05, | |
| "loss": 0.5111, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2194036841392517, | |
| "step": 245, | |
| "valid_targets_mean": 2148.2, | |
| "valid_targets_min": 284 | |
| }, | |
| { | |
| "epoch": 0.8012820512820513, | |
| "grad_norm": 0.4546312777762876, | |
| "learning_rate": 3.956851706458236e-05, | |
| "loss": 0.4951, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2413400411605835, | |
| "step": 250, | |
| "valid_targets_mean": 3277.6, | |
| "valid_targets_min": 401 | |
| }, | |
| { | |
| "epoch": 0.8173076923076923, | |
| "grad_norm": 0.43195116515643417, | |
| "learning_rate": 3.952106486668884e-05, | |
| "loss": 0.4941, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24353349208831787, | |
| "step": 255, | |
| "valid_targets_mean": 2430.8, | |
| "valid_targets_min": 354 | |
| }, | |
| { | |
| "epoch": 0.8333333333333334, | |
| "grad_norm": 0.5431449210079052, | |
| "learning_rate": 3.9471169212851774e-05, | |
| "loss": 0.4859, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3241618275642395, | |
| "step": 260, | |
| "valid_targets_mean": 2396.5, | |
| "valid_targets_min": 559 | |
| }, | |
| { | |
| "epoch": 0.8493589743589743, | |
| "grad_norm": 0.4983128230175159, | |
| "learning_rate": 3.9418836348521045e-05, | |
| "loss": 0.507, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.28434205055236816, | |
| "step": 265, | |
| "valid_targets_mean": 2714.4, | |
| "valid_targets_min": 589 | |
| }, | |
| { | |
| "epoch": 0.8653846153846154, | |
| "grad_norm": 0.4433055547977104, | |
| "learning_rate": 3.936407282421267e-05, | |
| "loss": 0.489, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2021905481815338, | |
| "step": 270, | |
| "valid_targets_mean": 2192.3, | |
| "valid_targets_min": 420 | |
| }, | |
| { | |
| "epoch": 0.8814102564102564, | |
| "grad_norm": 0.5372540899641901, | |
| "learning_rate": 3.930688549468894e-05, | |
| "loss": 0.4962, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3100685477256775, | |
| "step": 275, | |
| "valid_targets_mean": 2431.0, | |
| "valid_targets_min": 523 | |
| }, | |
| { | |
| "epoch": 0.8974358974358975, | |
| "grad_norm": 0.48553038032931456, | |
| "learning_rate": 3.924728151810034e-05, | |
| "loss": 0.5118, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.27802667021751404, | |
| "step": 280, | |
| "valid_targets_mean": 2424.8, | |
| "valid_targets_min": 596 | |
| }, | |
| { | |
| "epoch": 0.9134615384615384, | |
| "grad_norm": 0.5494166379111899, | |
| "learning_rate": 3.9185268355089606e-05, | |
| "loss": 0.4939, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2717832922935486, | |
| "step": 285, | |
| "valid_targets_mean": 1919.1, | |
| "valid_targets_min": 500 | |
| }, | |
| { | |
| "epoch": 0.9294871794871795, | |
| "grad_norm": 0.4690629180894029, | |
| "learning_rate": 3.912085376785788e-05, | |
| "loss": 0.4929, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20420989394187927, | |
| "step": 290, | |
| "valid_targets_mean": 1990.7, | |
| "valid_targets_min": 498 | |
| }, | |
| { | |
| "epoch": 0.9455128205128205, | |
| "grad_norm": 0.47364731395403387, | |
| "learning_rate": 3.9054045819193074e-05, | |
| "loss": 0.4596, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.283137708902359, | |
| "step": 295, | |
| "valid_targets_mean": 3114.4, | |
| "valid_targets_min": 389 | |
| }, | |
| { | |
| "epoch": 0.9615384615384616, | |
| "grad_norm": 0.43834286352294405, | |
| "learning_rate": 3.898485287146068e-05, | |
| "loss": 0.4846, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22700203955173492, | |
| "step": 300, | |
| "valid_targets_mean": 3102.8, | |
| "valid_targets_min": 674 | |
| }, | |
| { | |
| "epoch": 0.9775641025641025, | |
| "grad_norm": 0.544832909701008, | |
| "learning_rate": 3.8913283585557054e-05, | |
| "loss": 0.488, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24065542221069336, | |
| "step": 305, | |
| "valid_targets_mean": 2371.9, | |
| "valid_targets_min": 342 | |
| }, | |
| { | |
| "epoch": 0.9935897435897436, | |
| "grad_norm": 0.6034996255357719, | |
| "learning_rate": 3.8839346919825304e-05, | |
| "loss": 0.5011, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.252093642950058, | |
| "step": 310, | |
| "valid_targets_mean": 1962.2, | |
| "valid_targets_min": 339 | |
| }, | |
| { | |
| "epoch": 1.0096153846153846, | |
| "grad_norm": 0.48322361076674575, | |
| "learning_rate": 3.876305212893399e-05, | |
| "loss": 0.4662, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.25063151121139526, | |
| "step": 315, | |
| "valid_targets_mean": 2658.9, | |
| "valid_targets_min": 507 | |
| }, | |
| { | |
| "epoch": 1.0256410256410255, | |
| "grad_norm": 0.6133158850042538, | |
| "learning_rate": 3.868440876271871e-05, | |
| "loss": 0.4777, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.29740214347839355, | |
| "step": 320, | |
| "valid_targets_mean": 1867.5, | |
| "valid_targets_min": 392 | |
| }, | |
| { | |
| "epoch": 1.0416666666666667, | |
| "grad_norm": 0.541900985104166, | |
| "learning_rate": 3.860342666498677e-05, | |
| "loss": 0.4866, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21024353802204132, | |
| "step": 325, | |
| "valid_targets_mean": 1411.2, | |
| "valid_targets_min": 322 | |
| }, | |
| { | |
| "epoch": 1.0576923076923077, | |
| "grad_norm": 0.6020609614604076, | |
| "learning_rate": 3.8520115972284975e-05, | |
| "loss": 0.4639, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24874147772789001, | |
| "step": 330, | |
| "valid_targets_mean": 1505.4, | |
| "valid_targets_min": 372 | |
| }, | |
| { | |
| "epoch": 1.0737179487179487, | |
| "grad_norm": 0.5637623048449515, | |
| "learning_rate": 3.843448711263089e-05, | |
| "loss": 0.5009, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3167960047721863, | |
| "step": 335, | |
| "valid_targets_mean": 2400.1, | |
| "valid_targets_min": 471 | |
| }, | |
| { | |
| "epoch": 1.0897435897435896, | |
| "grad_norm": 0.482423724630125, | |
| "learning_rate": 3.8346550804207544e-05, | |
| "loss": 0.453, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2796696722507477, | |
| "step": 340, | |
| "valid_targets_mean": 2607.9, | |
| "valid_targets_min": 358 | |
| }, | |
| { | |
| "epoch": 1.1057692307692308, | |
| "grad_norm": 0.42709049123064025, | |
| "learning_rate": 3.825631805402182e-05, | |
| "loss": 0.4675, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.18846507370471954, | |
| "step": 345, | |
| "valid_targets_mean": 3064.7, | |
| "valid_targets_min": 484 | |
| }, | |
| { | |
| "epoch": 1.1217948717948718, | |
| "grad_norm": 0.45654149738060995, | |
| "learning_rate": 3.816380015652672e-05, | |
| "loss": 0.4711, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22047562897205353, | |
| "step": 350, | |
| "valid_targets_mean": 2670.4, | |
| "valid_targets_min": 497 | |
| }, | |
| { | |
| "epoch": 1.1378205128205128, | |
| "grad_norm": 0.46259303685534825, | |
| "learning_rate": 3.806900869220765e-05, | |
| "loss": 0.4289, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21656641364097595, | |
| "step": 355, | |
| "valid_targets_mean": 2637.6, | |
| "valid_targets_min": 371 | |
| }, | |
| { | |
| "epoch": 1.1538461538461537, | |
| "grad_norm": 0.4795462410657758, | |
| "learning_rate": 3.797195552613284e-05, | |
| "loss": 0.4347, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.28732848167419434, | |
| "step": 360, | |
| "valid_targets_mean": 2804.9, | |
| "valid_targets_min": 972 | |
| }, | |
| { | |
| "epoch": 1.169871794871795, | |
| "grad_norm": 0.544805070309915, | |
| "learning_rate": 3.787265280646825e-05, | |
| "loss": 0.4612, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20876461267471313, | |
| "step": 365, | |
| "valid_targets_mean": 1629.6, | |
| "valid_targets_min": 470 | |
| }, | |
| { | |
| "epoch": 1.185897435897436, | |
| "grad_norm": 0.6219497217509353, | |
| "learning_rate": 3.7771112962956936e-05, | |
| "loss": 0.4925, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24284450709819794, | |
| "step": 370, | |
| "valid_targets_mean": 1362.9, | |
| "valid_targets_min": 362 | |
| }, | |
| { | |
| "epoch": 1.2019230769230769, | |
| "grad_norm": 0.6219562430857214, | |
| "learning_rate": 3.7667348705363227e-05, | |
| "loss": 0.4846, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.26131337881088257, | |
| "step": 375, | |
| "valid_targets_mean": 2042.5, | |
| "valid_targets_min": 538 | |
| }, | |
| { | |
| "epoch": 1.217948717948718, | |
| "grad_norm": 0.5273423780621191, | |
| "learning_rate": 3.7561373021881885e-05, | |
| "loss": 0.4764, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20557957887649536, | |
| "step": 380, | |
| "valid_targets_mean": 2154.4, | |
| "valid_targets_min": 482 | |
| }, | |
| { | |
| "epoch": 1.233974358974359, | |
| "grad_norm": 0.5940055633638739, | |
| "learning_rate": 3.745319917751229e-05, | |
| "loss": 0.458, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.19809526205062866, | |
| "step": 385, | |
| "valid_targets_mean": 1356.8, | |
| "valid_targets_min": 496 | |
| }, | |
| { | |
| "epoch": 1.25, | |
| "grad_norm": 0.604986566337123, | |
| "learning_rate": 3.734284071239811e-05, | |
| "loss": 0.4753, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3080754280090332, | |
| "step": 390, | |
| "valid_targets_mean": 2211.4, | |
| "valid_targets_min": 405 | |
| }, | |
| { | |
| "epoch": 1.266025641025641, | |
| "grad_norm": 0.6686876845094236, | |
| "learning_rate": 3.7230311440132494e-05, | |
| "loss": 0.4605, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2461152821779251, | |
| "step": 395, | |
| "valid_targets_mean": 1261.0, | |
| "valid_targets_min": 384 | |
| }, | |
| { | |
| "epoch": 1.282051282051282, | |
| "grad_norm": 0.5383774605862381, | |
| "learning_rate": 3.711562544602895e-05, | |
| "loss": 0.47, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2216894030570984, | |
| "step": 400, | |
| "valid_targets_mean": 1972.8, | |
| "valid_targets_min": 572 | |
| }, | |
| { | |
| "epoch": 1.2980769230769231, | |
| "grad_norm": 0.6237423049196263, | |
| "learning_rate": 3.699879708535838e-05, | |
| "loss": 0.4964, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3525855839252472, | |
| "step": 405, | |
| "valid_targets_mean": 2608.8, | |
| "valid_targets_min": 553 | |
| }, | |
| { | |
| "epoch": 1.314102564102564, | |
| "grad_norm": 0.47346493238221604, | |
| "learning_rate": 3.687984098155212e-05, | |
| "loss": 0.4721, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23883157968521118, | |
| "step": 410, | |
| "valid_targets_mean": 2648.7, | |
| "valid_targets_min": 559 | |
| }, | |
| { | |
| "epoch": 1.330128205128205, | |
| "grad_norm": 0.6009852407301991, | |
| "learning_rate": 3.6758772024371626e-05, | |
| "loss": 0.4985, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.31522154808044434, | |
| "step": 415, | |
| "valid_targets_mean": 1891.1, | |
| "valid_targets_min": 369 | |
| }, | |
| { | |
| "epoch": 1.3461538461538463, | |
| "grad_norm": 0.5221802033027123, | |
| "learning_rate": 3.663560536804465e-05, | |
| "loss": 0.4619, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2520959973335266, | |
| "step": 420, | |
| "valid_targets_mean": 2271.7, | |
| "valid_targets_min": 479 | |
| }, | |
| { | |
| "epoch": 1.3621794871794872, | |
| "grad_norm": 0.5462328875847934, | |
| "learning_rate": 3.65103564293684e-05, | |
| "loss": 0.4923, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.28057146072387695, | |
| "step": 425, | |
| "valid_targets_mean": 1829.6, | |
| "valid_targets_min": 349 | |
| }, | |
| { | |
| "epoch": 1.3782051282051282, | |
| "grad_norm": 0.5230511743019115, | |
| "learning_rate": 3.638304088577984e-05, | |
| "loss": 0.4479, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20385369658470154, | |
| "step": 430, | |
| "valid_targets_mean": 2099.4, | |
| "valid_targets_min": 450 | |
| }, | |
| { | |
| "epoch": 1.3942307692307692, | |
| "grad_norm": 0.5260439150355632, | |
| "learning_rate": 3.625367467339329e-05, | |
| "loss": 0.4409, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.25196361541748047, | |
| "step": 435, | |
| "valid_targets_mean": 1848.4, | |
| "valid_targets_min": 374 | |
| }, | |
| { | |
| "epoch": 1.4102564102564101, | |
| "grad_norm": 0.542530019050112, | |
| "learning_rate": 3.612227398500575e-05, | |
| "loss": 0.4625, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2730902433395386, | |
| "step": 440, | |
| "valid_targets_mean": 1855.6, | |
| "valid_targets_min": 539 | |
| }, | |
| { | |
| "epoch": 1.4262820512820513, | |
| "grad_norm": 0.5362598043278441, | |
| "learning_rate": 3.598885526807003e-05, | |
| "loss": 0.4645, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20158651471138, | |
| "step": 445, | |
| "valid_targets_mean": 1960.3, | |
| "valid_targets_min": 330 | |
| }, | |
| { | |
| "epoch": 1.4423076923076923, | |
| "grad_norm": 0.5038481547905878, | |
| "learning_rate": 3.585343522263599e-05, | |
| "loss": 0.4576, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23150920867919922, | |
| "step": 450, | |
| "valid_targets_mean": 2077.7, | |
| "valid_targets_min": 685 | |
| }, | |
| { | |
| "epoch": 1.4583333333333333, | |
| "grad_norm": 0.6465319041135404, | |
| "learning_rate": 3.571603079926024e-05, | |
| "loss": 0.4734, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2389899045228958, | |
| "step": 455, | |
| "valid_targets_mean": 1801.0, | |
| "valid_targets_min": 435 | |
| }, | |
| { | |
| "epoch": 1.4743589743589745, | |
| "grad_norm": 0.5472386327122035, | |
| "learning_rate": 3.5576659196884395e-05, | |
| "loss": 0.5179, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24516533315181732, | |
| "step": 460, | |
| "valid_targets_mean": 1980.1, | |
| "valid_targets_min": 436 | |
| }, | |
| { | |
| "epoch": 1.4903846153846154, | |
| "grad_norm": 0.5414486709041462, | |
| "learning_rate": 3.5435337860682304e-05, | |
| "loss": 0.464, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2550250291824341, | |
| "step": 465, | |
| "valid_targets_mean": 1814.8, | |
| "valid_targets_min": 343 | |
| }, | |
| { | |
| "epoch": 1.5064102564102564, | |
| "grad_norm": 0.4560866540685229, | |
| "learning_rate": 3.529208447987641e-05, | |
| "loss": 0.4928, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1900494247674942, | |
| "step": 470, | |
| "valid_targets_mean": 2029.3, | |
| "valid_targets_min": 353 | |
| }, | |
| { | |
| "epoch": 1.5224358974358974, | |
| "grad_norm": 0.5257829172034205, | |
| "learning_rate": 3.5146916985523604e-05, | |
| "loss": 0.5122, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.25625720620155334, | |
| "step": 475, | |
| "valid_targets_mean": 2358.8, | |
| "valid_targets_min": 708 | |
| }, | |
| { | |
| "epoch": 1.5384615384615383, | |
| "grad_norm": 0.5000203832116034, | |
| "learning_rate": 3.499985354827079e-05, | |
| "loss": 0.4661, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.19569867849349976, | |
| "step": 480, | |
| "valid_targets_mean": 1793.0, | |
| "valid_targets_min": 407 | |
| }, | |
| { | |
| "epoch": 1.5544871794871795, | |
| "grad_norm": 0.5311956681183098, | |
| "learning_rate": 3.485091257608047e-05, | |
| "loss": 0.4946, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2879672348499298, | |
| "step": 485, | |
| "valid_targets_mean": 2306.1, | |
| "valid_targets_min": 383 | |
| }, | |
| { | |
| "epoch": 1.5705128205128205, | |
| "grad_norm": 0.48828567970956843, | |
| "learning_rate": 3.4700112711926574e-05, | |
| "loss": 0.4632, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24961577355861664, | |
| "step": 490, | |
| "valid_targets_mean": 2378.9, | |
| "valid_targets_min": 494 | |
| }, | |
| { | |
| "epoch": 1.5865384615384617, | |
| "grad_norm": 0.6003720701146319, | |
| "learning_rate": 3.4547472831460976e-05, | |
| "loss": 0.4662, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2577670216560364, | |
| "step": 495, | |
| "valid_targets_mean": 1839.1, | |
| "valid_targets_min": 463 | |
| }, | |
| { | |
| "epoch": 1.6025641025641026, | |
| "grad_norm": 0.556322591943896, | |
| "learning_rate": 3.439301204065077e-05, | |
| "loss": 0.468, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20165708661079407, | |
| "step": 500, | |
| "valid_targets_mean": 1302.6, | |
| "valid_targets_min": 365 | |
| }, | |
| { | |
| "epoch": 1.6185897435897436, | |
| "grad_norm": 0.4863359326950338, | |
| "learning_rate": 3.423674967338681e-05, | |
| "loss": 0.496, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24409791827201843, | |
| "step": 505, | |
| "valid_targets_mean": 2180.1, | |
| "valid_targets_min": 564 | |
| }, | |
| { | |
| "epoch": 1.6346153846153846, | |
| "grad_norm": 0.5132971252924721, | |
| "learning_rate": 3.407870528906366e-05, | |
| "loss": 0.4911, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2089085876941681, | |
| "step": 510, | |
| "valid_targets_mean": 1815.9, | |
| "valid_targets_min": 376 | |
| }, | |
| { | |
| "epoch": 1.6506410256410255, | |
| "grad_norm": 0.44374872733889337, | |
| "learning_rate": 3.391889867013134e-05, | |
| "loss": 0.4728, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.25746238231658936, | |
| "step": 515, | |
| "valid_targets_mean": 3330.2, | |
| "valid_targets_min": 535 | |
| }, | |
| { | |
| "epoch": 1.6666666666666665, | |
| "grad_norm": 0.41541671583956447, | |
| "learning_rate": 3.375734981961918e-05, | |
| "loss": 0.4579, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.19313707947731018, | |
| "step": 520, | |
| "valid_targets_mean": 2933.1, | |
| "valid_targets_min": 641 | |
| }, | |
| { | |
| "epoch": 1.6826923076923077, | |
| "grad_norm": 0.5025527033239986, | |
| "learning_rate": 3.359407895863199e-05, | |
| "loss": 0.4777, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2544252574443817, | |
| "step": 525, | |
| "valid_targets_mean": 2657.9, | |
| "valid_targets_min": 634 | |
| }, | |
| { | |
| "epoch": 1.6987179487179487, | |
| "grad_norm": 0.5010762849646863, | |
| "learning_rate": 3.342910652381902e-05, | |
| "loss": 0.4582, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.253082811832428, | |
| "step": 530, | |
| "valid_targets_mean": 2349.6, | |
| "valid_targets_min": 444 | |
| }, | |
| { | |
| "epoch": 1.7147435897435899, | |
| "grad_norm": 0.5050510280029894, | |
| "learning_rate": 3.326245316481591e-05, | |
| "loss": 0.4707, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2477722316980362, | |
| "step": 535, | |
| "valid_targets_mean": 1996.2, | |
| "valid_targets_min": 477 | |
| }, | |
| { | |
| "epoch": 1.7307692307692308, | |
| "grad_norm": 0.5765193223693661, | |
| "learning_rate": 3.30941397416599e-05, | |
| "loss": 0.4785, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23156100511550903, | |
| "step": 540, | |
| "valid_targets_mean": 1675.4, | |
| "valid_targets_min": 481 | |
| }, | |
| { | |
| "epoch": 1.7467948717948718, | |
| "grad_norm": 0.4560642161468493, | |
| "learning_rate": 3.2924187322178865e-05, | |
| "loss": 0.4794, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23049210011959076, | |
| "step": 545, | |
| "valid_targets_mean": 2340.4, | |
| "valid_targets_min": 392 | |
| }, | |
| { | |
| "epoch": 1.7628205128205128, | |
| "grad_norm": 0.4922377750065325, | |
| "learning_rate": 3.275261717935417e-05, | |
| "loss": 0.4764, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24752816557884216, | |
| "step": 550, | |
| "valid_targets_mean": 2367.2, | |
| "valid_targets_min": 476 | |
| }, | |
| { | |
| "epoch": 1.7788461538461537, | |
| "grad_norm": 0.4426230788977972, | |
| "learning_rate": 3.2579450788657997e-05, | |
| "loss": 0.4677, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21346285939216614, | |
| "step": 555, | |
| "valid_targets_mean": 2292.7, | |
| "valid_targets_min": 432 | |
| }, | |
| { | |
| "epoch": 1.7948717948717947, | |
| "grad_norm": 0.624429526299482, | |
| "learning_rate": 3.2404709825365204e-05, | |
| "loss": 0.4717, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22740063071250916, | |
| "step": 560, | |
| "valid_targets_mean": 1412.7, | |
| "valid_targets_min": 517 | |
| }, | |
| { | |
| "epoch": 1.810897435897436, | |
| "grad_norm": 0.555277859130487, | |
| "learning_rate": 3.222841616184025e-05, | |
| "loss": 0.4877, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.32376670837402344, | |
| "step": 565, | |
| "valid_targets_mean": 2239.8, | |
| "valid_targets_min": 373 | |
| }, | |
| { | |
| "epoch": 1.8269230769230769, | |
| "grad_norm": 0.6868507053050372, | |
| "learning_rate": 3.2050591864799406e-05, | |
| "loss": 0.454, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2284863293170929, | |
| "step": 570, | |
| "valid_targets_mean": 2318.9, | |
| "valid_targets_min": 465 | |
| }, | |
| { | |
| "epoch": 1.842948717948718, | |
| "grad_norm": 0.5636785309744979, | |
| "learning_rate": 3.187125919254869e-05, | |
| "loss": 0.5021, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20378993451595306, | |
| "step": 575, | |
| "valid_targets_mean": 1406.4, | |
| "valid_targets_min": 382 | |
| }, | |
| { | |
| "epoch": 1.858974358974359, | |
| "grad_norm": 0.5138362567576862, | |
| "learning_rate": 3.169044059219778e-05, | |
| "loss": 0.472, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24885600805282593, | |
| "step": 580, | |
| "valid_targets_mean": 2108.1, | |
| "valid_targets_min": 297 | |
| }, | |
| { | |
| "epoch": 1.875, | |
| "grad_norm": 0.4788276150250492, | |
| "learning_rate": 3.1508158696850275e-05, | |
| "loss": 0.4439, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2000913918018341, | |
| "step": 585, | |
| "valid_targets_mean": 1723.1, | |
| "valid_targets_min": 532 | |
| }, | |
| { | |
| "epoch": 1.891025641025641, | |
| "grad_norm": 0.5199878797975349, | |
| "learning_rate": 3.132443632277075e-05, | |
| "loss": 0.4341, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1967633068561554, | |
| "step": 590, | |
| "valid_targets_mean": 1640.8, | |
| "valid_targets_min": 336 | |
| }, | |
| { | |
| "epoch": 1.907051282051282, | |
| "grad_norm": 0.5120839295430387, | |
| "learning_rate": 3.113929646652879e-05, | |
| "loss": 0.4752, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.25105994939804077, | |
| "step": 595, | |
| "valid_targets_mean": 2039.7, | |
| "valid_targets_min": 647 | |
| }, | |
| { | |
| "epoch": 1.9230769230769231, | |
| "grad_norm": 0.4639112950762158, | |
| "learning_rate": 3.095276230212056e-05, | |
| "loss": 0.4696, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2098376452922821, | |
| "step": 600, | |
| "valid_targets_mean": 2136.6, | |
| "valid_targets_min": 495 | |
| }, | |
| { | |
| "epoch": 1.939102564102564, | |
| "grad_norm": 0.5166631970577467, | |
| "learning_rate": 3.076485717806808e-05, | |
| "loss": 0.4703, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.18611370027065277, | |
| "step": 605, | |
| "valid_targets_mean": 1350.6, | |
| "valid_targets_min": 575 | |
| }, | |
| { | |
| "epoch": 1.9551282051282053, | |
| "grad_norm": 0.4745285139732179, | |
| "learning_rate": 3.057560461449665e-05, | |
| "loss": 0.457, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.252684623003006, | |
| "step": 610, | |
| "valid_targets_mean": 2834.0, | |
| "valid_targets_min": 460 | |
| }, | |
| { | |
| "epoch": 1.9711538461538463, | |
| "grad_norm": 0.48419329402472827, | |
| "learning_rate": 3.038502830019092e-05, | |
| "loss": 0.4735, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.25076764822006226, | |
| "step": 615, | |
| "valid_targets_mean": 2311.8, | |
| "valid_targets_min": 535 | |
| }, | |
| { | |
| "epoch": 1.9871794871794872, | |
| "grad_norm": 0.4856357542202215, | |
| "learning_rate": 3.019315208962968e-05, | |
| "loss": 0.4564, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22748295962810516, | |
| "step": 620, | |
| "valid_targets_mean": 1729.2, | |
| "valid_targets_min": 404 | |
| }, | |
| { | |
| "epoch": 2.003205128205128, | |
| "grad_norm": 0.5415603488319026, | |
| "learning_rate": 3.0000000000000004e-05, | |
| "loss": 0.4663, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.25027239322662354, | |
| "step": 625, | |
| "valid_targets_mean": 2599.8, | |
| "valid_targets_min": 446 | |
| }, | |
| { | |
| "epoch": 2.019230769230769, | |
| "grad_norm": 0.5616447815364394, | |
| "learning_rate": 2.9805596208191056e-05, | |
| "loss": 0.4626, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.31486913561820984, | |
| "step": 630, | |
| "valid_targets_mean": 2342.2, | |
| "valid_targets_min": 481 | |
| }, | |
| { | |
| "epoch": 2.03525641025641, | |
| "grad_norm": 0.4616279441242081, | |
| "learning_rate": 2.960996504776783e-05, | |
| "loss": 0.4494, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2557230293750763, | |
| "step": 635, | |
| "valid_targets_mean": 3251.1, | |
| "valid_targets_min": 374 | |
| }, | |
| { | |
| "epoch": 2.051282051282051, | |
| "grad_norm": 0.5503041087281749, | |
| "learning_rate": 2.9413131005925296e-05, | |
| "loss": 0.4178, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.18771244585514069, | |
| "step": 640, | |
| "valid_targets_mean": 1542.1, | |
| "valid_targets_min": 405 | |
| }, | |
| { | |
| "epoch": 2.0673076923076925, | |
| "grad_norm": 0.5145185224875077, | |
| "learning_rate": 2.9215118720423375e-05, | |
| "loss": 0.4356, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20002877712249756, | |
| "step": 645, | |
| "valid_targets_mean": 2503.4, | |
| "valid_targets_min": 679 | |
| }, | |
| { | |
| "epoch": 2.0833333333333335, | |
| "grad_norm": 0.5259222795664407, | |
| "learning_rate": 2.9015952976502994e-05, | |
| "loss": 0.4819, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22057735919952393, | |
| "step": 650, | |
| "valid_targets_mean": 2547.9, | |
| "valid_targets_min": 329 | |
| }, | |
| { | |
| "epoch": 2.0993589743589745, | |
| "grad_norm": 0.49973317567273595, | |
| "learning_rate": 2.8815658703783715e-05, | |
| "loss": 0.4179, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20730602741241455, | |
| "step": 655, | |
| "valid_targets_mean": 2237.2, | |
| "valid_targets_min": 603 | |
| }, | |
| { | |
| "epoch": 2.1153846153846154, | |
| "grad_norm": 0.468034186388587, | |
| "learning_rate": 2.8614260973143318e-05, | |
| "loss": 0.4579, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2634885311126709, | |
| "step": 660, | |
| "valid_targets_mean": 2994.0, | |
| "valid_targets_min": 613 | |
| }, | |
| { | |
| "epoch": 2.1314102564102564, | |
| "grad_norm": 0.5188728303293624, | |
| "learning_rate": 2.8411784993579633e-05, | |
| "loss": 0.4465, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.32055070996284485, | |
| "step": 665, | |
| "valid_targets_mean": 2799.9, | |
| "valid_targets_min": 464 | |
| }, | |
| { | |
| "epoch": 2.1474358974358974, | |
| "grad_norm": 0.5727755387928921, | |
| "learning_rate": 2.820825610905514e-05, | |
| "loss": 0.4517, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21268504858016968, | |
| "step": 670, | |
| "valid_targets_mean": 1740.4, | |
| "valid_targets_min": 376 | |
| }, | |
| { | |
| "epoch": 2.1634615384615383, | |
| "grad_norm": 0.5102342820268265, | |
| "learning_rate": 2.8003699795324674e-05, | |
| "loss": 0.4343, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1873595267534256, | |
| "step": 675, | |
| "valid_targets_mean": 1921.6, | |
| "valid_targets_min": 393 | |
| }, | |
| { | |
| "epoch": 2.1794871794871793, | |
| "grad_norm": 0.5212794879302061, | |
| "learning_rate": 2.7798141656746606e-05, | |
| "loss": 0.447, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2737618684768677, | |
| "step": 680, | |
| "valid_targets_mean": 2370.6, | |
| "valid_targets_min": 346 | |
| }, | |
| { | |
| "epoch": 2.1955128205128207, | |
| "grad_norm": 0.4882810001947318, | |
| "learning_rate": 2.7591607423077932e-05, | |
| "loss": 0.481, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13172554969787598, | |
| "step": 685, | |
| "valid_targets_mean": 1362.9, | |
| "valid_targets_min": 382 | |
| }, | |
| { | |
| "epoch": 2.2115384615384617, | |
| "grad_norm": 0.6207677882628246, | |
| "learning_rate": 2.738412294625369e-05, | |
| "loss": 0.4491, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23348423838615417, | |
| "step": 690, | |
| "valid_targets_mean": 2125.4, | |
| "valid_targets_min": 544 | |
| }, | |
| { | |
| "epoch": 2.2275641025641026, | |
| "grad_norm": 0.5134803204787327, | |
| "learning_rate": 2.717571419715107e-05, | |
| "loss": 0.4205, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21478433907032013, | |
| "step": 695, | |
| "valid_targets_mean": 1724.8, | |
| "valid_targets_min": 372 | |
| }, | |
| { | |
| "epoch": 2.2435897435897436, | |
| "grad_norm": 0.49066453024135814, | |
| "learning_rate": 2.69664072623386e-05, | |
| "loss": 0.4644, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23817721009254456, | |
| "step": 700, | |
| "valid_targets_mean": 2259.7, | |
| "valid_targets_min": 408 | |
| }, | |
| { | |
| "epoch": 2.2596153846153846, | |
| "grad_norm": 0.5896960527239582, | |
| "learning_rate": 2.6756228340810946e-05, | |
| "loss": 0.4499, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2211766242980957, | |
| "step": 705, | |
| "valid_targets_mean": 1644.1, | |
| "valid_targets_min": 303 | |
| }, | |
| { | |
| "epoch": 2.2756410256410255, | |
| "grad_norm": 0.47096626497110644, | |
| "learning_rate": 2.6545203740709502e-05, | |
| "loss": 0.444, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24633574485778809, | |
| "step": 710, | |
| "valid_targets_mean": 2245.6, | |
| "valid_targets_min": 496 | |
| }, | |
| { | |
| "epoch": 2.2916666666666665, | |
| "grad_norm": 0.4898735632976753, | |
| "learning_rate": 2.6333359876029455e-05, | |
| "loss": 0.4469, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2112981081008911, | |
| "step": 715, | |
| "valid_targets_mean": 1969.1, | |
| "valid_targets_min": 414 | |
| }, | |
| { | |
| "epoch": 2.3076923076923075, | |
| "grad_norm": 0.501391834341433, | |
| "learning_rate": 2.612072326331351e-05, | |
| "loss": 0.4178, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2247483730316162, | |
| "step": 720, | |
| "valid_targets_mean": 2423.8, | |
| "valid_targets_min": 393 | |
| }, | |
| { | |
| "epoch": 2.323717948717949, | |
| "grad_norm": 0.4217215747925792, | |
| "learning_rate": 2.5907320518332827e-05, | |
| "loss": 0.4313, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.19624283909797668, | |
| "step": 725, | |
| "valid_targets_mean": 2825.5, | |
| "valid_targets_min": 544 | |
| }, | |
| { | |
| "epoch": 2.33974358974359, | |
| "grad_norm": 0.49601599026984694, | |
| "learning_rate": 2.5693178352755497e-05, | |
| "loss": 0.4074, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.202076256275177, | |
| "step": 730, | |
| "valid_targets_mean": 2533.4, | |
| "valid_targets_min": 617 | |
| }, | |
| { | |
| "epoch": 2.355769230769231, | |
| "grad_norm": 0.5353940844579022, | |
| "learning_rate": 2.547832357080305e-05, | |
| "loss": 0.4227, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2339232861995697, | |
| "step": 735, | |
| "valid_targets_mean": 1994.9, | |
| "valid_targets_min": 524 | |
| }, | |
| { | |
| "epoch": 2.371794871794872, | |
| "grad_norm": 0.5722824667248534, | |
| "learning_rate": 2.5262783065895377e-05, | |
| "loss": 0.4452, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2249128520488739, | |
| "step": 740, | |
| "valid_targets_mean": 1781.4, | |
| "valid_targets_min": 326 | |
| }, | |
| { | |
| "epoch": 2.3878205128205128, | |
| "grad_norm": 0.5007302300195212, | |
| "learning_rate": 2.5046583817284437e-05, | |
| "loss": 0.4542, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17838139832019806, | |
| "step": 745, | |
| "valid_targets_mean": 1831.7, | |
| "valid_targets_min": 347 | |
| }, | |
| { | |
| "epoch": 2.4038461538461537, | |
| "grad_norm": 0.4472396981588018, | |
| "learning_rate": 2.48297528866773e-05, | |
| "loss": 0.4087, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1966041922569275, | |
| "step": 750, | |
| "valid_targets_mean": 2101.4, | |
| "valid_targets_min": 507 | |
| }, | |
| { | |
| "epoch": 2.4198717948717947, | |
| "grad_norm": 0.5475486438741652, | |
| "learning_rate": 2.4612317414848804e-05, | |
| "loss": 0.4367, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2544914484024048, | |
| "step": 755, | |
| "valid_targets_mean": 2436.7, | |
| "valid_targets_min": 548 | |
| }, | |
| { | |
| "epoch": 2.435897435897436, | |
| "grad_norm": 0.47978909370549155, | |
| "learning_rate": 2.4394304618244346e-05, | |
| "loss": 0.4467, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2271379828453064, | |
| "step": 760, | |
| "valid_targets_mean": 2447.1, | |
| "valid_targets_min": 399 | |
| }, | |
| { | |
| "epoch": 2.451923076923077, | |
| "grad_norm": 0.6270651047991183, | |
| "learning_rate": 2.4175741785573177e-05, | |
| "loss": 0.4528, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2507140636444092, | |
| "step": 765, | |
| "valid_targets_mean": 1775.8, | |
| "valid_targets_min": 287 | |
| }, | |
| { | |
| "epoch": 2.467948717948718, | |
| "grad_norm": 0.3929435528764122, | |
| "learning_rate": 2.39566562743927e-05, | |
| "loss": 0.4189, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.19187305867671967, | |
| "step": 770, | |
| "valid_targets_mean": 3374.7, | |
| "valid_targets_min": 597 | |
| }, | |
| { | |
| "epoch": 2.483974358974359, | |
| "grad_norm": 0.527375808149042, | |
| "learning_rate": 2.3737075507684103e-05, | |
| "loss": 0.4494, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22173798084259033, | |
| "step": 775, | |
| "valid_targets_mean": 1856.5, | |
| "valid_targets_min": 414 | |
| }, | |
| { | |
| "epoch": 2.5, | |
| "grad_norm": 0.5997723972038063, | |
| "learning_rate": 2.3517026970419786e-05, | |
| "loss": 0.4365, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24415044486522675, | |
| "step": 780, | |
| "valid_targets_mean": 1901.2, | |
| "valid_targets_min": 779 | |
| }, | |
| { | |
| "epoch": 2.516025641025641, | |
| "grad_norm": 0.48377772022532683, | |
| "learning_rate": 2.3296538206123134e-05, | |
| "loss": 0.4155, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21287669241428375, | |
| "step": 785, | |
| "valid_targets_mean": 2185.6, | |
| "valid_targets_min": 399 | |
| }, | |
| { | |
| "epoch": 2.532051282051282, | |
| "grad_norm": 0.5468759021698991, | |
| "learning_rate": 2.307563681342081e-05, | |
| "loss": 0.4634, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2165958732366562, | |
| "step": 790, | |
| "valid_targets_mean": 2127.9, | |
| "valid_targets_min": 592 | |
| }, | |
| { | |
| "epoch": 2.5480769230769234, | |
| "grad_norm": 0.47406985778934707, | |
| "learning_rate": 2.285435044258829e-05, | |
| "loss": 0.4235, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20566615462303162, | |
| "step": 795, | |
| "valid_targets_mean": 2176.4, | |
| "valid_targets_min": 353 | |
| }, | |
| { | |
| "epoch": 2.564102564102564, | |
| "grad_norm": 0.5531693361265784, | |
| "learning_rate": 2.263270679208883e-05, | |
| "loss": 0.4375, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20676007866859436, | |
| "step": 800, | |
| "valid_targets_mean": 1450.8, | |
| "valid_targets_min": 343 | |
| }, | |
| { | |
| "epoch": 2.5801282051282053, | |
| "grad_norm": 0.5471762527126571, | |
| "learning_rate": 2.2410733605106462e-05, | |
| "loss": 0.4427, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2094150185585022, | |
| "step": 805, | |
| "valid_targets_mean": 1916.5, | |
| "valid_targets_min": 657 | |
| }, | |
| { | |
| "epoch": 2.5961538461538463, | |
| "grad_norm": 0.457621920491644, | |
| "learning_rate": 2.2188458666073382e-05, | |
| "loss": 0.4447, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21128109097480774, | |
| "step": 810, | |
| "valid_targets_mean": 2713.3, | |
| "valid_targets_min": 336 | |
| }, | |
| { | |
| "epoch": 2.6121794871794872, | |
| "grad_norm": 0.5354268147970072, | |
| "learning_rate": 2.1965909797192143e-05, | |
| "loss": 0.4372, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.18181543052196503, | |
| "step": 815, | |
| "valid_targets_mean": 1601.1, | |
| "valid_targets_min": 422 | |
| }, | |
| { | |
| "epoch": 2.628205128205128, | |
| "grad_norm": 0.5148256202815014, | |
| "learning_rate": 2.174311485495317e-05, | |
| "loss": 0.4303, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20850974321365356, | |
| "step": 820, | |
| "valid_targets_mean": 2054.1, | |
| "valid_targets_min": 358 | |
| }, | |
| { | |
| "epoch": 2.644230769230769, | |
| "grad_norm": 0.5117934823979483, | |
| "learning_rate": 2.1520101726647922e-05, | |
| "loss": 0.4482, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24301588535308838, | |
| "step": 825, | |
| "valid_targets_mean": 2009.7, | |
| "valid_targets_min": 548 | |
| }, | |
| { | |
| "epoch": 2.66025641025641, | |
| "grad_norm": 0.5932725780271455, | |
| "learning_rate": 2.1296898326878282e-05, | |
| "loss": 0.4368, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20315149426460266, | |
| "step": 830, | |
| "valid_targets_mean": 1422.9, | |
| "valid_targets_min": 382 | |
| }, | |
| { | |
| "epoch": 2.676282051282051, | |
| "grad_norm": 0.5511240954228953, | |
| "learning_rate": 2.1073532594062432e-05, | |
| "loss": 0.4287, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20835387706756592, | |
| "step": 835, | |
| "valid_targets_mean": 1824.1, | |
| "valid_targets_min": 406 | |
| }, | |
| { | |
| "epoch": 2.6923076923076925, | |
| "grad_norm": 0.48108009842119964, | |
| "learning_rate": 2.0850032486937838e-05, | |
| "loss": 0.413, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23764106631278992, | |
| "step": 840, | |
| "valid_targets_mean": 2332.3, | |
| "valid_targets_min": 311 | |
| }, | |
| { | |
| "epoch": 2.7083333333333335, | |
| "grad_norm": 0.45578124236246004, | |
| "learning_rate": 2.0626425981061608e-05, | |
| "loss": 0.4221, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16820326447486877, | |
| "step": 845, | |
| "valid_targets_mean": 1958.2, | |
| "valid_targets_min": 427 | |
| }, | |
| { | |
| "epoch": 2.7243589743589745, | |
| "grad_norm": 0.527649928455185, | |
| "learning_rate": 2.0402741065308808e-05, | |
| "loss": 0.4325, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2456822693347931, | |
| "step": 850, | |
| "valid_targets_mean": 1982.1, | |
| "valid_targets_min": 447 | |
| }, | |
| { | |
| "epoch": 2.7403846153846154, | |
| "grad_norm": 0.545838162469139, | |
| "learning_rate": 2.0179005738369098e-05, | |
| "loss": 0.4479, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.19371215999126434, | |
| "step": 855, | |
| "valid_targets_mean": 1953.6, | |
| "valid_targets_min": 404 | |
| }, | |
| { | |
| "epoch": 2.7564102564102564, | |
| "grad_norm": 0.5137794607606895, | |
| "learning_rate": 1.995524800524211e-05, | |
| "loss": 0.4463, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17609572410583496, | |
| "step": 860, | |
| "valid_targets_mean": 1501.8, | |
| "valid_targets_min": 397 | |
| }, | |
| { | |
| "epoch": 2.7724358974358974, | |
| "grad_norm": 0.5022662147440222, | |
| "learning_rate": 1.9731495873732055e-05, | |
| "loss": 0.4475, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.26271939277648926, | |
| "step": 865, | |
| "valid_targets_mean": 2227.0, | |
| "valid_targets_min": 380 | |
| }, | |
| { | |
| "epoch": 2.7884615384615383, | |
| "grad_norm": 0.44907950593239604, | |
| "learning_rate": 1.9507777350941996e-05, | |
| "loss": 0.417, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20136427879333496, | |
| "step": 870, | |
| "valid_targets_mean": 3125.4, | |
| "valid_targets_min": 477 | |
| }, | |
| { | |
| "epoch": 2.8044871794871797, | |
| "grad_norm": 0.48568732874510356, | |
| "learning_rate": 1.9284120439768192e-05, | |
| "loss": 0.4371, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1716795265674591, | |
| "step": 875, | |
| "valid_targets_mean": 1739.9, | |
| "valid_targets_min": 556 | |
| }, | |
| { | |
| "epoch": 2.8205128205128203, | |
| "grad_norm": 0.560681495754532, | |
| "learning_rate": 1.9060553135394957e-05, | |
| "loss": 0.4403, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.25348377227783203, | |
| "step": 880, | |
| "valid_targets_mean": 2066.1, | |
| "valid_targets_min": 481 | |
| }, | |
| { | |
| "epoch": 2.8365384615384617, | |
| "grad_norm": 0.5208153414018862, | |
| "learning_rate": 1.8837103421790486e-05, | |
| "loss": 0.4099, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.18841373920440674, | |
| "step": 885, | |
| "valid_targets_mean": 2157.9, | |
| "valid_targets_min": 500 | |
| }, | |
| { | |
| "epoch": 2.8525641025641026, | |
| "grad_norm": 0.532992574302052, | |
| "learning_rate": 1.861379926820414e-05, | |
| "loss": 0.4324, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2288801670074463, | |
| "step": 890, | |
| "valid_targets_mean": 1894.5, | |
| "valid_targets_min": 394 | |
| }, | |
| { | |
| "epoch": 2.8685897435897436, | |
| "grad_norm": 0.49754798570296427, | |
| "learning_rate": 1.8390668625665483e-05, | |
| "loss": 0.4173, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.25023743510246277, | |
| "step": 895, | |
| "valid_targets_mean": 2521.0, | |
| "valid_targets_min": 615 | |
| }, | |
| { | |
| "epoch": 2.8846153846153846, | |
| "grad_norm": 0.5090535372637991, | |
| "learning_rate": 1.8167739423485668e-05, | |
| "loss": 0.4547, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20940105617046356, | |
| "step": 900, | |
| "valid_targets_mean": 2438.8, | |
| "valid_targets_min": 464 | |
| }, | |
| { | |
| "epoch": 2.9006410256410255, | |
| "grad_norm": 0.4245436673848715, | |
| "learning_rate": 1.794503956576152e-05, | |
| "loss": 0.4422, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21880042552947998, | |
| "step": 905, | |
| "valid_targets_mean": 3292.9, | |
| "valid_targets_min": 483 | |
| }, | |
| { | |
| "epoch": 2.9166666666666665, | |
| "grad_norm": 0.512498398614729, | |
| "learning_rate": 1.7722596927882758e-05, | |
| "loss": 0.456, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1915343850851059, | |
| "step": 910, | |
| "valid_targets_mean": 2097.7, | |
| "valid_targets_min": 423 | |
| }, | |
| { | |
| "epoch": 2.9326923076923075, | |
| "grad_norm": 0.5002179520743993, | |
| "learning_rate": 1.7500439353042834e-05, | |
| "loss": 0.428, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.19558550417423248, | |
| "step": 915, | |
| "valid_targets_mean": 1932.4, | |
| "valid_targets_min": 352 | |
| }, | |
| { | |
| "epoch": 2.948717948717949, | |
| "grad_norm": 0.5271342144848477, | |
| "learning_rate": 1.727859464875381e-05, | |
| "loss": 0.4672, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23308855295181274, | |
| "step": 920, | |
| "valid_targets_mean": 2075.1, | |
| "valid_targets_min": 458 | |
| }, | |
| { | |
| "epoch": 2.96474358974359, | |
| "grad_norm": 0.9339459055765652, | |
| "learning_rate": 1.7057090583365678e-05, | |
| "loss": 0.4491, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20995834469795227, | |
| "step": 925, | |
| "valid_targets_mean": 1836.2, | |
| "valid_targets_min": 369 | |
| }, | |
| { | |
| "epoch": 2.980769230769231, | |
| "grad_norm": 0.4858744121036798, | |
| "learning_rate": 1.6835954882590567e-05, | |
| "loss": 0.4596, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21504682302474976, | |
| "step": 930, | |
| "valid_targets_mean": 2529.9, | |
| "valid_targets_min": 390 | |
| }, | |
| { | |
| "epoch": 2.996794871794872, | |
| "grad_norm": 0.5223177985055565, | |
| "learning_rate": 1.6615215226032332e-05, | |
| "loss": 0.3969, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2861439883708954, | |
| "step": 935, | |
| "valid_targets_mean": 2334.1, | |
| "valid_targets_min": 556 | |
| }, | |
| { | |
| "epoch": 3.0128205128205128, | |
| "grad_norm": 0.5081953568971541, | |
| "learning_rate": 1.6394899243721887e-05, | |
| "loss": 0.4444, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.18385818600654602, | |
| "step": 940, | |
| "valid_targets_mean": 1744.4, | |
| "valid_targets_min": 456 | |
| }, | |
| { | |
| "epoch": 3.0288461538461537, | |
| "grad_norm": 0.41196760326087184, | |
| "learning_rate": 1.6175034512658753e-05, | |
| "loss": 0.41, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1653943955898285, | |
| "step": 945, | |
| "valid_targets_mean": 2961.9, | |
| "valid_targets_min": 464 | |
| }, | |
| { | |
| "epoch": 3.0448717948717947, | |
| "grad_norm": 0.46276766410726833, | |
| "learning_rate": 1.5955648553359247e-05, | |
| "loss": 0.4061, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1766689419746399, | |
| "step": 950, | |
| "valid_targets_mean": 2314.1, | |
| "valid_targets_min": 519 | |
| }, | |
| { | |
| "epoch": 3.0608974358974357, | |
| "grad_norm": 0.56466145662721, | |
| "learning_rate": 1.5736768826411683e-05, | |
| "loss": 0.3975, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17233410477638245, | |
| "step": 955, | |
| "valid_targets_mean": 1736.1, | |
| "valid_targets_min": 368 | |
| }, | |
| { | |
| "epoch": 3.076923076923077, | |
| "grad_norm": 0.6184183203209048, | |
| "learning_rate": 1.5518422729039188e-05, | |
| "loss": 0.4366, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2544904947280884, | |
| "step": 960, | |
| "valid_targets_mean": 1785.6, | |
| "valid_targets_min": 453 | |
| }, | |
| { | |
| "epoch": 3.092948717948718, | |
| "grad_norm": 0.46138818996251113, | |
| "learning_rate": 1.5300637591670357e-05, | |
| "loss": 0.3877, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2141857147216797, | |
| "step": 965, | |
| "valid_targets_mean": 3189.5, | |
| "valid_targets_min": 635 | |
| }, | |
| { | |
| "epoch": 3.108974358974359, | |
| "grad_norm": 0.5024028000700136, | |
| "learning_rate": 1.5083440674518302e-05, | |
| "loss": 0.4141, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20946665108203888, | |
| "step": 970, | |
| "valid_targets_mean": 2281.5, | |
| "valid_targets_min": 477 | |
| }, | |
| { | |
| "epoch": 3.125, | |
| "grad_norm": 0.6617512350367286, | |
| "learning_rate": 1.4866859164168466e-05, | |
| "loss": 0.4462, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.27546098828315735, | |
| "step": 975, | |
| "valid_targets_mean": 1994.2, | |
| "valid_targets_min": 291 | |
| }, | |
| { | |
| "epoch": 3.141025641025641, | |
| "grad_norm": 0.5778578694843474, | |
| "learning_rate": 1.4650920170175704e-05, | |
| "loss": 0.431, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.19753442704677582, | |
| "step": 980, | |
| "valid_targets_mean": 1745.9, | |
| "valid_targets_min": 326 | |
| }, | |
| { | |
| "epoch": 3.157051282051282, | |
| "grad_norm": 0.4876046302233207, | |
| "learning_rate": 1.443565072167095e-05, | |
| "loss": 0.4193, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1774245798587799, | |
| "step": 985, | |
| "valid_targets_mean": 1624.2, | |
| "valid_targets_min": 572 | |
| }, | |
| { | |
| "epoch": 3.173076923076923, | |
| "grad_norm": 0.5738704159928016, | |
| "learning_rate": 1.4221077763977984e-05, | |
| "loss": 0.4254, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2392444908618927, | |
| "step": 990, | |
| "valid_targets_mean": 1980.4, | |
| "valid_targets_min": 784 | |
| }, | |
| { | |
| "epoch": 3.189102564102564, | |
| "grad_norm": 0.4850487138995122, | |
| "learning_rate": 1.4007228155240696e-05, | |
| "loss": 0.4209, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2174842357635498, | |
| "step": 995, | |
| "valid_targets_mean": 2492.1, | |
| "valid_targets_min": 459 | |
| }, | |
| { | |
| "epoch": 3.2051282051282053, | |
| "grad_norm": 0.529762108647159, | |
| "learning_rate": 1.37941286630612e-05, | |
| "loss": 0.4147, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22387659549713135, | |
| "step": 1000, | |
| "valid_targets_mean": 1882.1, | |
| "valid_targets_min": 355 | |
| }, | |
| { | |
| "epoch": 3.2211538461538463, | |
| "grad_norm": 0.4709252470917212, | |
| "learning_rate": 1.3581805961149371e-05, | |
| "loss": 0.4275, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17616716027259827, | |
| "step": 1005, | |
| "valid_targets_mean": 2260.7, | |
| "valid_targets_min": 481 | |
| }, | |
| { | |
| "epoch": 3.2371794871794872, | |
| "grad_norm": 0.4920991562633389, | |
| "learning_rate": 1.3370286625984089e-05, | |
| "loss": 0.4291, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1874414086341858, | |
| "step": 1010, | |
| "valid_targets_mean": 1999.1, | |
| "valid_targets_min": 389 | |
| }, | |
| { | |
| "epoch": 3.253205128205128, | |
| "grad_norm": 0.4366015680605223, | |
| "learning_rate": 1.3159597133486628e-05, | |
| "loss": 0.4132, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1775241196155548, | |
| "step": 1015, | |
| "valid_targets_mean": 2691.2, | |
| "valid_targets_min": 492 | |
| }, | |
| { | |
| "epoch": 3.269230769230769, | |
| "grad_norm": 0.5159579418833546, | |
| "learning_rate": 1.2949763855706678e-05, | |
| "loss": 0.3984, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20066994428634644, | |
| "step": 1020, | |
| "valid_targets_mean": 2245.2, | |
| "valid_targets_min": 414 | |
| }, | |
| { | |
| "epoch": 3.28525641025641, | |
| "grad_norm": 0.5876671407131282, | |
| "learning_rate": 1.274081305752135e-05, | |
| "loss": 0.4516, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1973981410264969, | |
| "step": 1025, | |
| "valid_targets_mean": 1791.3, | |
| "valid_targets_min": 398 | |
| }, | |
| { | |
| "epoch": 3.301282051282051, | |
| "grad_norm": 0.6456536801741808, | |
| "learning_rate": 1.2532770893347582e-05, | |
| "loss": 0.427, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3110905885696411, | |
| "step": 1030, | |
| "valid_targets_mean": 1891.8, | |
| "valid_targets_min": 324 | |
| }, | |
| { | |
| "epoch": 3.3173076923076925, | |
| "grad_norm": 0.5293106844776568, | |
| "learning_rate": 1.2325663403868406e-05, | |
| "loss": 0.4164, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.230901837348938, | |
| "step": 1035, | |
| "valid_targets_mean": 2292.3, | |
| "valid_targets_min": 355 | |
| }, | |
| { | |
| "epoch": 3.3333333333333335, | |
| "grad_norm": 0.5522757760846092, | |
| "learning_rate": 1.2119516512773424e-05, | |
| "loss": 0.3895, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24217258393764496, | |
| "step": 1040, | |
| "valid_targets_mean": 2009.1, | |
| "valid_targets_min": 408 | |
| }, | |
| { | |
| "epoch": 3.3493589743589745, | |
| "grad_norm": 0.5560321667957103, | |
| "learning_rate": 1.1914356023513904e-05, | |
| "loss": 0.4459, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23398807644844055, | |
| "step": 1045, | |
| "valid_targets_mean": 2263.5, | |
| "valid_targets_min": 539 | |
| }, | |
| { | |
| "epoch": 3.3653846153846154, | |
| "grad_norm": 0.4825967281763318, | |
| "learning_rate": 1.1710207616073001e-05, | |
| "loss": 0.3927, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21472863852977753, | |
| "step": 1050, | |
| "valid_targets_mean": 2439.7, | |
| "valid_targets_min": 286 | |
| }, | |
| { | |
| "epoch": 3.3814102564102564, | |
| "grad_norm": 0.5322770350466977, | |
| "learning_rate": 1.1507096843751372e-05, | |
| "loss": 0.4218, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17266938090324402, | |
| "step": 1055, | |
| "valid_targets_mean": 1598.1, | |
| "valid_targets_min": 387 | |
| }, | |
| { | |
| "epoch": 3.3974358974358974, | |
| "grad_norm": 0.7827125593311665, | |
| "learning_rate": 1.1305049129968637e-05, | |
| "loss": 0.4349, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1991385519504547, | |
| "step": 1060, | |
| "valid_targets_mean": 2439.3, | |
| "valid_targets_min": 608 | |
| }, | |
| { | |
| "epoch": 3.4134615384615383, | |
| "grad_norm": 0.44528916459778456, | |
| "learning_rate": 1.110408976508118e-05, | |
| "loss": 0.3907, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1545448899269104, | |
| "step": 1065, | |
| "valid_targets_mean": 2074.4, | |
| "valid_targets_min": 396 | |
| }, | |
| { | |
| "epoch": 3.4294871794871793, | |
| "grad_norm": 0.48384890001970277, | |
| "learning_rate": 1.090424390321648e-05, | |
| "loss": 0.4352, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.18344886600971222, | |
| "step": 1070, | |
| "valid_targets_mean": 2136.0, | |
| "valid_targets_min": 406 | |
| }, | |
| { | |
| "epoch": 3.4455128205128207, | |
| "grad_norm": 0.46732111222809075, | |
| "learning_rate": 1.070553655912463e-05, | |
| "loss": 0.3894, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13502533733844757, | |
| "step": 1075, | |
| "valid_targets_mean": 1907.0, | |
| "valid_targets_min": 400 | |
| }, | |
| { | |
| "epoch": 3.4615384615384617, | |
| "grad_norm": 0.46066416755356465, | |
| "learning_rate": 1.0507992605047193e-05, | |
| "loss": 0.4055, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1755571961402893, | |
| "step": 1080, | |
| "valid_targets_mean": 2637.7, | |
| "valid_targets_min": 457 | |
| }, | |
| { | |
| "epoch": 3.4775641025641026, | |
| "grad_norm": 0.3719838719731155, | |
| "learning_rate": 1.0311636767603952e-05, | |
| "loss": 0.3946, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14000138640403748, | |
| "step": 1085, | |
| "valid_targets_mean": 2861.4, | |
| "valid_targets_min": 495 | |
| }, | |
| { | |
| "epoch": 3.4935897435897436, | |
| "grad_norm": 0.5828556028812071, | |
| "learning_rate": 1.0116493624697862e-05, | |
| "loss": 0.4203, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3252260684967041, | |
| "step": 1090, | |
| "valid_targets_mean": 2756.8, | |
| "valid_targets_min": 533 | |
| }, | |
| { | |
| "epoch": 3.5096153846153846, | |
| "grad_norm": 0.43638255215532396, | |
| "learning_rate": 9.922587602438657e-06, | |
| "loss": 0.3941, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.18718764185905457, | |
| "step": 1095, | |
| "valid_targets_mean": 2623.4, | |
| "valid_targets_min": 420 | |
| }, | |
| { | |
| "epoch": 3.5256410256410255, | |
| "grad_norm": 0.5594326378178172, | |
| "learning_rate": 9.729942972085401e-06, | |
| "loss": 0.4182, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20950523018836975, | |
| "step": 1100, | |
| "valid_targets_mean": 1888.2, | |
| "valid_targets_min": 398 | |
| }, | |
| { | |
| "epoch": 3.5416666666666665, | |
| "grad_norm": 0.6153082072669694, | |
| "learning_rate": 9.538583847008452e-06, | |
| "loss": 0.4508, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24810105562210083, | |
| "step": 1105, | |
| "valid_targets_mean": 1652.6, | |
| "valid_targets_min": 487 | |
| }, | |
| { | |
| "epoch": 3.5576923076923075, | |
| "grad_norm": 0.7347949249771237, | |
| "learning_rate": 9.348534179671202e-06, | |
| "loss": 0.4446, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1828898787498474, | |
| "step": 1110, | |
| "valid_targets_mean": 1291.9, | |
| "valid_targets_min": 384 | |
| }, | |
| { | |
| "epoch": 3.573717948717949, | |
| "grad_norm": 0.4885273908189287, | |
| "learning_rate": 9.159817758631923e-06, | |
| "loss": 0.4148, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21009644865989685, | |
| "step": 1115, | |
| "valid_targets_mean": 2135.0, | |
| "valid_targets_min": 345 | |
| }, | |
| { | |
| "epoch": 3.58974358974359, | |
| "grad_norm": 0.4973124736202378, | |
| "learning_rate": 8.972458205566168e-06, | |
| "loss": 0.4288, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15964004397392273, | |
| "step": 1120, | |
| "valid_targets_mean": 1893.4, | |
| "valid_targets_min": 460 | |
| }, | |
| { | |
| "epoch": 3.605769230769231, | |
| "grad_norm": 0.5441599943681932, | |
| "learning_rate": 8.786478972310023e-06, | |
| "loss": 0.4375, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2124439775943756, | |
| "step": 1125, | |
| "valid_targets_mean": 2145.6, | |
| "valid_targets_min": 336 | |
| }, | |
| { | |
| "epoch": 3.621794871794872, | |
| "grad_norm": 0.5226367162201916, | |
| "learning_rate": 8.601903337924646e-06, | |
| "loss": 0.3906, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.19349414110183716, | |
| "step": 1130, | |
| "valid_targets_mean": 2012.8, | |
| "valid_targets_min": 509 | |
| }, | |
| { | |
| "epoch": 3.6378205128205128, | |
| "grad_norm": 0.5145848515018033, | |
| "learning_rate": 8.418754405782423e-06, | |
| "loss": 0.4056, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2273569405078888, | |
| "step": 1135, | |
| "valid_targets_mean": 2077.1, | |
| "valid_targets_min": 426 | |
| }, | |
| { | |
| "epoch": 3.6538461538461537, | |
| "grad_norm": 0.5175398660560526, | |
| "learning_rate": 8.237055100675092e-06, | |
| "loss": 0.4016, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22114655375480652, | |
| "step": 1140, | |
| "valid_targets_mean": 2420.2, | |
| "valid_targets_min": 386 | |
| }, | |
| { | |
| "epoch": 3.6698717948717947, | |
| "grad_norm": 0.6277817059082492, | |
| "learning_rate": 8.056828165944282e-06, | |
| "loss": 0.4462, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.26035845279693604, | |
| "step": 1145, | |
| "valid_targets_mean": 1739.6, | |
| "valid_targets_min": 324 | |
| }, | |
| { | |
| "epoch": 3.685897435897436, | |
| "grad_norm": 0.5854299128793944, | |
| "learning_rate": 7.878096160634675e-06, | |
| "loss": 0.4342, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22393161058425903, | |
| "step": 1150, | |
| "valid_targets_mean": 1604.9, | |
| "valid_targets_min": 517 | |
| }, | |
| { | |
| "epoch": 3.7019230769230766, | |
| "grad_norm": 0.6521482846929779, | |
| "learning_rate": 7.700881456670342e-06, | |
| "loss": 0.4199, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21592406928539276, | |
| "step": 1155, | |
| "valid_targets_mean": 1704.6, | |
| "valid_targets_min": 372 | |
| }, | |
| { | |
| "epoch": 3.717948717948718, | |
| "grad_norm": 0.5205545018221379, | |
| "learning_rate": 7.525206236054385e-06, | |
| "loss": 0.4159, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.19648584723472595, | |
| "step": 1160, | |
| "valid_targets_mean": 1921.7, | |
| "valid_targets_min": 393 | |
| }, | |
| { | |
| "epoch": 3.733974358974359, | |
| "grad_norm": 0.5195989305298412, | |
| "learning_rate": 7.3510924880924575e-06, | |
| "loss": 0.4233, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17861494421958923, | |
| "step": 1165, | |
| "valid_targets_mean": 1717.1, | |
| "valid_targets_min": 479 | |
| }, | |
| { | |
| "epoch": 3.75, | |
| "grad_norm": 0.6033763727537506, | |
| "learning_rate": 7.178562006640337e-06, | |
| "loss": 0.4242, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20282906293869019, | |
| "step": 1170, | |
| "valid_targets_mean": 1704.9, | |
| "valid_targets_min": 409 | |
| }, | |
| { | |
| "epoch": 3.766025641025641, | |
| "grad_norm": 0.8520137624165722, | |
| "learning_rate": 7.0076363873759865e-06, | |
| "loss": 0.3928, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21843525767326355, | |
| "step": 1175, | |
| "valid_targets_mean": 2615.6, | |
| "valid_targets_min": 500 | |
| }, | |
| { | |
| "epoch": 3.782051282051282, | |
| "grad_norm": 0.5354434243394129, | |
| "learning_rate": 6.838337025096424e-06, | |
| "loss": 0.3922, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2174426019191742, | |
| "step": 1180, | |
| "valid_targets_mean": 2017.8, | |
| "valid_targets_min": 482 | |
| }, | |
| { | |
| "epoch": 3.7980769230769234, | |
| "grad_norm": 0.5105220829406444, | |
| "learning_rate": 6.67068511103971e-06, | |
| "loss": 0.4288, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.18573346734046936, | |
| "step": 1185, | |
| "valid_targets_mean": 2131.7, | |
| "valid_targets_min": 499 | |
| }, | |
| { | |
| "epoch": 3.814102564102564, | |
| "grad_norm": 0.46857007935338224, | |
| "learning_rate": 6.504701630232475e-06, | |
| "loss": 0.4107, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17006590962409973, | |
| "step": 1190, | |
| "valid_targets_mean": 1914.7, | |
| "valid_targets_min": 476 | |
| }, | |
| { | |
| "epoch": 3.8301282051282053, | |
| "grad_norm": 0.5407396992987427, | |
| "learning_rate": 6.340407358863167e-06, | |
| "loss": 0.3835, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16990578174591064, | |
| "step": 1195, | |
| "valid_targets_mean": 1394.6, | |
| "valid_targets_min": 363 | |
| }, | |
| { | |
| "epoch": 3.8461538461538463, | |
| "grad_norm": 0.47292976742071363, | |
| "learning_rate": 6.177822861681557e-06, | |
| "loss": 0.4242, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1876613199710846, | |
| "step": 1200, | |
| "valid_targets_mean": 2349.9, | |
| "valid_targets_min": 393 | |
| }, | |
| { | |
| "epoch": 3.8621794871794872, | |
| "grad_norm": 0.4841137938946458, | |
| "learning_rate": 6.016968489424572e-06, | |
| "loss": 0.4346, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14386187493801117, | |
| "step": 1205, | |
| "valid_targets_mean": 1697.8, | |
| "valid_targets_min": 220 | |
| }, | |
| { | |
| "epoch": 3.878205128205128, | |
| "grad_norm": 0.5708595597284355, | |
| "learning_rate": 5.857864376269051e-06, | |
| "loss": 0.4374, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23375602066516876, | |
| "step": 1210, | |
| "valid_targets_mean": 1680.6, | |
| "valid_targets_min": 381 | |
| }, | |
| { | |
| "epoch": 3.894230769230769, | |
| "grad_norm": 0.5084621641793788, | |
| "learning_rate": 5.700530437311509e-06, | |
| "loss": 0.4229, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.18736353516578674, | |
| "step": 1215, | |
| "valid_targets_mean": 1901.4, | |
| "valid_targets_min": 381 | |
| }, | |
| { | |
| "epoch": 3.91025641025641, | |
| "grad_norm": 0.5301004661796407, | |
| "learning_rate": 5.544986366075371e-06, | |
| "loss": 0.4391, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1670445203781128, | |
| "step": 1220, | |
| "valid_targets_mean": 1648.1, | |
| "valid_targets_min": 401 | |
| }, | |
| { | |
| "epoch": 3.926282051282051, | |
| "grad_norm": 0.6134172232948031, | |
| "learning_rate": 5.39125163204594e-06, | |
| "loss": 0.4133, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21446377038955688, | |
| "step": 1225, | |
| "valid_targets_mean": 1942.4, | |
| "valid_targets_min": 483 | |
| }, | |
| { | |
| "epoch": 3.9423076923076925, | |
| "grad_norm": 0.5974202428194099, | |
| "learning_rate": 5.239345478233364e-06, | |
| "loss": 0.4258, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17354455590248108, | |
| "step": 1230, | |
| "valid_targets_mean": 1358.0, | |
| "valid_targets_min": 369 | |
| }, | |
| { | |
| "epoch": 3.9583333333333335, | |
| "grad_norm": 0.4483178567987285, | |
| "learning_rate": 5.089286918764031e-06, | |
| "loss": 0.3961, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.19412055611610413, | |
| "step": 1235, | |
| "valid_targets_mean": 2878.3, | |
| "valid_targets_min": 803 | |
| }, | |
| { | |
| "epoch": 3.9743589743589745, | |
| "grad_norm": 0.4507178360947217, | |
| "learning_rate": 4.941094736500522e-06, | |
| "loss": 0.4006, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16912227869033813, | |
| "step": 1240, | |
| "valid_targets_mean": 2105.1, | |
| "valid_targets_min": 357 | |
| }, | |
| { | |
| "epoch": 3.9903846153846154, | |
| "grad_norm": 0.5411117764458047, | |
| "learning_rate": 4.794787480690597e-06, | |
| "loss": 0.4147, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.18049368262290955, | |
| "step": 1245, | |
| "valid_targets_mean": 1542.9, | |
| "valid_targets_min": 366 | |
| }, | |
| { | |
| "epoch": 4.006410256410256, | |
| "grad_norm": 0.5020314686782774, | |
| "learning_rate": 4.650383464645338e-06, | |
| "loss": 0.4072, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17812328040599823, | |
| "step": 1250, | |
| "valid_targets_mean": 2127.0, | |
| "valid_targets_min": 558 | |
| }, | |
| { | |
| "epoch": 4.022435897435898, | |
| "grad_norm": 0.8897516856466173, | |
| "learning_rate": 4.507900763446911e-06, | |
| "loss": 0.4323, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17299619317054749, | |
| "step": 1255, | |
| "valid_targets_mean": 1772.4, | |
| "valid_targets_min": 401 | |
| }, | |
| { | |
| "epoch": 4.038461538461538, | |
| "grad_norm": 0.5155047635305318, | |
| "learning_rate": 4.367357211686072e-06, | |
| "loss": 0.4405, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2016928344964981, | |
| "step": 1260, | |
| "valid_targets_mean": 1984.0, | |
| "valid_targets_min": 314 | |
| }, | |
| { | |
| "epoch": 4.05448717948718, | |
| "grad_norm": 0.5575252186312881, | |
| "learning_rate": 4.228770401229824e-06, | |
| "loss": 0.4146, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1600431501865387, | |
| "step": 1265, | |
| "valid_targets_mean": 1631.5, | |
| "valid_targets_min": 422 | |
| }, | |
| { | |
| "epoch": 4.07051282051282, | |
| "grad_norm": 0.4739201323516146, | |
| "learning_rate": 4.092157679019442e-06, | |
| "loss": 0.4126, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2213498055934906, | |
| "step": 1270, | |
| "valid_targets_mean": 2722.4, | |
| "valid_targets_min": 596 | |
| }, | |
| { | |
| "epoch": 4.086538461538462, | |
| "grad_norm": 0.5093029435546993, | |
| "learning_rate": 3.957536144899123e-06, | |
| "loss": 0.4053, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16781838238239288, | |
| "step": 1275, | |
| "valid_targets_mean": 2022.2, | |
| "valid_targets_min": 454 | |
| }, | |
| { | |
| "epoch": 4.102564102564102, | |
| "grad_norm": 0.46573698307127626, | |
| "learning_rate": 3.8249226494756445e-06, | |
| "loss": 0.3686, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1653728187084198, | |
| "step": 1280, | |
| "valid_targets_mean": 2144.4, | |
| "valid_targets_min": 370 | |
| }, | |
| { | |
| "epoch": 4.118589743589744, | |
| "grad_norm": 0.49547650926816716, | |
| "learning_rate": 3.694333792009115e-06, | |
| "loss": 0.4204, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16737449169158936, | |
| "step": 1285, | |
| "valid_targets_mean": 2180.3, | |
| "valid_targets_min": 390 | |
| }, | |
| { | |
| "epoch": 4.134615384615385, | |
| "grad_norm": 0.4959235045064883, | |
| "learning_rate": 3.565785918335292e-06, | |
| "loss": 0.4121, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17612406611442566, | |
| "step": 1290, | |
| "valid_targets_mean": 2229.8, | |
| "valid_targets_min": 415 | |
| }, | |
| { | |
| "epoch": 4.1506410256410255, | |
| "grad_norm": 0.5497305685489215, | |
| "learning_rate": 3.43929511881953e-06, | |
| "loss": 0.4143, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21826569736003876, | |
| "step": 1295, | |
| "valid_targets_mean": 2172.8, | |
| "valid_targets_min": 378 | |
| }, | |
| { | |
| "epoch": 4.166666666666667, | |
| "grad_norm": 0.49016495730669646, | |
| "learning_rate": 3.3148772263427743e-06, | |
| "loss": 0.3962, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2120068520307541, | |
| "step": 1300, | |
| "valid_targets_mean": 2636.4, | |
| "valid_targets_min": 502 | |
| }, | |
| { | |
| "epoch": 4.1826923076923075, | |
| "grad_norm": 0.5410817941449617, | |
| "learning_rate": 3.1925478143197418e-06, | |
| "loss": 0.4026, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1488579511642456, | |
| "step": 1305, | |
| "valid_targets_mean": 1622.9, | |
| "valid_targets_min": 408 | |
| }, | |
| { | |
| "epoch": 4.198717948717949, | |
| "grad_norm": 0.5167531330373021, | |
| "learning_rate": 3.0723221947495907e-06, | |
| "loss": 0.4189, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16238239407539368, | |
| "step": 1310, | |
| "valid_targets_mean": 2115.7, | |
| "valid_targets_min": 470 | |
| }, | |
| { | |
| "epoch": 4.214743589743589, | |
| "grad_norm": 0.5065500271116625, | |
| "learning_rate": 2.954215416299331e-06, | |
| "loss": 0.4129, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.18277542293071747, | |
| "step": 1315, | |
| "valid_targets_mean": 2185.4, | |
| "valid_targets_min": 358 | |
| }, | |
| { | |
| "epoch": 4.230769230769231, | |
| "grad_norm": 0.5004798136718417, | |
| "learning_rate": 2.838242262420148e-06, | |
| "loss": 0.4176, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22836771607398987, | |
| "step": 1320, | |
| "valid_targets_mean": 2597.8, | |
| "valid_targets_min": 331 | |
| }, | |
| { | |
| "epoch": 4.246794871794872, | |
| "grad_norm": 0.5184618845461352, | |
| "learning_rate": 2.7244172494969978e-06, | |
| "loss": 0.4176, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.18959830701351166, | |
| "step": 1325, | |
| "valid_targets_mean": 2182.0, | |
| "valid_targets_min": 523 | |
| }, | |
| { | |
| "epoch": 4.262820512820513, | |
| "grad_norm": 0.4544775548224198, | |
| "learning_rate": 2.6127546250315438e-06, | |
| "loss": 0.3865, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16856324672698975, | |
| "step": 1330, | |
| "valid_targets_mean": 2052.1, | |
| "valid_targets_min": 363 | |
| }, | |
| { | |
| "epoch": 4.278846153846154, | |
| "grad_norm": 0.6718829730259701, | |
| "learning_rate": 2.503268365858831e-06, | |
| "loss": 0.4097, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2597959339618683, | |
| "step": 1335, | |
| "valid_targets_mean": 1643.8, | |
| "valid_targets_min": 521 | |
| }, | |
| { | |
| "epoch": 4.294871794871795, | |
| "grad_norm": 0.6447551224819277, | |
| "learning_rate": 2.3959721763977805e-06, | |
| "loss": 0.431, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22334489226341248, | |
| "step": 1340, | |
| "valid_targets_mean": 1838.8, | |
| "valid_targets_min": 447 | |
| }, | |
| { | |
| "epoch": 4.310897435897436, | |
| "grad_norm": 0.47106359697416067, | |
| "learning_rate": 2.2908794869358044e-06, | |
| "loss": 0.4107, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1753031313419342, | |
| "step": 1345, | |
| "valid_targets_mean": 2324.8, | |
| "valid_targets_min": 382 | |
| }, | |
| { | |
| "epoch": 4.326923076923077, | |
| "grad_norm": 0.4797964740997395, | |
| "learning_rate": 2.188003451947747e-06, | |
| "loss": 0.3904, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13902968168258667, | |
| "step": 1350, | |
| "valid_targets_mean": 1571.4, | |
| "valid_targets_min": 365 | |
| }, | |
| { | |
| "epoch": 4.342948717948718, | |
| "grad_norm": 0.4974988695652104, | |
| "learning_rate": 2.0873569484493305e-06, | |
| "loss": 0.3924, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20952798426151276, | |
| "step": 1355, | |
| "valid_targets_mean": 2615.0, | |
| "valid_targets_min": 499 | |
| }, | |
| { | |
| "epoch": 4.358974358974359, | |
| "grad_norm": 0.5841534150892278, | |
| "learning_rate": 1.9889525743853323e-06, | |
| "loss": 0.3984, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21221312880516052, | |
| "step": 1360, | |
| "valid_targets_mean": 1916.5, | |
| "valid_targets_min": 199 | |
| }, | |
| { | |
| "epoch": 4.375, | |
| "grad_norm": 0.5200681440214403, | |
| "learning_rate": 1.8928026470526917e-06, | |
| "loss": 0.4282, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21217067539691925, | |
| "step": 1365, | |
| "valid_targets_mean": 1990.6, | |
| "valid_targets_min": 398 | |
| }, | |
| { | |
| "epoch": 4.391025641025641, | |
| "grad_norm": 0.5225656986310022, | |
| "learning_rate": 1.7989192015587776e-06, | |
| "loss": 0.4046, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21198615431785583, | |
| "step": 1370, | |
| "valid_targets_mean": 2139.8, | |
| "valid_targets_min": 495 | |
| }, | |
| { | |
| "epoch": 4.407051282051282, | |
| "grad_norm": 0.5718894420905976, | |
| "learning_rate": 1.7073139893149092e-06, | |
| "loss": 0.4265, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2715427577495575, | |
| "step": 1375, | |
| "valid_targets_mean": 2201.8, | |
| "valid_targets_min": 620 | |
| }, | |
| { | |
| "epoch": 4.423076923076923, | |
| "grad_norm": 0.4832349361730212, | |
| "learning_rate": 1.6179984765654743e-06, | |
| "loss": 0.3997, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.18295009434223175, | |
| "step": 1380, | |
| "valid_targets_mean": 2338.6, | |
| "valid_targets_min": 333 | |
| }, | |
| { | |
| "epoch": 4.439102564102564, | |
| "grad_norm": 0.5009262761860173, | |
| "learning_rate": 1.5309838429526714e-06, | |
| "loss": 0.4229, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.19573353230953217, | |
| "step": 1385, | |
| "valid_targets_mean": 2377.6, | |
| "valid_targets_min": 359 | |
| }, | |
| { | |
| "epoch": 4.455128205128205, | |
| "grad_norm": 0.4626576145917675, | |
| "learning_rate": 1.4462809801171428e-06, | |
| "loss": 0.4182, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16738732159137726, | |
| "step": 1390, | |
| "valid_targets_mean": 2413.1, | |
| "valid_targets_min": 632 | |
| }, | |
| { | |
| "epoch": 4.471153846153846, | |
| "grad_norm": 0.5619638360015893, | |
| "learning_rate": 1.3639004903346954e-06, | |
| "loss": 0.4131, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17360472679138184, | |
| "step": 1395, | |
| "valid_targets_mean": 1610.5, | |
| "valid_targets_min": 454 | |
| }, | |
| { | |
| "epoch": 4.487179487179487, | |
| "grad_norm": 0.5089087979706497, | |
| "learning_rate": 1.2838526851891864e-06, | |
| "loss": 0.4082, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23789802193641663, | |
| "step": 1400, | |
| "valid_targets_mean": 2443.3, | |
| "valid_targets_min": 541 | |
| }, | |
| { | |
| "epoch": 4.503205128205128, | |
| "grad_norm": 0.5942691756087387, | |
| "learning_rate": 1.2061475842818337e-06, | |
| "loss": 0.4333, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21138739585876465, | |
| "step": 1405, | |
| "valid_targets_mean": 1548.1, | |
| "valid_targets_min": 474 | |
| }, | |
| { | |
| "epoch": 4.519230769230769, | |
| "grad_norm": 0.5181839449829251, | |
| "learning_rate": 1.1307949139770446e-06, | |
| "loss": 0.3959, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.18168583512306213, | |
| "step": 1410, | |
| "valid_targets_mean": 1700.7, | |
| "valid_targets_min": 316 | |
| }, | |
| { | |
| "epoch": 4.535256410256411, | |
| "grad_norm": 0.5691599181706188, | |
| "learning_rate": 1.057804106184992e-06, | |
| "loss": 0.4124, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20509378612041473, | |
| "step": 1415, | |
| "valid_targets_mean": 2220.6, | |
| "valid_targets_min": 390 | |
| }, | |
| { | |
| "epoch": 4.551282051282051, | |
| "grad_norm": 0.5362388152801542, | |
| "learning_rate": 9.871842971809853e-07, | |
| "loss": 0.3719, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.18971163034439087, | |
| "step": 1420, | |
| "valid_targets_mean": 2453.0, | |
| "valid_targets_min": 438 | |
| }, | |
| { | |
| "epoch": 4.5673076923076925, | |
| "grad_norm": 0.4217117551642382, | |
| "learning_rate": 9.189443264619102e-07, | |
| "loss": 0.3898, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.19224503636360168, | |
| "step": 1425, | |
| "valid_targets_mean": 3036.1, | |
| "valid_targets_min": 491 | |
| }, | |
| { | |
| "epoch": 4.583333333333333, | |
| "grad_norm": 0.50137069774833, | |
| "learning_rate": 8.530927356397778e-07, | |
| "loss": 0.4182, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17977871000766754, | |
| "step": 1430, | |
| "valid_targets_mean": 1959.3, | |
| "valid_targets_min": 462 | |
| }, | |
| { | |
| "epoch": 4.5993589743589745, | |
| "grad_norm": 0.5147630870730896, | |
| "learning_rate": 7.896377673725553e-07, | |
| "loss": 0.4009, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.190871000289917, | |
| "step": 1435, | |
| "valid_targets_mean": 1864.3, | |
| "valid_targets_min": 436 | |
| }, | |
| { | |
| "epoch": 4.615384615384615, | |
| "grad_norm": 0.48363223511421033, | |
| "learning_rate": 7.285873643324514e-07, | |
| "loss": 0.4017, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21705955266952515, | |
| "step": 1440, | |
| "valid_targets_mean": 2462.3, | |
| "valid_targets_min": 621 | |
| }, | |
| { | |
| "epoch": 4.631410256410256, | |
| "grad_norm": 0.547275826405447, | |
| "learning_rate": 6.69949168211721e-07, | |
| "loss": 0.395, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2083073854446411, | |
| "step": 1445, | |
| "valid_targets_mean": 1937.9, | |
| "valid_targets_min": 472 | |
| }, | |
| { | |
| "epoch": 4.647435897435898, | |
| "grad_norm": 0.5515440239397343, | |
| "learning_rate": 6.137305187661513e-07, | |
| "loss": 0.4123, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.19437691569328308, | |
| "step": 1450, | |
| "valid_targets_mean": 1709.7, | |
| "valid_targets_min": 496 | |
| }, | |
| { | |
| "epoch": 4.663461538461538, | |
| "grad_norm": 0.5541400713883889, | |
| "learning_rate": 5.599384528963425e-07, | |
| "loss": 0.4069, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17162460088729858, | |
| "step": 1455, | |
| "valid_targets_mean": 1923.9, | |
| "valid_targets_min": 338 | |
| }, | |
| { | |
| "epoch": 4.67948717948718, | |
| "grad_norm": 0.46459204192014386, | |
| "learning_rate": 5.085797037669072e-07, | |
| "loss": 0.3773, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.19353806972503662, | |
| "step": 1460, | |
| "valid_targets_mean": 2262.0, | |
| "valid_targets_min": 368 | |
| }, | |
| { | |
| "epoch": 4.69551282051282, | |
| "grad_norm": 0.5043482862484802, | |
| "learning_rate": 4.5966069996365993e-07, | |
| "loss": 0.3826, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.18038004636764526, | |
| "step": 1465, | |
| "valid_targets_mean": 2006.8, | |
| "valid_targets_min": 401 | |
| }, | |
| { | |
| "epoch": 4.711538461538462, | |
| "grad_norm": 0.47468830861646916, | |
| "learning_rate": 4.1318756468897047e-07, | |
| "loss": 0.4266, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21844631433486938, | |
| "step": 1470, | |
| "valid_targets_mean": 2434.2, | |
| "valid_targets_min": 499 | |
| }, | |
| { | |
| "epoch": 4.727564102564102, | |
| "grad_norm": 0.5712016674577056, | |
| "learning_rate": 3.691661149953096e-07, | |
| "loss": 0.4055, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2230275571346283, | |
| "step": 1475, | |
| "valid_targets_mean": 2119.4, | |
| "valid_targets_min": 454 | |
| }, | |
| { | |
| "epoch": 4.743589743589744, | |
| "grad_norm": 0.5286171152229552, | |
| "learning_rate": 3.2760186105712964e-07, | |
| "loss": 0.3816, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.19953539967536926, | |
| "step": 1480, | |
| "valid_targets_mean": 2190.5, | |
| "valid_targets_min": 390 | |
| }, | |
| { | |
| "epoch": 4.759615384615385, | |
| "grad_norm": 0.5355340610321968, | |
| "learning_rate": 2.8850000548115155e-07, | |
| "loss": 0.4263, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24080535769462585, | |
| "step": 1485, | |
| "valid_targets_mean": 1879.4, | |
| "valid_targets_min": 488 | |
| }, | |
| { | |
| "epoch": 4.7756410256410255, | |
| "grad_norm": 0.5704120262786105, | |
| "learning_rate": 2.518654426551592e-07, | |
| "loss": 0.4061, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2327512800693512, | |
| "step": 1490, | |
| "valid_targets_mean": 1950.7, | |
| "valid_targets_min": 424 | |
| }, | |
| { | |
| "epoch": 4.791666666666667, | |
| "grad_norm": 0.5892101716728328, | |
| "learning_rate": 2.1770275813536746e-07, | |
| "loss": 0.3985, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.19074735045433044, | |
| "step": 1495, | |
| "valid_targets_mean": 1708.5, | |
| "valid_targets_min": 530 | |
| }, | |
| { | |
| "epoch": 4.8076923076923075, | |
| "grad_norm": 0.5211300578683025, | |
| "learning_rate": 1.8601622807244312e-07, | |
| "loss": 0.4217, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21352173388004303, | |
| "step": 1500, | |
| "valid_targets_mean": 2224.9, | |
| "valid_targets_min": 466 | |
| }, | |
| { | |
| "epoch": 4.823717948717949, | |
| "grad_norm": 0.5950739817226455, | |
| "learning_rate": 1.5680981867625566e-07, | |
| "loss": 0.4065, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21843212842941284, | |
| "step": 1505, | |
| "valid_targets_mean": 1907.6, | |
| "valid_targets_min": 477 | |
| }, | |
| { | |
| "epoch": 4.839743589743589, | |
| "grad_norm": 0.4982907259604603, | |
| "learning_rate": 1.3008718571943636e-07, | |
| "loss": 0.4145, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2082642912864685, | |
| "step": 1510, | |
| "valid_targets_mean": 2288.1, | |
| "valid_targets_min": 494 | |
| }, | |
| { | |
| "epoch": 4.855769230769231, | |
| "grad_norm": 0.7118715744452784, | |
| "learning_rate": 1.058516740797777e-07, | |
| "loss": 0.3914, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20714955031871796, | |
| "step": 1515, | |
| "valid_targets_mean": 1673.6, | |
| "valid_targets_min": 453 | |
| }, | |
| { | |
| "epoch": 4.871794871794872, | |
| "grad_norm": 0.636093382876181, | |
| "learning_rate": 8.410631732155062e-08, | |
| "loss": 0.4275, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24439842998981476, | |
| "step": 1520, | |
| "valid_targets_mean": 1889.8, | |
| "valid_targets_min": 369 | |
| }, | |
| { | |
| "epoch": 4.887820512820513, | |
| "grad_norm": 0.4919592475036989, | |
| "learning_rate": 6.485383731580142e-08, | |
| "loss": 0.3889, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14140692353248596, | |
| "step": 1525, | |
| "valid_targets_mean": 1333.6, | |
| "valid_targets_min": 329 | |
| }, | |
| { | |
| "epoch": 4.903846153846154, | |
| "grad_norm": 0.46488274884652137, | |
| "learning_rate": 4.809664389964441e-08, | |
| "loss": 0.4254, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21400034427642822, | |
| "step": 1530, | |
| "valid_targets_mean": 2693.9, | |
| "valid_targets_min": 523 | |
| }, | |
| { | |
| "epoch": 4.919871794871795, | |
| "grad_norm": 0.5040958449228231, | |
| "learning_rate": 3.383683457463649e-08, | |
| "loss": 0.3597, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1598944067955017, | |
| "step": 1535, | |
| "valid_targets_mean": 1648.6, | |
| "valid_targets_min": 371 | |
| }, | |
| { | |
| "epoch": 4.935897435897436, | |
| "grad_norm": 0.525773314679964, | |
| "learning_rate": 2.207619424421381e-08, | |
| "loss": 0.4026, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20952820777893066, | |
| "step": 1540, | |
| "valid_targets_mean": 2557.2, | |
| "valid_targets_min": 543 | |
| }, | |
| { | |
| "epoch": 4.951923076923077, | |
| "grad_norm": 0.5186226211951818, | |
| "learning_rate": 1.281619499029274e-08, | |
| "loss": 0.4238, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23216500878334045, | |
| "step": 1545, | |
| "valid_targets_mean": 2368.4, | |
| "valid_targets_min": 554 | |
| }, | |
| { | |
| "epoch": 4.967948717948718, | |
| "grad_norm": 0.5881562216015847, | |
| "learning_rate": 6.057995888997248e-09, | |
| "loss": 0.384, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20564129948616028, | |
| "step": 1550, | |
| "valid_targets_mean": 1534.4, | |
| "valid_targets_min": 530 | |
| }, | |
| { | |
| "epoch": 4.983974358974359, | |
| "grad_norm": 0.5078947883853017, | |
| "learning_rate": 1.8024428655794012e-09, | |
| "loss": 0.3829, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21099376678466797, | |
| "step": 1555, | |
| "valid_targets_mean": 2262.2, | |
| "valid_targets_min": 342 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "grad_norm": 0.5570674341743406, | |
| "learning_rate": 5.00685885418406e-11, | |
| "loss": 0.4035, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24144230782985687, | |
| "step": 1560, | |
| "valid_targets_mean": 2344.4, | |
| "valid_targets_min": 827 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24144230782985687, | |
| "step": 1560, | |
| "total_flos": 4.577212452957061e+17, | |
| "train_loss": 0.45931496092906365, | |
| "train_runtime": 11477.9832, | |
| "train_samples_per_second": 4.348, | |
| "train_steps_per_second": 0.136, | |
| "valid_targets_mean": 2344.4, | |
| "valid_targets_min": 827 | |
| } | |
| ], | |
| "logging_steps": 5, | |
| "max_steps": 1560, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 5, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": false, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 4.577212452957061e+17, | |
| "train_batch_size": 1, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |