diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,13654 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 7.0, + "eval_steps": 500, + "global_step": 6187, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.005659309564233163, + "grad_norm": 9.914572457905718, + "learning_rate": 2.584814216478191e-07, + "loss": 0.5975, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24669498205184937, + "step": 5, + "valid_targets_mean": 2722.6, + "valid_targets_min": 458 + }, + { + "epoch": 0.011318619128466326, + "grad_norm": 10.438922592985111, + "learning_rate": 5.815831987075929e-07, + "loss": 0.5129, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2588742971420288, + "step": 10, + "valid_targets_mean": 3113.1, + "valid_targets_min": 2461 + }, + { + "epoch": 0.01697792869269949, + "grad_norm": 9.005813832655129, + "learning_rate": 9.046849757673668e-07, + "loss": 0.457, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2645283341407776, + "step": 15, + "valid_targets_mean": 3648.1, + "valid_targets_min": 2068 + }, + { + "epoch": 0.022637238256932653, + "grad_norm": 8.708674112283445, + "learning_rate": 1.2277867528271405e-06, + "loss": 0.5059, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24403518438339233, + "step": 20, + "valid_targets_mean": 3423.6, + "valid_targets_min": 2425 + }, + { + "epoch": 0.028296547821165818, + "grad_norm": 6.904960870586402, + "learning_rate": 1.5508885298869145e-06, + "loss": 0.4493, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21857085824012756, + "step": 25, + "valid_targets_mean": 2724.1, + "valid_targets_min": 1198 + }, + { + "epoch": 0.03395585738539898, + "grad_norm": 8.365998763309381, + "learning_rate": 1.8739903069466882e-06, + "loss": 0.5155, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.34842467308044434, + "step": 30, + "valid_targets_mean": 1232.5, + "valid_targets_min": 807 + }, + { + "epoch": 0.039615166949632144, + "grad_norm": 4.063830159552931, + "learning_rate": 2.197092084006462e-06, + "loss": 0.6663, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2383221983909607, + "step": 35, + "valid_targets_mean": 3487.4, + "valid_targets_min": 2351 + }, + { + "epoch": 0.045274476513865305, + "grad_norm": 3.7857119761047264, + "learning_rate": 2.5201938610662364e-06, + "loss": 0.3914, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.28476348519325256, + "step": 40, + "valid_targets_mean": 3126.4, + "valid_targets_min": 781 + }, + { + "epoch": 0.050933786078098474, + "grad_norm": 2.4522464500436354, + "learning_rate": 2.84329563812601e-06, + "loss": 0.3957, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1743302047252655, + "step": 45, + "valid_targets_mean": 3474.8, + "valid_targets_min": 799 + }, + { + "epoch": 0.056593095642331635, + "grad_norm": 1.3790997307109338, + "learning_rate": 3.166397415185784e-06, + "loss": 0.3205, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13955508172512054, + "step": 50, + "valid_targets_mean": 5026.1, + "valid_targets_min": 2802 + }, + { + "epoch": 0.0622524052065648, + "grad_norm": 1.1583679812963346, + "learning_rate": 3.489499192245558e-06, + "loss": 0.2673, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1148873046040535, + "step": 55, + "valid_targets_mean": 2611.0, + "valid_targets_min": 574 + }, + { + "epoch": 0.06791171477079797, + "grad_norm": 0.9944469930765569, + "learning_rate": 3.812600969305332e-06, + "loss": 0.286, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13937455415725708, + "step": 60, + "valid_targets_mean": 4036.5, + "valid_targets_min": 2642 + }, + { + "epoch": 0.07357102433503113, + "grad_norm": 1.4470532850206264, + "learning_rate": 4.1357027463651056e-06, + "loss": 0.2839, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18129996955394745, + "step": 65, + "valid_targets_mean": 1960.5, + "valid_targets_min": 593 + }, + { + "epoch": 0.07923033389926429, + "grad_norm": 0.7634408558045028, + "learning_rate": 4.458804523424879e-06, + "loss": 0.3062, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10794656723737717, + "step": 70, + "valid_targets_mean": 3354.2, + "valid_targets_min": 1353 + }, + { + "epoch": 0.08488964346349745, + "grad_norm": 0.9349363140842072, + "learning_rate": 4.781906300484653e-06, + "loss": 0.2458, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14142100512981415, + "step": 75, + "valid_targets_mean": 2300.9, + "valid_targets_min": 818 + }, + { + "epoch": 0.09054895302773061, + "grad_norm": 0.6983888013920614, + "learning_rate": 5.105008077544427e-06, + "loss": 0.2412, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08703906089067459, + "step": 80, + "valid_targets_mean": 2295.8, + "valid_targets_min": 934 + }, + { + "epoch": 0.09620826259196379, + "grad_norm": 0.6714658796752845, + "learning_rate": 5.4281098546042014e-06, + "loss": 0.2072, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10908772051334381, + "step": 85, + "valid_targets_mean": 3524.4, + "valid_targets_min": 2798 + }, + { + "epoch": 0.10186757215619695, + "grad_norm": 0.7099703827584309, + "learning_rate": 5.751211631663974e-06, + "loss": 0.2518, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10576918721199036, + "step": 90, + "valid_targets_mean": 2810.1, + "valid_targets_min": 471 + }, + { + "epoch": 0.10752688172043011, + "grad_norm": 0.8464358873303437, + "learning_rate": 6.074313408723749e-06, + "loss": 0.2415, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13659444451332092, + "step": 95, + "valid_targets_mean": 3042.8, + "valid_targets_min": 903 + }, + { + "epoch": 0.11318619128466327, + "grad_norm": 0.6075699914515762, + "learning_rate": 6.397415185783522e-06, + "loss": 0.2321, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08541509509086609, + "step": 100, + "valid_targets_mean": 3081.6, + "valid_targets_min": 474 + }, + { + "epoch": 0.11884550084889643, + "grad_norm": 0.6669739914330303, + "learning_rate": 6.7205169628432965e-06, + "loss": 0.2259, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10760879516601562, + "step": 105, + "valid_targets_mean": 3468.0, + "valid_targets_min": 640 + }, + { + "epoch": 0.1245048104131296, + "grad_norm": 0.5208329931917438, + "learning_rate": 7.043618739903069e-06, + "loss": 0.264, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07734555006027222, + "step": 110, + "valid_targets_mean": 3438.4, + "valid_targets_min": 902 + }, + { + "epoch": 0.13016411997736277, + "grad_norm": 0.5811453804314388, + "learning_rate": 7.366720516962844e-06, + "loss": 0.1893, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09379353374242783, + "step": 115, + "valid_targets_mean": 3212.8, + "valid_targets_min": 1837 + }, + { + "epoch": 0.13582342954159593, + "grad_norm": 1.364458940018079, + "learning_rate": 7.689822294022618e-06, + "loss": 0.2649, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21079102158546448, + "step": 120, + "valid_targets_mean": 1358.8, + "valid_targets_min": 756 + }, + { + "epoch": 0.1414827391058291, + "grad_norm": 0.8256973772819219, + "learning_rate": 8.012924071082391e-06, + "loss": 0.236, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16139847040176392, + "step": 125, + "valid_targets_mean": 2896.2, + "valid_targets_min": 1072 + }, + { + "epoch": 0.14714204867006225, + "grad_norm": 0.6225640827519211, + "learning_rate": 8.336025848142165e-06, + "loss": 0.1892, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05986971780657768, + "step": 130, + "valid_targets_mean": 2908.1, + "valid_targets_min": 895 + }, + { + "epoch": 0.15280135823429541, + "grad_norm": 1.1717941040574062, + "learning_rate": 8.659127625201939e-06, + "loss": 0.3577, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2685242295265198, + "step": 135, + "valid_targets_mean": 2562.2, + "valid_targets_min": 878 + }, + { + "epoch": 0.15846066779852858, + "grad_norm": 0.6408657487445313, + "learning_rate": 8.982229402261713e-06, + "loss": 0.3165, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07971537113189697, + "step": 140, + "valid_targets_mean": 2087.6, + "valid_targets_min": 756 + }, + { + "epoch": 0.16411997736276174, + "grad_norm": 0.5543030970031532, + "learning_rate": 9.305331179321486e-06, + "loss": 0.2047, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.075836680829525, + "step": 145, + "valid_targets_mean": 3421.8, + "valid_targets_min": 2963 + }, + { + "epoch": 0.1697792869269949, + "grad_norm": 1.3463459246904992, + "learning_rate": 9.62843295638126e-06, + "loss": 0.2878, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3113308250904083, + "step": 150, + "valid_targets_mean": 2375.2, + "valid_targets_min": 1153 + }, + { + "epoch": 0.17543859649122806, + "grad_norm": 0.42318491917252643, + "learning_rate": 9.951534733441036e-06, + "loss": 0.202, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07192079722881317, + "step": 155, + "valid_targets_mean": 4886.2, + "valid_targets_min": 1700 + }, + { + "epoch": 0.18109790605546122, + "grad_norm": 0.4595289279004153, + "learning_rate": 1.0274636510500808e-05, + "loss": 0.17, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.056570179760456085, + "step": 160, + "valid_targets_mean": 3965.1, + "valid_targets_min": 789 + }, + { + "epoch": 0.1867572156196944, + "grad_norm": 0.5163853394413724, + "learning_rate": 1.0597738287560582e-05, + "loss": 0.1851, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09697248041629791, + "step": 165, + "valid_targets_mean": 3771.5, + "valid_targets_min": 995 + }, + { + "epoch": 0.19241652518392757, + "grad_norm": 0.5311230674646791, + "learning_rate": 1.0920840064620357e-05, + "loss": 0.1701, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05757971107959747, + "step": 170, + "valid_targets_mean": 2220.8, + "valid_targets_min": 488 + }, + { + "epoch": 0.19807583474816073, + "grad_norm": 0.5390819623332389, + "learning_rate": 1.124394184168013e-05, + "loss": 0.177, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09788284450769424, + "step": 175, + "valid_targets_mean": 3635.4, + "valid_targets_min": 2437 + }, + { + "epoch": 0.2037351443123939, + "grad_norm": 0.7926312286793373, + "learning_rate": 1.1567043618739904e-05, + "loss": 0.1653, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06946968287229538, + "step": 180, + "valid_targets_mean": 1752.1, + "valid_targets_min": 712 + }, + { + "epoch": 0.20939445387662706, + "grad_norm": 0.48293318766038856, + "learning_rate": 1.1890145395799677e-05, + "loss": 0.1643, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08222252130508423, + "step": 185, + "valid_targets_mean": 3713.4, + "valid_targets_min": 2422 + }, + { + "epoch": 0.21505376344086022, + "grad_norm": 0.4971735351453549, + "learning_rate": 1.2213247172859452e-05, + "loss": 0.1466, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07657366245985031, + "step": 190, + "valid_targets_mean": 3998.5, + "valid_targets_min": 3643 + }, + { + "epoch": 0.22071307300509338, + "grad_norm": 0.4956226134209193, + "learning_rate": 1.2536348949919226e-05, + "loss": 0.2331, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.058330804109573364, + "step": 195, + "valid_targets_mean": 3319.0, + "valid_targets_min": 772 + }, + { + "epoch": 0.22637238256932654, + "grad_norm": 0.5575941398181454, + "learning_rate": 1.2859450726979e-05, + "loss": 0.1389, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.061472129076719284, + "step": 200, + "valid_targets_mean": 2934.8, + "valid_targets_min": 722 + }, + { + "epoch": 0.2320316921335597, + "grad_norm": 0.44793904268623025, + "learning_rate": 1.3182552504038773e-05, + "loss": 0.1505, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05814511701464653, + "step": 205, + "valid_targets_mean": 3656.4, + "valid_targets_min": 2788 + }, + { + "epoch": 0.23769100169779286, + "grad_norm": 0.550319153108588, + "learning_rate": 1.3505654281098549e-05, + "loss": 0.1518, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08354094624519348, + "step": 210, + "valid_targets_mean": 3202.5, + "valid_targets_min": 948 + }, + { + "epoch": 0.24335031126202603, + "grad_norm": 0.9127899909892149, + "learning_rate": 1.382875605815832e-05, + "loss": 0.1774, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08435185253620148, + "step": 215, + "valid_targets_mean": 1002.0, + "valid_targets_min": 490 + }, + { + "epoch": 0.2490096208262592, + "grad_norm": 0.4576999004835555, + "learning_rate": 1.4151857835218094e-05, + "loss": 0.1972, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08616667985916138, + "step": 220, + "valid_targets_mean": 3751.9, + "valid_targets_min": 2800 + }, + { + "epoch": 0.2546689303904924, + "grad_norm": 0.7328421759963798, + "learning_rate": 1.4474959612277868e-05, + "loss": 0.2081, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14095443487167358, + "step": 225, + "valid_targets_mean": 2641.8, + "valid_targets_min": 896 + }, + { + "epoch": 0.26032823995472554, + "grad_norm": 0.6898350003645934, + "learning_rate": 1.4798061389337644e-05, + "loss": 0.177, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10551305115222931, + "step": 230, + "valid_targets_mean": 3199.0, + "valid_targets_min": 1333 + }, + { + "epoch": 0.2659875495189587, + "grad_norm": 0.6579151997851256, + "learning_rate": 1.5121163166397417e-05, + "loss": 0.199, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09268459677696228, + "step": 235, + "valid_targets_mean": 2548.1, + "valid_targets_min": 693 + }, + { + "epoch": 0.27164685908319186, + "grad_norm": 0.6879982282446822, + "learning_rate": 1.544426494345719e-05, + "loss": 0.2075, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06383742392063141, + "step": 240, + "valid_targets_mean": 2482.9, + "valid_targets_min": 803 + }, + { + "epoch": 0.277306168647425, + "grad_norm": 0.6380660382935808, + "learning_rate": 1.5767366720516963e-05, + "loss": 0.1641, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08546900004148483, + "step": 245, + "valid_targets_mean": 3239.2, + "valid_targets_min": 2525 + }, + { + "epoch": 0.2829654782116582, + "grad_norm": 0.7993055032467087, + "learning_rate": 1.609046849757674e-05, + "loss": 0.1952, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07405424118041992, + "step": 250, + "valid_targets_mean": 3806.1, + "valid_targets_min": 2393 + }, + { + "epoch": 0.28862478777589134, + "grad_norm": 0.5672369044442851, + "learning_rate": 1.641357027463651e-05, + "loss": 0.1751, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07551759481430054, + "step": 255, + "valid_targets_mean": 2990.9, + "valid_targets_min": 804 + }, + { + "epoch": 0.2942840973401245, + "grad_norm": 0.5175339323333864, + "learning_rate": 1.6736672051696286e-05, + "loss": 0.1533, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07782085239887238, + "step": 260, + "valid_targets_mean": 4885.8, + "valid_targets_min": 3695 + }, + { + "epoch": 0.29994340690435767, + "grad_norm": 0.6525626849943169, + "learning_rate": 1.7059773828756058e-05, + "loss": 0.1436, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08853702247142792, + "step": 265, + "valid_targets_mean": 3128.8, + "valid_targets_min": 596 + }, + { + "epoch": 0.30560271646859083, + "grad_norm": 0.41556102017718966, + "learning_rate": 1.7382875605815834e-05, + "loss": 0.1371, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05114654079079628, + "step": 270, + "valid_targets_mean": 4567.8, + "valid_targets_min": 3804 + }, + { + "epoch": 0.311262026032824, + "grad_norm": 0.5266957741817604, + "learning_rate": 1.770597738287561e-05, + "loss": 0.1487, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.057920850813388824, + "step": 275, + "valid_targets_mean": 3853.1, + "valid_targets_min": 2862 + }, + { + "epoch": 0.31692133559705715, + "grad_norm": 0.8261713489653706, + "learning_rate": 1.802907915993538e-05, + "loss": 0.1804, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10130302608013153, + "step": 280, + "valid_targets_mean": 1072.6, + "valid_targets_min": 497 + }, + { + "epoch": 0.3225806451612903, + "grad_norm": 0.515135125153566, + "learning_rate": 1.8352180936995153e-05, + "loss": 0.1519, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08355299383401871, + "step": 285, + "valid_targets_mean": 4191.0, + "valid_targets_min": 3918 + }, + { + "epoch": 0.3282399547255235, + "grad_norm": 0.9516259042811721, + "learning_rate": 1.867528271405493e-05, + "loss": 0.1712, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07686220109462738, + "step": 290, + "valid_targets_mean": 2121.9, + "valid_targets_min": 785 + }, + { + "epoch": 0.33389926428975664, + "grad_norm": 0.5129659467647532, + "learning_rate": 1.8998384491114704e-05, + "loss": 0.1253, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06434328854084015, + "step": 295, + "valid_targets_mean": 3451.6, + "valid_targets_min": 905 + }, + { + "epoch": 0.3395585738539898, + "grad_norm": 0.5908315094002874, + "learning_rate": 1.9321486268174476e-05, + "loss": 0.1619, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11656630784273148, + "step": 300, + "valid_targets_mean": 2929.2, + "valid_targets_min": 657 + }, + { + "epoch": 0.34521788341822296, + "grad_norm": 0.48165238988141923, + "learning_rate": 1.9644588045234248e-05, + "loss": 0.153, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0766848623752594, + "step": 305, + "valid_targets_mean": 3798.9, + "valid_targets_min": 3184 + }, + { + "epoch": 0.3508771929824561, + "grad_norm": 0.7047255808494555, + "learning_rate": 1.9967689822294024e-05, + "loss": 0.1347, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07988546043634415, + "step": 310, + "valid_targets_mean": 1463.9, + "valid_targets_min": 627 + }, + { + "epoch": 0.3565365025466893, + "grad_norm": 0.6410892495479411, + "learning_rate": 2.0290791599353796e-05, + "loss": 0.1561, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07225817441940308, + "step": 315, + "valid_targets_mean": 1701.5, + "valid_targets_min": 827 + }, + { + "epoch": 0.36219581211092244, + "grad_norm": 0.6561577601314719, + "learning_rate": 2.0613893376413575e-05, + "loss": 0.1587, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07047143578529358, + "step": 320, + "valid_targets_mean": 1789.9, + "valid_targets_min": 940 + }, + { + "epoch": 0.3678551216751556, + "grad_norm": 0.5386901508448514, + "learning_rate": 2.0936995153473347e-05, + "loss": 0.1777, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06335115432739258, + "step": 325, + "valid_targets_mean": 2943.8, + "valid_targets_min": 958 + }, + { + "epoch": 0.3735144312393888, + "grad_norm": 0.5744258368534766, + "learning_rate": 2.1260096930533122e-05, + "loss": 0.1484, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.059802595525979996, + "step": 330, + "valid_targets_mean": 1859.2, + "valid_targets_min": 719 + }, + { + "epoch": 0.379173740803622, + "grad_norm": 0.6154652522754883, + "learning_rate": 2.1583198707592894e-05, + "loss": 0.1793, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.052433621138334274, + "step": 335, + "valid_targets_mean": 1703.9, + "valid_targets_min": 759 + }, + { + "epoch": 0.38483305036785515, + "grad_norm": 1.051344151463182, + "learning_rate": 2.1906300484652666e-05, + "loss": 0.1768, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17945857346057892, + "step": 340, + "valid_targets_mean": 1556.6, + "valid_targets_min": 521 + }, + { + "epoch": 0.3904923599320883, + "grad_norm": 0.5630507856667745, + "learning_rate": 2.2229402261712442e-05, + "loss": 0.1455, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09105045348405838, + "step": 345, + "valid_targets_mean": 3267.6, + "valid_targets_min": 1032 + }, + { + "epoch": 0.39615166949632147, + "grad_norm": 0.4460141163792507, + "learning_rate": 2.2552504038772214e-05, + "loss": 0.1257, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05309152603149414, + "step": 350, + "valid_targets_mean": 3609.0, + "valid_targets_min": 2741 + }, + { + "epoch": 0.40181097906055463, + "grad_norm": 0.5180312641700419, + "learning_rate": 2.2875605815831986e-05, + "loss": 0.1441, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06146761775016785, + "step": 355, + "valid_targets_mean": 2735.8, + "valid_targets_min": 1692 + }, + { + "epoch": 0.4074702886247878, + "grad_norm": 0.5409993793906661, + "learning_rate": 2.3198707592891765e-05, + "loss": 0.1874, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08394213020801544, + "step": 360, + "valid_targets_mean": 3693.4, + "valid_targets_min": 2215 + }, + { + "epoch": 0.41312959818902095, + "grad_norm": 0.5147494120605212, + "learning_rate": 2.3521809369951537e-05, + "loss": 0.122, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05552713945508003, + "step": 365, + "valid_targets_mean": 3109.9, + "valid_targets_min": 723 + }, + { + "epoch": 0.4187889077532541, + "grad_norm": 0.4317534882574773, + "learning_rate": 2.3844911147011312e-05, + "loss": 0.1438, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.059747494757175446, + "step": 370, + "valid_targets_mean": 4984.2, + "valid_targets_min": 3282 + }, + { + "epoch": 0.4244482173174873, + "grad_norm": 0.45080990032392254, + "learning_rate": 2.4168012924071084e-05, + "loss": 0.1369, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05908074602484703, + "step": 375, + "valid_targets_mean": 3863.4, + "valid_targets_min": 995 + }, + { + "epoch": 0.43010752688172044, + "grad_norm": 0.41875129087410196, + "learning_rate": 2.449111470113086e-05, + "loss": 0.1194, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0577741339802742, + "step": 380, + "valid_targets_mean": 4201.0, + "valid_targets_min": 949 + }, + { + "epoch": 0.4357668364459536, + "grad_norm": 0.5377109399775152, + "learning_rate": 2.4814216478190632e-05, + "loss": 0.1518, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09177965670824051, + "step": 385, + "valid_targets_mean": 4321.6, + "valid_targets_min": 2724 + }, + { + "epoch": 0.44142614601018676, + "grad_norm": 0.4501846938217892, + "learning_rate": 2.5137318255250404e-05, + "loss": 0.1276, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04853462427854538, + "step": 390, + "valid_targets_mean": 3384.5, + "valid_targets_min": 424 + }, + { + "epoch": 0.4470854555744199, + "grad_norm": 0.5225479075414039, + "learning_rate": 2.546042003231018e-05, + "loss": 0.1722, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.057531870901584625, + "step": 395, + "valid_targets_mean": 2987.2, + "valid_targets_min": 531 + }, + { + "epoch": 0.4527447651386531, + "grad_norm": 0.6093343138899836, + "learning_rate": 2.5783521809369955e-05, + "loss": 0.1616, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05708811804652214, + "step": 400, + "valid_targets_mean": 3469.5, + "valid_targets_min": 1195 + }, + { + "epoch": 0.45840407470288624, + "grad_norm": 0.47514259971920086, + "learning_rate": 2.610662358642973e-05, + "loss": 0.1229, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0996779128909111, + "step": 405, + "valid_targets_mean": 2682.8, + "valid_targets_min": 722 + }, + { + "epoch": 0.4640633842671194, + "grad_norm": 0.4415829951174395, + "learning_rate": 2.6429725363489502e-05, + "loss": 0.165, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.061897553503513336, + "step": 410, + "valid_targets_mean": 3496.9, + "valid_targets_min": 1251 + }, + { + "epoch": 0.46972269383135257, + "grad_norm": 0.5725019400302965, + "learning_rate": 2.6752827140549274e-05, + "loss": 0.1356, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07528968155384064, + "step": 415, + "valid_targets_mean": 2912.5, + "valid_targets_min": 970 + }, + { + "epoch": 0.47538200339558573, + "grad_norm": 0.8717286597062152, + "learning_rate": 2.707592891760905e-05, + "loss": 0.1592, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07313666492700577, + "step": 420, + "valid_targets_mean": 1423.6, + "valid_targets_min": 634 + }, + { + "epoch": 0.4810413129598189, + "grad_norm": 0.7946161029444309, + "learning_rate": 2.7399030694668822e-05, + "loss": 0.173, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11132633686065674, + "step": 425, + "valid_targets_mean": 1423.9, + "valid_targets_min": 685 + }, + { + "epoch": 0.48670062252405205, + "grad_norm": 0.5451746393923975, + "learning_rate": 2.7722132471728597e-05, + "loss": 0.1855, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05576259642839432, + "step": 430, + "valid_targets_mean": 2932.5, + "valid_targets_min": 593 + }, + { + "epoch": 0.4923599320882852, + "grad_norm": 0.43001732817202887, + "learning_rate": 2.804523424878837e-05, + "loss": 0.1452, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.057551369071006775, + "step": 435, + "valid_targets_mean": 3945.8, + "valid_targets_min": 3129 + }, + { + "epoch": 0.4980192416525184, + "grad_norm": 0.5296946263488329, + "learning_rate": 2.8368336025848148e-05, + "loss": 0.1521, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08257763087749481, + "step": 440, + "valid_targets_mean": 3621.1, + "valid_targets_min": 859 + }, + { + "epoch": 0.5036785512167515, + "grad_norm": 0.6039488014877009, + "learning_rate": 2.869143780290792e-05, + "loss": 0.1377, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07461287826299667, + "step": 445, + "valid_targets_mean": 2553.0, + "valid_targets_min": 616 + }, + { + "epoch": 0.5093378607809848, + "grad_norm": 0.5640597101205123, + "learning_rate": 2.9014539579967692e-05, + "loss": 0.1367, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06084398552775383, + "step": 450, + "valid_targets_mean": 2690.5, + "valid_targets_min": 541 + }, + { + "epoch": 0.5149971703452179, + "grad_norm": 1.0840649717270363, + "learning_rate": 2.9337641357027468e-05, + "loss": 0.1323, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08512777090072632, + "step": 455, + "valid_targets_mean": 906.4, + "valid_targets_min": 598 + }, + { + "epoch": 0.5206564799094511, + "grad_norm": 0.48613293315128153, + "learning_rate": 2.966074313408724e-05, + "loss": 0.1603, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0669226422905922, + "step": 460, + "valid_targets_mean": 3147.2, + "valid_targets_min": 1101 + }, + { + "epoch": 0.5263157894736842, + "grad_norm": 0.5745050629760342, + "learning_rate": 2.9983844911147012e-05, + "loss": 0.1238, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0562899075448513, + "step": 465, + "valid_targets_mean": 2331.8, + "valid_targets_min": 680 + }, + { + "epoch": 0.5319750990379174, + "grad_norm": 0.5104054659000805, + "learning_rate": 3.0306946688206787e-05, + "loss": 0.1331, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07346203923225403, + "step": 470, + "valid_targets_mean": 2577.0, + "valid_targets_min": 774 + }, + { + "epoch": 0.5376344086021505, + "grad_norm": 0.49848622482429666, + "learning_rate": 3.063004846526656e-05, + "loss": 0.1525, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07757288217544556, + "step": 475, + "valid_targets_mean": 4140.1, + "valid_targets_min": 1474 + }, + { + "epoch": 0.5432937181663837, + "grad_norm": 0.3015793802224776, + "learning_rate": 3.095315024232634e-05, + "loss": 0.1177, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04140617698431015, + "step": 480, + "valid_targets_mean": 5498.9, + "valid_targets_min": 3677 + }, + { + "epoch": 0.5489530277306168, + "grad_norm": 0.4832074611140062, + "learning_rate": 3.127625201938611e-05, + "loss": 0.1319, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.084229476749897, + "step": 485, + "valid_targets_mean": 3352.5, + "valid_targets_min": 675 + }, + { + "epoch": 0.55461233729485, + "grad_norm": 0.4470848264185768, + "learning_rate": 3.159935379644588e-05, + "loss": 0.1259, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04923580586910248, + "step": 490, + "valid_targets_mean": 3902.5, + "valid_targets_min": 795 + }, + { + "epoch": 0.5602716468590832, + "grad_norm": 0.6112578592770107, + "learning_rate": 3.1922455573505654e-05, + "loss": 0.2602, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11462072283029556, + "step": 495, + "valid_targets_mean": 2495.0, + "valid_targets_min": 605 + }, + { + "epoch": 0.5659309564233164, + "grad_norm": 0.5125681408522123, + "learning_rate": 3.224555735056543e-05, + "loss": 0.1241, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0655694305896759, + "step": 500, + "valid_targets_mean": 3298.6, + "valid_targets_min": 985 + }, + { + "epoch": 0.5715902659875495, + "grad_norm": 0.5812333572331605, + "learning_rate": 3.2568659127625205e-05, + "loss": 0.1428, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07321052998304367, + "step": 505, + "valid_targets_mean": 3505.2, + "valid_targets_min": 878 + }, + { + "epoch": 0.5772495755517827, + "grad_norm": 0.7865928436925346, + "learning_rate": 3.289176090468498e-05, + "loss": 0.1666, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16015928983688354, + "step": 510, + "valid_targets_mean": 2289.5, + "valid_targets_min": 1214 + }, + { + "epoch": 0.5829088851160158, + "grad_norm": 0.5824181661697244, + "learning_rate": 3.321486268174475e-05, + "loss": 0.2887, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1408720165491104, + "step": 515, + "valid_targets_mean": 5734.0, + "valid_targets_min": 4389 + }, + { + "epoch": 0.588568194680249, + "grad_norm": 0.4567419002970561, + "learning_rate": 3.353796445880453e-05, + "loss": 0.2791, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13881754875183105, + "step": 520, + "valid_targets_mean": 7783.9, + "valid_targets_min": 4541 + }, + { + "epoch": 0.5942275042444821, + "grad_norm": 0.5465419415444335, + "learning_rate": 3.38610662358643e-05, + "loss": 0.2773, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16764724254608154, + "step": 525, + "valid_targets_mean": 6178.1, + "valid_targets_min": 4245 + }, + { + "epoch": 0.5998868138087153, + "grad_norm": 0.48682040378983465, + "learning_rate": 3.418416801292407e-05, + "loss": 0.27, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13740578293800354, + "step": 530, + "valid_targets_mean": 6757.1, + "valid_targets_min": 4108 + }, + { + "epoch": 0.6055461233729486, + "grad_norm": 0.5366939974052881, + "learning_rate": 3.450726978998385e-05, + "loss": 0.2753, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15044130384922028, + "step": 535, + "valid_targets_mean": 6910.1, + "valid_targets_min": 4689 + }, + { + "epoch": 0.6112054329371817, + "grad_norm": 0.5279487744274057, + "learning_rate": 3.483037156704362e-05, + "loss": 0.2692, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13835439085960388, + "step": 540, + "valid_targets_mean": 5941.6, + "valid_targets_min": 4398 + }, + { + "epoch": 0.6168647425014149, + "grad_norm": 0.44020640113224385, + "learning_rate": 3.5153473344103395e-05, + "loss": 0.264, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12045064568519592, + "step": 545, + "valid_targets_mean": 6833.0, + "valid_targets_min": 4826 + }, + { + "epoch": 0.622524052065648, + "grad_norm": 0.5251322912691838, + "learning_rate": 3.547657512116317e-05, + "loss": 0.2607, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13780143857002258, + "step": 550, + "valid_targets_mean": 6255.4, + "valid_targets_min": 4525 + }, + { + "epoch": 0.6281833616298812, + "grad_norm": 0.39348436596192704, + "learning_rate": 3.579967689822294e-05, + "loss": 0.2651, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11609511077404022, + "step": 555, + "valid_targets_mean": 7195.8, + "valid_targets_min": 4970 + }, + { + "epoch": 0.6338426711941143, + "grad_norm": 0.5273638269180607, + "learning_rate": 3.612277867528272e-05, + "loss": 0.2451, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10894660651683807, + "step": 560, + "valid_targets_mean": 6242.6, + "valid_targets_min": 4552 + }, + { + "epoch": 0.6395019807583475, + "grad_norm": 0.9122789048666337, + "learning_rate": 3.644588045234249e-05, + "loss": 0.2585, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12121409177780151, + "step": 565, + "valid_targets_mean": 6830.8, + "valid_targets_min": 3009 + }, + { + "epoch": 0.6451612903225806, + "grad_norm": 0.5131792448186862, + "learning_rate": 3.676898222940227e-05, + "loss": 0.2431, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1229555606842041, + "step": 570, + "valid_targets_mean": 7165.4, + "valid_targets_min": 5304 + }, + { + "epoch": 0.6508205998868138, + "grad_norm": 0.46605359625276815, + "learning_rate": 3.709208400646204e-05, + "loss": 0.246, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10549833625555038, + "step": 575, + "valid_targets_mean": 5539.0, + "valid_targets_min": 3815 + }, + { + "epoch": 0.656479909451047, + "grad_norm": 0.45106077332688427, + "learning_rate": 3.741518578352181e-05, + "loss": 0.2267, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11322154104709625, + "step": 580, + "valid_targets_mean": 6168.2, + "valid_targets_min": 5417 + }, + { + "epoch": 0.6621392190152802, + "grad_norm": 0.4255427199157256, + "learning_rate": 3.7738287560581585e-05, + "loss": 0.2289, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12274488806724548, + "step": 585, + "valid_targets_mean": 8675.8, + "valid_targets_min": 4939 + }, + { + "epoch": 0.6677985285795133, + "grad_norm": 0.4484378096406636, + "learning_rate": 3.806138933764136e-05, + "loss": 0.2467, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11915712058544159, + "step": 590, + "valid_targets_mean": 6676.6, + "valid_targets_min": 5361 + }, + { + "epoch": 0.6734578381437465, + "grad_norm": 0.5019318747313286, + "learning_rate": 3.838449111470113e-05, + "loss": 0.2416, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08802414685487747, + "step": 595, + "valid_targets_mean": 6001.9, + "valid_targets_min": 4384 + }, + { + "epoch": 0.6791171477079796, + "grad_norm": 0.4418873429670228, + "learning_rate": 3.870759289176091e-05, + "loss": 0.2221, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10844217240810394, + "step": 600, + "valid_targets_mean": 6403.6, + "valid_targets_min": 4823 + }, + { + "epoch": 0.6847764572722128, + "grad_norm": 0.4440434674725396, + "learning_rate": 3.903069466882068e-05, + "loss": 0.2184, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10608142614364624, + "step": 605, + "valid_targets_mean": 6612.9, + "valid_targets_min": 5243 + }, + { + "epoch": 0.6904357668364459, + "grad_norm": 0.4503422259878201, + "learning_rate": 3.935379644588046e-05, + "loss": 0.2311, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11856935918331146, + "step": 610, + "valid_targets_mean": 6701.5, + "valid_targets_min": 4868 + }, + { + "epoch": 0.6960950764006791, + "grad_norm": 0.4923644673669522, + "learning_rate": 3.967689822294023e-05, + "loss": 0.2552, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14276891946792603, + "step": 615, + "valid_targets_mean": 6796.9, + "valid_targets_min": 4988 + }, + { + "epoch": 0.7017543859649122, + "grad_norm": 0.5460690074752241, + "learning_rate": 4e-05, + "loss": 0.2319, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12587660551071167, + "step": 620, + "valid_targets_mean": 5708.9, + "valid_targets_min": 4499 + }, + { + "epoch": 0.7074136955291455, + "grad_norm": 0.5015534018290505, + "learning_rate": 3.999992044178504e-05, + "loss": 0.2354, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15661609172821045, + "step": 625, + "valid_targets_mean": 6788.8, + "valid_targets_min": 5433 + }, + { + "epoch": 0.7130730050933786, + "grad_norm": 0.45796575600474154, + "learning_rate": 3.9999681767773104e-05, + "loss": 0.2435, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12810951471328735, + "step": 630, + "valid_targets_mean": 6085.2, + "valid_targets_min": 4619 + }, + { + "epoch": 0.7187323146576118, + "grad_norm": 0.4528405633708562, + "learning_rate": 3.999928397986304e-05, + "loss": 0.2647, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1050669401884079, + "step": 635, + "valid_targets_mean": 5615.4, + "valid_targets_min": 4596 + }, + { + "epoch": 0.7243916242218449, + "grad_norm": 0.4898932826575515, + "learning_rate": 3.9998727081219585e-05, + "loss": 0.2665, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11108730733394623, + "step": 640, + "valid_targets_mean": 6138.2, + "valid_targets_min": 4581 + }, + { + "epoch": 0.7300509337860781, + "grad_norm": 0.4861221135887663, + "learning_rate": 3.999801107627332e-05, + "loss": 0.2354, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11672437191009521, + "step": 645, + "valid_targets_mean": 6245.5, + "valid_targets_min": 5647 + }, + { + "epoch": 0.7357102433503112, + "grad_norm": 0.48066951658004287, + "learning_rate": 3.9997135970720655e-05, + "loss": 0.2382, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10546030104160309, + "step": 650, + "valid_targets_mean": 6804.4, + "valid_targets_min": 4398 + }, + { + "epoch": 0.7413695529145444, + "grad_norm": 0.5251677921215162, + "learning_rate": 3.9996101771523766e-05, + "loss": 0.2386, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1344216763973236, + "step": 655, + "valid_targets_mean": 6334.9, + "valid_targets_min": 4418 + }, + { + "epoch": 0.7470288624787776, + "grad_norm": 0.708638598353069, + "learning_rate": 3.999490848691057e-05, + "loss": 0.2436, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.124361053109169, + "step": 660, + "valid_targets_mean": 6267.6, + "valid_targets_min": 4969 + }, + { + "epoch": 0.7526881720430108, + "grad_norm": 0.4277091013318971, + "learning_rate": 3.999355612637461e-05, + "loss": 0.2276, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10481563210487366, + "step": 665, + "valid_targets_mean": 7183.8, + "valid_targets_min": 5042 + }, + { + "epoch": 0.758347481607244, + "grad_norm": 0.5009841519742476, + "learning_rate": 3.999204470067504e-05, + "loss": 0.2464, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12315921485424042, + "step": 670, + "valid_targets_mean": 7177.6, + "valid_targets_min": 4625 + }, + { + "epoch": 0.7640067911714771, + "grad_norm": 0.6278369807429757, + "learning_rate": 3.9990374221836484e-05, + "loss": 0.2386, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12054689228534698, + "step": 675, + "valid_targets_mean": 6203.6, + "valid_targets_min": 4820 + }, + { + "epoch": 0.7696661007357103, + "grad_norm": 0.4569399192619534, + "learning_rate": 3.998854470314898e-05, + "loss": 0.222, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11524000763893127, + "step": 680, + "valid_targets_mean": 5743.5, + "valid_targets_min": 4337 + }, + { + "epoch": 0.7753254102999434, + "grad_norm": 0.4303714868511609, + "learning_rate": 3.9986556159167846e-05, + "loss": 0.206, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10841389000415802, + "step": 685, + "valid_targets_mean": 6947.5, + "valid_targets_min": 3281 + }, + { + "epoch": 0.7809847198641766, + "grad_norm": 0.5102847186556951, + "learning_rate": 3.998440860571358e-05, + "loss": 0.2285, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11190865933895111, + "step": 690, + "valid_targets_mean": 6575.4, + "valid_targets_min": 5298 + }, + { + "epoch": 0.7866440294284097, + "grad_norm": 0.5087553508622416, + "learning_rate": 3.998210205987175e-05, + "loss": 0.241, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1291322261095047, + "step": 695, + "valid_targets_mean": 6347.8, + "valid_targets_min": 4543 + }, + { + "epoch": 0.7923033389926429, + "grad_norm": 0.47365975309041325, + "learning_rate": 3.9979636539992805e-05, + "loss": 0.2391, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11513371765613556, + "step": 700, + "valid_targets_mean": 6430.0, + "valid_targets_min": 4713 + }, + { + "epoch": 0.797962648556876, + "grad_norm": 0.4388599291341055, + "learning_rate": 3.9977012065692e-05, + "loss": 0.22, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09919897466897964, + "step": 705, + "valid_targets_mean": 5641.6, + "valid_targets_min": 5269 + }, + { + "epoch": 0.8036219581211093, + "grad_norm": 0.4463919108161072, + "learning_rate": 3.997422865784916e-05, + "loss": 0.2172, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11490724980831146, + "step": 710, + "valid_targets_mean": 6397.2, + "valid_targets_min": 3450 + }, + { + "epoch": 0.8092812676853424, + "grad_norm": 0.4828357038644893, + "learning_rate": 3.99712863386086e-05, + "loss": 0.2291, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11880475282669067, + "step": 715, + "valid_targets_mean": 7338.9, + "valid_targets_min": 4959 + }, + { + "epoch": 0.8149405772495756, + "grad_norm": 0.44858257832899123, + "learning_rate": 3.9968185131378876e-05, + "loss": 0.2316, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12451828271150589, + "step": 720, + "valid_targets_mean": 6312.1, + "valid_targets_min": 5322 + }, + { + "epoch": 0.8205998868138087, + "grad_norm": 0.45496947721958614, + "learning_rate": 3.996492506083264e-05, + "loss": 0.2193, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11696851253509521, + "step": 725, + "valid_targets_mean": 6234.5, + "valid_targets_min": 4716 + }, + { + "epoch": 0.8262591963780419, + "grad_norm": 0.4863152737300233, + "learning_rate": 3.9961506152906445e-05, + "loss": 0.2247, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1271299421787262, + "step": 730, + "valid_targets_mean": 5793.6, + "valid_targets_min": 4309 + }, + { + "epoch": 0.831918505942275, + "grad_norm": 0.4828982888511043, + "learning_rate": 3.995792843480051e-05, + "loss": 0.2336, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11888715624809265, + "step": 735, + "valid_targets_mean": 6092.1, + "valid_targets_min": 4923 + }, + { + "epoch": 0.8375778155065082, + "grad_norm": 0.4435116726374469, + "learning_rate": 3.9954191934978494e-05, + "loss": 0.1984, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11484060436487198, + "step": 740, + "valid_targets_mean": 6810.4, + "valid_targets_min": 4112 + }, + { + "epoch": 0.8432371250707413, + "grad_norm": 1.3926773180242418, + "learning_rate": 3.995029668316735e-05, + "loss": 0.1942, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11206021904945374, + "step": 745, + "valid_targets_mean": 5986.9, + "valid_targets_min": 3761 + }, + { + "epoch": 0.8488964346349746, + "grad_norm": 0.4407981530154995, + "learning_rate": 3.9946242710356994e-05, + "loss": 0.2087, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10129470378160477, + "step": 750, + "valid_targets_mean": 6484.5, + "valid_targets_min": 5426 + }, + { + "epoch": 0.8545557441992077, + "grad_norm": 0.495875967462604, + "learning_rate": 3.994203004880012e-05, + "loss": 0.2153, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10897064954042435, + "step": 755, + "valid_targets_mean": 5448.1, + "valid_targets_min": 3853 + }, + { + "epoch": 0.8602150537634409, + "grad_norm": 0.46378094101859335, + "learning_rate": 3.9937658732011905e-05, + "loss": 0.2431, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12958288192749023, + "step": 760, + "valid_targets_mean": 6877.9, + "valid_targets_min": 5073 + }, + { + "epoch": 0.865874363327674, + "grad_norm": 0.4684871524871734, + "learning_rate": 3.993312879476976e-05, + "loss": 0.2644, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12010237574577332, + "step": 765, + "valid_targets_mean": 6082.2, + "valid_targets_min": 4487 + }, + { + "epoch": 0.8715336728919072, + "grad_norm": 0.44626241622158563, + "learning_rate": 3.992844027311307e-05, + "loss": 0.2145, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09302426874637604, + "step": 770, + "valid_targets_mean": 5412.6, + "valid_targets_min": 4773 + }, + { + "epoch": 0.8771929824561403, + "grad_norm": 0.478268536045561, + "learning_rate": 3.992359320434287e-05, + "loss": 0.2237, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12707342207431793, + "step": 775, + "valid_targets_mean": 6079.9, + "valid_targets_min": 4547 + }, + { + "epoch": 0.8828522920203735, + "grad_norm": 0.4334489650424965, + "learning_rate": 3.9918587627021566e-05, + "loss": 0.219, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11817194521427155, + "step": 780, + "valid_targets_mean": 6327.1, + "valid_targets_min": 4826 + }, + { + "epoch": 0.8885116015846066, + "grad_norm": 0.44064245112430617, + "learning_rate": 3.991342358097265e-05, + "loss": 0.2463, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11938846111297607, + "step": 785, + "valid_targets_mean": 7172.9, + "valid_targets_min": 4949 + }, + { + "epoch": 0.8941709111488398, + "grad_norm": 0.4413441030419955, + "learning_rate": 3.990810110728034e-05, + "loss": 0.2245, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10728093981742859, + "step": 790, + "valid_targets_mean": 7059.6, + "valid_targets_min": 4489 + }, + { + "epoch": 0.8998302207130731, + "grad_norm": 0.4583051851681349, + "learning_rate": 3.99026202482893e-05, + "loss": 0.2261, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10446344316005707, + "step": 795, + "valid_targets_mean": 6002.6, + "valid_targets_min": 4865 + }, + { + "epoch": 0.9054895302773062, + "grad_norm": 0.4528837327688818, + "learning_rate": 3.989698104760425e-05, + "loss": 0.2277, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10144135355949402, + "step": 800, + "valid_targets_mean": 5837.2, + "valid_targets_min": 4452 + }, + { + "epoch": 0.9111488398415394, + "grad_norm": 0.5092026636177043, + "learning_rate": 3.989118355008968e-05, + "loss": 0.2005, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09619605541229248, + "step": 805, + "valid_targets_mean": 5746.0, + "valid_targets_min": 4915 + }, + { + "epoch": 0.9168081494057725, + "grad_norm": 0.45555758125273477, + "learning_rate": 3.988522780186943e-05, + "loss": 0.2173, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08734960854053497, + "step": 810, + "valid_targets_mean": 5819.8, + "valid_targets_min": 3727 + }, + { + "epoch": 0.9224674589700057, + "grad_norm": 0.4284308999114465, + "learning_rate": 3.987911385032638e-05, + "loss": 0.2627, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09507009387016296, + "step": 815, + "valid_targets_mean": 5818.0, + "valid_targets_min": 4482 + }, + { + "epoch": 0.9281267685342388, + "grad_norm": 0.42798736982714913, + "learning_rate": 3.987284174410203e-05, + "loss": 0.2087, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10232369601726532, + "step": 820, + "valid_targets_mean": 6368.6, + "valid_targets_min": 4598 + }, + { + "epoch": 0.933786078098472, + "grad_norm": 0.48771351810473773, + "learning_rate": 3.986641153309615e-05, + "loss": 0.225, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10167411714792252, + "step": 825, + "valid_targets_mean": 5623.0, + "valid_targets_min": 4395 + }, + { + "epoch": 0.9394453876627051, + "grad_norm": 0.5266764822562989, + "learning_rate": 3.985982326846634e-05, + "loss": 0.2292, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11017732322216034, + "step": 830, + "valid_targets_mean": 5788.0, + "valid_targets_min": 4135 + }, + { + "epoch": 0.9451046972269384, + "grad_norm": 0.4531619635923777, + "learning_rate": 3.985307700262765e-05, + "loss": 0.2111, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0969119668006897, + "step": 835, + "valid_targets_mean": 6081.9, + "valid_targets_min": 4262 + }, + { + "epoch": 0.9507640067911715, + "grad_norm": 0.46464108131609194, + "learning_rate": 3.984617278925218e-05, + "loss": 0.2012, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12038950622081757, + "step": 840, + "valid_targets_mean": 7015.5, + "valid_targets_min": 5149 + }, + { + "epoch": 0.9564233163554047, + "grad_norm": 0.514720376818517, + "learning_rate": 3.9839110683268624e-05, + "loss": 0.2062, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09869180619716644, + "step": 845, + "valid_targets_mean": 5913.9, + "valid_targets_min": 3437 + }, + { + "epoch": 0.9620826259196378, + "grad_norm": 0.4471156064631796, + "learning_rate": 3.9831890740861826e-05, + "loss": 0.2121, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10459499061107635, + "step": 850, + "valid_targets_mean": 6859.5, + "valid_targets_min": 3535 + }, + { + "epoch": 0.967741935483871, + "grad_norm": 0.46477473333283703, + "learning_rate": 3.982451301947236e-05, + "loss": 0.2082, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11860593408346176, + "step": 855, + "valid_targets_mean": 6618.5, + "valid_targets_min": 4712 + }, + { + "epoch": 0.9734012450481041, + "grad_norm": 0.40099102800583, + "learning_rate": 3.981697757779606e-05, + "loss": 0.2086, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11344081908464432, + "step": 860, + "valid_targets_mean": 7741.1, + "valid_targets_min": 4987 + }, + { + "epoch": 0.9790605546123373, + "grad_norm": 0.48502045211369704, + "learning_rate": 3.980928447578356e-05, + "loss": 0.2199, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11051562428474426, + "step": 865, + "valid_targets_mean": 6720.6, + "valid_targets_min": 3577 + }, + { + "epoch": 0.9847198641765704, + "grad_norm": 0.49591736111529194, + "learning_rate": 3.98014337746398e-05, + "loss": 0.1977, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1100778579711914, + "step": 870, + "valid_targets_mean": 5528.4, + "valid_targets_min": 3665 + }, + { + "epoch": 0.9903791737408036, + "grad_norm": 0.4699012152819031, + "learning_rate": 3.9793425536823555e-05, + "loss": 0.2249, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10612514615058899, + "step": 875, + "valid_targets_mean": 7081.1, + "valid_targets_min": 5409 + }, + { + "epoch": 0.9960384833050367, + "grad_norm": 0.46536107231874635, + "learning_rate": 3.978525982604695e-05, + "loss": 0.2284, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11614855378866196, + "step": 880, + "valid_targets_mean": 5952.5, + "valid_targets_min": 3545 + }, + { + "epoch": 1.0011318619128466, + "grad_norm": 0.8865438033939964, + "learning_rate": 3.977693670727491e-05, + "loss": 0.225, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09773808717727661, + "step": 885, + "valid_targets_mean": 1525.2, + "valid_targets_min": 534 + }, + { + "epoch": 1.0067911714770799, + "grad_norm": 0.5063370058343002, + "learning_rate": 3.9768456246724675e-05, + "loss": 0.1667, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05781948193907738, + "step": 890, + "valid_targets_mean": 2735.4, + "valid_targets_min": 834 + }, + { + "epoch": 1.0124504810413129, + "grad_norm": 0.4751169750031123, + "learning_rate": 3.97598185118653e-05, + "loss": 0.1423, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06671807169914246, + "step": 895, + "valid_targets_mean": 3253.9, + "valid_targets_min": 2790 + }, + { + "epoch": 1.018109790605546, + "grad_norm": 0.49455224268728337, + "learning_rate": 3.975102357141704e-05, + "loss": 0.1277, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.062212906777858734, + "step": 900, + "valid_targets_mean": 3383.1, + "valid_targets_min": 2133 + }, + { + "epoch": 1.0237691001697793, + "grad_norm": 0.48491435433210656, + "learning_rate": 3.974207149535088e-05, + "loss": 0.1551, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07126481831073761, + "step": 905, + "valid_targets_mean": 2954.8, + "valid_targets_min": 2338 + }, + { + "epoch": 1.0294284097340125, + "grad_norm": 0.4814473235767768, + "learning_rate": 3.9732962354887936e-05, + "loss": 0.1235, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06773301959037781, + "step": 910, + "valid_targets_mean": 2680.4, + "valid_targets_min": 662 + }, + { + "epoch": 1.0350877192982457, + "grad_norm": 1.103079520003271, + "learning_rate": 3.972369622249891e-05, + "loss": 0.1614, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11469177901744843, + "step": 915, + "valid_targets_mean": 1101.0, + "valid_targets_min": 559 + }, + { + "epoch": 1.0407470288624787, + "grad_norm": 0.42949449706213516, + "learning_rate": 3.9714273171903486e-05, + "loss": 0.1797, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05430174618959427, + "step": 920, + "valid_targets_mean": 3490.6, + "valid_targets_min": 1091 + }, + { + "epoch": 1.046406338426712, + "grad_norm": 0.47845132220188324, + "learning_rate": 3.970469327806978e-05, + "loss": 0.138, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06519544124603271, + "step": 925, + "valid_targets_mean": 3061.8, + "valid_targets_min": 947 + }, + { + "epoch": 1.0520656479909452, + "grad_norm": 0.6437310047721856, + "learning_rate": 3.969495661721372e-05, + "loss": 0.1729, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1346655786037445, + "step": 930, + "valid_targets_mean": 2125.4, + "valid_targets_min": 859 + }, + { + "epoch": 1.0577249575551784, + "grad_norm": 0.31829812960731585, + "learning_rate": 3.9685063266798434e-05, + "loss": 0.1232, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06885267049074173, + "step": 935, + "valid_targets_mean": 5837.8, + "valid_targets_min": 636 + }, + { + "epoch": 1.0633842671194114, + "grad_norm": 0.4465363199078046, + "learning_rate": 3.967501330553366e-05, + "loss": 0.1259, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07051503658294678, + "step": 940, + "valid_targets_mean": 2185.0, + "valid_targets_min": 826 + }, + { + "epoch": 1.0690435766836446, + "grad_norm": 0.3705970828279186, + "learning_rate": 3.966480681337508e-05, + "loss": 0.1238, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05657622963190079, + "step": 945, + "valid_targets_mean": 3446.5, + "valid_targets_min": 1096 + }, + { + "epoch": 1.0747028862478778, + "grad_norm": 0.5397895830818321, + "learning_rate": 3.965444387152375e-05, + "loss": 0.1509, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06327497959136963, + "step": 950, + "valid_targets_mean": 2343.1, + "valid_targets_min": 688 + }, + { + "epoch": 1.080362195812111, + "grad_norm": 0.40484299036114096, + "learning_rate": 3.9643924562425365e-05, + "loss": 0.1482, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0464307963848114, + "step": 955, + "valid_targets_mean": 2938.5, + "valid_targets_min": 553 + }, + { + "epoch": 1.086021505376344, + "grad_norm": 0.527669961782359, + "learning_rate": 3.963324896976968e-05, + "loss": 0.1291, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07065701484680176, + "step": 960, + "valid_targets_mean": 2988.9, + "valid_targets_min": 566 + }, + { + "epoch": 1.0916808149405772, + "grad_norm": 0.34631052663197365, + "learning_rate": 3.962241717848979e-05, + "loss": 0.1213, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.049620211124420166, + "step": 965, + "valid_targets_mean": 3230.2, + "valid_targets_min": 1111 + }, + { + "epoch": 1.0973401245048104, + "grad_norm": 0.5785260713409839, + "learning_rate": 3.961142927476151e-05, + "loss": 0.1252, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07470313459634781, + "step": 970, + "valid_targets_mean": 1232.1, + "valid_targets_min": 622 + }, + { + "epoch": 1.1029994340690437, + "grad_norm": 0.7279773364937, + "learning_rate": 3.960028534600264e-05, + "loss": 0.1452, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06258562207221985, + "step": 975, + "valid_targets_mean": 2132.5, + "valid_targets_min": 696 + }, + { + "epoch": 1.1086587436332767, + "grad_norm": 0.47232133102237744, + "learning_rate": 3.9588985480872275e-05, + "loss": 0.1395, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06839032471179962, + "step": 980, + "valid_targets_mean": 3225.2, + "valid_targets_min": 1922 + }, + { + "epoch": 1.1143180531975099, + "grad_norm": 0.4854644284648448, + "learning_rate": 3.9577529769270137e-05, + "loss": 0.1453, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05005858838558197, + "step": 985, + "valid_targets_mean": 2008.5, + "valid_targets_min": 802 + }, + { + "epoch": 1.119977362761743, + "grad_norm": 0.559844287103089, + "learning_rate": 3.9565918302335816e-05, + "loss": 0.1517, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09888643026351929, + "step": 990, + "valid_targets_mean": 1754.1, + "valid_targets_min": 957 + }, + { + "epoch": 1.1256366723259763, + "grad_norm": 0.42889645843313234, + "learning_rate": 3.955415117244807e-05, + "loss": 0.175, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06962975114583969, + "step": 995, + "valid_targets_mean": 2498.5, + "valid_targets_min": 641 + }, + { + "epoch": 1.1312959818902093, + "grad_norm": 0.45602507669650366, + "learning_rate": 3.9542228473224086e-05, + "loss": 0.122, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08043299615383148, + "step": 1000, + "valid_targets_mean": 2364.6, + "valid_targets_min": 924 + }, + { + "epoch": 1.1369552914544425, + "grad_norm": 0.5532316300725741, + "learning_rate": 3.953015029951874e-05, + "loss": 0.1777, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12269890308380127, + "step": 1005, + "valid_targets_mean": 2385.5, + "valid_targets_min": 700 + }, + { + "epoch": 1.1426146010186757, + "grad_norm": 0.5581292862294611, + "learning_rate": 3.9517916747423804e-05, + "loss": 0.1499, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11780844628810883, + "step": 1010, + "valid_targets_mean": 3363.2, + "valid_targets_min": 2731 + }, + { + "epoch": 1.148273910582909, + "grad_norm": 0.574726721027817, + "learning_rate": 3.9505527914267255e-05, + "loss": 0.1333, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1179429143667221, + "step": 1015, + "valid_targets_mean": 2615.0, + "valid_targets_min": 893 + }, + { + "epoch": 1.1539332201471422, + "grad_norm": 0.701124705724105, + "learning_rate": 3.949298389861243e-05, + "loss": 0.3261, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.325585275888443, + "step": 1020, + "valid_targets_mean": 2615.9, + "valid_targets_min": 922 + }, + { + "epoch": 1.1595925297113752, + "grad_norm": 0.6420209776362951, + "learning_rate": 3.948028480025728e-05, + "loss": 0.1742, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05083722993731499, + "step": 1025, + "valid_targets_mean": 1357.4, + "valid_targets_min": 507 + }, + { + "epoch": 1.1652518392756084, + "grad_norm": 0.46908656533863097, + "learning_rate": 3.9467430720233555e-05, + "loss": 0.1477, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07212112843990326, + "step": 1030, + "valid_targets_mean": 3236.9, + "valid_targets_min": 615 + }, + { + "epoch": 1.1709111488398416, + "grad_norm": 0.5681321397261054, + "learning_rate": 3.945442176080604e-05, + "loss": 0.2414, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09335005283355713, + "step": 1035, + "valid_targets_mean": 3023.1, + "valid_targets_min": 826 + }, + { + "epoch": 1.1765704584040746, + "grad_norm": 0.41618756001441537, + "learning_rate": 3.944125802547168e-05, + "loss": 0.1288, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06382972002029419, + "step": 1040, + "valid_targets_mean": 2924.4, + "valid_targets_min": 851 + }, + { + "epoch": 1.1822297679683078, + "grad_norm": 0.39670580092470037, + "learning_rate": 3.942793961895881e-05, + "loss": 0.1247, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08929502218961716, + "step": 1045, + "valid_targets_mean": 4528.4, + "valid_targets_min": 2665 + }, + { + "epoch": 1.187889077532541, + "grad_norm": 0.38695172358297053, + "learning_rate": 3.941446664722629e-05, + "loss": 0.1328, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04372069984674454, + "step": 1050, + "valid_targets_mean": 2820.6, + "valid_targets_min": 595 + }, + { + "epoch": 1.1935483870967742, + "grad_norm": 0.5172254191591813, + "learning_rate": 3.940083921746268e-05, + "loss": 0.1257, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09082646667957306, + "step": 1055, + "valid_targets_mean": 2583.1, + "valid_targets_min": 825 + }, + { + "epoch": 1.1992076966610075, + "grad_norm": 0.4718431022285411, + "learning_rate": 3.938705743808538e-05, + "loss": 0.1215, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.061327312141656876, + "step": 1060, + "valid_targets_mean": 3771.5, + "valid_targets_min": 672 + }, + { + "epoch": 1.2048670062252405, + "grad_norm": 0.39632696337146545, + "learning_rate": 3.9373121418739765e-05, + "loss": 0.116, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04865419119596481, + "step": 1065, + "valid_targets_mean": 3165.0, + "valid_targets_min": 1956 + }, + { + "epoch": 1.2105263157894737, + "grad_norm": 0.42106936959071706, + "learning_rate": 3.935903127029832e-05, + "loss": 0.1244, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05617896094918251, + "step": 1070, + "valid_targets_mean": 3719.8, + "valid_targets_min": 2540 + }, + { + "epoch": 1.216185625353707, + "grad_norm": 0.4528101314505677, + "learning_rate": 3.934478710485975e-05, + "loss": 0.1124, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07928674668073654, + "step": 1075, + "valid_targets_mean": 3447.5, + "valid_targets_min": 1240 + }, + { + "epoch": 1.22184493491794, + "grad_norm": 0.3120643737961244, + "learning_rate": 3.9330389035748086e-05, + "loss": 0.1685, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.03837960213422775, + "step": 1080, + "valid_targets_mean": 2975.2, + "valid_targets_min": 623 + }, + { + "epoch": 1.227504244482173, + "grad_norm": 0.3885296909293876, + "learning_rate": 3.9315837177511816e-05, + "loss": 0.1086, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05274404585361481, + "step": 1085, + "valid_targets_mean": 2960.1, + "valid_targets_min": 583 + }, + { + "epoch": 1.2331635540464063, + "grad_norm": 0.356973749147182, + "learning_rate": 3.93011316459229e-05, + "loss": 0.1137, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05195704102516174, + "step": 1090, + "valid_targets_mean": 3723.5, + "valid_targets_min": 3230 + }, + { + "epoch": 1.2388228636106395, + "grad_norm": 0.39169491503215326, + "learning_rate": 3.928627255797593e-05, + "loss": 0.1105, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.047731343656778336, + "step": 1095, + "valid_targets_mean": 2520.9, + "valid_targets_min": 647 + }, + { + "epoch": 1.2444821731748728, + "grad_norm": 0.6653145002449634, + "learning_rate": 3.927126003188717e-05, + "loss": 0.1768, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13609322905540466, + "step": 1100, + "valid_targets_mean": 1094.5, + "valid_targets_min": 195 + }, + { + "epoch": 1.2501414827391057, + "grad_norm": 0.38343539230965595, + "learning_rate": 3.925609418709358e-05, + "loss": 0.1195, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07983401417732239, + "step": 1105, + "valid_targets_mean": 3635.9, + "valid_targets_min": 3050 + }, + { + "epoch": 1.255800792303339, + "grad_norm": 0.5909637066467904, + "learning_rate": 3.924077514425193e-05, + "loss": 0.1689, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10678569227457047, + "step": 1110, + "valid_targets_mean": 2382.9, + "valid_targets_min": 1087 + }, + { + "epoch": 1.2614601018675722, + "grad_norm": 0.396949567295422, + "learning_rate": 3.922530302523779e-05, + "loss": 0.1304, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.053525328636169434, + "step": 1115, + "valid_targets_mean": 3025.5, + "valid_targets_min": 847 + }, + { + "epoch": 1.2671194114318054, + "grad_norm": 0.579659940889096, + "learning_rate": 3.920967795314456e-05, + "loss": 0.1628, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08805930614471436, + "step": 1120, + "valid_targets_mean": 2263.1, + "valid_targets_min": 916 + }, + { + "epoch": 1.2727787209960386, + "grad_norm": 0.47685523392610285, + "learning_rate": 3.919390005228254e-05, + "loss": 0.1594, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06404339522123337, + "step": 1125, + "valid_targets_mean": 2567.0, + "valid_targets_min": 1062 + }, + { + "epoch": 1.2784380305602716, + "grad_norm": 0.6989711984654079, + "learning_rate": 3.9177969448177884e-05, + "loss": 0.1587, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09563852846622467, + "step": 1130, + "valid_targets_mean": 1443.4, + "valid_targets_min": 771 + }, + { + "epoch": 1.2840973401245048, + "grad_norm": 0.43053766505877294, + "learning_rate": 3.916188626757164e-05, + "loss": 0.126, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0617242157459259, + "step": 1135, + "valid_targets_mean": 2753.2, + "valid_targets_min": 707 + }, + { + "epoch": 1.289756649688738, + "grad_norm": 0.35448411029999566, + "learning_rate": 3.9145650638418724e-05, + "loss": 0.137, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0613369345664978, + "step": 1140, + "valid_targets_mean": 3665.8, + "valid_targets_min": 2850 + }, + { + "epoch": 1.295415959252971, + "grad_norm": 0.3869846628456179, + "learning_rate": 3.91292626898869e-05, + "loss": 0.1259, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04617086797952652, + "step": 1145, + "valid_targets_mean": 2238.2, + "valid_targets_min": 749 + }, + { + "epoch": 1.3010752688172043, + "grad_norm": 0.33613632328457826, + "learning_rate": 3.911272255235576e-05, + "loss": 0.1091, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05651552975177765, + "step": 1150, + "valid_targets_mean": 4488.2, + "valid_targets_min": 1052 + }, + { + "epoch": 1.3067345783814375, + "grad_norm": 0.31285777012499805, + "learning_rate": 3.909603035741568e-05, + "loss": 0.1045, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.056950271129608154, + "step": 1155, + "valid_targets_mean": 4258.2, + "valid_targets_min": 2540 + }, + { + "epoch": 1.3123938879456707, + "grad_norm": 0.9708093653119585, + "learning_rate": 3.90791862378668e-05, + "loss": 0.1192, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04511269927024841, + "step": 1160, + "valid_targets_mean": 3782.5, + "valid_targets_min": 1859 + }, + { + "epoch": 1.318053197509904, + "grad_norm": 0.36781299612359436, + "learning_rate": 3.906219032771791e-05, + "loss": 0.1424, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05373764410614967, + "step": 1165, + "valid_targets_mean": 2879.5, + "valid_targets_min": 893 + }, + { + "epoch": 1.323712507074137, + "grad_norm": 0.4460616004456905, + "learning_rate": 3.904504276218545e-05, + "loss": 0.131, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05803672969341278, + "step": 1170, + "valid_targets_mean": 2072.6, + "valid_targets_min": 753 + }, + { + "epoch": 1.3293718166383701, + "grad_norm": 0.3158007682003136, + "learning_rate": 3.902774367769238e-05, + "loss": 0.1236, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.038449548184871674, + "step": 1175, + "valid_targets_mean": 4205.6, + "valid_targets_min": 3934 + }, + { + "epoch": 1.3350311262026033, + "grad_norm": 0.4783392925529815, + "learning_rate": 3.901029321186715e-05, + "loss": 0.1115, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05803724378347397, + "step": 1180, + "valid_targets_mean": 3174.9, + "valid_targets_min": 1576 + }, + { + "epoch": 1.3406904357668363, + "grad_norm": 0.43233209247081006, + "learning_rate": 3.8992691503542526e-05, + "loss": 0.1314, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07074721157550812, + "step": 1185, + "valid_targets_mean": 3241.8, + "valid_targets_min": 1006 + }, + { + "epoch": 1.3463497453310695, + "grad_norm": 0.2999666063440558, + "learning_rate": 3.8974938692754565e-05, + "loss": 0.112, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.038348324596881866, + "step": 1190, + "valid_targets_mean": 3445.5, + "valid_targets_min": 980 + }, + { + "epoch": 1.3520090548953028, + "grad_norm": 0.5702681832091586, + "learning_rate": 3.895703492074147e-05, + "loss": 0.1237, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08744599670171738, + "step": 1195, + "valid_targets_mean": 2083.9, + "valid_targets_min": 745 + }, + { + "epoch": 1.357668364459536, + "grad_norm": 0.5319499867993535, + "learning_rate": 3.893898032994244e-05, + "loss": 0.1251, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08307814598083496, + "step": 1200, + "valid_targets_mean": 2768.8, + "valid_targets_min": 1020 + }, + { + "epoch": 1.3633276740237692, + "grad_norm": 0.45044487288489926, + "learning_rate": 3.892077506399659e-05, + "loss": 0.1241, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08427537232637405, + "step": 1205, + "valid_targets_mean": 2314.8, + "valid_targets_min": 967 + }, + { + "epoch": 1.3689869835880022, + "grad_norm": 0.43653773157031, + "learning_rate": 3.890241926774176e-05, + "loss": 0.1459, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05483022704720497, + "step": 1210, + "valid_targets_mean": 2462.0, + "valid_targets_min": 1015 + }, + { + "epoch": 1.3746462931522354, + "grad_norm": 0.6209470219739801, + "learning_rate": 3.888391308721339e-05, + "loss": 0.1237, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06672392040491104, + "step": 1215, + "valid_targets_mean": 1214.9, + "valid_targets_min": 705 + }, + { + "epoch": 1.3803056027164686, + "grad_norm": 0.36268128265592686, + "learning_rate": 3.8865256669643345e-05, + "loss": 0.1375, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04450538009405136, + "step": 1220, + "valid_targets_mean": 2364.8, + "valid_targets_min": 622 + }, + { + "epoch": 1.3859649122807016, + "grad_norm": 0.519302521395283, + "learning_rate": 3.884645016345876e-05, + "loss": 0.1504, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04675455018877983, + "step": 1225, + "valid_targets_mean": 3702.2, + "valid_targets_min": 1322 + }, + { + "epoch": 1.3916242218449348, + "grad_norm": 0.38648865826420076, + "learning_rate": 3.882749371828084e-05, + "loss": 0.121, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.057519398629665375, + "step": 1230, + "valid_targets_mean": 2863.1, + "valid_targets_min": 1011 + }, + { + "epoch": 1.397283531409168, + "grad_norm": 0.460184569981678, + "learning_rate": 3.880838748492367e-05, + "loss": 0.1078, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.071271613240242, + "step": 1235, + "valid_targets_mean": 3106.1, + "valid_targets_min": 874 + }, + { + "epoch": 1.4029428409734013, + "grad_norm": 0.5766508644135783, + "learning_rate": 3.878913161539304e-05, + "loss": 0.1262, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08002141118049622, + "step": 1240, + "valid_targets_mean": 3161.4, + "valid_targets_min": 1420 + }, + { + "epoch": 1.4086021505376345, + "grad_norm": 0.3860671704421311, + "learning_rate": 3.876972626288521e-05, + "loss": 0.1423, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04819522798061371, + "step": 1245, + "valid_targets_mean": 2796.0, + "valid_targets_min": 1109 + }, + { + "epoch": 1.4142614601018675, + "grad_norm": 0.35053583101621766, + "learning_rate": 3.87501715817857e-05, + "loss": 0.0992, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04520991072058678, + "step": 1250, + "valid_targets_mean": 1822.9, + "valid_targets_min": 684 + }, + { + "epoch": 1.4199207696661007, + "grad_norm": 0.42084556115909966, + "learning_rate": 3.873046772766806e-05, + "loss": 0.1231, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06229548901319504, + "step": 1255, + "valid_targets_mean": 2389.8, + "valid_targets_min": 671 + }, + { + "epoch": 1.425580079230334, + "grad_norm": 0.33967966643494113, + "learning_rate": 3.871061485729264e-05, + "loss": 0.1108, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.051834587007761, + "step": 1260, + "valid_targets_mean": 4513.8, + "valid_targets_min": 1057 + }, + { + "epoch": 1.4312393887945671, + "grad_norm": 0.35529195183530865, + "learning_rate": 3.8690613128605325e-05, + "loss": 0.0971, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04473290964961052, + "step": 1265, + "valid_targets_mean": 3533.1, + "valid_targets_min": 1243 + }, + { + "epoch": 1.4368986983588004, + "grad_norm": 0.3675645578200872, + "learning_rate": 3.867046270073631e-05, + "loss": 0.126, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05495892092585564, + "step": 1270, + "valid_targets_mean": 3911.2, + "valid_targets_min": 3322 + }, + { + "epoch": 1.4425580079230333, + "grad_norm": 0.5466385885827223, + "learning_rate": 3.8650163733998796e-05, + "loss": 0.1098, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06903824955224991, + "step": 1275, + "valid_targets_mean": 1820.0, + "valid_targets_min": 680 + }, + { + "epoch": 1.4482173174872666, + "grad_norm": 0.43224278596513105, + "learning_rate": 3.862971638988774e-05, + "loss": 0.1441, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06648287177085876, + "step": 1280, + "valid_targets_mean": 3371.9, + "valid_targets_min": 640 + }, + { + "epoch": 1.4538766270514998, + "grad_norm": 0.27648092143925773, + "learning_rate": 3.860912083107856e-05, + "loss": 0.1237, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.03397554159164429, + "step": 1285, + "valid_targets_mean": 3327.0, + "valid_targets_min": 929 + }, + { + "epoch": 1.4595359366157328, + "grad_norm": 0.5125946762898674, + "learning_rate": 3.8588377221425846e-05, + "loss": 0.1375, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07937013357877731, + "step": 1290, + "valid_targets_mean": 3318.0, + "valid_targets_min": 2380 + }, + { + "epoch": 1.465195246179966, + "grad_norm": 0.32570324145205093, + "learning_rate": 3.8567485725962054e-05, + "loss": 0.1118, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05373993515968323, + "step": 1295, + "valid_targets_mean": 3709.4, + "valid_targets_min": 2619 + }, + { + "epoch": 1.4708545557441992, + "grad_norm": 0.6432426263383474, + "learning_rate": 3.8546446510896196e-05, + "loss": 0.1242, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07761438190937042, + "step": 1300, + "valid_targets_mean": 1574.6, + "valid_targets_min": 693 + }, + { + "epoch": 1.4765138653084324, + "grad_norm": 0.3759504861236696, + "learning_rate": 3.8525259743612504e-05, + "loss": 0.1225, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06465225666761398, + "step": 1305, + "valid_targets_mean": 3885.1, + "valid_targets_min": 3129 + }, + { + "epoch": 1.4821731748726656, + "grad_norm": 0.7410415751745506, + "learning_rate": 3.850392559266912e-05, + "loss": 0.1536, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0710059255361557, + "step": 1310, + "valid_targets_mean": 1611.0, + "valid_targets_min": 543 + }, + { + "epoch": 1.4878324844368986, + "grad_norm": 0.4491786380956953, + "learning_rate": 3.848244422779675e-05, + "loss": 0.1541, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0697890967130661, + "step": 1315, + "valid_targets_mean": 2319.6, + "valid_targets_min": 503 + }, + { + "epoch": 1.4934917940011319, + "grad_norm": 0.4775574488067112, + "learning_rate": 3.8460815819897275e-05, + "loss": 0.1242, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07620774209499359, + "step": 1320, + "valid_targets_mean": 1987.9, + "valid_targets_min": 625 + }, + { + "epoch": 1.499151103565365, + "grad_norm": 0.42587142989262655, + "learning_rate": 3.8439040541042477e-05, + "loss": 0.1193, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.052210576832294464, + "step": 1325, + "valid_targets_mean": 3822.8, + "valid_targets_min": 2822 + }, + { + "epoch": 1.504810413129598, + "grad_norm": 0.37466197637926096, + "learning_rate": 3.8417118564472566e-05, + "loss": 0.1179, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06935345381498337, + "step": 1330, + "valid_targets_mean": 3724.5, + "valid_targets_min": 2987 + }, + { + "epoch": 1.5104697226938315, + "grad_norm": 0.34309600816128266, + "learning_rate": 3.8395050064594886e-05, + "loss": 0.1073, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04143518954515457, + "step": 1335, + "valid_targets_mean": 3022.0, + "valid_targets_min": 856 + }, + { + "epoch": 1.5161290322580645, + "grad_norm": 0.8380903918481841, + "learning_rate": 3.8372835216982474e-05, + "loss": 0.1247, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06867626309394836, + "step": 1340, + "valid_targets_mean": 764.2, + "valid_targets_min": 563 + }, + { + "epoch": 1.5217883418222977, + "grad_norm": 0.39129534267372695, + "learning_rate": 3.83504741983727e-05, + "loss": 0.1203, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.045590244233608246, + "step": 1345, + "valid_targets_mean": 3117.1, + "valid_targets_min": 751 + }, + { + "epoch": 1.527447651386531, + "grad_norm": 0.4709592139736282, + "learning_rate": 3.832796718666583e-05, + "loss": 0.1073, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07025882601737976, + "step": 1350, + "valid_targets_mean": 3566.1, + "valid_targets_min": 980 + }, + { + "epoch": 1.533106960950764, + "grad_norm": 0.3754764647286503, + "learning_rate": 3.830531436092363e-05, + "loss": 0.1128, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05809894576668739, + "step": 1355, + "valid_targets_mean": 3589.8, + "valid_targets_min": 2538 + }, + { + "epoch": 1.5387662705149971, + "grad_norm": 0.29918468907103524, + "learning_rate": 3.828251590136795e-05, + "loss": 0.1209, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0431993193924427, + "step": 1360, + "valid_targets_mean": 4182.8, + "valid_targets_min": 485 + }, + { + "epoch": 1.5444255800792304, + "grad_norm": 0.2833736858661739, + "learning_rate": 3.8259571989379256e-05, + "loss": 0.1016, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04630671441555023, + "step": 1365, + "valid_targets_mean": 4832.8, + "valid_targets_min": 2551 + }, + { + "epoch": 1.5500848896434634, + "grad_norm": 0.3338178626223724, + "learning_rate": 3.8236482807495214e-05, + "loss": 0.1132, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05794106796383858, + "step": 1370, + "valid_targets_mean": 4733.6, + "valid_targets_min": 3365 + }, + { + "epoch": 1.5557441992076968, + "grad_norm": 0.504798476371182, + "learning_rate": 3.8213248539409236e-05, + "loss": 0.116, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07776099443435669, + "step": 1375, + "valid_targets_mean": 2954.4, + "valid_targets_min": 872 + }, + { + "epoch": 1.5614035087719298, + "grad_norm": 0.39332659782776147, + "learning_rate": 3.8189869369969016e-05, + "loss": 0.2146, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.058538392186164856, + "step": 1380, + "valid_targets_mean": 2590.9, + "valid_targets_min": 1015 + }, + { + "epoch": 1.567062818336163, + "grad_norm": 0.4071498726340932, + "learning_rate": 3.816634548517505e-05, + "loss": 0.1099, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0820421576499939, + "step": 1385, + "valid_targets_mean": 2342.4, + "valid_targets_min": 967 + }, + { + "epoch": 1.5727221279003962, + "grad_norm": 0.34503766933511393, + "learning_rate": 3.814267707217917e-05, + "loss": 0.1131, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05009127035737038, + "step": 1390, + "valid_targets_mean": 3335.1, + "valid_targets_min": 886 + }, + { + "epoch": 1.5783814374646292, + "grad_norm": 0.49085271652953166, + "learning_rate": 3.8118864319283025e-05, + "loss": 0.1541, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1052684485912323, + "step": 1395, + "valid_targets_mean": 5394.5, + "valid_targets_min": 3513 + }, + { + "epoch": 1.5840407470288624, + "grad_norm": 0.3975070541142933, + "learning_rate": 3.809490741593665e-05, + "loss": 0.2179, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1035846471786499, + "step": 1400, + "valid_targets_mean": 7348.0, + "valid_targets_min": 4327 + }, + { + "epoch": 1.5897000565930957, + "grad_norm": 0.42605183941258545, + "learning_rate": 3.807080655273689e-05, + "loss": 0.2106, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10495330393314362, + "step": 1405, + "valid_targets_mean": 6346.0, + "valid_targets_min": 4480 + }, + { + "epoch": 1.5953593661573287, + "grad_norm": 0.4098843199819319, + "learning_rate": 3.8046561921425895e-05, + "loss": 0.2015, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10180170834064484, + "step": 1410, + "valid_targets_mean": 6343.4, + "valid_targets_min": 4620 + }, + { + "epoch": 1.601018675721562, + "grad_norm": 0.41401813751759137, + "learning_rate": 3.802217371488964e-05, + "loss": 0.1982, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11393437534570694, + "step": 1415, + "valid_targets_mean": 7255.5, + "valid_targets_min": 4873 + }, + { + "epoch": 1.606677985285795, + "grad_norm": 0.4053213144145575, + "learning_rate": 3.799764212715633e-05, + "loss": 0.215, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12544777989387512, + "step": 1420, + "valid_targets_mean": 7472.5, + "valid_targets_min": 4229 + }, + { + "epoch": 1.6123372948500283, + "grad_norm": 0.4481680933994446, + "learning_rate": 3.7972967353394906e-05, + "loss": 0.2107, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11408184468746185, + "step": 1425, + "valid_targets_mean": 5988.0, + "valid_targets_min": 4279 + }, + { + "epoch": 1.6179966044142615, + "grad_norm": 0.4013637757195013, + "learning_rate": 3.794814958991346e-05, + "loss": 0.1988, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08839428424835205, + "step": 1430, + "valid_targets_mean": 5757.6, + "valid_targets_min": 4542 + }, + { + "epoch": 1.6236559139784945, + "grad_norm": 0.5460721320853573, + "learning_rate": 3.792318903415769e-05, + "loss": 0.2215, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09095409512519836, + "step": 1435, + "valid_targets_mean": 2177.2, + "valid_targets_min": 1473 + }, + { + "epoch": 1.629315223542728, + "grad_norm": 0.38132411865496363, + "learning_rate": 3.789808588470932e-05, + "loss": 0.2017, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09979454427957535, + "step": 1440, + "valid_targets_mean": 6128.4, + "valid_targets_min": 5332 + }, + { + "epoch": 1.634974533106961, + "grad_norm": 0.4021534938140799, + "learning_rate": 3.787284034128453e-05, + "loss": 0.2026, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10964792221784592, + "step": 1445, + "valid_targets_mean": 5782.4, + "valid_targets_min": 4612 + }, + { + "epoch": 1.6406338426711942, + "grad_norm": 0.41280595181068663, + "learning_rate": 3.784745260473235e-05, + "loss": 0.2115, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10620203614234924, + "step": 1450, + "valid_targets_mean": 6704.2, + "valid_targets_min": 4491 + }, + { + "epoch": 1.6462931522354274, + "grad_norm": 0.42243167940800497, + "learning_rate": 3.782192287703309e-05, + "loss": 0.2016, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09460029751062393, + "step": 1455, + "valid_targets_mean": 6032.8, + "valid_targets_min": 4268 + }, + { + "epoch": 1.6519524617996604, + "grad_norm": 0.4632810068092794, + "learning_rate": 3.7796251361296695e-05, + "loss": 0.1938, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09115828573703766, + "step": 1460, + "valid_targets_mean": 6025.0, + "valid_targets_min": 4787 + }, + { + "epoch": 1.6576117713638936, + "grad_norm": 0.37677069362563936, + "learning_rate": 3.777043826176117e-05, + "loss": 0.1833, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08965925872325897, + "step": 1465, + "valid_targets_mean": 6030.5, + "valid_targets_min": 5052 + }, + { + "epoch": 1.6632710809281268, + "grad_norm": 0.7634583360643158, + "learning_rate": 3.7744483783790924e-05, + "loss": 0.1898, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09772264957427979, + "step": 1470, + "valid_targets_mean": 6500.6, + "valid_targets_min": 4664 + }, + { + "epoch": 1.6689303904923598, + "grad_norm": 0.4190566877617691, + "learning_rate": 3.771838813387516e-05, + "loss": 0.2004, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08660245686769485, + "step": 1475, + "valid_targets_mean": 5990.5, + "valid_targets_min": 4211 + }, + { + "epoch": 1.6745897000565932, + "grad_norm": 0.43553267639720167, + "learning_rate": 3.7692151519626196e-05, + "loss": 0.2001, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09539982676506042, + "step": 1480, + "valid_targets_mean": 5844.5, + "valid_targets_min": 3898 + }, + { + "epoch": 1.6802490096208262, + "grad_norm": 0.3808590147998363, + "learning_rate": 3.766577414977786e-05, + "loss": 0.1795, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08017526566982269, + "step": 1485, + "valid_targets_mean": 5826.0, + "valid_targets_min": 4800 + }, + { + "epoch": 1.6859083191850595, + "grad_norm": 0.37139397588184625, + "learning_rate": 3.763925623418379e-05, + "loss": 0.1785, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08735372871160507, + "step": 1490, + "valid_targets_mean": 7745.9, + "valid_targets_min": 6651 + }, + { + "epoch": 1.6915676287492927, + "grad_norm": 0.4191773916247659, + "learning_rate": 3.7612597983815797e-05, + "loss": 0.2018, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11389439553022385, + "step": 1495, + "valid_targets_mean": 6058.4, + "valid_targets_min": 3477 + }, + { + "epoch": 1.6972269383135257, + "grad_norm": 0.4133937352156518, + "learning_rate": 3.7585799610762166e-05, + "loss": 0.2188, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10912871360778809, + "step": 1500, + "valid_targets_mean": 5174.0, + "valid_targets_min": 965 + }, + { + "epoch": 1.7028862478777589, + "grad_norm": 0.4120100472086927, + "learning_rate": 3.755886132822596e-05, + "loss": 0.1858, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10974337160587311, + "step": 1505, + "valid_targets_mean": 6904.5, + "valid_targets_min": 5226 + }, + { + "epoch": 1.708545557441992, + "grad_norm": 0.4132924563269811, + "learning_rate": 3.753178335052335e-05, + "loss": 0.1953, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09370157122612, + "step": 1510, + "valid_targets_mean": 6807.5, + "valid_targets_min": 4237 + }, + { + "epoch": 1.714204867006225, + "grad_norm": 0.4360013599921074, + "learning_rate": 3.750456589308189e-05, + "loss": 0.2115, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11260436475276947, + "step": 1515, + "valid_targets_mean": 5527.6, + "valid_targets_min": 3761 + }, + { + "epoch": 1.7198641765704585, + "grad_norm": 0.3869642916804208, + "learning_rate": 3.7477209172438824e-05, + "loss": 0.2276, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12241595983505249, + "step": 1520, + "valid_targets_mean": 7231.9, + "valid_targets_min": 4993 + }, + { + "epoch": 1.7255234861346915, + "grad_norm": 0.43344417263376994, + "learning_rate": 3.744971340623932e-05, + "loss": 0.223, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10435118526220322, + "step": 1525, + "valid_targets_mean": 6576.1, + "valid_targets_min": 5199 + }, + { + "epoch": 1.7311827956989247, + "grad_norm": 0.43576627273847, + "learning_rate": 3.74220788132348e-05, + "loss": 0.1997, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11716549843549728, + "step": 1530, + "valid_targets_mean": 7336.0, + "valid_targets_min": 4534 + }, + { + "epoch": 1.736842105263158, + "grad_norm": 0.4299013929408139, + "learning_rate": 3.739430561328116e-05, + "loss": 0.2015, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09132295846939087, + "step": 1535, + "valid_targets_mean": 6389.6, + "valid_targets_min": 4751 + }, + { + "epoch": 1.742501414827391, + "grad_norm": 0.41927799631687634, + "learning_rate": 3.736639402733699e-05, + "loss": 0.2011, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08794035762548447, + "step": 1540, + "valid_targets_mean": 5551.6, + "valid_targets_min": 3888 + }, + { + "epoch": 1.7481607243916242, + "grad_norm": 0.43051456261992793, + "learning_rate": 3.733834427746192e-05, + "loss": 0.2085, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11216922849416733, + "step": 1545, + "valid_targets_mean": 6779.5, + "valid_targets_min": 4455 + }, + { + "epoch": 1.7538200339558574, + "grad_norm": 0.37161737302703196, + "learning_rate": 3.7310156586814736e-05, + "loss": 0.1964, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10933095961809158, + "step": 1550, + "valid_targets_mean": 7067.5, + "valid_targets_min": 5230 + }, + { + "epoch": 1.7594793435200904, + "grad_norm": 0.3789176263366543, + "learning_rate": 3.7281831179651674e-05, + "loss": 0.2071, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08778786659240723, + "step": 1555, + "valid_targets_mean": 7394.4, + "valid_targets_min": 5668 + }, + { + "epoch": 1.7651386530843238, + "grad_norm": 0.4549485161473529, + "learning_rate": 3.725336828132462e-05, + "loss": 0.2043, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1037561446428299, + "step": 1560, + "valid_targets_mean": 6609.0, + "valid_targets_min": 4871 + }, + { + "epoch": 1.7707979626485568, + "grad_norm": 0.3545247802982652, + "learning_rate": 3.722476811827931e-05, + "loss": 0.1886, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11487674713134766, + "step": 1565, + "valid_targets_mean": 8925.8, + "valid_targets_min": 6195 + }, + { + "epoch": 1.77645727221279, + "grad_norm": 0.36277353523285893, + "learning_rate": 3.719603091805354e-05, + "loss": 0.1767, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08930060267448425, + "step": 1570, + "valid_targets_mean": 7189.8, + "valid_targets_min": 3533 + }, + { + "epoch": 1.7821165817770233, + "grad_norm": 0.45227067238258667, + "learning_rate": 3.716715690927534e-05, + "loss": 0.2059, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11415096372365952, + "step": 1575, + "valid_targets_mean": 7004.8, + "valid_targets_min": 5199 + }, + { + "epoch": 1.7877758913412563, + "grad_norm": 0.45897291873533036, + "learning_rate": 3.713814632166117e-05, + "loss": 0.2071, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10266570001840591, + "step": 1580, + "valid_targets_mean": 5626.2, + "valid_targets_min": 4923 + }, + { + "epoch": 1.7934352009054897, + "grad_norm": 0.40908930308574387, + "learning_rate": 3.7108999386014094e-05, + "loss": 0.1984, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08395086973905563, + "step": 1585, + "valid_targets_mean": 6766.5, + "valid_targets_min": 4920 + }, + { + "epoch": 1.7990945104697227, + "grad_norm": 0.40410711804240085, + "learning_rate": 3.707971633422192e-05, + "loss": 0.1966, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1279432773590088, + "step": 1590, + "valid_targets_mean": 7549.5, + "valid_targets_min": 5551 + }, + { + "epoch": 1.804753820033956, + "grad_norm": 0.3866717411123608, + "learning_rate": 3.705029739925539e-05, + "loss": 0.1825, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08999251574277878, + "step": 1595, + "valid_targets_mean": 6716.9, + "valid_targets_min": 4685 + }, + { + "epoch": 1.8104131295981891, + "grad_norm": 0.5079107039691413, + "learning_rate": 3.702074281516629e-05, + "loss": 0.1993, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08725505322217941, + "step": 1600, + "valid_targets_mean": 5615.0, + "valid_targets_min": 4875 + }, + { + "epoch": 1.8160724391624221, + "grad_norm": 0.3927449142097747, + "learning_rate": 3.699105281708562e-05, + "loss": 0.2006, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08934473991394043, + "step": 1605, + "valid_targets_mean": 6375.9, + "valid_targets_min": 4433 + }, + { + "epoch": 1.8217317487266553, + "grad_norm": 0.47740533561150034, + "learning_rate": 3.69612276412217e-05, + "loss": 0.1913, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09106989204883575, + "step": 1610, + "valid_targets_mean": 5845.8, + "valid_targets_min": 4540 + }, + { + "epoch": 1.8273910582908885, + "grad_norm": 0.42177388710145675, + "learning_rate": 3.693126752485833e-05, + "loss": 0.2032, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12526273727416992, + "step": 1615, + "valid_targets_mean": 7222.8, + "valid_targets_min": 4056 + }, + { + "epoch": 1.8330503678551215, + "grad_norm": 0.4068620675561105, + "learning_rate": 3.6901172706352804e-05, + "loss": 0.1925, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07828503102064133, + "step": 1620, + "valid_targets_mean": 5307.6, + "valid_targets_min": 4499 + }, + { + "epoch": 1.838709677419355, + "grad_norm": 0.38269369152928895, + "learning_rate": 3.687094342513416e-05, + "loss": 0.1685, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08113433420658112, + "step": 1625, + "valid_targets_mean": 6030.1, + "valid_targets_min": 5268 + }, + { + "epoch": 1.844368986983588, + "grad_norm": 0.3863151468513951, + "learning_rate": 3.6840579921701155e-05, + "loss": 0.1732, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0900208055973053, + "step": 1630, + "valid_targets_mean": 6520.6, + "valid_targets_min": 5074 + }, + { + "epoch": 1.8500282965478212, + "grad_norm": 0.37476299112538597, + "learning_rate": 3.68100824376204e-05, + "loss": 0.1815, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08535371720790863, + "step": 1635, + "valid_targets_mean": 6091.6, + "valid_targets_min": 5221 + }, + { + "epoch": 1.8556876061120544, + "grad_norm": 0.4091994709243003, + "learning_rate": 3.6779451215524425e-05, + "loss": 0.1925, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09082099795341492, + "step": 1640, + "valid_targets_mean": 5409.5, + "valid_targets_min": 4642 + }, + { + "epoch": 1.8613469156762874, + "grad_norm": 0.4830949072902286, + "learning_rate": 3.6748686499109784e-05, + "loss": 0.2177, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10957798361778259, + "step": 1645, + "valid_targets_mean": 4272.0, + "valid_targets_min": 3222 + }, + { + "epoch": 1.8670062252405206, + "grad_norm": 0.4151545714626815, + "learning_rate": 3.6717788533135056e-05, + "loss": 0.2263, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09137696027755737, + "step": 1650, + "valid_targets_mean": 6490.6, + "valid_targets_min": 4371 + }, + { + "epoch": 1.8726655348047538, + "grad_norm": 0.44885692213614614, + "learning_rate": 3.6686757563418945e-05, + "loss": 0.1876, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10351642966270447, + "step": 1655, + "valid_targets_mean": 5688.4, + "valid_targets_min": 3689 + }, + { + "epoch": 1.8783248443689868, + "grad_norm": 0.4200752558338508, + "learning_rate": 3.665559383683832e-05, + "loss": 0.197, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1036931574344635, + "step": 1660, + "valid_targets_mean": 6685.9, + "valid_targets_min": 4281 + }, + { + "epoch": 1.8839841539332203, + "grad_norm": 0.42397350541058904, + "learning_rate": 3.6624297601326205e-05, + "loss": 0.198, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11201959103345871, + "step": 1665, + "valid_targets_mean": 6344.2, + "valid_targets_min": 4844 + }, + { + "epoch": 1.8896434634974533, + "grad_norm": 0.4080030610770839, + "learning_rate": 3.659286910586988e-05, + "loss": 0.2112, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09051713347434998, + "step": 1670, + "valid_targets_mean": 5900.6, + "valid_targets_min": 4549 + }, + { + "epoch": 1.8953027730616865, + "grad_norm": 0.4160628972234572, + "learning_rate": 3.656130860050883e-05, + "loss": 0.196, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08832396566867828, + "step": 1675, + "valid_targets_mean": 4921.4, + "valid_targets_min": 2461 + }, + { + "epoch": 1.9009620826259197, + "grad_norm": 0.39791164334313245, + "learning_rate": 3.652961633633282e-05, + "loss": 0.2009, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11802472174167633, + "step": 1680, + "valid_targets_mean": 6683.8, + "valid_targets_min": 4563 + }, + { + "epoch": 1.9066213921901527, + "grad_norm": 0.3566026358241044, + "learning_rate": 3.649779256547984e-05, + "loss": 0.1944, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08277365565299988, + "step": 1685, + "valid_targets_mean": 6517.6, + "valid_targets_min": 5192 + }, + { + "epoch": 1.912280701754386, + "grad_norm": 0.39067941057281313, + "learning_rate": 3.6465837541134114e-05, + "loss": 0.179, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08514373749494553, + "step": 1690, + "valid_targets_mean": 5787.6, + "valid_targets_min": 5186 + }, + { + "epoch": 1.9179400113186191, + "grad_norm": 0.407667620077915, + "learning_rate": 3.643375151752414e-05, + "loss": 0.1932, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08429624140262604, + "step": 1695, + "valid_targets_mean": 5163.2, + "valid_targets_min": 3786 + }, + { + "epoch": 1.9235993208828521, + "grad_norm": 0.4152827211492994, + "learning_rate": 3.6401534749920566e-05, + "loss": 0.2364, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10867131501436234, + "step": 1700, + "valid_targets_mean": 6142.8, + "valid_targets_min": 4560 + }, + { + "epoch": 1.9292586304470856, + "grad_norm": 0.39474558824207884, + "learning_rate": 3.636918749463426e-05, + "loss": 0.185, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10353638231754303, + "step": 1705, + "valid_targets_mean": 5659.8, + "valid_targets_min": 3952 + }, + { + "epoch": 1.9349179400113186, + "grad_norm": 0.4330875696320046, + "learning_rate": 3.633671000901422e-05, + "loss": 0.1984, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11550572514533997, + "step": 1710, + "valid_targets_mean": 6811.5, + "valid_targets_min": 5134 + }, + { + "epoch": 1.9405772495755518, + "grad_norm": 0.4429867842820959, + "learning_rate": 3.63041025514455e-05, + "loss": 0.2038, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10658969730138779, + "step": 1715, + "valid_targets_mean": 6538.6, + "valid_targets_min": 3762 + }, + { + "epoch": 1.946236559139785, + "grad_norm": 0.44278076944927575, + "learning_rate": 3.627136538134723e-05, + "loss": 0.1768, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08601470291614532, + "step": 1720, + "valid_targets_mean": 6285.8, + "valid_targets_min": 4726 + }, + { + "epoch": 1.951895868704018, + "grad_norm": 0.47706316548142697, + "learning_rate": 3.623849875917049e-05, + "loss": 0.1859, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10423313081264496, + "step": 1725, + "valid_targets_mean": 6054.1, + "valid_targets_min": 3724 + }, + { + "epoch": 1.9575551782682514, + "grad_norm": 1.2427803871821272, + "learning_rate": 3.620550294639625e-05, + "loss": 0.1719, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09197145700454712, + "step": 1730, + "valid_targets_mean": 6452.4, + "valid_targets_min": 5578 + }, + { + "epoch": 1.9632144878324844, + "grad_norm": 0.37933050898977705, + "learning_rate": 3.6172378205533316e-05, + "loss": 0.1943, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09184758365154266, + "step": 1735, + "valid_targets_mean": 6827.4, + "valid_targets_min": 5795 + }, + { + "epoch": 1.9688737973967176, + "grad_norm": 0.3713666814054497, + "learning_rate": 3.613912480011621e-05, + "loss": 0.1818, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07646412402391434, + "step": 1740, + "valid_targets_mean": 6744.8, + "valid_targets_min": 5268 + }, + { + "epoch": 1.9745331069609509, + "grad_norm": 0.37046500322729714, + "learning_rate": 3.610574299470308e-05, + "loss": 0.1863, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08873957395553589, + "step": 1745, + "valid_targets_mean": 6928.8, + "valid_targets_min": 4920 + }, + { + "epoch": 1.9801924165251839, + "grad_norm": 0.347080465291547, + "learning_rate": 3.6072233054873634e-05, + "loss": 0.1925, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07456870377063751, + "step": 1750, + "valid_targets_mean": 7381.1, + "valid_targets_min": 5243 + }, + { + "epoch": 1.985851726089417, + "grad_norm": 0.3875616108130273, + "learning_rate": 3.6038595247226946e-05, + "loss": 0.1783, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09468382596969604, + "step": 1755, + "valid_targets_mean": 6154.6, + "valid_targets_min": 4794 + }, + { + "epoch": 1.9915110356536503, + "grad_norm": 0.5613872353671522, + "learning_rate": 3.600482983937943e-05, + "loss": 0.1979, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09273573756217957, + "step": 1760, + "valid_targets_mean": 6188.1, + "valid_targets_min": 5168 + }, + { + "epoch": 1.9971703452178833, + "grad_norm": 0.46965060288835536, + "learning_rate": 3.597093709996263e-05, + "loss": 0.2075, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.124471016228199, + "step": 1765, + "valid_targets_mean": 5461.5, + "valid_targets_min": 4156 + }, + { + "epoch": 2.0022637238256933, + "grad_norm": 0.5343589104355505, + "learning_rate": 3.593691729862114e-05, + "loss": 0.1786, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06375987082719803, + "step": 1770, + "valid_targets_mean": 2736.0, + "valid_targets_min": 876 + }, + { + "epoch": 2.0079230333899263, + "grad_norm": 0.5519379811681631, + "learning_rate": 3.5902770706010414e-05, + "loss": 0.1403, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.052028506994247437, + "step": 1775, + "valid_targets_mean": 1472.5, + "valid_targets_min": 324 + }, + { + "epoch": 2.0135823429541597, + "grad_norm": 0.4039268628622655, + "learning_rate": 3.586849759379466e-05, + "loss": 0.1091, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06458728015422821, + "step": 1780, + "valid_targets_mean": 2699.9, + "valid_targets_min": 1064 + }, + { + "epoch": 2.0192416525183927, + "grad_norm": 0.41595399584522014, + "learning_rate": 3.583409823464464e-05, + "loss": 0.1063, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05470968037843704, + "step": 1785, + "valid_targets_mean": 3137.0, + "valid_targets_min": 710 + }, + { + "epoch": 2.0249009620826257, + "grad_norm": 0.32856953624648133, + "learning_rate": 3.5799572902235506e-05, + "loss": 0.1223, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0377882644534111, + "step": 1790, + "valid_targets_mean": 3472.9, + "valid_targets_min": 1223 + }, + { + "epoch": 2.030560271646859, + "grad_norm": 0.44520318976957635, + "learning_rate": 3.576492187124465e-05, + "loss": 0.1087, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05933263152837753, + "step": 1795, + "valid_targets_mean": 3326.0, + "valid_targets_min": 1431 + }, + { + "epoch": 2.036219581211092, + "grad_norm": 0.6748590008746032, + "learning_rate": 3.5730145417349486e-05, + "loss": 0.1455, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06660880148410797, + "step": 1800, + "valid_targets_mean": 1161.2, + "valid_targets_min": 559 + }, + { + "epoch": 2.0418788907753256, + "grad_norm": 0.3459296124867082, + "learning_rate": 3.569524381722527e-05, + "loss": 0.1411, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05327014625072479, + "step": 1805, + "valid_targets_mean": 3598.2, + "valid_targets_min": 1623 + }, + { + "epoch": 2.0475382003395586, + "grad_norm": 0.42589163559071397, + "learning_rate": 3.5660217348542905e-05, + "loss": 0.1204, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.061662595719099045, + "step": 1810, + "valid_targets_mean": 3178.2, + "valid_targets_min": 846 + }, + { + "epoch": 2.0531975099037916, + "grad_norm": 0.46588853281466014, + "learning_rate": 3.562506628996672e-05, + "loss": 0.1612, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10813078284263611, + "step": 1815, + "valid_targets_mean": 3622.6, + "valid_targets_min": 1949 + }, + { + "epoch": 2.058856819468025, + "grad_norm": 0.3260360817250823, + "learning_rate": 3.558979092115227e-05, + "loss": 0.0881, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04911057651042938, + "step": 1820, + "valid_targets_mean": 3344.9, + "valid_targets_min": 764 + }, + { + "epoch": 2.064516129032258, + "grad_norm": 0.4572750079822724, + "learning_rate": 3.555439152274408e-05, + "loss": 0.1075, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05797174200415611, + "step": 1825, + "valid_targets_mean": 2391.6, + "valid_targets_min": 776 + }, + { + "epoch": 2.0701754385964914, + "grad_norm": 0.39297056511962264, + "learning_rate": 3.551886837637346e-05, + "loss": 0.1085, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.058884136378765106, + "step": 1830, + "valid_targets_mean": 2818.5, + "valid_targets_min": 754 + }, + { + "epoch": 2.0758347481607244, + "grad_norm": 0.4654746519862881, + "learning_rate": 3.548322176465622e-05, + "loss": 0.1306, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06505589187145233, + "step": 1835, + "valid_targets_mean": 1646.6, + "valid_targets_min": 635 + }, + { + "epoch": 2.0814940577249574, + "grad_norm": 0.3796269751062797, + "learning_rate": 3.544745197119042e-05, + "loss": 0.1245, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.056274592876434326, + "step": 1840, + "valid_targets_mean": 3374.8, + "valid_targets_min": 1862 + }, + { + "epoch": 2.087153367289191, + "grad_norm": 0.3970755533915738, + "learning_rate": 3.541155928055418e-05, + "loss": 0.1094, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06704463064670563, + "step": 1845, + "valid_targets_mean": 2837.4, + "valid_targets_min": 574 + }, + { + "epoch": 2.092812676853424, + "grad_norm": 0.45205308230485036, + "learning_rate": 3.537554397830331e-05, + "loss": 0.1054, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.047444507479667664, + "step": 1850, + "valid_targets_mean": 2618.4, + "valid_targets_min": 523 + }, + { + "epoch": 2.098471986417657, + "grad_norm": 0.40906046372009836, + "learning_rate": 3.533940635096915e-05, + "loss": 0.1071, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06280224770307541, + "step": 1855, + "valid_targets_mean": 3548.9, + "valid_targets_min": 2189 + }, + { + "epoch": 2.1041312959818903, + "grad_norm": 0.3594281659067044, + "learning_rate": 3.530314668605621e-05, + "loss": 0.1152, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04078724980354309, + "step": 1860, + "valid_targets_mean": 3564.6, + "valid_targets_min": 827 + }, + { + "epoch": 2.1097906055461233, + "grad_norm": 0.4405119633389643, + "learning_rate": 3.5266765272039895e-05, + "loss": 0.1298, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06051761656999588, + "step": 1865, + "valid_targets_mean": 1936.6, + "valid_targets_min": 597 + }, + { + "epoch": 2.1154499151103567, + "grad_norm": 0.4699306852938723, + "learning_rate": 3.523026239836426e-05, + "loss": 0.1247, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.053375493735075, + "step": 1870, + "valid_targets_mean": 1747.5, + "valid_targets_min": 701 + }, + { + "epoch": 2.1211092246745897, + "grad_norm": 0.5009994914539639, + "learning_rate": 3.5193638355439635e-05, + "loss": 0.1494, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1392790824174881, + "step": 1875, + "valid_targets_mean": 3329.6, + "valid_targets_min": 1652 + }, + { + "epoch": 2.1267685342388227, + "grad_norm": 0.3944392507851324, + "learning_rate": 3.515689343464038e-05, + "loss": 0.1293, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.054778940975666046, + "step": 1880, + "valid_targets_mean": 3117.1, + "valid_targets_min": 786 + }, + { + "epoch": 2.132427843803056, + "grad_norm": 0.5339847438916009, + "learning_rate": 3.512002792830252e-05, + "loss": 0.107, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06564757972955704, + "step": 1885, + "valid_targets_mean": 2574.4, + "valid_targets_min": 971 + }, + { + "epoch": 2.138087153367289, + "grad_norm": 0.39951693830902274, + "learning_rate": 3.508304212972145e-05, + "loss": 0.1494, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.060700491070747375, + "step": 1890, + "valid_targets_mean": 3320.0, + "valid_targets_min": 2610 + }, + { + "epoch": 2.143746462931522, + "grad_norm": 0.3945705461968114, + "learning_rate": 3.504593633314957e-05, + "loss": 0.1294, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.041278015822172165, + "step": 1895, + "valid_targets_mean": 2580.6, + "valid_targets_min": 753 + }, + { + "epoch": 2.1494057724957556, + "grad_norm": 0.505040296343978, + "learning_rate": 3.500871083379398e-05, + "loss": 0.1178, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05414751172065735, + "step": 1900, + "valid_targets_mean": 1537.1, + "valid_targets_min": 619 + }, + { + "epoch": 2.1550650820599886, + "grad_norm": 0.501915030556122, + "learning_rate": 3.497136592781411e-05, + "loss": 0.3208, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13047850131988525, + "step": 1905, + "valid_targets_mean": 2271.2, + "valid_targets_min": 987 + }, + { + "epoch": 2.160724391624222, + "grad_norm": 0.5774470883261211, + "learning_rate": 3.493390191231937e-05, + "loss": 0.1346, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06460921466350555, + "step": 1910, + "valid_targets_mean": 1351.5, + "valid_targets_min": 329 + }, + { + "epoch": 2.166383701188455, + "grad_norm": 0.47231456069415856, + "learning_rate": 3.4896319085366764e-05, + "loss": 0.1191, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.054049719125032425, + "step": 1915, + "valid_targets_mean": 1553.9, + "valid_targets_min": 608 + }, + { + "epoch": 2.172043010752688, + "grad_norm": 0.596868119862558, + "learning_rate": 3.485861774595857e-05, + "loss": 0.2209, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07800990343093872, + "step": 1920, + "valid_targets_mean": 3645.6, + "valid_targets_min": 873 + }, + { + "epoch": 2.1777023203169215, + "grad_norm": 0.2824839933919901, + "learning_rate": 3.482079819403991e-05, + "loss": 0.1018, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.02868354693055153, + "step": 1925, + "valid_targets_mean": 4720.6, + "valid_targets_min": 3529 + }, + { + "epoch": 2.1833616298811545, + "grad_norm": 0.38361826489016726, + "learning_rate": 3.4782860730496385e-05, + "loss": 0.1185, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06481634825468063, + "step": 1930, + "valid_targets_mean": 4437.6, + "valid_targets_min": 2336 + }, + { + "epoch": 2.1890209394453874, + "grad_norm": 0.40780977658908407, + "learning_rate": 3.474480565715168e-05, + "loss": 0.114, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05100560560822487, + "step": 1935, + "valid_targets_mean": 3533.1, + "valid_targets_min": 1226 + }, + { + "epoch": 2.194680249009621, + "grad_norm": 0.3656835709426523, + "learning_rate": 3.470663327676517e-05, + "loss": 0.108, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05036871135234833, + "step": 1940, + "valid_targets_mean": 3299.2, + "valid_targets_min": 798 + }, + { + "epoch": 2.200339558573854, + "grad_norm": 0.4512957818695427, + "learning_rate": 3.466834389302951e-05, + "loss": 0.1019, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04307692497968674, + "step": 1945, + "valid_targets_mean": 1836.9, + "valid_targets_min": 723 + }, + { + "epoch": 2.2059988681380873, + "grad_norm": 0.3946416768429036, + "learning_rate": 3.4629937810568185e-05, + "loss": 0.1035, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05205557867884636, + "step": 1950, + "valid_targets_mean": 3316.0, + "valid_targets_min": 898 + }, + { + "epoch": 2.2116581777023203, + "grad_norm": 0.32804938512357273, + "learning_rate": 3.459141533493315e-05, + "loss": 0.0984, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.039048101752996445, + "step": 1955, + "valid_targets_mean": 4027.2, + "valid_targets_min": 3278 + }, + { + "epoch": 2.2173174872665533, + "grad_norm": 0.45209680916546285, + "learning_rate": 3.455277677260231e-05, + "loss": 0.1068, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04982101917266846, + "step": 1960, + "valid_targets_mean": 2052.8, + "valid_targets_min": 739 + }, + { + "epoch": 2.2229767968307867, + "grad_norm": 0.492249954877648, + "learning_rate": 3.451402243097721e-05, + "loss": 0.1442, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.056385502219200134, + "step": 1965, + "valid_targets_mean": 1574.9, + "valid_targets_min": 538 + }, + { + "epoch": 2.2286361063950197, + "grad_norm": 0.3964539132387015, + "learning_rate": 3.4475152618380456e-05, + "loss": 0.0869, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.041245199739933014, + "step": 1970, + "valid_targets_mean": 1956.9, + "valid_targets_min": 654 + }, + { + "epoch": 2.234295415959253, + "grad_norm": 0.4423259353420391, + "learning_rate": 3.443616764405334e-05, + "loss": 0.1058, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06300957500934601, + "step": 1975, + "valid_targets_mean": 2735.4, + "valid_targets_min": 930 + }, + { + "epoch": 2.239954725523486, + "grad_norm": 0.42042784263990535, + "learning_rate": 3.4397067818153345e-05, + "loss": 0.094, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05541990324854851, + "step": 1980, + "valid_targets_mean": 2965.0, + "valid_targets_min": 684 + }, + { + "epoch": 2.245614035087719, + "grad_norm": 0.3823237810459968, + "learning_rate": 3.435785345175173e-05, + "loss": 0.1549, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.055711839348077774, + "step": 1985, + "valid_targets_mean": 3344.6, + "valid_targets_min": 999 + }, + { + "epoch": 2.2512733446519526, + "grad_norm": 0.49579247528646964, + "learning_rate": 3.431852485683098e-05, + "loss": 0.1186, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06579576432704926, + "step": 1990, + "valid_targets_mean": 2445.4, + "valid_targets_min": 696 + }, + { + "epoch": 2.2569326542161856, + "grad_norm": 0.3744697004230511, + "learning_rate": 3.4279082346282396e-05, + "loss": 0.1303, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05816779285669327, + "step": 1995, + "valid_targets_mean": 3514.9, + "valid_targets_min": 1068 + }, + { + "epoch": 2.2625919637804186, + "grad_norm": 0.43769701229555164, + "learning_rate": 3.423952623390352e-05, + "loss": 0.1144, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04952312633395195, + "step": 2000, + "valid_targets_mean": 1602.8, + "valid_targets_min": 536 + }, + { + "epoch": 2.268251273344652, + "grad_norm": 0.4972723099721578, + "learning_rate": 3.419985683439574e-05, + "loss": 0.1645, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1218375414609909, + "step": 2005, + "valid_targets_mean": 3568.8, + "valid_targets_min": 1507 + }, + { + "epoch": 2.273910582908885, + "grad_norm": 0.40876282439264494, + "learning_rate": 3.416007446336172e-05, + "loss": 0.1187, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05623714625835419, + "step": 2010, + "valid_targets_mean": 3292.2, + "valid_targets_min": 2342 + }, + { + "epoch": 2.279569892473118, + "grad_norm": 0.575333657640531, + "learning_rate": 3.4120179437302885e-05, + "loss": 0.1442, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06543414294719696, + "step": 2015, + "valid_targets_mean": 1567.5, + "valid_targets_min": 687 + }, + { + "epoch": 2.2852292020373515, + "grad_norm": 0.502617446583266, + "learning_rate": 3.408017207361696e-05, + "loss": 0.1037, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.03619558364152908, + "step": 2020, + "valid_targets_mean": 1145.4, + "valid_targets_min": 449 + }, + { + "epoch": 2.2908885116015845, + "grad_norm": 0.3303411430626945, + "learning_rate": 3.4040052690595376e-05, + "loss": 0.1131, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04491014778614044, + "step": 2025, + "valid_targets_mean": 3191.0, + "valid_targets_min": 658 + }, + { + "epoch": 2.296547821165818, + "grad_norm": 0.5931100420211858, + "learning_rate": 3.399982160742079e-05, + "loss": 0.1144, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06616038084030151, + "step": 2030, + "valid_targets_mean": 1924.8, + "valid_targets_min": 577 + }, + { + "epoch": 2.302207130730051, + "grad_norm": 0.32834282624415856, + "learning_rate": 3.3959479144164515e-05, + "loss": 0.0928, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.045235902070999146, + "step": 2035, + "valid_targets_mean": 3992.5, + "valid_targets_min": 726 + }, + { + "epoch": 2.3078664402942843, + "grad_norm": 0.35739919576262585, + "learning_rate": 3.3919025621783996e-05, + "loss": 0.0932, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.054038695991039276, + "step": 2040, + "valid_targets_mean": 3626.4, + "valid_targets_min": 2252 + }, + { + "epoch": 2.3135257498585173, + "grad_norm": 0.4409496519995762, + "learning_rate": 3.387846136212022e-05, + "loss": 0.1037, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04206996411085129, + "step": 2045, + "valid_targets_mean": 1556.2, + "valid_targets_min": 531 + }, + { + "epoch": 2.3191850594227503, + "grad_norm": 0.42359808354879613, + "learning_rate": 3.3837786687895214e-05, + "loss": 0.1241, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.053804170340299606, + "step": 2050, + "valid_targets_mean": 2410.5, + "valid_targets_min": 744 + }, + { + "epoch": 2.3248443689869838, + "grad_norm": 0.41869586549521276, + "learning_rate": 3.3797001922709416e-05, + "loss": 0.1157, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.062466152012348175, + "step": 2055, + "valid_targets_mean": 3795.1, + "valid_targets_min": 3136 + }, + { + "epoch": 2.3305036785512168, + "grad_norm": 0.2961135510626377, + "learning_rate": 3.375610739103913e-05, + "loss": 0.1015, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.043240226805210114, + "step": 2060, + "valid_targets_mean": 3324.4, + "valid_targets_min": 2366 + }, + { + "epoch": 2.3361629881154498, + "grad_norm": 0.35769557911734157, + "learning_rate": 3.371510341823396e-05, + "loss": 0.0995, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.03898439556360245, + "step": 2065, + "valid_targets_mean": 1668.1, + "valid_targets_min": 669 + }, + { + "epoch": 2.341822297679683, + "grad_norm": 0.4078907877168531, + "learning_rate": 3.3673990330514197e-05, + "loss": 0.1218, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06470276415348053, + "step": 2070, + "valid_targets_mean": 3529.8, + "valid_targets_min": 1785 + }, + { + "epoch": 2.347481607243916, + "grad_norm": 0.3050351893984349, + "learning_rate": 3.363276845496822e-05, + "loss": 0.0898, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04898000508546829, + "step": 2075, + "valid_targets_mean": 4120.1, + "valid_targets_min": 3862 + }, + { + "epoch": 2.353140916808149, + "grad_norm": 0.3895061867392029, + "learning_rate": 3.359143811954992e-05, + "loss": 0.1078, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.047017570585012436, + "step": 2080, + "valid_targets_mean": 3062.2, + "valid_targets_min": 539 + }, + { + "epoch": 2.3588002263723826, + "grad_norm": 0.40020922560661926, + "learning_rate": 3.354999965307606e-05, + "loss": 0.1118, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04285983368754387, + "step": 2085, + "valid_targets_mean": 2781.5, + "valid_targets_min": 1016 + }, + { + "epoch": 2.3644595359366156, + "grad_norm": 0.6579214230944497, + "learning_rate": 3.3508453385223684e-05, + "loss": 0.1234, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07328744232654572, + "step": 2090, + "valid_targets_mean": 1102.9, + "valid_targets_min": 888 + }, + { + "epoch": 2.370118845500849, + "grad_norm": 0.4484198722432637, + "learning_rate": 3.346679964652749e-05, + "loss": 0.1129, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.044897790998220444, + "step": 2095, + "valid_targets_mean": 2602.5, + "valid_targets_min": 697 + }, + { + "epoch": 2.375778155065082, + "grad_norm": 0.7923891815342871, + "learning_rate": 3.342503876837718e-05, + "loss": 0.1122, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06976594030857086, + "step": 2100, + "valid_targets_mean": 1592.9, + "valid_targets_min": 605 + }, + { + "epoch": 2.381437464629315, + "grad_norm": 0.44766073428303044, + "learning_rate": 3.3383171083014856e-05, + "loss": 0.1113, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04511480778455734, + "step": 2105, + "valid_targets_mean": 2899.1, + "valid_targets_min": 1117 + }, + { + "epoch": 2.3870967741935485, + "grad_norm": 0.3854842899628016, + "learning_rate": 3.3341196923532336e-05, + "loss": 0.1354, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04914814978837967, + "step": 2110, + "valid_targets_mean": 3376.6, + "valid_targets_min": 1388 + }, + { + "epoch": 2.3927560837577815, + "grad_norm": 0.3421245181290135, + "learning_rate": 3.329911662386855e-05, + "loss": 0.106, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04689233750104904, + "step": 2115, + "valid_targets_mean": 3243.4, + "valid_targets_min": 524 + }, + { + "epoch": 2.398415393322015, + "grad_norm": 0.43526759990227604, + "learning_rate": 3.3256930518806845e-05, + "loss": 0.0968, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04915416240692139, + "step": 2120, + "valid_targets_mean": 2468.0, + "valid_targets_min": 970 + }, + { + "epoch": 2.404074702886248, + "grad_norm": 0.4716090172233965, + "learning_rate": 3.321463894397235e-05, + "loss": 0.1203, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.055890556424856186, + "step": 2125, + "valid_targets_mean": 2959.6, + "valid_targets_min": 877 + }, + { + "epoch": 2.409734012450481, + "grad_norm": 0.33045623066380864, + "learning_rate": 3.317224223582927e-05, + "loss": 0.1144, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04590590298175812, + "step": 2130, + "valid_targets_mean": 3886.5, + "valid_targets_min": 3251 + }, + { + "epoch": 2.4153933220147144, + "grad_norm": 0.495374000160121, + "learning_rate": 3.312974073167825e-05, + "loss": 0.0938, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05848722159862518, + "step": 2135, + "valid_targets_mean": 2695.6, + "valid_targets_min": 564 + }, + { + "epoch": 2.4210526315789473, + "grad_norm": 0.32905133925375857, + "learning_rate": 3.30871347696537e-05, + "loss": 0.1028, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05663274973630905, + "step": 2140, + "valid_targets_mean": 4404.8, + "valid_targets_min": 2555 + }, + { + "epoch": 2.4267119411431803, + "grad_norm": 0.31029857156036833, + "learning_rate": 3.3044424688721016e-05, + "loss": 0.094, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.045422300696372986, + "step": 2145, + "valid_targets_mean": 4410.5, + "valid_targets_min": 3261 + }, + { + "epoch": 2.432371250707414, + "grad_norm": 0.3994352846867125, + "learning_rate": 3.300161082867398e-05, + "loss": 0.0851, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.03942389786243439, + "step": 2150, + "valid_targets_mean": 1768.5, + "valid_targets_min": 618 + }, + { + "epoch": 2.4380305602716468, + "grad_norm": 0.3063122736017613, + "learning_rate": 3.295869353013204e-05, + "loss": 0.1115, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04628031700849533, + "step": 2155, + "valid_targets_mean": 3985.8, + "valid_targets_min": 3213 + }, + { + "epoch": 2.44368986983588, + "grad_norm": 0.35734873295090847, + "learning_rate": 3.291567313453754e-05, + "loss": 0.1077, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.103085957467556, + "step": 2160, + "valid_targets_mean": 3149.1, + "valid_targets_min": 1539 + }, + { + "epoch": 2.449349179400113, + "grad_norm": 0.316704339390023, + "learning_rate": 3.287254998415308e-05, + "loss": 0.1184, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.03836618363857269, + "step": 2165, + "valid_targets_mean": 2094.8, + "valid_targets_min": 509 + }, + { + "epoch": 2.455008488964346, + "grad_norm": 0.31838255897118733, + "learning_rate": 3.282932442205875e-05, + "loss": 0.1088, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.029482107609510422, + "step": 2170, + "valid_targets_mean": 2441.4, + "valid_targets_min": 633 + }, + { + "epoch": 2.4606677985285796, + "grad_norm": 0.42563343610522253, + "learning_rate": 3.2785996792149397e-05, + "loss": 0.1305, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05118153989315033, + "step": 2175, + "valid_targets_mean": 2740.9, + "valid_targets_min": 737 + }, + { + "epoch": 2.4663271080928126, + "grad_norm": 0.35110501564800306, + "learning_rate": 3.274256743913192e-05, + "loss": 0.093, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04503490775823593, + "step": 2180, + "valid_targets_mean": 2726.9, + "valid_targets_min": 492 + }, + { + "epoch": 2.471986417657046, + "grad_norm": 0.5284106993544762, + "learning_rate": 3.2699036708522486e-05, + "loss": 0.118, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07250982522964478, + "step": 2185, + "valid_targets_mean": 1607.9, + "valid_targets_min": 833 + }, + { + "epoch": 2.477645727221279, + "grad_norm": 0.32838112597791635, + "learning_rate": 3.265540494664383e-05, + "loss": 0.0984, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.03342479467391968, + "step": 2190, + "valid_targets_mean": 3502.6, + "valid_targets_min": 923 + }, + { + "epoch": 2.483305036785512, + "grad_norm": 0.6682289383654914, + "learning_rate": 3.261167250062246e-05, + "loss": 0.1478, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07084657996892929, + "step": 2195, + "valid_targets_mean": 1553.9, + "valid_targets_min": 531 + }, + { + "epoch": 2.4889643463497455, + "grad_norm": 0.5788918787085738, + "learning_rate": 3.25678397183859e-05, + "loss": 0.1371, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08993472903966904, + "step": 2200, + "valid_targets_mean": 1825.6, + "valid_targets_min": 575 + }, + { + "epoch": 2.4946236559139785, + "grad_norm": 0.4137059798137688, + "learning_rate": 3.252390694865995e-05, + "loss": 0.1045, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05526465177536011, + "step": 2205, + "valid_targets_mean": 3701.9, + "valid_targets_min": 2784 + }, + { + "epoch": 2.5002829654782115, + "grad_norm": 0.33766167661538804, + "learning_rate": 3.247987454096588e-05, + "loss": 0.1031, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04647237807512283, + "step": 2210, + "valid_targets_mean": 3575.8, + "valid_targets_min": 2525 + }, + { + "epoch": 2.505942275042445, + "grad_norm": 0.33315570907766456, + "learning_rate": 3.2435742845617664e-05, + "loss": 0.0999, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.03852749615907669, + "step": 2215, + "valid_targets_mean": 3081.0, + "valid_targets_min": 970 + }, + { + "epoch": 2.511601584606678, + "grad_norm": 0.46136266798103887, + "learning_rate": 3.2391512213719195e-05, + "loss": 0.0965, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04876357689499855, + "step": 2220, + "valid_targets_mean": 1963.5, + "valid_targets_min": 817 + }, + { + "epoch": 2.517260894170911, + "grad_norm": 0.7264199502052342, + "learning_rate": 3.23471829971615e-05, + "loss": 0.1216, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08215144276618958, + "step": 2225, + "valid_targets_mean": 1270.9, + "valid_targets_min": 637 + }, + { + "epoch": 2.5229202037351444, + "grad_norm": 0.39900134281384, + "learning_rate": 3.230275554861988e-05, + "loss": 0.0912, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04432211071252823, + "step": 2230, + "valid_targets_mean": 2594.1, + "valid_targets_min": 893 + }, + { + "epoch": 2.5285795132993774, + "grad_norm": 0.3613977004667585, + "learning_rate": 3.2258230221551216e-05, + "loss": 0.0952, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04420216754078865, + "step": 2235, + "valid_targets_mean": 3222.2, + "valid_targets_min": 1606 + }, + { + "epoch": 2.534238822863611, + "grad_norm": 0.38220539961473143, + "learning_rate": 3.221360737019105e-05, + "loss": 0.1001, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.046213869005441666, + "step": 2240, + "valid_targets_mean": 3023.8, + "valid_targets_min": 678 + }, + { + "epoch": 2.539898132427844, + "grad_norm": 0.30782875399074944, + "learning_rate": 3.216888734955082e-05, + "loss": 0.1058, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04539969190955162, + "step": 2245, + "valid_targets_mean": 4263.8, + "valid_targets_min": 1078 + }, + { + "epoch": 2.5455574419920772, + "grad_norm": 0.30145130948033844, + "learning_rate": 3.2124070515415026e-05, + "loss": 0.0913, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.053275216370821, + "step": 2250, + "valid_targets_mean": 4150.0, + "valid_targets_min": 761 + }, + { + "epoch": 2.5512167515563102, + "grad_norm": 0.4481699482875616, + "learning_rate": 3.20791572243384e-05, + "loss": 0.1013, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05159508064389229, + "step": 2255, + "valid_targets_mean": 1368.4, + "valid_targets_min": 538 + }, + { + "epoch": 2.556876061120543, + "grad_norm": 0.49014476191694045, + "learning_rate": 3.2034147833643085e-05, + "loss": 0.1171, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09637396037578583, + "step": 2260, + "valid_targets_mean": 2630.4, + "valid_targets_min": 780 + }, + { + "epoch": 2.5625353706847767, + "grad_norm": 0.36000553066283647, + "learning_rate": 3.1989042701415735e-05, + "loss": 0.1782, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04270230978727341, + "step": 2265, + "valid_targets_mean": 3353.9, + "valid_targets_min": 2347 + }, + { + "epoch": 2.5681946802490097, + "grad_norm": 0.3837524899503797, + "learning_rate": 3.194384218650475e-05, + "loss": 0.1001, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0583348423242569, + "step": 2270, + "valid_targets_mean": 3880.2, + "valid_targets_min": 2473 + }, + { + "epoch": 2.5738539898132426, + "grad_norm": 0.36597215244104614, + "learning_rate": 3.1898546648517344e-05, + "loss": 0.0977, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05031474679708481, + "step": 2275, + "valid_targets_mean": 3556.4, + "valid_targets_min": 2245 + }, + { + "epoch": 2.579513299377476, + "grad_norm": 0.45686116420648, + "learning_rate": 3.185315644781674e-05, + "loss": 0.1609, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10099667310714722, + "step": 2280, + "valid_targets_mean": 7477.8, + "valid_targets_min": 6099 + }, + { + "epoch": 2.585172608941709, + "grad_norm": 0.3813066709188113, + "learning_rate": 3.1807671945519275e-05, + "loss": 0.1901, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09658841788768768, + "step": 2285, + "valid_targets_mean": 6943.4, + "valid_targets_min": 5066 + }, + { + "epoch": 2.590831918505942, + "grad_norm": 0.35783658375355415, + "learning_rate": 3.1762093503491515e-05, + "loss": 0.1836, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08664655685424805, + "step": 2290, + "valid_targets_mean": 6880.4, + "valid_targets_min": 4923 + }, + { + "epoch": 2.5964912280701755, + "grad_norm": 0.4859680581264661, + "learning_rate": 3.171642148434743e-05, + "loss": 0.1782, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0868193656206131, + "step": 2295, + "valid_targets_mean": 5842.0, + "valid_targets_min": 4099 + }, + { + "epoch": 2.6021505376344085, + "grad_norm": 0.3853649936254644, + "learning_rate": 3.167065625144544e-05, + "loss": 0.1764, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09008608758449554, + "step": 2300, + "valid_targets_mean": 7123.2, + "valid_targets_min": 4797 + }, + { + "epoch": 2.6078098471986415, + "grad_norm": 0.406067923281501, + "learning_rate": 3.1624798168885566e-05, + "loss": 0.1959, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1066371351480484, + "step": 2305, + "valid_targets_mean": 6494.1, + "valid_targets_min": 3853 + }, + { + "epoch": 2.613469156762875, + "grad_norm": 0.3751345658950459, + "learning_rate": 3.157884760150653e-05, + "loss": 0.185, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09144657850265503, + "step": 2310, + "valid_targets_mean": 6439.9, + "valid_targets_min": 4586 + }, + { + "epoch": 2.619128466327108, + "grad_norm": 0.3936546927517281, + "learning_rate": 3.153280491488285e-05, + "loss": 0.1766, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10087771713733673, + "step": 2315, + "valid_targets_mean": 7251.0, + "valid_targets_min": 4728 + }, + { + "epoch": 2.6247877758913414, + "grad_norm": 0.47571609372351176, + "learning_rate": 3.148667047532191e-05, + "loss": 0.2079, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14377912878990173, + "step": 2320, + "valid_targets_mean": 6318.6, + "valid_targets_min": 4375 + }, + { + "epoch": 2.6304470854555744, + "grad_norm": 0.37395928511083404, + "learning_rate": 3.1440444649861084e-05, + "loss": 0.1729, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09737908840179443, + "step": 2325, + "valid_targets_mean": 7099.8, + "valid_targets_min": 4561 + }, + { + "epoch": 2.636106395019808, + "grad_norm": 0.4152141257019434, + "learning_rate": 3.139412780626478e-05, + "loss": 0.1879, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09891627728939056, + "step": 2330, + "valid_targets_mean": 4537.5, + "valid_targets_min": 3439 + }, + { + "epoch": 2.641765704584041, + "grad_norm": 0.39151064653586565, + "learning_rate": 3.134772031302156e-05, + "loss": 0.1779, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07997195422649384, + "step": 2335, + "valid_targets_mean": 5579.2, + "valid_targets_min": 3635 + }, + { + "epoch": 2.647425014148274, + "grad_norm": 0.4044443479051716, + "learning_rate": 3.130122253934113e-05, + "loss": 0.1844, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08669149875640869, + "step": 2340, + "valid_targets_mean": 6693.9, + "valid_targets_min": 4393 + }, + { + "epoch": 2.6530843237125072, + "grad_norm": 0.37016901815230124, + "learning_rate": 3.125463485515149e-05, + "loss": 0.17, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09471109509468079, + "step": 2345, + "valid_targets_mean": 7235.5, + "valid_targets_min": 4749 + }, + { + "epoch": 2.6587436332767402, + "grad_norm": 0.48451993488716116, + "learning_rate": 3.1207957631095944e-05, + "loss": 0.1701, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05042736977338791, + "step": 2350, + "valid_targets_mean": 2121.1, + "valid_targets_min": 1062 + }, + { + "epoch": 2.6644029428409732, + "grad_norm": 0.40757994716047474, + "learning_rate": 3.116119123853014e-05, + "loss": 0.1695, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09001005440950394, + "step": 2355, + "valid_targets_mean": 6031.5, + "valid_targets_min": 3119 + }, + { + "epoch": 2.6700622524052067, + "grad_norm": 0.421113323241765, + "learning_rate": 3.1114336049519165e-05, + "loss": 0.179, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09557072818279266, + "step": 2360, + "valid_targets_mean": 5516.4, + "valid_targets_min": 3774 + }, + { + "epoch": 2.6757215619694397, + "grad_norm": 0.4645749415310214, + "learning_rate": 3.106739243683453e-05, + "loss": 0.1771, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08603259921073914, + "step": 2365, + "valid_targets_mean": 6248.4, + "valid_targets_min": 5140 + }, + { + "epoch": 2.6813808715336727, + "grad_norm": 0.3593632108982063, + "learning_rate": 3.1020360773951225e-05, + "loss": 0.1586, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08268121629953384, + "step": 2370, + "valid_targets_mean": 6634.4, + "valid_targets_min": 5665 + }, + { + "epoch": 2.687040181097906, + "grad_norm": 0.39292843471019895, + "learning_rate": 3.097324143504479e-05, + "loss": 0.166, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09025974571704865, + "step": 2375, + "valid_targets_mean": 6851.5, + "valid_targets_min": 4428 + }, + { + "epoch": 2.692699490662139, + "grad_norm": 0.42834346718026955, + "learning_rate": 3.092603479498826e-05, + "loss": 0.1899, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10379134118556976, + "step": 2380, + "valid_targets_mean": 5922.0, + "valid_targets_min": 4690 + }, + { + "epoch": 2.6983588002263725, + "grad_norm": 0.3783709839981614, + "learning_rate": 3.087874122934924e-05, + "loss": 0.1863, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07935582101345062, + "step": 2385, + "valid_targets_mean": 5270.4, + "valid_targets_min": 4262 + }, + { + "epoch": 2.7040181097906055, + "grad_norm": 0.42404247224839714, + "learning_rate": 3.0831361114386905e-05, + "loss": 0.1692, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09030906856060028, + "step": 2390, + "valid_targets_mean": 6213.9, + "valid_targets_min": 5149 + }, + { + "epoch": 2.709677419354839, + "grad_norm": 0.4042419368135553, + "learning_rate": 3.078389482704897e-05, + "loss": 0.1786, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08515478670597076, + "step": 2395, + "valid_targets_mean": 5613.8, + "valid_targets_min": 3840 + }, + { + "epoch": 2.715336728919072, + "grad_norm": 0.4144672098350081, + "learning_rate": 3.0736342744968764e-05, + "loss": 0.1933, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10695742070674896, + "step": 2400, + "valid_targets_mean": 6458.6, + "valid_targets_min": 5104 + }, + { + "epoch": 2.720996038483305, + "grad_norm": 0.39526097299065227, + "learning_rate": 3.068870524646215e-05, + "loss": 0.208, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08301019668579102, + "step": 2405, + "valid_targets_mean": 5527.5, + "valid_targets_min": 4224 + }, + { + "epoch": 2.7266553480475384, + "grad_norm": 0.4207334631291255, + "learning_rate": 3.064098271052457e-05, + "loss": 0.1968, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09231782704591751, + "step": 2410, + "valid_targets_mean": 6107.0, + "valid_targets_min": 4318 + }, + { + "epoch": 2.7323146576117714, + "grad_norm": 0.3776667868267842, + "learning_rate": 3.059317551682801e-05, + "loss": 0.1794, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0937800407409668, + "step": 2415, + "valid_targets_mean": 6608.5, + "valid_targets_min": 4783 + }, + { + "epoch": 2.7379739671760044, + "grad_norm": 0.37027789136730715, + "learning_rate": 3.0545284045717956e-05, + "loss": 0.1805, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08681375533342361, + "step": 2420, + "valid_targets_mean": 7459.4, + "valid_targets_min": 4412 + }, + { + "epoch": 2.743633276740238, + "grad_norm": 0.40114239927315337, + "learning_rate": 3.0497308678210413e-05, + "loss": 0.1879, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09719152003526688, + "step": 2425, + "valid_targets_mean": 6031.6, + "valid_targets_min": 4332 + }, + { + "epoch": 2.749292586304471, + "grad_norm": 0.37900115665484424, + "learning_rate": 3.044924979598882e-05, + "loss": 0.1809, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08584432303905487, + "step": 2430, + "valid_targets_mean": 5925.8, + "valid_targets_min": 5096 + }, + { + "epoch": 2.754951895868704, + "grad_norm": 0.4430659542546633, + "learning_rate": 3.0401107781401092e-05, + "loss": 0.1819, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10050064325332642, + "step": 2435, + "valid_targets_mean": 5700.1, + "valid_targets_min": 2364 + }, + { + "epoch": 2.7606112054329373, + "grad_norm": 0.47768270919139494, + "learning_rate": 3.0352883017456497e-05, + "loss": 0.1884, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09526128321886063, + "step": 2440, + "valid_targets_mean": 6654.9, + "valid_targets_min": 4793 + }, + { + "epoch": 2.7662705149971702, + "grad_norm": 0.4496406784426124, + "learning_rate": 3.0304575887822635e-05, + "loss": 0.1826, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09403373301029205, + "step": 2445, + "valid_targets_mean": 5911.5, + "valid_targets_min": 4588 + }, + { + "epoch": 2.7719298245614032, + "grad_norm": 0.36149311840596915, + "learning_rate": 3.0256186776822415e-05, + "loss": 0.1665, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07817672938108444, + "step": 2450, + "valid_targets_mean": 6948.5, + "valid_targets_min": 5388 + }, + { + "epoch": 2.7775891341256367, + "grad_norm": 0.42396774471012927, + "learning_rate": 3.0207716069430968e-05, + "loss": 0.1642, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1000523716211319, + "step": 2455, + "valid_targets_mean": 6336.5, + "valid_targets_min": 3666 + }, + { + "epoch": 2.7832484436898697, + "grad_norm": 0.4235691448657738, + "learning_rate": 3.015916415127259e-05, + "loss": 0.1886, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10965962707996368, + "step": 2460, + "valid_targets_mean": 7032.6, + "valid_targets_min": 5077 + }, + { + "epoch": 2.788907753254103, + "grad_norm": 0.453165159769694, + "learning_rate": 3.011053140861768e-05, + "loss": 0.1892, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10380945354700089, + "step": 2465, + "valid_targets_mean": 5717.2, + "valid_targets_min": 3663 + }, + { + "epoch": 2.794567062818336, + "grad_norm": 0.4361045043646229, + "learning_rate": 3.006181822837964e-05, + "loss": 0.1775, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0868472084403038, + "step": 2470, + "valid_targets_mean": 5608.9, + "valid_targets_min": 4105 + }, + { + "epoch": 2.8002263723825696, + "grad_norm": 0.37584720530392013, + "learning_rate": 3.0013024998111856e-05, + "loss": 0.1755, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09811882674694061, + "step": 2475, + "valid_targets_mean": 7618.5, + "valid_targets_min": 5201 + }, + { + "epoch": 2.8058856819468025, + "grad_norm": 0.4324789733430624, + "learning_rate": 2.9964152106004546e-05, + "loss": 0.1705, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09703391790390015, + "step": 2480, + "valid_targets_mean": 6253.0, + "valid_targets_min": 5069 + }, + { + "epoch": 2.8115449915110355, + "grad_norm": 0.40342691092349325, + "learning_rate": 2.9915199940881723e-05, + "loss": 0.1767, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08094887435436249, + "step": 2485, + "valid_targets_mean": 5635.0, + "valid_targets_min": 4729 + }, + { + "epoch": 2.817204301075269, + "grad_norm": 0.4312322464757439, + "learning_rate": 2.9866168892198067e-05, + "loss": 0.1856, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09300193190574646, + "step": 2490, + "valid_targets_mean": 5936.4, + "valid_targets_min": 4567 + }, + { + "epoch": 2.822863610639502, + "grad_norm": 0.4321933129129484, + "learning_rate": 2.9817059350035858e-05, + "loss": 0.1703, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08238036930561066, + "step": 2495, + "valid_targets_mean": 6021.9, + "valid_targets_min": 4464 + }, + { + "epoch": 2.828522920203735, + "grad_norm": 0.43337872296458924, + "learning_rate": 2.9767871705101834e-05, + "loss": 0.1882, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08571532368659973, + "step": 2500, + "valid_targets_mean": 5414.9, + "valid_targets_min": 4908 + }, + { + "epoch": 2.8341822297679684, + "grad_norm": 0.45092675331202847, + "learning_rate": 2.9718606348724135e-05, + "loss": 0.167, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07677493989467621, + "step": 2505, + "valid_targets_mean": 5470.8, + "valid_targets_min": 4443 + }, + { + "epoch": 2.8398415393322014, + "grad_norm": 0.4000960970251542, + "learning_rate": 2.966926367284913e-05, + "loss": 0.1529, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07979180663824081, + "step": 2510, + "valid_targets_mean": 6542.5, + "valid_targets_min": 3381 + }, + { + "epoch": 2.8455008488964344, + "grad_norm": 0.3979654843586047, + "learning_rate": 2.9619844070038336e-05, + "loss": 0.1594, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08121464401483536, + "step": 2515, + "valid_targets_mean": 6168.5, + "valid_targets_min": 4790 + }, + { + "epoch": 2.851160158460668, + "grad_norm": 0.37816905867197753, + "learning_rate": 2.957034793346531e-05, + "loss": 0.1628, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.079801045358181, + "step": 2520, + "valid_targets_mean": 6507.5, + "valid_targets_min": 5161 + }, + { + "epoch": 2.856819468024901, + "grad_norm": 0.38435518172062916, + "learning_rate": 2.9520775656912467e-05, + "loss": 0.1791, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08284391462802887, + "step": 2525, + "valid_targets_mean": 6376.0, + "valid_targets_min": 4491 + }, + { + "epoch": 2.8624787775891343, + "grad_norm": 0.6472018683033478, + "learning_rate": 2.9471127634767992e-05, + "loss": 0.2105, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10909480601549149, + "step": 2530, + "valid_targets_mean": 2179.4, + "valid_targets_min": 817 + }, + { + "epoch": 2.8681380871533673, + "grad_norm": 0.42870449333604926, + "learning_rate": 2.9421404262022687e-05, + "loss": 0.1903, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08151814341545105, + "step": 2535, + "valid_targets_mean": 5207.4, + "valid_targets_min": 3893 + }, + { + "epoch": 2.8737973967176007, + "grad_norm": 0.48576110905094605, + "learning_rate": 2.9371605934266826e-05, + "loss": 0.171, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09397213160991669, + "step": 2540, + "valid_targets_mean": 6132.4, + "valid_targets_min": 4622 + }, + { + "epoch": 2.8794567062818337, + "grad_norm": 0.39758832416258966, + "learning_rate": 2.9321733047687028e-05, + "loss": 0.1785, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08779112994670868, + "step": 2545, + "valid_targets_mean": 7527.8, + "valid_targets_min": 6108 + }, + { + "epoch": 2.8851160158460667, + "grad_norm": 0.4014941053033983, + "learning_rate": 2.9271785999063058e-05, + "loss": 0.1832, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0961420014500618, + "step": 2550, + "valid_targets_mean": 6656.1, + "valid_targets_min": 4253 + }, + { + "epoch": 2.8907753254103, + "grad_norm": 0.3932889537237683, + "learning_rate": 2.922176518576473e-05, + "loss": 0.1917, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09385694563388824, + "step": 2555, + "valid_targets_mean": 6820.0, + "valid_targets_min": 4990 + }, + { + "epoch": 2.896434634974533, + "grad_norm": 0.4111962963781346, + "learning_rate": 2.9171671005748705e-05, + "loss": 0.1799, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11026223003864288, + "step": 2560, + "valid_targets_mean": 6825.6, + "valid_targets_min": 4678 + }, + { + "epoch": 2.902093944538766, + "grad_norm": 0.3829985111464065, + "learning_rate": 2.9121503857555337e-05, + "loss": 0.1827, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09137346595525742, + "step": 2565, + "valid_targets_mean": 6218.2, + "valid_targets_min": 4775 + }, + { + "epoch": 2.9077532541029996, + "grad_norm": 0.37253615264351747, + "learning_rate": 2.9071264140305504e-05, + "loss": 0.1693, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06885994225740433, + "step": 2570, + "valid_targets_mean": 5699.0, + "valid_targets_min": 4098 + }, + { + "epoch": 2.9134125636672326, + "grad_norm": 0.40557670033767224, + "learning_rate": 2.9020952253697417e-05, + "loss": 0.1675, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09723977744579315, + "step": 2575, + "valid_targets_mean": 6155.6, + "valid_targets_min": 4801 + }, + { + "epoch": 2.9190718732314656, + "grad_norm": 0.46032741699589297, + "learning_rate": 2.8970568598003485e-05, + "loss": 0.2189, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20152047276496887, + "step": 2580, + "valid_targets_mean": 5976.9, + "valid_targets_min": 4734 + }, + { + "epoch": 2.924731182795699, + "grad_norm": 0.42670510429174907, + "learning_rate": 2.8920113574067063e-05, + "loss": 0.1784, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0888219028711319, + "step": 2585, + "valid_targets_mean": 5994.2, + "valid_targets_min": 4400 + }, + { + "epoch": 2.930390492359932, + "grad_norm": 0.4596152448984337, + "learning_rate": 2.8869587583299315e-05, + "loss": 0.1694, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09296728670597076, + "step": 2590, + "valid_targets_mean": 6467.8, + "valid_targets_min": 4919 + }, + { + "epoch": 2.936049801924165, + "grad_norm": 0.4288831072130001, + "learning_rate": 2.8818991027676014e-05, + "loss": 0.1813, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10694336891174316, + "step": 2595, + "valid_targets_mean": 6262.5, + "valid_targets_min": 4611 + }, + { + "epoch": 2.9417091114883984, + "grad_norm": 0.3797399132709078, + "learning_rate": 2.876832430973432e-05, + "loss": 0.1809, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07845215499401093, + "step": 2600, + "valid_targets_mean": 6272.1, + "valid_targets_min": 2275 + }, + { + "epoch": 2.9473684210526314, + "grad_norm": 0.4164521528436999, + "learning_rate": 2.8717587832569598e-05, + "loss": 0.1587, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.082811139523983, + "step": 2605, + "valid_targets_mean": 6114.8, + "valid_targets_min": 4826 + }, + { + "epoch": 2.953027730616865, + "grad_norm": 0.3801213903767742, + "learning_rate": 2.8666781999832198e-05, + "loss": 0.1747, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08315998315811157, + "step": 2610, + "valid_targets_mean": 6942.9, + "valid_targets_min": 4707 + }, + { + "epoch": 2.958687040181098, + "grad_norm": 0.39485809535379474, + "learning_rate": 2.8615907215724266e-05, + "loss": 0.1633, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08990705013275146, + "step": 2615, + "valid_targets_mean": 7121.4, + "valid_targets_min": 5421 + }, + { + "epoch": 2.9643463497453313, + "grad_norm": 0.4966445577987727, + "learning_rate": 2.8564963884996494e-05, + "loss": 0.1694, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08690492808818817, + "step": 2620, + "valid_targets_mean": 7213.6, + "valid_targets_min": 5440 + }, + { + "epoch": 2.9700056593095643, + "grad_norm": 0.42263654185236305, + "learning_rate": 2.851395241294493e-05, + "loss": 0.1686, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08929473161697388, + "step": 2625, + "valid_targets_mean": 6386.6, + "valid_targets_min": 4831 + }, + { + "epoch": 2.9756649688737973, + "grad_norm": 0.40244069300386137, + "learning_rate": 2.8462873205407747e-05, + "loss": 0.1754, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10473649948835373, + "step": 2630, + "valid_targets_mean": 6396.2, + "valid_targets_min": 4018 + }, + { + "epoch": 2.9813242784380307, + "grad_norm": 0.34774921085017213, + "learning_rate": 2.8411726668761998e-05, + "loss": 0.1641, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06774158775806427, + "step": 2635, + "valid_targets_mean": 6891.8, + "valid_targets_min": 5034 + }, + { + "epoch": 2.9869835880022637, + "grad_norm": 0.4290323597843678, + "learning_rate": 2.8360513209920388e-05, + "loss": 0.1726, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09784676134586334, + "step": 2640, + "valid_targets_mean": 6423.0, + "valid_targets_min": 4682 + }, + { + "epoch": 2.9926428975664967, + "grad_norm": 0.4782741863927516, + "learning_rate": 2.8309233236328074e-05, + "loss": 0.183, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08807047456502914, + "step": 2645, + "valid_targets_mean": 5597.6, + "valid_targets_min": 4764 + }, + { + "epoch": 2.99830220713073, + "grad_norm": 0.4613350403309322, + "learning_rate": 2.8257887155959352e-05, + "loss": 0.1865, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09654569625854492, + "step": 2650, + "valid_targets_mean": 4990.0, + "valid_targets_min": 2481 + }, + { + "epoch": 3.0033955857385397, + "grad_norm": 0.49867476024789664, + "learning_rate": 2.8206475377314486e-05, + "loss": 0.1569, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.112117700278759, + "step": 2655, + "valid_targets_mean": 2046.1, + "valid_targets_min": 333 + }, + { + "epoch": 3.009054895302773, + "grad_norm": 0.37228601244243087, + "learning_rate": 2.8154998309416404e-05, + "loss": 0.1074, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05608535185456276, + "step": 2660, + "valid_targets_mean": 3455.9, + "valid_targets_min": 712 + }, + { + "epoch": 3.014714204867006, + "grad_norm": 0.44240647825464446, + "learning_rate": 2.8103456361807473e-05, + "loss": 0.0978, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04511546343564987, + "step": 2665, + "valid_targets_mean": 2452.6, + "valid_targets_min": 732 + }, + { + "epoch": 3.0203735144312396, + "grad_norm": 0.4607141370178821, + "learning_rate": 2.8051849944546225e-05, + "loss": 0.1042, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04693339765071869, + "step": 2670, + "valid_targets_mean": 2228.9, + "valid_targets_min": 803 + }, + { + "epoch": 3.0260328239954726, + "grad_norm": 0.38307989696714345, + "learning_rate": 2.80001794682041e-05, + "loss": 0.0924, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.048715487122535706, + "step": 2675, + "valid_targets_mean": 3038.6, + "valid_targets_min": 818 + }, + { + "epoch": 3.0316921335597056, + "grad_norm": 0.41932954254381544, + "learning_rate": 2.7948445343862188e-05, + "loss": 0.1001, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.051280081272125244, + "step": 2680, + "valid_targets_mean": 3049.4, + "valid_targets_min": 1524 + }, + { + "epoch": 3.037351443123939, + "grad_norm": 0.7586911048822879, + "learning_rate": 2.7896647983107952e-05, + "loss": 0.1454, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0651891827583313, + "step": 2685, + "valid_targets_mean": 1256.1, + "valid_targets_min": 539 + }, + { + "epoch": 3.043010752688172, + "grad_norm": 0.37844312475214087, + "learning_rate": 2.784478779803194e-05, + "loss": 0.1112, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.047880202531814575, + "step": 2690, + "valid_targets_mean": 3597.0, + "valid_targets_min": 2920 + }, + { + "epoch": 3.048670062252405, + "grad_norm": 0.4449710609341959, + "learning_rate": 2.7792865201224536e-05, + "loss": 0.1087, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05327470228075981, + "step": 2695, + "valid_targets_mean": 2737.9, + "valid_targets_min": 1048 + }, + { + "epoch": 3.0543293718166384, + "grad_norm": 0.3062929318516885, + "learning_rate": 2.7740880605772644e-05, + "loss": 0.136, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.03465072065591812, + "step": 2700, + "valid_targets_mean": 4340.1, + "valid_targets_min": 962 + }, + { + "epoch": 3.0599886813808714, + "grad_norm": 0.3536858987060171, + "learning_rate": 2.7688834425256426e-05, + "loss": 0.0833, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05480746924877167, + "step": 2705, + "valid_targets_mean": 4283.1, + "valid_targets_min": 2091 + }, + { + "epoch": 3.065647990945105, + "grad_norm": 0.34355131976242337, + "learning_rate": 2.7636727073746015e-05, + "loss": 0.0939, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04400227963924408, + "step": 2710, + "valid_targets_mean": 4320.9, + "valid_targets_min": 3349 + }, + { + "epoch": 3.071307300509338, + "grad_norm": 0.3473400965332635, + "learning_rate": 2.7584558965798183e-05, + "loss": 0.0955, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.050659552216529846, + "step": 2715, + "valid_targets_mean": 3895.9, + "valid_targets_min": 518 + }, + { + "epoch": 3.076966610073571, + "grad_norm": 0.4883421261459582, + "learning_rate": 2.7532330516453094e-05, + "loss": 0.1382, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05640873312950134, + "step": 2720, + "valid_targets_mean": 2219.6, + "valid_targets_min": 549 + }, + { + "epoch": 3.0826259196378043, + "grad_norm": 0.33344345331957614, + "learning_rate": 2.7480042141230963e-05, + "loss": 0.088, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.03706565871834755, + "step": 2725, + "valid_targets_mean": 3700.2, + "valid_targets_min": 2724 + }, + { + "epoch": 3.0882852292020373, + "grad_norm": 0.47444017456116305, + "learning_rate": 2.7427694256128776e-05, + "loss": 0.1028, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.049323540180921555, + "step": 2730, + "valid_targets_mean": 1672.5, + "valid_targets_min": 775 + }, + { + "epoch": 3.0939445387662703, + "grad_norm": 0.3169477407104796, + "learning_rate": 2.737528727761696e-05, + "loss": 0.0834, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.025108829140663147, + "step": 2735, + "valid_targets_mean": 3546.2, + "valid_targets_min": 3183 + }, + { + "epoch": 3.0996038483305037, + "grad_norm": 0.45802642303214147, + "learning_rate": 2.7322821622636077e-05, + "loss": 0.1002, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04511656612157822, + "step": 2740, + "valid_targets_mean": 2473.2, + "valid_targets_min": 577 + }, + { + "epoch": 3.1052631578947367, + "grad_norm": 0.5529272872722081, + "learning_rate": 2.7270297708593517e-05, + "loss": 0.1111, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07362447679042816, + "step": 2745, + "valid_targets_mean": 1597.0, + "valid_targets_min": 557 + }, + { + "epoch": 3.11092246745897, + "grad_norm": 0.40781801639907334, + "learning_rate": 2.7217715953360166e-05, + "loss": 0.1056, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05626679211854935, + "step": 2750, + "valid_targets_mean": 3019.1, + "valid_targets_min": 774 + }, + { + "epoch": 3.116581777023203, + "grad_norm": 0.3914645639461479, + "learning_rate": 2.716507677526707e-05, + "loss": 0.1081, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04464668408036232, + "step": 2755, + "valid_targets_mean": 1825.8, + "valid_targets_min": 680 + }, + { + "epoch": 3.122241086587436, + "grad_norm": 0.5488045264692737, + "learning_rate": 2.711238059310215e-05, + "loss": 0.1524, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08102589845657349, + "step": 2760, + "valid_targets_mean": 3686.9, + "valid_targets_min": 2819 + }, + { + "epoch": 3.1279003961516696, + "grad_norm": 0.36975350669309054, + "learning_rate": 2.7059627826106817e-05, + "loss": 0.0908, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.038989197462797165, + "step": 2765, + "valid_targets_mean": 4137.8, + "valid_targets_min": 3927 + }, + { + "epoch": 3.1335597057159026, + "grad_norm": 0.5124554857187381, + "learning_rate": 2.700681889397267e-05, + "loss": 0.1023, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06412796676158905, + "step": 2770, + "valid_targets_mean": 1376.1, + "valid_targets_min": 714 + }, + { + "epoch": 3.139219015280136, + "grad_norm": 0.42403525497064243, + "learning_rate": 2.6953954216838148e-05, + "loss": 0.1294, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.045657929033041, + "step": 2775, + "valid_targets_mean": 3432.0, + "valid_targets_min": 754 + }, + { + "epoch": 3.144878324844369, + "grad_norm": 0.37109234218920956, + "learning_rate": 2.6901034215285182e-05, + "loss": 0.1139, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04888893663883209, + "step": 2780, + "valid_targets_mean": 3855.2, + "valid_targets_min": 3178 + }, + { + "epoch": 3.150537634408602, + "grad_norm": 0.7007792893263197, + "learning_rate": 2.684805931033586e-05, + "loss": 0.1366, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13111639022827148, + "step": 2785, + "valid_targets_mean": 1566.6, + "valid_targets_min": 920 + }, + { + "epoch": 3.1561969439728355, + "grad_norm": 0.42704325731003817, + "learning_rate": 2.679502992344907e-05, + "loss": 0.2762, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04908895492553711, + "step": 2790, + "valid_targets_mean": 3478.0, + "valid_targets_min": 2314 + }, + { + "epoch": 3.1618562535370685, + "grad_norm": 0.4160207265430514, + "learning_rate": 2.6741946476517146e-05, + "loss": 0.1116, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04074154049158096, + "step": 2795, + "valid_targets_mean": 2561.1, + "valid_targets_min": 833 + }, + { + "epoch": 3.1675155631013014, + "grad_norm": 0.4216708740511012, + "learning_rate": 2.6688809391862523e-05, + "loss": 0.1079, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06856664270162582, + "step": 2800, + "valid_targets_mean": 3509.2, + "valid_targets_min": 1803 + }, + { + "epoch": 3.173174872665535, + "grad_norm": 0.4387140812495042, + "learning_rate": 2.663561909223435e-05, + "loss": 0.2045, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07015801966190338, + "step": 2805, + "valid_targets_mean": 4291.4, + "valid_targets_min": 3089 + }, + { + "epoch": 3.178834182229768, + "grad_norm": 0.24134557097883164, + "learning_rate": 2.6582376000805165e-05, + "loss": 0.0804, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.039008043706417084, + "step": 2810, + "valid_targets_mean": 4843.2, + "valid_targets_min": 2664 + }, + { + "epoch": 3.1844934917940013, + "grad_norm": 0.42824132808893417, + "learning_rate": 2.6529080541167495e-05, + "loss": 0.1095, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.031571805477142334, + "step": 2815, + "valid_targets_mean": 3042.1, + "valid_targets_min": 560 + }, + { + "epoch": 3.1901528013582343, + "grad_norm": 0.3932975746399924, + "learning_rate": 2.6475733137330507e-05, + "loss": 0.1079, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.053876928985118866, + "step": 2820, + "valid_targets_mean": 3863.6, + "valid_targets_min": 1615 + }, + { + "epoch": 3.1958121109224673, + "grad_norm": 0.35507034759593514, + "learning_rate": 2.6422334213716624e-05, + "loss": 0.0915, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.033908043056726456, + "step": 2825, + "valid_targets_mean": 3312.5, + "valid_targets_min": 2172 + }, + { + "epoch": 3.2014714204867007, + "grad_norm": 0.3743335731732549, + "learning_rate": 2.6368884195158143e-05, + "loss": 0.0881, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.03953585773706436, + "step": 2830, + "valid_targets_mean": 3059.9, + "valid_targets_min": 536 + }, + { + "epoch": 3.2071307300509337, + "grad_norm": 0.3880753228682472, + "learning_rate": 2.6315383506893876e-05, + "loss": 0.0924, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04549029842019081, + "step": 2835, + "valid_targets_mean": 3881.0, + "valid_targets_min": 3295 + }, + { + "epoch": 3.212790039615167, + "grad_norm": 0.37105377174054777, + "learning_rate": 2.6261832574565752e-05, + "loss": 0.0867, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.03725513815879822, + "step": 2840, + "valid_targets_mean": 3287.2, + "valid_targets_min": 820 + }, + { + "epoch": 3.2184493491794, + "grad_norm": 0.6135674153683103, + "learning_rate": 2.6208231824215417e-05, + "loss": 0.1366, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13685393333435059, + "step": 2845, + "valid_targets_mean": 2566.6, + "valid_targets_min": 1046 + }, + { + "epoch": 3.224108658743633, + "grad_norm": 0.3247543158156727, + "learning_rate": 2.6154581682280892e-05, + "loss": 0.0841, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.042167115956544876, + "step": 2850, + "valid_targets_mean": 3590.1, + "valid_targets_min": 2719 + }, + { + "epoch": 3.2297679683078666, + "grad_norm": 0.630994403345283, + "learning_rate": 2.610088257559311e-05, + "loss": 0.0868, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06745156645774841, + "step": 2855, + "valid_targets_mean": 1638.1, + "valid_targets_min": 699 + }, + { + "epoch": 3.2354272778720996, + "grad_norm": 0.32258810125656434, + "learning_rate": 2.604713493137259e-05, + "loss": 0.083, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.03706847131252289, + "step": 2860, + "valid_targets_mean": 3621.6, + "valid_targets_min": 2353 + }, + { + "epoch": 3.2410865874363326, + "grad_norm": 0.34459531626543854, + "learning_rate": 2.5993339177226002e-05, + "loss": 0.084, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04135024547576904, + "step": 2865, + "valid_targets_mean": 2328.1, + "valid_targets_min": 549 + }, + { + "epoch": 3.246745897000566, + "grad_norm": 0.3750427914243617, + "learning_rate": 2.593949574114274e-05, + "loss": 0.1404, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.03787697106599808, + "step": 2870, + "valid_targets_mean": 3330.4, + "valid_targets_min": 908 + }, + { + "epoch": 3.252405206564799, + "grad_norm": 0.35834456738616854, + "learning_rate": 2.5885605051491592e-05, + "loss": 0.1144, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07931341230869293, + "step": 2875, + "valid_targets_mean": 3458.4, + "valid_targets_min": 2196 + }, + { + "epoch": 3.258064516129032, + "grad_norm": 0.36621640184414345, + "learning_rate": 2.583166753701725e-05, + "loss": 0.1023, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.03504461795091629, + "step": 2880, + "valid_targets_mean": 1252.1, + "valid_targets_min": 598 + }, + { + "epoch": 3.2637238256932655, + "grad_norm": 0.8388186250463507, + "learning_rate": 2.5777683626836964e-05, + "loss": 0.1193, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.105122409760952, + "step": 2885, + "valid_targets_mean": 1353.1, + "valid_targets_min": 578 + }, + { + "epoch": 3.2693831352574985, + "grad_norm": 0.6250870179668947, + "learning_rate": 2.5723653750437083e-05, + "loss": 0.1438, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07785950601100922, + "step": 2890, + "valid_targets_mean": 1974.2, + "valid_targets_min": 695 + }, + { + "epoch": 3.275042444821732, + "grad_norm": 0.49383934588608436, + "learning_rate": 2.5669578337669653e-05, + "loss": 0.0978, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04261304438114166, + "step": 2895, + "valid_targets_mean": 1724.5, + "valid_targets_min": 691 + }, + { + "epoch": 3.280701754385965, + "grad_norm": 0.33482884154545456, + "learning_rate": 2.5615457818749007e-05, + "loss": 0.1194, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.03891771659255028, + "step": 2900, + "valid_targets_mean": 3842.6, + "valid_targets_min": 2024 + }, + { + "epoch": 3.286361063950198, + "grad_norm": 0.610701381353736, + "learning_rate": 2.5561292624248344e-05, + "loss": 0.1028, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.062936931848526, + "step": 2905, + "valid_targets_mean": 1651.0, + "valid_targets_min": 523 + }, + { + "epoch": 3.2920203735144313, + "grad_norm": 0.33261200460919144, + "learning_rate": 2.5507083185096267e-05, + "loss": 0.0921, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.03689384460449219, + "step": 2910, + "valid_targets_mean": 3532.5, + "valid_targets_min": 468 + }, + { + "epoch": 3.2976796830786643, + "grad_norm": 0.28733728828226146, + "learning_rate": 2.545282993257341e-05, + "loss": 0.0964, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.024604657664895058, + "step": 2915, + "valid_targets_mean": 5298.0, + "valid_targets_min": 1195 + }, + { + "epoch": 3.3033389926428978, + "grad_norm": 0.35030140303019586, + "learning_rate": 2.5398533298308956e-05, + "loss": 0.0868, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.03047536499798298, + "step": 2920, + "valid_targets_mean": 3562.6, + "valid_targets_min": 459 + }, + { + "epoch": 3.3089983022071308, + "grad_norm": 0.3396849200129721, + "learning_rate": 2.534419371427724e-05, + "loss": 0.0862, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0391726940870285, + "step": 2925, + "valid_targets_mean": 3548.0, + "valid_targets_min": 981 + }, + { + "epoch": 3.3146576117713638, + "grad_norm": 0.42836419743421056, + "learning_rate": 2.5289811612794297e-05, + "loss": 0.0979, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05852380022406578, + "step": 2930, + "valid_targets_mean": 2130.1, + "valid_targets_min": 697 + }, + { + "epoch": 3.320316921335597, + "grad_norm": 0.4119669824345928, + "learning_rate": 2.5235387426514405e-05, + "loss": 0.1056, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04985947161912918, + "step": 2935, + "valid_targets_mean": 3661.6, + "valid_targets_min": 2759 + }, + { + "epoch": 3.32597623089983, + "grad_norm": 0.39827957163214495, + "learning_rate": 2.5180921588426693e-05, + "loss": 0.1033, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05189929157495499, + "step": 2940, + "valid_targets_mean": 3909.5, + "valid_targets_min": 3391 + }, + { + "epoch": 3.331635540464063, + "grad_norm": 0.32351171301938114, + "learning_rate": 2.5126414531851634e-05, + "loss": 0.0881, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.03757951408624649, + "step": 2945, + "valid_targets_mean": 3319.9, + "valid_targets_min": 1099 + }, + { + "epoch": 3.3372948500282966, + "grad_norm": 0.401450449997819, + "learning_rate": 2.507186669043764e-05, + "loss": 0.0914, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04845608398318291, + "step": 2950, + "valid_targets_mean": 3717.0, + "valid_targets_min": 1874 + }, + { + "epoch": 3.3429541595925296, + "grad_norm": 0.3588098277530219, + "learning_rate": 2.5017278498157608e-05, + "loss": 0.104, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0418374203145504, + "step": 2955, + "valid_targets_mean": 3263.5, + "valid_targets_min": 724 + }, + { + "epoch": 3.348613469156763, + "grad_norm": 0.2821542142135515, + "learning_rate": 2.496265038930545e-05, + "loss": 0.0779, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.03482644259929657, + "step": 2960, + "valid_targets_mean": 3707.5, + "valid_targets_min": 2769 + }, + { + "epoch": 3.354272778720996, + "grad_norm": 0.425467705850417, + "learning_rate": 2.4907982798492647e-05, + "loss": 0.101, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04261321574449539, + "step": 2965, + "valid_targets_mean": 2815.8, + "valid_targets_min": 645 + }, + { + "epoch": 3.359932088285229, + "grad_norm": 0.3729179661346765, + "learning_rate": 2.485327616064479e-05, + "loss": 0.0941, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.039063774049282074, + "step": 2970, + "valid_targets_mean": 3488.8, + "valid_targets_min": 600 + }, + { + "epoch": 3.3655913978494625, + "grad_norm": 0.3963000668065549, + "learning_rate": 2.4798530910998126e-05, + "loss": 0.1127, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0476563386619091, + "step": 2975, + "valid_targets_mean": 3373.4, + "valid_targets_min": 1648 + }, + { + "epoch": 3.3712507074136955, + "grad_norm": 0.4347727613279711, + "learning_rate": 2.474374748509609e-05, + "loss": 0.0976, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05656619369983673, + "step": 2980, + "valid_targets_mean": 2555.0, + "valid_targets_min": 821 + }, + { + "epoch": 3.376910016977929, + "grad_norm": 0.8531677563654521, + "learning_rate": 2.4688926318785845e-05, + "loss": 0.1117, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06763380765914917, + "step": 2985, + "valid_targets_mean": 867.1, + "valid_targets_min": 612 + }, + { + "epoch": 3.382569326542162, + "grad_norm": 0.5959324801916324, + "learning_rate": 2.4634067848214797e-05, + "loss": 0.0977, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07143428921699524, + "step": 2990, + "valid_targets_mean": 2202.1, + "valid_targets_min": 830 + }, + { + "epoch": 3.388228636106395, + "grad_norm": 0.40398700006350446, + "learning_rate": 2.4579172509827146e-05, + "loss": 0.1109, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04321750998497009, + "step": 2995, + "valid_targets_mean": 3027.0, + "valid_targets_min": 1146 + }, + { + "epoch": 3.3938879456706283, + "grad_norm": 0.42215532219757984, + "learning_rate": 2.4524240740360404e-05, + "loss": 0.0957, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.058524373918771744, + "step": 3000, + "valid_targets_mean": 3296.6, + "valid_targets_min": 2760 + }, + { + "epoch": 3.3995472552348613, + "grad_norm": 0.6394230386674347, + "learning_rate": 2.4469272976841925e-05, + "loss": 0.0895, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06249532476067543, + "step": 3005, + "valid_targets_mean": 1469.5, + "valid_targets_min": 628 + }, + { + "epoch": 3.4052065647990943, + "grad_norm": 0.6002430318815608, + "learning_rate": 2.441426965658543e-05, + "loss": 0.113, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09136934578418732, + "step": 3010, + "valid_targets_mean": 1752.1, + "valid_targets_min": 691 + }, + { + "epoch": 3.4108658743633278, + "grad_norm": 0.3232808526479633, + "learning_rate": 2.4359231217187508e-05, + "loss": 0.0876, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.025129452347755432, + "step": 3015, + "valid_targets_mean": 3863.9, + "valid_targets_min": 3613 + }, + { + "epoch": 3.4165251839275608, + "grad_norm": 0.3785001207675656, + "learning_rate": 2.430415809652416e-05, + "loss": 0.0854, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04442009702324867, + "step": 3020, + "valid_targets_mean": 4304.2, + "valid_targets_min": 1773 + }, + { + "epoch": 3.4221844934917938, + "grad_norm": 0.3551280101771824, + "learning_rate": 2.4249050732747302e-05, + "loss": 0.0937, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.049544379115104675, + "step": 3025, + "valid_targets_mean": 1971.9, + "valid_targets_min": 762 + }, + { + "epoch": 3.427843803056027, + "grad_norm": 0.385576263745652, + "learning_rate": 2.4193909564281288e-05, + "loss": 0.0818, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.058811601251363754, + "step": 3030, + "valid_targets_mean": 4554.1, + "valid_targets_min": 946 + }, + { + "epoch": 3.43350311262026, + "grad_norm": 0.3460366760046519, + "learning_rate": 2.4138735029819418e-05, + "loss": 0.0776, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05385265499353409, + "step": 3035, + "valid_targets_mean": 5015.5, + "valid_targets_min": 4104 + }, + { + "epoch": 3.4391624221844936, + "grad_norm": 0.3614747621859789, + "learning_rate": 2.408352756832042e-05, + "loss": 0.0999, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04436466097831726, + "step": 3040, + "valid_targets_mean": 3064.4, + "valid_targets_min": 739 + }, + { + "epoch": 3.4448217317487266, + "grad_norm": 0.4712524391437108, + "learning_rate": 2.402828761900502e-05, + "loss": 0.1125, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0511796772480011, + "step": 3045, + "valid_targets_mean": 3863.6, + "valid_targets_min": 3550 + }, + { + "epoch": 3.4504810413129596, + "grad_norm": 0.36121041271525794, + "learning_rate": 2.3973015621352382e-05, + "loss": 0.0916, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.052094023674726486, + "step": 3050, + "valid_targets_mean": 3474.9, + "valid_targets_min": 704 + }, + { + "epoch": 3.456140350877193, + "grad_norm": 0.3504346596616668, + "learning_rate": 2.3917712015096664e-05, + "loss": 0.0938, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.039699751883745193, + "step": 3055, + "valid_targets_mean": 2568.8, + "valid_targets_min": 774 + }, + { + "epoch": 3.461799660441426, + "grad_norm": 0.32816913612913545, + "learning_rate": 2.386237724022348e-05, + "loss": 0.1144, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.036705564707517624, + "step": 3060, + "valid_targets_mean": 3486.2, + "valid_targets_min": 888 + }, + { + "epoch": 3.4674589700056595, + "grad_norm": 0.40338144229925676, + "learning_rate": 2.3807011736966414e-05, + "loss": 0.0882, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.047249242663383484, + "step": 3065, + "valid_targets_mean": 3371.6, + "valid_targets_min": 1048 + }, + { + "epoch": 3.4731182795698925, + "grad_norm": 0.3535424736946541, + "learning_rate": 2.3751615945803547e-05, + "loss": 0.1002, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04499473422765732, + "step": 3070, + "valid_targets_mean": 2941.5, + "valid_targets_min": 722 + }, + { + "epoch": 3.4787775891341255, + "grad_norm": 0.49910869294611127, + "learning_rate": 2.3696190307453883e-05, + "loss": 0.0958, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05616830289363861, + "step": 3075, + "valid_targets_mean": 2359.2, + "valid_targets_min": 865 + }, + { + "epoch": 3.484436898698359, + "grad_norm": 0.5057917812740189, + "learning_rate": 2.364073526287392e-05, + "loss": 0.1348, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.03738151490688324, + "step": 3080, + "valid_targets_mean": 1747.0, + "valid_targets_min": 570 + }, + { + "epoch": 3.490096208262592, + "grad_norm": 0.4574095932942482, + "learning_rate": 2.358525125325409e-05, + "loss": 0.1129, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05637986958026886, + "step": 3085, + "valid_targets_mean": 3704.2, + "valid_targets_min": 2811 + }, + { + "epoch": 3.495755517826825, + "grad_norm": 0.40349874604175534, + "learning_rate": 2.352973872001527e-05, + "loss": 0.093, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04045228660106659, + "step": 3090, + "valid_targets_mean": 1749.1, + "valid_targets_min": 635 + }, + { + "epoch": 3.5014148273910584, + "grad_norm": 0.3913164678718162, + "learning_rate": 2.347419810480527e-05, + "loss": 0.0904, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.045254193246364594, + "step": 3095, + "valid_targets_mean": 2985.1, + "valid_targets_min": 1591 + }, + { + "epoch": 3.5070741369552914, + "grad_norm": 0.373402014080191, + "learning_rate": 2.34186298494953e-05, + "loss": 0.0893, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.03965598717331886, + "step": 3100, + "valid_targets_mean": 2912.6, + "valid_targets_min": 800 + }, + { + "epoch": 3.5127334465195243, + "grad_norm": 0.35416739930750163, + "learning_rate": 2.3363034396176486e-05, + "loss": 0.0814, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.043870240449905396, + "step": 3105, + "valid_targets_mean": 3084.9, + "valid_targets_min": 860 + }, + { + "epoch": 3.518392756083758, + "grad_norm": 0.48504459667904787, + "learning_rate": 2.3307412187156334e-05, + "loss": 0.1112, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05146137624979019, + "step": 3110, + "valid_targets_mean": 2482.6, + "valid_targets_min": 878 + }, + { + "epoch": 3.524052065647991, + "grad_norm": 0.48286539941725964, + "learning_rate": 2.3251763664955208e-05, + "loss": 0.0788, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.041975438594818115, + "step": 3115, + "valid_targets_mean": 3403.2, + "valid_targets_min": 1024 + }, + { + "epoch": 3.5297113752122242, + "grad_norm": 0.4240507601652416, + "learning_rate": 2.3196089272302813e-05, + "loss": 0.0884, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.043690115213394165, + "step": 3120, + "valid_targets_mean": 2173.1, + "valid_targets_min": 786 + }, + { + "epoch": 3.535370684776457, + "grad_norm": 0.4661439062322214, + "learning_rate": 2.3140389452134677e-05, + "loss": 0.0903, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.057237282395362854, + "step": 3125, + "valid_targets_mean": 3233.9, + "valid_targets_min": 2573 + }, + { + "epoch": 3.5410299943406907, + "grad_norm": 0.33256859494846386, + "learning_rate": 2.3084664647588636e-05, + "loss": 0.0906, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04802317917346954, + "step": 3130, + "valid_targets_mean": 3821.9, + "valid_targets_min": 2274 + }, + { + "epoch": 3.5466893039049237, + "grad_norm": 0.3773696940527812, + "learning_rate": 2.3028915302001286e-05, + "loss": 0.0804, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05291939526796341, + "step": 3135, + "valid_targets_mean": 3791.2, + "valid_targets_min": 892 + }, + { + "epoch": 3.5523486134691566, + "grad_norm": 0.32177160076280925, + "learning_rate": 2.297314185890446e-05, + "loss": 0.0868, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.035302624106407166, + "step": 3140, + "valid_targets_mean": 4820.4, + "valid_targets_min": 4491 + }, + { + "epoch": 3.55800792303339, + "grad_norm": 0.5654758830478894, + "learning_rate": 2.291734476202173e-05, + "loss": 0.1292, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1034613698720932, + "step": 3145, + "valid_targets_mean": 2159.1, + "valid_targets_min": 709 + }, + { + "epoch": 3.563667232597623, + "grad_norm": 0.35504828725122617, + "learning_rate": 2.286152445526482e-05, + "loss": 0.1392, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.028719976544380188, + "step": 3150, + "valid_targets_mean": 3430.8, + "valid_targets_min": 1097 + }, + { + "epoch": 3.569326542161856, + "grad_norm": 0.3509166269383449, + "learning_rate": 2.2805681382730142e-05, + "loss": 0.0913, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.038257353007793427, + "step": 3155, + "valid_targets_mean": 3032.0, + "valid_targets_min": 840 + }, + { + "epoch": 3.5749858517260895, + "grad_norm": 0.3309589835327151, + "learning_rate": 2.2749815988695208e-05, + "loss": 0.0844, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0359211340546608, + "step": 3160, + "valid_targets_mean": 3812.2, + "valid_targets_min": 3106 + }, + { + "epoch": 3.5806451612903225, + "grad_norm": 0.567395487040236, + "learning_rate": 2.2693928717615118e-05, + "loss": 0.1687, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08195723593235016, + "step": 3165, + "valid_targets_mean": 5518.8, + "valid_targets_min": 4751 + }, + { + "epoch": 3.5863044708545555, + "grad_norm": 0.3563034368992575, + "learning_rate": 2.2638020014119033e-05, + "loss": 0.1708, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08806541562080383, + "step": 3170, + "valid_targets_mean": 8587.9, + "valid_targets_min": 5164 + }, + { + "epoch": 3.591963780418789, + "grad_norm": 0.3849035050615699, + "learning_rate": 2.2582090323006603e-05, + "loss": 0.1643, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07838860154151917, + "step": 3175, + "valid_targets_mean": 6379.9, + "valid_targets_min": 4342 + }, + { + "epoch": 3.597623089983022, + "grad_norm": 0.38875598497022024, + "learning_rate": 2.2526140089244483e-05, + "loss": 0.1624, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07621696591377258, + "step": 3180, + "valid_targets_mean": 5692.2, + "valid_targets_min": 4321 + }, + { + "epoch": 3.6032823995472554, + "grad_norm": 0.4329260368721985, + "learning_rate": 2.247016975796274e-05, + "loss": 0.1718, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10725659132003784, + "step": 3185, + "valid_targets_mean": 5674.0, + "valid_targets_min": 4808 + }, + { + "epoch": 3.6089417091114884, + "grad_norm": 0.4410680501495322, + "learning_rate": 2.2414179774451333e-05, + "loss": 0.1729, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09051263332366943, + "step": 3190, + "valid_targets_mean": 6807.1, + "valid_targets_min": 5144 + }, + { + "epoch": 3.614601018675722, + "grad_norm": 0.4171450485527089, + "learning_rate": 2.2358170584156577e-05, + "loss": 0.1709, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10141411423683167, + "step": 3195, + "valid_targets_mean": 6545.6, + "valid_targets_min": 4350 + }, + { + "epoch": 3.620260328239955, + "grad_norm": 0.4250555357457158, + "learning_rate": 2.2302142632677605e-05, + "loss": 0.1592, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08770590275526047, + "step": 3200, + "valid_targets_mean": 6225.1, + "valid_targets_min": 3934 + }, + { + "epoch": 3.625919637804188, + "grad_norm": 0.3846688096270101, + "learning_rate": 2.2246096365762782e-05, + "loss": 0.1868, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07190729677677155, + "step": 3205, + "valid_targets_mean": 6377.6, + "valid_targets_min": 5231 + }, + { + "epoch": 3.6315789473684212, + "grad_norm": 0.40401749622860034, + "learning_rate": 2.2190032229306215e-05, + "loss": 0.1635, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08095844089984894, + "step": 3210, + "valid_targets_mean": 6346.8, + "valid_targets_min": 5062 + }, + { + "epoch": 3.6372382569326542, + "grad_norm": 0.47830080271081316, + "learning_rate": 2.2133950669344156e-05, + "loss": 0.1698, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09106248617172241, + "step": 3215, + "valid_targets_mean": 4123.9, + "valid_targets_min": 229 + }, + { + "epoch": 3.6428975664968872, + "grad_norm": 0.392316454997919, + "learning_rate": 2.207785213205149e-05, + "loss": 0.1611, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08440910279750824, + "step": 3220, + "valid_targets_mean": 6221.9, + "valid_targets_min": 4508 + }, + { + "epoch": 3.6485568760611207, + "grad_norm": 0.39968465625985466, + "learning_rate": 2.202173706373817e-05, + "loss": 0.1712, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07806047052145004, + "step": 3225, + "valid_targets_mean": 5676.9, + "valid_targets_min": 4673 + }, + { + "epoch": 3.6542161856253537, + "grad_norm": 0.3555644605517091, + "learning_rate": 2.1965605910845654e-05, + "loss": 0.1537, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06523491442203522, + "step": 3230, + "valid_targets_mean": 6040.0, + "valid_targets_min": 4995 + }, + { + "epoch": 3.6598754951895867, + "grad_norm": 0.44944510780326746, + "learning_rate": 2.1909459119943384e-05, + "loss": 0.1532, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09184688329696655, + "step": 3235, + "valid_targets_mean": 5542.8, + "valid_targets_min": 4108 + }, + { + "epoch": 3.66553480475382, + "grad_norm": 0.4275016378984178, + "learning_rate": 2.1853297137725204e-05, + "loss": 0.1629, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1017594039440155, + "step": 3240, + "valid_targets_mean": 6554.6, + "valid_targets_min": 4527 + }, + { + "epoch": 3.671194114318053, + "grad_norm": 0.50371841141, + "learning_rate": 2.1797120411005807e-05, + "loss": 0.1748, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10853755474090576, + "step": 3245, + "valid_targets_mean": 2038.2, + "valid_targets_min": 449 + }, + { + "epoch": 3.676853423882286, + "grad_norm": 0.3872351477266649, + "learning_rate": 2.1740929386717222e-05, + "loss": 0.1431, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06891588866710663, + "step": 3250, + "valid_targets_mean": 6227.8, + "valid_targets_min": 5316 + }, + { + "epoch": 3.6825127334465195, + "grad_norm": 0.40645942147254843, + "learning_rate": 2.1684724511905193e-05, + "loss": 0.1486, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0807884931564331, + "step": 3255, + "valid_targets_mean": 5918.6, + "valid_targets_min": 5056 + }, + { + "epoch": 3.688172043010753, + "grad_norm": 0.3687741096062788, + "learning_rate": 2.1628506233725678e-05, + "loss": 0.1492, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08449657261371613, + "step": 3260, + "valid_targets_mean": 8812.4, + "valid_targets_min": 4947 + }, + { + "epoch": 3.693831352574986, + "grad_norm": 0.39614599434710857, + "learning_rate": 2.1572274999441265e-05, + "loss": 0.1824, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.097935751080513, + "step": 3265, + "valid_targets_mean": 6913.9, + "valid_targets_min": 4181 + }, + { + "epoch": 3.699490662139219, + "grad_norm": 0.35275204669647287, + "learning_rate": 2.151603125641761e-05, + "loss": 0.1607, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06949613988399506, + "step": 3270, + "valid_targets_mean": 6951.4, + "valid_targets_min": 4442 + }, + { + "epoch": 3.7051499717034524, + "grad_norm": 0.44903423639937967, + "learning_rate": 2.1459775452119898e-05, + "loss": 0.1581, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07566358149051666, + "step": 3275, + "valid_targets_mean": 6686.6, + "valid_targets_min": 4670 + }, + { + "epoch": 3.7108092812676854, + "grad_norm": 0.41477302725607434, + "learning_rate": 2.1403508034109262e-05, + "loss": 0.1661, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09245507419109344, + "step": 3280, + "valid_targets_mean": 8420.9, + "valid_targets_min": 4501 + }, + { + "epoch": 3.7164685908319184, + "grad_norm": 0.39499206532982356, + "learning_rate": 2.1347229450039237e-05, + "loss": 0.181, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08436868339776993, + "step": 3285, + "valid_targets_mean": 6460.8, + "valid_targets_min": 4998 + }, + { + "epoch": 3.722127900396152, + "grad_norm": 0.3954654463525336, + "learning_rate": 2.12909401476522e-05, + "loss": 0.1941, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09029680490493774, + "step": 3290, + "valid_targets_mean": 6186.5, + "valid_targets_min": 4858 + }, + { + "epoch": 3.727787209960385, + "grad_norm": 0.44157540298303294, + "learning_rate": 2.1234640574775783e-05, + "loss": 0.1752, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08847709000110626, + "step": 3295, + "valid_targets_mean": 6387.4, + "valid_targets_min": 4800 + }, + { + "epoch": 3.733446519524618, + "grad_norm": 0.4427824912325539, + "learning_rate": 2.1178331179319336e-05, + "loss": 0.1677, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08792071044445038, + "step": 3300, + "valid_targets_mean": 5474.0, + "valid_targets_min": 4455 + }, + { + "epoch": 3.7391058290888513, + "grad_norm": 0.4016277678564167, + "learning_rate": 2.112201240927037e-05, + "loss": 0.1632, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07885824888944626, + "step": 3305, + "valid_targets_mean": 6437.2, + "valid_targets_min": 4891 + }, + { + "epoch": 3.7447651386530842, + "grad_norm": 0.4062041851075691, + "learning_rate": 2.1065684712690954e-05, + "loss": 0.1722, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0895972028374672, + "step": 3310, + "valid_targets_mean": 5816.1, + "valid_targets_min": 4710 + }, + { + "epoch": 3.7504244482173172, + "grad_norm": 0.3965842895550913, + "learning_rate": 2.1009348537714194e-05, + "loss": 0.1666, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07192260026931763, + "step": 3315, + "valid_targets_mean": 6220.5, + "valid_targets_min": 5370 + }, + { + "epoch": 3.7560837577815507, + "grad_norm": 0.3881037427237576, + "learning_rate": 2.0953004332540644e-05, + "loss": 0.1686, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08083920180797577, + "step": 3320, + "valid_targets_mean": 6955.9, + "valid_targets_min": 5905 + }, + { + "epoch": 3.7617430673457837, + "grad_norm": 0.4374547536190714, + "learning_rate": 2.089665254543473e-05, + "loss": 0.1716, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07745985686779022, + "step": 3325, + "valid_targets_mean": 5594.5, + "valid_targets_min": 4580 + }, + { + "epoch": 3.767402376910017, + "grad_norm": 0.4044150273809672, + "learning_rate": 2.0840293624721234e-05, + "loss": 0.1689, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07344076037406921, + "step": 3330, + "valid_targets_mean": 6287.1, + "valid_targets_min": 4754 + }, + { + "epoch": 3.77306168647425, + "grad_norm": 0.33357004095529647, + "learning_rate": 2.0783928018781644e-05, + "loss": 0.146, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06659536063671112, + "step": 3335, + "valid_targets_mean": 7224.6, + "valid_targets_min": 5859 + }, + { + "epoch": 3.7787209960384835, + "grad_norm": 0.4231310519746198, + "learning_rate": 2.0727556176050676e-05, + "loss": 0.1583, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08931884169578552, + "step": 3340, + "valid_targets_mean": 7274.0, + "valid_targets_min": 3468 + }, + { + "epoch": 3.7843803056027165, + "grad_norm": 0.4565210587968675, + "learning_rate": 2.0671178545012643e-05, + "loss": 0.1745, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08933787047863007, + "step": 3345, + "valid_targets_mean": 6257.8, + "valid_targets_min": 5240 + }, + { + "epoch": 3.7900396151669495, + "grad_norm": 0.38347462923290676, + "learning_rate": 2.0614795574197907e-05, + "loss": 0.1715, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07480037957429886, + "step": 3350, + "valid_targets_mean": 6370.9, + "valid_targets_min": 4438 + }, + { + "epoch": 3.795698924731183, + "grad_norm": 0.4174826310199394, + "learning_rate": 2.0558407712179334e-05, + "loss": 0.1659, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07558287680149078, + "step": 3355, + "valid_targets_mean": 5285.2, + "valid_targets_min": 3782 + }, + { + "epoch": 3.801358234295416, + "grad_norm": 0.3681284513424055, + "learning_rate": 2.050201540756868e-05, + "loss": 0.157, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05968650057911873, + "step": 3360, + "valid_targets_mean": 5542.9, + "valid_targets_min": 4135 + }, + { + "epoch": 3.807017543859649, + "grad_norm": 0.4487408116099156, + "learning_rate": 2.0445619109013054e-05, + "loss": 0.1649, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09562434256076813, + "step": 3365, + "valid_targets_mean": 6316.4, + "valid_targets_min": 4295 + }, + { + "epoch": 3.8126768534238824, + "grad_norm": 0.41772234509740175, + "learning_rate": 2.038921926519134e-05, + "loss": 0.1653, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10487741231918335, + "step": 3370, + "valid_targets_mean": 6126.5, + "valid_targets_min": 5160 + }, + { + "epoch": 3.8183361629881154, + "grad_norm": 0.40546214270497016, + "learning_rate": 2.033281632481063e-05, + "loss": 0.1626, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07828259468078613, + "step": 3375, + "valid_targets_mean": 5957.0, + "valid_targets_min": 4067 + }, + { + "epoch": 3.8239954725523484, + "grad_norm": 0.46382207726042973, + "learning_rate": 2.027641073660265e-05, + "loss": 0.156, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08233707398176193, + "step": 3380, + "valid_targets_mean": 6752.8, + "valid_targets_min": 4705 + }, + { + "epoch": 3.829654782116582, + "grad_norm": 0.4312316607890912, + "learning_rate": 2.0220002949320187e-05, + "loss": 0.1783, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08896398544311523, + "step": 3385, + "valid_targets_mean": 5648.8, + "valid_targets_min": 4683 + }, + { + "epoch": 3.835314091680815, + "grad_norm": 0.36876344264400174, + "learning_rate": 2.0163593411733533e-05, + "loss": 0.1463, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06412133574485779, + "step": 3390, + "valid_targets_mean": 5857.6, + "valid_targets_min": 4696 + }, + { + "epoch": 3.8409734012450483, + "grad_norm": 0.44856642528841706, + "learning_rate": 2.0107182572626897e-05, + "loss": 0.1411, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0687895193696022, + "step": 3395, + "valid_targets_mean": 5867.9, + "valid_targets_min": 4546 + }, + { + "epoch": 3.8466327108092813, + "grad_norm": 0.39097022730644765, + "learning_rate": 2.0050770880794843e-05, + "loss": 0.1471, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07638616859912872, + "step": 3400, + "valid_targets_mean": 5898.2, + "valid_targets_min": 3200 + }, + { + "epoch": 3.8522920203735147, + "grad_norm": 0.39112753677463374, + "learning_rate": 1.9994358785038736e-05, + "loss": 0.152, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07915256172418594, + "step": 3405, + "valid_targets_mean": 6725.5, + "valid_targets_min": 5010 + }, + { + "epoch": 3.8579513299377477, + "grad_norm": 0.42787949130052616, + "learning_rate": 1.9937946734163117e-05, + "loss": 0.1707, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08065037429332733, + "step": 3410, + "valid_targets_mean": 5975.8, + "valid_targets_min": 4904 + }, + { + "epoch": 3.8636106395019807, + "grad_norm": 0.8453165006416562, + "learning_rate": 1.98815351769722e-05, + "loss": 0.1953, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10623349249362946, + "step": 3415, + "valid_targets_mean": 6284.1, + "valid_targets_min": 4522 + }, + { + "epoch": 3.869269949066214, + "grad_norm": 0.4366299359582474, + "learning_rate": 1.982512456226628e-05, + "loss": 0.1694, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08151426911354065, + "step": 3420, + "valid_targets_mean": 5247.2, + "valid_targets_min": 3629 + }, + { + "epoch": 3.874929258630447, + "grad_norm": 0.41676033925329425, + "learning_rate": 1.976871533883812e-05, + "loss": 0.1585, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07846318930387497, + "step": 3425, + "valid_targets_mean": 6143.1, + "valid_targets_min": 4324 + }, + { + "epoch": 3.88058856819468, + "grad_norm": 0.37287635266809355, + "learning_rate": 1.971230795546944e-05, + "loss": 0.1621, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07138499617576599, + "step": 3430, + "valid_targets_mean": 6130.6, + "valid_targets_min": 4920 + }, + { + "epoch": 3.8862478777589136, + "grad_norm": 0.40081652465442297, + "learning_rate": 1.965590286092731e-05, + "loss": 0.1715, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08055269718170166, + "step": 3435, + "valid_targets_mean": 5877.2, + "valid_targets_min": 4882 + }, + { + "epoch": 3.8919071873231466, + "grad_norm": 0.4007955677013251, + "learning_rate": 1.9599500503960596e-05, + "loss": 0.178, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0851922482252121, + "step": 3440, + "valid_targets_mean": 7092.9, + "valid_targets_min": 3943 + }, + { + "epoch": 3.8975664968873796, + "grad_norm": 0.3876590560341837, + "learning_rate": 1.954310133329639e-05, + "loss": 0.1667, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09008148312568665, + "step": 3445, + "valid_targets_mean": 7215.1, + "valid_targets_min": 5382 + }, + { + "epoch": 3.903225806451613, + "grad_norm": 0.40713771289534056, + "learning_rate": 1.948670579763641e-05, + "loss": 0.1654, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08729709684848785, + "step": 3450, + "valid_targets_mean": 6608.5, + "valid_targets_min": 4461 + }, + { + "epoch": 3.908885116015846, + "grad_norm": 0.39094538980691523, + "learning_rate": 1.9430314345653486e-05, + "loss": 0.1552, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07166876643896103, + "step": 3455, + "valid_targets_mean": 6976.5, + "valid_targets_min": 4903 + }, + { + "epoch": 3.914544425580079, + "grad_norm": 0.36977759208312555, + "learning_rate": 1.9373927425987928e-05, + "loss": 0.1576, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07982886582612991, + "step": 3460, + "valid_targets_mean": 5911.9, + "valid_targets_min": 4444 + }, + { + "epoch": 3.9202037351443124, + "grad_norm": 0.4016470140253359, + "learning_rate": 1.9317545487244016e-05, + "loss": 0.2111, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08938796818256378, + "step": 3465, + "valid_targets_mean": 6241.1, + "valid_targets_min": 4474 + }, + { + "epoch": 3.9258630447085454, + "grad_norm": 0.356557350505706, + "learning_rate": 1.926116897798639e-05, + "loss": 0.1549, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08672697097063065, + "step": 3470, + "valid_targets_mean": 8150.2, + "valid_targets_min": 6527 + }, + { + "epoch": 3.931522354272779, + "grad_norm": 0.3868612388305218, + "learning_rate": 1.9204798346736485e-05, + "loss": 0.1577, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08350171148777008, + "step": 3475, + "valid_targets_mean": 7072.1, + "valid_targets_min": 4718 + }, + { + "epoch": 3.937181663837012, + "grad_norm": 0.4585198687791278, + "learning_rate": 1.914843404196899e-05, + "loss": 0.1698, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09668946266174316, + "step": 3480, + "valid_targets_mean": 7318.2, + "valid_targets_min": 4714 + }, + { + "epoch": 3.9428409734012453, + "grad_norm": 0.4048562736885121, + "learning_rate": 1.9092076512108253e-05, + "loss": 0.1636, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07855217903852463, + "step": 3485, + "valid_targets_mean": 5820.9, + "valid_targets_min": 4374 + }, + { + "epoch": 3.9485002829654783, + "grad_norm": 0.3917996576012431, + "learning_rate": 1.903572620552471e-05, + "loss": 0.1461, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06844893842935562, + "step": 3490, + "valid_targets_mean": 6699.8, + "valid_targets_min": 5089 + }, + { + "epoch": 3.9541595925297113, + "grad_norm": 0.6298425070649368, + "learning_rate": 1.8979383570531358e-05, + "loss": 0.163, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07428422570228577, + "step": 3495, + "valid_targets_mean": 6441.8, + "valid_targets_min": 4509 + }, + { + "epoch": 3.9598189020939447, + "grad_norm": 0.4160757754187375, + "learning_rate": 1.8923049055380128e-05, + "loss": 0.1548, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08737757056951523, + "step": 3500, + "valid_targets_mean": 7454.5, + "valid_targets_min": 5193 + }, + { + "epoch": 3.9654782116581777, + "grad_norm": 0.389030714615794, + "learning_rate": 1.8866723108258376e-05, + "loss": 0.1534, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07964706420898438, + "step": 3505, + "valid_targets_mean": 6380.1, + "valid_targets_min": 5079 + }, + { + "epoch": 3.9711375212224107, + "grad_norm": 0.3976147668472962, + "learning_rate": 1.8810406177285282e-05, + "loss": 0.1584, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07907824218273163, + "step": 3510, + "valid_targets_mean": 6233.4, + "valid_targets_min": 4815 + }, + { + "epoch": 3.976796830786644, + "grad_norm": 0.3776249638532913, + "learning_rate": 1.8754098710508294e-05, + "loss": 0.1609, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07613420486450195, + "step": 3515, + "valid_targets_mean": 7910.5, + "valid_targets_min": 5751 + }, + { + "epoch": 3.982456140350877, + "grad_norm": 0.41508310436396745, + "learning_rate": 1.869780115589957e-05, + "loss": 0.1521, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07448315620422363, + "step": 3520, + "valid_targets_mean": 6159.1, + "valid_targets_min": 3977 + }, + { + "epoch": 3.98811544991511, + "grad_norm": 0.4083223864790042, + "learning_rate": 1.8641513961352396e-05, + "loss": 0.1643, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09216412901878357, + "step": 3525, + "valid_targets_mean": 6877.8, + "valid_targets_min": 5546 + }, + { + "epoch": 3.9937747594793436, + "grad_norm": 0.4508755790299667, + "learning_rate": 1.858523757467765e-05, + "loss": 0.1715, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09600496292114258, + "step": 3530, + "valid_targets_mean": 5488.2, + "valid_targets_min": 3559 + }, + { + "epoch": 3.9994340690435766, + "grad_norm": 0.458854399511331, + "learning_rate": 1.8528972443600226e-05, + "loss": 0.1686, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08771546185016632, + "step": 3535, + "valid_targets_mean": 5528.9, + "valid_targets_min": 3619 + }, + { + "epoch": 4.005659309564233, + "grad_norm": 0.4514666991492436, + "learning_rate": 1.8472719015755452e-05, + "loss": 0.1482, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04936162382364273, + "step": 3540, + "valid_targets_mean": 2722.6, + "valid_targets_min": 458 + }, + { + "epoch": 4.011318619128466, + "grad_norm": 0.4175050898748815, + "learning_rate": 1.8416477738685567e-05, + "loss": 0.0896, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.045595601201057434, + "step": 3545, + "valid_targets_mean": 3113.1, + "valid_targets_min": 2461 + }, + { + "epoch": 4.016977928692699, + "grad_norm": 0.37305228066072627, + "learning_rate": 1.8360249059836123e-05, + "loss": 0.082, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.048659227788448334, + "step": 3550, + "valid_targets_mean": 3648.1, + "valid_targets_min": 2068 + }, + { + "epoch": 4.022637238256933, + "grad_norm": 0.4589705258806371, + "learning_rate": 1.830403342655246e-05, + "loss": 0.1033, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04854677617549896, + "step": 3555, + "valid_targets_mean": 3423.6, + "valid_targets_min": 2425 + }, + { + "epoch": 4.028296547821165, + "grad_norm": 0.4398210555993455, + "learning_rate": 1.824783128607612e-05, + "loss": 0.0822, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04191640019416809, + "step": 3560, + "valid_targets_mean": 2724.1, + "valid_targets_min": 1198 + }, + { + "epoch": 4.033955857385399, + "grad_norm": 0.6871422253545325, + "learning_rate": 1.8191643085541296e-05, + "loss": 0.0979, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06957017630338669, + "step": 3565, + "valid_targets_mean": 1232.5, + "valid_targets_min": 807 + }, + { + "epoch": 4.039615166949632, + "grad_norm": 0.4837318705542788, + "learning_rate": 1.813546927197129e-05, + "loss": 0.1351, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05474815517663956, + "step": 3570, + "valid_targets_mean": 3487.4, + "valid_targets_min": 2351 + }, + { + "epoch": 4.045274476513866, + "grad_norm": 0.49181033753174, + "learning_rate": 1.8079310292274928e-05, + "loss": 0.092, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06998205929994583, + "step": 3575, + "valid_targets_mean": 3126.4, + "valid_targets_min": 781 + }, + { + "epoch": 4.050933786078098, + "grad_norm": 0.3471376618700252, + "learning_rate": 1.8023166593243026e-05, + "loss": 0.1017, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04436377435922623, + "step": 3580, + "valid_targets_mean": 3474.8, + "valid_targets_min": 799 + }, + { + "epoch": 4.056593095642332, + "grad_norm": 0.3174991015761313, + "learning_rate": 1.7967038621544845e-05, + "loss": 0.1065, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.03954795002937317, + "step": 3585, + "valid_targets_mean": 5026.1, + "valid_targets_min": 2802 + }, + { + "epoch": 4.062252405206565, + "grad_norm": 0.34281258735438996, + "learning_rate": 1.791092682372449e-05, + "loss": 0.0801, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.03165905550122261, + "step": 3590, + "valid_targets_mean": 2611.0, + "valid_targets_min": 574 + }, + { + "epoch": 4.067911714770798, + "grad_norm": 0.358911503965821, + "learning_rate": 1.7854831646197426e-05, + "loss": 0.0875, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04111532121896744, + "step": 3595, + "valid_targets_mean": 4036.5, + "valid_targets_min": 2642 + }, + { + "epoch": 4.073571024335031, + "grad_norm": 0.588027764171221, + "learning_rate": 1.7798753535246856e-05, + "loss": 0.0906, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05866089090704918, + "step": 3600, + "valid_targets_mean": 1960.5, + "valid_targets_min": 593 + }, + { + "epoch": 4.079230333899265, + "grad_norm": 0.4127721916383591, + "learning_rate": 1.7742692937020234e-05, + "loss": 0.1162, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.03659496456384659, + "step": 3605, + "valid_targets_mean": 3354.2, + "valid_targets_min": 1353 + }, + { + "epoch": 4.084889643463497, + "grad_norm": 0.48262743451622975, + "learning_rate": 1.768665029752567e-05, + "loss": 0.0836, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.048436567187309265, + "step": 3610, + "valid_targets_mean": 2300.9, + "valid_targets_min": 818 + }, + { + "epoch": 4.090548953027731, + "grad_norm": 0.3486116765836738, + "learning_rate": 1.763062606262839e-05, + "loss": 0.0831, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.029195936396718025, + "step": 3615, + "valid_targets_mean": 2295.8, + "valid_targets_min": 934 + }, + { + "epoch": 4.096208262591964, + "grad_norm": 0.3611409661869497, + "learning_rate": 1.7574620678047215e-05, + "loss": 0.0762, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04203696548938751, + "step": 3620, + "valid_targets_mean": 3524.4, + "valid_targets_min": 2798 + }, + { + "epoch": 4.101867572156197, + "grad_norm": 0.45875533279166925, + "learning_rate": 1.751863458935099e-05, + "loss": 0.096, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.038177717477083206, + "step": 3625, + "valid_targets_mean": 2810.1, + "valid_targets_min": 471 + }, + { + "epoch": 4.10752688172043, + "grad_norm": 0.4905943846685745, + "learning_rate": 1.746266824195504e-05, + "loss": 0.0974, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06193538382649422, + "step": 3630, + "valid_targets_mean": 3042.8, + "valid_targets_min": 903 + }, + { + "epoch": 4.1131861912846635, + "grad_norm": 0.41677480845735176, + "learning_rate": 1.7406722081117632e-05, + "loss": 0.097, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.038191840052604675, + "step": 3635, + "valid_targets_mean": 3081.6, + "valid_targets_min": 474 + }, + { + "epoch": 4.118845500848896, + "grad_norm": 0.4146124790584088, + "learning_rate": 1.7350796551936432e-05, + "loss": 0.0974, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.044857800006866455, + "step": 3640, + "valid_targets_mean": 3468.0, + "valid_targets_min": 640 + }, + { + "epoch": 4.124504810413129, + "grad_norm": 0.335753573182029, + "learning_rate": 1.7294892099344975e-05, + "loss": 0.1315, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.031602635979652405, + "step": 3645, + "valid_targets_mean": 3438.4, + "valid_targets_min": 902 + }, + { + "epoch": 4.130164119977363, + "grad_norm": 0.3949556838750356, + "learning_rate": 1.7239009168109108e-05, + "loss": 0.0821, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04059648513793945, + "step": 3650, + "valid_targets_mean": 3212.8, + "valid_targets_min": 1837 + }, + { + "epoch": 4.135823429541596, + "grad_norm": 0.7057789283513779, + "learning_rate": 1.7183148202823445e-05, + "loss": 0.1109, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08382494747638702, + "step": 3655, + "valid_targets_mean": 1358.8, + "valid_targets_min": 756 + }, + { + "epoch": 4.141482739105829, + "grad_norm": 0.554019738468921, + "learning_rate": 1.7127309647907867e-05, + "loss": 0.1078, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08004680275917053, + "step": 3660, + "valid_targets_mean": 2896.2, + "valid_targets_min": 1072 + }, + { + "epoch": 4.147142048670062, + "grad_norm": 0.38360621800150774, + "learning_rate": 1.7071493947603942e-05, + "loss": 0.0884, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.028133559972047806, + "step": 3665, + "valid_targets_mean": 2908.1, + "valid_targets_min": 895 + }, + { + "epoch": 4.152801358234296, + "grad_norm": 0.59174406288477, + "learning_rate": 1.7015701545971417e-05, + "loss": 0.2033, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1825319230556488, + "step": 3670, + "valid_targets_mean": 2562.2, + "valid_targets_min": 878 + }, + { + "epoch": 4.158460667798528, + "grad_norm": 0.4575103523467652, + "learning_rate": 1.695993288688469e-05, + "loss": 0.1865, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04015088826417923, + "step": 3675, + "valid_targets_mean": 2087.6, + "valid_targets_min": 756 + }, + { + "epoch": 4.164119977362762, + "grad_norm": 0.36675291316640873, + "learning_rate": 1.6904188414029248e-05, + "loss": 0.0996, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.03686298057436943, + "step": 3680, + "valid_targets_mean": 3421.8, + "valid_targets_min": 2963 + }, + { + "epoch": 4.169779286926995, + "grad_norm": 0.7950198987392267, + "learning_rate": 1.6848468570898172e-05, + "loss": 0.1673, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22266818583011627, + "step": 3685, + "valid_targets_mean": 2375.2, + "valid_targets_min": 1153 + }, + { + "epoch": 4.175438596491228, + "grad_norm": 0.2869113465122139, + "learning_rate": 1.6792773800788583e-05, + "loss": 0.1099, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.03493755683302879, + "step": 3690, + "valid_targets_mean": 4886.2, + "valid_targets_min": 1700 + }, + { + "epoch": 4.181097906055461, + "grad_norm": 0.34955060544114164, + "learning_rate": 1.673710454679813e-05, + "loss": 0.0871, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.02859926037490368, + "step": 3695, + "valid_targets_mean": 3965.1, + "valid_targets_min": 789 + }, + { + "epoch": 4.186757215619695, + "grad_norm": 0.39659681346145376, + "learning_rate": 1.668146125182147e-05, + "loss": 0.0963, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04969584941864014, + "step": 3700, + "valid_targets_mean": 3771.5, + "valid_targets_min": 995 + }, + { + "epoch": 4.192416525183927, + "grad_norm": 0.36356434916448555, + "learning_rate": 1.6625844358546715e-05, + "loss": 0.0842, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.030216921120882034, + "step": 3705, + "valid_targets_mean": 2220.8, + "valid_targets_min": 488 + }, + { + "epoch": 4.198075834748161, + "grad_norm": 0.47192608303622713, + "learning_rate": 1.657025430945195e-05, + "loss": 0.0858, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04975958541035652, + "step": 3710, + "valid_targets_mean": 3635.4, + "valid_targets_min": 2437 + }, + { + "epoch": 4.203735144312394, + "grad_norm": 0.475457710619053, + "learning_rate": 1.651469154680167e-05, + "loss": 0.0808, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.03584028780460358, + "step": 3715, + "valid_targets_mean": 1752.1, + "valid_targets_min": 712 + }, + { + "epoch": 4.2093944538766275, + "grad_norm": 0.4116697673921818, + "learning_rate": 1.6459156512643303e-05, + "loss": 0.083, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04280740022659302, + "step": 3720, + "valid_targets_mean": 3713.4, + "valid_targets_min": 2422 + }, + { + "epoch": 4.21505376344086, + "grad_norm": 0.41913241399909845, + "learning_rate": 1.640364964880367e-05, + "loss": 0.0734, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04091719165444374, + "step": 3725, + "valid_targets_mean": 3998.5, + "valid_targets_min": 3643 + }, + { + "epoch": 4.2207130730050935, + "grad_norm": 0.3277987128007894, + "learning_rate": 1.6348171396885468e-05, + "loss": 0.1272, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.02905534766614437, + "step": 3730, + "valid_targets_mean": 3319.0, + "valid_targets_min": 772 + }, + { + "epoch": 4.226372382569327, + "grad_norm": 0.3834794881774024, + "learning_rate": 1.6292722198263766e-05, + "loss": 0.0666, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.028163105249404907, + "step": 3735, + "valid_targets_mean": 2934.8, + "valid_targets_min": 722 + }, + { + "epoch": 4.2320316921335595, + "grad_norm": 0.35925257132513966, + "learning_rate": 1.623730249408249e-05, + "loss": 0.0743, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.03061058185994625, + "step": 3740, + "valid_targets_mean": 3656.4, + "valid_targets_min": 2788 + }, + { + "epoch": 4.237691001697793, + "grad_norm": 0.406155250941594, + "learning_rate": 1.618191272525092e-05, + "loss": 0.0778, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04038447141647339, + "step": 3745, + "valid_targets_mean": 3202.5, + "valid_targets_min": 948 + }, + { + "epoch": 4.243350311262026, + "grad_norm": 0.5704669418943428, + "learning_rate": 1.612655333244016e-05, + "loss": 0.095, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04683380573987961, + "step": 3750, + "valid_targets_mean": 1002.0, + "valid_targets_min": 490 + }, + { + "epoch": 4.249009620826259, + "grad_norm": 0.3560156205810564, + "learning_rate": 1.6071224756079666e-05, + "loss": 0.1064, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04591444879770279, + "step": 3755, + "valid_targets_mean": 3751.9, + "valid_targets_min": 2800 + }, + { + "epoch": 4.254668930390492, + "grad_norm": 0.49109745879570516, + "learning_rate": 1.6015927436353713e-05, + "loss": 0.1135, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06892198324203491, + "step": 3760, + "valid_targets_mean": 2641.8, + "valid_targets_min": 896 + }, + { + "epoch": 4.260328239954726, + "grad_norm": 0.4549988536563424, + "learning_rate": 1.59606618131979e-05, + "loss": 0.0903, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.049910858273506165, + "step": 3765, + "valid_targets_mean": 3199.0, + "valid_targets_min": 1333 + }, + { + "epoch": 4.265987549518958, + "grad_norm": 0.4702658295473038, + "learning_rate": 1.5905428326295663e-05, + "loss": 0.1041, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.047066498547792435, + "step": 3770, + "valid_targets_mean": 2548.1, + "valid_targets_min": 693 + }, + { + "epoch": 4.271646859083192, + "grad_norm": 0.403574329923757, + "learning_rate": 1.585022741507477e-05, + "loss": 0.1197, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.03679243102669716, + "step": 3775, + "valid_targets_mean": 2482.9, + "valid_targets_min": 803 + }, + { + "epoch": 4.277306168647425, + "grad_norm": 0.42340797826239956, + "learning_rate": 1.579505951870381e-05, + "loss": 0.0889, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04612584039568901, + "step": 3780, + "valid_targets_mean": 3239.2, + "valid_targets_min": 2525 + }, + { + "epoch": 4.282965478211658, + "grad_norm": 0.576805161329568, + "learning_rate": 1.573992507608872e-05, + "loss": 0.1105, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04122603312134743, + "step": 3785, + "valid_targets_mean": 3806.1, + "valid_targets_min": 2393 + }, + { + "epoch": 4.288624787775891, + "grad_norm": 0.4243001196442452, + "learning_rate": 1.568482452586929e-05, + "loss": 0.0924, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0410628467798233, + "step": 3790, + "valid_targets_mean": 2990.9, + "valid_targets_min": 804 + }, + { + "epoch": 4.294284097340125, + "grad_norm": 0.41563802469921696, + "learning_rate": 1.5629758306415652e-05, + "loss": 0.0858, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.042630601674318314, + "step": 3795, + "valid_targets_mean": 4885.8, + "valid_targets_min": 3695 + }, + { + "epoch": 4.299943406904358, + "grad_norm": 0.41860033902397403, + "learning_rate": 1.5574726855824827e-05, + "loss": 0.0765, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04467286542057991, + "step": 3800, + "valid_targets_mean": 3128.8, + "valid_targets_min": 596 + }, + { + "epoch": 4.305602716468591, + "grad_norm": 0.3141285736969467, + "learning_rate": 1.5519730611917206e-05, + "loss": 0.0753, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.029025087133049965, + "step": 3805, + "valid_targets_mean": 4567.8, + "valid_targets_min": 3804 + }, + { + "epoch": 4.311262026032824, + "grad_norm": 0.3717556297971831, + "learning_rate": 1.546477001223309e-05, + "loss": 0.0819, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.033049095422029495, + "step": 3810, + "valid_targets_mean": 3853.1, + "valid_targets_min": 2862 + }, + { + "epoch": 4.3169213355970575, + "grad_norm": 0.5564374516253535, + "learning_rate": 1.5409845494029208e-05, + "loss": 0.102, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05835752934217453, + "step": 3815, + "valid_targets_mean": 1072.6, + "valid_targets_min": 497 + }, + { + "epoch": 4.32258064516129, + "grad_norm": 0.419727637712494, + "learning_rate": 1.5354957494275207e-05, + "loss": 0.0857, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.048611339181661606, + "step": 3820, + "valid_targets_mean": 4191.0, + "valid_targets_min": 3918 + }, + { + "epoch": 4.3282399547255235, + "grad_norm": 0.5258066124856855, + "learning_rate": 1.5300106449650234e-05, + "loss": 0.0973, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04249310865998268, + "step": 3825, + "valid_targets_mean": 2121.9, + "valid_targets_min": 785 + }, + { + "epoch": 4.333899264289757, + "grad_norm": 0.3494728482101045, + "learning_rate": 1.524529279653939e-05, + "loss": 0.0693, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.035320721566677094, + "step": 3830, + "valid_targets_mean": 3451.6, + "valid_targets_min": 905 + }, + { + "epoch": 4.3395585738539895, + "grad_norm": 0.45771140437159885, + "learning_rate": 1.5190516971030324e-05, + "loss": 0.0909, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06552083045244217, + "step": 3835, + "valid_targets_mean": 2929.2, + "valid_targets_min": 657 + }, + { + "epoch": 4.345217883418223, + "grad_norm": 0.38872355420025406, + "learning_rate": 1.5135779408909732e-05, + "loss": 0.0832, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.042473215609788895, + "step": 3840, + "valid_targets_mean": 3798.9, + "valid_targets_min": 3184 + }, + { + "epoch": 4.350877192982456, + "grad_norm": 0.5015431213941005, + "learning_rate": 1.5081080545659874e-05, + "loss": 0.0754, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04622363671660423, + "step": 3845, + "valid_targets_mean": 1463.9, + "valid_targets_min": 627 + }, + { + "epoch": 4.356536502546689, + "grad_norm": 0.5140096283073178, + "learning_rate": 1.5026420816455156e-05, + "loss": 0.0836, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0405731201171875, + "step": 3850, + "valid_targets_mean": 1701.5, + "valid_targets_min": 827 + }, + { + "epoch": 4.362195812110922, + "grad_norm": 0.48814633520116596, + "learning_rate": 1.4971800656158624e-05, + "loss": 0.0862, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.03916483372449875, + "step": 3855, + "valid_targets_mean": 1789.9, + "valid_targets_min": 940 + }, + { + "epoch": 4.367855121675156, + "grad_norm": 0.4132972975750427, + "learning_rate": 1.4917220499318506e-05, + "loss": 0.1004, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.03779223561286926, + "step": 3860, + "valid_targets_mean": 2943.8, + "valid_targets_min": 958 + }, + { + "epoch": 4.373514431239389, + "grad_norm": 0.45275081977832515, + "learning_rate": 1.4862680780164805e-05, + "loss": 0.0831, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.035530224442481995, + "step": 3865, + "valid_targets_mean": 1859.2, + "valid_targets_min": 719 + }, + { + "epoch": 4.379173740803622, + "grad_norm": 0.5025845347171094, + "learning_rate": 1.4808181932605787e-05, + "loss": 0.1019, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.03286820650100708, + "step": 3870, + "valid_targets_mean": 1703.9, + "valid_targets_min": 759 + }, + { + "epoch": 4.384833050367855, + "grad_norm": 0.7441302882705789, + "learning_rate": 1.4753724390224551e-05, + "loss": 0.105, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10826053470373154, + "step": 3875, + "valid_targets_mean": 1556.6, + "valid_targets_min": 521 + }, + { + "epoch": 4.390492359932089, + "grad_norm": 0.433191395854235, + "learning_rate": 1.4699308586275591e-05, + "loss": 0.0843, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05096182972192764, + "step": 3880, + "valid_targets_mean": 3267.6, + "valid_targets_min": 1032 + }, + { + "epoch": 4.396151669496321, + "grad_norm": 0.3763981856513456, + "learning_rate": 1.464493495368132e-05, + "loss": 0.0712, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.028606755658984184, + "step": 3885, + "valid_targets_mean": 3609.0, + "valid_targets_min": 2741 + }, + { + "epoch": 4.401810979060555, + "grad_norm": 0.39195036983640796, + "learning_rate": 1.459060392502866e-05, + "loss": 0.0817, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.034750986844301224, + "step": 3890, + "valid_targets_mean": 2735.8, + "valid_targets_min": 1692 + }, + { + "epoch": 4.407470288624788, + "grad_norm": 0.402175644220058, + "learning_rate": 1.4536315932565575e-05, + "loss": 0.1114, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04643615335226059, + "step": 3895, + "valid_targets_mean": 3693.4, + "valid_targets_min": 2215 + }, + { + "epoch": 4.413129598189021, + "grad_norm": 0.37235681018612127, + "learning_rate": 1.448207140819764e-05, + "loss": 0.0681, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.030617203563451767, + "step": 3900, + "valid_targets_mean": 3109.9, + "valid_targets_min": 723 + }, + { + "epoch": 4.418788907753254, + "grad_norm": 0.3386227891488351, + "learning_rate": 1.4427870783484613e-05, + "loss": 0.0845, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.03594548627734184, + "step": 3905, + "valid_targets_mean": 4984.2, + "valid_targets_min": 3282 + }, + { + "epoch": 4.4244482173174875, + "grad_norm": 0.3103755738346701, + "learning_rate": 1.4373714489636985e-05, + "loss": 0.0789, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.03337225317955017, + "step": 3910, + "valid_targets_mean": 3863.4, + "valid_targets_min": 995 + }, + { + "epoch": 4.43010752688172, + "grad_norm": 0.2770027109506393, + "learning_rate": 1.4319602957512564e-05, + "loss": 0.068, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.032206807285547256, + "step": 3915, + "valid_targets_mean": 4201.0, + "valid_targets_min": 949 + }, + { + "epoch": 4.4357668364459535, + "grad_norm": 0.40662436545355135, + "learning_rate": 1.4265536617613043e-05, + "loss": 0.0887, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.055988721549510956, + "step": 3920, + "valid_targets_mean": 4321.6, + "valid_targets_min": 2724 + }, + { + "epoch": 4.441426146010187, + "grad_norm": 0.3409234587288552, + "learning_rate": 1.4211515900080568e-05, + "loss": 0.0735, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.02467377483844757, + "step": 3925, + "valid_targets_mean": 3384.5, + "valid_targets_min": 424 + }, + { + "epoch": 4.44708545557442, + "grad_norm": 0.41356156416362616, + "learning_rate": 1.4157541234694324e-05, + "loss": 0.1065, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.032063525170087814, + "step": 3930, + "valid_targets_mean": 2987.2, + "valid_targets_min": 531 + }, + { + "epoch": 4.452744765138653, + "grad_norm": 0.3208174346738755, + "learning_rate": 1.4103613050867117e-05, + "loss": 0.1014, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.03163669630885124, + "step": 3935, + "valid_targets_mean": 3469.5, + "valid_targets_min": 1195 + }, + { + "epoch": 4.458404074702886, + "grad_norm": 0.37296747713192585, + "learning_rate": 1.4049731777641948e-05, + "loss": 0.0687, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0622045174241066, + "step": 3940, + "valid_targets_mean": 2682.8, + "valid_targets_min": 722 + }, + { + "epoch": 4.46406338426712, + "grad_norm": 0.3788786441787262, + "learning_rate": 1.3995897843688615e-05, + "loss": 0.0978, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.03428567945957184, + "step": 3945, + "valid_targets_mean": 3496.9, + "valid_targets_min": 1251 + }, + { + "epoch": 4.469722693831352, + "grad_norm": 0.40435181825423283, + "learning_rate": 1.3942111677300276e-05, + "loss": 0.0783, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.042698122560977936, + "step": 3950, + "valid_targets_mean": 2912.5, + "valid_targets_min": 970 + }, + { + "epoch": 4.475382003395586, + "grad_norm": 0.5648490933493956, + "learning_rate": 1.388837370639008e-05, + "loss": 0.089, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.043184563517570496, + "step": 3955, + "valid_targets_mean": 1423.6, + "valid_targets_min": 634 + }, + { + "epoch": 4.481041312959819, + "grad_norm": 0.6048089226818909, + "learning_rate": 1.3834684358487731e-05, + "loss": 0.1028, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06896237283945084, + "step": 3960, + "valid_targets_mean": 1423.9, + "valid_targets_min": 685 + }, + { + "epoch": 4.486700622524052, + "grad_norm": 0.42283095468212983, + "learning_rate": 1.3781044060736089e-05, + "loss": 0.1121, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.03126339986920357, + "step": 3965, + "valid_targets_mean": 2932.5, + "valid_targets_min": 593 + }, + { + "epoch": 4.492359932088285, + "grad_norm": 0.3930952737533528, + "learning_rate": 1.3727453239887796e-05, + "loss": 0.0869, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.03336304426193237, + "step": 3970, + "valid_targets_mean": 3945.8, + "valid_targets_min": 3129 + }, + { + "epoch": 4.498019241652519, + "grad_norm": 0.4228850582250867, + "learning_rate": 1.367391232230185e-05, + "loss": 0.0889, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04839891940355301, + "step": 3975, + "valid_targets_mean": 3621.1, + "valid_targets_min": 859 + }, + { + "epoch": 4.503678551216751, + "grad_norm": 0.48907946540158925, + "learning_rate": 1.3620421733940234e-05, + "loss": 0.0796, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04165098816156387, + "step": 3980, + "valid_targets_mean": 2553.0, + "valid_targets_min": 616 + }, + { + "epoch": 4.509337860780985, + "grad_norm": 0.4190442455071584, + "learning_rate": 1.356698190036453e-05, + "loss": 0.0785, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.03545888513326645, + "step": 3985, + "valid_targets_mean": 2690.5, + "valid_targets_min": 541 + }, + { + "epoch": 4.514997170345218, + "grad_norm": 0.8437217629188583, + "learning_rate": 1.3513593246732506e-05, + "loss": 0.0752, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05044470727443695, + "step": 3990, + "valid_targets_mean": 906.4, + "valid_targets_min": 598 + }, + { + "epoch": 4.5206564799094515, + "grad_norm": 0.4600969761345104, + "learning_rate": 1.3460256197794768e-05, + "loss": 0.0915, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.03999202698469162, + "step": 3995, + "valid_targets_mean": 3147.2, + "valid_targets_min": 1101 + }, + { + "epoch": 4.526315789473684, + "grad_norm": 0.5107071187294293, + "learning_rate": 1.3406971177891343e-05, + "loss": 0.0746, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.034907665103673935, + "step": 4000, + "valid_targets_mean": 2331.8, + "valid_targets_min": 680 + }, + { + "epoch": 4.5319750990379175, + "grad_norm": 0.37300671528049695, + "learning_rate": 1.3353738610948347e-05, + "loss": 0.0755, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04032396525144577, + "step": 4005, + "valid_targets_mean": 2577.0, + "valid_targets_min": 774 + }, + { + "epoch": 4.53763440860215, + "grad_norm": 0.4193766212899392, + "learning_rate": 1.3300558920474586e-05, + "loss": 0.089, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.047154612839221954, + "step": 4010, + "valid_targets_mean": 4140.1, + "valid_targets_min": 1474 + }, + { + "epoch": 4.5432937181663835, + "grad_norm": 0.2421791647357862, + "learning_rate": 1.3247432529558175e-05, + "loss": 0.0696, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.024772703647613525, + "step": 4015, + "valid_targets_mean": 5498.9, + "valid_targets_min": 3677 + }, + { + "epoch": 4.548953027730617, + "grad_norm": 0.38238118057549253, + "learning_rate": 1.3194359860863201e-05, + "loss": 0.0764, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.044898584485054016, + "step": 4020, + "valid_targets_mean": 3352.5, + "valid_targets_min": 675 + }, + { + "epoch": 4.55461233729485, + "grad_norm": 0.35568346023941233, + "learning_rate": 1.3141341336626336e-05, + "loss": 0.0741, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.03152969479560852, + "step": 4025, + "valid_targets_mean": 3902.5, + "valid_targets_min": 795 + }, + { + "epoch": 4.560271646859083, + "grad_norm": 0.5484882564273186, + "learning_rate": 1.3088377378653503e-05, + "loss": 0.1747, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07694399356842041, + "step": 4030, + "valid_targets_mean": 2495.0, + "valid_targets_min": 605 + }, + { + "epoch": 4.565930956423316, + "grad_norm": 0.3475723012625417, + "learning_rate": 1.3035468408316501e-05, + "loss": 0.0731, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.03695917874574661, + "step": 4035, + "valid_targets_mean": 3298.6, + "valid_targets_min": 985 + }, + { + "epoch": 4.57159026598755, + "grad_norm": 0.41936708362600394, + "learning_rate": 1.2982614846549639e-05, + "loss": 0.0842, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04045773297548294, + "step": 4040, + "valid_targets_mean": 3505.2, + "valid_targets_min": 878 + }, + { + "epoch": 4.577249575551782, + "grad_norm": 0.7145533132914758, + "learning_rate": 1.2929817113846428e-05, + "loss": 0.1049, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11567816138267517, + "step": 4045, + "valid_targets_mean": 2289.5, + "valid_targets_min": 1214 + }, + { + "epoch": 4.582908885116016, + "grad_norm": 0.5641645194220422, + "learning_rate": 1.287707563025621e-05, + "loss": 0.1585, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0779387354850769, + "step": 4050, + "valid_targets_mean": 5734.0, + "valid_targets_min": 4389 + }, + { + "epoch": 4.588568194680249, + "grad_norm": 0.3839659782753559, + "learning_rate": 1.2824390815380805e-05, + "loss": 0.158, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07936419546604156, + "step": 4055, + "valid_targets_mean": 7783.9, + "valid_targets_min": 4541 + }, + { + "epoch": 4.594227504244482, + "grad_norm": 0.43780586774804253, + "learning_rate": 1.2771763088371202e-05, + "loss": 0.1529, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09031634032726288, + "step": 4060, + "valid_targets_mean": 6178.1, + "valid_targets_min": 4245 + }, + { + "epoch": 4.599886813808715, + "grad_norm": 0.3811903286531149, + "learning_rate": 1.2719192867924197e-05, + "loss": 0.1535, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08019092679023743, + "step": 4065, + "valid_targets_mean": 6757.1, + "valid_targets_min": 4108 + }, + { + "epoch": 4.605546123372949, + "grad_norm": 0.3921175297747304, + "learning_rate": 1.2666680572279083e-05, + "loss": 0.1609, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08457393944263458, + "step": 4070, + "valid_targets_mean": 6910.1, + "valid_targets_min": 4689 + }, + { + "epoch": 4.611205432937181, + "grad_norm": 0.48690235782801106, + "learning_rate": 1.2614226619214317e-05, + "loss": 0.1595, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08393862843513489, + "step": 4075, + "valid_targets_mean": 5941.6, + "valid_targets_min": 4398 + }, + { + "epoch": 4.616864742501415, + "grad_norm": 0.37472519424896533, + "learning_rate": 1.2561831426044173e-05, + "loss": 0.1579, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06946370005607605, + "step": 4080, + "valid_targets_mean": 6833.0, + "valid_targets_min": 4826 + }, + { + "epoch": 4.622524052065648, + "grad_norm": 0.4310350072416117, + "learning_rate": 1.250949540961547e-05, + "loss": 0.1579, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08658717572689056, + "step": 4085, + "valid_targets_mean": 6255.4, + "valid_targets_min": 4525 + }, + { + "epoch": 4.6281833616298815, + "grad_norm": 0.3837078175362443, + "learning_rate": 1.2457218986304196e-05, + "loss": 0.164, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07161261141300201, + "step": 4090, + "valid_targets_mean": 7195.8, + "valid_targets_min": 4970 + }, + { + "epoch": 4.633842671194114, + "grad_norm": 0.379058903259548, + "learning_rate": 1.2405002572012252e-05, + "loss": 0.1508, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06618999689817429, + "step": 4095, + "valid_targets_mean": 6242.6, + "valid_targets_min": 4552 + }, + { + "epoch": 4.6395019807583475, + "grad_norm": 0.3908252343440054, + "learning_rate": 1.2352846582164117e-05, + "loss": 0.1613, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07632888108491898, + "step": 4100, + "valid_targets_mean": 6830.8, + "valid_targets_min": 3009 + }, + { + "epoch": 4.645161290322581, + "grad_norm": 0.41875605434500857, + "learning_rate": 1.2300751431703523e-05, + "loss": 0.1477, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08107197284698486, + "step": 4105, + "valid_targets_mean": 7165.4, + "valid_targets_min": 5304 + }, + { + "epoch": 4.6508205998868135, + "grad_norm": 0.38780398890332185, + "learning_rate": 1.2248717535090196e-05, + "loss": 0.1564, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06324753165245056, + "step": 4110, + "valid_targets_mean": 5539.0, + "valid_targets_min": 3815 + }, + { + "epoch": 4.656479909451047, + "grad_norm": 0.39156724007158206, + "learning_rate": 1.2196745306296522e-05, + "loss": 0.1397, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07122830301523209, + "step": 4115, + "valid_targets_mean": 6168.2, + "valid_targets_min": 5417 + }, + { + "epoch": 4.66213921901528, + "grad_norm": 0.3932438953465682, + "learning_rate": 1.2144835158804282e-05, + "loss": 0.1421, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08094830065965652, + "step": 4120, + "valid_targets_mean": 8675.8, + "valid_targets_min": 4939 + }, + { + "epoch": 4.667798528579513, + "grad_norm": 0.37836227546275575, + "learning_rate": 1.2092987505601346e-05, + "loss": 0.157, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07368934154510498, + "step": 4125, + "valid_targets_mean": 6676.6, + "valid_targets_min": 5361 + }, + { + "epoch": 4.673457838143746, + "grad_norm": 0.3824607699024186, + "learning_rate": 1.2041202759178381e-05, + "loss": 0.1517, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05529152974486351, + "step": 4130, + "valid_targets_mean": 6001.9, + "valid_targets_min": 4384 + }, + { + "epoch": 4.67911714770798, + "grad_norm": 0.41555495119465163, + "learning_rate": 1.198948133152559e-05, + "loss": 0.1384, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06710748374462128, + "step": 4135, + "valid_targets_mean": 6403.6, + "valid_targets_min": 4823 + }, + { + "epoch": 4.684776457272212, + "grad_norm": 0.37979549651817585, + "learning_rate": 1.1937823634129416e-05, + "loss": 0.1358, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06499824672937393, + "step": 4140, + "valid_targets_mean": 6612.9, + "valid_targets_min": 5243 + }, + { + "epoch": 4.690435766836446, + "grad_norm": 0.39709108751759126, + "learning_rate": 1.1886230077969278e-05, + "loss": 0.148, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07931042462587357, + "step": 4145, + "valid_targets_mean": 6701.5, + "valid_targets_min": 4868 + }, + { + "epoch": 4.696095076400679, + "grad_norm": 0.38870622397209686, + "learning_rate": 1.1834701073514306e-05, + "loss": 0.1673, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09310270100831985, + "step": 4150, + "valid_targets_mean": 6796.9, + "valid_targets_min": 4988 + }, + { + "epoch": 4.701754385964913, + "grad_norm": 0.43720168717472535, + "learning_rate": 1.1783237030720049e-05, + "loss": 0.1474, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07925724238157272, + "step": 4155, + "valid_targets_mean": 5708.9, + "valid_targets_min": 4499 + }, + { + "epoch": 4.707413695529145, + "grad_norm": 0.44013489757304236, + "learning_rate": 1.1731838359025261e-05, + "loss": 0.1518, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10216553509235382, + "step": 4160, + "valid_targets_mean": 6788.8, + "valid_targets_min": 5433 + }, + { + "epoch": 4.713073005093379, + "grad_norm": 0.45488884848440536, + "learning_rate": 1.1680505467348584e-05, + "loss": 0.159, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0847107321023941, + "step": 4165, + "valid_targets_mean": 6085.2, + "valid_targets_min": 4619 + }, + { + "epoch": 4.718732314657612, + "grad_norm": 0.4098054717644627, + "learning_rate": 1.162923876408535e-05, + "loss": 0.1773, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07096484303474426, + "step": 4170, + "valid_targets_mean": 5615.4, + "valid_targets_min": 4596 + }, + { + "epoch": 4.724391624221845, + "grad_norm": 0.43625761231220916, + "learning_rate": 1.1578038657104295e-05, + "loss": 0.1793, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07293984293937683, + "step": 4175, + "valid_targets_mean": 6138.2, + "valid_targets_min": 4581 + }, + { + "epoch": 4.730050933786078, + "grad_norm": 0.40294247350992274, + "learning_rate": 1.1526905553744337e-05, + "loss": 0.1515, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07699081301689148, + "step": 4180, + "valid_targets_mean": 6245.5, + "valid_targets_min": 5647 + }, + { + "epoch": 4.7357102433503115, + "grad_norm": 0.3897786464383688, + "learning_rate": 1.1475839860811301e-05, + "loss": 0.1549, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06682749092578888, + "step": 4185, + "valid_targets_mean": 6804.4, + "valid_targets_min": 4398 + }, + { + "epoch": 4.741369552914544, + "grad_norm": 0.4457411071079685, + "learning_rate": 1.142484198457475e-05, + "loss": 0.1568, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08734531700611115, + "step": 4190, + "valid_targets_mean": 6334.9, + "valid_targets_min": 4418 + }, + { + "epoch": 4.7470288624787775, + "grad_norm": 0.42983068360634674, + "learning_rate": 1.1373912330764671e-05, + "loss": 0.1618, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07989311963319778, + "step": 4195, + "valid_targets_mean": 6267.6, + "valid_targets_min": 4969 + }, + { + "epoch": 4.752688172043011, + "grad_norm": 0.3754780927843705, + "learning_rate": 1.1323051304568292e-05, + "loss": 0.1482, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06956741213798523, + "step": 4200, + "valid_targets_mean": 7183.8, + "valid_targets_min": 5042 + }, + { + "epoch": 4.7583474816072435, + "grad_norm": 0.4377212721323755, + "learning_rate": 1.1272259310626872e-05, + "loss": 0.1647, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08422774076461792, + "step": 4205, + "valid_targets_mean": 7177.6, + "valid_targets_min": 4625 + }, + { + "epoch": 4.764006791171477, + "grad_norm": 0.44558446888656894, + "learning_rate": 1.122153675303244e-05, + "loss": 0.1597, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07981698215007782, + "step": 4210, + "valid_targets_mean": 6203.6, + "valid_targets_min": 4820 + }, + { + "epoch": 4.76966610073571, + "grad_norm": 0.4167381948685433, + "learning_rate": 1.1170884035324607e-05, + "loss": 0.1472, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07194990664720535, + "step": 4215, + "valid_targets_mean": 5743.5, + "valid_targets_min": 4337 + }, + { + "epoch": 4.775325410299944, + "grad_norm": 0.4152881526429264, + "learning_rate": 1.1120301560487365e-05, + "loss": 0.1373, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07468633353710175, + "step": 4220, + "valid_targets_mean": 6947.5, + "valid_targets_min": 3281 + }, + { + "epoch": 4.780984719864176, + "grad_norm": 0.45436545866640327, + "learning_rate": 1.1069789730945849e-05, + "loss": 0.1535, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07758845388889313, + "step": 4225, + "valid_targets_mean": 6575.4, + "valid_targets_min": 5298 + }, + { + "epoch": 4.78664402942841, + "grad_norm": 0.47195793354702353, + "learning_rate": 1.1019348948563154e-05, + "loss": 0.1667, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08708202838897705, + "step": 4230, + "valid_targets_mean": 6347.8, + "valid_targets_min": 4543 + }, + { + "epoch": 4.792303338992643, + "grad_norm": 0.43626009702072804, + "learning_rate": 1.096897961463714e-05, + "loss": 0.1605, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07777862250804901, + "step": 4235, + "valid_targets_mean": 6430.0, + "valid_targets_min": 4713 + }, + { + "epoch": 4.797962648556876, + "grad_norm": 0.38932044404082405, + "learning_rate": 1.0918682129897244e-05, + "loss": 0.1453, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06617245078086853, + "step": 4240, + "valid_targets_mean": 5641.6, + "valid_targets_min": 5269 + }, + { + "epoch": 4.803621958121109, + "grad_norm": 0.39367960502629834, + "learning_rate": 1.0868456894501276e-05, + "loss": 0.1468, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07634148746728897, + "step": 4245, + "valid_targets_mean": 6397.2, + "valid_targets_min": 3450 + }, + { + "epoch": 4.809281267685343, + "grad_norm": 0.42920395042668, + "learning_rate": 1.0818304308032232e-05, + "loss": 0.1541, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08132395893335342, + "step": 4250, + "valid_targets_mean": 7338.9, + "valid_targets_min": 4959 + }, + { + "epoch": 4.814940577249575, + "grad_norm": 0.4213994531330286, + "learning_rate": 1.0768224769495155e-05, + "loss": 0.1597, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08521014451980591, + "step": 4255, + "valid_targets_mean": 6312.1, + "valid_targets_min": 5322 + }, + { + "epoch": 4.820599886813809, + "grad_norm": 0.4243478471848131, + "learning_rate": 1.0718218677313904e-05, + "loss": 0.1471, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0787557065486908, + "step": 4260, + "valid_targets_mean": 6234.5, + "valid_targets_min": 4716 + }, + { + "epoch": 4.826259196378042, + "grad_norm": 0.47056588318371356, + "learning_rate": 1.0668286429328015e-05, + "loss": 0.152, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08404265344142914, + "step": 4265, + "valid_targets_mean": 5793.6, + "valid_targets_min": 4309 + }, + { + "epoch": 4.831918505942275, + "grad_norm": 0.4252455589626042, + "learning_rate": 1.0618428422789555e-05, + "loss": 0.1593, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08075364679098129, + "step": 4270, + "valid_targets_mean": 6092.1, + "valid_targets_min": 4923 + }, + { + "epoch": 4.837577815506508, + "grad_norm": 0.5743013606764064, + "learning_rate": 1.0568645054359919e-05, + "loss": 0.1324, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07704248279333115, + "step": 4275, + "valid_targets_mean": 6810.4, + "valid_targets_min": 4112 + }, + { + "epoch": 4.8432371250707416, + "grad_norm": 0.39592548119886867, + "learning_rate": 1.051893672010669e-05, + "loss": 0.1295, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07613039761781693, + "step": 4280, + "valid_targets_mean": 5986.9, + "valid_targets_min": 3761 + }, + { + "epoch": 4.848896434634975, + "grad_norm": 0.3889401052006256, + "learning_rate": 1.0469303815500518e-05, + "loss": 0.1404, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06770626455545425, + "step": 4285, + "valid_targets_mean": 6484.5, + "valid_targets_min": 5426 + }, + { + "epoch": 4.8545557441992075, + "grad_norm": 0.4252413307253176, + "learning_rate": 1.0419746735411922e-05, + "loss": 0.1461, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07313274592161179, + "step": 4290, + "valid_targets_mean": 5448.1, + "valid_targets_min": 3853 + }, + { + "epoch": 4.860215053763441, + "grad_norm": 0.461735193671975, + "learning_rate": 1.037026587410819e-05, + "loss": 0.1666, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09036986529827118, + "step": 4295, + "valid_targets_mean": 6877.9, + "valid_targets_min": 5073 + }, + { + "epoch": 4.8658743633276735, + "grad_norm": 0.44096608205122684, + "learning_rate": 1.032086162525021e-05, + "loss": 0.1842, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08363276720046997, + "step": 4300, + "valid_targets_mean": 6082.2, + "valid_targets_min": 4487 + }, + { + "epoch": 4.871533672891907, + "grad_norm": 0.40175967153448644, + "learning_rate": 1.0271534381889385e-05, + "loss": 0.1467, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06342478841543198, + "step": 4305, + "valid_targets_mean": 5412.6, + "valid_targets_min": 4773 + }, + { + "epoch": 4.87719298245614, + "grad_norm": 0.4267089964629085, + "learning_rate": 1.0222284536464451e-05, + "loss": 0.1533, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0881476104259491, + "step": 4310, + "valid_targets_mean": 6079.9, + "valid_targets_min": 4547 + }, + { + "epoch": 4.882852292020374, + "grad_norm": 0.40810288675710565, + "learning_rate": 1.0173112480798376e-05, + "loss": 0.1496, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07969158887863159, + "step": 4315, + "valid_targets_mean": 6327.1, + "valid_targets_min": 4826 + }, + { + "epoch": 4.888511601584606, + "grad_norm": 0.402645330063668, + "learning_rate": 1.0124018606095278e-05, + "loss": 0.1723, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08202679455280304, + "step": 4320, + "valid_targets_mean": 7172.9, + "valid_targets_min": 4949 + }, + { + "epoch": 4.89417091114884, + "grad_norm": 0.4248337483053415, + "learning_rate": 1.0075003302937247e-05, + "loss": 0.1571, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07410774379968643, + "step": 4325, + "valid_targets_mean": 7059.6, + "valid_targets_min": 4489 + }, + { + "epoch": 4.899830220713073, + "grad_norm": 0.4370083120382635, + "learning_rate": 1.0026066961281282e-05, + "loss": 0.1558, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07156321406364441, + "step": 4330, + "valid_targets_mean": 6002.6, + "valid_targets_min": 4865 + }, + { + "epoch": 4.905489530277306, + "grad_norm": 0.3953350659908636, + "learning_rate": 9.977209970456194e-06, + "loss": 0.1581, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06931529939174652, + "step": 4335, + "valid_targets_mean": 5837.2, + "valid_targets_min": 4452 + }, + { + "epoch": 4.911148839841539, + "grad_norm": 0.40157243988288766, + "learning_rate": 9.928432719159475e-06, + "loss": 0.1381, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06669177114963531, + "step": 4340, + "valid_targets_mean": 5746.0, + "valid_targets_min": 4915 + }, + { + "epoch": 4.916808149405773, + "grad_norm": 0.38731243010187155, + "learning_rate": 9.879735595454232e-06, + "loss": 0.1523, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.060908619314432144, + "step": 4345, + "valid_targets_mean": 5819.8, + "valid_targets_min": 3727 + }, + { + "epoch": 4.922467458970006, + "grad_norm": 0.3684276727705525, + "learning_rate": 9.831118986766084e-06, + "loss": 0.1936, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06493750959634781, + "step": 4350, + "valid_targets_mean": 5818.0, + "valid_targets_min": 4482 + }, + { + "epoch": 4.928126768534239, + "grad_norm": 0.3873670628522635, + "learning_rate": 9.782583279880096e-06, + "loss": 0.1445, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07012508809566498, + "step": 4355, + "valid_targets_mean": 6368.6, + "valid_targets_min": 4598 + }, + { + "epoch": 4.933786078098472, + "grad_norm": 0.451605814289351, + "learning_rate": 9.734128860937675e-06, + "loss": 0.1571, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07075818628072739, + "step": 4360, + "valid_targets_mean": 5623.0, + "valid_targets_min": 4395 + }, + { + "epoch": 4.939445387662705, + "grad_norm": 0.4432899759994766, + "learning_rate": 9.68575611543355e-06, + "loss": 0.1578, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07715544104576111, + "step": 4365, + "valid_targets_mean": 5788.0, + "valid_targets_min": 4135 + }, + { + "epoch": 4.945104697226938, + "grad_norm": 0.4105329056121915, + "learning_rate": 9.637465428212636e-06, + "loss": 0.1443, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06590640544891357, + "step": 4370, + "valid_targets_mean": 6081.9, + "valid_targets_min": 4262 + }, + { + "epoch": 4.950764006791172, + "grad_norm": 0.4538542046801599, + "learning_rate": 9.589257183467025e-06, + "loss": 0.1409, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08474253118038177, + "step": 4375, + "valid_targets_mean": 7015.5, + "valid_targets_min": 5149 + }, + { + "epoch": 4.956423316355405, + "grad_norm": 0.42676147623064875, + "learning_rate": 9.541131764732896e-06, + "loss": 0.1453, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06608035415410995, + "step": 4380, + "valid_targets_mean": 5913.9, + "valid_targets_min": 3437 + }, + { + "epoch": 4.962082625919638, + "grad_norm": 0.4221962774727456, + "learning_rate": 9.493089554887508e-06, + "loss": 0.1538, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07253015041351318, + "step": 4385, + "valid_targets_mean": 6859.5, + "valid_targets_min": 3535 + }, + { + "epoch": 4.967741935483871, + "grad_norm": 0.42935647093307616, + "learning_rate": 9.445130936146098e-06, + "loss": 0.1447, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08380566537380219, + "step": 4390, + "valid_targets_mean": 6618.5, + "valid_targets_min": 4712 + }, + { + "epoch": 4.973401245048104, + "grad_norm": 0.3644620796089173, + "learning_rate": 9.397256290058869e-06, + "loss": 0.1471, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07990467548370361, + "step": 4395, + "valid_targets_mean": 7741.1, + "valid_targets_min": 4987 + }, + { + "epoch": 4.979060554612337, + "grad_norm": 0.43536716165333933, + "learning_rate": 9.349465997507974e-06, + "loss": 0.1572, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08105675131082535, + "step": 4400, + "valid_targets_mean": 6720.6, + "valid_targets_min": 3577 + }, + { + "epoch": 4.98471986417657, + "grad_norm": 0.4495760127220141, + "learning_rate": 9.301760438704442e-06, + "loss": 0.1335, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07325148582458496, + "step": 4405, + "valid_targets_mean": 5528.4, + "valid_targets_min": 3665 + }, + { + "epoch": 4.990379173740804, + "grad_norm": 0.5325255775898624, + "learning_rate": 9.254139993185176e-06, + "loss": 0.1606, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07834718376398087, + "step": 4410, + "valid_targets_mean": 7081.1, + "valid_targets_min": 5409 + }, + { + "epoch": 4.996038483305036, + "grad_norm": 0.42523707223061147, + "learning_rate": 9.206605039809955e-06, + "loss": 0.16, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07933046668767929, + "step": 4415, + "valid_targets_mean": 5952.5, + "valid_targets_min": 3545 + }, + { + "epoch": 5.001131861912847, + "grad_norm": 0.638363339498774, + "learning_rate": 9.159155956758375e-06, + "loss": 0.1498, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06102922558784485, + "step": 4420, + "valid_targets_mean": 1525.2, + "valid_targets_min": 534 + }, + { + "epoch": 5.006791171477079, + "grad_norm": 0.3876386384969105, + "learning_rate": 9.111793121526862e-06, + "loss": 0.1006, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.033130843192338943, + "step": 4425, + "valid_targets_mean": 2735.4, + "valid_targets_min": 834 + }, + { + "epoch": 5.012450481041313, + "grad_norm": 0.3723456784387418, + "learning_rate": 9.064516910925698e-06, + "loss": 0.0817, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.038259223103523254, + "step": 4430, + "valid_targets_mean": 3253.9, + "valid_targets_min": 2790 + }, + { + "epoch": 5.018109790605546, + "grad_norm": 0.41959277022738695, + "learning_rate": 9.017327701075965e-06, + "loss": 0.0755, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.03765428066253662, + "step": 4435, + "valid_targets_mean": 3383.1, + "valid_targets_min": 2133 + }, + { + "epoch": 5.023769100169779, + "grad_norm": 0.4426402802559362, + "learning_rate": 8.970225867406593e-06, + "loss": 0.0951, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.042580496519804, + "step": 4440, + "valid_targets_mean": 2954.8, + "valid_targets_min": 2338 + }, + { + "epoch": 5.029428409734012, + "grad_norm": 0.4350704423736611, + "learning_rate": 8.923211784651356e-06, + "loss": 0.0739, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.03806948661804199, + "step": 4445, + "valid_targets_mean": 2680.4, + "valid_targets_min": 662 + }, + { + "epoch": 5.035087719298246, + "grad_norm": 1.0300556636605873, + "learning_rate": 8.876285826845918e-06, + "loss": 0.1014, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07481037080287933, + "step": 4450, + "valid_targets_mean": 1101.0, + "valid_targets_min": 559 + }, + { + "epoch": 5.040747028862479, + "grad_norm": 0.404777286331085, + "learning_rate": 8.82944836732482e-06, + "loss": 0.1122, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.03385402262210846, + "step": 4455, + "valid_targets_mean": 3490.6, + "valid_targets_min": 1091 + }, + { + "epoch": 5.046406338426712, + "grad_norm": 0.40573696064641523, + "learning_rate": 8.782699778718516e-06, + "loss": 0.0852, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.03949911519885063, + "step": 4460, + "valid_targets_mean": 3061.8, + "valid_targets_min": 947 + }, + { + "epoch": 5.052065647990945, + "grad_norm": 0.5489247139883345, + "learning_rate": 8.736040432950447e-06, + "loss": 0.1107, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09486508369445801, + "step": 4465, + "valid_targets_mean": 2125.4, + "valid_targets_min": 859 + }, + { + "epoch": 5.057724957555179, + "grad_norm": 0.30796653853544864, + "learning_rate": 8.689470701234037e-06, + "loss": 0.0768, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04153946787118912, + "step": 4470, + "valid_targets_mean": 5837.8, + "valid_targets_min": 636 + }, + { + "epoch": 5.063384267119411, + "grad_norm": 0.40499445861651706, + "learning_rate": 8.642990954069747e-06, + "loss": 0.0766, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.042799048125743866, + "step": 4475, + "valid_targets_mean": 2185.0, + "valid_targets_min": 826 + }, + { + "epoch": 5.069043576683645, + "grad_norm": 0.31350898629226726, + "learning_rate": 8.596601561242167e-06, + "loss": 0.0755, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.03391445428133011, + "step": 4480, + "valid_targets_mean": 3446.5, + "valid_targets_min": 1096 + }, + { + "epoch": 5.074702886247878, + "grad_norm": 0.4854951603980173, + "learning_rate": 8.550302891817015e-06, + "loss": 0.0938, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.03978767246007919, + "step": 4485, + "valid_targets_mean": 2343.1, + "valid_targets_min": 688 + }, + { + "epoch": 5.080362195812111, + "grad_norm": 0.3865411280267809, + "learning_rate": 8.50409531413824e-06, + "loss": 0.096, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.02831077389419079, + "step": 4490, + "valid_targets_mean": 2938.5, + "valid_targets_min": 553 + }, + { + "epoch": 5.086021505376344, + "grad_norm": 0.4646528726080392, + "learning_rate": 8.457979195825076e-06, + "loss": 0.0775, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.041827790439128876, + "step": 4495, + "valid_targets_mean": 2988.9, + "valid_targets_min": 566 + }, + { + "epoch": 5.0916808149405774, + "grad_norm": 0.3385439451476495, + "learning_rate": 8.411954903769145e-06, + "loss": 0.0712, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.029555505141615868, + "step": 4500, + "valid_targets_mean": 3230.2, + "valid_targets_min": 1111 + }, + { + "epoch": 5.09734012450481, + "grad_norm": 0.45477204828158563, + "learning_rate": 8.366022804131487e-06, + "loss": 0.0764, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05276007205247879, + "step": 4505, + "valid_targets_mean": 1232.1, + "valid_targets_min": 622 + }, + { + "epoch": 5.102999434069043, + "grad_norm": 0.6980159160392169, + "learning_rate": 8.320183262339686e-06, + "loss": 0.0875, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04193844646215439, + "step": 4510, + "valid_targets_mean": 2132.5, + "valid_targets_min": 696 + }, + { + "epoch": 5.108658743633277, + "grad_norm": 0.47831633893518705, + "learning_rate": 8.274436643084962e-06, + "loss": 0.086, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.044397369027137756, + "step": 4515, + "valid_targets_mean": 3225.2, + "valid_targets_min": 1922 + }, + { + "epoch": 5.11431805319751, + "grad_norm": 0.4687043854565991, + "learning_rate": 8.228783310319248e-06, + "loss": 0.0865, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.032066695392131805, + "step": 4520, + "valid_targets_mean": 2008.5, + "valid_targets_min": 802 + }, + { + "epoch": 5.119977362761743, + "grad_norm": 0.5540002518793067, + "learning_rate": 8.183223627252305e-06, + "loss": 0.0951, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0679977536201477, + "step": 4525, + "valid_targets_mean": 1754.1, + "valid_targets_min": 957 + }, + { + "epoch": 5.125636672325976, + "grad_norm": 0.4103603284401227, + "learning_rate": 8.137757956348854e-06, + "loss": 0.113, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.042907726019620895, + "step": 4530, + "valid_targets_mean": 2498.5, + "valid_targets_min": 641 + }, + { + "epoch": 5.13129598189021, + "grad_norm": 0.4269642088304196, + "learning_rate": 8.092386659325644e-06, + "loss": 0.0732, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04733027517795563, + "step": 4535, + "valid_targets_mean": 2364.6, + "valid_targets_min": 924 + }, + { + "epoch": 5.136955291454442, + "grad_norm": 0.5325298248650074, + "learning_rate": 8.047110097148618e-06, + "loss": 0.1092, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07877378910779953, + "step": 4540, + "valid_targets_mean": 2385.5, + "valid_targets_min": 700 + }, + { + "epoch": 5.142614601018676, + "grad_norm": 1.2876473790910719, + "learning_rate": 8.001928630030017e-06, + "loss": 0.0912, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07444595545530319, + "step": 4545, + "valid_targets_mean": 3363.2, + "valid_targets_min": 2731 + }, + { + "epoch": 5.148273910582909, + "grad_norm": 0.5545015729804329, + "learning_rate": 7.95684261742554e-06, + "loss": 0.082, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06806674599647522, + "step": 4550, + "valid_targets_mean": 2615.0, + "valid_targets_min": 893 + }, + { + "epoch": 5.153933220147142, + "grad_norm": 0.794714466040992, + "learning_rate": 7.911852418031449e-06, + "loss": 0.2466, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27131420373916626, + "step": 4555, + "valid_targets_mean": 2615.9, + "valid_targets_min": 922 + }, + { + "epoch": 5.159592529711375, + "grad_norm": 0.4869722842978916, + "learning_rate": 7.866958389781736e-06, + "loss": 0.1153, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.03053898923099041, + "step": 4560, + "valid_targets_mean": 1357.4, + "valid_targets_min": 507 + }, + { + "epoch": 5.165251839275609, + "grad_norm": 0.4158433073059967, + "learning_rate": 7.822160889845286e-06, + "loss": 0.0945, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.046558868139982224, + "step": 4565, + "valid_targets_mean": 3236.9, + "valid_targets_min": 615 + }, + { + "epoch": 5.170911148839841, + "grad_norm": 0.5590265507295579, + "learning_rate": 7.777460274623005e-06, + "loss": 0.1741, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06460557132959366, + "step": 4570, + "valid_targets_mean": 3023.1, + "valid_targets_min": 826 + }, + { + "epoch": 5.176570458404075, + "grad_norm": 0.4438843848729775, + "learning_rate": 7.732856899745003e-06, + "loss": 0.0815, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04189185053110123, + "step": 4575, + "valid_targets_mean": 2924.4, + "valid_targets_min": 851 + }, + { + "epoch": 5.182229767968308, + "grad_norm": 0.4849378260876977, + "learning_rate": 7.688351120067781e-06, + "loss": 0.0806, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05326235294342041, + "step": 4580, + "valid_targets_mean": 4528.4, + "valid_targets_min": 2665 + }, + { + "epoch": 5.187889077532541, + "grad_norm": 0.3741307024817952, + "learning_rate": 7.643943289671374e-06, + "loss": 0.0844, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.028149928897619247, + "step": 4585, + "valid_targets_mean": 2820.6, + "valid_targets_min": 595 + }, + { + "epoch": 5.193548387096774, + "grad_norm": 0.4452966410432175, + "learning_rate": 7.599633761856542e-06, + "loss": 0.076, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04775497317314148, + "step": 4590, + "valid_targets_mean": 2583.1, + "valid_targets_min": 825 + }, + { + "epoch": 5.1992076966610075, + "grad_norm": 0.4440798702857089, + "learning_rate": 7.555422889141997e-06, + "loss": 0.0763, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.03668874502182007, + "step": 4595, + "valid_targets_mean": 3771.5, + "valid_targets_min": 672 + }, + { + "epoch": 5.204867006225241, + "grad_norm": 0.3763364307402238, + "learning_rate": 7.51131102326154e-06, + "loss": 0.0702, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.02842479571700096, + "step": 4600, + "valid_targets_mean": 3165.0, + "valid_targets_min": 1956 + }, + { + "epoch": 5.2105263157894735, + "grad_norm": 0.4070173177883625, + "learning_rate": 7.467298515161305e-06, + "loss": 0.0782, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.03486517071723938, + "step": 4605, + "valid_targets_mean": 3719.8, + "valid_targets_min": 2540 + }, + { + "epoch": 5.216185625353707, + "grad_norm": 0.45850435098186565, + "learning_rate": 7.423385714996942e-06, + "loss": 0.0688, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0475509911775589, + "step": 4610, + "valid_targets_mean": 3447.5, + "valid_targets_min": 1240 + }, + { + "epoch": 5.22184493491794, + "grad_norm": 0.31850681395062364, + "learning_rate": 7.379572972130864e-06, + "loss": 0.1083, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.021806105971336365, + "step": 4615, + "valid_targets_mean": 2975.2, + "valid_targets_min": 623 + }, + { + "epoch": 5.227504244482173, + "grad_norm": 0.3946655684730546, + "learning_rate": 7.335860635129426e-06, + "loss": 0.0639, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.03245675191283226, + "step": 4620, + "valid_targets_mean": 2960.1, + "valid_targets_min": 583 + }, + { + "epoch": 5.233163554046406, + "grad_norm": 0.39632121406728904, + "learning_rate": 7.292249051760169e-06, + "loss": 0.0681, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.032696548849344254, + "step": 4625, + "valid_targets_mean": 3723.5, + "valid_targets_min": 3230 + }, + { + "epoch": 5.23882286361064, + "grad_norm": 0.4160150461946878, + "learning_rate": 7.2487385689890845e-06, + "loss": 0.0672, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.027024395763874054, + "step": 4630, + "valid_targets_mean": 2520.9, + "valid_targets_min": 647 + }, + { + "epoch": 5.244482173174872, + "grad_norm": 0.7975024899296032, + "learning_rate": 7.205329532977794e-06, + "loss": 0.1126, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08603611588478088, + "step": 4635, + "valid_targets_mean": 1094.5, + "valid_targets_min": 195 + }, + { + "epoch": 5.250141482739106, + "grad_norm": 0.3999942886603403, + "learning_rate": 7.162022289080837e-06, + "loss": 0.0758, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05493873730301857, + "step": 4640, + "valid_targets_mean": 3635.9, + "valid_targets_min": 3050 + }, + { + "epoch": 5.255800792303339, + "grad_norm": 0.5736349568031178, + "learning_rate": 7.1188171818429256e-06, + "loss": 0.1059, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06432878971099854, + "step": 4645, + "valid_targets_mean": 2382.9, + "valid_targets_min": 1087 + }, + { + "epoch": 5.261460101867572, + "grad_norm": 0.4376303927241186, + "learning_rate": 7.075714554996176e-06, + "loss": 0.0756, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0334898978471756, + "step": 4650, + "valid_targets_mean": 3025.5, + "valid_targets_min": 847 + }, + { + "epoch": 5.267119411431805, + "grad_norm": 0.5964252725575304, + "learning_rate": 7.032714751457395e-06, + "loss": 0.0997, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05674119293689728, + "step": 4655, + "valid_targets_mean": 2263.1, + "valid_targets_min": 916 + }, + { + "epoch": 5.272778720996039, + "grad_norm": 0.4910485937846962, + "learning_rate": 6.989818113325333e-06, + "loss": 0.1043, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.03834018111228943, + "step": 4660, + "valid_targets_mean": 2567.0, + "valid_targets_min": 1062 + }, + { + "epoch": 5.278438030560272, + "grad_norm": 0.6491709824468674, + "learning_rate": 6.947024981878001e-06, + "loss": 0.1036, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0645102709531784, + "step": 4665, + "valid_targets_mean": 1443.4, + "valid_targets_min": 771 + }, + { + "epoch": 5.284097340124505, + "grad_norm": 0.43550996778521045, + "learning_rate": 6.904335697569909e-06, + "loss": 0.0755, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.03509264439344406, + "step": 4670, + "valid_targets_mean": 2753.2, + "valid_targets_min": 707 + }, + { + "epoch": 5.289756649688738, + "grad_norm": 0.39357284756413263, + "learning_rate": 6.861750600029373e-06, + "loss": 0.0835, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.03775586560368538, + "step": 4675, + "valid_targets_mean": 3665.8, + "valid_targets_min": 2850 + }, + { + "epoch": 5.2954159592529715, + "grad_norm": 0.45705922008286887, + "learning_rate": 6.819270028055844e-06, + "loss": 0.0804, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.029992057010531425, + "step": 4680, + "valid_targets_mean": 2238.2, + "valid_targets_min": 749 + }, + { + "epoch": 5.301075268817204, + "grad_norm": 0.3912511294871573, + "learning_rate": 6.776894319617162e-06, + "loss": 0.0685, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.03843936324119568, + "step": 4685, + "valid_targets_mean": 4488.2, + "valid_targets_min": 1052 + }, + { + "epoch": 5.3067345783814375, + "grad_norm": 0.3348061698976915, + "learning_rate": 6.73462381184689e-06, + "loss": 0.0649, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.03513568267226219, + "step": 4690, + "valid_targets_mean": 4258.2, + "valid_targets_min": 2540 + }, + { + "epoch": 5.312393887945671, + "grad_norm": 0.36182117961891874, + "learning_rate": 6.6924588410416604e-06, + "loss": 0.0772, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.02987879142165184, + "step": 4695, + "valid_targets_mean": 3782.5, + "valid_targets_min": 1859 + }, + { + "epoch": 5.3180531975099035, + "grad_norm": 0.36416536322485177, + "learning_rate": 6.650399742658442e-06, + "loss": 0.0897, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.027936633676290512, + "step": 4700, + "valid_targets_mean": 2879.5, + "valid_targets_min": 893 + }, + { + "epoch": 5.323712507074137, + "grad_norm": 0.4663980199591092, + "learning_rate": 6.608446851311918e-06, + "loss": 0.0864, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.041042231023311615, + "step": 4705, + "valid_targets_mean": 2072.6, + "valid_targets_min": 753 + }, + { + "epoch": 5.32937181663837, + "grad_norm": 0.31707604000611345, + "learning_rate": 6.566600500771796e-06, + "loss": 0.08, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.023070527240633965, + "step": 4710, + "valid_targets_mean": 4205.6, + "valid_targets_min": 3934 + }, + { + "epoch": 5.335031126202603, + "grad_norm": 0.4897937756552784, + "learning_rate": 6.524861023960185e-06, + "loss": 0.0701, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04043271765112877, + "step": 4715, + "valid_targets_mean": 3174.9, + "valid_targets_min": 1576 + }, + { + "epoch": 5.340690435766836, + "grad_norm": 0.48325227233709045, + "learning_rate": 6.483228752948902e-06, + "loss": 0.0816, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04258692264556885, + "step": 4720, + "valid_targets_mean": 3241.8, + "valid_targets_min": 1006 + }, + { + "epoch": 5.34634974533107, + "grad_norm": 0.32691007456243054, + "learning_rate": 6.441704018956858e-06, + "loss": 0.0676, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.023861169815063477, + "step": 4725, + "valid_targets_mean": 3445.5, + "valid_targets_min": 980 + }, + { + "epoch": 5.352009054895303, + "grad_norm": 0.5818091088795315, + "learning_rate": 6.400287152347442e-06, + "loss": 0.0773, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04909449815750122, + "step": 4730, + "valid_targets_mean": 2083.9, + "valid_targets_min": 745 + }, + { + "epoch": 5.357668364459536, + "grad_norm": 0.554619603353416, + "learning_rate": 6.35897848262584e-06, + "loss": 0.0745, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.047421038150787354, + "step": 4735, + "valid_targets_mean": 2768.8, + "valid_targets_min": 1020 + }, + { + "epoch": 5.363327674023769, + "grad_norm": 0.48409933133299604, + "learning_rate": 6.317778338436449e-06, + "loss": 0.0754, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.049306828528642654, + "step": 4740, + "valid_targets_mean": 2314.8, + "valid_targets_min": 967 + }, + { + "epoch": 5.368986983588003, + "grad_norm": 0.45435782254110785, + "learning_rate": 6.276687047560275e-06, + "loss": 0.0923, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.03364328294992447, + "step": 4745, + "valid_targets_mean": 2462.0, + "valid_targets_min": 1015 + }, + { + "epoch": 5.374646293152235, + "grad_norm": 0.6119629510740321, + "learning_rate": 6.235704936912288e-06, + "loss": 0.0736, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.042350221425294876, + "step": 4750, + "valid_targets_mean": 1214.9, + "valid_targets_min": 705 + }, + { + "epoch": 5.380305602716469, + "grad_norm": 0.4332130084367947, + "learning_rate": 6.194832332538838e-06, + "loss": 0.084, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.02597474679350853, + "step": 4755, + "valid_targets_mean": 2364.8, + "valid_targets_min": 622 + }, + { + "epoch": 5.385964912280702, + "grad_norm": 0.4754499958053214, + "learning_rate": 6.154069559615081e-06, + "loss": 0.0999, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.031848207116127014, + "step": 4760, + "valid_targets_mean": 3702.2, + "valid_targets_min": 1322 + }, + { + "epoch": 5.391624221844935, + "grad_norm": 0.42747058602486093, + "learning_rate": 6.113416942442358e-06, + "loss": 0.0752, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.03660539165139198, + "step": 4765, + "valid_targets_mean": 2863.1, + "valid_targets_min": 1011 + }, + { + "epoch": 5.397283531409168, + "grad_norm": 0.5094687605091125, + "learning_rate": 6.072874804445632e-06, + "loss": 0.067, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0420791395008564, + "step": 4770, + "valid_targets_mean": 3106.1, + "valid_targets_min": 874 + }, + { + "epoch": 5.4029428409734015, + "grad_norm": 0.5086084487703625, + "learning_rate": 6.032443468170912e-06, + "loss": 0.0818, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05718608200550079, + "step": 4775, + "valid_targets_mean": 3161.4, + "valid_targets_min": 1420 + }, + { + "epoch": 5.408602150537634, + "grad_norm": 0.42094012131131764, + "learning_rate": 5.992123255282702e-06, + "loss": 0.0905, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.031266339123249054, + "step": 4780, + "valid_targets_mean": 2796.0, + "valid_targets_min": 1109 + }, + { + "epoch": 5.4142614601018675, + "grad_norm": 0.40853245195080023, + "learning_rate": 5.951914486561417e-06, + "loss": 0.0593, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.03016936220228672, + "step": 4785, + "valid_targets_mean": 1822.9, + "valid_targets_min": 684 + }, + { + "epoch": 5.419920769666101, + "grad_norm": 0.48773046380654383, + "learning_rate": 5.911817481900832e-06, + "loss": 0.0799, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.03719595819711685, + "step": 4790, + "valid_targets_mean": 2389.8, + "valid_targets_min": 671 + }, + { + "epoch": 5.425580079230334, + "grad_norm": 0.44580837384944577, + "learning_rate": 5.871832560305573e-06, + "loss": 0.0697, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0338919572532177, + "step": 4795, + "valid_targets_mean": 4513.8, + "valid_targets_min": 1057 + }, + { + "epoch": 5.431239388794567, + "grad_norm": 0.3882620773195289, + "learning_rate": 5.831960039888533e-06, + "loss": 0.0614, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0298624224960804, + "step": 4800, + "valid_targets_mean": 3533.1, + "valid_targets_min": 1243 + }, + { + "epoch": 5.4368986983588, + "grad_norm": 0.3829480745673342, + "learning_rate": 5.792200237868361e-06, + "loss": 0.0828, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.03660116344690323, + "step": 4805, + "valid_targets_mean": 3911.2, + "valid_targets_min": 3322 + }, + { + "epoch": 5.442558007923034, + "grad_norm": 0.5527432817006118, + "learning_rate": 5.752553470566957e-06, + "loss": 0.0697, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04388725385069847, + "step": 4810, + "valid_targets_mean": 1820.0, + "valid_targets_min": 680 + }, + { + "epoch": 5.448217317487266, + "grad_norm": 0.48970237691333407, + "learning_rate": 5.713020053406917e-06, + "loss": 0.0982, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04085264354944229, + "step": 4815, + "valid_targets_mean": 3371.9, + "valid_targets_min": 640 + }, + { + "epoch": 5.4538766270515, + "grad_norm": 0.2904416815995412, + "learning_rate": 5.673600300909053e-06, + "loss": 0.0837, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.01999470591545105, + "step": 4820, + "valid_targets_mean": 3327.0, + "valid_targets_min": 929 + }, + { + "epoch": 5.459535936615733, + "grad_norm": 0.56589993886369, + "learning_rate": 5.634294526689872e-06, + "loss": 0.0885, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.054224856197834015, + "step": 4825, + "valid_targets_mean": 3318.0, + "valid_targets_min": 2380 + }, + { + "epoch": 5.465195246179966, + "grad_norm": 0.410529406768517, + "learning_rate": 5.595103043459109e-06, + "loss": 0.0677, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.034830328077077866, + "step": 4830, + "valid_targets_mean": 3709.4, + "valid_targets_min": 2619 + }, + { + "epoch": 5.470854555744199, + "grad_norm": 0.6623030049021926, + "learning_rate": 5.556026163017205e-06, + "loss": 0.0772, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04581884294748306, + "step": 4835, + "valid_targets_mean": 1574.6, + "valid_targets_min": 693 + }, + { + "epoch": 5.476513865308433, + "grad_norm": 0.40670033409080186, + "learning_rate": 5.517064196252837e-06, + "loss": 0.0732, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.039534423500299454, + "step": 4840, + "valid_targets_mean": 3885.1, + "valid_targets_min": 3129 + }, + { + "epoch": 5.482173174872665, + "grad_norm": 0.7011159710065692, + "learning_rate": 5.478217453140471e-06, + "loss": 0.0985, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04617399722337723, + "step": 4845, + "valid_targets_mean": 1611.0, + "valid_targets_min": 543 + }, + { + "epoch": 5.487832484436899, + "grad_norm": 0.48240282066039986, + "learning_rate": 5.439486242737855e-06, + "loss": 0.1005, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04659516364336014, + "step": 4850, + "valid_targets_mean": 2319.6, + "valid_targets_min": 503 + }, + { + "epoch": 5.493491794001132, + "grad_norm": 0.5373785371414234, + "learning_rate": 5.400870873183583e-06, + "loss": 0.0794, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05152829736471176, + "step": 4855, + "valid_targets_mean": 1987.9, + "valid_targets_min": 625 + }, + { + "epoch": 5.499151103565365, + "grad_norm": 0.43696541828486446, + "learning_rate": 5.362371651694647e-06, + "loss": 0.0752, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.03338533267378807, + "step": 4860, + "valid_targets_mean": 3822.8, + "valid_targets_min": 2822 + }, + { + "epoch": 5.504810413129598, + "grad_norm": 0.4549879906571369, + "learning_rate": 5.323988884563975e-06, + "loss": 0.0736, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04524676501750946, + "step": 4865, + "valid_targets_mean": 3724.5, + "valid_targets_min": 2987 + }, + { + "epoch": 5.5104697226938315, + "grad_norm": 0.36902128800974515, + "learning_rate": 5.2857228771580105e-06, + "loss": 0.0652, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.02631876803934574, + "step": 4870, + "valid_targets_mean": 3022.0, + "valid_targets_min": 856 + }, + { + "epoch": 5.516129032258064, + "grad_norm": 0.9847800453377397, + "learning_rate": 5.247573933914285e-06, + "loss": 0.0759, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.042317889630794525, + "step": 4875, + "valid_targets_mean": 764.2, + "valid_targets_min": 563 + }, + { + "epoch": 5.5217883418222975, + "grad_norm": 0.41482562561823066, + "learning_rate": 5.20954235833898e-06, + "loss": 0.0752, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.03073599748313427, + "step": 4880, + "valid_targets_mean": 3117.1, + "valid_targets_min": 751 + }, + { + "epoch": 5.527447651386531, + "grad_norm": 0.5884650696735811, + "learning_rate": 5.171628453004512e-06, + "loss": 0.0694, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04391670972108841, + "step": 4885, + "valid_targets_mean": 3566.1, + "valid_targets_min": 980 + }, + { + "epoch": 5.533106960950764, + "grad_norm": 0.46798870586464636, + "learning_rate": 5.133832519547155e-06, + "loss": 0.0676, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.03547874093055725, + "step": 4890, + "valid_targets_mean": 3589.8, + "valid_targets_min": 2538 + }, + { + "epoch": 5.538766270514997, + "grad_norm": 0.3934580415297562, + "learning_rate": 5.096154858664608e-06, + "loss": 0.0763, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.02586423233151436, + "step": 4895, + "valid_targets_mean": 4182.8, + "valid_targets_min": 485 + }, + { + "epoch": 5.54442558007923, + "grad_norm": 0.32631313227984365, + "learning_rate": 5.058595770113606e-06, + "loss": 0.0659, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.03240755945444107, + "step": 4900, + "valid_targets_mean": 4832.8, + "valid_targets_min": 2551 + }, + { + "epoch": 5.550084889643464, + "grad_norm": 0.38184313196943154, + "learning_rate": 5.0211555527075664e-06, + "loss": 0.0702, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.036381155252456665, + "step": 4905, + "valid_targets_mean": 4733.6, + "valid_targets_min": 3365 + }, + { + "epoch": 5.555744199207696, + "grad_norm": 0.4520989730377594, + "learning_rate": 4.9838345043141665e-06, + "loss": 0.0755, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05727928876876831, + "step": 4910, + "valid_targets_mean": 2954.4, + "valid_targets_min": 872 + }, + { + "epoch": 5.56140350877193, + "grad_norm": 0.3837959516893204, + "learning_rate": 4.946632921853009e-06, + "loss": 0.1574, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.03571176528930664, + "step": 4915, + "valid_targets_mean": 2590.9, + "valid_targets_min": 1015 + }, + { + "epoch": 5.567062818336163, + "grad_norm": 0.43348756979097136, + "learning_rate": 4.909551101293238e-06, + "loss": 0.0703, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05673142522573471, + "step": 4920, + "valid_targets_mean": 2342.4, + "valid_targets_min": 967 + }, + { + "epoch": 5.572722127900396, + "grad_norm": 0.38621739830517215, + "learning_rate": 4.872589337651208e-06, + "loss": 0.0701, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.02983327955007553, + "step": 4925, + "valid_targets_mean": 3335.1, + "valid_targets_min": 886 + }, + { + "epoch": 5.578381437464629, + "grad_norm": 0.5155792204127467, + "learning_rate": 4.835747924988105e-06, + "loss": 0.1065, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0668298527598381, + "step": 4930, + "valid_targets_mean": 5394.5, + "valid_targets_min": 3513 + }, + { + "epoch": 5.584040747028863, + "grad_norm": 0.6842699689835074, + "learning_rate": 4.799027156407632e-06, + "loss": 0.1613, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.075546994805336, + "step": 4935, + "valid_targets_mean": 7348.0, + "valid_targets_min": 4327 + }, + { + "epoch": 5.589700056593095, + "grad_norm": 0.574649718242978, + "learning_rate": 4.7624273240536756e-06, + "loss": 0.1543, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0768851786851883, + "step": 4940, + "valid_targets_mean": 6346.0, + "valid_targets_min": 4480 + }, + { + "epoch": 5.595359366157329, + "grad_norm": 0.43863529363278925, + "learning_rate": 4.725948719107965e-06, + "loss": 0.1453, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07283368706703186, + "step": 4945, + "valid_targets_mean": 6343.4, + "valid_targets_min": 4620 + }, + { + "epoch": 5.601018675721562, + "grad_norm": 0.40923162994001255, + "learning_rate": 4.6895916317877624e-06, + "loss": 0.144, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0814943015575409, + "step": 4950, + "valid_targets_mean": 7255.5, + "valid_targets_min": 4873 + }, + { + "epoch": 5.6066779852857955, + "grad_norm": 0.4200210296254552, + "learning_rate": 4.653356351343577e-06, + "loss": 0.1579, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09169334173202515, + "step": 4955, + "valid_targets_mean": 7472.5, + "valid_targets_min": 4229 + }, + { + "epoch": 5.612337294850028, + "grad_norm": 0.4571508601994777, + "learning_rate": 4.617243166056826e-06, + "loss": 0.1527, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08159303665161133, + "step": 4960, + "valid_targets_mean": 5988.0, + "valid_targets_min": 4279 + }, + { + "epoch": 5.6179966044142615, + "grad_norm": 0.41836865655879546, + "learning_rate": 4.581252363237567e-06, + "loss": 0.1442, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06469601392745972, + "step": 4965, + "valid_targets_mean": 5757.6, + "valid_targets_min": 4542 + }, + { + "epoch": 5.623655913978495, + "grad_norm": 0.5501335221318121, + "learning_rate": 4.545384229222196e-06, + "loss": 0.1615, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07012242078781128, + "step": 4970, + "valid_targets_mean": 2177.2, + "valid_targets_min": 1473 + }, + { + "epoch": 5.6293152235427275, + "grad_norm": 0.40578892953545487, + "learning_rate": 4.509639049371193e-06, + "loss": 0.1462, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06969084590673447, + "step": 4975, + "valid_targets_mean": 6128.4, + "valid_targets_min": 5332 + }, + { + "epoch": 5.634974533106961, + "grad_norm": 0.41113077211068755, + "learning_rate": 4.474017108066828e-06, + "loss": 0.1472, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08000019937753677, + "step": 4980, + "valid_targets_mean": 5782.4, + "valid_targets_min": 4612 + }, + { + "epoch": 5.640633842671194, + "grad_norm": 0.4072753452175585, + "learning_rate": 4.438518688710898e-06, + "loss": 0.1524, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.075807586312294, + "step": 4985, + "valid_targets_mean": 6704.2, + "valid_targets_min": 4491 + }, + { + "epoch": 5.646293152235427, + "grad_norm": 0.4158377397562646, + "learning_rate": 4.403144073722501e-06, + "loss": 0.1455, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07113613188266754, + "step": 4990, + "valid_targets_mean": 6032.8, + "valid_targets_min": 4268 + }, + { + "epoch": 5.65195246179966, + "grad_norm": 0.38910371015499334, + "learning_rate": 4.367893544535757e-06, + "loss": 0.1419, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06525500118732452, + "step": 4995, + "valid_targets_mean": 6025.0, + "valid_targets_min": 4787 + }, + { + "epoch": 5.657611771363894, + "grad_norm": 0.3906512999596564, + "learning_rate": 4.332767381597575e-06, + "loss": 0.1333, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06492096185684204, + "step": 5000, + "valid_targets_mean": 6030.5, + "valid_targets_min": 5052 + }, + { + "epoch": 5.663271080928126, + "grad_norm": 0.44438137118544563, + "learning_rate": 4.297765864365453e-06, + "loss": 0.1375, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07491766661405563, + "step": 5005, + "valid_targets_mean": 6500.6, + "valid_targets_min": 4664 + }, + { + "epoch": 5.66893039049236, + "grad_norm": 0.40792463298943993, + "learning_rate": 4.262889271305204e-06, + "loss": 0.1448, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06292325258255005, + "step": 5010, + "valid_targets_mean": 5990.5, + "valid_targets_min": 4211 + }, + { + "epoch": 5.674589700056593, + "grad_norm": 0.9080398752612465, + "learning_rate": 4.228137879888774e-06, + "loss": 0.1452, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06906808167695999, + "step": 5015, + "valid_targets_mean": 5844.5, + "valid_targets_min": 3898 + }, + { + "epoch": 5.680249009620827, + "grad_norm": 0.3865959911695659, + "learning_rate": 4.193511966592041e-06, + "loss": 0.1296, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05850920453667641, + "step": 5020, + "valid_targets_mean": 5826.0, + "valid_targets_min": 4800 + }, + { + "epoch": 5.685908319185059, + "grad_norm": 0.35519349973157655, + "learning_rate": 4.1590118068925815e-06, + "loss": 0.1283, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06189900264143944, + "step": 5025, + "valid_targets_mean": 7745.9, + "valid_targets_min": 6651 + }, + { + "epoch": 5.691567628749293, + "grad_norm": 0.44964434972817635, + "learning_rate": 4.124637675267511e-06, + "loss": 0.1478, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08358560502529144, + "step": 5030, + "valid_targets_mean": 6058.4, + "valid_targets_min": 3477 + }, + { + "epoch": 5.697226938313526, + "grad_norm": 0.43305524456662614, + "learning_rate": 4.090389845191278e-06, + "loss": 0.1636, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08374358713626862, + "step": 5035, + "valid_targets_mean": 5174.0, + "valid_targets_min": 965 + }, + { + "epoch": 5.702886247877759, + "grad_norm": 0.40569330844527285, + "learning_rate": 4.056268589133516e-06, + "loss": 0.1345, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08053141087293625, + "step": 5040, + "valid_targets_mean": 6904.5, + "valid_targets_min": 5226 + }, + { + "epoch": 5.708545557441992, + "grad_norm": 0.6229368671518989, + "learning_rate": 4.022274178556844e-06, + "loss": 0.1426, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.070437952876091, + "step": 5045, + "valid_targets_mean": 6807.5, + "valid_targets_min": 4237 + }, + { + "epoch": 5.7142048670062255, + "grad_norm": 0.4676538257257657, + "learning_rate": 3.988406883914717e-06, + "loss": 0.156, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08439238369464874, + "step": 5050, + "valid_targets_mean": 5527.6, + "valid_targets_min": 3761 + }, + { + "epoch": 5.719864176570458, + "grad_norm": 0.44172445940500477, + "learning_rate": 3.954666974649295e-06, + "loss": 0.1716, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09334351867437363, + "step": 5055, + "valid_targets_mean": 7231.9, + "valid_targets_min": 4993 + }, + { + "epoch": 5.7255234861346915, + "grad_norm": 0.430348106859938, + "learning_rate": 3.921054719189272e-06, + "loss": 0.1661, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07482936978340149, + "step": 5060, + "valid_targets_mean": 6576.1, + "valid_targets_min": 5199 + }, + { + "epoch": 5.731182795698925, + "grad_norm": 0.41526868988767185, + "learning_rate": 3.887570384947745e-06, + "loss": 0.1464, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08730171620845795, + "step": 5065, + "valid_targets_mean": 7336.0, + "valid_targets_min": 4534 + }, + { + "epoch": 5.7368421052631575, + "grad_norm": 0.4552951608917207, + "learning_rate": 3.854214238320109e-06, + "loss": 0.1466, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06736086308956146, + "step": 5070, + "valid_targets_mean": 6389.6, + "valid_targets_min": 4751 + }, + { + "epoch": 5.742501414827391, + "grad_norm": 0.43971118094431755, + "learning_rate": 3.8209865446819105e-06, + "loss": 0.1483, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06496548652648926, + "step": 5075, + "valid_targets_mean": 5551.6, + "valid_targets_min": 3888 + }, + { + "epoch": 5.748160724391624, + "grad_norm": 0.4477703620338686, + "learning_rate": 3.7878875683867476e-06, + "loss": 0.1549, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08471114188432693, + "step": 5080, + "valid_targets_mean": 6779.5, + "valid_targets_min": 4455 + }, + { + "epoch": 5.753820033955858, + "grad_norm": 0.42129657520822045, + "learning_rate": 3.7549175727641606e-06, + "loss": 0.1454, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08212454617023468, + "step": 5085, + "valid_targets_mean": 7067.5, + "valid_targets_min": 5230 + }, + { + "epoch": 5.75947934352009, + "grad_norm": 0.4597845255394653, + "learning_rate": 3.7220768201175615e-06, + "loss": 0.1539, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06508892774581909, + "step": 5090, + "valid_targets_mean": 7394.4, + "valid_targets_min": 5668 + }, + { + "epoch": 5.765138653084324, + "grad_norm": 0.4309741399054899, + "learning_rate": 3.689365571722112e-06, + "loss": 0.153, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07912438362836838, + "step": 5095, + "valid_targets_mean": 6609.0, + "valid_targets_min": 4871 + }, + { + "epoch": 5.770797962648556, + "grad_norm": 0.37688101342124336, + "learning_rate": 3.6567840878226577e-06, + "loss": 0.1385, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08403809368610382, + "step": 5100, + "valid_targets_mean": 8925.8, + "valid_targets_min": 6195 + }, + { + "epoch": 5.77645727221279, + "grad_norm": 0.44007921988753174, + "learning_rate": 3.624332627631679e-06, + "loss": 0.1302, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06569415330886841, + "step": 5105, + "valid_targets_mean": 7189.8, + "valid_targets_min": 3533 + }, + { + "epoch": 5.782116581777023, + "grad_norm": 0.4574962152533444, + "learning_rate": 3.5920114493271974e-06, + "loss": 0.1547, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08852200210094452, + "step": 5110, + "valid_targets_mean": 7004.8, + "valid_targets_min": 5199 + }, + { + "epoch": 5.787775891341257, + "grad_norm": 0.45303240097186165, + "learning_rate": 3.5598208100507314e-06, + "loss": 0.1551, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07674330472946167, + "step": 5115, + "valid_targets_mean": 5626.2, + "valid_targets_min": 4923 + }, + { + "epoch": 5.793435200905489, + "grad_norm": 0.4090459814796047, + "learning_rate": 3.5277609659052712e-06, + "loss": 0.1471, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06186189875006676, + "step": 5120, + "valid_targets_mean": 6766.5, + "valid_targets_min": 4920 + }, + { + "epoch": 5.799094510469723, + "grad_norm": 0.4141944003074458, + "learning_rate": 3.4958321719532106e-06, + "loss": 0.1447, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09608271718025208, + "step": 5125, + "valid_targets_mean": 7549.5, + "valid_targets_min": 5551 + }, + { + "epoch": 5.804753820033956, + "grad_norm": 0.3748390962268032, + "learning_rate": 3.4640346822143324e-06, + "loss": 0.1351, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06477536261081696, + "step": 5130, + "valid_targets_mean": 6716.9, + "valid_targets_min": 4685 + }, + { + "epoch": 5.810413129598189, + "grad_norm": 0.47800208213453826, + "learning_rate": 3.4323687496637837e-06, + "loss": 0.1485, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06609180569648743, + "step": 5135, + "valid_targets_mean": 5615.0, + "valid_targets_min": 4875 + }, + { + "epoch": 5.816072439162422, + "grad_norm": 0.4044231466758298, + "learning_rate": 3.4008346262300852e-06, + "loss": 0.1516, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06517118215560913, + "step": 5140, + "valid_targets_mean": 6375.9, + "valid_targets_min": 4433 + }, + { + "epoch": 5.8217317487266556, + "grad_norm": 0.41719224529071663, + "learning_rate": 3.3694325627930846e-06, + "loss": 0.1414, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06750291585922241, + "step": 5145, + "valid_targets_mean": 5845.8, + "valid_targets_min": 4540 + }, + { + "epoch": 5.827391058290889, + "grad_norm": 0.45731199479505663, + "learning_rate": 3.3381628091819907e-06, + "loss": 0.1519, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09496168047189713, + "step": 5150, + "valid_targets_mean": 7222.8, + "valid_targets_min": 4056 + }, + { + "epoch": 5.8330503678551215, + "grad_norm": 0.41338173995110794, + "learning_rate": 3.3070256141733913e-06, + "loss": 0.1435, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05752265453338623, + "step": 5155, + "valid_targets_mean": 5307.6, + "valid_targets_min": 4499 + }, + { + "epoch": 5.838709677419355, + "grad_norm": 0.3838616763271395, + "learning_rate": 3.2760212254892453e-06, + "loss": 0.1242, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05935349315404892, + "step": 5160, + "valid_targets_mean": 6030.1, + "valid_targets_min": 5268 + }, + { + "epoch": 5.8443689869835875, + "grad_norm": 0.3899322287845033, + "learning_rate": 3.245149889794932e-06, + "loss": 0.1275, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06788637489080429, + "step": 5165, + "valid_targets_mean": 6520.6, + "valid_targets_min": 5074 + }, + { + "epoch": 5.850028296547821, + "grad_norm": 0.4115546982310666, + "learning_rate": 3.2144118526972943e-06, + "loss": 0.1335, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.062251899391412735, + "step": 5170, + "valid_targets_mean": 6091.6, + "valid_targets_min": 5221 + }, + { + "epoch": 5.855687606112054, + "grad_norm": 0.4734002951345279, + "learning_rate": 3.1838073587426676e-06, + "loss": 0.143, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06955200433731079, + "step": 5175, + "valid_targets_mean": 5409.5, + "valid_targets_min": 4642 + }, + { + "epoch": 5.861346915676288, + "grad_norm": 0.4837186291109002, + "learning_rate": 3.153336651414933e-06, + "loss": 0.1638, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08475080132484436, + "step": 5180, + "valid_targets_mean": 4272.0, + "valid_targets_min": 3222 + }, + { + "epoch": 5.86700622524052, + "grad_norm": 0.42555524320393195, + "learning_rate": 3.1229999731336137e-06, + "loss": 0.1724, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07051846385002136, + "step": 5185, + "valid_targets_mean": 6490.6, + "valid_targets_min": 4371 + }, + { + "epoch": 5.872665534804754, + "grad_norm": 0.4390909879658899, + "learning_rate": 3.0927975652518994e-06, + "loss": 0.1387, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07520343363285065, + "step": 5190, + "valid_targets_mean": 5688.4, + "valid_targets_min": 3689 + }, + { + "epoch": 5.878324844368987, + "grad_norm": 0.4418978337748812, + "learning_rate": 3.062729668054756e-06, + "loss": 0.1473, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07577599585056305, + "step": 5195, + "valid_targets_mean": 6685.9, + "valid_targets_min": 4281 + }, + { + "epoch": 5.88398415393322, + "grad_norm": 0.5170281812175835, + "learning_rate": 3.032796520757002e-06, + "loss": 0.1493, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.087665855884552, + "step": 5200, + "valid_targets_mean": 6344.2, + "valid_targets_min": 4844 + }, + { + "epoch": 5.889643463497453, + "grad_norm": 0.4694500824641689, + "learning_rate": 3.0029983615014234e-06, + "loss": 0.1592, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06826259195804596, + "step": 5205, + "valid_targets_mean": 5900.6, + "valid_targets_min": 4549 + }, + { + "epoch": 5.895302773061687, + "grad_norm": 0.435848447944153, + "learning_rate": 2.9733354273568514e-06, + "loss": 0.1482, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06618955731391907, + "step": 5210, + "valid_targets_mean": 4921.4, + "valid_targets_min": 2461 + }, + { + "epoch": 5.900962082625919, + "grad_norm": 0.463891661814579, + "learning_rate": 2.9438079543162914e-06, + "loss": 0.1512, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0872935801744461, + "step": 5215, + "valid_targets_mean": 6683.8, + "valid_targets_min": 4563 + }, + { + "epoch": 5.906621392190153, + "grad_norm": 0.3710802641848626, + "learning_rate": 2.9144161772950564e-06, + "loss": 0.1465, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06095900386571884, + "step": 5220, + "valid_targets_mean": 6517.6, + "valid_targets_min": 5192 + }, + { + "epoch": 5.912280701754386, + "grad_norm": 0.444597214052587, + "learning_rate": 2.885160330128871e-06, + "loss": 0.1346, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06489832699298859, + "step": 5225, + "valid_targets_mean": 5787.6, + "valid_targets_min": 5186 + }, + { + "epoch": 5.917940011318619, + "grad_norm": 0.40979112756126596, + "learning_rate": 2.8560406455720333e-06, + "loss": 0.1479, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06624625623226166, + "step": 5230, + "valid_targets_mean": 5163.2, + "valid_targets_min": 3786 + }, + { + "epoch": 5.923599320882852, + "grad_norm": 0.4662552517261774, + "learning_rate": 2.8270573552955616e-06, + "loss": 0.1872, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08207786083221436, + "step": 5235, + "valid_targets_mean": 6142.8, + "valid_targets_min": 4560 + }, + { + "epoch": 5.929258630447086, + "grad_norm": 0.4145089818755124, + "learning_rate": 2.798210689885337e-06, + "loss": 0.1388, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07824923098087311, + "step": 5240, + "valid_targets_mean": 5659.8, + "valid_targets_min": 3952 + }, + { + "epoch": 5.934917940011319, + "grad_norm": 0.4741527366870441, + "learning_rate": 2.7695008788402765e-06, + "loss": 0.15, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08582239598035812, + "step": 5245, + "valid_targets_mean": 6811.5, + "valid_targets_min": 5134 + }, + { + "epoch": 5.9405772495755516, + "grad_norm": 0.48919664522031875, + "learning_rate": 2.740928150570512e-06, + "loss": 0.1523, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07910142838954926, + "step": 5250, + "valid_targets_mean": 6538.6, + "valid_targets_min": 3762 + }, + { + "epoch": 5.946236559139785, + "grad_norm": 0.4051020263744831, + "learning_rate": 2.712492732395575e-06, + "loss": 0.1314, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06463608890771866, + "step": 5255, + "valid_targets_mean": 6285.8, + "valid_targets_min": 4726 + }, + { + "epoch": 5.951895868704018, + "grad_norm": 0.4968167718781236, + "learning_rate": 2.6841948505425765e-06, + "loss": 0.1433, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08195450156927109, + "step": 5260, + "valid_targets_mean": 6054.1, + "valid_targets_min": 3724 + }, + { + "epoch": 5.957555178268251, + "grad_norm": 0.3897686619052999, + "learning_rate": 2.6560347301444035e-06, + "loss": 0.1308, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07241679728031158, + "step": 5265, + "valid_targets_mean": 6452.4, + "valid_targets_min": 5578 + }, + { + "epoch": 5.963214487832484, + "grad_norm": 0.422592692286166, + "learning_rate": 2.6280125952379567e-06, + "loss": 0.149, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06874916702508926, + "step": 5270, + "valid_targets_mean": 6827.4, + "valid_targets_min": 5795 + }, + { + "epoch": 5.968873797396718, + "grad_norm": 0.41717357540278055, + "learning_rate": 2.6001286687623382e-06, + "loss": 0.1377, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05858433619141579, + "step": 5275, + "valid_targets_mean": 6744.8, + "valid_targets_min": 5268 + }, + { + "epoch": 5.97453310696095, + "grad_norm": 0.3939064722977669, + "learning_rate": 2.5723831725570848e-06, + "loss": 0.1422, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06576021760702133, + "step": 5280, + "valid_targets_mean": 6928.8, + "valid_targets_min": 4920 + }, + { + "epoch": 5.980192416525184, + "grad_norm": 0.3668668947520654, + "learning_rate": 2.544776327360419e-06, + "loss": 0.1478, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05589766055345535, + "step": 5285, + "valid_targets_mean": 7381.1, + "valid_targets_min": 5243 + }, + { + "epoch": 5.985851726089417, + "grad_norm": 0.415418753840546, + "learning_rate": 2.5173083528074683e-06, + "loss": 0.1321, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07109523564577103, + "step": 5290, + "valid_targets_mean": 6154.6, + "valid_targets_min": 4794 + }, + { + "epoch": 5.99151103565365, + "grad_norm": 0.4403321980456123, + "learning_rate": 2.489979467428532e-06, + "loss": 0.1549, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07372893393039703, + "step": 5295, + "valid_targets_mean": 6188.1, + "valid_targets_min": 5168 + }, + { + "epoch": 5.997170345217883, + "grad_norm": 0.48811642631892715, + "learning_rate": 2.4627898886473522e-06, + "loss": 0.1561, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09354352951049805, + "step": 5300, + "valid_targets_mean": 5461.5, + "valid_targets_min": 4156 + }, + { + "epoch": 6.002263723825693, + "grad_norm": 0.45054259353256854, + "learning_rate": 2.435739832779358e-06, + "loss": 0.1263, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.041569337248802185, + "step": 5305, + "valid_targets_mean": 2736.0, + "valid_targets_min": 876 + }, + { + "epoch": 6.007923033389926, + "grad_norm": 0.5457119392194753, + "learning_rate": 2.408829515029969e-06, + "loss": 0.0965, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.03631666675209999, + "step": 5310, + "valid_targets_mean": 1472.5, + "valid_targets_min": 324 + }, + { + "epoch": 6.01358234295416, + "grad_norm": 0.3754458703390065, + "learning_rate": 2.3820591494928635e-06, + "loss": 0.0736, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.045335929840803146, + "step": 5315, + "valid_targets_mean": 2699.9, + "valid_targets_min": 1064 + }, + { + "epoch": 6.019241652518393, + "grad_norm": 0.5186729063465437, + "learning_rate": 2.3554289491482996e-06, + "loss": 0.0713, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.03758453577756882, + "step": 5320, + "valid_targets_mean": 3137.0, + "valid_targets_min": 710 + }, + { + "epoch": 6.024900962082626, + "grad_norm": 0.31573113138674164, + "learning_rate": 2.3289391258613938e-06, + "loss": 0.0861, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.02670443430542946, + "step": 5325, + "valid_targets_mean": 3472.9, + "valid_targets_min": 1223 + }, + { + "epoch": 6.030560271646859, + "grad_norm": 0.39425067469899694, + "learning_rate": 2.3025898903804467e-06, + "loss": 0.0727, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0401170551776886, + "step": 5330, + "valid_targets_mean": 3326.0, + "valid_targets_min": 1431 + }, + { + "epoch": 6.036219581211093, + "grad_norm": 0.6923326355826038, + "learning_rate": 2.276381452335281e-06, + "loss": 0.1037, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.048819079995155334, + "step": 5335, + "valid_targets_mean": 1161.2, + "valid_targets_min": 559 + }, + { + "epoch": 6.041878890775325, + "grad_norm": 0.36607688464368443, + "learning_rate": 2.2503140202355488e-06, + "loss": 0.0988, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.037477947771549225, + "step": 5340, + "valid_targets_mean": 3598.2, + "valid_targets_min": 1623 + }, + { + "epoch": 6.047538200339559, + "grad_norm": 0.4234980780669887, + "learning_rate": 2.2243878014690834e-06, + "loss": 0.0819, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04270897060632706, + "step": 5345, + "valid_targets_mean": 3178.2, + "valid_targets_min": 846 + }, + { + "epoch": 6.053197509903792, + "grad_norm": 0.5142251565843842, + "learning_rate": 2.1986030023002568e-06, + "loss": 0.1174, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0819421112537384, + "step": 5350, + "valid_targets_mean": 3622.6, + "valid_targets_min": 1949 + }, + { + "epoch": 6.058856819468025, + "grad_norm": 0.3758512131860536, + "learning_rate": 2.1729598278683264e-06, + "loss": 0.0592, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.03399980813264847, + "step": 5355, + "valid_targets_mean": 3344.9, + "valid_targets_min": 764 + }, + { + "epoch": 6.064516129032258, + "grad_norm": 0.4452945820004506, + "learning_rate": 2.147458482185807e-06, + "loss": 0.0721, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04033152014017105, + "step": 5360, + "valid_targets_mean": 2391.6, + "valid_targets_min": 776 + }, + { + "epoch": 6.0701754385964914, + "grad_norm": 0.4085284180291562, + "learning_rate": 2.122099168136862e-06, + "loss": 0.0738, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04312574118375778, + "step": 5365, + "valid_targets_mean": 2818.5, + "valid_targets_min": 754 + }, + { + "epoch": 6.075834748160724, + "grad_norm": 0.49070385670020644, + "learning_rate": 2.0968820874756625e-06, + "loss": 0.0907, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0460464246571064, + "step": 5370, + "valid_targets_mean": 1646.6, + "valid_targets_min": 635 + }, + { + "epoch": 6.081494057724957, + "grad_norm": 0.46009308488755424, + "learning_rate": 2.0718074408247955e-06, + "loss": 0.0868, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.03763628378510475, + "step": 5375, + "valid_targets_mean": 3374.8, + "valid_targets_min": 1862 + }, + { + "epoch": 6.087153367289191, + "grad_norm": 0.4329620924703973, + "learning_rate": 2.0468754276736823e-06, + "loss": 0.0715, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.040984734892845154, + "step": 5380, + "valid_targets_mean": 2837.4, + "valid_targets_min": 574 + }, + { + "epoch": 6.092812676853424, + "grad_norm": 0.45881292492892467, + "learning_rate": 2.0220862463769665e-06, + "loss": 0.0684, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.030360141769051552, + "step": 5385, + "valid_targets_mean": 2618.4, + "valid_targets_min": 523 + }, + { + "epoch": 6.098471986417657, + "grad_norm": 0.4645108273119941, + "learning_rate": 1.9974400941529493e-06, + "loss": 0.0725, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.03978925943374634, + "step": 5390, + "valid_targets_mean": 3548.9, + "valid_targets_min": 2189 + }, + { + "epoch": 6.10413129598189, + "grad_norm": 0.40979314954932805, + "learning_rate": 1.972937167082014e-06, + "loss": 0.0764, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.027495216578245163, + "step": 5395, + "valid_targets_mean": 3564.6, + "valid_targets_min": 827 + }, + { + "epoch": 6.109790605546124, + "grad_norm": 0.519291894521477, + "learning_rate": 1.948577660105082e-06, + "loss": 0.0868, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.042828112840652466, + "step": 5400, + "valid_targets_mean": 1936.6, + "valid_targets_min": 597 + }, + { + "epoch": 6.115449915110356, + "grad_norm": 0.5028869384741634, + "learning_rate": 1.924361767022038e-06, + "loss": 0.0812, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.03645801544189453, + "step": 5405, + "valid_targets_mean": 1747.5, + "valid_targets_min": 701 + }, + { + "epoch": 6.12110922467459, + "grad_norm": 0.5160317808874488, + "learning_rate": 1.9002896804902039e-06, + "loss": 0.1063, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10435350239276886, + "step": 5410, + "valid_targets_mean": 3329.6, + "valid_targets_min": 1652 + }, + { + "epoch": 6.126768534238823, + "grad_norm": 0.42693459577496057, + "learning_rate": 1.8763615920228084e-06, + "loss": 0.0877, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0340992771089077, + "step": 5415, + "valid_targets_mean": 3117.1, + "valid_targets_min": 786 + }, + { + "epoch": 6.132427843803056, + "grad_norm": 0.5292079264665807, + "learning_rate": 1.8525776919874472e-06, + "loss": 0.0709, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.044943224638700485, + "step": 5420, + "valid_targets_mean": 2574.4, + "valid_targets_min": 971 + }, + { + "epoch": 6.138087153367289, + "grad_norm": 0.43184427339775594, + "learning_rate": 1.8289381696045817e-06, + "loss": 0.1019, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0383368656039238, + "step": 5425, + "valid_targets_mean": 3320.0, + "valid_targets_min": 2610 + }, + { + "epoch": 6.143746462931523, + "grad_norm": 0.4578345136970855, + "learning_rate": 1.8054432129460386e-06, + "loss": 0.0875, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.027956079691648483, + "step": 5430, + "valid_targets_mean": 2580.6, + "valid_targets_min": 753 + }, + { + "epoch": 6.149405772495755, + "grad_norm": 0.543832615126229, + "learning_rate": 1.7820930089334965e-06, + "loss": 0.0781, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0356718935072422, + "step": 5435, + "valid_targets_mean": 1537.1, + "valid_targets_min": 619 + }, + { + "epoch": 6.155065082059989, + "grad_norm": 0.6060966311486536, + "learning_rate": 1.7588877433370076e-06, + "loss": 0.263, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10278305411338806, + "step": 5440, + "valid_targets_mean": 2271.2, + "valid_targets_min": 987 + }, + { + "epoch": 6.160724391624222, + "grad_norm": 0.5924085806210888, + "learning_rate": 1.7358276007735276e-06, + "loss": 0.0914, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04437322914600372, + "step": 5445, + "valid_targets_mean": 1351.5, + "valid_targets_min": 329 + }, + { + "epoch": 6.166383701188455, + "grad_norm": 0.5013948977192829, + "learning_rate": 1.71291276470543e-06, + "loss": 0.0808, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.035337746143341064, + "step": 5450, + "valid_targets_mean": 1553.9, + "valid_targets_min": 608 + }, + { + "epoch": 6.172043010752688, + "grad_norm": 0.4871890547176153, + "learning_rate": 1.6901434174390652e-06, + "loss": 0.1721, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.051572948694229126, + "step": 5455, + "valid_targets_mean": 3645.6, + "valid_targets_min": 873 + }, + { + "epoch": 6.1777023203169215, + "grad_norm": 0.29062212468543136, + "learning_rate": 1.6675197401232869e-06, + "loss": 0.0699, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.021249324083328247, + "step": 5460, + "valid_targets_mean": 4720.6, + "valid_targets_min": 3529 + }, + { + "epoch": 6.183361629881155, + "grad_norm": 0.45735535340260947, + "learning_rate": 1.6450419127480422e-06, + "loss": 0.0826, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.044959232211112976, + "step": 5465, + "valid_targets_mean": 4437.6, + "valid_targets_min": 2336 + }, + { + "epoch": 6.1890209394453874, + "grad_norm": 0.42171276299506233, + "learning_rate": 1.6227101141429114e-06, + "loss": 0.0772, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0342087596654892, + "step": 5470, + "valid_targets_mean": 3533.1, + "valid_targets_min": 1226 + }, + { + "epoch": 6.194680249009621, + "grad_norm": 0.40618213284566573, + "learning_rate": 1.6005245219756927e-06, + "loss": 0.0711, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.03383510932326317, + "step": 5475, + "valid_targets_mean": 3299.2, + "valid_targets_min": 798 + }, + { + "epoch": 6.200339558573854, + "grad_norm": 0.461226104176819, + "learning_rate": 1.5784853127510058e-06, + "loss": 0.0698, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.030750712379813194, + "step": 5480, + "valid_targets_mean": 1836.9, + "valid_targets_min": 723 + }, + { + "epoch": 6.205998868138087, + "grad_norm": 0.4333621108931707, + "learning_rate": 1.5565926618088578e-06, + "loss": 0.0681, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.036275140941143036, + "step": 5485, + "valid_targets_mean": 3316.0, + "valid_targets_min": 898 + }, + { + "epoch": 6.21165817770232, + "grad_norm": 0.36550793893508166, + "learning_rate": 1.5348467433232728e-06, + "loss": 0.0668, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.025588275864720345, + "step": 5490, + "valid_targets_mean": 4027.2, + "valid_targets_min": 3278 + }, + { + "epoch": 6.217317487266554, + "grad_norm": 0.4772160854424314, + "learning_rate": 1.5132477303009018e-06, + "loss": 0.07, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.03180842846632004, + "step": 5495, + "valid_targets_mean": 2052.8, + "valid_targets_min": 739 + }, + { + "epoch": 6.222976796830786, + "grad_norm": 0.5089783085241212, + "learning_rate": 1.4917957945796313e-06, + "loss": 0.1007, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.03567662462592125, + "step": 5500, + "valid_targets_mean": 1574.9, + "valid_targets_min": 538 + }, + { + "epoch": 6.22863610639502, + "grad_norm": 0.36973669403155707, + "learning_rate": 1.4704911068272366e-06, + "loss": 0.056, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.025814436376094818, + "step": 5505, + "valid_targets_mean": 1956.9, + "valid_targets_min": 654 + }, + { + "epoch": 6.234295415959253, + "grad_norm": 0.46229589116820236, + "learning_rate": 1.4493338365400034e-06, + "loss": 0.0697, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04194366931915283, + "step": 5510, + "valid_targets_mean": 2735.4, + "valid_targets_min": 930 + }, + { + "epoch": 6.239954725523486, + "grad_norm": 0.4989305468229593, + "learning_rate": 1.428324152041407e-06, + "loss": 0.063, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.03797730803489685, + "step": 5515, + "valid_targets_mean": 2965.0, + "valid_targets_min": 684 + }, + { + "epoch": 6.245614035087719, + "grad_norm": 0.42393631597982256, + "learning_rate": 1.407462220480742e-06, + "loss": 0.107, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.03771083801984787, + "step": 5520, + "valid_targets_mean": 3344.6, + "valid_targets_min": 999 + }, + { + "epoch": 6.251273344651953, + "grad_norm": 0.8345963434877331, + "learning_rate": 1.3867482078318095e-06, + "loss": 0.0832, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04547232389450073, + "step": 5525, + "valid_targets_mean": 2445.4, + "valid_targets_min": 696 + }, + { + "epoch": 6.256932654216186, + "grad_norm": 0.41053393654408254, + "learning_rate": 1.3661822788916013e-06, + "loss": 0.0866, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.036262039095163345, + "step": 5530, + "valid_targets_mean": 3514.9, + "valid_targets_min": 1068 + }, + { + "epoch": 6.262591963780419, + "grad_norm": 0.4910802121211489, + "learning_rate": 1.3457645972789778e-06, + "loss": 0.0727, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.03297572210431099, + "step": 5535, + "valid_targets_mean": 1602.8, + "valid_targets_min": 536 + }, + { + "epoch": 6.268251273344652, + "grad_norm": 0.5594867675696712, + "learning_rate": 1.3254953254333613e-06, + "loss": 0.1142, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0913362205028534, + "step": 5540, + "valid_targets_mean": 3568.8, + "valid_targets_min": 1507 + }, + { + "epoch": 6.2739105829088855, + "grad_norm": 0.468982383102054, + "learning_rate": 1.305374624613469e-06, + "loss": 0.0819, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04027446359395981, + "step": 5545, + "valid_targets_mean": 3292.2, + "valid_targets_min": 2342 + }, + { + "epoch": 6.279569892473118, + "grad_norm": 0.6076216609127739, + "learning_rate": 1.285402654896004e-06, + "loss": 0.0996, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.039696864783763885, + "step": 5550, + "valid_targets_mean": 1567.5, + "valid_targets_min": 687 + }, + { + "epoch": 6.2852292020373515, + "grad_norm": 0.5199065404593297, + "learning_rate": 1.265579575174387e-06, + "loss": 0.0679, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.026267357170581818, + "step": 5555, + "valid_targets_mean": 1145.4, + "valid_targets_min": 449 + }, + { + "epoch": 6.290888511601585, + "grad_norm": 0.37842658390559863, + "learning_rate": 1.245905543157504e-06, + "loss": 0.0766, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.02909524366259575, + "step": 5560, + "valid_targets_mean": 3191.0, + "valid_targets_min": 658 + }, + { + "epoch": 6.2965478211658175, + "grad_norm": 0.6698590437305647, + "learning_rate": 1.2263807153684448e-06, + "loss": 0.0802, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04288920760154724, + "step": 5565, + "valid_targets_mean": 1924.8, + "valid_targets_min": 577 + }, + { + "epoch": 6.302207130730051, + "grad_norm": 0.4089958305848229, + "learning_rate": 1.2070052471432535e-06, + "loss": 0.0646, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.03255155310034752, + "step": 5570, + "valid_targets_mean": 3992.5, + "valid_targets_min": 726 + }, + { + "epoch": 6.307866440294284, + "grad_norm": 0.546203478116887, + "learning_rate": 1.1877792926296893e-06, + "loss": 0.063, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.037086475640535355, + "step": 5575, + "valid_targets_mean": 3626.4, + "valid_targets_min": 2252 + }, + { + "epoch": 6.313525749858517, + "grad_norm": 0.48220913093657924, + "learning_rate": 1.1687030047860248e-06, + "loss": 0.0708, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.028300970792770386, + "step": 5580, + "valid_targets_mean": 1556.2, + "valid_targets_min": 531 + }, + { + "epoch": 6.31918505942275, + "grad_norm": 0.49300440662921147, + "learning_rate": 1.1497765353797963e-06, + "loss": 0.0863, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.03746430575847626, + "step": 5585, + "valid_targets_mean": 2410.5, + "valid_targets_min": 744 + }, + { + "epoch": 6.324844368986984, + "grad_norm": 0.46516325493913885, + "learning_rate": 1.1310000349866136e-06, + "loss": 0.0832, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04292897880077362, + "step": 5590, + "valid_targets_mean": 3795.1, + "valid_targets_min": 3136 + }, + { + "epoch": 6.330503678551217, + "grad_norm": 0.3516195402819051, + "learning_rate": 1.1123736529889674e-06, + "loss": 0.0709, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.029240943491458893, + "step": 5595, + "valid_targets_mean": 3324.4, + "valid_targets_min": 2366 + }, + { + "epoch": 6.33616298811545, + "grad_norm": 0.393646246729592, + "learning_rate": 1.093897537575026e-06, + "loss": 0.0671, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.026576902717351913, + "step": 5600, + "valid_targets_mean": 1668.1, + "valid_targets_min": 669 + }, + { + "epoch": 6.341822297679683, + "grad_norm": 0.5045719380300637, + "learning_rate": 1.075571835737459e-06, + "loss": 0.0826, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04519212990999222, + "step": 5605, + "valid_targets_mean": 3529.8, + "valid_targets_min": 1785 + }, + { + "epoch": 6.347481607243917, + "grad_norm": 0.34572836118706846, + "learning_rate": 1.0573966932722902e-06, + "loss": 0.0586, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.032416749745607376, + "step": 5610, + "valid_targets_mean": 4120.1, + "valid_targets_min": 3862 + }, + { + "epoch": 6.353140916808149, + "grad_norm": 0.414169116954712, + "learning_rate": 1.039372254777702e-06, + "loss": 0.0721, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.03024177812039852, + "step": 5615, + "valid_targets_mean": 3062.2, + "valid_targets_min": 539 + }, + { + "epoch": 6.358800226372383, + "grad_norm": 0.4551881018436352, + "learning_rate": 1.0214986636529112e-06, + "loss": 0.0728, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.029075317084789276, + "step": 5620, + "valid_targets_mean": 2781.5, + "valid_targets_min": 1016 + }, + { + "epoch": 6.364459535936616, + "grad_norm": 0.7250068378090566, + "learning_rate": 1.003776062097015e-06, + "loss": 0.0833, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04859713837504387, + "step": 5625, + "valid_targets_mean": 1102.9, + "valid_targets_min": 888 + }, + { + "epoch": 6.370118845500849, + "grad_norm": 0.5077689879385843, + "learning_rate": 9.862045911078733e-07, + "loss": 0.0753, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.027197659015655518, + "step": 5630, + "valid_targets_mean": 2602.5, + "valid_targets_min": 697 + }, + { + "epoch": 6.375778155065082, + "grad_norm": 0.6899335032624531, + "learning_rate": 9.687843904809725e-07, + "loss": 0.0731, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04702649265527725, + "step": 5635, + "valid_targets_mean": 1592.9, + "valid_targets_min": 605 + }, + { + "epoch": 6.3814374646293155, + "grad_norm": 0.4183486867477277, + "learning_rate": 9.515155988083125e-07, + "loss": 0.0739, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.030234932899475098, + "step": 5640, + "valid_targets_mean": 2899.1, + "valid_targets_min": 1117 + }, + { + "epoch": 6.387096774193548, + "grad_norm": 0.39846802229129347, + "learning_rate": 9.343983534773238e-07, + "loss": 0.0951, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.03229912370443344, + "step": 5645, + "valid_targets_mean": 3376.6, + "valid_targets_min": 1388 + }, + { + "epoch": 6.3927560837577815, + "grad_norm": 0.4221055561946196, + "learning_rate": 9.174327906697522e-07, + "loss": 0.0718, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.03201095759868622, + "step": 5650, + "valid_targets_mean": 3243.4, + "valid_targets_min": 524 + }, + { + "epoch": 6.398415393322015, + "grad_norm": 0.47396946951089514, + "learning_rate": 9.006190453605867e-07, + "loss": 0.0636, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.03388165682554245, + "step": 5655, + "valid_targets_mean": 2468.0, + "valid_targets_min": 970 + }, + { + "epoch": 6.4040747028862475, + "grad_norm": 0.5372963519147712, + "learning_rate": 8.839572513169869e-07, + "loss": 0.0845, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.03781970217823982, + "step": 5660, + "valid_targets_mean": 2959.6, + "valid_targets_min": 877 + }, + { + "epoch": 6.409734012450481, + "grad_norm": 0.4414122630686379, + "learning_rate": 8.674475410972083e-07, + "loss": 0.0776, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.02942243218421936, + "step": 5665, + "valid_targets_mean": 3886.5, + "valid_targets_min": 3251 + }, + { + "epoch": 6.415393322014714, + "grad_norm": 0.5910320359764288, + "learning_rate": 8.510900460495608e-07, + "loss": 0.0616, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04006580635905266, + "step": 5670, + "valid_targets_mean": 2695.6, + "valid_targets_min": 564 + }, + { + "epoch": 6.421052631578947, + "grad_norm": 0.41329259343824304, + "learning_rate": 8.348848963113498e-07, + "loss": 0.0716, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.03878196328878403, + "step": 5675, + "valid_targets_mean": 4404.8, + "valid_targets_min": 2555 + }, + { + "epoch": 6.42671194114318, + "grad_norm": 0.35021801890644144, + "learning_rate": 8.188322208078614e-07, + "loss": 0.0636, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.03053947165608406, + "step": 5680, + "valid_targets_mean": 4410.5, + "valid_targets_min": 3261 + }, + { + "epoch": 6.432371250707414, + "grad_norm": 0.4423919062535216, + "learning_rate": 8.02932147251314e-07, + "loss": 0.058, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.024922095239162445, + "step": 5685, + "valid_targets_mean": 1768.5, + "valid_targets_min": 618 + }, + { + "epoch": 6.438030560271647, + "grad_norm": 0.3428657526356263, + "learning_rate": 7.87184802139851e-07, + "loss": 0.08, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.03279254585504532, + "step": 5690, + "valid_targets_mean": 3985.8, + "valid_targets_min": 3213 + }, + { + "epoch": 6.44368986983588, + "grad_norm": 0.387894200845498, + "learning_rate": 7.715903107565426e-07, + "loss": 0.0755, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08078566938638687, + "step": 5695, + "valid_targets_mean": 3149.1, + "valid_targets_min": 1539 + }, + { + "epoch": 6.449349179400113, + "grad_norm": 0.3521771898348846, + "learning_rate": 7.56148797168379e-07, + "loss": 0.0831, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.025293316692113876, + "step": 5700, + "valid_targets_mean": 2094.8, + "valid_targets_min": 509 + }, + { + "epoch": 6.455008488964347, + "grad_norm": 0.34983675153404814, + "learning_rate": 7.408603842252837e-07, + "loss": 0.078, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.018726779147982597, + "step": 5705, + "valid_targets_mean": 2441.4, + "valid_targets_min": 633 + }, + { + "epoch": 6.460667798528579, + "grad_norm": 0.43222227805674024, + "learning_rate": 7.257251935591436e-07, + "loss": 0.0908, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.034915629774332047, + "step": 5710, + "valid_targets_mean": 2740.9, + "valid_targets_min": 737 + }, + { + "epoch": 6.466327108092813, + "grad_norm": 0.4081347230063779, + "learning_rate": 7.107433455828317e-07, + "loss": 0.0607, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.028989259153604507, + "step": 5715, + "valid_targets_mean": 2726.9, + "valid_targets_min": 492 + }, + { + "epoch": 6.471986417657046, + "grad_norm": 0.5771132928708792, + "learning_rate": 6.959149594892567e-07, + "loss": 0.0779, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.045439302921295166, + "step": 5720, + "valid_targets_mean": 1607.9, + "valid_targets_min": 833 + }, + { + "epoch": 6.477645727221279, + "grad_norm": 0.38282961964026013, + "learning_rate": 6.812401532504109e-07, + "loss": 0.0637, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.022644251585006714, + "step": 5725, + "valid_targets_mean": 3502.6, + "valid_targets_min": 923 + }, + { + "epoch": 6.483305036785512, + "grad_norm": 0.7093665041938079, + "learning_rate": 6.667190436164351e-07, + "loss": 0.1006, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.049554117023944855, + "step": 5730, + "valid_targets_mean": 1553.9, + "valid_targets_min": 531 + }, + { + "epoch": 6.4889643463497455, + "grad_norm": 0.5395677136363789, + "learning_rate": 6.523517461146856e-07, + "loss": 0.0966, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06349746882915497, + "step": 5735, + "valid_targets_mean": 1825.6, + "valid_targets_min": 575 + }, + { + "epoch": 6.494623655913978, + "grad_norm": 0.4683100032634949, + "learning_rate": 6.381383750488113e-07, + "loss": 0.0704, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.036794550716876984, + "step": 5740, + "valid_targets_mean": 3701.9, + "valid_targets_min": 2784 + }, + { + "epoch": 6.5002829654782115, + "grad_norm": 0.39665598153857057, + "learning_rate": 6.240790434978628e-07, + "loss": 0.069, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.03116479702293873, + "step": 5745, + "valid_targets_mean": 3575.8, + "valid_targets_min": 2525 + }, + { + "epoch": 6.505942275042445, + "grad_norm": 0.3909289070674195, + "learning_rate": 6.101738633153686e-07, + "loss": 0.0667, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.02514510788023472, + "step": 5750, + "valid_targets_mean": 3081.0, + "valid_targets_min": 970 + }, + { + "epoch": 6.511601584606678, + "grad_norm": 0.5221061620646068, + "learning_rate": 5.964229451284586e-07, + "loss": 0.0627, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.030438585206866264, + "step": 5755, + "valid_targets_mean": 1963.5, + "valid_targets_min": 817 + }, + { + "epoch": 6.517260894170911, + "grad_norm": 0.8427273967468634, + "learning_rate": 5.828263983369864e-07, + "loss": 0.0809, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.057581909000873566, + "step": 5760, + "valid_targets_mean": 1270.9, + "valid_targets_min": 637 + }, + { + "epoch": 6.522920203735144, + "grad_norm": 0.40291295842096636, + "learning_rate": 5.693843311126523e-07, + "loss": 0.0602, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.031059563159942627, + "step": 5765, + "valid_targets_mean": 2594.1, + "valid_targets_min": 893 + }, + { + "epoch": 6.528579513299378, + "grad_norm": 0.38279978257420905, + "learning_rate": 5.560968503981378e-07, + "loss": 0.065, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.027334999293088913, + "step": 5770, + "valid_targets_mean": 3222.2, + "valid_targets_min": 1606 + }, + { + "epoch": 6.53423882286361, + "grad_norm": 0.4376854384676312, + "learning_rate": 5.429640619062726e-07, + "loss": 0.0642, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.02784162387251854, + "step": 5775, + "valid_targets_mean": 3023.8, + "valid_targets_min": 678 + }, + { + "epoch": 6.539898132427844, + "grad_norm": 0.5182343274284491, + "learning_rate": 5.299860701191772e-07, + "loss": 0.073, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.029868509620428085, + "step": 5780, + "valid_targets_mean": 4263.8, + "valid_targets_min": 1078 + }, + { + "epoch": 6.545557441992077, + "grad_norm": 0.3873691318800413, + "learning_rate": 5.171629782874354e-07, + "loss": 0.0632, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.035855866968631744, + "step": 5785, + "valid_targets_mean": 4150.0, + "valid_targets_min": 761 + }, + { + "epoch": 6.55121675155631, + "grad_norm": 0.5265280283394623, + "learning_rate": 5.044948884292766e-07, + "loss": 0.0677, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.035952128469944, + "step": 5790, + "valid_targets_mean": 1368.4, + "valid_targets_min": 538 + }, + { + "epoch": 6.556876061120543, + "grad_norm": 0.5692650186236182, + "learning_rate": 4.919819013297677e-07, + "loss": 0.0854, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07685303688049316, + "step": 5795, + "valid_targets_mean": 2630.4, + "valid_targets_min": 780 + }, + { + "epoch": 6.562535370684777, + "grad_norm": 0.3992203816344988, + "learning_rate": 4.796241165399939e-07, + "loss": 0.1375, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.029380571097135544, + "step": 5800, + "valid_targets_mean": 3353.9, + "valid_targets_min": 2347 + }, + { + "epoch": 6.568194680249009, + "grad_norm": 0.4413274699616067, + "learning_rate": 4.674216323762881e-07, + "loss": 0.0676, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.036058343946933746, + "step": 5805, + "valid_targets_mean": 3880.2, + "valid_targets_min": 2473 + }, + { + "epoch": 6.573853989813243, + "grad_norm": 0.4005766443151731, + "learning_rate": 4.5537454591943584e-07, + "loss": 0.0651, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.032821476459503174, + "step": 5810, + "valid_targets_mean": 3556.4, + "valid_targets_min": 2245 + }, + { + "epoch": 6.579513299377476, + "grad_norm": 0.7487686722359814, + "learning_rate": 4.434829530139095e-07, + "loss": 0.1232, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0820455551147461, + "step": 5815, + "valid_targets_mean": 7477.8, + "valid_targets_min": 6099 + }, + { + "epoch": 6.5851726089417095, + "grad_norm": 0.7534214710986934, + "learning_rate": 4.3174694826709107e-07, + "loss": 0.1544, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07804305851459503, + "step": 5820, + "valid_targets_mean": 6943.4, + "valid_targets_min": 5066 + }, + { + "epoch": 6.590831918505942, + "grad_norm": 0.7163092047483358, + "learning_rate": 4.201666250485503e-07, + "loss": 0.1506, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07243961095809937, + "step": 5825, + "valid_targets_mean": 6880.4, + "valid_targets_min": 4923 + }, + { + "epoch": 6.5964912280701755, + "grad_norm": 0.6682497779585265, + "learning_rate": 4.087420754892635e-07, + "loss": 0.146, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07224197685718536, + "step": 5830, + "valid_targets_mean": 5842.0, + "valid_targets_min": 4099 + }, + { + "epoch": 6.602150537634409, + "grad_norm": 0.6836051272147596, + "learning_rate": 3.9747339048091136e-07, + "loss": 0.1454, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07733571529388428, + "step": 5835, + "valid_targets_mean": 7123.2, + "valid_targets_min": 4797 + }, + { + "epoch": 6.6078098471986415, + "grad_norm": 0.6266515389986721, + "learning_rate": 3.863606596751379e-07, + "loss": 0.1597, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08477002382278442, + "step": 5840, + "valid_targets_mean": 6494.1, + "valid_targets_min": 3853 + }, + { + "epoch": 6.613469156762875, + "grad_norm": 0.5796635106347725, + "learning_rate": 3.7540397148284837e-07, + "loss": 0.1497, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07463136315345764, + "step": 5845, + "valid_targets_mean": 6439.9, + "valid_targets_min": 4586 + }, + { + "epoch": 6.619128466327108, + "grad_norm": 0.5949510554792016, + "learning_rate": 3.6460341307349653e-07, + "loss": 0.1427, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08274795114994049, + "step": 5850, + "valid_targets_mean": 7251.0, + "valid_targets_min": 4728 + }, + { + "epoch": 6.624787775891341, + "grad_norm": 0.6316364914906776, + "learning_rate": 3.539590703743967e-07, + "loss": 0.1672, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1106606274843216, + "step": 5855, + "valid_targets_mean": 6318.6, + "valid_targets_min": 4375 + }, + { + "epoch": 6.630447085455574, + "grad_norm": 0.550135953230067, + "learning_rate": 3.434710280700415e-07, + "loss": 0.1405, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07912848144769669, + "step": 5860, + "valid_targets_mean": 7099.8, + "valid_targets_min": 4561 + }, + { + "epoch": 6.636106395019808, + "grad_norm": 0.5807128239797757, + "learning_rate": 3.331393696014207e-07, + "loss": 0.1523, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08417834341526031, + "step": 5865, + "valid_targets_mean": 4537.5, + "valid_targets_min": 3439 + }, + { + "epoch": 6.64176570458404, + "grad_norm": 0.5219644715267242, + "learning_rate": 3.22964177165368e-07, + "loss": 0.1402, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06268523633480072, + "step": 5870, + "valid_targets_mean": 5579.2, + "valid_targets_min": 3635 + }, + { + "epoch": 6.647425014148274, + "grad_norm": 0.5263531444649148, + "learning_rate": 3.129455317138952e-07, + "loss": 0.149, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07062210142612457, + "step": 5875, + "valid_targets_mean": 6693.9, + "valid_targets_min": 4393 + }, + { + "epoch": 6.653084323712507, + "grad_norm": 0.5039547873388956, + "learning_rate": 3.030835129535592e-07, + "loss": 0.1374, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07635181397199631, + "step": 5880, + "valid_targets_mean": 7235.5, + "valid_targets_min": 4749 + }, + { + "epoch": 6.658743633276741, + "grad_norm": 0.5374975388564814, + "learning_rate": 2.9337819934481814e-07, + "loss": 0.1361, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04057689011096954, + "step": 5885, + "valid_targets_mean": 2121.1, + "valid_targets_min": 1062 + }, + { + "epoch": 6.664402942840973, + "grad_norm": 0.5495976815602165, + "learning_rate": 2.838296681014185e-07, + "loss": 0.1353, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07217422872781754, + "step": 5890, + "valid_targets_mean": 6031.5, + "valid_targets_min": 3119 + }, + { + "epoch": 6.670062252405207, + "grad_norm": 0.4727749466552753, + "learning_rate": 2.744379951897691e-07, + "loss": 0.1434, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07923989742994308, + "step": 5895, + "valid_targets_mean": 5516.4, + "valid_targets_min": 3774 + }, + { + "epoch": 6.67572156196944, + "grad_norm": 0.4918446356807094, + "learning_rate": 2.65203255328339e-07, + "loss": 0.1415, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06927672028541565, + "step": 5900, + "valid_targets_mean": 6248.4, + "valid_targets_min": 5140 + }, + { + "epoch": 6.681380871533673, + "grad_norm": 0.48057048911669864, + "learning_rate": 2.561255219870762e-07, + "loss": 0.1256, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06571323424577713, + "step": 5905, + "valid_targets_mean": 6634.4, + "valid_targets_min": 5665 + }, + { + "epoch": 6.687040181097906, + "grad_norm": 0.5212976091008016, + "learning_rate": 2.472048673868033e-07, + "loss": 0.1324, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07338336110115051, + "step": 5910, + "valid_targets_mean": 6851.5, + "valid_targets_min": 4428 + }, + { + "epoch": 6.6926994906621395, + "grad_norm": 0.48101118665117376, + "learning_rate": 2.3844136249865367e-07, + "loss": 0.1537, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08724261820316315, + "step": 5915, + "valid_targets_mean": 5922.0, + "valid_targets_min": 4690 + }, + { + "epoch": 6.698358800226372, + "grad_norm": 0.4764153735320399, + "learning_rate": 2.2983507704351426e-07, + "loss": 0.1509, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.062456920742988586, + "step": 5920, + "valid_targets_mean": 5270.4, + "valid_targets_min": 4262 + }, + { + "epoch": 6.7040181097906055, + "grad_norm": 0.4752188688462987, + "learning_rate": 2.213860794914524e-07, + "loss": 0.1348, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07114718854427338, + "step": 5925, + "valid_targets_mean": 6213.9, + "valid_targets_min": 5149 + }, + { + "epoch": 6.709677419354839, + "grad_norm": 0.5402659781856378, + "learning_rate": 2.1309443706118538e-07, + "loss": 0.1444, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06940378993749619, + "step": 5930, + "valid_targets_mean": 5613.8, + "valid_targets_min": 3840 + }, + { + "epoch": 6.7153367289190715, + "grad_norm": 0.5297670453348129, + "learning_rate": 2.049602157195363e-07, + "loss": 0.1571, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08687257766723633, + "step": 5935, + "valid_targets_mean": 6458.6, + "valid_targets_min": 5104 + }, + { + "epoch": 6.720996038483305, + "grad_norm": 0.5046345063884, + "learning_rate": 1.9698348018092338e-07, + "loss": 0.1711, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06864124536514282, + "step": 5940, + "valid_targets_mean": 5527.5, + "valid_targets_min": 4224 + }, + { + "epoch": 6.726655348047538, + "grad_norm": 0.5065065013683697, + "learning_rate": 1.8916429390682944e-07, + "loss": 0.1601, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07327083498239517, + "step": 5945, + "valid_targets_mean": 6107.0, + "valid_targets_min": 4318 + }, + { + "epoch": 6.732314657611772, + "grad_norm": 0.4747338857985507, + "learning_rate": 1.8150271910530204e-07, + "loss": 0.1447, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07454071193933487, + "step": 5950, + "valid_targets_mean": 6608.5, + "valid_targets_min": 4783 + }, + { + "epoch": 6.737973967176004, + "grad_norm": 0.4760978068435683, + "learning_rate": 1.7399881673046736e-07, + "loss": 0.145, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06898602843284607, + "step": 5955, + "valid_targets_mean": 7459.4, + "valid_targets_min": 4412 + }, + { + "epoch": 6.743633276740238, + "grad_norm": 0.5189702583573984, + "learning_rate": 1.666526464820284e-07, + "loss": 0.1526, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07886485755443573, + "step": 5960, + "valid_targets_mean": 6031.6, + "valid_targets_min": 4332 + }, + { + "epoch": 6.74929258630447, + "grad_norm": 0.4498647439722737, + "learning_rate": 1.594642668048052e-07, + "loss": 0.1467, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06920972466468811, + "step": 5965, + "valid_targets_mean": 5925.8, + "valid_targets_min": 5096 + }, + { + "epoch": 6.754951895868704, + "grad_norm": 0.5083555683499332, + "learning_rate": 1.5243373488826653e-07, + "loss": 0.148, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07990719377994537, + "step": 5970, + "valid_targets_mean": 5700.1, + "valid_targets_min": 2364 + }, + { + "epoch": 6.760611205432937, + "grad_norm": 0.5399051754275803, + "learning_rate": 1.4556110666606783e-07, + "loss": 0.156, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08077910542488098, + "step": 5975, + "valid_targets_mean": 6654.9, + "valid_targets_min": 4793 + }, + { + "epoch": 6.766270514997171, + "grad_norm": 0.5184458156430881, + "learning_rate": 1.388464368156095e-07, + "loss": 0.1473, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0759461522102356, + "step": 5980, + "valid_targets_mean": 5911.5, + "valid_targets_min": 4588 + }, + { + "epoch": 6.771929824561403, + "grad_norm": 0.41738433888647436, + "learning_rate": 1.322897787576105e-07, + "loss": 0.1338, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06279043108224869, + "step": 5985, + "valid_targets_mean": 6948.5, + "valid_targets_min": 5388 + }, + { + "epoch": 6.777589134125637, + "grad_norm": 0.4799905628145097, + "learning_rate": 1.2589118465566875e-07, + "loss": 0.1336, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0820792019367218, + "step": 5990, + "valid_targets_mean": 6336.5, + "valid_targets_min": 3666 + }, + { + "epoch": 6.78324844368987, + "grad_norm": 0.472587166851982, + "learning_rate": 1.1965070541585912e-07, + "loss": 0.1547, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0906020849943161, + "step": 5995, + "valid_targets_mean": 7032.6, + "valid_targets_min": 5077 + }, + { + "epoch": 6.788907753254103, + "grad_norm": 0.500035985881612, + "learning_rate": 1.1356839068632053e-07, + "loss": 0.1545, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08360623568296432, + "step": 6000, + "valid_targets_mean": 5717.2, + "valid_targets_min": 3663 + }, + { + "epoch": 6.794567062818336, + "grad_norm": 0.5092396065505584, + "learning_rate": 1.0764428885686073e-07, + "loss": 0.1438, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0701868087053299, + "step": 6005, + "valid_targets_mean": 5608.9, + "valid_targets_min": 4105 + }, + { + "epoch": 6.8002263723825696, + "grad_norm": 0.46521670144612376, + "learning_rate": 1.0187844705857875e-07, + "loss": 0.1422, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07969950884580612, + "step": 6010, + "valid_targets_mean": 7618.5, + "valid_targets_min": 5201 + }, + { + "epoch": 6.805885681946802, + "grad_norm": 0.47901653225949864, + "learning_rate": 9.627091116348076e-08, + "loss": 0.1373, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08053254336118698, + "step": 6015, + "valid_targets_mean": 6253.0, + "valid_targets_min": 5069 + }, + { + "epoch": 6.8115449915110355, + "grad_norm": 0.4426284149369684, + "learning_rate": 9.082172578412263e-08, + "loss": 0.1437, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06579966843128204, + "step": 6020, + "valid_targets_mean": 5635.0, + "valid_targets_min": 4729 + }, + { + "epoch": 6.817204301075269, + "grad_norm": 0.4722004210289814, + "learning_rate": 8.553093427325243e-08, + "loss": 0.1521, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07432159781455994, + "step": 6025, + "valid_targets_mean": 5936.4, + "valid_targets_min": 4567 + }, + { + "epoch": 6.8228636106395015, + "grad_norm": 0.4492725967919617, + "learning_rate": 8.039857872345736e-08, + "loss": 0.1379, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06669019162654877, + "step": 6030, + "valid_targets_mean": 6021.9, + "valid_targets_min": 4464 + }, + { + "epoch": 6.828522920203735, + "grad_norm": 0.6467284197571562, + "learning_rate": 7.542469996684843e-08, + "loss": 0.1538, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06998852640390396, + "step": 6035, + "valid_targets_mean": 5414.9, + "valid_targets_min": 4908 + }, + { + "epoch": 6.834182229767968, + "grad_norm": 0.4202759599615148, + "learning_rate": 7.06093375747141e-08, + "loss": 0.1359, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.061411675065755844, + "step": 6040, + "valid_targets_mean": 5470.8, + "valid_targets_min": 4443 + }, + { + "epoch": 6.839841539332202, + "grad_norm": 0.4423730917992716, + "learning_rate": 6.595252985721834e-08, + "loss": 0.1235, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06360609829425812, + "step": 6045, + "valid_targets_mean": 6542.5, + "valid_targets_min": 3381 + }, + { + "epoch": 6.845500848896434, + "grad_norm": 0.4700795314852806, + "learning_rate": 6.145431386309186e-08, + "loss": 0.1291, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06548164784908295, + "step": 6050, + "valid_targets_mean": 6168.5, + "valid_targets_min": 4790 + }, + { + "epoch": 6.851160158460668, + "grad_norm": 0.4718326978944311, + "learning_rate": 5.711472537933693e-08, + "loss": 0.1313, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06408711522817612, + "step": 6055, + "valid_targets_mean": 6507.5, + "valid_targets_min": 5161 + }, + { + "epoch": 6.856819468024901, + "grad_norm": 0.47208225963903605, + "learning_rate": 5.293379893094752e-08, + "loss": 0.146, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06727685034275055, + "step": 6060, + "valid_targets_mean": 6376.0, + "valid_targets_min": 4491 + }, + { + "epoch": 6.862478777589134, + "grad_norm": 0.6253433685183312, + "learning_rate": 4.891156778062734e-08, + "loss": 0.1723, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08305980265140533, + "step": 6065, + "valid_targets_mean": 2179.4, + "valid_targets_min": 817 + }, + { + "epoch": 6.868138087153367, + "grad_norm": 0.4686566653341741, + "learning_rate": 4.5048063928527785e-08, + "loss": 0.1578, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06473400443792343, + "step": 6070, + "valid_targets_mean": 5207.4, + "valid_targets_min": 3893 + }, + { + "epoch": 6.873797396717601, + "grad_norm": 0.4420292203905296, + "learning_rate": 4.134331811199932e-08, + "loss": 0.138, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07685883343219757, + "step": 6075, + "valid_targets_mean": 6132.4, + "valid_targets_min": 4622 + }, + { + "epoch": 6.879456706281833, + "grad_norm": 0.46238066941479405, + "learning_rate": 3.7797359805333836e-08, + "loss": 0.1455, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07107360661029816, + "step": 6080, + "valid_targets_mean": 7527.8, + "valid_targets_min": 6108 + }, + { + "epoch": 6.885116015846067, + "grad_norm": 0.4620714624124573, + "learning_rate": 3.441021721954485e-08, + "loss": 0.15, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07760952413082123, + "step": 6085, + "valid_targets_mean": 6656.1, + "valid_targets_min": 4253 + }, + { + "epoch": 6.8907753254103, + "grad_norm": 0.45090978086774036, + "learning_rate": 3.11819173021366e-08, + "loss": 0.1575, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07610969245433807, + "step": 6090, + "valid_targets_mean": 6820.0, + "valid_targets_min": 4990 + }, + { + "epoch": 6.896434634974533, + "grad_norm": 0.48299963137500884, + "learning_rate": 2.8112485736881967e-08, + "loss": 0.1485, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09029359370470047, + "step": 6095, + "valid_targets_mean": 6825.6, + "valid_targets_min": 4678 + }, + { + "epoch": 6.902093944538766, + "grad_norm": 0.4755841953185394, + "learning_rate": 2.520194694363376e-08, + "loss": 0.1502, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07689206302165985, + "step": 6100, + "valid_targets_mean": 6218.2, + "valid_targets_min": 4775 + }, + { + "epoch": 6.907753254103, + "grad_norm": 0.4191527961377397, + "learning_rate": 2.2450324078120423e-08, + "loss": 0.1376, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05522661283612251, + "step": 6105, + "valid_targets_mean": 5699.0, + "valid_targets_min": 4098 + }, + { + "epoch": 6.913412563667233, + "grad_norm": 0.42766273620138967, + "learning_rate": 1.9857639031759522e-08, + "loss": 0.1371, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07987165451049805, + "step": 6110, + "valid_targets_mean": 6155.6, + "valid_targets_min": 4801 + }, + { + "epoch": 6.9190718732314656, + "grad_norm": 0.506355717163856, + "learning_rate": 1.7423912431489e-08, + "loss": 0.1872, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1834990680217743, + "step": 6115, + "valid_targets_mean": 5976.9, + "valid_targets_min": 4734 + }, + { + "epoch": 6.924731182795699, + "grad_norm": 0.4748706448466781, + "learning_rate": 1.51491636396095e-08, + "loss": 0.1477, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0730670690536499, + "step": 6120, + "valid_targets_mean": 5994.2, + "valid_targets_min": 4400 + }, + { + "epoch": 6.930390492359932, + "grad_norm": 0.547452357346653, + "learning_rate": 1.3033410753608977e-08, + "loss": 0.1389, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07722797244787216, + "step": 6125, + "valid_targets_mean": 6467.8, + "valid_targets_min": 4919 + }, + { + "epoch": 6.936049801924165, + "grad_norm": 0.4801052149519746, + "learning_rate": 1.1076670606045004e-08, + "loss": 0.1472, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08595552295446396, + "step": 6130, + "valid_targets_mean": 6262.5, + "valid_targets_min": 4611 + }, + { + "epoch": 6.941709111488398, + "grad_norm": 0.46326567360516374, + "learning_rate": 9.278958764391554e-09, + "loss": 0.1473, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06435564160346985, + "step": 6135, + "valid_targets_mean": 6272.1, + "valid_targets_min": 2275 + }, + { + "epoch": 6.947368421052632, + "grad_norm": 0.41730847822702216, + "learning_rate": 7.64028953092133e-09, + "loss": 0.1285, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06592851132154465, + "step": 6140, + "valid_targets_mean": 6114.8, + "valid_targets_min": 4826 + }, + { + "epoch": 6.953027730616864, + "grad_norm": 0.4360068854322992, + "learning_rate": 6.16067594259695e-09, + "loss": 0.1455, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06857221573591232, + "step": 6145, + "valid_targets_mean": 6942.9, + "valid_targets_min": 4707 + }, + { + "epoch": 6.958687040181098, + "grad_norm": 0.4435183128729601, + "learning_rate": 4.840129770957713e-09, + "loss": 0.1347, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07593577355146408, + "step": 6150, + "valid_targets_mean": 7121.4, + "valid_targets_min": 5421 + }, + { + "epoch": 6.964346349745331, + "grad_norm": 0.46723780592510317, + "learning_rate": 3.6786615220352208e-09, + "loss": 0.1389, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0715329721570015, + "step": 6155, + "valid_targets_mean": 7213.6, + "valid_targets_min": 5440 + }, + { + "epoch": 6.970005659309564, + "grad_norm": 0.4849520918424895, + "learning_rate": 2.6762804362623353e-09, + "loss": 0.1384, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07289247214794159, + "step": 6160, + "valid_targets_mean": 6386.6, + "valid_targets_min": 4831 + }, + { + "epoch": 6.975664968873797, + "grad_norm": 0.4556168581750176, + "learning_rate": 1.8329944884021288e-09, + "loss": 0.1449, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08725398033857346, + "step": 6165, + "valid_targets_mean": 6396.2, + "valid_targets_min": 4018 + }, + { + "epoch": 6.981324278438031, + "grad_norm": 0.5425828130369534, + "learning_rate": 1.1488103874923717e-09, + "loss": 0.1355, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05385049059987068, + "step": 6170, + "valid_targets_mean": 6891.8, + "valid_targets_min": 5034 + }, + { + "epoch": 6.986983588002264, + "grad_norm": 0.4817068062993981, + "learning_rate": 6.237335767744767e-10, + "loss": 0.1413, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08153702318668365, + "step": 6175, + "valid_targets_mean": 6423.0, + "valid_targets_min": 4682 + }, + { + "epoch": 6.992642897566497, + "grad_norm": 0.4865598353763302, + "learning_rate": 2.577682336690757e-10, + "loss": 0.1537, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0725170373916626, + "step": 6180, + "valid_targets_mean": 5597.6, + "valid_targets_min": 4764 + }, + { + "epoch": 6.99830220713073, + "grad_norm": 0.5462954679245822, + "learning_rate": 5.091726972938915e-11, + "loss": 0.1527, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07812730967998505, + "step": 6185, + "valid_targets_mean": 4990.0, + "valid_targets_min": 2481 + }, + { + "epoch": 7.0, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1196058988571167, + "step": 6187, + "total_flos": 3.0358571973355766e+18, + "train_loss": 0.05744029661266912, + "train_runtime": 85306.5691, + "train_samples_per_second": 1.16, + "train_steps_per_second": 0.073, + "valid_targets_mean": 5778.9, + "valid_targets_min": 4371 + } + ], + "logging_steps": 5, + "max_steps": 6188, + "num_input_tokens_seen": 0, + "num_train_epochs": 7, + "save_steps": 200, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 3.0358571973355766e+18, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +}