diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,8297 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 6.0, + "eval_steps": 500, + "global_step": 3750, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.008, + "grad_norm": 8.70977307694482, + "learning_rate": 4.266666666666667e-07, + "loss": 0.8749, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.8716846108436584, + "step": 5, + "valid_targets_mean": 2538.1, + "valid_targets_min": 314 + }, + { + "epoch": 0.016, + "grad_norm": 10.29090145396382, + "learning_rate": 9.600000000000001e-07, + "loss": 0.8863, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.9336282014846802, + "step": 10, + "valid_targets_mean": 1696.2, + "valid_targets_min": 296 + }, + { + "epoch": 0.024, + "grad_norm": 7.5750029640791725, + "learning_rate": 1.4933333333333336e-06, + "loss": 0.8563, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.8610094785690308, + "step": 15, + "valid_targets_mean": 2235.9, + "valid_targets_min": 420 + }, + { + "epoch": 0.032, + "grad_norm": 6.433424219734035, + "learning_rate": 2.0266666666666666e-06, + "loss": 0.7908, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.8827441930770874, + "step": 20, + "valid_targets_mean": 2079.4, + "valid_targets_min": 286 + }, + { + "epoch": 0.04, + "grad_norm": 5.074760879549001, + "learning_rate": 2.56e-06, + "loss": 0.8504, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.819884181022644, + "step": 25, + "valid_targets_mean": 1559.2, + "valid_targets_min": 286 + }, + { + "epoch": 0.048, + "grad_norm": 2.712799636966812, + "learning_rate": 3.093333333333334e-06, + "loss": 0.7851, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.6933596134185791, + "step": 30, + "valid_targets_mean": 2028.6, + "valid_targets_min": 463 + }, + { + "epoch": 0.056, + "grad_norm": 2.0373406539066146, + "learning_rate": 3.6266666666666674e-06, + "loss": 0.7479, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.71419358253479, + "step": 35, + "valid_targets_mean": 2210.4, + "valid_targets_min": 617 + }, + { + "epoch": 0.064, + "grad_norm": 1.4068779549549064, + "learning_rate": 4.16e-06, + "loss": 0.745, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.6900101900100708, + "step": 40, + "valid_targets_mean": 1853.0, + "valid_targets_min": 412 + }, + { + "epoch": 0.072, + "grad_norm": 1.1097745676204722, + "learning_rate": 4.693333333333334e-06, + "loss": 0.6973, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.6602718830108643, + "step": 45, + "valid_targets_mean": 2335.8, + "valid_targets_min": 345 + }, + { + "epoch": 0.08, + "grad_norm": 1.470642358348425, + "learning_rate": 5.226666666666667e-06, + "loss": 0.6999, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.8183687925338745, + "step": 50, + "valid_targets_mean": 1520.8, + "valid_targets_min": 285 + }, + { + "epoch": 0.088, + "grad_norm": 1.0526714033415538, + "learning_rate": 5.76e-06, + "loss": 0.6424, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.5843245983123779, + "step": 55, + "valid_targets_mean": 1541.6, + "valid_targets_min": 238 + }, + { + "epoch": 0.096, + "grad_norm": 0.9623188767849996, + "learning_rate": 6.293333333333334e-06, + "loss": 0.6522, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.660775363445282, + "step": 60, + "valid_targets_mean": 1922.9, + "valid_targets_min": 276 + }, + { + "epoch": 0.104, + "grad_norm": 0.8623555129729585, + "learning_rate": 6.826666666666667e-06, + "loss": 0.642, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.6567976474761963, + "step": 65, + "valid_targets_mean": 1989.9, + "valid_targets_min": 280 + }, + { + "epoch": 0.112, + "grad_norm": 0.7431652162692693, + "learning_rate": 7.360000000000001e-06, + "loss": 0.6009, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.6057322025299072, + "step": 70, + "valid_targets_mean": 2507.4, + "valid_targets_min": 358 + }, + { + "epoch": 0.12, + "grad_norm": 0.8529808791667168, + "learning_rate": 7.893333333333335e-06, + "loss": 0.648, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.6728823781013489, + "step": 75, + "valid_targets_mean": 1934.2, + "valid_targets_min": 369 + }, + { + "epoch": 0.128, + "grad_norm": 0.8778812689341119, + "learning_rate": 8.426666666666667e-06, + "loss": 0.597, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.6438958048820496, + "step": 80, + "valid_targets_mean": 1775.9, + "valid_targets_min": 312 + }, + { + "epoch": 0.136, + "grad_norm": 0.77264016634592, + "learning_rate": 8.96e-06, + "loss": 0.5859, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.626758873462677, + "step": 85, + "valid_targets_mean": 2020.9, + "valid_targets_min": 281 + }, + { + "epoch": 0.144, + "grad_norm": 0.7706519383923865, + "learning_rate": 9.493333333333334e-06, + "loss": 0.5884, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.5779508948326111, + "step": 90, + "valid_targets_mean": 1888.1, + "valid_targets_min": 263 + }, + { + "epoch": 0.152, + "grad_norm": 0.8275613861014995, + "learning_rate": 1.0026666666666667e-05, + "loss": 0.5942, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.5887092351913452, + "step": 95, + "valid_targets_mean": 1662.2, + "valid_targets_min": 390 + }, + { + "epoch": 0.16, + "grad_norm": 0.6530661253948835, + "learning_rate": 1.056e-05, + "loss": 0.5709, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.5566831827163696, + "step": 100, + "valid_targets_mean": 2530.6, + "valid_targets_min": 752 + }, + { + "epoch": 0.168, + "grad_norm": 0.6342642892399393, + "learning_rate": 1.1093333333333334e-05, + "loss": 0.5734, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.5590141415596008, + "step": 105, + "valid_targets_mean": 3199.7, + "valid_targets_min": 1446 + }, + { + "epoch": 0.176, + "grad_norm": 0.7503371762675456, + "learning_rate": 1.1626666666666668e-05, + "loss": 0.5718, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.5699026584625244, + "step": 110, + "valid_targets_mean": 2242.4, + "valid_targets_min": 298 + }, + { + "epoch": 0.184, + "grad_norm": 0.8004502688005221, + "learning_rate": 1.216e-05, + "loss": 0.5585, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.48529720306396484, + "step": 115, + "valid_targets_mean": 1892.6, + "valid_targets_min": 395 + }, + { + "epoch": 0.192, + "grad_norm": 0.8162220821968871, + "learning_rate": 1.2693333333333336e-05, + "loss": 0.5403, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.584542453289032, + "step": 120, + "valid_targets_mean": 2136.8, + "valid_targets_min": 271 + }, + { + "epoch": 0.2, + "grad_norm": 0.8057151061738206, + "learning_rate": 1.3226666666666668e-05, + "loss": 0.5595, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.5924099683761597, + "step": 125, + "valid_targets_mean": 1891.8, + "valid_targets_min": 271 + }, + { + "epoch": 0.208, + "grad_norm": 0.6095564722536361, + "learning_rate": 1.376e-05, + "loss": 0.5393, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4800427556037903, + "step": 130, + "valid_targets_mean": 2730.5, + "valid_targets_min": 434 + }, + { + "epoch": 0.216, + "grad_norm": 0.8246272693017236, + "learning_rate": 1.4293333333333334e-05, + "loss": 0.558, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.6110150814056396, + "step": 135, + "valid_targets_mean": 1948.5, + "valid_targets_min": 330 + }, + { + "epoch": 0.224, + "grad_norm": 1.0235585826027744, + "learning_rate": 1.4826666666666666e-05, + "loss": 0.5627, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.6028550863265991, + "step": 140, + "valid_targets_mean": 1263.0, + "valid_targets_min": 255 + }, + { + "epoch": 0.232, + "grad_norm": 0.7397315844998203, + "learning_rate": 1.5360000000000002e-05, + "loss": 0.5543, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.5466543436050415, + "step": 145, + "valid_targets_mean": 2054.7, + "valid_targets_min": 541 + }, + { + "epoch": 0.24, + "grad_norm": 0.6654060201581206, + "learning_rate": 1.5893333333333333e-05, + "loss": 0.5127, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.5320032835006714, + "step": 150, + "valid_targets_mean": 2859.1, + "valid_targets_min": 356 + }, + { + "epoch": 0.248, + "grad_norm": 0.8143742586812208, + "learning_rate": 1.642666666666667e-05, + "loss": 0.525, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.5205363035202026, + "step": 155, + "valid_targets_mean": 1714.2, + "valid_targets_min": 288 + }, + { + "epoch": 0.256, + "grad_norm": 0.7375443100560095, + "learning_rate": 1.696e-05, + "loss": 0.4991, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.48814937472343445, + "step": 160, + "valid_targets_mean": 2070.8, + "valid_targets_min": 203 + }, + { + "epoch": 0.264, + "grad_norm": 0.8228320965756343, + "learning_rate": 1.7493333333333334e-05, + "loss": 0.5455, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.5349533557891846, + "step": 165, + "valid_targets_mean": 1732.5, + "valid_targets_min": 348 + }, + { + "epoch": 0.272, + "grad_norm": 0.7833507734682047, + "learning_rate": 1.8026666666666668e-05, + "loss": 0.5186, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.5089653730392456, + "step": 170, + "valid_targets_mean": 1734.4, + "valid_targets_min": 462 + }, + { + "epoch": 0.28, + "grad_norm": 0.7264195058692751, + "learning_rate": 1.8560000000000002e-05, + "loss": 0.5339, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.5141239762306213, + "step": 175, + "valid_targets_mean": 2259.6, + "valid_targets_min": 269 + }, + { + "epoch": 0.288, + "grad_norm": 0.8104091421121233, + "learning_rate": 1.9093333333333336e-05, + "loss": 0.5157, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.5261850357055664, + "step": 180, + "valid_targets_mean": 1862.8, + "valid_targets_min": 327 + }, + { + "epoch": 0.296, + "grad_norm": 0.6983796654352136, + "learning_rate": 1.9626666666666666e-05, + "loss": 0.493, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.5028804540634155, + "step": 185, + "valid_targets_mean": 2254.9, + "valid_targets_min": 417 + }, + { + "epoch": 0.304, + "grad_norm": 0.6419801158526326, + "learning_rate": 2.016e-05, + "loss": 0.5295, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4556776285171509, + "step": 190, + "valid_targets_mean": 2651.4, + "valid_targets_min": 904 + }, + { + "epoch": 0.312, + "grad_norm": 0.7843457544488607, + "learning_rate": 2.0693333333333334e-05, + "loss": 0.4888, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.5008721947669983, + "step": 195, + "valid_targets_mean": 1968.2, + "valid_targets_min": 310 + }, + { + "epoch": 0.32, + "grad_norm": 0.7812164368583617, + "learning_rate": 2.1226666666666668e-05, + "loss": 0.5145, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.5007650852203369, + "step": 200, + "valid_targets_mean": 1982.9, + "valid_targets_min": 309 + }, + { + "epoch": 0.328, + "grad_norm": 0.7936202291339606, + "learning_rate": 2.1760000000000002e-05, + "loss": 0.513, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.5165877938270569, + "step": 205, + "valid_targets_mean": 1944.8, + "valid_targets_min": 411 + }, + { + "epoch": 0.336, + "grad_norm": 0.7946620651898735, + "learning_rate": 2.2293333333333336e-05, + "loss": 0.4933, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.5367198586463928, + "step": 210, + "valid_targets_mean": 1912.6, + "valid_targets_min": 303 + }, + { + "epoch": 0.344, + "grad_norm": 0.9145904545210979, + "learning_rate": 2.282666666666667e-05, + "loss": 0.5184, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.5727460384368896, + "step": 215, + "valid_targets_mean": 1951.7, + "valid_targets_min": 308 + }, + { + "epoch": 0.352, + "grad_norm": 1.003545549198533, + "learning_rate": 2.336e-05, + "loss": 0.5346, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.550015389919281, + "step": 220, + "valid_targets_mean": 1315.6, + "valid_targets_min": 237 + }, + { + "epoch": 0.36, + "grad_norm": 0.7238181904792715, + "learning_rate": 2.3893333333333337e-05, + "loss": 0.4984, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4611344337463379, + "step": 225, + "valid_targets_mean": 2388.9, + "valid_targets_min": 282 + }, + { + "epoch": 0.368, + "grad_norm": 0.9173238345035478, + "learning_rate": 2.442666666666667e-05, + "loss": 0.5142, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.5616511702537537, + "step": 230, + "valid_targets_mean": 1766.1, + "valid_targets_min": 278 + }, + { + "epoch": 0.376, + "grad_norm": 0.6478598502052655, + "learning_rate": 2.496e-05, + "loss": 0.4951, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4578341841697693, + "step": 235, + "valid_targets_mean": 2682.2, + "valid_targets_min": 432 + }, + { + "epoch": 0.384, + "grad_norm": 0.6938568409381498, + "learning_rate": 2.5493333333333335e-05, + "loss": 0.507, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.49386435747146606, + "step": 240, + "valid_targets_mean": 2382.9, + "valid_targets_min": 500 + }, + { + "epoch": 0.392, + "grad_norm": 0.7557868859907161, + "learning_rate": 2.6026666666666666e-05, + "loss": 0.5027, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.5205134749412537, + "step": 245, + "valid_targets_mean": 2112.2, + "valid_targets_min": 494 + }, + { + "epoch": 0.4, + "grad_norm": 0.8093682610285722, + "learning_rate": 2.6560000000000003e-05, + "loss": 0.5073, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.5215206742286682, + "step": 250, + "valid_targets_mean": 2039.8, + "valid_targets_min": 324 + }, + { + "epoch": 0.408, + "grad_norm": 0.7340752683942827, + "learning_rate": 2.7093333333333337e-05, + "loss": 0.5003, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.47431081533432007, + "step": 255, + "valid_targets_mean": 2196.7, + "valid_targets_min": 318 + }, + { + "epoch": 0.416, + "grad_norm": 0.6939008102591058, + "learning_rate": 2.7626666666666668e-05, + "loss": 0.5009, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4682499170303345, + "step": 260, + "valid_targets_mean": 2425.0, + "valid_targets_min": 390 + }, + { + "epoch": 0.424, + "grad_norm": 0.6863977549436804, + "learning_rate": 2.816e-05, + "loss": 0.4787, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.47333863377571106, + "step": 265, + "valid_targets_mean": 2685.8, + "valid_targets_min": 312 + }, + { + "epoch": 0.432, + "grad_norm": 0.8812971208838777, + "learning_rate": 2.869333333333334e-05, + "loss": 0.4723, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.49015623331069946, + "step": 270, + "valid_targets_mean": 1530.0, + "valid_targets_min": 313 + }, + { + "epoch": 0.44, + "grad_norm": 0.9077218939101549, + "learning_rate": 2.922666666666667e-05, + "loss": 0.4884, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4787595868110657, + "step": 275, + "valid_targets_mean": 1883.8, + "valid_targets_min": 297 + }, + { + "epoch": 0.448, + "grad_norm": 0.7582934364503591, + "learning_rate": 2.9760000000000003e-05, + "loss": 0.4814, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4282434582710266, + "step": 280, + "valid_targets_mean": 2080.2, + "valid_targets_min": 324 + }, + { + "epoch": 0.456, + "grad_norm": 0.7647736018820112, + "learning_rate": 3.0293333333333334e-05, + "loss": 0.4783, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.49454984068870544, + "step": 285, + "valid_targets_mean": 2026.6, + "valid_targets_min": 299 + }, + { + "epoch": 0.464, + "grad_norm": 0.6696963147508204, + "learning_rate": 3.0826666666666674e-05, + "loss": 0.4755, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4329226613044739, + "step": 290, + "valid_targets_mean": 2336.9, + "valid_targets_min": 377 + }, + { + "epoch": 0.472, + "grad_norm": 0.6655141023286056, + "learning_rate": 3.1360000000000005e-05, + "loss": 0.4556, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.43723297119140625, + "step": 295, + "valid_targets_mean": 2561.8, + "valid_targets_min": 294 + }, + { + "epoch": 0.48, + "grad_norm": 0.8005479126868438, + "learning_rate": 3.1893333333333335e-05, + "loss": 0.4587, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4513320028781891, + "step": 300, + "valid_targets_mean": 1946.9, + "valid_targets_min": 304 + }, + { + "epoch": 0.488, + "grad_norm": 0.9080116879046022, + "learning_rate": 3.2426666666666666e-05, + "loss": 0.4923, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.47190746665000916, + "step": 305, + "valid_targets_mean": 2113.0, + "valid_targets_min": 257 + }, + { + "epoch": 0.496, + "grad_norm": 0.6765207104829097, + "learning_rate": 3.296e-05, + "loss": 0.4641, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4598078727722168, + "step": 310, + "valid_targets_mean": 2466.9, + "valid_targets_min": 380 + }, + { + "epoch": 0.504, + "grad_norm": 0.6903161847908216, + "learning_rate": 3.349333333333334e-05, + "loss": 0.5024, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.46894365549087524, + "step": 315, + "valid_targets_mean": 2686.0, + "valid_targets_min": 354 + }, + { + "epoch": 0.512, + "grad_norm": 0.7016409822842518, + "learning_rate": 3.402666666666667e-05, + "loss": 0.478, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.47489529848098755, + "step": 320, + "valid_targets_mean": 2268.2, + "valid_targets_min": 241 + }, + { + "epoch": 0.52, + "grad_norm": 0.7043729607954898, + "learning_rate": 3.456e-05, + "loss": 0.4812, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4892638921737671, + "step": 325, + "valid_targets_mean": 2165.1, + "valid_targets_min": 300 + }, + { + "epoch": 0.528, + "grad_norm": 0.6909733881278005, + "learning_rate": 3.509333333333333e-05, + "loss": 0.4896, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4769917130470276, + "step": 330, + "valid_targets_mean": 2164.1, + "valid_targets_min": 318 + }, + { + "epoch": 0.536, + "grad_norm": 0.6824644579299378, + "learning_rate": 3.562666666666667e-05, + "loss": 0.4803, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4844236969947815, + "step": 335, + "valid_targets_mean": 2134.9, + "valid_targets_min": 323 + }, + { + "epoch": 0.544, + "grad_norm": 0.6650792790166427, + "learning_rate": 3.6160000000000006e-05, + "loss": 0.4832, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4570949971675873, + "step": 340, + "valid_targets_mean": 2403.1, + "valid_targets_min": 235 + }, + { + "epoch": 0.552, + "grad_norm": 0.7510187405765582, + "learning_rate": 3.669333333333334e-05, + "loss": 0.4781, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.47128066420555115, + "step": 345, + "valid_targets_mean": 1849.6, + "valid_targets_min": 364 + }, + { + "epoch": 0.56, + "grad_norm": 0.7583981476824141, + "learning_rate": 3.722666666666667e-05, + "loss": 0.4677, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4223896861076355, + "step": 350, + "valid_targets_mean": 1775.6, + "valid_targets_min": 296 + }, + { + "epoch": 0.568, + "grad_norm": 0.786928642756561, + "learning_rate": 3.7760000000000004e-05, + "loss": 0.5033, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.5244324207305908, + "step": 355, + "valid_targets_mean": 1991.3, + "valid_targets_min": 321 + }, + { + "epoch": 0.576, + "grad_norm": 0.7159975876945588, + "learning_rate": 3.8293333333333335e-05, + "loss": 0.456, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.47184303402900696, + "step": 360, + "valid_targets_mean": 2140.4, + "valid_targets_min": 363 + }, + { + "epoch": 0.584, + "grad_norm": 0.730639760598974, + "learning_rate": 3.882666666666667e-05, + "loss": 0.4611, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4932243525981903, + "step": 365, + "valid_targets_mean": 2141.9, + "valid_targets_min": 386 + }, + { + "epoch": 0.592, + "grad_norm": 0.8175085697300658, + "learning_rate": 3.936e-05, + "loss": 0.4931, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.49952882528305054, + "step": 370, + "valid_targets_mean": 1697.2, + "valid_targets_min": 457 + }, + { + "epoch": 0.6, + "grad_norm": 0.6454425281547672, + "learning_rate": 3.989333333333333e-05, + "loss": 0.4682, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.43333718180656433, + "step": 375, + "valid_targets_mean": 2390.2, + "valid_targets_min": 470 + }, + { + "epoch": 0.608, + "grad_norm": 0.8248756022257087, + "learning_rate": 3.9999861365387784e-05, + "loss": 0.4802, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.5221852660179138, + "step": 380, + "valid_targets_mean": 1886.6, + "valid_targets_min": 316 + }, + { + "epoch": 0.616, + "grad_norm": 0.7211999065193525, + "learning_rate": 3.9999298165569614e-05, + "loss": 0.4611, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4462135434150696, + "step": 385, + "valid_targets_mean": 2022.9, + "valid_targets_min": 315 + }, + { + "epoch": 0.624, + "grad_norm": 0.7200019231629534, + "learning_rate": 3.999830174807269e-05, + "loss": 0.453, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.47809314727783203, + "step": 390, + "valid_targets_mean": 1950.5, + "valid_targets_min": 319 + }, + { + "epoch": 0.632, + "grad_norm": 0.6858645868739105, + "learning_rate": 3.9996872134481036e-05, + "loss": 0.4651, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4543530344963074, + "step": 395, + "valid_targets_mean": 1936.9, + "valid_targets_min": 341 + }, + { + "epoch": 0.64, + "grad_norm": 0.6573647289491387, + "learning_rate": 3.999500935576245e-05, + "loss": 0.4813, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4598807692527771, + "step": 400, + "valid_targets_mean": 2493.0, + "valid_targets_min": 398 + }, + { + "epoch": 0.648, + "grad_norm": 0.6225096702898597, + "learning_rate": 3.999271345226776e-05, + "loss": 0.447, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.44423946738243103, + "step": 405, + "valid_targets_mean": 2727.8, + "valid_targets_min": 455 + }, + { + "epoch": 0.656, + "grad_norm": 0.7976543700191413, + "learning_rate": 3.9989984473730035e-05, + "loss": 0.465, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4622921943664551, + "step": 410, + "valid_targets_mean": 1817.0, + "valid_targets_min": 254 + }, + { + "epoch": 0.664, + "grad_norm": 0.6816915744176464, + "learning_rate": 3.998682247926343e-05, + "loss": 0.4608, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.457396000623703, + "step": 415, + "valid_targets_mean": 2165.4, + "valid_targets_min": 363 + }, + { + "epoch": 0.672, + "grad_norm": 0.7294754263312385, + "learning_rate": 3.998322753736193e-05, + "loss": 0.4713, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.43845027685165405, + "step": 420, + "valid_targets_mean": 1932.0, + "valid_targets_min": 295 + }, + { + "epoch": 0.68, + "grad_norm": 0.7554261488577307, + "learning_rate": 3.99791997258979e-05, + "loss": 0.4667, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.44777369499206543, + "step": 425, + "valid_targets_mean": 1979.6, + "valid_targets_min": 330 + }, + { + "epoch": 0.688, + "grad_norm": 0.7039021930583653, + "learning_rate": 3.997473913212036e-05, + "loss": 0.4629, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4474635720252991, + "step": 430, + "valid_targets_mean": 1963.6, + "valid_targets_min": 337 + }, + { + "epoch": 0.696, + "grad_norm": 0.6450602000693427, + "learning_rate": 3.9969845852653087e-05, + "loss": 0.438, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4050423204898834, + "step": 435, + "valid_targets_mean": 1908.8, + "valid_targets_min": 220 + }, + { + "epoch": 0.704, + "grad_norm": 0.7302175415037188, + "learning_rate": 3.996451999349258e-05, + "loss": 0.4649, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.46385928988456726, + "step": 440, + "valid_targets_mean": 1720.4, + "valid_targets_min": 262 + }, + { + "epoch": 0.712, + "grad_norm": 0.699254201843429, + "learning_rate": 3.995876167000569e-05, + "loss": 0.4647, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.43578875064849854, + "step": 445, + "valid_targets_mean": 1768.2, + "valid_targets_min": 283 + }, + { + "epoch": 0.72, + "grad_norm": 0.6980324289594738, + "learning_rate": 3.9952571006927186e-05, + "loss": 0.4622, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4525895416736603, + "step": 450, + "valid_targets_mean": 1971.2, + "valid_targets_min": 274 + }, + { + "epoch": 0.728, + "grad_norm": 0.6406310518930854, + "learning_rate": 3.9945948138356995e-05, + "loss": 0.4376, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.42803841829299927, + "step": 455, + "valid_targets_mean": 2110.1, + "valid_targets_min": 261 + }, + { + "epoch": 0.736, + "grad_norm": 0.6437606245360337, + "learning_rate": 3.993889320775735e-05, + "loss": 0.4465, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4538410007953644, + "step": 460, + "valid_targets_mean": 1945.8, + "valid_targets_min": 359 + }, + { + "epoch": 0.744, + "grad_norm": 0.7279341720087018, + "learning_rate": 3.9931406367949627e-05, + "loss": 0.4671, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.47880733013153076, + "step": 465, + "valid_targets_mean": 2008.7, + "valid_targets_min": 269 + }, + { + "epoch": 0.752, + "grad_norm": 0.8141387889761877, + "learning_rate": 3.9923487781111106e-05, + "loss": 0.4427, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.46549275517463684, + "step": 470, + "valid_targets_mean": 1529.1, + "valid_targets_min": 259 + }, + { + "epoch": 0.76, + "grad_norm": 0.6542535470707088, + "learning_rate": 3.9915137618771386e-05, + "loss": 0.4447, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.42678263783454895, + "step": 475, + "valid_targets_mean": 1936.6, + "valid_targets_min": 334 + }, + { + "epoch": 0.768, + "grad_norm": 0.7842908199505598, + "learning_rate": 3.9906356061808713e-05, + "loss": 0.4576, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.46588754653930664, + "step": 480, + "valid_targets_mean": 1561.8, + "valid_targets_min": 422 + }, + { + "epoch": 0.776, + "grad_norm": 0.6449123706635504, + "learning_rate": 3.9897143300446055e-05, + "loss": 0.4296, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4231323301792145, + "step": 485, + "valid_targets_mean": 2438.3, + "valid_targets_min": 425 + }, + { + "epoch": 0.784, + "grad_norm": 0.6181873020030192, + "learning_rate": 3.988749953424696e-05, + "loss": 0.4855, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.45973435044288635, + "step": 490, + "valid_targets_mean": 2474.9, + "valid_targets_min": 365 + }, + { + "epoch": 0.792, + "grad_norm": 0.626473245513706, + "learning_rate": 3.9877424972111264e-05, + "loss": 0.4606, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.48623594641685486, + "step": 495, + "valid_targets_mean": 2455.1, + "valid_targets_min": 561 + }, + { + "epoch": 0.8, + "grad_norm": 0.7003221204119682, + "learning_rate": 3.9866919832270554e-05, + "loss": 0.4389, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.40918204188346863, + "step": 500, + "valid_targets_mean": 1859.0, + "valid_targets_min": 296 + }, + { + "epoch": 0.808, + "grad_norm": 0.6135130789421956, + "learning_rate": 3.9855984342283414e-05, + "loss": 0.4553, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.41726401448249817, + "step": 505, + "valid_targets_mean": 2217.7, + "valid_targets_min": 288 + }, + { + "epoch": 0.816, + "grad_norm": 0.7300560333061654, + "learning_rate": 3.9844618739030545e-05, + "loss": 0.4498, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4671459197998047, + "step": 510, + "valid_targets_mean": 1747.1, + "valid_targets_min": 345 + }, + { + "epoch": 0.824, + "grad_norm": 0.6494517318016304, + "learning_rate": 3.98328232687096e-05, + "loss": 0.4407, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.454100102186203, + "step": 515, + "valid_targets_mean": 2495.7, + "valid_targets_min": 441 + }, + { + "epoch": 0.832, + "grad_norm": 0.7385801637729515, + "learning_rate": 3.982059818682986e-05, + "loss": 0.4672, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.46154558658599854, + "step": 520, + "valid_targets_mean": 1561.5, + "valid_targets_min": 311 + }, + { + "epoch": 0.84, + "grad_norm": 0.7867067019795361, + "learning_rate": 3.980794375820669e-05, + "loss": 0.4906, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4851241707801819, + "step": 525, + "valid_targets_mean": 1604.1, + "valid_targets_min": 255 + }, + { + "epoch": 0.848, + "grad_norm": 0.6227178686981287, + "learning_rate": 3.9794860256955825e-05, + "loss": 0.4589, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4222499132156372, + "step": 530, + "valid_targets_mean": 2254.4, + "valid_targets_min": 254 + }, + { + "epoch": 0.856, + "grad_norm": 0.6856645208311387, + "learning_rate": 3.9781347966487415e-05, + "loss": 0.4511, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.44948041439056396, + "step": 535, + "valid_targets_mean": 2073.2, + "valid_targets_min": 310 + }, + { + "epoch": 0.864, + "grad_norm": 0.7450304493429178, + "learning_rate": 3.9767407179499875e-05, + "loss": 0.4564, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.48208606243133545, + "step": 540, + "valid_targets_mean": 1782.9, + "valid_targets_min": 376 + }, + { + "epoch": 0.872, + "grad_norm": 0.7714772450276868, + "learning_rate": 3.975303819797358e-05, + "loss": 0.4817, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.5147331953048706, + "step": 545, + "valid_targets_mean": 1765.1, + "valid_targets_min": 404 + }, + { + "epoch": 0.88, + "grad_norm": 0.6262710369567205, + "learning_rate": 3.973824133316431e-05, + "loss": 0.4573, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.39105427265167236, + "step": 550, + "valid_targets_mean": 2131.3, + "valid_targets_min": 312 + }, + { + "epoch": 0.888, + "grad_norm": 0.6110261473931465, + "learning_rate": 3.972301690559645e-05, + "loss": 0.4233, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.44210729002952576, + "step": 555, + "valid_targets_mean": 2380.4, + "valid_targets_min": 511 + }, + { + "epoch": 0.896, + "grad_norm": 0.6805369584457025, + "learning_rate": 3.970736524505615e-05, + "loss": 0.469, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.45157796144485474, + "step": 560, + "valid_targets_mean": 2133.4, + "valid_targets_min": 315 + }, + { + "epoch": 0.904, + "grad_norm": 0.7184087914454619, + "learning_rate": 3.969128669058411e-05, + "loss": 0.4386, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4871930181980133, + "step": 565, + "valid_targets_mean": 1956.6, + "valid_targets_min": 237 + }, + { + "epoch": 0.912, + "grad_norm": 0.632960168702108, + "learning_rate": 3.9674781590468256e-05, + "loss": 0.4482, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4217160642147064, + "step": 570, + "valid_targets_mean": 2133.4, + "valid_targets_min": 330 + }, + { + "epoch": 0.92, + "grad_norm": 0.5716067570655475, + "learning_rate": 3.9657850302236184e-05, + "loss": 0.4669, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.39666199684143066, + "step": 575, + "valid_targets_mean": 2431.0, + "valid_targets_min": 435 + }, + { + "epoch": 0.928, + "grad_norm": 0.6450176003705785, + "learning_rate": 3.964049319264744e-05, + "loss": 0.4478, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4384343922138214, + "step": 580, + "valid_targets_mean": 1996.9, + "valid_targets_min": 298 + }, + { + "epoch": 0.936, + "grad_norm": 0.7426898198991789, + "learning_rate": 3.962271063768555e-05, + "loss": 0.4738, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.5046041011810303, + "step": 585, + "valid_targets_mean": 1812.4, + "valid_targets_min": 400 + }, + { + "epoch": 0.944, + "grad_norm": 0.7252987868247023, + "learning_rate": 3.960450302254989e-05, + "loss": 0.4728, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4665234088897705, + "step": 590, + "valid_targets_mean": 1873.8, + "valid_targets_min": 307 + }, + { + "epoch": 0.952, + "grad_norm": 0.5509677719198839, + "learning_rate": 3.958587074164735e-05, + "loss": 0.4272, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.42085975408554077, + "step": 595, + "valid_targets_mean": 2728.6, + "valid_targets_min": 384 + }, + { + "epoch": 0.96, + "grad_norm": 0.6683946205285337, + "learning_rate": 3.956681419858376e-05, + "loss": 0.4518, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4690259099006653, + "step": 600, + "valid_targets_mean": 2017.5, + "valid_targets_min": 293 + }, + { + "epoch": 0.968, + "grad_norm": 0.6840253099847687, + "learning_rate": 3.954733380615516e-05, + "loss": 0.4495, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4364042282104492, + "step": 605, + "valid_targets_mean": 1879.7, + "valid_targets_min": 371 + }, + { + "epoch": 0.976, + "grad_norm": 0.6260511177922704, + "learning_rate": 3.95274299863389e-05, + "loss": 0.4462, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.45733919739723206, + "step": 610, + "valid_targets_mean": 2186.1, + "valid_targets_min": 386 + }, + { + "epoch": 0.984, + "grad_norm": 0.7134692669207365, + "learning_rate": 3.950710317028443e-05, + "loss": 0.4395, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.50779128074646, + "step": 615, + "valid_targets_mean": 2000.1, + "valid_targets_min": 327 + }, + { + "epoch": 0.992, + "grad_norm": 0.6892162511444004, + "learning_rate": 3.9486353798303996e-05, + "loss": 0.4426, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4779299199581146, + "step": 620, + "valid_targets_mean": 1964.6, + "valid_targets_min": 356 + }, + { + "epoch": 1.0, + "grad_norm": 0.6515675698335721, + "learning_rate": 3.946518231986313e-05, + "loss": 0.4313, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4520461857318878, + "step": 625, + "valid_targets_mean": 2148.9, + "valid_targets_min": 421 + }, + { + "epoch": 1.008, + "grad_norm": 0.6240250726279455, + "learning_rate": 3.9443589193570847e-05, + "loss": 0.4267, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4049533009529114, + "step": 630, + "valid_targets_mean": 1969.0, + "valid_targets_min": 286 + }, + { + "epoch": 1.016, + "grad_norm": 0.6414416644443255, + "learning_rate": 3.942157488716976e-05, + "loss": 0.4356, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4244895577430725, + "step": 635, + "valid_targets_mean": 2082.3, + "valid_targets_min": 324 + }, + { + "epoch": 1.024, + "grad_norm": 0.6241575168649688, + "learning_rate": 3.939913987752595e-05, + "loss": 0.4312, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.408047080039978, + "step": 640, + "valid_targets_mean": 2315.9, + "valid_targets_min": 447 + }, + { + "epoch": 1.032, + "grad_norm": 0.5633755778877994, + "learning_rate": 3.9376284650618605e-05, + "loss": 0.4078, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.39470237493515015, + "step": 645, + "valid_targets_mean": 2380.6, + "valid_targets_min": 426 + }, + { + "epoch": 1.04, + "grad_norm": 0.6439090216593515, + "learning_rate": 3.935300970152952e-05, + "loss": 0.4466, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.43030333518981934, + "step": 650, + "valid_targets_mean": 1925.1, + "valid_targets_min": 271 + }, + { + "epoch": 1.048, + "grad_norm": 0.6413404699842615, + "learning_rate": 3.932931553443235e-05, + "loss": 0.4039, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4175097346305847, + "step": 655, + "valid_targets_mean": 2288.7, + "valid_targets_min": 458 + }, + { + "epoch": 1.056, + "grad_norm": 0.7348898005193101, + "learning_rate": 3.930520266258173e-05, + "loss": 0.4354, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4317247271537781, + "step": 660, + "valid_targets_mean": 1749.6, + "valid_targets_min": 285 + }, + { + "epoch": 1.064, + "grad_norm": 0.6213909552275446, + "learning_rate": 3.928067160830208e-05, + "loss": 0.4201, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4198188781738281, + "step": 665, + "valid_targets_mean": 2220.9, + "valid_targets_min": 322 + }, + { + "epoch": 1.072, + "grad_norm": 0.7037747317197413, + "learning_rate": 3.925572290297638e-05, + "loss": 0.4307, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4192028343677521, + "step": 670, + "valid_targets_mean": 1968.8, + "valid_targets_min": 390 + }, + { + "epoch": 1.08, + "grad_norm": 0.7120042990513531, + "learning_rate": 3.9230357087034606e-05, + "loss": 0.4386, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4463686943054199, + "step": 675, + "valid_targets_mean": 1814.4, + "valid_targets_min": 358 + }, + { + "epoch": 1.088, + "grad_norm": 0.5526479446110066, + "learning_rate": 3.9204574709942036e-05, + "loss": 0.4459, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4031902849674225, + "step": 680, + "valid_targets_mean": 2819.1, + "valid_targets_min": 298 + }, + { + "epoch": 1.096, + "grad_norm": 0.6665996408067918, + "learning_rate": 3.917837633018734e-05, + "loss": 0.4074, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.43310776352882385, + "step": 685, + "valid_targets_mean": 2139.0, + "valid_targets_min": 429 + }, + { + "epoch": 1.104, + "grad_norm": 0.5995581856388195, + "learning_rate": 3.915176251527051e-05, + "loss": 0.4103, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3874262571334839, + "step": 690, + "valid_targets_mean": 2195.1, + "valid_targets_min": 311 + }, + { + "epoch": 1.112, + "grad_norm": 0.5356516953538547, + "learning_rate": 3.912473384169051e-05, + "loss": 0.437, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4291180372238159, + "step": 695, + "valid_targets_mean": 2826.2, + "valid_targets_min": 297 + }, + { + "epoch": 1.12, + "grad_norm": 0.7220284112015432, + "learning_rate": 3.9097290894932866e-05, + "loss": 0.4364, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4589844346046448, + "step": 700, + "valid_targets_mean": 2123.4, + "valid_targets_min": 381 + }, + { + "epoch": 1.1280000000000001, + "grad_norm": 0.5843467247568136, + "learning_rate": 3.906943426945691e-05, + "loss": 0.4298, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.45251959562301636, + "step": 705, + "valid_targets_mean": 2777.2, + "valid_targets_min": 554 + }, + { + "epoch": 1.1360000000000001, + "grad_norm": 0.5965008772450432, + "learning_rate": 3.9041164568682955e-05, + "loss": 0.4132, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.36841651797294617, + "step": 710, + "valid_targets_mean": 2081.3, + "valid_targets_min": 267 + }, + { + "epoch": 1.144, + "grad_norm": 0.7628305931405186, + "learning_rate": 3.90124824049792e-05, + "loss": 0.4238, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.5438158512115479, + "step": 715, + "valid_targets_mean": 2172.4, + "valid_targets_min": 344 + }, + { + "epoch": 1.152, + "grad_norm": 0.5994049693791643, + "learning_rate": 3.8983388399648465e-05, + "loss": 0.4144, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4170791208744049, + "step": 720, + "valid_targets_mean": 2237.7, + "valid_targets_min": 377 + }, + { + "epoch": 1.16, + "grad_norm": 0.6965418685020897, + "learning_rate": 3.895388318291474e-05, + "loss": 0.4284, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.47118836641311646, + "step": 725, + "valid_targets_mean": 2137.4, + "valid_targets_min": 296 + }, + { + "epoch": 1.168, + "grad_norm": 0.7892481191638123, + "learning_rate": 3.892396739390952e-05, + "loss": 0.4444, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4336376190185547, + "step": 730, + "valid_targets_mean": 1510.0, + "valid_targets_min": 334 + }, + { + "epoch": 1.176, + "grad_norm": 0.5947107495004924, + "learning_rate": 3.8893641680657986e-05, + "loss": 0.4488, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4223913550376892, + "step": 735, + "valid_targets_mean": 2586.2, + "valid_targets_min": 389 + }, + { + "epoch": 1.184, + "grad_norm": 0.6428094395927993, + "learning_rate": 3.886290670006495e-05, + "loss": 0.4557, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4669094681739807, + "step": 740, + "valid_targets_mean": 2241.1, + "valid_targets_min": 370 + }, + { + "epoch": 1.192, + "grad_norm": 0.621692085111297, + "learning_rate": 3.8831763117900605e-05, + "loss": 0.4129, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.43175309896469116, + "step": 745, + "valid_targets_mean": 2252.1, + "valid_targets_min": 320 + }, + { + "epoch": 1.2, + "grad_norm": 0.6051358606200022, + "learning_rate": 3.8800211608786166e-05, + "loss": 0.4323, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4480053782463074, + "step": 750, + "valid_targets_mean": 2474.6, + "valid_targets_min": 312 + }, + { + "epoch": 1.208, + "grad_norm": 0.6295723423544923, + "learning_rate": 3.876825285617918e-05, + "loss": 0.4264, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.38263633847236633, + "step": 755, + "valid_targets_mean": 1969.8, + "valid_targets_min": 366 + }, + { + "epoch": 1.216, + "grad_norm": 0.6768797453110101, + "learning_rate": 3.873588755235876e-05, + "loss": 0.4226, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4260578155517578, + "step": 760, + "valid_targets_mean": 2088.6, + "valid_targets_min": 308 + }, + { + "epoch": 1.224, + "grad_norm": 0.7075395848719739, + "learning_rate": 3.870311639841062e-05, + "loss": 0.425, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.41223227977752686, + "step": 765, + "valid_targets_mean": 1929.6, + "valid_targets_min": 359 + }, + { + "epoch": 1.232, + "grad_norm": 0.7367787353503337, + "learning_rate": 3.866994010421182e-05, + "loss": 0.3936, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.41890084743499756, + "step": 770, + "valid_targets_mean": 1651.1, + "valid_targets_min": 257 + }, + { + "epoch": 1.24, + "grad_norm": 0.8566873360763317, + "learning_rate": 3.863635938841545e-05, + "loss": 0.4329, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4514118432998657, + "step": 775, + "valid_targets_mean": 1363.1, + "valid_targets_min": 309 + }, + { + "epoch": 1.248, + "grad_norm": 0.6268973996841051, + "learning_rate": 3.8602374978435015e-05, + "loss": 0.4064, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.38157105445861816, + "step": 780, + "valid_targets_mean": 1858.1, + "valid_targets_min": 253 + }, + { + "epoch": 1.256, + "grad_norm": 0.6699996219849346, + "learning_rate": 3.8567987610428705e-05, + "loss": 0.4317, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.41753286123275757, + "step": 785, + "valid_targets_mean": 1925.1, + "valid_targets_min": 392 + }, + { + "epoch": 1.264, + "grad_norm": 0.5754399055059435, + "learning_rate": 3.853319802928345e-05, + "loss": 0.4159, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.39910656213760376, + "step": 790, + "valid_targets_mean": 2242.6, + "valid_targets_min": 305 + }, + { + "epoch": 1.272, + "grad_norm": 0.7023438254788777, + "learning_rate": 3.849800698859877e-05, + "loss": 0.4236, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.38505062460899353, + "step": 795, + "valid_targets_mean": 1530.5, + "valid_targets_min": 307 + }, + { + "epoch": 1.28, + "grad_norm": 0.7031660958931999, + "learning_rate": 3.846241525067047e-05, + "loss": 0.4472, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4855435788631439, + "step": 800, + "valid_targets_mean": 1791.7, + "valid_targets_min": 252 + }, + { + "epoch": 1.288, + "grad_norm": 0.6212443097042906, + "learning_rate": 3.842642358647411e-05, + "loss": 0.4305, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3779318928718567, + "step": 805, + "valid_targets_mean": 1982.6, + "valid_targets_min": 336 + }, + { + "epoch": 1.296, + "grad_norm": 0.7272523602576765, + "learning_rate": 3.839003277564831e-05, + "loss": 0.4223, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.46407386660575867, + "step": 810, + "valid_targets_mean": 1663.7, + "valid_targets_min": 319 + }, + { + "epoch": 1.304, + "grad_norm": 0.7348835291097465, + "learning_rate": 3.835324360647785e-05, + "loss": 0.4367, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4778377413749695, + "step": 815, + "valid_targets_mean": 1524.9, + "valid_targets_min": 262 + }, + { + "epoch": 1.312, + "grad_norm": 0.7212111246272881, + "learning_rate": 3.831605687587663e-05, + "loss": 0.4178, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.46456441283226013, + "step": 820, + "valid_targets_mean": 1755.3, + "valid_targets_min": 333 + }, + { + "epoch": 1.32, + "grad_norm": 0.6552147965678339, + "learning_rate": 3.827847338937037e-05, + "loss": 0.4089, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4211863577365875, + "step": 825, + "valid_targets_mean": 1804.6, + "valid_targets_min": 315 + }, + { + "epoch": 1.328, + "grad_norm": 0.5902060523272189, + "learning_rate": 3.824049396107918e-05, + "loss": 0.4286, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.41596031188964844, + "step": 830, + "valid_targets_mean": 2226.1, + "valid_targets_min": 455 + }, + { + "epoch": 1.336, + "grad_norm": 0.5721474738378802, + "learning_rate": 3.8202119413699914e-05, + "loss": 0.4402, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4118640422821045, + "step": 835, + "valid_targets_mean": 2640.4, + "valid_targets_min": 437 + }, + { + "epoch": 1.3439999999999999, + "grad_norm": 0.49927426807478753, + "learning_rate": 3.8163350578488366e-05, + "loss": 0.4249, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3671061396598816, + "step": 840, + "valid_targets_mean": 2876.6, + "valid_targets_min": 485 + }, + { + "epoch": 1.3519999999999999, + "grad_norm": 0.5891485955551514, + "learning_rate": 3.812418829524124e-05, + "loss": 0.4233, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.41898277401924133, + "step": 845, + "valid_targets_mean": 2178.2, + "valid_targets_min": 380 + }, + { + "epoch": 1.3599999999999999, + "grad_norm": 0.7378900002873952, + "learning_rate": 3.8084633412277974e-05, + "loss": 0.4138, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.45365267992019653, + "step": 850, + "valid_targets_mean": 1550.7, + "valid_targets_min": 315 + }, + { + "epoch": 1.3679999999999999, + "grad_norm": 0.6136454073663492, + "learning_rate": 3.804468678642238e-05, + "loss": 0.4314, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.42243918776512146, + "step": 855, + "valid_targets_mean": 2308.2, + "valid_targets_min": 417 + }, + { + "epoch": 1.376, + "grad_norm": 0.5924905188357824, + "learning_rate": 3.800434928298403e-05, + "loss": 0.4396, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.41932016611099243, + "step": 860, + "valid_targets_mean": 2239.1, + "valid_targets_min": 248 + }, + { + "epoch": 1.384, + "grad_norm": 0.6287576566557505, + "learning_rate": 3.796362177573957e-05, + "loss": 0.4144, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3955501914024353, + "step": 865, + "valid_targets_mean": 1976.8, + "valid_targets_min": 472 + }, + { + "epoch": 1.392, + "grad_norm": 0.6939588686318201, + "learning_rate": 3.792250514691378e-05, + "loss": 0.4343, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4510957896709442, + "step": 870, + "valid_targets_mean": 1880.4, + "valid_targets_min": 415 + }, + { + "epoch": 1.4, + "grad_norm": 0.740500044228491, + "learning_rate": 3.788100028716043e-05, + "loss": 0.4197, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.47122618556022644, + "step": 875, + "valid_targets_mean": 1580.5, + "valid_targets_min": 294 + }, + { + "epoch": 1.408, + "grad_norm": 0.5292231218615612, + "learning_rate": 3.7839108095543016e-05, + "loss": 0.4042, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3627355694770813, + "step": 880, + "valid_targets_mean": 3259.8, + "valid_targets_min": 335 + }, + { + "epoch": 1.416, + "grad_norm": 0.5836131862331959, + "learning_rate": 3.7796829479515295e-05, + "loss": 0.4249, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3939005136489868, + "step": 885, + "valid_targets_mean": 2124.9, + "valid_targets_min": 344 + }, + { + "epoch": 1.424, + "grad_norm": 0.583752798524485, + "learning_rate": 3.775416535490159e-05, + "loss": 0.4176, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4177761971950531, + "step": 890, + "valid_targets_mean": 2234.2, + "valid_targets_min": 296 + }, + { + "epoch": 1.432, + "grad_norm": 0.6347872247635064, + "learning_rate": 3.7711116645876984e-05, + "loss": 0.4324, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4340249300003052, + "step": 895, + "valid_targets_mean": 2302.2, + "valid_targets_min": 336 + }, + { + "epoch": 1.44, + "grad_norm": 0.6619847945161467, + "learning_rate": 3.7667684284947286e-05, + "loss": 0.442, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4371194839477539, + "step": 900, + "valid_targets_mean": 1665.0, + "valid_targets_min": 315 + }, + { + "epoch": 1.448, + "grad_norm": 0.6571662766303369, + "learning_rate": 3.762386921292885e-05, + "loss": 0.4223, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.40661561489105225, + "step": 905, + "valid_targets_mean": 1818.6, + "valid_targets_min": 235 + }, + { + "epoch": 1.456, + "grad_norm": 0.890222065122217, + "learning_rate": 3.757967237892818e-05, + "loss": 0.4432, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.53070467710495, + "step": 910, + "valid_targets_mean": 1289.7, + "valid_targets_min": 247 + }, + { + "epoch": 1.464, + "grad_norm": 0.6535190488820531, + "learning_rate": 3.7535094740321334e-05, + "loss": 0.413, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.40728655457496643, + "step": 915, + "valid_targets_mean": 1934.4, + "valid_targets_min": 387 + }, + { + "epoch": 1.472, + "grad_norm": 0.6162293241343897, + "learning_rate": 3.749013726273328e-05, + "loss": 0.4072, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.37631893157958984, + "step": 920, + "valid_targets_mean": 1952.2, + "valid_targets_min": 369 + }, + { + "epoch": 1.48, + "grad_norm": 0.7475115020828276, + "learning_rate": 3.7444800920016875e-05, + "loss": 0.4291, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4568881392478943, + "step": 925, + "valid_targets_mean": 1704.0, + "valid_targets_min": 287 + }, + { + "epoch": 1.488, + "grad_norm": 0.7980089165718817, + "learning_rate": 3.7399086694231864e-05, + "loss": 0.4443, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.48696544766426086, + "step": 930, + "valid_targets_mean": 1555.2, + "valid_targets_min": 342 + }, + { + "epoch": 1.496, + "grad_norm": 0.6731060349560027, + "learning_rate": 3.735299557562352e-05, + "loss": 0.4347, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.41527795791625977, + "step": 935, + "valid_targets_mean": 2049.9, + "valid_targets_min": 297 + }, + { + "epoch": 1.504, + "grad_norm": 0.6256098005592527, + "learning_rate": 3.7306528562601245e-05, + "loss": 0.431, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4265291690826416, + "step": 940, + "valid_targets_mean": 2755.6, + "valid_targets_min": 401 + }, + { + "epoch": 1.512, + "grad_norm": 0.5212788613253625, + "learning_rate": 3.7259686661716945e-05, + "loss": 0.418, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.40835675597190857, + "step": 945, + "valid_targets_mean": 2737.9, + "valid_targets_min": 600 + }, + { + "epoch": 1.52, + "grad_norm": 0.6806519793344622, + "learning_rate": 3.7212470887643204e-05, + "loss": 0.4183, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3803894519805908, + "step": 950, + "valid_targets_mean": 1731.1, + "valid_targets_min": 444 + }, + { + "epoch": 1.528, + "grad_norm": 0.6236835096285962, + "learning_rate": 3.7164882263151315e-05, + "loss": 0.4253, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.38910186290740967, + "step": 955, + "valid_targets_mean": 1891.6, + "valid_targets_min": 263 + }, + { + "epoch": 1.536, + "grad_norm": 0.6136149127563765, + "learning_rate": 3.711692181908913e-05, + "loss": 0.4123, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4157019853591919, + "step": 960, + "valid_targets_mean": 2114.7, + "valid_targets_min": 347 + }, + { + "epoch": 1.544, + "grad_norm": 0.663735677302251, + "learning_rate": 3.706859059435871e-05, + "loss": 0.4208, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4292006492614746, + "step": 965, + "valid_targets_mean": 2126.3, + "valid_targets_min": 490 + }, + { + "epoch": 1.552, + "grad_norm": 0.6782677947247513, + "learning_rate": 3.701988963589384e-05, + "loss": 0.4212, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4264790117740631, + "step": 970, + "valid_targets_mean": 1727.6, + "valid_targets_min": 310 + }, + { + "epoch": 1.56, + "grad_norm": 0.5184414648572789, + "learning_rate": 3.697081999863736e-05, + "loss": 0.417, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.32760345935821533, + "step": 975, + "valid_targets_mean": 2424.2, + "valid_targets_min": 383 + }, + { + "epoch": 1.568, + "grad_norm": 0.7911426355404242, + "learning_rate": 3.692138274551828e-05, + "loss": 0.4363, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.44045162200927734, + "step": 980, + "valid_targets_mean": 1370.6, + "valid_targets_min": 334 + }, + { + "epoch": 1.576, + "grad_norm": 0.6775066725506985, + "learning_rate": 3.687157894742878e-05, + "loss": 0.4273, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.43112820386886597, + "step": 985, + "valid_targets_mean": 1683.9, + "valid_targets_min": 519 + }, + { + "epoch": 1.584, + "grad_norm": 0.6861103493292751, + "learning_rate": 3.682140968320101e-05, + "loss": 0.4393, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4412302076816559, + "step": 990, + "valid_targets_mean": 1869.7, + "valid_targets_min": 344 + }, + { + "epoch": 1.592, + "grad_norm": 0.5222410114841262, + "learning_rate": 3.6770876039583725e-05, + "loss": 0.4057, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3676998019218445, + "step": 995, + "valid_targets_mean": 2598.5, + "valid_targets_min": 767 + }, + { + "epoch": 1.6, + "grad_norm": 0.7263035056116521, + "learning_rate": 3.671997911121871e-05, + "loss": 0.4331, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.44147950410842896, + "step": 1000, + "valid_targets_mean": 1699.4, + "valid_targets_min": 251 + }, + { + "epoch": 1.608, + "grad_norm": 0.6414401377935635, + "learning_rate": 3.6668720000617126e-05, + "loss": 0.4163, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.40461379289627075, + "step": 1005, + "valid_targets_mean": 1852.0, + "valid_targets_min": 260 + }, + { + "epoch": 1.616, + "grad_norm": 0.665030507442711, + "learning_rate": 3.661709981813558e-05, + "loss": 0.4091, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.42225736379623413, + "step": 1010, + "valid_targets_mean": 1901.0, + "valid_targets_min": 347 + }, + { + "epoch": 1.624, + "grad_norm": 0.6356037504055488, + "learning_rate": 3.6565119681952086e-05, + "loss": 0.4115, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4438035488128662, + "step": 1015, + "valid_targets_mean": 2260.8, + "valid_targets_min": 377 + }, + { + "epoch": 1.6320000000000001, + "grad_norm": 0.7732442093959834, + "learning_rate": 3.651278071804186e-05, + "loss": 0.426, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4469642639160156, + "step": 1020, + "valid_targets_mean": 1409.3, + "valid_targets_min": 269 + }, + { + "epoch": 1.6400000000000001, + "grad_norm": 0.5821537829613552, + "learning_rate": 3.646008406015291e-05, + "loss": 0.4076, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.37731000781059265, + "step": 1025, + "valid_targets_mean": 2316.8, + "valid_targets_min": 276 + }, + { + "epoch": 1.6480000000000001, + "grad_norm": 0.5801157485975053, + "learning_rate": 3.6407030849781475e-05, + "loss": 0.4079, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4072588086128235, + "step": 1030, + "valid_targets_mean": 2757.8, + "valid_targets_min": 653 + }, + { + "epoch": 1.6560000000000001, + "grad_norm": 0.8050988043280486, + "learning_rate": 3.635362223614733e-05, + "loss": 0.4277, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4489116966724396, + "step": 1035, + "valid_targets_mean": 2312.9, + "valid_targets_min": 367 + }, + { + "epoch": 1.6640000000000001, + "grad_norm": 0.47234546863098514, + "learning_rate": 3.629985937616884e-05, + "loss": 0.4062, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.34627580642700195, + "step": 1040, + "valid_targets_mean": 2884.6, + "valid_targets_min": 333 + }, + { + "epoch": 1.6720000000000002, + "grad_norm": 0.6661188572410601, + "learning_rate": 3.624574343443794e-05, + "loss": 0.4154, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4187551438808441, + "step": 1045, + "valid_targets_mean": 1783.8, + "valid_targets_min": 288 + }, + { + "epoch": 1.6800000000000002, + "grad_norm": 0.5719081478717788, + "learning_rate": 3.619127558319492e-05, + "loss": 0.4129, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3960324227809906, + "step": 1050, + "valid_targets_mean": 2359.3, + "valid_targets_min": 316 + }, + { + "epoch": 1.688, + "grad_norm": 0.8110764350477331, + "learning_rate": 3.613645700230298e-05, + "loss": 0.4175, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4404456913471222, + "step": 1055, + "valid_targets_mean": 1428.6, + "valid_targets_min": 231 + }, + { + "epoch": 1.696, + "grad_norm": 0.622831980933497, + "learning_rate": 3.6081288879222696e-05, + "loss": 0.4075, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.41072455048561096, + "step": 1060, + "valid_targets_mean": 2187.9, + "valid_targets_min": 375 + }, + { + "epoch": 1.704, + "grad_norm": 0.6293939115848193, + "learning_rate": 3.602577240898633e-05, + "loss": 0.4266, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.40541231632232666, + "step": 1065, + "valid_targets_mean": 1947.6, + "valid_targets_min": 411 + }, + { + "epoch": 1.712, + "grad_norm": 0.6851832609298075, + "learning_rate": 3.596990879417188e-05, + "loss": 0.4275, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4256119728088379, + "step": 1070, + "valid_targets_mean": 1472.7, + "valid_targets_min": 275 + }, + { + "epoch": 1.72, + "grad_norm": 0.566532739385179, + "learning_rate": 3.591369924487711e-05, + "loss": 0.4191, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.42117422819137573, + "step": 1075, + "valid_targets_mean": 2386.8, + "valid_targets_min": 513 + }, + { + "epoch": 1.728, + "grad_norm": 0.5939056675036943, + "learning_rate": 3.585714497869326e-05, + "loss": 0.4186, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4099000096321106, + "step": 1080, + "valid_targets_mean": 2248.2, + "valid_targets_min": 474 + }, + { + "epoch": 1.736, + "grad_norm": 0.5212578820241479, + "learning_rate": 3.580024722067872e-05, + "loss": 0.4016, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.36171701550483704, + "step": 1085, + "valid_targets_mean": 2644.8, + "valid_targets_min": 427 + }, + { + "epoch": 1.744, + "grad_norm": 0.609750475691826, + "learning_rate": 3.574300720333247e-05, + "loss": 0.4287, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.40230706334114075, + "step": 1090, + "valid_targets_mean": 2274.1, + "valid_targets_min": 352 + }, + { + "epoch": 1.752, + "grad_norm": 0.5794914229611461, + "learning_rate": 3.568542616656739e-05, + "loss": 0.4114, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4043058156967163, + "step": 1095, + "valid_targets_mean": 2435.9, + "valid_targets_min": 510 + }, + { + "epoch": 1.76, + "grad_norm": 0.5754248246253073, + "learning_rate": 3.5627505357683404e-05, + "loss": 0.4288, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4389524757862091, + "step": 1100, + "valid_targets_mean": 2579.7, + "valid_targets_min": 436 + }, + { + "epoch": 1.768, + "grad_norm": 0.5220103919628871, + "learning_rate": 3.5569246031340474e-05, + "loss": 0.4194, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3954297602176666, + "step": 1105, + "valid_targets_mean": 3061.6, + "valid_targets_min": 951 + }, + { + "epoch": 1.776, + "grad_norm": 0.5971366510150363, + "learning_rate": 3.5510649449531375e-05, + "loss": 0.4129, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.41429704427719116, + "step": 1110, + "valid_targets_mean": 2268.1, + "valid_targets_min": 458 + }, + { + "epoch": 1.784, + "grad_norm": 0.6407089899675739, + "learning_rate": 3.545171688155441e-05, + "loss": 0.4055, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.44793933629989624, + "step": 1115, + "valid_targets_mean": 2024.1, + "valid_targets_min": 324 + }, + { + "epoch": 1.792, + "grad_norm": 0.45308297979181544, + "learning_rate": 3.5392449603985894e-05, + "loss": 0.3998, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.32858961820602417, + "step": 1120, + "valid_targets_mean": 3030.6, + "valid_targets_min": 416 + }, + { + "epoch": 1.8, + "grad_norm": 0.6548803541766852, + "learning_rate": 3.53328489006525e-05, + "loss": 0.4377, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4534260034561157, + "step": 1125, + "valid_targets_mean": 2124.0, + "valid_targets_min": 224 + }, + { + "epoch": 1.808, + "grad_norm": 0.5816411189387833, + "learning_rate": 3.527291606260345e-05, + "loss": 0.4319, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4444667100906372, + "step": 1130, + "valid_targets_mean": 2557.6, + "valid_targets_min": 310 + }, + { + "epoch": 1.8159999999999998, + "grad_norm": 0.5639209652953462, + "learning_rate": 3.521265238808255e-05, + "loss": 0.4243, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3954368531703949, + "step": 1135, + "valid_targets_mean": 2317.2, + "valid_targets_min": 274 + }, + { + "epoch": 1.8239999999999998, + "grad_norm": 0.6457666417385817, + "learning_rate": 3.515205918250007e-05, + "loss": 0.4265, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4396483600139618, + "step": 1140, + "valid_targets_mean": 2138.4, + "valid_targets_min": 447 + }, + { + "epoch": 1.8319999999999999, + "grad_norm": 0.6196236920879702, + "learning_rate": 3.5091137758404456e-05, + "loss": 0.4249, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.39901548624038696, + "step": 1145, + "valid_targets_mean": 2027.0, + "valid_targets_min": 318 + }, + { + "epoch": 1.8399999999999999, + "grad_norm": 0.767365714888303, + "learning_rate": 3.5029889435453924e-05, + "loss": 0.4252, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.42294585704803467, + "step": 1150, + "valid_targets_mean": 1286.5, + "valid_targets_min": 382 + }, + { + "epoch": 1.8479999999999999, + "grad_norm": 0.5065314027237436, + "learning_rate": 3.496831554038784e-05, + "loss": 0.3987, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3655644357204437, + "step": 1155, + "valid_targets_mean": 2544.6, + "valid_targets_min": 269 + }, + { + "epoch": 1.8559999999999999, + "grad_norm": 0.6224593008893803, + "learning_rate": 3.490641740699801e-05, + "loss": 0.4305, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.43739748001098633, + "step": 1160, + "valid_targets_mean": 1975.6, + "valid_targets_min": 315 + }, + { + "epoch": 1.8639999999999999, + "grad_norm": 0.6327725562239911, + "learning_rate": 3.484419637609977e-05, + "loss": 0.3991, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.403567910194397, + "step": 1165, + "valid_targets_mean": 1797.9, + "valid_targets_min": 255 + }, + { + "epoch": 1.8719999999999999, + "grad_norm": 0.6577564173316653, + "learning_rate": 3.478165379550292e-05, + "loss": 0.3962, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.37376055121421814, + "step": 1170, + "valid_targets_mean": 1691.4, + "valid_targets_min": 303 + }, + { + "epoch": 1.88, + "grad_norm": 0.5264996171345204, + "learning_rate": 3.471879101998262e-05, + "loss": 0.4263, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4288645386695862, + "step": 1175, + "valid_targets_mean": 3079.2, + "valid_targets_min": 266 + }, + { + "epoch": 1.888, + "grad_norm": 0.6169190044077482, + "learning_rate": 3.465560941124992e-05, + "loss": 0.4112, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.40619945526123047, + "step": 1180, + "valid_targets_mean": 2156.5, + "valid_targets_min": 393 + }, + { + "epoch": 1.896, + "grad_norm": 0.6058457801896662, + "learning_rate": 3.459211033792233e-05, + "loss": 0.4112, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4081569314002991, + "step": 1185, + "valid_targets_mean": 2144.6, + "valid_targets_min": 866 + }, + { + "epoch": 1.904, + "grad_norm": 0.6526732474784974, + "learning_rate": 3.4528295175494194e-05, + "loss": 0.4076, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3930222988128662, + "step": 1190, + "valid_targets_mean": 1723.6, + "valid_targets_min": 359 + }, + { + "epoch": 1.912, + "grad_norm": 0.5775550846315493, + "learning_rate": 3.4464165306306845e-05, + "loss": 0.4187, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4080520570278168, + "step": 1195, + "valid_targets_mean": 2086.7, + "valid_targets_min": 340 + }, + { + "epoch": 1.92, + "grad_norm": 0.6452796330672489, + "learning_rate": 3.4399722119518675e-05, + "loss": 0.4146, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4046083092689514, + "step": 1200, + "valid_targets_mean": 1806.4, + "valid_targets_min": 271 + }, + { + "epoch": 1.928, + "grad_norm": 0.5700909509273088, + "learning_rate": 3.433496701107506e-05, + "loss": 0.3901, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.39472758769989014, + "step": 1205, + "valid_targets_mean": 2073.8, + "valid_targets_min": 372 + }, + { + "epoch": 1.936, + "grad_norm": 0.5840559603258615, + "learning_rate": 3.426990138367813e-05, + "loss": 0.4359, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4290251135826111, + "step": 1210, + "valid_targets_mean": 2437.6, + "valid_targets_min": 270 + }, + { + "epoch": 1.944, + "grad_norm": 0.6224656544519471, + "learning_rate": 3.420452664675633e-05, + "loss": 0.4173, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.40224143862724304, + "step": 1215, + "valid_targets_mean": 1884.1, + "valid_targets_min": 259 + }, + { + "epoch": 1.952, + "grad_norm": 0.6047148944697572, + "learning_rate": 3.4138844216433946e-05, + "loss": 0.4111, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3841584324836731, + "step": 1220, + "valid_targets_mean": 2120.4, + "valid_targets_min": 324 + }, + { + "epoch": 1.96, + "grad_norm": 0.6705707751188134, + "learning_rate": 3.407285551550041e-05, + "loss": 0.4103, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.42706602811813354, + "step": 1225, + "valid_targets_mean": 1812.0, + "valid_targets_min": 396 + }, + { + "epoch": 1.968, + "grad_norm": 0.7155983068467356, + "learning_rate": 3.4006561973379466e-05, + "loss": 0.4077, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.41739869117736816, + "step": 1230, + "valid_targets_mean": 1580.4, + "valid_targets_min": 345 + }, + { + "epoch": 1.976, + "grad_norm": 0.6911635067643795, + "learning_rate": 3.3939965026098245e-05, + "loss": 0.4123, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4420611560344696, + "step": 1235, + "valid_targets_mean": 1608.1, + "valid_targets_min": 328 + }, + { + "epoch": 1.984, + "grad_norm": 0.6116239335275719, + "learning_rate": 3.38730661162561e-05, + "loss": 0.3997, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.40063539147377014, + "step": 1240, + "valid_targets_mean": 1901.7, + "valid_targets_min": 284 + }, + { + "epoch": 1.992, + "grad_norm": 0.5904479518619141, + "learning_rate": 3.3805866692993414e-05, + "loss": 0.4067, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4212016761302948, + "step": 1245, + "valid_targets_mean": 2145.4, + "valid_targets_min": 295 + }, + { + "epoch": 2.0, + "grad_norm": 0.5324617772180122, + "learning_rate": 3.373836821196018e-05, + "loss": 0.418, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3954014778137207, + "step": 1250, + "valid_targets_mean": 2559.7, + "valid_targets_min": 981 + }, + { + "epoch": 2.008, + "grad_norm": 0.8042679257405158, + "learning_rate": 3.3670572135284456e-05, + "loss": 0.3902, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.43946588039398193, + "step": 1255, + "valid_targets_mean": 1429.3, + "valid_targets_min": 296 + }, + { + "epoch": 2.016, + "grad_norm": 0.5825868427650474, + "learning_rate": 3.360247993154073e-05, + "loss": 0.397, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3697652220726013, + "step": 1260, + "valid_targets_mean": 2332.2, + "valid_targets_min": 336 + }, + { + "epoch": 2.024, + "grad_norm": 0.546919198592147, + "learning_rate": 3.35340930757181e-05, + "loss": 0.3864, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3581070899963379, + "step": 1265, + "valid_targets_mean": 2658.7, + "valid_targets_min": 310 + }, + { + "epoch": 2.032, + "grad_norm": 0.5869935633164326, + "learning_rate": 3.3465413049188276e-05, + "loss": 0.3792, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3628545105457306, + "step": 1270, + "valid_targets_mean": 2203.2, + "valid_targets_min": 287 + }, + { + "epoch": 2.04, + "grad_norm": 0.5343589346106076, + "learning_rate": 3.3396441339673564e-05, + "loss": 0.3766, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.32373273372650146, + "step": 1275, + "valid_targets_mean": 2545.1, + "valid_targets_min": 308 + }, + { + "epoch": 2.048, + "grad_norm": 0.5413135343451548, + "learning_rate": 3.3327179441214574e-05, + "loss": 0.3949, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.35532093048095703, + "step": 1280, + "valid_targets_mean": 2464.2, + "valid_targets_min": 288 + }, + { + "epoch": 2.056, + "grad_norm": 0.6817969700174025, + "learning_rate": 3.325762885413791e-05, + "loss": 0.3988, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.382097989320755, + "step": 1285, + "valid_targets_mean": 1719.3, + "valid_targets_min": 294 + }, + { + "epoch": 2.064, + "grad_norm": 0.5815795286182812, + "learning_rate": 3.318779108502362e-05, + "loss": 0.3733, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.33395498991012573, + "step": 1290, + "valid_targets_mean": 1987.3, + "valid_targets_min": 413 + }, + { + "epoch": 2.072, + "grad_norm": 0.5696718808624218, + "learning_rate": 3.3117667646672616e-05, + "loss": 0.3754, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.34764641523361206, + "step": 1295, + "valid_targets_mean": 2239.6, + "valid_targets_min": 295 + }, + { + "epoch": 2.08, + "grad_norm": 0.6666024146123932, + "learning_rate": 3.304726005807386e-05, + "loss": 0.3775, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3691959083080292, + "step": 1300, + "valid_targets_mean": 1835.2, + "valid_targets_min": 303 + }, + { + "epoch": 2.088, + "grad_norm": 0.6251542692665185, + "learning_rate": 3.297656984437148e-05, + "loss": 0.388, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.37082281708717346, + "step": 1305, + "valid_targets_mean": 1891.2, + "valid_targets_min": 369 + }, + { + "epoch": 2.096, + "grad_norm": 0.7297170840175997, + "learning_rate": 3.2905598536831715e-05, + "loss": 0.4185, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.418671578168869, + "step": 1310, + "valid_targets_mean": 1513.4, + "valid_targets_min": 292 + }, + { + "epoch": 2.104, + "grad_norm": 0.5429482228005845, + "learning_rate": 3.2834347672809776e-05, + "loss": 0.3794, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.37912678718566895, + "step": 1315, + "valid_targets_mean": 2980.8, + "valid_targets_min": 418 + }, + { + "epoch": 2.112, + "grad_norm": 0.5033579471390823, + "learning_rate": 3.276281879571651e-05, + "loss": 0.3675, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.37450850009918213, + "step": 1320, + "valid_targets_mean": 3124.1, + "valid_targets_min": 457 + }, + { + "epoch": 2.12, + "grad_norm": 0.684857507960299, + "learning_rate": 3.2691013454985006e-05, + "loss": 0.3883, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.40342050790786743, + "step": 1325, + "valid_targets_mean": 1658.2, + "valid_targets_min": 282 + }, + { + "epoch": 2.128, + "grad_norm": 0.6038090725419427, + "learning_rate": 3.2618933206036994e-05, + "loss": 0.394, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.416049599647522, + "step": 1330, + "valid_targets_mean": 2440.7, + "valid_targets_min": 416 + }, + { + "epoch": 2.136, + "grad_norm": 0.5460804755588241, + "learning_rate": 3.2546579610249177e-05, + "loss": 0.381, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3303455710411072, + "step": 1335, + "valid_targets_mean": 2594.1, + "valid_targets_min": 352 + }, + { + "epoch": 2.144, + "grad_norm": 0.5097873502985986, + "learning_rate": 3.2473954234919386e-05, + "loss": 0.3831, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.35893014073371887, + "step": 1340, + "valid_targets_mean": 2558.4, + "valid_targets_min": 320 + }, + { + "epoch": 2.152, + "grad_norm": 0.8105476214389086, + "learning_rate": 3.240105865323266e-05, + "loss": 0.3837, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.41480281949043274, + "step": 1345, + "valid_targets_mean": 1307.5, + "valid_targets_min": 397 + }, + { + "epoch": 2.16, + "grad_norm": 0.5800204725164848, + "learning_rate": 3.232789444422714e-05, + "loss": 0.3803, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3462577760219574, + "step": 1350, + "valid_targets_mean": 2189.2, + "valid_targets_min": 309 + }, + { + "epoch": 2.168, + "grad_norm": 0.6828615072566614, + "learning_rate": 3.225446319275988e-05, + "loss": 0.407, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3787284195423126, + "step": 1355, + "valid_targets_mean": 1971.3, + "valid_targets_min": 359 + }, + { + "epoch": 2.176, + "grad_norm": 0.7218364657157212, + "learning_rate": 3.218076648947251e-05, + "loss": 0.3962, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.39449766278266907, + "step": 1360, + "valid_targets_mean": 1633.2, + "valid_targets_min": 383 + }, + { + "epoch": 2.184, + "grad_norm": 0.7856076876689837, + "learning_rate": 3.2106805930756804e-05, + "loss": 0.383, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.40469610691070557, + "step": 1365, + "valid_targets_mean": 1401.5, + "valid_targets_min": 332 + }, + { + "epoch": 2.192, + "grad_norm": 0.6694940856712724, + "learning_rate": 3.2032583118720045e-05, + "loss": 0.4085, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.44130784273147583, + "step": 1370, + "valid_targets_mean": 1951.4, + "valid_targets_min": 466 + }, + { + "epoch": 2.2, + "grad_norm": 0.49962476165457587, + "learning_rate": 3.195809966115038e-05, + "loss": 0.3659, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3548528552055359, + "step": 1375, + "valid_targets_mean": 2828.1, + "valid_targets_min": 200 + }, + { + "epoch": 2.208, + "grad_norm": 0.5741497964442147, + "learning_rate": 3.188335717148195e-05, + "loss": 0.3928, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3834153115749359, + "step": 1380, + "valid_targets_mean": 2393.4, + "valid_targets_min": 428 + }, + { + "epoch": 2.216, + "grad_norm": 0.5342561972025831, + "learning_rate": 3.1808357268759964e-05, + "loss": 0.3745, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.350533664226532, + "step": 1385, + "valid_targets_mean": 2746.4, + "valid_targets_min": 462 + }, + { + "epoch": 2.224, + "grad_norm": 0.5514827015335317, + "learning_rate": 3.173310157760563e-05, + "loss": 0.3747, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3370269536972046, + "step": 1390, + "valid_targets_mean": 2396.6, + "valid_targets_min": 260 + }, + { + "epoch": 2.232, + "grad_norm": 0.6274109214397176, + "learning_rate": 3.165759172818093e-05, + "loss": 0.3792, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3951869010925293, + "step": 1395, + "valid_targets_mean": 2055.7, + "valid_targets_min": 273 + }, + { + "epoch": 2.24, + "grad_norm": 0.6014822126164667, + "learning_rate": 3.158182935615336e-05, + "loss": 0.3977, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3926844000816345, + "step": 1400, + "valid_targets_mean": 2313.9, + "valid_targets_min": 306 + }, + { + "epoch": 2.248, + "grad_norm": 0.5574708482762952, + "learning_rate": 3.150581610266046e-05, + "loss": 0.3984, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3544110357761383, + "step": 1405, + "valid_targets_mean": 2392.4, + "valid_targets_min": 307 + }, + { + "epoch": 2.2560000000000002, + "grad_norm": 0.6183989505811474, + "learning_rate": 3.1429553614274256e-05, + "loss": 0.381, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.37505680322647095, + "step": 1410, + "valid_targets_mean": 2065.1, + "valid_targets_min": 367 + }, + { + "epoch": 2.2640000000000002, + "grad_norm": 0.5589768205400985, + "learning_rate": 3.1353043542965636e-05, + "loss": 0.3761, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.35910564661026, + "step": 1415, + "valid_targets_mean": 2398.2, + "valid_targets_min": 374 + }, + { + "epoch": 2.2720000000000002, + "grad_norm": 0.5865868580427955, + "learning_rate": 3.1276287546068536e-05, + "loss": 0.3754, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3461493253707886, + "step": 1420, + "valid_targets_mean": 2379.2, + "valid_targets_min": 298 + }, + { + "epoch": 2.2800000000000002, + "grad_norm": 0.6030591709759953, + "learning_rate": 3.1199287286244047e-05, + "loss": 0.3924, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3672935366630554, + "step": 1425, + "valid_targets_mean": 2200.0, + "valid_targets_min": 306 + }, + { + "epoch": 2.288, + "grad_norm": 0.7226166193399812, + "learning_rate": 3.112204443144438e-05, + "loss": 0.4048, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4033268690109253, + "step": 1430, + "valid_targets_mean": 1554.2, + "valid_targets_min": 248 + }, + { + "epoch": 2.296, + "grad_norm": 0.6717505093881606, + "learning_rate": 3.1044560654876775e-05, + "loss": 0.4009, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4155598282814026, + "step": 1435, + "valid_targets_mean": 2037.2, + "valid_targets_min": 354 + }, + { + "epoch": 2.304, + "grad_norm": 0.6812221485042874, + "learning_rate": 3.0966837634967215e-05, + "loss": 0.3894, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3771967589855194, + "step": 1440, + "valid_targets_mean": 2331.6, + "valid_targets_min": 334 + }, + { + "epoch": 2.312, + "grad_norm": 0.7530405753256025, + "learning_rate": 3.088887705532409e-05, + "loss": 0.3724, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4040983319282532, + "step": 1445, + "valid_targets_mean": 1532.9, + "valid_targets_min": 326 + }, + { + "epoch": 2.32, + "grad_norm": 0.6435976046060277, + "learning_rate": 3.081068060470174e-05, + "loss": 0.3722, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.37423545122146606, + "step": 1450, + "valid_targets_mean": 2140.8, + "valid_targets_min": 375 + }, + { + "epoch": 2.328, + "grad_norm": 0.6798369295901472, + "learning_rate": 3.073224997696385e-05, + "loss": 0.3892, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3684957027435303, + "step": 1455, + "valid_targets_mean": 1625.9, + "valid_targets_min": 312 + }, + { + "epoch": 2.336, + "grad_norm": 0.6399613920281401, + "learning_rate": 3.065358687104675e-05, + "loss": 0.3904, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.41061943769454956, + "step": 1460, + "valid_targets_mean": 2171.6, + "valid_targets_min": 298 + }, + { + "epoch": 2.344, + "grad_norm": 0.568295011340494, + "learning_rate": 3.057469299092264e-05, + "loss": 0.3697, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.363444060087204, + "step": 1465, + "valid_targets_mean": 2369.4, + "valid_targets_min": 404 + }, + { + "epoch": 2.352, + "grad_norm": 0.6749189154211231, + "learning_rate": 3.0495570045562686e-05, + "loss": 0.3702, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3943634033203125, + "step": 1470, + "valid_targets_mean": 1813.8, + "valid_targets_min": 330 + }, + { + "epoch": 2.36, + "grad_norm": 0.5424934156524889, + "learning_rate": 3.041621974889996e-05, + "loss": 0.3708, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3570939302444458, + "step": 1475, + "valid_targets_mean": 2509.6, + "valid_targets_min": 416 + }, + { + "epoch": 2.368, + "grad_norm": 0.5855175945300712, + "learning_rate": 3.0336643819792342e-05, + "loss": 0.4063, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.41212403774261475, + "step": 1480, + "valid_targets_mean": 2239.9, + "valid_targets_min": 340 + }, + { + "epoch": 2.376, + "grad_norm": 0.8589550576485474, + "learning_rate": 3.0256843981985295e-05, + "loss": 0.3957, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.42375245690345764, + "step": 1485, + "valid_targets_mean": 1203.8, + "valid_targets_min": 244 + }, + { + "epoch": 2.384, + "grad_norm": 0.5872173730521972, + "learning_rate": 3.0176821964074503e-05, + "loss": 0.3766, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.387226939201355, + "step": 1490, + "valid_targets_mean": 2280.3, + "valid_targets_min": 295 + }, + { + "epoch": 2.392, + "grad_norm": 0.5887630792204165, + "learning_rate": 3.009657949946844e-05, + "loss": 0.3711, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.36949145793914795, + "step": 1495, + "valid_targets_mean": 2453.8, + "valid_targets_min": 436 + }, + { + "epoch": 2.4, + "grad_norm": 0.5888662518478893, + "learning_rate": 3.00161183263508e-05, + "loss": 0.3863, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.36580803990364075, + "step": 1500, + "valid_targets_mean": 2082.8, + "valid_targets_min": 375 + }, + { + "epoch": 2.408, + "grad_norm": 0.7403053263669406, + "learning_rate": 2.993544018764289e-05, + "loss": 0.4124, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.405397891998291, + "step": 1505, + "valid_targets_mean": 1624.9, + "valid_targets_min": 302 + }, + { + "epoch": 2.416, + "grad_norm": 0.6912705085590389, + "learning_rate": 2.9854546830965833e-05, + "loss": 0.4003, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.43761569261550903, + "step": 1510, + "valid_targets_mean": 1889.9, + "valid_targets_min": 362 + }, + { + "epoch": 2.424, + "grad_norm": 0.672027760605599, + "learning_rate": 2.9773440008602736e-05, + "loss": 0.3808, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3860786557197571, + "step": 1515, + "valid_targets_mean": 1880.1, + "valid_targets_min": 287 + }, + { + "epoch": 2.432, + "grad_norm": 0.6176696783074045, + "learning_rate": 2.96921214774607e-05, + "loss": 0.4028, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.36806997656822205, + "step": 1520, + "valid_targets_mean": 2141.5, + "valid_targets_min": 355 + }, + { + "epoch": 2.44, + "grad_norm": 0.5813640366994491, + "learning_rate": 2.9610592999032815e-05, + "loss": 0.3873, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3732055425643921, + "step": 1525, + "valid_targets_mean": 2313.7, + "valid_targets_min": 314 + }, + { + "epoch": 2.448, + "grad_norm": 0.5839312210260318, + "learning_rate": 2.9528856339359973e-05, + "loss": 0.365, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.31384432315826416, + "step": 1530, + "valid_targets_mean": 1865.6, + "valid_targets_min": 353 + }, + { + "epoch": 2.456, + "grad_norm": 0.6155472401477654, + "learning_rate": 2.9446913268992588e-05, + "loss": 0.3956, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3974214792251587, + "step": 1535, + "valid_targets_mean": 2303.0, + "valid_targets_min": 262 + }, + { + "epoch": 2.464, + "grad_norm": 0.7703616946243632, + "learning_rate": 2.936476556295229e-05, + "loss": 0.4069, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4263591170310974, + "step": 1540, + "valid_targets_mean": 1433.4, + "valid_targets_min": 281 + }, + { + "epoch": 2.472, + "grad_norm": 0.6233807624925547, + "learning_rate": 2.928241500069346e-05, + "loss": 0.3936, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.41739413142204285, + "step": 1545, + "valid_targets_mean": 2367.2, + "valid_targets_min": 323 + }, + { + "epoch": 2.48, + "grad_norm": 0.6603074426362654, + "learning_rate": 2.9199863366064655e-05, + "loss": 0.3799, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4242068827152252, + "step": 1550, + "valid_targets_mean": 1868.7, + "valid_targets_min": 324 + }, + { + "epoch": 2.488, + "grad_norm": 0.6887756343088828, + "learning_rate": 2.9117112447270007e-05, + "loss": 0.369, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.37890493869781494, + "step": 1555, + "valid_targets_mean": 1584.6, + "valid_targets_min": 272 + }, + { + "epoch": 2.496, + "grad_norm": 0.6235381189635956, + "learning_rate": 2.9034164036830462e-05, + "loss": 0.3928, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3724491000175476, + "step": 1560, + "valid_targets_mean": 2009.0, + "valid_targets_min": 388 + }, + { + "epoch": 2.504, + "grad_norm": 0.6157420171365863, + "learning_rate": 2.8951019931544975e-05, + "loss": 0.3754, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.39165908098220825, + "step": 1565, + "valid_targets_mean": 2098.3, + "valid_targets_min": 319 + }, + { + "epoch": 2.512, + "grad_norm": 0.612679970950294, + "learning_rate": 2.8867681932451544e-05, + "loss": 0.3882, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.36644482612609863, + "step": 1570, + "valid_targets_mean": 1927.0, + "valid_targets_min": 281 + }, + { + "epoch": 2.52, + "grad_norm": 0.6582982972720843, + "learning_rate": 2.8784151844788267e-05, + "loss": 0.4039, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4032679796218872, + "step": 1575, + "valid_targets_mean": 1865.6, + "valid_targets_min": 362 + }, + { + "epoch": 2.528, + "grad_norm": 0.633080352002716, + "learning_rate": 2.8700431477954155e-05, + "loss": 0.3807, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3910493850708008, + "step": 1580, + "valid_targets_mean": 2398.1, + "valid_targets_min": 331 + }, + { + "epoch": 2.536, + "grad_norm": 0.7483573101690317, + "learning_rate": 2.8616522645470012e-05, + "loss": 0.3926, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3737632632255554, + "step": 1585, + "valid_targets_mean": 1429.8, + "valid_targets_min": 222 + }, + { + "epoch": 2.544, + "grad_norm": 0.6869251081787947, + "learning_rate": 2.8532427164939086e-05, + "loss": 0.3744, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.40796542167663574, + "step": 1590, + "valid_targets_mean": 1823.2, + "valid_targets_min": 310 + }, + { + "epoch": 2.552, + "grad_norm": 0.7574071608548221, + "learning_rate": 2.844814685800776e-05, + "loss": 0.3992, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4246245324611664, + "step": 1595, + "valid_targets_mean": 1657.1, + "valid_targets_min": 413 + }, + { + "epoch": 2.56, + "grad_norm": 0.6473114839665947, + "learning_rate": 2.8363683550326028e-05, + "loss": 0.3992, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.39974695444107056, + "step": 1600, + "valid_targets_mean": 2080.9, + "valid_targets_min": 416 + }, + { + "epoch": 2.568, + "grad_norm": 0.5386297063629182, + "learning_rate": 2.8279039071508024e-05, + "loss": 0.3745, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.37923723459243774, + "step": 1605, + "valid_targets_mean": 2865.6, + "valid_targets_min": 292 + }, + { + "epoch": 2.576, + "grad_norm": 0.5494405350429861, + "learning_rate": 2.81942152550923e-05, + "loss": 0.3814, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3400484025478363, + "step": 1610, + "valid_targets_mean": 2427.5, + "valid_targets_min": 319 + }, + { + "epoch": 2.584, + "grad_norm": 0.7521047882538389, + "learning_rate": 2.810921393850219e-05, + "loss": 0.3743, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.42146170139312744, + "step": 1615, + "valid_targets_mean": 1535.6, + "valid_targets_min": 304 + }, + { + "epoch": 2.592, + "grad_norm": 0.7367591533237979, + "learning_rate": 2.802403696300595e-05, + "loss": 0.3977, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.41838860511779785, + "step": 1620, + "valid_targets_mean": 1505.9, + "valid_targets_min": 309 + }, + { + "epoch": 2.6, + "grad_norm": 0.6133882069897932, + "learning_rate": 2.7938686173676915e-05, + "loss": 0.3734, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.37919414043426514, + "step": 1625, + "valid_targets_mean": 1929.9, + "valid_targets_min": 322 + }, + { + "epoch": 2.608, + "grad_norm": 0.7141355316967185, + "learning_rate": 2.7853163419353505e-05, + "loss": 0.3914, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4131407141685486, + "step": 1630, + "valid_targets_mean": 1792.1, + "valid_targets_min": 518 + }, + { + "epoch": 2.616, + "grad_norm": 0.6010769733636125, + "learning_rate": 2.776747055259918e-05, + "loss": 0.3846, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.36372077465057373, + "step": 1635, + "valid_targets_mean": 1809.2, + "valid_targets_min": 413 + }, + { + "epoch": 2.624, + "grad_norm": 0.6864102738551969, + "learning_rate": 2.768160942966233e-05, + "loss": 0.3801, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3999949097633362, + "step": 1640, + "valid_targets_mean": 1943.8, + "valid_targets_min": 370 + }, + { + "epoch": 2.632, + "grad_norm": 0.6882655679500438, + "learning_rate": 2.759558191043603e-05, + "loss": 0.3866, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.37626349925994873, + "step": 1645, + "valid_targets_mean": 1692.8, + "valid_targets_min": 308 + }, + { + "epoch": 2.64, + "grad_norm": 0.5689931159535411, + "learning_rate": 2.7509389858417783e-05, + "loss": 0.383, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.35382065176963806, + "step": 1650, + "valid_targets_mean": 2328.6, + "valid_targets_min": 247 + }, + { + "epoch": 2.648, + "grad_norm": 0.641141720782201, + "learning_rate": 2.7423035140669147e-05, + "loss": 0.3977, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.40661799907684326, + "step": 1655, + "valid_targets_mean": 2178.4, + "valid_targets_min": 363 + }, + { + "epoch": 2.656, + "grad_norm": 0.540586590816511, + "learning_rate": 2.7336519627775288e-05, + "loss": 0.3695, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3675329089164734, + "step": 1660, + "valid_targets_mean": 2639.7, + "valid_targets_min": 363 + }, + { + "epoch": 2.664, + "grad_norm": 0.7036792247374296, + "learning_rate": 2.724984519380444e-05, + "loss": 0.4005, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.40206435322761536, + "step": 1665, + "valid_targets_mean": 1689.8, + "valid_targets_min": 252 + }, + { + "epoch": 2.672, + "grad_norm": 0.7009327566269239, + "learning_rate": 2.7163013716267353e-05, + "loss": 0.4008, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3990801274776459, + "step": 1670, + "valid_targets_mean": 1999.7, + "valid_targets_min": 363 + }, + { + "epoch": 2.68, + "grad_norm": 0.6572656408273875, + "learning_rate": 2.707602707607659e-05, + "loss": 0.3943, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.36183205246925354, + "step": 1675, + "valid_targets_mean": 1949.6, + "valid_targets_min": 329 + }, + { + "epoch": 2.6879999999999997, + "grad_norm": 0.6903300748113985, + "learning_rate": 2.6988887157505786e-05, + "loss": 0.3853, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4237315058708191, + "step": 1680, + "valid_targets_mean": 1675.9, + "valid_targets_min": 344 + }, + { + "epoch": 2.6959999999999997, + "grad_norm": 0.6289056881874757, + "learning_rate": 2.6901595848148842e-05, + "loss": 0.3943, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3660340905189514, + "step": 1685, + "valid_targets_mean": 1781.6, + "valid_targets_min": 372 + }, + { + "epoch": 2.7039999999999997, + "grad_norm": 0.6448522226591724, + "learning_rate": 2.681415503887904e-05, + "loss": 0.3868, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3989563286304474, + "step": 1690, + "valid_targets_mean": 1838.6, + "valid_targets_min": 462 + }, + { + "epoch": 2.7119999999999997, + "grad_norm": 0.5914371896855446, + "learning_rate": 2.672656662380805e-05, + "loss": 0.3882, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3665776252746582, + "step": 1695, + "valid_targets_mean": 2105.9, + "valid_targets_min": 406 + }, + { + "epoch": 2.7199999999999998, + "grad_norm": 0.6439781732921271, + "learning_rate": 2.6638832500244967e-05, + "loss": 0.3809, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4117533564567566, + "step": 1700, + "valid_targets_mean": 2121.2, + "valid_targets_min": 328 + }, + { + "epoch": 2.7279999999999998, + "grad_norm": 0.6413138381209691, + "learning_rate": 2.655095456865514e-05, + "loss": 0.3938, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3822566866874695, + "step": 1705, + "valid_targets_mean": 2074.2, + "valid_targets_min": 343 + }, + { + "epoch": 2.7359999999999998, + "grad_norm": 0.5879107484234825, + "learning_rate": 2.6462934732619047e-05, + "loss": 0.3824, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3646334409713745, + "step": 1710, + "valid_targets_mean": 2142.2, + "valid_targets_min": 356 + }, + { + "epoch": 2.7439999999999998, + "grad_norm": 0.6103736396812272, + "learning_rate": 2.6374774898791047e-05, + "loss": 0.3786, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.42227813601493835, + "step": 1715, + "valid_targets_mean": 2381.5, + "valid_targets_min": 265 + }, + { + "epoch": 2.752, + "grad_norm": 0.7336758163732975, + "learning_rate": 2.6286476976858084e-05, + "loss": 0.3726, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3858756721019745, + "step": 1720, + "valid_targets_mean": 1364.0, + "valid_targets_min": 299 + }, + { + "epoch": 2.76, + "grad_norm": 0.5804836400275201, + "learning_rate": 2.619804287949831e-05, + "loss": 0.4018, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.40446221828460693, + "step": 1725, + "valid_targets_mean": 2540.4, + "valid_targets_min": 333 + }, + { + "epoch": 2.768, + "grad_norm": 0.6545153531379371, + "learning_rate": 2.6109474522339676e-05, + "loss": 0.3947, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.35908615589141846, + "step": 1730, + "valid_targets_mean": 1981.3, + "valid_targets_min": 343 + }, + { + "epoch": 2.776, + "grad_norm": 0.5989728174431338, + "learning_rate": 2.6020773823918414e-05, + "loss": 0.3908, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3607073426246643, + "step": 1735, + "valid_targets_mean": 2075.1, + "valid_targets_min": 259 + }, + { + "epoch": 2.784, + "grad_norm": 0.6548819062478236, + "learning_rate": 2.5931942705637473e-05, + "loss": 0.3821, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3777836561203003, + "step": 1740, + "valid_targets_mean": 1940.1, + "valid_targets_min": 231 + }, + { + "epoch": 2.792, + "grad_norm": 0.6167175937189906, + "learning_rate": 2.5842983091724923e-05, + "loss": 0.3935, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3925069272518158, + "step": 1745, + "valid_targets_mean": 2126.1, + "valid_targets_min": 302 + }, + { + "epoch": 2.8, + "grad_norm": 0.6019344696205082, + "learning_rate": 2.575389690919226e-05, + "loss": 0.3748, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.349267840385437, + "step": 1750, + "valid_targets_mean": 1979.9, + "valid_targets_min": 342 + }, + { + "epoch": 2.808, + "grad_norm": 0.7070935809183736, + "learning_rate": 2.5664686087792658e-05, + "loss": 0.3966, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4027559757232666, + "step": 1755, + "valid_targets_mean": 1645.1, + "valid_targets_min": 367 + }, + { + "epoch": 2.816, + "grad_norm": 0.5501386342474102, + "learning_rate": 2.5575352559979188e-05, + "loss": 0.3774, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.37497395277023315, + "step": 1760, + "valid_targets_mean": 2372.5, + "valid_targets_min": 399 + }, + { + "epoch": 2.824, + "grad_norm": 0.5850935407134304, + "learning_rate": 2.5485898260862936e-05, + "loss": 0.3917, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3888002038002014, + "step": 1765, + "valid_targets_mean": 2355.9, + "valid_targets_min": 566 + }, + { + "epoch": 2.832, + "grad_norm": 0.5898705932054655, + "learning_rate": 2.5396325128171072e-05, + "loss": 0.3875, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3861275017261505, + "step": 1770, + "valid_targets_mean": 2501.8, + "valid_targets_min": 359 + }, + { + "epoch": 2.84, + "grad_norm": 0.6060182566256725, + "learning_rate": 2.5306635102204942e-05, + "loss": 0.4214, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3947630524635315, + "step": 1775, + "valid_targets_mean": 2317.6, + "valid_targets_min": 275 + }, + { + "epoch": 2.848, + "grad_norm": 0.6343847329848039, + "learning_rate": 2.5216830125797943e-05, + "loss": 0.3941, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.38855668902397156, + "step": 1780, + "valid_targets_mean": 1968.6, + "valid_targets_min": 314 + }, + { + "epoch": 2.856, + "grad_norm": 0.6683194188574011, + "learning_rate": 2.5126912144273517e-05, + "loss": 0.4111, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.41185641288757324, + "step": 1785, + "valid_targets_mean": 2012.8, + "valid_targets_min": 254 + }, + { + "epoch": 2.864, + "grad_norm": 0.7043340763689518, + "learning_rate": 2.5036883105402985e-05, + "loss": 0.398, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3972740173339844, + "step": 1790, + "valid_targets_mean": 1628.4, + "valid_targets_min": 280 + }, + { + "epoch": 2.872, + "grad_norm": 0.76241175702457, + "learning_rate": 2.4946744959363343e-05, + "loss": 0.378, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3741379976272583, + "step": 1795, + "valid_targets_mean": 1619.9, + "valid_targets_min": 297 + }, + { + "epoch": 2.88, + "grad_norm": 0.5384749734802096, + "learning_rate": 2.4856499658695018e-05, + "loss": 0.3726, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.36481788754463196, + "step": 1800, + "valid_targets_mean": 2501.4, + "valid_targets_min": 437 + }, + { + "epoch": 2.888, + "grad_norm": 0.693586139949456, + "learning_rate": 2.4766149158259603e-05, + "loss": 0.3892, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.39813292026519775, + "step": 1805, + "valid_targets_mean": 1850.1, + "valid_targets_min": 237 + }, + { + "epoch": 2.896, + "grad_norm": 0.5409786475794984, + "learning_rate": 2.4675695415197476e-05, + "loss": 0.3644, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.33471423387527466, + "step": 1810, + "valid_targets_mean": 2575.4, + "valid_targets_min": 469 + }, + { + "epoch": 2.904, + "grad_norm": 0.6356472255175909, + "learning_rate": 2.458514038888543e-05, + "loss": 0.3825, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3808741569519043, + "step": 1815, + "valid_targets_mean": 1939.9, + "valid_targets_min": 386 + }, + { + "epoch": 2.912, + "grad_norm": 0.5454526005837897, + "learning_rate": 2.4494486040894208e-05, + "loss": 0.3637, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.35985440015792847, + "step": 1820, + "valid_targets_mean": 2616.5, + "valid_targets_min": 234 + }, + { + "epoch": 2.92, + "grad_norm": 0.6948250620664468, + "learning_rate": 2.440373433494603e-05, + "loss": 0.3955, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.38276222348213196, + "step": 1825, + "valid_targets_mean": 1595.8, + "valid_targets_min": 354 + }, + { + "epoch": 2.928, + "grad_norm": 0.7665181846844737, + "learning_rate": 2.4312887236872066e-05, + "loss": 0.3909, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.40464550256729126, + "step": 1830, + "valid_targets_mean": 1438.6, + "valid_targets_min": 574 + }, + { + "epoch": 2.936, + "grad_norm": 0.631549384178386, + "learning_rate": 2.4221946714569803e-05, + "loss": 0.3911, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4105145335197449, + "step": 1835, + "valid_targets_mean": 2105.4, + "valid_targets_min": 388 + }, + { + "epoch": 2.944, + "grad_norm": 0.6268974428016761, + "learning_rate": 2.4130914737960472e-05, + "loss": 0.4035, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.37901395559310913, + "step": 1840, + "valid_targets_mean": 2055.2, + "valid_targets_min": 399 + }, + { + "epoch": 2.952, + "grad_norm": 0.7217347152331498, + "learning_rate": 2.4039793278946358e-05, + "loss": 0.3734, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.38400089740753174, + "step": 1845, + "valid_targets_mean": 1730.2, + "valid_targets_min": 270 + }, + { + "epoch": 2.96, + "grad_norm": 0.7522530270283125, + "learning_rate": 2.394858431136806e-05, + "loss": 0.3971, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.43783271312713623, + "step": 1850, + "valid_targets_mean": 1617.4, + "valid_targets_min": 339 + }, + { + "epoch": 2.968, + "grad_norm": 0.7579554160801213, + "learning_rate": 2.385728981096178e-05, + "loss": 0.3865, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4064823389053345, + "step": 1855, + "valid_targets_mean": 1314.1, + "valid_targets_min": 327 + }, + { + "epoch": 2.976, + "grad_norm": 0.6285737467792446, + "learning_rate": 2.3765911755316503e-05, + "loss": 0.3596, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4188977777957916, + "step": 1860, + "valid_targets_mean": 2219.6, + "valid_targets_min": 340 + }, + { + "epoch": 2.984, + "grad_norm": 0.6156543382548351, + "learning_rate": 2.3674452123831125e-05, + "loss": 0.3703, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3826698064804077, + "step": 1865, + "valid_targets_mean": 2188.8, + "valid_targets_min": 372 + }, + { + "epoch": 2.992, + "grad_norm": 0.6093125546240513, + "learning_rate": 2.358291289767165e-05, + "loss": 0.4017, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.39687103033065796, + "step": 1870, + "valid_targets_mean": 2002.8, + "valid_targets_min": 298 + }, + { + "epoch": 3.0, + "grad_norm": 0.550468552788306, + "learning_rate": 2.3491296059728202e-05, + "loss": 0.3866, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.38205671310424805, + "step": 1875, + "valid_targets_mean": 3186.2, + "valid_targets_min": 248 + }, + { + "epoch": 3.008, + "grad_norm": 0.5782335273031184, + "learning_rate": 2.339960359457212e-05, + "loss": 0.362, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3214508891105652, + "step": 1880, + "valid_targets_mean": 2028.1, + "valid_targets_min": 534 + }, + { + "epoch": 3.016, + "grad_norm": 0.6851073073819501, + "learning_rate": 2.3307837488412955e-05, + "loss": 0.3414, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3262726068496704, + "step": 1885, + "valid_targets_mean": 1637.2, + "valid_targets_min": 294 + }, + { + "epoch": 3.024, + "grad_norm": 0.6957033026128393, + "learning_rate": 2.3215999729055437e-05, + "loss": 0.3518, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.40083542466163635, + "step": 1890, + "valid_targets_mean": 1853.8, + "valid_targets_min": 364 + }, + { + "epoch": 3.032, + "grad_norm": 0.6918339266575398, + "learning_rate": 2.312409230585641e-05, + "loss": 0.354, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.34504324197769165, + "step": 1895, + "valid_targets_mean": 1833.5, + "valid_targets_min": 278 + }, + { + "epoch": 3.04, + "grad_norm": 0.6685994313776155, + "learning_rate": 2.3032117209681782e-05, + "loss": 0.3694, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3745924234390259, + "step": 1900, + "valid_targets_mean": 1937.8, + "valid_targets_min": 344 + }, + { + "epoch": 3.048, + "grad_norm": 0.6290078653825849, + "learning_rate": 2.2940076432863335e-05, + "loss": 0.3576, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.35292524099349976, + "step": 1905, + "valid_targets_mean": 2088.0, + "valid_targets_min": 295 + }, + { + "epoch": 3.056, + "grad_norm": 0.6593208149721491, + "learning_rate": 2.2847971969155626e-05, + "loss": 0.3807, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3467387557029724, + "step": 1910, + "valid_targets_mean": 1779.2, + "valid_targets_min": 330 + }, + { + "epoch": 3.064, + "grad_norm": 0.6288234163694197, + "learning_rate": 2.275580581369276e-05, + "loss": 0.3534, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.31876906752586365, + "step": 1915, + "valid_targets_mean": 1940.2, + "valid_targets_min": 261 + }, + { + "epoch": 3.072, + "grad_norm": 0.6645318426544585, + "learning_rate": 2.2663579962945205e-05, + "loss": 0.3557, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.33185040950775146, + "step": 1920, + "valid_targets_mean": 1885.4, + "valid_targets_min": 450 + }, + { + "epoch": 3.08, + "grad_norm": 0.6646159186220106, + "learning_rate": 2.2571296414676503e-05, + "loss": 0.344, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3251417875289917, + "step": 1925, + "valid_targets_mean": 1784.0, + "valid_targets_min": 281 + }, + { + "epoch": 3.088, + "grad_norm": 0.6035987948135627, + "learning_rate": 2.2478957167900038e-05, + "loss": 0.3738, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.34511998295783997, + "step": 1930, + "valid_targets_mean": 2332.1, + "valid_targets_min": 447 + }, + { + "epoch": 3.096, + "grad_norm": 0.6459268888860388, + "learning_rate": 2.23865642228357e-05, + "loss": 0.3558, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.34273070096969604, + "step": 1935, + "valid_targets_mean": 2084.6, + "valid_targets_min": 370 + }, + { + "epoch": 3.104, + "grad_norm": 0.6519205904366294, + "learning_rate": 2.2294119580866592e-05, + "loss": 0.3705, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3446979522705078, + "step": 1940, + "valid_targets_mean": 2024.8, + "valid_targets_min": 515 + }, + { + "epoch": 3.112, + "grad_norm": 0.5863039333195743, + "learning_rate": 2.2201625244495646e-05, + "loss": 0.3582, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.31188952922821045, + "step": 1945, + "valid_targets_mean": 2198.8, + "valid_targets_min": 387 + }, + { + "epoch": 3.12, + "grad_norm": 0.8138604431085514, + "learning_rate": 2.2109083217302242e-05, + "loss": 0.3732, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.41089117527008057, + "step": 1950, + "valid_targets_mean": 1379.2, + "valid_targets_min": 314 + }, + { + "epoch": 3.128, + "grad_norm": 0.6223336814826458, + "learning_rate": 2.201649550389885e-05, + "loss": 0.348, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.35420143604278564, + "step": 1955, + "valid_targets_mean": 2208.2, + "valid_targets_min": 311 + }, + { + "epoch": 3.136, + "grad_norm": 0.5625729693816748, + "learning_rate": 2.1923864109887556e-05, + "loss": 0.355, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.350006103515625, + "step": 1960, + "valid_targets_mean": 2587.9, + "valid_targets_min": 308 + }, + { + "epoch": 3.144, + "grad_norm": 0.5915272696073566, + "learning_rate": 2.1831191041816652e-05, + "loss": 0.3692, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.37210381031036377, + "step": 1965, + "valid_targets_mean": 2568.3, + "valid_targets_min": 504 + }, + { + "epoch": 3.152, + "grad_norm": 0.6491814772528394, + "learning_rate": 2.173847830713715e-05, + "loss": 0.3676, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3734401762485504, + "step": 1970, + "valid_targets_mean": 2128.2, + "valid_targets_min": 323 + }, + { + "epoch": 3.16, + "grad_norm": 0.606126090375627, + "learning_rate": 2.1645727914159315e-05, + "loss": 0.3507, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.33269253373146057, + "step": 1975, + "valid_targets_mean": 2215.9, + "valid_targets_min": 397 + }, + { + "epoch": 3.168, + "grad_norm": 0.6028176800089939, + "learning_rate": 2.1552941872009144e-05, + "loss": 0.3617, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.36134254932403564, + "step": 1980, + "valid_targets_mean": 2474.1, + "valid_targets_min": 397 + }, + { + "epoch": 3.176, + "grad_norm": 0.6929456925738174, + "learning_rate": 2.1460122190584868e-05, + "loss": 0.3827, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3592289090156555, + "step": 1985, + "valid_targets_mean": 2259.2, + "valid_targets_min": 336 + }, + { + "epoch": 3.184, + "grad_norm": 0.7419254924501354, + "learning_rate": 2.1367270880513377e-05, + "loss": 0.3801, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3836570382118225, + "step": 1990, + "valid_targets_mean": 1838.1, + "valid_targets_min": 288 + }, + { + "epoch": 3.192, + "grad_norm": 0.6716942733861659, + "learning_rate": 2.127438995310671e-05, + "loss": 0.3558, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3701018989086151, + "step": 1995, + "valid_targets_mean": 2167.3, + "valid_targets_min": 420 + }, + { + "epoch": 3.2, + "grad_norm": 0.6623749519667427, + "learning_rate": 2.118148142031846e-05, + "loss": 0.3575, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3783347010612488, + "step": 2000, + "valid_targets_mean": 1932.2, + "valid_targets_min": 504 + }, + { + "epoch": 3.208, + "grad_norm": 0.6711094923676311, + "learning_rate": 2.1088547294700182e-05, + "loss": 0.3556, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.38326647877693176, + "step": 2005, + "valid_targets_mean": 2020.8, + "valid_targets_min": 719 + }, + { + "epoch": 3.216, + "grad_norm": 0.7373832742848137, + "learning_rate": 2.0995589589357846e-05, + "loss": 0.3722, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4027169346809387, + "step": 2010, + "valid_targets_mean": 1860.1, + "valid_targets_min": 511 + }, + { + "epoch": 3.224, + "grad_norm": 0.6609924694795588, + "learning_rate": 2.0902610317908175e-05, + "loss": 0.3702, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.37891778349876404, + "step": 2015, + "valid_targets_mean": 1891.6, + "valid_targets_min": 281 + }, + { + "epoch": 3.232, + "grad_norm": 0.6116427719169357, + "learning_rate": 2.080961149443505e-05, + "loss": 0.343, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.35430964827537537, + "step": 2020, + "valid_targets_mean": 2165.3, + "valid_targets_min": 234 + }, + { + "epoch": 3.24, + "grad_norm": 0.7271187064226262, + "learning_rate": 2.071659513344589e-05, + "loss": 0.3616, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.37511664628982544, + "step": 2025, + "valid_targets_mean": 1475.1, + "valid_targets_min": 325 + }, + { + "epoch": 3.248, + "grad_norm": 0.5178449671491031, + "learning_rate": 2.0623563249828e-05, + "loss": 0.33, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3304227590560913, + "step": 2030, + "valid_targets_mean": 2904.6, + "valid_targets_min": 454 + }, + { + "epoch": 3.2560000000000002, + "grad_norm": 0.6009441585408077, + "learning_rate": 2.053051785880492e-05, + "loss": 0.3669, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.349854052066803, + "step": 2035, + "valid_targets_mean": 2303.9, + "valid_targets_min": 353 + }, + { + "epoch": 3.2640000000000002, + "grad_norm": 0.6024939205010066, + "learning_rate": 2.0437460975892814e-05, + "loss": 0.3519, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.30770695209503174, + "step": 2040, + "valid_targets_mean": 1997.6, + "valid_targets_min": 290 + }, + { + "epoch": 3.2720000000000002, + "grad_norm": 0.6567641209970828, + "learning_rate": 2.0344394616856736e-05, + "loss": 0.3643, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3653138279914856, + "step": 2045, + "valid_targets_mean": 2023.9, + "valid_targets_min": 262 + }, + { + "epoch": 3.2800000000000002, + "grad_norm": 0.5685017305666865, + "learning_rate": 2.0251320797667056e-05, + "loss": 0.3751, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.36403796076774597, + "step": 2050, + "valid_targets_mean": 2934.8, + "valid_targets_min": 630 + }, + { + "epoch": 3.288, + "grad_norm": 0.6636164656698944, + "learning_rate": 2.01582415344557e-05, + "loss": 0.3478, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3850035071372986, + "step": 2055, + "valid_targets_mean": 2090.8, + "valid_targets_min": 315 + }, + { + "epoch": 3.296, + "grad_norm": 0.677882323785035, + "learning_rate": 2.006515884347255e-05, + "loss": 0.3677, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.34536224603652954, + "step": 2060, + "valid_targets_mean": 1828.5, + "valid_targets_min": 406 + }, + { + "epoch": 3.304, + "grad_norm": 0.6253603205859511, + "learning_rate": 1.9972074741041712e-05, + "loss": 0.3544, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3635639548301697, + "step": 2065, + "valid_targets_mean": 2177.9, + "valid_targets_min": 437 + }, + { + "epoch": 3.312, + "grad_norm": 0.7571788308639501, + "learning_rate": 1.9878991243517913e-05, + "loss": 0.382, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.37547576427459717, + "step": 2070, + "valid_targets_mean": 1870.7, + "valid_targets_min": 488 + }, + { + "epoch": 3.32, + "grad_norm": 0.6162909260403991, + "learning_rate": 1.9785910367242712e-05, + "loss": 0.357, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.34492647647857666, + "step": 2075, + "valid_targets_mean": 2629.4, + "valid_targets_min": 336 + }, + { + "epoch": 3.328, + "grad_norm": 0.6459292713099473, + "learning_rate": 1.969283412850094e-05, + "loss": 0.3666, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.37682420015335083, + "step": 2080, + "valid_targets_mean": 2129.2, + "valid_targets_min": 270 + }, + { + "epoch": 3.336, + "grad_norm": 0.6185038841693136, + "learning_rate": 1.959976454347696e-05, + "loss": 0.3394, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.31735754013061523, + "step": 2085, + "valid_targets_mean": 2153.9, + "valid_targets_min": 356 + }, + { + "epoch": 3.344, + "grad_norm": 0.669817767991775, + "learning_rate": 1.950670362821098e-05, + "loss": 0.3453, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.35490524768829346, + "step": 2090, + "valid_targets_mean": 2075.0, + "valid_targets_min": 291 + }, + { + "epoch": 3.352, + "grad_norm": 0.6336596094505553, + "learning_rate": 1.9413653398555437e-05, + "loss": 0.3756, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.34789595007896423, + "step": 2095, + "valid_targets_mean": 1968.0, + "valid_targets_min": 377 + }, + { + "epoch": 3.36, + "grad_norm": 0.8081160567298646, + "learning_rate": 1.9320615870131282e-05, + "loss": 0.3602, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.38415002822875977, + "step": 2100, + "valid_targets_mean": 1982.8, + "valid_targets_min": 324 + }, + { + "epoch": 3.368, + "grad_norm": 0.6962657438034732, + "learning_rate": 1.9227593058284343e-05, + "loss": 0.3674, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3305050730705261, + "step": 2105, + "valid_targets_mean": 1824.0, + "valid_targets_min": 381 + }, + { + "epoch": 3.376, + "grad_norm": 0.740689669376568, + "learning_rate": 1.9134586978041663e-05, + "loss": 0.3718, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3729479908943176, + "step": 2110, + "valid_targets_mean": 1616.3, + "valid_targets_min": 320 + }, + { + "epoch": 3.384, + "grad_norm": 0.6817372764014189, + "learning_rate": 1.9041599644067846e-05, + "loss": 0.382, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.361863374710083, + "step": 2115, + "valid_targets_mean": 1781.7, + "valid_targets_min": 288 + }, + { + "epoch": 3.392, + "grad_norm": 0.6710191633507304, + "learning_rate": 1.8948633070621433e-05, + "loss": 0.3467, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.35921335220336914, + "step": 2120, + "valid_targets_mean": 2027.4, + "valid_targets_min": 426 + }, + { + "epoch": 3.4, + "grad_norm": 0.62926154035665, + "learning_rate": 1.885568927151124e-05, + "loss": 0.3701, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3318699598312378, + "step": 2125, + "valid_targets_mean": 2056.8, + "valid_targets_min": 486 + }, + { + "epoch": 3.408, + "grad_norm": 0.630812301281981, + "learning_rate": 1.8762770260052773e-05, + "loss": 0.3533, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.37095907330513, + "step": 2130, + "valid_targets_mean": 2337.6, + "valid_targets_min": 400 + }, + { + "epoch": 3.416, + "grad_norm": 0.5628803802505868, + "learning_rate": 1.8669878049024575e-05, + "loss": 0.3461, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3343871831893921, + "step": 2135, + "valid_targets_mean": 2433.6, + "valid_targets_min": 328 + }, + { + "epoch": 3.424, + "grad_norm": 0.6587234164207159, + "learning_rate": 1.857701465062467e-05, + "loss": 0.3671, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.34795311093330383, + "step": 2140, + "valid_targets_mean": 2278.2, + "valid_targets_min": 412 + }, + { + "epoch": 3.432, + "grad_norm": 0.6141886366824748, + "learning_rate": 1.848418207642693e-05, + "loss": 0.3641, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.340614914894104, + "step": 2145, + "valid_targets_mean": 2468.8, + "valid_targets_min": 363 + }, + { + "epoch": 3.44, + "grad_norm": 0.7608947546773019, + "learning_rate": 1.8391382337337548e-05, + "loss": 0.3691, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.387977659702301, + "step": 2150, + "valid_targets_mean": 1536.1, + "valid_targets_min": 287 + }, + { + "epoch": 3.448, + "grad_norm": 0.6952199985932443, + "learning_rate": 1.829861744355144e-05, + "loss": 0.3604, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3693057894706726, + "step": 2155, + "valid_targets_mean": 2045.2, + "valid_targets_min": 302 + }, + { + "epoch": 3.456, + "grad_norm": 0.7439099787511875, + "learning_rate": 1.820588940450872e-05, + "loss": 0.3527, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3845769166946411, + "step": 2160, + "valid_targets_mean": 1662.5, + "valid_targets_min": 280 + }, + { + "epoch": 3.464, + "grad_norm": 0.634402246022689, + "learning_rate": 1.8113200228851163e-05, + "loss": 0.3391, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.37684130668640137, + "step": 2165, + "valid_targets_mean": 2222.1, + "valid_targets_min": 299 + }, + { + "epoch": 3.472, + "grad_norm": 0.6894757109083375, + "learning_rate": 1.80205519243787e-05, + "loss": 0.3349, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3498302698135376, + "step": 2170, + "valid_targets_mean": 1682.1, + "valid_targets_min": 272 + }, + { + "epoch": 3.48, + "grad_norm": 0.6476961900701566, + "learning_rate": 1.7927946498005934e-05, + "loss": 0.3465, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3206251859664917, + "step": 2175, + "valid_targets_mean": 2919.4, + "valid_targets_min": 522 + }, + { + "epoch": 3.488, + "grad_norm": 0.6753631402801488, + "learning_rate": 1.7835385955718653e-05, + "loss": 0.3525, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3390992283821106, + "step": 2180, + "valid_targets_mean": 1935.4, + "valid_targets_min": 317 + }, + { + "epoch": 3.496, + "grad_norm": 0.6798516632249726, + "learning_rate": 1.7742872302530366e-05, + "loss": 0.3783, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.38496145606040955, + "step": 2185, + "valid_targets_mean": 2096.4, + "valid_targets_min": 253 + }, + { + "epoch": 3.504, + "grad_norm": 0.6933996256183506, + "learning_rate": 1.765040754243892e-05, + "loss": 0.3676, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3730943202972412, + "step": 2190, + "valid_targets_mean": 1851.4, + "valid_targets_min": 357 + }, + { + "epoch": 3.512, + "grad_norm": 0.7110969950647408, + "learning_rate": 1.755799367838302e-05, + "loss": 0.3617, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.38612857460975647, + "step": 2195, + "valid_targets_mean": 2064.2, + "valid_targets_min": 351 + }, + { + "epoch": 3.52, + "grad_norm": 0.6596801867230143, + "learning_rate": 1.746563271219891e-05, + "loss": 0.3659, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3577587902545929, + "step": 2200, + "valid_targets_mean": 2278.2, + "valid_targets_min": 511 + }, + { + "epoch": 3.528, + "grad_norm": 0.671825575187384, + "learning_rate": 1.7373326644576965e-05, + "loss": 0.3708, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3616862893104553, + "step": 2205, + "valid_targets_mean": 2457.8, + "valid_targets_min": 276 + }, + { + "epoch": 3.536, + "grad_norm": 0.6994958591376166, + "learning_rate": 1.728107747501836e-05, + "loss": 0.358, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.36732035875320435, + "step": 2210, + "valid_targets_mean": 1954.8, + "valid_targets_min": 369 + }, + { + "epoch": 3.544, + "grad_norm": 0.6016709776371766, + "learning_rate": 1.7188887201791785e-05, + "loss": 0.3413, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3304412364959717, + "step": 2215, + "valid_targets_mean": 2284.2, + "valid_targets_min": 314 + }, + { + "epoch": 3.552, + "grad_norm": 0.6746192722394049, + "learning_rate": 1.7096757821890117e-05, + "loss": 0.354, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3492451310157776, + "step": 2220, + "valid_targets_mean": 1794.3, + "valid_targets_min": 299 + }, + { + "epoch": 3.56, + "grad_norm": 0.5979672015191129, + "learning_rate": 1.7004691330987196e-05, + "loss": 0.3785, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3538179099559784, + "step": 2225, + "valid_targets_mean": 2432.1, + "valid_targets_min": 491 + }, + { + "epoch": 3.568, + "grad_norm": 0.7459353388391813, + "learning_rate": 1.691268972339458e-05, + "loss": 0.3714, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3988783657550812, + "step": 2230, + "valid_targets_mean": 1644.5, + "valid_targets_min": 237 + }, + { + "epoch": 3.576, + "grad_norm": 0.752136783472821, + "learning_rate": 1.6820754992018344e-05, + "loss": 0.3672, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.39165884256362915, + "step": 2235, + "valid_targets_mean": 1668.0, + "valid_targets_min": 463 + }, + { + "epoch": 3.584, + "grad_norm": 0.5888303253878224, + "learning_rate": 1.6728889128315932e-05, + "loss": 0.3579, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.34385019540786743, + "step": 2240, + "valid_targets_mean": 2623.6, + "valid_targets_min": 446 + }, + { + "epoch": 3.592, + "grad_norm": 0.6520688037523493, + "learning_rate": 1.663709412225297e-05, + "loss": 0.3826, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3680360019207001, + "step": 2245, + "valid_targets_mean": 2156.8, + "valid_targets_min": 322 + }, + { + "epoch": 3.6, + "grad_norm": 0.7039772129062588, + "learning_rate": 1.654537196226022e-05, + "loss": 0.3678, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.35406723618507385, + "step": 2250, + "valid_targets_mean": 1685.8, + "valid_targets_min": 362 + }, + { + "epoch": 3.608, + "grad_norm": 0.6987712882835031, + "learning_rate": 1.6453724635190455e-05, + "loss": 0.3615, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.37787187099456787, + "step": 2255, + "valid_targets_mean": 1718.6, + "valid_targets_min": 338 + }, + { + "epoch": 3.616, + "grad_norm": 0.7053910956653641, + "learning_rate": 1.6362154126275467e-05, + "loss": 0.361, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3942461907863617, + "step": 2260, + "valid_targets_mean": 1716.6, + "valid_targets_min": 395 + }, + { + "epoch": 3.624, + "grad_norm": 0.687209335721668, + "learning_rate": 1.6270662419083018e-05, + "loss": 0.3654, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3939089775085449, + "step": 2265, + "valid_targets_mean": 2159.6, + "valid_targets_min": 588 + }, + { + "epoch": 3.632, + "grad_norm": 0.6484010824748857, + "learning_rate": 1.617925149547391e-05, + "loss": 0.3745, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3499888777732849, + "step": 2270, + "valid_targets_mean": 2079.5, + "valid_targets_min": 441 + }, + { + "epoch": 3.64, + "grad_norm": 0.6185239225910694, + "learning_rate": 1.608792333555904e-05, + "loss": 0.3449, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2988038659095764, + "step": 2275, + "valid_targets_mean": 1887.8, + "valid_targets_min": 315 + }, + { + "epoch": 3.648, + "grad_norm": 0.6622725425024248, + "learning_rate": 1.5996679917656492e-05, + "loss": 0.3584, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3635316491127014, + "step": 2280, + "valid_targets_mean": 2113.4, + "valid_targets_min": 250 + }, + { + "epoch": 3.656, + "grad_norm": 0.6461440368572684, + "learning_rate": 1.5905523218248723e-05, + "loss": 0.3349, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3423093259334564, + "step": 2285, + "valid_targets_mean": 1987.4, + "valid_targets_min": 393 + }, + { + "epoch": 3.664, + "grad_norm": 0.7423053801732263, + "learning_rate": 1.5814455211939698e-05, + "loss": 0.3565, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3898213505744934, + "step": 2290, + "valid_targets_mean": 1741.2, + "valid_targets_min": 255 + }, + { + "epoch": 3.672, + "grad_norm": 0.6683929551171593, + "learning_rate": 1.5723477871412168e-05, + "loss": 0.3453, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.36176642775535583, + "step": 2295, + "valid_targets_mean": 1948.9, + "valid_targets_min": 279 + }, + { + "epoch": 3.68, + "grad_norm": 0.6585838124328314, + "learning_rate": 1.56325931673849e-05, + "loss": 0.3514, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.33605048060417175, + "step": 2300, + "valid_targets_mean": 1879.3, + "valid_targets_min": 390 + }, + { + "epoch": 3.6879999999999997, + "grad_norm": 0.8149382661912283, + "learning_rate": 1.5541803068569993e-05, + "loss": 0.3696, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.35881364345550537, + "step": 2305, + "valid_targets_mean": 1977.1, + "valid_targets_min": 311 + }, + { + "epoch": 3.6959999999999997, + "grad_norm": 0.6351075990623459, + "learning_rate": 1.5451109541630275e-05, + "loss": 0.352, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.31659263372421265, + "step": 2310, + "valid_targets_mean": 2268.1, + "valid_targets_min": 416 + }, + { + "epoch": 3.7039999999999997, + "grad_norm": 0.5828673063408715, + "learning_rate": 1.536051455113663e-05, + "loss": 0.3394, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3484576940536499, + "step": 2315, + "valid_targets_mean": 2422.6, + "valid_targets_min": 416 + }, + { + "epoch": 3.7119999999999997, + "grad_norm": 0.5200997495321733, + "learning_rate": 1.527002005952551e-05, + "loss": 0.3523, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3167843520641327, + "step": 2320, + "valid_targets_mean": 2799.5, + "valid_targets_min": 432 + }, + { + "epoch": 3.7199999999999998, + "grad_norm": 0.6191793089703284, + "learning_rate": 1.5179628027056373e-05, + "loss": 0.3576, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.36515581607818604, + "step": 2325, + "valid_targets_mean": 2321.8, + "valid_targets_min": 312 + }, + { + "epoch": 3.7279999999999998, + "grad_norm": 0.7703510487146173, + "learning_rate": 1.5089340411769257e-05, + "loss": 0.3683, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.41395729780197144, + "step": 2330, + "valid_targets_mean": 1609.9, + "valid_targets_min": 252 + }, + { + "epoch": 3.7359999999999998, + "grad_norm": 0.6881396778475732, + "learning_rate": 1.499915916944236e-05, + "loss": 0.3493, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.39570489525794983, + "step": 2335, + "valid_targets_mean": 2141.2, + "valid_targets_min": 327 + }, + { + "epoch": 3.7439999999999998, + "grad_norm": 0.7674492979368044, + "learning_rate": 1.490908625354964e-05, + "loss": 0.3509, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3919585943222046, + "step": 2340, + "valid_targets_mean": 1661.6, + "valid_targets_min": 230 + }, + { + "epoch": 3.752, + "grad_norm": 0.6616850774148547, + "learning_rate": 1.4819123615218556e-05, + "loss": 0.3514, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3458954095840454, + "step": 2345, + "valid_targets_mean": 1910.0, + "valid_targets_min": 318 + }, + { + "epoch": 3.76, + "grad_norm": 0.5996831317937957, + "learning_rate": 1.472927320318775e-05, + "loss": 0.3545, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.336120069026947, + "step": 2350, + "valid_targets_mean": 2165.2, + "valid_targets_min": 418 + }, + { + "epoch": 3.768, + "grad_norm": 0.6063824449090175, + "learning_rate": 1.4639536963764878e-05, + "loss": 0.3457, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.35988757014274597, + "step": 2355, + "valid_targets_mean": 2332.4, + "valid_targets_min": 455 + }, + { + "epoch": 3.776, + "grad_norm": 0.6250271547006242, + "learning_rate": 1.4549916840784409e-05, + "loss": 0.3731, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3625832498073578, + "step": 2360, + "valid_targets_mean": 2205.9, + "valid_targets_min": 311 + }, + { + "epoch": 3.784, + "grad_norm": 0.6838399225206161, + "learning_rate": 1.4460414775565555e-05, + "loss": 0.3627, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.39127451181411743, + "step": 2365, + "valid_targets_mean": 2025.2, + "valid_targets_min": 349 + }, + { + "epoch": 3.792, + "grad_norm": 0.6594077109708212, + "learning_rate": 1.43710327068702e-05, + "loss": 0.3576, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3650404214859009, + "step": 2370, + "valid_targets_mean": 2368.4, + "valid_targets_min": 498 + }, + { + "epoch": 3.8, + "grad_norm": 0.5616864427647533, + "learning_rate": 1.4281772570860897e-05, + "loss": 0.3645, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.34412187337875366, + "step": 2375, + "valid_targets_mean": 2745.6, + "valid_targets_min": 313 + }, + { + "epoch": 3.808, + "grad_norm": 0.6422542729169898, + "learning_rate": 1.4192636301058952e-05, + "loss": 0.3473, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3570175766944885, + "step": 2380, + "valid_targets_mean": 2231.0, + "valid_targets_min": 553 + }, + { + "epoch": 3.816, + "grad_norm": 0.6982317651744271, + "learning_rate": 1.4103625828302508e-05, + "loss": 0.3528, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3916783928871155, + "step": 2385, + "valid_targets_mean": 2175.6, + "valid_targets_min": 514 + }, + { + "epoch": 3.824, + "grad_norm": 0.6711011803974902, + "learning_rate": 1.4014743080704743e-05, + "loss": 0.3608, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3191945552825928, + "step": 2390, + "valid_targets_mean": 1913.1, + "valid_targets_min": 336 + }, + { + "epoch": 3.832, + "grad_norm": 0.7754864491548238, + "learning_rate": 1.3925989983612118e-05, + "loss": 0.3845, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.37967538833618164, + "step": 2395, + "valid_targets_mean": 1446.5, + "valid_targets_min": 312 + }, + { + "epoch": 3.84, + "grad_norm": 0.6115713228293688, + "learning_rate": 1.383736845956261e-05, + "loss": 0.3555, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.32667064666748047, + "step": 2400, + "valid_targets_mean": 2120.1, + "valid_targets_min": 254 + }, + { + "epoch": 3.848, + "grad_norm": 0.6816884753996482, + "learning_rate": 1.3748880428244154e-05, + "loss": 0.3465, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3406139612197876, + "step": 2405, + "valid_targets_mean": 1800.9, + "valid_targets_min": 282 + }, + { + "epoch": 3.856, + "grad_norm": 0.6274592212225665, + "learning_rate": 1.3660527806452965e-05, + "loss": 0.3575, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.37814486026763916, + "step": 2410, + "valid_targets_mean": 2376.9, + "valid_targets_min": 466 + }, + { + "epoch": 3.864, + "grad_norm": 0.6653705629008166, + "learning_rate": 1.3572312508052118e-05, + "loss": 0.3702, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.35926705598831177, + "step": 2415, + "valid_targets_mean": 1921.9, + "valid_targets_min": 358 + }, + { + "epoch": 3.872, + "grad_norm": 0.619145516831352, + "learning_rate": 1.3484236443929982e-05, + "loss": 0.3702, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.38096240162849426, + "step": 2420, + "valid_targets_mean": 2302.6, + "valid_targets_min": 410 + }, + { + "epoch": 3.88, + "grad_norm": 1.039265841304078, + "learning_rate": 1.3396301521958926e-05, + "loss": 0.3605, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.30115431547164917, + "step": 2425, + "valid_targets_mean": 1910.0, + "valid_targets_min": 298 + }, + { + "epoch": 3.888, + "grad_norm": 0.7177434329809323, + "learning_rate": 1.3308509646953934e-05, + "loss": 0.3582, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3660653233528137, + "step": 2430, + "valid_targets_mean": 1765.5, + "valid_targets_min": 298 + }, + { + "epoch": 3.896, + "grad_norm": 0.5917075213127174, + "learning_rate": 1.3220862720631349e-05, + "loss": 0.3359, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.35105088353157043, + "step": 2435, + "valid_targets_mean": 2611.8, + "valid_targets_min": 648 + }, + { + "epoch": 3.904, + "grad_norm": 0.6873495434368752, + "learning_rate": 1.3133362641567697e-05, + "loss": 0.3514, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.35561615228652954, + "step": 2440, + "valid_targets_mean": 1834.1, + "valid_targets_min": 345 + }, + { + "epoch": 3.912, + "grad_norm": 0.7717485015233096, + "learning_rate": 1.3046011305158546e-05, + "loss": 0.3665, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3796098530292511, + "step": 2445, + "valid_targets_mean": 1418.3, + "valid_targets_min": 276 + }, + { + "epoch": 3.92, + "grad_norm": 0.5966354817843722, + "learning_rate": 1.2958810603577456e-05, + "loss": 0.3493, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.36848214268684387, + "step": 2450, + "valid_targets_mean": 2418.7, + "valid_targets_min": 330 + }, + { + "epoch": 3.928, + "grad_norm": 0.6059163698362121, + "learning_rate": 1.2871762425734989e-05, + "loss": 0.3501, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3625923693180084, + "step": 2455, + "valid_targets_mean": 2740.3, + "valid_targets_min": 381 + }, + { + "epoch": 3.936, + "grad_norm": 0.6534398663639183, + "learning_rate": 1.278486865723779e-05, + "loss": 0.3446, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3527751564979553, + "step": 2460, + "valid_targets_mean": 2065.2, + "valid_targets_min": 406 + }, + { + "epoch": 3.944, + "grad_norm": 0.5877602369092997, + "learning_rate": 1.269813118034775e-05, + "loss": 0.3609, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.34739208221435547, + "step": 2465, + "valid_targets_mean": 2381.4, + "valid_targets_min": 298 + }, + { + "epoch": 3.952, + "grad_norm": 1.0190651565090763, + "learning_rate": 1.2611551873941213e-05, + "loss": 0.3819, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.42016249895095825, + "step": 2470, + "valid_targets_mean": 1098.1, + "valid_targets_min": 384 + }, + { + "epoch": 3.96, + "grad_norm": 0.6180474714320067, + "learning_rate": 1.2525132613468309e-05, + "loss": 0.3662, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3400590717792511, + "step": 2475, + "valid_targets_mean": 2681.2, + "valid_targets_min": 320 + }, + { + "epoch": 3.968, + "grad_norm": 0.5915847013969876, + "learning_rate": 1.2438875270912294e-05, + "loss": 0.367, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.33024099469184875, + "step": 2480, + "valid_targets_mean": 2419.4, + "valid_targets_min": 388 + }, + { + "epoch": 3.976, + "grad_norm": 0.6959888683464479, + "learning_rate": 1.2352781714749016e-05, + "loss": 0.3733, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.38543227314949036, + "step": 2485, + "valid_targets_mean": 2036.9, + "valid_targets_min": 374 + }, + { + "epoch": 3.984, + "grad_norm": 0.7111431039056506, + "learning_rate": 1.2266853809906469e-05, + "loss": 0.3761, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3813594877719879, + "step": 2490, + "valid_targets_mean": 1833.9, + "valid_targets_min": 394 + }, + { + "epoch": 3.992, + "grad_norm": 0.6519706550375763, + "learning_rate": 1.2181093417724317e-05, + "loss": 0.3614, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3967099189758301, + "step": 2495, + "valid_targets_mean": 2376.8, + "valid_targets_min": 392 + }, + { + "epoch": 4.0, + "grad_norm": 0.7228414756396276, + "learning_rate": 1.2095502395913676e-05, + "loss": 0.3682, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.34773457050323486, + "step": 2500, + "valid_targets_mean": 1735.1, + "valid_targets_min": 424 + }, + { + "epoch": 4.008, + "grad_norm": 0.7286469450360182, + "learning_rate": 1.2010082598516775e-05, + "loss": 0.3443, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3937542736530304, + "step": 2505, + "valid_targets_mean": 1759.4, + "valid_targets_min": 265 + }, + { + "epoch": 4.016, + "grad_norm": 0.8304747598684544, + "learning_rate": 1.1924835875866884e-05, + "loss": 0.3492, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3632838726043701, + "step": 2510, + "valid_targets_mean": 1408.3, + "valid_targets_min": 299 + }, + { + "epoch": 4.024, + "grad_norm": 0.7035863294078009, + "learning_rate": 1.1839764074548145e-05, + "loss": 0.3354, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.35498493909835815, + "step": 2515, + "valid_targets_mean": 1933.1, + "valid_targets_min": 278 + }, + { + "epoch": 4.032, + "grad_norm": 0.6307727293144214, + "learning_rate": 1.1754869037355659e-05, + "loss": 0.3504, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3607982397079468, + "step": 2520, + "valid_targets_mean": 2463.2, + "valid_targets_min": 776 + }, + { + "epoch": 4.04, + "grad_norm": 0.8393961248560299, + "learning_rate": 1.1670152603255504e-05, + "loss": 0.3354, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3465605080127716, + "step": 2525, + "valid_targets_mean": 1448.4, + "valid_targets_min": 300 + }, + { + "epoch": 4.048, + "grad_norm": 0.793260792414273, + "learning_rate": 1.1585616607344909e-05, + "loss": 0.3515, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.38471972942352295, + "step": 2530, + "valid_targets_mean": 1655.2, + "valid_targets_min": 343 + }, + { + "epoch": 4.056, + "grad_norm": 0.6902235662186285, + "learning_rate": 1.1501262880812547e-05, + "loss": 0.3456, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.34612828493118286, + "step": 2535, + "valid_targets_mean": 1969.1, + "valid_targets_min": 296 + }, + { + "epoch": 4.064, + "grad_norm": 0.7069090444533567, + "learning_rate": 1.141709325089881e-05, + "loss": 0.3441, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.355415940284729, + "step": 2540, + "valid_targets_mean": 1886.8, + "valid_targets_min": 430 + }, + { + "epoch": 4.072, + "grad_norm": 0.639839754182613, + "learning_rate": 1.1333109540856257e-05, + "loss": 0.3431, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3217964172363281, + "step": 2545, + "valid_targets_mean": 2287.9, + "valid_targets_min": 278 + }, + { + "epoch": 4.08, + "grad_norm": 0.6214464640012719, + "learning_rate": 1.1249313569910143e-05, + "loss": 0.3556, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3286832571029663, + "step": 2550, + "valid_targets_mean": 2444.1, + "valid_targets_min": 281 + }, + { + "epoch": 4.088, + "grad_norm": 0.6544866558309532, + "learning_rate": 1.1165707153218942e-05, + "loss": 0.3354, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.332338809967041, + "step": 2555, + "valid_targets_mean": 2312.6, + "valid_targets_min": 293 + }, + { + "epoch": 4.096, + "grad_norm": 0.6132945878233904, + "learning_rate": 1.1082292101835121e-05, + "loss": 0.3345, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2943424880504608, + "step": 2560, + "valid_targets_mean": 2460.2, + "valid_targets_min": 325 + }, + { + "epoch": 4.104, + "grad_norm": 0.7027991125110398, + "learning_rate": 1.099907022266582e-05, + "loss": 0.341, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.32788655161857605, + "step": 2565, + "valid_targets_mean": 1762.0, + "valid_targets_min": 263 + }, + { + "epoch": 4.112, + "grad_norm": 0.7415931434725705, + "learning_rate": 1.0916043318433767e-05, + "loss": 0.3408, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3493083119392395, + "step": 2570, + "valid_targets_mean": 1763.7, + "valid_targets_min": 292 + }, + { + "epoch": 4.12, + "grad_norm": 0.6741258090371485, + "learning_rate": 1.0833213187638203e-05, + "loss": 0.3453, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3227207660675049, + "step": 2575, + "valid_targets_mean": 2098.1, + "valid_targets_min": 453 + }, + { + "epoch": 4.128, + "grad_norm": 0.8441947070410107, + "learning_rate": 1.0750581624515957e-05, + "loss": 0.3487, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.37915170192718506, + "step": 2580, + "valid_targets_mean": 1423.5, + "valid_targets_min": 314 + }, + { + "epoch": 4.136, + "grad_norm": 0.5804235398456956, + "learning_rate": 1.0668150419002527e-05, + "loss": 0.3458, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3130751848220825, + "step": 2585, + "valid_targets_mean": 2746.5, + "valid_targets_min": 639 + }, + { + "epoch": 4.144, + "grad_norm": 0.6639882059386523, + "learning_rate": 1.0585921356693349e-05, + "loss": 0.3389, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.30582067370414734, + "step": 2590, + "valid_targets_mean": 2102.9, + "valid_targets_min": 326 + }, + { + "epoch": 4.152, + "grad_norm": 0.7928164946481391, + "learning_rate": 1.0503896218805112e-05, + "loss": 0.332, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.34261876344680786, + "step": 2595, + "valid_targets_mean": 2166.9, + "valid_targets_min": 237 + }, + { + "epoch": 4.16, + "grad_norm": 0.6944837315899954, + "learning_rate": 1.0422076782137155e-05, + "loss": 0.3368, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3524402976036072, + "step": 2600, + "valid_targets_mean": 2031.3, + "valid_targets_min": 328 + }, + { + "epoch": 4.168, + "grad_norm": 0.952218468273065, + "learning_rate": 1.0340464819032991e-05, + "loss": 0.335, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3416384756565094, + "step": 2605, + "valid_targets_mean": 1622.0, + "valid_targets_min": 309 + }, + { + "epoch": 4.176, + "grad_norm": 0.6300379469720988, + "learning_rate": 1.0259062097341911e-05, + "loss": 0.323, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.28312206268310547, + "step": 2610, + "valid_targets_mean": 2637.6, + "valid_targets_min": 417 + }, + { + "epoch": 4.184, + "grad_norm": 0.5573679849228219, + "learning_rate": 1.017787038038071e-05, + "loss": 0.3483, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.31698471307754517, + "step": 2615, + "valid_targets_mean": 2895.9, + "valid_targets_min": 272 + }, + { + "epoch": 4.192, + "grad_norm": 0.7066082481106857, + "learning_rate": 1.0096891426895476e-05, + "loss": 0.3371, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.314532071352005, + "step": 2620, + "valid_targets_mean": 1845.6, + "valid_targets_min": 420 + }, + { + "epoch": 4.2, + "grad_norm": 0.7884744051747153, + "learning_rate": 1.0016126991023447e-05, + "loss": 0.34, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.34831756353378296, + "step": 2625, + "valid_targets_mean": 1734.6, + "valid_targets_min": 293 + }, + { + "epoch": 4.208, + "grad_norm": 0.752357813315544, + "learning_rate": 9.935578822255113e-06, + "loss": 0.3559, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3773335814476013, + "step": 2630, + "valid_targets_mean": 1848.9, + "valid_targets_min": 410 + }, + { + "epoch": 4.216, + "grad_norm": 0.6660466309862689, + "learning_rate": 9.855248665396218e-06, + "loss": 0.3335, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.34157776832580566, + "step": 2635, + "valid_targets_mean": 2457.2, + "valid_targets_min": 552 + }, + { + "epoch": 4.224, + "grad_norm": 0.7768388500955553, + "learning_rate": 9.775138260530046e-06, + "loss": 0.3349, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3923647999763489, + "step": 2640, + "valid_targets_mean": 1858.2, + "valid_targets_min": 256 + }, + { + "epoch": 4.232, + "grad_norm": 0.7538268754577202, + "learning_rate": 9.695249342979667e-06, + "loss": 0.3415, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.31775760650634766, + "step": 2645, + "valid_targets_mean": 1545.6, + "valid_targets_min": 248 + }, + { + "epoch": 4.24, + "grad_norm": 0.6731102254536624, + "learning_rate": 9.615583643270371e-06, + "loss": 0.3476, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3384263217449188, + "step": 2650, + "valid_targets_mean": 2115.8, + "valid_targets_min": 299 + }, + { + "epoch": 4.248, + "grad_norm": 0.744536684782561, + "learning_rate": 9.536142887092208e-06, + "loss": 0.329, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.36594611406326294, + "step": 2655, + "valid_targets_mean": 2009.8, + "valid_targets_min": 389 + }, + { + "epoch": 4.256, + "grad_norm": 0.7266051622348179, + "learning_rate": 9.456928795262552e-06, + "loss": 0.3385, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.31755226850509644, + "step": 2660, + "valid_targets_mean": 1801.4, + "valid_targets_min": 254 + }, + { + "epoch": 4.264, + "grad_norm": 0.6044550213908365, + "learning_rate": 9.377943083688873e-06, + "loss": 0.3261, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.273038387298584, + "step": 2665, + "valid_targets_mean": 2034.6, + "valid_targets_min": 345 + }, + { + "epoch": 4.272, + "grad_norm": 0.6986297075905579, + "learning_rate": 9.29918746333153e-06, + "loss": 0.3337, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.35008662939071655, + "step": 2670, + "valid_targets_mean": 1931.9, + "valid_targets_min": 259 + }, + { + "epoch": 4.28, + "grad_norm": 0.6132289081293004, + "learning_rate": 9.220663640166756e-06, + "loss": 0.3504, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3231913447380066, + "step": 2675, + "valid_targets_mean": 2267.9, + "valid_targets_min": 432 + }, + { + "epoch": 4.288, + "grad_norm": 0.7343616664045941, + "learning_rate": 9.142373315149655e-06, + "loss": 0.35, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3550803065299988, + "step": 2680, + "valid_targets_mean": 1771.3, + "valid_targets_min": 256 + }, + { + "epoch": 4.296, + "grad_norm": 0.7359371826700569, + "learning_rate": 9.064318184177373e-06, + "loss": 0.3506, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.34089556336402893, + "step": 2685, + "valid_targets_mean": 1857.1, + "valid_targets_min": 412 + }, + { + "epoch": 4.304, + "grad_norm": 0.5878830118582525, + "learning_rate": 8.986499938052396e-06, + "loss": 0.3316, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2948848605155945, + "step": 2690, + "valid_targets_mean": 2368.9, + "valid_targets_min": 314 + }, + { + "epoch": 4.312, + "grad_norm": 0.7765143196874953, + "learning_rate": 8.908920262445859e-06, + "loss": 0.3351, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3355043828487396, + "step": 2695, + "valid_targets_mean": 1749.8, + "valid_targets_min": 271 + }, + { + "epoch": 4.32, + "grad_norm": 0.7582658266269858, + "learning_rate": 8.831580837861082e-06, + "loss": 0.3321, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.32356560230255127, + "step": 2700, + "valid_targets_mean": 1642.6, + "valid_targets_min": 398 + }, + { + "epoch": 4.328, + "grad_norm": 0.7710195340666637, + "learning_rate": 8.754483339597166e-06, + "loss": 0.3447, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.360440194606781, + "step": 2705, + "valid_targets_mean": 2261.8, + "valid_targets_min": 335 + }, + { + "epoch": 4.336, + "grad_norm": 0.8342762582487903, + "learning_rate": 8.677629437712665e-06, + "loss": 0.3519, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.32881706953048706, + "step": 2710, + "valid_targets_mean": 1501.9, + "valid_targets_min": 276 + }, + { + "epoch": 4.344, + "grad_norm": 0.6279159244745763, + "learning_rate": 8.601020796989467e-06, + "loss": 0.329, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3248441517353058, + "step": 2715, + "valid_targets_mean": 2789.1, + "valid_targets_min": 462 + }, + { + "epoch": 4.352, + "grad_norm": 0.5719966709561647, + "learning_rate": 8.524659076896656e-06, + "loss": 0.3383, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.32366693019866943, + "step": 2720, + "valid_targets_mean": 2923.1, + "valid_targets_min": 896 + }, + { + "epoch": 4.36, + "grad_norm": 0.6977283663303482, + "learning_rate": 8.448545931554652e-06, + "loss": 0.3359, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.33601656556129456, + "step": 2725, + "valid_targets_mean": 2315.9, + "valid_targets_min": 432 + }, + { + "epoch": 4.368, + "grad_norm": 0.7069369620627451, + "learning_rate": 8.372683009699307e-06, + "loss": 0.3335, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.33306482434272766, + "step": 2730, + "valid_targets_mean": 1992.3, + "valid_targets_min": 394 + }, + { + "epoch": 4.376, + "grad_norm": 0.5742499188652059, + "learning_rate": 8.297071954646248e-06, + "loss": 0.3535, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3281279504299164, + "step": 2735, + "valid_targets_mean": 2812.5, + "valid_targets_min": 348 + }, + { + "epoch": 4.384, + "grad_norm": 0.6098399152492764, + "learning_rate": 8.22171440425523e-06, + "loss": 0.3327, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.31789901852607727, + "step": 2740, + "valid_targets_mean": 2494.6, + "valid_targets_min": 314 + }, + { + "epoch": 4.392, + "grad_norm": 0.7915304432145721, + "learning_rate": 8.146611990894683e-06, + "loss": 0.337, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.31900548934936523, + "step": 2745, + "valid_targets_mean": 1699.1, + "valid_targets_min": 347 + }, + { + "epoch": 4.4, + "grad_norm": 0.7601195994001059, + "learning_rate": 8.071766341406363e-06, + "loss": 0.357, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.34867095947265625, + "step": 2750, + "valid_targets_mean": 1685.8, + "valid_targets_min": 299 + }, + { + "epoch": 4.408, + "grad_norm": 0.8669948754659953, + "learning_rate": 7.997179077070092e-06, + "loss": 0.3297, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3693384826183319, + "step": 2755, + "valid_targets_mean": 1537.1, + "valid_targets_min": 304 + }, + { + "epoch": 4.416, + "grad_norm": 0.6164068574777449, + "learning_rate": 7.92285181356864e-06, + "loss": 0.3463, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.30480194091796875, + "step": 2760, + "valid_targets_mean": 2245.4, + "valid_targets_min": 272 + }, + { + "epoch": 4.424, + "grad_norm": 0.6708366536968551, + "learning_rate": 7.848786160952726e-06, + "loss": 0.337, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3752930164337158, + "step": 2765, + "valid_targets_mean": 2273.2, + "valid_targets_min": 324 + }, + { + "epoch": 4.432, + "grad_norm": 0.6476967033911869, + "learning_rate": 7.77498372360617e-06, + "loss": 0.3269, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.32428058981895447, + "step": 2770, + "valid_targets_mean": 2302.4, + "valid_targets_min": 351 + }, + { + "epoch": 4.44, + "grad_norm": 0.6817200102614068, + "learning_rate": 7.701446100211095e-06, + "loss": 0.3389, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3381454348564148, + "step": 2775, + "valid_targets_mean": 2117.3, + "valid_targets_min": 301 + }, + { + "epoch": 4.448, + "grad_norm": 0.6936215026940229, + "learning_rate": 7.628174883713322e-06, + "loss": 0.3373, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.33592289686203003, + "step": 2780, + "valid_targets_mean": 2315.6, + "valid_targets_min": 335 + }, + { + "epoch": 4.456, + "grad_norm": 0.7769861722197998, + "learning_rate": 7.555171661287875e-06, + "loss": 0.3407, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3716652989387512, + "step": 2785, + "valid_targets_mean": 1949.7, + "valid_targets_min": 266 + }, + { + "epoch": 4.464, + "grad_norm": 0.6021284909057042, + "learning_rate": 7.482438014304567e-06, + "loss": 0.3449, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3120408058166504, + "step": 2790, + "valid_targets_mean": 2475.6, + "valid_targets_min": 420 + }, + { + "epoch": 4.4719999999999995, + "grad_norm": 0.5814826266496147, + "learning_rate": 7.4099755182937685e-06, + "loss": 0.3215, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.31242480874061584, + "step": 2795, + "valid_targets_mean": 2622.8, + "valid_targets_min": 513 + }, + { + "epoch": 4.48, + "grad_norm": 0.6469685216112735, + "learning_rate": 7.337785742912289e-06, + "loss": 0.3406, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3127382695674896, + "step": 2800, + "valid_targets_mean": 2113.1, + "valid_targets_min": 298 + }, + { + "epoch": 4.4879999999999995, + "grad_norm": 0.7648541150536319, + "learning_rate": 7.265870251909335e-06, + "loss": 0.34, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3590547442436218, + "step": 2805, + "valid_targets_mean": 1728.8, + "valid_targets_min": 277 + }, + { + "epoch": 4.496, + "grad_norm": 0.8303349115720151, + "learning_rate": 7.194230603092697e-06, + "loss": 0.3363, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3518087863922119, + "step": 2810, + "valid_targets_mean": 1715.7, + "valid_targets_min": 272 + }, + { + "epoch": 4.504, + "grad_norm": 0.6736659378057248, + "learning_rate": 7.122868348294927e-06, + "loss": 0.3489, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3054027557373047, + "step": 2815, + "valid_targets_mean": 2124.8, + "valid_targets_min": 344 + }, + { + "epoch": 4.5120000000000005, + "grad_norm": 0.745289098779392, + "learning_rate": 7.051785033339804e-06, + "loss": 0.3667, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3639134168624878, + "step": 2820, + "valid_targets_mean": 1994.8, + "valid_targets_min": 396 + }, + { + "epoch": 4.52, + "grad_norm": 0.6158853661814754, + "learning_rate": 6.980982198008785e-06, + "loss": 0.3234, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3225085437297821, + "step": 2825, + "valid_targets_mean": 2275.7, + "valid_targets_min": 241 + }, + { + "epoch": 4.5280000000000005, + "grad_norm": 0.7616317562310106, + "learning_rate": 6.910461376007704e-06, + "loss": 0.3502, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3805854916572571, + "step": 2830, + "valid_targets_mean": 2065.6, + "valid_targets_min": 251 + }, + { + "epoch": 4.536, + "grad_norm": 0.7988989277100885, + "learning_rate": 6.840224094933501e-06, + "loss": 0.337, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3373550474643707, + "step": 2835, + "valid_targets_mean": 1581.1, + "valid_targets_min": 334 + }, + { + "epoch": 4.5440000000000005, + "grad_norm": 0.6303866936819127, + "learning_rate": 6.7702718762411505e-06, + "loss": 0.367, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3261539340019226, + "step": 2840, + "valid_targets_mean": 2142.6, + "valid_targets_min": 318 + }, + { + "epoch": 4.552, + "grad_norm": 0.8081693015537023, + "learning_rate": 6.700606235210731e-06, + "loss": 0.3323, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.36277416348457336, + "step": 2845, + "valid_targets_mean": 1539.9, + "valid_targets_min": 407 + }, + { + "epoch": 4.5600000000000005, + "grad_norm": 0.7410154168191748, + "learning_rate": 6.631228680914558e-06, + "loss": 0.3447, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.34555795788764954, + "step": 2850, + "valid_targets_mean": 1858.9, + "valid_targets_min": 291 + }, + { + "epoch": 4.568, + "grad_norm": 0.720084820230822, + "learning_rate": 6.562140716184515e-06, + "loss": 0.3384, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3737310767173767, + "step": 2855, + "valid_targets_mean": 2088.8, + "valid_targets_min": 278 + }, + { + "epoch": 4.576, + "grad_norm": 0.7346833698454145, + "learning_rate": 6.493343837579511e-06, + "loss": 0.3455, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.36689460277557373, + "step": 2860, + "valid_targets_mean": 1973.1, + "valid_targets_min": 305 + }, + { + "epoch": 4.584, + "grad_norm": 0.6779156780136647, + "learning_rate": 6.424839535353045e-06, + "loss": 0.3326, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.31229498982429504, + "step": 2865, + "valid_targets_mean": 1897.9, + "valid_targets_min": 377 + }, + { + "epoch": 4.592, + "grad_norm": 0.6429465962365933, + "learning_rate": 6.356629293420926e-06, + "loss": 0.3303, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3346661627292633, + "step": 2870, + "valid_targets_mean": 2352.9, + "valid_targets_min": 334 + }, + { + "epoch": 4.6, + "grad_norm": 0.7283346631415872, + "learning_rate": 6.28871458932913e-06, + "loss": 0.3412, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3508552014827728, + "step": 2875, + "valid_targets_mean": 2044.8, + "valid_targets_min": 368 + }, + { + "epoch": 4.608, + "grad_norm": 0.7576839986877464, + "learning_rate": 6.2210968942218206e-06, + "loss": 0.3349, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.33264338970184326, + "step": 2880, + "valid_targets_mean": 1746.8, + "valid_targets_min": 323 + }, + { + "epoch": 4.616, + "grad_norm": 0.7688337603614162, + "learning_rate": 6.153777672809438e-06, + "loss": 0.3407, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.38428616523742676, + "step": 2885, + "valid_targets_mean": 1919.3, + "valid_targets_min": 267 + }, + { + "epoch": 4.624, + "grad_norm": 0.7414416548490468, + "learning_rate": 6.086758383336984e-06, + "loss": 0.3434, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3412899374961853, + "step": 2890, + "valid_targets_mean": 2032.7, + "valid_targets_min": 374 + }, + { + "epoch": 4.632, + "grad_norm": 0.6462128784115221, + "learning_rate": 6.0200404775524715e-06, + "loss": 0.3379, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.31704598665237427, + "step": 2895, + "valid_targets_mean": 2154.6, + "valid_targets_min": 263 + }, + { + "epoch": 4.64, + "grad_norm": 0.7360062660104016, + "learning_rate": 5.9536254006754155e-06, + "loss": 0.3272, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3256540298461914, + "step": 2900, + "valid_targets_mean": 1833.4, + "valid_targets_min": 312 + }, + { + "epoch": 4.648, + "grad_norm": 0.7158972606975016, + "learning_rate": 5.887514591365593e-06, + "loss": 0.3221, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3094181418418884, + "step": 2905, + "valid_targets_mean": 1872.8, + "valid_targets_min": 377 + }, + { + "epoch": 4.656, + "grad_norm": 0.709391212865319, + "learning_rate": 5.821709481691798e-06, + "loss": 0.3408, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3378103971481323, + "step": 2910, + "valid_targets_mean": 2036.4, + "valid_targets_min": 463 + }, + { + "epoch": 4.664, + "grad_norm": 0.6871157747889591, + "learning_rate": 5.75621149710091e-06, + "loss": 0.3398, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3434550166130066, + "step": 2915, + "valid_targets_mean": 2244.7, + "valid_targets_min": 634 + }, + { + "epoch": 4.672, + "grad_norm": 0.6331228522669606, + "learning_rate": 5.691022056386961e-06, + "loss": 0.3142, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.29856497049331665, + "step": 2920, + "valid_targets_mean": 2137.9, + "valid_targets_min": 317 + }, + { + "epoch": 4.68, + "grad_norm": 0.6347565330283759, + "learning_rate": 5.6261425716604136e-06, + "loss": 0.3279, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3437310457229614, + "step": 2925, + "valid_targets_mean": 2575.1, + "valid_targets_min": 324 + }, + { + "epoch": 4.688, + "grad_norm": 0.6799231986824172, + "learning_rate": 5.56157444831757e-06, + "loss": 0.3434, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3541477918624878, + "step": 2930, + "valid_targets_mean": 2470.2, + "valid_targets_min": 234 + }, + { + "epoch": 4.696, + "grad_norm": 0.7837577906764602, + "learning_rate": 5.4973190850101334e-06, + "loss": 0.3393, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3557363748550415, + "step": 2935, + "valid_targets_mean": 1714.9, + "valid_targets_min": 312 + }, + { + "epoch": 4.704, + "grad_norm": 0.7808964571892477, + "learning_rate": 5.433377873614925e-06, + "loss": 0.3554, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.34845733642578125, + "step": 2940, + "valid_targets_mean": 1640.8, + "valid_targets_min": 392 + }, + { + "epoch": 4.712, + "grad_norm": 0.7463159877230825, + "learning_rate": 5.369752199203702e-06, + "loss": 0.3521, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3448995053768158, + "step": 2945, + "valid_targets_mean": 1826.3, + "valid_targets_min": 306 + }, + { + "epoch": 4.72, + "grad_norm": 0.7239910602208591, + "learning_rate": 5.306443440013171e-06, + "loss": 0.338, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3213045001029968, + "step": 2950, + "valid_targets_mean": 2073.1, + "valid_targets_min": 279 + }, + { + "epoch": 4.728, + "grad_norm": 0.697542846280569, + "learning_rate": 5.243452967415155e-06, + "loss": 0.3398, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.368432879447937, + "step": 2955, + "valid_targets_mean": 2343.1, + "valid_targets_min": 340 + }, + { + "epoch": 4.736, + "grad_norm": 0.6726585942037008, + "learning_rate": 5.180782145886846e-06, + "loss": 0.3363, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.32481229305267334, + "step": 2960, + "valid_targets_mean": 2228.3, + "valid_targets_min": 319 + }, + { + "epoch": 4.744, + "grad_norm": 0.6841328015946316, + "learning_rate": 5.118432332981273e-06, + "loss": 0.3177, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.34120649099349976, + "step": 2965, + "valid_targets_mean": 1947.8, + "valid_targets_min": 294 + }, + { + "epoch": 4.752, + "grad_norm": 0.7312881604013325, + "learning_rate": 5.056404879297887e-06, + "loss": 0.334, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3170173466205597, + "step": 2970, + "valid_targets_mean": 1949.8, + "valid_targets_min": 405 + }, + { + "epoch": 4.76, + "grad_norm": 0.7224196884926505, + "learning_rate": 4.994701128453325e-06, + "loss": 0.3332, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3099290728569031, + "step": 2975, + "valid_targets_mean": 2041.4, + "valid_targets_min": 343 + }, + { + "epoch": 4.768, + "grad_norm": 0.6589153983091567, + "learning_rate": 4.933322417052269e-06, + "loss": 0.313, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2728843688964844, + "step": 2980, + "valid_targets_mean": 2069.7, + "valid_targets_min": 305 + }, + { + "epoch": 4.776, + "grad_norm": 0.769835811513428, + "learning_rate": 4.8722700746585135e-06, + "loss": 0.3583, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3480873703956604, + "step": 2985, + "valid_targets_mean": 1801.6, + "valid_targets_min": 322 + }, + { + "epoch": 4.784, + "grad_norm": 0.7192175550424258, + "learning_rate": 4.811545423766184e-06, + "loss": 0.3498, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.35846394300460815, + "step": 2990, + "valid_targets_mean": 2009.8, + "valid_targets_min": 294 + }, + { + "epoch": 4.792, + "grad_norm": 0.6047543622315156, + "learning_rate": 4.75114977977104e-06, + "loss": 0.3399, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.33460450172424316, + "step": 2995, + "valid_targets_mean": 2401.3, + "valid_targets_min": 289 + }, + { + "epoch": 4.8, + "grad_norm": 0.6041136581551483, + "learning_rate": 4.691084450942047e-06, + "loss": 0.3341, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2866661548614502, + "step": 3000, + "valid_targets_mean": 2317.4, + "valid_targets_min": 335 + }, + { + "epoch": 4.808, + "grad_norm": 0.57423011099845, + "learning_rate": 4.631350738392955e-06, + "loss": 0.3357, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3241468667984009, + "step": 3005, + "valid_targets_mean": 2535.6, + "valid_targets_min": 287 + }, + { + "epoch": 4.816, + "grad_norm": 0.8874875694801316, + "learning_rate": 4.571949936054197e-06, + "loss": 0.3496, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3535746932029724, + "step": 3010, + "valid_targets_mean": 1403.2, + "valid_targets_min": 308 + }, + { + "epoch": 4.824, + "grad_norm": 0.7524222098226543, + "learning_rate": 4.512883330644815e-06, + "loss": 0.3405, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.342057466506958, + "step": 3015, + "valid_targets_mean": 1874.9, + "valid_targets_min": 381 + }, + { + "epoch": 4.832, + "grad_norm": 0.655589657985156, + "learning_rate": 4.454152201644591e-06, + "loss": 0.3339, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.32880115509033203, + "step": 3020, + "valid_targets_mean": 2234.2, + "valid_targets_min": 297 + }, + { + "epoch": 4.84, + "grad_norm": 0.6461768668422677, + "learning_rate": 4.395757821266333e-06, + "loss": 0.3356, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.32608258724212646, + "step": 3025, + "valid_targets_mean": 2179.1, + "valid_targets_min": 462 + }, + { + "epoch": 4.848, + "grad_norm": 0.6649863833814916, + "learning_rate": 4.337701454428322e-06, + "loss": 0.3451, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3342806398868561, + "step": 3030, + "valid_targets_mean": 2163.1, + "valid_targets_min": 382 + }, + { + "epoch": 4.856, + "grad_norm": 0.6098198423480345, + "learning_rate": 4.279984358726925e-06, + "loss": 0.3104, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3045882284641266, + "step": 3035, + "valid_targets_mean": 2482.1, + "valid_targets_min": 323 + }, + { + "epoch": 4.864, + "grad_norm": 0.6783471430711661, + "learning_rate": 4.2226077844093205e-06, + "loss": 0.325, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3344470262527466, + "step": 3040, + "valid_targets_mean": 2026.6, + "valid_targets_min": 328 + }, + { + "epoch": 4.872, + "grad_norm": 0.7271051798429343, + "learning_rate": 4.165572974346435e-06, + "loss": 0.3406, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3305470049381256, + "step": 3045, + "valid_targets_mean": 1882.9, + "valid_targets_min": 440 + }, + { + "epoch": 4.88, + "grad_norm": 0.7064278926259979, + "learning_rate": 4.108881164006033e-06, + "loss": 0.3355, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.33677276968955994, + "step": 3050, + "valid_targets_mean": 2011.0, + "valid_targets_min": 298 + }, + { + "epoch": 4.888, + "grad_norm": 0.7670687816988632, + "learning_rate": 4.05253358142593e-06, + "loss": 0.3505, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.38650834560394287, + "step": 3055, + "valid_targets_mean": 1712.5, + "valid_targets_min": 224 + }, + { + "epoch": 4.896, + "grad_norm": 0.823254864302537, + "learning_rate": 3.9965314471874035e-06, + "loss": 0.3437, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3724706768989563, + "step": 3060, + "valid_targets_mean": 1539.1, + "valid_targets_min": 250 + }, + { + "epoch": 4.904, + "grad_norm": 0.7622151752672579, + "learning_rate": 3.940875974388749e-06, + "loss": 0.3517, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3494607210159302, + "step": 3065, + "valid_targets_mean": 1794.1, + "valid_targets_min": 285 + }, + { + "epoch": 4.912, + "grad_norm": 0.7616459647662795, + "learning_rate": 3.885568368619013e-06, + "loss": 0.3474, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.36004048585891724, + "step": 3070, + "valid_targets_mean": 1942.1, + "valid_targets_min": 259 + }, + { + "epoch": 4.92, + "grad_norm": 0.7405269574107914, + "learning_rate": 3.830609827931877e-06, + "loss": 0.346, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.33390575647354126, + "step": 3075, + "valid_targets_mean": 1956.4, + "valid_targets_min": 365 + }, + { + "epoch": 4.928, + "grad_norm": 0.629174699341205, + "learning_rate": 3.7760015428196694e-06, + "loss": 0.3382, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.33517584204673767, + "step": 3080, + "valid_targets_mean": 2459.3, + "valid_targets_min": 304 + }, + { + "epoch": 4.936, + "grad_norm": 0.7612521654496467, + "learning_rate": 3.7217446961876413e-06, + "loss": 0.3418, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3390304446220398, + "step": 3085, + "valid_targets_mean": 1742.9, + "valid_targets_min": 428 + }, + { + "epoch": 4.944, + "grad_norm": 0.8311235874235049, + "learning_rate": 3.6678404633282826e-06, + "loss": 0.3384, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.35949212312698364, + "step": 3090, + "valid_targets_mean": 1668.8, + "valid_targets_min": 306 + }, + { + "epoch": 4.952, + "grad_norm": 0.6704197389248887, + "learning_rate": 3.6142900118959158e-06, + "loss": 0.3463, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3350984454154968, + "step": 3095, + "valid_targets_mean": 2150.2, + "valid_targets_min": 479 + }, + { + "epoch": 4.96, + "grad_norm": 0.6088463951252256, + "learning_rate": 3.561094501881339e-06, + "loss": 0.3249, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3589388430118561, + "step": 3100, + "valid_targets_mean": 2522.9, + "valid_targets_min": 307 + }, + { + "epoch": 4.968, + "grad_norm": 0.886349784805225, + "learning_rate": 3.5082550855867693e-06, + "loss": 0.3571, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3680537939071655, + "step": 3105, + "valid_targets_mean": 1367.1, + "valid_targets_min": 295 + }, + { + "epoch": 4.976, + "grad_norm": 0.7571526497670212, + "learning_rate": 3.455772907600841e-06, + "loss": 0.3519, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.314603716135025, + "step": 3110, + "valid_targets_mean": 1763.2, + "valid_targets_min": 370 + }, + { + "epoch": 4.984, + "grad_norm": 0.6678147975519791, + "learning_rate": 3.4036491047738075e-06, + "loss": 0.3298, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3356750011444092, + "step": 3115, + "valid_targets_mean": 2106.9, + "valid_targets_min": 260 + }, + { + "epoch": 4.992, + "grad_norm": 0.7096651863010865, + "learning_rate": 3.351884806192933e-06, + "loss": 0.3398, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3487074673175812, + "step": 3120, + "valid_targets_mean": 1872.3, + "valid_targets_min": 282 + }, + { + "epoch": 5.0, + "grad_norm": 0.801434104766138, + "learning_rate": 3.3004811331580268e-06, + "loss": 0.3541, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3651238679885864, + "step": 3125, + "valid_targets_mean": 1574.1, + "valid_targets_min": 271 + }, + { + "epoch": 5.008, + "grad_norm": 0.6168166311695106, + "learning_rate": 3.249439199157167e-06, + "loss": 0.3318, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.316333532333374, + "step": 3130, + "valid_targets_mean": 2457.4, + "valid_targets_min": 479 + }, + { + "epoch": 5.016, + "grad_norm": 0.8095895502737311, + "learning_rate": 3.198760109842558e-06, + "loss": 0.3418, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3591688275337219, + "step": 3135, + "valid_targets_mean": 1600.9, + "valid_targets_min": 283 + }, + { + "epoch": 5.024, + "grad_norm": 0.7458829862809224, + "learning_rate": 3.1484449630065894e-06, + "loss": 0.3523, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.37618887424468994, + "step": 3140, + "valid_targets_mean": 1904.2, + "valid_targets_min": 323 + }, + { + "epoch": 5.032, + "grad_norm": 0.7176506812423299, + "learning_rate": 3.0984948485580736e-06, + "loss": 0.3208, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3061801493167877, + "step": 3145, + "valid_targets_mean": 2161.0, + "valid_targets_min": 261 + }, + { + "epoch": 5.04, + "grad_norm": 0.7807096768279923, + "learning_rate": 3.048910848498605e-06, + "loss": 0.3195, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.32322657108306885, + "step": 3150, + "valid_targets_mean": 1788.6, + "valid_targets_min": 374 + }, + { + "epoch": 5.048, + "grad_norm": 0.6540428821985736, + "learning_rate": 2.9996940368991477e-06, + "loss": 0.3182, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.30878007411956787, + "step": 3155, + "valid_targets_mean": 2372.5, + "valid_targets_min": 635 + }, + { + "epoch": 5.056, + "grad_norm": 0.7062110572059693, + "learning_rate": 2.9508454798767516e-06, + "loss": 0.335, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.31720206141471863, + "step": 3160, + "valid_targets_mean": 2043.1, + "valid_targets_min": 305 + }, + { + "epoch": 5.064, + "grad_norm": 0.6884128837476633, + "learning_rate": 2.9023662355714766e-06, + "loss": 0.322, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.31560999155044556, + "step": 3165, + "valid_targets_mean": 2201.7, + "valid_targets_min": 289 + }, + { + "epoch": 5.072, + "grad_norm": 0.9513676887383257, + "learning_rate": 2.8542573541234707e-06, + "loss": 0.3494, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.34736162424087524, + "step": 3170, + "valid_targets_mean": 1400.4, + "valid_targets_min": 333 + }, + { + "epoch": 5.08, + "grad_norm": 0.7583194020113082, + "learning_rate": 2.80651987765018e-06, + "loss": 0.3435, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3454587459564209, + "step": 3175, + "valid_targets_mean": 1794.6, + "valid_targets_min": 330 + }, + { + "epoch": 5.088, + "grad_norm": 0.7751252415284927, + "learning_rate": 2.759154840223843e-06, + "loss": 0.318, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.32245928049087524, + "step": 3180, + "valid_targets_mean": 1767.7, + "valid_targets_min": 274 + }, + { + "epoch": 5.096, + "grad_norm": 0.678029128738721, + "learning_rate": 2.7121632678490327e-06, + "loss": 0.3382, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3460327386856079, + "step": 3185, + "valid_targets_mean": 2288.6, + "valid_targets_min": 234 + }, + { + "epoch": 5.104, + "grad_norm": 0.8230802127344796, + "learning_rate": 2.6655461784404768e-06, + "loss": 0.325, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2984952926635742, + "step": 3190, + "valid_targets_mean": 1525.9, + "valid_targets_min": 378 + }, + { + "epoch": 5.112, + "grad_norm": 0.6260791419331155, + "learning_rate": 2.6193045818009654e-06, + "loss": 0.3231, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.33746832609176636, + "step": 3195, + "valid_targets_mean": 2805.3, + "valid_targets_min": 809 + }, + { + "epoch": 5.12, + "grad_norm": 0.7745724962578587, + "learning_rate": 2.5734394795995066e-06, + "loss": 0.3169, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.29701170325279236, + "step": 3200, + "valid_targets_mean": 1686.8, + "valid_targets_min": 354 + }, + { + "epoch": 5.128, + "grad_norm": 0.764373687743754, + "learning_rate": 2.5279518653496272e-06, + "loss": 0.3427, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3417860269546509, + "step": 3205, + "valid_targets_mean": 1898.5, + "valid_targets_min": 342 + }, + { + "epoch": 5.136, + "grad_norm": 0.5913178857112821, + "learning_rate": 2.4828427243878307e-06, + "loss": 0.3258, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3062329888343811, + "step": 3210, + "valid_targets_mean": 2879.6, + "valid_targets_min": 323 + }, + { + "epoch": 5.144, + "grad_norm": 0.7000971424716644, + "learning_rate": 2.4381130338522762e-06, + "loss": 0.3112, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3380877375602722, + "step": 3215, + "valid_targets_mean": 2145.3, + "valid_targets_min": 257 + }, + { + "epoch": 5.152, + "grad_norm": 0.8785549490261971, + "learning_rate": 2.393763762661596e-06, + "loss": 0.3527, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3691244125366211, + "step": 3220, + "valid_targets_mean": 1531.1, + "valid_targets_min": 272 + }, + { + "epoch": 5.16, + "grad_norm": 0.6881987970742812, + "learning_rate": 2.349795871493925e-06, + "loss": 0.313, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.345796674489975, + "step": 3225, + "valid_targets_mean": 2233.5, + "valid_targets_min": 222 + }, + { + "epoch": 5.168, + "grad_norm": 0.7440524019380678, + "learning_rate": 2.3062103127660686e-06, + "loss": 0.343, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3765197992324829, + "step": 3230, + "valid_targets_mean": 1903.1, + "valid_targets_min": 340 + }, + { + "epoch": 5.176, + "grad_norm": 0.7116226874145382, + "learning_rate": 2.2630080306128833e-06, + "loss": 0.3361, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3360140323638916, + "step": 3235, + "valid_targets_mean": 2187.1, + "valid_targets_min": 322 + }, + { + "epoch": 5.184, + "grad_norm": 0.7157353294285637, + "learning_rate": 2.2201899608668365e-06, + "loss": 0.3201, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3292391002178192, + "step": 3240, + "valid_targets_mean": 1996.5, + "valid_targets_min": 289 + }, + { + "epoch": 5.192, + "grad_norm": 0.8075659035374775, + "learning_rate": 2.1777570310377084e-06, + "loss": 0.3604, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.353488951921463, + "step": 3245, + "valid_targets_mean": 1800.3, + "valid_targets_min": 225 + }, + { + "epoch": 5.2, + "grad_norm": 0.8837596241034232, + "learning_rate": 2.1357101602925323e-06, + "loss": 0.3304, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3177199363708496, + "step": 3250, + "valid_targets_mean": 1669.4, + "valid_targets_min": 271 + }, + { + "epoch": 5.208, + "grad_norm": 0.8592757152496271, + "learning_rate": 2.0940502594356427e-06, + "loss": 0.3424, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.34793126583099365, + "step": 3255, + "valid_targets_mean": 1629.4, + "valid_targets_min": 278 + }, + { + "epoch": 5.216, + "grad_norm": 0.6068332606006058, + "learning_rate": 2.052778230888994e-06, + "loss": 0.3176, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27529823780059814, + "step": 3260, + "valid_targets_mean": 2799.7, + "valid_targets_min": 437 + }, + { + "epoch": 5.224, + "grad_norm": 0.6723133171431055, + "learning_rate": 2.0118949686725786e-06, + "loss": 0.325, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2843213677406311, + "step": 3265, + "valid_targets_mean": 2273.4, + "valid_targets_min": 460 + }, + { + "epoch": 5.232, + "grad_norm": 0.5740122113088663, + "learning_rate": 1.971401358385072e-06, + "loss": 0.3163, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3310263752937317, + "step": 3270, + "valid_targets_mean": 2962.2, + "valid_targets_min": 352 + }, + { + "epoch": 5.24, + "grad_norm": 0.6047834085752591, + "learning_rate": 1.9312982771846435e-06, + "loss": 0.3234, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.292560875415802, + "step": 3275, + "valid_targets_mean": 2475.9, + "valid_targets_min": 443 + }, + { + "epoch": 5.248, + "grad_norm": 0.7123082387690532, + "learning_rate": 1.8915865937699652e-06, + "loss": 0.3396, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3306275010108948, + "step": 3280, + "valid_targets_mean": 2137.8, + "valid_targets_min": 368 + }, + { + "epoch": 5.256, + "grad_norm": 0.8544328983250468, + "learning_rate": 1.8522671683613946e-06, + "loss": 0.3384, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.336727499961853, + "step": 3285, + "valid_targets_mean": 1707.2, + "valid_targets_min": 343 + }, + { + "epoch": 5.264, + "grad_norm": 0.7484704147015101, + "learning_rate": 1.8133408526823283e-06, + "loss": 0.3374, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3111442029476166, + "step": 3290, + "valid_targets_mean": 2043.2, + "valid_targets_min": 283 + }, + { + "epoch": 5.272, + "grad_norm": 0.7559826239212142, + "learning_rate": 1.7748084899407558e-06, + "loss": 0.3275, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3079603314399719, + "step": 3295, + "valid_targets_mean": 2453.8, + "valid_targets_min": 566 + }, + { + "epoch": 5.28, + "grad_norm": 0.9368604683494061, + "learning_rate": 1.7366709148110118e-06, + "loss": 0.3439, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3560614585876465, + "step": 3300, + "valid_targets_mean": 1434.8, + "valid_targets_min": 276 + }, + { + "epoch": 5.288, + "grad_norm": 0.692979552830229, + "learning_rate": 1.698928953415675e-06, + "loss": 0.3228, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.292755126953125, + "step": 3305, + "valid_targets_mean": 2236.2, + "valid_targets_min": 391 + }, + { + "epoch": 5.296, + "grad_norm": 0.9674463953650514, + "learning_rate": 1.6615834233076756e-06, + "loss": 0.3281, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.335943341255188, + "step": 3310, + "valid_targets_mean": 1242.4, + "valid_targets_min": 289 + }, + { + "epoch": 5.304, + "grad_norm": 0.8478280677057598, + "learning_rate": 1.6246351334525944e-06, + "loss": 0.3411, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3230572044849396, + "step": 3315, + "valid_targets_mean": 1569.6, + "valid_targets_min": 307 + }, + { + "epoch": 5.312, + "grad_norm": 0.8876053781501144, + "learning_rate": 1.5880848842111362e-06, + "loss": 0.3246, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.32399290800094604, + "step": 3320, + "valid_targets_mean": 1403.8, + "valid_targets_min": 261 + }, + { + "epoch": 5.32, + "grad_norm": 0.6632377237032445, + "learning_rate": 1.5519334673218023e-06, + "loss": 0.3429, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.34573253989219666, + "step": 3325, + "valid_targets_mean": 2316.0, + "valid_targets_min": 452 + }, + { + "epoch": 5.328, + "grad_norm": 0.8801648422040877, + "learning_rate": 1.5161816658837002e-06, + "loss": 0.3289, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3267403244972229, + "step": 3330, + "valid_targets_mean": 1425.0, + "valid_targets_min": 351 + }, + { + "epoch": 5.336, + "grad_norm": 0.7714577400348394, + "learning_rate": 1.4808302543396423e-06, + "loss": 0.3307, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3234866261482239, + "step": 3335, + "valid_targets_mean": 1636.6, + "valid_targets_min": 263 + }, + { + "epoch": 5.344, + "grad_norm": 0.642035554386149, + "learning_rate": 1.445879998459314e-06, + "loss": 0.3205, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3185698688030243, + "step": 3340, + "valid_targets_mean": 2389.4, + "valid_targets_min": 293 + }, + { + "epoch": 5.352, + "grad_norm": 0.7958677187406809, + "learning_rate": 1.4113316553227296e-06, + "loss": 0.3384, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.37837621569633484, + "step": 3345, + "valid_targets_mean": 2085.4, + "valid_targets_min": 376 + }, + { + "epoch": 5.36, + "grad_norm": 0.8254616003100667, + "learning_rate": 1.3771859733037896e-06, + "loss": 0.3316, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.34857773780822754, + "step": 3350, + "valid_targets_mean": 1714.2, + "valid_targets_min": 274 + }, + { + "epoch": 5.368, + "grad_norm": 0.6807508098199462, + "learning_rate": 1.3434436920541072e-06, + "loss": 0.3532, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3561273515224457, + "step": 3355, + "valid_targets_mean": 2241.6, + "valid_targets_min": 386 + }, + { + "epoch": 5.376, + "grad_norm": 0.7880738724594502, + "learning_rate": 1.3101055424869768e-06, + "loss": 0.3253, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3386726677417755, + "step": 3360, + "valid_targets_mean": 1655.6, + "valid_targets_min": 341 + }, + { + "epoch": 5.384, + "grad_norm": 0.8044155112355199, + "learning_rate": 1.2771722467615266e-06, + "loss": 0.3361, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.34906601905822754, + "step": 3365, + "valid_targets_mean": 1888.2, + "valid_targets_min": 311 + }, + { + "epoch": 5.392, + "grad_norm": 0.946556302943149, + "learning_rate": 1.2446445182670818e-06, + "loss": 0.3231, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.33505770564079285, + "step": 3370, + "valid_targets_mean": 1463.1, + "valid_targets_min": 241 + }, + { + "epoch": 5.4, + "grad_norm": 0.6668652899131327, + "learning_rate": 1.21252306160772e-06, + "loss": 0.3273, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.35054919123649597, + "step": 3375, + "valid_targets_mean": 2484.1, + "valid_targets_min": 567 + }, + { + "epoch": 5.408, + "grad_norm": 1.1006169555817005, + "learning_rate": 1.1808085725870088e-06, + "loss": 0.3374, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3627769947052002, + "step": 3380, + "valid_targets_mean": 942.7, + "valid_targets_min": 244 + }, + { + "epoch": 5.416, + "grad_norm": 0.710177773783025, + "learning_rate": 1.1495017381929197e-06, + "loss": 0.3348, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3235899806022644, + "step": 3385, + "valid_targets_mean": 2099.7, + "valid_targets_min": 389 + }, + { + "epoch": 5.424, + "grad_norm": 0.6360542443344822, + "learning_rate": 1.1186032365829514e-06, + "loss": 0.3141, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2974139451980591, + "step": 3390, + "valid_targets_mean": 2257.2, + "valid_targets_min": 335 + }, + { + "epoch": 5.432, + "grad_norm": 0.7700543404194444, + "learning_rate": 1.088113737069456e-06, + "loss": 0.3457, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3570263981819153, + "step": 3395, + "valid_targets_mean": 1822.6, + "valid_targets_min": 280 + }, + { + "epoch": 5.44, + "grad_norm": 0.9275216639277493, + "learning_rate": 1.0580339001051153e-06, + "loss": 0.3361, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.35213181376457214, + "step": 3400, + "valid_targets_mean": 1528.0, + "valid_targets_min": 358 + }, + { + "epoch": 5.448, + "grad_norm": 0.789841574621178, + "learning_rate": 1.0283643772686535e-06, + "loss": 0.3331, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.33587008714675903, + "step": 3405, + "valid_targets_mean": 1687.8, + "valid_targets_min": 279 + }, + { + "epoch": 5.456, + "grad_norm": 0.7625043418529229, + "learning_rate": 9.991058112507113e-07, + "loss": 0.3285, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3387317657470703, + "step": 3410, + "valid_targets_mean": 1853.2, + "valid_targets_min": 336 + }, + { + "epoch": 5.464, + "grad_norm": 0.7262108890341252, + "learning_rate": 9.702588358399345e-07, + "loss": 0.3383, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.35995280742645264, + "step": 3415, + "valid_targets_mean": 2125.7, + "valid_targets_min": 263 + }, + { + "epoch": 5.4719999999999995, + "grad_norm": 0.8471611948426642, + "learning_rate": 9.418240759092434e-07, + "loss": 0.3292, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3054879307746887, + "step": 3420, + "valid_targets_mean": 1520.5, + "valid_targets_min": 360 + }, + { + "epoch": 5.48, + "grad_norm": 0.6778318495071975, + "learning_rate": 9.138021474022763e-07, + "loss": 0.3233, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2818812429904938, + "step": 3425, + "valid_targets_mean": 2094.9, + "valid_targets_min": 284 + }, + { + "epoch": 5.4879999999999995, + "grad_norm": 0.6708692528522718, + "learning_rate": 8.861936573200825e-07, + "loss": 0.3402, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.32221177220344543, + "step": 3430, + "valid_targets_mean": 2342.2, + "valid_targets_min": 297 + }, + { + "epoch": 5.496, + "grad_norm": 0.6532748349469135, + "learning_rate": 8.58999203707942e-07, + "loss": 0.329, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.30228191614151, + "step": 3435, + "valid_targets_mean": 2445.2, + "valid_targets_min": 404 + }, + { + "epoch": 5.504, + "grad_norm": 0.5726081781167237, + "learning_rate": 8.322193756424401e-07, + "loss": 0.322, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2853536605834961, + "step": 3440, + "valid_targets_mean": 2608.8, + "valid_targets_min": 316 + }, + { + "epoch": 5.5120000000000005, + "grad_norm": 0.6241256031339275, + "learning_rate": 8.058547532186667e-07, + "loss": 0.3221, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.28844356536865234, + "step": 3445, + "valid_targets_mean": 2246.0, + "valid_targets_min": 344 + }, + { + "epoch": 5.52, + "grad_norm": 0.620098378031259, + "learning_rate": 7.799059075376991e-07, + "loss": 0.3226, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.32384443283081055, + "step": 3450, + "valid_targets_mean": 3012.6, + "valid_targets_min": 528 + }, + { + "epoch": 5.5280000000000005, + "grad_norm": 0.7945148445758893, + "learning_rate": 7.54373400694195e-07, + "loss": 0.3234, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3543769121170044, + "step": 3455, + "valid_targets_mean": 2061.6, + "valid_targets_min": 207 + }, + { + "epoch": 5.536, + "grad_norm": 0.6549048898714214, + "learning_rate": 7.292577857642302e-07, + "loss": 0.3306, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3170838952064514, + "step": 3460, + "valid_targets_mean": 2555.1, + "valid_targets_min": 353 + }, + { + "epoch": 5.5440000000000005, + "grad_norm": 0.7943780052965277, + "learning_rate": 7.045596067933158e-07, + "loss": 0.3287, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.34069475531578064, + "step": 3465, + "valid_targets_mean": 1780.8, + "valid_targets_min": 485 + }, + { + "epoch": 5.552, + "grad_norm": 0.6612986111726853, + "learning_rate": 6.80279398784609e-07, + "loss": 0.3311, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.348349004983902, + "step": 3470, + "valid_targets_mean": 2754.0, + "valid_targets_min": 306 + }, + { + "epoch": 5.5600000000000005, + "grad_norm": 0.7185356693695621, + "learning_rate": 6.56417687687343e-07, + "loss": 0.3301, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.33669647574424744, + "step": 3475, + "valid_targets_mean": 2008.5, + "valid_targets_min": 429 + }, + { + "epoch": 5.568, + "grad_norm": 0.6635770223121686, + "learning_rate": 6.329749903854066e-07, + "loss": 0.3111, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.30377066135406494, + "step": 3480, + "valid_targets_mean": 2455.4, + "valid_targets_min": 353 + }, + { + "epoch": 5.576, + "grad_norm": 0.731883116117695, + "learning_rate": 6.099518146861628e-07, + "loss": 0.3338, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3424713909626007, + "step": 3485, + "valid_targets_mean": 1975.4, + "valid_targets_min": 323 + }, + { + "epoch": 5.584, + "grad_norm": 0.6529003283713056, + "learning_rate": 5.873486593094546e-07, + "loss": 0.3387, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3306881785392761, + "step": 3490, + "valid_targets_mean": 2475.8, + "valid_targets_min": 416 + }, + { + "epoch": 5.592, + "grad_norm": 0.8033401606914047, + "learning_rate": 5.651660138767834e-07, + "loss": 0.3262, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.31199198961257935, + "step": 3495, + "valid_targets_mean": 1727.5, + "valid_targets_min": 280 + }, + { + "epoch": 5.6, + "grad_norm": 0.6888026517088008, + "learning_rate": 5.434043589007232e-07, + "loss": 0.3262, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3344615697860718, + "step": 3500, + "valid_targets_mean": 2408.3, + "valid_targets_min": 318 + }, + { + "epoch": 5.608, + "grad_norm": 0.7167930643520513, + "learning_rate": 5.220641657744963e-07, + "loss": 0.3128, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.289031445980072, + "step": 3505, + "valid_targets_mean": 2380.4, + "valid_targets_min": 343 + }, + { + "epoch": 5.616, + "grad_norm": 0.6650020725804082, + "learning_rate": 5.0114589676177e-07, + "loss": 0.3318, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3253558278083801, + "step": 3510, + "valid_targets_mean": 2200.4, + "valid_targets_min": 262 + }, + { + "epoch": 5.624, + "grad_norm": 0.8321317810618367, + "learning_rate": 4.806500049866492e-07, + "loss": 0.3355, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3572784960269928, + "step": 3515, + "valid_targets_mean": 1833.8, + "valid_targets_min": 483 + }, + { + "epoch": 5.632, + "grad_norm": 0.7107632033001783, + "learning_rate": 4.6057693442383756e-07, + "loss": 0.3165, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2704983651638031, + "step": 3520, + "valid_targets_mean": 2014.1, + "valid_targets_min": 342 + }, + { + "epoch": 5.64, + "grad_norm": 0.6520521303095141, + "learning_rate": 4.409271198890519e-07, + "loss": 0.324, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.31402522325515747, + "step": 3525, + "valid_targets_mean": 2245.9, + "valid_targets_min": 377 + }, + { + "epoch": 5.648, + "grad_norm": 0.738454412849037, + "learning_rate": 4.217009870295763e-07, + "loss": 0.3437, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.33777379989624023, + "step": 3530, + "valid_targets_mean": 1982.0, + "valid_targets_min": 373 + }, + { + "epoch": 5.656, + "grad_norm": 0.7089086442983696, + "learning_rate": 4.028989523150628e-07, + "loss": 0.3166, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3117513656616211, + "step": 3535, + "valid_targets_mean": 2105.9, + "valid_targets_min": 254 + }, + { + "epoch": 5.664, + "grad_norm": 0.5722406607187598, + "learning_rate": 3.8452142302849216e-07, + "loss": 0.3266, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27710044384002686, + "step": 3540, + "valid_targets_mean": 2860.7, + "valid_targets_min": 263 + }, + { + "epoch": 5.672, + "grad_norm": 0.7556472335095206, + "learning_rate": 3.665687972573606e-07, + "loss": 0.3379, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.30273568630218506, + "step": 3545, + "valid_targets_mean": 1655.9, + "valid_targets_min": 259 + }, + { + "epoch": 5.68, + "grad_norm": 0.6632535758996941, + "learning_rate": 3.4904146388506475e-07, + "loss": 0.3226, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.28801876306533813, + "step": 3550, + "valid_targets_mean": 2170.7, + "valid_targets_min": 326 + }, + { + "epoch": 5.688, + "grad_norm": 0.8987750692265181, + "learning_rate": 3.319398025824572e-07, + "loss": 0.326, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.30036014318466187, + "step": 3555, + "valid_targets_mean": 1281.9, + "valid_targets_min": 271 + }, + { + "epoch": 5.696, + "grad_norm": 0.7550371265505398, + "learning_rate": 3.152641837996373e-07, + "loss": 0.3314, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3175736963748932, + "step": 3560, + "valid_targets_mean": 2092.7, + "valid_targets_min": 320 + }, + { + "epoch": 5.704, + "grad_norm": 0.7741459072666987, + "learning_rate": 2.990149687579247e-07, + "loss": 0.335, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3476804792881012, + "step": 3565, + "valid_targets_mean": 1795.7, + "valid_targets_min": 271 + }, + { + "epoch": 5.712, + "grad_norm": 0.7602774287821416, + "learning_rate": 2.8319250944203625e-07, + "loss": 0.3357, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.34763944149017334, + "step": 3570, + "valid_targets_mean": 1917.4, + "valid_targets_min": 299 + }, + { + "epoch": 5.72, + "grad_norm": 0.7742965493927609, + "learning_rate": 2.677971485924502e-07, + "loss": 0.3269, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.30256009101867676, + "step": 3575, + "valid_targets_mean": 1715.3, + "valid_targets_min": 288 + }, + { + "epoch": 5.728, + "grad_norm": 0.7300230601121387, + "learning_rate": 2.52829219697992e-07, + "loss": 0.3039, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.30355215072631836, + "step": 3580, + "valid_targets_mean": 1757.9, + "valid_targets_min": 267 + }, + { + "epoch": 5.736, + "grad_norm": 0.7762399783738475, + "learning_rate": 2.3828904698861565e-07, + "loss": 0.3188, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3432141840457916, + "step": 3585, + "valid_targets_mean": 1766.1, + "valid_targets_min": 351 + }, + { + "epoch": 5.744, + "grad_norm": 0.6808736664760406, + "learning_rate": 2.2417694542836489e-07, + "loss": 0.3468, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3141390383243561, + "step": 3590, + "valid_targets_mean": 2099.9, + "valid_targets_min": 278 + }, + { + "epoch": 5.752, + "grad_norm": 0.8148780711629424, + "learning_rate": 2.104932207085586e-07, + "loss": 0.3187, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3025011420249939, + "step": 3595, + "valid_targets_mean": 2257.2, + "valid_targets_min": 364 + }, + { + "epoch": 5.76, + "grad_norm": 0.6423137397790368, + "learning_rate": 1.97238169241174e-07, + "loss": 0.3364, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3432384133338928, + "step": 3600, + "valid_targets_mean": 2428.4, + "valid_targets_min": 252 + }, + { + "epoch": 5.768, + "grad_norm": 0.7426654171361444, + "learning_rate": 1.8441207815241613e-07, + "loss": 0.3186, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3106253147125244, + "step": 3605, + "valid_targets_mean": 2039.9, + "valid_targets_min": 271 + }, + { + "epoch": 5.776, + "grad_norm": 0.6652373779338768, + "learning_rate": 1.720152252765095e-07, + "loss": 0.3252, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.32659274339675903, + "step": 3610, + "valid_targets_mean": 2137.1, + "valid_targets_min": 409 + }, + { + "epoch": 5.784, + "grad_norm": 0.6701232063134795, + "learning_rate": 1.600478791496629e-07, + "loss": 0.3178, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.29657870531082153, + "step": 3615, + "valid_targets_mean": 2267.0, + "valid_targets_min": 441 + }, + { + "epoch": 5.792, + "grad_norm": 0.7303502269080819, + "learning_rate": 1.4851029900427415e-07, + "loss": 0.3064, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3133362829685211, + "step": 3620, + "valid_targets_mean": 1803.5, + "valid_targets_min": 238 + }, + { + "epoch": 5.8, + "grad_norm": 0.6533291656070508, + "learning_rate": 1.3740273476329224e-07, + "loss": 0.3158, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.30309417843818665, + "step": 3625, + "valid_targets_mean": 2136.8, + "valid_targets_min": 259 + }, + { + "epoch": 5.808, + "grad_norm": 0.7399739863973842, + "learning_rate": 1.2672542703482616e-07, + "loss": 0.3332, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.30318236351013184, + "step": 3630, + "valid_targets_mean": 2139.0, + "valid_targets_min": 494 + }, + { + "epoch": 5.816, + "grad_norm": 0.778385718165956, + "learning_rate": 1.164786071069135e-07, + "loss": 0.3257, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2864413559436798, + "step": 3635, + "valid_targets_mean": 1539.3, + "valid_targets_min": 294 + }, + { + "epoch": 5.824, + "grad_norm": 0.8987433166924019, + "learning_rate": 1.0666249694251785e-07, + "loss": 0.3336, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.308305025100708, + "step": 3640, + "valid_targets_mean": 1533.8, + "valid_targets_min": 374 + }, + { + "epoch": 5.832, + "grad_norm": 0.6927665376123957, + "learning_rate": 9.72773091747281e-08, + "loss": 0.3174, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3018300533294678, + "step": 3645, + "valid_targets_mean": 1903.2, + "valid_targets_min": 265 + }, + { + "epoch": 5.84, + "grad_norm": 0.8298783342243384, + "learning_rate": 8.832324710214002e-08, + "loss": 0.3382, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.35006678104400635, + "step": 3650, + "valid_targets_mean": 1798.3, + "valid_targets_min": 334 + }, + { + "epoch": 5.848, + "grad_norm": 0.7622869191108226, + "learning_rate": 7.980050468445744e-08, + "loss": 0.336, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3575170338153839, + "step": 3655, + "valid_targets_mean": 1869.7, + "valid_targets_min": 369 + }, + { + "epoch": 5.856, + "grad_norm": 0.8246638087544609, + "learning_rate": 7.170926653829347e-08, + "loss": 0.3574, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3515303134918213, + "step": 3660, + "valid_targets_mean": 1992.5, + "valid_targets_min": 425 + }, + { + "epoch": 5.864, + "grad_norm": 0.6754002624217912, + "learning_rate": 6.404970793317145e-08, + "loss": 0.3261, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.330025851726532, + "step": 3665, + "valid_targets_mean": 2442.9, + "valid_targets_min": 433 + }, + { + "epoch": 5.872, + "grad_norm": 0.6473983262842183, + "learning_rate": 5.682199478772133e-08, + "loss": 0.3331, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3028303384780884, + "step": 3670, + "valid_targets_mean": 2476.4, + "valid_targets_min": 383 + }, + { + "epoch": 5.88, + "grad_norm": 0.7639363603266618, + "learning_rate": 5.0026283666093635e-08, + "loss": 0.3309, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.33052030205726624, + "step": 3675, + "valid_targets_mean": 2025.9, + "valid_targets_min": 372 + }, + { + "epoch": 5.888, + "grad_norm": 0.6294062682519683, + "learning_rate": 4.366272177456665e-08, + "loss": 0.332, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.33227160573005676, + "step": 3680, + "valid_targets_mean": 2534.4, + "valid_targets_min": 334 + }, + { + "epoch": 5.896, + "grad_norm": 0.5896518196906118, + "learning_rate": 3.773144695834674e-08, + "loss": 0.3028, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27254822850227356, + "step": 3685, + "valid_targets_mean": 2382.2, + "valid_targets_min": 332 + }, + { + "epoch": 5.904, + "grad_norm": 0.7444979320101701, + "learning_rate": 3.223258769860405e-08, + "loss": 0.3259, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.30232083797454834, + "step": 3690, + "valid_targets_mean": 2272.6, + "valid_targets_min": 415 + }, + { + "epoch": 5.912, + "grad_norm": 0.649888311599965, + "learning_rate": 2.716626310966808e-08, + "loss": 0.3296, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3510351777076721, + "step": 3695, + "valid_targets_mean": 2522.5, + "valid_targets_min": 390 + }, + { + "epoch": 5.92, + "grad_norm": 0.6752144213255896, + "learning_rate": 2.253258293645866e-08, + "loss": 0.3219, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3135150671005249, + "step": 3700, + "valid_targets_mean": 2086.2, + "valid_targets_min": 274 + }, + { + "epoch": 5.928, + "grad_norm": 0.6630077719636371, + "learning_rate": 1.8331647552110033e-08, + "loss": 0.3286, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.313254177570343, + "step": 3705, + "valid_targets_mean": 2617.1, + "valid_targets_min": 282 + }, + { + "epoch": 5.936, + "grad_norm": 0.7463183447088082, + "learning_rate": 1.456354795578374e-08, + "loss": 0.3228, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3249838352203369, + "step": 3710, + "valid_targets_mean": 1802.7, + "valid_targets_min": 295 + }, + { + "epoch": 5.944, + "grad_norm": 0.7711320827925983, + "learning_rate": 1.1228365770714622e-08, + "loss": 0.346, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.35792410373687744, + "step": 3715, + "valid_targets_mean": 1879.2, + "valid_targets_min": 422 + }, + { + "epoch": 5.952, + "grad_norm": 0.6984324142077142, + "learning_rate": 8.326173242432233e-09, + "loss": 0.337, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3188132643699646, + "step": 3720, + "valid_targets_mean": 2298.1, + "valid_targets_min": 435 + }, + { + "epoch": 5.96, + "grad_norm": 0.7136132209222245, + "learning_rate": 5.857033237199883e-09, + "loss": 0.3268, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3068236708641052, + "step": 3725, + "valid_targets_mean": 2063.9, + "valid_targets_min": 441 + }, + { + "epoch": 5.968, + "grad_norm": 0.70950185567798, + "learning_rate": 3.820999240644608e-09, + "loss": 0.326, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3352380692958832, + "step": 3730, + "valid_targets_mean": 2178.1, + "valid_targets_min": 292 + }, + { + "epoch": 5.976, + "grad_norm": 0.6878972735540901, + "learning_rate": 2.2181153566158687e-09, + "loss": 0.3276, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.30763864517211914, + "step": 3735, + "valid_targets_mean": 2563.0, + "valid_targets_min": 262 + }, + { + "epoch": 5.984, + "grad_norm": 0.7742897122777167, + "learning_rate": 1.0484163062107755e-09, + "loss": 0.3181, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.32049447298049927, + "step": 3740, + "valid_targets_mean": 2073.6, + "valid_targets_min": 330 + }, + { + "epoch": 5.992, + "grad_norm": 0.6597358357567812, + "learning_rate": 3.11927427034675e-10, + "loss": 0.3225, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3025015592575073, + "step": 3745, + "valid_targets_mean": 2095.5, + "valid_targets_min": 282 + }, + { + "epoch": 6.0, + "grad_norm": 0.7059371405305053, + "learning_rate": 8.664672648261985e-12, + "loss": 0.3141, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.28063035011291504, + "step": 3750, + "valid_targets_mean": 1914.4, + "valid_targets_min": 304 + }, + { + "epoch": 6.0, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.28063035011291504, + "step": 3750, + "total_flos": 598298883194880.0, + "train_loss": 0.39238279145558674, + "train_runtime": 16274.8111, + "train_samples_per_second": 3.685, + "train_steps_per_second": 0.23, + "valid_targets_mean": 1914.4, + "valid_targets_min": 304 + } + ], + "logging_steps": 5, + "max_steps": 3750, + "num_input_tokens_seen": 0, + "num_train_epochs": 6, + "save_steps": 100, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": true + }, + "attributes": {} + } + }, + "total_flos": 598298883194880.0, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +}