| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 6.0, | |
| "eval_steps": 500, | |
| "global_step": 3750, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.008, | |
| "grad_norm": 8.70977307694482, | |
| "learning_rate": 4.266666666666667e-07, | |
| "loss": 0.8749, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.8716846108436584, | |
| "step": 5, | |
| "valid_targets_mean": 2538.1, | |
| "valid_targets_min": 314 | |
| }, | |
| { | |
| "epoch": 0.016, | |
| "grad_norm": 10.29090145396382, | |
| "learning_rate": 9.600000000000001e-07, | |
| "loss": 0.8863, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.9336282014846802, | |
| "step": 10, | |
| "valid_targets_mean": 1696.2, | |
| "valid_targets_min": 296 | |
| }, | |
| { | |
| "epoch": 0.024, | |
| "grad_norm": 7.5750029640791725, | |
| "learning_rate": 1.4933333333333336e-06, | |
| "loss": 0.8563, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.8610094785690308, | |
| "step": 15, | |
| "valid_targets_mean": 2235.9, | |
| "valid_targets_min": 420 | |
| }, | |
| { | |
| "epoch": 0.032, | |
| "grad_norm": 6.433424219734035, | |
| "learning_rate": 2.0266666666666666e-06, | |
| "loss": 0.7908, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.8827441930770874, | |
| "step": 20, | |
| "valid_targets_mean": 2079.4, | |
| "valid_targets_min": 286 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "grad_norm": 5.074760879549001, | |
| "learning_rate": 2.56e-06, | |
| "loss": 0.8504, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.819884181022644, | |
| "step": 25, | |
| "valid_targets_mean": 1559.2, | |
| "valid_targets_min": 286 | |
| }, | |
| { | |
| "epoch": 0.048, | |
| "grad_norm": 2.712799636966812, | |
| "learning_rate": 3.093333333333334e-06, | |
| "loss": 0.7851, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.6933596134185791, | |
| "step": 30, | |
| "valid_targets_mean": 2028.6, | |
| "valid_targets_min": 463 | |
| }, | |
| { | |
| "epoch": 0.056, | |
| "grad_norm": 2.0373406539066146, | |
| "learning_rate": 3.6266666666666674e-06, | |
| "loss": 0.7479, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.71419358253479, | |
| "step": 35, | |
| "valid_targets_mean": 2210.4, | |
| "valid_targets_min": 617 | |
| }, | |
| { | |
| "epoch": 0.064, | |
| "grad_norm": 1.4068779549549064, | |
| "learning_rate": 4.16e-06, | |
| "loss": 0.745, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.6900101900100708, | |
| "step": 40, | |
| "valid_targets_mean": 1853.0, | |
| "valid_targets_min": 412 | |
| }, | |
| { | |
| "epoch": 0.072, | |
| "grad_norm": 1.1097745676204722, | |
| "learning_rate": 4.693333333333334e-06, | |
| "loss": 0.6973, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.6602718830108643, | |
| "step": 45, | |
| "valid_targets_mean": 2335.8, | |
| "valid_targets_min": 345 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "grad_norm": 1.470642358348425, | |
| "learning_rate": 5.226666666666667e-06, | |
| "loss": 0.6999, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.8183687925338745, | |
| "step": 50, | |
| "valid_targets_mean": 1520.8, | |
| "valid_targets_min": 285 | |
| }, | |
| { | |
| "epoch": 0.088, | |
| "grad_norm": 1.0526714033415538, | |
| "learning_rate": 5.76e-06, | |
| "loss": 0.6424, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.5843245983123779, | |
| "step": 55, | |
| "valid_targets_mean": 1541.6, | |
| "valid_targets_min": 238 | |
| }, | |
| { | |
| "epoch": 0.096, | |
| "grad_norm": 0.9623188767849996, | |
| "learning_rate": 6.293333333333334e-06, | |
| "loss": 0.6522, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.660775363445282, | |
| "step": 60, | |
| "valid_targets_mean": 1922.9, | |
| "valid_targets_min": 276 | |
| }, | |
| { | |
| "epoch": 0.104, | |
| "grad_norm": 0.8623555129729585, | |
| "learning_rate": 6.826666666666667e-06, | |
| "loss": 0.642, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.6567976474761963, | |
| "step": 65, | |
| "valid_targets_mean": 1989.9, | |
| "valid_targets_min": 280 | |
| }, | |
| { | |
| "epoch": 0.112, | |
| "grad_norm": 0.7431652162692693, | |
| "learning_rate": 7.360000000000001e-06, | |
| "loss": 0.6009, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.6057322025299072, | |
| "step": 70, | |
| "valid_targets_mean": 2507.4, | |
| "valid_targets_min": 358 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "grad_norm": 0.8529808791667168, | |
| "learning_rate": 7.893333333333335e-06, | |
| "loss": 0.648, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.6728823781013489, | |
| "step": 75, | |
| "valid_targets_mean": 1934.2, | |
| "valid_targets_min": 369 | |
| }, | |
| { | |
| "epoch": 0.128, | |
| "grad_norm": 0.8778812689341119, | |
| "learning_rate": 8.426666666666667e-06, | |
| "loss": 0.597, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.6438958048820496, | |
| "step": 80, | |
| "valid_targets_mean": 1775.9, | |
| "valid_targets_min": 312 | |
| }, | |
| { | |
| "epoch": 0.136, | |
| "grad_norm": 0.77264016634592, | |
| "learning_rate": 8.96e-06, | |
| "loss": 0.5859, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.626758873462677, | |
| "step": 85, | |
| "valid_targets_mean": 2020.9, | |
| "valid_targets_min": 281 | |
| }, | |
| { | |
| "epoch": 0.144, | |
| "grad_norm": 0.7706519383923865, | |
| "learning_rate": 9.493333333333334e-06, | |
| "loss": 0.5884, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.5779508948326111, | |
| "step": 90, | |
| "valid_targets_mean": 1888.1, | |
| "valid_targets_min": 263 | |
| }, | |
| { | |
| "epoch": 0.152, | |
| "grad_norm": 0.8275613861014995, | |
| "learning_rate": 1.0026666666666667e-05, | |
| "loss": 0.5942, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.5887092351913452, | |
| "step": 95, | |
| "valid_targets_mean": 1662.2, | |
| "valid_targets_min": 390 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "grad_norm": 0.6530661253948835, | |
| "learning_rate": 1.056e-05, | |
| "loss": 0.5709, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.5566831827163696, | |
| "step": 100, | |
| "valid_targets_mean": 2530.6, | |
| "valid_targets_min": 752 | |
| }, | |
| { | |
| "epoch": 0.168, | |
| "grad_norm": 0.6342642892399393, | |
| "learning_rate": 1.1093333333333334e-05, | |
| "loss": 0.5734, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.5590141415596008, | |
| "step": 105, | |
| "valid_targets_mean": 3199.7, | |
| "valid_targets_min": 1446 | |
| }, | |
| { | |
| "epoch": 0.176, | |
| "grad_norm": 0.7503371762675456, | |
| "learning_rate": 1.1626666666666668e-05, | |
| "loss": 0.5718, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.5699026584625244, | |
| "step": 110, | |
| "valid_targets_mean": 2242.4, | |
| "valid_targets_min": 298 | |
| }, | |
| { | |
| "epoch": 0.184, | |
| "grad_norm": 0.8004502688005221, | |
| "learning_rate": 1.216e-05, | |
| "loss": 0.5585, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.48529720306396484, | |
| "step": 115, | |
| "valid_targets_mean": 1892.6, | |
| "valid_targets_min": 395 | |
| }, | |
| { | |
| "epoch": 0.192, | |
| "grad_norm": 0.8162220821968871, | |
| "learning_rate": 1.2693333333333336e-05, | |
| "loss": 0.5403, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.584542453289032, | |
| "step": 120, | |
| "valid_targets_mean": 2136.8, | |
| "valid_targets_min": 271 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "grad_norm": 0.8057151061738206, | |
| "learning_rate": 1.3226666666666668e-05, | |
| "loss": 0.5595, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.5924099683761597, | |
| "step": 125, | |
| "valid_targets_mean": 1891.8, | |
| "valid_targets_min": 271 | |
| }, | |
| { | |
| "epoch": 0.208, | |
| "grad_norm": 0.6095564722536361, | |
| "learning_rate": 1.376e-05, | |
| "loss": 0.5393, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4800427556037903, | |
| "step": 130, | |
| "valid_targets_mean": 2730.5, | |
| "valid_targets_min": 434 | |
| }, | |
| { | |
| "epoch": 0.216, | |
| "grad_norm": 0.8246272693017236, | |
| "learning_rate": 1.4293333333333334e-05, | |
| "loss": 0.558, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.6110150814056396, | |
| "step": 135, | |
| "valid_targets_mean": 1948.5, | |
| "valid_targets_min": 330 | |
| }, | |
| { | |
| "epoch": 0.224, | |
| "grad_norm": 1.0235585826027744, | |
| "learning_rate": 1.4826666666666666e-05, | |
| "loss": 0.5627, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.6028550863265991, | |
| "step": 140, | |
| "valid_targets_mean": 1263.0, | |
| "valid_targets_min": 255 | |
| }, | |
| { | |
| "epoch": 0.232, | |
| "grad_norm": 0.7397315844998203, | |
| "learning_rate": 1.5360000000000002e-05, | |
| "loss": 0.5543, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.5466543436050415, | |
| "step": 145, | |
| "valid_targets_mean": 2054.7, | |
| "valid_targets_min": 541 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "grad_norm": 0.6654060201581206, | |
| "learning_rate": 1.5893333333333333e-05, | |
| "loss": 0.5127, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.5320032835006714, | |
| "step": 150, | |
| "valid_targets_mean": 2859.1, | |
| "valid_targets_min": 356 | |
| }, | |
| { | |
| "epoch": 0.248, | |
| "grad_norm": 0.8143742586812208, | |
| "learning_rate": 1.642666666666667e-05, | |
| "loss": 0.525, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.5205363035202026, | |
| "step": 155, | |
| "valid_targets_mean": 1714.2, | |
| "valid_targets_min": 288 | |
| }, | |
| { | |
| "epoch": 0.256, | |
| "grad_norm": 0.7375443100560095, | |
| "learning_rate": 1.696e-05, | |
| "loss": 0.4991, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.48814937472343445, | |
| "step": 160, | |
| "valid_targets_mean": 2070.8, | |
| "valid_targets_min": 203 | |
| }, | |
| { | |
| "epoch": 0.264, | |
| "grad_norm": 0.8228320965756343, | |
| "learning_rate": 1.7493333333333334e-05, | |
| "loss": 0.5455, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.5349533557891846, | |
| "step": 165, | |
| "valid_targets_mean": 1732.5, | |
| "valid_targets_min": 348 | |
| }, | |
| { | |
| "epoch": 0.272, | |
| "grad_norm": 0.7833507734682047, | |
| "learning_rate": 1.8026666666666668e-05, | |
| "loss": 0.5186, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.5089653730392456, | |
| "step": 170, | |
| "valid_targets_mean": 1734.4, | |
| "valid_targets_min": 462 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "grad_norm": 0.7264195058692751, | |
| "learning_rate": 1.8560000000000002e-05, | |
| "loss": 0.5339, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.5141239762306213, | |
| "step": 175, | |
| "valid_targets_mean": 2259.6, | |
| "valid_targets_min": 269 | |
| }, | |
| { | |
| "epoch": 0.288, | |
| "grad_norm": 0.8104091421121233, | |
| "learning_rate": 1.9093333333333336e-05, | |
| "loss": 0.5157, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.5261850357055664, | |
| "step": 180, | |
| "valid_targets_mean": 1862.8, | |
| "valid_targets_min": 327 | |
| }, | |
| { | |
| "epoch": 0.296, | |
| "grad_norm": 0.6983796654352136, | |
| "learning_rate": 1.9626666666666666e-05, | |
| "loss": 0.493, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.5028804540634155, | |
| "step": 185, | |
| "valid_targets_mean": 2254.9, | |
| "valid_targets_min": 417 | |
| }, | |
| { | |
| "epoch": 0.304, | |
| "grad_norm": 0.6419801158526326, | |
| "learning_rate": 2.016e-05, | |
| "loss": 0.5295, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4556776285171509, | |
| "step": 190, | |
| "valid_targets_mean": 2651.4, | |
| "valid_targets_min": 904 | |
| }, | |
| { | |
| "epoch": 0.312, | |
| "grad_norm": 0.7843457544488607, | |
| "learning_rate": 2.0693333333333334e-05, | |
| "loss": 0.4888, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.5008721947669983, | |
| "step": 195, | |
| "valid_targets_mean": 1968.2, | |
| "valid_targets_min": 310 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "grad_norm": 0.7812164368583617, | |
| "learning_rate": 2.1226666666666668e-05, | |
| "loss": 0.5145, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.5007650852203369, | |
| "step": 200, | |
| "valid_targets_mean": 1982.9, | |
| "valid_targets_min": 309 | |
| }, | |
| { | |
| "epoch": 0.328, | |
| "grad_norm": 0.7936202291339606, | |
| "learning_rate": 2.1760000000000002e-05, | |
| "loss": 0.513, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.5165877938270569, | |
| "step": 205, | |
| "valid_targets_mean": 1944.8, | |
| "valid_targets_min": 411 | |
| }, | |
| { | |
| "epoch": 0.336, | |
| "grad_norm": 0.7946620651898735, | |
| "learning_rate": 2.2293333333333336e-05, | |
| "loss": 0.4933, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.5367198586463928, | |
| "step": 210, | |
| "valid_targets_mean": 1912.6, | |
| "valid_targets_min": 303 | |
| }, | |
| { | |
| "epoch": 0.344, | |
| "grad_norm": 0.9145904545210979, | |
| "learning_rate": 2.282666666666667e-05, | |
| "loss": 0.5184, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.5727460384368896, | |
| "step": 215, | |
| "valid_targets_mean": 1951.7, | |
| "valid_targets_min": 308 | |
| }, | |
| { | |
| "epoch": 0.352, | |
| "grad_norm": 1.003545549198533, | |
| "learning_rate": 2.336e-05, | |
| "loss": 0.5346, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.550015389919281, | |
| "step": 220, | |
| "valid_targets_mean": 1315.6, | |
| "valid_targets_min": 237 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "grad_norm": 0.7238181904792715, | |
| "learning_rate": 2.3893333333333337e-05, | |
| "loss": 0.4984, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4611344337463379, | |
| "step": 225, | |
| "valid_targets_mean": 2388.9, | |
| "valid_targets_min": 282 | |
| }, | |
| { | |
| "epoch": 0.368, | |
| "grad_norm": 0.9173238345035478, | |
| "learning_rate": 2.442666666666667e-05, | |
| "loss": 0.5142, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.5616511702537537, | |
| "step": 230, | |
| "valid_targets_mean": 1766.1, | |
| "valid_targets_min": 278 | |
| }, | |
| { | |
| "epoch": 0.376, | |
| "grad_norm": 0.6478598502052655, | |
| "learning_rate": 2.496e-05, | |
| "loss": 0.4951, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4578341841697693, | |
| "step": 235, | |
| "valid_targets_mean": 2682.2, | |
| "valid_targets_min": 432 | |
| }, | |
| { | |
| "epoch": 0.384, | |
| "grad_norm": 0.6938568409381498, | |
| "learning_rate": 2.5493333333333335e-05, | |
| "loss": 0.507, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.49386435747146606, | |
| "step": 240, | |
| "valid_targets_mean": 2382.9, | |
| "valid_targets_min": 500 | |
| }, | |
| { | |
| "epoch": 0.392, | |
| "grad_norm": 0.7557868859907161, | |
| "learning_rate": 2.6026666666666666e-05, | |
| "loss": 0.5027, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.5205134749412537, | |
| "step": 245, | |
| "valid_targets_mean": 2112.2, | |
| "valid_targets_min": 494 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "grad_norm": 0.8093682610285722, | |
| "learning_rate": 2.6560000000000003e-05, | |
| "loss": 0.5073, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.5215206742286682, | |
| "step": 250, | |
| "valid_targets_mean": 2039.8, | |
| "valid_targets_min": 324 | |
| }, | |
| { | |
| "epoch": 0.408, | |
| "grad_norm": 0.7340752683942827, | |
| "learning_rate": 2.7093333333333337e-05, | |
| "loss": 0.5003, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.47431081533432007, | |
| "step": 255, | |
| "valid_targets_mean": 2196.7, | |
| "valid_targets_min": 318 | |
| }, | |
| { | |
| "epoch": 0.416, | |
| "grad_norm": 0.6939008102591058, | |
| "learning_rate": 2.7626666666666668e-05, | |
| "loss": 0.5009, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4682499170303345, | |
| "step": 260, | |
| "valid_targets_mean": 2425.0, | |
| "valid_targets_min": 390 | |
| }, | |
| { | |
| "epoch": 0.424, | |
| "grad_norm": 0.6863977549436804, | |
| "learning_rate": 2.816e-05, | |
| "loss": 0.4787, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.47333863377571106, | |
| "step": 265, | |
| "valid_targets_mean": 2685.8, | |
| "valid_targets_min": 312 | |
| }, | |
| { | |
| "epoch": 0.432, | |
| "grad_norm": 0.8812971208838777, | |
| "learning_rate": 2.869333333333334e-05, | |
| "loss": 0.4723, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.49015623331069946, | |
| "step": 270, | |
| "valid_targets_mean": 1530.0, | |
| "valid_targets_min": 313 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "grad_norm": 0.9077218939101549, | |
| "learning_rate": 2.922666666666667e-05, | |
| "loss": 0.4884, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4787595868110657, | |
| "step": 275, | |
| "valid_targets_mean": 1883.8, | |
| "valid_targets_min": 297 | |
| }, | |
| { | |
| "epoch": 0.448, | |
| "grad_norm": 0.7582934364503591, | |
| "learning_rate": 2.9760000000000003e-05, | |
| "loss": 0.4814, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4282434582710266, | |
| "step": 280, | |
| "valid_targets_mean": 2080.2, | |
| "valid_targets_min": 324 | |
| }, | |
| { | |
| "epoch": 0.456, | |
| "grad_norm": 0.7647736018820112, | |
| "learning_rate": 3.0293333333333334e-05, | |
| "loss": 0.4783, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.49454984068870544, | |
| "step": 285, | |
| "valid_targets_mean": 2026.6, | |
| "valid_targets_min": 299 | |
| }, | |
| { | |
| "epoch": 0.464, | |
| "grad_norm": 0.6696963147508204, | |
| "learning_rate": 3.0826666666666674e-05, | |
| "loss": 0.4755, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4329226613044739, | |
| "step": 290, | |
| "valid_targets_mean": 2336.9, | |
| "valid_targets_min": 377 | |
| }, | |
| { | |
| "epoch": 0.472, | |
| "grad_norm": 0.6655141023286056, | |
| "learning_rate": 3.1360000000000005e-05, | |
| "loss": 0.4556, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.43723297119140625, | |
| "step": 295, | |
| "valid_targets_mean": 2561.8, | |
| "valid_targets_min": 294 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "grad_norm": 0.8005479126868438, | |
| "learning_rate": 3.1893333333333335e-05, | |
| "loss": 0.4587, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4513320028781891, | |
| "step": 300, | |
| "valid_targets_mean": 1946.9, | |
| "valid_targets_min": 304 | |
| }, | |
| { | |
| "epoch": 0.488, | |
| "grad_norm": 0.9080116879046022, | |
| "learning_rate": 3.2426666666666666e-05, | |
| "loss": 0.4923, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.47190746665000916, | |
| "step": 305, | |
| "valid_targets_mean": 2113.0, | |
| "valid_targets_min": 257 | |
| }, | |
| { | |
| "epoch": 0.496, | |
| "grad_norm": 0.6765207104829097, | |
| "learning_rate": 3.296e-05, | |
| "loss": 0.4641, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4598078727722168, | |
| "step": 310, | |
| "valid_targets_mean": 2466.9, | |
| "valid_targets_min": 380 | |
| }, | |
| { | |
| "epoch": 0.504, | |
| "grad_norm": 0.6903161847908216, | |
| "learning_rate": 3.349333333333334e-05, | |
| "loss": 0.5024, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.46894365549087524, | |
| "step": 315, | |
| "valid_targets_mean": 2686.0, | |
| "valid_targets_min": 354 | |
| }, | |
| { | |
| "epoch": 0.512, | |
| "grad_norm": 0.7016409822842518, | |
| "learning_rate": 3.402666666666667e-05, | |
| "loss": 0.478, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.47489529848098755, | |
| "step": 320, | |
| "valid_targets_mean": 2268.2, | |
| "valid_targets_min": 241 | |
| }, | |
| { | |
| "epoch": 0.52, | |
| "grad_norm": 0.7043729607954898, | |
| "learning_rate": 3.456e-05, | |
| "loss": 0.4812, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4892638921737671, | |
| "step": 325, | |
| "valid_targets_mean": 2165.1, | |
| "valid_targets_min": 300 | |
| }, | |
| { | |
| "epoch": 0.528, | |
| "grad_norm": 0.6909733881278005, | |
| "learning_rate": 3.509333333333333e-05, | |
| "loss": 0.4896, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4769917130470276, | |
| "step": 330, | |
| "valid_targets_mean": 2164.1, | |
| "valid_targets_min": 318 | |
| }, | |
| { | |
| "epoch": 0.536, | |
| "grad_norm": 0.6824644579299378, | |
| "learning_rate": 3.562666666666667e-05, | |
| "loss": 0.4803, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4844236969947815, | |
| "step": 335, | |
| "valid_targets_mean": 2134.9, | |
| "valid_targets_min": 323 | |
| }, | |
| { | |
| "epoch": 0.544, | |
| "grad_norm": 0.6650792790166427, | |
| "learning_rate": 3.6160000000000006e-05, | |
| "loss": 0.4832, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4570949971675873, | |
| "step": 340, | |
| "valid_targets_mean": 2403.1, | |
| "valid_targets_min": 235 | |
| }, | |
| { | |
| "epoch": 0.552, | |
| "grad_norm": 0.7510187405765582, | |
| "learning_rate": 3.669333333333334e-05, | |
| "loss": 0.4781, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.47128066420555115, | |
| "step": 345, | |
| "valid_targets_mean": 1849.6, | |
| "valid_targets_min": 364 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "grad_norm": 0.7583981476824141, | |
| "learning_rate": 3.722666666666667e-05, | |
| "loss": 0.4677, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4223896861076355, | |
| "step": 350, | |
| "valid_targets_mean": 1775.6, | |
| "valid_targets_min": 296 | |
| }, | |
| { | |
| "epoch": 0.568, | |
| "grad_norm": 0.786928642756561, | |
| "learning_rate": 3.7760000000000004e-05, | |
| "loss": 0.5033, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.5244324207305908, | |
| "step": 355, | |
| "valid_targets_mean": 1991.3, | |
| "valid_targets_min": 321 | |
| }, | |
| { | |
| "epoch": 0.576, | |
| "grad_norm": 0.7159975876945588, | |
| "learning_rate": 3.8293333333333335e-05, | |
| "loss": 0.456, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.47184303402900696, | |
| "step": 360, | |
| "valid_targets_mean": 2140.4, | |
| "valid_targets_min": 363 | |
| }, | |
| { | |
| "epoch": 0.584, | |
| "grad_norm": 0.730639760598974, | |
| "learning_rate": 3.882666666666667e-05, | |
| "loss": 0.4611, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4932243525981903, | |
| "step": 365, | |
| "valid_targets_mean": 2141.9, | |
| "valid_targets_min": 386 | |
| }, | |
| { | |
| "epoch": 0.592, | |
| "grad_norm": 0.8175085697300658, | |
| "learning_rate": 3.936e-05, | |
| "loss": 0.4931, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.49952882528305054, | |
| "step": 370, | |
| "valid_targets_mean": 1697.2, | |
| "valid_targets_min": 457 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "grad_norm": 0.6454425281547672, | |
| "learning_rate": 3.989333333333333e-05, | |
| "loss": 0.4682, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.43333718180656433, | |
| "step": 375, | |
| "valid_targets_mean": 2390.2, | |
| "valid_targets_min": 470 | |
| }, | |
| { | |
| "epoch": 0.608, | |
| "grad_norm": 0.8248756022257087, | |
| "learning_rate": 3.9999861365387784e-05, | |
| "loss": 0.4802, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.5221852660179138, | |
| "step": 380, | |
| "valid_targets_mean": 1886.6, | |
| "valid_targets_min": 316 | |
| }, | |
| { | |
| "epoch": 0.616, | |
| "grad_norm": 0.7211999065193525, | |
| "learning_rate": 3.9999298165569614e-05, | |
| "loss": 0.4611, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4462135434150696, | |
| "step": 385, | |
| "valid_targets_mean": 2022.9, | |
| "valid_targets_min": 315 | |
| }, | |
| { | |
| "epoch": 0.624, | |
| "grad_norm": 0.7200019231629534, | |
| "learning_rate": 3.999830174807269e-05, | |
| "loss": 0.453, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.47809314727783203, | |
| "step": 390, | |
| "valid_targets_mean": 1950.5, | |
| "valid_targets_min": 319 | |
| }, | |
| { | |
| "epoch": 0.632, | |
| "grad_norm": 0.6858645868739105, | |
| "learning_rate": 3.9996872134481036e-05, | |
| "loss": 0.4651, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4543530344963074, | |
| "step": 395, | |
| "valid_targets_mean": 1936.9, | |
| "valid_targets_min": 341 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "grad_norm": 0.6573647289491387, | |
| "learning_rate": 3.999500935576245e-05, | |
| "loss": 0.4813, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4598807692527771, | |
| "step": 400, | |
| "valid_targets_mean": 2493.0, | |
| "valid_targets_min": 398 | |
| }, | |
| { | |
| "epoch": 0.648, | |
| "grad_norm": 0.6225096702898597, | |
| "learning_rate": 3.999271345226776e-05, | |
| "loss": 0.447, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.44423946738243103, | |
| "step": 405, | |
| "valid_targets_mean": 2727.8, | |
| "valid_targets_min": 455 | |
| }, | |
| { | |
| "epoch": 0.656, | |
| "grad_norm": 0.7976543700191413, | |
| "learning_rate": 3.9989984473730035e-05, | |
| "loss": 0.465, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4622921943664551, | |
| "step": 410, | |
| "valid_targets_mean": 1817.0, | |
| "valid_targets_min": 254 | |
| }, | |
| { | |
| "epoch": 0.664, | |
| "grad_norm": 0.6816915744176464, | |
| "learning_rate": 3.998682247926343e-05, | |
| "loss": 0.4608, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.457396000623703, | |
| "step": 415, | |
| "valid_targets_mean": 2165.4, | |
| "valid_targets_min": 363 | |
| }, | |
| { | |
| "epoch": 0.672, | |
| "grad_norm": 0.7294754263312385, | |
| "learning_rate": 3.998322753736193e-05, | |
| "loss": 0.4713, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.43845027685165405, | |
| "step": 420, | |
| "valid_targets_mean": 1932.0, | |
| "valid_targets_min": 295 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "grad_norm": 0.7554261488577307, | |
| "learning_rate": 3.99791997258979e-05, | |
| "loss": 0.4667, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.44777369499206543, | |
| "step": 425, | |
| "valid_targets_mean": 1979.6, | |
| "valid_targets_min": 330 | |
| }, | |
| { | |
| "epoch": 0.688, | |
| "grad_norm": 0.7039021930583653, | |
| "learning_rate": 3.997473913212036e-05, | |
| "loss": 0.4629, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4474635720252991, | |
| "step": 430, | |
| "valid_targets_mean": 1963.6, | |
| "valid_targets_min": 337 | |
| }, | |
| { | |
| "epoch": 0.696, | |
| "grad_norm": 0.6450602000693427, | |
| "learning_rate": 3.9969845852653087e-05, | |
| "loss": 0.438, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4050423204898834, | |
| "step": 435, | |
| "valid_targets_mean": 1908.8, | |
| "valid_targets_min": 220 | |
| }, | |
| { | |
| "epoch": 0.704, | |
| "grad_norm": 0.7302175415037188, | |
| "learning_rate": 3.996451999349258e-05, | |
| "loss": 0.4649, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.46385928988456726, | |
| "step": 440, | |
| "valid_targets_mean": 1720.4, | |
| "valid_targets_min": 262 | |
| }, | |
| { | |
| "epoch": 0.712, | |
| "grad_norm": 0.699254201843429, | |
| "learning_rate": 3.995876167000569e-05, | |
| "loss": 0.4647, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.43578875064849854, | |
| "step": 445, | |
| "valid_targets_mean": 1768.2, | |
| "valid_targets_min": 283 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "grad_norm": 0.6980324289594738, | |
| "learning_rate": 3.9952571006927186e-05, | |
| "loss": 0.4622, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4525895416736603, | |
| "step": 450, | |
| "valid_targets_mean": 1971.2, | |
| "valid_targets_min": 274 | |
| }, | |
| { | |
| "epoch": 0.728, | |
| "grad_norm": 0.6406310518930854, | |
| "learning_rate": 3.9945948138356995e-05, | |
| "loss": 0.4376, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.42803841829299927, | |
| "step": 455, | |
| "valid_targets_mean": 2110.1, | |
| "valid_targets_min": 261 | |
| }, | |
| { | |
| "epoch": 0.736, | |
| "grad_norm": 0.6437606245360337, | |
| "learning_rate": 3.993889320775735e-05, | |
| "loss": 0.4465, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4538410007953644, | |
| "step": 460, | |
| "valid_targets_mean": 1945.8, | |
| "valid_targets_min": 359 | |
| }, | |
| { | |
| "epoch": 0.744, | |
| "grad_norm": 0.7279341720087018, | |
| "learning_rate": 3.9931406367949627e-05, | |
| "loss": 0.4671, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.47880733013153076, | |
| "step": 465, | |
| "valid_targets_mean": 2008.7, | |
| "valid_targets_min": 269 | |
| }, | |
| { | |
| "epoch": 0.752, | |
| "grad_norm": 0.8141387889761877, | |
| "learning_rate": 3.9923487781111106e-05, | |
| "loss": 0.4427, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.46549275517463684, | |
| "step": 470, | |
| "valid_targets_mean": 1529.1, | |
| "valid_targets_min": 259 | |
| }, | |
| { | |
| "epoch": 0.76, | |
| "grad_norm": 0.6542535470707088, | |
| "learning_rate": 3.9915137618771386e-05, | |
| "loss": 0.4447, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.42678263783454895, | |
| "step": 475, | |
| "valid_targets_mean": 1936.6, | |
| "valid_targets_min": 334 | |
| }, | |
| { | |
| "epoch": 0.768, | |
| "grad_norm": 0.7842908199505598, | |
| "learning_rate": 3.9906356061808713e-05, | |
| "loss": 0.4576, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.46588754653930664, | |
| "step": 480, | |
| "valid_targets_mean": 1561.8, | |
| "valid_targets_min": 422 | |
| }, | |
| { | |
| "epoch": 0.776, | |
| "grad_norm": 0.6449123706635504, | |
| "learning_rate": 3.9897143300446055e-05, | |
| "loss": 0.4296, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4231323301792145, | |
| "step": 485, | |
| "valid_targets_mean": 2438.3, | |
| "valid_targets_min": 425 | |
| }, | |
| { | |
| "epoch": 0.784, | |
| "grad_norm": 0.6181873020030192, | |
| "learning_rate": 3.988749953424696e-05, | |
| "loss": 0.4855, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.45973435044288635, | |
| "step": 490, | |
| "valid_targets_mean": 2474.9, | |
| "valid_targets_min": 365 | |
| }, | |
| { | |
| "epoch": 0.792, | |
| "grad_norm": 0.626473245513706, | |
| "learning_rate": 3.9877424972111264e-05, | |
| "loss": 0.4606, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.48623594641685486, | |
| "step": 495, | |
| "valid_targets_mean": 2455.1, | |
| "valid_targets_min": 561 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "grad_norm": 0.7003221204119682, | |
| "learning_rate": 3.9866919832270554e-05, | |
| "loss": 0.4389, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.40918204188346863, | |
| "step": 500, | |
| "valid_targets_mean": 1859.0, | |
| "valid_targets_min": 296 | |
| }, | |
| { | |
| "epoch": 0.808, | |
| "grad_norm": 0.6135130789421956, | |
| "learning_rate": 3.9855984342283414e-05, | |
| "loss": 0.4553, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.41726401448249817, | |
| "step": 505, | |
| "valid_targets_mean": 2217.7, | |
| "valid_targets_min": 288 | |
| }, | |
| { | |
| "epoch": 0.816, | |
| "grad_norm": 0.7300560333061654, | |
| "learning_rate": 3.9844618739030545e-05, | |
| "loss": 0.4498, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4671459197998047, | |
| "step": 510, | |
| "valid_targets_mean": 1747.1, | |
| "valid_targets_min": 345 | |
| }, | |
| { | |
| "epoch": 0.824, | |
| "grad_norm": 0.6494517318016304, | |
| "learning_rate": 3.98328232687096e-05, | |
| "loss": 0.4407, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.454100102186203, | |
| "step": 515, | |
| "valid_targets_mean": 2495.7, | |
| "valid_targets_min": 441 | |
| }, | |
| { | |
| "epoch": 0.832, | |
| "grad_norm": 0.7385801637729515, | |
| "learning_rate": 3.982059818682986e-05, | |
| "loss": 0.4672, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.46154558658599854, | |
| "step": 520, | |
| "valid_targets_mean": 1561.5, | |
| "valid_targets_min": 311 | |
| }, | |
| { | |
| "epoch": 0.84, | |
| "grad_norm": 0.7867067019795361, | |
| "learning_rate": 3.980794375820669e-05, | |
| "loss": 0.4906, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4851241707801819, | |
| "step": 525, | |
| "valid_targets_mean": 1604.1, | |
| "valid_targets_min": 255 | |
| }, | |
| { | |
| "epoch": 0.848, | |
| "grad_norm": 0.6227178686981287, | |
| "learning_rate": 3.9794860256955825e-05, | |
| "loss": 0.4589, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4222499132156372, | |
| "step": 530, | |
| "valid_targets_mean": 2254.4, | |
| "valid_targets_min": 254 | |
| }, | |
| { | |
| "epoch": 0.856, | |
| "grad_norm": 0.6856645208311387, | |
| "learning_rate": 3.9781347966487415e-05, | |
| "loss": 0.4511, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.44948041439056396, | |
| "step": 535, | |
| "valid_targets_mean": 2073.2, | |
| "valid_targets_min": 310 | |
| }, | |
| { | |
| "epoch": 0.864, | |
| "grad_norm": 0.7450304493429178, | |
| "learning_rate": 3.9767407179499875e-05, | |
| "loss": 0.4564, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.48208606243133545, | |
| "step": 540, | |
| "valid_targets_mean": 1782.9, | |
| "valid_targets_min": 376 | |
| }, | |
| { | |
| "epoch": 0.872, | |
| "grad_norm": 0.7714772450276868, | |
| "learning_rate": 3.975303819797358e-05, | |
| "loss": 0.4817, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.5147331953048706, | |
| "step": 545, | |
| "valid_targets_mean": 1765.1, | |
| "valid_targets_min": 404 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "grad_norm": 0.6262710369567205, | |
| "learning_rate": 3.973824133316431e-05, | |
| "loss": 0.4573, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.39105427265167236, | |
| "step": 550, | |
| "valid_targets_mean": 2131.3, | |
| "valid_targets_min": 312 | |
| }, | |
| { | |
| "epoch": 0.888, | |
| "grad_norm": 0.6110261473931465, | |
| "learning_rate": 3.972301690559645e-05, | |
| "loss": 0.4233, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.44210729002952576, | |
| "step": 555, | |
| "valid_targets_mean": 2380.4, | |
| "valid_targets_min": 511 | |
| }, | |
| { | |
| "epoch": 0.896, | |
| "grad_norm": 0.6805369584457025, | |
| "learning_rate": 3.970736524505615e-05, | |
| "loss": 0.469, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.45157796144485474, | |
| "step": 560, | |
| "valid_targets_mean": 2133.4, | |
| "valid_targets_min": 315 | |
| }, | |
| { | |
| "epoch": 0.904, | |
| "grad_norm": 0.7184087914454619, | |
| "learning_rate": 3.969128669058411e-05, | |
| "loss": 0.4386, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4871930181980133, | |
| "step": 565, | |
| "valid_targets_mean": 1956.6, | |
| "valid_targets_min": 237 | |
| }, | |
| { | |
| "epoch": 0.912, | |
| "grad_norm": 0.632960168702108, | |
| "learning_rate": 3.9674781590468256e-05, | |
| "loss": 0.4482, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4217160642147064, | |
| "step": 570, | |
| "valid_targets_mean": 2133.4, | |
| "valid_targets_min": 330 | |
| }, | |
| { | |
| "epoch": 0.92, | |
| "grad_norm": 0.5716067570655475, | |
| "learning_rate": 3.9657850302236184e-05, | |
| "loss": 0.4669, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.39666199684143066, | |
| "step": 575, | |
| "valid_targets_mean": 2431.0, | |
| "valid_targets_min": 435 | |
| }, | |
| { | |
| "epoch": 0.928, | |
| "grad_norm": 0.6450176003705785, | |
| "learning_rate": 3.964049319264744e-05, | |
| "loss": 0.4478, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4384343922138214, | |
| "step": 580, | |
| "valid_targets_mean": 1996.9, | |
| "valid_targets_min": 298 | |
| }, | |
| { | |
| "epoch": 0.936, | |
| "grad_norm": 0.7426898198991789, | |
| "learning_rate": 3.962271063768555e-05, | |
| "loss": 0.4738, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.5046041011810303, | |
| "step": 585, | |
| "valid_targets_mean": 1812.4, | |
| "valid_targets_min": 400 | |
| }, | |
| { | |
| "epoch": 0.944, | |
| "grad_norm": 0.7252987868247023, | |
| "learning_rate": 3.960450302254989e-05, | |
| "loss": 0.4728, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4665234088897705, | |
| "step": 590, | |
| "valid_targets_mean": 1873.8, | |
| "valid_targets_min": 307 | |
| }, | |
| { | |
| "epoch": 0.952, | |
| "grad_norm": 0.5509677719198839, | |
| "learning_rate": 3.958587074164735e-05, | |
| "loss": 0.4272, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.42085975408554077, | |
| "step": 595, | |
| "valid_targets_mean": 2728.6, | |
| "valid_targets_min": 384 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "grad_norm": 0.6683946205285337, | |
| "learning_rate": 3.956681419858376e-05, | |
| "loss": 0.4518, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4690259099006653, | |
| "step": 600, | |
| "valid_targets_mean": 2017.5, | |
| "valid_targets_min": 293 | |
| }, | |
| { | |
| "epoch": 0.968, | |
| "grad_norm": 0.6840253099847687, | |
| "learning_rate": 3.954733380615516e-05, | |
| "loss": 0.4495, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4364042282104492, | |
| "step": 605, | |
| "valid_targets_mean": 1879.7, | |
| "valid_targets_min": 371 | |
| }, | |
| { | |
| "epoch": 0.976, | |
| "grad_norm": 0.6260511177922704, | |
| "learning_rate": 3.95274299863389e-05, | |
| "loss": 0.4462, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.45733919739723206, | |
| "step": 610, | |
| "valid_targets_mean": 2186.1, | |
| "valid_targets_min": 386 | |
| }, | |
| { | |
| "epoch": 0.984, | |
| "grad_norm": 0.7134692669207365, | |
| "learning_rate": 3.950710317028443e-05, | |
| "loss": 0.4395, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.50779128074646, | |
| "step": 615, | |
| "valid_targets_mean": 2000.1, | |
| "valid_targets_min": 327 | |
| }, | |
| { | |
| "epoch": 0.992, | |
| "grad_norm": 0.6892162511444004, | |
| "learning_rate": 3.9486353798303996e-05, | |
| "loss": 0.4426, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4779299199581146, | |
| "step": 620, | |
| "valid_targets_mean": 1964.6, | |
| "valid_targets_min": 356 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "grad_norm": 0.6515675698335721, | |
| "learning_rate": 3.946518231986313e-05, | |
| "loss": 0.4313, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4520461857318878, | |
| "step": 625, | |
| "valid_targets_mean": 2148.9, | |
| "valid_targets_min": 421 | |
| }, | |
| { | |
| "epoch": 1.008, | |
| "grad_norm": 0.6240250726279455, | |
| "learning_rate": 3.9443589193570847e-05, | |
| "loss": 0.4267, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4049533009529114, | |
| "step": 630, | |
| "valid_targets_mean": 1969.0, | |
| "valid_targets_min": 286 | |
| }, | |
| { | |
| "epoch": 1.016, | |
| "grad_norm": 0.6414416644443255, | |
| "learning_rate": 3.942157488716976e-05, | |
| "loss": 0.4356, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4244895577430725, | |
| "step": 635, | |
| "valid_targets_mean": 2082.3, | |
| "valid_targets_min": 324 | |
| }, | |
| { | |
| "epoch": 1.024, | |
| "grad_norm": 0.6241575168649688, | |
| "learning_rate": 3.939913987752595e-05, | |
| "loss": 0.4312, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.408047080039978, | |
| "step": 640, | |
| "valid_targets_mean": 2315.9, | |
| "valid_targets_min": 447 | |
| }, | |
| { | |
| "epoch": 1.032, | |
| "grad_norm": 0.5633755778877994, | |
| "learning_rate": 3.9376284650618605e-05, | |
| "loss": 0.4078, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.39470237493515015, | |
| "step": 645, | |
| "valid_targets_mean": 2380.6, | |
| "valid_targets_min": 426 | |
| }, | |
| { | |
| "epoch": 1.04, | |
| "grad_norm": 0.6439090216593515, | |
| "learning_rate": 3.935300970152952e-05, | |
| "loss": 0.4466, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.43030333518981934, | |
| "step": 650, | |
| "valid_targets_mean": 1925.1, | |
| "valid_targets_min": 271 | |
| }, | |
| { | |
| "epoch": 1.048, | |
| "grad_norm": 0.6413404699842615, | |
| "learning_rate": 3.932931553443235e-05, | |
| "loss": 0.4039, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4175097346305847, | |
| "step": 655, | |
| "valid_targets_mean": 2288.7, | |
| "valid_targets_min": 458 | |
| }, | |
| { | |
| "epoch": 1.056, | |
| "grad_norm": 0.7348898005193101, | |
| "learning_rate": 3.930520266258173e-05, | |
| "loss": 0.4354, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4317247271537781, | |
| "step": 660, | |
| "valid_targets_mean": 1749.6, | |
| "valid_targets_min": 285 | |
| }, | |
| { | |
| "epoch": 1.064, | |
| "grad_norm": 0.6213909552275446, | |
| "learning_rate": 3.928067160830208e-05, | |
| "loss": 0.4201, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4198188781738281, | |
| "step": 665, | |
| "valid_targets_mean": 2220.9, | |
| "valid_targets_min": 322 | |
| }, | |
| { | |
| "epoch": 1.072, | |
| "grad_norm": 0.7037747317197413, | |
| "learning_rate": 3.925572290297638e-05, | |
| "loss": 0.4307, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4192028343677521, | |
| "step": 670, | |
| "valid_targets_mean": 1968.8, | |
| "valid_targets_min": 390 | |
| }, | |
| { | |
| "epoch": 1.08, | |
| "grad_norm": 0.7120042990513531, | |
| "learning_rate": 3.9230357087034606e-05, | |
| "loss": 0.4386, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4463686943054199, | |
| "step": 675, | |
| "valid_targets_mean": 1814.4, | |
| "valid_targets_min": 358 | |
| }, | |
| { | |
| "epoch": 1.088, | |
| "grad_norm": 0.5526479446110066, | |
| "learning_rate": 3.9204574709942036e-05, | |
| "loss": 0.4459, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4031902849674225, | |
| "step": 680, | |
| "valid_targets_mean": 2819.1, | |
| "valid_targets_min": 298 | |
| }, | |
| { | |
| "epoch": 1.096, | |
| "grad_norm": 0.6665996408067918, | |
| "learning_rate": 3.917837633018734e-05, | |
| "loss": 0.4074, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.43310776352882385, | |
| "step": 685, | |
| "valid_targets_mean": 2139.0, | |
| "valid_targets_min": 429 | |
| }, | |
| { | |
| "epoch": 1.104, | |
| "grad_norm": 0.5995581856388195, | |
| "learning_rate": 3.915176251527051e-05, | |
| "loss": 0.4103, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3874262571334839, | |
| "step": 690, | |
| "valid_targets_mean": 2195.1, | |
| "valid_targets_min": 311 | |
| }, | |
| { | |
| "epoch": 1.112, | |
| "grad_norm": 0.5356516953538547, | |
| "learning_rate": 3.912473384169051e-05, | |
| "loss": 0.437, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4291180372238159, | |
| "step": 695, | |
| "valid_targets_mean": 2826.2, | |
| "valid_targets_min": 297 | |
| }, | |
| { | |
| "epoch": 1.12, | |
| "grad_norm": 0.7220284112015432, | |
| "learning_rate": 3.9097290894932866e-05, | |
| "loss": 0.4364, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4589844346046448, | |
| "step": 700, | |
| "valid_targets_mean": 2123.4, | |
| "valid_targets_min": 381 | |
| }, | |
| { | |
| "epoch": 1.1280000000000001, | |
| "grad_norm": 0.5843467247568136, | |
| "learning_rate": 3.906943426945691e-05, | |
| "loss": 0.4298, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.45251959562301636, | |
| "step": 705, | |
| "valid_targets_mean": 2777.2, | |
| "valid_targets_min": 554 | |
| }, | |
| { | |
| "epoch": 1.1360000000000001, | |
| "grad_norm": 0.5965008772450432, | |
| "learning_rate": 3.9041164568682955e-05, | |
| "loss": 0.4132, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.36841651797294617, | |
| "step": 710, | |
| "valid_targets_mean": 2081.3, | |
| "valid_targets_min": 267 | |
| }, | |
| { | |
| "epoch": 1.144, | |
| "grad_norm": 0.7628305931405186, | |
| "learning_rate": 3.90124824049792e-05, | |
| "loss": 0.4238, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.5438158512115479, | |
| "step": 715, | |
| "valid_targets_mean": 2172.4, | |
| "valid_targets_min": 344 | |
| }, | |
| { | |
| "epoch": 1.152, | |
| "grad_norm": 0.5994049693791643, | |
| "learning_rate": 3.8983388399648465e-05, | |
| "loss": 0.4144, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4170791208744049, | |
| "step": 720, | |
| "valid_targets_mean": 2237.7, | |
| "valid_targets_min": 377 | |
| }, | |
| { | |
| "epoch": 1.16, | |
| "grad_norm": 0.6965418685020897, | |
| "learning_rate": 3.895388318291474e-05, | |
| "loss": 0.4284, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.47118836641311646, | |
| "step": 725, | |
| "valid_targets_mean": 2137.4, | |
| "valid_targets_min": 296 | |
| }, | |
| { | |
| "epoch": 1.168, | |
| "grad_norm": 0.7892481191638123, | |
| "learning_rate": 3.892396739390952e-05, | |
| "loss": 0.4444, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4336376190185547, | |
| "step": 730, | |
| "valid_targets_mean": 1510.0, | |
| "valid_targets_min": 334 | |
| }, | |
| { | |
| "epoch": 1.176, | |
| "grad_norm": 0.5947107495004924, | |
| "learning_rate": 3.8893641680657986e-05, | |
| "loss": 0.4488, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4223913550376892, | |
| "step": 735, | |
| "valid_targets_mean": 2586.2, | |
| "valid_targets_min": 389 | |
| }, | |
| { | |
| "epoch": 1.184, | |
| "grad_norm": 0.6428094395927993, | |
| "learning_rate": 3.886290670006495e-05, | |
| "loss": 0.4557, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4669094681739807, | |
| "step": 740, | |
| "valid_targets_mean": 2241.1, | |
| "valid_targets_min": 370 | |
| }, | |
| { | |
| "epoch": 1.192, | |
| "grad_norm": 0.621692085111297, | |
| "learning_rate": 3.8831763117900605e-05, | |
| "loss": 0.4129, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.43175309896469116, | |
| "step": 745, | |
| "valid_targets_mean": 2252.1, | |
| "valid_targets_min": 320 | |
| }, | |
| { | |
| "epoch": 1.2, | |
| "grad_norm": 0.6051358606200022, | |
| "learning_rate": 3.8800211608786166e-05, | |
| "loss": 0.4323, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4480053782463074, | |
| "step": 750, | |
| "valid_targets_mean": 2474.6, | |
| "valid_targets_min": 312 | |
| }, | |
| { | |
| "epoch": 1.208, | |
| "grad_norm": 0.6295723423544923, | |
| "learning_rate": 3.876825285617918e-05, | |
| "loss": 0.4264, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.38263633847236633, | |
| "step": 755, | |
| "valid_targets_mean": 1969.8, | |
| "valid_targets_min": 366 | |
| }, | |
| { | |
| "epoch": 1.216, | |
| "grad_norm": 0.6768797453110101, | |
| "learning_rate": 3.873588755235876e-05, | |
| "loss": 0.4226, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4260578155517578, | |
| "step": 760, | |
| "valid_targets_mean": 2088.6, | |
| "valid_targets_min": 308 | |
| }, | |
| { | |
| "epoch": 1.224, | |
| "grad_norm": 0.7075395848719739, | |
| "learning_rate": 3.870311639841062e-05, | |
| "loss": 0.425, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.41223227977752686, | |
| "step": 765, | |
| "valid_targets_mean": 1929.6, | |
| "valid_targets_min": 359 | |
| }, | |
| { | |
| "epoch": 1.232, | |
| "grad_norm": 0.7367787353503337, | |
| "learning_rate": 3.866994010421182e-05, | |
| "loss": 0.3936, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.41890084743499756, | |
| "step": 770, | |
| "valid_targets_mean": 1651.1, | |
| "valid_targets_min": 257 | |
| }, | |
| { | |
| "epoch": 1.24, | |
| "grad_norm": 0.8566873360763317, | |
| "learning_rate": 3.863635938841545e-05, | |
| "loss": 0.4329, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4514118432998657, | |
| "step": 775, | |
| "valid_targets_mean": 1363.1, | |
| "valid_targets_min": 309 | |
| }, | |
| { | |
| "epoch": 1.248, | |
| "grad_norm": 0.6268973996841051, | |
| "learning_rate": 3.8602374978435015e-05, | |
| "loss": 0.4064, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.38157105445861816, | |
| "step": 780, | |
| "valid_targets_mean": 1858.1, | |
| "valid_targets_min": 253 | |
| }, | |
| { | |
| "epoch": 1.256, | |
| "grad_norm": 0.6699996219849346, | |
| "learning_rate": 3.8567987610428705e-05, | |
| "loss": 0.4317, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.41753286123275757, | |
| "step": 785, | |
| "valid_targets_mean": 1925.1, | |
| "valid_targets_min": 392 | |
| }, | |
| { | |
| "epoch": 1.264, | |
| "grad_norm": 0.5754399055059435, | |
| "learning_rate": 3.853319802928345e-05, | |
| "loss": 0.4159, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.39910656213760376, | |
| "step": 790, | |
| "valid_targets_mean": 2242.6, | |
| "valid_targets_min": 305 | |
| }, | |
| { | |
| "epoch": 1.272, | |
| "grad_norm": 0.7023438254788777, | |
| "learning_rate": 3.849800698859877e-05, | |
| "loss": 0.4236, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.38505062460899353, | |
| "step": 795, | |
| "valid_targets_mean": 1530.5, | |
| "valid_targets_min": 307 | |
| }, | |
| { | |
| "epoch": 1.28, | |
| "grad_norm": 0.7031660958931999, | |
| "learning_rate": 3.846241525067047e-05, | |
| "loss": 0.4472, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4855435788631439, | |
| "step": 800, | |
| "valid_targets_mean": 1791.7, | |
| "valid_targets_min": 252 | |
| }, | |
| { | |
| "epoch": 1.288, | |
| "grad_norm": 0.6212443097042906, | |
| "learning_rate": 3.842642358647411e-05, | |
| "loss": 0.4305, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3779318928718567, | |
| "step": 805, | |
| "valid_targets_mean": 1982.6, | |
| "valid_targets_min": 336 | |
| }, | |
| { | |
| "epoch": 1.296, | |
| "grad_norm": 0.7272523602576765, | |
| "learning_rate": 3.839003277564831e-05, | |
| "loss": 0.4223, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.46407386660575867, | |
| "step": 810, | |
| "valid_targets_mean": 1663.7, | |
| "valid_targets_min": 319 | |
| }, | |
| { | |
| "epoch": 1.304, | |
| "grad_norm": 0.7348835291097465, | |
| "learning_rate": 3.835324360647785e-05, | |
| "loss": 0.4367, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4778377413749695, | |
| "step": 815, | |
| "valid_targets_mean": 1524.9, | |
| "valid_targets_min": 262 | |
| }, | |
| { | |
| "epoch": 1.312, | |
| "grad_norm": 0.7212111246272881, | |
| "learning_rate": 3.831605687587663e-05, | |
| "loss": 0.4178, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.46456441283226013, | |
| "step": 820, | |
| "valid_targets_mean": 1755.3, | |
| "valid_targets_min": 333 | |
| }, | |
| { | |
| "epoch": 1.32, | |
| "grad_norm": 0.6552147965678339, | |
| "learning_rate": 3.827847338937037e-05, | |
| "loss": 0.4089, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4211863577365875, | |
| "step": 825, | |
| "valid_targets_mean": 1804.6, | |
| "valid_targets_min": 315 | |
| }, | |
| { | |
| "epoch": 1.328, | |
| "grad_norm": 0.5902060523272189, | |
| "learning_rate": 3.824049396107918e-05, | |
| "loss": 0.4286, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.41596031188964844, | |
| "step": 830, | |
| "valid_targets_mean": 2226.1, | |
| "valid_targets_min": 455 | |
| }, | |
| { | |
| "epoch": 1.336, | |
| "grad_norm": 0.5721474738378802, | |
| "learning_rate": 3.8202119413699914e-05, | |
| "loss": 0.4402, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4118640422821045, | |
| "step": 835, | |
| "valid_targets_mean": 2640.4, | |
| "valid_targets_min": 437 | |
| }, | |
| { | |
| "epoch": 1.3439999999999999, | |
| "grad_norm": 0.49927426807478753, | |
| "learning_rate": 3.8163350578488366e-05, | |
| "loss": 0.4249, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3671061396598816, | |
| "step": 840, | |
| "valid_targets_mean": 2876.6, | |
| "valid_targets_min": 485 | |
| }, | |
| { | |
| "epoch": 1.3519999999999999, | |
| "grad_norm": 0.5891485955551514, | |
| "learning_rate": 3.812418829524124e-05, | |
| "loss": 0.4233, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.41898277401924133, | |
| "step": 845, | |
| "valid_targets_mean": 2178.2, | |
| "valid_targets_min": 380 | |
| }, | |
| { | |
| "epoch": 1.3599999999999999, | |
| "grad_norm": 0.7378900002873952, | |
| "learning_rate": 3.8084633412277974e-05, | |
| "loss": 0.4138, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.45365267992019653, | |
| "step": 850, | |
| "valid_targets_mean": 1550.7, | |
| "valid_targets_min": 315 | |
| }, | |
| { | |
| "epoch": 1.3679999999999999, | |
| "grad_norm": 0.6136454073663492, | |
| "learning_rate": 3.804468678642238e-05, | |
| "loss": 0.4314, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.42243918776512146, | |
| "step": 855, | |
| "valid_targets_mean": 2308.2, | |
| "valid_targets_min": 417 | |
| }, | |
| { | |
| "epoch": 1.376, | |
| "grad_norm": 0.5924905188357824, | |
| "learning_rate": 3.800434928298403e-05, | |
| "loss": 0.4396, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.41932016611099243, | |
| "step": 860, | |
| "valid_targets_mean": 2239.1, | |
| "valid_targets_min": 248 | |
| }, | |
| { | |
| "epoch": 1.384, | |
| "grad_norm": 0.6287576566557505, | |
| "learning_rate": 3.796362177573957e-05, | |
| "loss": 0.4144, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3955501914024353, | |
| "step": 865, | |
| "valid_targets_mean": 1976.8, | |
| "valid_targets_min": 472 | |
| }, | |
| { | |
| "epoch": 1.392, | |
| "grad_norm": 0.6939588686318201, | |
| "learning_rate": 3.792250514691378e-05, | |
| "loss": 0.4343, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4510957896709442, | |
| "step": 870, | |
| "valid_targets_mean": 1880.4, | |
| "valid_targets_min": 415 | |
| }, | |
| { | |
| "epoch": 1.4, | |
| "grad_norm": 0.740500044228491, | |
| "learning_rate": 3.788100028716043e-05, | |
| "loss": 0.4197, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.47122618556022644, | |
| "step": 875, | |
| "valid_targets_mean": 1580.5, | |
| "valid_targets_min": 294 | |
| }, | |
| { | |
| "epoch": 1.408, | |
| "grad_norm": 0.5292231218615612, | |
| "learning_rate": 3.7839108095543016e-05, | |
| "loss": 0.4042, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3627355694770813, | |
| "step": 880, | |
| "valid_targets_mean": 3259.8, | |
| "valid_targets_min": 335 | |
| }, | |
| { | |
| "epoch": 1.416, | |
| "grad_norm": 0.5836131862331959, | |
| "learning_rate": 3.7796829479515295e-05, | |
| "loss": 0.4249, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3939005136489868, | |
| "step": 885, | |
| "valid_targets_mean": 2124.9, | |
| "valid_targets_min": 344 | |
| }, | |
| { | |
| "epoch": 1.424, | |
| "grad_norm": 0.583752798524485, | |
| "learning_rate": 3.775416535490159e-05, | |
| "loss": 0.4176, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4177761971950531, | |
| "step": 890, | |
| "valid_targets_mean": 2234.2, | |
| "valid_targets_min": 296 | |
| }, | |
| { | |
| "epoch": 1.432, | |
| "grad_norm": 0.6347872247635064, | |
| "learning_rate": 3.7711116645876984e-05, | |
| "loss": 0.4324, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4340249300003052, | |
| "step": 895, | |
| "valid_targets_mean": 2302.2, | |
| "valid_targets_min": 336 | |
| }, | |
| { | |
| "epoch": 1.44, | |
| "grad_norm": 0.6619847945161467, | |
| "learning_rate": 3.7667684284947286e-05, | |
| "loss": 0.442, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4371194839477539, | |
| "step": 900, | |
| "valid_targets_mean": 1665.0, | |
| "valid_targets_min": 315 | |
| }, | |
| { | |
| "epoch": 1.448, | |
| "grad_norm": 0.6571662766303369, | |
| "learning_rate": 3.762386921292885e-05, | |
| "loss": 0.4223, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.40661561489105225, | |
| "step": 905, | |
| "valid_targets_mean": 1818.6, | |
| "valid_targets_min": 235 | |
| }, | |
| { | |
| "epoch": 1.456, | |
| "grad_norm": 0.890222065122217, | |
| "learning_rate": 3.757967237892818e-05, | |
| "loss": 0.4432, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.53070467710495, | |
| "step": 910, | |
| "valid_targets_mean": 1289.7, | |
| "valid_targets_min": 247 | |
| }, | |
| { | |
| "epoch": 1.464, | |
| "grad_norm": 0.6535190488820531, | |
| "learning_rate": 3.7535094740321334e-05, | |
| "loss": 0.413, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.40728655457496643, | |
| "step": 915, | |
| "valid_targets_mean": 1934.4, | |
| "valid_targets_min": 387 | |
| }, | |
| { | |
| "epoch": 1.472, | |
| "grad_norm": 0.6162293241343897, | |
| "learning_rate": 3.749013726273328e-05, | |
| "loss": 0.4072, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.37631893157958984, | |
| "step": 920, | |
| "valid_targets_mean": 1952.2, | |
| "valid_targets_min": 369 | |
| }, | |
| { | |
| "epoch": 1.48, | |
| "grad_norm": 0.7475115020828276, | |
| "learning_rate": 3.7444800920016875e-05, | |
| "loss": 0.4291, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4568881392478943, | |
| "step": 925, | |
| "valid_targets_mean": 1704.0, | |
| "valid_targets_min": 287 | |
| }, | |
| { | |
| "epoch": 1.488, | |
| "grad_norm": 0.7980089165718817, | |
| "learning_rate": 3.7399086694231864e-05, | |
| "loss": 0.4443, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.48696544766426086, | |
| "step": 930, | |
| "valid_targets_mean": 1555.2, | |
| "valid_targets_min": 342 | |
| }, | |
| { | |
| "epoch": 1.496, | |
| "grad_norm": 0.6731060349560027, | |
| "learning_rate": 3.735299557562352e-05, | |
| "loss": 0.4347, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.41527795791625977, | |
| "step": 935, | |
| "valid_targets_mean": 2049.9, | |
| "valid_targets_min": 297 | |
| }, | |
| { | |
| "epoch": 1.504, | |
| "grad_norm": 0.6256098005592527, | |
| "learning_rate": 3.7306528562601245e-05, | |
| "loss": 0.431, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4265291690826416, | |
| "step": 940, | |
| "valid_targets_mean": 2755.6, | |
| "valid_targets_min": 401 | |
| }, | |
| { | |
| "epoch": 1.512, | |
| "grad_norm": 0.5212788613253625, | |
| "learning_rate": 3.7259686661716945e-05, | |
| "loss": 0.418, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.40835675597190857, | |
| "step": 945, | |
| "valid_targets_mean": 2737.9, | |
| "valid_targets_min": 600 | |
| }, | |
| { | |
| "epoch": 1.52, | |
| "grad_norm": 0.6806519793344622, | |
| "learning_rate": 3.7212470887643204e-05, | |
| "loss": 0.4183, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3803894519805908, | |
| "step": 950, | |
| "valid_targets_mean": 1731.1, | |
| "valid_targets_min": 444 | |
| }, | |
| { | |
| "epoch": 1.528, | |
| "grad_norm": 0.6236835096285962, | |
| "learning_rate": 3.7164882263151315e-05, | |
| "loss": 0.4253, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.38910186290740967, | |
| "step": 955, | |
| "valid_targets_mean": 1891.6, | |
| "valid_targets_min": 263 | |
| }, | |
| { | |
| "epoch": 1.536, | |
| "grad_norm": 0.6136149127563765, | |
| "learning_rate": 3.711692181908913e-05, | |
| "loss": 0.4123, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4157019853591919, | |
| "step": 960, | |
| "valid_targets_mean": 2114.7, | |
| "valid_targets_min": 347 | |
| }, | |
| { | |
| "epoch": 1.544, | |
| "grad_norm": 0.663735677302251, | |
| "learning_rate": 3.706859059435871e-05, | |
| "loss": 0.4208, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4292006492614746, | |
| "step": 965, | |
| "valid_targets_mean": 2126.3, | |
| "valid_targets_min": 490 | |
| }, | |
| { | |
| "epoch": 1.552, | |
| "grad_norm": 0.6782677947247513, | |
| "learning_rate": 3.701988963589384e-05, | |
| "loss": 0.4212, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4264790117740631, | |
| "step": 970, | |
| "valid_targets_mean": 1727.6, | |
| "valid_targets_min": 310 | |
| }, | |
| { | |
| "epoch": 1.56, | |
| "grad_norm": 0.5184414648572789, | |
| "learning_rate": 3.697081999863736e-05, | |
| "loss": 0.417, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.32760345935821533, | |
| "step": 975, | |
| "valid_targets_mean": 2424.2, | |
| "valid_targets_min": 383 | |
| }, | |
| { | |
| "epoch": 1.568, | |
| "grad_norm": 0.7911426355404242, | |
| "learning_rate": 3.692138274551828e-05, | |
| "loss": 0.4363, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.44045162200927734, | |
| "step": 980, | |
| "valid_targets_mean": 1370.6, | |
| "valid_targets_min": 334 | |
| }, | |
| { | |
| "epoch": 1.576, | |
| "grad_norm": 0.6775066725506985, | |
| "learning_rate": 3.687157894742878e-05, | |
| "loss": 0.4273, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.43112820386886597, | |
| "step": 985, | |
| "valid_targets_mean": 1683.9, | |
| "valid_targets_min": 519 | |
| }, | |
| { | |
| "epoch": 1.584, | |
| "grad_norm": 0.6861103493292751, | |
| "learning_rate": 3.682140968320101e-05, | |
| "loss": 0.4393, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4412302076816559, | |
| "step": 990, | |
| "valid_targets_mean": 1869.7, | |
| "valid_targets_min": 344 | |
| }, | |
| { | |
| "epoch": 1.592, | |
| "grad_norm": 0.5222410114841262, | |
| "learning_rate": 3.6770876039583725e-05, | |
| "loss": 0.4057, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3676998019218445, | |
| "step": 995, | |
| "valid_targets_mean": 2598.5, | |
| "valid_targets_min": 767 | |
| }, | |
| { | |
| "epoch": 1.6, | |
| "grad_norm": 0.7263035056116521, | |
| "learning_rate": 3.671997911121871e-05, | |
| "loss": 0.4331, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.44147950410842896, | |
| "step": 1000, | |
| "valid_targets_mean": 1699.4, | |
| "valid_targets_min": 251 | |
| }, | |
| { | |
| "epoch": 1.608, | |
| "grad_norm": 0.6414401377935635, | |
| "learning_rate": 3.6668720000617126e-05, | |
| "loss": 0.4163, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.40461379289627075, | |
| "step": 1005, | |
| "valid_targets_mean": 1852.0, | |
| "valid_targets_min": 260 | |
| }, | |
| { | |
| "epoch": 1.616, | |
| "grad_norm": 0.665030507442711, | |
| "learning_rate": 3.661709981813558e-05, | |
| "loss": 0.4091, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.42225736379623413, | |
| "step": 1010, | |
| "valid_targets_mean": 1901.0, | |
| "valid_targets_min": 347 | |
| }, | |
| { | |
| "epoch": 1.624, | |
| "grad_norm": 0.6356037504055488, | |
| "learning_rate": 3.6565119681952086e-05, | |
| "loss": 0.4115, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4438035488128662, | |
| "step": 1015, | |
| "valid_targets_mean": 2260.8, | |
| "valid_targets_min": 377 | |
| }, | |
| { | |
| "epoch": 1.6320000000000001, | |
| "grad_norm": 0.7732442093959834, | |
| "learning_rate": 3.651278071804186e-05, | |
| "loss": 0.426, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4469642639160156, | |
| "step": 1020, | |
| "valid_targets_mean": 1409.3, | |
| "valid_targets_min": 269 | |
| }, | |
| { | |
| "epoch": 1.6400000000000001, | |
| "grad_norm": 0.5821537829613552, | |
| "learning_rate": 3.646008406015291e-05, | |
| "loss": 0.4076, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.37731000781059265, | |
| "step": 1025, | |
| "valid_targets_mean": 2316.8, | |
| "valid_targets_min": 276 | |
| }, | |
| { | |
| "epoch": 1.6480000000000001, | |
| "grad_norm": 0.5801157485975053, | |
| "learning_rate": 3.6407030849781475e-05, | |
| "loss": 0.4079, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4072588086128235, | |
| "step": 1030, | |
| "valid_targets_mean": 2757.8, | |
| "valid_targets_min": 653 | |
| }, | |
| { | |
| "epoch": 1.6560000000000001, | |
| "grad_norm": 0.8050988043280486, | |
| "learning_rate": 3.635362223614733e-05, | |
| "loss": 0.4277, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4489116966724396, | |
| "step": 1035, | |
| "valid_targets_mean": 2312.9, | |
| "valid_targets_min": 367 | |
| }, | |
| { | |
| "epoch": 1.6640000000000001, | |
| "grad_norm": 0.47234546863098514, | |
| "learning_rate": 3.629985937616884e-05, | |
| "loss": 0.4062, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.34627580642700195, | |
| "step": 1040, | |
| "valid_targets_mean": 2884.6, | |
| "valid_targets_min": 333 | |
| }, | |
| { | |
| "epoch": 1.6720000000000002, | |
| "grad_norm": 0.6661188572410601, | |
| "learning_rate": 3.624574343443794e-05, | |
| "loss": 0.4154, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4187551438808441, | |
| "step": 1045, | |
| "valid_targets_mean": 1783.8, | |
| "valid_targets_min": 288 | |
| }, | |
| { | |
| "epoch": 1.6800000000000002, | |
| "grad_norm": 0.5719081478717788, | |
| "learning_rate": 3.619127558319492e-05, | |
| "loss": 0.4129, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3960324227809906, | |
| "step": 1050, | |
| "valid_targets_mean": 2359.3, | |
| "valid_targets_min": 316 | |
| }, | |
| { | |
| "epoch": 1.688, | |
| "grad_norm": 0.8110764350477331, | |
| "learning_rate": 3.613645700230298e-05, | |
| "loss": 0.4175, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4404456913471222, | |
| "step": 1055, | |
| "valid_targets_mean": 1428.6, | |
| "valid_targets_min": 231 | |
| }, | |
| { | |
| "epoch": 1.696, | |
| "grad_norm": 0.622831980933497, | |
| "learning_rate": 3.6081288879222696e-05, | |
| "loss": 0.4075, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.41072455048561096, | |
| "step": 1060, | |
| "valid_targets_mean": 2187.9, | |
| "valid_targets_min": 375 | |
| }, | |
| { | |
| "epoch": 1.704, | |
| "grad_norm": 0.6293939115848193, | |
| "learning_rate": 3.602577240898633e-05, | |
| "loss": 0.4266, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.40541231632232666, | |
| "step": 1065, | |
| "valid_targets_mean": 1947.6, | |
| "valid_targets_min": 411 | |
| }, | |
| { | |
| "epoch": 1.712, | |
| "grad_norm": 0.6851832609298075, | |
| "learning_rate": 3.596990879417188e-05, | |
| "loss": 0.4275, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4256119728088379, | |
| "step": 1070, | |
| "valid_targets_mean": 1472.7, | |
| "valid_targets_min": 275 | |
| }, | |
| { | |
| "epoch": 1.72, | |
| "grad_norm": 0.566532739385179, | |
| "learning_rate": 3.591369924487711e-05, | |
| "loss": 0.4191, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.42117422819137573, | |
| "step": 1075, | |
| "valid_targets_mean": 2386.8, | |
| "valid_targets_min": 513 | |
| }, | |
| { | |
| "epoch": 1.728, | |
| "grad_norm": 0.5939056675036943, | |
| "learning_rate": 3.585714497869326e-05, | |
| "loss": 0.4186, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4099000096321106, | |
| "step": 1080, | |
| "valid_targets_mean": 2248.2, | |
| "valid_targets_min": 474 | |
| }, | |
| { | |
| "epoch": 1.736, | |
| "grad_norm": 0.5212578820241479, | |
| "learning_rate": 3.580024722067872e-05, | |
| "loss": 0.4016, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.36171701550483704, | |
| "step": 1085, | |
| "valid_targets_mean": 2644.8, | |
| "valid_targets_min": 427 | |
| }, | |
| { | |
| "epoch": 1.744, | |
| "grad_norm": 0.609750475691826, | |
| "learning_rate": 3.574300720333247e-05, | |
| "loss": 0.4287, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.40230706334114075, | |
| "step": 1090, | |
| "valid_targets_mean": 2274.1, | |
| "valid_targets_min": 352 | |
| }, | |
| { | |
| "epoch": 1.752, | |
| "grad_norm": 0.5794914229611461, | |
| "learning_rate": 3.568542616656739e-05, | |
| "loss": 0.4114, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4043058156967163, | |
| "step": 1095, | |
| "valid_targets_mean": 2435.9, | |
| "valid_targets_min": 510 | |
| }, | |
| { | |
| "epoch": 1.76, | |
| "grad_norm": 0.5754248246253073, | |
| "learning_rate": 3.5627505357683404e-05, | |
| "loss": 0.4288, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4389524757862091, | |
| "step": 1100, | |
| "valid_targets_mean": 2579.7, | |
| "valid_targets_min": 436 | |
| }, | |
| { | |
| "epoch": 1.768, | |
| "grad_norm": 0.5220103919628871, | |
| "learning_rate": 3.5569246031340474e-05, | |
| "loss": 0.4194, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3954297602176666, | |
| "step": 1105, | |
| "valid_targets_mean": 3061.6, | |
| "valid_targets_min": 951 | |
| }, | |
| { | |
| "epoch": 1.776, | |
| "grad_norm": 0.5971366510150363, | |
| "learning_rate": 3.5510649449531375e-05, | |
| "loss": 0.4129, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.41429704427719116, | |
| "step": 1110, | |
| "valid_targets_mean": 2268.1, | |
| "valid_targets_min": 458 | |
| }, | |
| { | |
| "epoch": 1.784, | |
| "grad_norm": 0.6407089899675739, | |
| "learning_rate": 3.545171688155441e-05, | |
| "loss": 0.4055, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.44793933629989624, | |
| "step": 1115, | |
| "valid_targets_mean": 2024.1, | |
| "valid_targets_min": 324 | |
| }, | |
| { | |
| "epoch": 1.792, | |
| "grad_norm": 0.45308297979181544, | |
| "learning_rate": 3.5392449603985894e-05, | |
| "loss": 0.3998, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.32858961820602417, | |
| "step": 1120, | |
| "valid_targets_mean": 3030.6, | |
| "valid_targets_min": 416 | |
| }, | |
| { | |
| "epoch": 1.8, | |
| "grad_norm": 0.6548803541766852, | |
| "learning_rate": 3.53328489006525e-05, | |
| "loss": 0.4377, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4534260034561157, | |
| "step": 1125, | |
| "valid_targets_mean": 2124.0, | |
| "valid_targets_min": 224 | |
| }, | |
| { | |
| "epoch": 1.808, | |
| "grad_norm": 0.5816411189387833, | |
| "learning_rate": 3.527291606260345e-05, | |
| "loss": 0.4319, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4444667100906372, | |
| "step": 1130, | |
| "valid_targets_mean": 2557.6, | |
| "valid_targets_min": 310 | |
| }, | |
| { | |
| "epoch": 1.8159999999999998, | |
| "grad_norm": 0.5639209652953462, | |
| "learning_rate": 3.521265238808255e-05, | |
| "loss": 0.4243, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3954368531703949, | |
| "step": 1135, | |
| "valid_targets_mean": 2317.2, | |
| "valid_targets_min": 274 | |
| }, | |
| { | |
| "epoch": 1.8239999999999998, | |
| "grad_norm": 0.6457666417385817, | |
| "learning_rate": 3.515205918250007e-05, | |
| "loss": 0.4265, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4396483600139618, | |
| "step": 1140, | |
| "valid_targets_mean": 2138.4, | |
| "valid_targets_min": 447 | |
| }, | |
| { | |
| "epoch": 1.8319999999999999, | |
| "grad_norm": 0.6196236920879702, | |
| "learning_rate": 3.5091137758404456e-05, | |
| "loss": 0.4249, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.39901548624038696, | |
| "step": 1145, | |
| "valid_targets_mean": 2027.0, | |
| "valid_targets_min": 318 | |
| }, | |
| { | |
| "epoch": 1.8399999999999999, | |
| "grad_norm": 0.767365714888303, | |
| "learning_rate": 3.5029889435453924e-05, | |
| "loss": 0.4252, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.42294585704803467, | |
| "step": 1150, | |
| "valid_targets_mean": 1286.5, | |
| "valid_targets_min": 382 | |
| }, | |
| { | |
| "epoch": 1.8479999999999999, | |
| "grad_norm": 0.5065314027237436, | |
| "learning_rate": 3.496831554038784e-05, | |
| "loss": 0.3987, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3655644357204437, | |
| "step": 1155, | |
| "valid_targets_mean": 2544.6, | |
| "valid_targets_min": 269 | |
| }, | |
| { | |
| "epoch": 1.8559999999999999, | |
| "grad_norm": 0.6224593008893803, | |
| "learning_rate": 3.490641740699801e-05, | |
| "loss": 0.4305, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.43739748001098633, | |
| "step": 1160, | |
| "valid_targets_mean": 1975.6, | |
| "valid_targets_min": 315 | |
| }, | |
| { | |
| "epoch": 1.8639999999999999, | |
| "grad_norm": 0.6327725562239911, | |
| "learning_rate": 3.484419637609977e-05, | |
| "loss": 0.3991, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.403567910194397, | |
| "step": 1165, | |
| "valid_targets_mean": 1797.9, | |
| "valid_targets_min": 255 | |
| }, | |
| { | |
| "epoch": 1.8719999999999999, | |
| "grad_norm": 0.6577564173316653, | |
| "learning_rate": 3.478165379550292e-05, | |
| "loss": 0.3962, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.37376055121421814, | |
| "step": 1170, | |
| "valid_targets_mean": 1691.4, | |
| "valid_targets_min": 303 | |
| }, | |
| { | |
| "epoch": 1.88, | |
| "grad_norm": 0.5264996171345204, | |
| "learning_rate": 3.471879101998262e-05, | |
| "loss": 0.4263, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4288645386695862, | |
| "step": 1175, | |
| "valid_targets_mean": 3079.2, | |
| "valid_targets_min": 266 | |
| }, | |
| { | |
| "epoch": 1.888, | |
| "grad_norm": 0.6169190044077482, | |
| "learning_rate": 3.465560941124992e-05, | |
| "loss": 0.4112, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.40619945526123047, | |
| "step": 1180, | |
| "valid_targets_mean": 2156.5, | |
| "valid_targets_min": 393 | |
| }, | |
| { | |
| "epoch": 1.896, | |
| "grad_norm": 0.6058457801896662, | |
| "learning_rate": 3.459211033792233e-05, | |
| "loss": 0.4112, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4081569314002991, | |
| "step": 1185, | |
| "valid_targets_mean": 2144.6, | |
| "valid_targets_min": 866 | |
| }, | |
| { | |
| "epoch": 1.904, | |
| "grad_norm": 0.6526732474784974, | |
| "learning_rate": 3.4528295175494194e-05, | |
| "loss": 0.4076, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3930222988128662, | |
| "step": 1190, | |
| "valid_targets_mean": 1723.6, | |
| "valid_targets_min": 359 | |
| }, | |
| { | |
| "epoch": 1.912, | |
| "grad_norm": 0.5775550846315493, | |
| "learning_rate": 3.4464165306306845e-05, | |
| "loss": 0.4187, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4080520570278168, | |
| "step": 1195, | |
| "valid_targets_mean": 2086.7, | |
| "valid_targets_min": 340 | |
| }, | |
| { | |
| "epoch": 1.92, | |
| "grad_norm": 0.6452796330672489, | |
| "learning_rate": 3.4399722119518675e-05, | |
| "loss": 0.4146, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4046083092689514, | |
| "step": 1200, | |
| "valid_targets_mean": 1806.4, | |
| "valid_targets_min": 271 | |
| }, | |
| { | |
| "epoch": 1.928, | |
| "grad_norm": 0.5700909509273088, | |
| "learning_rate": 3.433496701107506e-05, | |
| "loss": 0.3901, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.39472758769989014, | |
| "step": 1205, | |
| "valid_targets_mean": 2073.8, | |
| "valid_targets_min": 372 | |
| }, | |
| { | |
| "epoch": 1.936, | |
| "grad_norm": 0.5840559603258615, | |
| "learning_rate": 3.426990138367813e-05, | |
| "loss": 0.4359, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4290251135826111, | |
| "step": 1210, | |
| "valid_targets_mean": 2437.6, | |
| "valid_targets_min": 270 | |
| }, | |
| { | |
| "epoch": 1.944, | |
| "grad_norm": 0.6224656544519471, | |
| "learning_rate": 3.420452664675633e-05, | |
| "loss": 0.4173, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.40224143862724304, | |
| "step": 1215, | |
| "valid_targets_mean": 1884.1, | |
| "valid_targets_min": 259 | |
| }, | |
| { | |
| "epoch": 1.952, | |
| "grad_norm": 0.6047148944697572, | |
| "learning_rate": 3.4138844216433946e-05, | |
| "loss": 0.4111, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3841584324836731, | |
| "step": 1220, | |
| "valid_targets_mean": 2120.4, | |
| "valid_targets_min": 324 | |
| }, | |
| { | |
| "epoch": 1.96, | |
| "grad_norm": 0.6705707751188134, | |
| "learning_rate": 3.407285551550041e-05, | |
| "loss": 0.4103, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.42706602811813354, | |
| "step": 1225, | |
| "valid_targets_mean": 1812.0, | |
| "valid_targets_min": 396 | |
| }, | |
| { | |
| "epoch": 1.968, | |
| "grad_norm": 0.7155983068467356, | |
| "learning_rate": 3.4006561973379466e-05, | |
| "loss": 0.4077, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.41739869117736816, | |
| "step": 1230, | |
| "valid_targets_mean": 1580.4, | |
| "valid_targets_min": 345 | |
| }, | |
| { | |
| "epoch": 1.976, | |
| "grad_norm": 0.6911635067643795, | |
| "learning_rate": 3.3939965026098245e-05, | |
| "loss": 0.4123, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4420611560344696, | |
| "step": 1235, | |
| "valid_targets_mean": 1608.1, | |
| "valid_targets_min": 328 | |
| }, | |
| { | |
| "epoch": 1.984, | |
| "grad_norm": 0.6116239335275719, | |
| "learning_rate": 3.38730661162561e-05, | |
| "loss": 0.3997, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.40063539147377014, | |
| "step": 1240, | |
| "valid_targets_mean": 1901.7, | |
| "valid_targets_min": 284 | |
| }, | |
| { | |
| "epoch": 1.992, | |
| "grad_norm": 0.5904479518619141, | |
| "learning_rate": 3.3805866692993414e-05, | |
| "loss": 0.4067, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4212016761302948, | |
| "step": 1245, | |
| "valid_targets_mean": 2145.4, | |
| "valid_targets_min": 295 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "grad_norm": 0.5324617772180122, | |
| "learning_rate": 3.373836821196018e-05, | |
| "loss": 0.418, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3954014778137207, | |
| "step": 1250, | |
| "valid_targets_mean": 2559.7, | |
| "valid_targets_min": 981 | |
| }, | |
| { | |
| "epoch": 2.008, | |
| "grad_norm": 0.8042679257405158, | |
| "learning_rate": 3.3670572135284456e-05, | |
| "loss": 0.3902, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.43946588039398193, | |
| "step": 1255, | |
| "valid_targets_mean": 1429.3, | |
| "valid_targets_min": 296 | |
| }, | |
| { | |
| "epoch": 2.016, | |
| "grad_norm": 0.5825868427650474, | |
| "learning_rate": 3.360247993154073e-05, | |
| "loss": 0.397, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3697652220726013, | |
| "step": 1260, | |
| "valid_targets_mean": 2332.2, | |
| "valid_targets_min": 336 | |
| }, | |
| { | |
| "epoch": 2.024, | |
| "grad_norm": 0.546919198592147, | |
| "learning_rate": 3.35340930757181e-05, | |
| "loss": 0.3864, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3581070899963379, | |
| "step": 1265, | |
| "valid_targets_mean": 2658.7, | |
| "valid_targets_min": 310 | |
| }, | |
| { | |
| "epoch": 2.032, | |
| "grad_norm": 0.5869935633164326, | |
| "learning_rate": 3.3465413049188276e-05, | |
| "loss": 0.3792, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3628545105457306, | |
| "step": 1270, | |
| "valid_targets_mean": 2203.2, | |
| "valid_targets_min": 287 | |
| }, | |
| { | |
| "epoch": 2.04, | |
| "grad_norm": 0.5343589346106076, | |
| "learning_rate": 3.3396441339673564e-05, | |
| "loss": 0.3766, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.32373273372650146, | |
| "step": 1275, | |
| "valid_targets_mean": 2545.1, | |
| "valid_targets_min": 308 | |
| }, | |
| { | |
| "epoch": 2.048, | |
| "grad_norm": 0.5413135343451548, | |
| "learning_rate": 3.3327179441214574e-05, | |
| "loss": 0.3949, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.35532093048095703, | |
| "step": 1280, | |
| "valid_targets_mean": 2464.2, | |
| "valid_targets_min": 288 | |
| }, | |
| { | |
| "epoch": 2.056, | |
| "grad_norm": 0.6817969700174025, | |
| "learning_rate": 3.325762885413791e-05, | |
| "loss": 0.3988, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.382097989320755, | |
| "step": 1285, | |
| "valid_targets_mean": 1719.3, | |
| "valid_targets_min": 294 | |
| }, | |
| { | |
| "epoch": 2.064, | |
| "grad_norm": 0.5815795286182812, | |
| "learning_rate": 3.318779108502362e-05, | |
| "loss": 0.3733, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.33395498991012573, | |
| "step": 1290, | |
| "valid_targets_mean": 1987.3, | |
| "valid_targets_min": 413 | |
| }, | |
| { | |
| "epoch": 2.072, | |
| "grad_norm": 0.5696718808624218, | |
| "learning_rate": 3.3117667646672616e-05, | |
| "loss": 0.3754, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.34764641523361206, | |
| "step": 1295, | |
| "valid_targets_mean": 2239.6, | |
| "valid_targets_min": 295 | |
| }, | |
| { | |
| "epoch": 2.08, | |
| "grad_norm": 0.6666024146123932, | |
| "learning_rate": 3.304726005807386e-05, | |
| "loss": 0.3775, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3691959083080292, | |
| "step": 1300, | |
| "valid_targets_mean": 1835.2, | |
| "valid_targets_min": 303 | |
| }, | |
| { | |
| "epoch": 2.088, | |
| "grad_norm": 0.6251542692665185, | |
| "learning_rate": 3.297656984437148e-05, | |
| "loss": 0.388, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.37082281708717346, | |
| "step": 1305, | |
| "valid_targets_mean": 1891.2, | |
| "valid_targets_min": 369 | |
| }, | |
| { | |
| "epoch": 2.096, | |
| "grad_norm": 0.7297170840175997, | |
| "learning_rate": 3.2905598536831715e-05, | |
| "loss": 0.4185, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.418671578168869, | |
| "step": 1310, | |
| "valid_targets_mean": 1513.4, | |
| "valid_targets_min": 292 | |
| }, | |
| { | |
| "epoch": 2.104, | |
| "grad_norm": 0.5429482228005845, | |
| "learning_rate": 3.2834347672809776e-05, | |
| "loss": 0.3794, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.37912678718566895, | |
| "step": 1315, | |
| "valid_targets_mean": 2980.8, | |
| "valid_targets_min": 418 | |
| }, | |
| { | |
| "epoch": 2.112, | |
| "grad_norm": 0.5033579471390823, | |
| "learning_rate": 3.276281879571651e-05, | |
| "loss": 0.3675, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.37450850009918213, | |
| "step": 1320, | |
| "valid_targets_mean": 3124.1, | |
| "valid_targets_min": 457 | |
| }, | |
| { | |
| "epoch": 2.12, | |
| "grad_norm": 0.684857507960299, | |
| "learning_rate": 3.2691013454985006e-05, | |
| "loss": 0.3883, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.40342050790786743, | |
| "step": 1325, | |
| "valid_targets_mean": 1658.2, | |
| "valid_targets_min": 282 | |
| }, | |
| { | |
| "epoch": 2.128, | |
| "grad_norm": 0.6038090725419427, | |
| "learning_rate": 3.2618933206036994e-05, | |
| "loss": 0.394, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.416049599647522, | |
| "step": 1330, | |
| "valid_targets_mean": 2440.7, | |
| "valid_targets_min": 416 | |
| }, | |
| { | |
| "epoch": 2.136, | |
| "grad_norm": 0.5460804755588241, | |
| "learning_rate": 3.2546579610249177e-05, | |
| "loss": 0.381, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3303455710411072, | |
| "step": 1335, | |
| "valid_targets_mean": 2594.1, | |
| "valid_targets_min": 352 | |
| }, | |
| { | |
| "epoch": 2.144, | |
| "grad_norm": 0.5097873502985986, | |
| "learning_rate": 3.2473954234919386e-05, | |
| "loss": 0.3831, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.35893014073371887, | |
| "step": 1340, | |
| "valid_targets_mean": 2558.4, | |
| "valid_targets_min": 320 | |
| }, | |
| { | |
| "epoch": 2.152, | |
| "grad_norm": 0.8105476214389086, | |
| "learning_rate": 3.240105865323266e-05, | |
| "loss": 0.3837, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.41480281949043274, | |
| "step": 1345, | |
| "valid_targets_mean": 1307.5, | |
| "valid_targets_min": 397 | |
| }, | |
| { | |
| "epoch": 2.16, | |
| "grad_norm": 0.5800204725164848, | |
| "learning_rate": 3.232789444422714e-05, | |
| "loss": 0.3803, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3462577760219574, | |
| "step": 1350, | |
| "valid_targets_mean": 2189.2, | |
| "valid_targets_min": 309 | |
| }, | |
| { | |
| "epoch": 2.168, | |
| "grad_norm": 0.6828615072566614, | |
| "learning_rate": 3.225446319275988e-05, | |
| "loss": 0.407, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3787284195423126, | |
| "step": 1355, | |
| "valid_targets_mean": 1971.3, | |
| "valid_targets_min": 359 | |
| }, | |
| { | |
| "epoch": 2.176, | |
| "grad_norm": 0.7218364657157212, | |
| "learning_rate": 3.218076648947251e-05, | |
| "loss": 0.3962, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.39449766278266907, | |
| "step": 1360, | |
| "valid_targets_mean": 1633.2, | |
| "valid_targets_min": 383 | |
| }, | |
| { | |
| "epoch": 2.184, | |
| "grad_norm": 0.7856076876689837, | |
| "learning_rate": 3.2106805930756804e-05, | |
| "loss": 0.383, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.40469610691070557, | |
| "step": 1365, | |
| "valid_targets_mean": 1401.5, | |
| "valid_targets_min": 332 | |
| }, | |
| { | |
| "epoch": 2.192, | |
| "grad_norm": 0.6694940856712724, | |
| "learning_rate": 3.2032583118720045e-05, | |
| "loss": 0.4085, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.44130784273147583, | |
| "step": 1370, | |
| "valid_targets_mean": 1951.4, | |
| "valid_targets_min": 466 | |
| }, | |
| { | |
| "epoch": 2.2, | |
| "grad_norm": 0.49962476165457587, | |
| "learning_rate": 3.195809966115038e-05, | |
| "loss": 0.3659, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3548528552055359, | |
| "step": 1375, | |
| "valid_targets_mean": 2828.1, | |
| "valid_targets_min": 200 | |
| }, | |
| { | |
| "epoch": 2.208, | |
| "grad_norm": 0.5741497964442147, | |
| "learning_rate": 3.188335717148195e-05, | |
| "loss": 0.3928, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3834153115749359, | |
| "step": 1380, | |
| "valid_targets_mean": 2393.4, | |
| "valid_targets_min": 428 | |
| }, | |
| { | |
| "epoch": 2.216, | |
| "grad_norm": 0.5342561972025831, | |
| "learning_rate": 3.1808357268759964e-05, | |
| "loss": 0.3745, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.350533664226532, | |
| "step": 1385, | |
| "valid_targets_mean": 2746.4, | |
| "valid_targets_min": 462 | |
| }, | |
| { | |
| "epoch": 2.224, | |
| "grad_norm": 0.5514827015335317, | |
| "learning_rate": 3.173310157760563e-05, | |
| "loss": 0.3747, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3370269536972046, | |
| "step": 1390, | |
| "valid_targets_mean": 2396.6, | |
| "valid_targets_min": 260 | |
| }, | |
| { | |
| "epoch": 2.232, | |
| "grad_norm": 0.6274109214397176, | |
| "learning_rate": 3.165759172818093e-05, | |
| "loss": 0.3792, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3951869010925293, | |
| "step": 1395, | |
| "valid_targets_mean": 2055.7, | |
| "valid_targets_min": 273 | |
| }, | |
| { | |
| "epoch": 2.24, | |
| "grad_norm": 0.6014822126164667, | |
| "learning_rate": 3.158182935615336e-05, | |
| "loss": 0.3977, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3926844000816345, | |
| "step": 1400, | |
| "valid_targets_mean": 2313.9, | |
| "valid_targets_min": 306 | |
| }, | |
| { | |
| "epoch": 2.248, | |
| "grad_norm": 0.5574708482762952, | |
| "learning_rate": 3.150581610266046e-05, | |
| "loss": 0.3984, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3544110357761383, | |
| "step": 1405, | |
| "valid_targets_mean": 2392.4, | |
| "valid_targets_min": 307 | |
| }, | |
| { | |
| "epoch": 2.2560000000000002, | |
| "grad_norm": 0.6183989505811474, | |
| "learning_rate": 3.1429553614274256e-05, | |
| "loss": 0.381, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.37505680322647095, | |
| "step": 1410, | |
| "valid_targets_mean": 2065.1, | |
| "valid_targets_min": 367 | |
| }, | |
| { | |
| "epoch": 2.2640000000000002, | |
| "grad_norm": 0.5589768205400985, | |
| "learning_rate": 3.1353043542965636e-05, | |
| "loss": 0.3761, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.35910564661026, | |
| "step": 1415, | |
| "valid_targets_mean": 2398.2, | |
| "valid_targets_min": 374 | |
| }, | |
| { | |
| "epoch": 2.2720000000000002, | |
| "grad_norm": 0.5865868580427955, | |
| "learning_rate": 3.1276287546068536e-05, | |
| "loss": 0.3754, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3461493253707886, | |
| "step": 1420, | |
| "valid_targets_mean": 2379.2, | |
| "valid_targets_min": 298 | |
| }, | |
| { | |
| "epoch": 2.2800000000000002, | |
| "grad_norm": 0.6030591709759953, | |
| "learning_rate": 3.1199287286244047e-05, | |
| "loss": 0.3924, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3672935366630554, | |
| "step": 1425, | |
| "valid_targets_mean": 2200.0, | |
| "valid_targets_min": 306 | |
| }, | |
| { | |
| "epoch": 2.288, | |
| "grad_norm": 0.7226166193399812, | |
| "learning_rate": 3.112204443144438e-05, | |
| "loss": 0.4048, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4033268690109253, | |
| "step": 1430, | |
| "valid_targets_mean": 1554.2, | |
| "valid_targets_min": 248 | |
| }, | |
| { | |
| "epoch": 2.296, | |
| "grad_norm": 0.6717505093881606, | |
| "learning_rate": 3.1044560654876775e-05, | |
| "loss": 0.4009, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4155598282814026, | |
| "step": 1435, | |
| "valid_targets_mean": 2037.2, | |
| "valid_targets_min": 354 | |
| }, | |
| { | |
| "epoch": 2.304, | |
| "grad_norm": 0.6812221485042874, | |
| "learning_rate": 3.0966837634967215e-05, | |
| "loss": 0.3894, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3771967589855194, | |
| "step": 1440, | |
| "valid_targets_mean": 2331.6, | |
| "valid_targets_min": 334 | |
| }, | |
| { | |
| "epoch": 2.312, | |
| "grad_norm": 0.7530405753256025, | |
| "learning_rate": 3.088887705532409e-05, | |
| "loss": 0.3724, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4040983319282532, | |
| "step": 1445, | |
| "valid_targets_mean": 1532.9, | |
| "valid_targets_min": 326 | |
| }, | |
| { | |
| "epoch": 2.32, | |
| "grad_norm": 0.6435976046060277, | |
| "learning_rate": 3.081068060470174e-05, | |
| "loss": 0.3722, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.37423545122146606, | |
| "step": 1450, | |
| "valid_targets_mean": 2140.8, | |
| "valid_targets_min": 375 | |
| }, | |
| { | |
| "epoch": 2.328, | |
| "grad_norm": 0.6798369295901472, | |
| "learning_rate": 3.073224997696385e-05, | |
| "loss": 0.3892, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3684957027435303, | |
| "step": 1455, | |
| "valid_targets_mean": 1625.9, | |
| "valid_targets_min": 312 | |
| }, | |
| { | |
| "epoch": 2.336, | |
| "grad_norm": 0.6399613920281401, | |
| "learning_rate": 3.065358687104675e-05, | |
| "loss": 0.3904, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.41061943769454956, | |
| "step": 1460, | |
| "valid_targets_mean": 2171.6, | |
| "valid_targets_min": 298 | |
| }, | |
| { | |
| "epoch": 2.344, | |
| "grad_norm": 0.568295011340494, | |
| "learning_rate": 3.057469299092264e-05, | |
| "loss": 0.3697, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.363444060087204, | |
| "step": 1465, | |
| "valid_targets_mean": 2369.4, | |
| "valid_targets_min": 404 | |
| }, | |
| { | |
| "epoch": 2.352, | |
| "grad_norm": 0.6749189154211231, | |
| "learning_rate": 3.0495570045562686e-05, | |
| "loss": 0.3702, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3943634033203125, | |
| "step": 1470, | |
| "valid_targets_mean": 1813.8, | |
| "valid_targets_min": 330 | |
| }, | |
| { | |
| "epoch": 2.36, | |
| "grad_norm": 0.5424934156524889, | |
| "learning_rate": 3.041621974889996e-05, | |
| "loss": 0.3708, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3570939302444458, | |
| "step": 1475, | |
| "valid_targets_mean": 2509.6, | |
| "valid_targets_min": 416 | |
| }, | |
| { | |
| "epoch": 2.368, | |
| "grad_norm": 0.5855175945300712, | |
| "learning_rate": 3.0336643819792342e-05, | |
| "loss": 0.4063, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.41212403774261475, | |
| "step": 1480, | |
| "valid_targets_mean": 2239.9, | |
| "valid_targets_min": 340 | |
| }, | |
| { | |
| "epoch": 2.376, | |
| "grad_norm": 0.8589550576485474, | |
| "learning_rate": 3.0256843981985295e-05, | |
| "loss": 0.3957, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.42375245690345764, | |
| "step": 1485, | |
| "valid_targets_mean": 1203.8, | |
| "valid_targets_min": 244 | |
| }, | |
| { | |
| "epoch": 2.384, | |
| "grad_norm": 0.5872173730521972, | |
| "learning_rate": 3.0176821964074503e-05, | |
| "loss": 0.3766, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.387226939201355, | |
| "step": 1490, | |
| "valid_targets_mean": 2280.3, | |
| "valid_targets_min": 295 | |
| }, | |
| { | |
| "epoch": 2.392, | |
| "grad_norm": 0.5887630792204165, | |
| "learning_rate": 3.009657949946844e-05, | |
| "loss": 0.3711, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.36949145793914795, | |
| "step": 1495, | |
| "valid_targets_mean": 2453.8, | |
| "valid_targets_min": 436 | |
| }, | |
| { | |
| "epoch": 2.4, | |
| "grad_norm": 0.5888662518478893, | |
| "learning_rate": 3.00161183263508e-05, | |
| "loss": 0.3863, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.36580803990364075, | |
| "step": 1500, | |
| "valid_targets_mean": 2082.8, | |
| "valid_targets_min": 375 | |
| }, | |
| { | |
| "epoch": 2.408, | |
| "grad_norm": 0.7403053263669406, | |
| "learning_rate": 2.993544018764289e-05, | |
| "loss": 0.4124, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.405397891998291, | |
| "step": 1505, | |
| "valid_targets_mean": 1624.9, | |
| "valid_targets_min": 302 | |
| }, | |
| { | |
| "epoch": 2.416, | |
| "grad_norm": 0.6912705085590389, | |
| "learning_rate": 2.9854546830965833e-05, | |
| "loss": 0.4003, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.43761569261550903, | |
| "step": 1510, | |
| "valid_targets_mean": 1889.9, | |
| "valid_targets_min": 362 | |
| }, | |
| { | |
| "epoch": 2.424, | |
| "grad_norm": 0.672027760605599, | |
| "learning_rate": 2.9773440008602736e-05, | |
| "loss": 0.3808, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3860786557197571, | |
| "step": 1515, | |
| "valid_targets_mean": 1880.1, | |
| "valid_targets_min": 287 | |
| }, | |
| { | |
| "epoch": 2.432, | |
| "grad_norm": 0.6176696783074045, | |
| "learning_rate": 2.96921214774607e-05, | |
| "loss": 0.4028, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.36806997656822205, | |
| "step": 1520, | |
| "valid_targets_mean": 2141.5, | |
| "valid_targets_min": 355 | |
| }, | |
| { | |
| "epoch": 2.44, | |
| "grad_norm": 0.5813640366994491, | |
| "learning_rate": 2.9610592999032815e-05, | |
| "loss": 0.3873, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3732055425643921, | |
| "step": 1525, | |
| "valid_targets_mean": 2313.7, | |
| "valid_targets_min": 314 | |
| }, | |
| { | |
| "epoch": 2.448, | |
| "grad_norm": 0.5839312210260318, | |
| "learning_rate": 2.9528856339359973e-05, | |
| "loss": 0.365, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.31384432315826416, | |
| "step": 1530, | |
| "valid_targets_mean": 1865.6, | |
| "valid_targets_min": 353 | |
| }, | |
| { | |
| "epoch": 2.456, | |
| "grad_norm": 0.6155472401477654, | |
| "learning_rate": 2.9446913268992588e-05, | |
| "loss": 0.3956, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3974214792251587, | |
| "step": 1535, | |
| "valid_targets_mean": 2303.0, | |
| "valid_targets_min": 262 | |
| }, | |
| { | |
| "epoch": 2.464, | |
| "grad_norm": 0.7703616946243632, | |
| "learning_rate": 2.936476556295229e-05, | |
| "loss": 0.4069, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4263591170310974, | |
| "step": 1540, | |
| "valid_targets_mean": 1433.4, | |
| "valid_targets_min": 281 | |
| }, | |
| { | |
| "epoch": 2.472, | |
| "grad_norm": 0.6233807624925547, | |
| "learning_rate": 2.928241500069346e-05, | |
| "loss": 0.3936, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.41739413142204285, | |
| "step": 1545, | |
| "valid_targets_mean": 2367.2, | |
| "valid_targets_min": 323 | |
| }, | |
| { | |
| "epoch": 2.48, | |
| "grad_norm": 0.6603074426362654, | |
| "learning_rate": 2.9199863366064655e-05, | |
| "loss": 0.3799, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4242068827152252, | |
| "step": 1550, | |
| "valid_targets_mean": 1868.7, | |
| "valid_targets_min": 324 | |
| }, | |
| { | |
| "epoch": 2.488, | |
| "grad_norm": 0.6887756343088828, | |
| "learning_rate": 2.9117112447270007e-05, | |
| "loss": 0.369, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.37890493869781494, | |
| "step": 1555, | |
| "valid_targets_mean": 1584.6, | |
| "valid_targets_min": 272 | |
| }, | |
| { | |
| "epoch": 2.496, | |
| "grad_norm": 0.6235381189635956, | |
| "learning_rate": 2.9034164036830462e-05, | |
| "loss": 0.3928, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3724491000175476, | |
| "step": 1560, | |
| "valid_targets_mean": 2009.0, | |
| "valid_targets_min": 388 | |
| }, | |
| { | |
| "epoch": 2.504, | |
| "grad_norm": 0.6157420171365863, | |
| "learning_rate": 2.8951019931544975e-05, | |
| "loss": 0.3754, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.39165908098220825, | |
| "step": 1565, | |
| "valid_targets_mean": 2098.3, | |
| "valid_targets_min": 319 | |
| }, | |
| { | |
| "epoch": 2.512, | |
| "grad_norm": 0.612679970950294, | |
| "learning_rate": 2.8867681932451544e-05, | |
| "loss": 0.3882, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.36644482612609863, | |
| "step": 1570, | |
| "valid_targets_mean": 1927.0, | |
| "valid_targets_min": 281 | |
| }, | |
| { | |
| "epoch": 2.52, | |
| "grad_norm": 0.6582982972720843, | |
| "learning_rate": 2.8784151844788267e-05, | |
| "loss": 0.4039, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4032679796218872, | |
| "step": 1575, | |
| "valid_targets_mean": 1865.6, | |
| "valid_targets_min": 362 | |
| }, | |
| { | |
| "epoch": 2.528, | |
| "grad_norm": 0.633080352002716, | |
| "learning_rate": 2.8700431477954155e-05, | |
| "loss": 0.3807, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3910493850708008, | |
| "step": 1580, | |
| "valid_targets_mean": 2398.1, | |
| "valid_targets_min": 331 | |
| }, | |
| { | |
| "epoch": 2.536, | |
| "grad_norm": 0.7483573101690317, | |
| "learning_rate": 2.8616522645470012e-05, | |
| "loss": 0.3926, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3737632632255554, | |
| "step": 1585, | |
| "valid_targets_mean": 1429.8, | |
| "valid_targets_min": 222 | |
| }, | |
| { | |
| "epoch": 2.544, | |
| "grad_norm": 0.6869251081787947, | |
| "learning_rate": 2.8532427164939086e-05, | |
| "loss": 0.3744, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.40796542167663574, | |
| "step": 1590, | |
| "valid_targets_mean": 1823.2, | |
| "valid_targets_min": 310 | |
| }, | |
| { | |
| "epoch": 2.552, | |
| "grad_norm": 0.7574071608548221, | |
| "learning_rate": 2.844814685800776e-05, | |
| "loss": 0.3992, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4246245324611664, | |
| "step": 1595, | |
| "valid_targets_mean": 1657.1, | |
| "valid_targets_min": 413 | |
| }, | |
| { | |
| "epoch": 2.56, | |
| "grad_norm": 0.6473114839665947, | |
| "learning_rate": 2.8363683550326028e-05, | |
| "loss": 0.3992, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.39974695444107056, | |
| "step": 1600, | |
| "valid_targets_mean": 2080.9, | |
| "valid_targets_min": 416 | |
| }, | |
| { | |
| "epoch": 2.568, | |
| "grad_norm": 0.5386297063629182, | |
| "learning_rate": 2.8279039071508024e-05, | |
| "loss": 0.3745, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.37923723459243774, | |
| "step": 1605, | |
| "valid_targets_mean": 2865.6, | |
| "valid_targets_min": 292 | |
| }, | |
| { | |
| "epoch": 2.576, | |
| "grad_norm": 0.5494405350429861, | |
| "learning_rate": 2.81942152550923e-05, | |
| "loss": 0.3814, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3400484025478363, | |
| "step": 1610, | |
| "valid_targets_mean": 2427.5, | |
| "valid_targets_min": 319 | |
| }, | |
| { | |
| "epoch": 2.584, | |
| "grad_norm": 0.7521047882538389, | |
| "learning_rate": 2.810921393850219e-05, | |
| "loss": 0.3743, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.42146170139312744, | |
| "step": 1615, | |
| "valid_targets_mean": 1535.6, | |
| "valid_targets_min": 304 | |
| }, | |
| { | |
| "epoch": 2.592, | |
| "grad_norm": 0.7367591533237979, | |
| "learning_rate": 2.802403696300595e-05, | |
| "loss": 0.3977, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.41838860511779785, | |
| "step": 1620, | |
| "valid_targets_mean": 1505.9, | |
| "valid_targets_min": 309 | |
| }, | |
| { | |
| "epoch": 2.6, | |
| "grad_norm": 0.6133882069897932, | |
| "learning_rate": 2.7938686173676915e-05, | |
| "loss": 0.3734, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.37919414043426514, | |
| "step": 1625, | |
| "valid_targets_mean": 1929.9, | |
| "valid_targets_min": 322 | |
| }, | |
| { | |
| "epoch": 2.608, | |
| "grad_norm": 0.7141355316967185, | |
| "learning_rate": 2.7853163419353505e-05, | |
| "loss": 0.3914, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4131407141685486, | |
| "step": 1630, | |
| "valid_targets_mean": 1792.1, | |
| "valid_targets_min": 518 | |
| }, | |
| { | |
| "epoch": 2.616, | |
| "grad_norm": 0.6010769733636125, | |
| "learning_rate": 2.776747055259918e-05, | |
| "loss": 0.3846, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.36372077465057373, | |
| "step": 1635, | |
| "valid_targets_mean": 1809.2, | |
| "valid_targets_min": 413 | |
| }, | |
| { | |
| "epoch": 2.624, | |
| "grad_norm": 0.6864102738551969, | |
| "learning_rate": 2.768160942966233e-05, | |
| "loss": 0.3801, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3999949097633362, | |
| "step": 1640, | |
| "valid_targets_mean": 1943.8, | |
| "valid_targets_min": 370 | |
| }, | |
| { | |
| "epoch": 2.632, | |
| "grad_norm": 0.6882655679500438, | |
| "learning_rate": 2.759558191043603e-05, | |
| "loss": 0.3866, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.37626349925994873, | |
| "step": 1645, | |
| "valid_targets_mean": 1692.8, | |
| "valid_targets_min": 308 | |
| }, | |
| { | |
| "epoch": 2.64, | |
| "grad_norm": 0.5689931159535411, | |
| "learning_rate": 2.7509389858417783e-05, | |
| "loss": 0.383, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.35382065176963806, | |
| "step": 1650, | |
| "valid_targets_mean": 2328.6, | |
| "valid_targets_min": 247 | |
| }, | |
| { | |
| "epoch": 2.648, | |
| "grad_norm": 0.641141720782201, | |
| "learning_rate": 2.7423035140669147e-05, | |
| "loss": 0.3977, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.40661799907684326, | |
| "step": 1655, | |
| "valid_targets_mean": 2178.4, | |
| "valid_targets_min": 363 | |
| }, | |
| { | |
| "epoch": 2.656, | |
| "grad_norm": 0.540586590816511, | |
| "learning_rate": 2.7336519627775288e-05, | |
| "loss": 0.3695, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3675329089164734, | |
| "step": 1660, | |
| "valid_targets_mean": 2639.7, | |
| "valid_targets_min": 363 | |
| }, | |
| { | |
| "epoch": 2.664, | |
| "grad_norm": 0.7036792247374296, | |
| "learning_rate": 2.724984519380444e-05, | |
| "loss": 0.4005, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.40206435322761536, | |
| "step": 1665, | |
| "valid_targets_mean": 1689.8, | |
| "valid_targets_min": 252 | |
| }, | |
| { | |
| "epoch": 2.672, | |
| "grad_norm": 0.7009327566269239, | |
| "learning_rate": 2.7163013716267353e-05, | |
| "loss": 0.4008, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3990801274776459, | |
| "step": 1670, | |
| "valid_targets_mean": 1999.7, | |
| "valid_targets_min": 363 | |
| }, | |
| { | |
| "epoch": 2.68, | |
| "grad_norm": 0.6572656408273875, | |
| "learning_rate": 2.707602707607659e-05, | |
| "loss": 0.3943, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.36183205246925354, | |
| "step": 1675, | |
| "valid_targets_mean": 1949.6, | |
| "valid_targets_min": 329 | |
| }, | |
| { | |
| "epoch": 2.6879999999999997, | |
| "grad_norm": 0.6903300748113985, | |
| "learning_rate": 2.6988887157505786e-05, | |
| "loss": 0.3853, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4237315058708191, | |
| "step": 1680, | |
| "valid_targets_mean": 1675.9, | |
| "valid_targets_min": 344 | |
| }, | |
| { | |
| "epoch": 2.6959999999999997, | |
| "grad_norm": 0.6289056881874757, | |
| "learning_rate": 2.6901595848148842e-05, | |
| "loss": 0.3943, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3660340905189514, | |
| "step": 1685, | |
| "valid_targets_mean": 1781.6, | |
| "valid_targets_min": 372 | |
| }, | |
| { | |
| "epoch": 2.7039999999999997, | |
| "grad_norm": 0.6448522226591724, | |
| "learning_rate": 2.681415503887904e-05, | |
| "loss": 0.3868, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3989563286304474, | |
| "step": 1690, | |
| "valid_targets_mean": 1838.6, | |
| "valid_targets_min": 462 | |
| }, | |
| { | |
| "epoch": 2.7119999999999997, | |
| "grad_norm": 0.5914371896855446, | |
| "learning_rate": 2.672656662380805e-05, | |
| "loss": 0.3882, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3665776252746582, | |
| "step": 1695, | |
| "valid_targets_mean": 2105.9, | |
| "valid_targets_min": 406 | |
| }, | |
| { | |
| "epoch": 2.7199999999999998, | |
| "grad_norm": 0.6439781732921271, | |
| "learning_rate": 2.6638832500244967e-05, | |
| "loss": 0.3809, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4117533564567566, | |
| "step": 1700, | |
| "valid_targets_mean": 2121.2, | |
| "valid_targets_min": 328 | |
| }, | |
| { | |
| "epoch": 2.7279999999999998, | |
| "grad_norm": 0.6413138381209691, | |
| "learning_rate": 2.655095456865514e-05, | |
| "loss": 0.3938, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3822566866874695, | |
| "step": 1705, | |
| "valid_targets_mean": 2074.2, | |
| "valid_targets_min": 343 | |
| }, | |
| { | |
| "epoch": 2.7359999999999998, | |
| "grad_norm": 0.5879107484234825, | |
| "learning_rate": 2.6462934732619047e-05, | |
| "loss": 0.3824, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3646334409713745, | |
| "step": 1710, | |
| "valid_targets_mean": 2142.2, | |
| "valid_targets_min": 356 | |
| }, | |
| { | |
| "epoch": 2.7439999999999998, | |
| "grad_norm": 0.6103736396812272, | |
| "learning_rate": 2.6374774898791047e-05, | |
| "loss": 0.3786, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.42227813601493835, | |
| "step": 1715, | |
| "valid_targets_mean": 2381.5, | |
| "valid_targets_min": 265 | |
| }, | |
| { | |
| "epoch": 2.752, | |
| "grad_norm": 0.7336758163732975, | |
| "learning_rate": 2.6286476976858084e-05, | |
| "loss": 0.3726, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3858756721019745, | |
| "step": 1720, | |
| "valid_targets_mean": 1364.0, | |
| "valid_targets_min": 299 | |
| }, | |
| { | |
| "epoch": 2.76, | |
| "grad_norm": 0.5804836400275201, | |
| "learning_rate": 2.619804287949831e-05, | |
| "loss": 0.4018, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.40446221828460693, | |
| "step": 1725, | |
| "valid_targets_mean": 2540.4, | |
| "valid_targets_min": 333 | |
| }, | |
| { | |
| "epoch": 2.768, | |
| "grad_norm": 0.6545153531379371, | |
| "learning_rate": 2.6109474522339676e-05, | |
| "loss": 0.3947, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.35908615589141846, | |
| "step": 1730, | |
| "valid_targets_mean": 1981.3, | |
| "valid_targets_min": 343 | |
| }, | |
| { | |
| "epoch": 2.776, | |
| "grad_norm": 0.5989728174431338, | |
| "learning_rate": 2.6020773823918414e-05, | |
| "loss": 0.3908, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3607073426246643, | |
| "step": 1735, | |
| "valid_targets_mean": 2075.1, | |
| "valid_targets_min": 259 | |
| }, | |
| { | |
| "epoch": 2.784, | |
| "grad_norm": 0.6548819062478236, | |
| "learning_rate": 2.5931942705637473e-05, | |
| "loss": 0.3821, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3777836561203003, | |
| "step": 1740, | |
| "valid_targets_mean": 1940.1, | |
| "valid_targets_min": 231 | |
| }, | |
| { | |
| "epoch": 2.792, | |
| "grad_norm": 0.6167175937189906, | |
| "learning_rate": 2.5842983091724923e-05, | |
| "loss": 0.3935, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3925069272518158, | |
| "step": 1745, | |
| "valid_targets_mean": 2126.1, | |
| "valid_targets_min": 302 | |
| }, | |
| { | |
| "epoch": 2.8, | |
| "grad_norm": 0.6019344696205082, | |
| "learning_rate": 2.575389690919226e-05, | |
| "loss": 0.3748, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.349267840385437, | |
| "step": 1750, | |
| "valid_targets_mean": 1979.9, | |
| "valid_targets_min": 342 | |
| }, | |
| { | |
| "epoch": 2.808, | |
| "grad_norm": 0.7070935809183736, | |
| "learning_rate": 2.5664686087792658e-05, | |
| "loss": 0.3966, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4027559757232666, | |
| "step": 1755, | |
| "valid_targets_mean": 1645.1, | |
| "valid_targets_min": 367 | |
| }, | |
| { | |
| "epoch": 2.816, | |
| "grad_norm": 0.5501386342474102, | |
| "learning_rate": 2.5575352559979188e-05, | |
| "loss": 0.3774, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.37497395277023315, | |
| "step": 1760, | |
| "valid_targets_mean": 2372.5, | |
| "valid_targets_min": 399 | |
| }, | |
| { | |
| "epoch": 2.824, | |
| "grad_norm": 0.5850935407134304, | |
| "learning_rate": 2.5485898260862936e-05, | |
| "loss": 0.3917, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3888002038002014, | |
| "step": 1765, | |
| "valid_targets_mean": 2355.9, | |
| "valid_targets_min": 566 | |
| }, | |
| { | |
| "epoch": 2.832, | |
| "grad_norm": 0.5898705932054655, | |
| "learning_rate": 2.5396325128171072e-05, | |
| "loss": 0.3875, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3861275017261505, | |
| "step": 1770, | |
| "valid_targets_mean": 2501.8, | |
| "valid_targets_min": 359 | |
| }, | |
| { | |
| "epoch": 2.84, | |
| "grad_norm": 0.6060182566256725, | |
| "learning_rate": 2.5306635102204942e-05, | |
| "loss": 0.4214, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3947630524635315, | |
| "step": 1775, | |
| "valid_targets_mean": 2317.6, | |
| "valid_targets_min": 275 | |
| }, | |
| { | |
| "epoch": 2.848, | |
| "grad_norm": 0.6343847329848039, | |
| "learning_rate": 2.5216830125797943e-05, | |
| "loss": 0.3941, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.38855668902397156, | |
| "step": 1780, | |
| "valid_targets_mean": 1968.6, | |
| "valid_targets_min": 314 | |
| }, | |
| { | |
| "epoch": 2.856, | |
| "grad_norm": 0.6683194188574011, | |
| "learning_rate": 2.5126912144273517e-05, | |
| "loss": 0.4111, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.41185641288757324, | |
| "step": 1785, | |
| "valid_targets_mean": 2012.8, | |
| "valid_targets_min": 254 | |
| }, | |
| { | |
| "epoch": 2.864, | |
| "grad_norm": 0.7043340763689518, | |
| "learning_rate": 2.5036883105402985e-05, | |
| "loss": 0.398, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3972740173339844, | |
| "step": 1790, | |
| "valid_targets_mean": 1628.4, | |
| "valid_targets_min": 280 | |
| }, | |
| { | |
| "epoch": 2.872, | |
| "grad_norm": 0.76241175702457, | |
| "learning_rate": 2.4946744959363343e-05, | |
| "loss": 0.378, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3741379976272583, | |
| "step": 1795, | |
| "valid_targets_mean": 1619.9, | |
| "valid_targets_min": 297 | |
| }, | |
| { | |
| "epoch": 2.88, | |
| "grad_norm": 0.5384749734802096, | |
| "learning_rate": 2.4856499658695018e-05, | |
| "loss": 0.3726, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.36481788754463196, | |
| "step": 1800, | |
| "valid_targets_mean": 2501.4, | |
| "valid_targets_min": 437 | |
| }, | |
| { | |
| "epoch": 2.888, | |
| "grad_norm": 0.693586139949456, | |
| "learning_rate": 2.4766149158259603e-05, | |
| "loss": 0.3892, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.39813292026519775, | |
| "step": 1805, | |
| "valid_targets_mean": 1850.1, | |
| "valid_targets_min": 237 | |
| }, | |
| { | |
| "epoch": 2.896, | |
| "grad_norm": 0.5409786475794984, | |
| "learning_rate": 2.4675695415197476e-05, | |
| "loss": 0.3644, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.33471423387527466, | |
| "step": 1810, | |
| "valid_targets_mean": 2575.4, | |
| "valid_targets_min": 469 | |
| }, | |
| { | |
| "epoch": 2.904, | |
| "grad_norm": 0.6356472255175909, | |
| "learning_rate": 2.458514038888543e-05, | |
| "loss": 0.3825, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3808741569519043, | |
| "step": 1815, | |
| "valid_targets_mean": 1939.9, | |
| "valid_targets_min": 386 | |
| }, | |
| { | |
| "epoch": 2.912, | |
| "grad_norm": 0.5454526005837897, | |
| "learning_rate": 2.4494486040894208e-05, | |
| "loss": 0.3637, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.35985440015792847, | |
| "step": 1820, | |
| "valid_targets_mean": 2616.5, | |
| "valid_targets_min": 234 | |
| }, | |
| { | |
| "epoch": 2.92, | |
| "grad_norm": 0.6948250620664468, | |
| "learning_rate": 2.440373433494603e-05, | |
| "loss": 0.3955, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.38276222348213196, | |
| "step": 1825, | |
| "valid_targets_mean": 1595.8, | |
| "valid_targets_min": 354 | |
| }, | |
| { | |
| "epoch": 2.928, | |
| "grad_norm": 0.7665181846844737, | |
| "learning_rate": 2.4312887236872066e-05, | |
| "loss": 0.3909, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.40464550256729126, | |
| "step": 1830, | |
| "valid_targets_mean": 1438.6, | |
| "valid_targets_min": 574 | |
| }, | |
| { | |
| "epoch": 2.936, | |
| "grad_norm": 0.631549384178386, | |
| "learning_rate": 2.4221946714569803e-05, | |
| "loss": 0.3911, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4105145335197449, | |
| "step": 1835, | |
| "valid_targets_mean": 2105.4, | |
| "valid_targets_min": 388 | |
| }, | |
| { | |
| "epoch": 2.944, | |
| "grad_norm": 0.6268974428016761, | |
| "learning_rate": 2.4130914737960472e-05, | |
| "loss": 0.4035, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.37901395559310913, | |
| "step": 1840, | |
| "valid_targets_mean": 2055.2, | |
| "valid_targets_min": 399 | |
| }, | |
| { | |
| "epoch": 2.952, | |
| "grad_norm": 0.7217347152331498, | |
| "learning_rate": 2.4039793278946358e-05, | |
| "loss": 0.3734, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.38400089740753174, | |
| "step": 1845, | |
| "valid_targets_mean": 1730.2, | |
| "valid_targets_min": 270 | |
| }, | |
| { | |
| "epoch": 2.96, | |
| "grad_norm": 0.7522530270283125, | |
| "learning_rate": 2.394858431136806e-05, | |
| "loss": 0.3971, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.43783271312713623, | |
| "step": 1850, | |
| "valid_targets_mean": 1617.4, | |
| "valid_targets_min": 339 | |
| }, | |
| { | |
| "epoch": 2.968, | |
| "grad_norm": 0.7579554160801213, | |
| "learning_rate": 2.385728981096178e-05, | |
| "loss": 0.3865, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4064823389053345, | |
| "step": 1855, | |
| "valid_targets_mean": 1314.1, | |
| "valid_targets_min": 327 | |
| }, | |
| { | |
| "epoch": 2.976, | |
| "grad_norm": 0.6285737467792446, | |
| "learning_rate": 2.3765911755316503e-05, | |
| "loss": 0.3596, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4188977777957916, | |
| "step": 1860, | |
| "valid_targets_mean": 2219.6, | |
| "valid_targets_min": 340 | |
| }, | |
| { | |
| "epoch": 2.984, | |
| "grad_norm": 0.6156543382548351, | |
| "learning_rate": 2.3674452123831125e-05, | |
| "loss": 0.3703, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3826698064804077, | |
| "step": 1865, | |
| "valid_targets_mean": 2188.8, | |
| "valid_targets_min": 372 | |
| }, | |
| { | |
| "epoch": 2.992, | |
| "grad_norm": 0.6093125546240513, | |
| "learning_rate": 2.358291289767165e-05, | |
| "loss": 0.4017, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.39687103033065796, | |
| "step": 1870, | |
| "valid_targets_mean": 2002.8, | |
| "valid_targets_min": 298 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "grad_norm": 0.550468552788306, | |
| "learning_rate": 2.3491296059728202e-05, | |
| "loss": 0.3866, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.38205671310424805, | |
| "step": 1875, | |
| "valid_targets_mean": 3186.2, | |
| "valid_targets_min": 248 | |
| }, | |
| { | |
| "epoch": 3.008, | |
| "grad_norm": 0.5782335273031184, | |
| "learning_rate": 2.339960359457212e-05, | |
| "loss": 0.362, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3214508891105652, | |
| "step": 1880, | |
| "valid_targets_mean": 2028.1, | |
| "valid_targets_min": 534 | |
| }, | |
| { | |
| "epoch": 3.016, | |
| "grad_norm": 0.6851073073819501, | |
| "learning_rate": 2.3307837488412955e-05, | |
| "loss": 0.3414, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3262726068496704, | |
| "step": 1885, | |
| "valid_targets_mean": 1637.2, | |
| "valid_targets_min": 294 | |
| }, | |
| { | |
| "epoch": 3.024, | |
| "grad_norm": 0.6957033026128393, | |
| "learning_rate": 2.3215999729055437e-05, | |
| "loss": 0.3518, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.40083542466163635, | |
| "step": 1890, | |
| "valid_targets_mean": 1853.8, | |
| "valid_targets_min": 364 | |
| }, | |
| { | |
| "epoch": 3.032, | |
| "grad_norm": 0.6918339266575398, | |
| "learning_rate": 2.312409230585641e-05, | |
| "loss": 0.354, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.34504324197769165, | |
| "step": 1895, | |
| "valid_targets_mean": 1833.5, | |
| "valid_targets_min": 278 | |
| }, | |
| { | |
| "epoch": 3.04, | |
| "grad_norm": 0.6685994313776155, | |
| "learning_rate": 2.3032117209681782e-05, | |
| "loss": 0.3694, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3745924234390259, | |
| "step": 1900, | |
| "valid_targets_mean": 1937.8, | |
| "valid_targets_min": 344 | |
| }, | |
| { | |
| "epoch": 3.048, | |
| "grad_norm": 0.6290078653825849, | |
| "learning_rate": 2.2940076432863335e-05, | |
| "loss": 0.3576, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.35292524099349976, | |
| "step": 1905, | |
| "valid_targets_mean": 2088.0, | |
| "valid_targets_min": 295 | |
| }, | |
| { | |
| "epoch": 3.056, | |
| "grad_norm": 0.6593208149721491, | |
| "learning_rate": 2.2847971969155626e-05, | |
| "loss": 0.3807, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3467387557029724, | |
| "step": 1910, | |
| "valid_targets_mean": 1779.2, | |
| "valid_targets_min": 330 | |
| }, | |
| { | |
| "epoch": 3.064, | |
| "grad_norm": 0.6288234163694197, | |
| "learning_rate": 2.275580581369276e-05, | |
| "loss": 0.3534, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.31876906752586365, | |
| "step": 1915, | |
| "valid_targets_mean": 1940.2, | |
| "valid_targets_min": 261 | |
| }, | |
| { | |
| "epoch": 3.072, | |
| "grad_norm": 0.6645318426544585, | |
| "learning_rate": 2.2663579962945205e-05, | |
| "loss": 0.3557, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.33185040950775146, | |
| "step": 1920, | |
| "valid_targets_mean": 1885.4, | |
| "valid_targets_min": 450 | |
| }, | |
| { | |
| "epoch": 3.08, | |
| "grad_norm": 0.6646159186220106, | |
| "learning_rate": 2.2571296414676503e-05, | |
| "loss": 0.344, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3251417875289917, | |
| "step": 1925, | |
| "valid_targets_mean": 1784.0, | |
| "valid_targets_min": 281 | |
| }, | |
| { | |
| "epoch": 3.088, | |
| "grad_norm": 0.6035987948135627, | |
| "learning_rate": 2.2478957167900038e-05, | |
| "loss": 0.3738, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.34511998295783997, | |
| "step": 1930, | |
| "valid_targets_mean": 2332.1, | |
| "valid_targets_min": 447 | |
| }, | |
| { | |
| "epoch": 3.096, | |
| "grad_norm": 0.6459268888860388, | |
| "learning_rate": 2.23865642228357e-05, | |
| "loss": 0.3558, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.34273070096969604, | |
| "step": 1935, | |
| "valid_targets_mean": 2084.6, | |
| "valid_targets_min": 370 | |
| }, | |
| { | |
| "epoch": 3.104, | |
| "grad_norm": 0.6519205904366294, | |
| "learning_rate": 2.2294119580866592e-05, | |
| "loss": 0.3705, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3446979522705078, | |
| "step": 1940, | |
| "valid_targets_mean": 2024.8, | |
| "valid_targets_min": 515 | |
| }, | |
| { | |
| "epoch": 3.112, | |
| "grad_norm": 0.5863039333195743, | |
| "learning_rate": 2.2201625244495646e-05, | |
| "loss": 0.3582, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.31188952922821045, | |
| "step": 1945, | |
| "valid_targets_mean": 2198.8, | |
| "valid_targets_min": 387 | |
| }, | |
| { | |
| "epoch": 3.12, | |
| "grad_norm": 0.8138604431085514, | |
| "learning_rate": 2.2109083217302242e-05, | |
| "loss": 0.3732, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.41089117527008057, | |
| "step": 1950, | |
| "valid_targets_mean": 1379.2, | |
| "valid_targets_min": 314 | |
| }, | |
| { | |
| "epoch": 3.128, | |
| "grad_norm": 0.6223336814826458, | |
| "learning_rate": 2.201649550389885e-05, | |
| "loss": 0.348, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.35420143604278564, | |
| "step": 1955, | |
| "valid_targets_mean": 2208.2, | |
| "valid_targets_min": 311 | |
| }, | |
| { | |
| "epoch": 3.136, | |
| "grad_norm": 0.5625729693816748, | |
| "learning_rate": 2.1923864109887556e-05, | |
| "loss": 0.355, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.350006103515625, | |
| "step": 1960, | |
| "valid_targets_mean": 2587.9, | |
| "valid_targets_min": 308 | |
| }, | |
| { | |
| "epoch": 3.144, | |
| "grad_norm": 0.5915272696073566, | |
| "learning_rate": 2.1831191041816652e-05, | |
| "loss": 0.3692, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.37210381031036377, | |
| "step": 1965, | |
| "valid_targets_mean": 2568.3, | |
| "valid_targets_min": 504 | |
| }, | |
| { | |
| "epoch": 3.152, | |
| "grad_norm": 0.6491814772528394, | |
| "learning_rate": 2.173847830713715e-05, | |
| "loss": 0.3676, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3734401762485504, | |
| "step": 1970, | |
| "valid_targets_mean": 2128.2, | |
| "valid_targets_min": 323 | |
| }, | |
| { | |
| "epoch": 3.16, | |
| "grad_norm": 0.606126090375627, | |
| "learning_rate": 2.1645727914159315e-05, | |
| "loss": 0.3507, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.33269253373146057, | |
| "step": 1975, | |
| "valid_targets_mean": 2215.9, | |
| "valid_targets_min": 397 | |
| }, | |
| { | |
| "epoch": 3.168, | |
| "grad_norm": 0.6028176800089939, | |
| "learning_rate": 2.1552941872009144e-05, | |
| "loss": 0.3617, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.36134254932403564, | |
| "step": 1980, | |
| "valid_targets_mean": 2474.1, | |
| "valid_targets_min": 397 | |
| }, | |
| { | |
| "epoch": 3.176, | |
| "grad_norm": 0.6929456925738174, | |
| "learning_rate": 2.1460122190584868e-05, | |
| "loss": 0.3827, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3592289090156555, | |
| "step": 1985, | |
| "valid_targets_mean": 2259.2, | |
| "valid_targets_min": 336 | |
| }, | |
| { | |
| "epoch": 3.184, | |
| "grad_norm": 0.7419254924501354, | |
| "learning_rate": 2.1367270880513377e-05, | |
| "loss": 0.3801, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3836570382118225, | |
| "step": 1990, | |
| "valid_targets_mean": 1838.1, | |
| "valid_targets_min": 288 | |
| }, | |
| { | |
| "epoch": 3.192, | |
| "grad_norm": 0.6716942733861659, | |
| "learning_rate": 2.127438995310671e-05, | |
| "loss": 0.3558, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3701018989086151, | |
| "step": 1995, | |
| "valid_targets_mean": 2167.3, | |
| "valid_targets_min": 420 | |
| }, | |
| { | |
| "epoch": 3.2, | |
| "grad_norm": 0.6623749519667427, | |
| "learning_rate": 2.118148142031846e-05, | |
| "loss": 0.3575, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3783347010612488, | |
| "step": 2000, | |
| "valid_targets_mean": 1932.2, | |
| "valid_targets_min": 504 | |
| }, | |
| { | |
| "epoch": 3.208, | |
| "grad_norm": 0.6711094923676311, | |
| "learning_rate": 2.1088547294700182e-05, | |
| "loss": 0.3556, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.38326647877693176, | |
| "step": 2005, | |
| "valid_targets_mean": 2020.8, | |
| "valid_targets_min": 719 | |
| }, | |
| { | |
| "epoch": 3.216, | |
| "grad_norm": 0.7373832742848137, | |
| "learning_rate": 2.0995589589357846e-05, | |
| "loss": 0.3722, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4027169346809387, | |
| "step": 2010, | |
| "valid_targets_mean": 1860.1, | |
| "valid_targets_min": 511 | |
| }, | |
| { | |
| "epoch": 3.224, | |
| "grad_norm": 0.6609924694795588, | |
| "learning_rate": 2.0902610317908175e-05, | |
| "loss": 0.3702, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.37891778349876404, | |
| "step": 2015, | |
| "valid_targets_mean": 1891.6, | |
| "valid_targets_min": 281 | |
| }, | |
| { | |
| "epoch": 3.232, | |
| "grad_norm": 0.6116427719169357, | |
| "learning_rate": 2.080961149443505e-05, | |
| "loss": 0.343, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.35430964827537537, | |
| "step": 2020, | |
| "valid_targets_mean": 2165.3, | |
| "valid_targets_min": 234 | |
| }, | |
| { | |
| "epoch": 3.24, | |
| "grad_norm": 0.7271187064226262, | |
| "learning_rate": 2.071659513344589e-05, | |
| "loss": 0.3616, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.37511664628982544, | |
| "step": 2025, | |
| "valid_targets_mean": 1475.1, | |
| "valid_targets_min": 325 | |
| }, | |
| { | |
| "epoch": 3.248, | |
| "grad_norm": 0.5178449671491031, | |
| "learning_rate": 2.0623563249828e-05, | |
| "loss": 0.33, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3304227590560913, | |
| "step": 2030, | |
| "valid_targets_mean": 2904.6, | |
| "valid_targets_min": 454 | |
| }, | |
| { | |
| "epoch": 3.2560000000000002, | |
| "grad_norm": 0.6009441585408077, | |
| "learning_rate": 2.053051785880492e-05, | |
| "loss": 0.3669, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.349854052066803, | |
| "step": 2035, | |
| "valid_targets_mean": 2303.9, | |
| "valid_targets_min": 353 | |
| }, | |
| { | |
| "epoch": 3.2640000000000002, | |
| "grad_norm": 0.6024939205010066, | |
| "learning_rate": 2.0437460975892814e-05, | |
| "loss": 0.3519, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.30770695209503174, | |
| "step": 2040, | |
| "valid_targets_mean": 1997.6, | |
| "valid_targets_min": 290 | |
| }, | |
| { | |
| "epoch": 3.2720000000000002, | |
| "grad_norm": 0.6567641209970828, | |
| "learning_rate": 2.0344394616856736e-05, | |
| "loss": 0.3643, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3653138279914856, | |
| "step": 2045, | |
| "valid_targets_mean": 2023.9, | |
| "valid_targets_min": 262 | |
| }, | |
| { | |
| "epoch": 3.2800000000000002, | |
| "grad_norm": 0.5685017305666865, | |
| "learning_rate": 2.0251320797667056e-05, | |
| "loss": 0.3751, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.36403796076774597, | |
| "step": 2050, | |
| "valid_targets_mean": 2934.8, | |
| "valid_targets_min": 630 | |
| }, | |
| { | |
| "epoch": 3.288, | |
| "grad_norm": 0.6636164656698944, | |
| "learning_rate": 2.01582415344557e-05, | |
| "loss": 0.3478, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3850035071372986, | |
| "step": 2055, | |
| "valid_targets_mean": 2090.8, | |
| "valid_targets_min": 315 | |
| }, | |
| { | |
| "epoch": 3.296, | |
| "grad_norm": 0.677882323785035, | |
| "learning_rate": 2.006515884347255e-05, | |
| "loss": 0.3677, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.34536224603652954, | |
| "step": 2060, | |
| "valid_targets_mean": 1828.5, | |
| "valid_targets_min": 406 | |
| }, | |
| { | |
| "epoch": 3.304, | |
| "grad_norm": 0.6253603205859511, | |
| "learning_rate": 1.9972074741041712e-05, | |
| "loss": 0.3544, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3635639548301697, | |
| "step": 2065, | |
| "valid_targets_mean": 2177.9, | |
| "valid_targets_min": 437 | |
| }, | |
| { | |
| "epoch": 3.312, | |
| "grad_norm": 0.7571788308639501, | |
| "learning_rate": 1.9878991243517913e-05, | |
| "loss": 0.382, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.37547576427459717, | |
| "step": 2070, | |
| "valid_targets_mean": 1870.7, | |
| "valid_targets_min": 488 | |
| }, | |
| { | |
| "epoch": 3.32, | |
| "grad_norm": 0.6162909260403991, | |
| "learning_rate": 1.9785910367242712e-05, | |
| "loss": 0.357, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.34492647647857666, | |
| "step": 2075, | |
| "valid_targets_mean": 2629.4, | |
| "valid_targets_min": 336 | |
| }, | |
| { | |
| "epoch": 3.328, | |
| "grad_norm": 0.6459292713099473, | |
| "learning_rate": 1.969283412850094e-05, | |
| "loss": 0.3666, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.37682420015335083, | |
| "step": 2080, | |
| "valid_targets_mean": 2129.2, | |
| "valid_targets_min": 270 | |
| }, | |
| { | |
| "epoch": 3.336, | |
| "grad_norm": 0.6185038841693136, | |
| "learning_rate": 1.959976454347696e-05, | |
| "loss": 0.3394, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.31735754013061523, | |
| "step": 2085, | |
| "valid_targets_mean": 2153.9, | |
| "valid_targets_min": 356 | |
| }, | |
| { | |
| "epoch": 3.344, | |
| "grad_norm": 0.669817767991775, | |
| "learning_rate": 1.950670362821098e-05, | |
| "loss": 0.3453, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.35490524768829346, | |
| "step": 2090, | |
| "valid_targets_mean": 2075.0, | |
| "valid_targets_min": 291 | |
| }, | |
| { | |
| "epoch": 3.352, | |
| "grad_norm": 0.6336596094505553, | |
| "learning_rate": 1.9413653398555437e-05, | |
| "loss": 0.3756, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.34789595007896423, | |
| "step": 2095, | |
| "valid_targets_mean": 1968.0, | |
| "valid_targets_min": 377 | |
| }, | |
| { | |
| "epoch": 3.36, | |
| "grad_norm": 0.8081160567298646, | |
| "learning_rate": 1.9320615870131282e-05, | |
| "loss": 0.3602, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.38415002822875977, | |
| "step": 2100, | |
| "valid_targets_mean": 1982.8, | |
| "valid_targets_min": 324 | |
| }, | |
| { | |
| "epoch": 3.368, | |
| "grad_norm": 0.6962657438034732, | |
| "learning_rate": 1.9227593058284343e-05, | |
| "loss": 0.3674, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3305050730705261, | |
| "step": 2105, | |
| "valid_targets_mean": 1824.0, | |
| "valid_targets_min": 381 | |
| }, | |
| { | |
| "epoch": 3.376, | |
| "grad_norm": 0.740689669376568, | |
| "learning_rate": 1.9134586978041663e-05, | |
| "loss": 0.3718, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3729479908943176, | |
| "step": 2110, | |
| "valid_targets_mean": 1616.3, | |
| "valid_targets_min": 320 | |
| }, | |
| { | |
| "epoch": 3.384, | |
| "grad_norm": 0.6817372764014189, | |
| "learning_rate": 1.9041599644067846e-05, | |
| "loss": 0.382, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.361863374710083, | |
| "step": 2115, | |
| "valid_targets_mean": 1781.7, | |
| "valid_targets_min": 288 | |
| }, | |
| { | |
| "epoch": 3.392, | |
| "grad_norm": 0.6710191633507304, | |
| "learning_rate": 1.8948633070621433e-05, | |
| "loss": 0.3467, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.35921335220336914, | |
| "step": 2120, | |
| "valid_targets_mean": 2027.4, | |
| "valid_targets_min": 426 | |
| }, | |
| { | |
| "epoch": 3.4, | |
| "grad_norm": 0.62926154035665, | |
| "learning_rate": 1.885568927151124e-05, | |
| "loss": 0.3701, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3318699598312378, | |
| "step": 2125, | |
| "valid_targets_mean": 2056.8, | |
| "valid_targets_min": 486 | |
| }, | |
| { | |
| "epoch": 3.408, | |
| "grad_norm": 0.630812301281981, | |
| "learning_rate": 1.8762770260052773e-05, | |
| "loss": 0.3533, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.37095907330513, | |
| "step": 2130, | |
| "valid_targets_mean": 2337.6, | |
| "valid_targets_min": 400 | |
| }, | |
| { | |
| "epoch": 3.416, | |
| "grad_norm": 0.5628803802505868, | |
| "learning_rate": 1.8669878049024575e-05, | |
| "loss": 0.3461, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3343871831893921, | |
| "step": 2135, | |
| "valid_targets_mean": 2433.6, | |
| "valid_targets_min": 328 | |
| }, | |
| { | |
| "epoch": 3.424, | |
| "grad_norm": 0.6587234164207159, | |
| "learning_rate": 1.857701465062467e-05, | |
| "loss": 0.3671, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.34795311093330383, | |
| "step": 2140, | |
| "valid_targets_mean": 2278.2, | |
| "valid_targets_min": 412 | |
| }, | |
| { | |
| "epoch": 3.432, | |
| "grad_norm": 0.6141886366824748, | |
| "learning_rate": 1.848418207642693e-05, | |
| "loss": 0.3641, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.340614914894104, | |
| "step": 2145, | |
| "valid_targets_mean": 2468.8, | |
| "valid_targets_min": 363 | |
| }, | |
| { | |
| "epoch": 3.44, | |
| "grad_norm": 0.7608947546773019, | |
| "learning_rate": 1.8391382337337548e-05, | |
| "loss": 0.3691, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.387977659702301, | |
| "step": 2150, | |
| "valid_targets_mean": 1536.1, | |
| "valid_targets_min": 287 | |
| }, | |
| { | |
| "epoch": 3.448, | |
| "grad_norm": 0.6952199985932443, | |
| "learning_rate": 1.829861744355144e-05, | |
| "loss": 0.3604, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3693057894706726, | |
| "step": 2155, | |
| "valid_targets_mean": 2045.2, | |
| "valid_targets_min": 302 | |
| }, | |
| { | |
| "epoch": 3.456, | |
| "grad_norm": 0.7439099787511875, | |
| "learning_rate": 1.820588940450872e-05, | |
| "loss": 0.3527, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3845769166946411, | |
| "step": 2160, | |
| "valid_targets_mean": 1662.5, | |
| "valid_targets_min": 280 | |
| }, | |
| { | |
| "epoch": 3.464, | |
| "grad_norm": 0.634402246022689, | |
| "learning_rate": 1.8113200228851163e-05, | |
| "loss": 0.3391, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.37684130668640137, | |
| "step": 2165, | |
| "valid_targets_mean": 2222.1, | |
| "valid_targets_min": 299 | |
| }, | |
| { | |
| "epoch": 3.472, | |
| "grad_norm": 0.6894757109083375, | |
| "learning_rate": 1.80205519243787e-05, | |
| "loss": 0.3349, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3498302698135376, | |
| "step": 2170, | |
| "valid_targets_mean": 1682.1, | |
| "valid_targets_min": 272 | |
| }, | |
| { | |
| "epoch": 3.48, | |
| "grad_norm": 0.6476961900701566, | |
| "learning_rate": 1.7927946498005934e-05, | |
| "loss": 0.3465, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3206251859664917, | |
| "step": 2175, | |
| "valid_targets_mean": 2919.4, | |
| "valid_targets_min": 522 | |
| }, | |
| { | |
| "epoch": 3.488, | |
| "grad_norm": 0.6753631402801488, | |
| "learning_rate": 1.7835385955718653e-05, | |
| "loss": 0.3525, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3390992283821106, | |
| "step": 2180, | |
| "valid_targets_mean": 1935.4, | |
| "valid_targets_min": 317 | |
| }, | |
| { | |
| "epoch": 3.496, | |
| "grad_norm": 0.6798516632249726, | |
| "learning_rate": 1.7742872302530366e-05, | |
| "loss": 0.3783, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.38496145606040955, | |
| "step": 2185, | |
| "valid_targets_mean": 2096.4, | |
| "valid_targets_min": 253 | |
| }, | |
| { | |
| "epoch": 3.504, | |
| "grad_norm": 0.6933996256183506, | |
| "learning_rate": 1.765040754243892e-05, | |
| "loss": 0.3676, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3730943202972412, | |
| "step": 2190, | |
| "valid_targets_mean": 1851.4, | |
| "valid_targets_min": 357 | |
| }, | |
| { | |
| "epoch": 3.512, | |
| "grad_norm": 0.7110969950647408, | |
| "learning_rate": 1.755799367838302e-05, | |
| "loss": 0.3617, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.38612857460975647, | |
| "step": 2195, | |
| "valid_targets_mean": 2064.2, | |
| "valid_targets_min": 351 | |
| }, | |
| { | |
| "epoch": 3.52, | |
| "grad_norm": 0.6596801867230143, | |
| "learning_rate": 1.746563271219891e-05, | |
| "loss": 0.3659, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3577587902545929, | |
| "step": 2200, | |
| "valid_targets_mean": 2278.2, | |
| "valid_targets_min": 511 | |
| }, | |
| { | |
| "epoch": 3.528, | |
| "grad_norm": 0.671825575187384, | |
| "learning_rate": 1.7373326644576965e-05, | |
| "loss": 0.3708, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3616862893104553, | |
| "step": 2205, | |
| "valid_targets_mean": 2457.8, | |
| "valid_targets_min": 276 | |
| }, | |
| { | |
| "epoch": 3.536, | |
| "grad_norm": 0.6994958591376166, | |
| "learning_rate": 1.728107747501836e-05, | |
| "loss": 0.358, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.36732035875320435, | |
| "step": 2210, | |
| "valid_targets_mean": 1954.8, | |
| "valid_targets_min": 369 | |
| }, | |
| { | |
| "epoch": 3.544, | |
| "grad_norm": 0.6016709776371766, | |
| "learning_rate": 1.7188887201791785e-05, | |
| "loss": 0.3413, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3304412364959717, | |
| "step": 2215, | |
| "valid_targets_mean": 2284.2, | |
| "valid_targets_min": 314 | |
| }, | |
| { | |
| "epoch": 3.552, | |
| "grad_norm": 0.6746192722394049, | |
| "learning_rate": 1.7096757821890117e-05, | |
| "loss": 0.354, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3492451310157776, | |
| "step": 2220, | |
| "valid_targets_mean": 1794.3, | |
| "valid_targets_min": 299 | |
| }, | |
| { | |
| "epoch": 3.56, | |
| "grad_norm": 0.5979672015191129, | |
| "learning_rate": 1.7004691330987196e-05, | |
| "loss": 0.3785, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3538179099559784, | |
| "step": 2225, | |
| "valid_targets_mean": 2432.1, | |
| "valid_targets_min": 491 | |
| }, | |
| { | |
| "epoch": 3.568, | |
| "grad_norm": 0.7459353388391813, | |
| "learning_rate": 1.691268972339458e-05, | |
| "loss": 0.3714, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3988783657550812, | |
| "step": 2230, | |
| "valid_targets_mean": 1644.5, | |
| "valid_targets_min": 237 | |
| }, | |
| { | |
| "epoch": 3.576, | |
| "grad_norm": 0.752136783472821, | |
| "learning_rate": 1.6820754992018344e-05, | |
| "loss": 0.3672, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.39165884256362915, | |
| "step": 2235, | |
| "valid_targets_mean": 1668.0, | |
| "valid_targets_min": 463 | |
| }, | |
| { | |
| "epoch": 3.584, | |
| "grad_norm": 0.5888303253878224, | |
| "learning_rate": 1.6728889128315932e-05, | |
| "loss": 0.3579, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.34385019540786743, | |
| "step": 2240, | |
| "valid_targets_mean": 2623.6, | |
| "valid_targets_min": 446 | |
| }, | |
| { | |
| "epoch": 3.592, | |
| "grad_norm": 0.6520688037523493, | |
| "learning_rate": 1.663709412225297e-05, | |
| "loss": 0.3826, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3680360019207001, | |
| "step": 2245, | |
| "valid_targets_mean": 2156.8, | |
| "valid_targets_min": 322 | |
| }, | |
| { | |
| "epoch": 3.6, | |
| "grad_norm": 0.7039772129062588, | |
| "learning_rate": 1.654537196226022e-05, | |
| "loss": 0.3678, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.35406723618507385, | |
| "step": 2250, | |
| "valid_targets_mean": 1685.8, | |
| "valid_targets_min": 362 | |
| }, | |
| { | |
| "epoch": 3.608, | |
| "grad_norm": 0.6987712882835031, | |
| "learning_rate": 1.6453724635190455e-05, | |
| "loss": 0.3615, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.37787187099456787, | |
| "step": 2255, | |
| "valid_targets_mean": 1718.6, | |
| "valid_targets_min": 338 | |
| }, | |
| { | |
| "epoch": 3.616, | |
| "grad_norm": 0.7053910956653641, | |
| "learning_rate": 1.6362154126275467e-05, | |
| "loss": 0.361, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3942461907863617, | |
| "step": 2260, | |
| "valid_targets_mean": 1716.6, | |
| "valid_targets_min": 395 | |
| }, | |
| { | |
| "epoch": 3.624, | |
| "grad_norm": 0.687209335721668, | |
| "learning_rate": 1.6270662419083018e-05, | |
| "loss": 0.3654, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3939089775085449, | |
| "step": 2265, | |
| "valid_targets_mean": 2159.6, | |
| "valid_targets_min": 588 | |
| }, | |
| { | |
| "epoch": 3.632, | |
| "grad_norm": 0.6484010824748857, | |
| "learning_rate": 1.617925149547391e-05, | |
| "loss": 0.3745, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3499888777732849, | |
| "step": 2270, | |
| "valid_targets_mean": 2079.5, | |
| "valid_targets_min": 441 | |
| }, | |
| { | |
| "epoch": 3.64, | |
| "grad_norm": 0.6185239225910694, | |
| "learning_rate": 1.608792333555904e-05, | |
| "loss": 0.3449, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2988038659095764, | |
| "step": 2275, | |
| "valid_targets_mean": 1887.8, | |
| "valid_targets_min": 315 | |
| }, | |
| { | |
| "epoch": 3.648, | |
| "grad_norm": 0.6622725425024248, | |
| "learning_rate": 1.5996679917656492e-05, | |
| "loss": 0.3584, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3635316491127014, | |
| "step": 2280, | |
| "valid_targets_mean": 2113.4, | |
| "valid_targets_min": 250 | |
| }, | |
| { | |
| "epoch": 3.656, | |
| "grad_norm": 0.6461440368572684, | |
| "learning_rate": 1.5905523218248723e-05, | |
| "loss": 0.3349, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3423093259334564, | |
| "step": 2285, | |
| "valid_targets_mean": 1987.4, | |
| "valid_targets_min": 393 | |
| }, | |
| { | |
| "epoch": 3.664, | |
| "grad_norm": 0.7423053801732263, | |
| "learning_rate": 1.5814455211939698e-05, | |
| "loss": 0.3565, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3898213505744934, | |
| "step": 2290, | |
| "valid_targets_mean": 1741.2, | |
| "valid_targets_min": 255 | |
| }, | |
| { | |
| "epoch": 3.672, | |
| "grad_norm": 0.6683929551171593, | |
| "learning_rate": 1.5723477871412168e-05, | |
| "loss": 0.3453, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.36176642775535583, | |
| "step": 2295, | |
| "valid_targets_mean": 1948.9, | |
| "valid_targets_min": 279 | |
| }, | |
| { | |
| "epoch": 3.68, | |
| "grad_norm": 0.6585838124328314, | |
| "learning_rate": 1.56325931673849e-05, | |
| "loss": 0.3514, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.33605048060417175, | |
| "step": 2300, | |
| "valid_targets_mean": 1879.3, | |
| "valid_targets_min": 390 | |
| }, | |
| { | |
| "epoch": 3.6879999999999997, | |
| "grad_norm": 0.8149382661912283, | |
| "learning_rate": 1.5541803068569993e-05, | |
| "loss": 0.3696, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.35881364345550537, | |
| "step": 2305, | |
| "valid_targets_mean": 1977.1, | |
| "valid_targets_min": 311 | |
| }, | |
| { | |
| "epoch": 3.6959999999999997, | |
| "grad_norm": 0.6351075990623459, | |
| "learning_rate": 1.5451109541630275e-05, | |
| "loss": 0.352, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.31659263372421265, | |
| "step": 2310, | |
| "valid_targets_mean": 2268.1, | |
| "valid_targets_min": 416 | |
| }, | |
| { | |
| "epoch": 3.7039999999999997, | |
| "grad_norm": 0.5828673063408715, | |
| "learning_rate": 1.536051455113663e-05, | |
| "loss": 0.3394, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3484576940536499, | |
| "step": 2315, | |
| "valid_targets_mean": 2422.6, | |
| "valid_targets_min": 416 | |
| }, | |
| { | |
| "epoch": 3.7119999999999997, | |
| "grad_norm": 0.5200997495321733, | |
| "learning_rate": 1.527002005952551e-05, | |
| "loss": 0.3523, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3167843520641327, | |
| "step": 2320, | |
| "valid_targets_mean": 2799.5, | |
| "valid_targets_min": 432 | |
| }, | |
| { | |
| "epoch": 3.7199999999999998, | |
| "grad_norm": 0.6191793089703284, | |
| "learning_rate": 1.5179628027056373e-05, | |
| "loss": 0.3576, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.36515581607818604, | |
| "step": 2325, | |
| "valid_targets_mean": 2321.8, | |
| "valid_targets_min": 312 | |
| }, | |
| { | |
| "epoch": 3.7279999999999998, | |
| "grad_norm": 0.7703510487146173, | |
| "learning_rate": 1.5089340411769257e-05, | |
| "loss": 0.3683, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.41395729780197144, | |
| "step": 2330, | |
| "valid_targets_mean": 1609.9, | |
| "valid_targets_min": 252 | |
| }, | |
| { | |
| "epoch": 3.7359999999999998, | |
| "grad_norm": 0.6881396778475732, | |
| "learning_rate": 1.499915916944236e-05, | |
| "loss": 0.3493, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.39570489525794983, | |
| "step": 2335, | |
| "valid_targets_mean": 2141.2, | |
| "valid_targets_min": 327 | |
| }, | |
| { | |
| "epoch": 3.7439999999999998, | |
| "grad_norm": 0.7674492979368044, | |
| "learning_rate": 1.490908625354964e-05, | |
| "loss": 0.3509, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3919585943222046, | |
| "step": 2340, | |
| "valid_targets_mean": 1661.6, | |
| "valid_targets_min": 230 | |
| }, | |
| { | |
| "epoch": 3.752, | |
| "grad_norm": 0.6616850774148547, | |
| "learning_rate": 1.4819123615218556e-05, | |
| "loss": 0.3514, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3458954095840454, | |
| "step": 2345, | |
| "valid_targets_mean": 1910.0, | |
| "valid_targets_min": 318 | |
| }, | |
| { | |
| "epoch": 3.76, | |
| "grad_norm": 0.5996831317937957, | |
| "learning_rate": 1.472927320318775e-05, | |
| "loss": 0.3545, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.336120069026947, | |
| "step": 2350, | |
| "valid_targets_mean": 2165.2, | |
| "valid_targets_min": 418 | |
| }, | |
| { | |
| "epoch": 3.768, | |
| "grad_norm": 0.6063824449090175, | |
| "learning_rate": 1.4639536963764878e-05, | |
| "loss": 0.3457, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.35988757014274597, | |
| "step": 2355, | |
| "valid_targets_mean": 2332.4, | |
| "valid_targets_min": 455 | |
| }, | |
| { | |
| "epoch": 3.776, | |
| "grad_norm": 0.6250271547006242, | |
| "learning_rate": 1.4549916840784409e-05, | |
| "loss": 0.3731, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3625832498073578, | |
| "step": 2360, | |
| "valid_targets_mean": 2205.9, | |
| "valid_targets_min": 311 | |
| }, | |
| { | |
| "epoch": 3.784, | |
| "grad_norm": 0.6838399225206161, | |
| "learning_rate": 1.4460414775565555e-05, | |
| "loss": 0.3627, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.39127451181411743, | |
| "step": 2365, | |
| "valid_targets_mean": 2025.2, | |
| "valid_targets_min": 349 | |
| }, | |
| { | |
| "epoch": 3.792, | |
| "grad_norm": 0.6594077109708212, | |
| "learning_rate": 1.43710327068702e-05, | |
| "loss": 0.3576, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3650404214859009, | |
| "step": 2370, | |
| "valid_targets_mean": 2368.4, | |
| "valid_targets_min": 498 | |
| }, | |
| { | |
| "epoch": 3.8, | |
| "grad_norm": 0.5616864427647533, | |
| "learning_rate": 1.4281772570860897e-05, | |
| "loss": 0.3645, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.34412187337875366, | |
| "step": 2375, | |
| "valid_targets_mean": 2745.6, | |
| "valid_targets_min": 313 | |
| }, | |
| { | |
| "epoch": 3.808, | |
| "grad_norm": 0.6422542729169898, | |
| "learning_rate": 1.4192636301058952e-05, | |
| "loss": 0.3473, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3570175766944885, | |
| "step": 2380, | |
| "valid_targets_mean": 2231.0, | |
| "valid_targets_min": 553 | |
| }, | |
| { | |
| "epoch": 3.816, | |
| "grad_norm": 0.6982317651744271, | |
| "learning_rate": 1.4103625828302508e-05, | |
| "loss": 0.3528, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3916783928871155, | |
| "step": 2385, | |
| "valid_targets_mean": 2175.6, | |
| "valid_targets_min": 514 | |
| }, | |
| { | |
| "epoch": 3.824, | |
| "grad_norm": 0.6711011803974902, | |
| "learning_rate": 1.4014743080704743e-05, | |
| "loss": 0.3608, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3191945552825928, | |
| "step": 2390, | |
| "valid_targets_mean": 1913.1, | |
| "valid_targets_min": 336 | |
| }, | |
| { | |
| "epoch": 3.832, | |
| "grad_norm": 0.7754864491548238, | |
| "learning_rate": 1.3925989983612118e-05, | |
| "loss": 0.3845, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.37967538833618164, | |
| "step": 2395, | |
| "valid_targets_mean": 1446.5, | |
| "valid_targets_min": 312 | |
| }, | |
| { | |
| "epoch": 3.84, | |
| "grad_norm": 0.6115713228293688, | |
| "learning_rate": 1.383736845956261e-05, | |
| "loss": 0.3555, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.32667064666748047, | |
| "step": 2400, | |
| "valid_targets_mean": 2120.1, | |
| "valid_targets_min": 254 | |
| }, | |
| { | |
| "epoch": 3.848, | |
| "grad_norm": 0.6816884753996482, | |
| "learning_rate": 1.3748880428244154e-05, | |
| "loss": 0.3465, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3406139612197876, | |
| "step": 2405, | |
| "valid_targets_mean": 1800.9, | |
| "valid_targets_min": 282 | |
| }, | |
| { | |
| "epoch": 3.856, | |
| "grad_norm": 0.6274592212225665, | |
| "learning_rate": 1.3660527806452965e-05, | |
| "loss": 0.3575, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.37814486026763916, | |
| "step": 2410, | |
| "valid_targets_mean": 2376.9, | |
| "valid_targets_min": 466 | |
| }, | |
| { | |
| "epoch": 3.864, | |
| "grad_norm": 0.6653705629008166, | |
| "learning_rate": 1.3572312508052118e-05, | |
| "loss": 0.3702, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.35926705598831177, | |
| "step": 2415, | |
| "valid_targets_mean": 1921.9, | |
| "valid_targets_min": 358 | |
| }, | |
| { | |
| "epoch": 3.872, | |
| "grad_norm": 0.619145516831352, | |
| "learning_rate": 1.3484236443929982e-05, | |
| "loss": 0.3702, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.38096240162849426, | |
| "step": 2420, | |
| "valid_targets_mean": 2302.6, | |
| "valid_targets_min": 410 | |
| }, | |
| { | |
| "epoch": 3.88, | |
| "grad_norm": 1.039265841304078, | |
| "learning_rate": 1.3396301521958926e-05, | |
| "loss": 0.3605, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.30115431547164917, | |
| "step": 2425, | |
| "valid_targets_mean": 1910.0, | |
| "valid_targets_min": 298 | |
| }, | |
| { | |
| "epoch": 3.888, | |
| "grad_norm": 0.7177434329809323, | |
| "learning_rate": 1.3308509646953934e-05, | |
| "loss": 0.3582, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3660653233528137, | |
| "step": 2430, | |
| "valid_targets_mean": 1765.5, | |
| "valid_targets_min": 298 | |
| }, | |
| { | |
| "epoch": 3.896, | |
| "grad_norm": 0.5917075213127174, | |
| "learning_rate": 1.3220862720631349e-05, | |
| "loss": 0.3359, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.35105088353157043, | |
| "step": 2435, | |
| "valid_targets_mean": 2611.8, | |
| "valid_targets_min": 648 | |
| }, | |
| { | |
| "epoch": 3.904, | |
| "grad_norm": 0.6873495434368752, | |
| "learning_rate": 1.3133362641567697e-05, | |
| "loss": 0.3514, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.35561615228652954, | |
| "step": 2440, | |
| "valid_targets_mean": 1834.1, | |
| "valid_targets_min": 345 | |
| }, | |
| { | |
| "epoch": 3.912, | |
| "grad_norm": 0.7717485015233096, | |
| "learning_rate": 1.3046011305158546e-05, | |
| "loss": 0.3665, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3796098530292511, | |
| "step": 2445, | |
| "valid_targets_mean": 1418.3, | |
| "valid_targets_min": 276 | |
| }, | |
| { | |
| "epoch": 3.92, | |
| "grad_norm": 0.5966354817843722, | |
| "learning_rate": 1.2958810603577456e-05, | |
| "loss": 0.3493, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.36848214268684387, | |
| "step": 2450, | |
| "valid_targets_mean": 2418.7, | |
| "valid_targets_min": 330 | |
| }, | |
| { | |
| "epoch": 3.928, | |
| "grad_norm": 0.6059163698362121, | |
| "learning_rate": 1.2871762425734989e-05, | |
| "loss": 0.3501, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3625923693180084, | |
| "step": 2455, | |
| "valid_targets_mean": 2740.3, | |
| "valid_targets_min": 381 | |
| }, | |
| { | |
| "epoch": 3.936, | |
| "grad_norm": 0.6534398663639183, | |
| "learning_rate": 1.278486865723779e-05, | |
| "loss": 0.3446, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3527751564979553, | |
| "step": 2460, | |
| "valid_targets_mean": 2065.2, | |
| "valid_targets_min": 406 | |
| }, | |
| { | |
| "epoch": 3.944, | |
| "grad_norm": 0.5877602369092997, | |
| "learning_rate": 1.269813118034775e-05, | |
| "loss": 0.3609, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.34739208221435547, | |
| "step": 2465, | |
| "valid_targets_mean": 2381.4, | |
| "valid_targets_min": 298 | |
| }, | |
| { | |
| "epoch": 3.952, | |
| "grad_norm": 1.0190651565090763, | |
| "learning_rate": 1.2611551873941213e-05, | |
| "loss": 0.3819, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.42016249895095825, | |
| "step": 2470, | |
| "valid_targets_mean": 1098.1, | |
| "valid_targets_min": 384 | |
| }, | |
| { | |
| "epoch": 3.96, | |
| "grad_norm": 0.6180474714320067, | |
| "learning_rate": 1.2525132613468309e-05, | |
| "loss": 0.3662, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3400590717792511, | |
| "step": 2475, | |
| "valid_targets_mean": 2681.2, | |
| "valid_targets_min": 320 | |
| }, | |
| { | |
| "epoch": 3.968, | |
| "grad_norm": 0.5915847013969876, | |
| "learning_rate": 1.2438875270912294e-05, | |
| "loss": 0.367, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.33024099469184875, | |
| "step": 2480, | |
| "valid_targets_mean": 2419.4, | |
| "valid_targets_min": 388 | |
| }, | |
| { | |
| "epoch": 3.976, | |
| "grad_norm": 0.6959888683464479, | |
| "learning_rate": 1.2352781714749016e-05, | |
| "loss": 0.3733, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.38543227314949036, | |
| "step": 2485, | |
| "valid_targets_mean": 2036.9, | |
| "valid_targets_min": 374 | |
| }, | |
| { | |
| "epoch": 3.984, | |
| "grad_norm": 0.7111431039056506, | |
| "learning_rate": 1.2266853809906469e-05, | |
| "loss": 0.3761, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3813594877719879, | |
| "step": 2490, | |
| "valid_targets_mean": 1833.9, | |
| "valid_targets_min": 394 | |
| }, | |
| { | |
| "epoch": 3.992, | |
| "grad_norm": 0.6519706550375763, | |
| "learning_rate": 1.2181093417724317e-05, | |
| "loss": 0.3614, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3967099189758301, | |
| "step": 2495, | |
| "valid_targets_mean": 2376.8, | |
| "valid_targets_min": 392 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "grad_norm": 0.7228414756396276, | |
| "learning_rate": 1.2095502395913676e-05, | |
| "loss": 0.3682, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.34773457050323486, | |
| "step": 2500, | |
| "valid_targets_mean": 1735.1, | |
| "valid_targets_min": 424 | |
| }, | |
| { | |
| "epoch": 4.008, | |
| "grad_norm": 0.7286469450360182, | |
| "learning_rate": 1.2010082598516775e-05, | |
| "loss": 0.3443, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3937542736530304, | |
| "step": 2505, | |
| "valid_targets_mean": 1759.4, | |
| "valid_targets_min": 265 | |
| }, | |
| { | |
| "epoch": 4.016, | |
| "grad_norm": 0.8304747598684544, | |
| "learning_rate": 1.1924835875866884e-05, | |
| "loss": 0.3492, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3632838726043701, | |
| "step": 2510, | |
| "valid_targets_mean": 1408.3, | |
| "valid_targets_min": 299 | |
| }, | |
| { | |
| "epoch": 4.024, | |
| "grad_norm": 0.7035863294078009, | |
| "learning_rate": 1.1839764074548145e-05, | |
| "loss": 0.3354, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.35498493909835815, | |
| "step": 2515, | |
| "valid_targets_mean": 1933.1, | |
| "valid_targets_min": 278 | |
| }, | |
| { | |
| "epoch": 4.032, | |
| "grad_norm": 0.6307727293144214, | |
| "learning_rate": 1.1754869037355659e-05, | |
| "loss": 0.3504, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3607982397079468, | |
| "step": 2520, | |
| "valid_targets_mean": 2463.2, | |
| "valid_targets_min": 776 | |
| }, | |
| { | |
| "epoch": 4.04, | |
| "grad_norm": 0.8393961248560299, | |
| "learning_rate": 1.1670152603255504e-05, | |
| "loss": 0.3354, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3465605080127716, | |
| "step": 2525, | |
| "valid_targets_mean": 1448.4, | |
| "valid_targets_min": 300 | |
| }, | |
| { | |
| "epoch": 4.048, | |
| "grad_norm": 0.793260792414273, | |
| "learning_rate": 1.1585616607344909e-05, | |
| "loss": 0.3515, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.38471972942352295, | |
| "step": 2530, | |
| "valid_targets_mean": 1655.2, | |
| "valid_targets_min": 343 | |
| }, | |
| { | |
| "epoch": 4.056, | |
| "grad_norm": 0.6902235662186285, | |
| "learning_rate": 1.1501262880812547e-05, | |
| "loss": 0.3456, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.34612828493118286, | |
| "step": 2535, | |
| "valid_targets_mean": 1969.1, | |
| "valid_targets_min": 296 | |
| }, | |
| { | |
| "epoch": 4.064, | |
| "grad_norm": 0.7069090444533567, | |
| "learning_rate": 1.141709325089881e-05, | |
| "loss": 0.3441, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.355415940284729, | |
| "step": 2540, | |
| "valid_targets_mean": 1886.8, | |
| "valid_targets_min": 430 | |
| }, | |
| { | |
| "epoch": 4.072, | |
| "grad_norm": 0.639839754182613, | |
| "learning_rate": 1.1333109540856257e-05, | |
| "loss": 0.3431, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3217964172363281, | |
| "step": 2545, | |
| "valid_targets_mean": 2287.9, | |
| "valid_targets_min": 278 | |
| }, | |
| { | |
| "epoch": 4.08, | |
| "grad_norm": 0.6214464640012719, | |
| "learning_rate": 1.1249313569910143e-05, | |
| "loss": 0.3556, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3286832571029663, | |
| "step": 2550, | |
| "valid_targets_mean": 2444.1, | |
| "valid_targets_min": 281 | |
| }, | |
| { | |
| "epoch": 4.088, | |
| "grad_norm": 0.6544866558309532, | |
| "learning_rate": 1.1165707153218942e-05, | |
| "loss": 0.3354, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.332338809967041, | |
| "step": 2555, | |
| "valid_targets_mean": 2312.6, | |
| "valid_targets_min": 293 | |
| }, | |
| { | |
| "epoch": 4.096, | |
| "grad_norm": 0.6132945878233904, | |
| "learning_rate": 1.1082292101835121e-05, | |
| "loss": 0.3345, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2943424880504608, | |
| "step": 2560, | |
| "valid_targets_mean": 2460.2, | |
| "valid_targets_min": 325 | |
| }, | |
| { | |
| "epoch": 4.104, | |
| "grad_norm": 0.7027991125110398, | |
| "learning_rate": 1.099907022266582e-05, | |
| "loss": 0.341, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.32788655161857605, | |
| "step": 2565, | |
| "valid_targets_mean": 1762.0, | |
| "valid_targets_min": 263 | |
| }, | |
| { | |
| "epoch": 4.112, | |
| "grad_norm": 0.7415931434725705, | |
| "learning_rate": 1.0916043318433767e-05, | |
| "loss": 0.3408, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3493083119392395, | |
| "step": 2570, | |
| "valid_targets_mean": 1763.7, | |
| "valid_targets_min": 292 | |
| }, | |
| { | |
| "epoch": 4.12, | |
| "grad_norm": 0.6741258090371485, | |
| "learning_rate": 1.0833213187638203e-05, | |
| "loss": 0.3453, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3227207660675049, | |
| "step": 2575, | |
| "valid_targets_mean": 2098.1, | |
| "valid_targets_min": 453 | |
| }, | |
| { | |
| "epoch": 4.128, | |
| "grad_norm": 0.8441947070410107, | |
| "learning_rate": 1.0750581624515957e-05, | |
| "loss": 0.3487, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.37915170192718506, | |
| "step": 2580, | |
| "valid_targets_mean": 1423.5, | |
| "valid_targets_min": 314 | |
| }, | |
| { | |
| "epoch": 4.136, | |
| "grad_norm": 0.5804235398456956, | |
| "learning_rate": 1.0668150419002527e-05, | |
| "loss": 0.3458, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3130751848220825, | |
| "step": 2585, | |
| "valid_targets_mean": 2746.5, | |
| "valid_targets_min": 639 | |
| }, | |
| { | |
| "epoch": 4.144, | |
| "grad_norm": 0.6639882059386523, | |
| "learning_rate": 1.0585921356693349e-05, | |
| "loss": 0.3389, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.30582067370414734, | |
| "step": 2590, | |
| "valid_targets_mean": 2102.9, | |
| "valid_targets_min": 326 | |
| }, | |
| { | |
| "epoch": 4.152, | |
| "grad_norm": 0.7928164946481391, | |
| "learning_rate": 1.0503896218805112e-05, | |
| "loss": 0.332, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.34261876344680786, | |
| "step": 2595, | |
| "valid_targets_mean": 2166.9, | |
| "valid_targets_min": 237 | |
| }, | |
| { | |
| "epoch": 4.16, | |
| "grad_norm": 0.6944837315899954, | |
| "learning_rate": 1.0422076782137155e-05, | |
| "loss": 0.3368, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3524402976036072, | |
| "step": 2600, | |
| "valid_targets_mean": 2031.3, | |
| "valid_targets_min": 328 | |
| }, | |
| { | |
| "epoch": 4.168, | |
| "grad_norm": 0.952218468273065, | |
| "learning_rate": 1.0340464819032991e-05, | |
| "loss": 0.335, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3416384756565094, | |
| "step": 2605, | |
| "valid_targets_mean": 1622.0, | |
| "valid_targets_min": 309 | |
| }, | |
| { | |
| "epoch": 4.176, | |
| "grad_norm": 0.6300379469720988, | |
| "learning_rate": 1.0259062097341911e-05, | |
| "loss": 0.323, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.28312206268310547, | |
| "step": 2610, | |
| "valid_targets_mean": 2637.6, | |
| "valid_targets_min": 417 | |
| }, | |
| { | |
| "epoch": 4.184, | |
| "grad_norm": 0.5573679849228219, | |
| "learning_rate": 1.017787038038071e-05, | |
| "loss": 0.3483, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.31698471307754517, | |
| "step": 2615, | |
| "valid_targets_mean": 2895.9, | |
| "valid_targets_min": 272 | |
| }, | |
| { | |
| "epoch": 4.192, | |
| "grad_norm": 0.7066082481106857, | |
| "learning_rate": 1.0096891426895476e-05, | |
| "loss": 0.3371, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.314532071352005, | |
| "step": 2620, | |
| "valid_targets_mean": 1845.6, | |
| "valid_targets_min": 420 | |
| }, | |
| { | |
| "epoch": 4.2, | |
| "grad_norm": 0.7884744051747153, | |
| "learning_rate": 1.0016126991023447e-05, | |
| "loss": 0.34, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.34831756353378296, | |
| "step": 2625, | |
| "valid_targets_mean": 1734.6, | |
| "valid_targets_min": 293 | |
| }, | |
| { | |
| "epoch": 4.208, | |
| "grad_norm": 0.752357813315544, | |
| "learning_rate": 9.935578822255113e-06, | |
| "loss": 0.3559, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3773335814476013, | |
| "step": 2630, | |
| "valid_targets_mean": 1848.9, | |
| "valid_targets_min": 410 | |
| }, | |
| { | |
| "epoch": 4.216, | |
| "grad_norm": 0.6660466309862689, | |
| "learning_rate": 9.855248665396218e-06, | |
| "loss": 0.3335, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.34157776832580566, | |
| "step": 2635, | |
| "valid_targets_mean": 2457.2, | |
| "valid_targets_min": 552 | |
| }, | |
| { | |
| "epoch": 4.224, | |
| "grad_norm": 0.7768388500955553, | |
| "learning_rate": 9.775138260530046e-06, | |
| "loss": 0.3349, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3923647999763489, | |
| "step": 2640, | |
| "valid_targets_mean": 1858.2, | |
| "valid_targets_min": 256 | |
| }, | |
| { | |
| "epoch": 4.232, | |
| "grad_norm": 0.7538268754577202, | |
| "learning_rate": 9.695249342979667e-06, | |
| "loss": 0.3415, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.31775760650634766, | |
| "step": 2645, | |
| "valid_targets_mean": 1545.6, | |
| "valid_targets_min": 248 | |
| }, | |
| { | |
| "epoch": 4.24, | |
| "grad_norm": 0.6731102254536624, | |
| "learning_rate": 9.615583643270371e-06, | |
| "loss": 0.3476, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3384263217449188, | |
| "step": 2650, | |
| "valid_targets_mean": 2115.8, | |
| "valid_targets_min": 299 | |
| }, | |
| { | |
| "epoch": 4.248, | |
| "grad_norm": 0.744536684782561, | |
| "learning_rate": 9.536142887092208e-06, | |
| "loss": 0.329, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.36594611406326294, | |
| "step": 2655, | |
| "valid_targets_mean": 2009.8, | |
| "valid_targets_min": 389 | |
| }, | |
| { | |
| "epoch": 4.256, | |
| "grad_norm": 0.7266051622348179, | |
| "learning_rate": 9.456928795262552e-06, | |
| "loss": 0.3385, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.31755226850509644, | |
| "step": 2660, | |
| "valid_targets_mean": 1801.4, | |
| "valid_targets_min": 254 | |
| }, | |
| { | |
| "epoch": 4.264, | |
| "grad_norm": 0.6044550213908365, | |
| "learning_rate": 9.377943083688873e-06, | |
| "loss": 0.3261, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.273038387298584, | |
| "step": 2665, | |
| "valid_targets_mean": 2034.6, | |
| "valid_targets_min": 345 | |
| }, | |
| { | |
| "epoch": 4.272, | |
| "grad_norm": 0.6986297075905579, | |
| "learning_rate": 9.29918746333153e-06, | |
| "loss": 0.3337, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.35008662939071655, | |
| "step": 2670, | |
| "valid_targets_mean": 1931.9, | |
| "valid_targets_min": 259 | |
| }, | |
| { | |
| "epoch": 4.28, | |
| "grad_norm": 0.6132289081293004, | |
| "learning_rate": 9.220663640166756e-06, | |
| "loss": 0.3504, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3231913447380066, | |
| "step": 2675, | |
| "valid_targets_mean": 2267.9, | |
| "valid_targets_min": 432 | |
| }, | |
| { | |
| "epoch": 4.288, | |
| "grad_norm": 0.7343616664045941, | |
| "learning_rate": 9.142373315149655e-06, | |
| "loss": 0.35, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3550803065299988, | |
| "step": 2680, | |
| "valid_targets_mean": 1771.3, | |
| "valid_targets_min": 256 | |
| }, | |
| { | |
| "epoch": 4.296, | |
| "grad_norm": 0.7359371826700569, | |
| "learning_rate": 9.064318184177373e-06, | |
| "loss": 0.3506, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.34089556336402893, | |
| "step": 2685, | |
| "valid_targets_mean": 1857.1, | |
| "valid_targets_min": 412 | |
| }, | |
| { | |
| "epoch": 4.304, | |
| "grad_norm": 0.5878830118582525, | |
| "learning_rate": 8.986499938052396e-06, | |
| "loss": 0.3316, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2948848605155945, | |
| "step": 2690, | |
| "valid_targets_mean": 2368.9, | |
| "valid_targets_min": 314 | |
| }, | |
| { | |
| "epoch": 4.312, | |
| "grad_norm": 0.7765143196874953, | |
| "learning_rate": 8.908920262445859e-06, | |
| "loss": 0.3351, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3355043828487396, | |
| "step": 2695, | |
| "valid_targets_mean": 1749.8, | |
| "valid_targets_min": 271 | |
| }, | |
| { | |
| "epoch": 4.32, | |
| "grad_norm": 0.7582658266269858, | |
| "learning_rate": 8.831580837861082e-06, | |
| "loss": 0.3321, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.32356560230255127, | |
| "step": 2700, | |
| "valid_targets_mean": 1642.6, | |
| "valid_targets_min": 398 | |
| }, | |
| { | |
| "epoch": 4.328, | |
| "grad_norm": 0.7710195340666637, | |
| "learning_rate": 8.754483339597166e-06, | |
| "loss": 0.3447, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.360440194606781, | |
| "step": 2705, | |
| "valid_targets_mean": 2261.8, | |
| "valid_targets_min": 335 | |
| }, | |
| { | |
| "epoch": 4.336, | |
| "grad_norm": 0.8342762582487903, | |
| "learning_rate": 8.677629437712665e-06, | |
| "loss": 0.3519, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.32881706953048706, | |
| "step": 2710, | |
| "valid_targets_mean": 1501.9, | |
| "valid_targets_min": 276 | |
| }, | |
| { | |
| "epoch": 4.344, | |
| "grad_norm": 0.6279159244745763, | |
| "learning_rate": 8.601020796989467e-06, | |
| "loss": 0.329, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3248441517353058, | |
| "step": 2715, | |
| "valid_targets_mean": 2789.1, | |
| "valid_targets_min": 462 | |
| }, | |
| { | |
| "epoch": 4.352, | |
| "grad_norm": 0.5719966709561647, | |
| "learning_rate": 8.524659076896656e-06, | |
| "loss": 0.3383, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.32366693019866943, | |
| "step": 2720, | |
| "valid_targets_mean": 2923.1, | |
| "valid_targets_min": 896 | |
| }, | |
| { | |
| "epoch": 4.36, | |
| "grad_norm": 0.6977283663303482, | |
| "learning_rate": 8.448545931554652e-06, | |
| "loss": 0.3359, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.33601656556129456, | |
| "step": 2725, | |
| "valid_targets_mean": 2315.9, | |
| "valid_targets_min": 432 | |
| }, | |
| { | |
| "epoch": 4.368, | |
| "grad_norm": 0.7069369620627451, | |
| "learning_rate": 8.372683009699307e-06, | |
| "loss": 0.3335, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.33306482434272766, | |
| "step": 2730, | |
| "valid_targets_mean": 1992.3, | |
| "valid_targets_min": 394 | |
| }, | |
| { | |
| "epoch": 4.376, | |
| "grad_norm": 0.5742499188652059, | |
| "learning_rate": 8.297071954646248e-06, | |
| "loss": 0.3535, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3281279504299164, | |
| "step": 2735, | |
| "valid_targets_mean": 2812.5, | |
| "valid_targets_min": 348 | |
| }, | |
| { | |
| "epoch": 4.384, | |
| "grad_norm": 0.6098399152492764, | |
| "learning_rate": 8.22171440425523e-06, | |
| "loss": 0.3327, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.31789901852607727, | |
| "step": 2740, | |
| "valid_targets_mean": 2494.6, | |
| "valid_targets_min": 314 | |
| }, | |
| { | |
| "epoch": 4.392, | |
| "grad_norm": 0.7915304432145721, | |
| "learning_rate": 8.146611990894683e-06, | |
| "loss": 0.337, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.31900548934936523, | |
| "step": 2745, | |
| "valid_targets_mean": 1699.1, | |
| "valid_targets_min": 347 | |
| }, | |
| { | |
| "epoch": 4.4, | |
| "grad_norm": 0.7601195994001059, | |
| "learning_rate": 8.071766341406363e-06, | |
| "loss": 0.357, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.34867095947265625, | |
| "step": 2750, | |
| "valid_targets_mean": 1685.8, | |
| "valid_targets_min": 299 | |
| }, | |
| { | |
| "epoch": 4.408, | |
| "grad_norm": 0.8669948754659953, | |
| "learning_rate": 7.997179077070092e-06, | |
| "loss": 0.3297, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3693384826183319, | |
| "step": 2755, | |
| "valid_targets_mean": 1537.1, | |
| "valid_targets_min": 304 | |
| }, | |
| { | |
| "epoch": 4.416, | |
| "grad_norm": 0.6164068574777449, | |
| "learning_rate": 7.92285181356864e-06, | |
| "loss": 0.3463, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.30480194091796875, | |
| "step": 2760, | |
| "valid_targets_mean": 2245.4, | |
| "valid_targets_min": 272 | |
| }, | |
| { | |
| "epoch": 4.424, | |
| "grad_norm": 0.6708366536968551, | |
| "learning_rate": 7.848786160952726e-06, | |
| "loss": 0.337, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3752930164337158, | |
| "step": 2765, | |
| "valid_targets_mean": 2273.2, | |
| "valid_targets_min": 324 | |
| }, | |
| { | |
| "epoch": 4.432, | |
| "grad_norm": 0.6476967033911869, | |
| "learning_rate": 7.77498372360617e-06, | |
| "loss": 0.3269, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.32428058981895447, | |
| "step": 2770, | |
| "valid_targets_mean": 2302.4, | |
| "valid_targets_min": 351 | |
| }, | |
| { | |
| "epoch": 4.44, | |
| "grad_norm": 0.6817200102614068, | |
| "learning_rate": 7.701446100211095e-06, | |
| "loss": 0.3389, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3381454348564148, | |
| "step": 2775, | |
| "valid_targets_mean": 2117.3, | |
| "valid_targets_min": 301 | |
| }, | |
| { | |
| "epoch": 4.448, | |
| "grad_norm": 0.6936215026940229, | |
| "learning_rate": 7.628174883713322e-06, | |
| "loss": 0.3373, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.33592289686203003, | |
| "step": 2780, | |
| "valid_targets_mean": 2315.6, | |
| "valid_targets_min": 335 | |
| }, | |
| { | |
| "epoch": 4.456, | |
| "grad_norm": 0.7769861722197998, | |
| "learning_rate": 7.555171661287875e-06, | |
| "loss": 0.3407, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3716652989387512, | |
| "step": 2785, | |
| "valid_targets_mean": 1949.7, | |
| "valid_targets_min": 266 | |
| }, | |
| { | |
| "epoch": 4.464, | |
| "grad_norm": 0.6021284909057042, | |
| "learning_rate": 7.482438014304567e-06, | |
| "loss": 0.3449, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3120408058166504, | |
| "step": 2790, | |
| "valid_targets_mean": 2475.6, | |
| "valid_targets_min": 420 | |
| }, | |
| { | |
| "epoch": 4.4719999999999995, | |
| "grad_norm": 0.5814826266496147, | |
| "learning_rate": 7.4099755182937685e-06, | |
| "loss": 0.3215, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.31242480874061584, | |
| "step": 2795, | |
| "valid_targets_mean": 2622.8, | |
| "valid_targets_min": 513 | |
| }, | |
| { | |
| "epoch": 4.48, | |
| "grad_norm": 0.6469685216112735, | |
| "learning_rate": 7.337785742912289e-06, | |
| "loss": 0.3406, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3127382695674896, | |
| "step": 2800, | |
| "valid_targets_mean": 2113.1, | |
| "valid_targets_min": 298 | |
| }, | |
| { | |
| "epoch": 4.4879999999999995, | |
| "grad_norm": 0.7648541150536319, | |
| "learning_rate": 7.265870251909335e-06, | |
| "loss": 0.34, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3590547442436218, | |
| "step": 2805, | |
| "valid_targets_mean": 1728.8, | |
| "valid_targets_min": 277 | |
| }, | |
| { | |
| "epoch": 4.496, | |
| "grad_norm": 0.8303349115720151, | |
| "learning_rate": 7.194230603092697e-06, | |
| "loss": 0.3363, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3518087863922119, | |
| "step": 2810, | |
| "valid_targets_mean": 1715.7, | |
| "valid_targets_min": 272 | |
| }, | |
| { | |
| "epoch": 4.504, | |
| "grad_norm": 0.6736659378057248, | |
| "learning_rate": 7.122868348294927e-06, | |
| "loss": 0.3489, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3054027557373047, | |
| "step": 2815, | |
| "valid_targets_mean": 2124.8, | |
| "valid_targets_min": 344 | |
| }, | |
| { | |
| "epoch": 4.5120000000000005, | |
| "grad_norm": 0.745289098779392, | |
| "learning_rate": 7.051785033339804e-06, | |
| "loss": 0.3667, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3639134168624878, | |
| "step": 2820, | |
| "valid_targets_mean": 1994.8, | |
| "valid_targets_min": 396 | |
| }, | |
| { | |
| "epoch": 4.52, | |
| "grad_norm": 0.6158853661814754, | |
| "learning_rate": 6.980982198008785e-06, | |
| "loss": 0.3234, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3225085437297821, | |
| "step": 2825, | |
| "valid_targets_mean": 2275.7, | |
| "valid_targets_min": 241 | |
| }, | |
| { | |
| "epoch": 4.5280000000000005, | |
| "grad_norm": 0.7616317562310106, | |
| "learning_rate": 6.910461376007704e-06, | |
| "loss": 0.3502, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3805854916572571, | |
| "step": 2830, | |
| "valid_targets_mean": 2065.6, | |
| "valid_targets_min": 251 | |
| }, | |
| { | |
| "epoch": 4.536, | |
| "grad_norm": 0.7988989277100885, | |
| "learning_rate": 6.840224094933501e-06, | |
| "loss": 0.337, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3373550474643707, | |
| "step": 2835, | |
| "valid_targets_mean": 1581.1, | |
| "valid_targets_min": 334 | |
| }, | |
| { | |
| "epoch": 4.5440000000000005, | |
| "grad_norm": 0.6303866936819127, | |
| "learning_rate": 6.7702718762411505e-06, | |
| "loss": 0.367, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3261539340019226, | |
| "step": 2840, | |
| "valid_targets_mean": 2142.6, | |
| "valid_targets_min": 318 | |
| }, | |
| { | |
| "epoch": 4.552, | |
| "grad_norm": 0.8081693015537023, | |
| "learning_rate": 6.700606235210731e-06, | |
| "loss": 0.3323, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.36277416348457336, | |
| "step": 2845, | |
| "valid_targets_mean": 1539.9, | |
| "valid_targets_min": 407 | |
| }, | |
| { | |
| "epoch": 4.5600000000000005, | |
| "grad_norm": 0.7410154168191748, | |
| "learning_rate": 6.631228680914558e-06, | |
| "loss": 0.3447, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.34555795788764954, | |
| "step": 2850, | |
| "valid_targets_mean": 1858.9, | |
| "valid_targets_min": 291 | |
| }, | |
| { | |
| "epoch": 4.568, | |
| "grad_norm": 0.720084820230822, | |
| "learning_rate": 6.562140716184515e-06, | |
| "loss": 0.3384, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3737310767173767, | |
| "step": 2855, | |
| "valid_targets_mean": 2088.8, | |
| "valid_targets_min": 278 | |
| }, | |
| { | |
| "epoch": 4.576, | |
| "grad_norm": 0.7346833698454145, | |
| "learning_rate": 6.493343837579511e-06, | |
| "loss": 0.3455, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.36689460277557373, | |
| "step": 2860, | |
| "valid_targets_mean": 1973.1, | |
| "valid_targets_min": 305 | |
| }, | |
| { | |
| "epoch": 4.584, | |
| "grad_norm": 0.6779156780136647, | |
| "learning_rate": 6.424839535353045e-06, | |
| "loss": 0.3326, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.31229498982429504, | |
| "step": 2865, | |
| "valid_targets_mean": 1897.9, | |
| "valid_targets_min": 377 | |
| }, | |
| { | |
| "epoch": 4.592, | |
| "grad_norm": 0.6429465962365933, | |
| "learning_rate": 6.356629293420926e-06, | |
| "loss": 0.3303, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3346661627292633, | |
| "step": 2870, | |
| "valid_targets_mean": 2352.9, | |
| "valid_targets_min": 334 | |
| }, | |
| { | |
| "epoch": 4.6, | |
| "grad_norm": 0.7283346631415872, | |
| "learning_rate": 6.28871458932913e-06, | |
| "loss": 0.3412, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3508552014827728, | |
| "step": 2875, | |
| "valid_targets_mean": 2044.8, | |
| "valid_targets_min": 368 | |
| }, | |
| { | |
| "epoch": 4.608, | |
| "grad_norm": 0.7576839986877464, | |
| "learning_rate": 6.2210968942218206e-06, | |
| "loss": 0.3349, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.33264338970184326, | |
| "step": 2880, | |
| "valid_targets_mean": 1746.8, | |
| "valid_targets_min": 323 | |
| }, | |
| { | |
| "epoch": 4.616, | |
| "grad_norm": 0.7688337603614162, | |
| "learning_rate": 6.153777672809438e-06, | |
| "loss": 0.3407, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.38428616523742676, | |
| "step": 2885, | |
| "valid_targets_mean": 1919.3, | |
| "valid_targets_min": 267 | |
| }, | |
| { | |
| "epoch": 4.624, | |
| "grad_norm": 0.7414416548490468, | |
| "learning_rate": 6.086758383336984e-06, | |
| "loss": 0.3434, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3412899374961853, | |
| "step": 2890, | |
| "valid_targets_mean": 2032.7, | |
| "valid_targets_min": 374 | |
| }, | |
| { | |
| "epoch": 4.632, | |
| "grad_norm": 0.6462128784115221, | |
| "learning_rate": 6.0200404775524715e-06, | |
| "loss": 0.3379, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.31704598665237427, | |
| "step": 2895, | |
| "valid_targets_mean": 2154.6, | |
| "valid_targets_min": 263 | |
| }, | |
| { | |
| "epoch": 4.64, | |
| "grad_norm": 0.7360062660104016, | |
| "learning_rate": 5.9536254006754155e-06, | |
| "loss": 0.3272, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3256540298461914, | |
| "step": 2900, | |
| "valid_targets_mean": 1833.4, | |
| "valid_targets_min": 312 | |
| }, | |
| { | |
| "epoch": 4.648, | |
| "grad_norm": 0.7158972606975016, | |
| "learning_rate": 5.887514591365593e-06, | |
| "loss": 0.3221, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3094181418418884, | |
| "step": 2905, | |
| "valid_targets_mean": 1872.8, | |
| "valid_targets_min": 377 | |
| }, | |
| { | |
| "epoch": 4.656, | |
| "grad_norm": 0.709391212865319, | |
| "learning_rate": 5.821709481691798e-06, | |
| "loss": 0.3408, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3378103971481323, | |
| "step": 2910, | |
| "valid_targets_mean": 2036.4, | |
| "valid_targets_min": 463 | |
| }, | |
| { | |
| "epoch": 4.664, | |
| "grad_norm": 0.6871157747889591, | |
| "learning_rate": 5.75621149710091e-06, | |
| "loss": 0.3398, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3434550166130066, | |
| "step": 2915, | |
| "valid_targets_mean": 2244.7, | |
| "valid_targets_min": 634 | |
| }, | |
| { | |
| "epoch": 4.672, | |
| "grad_norm": 0.6331228522669606, | |
| "learning_rate": 5.691022056386961e-06, | |
| "loss": 0.3142, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.29856497049331665, | |
| "step": 2920, | |
| "valid_targets_mean": 2137.9, | |
| "valid_targets_min": 317 | |
| }, | |
| { | |
| "epoch": 4.68, | |
| "grad_norm": 0.6347565330283759, | |
| "learning_rate": 5.6261425716604136e-06, | |
| "loss": 0.3279, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3437310457229614, | |
| "step": 2925, | |
| "valid_targets_mean": 2575.1, | |
| "valid_targets_min": 324 | |
| }, | |
| { | |
| "epoch": 4.688, | |
| "grad_norm": 0.6799231986824172, | |
| "learning_rate": 5.56157444831757e-06, | |
| "loss": 0.3434, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3541477918624878, | |
| "step": 2930, | |
| "valid_targets_mean": 2470.2, | |
| "valid_targets_min": 234 | |
| }, | |
| { | |
| "epoch": 4.696, | |
| "grad_norm": 0.7837577906764602, | |
| "learning_rate": 5.4973190850101334e-06, | |
| "loss": 0.3393, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3557363748550415, | |
| "step": 2935, | |
| "valid_targets_mean": 1714.9, | |
| "valid_targets_min": 312 | |
| }, | |
| { | |
| "epoch": 4.704, | |
| "grad_norm": 0.7808964571892477, | |
| "learning_rate": 5.433377873614925e-06, | |
| "loss": 0.3554, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.34845733642578125, | |
| "step": 2940, | |
| "valid_targets_mean": 1640.8, | |
| "valid_targets_min": 392 | |
| }, | |
| { | |
| "epoch": 4.712, | |
| "grad_norm": 0.7463159877230825, | |
| "learning_rate": 5.369752199203702e-06, | |
| "loss": 0.3521, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3448995053768158, | |
| "step": 2945, | |
| "valid_targets_mean": 1826.3, | |
| "valid_targets_min": 306 | |
| }, | |
| { | |
| "epoch": 4.72, | |
| "grad_norm": 0.7239910602208591, | |
| "learning_rate": 5.306443440013171e-06, | |
| "loss": 0.338, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3213045001029968, | |
| "step": 2950, | |
| "valid_targets_mean": 2073.1, | |
| "valid_targets_min": 279 | |
| }, | |
| { | |
| "epoch": 4.728, | |
| "grad_norm": 0.697542846280569, | |
| "learning_rate": 5.243452967415155e-06, | |
| "loss": 0.3398, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.368432879447937, | |
| "step": 2955, | |
| "valid_targets_mean": 2343.1, | |
| "valid_targets_min": 340 | |
| }, | |
| { | |
| "epoch": 4.736, | |
| "grad_norm": 0.6726585942037008, | |
| "learning_rate": 5.180782145886846e-06, | |
| "loss": 0.3363, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.32481229305267334, | |
| "step": 2960, | |
| "valid_targets_mean": 2228.3, | |
| "valid_targets_min": 319 | |
| }, | |
| { | |
| "epoch": 4.744, | |
| "grad_norm": 0.6841328015946316, | |
| "learning_rate": 5.118432332981273e-06, | |
| "loss": 0.3177, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.34120649099349976, | |
| "step": 2965, | |
| "valid_targets_mean": 1947.8, | |
| "valid_targets_min": 294 | |
| }, | |
| { | |
| "epoch": 4.752, | |
| "grad_norm": 0.7312881604013325, | |
| "learning_rate": 5.056404879297887e-06, | |
| "loss": 0.334, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3170173466205597, | |
| "step": 2970, | |
| "valid_targets_mean": 1949.8, | |
| "valid_targets_min": 405 | |
| }, | |
| { | |
| "epoch": 4.76, | |
| "grad_norm": 0.7224196884926505, | |
| "learning_rate": 4.994701128453325e-06, | |
| "loss": 0.3332, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3099290728569031, | |
| "step": 2975, | |
| "valid_targets_mean": 2041.4, | |
| "valid_targets_min": 343 | |
| }, | |
| { | |
| "epoch": 4.768, | |
| "grad_norm": 0.6589153983091567, | |
| "learning_rate": 4.933322417052269e-06, | |
| "loss": 0.313, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2728843688964844, | |
| "step": 2980, | |
| "valid_targets_mean": 2069.7, | |
| "valid_targets_min": 305 | |
| }, | |
| { | |
| "epoch": 4.776, | |
| "grad_norm": 0.769835811513428, | |
| "learning_rate": 4.8722700746585135e-06, | |
| "loss": 0.3583, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3480873703956604, | |
| "step": 2985, | |
| "valid_targets_mean": 1801.6, | |
| "valid_targets_min": 322 | |
| }, | |
| { | |
| "epoch": 4.784, | |
| "grad_norm": 0.7192175550424258, | |
| "learning_rate": 4.811545423766184e-06, | |
| "loss": 0.3498, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.35846394300460815, | |
| "step": 2990, | |
| "valid_targets_mean": 2009.8, | |
| "valid_targets_min": 294 | |
| }, | |
| { | |
| "epoch": 4.792, | |
| "grad_norm": 0.6047543622315156, | |
| "learning_rate": 4.75114977977104e-06, | |
| "loss": 0.3399, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.33460450172424316, | |
| "step": 2995, | |
| "valid_targets_mean": 2401.3, | |
| "valid_targets_min": 289 | |
| }, | |
| { | |
| "epoch": 4.8, | |
| "grad_norm": 0.6041136581551483, | |
| "learning_rate": 4.691084450942047e-06, | |
| "loss": 0.3341, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2866661548614502, | |
| "step": 3000, | |
| "valid_targets_mean": 2317.4, | |
| "valid_targets_min": 335 | |
| }, | |
| { | |
| "epoch": 4.808, | |
| "grad_norm": 0.57423011099845, | |
| "learning_rate": 4.631350738392955e-06, | |
| "loss": 0.3357, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3241468667984009, | |
| "step": 3005, | |
| "valid_targets_mean": 2535.6, | |
| "valid_targets_min": 287 | |
| }, | |
| { | |
| "epoch": 4.816, | |
| "grad_norm": 0.8874875694801316, | |
| "learning_rate": 4.571949936054197e-06, | |
| "loss": 0.3496, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3535746932029724, | |
| "step": 3010, | |
| "valid_targets_mean": 1403.2, | |
| "valid_targets_min": 308 | |
| }, | |
| { | |
| "epoch": 4.824, | |
| "grad_norm": 0.7524222098226543, | |
| "learning_rate": 4.512883330644815e-06, | |
| "loss": 0.3405, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.342057466506958, | |
| "step": 3015, | |
| "valid_targets_mean": 1874.9, | |
| "valid_targets_min": 381 | |
| }, | |
| { | |
| "epoch": 4.832, | |
| "grad_norm": 0.655589657985156, | |
| "learning_rate": 4.454152201644591e-06, | |
| "loss": 0.3339, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.32880115509033203, | |
| "step": 3020, | |
| "valid_targets_mean": 2234.2, | |
| "valid_targets_min": 297 | |
| }, | |
| { | |
| "epoch": 4.84, | |
| "grad_norm": 0.6461768668422677, | |
| "learning_rate": 4.395757821266333e-06, | |
| "loss": 0.3356, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.32608258724212646, | |
| "step": 3025, | |
| "valid_targets_mean": 2179.1, | |
| "valid_targets_min": 462 | |
| }, | |
| { | |
| "epoch": 4.848, | |
| "grad_norm": 0.6649863833814916, | |
| "learning_rate": 4.337701454428322e-06, | |
| "loss": 0.3451, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3342806398868561, | |
| "step": 3030, | |
| "valid_targets_mean": 2163.1, | |
| "valid_targets_min": 382 | |
| }, | |
| { | |
| "epoch": 4.856, | |
| "grad_norm": 0.6098198423480345, | |
| "learning_rate": 4.279984358726925e-06, | |
| "loss": 0.3104, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3045882284641266, | |
| "step": 3035, | |
| "valid_targets_mean": 2482.1, | |
| "valid_targets_min": 323 | |
| }, | |
| { | |
| "epoch": 4.864, | |
| "grad_norm": 0.6783471430711661, | |
| "learning_rate": 4.2226077844093205e-06, | |
| "loss": 0.325, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3344470262527466, | |
| "step": 3040, | |
| "valid_targets_mean": 2026.6, | |
| "valid_targets_min": 328 | |
| }, | |
| { | |
| "epoch": 4.872, | |
| "grad_norm": 0.7271051798429343, | |
| "learning_rate": 4.165572974346435e-06, | |
| "loss": 0.3406, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3305470049381256, | |
| "step": 3045, | |
| "valid_targets_mean": 1882.9, | |
| "valid_targets_min": 440 | |
| }, | |
| { | |
| "epoch": 4.88, | |
| "grad_norm": 0.7064278926259979, | |
| "learning_rate": 4.108881164006033e-06, | |
| "loss": 0.3355, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.33677276968955994, | |
| "step": 3050, | |
| "valid_targets_mean": 2011.0, | |
| "valid_targets_min": 298 | |
| }, | |
| { | |
| "epoch": 4.888, | |
| "grad_norm": 0.7670687816988632, | |
| "learning_rate": 4.05253358142593e-06, | |
| "loss": 0.3505, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.38650834560394287, | |
| "step": 3055, | |
| "valid_targets_mean": 1712.5, | |
| "valid_targets_min": 224 | |
| }, | |
| { | |
| "epoch": 4.896, | |
| "grad_norm": 0.823254864302537, | |
| "learning_rate": 3.9965314471874035e-06, | |
| "loss": 0.3437, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3724706768989563, | |
| "step": 3060, | |
| "valid_targets_mean": 1539.1, | |
| "valid_targets_min": 250 | |
| }, | |
| { | |
| "epoch": 4.904, | |
| "grad_norm": 0.7622151752672579, | |
| "learning_rate": 3.940875974388749e-06, | |
| "loss": 0.3517, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3494607210159302, | |
| "step": 3065, | |
| "valid_targets_mean": 1794.1, | |
| "valid_targets_min": 285 | |
| }, | |
| { | |
| "epoch": 4.912, | |
| "grad_norm": 0.7616459647662795, | |
| "learning_rate": 3.885568368619013e-06, | |
| "loss": 0.3474, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.36004048585891724, | |
| "step": 3070, | |
| "valid_targets_mean": 1942.1, | |
| "valid_targets_min": 259 | |
| }, | |
| { | |
| "epoch": 4.92, | |
| "grad_norm": 0.7405269574107914, | |
| "learning_rate": 3.830609827931877e-06, | |
| "loss": 0.346, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.33390575647354126, | |
| "step": 3075, | |
| "valid_targets_mean": 1956.4, | |
| "valid_targets_min": 365 | |
| }, | |
| { | |
| "epoch": 4.928, | |
| "grad_norm": 0.629174699341205, | |
| "learning_rate": 3.7760015428196694e-06, | |
| "loss": 0.3382, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.33517584204673767, | |
| "step": 3080, | |
| "valid_targets_mean": 2459.3, | |
| "valid_targets_min": 304 | |
| }, | |
| { | |
| "epoch": 4.936, | |
| "grad_norm": 0.7612521654496467, | |
| "learning_rate": 3.7217446961876413e-06, | |
| "loss": 0.3418, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3390304446220398, | |
| "step": 3085, | |
| "valid_targets_mean": 1742.9, | |
| "valid_targets_min": 428 | |
| }, | |
| { | |
| "epoch": 4.944, | |
| "grad_norm": 0.8311235874235049, | |
| "learning_rate": 3.6678404633282826e-06, | |
| "loss": 0.3384, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.35949212312698364, | |
| "step": 3090, | |
| "valid_targets_mean": 1668.8, | |
| "valid_targets_min": 306 | |
| }, | |
| { | |
| "epoch": 4.952, | |
| "grad_norm": 0.6704197389248887, | |
| "learning_rate": 3.6142900118959158e-06, | |
| "loss": 0.3463, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3350984454154968, | |
| "step": 3095, | |
| "valid_targets_mean": 2150.2, | |
| "valid_targets_min": 479 | |
| }, | |
| { | |
| "epoch": 4.96, | |
| "grad_norm": 0.6088463951252256, | |
| "learning_rate": 3.561094501881339e-06, | |
| "loss": 0.3249, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3589388430118561, | |
| "step": 3100, | |
| "valid_targets_mean": 2522.9, | |
| "valid_targets_min": 307 | |
| }, | |
| { | |
| "epoch": 4.968, | |
| "grad_norm": 0.886349784805225, | |
| "learning_rate": 3.5082550855867693e-06, | |
| "loss": 0.3571, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3680537939071655, | |
| "step": 3105, | |
| "valid_targets_mean": 1367.1, | |
| "valid_targets_min": 295 | |
| }, | |
| { | |
| "epoch": 4.976, | |
| "grad_norm": 0.7571526497670212, | |
| "learning_rate": 3.455772907600841e-06, | |
| "loss": 0.3519, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.314603716135025, | |
| "step": 3110, | |
| "valid_targets_mean": 1763.2, | |
| "valid_targets_min": 370 | |
| }, | |
| { | |
| "epoch": 4.984, | |
| "grad_norm": 0.6678147975519791, | |
| "learning_rate": 3.4036491047738075e-06, | |
| "loss": 0.3298, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3356750011444092, | |
| "step": 3115, | |
| "valid_targets_mean": 2106.9, | |
| "valid_targets_min": 260 | |
| }, | |
| { | |
| "epoch": 4.992, | |
| "grad_norm": 0.7096651863010865, | |
| "learning_rate": 3.351884806192933e-06, | |
| "loss": 0.3398, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3487074673175812, | |
| "step": 3120, | |
| "valid_targets_mean": 1872.3, | |
| "valid_targets_min": 282 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "grad_norm": 0.801434104766138, | |
| "learning_rate": 3.3004811331580268e-06, | |
| "loss": 0.3541, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3651238679885864, | |
| "step": 3125, | |
| "valid_targets_mean": 1574.1, | |
| "valid_targets_min": 271 | |
| }, | |
| { | |
| "epoch": 5.008, | |
| "grad_norm": 0.6168166311695106, | |
| "learning_rate": 3.249439199157167e-06, | |
| "loss": 0.3318, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.316333532333374, | |
| "step": 3130, | |
| "valid_targets_mean": 2457.4, | |
| "valid_targets_min": 479 | |
| }, | |
| { | |
| "epoch": 5.016, | |
| "grad_norm": 0.8095895502737311, | |
| "learning_rate": 3.198760109842558e-06, | |
| "loss": 0.3418, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3591688275337219, | |
| "step": 3135, | |
| "valid_targets_mean": 1600.9, | |
| "valid_targets_min": 283 | |
| }, | |
| { | |
| "epoch": 5.024, | |
| "grad_norm": 0.7458829862809224, | |
| "learning_rate": 3.1484449630065894e-06, | |
| "loss": 0.3523, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.37618887424468994, | |
| "step": 3140, | |
| "valid_targets_mean": 1904.2, | |
| "valid_targets_min": 323 | |
| }, | |
| { | |
| "epoch": 5.032, | |
| "grad_norm": 0.7176506812423299, | |
| "learning_rate": 3.0984948485580736e-06, | |
| "loss": 0.3208, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3061801493167877, | |
| "step": 3145, | |
| "valid_targets_mean": 2161.0, | |
| "valid_targets_min": 261 | |
| }, | |
| { | |
| "epoch": 5.04, | |
| "grad_norm": 0.7807096768279923, | |
| "learning_rate": 3.048910848498605e-06, | |
| "loss": 0.3195, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.32322657108306885, | |
| "step": 3150, | |
| "valid_targets_mean": 1788.6, | |
| "valid_targets_min": 374 | |
| }, | |
| { | |
| "epoch": 5.048, | |
| "grad_norm": 0.6540428821985736, | |
| "learning_rate": 2.9996940368991477e-06, | |
| "loss": 0.3182, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.30878007411956787, | |
| "step": 3155, | |
| "valid_targets_mean": 2372.5, | |
| "valid_targets_min": 635 | |
| }, | |
| { | |
| "epoch": 5.056, | |
| "grad_norm": 0.7062110572059693, | |
| "learning_rate": 2.9508454798767516e-06, | |
| "loss": 0.335, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.31720206141471863, | |
| "step": 3160, | |
| "valid_targets_mean": 2043.1, | |
| "valid_targets_min": 305 | |
| }, | |
| { | |
| "epoch": 5.064, | |
| "grad_norm": 0.6884128837476633, | |
| "learning_rate": 2.9023662355714766e-06, | |
| "loss": 0.322, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.31560999155044556, | |
| "step": 3165, | |
| "valid_targets_mean": 2201.7, | |
| "valid_targets_min": 289 | |
| }, | |
| { | |
| "epoch": 5.072, | |
| "grad_norm": 0.9513676887383257, | |
| "learning_rate": 2.8542573541234707e-06, | |
| "loss": 0.3494, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.34736162424087524, | |
| "step": 3170, | |
| "valid_targets_mean": 1400.4, | |
| "valid_targets_min": 333 | |
| }, | |
| { | |
| "epoch": 5.08, | |
| "grad_norm": 0.7583194020113082, | |
| "learning_rate": 2.80651987765018e-06, | |
| "loss": 0.3435, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3454587459564209, | |
| "step": 3175, | |
| "valid_targets_mean": 1794.6, | |
| "valid_targets_min": 330 | |
| }, | |
| { | |
| "epoch": 5.088, | |
| "grad_norm": 0.7751252415284927, | |
| "learning_rate": 2.759154840223843e-06, | |
| "loss": 0.318, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.32245928049087524, | |
| "step": 3180, | |
| "valid_targets_mean": 1767.7, | |
| "valid_targets_min": 274 | |
| }, | |
| { | |
| "epoch": 5.096, | |
| "grad_norm": 0.678029128738721, | |
| "learning_rate": 2.7121632678490327e-06, | |
| "loss": 0.3382, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3460327386856079, | |
| "step": 3185, | |
| "valid_targets_mean": 2288.6, | |
| "valid_targets_min": 234 | |
| }, | |
| { | |
| "epoch": 5.104, | |
| "grad_norm": 0.8230802127344796, | |
| "learning_rate": 2.6655461784404768e-06, | |
| "loss": 0.325, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2984952926635742, | |
| "step": 3190, | |
| "valid_targets_mean": 1525.9, | |
| "valid_targets_min": 378 | |
| }, | |
| { | |
| "epoch": 5.112, | |
| "grad_norm": 0.6260791419331155, | |
| "learning_rate": 2.6193045818009654e-06, | |
| "loss": 0.3231, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.33746832609176636, | |
| "step": 3195, | |
| "valid_targets_mean": 2805.3, | |
| "valid_targets_min": 809 | |
| }, | |
| { | |
| "epoch": 5.12, | |
| "grad_norm": 0.7745724962578587, | |
| "learning_rate": 2.5734394795995066e-06, | |
| "loss": 0.3169, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.29701170325279236, | |
| "step": 3200, | |
| "valid_targets_mean": 1686.8, | |
| "valid_targets_min": 354 | |
| }, | |
| { | |
| "epoch": 5.128, | |
| "grad_norm": 0.764373687743754, | |
| "learning_rate": 2.5279518653496272e-06, | |
| "loss": 0.3427, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3417860269546509, | |
| "step": 3205, | |
| "valid_targets_mean": 1898.5, | |
| "valid_targets_min": 342 | |
| }, | |
| { | |
| "epoch": 5.136, | |
| "grad_norm": 0.5913178857112821, | |
| "learning_rate": 2.4828427243878307e-06, | |
| "loss": 0.3258, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3062329888343811, | |
| "step": 3210, | |
| "valid_targets_mean": 2879.6, | |
| "valid_targets_min": 323 | |
| }, | |
| { | |
| "epoch": 5.144, | |
| "grad_norm": 0.7000971424716644, | |
| "learning_rate": 2.4381130338522762e-06, | |
| "loss": 0.3112, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3380877375602722, | |
| "step": 3215, | |
| "valid_targets_mean": 2145.3, | |
| "valid_targets_min": 257 | |
| }, | |
| { | |
| "epoch": 5.152, | |
| "grad_norm": 0.8785549490261971, | |
| "learning_rate": 2.393763762661596e-06, | |
| "loss": 0.3527, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3691244125366211, | |
| "step": 3220, | |
| "valid_targets_mean": 1531.1, | |
| "valid_targets_min": 272 | |
| }, | |
| { | |
| "epoch": 5.16, | |
| "grad_norm": 0.6881987970742812, | |
| "learning_rate": 2.349795871493925e-06, | |
| "loss": 0.313, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.345796674489975, | |
| "step": 3225, | |
| "valid_targets_mean": 2233.5, | |
| "valid_targets_min": 222 | |
| }, | |
| { | |
| "epoch": 5.168, | |
| "grad_norm": 0.7440524019380678, | |
| "learning_rate": 2.3062103127660686e-06, | |
| "loss": 0.343, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3765197992324829, | |
| "step": 3230, | |
| "valid_targets_mean": 1903.1, | |
| "valid_targets_min": 340 | |
| }, | |
| { | |
| "epoch": 5.176, | |
| "grad_norm": 0.7116226874145382, | |
| "learning_rate": 2.2630080306128833e-06, | |
| "loss": 0.3361, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3360140323638916, | |
| "step": 3235, | |
| "valid_targets_mean": 2187.1, | |
| "valid_targets_min": 322 | |
| }, | |
| { | |
| "epoch": 5.184, | |
| "grad_norm": 0.7157353294285637, | |
| "learning_rate": 2.2201899608668365e-06, | |
| "loss": 0.3201, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3292391002178192, | |
| "step": 3240, | |
| "valid_targets_mean": 1996.5, | |
| "valid_targets_min": 289 | |
| }, | |
| { | |
| "epoch": 5.192, | |
| "grad_norm": 0.8075659035374775, | |
| "learning_rate": 2.1777570310377084e-06, | |
| "loss": 0.3604, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.353488951921463, | |
| "step": 3245, | |
| "valid_targets_mean": 1800.3, | |
| "valid_targets_min": 225 | |
| }, | |
| { | |
| "epoch": 5.2, | |
| "grad_norm": 0.8837596241034232, | |
| "learning_rate": 2.1357101602925323e-06, | |
| "loss": 0.3304, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3177199363708496, | |
| "step": 3250, | |
| "valid_targets_mean": 1669.4, | |
| "valid_targets_min": 271 | |
| }, | |
| { | |
| "epoch": 5.208, | |
| "grad_norm": 0.8592757152496271, | |
| "learning_rate": 2.0940502594356427e-06, | |
| "loss": 0.3424, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.34793126583099365, | |
| "step": 3255, | |
| "valid_targets_mean": 1629.4, | |
| "valid_targets_min": 278 | |
| }, | |
| { | |
| "epoch": 5.216, | |
| "grad_norm": 0.6068332606006058, | |
| "learning_rate": 2.052778230888994e-06, | |
| "loss": 0.3176, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.27529823780059814, | |
| "step": 3260, | |
| "valid_targets_mean": 2799.7, | |
| "valid_targets_min": 437 | |
| }, | |
| { | |
| "epoch": 5.224, | |
| "grad_norm": 0.6723133171431055, | |
| "learning_rate": 2.0118949686725786e-06, | |
| "loss": 0.325, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2843213677406311, | |
| "step": 3265, | |
| "valid_targets_mean": 2273.4, | |
| "valid_targets_min": 460 | |
| }, | |
| { | |
| "epoch": 5.232, | |
| "grad_norm": 0.5740122113088663, | |
| "learning_rate": 1.971401358385072e-06, | |
| "loss": 0.3163, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3310263752937317, | |
| "step": 3270, | |
| "valid_targets_mean": 2962.2, | |
| "valid_targets_min": 352 | |
| }, | |
| { | |
| "epoch": 5.24, | |
| "grad_norm": 0.6047834085752591, | |
| "learning_rate": 1.9312982771846435e-06, | |
| "loss": 0.3234, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.292560875415802, | |
| "step": 3275, | |
| "valid_targets_mean": 2475.9, | |
| "valid_targets_min": 443 | |
| }, | |
| { | |
| "epoch": 5.248, | |
| "grad_norm": 0.7123082387690532, | |
| "learning_rate": 1.8915865937699652e-06, | |
| "loss": 0.3396, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3306275010108948, | |
| "step": 3280, | |
| "valid_targets_mean": 2137.8, | |
| "valid_targets_min": 368 | |
| }, | |
| { | |
| "epoch": 5.256, | |
| "grad_norm": 0.8544328983250468, | |
| "learning_rate": 1.8522671683613946e-06, | |
| "loss": 0.3384, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.336727499961853, | |
| "step": 3285, | |
| "valid_targets_mean": 1707.2, | |
| "valid_targets_min": 343 | |
| }, | |
| { | |
| "epoch": 5.264, | |
| "grad_norm": 0.7484704147015101, | |
| "learning_rate": 1.8133408526823283e-06, | |
| "loss": 0.3374, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3111442029476166, | |
| "step": 3290, | |
| "valid_targets_mean": 2043.2, | |
| "valid_targets_min": 283 | |
| }, | |
| { | |
| "epoch": 5.272, | |
| "grad_norm": 0.7559826239212142, | |
| "learning_rate": 1.7748084899407558e-06, | |
| "loss": 0.3275, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3079603314399719, | |
| "step": 3295, | |
| "valid_targets_mean": 2453.8, | |
| "valid_targets_min": 566 | |
| }, | |
| { | |
| "epoch": 5.28, | |
| "grad_norm": 0.9368604683494061, | |
| "learning_rate": 1.7366709148110118e-06, | |
| "loss": 0.3439, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3560614585876465, | |
| "step": 3300, | |
| "valid_targets_mean": 1434.8, | |
| "valid_targets_min": 276 | |
| }, | |
| { | |
| "epoch": 5.288, | |
| "grad_norm": 0.692979552830229, | |
| "learning_rate": 1.698928953415675e-06, | |
| "loss": 0.3228, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.292755126953125, | |
| "step": 3305, | |
| "valid_targets_mean": 2236.2, | |
| "valid_targets_min": 391 | |
| }, | |
| { | |
| "epoch": 5.296, | |
| "grad_norm": 0.9674463953650514, | |
| "learning_rate": 1.6615834233076756e-06, | |
| "loss": 0.3281, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.335943341255188, | |
| "step": 3310, | |
| "valid_targets_mean": 1242.4, | |
| "valid_targets_min": 289 | |
| }, | |
| { | |
| "epoch": 5.304, | |
| "grad_norm": 0.8478280677057598, | |
| "learning_rate": 1.6246351334525944e-06, | |
| "loss": 0.3411, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3230572044849396, | |
| "step": 3315, | |
| "valid_targets_mean": 1569.6, | |
| "valid_targets_min": 307 | |
| }, | |
| { | |
| "epoch": 5.312, | |
| "grad_norm": 0.8876053781501144, | |
| "learning_rate": 1.5880848842111362e-06, | |
| "loss": 0.3246, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.32399290800094604, | |
| "step": 3320, | |
| "valid_targets_mean": 1403.8, | |
| "valid_targets_min": 261 | |
| }, | |
| { | |
| "epoch": 5.32, | |
| "grad_norm": 0.6632377237032445, | |
| "learning_rate": 1.5519334673218023e-06, | |
| "loss": 0.3429, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.34573253989219666, | |
| "step": 3325, | |
| "valid_targets_mean": 2316.0, | |
| "valid_targets_min": 452 | |
| }, | |
| { | |
| "epoch": 5.328, | |
| "grad_norm": 0.8801648422040877, | |
| "learning_rate": 1.5161816658837002e-06, | |
| "loss": 0.3289, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3267403244972229, | |
| "step": 3330, | |
| "valid_targets_mean": 1425.0, | |
| "valid_targets_min": 351 | |
| }, | |
| { | |
| "epoch": 5.336, | |
| "grad_norm": 0.7714577400348394, | |
| "learning_rate": 1.4808302543396423e-06, | |
| "loss": 0.3307, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3234866261482239, | |
| "step": 3335, | |
| "valid_targets_mean": 1636.6, | |
| "valid_targets_min": 263 | |
| }, | |
| { | |
| "epoch": 5.344, | |
| "grad_norm": 0.642035554386149, | |
| "learning_rate": 1.445879998459314e-06, | |
| "loss": 0.3205, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3185698688030243, | |
| "step": 3340, | |
| "valid_targets_mean": 2389.4, | |
| "valid_targets_min": 293 | |
| }, | |
| { | |
| "epoch": 5.352, | |
| "grad_norm": 0.7958677187406809, | |
| "learning_rate": 1.4113316553227296e-06, | |
| "loss": 0.3384, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.37837621569633484, | |
| "step": 3345, | |
| "valid_targets_mean": 2085.4, | |
| "valid_targets_min": 376 | |
| }, | |
| { | |
| "epoch": 5.36, | |
| "grad_norm": 0.8254616003100667, | |
| "learning_rate": 1.3771859733037896e-06, | |
| "loss": 0.3316, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.34857773780822754, | |
| "step": 3350, | |
| "valid_targets_mean": 1714.2, | |
| "valid_targets_min": 274 | |
| }, | |
| { | |
| "epoch": 5.368, | |
| "grad_norm": 0.6807508098199462, | |
| "learning_rate": 1.3434436920541072e-06, | |
| "loss": 0.3532, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3561273515224457, | |
| "step": 3355, | |
| "valid_targets_mean": 2241.6, | |
| "valid_targets_min": 386 | |
| }, | |
| { | |
| "epoch": 5.376, | |
| "grad_norm": 0.7880738724594502, | |
| "learning_rate": 1.3101055424869768e-06, | |
| "loss": 0.3253, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3386726677417755, | |
| "step": 3360, | |
| "valid_targets_mean": 1655.6, | |
| "valid_targets_min": 341 | |
| }, | |
| { | |
| "epoch": 5.384, | |
| "grad_norm": 0.8044155112355199, | |
| "learning_rate": 1.2771722467615266e-06, | |
| "loss": 0.3361, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.34906601905822754, | |
| "step": 3365, | |
| "valid_targets_mean": 1888.2, | |
| "valid_targets_min": 311 | |
| }, | |
| { | |
| "epoch": 5.392, | |
| "grad_norm": 0.946556302943149, | |
| "learning_rate": 1.2446445182670818e-06, | |
| "loss": 0.3231, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.33505770564079285, | |
| "step": 3370, | |
| "valid_targets_mean": 1463.1, | |
| "valid_targets_min": 241 | |
| }, | |
| { | |
| "epoch": 5.4, | |
| "grad_norm": 0.6668652899131327, | |
| "learning_rate": 1.21252306160772e-06, | |
| "loss": 0.3273, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.35054919123649597, | |
| "step": 3375, | |
| "valid_targets_mean": 2484.1, | |
| "valid_targets_min": 567 | |
| }, | |
| { | |
| "epoch": 5.408, | |
| "grad_norm": 1.1006169555817005, | |
| "learning_rate": 1.1808085725870088e-06, | |
| "loss": 0.3374, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3627769947052002, | |
| "step": 3380, | |
| "valid_targets_mean": 942.7, | |
| "valid_targets_min": 244 | |
| }, | |
| { | |
| "epoch": 5.416, | |
| "grad_norm": 0.710177773783025, | |
| "learning_rate": 1.1495017381929197e-06, | |
| "loss": 0.3348, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3235899806022644, | |
| "step": 3385, | |
| "valid_targets_mean": 2099.7, | |
| "valid_targets_min": 389 | |
| }, | |
| { | |
| "epoch": 5.424, | |
| "grad_norm": 0.6360542443344822, | |
| "learning_rate": 1.1186032365829514e-06, | |
| "loss": 0.3141, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2974139451980591, | |
| "step": 3390, | |
| "valid_targets_mean": 2257.2, | |
| "valid_targets_min": 335 | |
| }, | |
| { | |
| "epoch": 5.432, | |
| "grad_norm": 0.7700543404194444, | |
| "learning_rate": 1.088113737069456e-06, | |
| "loss": 0.3457, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3570263981819153, | |
| "step": 3395, | |
| "valid_targets_mean": 1822.6, | |
| "valid_targets_min": 280 | |
| }, | |
| { | |
| "epoch": 5.44, | |
| "grad_norm": 0.9275216639277493, | |
| "learning_rate": 1.0580339001051153e-06, | |
| "loss": 0.3361, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.35213181376457214, | |
| "step": 3400, | |
| "valid_targets_mean": 1528.0, | |
| "valid_targets_min": 358 | |
| }, | |
| { | |
| "epoch": 5.448, | |
| "grad_norm": 0.789841574621178, | |
| "learning_rate": 1.0283643772686535e-06, | |
| "loss": 0.3331, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.33587008714675903, | |
| "step": 3405, | |
| "valid_targets_mean": 1687.8, | |
| "valid_targets_min": 279 | |
| }, | |
| { | |
| "epoch": 5.456, | |
| "grad_norm": 0.7625043418529229, | |
| "learning_rate": 9.991058112507113e-07, | |
| "loss": 0.3285, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3387317657470703, | |
| "step": 3410, | |
| "valid_targets_mean": 1853.2, | |
| "valid_targets_min": 336 | |
| }, | |
| { | |
| "epoch": 5.464, | |
| "grad_norm": 0.7262108890341252, | |
| "learning_rate": 9.702588358399345e-07, | |
| "loss": 0.3383, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.35995280742645264, | |
| "step": 3415, | |
| "valid_targets_mean": 2125.7, | |
| "valid_targets_min": 263 | |
| }, | |
| { | |
| "epoch": 5.4719999999999995, | |
| "grad_norm": 0.8471611948426642, | |
| "learning_rate": 9.418240759092434e-07, | |
| "loss": 0.3292, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3054879307746887, | |
| "step": 3420, | |
| "valid_targets_mean": 1520.5, | |
| "valid_targets_min": 360 | |
| }, | |
| { | |
| "epoch": 5.48, | |
| "grad_norm": 0.6778318495071975, | |
| "learning_rate": 9.138021474022763e-07, | |
| "loss": 0.3233, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2818812429904938, | |
| "step": 3425, | |
| "valid_targets_mean": 2094.9, | |
| "valid_targets_min": 284 | |
| }, | |
| { | |
| "epoch": 5.4879999999999995, | |
| "grad_norm": 0.6708692528522718, | |
| "learning_rate": 8.861936573200825e-07, | |
| "loss": 0.3402, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.32221177220344543, | |
| "step": 3430, | |
| "valid_targets_mean": 2342.2, | |
| "valid_targets_min": 297 | |
| }, | |
| { | |
| "epoch": 5.496, | |
| "grad_norm": 0.6532748349469135, | |
| "learning_rate": 8.58999203707942e-07, | |
| "loss": 0.329, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.30228191614151, | |
| "step": 3435, | |
| "valid_targets_mean": 2445.2, | |
| "valid_targets_min": 404 | |
| }, | |
| { | |
| "epoch": 5.504, | |
| "grad_norm": 0.5726081781167237, | |
| "learning_rate": 8.322193756424401e-07, | |
| "loss": 0.322, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2853536605834961, | |
| "step": 3440, | |
| "valid_targets_mean": 2608.8, | |
| "valid_targets_min": 316 | |
| }, | |
| { | |
| "epoch": 5.5120000000000005, | |
| "grad_norm": 0.6241256031339275, | |
| "learning_rate": 8.058547532186667e-07, | |
| "loss": 0.3221, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.28844356536865234, | |
| "step": 3445, | |
| "valid_targets_mean": 2246.0, | |
| "valid_targets_min": 344 | |
| }, | |
| { | |
| "epoch": 5.52, | |
| "grad_norm": 0.620098378031259, | |
| "learning_rate": 7.799059075376991e-07, | |
| "loss": 0.3226, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.32384443283081055, | |
| "step": 3450, | |
| "valid_targets_mean": 3012.6, | |
| "valid_targets_min": 528 | |
| }, | |
| { | |
| "epoch": 5.5280000000000005, | |
| "grad_norm": 0.7945148445758893, | |
| "learning_rate": 7.54373400694195e-07, | |
| "loss": 0.3234, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3543769121170044, | |
| "step": 3455, | |
| "valid_targets_mean": 2061.6, | |
| "valid_targets_min": 207 | |
| }, | |
| { | |
| "epoch": 5.536, | |
| "grad_norm": 0.6549048898714214, | |
| "learning_rate": 7.292577857642302e-07, | |
| "loss": 0.3306, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3170838952064514, | |
| "step": 3460, | |
| "valid_targets_mean": 2555.1, | |
| "valid_targets_min": 353 | |
| }, | |
| { | |
| "epoch": 5.5440000000000005, | |
| "grad_norm": 0.7943780052965277, | |
| "learning_rate": 7.045596067933158e-07, | |
| "loss": 0.3287, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.34069475531578064, | |
| "step": 3465, | |
| "valid_targets_mean": 1780.8, | |
| "valid_targets_min": 485 | |
| }, | |
| { | |
| "epoch": 5.552, | |
| "grad_norm": 0.6612986111726853, | |
| "learning_rate": 6.80279398784609e-07, | |
| "loss": 0.3311, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.348349004983902, | |
| "step": 3470, | |
| "valid_targets_mean": 2754.0, | |
| "valid_targets_min": 306 | |
| }, | |
| { | |
| "epoch": 5.5600000000000005, | |
| "grad_norm": 0.7185356693695621, | |
| "learning_rate": 6.56417687687343e-07, | |
| "loss": 0.3301, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.33669647574424744, | |
| "step": 3475, | |
| "valid_targets_mean": 2008.5, | |
| "valid_targets_min": 429 | |
| }, | |
| { | |
| "epoch": 5.568, | |
| "grad_norm": 0.6635770223121686, | |
| "learning_rate": 6.329749903854066e-07, | |
| "loss": 0.3111, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.30377066135406494, | |
| "step": 3480, | |
| "valid_targets_mean": 2455.4, | |
| "valid_targets_min": 353 | |
| }, | |
| { | |
| "epoch": 5.576, | |
| "grad_norm": 0.731883116117695, | |
| "learning_rate": 6.099518146861628e-07, | |
| "loss": 0.3338, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3424713909626007, | |
| "step": 3485, | |
| "valid_targets_mean": 1975.4, | |
| "valid_targets_min": 323 | |
| }, | |
| { | |
| "epoch": 5.584, | |
| "grad_norm": 0.6529003283713056, | |
| "learning_rate": 5.873486593094546e-07, | |
| "loss": 0.3387, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3306881785392761, | |
| "step": 3490, | |
| "valid_targets_mean": 2475.8, | |
| "valid_targets_min": 416 | |
| }, | |
| { | |
| "epoch": 5.592, | |
| "grad_norm": 0.8033401606914047, | |
| "learning_rate": 5.651660138767834e-07, | |
| "loss": 0.3262, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.31199198961257935, | |
| "step": 3495, | |
| "valid_targets_mean": 1727.5, | |
| "valid_targets_min": 280 | |
| }, | |
| { | |
| "epoch": 5.6, | |
| "grad_norm": 0.6888026517088008, | |
| "learning_rate": 5.434043589007232e-07, | |
| "loss": 0.3262, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3344615697860718, | |
| "step": 3500, | |
| "valid_targets_mean": 2408.3, | |
| "valid_targets_min": 318 | |
| }, | |
| { | |
| "epoch": 5.608, | |
| "grad_norm": 0.7167930643520513, | |
| "learning_rate": 5.220641657744963e-07, | |
| "loss": 0.3128, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.289031445980072, | |
| "step": 3505, | |
| "valid_targets_mean": 2380.4, | |
| "valid_targets_min": 343 | |
| }, | |
| { | |
| "epoch": 5.616, | |
| "grad_norm": 0.6650020725804082, | |
| "learning_rate": 5.0114589676177e-07, | |
| "loss": 0.3318, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3253558278083801, | |
| "step": 3510, | |
| "valid_targets_mean": 2200.4, | |
| "valid_targets_min": 262 | |
| }, | |
| { | |
| "epoch": 5.624, | |
| "grad_norm": 0.8321317810618367, | |
| "learning_rate": 4.806500049866492e-07, | |
| "loss": 0.3355, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3572784960269928, | |
| "step": 3515, | |
| "valid_targets_mean": 1833.8, | |
| "valid_targets_min": 483 | |
| }, | |
| { | |
| "epoch": 5.632, | |
| "grad_norm": 0.7107632033001783, | |
| "learning_rate": 4.6057693442383756e-07, | |
| "loss": 0.3165, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2704983651638031, | |
| "step": 3520, | |
| "valid_targets_mean": 2014.1, | |
| "valid_targets_min": 342 | |
| }, | |
| { | |
| "epoch": 5.64, | |
| "grad_norm": 0.6520521303095141, | |
| "learning_rate": 4.409271198890519e-07, | |
| "loss": 0.324, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.31402522325515747, | |
| "step": 3525, | |
| "valid_targets_mean": 2245.9, | |
| "valid_targets_min": 377 | |
| }, | |
| { | |
| "epoch": 5.648, | |
| "grad_norm": 0.738454412849037, | |
| "learning_rate": 4.217009870295763e-07, | |
| "loss": 0.3437, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.33777379989624023, | |
| "step": 3530, | |
| "valid_targets_mean": 1982.0, | |
| "valid_targets_min": 373 | |
| }, | |
| { | |
| "epoch": 5.656, | |
| "grad_norm": 0.7089086442983696, | |
| "learning_rate": 4.028989523150628e-07, | |
| "loss": 0.3166, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3117513656616211, | |
| "step": 3535, | |
| "valid_targets_mean": 2105.9, | |
| "valid_targets_min": 254 | |
| }, | |
| { | |
| "epoch": 5.664, | |
| "grad_norm": 0.5722406607187598, | |
| "learning_rate": 3.8452142302849216e-07, | |
| "loss": 0.3266, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.27710044384002686, | |
| "step": 3540, | |
| "valid_targets_mean": 2860.7, | |
| "valid_targets_min": 263 | |
| }, | |
| { | |
| "epoch": 5.672, | |
| "grad_norm": 0.7556472335095206, | |
| "learning_rate": 3.665687972573606e-07, | |
| "loss": 0.3379, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.30273568630218506, | |
| "step": 3545, | |
| "valid_targets_mean": 1655.9, | |
| "valid_targets_min": 259 | |
| }, | |
| { | |
| "epoch": 5.68, | |
| "grad_norm": 0.6632535758996941, | |
| "learning_rate": 3.4904146388506475e-07, | |
| "loss": 0.3226, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.28801876306533813, | |
| "step": 3550, | |
| "valid_targets_mean": 2170.7, | |
| "valid_targets_min": 326 | |
| }, | |
| { | |
| "epoch": 5.688, | |
| "grad_norm": 0.8987750692265181, | |
| "learning_rate": 3.319398025824572e-07, | |
| "loss": 0.326, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.30036014318466187, | |
| "step": 3555, | |
| "valid_targets_mean": 1281.9, | |
| "valid_targets_min": 271 | |
| }, | |
| { | |
| "epoch": 5.696, | |
| "grad_norm": 0.7550371265505398, | |
| "learning_rate": 3.152641837996373e-07, | |
| "loss": 0.3314, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3175736963748932, | |
| "step": 3560, | |
| "valid_targets_mean": 2092.7, | |
| "valid_targets_min": 320 | |
| }, | |
| { | |
| "epoch": 5.704, | |
| "grad_norm": 0.7741459072666987, | |
| "learning_rate": 2.990149687579247e-07, | |
| "loss": 0.335, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3476804792881012, | |
| "step": 3565, | |
| "valid_targets_mean": 1795.7, | |
| "valid_targets_min": 271 | |
| }, | |
| { | |
| "epoch": 5.712, | |
| "grad_norm": 0.7602774287821416, | |
| "learning_rate": 2.8319250944203625e-07, | |
| "loss": 0.3357, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.34763944149017334, | |
| "step": 3570, | |
| "valid_targets_mean": 1917.4, | |
| "valid_targets_min": 299 | |
| }, | |
| { | |
| "epoch": 5.72, | |
| "grad_norm": 0.7742965493927609, | |
| "learning_rate": 2.677971485924502e-07, | |
| "loss": 0.3269, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.30256009101867676, | |
| "step": 3575, | |
| "valid_targets_mean": 1715.3, | |
| "valid_targets_min": 288 | |
| }, | |
| { | |
| "epoch": 5.728, | |
| "grad_norm": 0.7300230601121387, | |
| "learning_rate": 2.52829219697992e-07, | |
| "loss": 0.3039, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.30355215072631836, | |
| "step": 3580, | |
| "valid_targets_mean": 1757.9, | |
| "valid_targets_min": 267 | |
| }, | |
| { | |
| "epoch": 5.736, | |
| "grad_norm": 0.7762399783738475, | |
| "learning_rate": 2.3828904698861565e-07, | |
| "loss": 0.3188, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3432141840457916, | |
| "step": 3585, | |
| "valid_targets_mean": 1766.1, | |
| "valid_targets_min": 351 | |
| }, | |
| { | |
| "epoch": 5.744, | |
| "grad_norm": 0.6808736664760406, | |
| "learning_rate": 2.2417694542836489e-07, | |
| "loss": 0.3468, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3141390383243561, | |
| "step": 3590, | |
| "valid_targets_mean": 2099.9, | |
| "valid_targets_min": 278 | |
| }, | |
| { | |
| "epoch": 5.752, | |
| "grad_norm": 0.8148780711629424, | |
| "learning_rate": 2.104932207085586e-07, | |
| "loss": 0.3187, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3025011420249939, | |
| "step": 3595, | |
| "valid_targets_mean": 2257.2, | |
| "valid_targets_min": 364 | |
| }, | |
| { | |
| "epoch": 5.76, | |
| "grad_norm": 0.6423137397790368, | |
| "learning_rate": 1.97238169241174e-07, | |
| "loss": 0.3364, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3432384133338928, | |
| "step": 3600, | |
| "valid_targets_mean": 2428.4, | |
| "valid_targets_min": 252 | |
| }, | |
| { | |
| "epoch": 5.768, | |
| "grad_norm": 0.7426654171361444, | |
| "learning_rate": 1.8441207815241613e-07, | |
| "loss": 0.3186, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3106253147125244, | |
| "step": 3605, | |
| "valid_targets_mean": 2039.9, | |
| "valid_targets_min": 271 | |
| }, | |
| { | |
| "epoch": 5.776, | |
| "grad_norm": 0.6652373779338768, | |
| "learning_rate": 1.720152252765095e-07, | |
| "loss": 0.3252, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.32659274339675903, | |
| "step": 3610, | |
| "valid_targets_mean": 2137.1, | |
| "valid_targets_min": 409 | |
| }, | |
| { | |
| "epoch": 5.784, | |
| "grad_norm": 0.6701232063134795, | |
| "learning_rate": 1.600478791496629e-07, | |
| "loss": 0.3178, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.29657870531082153, | |
| "step": 3615, | |
| "valid_targets_mean": 2267.0, | |
| "valid_targets_min": 441 | |
| }, | |
| { | |
| "epoch": 5.792, | |
| "grad_norm": 0.7303502269080819, | |
| "learning_rate": 1.4851029900427415e-07, | |
| "loss": 0.3064, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3133362829685211, | |
| "step": 3620, | |
| "valid_targets_mean": 1803.5, | |
| "valid_targets_min": 238 | |
| }, | |
| { | |
| "epoch": 5.8, | |
| "grad_norm": 0.6533291656070508, | |
| "learning_rate": 1.3740273476329224e-07, | |
| "loss": 0.3158, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.30309417843818665, | |
| "step": 3625, | |
| "valid_targets_mean": 2136.8, | |
| "valid_targets_min": 259 | |
| }, | |
| { | |
| "epoch": 5.808, | |
| "grad_norm": 0.7399739863973842, | |
| "learning_rate": 1.2672542703482616e-07, | |
| "loss": 0.3332, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.30318236351013184, | |
| "step": 3630, | |
| "valid_targets_mean": 2139.0, | |
| "valid_targets_min": 494 | |
| }, | |
| { | |
| "epoch": 5.816, | |
| "grad_norm": 0.778385718165956, | |
| "learning_rate": 1.164786071069135e-07, | |
| "loss": 0.3257, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2864413559436798, | |
| "step": 3635, | |
| "valid_targets_mean": 1539.3, | |
| "valid_targets_min": 294 | |
| }, | |
| { | |
| "epoch": 5.824, | |
| "grad_norm": 0.8987433166924019, | |
| "learning_rate": 1.0666249694251785e-07, | |
| "loss": 0.3336, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.308305025100708, | |
| "step": 3640, | |
| "valid_targets_mean": 1533.8, | |
| "valid_targets_min": 374 | |
| }, | |
| { | |
| "epoch": 5.832, | |
| "grad_norm": 0.6927665376123957, | |
| "learning_rate": 9.72773091747281e-08, | |
| "loss": 0.3174, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3018300533294678, | |
| "step": 3645, | |
| "valid_targets_mean": 1903.2, | |
| "valid_targets_min": 265 | |
| }, | |
| { | |
| "epoch": 5.84, | |
| "grad_norm": 0.8298783342243384, | |
| "learning_rate": 8.832324710214002e-08, | |
| "loss": 0.3382, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.35006678104400635, | |
| "step": 3650, | |
| "valid_targets_mean": 1798.3, | |
| "valid_targets_min": 334 | |
| }, | |
| { | |
| "epoch": 5.848, | |
| "grad_norm": 0.7622869191108226, | |
| "learning_rate": 7.980050468445744e-08, | |
| "loss": 0.336, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3575170338153839, | |
| "step": 3655, | |
| "valid_targets_mean": 1869.7, | |
| "valid_targets_min": 369 | |
| }, | |
| { | |
| "epoch": 5.856, | |
| "grad_norm": 0.8246638087544609, | |
| "learning_rate": 7.170926653829347e-08, | |
| "loss": 0.3574, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3515303134918213, | |
| "step": 3660, | |
| "valid_targets_mean": 1992.5, | |
| "valid_targets_min": 425 | |
| }, | |
| { | |
| "epoch": 5.864, | |
| "grad_norm": 0.6754002624217912, | |
| "learning_rate": 6.404970793317145e-08, | |
| "loss": 0.3261, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.330025851726532, | |
| "step": 3665, | |
| "valid_targets_mean": 2442.9, | |
| "valid_targets_min": 433 | |
| }, | |
| { | |
| "epoch": 5.872, | |
| "grad_norm": 0.6473983262842183, | |
| "learning_rate": 5.682199478772133e-08, | |
| "loss": 0.3331, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3028303384780884, | |
| "step": 3670, | |
| "valid_targets_mean": 2476.4, | |
| "valid_targets_min": 383 | |
| }, | |
| { | |
| "epoch": 5.88, | |
| "grad_norm": 0.7639363603266618, | |
| "learning_rate": 5.0026283666093635e-08, | |
| "loss": 0.3309, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.33052030205726624, | |
| "step": 3675, | |
| "valid_targets_mean": 2025.9, | |
| "valid_targets_min": 372 | |
| }, | |
| { | |
| "epoch": 5.888, | |
| "grad_norm": 0.6294062682519683, | |
| "learning_rate": 4.366272177456665e-08, | |
| "loss": 0.332, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.33227160573005676, | |
| "step": 3680, | |
| "valid_targets_mean": 2534.4, | |
| "valid_targets_min": 334 | |
| }, | |
| { | |
| "epoch": 5.896, | |
| "grad_norm": 0.5896518196906118, | |
| "learning_rate": 3.773144695834674e-08, | |
| "loss": 0.3028, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.27254822850227356, | |
| "step": 3685, | |
| "valid_targets_mean": 2382.2, | |
| "valid_targets_min": 332 | |
| }, | |
| { | |
| "epoch": 5.904, | |
| "grad_norm": 0.7444979320101701, | |
| "learning_rate": 3.223258769860405e-08, | |
| "loss": 0.3259, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.30232083797454834, | |
| "step": 3690, | |
| "valid_targets_mean": 2272.6, | |
| "valid_targets_min": 415 | |
| }, | |
| { | |
| "epoch": 5.912, | |
| "grad_norm": 0.649888311599965, | |
| "learning_rate": 2.716626310966808e-08, | |
| "loss": 0.3296, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3510351777076721, | |
| "step": 3695, | |
| "valid_targets_mean": 2522.5, | |
| "valid_targets_min": 390 | |
| }, | |
| { | |
| "epoch": 5.92, | |
| "grad_norm": 0.6752144213255896, | |
| "learning_rate": 2.253258293645866e-08, | |
| "loss": 0.3219, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3135150671005249, | |
| "step": 3700, | |
| "valid_targets_mean": 2086.2, | |
| "valid_targets_min": 274 | |
| }, | |
| { | |
| "epoch": 5.928, | |
| "grad_norm": 0.6630077719636371, | |
| "learning_rate": 1.8331647552110033e-08, | |
| "loss": 0.3286, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.313254177570343, | |
| "step": 3705, | |
| "valid_targets_mean": 2617.1, | |
| "valid_targets_min": 282 | |
| }, | |
| { | |
| "epoch": 5.936, | |
| "grad_norm": 0.7463183447088082, | |
| "learning_rate": 1.456354795578374e-08, | |
| "loss": 0.3228, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3249838352203369, | |
| "step": 3710, | |
| "valid_targets_mean": 1802.7, | |
| "valid_targets_min": 295 | |
| }, | |
| { | |
| "epoch": 5.944, | |
| "grad_norm": 0.7711320827925983, | |
| "learning_rate": 1.1228365770714622e-08, | |
| "loss": 0.346, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.35792410373687744, | |
| "step": 3715, | |
| "valid_targets_mean": 1879.2, | |
| "valid_targets_min": 422 | |
| }, | |
| { | |
| "epoch": 5.952, | |
| "grad_norm": 0.6984324142077142, | |
| "learning_rate": 8.326173242432233e-09, | |
| "loss": 0.337, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3188132643699646, | |
| "step": 3720, | |
| "valid_targets_mean": 2298.1, | |
| "valid_targets_min": 435 | |
| }, | |
| { | |
| "epoch": 5.96, | |
| "grad_norm": 0.7136132209222245, | |
| "learning_rate": 5.857033237199883e-09, | |
| "loss": 0.3268, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3068236708641052, | |
| "step": 3725, | |
| "valid_targets_mean": 2063.9, | |
| "valid_targets_min": 441 | |
| }, | |
| { | |
| "epoch": 5.968, | |
| "grad_norm": 0.70950185567798, | |
| "learning_rate": 3.820999240644608e-09, | |
| "loss": 0.326, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3352380692958832, | |
| "step": 3730, | |
| "valid_targets_mean": 2178.1, | |
| "valid_targets_min": 292 | |
| }, | |
| { | |
| "epoch": 5.976, | |
| "grad_norm": 0.6878972735540901, | |
| "learning_rate": 2.2181153566158687e-09, | |
| "loss": 0.3276, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.30763864517211914, | |
| "step": 3735, | |
| "valid_targets_mean": 2563.0, | |
| "valid_targets_min": 262 | |
| }, | |
| { | |
| "epoch": 5.984, | |
| "grad_norm": 0.7742897122777167, | |
| "learning_rate": 1.0484163062107755e-09, | |
| "loss": 0.3181, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.32049447298049927, | |
| "step": 3740, | |
| "valid_targets_mean": 2073.6, | |
| "valid_targets_min": 330 | |
| }, | |
| { | |
| "epoch": 5.992, | |
| "grad_norm": 0.6597358357567812, | |
| "learning_rate": 3.11927427034675e-10, | |
| "loss": 0.3225, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3025015592575073, | |
| "step": 3745, | |
| "valid_targets_mean": 2095.5, | |
| "valid_targets_min": 282 | |
| }, | |
| { | |
| "epoch": 6.0, | |
| "grad_norm": 0.7059371405305053, | |
| "learning_rate": 8.664672648261985e-12, | |
| "loss": 0.3141, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.28063035011291504, | |
| "step": 3750, | |
| "valid_targets_mean": 1914.4, | |
| "valid_targets_min": 304 | |
| }, | |
| { | |
| "epoch": 6.0, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.28063035011291504, | |
| "step": 3750, | |
| "total_flos": 598298883194880.0, | |
| "train_loss": 0.39238279145558674, | |
| "train_runtime": 16274.8111, | |
| "train_samples_per_second": 3.685, | |
| "train_steps_per_second": 0.23, | |
| "valid_targets_mean": 1914.4, | |
| "valid_targets_min": 304 | |
| } | |
| ], | |
| "logging_steps": 5, | |
| "max_steps": 3750, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 6, | |
| "save_steps": 100, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 598298883194880.0, | |
| "train_batch_size": 1, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |