diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,8297 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 7.0, + "eval_steps": 500, + "global_step": 3752, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.009337068160597572, + "grad_norm": 11.842193000107052, + "learning_rate": 4.2553191489361704e-07, + "loss": 0.7252, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.39998501539230347, + "step": 5, + "valid_targets_mean": 5089.9, + "valid_targets_min": 663 + }, + { + "epoch": 0.018674136321195144, + "grad_norm": 11.11322323852151, + "learning_rate": 9.574468085106384e-07, + "loss": 0.7372, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2813454270362854, + "step": 10, + "valid_targets_mean": 3333.6, + "valid_targets_min": 317 + }, + { + "epoch": 0.028011204481792718, + "grad_norm": 8.321236406351312, + "learning_rate": 1.4893617021276596e-06, + "loss": 0.7117, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3215426206588745, + "step": 15, + "valid_targets_mean": 6668.4, + "valid_targets_min": 2493 + }, + { + "epoch": 0.03734827264239029, + "grad_norm": 6.988660484410487, + "learning_rate": 2.021276595744681e-06, + "loss": 0.7032, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3694811463356018, + "step": 20, + "valid_targets_mean": 5102.9, + "valid_targets_min": 2508 + }, + { + "epoch": 0.04668534080298786, + "grad_norm": 4.17271611611374, + "learning_rate": 2.553191489361702e-06, + "loss": 0.6682, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3270280361175537, + "step": 25, + "valid_targets_mean": 5968.6, + "valid_targets_min": 1455 + }, + { + "epoch": 0.056022408963585436, + "grad_norm": 3.630147128493012, + "learning_rate": 3.0851063829787237e-06, + "loss": 0.6429, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.32875552773475647, + "step": 30, + "valid_targets_mean": 5018.0, + "valid_targets_min": 1154 + }, + { + "epoch": 0.06535947712418301, + "grad_norm": 2.8359174080340503, + "learning_rate": 3.6170212765957453e-06, + "loss": 0.5787, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22009414434432983, + "step": 35, + "valid_targets_mean": 4123.0, + "valid_targets_min": 787 + }, + { + "epoch": 0.07469654528478058, + "grad_norm": 1.823640260543609, + "learning_rate": 4.148936170212766e-06, + "loss": 0.5378, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25594162940979004, + "step": 40, + "valid_targets_mean": 4500.6, + "valid_targets_min": 526 + }, + { + "epoch": 0.08403361344537816, + "grad_norm": 1.2733430074959, + "learning_rate": 4.680851063829788e-06, + "loss": 0.5296, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2038659304380417, + "step": 45, + "valid_targets_mean": 3575.8, + "valid_targets_min": 1010 + }, + { + "epoch": 0.09337068160597572, + "grad_norm": 0.9453407225653655, + "learning_rate": 5.212765957446809e-06, + "loss": 0.4999, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.31766122579574585, + "step": 50, + "valid_targets_mean": 6457.0, + "valid_targets_min": 3011 + }, + { + "epoch": 0.10270774976657329, + "grad_norm": 0.8366621587763116, + "learning_rate": 5.744680851063831e-06, + "loss": 0.4893, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2810186743736267, + "step": 55, + "valid_targets_mean": 4946.5, + "valid_targets_min": 1322 + }, + { + "epoch": 0.11204481792717087, + "grad_norm": 0.7162187178442702, + "learning_rate": 6.276595744680851e-06, + "loss": 0.4988, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2409079372882843, + "step": 60, + "valid_targets_mean": 4815.0, + "valid_targets_min": 909 + }, + { + "epoch": 0.12138188608776844, + "grad_norm": 0.648521786931605, + "learning_rate": 6.808510638297873e-06, + "loss": 0.4321, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22791031002998352, + "step": 65, + "valid_targets_mean": 6434.8, + "valid_targets_min": 2350 + }, + { + "epoch": 0.13071895424836602, + "grad_norm": 0.7141512298084438, + "learning_rate": 7.340425531914894e-06, + "loss": 0.4525, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21196916699409485, + "step": 70, + "valid_targets_mean": 4612.6, + "valid_targets_min": 376 + }, + { + "epoch": 0.1400560224089636, + "grad_norm": 0.6587583613135741, + "learning_rate": 7.872340425531916e-06, + "loss": 0.4575, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3069528341293335, + "step": 75, + "valid_targets_mean": 5798.2, + "valid_targets_min": 1551 + }, + { + "epoch": 0.14939309056956115, + "grad_norm": 0.6119114462929907, + "learning_rate": 8.404255319148937e-06, + "loss": 0.4323, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23691843450069427, + "step": 80, + "valid_targets_mean": 5551.4, + "valid_targets_min": 887 + }, + { + "epoch": 0.15873015873015872, + "grad_norm": 0.9060011007479786, + "learning_rate": 8.936170212765958e-06, + "loss": 0.4406, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23803825676441193, + "step": 85, + "valid_targets_mean": 2542.2, + "valid_targets_min": 316 + }, + { + "epoch": 0.16806722689075632, + "grad_norm": 0.7007607617786337, + "learning_rate": 9.46808510638298e-06, + "loss": 0.4241, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21719379723072052, + "step": 90, + "valid_targets_mean": 3796.9, + "valid_targets_min": 1279 + }, + { + "epoch": 0.17740429505135388, + "grad_norm": 0.6983734348921511, + "learning_rate": 1e-05, + "loss": 0.4368, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2181054949760437, + "step": 95, + "valid_targets_mean": 4321.0, + "valid_targets_min": 2154 + }, + { + "epoch": 0.18674136321195145, + "grad_norm": 0.5696210723009923, + "learning_rate": 1.0531914893617022e-05, + "loss": 0.4162, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1508343368768692, + "step": 100, + "valid_targets_mean": 2977.1, + "valid_targets_min": 1543 + }, + { + "epoch": 0.19607843137254902, + "grad_norm": 0.5849433947897745, + "learning_rate": 1.1063829787234044e-05, + "loss": 0.3996, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21970131993293762, + "step": 105, + "valid_targets_mean": 5793.1, + "valid_targets_min": 2541 + }, + { + "epoch": 0.20541549953314658, + "grad_norm": 0.7055397282287685, + "learning_rate": 1.1595744680851065e-05, + "loss": 0.4245, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24501527845859528, + "step": 110, + "valid_targets_mean": 3743.1, + "valid_targets_min": 515 + }, + { + "epoch": 0.21475256769374415, + "grad_norm": 0.5525107477637422, + "learning_rate": 1.2127659574468087e-05, + "loss": 0.3862, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.197648823261261, + "step": 115, + "valid_targets_mean": 5232.9, + "valid_targets_min": 969 + }, + { + "epoch": 0.22408963585434175, + "grad_norm": 0.6400257020699087, + "learning_rate": 1.2659574468085108e-05, + "loss": 0.3925, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21128052473068237, + "step": 120, + "valid_targets_mean": 5471.9, + "valid_targets_min": 1076 + }, + { + "epoch": 0.2334267040149393, + "grad_norm": 0.6280287457331123, + "learning_rate": 1.3191489361702127e-05, + "loss": 0.4009, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16920611262321472, + "step": 125, + "valid_targets_mean": 3666.8, + "valid_targets_min": 1789 + }, + { + "epoch": 0.24276377217553688, + "grad_norm": 0.6099744312608174, + "learning_rate": 1.372340425531915e-05, + "loss": 0.3951, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23417091369628906, + "step": 130, + "valid_targets_mean": 5374.9, + "valid_targets_min": 999 + }, + { + "epoch": 0.25210084033613445, + "grad_norm": 0.5854477449665976, + "learning_rate": 1.425531914893617e-05, + "loss": 0.3886, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18169349431991577, + "step": 135, + "valid_targets_mean": 4918.1, + "valid_targets_min": 2433 + }, + { + "epoch": 0.26143790849673204, + "grad_norm": 0.7060783126515469, + "learning_rate": 1.4787234042553193e-05, + "loss": 0.3819, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21859022974967957, + "step": 140, + "valid_targets_mean": 4736.9, + "valid_targets_min": 1142 + }, + { + "epoch": 0.2707749766573296, + "grad_norm": 0.7074082595929656, + "learning_rate": 1.5319148936170214e-05, + "loss": 0.3984, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18193143606185913, + "step": 145, + "valid_targets_mean": 3685.9, + "valid_targets_min": 1961 + }, + { + "epoch": 0.2801120448179272, + "grad_norm": 0.6230784536404037, + "learning_rate": 1.5851063829787235e-05, + "loss": 0.3752, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19870468974113464, + "step": 150, + "valid_targets_mean": 4439.5, + "valid_targets_min": 1775 + }, + { + "epoch": 0.28944911297852477, + "grad_norm": 0.6590276916605332, + "learning_rate": 1.6382978723404255e-05, + "loss": 0.3775, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14948788285255432, + "step": 155, + "valid_targets_mean": 3391.0, + "valid_targets_min": 953 + }, + { + "epoch": 0.2987861811391223, + "grad_norm": 0.5760014508262422, + "learning_rate": 1.6914893617021276e-05, + "loss": 0.382, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1463763415813446, + "step": 160, + "valid_targets_mean": 3959.9, + "valid_targets_min": 206 + }, + { + "epoch": 0.3081232492997199, + "grad_norm": 0.6294388942519551, + "learning_rate": 1.74468085106383e-05, + "loss": 0.3968, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2105579525232315, + "step": 165, + "valid_targets_mean": 4441.1, + "valid_targets_min": 2225 + }, + { + "epoch": 0.31746031746031744, + "grad_norm": 0.51142613925171, + "learning_rate": 1.797872340425532e-05, + "loss": 0.3652, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18431127071380615, + "step": 170, + "valid_targets_mean": 5309.8, + "valid_targets_min": 1971 + }, + { + "epoch": 0.32679738562091504, + "grad_norm": 0.6509063857946139, + "learning_rate": 1.8510638297872342e-05, + "loss": 0.373, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.224260151386261, + "step": 175, + "valid_targets_mean": 5864.8, + "valid_targets_min": 2627 + }, + { + "epoch": 0.33613445378151263, + "grad_norm": 0.6016165327149384, + "learning_rate": 1.9042553191489363e-05, + "loss": 0.3595, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1491382122039795, + "step": 180, + "valid_targets_mean": 3579.5, + "valid_targets_min": 450 + }, + { + "epoch": 0.34547152194211017, + "grad_norm": 0.6114401758778295, + "learning_rate": 1.9574468085106384e-05, + "loss": 0.3742, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23414869606494904, + "step": 185, + "valid_targets_mean": 5284.4, + "valid_targets_min": 1349 + }, + { + "epoch": 0.35480859010270777, + "grad_norm": 0.637747619465701, + "learning_rate": 2.0106382978723404e-05, + "loss": 0.3659, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20197659730911255, + "step": 190, + "valid_targets_mean": 4438.1, + "valid_targets_min": 850 + }, + { + "epoch": 0.3641456582633053, + "grad_norm": 0.6513256401687038, + "learning_rate": 2.063829787234043e-05, + "loss": 0.3746, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1644812822341919, + "step": 195, + "valid_targets_mean": 3388.9, + "valid_targets_min": 1400 + }, + { + "epoch": 0.3734827264239029, + "grad_norm": 0.5847483804870854, + "learning_rate": 2.117021276595745e-05, + "loss": 0.3693, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21872743964195251, + "step": 200, + "valid_targets_mean": 5284.2, + "valid_targets_min": 2765 + }, + { + "epoch": 0.3828197945845005, + "grad_norm": 0.6512077358912021, + "learning_rate": 2.1702127659574467e-05, + "loss": 0.3595, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2204078733921051, + "step": 205, + "valid_targets_mean": 4239.1, + "valid_targets_min": 1591 + }, + { + "epoch": 0.39215686274509803, + "grad_norm": 0.7348240045105063, + "learning_rate": 2.223404255319149e-05, + "loss": 0.3552, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1850978285074234, + "step": 210, + "valid_targets_mean": 4006.6, + "valid_targets_min": 1285 + }, + { + "epoch": 0.40149393090569563, + "grad_norm": 0.5251644817376158, + "learning_rate": 2.2765957446808512e-05, + "loss": 0.3511, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15357327461242676, + "step": 215, + "valid_targets_mean": 5908.8, + "valid_targets_min": 1696 + }, + { + "epoch": 0.41083099906629317, + "grad_norm": 0.8220891622766759, + "learning_rate": 2.3297872340425536e-05, + "loss": 0.3686, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2549870014190674, + "step": 220, + "valid_targets_mean": 3844.5, + "valid_targets_min": 1723 + }, + { + "epoch": 0.42016806722689076, + "grad_norm": 0.550716032207352, + "learning_rate": 2.3829787234042553e-05, + "loss": 0.3669, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14315801858901978, + "step": 225, + "valid_targets_mean": 3617.4, + "valid_targets_min": 2110 + }, + { + "epoch": 0.4295051353874883, + "grad_norm": 0.6828943977157396, + "learning_rate": 2.4361702127659578e-05, + "loss": 0.3826, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22359177470207214, + "step": 230, + "valid_targets_mean": 4374.8, + "valid_targets_min": 887 + }, + { + "epoch": 0.4388422035480859, + "grad_norm": 0.6515159861060119, + "learning_rate": 2.48936170212766e-05, + "loss": 0.3631, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1701551377773285, + "step": 235, + "valid_targets_mean": 3890.4, + "valid_targets_min": 327 + }, + { + "epoch": 0.4481792717086835, + "grad_norm": 0.6103680294409951, + "learning_rate": 2.5425531914893616e-05, + "loss": 0.373, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18854030966758728, + "step": 240, + "valid_targets_mean": 4670.8, + "valid_targets_min": 1484 + }, + { + "epoch": 0.45751633986928103, + "grad_norm": 0.6507150366013921, + "learning_rate": 2.595744680851064e-05, + "loss": 0.3635, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18466567993164062, + "step": 245, + "valid_targets_mean": 4146.4, + "valid_targets_min": 3142 + }, + { + "epoch": 0.4668534080298786, + "grad_norm": 0.5291855469951989, + "learning_rate": 2.6489361702127664e-05, + "loss": 0.3587, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15128082036972046, + "step": 250, + "valid_targets_mean": 4475.0, + "valid_targets_min": 953 + }, + { + "epoch": 0.47619047619047616, + "grad_norm": 0.5767850592241007, + "learning_rate": 2.702127659574468e-05, + "loss": 0.3513, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1303057074546814, + "step": 255, + "valid_targets_mean": 3885.5, + "valid_targets_min": 1567 + }, + { + "epoch": 0.48552754435107376, + "grad_norm": 0.6645294394741291, + "learning_rate": 2.7553191489361702e-05, + "loss": 0.3585, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18141025304794312, + "step": 260, + "valid_targets_mean": 4179.8, + "valid_targets_min": 2197 + }, + { + "epoch": 0.49486461251167135, + "grad_norm": 0.576552190639025, + "learning_rate": 2.8085106382978727e-05, + "loss": 0.3563, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19227007031440735, + "step": 265, + "valid_targets_mean": 5711.4, + "valid_targets_min": 3484 + }, + { + "epoch": 0.5042016806722689, + "grad_norm": 0.6482454136257102, + "learning_rate": 2.8617021276595747e-05, + "loss": 0.363, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1801537573337555, + "step": 270, + "valid_targets_mean": 4007.5, + "valid_targets_min": 1524 + }, + { + "epoch": 0.5135387488328664, + "grad_norm": 0.585164039219563, + "learning_rate": 2.9148936170212768e-05, + "loss": 0.3495, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16872641444206238, + "step": 275, + "valid_targets_mean": 4382.8, + "valid_targets_min": 2210 + }, + { + "epoch": 0.5228758169934641, + "grad_norm": 0.5625951539709059, + "learning_rate": 2.968085106382979e-05, + "loss": 0.3558, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17654171586036682, + "step": 280, + "valid_targets_mean": 6219.5, + "valid_targets_min": 526 + }, + { + "epoch": 0.5322128851540616, + "grad_norm": 0.7233123799206153, + "learning_rate": 3.0212765957446813e-05, + "loss": 0.3581, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20859920978546143, + "step": 285, + "valid_targets_mean": 4161.1, + "valid_targets_min": 781 + }, + { + "epoch": 0.5415499533146592, + "grad_norm": 0.7449269068986699, + "learning_rate": 3.074468085106383e-05, + "loss": 0.3545, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1945279836654663, + "step": 290, + "valid_targets_mean": 3940.0, + "valid_targets_min": 878 + }, + { + "epoch": 0.5508870214752568, + "grad_norm": 0.6483974685516868, + "learning_rate": 3.127659574468085e-05, + "loss": 0.363, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1878240555524826, + "step": 295, + "valid_targets_mean": 4856.4, + "valid_targets_min": 2137 + }, + { + "epoch": 0.5602240896358543, + "grad_norm": 1.1497936967816067, + "learning_rate": 3.180851063829788e-05, + "loss": 0.3546, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16199401021003723, + "step": 300, + "valid_targets_mean": 2938.2, + "valid_targets_min": 386 + }, + { + "epoch": 0.5695611577964519, + "grad_norm": 0.6886945282365045, + "learning_rate": 3.234042553191489e-05, + "loss": 0.348, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1798180341720581, + "step": 305, + "valid_targets_mean": 3616.1, + "valid_targets_min": 442 + }, + { + "epoch": 0.5788982259570495, + "grad_norm": 0.5961025260618755, + "learning_rate": 3.2872340425531914e-05, + "loss": 0.3373, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15524005889892578, + "step": 310, + "valid_targets_mean": 4192.2, + "valid_targets_min": 904 + }, + { + "epoch": 0.5882352941176471, + "grad_norm": 0.5915190024659823, + "learning_rate": 3.340425531914894e-05, + "loss": 0.369, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18020114302635193, + "step": 315, + "valid_targets_mean": 4968.1, + "valid_targets_min": 1927 + }, + { + "epoch": 0.5975723622782446, + "grad_norm": 0.6528477093898223, + "learning_rate": 3.393617021276596e-05, + "loss": 0.3438, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1357184499502182, + "step": 320, + "valid_targets_mean": 3616.0, + "valid_targets_min": 622 + }, + { + "epoch": 0.6069094304388422, + "grad_norm": 0.6175446601948605, + "learning_rate": 3.446808510638298e-05, + "loss": 0.3512, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15919038653373718, + "step": 325, + "valid_targets_mean": 4098.4, + "valid_targets_min": 1171 + }, + { + "epoch": 0.6162464985994398, + "grad_norm": 0.7138242546779738, + "learning_rate": 3.5000000000000004e-05, + "loss": 0.3432, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15437497198581696, + "step": 330, + "valid_targets_mean": 3451.8, + "valid_targets_min": 1492 + }, + { + "epoch": 0.6255835667600373, + "grad_norm": 0.6589103794512452, + "learning_rate": 3.5531914893617025e-05, + "loss": 0.3472, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1586209088563919, + "step": 335, + "valid_targets_mean": 3655.2, + "valid_targets_min": 967 + }, + { + "epoch": 0.6349206349206349, + "grad_norm": 0.6460105504017868, + "learning_rate": 3.6063829787234045e-05, + "loss": 0.3487, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17687325179576874, + "step": 340, + "valid_targets_mean": 4335.0, + "valid_targets_min": 503 + }, + { + "epoch": 0.6442577030812325, + "grad_norm": 4.998670881269213, + "learning_rate": 3.6595744680851066e-05, + "loss": 0.3613, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19570030272006989, + "step": 345, + "valid_targets_mean": 4080.4, + "valid_targets_min": 1639 + }, + { + "epoch": 0.6535947712418301, + "grad_norm": 0.7853740975550032, + "learning_rate": 3.712765957446809e-05, + "loss": 0.3567, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18426841497421265, + "step": 350, + "valid_targets_mean": 3329.6, + "valid_targets_min": 1579 + }, + { + "epoch": 0.6629318394024276, + "grad_norm": 0.656399729658791, + "learning_rate": 3.7659574468085114e-05, + "loss": 0.3428, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1339481621980667, + "step": 355, + "valid_targets_mean": 3276.8, + "valid_targets_min": 330 + }, + { + "epoch": 0.6722689075630253, + "grad_norm": 0.5892805788602214, + "learning_rate": 3.819148936170213e-05, + "loss": 0.352, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19089394807815552, + "step": 360, + "valid_targets_mean": 4998.6, + "valid_targets_min": 2257 + }, + { + "epoch": 0.6816059757236228, + "grad_norm": 0.8095910665541458, + "learning_rate": 3.872340425531915e-05, + "loss": 0.3431, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18516522645950317, + "step": 365, + "valid_targets_mean": 4908.6, + "valid_targets_min": 218 + }, + { + "epoch": 0.6909430438842203, + "grad_norm": 0.6750008797300402, + "learning_rate": 3.925531914893618e-05, + "loss": 0.3508, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1772959977388382, + "step": 370, + "valid_targets_mean": 5086.2, + "valid_targets_min": 1333 + }, + { + "epoch": 0.7002801120448179, + "grad_norm": 0.5983480993813252, + "learning_rate": 3.978723404255319e-05, + "loss": 0.3444, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1891724169254303, + "step": 375, + "valid_targets_mean": 5382.9, + "valid_targets_min": 1597 + }, + { + "epoch": 0.7096171802054155, + "grad_norm": 0.6744928680570041, + "learning_rate": 3.999992206418221e-05, + "loss": 0.3474, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14973819255828857, + "step": 380, + "valid_targets_mean": 3067.2, + "valid_targets_min": 1181 + }, + { + "epoch": 0.7189542483660131, + "grad_norm": 0.6110159029391771, + "learning_rate": 3.999944579193981e-05, + "loss": 0.3282, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16792032122612, + "step": 385, + "valid_targets_mean": 4584.0, + "valid_targets_min": 2116 + }, + { + "epoch": 0.7282913165266106, + "grad_norm": 0.6283680829124466, + "learning_rate": 3.999853655542977e-05, + "loss": 0.3408, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16233767569065094, + "step": 390, + "valid_targets_mean": 4041.0, + "valid_targets_min": 1143 + }, + { + "epoch": 0.7376283846872083, + "grad_norm": 0.6335173724779134, + "learning_rate": 3.999719437433601e-05, + "loss": 0.3295, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1915360987186432, + "step": 395, + "valid_targets_mean": 4767.5, + "valid_targets_min": 1657 + }, + { + "epoch": 0.7469654528478058, + "grad_norm": 0.5871385777499087, + "learning_rate": 3.999541927771513e-05, + "loss": 0.3444, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1510940045118332, + "step": 400, + "valid_targets_mean": 3873.6, + "valid_targets_min": 584 + }, + { + "epoch": 0.7563025210084033, + "grad_norm": 0.7760773108862324, + "learning_rate": 3.999321130399588e-05, + "loss": 0.3488, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15511666238307953, + "step": 405, + "valid_targets_mean": 2609.2, + "valid_targets_min": 236 + }, + { + "epoch": 0.765639589169001, + "grad_norm": 0.6425866500790508, + "learning_rate": 3.9990570500978275e-05, + "loss": 0.3394, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1541416496038437, + "step": 410, + "valid_targets_mean": 3852.8, + "valid_targets_min": 1462 + }, + { + "epoch": 0.7749766573295985, + "grad_norm": 0.6121663392711183, + "learning_rate": 3.998749692583255e-05, + "loss": 0.352, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13458311557769775, + "step": 415, + "valid_targets_mean": 3311.2, + "valid_targets_min": 187 + }, + { + "epoch": 0.7843137254901961, + "grad_norm": 0.5479196119221734, + "learning_rate": 3.9983990645097977e-05, + "loss": 0.318, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18186694383621216, + "step": 420, + "valid_targets_mean": 4674.0, + "valid_targets_min": 1836 + }, + { + "epoch": 0.7936507936507936, + "grad_norm": 0.518462153073093, + "learning_rate": 3.9980051734681365e-05, + "loss": 0.3518, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19562377035617828, + "step": 425, + "valid_targets_mean": 6218.0, + "valid_targets_min": 1779 + }, + { + "epoch": 0.8029878618113913, + "grad_norm": 0.5559481349890043, + "learning_rate": 3.9975680279855453e-05, + "loss": 0.3238, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16600401699543, + "step": 430, + "valid_targets_mean": 4732.9, + "valid_targets_min": 1038 + }, + { + "epoch": 0.8123249299719888, + "grad_norm": 0.49278114392118105, + "learning_rate": 3.9970876375257044e-05, + "loss": 0.3224, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15543261170387268, + "step": 435, + "valid_targets_mean": 5254.9, + "valid_targets_min": 1594 + }, + { + "epoch": 0.8216619981325863, + "grad_norm": 0.5541052468927554, + "learning_rate": 3.996564012488497e-05, + "loss": 0.3276, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13396167755126953, + "step": 440, + "valid_targets_mean": 3985.5, + "valid_targets_min": 1569 + }, + { + "epoch": 0.830999066293184, + "grad_norm": 0.6050821837777287, + "learning_rate": 3.995997164209785e-05, + "loss": 0.326, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12180531024932861, + "step": 445, + "valid_targets_mean": 3857.2, + "valid_targets_min": 748 + }, + { + "epoch": 0.8403361344537815, + "grad_norm": 0.5945124754792479, + "learning_rate": 3.99538710496116e-05, + "loss": 0.3398, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13722115755081177, + "step": 450, + "valid_targets_mean": 3721.5, + "valid_targets_min": 971 + }, + { + "epoch": 0.8496732026143791, + "grad_norm": 0.6769887369944673, + "learning_rate": 3.9947338479496813e-05, + "loss": 0.3499, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15946775674819946, + "step": 455, + "valid_targets_mean": 3577.1, + "valid_targets_min": 1511 + }, + { + "epoch": 0.8590102707749766, + "grad_norm": 0.5483458814949701, + "learning_rate": 3.994037407317588e-05, + "loss": 0.3332, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17706672847270966, + "step": 460, + "valid_targets_mean": 4671.5, + "valid_targets_min": 2358 + }, + { + "epoch": 0.8683473389355743, + "grad_norm": 0.6244929552648373, + "learning_rate": 3.993297798141992e-05, + "loss": 0.3331, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19442218542099, + "step": 465, + "valid_targets_mean": 4256.0, + "valid_targets_min": 389 + }, + { + "epoch": 0.8776844070961718, + "grad_norm": 0.49146401420174923, + "learning_rate": 3.9925150364345556e-05, + "loss": 0.3253, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1474149525165558, + "step": 470, + "valid_targets_mean": 5179.0, + "valid_targets_min": 1565 + }, + { + "epoch": 0.8870214752567693, + "grad_norm": 0.7554124502036874, + "learning_rate": 3.991689139141141e-05, + "loss": 0.3358, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15803131461143494, + "step": 475, + "valid_targets_mean": 2531.4, + "valid_targets_min": 1016 + }, + { + "epoch": 0.896358543417367, + "grad_norm": 0.5935578295813002, + "learning_rate": 3.9908201241414434e-05, + "loss": 0.339, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1344335675239563, + "step": 480, + "valid_targets_mean": 3166.8, + "valid_targets_min": 1656 + }, + { + "epoch": 0.9056956115779645, + "grad_norm": 0.47605599900151113, + "learning_rate": 3.9899080102486065e-05, + "loss": 0.3264, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17132365703582764, + "step": 485, + "valid_targets_mean": 6423.0, + "valid_targets_min": 190 + }, + { + "epoch": 0.9150326797385621, + "grad_norm": 0.6423530200860137, + "learning_rate": 3.988952817208815e-05, + "loss": 0.3261, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14093393087387085, + "step": 490, + "valid_targets_mean": 3899.1, + "valid_targets_min": 194 + }, + { + "epoch": 0.9243697478991597, + "grad_norm": 0.46612782863347124, + "learning_rate": 3.987954565700863e-05, + "loss": 0.3154, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16507190465927124, + "step": 495, + "valid_targets_mean": 7776.9, + "valid_targets_min": 3545 + }, + { + "epoch": 0.9337068160597572, + "grad_norm": 0.507060083003377, + "learning_rate": 3.9869132773357115e-05, + "loss": 0.3243, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12540395557880402, + "step": 500, + "valid_targets_mean": 5227.1, + "valid_targets_min": 594 + }, + { + "epoch": 0.9430438842203548, + "grad_norm": 0.5231925986561323, + "learning_rate": 3.9858289746560183e-05, + "loss": 0.311, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20082661509513855, + "step": 505, + "valid_targets_mean": 7246.0, + "valid_targets_min": 2455 + }, + { + "epoch": 0.9523809523809523, + "grad_norm": 0.6047109159969782, + "learning_rate": 3.98470168113565e-05, + "loss": 0.3171, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18611443042755127, + "step": 510, + "valid_targets_mean": 4768.4, + "valid_targets_min": 927 + }, + { + "epoch": 0.96171802054155, + "grad_norm": 0.629239462779698, + "learning_rate": 3.983531421179172e-05, + "loss": 0.3247, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17209777235984802, + "step": 515, + "valid_targets_mean": 4488.6, + "valid_targets_min": 619 + }, + { + "epoch": 0.9710550887021475, + "grad_norm": 0.5603032847732651, + "learning_rate": 3.9823182201213245e-05, + "loss": 0.3255, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1148725152015686, + "step": 520, + "valid_targets_mean": 3318.5, + "valid_targets_min": 779 + }, + { + "epoch": 0.9803921568627451, + "grad_norm": 0.7676313830057679, + "learning_rate": 3.981062104226471e-05, + "loss": 0.3255, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2031964212656021, + "step": 525, + "valid_targets_mean": 5854.0, + "valid_targets_min": 1893 + }, + { + "epoch": 0.9897292250233427, + "grad_norm": 0.5321973024163378, + "learning_rate": 3.97976310068803e-05, + "loss": 0.3237, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13188382983207703, + "step": 530, + "valid_targets_mean": 3657.0, + "valid_targets_min": 1114 + }, + { + "epoch": 0.9990662931839402, + "grad_norm": 0.5995369203277318, + "learning_rate": 3.978421237627886e-05, + "loss": 0.3196, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1760430932044983, + "step": 535, + "valid_targets_mean": 5740.8, + "valid_targets_min": 1380 + }, + { + "epoch": 1.007469654528478, + "grad_norm": 0.5865791565070957, + "learning_rate": 3.977036544095781e-05, + "loss": 0.3151, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15728887915611267, + "step": 540, + "valid_targets_mean": 5136.5, + "valid_targets_min": 1331 + }, + { + "epoch": 1.0168067226890756, + "grad_norm": 0.7641213510410019, + "learning_rate": 3.975609050068689e-05, + "loss": 0.3092, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1726839542388916, + "step": 545, + "valid_targets_mean": 3108.1, + "valid_targets_min": 1079 + }, + { + "epoch": 1.026143790849673, + "grad_norm": 0.6671774455500653, + "learning_rate": 3.97413878645016e-05, + "loss": 0.3233, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1539943516254425, + "step": 550, + "valid_targets_mean": 2958.0, + "valid_targets_min": 402 + }, + { + "epoch": 1.0354808590102709, + "grad_norm": 0.5087122776411305, + "learning_rate": 3.972625785069657e-05, + "loss": 0.2951, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13054482638835907, + "step": 555, + "valid_targets_mean": 5293.9, + "valid_targets_min": 1890 + }, + { + "epoch": 1.0448179271708684, + "grad_norm": 0.690876644498558, + "learning_rate": 3.971070078681864e-05, + "loss": 0.3204, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13715389370918274, + "step": 560, + "valid_targets_mean": 3878.5, + "valid_targets_min": 734 + }, + { + "epoch": 1.054154995331466, + "grad_norm": 0.5582822282560463, + "learning_rate": 3.969471700965979e-05, + "loss": 0.3241, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16278928518295288, + "step": 565, + "valid_targets_mean": 5743.2, + "valid_targets_min": 996 + }, + { + "epoch": 1.0634920634920635, + "grad_norm": 0.6076559079128722, + "learning_rate": 3.967830686524982e-05, + "loss": 0.3054, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14494648575782776, + "step": 570, + "valid_targets_mean": 3438.0, + "valid_targets_min": 440 + }, + { + "epoch": 1.072829131652661, + "grad_norm": 0.6947036895238121, + "learning_rate": 3.966147070884888e-05, + "loss": 0.3236, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17833882570266724, + "step": 575, + "valid_targets_mean": 3452.2, + "valid_targets_min": 1500 + }, + { + "epoch": 1.0821661998132586, + "grad_norm": 0.5892631460086643, + "learning_rate": 3.964420890493978e-05, + "loss": 0.3148, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1592351198196411, + "step": 580, + "valid_targets_mean": 4542.5, + "valid_targets_min": 2688 + }, + { + "epoch": 1.091503267973856, + "grad_norm": 0.6337765522589037, + "learning_rate": 3.9626521827220096e-05, + "loss": 0.3032, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17922143638134003, + "step": 585, + "valid_targets_mean": 4718.1, + "valid_targets_min": 349 + }, + { + "epoch": 1.1008403361344539, + "grad_norm": 0.6389666995465977, + "learning_rate": 3.960840985859406e-05, + "loss": 0.3067, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14055702090263367, + "step": 590, + "valid_targets_mean": 3730.0, + "valid_targets_min": 331 + }, + { + "epoch": 1.1101774042950514, + "grad_norm": 0.5847434963565058, + "learning_rate": 3.95898733911643e-05, + "loss": 0.3064, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0963490828871727, + "step": 595, + "valid_targets_mean": 2450.0, + "valid_targets_min": 1276 + }, + { + "epoch": 1.119514472455649, + "grad_norm": 0.664534506283319, + "learning_rate": 3.957091282622335e-05, + "loss": 0.3202, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1829378604888916, + "step": 600, + "valid_targets_mean": 4110.1, + "valid_targets_min": 1535 + }, + { + "epoch": 1.1288515406162465, + "grad_norm": 0.5917050787588995, + "learning_rate": 3.955152857424493e-05, + "loss": 0.3121, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17418399453163147, + "step": 605, + "valid_targets_mean": 5098.4, + "valid_targets_min": 2203 + }, + { + "epoch": 1.138188608776844, + "grad_norm": 0.59558257404901, + "learning_rate": 3.953172105487509e-05, + "loss": 0.3013, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1286385953426361, + "step": 610, + "valid_targets_mean": 4437.5, + "valid_targets_min": 2644 + }, + { + "epoch": 1.1475256769374416, + "grad_norm": 0.6361164679069865, + "learning_rate": 3.951149069692312e-05, + "loss": 0.3059, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14291919767856598, + "step": 615, + "valid_targets_mean": 3402.6, + "valid_targets_min": 310 + }, + { + "epoch": 1.156862745098039, + "grad_norm": 0.5543772787549324, + "learning_rate": 3.9490837938352267e-05, + "loss": 0.3082, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17909739911556244, + "step": 620, + "valid_targets_mean": 6086.2, + "valid_targets_min": 1327 + }, + { + "epoch": 1.1661998132586369, + "grad_norm": 0.597059664221142, + "learning_rate": 3.946976322627024e-05, + "loss": 0.296, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17846858501434326, + "step": 625, + "valid_targets_mean": 4772.0, + "valid_targets_min": 928 + }, + { + "epoch": 1.1755368814192344, + "grad_norm": 0.5674707281396597, + "learning_rate": 3.944826701691955e-05, + "loss": 0.293, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1758207380771637, + "step": 630, + "valid_targets_mean": 5279.2, + "valid_targets_min": 847 + }, + { + "epoch": 1.184873949579832, + "grad_norm": 0.5503414435946239, + "learning_rate": 3.942634977566761e-05, + "loss": 0.326, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15737375617027283, + "step": 635, + "valid_targets_mean": 4255.0, + "valid_targets_min": 316 + }, + { + "epoch": 1.1942110177404295, + "grad_norm": 0.5642650384976748, + "learning_rate": 3.9404011976996696e-05, + "loss": 0.3029, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16225208342075348, + "step": 640, + "valid_targets_mean": 5024.2, + "valid_targets_min": 398 + }, + { + "epoch": 1.203548085901027, + "grad_norm": 0.5406624693674554, + "learning_rate": 3.938125410449364e-05, + "loss": 0.3045, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14381194114685059, + "step": 645, + "valid_targets_mean": 4134.6, + "valid_targets_min": 1523 + }, + { + "epoch": 1.2128851540616246, + "grad_norm": 0.6191381712195129, + "learning_rate": 3.935807665083936e-05, + "loss": 0.2981, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13662654161453247, + "step": 650, + "valid_targets_mean": 3686.8, + "valid_targets_min": 466 + }, + { + "epoch": 1.2222222222222223, + "grad_norm": 0.7087811476219604, + "learning_rate": 3.933448011779824e-05, + "loss": 0.3128, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.160598024725914, + "step": 655, + "valid_targets_mean": 3279.9, + "valid_targets_min": 610 + }, + { + "epoch": 1.2315592903828199, + "grad_norm": 0.7259099470307365, + "learning_rate": 3.93104650162072e-05, + "loss": 0.3157, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15276646614074707, + "step": 660, + "valid_targets_mean": 3185.5, + "valid_targets_min": 330 + }, + { + "epoch": 1.2408963585434174, + "grad_norm": 0.655423480082449, + "learning_rate": 3.928603186596471e-05, + "loss": 0.2911, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0953553095459938, + "step": 665, + "valid_targets_mean": 2033.9, + "valid_targets_min": 274 + }, + { + "epoch": 1.250233426704015, + "grad_norm": 0.5798990441649253, + "learning_rate": 3.926118119601946e-05, + "loss": 0.3046, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18092554807662964, + "step": 670, + "valid_targets_mean": 5095.8, + "valid_targets_min": 754 + }, + { + "epoch": 1.2595704948646125, + "grad_norm": 0.59385056418379, + "learning_rate": 3.923591354435897e-05, + "loss": 0.3148, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1915781944990158, + "step": 675, + "valid_targets_mean": 4503.2, + "valid_targets_min": 1491 + }, + { + "epoch": 1.26890756302521, + "grad_norm": 0.5651217254660733, + "learning_rate": 3.9210229457997916e-05, + "loss": 0.2939, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1345812976360321, + "step": 680, + "valid_targets_mean": 4752.4, + "valid_targets_min": 1142 + }, + { + "epoch": 1.2782446311858076, + "grad_norm": 0.5401890060850301, + "learning_rate": 3.9184129492966286e-05, + "loss": 0.3124, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1761828362941742, + "step": 685, + "valid_targets_mean": 5272.6, + "valid_targets_min": 2870 + }, + { + "epoch": 1.287581699346405, + "grad_norm": 0.601428713991424, + "learning_rate": 3.915761421429735e-05, + "loss": 0.3093, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11842285096645355, + "step": 690, + "valid_targets_mean": 3402.2, + "valid_targets_min": 950 + }, + { + "epoch": 1.2969187675070029, + "grad_norm": 0.791912568157745, + "learning_rate": 3.913068419601542e-05, + "loss": 0.291, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18513384461402893, + "step": 695, + "valid_targets_mean": 3705.0, + "valid_targets_min": 674 + }, + { + "epoch": 1.3062558356676004, + "grad_norm": 0.5452444269817829, + "learning_rate": 3.910334002112344e-05, + "loss": 0.3088, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18488115072250366, + "step": 700, + "valid_targets_mean": 6944.1, + "valid_targets_min": 428 + }, + { + "epoch": 1.315592903828198, + "grad_norm": 0.5169395849124725, + "learning_rate": 3.907558228159034e-05, + "loss": 0.302, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14646673202514648, + "step": 705, + "valid_targets_mean": 5416.5, + "valid_targets_min": 3563 + }, + { + "epoch": 1.3249299719887955, + "grad_norm": 0.5001226138599072, + "learning_rate": 3.904741157833825e-05, + "loss": 0.2939, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16307666897773743, + "step": 710, + "valid_targets_mean": 6462.2, + "valid_targets_min": 2573 + }, + { + "epoch": 1.334267040149393, + "grad_norm": 0.6088537184133012, + "learning_rate": 3.901882852122945e-05, + "loss": 0.3016, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15147876739501953, + "step": 715, + "valid_targets_mean": 3761.0, + "valid_targets_min": 386 + }, + { + "epoch": 1.3436041083099908, + "grad_norm": 0.568990669324948, + "learning_rate": 3.8989833729053226e-05, + "loss": 0.3037, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13363492488861084, + "step": 720, + "valid_targets_mean": 4173.4, + "valid_targets_min": 2095 + }, + { + "epoch": 1.3529411764705883, + "grad_norm": 0.5053732611165703, + "learning_rate": 3.89604278295124e-05, + "loss": 0.304, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12359647452831268, + "step": 725, + "valid_targets_mean": 4645.5, + "valid_targets_min": 1708 + }, + { + "epoch": 1.3622782446311859, + "grad_norm": 0.6064325241450877, + "learning_rate": 3.893061145920981e-05, + "loss": 0.3063, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16584424674510956, + "step": 730, + "valid_targets_mean": 4379.5, + "valid_targets_min": 1399 + }, + { + "epoch": 1.3716153127917834, + "grad_norm": 0.5277563157529683, + "learning_rate": 3.890038526363449e-05, + "loss": 0.3019, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17066998779773712, + "step": 735, + "valid_targets_mean": 5900.5, + "valid_targets_min": 3091 + }, + { + "epoch": 1.380952380952381, + "grad_norm": 0.5571788264230575, + "learning_rate": 3.88697498971477e-05, + "loss": 0.2984, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10176724940538406, + "step": 740, + "valid_targets_mean": 4091.9, + "valid_targets_min": 1075 + }, + { + "epoch": 1.3902894491129785, + "grad_norm": 0.6894149551431228, + "learning_rate": 3.883870602296878e-05, + "loss": 0.3104, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16245920956134796, + "step": 745, + "valid_targets_mean": 3946.9, + "valid_targets_min": 1964 + }, + { + "epoch": 1.399626517273576, + "grad_norm": 0.5475694010078543, + "learning_rate": 3.880725431316074e-05, + "loss": 0.304, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15411341190338135, + "step": 750, + "valid_targets_mean": 4729.2, + "valid_targets_min": 487 + }, + { + "epoch": 1.4089635854341735, + "grad_norm": 0.57079178345435, + "learning_rate": 3.87753954486158e-05, + "loss": 0.3093, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18300111591815948, + "step": 755, + "valid_targets_mean": 6063.2, + "valid_targets_min": 1700 + }, + { + "epoch": 1.4183006535947713, + "grad_norm": 0.6058927970020223, + "learning_rate": 3.874313011904056e-05, + "loss": 0.316, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14143604040145874, + "step": 760, + "valid_targets_mean": 3916.8, + "valid_targets_min": 331 + }, + { + "epoch": 1.4276377217553688, + "grad_norm": 0.5646986856232502, + "learning_rate": 3.871045902294112e-05, + "loss": 0.3049, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17111051082611084, + "step": 765, + "valid_targets_mean": 5721.9, + "valid_targets_min": 3513 + }, + { + "epoch": 1.4369747899159664, + "grad_norm": 0.5675094535096663, + "learning_rate": 3.867738286760793e-05, + "loss": 0.299, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1547420471906662, + "step": 770, + "valid_targets_mean": 3879.9, + "valid_targets_min": 1163 + }, + { + "epoch": 1.446311858076564, + "grad_norm": 0.5677212789545684, + "learning_rate": 3.86439023691005e-05, + "loss": 0.3068, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15864408016204834, + "step": 775, + "valid_targets_mean": 4737.0, + "valid_targets_min": 3112 + }, + { + "epoch": 1.4556489262371615, + "grad_norm": 0.5691649864991416, + "learning_rate": 3.86100182522319e-05, + "loss": 0.2995, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15959599614143372, + "step": 780, + "valid_targets_mean": 5598.6, + "valid_targets_min": 1784 + }, + { + "epoch": 1.4649859943977592, + "grad_norm": 0.5987773462307701, + "learning_rate": 3.857573125055304e-05, + "loss": 0.3149, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18664024770259857, + "step": 785, + "valid_targets_mean": 5270.9, + "valid_targets_min": 3048 + }, + { + "epoch": 1.4743230625583568, + "grad_norm": 0.5784865229355174, + "learning_rate": 3.8541042106336825e-05, + "loss": 0.3037, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17758849263191223, + "step": 790, + "valid_targets_mean": 4656.0, + "valid_targets_min": 175 + }, + { + "epoch": 1.4836601307189543, + "grad_norm": 0.5783227906517225, + "learning_rate": 3.850595157056206e-05, + "loss": 0.305, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1852608025074005, + "step": 795, + "valid_targets_mean": 5393.2, + "valid_targets_min": 1571 + }, + { + "epoch": 1.4929971988795518, + "grad_norm": 0.47411624840461025, + "learning_rate": 3.84704604028972e-05, + "loss": 0.3176, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1625971794128418, + "step": 800, + "valid_targets_mean": 6435.0, + "valid_targets_min": 3472 + }, + { + "epoch": 1.5023342670401494, + "grad_norm": 0.46538112620395583, + "learning_rate": 3.84345693716839e-05, + "loss": 0.2959, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1026754230260849, + "step": 805, + "valid_targets_mean": 4727.1, + "valid_targets_min": 2085 + }, + { + "epoch": 1.511671335200747, + "grad_norm": 0.6105889514258763, + "learning_rate": 3.8398279253920403e-05, + "loss": 0.2876, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17879056930541992, + "step": 810, + "valid_targets_mean": 4379.4, + "valid_targets_min": 1332 + }, + { + "epoch": 1.5210084033613445, + "grad_norm": 0.535193993163554, + "learning_rate": 3.8361590835244696e-05, + "loss": 0.3051, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13922949135303497, + "step": 815, + "valid_targets_mean": 5620.6, + "valid_targets_min": 187 + }, + { + "epoch": 1.530345471521942, + "grad_norm": 0.5807580671483096, + "learning_rate": 3.8324504909917486e-05, + "loss": 0.295, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10466037690639496, + "step": 820, + "valid_targets_mean": 3171.5, + "valid_targets_min": 2084 + }, + { + "epoch": 1.5396825396825395, + "grad_norm": 0.6029471720243565, + "learning_rate": 3.8287022280805064e-05, + "loss": 0.3024, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15232330560684204, + "step": 825, + "valid_targets_mean": 3812.1, + "valid_targets_min": 1719 + }, + { + "epoch": 1.5490196078431373, + "grad_norm": 0.523544297577617, + "learning_rate": 3.824914375936186e-05, + "loss": 0.3038, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11569304764270782, + "step": 830, + "valid_targets_mean": 4110.5, + "valid_targets_min": 2015 + }, + { + "epoch": 1.5583566760037348, + "grad_norm": 0.65412578836097, + "learning_rate": 3.8210870165612913e-05, + "loss": 0.3122, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16320863366127014, + "step": 835, + "valid_targets_mean": 5240.4, + "valid_targets_min": 1789 + }, + { + "epoch": 1.5676937441643324, + "grad_norm": 0.5614267126637476, + "learning_rate": 3.817220232813611e-05, + "loss": 0.3082, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15379010140895844, + "step": 840, + "valid_targets_mean": 5079.5, + "valid_targets_min": 425 + }, + { + "epoch": 1.57703081232493, + "grad_norm": 0.5275850122552819, + "learning_rate": 3.8133141084044243e-05, + "loss": 0.3005, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14050328731536865, + "step": 845, + "valid_targets_mean": 5571.9, + "valid_targets_min": 3874 + }, + { + "epoch": 1.5863678804855277, + "grad_norm": 0.761641942127212, + "learning_rate": 3.809368727896689e-05, + "loss": 0.3133, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1947091966867447, + "step": 850, + "valid_targets_mean": 4126.8, + "valid_targets_min": 1483 + }, + { + "epoch": 1.5957049486461252, + "grad_norm": 0.6386701388185174, + "learning_rate": 3.805384176703211e-05, + "loss": 0.3063, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14768196642398834, + "step": 855, + "valid_targets_mean": 3064.4, + "valid_targets_min": 510 + }, + { + "epoch": 1.6050420168067228, + "grad_norm": 0.683190907308287, + "learning_rate": 3.8013605410847956e-05, + "loss": 0.3122, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.141540989279747, + "step": 860, + "valid_targets_mean": 3470.9, + "valid_targets_min": 1289 + }, + { + "epoch": 1.6143790849673203, + "grad_norm": 0.5587827176003827, + "learning_rate": 3.797297908148379e-05, + "loss": 0.3102, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1652992218732834, + "step": 865, + "valid_targets_mean": 5109.4, + "valid_targets_min": 2473 + }, + { + "epoch": 1.6237161531279178, + "grad_norm": 0.6400687686285255, + "learning_rate": 3.793196365845142e-05, + "loss": 0.3066, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15512387454509735, + "step": 870, + "valid_targets_mean": 3570.6, + "valid_targets_min": 1571 + }, + { + "epoch": 1.6330532212885154, + "grad_norm": 0.6083537423666949, + "learning_rate": 3.789056002968609e-05, + "loss": 0.3108, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17498058080673218, + "step": 875, + "valid_targets_mean": 4867.8, + "valid_targets_min": 1853 + }, + { + "epoch": 1.642390289449113, + "grad_norm": 0.656156111050617, + "learning_rate": 3.784876909152723e-05, + "loss": 0.2924, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11363570392131805, + "step": 880, + "valid_targets_mean": 3142.8, + "valid_targets_min": 265 + }, + { + "epoch": 1.6517273576097105, + "grad_norm": 0.6155764144454728, + "learning_rate": 3.7806591748699056e-05, + "loss": 0.2975, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16831986606121063, + "step": 885, + "valid_targets_mean": 3848.1, + "valid_targets_min": 2100 + }, + { + "epoch": 1.661064425770308, + "grad_norm": 0.5176225191548176, + "learning_rate": 3.776402891429098e-05, + "loss": 0.3014, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13697239756584167, + "step": 890, + "valid_targets_mean": 4229.2, + "valid_targets_min": 1538 + }, + { + "epoch": 1.6704014939309055, + "grad_norm": 0.5718207781290344, + "learning_rate": 3.772108150973786e-05, + "loss": 0.2918, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11886520683765411, + "step": 895, + "valid_targets_mean": 3282.4, + "valid_targets_min": 541 + }, + { + "epoch": 1.6797385620915033, + "grad_norm": 0.5718605047410331, + "learning_rate": 3.767775046480004e-05, + "loss": 0.3023, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12789753079414368, + "step": 900, + "valid_targets_mean": 3396.5, + "valid_targets_min": 975 + }, + { + "epoch": 1.6890756302521008, + "grad_norm": 0.6416071956632122, + "learning_rate": 3.7634036717543224e-05, + "loss": 0.2986, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2047657072544098, + "step": 905, + "valid_targets_mean": 4679.4, + "valid_targets_min": 2236 + }, + { + "epoch": 1.6984126984126984, + "grad_norm": 0.5820126927656643, + "learning_rate": 3.7589941214318156e-05, + "loss": 0.3012, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11794840544462204, + "step": 910, + "valid_targets_mean": 3893.4, + "valid_targets_min": 554 + }, + { + "epoch": 1.7077497665732961, + "grad_norm": 0.5690570443337464, + "learning_rate": 3.754546490974016e-05, + "loss": 0.3084, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12699103355407715, + "step": 915, + "valid_targets_mean": 3685.1, + "valid_targets_min": 2242 + }, + { + "epoch": 1.7170868347338937, + "grad_norm": 0.5442979773312249, + "learning_rate": 3.750060876666844e-05, + "loss": 0.2941, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14266455173492432, + "step": 920, + "valid_targets_mean": 4402.6, + "valid_targets_min": 701 + }, + { + "epoch": 1.7264239028944912, + "grad_norm": 0.5278630485861002, + "learning_rate": 3.7455373756185276e-05, + "loss": 0.3074, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13597539067268372, + "step": 925, + "valid_targets_mean": 5509.1, + "valid_targets_min": 619 + }, + { + "epoch": 1.7357609710550888, + "grad_norm": 0.6992136820465524, + "learning_rate": 3.740976085757495e-05, + "loss": 0.3028, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15323558449745178, + "step": 930, + "valid_targets_mean": 3350.9, + "valid_targets_min": 1438 + }, + { + "epoch": 1.7450980392156863, + "grad_norm": 0.6768175495857408, + "learning_rate": 3.736377105830259e-05, + "loss": 0.3012, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22255977988243103, + "step": 935, + "valid_targets_mean": 4122.4, + "valid_targets_min": 1885 + }, + { + "epoch": 1.7544351073762838, + "grad_norm": 0.6550517371985238, + "learning_rate": 3.731740535399278e-05, + "loss": 0.3027, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15840685367584229, + "step": 940, + "valid_targets_mean": 4409.4, + "valid_targets_min": 409 + }, + { + "epoch": 1.7637721755368814, + "grad_norm": 0.5017901108338207, + "learning_rate": 3.7270664748407985e-05, + "loss": 0.3157, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12858417630195618, + "step": 945, + "valid_targets_mean": 5373.9, + "valid_targets_min": 1336 + }, + { + "epoch": 1.773109243697479, + "grad_norm": 0.5302623616389018, + "learning_rate": 3.722355025342686e-05, + "loss": 0.2999, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14893680810928345, + "step": 950, + "valid_targets_mean": 5269.2, + "valid_targets_min": 2090 + }, + { + "epoch": 1.7824463118580764, + "grad_norm": 0.4891977040564889, + "learning_rate": 3.7176062889022296e-05, + "loss": 0.2763, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1149478331208229, + "step": 955, + "valid_targets_mean": 4636.0, + "valid_targets_min": 1013 + }, + { + "epoch": 1.791783380018674, + "grad_norm": 0.5599427856410248, + "learning_rate": 3.7128203683239384e-05, + "loss": 0.2942, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14062990248203278, + "step": 960, + "valid_targets_mean": 5140.0, + "valid_targets_min": 2050 + }, + { + "epoch": 1.8011204481792717, + "grad_norm": 0.6463153886171898, + "learning_rate": 3.7079973672173136e-05, + "loss": 0.3017, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16412320733070374, + "step": 965, + "valid_targets_mean": 4496.9, + "valid_targets_min": 826 + }, + { + "epoch": 1.8104575163398693, + "grad_norm": 0.5817630148688949, + "learning_rate": 3.703137389994606e-05, + "loss": 0.3058, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15573111176490784, + "step": 970, + "valid_targets_mean": 4136.2, + "valid_targets_min": 1815 + }, + { + "epoch": 1.8197945845004668, + "grad_norm": 0.5783661377289513, + "learning_rate": 3.698240541868554e-05, + "loss": 0.2837, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14567843079566956, + "step": 975, + "valid_targets_mean": 3623.8, + "valid_targets_min": 1460 + }, + { + "epoch": 1.8291316526610646, + "grad_norm": 0.5975564460692906, + "learning_rate": 3.693306928850109e-05, + "loss": 0.3016, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13825063407421112, + "step": 980, + "valid_targets_mean": 4064.9, + "valid_targets_min": 857 + }, + { + "epoch": 1.8384687208216621, + "grad_norm": 0.5812430384637023, + "learning_rate": 3.688336657746137e-05, + "loss": 0.2978, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1373342126607895, + "step": 985, + "valid_targets_mean": 3941.2, + "valid_targets_min": 1651 + }, + { + "epoch": 1.8478057889822597, + "grad_norm": 0.5063698286114703, + "learning_rate": 3.683329836157111e-05, + "loss": 0.2855, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1268635392189026, + "step": 990, + "valid_targets_mean": 5044.2, + "valid_targets_min": 1431 + }, + { + "epoch": 1.8571428571428572, + "grad_norm": 0.7524637922798835, + "learning_rate": 3.678286572474776e-05, + "loss": 0.302, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13883943855762482, + "step": 995, + "valid_targets_mean": 4169.5, + "valid_targets_min": 2313 + }, + { + "epoch": 1.8664799253034547, + "grad_norm": 0.5290879100710519, + "learning_rate": 3.673206975879804e-05, + "loss": 0.2918, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16375523805618286, + "step": 1000, + "valid_targets_mean": 4760.6, + "valid_targets_min": 1889 + }, + { + "epoch": 1.8758169934640523, + "grad_norm": 0.5761483499622887, + "learning_rate": 3.668091156339435e-05, + "loss": 0.2981, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1597619652748108, + "step": 1005, + "valid_targets_mean": 4530.8, + "valid_targets_min": 1089 + }, + { + "epoch": 1.8851540616246498, + "grad_norm": 0.5376364386916198, + "learning_rate": 3.662939224605091e-05, + "loss": 0.3012, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17713791131973267, + "step": 1010, + "valid_targets_mean": 5880.8, + "valid_targets_min": 1026 + }, + { + "epoch": 1.8944911297852474, + "grad_norm": 0.5481477843587319, + "learning_rate": 3.657751292209979e-05, + "loss": 0.3099, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14710435271263123, + "step": 1015, + "valid_targets_mean": 4793.8, + "valid_targets_min": 1751 + }, + { + "epoch": 1.903828197945845, + "grad_norm": 0.4368760617403369, + "learning_rate": 3.65252747146668e-05, + "loss": 0.291, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14206725358963013, + "step": 1020, + "valid_targets_mean": 6522.5, + "valid_targets_min": 2600 + }, + { + "epoch": 1.9131652661064424, + "grad_norm": 0.5098812390427527, + "learning_rate": 3.647267875464714e-05, + "loss": 0.307, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13344484567642212, + "step": 1025, + "valid_targets_mean": 4389.5, + "valid_targets_min": 1079 + }, + { + "epoch": 1.9225023342670402, + "grad_norm": 0.6083450739551823, + "learning_rate": 3.6419726180680914e-05, + "loss": 0.2948, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15235048532485962, + "step": 1030, + "valid_targets_mean": 3545.0, + "valid_targets_min": 1303 + }, + { + "epoch": 1.9318394024276377, + "grad_norm": 0.5276048740170828, + "learning_rate": 3.636641813912851e-05, + "loss": 0.3001, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15757082402706146, + "step": 1035, + "valid_targets_mean": 4662.6, + "valid_targets_min": 673 + }, + { + "epoch": 1.9411764705882353, + "grad_norm": 0.5849435396685478, + "learning_rate": 3.631275578404578e-05, + "loss": 0.3052, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1456948220729828, + "step": 1040, + "valid_targets_mean": 4085.0, + "valid_targets_min": 1516 + }, + { + "epoch": 1.9505135387488328, + "grad_norm": 0.5390598468936411, + "learning_rate": 3.6258740277159014e-05, + "loss": 0.2933, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16691184043884277, + "step": 1045, + "valid_targets_mean": 5488.8, + "valid_targets_min": 1775 + }, + { + "epoch": 1.9598506069094306, + "grad_norm": 0.55310729200931, + "learning_rate": 3.6204372787839844e-05, + "loss": 0.3041, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15889444947242737, + "step": 1050, + "valid_targets_mean": 4039.4, + "valid_targets_min": 2113 + }, + { + "epoch": 1.9691876750700281, + "grad_norm": 0.551754718371844, + "learning_rate": 3.614965449307989e-05, + "loss": 0.304, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1810804009437561, + "step": 1055, + "valid_targets_mean": 4952.8, + "valid_targets_min": 626 + }, + { + "epoch": 1.9785247432306257, + "grad_norm": 0.517058993878925, + "learning_rate": 3.609458657746531e-05, + "loss": 0.2746, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12134496867656708, + "step": 1060, + "valid_targets_mean": 4181.0, + "valid_targets_min": 1120 + }, + { + "epoch": 1.9878618113912232, + "grad_norm": 0.5673242453917726, + "learning_rate": 3.603917023315111e-05, + "loss": 0.2922, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13162624835968018, + "step": 1065, + "valid_targets_mean": 3840.6, + "valid_targets_min": 1087 + }, + { + "epoch": 1.9971988795518207, + "grad_norm": 0.573059141181211, + "learning_rate": 3.598340665983539e-05, + "loss": 0.2965, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15008684992790222, + "step": 1070, + "valid_targets_mean": 3753.0, + "valid_targets_min": 1399 + }, + { + "epoch": 2.0056022408963585, + "grad_norm": 0.6289964327720287, + "learning_rate": 3.592729706473333e-05, + "loss": 0.2878, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1305919587612152, + "step": 1075, + "valid_targets_mean": 3301.4, + "valid_targets_min": 1446 + }, + { + "epoch": 2.014939309056956, + "grad_norm": 0.5231920660756277, + "learning_rate": 3.587084266255108e-05, + "loss": 0.2608, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12542250752449036, + "step": 1080, + "valid_targets_mean": 5202.9, + "valid_targets_min": 2047 + }, + { + "epoch": 2.0242763772175536, + "grad_norm": 0.5409801331264958, + "learning_rate": 3.581404467545946e-05, + "loss": 0.2683, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1235833466053009, + "step": 1085, + "valid_targets_mean": 4475.0, + "valid_targets_min": 203 + }, + { + "epoch": 2.033613445378151, + "grad_norm": 0.609880002055254, + "learning_rate": 3.575690433306748e-05, + "loss": 0.2769, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17490249872207642, + "step": 1090, + "valid_targets_mean": 4918.8, + "valid_targets_min": 1528 + }, + { + "epoch": 2.0429505135387487, + "grad_norm": 0.6008829134146442, + "learning_rate": 3.5699422872395735e-05, + "loss": 0.2693, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11877821385860443, + "step": 1095, + "valid_targets_mean": 3280.1, + "valid_targets_min": 515 + }, + { + "epoch": 2.052287581699346, + "grad_norm": 0.5466477755989211, + "learning_rate": 3.564160153784965e-05, + "loss": 0.2571, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1493881642818451, + "step": 1100, + "valid_targets_mean": 5209.1, + "valid_targets_min": 2266 + }, + { + "epoch": 2.0616246498599438, + "grad_norm": 0.50686139389742, + "learning_rate": 3.5583441581192484e-05, + "loss": 0.2672, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08935274183750153, + "step": 1105, + "valid_targets_mean": 3966.4, + "valid_targets_min": 554 + }, + { + "epoch": 2.0709617180205417, + "grad_norm": 0.5725707820303284, + "learning_rate": 3.5524944261518295e-05, + "loss": 0.2745, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16849642992019653, + "step": 1110, + "valid_targets_mean": 5336.0, + "valid_targets_min": 2093 + }, + { + "epoch": 2.0802987861811393, + "grad_norm": 0.6405960945631906, + "learning_rate": 3.546611084522462e-05, + "loss": 0.2661, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12590524554252625, + "step": 1115, + "valid_targets_mean": 3358.5, + "valid_targets_min": 428 + }, + { + "epoch": 2.089635854341737, + "grad_norm": 0.5761860508604965, + "learning_rate": 3.540694260598512e-05, + "loss": 0.2721, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13758838176727295, + "step": 1120, + "valid_targets_mean": 4280.8, + "valid_targets_min": 1107 + }, + { + "epoch": 2.0989729225023344, + "grad_norm": 0.6013804140127574, + "learning_rate": 3.5347440824721946e-05, + "loss": 0.2641, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12691861391067505, + "step": 1125, + "valid_targets_mean": 3730.1, + "valid_targets_min": 1301 + }, + { + "epoch": 2.108309990662932, + "grad_norm": 0.6178455505597555, + "learning_rate": 3.5287606789578046e-05, + "loss": 0.2772, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1531383991241455, + "step": 1130, + "valid_targets_mean": 4195.9, + "valid_targets_min": 222 + }, + { + "epoch": 2.1176470588235294, + "grad_norm": 0.6290575811801294, + "learning_rate": 3.522744179588928e-05, + "loss": 0.2703, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1344100683927536, + "step": 1135, + "valid_targets_mean": 3293.2, + "valid_targets_min": 1024 + }, + { + "epoch": 2.126984126984127, + "grad_norm": 0.6683870332783652, + "learning_rate": 3.516694714615637e-05, + "loss": 0.2688, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1465536653995514, + "step": 1140, + "valid_targets_mean": 3825.9, + "valid_targets_min": 1968 + }, + { + "epoch": 2.1363211951447245, + "grad_norm": 0.5344253224229382, + "learning_rate": 3.510612415001668e-05, + "loss": 0.2627, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08985987305641174, + "step": 1145, + "valid_targets_mean": 3614.0, + "valid_targets_min": 232 + }, + { + "epoch": 2.145658263305322, + "grad_norm": 0.5867129650314131, + "learning_rate": 3.50449741242159e-05, + "loss": 0.2724, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11107715964317322, + "step": 1150, + "valid_targets_mean": 3554.8, + "valid_targets_min": 791 + }, + { + "epoch": 2.1549953314659196, + "grad_norm": 0.5615155356317572, + "learning_rate": 3.4983498392579526e-05, + "loss": 0.2668, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11732366681098938, + "step": 1155, + "valid_targets_mean": 3986.0, + "valid_targets_min": 1950 + }, + { + "epoch": 2.164332399626517, + "grad_norm": 0.6549790988414723, + "learning_rate": 3.492169828598419e-05, + "loss": 0.2853, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1383931040763855, + "step": 1160, + "valid_targets_mean": 3551.4, + "valid_targets_min": 483 + }, + { + "epoch": 2.1736694677871147, + "grad_norm": 0.6079363281954829, + "learning_rate": 3.485957514232886e-05, + "loss": 0.2813, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15492889285087585, + "step": 1165, + "valid_targets_mean": 5034.4, + "valid_targets_min": 1164 + }, + { + "epoch": 2.183006535947712, + "grad_norm": 0.6222451263429442, + "learning_rate": 3.479713030650587e-05, + "loss": 0.262, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11601906269788742, + "step": 1170, + "valid_targets_mean": 3689.1, + "valid_targets_min": 516 + }, + { + "epoch": 2.19234360410831, + "grad_norm": 0.6057986109999919, + "learning_rate": 3.4734365130371816e-05, + "loss": 0.2661, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15222899615764618, + "step": 1175, + "valid_targets_mean": 4421.2, + "valid_targets_min": 2433 + }, + { + "epoch": 2.2016806722689077, + "grad_norm": 0.5492745940219825, + "learning_rate": 3.467128097271828e-05, + "loss": 0.2637, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12324671447277069, + "step": 1180, + "valid_targets_mean": 4348.4, + "valid_targets_min": 1707 + }, + { + "epoch": 2.2110177404295053, + "grad_norm": 0.5614287009351661, + "learning_rate": 3.46078791992424e-05, + "loss": 0.2681, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13080158829689026, + "step": 1185, + "valid_targets_mean": 4493.4, + "valid_targets_min": 2640 + }, + { + "epoch": 2.220354808590103, + "grad_norm": 0.5356133602064118, + "learning_rate": 3.454416118251734e-05, + "loss": 0.2649, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10024262219667435, + "step": 1190, + "valid_targets_mean": 4154.4, + "valid_targets_min": 1840 + }, + { + "epoch": 2.2296918767507004, + "grad_norm": 0.530818014918039, + "learning_rate": 3.448012830196255e-05, + "loss": 0.27, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1275888979434967, + "step": 1195, + "valid_targets_mean": 5011.9, + "valid_targets_min": 1681 + }, + { + "epoch": 2.239028944911298, + "grad_norm": 0.6695022662936407, + "learning_rate": 3.441578194381389e-05, + "loss": 0.2531, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14178727567195892, + "step": 1200, + "valid_targets_mean": 4290.0, + "valid_targets_min": 1450 + }, + { + "epoch": 2.2483660130718954, + "grad_norm": 0.5215240650701132, + "learning_rate": 3.435112350109367e-05, + "loss": 0.2611, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16548776626586914, + "step": 1205, + "valid_targets_mean": 6759.2, + "valid_targets_min": 331 + }, + { + "epoch": 2.257703081232493, + "grad_norm": 0.5886934744684844, + "learning_rate": 3.428615437358043e-05, + "loss": 0.2711, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1632174253463745, + "step": 1210, + "valid_targets_mean": 4917.8, + "valid_targets_min": 1837 + }, + { + "epoch": 2.2670401493930905, + "grad_norm": 0.4414118912700557, + "learning_rate": 3.422087596777869e-05, + "loss": 0.2618, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09914036095142365, + "step": 1215, + "valid_targets_mean": 5580.4, + "valid_targets_min": 1516 + }, + { + "epoch": 2.276377217553688, + "grad_norm": 0.5402906608394005, + "learning_rate": 3.415528969688846e-05, + "loss": 0.2603, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12900175154209137, + "step": 1220, + "valid_targets_mean": 5028.2, + "valid_targets_min": 1054 + }, + { + "epoch": 2.2857142857142856, + "grad_norm": 0.5956219621067834, + "learning_rate": 3.408939698077468e-05, + "loss": 0.2729, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12066184729337692, + "step": 1225, + "valid_targets_mean": 4252.5, + "valid_targets_min": 661 + }, + { + "epoch": 2.295051353874883, + "grad_norm": 0.6355456182390576, + "learning_rate": 3.402319924593645e-05, + "loss": 0.2798, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16210763156414032, + "step": 1230, + "valid_targets_mean": 4088.9, + "valid_targets_min": 1935 + }, + { + "epoch": 2.3043884220354807, + "grad_norm": 0.5463725629994939, + "learning_rate": 3.395669792547618e-05, + "loss": 0.2568, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13191089034080505, + "step": 1235, + "valid_targets_mean": 5007.2, + "valid_targets_min": 1735 + }, + { + "epoch": 2.313725490196078, + "grad_norm": 0.6404265650279691, + "learning_rate": 3.388989445906853e-05, + "loss": 0.2731, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14546075463294983, + "step": 1240, + "valid_targets_mean": 3618.6, + "valid_targets_min": 440 + }, + { + "epoch": 2.323062558356676, + "grad_norm": 0.581315713946733, + "learning_rate": 3.382279029292928e-05, + "loss": 0.2697, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11508671939373016, + "step": 1245, + "valid_targets_mean": 4513.2, + "valid_targets_min": 1893 + }, + { + "epoch": 2.3323996265172737, + "grad_norm": 0.5470553866078969, + "learning_rate": 3.3755386879783985e-05, + "loss": 0.2605, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1427452266216278, + "step": 1250, + "valid_targets_mean": 5862.8, + "valid_targets_min": 1267 + }, + { + "epoch": 2.3417366946778713, + "grad_norm": 0.7064119978021407, + "learning_rate": 3.368768567883655e-05, + "loss": 0.277, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13777950406074524, + "step": 1255, + "valid_targets_mean": 3830.6, + "valid_targets_min": 1241 + }, + { + "epoch": 2.351073762838469, + "grad_norm": 0.5824763559845575, + "learning_rate": 3.3619688155737646e-05, + "loss": 0.2619, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15295156836509705, + "step": 1260, + "valid_targets_mean": 4753.1, + "valid_targets_min": 2545 + }, + { + "epoch": 2.3604108309990663, + "grad_norm": 0.6407428222250979, + "learning_rate": 3.355139578255294e-05, + "loss": 0.2708, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10716196894645691, + "step": 1265, + "valid_targets_mean": 2856.0, + "valid_targets_min": 1298 + }, + { + "epoch": 2.369747899159664, + "grad_norm": 0.5868947845533452, + "learning_rate": 3.348281003773127e-05, + "loss": 0.2762, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11341151595115662, + "step": 1270, + "valid_targets_mean": 3961.1, + "valid_targets_min": 1420 + }, + { + "epoch": 2.3790849673202614, + "grad_norm": 0.6062798859864545, + "learning_rate": 3.3413932406072626e-05, + "loss": 0.264, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12417541444301605, + "step": 1275, + "valid_targets_mean": 4213.8, + "valid_targets_min": 1719 + }, + { + "epoch": 2.388422035480859, + "grad_norm": 0.5696685234669197, + "learning_rate": 3.3344764378695984e-05, + "loss": 0.2701, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1483900249004364, + "step": 1280, + "valid_targets_mean": 4631.0, + "valid_targets_min": 1154 + }, + { + "epoch": 2.3977591036414565, + "grad_norm": 0.6331391712793214, + "learning_rate": 3.3275307453007066e-05, + "loss": 0.26, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14174684882164001, + "step": 1285, + "valid_targets_mean": 4766.0, + "valid_targets_min": 3742 + }, + { + "epoch": 2.407096171802054, + "grad_norm": 0.5888826438833322, + "learning_rate": 3.3205563132665876e-05, + "loss": 0.2601, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15704655647277832, + "step": 1290, + "valid_targets_mean": 5314.2, + "valid_targets_min": 2938 + }, + { + "epoch": 2.4164332399626516, + "grad_norm": 0.5587068861565072, + "learning_rate": 3.313553292755419e-05, + "loss": 0.2681, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16655419766902924, + "step": 1295, + "valid_targets_mean": 4885.5, + "valid_targets_min": 2559 + }, + { + "epoch": 2.425770308123249, + "grad_norm": 0.5919314398880574, + "learning_rate": 3.306521835374284e-05, + "loss": 0.2633, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12082069367170334, + "step": 1300, + "valid_targets_mean": 3966.6, + "valid_targets_min": 1690 + }, + { + "epoch": 2.435107376283847, + "grad_norm": 0.5297023493113947, + "learning_rate": 3.2994620933458896e-05, + "loss": 0.2712, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10435240715742111, + "step": 1305, + "valid_targets_mean": 4163.2, + "valid_targets_min": 241 + }, + { + "epoch": 2.4444444444444446, + "grad_norm": 0.5803805765552456, + "learning_rate": 3.292374219505274e-05, + "loss": 0.2783, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13058415055274963, + "step": 1310, + "valid_targets_mean": 4168.2, + "valid_targets_min": 850 + }, + { + "epoch": 2.453781512605042, + "grad_norm": 0.6202674190299635, + "learning_rate": 3.285258367296496e-05, + "loss": 0.271, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10562821477651596, + "step": 1315, + "valid_targets_mean": 3027.5, + "valid_targets_min": 1486 + }, + { + "epoch": 2.4631185807656397, + "grad_norm": 0.5276182016222534, + "learning_rate": 3.278114690769307e-05, + "loss": 0.2732, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13356448709964752, + "step": 1320, + "valid_targets_mean": 5476.2, + "valid_targets_min": 2643 + }, + { + "epoch": 2.4724556489262373, + "grad_norm": 0.5194661077366948, + "learning_rate": 3.2709433445758304e-05, + "loss": 0.2569, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12140780687332153, + "step": 1325, + "valid_targets_mean": 4183.4, + "valid_targets_min": 205 + }, + { + "epoch": 2.481792717086835, + "grad_norm": 0.5631779014342798, + "learning_rate": 3.263744483967198e-05, + "loss": 0.2588, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13764506578445435, + "step": 1330, + "valid_targets_mean": 3985.6, + "valid_targets_min": 497 + }, + { + "epoch": 2.4911297852474323, + "grad_norm": 0.5853873662965152, + "learning_rate": 3.2565182647902015e-05, + "loss": 0.2771, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1506347954273224, + "step": 1335, + "valid_targets_mean": 5184.2, + "valid_targets_min": 2202 + }, + { + "epoch": 2.50046685340803, + "grad_norm": 0.5993876637608287, + "learning_rate": 3.249264843483909e-05, + "loss": 0.253, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13839717209339142, + "step": 1340, + "valid_targets_mean": 4355.5, + "valid_targets_min": 1497 + }, + { + "epoch": 2.5098039215686274, + "grad_norm": 0.581722095743272, + "learning_rate": 3.2419843770762836e-05, + "loss": 0.2663, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12377430498600006, + "step": 1345, + "valid_targets_mean": 3709.0, + "valid_targets_min": 768 + }, + { + "epoch": 2.519140989729225, + "grad_norm": 0.5890419297370926, + "learning_rate": 3.2346770231807843e-05, + "loss": 0.2801, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1415756344795227, + "step": 1350, + "valid_targets_mean": 4347.5, + "valid_targets_min": 1760 + }, + { + "epoch": 2.5284780578898225, + "grad_norm": 0.597281488853631, + "learning_rate": 3.227342939992951e-05, + "loss": 0.2663, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12997502088546753, + "step": 1355, + "valid_targets_mean": 4392.5, + "valid_targets_min": 2317 + }, + { + "epoch": 2.53781512605042, + "grad_norm": 0.5774111499795511, + "learning_rate": 3.2199822862869803e-05, + "loss": 0.2597, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1421101689338684, + "step": 1360, + "valid_targets_mean": 4617.8, + "valid_targets_min": 1783 + }, + { + "epoch": 2.5471521942110176, + "grad_norm": 0.8119552336865475, + "learning_rate": 3.212595221412291e-05, + "loss": 0.2704, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16131576895713806, + "step": 1365, + "valid_targets_mean": 5005.5, + "valid_targets_min": 1622 + }, + { + "epoch": 2.556489262371615, + "grad_norm": 0.5531086682631178, + "learning_rate": 3.2051819052900706e-05, + "loss": 0.2623, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11332675069570541, + "step": 1370, + "valid_targets_mean": 4319.1, + "valid_targets_min": 204 + }, + { + "epoch": 2.5658263305322127, + "grad_norm": 0.8943451686587378, + "learning_rate": 3.1977424984098154e-05, + "loss": 0.2739, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11362797021865845, + "step": 1375, + "valid_targets_mean": 3739.2, + "valid_targets_min": 1011 + }, + { + "epoch": 2.57516339869281, + "grad_norm": 0.7034068033944679, + "learning_rate": 3.190277161825857e-05, + "loss": 0.2826, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15570110082626343, + "step": 1380, + "valid_targets_mean": 5485.9, + "valid_targets_min": 433 + }, + { + "epoch": 2.584500466853408, + "grad_norm": 0.561839838399755, + "learning_rate": 3.182786057153871e-05, + "loss": 0.2609, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14492382109165192, + "step": 1385, + "valid_targets_mean": 4810.6, + "valid_targets_min": 1831 + }, + { + "epoch": 2.5938375350140057, + "grad_norm": 0.480176321740602, + "learning_rate": 3.175269346567386e-05, + "loss": 0.2644, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1473981738090515, + "step": 1390, + "valid_targets_mean": 6373.2, + "valid_targets_min": 648 + }, + { + "epoch": 2.6031746031746033, + "grad_norm": 0.5225428339204804, + "learning_rate": 3.167727192794265e-05, + "loss": 0.2665, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10643284022808075, + "step": 1395, + "valid_targets_mean": 4128.8, + "valid_targets_min": 1455 + }, + { + "epoch": 2.612511671335201, + "grad_norm": 0.5676318322464813, + "learning_rate": 3.1601597591131864e-05, + "loss": 0.2688, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1595524549484253, + "step": 1400, + "valid_targets_mean": 4656.8, + "valid_targets_min": 453 + }, + { + "epoch": 2.6218487394957983, + "grad_norm": 0.5929981876587711, + "learning_rate": 3.152567209350109e-05, + "loss": 0.2668, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14102894067764282, + "step": 1405, + "valid_targets_mean": 4431.4, + "valid_targets_min": 2280 + }, + { + "epoch": 2.631185807656396, + "grad_norm": 0.561210991663388, + "learning_rate": 3.144949707874726e-05, + "loss": 0.2716, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.127023845911026, + "step": 1410, + "valid_targets_mean": 4127.8, + "valid_targets_min": 1505 + }, + { + "epoch": 2.6405228758169934, + "grad_norm": 0.6404579648249683, + "learning_rate": 3.137307419596904e-05, + "loss": 0.2768, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15716612339019775, + "step": 1415, + "valid_targets_mean": 4155.0, + "valid_targets_min": 2365 + }, + { + "epoch": 2.649859943977591, + "grad_norm": 0.6024651523559907, + "learning_rate": 3.129640509963114e-05, + "loss": 0.2625, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11580339819192886, + "step": 1420, + "valid_targets_mean": 4471.1, + "valid_targets_min": 1300 + }, + { + "epoch": 2.6591970121381885, + "grad_norm": 0.5369076580087565, + "learning_rate": 3.121949144952853e-05, + "loss": 0.255, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12121870368719101, + "step": 1425, + "valid_targets_mean": 4702.2, + "valid_targets_min": 1680 + }, + { + "epoch": 2.668534080298786, + "grad_norm": 0.6146134818117958, + "learning_rate": 3.1142334910750426e-05, + "loss": 0.2701, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1513212025165558, + "step": 1430, + "valid_targets_mean": 4036.5, + "valid_targets_min": 653 + }, + { + "epoch": 2.677871148459384, + "grad_norm": 0.6885047901349887, + "learning_rate": 3.1064937153644366e-05, + "loss": 0.2671, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13335812091827393, + "step": 1435, + "valid_targets_mean": 3662.0, + "valid_targets_min": 1523 + }, + { + "epoch": 2.6872082166199815, + "grad_norm": 0.7120056109369662, + "learning_rate": 3.098729985377992e-05, + "loss": 0.2868, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11324841529130936, + "step": 1440, + "valid_targets_mean": 3588.0, + "valid_targets_min": 747 + }, + { + "epoch": 2.696545284780579, + "grad_norm": 0.5746351136193364, + "learning_rate": 3.090942469191251e-05, + "loss": 0.2688, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1382647007703781, + "step": 1445, + "valid_targets_mean": 4746.4, + "valid_targets_min": 532 + }, + { + "epoch": 2.7058823529411766, + "grad_norm": 0.565010395699294, + "learning_rate": 3.083131335394696e-05, + "loss": 0.278, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13708089292049408, + "step": 1450, + "valid_targets_mean": 4281.9, + "valid_targets_min": 1724 + }, + { + "epoch": 2.715219421101774, + "grad_norm": 0.5133206046558262, + "learning_rate": 3.075296753090105e-05, + "loss": 0.2847, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12062004208564758, + "step": 1455, + "valid_targets_mean": 5014.5, + "valid_targets_min": 1628 + }, + { + "epoch": 2.7245564892623717, + "grad_norm": 0.4852284847946437, + "learning_rate": 3.0674388918868876e-05, + "loss": 0.2772, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14060911536216736, + "step": 1460, + "valid_targets_mean": 6529.0, + "valid_targets_min": 2829 + }, + { + "epoch": 2.7338935574229692, + "grad_norm": 0.543699291462708, + "learning_rate": 3.0595579218984124e-05, + "loss": 0.2752, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17192405462265015, + "step": 1465, + "valid_targets_mean": 6859.8, + "valid_targets_min": 1978 + }, + { + "epoch": 2.743230625583567, + "grad_norm": 0.6120661929202282, + "learning_rate": 3.0516540137383287e-05, + "loss": 0.2641, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13601776957511902, + "step": 1470, + "valid_targets_mean": 5242.5, + "valid_targets_min": 3359 + }, + { + "epoch": 2.7525676937441643, + "grad_norm": 0.5854927510764434, + "learning_rate": 3.0437273385168677e-05, + "loss": 0.2637, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20222458243370056, + "step": 1475, + "valid_targets_mean": 5797.0, + "valid_targets_min": 449 + }, + { + "epoch": 2.761904761904762, + "grad_norm": 0.5420243217705215, + "learning_rate": 3.0357780678371413e-05, + "loss": 0.2707, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09299954771995544, + "step": 1480, + "valid_targets_mean": 3598.1, + "valid_targets_min": 423 + }, + { + "epoch": 2.7712418300653594, + "grad_norm": 0.517783445063666, + "learning_rate": 3.027806373791426e-05, + "loss": 0.2715, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1401902735233307, + "step": 1485, + "valid_targets_mean": 5828.1, + "valid_targets_min": 2318 + }, + { + "epoch": 2.780578898225957, + "grad_norm": 0.6251345115345666, + "learning_rate": 3.0198124289574378e-05, + "loss": 0.2595, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11012600362300873, + "step": 1490, + "valid_targets_mean": 5007.6, + "valid_targets_min": 1406 + }, + { + "epoch": 2.7899159663865545, + "grad_norm": 0.5745401996108711, + "learning_rate": 3.011796406394596e-05, + "loss": 0.2686, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14370499551296234, + "step": 1495, + "valid_targets_mean": 5085.8, + "valid_targets_min": 508 + }, + { + "epoch": 2.799253034547152, + "grad_norm": 0.6546130130502117, + "learning_rate": 3.0037584796402767e-05, + "loss": 0.2848, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13107800483703613, + "step": 1500, + "valid_targets_mean": 2866.9, + "valid_targets_min": 1399 + }, + { + "epoch": 2.8085901027077496, + "grad_norm": 0.6634236687112981, + "learning_rate": 2.9956988227060536e-05, + "loss": 0.2808, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15548290312290192, + "step": 1505, + "valid_targets_mean": 3386.6, + "valid_targets_min": 1208 + }, + { + "epoch": 2.817927170868347, + "grad_norm": 0.5619835417691904, + "learning_rate": 2.9876176100739368e-05, + "loss": 0.2645, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15587303042411804, + "step": 1510, + "valid_targets_mean": 5078.5, + "valid_targets_min": 2105 + }, + { + "epoch": 2.8272642390289446, + "grad_norm": 0.619579347952498, + "learning_rate": 2.979515016692589e-05, + "loss": 0.2712, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1440526843070984, + "step": 1515, + "valid_targets_mean": 4344.1, + "valid_targets_min": 853 + }, + { + "epoch": 2.8366013071895426, + "grad_norm": 0.5361220945720404, + "learning_rate": 2.9713912179735404e-05, + "loss": 0.277, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13349281251430511, + "step": 1520, + "valid_targets_mean": 5412.4, + "valid_targets_min": 1415 + }, + { + "epoch": 2.84593837535014, + "grad_norm": 0.5953188307883882, + "learning_rate": 2.9632463897873935e-05, + "loss": 0.2738, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1630636751651764, + "step": 1525, + "valid_targets_mean": 4778.8, + "valid_targets_min": 1646 + }, + { + "epoch": 2.8552754435107377, + "grad_norm": 0.5691625657581211, + "learning_rate": 2.9550807084600134e-05, + "loss": 0.261, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12363427877426147, + "step": 1530, + "valid_targets_mean": 3846.9, + "valid_targets_min": 290 + }, + { + "epoch": 2.8646125116713352, + "grad_norm": 0.5612768054890396, + "learning_rate": 2.946894350768709e-05, + "loss": 0.2748, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11463121324777603, + "step": 1535, + "valid_targets_mean": 3417.2, + "valid_targets_min": 1891 + }, + { + "epoch": 2.8739495798319328, + "grad_norm": 0.5719019207281497, + "learning_rate": 2.9386874939384103e-05, + "loss": 0.262, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13431930541992188, + "step": 1540, + "valid_targets_mean": 4896.5, + "valid_targets_min": 2346 + }, + { + "epoch": 2.8832866479925303, + "grad_norm": 0.5846248282999258, + "learning_rate": 2.9304603156378283e-05, + "loss": 0.2667, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15737298130989075, + "step": 1545, + "valid_targets_mean": 4441.0, + "valid_targets_min": 1745 + }, + { + "epoch": 2.892623716153128, + "grad_norm": 0.6275755897181784, + "learning_rate": 2.9222129939756104e-05, + "loss": 0.2567, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1219942718744278, + "step": 1550, + "valid_targets_mean": 3536.1, + "valid_targets_min": 867 + }, + { + "epoch": 2.9019607843137254, + "grad_norm": 0.5417491982690265, + "learning_rate": 2.913945707496482e-05, + "loss": 0.2687, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11118357628583908, + "step": 1555, + "valid_targets_mean": 3864.8, + "valid_targets_min": 1204 + }, + { + "epoch": 2.911297852474323, + "grad_norm": 0.5242166550352921, + "learning_rate": 2.9056586351773854e-05, + "loss": 0.2486, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13110053539276123, + "step": 1560, + "valid_targets_mean": 4668.1, + "valid_targets_min": 2157 + }, + { + "epoch": 2.9206349206349205, + "grad_norm": 0.5093086948788941, + "learning_rate": 2.8973519564236024e-05, + "loss": 0.2548, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14074693620204926, + "step": 1565, + "valid_targets_mean": 5207.4, + "valid_targets_min": 2182 + }, + { + "epoch": 2.9299719887955185, + "grad_norm": 0.7744852781999395, + "learning_rate": 2.8890258510648685e-05, + "loss": 0.2645, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16393780708312988, + "step": 1570, + "valid_targets_mean": 2832.9, + "valid_targets_min": 975 + }, + { + "epoch": 2.939309056956116, + "grad_norm": 0.5743358177166721, + "learning_rate": 2.8806804993514852e-05, + "loss": 0.2672, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08663859963417053, + "step": 1575, + "valid_targets_mean": 3032.6, + "valid_targets_min": 515 + }, + { + "epoch": 2.9486461251167135, + "grad_norm": 0.6712483409675128, + "learning_rate": 2.8723160819504133e-05, + "loss": 0.2806, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12447357922792435, + "step": 1580, + "valid_targets_mean": 2699.6, + "valid_targets_min": 1374 + }, + { + "epoch": 2.957983193277311, + "grad_norm": 0.47834926746370676, + "learning_rate": 2.8639327799413635e-05, + "loss": 0.2672, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12512102723121643, + "step": 1585, + "valid_targets_mean": 4781.4, + "valid_targets_min": 526 + }, + { + "epoch": 2.9673202614379086, + "grad_norm": 0.5476573544662984, + "learning_rate": 2.8555307748128752e-05, + "loss": 0.2632, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1451941877603531, + "step": 1590, + "valid_targets_mean": 5267.5, + "valid_targets_min": 1789 + }, + { + "epoch": 2.976657329598506, + "grad_norm": 0.5282488067063034, + "learning_rate": 2.847110248458389e-05, + "loss": 0.2555, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09722214937210083, + "step": 1595, + "valid_targets_mean": 3873.1, + "valid_targets_min": 209 + }, + { + "epoch": 2.9859943977591037, + "grad_norm": 0.5752619546721861, + "learning_rate": 2.838671383172307e-05, + "loss": 0.2637, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15933603048324585, + "step": 1600, + "valid_targets_mean": 5433.6, + "valid_targets_min": 3351 + }, + { + "epoch": 2.9953314659197012, + "grad_norm": 0.47585602203548755, + "learning_rate": 2.830214361646049e-05, + "loss": 0.2621, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1477259248495102, + "step": 1605, + "valid_targets_mean": 5640.6, + "valid_targets_min": 3543 + }, + { + "epoch": 3.003734827264239, + "grad_norm": 0.5398912037596993, + "learning_rate": 2.8217393669640944e-05, + "loss": 0.2486, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11716713011264801, + "step": 1610, + "valid_targets_mean": 4845.1, + "valid_targets_min": 1434 + }, + { + "epoch": 3.0130718954248366, + "grad_norm": 0.5766764309674295, + "learning_rate": 2.8132465826000207e-05, + "loss": 0.2355, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11776542663574219, + "step": 1615, + "valid_targets_mean": 4713.9, + "valid_targets_min": 1467 + }, + { + "epoch": 3.022408963585434, + "grad_norm": 0.6030627713426905, + "learning_rate": 2.8047361924125307e-05, + "loss": 0.2374, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1317153126001358, + "step": 1620, + "valid_targets_mean": 6159.8, + "valid_targets_min": 1814 + }, + { + "epoch": 3.0317460317460316, + "grad_norm": 0.5845245017388365, + "learning_rate": 2.7962083806414727e-05, + "loss": 0.2387, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1345043033361435, + "step": 1625, + "valid_targets_mean": 4951.0, + "valid_targets_min": 496 + }, + { + "epoch": 3.041083099906629, + "grad_norm": 0.6072312135826599, + "learning_rate": 2.7876633319038514e-05, + "loss": 0.2428, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16239473223686218, + "step": 1630, + "valid_targets_mean": 5415.2, + "valid_targets_min": 823 + }, + { + "epoch": 3.0504201680672267, + "grad_norm": 0.675497155200832, + "learning_rate": 2.779101231189831e-05, + "loss": 0.2461, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1600303053855896, + "step": 1635, + "valid_targets_mean": 3596.4, + "valid_targets_min": 2040 + }, + { + "epoch": 3.0597572362278243, + "grad_norm": 0.5503367276430657, + "learning_rate": 2.7705222638587308e-05, + "loss": 0.2458, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13220509886741638, + "step": 1640, + "valid_targets_mean": 5421.6, + "valid_targets_min": 1514 + }, + { + "epoch": 3.0690943043884222, + "grad_norm": 0.5825065808445947, + "learning_rate": 2.761926615635014e-05, + "loss": 0.2429, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08459004759788513, + "step": 1645, + "valid_targets_mean": 3891.6, + "valid_targets_min": 1181 + }, + { + "epoch": 3.0784313725490198, + "grad_norm": 0.6088835826267037, + "learning_rate": 2.7533144726042632e-05, + "loss": 0.2384, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0877801924943924, + "step": 1650, + "valid_targets_mean": 3279.2, + "valid_targets_min": 1770 + }, + { + "epoch": 3.0877684407096173, + "grad_norm": 0.5485442871928295, + "learning_rate": 2.7446860212091548e-05, + "loss": 0.2518, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13190056383609772, + "step": 1655, + "valid_targets_mean": 5794.4, + "valid_targets_min": 2271 + }, + { + "epoch": 3.097105508870215, + "grad_norm": 0.5850192131351082, + "learning_rate": 2.736041448245422e-05, + "loss": 0.2507, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13561426103115082, + "step": 1660, + "valid_targets_mean": 5983.4, + "valid_targets_min": 4543 + }, + { + "epoch": 3.1064425770308124, + "grad_norm": 0.7755597437121091, + "learning_rate": 2.7273809408578107e-05, + "loss": 0.2423, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1218942254781723, + "step": 1665, + "valid_targets_mean": 2428.4, + "valid_targets_min": 433 + }, + { + "epoch": 3.11577964519141, + "grad_norm": 0.6970297807180942, + "learning_rate": 2.7187046865360285e-05, + "loss": 0.2338, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14261086285114288, + "step": 1670, + "valid_targets_mean": 4966.0, + "valid_targets_min": 430 + }, + { + "epoch": 3.1251167133520075, + "grad_norm": 0.672739736732118, + "learning_rate": 2.7100128731106847e-05, + "loss": 0.244, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11493118852376938, + "step": 1675, + "valid_targets_mean": 3379.4, + "valid_targets_min": 642 + }, + { + "epoch": 3.134453781512605, + "grad_norm": 0.5543050879433515, + "learning_rate": 2.7013056887492255e-05, + "loss": 0.2311, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11292560398578644, + "step": 1680, + "valid_targets_mean": 6055.4, + "valid_targets_min": 1208 + }, + { + "epoch": 3.1437908496732025, + "grad_norm": 0.5741612988813815, + "learning_rate": 2.6925833219518596e-05, + "loss": 0.2431, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13947980105876923, + "step": 1685, + "valid_targets_mean": 5237.2, + "valid_targets_min": 1708 + }, + { + "epoch": 3.1531279178338, + "grad_norm": 0.5311849133873362, + "learning_rate": 2.6838459615474758e-05, + "loss": 0.2369, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11200509965419769, + "step": 1690, + "valid_targets_mean": 5533.0, + "valid_targets_min": 503 + }, + { + "epoch": 3.1624649859943976, + "grad_norm": 0.9865071205824503, + "learning_rate": 2.6750937966895603e-05, + "loss": 0.2335, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13982805609703064, + "step": 1695, + "valid_targets_mean": 4516.1, + "valid_targets_min": 339 + }, + { + "epoch": 3.171802054154995, + "grad_norm": 0.8859672792182297, + "learning_rate": 2.666327016852094e-05, + "loss": 0.2444, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1140211746096611, + "step": 1700, + "valid_targets_mean": 3525.8, + "valid_targets_min": 392 + }, + { + "epoch": 3.1811391223155927, + "grad_norm": 0.6263300961388557, + "learning_rate": 2.6575458118254576e-05, + "loss": 0.2421, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10092367231845856, + "step": 1705, + "valid_targets_mean": 3049.9, + "valid_targets_min": 1336 + }, + { + "epoch": 3.1904761904761907, + "grad_norm": 0.6106175652832359, + "learning_rate": 2.648750371712319e-05, + "loss": 0.2438, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12995459139347076, + "step": 1710, + "valid_targets_mean": 4573.6, + "valid_targets_min": 1655 + }, + { + "epoch": 3.1998132586367882, + "grad_norm": 0.5485694993278112, + "learning_rate": 2.6399408869235187e-05, + "loss": 0.2391, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11681695282459259, + "step": 1715, + "valid_targets_mean": 4932.2, + "valid_targets_min": 1485 + }, + { + "epoch": 3.2091503267973858, + "grad_norm": 0.5589033213756798, + "learning_rate": 2.6311175481739473e-05, + "loss": 0.2369, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11145725846290588, + "step": 1720, + "valid_targets_mean": 4345.5, + "valid_targets_min": 1293 + }, + { + "epoch": 3.2184873949579833, + "grad_norm": 0.5222131755759775, + "learning_rate": 2.6222805464784196e-05, + "loss": 0.2399, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11845865100622177, + "step": 1725, + "valid_targets_mean": 5097.6, + "valid_targets_min": 756 + }, + { + "epoch": 3.227824463118581, + "grad_norm": 0.5249769678703706, + "learning_rate": 2.6134300731475337e-05, + "loss": 0.2276, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09829113632440567, + "step": 1730, + "valid_targets_mean": 4702.0, + "valid_targets_min": 1522 + }, + { + "epoch": 3.2371615312791784, + "grad_norm": 0.5631084630632243, + "learning_rate": 2.6045663197835352e-05, + "loss": 0.2499, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11925292760133743, + "step": 1735, + "valid_targets_mean": 4615.8, + "valid_targets_min": 599 + }, + { + "epoch": 3.246498599439776, + "grad_norm": 0.5647599922575897, + "learning_rate": 2.5956894782761638e-05, + "loss": 0.2425, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11370737105607986, + "step": 1740, + "valid_targets_mean": 4645.5, + "valid_targets_min": 1680 + }, + { + "epoch": 3.2558356676003735, + "grad_norm": 0.7100968312554239, + "learning_rate": 2.5867997407985063e-05, + "loss": 0.229, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16735035181045532, + "step": 1745, + "valid_targets_mean": 3725.0, + "valid_targets_min": 2434 + }, + { + "epoch": 3.265172735760971, + "grad_norm": 0.6423983960802672, + "learning_rate": 2.577897299802828e-05, + "loss": 0.2404, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.135835200548172, + "step": 1750, + "valid_targets_mean": 3751.8, + "valid_targets_min": 1909 + }, + { + "epoch": 3.2745098039215685, + "grad_norm": 0.5499189558493762, + "learning_rate": 2.568982348016412e-05, + "loss": 0.2345, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10953214764595032, + "step": 1755, + "valid_targets_mean": 4746.4, + "valid_targets_min": 1840 + }, + { + "epoch": 3.283846872082166, + "grad_norm": 0.6106938511075815, + "learning_rate": 2.560055078437385e-05, + "loss": 0.2344, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10772567242383957, + "step": 1760, + "valid_targets_mean": 3519.0, + "valid_targets_min": 1247 + }, + { + "epoch": 3.2931839402427636, + "grad_norm": 0.5795901272564159, + "learning_rate": 2.5511156843305393e-05, + "loss": 0.2299, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11280179768800735, + "step": 1765, + "valid_targets_mean": 4104.2, + "valid_targets_min": 2258 + }, + { + "epoch": 3.302521008403361, + "grad_norm": 0.5835602718260745, + "learning_rate": 2.542164359223148e-05, + "loss": 0.2389, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.121507927775383, + "step": 1770, + "valid_targets_mean": 5624.1, + "valid_targets_min": 1658 + }, + { + "epoch": 3.3118580765639587, + "grad_norm": 0.6855425367800956, + "learning_rate": 2.5332012969007786e-05, + "loss": 0.2484, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17336659133434296, + "step": 1775, + "valid_targets_mean": 4455.8, + "valid_targets_min": 2271 + }, + { + "epoch": 3.3211951447245567, + "grad_norm": 0.6263317502982045, + "learning_rate": 2.5242266914030916e-05, + "loss": 0.2489, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12615643441677094, + "step": 1780, + "valid_targets_mean": 5213.9, + "valid_targets_min": 734 + }, + { + "epoch": 3.330532212885154, + "grad_norm": 0.5697711366177932, + "learning_rate": 2.5152407370196467e-05, + "loss": 0.2461, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0967068001627922, + "step": 1785, + "valid_targets_mean": 4048.8, + "valid_targets_min": 1911 + }, + { + "epoch": 3.3398692810457518, + "grad_norm": 0.6981173172501789, + "learning_rate": 2.5062436282856926e-05, + "loss": 0.2469, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14307191967964172, + "step": 1790, + "valid_targets_mean": 4148.6, + "valid_targets_min": 1248 + }, + { + "epoch": 3.3492063492063493, + "grad_norm": 0.5694652488475922, + "learning_rate": 2.4972355599779566e-05, + "loss": 0.2448, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10676980763673782, + "step": 1795, + "valid_targets_mean": 3750.2, + "valid_targets_min": 795 + }, + { + "epoch": 3.358543417366947, + "grad_norm": 0.6326382316828768, + "learning_rate": 2.4882167271104272e-05, + "loss": 0.2399, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10296850651502609, + "step": 1800, + "valid_targets_mean": 4194.2, + "valid_targets_min": 1691 + }, + { + "epoch": 3.3678804855275444, + "grad_norm": 0.5852380803850895, + "learning_rate": 2.4791873249301327e-05, + "loss": 0.2281, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10884372889995575, + "step": 1805, + "valid_targets_mean": 4257.4, + "valid_targets_min": 1422 + }, + { + "epoch": 3.377217553688142, + "grad_norm": 0.6263489835715814, + "learning_rate": 2.4701475489129152e-05, + "loss": 0.2464, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12474148720502853, + "step": 1810, + "valid_targets_mean": 3966.6, + "valid_targets_min": 661 + }, + { + "epoch": 3.3865546218487395, + "grad_norm": 0.6788338443012708, + "learning_rate": 2.4610975947591988e-05, + "loss": 0.2573, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13089723885059357, + "step": 1815, + "valid_targets_mean": 3576.6, + "valid_targets_min": 353 + }, + { + "epoch": 3.395891690009337, + "grad_norm": 0.6223360893719095, + "learning_rate": 2.452037658389749e-05, + "loss": 0.2483, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12548811733722687, + "step": 1820, + "valid_targets_mean": 4066.9, + "valid_targets_min": 1108 + }, + { + "epoch": 3.4052287581699345, + "grad_norm": 0.5272345815185357, + "learning_rate": 2.4429679359414386e-05, + "loss": 0.2491, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11924466490745544, + "step": 1825, + "valid_targets_mean": 4963.9, + "valid_targets_min": 1622 + }, + { + "epoch": 3.414565826330532, + "grad_norm": 0.5559131979744069, + "learning_rate": 2.433888623762994e-05, + "loss": 0.2435, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11159016191959381, + "step": 1830, + "valid_targets_mean": 4750.5, + "valid_targets_min": 1413 + }, + { + "epoch": 3.4239028944911296, + "grad_norm": 0.5811159174559087, + "learning_rate": 2.42479991841075e-05, + "loss": 0.2476, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10369855165481567, + "step": 1835, + "valid_targets_mean": 3635.1, + "valid_targets_min": 2402 + }, + { + "epoch": 3.4332399626517276, + "grad_norm": 0.6649627571539759, + "learning_rate": 2.4157020166443913e-05, + "loss": 0.235, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10374829173088074, + "step": 1840, + "valid_targets_mean": 3024.5, + "valid_targets_min": 366 + }, + { + "epoch": 3.442577030812325, + "grad_norm": 1.013679798583159, + "learning_rate": 2.4065951154226948e-05, + "loss": 0.2403, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12012753635644913, + "step": 1845, + "valid_targets_mean": 3298.2, + "valid_targets_min": 712 + }, + { + "epoch": 3.4519140989729227, + "grad_norm": 0.6053864928768478, + "learning_rate": 2.3974794118992652e-05, + "loss": 0.2469, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14941412210464478, + "step": 1850, + "valid_targets_mean": 4688.5, + "valid_targets_min": 2392 + }, + { + "epoch": 3.46125116713352, + "grad_norm": 0.5931874155240943, + "learning_rate": 2.3883551034182647e-05, + "loss": 0.2437, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13181057572364807, + "step": 1855, + "valid_targets_mean": 4924.1, + "valid_targets_min": 2825 + }, + { + "epoch": 3.4705882352941178, + "grad_norm": 0.6321540831360725, + "learning_rate": 2.3792223875101446e-05, + "loss": 0.2414, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12288413941860199, + "step": 1860, + "valid_targets_mean": 3459.5, + "valid_targets_min": 181 + }, + { + "epoch": 3.4799253034547153, + "grad_norm": 0.5953892993471418, + "learning_rate": 2.3700814618873662e-05, + "loss": 0.2423, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1339205652475357, + "step": 1865, + "valid_targets_mean": 4671.8, + "valid_targets_min": 1701 + }, + { + "epoch": 3.489262371615313, + "grad_norm": 0.613103560194598, + "learning_rate": 2.3609325244401212e-05, + "loss": 0.2483, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13291889429092407, + "step": 1870, + "valid_targets_mean": 4060.0, + "valid_targets_min": 348 + }, + { + "epoch": 3.4985994397759104, + "grad_norm": 0.6417341802071617, + "learning_rate": 2.3517757732320484e-05, + "loss": 0.2364, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09562905132770538, + "step": 1875, + "valid_targets_mean": 2939.5, + "valid_targets_min": 417 + }, + { + "epoch": 3.507936507936508, + "grad_norm": 0.5316666263742489, + "learning_rate": 2.342611406495946e-05, + "loss": 0.2437, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11988166719675064, + "step": 1880, + "valid_targets_mean": 5158.9, + "valid_targets_min": 2603 + }, + { + "epoch": 3.5172735760971054, + "grad_norm": 0.5897777997066682, + "learning_rate": 2.333439622629476e-05, + "loss": 0.2346, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10294010490179062, + "step": 1885, + "valid_targets_mean": 3866.2, + "valid_targets_min": 2132 + }, + { + "epoch": 3.526610644257703, + "grad_norm": 0.5970997443779792, + "learning_rate": 2.3242606201908758e-05, + "loss": 0.2455, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1178329586982727, + "step": 1890, + "valid_targets_mean": 3609.2, + "valid_targets_min": 2622 + }, + { + "epoch": 3.5359477124183005, + "grad_norm": 0.518085183783807, + "learning_rate": 2.3150745978946563e-05, + "loss": 0.2307, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0913931131362915, + "step": 1895, + "valid_targets_mean": 4489.0, + "valid_targets_min": 479 + }, + { + "epoch": 3.545284780578898, + "grad_norm": 0.59663246667053, + "learning_rate": 2.3058817546072976e-05, + "loss": 0.2446, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10624435544013977, + "step": 1900, + "valid_targets_mean": 3351.0, + "valid_targets_min": 1057 + }, + { + "epoch": 3.5546218487394956, + "grad_norm": 0.5559632149597359, + "learning_rate": 2.2966822893429487e-05, + "loss": 0.2522, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1195775493979454, + "step": 1905, + "valid_targets_mean": 5033.8, + "valid_targets_min": 3245 + }, + { + "epoch": 3.563958916900093, + "grad_norm": 0.7081122701194221, + "learning_rate": 2.287476401259114e-05, + "loss": 0.2331, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14587610960006714, + "step": 1910, + "valid_targets_mean": 4035.2, + "valid_targets_min": 453 + }, + { + "epoch": 3.5732959850606907, + "grad_norm": 0.6707186892776675, + "learning_rate": 2.278264289652347e-05, + "loss": 0.2344, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08934125304222107, + "step": 1915, + "valid_targets_mean": 3024.1, + "valid_targets_min": 252 + }, + { + "epoch": 3.5826330532212887, + "grad_norm": 0.5623806484413016, + "learning_rate": 2.269046153953931e-05, + "loss": 0.2432, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11895057559013367, + "step": 1920, + "valid_targets_mean": 4608.4, + "valid_targets_min": 1891 + }, + { + "epoch": 3.591970121381886, + "grad_norm": 0.5981527204412288, + "learning_rate": 2.2598221937255645e-05, + "loss": 0.2473, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14081847667694092, + "step": 1925, + "valid_targets_mean": 4768.2, + "valid_targets_min": 3450 + }, + { + "epoch": 3.6013071895424837, + "grad_norm": 0.6546648660767088, + "learning_rate": 2.2505926086550404e-05, + "loss": 0.2355, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11491870880126953, + "step": 1930, + "valid_targets_mean": 3720.8, + "valid_targets_min": 1445 + }, + { + "epoch": 3.6106442577030813, + "grad_norm": 0.5780853124496255, + "learning_rate": 2.2413575985519226e-05, + "loss": 0.2355, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1022876426577568, + "step": 1935, + "valid_targets_mean": 3925.0, + "valid_targets_min": 853 + }, + { + "epoch": 3.619981325863679, + "grad_norm": 0.5718419321125848, + "learning_rate": 2.2321173633432202e-05, + "loss": 0.2409, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11140482127666473, + "step": 1940, + "valid_targets_mean": 3809.8, + "valid_targets_min": 2023 + }, + { + "epoch": 3.6293183940242764, + "grad_norm": 0.6423996100860025, + "learning_rate": 2.2228721030690603e-05, + "loss": 0.2328, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14068032801151276, + "step": 1945, + "valid_targets_mean": 4320.9, + "valid_targets_min": 746 + }, + { + "epoch": 3.638655462184874, + "grad_norm": 0.7051723945239639, + "learning_rate": 2.2136220178783557e-05, + "loss": 0.2335, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12804755568504333, + "step": 1950, + "valid_targets_mean": 3823.8, + "valid_targets_min": 1155 + }, + { + "epoch": 3.6479925303454714, + "grad_norm": 0.5837266799262242, + "learning_rate": 2.2043673080244744e-05, + "loss": 0.2512, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12217289209365845, + "step": 1955, + "valid_targets_mean": 4348.5, + "valid_targets_min": 1668 + }, + { + "epoch": 3.657329598506069, + "grad_norm": 0.5688108900126473, + "learning_rate": 2.1951081738609012e-05, + "loss": 0.2358, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11772574484348297, + "step": 1960, + "valid_targets_mean": 4684.4, + "valid_targets_min": 375 + }, + { + "epoch": 3.6666666666666665, + "grad_norm": 0.6827475107634433, + "learning_rate": 2.1858448158369027e-05, + "loss": 0.236, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1404980719089508, + "step": 1965, + "valid_targets_mean": 3966.1, + "valid_targets_min": 1585 + }, + { + "epoch": 3.6760037348272645, + "grad_norm": 0.50595598942101, + "learning_rate": 2.1765774344931877e-05, + "loss": 0.2385, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08946672827005386, + "step": 1970, + "valid_targets_mean": 6029.5, + "valid_targets_min": 1472 + }, + { + "epoch": 3.685340802987862, + "grad_norm": 0.5693686310960493, + "learning_rate": 2.167306230457564e-05, + "loss": 0.2404, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12061534821987152, + "step": 1975, + "valid_targets_mean": 5528.8, + "valid_targets_min": 1385 + }, + { + "epoch": 3.6946778711484596, + "grad_norm": 0.5655807113580671, + "learning_rate": 2.1580314044405966e-05, + "loss": 0.2341, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11793536692857742, + "step": 1980, + "valid_targets_mean": 4452.1, + "valid_targets_min": 377 + }, + { + "epoch": 3.704014939309057, + "grad_norm": 0.6525806788984181, + "learning_rate": 2.1487531572312626e-05, + "loss": 0.2531, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1193501278758049, + "step": 1985, + "valid_targets_mean": 3545.8, + "valid_targets_min": 1654 + }, + { + "epoch": 3.7133520074696547, + "grad_norm": 0.6566997526236068, + "learning_rate": 2.139471689692602e-05, + "loss": 0.245, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08113911002874374, + "step": 1990, + "valid_targets_mean": 2655.5, + "valid_targets_min": 1110 + }, + { + "epoch": 3.722689075630252, + "grad_norm": 0.584817592881706, + "learning_rate": 2.130187202757375e-05, + "loss": 0.2347, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0790875107049942, + "step": 1995, + "valid_targets_mean": 2707.4, + "valid_targets_min": 691 + }, + { + "epoch": 3.7320261437908497, + "grad_norm": 0.5916090106145291, + "learning_rate": 2.1208998974237036e-05, + "loss": 0.2403, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1580054759979248, + "step": 2000, + "valid_targets_mean": 5653.5, + "valid_targets_min": 2859 + }, + { + "epoch": 3.7413632119514473, + "grad_norm": 0.584531652317962, + "learning_rate": 2.1116099747507274e-05, + "loss": 0.2417, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12464003264904022, + "step": 2005, + "valid_targets_mean": 4105.1, + "valid_targets_min": 563 + }, + { + "epoch": 3.750700280112045, + "grad_norm": 0.5675336123022745, + "learning_rate": 2.1023176358542484e-05, + "loss": 0.2397, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14742407202720642, + "step": 2010, + "valid_targets_mean": 6388.1, + "valid_targets_min": 503 + }, + { + "epoch": 3.7600373482726424, + "grad_norm": 0.6564779383996031, + "learning_rate": 2.093023081902375e-05, + "loss": 0.2384, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12806721031665802, + "step": 2015, + "valid_targets_mean": 4404.2, + "valid_targets_min": 1203 + }, + { + "epoch": 3.76937441643324, + "grad_norm": 0.6093246745021251, + "learning_rate": 2.083726514111171e-05, + "loss": 0.2328, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14893124997615814, + "step": 2020, + "valid_targets_mean": 4692.5, + "valid_targets_min": 1331 + }, + { + "epoch": 3.7787114845938374, + "grad_norm": 0.6010275646086412, + "learning_rate": 2.0744281337402966e-05, + "loss": 0.241, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11711867898702621, + "step": 2025, + "valid_targets_mean": 4133.2, + "valid_targets_min": 567 + }, + { + "epoch": 3.788048552754435, + "grad_norm": 0.6254091087725708, + "learning_rate": 2.0651281420886522e-05, + "loss": 0.2362, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.156945139169693, + "step": 2030, + "valid_targets_mean": 5756.1, + "valid_targets_min": 1027 + }, + { + "epoch": 3.7973856209150325, + "grad_norm": 0.4755971776381176, + "learning_rate": 2.0558267404900206e-05, + "loss": 0.2322, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09898661077022552, + "step": 2035, + "valid_targets_mean": 5337.5, + "valid_targets_min": 3640 + }, + { + "epoch": 3.80672268907563, + "grad_norm": 0.5272008250728979, + "learning_rate": 2.0465241303087078e-05, + "loss": 0.2437, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12633109092712402, + "step": 2040, + "valid_targets_mean": 5891.8, + "valid_targets_min": 2522 + }, + { + "epoch": 3.8160597572362276, + "grad_norm": 0.5555456108232831, + "learning_rate": 2.0372205129351856e-05, + "loss": 0.2516, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10209444165229797, + "step": 2045, + "valid_targets_mean": 4578.4, + "valid_targets_min": 2178 + }, + { + "epoch": 3.825396825396825, + "grad_norm": 0.5456750249777528, + "learning_rate": 2.0279160897817283e-05, + "loss": 0.2374, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09951109439134598, + "step": 2050, + "valid_targets_mean": 4526.6, + "valid_targets_min": 1750 + }, + { + "epoch": 3.834733893557423, + "grad_norm": 0.6249334972604333, + "learning_rate": 2.0186110622780558e-05, + "loss": 0.2396, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16213475167751312, + "step": 2055, + "valid_targets_mean": 4881.0, + "valid_targets_min": 2154 + }, + { + "epoch": 3.8440709617180207, + "grad_norm": 0.7701297563356613, + "learning_rate": 2.0093056318669718e-05, + "loss": 0.2442, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14267413318157196, + "step": 2060, + "valid_targets_mean": 4028.6, + "valid_targets_min": 1547 + }, + { + "epoch": 3.853408029878618, + "grad_norm": 0.5564737858255343, + "learning_rate": 2e-05, + "loss": 0.2285, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11553055793046951, + "step": 2065, + "valid_targets_mean": 4303.8, + "valid_targets_min": 1394 + }, + { + "epoch": 3.8627450980392157, + "grad_norm": 0.6401725943924041, + "learning_rate": 1.990694368133029e-05, + "loss": 0.245, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12974008917808533, + "step": 2070, + "valid_targets_mean": 3441.5, + "valid_targets_min": 1736 + }, + { + "epoch": 3.8720821661998133, + "grad_norm": 0.650066376350458, + "learning_rate": 1.981388937721945e-05, + "loss": 0.2365, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13156390190124512, + "step": 2075, + "valid_targets_mean": 3623.9, + "valid_targets_min": 1984 + }, + { + "epoch": 3.881419234360411, + "grad_norm": 0.556052675899098, + "learning_rate": 1.9720839102182717e-05, + "loss": 0.2394, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11586624383926392, + "step": 2080, + "valid_targets_mean": 4444.6, + "valid_targets_min": 1628 + }, + { + "epoch": 3.8907563025210083, + "grad_norm": 0.5278242585185127, + "learning_rate": 1.962779487064815e-05, + "loss": 0.2333, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13123421370983124, + "step": 2085, + "valid_targets_mean": 6120.8, + "valid_targets_min": 2291 + }, + { + "epoch": 3.900093370681606, + "grad_norm": 0.5430574502893332, + "learning_rate": 1.9534758696912922e-05, + "loss": 0.254, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08740156888961792, + "step": 2090, + "valid_targets_mean": 3660.0, + "valid_targets_min": 1184 + }, + { + "epoch": 3.9094304388422034, + "grad_norm": 0.5233785228635964, + "learning_rate": 1.9441732595099804e-05, + "loss": 0.2387, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11623501777648926, + "step": 2095, + "valid_targets_mean": 6349.6, + "valid_targets_min": 3407 + }, + { + "epoch": 3.918767507002801, + "grad_norm": 0.5579659833044305, + "learning_rate": 1.9348718579113484e-05, + "loss": 0.2325, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0989152193069458, + "step": 2100, + "valid_targets_mean": 3930.8, + "valid_targets_min": 1747 + }, + { + "epoch": 3.928104575163399, + "grad_norm": 0.47795494934753135, + "learning_rate": 1.9255718662597044e-05, + "loss": 0.2308, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11924523860216141, + "step": 2105, + "valid_targets_mean": 7423.0, + "valid_targets_min": 2814 + }, + { + "epoch": 3.9374416433239965, + "grad_norm": 0.5964058310413168, + "learning_rate": 1.9162734858888293e-05, + "loss": 0.2317, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11583800613880157, + "step": 2110, + "valid_targets_mean": 4434.4, + "valid_targets_min": 1776 + }, + { + "epoch": 3.946778711484594, + "grad_norm": 0.5632681871307349, + "learning_rate": 1.9069769180976263e-05, + "loss": 0.2465, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10669685900211334, + "step": 2115, + "valid_targets_mean": 4153.2, + "valid_targets_min": 1028 + }, + { + "epoch": 3.9561157796451916, + "grad_norm": 0.6071383385257072, + "learning_rate": 1.8976823641457523e-05, + "loss": 0.2492, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13491106033325195, + "step": 2120, + "valid_targets_mean": 4078.5, + "valid_targets_min": 2486 + }, + { + "epoch": 3.965452847805789, + "grad_norm": 0.5182635672345792, + "learning_rate": 1.888390025249273e-05, + "loss": 0.24, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.096836157143116, + "step": 2125, + "valid_targets_mean": 4932.5, + "valid_targets_min": 2885 + }, + { + "epoch": 3.9747899159663866, + "grad_norm": 0.6210056271972928, + "learning_rate": 1.879100102576297e-05, + "loss": 0.2267, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09092339873313904, + "step": 2130, + "valid_targets_mean": 2959.8, + "valid_targets_min": 353 + }, + { + "epoch": 3.984126984126984, + "grad_norm": 0.5893191758318193, + "learning_rate": 1.8698127972426255e-05, + "loss": 0.2545, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13991564512252808, + "step": 2135, + "valid_targets_mean": 5399.9, + "valid_targets_min": 1075 + }, + { + "epoch": 3.9934640522875817, + "grad_norm": 0.6356408458968209, + "learning_rate": 1.8605283103073982e-05, + "loss": 0.234, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11381387710571289, + "step": 2140, + "valid_targets_mean": 2949.1, + "valid_targets_min": 290 + }, + { + "epoch": 4.0018674136321195, + "grad_norm": 0.6101063577088682, + "learning_rate": 1.851246842768738e-05, + "loss": 0.2265, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1097736805677414, + "step": 2145, + "valid_targets_mean": 3994.2, + "valid_targets_min": 1079 + }, + { + "epoch": 4.011204481792717, + "grad_norm": 0.5438651343446009, + "learning_rate": 1.841968595559404e-05, + "loss": 0.2169, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08228534460067749, + "step": 2150, + "valid_targets_mean": 3436.8, + "valid_targets_min": 554 + }, + { + "epoch": 4.020541549953315, + "grad_norm": 0.6420966367258667, + "learning_rate": 1.8326937695424367e-05, + "loss": 0.2114, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10745055228471756, + "step": 2155, + "valid_targets_mean": 3367.0, + "valid_targets_min": 1016 + }, + { + "epoch": 4.029878618113912, + "grad_norm": 0.6221120735902738, + "learning_rate": 1.8234225655068133e-05, + "loss": 0.2131, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12365928292274475, + "step": 2160, + "valid_targets_mean": 4578.6, + "valid_targets_min": 1280 + }, + { + "epoch": 4.03921568627451, + "grad_norm": 0.6719779743102471, + "learning_rate": 1.8141551841630976e-05, + "loss": 0.2182, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11186438053846359, + "step": 2165, + "valid_targets_mean": 4246.0, + "valid_targets_min": 1413 + }, + { + "epoch": 4.048552754435107, + "grad_norm": 0.6059233217044933, + "learning_rate": 1.804891826139099e-05, + "loss": 0.2161, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10147538036108017, + "step": 2170, + "valid_targets_mean": 5024.4, + "valid_targets_min": 1764 + }, + { + "epoch": 4.057889822595705, + "grad_norm": 0.5455947779822439, + "learning_rate": 1.795632691975526e-05, + "loss": 0.213, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11876384913921356, + "step": 2175, + "valid_targets_mean": 6419.9, + "valid_targets_min": 1787 + }, + { + "epoch": 4.067226890756302, + "grad_norm": 0.5458591523944937, + "learning_rate": 1.7863779821216443e-05, + "loss": 0.2104, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11384214460849762, + "step": 2180, + "valid_targets_mean": 5930.6, + "valid_targets_min": 1130 + }, + { + "epoch": 4.0765639589169, + "grad_norm": 0.5512697112219231, + "learning_rate": 1.77712789693094e-05, + "loss": 0.212, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07746922969818115, + "step": 2185, + "valid_targets_mean": 3537.0, + "valid_targets_min": 1484 + }, + { + "epoch": 4.085901027077497, + "grad_norm": 0.5650380984647764, + "learning_rate": 1.76788263665678e-05, + "loss": 0.2187, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1155230849981308, + "step": 2190, + "valid_targets_mean": 5164.5, + "valid_targets_min": 1367 + }, + { + "epoch": 4.095238095238095, + "grad_norm": 0.6107246439033267, + "learning_rate": 1.758642401448078e-05, + "loss": 0.2087, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10828185826539993, + "step": 2195, + "valid_targets_mean": 3868.8, + "valid_targets_min": 376 + }, + { + "epoch": 4.104575163398692, + "grad_norm": 0.5806577542913125, + "learning_rate": 1.74940739134496e-05, + "loss": 0.2084, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08665976673364639, + "step": 2200, + "valid_targets_mean": 4237.5, + "valid_targets_min": 1122 + }, + { + "epoch": 4.11391223155929, + "grad_norm": 0.49947986338209793, + "learning_rate": 1.7401778062744365e-05, + "loss": 0.2026, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06916231662034988, + "step": 2205, + "valid_targets_mean": 3722.6, + "valid_targets_min": 222 + }, + { + "epoch": 4.1232492997198875, + "grad_norm": 0.590970003406632, + "learning_rate": 1.7309538460460698e-05, + "loss": 0.2105, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10126953572034836, + "step": 2210, + "valid_targets_mean": 4672.5, + "valid_targets_min": 623 + }, + { + "epoch": 4.132586367880486, + "grad_norm": 0.6671681134306882, + "learning_rate": 1.721735710347654e-05, + "loss": 0.2124, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10292886197566986, + "step": 2215, + "valid_targets_mean": 3148.0, + "valid_targets_min": 377 + }, + { + "epoch": 4.1419234360410835, + "grad_norm": 0.648141092912023, + "learning_rate": 1.7125235987408867e-05, + "loss": 0.218, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10966353863477707, + "step": 2220, + "valid_targets_mean": 3729.8, + "valid_targets_min": 1585 + }, + { + "epoch": 4.151260504201681, + "grad_norm": 0.6025905382928063, + "learning_rate": 1.7033177106570516e-05, + "loss": 0.2213, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14169800281524658, + "step": 2225, + "valid_targets_mean": 5699.9, + "valid_targets_min": 430 + }, + { + "epoch": 4.160597572362279, + "grad_norm": 0.6110437833479743, + "learning_rate": 1.694118245392703e-05, + "loss": 0.2148, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12441076338291168, + "step": 2230, + "valid_targets_mean": 4735.1, + "valid_targets_min": 2405 + }, + { + "epoch": 4.169934640522876, + "grad_norm": 0.7027858157363389, + "learning_rate": 1.684925402105344e-05, + "loss": 0.2253, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1052807867527008, + "step": 2235, + "valid_targets_mean": 2760.4, + "valid_targets_min": 781 + }, + { + "epoch": 4.179271708683474, + "grad_norm": 0.5828122002380771, + "learning_rate": 1.6757393798091245e-05, + "loss": 0.2185, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1290610432624817, + "step": 2240, + "valid_targets_mean": 4907.1, + "valid_targets_min": 1768 + }, + { + "epoch": 4.188608776844071, + "grad_norm": 0.6045780682331374, + "learning_rate": 1.6665603773705247e-05, + "loss": 0.2058, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11273029446601868, + "step": 2245, + "valid_targets_mean": 4280.1, + "valid_targets_min": 417 + }, + { + "epoch": 4.197945845004669, + "grad_norm": 0.6111060486842843, + "learning_rate": 1.6573885935040554e-05, + "loss": 0.2216, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12327971309423447, + "step": 2250, + "valid_targets_mean": 4789.4, + "valid_targets_min": 3114 + }, + { + "epoch": 4.207282913165266, + "grad_norm": 0.5689152736554843, + "learning_rate": 1.648224226767952e-05, + "loss": 0.223, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12698994576931, + "step": 2255, + "valid_targets_mean": 5418.8, + "valid_targets_min": 2521 + }, + { + "epoch": 4.216619981325864, + "grad_norm": 0.6030863200790362, + "learning_rate": 1.6390674755598798e-05, + "loss": 0.2083, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07864175736904144, + "step": 2260, + "valid_targets_mean": 2836.8, + "valid_targets_min": 1011 + }, + { + "epoch": 4.225957049486461, + "grad_norm": 0.6919168044398024, + "learning_rate": 1.6299185381126344e-05, + "loss": 0.2174, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11061736941337585, + "step": 2265, + "valid_targets_mean": 4301.0, + "valid_targets_min": 1067 + }, + { + "epoch": 4.235294117647059, + "grad_norm": 0.6023360573651656, + "learning_rate": 1.6207776124898557e-05, + "loss": 0.2332, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13825145363807678, + "step": 2270, + "valid_targets_mean": 5408.6, + "valid_targets_min": 2577 + }, + { + "epoch": 4.244631185807656, + "grad_norm": 0.5630871054601987, + "learning_rate": 1.611644896581736e-05, + "loss": 0.2276, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09944741427898407, + "step": 2275, + "valid_targets_mean": 4812.6, + "valid_targets_min": 2193 + }, + { + "epoch": 4.253968253968254, + "grad_norm": 0.7298867947038662, + "learning_rate": 1.602520588100735e-05, + "loss": 0.229, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11367127299308777, + "step": 2280, + "valid_targets_mean": 3215.6, + "valid_targets_min": 343 + }, + { + "epoch": 4.2633053221288515, + "grad_norm": 0.6689856172046564, + "learning_rate": 1.5934048845773055e-05, + "loss": 0.215, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08669646084308624, + "step": 2285, + "valid_targets_mean": 2815.2, + "valid_targets_min": 199 + }, + { + "epoch": 4.272642390289449, + "grad_norm": 0.5141906523630698, + "learning_rate": 1.5842979833556084e-05, + "loss": 0.2031, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09228503704071045, + "step": 2290, + "valid_targets_mean": 5115.6, + "valid_targets_min": 1030 + }, + { + "epoch": 4.281979458450047, + "grad_norm": 0.6323835710856455, + "learning_rate": 1.575200081589251e-05, + "loss": 0.2175, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12148559093475342, + "step": 2295, + "valid_targets_mean": 5730.2, + "valid_targets_min": 3910 + }, + { + "epoch": 4.291316526610644, + "grad_norm": 0.6772095306484774, + "learning_rate": 1.5661113762370063e-05, + "loss": 0.2087, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11034808307886124, + "step": 2300, + "valid_targets_mean": 3891.4, + "valid_targets_min": 1814 + }, + { + "epoch": 4.300653594771242, + "grad_norm": 0.5756391057666874, + "learning_rate": 1.5570320640585624e-05, + "loss": 0.2251, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10767068713903427, + "step": 2305, + "valid_targets_mean": 4199.8, + "valid_targets_min": 881 + }, + { + "epoch": 4.309990662931839, + "grad_norm": 0.5282763733159656, + "learning_rate": 1.5479623416102512e-05, + "loss": 0.2259, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09715651720762253, + "step": 2310, + "valid_targets_mean": 4911.5, + "valid_targets_min": 3268 + }, + { + "epoch": 4.319327731092437, + "grad_norm": 0.576239514638465, + "learning_rate": 1.5389024052408026e-05, + "loss": 0.2117, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09804154187440872, + "step": 2315, + "valid_targets_mean": 4323.4, + "valid_targets_min": 787 + }, + { + "epoch": 4.328664799253034, + "grad_norm": 0.520128090829205, + "learning_rate": 1.529852451087085e-05, + "loss": 0.2074, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0893818587064743, + "step": 2320, + "valid_targets_mean": 4989.4, + "valid_targets_min": 1105 + }, + { + "epoch": 4.338001867413632, + "grad_norm": 0.6775604435989203, + "learning_rate": 1.5208126750698676e-05, + "loss": 0.2153, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12488840520381927, + "step": 2325, + "valid_targets_mean": 5632.2, + "valid_targets_min": 252 + }, + { + "epoch": 4.347338935574229, + "grad_norm": 0.6309372112951984, + "learning_rate": 1.5117832728895734e-05, + "loss": 0.2173, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11499056220054626, + "step": 2330, + "valid_targets_mean": 4394.8, + "valid_targets_min": 2081 + }, + { + "epoch": 4.356676003734827, + "grad_norm": 0.5668978723478025, + "learning_rate": 1.5027644400220435e-05, + "loss": 0.2204, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09072070568799973, + "step": 2335, + "valid_targets_mean": 4270.1, + "valid_targets_min": 1982 + }, + { + "epoch": 4.366013071895424, + "grad_norm": 0.5974505002570625, + "learning_rate": 1.4937563717143077e-05, + "loss": 0.219, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11508934944868088, + "step": 2340, + "valid_targets_mean": 5186.6, + "valid_targets_min": 1816 + }, + { + "epoch": 4.375350140056023, + "grad_norm": 0.6316979226211048, + "learning_rate": 1.4847592629803538e-05, + "loss": 0.2247, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10023558139801025, + "step": 2345, + "valid_targets_mean": 3984.4, + "valid_targets_min": 310 + }, + { + "epoch": 4.38468720821662, + "grad_norm": 0.4780150123344263, + "learning_rate": 1.4757733085969095e-05, + "loss": 0.2106, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07734273374080658, + "step": 2350, + "valid_targets_mean": 5063.2, + "valid_targets_min": 258 + }, + { + "epoch": 4.394024276377218, + "grad_norm": 0.5529209785474667, + "learning_rate": 1.4667987030992223e-05, + "loss": 0.2124, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1110001653432846, + "step": 2355, + "valid_targets_mean": 5034.5, + "valid_targets_min": 3499 + }, + { + "epoch": 4.4033613445378155, + "grad_norm": 0.6497287870437426, + "learning_rate": 1.4578356407768524e-05, + "loss": 0.2198, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10980506241321564, + "step": 2360, + "valid_targets_mean": 3340.9, + "valid_targets_min": 522 + }, + { + "epoch": 4.412698412698413, + "grad_norm": 0.5546785769207205, + "learning_rate": 1.4488843156694614e-05, + "loss": 0.2231, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08921074122190475, + "step": 2365, + "valid_targets_mean": 5025.6, + "valid_targets_min": 878 + }, + { + "epoch": 4.4220354808590105, + "grad_norm": 0.6064065968844321, + "learning_rate": 1.4399449215626154e-05, + "loss": 0.2292, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10236603021621704, + "step": 2370, + "valid_targets_mean": 4367.9, + "valid_targets_min": 236 + }, + { + "epoch": 4.431372549019608, + "grad_norm": 0.587871927901169, + "learning_rate": 1.4310176519835885e-05, + "loss": 0.2258, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11132930219173431, + "step": 2375, + "valid_targets_mean": 4859.4, + "valid_targets_min": 2329 + }, + { + "epoch": 4.440709617180206, + "grad_norm": 0.6028873024687935, + "learning_rate": 1.422102700197172e-05, + "loss": 0.2226, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10939637571573257, + "step": 2380, + "valid_targets_mean": 4651.0, + "valid_targets_min": 1211 + }, + { + "epoch": 4.450046685340803, + "grad_norm": 0.6266382658716586, + "learning_rate": 1.4132002592014939e-05, + "loss": 0.2099, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09197478741407394, + "step": 2385, + "valid_targets_mean": 3783.0, + "valid_targets_min": 1373 + }, + { + "epoch": 4.459383753501401, + "grad_norm": 0.5266258760230067, + "learning_rate": 1.404310521723836e-05, + "loss": 0.2057, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09512799978256226, + "step": 2390, + "valid_targets_mean": 5939.9, + "valid_targets_min": 1588 + }, + { + "epoch": 4.468720821661998, + "grad_norm": 0.6788404262038678, + "learning_rate": 1.3954336802164658e-05, + "loss": 0.2272, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11945095658302307, + "step": 2395, + "valid_targets_mean": 3844.9, + "valid_targets_min": 903 + }, + { + "epoch": 4.478057889822596, + "grad_norm": 0.6864468964977486, + "learning_rate": 1.3865699268524668e-05, + "loss": 0.2307, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10773071646690369, + "step": 2400, + "valid_targets_mean": 3283.9, + "valid_targets_min": 1410 + }, + { + "epoch": 4.487394957983193, + "grad_norm": 0.6436361561640092, + "learning_rate": 1.3777194535215812e-05, + "loss": 0.2268, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1354522705078125, + "step": 2405, + "valid_targets_mean": 4097.9, + "valid_targets_min": 1192 + }, + { + "epoch": 4.496732026143791, + "grad_norm": 0.5960062435782721, + "learning_rate": 1.3688824518260528e-05, + "loss": 0.2203, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1135517954826355, + "step": 2410, + "valid_targets_mean": 4692.8, + "valid_targets_min": 1901 + }, + { + "epoch": 4.506069094304388, + "grad_norm": 0.5746434007845433, + "learning_rate": 1.3600591130764825e-05, + "loss": 0.214, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08620750904083252, + "step": 2415, + "valid_targets_mean": 3363.4, + "valid_targets_min": 1474 + }, + { + "epoch": 4.515406162464986, + "grad_norm": 0.606674768383782, + "learning_rate": 1.3512496282876813e-05, + "loss": 0.2154, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11640094220638275, + "step": 2420, + "valid_targets_mean": 4752.9, + "valid_targets_min": 1569 + }, + { + "epoch": 4.5247432306255835, + "grad_norm": 0.6090671137854642, + "learning_rate": 1.3424541881745425e-05, + "loss": 0.2156, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14638464152812958, + "step": 2425, + "valid_targets_mean": 5501.1, + "valid_targets_min": 522 + }, + { + "epoch": 4.534080298786181, + "grad_norm": 0.5326614803255285, + "learning_rate": 1.3336729831479062e-05, + "loss": 0.212, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07378380745649338, + "step": 2430, + "valid_targets_mean": 3980.1, + "valid_targets_min": 1565 + }, + { + "epoch": 4.543417366946779, + "grad_norm": 0.7829500404881947, + "learning_rate": 1.32490620331044e-05, + "loss": 0.2281, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11092469096183777, + "step": 2435, + "valid_targets_mean": 2376.0, + "valid_targets_min": 1242 + }, + { + "epoch": 4.552754435107376, + "grad_norm": 0.6534950551231358, + "learning_rate": 1.3161540384525244e-05, + "loss": 0.2133, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11590614169836044, + "step": 2440, + "valid_targets_mean": 4005.0, + "valid_targets_min": 772 + }, + { + "epoch": 4.562091503267974, + "grad_norm": 0.6459415624904914, + "learning_rate": 1.3074166780481412e-05, + "loss": 0.2271, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10133199393749237, + "step": 2445, + "valid_targets_mean": 3537.4, + "valid_targets_min": 975 + }, + { + "epoch": 4.571428571428571, + "grad_norm": 0.6094344122464089, + "learning_rate": 1.2986943112507753e-05, + "loss": 0.2154, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11566491425037384, + "step": 2450, + "valid_targets_mean": 4492.4, + "valid_targets_min": 413 + }, + { + "epoch": 4.580765639589169, + "grad_norm": 0.5522560237537026, + "learning_rate": 1.2899871268893161e-05, + "loss": 0.2088, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08447899669408798, + "step": 2455, + "valid_targets_mean": 4144.4, + "valid_targets_min": 1818 + }, + { + "epoch": 4.590102707749766, + "grad_norm": 0.6400429069432625, + "learning_rate": 1.2812953134639725e-05, + "loss": 0.2249, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11293605715036392, + "step": 2460, + "valid_targets_mean": 5663.5, + "valid_targets_min": 1561 + }, + { + "epoch": 4.599439775910364, + "grad_norm": 0.6215524966286726, + "learning_rate": 1.27261905914219e-05, + "loss": 0.2168, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12139495462179184, + "step": 2465, + "valid_targets_mean": 4749.1, + "valid_targets_min": 724 + }, + { + "epoch": 4.608776844070961, + "grad_norm": 0.5840021462134765, + "learning_rate": 1.2639585517545783e-05, + "loss": 0.2304, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1280387043952942, + "step": 2470, + "valid_targets_mean": 5680.2, + "valid_targets_min": 1331 + }, + { + "epoch": 4.618113912231559, + "grad_norm": 0.6234374716290477, + "learning_rate": 1.2553139787908457e-05, + "loss": 0.2164, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1322997510433197, + "step": 2475, + "valid_targets_mean": 5078.8, + "valid_targets_min": 1415 + }, + { + "epoch": 4.627450980392156, + "grad_norm": 0.6337349687389539, + "learning_rate": 1.2466855273957372e-05, + "loss": 0.2221, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09425517171621323, + "step": 2480, + "valid_targets_mean": 4022.9, + "valid_targets_min": 422 + }, + { + "epoch": 4.636788048552754, + "grad_norm": 0.5660402206031033, + "learning_rate": 1.2380733843649866e-05, + "loss": 0.2104, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1244463175535202, + "step": 2485, + "valid_targets_mean": 6439.4, + "valid_targets_min": 274 + }, + { + "epoch": 4.646125116713352, + "grad_norm": 0.7251802708562897, + "learning_rate": 1.2294777361412696e-05, + "loss": 0.2316, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14780986309051514, + "step": 2490, + "valid_targets_mean": 4200.4, + "valid_targets_min": 2607 + }, + { + "epoch": 4.65546218487395, + "grad_norm": 0.5366676972972876, + "learning_rate": 1.2208987688101701e-05, + "loss": 0.2172, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09195961803197861, + "step": 2495, + "valid_targets_mean": 4812.2, + "valid_targets_min": 2023 + }, + { + "epoch": 4.6647992530345475, + "grad_norm": 0.6142311746678191, + "learning_rate": 1.2123366680961493e-05, + "loss": 0.2185, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12631487846374512, + "step": 2500, + "valid_targets_mean": 4387.6, + "valid_targets_min": 1568 + }, + { + "epoch": 4.674136321195145, + "grad_norm": 0.6168032194751466, + "learning_rate": 1.2037916193585285e-05, + "loss": 0.2171, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09252548962831497, + "step": 2505, + "valid_targets_mean": 3493.5, + "valid_targets_min": 403 + }, + { + "epoch": 4.6834733893557425, + "grad_norm": 0.7303503288944759, + "learning_rate": 1.1952638075874696e-05, + "loss": 0.2213, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09095008671283722, + "step": 2510, + "valid_targets_mean": 2511.2, + "valid_targets_min": 450 + }, + { + "epoch": 4.69281045751634, + "grad_norm": 0.6646882295348588, + "learning_rate": 1.18675341739998e-05, + "loss": 0.2098, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12448584288358688, + "step": 2515, + "valid_targets_mean": 4407.8, + "valid_targets_min": 2580 + }, + { + "epoch": 4.702147525676938, + "grad_norm": 0.6410953993916263, + "learning_rate": 1.178260633035906e-05, + "loss": 0.222, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13142627477645874, + "step": 2520, + "valid_targets_mean": 3664.2, + "valid_targets_min": 425 + }, + { + "epoch": 4.711484593837535, + "grad_norm": 0.5922465226013282, + "learning_rate": 1.1697856383539507e-05, + "loss": 0.2325, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14885929226875305, + "step": 2525, + "valid_targets_mean": 5588.0, + "valid_targets_min": 1545 + }, + { + "epoch": 4.720821661998133, + "grad_norm": 0.6435915580949712, + "learning_rate": 1.1613286168276933e-05, + "loss": 0.2309, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13050884008407593, + "step": 2530, + "valid_targets_mean": 5509.0, + "valid_targets_min": 674 + }, + { + "epoch": 4.73015873015873, + "grad_norm": 0.5937377490361547, + "learning_rate": 1.1528897515416114e-05, + "loss": 0.2161, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08483701944351196, + "step": 2535, + "valid_targets_mean": 3736.6, + "valid_targets_min": 1387 + }, + { + "epoch": 4.739495798319328, + "grad_norm": 0.6300901538956722, + "learning_rate": 1.1444692251871253e-05, + "loss": 0.2136, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13848449289798737, + "step": 2540, + "valid_targets_mean": 4718.5, + "valid_targets_min": 2025 + }, + { + "epoch": 4.748832866479925, + "grad_norm": 0.5450480892992908, + "learning_rate": 1.1360672200586365e-05, + "loss": 0.2255, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08962833881378174, + "step": 2545, + "valid_targets_mean": 4219.0, + "valid_targets_min": 1349 + }, + { + "epoch": 4.758169934640523, + "grad_norm": 0.5629221022943864, + "learning_rate": 1.1276839180495877e-05, + "loss": 0.2191, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1170375794172287, + "step": 2550, + "valid_targets_mean": 6766.6, + "valid_targets_min": 2600 + }, + { + "epoch": 4.76750700280112, + "grad_norm": 0.6511799503815409, + "learning_rate": 1.1193195006485153e-05, + "loss": 0.2157, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07441490888595581, + "step": 2555, + "valid_targets_mean": 2661.2, + "valid_targets_min": 774 + }, + { + "epoch": 4.776844070961718, + "grad_norm": 0.5927890195912614, + "learning_rate": 1.1109741489351325e-05, + "loss": 0.2205, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11961187422275543, + "step": 2560, + "valid_targets_mean": 5432.1, + "valid_targets_min": 1335 + }, + { + "epoch": 4.7861811391223155, + "grad_norm": 0.6289279217378587, + "learning_rate": 1.1026480435763984e-05, + "loss": 0.2231, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06719185411930084, + "step": 2565, + "valid_targets_mean": 2462.8, + "valid_targets_min": 442 + }, + { + "epoch": 4.795518207282913, + "grad_norm": 0.6067614562512801, + "learning_rate": 1.0943413648226142e-05, + "loss": 0.2151, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11109036952257156, + "step": 2570, + "valid_targets_mean": 3904.0, + "valid_targets_min": 1364 + }, + { + "epoch": 4.8048552754435105, + "grad_norm": 0.6740231429667027, + "learning_rate": 1.0860542925035187e-05, + "loss": 0.2242, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11373037099838257, + "step": 2575, + "valid_targets_mean": 3327.6, + "valid_targets_min": 1768 + }, + { + "epoch": 4.814192343604108, + "grad_norm": 1.1448838813434017, + "learning_rate": 1.0777870060243902e-05, + "loss": 0.22, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11946254968643188, + "step": 2580, + "valid_targets_mean": 3914.6, + "valid_targets_min": 499 + }, + { + "epoch": 4.823529411764706, + "grad_norm": 0.7468017676737118, + "learning_rate": 1.0695396843621722e-05, + "loss": 0.2286, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14492318034172058, + "step": 2585, + "valid_targets_mean": 3538.2, + "valid_targets_min": 1797 + }, + { + "epoch": 4.832866479925303, + "grad_norm": 0.5830635232446213, + "learning_rate": 1.0613125060615899e-05, + "loss": 0.2201, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13873328268527985, + "step": 2590, + "valid_targets_mean": 5670.6, + "valid_targets_min": 841 + }, + { + "epoch": 4.842203548085901, + "grad_norm": 0.6218069414427071, + "learning_rate": 1.0531056492312914e-05, + "loss": 0.2214, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10129464417695999, + "step": 2595, + "valid_targets_mean": 3931.9, + "valid_targets_min": 1505 + }, + { + "epoch": 4.851540616246498, + "grad_norm": 0.6188378809016613, + "learning_rate": 1.0449192915399878e-05, + "loss": 0.2188, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12882709503173828, + "step": 2600, + "valid_targets_mean": 3972.5, + "valid_targets_min": 2225 + }, + { + "epoch": 4.860877684407097, + "grad_norm": 0.5752295736129378, + "learning_rate": 1.0367536102126072e-05, + "loss": 0.2168, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09076380729675293, + "step": 2605, + "valid_targets_mean": 4174.8, + "valid_targets_min": 1455 + }, + { + "epoch": 4.870214752567694, + "grad_norm": 0.5741783274720529, + "learning_rate": 1.0286087820264603e-05, + "loss": 0.219, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08802001178264618, + "step": 2610, + "valid_targets_mean": 4333.2, + "valid_targets_min": 503 + }, + { + "epoch": 4.879551820728292, + "grad_norm": 0.6057455218118614, + "learning_rate": 1.0204849833074122e-05, + "loss": 0.2111, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1487019658088684, + "step": 2615, + "valid_targets_mean": 5871.8, + "valid_targets_min": 975 + }, + { + "epoch": 4.888888888888889, + "grad_norm": 0.4844870061151015, + "learning_rate": 1.0123823899260634e-05, + "loss": 0.208, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09060899168252945, + "step": 2620, + "valid_targets_mean": 5688.1, + "valid_targets_min": 1856 + }, + { + "epoch": 4.898225957049487, + "grad_norm": 1.2055772500964743, + "learning_rate": 1.0043011772939466e-05, + "loss": 0.2216, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11248426139354706, + "step": 2625, + "valid_targets_mean": 4663.5, + "valid_targets_min": 1723 + }, + { + "epoch": 4.907563025210084, + "grad_norm": 0.5447115008353857, + "learning_rate": 9.962415203597245e-06, + "loss": 0.2217, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12108441442251205, + "step": 2630, + "valid_targets_mean": 6512.8, + "valid_targets_min": 440 + }, + { + "epoch": 4.916900093370682, + "grad_norm": 0.6074176608258992, + "learning_rate": 9.882035936054042e-06, + "loss": 0.2193, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.104446642100811, + "step": 2635, + "valid_targets_mean": 3983.4, + "valid_targets_min": 835 + }, + { + "epoch": 4.926237161531279, + "grad_norm": 0.660662939013454, + "learning_rate": 9.801875710425625e-06, + "loss": 0.2203, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07885172963142395, + "step": 2640, + "valid_targets_mean": 2450.4, + "valid_targets_min": 232 + }, + { + "epoch": 4.935574229691877, + "grad_norm": 0.6775865217374937, + "learning_rate": 9.72193626208574e-06, + "loss": 0.2226, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1370387077331543, + "step": 2645, + "valid_targets_mean": 4292.4, + "valid_targets_min": 944 + }, + { + "epoch": 4.9449112978524745, + "grad_norm": 0.7475529487031847, + "learning_rate": 9.642219321628592e-06, + "loss": 0.2207, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12736068665981293, + "step": 2650, + "valid_targets_mean": 3981.4, + "valid_targets_min": 2563 + }, + { + "epoch": 4.954248366013072, + "grad_norm": 0.6970956690660021, + "learning_rate": 9.56272661483133e-06, + "loss": 0.2247, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12471716850996017, + "step": 2655, + "valid_targets_mean": 4563.8, + "valid_targets_min": 3031 + }, + { + "epoch": 4.96358543417367, + "grad_norm": 0.5648315853080815, + "learning_rate": 9.483459862616723e-06, + "loss": 0.2266, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11102037876844406, + "step": 2660, + "valid_targets_mean": 5135.6, + "valid_targets_min": 2866 + }, + { + "epoch": 4.972922502334267, + "grad_norm": 0.5893081439373169, + "learning_rate": 9.404420781015881e-06, + "loss": 0.2179, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13897529244422913, + "step": 2665, + "valid_targets_mean": 5695.9, + "valid_targets_min": 2140 + }, + { + "epoch": 4.982259570494865, + "grad_norm": 0.6426357882337819, + "learning_rate": 9.325611081131129e-06, + "loss": 0.2216, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12212681770324707, + "step": 2670, + "valid_targets_mean": 4456.6, + "valid_targets_min": 2360 + }, + { + "epoch": 4.991596638655462, + "grad_norm": 0.5651802796690788, + "learning_rate": 9.247032469098953e-06, + "loss": 0.2238, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11866939067840576, + "step": 2675, + "valid_targets_mean": 5320.2, + "valid_targets_min": 1701 + }, + { + "epoch": 5.0, + "grad_norm": 0.8060891822521504, + "learning_rate": 9.168686646053046e-06, + "loss": 0.2096, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2011338770389557, + "step": 2680, + "valid_targets_mean": 4278.1, + "valid_targets_min": 1473 + }, + { + "epoch": 5.0093370681605975, + "grad_norm": 0.5749530490829277, + "learning_rate": 9.090575308087502e-06, + "loss": 0.1934, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08740301430225372, + "step": 2685, + "valid_targets_mean": 3521.8, + "valid_targets_min": 296 + }, + { + "epoch": 5.018674136321195, + "grad_norm": 0.5616542659329842, + "learning_rate": 9.012700146220082e-06, + "loss": 0.1881, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07899457216262817, + "step": 2690, + "valid_targets_mean": 4160.6, + "valid_targets_min": 1133 + }, + { + "epoch": 5.028011204481793, + "grad_norm": 0.566155862587317, + "learning_rate": 8.935062846355642e-06, + "loss": 0.2002, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08808444440364838, + "step": 2695, + "valid_targets_mean": 4331.2, + "valid_targets_min": 1837 + }, + { + "epoch": 5.03734827264239, + "grad_norm": 0.616370958938973, + "learning_rate": 8.857665089249572e-06, + "loss": 0.2056, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10405054688453674, + "step": 2700, + "valid_targets_mean": 4815.5, + "valid_targets_min": 1024 + }, + { + "epoch": 5.046685340802988, + "grad_norm": 0.6478583496655455, + "learning_rate": 8.780508550471483e-06, + "loss": 0.1984, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11639853566884995, + "step": 2705, + "valid_targets_mean": 4995.5, + "valid_targets_min": 2113 + }, + { + "epoch": 5.056022408963585, + "grad_norm": 0.5785277044730519, + "learning_rate": 8.703594900368866e-06, + "loss": 0.1987, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0766625627875328, + "step": 2710, + "valid_targets_mean": 3989.4, + "valid_targets_min": 1753 + }, + { + "epoch": 5.065359477124183, + "grad_norm": 0.5810419460210758, + "learning_rate": 8.62692580403097e-06, + "loss": 0.1954, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09971745312213898, + "step": 2715, + "valid_targets_mean": 5026.5, + "valid_targets_min": 1901 + }, + { + "epoch": 5.07469654528478, + "grad_norm": 0.6158476216036303, + "learning_rate": 8.550502921252742e-06, + "loss": 0.2093, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10376794636249542, + "step": 2720, + "valid_targets_mean": 4923.6, + "valid_targets_min": 2373 + }, + { + "epoch": 5.084033613445378, + "grad_norm": 0.7235246803668531, + "learning_rate": 8.474327906498905e-06, + "loss": 0.2065, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08122521638870239, + "step": 2725, + "valid_targets_mean": 3193.0, + "valid_targets_min": 717 + }, + { + "epoch": 5.093370681605975, + "grad_norm": 0.5643519532776499, + "learning_rate": 8.398402408868145e-06, + "loss": 0.2115, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08213938772678375, + "step": 2730, + "valid_targets_mean": 4706.9, + "valid_targets_min": 222 + }, + { + "epoch": 5.102707749766573, + "grad_norm": 0.7176841091873969, + "learning_rate": 8.322728072057358e-06, + "loss": 0.205, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.128128781914711, + "step": 2735, + "valid_targets_mean": 3698.9, + "valid_targets_min": 1722 + }, + { + "epoch": 5.1120448179271705, + "grad_norm": 0.6481654863050837, + "learning_rate": 8.247306534326147e-06, + "loss": 0.2124, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11413657665252686, + "step": 2740, + "valid_targets_mean": 3742.1, + "valid_targets_min": 1353 + }, + { + "epoch": 5.121381886087768, + "grad_norm": 0.6699182407259507, + "learning_rate": 8.172139428461292e-06, + "loss": 0.2005, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1079854965209961, + "step": 2745, + "valid_targets_mean": 3827.5, + "valid_targets_min": 563 + }, + { + "epoch": 5.130718954248366, + "grad_norm": 0.6484099772867605, + "learning_rate": 8.097228381741441e-06, + "loss": 0.2042, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10050643235445023, + "step": 2750, + "valid_targets_mean": 4255.1, + "valid_targets_min": 1242 + }, + { + "epoch": 5.140056022408964, + "grad_norm": 0.7062736268477671, + "learning_rate": 8.022575015901854e-06, + "loss": 0.2126, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14072318375110626, + "step": 2755, + "valid_targets_mean": 4803.0, + "valid_targets_min": 2110 + }, + { + "epoch": 5.1493930905695615, + "grad_norm": 0.6162341361936751, + "learning_rate": 7.948180947099309e-06, + "loss": 0.1945, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07866983115673065, + "step": 2760, + "valid_targets_mean": 3158.0, + "valid_targets_min": 1591 + }, + { + "epoch": 5.158730158730159, + "grad_norm": 0.5254417097187098, + "learning_rate": 7.874047785877099e-06, + "loss": 0.1939, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08905071765184402, + "step": 2765, + "valid_targets_mean": 5803.0, + "valid_targets_min": 3258 + }, + { + "epoch": 5.168067226890757, + "grad_norm": 0.587276212673288, + "learning_rate": 7.800177137130198e-06, + "loss": 0.1917, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10512028634548187, + "step": 2770, + "valid_targets_mean": 5849.0, + "valid_targets_min": 2165 + }, + { + "epoch": 5.177404295051354, + "grad_norm": 0.8649562998047281, + "learning_rate": 7.726570600070498e-06, + "loss": 0.1884, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0897212028503418, + "step": 2775, + "valid_targets_mean": 3755.5, + "valid_targets_min": 290 + }, + { + "epoch": 5.186741363211952, + "grad_norm": 0.5977152199719782, + "learning_rate": 7.653229768192163e-06, + "loss": 0.2003, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11236905306577682, + "step": 2780, + "valid_targets_mean": 5411.2, + "valid_targets_min": 2620 + }, + { + "epoch": 5.196078431372549, + "grad_norm": 0.6925143879209762, + "learning_rate": 7.580156229237172e-06, + "loss": 0.2088, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10914676636457443, + "step": 2785, + "valid_targets_mean": 4265.8, + "valid_targets_min": 1312 + }, + { + "epoch": 5.205415499533147, + "grad_norm": 0.5519399920801775, + "learning_rate": 7.507351565160918e-06, + "loss": 0.198, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1013789027929306, + "step": 2790, + "valid_targets_mean": 5971.4, + "valid_targets_min": 2374 + }, + { + "epoch": 5.214752567693744, + "grad_norm": 0.5737005331204492, + "learning_rate": 7.434817352097994e-06, + "loss": 0.2035, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10660230368375778, + "step": 2795, + "valid_targets_mean": 6129.2, + "valid_targets_min": 773 + }, + { + "epoch": 5.224089635854342, + "grad_norm": 0.5910816810192487, + "learning_rate": 7.36255516032802e-06, + "loss": 0.2004, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09653788059949875, + "step": 2800, + "valid_targets_mean": 4589.6, + "valid_targets_min": 2958 + }, + { + "epoch": 5.233426704014939, + "grad_norm": 0.6135107912432586, + "learning_rate": 7.290566554241705e-06, + "loss": 0.2102, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10447406768798828, + "step": 2805, + "valid_targets_mean": 4323.2, + "valid_targets_min": 1978 + }, + { + "epoch": 5.242763772175537, + "grad_norm": 0.6117342828552415, + "learning_rate": 7.218853092306937e-06, + "loss": 0.1961, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08905218541622162, + "step": 2810, + "valid_targets_mean": 4223.4, + "valid_targets_min": 327 + }, + { + "epoch": 5.2521008403361344, + "grad_norm": 0.5840238050389137, + "learning_rate": 7.147416327035057e-06, + "loss": 0.2032, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0919448584318161, + "step": 2815, + "valid_targets_mean": 5078.2, + "valid_targets_min": 1502 + }, + { + "epoch": 5.261437908496732, + "grad_norm": 0.5692634390223054, + "learning_rate": 7.0762578049472574e-06, + "loss": 0.2005, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09533137083053589, + "step": 2820, + "valid_targets_mean": 4700.5, + "valid_targets_min": 1543 + }, + { + "epoch": 5.2707749766573295, + "grad_norm": 0.6098470838468779, + "learning_rate": 7.005379066541101e-06, + "loss": 0.2039, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11203412711620331, + "step": 2825, + "valid_targets_mean": 4828.6, + "valid_targets_min": 1905 + }, + { + "epoch": 5.280112044817927, + "grad_norm": 0.6214513584050131, + "learning_rate": 6.9347816462571675e-06, + "loss": 0.2031, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09174573421478271, + "step": 2830, + "valid_targets_mean": 4359.8, + "valid_targets_min": 599 + }, + { + "epoch": 5.289449112978525, + "grad_norm": 0.6262470373598833, + "learning_rate": 6.864467072445817e-06, + "loss": 0.194, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10590013861656189, + "step": 2835, + "valid_targets_mean": 5214.5, + "valid_targets_min": 401 + }, + { + "epoch": 5.298786181139122, + "grad_norm": 0.6137440014784042, + "learning_rate": 6.794436867334133e-06, + "loss": 0.2018, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08739018440246582, + "step": 2840, + "valid_targets_mean": 3920.2, + "valid_targets_min": 928 + }, + { + "epoch": 5.30812324929972, + "grad_norm": 0.5880458941353764, + "learning_rate": 6.7246925469929395e-06, + "loss": 0.187, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1014954149723053, + "step": 2845, + "valid_targets_mean": 4713.8, + "valid_targets_min": 1452 + }, + { + "epoch": 5.317460317460317, + "grad_norm": 0.6628688496364581, + "learning_rate": 6.655235621304021e-06, + "loss": 0.1951, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11350806057453156, + "step": 2850, + "valid_targets_mean": 4744.5, + "valid_targets_min": 3513 + }, + { + "epoch": 5.326797385620915, + "grad_norm": 0.6805111184165131, + "learning_rate": 6.5860675939273766e-06, + "loss": 0.2082, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1099146381020546, + "step": 2855, + "valid_targets_mean": 4515.6, + "valid_targets_min": 449 + }, + { + "epoch": 5.336134453781512, + "grad_norm": 0.6645509166519851, + "learning_rate": 6.517189962268733e-06, + "loss": 0.1951, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08915534615516663, + "step": 2860, + "valid_targets_mean": 3661.6, + "valid_targets_min": 174 + }, + { + "epoch": 5.34547152194211, + "grad_norm": 0.6058033237297602, + "learning_rate": 6.448604217447067e-06, + "loss": 0.2025, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07665131986141205, + "step": 2865, + "valid_targets_mean": 3964.8, + "valid_targets_min": 1230 + }, + { + "epoch": 5.354808590102707, + "grad_norm": 0.651205444632114, + "learning_rate": 6.3803118442623565e-06, + "loss": 0.2048, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1076691597700119, + "step": 2870, + "valid_targets_mean": 4193.5, + "valid_targets_min": 704 + }, + { + "epoch": 5.364145658263305, + "grad_norm": 0.6449697851180105, + "learning_rate": 6.312314321163449e-06, + "loss": 0.2127, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12453481554985046, + "step": 2875, + "valid_targets_mean": 5271.2, + "valid_targets_min": 1010 + }, + { + "epoch": 5.373482726423903, + "grad_norm": 0.6084178766404235, + "learning_rate": 6.244613120216014e-06, + "loss": 0.2009, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09028743207454681, + "step": 2880, + "valid_targets_mean": 5264.6, + "valid_targets_min": 1984 + }, + { + "epoch": 5.382819794584501, + "grad_norm": 0.6618041773127442, + "learning_rate": 6.177209707070722e-06, + "loss": 0.2136, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07981983572244644, + "step": 2885, + "valid_targets_mean": 3158.6, + "valid_targets_min": 1107 + }, + { + "epoch": 5.392156862745098, + "grad_norm": 0.708559578041828, + "learning_rate": 6.110105540931472e-06, + "loss": 0.2044, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11263357847929001, + "step": 2890, + "valid_targets_mean": 3941.0, + "valid_targets_min": 567 + }, + { + "epoch": 5.401493930905696, + "grad_norm": 0.614555646366773, + "learning_rate": 6.043302074523827e-06, + "loss": 0.2021, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09675601124763489, + "step": 2895, + "valid_targets_mean": 4277.2, + "valid_targets_min": 1702 + }, + { + "epoch": 5.4108309990662935, + "grad_norm": 0.7231171957341705, + "learning_rate": 5.976800754063552e-06, + "loss": 0.2153, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10107093304395676, + "step": 2900, + "valid_targets_mean": 3672.5, + "valid_targets_min": 1584 + }, + { + "epoch": 5.420168067226891, + "grad_norm": 0.6340643081182544, + "learning_rate": 5.9106030192253275e-06, + "loss": 0.2052, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09596800804138184, + "step": 2905, + "valid_targets_mean": 4001.5, + "valid_targets_min": 701 + }, + { + "epoch": 5.429505135387489, + "grad_norm": 0.5810297114063141, + "learning_rate": 5.844710303111541e-06, + "loss": 0.1927, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08278018236160278, + "step": 2910, + "valid_targets_mean": 3871.4, + "valid_targets_min": 377 + }, + { + "epoch": 5.438842203548086, + "grad_norm": 0.6784192497968957, + "learning_rate": 5.7791240322213215e-06, + "loss": 0.2084, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11687328666448593, + "step": 2915, + "valid_targets_mean": 4190.2, + "valid_targets_min": 1127 + }, + { + "epoch": 5.448179271708684, + "grad_norm": 0.6842637239654884, + "learning_rate": 5.7138456264195765e-06, + "loss": 0.2062, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11656396090984344, + "step": 2920, + "valid_targets_mean": 3846.0, + "valid_targets_min": 1054 + }, + { + "epoch": 5.457516339869281, + "grad_norm": 0.5339946500297408, + "learning_rate": 5.648876498906335e-06, + "loss": 0.1994, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11359822750091553, + "step": 2925, + "valid_targets_mean": 7320.0, + "valid_targets_min": 1332 + }, + { + "epoch": 5.466853408029879, + "grad_norm": 0.8732967491921777, + "learning_rate": 5.584218056186113e-06, + "loss": 0.2082, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0932447761297226, + "step": 2930, + "valid_targets_mean": 4099.4, + "valid_targets_min": 451 + }, + { + "epoch": 5.476190476190476, + "grad_norm": 0.6335925979225394, + "learning_rate": 5.519871698037453e-06, + "loss": 0.2088, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10523591935634613, + "step": 2935, + "valid_targets_mean": 4751.8, + "valid_targets_min": 1948 + }, + { + "epoch": 5.485527544351074, + "grad_norm": 0.6212110281939103, + "learning_rate": 5.455838817482666e-06, + "loss": 0.2083, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1071697250008583, + "step": 2940, + "valid_targets_mean": 4687.8, + "valid_targets_min": 3002 + }, + { + "epoch": 5.494864612511671, + "grad_norm": 0.6896565002014967, + "learning_rate": 5.392120800757603e-06, + "loss": 0.2172, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0852847471833229, + "step": 2945, + "valid_targets_mean": 3400.4, + "valid_targets_min": 1707 + }, + { + "epoch": 5.504201680672269, + "grad_norm": 0.5975980403905801, + "learning_rate": 5.328719027281728e-06, + "loss": 0.2005, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06674560904502869, + "step": 2950, + "valid_targets_mean": 3381.5, + "valid_targets_min": 813 + }, + { + "epoch": 5.513538748832866, + "grad_norm": 0.6456720720472638, + "learning_rate": 5.265634869628182e-06, + "loss": 0.1914, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08055633306503296, + "step": 2955, + "valid_targets_mean": 2825.2, + "valid_targets_min": 1135 + }, + { + "epoch": 5.522875816993464, + "grad_norm": 0.6342661392195457, + "learning_rate": 5.20286969349413e-06, + "loss": 0.1982, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10066903382539749, + "step": 2960, + "valid_targets_mean": 4832.1, + "valid_targets_min": 623 + }, + { + "epoch": 5.5322128851540615, + "grad_norm": 0.5724104952393078, + "learning_rate": 5.140424857671145e-06, + "loss": 0.1995, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11310087144374847, + "step": 2965, + "valid_targets_mean": 5710.1, + "valid_targets_min": 724 + }, + { + "epoch": 5.541549953314659, + "grad_norm": 0.6572690846020527, + "learning_rate": 5.07830171401581e-06, + "loss": 0.208, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10160873830318451, + "step": 2970, + "valid_targets_mean": 3892.9, + "valid_targets_min": 332 + }, + { + "epoch": 5.550887021475257, + "grad_norm": 0.9366588084980847, + "learning_rate": 5.016501607420476e-06, + "loss": 0.2048, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10841170698404312, + "step": 2975, + "valid_targets_mean": 2951.6, + "valid_targets_min": 522 + }, + { + "epoch": 5.560224089635854, + "grad_norm": 0.5566733674049423, + "learning_rate": 4.955025875784098e-06, + "loss": 0.1921, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09049998968839645, + "step": 2980, + "valid_targets_mean": 4727.9, + "valid_targets_min": 1901 + }, + { + "epoch": 5.569561157796452, + "grad_norm": 0.6098130490899216, + "learning_rate": 4.893875849983323e-06, + "loss": 0.2035, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10082744807004929, + "step": 2985, + "valid_targets_mean": 4435.4, + "valid_targets_min": 1542 + }, + { + "epoch": 5.578898225957049, + "grad_norm": 0.6526013540087547, + "learning_rate": 4.833052853843636e-06, + "loss": 0.2071, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09441793709993362, + "step": 2990, + "valid_targets_mean": 3364.4, + "valid_targets_min": 653 + }, + { + "epoch": 5.588235294117647, + "grad_norm": 0.6939661708297988, + "learning_rate": 4.772558204110724e-06, + "loss": 0.2108, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10013644397258759, + "step": 2995, + "valid_targets_mean": 4274.9, + "valid_targets_min": 1663 + }, + { + "epoch": 5.597572362278244, + "grad_norm": 0.710722059881649, + "learning_rate": 4.712393210421957e-06, + "loss": 0.2079, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1229126900434494, + "step": 3000, + "valid_targets_mean": 4986.0, + "valid_targets_min": 2853 + }, + { + "epoch": 5.606909430438842, + "grad_norm": 0.5172456756468738, + "learning_rate": 4.652559175278062e-06, + "loss": 0.1982, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08745473623275757, + "step": 3005, + "valid_targets_mean": 5422.1, + "valid_targets_min": 2579 + }, + { + "epoch": 5.616246498599439, + "grad_norm": 0.6097253608014057, + "learning_rate": 4.593057394014882e-06, + "loss": 0.2004, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08570478856563568, + "step": 3010, + "valid_targets_mean": 3769.4, + "valid_targets_min": 1744 + }, + { + "epoch": 5.625583566760037, + "grad_norm": 0.6511844879908856, + "learning_rate": 4.5338891547753775e-06, + "loss": 0.2046, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08466436713933945, + "step": 3015, + "valid_targets_mean": 3549.5, + "valid_targets_min": 996 + }, + { + "epoch": 5.634920634920634, + "grad_norm": 0.6673014721688839, + "learning_rate": 4.475055738481711e-06, + "loss": 0.2052, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1343677043914795, + "step": 3020, + "valid_targets_mean": 4916.4, + "valid_targets_min": 1537 + }, + { + "epoch": 5.644257703081233, + "grad_norm": 0.5634672541486365, + "learning_rate": 4.416558418807517e-06, + "loss": 0.2067, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09532289206981659, + "step": 3025, + "valid_targets_mean": 5269.6, + "valid_targets_min": 1093 + }, + { + "epoch": 5.65359477124183, + "grad_norm": 0.5501237676811432, + "learning_rate": 4.3583984621503596e-06, + "loss": 0.2034, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08804108202457428, + "step": 3030, + "valid_targets_mean": 4992.9, + "valid_targets_min": 1627 + }, + { + "epoch": 5.662931839402428, + "grad_norm": 0.7780993402039763, + "learning_rate": 4.300577127604269e-06, + "loss": 0.1927, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09752538055181503, + "step": 3035, + "valid_targets_mean": 4672.5, + "valid_targets_min": 1276 + }, + { + "epoch": 5.6722689075630255, + "grad_norm": 0.6878126480229166, + "learning_rate": 4.243095666932531e-06, + "loss": 0.2129, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09764151275157928, + "step": 3040, + "valid_targets_mean": 4517.5, + "valid_targets_min": 746 + }, + { + "epoch": 5.681605975723623, + "grad_norm": 0.6315320262517807, + "learning_rate": 4.18595532454055e-06, + "loss": 0.206, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08879482746124268, + "step": 3045, + "valid_targets_mean": 3532.6, + "valid_targets_min": 1976 + }, + { + "epoch": 5.690943043884221, + "grad_norm": 0.5944722365748681, + "learning_rate": 4.129157337448926e-06, + "loss": 0.2064, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10872494429349899, + "step": 3050, + "valid_targets_mean": 5064.9, + "valid_targets_min": 2732 + }, + { + "epoch": 5.700280112044818, + "grad_norm": 0.6708128877019888, + "learning_rate": 4.072702935266677e-06, + "loss": 0.206, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12292296439409256, + "step": 3055, + "valid_targets_mean": 4739.0, + "valid_targets_min": 1629 + }, + { + "epoch": 5.709617180205416, + "grad_norm": 0.6037640443219398, + "learning_rate": 4.016593340164618e-06, + "loss": 0.2044, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10872720181941986, + "step": 3060, + "valid_targets_mean": 5400.8, + "valid_targets_min": 2209 + }, + { + "epoch": 5.718954248366013, + "grad_norm": 0.6569399252562855, + "learning_rate": 3.960829766848893e-06, + "loss": 0.2067, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08234910666942596, + "step": 3065, + "valid_targets_mean": 3420.9, + "valid_targets_min": 236 + }, + { + "epoch": 5.728291316526611, + "grad_norm": 0.6399526274619315, + "learning_rate": 3.905413422534696e-06, + "loss": 0.1982, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08259132504463196, + "step": 3070, + "valid_targets_mean": 2932.1, + "valid_targets_min": 1422 + }, + { + "epoch": 5.737628384687208, + "grad_norm": 0.6512696348065643, + "learning_rate": 3.85034550692011e-06, + "loss": 0.207, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12191061675548553, + "step": 3075, + "valid_targets_mean": 4966.9, + "valid_targets_min": 1042 + }, + { + "epoch": 5.746965452847806, + "grad_norm": 0.5633577103568521, + "learning_rate": 3.7956272121601555e-06, + "loss": 0.2182, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09584501385688782, + "step": 3080, + "valid_targets_mean": 5633.1, + "valid_targets_min": 2201 + }, + { + "epoch": 5.756302521008403, + "grad_norm": 0.647905916190883, + "learning_rate": 3.7412597228409884e-06, + "loss": 0.2055, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1064843088388443, + "step": 3085, + "valid_targets_mean": 4003.8, + "valid_targets_min": 2949 + }, + { + "epoch": 5.765639589169001, + "grad_norm": 0.629528469515987, + "learning_rate": 3.687244215954222e-06, + "loss": 0.2068, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08780631422996521, + "step": 3090, + "valid_targets_mean": 4090.9, + "valid_targets_min": 782 + }, + { + "epoch": 5.774976657329598, + "grad_norm": 0.6260672373662172, + "learning_rate": 3.633581860871491e-06, + "loss": 0.2051, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1319265067577362, + "step": 3095, + "valid_targets_mean": 5669.1, + "valid_targets_min": 362 + }, + { + "epoch": 5.784313725490196, + "grad_norm": 0.6805837905622385, + "learning_rate": 3.5802738193190954e-06, + "loss": 0.2154, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10276803374290466, + "step": 3100, + "valid_targets_mean": 3566.4, + "valid_targets_min": 599 + }, + { + "epoch": 5.7936507936507935, + "grad_norm": 0.6262625403794273, + "learning_rate": 3.5273212453528705e-06, + "loss": 0.1957, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10209716856479645, + "step": 3105, + "valid_targets_mean": 4434.6, + "valid_targets_min": 417 + }, + { + "epoch": 5.802987861811391, + "grad_norm": 0.7131821786976387, + "learning_rate": 3.4747252853332004e-06, + "loss": 0.2033, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07827740907669067, + "step": 3110, + "valid_targets_mean": 2878.1, + "valid_targets_min": 447 + }, + { + "epoch": 5.812324929971989, + "grad_norm": 0.6372756010104069, + "learning_rate": 3.4224870779002117e-06, + "loss": 0.2034, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07342477142810822, + "step": 3115, + "valid_targets_mean": 3871.6, + "valid_targets_min": 853 + }, + { + "epoch": 5.821661998132586, + "grad_norm": 0.6155087770311691, + "learning_rate": 3.3706077539490933e-06, + "loss": 0.2118, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08762417733669281, + "step": 3120, + "valid_targets_mean": 3502.4, + "valid_targets_min": 1067 + }, + { + "epoch": 5.830999066293184, + "grad_norm": 0.6290473954665811, + "learning_rate": 3.3190884366056532e-06, + "loss": 0.2058, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0908953845500946, + "step": 3125, + "valid_targets_mean": 3652.8, + "valid_targets_min": 684 + }, + { + "epoch": 5.840336134453781, + "grad_norm": 0.5534023773980616, + "learning_rate": 3.2679302412019665e-06, + "loss": 0.2043, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11421425640583038, + "step": 3130, + "valid_targets_mean": 7037.0, + "valid_targets_min": 554 + }, + { + "epoch": 5.849673202614379, + "grad_norm": 0.6076118083178337, + "learning_rate": 3.2171342752522494e-06, + "loss": 0.2018, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1109100878238678, + "step": 3135, + "valid_targets_mean": 5634.1, + "valid_targets_min": 470 + }, + { + "epoch": 5.859010270774976, + "grad_norm": 0.6232825831073033, + "learning_rate": 3.166701638428895e-06, + "loss": 0.1884, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0864684134721756, + "step": 3140, + "valid_targets_mean": 4072.8, + "valid_targets_min": 1238 + }, + { + "epoch": 5.868347338935575, + "grad_norm": 0.6841696956256673, + "learning_rate": 3.1166334225386306e-06, + "loss": 0.1992, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09969945251941681, + "step": 3145, + "valid_targets_mean": 3823.6, + "valid_targets_min": 526 + }, + { + "epoch": 5.877684407096172, + "grad_norm": 0.6153054711915283, + "learning_rate": 3.066930711498921e-06, + "loss": 0.1878, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07580120861530304, + "step": 3150, + "valid_targets_mean": 3327.5, + "valid_targets_min": 174 + }, + { + "epoch": 5.88702147525677, + "grad_norm": 0.5337404292647568, + "learning_rate": 3.0175945813144668e-06, + "loss": 0.2113, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11523711681365967, + "step": 3155, + "valid_targets_mean": 7234.9, + "valid_targets_min": 5066 + }, + { + "epoch": 5.896358543417367, + "grad_norm": 0.6083883516264043, + "learning_rate": 2.9686261000539484e-06, + "loss": 0.1986, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10684549808502197, + "step": 3160, + "valid_targets_mean": 4933.4, + "valid_targets_min": 209 + }, + { + "epoch": 5.905695611577965, + "grad_norm": 0.6019065871548795, + "learning_rate": 2.920026327826866e-06, + "loss": 0.2135, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10621412843465805, + "step": 3165, + "valid_targets_mean": 5018.0, + "valid_targets_min": 2196 + }, + { + "epoch": 5.915032679738562, + "grad_norm": 0.6302063100093341, + "learning_rate": 2.87179631676062e-06, + "loss": 0.1994, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10037851333618164, + "step": 3170, + "valid_targets_mean": 4307.5, + "valid_targets_min": 1485 + }, + { + "epoch": 5.92436974789916, + "grad_norm": 0.6776668200191497, + "learning_rate": 2.8239371109777127e-06, + "loss": 0.2092, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09030836075544357, + "step": 3175, + "valid_targets_mean": 3039.6, + "valid_targets_min": 515 + }, + { + "epoch": 5.9337068160597575, + "grad_norm": 0.5722100157951456, + "learning_rate": 2.7764497465731487e-06, + "loss": 0.2015, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09112873673439026, + "step": 3180, + "valid_targets_mean": 4562.5, + "valid_targets_min": 1367 + }, + { + "epoch": 5.943043884220355, + "grad_norm": 0.6219572990389131, + "learning_rate": 2.72933525159202e-06, + "loss": 0.2074, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08352036774158478, + "step": 3185, + "valid_targets_mean": 4023.1, + "valid_targets_min": 258 + }, + { + "epoch": 5.9523809523809526, + "grad_norm": 0.667639469118266, + "learning_rate": 2.6825946460072237e-06, + "loss": 0.2063, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12139201164245605, + "step": 3190, + "valid_targets_mean": 4988.6, + "valid_targets_min": 1697 + }, + { + "epoch": 5.96171802054155, + "grad_norm": 0.6142298659915802, + "learning_rate": 2.636228941697414e-06, + "loss": 0.2042, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10242524743080139, + "step": 3195, + "valid_targets_mean": 4367.0, + "valid_targets_min": 2528 + }, + { + "epoch": 5.971055088702148, + "grad_norm": 0.6640841381992311, + "learning_rate": 2.5902391424250573e-06, + "loss": 0.2067, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09303018450737, + "step": 3200, + "valid_targets_mean": 3209.8, + "valid_targets_min": 1002 + }, + { + "epoch": 5.980392156862745, + "grad_norm": 0.5697398715321416, + "learning_rate": 2.544626243814732e-06, + "loss": 0.2111, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1013244092464447, + "step": 3205, + "valid_targets_mean": 5252.0, + "valid_targets_min": 1983 + }, + { + "epoch": 5.989729225023343, + "grad_norm": 0.6465290373289106, + "learning_rate": 2.4993912333315605e-06, + "loss": 0.2046, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08251726627349854, + "step": 3210, + "valid_targets_mean": 3695.8, + "valid_targets_min": 553 + }, + { + "epoch": 5.99906629318394, + "grad_norm": 0.6996156679197246, + "learning_rate": 2.4545350902598464e-06, + "loss": 0.2054, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10436704009771347, + "step": 3215, + "valid_targets_mean": 4194.0, + "valid_targets_min": 723 + }, + { + "epoch": 6.007469654528478, + "grad_norm": 0.5712166283780137, + "learning_rate": 2.4100587856818458e-06, + "loss": 0.2008, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08268176019191742, + "step": 3220, + "valid_targets_mean": 3754.2, + "valid_targets_min": 1422 + }, + { + "epoch": 6.016806722689076, + "grad_norm": 0.547780256063662, + "learning_rate": 2.3659632824567823e-06, + "loss": 0.1927, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09468269348144531, + "step": 3225, + "valid_targets_mean": 5644.9, + "valid_targets_min": 835 + }, + { + "epoch": 6.026143790849673, + "grad_norm": 0.6157056704018261, + "learning_rate": 2.322249535199965e-06, + "loss": 0.189, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08802121132612228, + "step": 3230, + "valid_targets_mean": 4294.0, + "valid_targets_min": 1366 + }, + { + "epoch": 6.035480859010271, + "grad_norm": 0.616088396879546, + "learning_rate": 2.278918490262143e-06, + "loss": 0.1929, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07903400808572769, + "step": 3235, + "valid_targets_mean": 3260.9, + "valid_targets_min": 1708 + }, + { + "epoch": 6.044817927170868, + "grad_norm": 0.5648660658415104, + "learning_rate": 2.235971085709028e-06, + "loss": 0.1919, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0727640688419342, + "step": 3240, + "valid_targets_mean": 4127.4, + "valid_targets_min": 2391 + }, + { + "epoch": 6.054154995331466, + "grad_norm": 0.6887204470271658, + "learning_rate": 2.193408251300948e-06, + "loss": 0.1927, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10112135112285614, + "step": 3245, + "valid_targets_mean": 3456.4, + "valid_targets_min": 787 + }, + { + "epoch": 6.063492063492063, + "grad_norm": 0.5873015081356037, + "learning_rate": 2.1512309084727724e-06, + "loss": 0.1995, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11303147673606873, + "step": 3250, + "valid_targets_mean": 5706.4, + "valid_targets_min": 1686 + }, + { + "epoch": 6.072829131652661, + "grad_norm": 0.5664414845916196, + "learning_rate": 2.1094399703139113e-06, + "loss": 0.193, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09565187990665436, + "step": 3255, + "valid_targets_mean": 4299.9, + "valid_targets_min": 750 + }, + { + "epoch": 6.082166199813258, + "grad_norm": 0.6585640130733998, + "learning_rate": 2.068036341548585e-06, + "loss": 0.2035, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0778937041759491, + "step": 3260, + "valid_targets_mean": 3371.4, + "valid_targets_min": 1020 + }, + { + "epoch": 6.091503267973856, + "grad_norm": 0.725624378679936, + "learning_rate": 2.027020918516216e-06, + "loss": 0.2014, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1063174232840538, + "step": 3265, + "valid_targets_mean": 4077.0, + "valid_targets_min": 850 + }, + { + "epoch": 6.100840336134453, + "grad_norm": 0.5672412752637479, + "learning_rate": 1.9863945891520474e-06, + "loss": 0.1884, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08120550215244293, + "step": 3270, + "valid_targets_mean": 5733.0, + "valid_targets_min": 2321 + }, + { + "epoch": 6.110177404295051, + "grad_norm": 0.6825864828377503, + "learning_rate": 1.9461582329678895e-06, + "loss": 0.2049, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16918836534023285, + "step": 3275, + "valid_targets_mean": 6274.6, + "valid_targets_min": 3243 + }, + { + "epoch": 6.1195144724556485, + "grad_norm": 0.5851873637399602, + "learning_rate": 1.9063127210331145e-06, + "loss": 0.197, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08898302912712097, + "step": 3280, + "valid_targets_mean": 4436.1, + "valid_targets_min": 2707 + }, + { + "epoch": 6.128851540616246, + "grad_norm": 0.5869239329366702, + "learning_rate": 1.8668589159557626e-06, + "loss": 0.1985, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0954725593328476, + "step": 3285, + "valid_targets_mean": 4881.0, + "valid_targets_min": 679 + }, + { + "epoch": 6.1381886087768445, + "grad_norm": 0.5833483673119623, + "learning_rate": 1.8277976718638934e-06, + "loss": 0.1926, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07201820611953735, + "step": 3290, + "valid_targets_mean": 3633.1, + "valid_targets_min": 1505 + }, + { + "epoch": 6.147525676937442, + "grad_norm": 0.559582142077021, + "learning_rate": 1.789129834387091e-06, + "loss": 0.1945, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08507552742958069, + "step": 3295, + "valid_targets_mean": 4332.2, + "valid_targets_min": 1180 + }, + { + "epoch": 6.1568627450980395, + "grad_norm": 0.6508165303667743, + "learning_rate": 1.7508562406381414e-06, + "loss": 0.1971, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09027493000030518, + "step": 3300, + "valid_targets_mean": 3839.5, + "valid_targets_min": 1553 + }, + { + "epoch": 6.166199813258637, + "grad_norm": 0.5717355301158279, + "learning_rate": 1.7129777191949437e-06, + "loss": 0.205, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07736235857009888, + "step": 3305, + "valid_targets_mean": 4331.4, + "valid_targets_min": 1079 + }, + { + "epoch": 6.175536881419235, + "grad_norm": 0.6509648771846253, + "learning_rate": 1.6754950900825174e-06, + "loss": 0.1925, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09676581621170044, + "step": 3310, + "valid_targets_mean": 4688.0, + "valid_targets_min": 519 + }, + { + "epoch": 6.184873949579832, + "grad_norm": 0.6572210674167744, + "learning_rate": 1.6384091647553125e-06, + "loss": 0.1948, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11021740734577179, + "step": 3315, + "valid_targets_mean": 4494.5, + "valid_targets_min": 1785 + }, + { + "epoch": 6.19421101774043, + "grad_norm": 0.6269524696155288, + "learning_rate": 1.601720746079598e-06, + "loss": 0.1966, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1151052713394165, + "step": 3320, + "valid_targets_mean": 5019.0, + "valid_targets_min": 3461 + }, + { + "epoch": 6.203548085901027, + "grad_norm": 0.6921226235719712, + "learning_rate": 1.5654306283161002e-06, + "loss": 0.1801, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1175028383731842, + "step": 3325, + "valid_targets_mean": 4395.2, + "valid_targets_min": 682 + }, + { + "epoch": 6.212885154061625, + "grad_norm": 0.6522133877440931, + "learning_rate": 1.5295395971028094e-06, + "loss": 0.2021, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10393942147493362, + "step": 3330, + "valid_targets_mean": 4280.5, + "valid_targets_min": 1557 + }, + { + "epoch": 6.222222222222222, + "grad_norm": 0.6436878229117897, + "learning_rate": 1.4940484294379442e-06, + "loss": 0.197, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1129285991191864, + "step": 3335, + "valid_targets_mean": 4575.6, + "valid_targets_min": 2126 + }, + { + "epoch": 6.23155929038282, + "grad_norm": 0.6648126215839155, + "learning_rate": 1.4589578936631776e-06, + "loss": 0.2086, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11763853579759598, + "step": 3340, + "valid_targets_mean": 4905.2, + "valid_targets_min": 503 + }, + { + "epoch": 6.240896358543417, + "grad_norm": 0.7196651470123409, + "learning_rate": 1.424268749446962e-06, + "loss": 0.2076, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12868764996528625, + "step": 3345, + "valid_targets_mean": 4010.6, + "valid_targets_min": 2365 + }, + { + "epoch": 6.250233426704015, + "grad_norm": 0.6592380885449605, + "learning_rate": 1.3899817477681056e-06, + "loss": 0.2038, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0899820551276207, + "step": 3350, + "valid_targets_mean": 3732.9, + "valid_targets_min": 1766 + }, + { + "epoch": 6.2595704948646125, + "grad_norm": 0.5506167493988989, + "learning_rate": 1.3560976308995065e-06, + "loss": 0.1922, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08871421962976456, + "step": 3355, + "valid_targets_mean": 5376.8, + "valid_targets_min": 1265 + }, + { + "epoch": 6.26890756302521, + "grad_norm": 0.5971213466542673, + "learning_rate": 1.3226171323920811e-06, + "loss": 0.2, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10413601994514465, + "step": 3360, + "valid_targets_mean": 5254.1, + "valid_targets_min": 2881 + }, + { + "epoch": 6.278244631185808, + "grad_norm": 0.6153079001479173, + "learning_rate": 1.2895409770588874e-06, + "loss": 0.1978, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08798134326934814, + "step": 3365, + "valid_targets_mean": 4093.0, + "valid_targets_min": 867 + }, + { + "epoch": 6.287581699346405, + "grad_norm": 0.673481009666776, + "learning_rate": 1.2568698809594437e-06, + "loss": 0.1913, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1036899983882904, + "step": 3370, + "valid_targets_mean": 4344.2, + "valid_targets_min": 813 + }, + { + "epoch": 6.296918767507003, + "grad_norm": 0.6190391287013424, + "learning_rate": 1.2246045513842008e-06, + "loss": 0.201, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08923386037349701, + "step": 3375, + "valid_targets_mean": 4027.0, + "valid_targets_min": 1357 + }, + { + "epoch": 6.3062558356676, + "grad_norm": 0.7382750153793265, + "learning_rate": 1.1927456868392605e-06, + "loss": 0.2055, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12498262524604797, + "step": 3380, + "valid_targets_mean": 3591.5, + "valid_targets_min": 522 + }, + { + "epoch": 6.315592903828198, + "grad_norm": 0.6164432866883863, + "learning_rate": 1.1612939770312325e-06, + "loss": 0.194, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10886119306087494, + "step": 3385, + "valid_targets_mean": 5814.4, + "valid_targets_min": 2529 + }, + { + "epoch": 6.324929971988795, + "grad_norm": 0.5849945069904484, + "learning_rate": 1.1302501028523039e-06, + "loss": 0.2017, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11015359312295914, + "step": 3390, + "valid_targets_mean": 5298.2, + "valid_targets_min": 648 + }, + { + "epoch": 6.334267040149393, + "grad_norm": 0.6521531628616221, + "learning_rate": 1.0996147363655175e-06, + "loss": 0.1906, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08631982654333115, + "step": 3395, + "valid_targets_mean": 4031.9, + "valid_targets_min": 1276 + }, + { + "epoch": 6.34360410830999, + "grad_norm": 0.5739811049052548, + "learning_rate": 1.069388540790195e-06, + "loss": 0.1981, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08396562933921814, + "step": 3400, + "valid_targets_mean": 4857.5, + "valid_targets_min": 3372 + }, + { + "epoch": 6.352941176470588, + "grad_norm": 0.6604591326992927, + "learning_rate": 1.039572170487606e-06, + "loss": 0.1984, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.135553777217865, + "step": 3405, + "valid_targets_mean": 5286.1, + "valid_targets_min": 2256 + }, + { + "epoch": 6.362278244631185, + "grad_norm": 0.712988930755191, + "learning_rate": 1.0101662709467841e-06, + "loss": 0.1907, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10399291664361954, + "step": 3410, + "valid_targets_mean": 3817.6, + "valid_targets_min": 2003 + }, + { + "epoch": 6.371615312791784, + "grad_norm": 0.6046779670198157, + "learning_rate": 9.811714787705528e-07, + "loss": 0.193, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10832379758358002, + "step": 3415, + "valid_targets_mean": 5011.5, + "valid_targets_min": 976 + }, + { + "epoch": 6.380952380952381, + "grad_norm": 0.8607151109129529, + "learning_rate": 9.525884216617575e-07, + "loss": 0.1894, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09346240013837814, + "step": 3420, + "valid_targets_mean": 2705.6, + "valid_targets_min": 331 + }, + { + "epoch": 6.390289449112979, + "grad_norm": 0.6721174379786756, + "learning_rate": 9.244177184096603e-07, + "loss": 0.2012, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08682118356227875, + "step": 3425, + "valid_targets_mean": 3992.6, + "valid_targets_min": 558 + }, + { + "epoch": 6.3996265172735765, + "grad_norm": 0.5907572595495145, + "learning_rate": 8.96659978876564e-07, + "loss": 0.1891, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06742614507675171, + "step": 3430, + "valid_targets_mean": 3867.1, + "valid_targets_min": 222 + }, + { + "epoch": 6.408963585434174, + "grad_norm": 0.6089781412166181, + "learning_rate": 8.693158039845851e-07, + "loss": 0.1915, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08533721417188644, + "step": 3435, + "valid_targets_mean": 4151.1, + "valid_targets_min": 2884 + }, + { + "epoch": 6.4183006535947715, + "grad_norm": 0.671465589657271, + "learning_rate": 8.423857857026574e-07, + "loss": 0.1975, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11493104696273804, + "step": 3440, + "valid_targets_mean": 4189.4, + "valid_targets_min": 2154 + }, + { + "epoch": 6.427637721755369, + "grad_norm": 0.6857279727144713, + "learning_rate": 8.158705070337181e-07, + "loss": 0.1975, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0752478614449501, + "step": 3445, + "valid_targets_mean": 2685.8, + "valid_targets_min": 622 + }, + { + "epoch": 6.436974789915967, + "grad_norm": 0.6224301781638081, + "learning_rate": 7.897705420020863e-07, + "loss": 0.1961, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07470513880252838, + "step": 3450, + "valid_targets_mean": 3582.0, + "valid_targets_min": 1183 + }, + { + "epoch": 6.446311858076564, + "grad_norm": 0.6128757502974155, + "learning_rate": 7.640864556410311e-07, + "loss": 0.1926, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0965309664607048, + "step": 3455, + "valid_targets_mean": 5570.2, + "valid_targets_min": 2843 + }, + { + "epoch": 6.455648926237162, + "grad_norm": 0.61224697714922, + "learning_rate": 7.388188039805455e-07, + "loss": 0.1921, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11384371668100357, + "step": 3460, + "valid_targets_mean": 6137.4, + "valid_targets_min": 1750 + }, + { + "epoch": 6.464985994397759, + "grad_norm": 0.6673588757981107, + "learning_rate": 7.139681340352966e-07, + "loss": 0.1937, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11313086003065109, + "step": 3465, + "valid_targets_mean": 5570.2, + "valid_targets_min": 2576 + }, + { + "epoch": 6.474323062558357, + "grad_norm": 0.705280850508819, + "learning_rate": 6.895349837928033e-07, + "loss": 0.1985, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11846912652254105, + "step": 3470, + "valid_targets_mean": 4440.9, + "valid_targets_min": 174 + }, + { + "epoch": 6.483660130718954, + "grad_norm": 0.8510061678569963, + "learning_rate": 6.655198822017661e-07, + "loss": 0.1965, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10406720638275146, + "step": 3475, + "valid_targets_mean": 3995.0, + "valid_targets_min": 1332 + }, + { + "epoch": 6.492997198879552, + "grad_norm": 0.5975958949988172, + "learning_rate": 6.419233491606403e-07, + "loss": 0.1977, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0891023576259613, + "step": 3480, + "valid_targets_mean": 5441.6, + "valid_targets_min": 791 + }, + { + "epoch": 6.502334267040149, + "grad_norm": 0.6542675794583154, + "learning_rate": 6.187458955063652e-07, + "loss": 0.1965, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09982125461101532, + "step": 3485, + "valid_targets_mean": 4077.6, + "valid_targets_min": 1751 + }, + { + "epoch": 6.511671335200747, + "grad_norm": 0.6687353825497875, + "learning_rate": 5.959880230033043e-07, + "loss": 0.2, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09601489454507828, + "step": 3490, + "valid_targets_mean": 4978.4, + "valid_targets_min": 1067 + }, + { + "epoch": 6.5210084033613445, + "grad_norm": 0.5188789469936683, + "learning_rate": 5.736502243323894e-07, + "loss": 0.1968, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08085530251264572, + "step": 3495, + "valid_targets_mean": 5354.0, + "valid_targets_min": 1676 + }, + { + "epoch": 6.530345471521942, + "grad_norm": 0.689889448609767, + "learning_rate": 5.517329830804552e-07, + "loss": 0.1948, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10119321942329407, + "step": 3500, + "valid_targets_mean": 4471.2, + "valid_targets_min": 1312 + }, + { + "epoch": 6.5396825396825395, + "grad_norm": 0.7097304844162888, + "learning_rate": 5.302367737297642e-07, + "loss": 0.2037, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08398271352052689, + "step": 3505, + "valid_targets_mean": 3590.4, + "valid_targets_min": 515 + }, + { + "epoch": 6.549019607843137, + "grad_norm": 0.6248707722554311, + "learning_rate": 5.091620616477366e-07, + "loss": 0.2028, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09825564920902252, + "step": 3510, + "valid_targets_mean": 5292.5, + "valid_targets_min": 506 + }, + { + "epoch": 6.558356676003735, + "grad_norm": 0.6063828985309228, + "learning_rate": 4.88509303076885e-07, + "loss": 0.1906, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06808625906705856, + "step": 3515, + "valid_targets_mean": 3380.5, + "valid_targets_min": 356 + }, + { + "epoch": 6.567693744164332, + "grad_norm": 0.6101605337570888, + "learning_rate": 4.6827894512491814e-07, + "loss": 0.1887, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09382276237010956, + "step": 3520, + "valid_targets_mean": 4699.1, + "valid_targets_min": 302 + }, + { + "epoch": 6.57703081232493, + "grad_norm": 0.5737764222605023, + "learning_rate": 4.4847142575507706e-07, + "loss": 0.1983, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0991055965423584, + "step": 3525, + "valid_targets_mean": 5124.1, + "valid_targets_min": 1247 + }, + { + "epoch": 6.586367880485527, + "grad_norm": 0.6002893010875692, + "learning_rate": 4.290871737766544e-07, + "loss": 0.1931, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07745826989412308, + "step": 3530, + "valid_targets_mean": 3801.8, + "valid_targets_min": 317 + }, + { + "epoch": 6.595704948646125, + "grad_norm": 0.6174709718493193, + "learning_rate": 4.1012660883570146e-07, + "loss": 0.1984, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10142934322357178, + "step": 3535, + "valid_targets_mean": 4593.8, + "valid_targets_min": 953 + }, + { + "epoch": 6.605042016806722, + "grad_norm": 0.607227421339816, + "learning_rate": 3.915901414059464e-07, + "loss": 0.1987, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09889011085033417, + "step": 3540, + "valid_targets_mean": 4678.9, + "valid_targets_min": 1742 + }, + { + "epoch": 6.61437908496732, + "grad_norm": 0.5663124043877272, + "learning_rate": 3.734781727799086e-07, + "loss": 0.1899, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07535569369792938, + "step": 3545, + "valid_targets_mean": 4787.6, + "valid_targets_min": 265 + }, + { + "epoch": 6.623716153127917, + "grad_norm": 0.6811694915102869, + "learning_rate": 3.557910950602228e-07, + "loss": 0.1966, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08786097913980484, + "step": 3550, + "valid_targets_mean": 4415.1, + "valid_targets_min": 2755 + }, + { + "epoch": 6.633053221288515, + "grad_norm": 0.6530183374102152, + "learning_rate": 3.3852929115112177e-07, + "loss": 0.1988, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10903848707675934, + "step": 3555, + "valid_targets_mean": 4009.8, + "valid_targets_min": 2350 + }, + { + "epoch": 6.642390289449113, + "grad_norm": 0.67946125200666, + "learning_rate": 3.2169313475018506e-07, + "loss": 0.1838, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0765589028596878, + "step": 3560, + "valid_targets_mean": 2764.6, + "valid_targets_min": 413 + }, + { + "epoch": 6.651727357609711, + "grad_norm": 0.5763008663778598, + "learning_rate": 3.0528299034021437e-07, + "loss": 0.1936, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11489193886518478, + "step": 3565, + "valid_targets_mean": 6447.2, + "valid_targets_min": 1724 + }, + { + "epoch": 6.661064425770308, + "grad_norm": 0.7082204445622963, + "learning_rate": 2.892992131813621e-07, + "loss": 0.1937, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08627262711524963, + "step": 3570, + "valid_targets_mean": 3271.0, + "valid_targets_min": 1563 + }, + { + "epoch": 6.670401493930906, + "grad_norm": 0.6342087295271225, + "learning_rate": 2.737421493034331e-07, + "loss": 0.1993, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0958859920501709, + "step": 3575, + "valid_targets_mean": 4243.9, + "valid_targets_min": 287 + }, + { + "epoch": 6.6797385620915035, + "grad_norm": 0.6222207147685498, + "learning_rate": 2.586121354984017e-07, + "loss": 0.1949, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10129798948764801, + "step": 3580, + "valid_targets_mean": 4770.8, + "valid_targets_min": 539 + }, + { + "epoch": 6.689075630252101, + "grad_norm": 0.5570429194001549, + "learning_rate": 2.4390949931311304e-07, + "loss": 0.1933, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08367270976305008, + "step": 3585, + "valid_targets_mean": 5113.8, + "valid_targets_min": 1596 + }, + { + "epoch": 6.698412698412699, + "grad_norm": 0.7640808894988353, + "learning_rate": 2.296345590421889e-07, + "loss": 0.1964, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1052357479929924, + "step": 3590, + "valid_targets_mean": 3409.2, + "valid_targets_min": 1998 + }, + { + "epoch": 6.707749766573296, + "grad_norm": 0.5761011500746559, + "learning_rate": 2.157876237211487e-07, + "loss": 0.1944, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08575554937124252, + "step": 3595, + "valid_targets_mean": 5183.9, + "valid_targets_min": 2848 + }, + { + "epoch": 6.717086834733894, + "grad_norm": 0.6140931003514568, + "learning_rate": 2.0236899311970593e-07, + "loss": 0.1948, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08802841603755951, + "step": 3600, + "valid_targets_mean": 4399.1, + "valid_targets_min": 1206 + }, + { + "epoch": 6.726423902894491, + "grad_norm": 0.6787601970613741, + "learning_rate": 1.8937895773529114e-07, + "loss": 0.2016, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13343870639801025, + "step": 3605, + "valid_targets_mean": 4812.1, + "valid_targets_min": 1368 + }, + { + "epoch": 6.735760971055089, + "grad_norm": 0.6230107488538501, + "learning_rate": 1.7681779878675697e-07, + "loss": 0.1939, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08780519664287567, + "step": 3610, + "valid_targets_mean": 4044.4, + "valid_targets_min": 1730 + }, + { + "epoch": 6.745098039215686, + "grad_norm": 0.6618958930194253, + "learning_rate": 1.646857882082853e-07, + "loss": 0.1988, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08680258691310883, + "step": 3615, + "valid_targets_mean": 4031.1, + "valid_targets_min": 1529 + }, + { + "epoch": 6.754435107376284, + "grad_norm": 0.7910887025721222, + "learning_rate": 1.5298318864350738e-07, + "loss": 0.1981, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10770580172538757, + "step": 3620, + "valid_targets_mean": 3009.8, + "valid_targets_min": 235 + }, + { + "epoch": 6.763772175536881, + "grad_norm": 0.6064984194096277, + "learning_rate": 1.417102534398196e-07, + "loss": 0.2008, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06854459643363953, + "step": 3625, + "valid_targets_mean": 3549.2, + "valid_targets_min": 386 + }, + { + "epoch": 6.773109243697479, + "grad_norm": 0.6143799234853238, + "learning_rate": 1.3086722664288787e-07, + "loss": 0.1871, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09162665903568268, + "step": 3630, + "valid_targets_mean": 5373.8, + "valid_targets_min": 1135 + }, + { + "epoch": 6.7824463118580764, + "grad_norm": 0.5806117436092957, + "learning_rate": 1.2045434299137403e-07, + "loss": 0.2013, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10379506647586823, + "step": 3635, + "valid_targets_mean": 5841.8, + "valid_targets_min": 1210 + }, + { + "epoch": 6.791783380018674, + "grad_norm": 0.6627596945239722, + "learning_rate": 1.1047182791185551e-07, + "loss": 0.199, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07972020655870438, + "step": 3640, + "valid_targets_mean": 3779.4, + "valid_targets_min": 1742 + }, + { + "epoch": 6.8011204481792715, + "grad_norm": 0.7789591760172466, + "learning_rate": 1.0091989751393583e-07, + "loss": 0.2022, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11666593700647354, + "step": 3645, + "valid_targets_mean": 3816.1, + "valid_targets_min": 1004 + }, + { + "epoch": 6.810457516339869, + "grad_norm": 0.5680982535951987, + "learning_rate": 9.179875858557285e-08, + "loss": 0.1971, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09722086042165756, + "step": 3650, + "valid_targets_mean": 5210.6, + "valid_targets_min": 2280 + }, + { + "epoch": 6.819794584500467, + "grad_norm": 0.6405951669405586, + "learning_rate": 8.310860858859793e-08, + "loss": 0.2001, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1418842375278473, + "step": 3655, + "valid_targets_mean": 5812.0, + "valid_targets_min": 428 + }, + { + "epoch": 6.829131652661064, + "grad_norm": 0.6815850311634812, + "learning_rate": 7.484963565444813e-08, + "loss": 0.1893, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10712998360395432, + "step": 3660, + "valid_targets_mean": 3686.1, + "valid_targets_min": 1734 + }, + { + "epoch": 6.838468720821662, + "grad_norm": 0.622283695565583, + "learning_rate": 6.702201858008517e-08, + "loss": 0.201, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09915557503700256, + "step": 3665, + "valid_targets_mean": 4790.1, + "valid_targets_min": 1435 + }, + { + "epoch": 6.847805788982259, + "grad_norm": 0.6965306240940333, + "learning_rate": 5.962592682412948e-08, + "loss": 0.1886, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06358710676431656, + "step": 3670, + "valid_targets_mean": 2294.0, + "valid_targets_min": 209 + }, + { + "epoch": 6.857142857142857, + "grad_norm": 0.6483078148025582, + "learning_rate": 5.2661520503192176e-08, + "loss": 0.2023, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09639430046081543, + "step": 3675, + "valid_targets_mean": 4081.2, + "valid_targets_min": 1238 + }, + { + "epoch": 6.866479925303455, + "grad_norm": 0.6765006291445825, + "learning_rate": 4.612895038840215e-08, + "loss": 0.1944, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10045110434293747, + "step": 3680, + "valid_targets_mean": 5027.1, + "valid_targets_min": 1284 + }, + { + "epoch": 6.875816993464053, + "grad_norm": 0.697546431811492, + "learning_rate": 4.0028357902153204e-08, + "loss": 0.1936, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11363200843334198, + "step": 3685, + "valid_targets_mean": 3996.2, + "valid_targets_min": 441 + }, + { + "epoch": 6.88515406162465, + "grad_norm": 0.7652775902275027, + "learning_rate": 3.435987511503314e-08, + "loss": 0.1922, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10262599587440491, + "step": 3690, + "valid_targets_mean": 3617.0, + "valid_targets_min": 1165 + }, + { + "epoch": 6.894491129785248, + "grad_norm": 0.5219046043671652, + "learning_rate": 2.9123624742963818e-08, + "loss": 0.2045, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09849290549755096, + "step": 3695, + "valid_targets_mean": 6824.1, + "valid_targets_min": 2257 + }, + { + "epoch": 6.903828197945845, + "grad_norm": 0.6992829078842656, + "learning_rate": 2.4319720144554417e-08, + "loss": 0.1931, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10512895882129669, + "step": 3700, + "valid_targets_mean": 3636.8, + "valid_targets_min": 1183 + }, + { + "epoch": 6.913165266106443, + "grad_norm": 0.6843933535662667, + "learning_rate": 1.9948265318638915e-08, + "loss": 0.1893, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08267563581466675, + "step": 3705, + "valid_targets_mean": 4939.9, + "valid_targets_min": 1181 + }, + { + "epoch": 6.92250233426704, + "grad_norm": 0.5939649662684485, + "learning_rate": 1.6009354902024598e-08, + "loss": 0.1926, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08425945043563843, + "step": 3710, + "valid_targets_mean": 4273.1, + "valid_targets_min": 1880 + }, + { + "epoch": 6.931839402427638, + "grad_norm": 0.6673118123606684, + "learning_rate": 1.2503074167451445e-08, + "loss": 0.191, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09551867842674255, + "step": 3715, + "valid_targets_mean": 4820.4, + "valid_targets_min": 1103 + }, + { + "epoch": 6.9411764705882355, + "grad_norm": 0.5977152012984712, + "learning_rate": 9.429499021731403e-09, + "loss": 0.2063, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08592317998409271, + "step": 3720, + "valid_targets_mean": 4228.2, + "valid_targets_min": 1629 + }, + { + "epoch": 6.950513538748833, + "grad_norm": 0.7250825008015171, + "learning_rate": 6.788696004120798e-09, + "loss": 0.2042, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09734902530908585, + "step": 3725, + "valid_targets_mean": 4502.6, + "valid_targets_min": 2660 + }, + { + "epoch": 6.959850606909431, + "grad_norm": 0.6776303151567103, + "learning_rate": 4.580722284872608e-09, + "loss": 0.1949, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11439605057239532, + "step": 3730, + "valid_targets_mean": 4348.5, + "valid_targets_min": 1619 + }, + { + "epoch": 6.969187675070028, + "grad_norm": 0.6538406181218711, + "learning_rate": 2.8056256639974467e-09, + "loss": 0.1942, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0985015407204628, + "step": 3735, + "valid_targets_mean": 4420.2, + "valid_targets_min": 2342 + }, + { + "epoch": 6.978524743230626, + "grad_norm": 0.554859465375787, + "learning_rate": 1.4634445702288446e-09, + "loss": 0.1862, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10054243355989456, + "step": 3740, + "valid_targets_mean": 6215.1, + "valid_targets_min": 2530 + }, + { + "epoch": 6.987861811391223, + "grad_norm": 0.7136526402872021, + "learning_rate": 5.542080601950161e-10, + "loss": 0.1915, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12352180480957031, + "step": 3745, + "valid_targets_mean": 4527.2, + "valid_targets_min": 3258 + }, + { + "epoch": 6.997198879551821, + "grad_norm": 1.380003054707034, + "learning_rate": 7.793581778825499e-11, + "loss": 0.1889, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08922816812992096, + "step": 3750, + "valid_targets_mean": 5439.0, + "valid_targets_min": 1637 + }, + { + "epoch": 7.0, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22244934737682343, + "step": 3752, + "total_flos": 1.2133841162301604e+18, + "train_loss": 0.2596587872724416, + "train_runtime": 99495.5411, + "train_samples_per_second": 0.602, + "train_steps_per_second": 0.038, + "valid_targets_mean": 3751.8, + "valid_targets_min": 1819 + } + ], + "logging_steps": 5, + "max_steps": 3752, + "num_input_tokens_seen": 0, + "num_train_epochs": 7, + "save_steps": 200, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": true + }, + "attributes": {} + } + }, + "total_flos": 1.2133841162301604e+18, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +}