diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,9254 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 7.0, + "eval_steps": 500, + "global_step": 4186, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.012626262626262626, + "grad_norm": 8.8502213152326, + "learning_rate": 5.755395683453238e-07, + "loss": 0.9749, + "loss_nan_ranks": 0, + "loss_rank_avg": 1.0252163410186768, + "step": 5, + "valid_targets_mean": 3810.4, + "valid_targets_min": 1456 + }, + { + "epoch": 0.025252525252525252, + "grad_norm": 7.0326382192271595, + "learning_rate": 1.2949640287769785e-06, + "loss": 0.97, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.8746949434280396, + "step": 10, + "valid_targets_mean": 4736.2, + "valid_targets_min": 1766 + }, + { + "epoch": 0.03787878787878788, + "grad_norm": 6.6940046336920975, + "learning_rate": 2.0143884892086333e-06, + "loss": 0.9064, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.8456529378890991, + "step": 15, + "valid_targets_mean": 3504.3, + "valid_targets_min": 638 + }, + { + "epoch": 0.050505050505050504, + "grad_norm": 7.365976748970077, + "learning_rate": 2.733812949640288e-06, + "loss": 0.8841, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.9217684268951416, + "step": 20, + "valid_targets_mean": 2712.1, + "valid_targets_min": 1633 + }, + { + "epoch": 0.06313131313131314, + "grad_norm": 4.630647013342811, + "learning_rate": 3.453237410071943e-06, + "loss": 0.8408, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.7999893426895142, + "step": 25, + "valid_targets_mean": 2972.6, + "valid_targets_min": 1862 + }, + { + "epoch": 0.07575757575757576, + "grad_norm": 2.9138235176363723, + "learning_rate": 4.172661870503597e-06, + "loss": 0.7736, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.7608543634414673, + "step": 30, + "valid_targets_mean": 3347.9, + "valid_targets_min": 1252 + }, + { + "epoch": 0.08838383838383838, + "grad_norm": 1.7558069634979983, + "learning_rate": 4.892086330935253e-06, + "loss": 0.7821, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.826799750328064, + "step": 35, + "valid_targets_mean": 4368.2, + "valid_targets_min": 591 + }, + { + "epoch": 0.10101010101010101, + "grad_norm": 1.7134790575388017, + "learning_rate": 5.611510791366906e-06, + "loss": 0.7603, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.8243893384933472, + "step": 40, + "valid_targets_mean": 3075.2, + "valid_targets_min": 596 + }, + { + "epoch": 0.11363636363636363, + "grad_norm": 1.2388060565695977, + "learning_rate": 6.330935251798561e-06, + "loss": 0.6669, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.6428935527801514, + "step": 45, + "valid_targets_mean": 3558.9, + "valid_targets_min": 1264 + }, + { + "epoch": 0.12626262626262627, + "grad_norm": 1.0686895415900157, + "learning_rate": 7.050359712230216e-06, + "loss": 0.6699, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.6036921143531799, + "step": 50, + "valid_targets_mean": 2548.7, + "valid_targets_min": 1499 + }, + { + "epoch": 0.1388888888888889, + "grad_norm": 0.8408313638128855, + "learning_rate": 7.769784172661872e-06, + "loss": 0.6825, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.6488921642303467, + "step": 55, + "valid_targets_mean": 3712.4, + "valid_targets_min": 741 + }, + { + "epoch": 0.15151515151515152, + "grad_norm": 0.8793404803472428, + "learning_rate": 8.489208633093526e-06, + "loss": 0.6406, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.5900353193283081, + "step": 60, + "valid_targets_mean": 2931.1, + "valid_targets_min": 1889 + }, + { + "epoch": 0.16414141414141414, + "grad_norm": 0.9233082070664455, + "learning_rate": 9.20863309352518e-06, + "loss": 0.6118, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.6843211650848389, + "step": 65, + "valid_targets_mean": 2688.2, + "valid_targets_min": 587 + }, + { + "epoch": 0.17676767676767677, + "grad_norm": 0.7695618266666612, + "learning_rate": 9.928057553956835e-06, + "loss": 0.5512, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.5856298208236694, + "step": 70, + "valid_targets_mean": 2935.6, + "valid_targets_min": 750 + }, + { + "epoch": 0.1893939393939394, + "grad_norm": 0.7101539830507525, + "learning_rate": 1.0647482014388491e-05, + "loss": 0.5928, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.602681577205658, + "step": 75, + "valid_targets_mean": 3589.7, + "valid_targets_min": 741 + }, + { + "epoch": 0.20202020202020202, + "grad_norm": 0.6757736826618855, + "learning_rate": 1.1366906474820146e-05, + "loss": 0.5919, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.5044267177581787, + "step": 80, + "valid_targets_mean": 3180.0, + "valid_targets_min": 1420 + }, + { + "epoch": 0.21464646464646464, + "grad_norm": 0.7052512337384288, + "learning_rate": 1.20863309352518e-05, + "loss": 0.5713, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.546467661857605, + "step": 85, + "valid_targets_mean": 2828.8, + "valid_targets_min": 1071 + }, + { + "epoch": 0.22727272727272727, + "grad_norm": 0.8245207595088883, + "learning_rate": 1.2805755395683454e-05, + "loss": 0.5641, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.605029284954071, + "step": 90, + "valid_targets_mean": 2803.2, + "valid_targets_min": 643 + }, + { + "epoch": 0.2398989898989899, + "grad_norm": 0.7141934894188182, + "learning_rate": 1.3525179856115109e-05, + "loss": 0.5706, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.5871100425720215, + "step": 95, + "valid_targets_mean": 3075.2, + "valid_targets_min": 1141 + }, + { + "epoch": 0.25252525252525254, + "grad_norm": 0.7508217175713096, + "learning_rate": 1.4244604316546765e-05, + "loss": 0.5615, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.5834653973579407, + "step": 100, + "valid_targets_mean": 3024.4, + "valid_targets_min": 1610 + }, + { + "epoch": 0.26515151515151514, + "grad_norm": 0.6869296589904229, + "learning_rate": 1.496402877697842e-05, + "loss": 0.5107, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.5773706436157227, + "step": 105, + "valid_targets_mean": 3346.2, + "valid_targets_min": 465 + }, + { + "epoch": 0.2777777777777778, + "grad_norm": 0.6649670199118626, + "learning_rate": 1.5683453237410072e-05, + "loss": 0.6192, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.5906274318695068, + "step": 110, + "valid_targets_mean": 4020.4, + "valid_targets_min": 1802 + }, + { + "epoch": 0.2904040404040404, + "grad_norm": 0.749817497328354, + "learning_rate": 1.640287769784173e-05, + "loss": 0.542, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.6468950510025024, + "step": 115, + "valid_targets_mean": 3753.2, + "valid_targets_min": 550 + }, + { + "epoch": 0.30303030303030304, + "grad_norm": 0.6555186253660183, + "learning_rate": 1.7122302158273384e-05, + "loss": 0.5134, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.5901004076004028, + "step": 120, + "valid_targets_mean": 4881.6, + "valid_targets_min": 1494 + }, + { + "epoch": 0.31565656565656564, + "grad_norm": 0.7273014966297405, + "learning_rate": 1.784172661870504e-05, + "loss": 0.5702, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.5654794573783875, + "step": 125, + "valid_targets_mean": 3368.1, + "valid_targets_min": 1521 + }, + { + "epoch": 0.3282828282828283, + "grad_norm": 0.7586180547020986, + "learning_rate": 1.8561151079136693e-05, + "loss": 0.5293, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.41630545258522034, + "step": 130, + "valid_targets_mean": 3200.5, + "valid_targets_min": 1661 + }, + { + "epoch": 0.3409090909090909, + "grad_norm": 0.7688770234195088, + "learning_rate": 1.9280575539568347e-05, + "loss": 0.5299, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.5642590522766113, + "step": 135, + "valid_targets_mean": 3158.9, + "valid_targets_min": 799 + }, + { + "epoch": 0.35353535353535354, + "grad_norm": 0.8258141160820167, + "learning_rate": 2e-05, + "loss": 0.4706, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.43767279386520386, + "step": 140, + "valid_targets_mean": 2534.0, + "valid_targets_min": 527 + }, + { + "epoch": 0.3661616161616162, + "grad_norm": 0.6638208230368448, + "learning_rate": 2.0719424460431656e-05, + "loss": 0.5226, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.5216605067253113, + "step": 145, + "valid_targets_mean": 3478.1, + "valid_targets_min": 1455 + }, + { + "epoch": 0.3787878787878788, + "grad_norm": 0.6659508828600877, + "learning_rate": 2.143884892086331e-05, + "loss": 0.5054, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.46430742740631104, + "step": 150, + "valid_targets_mean": 3356.1, + "valid_targets_min": 1804 + }, + { + "epoch": 0.39141414141414144, + "grad_norm": 0.6857469917039041, + "learning_rate": 2.2158273381294965e-05, + "loss": 0.4803, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.46997326612472534, + "step": 155, + "valid_targets_mean": 3069.1, + "valid_targets_min": 712 + }, + { + "epoch": 0.40404040404040403, + "grad_norm": 0.7497478551719995, + "learning_rate": 2.287769784172662e-05, + "loss": 0.5096, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.5357323884963989, + "step": 160, + "valid_targets_mean": 2978.5, + "valid_targets_min": 978 + }, + { + "epoch": 0.4166666666666667, + "grad_norm": 0.6266767386519286, + "learning_rate": 2.3597122302158274e-05, + "loss": 0.4854, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.49465256929397583, + "step": 165, + "valid_targets_mean": 4385.0, + "valid_targets_min": 1146 + }, + { + "epoch": 0.4292929292929293, + "grad_norm": 0.7198305240954321, + "learning_rate": 2.431654676258993e-05, + "loss": 0.5133, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4758256673812866, + "step": 170, + "valid_targets_mean": 3358.2, + "valid_targets_min": 1265 + }, + { + "epoch": 0.44191919191919193, + "grad_norm": 0.744564295446295, + "learning_rate": 2.5035971223021586e-05, + "loss": 0.4285, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4645718038082123, + "step": 175, + "valid_targets_mean": 3257.9, + "valid_targets_min": 713 + }, + { + "epoch": 0.45454545454545453, + "grad_norm": 0.741277701622229, + "learning_rate": 2.575539568345324e-05, + "loss": 0.4867, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.5038046836853027, + "step": 180, + "valid_targets_mean": 3023.1, + "valid_targets_min": 696 + }, + { + "epoch": 0.4671717171717172, + "grad_norm": 0.7440421968116456, + "learning_rate": 2.6474820143884895e-05, + "loss": 0.5095, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.570155918598175, + "step": 185, + "valid_targets_mean": 3422.4, + "valid_targets_min": 949 + }, + { + "epoch": 0.4797979797979798, + "grad_norm": 0.8024642384967607, + "learning_rate": 2.719424460431655e-05, + "loss": 0.4493, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.5823109149932861, + "step": 190, + "valid_targets_mean": 2894.9, + "valid_targets_min": 1061 + }, + { + "epoch": 0.49242424242424243, + "grad_norm": 0.7498359662712905, + "learning_rate": 2.7913669064748203e-05, + "loss": 0.5014, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.5892109870910645, + "step": 195, + "valid_targets_mean": 3757.8, + "valid_targets_min": 1305 + }, + { + "epoch": 0.5050505050505051, + "grad_norm": 0.6969972771455335, + "learning_rate": 2.8633093525179858e-05, + "loss": 0.4966, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4585283696651459, + "step": 200, + "valid_targets_mean": 3301.2, + "valid_targets_min": 1200 + }, + { + "epoch": 0.5176767676767676, + "grad_norm": 0.7428922534210859, + "learning_rate": 2.9352517985611512e-05, + "loss": 0.4873, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.35433435440063477, + "step": 205, + "valid_targets_mean": 2534.4, + "valid_targets_min": 1051 + }, + { + "epoch": 0.5303030303030303, + "grad_norm": 0.7952927627365796, + "learning_rate": 3.0071942446043167e-05, + "loss": 0.4483, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3344321846961975, + "step": 210, + "valid_targets_mean": 2273.5, + "valid_targets_min": 764 + }, + { + "epoch": 0.5429292929292929, + "grad_norm": 0.7452719253142043, + "learning_rate": 3.0791366906474824e-05, + "loss": 0.4887, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4398002624511719, + "step": 215, + "valid_targets_mean": 2894.6, + "valid_targets_min": 1774 + }, + { + "epoch": 0.5555555555555556, + "grad_norm": 0.793089120317565, + "learning_rate": 3.1510791366906475e-05, + "loss": 0.4446, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3552176058292389, + "step": 220, + "valid_targets_mean": 2370.2, + "valid_targets_min": 988 + }, + { + "epoch": 0.5681818181818182, + "grad_norm": 0.6957948039355799, + "learning_rate": 3.223021582733813e-05, + "loss": 0.4677, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4744777977466583, + "step": 225, + "valid_targets_mean": 3270.9, + "valid_targets_min": 908 + }, + { + "epoch": 0.5808080808080808, + "grad_norm": 0.9598066573051235, + "learning_rate": 3.2949640287769784e-05, + "loss": 0.4678, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.53702712059021, + "step": 230, + "valid_targets_mean": 3305.6, + "valid_targets_min": 1465 + }, + { + "epoch": 0.5934343434343434, + "grad_norm": 0.7385990473183633, + "learning_rate": 3.366906474820144e-05, + "loss": 0.4269, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4913618564605713, + "step": 235, + "valid_targets_mean": 3022.6, + "valid_targets_min": 1359 + }, + { + "epoch": 0.6060606060606061, + "grad_norm": 0.8954334270151011, + "learning_rate": 3.438848920863309e-05, + "loss": 0.4773, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4261593222618103, + "step": 240, + "valid_targets_mean": 3027.9, + "valid_targets_min": 780 + }, + { + "epoch": 0.6186868686868687, + "grad_norm": 0.7460534615902407, + "learning_rate": 3.510791366906475e-05, + "loss": 0.4695, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.5014075040817261, + "step": 245, + "valid_targets_mean": 3182.8, + "valid_targets_min": 1704 + }, + { + "epoch": 0.6313131313131313, + "grad_norm": 0.7476045212031468, + "learning_rate": 3.582733812949641e-05, + "loss": 0.4897, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.5569243431091309, + "step": 250, + "valid_targets_mean": 4160.6, + "valid_targets_min": 1184 + }, + { + "epoch": 0.6439393939393939, + "grad_norm": 0.7290825841839375, + "learning_rate": 3.654676258992806e-05, + "loss": 0.5141, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.5026414394378662, + "step": 255, + "valid_targets_mean": 3161.4, + "valid_targets_min": 710 + }, + { + "epoch": 0.6565656565656566, + "grad_norm": 0.7490321855827192, + "learning_rate": 3.726618705035972e-05, + "loss": 0.5036, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.46991053223609924, + "step": 260, + "valid_targets_mean": 2653.6, + "valid_targets_min": 647 + }, + { + "epoch": 0.6691919191919192, + "grad_norm": 0.739603256968017, + "learning_rate": 3.798561151079137e-05, + "loss": 0.4898, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4988292157649994, + "step": 265, + "valid_targets_mean": 3293.9, + "valid_targets_min": 1137 + }, + { + "epoch": 0.6818181818181818, + "grad_norm": 0.8025519110811695, + "learning_rate": 3.8705035971223026e-05, + "loss": 0.5011, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.5377944707870483, + "step": 270, + "valid_targets_mean": 3180.2, + "valid_targets_min": 1127 + }, + { + "epoch": 0.6944444444444444, + "grad_norm": 0.7900087471325877, + "learning_rate": 3.942446043165468e-05, + "loss": 0.4673, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.5039912462234497, + "step": 275, + "valid_targets_mean": 2830.6, + "valid_targets_min": 1799 + }, + { + "epoch": 0.7070707070707071, + "grad_norm": 0.7659391333417415, + "learning_rate": 3.999998413256274e-05, + "loss": 0.5003, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4912080764770508, + "step": 280, + "valid_targets_mean": 3327.0, + "valid_targets_min": 790 + }, + { + "epoch": 0.7196969696969697, + "grad_norm": 0.694845702448467, + "learning_rate": 3.9999428774902425e-05, + "loss": 0.4588, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.5030688643455505, + "step": 285, + "valid_targets_mean": 3079.2, + "valid_targets_min": 1122 + }, + { + "epoch": 0.7323232323232324, + "grad_norm": 0.7144245421458943, + "learning_rate": 3.999808007055667e-05, + "loss": 0.4796, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.5815045237541199, + "step": 290, + "valid_targets_mean": 4749.9, + "valid_targets_min": 880 + }, + { + "epoch": 0.7449494949494949, + "grad_norm": 0.8137606805905706, + "learning_rate": 3.999593807302654e-05, + "loss": 0.4778, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3889826536178589, + "step": 295, + "valid_targets_mean": 2409.6, + "valid_targets_min": 1216 + }, + { + "epoch": 0.7575757575757576, + "grad_norm": 0.7671755034689156, + "learning_rate": 3.999300286728176e-05, + "loss": 0.4424, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.40401798486709595, + "step": 300, + "valid_targets_mean": 2374.2, + "valid_targets_min": 412 + }, + { + "epoch": 0.7702020202020202, + "grad_norm": 0.775435011663138, + "learning_rate": 3.9989274569757467e-05, + "loss": 0.4529, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.5113080143928528, + "step": 305, + "valid_targets_mean": 3065.1, + "valid_targets_min": 872 + }, + { + "epoch": 0.7828282828282829, + "grad_norm": 0.7829159491303057, + "learning_rate": 3.99847533283495e-05, + "loss": 0.4483, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4134717285633087, + "step": 310, + "valid_targets_mean": 2551.4, + "valid_targets_min": 511 + }, + { + "epoch": 0.7954545454545454, + "grad_norm": 0.70594805762661, + "learning_rate": 3.9979439322408575e-05, + "loss": 0.4722, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.5452604293823242, + "step": 315, + "valid_targets_mean": 4207.9, + "valid_targets_min": 782 + }, + { + "epoch": 0.8080808080808081, + "grad_norm": 0.741059134063674, + "learning_rate": 3.9973332762733165e-05, + "loss": 0.4703, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.523596465587616, + "step": 320, + "valid_targets_mean": 3050.4, + "valid_targets_min": 1682 + }, + { + "epoch": 0.8207070707070707, + "grad_norm": 0.646169301673137, + "learning_rate": 3.996643389156114e-05, + "loss": 0.4531, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4629998803138733, + "step": 325, + "valid_targets_mean": 3761.1, + "valid_targets_min": 1341 + }, + { + "epoch": 0.8333333333333334, + "grad_norm": 0.7311851901523451, + "learning_rate": 3.995874298256014e-05, + "loss": 0.4769, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.49332910776138306, + "step": 330, + "valid_targets_mean": 3003.4, + "valid_targets_min": 1840 + }, + { + "epoch": 0.8459595959595959, + "grad_norm": 0.684697792793549, + "learning_rate": 3.9950260340816723e-05, + "loss": 0.4772, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.476440966129303, + "step": 335, + "valid_targets_mean": 3521.4, + "valid_targets_min": 1763 + }, + { + "epoch": 0.8585858585858586, + "grad_norm": 0.7235672828868595, + "learning_rate": 3.9940986302824305e-05, + "loss": 0.4573, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4808931350708008, + "step": 340, + "valid_targets_mean": 3157.8, + "valid_targets_min": 1116 + }, + { + "epoch": 0.8712121212121212, + "grad_norm": 0.618583293178677, + "learning_rate": 3.993092123646975e-05, + "loss": 0.4419, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4222564101219177, + "step": 345, + "valid_targets_mean": 3462.4, + "valid_targets_min": 1697 + }, + { + "epoch": 0.8838383838383839, + "grad_norm": 0.9731914681849075, + "learning_rate": 3.992006554101881e-05, + "loss": 0.4431, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4798777997493744, + "step": 350, + "valid_targets_mean": 2746.6, + "valid_targets_min": 674 + }, + { + "epoch": 0.8964646464646465, + "grad_norm": 0.6711603679996875, + "learning_rate": 3.99084196471003e-05, + "loss": 0.4315, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.39052852988243103, + "step": 355, + "valid_targets_mean": 2697.2, + "valid_targets_min": 467 + }, + { + "epoch": 0.9090909090909091, + "grad_norm": 0.6756824530106742, + "learning_rate": 3.9895984016688956e-05, + "loss": 0.4208, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4884558618068695, + "step": 360, + "valid_targets_mean": 3316.8, + "valid_targets_min": 1433 + }, + { + "epoch": 0.9217171717171717, + "grad_norm": 0.6730754745335135, + "learning_rate": 3.9882759143087194e-05, + "loss": 0.4558, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4529365003108978, + "step": 365, + "valid_targets_mean": 2792.6, + "valid_targets_min": 1934 + }, + { + "epoch": 0.9343434343434344, + "grad_norm": 0.7396239570533569, + "learning_rate": 3.9868745550905475e-05, + "loss": 0.4806, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.48611992597579956, + "step": 370, + "valid_targets_mean": 3224.1, + "valid_targets_min": 1756 + }, + { + "epoch": 0.946969696969697, + "grad_norm": 0.5859949295622379, + "learning_rate": 3.985394379604152e-05, + "loss": 0.4543, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4721411466598511, + "step": 375, + "valid_targets_mean": 4448.3, + "valid_targets_min": 1472 + }, + { + "epoch": 0.9595959595959596, + "grad_norm": 0.6661047355223618, + "learning_rate": 3.983835446565826e-05, + "loss": 0.4424, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.49498146772384644, + "step": 380, + "valid_targets_mean": 3239.3, + "valid_targets_min": 479 + }, + { + "epoch": 0.9722222222222222, + "grad_norm": 0.5805898239548394, + "learning_rate": 3.982197817816054e-05, + "loss": 0.4422, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.38239431381225586, + "step": 385, + "valid_targets_mean": 3780.7, + "valid_targets_min": 779 + }, + { + "epoch": 0.9848484848484849, + "grad_norm": 0.7798921127063227, + "learning_rate": 3.980481558317057e-05, + "loss": 0.4662, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.5424267053604126, + "step": 390, + "valid_targets_mean": 3867.1, + "valid_targets_min": 632 + }, + { + "epoch": 0.9974747474747475, + "grad_norm": 0.6339986265381534, + "learning_rate": 3.978686736150221e-05, + "loss": 0.4535, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.47920912504196167, + "step": 395, + "valid_targets_mean": 3606.8, + "valid_targets_min": 1028 + }, + { + "epoch": 1.0101010101010102, + "grad_norm": 0.6706130776528586, + "learning_rate": 3.976813422513388e-05, + "loss": 0.437, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4376629590988159, + "step": 400, + "valid_targets_mean": 3430.6, + "valid_targets_min": 1860 + }, + { + "epoch": 1.0227272727272727, + "grad_norm": 0.6708127261682855, + "learning_rate": 3.9748616917180406e-05, + "loss": 0.3809, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3455820381641388, + "step": 405, + "valid_targets_mean": 2637.0, + "valid_targets_min": 580 + }, + { + "epoch": 1.0353535353535352, + "grad_norm": 0.7742495725599213, + "learning_rate": 3.972831621186345e-05, + "loss": 0.417, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3599938154220581, + "step": 410, + "valid_targets_mean": 2649.8, + "valid_targets_min": 1327 + }, + { + "epoch": 1.047979797979798, + "grad_norm": 0.6102251044562367, + "learning_rate": 3.970723291448092e-05, + "loss": 0.4354, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.445651650428772, + "step": 415, + "valid_targets_mean": 4083.1, + "valid_targets_min": 1361 + }, + { + "epoch": 1.0606060606060606, + "grad_norm": 0.6976342123087943, + "learning_rate": 3.968536786137487e-05, + "loss": 0.4382, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.40597984194755554, + "step": 420, + "valid_targets_mean": 2881.2, + "valid_targets_min": 1527 + }, + { + "epoch": 1.0732323232323233, + "grad_norm": 0.8976764193372789, + "learning_rate": 3.966272191989847e-05, + "loss": 0.4593, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.5112510323524475, + "step": 425, + "valid_targets_mean": 3042.1, + "valid_targets_min": 506 + }, + { + "epoch": 1.0858585858585859, + "grad_norm": 0.7365732301930217, + "learning_rate": 3.963929598838149e-05, + "loss": 0.4145, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.43491220474243164, + "step": 430, + "valid_targets_mean": 2794.9, + "valid_targets_min": 1679 + }, + { + "epoch": 1.0984848484848484, + "grad_norm": 0.6773959614029044, + "learning_rate": 3.9615090996094726e-05, + "loss": 0.4226, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.5162553787231445, + "step": 435, + "valid_targets_mean": 4462.0, + "valid_targets_min": 509 + }, + { + "epoch": 1.1111111111111112, + "grad_norm": 0.5970043671496138, + "learning_rate": 3.959010790321313e-05, + "loss": 0.4334, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.33793818950653076, + "step": 440, + "valid_targets_mean": 3196.2, + "valid_targets_min": 1361 + }, + { + "epoch": 1.1237373737373737, + "grad_norm": 0.7763112041835692, + "learning_rate": 3.9564347700777705e-05, + "loss": 0.4279, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4716646075248718, + "step": 445, + "valid_targets_mean": 2790.6, + "valid_targets_min": 596 + }, + { + "epoch": 1.1363636363636362, + "grad_norm": 0.7191861233677, + "learning_rate": 3.953781141065619e-05, + "loss": 0.4571, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4991220235824585, + "step": 450, + "valid_targets_mean": 2805.2, + "valid_targets_min": 1500 + }, + { + "epoch": 1.148989898989899, + "grad_norm": 0.6931761780564214, + "learning_rate": 3.9510500085502556e-05, + "loss": 0.403, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3412482738494873, + "step": 455, + "valid_targets_mean": 2871.2, + "valid_targets_min": 1064 + }, + { + "epoch": 1.1616161616161615, + "grad_norm": 0.5776459149074374, + "learning_rate": 3.9482414808715194e-05, + "loss": 0.4203, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4245602786540985, + "step": 460, + "valid_targets_mean": 4715.2, + "valid_targets_min": 808 + }, + { + "epoch": 1.1742424242424243, + "grad_norm": 0.6555627012964024, + "learning_rate": 3.945355669439399e-05, + "loss": 0.4475, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.44374722242355347, + "step": 465, + "valid_targets_mean": 3508.2, + "valid_targets_min": 882 + }, + { + "epoch": 1.1868686868686869, + "grad_norm": 1.2615252581904304, + "learning_rate": 3.9423926887296114e-05, + "loss": 0.4471, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4283289909362793, + "step": 470, + "valid_targets_mean": 3297.8, + "valid_targets_min": 1445 + }, + { + "epoch": 1.1994949494949494, + "grad_norm": 0.7227367427497255, + "learning_rate": 3.939352656279059e-05, + "loss": 0.4105, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4885120689868927, + "step": 475, + "valid_targets_mean": 3030.8, + "valid_targets_min": 1180 + }, + { + "epoch": 1.2121212121212122, + "grad_norm": 0.7171565868676774, + "learning_rate": 3.93623569268117e-05, + "loss": 0.4334, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.37661659717559814, + "step": 480, + "valid_targets_mean": 2516.6, + "valid_targets_min": 935 + }, + { + "epoch": 1.2247474747474747, + "grad_norm": 0.6709298722578017, + "learning_rate": 3.933041921581113e-05, + "loss": 0.4524, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4951278269290924, + "step": 485, + "valid_targets_mean": 3910.1, + "valid_targets_min": 1281 + }, + { + "epoch": 1.2373737373737375, + "grad_norm": 0.6284344251816701, + "learning_rate": 3.929771469670892e-05, + "loss": 0.4364, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4901544451713562, + "step": 490, + "valid_targets_mean": 5372.7, + "valid_targets_min": 863 + }, + { + "epoch": 1.25, + "grad_norm": 0.695150714261409, + "learning_rate": 3.926424466684321e-05, + "loss": 0.4036, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3937031328678131, + "step": 495, + "valid_targets_mean": 2620.4, + "valid_targets_min": 1061 + }, + { + "epoch": 1.2626262626262625, + "grad_norm": 0.5659681496879514, + "learning_rate": 3.92300104539188e-05, + "loss": 0.4347, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.43232929706573486, + "step": 500, + "valid_targets_mean": 4517.9, + "valid_targets_min": 1356 + }, + { + "epoch": 1.2752525252525253, + "grad_norm": 0.6499437965902374, + "learning_rate": 3.919501341595442e-05, + "loss": 0.4463, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4581197500228882, + "step": 505, + "valid_targets_mean": 3634.6, + "valid_targets_min": 1176 + }, + { + "epoch": 1.2878787878787878, + "grad_norm": 0.5715005441806082, + "learning_rate": 3.9159254941228944e-05, + "loss": 0.4015, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3313661217689514, + "step": 510, + "valid_targets_mean": 3508.3, + "valid_targets_min": 1355 + }, + { + "epoch": 1.3005050505050506, + "grad_norm": 0.7308800974579575, + "learning_rate": 3.912273644822625e-05, + "loss": 0.4295, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4216563105583191, + "step": 515, + "valid_targets_mean": 2695.7, + "valid_targets_min": 1010 + }, + { + "epoch": 1.3131313131313131, + "grad_norm": 0.6837093491179472, + "learning_rate": 3.9085459385578984e-05, + "loss": 0.3947, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4423089027404785, + "step": 520, + "valid_targets_mean": 2883.6, + "valid_targets_min": 710 + }, + { + "epoch": 1.3257575757575757, + "grad_norm": 0.6554872754713126, + "learning_rate": 3.9047425232011076e-05, + "loss": 0.3826, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3657465875148773, + "step": 525, + "valid_targets_mean": 3170.8, + "valid_targets_min": 1136 + }, + { + "epoch": 1.3383838383838385, + "grad_norm": 0.6685453762625018, + "learning_rate": 3.900863549627911e-05, + "loss": 0.428, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4607548713684082, + "step": 530, + "valid_targets_mean": 3880.3, + "valid_targets_min": 1232 + }, + { + "epoch": 1.351010101010101, + "grad_norm": 0.6443269502166363, + "learning_rate": 3.896909171711245e-05, + "loss": 0.4348, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4501422941684723, + "step": 535, + "valid_targets_mean": 3417.6, + "valid_targets_min": 1573 + }, + { + "epoch": 1.3636363636363638, + "grad_norm": 0.6126406593256342, + "learning_rate": 3.8928795463152186e-05, + "loss": 0.3799, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3543352782726288, + "step": 540, + "valid_targets_mean": 3163.1, + "valid_targets_min": 1501 + }, + { + "epoch": 1.3762626262626263, + "grad_norm": 0.6881727131350188, + "learning_rate": 3.888774833288898e-05, + "loss": 0.4257, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4091816842556, + "step": 545, + "valid_targets_mean": 3140.7, + "valid_targets_min": 1252 + }, + { + "epoch": 1.3888888888888888, + "grad_norm": 0.6580723165353671, + "learning_rate": 3.8845951954599545e-05, + "loss": 0.3958, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3906816244125366, + "step": 550, + "valid_targets_mean": 2895.7, + "valid_targets_min": 1240 + }, + { + "epoch": 1.4015151515151514, + "grad_norm": 0.6770136577009271, + "learning_rate": 3.8803407986282156e-05, + "loss": 0.4221, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.35806700587272644, + "step": 555, + "valid_targets_mean": 2287.6, + "valid_targets_min": 594 + }, + { + "epoch": 1.4141414141414141, + "grad_norm": 0.6405715120436977, + "learning_rate": 3.876011811559084e-05, + "loss": 0.4279, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4252093434333801, + "step": 560, + "valid_targets_mean": 3118.0, + "valid_targets_min": 707 + }, + { + "epoch": 1.4267676767676767, + "grad_norm": 0.6944966556141179, + "learning_rate": 3.871608405976838e-05, + "loss": 0.4115, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.42029836773872375, + "step": 565, + "valid_targets_mean": 2919.8, + "valid_targets_min": 1070 + }, + { + "epoch": 1.4393939393939394, + "grad_norm": 0.6466971885955511, + "learning_rate": 3.867130756557832e-05, + "loss": 0.4312, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3371127247810364, + "step": 570, + "valid_targets_mean": 2752.6, + "valid_targets_min": 750 + }, + { + "epoch": 1.452020202020202, + "grad_norm": 0.6100566892783015, + "learning_rate": 3.862579040923552e-05, + "loss": 0.3838, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3892483115196228, + "step": 575, + "valid_targets_mean": 3388.8, + "valid_targets_min": 968 + }, + { + "epoch": 1.4646464646464645, + "grad_norm": 0.6600534442172271, + "learning_rate": 3.8579534396335835e-05, + "loss": 0.412, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.46965640783309937, + "step": 580, + "valid_targets_mean": 3274.4, + "valid_targets_min": 685 + }, + { + "epoch": 1.4772727272727273, + "grad_norm": 0.6946470349793337, + "learning_rate": 3.853254136178437e-05, + "loss": 0.4195, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4128877818584442, + "step": 585, + "valid_targets_mean": 2874.7, + "valid_targets_min": 1690 + }, + { + "epoch": 1.4898989898989898, + "grad_norm": 0.658007836439284, + "learning_rate": 3.8484813169722794e-05, + "loss": 0.4202, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4265574812889099, + "step": 590, + "valid_targets_mean": 2866.8, + "valid_targets_min": 1791 + }, + { + "epoch": 1.5025252525252526, + "grad_norm": 0.7613954671864829, + "learning_rate": 3.843635171345532e-05, + "loss": 0.4369, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.44407981634140015, + "step": 595, + "valid_targets_mean": 3021.1, + "valid_targets_min": 638 + }, + { + "epoch": 1.5151515151515151, + "grad_norm": 0.7625903826308273, + "learning_rate": 3.838715891537365e-05, + "loss": 0.3805, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.34579938650131226, + "step": 600, + "valid_targets_mean": 2343.3, + "valid_targets_min": 844 + }, + { + "epoch": 1.5277777777777777, + "grad_norm": 0.6482084611263487, + "learning_rate": 3.8337236726880674e-05, + "loss": 0.441, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.41672322154045105, + "step": 605, + "valid_targets_mean": 2920.0, + "valid_targets_min": 1776 + }, + { + "epoch": 1.5404040404040404, + "grad_norm": 0.6065329237085881, + "learning_rate": 3.828658712831311e-05, + "loss": 0.398, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3971773386001587, + "step": 610, + "valid_targets_mean": 3118.1, + "valid_targets_min": 1473 + }, + { + "epoch": 1.553030303030303, + "grad_norm": 0.6544115401255524, + "learning_rate": 3.823521212886287e-05, + "loss": 0.441, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.45314595103263855, + "step": 615, + "valid_targets_mean": 3466.2, + "valid_targets_min": 377 + }, + { + "epoch": 1.5656565656565657, + "grad_norm": 0.7218840612228915, + "learning_rate": 3.818311376649746e-05, + "loss": 0.3962, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.40665388107299805, + "step": 620, + "valid_targets_mean": 3132.6, + "valid_targets_min": 1208 + }, + { + "epoch": 1.5782828282828283, + "grad_norm": 0.6457705553977882, + "learning_rate": 3.813029410787906e-05, + "loss": 0.4014, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4007648229598999, + "step": 625, + "valid_targets_mean": 3385.1, + "valid_targets_min": 1276 + }, + { + "epoch": 1.5909090909090908, + "grad_norm": 0.6340707674027326, + "learning_rate": 3.807675524828255e-05, + "loss": 0.3921, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3663696050643921, + "step": 630, + "valid_targets_mean": 2994.5, + "valid_targets_min": 1839 + }, + { + "epoch": 1.6035353535353534, + "grad_norm": 0.6510799288150312, + "learning_rate": 3.802249931151245e-05, + "loss": 0.4083, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4528045356273651, + "step": 635, + "valid_targets_mean": 3057.3, + "valid_targets_min": 724 + }, + { + "epoch": 1.6161616161616161, + "grad_norm": 0.7096957049322412, + "learning_rate": 3.7967528449818604e-05, + "loss": 0.4129, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3862096667289734, + "step": 640, + "valid_targets_mean": 2390.7, + "valid_targets_min": 556 + }, + { + "epoch": 1.628787878787879, + "grad_norm": 0.7345826232266963, + "learning_rate": 3.791184484381083e-05, + "loss": 0.433, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3896709382534027, + "step": 645, + "valid_targets_mean": 2601.5, + "valid_targets_min": 782 + }, + { + "epoch": 1.6414141414141414, + "grad_norm": 0.6381339146897167, + "learning_rate": 3.7855450702372446e-05, + "loss": 0.3921, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.42096877098083496, + "step": 650, + "valid_targets_mean": 2983.1, + "valid_targets_min": 1513 + }, + { + "epoch": 1.654040404040404, + "grad_norm": 0.7088141347121194, + "learning_rate": 3.7798348262572595e-05, + "loss": 0.4184, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.38489383459091187, + "step": 655, + "valid_targets_mean": 2589.2, + "valid_targets_min": 1265 + }, + { + "epoch": 1.6666666666666665, + "grad_norm": 0.6158104461556841, + "learning_rate": 3.774053978957754e-05, + "loss": 0.422, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.33309605717658997, + "step": 660, + "valid_targets_mean": 2628.7, + "valid_targets_min": 858 + }, + { + "epoch": 1.6792929292929293, + "grad_norm": 0.6621621785950462, + "learning_rate": 3.76820275765608e-05, + "loss": 0.4482, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3812219202518463, + "step": 665, + "valid_targets_mean": 2696.2, + "valid_targets_min": 1315 + }, + { + "epoch": 1.691919191919192, + "grad_norm": 0.5915638746228544, + "learning_rate": 3.7622813944612184e-05, + "loss": 0.4268, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.45549070835113525, + "step": 670, + "valid_targets_mean": 4037.2, + "valid_targets_min": 1593 + }, + { + "epoch": 1.7045454545454546, + "grad_norm": 0.6214323792581383, + "learning_rate": 3.75629012426457e-05, + "loss": 0.4049, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.30740225315093994, + "step": 675, + "valid_targets_mean": 2484.4, + "valid_targets_min": 991 + }, + { + "epoch": 1.7171717171717171, + "grad_norm": 0.6181383119204046, + "learning_rate": 3.750229184730643e-05, + "loss": 0.4034, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.41900163888931274, + "step": 680, + "valid_targets_mean": 3214.8, + "valid_targets_min": 1219 + }, + { + "epoch": 1.7297979797979797, + "grad_norm": 0.7022543694921178, + "learning_rate": 3.744098816287616e-05, + "loss": 0.3934, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3738209009170532, + "step": 685, + "valid_targets_mean": 2518.6, + "valid_targets_min": 1060 + }, + { + "epoch": 1.7424242424242424, + "grad_norm": 0.6444777112364757, + "learning_rate": 3.73789926211781e-05, + "loss": 0.4013, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.40667980909347534, + "step": 690, + "valid_targets_mean": 2943.8, + "valid_targets_min": 586 + }, + { + "epoch": 1.7550505050505052, + "grad_norm": 0.6456649483546918, + "learning_rate": 3.7316307681480364e-05, + "loss": 0.4464, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.5095163583755493, + "step": 695, + "valid_targets_mean": 3686.2, + "valid_targets_min": 1323 + }, + { + "epoch": 1.7676767676767677, + "grad_norm": 0.6312723135470779, + "learning_rate": 3.725293583039843e-05, + "loss": 0.4029, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3778528869152069, + "step": 700, + "valid_targets_mean": 3107.6, + "valid_targets_min": 1432 + }, + { + "epoch": 1.7803030303030303, + "grad_norm": 0.6535661127784683, + "learning_rate": 3.71888795817965e-05, + "loss": 0.4127, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.47286659479141235, + "step": 705, + "valid_targets_mean": 4064.8, + "valid_targets_min": 1051 + }, + { + "epoch": 1.7929292929292928, + "grad_norm": 0.6896428501647339, + "learning_rate": 3.712414147668777e-05, + "loss": 0.4215, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4112127423286438, + "step": 710, + "valid_targets_mean": 2654.4, + "valid_targets_min": 1151 + }, + { + "epoch": 1.8055555555555556, + "grad_norm": 0.5820441201531631, + "learning_rate": 3.7058724083133645e-05, + "loss": 0.4077, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.34681016206741333, + "step": 715, + "valid_targets_mean": 2884.4, + "valid_targets_min": 1351 + }, + { + "epoch": 1.8181818181818183, + "grad_norm": 0.6800591266970959, + "learning_rate": 3.6992629996141864e-05, + "loss": 0.4424, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4916991591453552, + "step": 720, + "valid_targets_mean": 3417.7, + "valid_targets_min": 1329 + }, + { + "epoch": 1.8308080808080809, + "grad_norm": 0.6563197557373552, + "learning_rate": 3.692586183756354e-05, + "loss": 0.3807, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3805094361305237, + "step": 725, + "valid_targets_mean": 2503.5, + "valid_targets_min": 1064 + }, + { + "epoch": 1.8434343434343434, + "grad_norm": 0.7047851378134344, + "learning_rate": 3.685842225598919e-05, + "loss": 0.4278, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4543723464012146, + "step": 730, + "valid_targets_mean": 2930.4, + "valid_targets_min": 1798 + }, + { + "epoch": 1.856060606060606, + "grad_norm": 0.6499844550915188, + "learning_rate": 3.6790313926643664e-05, + "loss": 0.4119, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3960087299346924, + "step": 735, + "valid_targets_mean": 2956.1, + "valid_targets_min": 1401 + }, + { + "epoch": 1.8686868686868687, + "grad_norm": 0.649385668021803, + "learning_rate": 3.6721539551279966e-05, + "loss": 0.4369, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4332810640335083, + "step": 740, + "valid_targets_mean": 3076.4, + "valid_targets_min": 998 + }, + { + "epoch": 1.8813131313131313, + "grad_norm": 0.7091254200389234, + "learning_rate": 3.665210185807217e-05, + "loss": 0.4388, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.44163447618484497, + "step": 745, + "valid_targets_mean": 3328.9, + "valid_targets_min": 1660 + }, + { + "epoch": 1.893939393939394, + "grad_norm": 0.5985641510783979, + "learning_rate": 3.658200360150713e-05, + "loss": 0.4091, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.37661778926849365, + "step": 750, + "valid_targets_mean": 3146.1, + "valid_targets_min": 1067 + }, + { + "epoch": 1.9065656565656566, + "grad_norm": 0.6268419140387759, + "learning_rate": 3.651124756227525e-05, + "loss": 0.401, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4080426096916199, + "step": 755, + "valid_targets_mean": 3422.2, + "valid_targets_min": 1798 + }, + { + "epoch": 1.9191919191919191, + "grad_norm": 0.7426620476089505, + "learning_rate": 3.6439836547160144e-05, + "loss": 0.4044, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.38891446590423584, + "step": 760, + "valid_targets_mean": 3295.2, + "valid_targets_min": 1546 + }, + { + "epoch": 1.9318181818181817, + "grad_norm": 0.6671491506286603, + "learning_rate": 3.636777338892732e-05, + "loss": 0.4466, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.44117462635040283, + "step": 765, + "valid_targets_mean": 2958.4, + "valid_targets_min": 1837 + }, + { + "epoch": 1.9444444444444444, + "grad_norm": 0.6342292838426378, + "learning_rate": 3.629506094621183e-05, + "loss": 0.3886, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3897709250450134, + "step": 770, + "valid_targets_mean": 2692.6, + "valid_targets_min": 1547 + }, + { + "epoch": 1.9570707070707072, + "grad_norm": 0.621675970891514, + "learning_rate": 3.622170210340482e-05, + "loss": 0.4183, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.38866496086120605, + "step": 775, + "valid_targets_mean": 2999.8, + "valid_targets_min": 1116 + }, + { + "epoch": 1.9696969696969697, + "grad_norm": 0.5905010144602377, + "learning_rate": 3.614769977053914e-05, + "loss": 0.4291, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3574790060520172, + "step": 780, + "valid_targets_mean": 3125.0, + "valid_targets_min": 963 + }, + { + "epoch": 1.9823232323232323, + "grad_norm": 0.5836024549642513, + "learning_rate": 3.607305688317391e-05, + "loss": 0.4046, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4132801294326782, + "step": 785, + "valid_targets_mean": 3194.9, + "valid_targets_min": 1242 + }, + { + "epoch": 1.9949494949494948, + "grad_norm": 0.7228511270619273, + "learning_rate": 3.5997776402278055e-05, + "loss": 0.3925, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.42261964082717896, + "step": 790, + "valid_targets_mean": 3067.1, + "valid_targets_min": 1603 + }, + { + "epoch": 2.007575757575758, + "grad_norm": 0.6700499879024988, + "learning_rate": 3.592186131411288e-05, + "loss": 0.376, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3675363063812256, + "step": 795, + "valid_targets_mean": 2926.6, + "valid_targets_min": 1575 + }, + { + "epoch": 2.0202020202020203, + "grad_norm": 0.6733433964569425, + "learning_rate": 3.584531463011356e-05, + "loss": 0.3648, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3609054982662201, + "step": 800, + "valid_targets_mean": 3172.1, + "valid_targets_min": 599 + }, + { + "epoch": 2.032828282828283, + "grad_norm": 0.6042940030317498, + "learning_rate": 3.576813938676973e-05, + "loss": 0.3973, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3894050121307373, + "step": 805, + "valid_targets_mean": 3448.1, + "valid_targets_min": 923 + }, + { + "epoch": 2.0454545454545454, + "grad_norm": 0.6749197830689422, + "learning_rate": 3.569033864550501e-05, + "loss": 0.38, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3814485967159271, + "step": 810, + "valid_targets_mean": 2783.8, + "valid_targets_min": 880 + }, + { + "epoch": 2.058080808080808, + "grad_norm": 0.6483067354879916, + "learning_rate": 3.561191549255555e-05, + "loss": 0.3565, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3972342312335968, + "step": 815, + "valid_targets_mean": 3241.1, + "valid_targets_min": 1649 + }, + { + "epoch": 2.0707070707070705, + "grad_norm": 0.6353320580780115, + "learning_rate": 3.553287303884762e-05, + "loss": 0.3695, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3591780662536621, + "step": 820, + "valid_targets_mean": 2904.1, + "valid_targets_min": 1116 + }, + { + "epoch": 2.0833333333333335, + "grad_norm": 0.6551388984986852, + "learning_rate": 3.545321441987422e-05, + "loss": 0.3704, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.29770687222480774, + "step": 825, + "valid_targets_mean": 2442.8, + "valid_targets_min": 568 + }, + { + "epoch": 2.095959595959596, + "grad_norm": 0.6968910493636307, + "learning_rate": 3.5372942795570644e-05, + "loss": 0.358, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3901800215244293, + "step": 830, + "valid_targets_mean": 2854.0, + "valid_targets_min": 682 + }, + { + "epoch": 2.1085858585858586, + "grad_norm": 0.568367502641086, + "learning_rate": 3.529206135018922e-05, + "loss": 0.3625, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4262069761753082, + "step": 835, + "valid_targets_mean": 4330.8, + "valid_targets_min": 2021 + }, + { + "epoch": 2.121212121212121, + "grad_norm": 0.6824608784442717, + "learning_rate": 3.521057329217289e-05, + "loss": 0.3845, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3504842519760132, + "step": 840, + "valid_targets_mean": 2486.9, + "valid_targets_min": 1574 + }, + { + "epoch": 2.1338383838383836, + "grad_norm": 0.6810643162239562, + "learning_rate": 3.512848185402804e-05, + "loss": 0.3885, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.42560821771621704, + "step": 845, + "valid_targets_mean": 3205.6, + "valid_targets_min": 1295 + }, + { + "epoch": 2.1464646464646466, + "grad_norm": 0.7051006217015395, + "learning_rate": 3.504579029219618e-05, + "loss": 0.385, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.47030025720596313, + "step": 850, + "valid_targets_mean": 4252.9, + "valid_targets_min": 620 + }, + { + "epoch": 2.159090909090909, + "grad_norm": 0.6158184161250323, + "learning_rate": 3.4962501886924817e-05, + "loss": 0.3643, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3438853621482849, + "step": 855, + "valid_targets_mean": 3000.1, + "valid_targets_min": 1183 + }, + { + "epoch": 2.1717171717171717, + "grad_norm": 0.9033510849058328, + "learning_rate": 3.4878619942137346e-05, + "loss": 0.3566, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.36623990535736084, + "step": 860, + "valid_targets_mean": 3486.9, + "valid_targets_min": 1208 + }, + { + "epoch": 2.1843434343434343, + "grad_norm": 0.6336084994385605, + "learning_rate": 3.479414778530194e-05, + "loss": 0.3553, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3282182812690735, + "step": 865, + "valid_targets_mean": 2926.6, + "valid_targets_min": 527 + }, + { + "epoch": 2.196969696969697, + "grad_norm": 0.642030989385258, + "learning_rate": 3.470908876729958e-05, + "loss": 0.3757, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.38485634326934814, + "step": 870, + "valid_targets_mean": 3797.9, + "valid_targets_min": 1802 + }, + { + "epoch": 2.20959595959596, + "grad_norm": 0.6859750653264282, + "learning_rate": 3.462344626229113e-05, + "loss": 0.3831, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4057656526565552, + "step": 875, + "valid_targets_mean": 3077.2, + "valid_targets_min": 1477 + }, + { + "epoch": 2.2222222222222223, + "grad_norm": 0.7051975354184825, + "learning_rate": 3.4537223667583517e-05, + "loss": 0.3941, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3670094311237335, + "step": 880, + "valid_targets_mean": 2480.9, + "valid_targets_min": 949 + }, + { + "epoch": 2.234848484848485, + "grad_norm": 0.6263495715036997, + "learning_rate": 3.44504244034949e-05, + "loss": 0.3744, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.38415664434432983, + "step": 885, + "valid_targets_mean": 3738.8, + "valid_targets_min": 1905 + }, + { + "epoch": 2.2474747474747474, + "grad_norm": 0.6736583840874508, + "learning_rate": 3.4363051913219054e-05, + "loss": 0.3787, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.32438012957572937, + "step": 890, + "valid_targets_mean": 2544.9, + "valid_targets_min": 1115 + }, + { + "epoch": 2.26010101010101, + "grad_norm": 0.6358475233857454, + "learning_rate": 3.427510966268874e-05, + "loss": 0.4206, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.38413217663764954, + "step": 895, + "valid_targets_mean": 3256.4, + "valid_targets_min": 1028 + }, + { + "epoch": 2.2727272727272725, + "grad_norm": 0.5703544012357852, + "learning_rate": 3.418660114043824e-05, + "loss": 0.3628, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.34161847829818726, + "step": 900, + "valid_targets_mean": 3436.0, + "valid_targets_min": 1334 + }, + { + "epoch": 2.2853535353535355, + "grad_norm": 0.6756271018463891, + "learning_rate": 3.409752985746498e-05, + "loss": 0.3907, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.39379531145095825, + "step": 905, + "valid_targets_mean": 3228.2, + "valid_targets_min": 1693 + }, + { + "epoch": 2.297979797979798, + "grad_norm": 0.5750798297997253, + "learning_rate": 3.400789934709023e-05, + "loss": 0.4308, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4697551727294922, + "step": 910, + "valid_targets_mean": 4887.2, + "valid_targets_min": 1320 + }, + { + "epoch": 2.3106060606060606, + "grad_norm": 0.6035695330159876, + "learning_rate": 3.391771316481895e-05, + "loss": 0.386, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.37602925300598145, + "step": 915, + "valid_targets_mean": 3335.6, + "valid_targets_min": 1865 + }, + { + "epoch": 2.323232323232323, + "grad_norm": 0.6231533529555526, + "learning_rate": 3.3826974888198764e-05, + "loss": 0.3847, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.46126362681388855, + "step": 920, + "valid_targets_mean": 4108.6, + "valid_targets_min": 1070 + }, + { + "epoch": 2.3358585858585856, + "grad_norm": 0.682022720271219, + "learning_rate": 3.373568811667802e-05, + "loss": 0.3822, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.39595192670822144, + "step": 925, + "valid_targets_mean": 2894.8, + "valid_targets_min": 1426 + }, + { + "epoch": 2.3484848484848486, + "grad_norm": 0.6797289333546708, + "learning_rate": 3.3643856471463036e-05, + "loss": 0.3785, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.38361161947250366, + "step": 930, + "valid_targets_mean": 3060.6, + "valid_targets_min": 1501 + }, + { + "epoch": 2.361111111111111, + "grad_norm": 0.6031465467740685, + "learning_rate": 3.355148359537441e-05, + "loss": 0.3961, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.44060710072517395, + "step": 935, + "valid_targets_mean": 4271.9, + "valid_targets_min": 1417 + }, + { + "epoch": 2.3737373737373737, + "grad_norm": 0.8364843828650894, + "learning_rate": 3.3458573152702556e-05, + "loss": 0.3854, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.36398792266845703, + "step": 940, + "valid_targets_mean": 2913.2, + "valid_targets_min": 1188 + }, + { + "epoch": 2.3863636363636362, + "grad_norm": 0.5980417429474294, + "learning_rate": 3.336512882906234e-05, + "loss": 0.3636, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.307436466217041, + "step": 945, + "valid_targets_mean": 2891.1, + "valid_targets_min": 1219 + }, + { + "epoch": 2.398989898989899, + "grad_norm": 0.6020944292492557, + "learning_rate": 3.3271154331246825e-05, + "loss": 0.3548, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3505295515060425, + "step": 950, + "valid_targets_mean": 4080.4, + "valid_targets_min": 988 + }, + { + "epoch": 2.4116161616161618, + "grad_norm": 0.6948551306615971, + "learning_rate": 3.317665338708033e-05, + "loss": 0.3449, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.38028645515441895, + "step": 955, + "valid_targets_mean": 3400.9, + "valid_targets_min": 452 + }, + { + "epoch": 2.4242424242424243, + "grad_norm": 0.7054147906410959, + "learning_rate": 3.308162974527045e-05, + "loss": 0.3478, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3440144658088684, + "step": 960, + "valid_targets_mean": 2568.9, + "valid_targets_min": 1248 + }, + { + "epoch": 2.436868686868687, + "grad_norm": 0.6502674537729093, + "learning_rate": 3.298608717525941e-05, + "loss": 0.3966, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4167352616786957, + "step": 965, + "valid_targets_mean": 3487.2, + "valid_targets_min": 729 + }, + { + "epoch": 2.4494949494949494, + "grad_norm": 0.6945755822435844, + "learning_rate": 3.289002946707452e-05, + "loss": 0.3618, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.38510334491729736, + "step": 970, + "valid_targets_mean": 3070.3, + "valid_targets_min": 1615 + }, + { + "epoch": 2.462121212121212, + "grad_norm": 0.7008172093994913, + "learning_rate": 3.2793460431177827e-05, + "loss": 0.3892, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.34910762310028076, + "step": 975, + "valid_targets_mean": 2654.6, + "valid_targets_min": 807 + }, + { + "epoch": 2.474747474747475, + "grad_norm": 0.6315928723707651, + "learning_rate": 3.269638389831498e-05, + "loss": 0.3683, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2948976755142212, + "step": 980, + "valid_targets_mean": 2513.4, + "valid_targets_min": 1048 + }, + { + "epoch": 2.4873737373737375, + "grad_norm": 0.659399817304144, + "learning_rate": 3.2598803719363234e-05, + "loss": 0.3995, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3890318274497986, + "step": 985, + "valid_targets_mean": 2942.6, + "valid_targets_min": 959 + }, + { + "epoch": 2.5, + "grad_norm": 0.6602751081844956, + "learning_rate": 3.250072376517873e-05, + "loss": 0.39, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.31299781799316406, + "step": 990, + "valid_targets_mean": 2744.5, + "valid_targets_min": 1621 + }, + { + "epoch": 2.5126262626262625, + "grad_norm": 0.5849536829497557, + "learning_rate": 3.240214792644291e-05, + "loss": 0.3908, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4854844808578491, + "step": 995, + "valid_targets_mean": 4775.6, + "valid_targets_min": 1064 + }, + { + "epoch": 2.525252525252525, + "grad_norm": 0.5606732760702411, + "learning_rate": 3.2303080113508206e-05, + "loss": 0.3851, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3052321672439575, + "step": 1000, + "valid_targets_mean": 3365.4, + "valid_targets_min": 1298 + }, + { + "epoch": 2.537878787878788, + "grad_norm": 0.6875908835706088, + "learning_rate": 3.220352425624292e-05, + "loss": 0.382, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3734514117240906, + "step": 1005, + "valid_targets_mean": 2893.5, + "valid_targets_min": 1773 + }, + { + "epoch": 2.5505050505050506, + "grad_norm": 0.5931935199736621, + "learning_rate": 3.210348430387531e-05, + "loss": 0.404, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3862113058567047, + "step": 1010, + "valid_targets_mean": 3470.7, + "valid_targets_min": 2311 + }, + { + "epoch": 2.563131313131313, + "grad_norm": 0.5854966748703261, + "learning_rate": 3.200296422483694e-05, + "loss": 0.3898, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3955685496330261, + "step": 1015, + "valid_targets_mean": 3589.4, + "valid_targets_min": 1829 + }, + { + "epoch": 2.5757575757575757, + "grad_norm": 0.6466992426589206, + "learning_rate": 3.19019680066053e-05, + "loss": 0.3517, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3543087840080261, + "step": 1020, + "valid_targets_mean": 2809.2, + "valid_targets_min": 1982 + }, + { + "epoch": 2.5883838383838382, + "grad_norm": 0.6437875698154034, + "learning_rate": 3.180049965554554e-05, + "loss": 0.4091, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.5216984748840332, + "step": 1025, + "valid_targets_mean": 4236.3, + "valid_targets_min": 971 + }, + { + "epoch": 2.601010101010101, + "grad_norm": 0.6686276392769377, + "learning_rate": 3.169856319675163e-05, + "loss": 0.3711, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4543102979660034, + "step": 1030, + "valid_targets_mean": 3561.1, + "valid_targets_min": 520 + }, + { + "epoch": 2.6136363636363638, + "grad_norm": 0.5951534964453324, + "learning_rate": 3.159616267388665e-05, + "loss": 0.3683, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3885546326637268, + "step": 1035, + "valid_targets_mean": 3710.8, + "valid_targets_min": 935 + }, + { + "epoch": 2.6262626262626263, + "grad_norm": 0.5998214906422664, + "learning_rate": 3.1493302149022394e-05, + "loss": 0.3643, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3495018482208252, + "step": 1040, + "valid_targets_mean": 3388.9, + "valid_targets_min": 1017 + }, + { + "epoch": 2.638888888888889, + "grad_norm": 0.679459404989866, + "learning_rate": 3.1389985702478204e-05, + "loss": 0.3733, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.40621423721313477, + "step": 1045, + "valid_targets_mean": 2743.4, + "valid_targets_min": 638 + }, + { + "epoch": 2.6515151515151514, + "grad_norm": 0.49821839728054024, + "learning_rate": 3.1286217432659176e-05, + "loss": 0.3848, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.42677822709083557, + "step": 1050, + "valid_targets_mean": 5945.1, + "valid_targets_min": 1605 + }, + { + "epoch": 2.6641414141414144, + "grad_norm": 0.6245151459734792, + "learning_rate": 3.118200145589351e-05, + "loss": 0.4174, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3920952081680298, + "step": 1055, + "valid_targets_mean": 3122.9, + "valid_targets_min": 1889 + }, + { + "epoch": 2.676767676767677, + "grad_norm": 0.6230627615382611, + "learning_rate": 3.1077341906269275e-05, + "loss": 0.3746, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4046259820461273, + "step": 1060, + "valid_targets_mean": 3375.1, + "valid_targets_min": 876 + }, + { + "epoch": 2.6893939393939394, + "grad_norm": 0.5984044868920384, + "learning_rate": 3.097224293547039e-05, + "loss": 0.3761, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.37177756428718567, + "step": 1065, + "valid_targets_mean": 3157.3, + "valid_targets_min": 928 + }, + { + "epoch": 2.702020202020202, + "grad_norm": 0.6200698875715712, + "learning_rate": 3.086670871261193e-05, + "loss": 0.3824, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4072413146495819, + "step": 1070, + "valid_targets_mean": 3493.7, + "valid_targets_min": 1659 + }, + { + "epoch": 2.7146464646464645, + "grad_norm": 0.7099464467086446, + "learning_rate": 3.076074342407476e-05, + "loss": 0.3903, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3890324532985687, + "step": 1075, + "valid_targets_mean": 2398.0, + "valid_targets_min": 580 + }, + { + "epoch": 2.7272727272727275, + "grad_norm": 0.5964382108688258, + "learning_rate": 3.0654351273339465e-05, + "loss": 0.3576, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.41678887605667114, + "step": 1080, + "valid_targets_mean": 4061.4, + "valid_targets_min": 587 + }, + { + "epoch": 2.73989898989899, + "grad_norm": 0.6483267090943723, + "learning_rate": 3.0547536480819584e-05, + "loss": 0.4063, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.38063323497772217, + "step": 1085, + "valid_targets_mean": 2821.2, + "valid_targets_min": 1120 + }, + { + "epoch": 2.7525252525252526, + "grad_norm": 0.6126539646079129, + "learning_rate": 3.044030328369422e-05, + "loss": 0.3809, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3242890238761902, + "step": 1090, + "valid_targets_mean": 2823.2, + "valid_targets_min": 674 + }, + { + "epoch": 2.765151515151515, + "grad_norm": 0.666211099047787, + "learning_rate": 3.033265593573994e-05, + "loss": 0.3988, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.35766348242759705, + "step": 1095, + "valid_targets_mean": 2767.8, + "valid_targets_min": 1512 + }, + { + "epoch": 2.7777777777777777, + "grad_norm": 0.5893274171238146, + "learning_rate": 3.0224598707162037e-05, + "loss": 0.3812, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.35240688920021057, + "step": 1100, + "valid_targets_mean": 3162.4, + "valid_targets_min": 855 + }, + { + "epoch": 2.7904040404040407, + "grad_norm": 0.6436336249871378, + "learning_rate": 3.0116135884425156e-05, + "loss": 0.377, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4571429193019867, + "step": 1105, + "valid_targets_mean": 3306.6, + "valid_targets_min": 999 + }, + { + "epoch": 2.8030303030303028, + "grad_norm": 0.6192587115834132, + "learning_rate": 3.0007271770083216e-05, + "loss": 0.3935, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2913767397403717, + "step": 1110, + "valid_targets_mean": 2528.9, + "valid_targets_min": 780 + }, + { + "epoch": 2.8156565656565657, + "grad_norm": 0.606484213579456, + "learning_rate": 2.989801068260878e-05, + "loss": 0.3754, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.29592519998550415, + "step": 1115, + "valid_targets_mean": 2622.3, + "valid_targets_min": 1436 + }, + { + "epoch": 2.8282828282828283, + "grad_norm": 0.645202232373261, + "learning_rate": 2.9788356956221712e-05, + "loss": 0.3682, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3493780195713043, + "step": 1120, + "valid_targets_mean": 2796.6, + "valid_targets_min": 1281 + }, + { + "epoch": 2.840909090909091, + "grad_norm": 0.6382732894080507, + "learning_rate": 2.9678314940717276e-05, + "loss": 0.3192, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.31767457723617554, + "step": 1125, + "valid_targets_mean": 2719.1, + "valid_targets_min": 1784 + }, + { + "epoch": 2.8535353535353534, + "grad_norm": 0.6864660560112232, + "learning_rate": 2.9567889001293553e-05, + "loss": 0.3885, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3313673436641693, + "step": 1130, + "valid_targets_mean": 2460.8, + "valid_targets_min": 588 + }, + { + "epoch": 2.866161616161616, + "grad_norm": 0.6023940706764531, + "learning_rate": 2.94570835183783e-05, + "loss": 0.3516, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.34224456548690796, + "step": 1135, + "valid_targets_mean": 3116.4, + "valid_targets_min": 1116 + }, + { + "epoch": 2.878787878787879, + "grad_norm": 0.6067779962957488, + "learning_rate": 2.93459028874552e-05, + "loss": 0.3723, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3636412024497986, + "step": 1140, + "valid_targets_mean": 3337.4, + "valid_targets_min": 571 + }, + { + "epoch": 2.8914141414141414, + "grad_norm": 0.6092698495443272, + "learning_rate": 2.9234351518889465e-05, + "loss": 0.4067, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4876616597175598, + "step": 1145, + "valid_targets_mean": 4359.3, + "valid_targets_min": 1940 + }, + { + "epoch": 2.904040404040404, + "grad_norm": 0.6240475836405307, + "learning_rate": 2.9122433837752906e-05, + "loss": 0.3557, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.31040680408477783, + "step": 1150, + "valid_targets_mean": 2761.3, + "valid_targets_min": 1568 + }, + { + "epoch": 2.9166666666666665, + "grad_norm": 0.5730069310790777, + "learning_rate": 2.9010154283648382e-05, + "loss": 0.403, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3821036219596863, + "step": 1155, + "valid_targets_mean": 3784.9, + "valid_targets_min": 1335 + }, + { + "epoch": 2.929292929292929, + "grad_norm": 0.6572646648160343, + "learning_rate": 2.8897517310533733e-05, + "loss": 0.3557, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3128521740436554, + "step": 1160, + "valid_targets_mean": 2357.7, + "valid_targets_min": 1131 + }, + { + "epoch": 2.941919191919192, + "grad_norm": 0.6115860029185797, + "learning_rate": 2.8784527386545006e-05, + "loss": 0.4476, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.49554532766342163, + "step": 1165, + "valid_targets_mean": 4678.3, + "valid_targets_min": 808 + }, + { + "epoch": 2.9545454545454546, + "grad_norm": 0.6535005566432543, + "learning_rate": 2.8671188993819327e-05, + "loss": 0.4041, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4300668239593506, + "step": 1170, + "valid_targets_mean": 3714.4, + "valid_targets_min": 2330 + }, + { + "epoch": 2.967171717171717, + "grad_norm": 0.6344464229430659, + "learning_rate": 2.8557506628317e-05, + "loss": 0.3867, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4097946882247925, + "step": 1175, + "valid_targets_mean": 3480.2, + "valid_targets_min": 1844 + }, + { + "epoch": 2.9797979797979797, + "grad_norm": 0.5549040647946087, + "learning_rate": 2.844348479964323e-05, + "loss": 0.3997, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.36530980467796326, + "step": 1180, + "valid_targets_mean": 3680.7, + "valid_targets_min": 2068 + }, + { + "epoch": 2.992424242424242, + "grad_norm": 0.6350789264846295, + "learning_rate": 2.8329128030869183e-05, + "loss": 0.3765, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.36941784620285034, + "step": 1185, + "valid_targets_mean": 2964.5, + "valid_targets_min": 598 + }, + { + "epoch": 3.005050505050505, + "grad_norm": 0.6101230464628173, + "learning_rate": 2.8214440858352602e-05, + "loss": 0.3585, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3312152624130249, + "step": 1190, + "valid_targets_mean": 3049.0, + "valid_targets_min": 1661 + }, + { + "epoch": 3.0176767676767677, + "grad_norm": 0.6717050734051245, + "learning_rate": 2.8099427831557836e-05, + "loss": 0.344, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3820994794368744, + "step": 1195, + "valid_targets_mean": 3147.5, + "valid_targets_min": 1505 + }, + { + "epoch": 3.0303030303030303, + "grad_norm": 0.6855402222878124, + "learning_rate": 2.798409351287537e-05, + "loss": 0.3489, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3795431852340698, + "step": 1200, + "valid_targets_mean": 3020.4, + "valid_targets_min": 1208 + }, + { + "epoch": 3.042929292929293, + "grad_norm": 0.6290362160754815, + "learning_rate": 2.7868442477440845e-05, + "loss": 0.3716, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3297278881072998, + "step": 1205, + "valid_targets_mean": 3352.2, + "valid_targets_min": 1870 + }, + { + "epoch": 3.0555555555555554, + "grad_norm": 0.6542985881986048, + "learning_rate": 2.7752479312953576e-05, + "loss": 0.3242, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.37977492809295654, + "step": 1210, + "valid_targets_mean": 3504.4, + "valid_targets_min": 1174 + }, + { + "epoch": 3.0681818181818183, + "grad_norm": 0.6865766533109904, + "learning_rate": 2.763620861949456e-05, + "loss": 0.3148, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3136219382286072, + "step": 1215, + "valid_targets_mean": 2935.5, + "valid_targets_min": 671 + }, + { + "epoch": 3.080808080808081, + "grad_norm": 0.650463581157268, + "learning_rate": 2.7519635009343982e-05, + "loss": 0.3616, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3669140934944153, + "step": 1220, + "valid_targets_mean": 3443.1, + "valid_targets_min": 1433 + }, + { + "epoch": 3.0934343434343434, + "grad_norm": 0.6374974160183221, + "learning_rate": 2.7402763106798295e-05, + "loss": 0.385, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3267042636871338, + "step": 1225, + "valid_targets_mean": 3336.6, + "valid_targets_min": 1490 + }, + { + "epoch": 3.106060606060606, + "grad_norm": 0.7164459430574219, + "learning_rate": 2.7285597547986715e-05, + "loss": 0.3452, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3801751732826233, + "step": 1230, + "valid_targets_mean": 3270.2, + "valid_targets_min": 1810 + }, + { + "epoch": 3.1186868686868685, + "grad_norm": 0.6786289930271091, + "learning_rate": 2.7168142980687394e-05, + "loss": 0.3578, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.343783438205719, + "step": 1235, + "valid_targets_mean": 2905.7, + "valid_targets_min": 1686 + }, + { + "epoch": 3.1313131313131315, + "grad_norm": 0.8228076465104819, + "learning_rate": 2.7050404064142985e-05, + "loss": 0.3593, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3551754355430603, + "step": 1240, + "valid_targets_mean": 3203.6, + "valid_targets_min": 599 + }, + { + "epoch": 3.143939393939394, + "grad_norm": 0.7057452290840084, + "learning_rate": 2.693238546887583e-05, + "loss": 0.3613, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.38851505517959595, + "step": 1245, + "valid_targets_mean": 2690.1, + "valid_targets_min": 738 + }, + { + "epoch": 3.1565656565656566, + "grad_norm": 0.6516753493509879, + "learning_rate": 2.6814091876502738e-05, + "loss": 0.3702, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.30876827239990234, + "step": 1250, + "valid_targets_mean": 2591.5, + "valid_targets_min": 1481 + }, + { + "epoch": 3.169191919191919, + "grad_norm": 0.5373908122046772, + "learning_rate": 2.669552797954917e-05, + "loss": 0.3621, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4019007682800293, + "step": 1255, + "valid_targets_mean": 4778.8, + "valid_targets_min": 1761 + }, + { + "epoch": 3.1818181818181817, + "grad_norm": 0.656469897426696, + "learning_rate": 2.6576698481263216e-05, + "loss": 0.3694, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3878747522830963, + "step": 1260, + "valid_targets_mean": 3612.4, + "valid_targets_min": 1553 + }, + { + "epoch": 3.1944444444444446, + "grad_norm": 0.6890638135835326, + "learning_rate": 2.6457608095428925e-05, + "loss": 0.3607, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.37581291794776917, + "step": 1265, + "valid_targets_mean": 3227.2, + "valid_targets_min": 876 + }, + { + "epoch": 3.207070707070707, + "grad_norm": 0.6686732667303787, + "learning_rate": 2.633826154617938e-05, + "loss": 0.3687, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.32370471954345703, + "step": 1270, + "valid_targets_mean": 3021.6, + "valid_targets_min": 555 + }, + { + "epoch": 3.2196969696969697, + "grad_norm": 0.60906879769235, + "learning_rate": 2.621866356780924e-05, + "loss": 0.333, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2982814908027649, + "step": 1275, + "valid_targets_mean": 3329.8, + "valid_targets_min": 1659 + }, + { + "epoch": 3.2323232323232323, + "grad_norm": 0.6788697674939761, + "learning_rate": 2.6098818904587014e-05, + "loss": 0.362, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3478686213493347, + "step": 1280, + "valid_targets_mean": 3108.2, + "valid_targets_min": 1337 + }, + { + "epoch": 3.244949494949495, + "grad_norm": 0.7418192989966806, + "learning_rate": 2.597873231056679e-05, + "loss": 0.342, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3505740165710449, + "step": 1285, + "valid_targets_mean": 2378.4, + "valid_targets_min": 699 + }, + { + "epoch": 3.257575757575758, + "grad_norm": 0.6743259771608354, + "learning_rate": 2.585840854939969e-05, + "loss": 0.3385, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.337390661239624, + "step": 1290, + "valid_targets_mean": 2974.2, + "valid_targets_min": 869 + }, + { + "epoch": 3.2702020202020203, + "grad_norm": 0.6553868057444038, + "learning_rate": 2.5737852394144903e-05, + "loss": 0.3231, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3390675187110901, + "step": 1295, + "valid_targets_mean": 2955.1, + "valid_targets_min": 1599 + }, + { + "epoch": 3.282828282828283, + "grad_norm": 0.6403703744617418, + "learning_rate": 2.5617068627080305e-05, + "loss": 0.3688, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4630083739757538, + "step": 1300, + "valid_targets_mean": 4076.2, + "valid_targets_min": 689 + }, + { + "epoch": 3.2954545454545454, + "grad_norm": 0.6023984043375903, + "learning_rate": 2.5496062039512834e-05, + "loss": 0.348, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.32059431076049805, + "step": 1305, + "valid_targets_mean": 3375.9, + "valid_targets_min": 1878 + }, + { + "epoch": 3.308080808080808, + "grad_norm": 0.6396425541446585, + "learning_rate": 2.5374837431588322e-05, + "loss": 0.3341, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3704610764980316, + "step": 1310, + "valid_targets_mean": 3331.3, + "valid_targets_min": 1406 + }, + { + "epoch": 3.320707070707071, + "grad_norm": 0.6776583962480702, + "learning_rate": 2.525339961210117e-05, + "loss": 0.356, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2946220636367798, + "step": 1315, + "valid_targets_mean": 2489.2, + "valid_targets_min": 587 + }, + { + "epoch": 3.3333333333333335, + "grad_norm": 0.7512035560086019, + "learning_rate": 2.5131753398303546e-05, + "loss": 0.3612, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3226706385612488, + "step": 1320, + "valid_targets_mean": 2257.8, + "valid_targets_min": 513 + }, + { + "epoch": 3.345959595959596, + "grad_norm": 0.6691736317428321, + "learning_rate": 2.5009903615714288e-05, + "loss": 0.3713, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.38169464468955994, + "step": 1325, + "valid_targets_mean": 3674.4, + "valid_targets_min": 710 + }, + { + "epoch": 3.3585858585858586, + "grad_norm": 0.6529576389515854, + "learning_rate": 2.4887855097927515e-05, + "loss": 0.3191, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3396865725517273, + "step": 1330, + "valid_targets_mean": 3407.2, + "valid_targets_min": 755 + }, + { + "epoch": 3.371212121212121, + "grad_norm": 0.7154816898888791, + "learning_rate": 2.4765612686420855e-05, + "loss": 0.3367, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.343447744846344, + "step": 1335, + "valid_targets_mean": 2833.6, + "valid_targets_min": 506 + }, + { + "epoch": 3.3838383838383836, + "grad_norm": 0.6736312101027783, + "learning_rate": 2.46431812303634e-05, + "loss": 0.378, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.32376375794410706, + "step": 1340, + "valid_targets_mean": 2666.4, + "valid_targets_min": 1333 + }, + { + "epoch": 3.3964646464646466, + "grad_norm": 0.6475094233289387, + "learning_rate": 2.4520565586423364e-05, + "loss": 0.3204, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.29635465145111084, + "step": 1345, + "valid_targets_mean": 2748.7, + "valid_targets_min": 1392 + }, + { + "epoch": 3.409090909090909, + "grad_norm": 0.5843996644681299, + "learning_rate": 2.4397770618575402e-05, + "loss": 0.3462, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3405814468860626, + "step": 1350, + "valid_targets_mean": 3957.3, + "valid_targets_min": 1585 + }, + { + "epoch": 3.4217171717171717, + "grad_norm": 0.6569771648498826, + "learning_rate": 2.4274801197907665e-05, + "loss": 0.3674, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.35267573595046997, + "step": 1355, + "valid_targets_mean": 3056.4, + "valid_targets_min": 1380 + }, + { + "epoch": 3.4343434343434343, + "grad_norm": 0.6278190545664051, + "learning_rate": 2.4151662202428613e-05, + "loss": 0.3628, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3695211410522461, + "step": 1360, + "valid_targets_mean": 3506.5, + "valid_targets_min": 1884 + }, + { + "epoch": 3.446969696969697, + "grad_norm": 0.634074126688538, + "learning_rate": 2.4028358516873427e-05, + "loss": 0.3216, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3146069645881653, + "step": 1365, + "valid_targets_mean": 3289.8, + "valid_targets_min": 1669 + }, + { + "epoch": 3.45959595959596, + "grad_norm": 0.6801530900407086, + "learning_rate": 2.390489503251034e-05, + "loss": 0.3597, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4268019199371338, + "step": 1370, + "valid_targets_mean": 3845.6, + "valid_targets_min": 1843 + }, + { + "epoch": 3.4722222222222223, + "grad_norm": 0.6302056512933344, + "learning_rate": 2.3781276646946526e-05, + "loss": 0.3483, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.35302233695983887, + "step": 1375, + "valid_targets_mean": 3840.2, + "valid_targets_min": 1261 + }, + { + "epoch": 3.484848484848485, + "grad_norm": 0.6377742108475873, + "learning_rate": 2.3657508263933874e-05, + "loss": 0.3612, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.33783674240112305, + "step": 1380, + "valid_targets_mean": 3263.4, + "valid_targets_min": 1149 + }, + { + "epoch": 3.4974747474747474, + "grad_norm": 0.7042898368764126, + "learning_rate": 2.3533594793174426e-05, + "loss": 0.3258, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3383498191833496, + "step": 1385, + "valid_targets_mean": 2682.6, + "valid_targets_min": 1382 + }, + { + "epoch": 3.51010101010101, + "grad_norm": 0.7030408364050017, + "learning_rate": 2.3409541150125645e-05, + "loss": 0.3557, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4075915217399597, + "step": 1390, + "valid_targets_mean": 3162.8, + "valid_targets_min": 1353 + }, + { + "epoch": 3.5227272727272725, + "grad_norm": 0.6274505021865276, + "learning_rate": 2.3285352255805398e-05, + "loss": 0.3496, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.31670212745666504, + "step": 1395, + "valid_targets_mean": 3087.0, + "valid_targets_min": 392 + }, + { + "epoch": 3.5353535353535355, + "grad_norm": 1.2486599391824142, + "learning_rate": 2.316103303659678e-05, + "loss": 0.3296, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.36197564005851746, + "step": 1400, + "valid_targets_mean": 3082.6, + "valid_targets_min": 527 + }, + { + "epoch": 3.547979797979798, + "grad_norm": 0.5371320718863393, + "learning_rate": 2.3036588424052672e-05, + "loss": 0.351, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3815935254096985, + "step": 1405, + "valid_targets_mean": 4752.9, + "valid_targets_min": 1819 + }, + { + "epoch": 3.5606060606060606, + "grad_norm": 0.705016854939806, + "learning_rate": 2.2912023354700105e-05, + "loss": 0.3665, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3603851795196533, + "step": 1410, + "valid_targets_mean": 2926.8, + "valid_targets_min": 1512 + }, + { + "epoch": 3.573232323232323, + "grad_norm": 0.5984653768510367, + "learning_rate": 2.2787342769844444e-05, + "loss": 0.3222, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3309054374694824, + "step": 1415, + "valid_targets_mean": 3481.1, + "valid_targets_min": 1110 + }, + { + "epoch": 3.5858585858585856, + "grad_norm": 0.8191525002477924, + "learning_rate": 2.2662551615373402e-05, + "loss": 0.3989, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.44843584299087524, + "step": 1420, + "valid_targets_mean": 2668.3, + "valid_targets_min": 648 + }, + { + "epoch": 3.5984848484848486, + "grad_norm": 0.6651567018600946, + "learning_rate": 2.25376548415608e-05, + "loss": 0.3694, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3173557221889496, + "step": 1425, + "valid_targets_mean": 2731.2, + "valid_targets_min": 1126 + }, + { + "epoch": 3.611111111111111, + "grad_norm": 0.6266431493382524, + "learning_rate": 2.241265740287021e-05, + "loss": 0.3443, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.29255497455596924, + "step": 1430, + "valid_targets_mean": 2907.2, + "valid_targets_min": 1479 + }, + { + "epoch": 3.6237373737373737, + "grad_norm": 0.5693607197936612, + "learning_rate": 2.2287564257758446e-05, + "loss": 0.3977, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.41386911273002625, + "step": 1435, + "valid_targets_mean": 4703.8, + "valid_targets_min": 707 + }, + { + "epoch": 3.6363636363636362, + "grad_norm": 0.7080853718994018, + "learning_rate": 2.2162380368478836e-05, + "loss": 0.3847, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3499618172645569, + "step": 1440, + "valid_targets_mean": 2786.1, + "valid_targets_min": 596 + }, + { + "epoch": 3.648989898989899, + "grad_norm": 0.5548114926983784, + "learning_rate": 2.2037110700884395e-05, + "loss": 0.3565, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25791436433792114, + "step": 1445, + "valid_targets_mean": 3419.7, + "valid_targets_min": 1345 + }, + { + "epoch": 3.6616161616161618, + "grad_norm": 0.5977854910500919, + "learning_rate": 2.1911760224230824e-05, + "loss": 0.3493, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.33634889125823975, + "step": 1450, + "valid_targets_mean": 3442.4, + "valid_targets_min": 1773 + }, + { + "epoch": 3.6742424242424243, + "grad_norm": 0.6796398481161333, + "learning_rate": 2.17863339109794e-05, + "loss": 0.3525, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.36308521032333374, + "step": 1455, + "valid_targets_mean": 3190.7, + "valid_targets_min": 1471 + }, + { + "epoch": 3.686868686868687, + "grad_norm": 0.6479531096999324, + "learning_rate": 2.166083673659973e-05, + "loss": 0.336, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.35041192173957825, + "step": 1460, + "valid_targets_mean": 3109.9, + "valid_targets_min": 880 + }, + { + "epoch": 3.6994949494949494, + "grad_norm": 0.6190656382920072, + "learning_rate": 2.153527367937236e-05, + "loss": 0.3489, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3769229054450989, + "step": 1465, + "valid_targets_mean": 3737.9, + "valid_targets_min": 1786 + }, + { + "epoch": 3.712121212121212, + "grad_norm": 0.6732653753234384, + "learning_rate": 2.140964972019132e-05, + "loss": 0.3496, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3730910122394562, + "step": 1470, + "valid_targets_mean": 3028.1, + "valid_targets_min": 786 + }, + { + "epoch": 3.724747474747475, + "grad_norm": 0.682620817181734, + "learning_rate": 2.128396984236651e-05, + "loss": 0.339, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.37931591272354126, + "step": 1475, + "valid_targets_mean": 3248.3, + "valid_targets_min": 1448 + }, + { + "epoch": 3.7373737373737375, + "grad_norm": 1.5193685144502576, + "learning_rate": 2.115823903142606e-05, + "loss": 0.3605, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3566587567329407, + "step": 1480, + "valid_targets_mean": 3345.8, + "valid_targets_min": 603 + }, + { + "epoch": 3.75, + "grad_norm": 0.6092501389476551, + "learning_rate": 2.1032462274918517e-05, + "loss": 0.3393, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.30876415967941284, + "step": 1485, + "valid_targets_mean": 3301.3, + "valid_targets_min": 952 + }, + { + "epoch": 3.7626262626262625, + "grad_norm": 0.7276893961486488, + "learning_rate": 2.0906644562215037e-05, + "loss": 0.3381, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3389100432395935, + "step": 1490, + "valid_targets_mean": 2715.4, + "valid_targets_min": 753 + }, + { + "epoch": 3.775252525252525, + "grad_norm": 0.5858712774395105, + "learning_rate": 2.078079088431143e-05, + "loss": 0.3767, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3030000925064087, + "step": 1495, + "valid_targets_mean": 3219.7, + "valid_targets_min": 2366 + }, + { + "epoch": 3.787878787878788, + "grad_norm": 0.6888688838778153, + "learning_rate": 2.0654906233630197e-05, + "loss": 0.3738, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.416027694940567, + "step": 1500, + "valid_targets_mean": 3128.2, + "valid_targets_min": 534 + }, + { + "epoch": 2.516722408026756, + "grad_norm": 0.662986162649258, + "learning_rate": 3.23558282621135e-05, + "loss": 0.3218, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.29847097396850586, + "step": 1505, + "valid_targets_mean": 3091.0, + "valid_targets_min": 508 + }, + { + "epoch": 2.5250836120401337, + "grad_norm": 0.6783328257944375, + "learning_rate": 3.229014188825108e-05, + "loss": 0.3139, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3512507975101471, + "step": 1510, + "valid_targets_mean": 3352.5, + "valid_targets_min": 1133 + }, + { + "epoch": 2.5334448160535117, + "grad_norm": 0.6677148404277962, + "learning_rate": 3.2224241814261216e-05, + "loss": 0.3225, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.26659291982650757, + "step": 1515, + "valid_targets_mean": 2794.4, + "valid_targets_min": 1460 + }, + { + "epoch": 2.5418060200668897, + "grad_norm": 0.7219771342415074, + "learning_rate": 3.215812918600978e-05, + "loss": 0.3324, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3495546579360962, + "step": 1520, + "valid_targets_mean": 2987.6, + "valid_targets_min": 632 + }, + { + "epoch": 2.5501672240802673, + "grad_norm": 0.6560298605364091, + "learning_rate": 3.209180515305855e-05, + "loss": 0.3119, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.331092894077301, + "step": 1525, + "valid_targets_mean": 3713.1, + "valid_targets_min": 534 + }, + { + "epoch": 2.5585284280936453, + "grad_norm": 0.6919955457501047, + "learning_rate": 3.2025270868645146e-05, + "loss": 0.3497, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3320353925228119, + "step": 1530, + "valid_targets_mean": 3199.7, + "valid_targets_min": 1028 + }, + { + "epoch": 2.5668896321070234, + "grad_norm": 0.708863656128023, + "learning_rate": 3.195852748966306e-05, + "loss": 0.3478, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.36112624406814575, + "step": 1535, + "valid_targets_mean": 3085.5, + "valid_targets_min": 588 + }, + { + "epoch": 2.5752508361204014, + "grad_norm": 0.7471046326039033, + "learning_rate": 3.189157617664151e-05, + "loss": 0.3075, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.28388604521751404, + "step": 1540, + "valid_targets_mean": 2699.7, + "valid_targets_min": 620 + }, + { + "epoch": 2.5836120401337794, + "grad_norm": 0.7471683542567666, + "learning_rate": 3.182441809372523e-05, + "loss": 0.317, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.33689889311790466, + "step": 1545, + "valid_targets_mean": 2698.6, + "valid_targets_min": 1551 + }, + { + "epoch": 2.591973244147157, + "grad_norm": 0.6750267862750765, + "learning_rate": 3.1757054408654266e-05, + "loss": 0.3833, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2789178192615509, + "step": 1550, + "valid_targets_mean": 2798.8, + "valid_targets_min": 1573 + }, + { + "epoch": 2.600334448160535, + "grad_norm": 0.681930396951124, + "learning_rate": 3.168948629274367e-05, + "loss": 0.3283, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3054756820201874, + "step": 1555, + "valid_targets_mean": 2962.2, + "valid_targets_min": 1403 + }, + { + "epoch": 2.608695652173913, + "grad_norm": 0.6634403273333718, + "learning_rate": 3.1621714920863104e-05, + "loss": 0.3259, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4073365032672882, + "step": 1560, + "valid_targets_mean": 4461.2, + "valid_targets_min": 1433 + }, + { + "epoch": 2.617056856187291, + "grad_norm": 0.8238005424494197, + "learning_rate": 3.155374147141646e-05, + "loss": 0.3604, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.38494405150413513, + "step": 1565, + "valid_targets_mean": 2640.8, + "valid_targets_min": 863 + }, + { + "epoch": 2.625418060200669, + "grad_norm": 0.7052867973145739, + "learning_rate": 3.1485567126321295e-05, + "loss": 0.3451, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3390483260154724, + "step": 1570, + "valid_targets_mean": 3285.6, + "valid_targets_min": 1434 + }, + { + "epoch": 2.6337792642140467, + "grad_norm": 0.6953845280826801, + "learning_rate": 3.1417193070988383e-05, + "loss": 0.3313, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3528243601322174, + "step": 1575, + "valid_targets_mean": 3543.2, + "valid_targets_min": 1601 + }, + { + "epoch": 2.6421404682274248, + "grad_norm": 0.7552847898076053, + "learning_rate": 3.134862049430099e-05, + "loss": 0.3392, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.39836952090263367, + "step": 1580, + "valid_targets_mean": 3433.9, + "valid_targets_min": 1805 + }, + { + "epoch": 2.650501672240803, + "grad_norm": 1.4277354366593005, + "learning_rate": 3.12798505885943e-05, + "loss": 0.36, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3085378408432007, + "step": 1585, + "valid_targets_mean": 3400.1, + "valid_targets_min": 1201 + }, + { + "epoch": 2.6588628762541804, + "grad_norm": 0.7410833217029822, + "learning_rate": 3.1210884549634624e-05, + "loss": 0.3107, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.28592583537101746, + "step": 1590, + "valid_targets_mean": 2576.4, + "valid_targets_min": 1460 + }, + { + "epoch": 2.6672240802675584, + "grad_norm": 0.8296406296042037, + "learning_rate": 3.114172357659861e-05, + "loss": 0.3542, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.370746374130249, + "step": 1595, + "valid_targets_mean": 2589.8, + "valid_targets_min": 587 + }, + { + "epoch": 2.6755852842809364, + "grad_norm": 0.5928853152526612, + "learning_rate": 3.107236887205242e-05, + "loss": 0.3683, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.33890628814697266, + "step": 1600, + "valid_targets_mean": 4613.9, + "valid_targets_min": 1582 + }, + { + "epoch": 2.6839464882943145, + "grad_norm": 0.7237916757375374, + "learning_rate": 3.1002821641930815e-05, + "loss": 0.3467, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3008784055709839, + "step": 1605, + "valid_targets_mean": 2820.9, + "valid_targets_min": 1351 + }, + { + "epoch": 2.6923076923076925, + "grad_norm": 0.6958603576096208, + "learning_rate": 3.093308309551616e-05, + "loss": 0.3285, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.26427051424980164, + "step": 1610, + "valid_targets_mean": 2401.6, + "valid_targets_min": 377 + }, + { + "epoch": 2.70066889632107, + "grad_norm": 0.7246194414411228, + "learning_rate": 3.0863154445417426e-05, + "loss": 0.3112, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.31494617462158203, + "step": 1615, + "valid_targets_mean": 2992.2, + "valid_targets_min": 1254 + }, + { + "epoch": 2.709030100334448, + "grad_norm": 0.6683077048389021, + "learning_rate": 3.079303690754908e-05, + "loss": 0.3588, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.46505263447761536, + "step": 1620, + "valid_targets_mean": 4923.4, + "valid_targets_min": 1295 + }, + { + "epoch": 2.717391304347826, + "grad_norm": 0.6839193880655342, + "learning_rate": 3.072273170110998e-05, + "loss": 0.344, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3480769693851471, + "step": 1625, + "valid_targets_mean": 3295.5, + "valid_targets_min": 698 + }, + { + "epoch": 2.7257525083612038, + "grad_norm": 0.6148350220041032, + "learning_rate": 3.0652240048562134e-05, + "loss": 0.3337, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3841598629951477, + "step": 1630, + "valid_targets_mean": 4531.8, + "valid_targets_min": 2092 + }, + { + "epoch": 2.734113712374582, + "grad_norm": 0.7604959335941064, + "learning_rate": 3.058156317560945e-05, + "loss": 0.3097, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3188338279724121, + "step": 1635, + "valid_targets_mean": 2672.3, + "valid_targets_min": 971 + }, + { + "epoch": 2.74247491638796, + "grad_norm": 0.7495694482080785, + "learning_rate": 3.0510702311176477e-05, + "loss": 0.3385, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.28397685289382935, + "step": 1640, + "valid_targets_mean": 2509.3, + "valid_targets_min": 1020 + }, + { + "epoch": 2.750836120401338, + "grad_norm": 0.5960363613154568, + "learning_rate": 3.043965868738695e-05, + "loss": 0.33, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.26706498861312866, + "step": 1645, + "valid_targets_mean": 3699.4, + "valid_targets_min": 638 + }, + { + "epoch": 2.759197324414716, + "grad_norm": 0.6692357148144276, + "learning_rate": 3.0368433539542433e-05, + "loss": 0.3714, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2700284719467163, + "step": 1650, + "valid_targets_mean": 3072.4, + "valid_targets_min": 1064 + }, + { + "epoch": 2.7675585284280935, + "grad_norm": 0.7677279753834415, + "learning_rate": 3.029702810610082e-05, + "loss": 0.3281, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3318488895893097, + "step": 1655, + "valid_targets_mean": 2859.8, + "valid_targets_min": 714 + }, + { + "epoch": 2.7759197324414715, + "grad_norm": 0.7231622567261902, + "learning_rate": 3.0225443628654787e-05, + "loss": 0.3182, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3072254955768585, + "step": 1660, + "valid_targets_mean": 3285.6, + "valid_targets_min": 1067 + }, + { + "epoch": 2.7842809364548495, + "grad_norm": 0.7351401519067079, + "learning_rate": 3.0153681351910226e-05, + "loss": 0.3046, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3622933626174927, + "step": 1665, + "valid_targets_mean": 3139.9, + "valid_targets_min": 1919 + }, + { + "epoch": 2.7926421404682276, + "grad_norm": 0.7218998613996677, + "learning_rate": 3.0081742523664576e-05, + "loss": 0.305, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2836865782737732, + "step": 1670, + "valid_targets_mean": 2875.6, + "valid_targets_min": 908 + }, + { + "epoch": 2.8010033444816056, + "grad_norm": 0.7498483662923942, + "learning_rate": 3.0009628394785158e-05, + "loss": 0.3279, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2725030481815338, + "step": 1675, + "valid_targets_mean": 2490.9, + "valid_targets_min": 858 + }, + { + "epoch": 2.809364548494983, + "grad_norm": 0.7901928997172852, + "learning_rate": 2.9937340219187402e-05, + "loss": 0.3407, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3297320008277893, + "step": 1680, + "valid_targets_mean": 2595.1, + "valid_targets_min": 1608 + }, + { + "epoch": 2.817725752508361, + "grad_norm": 0.6829285663175374, + "learning_rate": 2.986487925381304e-05, + "loss": 0.3098, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.28002995252609253, + "step": 1685, + "valid_targets_mean": 2900.0, + "valid_targets_min": 810 + }, + { + "epoch": 2.8260869565217392, + "grad_norm": 0.7868893655628152, + "learning_rate": 2.9792246758608283e-05, + "loss": 0.3628, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3678167164325714, + "step": 1690, + "valid_targets_mean": 3206.1, + "valid_targets_min": 1573 + }, + { + "epoch": 2.834448160535117, + "grad_norm": 0.6961974009885317, + "learning_rate": 2.9719443996501858e-05, + "loss": 0.3441, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3555162250995636, + "step": 1695, + "valid_targets_mean": 3276.0, + "valid_targets_min": 1491 + }, + { + "epoch": 2.842809364548495, + "grad_norm": 0.6616306549579406, + "learning_rate": 2.9646472233383118e-05, + "loss": 0.3263, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.307451993227005, + "step": 1700, + "valid_targets_mean": 3025.1, + "valid_targets_min": 508 + }, + { + "epoch": 2.851170568561873, + "grad_norm": 0.699054155600686, + "learning_rate": 2.9573332738079964e-05, + "loss": 0.3428, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.325960248708725, + "step": 1705, + "valid_targets_mean": 3397.9, + "valid_targets_min": 553 + }, + { + "epoch": 2.859531772575251, + "grad_norm": 0.5618995876602438, + "learning_rate": 2.9500026782336828e-05, + "loss": 0.3321, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3432704210281372, + "step": 1710, + "valid_targets_mean": 5129.1, + "valid_targets_min": 794 + }, + { + "epoch": 2.867892976588629, + "grad_norm": 0.7396609388739099, + "learning_rate": 2.942655564079254e-05, + "loss": 0.3364, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.30100107192993164, + "step": 1715, + "valid_targets_mean": 2780.6, + "valid_targets_min": 741 + }, + { + "epoch": 2.8762541806020065, + "grad_norm": 0.6882252256679291, + "learning_rate": 2.9352920590958173e-05, + "loss": 0.3636, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3655456006526947, + "step": 1720, + "valid_targets_mean": 3660.6, + "valid_targets_min": 1486 + }, + { + "epoch": 2.8846153846153846, + "grad_norm": 0.6954294304011069, + "learning_rate": 2.927912291319482e-05, + "loss": 0.2649, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2898782789707184, + "step": 1725, + "valid_targets_mean": 2949.5, + "valid_targets_min": 1417 + }, + { + "epoch": 2.8929765886287626, + "grad_norm": 0.7685790812651909, + "learning_rate": 2.9205163890691338e-05, + "loss": 0.3633, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.351645827293396, + "step": 1730, + "valid_targets_mean": 3392.4, + "valid_targets_min": 1028 + }, + { + "epoch": 2.90133779264214, + "grad_norm": 0.6514952351452494, + "learning_rate": 2.9131044809442038e-05, + "loss": 0.3061, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27277517318725586, + "step": 1735, + "valid_targets_mean": 3230.4, + "valid_targets_min": 1120 + }, + { + "epoch": 2.9096989966555182, + "grad_norm": 0.6336596359886507, + "learning_rate": 2.9056766958224324e-05, + "loss": 0.3282, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.31338661909103394, + "step": 1740, + "valid_targets_mean": 3871.8, + "valid_targets_min": 726 + }, + { + "epoch": 2.9180602006688963, + "grad_norm": 0.7074870527746576, + "learning_rate": 2.898233162857627e-05, + "loss": 0.3159, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.31244152784347534, + "step": 1745, + "valid_targets_mean": 3252.4, + "valid_targets_min": 1672 + }, + { + "epoch": 2.9264214046822743, + "grad_norm": 0.7317476390005567, + "learning_rate": 2.8907740114774185e-05, + "loss": 0.3326, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3308480381965637, + "step": 1750, + "valid_targets_mean": 2940.5, + "valid_targets_min": 971 + }, + { + "epoch": 2.9347826086956523, + "grad_norm": 0.669207925359114, + "learning_rate": 2.8832993713810095e-05, + "loss": 0.3291, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.34635135531425476, + "step": 1755, + "valid_targets_mean": 3485.1, + "valid_targets_min": 772 + }, + { + "epoch": 2.94314381270903, + "grad_norm": 0.7081348378617047, + "learning_rate": 2.8758093725369193e-05, + "loss": 0.3508, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.41460931301116943, + "step": 1760, + "valid_targets_mean": 3348.2, + "valid_targets_min": 580 + }, + { + "epoch": 2.951505016722408, + "grad_norm": 0.6563436808159772, + "learning_rate": 2.868304145180722e-05, + "loss": 0.363, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.43143677711486816, + "step": 1765, + "valid_targets_mean": 5450.4, + "valid_targets_min": 1058 + }, + { + "epoch": 2.959866220735786, + "grad_norm": 1.0813836021947982, + "learning_rate": 2.8607838198127886e-05, + "loss": 0.3024, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3358667194843292, + "step": 1770, + "valid_targets_mean": 2942.9, + "valid_targets_min": 556 + }, + { + "epoch": 2.968227424749164, + "grad_norm": 0.6535458920771104, + "learning_rate": 2.8532485271960088e-05, + "loss": 0.3272, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.30923616886138916, + "step": 1775, + "valid_targets_mean": 3134.3, + "valid_targets_min": 1443 + }, + { + "epoch": 2.976588628762542, + "grad_norm": 0.7051449951363025, + "learning_rate": 2.8456983983535243e-05, + "loss": 0.3081, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3197246789932251, + "step": 1780, + "valid_targets_mean": 3088.7, + "valid_targets_min": 722 + }, + { + "epoch": 2.9849498327759196, + "grad_norm": 0.7514550832469282, + "learning_rate": 2.838133564566447e-05, + "loss": 0.3221, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.33178478479385376, + "step": 1785, + "valid_targets_mean": 2903.9, + "valid_targets_min": 1051 + }, + { + "epoch": 2.9933110367892977, + "grad_norm": 0.7487764007212119, + "learning_rate": 2.8305541573715775e-05, + "loss": 0.355, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.32397782802581787, + "step": 1790, + "valid_targets_mean": 2655.2, + "valid_targets_min": 364 + }, + { + "epoch": 3.0016722408026757, + "grad_norm": 0.697866012772672, + "learning_rate": 2.8229603085591178e-05, + "loss": 0.3045, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24206683039665222, + "step": 1795, + "valid_targets_mean": 2628.6, + "valid_targets_min": 1232 + }, + { + "epoch": 3.0100334448160537, + "grad_norm": 0.7292123243657525, + "learning_rate": 2.8153521501703803e-05, + "loss": 0.3032, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.32684776186943054, + "step": 1800, + "valid_targets_mean": 3399.4, + "valid_targets_min": 1615 + }, + { + "epoch": 3.0183946488294313, + "grad_norm": 0.7473811261850504, + "learning_rate": 2.8077298144954904e-05, + "loss": 0.2602, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2628287672996521, + "step": 1805, + "valid_targets_mean": 2887.2, + "valid_targets_min": 807 + }, + { + "epoch": 3.0267558528428093, + "grad_norm": 0.7650083502129927, + "learning_rate": 2.8000934340710883e-05, + "loss": 0.316, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.30963340401649475, + "step": 1810, + "valid_targets_mean": 3063.6, + "valid_targets_min": 1264 + }, + { + "epoch": 3.0351170568561874, + "grad_norm": 0.7397530221539242, + "learning_rate": 2.792443141678022e-05, + "loss": 0.304, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27429091930389404, + "step": 1815, + "valid_targets_mean": 2718.4, + "valid_targets_min": 757 + }, + { + "epoch": 3.0434782608695654, + "grad_norm": 0.889361629882369, + "learning_rate": 2.784779070339041e-05, + "loss": 0.316, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3256457448005676, + "step": 1820, + "valid_targets_mean": 2798.4, + "valid_targets_min": 470 + }, + { + "epoch": 3.051839464882943, + "grad_norm": 0.6809205141917194, + "learning_rate": 2.7771013533164805e-05, + "loss": 0.3188, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3276858329772949, + "step": 1825, + "valid_targets_mean": 3351.5, + "valid_targets_min": 399 + }, + { + "epoch": 3.060200668896321, + "grad_norm": 0.6744548172485136, + "learning_rate": 2.7694101241099484e-05, + "loss": 0.3659, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4031035006046295, + "step": 1830, + "valid_targets_mean": 4916.2, + "valid_targets_min": 772 + }, + { + "epoch": 3.068561872909699, + "grad_norm": 0.7582120508573842, + "learning_rate": 2.7617055164539993e-05, + "loss": 0.3349, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.31827855110168457, + "step": 1835, + "valid_targets_mean": 2926.5, + "valid_targets_min": 2041 + }, + { + "epoch": 3.076923076923077, + "grad_norm": 0.8315396768017139, + "learning_rate": 2.753987664315813e-05, + "loss": 0.3021, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.35661745071411133, + "step": 1840, + "valid_targets_mean": 2858.8, + "valid_targets_min": 726 + }, + { + "epoch": 3.0852842809364547, + "grad_norm": 0.7155190185107607, + "learning_rate": 2.746256701892861e-05, + "loss": 0.3741, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3801868259906769, + "step": 1845, + "valid_targets_mean": 4064.2, + "valid_targets_min": 1411 + }, + { + "epoch": 3.0936454849498327, + "grad_norm": 0.6741071495612008, + "learning_rate": 2.738512763610579e-05, + "loss": 0.308, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.37737980484962463, + "step": 1850, + "valid_targets_mean": 4971.6, + "valid_targets_min": 1956 + }, + { + "epoch": 3.1020066889632107, + "grad_norm": 0.7590694014485702, + "learning_rate": 2.7307559841200238e-05, + "loss": 0.3291, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3324778079986572, + "step": 1855, + "valid_targets_mean": 2946.1, + "valid_targets_min": 1303 + }, + { + "epoch": 3.1103678929765888, + "grad_norm": 0.7215558419615479, + "learning_rate": 2.7229864982955328e-05, + "loss": 0.3186, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3469077944755554, + "step": 1860, + "valid_targets_mean": 3110.1, + "valid_targets_min": 1665 + }, + { + "epoch": 3.1187290969899664, + "grad_norm": 0.6897412184844943, + "learning_rate": 2.7152044412323842e-05, + "loss": 0.3075, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.28832149505615234, + "step": 1865, + "valid_targets_mean": 3266.5, + "valid_targets_min": 1272 + }, + { + "epoch": 3.1270903010033444, + "grad_norm": 0.8950983106159963, + "learning_rate": 2.7074099482444406e-05, + "loss": 0.3128, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3346301317214966, + "step": 1870, + "valid_targets_mean": 2962.5, + "valid_targets_min": 794 + }, + { + "epoch": 3.1354515050167224, + "grad_norm": 0.6829362728624939, + "learning_rate": 2.699603154861801e-05, + "loss": 0.3217, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2530469298362732, + "step": 1875, + "valid_targets_mean": 3057.2, + "valid_targets_min": 1148 + }, + { + "epoch": 3.1438127090301005, + "grad_norm": 0.7233648289737619, + "learning_rate": 2.6917841968284433e-05, + "loss": 0.3161, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3072529733181, + "step": 1880, + "valid_targets_mean": 2967.2, + "valid_targets_min": 1431 + }, + { + "epoch": 3.1521739130434785, + "grad_norm": 0.7416457175298982, + "learning_rate": 2.6839532100998623e-05, + "loss": 0.3185, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3872751295566559, + "step": 1885, + "valid_targets_mean": 3862.7, + "valid_targets_min": 1334 + }, + { + "epoch": 3.160535117056856, + "grad_norm": 0.7812518570866998, + "learning_rate": 2.6761103308407076e-05, + "loss": 0.3277, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25746119022369385, + "step": 1890, + "valid_targets_mean": 2487.5, + "valid_targets_min": 844 + }, + { + "epoch": 3.168896321070234, + "grad_norm": 0.7436986096190392, + "learning_rate": 2.668255695422415e-05, + "loss": 0.3709, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.37360888719558716, + "step": 1895, + "valid_targets_mean": 3813.4, + "valid_targets_min": 1554 + }, + { + "epoch": 3.177257525083612, + "grad_norm": 0.7142319442876787, + "learning_rate": 2.660389440420836e-05, + "loss": 0.3068, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.272394061088562, + "step": 1900, + "valid_targets_mean": 2995.9, + "valid_targets_min": 1008 + }, + { + "epoch": 3.1856187290969897, + "grad_norm": 0.7424793152458009, + "learning_rate": 2.6525117026138614e-05, + "loss": 0.3392, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.34922927618026733, + "step": 1905, + "valid_targets_mean": 3819.7, + "valid_targets_min": 1601 + }, + { + "epoch": 3.1939799331103678, + "grad_norm": 0.7607562321366768, + "learning_rate": 2.644622618979047e-05, + "loss": 0.2896, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24981051683425903, + "step": 1910, + "valid_targets_mean": 2548.7, + "valid_targets_min": 923 + }, + { + "epoch": 3.202341137123746, + "grad_norm": 0.6904862318660174, + "learning_rate": 2.6367223266912252e-05, + "loss": 0.3317, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.37764304876327515, + "step": 1915, + "valid_targets_mean": 3910.4, + "valid_targets_min": 1349 + }, + { + "epoch": 3.210702341137124, + "grad_norm": 0.8961099343679048, + "learning_rate": 2.6288109631201266e-05, + "loss": 0.3337, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3819909393787384, + "step": 1920, + "valid_targets_mean": 2620.2, + "valid_targets_min": 942 + }, + { + "epoch": 3.219063545150502, + "grad_norm": 0.7566386982431123, + "learning_rate": 2.6208886658279875e-05, + "loss": 0.3137, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3507644534111023, + "step": 1925, + "valid_targets_mean": 3339.7, + "valid_targets_min": 1683 + }, + { + "epoch": 3.2274247491638794, + "grad_norm": 0.8144566736238488, + "learning_rate": 2.6129555725671586e-05, + "loss": 0.3041, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3747752010822296, + "step": 1930, + "valid_targets_mean": 2877.8, + "valid_targets_min": 1324 + }, + { + "epoch": 3.2357859531772575, + "grad_norm": 0.8094873874087346, + "learning_rate": 2.605011821277712e-05, + "loss": 0.306, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3171420693397522, + "step": 1935, + "valid_targets_mean": 2959.7, + "valid_targets_min": 783 + }, + { + "epoch": 3.2441471571906355, + "grad_norm": 0.8547392925559153, + "learning_rate": 2.597057550085037e-05, + "loss": 0.3001, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27529996633529663, + "step": 1940, + "valid_targets_mean": 2264.8, + "valid_targets_min": 971 + }, + { + "epoch": 3.2525083612040135, + "grad_norm": 0.6817740682466431, + "learning_rate": 2.589092897297447e-05, + "loss": 0.3122, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.33298245072364807, + "step": 1945, + "valid_targets_mean": 3641.5, + "valid_targets_min": 2028 + }, + { + "epoch": 3.260869565217391, + "grad_norm": 0.7178992056677376, + "learning_rate": 2.581118001403767e-05, + "loss": 0.3346, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.298176109790802, + "step": 1950, + "valid_targets_mean": 2812.1, + "valid_targets_min": 1666 + }, + { + "epoch": 3.269230769230769, + "grad_norm": 0.821395268384391, + "learning_rate": 2.573133001070928e-05, + "loss": 0.3032, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.32294681668281555, + "step": 1955, + "valid_targets_mean": 2872.3, + "valid_targets_min": 1526 + }, + { + "epoch": 3.277591973244147, + "grad_norm": 0.7018121583116861, + "learning_rate": 2.565138035141558e-05, + "loss": 0.3199, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.32964542508125305, + "step": 1960, + "valid_targets_mean": 3175.1, + "valid_targets_min": 1682 + }, + { + "epoch": 3.2859531772575252, + "grad_norm": 1.1803702361445025, + "learning_rate": 2.557133242631565e-05, + "loss": 0.3063, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3171846866607666, + "step": 1965, + "valid_targets_mean": 3355.6, + "valid_targets_min": 467 + }, + { + "epoch": 3.294314381270903, + "grad_norm": 0.6610850683002595, + "learning_rate": 2.549118762727721e-05, + "loss": 0.3053, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24194839596748352, + "step": 1970, + "valid_targets_mean": 3080.6, + "valid_targets_min": 1573 + }, + { + "epoch": 3.302675585284281, + "grad_norm": 0.7255555656749366, + "learning_rate": 2.5410947347852436e-05, + "loss": 0.3355, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4482927620410919, + "step": 1975, + "valid_targets_mean": 4331.0, + "valid_targets_min": 1168 + }, + { + "epoch": 3.311036789297659, + "grad_norm": 0.7459931964816805, + "learning_rate": 2.5330612983253667e-05, + "loss": 0.3369, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.28639882802963257, + "step": 1980, + "valid_targets_mean": 3048.4, + "valid_targets_min": 1599 + }, + { + "epoch": 3.319397993311037, + "grad_norm": 0.7132568663511498, + "learning_rate": 2.5250185930329235e-05, + "loss": 0.3194, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3665810227394104, + "step": 1985, + "valid_targets_mean": 3538.0, + "valid_targets_min": 1274 + }, + { + "epoch": 3.327759197324415, + "grad_norm": 0.8008208379218488, + "learning_rate": 2.5169667587539105e-05, + "loss": 0.3226, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.30878740549087524, + "step": 1990, + "valid_targets_mean": 2999.9, + "valid_targets_min": 553 + }, + { + "epoch": 3.3361204013377925, + "grad_norm": 0.7114926198475543, + "learning_rate": 2.5089059354930584e-05, + "loss": 0.3253, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3151680827140808, + "step": 1995, + "valid_targets_mean": 3198.5, + "valid_targets_min": 1876 + }, + { + "epoch": 3.3444816053511706, + "grad_norm": 0.6910899592340489, + "learning_rate": 2.5008362634113986e-05, + "loss": 0.3158, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27248960733413696, + "step": 2000, + "valid_targets_mean": 3219.5, + "valid_targets_min": 1548 + }, + { + "epoch": 3.3528428093645486, + "grad_norm": 0.9194983641690248, + "learning_rate": 2.4927578828238253e-05, + "loss": 0.2827, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.30196303129196167, + "step": 2005, + "valid_targets_mean": 2675.1, + "valid_targets_min": 1560 + }, + { + "epoch": 3.361204013377926, + "grad_norm": 0.7413625053509546, + "learning_rate": 2.484670934196654e-05, + "loss": 0.3028, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2616788148880005, + "step": 2010, + "valid_targets_mean": 2792.3, + "valid_targets_min": 1188 + }, + { + "epoch": 3.369565217391304, + "grad_norm": 0.7183291854874223, + "learning_rate": 2.476575558145183e-05, + "loss": 0.3222, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3487267792224884, + "step": 2015, + "valid_targets_mean": 3637.6, + "valid_targets_min": 1018 + }, + { + "epoch": 3.3779264214046822, + "grad_norm": 0.7490283521521003, + "learning_rate": 2.468471895431243e-05, + "loss": 0.3021, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.325644314289093, + "step": 2020, + "valid_targets_mean": 3182.8, + "valid_targets_min": 1285 + }, + { + "epoch": 3.3862876254180603, + "grad_norm": 0.7101402332669482, + "learning_rate": 2.4603600869607564e-05, + "loss": 0.3267, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2660444378852844, + "step": 2025, + "valid_targets_mean": 2812.0, + "valid_targets_min": 1258 + }, + { + "epoch": 3.3946488294314383, + "grad_norm": 0.730576453198831, + "learning_rate": 2.452240273781281e-05, + "loss": 0.3016, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.26417773962020874, + "step": 2030, + "valid_targets_mean": 3180.6, + "valid_targets_min": 729 + }, + { + "epoch": 3.403010033444816, + "grad_norm": 0.7181814679922088, + "learning_rate": 2.444112597079558e-05, + "loss": 0.3162, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2812889814376831, + "step": 2035, + "valid_targets_mean": 3133.4, + "valid_targets_min": 1126 + }, + { + "epoch": 3.411371237458194, + "grad_norm": 0.7924112516136512, + "learning_rate": 2.435977198179065e-05, + "loss": 0.3287, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27810171246528625, + "step": 2040, + "valid_targets_mean": 2717.6, + "valid_targets_min": 682 + }, + { + "epoch": 3.419732441471572, + "grad_norm": 0.707141761893294, + "learning_rate": 2.4278342185375467e-05, + "loss": 0.3362, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.31040269136428833, + "step": 2045, + "valid_targets_mean": 3125.6, + "valid_targets_min": 1931 + }, + { + "epoch": 3.42809364548495, + "grad_norm": 0.7561619308472627, + "learning_rate": 2.4196837997445636e-05, + "loss": 0.3243, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3123628795146942, + "step": 2050, + "valid_targets_mean": 3224.0, + "valid_targets_min": 1631 + }, + { + "epoch": 3.4364548494983276, + "grad_norm": 0.7289170075667627, + "learning_rate": 2.4115260835190285e-05, + "loss": 0.2989, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2535760998725891, + "step": 2055, + "valid_targets_mean": 3353.6, + "valid_targets_min": 1805 + }, + { + "epoch": 3.4448160535117056, + "grad_norm": 0.945847336384475, + "learning_rate": 2.4033612117067396e-05, + "loss": 0.2888, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2784852981567383, + "step": 2060, + "valid_targets_mean": 3911.6, + "valid_targets_min": 1064 + }, + { + "epoch": 3.4531772575250836, + "grad_norm": 0.7817559218890252, + "learning_rate": 2.395189326277918e-05, + "loss": 0.2968, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3658587336540222, + "step": 2065, + "valid_targets_mean": 3038.2, + "valid_targets_min": 1011 + }, + { + "epoch": 3.4615384615384617, + "grad_norm": 0.7908249845730267, + "learning_rate": 2.3870105693247347e-05, + "loss": 0.3314, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.40112197399139404, + "step": 2070, + "valid_targets_mean": 3198.5, + "valid_targets_min": 707 + }, + { + "epoch": 3.4698996655518393, + "grad_norm": 0.7869243206602189, + "learning_rate": 2.3788250830588437e-05, + "loss": 0.3191, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2704690992832184, + "step": 2075, + "valid_targets_mean": 2424.8, + "valid_targets_min": 580 + }, + { + "epoch": 3.4782608695652173, + "grad_norm": 0.7080463514373667, + "learning_rate": 2.3706330098089077e-05, + "loss": 0.314, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3315350413322449, + "step": 2080, + "valid_targets_mean": 4324.7, + "valid_targets_min": 1855 + }, + { + "epoch": 3.4866220735785953, + "grad_norm": 0.8218808851621214, + "learning_rate": 2.3624344920181243e-05, + "loss": 0.2889, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3174235224723816, + "step": 2085, + "valid_targets_mean": 2845.2, + "valid_targets_min": 1569 + }, + { + "epoch": 3.4949832775919734, + "grad_norm": 0.7054886370767576, + "learning_rate": 2.3542296722417452e-05, + "loss": 0.3309, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3576347529888153, + "step": 2090, + "valid_targets_mean": 3957.9, + "valid_targets_min": 1892 + }, + { + "epoch": 3.5033444816053514, + "grad_norm": 0.7737550873720773, + "learning_rate": 2.346018693144605e-05, + "loss": 0.3147, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3562350869178772, + "step": 2095, + "valid_targets_mean": 3385.3, + "valid_targets_min": 1565 + }, + { + "epoch": 3.511705685618729, + "grad_norm": 0.793659888489234, + "learning_rate": 2.3378016974986326e-05, + "loss": 0.3287, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3222711384296417, + "step": 2100, + "valid_targets_mean": 2910.6, + "valid_targets_min": 1569 + }, + { + "epoch": 3.520066889632107, + "grad_norm": 0.8708230054218504, + "learning_rate": 2.3295788281803733e-05, + "loss": 0.3126, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.33255574107170105, + "step": 2105, + "valid_targets_mean": 3600.3, + "valid_targets_min": 1512 + }, + { + "epoch": 3.528428093645485, + "grad_norm": 0.7022641595914034, + "learning_rate": 2.321350228168505e-05, + "loss": 0.2912, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3107645511627197, + "step": 2110, + "valid_targets_mean": 3758.8, + "valid_targets_min": 1562 + }, + { + "epoch": 3.5367892976588626, + "grad_norm": 0.8652735159983888, + "learning_rate": 2.3131160405413472e-05, + "loss": 0.2842, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.26982659101486206, + "step": 2115, + "valid_targets_mean": 2795.1, + "valid_targets_min": 1208 + }, + { + "epoch": 3.5451505016722407, + "grad_norm": 0.656569555739008, + "learning_rate": 2.30487640847438e-05, + "loss": 0.3267, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3019533157348633, + "step": 2120, + "valid_targets_mean": 3843.1, + "valid_targets_min": 1730 + }, + { + "epoch": 3.5535117056856187, + "grad_norm": 0.721412813412654, + "learning_rate": 2.296631475237749e-05, + "loss": 0.3217, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.326846718788147, + "step": 2125, + "valid_targets_mean": 3144.4, + "valid_targets_min": 963 + }, + { + "epoch": 3.5618729096989967, + "grad_norm": 0.8698536209082071, + "learning_rate": 2.2883813841937754e-05, + "loss": 0.3396, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.32312431931495667, + "step": 2130, + "valid_targets_mean": 2314.3, + "valid_targets_min": 467 + }, + { + "epoch": 3.5702341137123748, + "grad_norm": 0.8109874750171677, + "learning_rate": 2.2801262787944668e-05, + "loss": 0.3159, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.298412948846817, + "step": 2135, + "valid_targets_mean": 3258.5, + "valid_targets_min": 1480 + }, + { + "epoch": 3.5785953177257523, + "grad_norm": 0.8162989082018804, + "learning_rate": 2.2718663025790183e-05, + "loss": 0.3138, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2839236259460449, + "step": 2140, + "valid_targets_mean": 2840.1, + "valid_targets_min": 1006 + }, + { + "epoch": 3.5869565217391304, + "grad_norm": 0.8038116768717268, + "learning_rate": 2.2636015991713167e-05, + "loss": 0.3237, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3331753611564636, + "step": 2145, + "valid_targets_mean": 2935.8, + "valid_targets_min": 1186 + }, + { + "epoch": 3.5953177257525084, + "grad_norm": 0.7739334985662348, + "learning_rate": 2.2553323122774487e-05, + "loss": 0.2988, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3323972821235657, + "step": 2150, + "valid_targets_mean": 3164.0, + "valid_targets_min": 527 + }, + { + "epoch": 3.6036789297658864, + "grad_norm": 0.7740161292149046, + "learning_rate": 2.2470585856831953e-05, + "loss": 0.2824, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25347644090652466, + "step": 2155, + "valid_targets_mean": 2834.1, + "valid_targets_min": 1341 + }, + { + "epoch": 3.6120401337792645, + "grad_norm": 0.6771441745861378, + "learning_rate": 2.2387805632515365e-05, + "loss": 0.3432, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.31761401891708374, + "step": 2160, + "valid_targets_mean": 4497.9, + "valid_targets_min": 699 + }, + { + "epoch": 3.620401337792642, + "grad_norm": 0.7521566633081039, + "learning_rate": 2.2304983889201467e-05, + "loss": 0.3186, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3334948420524597, + "step": 2165, + "valid_targets_mean": 3627.6, + "valid_targets_min": 1882 + }, + { + "epoch": 3.62876254180602, + "grad_norm": 0.7031338023674866, + "learning_rate": 2.222212206698894e-05, + "loss": 0.3302, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24446901679039001, + "step": 2170, + "valid_targets_mean": 2985.4, + "valid_targets_min": 620 + }, + { + "epoch": 3.637123745819398, + "grad_norm": 0.6866912372395072, + "learning_rate": 2.2139221606673353e-05, + "loss": 0.3618, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3053354024887085, + "step": 2175, + "valid_targets_mean": 3322.4, + "valid_targets_min": 1844 + }, + { + "epoch": 3.6454849498327757, + "grad_norm": 0.7043379955622305, + "learning_rate": 2.2056283949722114e-05, + "loss": 0.3045, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.29806143045425415, + "step": 2180, + "valid_targets_mean": 3245.2, + "valid_targets_min": 1284 + }, + { + "epoch": 3.6538461538461537, + "grad_norm": 0.7223387295053122, + "learning_rate": 2.197331053824939e-05, + "loss": 0.2912, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2715410888195038, + "step": 2185, + "valid_targets_mean": 3403.3, + "valid_targets_min": 1091 + }, + { + "epoch": 3.6622073578595318, + "grad_norm": 0.7923524210124755, + "learning_rate": 2.1890302814991075e-05, + "loss": 0.293, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27055805921554565, + "step": 2190, + "valid_targets_mean": 2505.2, + "valid_targets_min": 808 + }, + { + "epoch": 3.67056856187291, + "grad_norm": 0.9333927510801652, + "learning_rate": 2.1807262223279633e-05, + "loss": 0.2979, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.33472126722335815, + "step": 2195, + "valid_targets_mean": 2667.6, + "valid_targets_min": 1058 + }, + { + "epoch": 3.678929765886288, + "grad_norm": 0.7418078632890535, + "learning_rate": 2.172419020701907e-05, + "loss": 0.3113, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2958923280239105, + "step": 2200, + "valid_targets_mean": 3098.1, + "valid_targets_min": 663 + }, + { + "epoch": 3.6872909698996654, + "grad_norm": 0.8027987445224076, + "learning_rate": 2.1641088210659804e-05, + "loss": 0.3027, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.294519305229187, + "step": 2205, + "valid_targets_mean": 2970.6, + "valid_targets_min": 594 + }, + { + "epoch": 3.6956521739130435, + "grad_norm": 0.8206281052811005, + "learning_rate": 2.155795767917352e-05, + "loss": 0.2961, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.31719639897346497, + "step": 2210, + "valid_targets_mean": 3145.6, + "valid_targets_min": 1123 + }, + { + "epoch": 3.7040133779264215, + "grad_norm": 0.7960144879970568, + "learning_rate": 2.14748000580281e-05, + "loss": 0.3162, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3015314042568207, + "step": 2215, + "valid_targets_mean": 2424.9, + "valid_targets_min": 1020 + }, + { + "epoch": 3.712374581939799, + "grad_norm": 0.7242043132987579, + "learning_rate": 2.1391616793162435e-05, + "loss": 0.319, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.29711389541625977, + "step": 2220, + "valid_targets_mean": 3244.1, + "valid_targets_min": 599 + }, + { + "epoch": 3.720735785953177, + "grad_norm": 0.8514530826111597, + "learning_rate": 2.1308409330961308e-05, + "loss": 0.309, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3484026789665222, + "step": 2225, + "valid_targets_mean": 3235.6, + "valid_targets_min": 509 + }, + { + "epoch": 3.729096989966555, + "grad_norm": 0.7964054451384952, + "learning_rate": 2.122517911823027e-05, + "loss": 0.3255, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2610142230987549, + "step": 2230, + "valid_targets_mean": 2320.4, + "valid_targets_min": 779 + }, + { + "epoch": 3.737458193979933, + "grad_norm": 0.8055361034248192, + "learning_rate": 2.114192760217042e-05, + "loss": 0.2997, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3181573450565338, + "step": 2235, + "valid_targets_mean": 3128.3, + "valid_targets_min": 799 + }, + { + "epoch": 3.745819397993311, + "grad_norm": 0.7412318158314767, + "learning_rate": 2.10586562303533e-05, + "loss": 0.2753, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2673335671424866, + "step": 2240, + "valid_targets_mean": 2812.3, + "valid_targets_min": 511 + }, + { + "epoch": 3.754180602006689, + "grad_norm": 0.806735595661349, + "learning_rate": 2.0975366450695707e-05, + "loss": 0.2901, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.34919291734695435, + "step": 2245, + "valid_targets_mean": 2936.5, + "valid_targets_min": 723 + }, + { + "epoch": 3.762541806020067, + "grad_norm": 0.7799986710841482, + "learning_rate": 2.0892059711434496e-05, + "loss": 0.3019, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.30157431960105896, + "step": 2250, + "valid_targets_mean": 2891.5, + "valid_targets_min": 937 + }, + { + "epoch": 3.770903010033445, + "grad_norm": 0.6961704800642899, + "learning_rate": 2.0808737461101417e-05, + "loss": 0.2889, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2948909401893616, + "step": 2255, + "valid_targets_mean": 3901.8, + "valid_targets_min": 511 + }, + { + "epoch": 3.779264214046823, + "grad_norm": 0.6430882576013616, + "learning_rate": 2.0725401148497946e-05, + "loss": 0.3453, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.36942294239997864, + "step": 2260, + "valid_targets_mean": 4432.6, + "valid_targets_min": 2022 + }, + { + "epoch": 3.787625418060201, + "grad_norm": 0.9476578148171562, + "learning_rate": 2.0642052222670043e-05, + "loss": 0.3016, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.34764429926872253, + "step": 2265, + "valid_targets_mean": 2577.2, + "valid_targets_min": 586 + }, + { + "epoch": 3.7959866220735785, + "grad_norm": 0.8377190542394216, + "learning_rate": 2.0558692132883008e-05, + "loss": 0.31, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3148440420627594, + "step": 2270, + "valid_targets_mean": 2928.6, + "valid_targets_min": 1524 + }, + { + "epoch": 3.8043478260869565, + "grad_norm": 0.7674746381928171, + "learning_rate": 2.047532232859625e-05, + "loss": 0.3163, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2882377505302429, + "step": 2275, + "valid_targets_mean": 3211.1, + "valid_targets_min": 666 + }, + { + "epoch": 3.8127090301003346, + "grad_norm": 0.7603539777572138, + "learning_rate": 2.039194425943808e-05, + "loss": 0.3297, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.28924140334129333, + "step": 2280, + "valid_targets_mean": 2899.9, + "valid_targets_min": 1333 + }, + { + "epoch": 3.821070234113712, + "grad_norm": 0.6686475071499299, + "learning_rate": 2.0308559375180557e-05, + "loss": 0.274, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22952501475811005, + "step": 2285, + "valid_targets_mean": 2880.6, + "valid_targets_min": 1364 + }, + { + "epoch": 3.82943143812709, + "grad_norm": 0.9368254198322394, + "learning_rate": 2.0225169125714193e-05, + "loss": 0.3089, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3129611909389496, + "step": 2290, + "valid_targets_mean": 3224.8, + "valid_targets_min": 1399 + }, + { + "epoch": 3.8377926421404682, + "grad_norm": 0.8093927362873512, + "learning_rate": 2.0141774961022826e-05, + "loss": 0.2892, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.30943459272384644, + "step": 2295, + "valid_targets_mean": 2797.4, + "valid_targets_min": 1188 + }, + { + "epoch": 3.8461538461538463, + "grad_norm": 0.8283893332898696, + "learning_rate": 2.0058378331158357e-05, + "loss": 0.2882, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3456169366836548, + "step": 2300, + "valid_targets_mean": 3146.9, + "valid_targets_min": 983 + }, + { + "epoch": 3.8545150501672243, + "grad_norm": 0.8137074950993536, + "learning_rate": 1.9974980686215546e-05, + "loss": 0.2795, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24720898270606995, + "step": 2305, + "valid_targets_mean": 2561.4, + "valid_targets_min": 1072 + }, + { + "epoch": 3.862876254180602, + "grad_norm": 0.7732084599764113, + "learning_rate": 1.9891583476306814e-05, + "loss": 0.327, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.31134891510009766, + "step": 2310, + "valid_targets_mean": 2994.1, + "valid_targets_min": 377 + }, + { + "epoch": 3.87123745819398, + "grad_norm": 0.7919123584972602, + "learning_rate": 1.9808188151537008e-05, + "loss": 0.2996, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25116395950317383, + "step": 2315, + "valid_targets_mean": 2886.5, + "valid_targets_min": 1120 + }, + { + "epoch": 3.879598662207358, + "grad_norm": 0.7406481098660109, + "learning_rate": 1.972479616197821e-05, + "loss": 0.3043, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27166545391082764, + "step": 2320, + "valid_targets_mean": 3462.1, + "valid_targets_min": 1420 + }, + { + "epoch": 3.8879598662207355, + "grad_norm": 0.8731416317069042, + "learning_rate": 1.96414089576445e-05, + "loss": 0.2973, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.35053551197052, + "step": 2325, + "valid_targets_mean": 2947.4, + "valid_targets_min": 918 + }, + { + "epoch": 3.8963210702341136, + "grad_norm": 0.7355424472980551, + "learning_rate": 1.9558027988466743e-05, + "loss": 0.3077, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.31646502017974854, + "step": 2330, + "valid_targets_mean": 3953.5, + "valid_targets_min": 1805 + }, + { + "epoch": 3.9046822742474916, + "grad_norm": 0.7933779157508104, + "learning_rate": 1.947465470426741e-05, + "loss": 0.3036, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2982478141784668, + "step": 2335, + "valid_targets_mean": 3171.0, + "valid_targets_min": 587 + }, + { + "epoch": 3.9130434782608696, + "grad_norm": 0.8102903523307262, + "learning_rate": 1.9391290554735326e-05, + "loss": 0.3092, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3341084122657776, + "step": 2340, + "valid_targets_mean": 2779.8, + "valid_targets_min": 919 + }, + { + "epoch": 3.9214046822742477, + "grad_norm": 0.7472511964464619, + "learning_rate": 1.93079369894005e-05, + "loss": 0.3187, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.30373987555503845, + "step": 2345, + "valid_targets_mean": 3103.1, + "valid_targets_min": 1099 + }, + { + "epoch": 3.9297658862876252, + "grad_norm": 0.7947702151134352, + "learning_rate": 1.922459545760889e-05, + "loss": 0.3175, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.363625705242157, + "step": 2350, + "valid_targets_mean": 3638.0, + "valid_targets_min": 1698 + }, + { + "epoch": 3.9381270903010033, + "grad_norm": 0.8068716471695595, + "learning_rate": 1.914126740849723e-05, + "loss": 0.3014, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2704453766345978, + "step": 2355, + "valid_targets_mean": 2755.2, + "valid_targets_min": 1140 + }, + { + "epoch": 3.9464882943143813, + "grad_norm": 0.8320104780946185, + "learning_rate": 1.9057954290967795e-05, + "loss": 0.3297, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.38438886404037476, + "step": 2360, + "valid_targets_mean": 4250.0, + "valid_targets_min": 1306 + }, + { + "epoch": 3.9548494983277593, + "grad_norm": 0.7539305683119704, + "learning_rate": 1.897465755366325e-05, + "loss": 0.3009, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3422359228134155, + "step": 2365, + "valid_targets_mean": 3382.0, + "valid_targets_min": 692 + }, + { + "epoch": 3.9632107023411374, + "grad_norm": 0.7840574192641151, + "learning_rate": 1.8891378644941437e-05, + "loss": 0.2832, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.31690046191215515, + "step": 2370, + "valid_targets_mean": 3137.6, + "valid_targets_min": 978 + }, + { + "epoch": 3.971571906354515, + "grad_norm": 0.7175563191342146, + "learning_rate": 1.88081190128502e-05, + "loss": 0.2915, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.30410125851631165, + "step": 2375, + "valid_targets_mean": 3167.8, + "valid_targets_min": 1180 + }, + { + "epoch": 3.979933110367893, + "grad_norm": 0.7773926485050648, + "learning_rate": 1.8724880105102196e-05, + "loss": 0.2751, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2675161361694336, + "step": 2380, + "valid_targets_mean": 3070.1, + "valid_targets_min": 1741 + }, + { + "epoch": 3.988294314381271, + "grad_norm": 0.7560734554908822, + "learning_rate": 1.8641663369049724e-05, + "loss": 0.287, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2527911365032196, + "step": 2385, + "valid_targets_mean": 2798.2, + "valid_targets_min": 660 + }, + { + "epoch": 3.9966555183946486, + "grad_norm": 0.9060459955316458, + "learning_rate": 1.8558470251659574e-05, + "loss": 0.2824, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27911093831062317, + "step": 2390, + "valid_targets_mean": 2562.9, + "valid_targets_min": 633 + }, + { + "epoch": 4.005016722408027, + "grad_norm": 0.8594449711299057, + "learning_rate": 1.8475302199487848e-05, + "loss": 0.309, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2793564200401306, + "step": 2395, + "valid_targets_mean": 3010.3, + "valid_targets_min": 1511 + }, + { + "epoch": 4.013377926421405, + "grad_norm": 0.8838950374531943, + "learning_rate": 1.8392160658654826e-05, + "loss": 0.2648, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3361824154853821, + "step": 2400, + "valid_targets_mean": 3615.7, + "valid_targets_min": 1133 + }, + { + "epoch": 4.021739130434782, + "grad_norm": 0.7437793364772181, + "learning_rate": 1.8309047074819805e-05, + "loss": 0.3187, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2967781126499176, + "step": 2405, + "valid_targets_mean": 3316.2, + "valid_targets_min": 508 + }, + { + "epoch": 4.030100334448161, + "grad_norm": 0.8335679273374376, + "learning_rate": 1.822596289315596e-05, + "loss": 0.281, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.29241418838500977, + "step": 2410, + "valid_targets_mean": 3000.0, + "valid_targets_min": 729 + }, + { + "epoch": 4.038461538461538, + "grad_norm": 0.9450863777551641, + "learning_rate": 1.814290955832523e-05, + "loss": 0.2639, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27432459592819214, + "step": 2415, + "valid_targets_mean": 2766.5, + "valid_targets_min": 701 + }, + { + "epoch": 4.046822742474917, + "grad_norm": 0.6892365494886082, + "learning_rate": 1.8059888514453196e-05, + "loss": 0.2743, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.35358893871307373, + "step": 2420, + "valid_targets_mean": 4687.2, + "valid_targets_min": 707 + }, + { + "epoch": 4.055183946488294, + "grad_norm": 0.8546396818192133, + "learning_rate": 1.7976901205103953e-05, + "loss": 0.2409, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2732149660587311, + "step": 2425, + "valid_targets_mean": 2872.1, + "valid_targets_min": 1565 + }, + { + "epoch": 4.063545150501672, + "grad_norm": 0.9074538701365267, + "learning_rate": 1.789394907325504e-05, + "loss": 0.2829, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27739718556404114, + "step": 2430, + "valid_targets_mean": 2549.9, + "valid_targets_min": 412 + }, + { + "epoch": 4.0719063545150505, + "grad_norm": 0.7010499122198948, + "learning_rate": 1.7811033561272328e-05, + "loss": 0.2805, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.28559303283691406, + "step": 2435, + "valid_targets_mean": 4280.4, + "valid_targets_min": 1688 + }, + { + "epoch": 4.080267558528428, + "grad_norm": 0.7783072544646484, + "learning_rate": 1.7728156110884924e-05, + "loss": 0.2541, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27712807059288025, + "step": 2440, + "valid_targets_mean": 3133.4, + "valid_targets_min": 474 + }, + { + "epoch": 4.088628762541806, + "grad_norm": 0.8270554308257784, + "learning_rate": 1.7645318163160146e-05, + "loss": 0.3121, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2960726022720337, + "step": 2445, + "valid_targets_mean": 3287.0, + "valid_targets_min": 1298 + }, + { + "epoch": 4.096989966555184, + "grad_norm": 0.8001563378250733, + "learning_rate": 1.7562521158478432e-05, + "loss": 0.279, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23369301855564117, + "step": 2450, + "valid_targets_mean": 3446.6, + "valid_targets_min": 924 + }, + { + "epoch": 4.105351170568562, + "grad_norm": 0.8436159622655983, + "learning_rate": 1.7479766536508313e-05, + "loss": 0.292, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.28451332449913025, + "step": 2455, + "valid_targets_mean": 3320.6, + "valid_targets_min": 1505 + }, + { + "epoch": 4.11371237458194, + "grad_norm": 0.8511630672848115, + "learning_rate": 1.7397055736181366e-05, + "loss": 0.2975, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2644152343273163, + "step": 2460, + "valid_targets_mean": 2837.6, + "valid_targets_min": 1265 + }, + { + "epoch": 4.122073578595318, + "grad_norm": 0.8329359927809505, + "learning_rate": 1.7314390195667193e-05, + "loss": 0.2839, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27568697929382324, + "step": 2465, + "valid_targets_mean": 3675.1, + "valid_targets_min": 1439 + }, + { + "epoch": 4.130434782608695, + "grad_norm": 0.8456711766375431, + "learning_rate": 1.723177135234844e-05, + "loss": 0.2939, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2696060538291931, + "step": 2470, + "valid_targets_mean": 3198.6, + "valid_targets_min": 550 + }, + { + "epoch": 4.138795986622074, + "grad_norm": 0.8660274510460868, + "learning_rate": 1.7149200642795765e-05, + "loss": 0.2902, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.39124855399131775, + "step": 2475, + "valid_targets_mean": 3618.2, + "valid_targets_min": 1647 + }, + { + "epoch": 4.147157190635451, + "grad_norm": 0.8805366064387657, + "learning_rate": 1.70666795027429e-05, + "loss": 0.2943, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3205997943878174, + "step": 2480, + "valid_targets_mean": 3357.6, + "valid_targets_min": 1361 + }, + { + "epoch": 4.15551839464883, + "grad_norm": 0.8831645923939866, + "learning_rate": 1.6984209367061657e-05, + "loss": 0.2741, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3723400831222534, + "step": 2485, + "valid_targets_mean": 4439.7, + "valid_targets_min": 872 + }, + { + "epoch": 4.1638795986622075, + "grad_norm": 0.8033626392233867, + "learning_rate": 1.6901791669736974e-05, + "loss": 0.2835, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23729437589645386, + "step": 2490, + "valid_targets_mean": 3279.5, + "valid_targets_min": 1317 + }, + { + "epoch": 4.172240802675585, + "grad_norm": 0.8529045414447846, + "learning_rate": 1.6819427843842016e-05, + "loss": 0.2724, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24402417242527008, + "step": 2495, + "valid_targets_mean": 2829.3, + "valid_targets_min": 971 + }, + { + "epoch": 4.1806020066889635, + "grad_norm": 0.7772728028989128, + "learning_rate": 1.6737119321513224e-05, + "loss": 0.3132, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3176288604736328, + "step": 2500, + "valid_targets_mean": 3672.4, + "valid_targets_min": 965 + }, + { + "epoch": 4.188963210702341, + "grad_norm": 0.8338000934996834, + "learning_rate": 1.6654867533925418e-05, + "loss": 0.2758, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2721063494682312, + "step": 2505, + "valid_targets_mean": 3436.9, + "valid_targets_min": 1539 + }, + { + "epoch": 4.197324414715719, + "grad_norm": 0.8267713909712678, + "learning_rate": 1.6572673911266943e-05, + "loss": 0.2976, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.28024280071258545, + "step": 2510, + "valid_targets_mean": 3187.4, + "valid_targets_min": 1120 + }, + { + "epoch": 4.205685618729097, + "grad_norm": 0.9157546033073007, + "learning_rate": 1.6490539882714756e-05, + "loss": 0.2999, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2485034167766571, + "step": 2515, + "valid_targets_mean": 2835.7, + "valid_targets_min": 1525 + }, + { + "epoch": 4.214046822742475, + "grad_norm": 0.9249610301165988, + "learning_rate": 1.6408466876409596e-05, + "loss": 0.2725, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23990267515182495, + "step": 2520, + "valid_targets_mean": 2692.9, + "valid_targets_min": 490 + }, + { + "epoch": 4.222408026755853, + "grad_norm": 0.8518340628069135, + "learning_rate": 1.6326456319431154e-05, + "loss": 0.2999, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.31082072854042053, + "step": 2525, + "valid_targets_mean": 3444.6, + "valid_targets_min": 1569 + }, + { + "epoch": 4.230769230769231, + "grad_norm": 0.9837882931443535, + "learning_rate": 1.6244509637773256e-05, + "loss": 0.2714, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.29353979229927063, + "step": 2530, + "valid_targets_mean": 3000.7, + "valid_targets_min": 1243 + }, + { + "epoch": 4.239130434782608, + "grad_norm": 0.8579109041612042, + "learning_rate": 1.6162628256319078e-05, + "loss": 0.2849, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2931848168373108, + "step": 2535, + "valid_targets_mean": 2974.6, + "valid_targets_min": 1303 + }, + { + "epoch": 4.247491638795987, + "grad_norm": 0.7372703580470882, + "learning_rate": 1.6080813598816355e-05, + "loss": 0.3212, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.35204175114631653, + "step": 2540, + "valid_targets_mean": 4623.1, + "valid_targets_min": 1208 + }, + { + "epoch": 4.2558528428093645, + "grad_norm": 0.9167357129141266, + "learning_rate": 1.599906708785262e-05, + "loss": 0.2447, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23422318696975708, + "step": 2545, + "valid_targets_mean": 2499.6, + "valid_targets_min": 1144 + }, + { + "epoch": 4.264214046822742, + "grad_norm": 0.8898102183876689, + "learning_rate": 1.5917390144830488e-05, + "loss": 0.2956, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.295245885848999, + "step": 2550, + "valid_targets_mean": 3242.8, + "valid_targets_min": 1323 + }, + { + "epoch": 4.2725752508361206, + "grad_norm": 0.9416423066048137, + "learning_rate": 1.583578418994294e-05, + "loss": 0.2993, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23591409623622894, + "step": 2555, + "valid_targets_mean": 2772.4, + "valid_targets_min": 1460 + }, + { + "epoch": 4.280936454849498, + "grad_norm": 0.8396792312207702, + "learning_rate": 1.5754250642148592e-05, + "loss": 0.2838, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2691681385040283, + "step": 2560, + "valid_targets_mean": 3140.4, + "valid_targets_min": 1822 + }, + { + "epoch": 4.289297658862877, + "grad_norm": 0.7552376746174522, + "learning_rate": 1.5672790919147096e-05, + "loss": 0.2648, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.30901944637298584, + "step": 2565, + "valid_targets_mean": 3934.4, + "valid_targets_min": 1525 + }, + { + "epoch": 4.297658862876254, + "grad_norm": 0.9282959640580136, + "learning_rate": 1.5591406437354394e-05, + "loss": 0.2932, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2631564736366272, + "step": 2570, + "valid_targets_mean": 3684.6, + "valid_targets_min": 603 + }, + { + "epoch": 4.306020066889632, + "grad_norm": 0.8408763401027246, + "learning_rate": 1.5510098611878177e-05, + "loss": 0.2522, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1855039745569229, + "step": 2575, + "valid_targets_mean": 2278.2, + "valid_targets_min": 685 + }, + { + "epoch": 4.31438127090301, + "grad_norm": 0.9661559358244203, + "learning_rate": 1.542886885649322e-05, + "loss": 0.2875, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20213311910629272, + "step": 2580, + "valid_targets_mean": 2565.0, + "valid_targets_min": 1622 + }, + { + "epoch": 4.322742474916388, + "grad_norm": 0.8691049252102782, + "learning_rate": 1.534771858361683e-05, + "loss": 0.2705, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2871206998825073, + "step": 2585, + "valid_targets_mean": 3261.2, + "valid_targets_min": 757 + }, + { + "epoch": 4.331103678929766, + "grad_norm": 0.8392260775398249, + "learning_rate": 1.5266649204284273e-05, + "loss": 0.2793, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2738720774650574, + "step": 2590, + "valid_targets_mean": 3433.5, + "valid_targets_min": 1852 + }, + { + "epoch": 4.339464882943144, + "grad_norm": 0.8840473177758481, + "learning_rate": 1.5185662128124254e-05, + "loss": 0.2594, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2708375155925751, + "step": 2595, + "valid_targets_mean": 3278.4, + "valid_targets_min": 399 + }, + { + "epoch": 4.3478260869565215, + "grad_norm": 0.8804892196325519, + "learning_rate": 1.510475876333438e-05, + "loss": 0.2994, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23639748990535736, + "step": 2600, + "valid_targets_mean": 2654.6, + "valid_targets_min": 528 + }, + { + "epoch": 4.3561872909699, + "grad_norm": 0.8094688600381953, + "learning_rate": 1.5023940516656697e-05, + "loss": 0.2935, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2887652516365051, + "step": 2605, + "valid_targets_mean": 3643.9, + "valid_targets_min": 2109 + }, + { + "epoch": 4.364548494983278, + "grad_norm": 0.9244051200125512, + "learning_rate": 1.4943208793353235e-05, + "loss": 0.2653, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2914060056209564, + "step": 2610, + "valid_targets_mean": 3336.8, + "valid_targets_min": 550 + }, + { + "epoch": 4.372909698996655, + "grad_norm": 0.7461063611424056, + "learning_rate": 1.4862564997181528e-05, + "loss": 0.2765, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.36067837476730347, + "step": 2615, + "valid_targets_mean": 5280.1, + "valid_targets_min": 1147 + }, + { + "epoch": 4.381270903010034, + "grad_norm": 0.9652367742763335, + "learning_rate": 1.4782010530370294e-05, + "loss": 0.2636, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25113189220428467, + "step": 2620, + "valid_targets_mean": 2949.8, + "valid_targets_min": 1399 + }, + { + "epoch": 4.389632107023411, + "grad_norm": 0.8992134459024913, + "learning_rate": 1.470154679359495e-05, + "loss": 0.2412, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24360308051109314, + "step": 2625, + "valid_targets_mean": 2608.5, + "valid_targets_min": 935 + }, + { + "epoch": 4.39799331103679, + "grad_norm": 0.8220794903907007, + "learning_rate": 1.4621175185953322e-05, + "loss": 0.277, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3220457136631012, + "step": 2630, + "valid_targets_mean": 4098.1, + "valid_targets_min": 403 + }, + { + "epoch": 4.406354515050167, + "grad_norm": 0.8521683178016516, + "learning_rate": 1.4540897104941307e-05, + "loss": 0.2753, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3359491527080536, + "step": 2635, + "valid_targets_mean": 3451.1, + "valid_targets_min": 794 + }, + { + "epoch": 4.414715719063545, + "grad_norm": 0.8796923872304094, + "learning_rate": 1.4460713946428553e-05, + "loss": 0.2772, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24371229112148285, + "step": 2640, + "valid_targets_mean": 2400.8, + "valid_targets_min": 1000 + }, + { + "epoch": 4.423076923076923, + "grad_norm": 1.0131860235742853, + "learning_rate": 1.4380627104634224e-05, + "loss": 0.2554, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3007010817527771, + "step": 2645, + "valid_targets_mean": 2731.2, + "valid_targets_min": 1006 + }, + { + "epoch": 4.431438127090301, + "grad_norm": 0.7392458747612428, + "learning_rate": 1.4300637972102721e-05, + "loss": 0.2702, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3089587688446045, + "step": 2650, + "valid_targets_mean": 4641.4, + "valid_targets_min": 1954 + }, + { + "epoch": 4.4397993311036785, + "grad_norm": 0.8818880175089461, + "learning_rate": 1.4220747939679478e-05, + "loss": 0.2579, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2619357407093048, + "step": 2655, + "valid_targets_mean": 3080.8, + "valid_targets_min": 971 + }, + { + "epoch": 4.448160535117057, + "grad_norm": 0.9056508130411561, + "learning_rate": 1.414095839648679e-05, + "loss": 0.2827, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25068965554237366, + "step": 2660, + "valid_targets_mean": 2617.4, + "valid_targets_min": 1481 + }, + { + "epoch": 4.456521739130435, + "grad_norm": 0.8935220051791342, + "learning_rate": 1.4061270729899663e-05, + "loss": 0.2726, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.26286935806274414, + "step": 2665, + "valid_targets_mean": 2883.9, + "valid_targets_min": 928 + }, + { + "epoch": 4.464882943143813, + "grad_norm": 0.8961547416267401, + "learning_rate": 1.3981686325521647e-05, + "loss": 0.2768, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.29072946310043335, + "step": 2670, + "valid_targets_mean": 2993.4, + "valid_targets_min": 1894 + }, + { + "epoch": 4.473244147157191, + "grad_norm": 0.9599671002081342, + "learning_rate": 1.3902206567160827e-05, + "loss": 0.2455, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.28398674726486206, + "step": 2675, + "valid_targets_mean": 2923.8, + "valid_targets_min": 534 + }, + { + "epoch": 4.481605351170568, + "grad_norm": 0.8341406674593823, + "learning_rate": 1.3822832836805667e-05, + "loss": 0.3141, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3429492115974426, + "step": 2680, + "valid_targets_mean": 3562.8, + "valid_targets_min": 949 + }, + { + "epoch": 4.489966555183947, + "grad_norm": 0.8660387274121687, + "learning_rate": 1.3743566514601037e-05, + "loss": 0.2615, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.28438010811805725, + "step": 2685, + "valid_targets_mean": 3383.6, + "valid_targets_min": 1276 + }, + { + "epoch": 4.498327759197324, + "grad_norm": 0.8240982742485928, + "learning_rate": 1.3664408978824209e-05, + "loss": 0.2648, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2728864550590515, + "step": 2690, + "valid_targets_mean": 3949.8, + "valid_targets_min": 1505 + }, + { + "epoch": 4.506688963210703, + "grad_norm": 0.9047049810271199, + "learning_rate": 1.3585361605860863e-05, + "loss": 0.2831, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2605753540992737, + "step": 2695, + "valid_targets_mean": 2817.6, + "valid_targets_min": 952 + }, + { + "epoch": 4.51505016722408, + "grad_norm": 0.8397078739499565, + "learning_rate": 1.3506425770181211e-05, + "loss": 0.2871, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24365462362766266, + "step": 2700, + "valid_targets_mean": 2901.8, + "valid_targets_min": 2041 + }, + { + "epoch": 4.523411371237458, + "grad_norm": 0.8582678085193816, + "learning_rate": 1.342760284431603e-05, + "loss": 0.3192, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2986323833465576, + "step": 2705, + "valid_targets_mean": 3100.2, + "valid_targets_min": 1217 + }, + { + "epoch": 4.531772575250836, + "grad_norm": 0.7889072695692275, + "learning_rate": 1.3348894198832845e-05, + "loss": 0.2708, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.26328298449516296, + "step": 2710, + "valid_targets_mean": 3108.0, + "valid_targets_min": 967 + }, + { + "epoch": 4.540133779264214, + "grad_norm": 0.742736182328933, + "learning_rate": 1.3270301202312075e-05, + "loss": 0.2846, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.37916791439056396, + "step": 2715, + "valid_targets_mean": 5153.8, + "valid_targets_min": 1141 + }, + { + "epoch": 4.548494983277592, + "grad_norm": 0.9919759799813364, + "learning_rate": 1.3191825221323246e-05, + "loss": 0.2655, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.30372777581214905, + "step": 2720, + "valid_targets_mean": 2784.2, + "valid_targets_min": 1361 + }, + { + "epoch": 4.55685618729097, + "grad_norm": 0.794724468575154, + "learning_rate": 1.311346762040123e-05, + "loss": 0.276, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2651178538799286, + "step": 2725, + "valid_targets_mean": 3715.1, + "valid_targets_min": 983 + }, + { + "epoch": 4.565217391304348, + "grad_norm": 0.9806561211664588, + "learning_rate": 1.3035229762022513e-05, + "loss": 0.2577, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2600407600402832, + "step": 2730, + "valid_targets_mean": 2778.1, + "valid_targets_min": 520 + }, + { + "epoch": 4.573578595317725, + "grad_norm": 0.8400790069869569, + "learning_rate": 1.2957113006581494e-05, + "loss": 0.2809, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.30648618936538696, + "step": 2735, + "valid_targets_mean": 3071.6, + "valid_targets_min": 1115 + }, + { + "epoch": 4.581939799331104, + "grad_norm": 0.912124026132302, + "learning_rate": 1.2879118712366858e-05, + "loss": 0.2922, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3389705717563629, + "step": 2740, + "valid_targets_mean": 3479.6, + "valid_targets_min": 1737 + }, + { + "epoch": 4.590301003344481, + "grad_norm": 0.7136031625832305, + "learning_rate": 1.280124823553794e-05, + "loss": 0.2648, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.26324355602264404, + "step": 2745, + "valid_targets_mean": 4402.9, + "valid_targets_min": 1958 + }, + { + "epoch": 4.59866220735786, + "grad_norm": 0.8876231778745343, + "learning_rate": 1.2723502930101126e-05, + "loss": 0.3051, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3144656717777252, + "step": 2750, + "valid_targets_mean": 4292.6, + "valid_targets_min": 568 + }, + { + "epoch": 4.607023411371237, + "grad_norm": 0.9123329157042492, + "learning_rate": 1.2645884147886376e-05, + "loss": 0.2452, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22034093737602234, + "step": 2755, + "valid_targets_mean": 3341.9, + "valid_targets_min": 998 + }, + { + "epoch": 4.615384615384615, + "grad_norm": 0.8627497975800223, + "learning_rate": 1.2568393238523627e-05, + "loss": 0.2905, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.28283166885375977, + "step": 2760, + "valid_targets_mean": 3059.2, + "valid_targets_min": 1435 + }, + { + "epoch": 4.6237458193979935, + "grad_norm": 0.8879285250035446, + "learning_rate": 1.2491031549419396e-05, + "loss": 0.2594, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.28905028104782104, + "step": 2765, + "valid_targets_mean": 3173.9, + "valid_targets_min": 1069 + }, + { + "epoch": 4.632107023411371, + "grad_norm": 0.7869539440700758, + "learning_rate": 1.2413800425733324e-05, + "loss": 0.2637, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23459485173225403, + "step": 2770, + "valid_targets_mean": 3455.7, + "valid_targets_min": 1705 + }, + { + "epoch": 4.6404682274247495, + "grad_norm": 0.874081153668911, + "learning_rate": 1.2336701210354774e-05, + "loss": 0.2598, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.26058483123779297, + "step": 2775, + "valid_targets_mean": 3799.4, + "valid_targets_min": 2140 + }, + { + "epoch": 4.648829431438127, + "grad_norm": 0.8505418138060274, + "learning_rate": 1.2259735243879533e-05, + "loss": 0.271, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2382025420665741, + "step": 2780, + "valid_targets_mean": 2949.2, + "valid_targets_min": 1662 + }, + { + "epoch": 4.657190635451505, + "grad_norm": 0.8408719538127842, + "learning_rate": 1.2182903864586424e-05, + "loss": 0.311, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3271663188934326, + "step": 2785, + "valid_targets_mean": 3384.8, + "valid_targets_min": 1772 + }, + { + "epoch": 4.665551839464883, + "grad_norm": 0.7906487246754133, + "learning_rate": 1.2106208408414101e-05, + "loss": 0.261, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23760370910167694, + "step": 2790, + "valid_targets_mean": 3003.1, + "valid_targets_min": 638 + }, + { + "epoch": 4.673913043478261, + "grad_norm": 0.7560469434433705, + "learning_rate": 1.202965020893779e-05, + "loss": 0.2795, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3065562844276428, + "step": 2795, + "valid_targets_mean": 4305.3, + "valid_targets_min": 1436 + }, + { + "epoch": 4.682274247491639, + "grad_norm": 0.931404543547185, + "learning_rate": 1.1953230597346116e-05, + "loss": 0.2814, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.30321380496025085, + "step": 2800, + "valid_targets_mean": 3184.8, + "valid_targets_min": 1215 + }, + { + "epoch": 4.690635451505017, + "grad_norm": 0.8785086713293649, + "learning_rate": 1.1876950902417921e-05, + "loss": 0.311, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2660791277885437, + "step": 2805, + "valid_targets_mean": 2884.8, + "valid_targets_min": 991 + }, + { + "epoch": 4.698996655518394, + "grad_norm": 0.8658447278693211, + "learning_rate": 1.1800812450499227e-05, + "loss": 0.2447, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24090614914894104, + "step": 2810, + "valid_targets_mean": 2715.7, + "valid_targets_min": 764 + }, + { + "epoch": 4.707357859531773, + "grad_norm": 1.0374938153628415, + "learning_rate": 1.1724816565480092e-05, + "loss": 0.2597, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20534390211105347, + "step": 2815, + "valid_targets_mean": 2147.6, + "valid_targets_min": 401 + }, + { + "epoch": 4.7157190635451505, + "grad_norm": 0.7920774633218928, + "learning_rate": 1.1648964568771661e-05, + "loss": 0.2632, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18782053887844086, + "step": 2820, + "valid_targets_mean": 3404.8, + "valid_targets_min": 2009 + }, + { + "epoch": 4.724080267558528, + "grad_norm": 0.8633417845835815, + "learning_rate": 1.157325777928314e-05, + "loss": 0.2725, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.30495747923851013, + "step": 2825, + "valid_targets_mean": 3513.8, + "valid_targets_min": 1059 + }, + { + "epoch": 4.7324414715719065, + "grad_norm": 0.9344722457587161, + "learning_rate": 1.149769751339889e-05, + "loss": 0.2654, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2682046592235565, + "step": 2830, + "valid_targets_mean": 3535.4, + "valid_targets_min": 1574 + }, + { + "epoch": 4.740802675585284, + "grad_norm": 0.9085942238263078, + "learning_rate": 1.142228508495553e-05, + "loss": 0.2743, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27757948637008667, + "step": 2835, + "valid_targets_mean": 2801.4, + "valid_targets_min": 1637 + }, + { + "epoch": 4.749163879598662, + "grad_norm": 0.9016528116196614, + "learning_rate": 1.1347021805219092e-05, + "loss": 0.281, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24009665846824646, + "step": 2840, + "valid_targets_mean": 2991.9, + "valid_targets_min": 962 + }, + { + "epoch": 4.75752508361204, + "grad_norm": 0.8216538528179507, + "learning_rate": 1.1271908982862214e-05, + "loss": 0.2614, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2618323862552643, + "step": 2845, + "valid_targets_mean": 4103.2, + "valid_targets_min": 1949 + }, + { + "epoch": 4.765886287625418, + "grad_norm": 1.0231544478271972, + "learning_rate": 1.11969479239414e-05, + "loss": 0.2577, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.30647069215774536, + "step": 2850, + "valid_targets_mean": 2558.7, + "valid_targets_min": 364 + }, + { + "epoch": 4.774247491638796, + "grad_norm": 2.596885204828478, + "learning_rate": 1.1122139931874303e-05, + "loss": 0.3129, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3939549922943115, + "step": 2855, + "valid_targets_mean": 4341.0, + "valid_targets_min": 821 + }, + { + "epoch": 4.782608695652174, + "grad_norm": 0.9290067746793299, + "learning_rate": 1.104748630741705e-05, + "loss": 0.3034, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.30313026905059814, + "step": 2860, + "valid_targets_mean": 2781.0, + "valid_targets_min": 392 + }, + { + "epoch": 4.790969899665551, + "grad_norm": 1.7683165858974776, + "learning_rate": 1.0972988348641643e-05, + "loss": 0.2548, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22824634611606598, + "step": 2865, + "valid_targets_mean": 2794.6, + "valid_targets_min": 721 + }, + { + "epoch": 4.79933110367893, + "grad_norm": 0.8612735757395678, + "learning_rate": 1.0898647350913376e-05, + "loss": 0.2637, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24694600701332092, + "step": 2870, + "valid_targets_mean": 3144.4, + "valid_targets_min": 1359 + }, + { + "epoch": 4.8076923076923075, + "grad_norm": 0.883437344029108, + "learning_rate": 1.0824464606868323e-05, + "loss": 0.2571, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24586498737335205, + "step": 2875, + "valid_targets_mean": 3431.4, + "valid_targets_min": 1257 + }, + { + "epoch": 4.816053511705686, + "grad_norm": 1.0330995770490803, + "learning_rate": 1.0750441406390841e-05, + "loss": 0.2459, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2311995029449463, + "step": 2880, + "valid_targets_mean": 2488.1, + "valid_targets_min": 1242 + }, + { + "epoch": 4.8244147157190636, + "grad_norm": 0.958681058026221, + "learning_rate": 1.0676579036591167e-05, + "loss": 0.2742, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2965131998062134, + "step": 2885, + "valid_targets_mean": 3851.2, + "valid_targets_min": 1182 + }, + { + "epoch": 4.832775919732441, + "grad_norm": 0.788119350116846, + "learning_rate": 1.0602878781783019e-05, + "loss": 0.2932, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.328529953956604, + "step": 2890, + "valid_targets_mean": 3779.6, + "valid_targets_min": 1456 + }, + { + "epoch": 4.84113712374582, + "grad_norm": 0.8696431488480126, + "learning_rate": 1.0529341923461272e-05, + "loss": 0.2744, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.26618334650993347, + "step": 2895, + "valid_targets_mean": 3219.2, + "valid_targets_min": 1480 + }, + { + "epoch": 4.849498327759197, + "grad_norm": 0.9950302873697183, + "learning_rate": 1.0455969740279675e-05, + "loss": 0.271, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.28443658351898193, + "step": 2900, + "valid_targets_mean": 2625.2, + "valid_targets_min": 999 + }, + { + "epoch": 4.857859531772576, + "grad_norm": 0.9363541770349809, + "learning_rate": 1.0382763508028615e-05, + "loss": 0.256, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23824138939380646, + "step": 2905, + "valid_targets_mean": 2914.6, + "valid_targets_min": 580 + }, + { + "epoch": 4.866220735785953, + "grad_norm": 0.9212980221737899, + "learning_rate": 1.0309724499612939e-05, + "loss": 0.2566, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.30664288997650146, + "step": 2910, + "valid_targets_mean": 3167.0, + "valid_targets_min": 1422 + }, + { + "epoch": 4.874581939799331, + "grad_norm": 0.9658354985211547, + "learning_rate": 1.0236853985029815e-05, + "loss": 0.2841, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2978072762489319, + "step": 2915, + "valid_targets_mean": 4677.4, + "valid_targets_min": 1257 + }, + { + "epoch": 4.882943143812709, + "grad_norm": 0.8858319243242195, + "learning_rate": 1.0164153231346656e-05, + "loss": 0.2423, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24526861310005188, + "step": 2920, + "valid_targets_mean": 3126.1, + "valid_targets_min": 880 + }, + { + "epoch": 4.891304347826087, + "grad_norm": 1.0526204864364126, + "learning_rate": 1.0091623502679075e-05, + "loss": 0.3037, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3539399802684784, + "step": 2925, + "valid_targets_mean": 3289.9, + "valid_targets_min": 1212 + }, + { + "epoch": 4.8996655518394645, + "grad_norm": 0.885379966379974, + "learning_rate": 1.0019266060168929e-05, + "loss": 0.2934, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.30143141746520996, + "step": 2930, + "valid_targets_mean": 3651.1, + "valid_targets_min": 1477 + }, + { + "epoch": 4.908026755852843, + "grad_norm": 1.1625828671998966, + "learning_rate": 9.947082161962363e-06, + "loss": 0.248, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24516814947128296, + "step": 2935, + "valid_targets_mean": 2769.2, + "valid_targets_min": 569 + }, + { + "epoch": 4.916387959866221, + "grad_norm": 0.8652972290357003, + "learning_rate": 9.875073063187947e-06, + "loss": 0.2476, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.26153671741485596, + "step": 2940, + "valid_targets_mean": 3214.2, + "valid_targets_min": 1062 + }, + { + "epoch": 4.924749163879599, + "grad_norm": 0.9078402224774373, + "learning_rate": 9.803240015934859e-06, + "loss": 0.3063, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2811887264251709, + "step": 2945, + "valid_targets_mean": 3040.2, + "valid_targets_min": 918 + }, + { + "epoch": 4.933110367892977, + "grad_norm": 0.855619293705552, + "learning_rate": 9.731584269231094e-06, + "loss": 0.2411, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24628743529319763, + "step": 2950, + "valid_targets_mean": 3259.8, + "valid_targets_min": 1456 + }, + { + "epoch": 4.941471571906354, + "grad_norm": 0.9213152242653578, + "learning_rate": 9.660107069021767e-06, + "loss": 0.2783, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.33864572644233704, + "step": 2955, + "valid_targets_mean": 3462.7, + "valid_targets_min": 527 + }, + { + "epoch": 4.949832775919733, + "grad_norm": 0.8302602925046146, + "learning_rate": 9.588809658147433e-06, + "loss": 0.2748, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.28112542629241943, + "step": 2960, + "valid_targets_mean": 3587.3, + "valid_targets_min": 1509 + }, + { + "epoch": 4.95819397993311, + "grad_norm": 0.9191160601073972, + "learning_rate": 9.517693276322488e-06, + "loss": 0.2888, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.26543906331062317, + "step": 2965, + "valid_targets_mean": 2759.7, + "valid_targets_min": 520 + }, + { + "epoch": 4.966555183946488, + "grad_norm": 0.9353550508600107, + "learning_rate": 9.446759160113602e-06, + "loss": 0.2826, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2775583863258362, + "step": 2970, + "valid_targets_mean": 2901.1, + "valid_targets_min": 1694 + }, + { + "epoch": 4.974916387959866, + "grad_norm": 0.9139877304453574, + "learning_rate": 9.376008542918227e-06, + "loss": 0.2281, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19975979626178741, + "step": 2975, + "valid_targets_mean": 2663.2, + "valid_targets_min": 1166 + }, + { + "epoch": 4.983277591973244, + "grad_norm": 0.922883713258425, + "learning_rate": 9.305442654943145e-06, + "loss": 0.2498, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2600483000278473, + "step": 2980, + "valid_targets_mean": 2795.2, + "valid_targets_min": 1070 + }, + { + "epoch": 4.991638795986622, + "grad_norm": 0.9322875420888264, + "learning_rate": 9.235062723183076e-06, + "loss": 0.2583, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2565094232559204, + "step": 2985, + "valid_targets_mean": 2820.6, + "valid_targets_min": 1321 + }, + { + "epoch": 5.0, + "grad_norm": 0.7643560224911901, + "learning_rate": 9.164869971399359e-06, + "loss": 0.2743, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3641391396522522, + "step": 2990, + "valid_targets_mean": 4868.6, + "valid_targets_min": 1329 + }, + { + "epoch": 5.008361204013378, + "grad_norm": 0.8481755315514735, + "learning_rate": 9.094865620098646e-06, + "loss": 0.2614, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22899624705314636, + "step": 2995, + "valid_targets_mean": 3174.0, + "valid_targets_min": 1965 + }, + { + "epoch": 5.016722408026756, + "grad_norm": 0.8188076252143365, + "learning_rate": 9.025050886511702e-06, + "loss": 0.2747, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3067629039287567, + "step": 3000, + "valid_targets_mean": 4291.9, + "valid_targets_min": 1704 + }, + { + "epoch": 5.025083612040134, + "grad_norm": 0.8029644241788463, + "learning_rate": 8.955426984572228e-06, + "loss": 0.2581, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3037480115890503, + "step": 3005, + "valid_targets_mean": 4106.2, + "valid_targets_min": 973 + }, + { + "epoch": 5.033444816053512, + "grad_norm": 0.8661738149420555, + "learning_rate": 8.885995124895768e-06, + "loss": 0.2402, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2674838900566101, + "step": 3010, + "valid_targets_mean": 3359.9, + "valid_targets_min": 1500 + }, + { + "epoch": 5.04180602006689, + "grad_norm": 0.9620964614380081, + "learning_rate": 8.816756514758634e-06, + "loss": 0.2832, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2533591389656067, + "step": 3015, + "valid_targets_mean": 3118.5, + "valid_targets_min": 1356 + }, + { + "epoch": 5.050167224080267, + "grad_norm": 0.7541256752914219, + "learning_rate": 8.747712358076936e-06, + "loss": 0.2311, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24237793684005737, + "step": 3020, + "valid_targets_mean": 4191.6, + "valid_targets_min": 1668 + }, + { + "epoch": 5.058528428093646, + "grad_norm": 0.9025605491692672, + "learning_rate": 8.678863855385646e-06, + "loss": 0.2646, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.33202728629112244, + "step": 3025, + "valid_targets_mean": 4466.5, + "valid_targets_min": 693 + }, + { + "epoch": 5.066889632107023, + "grad_norm": 0.8425388830294137, + "learning_rate": 8.61021220381771e-06, + "loss": 0.242, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3084254860877991, + "step": 3030, + "valid_targets_mean": 4306.9, + "valid_targets_min": 968 + }, + { + "epoch": 5.075250836120401, + "grad_norm": 0.8389860612401873, + "learning_rate": 8.54175859708324e-06, + "loss": 0.2322, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22211232781410217, + "step": 3035, + "valid_targets_mean": 3588.2, + "valid_targets_min": 880 + }, + { + "epoch": 5.083612040133779, + "grad_norm": 0.9514036120737082, + "learning_rate": 8.473504225448765e-06, + "loss": 0.257, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2409243881702423, + "step": 3040, + "valid_targets_mean": 2856.7, + "valid_targets_min": 967 + }, + { + "epoch": 5.091973244147157, + "grad_norm": 0.9560659512366602, + "learning_rate": 8.405450275716525e-06, + "loss": 0.2544, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2601991891860962, + "step": 3045, + "valid_targets_mean": 3723.9, + "valid_targets_min": 1569 + }, + { + "epoch": 5.1003344481605355, + "grad_norm": 0.9864743563923629, + "learning_rate": 8.337597931203836e-06, + "loss": 0.239, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3188202381134033, + "step": 3050, + "valid_targets_mean": 3242.7, + "valid_targets_min": 1154 + }, + { + "epoch": 5.108695652173913, + "grad_norm": 1.1490912764067835, + "learning_rate": 8.269948371722518e-06, + "loss": 0.2602, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21089932322502136, + "step": 3055, + "valid_targets_mean": 2516.8, + "valid_targets_min": 988 + }, + { + "epoch": 5.117056856187291, + "grad_norm": 0.8194749944721579, + "learning_rate": 8.20250277355838e-06, + "loss": 0.2787, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3658130168914795, + "step": 3060, + "valid_targets_mean": 4739.9, + "valid_targets_min": 1512 + }, + { + "epoch": 5.125418060200669, + "grad_norm": 0.9728259830553914, + "learning_rate": 8.135262309450764e-06, + "loss": 0.2707, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27498018741607666, + "step": 3065, + "valid_targets_mean": 3150.4, + "valid_targets_min": 594 + }, + { + "epoch": 5.133779264214047, + "grad_norm": 1.0179078265962471, + "learning_rate": 8.068228148572157e-06, + "loss": 0.2445, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19528654217720032, + "step": 3070, + "valid_targets_mean": 2590.3, + "valid_targets_min": 513 + }, + { + "epoch": 5.142140468227424, + "grad_norm": 1.0414753424178975, + "learning_rate": 8.001401456507858e-06, + "loss": 0.2298, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24913433194160461, + "step": 3075, + "valid_targets_mean": 2828.6, + "valid_targets_min": 1509 + }, + { + "epoch": 5.150501672240803, + "grad_norm": 0.9673371238498945, + "learning_rate": 7.934783395235716e-06, + "loss": 0.253, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2619754672050476, + "step": 3080, + "valid_targets_mean": 3273.0, + "valid_targets_min": 1547 + }, + { + "epoch": 5.15886287625418, + "grad_norm": 1.0777145808692163, + "learning_rate": 7.868375123105921e-06, + "loss": 0.29, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.241988867521286, + "step": 3085, + "valid_targets_mean": 2760.0, + "valid_targets_min": 1115 + }, + { + "epoch": 5.167224080267559, + "grad_norm": 0.9357681236144693, + "learning_rate": 7.802177794820857e-06, + "loss": 0.2851, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4071310758590698, + "step": 3090, + "valid_targets_mean": 3665.1, + "valid_targets_min": 638 + }, + { + "epoch": 5.1755852842809364, + "grad_norm": 0.8810724848053256, + "learning_rate": 7.736192561415045e-06, + "loss": 0.2883, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3404349386692047, + "step": 3095, + "valid_targets_mean": 3941.1, + "valid_targets_min": 799 + }, + { + "epoch": 5.183946488294314, + "grad_norm": 0.9345756552103464, + "learning_rate": 7.670420570235113e-06, + "loss": 0.2718, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20666499435901642, + "step": 3100, + "valid_targets_mean": 2741.7, + "valid_targets_min": 660 + }, + { + "epoch": 5.1923076923076925, + "grad_norm": 0.89831218286986, + "learning_rate": 7.604862964919819e-06, + "loss": 0.2531, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.26516643166542053, + "step": 3105, + "valid_targets_mean": 3194.3, + "valid_targets_min": 1893 + }, + { + "epoch": 5.20066889632107, + "grad_norm": 1.0235151109065963, + "learning_rate": 7.539520885380242e-06, + "loss": 0.2647, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23447315394878387, + "step": 3110, + "valid_targets_mean": 2732.0, + "valid_targets_min": 1261 + }, + { + "epoch": 5.209030100334449, + "grad_norm": 1.0527575176171846, + "learning_rate": 7.474395467779885e-06, + "loss": 0.2386, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24746760725975037, + "step": 3115, + "valid_targets_mean": 3079.3, + "valid_targets_min": 1647 + }, + { + "epoch": 5.217391304347826, + "grad_norm": 0.8412479852328654, + "learning_rate": 7.409487844514946e-06, + "loss": 0.2765, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2139432430267334, + "step": 3120, + "valid_targets_mean": 3529.4, + "valid_targets_min": 2262 + }, + { + "epoch": 5.225752508361204, + "grad_norm": 0.8859243166629239, + "learning_rate": 7.344799144194647e-06, + "loss": 0.2646, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.30506086349487305, + "step": 3125, + "valid_targets_mean": 4511.6, + "valid_targets_min": 1709 + }, + { + "epoch": 5.234113712374582, + "grad_norm": 0.928318267325103, + "learning_rate": 7.280330491621579e-06, + "loss": 0.265, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25192520022392273, + "step": 3130, + "valid_targets_mean": 3102.3, + "valid_targets_min": 1604 + }, + { + "epoch": 5.24247491638796, + "grad_norm": 0.8514180338596942, + "learning_rate": 7.2160830077721655e-06, + "loss": 0.2806, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.42115187644958496, + "step": 3135, + "valid_targets_mean": 4393.4, + "valid_targets_min": 2044 + }, + { + "epoch": 5.250836120401337, + "grad_norm": 0.8701754054847802, + "learning_rate": 7.15205780977716e-06, + "loss": 0.2483, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23187309503555298, + "step": 3140, + "valid_targets_mean": 3508.4, + "valid_targets_min": 1806 + }, + { + "epoch": 5.259197324414716, + "grad_norm": 1.0414461577784981, + "learning_rate": 7.0882560109022255e-06, + "loss": 0.2487, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2434636354446411, + "step": 3145, + "valid_targets_mean": 2689.4, + "valid_targets_min": 860 + }, + { + "epoch": 5.2675585284280935, + "grad_norm": 0.9992819578511724, + "learning_rate": 7.02467872052858e-06, + "loss": 0.2108, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20966418087482452, + "step": 3150, + "valid_targets_mean": 3109.2, + "valid_targets_min": 1471 + }, + { + "epoch": 5.275919732441472, + "grad_norm": 0.9478720751528437, + "learning_rate": 6.9613270441337075e-06, + "loss": 0.2412, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22975382208824158, + "step": 3155, + "valid_targets_mean": 3196.7, + "valid_targets_min": 1774 + }, + { + "epoch": 5.2842809364548495, + "grad_norm": 0.8672829908504149, + "learning_rate": 6.8982020832721054e-06, + "loss": 0.2426, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22695282101631165, + "step": 3160, + "valid_targets_mean": 3513.6, + "valid_targets_min": 723 + }, + { + "epoch": 5.292642140468227, + "grad_norm": 1.022405240131088, + "learning_rate": 6.835304935556198e-06, + "loss": 0.2525, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2423592507839203, + "step": 3165, + "valid_targets_mean": 3192.9, + "valid_targets_min": 1355 + }, + { + "epoch": 5.301003344481606, + "grad_norm": 1.081711172053293, + "learning_rate": 6.772636694637183e-06, + "loss": 0.2441, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25607234239578247, + "step": 3170, + "valid_targets_mean": 2742.1, + "valid_targets_min": 1670 + }, + { + "epoch": 5.309364548494983, + "grad_norm": 0.959669265454484, + "learning_rate": 6.710198450186047e-06, + "loss": 0.2657, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2445649951696396, + "step": 3175, + "valid_targets_mean": 2531.1, + "valid_targets_min": 741 + }, + { + "epoch": 5.317725752508361, + "grad_norm": 1.0556452222632704, + "learning_rate": 6.6479912878746225e-06, + "loss": 0.2698, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.32292306423187256, + "step": 3180, + "valid_targets_mean": 3753.7, + "valid_targets_min": 1316 + }, + { + "epoch": 5.326086956521739, + "grad_norm": 1.110335544316918, + "learning_rate": 6.586016289356692e-06, + "loss": 0.2582, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20516835153102875, + "step": 3185, + "valid_targets_mean": 2823.1, + "valid_targets_min": 965 + }, + { + "epoch": 5.334448160535117, + "grad_norm": 0.9443496373098136, + "learning_rate": 6.524274532249195e-06, + "loss": 0.2247, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2695116698741913, + "step": 3190, + "valid_targets_mean": 2830.7, + "valid_targets_min": 595 + }, + { + "epoch": 5.342809364548495, + "grad_norm": 1.2883459095067469, + "learning_rate": 6.462767090113486e-06, + "loss": 0.2514, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21128371357917786, + "step": 3195, + "valid_targets_mean": 2801.6, + "valid_targets_min": 1972 + }, + { + "epoch": 5.351170568561873, + "grad_norm": 0.7305670443838644, + "learning_rate": 6.401495032436667e-06, + "loss": 0.2613, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.34377604722976685, + "step": 3200, + "valid_targets_mean": 5868.9, + "valid_targets_min": 1873 + }, + { + "epoch": 5.3595317725752505, + "grad_norm": 0.9618080510519665, + "learning_rate": 6.34045942461299e-06, + "loss": 0.2266, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20367145538330078, + "step": 3205, + "valid_targets_mean": 2934.6, + "valid_targets_min": 1274 + }, + { + "epoch": 5.367892976588629, + "grad_norm": 0.9183980494977025, + "learning_rate": 6.279661327925333e-06, + "loss": 0.2591, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24812433123588562, + "step": 3210, + "valid_targets_mean": 3023.1, + "valid_targets_min": 1477 + }, + { + "epoch": 5.3762541806020065, + "grad_norm": 0.9596558209841023, + "learning_rate": 6.219101799526753e-06, + "loss": 0.2541, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.33065247535705566, + "step": 3215, + "valid_targets_mean": 4098.1, + "valid_targets_min": 1177 + }, + { + "epoch": 5.384615384615385, + "grad_norm": 1.065383634447373, + "learning_rate": 6.158781892422085e-06, + "loss": 0.2614, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2475639134645462, + "step": 3220, + "valid_targets_mean": 2772.1, + "valid_targets_min": 1252 + }, + { + "epoch": 5.392976588628763, + "grad_norm": 1.044424208460433, + "learning_rate": 6.098702655449664e-06, + "loss": 0.2439, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2278965413570404, + "step": 3225, + "valid_targets_mean": 2502.4, + "valid_targets_min": 721 + }, + { + "epoch": 5.40133779264214, + "grad_norm": 0.9665704851257386, + "learning_rate": 6.038865133263054e-06, + "loss": 0.2448, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.31926289200782776, + "step": 3230, + "valid_targets_mean": 3628.5, + "valid_targets_min": 1750 + }, + { + "epoch": 5.409698996655519, + "grad_norm": 0.964435362243508, + "learning_rate": 5.9792703663129125e-06, + "loss": 0.2289, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21948401629924774, + "step": 3235, + "valid_targets_mean": 2733.2, + "valid_targets_min": 484 + }, + { + "epoch": 5.418060200668896, + "grad_norm": 0.9794403625903487, + "learning_rate": 5.919919390828859e-06, + "loss": 0.2556, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22015729546546936, + "step": 3240, + "valid_targets_mean": 2922.4, + "valid_targets_min": 1575 + }, + { + "epoch": 5.426421404682274, + "grad_norm": 0.9982047586124657, + "learning_rate": 5.860813238801523e-06, + "loss": 0.2553, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21148087084293365, + "step": 3245, + "valid_targets_mean": 3052.9, + "valid_targets_min": 1548 + }, + { + "epoch": 5.434782608695652, + "grad_norm": 0.8716785695051501, + "learning_rate": 5.801952937964537e-06, + "loss": 0.2537, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23328334093093872, + "step": 3250, + "valid_targets_mean": 3625.8, + "valid_targets_min": 648 + }, + { + "epoch": 5.44314381270903, + "grad_norm": 1.0317002727776252, + "learning_rate": 5.743339511776693e-06, + "loss": 0.2525, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.26123955845832825, + "step": 3255, + "valid_targets_mean": 3316.4, + "valid_targets_min": 553 + }, + { + "epoch": 5.451505016722408, + "grad_norm": 0.8933485431020612, + "learning_rate": 5.684973979404144e-06, + "loss": 0.2154, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23536235094070435, + "step": 3260, + "valid_targets_mean": 3289.2, + "valid_targets_min": 1411 + }, + { + "epoch": 5.459866220735786, + "grad_norm": 0.9330309774110559, + "learning_rate": 5.6268573557026865e-06, + "loss": 0.274, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25734925270080566, + "step": 3265, + "valid_targets_mean": 3473.9, + "valid_targets_min": 1469 + }, + { + "epoch": 5.468227424749164, + "grad_norm": 1.0119464981075554, + "learning_rate": 5.568990651200108e-06, + "loss": 0.2646, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2644900977611542, + "step": 3270, + "valid_targets_mean": 3406.8, + "valid_targets_min": 569 + }, + { + "epoch": 5.476588628762542, + "grad_norm": 1.0468454697016878, + "learning_rate": 5.511374872078616e-06, + "loss": 0.2538, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.29436013102531433, + "step": 3275, + "valid_targets_mean": 3085.1, + "valid_targets_min": 1665 + }, + { + "epoch": 5.48494983277592, + "grad_norm": 0.9588401461605252, + "learning_rate": 5.454011020157348e-06, + "loss": 0.2243, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21803942322731018, + "step": 3280, + "valid_targets_mean": 3367.6, + "valid_targets_min": 1776 + }, + { + "epoch": 5.493311036789297, + "grad_norm": 0.8920342971378608, + "learning_rate": 5.396900092874953e-06, + "loss": 0.2525, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20749205350875854, + "step": 3285, + "valid_targets_mean": 3192.9, + "valid_targets_min": 1668 + }, + { + "epoch": 5.501672240802676, + "grad_norm": 0.8025673440389024, + "learning_rate": 5.340043083272239e-06, + "loss": 0.2647, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25130224227905273, + "step": 3290, + "valid_targets_mean": 3616.1, + "valid_targets_min": 1456 + }, + { + "epoch": 5.510033444816053, + "grad_norm": 1.0290460488107447, + "learning_rate": 5.283440979974901e-06, + "loss": 0.2353, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.255595326423645, + "step": 3295, + "valid_targets_mean": 3070.9, + "valid_targets_min": 858 + }, + { + "epoch": 5.518394648829432, + "grad_norm": 0.9036243917362351, + "learning_rate": 5.227094767176364e-06, + "loss": 0.2535, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2398861050605774, + "step": 3300, + "valid_targets_mean": 3360.8, + "valid_targets_min": 1448 + }, + { + "epoch": 5.526755852842809, + "grad_norm": 0.9707296599929639, + "learning_rate": 5.17100542462063e-06, + "loss": 0.2485, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24867475032806396, + "step": 3305, + "valid_targets_mean": 3409.8, + "valid_targets_min": 1500 + }, + { + "epoch": 5.535117056856187, + "grad_norm": 1.0059190414101475, + "learning_rate": 5.115173927585264e-06, + "loss": 0.2462, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20245328545570374, + "step": 3310, + "valid_targets_mean": 2579.6, + "valid_targets_min": 965 + }, + { + "epoch": 5.543478260869565, + "grad_norm": 1.150440106449695, + "learning_rate": 5.059601246864438e-06, + "loss": 0.2338, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2849067747592926, + "step": 3315, + "valid_targets_mean": 3054.2, + "valid_targets_min": 1511 + }, + { + "epoch": 5.551839464882943, + "grad_norm": 1.082246481751666, + "learning_rate": 5.004288348752018e-06, + "loss": 0.2211, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24570798873901367, + "step": 3320, + "valid_targets_mean": 3292.1, + "valid_targets_min": 1374 + }, + { + "epoch": 5.5602006688963215, + "grad_norm": 0.9677466519631982, + "learning_rate": 4.949236195024825e-06, + "loss": 0.2483, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21175232529640198, + "step": 3325, + "valid_targets_mean": 2859.6, + "valid_targets_min": 452 + }, + { + "epoch": 5.568561872909699, + "grad_norm": 0.9670337251548189, + "learning_rate": 4.894445742925853e-06, + "loss": 0.2623, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23612819612026215, + "step": 3330, + "valid_targets_mean": 2778.9, + "valid_targets_min": 479 + }, + { + "epoch": 5.576923076923077, + "grad_norm": 0.9399114769292228, + "learning_rate": 4.839917945147647e-06, + "loss": 0.2502, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24552688002586365, + "step": 3335, + "valid_targets_mean": 2997.2, + "valid_targets_min": 1116 + }, + { + "epoch": 5.585284280936455, + "grad_norm": 0.8414887814395099, + "learning_rate": 4.785653749815744e-06, + "loss": 0.2561, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.29480239748954773, + "step": 3340, + "valid_targets_mean": 4310.2, + "valid_targets_min": 1636 + }, + { + "epoch": 5.593645484949833, + "grad_norm": 0.7931418317297482, + "learning_rate": 4.731654100472178e-06, + "loss": 0.2583, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3128657341003418, + "step": 3345, + "valid_targets_mean": 4607.0, + "valid_targets_min": 1093 + }, + { + "epoch": 5.602006688963211, + "grad_norm": 0.8840163267997618, + "learning_rate": 4.677919936059064e-06, + "loss": 0.2558, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24446120858192444, + "step": 3350, + "valid_targets_mean": 3667.1, + "valid_targets_min": 1631 + }, + { + "epoch": 5.610367892976589, + "grad_norm": 0.9191791952149919, + "learning_rate": 4.624452190902304e-06, + "loss": 0.2307, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2651020884513855, + "step": 3355, + "valid_targets_mean": 3420.4, + "valid_targets_min": 1745 + }, + { + "epoch": 5.618729096989966, + "grad_norm": 0.9355892323378777, + "learning_rate": 4.571251794695308e-06, + "loss": 0.2397, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.29256555438041687, + "step": 3360, + "valid_targets_mean": 4212.4, + "valid_targets_min": 1120 + }, + { + "epoch": 5.627090301003345, + "grad_norm": 1.0090759936821445, + "learning_rate": 4.518319672482845e-06, + "loss": 0.2427, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2667285203933716, + "step": 3365, + "valid_targets_mean": 2737.7, + "valid_targets_min": 729 + }, + { + "epoch": 5.635451505016722, + "grad_norm": 1.0171835292538327, + "learning_rate": 4.465656744644957e-06, + "loss": 0.2726, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.30735355615615845, + "step": 3370, + "valid_targets_mean": 3446.8, + "valid_targets_min": 1726 + }, + { + "epoch": 5.6438127090301, + "grad_norm": 0.9819290282194414, + "learning_rate": 4.413263926880935e-06, + "loss": 0.2608, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20418116450309753, + "step": 3375, + "valid_targets_mean": 3223.5, + "valid_targets_min": 726 + }, + { + "epoch": 5.6521739130434785, + "grad_norm": 0.9512457595996446, + "learning_rate": 4.3611421301934435e-06, + "loss": 0.2741, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2649402320384979, + "step": 3380, + "valid_targets_mean": 2930.5, + "valid_targets_min": 1139 + }, + { + "epoch": 5.660535117056856, + "grad_norm": 0.9658417094141116, + "learning_rate": 4.309292260872633e-06, + "loss": 0.2548, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21503464877605438, + "step": 3385, + "valid_targets_mean": 2954.3, + "valid_targets_min": 1684 + }, + { + "epoch": 5.668896321070234, + "grad_norm": 1.0062110489738594, + "learning_rate": 4.257715220480405e-06, + "loss": 0.2721, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2543056011199951, + "step": 3390, + "valid_targets_mean": 3383.0, + "valid_targets_min": 1705 + }, + { + "epoch": 5.677257525083612, + "grad_norm": 1.0054767456103721, + "learning_rate": 4.206411905834733e-06, + "loss": 0.2492, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3248503506183624, + "step": 3395, + "valid_targets_mean": 3018.8, + "valid_targets_min": 399 + }, + { + "epoch": 5.68561872909699, + "grad_norm": 1.085787300697935, + "learning_rate": 4.155383208994055e-06, + "loss": 0.2578, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24291780591011047, + "step": 3400, + "valid_targets_mean": 3004.7, + "valid_targets_min": 1883 + }, + { + "epoch": 5.693979933110368, + "grad_norm": 1.0345601696467057, + "learning_rate": 4.10463001724178e-06, + "loss": 0.2416, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24932274222373962, + "step": 3405, + "valid_targets_mean": 2949.5, + "valid_targets_min": 1622 + }, + { + "epoch": 5.702341137123746, + "grad_norm": 0.9908894862017844, + "learning_rate": 4.054153213070868e-06, + "loss": 0.2495, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2516738772392273, + "step": 3410, + "valid_targets_mean": 3543.2, + "valid_targets_min": 1348 + }, + { + "epoch": 5.710702341137123, + "grad_norm": 1.0855149671023934, + "learning_rate": 4.003953674168455e-06, + "loss": 0.242, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2057594358921051, + "step": 3415, + "valid_targets_mean": 3033.4, + "valid_targets_min": 1026 + }, + { + "epoch": 5.719063545150502, + "grad_norm": 0.9852747879835021, + "learning_rate": 3.954032273400608e-06, + "loss": 0.2406, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.266365110874176, + "step": 3420, + "valid_targets_mean": 3131.4, + "valid_targets_min": 1591 + }, + { + "epoch": 5.7274247491638794, + "grad_norm": 0.8893474256351308, + "learning_rate": 3.904389878797159e-06, + "loss": 0.2598, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2710364758968353, + "step": 3425, + "valid_targets_mean": 3534.0, + "valid_targets_min": 1096 + }, + { + "epoch": 5.735785953177258, + "grad_norm": 0.8715257939471994, + "learning_rate": 3.85502735353658e-06, + "loss": 0.2492, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2547582983970642, + "step": 3430, + "valid_targets_mean": 3161.2, + "valid_targets_min": 1420 + }, + { + "epoch": 5.7441471571906355, + "grad_norm": 0.9676813409941313, + "learning_rate": 3.8059455559310167e-06, + "loss": 0.2423, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2646714746952057, + "step": 3435, + "valid_targets_mean": 3319.5, + "valid_targets_min": 1710 + }, + { + "epoch": 5.752508361204013, + "grad_norm": 0.8470578522309716, + "learning_rate": 3.757145339411332e-06, + "loss": 0.229, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22734609246253967, + "step": 3440, + "valid_targets_mean": 4302.4, + "valid_targets_min": 667 + }, + { + "epoch": 5.760869565217392, + "grad_norm": 0.853636864130374, + "learning_rate": 3.708627552512276e-06, + "loss": 0.2301, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20801186561584473, + "step": 3445, + "valid_targets_mean": 3614.8, + "valid_targets_min": 1135 + }, + { + "epoch": 5.769230769230769, + "grad_norm": 0.8787913423829194, + "learning_rate": 3.660393038857739e-06, + "loss": 0.2541, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22825166583061218, + "step": 3450, + "valid_targets_mean": 3887.0, + "valid_targets_min": 1180 + }, + { + "epoch": 5.777591973244148, + "grad_norm": 1.1205253969747466, + "learning_rate": 3.6124426371460542e-06, + "loss": 0.2611, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.272126704454422, + "step": 3455, + "valid_targets_mean": 2778.0, + "valid_targets_min": 600 + }, + { + "epoch": 5.785953177257525, + "grad_norm": 1.0498337049709496, + "learning_rate": 3.564777181135466e-06, + "loss": 0.2762, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2422400861978531, + "step": 3460, + "valid_targets_mean": 2545.4, + "valid_targets_min": 640 + }, + { + "epoch": 5.794314381270903, + "grad_norm": 1.1993568136310828, + "learning_rate": 3.517397499629589e-06, + "loss": 0.2378, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2053757905960083, + "step": 3465, + "valid_targets_mean": 3302.1, + "valid_targets_min": 1127 + }, + { + "epoch": 5.802675585284281, + "grad_norm": 0.843797327600791, + "learning_rate": 3.4703044164630064e-06, + "loss": 0.1986, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2291642725467682, + "step": 3470, + "valid_targets_mean": 4161.9, + "valid_targets_min": 1041 + }, + { + "epoch": 5.811036789297659, + "grad_norm": 0.8755458954346165, + "learning_rate": 3.4234987504869553e-06, + "loss": 0.2483, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3180461823940277, + "step": 3475, + "valid_targets_mean": 4406.1, + "valid_targets_min": 2243 + }, + { + "epoch": 5.8193979933110365, + "grad_norm": 1.0301442813101, + "learning_rate": 3.376981315555086e-06, + "loss": 0.2767, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27760040760040283, + "step": 3480, + "valid_targets_mean": 2683.6, + "valid_targets_min": 591 + }, + { + "epoch": 5.827759197324415, + "grad_norm": 0.9573217341539905, + "learning_rate": 3.3307529205092903e-06, + "loss": 0.273, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.368574321269989, + "step": 3485, + "valid_targets_mean": 4355.6, + "valid_targets_min": 520 + }, + { + "epoch": 5.8361204013377925, + "grad_norm": 1.029121606843474, + "learning_rate": 3.2848143691656807e-06, + "loss": 0.2549, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23345233500003815, + "step": 3490, + "valid_targets_mean": 2955.6, + "valid_targets_min": 1126 + }, + { + "epoch": 5.84448160535117, + "grad_norm": 1.0526837100245612, + "learning_rate": 3.239166460300571e-06, + "loss": 0.2456, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23312462866306305, + "step": 3495, + "valid_targets_mean": 2799.9, + "valid_targets_min": 1173 + }, + { + "epoch": 5.852842809364549, + "grad_norm": 0.9555127758551365, + "learning_rate": 3.1938099876366047e-06, + "loss": 0.2627, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2574623227119446, + "step": 3500, + "valid_targets_mean": 2801.6, + "valid_targets_min": 1072 + }, + { + "epoch": 5.861204013377926, + "grad_norm": 1.0434914071493264, + "learning_rate": 3.1487457398289645e-06, + "loss": 0.2476, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2559327781200409, + "step": 3505, + "valid_targets_mean": 3459.7, + "valid_targets_min": 788 + }, + { + "epoch": 5.869565217391305, + "grad_norm": 1.012926148789505, + "learning_rate": 3.1039745004516207e-06, + "loss": 0.2417, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2865094840526581, + "step": 3510, + "valid_targets_mean": 3056.8, + "valid_targets_min": 696 + }, + { + "epoch": 5.877926421404682, + "grad_norm": 0.9712574615035141, + "learning_rate": 3.0594970479837683e-06, + "loss": 0.2177, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21085739135742188, + "step": 3515, + "valid_targets_mean": 2824.9, + "valid_targets_min": 599 + }, + { + "epoch": 5.88628762541806, + "grad_norm": 1.0473259954733953, + "learning_rate": 3.015314155796234e-06, + "loss": 0.2031, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2044457346200943, + "step": 3520, + "valid_targets_mean": 2760.9, + "valid_targets_min": 715 + }, + { + "epoch": 5.894648829431438, + "grad_norm": 0.9843115195011657, + "learning_rate": 2.9714265921380557e-06, + "loss": 0.2371, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20010721683502197, + "step": 3525, + "valid_targets_mean": 2771.8, + "valid_targets_min": 963 + }, + { + "epoch": 5.903010033444816, + "grad_norm": 0.8585558006225893, + "learning_rate": 2.927835120123128e-06, + "loss": 0.2512, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3017348051071167, + "step": 3530, + "valid_targets_mean": 3708.9, + "valid_targets_min": 1217 + }, + { + "epoch": 5.911371237458194, + "grad_norm": 1.0365669573233212, + "learning_rate": 2.8845404977169057e-06, + "loss": 0.2316, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24910643696784973, + "step": 3535, + "valid_targets_mean": 3256.1, + "valid_targets_min": 1093 + }, + { + "epoch": 5.919732441471572, + "grad_norm": 1.1977783758414982, + "learning_rate": 2.841543477723254e-06, + "loss": 0.3115, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3304036557674408, + "step": 3540, + "valid_targets_mean": 4009.1, + "valid_targets_min": 1801 + }, + { + "epoch": 5.9280936454849495, + "grad_norm": 1.0475636986470853, + "learning_rate": 2.7988448077713592e-06, + "loss": 0.2254, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23120662569999695, + "step": 3545, + "valid_targets_mean": 2633.4, + "valid_targets_min": 1628 + }, + { + "epoch": 5.936454849498328, + "grad_norm": 0.8986613684835274, + "learning_rate": 2.7564452303027024e-06, + "loss": 0.24, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25718823075294495, + "step": 3550, + "valid_targets_mean": 3136.1, + "valid_targets_min": 1648 + }, + { + "epoch": 5.944816053511706, + "grad_norm": 0.8828527508561764, + "learning_rate": 2.7143454825581714e-06, + "loss": 0.2043, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.189050555229187, + "step": 3555, + "valid_targets_mean": 3121.1, + "valid_targets_min": 1683 + }, + { + "epoch": 5.953177257525084, + "grad_norm": 1.117960588110681, + "learning_rate": 2.672546296565237e-06, + "loss": 0.2232, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24358290433883667, + "step": 3560, + "valid_targets_mean": 3175.4, + "valid_targets_min": 1662 + }, + { + "epoch": 5.961538461538462, + "grad_norm": 1.0074867797130804, + "learning_rate": 2.6310483991252133e-06, + "loss": 0.2442, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2374524474143982, + "step": 3565, + "valid_targets_mean": 2834.6, + "valid_targets_min": 1201 + }, + { + "epoch": 5.969899665551839, + "grad_norm": 1.0690795488474825, + "learning_rate": 2.589852511800646e-06, + "loss": 0.2652, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17189118266105652, + "step": 3570, + "valid_targets_mean": 2602.5, + "valid_targets_min": 1331 + }, + { + "epoch": 5.978260869565218, + "grad_norm": 0.9971267052649938, + "learning_rate": 2.54895935090274e-06, + "loss": 0.2497, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21394936740398407, + "step": 3575, + "valid_targets_mean": 2751.8, + "valid_targets_min": 750 + }, + { + "epoch": 5.986622073578595, + "grad_norm": 0.9537557291572413, + "learning_rate": 2.508369627478917e-06, + "loss": 0.2331, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22841092944145203, + "step": 3580, + "valid_targets_mean": 3169.6, + "valid_targets_min": 799 + }, + { + "epoch": 5.994983277591973, + "grad_norm": 0.9343814378914798, + "learning_rate": 2.468084047300452e-06, + "loss": 0.2185, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2370833456516266, + "step": 3585, + "valid_targets_mean": 3215.1, + "valid_targets_min": 998 + }, + { + "epoch": 6.003344481605351, + "grad_norm": 0.8238052983578956, + "learning_rate": 2.4281033108501873e-06, + "loss": 0.2314, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2642517685890198, + "step": 3590, + "valid_targets_mean": 4032.8, + "valid_targets_min": 1390 + }, + { + "epoch": 6.011705685618729, + "grad_norm": 0.8851781452831197, + "learning_rate": 2.3884281133103725e-06, + "loss": 0.2232, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2074108123779297, + "step": 3595, + "valid_targets_mean": 3117.4, + "valid_targets_min": 685 + }, + { + "epoch": 6.0200668896321075, + "grad_norm": 1.0191065552656804, + "learning_rate": 2.3490591445505715e-06, + "loss": 0.2346, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20292872190475464, + "step": 3600, + "valid_targets_mean": 2607.1, + "valid_targets_min": 1071 + }, + { + "epoch": 6.028428093645485, + "grad_norm": 0.8274990269440808, + "learning_rate": 2.309997089115659e-06, + "loss": 0.2578, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27467474341392517, + "step": 3605, + "valid_targets_mean": 4511.8, + "valid_targets_min": 1261 + }, + { + "epoch": 6.036789297658863, + "grad_norm": 0.9829693732867127, + "learning_rate": 2.271242626213925e-06, + "loss": 0.2406, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2838166654109955, + "step": 3610, + "valid_targets_mean": 3051.6, + "valid_targets_min": 848 + }, + { + "epoch": 6.045150501672241, + "grad_norm": 0.9430424383463232, + "learning_rate": 2.232796429705253e-06, + "loss": 0.2751, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2801671028137207, + "step": 3615, + "valid_targets_mean": 3661.6, + "valid_targets_min": 783 + }, + { + "epoch": 6.053511705685619, + "grad_norm": 0.9799256756829944, + "learning_rate": 2.1946591680894145e-06, + "loss": 0.2252, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21253493428230286, + "step": 3620, + "valid_targets_mean": 3080.5, + "valid_targets_min": 1116 + }, + { + "epoch": 6.061872909698996, + "grad_norm": 0.890449059990666, + "learning_rate": 2.1568315044944586e-06, + "loss": 0.2279, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23232722282409668, + "step": 3625, + "valid_targets_mean": 3467.1, + "valid_targets_min": 689 + }, + { + "epoch": 6.070234113712375, + "grad_norm": 1.553508951062491, + "learning_rate": 2.1193140966651484e-06, + "loss": 0.2247, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2310488224029541, + "step": 3630, + "valid_targets_mean": 3239.2, + "valid_targets_min": 1426 + }, + { + "epoch": 6.078595317725752, + "grad_norm": 1.0015740592300846, + "learning_rate": 2.082107596951548e-06, + "loss": 0.2417, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2560243308544159, + "step": 3635, + "valid_targets_mean": 3611.4, + "valid_targets_min": 1662 + }, + { + "epoch": 6.086956521739131, + "grad_norm": 0.9610763495680839, + "learning_rate": 2.0452126522976746e-06, + "loss": 0.2572, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23907433450222015, + "step": 3640, + "valid_targets_mean": 3443.9, + "valid_targets_min": 971 + }, + { + "epoch": 6.095317725752508, + "grad_norm": 1.0195087469479902, + "learning_rate": 2.008629904230237e-06, + "loss": 0.2266, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21630674600601196, + "step": 3645, + "valid_targets_mean": 3096.0, + "valid_targets_min": 1263 + }, + { + "epoch": 6.103678929765886, + "grad_norm": 1.1781421008430892, + "learning_rate": 1.972359988847499e-06, + "loss": 0.2767, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22949744760990143, + "step": 3650, + "valid_targets_mean": 2490.5, + "valid_targets_min": 1214 + }, + { + "epoch": 6.1120401337792645, + "grad_norm": 1.0009259653669014, + "learning_rate": 1.9364035368082222e-06, + "loss": 0.2501, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17091065645217896, + "step": 3655, + "valid_targets_mean": 2323.1, + "valid_targets_min": 1277 + }, + { + "epoch": 6.120401337792642, + "grad_norm": 0.9973471406872797, + "learning_rate": 1.9007611733206733e-06, + "loss": 0.2399, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.29807907342910767, + "step": 3660, + "valid_targets_mean": 3280.1, + "valid_targets_min": 919 + }, + { + "epoch": 6.12876254180602, + "grad_norm": 0.9458454127635654, + "learning_rate": 1.8654335181317784e-06, + "loss": 0.2427, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.26122528314590454, + "step": 3665, + "valid_targets_mean": 3286.4, + "valid_targets_min": 1471 + }, + { + "epoch": 6.137123745819398, + "grad_norm": 0.9302638864213695, + "learning_rate": 1.8304211855163311e-06, + "loss": 0.2551, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.30081790685653687, + "step": 3670, + "valid_targets_mean": 3793.2, + "valid_targets_min": 392 + }, + { + "epoch": 6.145484949832776, + "grad_norm": 0.9293216572645322, + "learning_rate": 1.7957247842663194e-06, + "loss": 0.2067, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2717227041721344, + "step": 3675, + "valid_targets_mean": 3530.5, + "valid_targets_min": 1072 + }, + { + "epoch": 6.153846153846154, + "grad_norm": 1.0642259780797343, + "learning_rate": 1.7613449176803476e-06, + "loss": 0.191, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18526840209960938, + "step": 3680, + "valid_targets_mean": 3227.3, + "valid_targets_min": 1819 + }, + { + "epoch": 6.162207357859532, + "grad_norm": 1.0950639668119169, + "learning_rate": 1.7272821835531295e-06, + "loss": 0.2536, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23934021592140198, + "step": 3685, + "valid_targets_mean": 2936.0, + "valid_targets_min": 1404 + }, + { + "epoch": 6.170568561872909, + "grad_norm": 0.8320781108074271, + "learning_rate": 1.693537174165103e-06, + "loss": 0.2569, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2670717239379883, + "step": 3690, + "valid_targets_mean": 4768.5, + "valid_targets_min": 754 + }, + { + "epoch": 6.178929765886288, + "grad_norm": 1.0190310935542606, + "learning_rate": 1.660110476272132e-06, + "loss": 0.2267, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1878700852394104, + "step": 3695, + "valid_targets_mean": 2822.3, + "valid_targets_min": 1513 + }, + { + "epoch": 6.187290969899665, + "grad_norm": 0.946671940766795, + "learning_rate": 1.6270026710952924e-06, + "loss": 0.2299, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18372738361358643, + "step": 3700, + "valid_targets_mean": 3202.5, + "valid_targets_min": 1844 + }, + { + "epoch": 6.195652173913044, + "grad_norm": 1.0311761236131074, + "learning_rate": 1.5942143343107953e-06, + "loss": 0.2366, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1737433671951294, + "step": 3705, + "valid_targets_mean": 2330.4, + "valid_targets_min": 983 + }, + { + "epoch": 6.2040133779264215, + "grad_norm": 1.7693463573078112, + "learning_rate": 1.5617460360399439e-06, + "loss": 0.2312, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24771828949451447, + "step": 3710, + "valid_targets_mean": 2783.2, + "valid_targets_min": 674 + }, + { + "epoch": 6.212374581939799, + "grad_norm": 1.1913402227691237, + "learning_rate": 1.529598340839238e-06, + "loss": 0.2247, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20294560492038727, + "step": 3715, + "valid_targets_mean": 3321.5, + "valid_targets_min": 1623 + }, + { + "epoch": 6.2207357859531776, + "grad_norm": 1.1205061236211051, + "learning_rate": 1.4977718076905533e-06, + "loss": 0.245, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20099762082099915, + "step": 3720, + "valid_targets_mean": 2455.0, + "valid_targets_min": 682 + }, + { + "epoch": 6.229096989966555, + "grad_norm": 0.872701348205632, + "learning_rate": 1.4662669899914161e-06, + "loss": 0.2251, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.240260511636734, + "step": 3725, + "valid_targets_mean": 4170.6, + "valid_targets_min": 587 + }, + { + "epoch": 6.237458193979933, + "grad_norm": 1.149500218715332, + "learning_rate": 1.4350844355453952e-06, + "loss": 0.2167, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24577274918556213, + "step": 3730, + "valid_targets_mean": 2776.9, + "valid_targets_min": 1146 + }, + { + "epoch": 6.245819397993311, + "grad_norm": 0.9744589071128174, + "learning_rate": 1.404224686552571e-06, + "loss": 0.2699, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2977054715156555, + "step": 3735, + "valid_targets_mean": 3161.8, + "valid_targets_min": 1420 + }, + { + "epoch": 6.254180602006689, + "grad_norm": 1.0622602777455052, + "learning_rate": 1.3736882796000983e-06, + "loss": 0.2217, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2422657608985901, + "step": 3740, + "valid_targets_mean": 3276.9, + "valid_targets_min": 2083 + }, + { + "epoch": 6.262541806020067, + "grad_norm": 0.9192780596090956, + "learning_rate": 1.3434757456528868e-06, + "loss": 0.2165, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23593655228614807, + "step": 3745, + "valid_targets_mean": 3154.8, + "valid_targets_min": 1058 + }, + { + "epoch": 6.270903010033445, + "grad_norm": 1.0794767225936577, + "learning_rate": 1.3135876100443557e-06, + "loss": 0.2451, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19183969497680664, + "step": 3750, + "valid_targets_mean": 2641.8, + "valid_targets_min": 1122 + }, + { + "epoch": 6.2792642140468224, + "grad_norm": 1.1069267787089752, + "learning_rate": 1.2840243924673202e-06, + "loss": 0.2327, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19526013731956482, + "step": 3755, + "valid_targets_mean": 2651.9, + "valid_targets_min": 1307 + }, + { + "epoch": 6.287625418060201, + "grad_norm": 0.9530346118359216, + "learning_rate": 1.2547866069649418e-06, + "loss": 0.2508, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2443237155675888, + "step": 3760, + "valid_targets_mean": 2864.8, + "valid_targets_min": 1587 + }, + { + "epoch": 6.2959866220735785, + "grad_norm": 0.9997194895255284, + "learning_rate": 1.225874761921788e-06, + "loss": 0.2238, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22411714494228363, + "step": 3765, + "valid_targets_mean": 2974.7, + "valid_targets_min": 1770 + }, + { + "epoch": 6.304347826086957, + "grad_norm": 0.9257546461579786, + "learning_rate": 1.1972893600550007e-06, + "loss": 0.271, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20396903157234192, + "step": 3770, + "valid_targets_mean": 3238.4, + "valid_targets_min": 696 + }, + { + "epoch": 6.312709030100335, + "grad_norm": 0.9526741297223604, + "learning_rate": 1.1690308984055454e-06, + "loss": 0.2326, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.31394025683403015, + "step": 3775, + "valid_targets_mean": 3941.2, + "valid_targets_min": 1904 + }, + { + "epoch": 6.321070234113712, + "grad_norm": 1.0127893431246604, + "learning_rate": 1.141099868329576e-06, + "loss": 0.2697, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2128710150718689, + "step": 3780, + "valid_targets_mean": 3173.2, + "valid_targets_min": 2167 + }, + { + "epoch": 6.329431438127091, + "grad_norm": 0.8562910539058333, + "learning_rate": 1.1134967554898868e-06, + "loss": 0.2487, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3077230155467987, + "step": 3785, + "valid_targets_mean": 3871.0, + "valid_targets_min": 1399 + }, + { + "epoch": 6.337792642140468, + "grad_norm": 1.0570481175477418, + "learning_rate": 1.0862220398474798e-06, + "loss": 0.202, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24611452221870422, + "step": 3790, + "valid_targets_mean": 2720.8, + "valid_targets_min": 1018 + }, + { + "epoch": 6.346153846153846, + "grad_norm": 0.9974643589859099, + "learning_rate": 1.0592761956531983e-06, + "loss": 0.26, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15949031710624695, + "step": 3795, + "valid_targets_mean": 2890.8, + "valid_targets_min": 693 + }, + { + "epoch": 6.354515050167224, + "grad_norm": 1.0285315175075678, + "learning_rate": 1.0326596914395015e-06, + "loss": 0.2196, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.218702495098114, + "step": 3800, + "valid_targets_mean": 2983.0, + "valid_targets_min": 1307 + }, + { + "epoch": 6.362876254180602, + "grad_norm": 1.0391314709729795, + "learning_rate": 1.0063729900122943e-06, + "loss": 0.2349, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22132647037506104, + "step": 3805, + "valid_targets_mean": 3014.7, + "valid_targets_min": 1600 + }, + { + "epoch": 6.3712374581939795, + "grad_norm": 1.0732291866174521, + "learning_rate": 9.80416548442904e-07, + "loss": 0.2359, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16350066661834717, + "step": 3810, + "valid_targets_mean": 2471.2, + "valid_targets_min": 1252 + }, + { + "epoch": 6.379598662207358, + "grad_norm": 0.9478207894233418, + "learning_rate": 9.547908180601274e-07, + "loss": 0.218, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2005515992641449, + "step": 3815, + "valid_targets_mean": 3266.4, + "valid_targets_min": 1219 + }, + { + "epoch": 6.3879598662207355, + "grad_norm": 0.9884148175012485, + "learning_rate": 9.294962444423672e-07, + "loss": 0.2352, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18229839205741882, + "step": 3820, + "valid_targets_mean": 2692.9, + "valid_targets_min": 1731 + }, + { + "epoch": 6.396321070234114, + "grad_norm": 1.0689420786608823, + "learning_rate": 9.045332674099039e-07, + "loss": 0.2429, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2374996840953827, + "step": 3825, + "valid_targets_mean": 3234.9, + "valid_targets_min": 978 + }, + { + "epoch": 6.404682274247492, + "grad_norm": 0.9999053556423455, + "learning_rate": 8.799023210172319e-07, + "loss": 0.2183, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2577441334724426, + "step": 3830, + "valid_targets_mean": 3541.5, + "valid_targets_min": 860 + }, + { + "epoch": 6.413043478260869, + "grad_norm": 1.0162923003110396, + "learning_rate": 8.556038335455241e-07, + "loss": 0.2354, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17862260341644287, + "step": 3835, + "valid_targets_mean": 3000.1, + "valid_targets_min": 1116 + }, + { + "epoch": 6.421404682274248, + "grad_norm": 0.9436887696227507, + "learning_rate": 8.316382274951773e-07, + "loss": 0.2596, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3100453317165375, + "step": 3840, + "valid_targets_mean": 4021.4, + "valid_targets_min": 556 + }, + { + "epoch": 6.429765886287625, + "grad_norm": 0.9165404614671304, + "learning_rate": 8.080059195784829e-07, + "loss": 0.2715, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.29584699869155884, + "step": 3845, + "valid_targets_mean": 3990.2, + "valid_targets_min": 1509 + }, + { + "epoch": 6.438127090301004, + "grad_norm": 1.1633047424989935, + "learning_rate": 7.847073207123523e-07, + "loss": 0.2805, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2624181807041168, + "step": 3850, + "valid_targets_mean": 2811.7, + "valid_targets_min": 1472 + }, + { + "epoch": 6.446488294314381, + "grad_norm": 0.9983233707225826, + "learning_rate": 7.617428360111945e-07, + "loss": 0.2271, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.281194269657135, + "step": 3855, + "valid_targets_mean": 3405.9, + "valid_targets_min": 738 + }, + { + "epoch": 6.454849498327759, + "grad_norm": 1.1246524097930575, + "learning_rate": 7.391128647798607e-07, + "loss": 0.2389, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22077451646327972, + "step": 3860, + "valid_targets_mean": 2505.6, + "valid_targets_min": 1234 + }, + { + "epoch": 6.463210702341137, + "grad_norm": 0.886456015080147, + "learning_rate": 7.168178005067062e-07, + "loss": 0.2656, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.29799652099609375, + "step": 3865, + "valid_targets_mean": 3983.9, + "valid_targets_min": 1248 + }, + { + "epoch": 6.471571906354515, + "grad_norm": 1.103332362483573, + "learning_rate": 6.948580308567532e-07, + "loss": 0.2222, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2571655511856079, + "step": 3870, + "valid_targets_mean": 2698.6, + "valid_targets_min": 710 + }, + { + "epoch": 6.479933110367893, + "grad_norm": 1.0813420856710951, + "learning_rate": 6.732339376649388e-07, + "loss": 0.2307, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22849063575267792, + "step": 3875, + "valid_targets_mean": 2950.5, + "valid_targets_min": 1156 + }, + { + "epoch": 6.488294314381271, + "grad_norm": 1.0707482467762934, + "learning_rate": 6.519458969294845e-07, + "loss": 0.2237, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18700650334358215, + "step": 3880, + "valid_targets_mean": 2505.4, + "valid_targets_min": 1004 + }, + { + "epoch": 6.496655518394649, + "grad_norm": 0.9136997187616676, + "learning_rate": 6.309942788053502e-07, + "loss": 0.2166, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18983229994773865, + "step": 3885, + "valid_targets_mean": 2847.4, + "valid_targets_min": 1011 + }, + { + "epoch": 6.505016722408027, + "grad_norm": 0.9836787164297042, + "learning_rate": 6.103794475978086e-07, + "loss": 0.2476, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27802348136901855, + "step": 3890, + "valid_targets_mean": 3571.6, + "valid_targets_min": 1766 + }, + { + "epoch": 6.513377926421405, + "grad_norm": 1.0040587188078438, + "learning_rate": 5.901017617560989e-07, + "loss": 0.2256, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2178075611591339, + "step": 3895, + "valid_targets_mean": 3485.3, + "valid_targets_min": 1748 + }, + { + "epoch": 6.521739130434782, + "grad_norm": 0.9972334739609068, + "learning_rate": 5.701615738672073e-07, + "loss": 0.2391, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2519034743309021, + "step": 3900, + "valid_targets_mean": 3683.1, + "valid_targets_min": 1279 + }, + { + "epoch": 6.530100334448161, + "grad_norm": 1.0048200677550907, + "learning_rate": 5.505592306497298e-07, + "loss": 0.2199, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2037794291973114, + "step": 3905, + "valid_targets_mean": 2815.6, + "valid_targets_min": 1187 + }, + { + "epoch": 6.538461538461538, + "grad_norm": 0.9738058489579007, + "learning_rate": 5.312950729478327e-07, + "loss": 0.2571, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1917770355939865, + "step": 3910, + "valid_targets_mean": 2968.3, + "valid_targets_min": 1757 + }, + { + "epoch": 6.546822742474916, + "grad_norm": 0.9699331976466884, + "learning_rate": 5.123694357253439e-07, + "loss": 0.2332, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19296962022781372, + "step": 3915, + "valid_targets_mean": 3587.3, + "valid_targets_min": 1059 + }, + { + "epoch": 6.555183946488294, + "grad_norm": 0.9210291981550992, + "learning_rate": 4.937826480599195e-07, + "loss": 0.239, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25981834530830383, + "step": 3920, + "valid_targets_mean": 3616.3, + "valid_targets_min": 772 + }, + { + "epoch": 6.563545150501672, + "grad_norm": 0.9639041354225487, + "learning_rate": 4.755350331373243e-07, + "loss": 0.2665, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22324317693710327, + "step": 3925, + "valid_targets_mean": 3426.1, + "valid_targets_min": 1477 + }, + { + "epoch": 6.5719063545150505, + "grad_norm": 1.0060449285500996, + "learning_rate": 4.576269082458118e-07, + "loss": 0.2155, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18587031960487366, + "step": 3930, + "valid_targets_mean": 2988.8, + "valid_targets_min": 1536 + }, + { + "epoch": 6.580267558528428, + "grad_norm": 1.125813362538308, + "learning_rate": 4.4005858477060404e-07, + "loss": 0.2445, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21866926550865173, + "step": 3935, + "valid_targets_mean": 3366.3, + "valid_targets_min": 1060 + }, + { + "epoch": 6.588628762541806, + "grad_norm": 1.0628904813820763, + "learning_rate": 4.228303681884782e-07, + "loss": 0.2324, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25543057918548584, + "step": 3940, + "valid_targets_mean": 3191.6, + "valid_targets_min": 1800 + }, + { + "epoch": 6.596989966555184, + "grad_norm": 0.9915725733061522, + "learning_rate": 4.059425580624576e-07, + "loss": 0.2374, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.28621232509613037, + "step": 3945, + "valid_targets_mean": 3603.1, + "valid_targets_min": 1548 + }, + { + "epoch": 6.605351170568562, + "grad_norm": 0.9949146897868569, + "learning_rate": 3.893954480366091e-07, + "loss": 0.227, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1640351116657257, + "step": 3950, + "valid_targets_mean": 2460.5, + "valid_targets_min": 880 + }, + { + "epoch": 6.61371237458194, + "grad_norm": 1.111214711835855, + "learning_rate": 3.731893258309227e-07, + "loss": 0.2271, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22030051052570343, + "step": 3955, + "valid_targets_mean": 2509.4, + "valid_targets_min": 553 + }, + { + "epoch": 6.622073578595318, + "grad_norm": 1.1676078726615438, + "learning_rate": 3.573244732363179e-07, + "loss": 0.2437, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.243607297539711, + "step": 3960, + "valid_targets_mean": 3134.6, + "valid_targets_min": 689 + }, + { + "epoch": 6.630434782608695, + "grad_norm": 0.9596071900127525, + "learning_rate": 3.4180116610973645e-07, + "loss": 0.2194, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.280606210231781, + "step": 3965, + "valid_targets_mean": 3487.9, + "valid_targets_min": 1539 + }, + { + "epoch": 6.638795986622074, + "grad_norm": 1.0307311782355484, + "learning_rate": 3.2661967436936394e-07, + "loss": 0.2347, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2129300832748413, + "step": 3970, + "valid_targets_mean": 3150.9, + "valid_targets_min": 364 + }, + { + "epoch": 6.647157190635451, + "grad_norm": 0.9029900853777432, + "learning_rate": 3.117802619899113e-07, + "loss": 0.2266, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2022523730993271, + "step": 3975, + "valid_targets_mean": 3131.2, + "valid_targets_min": 1462 + }, + { + "epoch": 6.65551839464883, + "grad_norm": 0.9353250337198755, + "learning_rate": 2.9728318699804525e-07, + "loss": 0.2183, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.26376834511756897, + "step": 3980, + "valid_targets_mean": 3456.8, + "valid_targets_min": 1636 + }, + { + "epoch": 6.6638795986622075, + "grad_norm": 1.1365720683226703, + "learning_rate": 2.831287014678941e-07, + "loss": 0.2141, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19698631763458252, + "step": 3985, + "valid_targets_mean": 2634.6, + "valid_targets_min": 569 + }, + { + "epoch": 6.672240802675585, + "grad_norm": 0.9037444665225245, + "learning_rate": 2.693170515166599e-07, + "loss": 0.2162, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17331139743328094, + "step": 3990, + "valid_targets_mean": 3423.4, + "valid_targets_min": 2070 + }, + { + "epoch": 6.6806020066889635, + "grad_norm": 1.012894148288379, + "learning_rate": 2.558484773003445e-07, + "loss": 0.2485, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2655693292617798, + "step": 3995, + "valid_targets_mean": 3323.4, + "valid_targets_min": 1844 + }, + { + "epoch": 6.688963210702341, + "grad_norm": 0.9548388318374232, + "learning_rate": 2.427232130095747e-07, + "loss": 0.2373, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21322210133075714, + "step": 4000, + "valid_targets_mean": 2801.6, + "valid_targets_min": 1264 + }, + { + "epoch": 6.697324414715719, + "grad_norm": 1.0832658905494663, + "learning_rate": 2.299414868655281e-07, + "loss": 0.2434, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2207055389881134, + "step": 4005, + "valid_targets_mean": 2772.9, + "valid_targets_min": 1399 + }, + { + "epoch": 6.705685618729097, + "grad_norm": 0.9005094142183284, + "learning_rate": 2.1750352111596707e-07, + "loss": 0.24, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24203582108020782, + "step": 4010, + "valid_targets_mean": 3876.8, + "valid_targets_min": 2273 + }, + { + "epoch": 6.714046822742475, + "grad_norm": 0.9978940251893587, + "learning_rate": 2.0540953203137093e-07, + "loss": 0.225, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1879318505525589, + "step": 4015, + "valid_targets_mean": 3497.7, + "valid_targets_min": 1234 + }, + { + "epoch": 6.722408026755852, + "grad_norm": 0.9909059278250834, + "learning_rate": 1.9365972990117e-07, + "loss": 0.2323, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1991642862558365, + "step": 4020, + "valid_targets_mean": 2930.1, + "valid_targets_min": 1353 + }, + { + "epoch": 6.730769230769231, + "grad_norm": 0.9925673396527694, + "learning_rate": 1.8225431903010403e-07, + "loss": 0.2172, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23669245839118958, + "step": 4025, + "valid_targets_mean": 2976.0, + "valid_targets_min": 978 + }, + { + "epoch": 6.739130434782608, + "grad_norm": 0.97958659307702, + "learning_rate": 1.7119349773466076e-07, + "loss": 0.2493, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18920472264289856, + "step": 4030, + "valid_targets_mean": 2922.8, + "valid_targets_min": 799 + }, + { + "epoch": 6.747491638795987, + "grad_norm": 0.9895938981944002, + "learning_rate": 1.6047745833962735e-07, + "loss": 0.2638, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4012299180030823, + "step": 4035, + "valid_targets_mean": 4044.7, + "valid_targets_min": 750 + }, + { + "epoch": 6.7558528428093645, + "grad_norm": 1.005865202853038, + "learning_rate": 1.5010638717474878e-07, + "loss": 0.2231, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2346489280462265, + "step": 4040, + "valid_targets_mean": 3234.9, + "valid_targets_min": 1001 + }, + { + "epoch": 6.764214046822742, + "grad_norm": 0.9665878862524135, + "learning_rate": 1.400804645714815e-07, + "loss": 0.235, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20860449969768524, + "step": 4045, + "valid_targets_mean": 2963.2, + "valid_targets_min": 1512 + }, + { + "epoch": 6.7725752508361206, + "grad_norm": 0.9073442199596257, + "learning_rate": 1.30399864859867e-07, + "loss": 0.2187, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21425005793571472, + "step": 4050, + "valid_targets_mean": 4054.4, + "valid_targets_min": 970 + }, + { + "epoch": 6.780936454849498, + "grad_norm": 1.1395884258364646, + "learning_rate": 1.2106475636549654e-07, + "loss": 0.2503, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2343166172504425, + "step": 4055, + "valid_targets_mean": 2424.9, + "valid_targets_min": 511 + }, + { + "epoch": 6.789297658862877, + "grad_norm": 1.2355832602043204, + "learning_rate": 1.1207530140658452e-07, + "loss": 0.247, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1832336038351059, + "step": 4060, + "valid_targets_mean": 2785.1, + "valid_targets_min": 1439 + }, + { + "epoch": 6.797658862876254, + "grad_norm": 1.0756162811102452, + "learning_rate": 1.0343165629114416e-07, + "loss": 0.2292, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2562386989593506, + "step": 4065, + "valid_targets_mean": 2926.1, + "valid_targets_min": 1110 + }, + { + "epoch": 6.806020066889632, + "grad_norm": 1.12479348737203, + "learning_rate": 9.513397131427404e-08, + "loss": 0.2543, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19680562615394592, + "step": 4070, + "valid_targets_mean": 3050.8, + "valid_targets_min": 1505 + }, + { + "epoch": 6.81438127090301, + "grad_norm": 1.3027817427113138, + "learning_rate": 8.71823907555358e-08, + "loss": 0.2794, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2448652684688568, + "step": 4075, + "valid_targets_mean": 3234.3, + "valid_targets_min": 965 + }, + { + "epoch": 6.822742474916388, + "grad_norm": 1.0120756120555998, + "learning_rate": 7.957705287645834e-08, + "loss": 0.2318, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.236515074968338, + "step": 4080, + "valid_targets_mean": 3107.5, + "valid_targets_min": 1469 + }, + { + "epoch": 6.831103678929766, + "grad_norm": 1.1643671752495577, + "learning_rate": 7.231808991812639e-08, + "loss": 0.227, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24128901958465576, + "step": 4085, + "valid_targets_mean": 2651.4, + "valid_targets_min": 513 + }, + { + "epoch": 6.839464882943144, + "grad_norm": 1.0224383394242054, + "learning_rate": 6.540562809887574e-08, + "loss": 0.2128, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2086195945739746, + "step": 4090, + "valid_targets_mean": 2881.9, + "valid_targets_min": 594 + }, + { + "epoch": 6.8478260869565215, + "grad_norm": 0.8984874112503329, + "learning_rate": 5.8839787612114955e-08, + "loss": 0.2302, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19981077313423157, + "step": 4095, + "valid_targets_mean": 3368.8, + "valid_targets_min": 1532 + }, + { + "epoch": 6.8561872909699, + "grad_norm": 1.1889036672332234, + "learning_rate": 5.2620682624213714e-08, + "loss": 0.2788, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3057703673839569, + "step": 4100, + "valid_targets_mean": 2838.0, + "valid_targets_min": 520 + }, + { + "epoch": 6.864548494983278, + "grad_norm": 0.9722791686736607, + "learning_rate": 4.6748421272537756e-08, + "loss": 0.2356, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3012353777885437, + "step": 4105, + "valid_targets_mean": 3410.2, + "valid_targets_min": 401 + }, + { + "epoch": 6.872909698996655, + "grad_norm": 0.8439698339063494, + "learning_rate": 4.1223105663554806e-08, + "loss": 0.2274, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22458210587501526, + "step": 4110, + "valid_targets_mean": 4013.0, + "valid_targets_min": 967 + }, + { + "epoch": 6.881270903010034, + "grad_norm": 0.9937076155434867, + "learning_rate": 3.604483187106711e-08, + "loss": 0.2557, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21647757291793823, + "step": 4115, + "valid_targets_mean": 2891.7, + "valid_targets_min": 1398 + }, + { + "epoch": 6.889632107023411, + "grad_norm": 0.9554683145250049, + "learning_rate": 3.1213689934537215e-08, + "loss": 0.2428, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.227905735373497, + "step": 4120, + "valid_targets_mean": 3056.9, + "valid_targets_min": 1091 + }, + { + "epoch": 6.897993311036789, + "grad_norm": 0.9851841831055532, + "learning_rate": 2.6729763857522573e-08, + "loss": 0.2184, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20012471079826355, + "step": 4125, + "valid_targets_mean": 2812.2, + "valid_targets_min": 959 + }, + { + "epoch": 6.906354515050167, + "grad_norm": 0.9102139042294596, + "learning_rate": 2.2593131606216677e-08, + "loss": 0.2404, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22855457663536072, + "step": 4130, + "valid_targets_mean": 3411.4, + "valid_targets_min": 780 + }, + { + "epoch": 6.914715719063545, + "grad_norm": 0.9968709409763933, + "learning_rate": 1.880386510809018e-08, + "loss": 0.2175, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24055655300617218, + "step": 4135, + "valid_targets_mean": 2944.4, + "valid_targets_min": 1447 + }, + { + "epoch": 6.923076923076923, + "grad_norm": 1.0089101681026145, + "learning_rate": 1.536203025064742e-08, + "loss": 0.2634, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23049476742744446, + "step": 4140, + "valid_targets_mean": 3192.5, + "valid_targets_min": 1745 + }, + { + "epoch": 6.931438127090301, + "grad_norm": 1.0054169631158152, + "learning_rate": 1.226768688026736e-08, + "loss": 0.2575, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21784156560897827, + "step": 4145, + "valid_targets_mean": 2982.2, + "valid_targets_min": 663 + }, + { + "epoch": 6.9397993311036785, + "grad_norm": 1.0500654956007207, + "learning_rate": 9.520888801182182e-09, + "loss": 0.2275, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21303492784500122, + "step": 4150, + "valid_targets_mean": 2794.8, + "valid_targets_min": 1323 + }, + { + "epoch": 6.948160535117057, + "grad_norm": 1.054419064540089, + "learning_rate": 7.121683774518051e-09, + "loss": 0.2378, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17308101058006287, + "step": 4155, + "valid_targets_mean": 2646.4, + "valid_targets_min": 1295 + }, + { + "epoch": 6.956521739130435, + "grad_norm": 1.00371154489669, + "learning_rate": 5.0701135174890944e-09, + "loss": 0.2474, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.29931163787841797, + "step": 4160, + "valid_targets_mean": 3849.4, + "valid_targets_min": 403 + }, + { + "epoch": 6.964882943143813, + "grad_norm": 0.930732845194307, + "learning_rate": 3.3662137026535537e-09, + "loss": 0.2348, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22841259837150574, + "step": 4165, + "valid_targets_mean": 3192.3, + "valid_targets_min": 1398 + }, + { + "epoch": 6.973244147157191, + "grad_norm": 0.9699847666900223, + "learning_rate": 2.0100139573031584e-09, + "loss": 0.2398, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2246263176202774, + "step": 4170, + "valid_targets_mean": 2845.4, + "valid_targets_min": 1337 + }, + { + "epoch": 6.981605351170568, + "grad_norm": 0.9378995458223955, + "learning_rate": 1.0015378629413265e-09, + "loss": 0.2268, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21635910868644714, + "step": 4175, + "valid_targets_mean": 2931.9, + "valid_targets_min": 780 + }, + { + "epoch": 6.989966555183947, + "grad_norm": 1.0220155583231438, + "learning_rate": 3.4080295488347903e-10, + "loss": 0.2187, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2119850516319275, + "step": 4180, + "valid_targets_mean": 2728.2, + "valid_targets_min": 1313 + }, + { + "epoch": 6.998327759197324, + "grad_norm": 0.9751477515928492, + "learning_rate": 2.7820721939519902e-11, + "loss": 0.231, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21303658187389374, + "step": 4185, + "valid_targets_mean": 3151.9, + "valid_targets_min": 1522 + }, + { + "epoch": 7.0, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22936706244945526, + "step": 4186, + "total_flos": 791537577689088.0, + "train_loss": 0.17696809689205084, + "train_runtime": 13311.7439, + "train_samples_per_second": 5.027, + "train_steps_per_second": 0.314, + "valid_targets_mean": 3387.6, + "valid_targets_min": 1204 + } + ], + "logging_steps": 5, + "max_steps": 4186, + "num_input_tokens_seen": 0, + "num_train_epochs": 7, + "save_steps": 1500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": true + }, + "attributes": {} + } + }, + "total_flos": 791537577689088.0, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +}