diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,13654 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 7.0, + "eval_steps": 500, + "global_step": 6187, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.005659309564233163, + "grad_norm": 12.813226147522716, + "learning_rate": 2.584814216478191e-07, + "loss": 0.6225, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.266487717628479, + "step": 5, + "valid_targets_mean": 3682.5, + "valid_targets_min": 653 + }, + { + "epoch": 0.011318619128466326, + "grad_norm": 12.434309531258513, + "learning_rate": 5.815831987075929e-07, + "loss": 0.6232, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3788124620914459, + "step": 10, + "valid_targets_mean": 5648.0, + "valid_targets_min": 1046 + }, + { + "epoch": 0.01697792869269949, + "grad_norm": 11.673211140826382, + "learning_rate": 9.046849757673668e-07, + "loss": 0.6045, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22325100004673004, + "step": 15, + "valid_targets_mean": 2670.2, + "valid_targets_min": 711 + }, + { + "epoch": 0.022637238256932653, + "grad_norm": 9.157685464487718, + "learning_rate": 1.2277867528271405e-06, + "loss": 0.5757, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2995016872882843, + "step": 20, + "valid_targets_mean": 3821.2, + "valid_targets_min": 712 + }, + { + "epoch": 0.028296547821165818, + "grad_norm": 6.109271281873889, + "learning_rate": 1.5508885298869145e-06, + "loss": 0.5634, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2571752965450287, + "step": 25, + "valid_targets_mean": 5306.2, + "valid_targets_min": 3041 + }, + { + "epoch": 0.03395585738539898, + "grad_norm": 5.19821375262202, + "learning_rate": 1.8739903069466882e-06, + "loss": 0.5258, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25106507539749146, + "step": 30, + "valid_targets_mean": 4567.5, + "valid_targets_min": 819 + }, + { + "epoch": 0.039615166949632144, + "grad_norm": 4.944065644931284, + "learning_rate": 2.197092084006462e-06, + "loss": 0.4963, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2642008364200592, + "step": 35, + "valid_targets_mean": 4716.9, + "valid_targets_min": 2275 + }, + { + "epoch": 0.045274476513865305, + "grad_norm": 4.659921680618342, + "learning_rate": 2.5201938610662364e-06, + "loss": 0.4433, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2430863380432129, + "step": 40, + "valid_targets_mean": 4759.8, + "valid_targets_min": 531 + }, + { + "epoch": 0.050933786078098474, + "grad_norm": 2.137271037605468, + "learning_rate": 2.84329563812601e-06, + "loss": 0.3983, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19000980257987976, + "step": 45, + "valid_targets_mean": 3641.9, + "valid_targets_min": 696 + }, + { + "epoch": 0.056593095642331635, + "grad_norm": 1.7630446672138598, + "learning_rate": 3.166397415185784e-06, + "loss": 0.3712, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17306166887283325, + "step": 50, + "valid_targets_mean": 4573.2, + "valid_targets_min": 784 + }, + { + "epoch": 0.0622524052065648, + "grad_norm": 1.0904717416783254, + "learning_rate": 3.489499192245558e-06, + "loss": 0.3679, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21692293882369995, + "step": 55, + "valid_targets_mean": 4747.1, + "valid_targets_min": 623 + }, + { + "epoch": 0.06791171477079797, + "grad_norm": 0.9609719887202809, + "learning_rate": 3.812600969305332e-06, + "loss": 0.3799, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18585249781608582, + "step": 60, + "valid_targets_mean": 4746.1, + "valid_targets_min": 2141 + }, + { + "epoch": 0.07357102433503113, + "grad_norm": 0.8841077009328207, + "learning_rate": 4.1357027463651056e-06, + "loss": 0.3402, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14711645245552063, + "step": 65, + "valid_targets_mean": 3593.5, + "valid_targets_min": 1421 + }, + { + "epoch": 0.07923033389926429, + "grad_norm": 0.7224942065448002, + "learning_rate": 4.458804523424879e-06, + "loss": 0.3332, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19751781225204468, + "step": 70, + "valid_targets_mean": 4813.8, + "valid_targets_min": 2313 + }, + { + "epoch": 0.08488964346349745, + "grad_norm": 0.6647342279737706, + "learning_rate": 4.781906300484653e-06, + "loss": 0.3236, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13213017582893372, + "step": 75, + "valid_targets_mean": 3989.6, + "valid_targets_min": 932 + }, + { + "epoch": 0.09054895302773061, + "grad_norm": 0.5803005774430054, + "learning_rate": 5.105008077544427e-06, + "loss": 0.3117, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12037070840597153, + "step": 80, + "valid_targets_mean": 4384.5, + "valid_targets_min": 3206 + }, + { + "epoch": 0.09620826259196379, + "grad_norm": 0.6360709303953993, + "learning_rate": 5.4281098546042014e-06, + "loss": 0.3405, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14573977887630463, + "step": 85, + "valid_targets_mean": 3884.0, + "valid_targets_min": 623 + }, + { + "epoch": 0.10186757215619695, + "grad_norm": 0.5695139283757488, + "learning_rate": 5.751211631663974e-06, + "loss": 0.3055, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1720479428768158, + "step": 90, + "valid_targets_mean": 4905.1, + "valid_targets_min": 856 + }, + { + "epoch": 0.10752688172043011, + "grad_norm": 0.5658124388589508, + "learning_rate": 6.074313408723749e-06, + "loss": 0.2897, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1653835028409958, + "step": 95, + "valid_targets_mean": 4370.4, + "valid_targets_min": 947 + }, + { + "epoch": 0.11318619128466327, + "grad_norm": 0.5892859252131134, + "learning_rate": 6.397415185783522e-06, + "loss": 0.3342, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1754002869129181, + "step": 100, + "valid_targets_mean": 3793.1, + "valid_targets_min": 1278 + }, + { + "epoch": 0.11884550084889643, + "grad_norm": 0.5682490253953324, + "learning_rate": 6.7205169628432965e-06, + "loss": 0.2976, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16364414989948273, + "step": 105, + "valid_targets_mean": 3827.1, + "valid_targets_min": 1530 + }, + { + "epoch": 0.1245048104131296, + "grad_norm": 0.5599951879323457, + "learning_rate": 7.043618739903069e-06, + "loss": 0.2875, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11121285706758499, + "step": 110, + "valid_targets_mean": 3072.1, + "valid_targets_min": 557 + }, + { + "epoch": 0.13016411997736277, + "grad_norm": 0.5832153246476397, + "learning_rate": 7.366720516962844e-06, + "loss": 0.2629, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10435768961906433, + "step": 115, + "valid_targets_mean": 2981.0, + "valid_targets_min": 523 + }, + { + "epoch": 0.13582342954159593, + "grad_norm": 0.5431062300896765, + "learning_rate": 7.689822294022618e-06, + "loss": 0.2891, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1245010495185852, + "step": 120, + "valid_targets_mean": 4023.1, + "valid_targets_min": 722 + }, + { + "epoch": 0.1414827391058291, + "grad_norm": 0.5244533476104923, + "learning_rate": 8.012924071082391e-06, + "loss": 0.2689, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1435898244380951, + "step": 125, + "valid_targets_mean": 4216.1, + "valid_targets_min": 1114 + }, + { + "epoch": 0.14714204867006225, + "grad_norm": 0.6259596139324627, + "learning_rate": 8.336025848142165e-06, + "loss": 0.2401, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14582374691963196, + "step": 130, + "valid_targets_mean": 3853.9, + "valid_targets_min": 1052 + }, + { + "epoch": 0.15280135823429541, + "grad_norm": 0.5599250947670149, + "learning_rate": 8.659127625201939e-06, + "loss": 0.2544, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.138871431350708, + "step": 135, + "valid_targets_mean": 4404.9, + "valid_targets_min": 182 + }, + { + "epoch": 0.15846066779852858, + "grad_norm": 0.5750264854271747, + "learning_rate": 8.982229402261713e-06, + "loss": 0.2643, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10757642984390259, + "step": 140, + "valid_targets_mean": 3443.6, + "valid_targets_min": 1064 + }, + { + "epoch": 0.16411997736276174, + "grad_norm": 0.5871020369242167, + "learning_rate": 9.305331179321486e-06, + "loss": 0.2556, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1447587013244629, + "step": 145, + "valid_targets_mean": 4370.2, + "valid_targets_min": 543 + }, + { + "epoch": 0.1697792869269949, + "grad_norm": 0.53028030155745, + "learning_rate": 9.62843295638126e-06, + "loss": 0.2615, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12581443786621094, + "step": 150, + "valid_targets_mean": 4347.8, + "valid_targets_min": 731 + }, + { + "epoch": 0.17543859649122806, + "grad_norm": 0.5636474369468014, + "learning_rate": 9.951534733441036e-06, + "loss": 0.2406, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11906266957521439, + "step": 155, + "valid_targets_mean": 4753.1, + "valid_targets_min": 957 + }, + { + "epoch": 0.18109790605546122, + "grad_norm": 0.5320011524733922, + "learning_rate": 1.0274636510500808e-05, + "loss": 0.2417, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08466179668903351, + "step": 160, + "valid_targets_mean": 3712.1, + "valid_targets_min": 861 + }, + { + "epoch": 0.1867572156196944, + "grad_norm": 0.5565200455680548, + "learning_rate": 1.0597738287560582e-05, + "loss": 0.2412, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1551325023174286, + "step": 165, + "valid_targets_mean": 3834.8, + "valid_targets_min": 980 + }, + { + "epoch": 0.19241652518392757, + "grad_norm": 0.5163109462106636, + "learning_rate": 1.0920840064620357e-05, + "loss": 0.243, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1269022524356842, + "step": 170, + "valid_targets_mean": 5029.0, + "valid_targets_min": 3420 + }, + { + "epoch": 0.19807583474816073, + "grad_norm": 0.5042162823945786, + "learning_rate": 1.124394184168013e-05, + "loss": 0.249, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1677677035331726, + "step": 175, + "valid_targets_mean": 6114.9, + "valid_targets_min": 4150 + }, + { + "epoch": 0.2037351443123939, + "grad_norm": 0.5359253936437938, + "learning_rate": 1.1567043618739904e-05, + "loss": 0.2408, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12648260593414307, + "step": 180, + "valid_targets_mean": 4326.5, + "valid_targets_min": 2862 + }, + { + "epoch": 0.20939445387662706, + "grad_norm": 0.6031878191173545, + "learning_rate": 1.1890145395799677e-05, + "loss": 0.2331, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10489478707313538, + "step": 185, + "valid_targets_mean": 2919.4, + "valid_targets_min": 486 + }, + { + "epoch": 0.21505376344086022, + "grad_norm": 0.5479360256334089, + "learning_rate": 1.2213247172859452e-05, + "loss": 0.239, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10994553565979004, + "step": 190, + "valid_targets_mean": 4337.6, + "valid_targets_min": 3024 + }, + { + "epoch": 0.22071307300509338, + "grad_norm": 0.6277896900138535, + "learning_rate": 1.2536348949919226e-05, + "loss": 0.2505, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07831567525863647, + "step": 195, + "valid_targets_mean": 2649.0, + "valid_targets_min": 594 + }, + { + "epoch": 0.22637238256932654, + "grad_norm": 0.693878125902913, + "learning_rate": 1.2859450726979e-05, + "loss": 0.2599, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12532927095890045, + "step": 200, + "valid_targets_mean": 5342.5, + "valid_targets_min": 2196 + }, + { + "epoch": 0.2320316921335597, + "grad_norm": 0.5134947724869195, + "learning_rate": 1.3182552504038773e-05, + "loss": 0.234, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1453550159931183, + "step": 205, + "valid_targets_mean": 4967.6, + "valid_targets_min": 986 + }, + { + "epoch": 0.23769100169779286, + "grad_norm": 0.60397407469067, + "learning_rate": 1.3505654281098549e-05, + "loss": 0.2485, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13283590972423553, + "step": 210, + "valid_targets_mean": 4819.9, + "valid_targets_min": 866 + }, + { + "epoch": 0.24335031126202603, + "grad_norm": 0.6626668547604381, + "learning_rate": 1.382875605815832e-05, + "loss": 0.2343, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17311999201774597, + "step": 215, + "valid_targets_mean": 4861.6, + "valid_targets_min": 1034 + }, + { + "epoch": 0.2490096208262592, + "grad_norm": 0.49901447151632433, + "learning_rate": 1.4151857835218094e-05, + "loss": 0.2323, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12583303451538086, + "step": 220, + "valid_targets_mean": 4749.8, + "valid_targets_min": 2497 + }, + { + "epoch": 0.2546689303904924, + "grad_norm": 0.4648171738175884, + "learning_rate": 1.4474959612277868e-05, + "loss": 0.2139, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1158398762345314, + "step": 225, + "valid_targets_mean": 5498.9, + "valid_targets_min": 3643 + }, + { + "epoch": 0.26032823995472554, + "grad_norm": 0.5933865904135527, + "learning_rate": 1.4798061389337644e-05, + "loss": 0.2203, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12105406075716019, + "step": 230, + "valid_targets_mean": 3564.0, + "valid_targets_min": 530 + }, + { + "epoch": 0.2659875495189587, + "grad_norm": 0.538074658398744, + "learning_rate": 1.5121163166397417e-05, + "loss": 0.225, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0594661571085453, + "step": 235, + "valid_targets_mean": 2630.8, + "valid_targets_min": 580 + }, + { + "epoch": 0.27164685908319186, + "grad_norm": 0.5936553345334965, + "learning_rate": 1.544426494345719e-05, + "loss": 0.2312, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10497381538152695, + "step": 240, + "valid_targets_mean": 3492.5, + "valid_targets_min": 799 + }, + { + "epoch": 0.277306168647425, + "grad_norm": 0.4833514969638417, + "learning_rate": 1.5767366720516963e-05, + "loss": 0.2237, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08097312599420547, + "step": 245, + "valid_targets_mean": 3711.0, + "valid_targets_min": 542 + }, + { + "epoch": 0.2829654782116582, + "grad_norm": 0.7357823097239291, + "learning_rate": 1.609046849757674e-05, + "loss": 0.2194, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13270056247711182, + "step": 250, + "valid_targets_mean": 6132.0, + "valid_targets_min": 3882 + }, + { + "epoch": 0.28862478777589134, + "grad_norm": 0.5060876138195932, + "learning_rate": 1.641357027463651e-05, + "loss": 0.2248, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08780373632907867, + "step": 255, + "valid_targets_mean": 3731.8, + "valid_targets_min": 589 + }, + { + "epoch": 0.2942840973401245, + "grad_norm": 0.5251441858938745, + "learning_rate": 1.6736672051696286e-05, + "loss": 0.2301, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11237767338752747, + "step": 260, + "valid_targets_mean": 4570.6, + "valid_targets_min": 780 + }, + { + "epoch": 0.29994340690435767, + "grad_norm": 0.5140810911985951, + "learning_rate": 1.7059773828756058e-05, + "loss": 0.2039, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12333481013774872, + "step": 265, + "valid_targets_mean": 5757.1, + "valid_targets_min": 3858 + }, + { + "epoch": 0.30560271646859083, + "grad_norm": 0.5432859906938778, + "learning_rate": 1.7382875605815834e-05, + "loss": 0.2339, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15551060438156128, + "step": 270, + "valid_targets_mean": 5700.2, + "valid_targets_min": 981 + }, + { + "epoch": 0.311262026032824, + "grad_norm": 0.5120159918967069, + "learning_rate": 1.770597738287561e-05, + "loss": 0.2353, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10404126346111298, + "step": 275, + "valid_targets_mean": 4982.6, + "valid_targets_min": 1258 + }, + { + "epoch": 0.31692133559705715, + "grad_norm": 0.5772072858307444, + "learning_rate": 1.802907915993538e-05, + "loss": 0.2316, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11084461212158203, + "step": 280, + "valid_targets_mean": 4855.2, + "valid_targets_min": 2789 + }, + { + "epoch": 0.3225806451612903, + "grad_norm": 0.48901305259561123, + "learning_rate": 1.8352180936995153e-05, + "loss": 0.2259, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10664913058280945, + "step": 285, + "valid_targets_mean": 4813.5, + "valid_targets_min": 831 + }, + { + "epoch": 0.3282399547255235, + "grad_norm": 0.531393299505263, + "learning_rate": 1.867528271405493e-05, + "loss": 0.2167, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0892222672700882, + "step": 290, + "valid_targets_mean": 4613.5, + "valid_targets_min": 2930 + }, + { + "epoch": 0.33389926428975664, + "grad_norm": 0.5379778246509785, + "learning_rate": 1.8998384491114704e-05, + "loss": 0.2036, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08487385511398315, + "step": 295, + "valid_targets_mean": 3702.4, + "valid_targets_min": 613 + }, + { + "epoch": 0.3395585738539898, + "grad_norm": 0.7415608256662145, + "learning_rate": 1.9321486268174476e-05, + "loss": 0.244, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13783471286296844, + "step": 300, + "valid_targets_mean": 4215.9, + "valid_targets_min": 1061 + }, + { + "epoch": 0.34521788341822296, + "grad_norm": 0.5625263249464086, + "learning_rate": 1.9644588045234248e-05, + "loss": 0.2314, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10931971669197083, + "step": 305, + "valid_targets_mean": 3355.8, + "valid_targets_min": 907 + }, + { + "epoch": 0.3508771929824561, + "grad_norm": 0.5147539908781431, + "learning_rate": 1.9967689822294024e-05, + "loss": 0.2194, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12160828709602356, + "step": 310, + "valid_targets_mean": 4966.4, + "valid_targets_min": 2018 + }, + { + "epoch": 0.3565365025466893, + "grad_norm": 0.5850483202366575, + "learning_rate": 2.0290791599353796e-05, + "loss": 0.2286, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07158877700567245, + "step": 315, + "valid_targets_mean": 3608.6, + "valid_targets_min": 507 + }, + { + "epoch": 0.36219581211092244, + "grad_norm": 0.4992957870569539, + "learning_rate": 2.0613893376413575e-05, + "loss": 0.2238, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09600482881069183, + "step": 320, + "valid_targets_mean": 4695.2, + "valid_targets_min": 1370 + }, + { + "epoch": 0.3678551216751556, + "grad_norm": 0.5505109795133273, + "learning_rate": 2.0936995153473347e-05, + "loss": 0.2248, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11262305080890656, + "step": 325, + "valid_targets_mean": 4509.1, + "valid_targets_min": 577 + }, + { + "epoch": 0.3735144312393888, + "grad_norm": 0.5342663934645616, + "learning_rate": 2.1260096930533122e-05, + "loss": 0.2178, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11352752894163132, + "step": 330, + "valid_targets_mean": 4437.4, + "valid_targets_min": 803 + }, + { + "epoch": 0.379173740803622, + "grad_norm": 0.5111587826954059, + "learning_rate": 2.1583198707592894e-05, + "loss": 0.211, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11444622278213501, + "step": 335, + "valid_targets_mean": 4864.5, + "valid_targets_min": 2709 + }, + { + "epoch": 0.38483305036785515, + "grad_norm": 0.49139214737497255, + "learning_rate": 2.1906300484652666e-05, + "loss": 0.2117, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09134054183959961, + "step": 340, + "valid_targets_mean": 4543.9, + "valid_targets_min": 1597 + }, + { + "epoch": 0.3904923599320883, + "grad_norm": 0.5656753129111157, + "learning_rate": 2.2229402261712442e-05, + "loss": 0.2091, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12917183339595795, + "step": 345, + "valid_targets_mean": 5401.1, + "valid_targets_min": 2540 + }, + { + "epoch": 0.39615166949632147, + "grad_norm": 0.5127732053383066, + "learning_rate": 2.2552504038772214e-05, + "loss": 0.2143, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09598717093467712, + "step": 350, + "valid_targets_mean": 4171.5, + "valid_targets_min": 2370 + }, + { + "epoch": 0.40181097906055463, + "grad_norm": 0.5642754872219258, + "learning_rate": 2.2875605815831986e-05, + "loss": 0.2034, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0842936635017395, + "step": 355, + "valid_targets_mean": 3385.2, + "valid_targets_min": 664 + }, + { + "epoch": 0.4074702886247878, + "grad_norm": 0.5337854343548862, + "learning_rate": 2.3198707592891765e-05, + "loss": 0.2097, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09393825381994247, + "step": 360, + "valid_targets_mean": 4427.6, + "valid_targets_min": 754 + }, + { + "epoch": 0.41312959818902095, + "grad_norm": 0.5376727652640517, + "learning_rate": 2.3521809369951537e-05, + "loss": 0.2177, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06290898472070694, + "step": 365, + "valid_targets_mean": 3210.9, + "valid_targets_min": 618 + }, + { + "epoch": 0.4187889077532541, + "grad_norm": 0.4569704874813037, + "learning_rate": 2.3844911147011312e-05, + "loss": 0.2158, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1269156038761139, + "step": 370, + "valid_targets_mean": 7079.1, + "valid_targets_min": 4048 + }, + { + "epoch": 0.4244482173174873, + "grad_norm": 0.5656583854147238, + "learning_rate": 2.4168012924071084e-05, + "loss": 0.2047, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11031660437583923, + "step": 375, + "valid_targets_mean": 4650.1, + "valid_targets_min": 808 + }, + { + "epoch": 0.43010752688172044, + "grad_norm": 0.6570746918194288, + "learning_rate": 2.449111470113086e-05, + "loss": 0.2204, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1448550522327423, + "step": 380, + "valid_targets_mean": 3118.8, + "valid_targets_min": 333 + }, + { + "epoch": 0.4357668364459536, + "grad_norm": 0.55950409007124, + "learning_rate": 2.4814216478190632e-05, + "loss": 0.2089, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09810247272253036, + "step": 385, + "valid_targets_mean": 5086.9, + "valid_targets_min": 2463 + }, + { + "epoch": 0.44142614601018676, + "grad_norm": 0.5772354969637692, + "learning_rate": 2.5137318255250404e-05, + "loss": 0.2028, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1355806142091751, + "step": 390, + "valid_targets_mean": 3598.2, + "valid_targets_min": 924 + }, + { + "epoch": 0.4470854555744199, + "grad_norm": 0.561384015841921, + "learning_rate": 2.546042003231018e-05, + "loss": 0.2012, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06471231579780579, + "step": 395, + "valid_targets_mean": 2976.8, + "valid_targets_min": 917 + }, + { + "epoch": 0.4527447651386531, + "grad_norm": 0.5669972709732071, + "learning_rate": 2.5783521809369955e-05, + "loss": 0.2065, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10800547897815704, + "step": 400, + "valid_targets_mean": 4682.4, + "valid_targets_min": 2999 + }, + { + "epoch": 0.45840407470288624, + "grad_norm": 0.49967653724943956, + "learning_rate": 2.610662358642973e-05, + "loss": 0.2006, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11313368380069733, + "step": 405, + "valid_targets_mean": 4446.4, + "valid_targets_min": 1036 + }, + { + "epoch": 0.4640633842671194, + "grad_norm": 0.5152904474241592, + "learning_rate": 2.6429725363489502e-05, + "loss": 0.2148, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0881739929318428, + "step": 410, + "valid_targets_mean": 4562.9, + "valid_targets_min": 1126 + }, + { + "epoch": 0.46972269383135257, + "grad_norm": 0.5557407859642174, + "learning_rate": 2.6752827140549274e-05, + "loss": 0.2023, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10014262795448303, + "step": 415, + "valid_targets_mean": 4247.5, + "valid_targets_min": 1267 + }, + { + "epoch": 0.47538200339558573, + "grad_norm": 0.6403901898998684, + "learning_rate": 2.707592891760905e-05, + "loss": 0.2031, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11550679802894592, + "step": 420, + "valid_targets_mean": 4960.9, + "valid_targets_min": 2671 + }, + { + "epoch": 0.4810413129598189, + "grad_norm": 0.5129380971439641, + "learning_rate": 2.7399030694668822e-05, + "loss": 0.2215, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12999185919761658, + "step": 425, + "valid_targets_mean": 5321.1, + "valid_targets_min": 2144 + }, + { + "epoch": 0.48670062252405205, + "grad_norm": 0.6297560970964265, + "learning_rate": 2.7722132471728597e-05, + "loss": 0.1972, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07924368977546692, + "step": 430, + "valid_targets_mean": 2387.4, + "valid_targets_min": 622 + }, + { + "epoch": 0.4923599320882852, + "grad_norm": 0.5074941863429275, + "learning_rate": 2.804523424878837e-05, + "loss": 0.2151, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09975864738225937, + "step": 435, + "valid_targets_mean": 4750.0, + "valid_targets_min": 881 + }, + { + "epoch": 0.4980192416525184, + "grad_norm": 0.5524129953125413, + "learning_rate": 2.8368336025848148e-05, + "loss": 0.2169, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12875968217849731, + "step": 440, + "valid_targets_mean": 5293.2, + "valid_targets_min": 2767 + }, + { + "epoch": 0.5036785512167515, + "grad_norm": 0.5748855882996645, + "learning_rate": 2.869143780290792e-05, + "loss": 0.2174, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10654495656490326, + "step": 445, + "valid_targets_mean": 4076.9, + "valid_targets_min": 597 + }, + { + "epoch": 0.5093378607809848, + "grad_norm": 0.5604378749096861, + "learning_rate": 2.9014539579967692e-05, + "loss": 0.1952, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10172678530216217, + "step": 450, + "valid_targets_mean": 4066.9, + "valid_targets_min": 843 + }, + { + "epoch": 0.5149971703452179, + "grad_norm": 0.4567946831515076, + "learning_rate": 2.9337641357027468e-05, + "loss": 0.206, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09301639348268509, + "step": 455, + "valid_targets_mean": 5160.1, + "valid_targets_min": 884 + }, + { + "epoch": 0.5206564799094511, + "grad_norm": 0.49801793932476446, + "learning_rate": 2.966074313408724e-05, + "loss": 0.1958, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10571038722991943, + "step": 460, + "valid_targets_mean": 4216.5, + "valid_targets_min": 542 + }, + { + "epoch": 0.5263157894736842, + "grad_norm": 0.47936281581487994, + "learning_rate": 2.9983844911147012e-05, + "loss": 0.2009, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10441243648529053, + "step": 465, + "valid_targets_mean": 5424.6, + "valid_targets_min": 3404 + }, + { + "epoch": 0.5319750990379174, + "grad_norm": 0.9755369825447163, + "learning_rate": 3.0306946688206787e-05, + "loss": 0.2056, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08892907947301865, + "step": 470, + "valid_targets_mean": 4851.6, + "valid_targets_min": 3160 + }, + { + "epoch": 0.5376344086021505, + "grad_norm": 0.4800925217430048, + "learning_rate": 3.063004846526656e-05, + "loss": 0.204, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11628638207912445, + "step": 475, + "valid_targets_mean": 5653.9, + "valid_targets_min": 3152 + }, + { + "epoch": 0.5432937181663837, + "grad_norm": 0.47620558280915126, + "learning_rate": 3.095315024232634e-05, + "loss": 0.2048, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0822351947426796, + "step": 480, + "valid_targets_mean": 4639.6, + "valid_targets_min": 1171 + }, + { + "epoch": 0.5489530277306168, + "grad_norm": 0.5182161558171746, + "learning_rate": 3.127625201938611e-05, + "loss": 0.1776, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09437121450901031, + "step": 485, + "valid_targets_mean": 4086.6, + "valid_targets_min": 1129 + }, + { + "epoch": 0.55461233729485, + "grad_norm": 0.5792448608116004, + "learning_rate": 3.159935379644588e-05, + "loss": 0.2168, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09979522973299026, + "step": 490, + "valid_targets_mean": 3700.6, + "valid_targets_min": 494 + }, + { + "epoch": 0.5602716468590832, + "grad_norm": 0.7270395247858927, + "learning_rate": 3.1922455573505654e-05, + "loss": 0.2031, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08922898024320602, + "step": 495, + "valid_targets_mean": 2935.4, + "valid_targets_min": 820 + }, + { + "epoch": 0.5659309564233164, + "grad_norm": 0.5550875300100327, + "learning_rate": 3.224555735056543e-05, + "loss": 0.2079, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16339895129203796, + "step": 500, + "valid_targets_mean": 5637.5, + "valid_targets_min": 3278 + }, + { + "epoch": 0.5715902659875495, + "grad_norm": 0.6133163061933703, + "learning_rate": 3.2568659127625205e-05, + "loss": 0.189, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11058829724788666, + "step": 505, + "valid_targets_mean": 3993.6, + "valid_targets_min": 967 + }, + { + "epoch": 0.5772495755517827, + "grad_norm": 0.5938422275743988, + "learning_rate": 3.289176090468498e-05, + "loss": 0.2019, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14039157330989838, + "step": 510, + "valid_targets_mean": 4099.4, + "valid_targets_min": 575 + }, + { + "epoch": 0.5829088851160158, + "grad_norm": 0.4905739810736693, + "learning_rate": 3.321486268174475e-05, + "loss": 0.1965, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0983400046825409, + "step": 515, + "valid_targets_mean": 5010.8, + "valid_targets_min": 1002 + }, + { + "epoch": 0.588568194680249, + "grad_norm": 0.5197502308863515, + "learning_rate": 3.353796445880453e-05, + "loss": 0.1929, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09322381019592285, + "step": 520, + "valid_targets_mean": 4704.6, + "valid_targets_min": 2677 + }, + { + "epoch": 0.5942275042444821, + "grad_norm": 0.5856378653440014, + "learning_rate": 3.38610662358643e-05, + "loss": 0.1947, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12427324801683426, + "step": 525, + "valid_targets_mean": 4901.8, + "valid_targets_min": 736 + }, + { + "epoch": 0.5998868138087153, + "grad_norm": 0.5816338173810459, + "learning_rate": 3.418416801292407e-05, + "loss": 0.1949, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08927009254693985, + "step": 530, + "valid_targets_mean": 3248.0, + "valid_targets_min": 763 + }, + { + "epoch": 0.6055461233729486, + "grad_norm": 0.5985677527679052, + "learning_rate": 3.450726978998385e-05, + "loss": 0.2003, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10206498205661774, + "step": 535, + "valid_targets_mean": 4213.0, + "valid_targets_min": 941 + }, + { + "epoch": 0.6112054329371817, + "grad_norm": 0.487222645020575, + "learning_rate": 3.483037156704362e-05, + "loss": 0.1908, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.060657575726509094, + "step": 540, + "valid_targets_mean": 3416.1, + "valid_targets_min": 837 + }, + { + "epoch": 0.6168647425014149, + "grad_norm": 0.5829061134633076, + "learning_rate": 3.5153473344103395e-05, + "loss": 0.1919, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05984025076031685, + "step": 545, + "valid_targets_mean": 2408.6, + "valid_targets_min": 693 + }, + { + "epoch": 0.622524052065648, + "grad_norm": 0.6797906087234027, + "learning_rate": 3.547657512116317e-05, + "loss": 0.1937, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10143044590950012, + "step": 550, + "valid_targets_mean": 2706.5, + "valid_targets_min": 605 + }, + { + "epoch": 0.6281833616298812, + "grad_norm": 0.543553216798796, + "learning_rate": 3.579967689822294e-05, + "loss": 0.2006, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11305040121078491, + "step": 555, + "valid_targets_mean": 4347.0, + "valid_targets_min": 785 + }, + { + "epoch": 0.6338426711941143, + "grad_norm": 0.7099347689847226, + "learning_rate": 3.612277867528272e-05, + "loss": 0.2137, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14038805663585663, + "step": 560, + "valid_targets_mean": 4658.8, + "valid_targets_min": 786 + }, + { + "epoch": 0.6395019807583475, + "grad_norm": 0.4884740944602011, + "learning_rate": 3.644588045234249e-05, + "loss": 0.1975, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06367959082126617, + "step": 565, + "valid_targets_mean": 3880.6, + "valid_targets_min": 967 + }, + { + "epoch": 0.6451612903225806, + "grad_norm": 0.47520707270053175, + "learning_rate": 3.676898222940227e-05, + "loss": 0.1976, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06609955430030823, + "step": 570, + "valid_targets_mean": 3866.9, + "valid_targets_min": 707 + }, + { + "epoch": 0.6508205998868138, + "grad_norm": 0.552115820491288, + "learning_rate": 3.709208400646204e-05, + "loss": 0.2084, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1201586127281189, + "step": 575, + "valid_targets_mean": 4789.1, + "valid_targets_min": 1057 + }, + { + "epoch": 0.656479909451047, + "grad_norm": 0.4823997056472823, + "learning_rate": 3.741518578352181e-05, + "loss": 0.1873, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0636964961886406, + "step": 580, + "valid_targets_mean": 3987.5, + "valid_targets_min": 669 + }, + { + "epoch": 0.6621392190152802, + "grad_norm": 0.49207134875225544, + "learning_rate": 3.7738287560581585e-05, + "loss": 0.1916, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07500476390123367, + "step": 585, + "valid_targets_mean": 4021.1, + "valid_targets_min": 647 + }, + { + "epoch": 0.6677985285795133, + "grad_norm": 0.539843627424898, + "learning_rate": 3.806138933764136e-05, + "loss": 0.2085, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12484041601419449, + "step": 590, + "valid_targets_mean": 5065.2, + "valid_targets_min": 888 + }, + { + "epoch": 0.6734578381437465, + "grad_norm": 0.470597208726392, + "learning_rate": 3.838449111470113e-05, + "loss": 0.1907, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13309787213802338, + "step": 595, + "valid_targets_mean": 5820.2, + "valid_targets_min": 2988 + }, + { + "epoch": 0.6791171477079796, + "grad_norm": 0.5391159051146083, + "learning_rate": 3.870759289176091e-05, + "loss": 0.2032, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07989153265953064, + "step": 600, + "valid_targets_mean": 3483.2, + "valid_targets_min": 931 + }, + { + "epoch": 0.6847764572722128, + "grad_norm": 0.504815039149041, + "learning_rate": 3.903069466882068e-05, + "loss": 0.1921, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08566620200872421, + "step": 605, + "valid_targets_mean": 4628.4, + "valid_targets_min": 3392 + }, + { + "epoch": 0.6904357668364459, + "grad_norm": 0.5046402035313866, + "learning_rate": 3.935379644588046e-05, + "loss": 0.1924, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10765756666660309, + "step": 610, + "valid_targets_mean": 4554.2, + "valid_targets_min": 979 + }, + { + "epoch": 0.6960950764006791, + "grad_norm": 0.48538087851305, + "learning_rate": 3.967689822294023e-05, + "loss": 0.1948, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10865490138530731, + "step": 615, + "valid_targets_mean": 5697.9, + "valid_targets_min": 722 + }, + { + "epoch": 0.7017543859649122, + "grad_norm": 0.5721125586340572, + "learning_rate": 4e-05, + "loss": 0.1866, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0678119882941246, + "step": 620, + "valid_targets_mean": 3762.6, + "valid_targets_min": 357 + }, + { + "epoch": 0.7074136955291455, + "grad_norm": 0.5979128633099481, + "learning_rate": 3.999992044178504e-05, + "loss": 0.2146, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12127690017223358, + "step": 625, + "valid_targets_mean": 3651.2, + "valid_targets_min": 841 + }, + { + "epoch": 0.7130730050933786, + "grad_norm": 0.46860637322664894, + "learning_rate": 3.9999681767773104e-05, + "loss": 0.1896, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09697499126195908, + "step": 630, + "valid_targets_mean": 5451.4, + "valid_targets_min": 3045 + }, + { + "epoch": 0.7187323146576118, + "grad_norm": 0.5116197877966975, + "learning_rate": 3.999928397986304e-05, + "loss": 0.1832, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09591969102621078, + "step": 635, + "valid_targets_mean": 4015.0, + "valid_targets_min": 689 + }, + { + "epoch": 0.7243916242218449, + "grad_norm": 0.48614102898387124, + "learning_rate": 3.9998727081219585e-05, + "loss": 0.1951, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06270407140254974, + "step": 640, + "valid_targets_mean": 3478.2, + "valid_targets_min": 684 + }, + { + "epoch": 0.7300509337860781, + "grad_norm": 0.5136787819057246, + "learning_rate": 3.999801107627332e-05, + "loss": 0.1781, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06117028743028641, + "step": 645, + "valid_targets_mean": 3256.4, + "valid_targets_min": 722 + }, + { + "epoch": 0.7357102433503112, + "grad_norm": 0.5716059378238086, + "learning_rate": 3.9997135970720655e-05, + "loss": 0.1852, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1006719321012497, + "step": 650, + "valid_targets_mean": 3961.4, + "valid_targets_min": 574 + }, + { + "epoch": 0.7413695529145444, + "grad_norm": 0.4667654371085649, + "learning_rate": 3.9996101771523766e-05, + "loss": 0.1777, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08582760393619537, + "step": 655, + "valid_targets_mean": 3858.8, + "valid_targets_min": 1199 + }, + { + "epoch": 0.7470288624787776, + "grad_norm": 0.49000793727939973, + "learning_rate": 3.999490848691057e-05, + "loss": 0.1884, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08919184654951096, + "step": 660, + "valid_targets_mean": 4622.1, + "valid_targets_min": 1033 + }, + { + "epoch": 0.7526881720430108, + "grad_norm": 0.4896885475017624, + "learning_rate": 3.999355612637461e-05, + "loss": 0.1925, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10601164400577545, + "step": 665, + "valid_targets_mean": 4146.9, + "valid_targets_min": 932 + }, + { + "epoch": 0.758347481607244, + "grad_norm": 0.4811811580056875, + "learning_rate": 3.999204470067504e-05, + "loss": 0.189, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08481722325086594, + "step": 670, + "valid_targets_mean": 4489.1, + "valid_targets_min": 873 + }, + { + "epoch": 0.7640067911714771, + "grad_norm": 0.4927889754928825, + "learning_rate": 3.9990374221836484e-05, + "loss": 0.1884, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11972365528345108, + "step": 675, + "valid_targets_mean": 5275.4, + "valid_targets_min": 1053 + }, + { + "epoch": 0.7696661007357103, + "grad_norm": 0.47651794751611715, + "learning_rate": 3.998854470314898e-05, + "loss": 0.1889, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11051115393638611, + "step": 680, + "valid_targets_mean": 4301.6, + "valid_targets_min": 769 + }, + { + "epoch": 0.7753254102999434, + "grad_norm": 0.49306355583690087, + "learning_rate": 3.9986556159167846e-05, + "loss": 0.2044, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10798701643943787, + "step": 685, + "valid_targets_mean": 5170.5, + "valid_targets_min": 589 + }, + { + "epoch": 0.7809847198641766, + "grad_norm": 0.5033162155705667, + "learning_rate": 3.998440860571358e-05, + "loss": 0.1899, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12910602986812592, + "step": 690, + "valid_targets_mean": 5560.6, + "valid_targets_min": 3671 + }, + { + "epoch": 0.7866440294284097, + "grad_norm": 0.4576534686870833, + "learning_rate": 3.998210205987175e-05, + "loss": 0.2047, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09165354073047638, + "step": 695, + "valid_targets_mean": 4983.0, + "valid_targets_min": 1259 + }, + { + "epoch": 0.7923033389926429, + "grad_norm": 0.49611497715775577, + "learning_rate": 3.9979636539992805e-05, + "loss": 0.1775, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0714588612318039, + "step": 700, + "valid_targets_mean": 3803.1, + "valid_targets_min": 715 + }, + { + "epoch": 0.797962648556876, + "grad_norm": 0.47596257540810344, + "learning_rate": 3.9977012065692e-05, + "loss": 0.1702, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09044930338859558, + "step": 705, + "valid_targets_mean": 4656.8, + "valid_targets_min": 659 + }, + { + "epoch": 0.8036219581211093, + "grad_norm": 0.45185051919756186, + "learning_rate": 3.997422865784916e-05, + "loss": 0.1861, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09395641088485718, + "step": 710, + "valid_targets_mean": 5318.2, + "valid_targets_min": 1521 + }, + { + "epoch": 0.8092812676853424, + "grad_norm": 0.48489006814477564, + "learning_rate": 3.99712863386086e-05, + "loss": 0.1854, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04562477022409439, + "step": 715, + "valid_targets_mean": 2574.6, + "valid_targets_min": 765 + }, + { + "epoch": 0.8149405772495756, + "grad_norm": 0.46721075737810464, + "learning_rate": 3.9968185131378876e-05, + "loss": 0.1755, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.051369089633226395, + "step": 720, + "valid_targets_mean": 3280.6, + "valid_targets_min": 844 + }, + { + "epoch": 0.8205998868138087, + "grad_norm": 0.5540843560594724, + "learning_rate": 3.996492506083264e-05, + "loss": 0.1894, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07264433801174164, + "step": 725, + "valid_targets_mean": 3546.2, + "valid_targets_min": 539 + }, + { + "epoch": 0.8262591963780419, + "grad_norm": 0.5224823587426883, + "learning_rate": 3.9961506152906445e-05, + "loss": 0.1911, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10288595408201218, + "step": 730, + "valid_targets_mean": 4376.5, + "valid_targets_min": 860 + }, + { + "epoch": 0.831918505942275, + "grad_norm": 0.47807495784222465, + "learning_rate": 3.995792843480051e-05, + "loss": 0.1801, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07903150469064713, + "step": 735, + "valid_targets_mean": 4422.4, + "valid_targets_min": 2262 + }, + { + "epoch": 0.8375778155065082, + "grad_norm": 0.539220673985187, + "learning_rate": 3.9954191934978494e-05, + "loss": 0.1903, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10568754374980927, + "step": 740, + "valid_targets_mean": 4830.0, + "valid_targets_min": 1112 + }, + { + "epoch": 0.8432371250707413, + "grad_norm": 0.4405936884616324, + "learning_rate": 3.995029668316735e-05, + "loss": 0.1692, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12706342339515686, + "step": 745, + "valid_targets_mean": 6917.2, + "valid_targets_min": 3497 + }, + { + "epoch": 0.8488964346349746, + "grad_norm": 0.4761586516836419, + "learning_rate": 3.9946242710356994e-05, + "loss": 0.186, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11931536346673965, + "step": 750, + "valid_targets_mean": 5356.9, + "valid_targets_min": 3764 + }, + { + "epoch": 0.8545557441992077, + "grad_norm": 0.43686824661617357, + "learning_rate": 3.994203004880012e-05, + "loss": 0.1894, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0849871039390564, + "step": 755, + "valid_targets_mean": 4839.5, + "valid_targets_min": 716 + }, + { + "epoch": 0.8602150537634409, + "grad_norm": 0.5083873552604593, + "learning_rate": 3.9937658732011905e-05, + "loss": 0.1883, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12052398920059204, + "step": 760, + "valid_targets_mean": 4680.0, + "valid_targets_min": 1173 + }, + { + "epoch": 0.865874363327674, + "grad_norm": 0.48112199197689937, + "learning_rate": 3.993312879476976e-05, + "loss": 0.1708, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08175818622112274, + "step": 765, + "valid_targets_mean": 3661.2, + "valid_targets_min": 729 + }, + { + "epoch": 0.8715336728919072, + "grad_norm": 0.41071527932517227, + "learning_rate": 3.992844027311307e-05, + "loss": 0.1763, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0560702346265316, + "step": 770, + "valid_targets_mean": 3699.1, + "valid_targets_min": 892 + }, + { + "epoch": 0.8771929824561403, + "grad_norm": 0.5098210850595244, + "learning_rate": 3.992359320434287e-05, + "loss": 0.1891, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.060177333652973175, + "step": 775, + "valid_targets_mean": 3453.4, + "valid_targets_min": 820 + }, + { + "epoch": 0.8828522920203735, + "grad_norm": 0.4897732379839489, + "learning_rate": 3.9918587627021566e-05, + "loss": 0.1962, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10962501913309097, + "step": 780, + "valid_targets_mean": 4840.5, + "valid_targets_min": 924 + }, + { + "epoch": 0.8885116015846066, + "grad_norm": 0.5398079741245252, + "learning_rate": 3.991342358097265e-05, + "loss": 0.1927, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09963786602020264, + "step": 785, + "valid_targets_mean": 3919.0, + "valid_targets_min": 1469 + }, + { + "epoch": 0.8941709111488398, + "grad_norm": 0.489278339550065, + "learning_rate": 3.990810110728034e-05, + "loss": 0.1822, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08527357876300812, + "step": 790, + "valid_targets_mean": 4285.4, + "valid_targets_min": 1382 + }, + { + "epoch": 0.8998302207130731, + "grad_norm": 0.5030050083141236, + "learning_rate": 3.99026202482893e-05, + "loss": 0.1925, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09702607989311218, + "step": 795, + "valid_targets_mean": 4566.4, + "valid_targets_min": 1536 + }, + { + "epoch": 0.9054895302773062, + "grad_norm": 0.4682622379470606, + "learning_rate": 3.989698104760425e-05, + "loss": 0.1701, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07846413552761078, + "step": 800, + "valid_targets_mean": 4433.5, + "valid_targets_min": 2982 + }, + { + "epoch": 0.9111488398415394, + "grad_norm": 0.4888127293876126, + "learning_rate": 3.989118355008968e-05, + "loss": 0.1871, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09182113409042358, + "step": 805, + "valid_targets_mean": 4202.0, + "valid_targets_min": 541 + }, + { + "epoch": 0.9168081494057725, + "grad_norm": 0.450798363768845, + "learning_rate": 3.988522780186943e-05, + "loss": 0.1846, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0726768970489502, + "step": 810, + "valid_targets_mean": 3555.2, + "valid_targets_min": 619 + }, + { + "epoch": 0.9224674589700057, + "grad_norm": 0.47134448159703074, + "learning_rate": 3.987911385032638e-05, + "loss": 0.1889, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07873281836509705, + "step": 815, + "valid_targets_mean": 3385.4, + "valid_targets_min": 610 + }, + { + "epoch": 0.9281267685342388, + "grad_norm": 0.5022451423902121, + "learning_rate": 3.987284174410203e-05, + "loss": 0.1809, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09317578375339508, + "step": 820, + "valid_targets_mean": 4830.2, + "valid_targets_min": 762 + }, + { + "epoch": 0.933786078098472, + "grad_norm": 0.5219772729257867, + "learning_rate": 3.986641153309615e-05, + "loss": 0.1842, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05027949810028076, + "step": 825, + "valid_targets_mean": 2801.2, + "valid_targets_min": 833 + }, + { + "epoch": 0.9394453876627051, + "grad_norm": 0.5605825275745376, + "learning_rate": 3.985982326846634e-05, + "loss": 0.1899, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08850441873073578, + "step": 830, + "valid_targets_mean": 4372.8, + "valid_targets_min": 638 + }, + { + "epoch": 0.9451046972269384, + "grad_norm": 0.6716335564720974, + "learning_rate": 3.985307700262765e-05, + "loss": 0.2018, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2462937831878662, + "step": 835, + "valid_targets_mean": 3825.2, + "valid_targets_min": 1187 + }, + { + "epoch": 0.9507640067911715, + "grad_norm": 0.468411327968257, + "learning_rate": 3.984617278925218e-05, + "loss": 0.1929, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07693798840045929, + "step": 840, + "valid_targets_mean": 4013.9, + "valid_targets_min": 712 + }, + { + "epoch": 0.9564233163554047, + "grad_norm": 0.4586128743610432, + "learning_rate": 3.9839110683268624e-05, + "loss": 0.1889, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07137559354305267, + "step": 845, + "valid_targets_mean": 4351.5, + "valid_targets_min": 2791 + }, + { + "epoch": 0.9620826259196378, + "grad_norm": 0.5049252771996005, + "learning_rate": 3.9831890740861826e-05, + "loss": 0.1817, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07987163215875626, + "step": 850, + "valid_targets_mean": 3546.0, + "valid_targets_min": 761 + }, + { + "epoch": 0.967741935483871, + "grad_norm": 0.4826700230297361, + "learning_rate": 3.982451301947236e-05, + "loss": 0.1774, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10527510941028595, + "step": 855, + "valid_targets_mean": 4905.9, + "valid_targets_min": 3245 + }, + { + "epoch": 0.9734012450481041, + "grad_norm": 0.4825568935474119, + "learning_rate": 3.981697757779606e-05, + "loss": 0.1894, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12306796759366989, + "step": 860, + "valid_targets_mean": 6053.2, + "valid_targets_min": 3864 + }, + { + "epoch": 0.9790605546123373, + "grad_norm": 0.5011714360721748, + "learning_rate": 3.980928447578356e-05, + "loss": 0.211, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0997605174779892, + "step": 865, + "valid_targets_mean": 5492.6, + "valid_targets_min": 2806 + }, + { + "epoch": 0.9847198641765704, + "grad_norm": 0.5326455359879171, + "learning_rate": 3.98014337746398e-05, + "loss": 0.195, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08221840113401413, + "step": 870, + "valid_targets_mean": 3132.2, + "valid_targets_min": 1243 + }, + { + "epoch": 0.9903791737408036, + "grad_norm": 0.4605068210472626, + "learning_rate": 3.9793425536823555e-05, + "loss": 0.1909, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05985761433839798, + "step": 875, + "valid_targets_mean": 3465.5, + "valid_targets_min": 736 + }, + { + "epoch": 0.9960384833050367, + "grad_norm": 0.4998608438827163, + "learning_rate": 3.978525982604695e-05, + "loss": 0.1827, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07552091032266617, + "step": 880, + "valid_targets_mean": 2719.0, + "valid_targets_min": 458 + }, + { + "epoch": 1.0011318619128466, + "grad_norm": 0.4628893472316372, + "learning_rate": 3.977693670727491e-05, + "loss": 0.1807, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07494200021028519, + "step": 885, + "valid_targets_mean": 3718.5, + "valid_targets_min": 804 + }, + { + "epoch": 1.0067911714770799, + "grad_norm": 0.5493854634155632, + "learning_rate": 3.9768456246724675e-05, + "loss": 0.1782, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10945551097393036, + "step": 890, + "valid_targets_mean": 4598.5, + "valid_targets_min": 816 + }, + { + "epoch": 1.0124504810413129, + "grad_norm": 0.49334980209689755, + "learning_rate": 3.97598185118653e-05, + "loss": 0.1697, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10318919271230698, + "step": 895, + "valid_targets_mean": 5166.4, + "valid_targets_min": 2706 + }, + { + "epoch": 1.018109790605546, + "grad_norm": 0.4665542677847791, + "learning_rate": 3.975102357141704e-05, + "loss": 0.1711, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08501093089580536, + "step": 900, + "valid_targets_mean": 4739.9, + "valid_targets_min": 3146 + }, + { + "epoch": 1.0237691001697793, + "grad_norm": 0.5037956897220708, + "learning_rate": 3.974207149535088e-05, + "loss": 0.1688, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08715382218360901, + "step": 905, + "valid_targets_mean": 4309.8, + "valid_targets_min": 494 + }, + { + "epoch": 1.0294284097340125, + "grad_norm": 0.46884421359420575, + "learning_rate": 3.9732962354887936e-05, + "loss": 0.1796, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0527305006980896, + "step": 910, + "valid_targets_mean": 2960.1, + "valid_targets_min": 693 + }, + { + "epoch": 1.0350877192982457, + "grad_norm": 0.468232721846232, + "learning_rate": 3.972369622249891e-05, + "loss": 0.1768, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12767651677131653, + "step": 915, + "valid_targets_mean": 5726.4, + "valid_targets_min": 3937 + }, + { + "epoch": 1.0407470288624787, + "grad_norm": 0.48253584661054705, + "learning_rate": 3.9714273171903486e-05, + "loss": 0.1656, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06838169693946838, + "step": 920, + "valid_targets_mean": 4254.5, + "valid_targets_min": 965 + }, + { + "epoch": 1.046406338426712, + "grad_norm": 0.460896867055517, + "learning_rate": 3.970469327806978e-05, + "loss": 0.1706, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08300100266933441, + "step": 925, + "valid_targets_mean": 4323.2, + "valid_targets_min": 2438 + }, + { + "epoch": 1.0520656479909452, + "grad_norm": 0.436058730661621, + "learning_rate": 3.969495661721372e-05, + "loss": 0.1788, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10777398198843002, + "step": 930, + "valid_targets_mean": 4998.0, + "valid_targets_min": 878 + }, + { + "epoch": 1.0577249575551784, + "grad_norm": 0.443821466287199, + "learning_rate": 3.9685063266798434e-05, + "loss": 0.1697, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05714694410562515, + "step": 935, + "valid_targets_mean": 4670.8, + "valid_targets_min": 3180 + }, + { + "epoch": 1.0633842671194114, + "grad_norm": 0.48931491069822725, + "learning_rate": 3.967501330553366e-05, + "loss": 0.174, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08498260378837585, + "step": 940, + "valid_targets_mean": 3795.1, + "valid_targets_min": 934 + }, + { + "epoch": 1.0690435766836446, + "grad_norm": 0.4753832796757306, + "learning_rate": 3.966480681337508e-05, + "loss": 0.1772, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09979462623596191, + "step": 945, + "valid_targets_mean": 5293.8, + "valid_targets_min": 974 + }, + { + "epoch": 1.0747028862478778, + "grad_norm": 0.4393088035323427, + "learning_rate": 3.965444387152375e-05, + "loss": 0.1739, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07830634713172913, + "step": 950, + "valid_targets_mean": 5396.0, + "valid_targets_min": 3046 + }, + { + "epoch": 1.080362195812111, + "grad_norm": 0.5107813285857029, + "learning_rate": 3.9643924562425365e-05, + "loss": 0.1824, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09900990128517151, + "step": 955, + "valid_targets_mean": 4047.8, + "valid_targets_min": 208 + }, + { + "epoch": 1.086021505376344, + "grad_norm": 0.4446346454701824, + "learning_rate": 3.963324896976968e-05, + "loss": 0.1757, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08870100975036621, + "step": 960, + "valid_targets_mean": 4562.6, + "valid_targets_min": 2251 + }, + { + "epoch": 1.0916808149405772, + "grad_norm": 0.5071446028674766, + "learning_rate": 3.962241717848979e-05, + "loss": 0.1695, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07109926640987396, + "step": 965, + "valid_targets_mean": 4042.5, + "valid_targets_min": 542 + }, + { + "epoch": 1.0973401245048104, + "grad_norm": 0.4702307884438037, + "learning_rate": 3.961142927476151e-05, + "loss": 0.163, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08367455750703812, + "step": 970, + "valid_targets_mean": 4412.1, + "valid_targets_min": 1112 + }, + { + "epoch": 1.1029994340690437, + "grad_norm": 0.500278206270274, + "learning_rate": 3.960028534600264e-05, + "loss": 0.1749, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11240851879119873, + "step": 975, + "valid_targets_mean": 5104.9, + "valid_targets_min": 3949 + }, + { + "epoch": 1.1086587436332767, + "grad_norm": 0.452205291941356, + "learning_rate": 3.9588985480872275e-05, + "loss": 0.1634, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07218680530786514, + "step": 980, + "valid_targets_mean": 3918.5, + "valid_targets_min": 721 + }, + { + "epoch": 1.1143180531975099, + "grad_norm": 0.49493407137948864, + "learning_rate": 3.9577529769270137e-05, + "loss": 0.1647, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06644749641418457, + "step": 985, + "valid_targets_mean": 3751.2, + "valid_targets_min": 1010 + }, + { + "epoch": 1.119977362761743, + "grad_norm": 0.47833769220419, + "learning_rate": 3.9565918302335816e-05, + "loss": 0.1682, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07740724831819534, + "step": 990, + "valid_targets_mean": 4656.6, + "valid_targets_min": 992 + }, + { + "epoch": 1.1256366723259763, + "grad_norm": 0.3729966677444496, + "learning_rate": 3.955415117244807e-05, + "loss": 0.1666, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.050457730889320374, + "step": 995, + "valid_targets_mean": 4649.9, + "valid_targets_min": 3087 + }, + { + "epoch": 1.1312959818902093, + "grad_norm": 0.43822538776799025, + "learning_rate": 3.9542228473224086e-05, + "loss": 0.1678, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06258300691843033, + "step": 1000, + "valid_targets_mean": 4241.1, + "valid_targets_min": 1210 + }, + { + "epoch": 1.1369552914544425, + "grad_norm": 0.414173980870268, + "learning_rate": 3.953015029951874e-05, + "loss": 0.17, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07223384827375412, + "step": 1005, + "valid_targets_mean": 4123.4, + "valid_targets_min": 1873 + }, + { + "epoch": 1.1426146010186757, + "grad_norm": 0.4668668446908682, + "learning_rate": 3.9517916747423804e-05, + "loss": 0.1777, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08182642608880997, + "step": 1010, + "valid_targets_mean": 4018.4, + "valid_targets_min": 829 + }, + { + "epoch": 1.148273910582909, + "grad_norm": 0.47784482156709635, + "learning_rate": 3.9505527914267255e-05, + "loss": 0.1665, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12447123974561691, + "step": 1015, + "valid_targets_mean": 5604.6, + "valid_targets_min": 3883 + }, + { + "epoch": 1.1539332201471422, + "grad_norm": 0.5330915710914416, + "learning_rate": 3.949298389861243e-05, + "loss": 0.182, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0996529757976532, + "step": 1020, + "valid_targets_mean": 3934.1, + "valid_targets_min": 908 + }, + { + "epoch": 1.1595925297113752, + "grad_norm": 0.4320075593029899, + "learning_rate": 3.948028480025728e-05, + "loss": 0.1669, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10552115738391876, + "step": 1025, + "valid_targets_mean": 4959.0, + "valid_targets_min": 722 + }, + { + "epoch": 1.1652518392756084, + "grad_norm": 0.4840878345233582, + "learning_rate": 3.9467430720233555e-05, + "loss": 0.1724, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08098030835390091, + "step": 1030, + "valid_targets_mean": 3993.9, + "valid_targets_min": 745 + }, + { + "epoch": 1.1709111488398416, + "grad_norm": 0.42558305749071057, + "learning_rate": 3.945442176080604e-05, + "loss": 0.1481, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07060349732637405, + "step": 1035, + "valid_targets_mean": 3894.4, + "valid_targets_min": 675 + }, + { + "epoch": 1.1765704584040746, + "grad_norm": 0.40451357945778155, + "learning_rate": 3.944125802547168e-05, + "loss": 0.1622, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08304228633642197, + "step": 1040, + "valid_targets_mean": 5784.4, + "valid_targets_min": 3535 + }, + { + "epoch": 1.1822297679683078, + "grad_norm": 0.47445803593475144, + "learning_rate": 3.942793961895881e-05, + "loss": 0.1889, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08120131492614746, + "step": 1045, + "valid_targets_mean": 4282.4, + "valid_targets_min": 633 + }, + { + "epoch": 1.187889077532541, + "grad_norm": 0.7469184138090547, + "learning_rate": 3.941446664722629e-05, + "loss": 0.1796, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07243283092975616, + "step": 1050, + "valid_targets_mean": 3675.9, + "valid_targets_min": 1938 + }, + { + "epoch": 1.1935483870967742, + "grad_norm": 0.5187254028811755, + "learning_rate": 3.940083921746268e-05, + "loss": 0.1771, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06537231057882309, + "step": 1055, + "valid_targets_mean": 2210.6, + "valid_targets_min": 585 + }, + { + "epoch": 1.1992076966610075, + "grad_norm": 0.46132685248945166, + "learning_rate": 3.938705743808538e-05, + "loss": 0.1637, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09070291370153427, + "step": 1060, + "valid_targets_mean": 4892.1, + "valid_targets_min": 827 + }, + { + "epoch": 1.2048670062252405, + "grad_norm": 0.42107378234886195, + "learning_rate": 3.9373121418739765e-05, + "loss": 0.1929, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.051031287759542465, + "step": 1065, + "valid_targets_mean": 3665.1, + "valid_targets_min": 667 + }, + { + "epoch": 1.2105263157894737, + "grad_norm": 0.5998034252677006, + "learning_rate": 3.935903127029832e-05, + "loss": 0.175, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09086823463439941, + "step": 1070, + "valid_targets_mean": 4395.1, + "valid_targets_min": 915 + }, + { + "epoch": 1.216185625353707, + "grad_norm": 0.4919420688347187, + "learning_rate": 3.934478710485975e-05, + "loss": 0.1599, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07989349216222763, + "step": 1075, + "valid_targets_mean": 5468.0, + "valid_targets_min": 3413 + }, + { + "epoch": 1.22184493491794, + "grad_norm": 0.44931308740040277, + "learning_rate": 3.9330389035748086e-05, + "loss": 0.1757, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10464761406183243, + "step": 1080, + "valid_targets_mean": 3901.5, + "valid_targets_min": 795 + }, + { + "epoch": 1.227504244482173, + "grad_norm": 0.4841132192848014, + "learning_rate": 3.9315837177511816e-05, + "loss": 0.1683, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09728465229272842, + "step": 1085, + "valid_targets_mean": 4093.4, + "valid_targets_min": 802 + }, + { + "epoch": 1.2331635540464063, + "grad_norm": 0.5050118582970391, + "learning_rate": 3.93011316459229e-05, + "loss": 0.1791, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08465985208749771, + "step": 1090, + "valid_targets_mean": 4333.2, + "valid_targets_min": 782 + }, + { + "epoch": 1.2388228636106395, + "grad_norm": 0.47006483474037947, + "learning_rate": 3.928627255797593e-05, + "loss": 0.1707, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08035241812467575, + "step": 1095, + "valid_targets_mean": 4582.1, + "valid_targets_min": 2092 + }, + { + "epoch": 1.2444821731748728, + "grad_norm": 0.5168075468571941, + "learning_rate": 3.927126003188717e-05, + "loss": 0.165, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09381362795829773, + "step": 1100, + "valid_targets_mean": 3922.1, + "valid_targets_min": 580 + }, + { + "epoch": 1.2501414827391057, + "grad_norm": 0.42937223172023603, + "learning_rate": 3.925609418709358e-05, + "loss": 0.1615, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09360407292842865, + "step": 1105, + "valid_targets_mean": 4937.6, + "valid_targets_min": 1537 + }, + { + "epoch": 1.255800792303339, + "grad_norm": 0.43618534012315924, + "learning_rate": 3.924077514425193e-05, + "loss": 0.1812, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10003431141376495, + "step": 1110, + "valid_targets_mean": 4824.5, + "valid_targets_min": 2889 + }, + { + "epoch": 1.2614601018675722, + "grad_norm": 0.6241663145875258, + "learning_rate": 3.922530302523779e-05, + "loss": 0.1801, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09008560329675674, + "step": 1115, + "valid_targets_mean": 5705.1, + "valid_targets_min": 1131 + }, + { + "epoch": 1.2671194114318054, + "grad_norm": 0.4627854367269658, + "learning_rate": 3.920967795314456e-05, + "loss": 0.1746, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07201608270406723, + "step": 1120, + "valid_targets_mean": 4492.5, + "valid_targets_min": 637 + }, + { + "epoch": 1.2727787209960386, + "grad_norm": 0.4939836561149181, + "learning_rate": 3.919390005228254e-05, + "loss": 0.1846, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10071797668933868, + "step": 1125, + "valid_targets_mean": 5061.6, + "valid_targets_min": 2471 + }, + { + "epoch": 1.2784380305602716, + "grad_norm": 0.5263181704509103, + "learning_rate": 3.9177969448177884e-05, + "loss": 0.1725, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06983485072851181, + "step": 1130, + "valid_targets_mean": 5194.4, + "valid_targets_min": 1108 + }, + { + "epoch": 1.2840973401245048, + "grad_norm": 0.5333899612493772, + "learning_rate": 3.916188626757164e-05, + "loss": 0.1631, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09519990533590317, + "step": 1135, + "valid_targets_mean": 3938.8, + "valid_targets_min": 15 + }, + { + "epoch": 1.289756649688738, + "grad_norm": 0.49894575234027866, + "learning_rate": 3.9145650638418724e-05, + "loss": 0.1699, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07792861014604568, + "step": 1140, + "valid_targets_mean": 2669.5, + "valid_targets_min": 745 + }, + { + "epoch": 1.295415959252971, + "grad_norm": 0.5032211455364984, + "learning_rate": 3.91292626898869e-05, + "loss": 0.1797, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10145407915115356, + "step": 1145, + "valid_targets_mean": 5544.5, + "valid_targets_min": 2553 + }, + { + "epoch": 1.3010752688172043, + "grad_norm": 0.5082977470620306, + "learning_rate": 3.911272255235576e-05, + "loss": 0.1539, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09474017471075058, + "step": 1150, + "valid_targets_mean": 4219.9, + "valid_targets_min": 785 + }, + { + "epoch": 1.3067345783814375, + "grad_norm": 0.5500969855621471, + "learning_rate": 3.909603035741568e-05, + "loss": 0.1812, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07981321960687637, + "step": 1155, + "valid_targets_mean": 3778.5, + "valid_targets_min": 1597 + }, + { + "epoch": 1.3123938879456707, + "grad_norm": 0.5510028221465421, + "learning_rate": 3.90791862378668e-05, + "loss": 0.1781, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07977518439292908, + "step": 1160, + "valid_targets_mean": 3087.8, + "valid_targets_min": 701 + }, + { + "epoch": 1.318053197509904, + "grad_norm": 0.4163843482919556, + "learning_rate": 3.906219032771791e-05, + "loss": 0.1627, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07316476106643677, + "step": 1165, + "valid_targets_mean": 4405.0, + "valid_targets_min": 1049 + }, + { + "epoch": 1.323712507074137, + "grad_norm": 0.5358329444753391, + "learning_rate": 3.904504276218545e-05, + "loss": 0.1636, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08892858028411865, + "step": 1170, + "valid_targets_mean": 4614.6, + "valid_targets_min": 994 + }, + { + "epoch": 1.3293718166383701, + "grad_norm": 0.46086590526260945, + "learning_rate": 3.902774367769238e-05, + "loss": 0.1769, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08416149020195007, + "step": 1175, + "valid_targets_mean": 5230.2, + "valid_targets_min": 2795 + }, + { + "epoch": 1.3350311262026033, + "grad_norm": 0.4751092105006969, + "learning_rate": 3.901029321186715e-05, + "loss": 0.1658, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08622816205024719, + "step": 1180, + "valid_targets_mean": 3966.8, + "valid_targets_min": 1075 + }, + { + "epoch": 1.3406904357668363, + "grad_norm": 0.45549377694633447, + "learning_rate": 3.8992691503542526e-05, + "loss": 0.1744, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1080380231142044, + "step": 1185, + "valid_targets_mean": 4794.5, + "valid_targets_min": 1492 + }, + { + "epoch": 1.3463497453310695, + "grad_norm": 0.5128912494876406, + "learning_rate": 3.8974938692754565e-05, + "loss": 0.1771, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.047625429928302765, + "step": 1190, + "valid_targets_mean": 2920.6, + "valid_targets_min": 766 + }, + { + "epoch": 1.3520090548953028, + "grad_norm": 0.4850220234048233, + "learning_rate": 3.895703492074147e-05, + "loss": 0.1798, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06328266113996506, + "step": 1195, + "valid_targets_mean": 3986.6, + "valid_targets_min": 649 + }, + { + "epoch": 1.357668364459536, + "grad_norm": 0.4791338051890519, + "learning_rate": 3.893898032994244e-05, + "loss": 0.1643, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08418089151382446, + "step": 1200, + "valid_targets_mean": 3799.8, + "valid_targets_min": 538 + }, + { + "epoch": 1.3633276740237692, + "grad_norm": 0.40539567154387013, + "learning_rate": 3.892077506399659e-05, + "loss": 0.1674, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09177593886852264, + "step": 1205, + "valid_targets_mean": 5939.2, + "valid_targets_min": 3590 + }, + { + "epoch": 1.3689869835880022, + "grad_norm": 0.44924221339414133, + "learning_rate": 3.890241926774176e-05, + "loss": 0.1697, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10084094107151031, + "step": 1210, + "valid_targets_mean": 5455.0, + "valid_targets_min": 2221 + }, + { + "epoch": 1.3746462931522354, + "grad_norm": 0.5121991774486875, + "learning_rate": 3.888391308721339e-05, + "loss": 0.1669, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08851577341556549, + "step": 1215, + "valid_targets_mean": 3401.2, + "valid_targets_min": 780 + }, + { + "epoch": 1.3803056027164686, + "grad_norm": 0.42009287730737793, + "learning_rate": 3.8865256669643345e-05, + "loss": 0.1724, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05673500895500183, + "step": 1220, + "valid_targets_mean": 3161.4, + "valid_targets_min": 1098 + }, + { + "epoch": 1.3859649122807016, + "grad_norm": 0.46750169212645165, + "learning_rate": 3.884645016345876e-05, + "loss": 0.189, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08093661069869995, + "step": 1225, + "valid_targets_mean": 4127.6, + "valid_targets_min": 831 + }, + { + "epoch": 1.3916242218449348, + "grad_norm": 0.5477737176509025, + "learning_rate": 3.882749371828084e-05, + "loss": 0.1782, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10087968409061432, + "step": 1230, + "valid_targets_mean": 4692.5, + "valid_targets_min": 595 + }, + { + "epoch": 1.397283531409168, + "grad_norm": 1.078980263858723, + "learning_rate": 3.880838748492367e-05, + "loss": 0.1705, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09843247383832932, + "step": 1235, + "valid_targets_mean": 4910.5, + "valid_targets_min": 1290 + }, + { + "epoch": 1.4029428409734013, + "grad_norm": 0.4765403366834978, + "learning_rate": 3.878913161539304e-05, + "loss": 0.172, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07836732268333435, + "step": 1240, + "valid_targets_mean": 4141.2, + "valid_targets_min": 1072 + }, + { + "epoch": 1.4086021505376345, + "grad_norm": 0.4742515019966279, + "learning_rate": 3.876972626288521e-05, + "loss": 0.1698, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07745373994112015, + "step": 1245, + "valid_targets_mean": 4078.5, + "valid_targets_min": 911 + }, + { + "epoch": 1.4142614601018675, + "grad_norm": 0.40113156730105887, + "learning_rate": 3.87501715817857e-05, + "loss": 0.173, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0976414829492569, + "step": 1250, + "valid_targets_mean": 5969.4, + "valid_targets_min": 3854 + }, + { + "epoch": 1.4199207696661007, + "grad_norm": 0.4407783027341501, + "learning_rate": 3.873046772766806e-05, + "loss": 0.1588, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04508190602064133, + "step": 1255, + "valid_targets_mean": 2916.4, + "valid_targets_min": 805 + }, + { + "epoch": 1.425580079230334, + "grad_norm": 0.4819613719574391, + "learning_rate": 3.871061485729264e-05, + "loss": 0.1828, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09332561492919922, + "step": 1260, + "valid_targets_mean": 3906.0, + "valid_targets_min": 458 + }, + { + "epoch": 1.4312393887945671, + "grad_norm": 0.45785335612246114, + "learning_rate": 3.8690613128605325e-05, + "loss": 0.165, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04996608942747116, + "step": 1265, + "valid_targets_mean": 2526.4, + "valid_targets_min": 605 + }, + { + "epoch": 1.4368986983588004, + "grad_norm": 0.47893211444006956, + "learning_rate": 3.867046270073631e-05, + "loss": 0.1669, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11241722106933594, + "step": 1270, + "valid_targets_mean": 4426.0, + "valid_targets_min": 1053 + }, + { + "epoch": 1.4425580079230333, + "grad_norm": 0.4051415862996819, + "learning_rate": 3.8650163733998796e-05, + "loss": 0.1791, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.099061019718647, + "step": 1275, + "valid_targets_mean": 5980.4, + "valid_targets_min": 3069 + }, + { + "epoch": 1.4482173174872666, + "grad_norm": 0.43193351469522073, + "learning_rate": 3.862971638988774e-05, + "loss": 0.1718, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07562971115112305, + "step": 1280, + "valid_targets_mean": 4382.0, + "valid_targets_min": 814 + }, + { + "epoch": 1.4538766270514998, + "grad_norm": 0.5562865609979454, + "learning_rate": 3.860912083107856e-05, + "loss": 0.1633, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10491952300071716, + "step": 1285, + "valid_targets_mean": 5855.9, + "valid_targets_min": 3745 + }, + { + "epoch": 1.4595359366157328, + "grad_norm": 0.4240431603380037, + "learning_rate": 3.8588377221425846e-05, + "loss": 0.1681, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09419338405132294, + "step": 1290, + "valid_targets_mean": 3856.1, + "valid_targets_min": 917 + }, + { + "epoch": 1.465195246179966, + "grad_norm": 0.4256429369547614, + "learning_rate": 3.8567485725962054e-05, + "loss": 0.1615, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08709464967250824, + "step": 1295, + "valid_targets_mean": 4358.2, + "valid_targets_min": 592 + }, + { + "epoch": 1.4708545557441992, + "grad_norm": 0.43296296073396, + "learning_rate": 3.8546446510896196e-05, + "loss": 0.1594, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08416568487882614, + "step": 1300, + "valid_targets_mean": 4426.6, + "valid_targets_min": 1044 + }, + { + "epoch": 1.4765138653084324, + "grad_norm": 0.4757029215157015, + "learning_rate": 3.8525259743612504e-05, + "loss": 0.176, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07040561735630035, + "step": 1305, + "valid_targets_mean": 3371.6, + "valid_targets_min": 618 + }, + { + "epoch": 1.4821731748726656, + "grad_norm": 0.4536305794203802, + "learning_rate": 3.850392559266912e-05, + "loss": 0.1672, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08284276723861694, + "step": 1310, + "valid_targets_mean": 4372.6, + "valid_targets_min": 1245 + }, + { + "epoch": 1.4878324844368986, + "grad_norm": 0.4140956803323456, + "learning_rate": 3.848244422779675e-05, + "loss": 0.1743, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08564776182174683, + "step": 1315, + "valid_targets_mean": 5399.5, + "valid_targets_min": 2506 + }, + { + "epoch": 1.4934917940011319, + "grad_norm": 0.42493198206523786, + "learning_rate": 3.8460815819897275e-05, + "loss": 0.1633, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08379765599966049, + "step": 1320, + "valid_targets_mean": 4486.0, + "valid_targets_min": 788 + }, + { + "epoch": 1.499151103565365, + "grad_norm": 0.47021085136955376, + "learning_rate": 3.8439040541042477e-05, + "loss": 0.1707, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08951611816883087, + "step": 1325, + "valid_targets_mean": 4878.9, + "valid_targets_min": 645 + }, + { + "epoch": 1.504810413129598, + "grad_norm": 0.4262433511521074, + "learning_rate": 3.8417118564472566e-05, + "loss": 0.1537, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09468840062618256, + "step": 1330, + "valid_targets_mean": 4579.5, + "valid_targets_min": 966 + }, + { + "epoch": 1.5104697226938315, + "grad_norm": 0.5679694081053924, + "learning_rate": 3.8395050064594886e-05, + "loss": 0.16, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08240014314651489, + "step": 1335, + "valid_targets_mean": 5132.5, + "valid_targets_min": 3765 + }, + { + "epoch": 1.5161290322580645, + "grad_norm": 0.49765931740864866, + "learning_rate": 3.8372835216982474e-05, + "loss": 0.1744, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0933043360710144, + "step": 1340, + "valid_targets_mean": 4363.1, + "valid_targets_min": 841 + }, + { + "epoch": 1.5217883418222977, + "grad_norm": 0.3839495020225596, + "learning_rate": 3.83504741983727e-05, + "loss": 0.1728, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07548220455646515, + "step": 1345, + "valid_targets_mean": 5226.9, + "valid_targets_min": 792 + }, + { + "epoch": 1.527447651386531, + "grad_norm": 0.43898825980074324, + "learning_rate": 3.832796718666583e-05, + "loss": 0.1701, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07292063534259796, + "step": 1350, + "valid_targets_mean": 4563.8, + "valid_targets_min": 2482 + }, + { + "epoch": 1.533106960950764, + "grad_norm": 0.48518356393795203, + "learning_rate": 3.830531436092363e-05, + "loss": 0.1577, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07241171598434448, + "step": 1355, + "valid_targets_mean": 4070.2, + "valid_targets_min": 885 + }, + { + "epoch": 1.5387662705149971, + "grad_norm": 0.45236807786393446, + "learning_rate": 3.828251590136795e-05, + "loss": 0.1676, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06732207536697388, + "step": 1360, + "valid_targets_mean": 4394.2, + "valid_targets_min": 838 + }, + { + "epoch": 1.5444255800792304, + "grad_norm": 0.39599418094090755, + "learning_rate": 3.8259571989379256e-05, + "loss": 0.1762, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06596770882606506, + "step": 1365, + "valid_targets_mean": 4575.8, + "valid_targets_min": 1614 + }, + { + "epoch": 1.5500848896434634, + "grad_norm": 0.5129844028064013, + "learning_rate": 3.8236482807495214e-05, + "loss": 0.1764, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09706057608127594, + "step": 1370, + "valid_targets_mean": 4956.1, + "valid_targets_min": 3160 + }, + { + "epoch": 1.5557441992076968, + "grad_norm": 0.6141381897581633, + "learning_rate": 3.8213248539409236e-05, + "loss": 0.1763, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07557655870914459, + "step": 1375, + "valid_targets_mean": 2923.5, + "valid_targets_min": 446 + }, + { + "epoch": 1.5614035087719298, + "grad_norm": 0.47754545208990645, + "learning_rate": 3.8189869369969016e-05, + "loss": 0.175, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10123506188392639, + "step": 1380, + "valid_targets_mean": 4382.2, + "valid_targets_min": 2584 + }, + { + "epoch": 1.567062818336163, + "grad_norm": 0.44248504366689034, + "learning_rate": 3.816634548517505e-05, + "loss": 0.1589, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07783418893814087, + "step": 1385, + "valid_targets_mean": 4585.0, + "valid_targets_min": 1033 + }, + { + "epoch": 1.5727221279003962, + "grad_norm": 0.43030447121547316, + "learning_rate": 3.814267707217917e-05, + "loss": 0.1707, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07308053970336914, + "step": 1390, + "valid_targets_mean": 4720.8, + "valid_targets_min": 919 + }, + { + "epoch": 1.5783814374646292, + "grad_norm": 0.4586930676443484, + "learning_rate": 3.8118864319283025e-05, + "loss": 0.1688, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08720464259386063, + "step": 1395, + "valid_targets_mean": 4623.0, + "valid_targets_min": 3063 + }, + { + "epoch": 1.5840407470288624, + "grad_norm": 0.42051583620393174, + "learning_rate": 3.809490741593665e-05, + "loss": 0.1705, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06851358711719513, + "step": 1400, + "valid_targets_mean": 4098.1, + "valid_targets_min": 942 + }, + { + "epoch": 1.5897000565930957, + "grad_norm": 0.426486860854374, + "learning_rate": 3.807080655273689e-05, + "loss": 0.1768, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09186510741710663, + "step": 1405, + "valid_targets_mean": 4794.6, + "valid_targets_min": 1337 + }, + { + "epoch": 1.5953593661573287, + "grad_norm": 0.5092191248922854, + "learning_rate": 3.8046561921425895e-05, + "loss": 0.1673, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10097256302833557, + "step": 1410, + "valid_targets_mean": 4049.1, + "valid_targets_min": 892 + }, + { + "epoch": 1.601018675721562, + "grad_norm": 0.4959017826333464, + "learning_rate": 3.802217371488964e-05, + "loss": 0.1674, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10378652065992355, + "step": 1415, + "valid_targets_mean": 4263.1, + "valid_targets_min": 784 + }, + { + "epoch": 1.606677985285795, + "grad_norm": 0.4334448110285005, + "learning_rate": 3.799764212715633e-05, + "loss": 0.1669, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08897187560796738, + "step": 1420, + "valid_targets_mean": 4737.1, + "valid_targets_min": 616 + }, + { + "epoch": 1.6123372948500283, + "grad_norm": 0.4431249471264606, + "learning_rate": 3.7972967353394906e-05, + "loss": 0.1663, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08749301731586456, + "step": 1425, + "valid_targets_mean": 4490.6, + "valid_targets_min": 1494 + }, + { + "epoch": 1.6179966044142615, + "grad_norm": 0.471697864372509, + "learning_rate": 3.794814958991346e-05, + "loss": 0.1629, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07814671844244003, + "step": 1430, + "valid_targets_mean": 4200.8, + "valid_targets_min": 799 + }, + { + "epoch": 1.6236559139784945, + "grad_norm": 0.42695267108722634, + "learning_rate": 3.792318903415769e-05, + "loss": 0.1691, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08809150010347366, + "step": 1435, + "valid_targets_mean": 4149.5, + "valid_targets_min": 1979 + }, + { + "epoch": 1.629315223542728, + "grad_norm": 0.46188114289897536, + "learning_rate": 3.789808588470932e-05, + "loss": 0.1738, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13377422094345093, + "step": 1440, + "valid_targets_mean": 5021.4, + "valid_targets_min": 987 + }, + { + "epoch": 1.634974533106961, + "grad_norm": 0.4614437449542694, + "learning_rate": 3.787284034128453e-05, + "loss": 0.1758, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1316135674715042, + "step": 1445, + "valid_targets_mean": 4855.9, + "valid_targets_min": 2768 + }, + { + "epoch": 1.6406338426711942, + "grad_norm": 0.42401849750170334, + "learning_rate": 3.784745260473235e-05, + "loss": 0.1907, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09326368570327759, + "step": 1450, + "valid_targets_mean": 4557.5, + "valid_targets_min": 1404 + }, + { + "epoch": 1.6462931522354274, + "grad_norm": 0.4866900107370593, + "learning_rate": 3.782192287703309e-05, + "loss": 0.1678, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09447487443685532, + "step": 1455, + "valid_targets_mean": 4396.1, + "valid_targets_min": 681 + }, + { + "epoch": 1.6519524617996604, + "grad_norm": 0.5670329845005558, + "learning_rate": 3.7796251361296695e-05, + "loss": 0.1659, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08563503623008728, + "step": 1460, + "valid_targets_mean": 4477.1, + "valid_targets_min": 970 + }, + { + "epoch": 1.6576117713638936, + "grad_norm": 0.48597443173722327, + "learning_rate": 3.777043826176117e-05, + "loss": 0.162, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10906068235635757, + "step": 1465, + "valid_targets_mean": 4699.4, + "valid_targets_min": 1785 + }, + { + "epoch": 1.6632710809281268, + "grad_norm": 0.43633695396844524, + "learning_rate": 3.7744483783790924e-05, + "loss": 0.1678, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0824105441570282, + "step": 1470, + "valid_targets_mean": 4050.4, + "valid_targets_min": 713 + }, + { + "epoch": 1.6689303904923598, + "grad_norm": 0.45975620063234174, + "learning_rate": 3.771838813387516e-05, + "loss": 0.1717, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06895600259304047, + "step": 1475, + "valid_targets_mean": 2748.8, + "valid_targets_min": 518 + }, + { + "epoch": 1.6745897000565932, + "grad_norm": 0.47116502897375734, + "learning_rate": 3.7692151519626196e-05, + "loss": 0.1721, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10219314694404602, + "step": 1480, + "valid_targets_mean": 4644.0, + "valid_targets_min": 1718 + }, + { + "epoch": 1.6802490096208262, + "grad_norm": 0.437534716359976, + "learning_rate": 3.766577414977786e-05, + "loss": 0.1638, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08322703838348389, + "step": 1485, + "valid_targets_mean": 3599.5, + "valid_targets_min": 702 + }, + { + "epoch": 1.6859083191850595, + "grad_norm": 0.4609434817876175, + "learning_rate": 3.763925623418379e-05, + "loss": 0.1797, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09441813826560974, + "step": 1490, + "valid_targets_mean": 4214.4, + "valid_targets_min": 707 + }, + { + "epoch": 1.6915676287492927, + "grad_norm": 0.4889953026498548, + "learning_rate": 3.7612597983815797e-05, + "loss": 0.1636, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0672110915184021, + "step": 1495, + "valid_targets_mean": 2605.5, + "valid_targets_min": 1006 + }, + { + "epoch": 1.6972269383135257, + "grad_norm": 0.4192839007012644, + "learning_rate": 3.7585799610762166e-05, + "loss": 0.1574, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07656887918710709, + "step": 1500, + "valid_targets_mean": 4995.6, + "valid_targets_min": 3152 + }, + { + "epoch": 1.7028862478777589, + "grad_norm": 0.4089458973289831, + "learning_rate": 3.755886132822596e-05, + "loss": 0.1549, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07222750782966614, + "step": 1505, + "valid_targets_mean": 4711.1, + "valid_targets_min": 2985 + }, + { + "epoch": 1.708545557441992, + "grad_norm": 0.4476059201111342, + "learning_rate": 3.753178335052335e-05, + "loss": 0.1724, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06474441289901733, + "step": 1510, + "valid_targets_mean": 4263.2, + "valid_targets_min": 907 + }, + { + "epoch": 1.714204867006225, + "grad_norm": 0.40230431448159, + "learning_rate": 3.750456589308189e-05, + "loss": 0.1673, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06748899817466736, + "step": 1515, + "valid_targets_mean": 3847.9, + "valid_targets_min": 1053 + }, + { + "epoch": 1.7198641765704585, + "grad_norm": 0.45552405093533893, + "learning_rate": 3.7477209172438824e-05, + "loss": 0.1793, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1874983310699463, + "step": 1520, + "valid_targets_mean": 4535.4, + "valid_targets_min": 3324 + }, + { + "epoch": 1.7255234861346915, + "grad_norm": 0.435468676442549, + "learning_rate": 3.744971340623932e-05, + "loss": 0.1757, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10153517127037048, + "step": 1525, + "valid_targets_mean": 5321.5, + "valid_targets_min": 3033 + }, + { + "epoch": 1.7311827956989247, + "grad_norm": 0.4203152226427508, + "learning_rate": 3.74220788132348e-05, + "loss": 0.1627, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0713239312171936, + "step": 1530, + "valid_targets_mean": 4939.5, + "valid_targets_min": 539 + }, + { + "epoch": 1.736842105263158, + "grad_norm": 0.3768752280829058, + "learning_rate": 3.739430561328116e-05, + "loss": 0.1717, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07466333359479904, + "step": 1535, + "valid_targets_mean": 4691.0, + "valid_targets_min": 866 + }, + { + "epoch": 1.742501414827391, + "grad_norm": 0.5000948400819923, + "learning_rate": 3.736639402733699e-05, + "loss": 0.1576, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09439294785261154, + "step": 1540, + "valid_targets_mean": 4558.9, + "valid_targets_min": 855 + }, + { + "epoch": 1.7481607243916242, + "grad_norm": 0.45359918218466555, + "learning_rate": 3.733834427746192e-05, + "loss": 0.1681, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06033856421709061, + "step": 1545, + "valid_targets_mean": 3641.6, + "valid_targets_min": 702 + }, + { + "epoch": 1.7538200339558574, + "grad_norm": 0.40621155826121474, + "learning_rate": 3.7310156586814736e-05, + "loss": 0.1565, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08569200336933136, + "step": 1550, + "valid_targets_mean": 5872.5, + "valid_targets_min": 538 + }, + { + "epoch": 1.7594793435200904, + "grad_norm": 0.5537632772731778, + "learning_rate": 3.7281831179651674e-05, + "loss": 0.1764, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08237239718437195, + "step": 1555, + "valid_targets_mean": 4151.4, + "valid_targets_min": 834 + }, + { + "epoch": 1.7651386530843238, + "grad_norm": 0.4045864201373758, + "learning_rate": 3.725336828132462e-05, + "loss": 0.1546, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09154015779495239, + "step": 1560, + "valid_targets_mean": 5237.5, + "valid_targets_min": 3287 + }, + { + "epoch": 1.7707979626485568, + "grad_norm": 0.4429464646778792, + "learning_rate": 3.722476811827931e-05, + "loss": 0.1888, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07061810791492462, + "step": 1565, + "valid_targets_mean": 4083.9, + "valid_targets_min": 886 + }, + { + "epoch": 1.77645727221279, + "grad_norm": 0.4329150724172389, + "learning_rate": 3.719603091805354e-05, + "loss": 0.1703, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06012389063835144, + "step": 1570, + "valid_targets_mean": 4519.1, + "valid_targets_min": 757 + }, + { + "epoch": 1.7821165817770233, + "grad_norm": 0.4532667563787616, + "learning_rate": 3.716715690927534e-05, + "loss": 0.1643, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06587879359722137, + "step": 1575, + "valid_targets_mean": 3429.0, + "valid_targets_min": 652 + }, + { + "epoch": 1.7877758913412563, + "grad_norm": 0.4602869186766888, + "learning_rate": 3.713814632166117e-05, + "loss": 0.1737, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08349546790122986, + "step": 1580, + "valid_targets_mean": 4580.9, + "valid_targets_min": 813 + }, + { + "epoch": 1.7934352009054897, + "grad_norm": 0.43479514474501096, + "learning_rate": 3.7108999386014094e-05, + "loss": 0.1611, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08423180878162384, + "step": 1585, + "valid_targets_mean": 4774.0, + "valid_targets_min": 1293 + }, + { + "epoch": 1.7990945104697227, + "grad_norm": 0.38480018852004316, + "learning_rate": 3.707971633422192e-05, + "loss": 0.165, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06913229823112488, + "step": 1590, + "valid_targets_mean": 4858.5, + "valid_targets_min": 2530 + }, + { + "epoch": 1.804753820033956, + "grad_norm": 0.37920991451233066, + "learning_rate": 3.705029739925539e-05, + "loss": 0.169, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05300310254096985, + "step": 1595, + "valid_targets_mean": 4163.0, + "valid_targets_min": 3420 + }, + { + "epoch": 1.8104131295981891, + "grad_norm": 0.5121283079990696, + "learning_rate": 3.702074281516629e-05, + "loss": 0.1593, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09372977912425995, + "step": 1600, + "valid_targets_mean": 3409.9, + "valid_targets_min": 888 + }, + { + "epoch": 1.8160724391624221, + "grad_norm": 0.7434439149630447, + "learning_rate": 3.699105281708562e-05, + "loss": 0.1671, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07295916974544525, + "step": 1605, + "valid_targets_mean": 2680.2, + "valid_targets_min": 609 + }, + { + "epoch": 1.8217317487266553, + "grad_norm": 0.3515437166590083, + "learning_rate": 3.69612276412217e-05, + "loss": 0.1644, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09418608248233795, + "step": 1610, + "valid_targets_mean": 5993.4, + "valid_targets_min": 1172 + }, + { + "epoch": 1.8273910582908885, + "grad_norm": 0.4124601295522288, + "learning_rate": 3.693126752485833e-05, + "loss": 0.1678, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0780620202422142, + "step": 1615, + "valid_targets_mean": 4195.5, + "valid_targets_min": 1827 + }, + { + "epoch": 1.8330503678551215, + "grad_norm": 0.39514053571943636, + "learning_rate": 3.6901172706352804e-05, + "loss": 0.1628, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09082343429327011, + "step": 1620, + "valid_targets_mean": 5891.2, + "valid_targets_min": 3395 + }, + { + "epoch": 1.838709677419355, + "grad_norm": 0.4776704377883989, + "learning_rate": 3.687094342513416e-05, + "loss": 0.1514, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10870778560638428, + "step": 1625, + "valid_targets_mean": 4887.2, + "valid_targets_min": 696 + }, + { + "epoch": 1.844368986983588, + "grad_norm": 0.44658165498993846, + "learning_rate": 3.6840579921701155e-05, + "loss": 0.1748, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.03744108974933624, + "step": 1630, + "valid_targets_mean": 2857.6, + "valid_targets_min": 519 + }, + { + "epoch": 1.8500282965478212, + "grad_norm": 0.39639049398686715, + "learning_rate": 3.68100824376204e-05, + "loss": 0.1663, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08571626245975494, + "step": 1635, + "valid_targets_mean": 5266.9, + "valid_targets_min": 2710 + }, + { + "epoch": 1.8556876061120544, + "grad_norm": 0.46124202068553993, + "learning_rate": 3.6779451215524425e-05, + "loss": 0.1776, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12593677639961243, + "step": 1640, + "valid_targets_mean": 4627.6, + "valid_targets_min": 1099 + }, + { + "epoch": 1.8613469156762874, + "grad_norm": 0.3974760577524209, + "learning_rate": 3.6748686499109784e-05, + "loss": 0.1604, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0651841014623642, + "step": 1645, + "valid_targets_mean": 3087.1, + "valid_targets_min": 641 + }, + { + "epoch": 1.8670062252405206, + "grad_norm": 0.44533365576743184, + "learning_rate": 3.6717788533135056e-05, + "loss": 0.1715, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0940522775053978, + "step": 1650, + "valid_targets_mean": 4810.6, + "valid_targets_min": 549 + }, + { + "epoch": 1.8726655348047538, + "grad_norm": 0.3828204365917702, + "learning_rate": 3.6686757563418945e-05, + "loss": 0.1658, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05665905773639679, + "step": 1655, + "valid_targets_mean": 4366.4, + "valid_targets_min": 1990 + }, + { + "epoch": 1.8783248443689868, + "grad_norm": 0.41482926244303475, + "learning_rate": 3.665559383683832e-05, + "loss": 0.1767, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09432278573513031, + "step": 1660, + "valid_targets_mean": 5099.0, + "valid_targets_min": 3104 + }, + { + "epoch": 1.8839841539332203, + "grad_norm": 0.46641172087648614, + "learning_rate": 3.6624297601326205e-05, + "loss": 0.1694, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0752536952495575, + "step": 1665, + "valid_targets_mean": 3882.4, + "valid_targets_min": 606 + }, + { + "epoch": 1.8896434634974533, + "grad_norm": 0.40809508030574715, + "learning_rate": 3.659286910586988e-05, + "loss": 0.1685, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09947772324085236, + "step": 1670, + "valid_targets_mean": 5819.4, + "valid_targets_min": 2463 + }, + { + "epoch": 1.8953027730616865, + "grad_norm": 0.46697262961426633, + "learning_rate": 3.656130860050883e-05, + "loss": 0.177, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1176053062081337, + "step": 1675, + "valid_targets_mean": 5384.8, + "valid_targets_min": 2999 + }, + { + "epoch": 1.9009620826259197, + "grad_norm": 0.40772178798520387, + "learning_rate": 3.652961633633282e-05, + "loss": 0.1769, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07497359812259674, + "step": 1680, + "valid_targets_mean": 4524.5, + "valid_targets_min": 2422 + }, + { + "epoch": 1.9066213921901527, + "grad_norm": 0.48410402681820747, + "learning_rate": 3.649779256547984e-05, + "loss": 0.151, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11213832348585129, + "step": 1685, + "valid_targets_mean": 5447.1, + "valid_targets_min": 1306 + }, + { + "epoch": 1.912280701754386, + "grad_norm": 0.44494313417092723, + "learning_rate": 3.6465837541134114e-05, + "loss": 0.1646, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06522098928689957, + "step": 1690, + "valid_targets_mean": 3329.4, + "valid_targets_min": 582 + }, + { + "epoch": 1.9179400113186191, + "grad_norm": 0.4159935936201916, + "learning_rate": 3.643375151752414e-05, + "loss": 0.1609, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09458167850971222, + "step": 1695, + "valid_targets_mean": 4523.0, + "valid_targets_min": 2527 + }, + { + "epoch": 1.9235993208828521, + "grad_norm": 0.46268831742419625, + "learning_rate": 3.6401534749920566e-05, + "loss": 0.1641, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09030048549175262, + "step": 1700, + "valid_targets_mean": 4302.6, + "valid_targets_min": 799 + }, + { + "epoch": 1.9292586304470856, + "grad_norm": 0.4121471047740769, + "learning_rate": 3.636918749463426e-05, + "loss": 0.1583, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10419407486915588, + "step": 1705, + "valid_targets_mean": 4844.0, + "valid_targets_min": 3186 + }, + { + "epoch": 1.9349179400113186, + "grad_norm": 0.45760821825741604, + "learning_rate": 3.633671000901422e-05, + "loss": 0.17, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08946783095598221, + "step": 1710, + "valid_targets_mean": 4509.2, + "valid_targets_min": 803 + }, + { + "epoch": 1.9405772495755518, + "grad_norm": 0.39939701382928094, + "learning_rate": 3.63041025514455e-05, + "loss": 0.1787, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05140351504087448, + "step": 1715, + "valid_targets_mean": 3167.9, + "valid_targets_min": 804 + }, + { + "epoch": 1.946236559139785, + "grad_norm": 0.44092257279546754, + "learning_rate": 3.627136538134723e-05, + "loss": 0.1588, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0948881134390831, + "step": 1720, + "valid_targets_mean": 5175.4, + "valid_targets_min": 593 + }, + { + "epoch": 1.951895868704018, + "grad_norm": 0.3766369899070683, + "learning_rate": 3.623849875917049e-05, + "loss": 0.1577, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06955253332853317, + "step": 1725, + "valid_targets_mean": 4416.2, + "valid_targets_min": 1539 + }, + { + "epoch": 1.9575551782682514, + "grad_norm": 0.45005811542410384, + "learning_rate": 3.620550294639625e-05, + "loss": 0.1518, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10431881248950958, + "step": 1730, + "valid_targets_mean": 4239.8, + "valid_targets_min": 731 + }, + { + "epoch": 1.9632144878324844, + "grad_norm": 0.4354013196495527, + "learning_rate": 3.6172378205533316e-05, + "loss": 0.1731, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07305517792701721, + "step": 1735, + "valid_targets_mean": 4305.5, + "valid_targets_min": 488 + }, + { + "epoch": 1.9688737973967176, + "grad_norm": 0.5345446987710915, + "learning_rate": 3.613912480011621e-05, + "loss": 0.1655, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05507698655128479, + "step": 1740, + "valid_targets_mean": 4315.2, + "valid_targets_min": 726 + }, + { + "epoch": 1.9745331069609509, + "grad_norm": 0.3931017359709917, + "learning_rate": 3.610574299470308e-05, + "loss": 0.1722, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06514061987400055, + "step": 1745, + "valid_targets_mean": 4905.4, + "valid_targets_min": 2774 + }, + { + "epoch": 1.9801924165251839, + "grad_norm": 0.4084676016371109, + "learning_rate": 3.6072233054873634e-05, + "loss": 0.1581, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05309581384062767, + "step": 1750, + "valid_targets_mean": 3293.2, + "valid_targets_min": 875 + }, + { + "epoch": 1.985851726089417, + "grad_norm": 0.40719947036553433, + "learning_rate": 3.6038595247226946e-05, + "loss": 0.1684, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05154433846473694, + "step": 1755, + "valid_targets_mean": 4403.5, + "valid_targets_min": 3483 + }, + { + "epoch": 1.9915110356536503, + "grad_norm": 0.42152391622926233, + "learning_rate": 3.600482983937943e-05, + "loss": 0.164, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10693687945604324, + "step": 1760, + "valid_targets_mean": 4542.2, + "valid_targets_min": 965 + }, + { + "epoch": 1.9971703452178833, + "grad_norm": 0.468669752313274, + "learning_rate": 3.597093709996263e-05, + "loss": 0.158, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09784398972988129, + "step": 1765, + "valid_targets_mean": 5566.4, + "valid_targets_min": 3179 + }, + { + "epoch": 2.0022637238256933, + "grad_norm": 0.38164634477438114, + "learning_rate": 3.593691729862114e-05, + "loss": 0.1563, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05581476911902428, + "step": 1770, + "valid_targets_mean": 3847.4, + "valid_targets_min": 2834 + }, + { + "epoch": 2.0079230333899263, + "grad_norm": 0.4598605412377324, + "learning_rate": 3.5902770706010414e-05, + "loss": 0.1519, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06393562257289886, + "step": 1775, + "valid_targets_mean": 3141.9, + "valid_targets_min": 857 + }, + { + "epoch": 2.0135823429541597, + "grad_norm": 0.4853998799481806, + "learning_rate": 3.586849759379466e-05, + "loss": 0.1534, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0760306864976883, + "step": 1780, + "valid_targets_mean": 3283.2, + "valid_targets_min": 803 + }, + { + "epoch": 2.0192416525183927, + "grad_norm": 0.40950768715400654, + "learning_rate": 3.583409823464464e-05, + "loss": 0.1469, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09658635407686234, + "step": 1785, + "valid_targets_mean": 5160.1, + "valid_targets_min": 1512 + }, + { + "epoch": 2.0249009620826257, + "grad_norm": 0.43022475076187133, + "learning_rate": 3.5799572902235506e-05, + "loss": 0.1587, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06704957038164139, + "step": 1790, + "valid_targets_mean": 3235.5, + "valid_targets_min": 691 + }, + { + "epoch": 2.030560271646859, + "grad_norm": 0.45514971214272915, + "learning_rate": 3.576492187124465e-05, + "loss": 0.157, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08505785465240479, + "step": 1795, + "valid_targets_mean": 3761.9, + "valid_targets_min": 665 + }, + { + "epoch": 2.036219581211092, + "grad_norm": 0.38830828219332025, + "learning_rate": 3.5730145417349486e-05, + "loss": 0.1446, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07708819210529327, + "step": 1800, + "valid_targets_mean": 5218.2, + "valid_targets_min": 3969 + }, + { + "epoch": 2.0418788907753256, + "grad_norm": 0.40674797214880776, + "learning_rate": 3.569524381722527e-05, + "loss": 0.143, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04384655877947807, + "step": 1805, + "valid_targets_mean": 3537.0, + "valid_targets_min": 898 + }, + { + "epoch": 2.0475382003395586, + "grad_norm": 0.461951673040666, + "learning_rate": 3.5660217348542905e-05, + "loss": 0.1518, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.057785239070653915, + "step": 1810, + "valid_targets_mean": 3609.1, + "valid_targets_min": 520 + }, + { + "epoch": 2.0531975099037916, + "grad_norm": 0.41783173366319004, + "learning_rate": 3.562506628996672e-05, + "loss": 0.1584, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08158080279827118, + "step": 1815, + "valid_targets_mean": 5007.1, + "valid_targets_min": 813 + }, + { + "epoch": 2.058856819468025, + "grad_norm": 0.4217972179797958, + "learning_rate": 3.558979092115227e-05, + "loss": 0.1427, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06197461113333702, + "step": 1820, + "valid_targets_mean": 3635.1, + "valid_targets_min": 947 + }, + { + "epoch": 2.064516129032258, + "grad_norm": 0.42597801368713495, + "learning_rate": 3.555439152274408e-05, + "loss": 0.1446, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07840049266815186, + "step": 1825, + "valid_targets_mean": 5291.2, + "valid_targets_min": 3679 + }, + { + "epoch": 2.0701754385964914, + "grad_norm": 0.4731859258680113, + "learning_rate": 3.551886837637346e-05, + "loss": 0.148, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0768074244260788, + "step": 1830, + "valid_targets_mean": 4411.2, + "valid_targets_min": 509 + }, + { + "epoch": 2.0758347481607244, + "grad_norm": 0.5118784640299195, + "learning_rate": 3.548322176465622e-05, + "loss": 0.1504, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08978752791881561, + "step": 1835, + "valid_targets_mean": 5522.9, + "valid_targets_min": 457 + }, + { + "epoch": 2.0814940577249574, + "grad_norm": 0.5053935242898774, + "learning_rate": 3.544745197119042e-05, + "loss": 0.1357, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.053898461163043976, + "step": 1840, + "valid_targets_mean": 4025.9, + "valid_targets_min": 835 + }, + { + "epoch": 2.087153367289191, + "grad_norm": 0.4754883031499429, + "learning_rate": 3.541155928055418e-05, + "loss": 0.146, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07881324738264084, + "step": 1845, + "valid_targets_mean": 4451.1, + "valid_targets_min": 649 + }, + { + "epoch": 2.092812676853424, + "grad_norm": 0.3793192111099605, + "learning_rate": 3.537554397830331e-05, + "loss": 0.1483, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0541934035718441, + "step": 1850, + "valid_targets_mean": 4109.9, + "valid_targets_min": 850 + }, + { + "epoch": 2.098471986417657, + "grad_norm": 0.392401216822069, + "learning_rate": 3.533940635096915e-05, + "loss": 0.1436, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06416421383619308, + "step": 1855, + "valid_targets_mean": 3447.4, + "valid_targets_min": 749 + }, + { + "epoch": 2.1041312959818903, + "grad_norm": 0.4284045829219447, + "learning_rate": 3.530314668605621e-05, + "loss": 0.1473, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08904364705085754, + "step": 1860, + "valid_targets_mean": 5840.2, + "valid_targets_min": 3605 + }, + { + "epoch": 2.1097906055461233, + "grad_norm": 0.41891647003973426, + "learning_rate": 3.5266765272039895e-05, + "loss": 0.1428, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06498528271913528, + "step": 1865, + "valid_targets_mean": 4160.9, + "valid_targets_min": 2234 + }, + { + "epoch": 2.1154499151103567, + "grad_norm": 0.4171242825363113, + "learning_rate": 3.523026239836426e-05, + "loss": 0.1531, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06435523927211761, + "step": 1870, + "valid_targets_mean": 4477.0, + "valid_targets_min": 842 + }, + { + "epoch": 2.1211092246745897, + "grad_norm": 0.48270456111758525, + "learning_rate": 3.5193638355439635e-05, + "loss": 0.1568, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06888650357723236, + "step": 1875, + "valid_targets_mean": 3914.2, + "valid_targets_min": 703 + }, + { + "epoch": 2.1267685342388227, + "grad_norm": 0.4207467840251639, + "learning_rate": 3.515689343464038e-05, + "loss": 0.1373, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.057466741651296616, + "step": 1880, + "valid_targets_mean": 3851.0, + "valid_targets_min": 831 + }, + { + "epoch": 2.132427843803056, + "grad_norm": 0.44643940581689956, + "learning_rate": 3.512002792830252e-05, + "loss": 0.145, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10760434716939926, + "step": 1885, + "valid_targets_mean": 5129.9, + "valid_targets_min": 726 + }, + { + "epoch": 2.138087153367289, + "grad_norm": 0.4173071399920157, + "learning_rate": 3.508304212972145e-05, + "loss": 0.1662, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09083765000104904, + "step": 1890, + "valid_targets_mean": 5537.1, + "valid_targets_min": 2262 + }, + { + "epoch": 2.143746462931522, + "grad_norm": 0.4131138429446764, + "learning_rate": 3.504593633314957e-05, + "loss": 0.1433, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0552748367190361, + "step": 1895, + "valid_targets_mean": 4545.2, + "valid_targets_min": 3737 + }, + { + "epoch": 2.1494057724957556, + "grad_norm": 0.44696394601503486, + "learning_rate": 3.500871083379398e-05, + "loss": 0.1541, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07711172103881836, + "step": 1900, + "valid_targets_mean": 3986.8, + "valid_targets_min": 961 + }, + { + "epoch": 2.1550650820599886, + "grad_norm": 0.4344083516085787, + "learning_rate": 3.497136592781411e-05, + "loss": 0.1602, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0813417136669159, + "step": 1905, + "valid_targets_mean": 4604.6, + "valid_targets_min": 3314 + }, + { + "epoch": 2.160724391624222, + "grad_norm": 0.4177114929015115, + "learning_rate": 3.493390191231937e-05, + "loss": 0.154, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06349954009056091, + "step": 1910, + "valid_targets_mean": 3767.6, + "valid_targets_min": 1365 + }, + { + "epoch": 2.166383701188455, + "grad_norm": 0.45479658530448075, + "learning_rate": 3.4896319085366764e-05, + "loss": 0.1538, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06105777621269226, + "step": 1915, + "valid_targets_mean": 3140.4, + "valid_targets_min": 600 + }, + { + "epoch": 2.172043010752688, + "grad_norm": 0.42772927464599314, + "learning_rate": 3.485861774595857e-05, + "loss": 0.1398, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06553034484386444, + "step": 1920, + "valid_targets_mean": 4002.6, + "valid_targets_min": 1369 + }, + { + "epoch": 2.1777023203169215, + "grad_norm": 0.39369813220146993, + "learning_rate": 3.482079819403991e-05, + "loss": 0.1457, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04461285471916199, + "step": 1925, + "valid_targets_mean": 3621.9, + "valid_targets_min": 843 + }, + { + "epoch": 2.1833616298811545, + "grad_norm": 0.44147959388502445, + "learning_rate": 3.4782860730496385e-05, + "loss": 0.1389, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07102487981319427, + "step": 1930, + "valid_targets_mean": 3977.1, + "valid_targets_min": 854 + }, + { + "epoch": 2.1890209394453874, + "grad_norm": 0.39371749205547546, + "learning_rate": 3.474480565715168e-05, + "loss": 0.1442, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07203125208616257, + "step": 1935, + "valid_targets_mean": 4581.4, + "valid_targets_min": 3064 + }, + { + "epoch": 2.194680249009621, + "grad_norm": 0.40285017267387363, + "learning_rate": 3.470663327676517e-05, + "loss": 0.1465, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08734564483165741, + "step": 1940, + "valid_targets_mean": 5019.4, + "valid_targets_min": 796 + }, + { + "epoch": 2.200339558573854, + "grad_norm": 0.3948278031119211, + "learning_rate": 3.466834389302951e-05, + "loss": 0.1454, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07350989431142807, + "step": 1945, + "valid_targets_mean": 4807.1, + "valid_targets_min": 2299 + }, + { + "epoch": 2.2059988681380873, + "grad_norm": 0.3970591777583386, + "learning_rate": 3.4629937810568185e-05, + "loss": 0.1545, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04631432890892029, + "step": 1950, + "valid_targets_mean": 3908.2, + "valid_targets_min": 1020 + }, + { + "epoch": 2.2116581777023203, + "grad_norm": 0.4459356993331635, + "learning_rate": 3.459141533493315e-05, + "loss": 0.1555, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0741652101278305, + "step": 1955, + "valid_targets_mean": 4168.5, + "valid_targets_min": 1226 + }, + { + "epoch": 2.2173174872665533, + "grad_norm": 0.4286919056205702, + "learning_rate": 3.455277677260231e-05, + "loss": 0.1464, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.046766247600317, + "step": 1960, + "valid_targets_mean": 3290.2, + "valid_targets_min": 543 + }, + { + "epoch": 2.2229767968307867, + "grad_norm": 0.45359873321900634, + "learning_rate": 3.451402243097721e-05, + "loss": 0.1504, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07193360477685928, + "step": 1965, + "valid_targets_mean": 3724.8, + "valid_targets_min": 1095 + }, + { + "epoch": 2.2286361063950197, + "grad_norm": 0.4610896223901999, + "learning_rate": 3.4475152618380456e-05, + "loss": 0.147, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07311922311782837, + "step": 1970, + "valid_targets_mean": 3807.1, + "valid_targets_min": 756 + }, + { + "epoch": 2.234295415959253, + "grad_norm": 0.45890949421852156, + "learning_rate": 3.443616764405334e-05, + "loss": 0.1466, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08454000949859619, + "step": 1975, + "valid_targets_mean": 5013.5, + "valid_targets_min": 3670 + }, + { + "epoch": 2.239954725523486, + "grad_norm": 0.3976276871306243, + "learning_rate": 3.4397067818153345e-05, + "loss": 0.1474, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07557929307222366, + "step": 1980, + "valid_targets_mean": 5051.4, + "valid_targets_min": 2694 + }, + { + "epoch": 2.245614035087719, + "grad_norm": 0.5164643431672998, + "learning_rate": 3.435785345175173e-05, + "loss": 0.1481, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08138914406299591, + "step": 1985, + "valid_targets_mean": 3482.2, + "valid_targets_min": 333 + }, + { + "epoch": 2.2512733446519526, + "grad_norm": 0.39235155232868035, + "learning_rate": 3.431852485683098e-05, + "loss": 0.146, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0673598051071167, + "step": 1990, + "valid_targets_mean": 4212.6, + "valid_targets_min": 1091 + }, + { + "epoch": 2.2569326542161856, + "grad_norm": 0.42132759007295134, + "learning_rate": 3.4279082346282396e-05, + "loss": 0.1512, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07544170320034027, + "step": 1995, + "valid_targets_mean": 3662.5, + "valid_targets_min": 714 + }, + { + "epoch": 2.2625919637804186, + "grad_norm": 0.4371234214666715, + "learning_rate": 3.423952623390352e-05, + "loss": 0.1497, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0634286105632782, + "step": 2000, + "valid_targets_mean": 3417.5, + "valid_targets_min": 1032 + }, + { + "epoch": 2.268251273344652, + "grad_norm": 0.4516073220780232, + "learning_rate": 3.419985683439574e-05, + "loss": 0.1437, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08280553668737411, + "step": 2005, + "valid_targets_mean": 3872.0, + "valid_targets_min": 760 + }, + { + "epoch": 2.273910582908885, + "grad_norm": 0.4327606837018574, + "learning_rate": 3.416007446336172e-05, + "loss": 0.1601, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07464776933193207, + "step": 2010, + "valid_targets_mean": 4113.1, + "valid_targets_min": 929 + }, + { + "epoch": 2.279569892473118, + "grad_norm": 0.45979213897938775, + "learning_rate": 3.4120179437302885e-05, + "loss": 0.1625, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06329210102558136, + "step": 2015, + "valid_targets_mean": 3205.6, + "valid_targets_min": 838 + }, + { + "epoch": 2.2852292020373515, + "grad_norm": 0.5147030308340043, + "learning_rate": 3.408017207361696e-05, + "loss": 0.1604, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06684980541467667, + "step": 2020, + "valid_targets_mean": 4525.1, + "valid_targets_min": 564 + }, + { + "epoch": 2.2908885116015845, + "grad_norm": 0.4273178592333042, + "learning_rate": 3.4040052690595376e-05, + "loss": 0.1555, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0679871216416359, + "step": 2025, + "valid_targets_mean": 4480.8, + "valid_targets_min": 605 + }, + { + "epoch": 2.296547821165818, + "grad_norm": 0.44412466989906196, + "learning_rate": 3.399982160742079e-05, + "loss": 0.1682, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09213119745254517, + "step": 2030, + "valid_targets_mean": 4025.1, + "valid_targets_min": 726 + }, + { + "epoch": 2.302207130730051, + "grad_norm": 0.44963401241502204, + "learning_rate": 3.3959479144164515e-05, + "loss": 0.1462, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0578080490231514, + "step": 2035, + "valid_targets_mean": 3141.4, + "valid_targets_min": 743 + }, + { + "epoch": 2.3078664402942843, + "grad_norm": 0.3757124295405019, + "learning_rate": 3.3919025621783996e-05, + "loss": 0.1488, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09045130014419556, + "step": 2040, + "valid_targets_mean": 5871.6, + "valid_targets_min": 1173 + }, + { + "epoch": 2.3135257498585173, + "grad_norm": 0.41071737566918487, + "learning_rate": 3.387846136212022e-05, + "loss": 0.1646, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09165093302726746, + "step": 2045, + "valid_targets_mean": 6308.2, + "valid_targets_min": 3031 + }, + { + "epoch": 2.3191850594227503, + "grad_norm": 0.4476630815546567, + "learning_rate": 3.3837786687895214e-05, + "loss": 0.153, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11285878717899323, + "step": 2050, + "valid_targets_mean": 4769.9, + "valid_targets_min": 641 + }, + { + "epoch": 2.3248443689869838, + "grad_norm": 0.4393230995482543, + "learning_rate": 3.3797001922709416e-05, + "loss": 0.1524, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06960204243659973, + "step": 2055, + "valid_targets_mean": 3908.6, + "valid_targets_min": 765 + }, + { + "epoch": 2.3305036785512168, + "grad_norm": 0.42068438511605677, + "learning_rate": 3.375610739103913e-05, + "loss": 0.153, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06832148134708405, + "step": 2060, + "valid_targets_mean": 4344.8, + "valid_targets_min": 3004 + }, + { + "epoch": 2.3361629881154498, + "grad_norm": 0.3863254474498113, + "learning_rate": 3.371510341823396e-05, + "loss": 0.1613, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06194869056344032, + "step": 2065, + "valid_targets_mean": 5143.4, + "valid_targets_min": 2901 + }, + { + "epoch": 2.341822297679683, + "grad_norm": 0.4091631273082714, + "learning_rate": 3.3673990330514197e-05, + "loss": 0.1474, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07695840299129486, + "step": 2070, + "valid_targets_mean": 5505.4, + "valid_targets_min": 2700 + }, + { + "epoch": 2.347481607243916, + "grad_norm": 0.46076084442053256, + "learning_rate": 3.363276845496822e-05, + "loss": 0.1592, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08079289644956589, + "step": 2075, + "valid_targets_mean": 4149.0, + "valid_targets_min": 516 + }, + { + "epoch": 2.353140916808149, + "grad_norm": 0.39131668898512206, + "learning_rate": 3.359143811954992e-05, + "loss": 0.137, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.059139885008335114, + "step": 2080, + "valid_targets_mean": 3908.0, + "valid_targets_min": 1167 + }, + { + "epoch": 2.3588002263723826, + "grad_norm": 0.4549116346582667, + "learning_rate": 3.354999965307606e-05, + "loss": 0.1528, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0692763477563858, + "step": 2085, + "valid_targets_mean": 4050.5, + "valid_targets_min": 582 + }, + { + "epoch": 2.3644595359366156, + "grad_norm": 0.4217354798895774, + "learning_rate": 3.3508453385223684e-05, + "loss": 0.1473, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.056441131979227066, + "step": 2090, + "valid_targets_mean": 3384.5, + "valid_targets_min": 533 + }, + { + "epoch": 2.370118845500849, + "grad_norm": 0.44033154527695634, + "learning_rate": 3.346679964652749e-05, + "loss": 0.1483, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05215318500995636, + "step": 2095, + "valid_targets_mean": 3119.9, + "valid_targets_min": 511 + }, + { + "epoch": 2.375778155065082, + "grad_norm": 0.4182006017122001, + "learning_rate": 3.342503876837718e-05, + "loss": 0.1497, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06546589732170105, + "step": 2100, + "valid_targets_mean": 3704.0, + "valid_targets_min": 575 + }, + { + "epoch": 2.381437464629315, + "grad_norm": 0.4212047746481986, + "learning_rate": 3.3383171083014856e-05, + "loss": 0.1513, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08931387960910797, + "step": 2105, + "valid_targets_mean": 4144.6, + "valid_targets_min": 555 + }, + { + "epoch": 2.3870967741935485, + "grad_norm": 0.4288474036977798, + "learning_rate": 3.3341196923532336e-05, + "loss": 0.1561, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0849856287240982, + "step": 2110, + "valid_targets_mean": 4928.0, + "valid_targets_min": 865 + }, + { + "epoch": 2.3927560837577815, + "grad_norm": 0.38751229029431433, + "learning_rate": 3.329911662386855e-05, + "loss": 0.1552, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05749806761741638, + "step": 2115, + "valid_targets_mean": 4271.8, + "valid_targets_min": 1232 + }, + { + "epoch": 2.398415393322015, + "grad_norm": 0.41618646597581344, + "learning_rate": 3.3256930518806845e-05, + "loss": 0.1515, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06670378893613815, + "step": 2120, + "valid_targets_mean": 4715.2, + "valid_targets_min": 3320 + }, + { + "epoch": 2.404074702886248, + "grad_norm": 0.474401377226432, + "learning_rate": 3.321463894397235e-05, + "loss": 0.1411, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04895223304629326, + "step": 2125, + "valid_targets_mean": 2425.6, + "valid_targets_min": 567 + }, + { + "epoch": 2.409734012450481, + "grad_norm": 0.38685092605333266, + "learning_rate": 3.317224223582927e-05, + "loss": 0.152, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07395574450492859, + "step": 2130, + "valid_targets_mean": 5728.9, + "valid_targets_min": 3951 + }, + { + "epoch": 2.4153933220147144, + "grad_norm": 0.3918290943068241, + "learning_rate": 3.312974073167825e-05, + "loss": 0.151, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0735509991645813, + "step": 2135, + "valid_targets_mean": 5016.4, + "valid_targets_min": 3691 + }, + { + "epoch": 2.4210526315789473, + "grad_norm": 0.4629251712183381, + "learning_rate": 3.30871347696537e-05, + "loss": 0.1563, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09464327245950699, + "step": 2140, + "valid_targets_mean": 4297.0, + "valid_targets_min": 1141 + }, + { + "epoch": 2.4267119411431803, + "grad_norm": 0.43001982073455564, + "learning_rate": 3.3044424688721016e-05, + "loss": 0.1551, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05989306792616844, + "step": 2145, + "valid_targets_mean": 3079.8, + "valid_targets_min": 622 + }, + { + "epoch": 2.432371250707414, + "grad_norm": 0.40529478100635535, + "learning_rate": 3.300161082867398e-05, + "loss": 0.1554, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05601631850004196, + "step": 2150, + "valid_targets_mean": 4970.9, + "valid_targets_min": 3761 + }, + { + "epoch": 2.4380305602716468, + "grad_norm": 0.4282952529198715, + "learning_rate": 3.295869353013204e-05, + "loss": 0.1485, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04712842404842377, + "step": 2155, + "valid_targets_mean": 3694.9, + "valid_targets_min": 855 + }, + { + "epoch": 2.44368986983588, + "grad_norm": 0.4368949928760048, + "learning_rate": 3.291567313453754e-05, + "loss": 0.1651, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09831711649894714, + "step": 2160, + "valid_targets_mean": 5302.5, + "valid_targets_min": 566 + }, + { + "epoch": 2.449349179400113, + "grad_norm": 0.4283967196738616, + "learning_rate": 3.287254998415308e-05, + "loss": 0.1528, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05926726013422012, + "step": 2165, + "valid_targets_mean": 4209.5, + "valid_targets_min": 1235 + }, + { + "epoch": 2.455008488964346, + "grad_norm": 0.4601520310854504, + "learning_rate": 3.282932442205875e-05, + "loss": 0.1653, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08058198541402817, + "step": 2170, + "valid_targets_mean": 4529.2, + "valid_targets_min": 3029 + }, + { + "epoch": 2.4606677985285796, + "grad_norm": 0.41455531743940477, + "learning_rate": 3.2785996792149397e-05, + "loss": 0.1533, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.056564852595329285, + "step": 2175, + "valid_targets_mean": 3533.8, + "valid_targets_min": 503 + }, + { + "epoch": 2.4663271080928126, + "grad_norm": 0.4082041217576355, + "learning_rate": 3.274256743913192e-05, + "loss": 0.1517, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08435338735580444, + "step": 2180, + "valid_targets_mean": 5211.6, + "valid_targets_min": 782 + }, + { + "epoch": 2.471986417657046, + "grad_norm": 0.41503179008053365, + "learning_rate": 3.2699036708522486e-05, + "loss": 0.1512, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08984512090682983, + "step": 2185, + "valid_targets_mean": 5146.2, + "valid_targets_min": 3698 + }, + { + "epoch": 2.477645727221279, + "grad_norm": 0.40654068944558236, + "learning_rate": 3.265540494664383e-05, + "loss": 0.154, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0718407854437828, + "step": 2190, + "valid_targets_mean": 4568.1, + "valid_targets_min": 1298 + }, + { + "epoch": 2.483305036785512, + "grad_norm": 0.3871382104758788, + "learning_rate": 3.261167250062246e-05, + "loss": 0.1429, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06182036176323891, + "step": 2195, + "valid_targets_mean": 4259.8, + "valid_targets_min": 936 + }, + { + "epoch": 2.4889643463497455, + "grad_norm": 0.40196418123933986, + "learning_rate": 3.25678397183859e-05, + "loss": 0.1602, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07311452180147171, + "step": 2200, + "valid_targets_mean": 4942.6, + "valid_targets_min": 1128 + }, + { + "epoch": 2.4946236559139785, + "grad_norm": 0.40355276539895685, + "learning_rate": 3.252390694865995e-05, + "loss": 0.1617, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08624698966741562, + "step": 2205, + "valid_targets_mean": 3625.0, + "valid_targets_min": 841 + }, + { + "epoch": 2.5002829654782115, + "grad_norm": 0.42340242119765203, + "learning_rate": 3.247987454096588e-05, + "loss": 0.1406, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04377666860818863, + "step": 2210, + "valid_targets_mean": 2506.9, + "valid_targets_min": 541 + }, + { + "epoch": 2.505942275042445, + "grad_norm": 0.3735018004653183, + "learning_rate": 3.2435742845617664e-05, + "loss": 0.153, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0659358948469162, + "step": 2215, + "valid_targets_mean": 5061.8, + "valid_targets_min": 858 + }, + { + "epoch": 2.511601584606678, + "grad_norm": 0.3910106024638139, + "learning_rate": 3.2391512213719195e-05, + "loss": 0.1678, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06071570888161659, + "step": 2220, + "valid_targets_mean": 3984.2, + "valid_targets_min": 797 + }, + { + "epoch": 2.517260894170911, + "grad_norm": 0.41022498748862024, + "learning_rate": 3.23471829971615e-05, + "loss": 0.1485, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08031398057937622, + "step": 2225, + "valid_targets_mean": 4441.9, + "valid_targets_min": 902 + }, + { + "epoch": 2.5229202037351444, + "grad_norm": 0.45208808748524826, + "learning_rate": 3.230275554861988e-05, + "loss": 0.1486, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06380513310432434, + "step": 2230, + "valid_targets_mean": 3601.4, + "valid_targets_min": 840 + }, + { + "epoch": 2.5285795132993774, + "grad_norm": 0.49124129099212027, + "learning_rate": 3.2258230221551216e-05, + "loss": 0.1501, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09621863067150116, + "step": 2235, + "valid_targets_mean": 4221.6, + "valid_targets_min": 1002 + }, + { + "epoch": 2.534238822863611, + "grad_norm": 0.45069608459991756, + "learning_rate": 3.221360737019105e-05, + "loss": 0.1519, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08188635110855103, + "step": 2240, + "valid_targets_mean": 4627.2, + "valid_targets_min": 602 + }, + { + "epoch": 2.539898132427844, + "grad_norm": 0.3717770971048161, + "learning_rate": 3.216888734955082e-05, + "loss": 0.1471, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07405725121498108, + "step": 2245, + "valid_targets_mean": 5225.9, + "valid_targets_min": 1222 + }, + { + "epoch": 2.5455574419920772, + "grad_norm": 0.3836746061666795, + "learning_rate": 3.2124070515415026e-05, + "loss": 0.1589, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04923267289996147, + "step": 2250, + "valid_targets_mean": 3801.4, + "valid_targets_min": 697 + }, + { + "epoch": 2.5512167515563102, + "grad_norm": 0.41207146754487395, + "learning_rate": 3.20791572243384e-05, + "loss": 0.1447, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07558373361825943, + "step": 2255, + "valid_targets_mean": 4714.8, + "valid_targets_min": 1137 + }, + { + "epoch": 2.556876061120543, + "grad_norm": 0.40753875314352955, + "learning_rate": 3.2034147833643085e-05, + "loss": 0.1499, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08858911693096161, + "step": 2260, + "valid_targets_mean": 5047.2, + "valid_targets_min": 2089 + }, + { + "epoch": 2.5625353706847767, + "grad_norm": 0.42032582049648987, + "learning_rate": 3.1989042701415735e-05, + "loss": 0.1537, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09731382876634598, + "step": 2265, + "valid_targets_mean": 5203.5, + "valid_targets_min": 1666 + }, + { + "epoch": 2.5681946802490097, + "grad_norm": 0.4488806402881286, + "learning_rate": 3.194384218650475e-05, + "loss": 0.1438, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0860210582613945, + "step": 2270, + "valid_targets_mean": 3994.9, + "valid_targets_min": 940 + }, + { + "epoch": 2.5738539898132426, + "grad_norm": 0.47354442917342715, + "learning_rate": 3.1898546648517344e-05, + "loss": 0.145, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04557155445218086, + "step": 2275, + "valid_targets_mean": 3269.5, + "valid_targets_min": 900 + }, + { + "epoch": 2.579513299377476, + "grad_norm": 0.42920371162836857, + "learning_rate": 3.185315644781674e-05, + "loss": 0.1537, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05872861295938492, + "step": 2280, + "valid_targets_mean": 3707.4, + "valid_targets_min": 979 + }, + { + "epoch": 2.585172608941709, + "grad_norm": 0.37132954056850914, + "learning_rate": 3.1807671945519275e-05, + "loss": 0.1491, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08896137773990631, + "step": 2285, + "valid_targets_mean": 5777.6, + "valid_targets_min": 1024 + }, + { + "epoch": 2.590831918505942, + "grad_norm": 0.4908335623779361, + "learning_rate": 3.1762093503491515e-05, + "loss": 0.1466, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10549749433994293, + "step": 2290, + "valid_targets_mean": 4565.0, + "valid_targets_min": 1214 + }, + { + "epoch": 2.5964912280701755, + "grad_norm": 0.4175731909630048, + "learning_rate": 3.171642148434743e-05, + "loss": 0.1453, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08646541088819504, + "step": 2295, + "valid_targets_mean": 4578.4, + "valid_targets_min": 751 + }, + { + "epoch": 2.6021505376344085, + "grad_norm": 0.39875994955354976, + "learning_rate": 3.167065625144544e-05, + "loss": 0.1607, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06747978180646896, + "step": 2300, + "valid_targets_mean": 4489.2, + "valid_targets_min": 995 + }, + { + "epoch": 2.6078098471986415, + "grad_norm": 0.38092725982478753, + "learning_rate": 3.1624798168885566e-05, + "loss": 0.1457, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06792249530553818, + "step": 2305, + "valid_targets_mean": 4156.2, + "valid_targets_min": 830 + }, + { + "epoch": 2.613469156762875, + "grad_norm": 0.4124089739684331, + "learning_rate": 3.157884760150653e-05, + "loss": 0.141, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05572713166475296, + "step": 2310, + "valid_targets_mean": 4153.0, + "valid_targets_min": 1056 + }, + { + "epoch": 2.619128466327108, + "grad_norm": 0.3669941315615533, + "learning_rate": 3.153280491488285e-05, + "loss": 0.16, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05892043188214302, + "step": 2315, + "valid_targets_mean": 5032.2, + "valid_targets_min": 3534 + }, + { + "epoch": 2.6247877758913414, + "grad_norm": 0.3760575494744598, + "learning_rate": 3.148667047532191e-05, + "loss": 0.1414, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09052421152591705, + "step": 2320, + "valid_targets_mean": 6134.5, + "valid_targets_min": 2907 + }, + { + "epoch": 2.6304470854555744, + "grad_norm": 0.3924992165822893, + "learning_rate": 3.1440444649861084e-05, + "loss": 0.1534, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09136009961366653, + "step": 2325, + "valid_targets_mean": 4639.0, + "valid_targets_min": 593 + }, + { + "epoch": 2.636106395019808, + "grad_norm": 0.444337417482683, + "learning_rate": 3.139412780626478e-05, + "loss": 0.1484, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10496281087398529, + "step": 2330, + "valid_targets_mean": 5259.1, + "valid_targets_min": 3087 + }, + { + "epoch": 2.641765704584041, + "grad_norm": 0.46361927995775437, + "learning_rate": 3.134772031302156e-05, + "loss": 0.1625, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07918952405452728, + "step": 2335, + "valid_targets_mean": 4159.8, + "valid_targets_min": 614 + }, + { + "epoch": 2.647425014148274, + "grad_norm": 0.43604059915746735, + "learning_rate": 3.130122253934113e-05, + "loss": 0.1459, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10850637406110764, + "step": 2340, + "valid_targets_mean": 4567.0, + "valid_targets_min": 1533 + }, + { + "epoch": 2.6530843237125072, + "grad_norm": 0.34968167277540757, + "learning_rate": 3.125463485515149e-05, + "loss": 0.1348, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.066865473985672, + "step": 2345, + "valid_targets_mean": 4618.1, + "valid_targets_min": 867 + }, + { + "epoch": 2.6587436332767402, + "grad_norm": 0.37590826439644837, + "learning_rate": 3.1207957631095944e-05, + "loss": 0.1542, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06878937780857086, + "step": 2350, + "valid_targets_mean": 4676.1, + "valid_targets_min": 650 + }, + { + "epoch": 2.6644029428409732, + "grad_norm": 0.43498857410718444, + "learning_rate": 3.116119123853014e-05, + "loss": 0.156, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0715278685092926, + "step": 2355, + "valid_targets_mean": 4000.4, + "valid_targets_min": 1064 + }, + { + "epoch": 2.6700622524052067, + "grad_norm": 0.4325233255617633, + "learning_rate": 3.1114336049519165e-05, + "loss": 0.1488, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08205617964267731, + "step": 2360, + "valid_targets_mean": 4495.0, + "valid_targets_min": 3361 + }, + { + "epoch": 2.6757215619694397, + "grad_norm": 0.4867071625595286, + "learning_rate": 3.106739243683453e-05, + "loss": 0.1525, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09085595607757568, + "step": 2365, + "valid_targets_mean": 4596.4, + "valid_targets_min": 1151 + }, + { + "epoch": 2.6813808715336727, + "grad_norm": 0.4840513445692285, + "learning_rate": 3.1020360773951225e-05, + "loss": 0.1525, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09496608376502991, + "step": 2370, + "valid_targets_mean": 5998.2, + "valid_targets_min": 848 + }, + { + "epoch": 2.687040181097906, + "grad_norm": 0.3825083606824115, + "learning_rate": 3.097324143504479e-05, + "loss": 0.1712, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05611414089798927, + "step": 2375, + "valid_targets_mean": 4643.1, + "valid_targets_min": 2532 + }, + { + "epoch": 2.692699490662139, + "grad_norm": 0.37185312994075015, + "learning_rate": 3.092603479498826e-05, + "loss": 0.1461, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05171559751033783, + "step": 2380, + "valid_targets_mean": 4414.4, + "valid_targets_min": 804 + }, + { + "epoch": 2.6983588002263725, + "grad_norm": 0.4529768380858292, + "learning_rate": 3.087874122934924e-05, + "loss": 0.1529, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10659369826316833, + "step": 2385, + "valid_targets_mean": 4850.8, + "valid_targets_min": 970 + }, + { + "epoch": 2.7040181097906055, + "grad_norm": 0.40953772781413506, + "learning_rate": 3.0831361114386905e-05, + "loss": 0.1387, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0732925534248352, + "step": 2390, + "valid_targets_mean": 4131.1, + "valid_targets_min": 1580 + }, + { + "epoch": 2.709677419354839, + "grad_norm": 0.4128393468690056, + "learning_rate": 3.078389482704897e-05, + "loss": 0.1825, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06107977032661438, + "step": 2395, + "valid_targets_mean": 4340.4, + "valid_targets_min": 889 + }, + { + "epoch": 2.715336728919072, + "grad_norm": 0.38940843119794, + "learning_rate": 3.0736342744968764e-05, + "loss": 0.1519, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0651690661907196, + "step": 2400, + "valid_targets_mean": 3953.8, + "valid_targets_min": 1788 + }, + { + "epoch": 2.720996038483305, + "grad_norm": 0.3902558994515665, + "learning_rate": 3.068870524646215e-05, + "loss": 0.1383, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.058106355369091034, + "step": 2405, + "valid_targets_mean": 4521.8, + "valid_targets_min": 799 + }, + { + "epoch": 2.7266553480475384, + "grad_norm": 0.46226492163174343, + "learning_rate": 3.064098271052457e-05, + "loss": 0.1241, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05499572306871414, + "step": 2410, + "valid_targets_mean": 4190.6, + "valid_targets_min": 951 + }, + { + "epoch": 2.7323146576117714, + "grad_norm": 0.4502379500010983, + "learning_rate": 3.059317551682801e-05, + "loss": 0.1296, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.055237721651792526, + "step": 2415, + "valid_targets_mean": 3715.5, + "valid_targets_min": 1308 + }, + { + "epoch": 2.7379739671760044, + "grad_norm": 0.3951276275638442, + "learning_rate": 3.0545284045717956e-05, + "loss": 0.1306, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05399983376264572, + "step": 2420, + "valid_targets_mean": 5128.8, + "valid_targets_min": 3891 + }, + { + "epoch": 2.743633276740238, + "grad_norm": 0.46656617048679605, + "learning_rate": 3.0497308678210413e-05, + "loss": 0.129, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06615126132965088, + "step": 2425, + "valid_targets_mean": 4154.8, + "valid_targets_min": 971 + }, + { + "epoch": 2.749292586304471, + "grad_norm": 0.4564732956435993, + "learning_rate": 3.044924979598882e-05, + "loss": 0.1313, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0642513632774353, + "step": 2430, + "valid_targets_mean": 4204.2, + "valid_targets_min": 531 + }, + { + "epoch": 2.754951895868704, + "grad_norm": 0.40750417013353074, + "learning_rate": 3.0401107781401092e-05, + "loss": 0.1348, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07343044877052307, + "step": 2435, + "valid_targets_mean": 3966.6, + "valid_targets_min": 877 + }, + { + "epoch": 2.7606112054329373, + "grad_norm": 0.41672746194881155, + "learning_rate": 3.0352883017456497e-05, + "loss": 0.1324, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06949064880609512, + "step": 2440, + "valid_targets_mean": 4208.1, + "valid_targets_min": 493 + }, + { + "epoch": 2.7662705149971702, + "grad_norm": 0.4440170674321101, + "learning_rate": 3.0304575887822635e-05, + "loss": 0.1431, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06081555783748627, + "step": 2445, + "valid_targets_mean": 3918.1, + "valid_targets_min": 894 + }, + { + "epoch": 2.7719298245614032, + "grad_norm": 0.4327731942064898, + "learning_rate": 3.0256186776822415e-05, + "loss": 0.1298, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0668378621339798, + "step": 2450, + "valid_targets_mean": 4771.1, + "valid_targets_min": 3350 + }, + { + "epoch": 2.7775891341256367, + "grad_norm": 0.39979909071916686, + "learning_rate": 3.0207716069430968e-05, + "loss": 0.1487, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06764158606529236, + "step": 2455, + "valid_targets_mean": 4707.1, + "valid_targets_min": 1550 + }, + { + "epoch": 2.7832484436898697, + "grad_norm": 0.41238229272880683, + "learning_rate": 3.015916415127259e-05, + "loss": 0.1433, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08149917423725128, + "step": 2460, + "valid_targets_mean": 5554.6, + "valid_targets_min": 2696 + }, + { + "epoch": 2.788907753254103, + "grad_norm": 0.4403137588769108, + "learning_rate": 3.011053140861768e-05, + "loss": 0.1365, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07517523318529129, + "step": 2465, + "valid_targets_mean": 4508.2, + "valid_targets_min": 611 + }, + { + "epoch": 2.794567062818336, + "grad_norm": 0.4164115163929644, + "learning_rate": 3.006181822837964e-05, + "loss": 0.1203, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05789054185152054, + "step": 2470, + "valid_targets_mean": 3681.5, + "valid_targets_min": 688 + }, + { + "epoch": 2.8002263723825696, + "grad_norm": 0.5161985580419112, + "learning_rate": 3.0013024998111856e-05, + "loss": 0.1374, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09378458559513092, + "step": 2475, + "valid_targets_mean": 5032.6, + "valid_targets_min": 3297 + }, + { + "epoch": 2.8058856819468025, + "grad_norm": 0.5641644659297382, + "learning_rate": 2.9964152106004546e-05, + "loss": 0.133, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06760209053754807, + "step": 2480, + "valid_targets_mean": 3681.2, + "valid_targets_min": 823 + }, + { + "epoch": 2.8115449915110355, + "grad_norm": 0.39354271371644495, + "learning_rate": 2.9915199940881723e-05, + "loss": 0.1287, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07409317046403885, + "step": 2485, + "valid_targets_mean": 4950.5, + "valid_targets_min": 1225 + }, + { + "epoch": 2.817204301075269, + "grad_norm": 0.4118252087628632, + "learning_rate": 2.9866168892198067e-05, + "loss": 0.1282, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0465342178940773, + "step": 2490, + "valid_targets_mean": 3788.1, + "valid_targets_min": 844 + }, + { + "epoch": 2.822863610639502, + "grad_norm": 0.44694456396218196, + "learning_rate": 2.9817059350035858e-05, + "loss": 0.1355, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05914433300495148, + "step": 2495, + "valid_targets_mean": 4379.0, + "valid_targets_min": 829 + }, + { + "epoch": 2.828522920203735, + "grad_norm": 0.44532021174587827, + "learning_rate": 2.9767871705101834e-05, + "loss": 0.1316, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.062388863414525986, + "step": 2500, + "valid_targets_mean": 4199.9, + "valid_targets_min": 1211 + }, + { + "epoch": 2.8341822297679684, + "grad_norm": 0.5330961930833514, + "learning_rate": 2.9718606348724135e-05, + "loss": 0.1371, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07638075947761536, + "step": 2505, + "valid_targets_mean": 3336.9, + "valid_targets_min": 540 + }, + { + "epoch": 2.8398415393322014, + "grad_norm": 0.42197501295445955, + "learning_rate": 2.966926367284913e-05, + "loss": 0.132, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.057135194540023804, + "step": 2510, + "valid_targets_mean": 3894.9, + "valid_targets_min": 741 + }, + { + "epoch": 2.8455008488964344, + "grad_norm": 0.3966087648046828, + "learning_rate": 2.9619844070038336e-05, + "loss": 0.1226, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.054441627115011215, + "step": 2515, + "valid_targets_mean": 4291.0, + "valid_targets_min": 685 + }, + { + "epoch": 2.851160158460668, + "grad_norm": 0.411807414129455, + "learning_rate": 2.957034793346531e-05, + "loss": 0.1367, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.062863789498806, + "step": 2520, + "valid_targets_mean": 4376.8, + "valid_targets_min": 1087 + }, + { + "epoch": 2.856819468024901, + "grad_norm": 0.4321149765232491, + "learning_rate": 2.9520775656912467e-05, + "loss": 0.1275, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0730627253651619, + "step": 2525, + "valid_targets_mean": 4490.9, + "valid_targets_min": 3442 + }, + { + "epoch": 2.8624787775891343, + "grad_norm": 0.43821419579610826, + "learning_rate": 2.9471127634767992e-05, + "loss": 0.1367, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05529065430164337, + "step": 2530, + "valid_targets_mean": 3329.6, + "valid_targets_min": 820 + }, + { + "epoch": 2.8681380871533673, + "grad_norm": 0.4371553687917687, + "learning_rate": 2.9421404262022687e-05, + "loss": 0.1301, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.03851231187582016, + "step": 2535, + "valid_targets_mean": 3155.8, + "valid_targets_min": 615 + }, + { + "epoch": 2.8737973967176007, + "grad_norm": 0.4545315548868795, + "learning_rate": 2.9371605934266826e-05, + "loss": 0.1258, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07047151774168015, + "step": 2540, + "valid_targets_mean": 3561.8, + "valid_targets_min": 559 + }, + { + "epoch": 2.8794567062818337, + "grad_norm": 0.45553437333510505, + "learning_rate": 2.9321733047687028e-05, + "loss": 0.1425, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06275048851966858, + "step": 2545, + "valid_targets_mean": 4121.4, + "valid_targets_min": 886 + }, + { + "epoch": 2.8851160158460667, + "grad_norm": 0.4213686346567691, + "learning_rate": 2.9271785999063058e-05, + "loss": 0.1364, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.061738520860672, + "step": 2550, + "valid_targets_mean": 4753.6, + "valid_targets_min": 490 + }, + { + "epoch": 2.8907753254103, + "grad_norm": 0.410229847478857, + "learning_rate": 2.922176518576473e-05, + "loss": 0.1396, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06403792649507523, + "step": 2555, + "valid_targets_mean": 4300.2, + "valid_targets_min": 2406 + }, + { + "epoch": 2.896434634974533, + "grad_norm": 0.473943504684631, + "learning_rate": 2.9171671005748705e-05, + "loss": 0.1407, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09271862357854843, + "step": 2560, + "valid_targets_mean": 4401.2, + "valid_targets_min": 928 + }, + { + "epoch": 2.902093944538766, + "grad_norm": 0.3696219723268231, + "learning_rate": 2.9121503857555337e-05, + "loss": 0.1303, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08061081916093826, + "step": 2565, + "valid_targets_mean": 6249.4, + "valid_targets_min": 1093 + }, + { + "epoch": 2.9077532541029996, + "grad_norm": 0.41617216659581885, + "learning_rate": 2.9071264140305504e-05, + "loss": 0.1315, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10470962524414062, + "step": 2570, + "valid_targets_mean": 6975.6, + "valid_targets_min": 3381 + }, + { + "epoch": 2.9134125636672326, + "grad_norm": 0.42997626449687776, + "learning_rate": 2.9020952253697417e-05, + "loss": 0.1312, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.048985183238983154, + "step": 2575, + "valid_targets_mean": 3660.8, + "valid_targets_min": 1492 + }, + { + "epoch": 2.9190718732314656, + "grad_norm": 0.42020207650954644, + "learning_rate": 2.8970568598003485e-05, + "loss": 0.1342, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07026906311511993, + "step": 2580, + "valid_targets_mean": 4580.1, + "valid_targets_min": 865 + }, + { + "epoch": 2.924731182795699, + "grad_norm": 0.4037348566810295, + "learning_rate": 2.8920113574067063e-05, + "loss": 0.1428, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10306201875209808, + "step": 2585, + "valid_targets_mean": 6907.8, + "valid_targets_min": 3319 + }, + { + "epoch": 2.930390492359932, + "grad_norm": 0.4480545864375203, + "learning_rate": 2.8869587583299315e-05, + "loss": 0.1265, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04124532639980316, + "step": 2590, + "valid_targets_mean": 2979.1, + "valid_targets_min": 660 + }, + { + "epoch": 2.936049801924165, + "grad_norm": 0.43662563284923867, + "learning_rate": 2.8818991027676014e-05, + "loss": 0.1421, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06055639684200287, + "step": 2595, + "valid_targets_mean": 4027.2, + "valid_targets_min": 920 + }, + { + "epoch": 2.9417091114883984, + "grad_norm": 0.4944728219651063, + "learning_rate": 2.876832430973432e-05, + "loss": 0.1313, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0591311901807785, + "step": 2600, + "valid_targets_mean": 3538.6, + "valid_targets_min": 697 + }, + { + "epoch": 2.9473684210526314, + "grad_norm": 0.4322791098747658, + "learning_rate": 2.8717587832569598e-05, + "loss": 0.1547, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07507988065481186, + "step": 2605, + "valid_targets_mean": 4613.2, + "valid_targets_min": 1163 + }, + { + "epoch": 2.953027730616865, + "grad_norm": 0.41881730323268423, + "learning_rate": 2.8666781999832198e-05, + "loss": 0.1403, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09291787445545197, + "step": 2610, + "valid_targets_mean": 5678.1, + "valid_targets_min": 2646 + }, + { + "epoch": 2.958687040181098, + "grad_norm": 0.41158289898284994, + "learning_rate": 2.8615907215724266e-05, + "loss": 0.1348, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07887524366378784, + "step": 2615, + "valid_targets_mean": 4231.6, + "valid_targets_min": 1804 + }, + { + "epoch": 2.9643463497453313, + "grad_norm": 0.43132127883237925, + "learning_rate": 2.8564963884996494e-05, + "loss": 0.1323, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05875246599316597, + "step": 2620, + "valid_targets_mean": 4002.8, + "valid_targets_min": 771 + }, + { + "epoch": 2.9700056593095643, + "grad_norm": 0.4419497386620937, + "learning_rate": 2.851395241294493e-05, + "loss": 0.1408, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07658740133047104, + "step": 2625, + "valid_targets_mean": 4188.0, + "valid_targets_min": 919 + }, + { + "epoch": 2.9756649688737973, + "grad_norm": 0.4965731525783887, + "learning_rate": 2.8462873205407747e-05, + "loss": 0.148, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0681377500295639, + "step": 2630, + "valid_targets_mean": 3273.9, + "valid_targets_min": 790 + }, + { + "epoch": 2.9813242784380307, + "grad_norm": 0.4487467250589296, + "learning_rate": 2.8411726668761998e-05, + "loss": 0.1463, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08190510421991348, + "step": 2635, + "valid_targets_mean": 4488.1, + "valid_targets_min": 697 + }, + { + "epoch": 2.9869835880022637, + "grad_norm": 0.4227857717328058, + "learning_rate": 2.8360513209920388e-05, + "loss": 0.1473, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09438923001289368, + "step": 2640, + "valid_targets_mean": 5551.5, + "valid_targets_min": 950 + }, + { + "epoch": 2.9926428975664967, + "grad_norm": 0.46437124649613637, + "learning_rate": 2.8309233236328074e-05, + "loss": 0.1374, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05302552878856659, + "step": 2645, + "valid_targets_mean": 3442.1, + "valid_targets_min": 1814 + }, + { + "epoch": 2.99830220713073, + "grad_norm": 0.3936512895185506, + "learning_rate": 2.8257887155959352e-05, + "loss": 0.1264, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06076087802648544, + "step": 2650, + "valid_targets_mean": 4136.9, + "valid_targets_min": 812 + }, + { + "epoch": 3.0045274476513866, + "grad_norm": 0.38617137171586235, + "learning_rate": 2.8206475377314486e-05, + "loss": 0.1705, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04072452709078789, + "step": 2655, + "valid_targets_mean": 4266.8, + "valid_targets_min": 721 + }, + { + "epoch": 3.0101867572156196, + "grad_norm": 0.37018388740161995, + "learning_rate": 2.8154998309416404e-05, + "loss": 0.1367, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04826600104570389, + "step": 2660, + "valid_targets_mean": 5019.4, + "valid_targets_min": 611 + }, + { + "epoch": 3.015846066779853, + "grad_norm": 0.4753192137401859, + "learning_rate": 2.8103456361807473e-05, + "loss": 0.1368, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09353355318307877, + "step": 2665, + "valid_targets_mean": 3913.4, + "valid_targets_min": 449 + }, + { + "epoch": 3.021505376344086, + "grad_norm": 0.4162235997812077, + "learning_rate": 2.8051849944546225e-05, + "loss": 0.1381, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08130108565092087, + "step": 2670, + "valid_targets_mean": 5543.4, + "valid_targets_min": 1253 + }, + { + "epoch": 3.027164685908319, + "grad_norm": 0.4799946352749431, + "learning_rate": 2.80001794682041e-05, + "loss": 0.1298, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0979180559515953, + "step": 2675, + "valid_targets_mean": 5186.1, + "valid_targets_min": 684 + }, + { + "epoch": 3.0328239954725524, + "grad_norm": 0.38957569933014347, + "learning_rate": 2.7948445343862188e-05, + "loss": 0.1427, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.058769818395376205, + "step": 2680, + "valid_targets_mean": 4484.5, + "valid_targets_min": 1095 + }, + { + "epoch": 3.0384833050367854, + "grad_norm": 0.4602178184151295, + "learning_rate": 2.7896647983107952e-05, + "loss": 0.1359, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07683011889457703, + "step": 2685, + "valid_targets_mean": 5735.2, + "valid_targets_min": 3061 + }, + { + "epoch": 3.044142614601019, + "grad_norm": 0.44388273959178315, + "learning_rate": 2.784478779803194e-05, + "loss": 0.1537, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10529894381761551, + "step": 2690, + "valid_targets_mean": 5347.0, + "valid_targets_min": 3892 + }, + { + "epoch": 3.049801924165252, + "grad_norm": 0.46805273078610954, + "learning_rate": 2.7792865201224536e-05, + "loss": 0.1312, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06282709538936615, + "step": 2695, + "valid_targets_mean": 4621.9, + "valid_targets_min": 1939 + }, + { + "epoch": 3.055461233729485, + "grad_norm": 0.45413181461886953, + "learning_rate": 2.7740880605772644e-05, + "loss": 0.144, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07031915336847305, + "step": 2700, + "valid_targets_mean": 4726.9, + "valid_targets_min": 1786 + }, + { + "epoch": 3.0611205432937183, + "grad_norm": 0.47945297019191374, + "learning_rate": 2.7688834425256426e-05, + "loss": 0.1489, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08839349448680878, + "step": 2705, + "valid_targets_mean": 4788.1, + "valid_targets_min": 1055 + }, + { + "epoch": 3.0667798528579513, + "grad_norm": 0.36167768284157664, + "learning_rate": 2.7636727073746015e-05, + "loss": 0.1275, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.041729703545570374, + "step": 2710, + "valid_targets_mean": 3706.2, + "valid_targets_min": 834 + }, + { + "epoch": 3.0724391624221843, + "grad_norm": 0.9805538198902808, + "learning_rate": 2.7584558965798183e-05, + "loss": 0.1425, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08435112237930298, + "step": 2715, + "valid_targets_mean": 4729.8, + "valid_targets_min": 1356 + }, + { + "epoch": 3.0780984719864177, + "grad_norm": 0.39525020948144796, + "learning_rate": 2.7532330516453094e-05, + "loss": 0.131, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08033818006515503, + "step": 2720, + "valid_targets_mean": 4829.4, + "valid_targets_min": 559 + }, + { + "epoch": 3.0837577815506507, + "grad_norm": 0.43738059975173377, + "learning_rate": 2.7480042141230963e-05, + "loss": 0.1419, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07897631824016571, + "step": 2725, + "valid_targets_mean": 4319.1, + "valid_targets_min": 900 + }, + { + "epoch": 3.089417091114884, + "grad_norm": 0.46333719654344013, + "learning_rate": 2.7427694256128776e-05, + "loss": 0.1294, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.03843440115451813, + "step": 2730, + "valid_targets_mean": 2687.5, + "valid_targets_min": 693 + }, + { + "epoch": 3.095076400679117, + "grad_norm": 0.40791748384565507, + "learning_rate": 2.737528727761696e-05, + "loss": 0.1297, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.054624445736408234, + "step": 2735, + "valid_targets_mean": 4479.9, + "valid_targets_min": 2446 + }, + { + "epoch": 3.10073571024335, + "grad_norm": 0.4139710335990578, + "learning_rate": 2.7322821622636077e-05, + "loss": 0.1333, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06985824555158615, + "step": 2740, + "valid_targets_mean": 5858.2, + "valid_targets_min": 2589 + }, + { + "epoch": 3.1063950198075836, + "grad_norm": 0.4034373883635718, + "learning_rate": 2.7270297708593517e-05, + "loss": 0.1256, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06487354636192322, + "step": 2745, + "valid_targets_mean": 4204.4, + "valid_targets_min": 2349 + }, + { + "epoch": 3.1120543293718166, + "grad_norm": 0.4089493135599601, + "learning_rate": 2.7217715953360166e-05, + "loss": 0.1217, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.046125784516334534, + "step": 2750, + "valid_targets_mean": 2677.1, + "valid_targets_min": 570 + }, + { + "epoch": 3.11771363893605, + "grad_norm": 0.4481925813378899, + "learning_rate": 2.716507677526707e-05, + "loss": 0.1484, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07276476919651031, + "step": 2755, + "valid_targets_mean": 4541.0, + "valid_targets_min": 3243 + }, + { + "epoch": 3.123372948500283, + "grad_norm": 0.429351176698915, + "learning_rate": 2.711238059310215e-05, + "loss": 0.132, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06741909682750702, + "step": 2760, + "valid_targets_mean": 4803.5, + "valid_targets_min": 759 + }, + { + "epoch": 3.129032258064516, + "grad_norm": 0.6533785579669603, + "learning_rate": 2.7059627826106817e-05, + "loss": 0.1324, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05149967968463898, + "step": 2765, + "valid_targets_mean": 5019.0, + "valid_targets_min": 2253 + }, + { + "epoch": 3.1346915676287495, + "grad_norm": 0.5032443536578466, + "learning_rate": 2.700681889397267e-05, + "loss": 0.1438, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06953762471675873, + "step": 2770, + "valid_targets_mean": 3493.1, + "valid_targets_min": 879 + }, + { + "epoch": 3.1403508771929824, + "grad_norm": 0.43995605923730946, + "learning_rate": 2.6953954216838148e-05, + "loss": 0.129, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06564613431692123, + "step": 2775, + "valid_targets_mean": 3275.5, + "valid_targets_min": 774 + }, + { + "epoch": 3.1460101867572154, + "grad_norm": 0.5087300784770695, + "learning_rate": 2.6901034215285182e-05, + "loss": 0.1513, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10551923513412476, + "step": 2780, + "valid_targets_mean": 3929.6, + "valid_targets_min": 840 + }, + { + "epoch": 3.151669496321449, + "grad_norm": 0.4175713706135481, + "learning_rate": 2.684805931033586e-05, + "loss": 0.1385, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.052783794701099396, + "step": 2785, + "valid_targets_mean": 4381.0, + "valid_targets_min": 680 + }, + { + "epoch": 3.157328805885682, + "grad_norm": 0.4926876587544281, + "learning_rate": 2.679502992344907e-05, + "loss": 0.1375, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09102383255958557, + "step": 2790, + "valid_targets_mean": 4726.8, + "valid_targets_min": 895 + }, + { + "epoch": 3.1629881154499153, + "grad_norm": 0.4793999118083366, + "learning_rate": 2.6741946476517146e-05, + "loss": 0.1394, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1066889539361, + "step": 2795, + "valid_targets_mean": 5410.8, + "valid_targets_min": 732 + }, + { + "epoch": 3.1686474250141483, + "grad_norm": 0.4298087180425509, + "learning_rate": 2.6688809391862523e-05, + "loss": 0.1402, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08678670227527618, + "step": 2800, + "valid_targets_mean": 5320.8, + "valid_targets_min": 1177 + }, + { + "epoch": 3.1743067345783813, + "grad_norm": 0.42544741612676706, + "learning_rate": 2.663561909223435e-05, + "loss": 0.1477, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06231726333498955, + "step": 2805, + "valid_targets_mean": 4675.5, + "valid_targets_min": 1139 + }, + { + "epoch": 3.1799660441426147, + "grad_norm": 0.4165382890474721, + "learning_rate": 2.6582376000805165e-05, + "loss": 0.1275, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09180489182472229, + "step": 2810, + "valid_targets_mean": 4585.4, + "valid_targets_min": 1087 + }, + { + "epoch": 3.1856253537068477, + "grad_norm": 0.36086325137573655, + "learning_rate": 2.6529080541167495e-05, + "loss": 0.1303, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.050663821399211884, + "step": 2815, + "valid_targets_mean": 4916.4, + "valid_targets_min": 1113 + }, + { + "epoch": 3.1912846632710807, + "grad_norm": 0.4829731196825515, + "learning_rate": 2.6475733137330507e-05, + "loss": 0.1319, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08046606183052063, + "step": 2820, + "valid_targets_mean": 4062.1, + "valid_targets_min": 1748 + }, + { + "epoch": 3.196943972835314, + "grad_norm": 0.401636148155781, + "learning_rate": 2.6422334213716624e-05, + "loss": 0.137, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06997746974229813, + "step": 2825, + "valid_targets_mean": 5261.0, + "valid_targets_min": 542 + }, + { + "epoch": 3.202603282399547, + "grad_norm": 0.44718791597790863, + "learning_rate": 2.6368884195158143e-05, + "loss": 0.1299, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06800203025341034, + "step": 2830, + "valid_targets_mean": 3977.8, + "valid_targets_min": 833 + }, + { + "epoch": 3.2082625919637806, + "grad_norm": 0.41951600720853166, + "learning_rate": 2.6315383506893876e-05, + "loss": 0.1309, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06826755404472351, + "step": 2835, + "valid_targets_mean": 5027.1, + "valid_targets_min": 1014 + }, + { + "epoch": 3.2139219015280136, + "grad_norm": 0.41332996324096816, + "learning_rate": 2.6261832574565752e-05, + "loss": 0.1286, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04512519761919975, + "step": 2840, + "valid_targets_mean": 4417.0, + "valid_targets_min": 3178 + }, + { + "epoch": 3.2195812110922466, + "grad_norm": 0.42260011885853077, + "learning_rate": 2.6208231824215417e-05, + "loss": 0.1347, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07373794913291931, + "step": 2845, + "valid_targets_mean": 5095.4, + "valid_targets_min": 3484 + }, + { + "epoch": 3.22524052065648, + "grad_norm": 0.49501889361814555, + "learning_rate": 2.6154581682280892e-05, + "loss": 0.1422, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07608088105916977, + "step": 2850, + "valid_targets_mean": 3793.2, + "valid_targets_min": 808 + }, + { + "epoch": 3.230899830220713, + "grad_norm": 0.47078762346923797, + "learning_rate": 2.610088257559311e-05, + "loss": 0.1312, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07664738595485687, + "step": 2855, + "valid_targets_mean": 3861.4, + "valid_targets_min": 742 + }, + { + "epoch": 3.236559139784946, + "grad_norm": 0.4102857709313117, + "learning_rate": 2.604713493137259e-05, + "loss": 0.1272, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06762543320655823, + "step": 2860, + "valid_targets_mean": 5286.9, + "valid_targets_min": 3224 + }, + { + "epoch": 3.2422184493491795, + "grad_norm": 0.37340549361893327, + "learning_rate": 2.5993339177226002e-05, + "loss": 0.1406, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08183964341878891, + "step": 2865, + "valid_targets_mean": 6662.0, + "valid_targets_min": 4096 + }, + { + "epoch": 3.2478777589134125, + "grad_norm": 0.447073811870187, + "learning_rate": 2.593949574114274e-05, + "loss": 0.1271, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06419318169355392, + "step": 2870, + "valid_targets_mean": 4019.1, + "valid_targets_min": 1108 + }, + { + "epoch": 3.253537068477646, + "grad_norm": 0.4190813995484374, + "learning_rate": 2.5885605051491592e-05, + "loss": 0.1415, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06568054854869843, + "step": 2875, + "valid_targets_mean": 5612.1, + "valid_targets_min": 2848 + }, + { + "epoch": 3.259196378041879, + "grad_norm": 0.45017295318745726, + "learning_rate": 2.583166753701725e-05, + "loss": 0.1328, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10807374119758606, + "step": 2880, + "valid_targets_mean": 5581.5, + "valid_targets_min": 950 + }, + { + "epoch": 3.264855687606112, + "grad_norm": 0.3726515886914176, + "learning_rate": 2.5777683626836964e-05, + "loss": 0.1329, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.064125657081604, + "step": 2885, + "valid_targets_mean": 5580.0, + "valid_targets_min": 881 + }, + { + "epoch": 3.2705149971703453, + "grad_norm": 0.44009131284480674, + "learning_rate": 2.5723653750437083e-05, + "loss": 0.1254, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05832365155220032, + "step": 2890, + "valid_targets_mean": 3716.4, + "valid_targets_min": 834 + }, + { + "epoch": 3.2761743067345783, + "grad_norm": 0.47788716719553376, + "learning_rate": 2.5669578337669653e-05, + "loss": 0.1342, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05208609253168106, + "step": 2895, + "valid_targets_mean": 3100.6, + "valid_targets_min": 851 + }, + { + "epoch": 3.2818336162988118, + "grad_norm": 0.4292453747498972, + "learning_rate": 2.5615457818749007e-05, + "loss": 0.1444, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07537626475095749, + "step": 2900, + "valid_targets_mean": 5695.0, + "valid_targets_min": 826 + }, + { + "epoch": 3.2874929258630448, + "grad_norm": 0.5382304754921993, + "learning_rate": 2.5561292624248344e-05, + "loss": 0.136, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05201840028166771, + "step": 2905, + "valid_targets_mean": 3760.4, + "valid_targets_min": 2276 + }, + { + "epoch": 3.2931522354272778, + "grad_norm": 0.4002099717841858, + "learning_rate": 2.5507083185096267e-05, + "loss": 0.1364, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.060707416385412216, + "step": 2910, + "valid_targets_mean": 4185.8, + "valid_targets_min": 845 + }, + { + "epoch": 3.298811544991511, + "grad_norm": 0.4200501952639469, + "learning_rate": 2.545282993257341e-05, + "loss": 0.141, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07130303978919983, + "step": 2915, + "valid_targets_mean": 4564.8, + "valid_targets_min": 1579 + }, + { + "epoch": 3.304470854555744, + "grad_norm": 0.41353767634650146, + "learning_rate": 2.5398533298308956e-05, + "loss": 0.1405, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07808589190244675, + "step": 2920, + "valid_targets_mean": 4856.1, + "valid_targets_min": 1096 + }, + { + "epoch": 3.310130164119977, + "grad_norm": 0.482582650876856, + "learning_rate": 2.534419371427724e-05, + "loss": 0.1485, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08143541216850281, + "step": 2925, + "valid_targets_mean": 4145.9, + "valid_targets_min": 229 + }, + { + "epoch": 3.3157894736842106, + "grad_norm": 0.3924533405872855, + "learning_rate": 2.5289811612794297e-05, + "loss": 0.1421, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0693478211760521, + "step": 2930, + "valid_targets_mean": 5023.2, + "valid_targets_min": 795 + }, + { + "epoch": 3.3214487832484436, + "grad_norm": 0.4482728433014447, + "learning_rate": 2.5235387426514405e-05, + "loss": 0.1449, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0618685744702816, + "step": 2935, + "valid_targets_mean": 3398.0, + "valid_targets_min": 876 + }, + { + "epoch": 3.3271080928126766, + "grad_norm": 0.4397989922884252, + "learning_rate": 2.5180921588426693e-05, + "loss": 0.1341, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0687597244977951, + "step": 2940, + "valid_targets_mean": 4467.8, + "valid_targets_min": 1062 + }, + { + "epoch": 3.33276740237691, + "grad_norm": 0.4042328754906967, + "learning_rate": 2.5126414531851634e-05, + "loss": 0.1345, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.056209929287433624, + "step": 2945, + "valid_targets_mean": 4668.9, + "valid_targets_min": 803 + }, + { + "epoch": 3.338426711941143, + "grad_norm": 0.4617307727327834, + "learning_rate": 2.507186669043764e-05, + "loss": 0.1375, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07561806589365005, + "step": 2950, + "valid_targets_mean": 3034.9, + "valid_targets_min": 1136 + }, + { + "epoch": 3.3440860215053765, + "grad_norm": 0.3688841810123643, + "learning_rate": 2.5017278498157608e-05, + "loss": 0.1269, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04829462990164757, + "step": 2955, + "valid_targets_mean": 4267.0, + "valid_targets_min": 999 + }, + { + "epoch": 3.3497453310696095, + "grad_norm": 0.36653165005044896, + "learning_rate": 2.496265038930545e-05, + "loss": 0.136, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05725232511758804, + "step": 2960, + "valid_targets_mean": 5086.4, + "valid_targets_min": 3289 + }, + { + "epoch": 3.355404640633843, + "grad_norm": 0.41462497919064906, + "learning_rate": 2.4907982798492647e-05, + "loss": 0.1353, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.03575100749731064, + "step": 2965, + "valid_targets_mean": 3524.8, + "valid_targets_min": 531 + }, + { + "epoch": 3.361063950198076, + "grad_norm": 0.4010161629830161, + "learning_rate": 2.485327616064479e-05, + "loss": 0.1359, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.03458371013402939, + "step": 2970, + "valid_targets_mean": 3516.1, + "valid_targets_min": 1225 + }, + { + "epoch": 3.366723259762309, + "grad_norm": 0.49113145387496543, + "learning_rate": 2.4798530910998126e-05, + "loss": 0.1485, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10003523528575897, + "step": 2975, + "valid_targets_mean": 4991.1, + "valid_targets_min": 757 + }, + { + "epoch": 3.3723825693265423, + "grad_norm": 0.4486934963639392, + "learning_rate": 2.474374748509609e-05, + "loss": 0.1322, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08931628614664078, + "step": 2980, + "valid_targets_mean": 4595.4, + "valid_targets_min": 1610 + }, + { + "epoch": 3.3780418788907753, + "grad_norm": 0.42187506410482717, + "learning_rate": 2.4688926318785845e-05, + "loss": 0.1357, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07669056951999664, + "step": 2985, + "valid_targets_mean": 4681.0, + "valid_targets_min": 771 + }, + { + "epoch": 3.3837011884550083, + "grad_norm": 0.4106559315796956, + "learning_rate": 2.4634067848214797e-05, + "loss": 0.1314, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07835520058870316, + "step": 2990, + "valid_targets_mean": 4854.5, + "valid_targets_min": 1353 + }, + { + "epoch": 3.3893604980192418, + "grad_norm": 0.35498657824501334, + "learning_rate": 2.4579172509827146e-05, + "loss": 0.1296, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05351714789867401, + "step": 2995, + "valid_targets_mean": 4326.5, + "valid_targets_min": 647 + }, + { + "epoch": 3.3950198075834748, + "grad_norm": 0.42601246325353287, + "learning_rate": 2.4524240740360404e-05, + "loss": 0.143, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.046392712742090225, + "step": 3000, + "valid_targets_mean": 3124.1, + "valid_targets_min": 886 + }, + { + "epoch": 3.4006791171477078, + "grad_norm": 0.4738850686539022, + "learning_rate": 2.4469272976841925e-05, + "loss": 0.144, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05239018052816391, + "step": 3005, + "valid_targets_mean": 3027.6, + "valid_targets_min": 631 + }, + { + "epoch": 3.406338426711941, + "grad_norm": 0.41225088951142774, + "learning_rate": 2.441426965658543e-05, + "loss": 0.1324, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0731019526720047, + "step": 3010, + "valid_targets_mean": 4852.8, + "valid_targets_min": 768 + }, + { + "epoch": 3.411997736276174, + "grad_norm": 0.4627435354235877, + "learning_rate": 2.4359231217187508e-05, + "loss": 0.117, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.03063189424574375, + "step": 3015, + "valid_targets_mean": 2069.5, + "valid_targets_min": 518 + }, + { + "epoch": 3.4176570458404076, + "grad_norm": 0.40905090739948924, + "learning_rate": 2.430415809652416e-05, + "loss": 0.1395, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06283155083656311, + "step": 3020, + "valid_targets_mean": 3667.0, + "valid_targets_min": 907 + }, + { + "epoch": 3.4233163554046406, + "grad_norm": 0.4337886808607961, + "learning_rate": 2.4249050732747302e-05, + "loss": 0.1406, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07665494084358215, + "step": 3025, + "valid_targets_mean": 4641.9, + "valid_targets_min": 1746 + }, + { + "epoch": 3.4289756649688736, + "grad_norm": 0.4295866275706557, + "learning_rate": 2.4193909564281288e-05, + "loss": 0.1342, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08412076532840729, + "step": 3030, + "valid_targets_mean": 4406.8, + "valid_targets_min": 1185 + }, + { + "epoch": 3.434634974533107, + "grad_norm": 0.3960059100305019, + "learning_rate": 2.4138735029819418e-05, + "loss": 0.1215, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07398900389671326, + "step": 3035, + "valid_targets_mean": 5352.6, + "valid_targets_min": 687 + }, + { + "epoch": 3.44029428409734, + "grad_norm": 0.39771478824997886, + "learning_rate": 2.408352756832042e-05, + "loss": 0.1379, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04783628135919571, + "step": 3040, + "valid_targets_mean": 4032.5, + "valid_targets_min": 529 + }, + { + "epoch": 3.4459535936615735, + "grad_norm": 0.41985437971612694, + "learning_rate": 2.402828761900502e-05, + "loss": 0.1355, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07030151039361954, + "step": 3045, + "valid_targets_mean": 4860.0, + "valid_targets_min": 815 + }, + { + "epoch": 3.4516129032258065, + "grad_norm": 0.43954983731587977, + "learning_rate": 2.3973015621352382e-05, + "loss": 0.1425, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06285420060157776, + "step": 3050, + "valid_targets_mean": 3694.2, + "valid_targets_min": 918 + }, + { + "epoch": 3.4572722127900395, + "grad_norm": 0.40023511870411393, + "learning_rate": 2.3917712015096664e-05, + "loss": 0.1337, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06622092425823212, + "step": 3055, + "valid_targets_mean": 4960.9, + "valid_targets_min": 3667 + }, + { + "epoch": 3.462931522354273, + "grad_norm": 0.46841644114553616, + "learning_rate": 2.386237724022348e-05, + "loss": 0.1431, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07564839720726013, + "step": 3060, + "valid_targets_mean": 4691.8, + "valid_targets_min": 816 + }, + { + "epoch": 3.468590831918506, + "grad_norm": 0.38957711241076665, + "learning_rate": 2.3807011736966414e-05, + "loss": 0.1348, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04594377055764198, + "step": 3065, + "valid_targets_mean": 4068.5, + "valid_targets_min": 1125 + }, + { + "epoch": 3.474250141482739, + "grad_norm": 0.4789503659074538, + "learning_rate": 2.3751615945803547e-05, + "loss": 0.1417, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09858594834804535, + "step": 3070, + "valid_targets_mean": 4899.2, + "valid_targets_min": 2805 + }, + { + "epoch": 3.4799094510469724, + "grad_norm": 0.4316149134176734, + "learning_rate": 2.3696190307453883e-05, + "loss": 0.1296, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.055925153195858, + "step": 3075, + "valid_targets_mean": 3696.4, + "valid_targets_min": 877 + }, + { + "epoch": 3.4855687606112054, + "grad_norm": 0.3905472702482599, + "learning_rate": 2.364073526287392e-05, + "loss": 0.1397, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06032529100775719, + "step": 3080, + "valid_targets_mean": 4886.5, + "valid_targets_min": 1334 + }, + { + "epoch": 3.4912280701754383, + "grad_norm": 0.4311025572009026, + "learning_rate": 2.358525125325409e-05, + "loss": 0.1405, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.058870475739240646, + "step": 3085, + "valid_targets_mean": 4219.4, + "valid_targets_min": 809 + }, + { + "epoch": 3.496887379739672, + "grad_norm": 0.4451973641674271, + "learning_rate": 2.352973872001527e-05, + "loss": 0.1409, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.055170938372612, + "step": 3090, + "valid_targets_mean": 3572.2, + "valid_targets_min": 781 + }, + { + "epoch": 3.502546689303905, + "grad_norm": 0.38696713743359284, + "learning_rate": 2.347419810480527e-05, + "loss": 0.1328, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0454559288918972, + "step": 3095, + "valid_targets_mean": 3296.8, + "valid_targets_min": 845 + }, + { + "epoch": 3.508205998868138, + "grad_norm": 0.37072709518626773, + "learning_rate": 2.34186298494953e-05, + "loss": 0.1231, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.050815775990486145, + "step": 3100, + "valid_targets_mean": 4062.2, + "valid_targets_min": 868 + }, + { + "epoch": 3.513865308432371, + "grad_norm": 0.7445062684308681, + "learning_rate": 2.3363034396176486e-05, + "loss": 0.1437, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07964637875556946, + "step": 3105, + "valid_targets_mean": 4495.9, + "valid_targets_min": 1389 + }, + { + "epoch": 3.5195246179966047, + "grad_norm": 0.3944878712782557, + "learning_rate": 2.3307412187156334e-05, + "loss": 0.1395, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05683618783950806, + "step": 3110, + "valid_targets_mean": 4376.6, + "valid_targets_min": 2794 + }, + { + "epoch": 3.5251839275608376, + "grad_norm": 0.4428779797938789, + "learning_rate": 2.3251763664955208e-05, + "loss": 0.1372, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08108937740325928, + "step": 3115, + "valid_targets_mean": 4489.8, + "valid_targets_min": 912 + }, + { + "epoch": 3.5308432371250706, + "grad_norm": 0.4214827925554384, + "learning_rate": 2.3196089272302813e-05, + "loss": 0.1417, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08208528161048889, + "step": 3120, + "valid_targets_mean": 5380.4, + "valid_targets_min": 1492 + }, + { + "epoch": 3.536502546689304, + "grad_norm": 0.4061462172272819, + "learning_rate": 2.3140389452134677e-05, + "loss": 0.1363, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.03488858789205551, + "step": 3125, + "valid_targets_mean": 2884.2, + "valid_targets_min": 668 + }, + { + "epoch": 3.542161856253537, + "grad_norm": 0.4535810750909953, + "learning_rate": 2.3084664647588636e-05, + "loss": 0.1485, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09588392078876495, + "step": 3130, + "valid_targets_mean": 4742.0, + "valid_targets_min": 1699 + }, + { + "epoch": 3.54782116581777, + "grad_norm": 0.4525154274789726, + "learning_rate": 2.3028915302001286e-05, + "loss": 0.1378, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.058035239577293396, + "step": 3135, + "valid_targets_mean": 2600.9, + "valid_targets_min": 569 + }, + { + "epoch": 3.5534804753820035, + "grad_norm": 0.44958581794684227, + "learning_rate": 2.297314185890446e-05, + "loss": 0.1292, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05830014497041702, + "step": 3140, + "valid_targets_mean": 3706.8, + "valid_targets_min": 1015 + }, + { + "epoch": 3.5591397849462365, + "grad_norm": 0.42422047384290124, + "learning_rate": 2.291734476202173e-05, + "loss": 0.1417, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.052396416664123535, + "step": 3145, + "valid_targets_mean": 3603.8, + "valid_targets_min": 595 + }, + { + "epoch": 3.5647990945104695, + "grad_norm": 0.5505696528188843, + "learning_rate": 2.286152445526482e-05, + "loss": 0.1319, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08826444298028946, + "step": 3150, + "valid_targets_mean": 3497.4, + "valid_targets_min": 919 + }, + { + "epoch": 3.570458404074703, + "grad_norm": 0.3635433738508, + "learning_rate": 2.2805681382730142e-05, + "loss": 0.1356, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07275129854679108, + "step": 3155, + "valid_targets_mean": 5747.0, + "valid_targets_min": 739 + }, + { + "epoch": 3.576117713638936, + "grad_norm": 0.4533868460181728, + "learning_rate": 2.2749815988695208e-05, + "loss": 0.1341, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07685845345258713, + "step": 3160, + "valid_targets_mean": 4838.1, + "valid_targets_min": 819 + }, + { + "epoch": 3.5817770232031694, + "grad_norm": 0.4048876594352088, + "learning_rate": 2.2693928717615118e-05, + "loss": 0.1393, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06346229463815689, + "step": 3165, + "valid_targets_mean": 4321.2, + "valid_targets_min": 931 + }, + { + "epoch": 3.5874363327674024, + "grad_norm": 0.5289467730230922, + "learning_rate": 2.2638020014119033e-05, + "loss": 0.1446, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06351981312036514, + "step": 3170, + "valid_targets_mean": 2533.4, + "valid_targets_min": 564 + }, + { + "epoch": 3.593095642331636, + "grad_norm": 0.4390603992241125, + "learning_rate": 2.2582090323006603e-05, + "loss": 0.1465, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06421498954296112, + "step": 3175, + "valid_targets_mean": 3677.4, + "valid_targets_min": 869 + }, + { + "epoch": 3.598754951895869, + "grad_norm": 0.44280386608800665, + "learning_rate": 2.2526140089244483e-05, + "loss": 0.133, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06696519255638123, + "step": 3180, + "valid_targets_mean": 4045.6, + "valid_targets_min": 813 + }, + { + "epoch": 3.604414261460102, + "grad_norm": 0.4893649058929632, + "learning_rate": 2.247016975796274e-05, + "loss": 0.1335, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06364568322896957, + "step": 3185, + "valid_targets_mean": 3304.6, + "valid_targets_min": 485 + }, + { + "epoch": 3.6100735710243352, + "grad_norm": 0.43960333914811106, + "learning_rate": 2.2414179774451333e-05, + "loss": 0.154, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07516974210739136, + "step": 3190, + "valid_targets_mean": 4378.1, + "valid_targets_min": 2432 + }, + { + "epoch": 3.6157328805885682, + "grad_norm": 0.45415546212598273, + "learning_rate": 2.2358170584156577e-05, + "loss": 0.1449, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10768917202949524, + "step": 3195, + "valid_targets_mean": 5015.1, + "valid_targets_min": 2850 + }, + { + "epoch": 3.6213921901528012, + "grad_norm": 0.4085430800551796, + "learning_rate": 2.2302142632677605e-05, + "loss": 0.1309, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.057084403932094574, + "step": 3200, + "valid_targets_mean": 4129.5, + "valid_targets_min": 721 + }, + { + "epoch": 3.6270514997170347, + "grad_norm": 0.447193196627962, + "learning_rate": 2.2246096365762782e-05, + "loss": 0.1403, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06684181094169617, + "step": 3205, + "valid_targets_mean": 4496.9, + "valid_targets_min": 1033 + }, + { + "epoch": 3.6327108092812677, + "grad_norm": 0.3933004531102622, + "learning_rate": 2.2190032229306215e-05, + "loss": 0.1267, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05431961640715599, + "step": 3210, + "valid_targets_mean": 4752.1, + "valid_targets_min": 751 + }, + { + "epoch": 3.6383701188455007, + "grad_norm": 0.405459983526177, + "learning_rate": 2.2133950669344156e-05, + "loss": 0.1307, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06501272320747375, + "step": 3215, + "valid_targets_mean": 4812.1, + "valid_targets_min": 856 + }, + { + "epoch": 3.644029428409734, + "grad_norm": 0.42233271639271774, + "learning_rate": 2.207785213205149e-05, + "loss": 0.1353, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05825245752930641, + "step": 3220, + "valid_targets_mean": 4341.5, + "valid_targets_min": 1154 + }, + { + "epoch": 3.649688737973967, + "grad_norm": 0.41361742820170727, + "learning_rate": 2.202173706373817e-05, + "loss": 0.1386, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0748710185289383, + "step": 3225, + "valid_targets_mean": 4151.1, + "valid_targets_min": 645 + }, + { + "epoch": 3.6553480475382, + "grad_norm": 0.42149041367515566, + "learning_rate": 2.1965605910845654e-05, + "loss": 0.1382, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07024779915809631, + "step": 3230, + "valid_targets_mean": 4816.6, + "valid_targets_min": 945 + }, + { + "epoch": 3.6610073571024335, + "grad_norm": 0.5049996843123017, + "learning_rate": 2.1909459119943384e-05, + "loss": 0.1327, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09718368947505951, + "step": 3235, + "valid_targets_mean": 4249.6, + "valid_targets_min": 580 + }, + { + "epoch": 3.6666666666666665, + "grad_norm": 0.4632080844809109, + "learning_rate": 2.1853297137725204e-05, + "loss": 0.1379, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05604666844010353, + "step": 3240, + "valid_targets_mean": 3193.6, + "valid_targets_min": 593 + }, + { + "epoch": 3.6723259762309, + "grad_norm": 0.4816062656827528, + "learning_rate": 2.1797120411005807e-05, + "loss": 0.1484, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07544916868209839, + "step": 3245, + "valid_targets_mean": 4611.9, + "valid_targets_min": 842 + }, + { + "epoch": 3.677985285795133, + "grad_norm": 0.46758736830474223, + "learning_rate": 2.1740929386717222e-05, + "loss": 0.1301, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07856447994709015, + "step": 3250, + "valid_targets_mean": 4368.1, + "valid_targets_min": 1037 + }, + { + "epoch": 3.6836445953593664, + "grad_norm": 0.346208587523274, + "learning_rate": 2.1684724511905193e-05, + "loss": 0.1191, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.03505310043692589, + "step": 3255, + "valid_targets_mean": 2896.4, + "valid_targets_min": 853 + }, + { + "epoch": 3.6893039049235994, + "grad_norm": 0.4601127480739952, + "learning_rate": 2.1628506233725678e-05, + "loss": 0.1289, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07584796845912933, + "step": 3260, + "valid_targets_mean": 4097.0, + "valid_targets_min": 1062 + }, + { + "epoch": 3.6949632144878324, + "grad_norm": 0.40465609062778946, + "learning_rate": 2.1572274999441265e-05, + "loss": 0.1291, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.049052461981773376, + "step": 3265, + "valid_targets_mean": 3771.8, + "valid_targets_min": 790 + }, + { + "epoch": 3.700622524052066, + "grad_norm": 0.4278189699135824, + "learning_rate": 2.151603125641761e-05, + "loss": 0.1372, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04881703108549118, + "step": 3270, + "valid_targets_mean": 3386.4, + "valid_targets_min": 636 + }, + { + "epoch": 3.706281833616299, + "grad_norm": 0.4036842442769569, + "learning_rate": 2.1459775452119898e-05, + "loss": 0.1332, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07546769827604294, + "step": 3275, + "valid_targets_mean": 4936.2, + "valid_targets_min": 871 + }, + { + "epoch": 3.711941143180532, + "grad_norm": 0.39567195329988797, + "learning_rate": 2.1403508034109262e-05, + "loss": 0.1266, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0556686595082283, + "step": 3280, + "valid_targets_mean": 4286.1, + "valid_targets_min": 1072 + }, + { + "epoch": 3.7176004527447652, + "grad_norm": 0.4348589667414177, + "learning_rate": 2.1347229450039237e-05, + "loss": 0.1302, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08333688974380493, + "step": 3285, + "valid_targets_mean": 4316.9, + "valid_targets_min": 596 + }, + { + "epoch": 3.7232597623089982, + "grad_norm": 0.42775235433444636, + "learning_rate": 2.12909401476522e-05, + "loss": 0.1308, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06224793195724487, + "step": 3290, + "valid_targets_mean": 3323.6, + "valid_targets_min": 726 + }, + { + "epoch": 3.7289190718732312, + "grad_norm": 0.374922790410709, + "learning_rate": 2.1234640574775783e-05, + "loss": 0.1267, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06974637508392334, + "step": 3295, + "valid_targets_mean": 6207.1, + "valid_targets_min": 3176 + }, + { + "epoch": 3.7345783814374647, + "grad_norm": 0.4421655964033493, + "learning_rate": 2.1178331179319336e-05, + "loss": 0.1696, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17371153831481934, + "step": 3300, + "valid_targets_mean": 4754.5, + "valid_targets_min": 758 + }, + { + "epoch": 3.7402376910016977, + "grad_norm": 0.38934920476024654, + "learning_rate": 2.112201240927037e-05, + "loss": 0.1431, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05925353616476059, + "step": 3305, + "valid_targets_mean": 5131.0, + "valid_targets_min": 783 + }, + { + "epoch": 3.745897000565931, + "grad_norm": 0.37850410107445387, + "learning_rate": 2.1065684712690954e-05, + "loss": 0.1341, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.052084051072597504, + "step": 3310, + "valid_targets_mean": 4142.1, + "valid_targets_min": 797 + }, + { + "epoch": 3.751556310130164, + "grad_norm": 0.46801608865252614, + "learning_rate": 2.1009348537714194e-05, + "loss": 0.1452, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08224552869796753, + "step": 3315, + "valid_targets_mean": 5051.6, + "valid_targets_min": 950 + }, + { + "epoch": 3.7572156196943975, + "grad_norm": 0.4256891198543974, + "learning_rate": 2.0953004332540644e-05, + "loss": 0.1269, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.059774063527584076, + "step": 3320, + "valid_targets_mean": 3915.1, + "valid_targets_min": 770 + }, + { + "epoch": 3.7628749292586305, + "grad_norm": 0.40176004476436056, + "learning_rate": 2.089665254543473e-05, + "loss": 0.1234, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07830460369586945, + "step": 3325, + "valid_targets_mean": 5301.0, + "valid_targets_min": 3136 + }, + { + "epoch": 3.7685342388228635, + "grad_norm": 0.39584594141294493, + "learning_rate": 2.0840293624721234e-05, + "loss": 0.142, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07017828524112701, + "step": 3330, + "valid_targets_mean": 4507.1, + "valid_targets_min": 1318 + }, + { + "epoch": 3.774193548387097, + "grad_norm": 0.36672592240311425, + "learning_rate": 2.0783928018781644e-05, + "loss": 0.1259, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.050538673996925354, + "step": 3335, + "valid_targets_mean": 4339.1, + "valid_targets_min": 1053 + }, + { + "epoch": 3.77985285795133, + "grad_norm": 0.4274388943855513, + "learning_rate": 2.0727556176050676e-05, + "loss": 0.1298, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06906148046255112, + "step": 3340, + "valid_targets_mean": 4898.4, + "valid_targets_min": 723 + }, + { + "epoch": 3.785512167515563, + "grad_norm": 0.4929360389846311, + "learning_rate": 2.0671178545012643e-05, + "loss": 0.128, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06501125544309616, + "step": 3345, + "valid_targets_mean": 3514.1, + "valid_targets_min": 1123 + }, + { + "epoch": 3.7911714770797964, + "grad_norm": 0.6548791051468207, + "learning_rate": 2.0614795574197907e-05, + "loss": 0.1317, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05894312262535095, + "step": 3350, + "valid_targets_mean": 3610.0, + "valid_targets_min": 794 + }, + { + "epoch": 3.7968307866440294, + "grad_norm": 0.39675050747674023, + "learning_rate": 2.0558407712179334e-05, + "loss": 0.1375, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.077305868268013, + "step": 3355, + "valid_targets_mean": 4054.6, + "valid_targets_min": 715 + }, + { + "epoch": 3.8024900962082624, + "grad_norm": 0.39636570295188245, + "learning_rate": 2.050201540756868e-05, + "loss": 0.1392, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0457785427570343, + "step": 3360, + "valid_targets_mean": 4274.4, + "valid_targets_min": 3143 + }, + { + "epoch": 3.808149405772496, + "grad_norm": 0.43196621470005653, + "learning_rate": 2.0445619109013054e-05, + "loss": 0.1213, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07415586709976196, + "step": 3365, + "valid_targets_mean": 4793.2, + "valid_targets_min": 647 + }, + { + "epoch": 3.813808715336729, + "grad_norm": 0.4286848072532181, + "learning_rate": 2.038921926519134e-05, + "loss": 0.1358, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0979953184723854, + "step": 3370, + "valid_targets_mean": 4855.2, + "valid_targets_min": 673 + }, + { + "epoch": 3.819468024900962, + "grad_norm": 0.4587709980050609, + "learning_rate": 2.033281632481063e-05, + "loss": 0.1282, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.057599350810050964, + "step": 3375, + "valid_targets_mean": 4269.9, + "valid_targets_min": 1530 + }, + { + "epoch": 3.8251273344651953, + "grad_norm": 0.3954736489259727, + "learning_rate": 2.027641073660265e-05, + "loss": 0.1308, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0663749948143959, + "step": 3380, + "valid_targets_mean": 4863.5, + "valid_targets_min": 1173 + }, + { + "epoch": 3.8307866440294283, + "grad_norm": 0.36564308007155066, + "learning_rate": 2.0220002949320187e-05, + "loss": 0.1338, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.053442299365997314, + "step": 3385, + "valid_targets_mean": 4413.8, + "valid_targets_min": 1059 + }, + { + "epoch": 3.8364459535936617, + "grad_norm": 0.42243981489071025, + "learning_rate": 2.0163593411733533e-05, + "loss": 0.1354, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0701003298163414, + "step": 3390, + "valid_targets_mean": 3776.6, + "valid_targets_min": 916 + }, + { + "epoch": 3.8421052631578947, + "grad_norm": 0.4216689398083385, + "learning_rate": 2.0107182572626897e-05, + "loss": 0.1426, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.060196444392204285, + "step": 3395, + "valid_targets_mean": 4839.2, + "valid_targets_min": 459 + }, + { + "epoch": 3.847764572722128, + "grad_norm": 0.4032038376298053, + "learning_rate": 2.0050770880794843e-05, + "loss": 0.136, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08659042418003082, + "step": 3400, + "valid_targets_mean": 5129.6, + "valid_targets_min": 802 + }, + { + "epoch": 3.853423882286361, + "grad_norm": 0.45895969240402956, + "learning_rate": 1.9994358785038736e-05, + "loss": 0.1382, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07566884160041809, + "step": 3405, + "valid_targets_mean": 4869.5, + "valid_targets_min": 1284 + }, + { + "epoch": 3.859083191850594, + "grad_norm": 0.5232805230269381, + "learning_rate": 1.9937946734163117e-05, + "loss": 0.1352, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08806587755680084, + "step": 3410, + "valid_targets_mean": 3452.1, + "valid_targets_min": 846 + }, + { + "epoch": 3.8647425014148276, + "grad_norm": 0.38307896981694084, + "learning_rate": 1.98815351769722e-05, + "loss": 0.1386, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05788277089595795, + "step": 3415, + "valid_targets_mean": 4947.1, + "valid_targets_min": 3242 + }, + { + "epoch": 3.8704018109790606, + "grad_norm": 0.5413050251322713, + "learning_rate": 1.982512456226628e-05, + "loss": 0.1343, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09454470872879028, + "step": 3420, + "valid_targets_mean": 3194.0, + "valid_targets_min": 195 + }, + { + "epoch": 3.8760611205432935, + "grad_norm": 0.3921874358447072, + "learning_rate": 1.976871533883812e-05, + "loss": 0.1343, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08793818950653076, + "step": 3425, + "valid_targets_mean": 4932.2, + "valid_targets_min": 2748 + }, + { + "epoch": 3.881720430107527, + "grad_norm": 0.4193408860297325, + "learning_rate": 1.971230795546944e-05, + "loss": 0.1461, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08685111254453659, + "step": 3430, + "valid_targets_mean": 5092.4, + "valid_targets_min": 3139 + }, + { + "epoch": 3.88737973967176, + "grad_norm": 0.44037891119441686, + "learning_rate": 1.965590286092731e-05, + "loss": 0.1446, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06169677898287773, + "step": 3435, + "valid_targets_mean": 3109.9, + "valid_targets_min": 502 + }, + { + "epoch": 3.893039049235993, + "grad_norm": 0.5185652920425788, + "learning_rate": 1.9599500503960596e-05, + "loss": 0.1385, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08434075117111206, + "step": 3440, + "valid_targets_mean": 3767.2, + "valid_targets_min": 458 + }, + { + "epoch": 3.8986983588002264, + "grad_norm": 0.46573697245407886, + "learning_rate": 1.954310133329639e-05, + "loss": 0.132, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0805048942565918, + "step": 3445, + "valid_targets_mean": 3690.6, + "valid_targets_min": 764 + }, + { + "epoch": 3.9043576683644594, + "grad_norm": 0.3925579306081633, + "learning_rate": 1.948670579763641e-05, + "loss": 0.1415, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.061228327453136444, + "step": 3450, + "valid_targets_mean": 4644.2, + "valid_targets_min": 890 + }, + { + "epoch": 3.910016977928693, + "grad_norm": 0.42940374466604453, + "learning_rate": 1.9430314345653486e-05, + "loss": 0.1378, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07195357233285904, + "step": 3455, + "valid_targets_mean": 4788.4, + "valid_targets_min": 1744 + }, + { + "epoch": 3.915676287492926, + "grad_norm": 0.40076587676365877, + "learning_rate": 1.9373927425987928e-05, + "loss": 0.1497, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05733758956193924, + "step": 3460, + "valid_targets_mean": 5301.6, + "valid_targets_min": 2962 + }, + { + "epoch": 3.9213355970571593, + "grad_norm": 0.44375438199894207, + "learning_rate": 1.9317545487244016e-05, + "loss": 0.1374, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04761147499084473, + "step": 3465, + "valid_targets_mean": 3185.1, + "valid_targets_min": 835 + }, + { + "epoch": 3.9269949066213923, + "grad_norm": 0.43089188470089607, + "learning_rate": 1.926116897798639e-05, + "loss": 0.1345, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.059456273913383484, + "step": 3470, + "valid_targets_mean": 3884.1, + "valid_targets_min": 727 + }, + { + "epoch": 3.9326542161856253, + "grad_norm": 0.45964080384584727, + "learning_rate": 1.9204798346736485e-05, + "loss": 0.137, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07701291888952255, + "step": 3475, + "valid_targets_mean": 4098.4, + "valid_targets_min": 865 + }, + { + "epoch": 3.9383135257498587, + "grad_norm": 0.4698451021957866, + "learning_rate": 1.914843404196899e-05, + "loss": 0.138, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08727217465639114, + "step": 3480, + "valid_targets_mean": 5849.8, + "valid_targets_min": 3899 + }, + { + "epoch": 3.9439728353140917, + "grad_norm": 0.40170288860526393, + "learning_rate": 1.9092076512108253e-05, + "loss": 0.1286, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0312468484044075, + "step": 3485, + "valid_targets_mean": 3420.4, + "valid_targets_min": 738 + }, + { + "epoch": 3.9496321448783247, + "grad_norm": 0.43276384802888396, + "learning_rate": 1.903572620552471e-05, + "loss": 0.1641, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09738308191299438, + "step": 3490, + "valid_targets_mean": 4473.1, + "valid_targets_min": 852 + }, + { + "epoch": 3.955291454442558, + "grad_norm": 0.5388719543712218, + "learning_rate": 1.8979383570531358e-05, + "loss": 0.1262, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04769424349069595, + "step": 3495, + "valid_targets_mean": 2816.8, + "valid_targets_min": 1067 + }, + { + "epoch": 3.960950764006791, + "grad_norm": 0.3716254814668609, + "learning_rate": 1.8923049055380128e-05, + "loss": 0.1327, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05011553317308426, + "step": 3500, + "valid_targets_mean": 4178.4, + "valid_targets_min": 2531 + }, + { + "epoch": 3.966610073571024, + "grad_norm": 0.4183514797771599, + "learning_rate": 1.8866723108258376e-05, + "loss": 0.1331, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07371443510055542, + "step": 3505, + "valid_targets_mean": 4337.1, + "valid_targets_min": 654 + }, + { + "epoch": 3.9722693831352576, + "grad_norm": 0.4336604482727519, + "learning_rate": 1.8810406177285282e-05, + "loss": 0.1232, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06532616168260574, + "step": 3510, + "valid_targets_mean": 3729.5, + "valid_targets_min": 813 + }, + { + "epoch": 3.9779286926994906, + "grad_norm": 0.4469382798064426, + "learning_rate": 1.8754098710508294e-05, + "loss": 0.1422, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07170473784208298, + "step": 3515, + "valid_targets_mean": 4074.0, + "valid_targets_min": 794 + }, + { + "epoch": 3.9835880022637236, + "grad_norm": 0.4294793837227325, + "learning_rate": 1.869780115589957e-05, + "loss": 0.1255, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05397827923297882, + "step": 3520, + "valid_targets_mean": 4072.4, + "valid_targets_min": 839 + }, + { + "epoch": 3.989247311827957, + "grad_norm": 0.42235316646006354, + "learning_rate": 1.8641513961352396e-05, + "loss": 0.1322, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.054098255932331085, + "step": 3525, + "valid_targets_mean": 3694.2, + "valid_targets_min": 647 + }, + { + "epoch": 3.99490662139219, + "grad_norm": 0.44323236581315834, + "learning_rate": 1.858523757467765e-05, + "loss": 0.1264, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08219219744205475, + "step": 3530, + "valid_targets_mean": 4507.5, + "valid_targets_min": 817 + }, + { + "epoch": 4.0, + "grad_norm": 0.5289552582463047, + "learning_rate": 1.8528972443600226e-05, + "loss": 0.1459, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12049068510532379, + "step": 3535, + "valid_targets_mean": 4704.8, + "valid_targets_min": 727 + }, + { + "epoch": 4.005659309564233, + "grad_norm": 0.4036473408889555, + "learning_rate": 1.8472719015755452e-05, + "loss": 0.1263, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05678657069802284, + "step": 3540, + "valid_targets_mean": 5569.6, + "valid_targets_min": 839 + }, + { + "epoch": 4.011318619128466, + "grad_norm": 0.4268247531521895, + "learning_rate": 1.8416477738685567e-05, + "loss": 0.1224, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05326458811759949, + "step": 3545, + "valid_targets_mean": 4454.5, + "valid_targets_min": 854 + }, + { + "epoch": 4.016977928692699, + "grad_norm": 0.4418705101464708, + "learning_rate": 1.8360249059836123e-05, + "loss": 0.1298, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06818187236785889, + "step": 3550, + "valid_targets_mean": 4536.8, + "valid_targets_min": 560 + }, + { + "epoch": 4.022637238256933, + "grad_norm": 0.41063749438933966, + "learning_rate": 1.830403342655246e-05, + "loss": 0.1167, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04480816423892975, + "step": 3555, + "valid_targets_mean": 4323.4, + "valid_targets_min": 854 + }, + { + "epoch": 4.028296547821165, + "grad_norm": 0.4472973858310253, + "learning_rate": 1.824783128607612e-05, + "loss": 0.1275, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10498711466789246, + "step": 3560, + "valid_targets_mean": 4781.0, + "valid_targets_min": 733 + }, + { + "epoch": 4.033955857385399, + "grad_norm": 0.3958696578410186, + "learning_rate": 1.8191643085541296e-05, + "loss": 0.145, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05419563874602318, + "step": 3565, + "valid_targets_mean": 4819.9, + "valid_targets_min": 3710 + }, + { + "epoch": 4.039615166949632, + "grad_norm": 0.4364483933843098, + "learning_rate": 1.813546927197129e-05, + "loss": 0.1159, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06048429012298584, + "step": 3570, + "valid_targets_mean": 4419.2, + "valid_targets_min": 3640 + }, + { + "epoch": 4.045274476513866, + "grad_norm": 0.42082516651309754, + "learning_rate": 1.8079310292274928e-05, + "loss": 0.115, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05712258815765381, + "step": 3575, + "valid_targets_mean": 3756.2, + "valid_targets_min": 622 + }, + { + "epoch": 4.050933786078098, + "grad_norm": 0.4445614749183525, + "learning_rate": 1.8023166593243026e-05, + "loss": 0.1254, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05414709076285362, + "step": 3580, + "valid_targets_mean": 3712.5, + "valid_targets_min": 807 + }, + { + "epoch": 4.056593095642332, + "grad_norm": 0.4069457996706397, + "learning_rate": 1.7967038621544845e-05, + "loss": 0.1292, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07209647446870804, + "step": 3585, + "valid_targets_mean": 5813.4, + "valid_targets_min": 533 + }, + { + "epoch": 4.062252405206565, + "grad_norm": 0.4834461782160832, + "learning_rate": 1.791092682372449e-05, + "loss": 0.131, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07951731234788895, + "step": 3590, + "valid_targets_mean": 4773.0, + "valid_targets_min": 1164 + }, + { + "epoch": 4.067911714770798, + "grad_norm": 0.3750704065358872, + "learning_rate": 1.7854831646197426e-05, + "loss": 0.116, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05851902812719345, + "step": 3595, + "valid_targets_mean": 6308.5, + "valid_targets_min": 2967 + }, + { + "epoch": 4.073571024335031, + "grad_norm": 0.45058016083140984, + "learning_rate": 1.7798753535246856e-05, + "loss": 0.1164, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04868333786725998, + "step": 3600, + "valid_targets_mean": 3310.5, + "valid_targets_min": 993 + }, + { + "epoch": 4.079230333899265, + "grad_norm": 0.41985834498367625, + "learning_rate": 1.7742692937020234e-05, + "loss": 0.1252, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06979656964540482, + "step": 3605, + "valid_targets_mean": 5825.2, + "valid_targets_min": 1203 + }, + { + "epoch": 4.084889643463497, + "grad_norm": 0.4005851784890584, + "learning_rate": 1.768665029752567e-05, + "loss": 0.1206, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04697588086128235, + "step": 3610, + "valid_targets_mean": 4358.9, + "valid_targets_min": 664 + }, + { + "epoch": 4.090548953027731, + "grad_norm": 0.44047181046562645, + "learning_rate": 1.763062606262839e-05, + "loss": 0.1277, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09183581918478012, + "step": 3615, + "valid_targets_mean": 4585.9, + "valid_targets_min": 984 + }, + { + "epoch": 4.096208262591964, + "grad_norm": 0.4310310233662772, + "learning_rate": 1.7574620678047215e-05, + "loss": 0.1337, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06533151865005493, + "step": 3620, + "valid_targets_mean": 3748.9, + "valid_targets_min": 722 + }, + { + "epoch": 4.101867572156197, + "grad_norm": 0.5051659197442496, + "learning_rate": 1.751863458935099e-05, + "loss": 0.1303, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06272322684526443, + "step": 3625, + "valid_targets_mean": 3704.2, + "valid_targets_min": 614 + }, + { + "epoch": 4.10752688172043, + "grad_norm": 0.4679660122587458, + "learning_rate": 1.746266824195504e-05, + "loss": 0.1445, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1846861094236374, + "step": 3630, + "valid_targets_mean": 4474.0, + "valid_targets_min": 542 + }, + { + "epoch": 4.1131861912846635, + "grad_norm": 0.5025287110356622, + "learning_rate": 1.7406722081117632e-05, + "loss": 0.1185, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04662357643246651, + "step": 3635, + "valid_targets_mean": 3184.9, + "valid_targets_min": 661 + }, + { + "epoch": 4.118845500848896, + "grad_norm": 0.507166610600806, + "learning_rate": 1.7350796551936432e-05, + "loss": 0.1225, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05459631234407425, + "step": 3640, + "valid_targets_mean": 3330.9, + "valid_targets_min": 703 + }, + { + "epoch": 4.124504810413129, + "grad_norm": 0.4758980852545755, + "learning_rate": 1.7294892099344975e-05, + "loss": 0.1247, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05308496206998825, + "step": 3645, + "valid_targets_mean": 3681.8, + "valid_targets_min": 673 + }, + { + "epoch": 4.130164119977363, + "grad_norm": 0.44564084263218856, + "learning_rate": 1.7239009168109108e-05, + "loss": 0.1183, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06045549362897873, + "step": 3650, + "valid_targets_mean": 3790.4, + "valid_targets_min": 864 + }, + { + "epoch": 4.135823429541596, + "grad_norm": 0.4216123575764381, + "learning_rate": 1.7183148202823445e-05, + "loss": 0.1239, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06929263472557068, + "step": 3655, + "valid_targets_mean": 5219.4, + "valid_targets_min": 3378 + }, + { + "epoch": 4.141482739105829, + "grad_norm": 0.4352450177340483, + "learning_rate": 1.7127309647907867e-05, + "loss": 0.1172, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05109712481498718, + "step": 3660, + "valid_targets_mean": 3970.6, + "valid_targets_min": 1380 + }, + { + "epoch": 4.147142048670062, + "grad_norm": 0.43331799385220715, + "learning_rate": 1.7071493947603942e-05, + "loss": 0.1223, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07668107748031616, + "step": 3665, + "valid_targets_mean": 4678.8, + "valid_targets_min": 669 + }, + { + "epoch": 4.152801358234296, + "grad_norm": 0.46872163068233347, + "learning_rate": 1.7015701545971417e-05, + "loss": 0.1114, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.039187319576740265, + "step": 3670, + "valid_targets_mean": 2807.0, + "valid_targets_min": 718 + }, + { + "epoch": 4.158460667798528, + "grad_norm": 0.4387306246980307, + "learning_rate": 1.695993288688469e-05, + "loss": 0.1252, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07323797047138214, + "step": 3675, + "valid_targets_mean": 6061.5, + "valid_targets_min": 2519 + }, + { + "epoch": 4.164119977362762, + "grad_norm": 0.42348043524118245, + "learning_rate": 1.6904188414029248e-05, + "loss": 0.1315, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05862698331475258, + "step": 3680, + "valid_targets_mean": 3433.1, + "valid_targets_min": 811 + }, + { + "epoch": 4.169779286926995, + "grad_norm": 0.4702510175797183, + "learning_rate": 1.6848468570898172e-05, + "loss": 0.1371, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11139865219593048, + "step": 3685, + "valid_targets_mean": 3575.0, + "valid_targets_min": 792 + }, + { + "epoch": 4.175438596491228, + "grad_norm": 0.433103672021495, + "learning_rate": 1.6792773800788583e-05, + "loss": 0.12, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04065656289458275, + "step": 3690, + "valid_targets_mean": 3380.2, + "valid_targets_min": 1054 + }, + { + "epoch": 4.181097906055461, + "grad_norm": 0.4863065187290204, + "learning_rate": 1.673710454679813e-05, + "loss": 0.1318, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06817498803138733, + "step": 3695, + "valid_targets_mean": 4204.2, + "valid_targets_min": 1129 + }, + { + "epoch": 4.186757215619695, + "grad_norm": 0.4596313606558194, + "learning_rate": 1.668146125182147e-05, + "loss": 0.1282, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08429364114999771, + "step": 3700, + "valid_targets_mean": 5096.6, + "valid_targets_min": 2642 + }, + { + "epoch": 4.192416525183927, + "grad_norm": 0.47674809371648486, + "learning_rate": 1.6625844358546715e-05, + "loss": 0.1212, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09548940509557724, + "step": 3705, + "valid_targets_mean": 4687.4, + "valid_targets_min": 687 + }, + { + "epoch": 4.198075834748161, + "grad_norm": 0.40283233914446176, + "learning_rate": 1.657025430945195e-05, + "loss": 0.1277, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04177000746130943, + "step": 3710, + "valid_targets_mean": 4533.5, + "valid_targets_min": 801 + }, + { + "epoch": 4.203735144312394, + "grad_norm": 0.4344052760918575, + "learning_rate": 1.651469154680167e-05, + "loss": 0.13, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07114529609680176, + "step": 3715, + "valid_targets_mean": 4721.9, + "valid_targets_min": 635 + }, + { + "epoch": 4.2093944538766275, + "grad_norm": 0.48976173210212776, + "learning_rate": 1.6459156512643303e-05, + "loss": 0.125, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05411301180720329, + "step": 3720, + "valid_targets_mean": 3306.0, + "valid_targets_min": 716 + }, + { + "epoch": 4.21505376344086, + "grad_norm": 0.44061149448910525, + "learning_rate": 1.640364964880367e-05, + "loss": 0.1299, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.043354593217372894, + "step": 3725, + "valid_targets_mean": 3967.4, + "valid_targets_min": 1302 + }, + { + "epoch": 4.2207130730050935, + "grad_norm": 0.4216883071598038, + "learning_rate": 1.6348171396885468e-05, + "loss": 0.1198, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05597950890660286, + "step": 3730, + "valid_targets_mean": 4388.0, + "valid_targets_min": 1180 + }, + { + "epoch": 4.226372382569327, + "grad_norm": 0.4338985799216439, + "learning_rate": 1.6292722198263766e-05, + "loss": 0.1215, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06400103867053986, + "step": 3735, + "valid_targets_mean": 4174.0, + "valid_targets_min": 767 + }, + { + "epoch": 4.2320316921335595, + "grad_norm": 0.4470975054375601, + "learning_rate": 1.623730249408249e-05, + "loss": 0.1218, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05776383355259895, + "step": 3740, + "valid_targets_mean": 3340.1, + "valid_targets_min": 727 + }, + { + "epoch": 4.237691001697793, + "grad_norm": 0.40070548500925846, + "learning_rate": 1.618191272525092e-05, + "loss": 0.1223, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0743846744298935, + "step": 3745, + "valid_targets_mean": 5911.6, + "valid_targets_min": 1823 + }, + { + "epoch": 4.243350311262026, + "grad_norm": 0.7585891252094583, + "learning_rate": 1.612655333244016e-05, + "loss": 0.1209, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05915948376059532, + "step": 3750, + "valid_targets_mean": 5122.2, + "valid_targets_min": 827 + }, + { + "epoch": 4.249009620826259, + "grad_norm": 0.43878775761171657, + "learning_rate": 1.6071224756079666e-05, + "loss": 0.1217, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0774497389793396, + "step": 3755, + "valid_targets_mean": 5431.2, + "valid_targets_min": 2351 + }, + { + "epoch": 4.254668930390492, + "grad_norm": 0.41534209331250566, + "learning_rate": 1.6015927436353713e-05, + "loss": 0.1312, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06852725148200989, + "step": 3760, + "valid_targets_mean": 5300.2, + "valid_targets_min": 1401 + }, + { + "epoch": 4.260328239954726, + "grad_norm": 0.5040796153202105, + "learning_rate": 1.59606618131979e-05, + "loss": 0.1246, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.057121746242046356, + "step": 3765, + "valid_targets_mean": 3299.4, + "valid_targets_min": 710 + }, + { + "epoch": 4.265987549518958, + "grad_norm": 0.410977165539141, + "learning_rate": 1.5905428326295663e-05, + "loss": 0.1308, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06279418617486954, + "step": 3770, + "valid_targets_mean": 4751.5, + "valid_targets_min": 726 + }, + { + "epoch": 4.271646859083192, + "grad_norm": 0.41762083591288557, + "learning_rate": 1.585022741507477e-05, + "loss": 0.1212, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.058160003274679184, + "step": 3775, + "valid_targets_mean": 4569.8, + "valid_targets_min": 927 + }, + { + "epoch": 4.277306168647425, + "grad_norm": 0.4555547057188535, + "learning_rate": 1.579505951870381e-05, + "loss": 0.1209, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05260225385427475, + "step": 3780, + "valid_targets_mean": 4338.5, + "valid_targets_min": 1744 + }, + { + "epoch": 4.282965478211658, + "grad_norm": 0.4696947190049743, + "learning_rate": 1.573992507608872e-05, + "loss": 0.1258, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07835404574871063, + "step": 3785, + "valid_targets_mean": 4676.2, + "valid_targets_min": 457 + }, + { + "epoch": 4.288624787775891, + "grad_norm": 0.4036596187392063, + "learning_rate": 1.568482452586929e-05, + "loss": 0.1148, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0388009250164032, + "step": 3790, + "valid_targets_mean": 3826.8, + "valid_targets_min": 1064 + }, + { + "epoch": 4.294284097340125, + "grad_norm": 0.41366636015500186, + "learning_rate": 1.5629758306415652e-05, + "loss": 0.1149, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04647793620824814, + "step": 3795, + "valid_targets_mean": 4372.6, + "valid_targets_min": 595 + }, + { + "epoch": 4.299943406904358, + "grad_norm": 0.463409741870885, + "learning_rate": 1.5574726855824827e-05, + "loss": 0.1259, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05510412156581879, + "step": 3800, + "valid_targets_mean": 4235.1, + "valid_targets_min": 1241 + }, + { + "epoch": 4.305602716468591, + "grad_norm": 0.4423542907270176, + "learning_rate": 1.5519730611917206e-05, + "loss": 0.1271, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0630895122885704, + "step": 3805, + "valid_targets_mean": 4004.2, + "valid_targets_min": 459 + }, + { + "epoch": 4.311262026032824, + "grad_norm": 0.4366748528014053, + "learning_rate": 1.546477001223309e-05, + "loss": 0.1308, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.053095363080501556, + "step": 3810, + "valid_targets_mean": 3799.4, + "valid_targets_min": 707 + }, + { + "epoch": 4.3169213355970575, + "grad_norm": 0.4134249566446507, + "learning_rate": 1.5409845494029208e-05, + "loss": 0.1276, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05636456981301308, + "step": 3815, + "valid_targets_mean": 4792.9, + "valid_targets_min": 1005 + }, + { + "epoch": 4.32258064516129, + "grad_norm": 0.4285613252790251, + "learning_rate": 1.5354957494275207e-05, + "loss": 0.1164, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07631918787956238, + "step": 3820, + "valid_targets_mean": 4952.5, + "valid_targets_min": 682 + }, + { + "epoch": 4.3282399547255235, + "grad_norm": 0.4773595081179597, + "learning_rate": 1.5300106449650234e-05, + "loss": 0.1319, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0532708466053009, + "step": 3825, + "valid_targets_mean": 4081.8, + "valid_targets_min": 898 + }, + { + "epoch": 4.333899264289757, + "grad_norm": 0.43423303738746805, + "learning_rate": 1.524529279653939e-05, + "loss": 0.1216, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0736265629529953, + "step": 3830, + "valid_targets_mean": 4229.0, + "valid_targets_min": 2719 + }, + { + "epoch": 4.3395585738539895, + "grad_norm": 0.4280145062330283, + "learning_rate": 1.5190516971030324e-05, + "loss": 0.1322, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09305986762046814, + "step": 3835, + "valid_targets_mean": 6348.9, + "valid_targets_min": 3420 + }, + { + "epoch": 4.345217883418223, + "grad_norm": 0.3896194190430841, + "learning_rate": 1.5135779408909732e-05, + "loss": 0.1217, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.058081019669771194, + "step": 3840, + "valid_targets_mean": 5313.2, + "valid_targets_min": 3892 + }, + { + "epoch": 4.350877192982456, + "grad_norm": 0.39604575202728937, + "learning_rate": 1.5081080545659874e-05, + "loss": 0.1225, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.060186851769685745, + "step": 3845, + "valid_targets_mean": 5486.6, + "valid_targets_min": 2494 + }, + { + "epoch": 4.356536502546689, + "grad_norm": 0.4628670750019828, + "learning_rate": 1.5026420816455156e-05, + "loss": 0.1193, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04671153426170349, + "step": 3850, + "valid_targets_mean": 2826.9, + "valid_targets_min": 538 + }, + { + "epoch": 4.362195812110922, + "grad_norm": 0.659005820870722, + "learning_rate": 1.4971800656158624e-05, + "loss": 0.1193, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08069370687007904, + "step": 3855, + "valid_targets_mean": 5243.4, + "valid_targets_min": 523 + }, + { + "epoch": 4.367855121675156, + "grad_norm": 0.41635685381638354, + "learning_rate": 1.4917220499318506e-05, + "loss": 0.1182, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04617477208375931, + "step": 3860, + "valid_targets_mean": 4755.4, + "valid_targets_min": 2702 + }, + { + "epoch": 4.373514431239389, + "grad_norm": 0.4887415461573347, + "learning_rate": 1.4862680780164805e-05, + "loss": 0.1186, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05413256958127022, + "step": 3865, + "valid_targets_mean": 3529.4, + "valid_targets_min": 927 + }, + { + "epoch": 4.379173740803622, + "grad_norm": 0.6897221373076788, + "learning_rate": 1.4808181932605787e-05, + "loss": 0.1328, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05371899530291557, + "step": 3870, + "valid_targets_mean": 3766.9, + "valid_targets_min": 804 + }, + { + "epoch": 4.384833050367855, + "grad_norm": 0.4238729375029099, + "learning_rate": 1.4753724390224551e-05, + "loss": 0.1351, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06203164905309677, + "step": 3875, + "valid_targets_mean": 4563.1, + "valid_targets_min": 774 + }, + { + "epoch": 4.390492359932089, + "grad_norm": 0.431642544735302, + "learning_rate": 1.4699308586275591e-05, + "loss": 0.122, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06149539351463318, + "step": 3880, + "valid_targets_mean": 4756.1, + "valid_targets_min": 663 + }, + { + "epoch": 4.396151669496321, + "grad_norm": 0.3690329286438631, + "learning_rate": 1.464493495368132e-05, + "loss": 0.1193, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04727946221828461, + "step": 3885, + "valid_targets_mean": 4264.4, + "valid_targets_min": 814 + }, + { + "epoch": 4.401810979060555, + "grad_norm": 0.48013328270862476, + "learning_rate": 1.459060392502866e-05, + "loss": 0.13, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06702776253223419, + "step": 3890, + "valid_targets_mean": 3938.5, + "valid_targets_min": 819 + }, + { + "epoch": 4.407470288624788, + "grad_norm": 0.4144946293763333, + "learning_rate": 1.4536315932565575e-05, + "loss": 0.1234, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05711708590388298, + "step": 3895, + "valid_targets_mean": 4862.5, + "valid_targets_min": 2915 + }, + { + "epoch": 4.413129598189021, + "grad_norm": 0.4053830917310123, + "learning_rate": 1.448207140819764e-05, + "loss": 0.1187, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0640198215842247, + "step": 3900, + "valid_targets_mean": 4713.1, + "valid_targets_min": 1429 + }, + { + "epoch": 4.418788907753254, + "grad_norm": 0.4091953270552957, + "learning_rate": 1.4427870783484613e-05, + "loss": 0.1139, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.058232441544532776, + "step": 3905, + "valid_targets_mean": 4950.1, + "valid_targets_min": 608 + }, + { + "epoch": 4.4244482173174875, + "grad_norm": 0.4693096340140407, + "learning_rate": 1.4373714489636985e-05, + "loss": 0.1225, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.056374967098236084, + "step": 3910, + "valid_targets_mean": 3914.2, + "valid_targets_min": 733 + }, + { + "epoch": 4.43010752688172, + "grad_norm": 0.43446966122717634, + "learning_rate": 1.4319602957512564e-05, + "loss": 0.128, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05685433745384216, + "step": 3915, + "valid_targets_mean": 3744.6, + "valid_targets_min": 958 + }, + { + "epoch": 4.4357668364459535, + "grad_norm": 0.42490889126957004, + "learning_rate": 1.4265536617613043e-05, + "loss": 0.1257, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07658243179321289, + "step": 3920, + "valid_targets_mean": 4833.5, + "valid_targets_min": 773 + }, + { + "epoch": 4.441426146010187, + "grad_norm": 0.4235873888362472, + "learning_rate": 1.4211515900080568e-05, + "loss": 0.1202, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07082770764827728, + "step": 3925, + "valid_targets_mean": 4799.4, + "valid_targets_min": 1093 + }, + { + "epoch": 4.44708545557442, + "grad_norm": 0.5267953967150498, + "learning_rate": 1.4157541234694324e-05, + "loss": 0.121, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05297333747148514, + "step": 3930, + "valid_targets_mean": 4296.4, + "valid_targets_min": 799 + }, + { + "epoch": 4.452744765138653, + "grad_norm": 0.44174834496402104, + "learning_rate": 1.4103613050867117e-05, + "loss": 0.1264, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06593005359172821, + "step": 3935, + "valid_targets_mean": 5117.9, + "valid_targets_min": 1108 + }, + { + "epoch": 4.458404074702886, + "grad_norm": 0.41481307092622194, + "learning_rate": 1.4049731777641948e-05, + "loss": 0.124, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0797896683216095, + "step": 3940, + "valid_targets_mean": 6513.5, + "valid_targets_min": 3780 + }, + { + "epoch": 4.46406338426712, + "grad_norm": 0.5191007214492774, + "learning_rate": 1.3995897843688615e-05, + "loss": 0.1126, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04670059680938721, + "step": 3945, + "valid_targets_mean": 3263.2, + "valid_targets_min": 782 + }, + { + "epoch": 4.469722693831352, + "grad_norm": 0.39846399874509947, + "learning_rate": 1.3942111677300276e-05, + "loss": 0.1212, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.052311014384031296, + "step": 3950, + "valid_targets_mean": 4876.6, + "valid_targets_min": 768 + }, + { + "epoch": 4.475382003395586, + "grad_norm": 0.42184764470465264, + "learning_rate": 1.388837370639008e-05, + "loss": 0.1263, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06705918908119202, + "step": 3955, + "valid_targets_mean": 5793.0, + "valid_targets_min": 2215 + }, + { + "epoch": 4.481041312959819, + "grad_norm": 0.48690415875578935, + "learning_rate": 1.3834684358487731e-05, + "loss": 0.1243, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0516020804643631, + "step": 3960, + "valid_targets_mean": 4959.8, + "valid_targets_min": 3570 + }, + { + "epoch": 4.486700622524052, + "grad_norm": 0.4292384536926852, + "learning_rate": 1.3781044060736089e-05, + "loss": 0.1173, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.047464773058891296, + "step": 3965, + "valid_targets_mean": 4129.6, + "valid_targets_min": 909 + }, + { + "epoch": 4.492359932088285, + "grad_norm": 0.5630207144423521, + "learning_rate": 1.3727453239887796e-05, + "loss": 0.1305, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06837454438209534, + "step": 3970, + "valid_targets_mean": 3392.0, + "valid_targets_min": 549 + }, + { + "epoch": 4.498019241652519, + "grad_norm": 0.4560334523627107, + "learning_rate": 1.367391232230185e-05, + "loss": 0.1251, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06927171349525452, + "step": 3975, + "valid_targets_mean": 4275.0, + "valid_targets_min": 971 + }, + { + "epoch": 4.503678551216751, + "grad_norm": 0.4477842416524273, + "learning_rate": 1.3620421733940234e-05, + "loss": 0.1162, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0494828037917614, + "step": 3980, + "valid_targets_mean": 3081.4, + "valid_targets_min": 773 + }, + { + "epoch": 4.509337860780985, + "grad_norm": 0.41625195165802736, + "learning_rate": 1.356698190036453e-05, + "loss": 0.1168, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06024770811200142, + "step": 3985, + "valid_targets_mean": 5191.1, + "valid_targets_min": 2553 + }, + { + "epoch": 4.514997170345218, + "grad_norm": 0.35501682706301524, + "learning_rate": 1.3513593246732506e-05, + "loss": 0.1139, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.049159664660692215, + "step": 3990, + "valid_targets_mean": 5162.5, + "valid_targets_min": 3284 + }, + { + "epoch": 4.5206564799094515, + "grad_norm": 0.4683788311584072, + "learning_rate": 1.3460256197794768e-05, + "loss": 0.1231, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06248003989458084, + "step": 3995, + "valid_targets_mean": 3987.2, + "valid_targets_min": 738 + }, + { + "epoch": 4.526315789473684, + "grad_norm": 0.41443180717996414, + "learning_rate": 1.3406971177891343e-05, + "loss": 0.1185, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.047134581953287125, + "step": 4000, + "valid_targets_mean": 3953.2, + "valid_targets_min": 761 + }, + { + "epoch": 4.5319750990379175, + "grad_norm": 0.46741451356615543, + "learning_rate": 1.3353738610948347e-05, + "loss": 0.1305, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07739831507205963, + "step": 4005, + "valid_targets_mean": 5125.5, + "valid_targets_min": 970 + }, + { + "epoch": 4.53763440860215, + "grad_norm": 0.4095519514112982, + "learning_rate": 1.3300558920474586e-05, + "loss": 0.114, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06186375021934509, + "step": 4010, + "valid_targets_mean": 5025.2, + "valid_targets_min": 756 + }, + { + "epoch": 4.5432937181663835, + "grad_norm": 0.48438682315430864, + "learning_rate": 1.3247432529558175e-05, + "loss": 0.1205, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.060278065502643585, + "step": 4015, + "valid_targets_mean": 3571.9, + "valid_targets_min": 920 + }, + { + "epoch": 4.548953027730617, + "grad_norm": 0.42868960664384664, + "learning_rate": 1.3194359860863201e-05, + "loss": 0.1277, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0491119883954525, + "step": 4020, + "valid_targets_mean": 4010.4, + "valid_targets_min": 2191 + }, + { + "epoch": 4.55461233729485, + "grad_norm": 0.48222909580227935, + "learning_rate": 1.3141341336626336e-05, + "loss": 0.1223, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07492602616548538, + "step": 4025, + "valid_targets_mean": 5607.8, + "valid_targets_min": 2415 + }, + { + "epoch": 4.560271646859083, + "grad_norm": 0.4246173816565741, + "learning_rate": 1.3088377378653503e-05, + "loss": 0.1206, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.058856334537267685, + "step": 4030, + "valid_targets_mean": 4352.9, + "valid_targets_min": 2985 + }, + { + "epoch": 4.565930956423316, + "grad_norm": 0.5096411819765573, + "learning_rate": 1.3035468408316501e-05, + "loss": 0.1305, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0776713564991951, + "step": 4035, + "valid_targets_mean": 4425.0, + "valid_targets_min": 940 + }, + { + "epoch": 4.57159026598755, + "grad_norm": 0.44732656994406245, + "learning_rate": 1.2982614846549639e-05, + "loss": 0.1143, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07919999957084656, + "step": 4040, + "valid_targets_mean": 6195.0, + "valid_targets_min": 950 + }, + { + "epoch": 4.577249575551782, + "grad_norm": 0.43720400770119544, + "learning_rate": 1.2929817113846428e-05, + "loss": 0.1209, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07752040028572083, + "step": 4045, + "valid_targets_mean": 5509.5, + "valid_targets_min": 3476 + }, + { + "epoch": 4.582908885116016, + "grad_norm": 0.45003523560277675, + "learning_rate": 1.287707563025621e-05, + "loss": 0.1242, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05949879437685013, + "step": 4050, + "valid_targets_mean": 3447.5, + "valid_targets_min": 790 + }, + { + "epoch": 4.588568194680249, + "grad_norm": 0.4713644567136156, + "learning_rate": 1.2824390815380805e-05, + "loss": 0.1195, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07154825329780579, + "step": 4055, + "valid_targets_mean": 4254.8, + "valid_targets_min": 696 + }, + { + "epoch": 4.594227504244482, + "grad_norm": 0.4173577906775227, + "learning_rate": 1.2771763088371202e-05, + "loss": 0.123, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.029596222564578056, + "step": 4060, + "valid_targets_mean": 3502.6, + "valid_targets_min": 424 + }, + { + "epoch": 4.599886813808715, + "grad_norm": 0.42901865712362014, + "learning_rate": 1.2719192867924197e-05, + "loss": 0.1196, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0453319251537323, + "step": 4065, + "valid_targets_mean": 4132.0, + "valid_targets_min": 729 + }, + { + "epoch": 4.605546123372949, + "grad_norm": 0.38383013535093313, + "learning_rate": 1.2666680572279083e-05, + "loss": 0.1092, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05213354155421257, + "step": 4070, + "valid_targets_mean": 5256.1, + "valid_targets_min": 615 + }, + { + "epoch": 4.611205432937181, + "grad_norm": 0.458191657315042, + "learning_rate": 1.2614226619214317e-05, + "loss": 0.1335, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05429482460021973, + "step": 4075, + "valid_targets_mean": 4538.9, + "valid_targets_min": 1185 + }, + { + "epoch": 4.616864742501415, + "grad_norm": 0.3918576468783326, + "learning_rate": 1.2561831426044173e-05, + "loss": 0.1202, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05884741246700287, + "step": 4080, + "valid_targets_mean": 4900.2, + "valid_targets_min": 2366 + }, + { + "epoch": 4.622524052065648, + "grad_norm": 0.4686434152504238, + "learning_rate": 1.250949540961547e-05, + "loss": 0.1186, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.056355420500040054, + "step": 4085, + "valid_targets_mean": 3773.4, + "valid_targets_min": 2793 + }, + { + "epoch": 4.6281833616298815, + "grad_norm": 0.4931430975437431, + "learning_rate": 1.2457218986304196e-05, + "loss": 0.1247, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.031206296756863594, + "step": 4090, + "valid_targets_mean": 2623.5, + "valid_targets_min": 623 + }, + { + "epoch": 4.633842671194114, + "grad_norm": 0.4546628255434425, + "learning_rate": 1.2405002572012252e-05, + "loss": 0.1287, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09062971919775009, + "step": 4095, + "valid_targets_mean": 5102.9, + "valid_targets_min": 2165 + }, + { + "epoch": 4.6395019807583475, + "grad_norm": 0.5380914568958212, + "learning_rate": 1.2352846582164117e-05, + "loss": 0.1316, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07665364444255829, + "step": 4100, + "valid_targets_mean": 4603.0, + "valid_targets_min": 845 + }, + { + "epoch": 4.645161290322581, + "grad_norm": 0.4819043939037198, + "learning_rate": 1.2300751431703523e-05, + "loss": 0.1258, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06684902310371399, + "step": 4105, + "valid_targets_mean": 4399.1, + "valid_targets_min": 988 + }, + { + "epoch": 4.6508205998868135, + "grad_norm": 0.43279095261787515, + "learning_rate": 1.2248717535090196e-05, + "loss": 0.1255, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0678836777806282, + "step": 4110, + "valid_targets_mean": 4746.0, + "valid_targets_min": 3773 + }, + { + "epoch": 4.656479909451047, + "grad_norm": 0.4309052036394988, + "learning_rate": 1.2196745306296522e-05, + "loss": 0.1253, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.040802229195833206, + "step": 4115, + "valid_targets_mean": 3800.5, + "valid_targets_min": 884 + }, + { + "epoch": 4.66213921901528, + "grad_norm": 0.3796887914262953, + "learning_rate": 1.2144835158804282e-05, + "loss": 0.115, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04925809055566788, + "step": 4120, + "valid_targets_mean": 5000.5, + "valid_targets_min": 1085 + }, + { + "epoch": 4.667798528579513, + "grad_norm": 0.45044986237820184, + "learning_rate": 1.2092987505601346e-05, + "loss": 0.1327, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06213303655385971, + "step": 4125, + "valid_targets_mean": 3954.1, + "valid_targets_min": 705 + }, + { + "epoch": 4.673457838143746, + "grad_norm": 0.4565841775426236, + "learning_rate": 1.2041202759178381e-05, + "loss": 0.1327, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0786890834569931, + "step": 4130, + "valid_targets_mean": 4876.9, + "valid_targets_min": 2482 + }, + { + "epoch": 4.67911714770798, + "grad_norm": 0.3985805280645289, + "learning_rate": 1.198948133152559e-05, + "loss": 0.1129, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05305895581841469, + "step": 4135, + "valid_targets_mean": 4218.0, + "valid_targets_min": 1003 + }, + { + "epoch": 4.684776457272212, + "grad_norm": 1.661062358770793, + "learning_rate": 1.1937823634129416e-05, + "loss": 0.1275, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.061602454632520676, + "step": 4140, + "valid_targets_mean": 4663.6, + "valid_targets_min": 751 + }, + { + "epoch": 4.690435766836446, + "grad_norm": 0.42431185969915197, + "learning_rate": 1.1886230077969278e-05, + "loss": 0.1265, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05881241708993912, + "step": 4145, + "valid_targets_mean": 4818.5, + "valid_targets_min": 3064 + }, + { + "epoch": 4.696095076400679, + "grad_norm": 0.4863840574293805, + "learning_rate": 1.1834701073514306e-05, + "loss": 0.1217, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.056938230991363525, + "step": 4150, + "valid_targets_mean": 3475.4, + "valid_targets_min": 745 + }, + { + "epoch": 4.701754385964913, + "grad_norm": 0.40150845619285563, + "learning_rate": 1.1783237030720049e-05, + "loss": 0.1184, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05364084988832474, + "step": 4155, + "valid_targets_mean": 5424.5, + "valid_targets_min": 2766 + }, + { + "epoch": 4.707413695529145, + "grad_norm": 0.4562839678904098, + "learning_rate": 1.1731838359025261e-05, + "loss": 0.1356, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.061463966965675354, + "step": 4160, + "valid_targets_mean": 4468.1, + "valid_targets_min": 2057 + }, + { + "epoch": 4.713073005093379, + "grad_norm": 0.47793439887643785, + "learning_rate": 1.1680505467348584e-05, + "loss": 0.1173, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0712067037820816, + "step": 4165, + "valid_targets_mean": 4023.0, + "valid_targets_min": 722 + }, + { + "epoch": 4.718732314657612, + "grad_norm": 0.4306208021351283, + "learning_rate": 1.162923876408535e-05, + "loss": 0.1172, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04268105328083038, + "step": 4170, + "valid_targets_mean": 3159.6, + "valid_targets_min": 740 + }, + { + "epoch": 4.724391624221845, + "grad_norm": 0.40817220262785553, + "learning_rate": 1.1578038657104295e-05, + "loss": 0.1161, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05487000569701195, + "step": 4175, + "valid_targets_mean": 4861.6, + "valid_targets_min": 918 + }, + { + "epoch": 4.730050933786078, + "grad_norm": 0.40763857024275735, + "learning_rate": 1.1526905553744337e-05, + "loss": 0.1251, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08093965798616409, + "step": 4180, + "valid_targets_mean": 5678.8, + "valid_targets_min": 2247 + }, + { + "epoch": 4.7357102433503115, + "grad_norm": 0.43791203218498903, + "learning_rate": 1.1475839860811301e-05, + "loss": 0.132, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06179182976484299, + "step": 4185, + "valid_targets_mean": 5184.8, + "valid_targets_min": 741 + }, + { + "epoch": 4.741369552914544, + "grad_norm": 0.4789749298042253, + "learning_rate": 1.142484198457475e-05, + "loss": 0.1223, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04083048179745674, + "step": 4190, + "valid_targets_mean": 3193.8, + "valid_targets_min": 518 + }, + { + "epoch": 4.7470288624787775, + "grad_norm": 0.3845695082189763, + "learning_rate": 1.1373912330764671e-05, + "loss": 0.1109, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05759603530168533, + "step": 4195, + "valid_targets_mean": 4392.2, + "valid_targets_min": 892 + }, + { + "epoch": 4.752688172043011, + "grad_norm": 0.44004667729218533, + "learning_rate": 1.1323051304568292e-05, + "loss": 0.1313, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04937979206442833, + "step": 4200, + "valid_targets_mean": 3867.6, + "valid_targets_min": 829 + }, + { + "epoch": 4.7583474816072435, + "grad_norm": 0.4189938611519159, + "learning_rate": 1.1272259310626872e-05, + "loss": 0.1217, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04238837584853172, + "step": 4205, + "valid_targets_mean": 3586.4, + "valid_targets_min": 669 + }, + { + "epoch": 4.764006791171477, + "grad_norm": 0.5508470662234496, + "learning_rate": 1.122153675303244e-05, + "loss": 0.1247, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07365132868289948, + "step": 4210, + "valid_targets_mean": 4929.2, + "valid_targets_min": 989 + }, + { + "epoch": 4.76966610073571, + "grad_norm": 0.45016240420538156, + "learning_rate": 1.1170884035324607e-05, + "loss": 0.1151, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06703575700521469, + "step": 4215, + "valid_targets_mean": 4916.9, + "valid_targets_min": 1006 + }, + { + "epoch": 4.775325410299944, + "grad_norm": 0.4611980260938275, + "learning_rate": 1.1120301560487365e-05, + "loss": 0.115, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06248089671134949, + "step": 4220, + "valid_targets_mean": 4061.2, + "valid_targets_min": 2135 + }, + { + "epoch": 4.780984719864176, + "grad_norm": 0.46977086186293865, + "learning_rate": 1.1069789730945849e-05, + "loss": 0.134, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0966668576002121, + "step": 4225, + "valid_targets_mean": 5970.0, + "valid_targets_min": 2341 + }, + { + "epoch": 4.78664402942841, + "grad_norm": 0.43581488857534234, + "learning_rate": 1.1019348948563154e-05, + "loss": 0.1175, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.053540997207164764, + "step": 4230, + "valid_targets_mean": 3687.8, + "valid_targets_min": 505 + }, + { + "epoch": 4.792303338992643, + "grad_norm": 0.4162294344905665, + "learning_rate": 1.096897961463714e-05, + "loss": 0.1246, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.055918462574481964, + "step": 4235, + "valid_targets_mean": 4684.8, + "valid_targets_min": 1048 + }, + { + "epoch": 4.797962648556876, + "grad_norm": 0.45997489725486035, + "learning_rate": 1.0918682129897244e-05, + "loss": 0.1252, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07810433208942413, + "step": 4240, + "valid_targets_mean": 5262.0, + "valid_targets_min": 1100 + }, + { + "epoch": 4.803621958121109, + "grad_norm": 0.41361569386578395, + "learning_rate": 1.0868456894501276e-05, + "loss": 0.1175, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06643465161323547, + "step": 4245, + "valid_targets_mean": 5987.0, + "valid_targets_min": 3036 + }, + { + "epoch": 4.809281267685343, + "grad_norm": 0.4802943036306264, + "learning_rate": 1.0818304308032232e-05, + "loss": 0.1198, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05267925560474396, + "step": 4250, + "valid_targets_mean": 3031.6, + "valid_targets_min": 771 + }, + { + "epoch": 4.814940577249575, + "grad_norm": 0.4570928687141176, + "learning_rate": 1.0768224769495155e-05, + "loss": 0.1202, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.051592983305454254, + "step": 4255, + "valid_targets_mean": 3961.0, + "valid_targets_min": 928 + }, + { + "epoch": 4.820599886813809, + "grad_norm": 0.4468848699023753, + "learning_rate": 1.0718218677313904e-05, + "loss": 0.1117, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07375261187553406, + "step": 4260, + "valid_targets_mean": 5034.0, + "valid_targets_min": 774 + }, + { + "epoch": 4.826259196378042, + "grad_norm": 0.4951714351158897, + "learning_rate": 1.0668286429328015e-05, + "loss": 0.1248, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08573853969573975, + "step": 4265, + "valid_targets_mean": 4990.6, + "valid_targets_min": 809 + }, + { + "epoch": 4.831918505942275, + "grad_norm": 0.45659393010024574, + "learning_rate": 1.0618428422789555e-05, + "loss": 0.1001, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05222148820757866, + "step": 4270, + "valid_targets_mean": 2994.1, + "valid_targets_min": 693 + }, + { + "epoch": 4.837577815506508, + "grad_norm": 0.4908523564326662, + "learning_rate": 1.0568645054359919e-05, + "loss": 0.1297, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08589263260364532, + "step": 4275, + "valid_targets_mean": 3389.4, + "valid_targets_min": 363 + }, + { + "epoch": 4.8432371250707416, + "grad_norm": 0.47203406093602257, + "learning_rate": 1.051893672010669e-05, + "loss": 0.1299, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06379195302724838, + "step": 4280, + "valid_targets_mean": 3900.6, + "valid_targets_min": 708 + }, + { + "epoch": 4.848896434634975, + "grad_norm": 0.4421238643776232, + "learning_rate": 1.0469303815500518e-05, + "loss": 0.1305, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0728086531162262, + "step": 4285, + "valid_targets_mean": 3292.5, + "valid_targets_min": 892 + }, + { + "epoch": 4.8545557441992075, + "grad_norm": 0.4736363206313801, + "learning_rate": 1.0419746735411922e-05, + "loss": 0.1349, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0703849345445633, + "step": 4290, + "valid_targets_mean": 4230.4, + "valid_targets_min": 758 + }, + { + "epoch": 4.860215053763441, + "grad_norm": 0.4381275272528742, + "learning_rate": 1.037026587410819e-05, + "loss": 0.1142, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0795651376247406, + "step": 4295, + "valid_targets_mean": 5376.0, + "valid_targets_min": 3791 + }, + { + "epoch": 4.8658743633276735, + "grad_norm": 0.47324408485644676, + "learning_rate": 1.032086162525021e-05, + "loss": 0.1258, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07254868745803833, + "step": 4300, + "valid_targets_mean": 4470.4, + "valid_targets_min": 1131 + }, + { + "epoch": 4.871533672891907, + "grad_norm": 0.42468900888066907, + "learning_rate": 1.0271534381889385e-05, + "loss": 0.1144, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04841341823339462, + "step": 4305, + "valid_targets_mean": 4607.6, + "valid_targets_min": 688 + }, + { + "epoch": 4.87719298245614, + "grad_norm": 0.567638820133214, + "learning_rate": 1.0222284536464451e-05, + "loss": 0.1258, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07067003846168518, + "step": 4310, + "valid_targets_mean": 4777.8, + "valid_targets_min": 1672 + }, + { + "epoch": 4.882852292020374, + "grad_norm": 0.4885743965574918, + "learning_rate": 1.0173112480798376e-05, + "loss": 0.1268, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07873817533254623, + "step": 4315, + "valid_targets_mean": 3862.0, + "valid_targets_min": 1007 + }, + { + "epoch": 4.888511601584606, + "grad_norm": 0.41744390654341923, + "learning_rate": 1.0124018606095278e-05, + "loss": 0.1213, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06686016172170639, + "step": 4320, + "valid_targets_mean": 5189.9, + "valid_targets_min": 1024 + }, + { + "epoch": 4.89417091114884, + "grad_norm": 0.42123305042817943, + "learning_rate": 1.0075003302937247e-05, + "loss": 0.115, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05603824555873871, + "step": 4325, + "valid_targets_mean": 4213.1, + "valid_targets_min": 503 + }, + { + "epoch": 4.899830220713073, + "grad_norm": 0.4343104937334871, + "learning_rate": 1.0026066961281282e-05, + "loss": 0.118, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06654898822307587, + "step": 4330, + "valid_targets_mean": 4550.0, + "valid_targets_min": 693 + }, + { + "epoch": 4.905489530277306, + "grad_norm": 0.3985316518329403, + "learning_rate": 9.977209970456194e-06, + "loss": 0.1118, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04045138508081436, + "step": 4335, + "valid_targets_mean": 3289.9, + "valid_targets_min": 795 + }, + { + "epoch": 4.911148839841539, + "grad_norm": 0.4365574775187602, + "learning_rate": 9.928432719159475e-06, + "loss": 0.1198, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07033693790435791, + "step": 4340, + "valid_targets_mean": 4103.1, + "valid_targets_min": 1692 + }, + { + "epoch": 4.916808149405773, + "grad_norm": 0.43198216172381043, + "learning_rate": 9.879735595454232e-06, + "loss": 0.1198, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07077064365148544, + "step": 4345, + "valid_targets_mean": 4912.0, + "valid_targets_min": 1836 + }, + { + "epoch": 4.922467458970006, + "grad_norm": 0.42687991598068287, + "learning_rate": 9.831118986766084e-06, + "loss": 0.1289, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08279694616794586, + "step": 4350, + "valid_targets_mean": 5927.6, + "valid_targets_min": 664 + }, + { + "epoch": 4.928126768534239, + "grad_norm": 0.4560619003174079, + "learning_rate": 9.782583279880096e-06, + "loss": 0.1348, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07915261387825012, + "step": 4355, + "valid_targets_mean": 4922.6, + "valid_targets_min": 744 + }, + { + "epoch": 4.933786078098472, + "grad_norm": 0.45043824047787956, + "learning_rate": 9.734128860937675e-06, + "loss": 0.1282, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0838407650589943, + "step": 4360, + "valid_targets_mean": 4423.5, + "valid_targets_min": 759 + }, + { + "epoch": 4.939445387662705, + "grad_norm": 0.4341801984834592, + "learning_rate": 9.68575611543355e-06, + "loss": 0.1301, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06302689760923386, + "step": 4365, + "valid_targets_mean": 3977.4, + "valid_targets_min": 947 + }, + { + "epoch": 4.945104697226938, + "grad_norm": 0.44610251910704196, + "learning_rate": 9.637465428212636e-06, + "loss": 0.1209, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04932122677564621, + "step": 4370, + "valid_targets_mean": 5018.2, + "valid_targets_min": 1090 + }, + { + "epoch": 4.950764006791172, + "grad_norm": 0.394400835350503, + "learning_rate": 9.589257183467025e-06, + "loss": 0.1169, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05004885792732239, + "step": 4375, + "valid_targets_mean": 4424.1, + "valid_targets_min": 2257 + }, + { + "epoch": 4.956423316355405, + "grad_norm": 0.4185408700765932, + "learning_rate": 9.541131764732896e-06, + "loss": 0.1258, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04043199121952057, + "step": 4380, + "valid_targets_mean": 4084.8, + "valid_targets_min": 1099 + }, + { + "epoch": 4.962082625919638, + "grad_norm": 0.40365425647667436, + "learning_rate": 9.493089554887508e-06, + "loss": 0.114, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05229596048593521, + "step": 4385, + "valid_targets_mean": 4016.4, + "valid_targets_min": 1202 + }, + { + "epoch": 4.967741935483871, + "grad_norm": 0.4973735475343671, + "learning_rate": 9.445130936146098e-06, + "loss": 0.1284, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04283924400806427, + "step": 4390, + "valid_targets_mean": 3275.9, + "valid_targets_min": 924 + }, + { + "epoch": 4.973401245048104, + "grad_norm": 0.4185346821076863, + "learning_rate": 9.397256290058869e-06, + "loss": 0.1201, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05841696262359619, + "step": 4395, + "valid_targets_mean": 5108.8, + "valid_targets_min": 1865 + }, + { + "epoch": 4.979060554612337, + "grad_norm": 0.43805704153335184, + "learning_rate": 9.349465997507974e-06, + "loss": 0.1251, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.061084259301424026, + "step": 4400, + "valid_targets_mean": 4876.4, + "valid_targets_min": 2438 + }, + { + "epoch": 4.98471986417657, + "grad_norm": 0.5006963864647884, + "learning_rate": 9.301760438704442e-06, + "loss": 0.1299, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0983109176158905, + "step": 4405, + "valid_targets_mean": 5430.9, + "valid_targets_min": 1053 + }, + { + "epoch": 4.990379173740804, + "grad_norm": 0.4190677392043346, + "learning_rate": 9.254139993185176e-06, + "loss": 0.1231, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04987921938300133, + "step": 4410, + "valid_targets_mean": 3885.6, + "valid_targets_min": 801 + }, + { + "epoch": 4.996038483305036, + "grad_norm": 0.5318687996502072, + "learning_rate": 9.206605039809955e-06, + "loss": 0.131, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09040506184101105, + "step": 4415, + "valid_targets_mean": 4945.2, + "valid_targets_min": 535 + }, + { + "epoch": 5.001131861912847, + "grad_norm": 0.4528607819102647, + "learning_rate": 9.159155956758375e-06, + "loss": 0.1316, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0732337012887001, + "step": 4420, + "valid_targets_mean": 5129.4, + "valid_targets_min": 1044 + }, + { + "epoch": 5.006791171477079, + "grad_norm": 0.42699698804032793, + "learning_rate": 9.111793121526862e-06, + "loss": 0.1136, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05684370547533035, + "step": 4425, + "valid_targets_mean": 4331.4, + "valid_targets_min": 1039 + }, + { + "epoch": 5.012450481041313, + "grad_norm": 0.4435087454984197, + "learning_rate": 9.064516910925698e-06, + "loss": 0.1151, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06401576846837997, + "step": 4430, + "valid_targets_mean": 4907.1, + "valid_targets_min": 543 + }, + { + "epoch": 5.018109790605546, + "grad_norm": 0.5121451329432529, + "learning_rate": 9.017327701075965e-06, + "loss": 0.119, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0593126118183136, + "step": 4435, + "valid_targets_mean": 4623.6, + "valid_targets_min": 914 + }, + { + "epoch": 5.023769100169779, + "grad_norm": 0.4800213696293887, + "learning_rate": 8.970225867406593e-06, + "loss": 0.1158, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.059211261570453644, + "step": 4440, + "valid_targets_mean": 3641.9, + "valid_targets_min": 618 + }, + { + "epoch": 5.029428409734012, + "grad_norm": 0.3984191230190009, + "learning_rate": 8.923211784651356e-06, + "loss": 0.1129, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.059294335544109344, + "step": 4445, + "valid_targets_mean": 5705.8, + "valid_targets_min": 2908 + }, + { + "epoch": 5.035087719298246, + "grad_norm": 0.42967376735431795, + "learning_rate": 8.876285826845918e-06, + "loss": 0.1135, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.054898668080568314, + "step": 4450, + "valid_targets_mean": 3892.6, + "valid_targets_min": 1205 + }, + { + "epoch": 5.040747028862479, + "grad_norm": 0.5426948319199142, + "learning_rate": 8.82944836732482e-06, + "loss": 0.1151, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06854952871799469, + "step": 4455, + "valid_targets_mean": 4519.4, + "valid_targets_min": 871 + }, + { + "epoch": 5.046406338426712, + "grad_norm": 0.5187052286936951, + "learning_rate": 8.782699778718516e-06, + "loss": 0.1124, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04849359765648842, + "step": 4460, + "valid_targets_mean": 3538.1, + "valid_targets_min": 933 + }, + { + "epoch": 5.052065647990945, + "grad_norm": 0.47390218136667217, + "learning_rate": 8.736040432950447e-06, + "loss": 0.1162, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0499311126768589, + "step": 4465, + "valid_targets_mean": 3413.4, + "valid_targets_min": 560 + }, + { + "epoch": 5.057724957555179, + "grad_norm": 0.4412563172327523, + "learning_rate": 8.689470701234037e-06, + "loss": 0.1166, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06588006019592285, + "step": 4470, + "valid_targets_mean": 3801.4, + "valid_targets_min": 920 + }, + { + "epoch": 5.063384267119411, + "grad_norm": 0.49857993416868396, + "learning_rate": 8.642990954069747e-06, + "loss": 0.1263, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04040566459298134, + "step": 4475, + "valid_targets_mean": 2885.4, + "valid_targets_min": 722 + }, + { + "epoch": 5.069043576683645, + "grad_norm": 0.4355303335910807, + "learning_rate": 8.596601561242167e-06, + "loss": 0.1148, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.03819742426276207, + "step": 4480, + "valid_targets_mean": 3716.5, + "valid_targets_min": 788 + }, + { + "epoch": 5.074702886247878, + "grad_norm": 0.4609910705524989, + "learning_rate": 8.550302891817015e-06, + "loss": 0.1037, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0630742684006691, + "step": 4485, + "valid_targets_mean": 4683.2, + "valid_targets_min": 1187 + }, + { + "epoch": 5.080362195812111, + "grad_norm": 0.4271139859806239, + "learning_rate": 8.50409531413824e-06, + "loss": 0.1088, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.041177332401275635, + "step": 4490, + "valid_targets_mean": 3650.6, + "valid_targets_min": 1008 + }, + { + "epoch": 5.086021505376344, + "grad_norm": 0.4607107301096912, + "learning_rate": 8.457979195825076e-06, + "loss": 0.1125, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07016408443450928, + "step": 4495, + "valid_targets_mean": 5260.2, + "valid_targets_min": 2245 + }, + { + "epoch": 5.0916808149405774, + "grad_norm": 0.4636231778241085, + "learning_rate": 8.411954903769145e-06, + "loss": 0.1309, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13878324627876282, + "step": 4500, + "valid_targets_mean": 4990.8, + "valid_targets_min": 3416 + }, + { + "epoch": 5.09734012450481, + "grad_norm": 0.44400966555741506, + "learning_rate": 8.366022804131487e-06, + "loss": 0.1095, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07617906481027603, + "step": 4505, + "valid_targets_mean": 5265.5, + "valid_targets_min": 1198 + }, + { + "epoch": 5.102999434069043, + "grad_norm": 0.483387086407247, + "learning_rate": 8.320183262339686e-06, + "loss": 0.1159, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05806516855955124, + "step": 4510, + "valid_targets_mean": 3897.8, + "valid_targets_min": 714 + }, + { + "epoch": 5.108658743633277, + "grad_norm": 0.46553157089358177, + "learning_rate": 8.274436643084962e-06, + "loss": 0.11, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06065685302019119, + "step": 4515, + "valid_targets_mean": 5166.6, + "valid_targets_min": 2804 + }, + { + "epoch": 5.11431805319751, + "grad_norm": 0.4160856336775538, + "learning_rate": 8.228783310319248e-06, + "loss": 0.1069, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06831451505422592, + "step": 4520, + "valid_targets_mean": 6378.8, + "valid_targets_min": 4287 + }, + { + "epoch": 5.119977362761743, + "grad_norm": 0.4820650355022886, + "learning_rate": 8.183223627252305e-06, + "loss": 0.1148, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05689562112092972, + "step": 4525, + "valid_targets_mean": 3017.4, + "valid_targets_min": 723 + }, + { + "epoch": 5.125636672325976, + "grad_norm": 0.47682759957685666, + "learning_rate": 8.137757956348854e-06, + "loss": 0.1209, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07163941860198975, + "step": 4530, + "valid_targets_mean": 5357.2, + "valid_targets_min": 3064 + }, + { + "epoch": 5.13129598189021, + "grad_norm": 0.4206385940263568, + "learning_rate": 8.092386659325644e-06, + "loss": 0.1113, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04269436001777649, + "step": 4535, + "valid_targets_mean": 3616.1, + "valid_targets_min": 727 + }, + { + "epoch": 5.136955291454442, + "grad_norm": 0.44294939448105297, + "learning_rate": 8.047110097148618e-06, + "loss": 0.1128, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05752409249544144, + "step": 4540, + "valid_targets_mean": 4481.1, + "valid_targets_min": 602 + }, + { + "epoch": 5.142614601018676, + "grad_norm": 0.38862156391602043, + "learning_rate": 8.001928630030017e-06, + "loss": 0.1137, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04841848462820053, + "step": 4545, + "valid_targets_mean": 4809.2, + "valid_targets_min": 3508 + }, + { + "epoch": 5.148273910582909, + "grad_norm": 0.4722416158730644, + "learning_rate": 7.95684261742554e-06, + "loss": 0.1113, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06221921369433403, + "step": 4550, + "valid_targets_mean": 3428.8, + "valid_targets_min": 550 + }, + { + "epoch": 5.153933220147142, + "grad_norm": 0.4081368764988746, + "learning_rate": 7.911852418031449e-06, + "loss": 0.1159, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.025238852947950363, + "step": 4555, + "valid_targets_mean": 3402.9, + "valid_targets_min": 1273 + }, + { + "epoch": 5.159592529711375, + "grad_norm": 0.43068045632588475, + "learning_rate": 7.866958389781736e-06, + "loss": 0.1189, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0609264150261879, + "step": 4560, + "valid_targets_mean": 5405.9, + "valid_targets_min": 794 + }, + { + "epoch": 5.165251839275609, + "grad_norm": 0.42375232461163775, + "learning_rate": 7.822160889845286e-06, + "loss": 0.1058, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.059470728039741516, + "step": 4565, + "valid_targets_mean": 5386.9, + "valid_targets_min": 2623 + }, + { + "epoch": 5.170911148839841, + "grad_norm": 0.4731415759950078, + "learning_rate": 7.777460274623005e-06, + "loss": 0.1062, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04088635742664337, + "step": 4570, + "valid_targets_mean": 3525.8, + "valid_targets_min": 1122 + }, + { + "epoch": 5.176570458404075, + "grad_norm": 0.4658340464765865, + "learning_rate": 7.732856899745003e-06, + "loss": 0.11, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06058766692876816, + "step": 4575, + "valid_targets_mean": 4153.6, + "valid_targets_min": 1639 + }, + { + "epoch": 5.182229767968308, + "grad_norm": 0.48641602942080336, + "learning_rate": 7.688351120067781e-06, + "loss": 0.1107, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.059576116502285004, + "step": 4580, + "valid_targets_mean": 4019.6, + "valid_targets_min": 1187 + }, + { + "epoch": 5.187889077532541, + "grad_norm": 0.4141899435401402, + "learning_rate": 7.643943289671374e-06, + "loss": 0.116, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05612596869468689, + "step": 4585, + "valid_targets_mean": 4486.8, + "valid_targets_min": 1343 + }, + { + "epoch": 5.193548387096774, + "grad_norm": 0.4833356483421522, + "learning_rate": 7.599633761856542e-06, + "loss": 0.1213, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05341402813792229, + "step": 4590, + "valid_targets_mean": 4311.9, + "valid_targets_min": 1823 + }, + { + "epoch": 5.1992076966610075, + "grad_norm": 0.400979719386063, + "learning_rate": 7.555422889141997e-06, + "loss": 0.1074, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04168704152107239, + "step": 4595, + "valid_targets_mean": 5222.9, + "valid_targets_min": 3136 + }, + { + "epoch": 5.204867006225241, + "grad_norm": 0.47724040694103653, + "learning_rate": 7.51131102326154e-06, + "loss": 0.1114, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04749172925949097, + "step": 4600, + "valid_targets_mean": 3638.0, + "valid_targets_min": 677 + }, + { + "epoch": 5.2105263157894735, + "grad_norm": 0.4724346030973468, + "learning_rate": 7.467298515161305e-06, + "loss": 0.1226, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06519964337348938, + "step": 4605, + "valid_targets_mean": 3845.0, + "valid_targets_min": 560 + }, + { + "epoch": 5.216185625353707, + "grad_norm": 0.4647841490683107, + "learning_rate": 7.423385714996942e-06, + "loss": 0.1178, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07073988765478134, + "step": 4610, + "valid_targets_mean": 4993.6, + "valid_targets_min": 2789 + }, + { + "epoch": 5.22184493491794, + "grad_norm": 0.4194741538318132, + "learning_rate": 7.379572972130864e-06, + "loss": 0.1072, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.048078231513500214, + "step": 4615, + "valid_targets_mean": 4425.1, + "valid_targets_min": 699 + }, + { + "epoch": 5.227504244482173, + "grad_norm": 0.5458492533222778, + "learning_rate": 7.335860635129426e-06, + "loss": 0.1166, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04463748633861542, + "step": 4620, + "valid_targets_mean": 2202.0, + "valid_targets_min": 803 + }, + { + "epoch": 5.233163554046406, + "grad_norm": 0.4785282212012468, + "learning_rate": 7.292249051760169e-06, + "loss": 0.1142, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.058266982436180115, + "step": 4625, + "valid_targets_mean": 3895.4, + "valid_targets_min": 1547 + }, + { + "epoch": 5.23882286361064, + "grad_norm": 0.44215864222287077, + "learning_rate": 7.2487385689890845e-06, + "loss": 0.1094, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06722200661897659, + "step": 4630, + "valid_targets_mean": 5496.4, + "valid_targets_min": 2894 + }, + { + "epoch": 5.244482173174872, + "grad_norm": 0.4675345901347288, + "learning_rate": 7.205329532977794e-06, + "loss": 0.1213, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07377038151025772, + "step": 4635, + "valid_targets_mean": 4594.9, + "valid_targets_min": 946 + }, + { + "epoch": 5.250141482739106, + "grad_norm": 0.42359842437058504, + "learning_rate": 7.162022289080837e-06, + "loss": 0.1154, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05165419355034828, + "step": 4640, + "valid_targets_mean": 4212.8, + "valid_targets_min": 1177 + }, + { + "epoch": 5.255800792303339, + "grad_norm": 0.4091892397929575, + "learning_rate": 7.1188171818429256e-06, + "loss": 0.117, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05602630600333214, + "step": 4645, + "valid_targets_mean": 4934.8, + "valid_targets_min": 869 + }, + { + "epoch": 5.261460101867572, + "grad_norm": 0.4910364312700327, + "learning_rate": 7.075714554996176e-06, + "loss": 0.1103, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.061264265328645706, + "step": 4650, + "valid_targets_mean": 3971.9, + "valid_targets_min": 676 + }, + { + "epoch": 5.267119411431805, + "grad_norm": 0.4497736788074925, + "learning_rate": 7.032714751457395e-06, + "loss": 0.1177, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.038551460951566696, + "step": 4655, + "valid_targets_mean": 3472.0, + "valid_targets_min": 1003 + }, + { + "epoch": 5.272778720996039, + "grad_norm": 0.48542333459363973, + "learning_rate": 6.989818113325333e-06, + "loss": 0.112, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05707547068595886, + "step": 4660, + "valid_targets_mean": 5350.0, + "valid_targets_min": 574 + }, + { + "epoch": 5.278438030560272, + "grad_norm": 0.4181217216286458, + "learning_rate": 6.947024981878001e-06, + "loss": 0.1056, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05142673850059509, + "step": 4665, + "valid_targets_mean": 3972.5, + "valid_targets_min": 609 + }, + { + "epoch": 5.284097340124505, + "grad_norm": 0.4591739189576479, + "learning_rate": 6.904335697569909e-06, + "loss": 0.1019, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05810660496354103, + "step": 4670, + "valid_targets_mean": 4062.6, + "valid_targets_min": 1179 + }, + { + "epoch": 5.289756649688738, + "grad_norm": 0.4112516471468084, + "learning_rate": 6.861750600029373e-06, + "loss": 0.1088, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.03796304762363434, + "step": 4675, + "valid_targets_mean": 4000.1, + "valid_targets_min": 534 + }, + { + "epoch": 5.2954159592529715, + "grad_norm": 0.4771222854878113, + "learning_rate": 6.819270028055844e-06, + "loss": 0.115, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06374678760766983, + "step": 4680, + "valid_targets_mean": 5190.5, + "valid_targets_min": 1002 + }, + { + "epoch": 5.301075268817204, + "grad_norm": 0.4442978921601879, + "learning_rate": 6.776894319617162e-06, + "loss": 0.1134, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.035999611020088196, + "step": 4685, + "valid_targets_mean": 3792.2, + "valid_targets_min": 1293 + }, + { + "epoch": 5.3067345783814375, + "grad_norm": 0.4621250259768539, + "learning_rate": 6.73462381184689e-06, + "loss": 0.1171, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06715947389602661, + "step": 4690, + "valid_targets_mean": 4781.9, + "valid_targets_min": 2488 + }, + { + "epoch": 5.312393887945671, + "grad_norm": 0.5070114160540078, + "learning_rate": 6.6924588410416604e-06, + "loss": 0.1225, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.059045299887657166, + "step": 4695, + "valid_targets_mean": 4725.8, + "valid_targets_min": 1304 + }, + { + "epoch": 5.3180531975099035, + "grad_norm": 0.4238752705210963, + "learning_rate": 6.650399742658442e-06, + "loss": 0.1125, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05651441216468811, + "step": 4700, + "valid_targets_mean": 5001.2, + "valid_targets_min": 3564 + }, + { + "epoch": 5.323712507074137, + "grad_norm": 0.4774463678169511, + "learning_rate": 6.608446851311918e-06, + "loss": 0.1185, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06929890811443329, + "step": 4705, + "valid_targets_mean": 4300.0, + "valid_targets_min": 2555 + }, + { + "epoch": 5.32937181663837, + "grad_norm": 0.5680882738535958, + "learning_rate": 6.566600500771796e-06, + "loss": 0.1112, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07619094848632812, + "step": 4710, + "valid_targets_mean": 6348.2, + "valid_targets_min": 2788 + }, + { + "epoch": 5.335031126202603, + "grad_norm": 0.49252745074451426, + "learning_rate": 6.524861023960185e-06, + "loss": 0.1083, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04859968274831772, + "step": 4715, + "valid_targets_mean": 3455.2, + "valid_targets_min": 182 + }, + { + "epoch": 5.340690435766836, + "grad_norm": 0.41273249352651414, + "learning_rate": 6.483228752948902e-06, + "loss": 0.1057, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04569438099861145, + "step": 4720, + "valid_targets_mean": 3131.8, + "valid_targets_min": 740 + }, + { + "epoch": 5.34634974533107, + "grad_norm": 0.40554406233825924, + "learning_rate": 6.441704018956858e-06, + "loss": 0.1044, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04893747344613075, + "step": 4725, + "valid_targets_mean": 5415.4, + "valid_targets_min": 3609 + }, + { + "epoch": 5.352009054895303, + "grad_norm": 0.46455985365628116, + "learning_rate": 6.400287152347442e-06, + "loss": 0.1183, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06593328714370728, + "step": 4730, + "valid_targets_mean": 4598.9, + "valid_targets_min": 1070 + }, + { + "epoch": 5.357668364459536, + "grad_norm": 0.5312658470935879, + "learning_rate": 6.35897848262584e-06, + "loss": 0.1142, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05592180788516998, + "step": 4735, + "valid_targets_mean": 4188.5, + "valid_targets_min": 693 + }, + { + "epoch": 5.363327674023769, + "grad_norm": 0.43259100262014355, + "learning_rate": 6.317778338436449e-06, + "loss": 0.1167, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04694347083568573, + "step": 4740, + "valid_targets_mean": 4166.8, + "valid_targets_min": 1766 + }, + { + "epoch": 5.368986983588003, + "grad_norm": 0.4314196701999109, + "learning_rate": 6.276687047560275e-06, + "loss": 0.1118, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.055805183947086334, + "step": 4745, + "valid_targets_mean": 5157.2, + "valid_targets_min": 854 + }, + { + "epoch": 5.374646293152235, + "grad_norm": 0.4246118919493089, + "learning_rate": 6.235704936912288e-06, + "loss": 0.1163, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06027965992689133, + "step": 4750, + "valid_targets_mean": 5311.4, + "valid_targets_min": 3550 + }, + { + "epoch": 5.380305602716469, + "grad_norm": 0.4954836399169219, + "learning_rate": 6.194832332538838e-06, + "loss": 0.1189, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0475262776017189, + "step": 4755, + "valid_targets_mean": 3495.5, + "valid_targets_min": 636 + }, + { + "epoch": 5.385964912280702, + "grad_norm": 0.4801507285061669, + "learning_rate": 6.154069559615081e-06, + "loss": 0.1197, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.061720751225948334, + "step": 4760, + "valid_targets_mean": 4356.9, + "valid_targets_min": 1377 + }, + { + "epoch": 5.391624221844935, + "grad_norm": 0.43527250436364373, + "learning_rate": 6.113416942442358e-06, + "loss": 0.111, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0542495995759964, + "step": 4765, + "valid_targets_mean": 3909.4, + "valid_targets_min": 738 + }, + { + "epoch": 5.397283531409168, + "grad_norm": 0.4870896779995338, + "learning_rate": 6.072874804445632e-06, + "loss": 0.1121, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04657881706953049, + "step": 4770, + "valid_targets_mean": 3526.8, + "valid_targets_min": 1253 + }, + { + "epoch": 5.4029428409734015, + "grad_norm": 0.4287275371079559, + "learning_rate": 6.032443468170912e-06, + "loss": 0.1114, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.03314714506268501, + "step": 4775, + "valid_targets_mean": 3698.0, + "valid_targets_min": 1012 + }, + { + "epoch": 5.408602150537634, + "grad_norm": 0.49199243356450895, + "learning_rate": 5.992123255282702e-06, + "loss": 0.1145, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05596199631690979, + "step": 4780, + "valid_targets_mean": 3990.4, + "valid_targets_min": 1198 + }, + { + "epoch": 5.4142614601018675, + "grad_norm": 0.41169603973479024, + "learning_rate": 5.951914486561417e-06, + "loss": 0.1142, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04324422776699066, + "step": 4785, + "valid_targets_mean": 3722.4, + "valid_targets_min": 1706 + }, + { + "epoch": 5.419920769666101, + "grad_norm": 0.483954415115045, + "learning_rate": 5.911817481900832e-06, + "loss": 0.1181, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0564533956348896, + "step": 4790, + "valid_targets_mean": 4298.2, + "valid_targets_min": 622 + }, + { + "epoch": 5.425580079230334, + "grad_norm": 0.4433797987576531, + "learning_rate": 5.871832560305573e-06, + "loss": 0.117, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05524902790784836, + "step": 4795, + "valid_targets_mean": 4600.6, + "valid_targets_min": 747 + }, + { + "epoch": 5.431239388794567, + "grad_norm": 0.3927923995206467, + "learning_rate": 5.831960039888533e-06, + "loss": 0.1182, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0440894290804863, + "step": 4800, + "valid_targets_mean": 4589.6, + "valid_targets_min": 1607 + }, + { + "epoch": 5.4368986983588, + "grad_norm": 0.5112943526590745, + "learning_rate": 5.792200237868361e-06, + "loss": 0.1107, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05474686995148659, + "step": 4805, + "valid_targets_mean": 3607.2, + "valid_targets_min": 611 + }, + { + "epoch": 5.442558007923034, + "grad_norm": 0.47512202575606816, + "learning_rate": 5.752553470566957e-06, + "loss": 0.1166, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.061185047030448914, + "step": 4810, + "valid_targets_mean": 3820.2, + "valid_targets_min": 645 + }, + { + "epoch": 5.448217317487266, + "grad_norm": 0.4612513138120705, + "learning_rate": 5.713020053406917e-06, + "loss": 0.1152, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06442756950855255, + "step": 4815, + "valid_targets_mean": 4651.8, + "valid_targets_min": 2500 + }, + { + "epoch": 5.4538766270515, + "grad_norm": 0.4493868647834442, + "learning_rate": 5.673600300909053e-06, + "loss": 0.1034, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06132463365793228, + "step": 4820, + "valid_targets_mean": 4371.2, + "valid_targets_min": 894 + }, + { + "epoch": 5.459535936615733, + "grad_norm": 0.47507472687074187, + "learning_rate": 5.634294526689872e-06, + "loss": 0.1113, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04925510659813881, + "step": 4825, + "valid_targets_mean": 3630.4, + "valid_targets_min": 516 + }, + { + "epoch": 5.465195246179966, + "grad_norm": 0.4655120344030465, + "learning_rate": 5.595103043459109e-06, + "loss": 0.1138, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06488795578479767, + "step": 4830, + "valid_targets_mean": 4173.2, + "valid_targets_min": 2491 + }, + { + "epoch": 5.470854555744199, + "grad_norm": 0.4233502014071893, + "learning_rate": 5.556026163017205e-06, + "loss": 0.1127, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04904253035783768, + "step": 4835, + "valid_targets_mean": 3854.4, + "valid_targets_min": 1823 + }, + { + "epoch": 5.476513865308433, + "grad_norm": 0.39951537221666045, + "learning_rate": 5.517064196252837e-06, + "loss": 0.1133, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06015561893582344, + "step": 4840, + "valid_targets_mean": 5693.1, + "valid_targets_min": 3433 + }, + { + "epoch": 5.482173174872665, + "grad_norm": 0.45689121320144144, + "learning_rate": 5.478217453140471e-06, + "loss": 0.1128, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.059489406645298004, + "step": 4845, + "valid_targets_mean": 2950.0, + "valid_targets_min": 674 + }, + { + "epoch": 5.487832484436899, + "grad_norm": 0.4445797611841312, + "learning_rate": 5.439486242737855e-06, + "loss": 0.1241, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.058133259415626526, + "step": 4850, + "valid_targets_mean": 5115.2, + "valid_targets_min": 1350 + }, + { + "epoch": 5.493491794001132, + "grad_norm": 0.4485010317645793, + "learning_rate": 5.400870873183583e-06, + "loss": 0.1077, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06925121694803238, + "step": 4855, + "valid_targets_mean": 5346.2, + "valid_targets_min": 4035 + }, + { + "epoch": 5.499151103565365, + "grad_norm": 0.4249641457776462, + "learning_rate": 5.362371651694647e-06, + "loss": 0.1162, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06470344960689545, + "step": 4860, + "valid_targets_mean": 4740.0, + "valid_targets_min": 1621 + }, + { + "epoch": 5.504810413129598, + "grad_norm": 0.48322082306365505, + "learning_rate": 5.323988884563975e-06, + "loss": 0.113, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05999629199504852, + "step": 4865, + "valid_targets_mean": 4047.2, + "valid_targets_min": 732 + }, + { + "epoch": 5.5104697226938315, + "grad_norm": 0.4532805789074051, + "learning_rate": 5.2857228771580105e-06, + "loss": 0.1184, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04834761470556259, + "step": 4870, + "valid_targets_mean": 3662.0, + "valid_targets_min": 885 + }, + { + "epoch": 5.516129032258064, + "grad_norm": 0.4564206254663075, + "learning_rate": 5.247573933914285e-06, + "loss": 0.1111, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05620426684617996, + "step": 4875, + "valid_targets_mean": 4354.1, + "valid_targets_min": 2166 + }, + { + "epoch": 5.5217883418222975, + "grad_norm": 0.44818208238967583, + "learning_rate": 5.20954235833898e-06, + "loss": 0.1151, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.043452322483062744, + "step": 4880, + "valid_targets_mean": 3705.6, + "valid_targets_min": 1075 + }, + { + "epoch": 5.527447651386531, + "grad_norm": 0.44997386473366485, + "learning_rate": 5.171628453004512e-06, + "loss": 0.1208, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09598531574010849, + "step": 4885, + "valid_targets_mean": 3403.9, + "valid_targets_min": 809 + }, + { + "epoch": 5.533106960950764, + "grad_norm": 0.44746169408548236, + "learning_rate": 5.133832519547155e-06, + "loss": 0.1132, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04644259065389633, + "step": 4890, + "valid_targets_mean": 4080.4, + "valid_targets_min": 588 + }, + { + "epoch": 5.538766270514997, + "grad_norm": 0.499565422968923, + "learning_rate": 5.096154858664608e-06, + "loss": 0.1235, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06844837218523026, + "step": 4895, + "valid_targets_mean": 3538.4, + "valid_targets_min": 853 + }, + { + "epoch": 5.54442558007923, + "grad_norm": 0.628250261408482, + "learning_rate": 5.058595770113606e-06, + "loss": 0.1155, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05735373869538307, + "step": 4900, + "valid_targets_mean": 4304.8, + "valid_targets_min": 1210 + }, + { + "epoch": 5.550084889643464, + "grad_norm": 0.4974820141208018, + "learning_rate": 5.0211555527075664e-06, + "loss": 0.1155, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06533755362033844, + "step": 4905, + "valid_targets_mean": 4365.8, + "valid_targets_min": 1343 + }, + { + "epoch": 5.555744199207696, + "grad_norm": 0.4565863011605837, + "learning_rate": 4.9838345043141665e-06, + "loss": 0.1117, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05046406388282776, + "step": 4910, + "valid_targets_mean": 5415.5, + "valid_targets_min": 2696 + }, + { + "epoch": 5.56140350877193, + "grad_norm": 0.5031633010739923, + "learning_rate": 4.946632921853009e-06, + "loss": 0.1145, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.051251720637083054, + "step": 4915, + "valid_targets_mean": 3984.5, + "valid_targets_min": 1240 + }, + { + "epoch": 5.567062818336163, + "grad_norm": 0.4556469164235674, + "learning_rate": 4.909551101293238e-06, + "loss": 0.1168, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.055873870849609375, + "step": 4920, + "valid_targets_mean": 4884.1, + "valid_targets_min": 1178 + }, + { + "epoch": 5.572722127900396, + "grad_norm": 0.44669372809235275, + "learning_rate": 4.872589337651208e-06, + "loss": 0.1124, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06097705289721489, + "step": 4925, + "valid_targets_mean": 4559.6, + "valid_targets_min": 1313 + }, + { + "epoch": 5.578381437464629, + "grad_norm": 0.4873847875198912, + "learning_rate": 4.835747924988105e-06, + "loss": 0.1164, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.046152450144290924, + "step": 4930, + "valid_targets_mean": 3567.5, + "valid_targets_min": 568 + }, + { + "epoch": 5.584040747028863, + "grad_norm": 0.46773124507327324, + "learning_rate": 4.799027156407632e-06, + "loss": 0.1187, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0446518138051033, + "step": 4935, + "valid_targets_mean": 3866.4, + "valid_targets_min": 559 + }, + { + "epoch": 5.589700056593095, + "grad_norm": 0.45662980710750634, + "learning_rate": 4.7624273240536756e-06, + "loss": 0.1188, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04517771303653717, + "step": 4940, + "valid_targets_mean": 3876.1, + "valid_targets_min": 531 + }, + { + "epoch": 5.595359366157329, + "grad_norm": 0.4630613906653166, + "learning_rate": 4.725948719107965e-06, + "loss": 0.1421, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06445273756980896, + "step": 4945, + "valid_targets_mean": 4911.2, + "valid_targets_min": 1166 + }, + { + "epoch": 5.601018675721562, + "grad_norm": 0.4881385991891543, + "learning_rate": 4.6895916317877624e-06, + "loss": 0.1139, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06460549682378769, + "step": 4950, + "valid_targets_mean": 4823.2, + "valid_targets_min": 703 + }, + { + "epoch": 5.6066779852857955, + "grad_norm": 0.4615150611476631, + "learning_rate": 4.653356351343577e-06, + "loss": 0.1193, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06274469941854477, + "step": 4955, + "valid_targets_mean": 4550.4, + "valid_targets_min": 756 + }, + { + "epoch": 5.612337294850028, + "grad_norm": 0.4449286920469187, + "learning_rate": 4.617243166056826e-06, + "loss": 0.1185, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.054899074137210846, + "step": 4960, + "valid_targets_mean": 4739.8, + "valid_targets_min": 539 + }, + { + "epoch": 5.6179966044142615, + "grad_norm": 0.452981508619, + "learning_rate": 4.581252363237567e-06, + "loss": 0.1083, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04050099477171898, + "step": 4965, + "valid_targets_mean": 3835.1, + "valid_targets_min": 1026 + }, + { + "epoch": 5.623655913978495, + "grad_norm": 0.4295852490868988, + "learning_rate": 4.545384229222196e-06, + "loss": 0.1207, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05147762969136238, + "step": 4970, + "valid_targets_mean": 4653.4, + "valid_targets_min": 926 + }, + { + "epoch": 5.6293152235427275, + "grad_norm": 0.47942322605684273, + "learning_rate": 4.509639049371193e-06, + "loss": 0.1125, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05537065863609314, + "step": 4975, + "valid_targets_mean": 3988.5, + "valid_targets_min": 494 + }, + { + "epoch": 5.634974533106961, + "grad_norm": 0.43820013940786146, + "learning_rate": 4.474017108066828e-06, + "loss": 0.1083, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.040090110152959824, + "step": 4980, + "valid_targets_mean": 3757.9, + "valid_targets_min": 515 + }, + { + "epoch": 5.640633842671194, + "grad_norm": 0.5697971473634869, + "learning_rate": 4.438518688710898e-06, + "loss": 0.119, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06687842309474945, + "step": 4985, + "valid_targets_mean": 4018.4, + "valid_targets_min": 807 + }, + { + "epoch": 5.646293152235427, + "grad_norm": 0.465558945978941, + "learning_rate": 4.403144073722501e-06, + "loss": 0.1255, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04563640058040619, + "step": 4990, + "valid_targets_mean": 5189.0, + "valid_targets_min": 1358 + }, + { + "epoch": 5.65195246179966, + "grad_norm": 0.4656993178706541, + "learning_rate": 4.367893544535757e-06, + "loss": 0.1062, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07473142445087433, + "step": 4995, + "valid_targets_mean": 5106.9, + "valid_targets_min": 2490 + }, + { + "epoch": 5.657611771363894, + "grad_norm": 0.5437147255715685, + "learning_rate": 4.332767381597575e-06, + "loss": 0.1197, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04992275685071945, + "step": 5000, + "valid_targets_mean": 3940.1, + "valid_targets_min": 1035 + }, + { + "epoch": 5.663271080928126, + "grad_norm": 0.47478101647037685, + "learning_rate": 4.297765864365453e-06, + "loss": 0.1228, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07100368291139603, + "step": 5005, + "valid_targets_mean": 4980.6, + "valid_targets_min": 596 + }, + { + "epoch": 5.66893039049236, + "grad_norm": 0.4796323209801453, + "learning_rate": 4.262889271305204e-06, + "loss": 0.1133, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09020929038524628, + "step": 5010, + "valid_targets_mean": 5051.9, + "valid_targets_min": 703 + }, + { + "epoch": 5.674589700056593, + "grad_norm": 0.4828197486896997, + "learning_rate": 4.228137879888774e-06, + "loss": 0.1133, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06680593639612198, + "step": 5015, + "valid_targets_mean": 4137.0, + "valid_targets_min": 594 + }, + { + "epoch": 5.680249009620827, + "grad_norm": 0.43954012370449086, + "learning_rate": 4.193511966592041e-06, + "loss": 0.1024, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.044026486575603485, + "step": 5020, + "valid_targets_mean": 3684.9, + "valid_targets_min": 1136 + }, + { + "epoch": 5.685908319185059, + "grad_norm": 0.4090054480184231, + "learning_rate": 4.1590118068925815e-06, + "loss": 0.108, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.049925319850444794, + "step": 5025, + "valid_targets_mean": 5016.1, + "valid_targets_min": 1495 + }, + { + "epoch": 5.691567628749293, + "grad_norm": 0.4818274752746228, + "learning_rate": 4.124637675267511e-06, + "loss": 0.1144, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07429710030555725, + "step": 5030, + "valid_targets_mean": 5243.8, + "valid_targets_min": 2709 + }, + { + "epoch": 5.697226938313526, + "grad_norm": 0.4427542460373225, + "learning_rate": 4.090389845191278e-06, + "loss": 0.1097, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.03946341201663017, + "step": 5035, + "valid_targets_mean": 4483.5, + "valid_targets_min": 2967 + }, + { + "epoch": 5.702886247877759, + "grad_norm": 0.4931416671848181, + "learning_rate": 4.056268589133516e-06, + "loss": 0.1134, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06443046778440475, + "step": 5040, + "valid_targets_mean": 3958.8, + "valid_targets_min": 501 + }, + { + "epoch": 5.708545557441992, + "grad_norm": 0.4590515107879056, + "learning_rate": 4.022274178556844e-06, + "loss": 0.1091, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06537705659866333, + "step": 5045, + "valid_targets_mean": 4428.6, + "valid_targets_min": 529 + }, + { + "epoch": 5.7142048670062255, + "grad_norm": 0.45371902819997745, + "learning_rate": 3.988406883914717e-06, + "loss": 0.1042, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07032101601362228, + "step": 5050, + "valid_targets_mean": 5996.9, + "valid_targets_min": 3785 + }, + { + "epoch": 5.719864176570458, + "grad_norm": 0.517412047216284, + "learning_rate": 3.954666974649295e-06, + "loss": 0.1195, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.059205036610364914, + "step": 5055, + "valid_targets_mean": 4341.6, + "valid_targets_min": 738 + }, + { + "epoch": 5.7255234861346915, + "grad_norm": 0.4659447914535021, + "learning_rate": 3.921054719189272e-06, + "loss": 0.1119, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05640418827533722, + "step": 5060, + "valid_targets_mean": 4091.0, + "valid_targets_min": 776 + }, + { + "epoch": 5.731182795698925, + "grad_norm": 0.4702646981560427, + "learning_rate": 3.887570384947745e-06, + "loss": 0.1165, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0586191788315773, + "step": 5065, + "valid_targets_mean": 4374.8, + "valid_targets_min": 1178 + }, + { + "epoch": 5.7368421052631575, + "grad_norm": 0.4641538440273486, + "learning_rate": 3.854214238320109e-06, + "loss": 0.1167, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05729852616786957, + "step": 5070, + "valid_targets_mean": 5137.5, + "valid_targets_min": 2615 + }, + { + "epoch": 5.742501414827391, + "grad_norm": 0.5008449253918498, + "learning_rate": 3.8209865446819105e-06, + "loss": 0.1137, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.048976555466651917, + "step": 5075, + "valid_targets_mean": 2745.1, + "valid_targets_min": 866 + }, + { + "epoch": 5.748160724391624, + "grad_norm": 0.4384418060964319, + "learning_rate": 3.7878875683867476e-06, + "loss": 0.1138, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0595720037817955, + "step": 5080, + "valid_targets_mean": 4913.1, + "valid_targets_min": 729 + }, + { + "epoch": 5.753820033955858, + "grad_norm": 0.4445496072268408, + "learning_rate": 3.7549175727641606e-06, + "loss": 0.1142, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05522464960813522, + "step": 5085, + "valid_targets_mean": 4570.8, + "valid_targets_min": 1180 + }, + { + "epoch": 5.75947934352009, + "grad_norm": 0.47651723503865073, + "learning_rate": 3.7220768201175615e-06, + "loss": 0.1161, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05467373877763748, + "step": 5090, + "valid_targets_mean": 4092.9, + "valid_targets_min": 700 + }, + { + "epoch": 5.765138653084324, + "grad_norm": 0.3899109388355857, + "learning_rate": 3.689365571722112e-06, + "loss": 0.1112, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04819038510322571, + "step": 5095, + "valid_targets_mean": 4443.9, + "valid_targets_min": 3415 + }, + { + "epoch": 5.770797962648556, + "grad_norm": 0.4469674062440092, + "learning_rate": 3.6567840878226577e-06, + "loss": 0.1151, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05831068381667137, + "step": 5100, + "valid_targets_mean": 4145.4, + "valid_targets_min": 2179 + }, + { + "epoch": 5.77645727221279, + "grad_norm": 0.5075844285277699, + "learning_rate": 3.624332627631679e-06, + "loss": 0.1049, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06018441170454025, + "step": 5105, + "valid_targets_mean": 4782.0, + "valid_targets_min": 3131 + }, + { + "epoch": 5.782116581777023, + "grad_norm": 0.4856582964223566, + "learning_rate": 3.5920114493271974e-06, + "loss": 0.1119, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.050442568957805634, + "step": 5110, + "valid_targets_mean": 4035.5, + "valid_targets_min": 530 + }, + { + "epoch": 5.787775891341257, + "grad_norm": 0.48811891923242473, + "learning_rate": 3.5598208100507314e-06, + "loss": 0.1255, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05038515850901604, + "step": 5115, + "valid_targets_mean": 3946.9, + "valid_targets_min": 1032 + }, + { + "epoch": 5.793435200905489, + "grad_norm": 0.4773658084882183, + "learning_rate": 3.5277609659052712e-06, + "loss": 0.1126, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.048454806208610535, + "step": 5120, + "valid_targets_mean": 3687.5, + "valid_targets_min": 724 + }, + { + "epoch": 5.799094510469723, + "grad_norm": 0.5075340927041329, + "learning_rate": 3.4958321719532106e-06, + "loss": 0.1154, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.053706150501966476, + "step": 5125, + "valid_targets_mean": 3980.1, + "valid_targets_min": 856 + }, + { + "epoch": 5.804753820033956, + "grad_norm": 0.47743342935918603, + "learning_rate": 3.4640346822143324e-06, + "loss": 0.1123, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06260836124420166, + "step": 5130, + "valid_targets_mean": 4131.0, + "valid_targets_min": 808 + }, + { + "epoch": 5.810413129598189, + "grad_norm": 0.4548395002872586, + "learning_rate": 3.4323687496637837e-06, + "loss": 0.1227, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06270252168178558, + "step": 5135, + "valid_targets_mean": 5360.8, + "valid_targets_min": 3059 + }, + { + "epoch": 5.816072439162422, + "grad_norm": 0.5000774531741389, + "learning_rate": 3.4008346262300852e-06, + "loss": 0.125, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08110244572162628, + "step": 5140, + "valid_targets_mean": 4953.1, + "valid_targets_min": 1226 + }, + { + "epoch": 5.8217317487266556, + "grad_norm": 0.4810587219768537, + "learning_rate": 3.3694325627930846e-06, + "loss": 0.1065, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05325479060411453, + "step": 5145, + "valid_targets_mean": 3975.0, + "valid_targets_min": 709 + }, + { + "epoch": 5.827391058290889, + "grad_norm": 0.5203554051744634, + "learning_rate": 3.3381628091819907e-06, + "loss": 0.1178, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0718921348452568, + "step": 5150, + "valid_targets_mean": 3900.1, + "valid_targets_min": 798 + }, + { + "epoch": 5.8330503678551215, + "grad_norm": 0.46369428596938056, + "learning_rate": 3.3070256141733913e-06, + "loss": 0.1081, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05399859696626663, + "step": 5155, + "valid_targets_mean": 3345.5, + "valid_targets_min": 732 + }, + { + "epoch": 5.838709677419355, + "grad_norm": 0.4676045194852541, + "learning_rate": 3.2760212254892453e-06, + "loss": 0.1147, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09883564710617065, + "step": 5160, + "valid_targets_mean": 6374.5, + "valid_targets_min": 3366 + }, + { + "epoch": 5.8443689869835875, + "grad_norm": 0.42077087762005777, + "learning_rate": 3.245149889794932e-06, + "loss": 0.1112, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.03414727374911308, + "step": 5165, + "valid_targets_mean": 2855.6, + "valid_targets_min": 583 + }, + { + "epoch": 5.850028296547821, + "grad_norm": 0.42856977274439045, + "learning_rate": 3.2144118526972943e-06, + "loss": 0.1162, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06861759722232819, + "step": 5170, + "valid_targets_mean": 5804.0, + "valid_targets_min": 3230 + }, + { + "epoch": 5.855687606112054, + "grad_norm": 0.46708470061456847, + "learning_rate": 3.1838073587426676e-06, + "loss": 0.1158, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06845404952764511, + "step": 5175, + "valid_targets_mean": 5279.1, + "valid_targets_min": 2482 + }, + { + "epoch": 5.861346915676288, + "grad_norm": 0.5126148079854738, + "learning_rate": 3.153336651414933e-06, + "loss": 0.1157, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06930111348628998, + "step": 5180, + "valid_targets_mean": 3837.1, + "valid_targets_min": 716 + }, + { + "epoch": 5.86700622524052, + "grad_norm": 0.4607970089346907, + "learning_rate": 3.1229999731336137e-06, + "loss": 0.1106, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.059755075722932816, + "step": 5185, + "valid_targets_mean": 4610.5, + "valid_targets_min": 772 + }, + { + "epoch": 5.872665534804754, + "grad_norm": 0.4726498913780238, + "learning_rate": 3.0927975652518994e-06, + "loss": 0.1142, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07042832672595978, + "step": 5190, + "valid_targets_mean": 5310.8, + "valid_targets_min": 1422 + }, + { + "epoch": 5.878324844368987, + "grad_norm": 0.4386499575722893, + "learning_rate": 3.062729668054756e-06, + "loss": 0.1153, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04626138508319855, + "step": 5195, + "valid_targets_mean": 4386.8, + "valid_targets_min": 1222 + }, + { + "epoch": 5.88398415393322, + "grad_norm": 0.44063539802868457, + "learning_rate": 3.032796520757002e-06, + "loss": 0.1049, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.045647792518138885, + "step": 5200, + "valid_targets_mean": 4097.9, + "valid_targets_min": 830 + }, + { + "epoch": 5.889643463497453, + "grad_norm": 0.5248051229846893, + "learning_rate": 3.0029983615014234e-06, + "loss": 0.1189, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.059647202491760254, + "step": 5205, + "valid_targets_mean": 3236.0, + "valid_targets_min": 874 + }, + { + "epoch": 5.895302773061687, + "grad_norm": 0.45165568572132947, + "learning_rate": 2.9733354273568514e-06, + "loss": 0.1186, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.058211710304021835, + "step": 5210, + "valid_targets_mean": 4962.8, + "valid_targets_min": 1349 + }, + { + "epoch": 5.900962082625919, + "grad_norm": 0.42395422138869876, + "learning_rate": 2.9438079543162914e-06, + "loss": 0.1152, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05662113428115845, + "step": 5215, + "valid_targets_mean": 5135.1, + "valid_targets_min": 1524 + }, + { + "epoch": 5.906621392190153, + "grad_norm": 0.49861537628796837, + "learning_rate": 2.9144161772950564e-06, + "loss": 0.1134, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.061767980456352234, + "step": 5220, + "valid_targets_mean": 3732.8, + "valid_targets_min": 776 + }, + { + "epoch": 5.912280701754386, + "grad_norm": 0.4818073963102354, + "learning_rate": 2.885160330128871e-06, + "loss": 0.1108, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06792585551738739, + "step": 5225, + "valid_targets_mean": 4085.8, + "valid_targets_min": 953 + }, + { + "epoch": 5.917940011318619, + "grad_norm": 0.5156608137270137, + "learning_rate": 2.8560406455720333e-06, + "loss": 0.1142, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0830899327993393, + "step": 5230, + "valid_targets_mean": 5235.8, + "valid_targets_min": 1185 + }, + { + "epoch": 5.923599320882852, + "grad_norm": 0.4850906294486573, + "learning_rate": 2.8270573552955616e-06, + "loss": 0.1098, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05480193346738815, + "step": 5235, + "valid_targets_mean": 4005.5, + "valid_targets_min": 640 + }, + { + "epoch": 5.929258630447086, + "grad_norm": 0.47503373297264345, + "learning_rate": 2.798210689885337e-06, + "loss": 0.1106, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.040112655609846115, + "step": 5240, + "valid_targets_mean": 3111.0, + "valid_targets_min": 539 + }, + { + "epoch": 5.934917940011319, + "grad_norm": 0.5243944003082727, + "learning_rate": 2.7695008788402765e-06, + "loss": 0.1239, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04650159552693367, + "step": 5245, + "valid_targets_mean": 3039.9, + "valid_targets_min": 811 + }, + { + "epoch": 5.9405772495755516, + "grad_norm": 0.48998365951816714, + "learning_rate": 2.740928150570512e-06, + "loss": 0.1051, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.046303316950798035, + "step": 5250, + "valid_targets_mean": 3247.0, + "valid_targets_min": 616 + }, + { + "epoch": 5.946236559139785, + "grad_norm": 0.4248965464288131, + "learning_rate": 2.712492732395575e-06, + "loss": 0.1242, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07540948688983917, + "step": 5255, + "valid_targets_mean": 6559.6, + "valid_targets_min": 2767 + }, + { + "epoch": 5.951895868704018, + "grad_norm": 0.41105055303792143, + "learning_rate": 2.6841948505425765e-06, + "loss": 0.1018, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.054322581738233566, + "step": 5260, + "valid_targets_mean": 4982.9, + "valid_targets_min": 1746 + }, + { + "epoch": 5.957555178268251, + "grad_norm": 0.4565361917205376, + "learning_rate": 2.6560347301444035e-06, + "loss": 0.1127, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05679383501410484, + "step": 5265, + "valid_targets_mean": 3661.6, + "valid_targets_min": 939 + }, + { + "epoch": 5.963214487832484, + "grad_norm": 0.4562577335872036, + "learning_rate": 2.6280125952379567e-06, + "loss": 0.1191, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06343025714159012, + "step": 5270, + "valid_targets_mean": 4227.8, + "valid_targets_min": 2999 + }, + { + "epoch": 5.968873797396718, + "grad_norm": 0.4637250775421634, + "learning_rate": 2.6001286687623382e-06, + "loss": 0.1145, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04459439963102341, + "step": 5275, + "valid_targets_mean": 4054.1, + "valid_targets_min": 2367 + }, + { + "epoch": 5.97453310696095, + "grad_norm": 0.5091610107476553, + "learning_rate": 2.5723831725570848e-06, + "loss": 0.1145, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06401020288467407, + "step": 5280, + "valid_targets_mean": 4010.1, + "valid_targets_min": 924 + }, + { + "epoch": 5.980192416525184, + "grad_norm": 0.4289017945929079, + "learning_rate": 2.544776327360419e-06, + "loss": 0.1154, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07693204283714294, + "step": 5285, + "valid_targets_mean": 7151.2, + "valid_targets_min": 3466 + }, + { + "epoch": 5.985851726089417, + "grad_norm": 0.44843170990868964, + "learning_rate": 2.5173083528074683e-06, + "loss": 0.1083, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06993307173252106, + "step": 5290, + "valid_targets_mean": 3430.6, + "valid_targets_min": 859 + }, + { + "epoch": 5.99151103565365, + "grad_norm": 0.4676315316365486, + "learning_rate": 2.489979467428532e-06, + "loss": 0.1031, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.03965194895863533, + "step": 5295, + "valid_targets_mean": 3340.4, + "valid_targets_min": 672 + }, + { + "epoch": 5.997170345217883, + "grad_norm": 0.43710405022558463, + "learning_rate": 2.4627898886473522e-06, + "loss": 0.1147, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05103425681591034, + "step": 5300, + "valid_targets_mean": 4044.8, + "valid_targets_min": 819 + }, + { + "epoch": 6.002263723825693, + "grad_norm": 0.4286711561964569, + "learning_rate": 2.435739832779358e-06, + "loss": 0.1181, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.050693124532699585, + "step": 5305, + "valid_targets_mean": 4859.2, + "valid_targets_min": 2829 + }, + { + "epoch": 6.007923033389926, + "grad_norm": 0.42174501696346867, + "learning_rate": 2.408829515029969e-06, + "loss": 0.1007, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.050569087266922, + "step": 5310, + "valid_targets_mean": 4945.8, + "valid_targets_min": 2301 + }, + { + "epoch": 6.01358234295416, + "grad_norm": 0.42334429728579814, + "learning_rate": 2.3820591494928635e-06, + "loss": 0.1047, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06259667873382568, + "step": 5315, + "valid_targets_mean": 4549.0, + "valid_targets_min": 1979 + }, + { + "epoch": 6.019241652518393, + "grad_norm": 0.46546987620443286, + "learning_rate": 2.3554289491482996e-06, + "loss": 0.1074, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05643333122134209, + "step": 5320, + "valid_targets_mean": 6023.8, + "valid_targets_min": 780 + }, + { + "epoch": 6.024900962082626, + "grad_norm": 0.4685913820614672, + "learning_rate": 2.3289391258613938e-06, + "loss": 0.1129, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05431240051984787, + "step": 5325, + "valid_targets_mean": 4524.8, + "valid_targets_min": 742 + }, + { + "epoch": 6.030560271646859, + "grad_norm": 0.4804951321011701, + "learning_rate": 2.3025898903804467e-06, + "loss": 0.1057, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08532443642616272, + "step": 5330, + "valid_targets_mean": 3646.2, + "valid_targets_min": 807 + }, + { + "epoch": 6.036219581211093, + "grad_norm": 0.5410653365932933, + "learning_rate": 2.276381452335281e-06, + "loss": 0.1043, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07488438487052917, + "step": 5335, + "valid_targets_mean": 3353.9, + "valid_targets_min": 728 + }, + { + "epoch": 6.041878890775325, + "grad_norm": 0.4642223383273615, + "learning_rate": 2.2503140202355488e-06, + "loss": 0.1045, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04771353304386139, + "step": 5340, + "valid_targets_mean": 3461.6, + "valid_targets_min": 619 + }, + { + "epoch": 6.047538200339559, + "grad_norm": 0.4203928734077498, + "learning_rate": 2.2243878014690834e-06, + "loss": 0.1082, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04168502613902092, + "step": 5345, + "valid_targets_mean": 4563.0, + "valid_targets_min": 828 + }, + { + "epoch": 6.053197509903792, + "grad_norm": 0.4528749899837478, + "learning_rate": 2.1986030023002568e-06, + "loss": 0.1078, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06076666712760925, + "step": 5350, + "valid_targets_mean": 5164.8, + "valid_targets_min": 553 + }, + { + "epoch": 6.058856819468025, + "grad_norm": 0.4554392379286468, + "learning_rate": 2.1729598278683264e-06, + "loss": 0.11, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05477149039506912, + "step": 5355, + "valid_targets_mean": 4764.0, + "valid_targets_min": 2892 + }, + { + "epoch": 6.064516129032258, + "grad_norm": 0.4098957835558889, + "learning_rate": 2.147458482185807e-06, + "loss": 0.109, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05490132048726082, + "step": 5360, + "valid_targets_mean": 4295.5, + "valid_targets_min": 764 + }, + { + "epoch": 6.0701754385964914, + "grad_norm": 0.44771483360492126, + "learning_rate": 2.122099168136862e-06, + "loss": 0.1297, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04792318493127823, + "step": 5365, + "valid_targets_mean": 3874.1, + "valid_targets_min": 635 + }, + { + "epoch": 6.075834748160724, + "grad_norm": 0.4322008340928698, + "learning_rate": 2.0968820874756625e-06, + "loss": 0.1021, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06207504868507385, + "step": 5370, + "valid_targets_mean": 6766.9, + "valid_targets_min": 2827 + }, + { + "epoch": 6.081494057724957, + "grad_norm": 0.4485766076097201, + "learning_rate": 2.0718074408247955e-06, + "loss": 0.1161, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.03774695843458176, + "step": 5375, + "valid_targets_mean": 4388.5, + "valid_targets_min": 729 + }, + { + "epoch": 6.087153367289191, + "grad_norm": 0.45133725045471934, + "learning_rate": 2.0468754276736823e-06, + "loss": 0.1081, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0448567271232605, + "step": 5380, + "valid_targets_mean": 4150.8, + "valid_targets_min": 696 + }, + { + "epoch": 6.092812676853424, + "grad_norm": 0.43507183768923463, + "learning_rate": 2.0220862463769665e-06, + "loss": 0.1051, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05283046513795853, + "step": 5385, + "valid_targets_mean": 4829.1, + "valid_targets_min": 613 + }, + { + "epoch": 6.098471986417657, + "grad_norm": 0.7163155466406244, + "learning_rate": 1.9974400941529493e-06, + "loss": 0.1166, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07636456191539764, + "step": 5390, + "valid_targets_mean": 4646.8, + "valid_targets_min": 2004 + }, + { + "epoch": 6.10413129598189, + "grad_norm": 0.5120619915658925, + "learning_rate": 1.972937167082014e-06, + "loss": 0.1069, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06319265812635422, + "step": 5395, + "valid_targets_mean": 5762.8, + "valid_targets_min": 2782 + }, + { + "epoch": 6.109790605546124, + "grad_norm": 0.4252087057405538, + "learning_rate": 1.948577660105082e-06, + "loss": 0.1061, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06399044394493103, + "step": 5400, + "valid_targets_mean": 4840.8, + "valid_targets_min": 1689 + }, + { + "epoch": 6.115449915110356, + "grad_norm": 0.4492295074985939, + "learning_rate": 1.924361767022038e-06, + "loss": 0.1059, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05430837348103523, + "step": 5405, + "valid_targets_mean": 3899.9, + "valid_targets_min": 764 + }, + { + "epoch": 6.12110922467459, + "grad_norm": 0.4950793125265697, + "learning_rate": 1.9002896804902039e-06, + "loss": 0.1101, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05699116736650467, + "step": 5410, + "valid_targets_mean": 4535.6, + "valid_targets_min": 2888 + }, + { + "epoch": 6.126768534238823, + "grad_norm": 0.4770911046966917, + "learning_rate": 1.8763615920228084e-06, + "loss": 0.1115, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05956456810235977, + "step": 5415, + "valid_targets_mean": 5491.8, + "valid_targets_min": 2309 + }, + { + "epoch": 6.132427843803056, + "grad_norm": 0.4842769800891266, + "learning_rate": 1.8525776919874472e-06, + "loss": 0.1073, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.043541569262742996, + "step": 5420, + "valid_targets_mean": 4292.5, + "valid_targets_min": 766 + }, + { + "epoch": 6.138087153367289, + "grad_norm": 0.44627034590989584, + "learning_rate": 1.8289381696045817e-06, + "loss": 0.1074, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04568861797451973, + "step": 5425, + "valid_targets_mean": 4680.4, + "valid_targets_min": 2901 + }, + { + "epoch": 6.143746462931523, + "grad_norm": 0.4814282816741954, + "learning_rate": 1.8054432129460386e-06, + "loss": 0.1095, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06312857568264008, + "step": 5430, + "valid_targets_mean": 3956.2, + "valid_targets_min": 779 + }, + { + "epoch": 6.149405772495755, + "grad_norm": 0.42224436883629174, + "learning_rate": 1.7820930089334965e-06, + "loss": 0.1088, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04467467591166496, + "step": 5435, + "valid_targets_mean": 3929.5, + "valid_targets_min": 2594 + }, + { + "epoch": 6.155065082059989, + "grad_norm": 0.5421047618079458, + "learning_rate": 1.7588877433370076e-06, + "loss": 0.1045, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08192360401153564, + "step": 5440, + "valid_targets_mean": 5215.9, + "valid_targets_min": 718 + }, + { + "epoch": 6.160724391624222, + "grad_norm": 0.5030304285323317, + "learning_rate": 1.7358276007735276e-06, + "loss": 0.1144, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05652017891407013, + "step": 5445, + "valid_targets_mean": 3996.4, + "valid_targets_min": 611 + }, + { + "epoch": 6.166383701188455, + "grad_norm": 0.4955108689807199, + "learning_rate": 1.71291276470543e-06, + "loss": 0.0979, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04624000936746597, + "step": 5450, + "valid_targets_mean": 3092.4, + "valid_targets_min": 457 + }, + { + "epoch": 6.172043010752688, + "grad_norm": 0.46584798579859354, + "learning_rate": 1.6901434174390652e-06, + "loss": 0.1052, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0591970756649971, + "step": 5455, + "valid_targets_mean": 4511.0, + "valid_targets_min": 474 + }, + { + "epoch": 6.1777023203169215, + "grad_norm": 0.4492004064393248, + "learning_rate": 1.6675197401232869e-06, + "loss": 0.1141, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04524921625852585, + "step": 5460, + "valid_targets_mean": 4468.4, + "valid_targets_min": 774 + }, + { + "epoch": 6.183361629881155, + "grad_norm": 0.4633121184463679, + "learning_rate": 1.6450419127480422e-06, + "loss": 0.1185, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06392519921064377, + "step": 5465, + "valid_targets_mean": 4509.8, + "valid_targets_min": 1911 + }, + { + "epoch": 6.1890209394453874, + "grad_norm": 0.5649714844080365, + "learning_rate": 1.6227101141429114e-06, + "loss": 0.1179, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06929600238800049, + "step": 5470, + "valid_targets_mean": 5229.1, + "valid_targets_min": 2568 + }, + { + "epoch": 6.194680249009621, + "grad_norm": 0.4122854752076458, + "learning_rate": 1.6005245219756927e-06, + "loss": 0.1141, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05264044180512428, + "step": 5475, + "valid_targets_mean": 5210.6, + "valid_targets_min": 1211 + }, + { + "epoch": 6.200339558573854, + "grad_norm": 0.4684742551503162, + "learning_rate": 1.5784853127510058e-06, + "loss": 0.1114, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06229633092880249, + "step": 5480, + "valid_targets_mean": 4926.9, + "valid_targets_min": 1245 + }, + { + "epoch": 6.205998868138087, + "grad_norm": 0.4134220177786379, + "learning_rate": 1.5565926618088578e-06, + "loss": 0.1104, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04545718431472778, + "step": 5485, + "valid_targets_mean": 3964.0, + "valid_targets_min": 1165 + }, + { + "epoch": 6.21165817770232, + "grad_norm": 0.5590077270291888, + "learning_rate": 1.5348467433232728e-06, + "loss": 0.1147, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06085751950740814, + "step": 5490, + "valid_targets_mean": 4258.6, + "valid_targets_min": 1062 + }, + { + "epoch": 6.217317487266554, + "grad_norm": 0.4694767656913774, + "learning_rate": 1.5132477303009018e-06, + "loss": 0.1171, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05622674524784088, + "step": 5495, + "valid_targets_mean": 4519.6, + "valid_targets_min": 937 + }, + { + "epoch": 6.222976796830786, + "grad_norm": 0.49963090653180947, + "learning_rate": 1.4917957945796313e-06, + "loss": 0.1178, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05097990110516548, + "step": 5500, + "valid_targets_mean": 3804.4, + "valid_targets_min": 641 + }, + { + "epoch": 6.22863610639502, + "grad_norm": 0.48655593983935475, + "learning_rate": 1.4704911068272366e-06, + "loss": 0.1055, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.050442397594451904, + "step": 5505, + "valid_targets_mean": 3600.1, + "valid_targets_min": 772 + }, + { + "epoch": 6.234295415959253, + "grad_norm": 0.4456045608852177, + "learning_rate": 1.4493338365400034e-06, + "loss": 0.1113, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.042644962668418884, + "step": 5510, + "valid_targets_mean": 4984.8, + "valid_targets_min": 2934 + }, + { + "epoch": 6.239954725523486, + "grad_norm": 0.4432670914786111, + "learning_rate": 1.428324152041407e-06, + "loss": 0.1109, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06132501736283302, + "step": 5515, + "valid_targets_mean": 5798.8, + "valid_targets_min": 3903 + }, + { + "epoch": 6.245614035087719, + "grad_norm": 0.475851423040468, + "learning_rate": 1.407462220480742e-06, + "loss": 0.1086, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05307042971253395, + "step": 5520, + "valid_targets_mean": 4151.8, + "valid_targets_min": 910 + }, + { + "epoch": 6.251273344651953, + "grad_norm": 0.4963430844598505, + "learning_rate": 1.3867482078318095e-06, + "loss": 0.1117, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0640251636505127, + "step": 5525, + "valid_targets_mean": 4138.5, + "valid_targets_min": 854 + }, + { + "epoch": 6.256932654216186, + "grad_norm": 0.43112315834714693, + "learning_rate": 1.3661822788916013e-06, + "loss": 0.1166, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07484033703804016, + "step": 5530, + "valid_targets_mean": 5383.0, + "valid_targets_min": 717 + }, + { + "epoch": 6.262591963780419, + "grad_norm": 0.5104039915921019, + "learning_rate": 1.3457645972789778e-06, + "loss": 0.1115, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06255707144737244, + "step": 5535, + "valid_targets_mean": 3632.5, + "valid_targets_min": 757 + }, + { + "epoch": 6.268251273344652, + "grad_norm": 0.4556079658640974, + "learning_rate": 1.3254953254333613e-06, + "loss": 0.1064, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0622066855430603, + "step": 5540, + "valid_targets_mean": 5235.0, + "valid_targets_min": 757 + }, + { + "epoch": 6.2739105829088855, + "grad_norm": 0.39283631659282053, + "learning_rate": 1.305374624613469e-06, + "loss": 0.1093, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04665352404117584, + "step": 5545, + "valid_targets_mean": 3987.5, + "valid_targets_min": 731 + }, + { + "epoch": 6.279569892473118, + "grad_norm": 0.4954506562734886, + "learning_rate": 1.285402654896004e-06, + "loss": 0.1149, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05268310010433197, + "step": 5550, + "valid_targets_mean": 4210.9, + "valid_targets_min": 907 + }, + { + "epoch": 6.2852292020373515, + "grad_norm": 0.6171604277118048, + "learning_rate": 1.265579575174387e-06, + "loss": 0.1104, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06200452148914337, + "step": 5555, + "valid_targets_mean": 6418.9, + "valid_targets_min": 4158 + }, + { + "epoch": 6.290888511601585, + "grad_norm": 0.4387398907398315, + "learning_rate": 1.245905543157504e-06, + "loss": 0.1097, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04121387004852295, + "step": 5560, + "valid_targets_mean": 3847.1, + "valid_targets_min": 350 + }, + { + "epoch": 6.2965478211658175, + "grad_norm": 0.43369634977150906, + "learning_rate": 1.2263807153684448e-06, + "loss": 0.1095, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.059001047164201736, + "step": 5565, + "valid_targets_mean": 5436.4, + "valid_targets_min": 3306 + }, + { + "epoch": 6.302207130730051, + "grad_norm": 0.4997250526713922, + "learning_rate": 1.2070052471432535e-06, + "loss": 0.1128, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07998879253864288, + "step": 5570, + "valid_targets_mean": 5359.5, + "valid_targets_min": 908 + }, + { + "epoch": 6.307866440294284, + "grad_norm": 0.4456612429278142, + "learning_rate": 1.1877792926296893e-06, + "loss": 0.0997, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04641469568014145, + "step": 5575, + "valid_targets_mean": 3938.2, + "valid_targets_min": 825 + }, + { + "epoch": 6.313525749858517, + "grad_norm": 0.4621463735779093, + "learning_rate": 1.1687030047860248e-06, + "loss": 0.1007, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04436139017343521, + "step": 5580, + "valid_targets_mean": 3619.8, + "valid_targets_min": 520 + }, + { + "epoch": 6.31918505942275, + "grad_norm": 0.4510535986765808, + "learning_rate": 1.1497765353797963e-06, + "loss": 0.1051, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04282500222325325, + "step": 5585, + "valid_targets_mean": 3419.9, + "valid_targets_min": 647 + }, + { + "epoch": 6.324844368986984, + "grad_norm": 0.4191551349733361, + "learning_rate": 1.1310000349866136e-06, + "loss": 0.1032, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05155666917562485, + "step": 5590, + "valid_targets_mean": 4503.4, + "valid_targets_min": 1463 + }, + { + "epoch": 6.330503678551217, + "grad_norm": 0.4491720263761463, + "learning_rate": 1.1123736529889674e-06, + "loss": 0.1187, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.057728201150894165, + "step": 5595, + "valid_targets_mean": 3847.9, + "valid_targets_min": 1155 + }, + { + "epoch": 6.33616298811545, + "grad_norm": 0.49133598054036454, + "learning_rate": 1.093897537575026e-06, + "loss": 0.1033, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05889619141817093, + "step": 5600, + "valid_targets_mean": 4173.5, + "valid_targets_min": 1199 + }, + { + "epoch": 6.341822297679683, + "grad_norm": 0.4355852035872657, + "learning_rate": 1.075571835737459e-06, + "loss": 0.1169, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04971563071012497, + "step": 5605, + "valid_targets_mean": 3936.5, + "valid_targets_min": 1982 + }, + { + "epoch": 6.347481607243917, + "grad_norm": 0.47112278691726356, + "learning_rate": 1.0573966932722902e-06, + "loss": 0.1166, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08222422003746033, + "step": 5610, + "valid_targets_mean": 5463.8, + "valid_targets_min": 3255 + }, + { + "epoch": 6.353140916808149, + "grad_norm": 0.4391137908198263, + "learning_rate": 1.039372254777702e-06, + "loss": 0.1096, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.045624878257513046, + "step": 5615, + "valid_targets_mean": 4624.0, + "valid_targets_min": 815 + }, + { + "epoch": 6.358800226372383, + "grad_norm": 0.4268441279169322, + "learning_rate": 1.0214986636529112e-06, + "loss": 0.1089, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05092786252498627, + "step": 5620, + "valid_targets_mean": 4906.9, + "valid_targets_min": 2996 + }, + { + "epoch": 6.364459535936616, + "grad_norm": 0.5458150571125557, + "learning_rate": 1.003776062097015e-06, + "loss": 0.1182, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0669512003660202, + "step": 5625, + "valid_targets_mean": 4385.0, + "valid_targets_min": 827 + }, + { + "epoch": 6.370118845500849, + "grad_norm": 0.4642071333132514, + "learning_rate": 9.862045911078733e-07, + "loss": 0.1115, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06251871585845947, + "step": 5630, + "valid_targets_mean": 6028.8, + "valid_targets_min": 2456 + }, + { + "epoch": 6.375778155065082, + "grad_norm": 0.4249397684950881, + "learning_rate": 9.687843904809725e-07, + "loss": 0.1074, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06095593795180321, + "step": 5635, + "valid_targets_mean": 5000.0, + "valid_targets_min": 3915 + }, + { + "epoch": 6.3814374646293155, + "grad_norm": 0.5019556590941587, + "learning_rate": 9.515155988083125e-07, + "loss": 0.1086, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.039802275598049164, + "step": 5640, + "valid_targets_mean": 3441.1, + "valid_targets_min": 853 + }, + { + "epoch": 6.387096774193548, + "grad_norm": 0.43679586076548677, + "learning_rate": 9.343983534773238e-07, + "loss": 0.103, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0554230734705925, + "step": 5645, + "valid_targets_mean": 5216.5, + "valid_targets_min": 1033 + }, + { + "epoch": 6.3927560837577815, + "grad_norm": 0.4530005045440894, + "learning_rate": 9.174327906697522e-07, + "loss": 0.1096, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04241374880075455, + "step": 5650, + "valid_targets_mean": 4422.8, + "valid_targets_min": 856 + }, + { + "epoch": 6.398415393322015, + "grad_norm": 0.43296859018421313, + "learning_rate": 9.006190453605867e-07, + "loss": 0.1107, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06314845383167267, + "step": 5655, + "valid_targets_mean": 4671.5, + "valid_targets_min": 1469 + }, + { + "epoch": 6.4040747028862475, + "grad_norm": 0.5123924012095858, + "learning_rate": 8.839572513169869e-07, + "loss": 0.1051, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06240767240524292, + "step": 5660, + "valid_targets_mean": 4240.8, + "valid_targets_min": 658 + }, + { + "epoch": 6.409734012450481, + "grad_norm": 0.4314225949712511, + "learning_rate": 8.674475410972083e-07, + "loss": 0.1046, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.057490866631269455, + "step": 5665, + "valid_targets_mean": 4704.8, + "valid_targets_min": 1766 + }, + { + "epoch": 6.415393322014714, + "grad_norm": 0.49324948546397906, + "learning_rate": 8.510900460495608e-07, + "loss": 0.1113, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.061774738132953644, + "step": 5670, + "valid_targets_mean": 4158.9, + "valid_targets_min": 629 + }, + { + "epoch": 6.421052631578947, + "grad_norm": 0.47982286000305036, + "learning_rate": 8.348848963113498e-07, + "loss": 0.1091, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.03885657340288162, + "step": 5675, + "valid_targets_mean": 3390.9, + "valid_targets_min": 786 + }, + { + "epoch": 6.42671194114318, + "grad_norm": 0.4545823170061289, + "learning_rate": 8.188322208078614e-07, + "loss": 0.1055, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04772159457206726, + "step": 5680, + "valid_targets_mean": 3512.1, + "valid_targets_min": 802 + }, + { + "epoch": 6.432371250707414, + "grad_norm": 0.4941588528094909, + "learning_rate": 8.02932147251314e-07, + "loss": 0.1074, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05159895494580269, + "step": 5685, + "valid_targets_mean": 3807.2, + "valid_targets_min": 983 + }, + { + "epoch": 6.438030560271647, + "grad_norm": 0.48634222751192663, + "learning_rate": 7.87184802139851e-07, + "loss": 0.1169, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.059147678315639496, + "step": 5690, + "valid_targets_mean": 5274.8, + "valid_targets_min": 1237 + }, + { + "epoch": 6.44368986983588, + "grad_norm": 0.47515611836798244, + "learning_rate": 7.715903107565426e-07, + "loss": 0.1163, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05537790805101395, + "step": 5695, + "valid_targets_mean": 4850.4, + "valid_targets_min": 3271 + }, + { + "epoch": 6.449349179400113, + "grad_norm": 0.48631499695024394, + "learning_rate": 7.56148797168379e-07, + "loss": 0.1124, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.049857817590236664, + "step": 5700, + "valid_targets_mean": 3498.4, + "valid_targets_min": 923 + }, + { + "epoch": 6.455008488964347, + "grad_norm": 0.4754550892600179, + "learning_rate": 7.408603842252837e-07, + "loss": 0.1105, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0813467875123024, + "step": 5705, + "valid_targets_mean": 5031.0, + "valid_targets_min": 771 + }, + { + "epoch": 6.460667798528579, + "grad_norm": 0.49328402822248874, + "learning_rate": 7.257251935591436e-07, + "loss": 0.1151, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.046455398201942444, + "step": 5710, + "valid_targets_mean": 3143.4, + "valid_targets_min": 647 + }, + { + "epoch": 6.466327108092813, + "grad_norm": 0.5466982667435122, + "learning_rate": 7.107433455828317e-07, + "loss": 0.1023, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0489300936460495, + "step": 5715, + "valid_targets_mean": 3009.2, + "valid_targets_min": 567 + }, + { + "epoch": 6.471986417657046, + "grad_norm": 0.44726206078270786, + "learning_rate": 6.959149594892567e-07, + "loss": 0.0997, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.042425498366355896, + "step": 5720, + "valid_targets_mean": 4072.5, + "valid_targets_min": 720 + }, + { + "epoch": 6.477645727221279, + "grad_norm": 0.46654083493080434, + "learning_rate": 6.812401532504109e-07, + "loss": 0.1087, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06563600897789001, + "step": 5725, + "valid_targets_mean": 5655.5, + "valid_targets_min": 629 + }, + { + "epoch": 6.483305036785512, + "grad_norm": 0.4676657064580531, + "learning_rate": 6.667190436164351e-07, + "loss": 0.1113, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05517805367708206, + "step": 5730, + "valid_targets_mean": 4520.8, + "valid_targets_min": 1070 + }, + { + "epoch": 6.4889643463497455, + "grad_norm": 0.4322085489919572, + "learning_rate": 6.523517461146856e-07, + "loss": 0.1071, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05082578584551811, + "step": 5735, + "valid_targets_mean": 4375.4, + "valid_targets_min": 1016 + }, + { + "epoch": 6.494623655913978, + "grad_norm": 0.4360576625322933, + "learning_rate": 6.381383750488113e-07, + "loss": 0.1037, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.054989445954561234, + "step": 5740, + "valid_targets_mean": 3996.5, + "valid_targets_min": 631 + }, + { + "epoch": 6.5002829654782115, + "grad_norm": 0.4538680003263819, + "learning_rate": 6.240790434978628e-07, + "loss": 0.1075, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05418688803911209, + "step": 5745, + "valid_targets_mean": 5151.8, + "valid_targets_min": 2160 + }, + { + "epoch": 6.505942275042445, + "grad_norm": 0.5430381498527266, + "learning_rate": 6.101738633153686e-07, + "loss": 0.1091, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.03999526798725128, + "step": 5750, + "valid_targets_mean": 3078.9, + "valid_targets_min": 777 + }, + { + "epoch": 6.511601584606678, + "grad_norm": 0.44070368498297, + "learning_rate": 5.964229451284586e-07, + "loss": 0.1047, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.041160114109516144, + "step": 5755, + "valid_targets_mean": 4771.8, + "valid_targets_min": 2696 + }, + { + "epoch": 6.517260894170911, + "grad_norm": 0.44757719594124895, + "learning_rate": 5.828263983369864e-07, + "loss": 0.1093, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.042410582304000854, + "step": 5760, + "valid_targets_mean": 4844.5, + "valid_targets_min": 2711 + }, + { + "epoch": 6.522920203735144, + "grad_norm": 0.46590468932997586, + "learning_rate": 5.693843311126523e-07, + "loss": 0.0987, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.059580568224191666, + "step": 5765, + "valid_targets_mean": 5246.9, + "valid_targets_min": 3991 + }, + { + "epoch": 6.528579513299378, + "grad_norm": 0.529918714236831, + "learning_rate": 5.560968503981378e-07, + "loss": 0.1047, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07430808991193771, + "step": 5770, + "valid_targets_mean": 4766.0, + "valid_targets_min": 2671 + }, + { + "epoch": 6.53423882286361, + "grad_norm": 0.48999189984513425, + "learning_rate": 5.429640619062726e-07, + "loss": 0.1117, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.054301969707012177, + "step": 5775, + "valid_targets_mean": 4982.6, + "valid_targets_min": 643 + }, + { + "epoch": 6.539898132427844, + "grad_norm": 0.45197563283965847, + "learning_rate": 5.299860701191772e-07, + "loss": 0.1023, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0497448593378067, + "step": 5780, + "valid_targets_mean": 5143.0, + "valid_targets_min": 1909 + }, + { + "epoch": 6.545557441992077, + "grad_norm": 0.49719813685079606, + "learning_rate": 5.171629782874354e-07, + "loss": 0.1123, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04627831280231476, + "step": 5785, + "valid_targets_mean": 3856.8, + "valid_targets_min": 1205 + }, + { + "epoch": 6.55121675155631, + "grad_norm": 0.4902332909073729, + "learning_rate": 5.044948884292766e-07, + "loss": 0.1123, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06381359696388245, + "step": 5790, + "valid_targets_mean": 5376.4, + "valid_targets_min": 2962 + }, + { + "epoch": 6.556876061120543, + "grad_norm": 0.4543212560751884, + "learning_rate": 4.919819013297677e-07, + "loss": 0.0988, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04228081554174423, + "step": 5795, + "valid_targets_mean": 3700.8, + "valid_targets_min": 693 + }, + { + "epoch": 6.562535370684777, + "grad_norm": 0.47769030432239146, + "learning_rate": 4.796241165399939e-07, + "loss": 0.11, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0564909428358078, + "step": 5800, + "valid_targets_mean": 3940.2, + "valid_targets_min": 988 + }, + { + "epoch": 6.568194680249009, + "grad_norm": 0.4789568067431446, + "learning_rate": 4.674216323762881e-07, + "loss": 0.107, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04393457621335983, + "step": 5805, + "valid_targets_mean": 3551.6, + "valid_targets_min": 538 + }, + { + "epoch": 6.573853989813243, + "grad_norm": 0.5036221928369926, + "learning_rate": 4.5537454591943584e-07, + "loss": 0.1169, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05866064876317978, + "step": 5810, + "valid_targets_mean": 4824.0, + "valid_targets_min": 1122 + }, + { + "epoch": 6.579513299377476, + "grad_norm": 0.4895133064285197, + "learning_rate": 4.434829530139095e-07, + "loss": 0.111, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05340457707643509, + "step": 5815, + "valid_targets_mean": 3849.8, + "valid_targets_min": 949 + }, + { + "epoch": 6.5851726089417095, + "grad_norm": 0.4359159196738691, + "learning_rate": 4.3174694826709107e-07, + "loss": 0.099, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.051412180066108704, + "step": 5820, + "valid_targets_mean": 4975.5, + "valid_targets_min": 969 + }, + { + "epoch": 6.590831918505942, + "grad_norm": 0.46193736756343323, + "learning_rate": 4.201666250485503e-07, + "loss": 0.1114, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05025537312030792, + "step": 5825, + "valid_targets_mean": 3928.1, + "valid_targets_min": 940 + }, + { + "epoch": 6.5964912280701755, + "grad_norm": 0.47726384793162263, + "learning_rate": 4.087420754892635e-07, + "loss": 0.1033, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.046420466154813766, + "step": 5830, + "valid_targets_mean": 4443.2, + "valid_targets_min": 916 + }, + { + "epoch": 6.602150537634409, + "grad_norm": 0.4434783136235322, + "learning_rate": 3.9747339048091136e-07, + "loss": 0.1134, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05376073345541954, + "step": 5835, + "valid_targets_mean": 4851.6, + "valid_targets_min": 2849 + }, + { + "epoch": 6.6078098471986415, + "grad_norm": 0.5054370176258929, + "learning_rate": 3.863606596751379e-07, + "loss": 0.1139, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06862815469503403, + "step": 5840, + "valid_targets_mean": 3622.0, + "valid_targets_min": 995 + }, + { + "epoch": 6.613469156762875, + "grad_norm": 0.4871801642631746, + "learning_rate": 3.7540397148284837e-07, + "loss": 0.108, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07087269425392151, + "step": 5845, + "valid_targets_mean": 5376.6, + "valid_targets_min": 2966 + }, + { + "epoch": 6.619128466327108, + "grad_norm": 0.49045049165273996, + "learning_rate": 3.6460341307349653e-07, + "loss": 0.1059, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.061245039105415344, + "step": 5850, + "valid_targets_mean": 4925.1, + "valid_targets_min": 2975 + }, + { + "epoch": 6.624787775891341, + "grad_norm": 0.45613838192683676, + "learning_rate": 3.539590703743967e-07, + "loss": 0.1085, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04648123309016228, + "step": 5855, + "valid_targets_mean": 4671.9, + "valid_targets_min": 2404 + }, + { + "epoch": 6.630447085455574, + "grad_norm": 0.4149000037396337, + "learning_rate": 3.434710280700415e-07, + "loss": 0.1129, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.03987503796815872, + "step": 5860, + "valid_targets_mean": 4631.6, + "valid_targets_min": 539 + }, + { + "epoch": 6.636106395019808, + "grad_norm": 0.4993198099239122, + "learning_rate": 3.331393696014207e-07, + "loss": 0.1108, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.040497180074453354, + "step": 5865, + "valid_targets_mean": 3185.8, + "valid_targets_min": 720 + }, + { + "epoch": 6.64176570458404, + "grad_norm": 0.46162573668771467, + "learning_rate": 3.22964177165368e-07, + "loss": 0.1028, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.057105813175439835, + "step": 5870, + "valid_targets_mean": 5319.8, + "valid_targets_min": 3856 + }, + { + "epoch": 6.647425014148274, + "grad_norm": 0.7386823072754611, + "learning_rate": 3.129455317138952e-07, + "loss": 0.1141, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04742707312107086, + "step": 5875, + "valid_targets_mean": 3950.4, + "valid_targets_min": 844 + }, + { + "epoch": 6.653084323712507, + "grad_norm": 0.44631289673687735, + "learning_rate": 3.030835129535592e-07, + "loss": 0.0994, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04715785011649132, + "step": 5880, + "valid_targets_mean": 4266.6, + "valid_targets_min": 333 + }, + { + "epoch": 6.658743633276741, + "grad_norm": 0.45277207892503146, + "learning_rate": 2.9337819934481814e-07, + "loss": 0.1109, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06291121989488602, + "step": 5885, + "valid_targets_mean": 5319.5, + "valid_targets_min": 2894 + }, + { + "epoch": 6.664402942840973, + "grad_norm": 0.4749175021506203, + "learning_rate": 2.838296681014185e-07, + "loss": 0.1105, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06835667788982391, + "step": 5890, + "valid_targets_mean": 5180.0, + "valid_targets_min": 1140 + }, + { + "epoch": 6.670062252405207, + "grad_norm": 0.5041174129601913, + "learning_rate": 2.744379951897691e-07, + "loss": 0.1136, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05334525555372238, + "step": 5895, + "valid_targets_mean": 4018.2, + "valid_targets_min": 702 + }, + { + "epoch": 6.67572156196944, + "grad_norm": 0.4802750173012651, + "learning_rate": 2.65203255328339e-07, + "loss": 0.1101, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05819660425186157, + "step": 5900, + "valid_targets_mean": 4938.0, + "valid_targets_min": 1156 + }, + { + "epoch": 6.681380871533673, + "grad_norm": 0.4469065045929307, + "learning_rate": 2.561255219870762e-07, + "loss": 0.1115, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04390975832939148, + "step": 5905, + "valid_targets_mean": 4368.1, + "valid_targets_min": 701 + }, + { + "epoch": 6.687040181097906, + "grad_norm": 0.4313060541402237, + "learning_rate": 2.472048673868033e-07, + "loss": 0.1042, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04713157191872597, + "step": 5910, + "valid_targets_mean": 4396.4, + "valid_targets_min": 858 + }, + { + "epoch": 6.6926994906621395, + "grad_norm": 0.47058158720801235, + "learning_rate": 2.3844136249865367e-07, + "loss": 0.108, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.058971039950847626, + "step": 5915, + "valid_targets_mean": 4760.5, + "valid_targets_min": 733 + }, + { + "epoch": 6.698358800226372, + "grad_norm": 0.4350879068513107, + "learning_rate": 2.2983507704351426e-07, + "loss": 0.1052, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.048284661024808884, + "step": 5920, + "valid_targets_mean": 4709.4, + "valid_targets_min": 987 + }, + { + "epoch": 6.7040181097906055, + "grad_norm": 0.4929642251391588, + "learning_rate": 2.213860794914524e-07, + "loss": 0.1129, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04072011262178421, + "step": 5925, + "valid_targets_mean": 3180.0, + "valid_targets_min": 502 + }, + { + "epoch": 6.709677419354839, + "grad_norm": 0.4666049431974343, + "learning_rate": 2.1309443706118538e-07, + "loss": 0.1114, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05139001086354256, + "step": 5930, + "valid_targets_mean": 4017.6, + "valid_targets_min": 1353 + }, + { + "epoch": 6.7153367289190715, + "grad_norm": 0.46099449688145855, + "learning_rate": 2.049602157195363e-07, + "loss": 0.1161, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0440276637673378, + "step": 5935, + "valid_targets_mean": 3761.9, + "valid_targets_min": 751 + }, + { + "epoch": 6.720996038483305, + "grad_norm": 0.4545373627276294, + "learning_rate": 1.9698348018092338e-07, + "loss": 0.1088, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05156515911221504, + "step": 5940, + "valid_targets_mean": 4165.2, + "valid_targets_min": 516 + }, + { + "epoch": 6.726655348047538, + "grad_norm": 0.4670902946029518, + "learning_rate": 1.8916429390682944e-07, + "loss": 0.1057, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05758592113852501, + "step": 5945, + "valid_targets_mean": 4056.5, + "valid_targets_min": 1173 + }, + { + "epoch": 6.732314657611772, + "grad_norm": 0.44046605043945203, + "learning_rate": 1.8150271910530204e-07, + "loss": 0.1125, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06280037015676498, + "step": 5950, + "valid_targets_mean": 5061.5, + "valid_targets_min": 597 + }, + { + "epoch": 6.737973967176004, + "grad_norm": 0.39975936099169246, + "learning_rate": 1.7399881673046736e-07, + "loss": 0.0997, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05804266780614853, + "step": 5955, + "valid_targets_mean": 5704.5, + "valid_targets_min": 2704 + }, + { + "epoch": 6.743633276740238, + "grad_norm": 0.44732731322031183, + "learning_rate": 1.666526464820284e-07, + "loss": 0.1204, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0514485165476799, + "step": 5960, + "valid_targets_mean": 5201.5, + "valid_targets_min": 2299 + }, + { + "epoch": 6.74929258630447, + "grad_norm": 0.5195163487251693, + "learning_rate": 1.594642668048052e-07, + "loss": 0.1086, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05142718926072121, + "step": 5965, + "valid_targets_mean": 3306.6, + "valid_targets_min": 708 + }, + { + "epoch": 6.754951895868704, + "grad_norm": 0.43844072245839727, + "learning_rate": 1.5243373488826653e-07, + "loss": 0.1091, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06188996136188507, + "step": 5970, + "valid_targets_mean": 5304.4, + "valid_targets_min": 2710 + }, + { + "epoch": 6.760611205432937, + "grad_norm": 0.5408606069799887, + "learning_rate": 1.4556110666606783e-07, + "loss": 0.1088, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.050650183111429214, + "step": 5975, + "valid_targets_mean": 3342.8, + "valid_targets_min": 872 + }, + { + "epoch": 6.766270514997171, + "grad_norm": 0.5207556459124271, + "learning_rate": 1.388464368156095e-07, + "loss": 0.1111, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0543728843331337, + "step": 5980, + "valid_targets_mean": 3925.1, + "valid_targets_min": 596 + }, + { + "epoch": 6.771929824561403, + "grad_norm": 0.4633523080868729, + "learning_rate": 1.322897787576105e-07, + "loss": 0.1121, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.051270902156829834, + "step": 5985, + "valid_targets_mean": 4464.5, + "valid_targets_min": 797 + }, + { + "epoch": 6.777589134125637, + "grad_norm": 0.7291395268186434, + "learning_rate": 1.2589118465566875e-07, + "loss": 0.1154, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06924300640821457, + "step": 5990, + "valid_targets_mean": 4162.1, + "valid_targets_min": 715 + }, + { + "epoch": 6.78324844368987, + "grad_norm": 0.4500049882738217, + "learning_rate": 1.1965070541585912e-07, + "loss": 0.11, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05760305002331734, + "step": 5995, + "valid_targets_mean": 4454.6, + "valid_targets_min": 1144 + }, + { + "epoch": 6.788907753254103, + "grad_norm": 0.4513690495407411, + "learning_rate": 1.1356839068632053e-07, + "loss": 0.1116, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.051529206335544586, + "step": 6000, + "valid_targets_mean": 3851.0, + "valid_targets_min": 927 + }, + { + "epoch": 6.794567062818336, + "grad_norm": 0.4893009946283525, + "learning_rate": 1.0764428885686073e-07, + "loss": 0.1147, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06949000805616379, + "step": 6005, + "valid_targets_mean": 5391.4, + "valid_targets_min": 1942 + }, + { + "epoch": 6.8002263723825696, + "grad_norm": 0.45359009636516834, + "learning_rate": 1.0187844705857875e-07, + "loss": 0.1095, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.056670181453228, + "step": 6010, + "valid_targets_mean": 3454.2, + "valid_targets_min": 999 + }, + { + "epoch": 6.805885681946802, + "grad_norm": 0.42070631393498314, + "learning_rate": 9.627091116348076e-08, + "loss": 0.1156, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04699110984802246, + "step": 6015, + "valid_targets_mean": 4936.1, + "valid_targets_min": 888 + }, + { + "epoch": 6.8115449915110355, + "grad_norm": 0.45399057680592864, + "learning_rate": 9.082172578412263e-08, + "loss": 0.1021, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05920761823654175, + "step": 6020, + "valid_targets_mean": 5323.4, + "valid_targets_min": 2353 + }, + { + "epoch": 6.817204301075269, + "grad_norm": 0.47560910590891037, + "learning_rate": 8.553093427325243e-08, + "loss": 0.1038, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05265574902296066, + "step": 6025, + "valid_targets_mean": 3967.0, + "valid_targets_min": 729 + }, + { + "epoch": 6.8228636106395015, + "grad_norm": 0.4743813656307752, + "learning_rate": 8.039857872345736e-08, + "loss": 0.1069, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05002026632428169, + "step": 6030, + "valid_targets_mean": 3757.0, + "valid_targets_min": 817 + }, + { + "epoch": 6.828522920203735, + "grad_norm": 0.40876522024197515, + "learning_rate": 7.542469996684843e-08, + "loss": 0.1088, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05608794838190079, + "step": 6035, + "valid_targets_mean": 4902.8, + "valid_targets_min": 1002 + }, + { + "epoch": 6.834182229767968, + "grad_norm": 0.4503791107973096, + "learning_rate": 7.06093375747141e-08, + "loss": 0.1091, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.052000805735588074, + "step": 6040, + "valid_targets_mean": 4743.8, + "valid_targets_min": 1307 + }, + { + "epoch": 6.839841539332202, + "grad_norm": 0.47435983702519186, + "learning_rate": 6.595252985721834e-08, + "loss": 0.1057, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.058356739580631256, + "step": 6045, + "valid_targets_mean": 5329.9, + "valid_targets_min": 3860 + }, + { + "epoch": 6.845500848896434, + "grad_norm": 0.522622112169546, + "learning_rate": 6.145431386309186e-08, + "loss": 0.118, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0768006443977356, + "step": 6050, + "valid_targets_mean": 5166.8, + "valid_targets_min": 3695 + }, + { + "epoch": 6.851160158460668, + "grad_norm": 0.4546806652027603, + "learning_rate": 5.711472537933693e-08, + "loss": 0.1125, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05384565889835358, + "step": 6055, + "valid_targets_mean": 4284.8, + "valid_targets_min": 857 + }, + { + "epoch": 6.856819468024901, + "grad_norm": 0.48276482410619836, + "learning_rate": 5.293379893094752e-08, + "loss": 0.1027, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05803103372454643, + "step": 6060, + "valid_targets_mean": 4420.9, + "valid_targets_min": 907 + }, + { + "epoch": 6.862478777589134, + "grad_norm": 0.4790672448325604, + "learning_rate": 4.891156778062734e-08, + "loss": 0.1072, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.053459323942661285, + "step": 6065, + "valid_targets_mean": 3524.8, + "valid_targets_min": 1057 + }, + { + "epoch": 6.868138087153367, + "grad_norm": 0.48307477152052714, + "learning_rate": 4.5048063928527785e-08, + "loss": 0.1045, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06252394616603851, + "step": 6070, + "valid_targets_mean": 4446.2, + "valid_targets_min": 527 + }, + { + "epoch": 6.873797396717601, + "grad_norm": 0.4338653635329935, + "learning_rate": 4.134331811199932e-08, + "loss": 0.1091, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06005716696381569, + "step": 6075, + "valid_targets_mean": 5505.5, + "valid_targets_min": 1479 + }, + { + "epoch": 6.879456706281833, + "grad_norm": 0.43120513223825724, + "learning_rate": 3.7797359805333836e-08, + "loss": 0.1203, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05500316619873047, + "step": 6080, + "valid_targets_mean": 5784.8, + "valid_targets_min": 3391 + }, + { + "epoch": 6.885116015846067, + "grad_norm": 0.41499349845873795, + "learning_rate": 3.441021721954485e-08, + "loss": 0.1025, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06064637750387192, + "step": 6085, + "valid_targets_mean": 5319.6, + "valid_targets_min": 2709 + }, + { + "epoch": 6.8907753254103, + "grad_norm": 0.4971291224555983, + "learning_rate": 3.11819173021366e-08, + "loss": 0.1054, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0651983842253685, + "step": 6090, + "valid_targets_mean": 4527.2, + "valid_targets_min": 959 + }, + { + "epoch": 6.896434634974533, + "grad_norm": 0.48013999943041796, + "learning_rate": 2.8112485736881967e-08, + "loss": 0.1093, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.051422201097011566, + "step": 6095, + "valid_targets_mean": 3678.6, + "valid_targets_min": 1146 + }, + { + "epoch": 6.902093944538766, + "grad_norm": 0.5794318380528889, + "learning_rate": 2.520194694363376e-08, + "loss": 0.1005, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.055679094046354294, + "step": 6100, + "valid_targets_mean": 5373.1, + "valid_targets_min": 876 + }, + { + "epoch": 6.907753254103, + "grad_norm": 0.4368379401029065, + "learning_rate": 2.2450324078120423e-08, + "loss": 0.107, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.03872787952423096, + "step": 6105, + "valid_targets_mean": 3997.0, + "valid_targets_min": 636 + }, + { + "epoch": 6.913412563667233, + "grad_norm": 0.4739941624316716, + "learning_rate": 1.9857639031759522e-08, + "loss": 0.1073, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05454606935381889, + "step": 6110, + "valid_targets_mean": 5194.5, + "valid_targets_min": 1329 + }, + { + "epoch": 6.9190718732314656, + "grad_norm": 0.46512759294089845, + "learning_rate": 1.7423912431489e-08, + "loss": 0.1112, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.03343780338764191, + "step": 6115, + "valid_targets_mean": 3617.0, + "valid_targets_min": 813 + }, + { + "epoch": 6.924731182795699, + "grad_norm": 0.5029302136346466, + "learning_rate": 1.51491636396095e-08, + "loss": 0.1164, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05310727283358574, + "step": 6120, + "valid_targets_mean": 4289.4, + "valid_targets_min": 1083 + }, + { + "epoch": 6.930390492359932, + "grad_norm": 0.470309549438301, + "learning_rate": 1.3033410753608977e-08, + "loss": 0.1098, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05252791568636894, + "step": 6125, + "valid_targets_mean": 5231.6, + "valid_targets_min": 804 + }, + { + "epoch": 6.936049801924165, + "grad_norm": 0.5036633171808765, + "learning_rate": 1.1076670606045004e-08, + "loss": 0.1181, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06574505567550659, + "step": 6130, + "valid_targets_mean": 3916.4, + "valid_targets_min": 890 + }, + { + "epoch": 6.941709111488398, + "grad_norm": 0.4407901899170234, + "learning_rate": 9.278958764391554e-09, + "loss": 0.1107, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06226177513599396, + "step": 6135, + "valid_targets_mean": 6105.5, + "valid_targets_min": 1004 + }, + { + "epoch": 6.947368421052632, + "grad_norm": 0.4638526288610801, + "learning_rate": 7.64028953092133e-09, + "loss": 0.1081, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05736488848924637, + "step": 6140, + "valid_targets_mean": 4652.8, + "valid_targets_min": 1193 + }, + { + "epoch": 6.953027730616864, + "grad_norm": 0.46633670125671284, + "learning_rate": 6.16067594259695e-09, + "loss": 0.1079, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04559387266635895, + "step": 6145, + "valid_targets_mean": 3917.2, + "valid_targets_min": 1101 + }, + { + "epoch": 6.958687040181098, + "grad_norm": 0.44744154666726843, + "learning_rate": 4.840129770957713e-09, + "loss": 0.1099, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.054358646273612976, + "step": 6150, + "valid_targets_mean": 5090.1, + "valid_targets_min": 3765 + }, + { + "epoch": 6.964346349745331, + "grad_norm": 0.5230643582937403, + "learning_rate": 3.6786615220352208e-09, + "loss": 0.1171, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05299248546361923, + "step": 6155, + "valid_targets_mean": 3431.9, + "valid_targets_min": 509 + }, + { + "epoch": 6.970005659309564, + "grad_norm": 0.5147911655567775, + "learning_rate": 2.6762804362623353e-09, + "loss": 0.1085, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0875663012266159, + "step": 6160, + "valid_targets_mean": 5648.8, + "valid_targets_min": 912 + }, + { + "epoch": 6.975664968873797, + "grad_norm": 0.43418653529375867, + "learning_rate": 1.8329944884021288e-09, + "loss": 0.1051, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04747024178504944, + "step": 6165, + "valid_targets_mean": 4161.5, + "valid_targets_min": 970 + }, + { + "epoch": 6.981324278438031, + "grad_norm": 0.4368040896047236, + "learning_rate": 1.1488103874923717e-09, + "loss": 0.112, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.058158792555332184, + "step": 6170, + "valid_targets_mean": 4255.8, + "valid_targets_min": 999 + }, + { + "epoch": 6.986983588002264, + "grad_norm": 0.47597389018879266, + "learning_rate": 6.237335767744767e-10, + "loss": 0.109, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05641796439886093, + "step": 6175, + "valid_targets_mean": 5616.9, + "valid_targets_min": 776 + }, + { + "epoch": 6.992642897566497, + "grad_norm": 0.49455640618000407, + "learning_rate": 2.577682336690757e-10, + "loss": 0.103, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05923860892653465, + "step": 6180, + "valid_targets_mean": 4329.9, + "valid_targets_min": 689 + }, + { + "epoch": 6.99830220713073, + "grad_norm": 0.43701508673261275, + "learning_rate": 5.091726972938915e-11, + "loss": 0.1157, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04594724625349045, + "step": 6185, + "valid_targets_mean": 4271.5, + "valid_targets_min": 1746 + }, + { + "epoch": 7.0, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1120762899518013, + "step": 6187, + "total_flos": 3.03922427097147e+18, + "train_loss": 0.07450656744579091, + "train_runtime": 128018.7496, + "train_samples_per_second": 0.773, + "train_steps_per_second": 0.048, + "valid_targets_mean": 4571.4, + "valid_targets_min": 927 + } + ], + "logging_steps": 5, + "max_steps": 6188, + "num_input_tokens_seen": 0, + "num_train_epochs": 7, + "save_steps": 200, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 3.03922427097147e+18, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +}