diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,8517 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 7.0, + "eval_steps": 500, + "global_step": 3850, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.009099181073703366, + "grad_norm": 14.786740346535169, + "learning_rate": 4.155844155844156e-07, + "loss": 0.7964, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4239060580730438, + "step": 5, + "valid_targets_mean": 3328.6, + "valid_targets_min": 1920 + }, + { + "epoch": 0.018198362147406732, + "grad_norm": 16.012612850793044, + "learning_rate": 9.350649350649352e-07, + "loss": 0.7542, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.43628671765327454, + "step": 10, + "valid_targets_mean": 4038.5, + "valid_targets_min": 510 + }, + { + "epoch": 0.0272975432211101, + "grad_norm": 11.637341370322037, + "learning_rate": 1.4545454545454546e-06, + "loss": 0.6985, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25581756234169006, + "step": 15, + "valid_targets_mean": 2830.5, + "valid_targets_min": 1594 + }, + { + "epoch": 0.036396724294813464, + "grad_norm": 9.205480878961371, + "learning_rate": 1.9740259740259743e-06, + "loss": 0.7378, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.43626388907432556, + "step": 20, + "valid_targets_mean": 2573.2, + "valid_targets_min": 503 + }, + { + "epoch": 0.04549590536851683, + "grad_norm": 6.503069240270076, + "learning_rate": 2.4935064935064936e-06, + "loss": 0.6758, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3213389217853546, + "step": 25, + "valid_targets_mean": 3504.4, + "valid_targets_min": 562 + }, + { + "epoch": 0.0545950864422202, + "grad_norm": 5.450861291416442, + "learning_rate": 3.0129870129870133e-06, + "loss": 0.6141, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.31898123025894165, + "step": 30, + "valid_targets_mean": 3968.9, + "valid_targets_min": 2624 + }, + { + "epoch": 0.06369426751592357, + "grad_norm": 4.3390051294027145, + "learning_rate": 3.532467532467533e-06, + "loss": 0.5348, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.29908043146133423, + "step": 35, + "valid_targets_mean": 3511.2, + "valid_targets_min": 1657 + }, + { + "epoch": 0.07279344858962693, + "grad_norm": 1.7434242750268414, + "learning_rate": 4.051948051948053e-06, + "loss": 0.4927, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2466040551662445, + "step": 40, + "valid_targets_mean": 4462.8, + "valid_targets_min": 862 + }, + { + "epoch": 0.0818926296633303, + "grad_norm": 1.3123142041030291, + "learning_rate": 4.571428571428572e-06, + "loss": 0.4902, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24911117553710938, + "step": 45, + "valid_targets_mean": 4395.0, + "valid_targets_min": 2994 + }, + { + "epoch": 0.09099181073703366, + "grad_norm": 1.0370754476225932, + "learning_rate": 5.090909090909091e-06, + "loss": 0.4637, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21570774912834167, + "step": 50, + "valid_targets_mean": 3738.4, + "valid_targets_min": 601 + }, + { + "epoch": 0.10009099181073704, + "grad_norm": 1.0104012155244833, + "learning_rate": 5.6103896103896105e-06, + "loss": 0.4562, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24701082706451416, + "step": 55, + "valid_targets_mean": 2790.5, + "valid_targets_min": 1528 + }, + { + "epoch": 0.1091901728844404, + "grad_norm": 0.7663704291957927, + "learning_rate": 6.129870129870131e-06, + "loss": 0.4542, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23319780826568604, + "step": 60, + "valid_targets_mean": 4510.6, + "valid_targets_min": 1800 + }, + { + "epoch": 0.11828935395814377, + "grad_norm": 0.8498028917774307, + "learning_rate": 6.64935064935065e-06, + "loss": 0.4485, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24339726567268372, + "step": 65, + "valid_targets_mean": 3079.8, + "valid_targets_min": 1365 + }, + { + "epoch": 0.12738853503184713, + "grad_norm": 0.7238294258812147, + "learning_rate": 7.16883116883117e-06, + "loss": 0.4281, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18463903665542603, + "step": 70, + "valid_targets_mean": 2951.8, + "valid_targets_min": 306 + }, + { + "epoch": 0.1364877161055505, + "grad_norm": 0.7422498404553763, + "learning_rate": 7.68831168831169e-06, + "loss": 0.4167, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2971900403499603, + "step": 75, + "valid_targets_mean": 5033.6, + "valid_targets_min": 705 + }, + { + "epoch": 0.14558689717925385, + "grad_norm": 0.7477819199483188, + "learning_rate": 8.20779220779221e-06, + "loss": 0.4342, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19027340412139893, + "step": 80, + "valid_targets_mean": 2463.9, + "valid_targets_min": 287 + }, + { + "epoch": 0.15468607825295724, + "grad_norm": 0.7810954141754345, + "learning_rate": 8.727272727272728e-06, + "loss": 0.4291, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21374209225177765, + "step": 85, + "valid_targets_mean": 2681.8, + "valid_targets_min": 1059 + }, + { + "epoch": 0.1637852593266606, + "grad_norm": 0.736357169889036, + "learning_rate": 9.246753246753248e-06, + "loss": 0.411, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24572578072547913, + "step": 90, + "valid_targets_mean": 3635.2, + "valid_targets_min": 1665 + }, + { + "epoch": 0.17288444040036396, + "grad_norm": 0.601778477867443, + "learning_rate": 9.766233766233766e-06, + "loss": 0.4094, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20906668901443481, + "step": 95, + "valid_targets_mean": 4694.9, + "valid_targets_min": 2787 + }, + { + "epoch": 0.18198362147406733, + "grad_norm": 0.7108557287411013, + "learning_rate": 1.0285714285714285e-05, + "loss": 0.4183, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23694711923599243, + "step": 100, + "valid_targets_mean": 3789.9, + "valid_targets_min": 2115 + }, + { + "epoch": 0.1910828025477707, + "grad_norm": 0.7213642248877461, + "learning_rate": 1.0805194805194805e-05, + "loss": 0.3775, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17074792087078094, + "step": 105, + "valid_targets_mean": 2586.1, + "valid_targets_min": 188 + }, + { + "epoch": 0.20018198362147407, + "grad_norm": 0.5465254431332037, + "learning_rate": 1.1324675324675325e-05, + "loss": 0.3878, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18964694440364838, + "step": 110, + "valid_targets_mean": 3826.4, + "valid_targets_min": 1437 + }, + { + "epoch": 0.20928116469517744, + "grad_norm": 0.6290670154418285, + "learning_rate": 1.1844155844155845e-05, + "loss": 0.3946, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17479845881462097, + "step": 115, + "valid_targets_mean": 3998.0, + "valid_targets_min": 501 + }, + { + "epoch": 0.2183803457688808, + "grad_norm": 0.7219274776090822, + "learning_rate": 1.2363636363636364e-05, + "loss": 0.3796, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22305920720100403, + "step": 120, + "valid_targets_mean": 3191.8, + "valid_targets_min": 1186 + }, + { + "epoch": 0.22747952684258416, + "grad_norm": 0.6217810003610046, + "learning_rate": 1.2883116883116884e-05, + "loss": 0.3889, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23009517788887024, + "step": 125, + "valid_targets_mean": 4508.2, + "valid_targets_min": 2876 + }, + { + "epoch": 0.23657870791628755, + "grad_norm": 0.7528270014107937, + "learning_rate": 1.3402597402597404e-05, + "loss": 0.4085, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2731509804725647, + "step": 130, + "valid_targets_mean": 4044.8, + "valid_targets_min": 191 + }, + { + "epoch": 0.2456778889899909, + "grad_norm": 0.773354064973906, + "learning_rate": 1.3922077922077924e-05, + "loss": 0.4325, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24367764592170715, + "step": 135, + "valid_targets_mean": 2869.1, + "valid_targets_min": 276 + }, + { + "epoch": 0.25477707006369427, + "grad_norm": 0.6366461975764265, + "learning_rate": 1.4441558441558442e-05, + "loss": 0.3849, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2264237254858017, + "step": 140, + "valid_targets_mean": 4120.5, + "valid_targets_min": 297 + }, + { + "epoch": 0.26387625113739765, + "grad_norm": 0.73407765075352, + "learning_rate": 1.4961038961038962e-05, + "loss": 0.3789, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24225257337093353, + "step": 145, + "valid_targets_mean": 3395.4, + "valid_targets_min": 2194 + }, + { + "epoch": 0.272975432211101, + "grad_norm": 0.528975542490164, + "learning_rate": 1.548051948051948e-05, + "loss": 0.3669, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15162131190299988, + "step": 150, + "valid_targets_mean": 5184.5, + "valid_targets_min": 1548 + }, + { + "epoch": 0.2820746132848044, + "grad_norm": 0.6407623243411594, + "learning_rate": 1.6000000000000003e-05, + "loss": 0.3807, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17568965256214142, + "step": 155, + "valid_targets_mean": 3090.6, + "valid_targets_min": 1391 + }, + { + "epoch": 0.2911737943585077, + "grad_norm": 0.5598898794356707, + "learning_rate": 1.651948051948052e-05, + "loss": 0.3768, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18702304363250732, + "step": 160, + "valid_targets_mean": 4726.5, + "valid_targets_min": 1965 + }, + { + "epoch": 0.3002729754322111, + "grad_norm": 0.80253866312149, + "learning_rate": 1.703896103896104e-05, + "loss": 0.3835, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20537051558494568, + "step": 165, + "valid_targets_mean": 3467.9, + "valid_targets_min": 1035 + }, + { + "epoch": 0.3093721565059145, + "grad_norm": 0.617856603797492, + "learning_rate": 1.7558441558441558e-05, + "loss": 0.3794, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21695730090141296, + "step": 170, + "valid_targets_mean": 4163.9, + "valid_targets_min": 2707 + }, + { + "epoch": 0.3184713375796178, + "grad_norm": 0.6035798544318993, + "learning_rate": 1.807792207792208e-05, + "loss": 0.3703, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15242548286914825, + "step": 175, + "valid_targets_mean": 3452.9, + "valid_targets_min": 863 + }, + { + "epoch": 0.3275705186533212, + "grad_norm": 0.7205194191830786, + "learning_rate": 1.8597402597402598e-05, + "loss": 0.3566, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22975976765155792, + "step": 180, + "valid_targets_mean": 3661.2, + "valid_targets_min": 1386 + }, + { + "epoch": 0.33666969972702454, + "grad_norm": 0.664114211118228, + "learning_rate": 1.9116883116883117e-05, + "loss": 0.3906, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1819133758544922, + "step": 185, + "valid_targets_mean": 3520.2, + "valid_targets_min": 302 + }, + { + "epoch": 0.34576888080072793, + "grad_norm": 0.630988318809503, + "learning_rate": 1.963636363636364e-05, + "loss": 0.361, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1779000461101532, + "step": 190, + "valid_targets_mean": 3337.4, + "valid_targets_min": 2080 + }, + { + "epoch": 0.3548680618744313, + "grad_norm": 0.657730355236433, + "learning_rate": 2.0155844155844157e-05, + "loss": 0.3542, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17508140206336975, + "step": 195, + "valid_targets_mean": 3508.1, + "valid_targets_min": 1803 + }, + { + "epoch": 0.36396724294813465, + "grad_norm": 0.5665992228116543, + "learning_rate": 2.0675324675324675e-05, + "loss": 0.3336, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14676086604595184, + "step": 200, + "valid_targets_mean": 7127.0, + "valid_targets_min": 1887 + }, + { + "epoch": 0.37306642402183804, + "grad_norm": 0.6282569130705208, + "learning_rate": 2.1194805194805194e-05, + "loss": 0.3454, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19846948981285095, + "step": 205, + "valid_targets_mean": 3845.4, + "valid_targets_min": 545 + }, + { + "epoch": 0.3821656050955414, + "grad_norm": 0.6716368289981851, + "learning_rate": 2.1714285714285715e-05, + "loss": 0.3542, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20502102375030518, + "step": 210, + "valid_targets_mean": 4379.2, + "valid_targets_min": 1637 + }, + { + "epoch": 0.39126478616924476, + "grad_norm": 0.586040871093467, + "learning_rate": 2.2233766233766234e-05, + "loss": 0.3325, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20062410831451416, + "step": 215, + "valid_targets_mean": 4620.4, + "valid_targets_min": 306 + }, + { + "epoch": 0.40036396724294815, + "grad_norm": 0.7116115352531075, + "learning_rate": 2.2753246753246752e-05, + "loss": 0.355, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20102885365486145, + "step": 220, + "valid_targets_mean": 4042.2, + "valid_targets_min": 692 + }, + { + "epoch": 0.4094631483166515, + "grad_norm": 0.7593643012524895, + "learning_rate": 2.3272727272727274e-05, + "loss": 0.3669, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16333557665348053, + "step": 225, + "valid_targets_mean": 2727.2, + "valid_targets_min": 1471 + }, + { + "epoch": 0.41856232939035487, + "grad_norm": 0.6436238397243124, + "learning_rate": 2.3792207792207793e-05, + "loss": 0.377, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19011452794075012, + "step": 230, + "valid_targets_mean": 3853.6, + "valid_targets_min": 327 + }, + { + "epoch": 0.42766151046405826, + "grad_norm": 0.49381503876442573, + "learning_rate": 2.4311688311688314e-05, + "loss": 0.3322, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1791185438632965, + "step": 235, + "valid_targets_mean": 6669.8, + "valid_targets_min": 4161 + }, + { + "epoch": 0.4367606915377616, + "grad_norm": 0.7562039996015982, + "learning_rate": 2.4831168831168833e-05, + "loss": 0.3279, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1556703746318817, + "step": 240, + "valid_targets_mean": 2839.9, + "valid_targets_min": 690 + }, + { + "epoch": 0.445859872611465, + "grad_norm": 0.6948851833431993, + "learning_rate": 2.535064935064935e-05, + "loss": 0.3472, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16435737907886505, + "step": 245, + "valid_targets_mean": 3262.5, + "valid_targets_min": 1379 + }, + { + "epoch": 0.4549590536851683, + "grad_norm": 0.58823239503998, + "learning_rate": 2.5870129870129873e-05, + "loss": 0.3371, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18383200466632843, + "step": 250, + "valid_targets_mean": 4217.0, + "valid_targets_min": 1857 + }, + { + "epoch": 0.4640582347588717, + "grad_norm": 0.7146593511175929, + "learning_rate": 2.638961038961039e-05, + "loss": 0.3456, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20923785865306854, + "step": 255, + "valid_targets_mean": 3851.0, + "valid_targets_min": 2105 + }, + { + "epoch": 0.4731574158325751, + "grad_norm": 0.639172708679333, + "learning_rate": 2.690909090909091e-05, + "loss": 0.3481, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16769519448280334, + "step": 260, + "valid_targets_mean": 3705.9, + "valid_targets_min": 1050 + }, + { + "epoch": 0.4822565969062784, + "grad_norm": 0.5819486451804095, + "learning_rate": 2.742857142857143e-05, + "loss": 0.3405, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1527741402387619, + "step": 265, + "valid_targets_mean": 3778.8, + "valid_targets_min": 1858 + }, + { + "epoch": 0.4913557779799818, + "grad_norm": 0.7052096499331427, + "learning_rate": 2.794805194805195e-05, + "loss": 0.3661, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16779346764087677, + "step": 270, + "valid_targets_mean": 2560.2, + "valid_targets_min": 979 + }, + { + "epoch": 0.5004549590536852, + "grad_norm": 0.6155479988948123, + "learning_rate": 2.8467532467532472e-05, + "loss": 0.3386, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17713932693004608, + "step": 275, + "valid_targets_mean": 4008.5, + "valid_targets_min": 1179 + }, + { + "epoch": 0.5095541401273885, + "grad_norm": 0.7222593319271032, + "learning_rate": 2.898701298701299e-05, + "loss": 0.3627, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18285799026489258, + "step": 280, + "valid_targets_mean": 2894.9, + "valid_targets_min": 1384 + }, + { + "epoch": 0.5186533212010919, + "grad_norm": 0.5061744009962925, + "learning_rate": 2.950649350649351e-05, + "loss": 0.3327, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11711747944355011, + "step": 285, + "valid_targets_mean": 4999.4, + "valid_targets_min": 506 + }, + { + "epoch": 0.5277525022747953, + "grad_norm": 0.628458589369126, + "learning_rate": 3.002597402597403e-05, + "loss": 0.3524, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20639720559120178, + "step": 290, + "valid_targets_mean": 3854.2, + "valid_targets_min": 1366 + }, + { + "epoch": 0.5368516833484986, + "grad_norm": 0.7356451176164959, + "learning_rate": 3.054545454545455e-05, + "loss": 0.3704, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17715367674827576, + "step": 295, + "valid_targets_mean": 2221.9, + "valid_targets_min": 159 + }, + { + "epoch": 0.545950864422202, + "grad_norm": 0.6834528754923809, + "learning_rate": 3.106493506493507e-05, + "loss": 0.3664, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.202093243598938, + "step": 300, + "valid_targets_mean": 3654.0, + "valid_targets_min": 2125 + }, + { + "epoch": 0.5550500454959054, + "grad_norm": 0.644798154871265, + "learning_rate": 3.158441558441559e-05, + "loss": 0.3375, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17941009998321533, + "step": 305, + "valid_targets_mean": 4599.6, + "valid_targets_min": 1266 + }, + { + "epoch": 0.5641492265696088, + "grad_norm": 0.6306840599749488, + "learning_rate": 3.210389610389611e-05, + "loss": 0.3457, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1706070601940155, + "step": 310, + "valid_targets_mean": 3753.9, + "valid_targets_min": 867 + }, + { + "epoch": 0.5732484076433121, + "grad_norm": 0.6591743834234475, + "learning_rate": 3.2623376623376626e-05, + "loss": 0.3473, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17178833484649658, + "step": 315, + "valid_targets_mean": 3608.0, + "valid_targets_min": 402 + }, + { + "epoch": 0.5823475887170154, + "grad_norm": 0.6834492202812368, + "learning_rate": 3.314285714285715e-05, + "loss": 0.3601, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16191285848617554, + "step": 320, + "valid_targets_mean": 2754.0, + "valid_targets_min": 287 + }, + { + "epoch": 0.5914467697907189, + "grad_norm": 0.6218165451057371, + "learning_rate": 3.366233766233766e-05, + "loss": 0.3288, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18606127798557281, + "step": 325, + "valid_targets_mean": 4163.6, + "valid_targets_min": 1883 + }, + { + "epoch": 0.6005459508644222, + "grad_norm": 0.6971555869668246, + "learning_rate": 3.4181818181818185e-05, + "loss": 0.3588, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1653805673122406, + "step": 330, + "valid_targets_mean": 3056.8, + "valid_targets_min": 286 + }, + { + "epoch": 0.6096451319381255, + "grad_norm": 0.6694568351469594, + "learning_rate": 3.47012987012987e-05, + "loss": 0.3542, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18698689341545105, + "step": 335, + "valid_targets_mean": 3503.6, + "valid_targets_min": 352 + }, + { + "epoch": 0.618744313011829, + "grad_norm": 0.7602070220543796, + "learning_rate": 3.522077922077922e-05, + "loss": 0.3574, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18287214636802673, + "step": 340, + "valid_targets_mean": 3039.4, + "valid_targets_min": 902 + }, + { + "epoch": 0.6278434940855323, + "grad_norm": 1.4136824233595235, + "learning_rate": 3.5740259740259743e-05, + "loss": 0.3515, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17238737642765045, + "step": 345, + "valid_targets_mean": 2872.9, + "valid_targets_min": 872 + }, + { + "epoch": 0.6369426751592356, + "grad_norm": 0.5035685691094948, + "learning_rate": 3.625974025974026e-05, + "loss": 0.3246, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1299327313899994, + "step": 350, + "valid_targets_mean": 3888.1, + "valid_targets_min": 811 + }, + { + "epoch": 0.6460418562329391, + "grad_norm": 0.7509236623969442, + "learning_rate": 3.677922077922078e-05, + "loss": 0.3434, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16455626487731934, + "step": 355, + "valid_targets_mean": 2435.2, + "valid_targets_min": 382 + }, + { + "epoch": 0.6551410373066424, + "grad_norm": 0.7267357043550218, + "learning_rate": 3.72987012987013e-05, + "loss": 0.3339, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15680286288261414, + "step": 360, + "valid_targets_mean": 2537.6, + "valid_targets_min": 723 + }, + { + "epoch": 0.6642402183803457, + "grad_norm": 0.5882397350664301, + "learning_rate": 3.7818181818181824e-05, + "loss": 0.3445, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17925706505775452, + "step": 365, + "valid_targets_mean": 3746.2, + "valid_targets_min": 174 + }, + { + "epoch": 0.6733393994540491, + "grad_norm": 0.6701257363851001, + "learning_rate": 3.833766233766234e-05, + "loss": 0.3429, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15250617265701294, + "step": 370, + "valid_targets_mean": 3226.2, + "valid_targets_min": 534 + }, + { + "epoch": 0.6824385805277525, + "grad_norm": 0.7709420665796557, + "learning_rate": 3.885714285714286e-05, + "loss": 0.3363, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19574809074401855, + "step": 375, + "valid_targets_mean": 2969.6, + "valid_targets_min": 579 + }, + { + "epoch": 0.6915377616014559, + "grad_norm": 0.7321311016328584, + "learning_rate": 3.937662337662338e-05, + "loss": 0.3474, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18352022767066956, + "step": 380, + "valid_targets_mean": 3078.8, + "valid_targets_min": 246 + }, + { + "epoch": 0.7006369426751592, + "grad_norm": 0.5227688266969235, + "learning_rate": 3.98961038961039e-05, + "loss": 0.3135, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1240387111902237, + "step": 385, + "valid_targets_mean": 5729.1, + "valid_targets_min": 148 + }, + { + "epoch": 0.7097361237488626, + "grad_norm": 0.6634696872428075, + "learning_rate": 3.999986847364818e-05, + "loss": 0.3455, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14379724860191345, + "step": 390, + "valid_targets_mean": 3921.5, + "valid_targets_min": 2435 + }, + { + "epoch": 0.718835304822566, + "grad_norm": 0.6786231321488037, + "learning_rate": 3.999933415080877e-05, + "loss": 0.3428, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15791067481040955, + "step": 395, + "valid_targets_mean": 2961.2, + "valid_targets_min": 1549 + }, + { + "epoch": 0.7279344858962693, + "grad_norm": 0.7042840913170564, + "learning_rate": 3.999838882205719e-05, + "loss": 0.3347, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17206677794456482, + "step": 400, + "valid_targets_mean": 4209.1, + "valid_targets_min": 295 + }, + { + "epoch": 0.7370336669699727, + "grad_norm": 0.776618106056019, + "learning_rate": 3.999703250682087e-05, + "loss": 0.3471, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1754663586616516, + "step": 405, + "valid_targets_mean": 2778.5, + "valid_targets_min": 513 + }, + { + "epoch": 0.7461328480436761, + "grad_norm": 0.6821666886795281, + "learning_rate": 3.9995265232973414e-05, + "loss": 0.321, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13924285769462585, + "step": 410, + "valid_targets_mean": 3779.0, + "valid_targets_min": 270 + }, + { + "epoch": 0.7552320291173794, + "grad_norm": 0.6028714659678347, + "learning_rate": 3.9993087036834034e-05, + "loss": 0.3242, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1615755558013916, + "step": 415, + "valid_targets_mean": 3375.9, + "valid_targets_min": 2078 + }, + { + "epoch": 0.7643312101910829, + "grad_norm": 0.629543345625808, + "learning_rate": 3.9990497963166797e-05, + "loss": 0.3329, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1638343334197998, + "step": 420, + "valid_targets_mean": 3765.9, + "valid_targets_min": 1599 + }, + { + "epoch": 0.7734303912647862, + "grad_norm": 0.6579488775198418, + "learning_rate": 3.99874980651797e-05, + "loss": 0.3283, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14738169312477112, + "step": 425, + "valid_targets_mean": 3576.5, + "valid_targets_min": 2775 + }, + { + "epoch": 0.7825295723384895, + "grad_norm": 0.6561952681802788, + "learning_rate": 3.998408740452359e-05, + "loss": 0.3288, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16806158423423767, + "step": 430, + "valid_targets_mean": 3370.6, + "valid_targets_min": 388 + }, + { + "epoch": 0.7916287534121929, + "grad_norm": 0.7592724078668787, + "learning_rate": 3.998026605129088e-05, + "loss": 0.342, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21901732683181763, + "step": 435, + "valid_targets_mean": 3533.4, + "valid_targets_min": 433 + }, + { + "epoch": 0.8007279344858963, + "grad_norm": 0.7391353010655607, + "learning_rate": 3.997603408401413e-05, + "loss": 0.3378, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1867692768573761, + "step": 440, + "valid_targets_mean": 4258.1, + "valid_targets_min": 2031 + }, + { + "epoch": 0.8098271155595996, + "grad_norm": 0.7795273567250613, + "learning_rate": 3.997139158966441e-05, + "loss": 0.3335, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13619282841682434, + "step": 445, + "valid_targets_mean": 2759.1, + "valid_targets_min": 1005 + }, + { + "epoch": 0.818926296633303, + "grad_norm": 0.6562343920190629, + "learning_rate": 3.996633866364953e-05, + "loss": 0.3274, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18688486516475677, + "step": 450, + "valid_targets_mean": 4256.5, + "valid_targets_min": 2352 + }, + { + "epoch": 0.8280254777070064, + "grad_norm": 0.6567770384652188, + "learning_rate": 3.996087540981206e-05, + "loss": 0.3392, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1674097329378128, + "step": 455, + "valid_targets_mean": 3862.6, + "valid_targets_min": 1844 + }, + { + "epoch": 0.8371246587807097, + "grad_norm": 0.43691333266562116, + "learning_rate": 3.9955001940427236e-05, + "loss": 0.3385, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13044530153274536, + "step": 460, + "valid_targets_mean": 7291.9, + "valid_targets_min": 1771 + }, + { + "epoch": 0.8462238398544131, + "grad_norm": 0.5118948109279933, + "learning_rate": 3.99487183762006e-05, + "loss": 0.308, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12879431247711182, + "step": 465, + "valid_targets_mean": 4598.5, + "valid_targets_min": 776 + }, + { + "epoch": 0.8553230209281165, + "grad_norm": 0.8074499299412539, + "learning_rate": 3.994202484626555e-05, + "loss": 0.3267, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14642921090126038, + "step": 470, + "valid_targets_mean": 2749.0, + "valid_targets_min": 1419 + }, + { + "epoch": 0.8644222020018199, + "grad_norm": 0.46974985023916216, + "learning_rate": 3.993492148818069e-05, + "loss": 0.3302, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10927803814411163, + "step": 475, + "valid_targets_mean": 3984.2, + "valid_targets_min": 2741 + }, + { + "epoch": 0.8735213830755232, + "grad_norm": 0.5801889932724914, + "learning_rate": 3.992740844792699e-05, + "loss": 0.3269, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16290982067584991, + "step": 480, + "valid_targets_mean": 3765.0, + "valid_targets_min": 2484 + }, + { + "epoch": 0.8826205641492265, + "grad_norm": 0.7885165014083056, + "learning_rate": 3.991948587990479e-05, + "loss": 0.338, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1817663013935089, + "step": 485, + "valid_targets_mean": 3399.4, + "valid_targets_min": 332 + }, + { + "epoch": 0.89171974522293, + "grad_norm": 0.6491169797538565, + "learning_rate": 3.991115394693061e-05, + "loss": 0.3351, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18687278032302856, + "step": 490, + "valid_targets_mean": 4333.0, + "valid_targets_min": 3067 + }, + { + "epoch": 0.9008189262966333, + "grad_norm": 0.6388245302934621, + "learning_rate": 3.990241282023385e-05, + "loss": 0.3066, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.171747088432312, + "step": 495, + "valid_targets_mean": 3816.8, + "valid_targets_min": 1782 + }, + { + "epoch": 0.9099181073703366, + "grad_norm": 0.9397629711325225, + "learning_rate": 3.989326267945323e-05, + "loss": 0.3338, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1524638682603836, + "step": 500, + "valid_targets_mean": 2860.0, + "valid_targets_min": 1797 + }, + { + "epoch": 0.9190172884440401, + "grad_norm": 0.5993283056073379, + "learning_rate": 3.98837037126331e-05, + "loss": 0.3366, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1285330355167389, + "step": 505, + "valid_targets_mean": 3083.9, + "valid_targets_min": 344 + }, + { + "epoch": 0.9281164695177434, + "grad_norm": 0.6429169438989089, + "learning_rate": 3.98737361162196e-05, + "loss": 0.349, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15727660059928894, + "step": 510, + "valid_targets_mean": 3575.6, + "valid_targets_min": 590 + }, + { + "epoch": 0.9372156505914467, + "grad_norm": 0.6432681607328898, + "learning_rate": 3.986336009505659e-05, + "loss": 0.3, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.134327232837677, + "step": 515, + "valid_targets_mean": 2672.4, + "valid_targets_min": 378 + }, + { + "epoch": 0.9463148316651502, + "grad_norm": 0.5634556848468228, + "learning_rate": 3.985257586238149e-05, + "loss": 0.3458, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1891605108976364, + "step": 520, + "valid_targets_mean": 4462.9, + "valid_targets_min": 2757 + }, + { + "epoch": 0.9554140127388535, + "grad_norm": 0.7126742924631625, + "learning_rate": 3.984138363982084e-05, + "loss": 0.3403, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16645973920822144, + "step": 525, + "valid_targets_mean": 2579.2, + "valid_targets_min": 303 + }, + { + "epoch": 0.9645131938125568, + "grad_norm": 0.5297879582892905, + "learning_rate": 3.982978365738578e-05, + "loss": 0.3148, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10891679674386978, + "step": 530, + "valid_targets_mean": 4010.6, + "valid_targets_min": 2391 + }, + { + "epoch": 0.9736123748862603, + "grad_norm": 0.6474325926954897, + "learning_rate": 3.981777615346731e-05, + "loss": 0.3209, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1598714292049408, + "step": 535, + "valid_targets_mean": 3160.5, + "valid_targets_min": 1483 + }, + { + "epoch": 0.9827115559599636, + "grad_norm": 0.6380981419738545, + "learning_rate": 3.980536137483141e-05, + "loss": 0.3326, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15230050683021545, + "step": 540, + "valid_targets_mean": 3283.4, + "valid_targets_min": 418 + }, + { + "epoch": 0.991810737033667, + "grad_norm": 0.6228267400147609, + "learning_rate": 3.9792539576613934e-05, + "loss": 0.3276, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1523316502571106, + "step": 545, + "valid_targets_mean": 2762.2, + "valid_targets_min": 988 + }, + { + "epoch": 1.0, + "grad_norm": 0.7703107249289565, + "learning_rate": 3.9779311022315405e-05, + "loss": 0.334, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3257380425930023, + "step": 550, + "valid_targets_mean": 4005.5, + "valid_targets_min": 552 + }, + { + "epoch": 1.0090991810737033, + "grad_norm": 0.6564463452360433, + "learning_rate": 3.976567598379558e-05, + "loss": 0.3199, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16916556656360626, + "step": 555, + "valid_targets_mean": 3139.6, + "valid_targets_min": 557 + }, + { + "epoch": 1.0181983621474067, + "grad_norm": 0.6316598177475052, + "learning_rate": 3.975163474126785e-05, + "loss": 0.314, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1529175043106079, + "step": 560, + "valid_targets_mean": 3807.1, + "valid_targets_min": 748 + }, + { + "epoch": 1.02729754322111, + "grad_norm": 0.5512261147650673, + "learning_rate": 3.9737187583293505e-05, + "loss": 0.302, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11598987877368927, + "step": 565, + "valid_targets_mean": 3748.8, + "valid_targets_min": 1146 + }, + { + "epoch": 1.0363967242948136, + "grad_norm": 0.5664752755228538, + "learning_rate": 3.9722334806775806e-05, + "loss": 0.3217, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12348569184541702, + "step": 570, + "valid_targets_mean": 3140.4, + "valid_targets_min": 1894 + }, + { + "epoch": 1.0454959053685169, + "grad_norm": 0.6383017456860482, + "learning_rate": 3.9707076716953866e-05, + "loss": 0.3028, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15872514247894287, + "step": 575, + "valid_targets_mean": 4805.6, + "valid_targets_min": 1894 + }, + { + "epoch": 1.0545950864422202, + "grad_norm": 0.5375002734811841, + "learning_rate": 3.969141362739636e-05, + "loss": 0.2955, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1593000888824463, + "step": 580, + "valid_targets_mean": 4595.2, + "valid_targets_min": 2395 + }, + { + "epoch": 1.0636942675159236, + "grad_norm": 0.5566540119334547, + "learning_rate": 3.967534585999515e-05, + "loss": 0.3154, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12608671188354492, + "step": 585, + "valid_targets_mean": 3549.9, + "valid_targets_min": 402 + }, + { + "epoch": 1.0727934485896269, + "grad_norm": 0.5858299909987527, + "learning_rate": 3.965887374495859e-05, + "loss": 0.2855, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15359722077846527, + "step": 590, + "valid_targets_mean": 4215.9, + "valid_targets_min": 501 + }, + { + "epoch": 1.0818926296633302, + "grad_norm": 0.5380275782190423, + "learning_rate": 3.964199762080478e-05, + "loss": 0.3086, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1915718913078308, + "step": 595, + "valid_targets_mean": 5193.9, + "valid_targets_min": 1867 + }, + { + "epoch": 1.0909918107370338, + "grad_norm": 1.3584511182550558, + "learning_rate": 3.9624717834354606e-05, + "loss": 0.3112, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17153777182102203, + "step": 600, + "valid_targets_mean": 3738.6, + "valid_targets_min": 1454 + }, + { + "epoch": 1.100090991810737, + "grad_norm": 0.559010500557733, + "learning_rate": 3.9607034740724615e-05, + "loss": 0.3046, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11709210276603699, + "step": 605, + "valid_targets_mean": 3672.4, + "valid_targets_min": 388 + }, + { + "epoch": 1.1091901728844404, + "grad_norm": 0.6702458849968536, + "learning_rate": 3.958894870331971e-05, + "loss": 0.3029, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15884870290756226, + "step": 610, + "valid_targets_mean": 3277.5, + "valid_targets_min": 1322 + }, + { + "epoch": 1.1182893539581438, + "grad_norm": 0.6566718317921177, + "learning_rate": 3.9570460093825664e-05, + "loss": 0.3032, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13488319516181946, + "step": 615, + "valid_targets_mean": 3118.4, + "valid_targets_min": 991 + }, + { + "epoch": 1.127388535031847, + "grad_norm": 0.5392787236116686, + "learning_rate": 3.9551569292201536e-05, + "loss": 0.3105, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12678071856498718, + "step": 620, + "valid_targets_mean": 5099.2, + "valid_targets_min": 2259 + }, + { + "epoch": 1.1364877161055504, + "grad_norm": 0.5439955061560059, + "learning_rate": 3.9532276686671804e-05, + "loss": 0.2998, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17293554544448853, + "step": 625, + "valid_targets_mean": 5169.0, + "valid_targets_min": 858 + }, + { + "epoch": 1.1455868971792538, + "grad_norm": 0.5854445058276169, + "learning_rate": 3.951258267371841e-05, + "loss": 0.3012, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.132243812084198, + "step": 630, + "valid_targets_mean": 2877.4, + "valid_targets_min": 1308 + }, + { + "epoch": 1.1546860782529573, + "grad_norm": 0.5041148775156069, + "learning_rate": 3.9492487658072615e-05, + "loss": 0.3091, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09935662150382996, + "step": 635, + "valid_targets_mean": 3389.1, + "valid_targets_min": 690 + }, + { + "epoch": 1.1637852593266607, + "grad_norm": 0.44735914114363684, + "learning_rate": 3.947199205270668e-05, + "loss": 0.2936, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10296198725700378, + "step": 640, + "valid_targets_mean": 5112.4, + "valid_targets_min": 1001 + }, + { + "epoch": 1.172884440400364, + "grad_norm": 0.5201900009933367, + "learning_rate": 3.9451096278825386e-05, + "loss": 0.2938, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16323642432689667, + "step": 645, + "valid_targets_mean": 5439.1, + "valid_targets_min": 2591 + }, + { + "epoch": 1.1819836214740673, + "grad_norm": 0.5540042968486397, + "learning_rate": 3.942980076585735e-05, + "loss": 0.2944, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1331024318933487, + "step": 650, + "valid_targets_mean": 4330.9, + "valid_targets_min": 1426 + }, + { + "epoch": 1.1910828025477707, + "grad_norm": 0.667990204314806, + "learning_rate": 3.940810595144624e-05, + "loss": 0.3258, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17009712755680084, + "step": 655, + "valid_targets_mean": 4543.8, + "valid_targets_min": 2084 + }, + { + "epoch": 1.200181983621474, + "grad_norm": 0.5266945348745354, + "learning_rate": 3.938601228144173e-05, + "loss": 0.3017, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15906137228012085, + "step": 660, + "valid_targets_mean": 4344.6, + "valid_targets_min": 2687 + }, + { + "epoch": 1.2092811646951773, + "grad_norm": 0.7452722762074235, + "learning_rate": 3.9363520209890405e-05, + "loss": 0.3122, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17923469841480255, + "step": 665, + "valid_targets_mean": 3565.0, + "valid_targets_min": 1291 + }, + { + "epoch": 1.2183803457688809, + "grad_norm": 0.5906857173983012, + "learning_rate": 3.9340630199026365e-05, + "loss": 0.3121, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14141051471233368, + "step": 670, + "valid_targets_mean": 3291.1, + "valid_targets_min": 1233 + }, + { + "epoch": 1.2274795268425842, + "grad_norm": 0.6496938598519507, + "learning_rate": 3.931734271926176e-05, + "loss": 0.2952, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15126341581344604, + "step": 675, + "valid_targets_mean": 3045.4, + "valid_targets_min": 236 + }, + { + "epoch": 1.2365787079162875, + "grad_norm": 0.5486756887499763, + "learning_rate": 3.929365824917712e-05, + "loss": 0.3211, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12430839240550995, + "step": 680, + "valid_targets_mean": 3235.9, + "valid_targets_min": 293 + }, + { + "epoch": 1.2456778889899909, + "grad_norm": 0.6798422752091549, + "learning_rate": 3.9269577275511504e-05, + "loss": 0.2986, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16678652167320251, + "step": 685, + "valid_targets_mean": 3219.2, + "valid_targets_min": 1768 + }, + { + "epoch": 1.2547770700636942, + "grad_norm": 0.5514632289376944, + "learning_rate": 3.924510029315253e-05, + "loss": 0.2955, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14286918938159943, + "step": 690, + "valid_targets_mean": 4353.2, + "valid_targets_min": 640 + }, + { + "epoch": 1.2638762511373978, + "grad_norm": 0.5624165020198547, + "learning_rate": 3.922022780512614e-05, + "loss": 0.3092, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14001207053661346, + "step": 695, + "valid_targets_mean": 3567.9, + "valid_targets_min": 175 + }, + { + "epoch": 1.2729754322111009, + "grad_norm": 0.7326351561063902, + "learning_rate": 3.919496032258637e-05, + "loss": 0.2925, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17794157564640045, + "step": 700, + "valid_targets_mean": 4092.0, + "valid_targets_min": 3099 + }, + { + "epoch": 1.2820746132848044, + "grad_norm": 0.6387174425772494, + "learning_rate": 3.9169298364804716e-05, + "loss": 0.2998, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18912354111671448, + "step": 705, + "valid_targets_mean": 3796.1, + "valid_targets_min": 1231 + }, + { + "epoch": 1.2911737943585078, + "grad_norm": 0.6358201723562807, + "learning_rate": 3.914324245915956e-05, + "loss": 0.3088, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17825400829315186, + "step": 710, + "valid_targets_mean": 3973.0, + "valid_targets_min": 1168 + }, + { + "epoch": 1.300272975432211, + "grad_norm": 0.7306352957917734, + "learning_rate": 3.91167931411253e-05, + "loss": 0.3107, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13277125358581543, + "step": 715, + "valid_targets_mean": 3033.8, + "valid_targets_min": 544 + }, + { + "epoch": 1.3093721565059144, + "grad_norm": 0.5766296412620744, + "learning_rate": 3.908995095426134e-05, + "loss": 0.3044, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13581085205078125, + "step": 720, + "valid_targets_mean": 3316.8, + "valid_targets_min": 1310 + }, + { + "epoch": 1.3184713375796178, + "grad_norm": 0.8249621873775245, + "learning_rate": 3.90627164502009e-05, + "loss": 0.2943, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1622616946697235, + "step": 725, + "valid_targets_mean": 4480.9, + "valid_targets_min": 2230 + }, + { + "epoch": 1.3275705186533213, + "grad_norm": 0.6193536191489676, + "learning_rate": 3.903509018863974e-05, + "loss": 0.3189, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17137649655342102, + "step": 730, + "valid_targets_mean": 3858.5, + "valid_targets_min": 1147 + }, + { + "epoch": 1.3366696997270244, + "grad_norm": 0.6532042961114637, + "learning_rate": 3.90070727373246e-05, + "loss": 0.3137, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1350535899400711, + "step": 735, + "valid_targets_mean": 2866.0, + "valid_targets_min": 493 + }, + { + "epoch": 1.345768880800728, + "grad_norm": 0.4861900826239269, + "learning_rate": 3.897866467204155e-05, + "loss": 0.3039, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11634202301502228, + "step": 740, + "valid_targets_mean": 2838.1, + "valid_targets_min": 304 + }, + { + "epoch": 1.3548680618744313, + "grad_norm": 0.713209570991044, + "learning_rate": 3.894986657660418e-05, + "loss": 0.3086, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19738173484802246, + "step": 745, + "valid_targets_mean": 3062.8, + "valid_targets_min": 166 + }, + { + "epoch": 1.3639672429481347, + "grad_norm": 0.6296705554955353, + "learning_rate": 3.892067904284154e-05, + "loss": 0.2966, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12983810901641846, + "step": 750, + "valid_targets_mean": 2600.9, + "valid_targets_min": 241 + }, + { + "epoch": 1.373066424021838, + "grad_norm": 0.5361889757739225, + "learning_rate": 3.889110267058608e-05, + "loss": 0.3042, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15422046184539795, + "step": 755, + "valid_targets_mean": 4254.5, + "valid_targets_min": 2720 + }, + { + "epoch": 1.3821656050955413, + "grad_norm": 0.5803866643895204, + "learning_rate": 3.886113806766121e-05, + "loss": 0.3039, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14216673374176025, + "step": 760, + "valid_targets_mean": 3402.6, + "valid_targets_min": 601 + }, + { + "epoch": 1.3912647861692449, + "grad_norm": 0.5729756325028772, + "learning_rate": 3.883078584986888e-05, + "loss": 0.3024, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13551153242588043, + "step": 765, + "valid_targets_mean": 3336.5, + "valid_targets_min": 2241 + }, + { + "epoch": 1.4003639672429482, + "grad_norm": 0.6263881007967305, + "learning_rate": 3.8800046640976916e-05, + "loss": 0.3073, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15259312093257904, + "step": 770, + "valid_targets_mean": 3664.9, + "valid_targets_min": 1487 + }, + { + "epoch": 1.4094631483166515, + "grad_norm": 0.6180611235058632, + "learning_rate": 3.876892107270616e-05, + "loss": 0.3087, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12695425748825073, + "step": 775, + "valid_targets_mean": 3159.2, + "valid_targets_min": 379 + }, + { + "epoch": 1.4185623293903549, + "grad_norm": 0.559623144458965, + "learning_rate": 3.873740978471755e-05, + "loss": 0.3101, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18561246991157532, + "step": 780, + "valid_targets_mean": 4742.5, + "valid_targets_min": 965 + }, + { + "epoch": 1.4276615104640582, + "grad_norm": 0.4243966512830656, + "learning_rate": 3.8705513424598934e-05, + "loss": 0.3037, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10521087050437927, + "step": 785, + "valid_targets_mean": 3032.1, + "valid_targets_min": 1392 + }, + { + "epoch": 1.4367606915377615, + "grad_norm": 0.624333407768036, + "learning_rate": 3.8673232647851756e-05, + "loss": 0.3022, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1458563506603241, + "step": 790, + "valid_targets_mean": 3420.6, + "valid_targets_min": 346 + }, + { + "epoch": 1.4458598726114649, + "grad_norm": 0.5646564966792745, + "learning_rate": 3.8640568117877594e-05, + "loss": 0.2825, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1571834534406662, + "step": 795, + "valid_targets_mean": 4051.5, + "valid_targets_min": 2398 + }, + { + "epoch": 1.4549590536851684, + "grad_norm": 0.5670849272463157, + "learning_rate": 3.8607520505964574e-05, + "loss": 0.3015, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1635785698890686, + "step": 800, + "valid_targets_mean": 4177.8, + "valid_targets_min": 2180 + }, + { + "epoch": 1.4640582347588718, + "grad_norm": 0.6420218837308556, + "learning_rate": 3.857409049127348e-05, + "loss": 0.3207, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15793652832508087, + "step": 805, + "valid_targets_mean": 3131.4, + "valid_targets_min": 1480 + }, + { + "epoch": 1.473157415832575, + "grad_norm": 0.6343008579526399, + "learning_rate": 3.8540278760823866e-05, + "loss": 0.3036, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16844598948955536, + "step": 810, + "valid_targets_mean": 3513.1, + "valid_targets_min": 335 + }, + { + "epoch": 1.4822565969062784, + "grad_norm": 0.548504727537003, + "learning_rate": 3.8506086009479934e-05, + "loss": 0.2996, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13627293705940247, + "step": 815, + "valid_targets_mean": 3978.2, + "valid_targets_min": 2587 + }, + { + "epoch": 1.4913557779799818, + "grad_norm": 0.6522144344848967, + "learning_rate": 3.8471512939936224e-05, + "loss": 0.2944, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1826554834842682, + "step": 820, + "valid_targets_mean": 3473.9, + "valid_targets_min": 891 + }, + { + "epoch": 1.5004549590536853, + "grad_norm": 0.47235032448464315, + "learning_rate": 3.843656026270319e-05, + "loss": 0.2899, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10905256867408752, + "step": 825, + "valid_targets_mean": 4995.2, + "valid_targets_min": 255 + }, + { + "epoch": 1.5095541401273884, + "grad_norm": 0.5919677581523133, + "learning_rate": 3.840122869609258e-05, + "loss": 0.2962, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1688118577003479, + "step": 830, + "valid_targets_mean": 3939.0, + "valid_targets_min": 2195 + }, + { + "epoch": 1.518653321201092, + "grad_norm": 0.6120851821796398, + "learning_rate": 3.8365518966202724e-05, + "loss": 0.2793, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1763344705104828, + "step": 835, + "valid_targets_mean": 3543.9, + "valid_targets_min": 1824 + }, + { + "epoch": 1.5277525022747953, + "grad_norm": 0.5494270450729506, + "learning_rate": 3.832943180690356e-05, + "loss": 0.2836, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10270395129919052, + "step": 840, + "valid_targets_mean": 3121.2, + "valid_targets_min": 469 + }, + { + "epoch": 1.5368516833484986, + "grad_norm": 0.668549792242741, + "learning_rate": 3.829296795982156e-05, + "loss": 0.303, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1624075323343277, + "step": 845, + "valid_targets_mean": 3347.9, + "valid_targets_min": 1757 + }, + { + "epoch": 1.545950864422202, + "grad_norm": 0.6025824994023757, + "learning_rate": 3.8256128174324515e-05, + "loss": 0.292, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13117438554763794, + "step": 850, + "valid_targets_mean": 3289.5, + "valid_targets_min": 2407 + }, + { + "epoch": 1.5550500454959053, + "grad_norm": 1.5587470663669738, + "learning_rate": 3.82189132075061e-05, + "loss": 0.2833, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17107094824314117, + "step": 855, + "valid_targets_mean": 3623.4, + "valid_targets_min": 457 + }, + { + "epoch": 1.5641492265696089, + "grad_norm": 0.665859516324618, + "learning_rate": 3.818132382417037e-05, + "loss": 0.2962, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16790863871574402, + "step": 860, + "valid_targets_mean": 3379.4, + "valid_targets_min": 352 + }, + { + "epoch": 1.573248407643312, + "grad_norm": 0.6963496071560168, + "learning_rate": 3.8143360796815964e-05, + "loss": 0.3039, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16974878311157227, + "step": 865, + "valid_targets_mean": 3827.0, + "valid_targets_min": 1539 + }, + { + "epoch": 1.5823475887170155, + "grad_norm": 0.41729879716752555, + "learning_rate": 3.81050249056203e-05, + "loss": 0.2879, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11725392192602158, + "step": 870, + "valid_targets_mean": 7242.0, + "valid_targets_min": 1288 + }, + { + "epoch": 1.5914467697907189, + "grad_norm": 0.5866727554490438, + "learning_rate": 3.8066316938423495e-05, + "loss": 0.2997, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20750758051872253, + "step": 875, + "valid_targets_mean": 5255.9, + "valid_targets_min": 2124 + }, + { + "epoch": 1.6005459508644222, + "grad_norm": 0.5863686910007585, + "learning_rate": 3.8027237690712206e-05, + "loss": 0.3098, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10066068172454834, + "step": 880, + "valid_targets_mean": 2762.9, + "valid_targets_min": 1092 + }, + { + "epoch": 1.6096451319381255, + "grad_norm": 0.6167527707800907, + "learning_rate": 3.798778796560326e-05, + "loss": 0.2901, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16851675510406494, + "step": 885, + "valid_targets_mean": 3696.9, + "valid_targets_min": 1240 + }, + { + "epoch": 1.6187443130118289, + "grad_norm": 0.6924917088244582, + "learning_rate": 3.794796857382717e-05, + "loss": 0.2978, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13787582516670227, + "step": 890, + "valid_targets_mean": 2862.2, + "valid_targets_min": 1659 + }, + { + "epoch": 1.6278434940855324, + "grad_norm": 0.5789838916428676, + "learning_rate": 3.790778033371145e-05, + "loss": 0.3023, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11418955028057098, + "step": 895, + "valid_targets_mean": 3481.0, + "valid_targets_min": 348 + }, + { + "epoch": 1.6369426751592355, + "grad_norm": 0.7106975900014304, + "learning_rate": 3.786722407116379e-05, + "loss": 0.3085, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12953606247901917, + "step": 900, + "valid_targets_mean": 2206.0, + "valid_targets_min": 312 + }, + { + "epoch": 1.646041856232939, + "grad_norm": 0.6336291450885424, + "learning_rate": 3.782630061965515e-05, + "loss": 0.3064, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15805183351039886, + "step": 905, + "valid_targets_mean": 3122.5, + "valid_targets_min": 1578 + }, + { + "epoch": 1.6551410373066424, + "grad_norm": 0.483971331402685, + "learning_rate": 3.778501082020255e-05, + "loss": 0.261, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09273171424865723, + "step": 910, + "valid_targets_mean": 3567.9, + "valid_targets_min": 1520 + }, + { + "epoch": 1.6642402183803457, + "grad_norm": 0.5149368407010124, + "learning_rate": 3.7743355521351814e-05, + "loss": 0.2758, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11179894208908081, + "step": 915, + "valid_targets_mean": 5487.8, + "valid_targets_min": 2348 + }, + { + "epoch": 1.673339399454049, + "grad_norm": 0.5917668119100274, + "learning_rate": 3.7701335579160147e-05, + "loss": 0.2961, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1767260581254959, + "step": 920, + "valid_targets_mean": 4154.2, + "valid_targets_min": 2792 + }, + { + "epoch": 1.6824385805277524, + "grad_norm": 0.7652973549943375, + "learning_rate": 3.7658951857178544e-05, + "loss": 0.3086, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17313481867313385, + "step": 925, + "valid_targets_mean": 3225.0, + "valid_targets_min": 1253 + }, + { + "epoch": 1.691537761601456, + "grad_norm": 0.5994539780074473, + "learning_rate": 3.7616205226434005e-05, + "loss": 0.2772, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1413244605064392, + "step": 930, + "valid_targets_mean": 3994.2, + "valid_targets_min": 2883 + }, + { + "epoch": 1.700636942675159, + "grad_norm": 0.5395727379224268, + "learning_rate": 3.7573096565411694e-05, + "loss": 0.2902, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13815945386886597, + "step": 935, + "valid_targets_mean": 4574.5, + "valid_targets_min": 3154 + }, + { + "epoch": 1.7097361237488626, + "grad_norm": 0.598304083632461, + "learning_rate": 3.7529626760036814e-05, + "loss": 0.3026, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13133779168128967, + "step": 940, + "valid_targets_mean": 3710.1, + "valid_targets_min": 1871 + }, + { + "epoch": 1.718835304822566, + "grad_norm": 0.6169878314374032, + "learning_rate": 3.7485796703656475e-05, + "loss": 0.301, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19230428338050842, + "step": 945, + "valid_targets_mean": 4477.2, + "valid_targets_min": 2484 + }, + { + "epoch": 1.7279344858962693, + "grad_norm": 0.6494778619925592, + "learning_rate": 3.7441607297021254e-05, + "loss": 0.3127, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18376144766807556, + "step": 950, + "valid_targets_mean": 3503.4, + "valid_targets_min": 689 + }, + { + "epoch": 1.7370336669699729, + "grad_norm": 0.5287904809918028, + "learning_rate": 3.7397059448266786e-05, + "loss": 0.2877, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15362298488616943, + "step": 955, + "valid_targets_mean": 4776.2, + "valid_targets_min": 2511 + }, + { + "epoch": 1.746132848043676, + "grad_norm": 0.6494659791119134, + "learning_rate": 3.735215407289498e-05, + "loss": 0.2944, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18310752511024475, + "step": 960, + "valid_targets_mean": 4014.9, + "valid_targets_min": 1611 + }, + { + "epoch": 1.7552320291173795, + "grad_norm": 0.5731006048795201, + "learning_rate": 3.730689209375533e-05, + "loss": 0.3011, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18742819130420685, + "step": 965, + "valid_targets_mean": 4962.2, + "valid_targets_min": 2034 + }, + { + "epoch": 1.7643312101910829, + "grad_norm": 0.5506132886158287, + "learning_rate": 3.726127444102583e-05, + "loss": 0.2814, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15207810699939728, + "step": 970, + "valid_targets_mean": 4009.9, + "valid_targets_min": 1103 + }, + { + "epoch": 1.7734303912647862, + "grad_norm": 0.4278231884632061, + "learning_rate": 3.721530205219395e-05, + "loss": 0.271, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10765276849269867, + "step": 975, + "valid_targets_mean": 5876.9, + "valid_targets_min": 283 + }, + { + "epoch": 1.7825295723384895, + "grad_norm": 0.5432370249962426, + "learning_rate": 3.716897587203733e-05, + "loss": 0.2907, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15847131609916687, + "step": 980, + "valid_targets_mean": 4284.6, + "valid_targets_min": 2093 + }, + { + "epoch": 1.7916287534121929, + "grad_norm": 0.5409964771591507, + "learning_rate": 3.712229685260434e-05, + "loss": 0.3063, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1669999659061432, + "step": 985, + "valid_targets_mean": 4959.6, + "valid_targets_min": 1444 + }, + { + "epoch": 1.8007279344858964, + "grad_norm": 0.5124178526933512, + "learning_rate": 3.707526595319459e-05, + "loss": 0.2958, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14999991655349731, + "step": 990, + "valid_targets_mean": 4784.5, + "valid_targets_min": 1865 + }, + { + "epoch": 1.8098271155595995, + "grad_norm": 0.48227382336098407, + "learning_rate": 3.7027884140339144e-05, + "loss": 0.2965, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12232952564954758, + "step": 995, + "valid_targets_mean": 3921.4, + "valid_targets_min": 1567 + }, + { + "epoch": 1.818926296633303, + "grad_norm": 0.5838105781492224, + "learning_rate": 3.698015238778066e-05, + "loss": 0.2801, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12059933692216873, + "step": 1000, + "valid_targets_mean": 2937.2, + "valid_targets_min": 281 + }, + { + "epoch": 1.8280254777070064, + "grad_norm": 0.5443419726188922, + "learning_rate": 3.693207167645344e-05, + "loss": 0.2814, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13547688722610474, + "step": 1005, + "valid_targets_mean": 3484.6, + "valid_targets_min": 1610 + }, + { + "epoch": 1.8371246587807097, + "grad_norm": 0.6629227757168609, + "learning_rate": 3.6883642994463194e-05, + "loss": 0.2925, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14396211504936218, + "step": 1010, + "valid_targets_mean": 2764.6, + "valid_targets_min": 1192 + }, + { + "epoch": 1.846223839854413, + "grad_norm": 0.5769994708000856, + "learning_rate": 3.6834867337066805e-05, + "loss": 0.2974, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15609201788902283, + "step": 1015, + "valid_targets_mean": 4123.5, + "valid_targets_min": 1895 + }, + { + "epoch": 1.8553230209281164, + "grad_norm": 0.626324397009088, + "learning_rate": 3.678574570665181e-05, + "loss": 0.2836, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11908911168575287, + "step": 1020, + "valid_targets_mean": 2649.9, + "valid_targets_min": 1653 + }, + { + "epoch": 1.86442220200182, + "grad_norm": 0.633036325528964, + "learning_rate": 3.673627911271586e-05, + "loss": 0.2953, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11095995455980301, + "step": 1025, + "valid_targets_mean": 3540.4, + "valid_targets_min": 1594 + }, + { + "epoch": 1.873521383075523, + "grad_norm": 0.6333066712978032, + "learning_rate": 3.668646857184591e-05, + "loss": 0.3157, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15657323598861694, + "step": 1030, + "valid_targets_mean": 2872.5, + "valid_targets_min": 969 + }, + { + "epoch": 1.8826205641492266, + "grad_norm": 0.5660080861885046, + "learning_rate": 3.663631510769739e-05, + "loss": 0.3093, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1922149509191513, + "step": 1035, + "valid_targets_mean": 4844.6, + "valid_targets_min": 1568 + }, + { + "epoch": 1.89171974522293, + "grad_norm": 0.7412294025780711, + "learning_rate": 3.658581975097311e-05, + "loss": 0.3016, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17560440301895142, + "step": 1040, + "valid_targets_mean": 3082.6, + "valid_targets_min": 330 + }, + { + "epoch": 1.9008189262966333, + "grad_norm": 0.555376644130959, + "learning_rate": 3.653498353940215e-05, + "loss": 0.2961, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16862821578979492, + "step": 1045, + "valid_targets_mean": 3799.9, + "valid_targets_min": 1053 + }, + { + "epoch": 1.9099181073703366, + "grad_norm": 0.5686697582781365, + "learning_rate": 3.648380751771846e-05, + "loss": 0.2948, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18659132719039917, + "step": 1050, + "valid_targets_mean": 4709.6, + "valid_targets_min": 2665 + }, + { + "epoch": 1.91901728844404, + "grad_norm": 0.5600510837685639, + "learning_rate": 3.6432292737639426e-05, + "loss": 0.2813, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15304958820343018, + "step": 1055, + "valid_targets_mean": 4016.4, + "valid_targets_min": 2083 + }, + { + "epoch": 1.9281164695177435, + "grad_norm": 0.5777594563764268, + "learning_rate": 3.638044025784425e-05, + "loss": 0.2946, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14716693758964539, + "step": 1060, + "valid_targets_mean": 3776.5, + "valid_targets_min": 282 + }, + { + "epoch": 1.9372156505914466, + "grad_norm": 0.7107526464676869, + "learning_rate": 3.63282511439522e-05, + "loss": 0.2967, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15589915215969086, + "step": 1065, + "valid_targets_mean": 2547.8, + "valid_targets_min": 503 + }, + { + "epoch": 1.9463148316651502, + "grad_norm": 0.6533274131286898, + "learning_rate": 3.627572646850069e-05, + "loss": 0.296, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12605586647987366, + "step": 1070, + "valid_targets_mean": 2884.5, + "valid_targets_min": 184 + }, + { + "epoch": 1.9554140127388535, + "grad_norm": 0.5095962807954009, + "learning_rate": 3.6222867310923296e-05, + "loss": 0.3, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13677296042442322, + "step": 1075, + "valid_targets_mean": 4426.5, + "valid_targets_min": 1530 + }, + { + "epoch": 1.9645131938125568, + "grad_norm": 0.5431060924066419, + "learning_rate": 3.6169674757527466e-05, + "loss": 0.2869, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13465212285518646, + "step": 1080, + "valid_targets_mean": 3468.8, + "valid_targets_min": 1512 + }, + { + "epoch": 1.9736123748862604, + "grad_norm": 0.508309995874022, + "learning_rate": 3.61161499014723e-05, + "loss": 0.2896, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1512061208486557, + "step": 1085, + "valid_targets_mean": 4771.5, + "valid_targets_min": 3037 + }, + { + "epoch": 1.9827115559599635, + "grad_norm": 0.563426896365882, + "learning_rate": 3.606229384274604e-05, + "loss": 0.2915, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1543359011411667, + "step": 1090, + "valid_targets_mean": 4113.5, + "valid_targets_min": 1392 + }, + { + "epoch": 1.991810737033667, + "grad_norm": 0.5744094191621176, + "learning_rate": 3.600810768814345e-05, + "loss": 0.2972, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1515529453754425, + "step": 1095, + "valid_targets_mean": 4293.8, + "valid_targets_min": 1114 + }, + { + "epoch": 2.0, + "grad_norm": 0.7949063431983748, + "learning_rate": 3.595359255124311e-05, + "loss": 0.3044, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3090308606624603, + "step": 1100, + "valid_targets_mean": 4015.4, + "valid_targets_min": 2457 + }, + { + "epoch": 2.0090991810737036, + "grad_norm": 0.6751657595795937, + "learning_rate": 3.589874955238449e-05, + "loss": 0.2748, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15675446391105652, + "step": 1105, + "valid_targets_mean": 4197.4, + "valid_targets_min": 1874 + }, + { + "epoch": 2.0181983621474067, + "grad_norm": 0.6454281420919674, + "learning_rate": 3.5843579818644956e-05, + "loss": 0.2744, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.139465793967247, + "step": 1110, + "valid_targets_mean": 3443.8, + "valid_targets_min": 683 + }, + { + "epoch": 2.02729754322111, + "grad_norm": 0.4294367360499809, + "learning_rate": 3.5788084483816587e-05, + "loss": 0.2697, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08601902425289154, + "step": 1115, + "valid_targets_mean": 6044.8, + "valid_targets_min": 293 + }, + { + "epoch": 2.0363967242948133, + "grad_norm": 0.6474434008705867, + "learning_rate": 3.573226468838289e-05, + "loss": 0.2756, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15890777111053467, + "step": 1120, + "valid_targets_mean": 4120.4, + "valid_targets_min": 2236 + }, + { + "epoch": 2.045495905368517, + "grad_norm": 0.6607678287468948, + "learning_rate": 3.567612157949536e-05, + "loss": 0.2764, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13646885752677917, + "step": 1125, + "valid_targets_mean": 3556.9, + "valid_targets_min": 2256 + }, + { + "epoch": 2.05459508644222, + "grad_norm": 0.6001404112466459, + "learning_rate": 3.561965631094988e-05, + "loss": 0.2759, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1463991105556488, + "step": 1130, + "valid_targets_mean": 4032.1, + "valid_targets_min": 1838 + }, + { + "epoch": 2.0636942675159236, + "grad_norm": 0.5395091924578818, + "learning_rate": 3.556287004316305e-05, + "loss": 0.2375, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12247171252965927, + "step": 1135, + "valid_targets_mean": 3741.5, + "valid_targets_min": 1198 + }, + { + "epoch": 2.072793448589627, + "grad_norm": 0.5912809755997814, + "learning_rate": 3.5505763943148324e-05, + "loss": 0.2636, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14579446613788605, + "step": 1140, + "valid_targets_mean": 4449.2, + "valid_targets_min": 2512 + }, + { + "epoch": 2.08189262966333, + "grad_norm": 0.5639172171971991, + "learning_rate": 3.544833918449199e-05, + "loss": 0.2538, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12566199898719788, + "step": 1145, + "valid_targets_mean": 3620.0, + "valid_targets_min": 726 + }, + { + "epoch": 2.0909918107370338, + "grad_norm": 0.5348382118034839, + "learning_rate": 3.5390596947329124e-05, + "loss": 0.2615, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14052331447601318, + "step": 1150, + "valid_targets_mean": 4695.1, + "valid_targets_min": 3057 + }, + { + "epoch": 2.100090991810737, + "grad_norm": 0.5616636512428199, + "learning_rate": 3.5332538418319254e-05, + "loss": 0.2609, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08257120102643967, + "step": 1155, + "valid_targets_mean": 2379.0, + "valid_targets_min": 382 + }, + { + "epoch": 2.1091901728844404, + "grad_norm": 0.5992218870310196, + "learning_rate": 3.527416479062205e-05, + "loss": 0.2721, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1420498937368393, + "step": 1160, + "valid_targets_mean": 3988.2, + "valid_targets_min": 1040 + }, + { + "epoch": 2.1182893539581436, + "grad_norm": 0.6456667062309294, + "learning_rate": 3.521547726387275e-05, + "loss": 0.2635, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12069796770811081, + "step": 1165, + "valid_targets_mean": 2920.1, + "valid_targets_min": 282 + }, + { + "epoch": 2.127388535031847, + "grad_norm": 0.716226667426586, + "learning_rate": 3.515647704415754e-05, + "loss": 0.2672, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10610539466142654, + "step": 1170, + "valid_targets_mean": 2205.1, + "valid_targets_min": 330 + }, + { + "epoch": 2.1364877161055507, + "grad_norm": 0.5716159999832333, + "learning_rate": 3.509716534398873e-05, + "loss": 0.2813, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11310961842536926, + "step": 1175, + "valid_targets_mean": 3156.5, + "valid_targets_min": 1663 + }, + { + "epoch": 2.1455868971792538, + "grad_norm": 0.6345606982139681, + "learning_rate": 3.503754338227989e-05, + "loss": 0.2779, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1598322093486786, + "step": 1180, + "valid_targets_mean": 4152.5, + "valid_targets_min": 373 + }, + { + "epoch": 2.1546860782529573, + "grad_norm": 0.6444097109585533, + "learning_rate": 3.497761238432073e-05, + "loss": 0.2708, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1359652876853943, + "step": 1185, + "valid_targets_mean": 3126.6, + "valid_targets_min": 1108 + }, + { + "epoch": 2.1637852593266604, + "grad_norm": 0.5743362772516076, + "learning_rate": 3.4917373581752e-05, + "loss": 0.2539, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15441018342971802, + "step": 1190, + "valid_targets_mean": 4133.0, + "valid_targets_min": 1427 + }, + { + "epoch": 2.172884440400364, + "grad_norm": 0.5804857594862529, + "learning_rate": 3.4856828212540094e-05, + "loss": 0.2577, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10116176307201385, + "step": 1195, + "valid_targets_mean": 3043.6, + "valid_targets_min": 302 + }, + { + "epoch": 2.1819836214740675, + "grad_norm": 0.5706299053526133, + "learning_rate": 3.4795977520951684e-05, + "loss": 0.2593, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11291206628084183, + "step": 1200, + "valid_targets_mean": 3438.2, + "valid_targets_min": 250 + }, + { + "epoch": 2.1910828025477707, + "grad_norm": 0.6307638349852748, + "learning_rate": 3.47348227575281e-05, + "loss": 0.2545, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17008806765079498, + "step": 1205, + "valid_targets_mean": 4208.5, + "valid_targets_min": 1336 + }, + { + "epoch": 2.200181983621474, + "grad_norm": 0.6618907052519223, + "learning_rate": 3.467336517905966e-05, + "loss": 0.2596, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13128957152366638, + "step": 1210, + "valid_targets_mean": 2842.1, + "valid_targets_min": 201 + }, + { + "epoch": 2.2092811646951773, + "grad_norm": 0.5658986082884031, + "learning_rate": 3.46116060485598e-05, + "loss": 0.26, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1362631469964981, + "step": 1215, + "valid_targets_mean": 4190.9, + "valid_targets_min": 324 + }, + { + "epoch": 2.218380345768881, + "grad_norm": 0.5348818447353793, + "learning_rate": 3.4549546635239167e-05, + "loss": 0.2741, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11287876963615417, + "step": 1220, + "valid_targets_mean": 3727.4, + "valid_targets_min": 2818 + }, + { + "epoch": 2.227479526842584, + "grad_norm": 0.6385706591191282, + "learning_rate": 3.448718821447953e-05, + "loss": 0.2698, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12544016540050507, + "step": 1225, + "valid_targets_mean": 3231.2, + "valid_targets_min": 979 + }, + { + "epoch": 2.2365787079162875, + "grad_norm": 0.7819910500944424, + "learning_rate": 3.442453206780751e-05, + "loss": 0.2781, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16756212711334229, + "step": 1230, + "valid_targets_mean": 4297.6, + "valid_targets_min": 2240 + }, + { + "epoch": 2.245677888989991, + "grad_norm": 0.6080355702316727, + "learning_rate": 3.4361579482868325e-05, + "loss": 0.2686, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13607972860336304, + "step": 1235, + "valid_targets_mean": 3773.0, + "valid_targets_min": 2000 + }, + { + "epoch": 2.254777070063694, + "grad_norm": 0.7152228635802768, + "learning_rate": 3.429833175339927e-05, + "loss": 0.2633, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1547776311635971, + "step": 1240, + "valid_targets_mean": 3546.6, + "valid_targets_min": 573 + }, + { + "epoch": 2.2638762511373978, + "grad_norm": 0.4914702585050212, + "learning_rate": 3.423479017920317e-05, + "loss": 0.2612, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1395949423313141, + "step": 1245, + "valid_targets_mean": 4476.6, + "valid_targets_min": 2453 + }, + { + "epoch": 2.272975432211101, + "grad_norm": 0.5690362474952183, + "learning_rate": 3.4170956066121616e-05, + "loss": 0.2645, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1314096301794052, + "step": 1250, + "valid_targets_mean": 3731.1, + "valid_targets_min": 313 + }, + { + "epoch": 2.2820746132848044, + "grad_norm": 0.6288288462340934, + "learning_rate": 3.410683072600818e-05, + "loss": 0.2803, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14974285662174225, + "step": 1255, + "valid_targets_mean": 3642.4, + "valid_targets_min": 1186 + }, + { + "epoch": 2.2911737943585075, + "grad_norm": 0.6119487602662945, + "learning_rate": 3.4042415476701434e-05, + "loss": 0.2529, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14314958453178406, + "step": 1260, + "valid_targets_mean": 3669.4, + "valid_targets_min": 1599 + }, + { + "epoch": 2.300272975432211, + "grad_norm": 0.671669349060168, + "learning_rate": 3.397771164199787e-05, + "loss": 0.2434, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11376748979091644, + "step": 1265, + "valid_targets_mean": 3127.4, + "valid_targets_min": 148 + }, + { + "epoch": 2.3093721565059147, + "grad_norm": 0.8943752547371007, + "learning_rate": 3.3912720551624684e-05, + "loss": 0.2519, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12145446240901947, + "step": 1270, + "valid_targets_mean": 3842.5, + "valid_targets_min": 414 + }, + { + "epoch": 2.3184713375796178, + "grad_norm": 0.5656946255603227, + "learning_rate": 3.384744354121246e-05, + "loss": 0.2601, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11813884228467941, + "step": 1275, + "valid_targets_mean": 3730.5, + "valid_targets_min": 2429 + }, + { + "epoch": 2.3275705186533213, + "grad_norm": 0.7149215078350728, + "learning_rate": 3.3781881952267715e-05, + "loss": 0.258, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15160393714904785, + "step": 1280, + "valid_targets_mean": 2399.2, + "valid_targets_min": 270 + }, + { + "epoch": 2.3366696997270244, + "grad_norm": 0.5280911417580575, + "learning_rate": 3.3716037132145354e-05, + "loss": 0.2674, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13931462168693542, + "step": 1285, + "valid_targets_mean": 5500.1, + "valid_targets_min": 304 + }, + { + "epoch": 2.345768880800728, + "grad_norm": 0.5450007462035853, + "learning_rate": 3.3649910434020934e-05, + "loss": 0.2734, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14486107230186462, + "step": 1290, + "valid_targets_mean": 4728.0, + "valid_targets_min": 1757 + }, + { + "epoch": 2.3548680618744315, + "grad_norm": 0.5206771266132671, + "learning_rate": 3.35835032168629e-05, + "loss": 0.2704, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11571803689002991, + "step": 1295, + "valid_targets_mean": 4034.5, + "valid_targets_min": 850 + }, + { + "epoch": 2.3639672429481347, + "grad_norm": 0.5980886546519958, + "learning_rate": 3.351681684540462e-05, + "loss": 0.2702, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1225738599896431, + "step": 1300, + "valid_targets_mean": 3550.4, + "valid_targets_min": 352 + }, + { + "epoch": 2.373066424021838, + "grad_norm": 0.6270451107929509, + "learning_rate": 3.3449852690116375e-05, + "loss": 0.2557, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10917264223098755, + "step": 1305, + "valid_targets_mean": 2968.6, + "valid_targets_min": 240 + }, + { + "epoch": 2.3821656050955413, + "grad_norm": 0.4647051724658464, + "learning_rate": 3.3382612127177166e-05, + "loss": 0.2489, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1091746836900711, + "step": 1310, + "valid_targets_mean": 4024.8, + "valid_targets_min": 1696 + }, + { + "epoch": 2.391264786169245, + "grad_norm": 0.6011078170019583, + "learning_rate": 3.331509653844644e-05, + "loss": 0.2371, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1194903701543808, + "step": 1315, + "valid_targets_mean": 3338.9, + "valid_targets_min": 164 + }, + { + "epoch": 2.400363967242948, + "grad_norm": 0.6174349839198626, + "learning_rate": 3.324730731143571e-05, + "loss": 0.2827, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1434527039527893, + "step": 1320, + "valid_targets_mean": 3696.2, + "valid_targets_min": 1616 + }, + { + "epoch": 2.4094631483166515, + "grad_norm": 0.7158219129507043, + "learning_rate": 3.317924583927999e-05, + "loss": 0.2506, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17214879393577576, + "step": 1325, + "valid_targets_mean": 3366.8, + "valid_targets_min": 326 + }, + { + "epoch": 2.4185623293903546, + "grad_norm": 0.6189432134650674, + "learning_rate": 3.311091352070924e-05, + "loss": 0.2644, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13491564989089966, + "step": 1330, + "valid_targets_mean": 3157.6, + "valid_targets_min": 1814 + }, + { + "epoch": 2.427661510464058, + "grad_norm": 0.659452736598218, + "learning_rate": 3.3042311760019554e-05, + "loss": 0.2733, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13368107378482819, + "step": 1335, + "valid_targets_mean": 3190.8, + "valid_targets_min": 557 + }, + { + "epoch": 2.4367606915377618, + "grad_norm": 0.5820175017066158, + "learning_rate": 3.297344196704431e-05, + "loss": 0.253, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14730173349380493, + "step": 1340, + "valid_targets_mean": 4446.1, + "valid_targets_min": 1305 + }, + { + "epoch": 2.445859872611465, + "grad_norm": 0.64079393332815, + "learning_rate": 3.2904305557125265e-05, + "loss": 0.2732, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17309501767158508, + "step": 1345, + "valid_targets_mean": 4094.2, + "valid_targets_min": 2282 + }, + { + "epoch": 2.4549590536851684, + "grad_norm": 0.7018821745777597, + "learning_rate": 3.2834903951083363e-05, + "loss": 0.2771, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16230127215385437, + "step": 1350, + "valid_targets_mean": 3237.2, + "valid_targets_min": 1578 + }, + { + "epoch": 2.4640582347588715, + "grad_norm": 0.5965681886862967, + "learning_rate": 3.27652385751896e-05, + "loss": 0.2546, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1440294235944748, + "step": 1355, + "valid_targets_mean": 4534.4, + "valid_targets_min": 3064 + }, + { + "epoch": 2.473157415832575, + "grad_norm": 0.6376682960206415, + "learning_rate": 3.269531086113573e-05, + "loss": 0.2753, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10924984514713287, + "step": 1360, + "valid_targets_mean": 3105.8, + "valid_targets_min": 262 + }, + { + "epoch": 2.4822565969062786, + "grad_norm": 0.5685224339416516, + "learning_rate": 3.262512224600478e-05, + "loss": 0.2563, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11022941768169403, + "step": 1365, + "valid_targets_mean": 3321.5, + "valid_targets_min": 1122 + }, + { + "epoch": 2.4913557779799818, + "grad_norm": 0.4679822971053616, + "learning_rate": 3.2554674172241565e-05, + "loss": 0.2566, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10667596012353897, + "step": 1370, + "valid_targets_mean": 6181.6, + "valid_targets_min": 1169 + }, + { + "epoch": 2.5004549590536853, + "grad_norm": 0.6124226942623071, + "learning_rate": 3.2483968087623026e-05, + "loss": 0.2681, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14309075474739075, + "step": 1375, + "valid_targets_mean": 3384.4, + "valid_targets_min": 274 + }, + { + "epoch": 2.5095541401273884, + "grad_norm": 0.6247095676280948, + "learning_rate": 3.241300544522848e-05, + "loss": 0.2685, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1395222246646881, + "step": 1380, + "valid_targets_mean": 3201.8, + "valid_targets_min": 2156 + }, + { + "epoch": 2.518653321201092, + "grad_norm": 0.5328578793777622, + "learning_rate": 3.234178770340975e-05, + "loss": 0.2557, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08864390850067139, + "step": 1385, + "valid_targets_mean": 4039.1, + "valid_targets_min": 286 + }, + { + "epoch": 2.5277525022747955, + "grad_norm": 0.4773123951813783, + "learning_rate": 3.227031632576122e-05, + "loss": 0.252, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13746377825737, + "step": 1390, + "valid_targets_mean": 6369.5, + "valid_targets_min": 2005 + }, + { + "epoch": 2.5368516833484986, + "grad_norm": 0.6004745142765648, + "learning_rate": 3.219859278108972e-05, + "loss": 0.2679, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12306511402130127, + "step": 1395, + "valid_targets_mean": 3561.5, + "valid_targets_min": 238 + }, + { + "epoch": 2.5459508644222018, + "grad_norm": 0.6878586191935012, + "learning_rate": 3.212661854338438e-05, + "loss": 0.2602, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16240370273590088, + "step": 1400, + "valid_targets_mean": 3584.8, + "valid_targets_min": 1052 + }, + { + "epoch": 2.5550500454959053, + "grad_norm": 0.5824202233287424, + "learning_rate": 3.20543950917863e-05, + "loss": 0.2626, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13346585631370544, + "step": 1405, + "valid_targets_mean": 2869.1, + "valid_targets_min": 2043 + }, + { + "epoch": 2.564149226569609, + "grad_norm": 0.680238515947865, + "learning_rate": 3.1981923910558164e-05, + "loss": 0.2769, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15468516945838928, + "step": 1410, + "valid_targets_mean": 3367.0, + "valid_targets_min": 1629 + }, + { + "epoch": 2.573248407643312, + "grad_norm": 0.5202125549195913, + "learning_rate": 3.190920648905376e-05, + "loss": 0.2531, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10637877881526947, + "step": 1415, + "valid_targets_mean": 3411.2, + "valid_targets_min": 297 + }, + { + "epoch": 2.5823475887170155, + "grad_norm": 0.5817714257013601, + "learning_rate": 3.183624432168736e-05, + "loss": 0.253, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11493627727031708, + "step": 1420, + "valid_targets_mean": 3214.5, + "valid_targets_min": 1573 + }, + { + "epoch": 2.5914467697907186, + "grad_norm": 0.5458489312850151, + "learning_rate": 3.1763038907902976e-05, + "loss": 0.2737, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10880685597658157, + "step": 1425, + "valid_targets_mean": 3591.6, + "valid_targets_min": 633 + }, + { + "epoch": 2.600545950864422, + "grad_norm": 0.5542156910126472, + "learning_rate": 3.16895917521436e-05, + "loss": 0.2465, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13165003061294556, + "step": 1430, + "valid_targets_mean": 4754.2, + "valid_targets_min": 3017 + }, + { + "epoch": 2.6096451319381258, + "grad_norm": 0.6533595108199927, + "learning_rate": 3.161590436382023e-05, + "loss": 0.2768, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1214178055524826, + "step": 1435, + "valid_targets_mean": 2685.4, + "valid_targets_min": 355 + }, + { + "epoch": 2.618744313011829, + "grad_norm": 0.5111980360739798, + "learning_rate": 3.1541978257280915e-05, + "loss": 0.2575, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12494254857301712, + "step": 1440, + "valid_targets_mean": 5197.2, + "valid_targets_min": 2480 + }, + { + "epoch": 2.6278434940855324, + "grad_norm": 0.5763523371672339, + "learning_rate": 3.1467814951779564e-05, + "loss": 0.2686, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09673012048006058, + "step": 1445, + "valid_targets_mean": 2934.5, + "valid_targets_min": 640 + }, + { + "epoch": 2.6369426751592355, + "grad_norm": 0.7378840239597381, + "learning_rate": 3.139341597144478e-05, + "loss": 0.2567, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11686737090349197, + "step": 1450, + "valid_targets_mean": 3688.1, + "valid_targets_min": 414 + }, + { + "epoch": 2.646041856232939, + "grad_norm": 0.5926896588467113, + "learning_rate": 3.13187828452485e-05, + "loss": 0.2758, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13565243780612946, + "step": 1455, + "valid_targets_mean": 3212.2, + "valid_targets_min": 303 + }, + { + "epoch": 2.6551410373066426, + "grad_norm": 0.5485940410600364, + "learning_rate": 3.1243917106974583e-05, + "loss": 0.2685, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13510656356811523, + "step": 1460, + "valid_targets_mean": 3864.6, + "valid_targets_min": 2506 + }, + { + "epoch": 2.6642402183803457, + "grad_norm": 0.6320710190885632, + "learning_rate": 3.116882029518732e-05, + "loss": 0.2673, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15843375027179718, + "step": 1465, + "valid_targets_mean": 3470.5, + "valid_targets_min": 1637 + }, + { + "epoch": 2.673339399454049, + "grad_norm": 0.6126966480462319, + "learning_rate": 3.109349395319976e-05, + "loss": 0.2791, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15615004301071167, + "step": 1470, + "valid_targets_mean": 3915.4, + "valid_targets_min": 1532 + }, + { + "epoch": 2.6824385805277524, + "grad_norm": 0.5686770606117241, + "learning_rate": 3.101793962904205e-05, + "loss": 0.2586, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1370496153831482, + "step": 1475, + "valid_targets_mean": 4079.1, + "valid_targets_min": 919 + }, + { + "epoch": 2.691537761601456, + "grad_norm": 0.5856441860541246, + "learning_rate": 3.094215887542957e-05, + "loss": 0.2669, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.110836461186409, + "step": 1480, + "valid_targets_mean": 2751.5, + "valid_targets_min": 1269 + }, + { + "epoch": 2.700636942675159, + "grad_norm": 0.6233578615196819, + "learning_rate": 3.086615324973107e-05, + "loss": 0.2708, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15025722980499268, + "step": 1485, + "valid_targets_mean": 3894.9, + "valid_targets_min": 312 + }, + { + "epoch": 2.7097361237488626, + "grad_norm": 0.536781802707285, + "learning_rate": 3.07899243139366e-05, + "loss": 0.2633, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09888775646686554, + "step": 1490, + "valid_targets_mean": 3418.4, + "valid_targets_min": 844 + }, + { + "epoch": 2.7188353048225657, + "grad_norm": 0.6866854486013821, + "learning_rate": 3.0713473634625507e-05, + "loss": 0.2602, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12067543715238571, + "step": 1495, + "valid_targets_mean": 3014.2, + "valid_targets_min": 643 + }, + { + "epoch": 2.7279344858962693, + "grad_norm": 0.6751006196997635, + "learning_rate": 3.0636802782934146e-05, + "loss": 0.2542, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1544184535741806, + "step": 1500, + "valid_targets_mean": 2988.5, + "valid_targets_min": 1152 + }, + { + "epoch": 2.737033666969973, + "grad_norm": 0.442079678607608, + "learning_rate": 3.055991333452364e-05, + "loss": 0.2629, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1539030522108078, + "step": 1505, + "valid_targets_mean": 6319.1, + "valid_targets_min": 1834 + }, + { + "epoch": 2.746132848043676, + "grad_norm": 0.5873539144786856, + "learning_rate": 3.0482806869547495e-05, + "loss": 0.2536, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1154666393995285, + "step": 1510, + "valid_targets_mean": 3250.0, + "valid_targets_min": 991 + }, + { + "epoch": 2.7552320291173795, + "grad_norm": 0.5752182793640411, + "learning_rate": 3.0405484972619116e-05, + "loss": 0.2677, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14140355587005615, + "step": 1515, + "valid_targets_mean": 4366.2, + "valid_targets_min": 1602 + }, + { + "epoch": 2.7643312101910826, + "grad_norm": 0.726912967043729, + "learning_rate": 3.0327949232779242e-05, + "loss": 0.2677, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13592445850372314, + "step": 1520, + "valid_targets_mean": 3295.1, + "valid_targets_min": 2203 + }, + { + "epoch": 2.773430391264786, + "grad_norm": 0.6215196361746996, + "learning_rate": 3.0250201243463297e-05, + "loss": 0.2581, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16549277305603027, + "step": 1525, + "valid_targets_mean": 3900.1, + "valid_targets_min": 1860 + }, + { + "epoch": 2.7825295723384897, + "grad_norm": 0.5415517684578123, + "learning_rate": 3.0172242602468637e-05, + "loss": 0.2579, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12296943366527557, + "step": 1530, + "valid_targets_mean": 4461.1, + "valid_targets_min": 820 + }, + { + "epoch": 2.791628753412193, + "grad_norm": 0.4749079544937464, + "learning_rate": 3.009407491192172e-05, + "loss": 0.2651, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09805374592542648, + "step": 1535, + "valid_targets_mean": 4534.9, + "valid_targets_min": 1317 + }, + { + "epoch": 2.8007279344858964, + "grad_norm": 0.5380978879739551, + "learning_rate": 3.0015699778245177e-05, + "loss": 0.2521, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11909287422895432, + "step": 1540, + "valid_targets_mean": 3597.5, + "valid_targets_min": 311 + }, + { + "epoch": 2.8098271155595995, + "grad_norm": 0.6019469943728163, + "learning_rate": 2.9937118812124796e-05, + "loss": 0.2713, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13189806044101715, + "step": 1545, + "valid_targets_mean": 3001.1, + "valid_targets_min": 2021 + }, + { + "epoch": 2.818926296633303, + "grad_norm": 0.6243801288042384, + "learning_rate": 2.9858333628476423e-05, + "loss": 0.2442, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16052299737930298, + "step": 1550, + "valid_targets_mean": 4131.5, + "valid_targets_min": 2309 + }, + { + "epoch": 2.8280254777070066, + "grad_norm": 0.7862122605057308, + "learning_rate": 2.977934584641278e-05, + "loss": 0.27, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12268385291099548, + "step": 1555, + "valid_targets_mean": 1729.8, + "valid_targets_min": 275 + }, + { + "epoch": 2.8371246587807097, + "grad_norm": 0.5889590266099785, + "learning_rate": 2.9700157089210174e-05, + "loss": 0.2648, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.133815199136734, + "step": 1560, + "valid_targets_mean": 4164.0, + "valid_targets_min": 169 + }, + { + "epoch": 2.846223839854413, + "grad_norm": 0.5718002161440873, + "learning_rate": 2.9620768984275163e-05, + "loss": 0.2607, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13359403610229492, + "step": 1565, + "valid_targets_mean": 3803.5, + "valid_targets_min": 2257 + }, + { + "epoch": 2.8553230209281164, + "grad_norm": 0.5008652494906181, + "learning_rate": 2.9541183163111076e-05, + "loss": 0.2631, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10507234930992126, + "step": 1570, + "valid_targets_mean": 3964.6, + "valid_targets_min": 2507 + }, + { + "epoch": 2.86442220200182, + "grad_norm": 0.5010428450320672, + "learning_rate": 2.9461401261284536e-05, + "loss": 0.2534, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1268589198589325, + "step": 1575, + "valid_targets_mean": 5799.9, + "valid_targets_min": 2969 + }, + { + "epoch": 2.873521383075523, + "grad_norm": 0.58820416909119, + "learning_rate": 2.9381424918391775e-05, + "loss": 0.2388, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10214714705944061, + "step": 1580, + "valid_targets_mean": 2422.4, + "valid_targets_min": 261 + }, + { + "epoch": 2.8826205641492266, + "grad_norm": 0.5880343496175168, + "learning_rate": 2.9301255778025014e-05, + "loss": 0.2704, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1449739933013916, + "step": 1585, + "valid_targets_mean": 4025.0, + "valid_targets_min": 2526 + }, + { + "epoch": 2.8917197452229297, + "grad_norm": 0.6036710857573726, + "learning_rate": 2.9220895487738627e-05, + "loss": 0.2617, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13294830918312073, + "step": 1590, + "valid_targets_mean": 2878.0, + "valid_targets_min": 1298 + }, + { + "epoch": 2.9008189262966333, + "grad_norm": 0.6020483772824585, + "learning_rate": 2.9140345699015328e-05, + "loss": 0.2699, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12777581810951233, + "step": 1595, + "valid_targets_mean": 3732.0, + "valid_targets_min": 1366 + }, + { + "epoch": 2.909918107370337, + "grad_norm": 0.6714113794998876, + "learning_rate": 2.905960806723219e-05, + "loss": 0.286, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13986052572727203, + "step": 1600, + "valid_targets_mean": 3049.1, + "valid_targets_min": 1681 + }, + { + "epoch": 2.91901728844404, + "grad_norm": 0.4996309143387443, + "learning_rate": 2.8978684251626652e-05, + "loss": 0.2661, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1562909185886383, + "step": 1605, + "valid_targets_mean": 6508.9, + "valid_targets_min": 3385 + }, + { + "epoch": 2.9281164695177435, + "grad_norm": 0.5506771309751523, + "learning_rate": 2.8897575915262418e-05, + "loss": 0.2496, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13283151388168335, + "step": 1610, + "valid_targets_mean": 4325.6, + "valid_targets_min": 1944 + }, + { + "epoch": 2.9372156505914466, + "grad_norm": 0.566063666317329, + "learning_rate": 2.8816284724995273e-05, + "loss": 0.2614, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09932658076286316, + "step": 1615, + "valid_targets_mean": 2903.8, + "valid_targets_min": 493 + }, + { + "epoch": 2.94631483166515, + "grad_norm": 0.5468189644584138, + "learning_rate": 2.8734812351438823e-05, + "loss": 0.2622, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11008670181035995, + "step": 1620, + "valid_targets_mean": 3202.6, + "valid_targets_min": 938 + }, + { + "epoch": 2.9554140127388537, + "grad_norm": 0.5886448368356968, + "learning_rate": 2.8653160468930168e-05, + "loss": 0.2618, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1270906925201416, + "step": 1625, + "valid_targets_mean": 3301.6, + "valid_targets_min": 248 + }, + { + "epoch": 2.964513193812557, + "grad_norm": 0.6003432400545452, + "learning_rate": 2.85713307554955e-05, + "loss": 0.2674, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15293045341968536, + "step": 1630, + "valid_targets_mean": 3731.0, + "valid_targets_min": 1299 + }, + { + "epoch": 2.9736123748862604, + "grad_norm": 0.5216458066693334, + "learning_rate": 2.8489324892815604e-05, + "loss": 0.2524, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10243799537420273, + "step": 1635, + "valid_targets_mean": 4290.9, + "valid_targets_min": 2556 + }, + { + "epoch": 2.9827115559599635, + "grad_norm": 0.647682228722322, + "learning_rate": 2.8407144566191315e-05, + "loss": 0.2641, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11987321078777313, + "step": 1640, + "valid_targets_mean": 2687.5, + "valid_targets_min": 331 + }, + { + "epoch": 2.991810737033667, + "grad_norm": 0.5956168466588846, + "learning_rate": 2.8324791464508856e-05, + "loss": 0.2804, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12937332689762115, + "step": 1645, + "valid_targets_mean": 3305.0, + "valid_targets_min": 1403 + }, + { + "epoch": 3.0, + "grad_norm": 0.9029027098902189, + "learning_rate": 2.824226728020516e-05, + "loss": 0.2554, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.30957484245300293, + "step": 1650, + "valid_targets_mean": 3302.6, + "valid_targets_min": 1365 + }, + { + "epoch": 3.0090991810737036, + "grad_norm": 0.6627737045647052, + "learning_rate": 2.8159573709233074e-05, + "loss": 0.2479, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1534401923418045, + "step": 1655, + "valid_targets_mean": 3602.0, + "valid_targets_min": 685 + }, + { + "epoch": 3.0181983621474067, + "grad_norm": 0.6291036345714702, + "learning_rate": 2.80767124510265e-05, + "loss": 0.2109, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12472149729728699, + "step": 1660, + "valid_targets_mean": 3638.4, + "valid_targets_min": 1366 + }, + { + "epoch": 3.02729754322111, + "grad_norm": 0.5541449376554247, + "learning_rate": 2.7993685208465483e-05, + "loss": 0.2474, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1034252867102623, + "step": 1665, + "valid_targets_mean": 4169.1, + "valid_targets_min": 1226 + }, + { + "epoch": 3.0363967242948133, + "grad_norm": 0.6778018211647409, + "learning_rate": 2.7910493687841213e-05, + "loss": 0.2465, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13861678540706635, + "step": 1670, + "valid_targets_mean": 4292.2, + "valid_targets_min": 1284 + }, + { + "epoch": 3.045495905368517, + "grad_norm": 0.6258555524981517, + "learning_rate": 2.7827139598820947e-05, + "loss": 0.2359, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10056846588850021, + "step": 1675, + "valid_targets_mean": 2452.4, + "valid_targets_min": 1017 + }, + { + "epoch": 3.05459508644222, + "grad_norm": 0.5339850920139807, + "learning_rate": 2.774362465441288e-05, + "loss": 0.22, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10936719179153442, + "step": 1680, + "valid_targets_mean": 4660.5, + "valid_targets_min": 313 + }, + { + "epoch": 3.0636942675159236, + "grad_norm": 0.6071416816239227, + "learning_rate": 2.7659950570930956e-05, + "loss": 0.2209, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10698353499174118, + "step": 1685, + "valid_targets_mean": 2520.0, + "valid_targets_min": 328 + }, + { + "epoch": 3.072793448589627, + "grad_norm": 0.5865148303237524, + "learning_rate": 2.7576119067959565e-05, + "loss": 0.2398, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09415340423583984, + "step": 1690, + "valid_targets_mean": 3474.0, + "valid_targets_min": 1947 + }, + { + "epoch": 3.08189262966333, + "grad_norm": 0.6516039766541762, + "learning_rate": 2.7492131868318247e-05, + "loss": 0.2305, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10586540400981903, + "step": 1695, + "valid_targets_mean": 2984.5, + "valid_targets_min": 1568 + }, + { + "epoch": 3.0909918107370338, + "grad_norm": 0.5753829853634493, + "learning_rate": 2.7407990698026227e-05, + "loss": 0.2255, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13014456629753113, + "step": 1700, + "valid_targets_mean": 4518.5, + "valid_targets_min": 298 + }, + { + "epoch": 3.100090991810737, + "grad_norm": 0.712305406096257, + "learning_rate": 2.7323697286266998e-05, + "loss": 0.2451, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15478244423866272, + "step": 1705, + "valid_targets_mean": 3958.9, + "valid_targets_min": 1431 + }, + { + "epoch": 3.1091901728844404, + "grad_norm": 0.5603421184349171, + "learning_rate": 2.7239253365352774e-05, + "loss": 0.2337, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1260107159614563, + "step": 1710, + "valid_targets_mean": 4813.8, + "valid_targets_min": 2219 + }, + { + "epoch": 3.1182893539581436, + "grad_norm": 0.6945149678969376, + "learning_rate": 2.7154660670688867e-05, + "loss": 0.2387, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1382228136062622, + "step": 1715, + "valid_targets_mean": 2869.9, + "valid_targets_min": 1752 + }, + { + "epoch": 3.127388535031847, + "grad_norm": 0.6608358333888942, + "learning_rate": 2.706992094073803e-05, + "loss": 0.2232, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1332719475030899, + "step": 1720, + "valid_targets_mean": 3072.0, + "valid_targets_min": 2326 + }, + { + "epoch": 3.1364877161055507, + "grad_norm": 0.5884814788647655, + "learning_rate": 2.6985035916984746e-05, + "loss": 0.2488, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1449941098690033, + "step": 1725, + "valid_targets_mean": 5181.2, + "valid_targets_min": 3160 + }, + { + "epoch": 3.1455868971792538, + "grad_norm": 0.5685308536656252, + "learning_rate": 2.6900007343899414e-05, + "loss": 0.2331, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11001268029212952, + "step": 1730, + "valid_targets_mean": 4469.2, + "valid_targets_min": 2386 + }, + { + "epoch": 3.1546860782529573, + "grad_norm": 0.692558286911919, + "learning_rate": 2.6814836968902535e-05, + "loss": 0.2378, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14557890594005585, + "step": 1735, + "valid_targets_mean": 3579.2, + "valid_targets_min": 1998 + }, + { + "epoch": 3.1637852593266604, + "grad_norm": 0.589001169535335, + "learning_rate": 2.6729526542328755e-05, + "loss": 0.2372, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11505816876888275, + "step": 1740, + "valid_targets_mean": 4077.8, + "valid_targets_min": 1171 + }, + { + "epoch": 3.172884440400364, + "grad_norm": 0.5372069609633437, + "learning_rate": 2.6644077817390933e-05, + "loss": 0.2351, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11513690650463104, + "step": 1745, + "valid_targets_mean": 4349.6, + "valid_targets_min": 2454 + }, + { + "epoch": 3.1819836214740675, + "grad_norm": 0.6140011398548391, + "learning_rate": 2.6558492550144092e-05, + "loss": 0.2388, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13439062237739563, + "step": 1750, + "valid_targets_mean": 4064.9, + "valid_targets_min": 2142 + }, + { + "epoch": 3.1910828025477707, + "grad_norm": 0.5956060584442296, + "learning_rate": 2.6472772499449323e-05, + "loss": 0.2321, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08770699054002762, + "step": 1755, + "valid_targets_mean": 2663.2, + "valid_targets_min": 1140 + }, + { + "epoch": 3.200181983621474, + "grad_norm": 0.6430662067017598, + "learning_rate": 2.6386919426937655e-05, + "loss": 0.2365, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1511210948228836, + "step": 1760, + "valid_targets_mean": 3950.0, + "valid_targets_min": 545 + }, + { + "epoch": 3.2092811646951773, + "grad_norm": 0.5351581599278695, + "learning_rate": 2.6300935096973858e-05, + "loss": 0.2525, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11395445466041565, + "step": 1765, + "valid_targets_mean": 4045.6, + "valid_targets_min": 2197 + }, + { + "epoch": 3.218380345768881, + "grad_norm": 0.6904452232059044, + "learning_rate": 2.6214821276620157e-05, + "loss": 0.231, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12120945751667023, + "step": 1770, + "valid_targets_mean": 3098.6, + "valid_targets_min": 250 + }, + { + "epoch": 3.227479526842584, + "grad_norm": 0.5777775090819005, + "learning_rate": 2.6128579735599924e-05, + "loss": 0.2216, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08591067045927048, + "step": 1775, + "valid_targets_mean": 1854.9, + "valid_targets_min": 276 + }, + { + "epoch": 3.2365787079162875, + "grad_norm": 0.5250787648078289, + "learning_rate": 2.6042212246261337e-05, + "loss": 0.2391, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10418880730867386, + "step": 1780, + "valid_targets_mean": 3393.4, + "valid_targets_min": 1993 + }, + { + "epoch": 3.245677888989991, + "grad_norm": 0.6748695446147454, + "learning_rate": 2.595572058354092e-05, + "loss": 0.2215, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11711028218269348, + "step": 1785, + "valid_targets_mean": 3478.6, + "valid_targets_min": 1383 + }, + { + "epoch": 3.254777070063694, + "grad_norm": 0.680006289788221, + "learning_rate": 2.5869106524927096e-05, + "loss": 0.2415, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13660980761051178, + "step": 1790, + "valid_targets_mean": 3546.8, + "valid_targets_min": 260 + }, + { + "epoch": 3.2638762511373978, + "grad_norm": 0.6203082184778486, + "learning_rate": 2.5782371850423627e-05, + "loss": 0.2297, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.133883997797966, + "step": 1795, + "valid_targets_mean": 4430.2, + "valid_targets_min": 3003 + }, + { + "epoch": 3.272975432211101, + "grad_norm": 0.592304583983555, + "learning_rate": 2.5695518342513047e-05, + "loss": 0.2287, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11032648384571075, + "step": 1800, + "valid_targets_mean": 3791.2, + "valid_targets_min": 2560 + }, + { + "epoch": 3.2820746132848044, + "grad_norm": 0.6615154368059429, + "learning_rate": 2.5608547786120056e-05, + "loss": 0.235, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10840634256601334, + "step": 1805, + "valid_targets_mean": 3448.1, + "valid_targets_min": 965 + }, + { + "epoch": 3.2911737943585075, + "grad_norm": 0.6061214028536877, + "learning_rate": 2.55214619685748e-05, + "loss": 0.2385, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11647415161132812, + "step": 1810, + "valid_targets_mean": 3058.6, + "valid_targets_min": 251 + }, + { + "epoch": 3.300272975432211, + "grad_norm": 0.612922757802074, + "learning_rate": 2.5434262679576157e-05, + "loss": 0.226, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12078569084405899, + "step": 1815, + "valid_targets_mean": 4058.1, + "valid_targets_min": 275 + }, + { + "epoch": 3.3093721565059147, + "grad_norm": 0.7106121648750277, + "learning_rate": 2.5346951711154946e-05, + "loss": 0.2506, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10854356735944748, + "step": 1820, + "valid_targets_mean": 2593.4, + "valid_targets_min": 324 + }, + { + "epoch": 3.3184713375796178, + "grad_norm": 0.6913113203702447, + "learning_rate": 2.5259530857637125e-05, + "loss": 0.2351, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13775470852851868, + "step": 1825, + "valid_targets_mean": 3313.2, + "valid_targets_min": 2581 + }, + { + "epoch": 3.3275705186533213, + "grad_norm": 0.682855819822667, + "learning_rate": 2.5172001915606883e-05, + "loss": 0.2305, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14159566164016724, + "step": 1830, + "valid_targets_mean": 3209.6, + "valid_targets_min": 496 + }, + { + "epoch": 3.3366696997270244, + "grad_norm": 0.5972089007845723, + "learning_rate": 2.5084366683869746e-05, + "loss": 0.2321, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09136951714754105, + "step": 1835, + "valid_targets_mean": 2752.5, + "valid_targets_min": 1878 + }, + { + "epoch": 3.345768880800728, + "grad_norm": 0.6302061631973847, + "learning_rate": 2.4996626963415577e-05, + "loss": 0.2267, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09223796427249908, + "step": 1840, + "valid_targets_mean": 2731.0, + "valid_targets_min": 289 + }, + { + "epoch": 3.3548680618744315, + "grad_norm": 0.635200704727907, + "learning_rate": 2.4908784557381616e-05, + "loss": 0.2341, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12977059185504913, + "step": 1845, + "valid_targets_mean": 3390.6, + "valid_targets_min": 1780 + }, + { + "epoch": 3.3639672429481347, + "grad_norm": 0.616785501369989, + "learning_rate": 2.4820841271015364e-05, + "loss": 0.2301, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12457115948200226, + "step": 1850, + "valid_targets_mean": 4544.5, + "valid_targets_min": 2092 + }, + { + "epoch": 3.373066424021838, + "grad_norm": 0.5691230207719173, + "learning_rate": 2.4732798911637525e-05, + "loss": 0.2271, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10241171717643738, + "step": 1855, + "valid_targets_mean": 3519.0, + "valid_targets_min": 274 + }, + { + "epoch": 3.3821656050955413, + "grad_norm": 0.6705750722839043, + "learning_rate": 2.4644659288604853e-05, + "loss": 0.2433, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08712461590766907, + "step": 1860, + "valid_targets_mean": 1878.9, + "valid_targets_min": 253 + }, + { + "epoch": 3.391264786169245, + "grad_norm": 0.6859178535407267, + "learning_rate": 2.4556424213272955e-05, + "loss": 0.2149, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10289760679006577, + "step": 1865, + "valid_targets_mean": 2637.1, + "valid_targets_min": 411 + }, + { + "epoch": 3.400363967242948, + "grad_norm": 0.6468930323424454, + "learning_rate": 2.4468095498959086e-05, + "loss": 0.2248, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12468135356903076, + "step": 1870, + "valid_targets_mean": 3236.8, + "valid_targets_min": 341 + }, + { + "epoch": 3.4094631483166515, + "grad_norm": 0.6573233362355302, + "learning_rate": 2.4379674960904867e-05, + "loss": 0.2412, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1340944617986679, + "step": 1875, + "valid_targets_mean": 3330.8, + "valid_targets_min": 785 + }, + { + "epoch": 3.4185623293903546, + "grad_norm": 0.6857110616562382, + "learning_rate": 2.4291164416238994e-05, + "loss": 0.2342, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13505122065544128, + "step": 1880, + "valid_targets_mean": 3543.1, + "valid_targets_min": 1663 + }, + { + "epoch": 3.427661510464058, + "grad_norm": 0.5745741235715318, + "learning_rate": 2.4202565683939872e-05, + "loss": 0.2423, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1247991994023323, + "step": 1885, + "valid_targets_mean": 4356.1, + "valid_targets_min": 1386 + }, + { + "epoch": 3.4367606915377618, + "grad_norm": 0.626758290724901, + "learning_rate": 2.411388058479827e-05, + "loss": 0.2253, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12153643369674683, + "step": 1890, + "valid_targets_mean": 3427.8, + "valid_targets_min": 1926 + }, + { + "epoch": 3.445859872611465, + "grad_norm": 0.6666850067429048, + "learning_rate": 2.402511094137987e-05, + "loss": 0.2139, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14569628238677979, + "step": 1895, + "valid_targets_mean": 4079.9, + "valid_targets_min": 2271 + }, + { + "epoch": 3.4549590536851684, + "grad_norm": 0.6201028472502161, + "learning_rate": 2.3936258577987807e-05, + "loss": 0.2317, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13898374140262604, + "step": 1900, + "valid_targets_mean": 3923.2, + "valid_targets_min": 1147 + }, + { + "epoch": 3.4640582347588715, + "grad_norm": 0.5393992734649197, + "learning_rate": 2.3847325320625223e-05, + "loss": 0.2384, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11783619225025177, + "step": 1905, + "valid_targets_mean": 4982.4, + "valid_targets_min": 2594 + }, + { + "epoch": 3.473157415832575, + "grad_norm": 0.6058665738384407, + "learning_rate": 2.3758312996957676e-05, + "loss": 0.231, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11445163190364838, + "step": 1910, + "valid_targets_mean": 3420.1, + "valid_targets_min": 1665 + }, + { + "epoch": 3.4822565969062786, + "grad_norm": 0.569106347802839, + "learning_rate": 2.366922343627565e-05, + "loss": 0.2201, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1065153181552887, + "step": 1915, + "valid_targets_mean": 3185.2, + "valid_targets_min": 1122 + }, + { + "epoch": 3.4913557779799818, + "grad_norm": 0.6711360787435348, + "learning_rate": 2.358005846945689e-05, + "loss": 0.2336, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13603505492210388, + "step": 1920, + "valid_targets_mean": 3550.5, + "valid_targets_min": 1731 + }, + { + "epoch": 3.5004549590536853, + "grad_norm": 0.6029423817813914, + "learning_rate": 2.349081992892885e-05, + "loss": 0.2365, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1385492980480194, + "step": 1925, + "valid_targets_mean": 4586.5, + "valid_targets_min": 2817 + }, + { + "epoch": 3.5095541401273884, + "grad_norm": 0.5721952870442969, + "learning_rate": 2.3401509648630954e-05, + "loss": 0.2385, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10515997558832169, + "step": 1930, + "valid_targets_mean": 4997.0, + "valid_targets_min": 3277 + }, + { + "epoch": 3.518653321201092, + "grad_norm": 0.6669379189824646, + "learning_rate": 2.331212946397698e-05, + "loss": 0.2434, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12849804759025574, + "step": 1935, + "valid_targets_mean": 3554.0, + "valid_targets_min": 1566 + }, + { + "epoch": 3.5277525022747955, + "grad_norm": 0.5582718810153309, + "learning_rate": 2.3222681211817287e-05, + "loss": 0.2386, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10624731332063675, + "step": 1940, + "valid_targets_mean": 4358.5, + "valid_targets_min": 2948 + }, + { + "epoch": 3.5368516833484986, + "grad_norm": 0.6109402511946427, + "learning_rate": 2.31331667304011e-05, + "loss": 0.2433, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11600244790315628, + "step": 1945, + "valid_targets_mean": 3243.4, + "valid_targets_min": 1742 + }, + { + "epoch": 3.5459508644222018, + "grad_norm": 0.6223125409986712, + "learning_rate": 2.3043587859338735e-05, + "loss": 0.2238, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11614356935024261, + "step": 1950, + "valid_targets_mean": 3193.0, + "valid_targets_min": 1706 + }, + { + "epoch": 3.5550500454959053, + "grad_norm": 0.7483186029922464, + "learning_rate": 2.2953946439563736e-05, + "loss": 0.2392, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1298794150352478, + "step": 1955, + "valid_targets_mean": 2468.0, + "valid_targets_min": 931 + }, + { + "epoch": 3.564149226569609, + "grad_norm": 0.5256637837739685, + "learning_rate": 2.286424431329513e-05, + "loss": 0.2445, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08543480932712555, + "step": 1960, + "valid_targets_mean": 3995.2, + "valid_targets_min": 635 + }, + { + "epoch": 3.573248407643312, + "grad_norm": 0.6920745965137178, + "learning_rate": 2.277448332399949e-05, + "loss": 0.2228, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14779648184776306, + "step": 1965, + "valid_targets_mean": 3713.6, + "valid_targets_min": 362 + }, + { + "epoch": 3.5823475887170155, + "grad_norm": 0.6731284896447256, + "learning_rate": 2.2684665316353112e-05, + "loss": 0.2414, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.136958509683609, + "step": 1970, + "valid_targets_mean": 3991.2, + "valid_targets_min": 2124 + }, + { + "epoch": 3.5914467697907186, + "grad_norm": 0.5687167935216879, + "learning_rate": 2.2594792136204037e-05, + "loss": 0.2388, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1017768532037735, + "step": 1975, + "valid_targets_mean": 3863.9, + "valid_targets_min": 2661 + }, + { + "epoch": 3.600545950864422, + "grad_norm": 0.5722468143481457, + "learning_rate": 2.250486563053419e-05, + "loss": 0.2411, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13285937905311584, + "step": 1980, + "valid_targets_mean": 4369.8, + "valid_targets_min": 336 + }, + { + "epoch": 3.6096451319381258, + "grad_norm": 0.6478782120428274, + "learning_rate": 2.241488764742135e-05, + "loss": 0.242, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11048940569162369, + "step": 1985, + "valid_targets_mean": 3022.1, + "valid_targets_min": 204 + }, + { + "epoch": 3.618744313011829, + "grad_norm": 0.7168992027139708, + "learning_rate": 2.232486003600126e-05, + "loss": 0.2209, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1075039803981781, + "step": 1990, + "valid_targets_mean": 3019.2, + "valid_targets_min": 1241 + }, + { + "epoch": 3.6278434940855324, + "grad_norm": 0.6570252469182816, + "learning_rate": 2.223478464642952e-05, + "loss": 0.2267, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11854603886604309, + "step": 1995, + "valid_targets_mean": 3483.4, + "valid_targets_min": 1784 + }, + { + "epoch": 3.6369426751592355, + "grad_norm": 0.6144475955980022, + "learning_rate": 2.2144663329843653e-05, + "loss": 0.2339, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13097110390663147, + "step": 2000, + "valid_targets_mean": 3481.0, + "valid_targets_min": 905 + }, + { + "epoch": 3.646041856232939, + "grad_norm": 0.6052858271327498, + "learning_rate": 2.205449793832502e-05, + "loss": 0.2284, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10313941538333893, + "step": 2005, + "valid_targets_mean": 2982.5, + "valid_targets_min": 1198 + }, + { + "epoch": 3.6551410373066426, + "grad_norm": 0.44991266551208103, + "learning_rate": 2.1964290324860746e-05, + "loss": 0.216, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0842527225613594, + "step": 2010, + "valid_targets_mean": 3413.9, + "valid_targets_min": 1441 + }, + { + "epoch": 3.6642402183803457, + "grad_norm": 0.6071858600053288, + "learning_rate": 2.1874042343305685e-05, + "loss": 0.2497, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13529914617538452, + "step": 2015, + "valid_targets_mean": 4147.6, + "valid_targets_min": 1343 + }, + { + "epoch": 3.673339399454049, + "grad_norm": 0.47499136647666873, + "learning_rate": 2.1783755848344276e-05, + "loss": 0.222, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09141851961612701, + "step": 2020, + "valid_targets_mean": 5427.8, + "valid_targets_min": 1521 + }, + { + "epoch": 3.6824385805277524, + "grad_norm": 0.6369533030675754, + "learning_rate": 2.1693432695452467e-05, + "loss": 0.2503, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0990004688501358, + "step": 2025, + "valid_targets_mean": 3742.5, + "valid_targets_min": 2285 + }, + { + "epoch": 3.691537761601456, + "grad_norm": 0.5840261880984583, + "learning_rate": 2.1603074740859534e-05, + "loss": 0.2319, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14113259315490723, + "step": 2030, + "valid_targets_mean": 4586.0, + "valid_targets_min": 2810 + }, + { + "epoch": 3.700636942675159, + "grad_norm": 0.6175870857155759, + "learning_rate": 2.1512683841509982e-05, + "loss": 0.242, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1321922242641449, + "step": 2035, + "valid_targets_mean": 4026.5, + "valid_targets_min": 1495 + }, + { + "epoch": 3.7097361237488626, + "grad_norm": 0.6077331223001471, + "learning_rate": 2.1422261855025357e-05, + "loss": 0.2382, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13017022609710693, + "step": 2040, + "valid_targets_mean": 3657.9, + "valid_targets_min": 938 + }, + { + "epoch": 3.7188353048225657, + "grad_norm": 0.5264657471184423, + "learning_rate": 2.133181063966608e-05, + "loss": 0.2257, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1340341866016388, + "step": 2045, + "valid_targets_mean": 5327.8, + "valid_targets_min": 2985 + }, + { + "epoch": 3.7279344858962693, + "grad_norm": 0.47835522815598563, + "learning_rate": 2.1241332054293243e-05, + "loss": 0.2368, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10275916755199432, + "step": 2050, + "valid_targets_mean": 4582.0, + "valid_targets_min": 1621 + }, + { + "epoch": 3.737033666969973, + "grad_norm": 0.6090285873450105, + "learning_rate": 2.115082795833044e-05, + "loss": 0.2339, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13608521223068237, + "step": 2055, + "valid_targets_mean": 4380.6, + "valid_targets_min": 1129 + }, + { + "epoch": 3.746132848043676, + "grad_norm": 0.6171346398603881, + "learning_rate": 2.1060300211725496e-05, + "loss": 0.2266, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06999486684799194, + "step": 2060, + "valid_targets_mean": 2120.9, + "valid_targets_min": 264 + }, + { + "epoch": 3.7552320291173795, + "grad_norm": 0.5078775221738135, + "learning_rate": 2.096975067491233e-05, + "loss": 0.2142, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07993730902671814, + "step": 2065, + "valid_targets_mean": 3289.5, + "valid_targets_min": 1058 + }, + { + "epoch": 3.7643312101910826, + "grad_norm": 0.5666497704776816, + "learning_rate": 2.087918120877263e-05, + "loss": 0.2361, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10957177728414536, + "step": 2070, + "valid_targets_mean": 3678.8, + "valid_targets_min": 2266 + }, + { + "epoch": 3.773430391264786, + "grad_norm": 0.6118108100047842, + "learning_rate": 2.0788593674597663e-05, + "loss": 0.2499, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12245075404644012, + "step": 2075, + "valid_targets_mean": 4218.4, + "valid_targets_min": 2510 + }, + { + "epoch": 3.7825295723384897, + "grad_norm": 0.568781777071965, + "learning_rate": 2.0697989934050025e-05, + "loss": 0.2336, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12045073509216309, + "step": 2080, + "valid_targets_mean": 4220.2, + "valid_targets_min": 788 + }, + { + "epoch": 3.791628753412193, + "grad_norm": 0.5974784123285087, + "learning_rate": 2.0607371849125345e-05, + "loss": 0.2272, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1365862637758255, + "step": 2085, + "valid_targets_mean": 4663.0, + "valid_targets_min": 2105 + }, + { + "epoch": 3.8007279344858964, + "grad_norm": 0.6015774380183622, + "learning_rate": 2.0516741282114062e-05, + "loss": 0.2324, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11920566111803055, + "step": 2090, + "valid_targets_mean": 4117.2, + "valid_targets_min": 918 + }, + { + "epoch": 3.8098271155595995, + "grad_norm": 0.5784731444927348, + "learning_rate": 2.0426100095563132e-05, + "loss": 0.2286, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11204256862401962, + "step": 2095, + "valid_targets_mean": 3581.0, + "valid_targets_min": 1857 + }, + { + "epoch": 3.818926296633303, + "grad_norm": 0.6283724815642847, + "learning_rate": 2.0335450152237742e-05, + "loss": 0.2348, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12869086861610413, + "step": 2100, + "valid_targets_mean": 3699.2, + "valid_targets_min": 323 + }, + { + "epoch": 3.8280254777070066, + "grad_norm": 0.642415100550998, + "learning_rate": 2.0244793315083043e-05, + "loss": 0.2458, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14838621020317078, + "step": 2105, + "valid_targets_mean": 4103.6, + "valid_targets_min": 1857 + }, + { + "epoch": 3.8371246587807097, + "grad_norm": 0.611330076891204, + "learning_rate": 2.0154131447185876e-05, + "loss": 0.2371, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1300453096628189, + "step": 2110, + "valid_targets_mean": 4131.9, + "valid_targets_min": 2222 + }, + { + "epoch": 3.846223839854413, + "grad_norm": 0.6382028796568355, + "learning_rate": 2.0063466411736447e-05, + "loss": 0.2385, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12640610337257385, + "step": 2115, + "valid_targets_mean": 3200.0, + "valid_targets_min": 1079 + }, + { + "epoch": 3.8553230209281164, + "grad_norm": 0.592317547174707, + "learning_rate": 1.997280007199008e-05, + "loss": 0.2202, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14620466530323029, + "step": 2120, + "valid_targets_mean": 4840.1, + "valid_targets_min": 2407 + }, + { + "epoch": 3.86442220200182, + "grad_norm": 0.5955652421896328, + "learning_rate": 1.9882134291228877e-05, + "loss": 0.2247, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12693578004837036, + "step": 2125, + "valid_targets_mean": 3716.4, + "valid_targets_min": 2313 + }, + { + "epoch": 3.873521383075523, + "grad_norm": 0.45425164985864347, + "learning_rate": 1.9791470932723486e-05, + "loss": 0.2363, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06094308942556381, + "step": 2130, + "valid_targets_mean": 2929.5, + "valid_targets_min": 406 + }, + { + "epoch": 3.8826205641492266, + "grad_norm": 0.5746326658191223, + "learning_rate": 1.9700811859694734e-05, + "loss": 0.2153, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12974999845027924, + "step": 2135, + "valid_targets_mean": 4884.5, + "valid_targets_min": 1969 + }, + { + "epoch": 3.8917197452229297, + "grad_norm": 0.6158150136679469, + "learning_rate": 1.961015893527541e-05, + "loss": 0.24, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1246422678232193, + "step": 2140, + "valid_targets_mean": 3744.6, + "valid_targets_min": 271 + }, + { + "epoch": 3.9008189262966333, + "grad_norm": 0.5134096098504591, + "learning_rate": 1.9519514022471933e-05, + "loss": 0.224, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10631200671195984, + "step": 2145, + "valid_targets_mean": 5187.0, + "valid_targets_min": 2402 + }, + { + "epoch": 3.909918107370337, + "grad_norm": 0.6462071879791262, + "learning_rate": 1.942887898412608e-05, + "loss": 0.2441, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10570937395095825, + "step": 2150, + "valid_targets_mean": 3218.5, + "valid_targets_min": 693 + }, + { + "epoch": 3.91901728844404, + "grad_norm": 0.6127776684285441, + "learning_rate": 1.9338255682876682e-05, + "loss": 0.2296, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09252439439296722, + "step": 2155, + "valid_targets_mean": 3153.1, + "valid_targets_min": 293 + }, + { + "epoch": 3.9281164695177435, + "grad_norm": 0.7186098539492584, + "learning_rate": 1.924764598112138e-05, + "loss": 0.2252, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08831615746021271, + "step": 2160, + "valid_targets_mean": 2062.8, + "valid_targets_min": 311 + }, + { + "epoch": 3.9372156505914466, + "grad_norm": 0.6015779882414538, + "learning_rate": 1.9157051740978326e-05, + "loss": 0.2304, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08883961290121078, + "step": 2165, + "valid_targets_mean": 2913.9, + "valid_targets_min": 866 + }, + { + "epoch": 3.94631483166515, + "grad_norm": 0.7661092923072249, + "learning_rate": 1.9066474824247913e-05, + "loss": 0.2451, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11353622376918793, + "step": 2170, + "valid_targets_mean": 4695.0, + "valid_targets_min": 2684 + }, + { + "epoch": 3.9554140127388537, + "grad_norm": 0.6358343088472361, + "learning_rate": 1.8975917092374542e-05, + "loss": 0.2363, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08747850358486176, + "step": 2175, + "valid_targets_mean": 2095.9, + "valid_targets_min": 246 + }, + { + "epoch": 3.964513193812557, + "grad_norm": 0.6356945476266306, + "learning_rate": 1.888538040640831e-05, + "loss": 0.2255, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10770977288484573, + "step": 2180, + "valid_targets_mean": 3358.5, + "valid_targets_min": 1388 + }, + { + "epoch": 3.9736123748862604, + "grad_norm": 0.4680125277198997, + "learning_rate": 1.8794866626966834e-05, + "loss": 0.2267, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1037270575761795, + "step": 2185, + "valid_targets_mean": 6218.2, + "valid_targets_min": 979 + }, + { + "epoch": 3.9827115559599635, + "grad_norm": 0.561916925920865, + "learning_rate": 1.8704377614196963e-05, + "loss": 0.2209, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12491119652986526, + "step": 2190, + "valid_targets_mean": 3551.0, + "valid_targets_min": 2256 + }, + { + "epoch": 3.991810737033667, + "grad_norm": 0.6171678094348222, + "learning_rate": 1.8613915227736584e-05, + "loss": 0.2489, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13132691383361816, + "step": 2195, + "valid_targets_mean": 3616.2, + "valid_targets_min": 1897 + }, + { + "epoch": 4.0, + "grad_norm": 0.8713646274215626, + "learning_rate": 1.852348132667635e-05, + "loss": 0.2441, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2565116882324219, + "step": 2200, + "valid_targets_mean": 3516.8, + "valid_targets_min": 820 + }, + { + "epoch": 4.0090991810737036, + "grad_norm": 0.7610478323964359, + "learning_rate": 1.843307776952155e-05, + "loss": 0.2054, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1043773666024208, + "step": 2205, + "valid_targets_mean": 2845.2, + "valid_targets_min": 419 + }, + { + "epoch": 4.018198362147407, + "grad_norm": 0.6999568780697065, + "learning_rate": 1.834270641415386e-05, + "loss": 0.2276, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09883319586515427, + "step": 2210, + "valid_targets_mean": 3011.8, + "valid_targets_min": 191 + }, + { + "epoch": 4.02729754322111, + "grad_norm": 0.639663595129245, + "learning_rate": 1.8252369117793172e-05, + "loss": 0.2078, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10715536773204803, + "step": 2215, + "valid_targets_mean": 3646.4, + "valid_targets_min": 705 + }, + { + "epoch": 4.036396724294813, + "grad_norm": 0.6155902648302152, + "learning_rate": 1.8162067736959454e-05, + "loss": 0.2153, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08828684687614441, + "step": 2220, + "valid_targets_mean": 3454.4, + "valid_targets_min": 272 + }, + { + "epoch": 4.045495905368517, + "grad_norm": 0.6985946692476135, + "learning_rate": 1.8071804127434545e-05, + "loss": 0.2098, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11581951379776001, + "step": 2225, + "valid_targets_mean": 2963.2, + "valid_targets_min": 343 + }, + { + "epoch": 4.05459508644222, + "grad_norm": 0.6305907311339457, + "learning_rate": 1.7981580144224066e-05, + "loss": 0.2029, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1013331338763237, + "step": 2230, + "valid_targets_mean": 3289.9, + "valid_targets_min": 1675 + }, + { + "epoch": 4.063694267515924, + "grad_norm": 0.5757227023580752, + "learning_rate": 1.7891397641519272e-05, + "loss": 0.1952, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08844532072544098, + "step": 2235, + "valid_targets_mean": 3442.2, + "valid_targets_min": 462 + }, + { + "epoch": 4.072793448589627, + "grad_norm": 0.6815233873910455, + "learning_rate": 1.7801258472658964e-05, + "loss": 0.2011, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11015966534614563, + "step": 2240, + "valid_targets_mean": 3494.4, + "valid_targets_min": 1363 + }, + { + "epoch": 4.08189262966333, + "grad_norm": 0.5289726572096387, + "learning_rate": 1.7711164490091365e-05, + "loss": 0.2032, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09975562989711761, + "step": 2245, + "valid_targets_mean": 5850.0, + "valid_targets_min": 2197 + }, + { + "epoch": 4.090991810737034, + "grad_norm": 0.6366132972111163, + "learning_rate": 1.7621117545336098e-05, + "loss": 0.2079, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10522039234638214, + "step": 2250, + "valid_targets_mean": 3253.4, + "valid_targets_min": 1322 + }, + { + "epoch": 4.100090991810737, + "grad_norm": 0.718026894601455, + "learning_rate": 1.7531119488946107e-05, + "loss": 0.2149, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15831488370895386, + "step": 2255, + "valid_targets_mean": 4147.0, + "valid_targets_min": 2306 + }, + { + "epoch": 4.10919017288444, + "grad_norm": 0.6415953800163945, + "learning_rate": 1.7441172170469634e-05, + "loss": 0.208, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12810300290584564, + "step": 2260, + "valid_targets_mean": 4011.8, + "valid_targets_min": 1456 + }, + { + "epoch": 4.1182893539581436, + "grad_norm": 0.6763238164852513, + "learning_rate": 1.7351277438412197e-05, + "loss": 0.2117, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09420953691005707, + "step": 2265, + "valid_targets_mean": 2700.8, + "valid_targets_min": 443 + }, + { + "epoch": 4.127388535031847, + "grad_norm": 0.6513239865451258, + "learning_rate": 1.726143714019862e-05, + "loss": 0.2098, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1157243549823761, + "step": 2270, + "valid_targets_mean": 3612.5, + "valid_targets_min": 258 + }, + { + "epoch": 4.136487716105551, + "grad_norm": 0.669251316448323, + "learning_rate": 1.7171653122135065e-05, + "loss": 0.2144, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11020094156265259, + "step": 2275, + "valid_targets_mean": 3440.4, + "valid_targets_min": 411 + }, + { + "epoch": 4.145586897179254, + "grad_norm": 0.6459739960236776, + "learning_rate": 1.708192722937106e-05, + "loss": 0.2024, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1289825737476349, + "step": 2280, + "valid_targets_mean": 4582.9, + "valid_targets_min": 1766 + }, + { + "epoch": 4.154686078252957, + "grad_norm": 0.5874955425165647, + "learning_rate": 1.6992261305861635e-05, + "loss": 0.2041, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0869600772857666, + "step": 2285, + "valid_targets_mean": 3355.1, + "valid_targets_min": 1226 + }, + { + "epoch": 4.16378525932666, + "grad_norm": 0.5381128847881688, + "learning_rate": 1.6902657194329357e-05, + "loss": 0.2016, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10084788501262665, + "step": 2290, + "valid_targets_mean": 4416.6, + "valid_targets_min": 1936 + }, + { + "epoch": 4.172884440400364, + "grad_norm": 0.6222544662905637, + "learning_rate": 1.681311673622651e-05, + "loss": 0.2046, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07266297936439514, + "step": 2295, + "valid_targets_mean": 2939.8, + "valid_targets_min": 291 + }, + { + "epoch": 4.1819836214740675, + "grad_norm": 0.6319977100323101, + "learning_rate": 1.6723641771697246e-05, + "loss": 0.2067, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1194155216217041, + "step": 2300, + "valid_targets_mean": 3916.6, + "valid_targets_min": 518 + }, + { + "epoch": 4.191082802547771, + "grad_norm": 0.591815007892247, + "learning_rate": 1.663423413953976e-05, + "loss": 0.2171, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10994294285774231, + "step": 2305, + "valid_targets_mean": 4506.1, + "valid_targets_min": 1525 + }, + { + "epoch": 4.200181983621474, + "grad_norm": 0.6379876571697218, + "learning_rate": 1.6544895677168483e-05, + "loss": 0.2097, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08801357448101044, + "step": 2310, + "valid_targets_mean": 3171.1, + "valid_targets_min": 502 + }, + { + "epoch": 4.209281164695177, + "grad_norm": 0.6711685746669366, + "learning_rate": 1.6455628220576357e-05, + "loss": 0.2176, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0922049880027771, + "step": 2315, + "valid_targets_mean": 3022.6, + "valid_targets_min": 335 + }, + { + "epoch": 4.218380345768881, + "grad_norm": 0.6513788938326086, + "learning_rate": 1.6366433604297072e-05, + "loss": 0.2042, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1115066409111023, + "step": 2320, + "valid_targets_mean": 4586.1, + "valid_targets_min": 683 + }, + { + "epoch": 4.227479526842584, + "grad_norm": 0.6872049756707485, + "learning_rate": 1.62773136613674e-05, + "loss": 0.2183, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11089625954627991, + "step": 2325, + "valid_targets_mean": 3216.5, + "valid_targets_min": 1052 + }, + { + "epoch": 4.236578707916287, + "grad_norm": 0.6140058427675289, + "learning_rate": 1.6188270223289483e-05, + "loss": 0.2024, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09252717345952988, + "step": 2330, + "valid_targets_mean": 3930.9, + "valid_targets_min": 2630 + }, + { + "epoch": 4.245677888989991, + "grad_norm": 0.5704927973308062, + "learning_rate": 1.609930511999321e-05, + "loss": 0.2008, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09383821487426758, + "step": 2335, + "valid_targets_mean": 3737.4, + "valid_targets_min": 1864 + }, + { + "epoch": 4.254777070063694, + "grad_norm": 0.6500476887222959, + "learning_rate": 1.6010420179798623e-05, + "loss": 0.2221, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10442210733890533, + "step": 2340, + "valid_targets_mean": 3499.1, + "valid_targets_min": 1180 + }, + { + "epoch": 4.263876251137398, + "grad_norm": 0.6077011125705658, + "learning_rate": 1.5921617229378338e-05, + "loss": 0.2113, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09526053071022034, + "step": 2345, + "valid_targets_mean": 3782.8, + "valid_targets_min": 1700 + }, + { + "epoch": 4.272975432211101, + "grad_norm": 0.6716438775104191, + "learning_rate": 1.583289809372e-05, + "loss": 0.2147, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10904216766357422, + "step": 2350, + "valid_targets_mean": 3186.6, + "valid_targets_min": 293 + }, + { + "epoch": 4.282074613284804, + "grad_norm": 0.6442033894228911, + "learning_rate": 1.5744264596088763e-05, + "loss": 0.2127, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09692924469709396, + "step": 2355, + "valid_targets_mean": 3195.0, + "valid_targets_min": 1919 + }, + { + "epoch": 4.2911737943585075, + "grad_norm": 0.605259860092484, + "learning_rate": 1.5655718557989848e-05, + "loss": 0.2081, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12487993389368057, + "step": 2360, + "valid_targets_mean": 4756.9, + "valid_targets_min": 174 + }, + { + "epoch": 4.300272975432211, + "grad_norm": 0.6012856431758457, + "learning_rate": 1.5567261799131102e-05, + "loss": 0.2041, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11706040799617767, + "step": 2365, + "valid_targets_mean": 4453.1, + "valid_targets_min": 1956 + }, + { + "epoch": 4.309372156505915, + "grad_norm": 0.6662252762890644, + "learning_rate": 1.5478896137385584e-05, + "loss": 0.2046, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11574854701757431, + "step": 2370, + "valid_targets_mean": 4047.9, + "valid_targets_min": 1240 + }, + { + "epoch": 4.318471337579618, + "grad_norm": 0.6949646893771946, + "learning_rate": 1.5390623388754232e-05, + "loss": 0.2171, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13453787565231323, + "step": 2375, + "valid_targets_mean": 3733.9, + "valid_targets_min": 246 + }, + { + "epoch": 4.327570518653321, + "grad_norm": 0.6364584731603552, + "learning_rate": 1.5302445367328507e-05, + "loss": 0.2065, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09400258213281631, + "step": 2380, + "valid_targets_mean": 3628.4, + "valid_targets_min": 2326 + }, + { + "epoch": 4.336669699727024, + "grad_norm": 0.6981094613137421, + "learning_rate": 1.5214363885253156e-05, + "loss": 0.2029, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11428219079971313, + "step": 2385, + "valid_targets_mean": 3146.9, + "valid_targets_min": 1342 + }, + { + "epoch": 4.345768880800728, + "grad_norm": 0.5204287394542879, + "learning_rate": 1.5126380752688934e-05, + "loss": 0.2029, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10040925443172455, + "step": 2390, + "valid_targets_mean": 5846.6, + "valid_targets_min": 1070 + }, + { + "epoch": 4.3548680618744315, + "grad_norm": 0.6916260441992553, + "learning_rate": 1.503849777777543e-05, + "loss": 0.2009, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12089954316616058, + "step": 2395, + "valid_targets_mean": 3570.8, + "valid_targets_min": 174 + }, + { + "epoch": 4.363967242948135, + "grad_norm": 0.6330493436940652, + "learning_rate": 1.4950716766593872e-05, + "loss": 0.2047, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11220642179250717, + "step": 2400, + "valid_targets_mean": 4110.6, + "valid_targets_min": 2053 + }, + { + "epoch": 4.373066424021838, + "grad_norm": 0.721664357235738, + "learning_rate": 1.4863039523130054e-05, + "loss": 0.2024, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1065458357334137, + "step": 2405, + "valid_targets_mean": 3365.8, + "valid_targets_min": 643 + }, + { + "epoch": 4.382165605095541, + "grad_norm": 0.6645783192296282, + "learning_rate": 1.4775467849237234e-05, + "loss": 0.2137, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09038594365119934, + "step": 2410, + "valid_targets_mean": 2629.5, + "valid_targets_min": 1128 + }, + { + "epoch": 4.391264786169245, + "grad_norm": 0.6881468289493442, + "learning_rate": 1.468800354459912e-05, + "loss": 0.2165, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10560519993305206, + "step": 2415, + "valid_targets_mean": 2953.2, + "valid_targets_min": 1138 + }, + { + "epoch": 4.400363967242948, + "grad_norm": 0.6931148166696036, + "learning_rate": 1.4600648406692863e-05, + "loss": 0.2123, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11844750493764877, + "step": 2420, + "valid_targets_mean": 3428.8, + "valid_targets_min": 1667 + }, + { + "epoch": 4.409463148316651, + "grad_norm": 0.6526904247649924, + "learning_rate": 1.451340423075214e-05, + "loss": 0.2079, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12365461885929108, + "step": 2425, + "valid_targets_mean": 3943.5, + "valid_targets_min": 384 + }, + { + "epoch": 4.418562329390355, + "grad_norm": 0.6996140130562708, + "learning_rate": 1.4426272809730248e-05, + "loss": 0.2163, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09634485095739365, + "step": 2430, + "valid_targets_mean": 3138.9, + "valid_targets_min": 348 + }, + { + "epoch": 4.427661510464058, + "grad_norm": 0.737117477945688, + "learning_rate": 1.433925593426326e-05, + "loss": 0.2152, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10732056200504303, + "step": 2435, + "valid_targets_mean": 3061.6, + "valid_targets_min": 1399 + }, + { + "epoch": 4.436760691537762, + "grad_norm": 0.7083417741605627, + "learning_rate": 1.4252355392633237e-05, + "loss": 0.2129, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10135871171951294, + "step": 2440, + "valid_targets_mean": 2752.9, + "valid_targets_min": 1729 + }, + { + "epoch": 4.445859872611465, + "grad_norm": 0.6487462790943196, + "learning_rate": 1.4165572970731435e-05, + "loss": 0.208, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11768800020217896, + "step": 2445, + "valid_targets_mean": 3779.2, + "valid_targets_min": 1305 + }, + { + "epoch": 4.454959053685168, + "grad_norm": 0.6223053953705814, + "learning_rate": 1.4078910452021664e-05, + "loss": 0.2145, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10405602306127548, + "step": 2450, + "valid_targets_mean": 3966.5, + "valid_targets_min": 1867 + }, + { + "epoch": 4.4640582347588715, + "grad_norm": 0.5530441312033447, + "learning_rate": 1.3992369617503594e-05, + "loss": 0.2071, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07695738971233368, + "step": 2455, + "valid_targets_mean": 3257.8, + "valid_targets_min": 1033 + }, + { + "epoch": 4.473157415832575, + "grad_norm": 0.4801472386964888, + "learning_rate": 1.3905952245676173e-05, + "loss": 0.2026, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07861844450235367, + "step": 2460, + "valid_targets_mean": 5649.1, + "valid_targets_min": 319 + }, + { + "epoch": 4.482256596906279, + "grad_norm": 0.539148660231581, + "learning_rate": 1.3819660112501054e-05, + "loss": 0.1955, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08029848337173462, + "step": 2465, + "valid_targets_mean": 3955.0, + "valid_targets_min": 479 + }, + { + "epoch": 4.491355777979982, + "grad_norm": 0.6401695191719436, + "learning_rate": 1.3733494991366128e-05, + "loss": 0.2162, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09944336116313934, + "step": 2470, + "valid_targets_mean": 3213.2, + "valid_targets_min": 1200 + }, + { + "epoch": 4.500454959053685, + "grad_norm": 0.5262655333217622, + "learning_rate": 1.364745865304906e-05, + "loss": 0.2122, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09738464653491974, + "step": 2475, + "valid_targets_mean": 4905.0, + "valid_targets_min": 1553 + }, + { + "epoch": 4.509554140127388, + "grad_norm": 0.6146356778717295, + "learning_rate": 1.3561552865680899e-05, + "loss": 0.2011, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10408352315425873, + "step": 2480, + "valid_targets_mean": 3832.9, + "valid_targets_min": 2663 + }, + { + "epoch": 4.518653321201092, + "grad_norm": 0.7659267914126828, + "learning_rate": 1.3475779394709754e-05, + "loss": 0.2134, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1010979562997818, + "step": 2485, + "valid_targets_mean": 2808.4, + "valid_targets_min": 278 + }, + { + "epoch": 4.5277525022747955, + "grad_norm": 0.510564342275182, + "learning_rate": 1.3390140002864481e-05, + "loss": 0.1926, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08257513493299484, + "step": 2490, + "valid_targets_mean": 4374.2, + "valid_targets_min": 288 + }, + { + "epoch": 4.536851683348498, + "grad_norm": 0.6523581194287932, + "learning_rate": 1.3304636450118495e-05, + "loss": 0.2065, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08213072270154953, + "step": 2495, + "valid_targets_mean": 2523.9, + "valid_targets_min": 296 + }, + { + "epoch": 4.545950864422202, + "grad_norm": 0.5793152107148682, + "learning_rate": 1.3219270493653587e-05, + "loss": 0.2016, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08051525056362152, + "step": 2500, + "valid_targets_mean": 3704.6, + "valid_targets_min": 1690 + }, + { + "epoch": 4.555050045495905, + "grad_norm": 0.6131550258175434, + "learning_rate": 1.3134043887823807e-05, + "loss": 0.207, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09287137538194656, + "step": 2505, + "valid_targets_mean": 3077.4, + "valid_targets_min": 1546 + }, + { + "epoch": 4.564149226569609, + "grad_norm": 0.7031749159784869, + "learning_rate": 1.3048958384119397e-05, + "loss": 0.2255, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11600330471992493, + "step": 2510, + "valid_targets_mean": 3279.9, + "valid_targets_min": 234 + }, + { + "epoch": 4.573248407643312, + "grad_norm": 0.7014191302183409, + "learning_rate": 1.2964015731130836e-05, + "loss": 0.2081, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1107967346906662, + "step": 2515, + "valid_targets_mean": 2881.2, + "valid_targets_min": 303 + }, + { + "epoch": 4.582347588717015, + "grad_norm": 0.6277578775645954, + "learning_rate": 1.2879217674512865e-05, + "loss": 0.2151, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09785157442092896, + "step": 2520, + "valid_targets_mean": 3342.5, + "valid_targets_min": 1204 + }, + { + "epoch": 4.591446769790719, + "grad_norm": 0.5777297826894398, + "learning_rate": 1.279456595694864e-05, + "loss": 0.2064, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09805427491664886, + "step": 2525, + "valid_targets_mean": 3845.9, + "valid_targets_min": 2171 + }, + { + "epoch": 4.600545950864422, + "grad_norm": 0.6598092832045845, + "learning_rate": 1.2710062318113887e-05, + "loss": 0.2154, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11755596101284027, + "step": 2530, + "valid_targets_mean": 3864.6, + "valid_targets_min": 1366 + }, + { + "epoch": 4.609645131938126, + "grad_norm": 0.6671458531470443, + "learning_rate": 1.2625708494641188e-05, + "loss": 0.2144, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1005607545375824, + "step": 2535, + "valid_targets_mean": 3189.8, + "valid_targets_min": 281 + }, + { + "epoch": 4.618744313011829, + "grad_norm": 0.6140756298428148, + "learning_rate": 1.2541506220084262e-05, + "loss": 0.2042, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09862984716892242, + "step": 2540, + "valid_targets_mean": 3352.1, + "valid_targets_min": 832 + }, + { + "epoch": 4.627843494085532, + "grad_norm": 0.6276792709672552, + "learning_rate": 1.2457457224882356e-05, + "loss": 0.2156, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12656915187835693, + "step": 2545, + "valid_targets_mean": 4384.1, + "valid_targets_min": 2344 + }, + { + "epoch": 4.6369426751592355, + "grad_norm": 0.5611724864069938, + "learning_rate": 1.237356323632468e-05, + "loss": 0.2194, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08815105259418488, + "step": 2550, + "valid_targets_mean": 3377.1, + "valid_targets_min": 844 + }, + { + "epoch": 4.646041856232939, + "grad_norm": 0.5969416948846179, + "learning_rate": 1.2289825978514882e-05, + "loss": 0.1953, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1119343563914299, + "step": 2555, + "valid_targets_mean": 5220.2, + "valid_targets_min": 224 + }, + { + "epoch": 4.655141037306643, + "grad_norm": 0.6312969359281144, + "learning_rate": 1.2206247172335662e-05, + "loss": 0.2027, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10778310894966125, + "step": 2560, + "valid_targets_mean": 3573.1, + "valid_targets_min": 1281 + }, + { + "epoch": 4.664240218380346, + "grad_norm": 0.6080986613871091, + "learning_rate": 1.2122828535413378e-05, + "loss": 0.2196, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09418709576129913, + "step": 2565, + "valid_targets_mean": 4105.8, + "valid_targets_min": 983 + }, + { + "epoch": 4.673339399454049, + "grad_norm": 0.5115892671372317, + "learning_rate": 1.2039571782082762e-05, + "loss": 0.1972, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09335912019014359, + "step": 2570, + "valid_targets_mean": 5629.6, + "valid_targets_min": 1092 + }, + { + "epoch": 4.682438580527752, + "grad_norm": 0.5143277504292695, + "learning_rate": 1.1956478623351652e-05, + "loss": 0.2062, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08968564122915268, + "step": 2575, + "valid_targets_mean": 3954.0, + "valid_targets_min": 266 + }, + { + "epoch": 4.691537761601456, + "grad_norm": 0.5350470272834709, + "learning_rate": 1.187355076686589e-05, + "loss": 0.2192, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09997965395450592, + "step": 2580, + "valid_targets_mean": 4128.8, + "valid_targets_min": 1291 + }, + { + "epoch": 4.7006369426751595, + "grad_norm": 0.575219861090753, + "learning_rate": 1.1790789916874172e-05, + "loss": 0.2052, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09727989137172699, + "step": 2585, + "valid_targets_mean": 4808.1, + "valid_targets_min": 1380 + }, + { + "epoch": 4.709736123748863, + "grad_norm": 0.4802440243107621, + "learning_rate": 1.1708197774193055e-05, + "loss": 0.2034, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06863594055175781, + "step": 2590, + "valid_targets_mean": 3348.6, + "valid_targets_min": 296 + }, + { + "epoch": 4.718835304822566, + "grad_norm": 0.6089877547455099, + "learning_rate": 1.1625776036172006e-05, + "loss": 0.2166, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11346261203289032, + "step": 2595, + "valid_targets_mean": 4466.2, + "valid_targets_min": 3012 + }, + { + "epoch": 4.727934485896269, + "grad_norm": 0.6172209994895129, + "learning_rate": 1.1543526396658475e-05, + "loss": 0.2217, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0995795875787735, + "step": 2600, + "valid_targets_mean": 3648.6, + "valid_targets_min": 1229 + }, + { + "epoch": 4.737033666969973, + "grad_norm": 0.7217327753696113, + "learning_rate": 1.1461450545963167e-05, + "loss": 0.2136, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09528639167547226, + "step": 2605, + "valid_targets_mean": 2344.5, + "valid_targets_min": 882 + }, + { + "epoch": 4.746132848043676, + "grad_norm": 0.5818659549176181, + "learning_rate": 1.137955017082521e-05, + "loss": 0.2093, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12783287465572357, + "step": 2610, + "valid_targets_mean": 5329.2, + "valid_targets_min": 2962 + }, + { + "epoch": 4.755232029117379, + "grad_norm": 0.6354180951281558, + "learning_rate": 1.1297826954377587e-05, + "loss": 0.2091, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10776399075984955, + "step": 2615, + "valid_targets_mean": 3988.8, + "valid_targets_min": 2456 + }, + { + "epoch": 4.764331210191083, + "grad_norm": 0.5491068184424446, + "learning_rate": 1.1216282576112436e-05, + "loss": 0.2153, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08702103793621063, + "step": 2620, + "valid_targets_mean": 4804.8, + "valid_targets_min": 1155 + }, + { + "epoch": 4.773430391264786, + "grad_norm": 0.6451125893227769, + "learning_rate": 1.1134918711846651e-05, + "loss": 0.197, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08675537258386612, + "step": 2625, + "valid_targets_mean": 3419.2, + "valid_targets_min": 1706 + }, + { + "epoch": 4.78252957233849, + "grad_norm": 0.47554214997356037, + "learning_rate": 1.1053737033687346e-05, + "loss": 0.1997, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11160045862197876, + "step": 2630, + "valid_targets_mean": 6470.4, + "valid_targets_min": 2796 + }, + { + "epoch": 4.791628753412192, + "grad_norm": 0.6647977556732426, + "learning_rate": 1.097273920999757e-05, + "loss": 0.2081, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10980549454689026, + "step": 2635, + "valid_targets_mean": 3851.6, + "valid_targets_min": 1611 + }, + { + "epoch": 4.800727934485896, + "grad_norm": 0.7333185609047528, + "learning_rate": 1.0891926905361948e-05, + "loss": 0.206, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09020430594682693, + "step": 2640, + "valid_targets_mean": 2755.9, + "valid_targets_min": 1097 + }, + { + "epoch": 4.8098271155595995, + "grad_norm": 0.8175132256728149, + "learning_rate": 1.081130178055251e-05, + "loss": 0.2003, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10943802446126938, + "step": 2645, + "valid_targets_mean": 3038.6, + "valid_targets_min": 649 + }, + { + "epoch": 4.818926296633303, + "grad_norm": 0.5704955810867999, + "learning_rate": 1.0730865492494593e-05, + "loss": 0.2025, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0999983698129654, + "step": 2650, + "valid_targets_mean": 3916.9, + "valid_targets_min": 530 + }, + { + "epoch": 4.828025477707007, + "grad_norm": 0.5812062616005016, + "learning_rate": 1.0650619694232704e-05, + "loss": 0.197, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08837728202342987, + "step": 2655, + "valid_targets_mean": 4211.1, + "valid_targets_min": 1487 + }, + { + "epoch": 4.837124658780709, + "grad_norm": 0.6858967753840393, + "learning_rate": 1.057056603489665e-05, + "loss": 0.2221, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08689923584461212, + "step": 2660, + "valid_targets_mean": 2381.0, + "valid_targets_min": 734 + }, + { + "epoch": 4.846223839854413, + "grad_norm": 0.5225583496996413, + "learning_rate": 1.0490706159667534e-05, + "loss": 0.2016, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10315491259098053, + "step": 2665, + "valid_targets_mean": 5551.4, + "valid_targets_min": 261 + }, + { + "epoch": 4.855323020928116, + "grad_norm": 0.6353564860251033, + "learning_rate": 1.0411041709744063e-05, + "loss": 0.2155, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10783381760120392, + "step": 2670, + "valid_targets_mean": 3868.9, + "valid_targets_min": 1684 + }, + { + "epoch": 4.86442220200182, + "grad_norm": 0.5953265038631231, + "learning_rate": 1.0331574322308722e-05, + "loss": 0.2296, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1189485713839531, + "step": 2675, + "valid_targets_mean": 5104.8, + "valid_targets_min": 3054 + }, + { + "epoch": 4.8735213830755235, + "grad_norm": 0.6415429927593447, + "learning_rate": 1.0252305630494201e-05, + "loss": 0.2154, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12192051112651825, + "step": 2680, + "valid_targets_mean": 4007.9, + "valid_targets_min": 2307 + }, + { + "epoch": 4.882620564149226, + "grad_norm": 0.6184392622335321, + "learning_rate": 1.0173237263349776e-05, + "loss": 0.218, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11877385526895523, + "step": 2685, + "valid_targets_mean": 4085.5, + "valid_targets_min": 2538 + }, + { + "epoch": 4.89171974522293, + "grad_norm": 0.7931778991412655, + "learning_rate": 1.0094370845807857e-05, + "loss": 0.2072, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08358285576105118, + "step": 2690, + "valid_targets_mean": 3763.9, + "valid_targets_min": 862 + }, + { + "epoch": 4.900818926296633, + "grad_norm": 0.6311410360214902, + "learning_rate": 1.001570799865061e-05, + "loss": 0.2087, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11487400531768799, + "step": 2695, + "valid_targets_mean": 4217.5, + "valid_targets_min": 1774 + }, + { + "epoch": 4.909918107370337, + "grad_norm": 0.7140049951695571, + "learning_rate": 9.937250338476607e-06, + "loss": 0.2025, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10089785605669022, + "step": 2700, + "valid_targets_mean": 3144.9, + "valid_targets_min": 270 + }, + { + "epoch": 4.91901728844404, + "grad_norm": 0.6106766257514653, + "learning_rate": 9.858999477667656e-06, + "loss": 0.2104, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11438564956188202, + "step": 2705, + "valid_targets_mean": 4361.9, + "valid_targets_min": 2272 + }, + { + "epoch": 4.928116469517743, + "grad_norm": 0.6079273725174243, + "learning_rate": 9.780957024355591e-06, + "loss": 0.2153, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12418875843286514, + "step": 2710, + "valid_targets_mean": 4085.1, + "valid_targets_min": 1507 + }, + { + "epoch": 4.937215650591447, + "grad_norm": 0.6640440215081552, + "learning_rate": 9.703124582389312e-06, + "loss": 0.2035, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10496009886264801, + "step": 2715, + "valid_targets_mean": 3474.0, + "valid_targets_min": 623 + }, + { + "epoch": 4.94631483166515, + "grad_norm": 0.5074954317268019, + "learning_rate": 9.62550375130175e-06, + "loss": 0.2095, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11362551897764206, + "step": 2720, + "valid_targets_mean": 6144.5, + "valid_targets_min": 2101 + }, + { + "epoch": 4.955414012738854, + "grad_norm": 0.6141137566846254, + "learning_rate": 9.548096126277058e-06, + "loss": 0.2038, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10736232250928879, + "step": 2725, + "valid_targets_mean": 4007.6, + "valid_targets_min": 782 + }, + { + "epoch": 4.964513193812557, + "grad_norm": 0.7037671666670862, + "learning_rate": 9.470903298117744e-06, + "loss": 0.2149, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08674223721027374, + "step": 2730, + "valid_targets_mean": 2620.1, + "valid_targets_min": 503 + }, + { + "epoch": 4.97361237488626, + "grad_norm": 0.6720492919968285, + "learning_rate": 9.393926853212083e-06, + "loss": 0.222, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09978747367858887, + "step": 2735, + "valid_targets_mean": 2898.4, + "valid_targets_min": 312 + }, + { + "epoch": 4.9827115559599635, + "grad_norm": 0.6640282659720391, + "learning_rate": 9.317168373501426e-06, + "loss": 0.2098, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12521491944789886, + "step": 2740, + "valid_targets_mean": 3868.2, + "valid_targets_min": 717 + }, + { + "epoch": 4.991810737033667, + "grad_norm": 0.639234313775148, + "learning_rate": 9.240629436447752e-06, + "loss": 0.2084, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10244851559400558, + "step": 2745, + "valid_targets_mean": 3245.0, + "valid_targets_min": 1846 + }, + { + "epoch": 5.0, + "grad_norm": 0.8400368899966859, + "learning_rate": 9.164311615001202e-06, + "loss": 0.1959, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21654963493347168, + "step": 2750, + "valid_targets_mean": 4341.1, + "valid_targets_min": 2991 + }, + { + "epoch": 5.0090991810737036, + "grad_norm": 0.6599564352988212, + "learning_rate": 9.08821647756778e-06, + "loss": 0.1955, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09215378761291504, + "step": 2755, + "valid_targets_mean": 3409.8, + "valid_targets_min": 1066 + }, + { + "epoch": 5.018198362147407, + "grad_norm": 0.6391140878318657, + "learning_rate": 9.012345587977129e-06, + "loss": 0.1922, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10837959498167038, + "step": 2760, + "valid_targets_mean": 3909.9, + "valid_targets_min": 1191 + }, + { + "epoch": 5.02729754322111, + "grad_norm": 0.6596315232469888, + "learning_rate": 8.936700505450356e-06, + "loss": 0.1846, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0778820738196373, + "step": 2765, + "valid_targets_mean": 3222.8, + "valid_targets_min": 651 + }, + { + "epoch": 5.036396724294813, + "grad_norm": 0.6659317308109189, + "learning_rate": 8.861282784568045e-06, + "loss": 0.1956, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09588353335857391, + "step": 2770, + "valid_targets_mean": 3816.9, + "valid_targets_min": 1487 + }, + { + "epoch": 5.045495905368517, + "grad_norm": 0.6408446103459217, + "learning_rate": 8.786093975238226e-06, + "loss": 0.1828, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10558507591485977, + "step": 2775, + "valid_targets_mean": 3739.4, + "valid_targets_min": 905 + }, + { + "epoch": 5.05459508644222, + "grad_norm": 0.6889473769582733, + "learning_rate": 8.711135622664622e-06, + "loss": 0.2025, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09545275568962097, + "step": 2780, + "valid_targets_mean": 3136.0, + "valid_targets_min": 774 + }, + { + "epoch": 5.063694267515924, + "grad_norm": 0.6209629716703886, + "learning_rate": 8.636409267314806e-06, + "loss": 0.1969, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11099449545145035, + "step": 2785, + "valid_targets_mean": 4257.4, + "valid_targets_min": 2079 + }, + { + "epoch": 5.072793448589627, + "grad_norm": 0.7331847549756079, + "learning_rate": 8.561916444888618e-06, + "loss": 0.1865, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07423612475395203, + "step": 2790, + "valid_targets_mean": 2260.2, + "valid_targets_min": 532 + }, + { + "epoch": 5.08189262966333, + "grad_norm": 0.6779516599757347, + "learning_rate": 8.487658686286533e-06, + "loss": 0.194, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08769124746322632, + "step": 2795, + "valid_targets_mean": 3655.4, + "valid_targets_min": 1305 + }, + { + "epoch": 5.090991810737034, + "grad_norm": 0.5999220678378101, + "learning_rate": 8.413637517578246e-06, + "loss": 0.1857, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07237271219491959, + "step": 2800, + "valid_targets_mean": 2676.8, + "valid_targets_min": 906 + }, + { + "epoch": 5.100090991810737, + "grad_norm": 0.644834454156687, + "learning_rate": 8.339854459971313e-06, + "loss": 0.1982, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11657403409481049, + "step": 2805, + "valid_targets_mean": 4839.6, + "valid_targets_min": 2415 + }, + { + "epoch": 5.10919017288444, + "grad_norm": 0.7045688864470244, + "learning_rate": 8.266311029779843e-06, + "loss": 0.1906, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1073337197303772, + "step": 2810, + "valid_targets_mean": 3931.5, + "valid_targets_min": 1637 + }, + { + "epoch": 5.1182893539581436, + "grad_norm": 0.565876863581839, + "learning_rate": 8.193008738393409e-06, + "loss": 0.1926, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0898812934756279, + "step": 2815, + "valid_targets_mean": 4452.4, + "valid_targets_min": 1995 + }, + { + "epoch": 5.127388535031847, + "grad_norm": 0.6506292732182613, + "learning_rate": 8.119949092245893e-06, + "loss": 0.1915, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11412489414215088, + "step": 2820, + "valid_targets_mean": 4151.5, + "valid_targets_min": 1833 + }, + { + "epoch": 5.136487716105551, + "grad_norm": 0.5027182726275019, + "learning_rate": 8.047133592784626e-06, + "loss": 0.1777, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.052972182631492615, + "step": 2825, + "valid_targets_mean": 2347.1, + "valid_targets_min": 1377 + }, + { + "epoch": 5.145586897179254, + "grad_norm": 0.6458372413032774, + "learning_rate": 7.974563736439454e-06, + "loss": 0.1962, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10185573995113373, + "step": 2830, + "valid_targets_mean": 3600.2, + "valid_targets_min": 1398 + }, + { + "epoch": 5.154686078252957, + "grad_norm": 0.6506764230442754, + "learning_rate": 7.902241014592042e-06, + "loss": 0.195, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10488687455654144, + "step": 2835, + "valid_targets_mean": 4610.5, + "valid_targets_min": 1416 + }, + { + "epoch": 5.16378525932666, + "grad_norm": 0.6436541968180498, + "learning_rate": 7.830166913545181e-06, + "loss": 0.1921, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09069611132144928, + "step": 2840, + "valid_targets_mean": 3433.5, + "valid_targets_min": 505 + }, + { + "epoch": 5.172884440400364, + "grad_norm": 0.4626395887872623, + "learning_rate": 7.758342914492257e-06, + "loss": 0.1904, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06200661510229111, + "step": 2845, + "valid_targets_mean": 3226.2, + "valid_targets_min": 2125 + }, + { + "epoch": 5.1819836214740675, + "grad_norm": 0.7215464420250767, + "learning_rate": 7.686770493486835e-06, + "loss": 0.1977, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1292155385017395, + "step": 2850, + "valid_targets_mean": 4119.8, + "valid_targets_min": 3043 + }, + { + "epoch": 5.191082802547771, + "grad_norm": 0.7567338525506948, + "learning_rate": 7.615451121412285e-06, + "loss": 0.2044, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09649307280778885, + "step": 2855, + "valid_targets_mean": 2608.2, + "valid_targets_min": 1653 + }, + { + "epoch": 5.200181983621474, + "grad_norm": 0.5615329874332277, + "learning_rate": 7.5443862639516e-06, + "loss": 0.1778, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08881063759326935, + "step": 2860, + "valid_targets_mean": 4857.6, + "valid_targets_min": 1486 + }, + { + "epoch": 5.209281164695177, + "grad_norm": 0.6474968897106395, + "learning_rate": 7.4735773815572044e-06, + "loss": 0.1996, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08092313259840012, + "step": 2865, + "valid_targets_mean": 3331.4, + "valid_targets_min": 1751 + }, + { + "epoch": 5.218380345768881, + "grad_norm": 0.7512014770821924, + "learning_rate": 7.403025929421026e-06, + "loss": 0.1935, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10584090650081635, + "step": 2870, + "valid_targets_mean": 3191.8, + "valid_targets_min": 418 + }, + { + "epoch": 5.227479526842584, + "grad_norm": 0.6171676317926769, + "learning_rate": 7.332733357444524e-06, + "loss": 0.1915, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.101641945540905, + "step": 2875, + "valid_targets_mean": 4167.4, + "valid_targets_min": 1878 + }, + { + "epoch": 5.236578707916287, + "grad_norm": 0.6090694716523611, + "learning_rate": 7.262701110208936e-06, + "loss": 0.1837, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1275119036436081, + "step": 2880, + "valid_targets_mean": 4991.8, + "valid_targets_min": 1623 + }, + { + "epoch": 5.245677888989991, + "grad_norm": 0.6487776113260872, + "learning_rate": 7.192930626945556e-06, + "loss": 0.1875, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11216533184051514, + "step": 2885, + "valid_targets_mean": 4438.9, + "valid_targets_min": 287 + }, + { + "epoch": 5.254777070063694, + "grad_norm": 0.6935423881379369, + "learning_rate": 7.123423341506168e-06, + "loss": 0.1843, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13863793015480042, + "step": 2890, + "valid_targets_mean": 4484.0, + "valid_targets_min": 1794 + }, + { + "epoch": 5.263876251137398, + "grad_norm": 0.7180859077507289, + "learning_rate": 7.054180682333602e-06, + "loss": 0.2006, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11712032556533813, + "step": 2895, + "valid_targets_mean": 3767.9, + "valid_targets_min": 826 + }, + { + "epoch": 5.272975432211101, + "grad_norm": 0.8564178897094312, + "learning_rate": 6.985204072432348e-06, + "loss": 0.1836, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10308965295553207, + "step": 2900, + "valid_targets_mean": 3870.2, + "valid_targets_min": 315 + }, + { + "epoch": 5.282074613284804, + "grad_norm": 0.6990237305747672, + "learning_rate": 6.916494929339315e-06, + "loss": 0.1856, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09179669618606567, + "step": 2905, + "valid_targets_mean": 3426.9, + "valid_targets_min": 2150 + }, + { + "epoch": 5.2911737943585075, + "grad_norm": 0.6611162751423953, + "learning_rate": 6.848054665094714e-06, + "loss": 0.1992, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10533514618873596, + "step": 2910, + "valid_targets_mean": 4009.6, + "valid_targets_min": 2796 + }, + { + "epoch": 5.300272975432211, + "grad_norm": 0.632945921815168, + "learning_rate": 6.779884686213043e-06, + "loss": 0.1842, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08963876217603683, + "step": 2915, + "valid_targets_mean": 3893.6, + "valid_targets_min": 2795 + }, + { + "epoch": 5.309372156505915, + "grad_norm": 0.658709053791018, + "learning_rate": 6.71198639365415e-06, + "loss": 0.1913, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09715458005666733, + "step": 2920, + "valid_targets_mean": 4235.1, + "valid_targets_min": 2097 + }, + { + "epoch": 5.318471337579618, + "grad_norm": 0.6250400516126823, + "learning_rate": 6.644361182794494e-06, + "loss": 0.2024, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0602688193321228, + "step": 2925, + "valid_targets_mean": 2938.6, + "valid_targets_min": 307 + }, + { + "epoch": 5.327570518653321, + "grad_norm": 0.55876772270244, + "learning_rate": 6.577010443398388e-06, + "loss": 0.1906, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11603139340877533, + "step": 2930, + "valid_targets_mean": 4585.6, + "valid_targets_min": 2899 + }, + { + "epoch": 5.336669699727024, + "grad_norm": 0.9315616265660619, + "learning_rate": 6.50993555958954e-06, + "loss": 0.2053, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12010517716407776, + "step": 2935, + "valid_targets_mean": 3430.1, + "valid_targets_min": 316 + }, + { + "epoch": 5.345768880800728, + "grad_norm": 0.7204613647012345, + "learning_rate": 6.4431379098225185e-06, + "loss": 0.1951, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10857885330915451, + "step": 2940, + "valid_targets_mean": 2946.4, + "valid_targets_min": 1170 + }, + { + "epoch": 5.3548680618744315, + "grad_norm": 0.6296169963119035, + "learning_rate": 6.376618866854485e-06, + "loss": 0.1958, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11538192629814148, + "step": 2945, + "valid_targets_mean": 4234.2, + "valid_targets_min": 2754 + }, + { + "epoch": 5.363967242948135, + "grad_norm": 0.6054462885658843, + "learning_rate": 6.310379797716946e-06, + "loss": 0.1772, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09557513147592545, + "step": 2950, + "valid_targets_mean": 3620.1, + "valid_targets_min": 2284 + }, + { + "epoch": 5.373066424021838, + "grad_norm": 0.654527494183749, + "learning_rate": 6.24442206368766e-06, + "loss": 0.1811, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10764510929584503, + "step": 2955, + "valid_targets_mean": 4421.6, + "valid_targets_min": 2417 + }, + { + "epoch": 5.382165605095541, + "grad_norm": 0.5786089054473685, + "learning_rate": 6.178747020262708e-06, + "loss": 0.1905, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08632392436265945, + "step": 2960, + "valid_targets_mean": 4199.2, + "valid_targets_min": 2021 + }, + { + "epoch": 5.391264786169245, + "grad_norm": 0.6546097104084383, + "learning_rate": 6.1133560171285625e-06, + "loss": 0.1891, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1134294718503952, + "step": 2965, + "valid_targets_mean": 4297.5, + "valid_targets_min": 373 + }, + { + "epoch": 5.400363967242948, + "grad_norm": 0.6646922900616847, + "learning_rate": 6.04825039813443e-06, + "loss": 0.197, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11780969798564911, + "step": 2970, + "valid_targets_mean": 3949.9, + "valid_targets_min": 2606 + }, + { + "epoch": 5.409463148316651, + "grad_norm": 0.7577811833468597, + "learning_rate": 5.983431501264545e-06, + "loss": 0.1966, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11065106838941574, + "step": 2975, + "valid_targets_mean": 3088.9, + "valid_targets_min": 146 + }, + { + "epoch": 5.418562329390355, + "grad_norm": 0.7097525789137502, + "learning_rate": 5.918900658610765e-06, + "loss": 0.1897, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08051986992359161, + "step": 2980, + "valid_targets_mean": 2664.9, + "valid_targets_min": 1134 + }, + { + "epoch": 5.427661510464058, + "grad_norm": 0.6710138217823898, + "learning_rate": 5.8546591963451226e-06, + "loss": 0.1961, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11588048934936523, + "step": 2985, + "valid_targets_mean": 4215.2, + "valid_targets_min": 570 + }, + { + "epoch": 5.436760691537762, + "grad_norm": 0.6354446709712156, + "learning_rate": 5.790708434692627e-06, + "loss": 0.2182, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09757719933986664, + "step": 2990, + "valid_targets_mean": 4249.8, + "valid_targets_min": 562 + }, + { + "epoch": 5.445859872611465, + "grad_norm": 0.7042148180116533, + "learning_rate": 5.727049687904076e-06, + "loss": 0.1952, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12138056755065918, + "step": 2995, + "valid_targets_mean": 3693.6, + "valid_targets_min": 324 + }, + { + "epoch": 5.454959053685168, + "grad_norm": 0.7040724026167792, + "learning_rate": 5.66368426422909e-06, + "loss": 0.1847, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08605970442295074, + "step": 3000, + "valid_targets_mean": 3419.9, + "valid_targets_min": 1530 + }, + { + "epoch": 5.4640582347588715, + "grad_norm": 0.6446010445583191, + "learning_rate": 5.60061346588922e-06, + "loss": 0.1837, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09227697551250458, + "step": 3005, + "valid_targets_mean": 3576.0, + "valid_targets_min": 963 + }, + { + "epoch": 5.473157415832575, + "grad_norm": 0.6732664608100785, + "learning_rate": 5.537838589051155e-06, + "loss": 0.1961, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09739650785923004, + "step": 3010, + "valid_targets_mean": 3776.4, + "valid_targets_min": 1882 + }, + { + "epoch": 5.482256596906279, + "grad_norm": 0.7452964277656658, + "learning_rate": 5.475360923800141e-06, + "loss": 0.2027, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10715839266777039, + "step": 3015, + "valid_targets_mean": 3063.9, + "valid_targets_min": 1229 + }, + { + "epoch": 5.491355777979982, + "grad_norm": 0.6107630351697494, + "learning_rate": 5.413181754113392e-06, + "loss": 0.1934, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0687195211648941, + "step": 3020, + "valid_targets_mean": 3125.1, + "valid_targets_min": 1139 + }, + { + "epoch": 5.500454959053685, + "grad_norm": 0.7590005159280938, + "learning_rate": 5.351302357833785e-06, + "loss": 0.2038, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11881700158119202, + "step": 3025, + "valid_targets_mean": 3752.2, + "valid_targets_min": 1901 + }, + { + "epoch": 5.509554140127388, + "grad_norm": 0.6632847021155858, + "learning_rate": 5.289724006643529e-06, + "loss": 0.1845, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08095594495534897, + "step": 3030, + "valid_targets_mean": 3555.4, + "valid_targets_min": 282 + }, + { + "epoch": 5.518653321201092, + "grad_norm": 0.7147244658894554, + "learning_rate": 5.2284479660380906e-06, + "loss": 0.1991, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1063327044248581, + "step": 3035, + "valid_targets_mean": 3433.1, + "valid_targets_min": 863 + }, + { + "epoch": 5.5277525022747955, + "grad_norm": 0.6674123109790042, + "learning_rate": 5.167475495300134e-06, + "loss": 0.1872, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08822742849588394, + "step": 3040, + "valid_targets_mean": 3217.4, + "valid_targets_min": 184 + }, + { + "epoch": 5.536851683348498, + "grad_norm": 0.7927516257807617, + "learning_rate": 5.1068078474736695e-06, + "loss": 0.1934, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08748479187488556, + "step": 3045, + "valid_targets_mean": 2495.0, + "valid_targets_min": 341 + }, + { + "epoch": 5.545950864422202, + "grad_norm": 0.6092514194762859, + "learning_rate": 5.046446269338314e-06, + "loss": 0.1881, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07830236107110977, + "step": 3050, + "valid_targets_mean": 3540.8, + "valid_targets_min": 1729 + }, + { + "epoch": 5.555050045495905, + "grad_norm": 0.6198263788192565, + "learning_rate": 4.986392001383633e-06, + "loss": 0.2015, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08909077942371368, + "step": 3055, + "valid_targets_mean": 3665.4, + "valid_targets_min": 2018 + }, + { + "epoch": 5.564149226569609, + "grad_norm": 0.6063356340051946, + "learning_rate": 4.926646277783675e-06, + "loss": 0.2002, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11191258579492569, + "step": 3060, + "valid_targets_mean": 4533.5, + "valid_targets_min": 2212 + }, + { + "epoch": 5.573248407643312, + "grad_norm": 0.6174222418189697, + "learning_rate": 4.867210326371596e-06, + "loss": 0.1987, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10383233428001404, + "step": 3065, + "valid_targets_mean": 3871.8, + "valid_targets_min": 934 + }, + { + "epoch": 5.582347588717015, + "grad_norm": 0.6672351852695292, + "learning_rate": 4.808085368614441e-06, + "loss": 0.1917, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08448068797588348, + "step": 3070, + "valid_targets_mean": 3447.0, + "valid_targets_min": 2571 + }, + { + "epoch": 5.591446769790719, + "grad_norm": 0.6342439680320546, + "learning_rate": 4.74927261958801e-06, + "loss": 0.1853, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09464578330516815, + "step": 3075, + "valid_targets_mean": 4008.8, + "valid_targets_min": 2600 + }, + { + "epoch": 5.600545950864422, + "grad_norm": 0.5561239465486564, + "learning_rate": 4.690773287951942e-06, + "loss": 0.1875, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10652168095111847, + "step": 3080, + "valid_targets_mean": 6017.8, + "valid_targets_min": 617 + }, + { + "epoch": 5.609645131938126, + "grad_norm": 0.6579389422471904, + "learning_rate": 4.632588575924795e-06, + "loss": 0.1929, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07432182878255844, + "step": 3085, + "valid_targets_mean": 3037.5, + "valid_targets_min": 830 + }, + { + "epoch": 5.618744313011829, + "grad_norm": 0.6928933643318304, + "learning_rate": 4.574719679259425e-06, + "loss": 0.1948, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0847567692399025, + "step": 3090, + "valid_targets_mean": 2883.1, + "valid_targets_min": 1811 + }, + { + "epoch": 5.627843494085532, + "grad_norm": 0.6665338076859805, + "learning_rate": 4.5171677872183506e-06, + "loss": 0.182, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11493894457817078, + "step": 3095, + "valid_targets_mean": 4355.8, + "valid_targets_min": 2402 + }, + { + "epoch": 5.6369426751592355, + "grad_norm": 0.6558712450079212, + "learning_rate": 4.459934082549353e-06, + "loss": 0.1807, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07048974186182022, + "step": 3100, + "valid_targets_mean": 2948.5, + "valid_targets_min": 303 + }, + { + "epoch": 5.646041856232939, + "grad_norm": 0.7512626734457848, + "learning_rate": 4.4030197414611344e-06, + "loss": 0.1861, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08836065977811813, + "step": 3105, + "valid_targets_mean": 2718.9, + "valid_targets_min": 328 + }, + { + "epoch": 5.655141037306643, + "grad_norm": 0.584447343787106, + "learning_rate": 4.346425933599165e-06, + "loss": 0.1932, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09327764809131622, + "step": 3110, + "valid_targets_mean": 4604.0, + "valid_targets_min": 2483 + }, + { + "epoch": 5.664240218380346, + "grad_norm": 0.7012629818494418, + "learning_rate": 4.2901538220216565e-06, + "loss": 0.1861, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1240246444940567, + "step": 3115, + "valid_targets_mean": 3864.8, + "valid_targets_min": 1696 + }, + { + "epoch": 5.673339399454049, + "grad_norm": 0.6818751320160401, + "learning_rate": 4.234204563175625e-06, + "loss": 0.1964, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11969830095767975, + "step": 3120, + "valid_targets_mean": 4422.2, + "valid_targets_min": 1399 + }, + { + "epoch": 5.682438580527752, + "grad_norm": 0.6243267919910356, + "learning_rate": 4.17857930687318e-06, + "loss": 0.1922, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0908406525850296, + "step": 3125, + "valid_targets_mean": 3557.4, + "valid_targets_min": 1688 + }, + { + "epoch": 5.691537761601456, + "grad_norm": 0.6263500654494952, + "learning_rate": 4.123279196267815e-06, + "loss": 0.1961, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10891071707010269, + "step": 3130, + "valid_targets_mean": 4622.1, + "valid_targets_min": 2885 + }, + { + "epoch": 5.7006369426751595, + "grad_norm": 0.5071756770136553, + "learning_rate": 4.068305367831002e-06, + "loss": 0.199, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08039593696594238, + "step": 3135, + "valid_targets_mean": 6038.0, + "valid_targets_min": 2348 + }, + { + "epoch": 5.709736123748863, + "grad_norm": 0.4614868119067622, + "learning_rate": 4.013658951328769e-06, + "loss": 0.1804, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0638853907585144, + "step": 3140, + "valid_targets_mean": 4969.6, + "valid_targets_min": 1838 + }, + { + "epoch": 5.718835304822566, + "grad_norm": 0.6146918657528648, + "learning_rate": 3.95934106979853e-06, + "loss": 0.1995, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08923767507076263, + "step": 3145, + "valid_targets_mean": 3871.9, + "valid_targets_min": 1397 + }, + { + "epoch": 5.727934485896269, + "grad_norm": 0.6962786938805647, + "learning_rate": 3.905352839525962e-06, + "loss": 0.1822, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10900229960680008, + "step": 3150, + "valid_targets_mean": 3616.6, + "valid_targets_min": 2038 + }, + { + "epoch": 5.737033666969973, + "grad_norm": 0.8242667170719575, + "learning_rate": 3.851695370022093e-06, + "loss": 0.2048, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11732503771781921, + "step": 3155, + "valid_targets_mean": 2471.9, + "valid_targets_min": 981 + }, + { + "epoch": 5.746132848043676, + "grad_norm": 0.514853122363082, + "learning_rate": 3.7983697640005048e-06, + "loss": 0.1789, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06776493042707443, + "step": 3160, + "valid_targets_mean": 5999.6, + "valid_targets_min": 1698 + }, + { + "epoch": 5.755232029117379, + "grad_norm": 0.6973901617447613, + "learning_rate": 3.7453771173546426e-06, + "loss": 0.1868, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10990776866674423, + "step": 3165, + "valid_targets_mean": 3826.5, + "valid_targets_min": 1536 + }, + { + "epoch": 5.764331210191083, + "grad_norm": 0.7899673614884827, + "learning_rate": 3.6927185191353188e-06, + "loss": 0.1889, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10077675431966782, + "step": 3170, + "valid_targets_mean": 2454.1, + "valid_targets_min": 296 + }, + { + "epoch": 5.773430391264786, + "grad_norm": 0.7435936878316338, + "learning_rate": 3.640395051528316e-06, + "loss": 0.1992, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10760288685560226, + "step": 3175, + "valid_targets_mean": 3099.8, + "valid_targets_min": 479 + }, + { + "epoch": 5.78252957233849, + "grad_norm": 0.5855128837104117, + "learning_rate": 3.5884077898321713e-06, + "loss": 0.1808, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08257067948579788, + "step": 3180, + "valid_targets_mean": 4963.6, + "valid_targets_min": 1282 + }, + { + "epoch": 5.791628753412192, + "grad_norm": 0.73223681822564, + "learning_rate": 3.536757802436039e-06, + "loss": 0.1711, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10566624999046326, + "step": 3185, + "valid_targets_mean": 4045.1, + "valid_targets_min": 2082 + }, + { + "epoch": 5.800727934485896, + "grad_norm": 0.8342270549063188, + "learning_rate": 3.4854461507977776e-06, + "loss": 0.1922, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1105557456612587, + "step": 3190, + "valid_targets_mean": 3795.4, + "valid_targets_min": 1844 + }, + { + "epoch": 5.8098271155595995, + "grad_norm": 0.7488071230149675, + "learning_rate": 3.4344738894220964e-06, + "loss": 0.1927, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09993422031402588, + "step": 3195, + "valid_targets_mean": 2751.0, + "valid_targets_min": 304 + }, + { + "epoch": 5.818926296633303, + "grad_norm": 0.6527632692808096, + "learning_rate": 3.383842065838907e-06, + "loss": 0.1952, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08624714612960815, + "step": 3200, + "valid_targets_mean": 4010.1, + "valid_targets_min": 656 + }, + { + "epoch": 5.828025477707007, + "grad_norm": 0.7057227913605232, + "learning_rate": 3.3335517205818e-06, + "loss": 0.1918, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1098092719912529, + "step": 3205, + "valid_targets_mean": 3173.8, + "valid_targets_min": 306 + }, + { + "epoch": 5.837124658780709, + "grad_norm": 0.7498429878517248, + "learning_rate": 3.2836038871666444e-06, + "loss": 0.1941, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09733176231384277, + "step": 3210, + "valid_targets_mean": 2920.4, + "valid_targets_min": 323 + }, + { + "epoch": 5.846223839854413, + "grad_norm": 0.7678861288920167, + "learning_rate": 3.2339995920703517e-06, + "loss": 0.1943, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09264662861824036, + "step": 3215, + "valid_targets_mean": 2972.4, + "valid_targets_min": 432 + }, + { + "epoch": 5.855323020928116, + "grad_norm": 0.7126042337111156, + "learning_rate": 3.184739854709784e-06, + "loss": 0.1908, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09754851460456848, + "step": 3220, + "valid_targets_mean": 2903.9, + "valid_targets_min": 378 + }, + { + "epoch": 5.86442220200182, + "grad_norm": 0.5870517895385682, + "learning_rate": 3.1358256874208214e-06, + "loss": 0.1944, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07877151668071747, + "step": 3225, + "valid_targets_mean": 4363.0, + "valid_targets_min": 2095 + }, + { + "epoch": 5.8735213830755235, + "grad_norm": 0.41510201360538945, + "learning_rate": 3.0872580954375177e-06, + "loss": 0.177, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06783401966094971, + "step": 3230, + "valid_targets_mean": 6266.4, + "valid_targets_min": 220 + }, + { + "epoch": 5.882620564149226, + "grad_norm": 0.81215723384434, + "learning_rate": 3.039038076871481e-06, + "loss": 0.2001, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10604351758956909, + "step": 3235, + "valid_targets_mean": 2252.6, + "valid_targets_min": 330 + }, + { + "epoch": 5.89171974522293, + "grad_norm": 0.6607354187702422, + "learning_rate": 2.9911666226913374e-06, + "loss": 0.184, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10948610305786133, + "step": 3240, + "valid_targets_mean": 3968.1, + "valid_targets_min": 2753 + }, + { + "epoch": 5.900818926296633, + "grad_norm": 0.7603934954380941, + "learning_rate": 2.9436447167023674e-06, + "loss": 0.1972, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0869530588388443, + "step": 3245, + "valid_targets_mean": 2893.4, + "valid_targets_min": 1586 + }, + { + "epoch": 5.909918107370337, + "grad_norm": 0.688654887024806, + "learning_rate": 2.896473335526313e-06, + "loss": 0.1979, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10053776204586029, + "step": 3250, + "valid_targets_mean": 3450.5, + "valid_targets_min": 1140 + }, + { + "epoch": 5.91901728844404, + "grad_norm": 0.5557339400861658, + "learning_rate": 2.849653448581271e-06, + "loss": 0.2019, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09264858812093735, + "step": 3255, + "valid_targets_mean": 5173.5, + "valid_targets_min": 3253 + }, + { + "epoch": 5.928116469517743, + "grad_norm": 0.7416951109335438, + "learning_rate": 2.8031860180617898e-06, + "loss": 0.1945, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08064906299114227, + "step": 3260, + "valid_targets_mean": 2143.0, + "valid_targets_min": 321 + }, + { + "epoch": 5.937215650591447, + "grad_norm": 0.6806462407891796, + "learning_rate": 2.757071998919094e-06, + "loss": 0.2018, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08997666835784912, + "step": 3265, + "valid_targets_mean": 3098.5, + "valid_targets_min": 1426 + }, + { + "epoch": 5.94631483166515, + "grad_norm": 0.9457363421014185, + "learning_rate": 2.7113123388414674e-06, + "loss": 0.1798, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10374726355075836, + "step": 3270, + "valid_targets_mean": 3998.0, + "valid_targets_min": 1284 + }, + { + "epoch": 5.955414012738854, + "grad_norm": 0.6333595915046111, + "learning_rate": 2.665907978234754e-06, + "loss": 0.1958, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0839000940322876, + "step": 3275, + "valid_targets_mean": 3718.5, + "valid_targets_min": 1549 + }, + { + "epoch": 5.964513193812557, + "grad_norm": 0.6317391914040702, + "learning_rate": 2.6208598502030546e-06, + "loss": 0.2124, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07819197326898575, + "step": 3280, + "valid_targets_mean": 3617.8, + "valid_targets_min": 324 + }, + { + "epoch": 5.97361237488626, + "grad_norm": 0.7013649236653172, + "learning_rate": 2.5761688805295305e-06, + "loss": 0.1758, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08413440734148026, + "step": 3285, + "valid_targets_mean": 4389.9, + "valid_targets_min": 1822 + }, + { + "epoch": 5.9827115559599635, + "grad_norm": 0.6373036325583057, + "learning_rate": 2.531835987657407e-06, + "loss": 0.1948, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08172917366027832, + "step": 3290, + "valid_targets_mean": 3622.1, + "valid_targets_min": 1142 + }, + { + "epoch": 5.991810737033667, + "grad_norm": 0.6456213843152628, + "learning_rate": 2.487862082671064e-06, + "loss": 0.1954, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10779787600040436, + "step": 3295, + "valid_targets_mean": 4613.9, + "valid_targets_min": 2582 + }, + { + "epoch": 6.0, + "grad_norm": 0.9311049045037445, + "learning_rate": 2.4442480692773398e-06, + "loss": 0.2004, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20936749875545502, + "step": 3300, + "valid_targets_mean": 4251.6, + "valid_targets_min": 1194 + }, + { + "epoch": 6.0090991810737036, + "grad_norm": 0.6630710786735985, + "learning_rate": 2.400994843786939e-06, + "loss": 0.1874, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10251843929290771, + "step": 3305, + "valid_targets_mean": 3764.8, + "valid_targets_min": 2661 + }, + { + "epoch": 6.018198362147407, + "grad_norm": 0.7119618968014333, + "learning_rate": 2.3581032950960215e-06, + "loss": 0.1806, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10553539544343948, + "step": 3310, + "valid_targets_mean": 3514.8, + "valid_targets_min": 284 + }, + { + "epoch": 6.02729754322111, + "grad_norm": 0.6316945505068378, + "learning_rate": 2.3155743046679468e-06, + "loss": 0.1795, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07232202589511871, + "step": 3315, + "valid_targets_mean": 2813.9, + "valid_targets_min": 450 + }, + { + "epoch": 6.036396724294813, + "grad_norm": 0.7284470744983149, + "learning_rate": 2.273408746515133e-06, + "loss": 0.2045, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10341562330722809, + "step": 3320, + "valid_targets_mean": 3263.8, + "valid_targets_min": 1226 + }, + { + "epoch": 6.045495905368517, + "grad_norm": 0.6753238887882468, + "learning_rate": 2.2316074871811157e-06, + "loss": 0.1911, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11383339017629623, + "step": 3325, + "valid_targets_mean": 4718.8, + "valid_targets_min": 1925 + }, + { + "epoch": 6.05459508644222, + "grad_norm": 0.79171407891943, + "learning_rate": 2.190171385722726e-06, + "loss": 0.1837, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.120953768491745, + "step": 3330, + "valid_targets_mean": 3292.0, + "valid_targets_min": 1226 + }, + { + "epoch": 6.063694267515924, + "grad_norm": 0.7571964417090677, + "learning_rate": 2.1491012936924548e-06, + "loss": 0.1814, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10194280743598938, + "step": 3335, + "valid_targets_mean": 3252.8, + "valid_targets_min": 1129 + }, + { + "epoch": 6.072793448589627, + "grad_norm": 0.6462489524858733, + "learning_rate": 2.108398055120926e-06, + "loss": 0.1885, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09468749165534973, + "step": 3340, + "valid_targets_mean": 4044.8, + "valid_targets_min": 2258 + }, + { + "epoch": 6.08189262966333, + "grad_norm": 0.6329963807730029, + "learning_rate": 2.068062506499584e-06, + "loss": 0.1938, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09946207702159882, + "step": 3345, + "valid_targets_mean": 4238.4, + "valid_targets_min": 3247 + }, + { + "epoch": 6.090991810737034, + "grad_norm": 0.6534737467236128, + "learning_rate": 2.0280954767634674e-06, + "loss": 0.1814, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1006544977426529, + "step": 3350, + "valid_targets_mean": 4195.1, + "valid_targets_min": 3324 + }, + { + "epoch": 6.100090991810737, + "grad_norm": 0.6084808914799551, + "learning_rate": 1.988497787274195e-06, + "loss": 0.1726, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08862239122390747, + "step": 3355, + "valid_targets_mean": 4239.9, + "valid_targets_min": 1722 + }, + { + "epoch": 6.10919017288444, + "grad_norm": 0.589702916474223, + "learning_rate": 1.9492702518030905e-06, + "loss": 0.1739, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07204552739858627, + "step": 3360, + "valid_targets_mean": 4596.2, + "valid_targets_min": 370 + }, + { + "epoch": 6.1182893539581436, + "grad_norm": 0.6606144333508663, + "learning_rate": 1.910413676514438e-06, + "loss": 0.1872, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07720930874347687, + "step": 3365, + "valid_targets_mean": 2887.5, + "valid_targets_min": 1416 + }, + { + "epoch": 6.127388535031847, + "grad_norm": 0.7498116580413373, + "learning_rate": 1.8719288599489304e-06, + "loss": 0.1849, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10034845769405365, + "step": 3370, + "valid_targets_mean": 2805.8, + "valid_targets_min": 366 + }, + { + "epoch": 6.136487716105551, + "grad_norm": 0.7087123237706273, + "learning_rate": 1.833816593007256e-06, + "loss": 0.1876, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06755095720291138, + "step": 3375, + "valid_targets_mean": 2808.6, + "valid_targets_min": 241 + }, + { + "epoch": 6.145586897179254, + "grad_norm": 0.7335482831720123, + "learning_rate": 1.796077658933848e-06, + "loss": 0.1799, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08181584626436234, + "step": 3380, + "valid_targets_mean": 2851.8, + "valid_targets_min": 388 + }, + { + "epoch": 6.154686078252957, + "grad_norm": 0.6343581720941749, + "learning_rate": 1.7587128333007709e-06, + "loss": 0.1756, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06355266273021698, + "step": 3385, + "valid_targets_mean": 2750.6, + "valid_targets_min": 466 + }, + { + "epoch": 6.16378525932666, + "grad_norm": 0.7485197111003649, + "learning_rate": 1.7217228839918098e-06, + "loss": 0.1882, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12135298550128937, + "step": 3390, + "valid_targets_mean": 3754.4, + "valid_targets_min": 1390 + }, + { + "epoch": 6.172884440400364, + "grad_norm": 0.6326523371386163, + "learning_rate": 1.6851085711866598e-06, + "loss": 0.1896, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0887572169303894, + "step": 3395, + "valid_targets_mean": 3659.1, + "valid_targets_min": 439 + }, + { + "epoch": 6.1819836214740675, + "grad_norm": 0.6855263364388163, + "learning_rate": 1.648870647345322e-06, + "loss": 0.179, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09419068694114685, + "step": 3400, + "valid_targets_mean": 3603.6, + "valid_targets_min": 1045 + }, + { + "epoch": 6.191082802547771, + "grad_norm": 0.7052701277624316, + "learning_rate": 1.6130098571926468e-06, + "loss": 0.1759, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08835876733064651, + "step": 3405, + "valid_targets_mean": 3416.0, + "valid_targets_min": 1820 + }, + { + "epoch": 6.200181983621474, + "grad_norm": 0.805871081345842, + "learning_rate": 1.577526937703e-06, + "loss": 0.1728, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08823437243700027, + "step": 3410, + "valid_targets_mean": 3513.1, + "valid_targets_min": 850 + }, + { + "epoch": 6.209281164695177, + "grad_norm": 0.6952014427915162, + "learning_rate": 1.5424226180851443e-06, + "loss": 0.1849, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09718883037567139, + "step": 3415, + "valid_targets_mean": 2961.5, + "valid_targets_min": 1589 + }, + { + "epoch": 6.218380345768881, + "grad_norm": 0.7522478675724774, + "learning_rate": 1.5076976197672432e-06, + "loss": 0.1857, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09957942366600037, + "step": 3420, + "valid_targets_mean": 2994.0, + "valid_targets_min": 261 + }, + { + "epoch": 6.227479526842584, + "grad_norm": 0.5843906150152982, + "learning_rate": 1.473352656382039e-06, + "loss": 0.1796, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08296208083629608, + "step": 3425, + "valid_targets_mean": 4887.5, + "valid_targets_min": 1391 + }, + { + "epoch": 6.236578707916287, + "grad_norm": 0.5837216159683262, + "learning_rate": 1.439388433752178e-06, + "loss": 0.1831, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07392477989196777, + "step": 3430, + "valid_targets_mean": 3705.4, + "valid_targets_min": 1438 + }, + { + "epoch": 6.245677888989991, + "grad_norm": 0.832232290805666, + "learning_rate": 1.4058056498757112e-06, + "loss": 0.1891, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08831807225942612, + "step": 3435, + "valid_targets_mean": 2949.4, + "valid_targets_min": 2220 + }, + { + "epoch": 6.254777070063694, + "grad_norm": 0.6668489366701206, + "learning_rate": 1.372604994911757e-06, + "loss": 0.1905, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.098930224776268, + "step": 3440, + "valid_targets_mean": 3662.5, + "valid_targets_min": 1352 + }, + { + "epoch": 6.263876251137398, + "grad_norm": 0.7886986156617598, + "learning_rate": 1.3397871511662986e-06, + "loss": 0.1815, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07638974487781525, + "step": 3445, + "valid_targets_mean": 2858.8, + "valid_targets_min": 300 + }, + { + "epoch": 6.272975432211101, + "grad_norm": 0.6775710838871868, + "learning_rate": 1.307352793078187e-06, + "loss": 0.1896, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06637515127658844, + "step": 3450, + "valid_targets_mean": 2564.6, + "valid_targets_min": 340 + }, + { + "epoch": 6.282074613284804, + "grad_norm": 0.674208803714708, + "learning_rate": 1.275302587205256e-06, + "loss": 0.1945, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09290862083435059, + "step": 3455, + "valid_targets_mean": 3960.8, + "valid_targets_min": 1941 + }, + { + "epoch": 6.2911737943585075, + "grad_norm": 0.5635004473154176, + "learning_rate": 1.2436371922106404e-06, + "loss": 0.1843, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08775106072425842, + "step": 3460, + "valid_targets_mean": 5250.9, + "valid_targets_min": 1717 + }, + { + "epoch": 6.300272975432211, + "grad_norm": 0.7520104883929547, + "learning_rate": 1.2123572588492306e-06, + "loss": 0.1759, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10040166974067688, + "step": 3465, + "valid_targets_mean": 2924.0, + "valid_targets_min": 152 + }, + { + "epoch": 6.309372156505915, + "grad_norm": 0.5052669919871685, + "learning_rate": 1.1814634299543103e-06, + "loss": 0.1835, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05836692452430725, + "step": 3470, + "valid_targets_mean": 3446.1, + "valid_targets_min": 274 + }, + { + "epoch": 6.318471337579618, + "grad_norm": 0.6764453024690077, + "learning_rate": 1.1509563404243274e-06, + "loss": 0.1871, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0781787857413292, + "step": 3475, + "valid_targets_mean": 2825.0, + "valid_targets_min": 1768 + }, + { + "epoch": 6.327570518653321, + "grad_norm": 0.6467509750256821, + "learning_rate": 1.1208366172098684e-06, + "loss": 0.1752, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08091972023248672, + "step": 3480, + "valid_targets_mean": 4351.2, + "valid_targets_min": 2307 + }, + { + "epoch": 6.336669699727024, + "grad_norm": 0.6973506532346149, + "learning_rate": 1.0911048793007484e-06, + "loss": 0.1813, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10063889622688293, + "step": 3485, + "valid_targets_mean": 3283.0, + "valid_targets_min": 1507 + }, + { + "epoch": 6.345768880800728, + "grad_norm": 0.7884690054718912, + "learning_rate": 1.0617617377133205e-06, + "loss": 0.1868, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09718523919582367, + "step": 3490, + "valid_targets_mean": 2890.5, + "valid_targets_min": 1782 + }, + { + "epoch": 6.3548680618744315, + "grad_norm": 0.6032301705318348, + "learning_rate": 1.0328077954778904e-06, + "loss": 0.182, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10235874354839325, + "step": 3495, + "valid_targets_mean": 4765.8, + "valid_targets_min": 274 + }, + { + "epoch": 6.363967242948135, + "grad_norm": 0.6690252665633033, + "learning_rate": 1.004243647626344e-06, + "loss": 0.1784, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09461713582277298, + "step": 3500, + "valid_targets_mean": 3810.2, + "valid_targets_min": 2408 + }, + { + "epoch": 6.373066424021838, + "grad_norm": 0.6384555467290254, + "learning_rate": 9.760698811799064e-07, + "loss": 0.1872, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08460795879364014, + "step": 3505, + "valid_targets_mean": 3914.2, + "valid_targets_min": 424 + }, + { + "epoch": 6.382165605095541, + "grad_norm": 0.6491331565328212, + "learning_rate": 9.482870751370755e-07, + "loss": 0.1805, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09265074133872986, + "step": 3510, + "valid_targets_mean": 3294.6, + "valid_targets_min": 2030 + }, + { + "epoch": 6.391264786169245, + "grad_norm": 0.6514066852280065, + "learning_rate": 9.208958004617475e-07, + "loss": 0.1942, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08725515007972717, + "step": 3515, + "valid_targets_mean": 3477.8, + "valid_targets_min": 2131 + }, + { + "epoch": 6.400363967242948, + "grad_norm": 0.5939503819831969, + "learning_rate": 8.938966200714482e-07, + "loss": 0.1768, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0967637449502945, + "step": 3520, + "valid_targets_mean": 6093.5, + "valid_targets_min": 3168 + }, + { + "epoch": 6.409463148316651, + "grad_norm": 0.6196766279683386, + "learning_rate": 8.672900888257918e-07, + "loss": 0.1808, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10511632263660431, + "step": 3525, + "valid_targets_mean": 5057.6, + "valid_targets_min": 2994 + }, + { + "epoch": 6.418562329390355, + "grad_norm": 0.4824895138585985, + "learning_rate": 8.410767535150599e-07, + "loss": 0.1723, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05214907228946686, + "step": 3530, + "valid_targets_mean": 2645.6, + "valid_targets_min": 1427 + }, + { + "epoch": 6.427661510464058, + "grad_norm": 0.6827325735386334, + "learning_rate": 8.152571528489828e-07, + "loss": 0.1863, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09623458236455917, + "step": 3535, + "valid_targets_mean": 3689.5, + "valid_targets_min": 169 + }, + { + "epoch": 6.436760691537762, + "grad_norm": 0.663892399984698, + "learning_rate": 7.898318174456498e-07, + "loss": 0.1864, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0786326676607132, + "step": 3540, + "valid_targets_mean": 3200.2, + "valid_targets_min": 191 + }, + { + "epoch": 6.445859872611465, + "grad_norm": 0.6940887008237617, + "learning_rate": 7.64801269820612e-07, + "loss": 0.1898, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0850784108042717, + "step": 3545, + "valid_targets_mean": 3372.0, + "valid_targets_min": 340 + }, + { + "epoch": 6.454959053685168, + "grad_norm": 0.6899041090836107, + "learning_rate": 7.401660243761543e-07, + "loss": 0.1771, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08928936719894409, + "step": 3550, + "valid_targets_mean": 3396.8, + "valid_targets_min": 384 + }, + { + "epoch": 6.4640582347588715, + "grad_norm": 0.6908252432631689, + "learning_rate": 7.159265873907006e-07, + "loss": 0.193, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11242173612117767, + "step": 3555, + "valid_targets_mean": 4323.0, + "valid_targets_min": 3470 + }, + { + "epoch": 6.473157415832575, + "grad_norm": 0.8113352916097301, + "learning_rate": 6.920834570084389e-07, + "loss": 0.1904, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12070198357105255, + "step": 3560, + "valid_targets_mean": 3102.1, + "valid_targets_min": 433 + }, + { + "epoch": 6.482256596906279, + "grad_norm": 0.7063405925347518, + "learning_rate": 6.686371232290567e-07, + "loss": 0.1982, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0846632719039917, + "step": 3565, + "valid_targets_mean": 3217.4, + "valid_targets_min": 1225 + }, + { + "epoch": 6.491355777979982, + "grad_norm": 0.6875590318727268, + "learning_rate": 6.455880678976845e-07, + "loss": 0.1829, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07678349316120148, + "step": 3570, + "valid_targets_mean": 2996.9, + "valid_targets_min": 293 + }, + { + "epoch": 6.500454959053685, + "grad_norm": 0.6775291716880106, + "learning_rate": 6.229367646949924e-07, + "loss": 0.1711, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1015455350279808, + "step": 3575, + "valid_targets_mean": 3940.4, + "valid_targets_min": 1506 + }, + { + "epoch": 6.509554140127388, + "grad_norm": 0.8193290349914585, + "learning_rate": 6.006836791274606e-07, + "loss": 0.1795, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11495477706193924, + "step": 3580, + "valid_targets_mean": 3150.9, + "valid_targets_min": 321 + }, + { + "epoch": 6.518653321201092, + "grad_norm": 0.5890978652371885, + "learning_rate": 5.788292685177954e-07, + "loss": 0.1904, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10000624507665634, + "step": 3585, + "valid_targets_mean": 4716.1, + "valid_targets_min": 2782 + }, + { + "epoch": 6.5277525022747955, + "grad_norm": 0.6121071180946909, + "learning_rate": 5.573739819955459e-07, + "loss": 0.1727, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07249385118484497, + "step": 3590, + "valid_targets_mean": 3096.5, + "valid_targets_min": 458 + }, + { + "epoch": 6.536851683348498, + "grad_norm": 0.6411275060644541, + "learning_rate": 5.363182604878803e-07, + "loss": 0.1855, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0972464308142662, + "step": 3595, + "valid_targets_mean": 4242.2, + "valid_targets_min": 3201 + }, + { + "epoch": 6.545950864422202, + "grad_norm": 0.592828910076141, + "learning_rate": 5.156625367104973e-07, + "loss": 0.1914, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07627043128013611, + "step": 3600, + "valid_targets_mean": 3982.1, + "valid_targets_min": 308 + }, + { + "epoch": 6.555050045495905, + "grad_norm": 0.7052782207800322, + "learning_rate": 4.954072351587646e-07, + "loss": 0.1788, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0874359980225563, + "step": 3605, + "valid_targets_mean": 3140.6, + "valid_targets_min": 1511 + }, + { + "epoch": 6.564149226569609, + "grad_norm": 0.6894516190804516, + "learning_rate": 4.75552772098975e-07, + "loss": 0.1937, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09235049039125443, + "step": 3610, + "valid_targets_mean": 3940.2, + "valid_targets_min": 311 + }, + { + "epoch": 6.573248407643312, + "grad_norm": 0.6215785808807689, + "learning_rate": 4.560995555597969e-07, + "loss": 0.1814, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0887666791677475, + "step": 3615, + "valid_targets_mean": 3609.2, + "valid_targets_min": 1385 + }, + { + "epoch": 6.582347588717015, + "grad_norm": 0.6192208581963837, + "learning_rate": 4.3704798532388624e-07, + "loss": 0.1846, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10467948019504547, + "step": 3620, + "valid_targets_mean": 4307.4, + "valid_targets_min": 2053 + }, + { + "epoch": 6.591446769790719, + "grad_norm": 0.6338893728376359, + "learning_rate": 4.1839845291968607e-07, + "loss": 0.1878, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10062219947576523, + "step": 3625, + "valid_targets_mean": 4577.0, + "valid_targets_min": 2230 + }, + { + "epoch": 6.600545950864422, + "grad_norm": 0.6990330295376892, + "learning_rate": 4.001513416133551e-07, + "loss": 0.177, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08158162236213684, + "step": 3630, + "valid_targets_mean": 3457.1, + "valid_targets_min": 336 + }, + { + "epoch": 6.609645131938126, + "grad_norm": 0.506941996196486, + "learning_rate": 3.823070264009099e-07, + "loss": 0.1786, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07667286694049835, + "step": 3635, + "valid_targets_mean": 4367.6, + "valid_targets_min": 278 + }, + { + "epoch": 6.618744313011829, + "grad_norm": 0.6337734569018411, + "learning_rate": 3.648658740005107e-07, + "loss": 0.18, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07626718282699585, + "step": 3640, + "valid_targets_mean": 3231.2, + "valid_targets_min": 2235 + }, + { + "epoch": 6.627843494085532, + "grad_norm": 0.5875375339866471, + "learning_rate": 3.4782824284492975e-07, + "loss": 0.1802, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10731066018342972, + "step": 3645, + "valid_targets_mean": 5920.9, + "valid_targets_min": 3895 + }, + { + "epoch": 6.6369426751592355, + "grad_norm": 0.67413263385624, + "learning_rate": 3.31194483074182e-07, + "loss": 0.1736, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09552273899316788, + "step": 3650, + "valid_targets_mean": 4226.9, + "valid_targets_min": 261 + }, + { + "epoch": 6.646041856232939, + "grad_norm": 0.6521249448828749, + "learning_rate": 3.149649365283258e-07, + "loss": 0.1821, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06236450746655464, + "step": 3655, + "valid_targets_mean": 2752.9, + "valid_targets_min": 288 + }, + { + "epoch": 6.655141037306643, + "grad_norm": 0.6212796752617139, + "learning_rate": 2.9913993674044904e-07, + "loss": 0.1848, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09087605029344559, + "step": 3660, + "valid_targets_mean": 4887.8, + "valid_targets_min": 1841 + }, + { + "epoch": 6.664240218380346, + "grad_norm": 0.7027944694981803, + "learning_rate": 2.8371980892979436e-07, + "loss": 0.1859, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1104184091091156, + "step": 3665, + "valid_targets_mean": 3617.5, + "valid_targets_min": 1831 + }, + { + "epoch": 6.673339399454049, + "grad_norm": 0.6087584684328681, + "learning_rate": 2.687048699951067e-07, + "loss": 0.191, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07848186790943146, + "step": 3670, + "valid_targets_mean": 3574.5, + "valid_targets_min": 1333 + }, + { + "epoch": 6.682438580527752, + "grad_norm": 0.6580904025427522, + "learning_rate": 2.5409542850808765e-07, + "loss": 0.1877, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10046820342540741, + "step": 3675, + "valid_targets_mean": 3933.9, + "valid_targets_min": 260 + }, + { + "epoch": 6.691537761601456, + "grad_norm": 0.6087191196250359, + "learning_rate": 2.3989178470707364e-07, + "loss": 0.18, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08826770633459091, + "step": 3680, + "valid_targets_mean": 4082.4, + "valid_targets_min": 1308 + }, + { + "epoch": 6.7006369426751595, + "grad_norm": 0.8153778673745999, + "learning_rate": 2.260942304908609e-07, + "loss": 0.1881, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11358342319726944, + "step": 3685, + "valid_targets_mean": 3213.5, + "valid_targets_min": 588 + }, + { + "epoch": 6.709736123748863, + "grad_norm": 0.7140363348627277, + "learning_rate": 2.1270304941271025e-07, + "loss": 0.2003, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11204315721988678, + "step": 3690, + "valid_targets_mean": 4244.4, + "valid_targets_min": 2250 + }, + { + "epoch": 6.718835304822566, + "grad_norm": 0.7549599688622531, + "learning_rate": 1.9971851667451413e-07, + "loss": 0.193, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1169305294752121, + "step": 3695, + "valid_targets_mean": 3374.1, + "valid_targets_min": 1968 + }, + { + "epoch": 6.727934485896269, + "grad_norm": 0.6941835790300109, + "learning_rate": 1.8714089912113876e-07, + "loss": 0.187, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1251552700996399, + "step": 3700, + "valid_targets_mean": 4214.8, + "valid_targets_min": 2772 + }, + { + "epoch": 6.737033666969973, + "grad_norm": 0.8007989348447613, + "learning_rate": 1.749704552349507e-07, + "loss": 0.1944, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10345979034900665, + "step": 3705, + "valid_targets_mean": 2992.9, + "valid_targets_min": 263 + }, + { + "epoch": 6.746132848043676, + "grad_norm": 0.719702971321928, + "learning_rate": 1.6320743513049686e-07, + "loss": 0.1774, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10299459844827652, + "step": 3710, + "valid_targets_mean": 2893.6, + "valid_targets_min": 313 + }, + { + "epoch": 6.755232029117379, + "grad_norm": 0.7087967586285943, + "learning_rate": 1.5185208054936394e-07, + "loss": 0.1839, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1190565824508667, + "step": 3715, + "valid_targets_mean": 4597.0, + "valid_targets_min": 1703 + }, + { + "epoch": 6.764331210191083, + "grad_norm": 0.5318051425992069, + "learning_rate": 1.4090462485521816e-07, + "loss": 0.1892, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07275549322366714, + "step": 3720, + "valid_targets_mean": 3686.5, + "valid_targets_min": 355 + }, + { + "epoch": 6.773430391264786, + "grad_norm": 0.7887933323438483, + "learning_rate": 1.303652930289956e-07, + "loss": 0.1857, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0893682911992073, + "step": 3725, + "valid_targets_mean": 2508.0, + "valid_targets_min": 894 + }, + { + "epoch": 6.78252957233849, + "grad_norm": 0.6939538297031341, + "learning_rate": 1.2023430166429485e-07, + "loss": 0.1913, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09880831837654114, + "step": 3730, + "valid_targets_mean": 3593.8, + "valid_targets_min": 235 + }, + { + "epoch": 6.791628753412192, + "grad_norm": 0.7431914466111932, + "learning_rate": 1.1051185896291616e-07, + "loss": 0.182, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07245789468288422, + "step": 3735, + "valid_targets_mean": 2910.5, + "valid_targets_min": 164 + }, + { + "epoch": 6.800727934485896, + "grad_norm": 0.6531446016749648, + "learning_rate": 1.011981647305782e-07, + "loss": 0.1607, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07852613925933838, + "step": 3740, + "valid_targets_mean": 3377.0, + "valid_targets_min": 1521 + }, + { + "epoch": 6.8098271155595995, + "grad_norm": 0.7324557362184932, + "learning_rate": 9.22934103728279e-08, + "loss": 0.1792, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07569163292646408, + "step": 3745, + "valid_targets_mean": 2686.8, + "valid_targets_min": 339 + }, + { + "epoch": 6.818926296633303, + "grad_norm": 0.6590389864988105, + "learning_rate": 8.37977788910882e-08, + "loss": 0.17, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07628272473812103, + "step": 3750, + "valid_targets_mean": 2587.9, + "valid_targets_min": 686 + }, + { + "epoch": 6.828025477707007, + "grad_norm": 0.7436442598809084, + "learning_rate": 7.571144487891202e-08, + "loss": 0.1922, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09393531084060669, + "step": 3755, + "valid_targets_mean": 2498.8, + "valid_targets_min": 581 + }, + { + "epoch": 6.837124658780709, + "grad_norm": 0.7374402989833732, + "learning_rate": 6.803457451838746e-08, + "loss": 0.1849, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1075291633605957, + "step": 3760, + "valid_targets_mean": 3226.6, + "valid_targets_min": 734 + }, + { + "epoch": 6.846223839854413, + "grad_norm": 0.4604358460897986, + "learning_rate": 6.076732557672272e-08, + "loss": 0.1717, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07132543623447418, + "step": 3765, + "valid_targets_mean": 7595.2, + "valid_targets_min": 360 + }, + { + "epoch": 6.855323020928116, + "grad_norm": 0.7017036176712402, + "learning_rate": 5.390984740299976e-08, + "loss": 0.1945, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09402775019407272, + "step": 3770, + "valid_targets_mean": 3475.1, + "valid_targets_min": 1721 + }, + { + "epoch": 6.86442220200182, + "grad_norm": 0.7041095060978012, + "learning_rate": 4.7462280925116847e-08, + "loss": 0.1769, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09718748927116394, + "step": 3775, + "valid_targets_mean": 4057.4, + "valid_targets_min": 334 + }, + { + "epoch": 6.8735213830755235, + "grad_norm": 0.7879474477338477, + "learning_rate": 4.142475864688411e-08, + "loss": 0.1898, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09216184914112091, + "step": 3780, + "valid_targets_mean": 2256.6, + "valid_targets_min": 263 + }, + { + "epoch": 6.882620564149226, + "grad_norm": 0.7052882526108005, + "learning_rate": 3.5797404645296906e-08, + "loss": 0.189, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0836934745311737, + "step": 3785, + "valid_targets_mean": 3189.5, + "valid_targets_min": 302 + }, + { + "epoch": 6.89171974522293, + "grad_norm": 0.6415834858261883, + "learning_rate": 3.0580334567995585e-08, + "loss": 0.189, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10515318065881729, + "step": 3790, + "valid_targets_mean": 4577.0, + "valid_targets_min": 2504 + }, + { + "epoch": 6.900818926296633, + "grad_norm": 0.7167619855564329, + "learning_rate": 2.5773655630880746e-08, + "loss": 0.1956, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1195126473903656, + "step": 3795, + "valid_targets_mean": 4239.6, + "valid_targets_min": 2600 + }, + { + "epoch": 6.909918107370337, + "grad_norm": 0.5847738389354258, + "learning_rate": 2.1377466615912778e-08, + "loss": 0.1808, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07136321067810059, + "step": 3800, + "valid_targets_mean": 4093.9, + "valid_targets_min": 1204 + }, + { + "epoch": 6.91901728844404, + "grad_norm": 0.6674740866579093, + "learning_rate": 1.7391857869086815e-08, + "loss": 0.1794, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09090028703212738, + "step": 3805, + "valid_targets_mean": 3187.9, + "valid_targets_min": 1087 + }, + { + "epoch": 6.928116469517743, + "grad_norm": 0.6940020851233624, + "learning_rate": 1.3816911298565327e-08, + "loss": 0.1821, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08191139996051788, + "step": 3810, + "valid_targets_mean": 3583.0, + "valid_targets_min": 2403 + }, + { + "epoch": 6.937215650591447, + "grad_norm": 0.6521002518067545, + "learning_rate": 1.0652700373006142e-08, + "loss": 0.1841, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0826297402381897, + "step": 3815, + "valid_targets_mean": 3706.0, + "valid_targets_min": 1384 + }, + { + "epoch": 6.94631483166515, + "grad_norm": 0.6871719319654492, + "learning_rate": 7.899290120039205e-09, + "loss": 0.1694, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0961792916059494, + "step": 3820, + "valid_targets_mean": 3572.2, + "valid_targets_min": 986 + }, + { + "epoch": 6.955414012738854, + "grad_norm": 0.7108375386012565, + "learning_rate": 5.556737124945422e-09, + "loss": 0.1964, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10159586369991302, + "step": 3825, + "valid_targets_mean": 3412.9, + "valid_targets_min": 633 + }, + { + "epoch": 6.964513193812557, + "grad_norm": 0.7344106634967527, + "learning_rate": 3.6250895294842605e-09, + "loss": 0.1839, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08290895074605942, + "step": 3830, + "valid_targets_mean": 2692.9, + "valid_targets_min": 301 + }, + { + "epoch": 6.97361237488626, + "grad_norm": 0.6078419999211205, + "learning_rate": 2.1043870309078727e-09, + "loss": 0.1842, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08749958872795105, + "step": 3835, + "valid_targets_mean": 4106.5, + "valid_targets_min": 1108 + }, + { + "epoch": 6.9827115559599635, + "grad_norm": 0.6441846884391178, + "learning_rate": 9.946608811395308e-10, + "loss": 0.1778, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09097015857696533, + "step": 3840, + "valid_targets_mean": 3864.9, + "valid_targets_min": 1595 + }, + { + "epoch": 6.991810737033667, + "grad_norm": 0.6584217092501846, + "learning_rate": 2.959338861407979e-10, + "loss": 0.1885, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09941902756690979, + "step": 3845, + "valid_targets_mean": 3675.5, + "valid_targets_min": 2043 + }, + { + "epoch": 7.0, + "grad_norm": 0.8102798117849903, + "learning_rate": 8.220405436354384e-12, + "loss": 0.1758, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1581478863954544, + "step": 3850, + "valid_targets_mean": 4096.2, + "valid_targets_min": 1169 + }, + { + "epoch": 7.0, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1581478863954544, + "step": 3850, + "total_flos": 1.0887953365472379e+18, + "train_loss": 0.25177762028458833, + "train_runtime": 92981.7064, + "train_samples_per_second": 0.662, + "train_steps_per_second": 0.041, + "valid_targets_mean": 4096.2, + "valid_targets_min": 1169 + } + ], + "logging_steps": 5, + "max_steps": 3850, + "num_input_tokens_seen": 0, + "num_train_epochs": 7, + "save_steps": 200, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": true + }, + "attributes": {} + } + }, + "total_flos": 1.0887953365472379e+18, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +}