diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,18967 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 6.999592999592999, + "eval_steps": 500, + "global_step": 8602, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.00407000407000407, + "grad_norm": 16.688738856727717, + "learning_rate": 1.8583042973286877e-07, + "loss": 0.6602, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.309528112411499, + "step": 5, + "valid_targets_mean": 7389.0, + "valid_targets_min": 5627 + }, + { + "epoch": 0.00814000814000814, + "grad_norm": 16.284518291358502, + "learning_rate": 4.181184668989548e-07, + "loss": 0.7004, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23656058311462402, + "step": 10, + "valid_targets_mean": 1294.8, + "valid_targets_min": 422 + }, + { + "epoch": 0.01221001221001221, + "grad_norm": 14.7645423037296, + "learning_rate": 6.504065040650407e-07, + "loss": 0.6791, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3238072991371155, + "step": 15, + "valid_targets_mean": 8539.9, + "valid_targets_min": 6761 + }, + { + "epoch": 0.01628001628001628, + "grad_norm": 13.258019811923578, + "learning_rate": 8.826945412311266e-07, + "loss": 0.6127, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.28863972425460815, + "step": 20, + "valid_targets_mean": 7579.0, + "valid_targets_min": 6174 + }, + { + "epoch": 0.02035002035002035, + "grad_norm": 10.994253756620328, + "learning_rate": 1.1149825783972125e-06, + "loss": 0.6115, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.29230475425720215, + "step": 25, + "valid_targets_mean": 7328.2, + "valid_targets_min": 4839 + }, + { + "epoch": 0.02442002442002442, + "grad_norm": 7.768784524679733, + "learning_rate": 1.3472706155632985e-06, + "loss": 0.604, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3191682696342468, + "step": 30, + "valid_targets_mean": 6838.2, + "valid_targets_min": 5034 + }, + { + "epoch": 0.02849002849002849, + "grad_norm": 5.751080564869644, + "learning_rate": 1.5795586527293845e-06, + "loss": 0.5845, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.29048168659210205, + "step": 35, + "valid_targets_mean": 7248.2, + "valid_targets_min": 5567 + }, + { + "epoch": 0.03256003256003256, + "grad_norm": 5.28995649654892, + "learning_rate": 1.8118466898954705e-06, + "loss": 0.5404, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2563433349132538, + "step": 40, + "valid_targets_mean": 6234.0, + "valid_targets_min": 5603 + }, + { + "epoch": 0.03663003663003663, + "grad_norm": 6.617277965973996, + "learning_rate": 2.0441347270615568e-06, + "loss": 0.5329, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12342505156993866, + "step": 45, + "valid_targets_mean": 602.0, + "valid_targets_min": 152 + }, + { + "epoch": 0.0407000407000407, + "grad_norm": 3.0152890224296596, + "learning_rate": 2.2764227642276426e-06, + "loss": 0.4588, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22148989140987396, + "step": 50, + "valid_targets_mean": 7299.9, + "valid_targets_min": 6158 + }, + { + "epoch": 0.04477004477004477, + "grad_norm": 2.061368844016502, + "learning_rate": 2.5087108013937284e-06, + "loss": 0.4436, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2079901248216629, + "step": 55, + "valid_targets_mean": 6140.0, + "valid_targets_min": 4648 + }, + { + "epoch": 0.04884004884004884, + "grad_norm": 1.356693424104223, + "learning_rate": 2.7409988385598146e-06, + "loss": 0.4246, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21061325073242188, + "step": 60, + "valid_targets_mean": 6603.0, + "valid_targets_min": 4882 + }, + { + "epoch": 0.05291005291005291, + "grad_norm": 1.0151992206933238, + "learning_rate": 2.9732868757259004e-06, + "loss": 0.4018, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20564204454421997, + "step": 65, + "valid_targets_mean": 8015.0, + "valid_targets_min": 6344 + }, + { + "epoch": 0.05698005698005698, + "grad_norm": 0.8990999330833235, + "learning_rate": 3.205574912891986e-06, + "loss": 0.3898, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18953731656074524, + "step": 70, + "valid_targets_mean": 6305.2, + "valid_targets_min": 4622 + }, + { + "epoch": 0.06105006105006105, + "grad_norm": 0.7436312346393562, + "learning_rate": 3.4378629500580724e-06, + "loss": 0.392, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20007365942001343, + "step": 75, + "valid_targets_mean": 7890.8, + "valid_targets_min": 5485 + }, + { + "epoch": 0.06512006512006512, + "grad_norm": 0.6714920662823174, + "learning_rate": 3.670150987224158e-06, + "loss": 0.387, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.175824835896492, + "step": 80, + "valid_targets_mean": 7016.6, + "valid_targets_min": 4960 + }, + { + "epoch": 0.06919006919006919, + "grad_norm": 0.6272831883685218, + "learning_rate": 3.902439024390244e-06, + "loss": 0.373, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17543438076972961, + "step": 85, + "valid_targets_mean": 6527.1, + "valid_targets_min": 5399 + }, + { + "epoch": 0.07326007326007326, + "grad_norm": 0.7089605501278246, + "learning_rate": 4.13472706155633e-06, + "loss": 0.3711, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18534767627716064, + "step": 90, + "valid_targets_mean": 4995.0, + "valid_targets_min": 618 + }, + { + "epoch": 0.07733007733007732, + "grad_norm": 0.5692706983764877, + "learning_rate": 4.367015098722416e-06, + "loss": 0.3807, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1900652050971985, + "step": 95, + "valid_targets_mean": 7219.4, + "valid_targets_min": 5764 + }, + { + "epoch": 0.0814000814000814, + "grad_norm": 0.5526365515310625, + "learning_rate": 4.599303135888502e-06, + "loss": 0.364, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18986254930496216, + "step": 100, + "valid_targets_mean": 7998.5, + "valid_targets_min": 5902 + }, + { + "epoch": 0.08547008547008547, + "grad_norm": 0.5591512425354309, + "learning_rate": 4.831591173054588e-06, + "loss": 0.3303, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1477121263742447, + "step": 105, + "valid_targets_mean": 7377.4, + "valid_targets_min": 5015 + }, + { + "epoch": 0.08954008954008955, + "grad_norm": 0.47170286143316387, + "learning_rate": 5.063879210220674e-06, + "loss": 0.3322, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14665284752845764, + "step": 110, + "valid_targets_mean": 6794.0, + "valid_targets_min": 5283 + }, + { + "epoch": 0.0936100936100936, + "grad_norm": 0.546971917140672, + "learning_rate": 5.29616724738676e-06, + "loss": 0.3457, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1953507661819458, + "step": 115, + "valid_targets_mean": 6656.4, + "valid_targets_min": 4364 + }, + { + "epoch": 0.09768009768009768, + "grad_norm": 0.4730703657980557, + "learning_rate": 5.528455284552846e-06, + "loss": 0.3323, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1581360101699829, + "step": 120, + "valid_targets_mean": 7178.4, + "valid_targets_min": 4448 + }, + { + "epoch": 0.10175010175010175, + "grad_norm": 0.5438344423212753, + "learning_rate": 5.7607433217189324e-06, + "loss": 0.3264, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18480417132377625, + "step": 125, + "valid_targets_mean": 7619.8, + "valid_targets_min": 5737 + }, + { + "epoch": 0.10582010582010581, + "grad_norm": 0.42704045318550676, + "learning_rate": 5.993031358885018e-06, + "loss": 0.2884, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13994517922401428, + "step": 130, + "valid_targets_mean": 8754.4, + "valid_targets_min": 5752 + }, + { + "epoch": 0.10989010989010989, + "grad_norm": 0.5266020359474044, + "learning_rate": 6.225319396051104e-06, + "loss": 0.2866, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15675979852676392, + "step": 135, + "valid_targets_mean": 6341.0, + "valid_targets_min": 4543 + }, + { + "epoch": 0.11396011396011396, + "grad_norm": 0.5048534740315108, + "learning_rate": 6.45760743321719e-06, + "loss": 0.2751, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15559503436088562, + "step": 140, + "valid_targets_mean": 7181.9, + "valid_targets_min": 4563 + }, + { + "epoch": 0.11803011803011804, + "grad_norm": 1.134587905553992, + "learning_rate": 6.6898954703832765e-06, + "loss": 0.3357, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12442926317453384, + "step": 145, + "valid_targets_mean": 1104.4, + "valid_targets_min": 161 + }, + { + "epoch": 0.1221001221001221, + "grad_norm": 0.4270206279409013, + "learning_rate": 6.922183507549362e-06, + "loss": 0.32, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13907991349697113, + "step": 150, + "valid_targets_mean": 7018.2, + "valid_targets_min": 5424 + }, + { + "epoch": 0.12617012617012616, + "grad_norm": 0.4635086442233948, + "learning_rate": 7.154471544715448e-06, + "loss": 0.3187, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1630023717880249, + "step": 155, + "valid_targets_mean": 7738.4, + "valid_targets_min": 5329 + }, + { + "epoch": 0.13024013024013023, + "grad_norm": 0.5079812404242928, + "learning_rate": 7.386759581881534e-06, + "loss": 0.325, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1584567427635193, + "step": 160, + "valid_targets_mean": 6426.6, + "valid_targets_min": 5560 + }, + { + "epoch": 0.1343101343101343, + "grad_norm": 0.5050284094328902, + "learning_rate": 7.61904761904762e-06, + "loss": 0.3163, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16165009140968323, + "step": 165, + "valid_targets_mean": 6169.9, + "valid_targets_min": 3446 + }, + { + "epoch": 0.13838013838013838, + "grad_norm": 0.498091820817201, + "learning_rate": 7.851335656213705e-06, + "loss": 0.301, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15471090376377106, + "step": 170, + "valid_targets_mean": 7204.6, + "valid_targets_min": 5381 + }, + { + "epoch": 0.14245014245014245, + "grad_norm": 0.48505113847764975, + "learning_rate": 8.083623693379791e-06, + "loss": 0.3053, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1558820754289627, + "step": 175, + "valid_targets_mean": 7600.4, + "valid_targets_min": 5241 + }, + { + "epoch": 0.14652014652014653, + "grad_norm": 0.4895882936126897, + "learning_rate": 8.315911730545877e-06, + "loss": 0.2968, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16261817514896393, + "step": 180, + "valid_targets_mean": 7098.5, + "valid_targets_min": 5607 + }, + { + "epoch": 0.1505901505901506, + "grad_norm": 0.49703616694051983, + "learning_rate": 8.548199767711964e-06, + "loss": 0.3019, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15736651420593262, + "step": 185, + "valid_targets_mean": 6955.1, + "valid_targets_min": 5530 + }, + { + "epoch": 0.15466015466015465, + "grad_norm": 0.47329084552349354, + "learning_rate": 8.78048780487805e-06, + "loss": 0.2955, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15062673389911652, + "step": 190, + "valid_targets_mean": 6748.1, + "valid_targets_min": 5364 + }, + { + "epoch": 0.15873015873015872, + "grad_norm": 0.5304350179179551, + "learning_rate": 9.012775842044136e-06, + "loss": 0.299, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14595365524291992, + "step": 195, + "valid_targets_mean": 6273.9, + "valid_targets_min": 5438 + }, + { + "epoch": 0.1628001628001628, + "grad_norm": 0.4804611842699093, + "learning_rate": 9.24506387921022e-06, + "loss": 0.2991, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14614039659500122, + "step": 200, + "valid_targets_mean": 6419.5, + "valid_targets_min": 4879 + }, + { + "epoch": 0.16687016687016687, + "grad_norm": 1.0372553917536562, + "learning_rate": 9.477351916376307e-06, + "loss": 0.2949, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1451706886291504, + "step": 205, + "valid_targets_mean": 6483.4, + "valid_targets_min": 4309 + }, + { + "epoch": 0.17094017094017094, + "grad_norm": 0.5083493341784381, + "learning_rate": 9.709639953542393e-06, + "loss": 0.2928, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16406603157520294, + "step": 210, + "valid_targets_mean": 6825.0, + "valid_targets_min": 5205 + }, + { + "epoch": 0.17501017501017502, + "grad_norm": 0.46301643664731623, + "learning_rate": 9.94192799070848e-06, + "loss": 0.2875, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13244540989398956, + "step": 215, + "valid_targets_mean": 6729.4, + "valid_targets_min": 5200 + }, + { + "epoch": 0.1790801790801791, + "grad_norm": 0.49215202614656767, + "learning_rate": 1.0174216027874565e-05, + "loss": 0.2844, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13969576358795166, + "step": 220, + "valid_targets_mean": 6094.0, + "valid_targets_min": 5302 + }, + { + "epoch": 0.18315018315018314, + "grad_norm": 0.4871918972499842, + "learning_rate": 1.0406504065040652e-05, + "loss": 0.2876, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14923205971717834, + "step": 225, + "valid_targets_mean": 7140.0, + "valid_targets_min": 5270 + }, + { + "epoch": 0.1872201872201872, + "grad_norm": 0.4471280600208976, + "learning_rate": 1.0638792102206736e-05, + "loss": 0.2776, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.131822407245636, + "step": 230, + "valid_targets_mean": 7425.0, + "valid_targets_min": 5281 + }, + { + "epoch": 0.19129019129019129, + "grad_norm": 1.2605795171969003, + "learning_rate": 1.0871080139372822e-05, + "loss": 0.2845, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1726950705051422, + "step": 235, + "valid_targets_mean": 7063.4, + "valid_targets_min": 5386 + }, + { + "epoch": 0.19536019536019536, + "grad_norm": 0.4932596835670058, + "learning_rate": 1.1103368176538909e-05, + "loss": 0.2845, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14937910437583923, + "step": 240, + "valid_targets_mean": 6827.0, + "valid_targets_min": 4547 + }, + { + "epoch": 0.19943019943019943, + "grad_norm": 0.9167985105196412, + "learning_rate": 1.1335656213704995e-05, + "loss": 0.2634, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09083765000104904, + "step": 245, + "valid_targets_mean": 2047.0, + "valid_targets_min": 165 + }, + { + "epoch": 0.2035002035002035, + "grad_norm": 0.4621000775660505, + "learning_rate": 1.1567944250871081e-05, + "loss": 0.2744, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1386224627494812, + "step": 250, + "valid_targets_mean": 6754.1, + "valid_targets_min": 5196 + }, + { + "epoch": 0.20757020757020758, + "grad_norm": 0.5251600558673276, + "learning_rate": 1.1800232288037167e-05, + "loss": 0.2744, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13912081718444824, + "step": 255, + "valid_targets_mean": 5985.6, + "valid_targets_min": 5028 + }, + { + "epoch": 0.21164021164021163, + "grad_norm": 0.5423901600668142, + "learning_rate": 1.2032520325203254e-05, + "loss": 0.2795, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12727007269859314, + "step": 260, + "valid_targets_mean": 5963.9, + "valid_targets_min": 5237 + }, + { + "epoch": 0.2157102157102157, + "grad_norm": 0.5203028801997557, + "learning_rate": 1.2264808362369338e-05, + "loss": 0.274, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1433340162038803, + "step": 265, + "valid_targets_mean": 6319.0, + "valid_targets_min": 5412 + }, + { + "epoch": 0.21978021978021978, + "grad_norm": 0.5281674096137197, + "learning_rate": 1.2497096399535424e-05, + "loss": 0.2597, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11158867180347443, + "step": 270, + "valid_targets_mean": 7125.8, + "valid_targets_min": 5080 + }, + { + "epoch": 0.22385022385022385, + "grad_norm": 0.6582757970482628, + "learning_rate": 1.272938443670151e-05, + "loss": 0.2744, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13590899109840393, + "step": 275, + "valid_targets_mean": 6123.5, + "valid_targets_min": 4521 + }, + { + "epoch": 0.22792022792022792, + "grad_norm": 2.7614047900823837, + "learning_rate": 1.2961672473867597e-05, + "loss": 0.3331, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24310331046581268, + "step": 280, + "valid_targets_mean": 1886.0, + "valid_targets_min": 1111 + }, + { + "epoch": 0.231990231990232, + "grad_norm": 1.5491066018218123, + "learning_rate": 1.3193960511033683e-05, + "loss": 0.4133, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20701418817043304, + "step": 285, + "valid_targets_mean": 1497.6, + "valid_targets_min": 893 + }, + { + "epoch": 0.23606023606023607, + "grad_norm": 1.1536064750715551, + "learning_rate": 1.3426248548199769e-05, + "loss": 0.3633, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1264546811580658, + "step": 290, + "valid_targets_mean": 1356.0, + "valid_targets_min": 914 + }, + { + "epoch": 0.24013024013024012, + "grad_norm": 1.2647474847869884, + "learning_rate": 1.3658536585365855e-05, + "loss": 0.3452, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1455959528684616, + "step": 295, + "valid_targets_mean": 1324.5, + "valid_targets_min": 680 + }, + { + "epoch": 0.2442002442002442, + "grad_norm": 0.997749848075399, + "learning_rate": 1.389082462253194e-05, + "loss": 0.3337, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15397366881370544, + "step": 300, + "valid_targets_mean": 1931.8, + "valid_targets_min": 876 + }, + { + "epoch": 0.24827024827024827, + "grad_norm": 1.0630278095878878, + "learning_rate": 1.4123112659698026e-05, + "loss": 0.32, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1414937824010849, + "step": 305, + "valid_targets_mean": 1292.2, + "valid_targets_min": 734 + }, + { + "epoch": 0.2523402523402523, + "grad_norm": 0.9931657056115516, + "learning_rate": 1.4355400696864112e-05, + "loss": 0.3245, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1270873248577118, + "step": 310, + "valid_targets_mean": 1469.6, + "valid_targets_min": 655 + }, + { + "epoch": 0.2564102564102564, + "grad_norm": 1.1018628472649998, + "learning_rate": 1.4587688734030199e-05, + "loss": 0.2936, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15461672842502594, + "step": 315, + "valid_targets_mean": 1548.4, + "valid_targets_min": 737 + }, + { + "epoch": 0.26048026048026046, + "grad_norm": 1.0914493892414534, + "learning_rate": 1.4819976771196285e-05, + "loss": 0.3116, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13322024047374725, + "step": 320, + "valid_targets_mean": 1316.5, + "valid_targets_min": 759 + }, + { + "epoch": 0.26455026455026454, + "grad_norm": 1.1154554492176114, + "learning_rate": 1.5052264808362371e-05, + "loss": 0.2911, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11124762892723083, + "step": 325, + "valid_targets_mean": 1249.6, + "valid_targets_min": 753 + }, + { + "epoch": 0.2686202686202686, + "grad_norm": 1.056667017250088, + "learning_rate": 1.528455284552846e-05, + "loss": 0.2896, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18556742370128632, + "step": 330, + "valid_targets_mean": 1833.0, + "valid_targets_min": 927 + }, + { + "epoch": 0.2726902726902727, + "grad_norm": 1.004899439380304, + "learning_rate": 1.5516840882694542e-05, + "loss": 0.299, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14251361787319183, + "step": 335, + "valid_targets_mean": 1338.4, + "valid_targets_min": 679 + }, + { + "epoch": 0.27676027676027676, + "grad_norm": 1.1790950500190507, + "learning_rate": 1.5749128919860628e-05, + "loss": 0.2872, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12715528905391693, + "step": 340, + "valid_targets_mean": 1244.6, + "valid_targets_min": 771 + }, + { + "epoch": 0.28083028083028083, + "grad_norm": 1.3870604631519712, + "learning_rate": 1.5981416957026714e-05, + "loss": 0.2683, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12742307782173157, + "step": 345, + "valid_targets_mean": 1732.5, + "valid_targets_min": 986 + }, + { + "epoch": 0.2849002849002849, + "grad_norm": 1.0033141410182251, + "learning_rate": 1.62137049941928e-05, + "loss": 0.277, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12324304133653641, + "step": 350, + "valid_targets_mean": 1389.9, + "valid_targets_min": 1103 + }, + { + "epoch": 0.288970288970289, + "grad_norm": 1.0333697812051308, + "learning_rate": 1.6445993031358887e-05, + "loss": 0.2801, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15279968082904816, + "step": 355, + "valid_targets_mean": 1593.2, + "valid_targets_min": 675 + }, + { + "epoch": 0.29304029304029305, + "grad_norm": 0.915729241739608, + "learning_rate": 1.6678281068524973e-05, + "loss": 0.2777, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16900552809238434, + "step": 360, + "valid_targets_mean": 1926.6, + "valid_targets_min": 974 + }, + { + "epoch": 0.29711029711029713, + "grad_norm": 1.0882207091700582, + "learning_rate": 1.691056910569106e-05, + "loss": 0.2835, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1652759164571762, + "step": 365, + "valid_targets_mean": 1953.1, + "valid_targets_min": 869 + }, + { + "epoch": 0.3011803011803012, + "grad_norm": 0.9858438574283391, + "learning_rate": 1.7142857142857142e-05, + "loss": 0.28, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12480325996875763, + "step": 370, + "valid_targets_mean": 1327.2, + "valid_targets_min": 1023 + }, + { + "epoch": 0.3052503052503053, + "grad_norm": 0.9782058898631699, + "learning_rate": 1.7375145180023228e-05, + "loss": 0.2822, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14944294095039368, + "step": 375, + "valid_targets_mean": 1717.9, + "valid_targets_min": 709 + }, + { + "epoch": 0.3093203093203093, + "grad_norm": 1.2128421132256473, + "learning_rate": 1.7607433217189314e-05, + "loss": 0.2676, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11604733765125275, + "step": 380, + "valid_targets_mean": 1139.9, + "valid_targets_min": 658 + }, + { + "epoch": 0.31339031339031337, + "grad_norm": 1.2503504626315993, + "learning_rate": 1.78397212543554e-05, + "loss": 0.2778, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13267236948013306, + "step": 385, + "valid_targets_mean": 1444.8, + "valid_targets_min": 662 + }, + { + "epoch": 0.31746031746031744, + "grad_norm": 1.0828865674968162, + "learning_rate": 1.807200929152149e-05, + "loss": 0.2823, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09767653793096542, + "step": 390, + "valid_targets_mean": 1242.2, + "valid_targets_min": 859 + }, + { + "epoch": 0.3215303215303215, + "grad_norm": 1.0500436494857386, + "learning_rate": 1.8304297328687576e-05, + "loss": 0.2551, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15620818734169006, + "step": 395, + "valid_targets_mean": 1569.2, + "valid_targets_min": 950 + }, + { + "epoch": 0.3256003256003256, + "grad_norm": 1.2368316222797435, + "learning_rate": 1.8536585365853663e-05, + "loss": 0.2673, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12553861737251282, + "step": 400, + "valid_targets_mean": 1539.8, + "valid_targets_min": 686 + }, + { + "epoch": 0.32967032967032966, + "grad_norm": 0.970316022027174, + "learning_rate": 1.8768873403019745e-05, + "loss": 0.2611, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14505316317081451, + "step": 405, + "valid_targets_mean": 1610.6, + "valid_targets_min": 702 + }, + { + "epoch": 0.33374033374033374, + "grad_norm": 1.028981175641749, + "learning_rate": 1.900116144018583e-05, + "loss": 0.2553, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09692301601171494, + "step": 410, + "valid_targets_mean": 1190.4, + "valid_targets_min": 779 + }, + { + "epoch": 0.3378103378103378, + "grad_norm": 1.079548761208398, + "learning_rate": 1.9233449477351918e-05, + "loss": 0.2736, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1386578381061554, + "step": 415, + "valid_targets_mean": 1467.1, + "valid_targets_min": 822 + }, + { + "epoch": 0.3418803418803419, + "grad_norm": 1.0316182713272737, + "learning_rate": 1.9465737514518004e-05, + "loss": 0.2507, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10993292927742004, + "step": 420, + "valid_targets_mean": 1350.5, + "valid_targets_min": 868 + }, + { + "epoch": 0.34595034595034596, + "grad_norm": 1.1022256452208465, + "learning_rate": 1.969802555168409e-05, + "loss": 0.2608, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10702995955944061, + "step": 425, + "valid_targets_mean": 1283.5, + "valid_targets_min": 781 + }, + { + "epoch": 0.35002035002035004, + "grad_norm": 0.9493395584058351, + "learning_rate": 1.9930313588850176e-05, + "loss": 0.2614, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10873595625162125, + "step": 430, + "valid_targets_mean": 1303.2, + "valid_targets_min": 793 + }, + { + "epoch": 0.3540903540903541, + "grad_norm": 1.0733764593758766, + "learning_rate": 2.016260162601626e-05, + "loss": 0.2592, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11143694072961807, + "step": 435, + "valid_targets_mean": 1309.1, + "valid_targets_min": 806 + }, + { + "epoch": 0.3581603581603582, + "grad_norm": 0.9098321906535183, + "learning_rate": 2.039488966318235e-05, + "loss": 0.2569, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10678255558013916, + "step": 440, + "valid_targets_mean": 1397.6, + "valid_targets_min": 803 + }, + { + "epoch": 0.36223036223036226, + "grad_norm": 0.9876007650935522, + "learning_rate": 2.0627177700348432e-05, + "loss": 0.2537, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09560855478048325, + "step": 445, + "valid_targets_mean": 1187.8, + "valid_targets_min": 660 + }, + { + "epoch": 0.3663003663003663, + "grad_norm": 0.9221536800328648, + "learning_rate": 2.085946573751452e-05, + "loss": 0.2369, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10400258004665375, + "step": 450, + "valid_targets_mean": 1547.0, + "valid_targets_min": 853 + }, + { + "epoch": 0.37037037037037035, + "grad_norm": 1.1802888394338766, + "learning_rate": 2.1091753774680604e-05, + "loss": 0.2557, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13287541270256042, + "step": 455, + "valid_targets_mean": 1704.2, + "valid_targets_min": 719 + }, + { + "epoch": 0.3744403744403744, + "grad_norm": 0.9714075439297265, + "learning_rate": 2.132404181184669e-05, + "loss": 0.258, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09262743592262268, + "step": 460, + "valid_targets_mean": 1255.0, + "valid_targets_min": 765 + }, + { + "epoch": 0.3785103785103785, + "grad_norm": 0.9864351204877326, + "learning_rate": 2.1556329849012777e-05, + "loss": 0.2523, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12507298588752747, + "step": 465, + "valid_targets_mean": 1550.8, + "valid_targets_min": 875 + }, + { + "epoch": 0.38258038258038257, + "grad_norm": 1.0261001318904166, + "learning_rate": 2.1788617886178863e-05, + "loss": 0.2503, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12351306527853012, + "step": 470, + "valid_targets_mean": 1362.0, + "valid_targets_min": 739 + }, + { + "epoch": 0.38665038665038665, + "grad_norm": 0.9746834269517334, + "learning_rate": 2.202090592334495e-05, + "loss": 0.2597, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14458400011062622, + "step": 475, + "valid_targets_mean": 1666.8, + "valid_targets_min": 760 + }, + { + "epoch": 0.3907203907203907, + "grad_norm": 1.0332993949409497, + "learning_rate": 2.2253193960511035e-05, + "loss": 0.243, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10632194578647614, + "step": 480, + "valid_targets_mean": 1319.6, + "valid_targets_min": 1046 + }, + { + "epoch": 0.3947903947903948, + "grad_norm": 0.9844297761113842, + "learning_rate": 2.2485481997677125e-05, + "loss": 0.2621, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14072856307029724, + "step": 485, + "valid_targets_mean": 1727.9, + "valid_targets_min": 851 + }, + { + "epoch": 0.39886039886039887, + "grad_norm": 0.9685951678785188, + "learning_rate": 2.2717770034843208e-05, + "loss": 0.2609, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13506445288658142, + "step": 490, + "valid_targets_mean": 1643.2, + "valid_targets_min": 907 + }, + { + "epoch": 0.40293040293040294, + "grad_norm": 1.0189730102784529, + "learning_rate": 2.295005807200929e-05, + "loss": 0.2662, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11292707920074463, + "step": 495, + "valid_targets_mean": 1478.5, + "valid_targets_min": 764 + }, + { + "epoch": 0.407000407000407, + "grad_norm": 0.8788626334372063, + "learning_rate": 2.318234610917538e-05, + "loss": 0.2454, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11425843089818954, + "step": 500, + "valid_targets_mean": 1522.6, + "valid_targets_min": 822 + }, + { + "epoch": 0.4110704110704111, + "grad_norm": 0.966805238471609, + "learning_rate": 2.3414634146341463e-05, + "loss": 0.245, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12144134938716888, + "step": 505, + "valid_targets_mean": 1589.0, + "valid_targets_min": 982 + }, + { + "epoch": 0.41514041514041516, + "grad_norm": 0.9177681985342081, + "learning_rate": 2.3646922183507553e-05, + "loss": 0.2566, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12525641918182373, + "step": 510, + "valid_targets_mean": 1574.8, + "valid_targets_min": 816 + }, + { + "epoch": 0.4192104192104192, + "grad_norm": 1.0766725696353001, + "learning_rate": 2.3879210220673635e-05, + "loss": 0.2458, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12393151223659515, + "step": 515, + "valid_targets_mean": 1422.0, + "valid_targets_min": 721 + }, + { + "epoch": 0.42328042328042326, + "grad_norm": 1.2191089483428443, + "learning_rate": 2.4111498257839725e-05, + "loss": 0.2391, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09133152663707733, + "step": 520, + "valid_targets_mean": 1150.6, + "valid_targets_min": 741 + }, + { + "epoch": 0.42735042735042733, + "grad_norm": 0.9898911682614888, + "learning_rate": 2.4343786295005808e-05, + "loss": 0.2475, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13438482582569122, + "step": 525, + "valid_targets_mean": 1684.2, + "valid_targets_min": 734 + }, + { + "epoch": 0.4314204314204314, + "grad_norm": 1.260093754517711, + "learning_rate": 2.4576074332171894e-05, + "loss": 0.2505, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14740093052387238, + "step": 530, + "valid_targets_mean": 1937.1, + "valid_targets_min": 744 + }, + { + "epoch": 0.4354904354904355, + "grad_norm": 0.9354629606274133, + "learning_rate": 2.480836236933798e-05, + "loss": 0.2472, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13996005058288574, + "step": 535, + "valid_targets_mean": 1842.2, + "valid_targets_min": 620 + }, + { + "epoch": 0.43956043956043955, + "grad_norm": 0.9747857839014156, + "learning_rate": 2.5040650406504066e-05, + "loss": 0.2407, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14165043830871582, + "step": 540, + "valid_targets_mean": 1637.6, + "valid_targets_min": 785 + }, + { + "epoch": 0.4436304436304436, + "grad_norm": 0.9131689808991684, + "learning_rate": 2.5272938443670153e-05, + "loss": 0.246, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12118691951036453, + "step": 545, + "valid_targets_mean": 1969.9, + "valid_targets_min": 740 + }, + { + "epoch": 0.4477004477004477, + "grad_norm": 1.119476251101989, + "learning_rate": 2.550522648083624e-05, + "loss": 0.2414, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11075526475906372, + "step": 550, + "valid_targets_mean": 1432.1, + "valid_targets_min": 711 + }, + { + "epoch": 0.4517704517704518, + "grad_norm": 0.9872821443353325, + "learning_rate": 2.5737514518002325e-05, + "loss": 0.2427, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1334969699382782, + "step": 555, + "valid_targets_mean": 1695.6, + "valid_targets_min": 1122 + }, + { + "epoch": 0.45584045584045585, + "grad_norm": 1.1068786822143657, + "learning_rate": 2.596980255516841e-05, + "loss": 0.2403, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12975133955478668, + "step": 560, + "valid_targets_mean": 1524.5, + "valid_targets_min": 722 + }, + { + "epoch": 0.4599104599104599, + "grad_norm": 1.006903711549231, + "learning_rate": 2.6202090592334494e-05, + "loss": 0.2268, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10383662581443787, + "step": 565, + "valid_targets_mean": 1289.2, + "valid_targets_min": 661 + }, + { + "epoch": 0.463980463980464, + "grad_norm": 1.0929411309492179, + "learning_rate": 2.6434378629500584e-05, + "loss": 0.2331, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11088858544826508, + "step": 570, + "valid_targets_mean": 1519.5, + "valid_targets_min": 1011 + }, + { + "epoch": 0.46805046805046807, + "grad_norm": 1.038541955016419, + "learning_rate": 2.6666666666666667e-05, + "loss": 0.2433, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12774452567100525, + "step": 575, + "valid_targets_mean": 1731.4, + "valid_targets_min": 1277 + }, + { + "epoch": 0.47212047212047215, + "grad_norm": 1.012572853810221, + "learning_rate": 2.6898954703832756e-05, + "loss": 0.2307, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12323933839797974, + "step": 580, + "valid_targets_mean": 1486.1, + "valid_targets_min": 807 + }, + { + "epoch": 0.47619047619047616, + "grad_norm": 0.9487437622651357, + "learning_rate": 2.713124274099884e-05, + "loss": 0.2441, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13745994865894318, + "step": 585, + "valid_targets_mean": 1549.9, + "valid_targets_min": 679 + }, + { + "epoch": 0.48026048026048024, + "grad_norm": 0.9664417051166028, + "learning_rate": 2.736353077816493e-05, + "loss": 0.239, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11426427215337753, + "step": 590, + "valid_targets_mean": 1656.0, + "valid_targets_min": 1294 + }, + { + "epoch": 0.4843304843304843, + "grad_norm": 0.9917889070000425, + "learning_rate": 2.759581881533101e-05, + "loss": 0.2403, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.108737051486969, + "step": 595, + "valid_targets_mean": 1368.8, + "valid_targets_min": 790 + }, + { + "epoch": 0.4884004884004884, + "grad_norm": 0.9927611626798195, + "learning_rate": 2.7828106852497098e-05, + "loss": 0.2358, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1328611820936203, + "step": 600, + "valid_targets_mean": 1765.8, + "valid_targets_min": 592 + }, + { + "epoch": 0.49247049247049246, + "grad_norm": 0.9537846739982789, + "learning_rate": 2.8060394889663184e-05, + "loss": 0.2408, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12659400701522827, + "step": 605, + "valid_targets_mean": 1586.8, + "valid_targets_min": 662 + }, + { + "epoch": 0.49654049654049653, + "grad_norm": 1.0425594316679838, + "learning_rate": 2.829268292682927e-05, + "loss": 0.2454, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11810390651226044, + "step": 610, + "valid_targets_mean": 1336.9, + "valid_targets_min": 622 + }, + { + "epoch": 0.5006105006105006, + "grad_norm": 0.9994638757290513, + "learning_rate": 2.8524970963995356e-05, + "loss": 0.2419, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07577566802501678, + "step": 615, + "valid_targets_mean": 1097.1, + "valid_targets_min": 625 + }, + { + "epoch": 0.5046805046805046, + "grad_norm": 0.9987546354296077, + "learning_rate": 2.8757259001161443e-05, + "loss": 0.2543, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.095686174929142, + "step": 620, + "valid_targets_mean": 1484.9, + "valid_targets_min": 888 + }, + { + "epoch": 0.5087505087505088, + "grad_norm": 0.9799026275534664, + "learning_rate": 2.898954703832753e-05, + "loss": 0.2462, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12912192940711975, + "step": 625, + "valid_targets_mean": 1757.0, + "valid_targets_min": 635 + }, + { + "epoch": 0.5128205128205128, + "grad_norm": 1.1131113884891992, + "learning_rate": 2.9221835075493615e-05, + "loss": 0.2488, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1408751755952835, + "step": 630, + "valid_targets_mean": 1476.1, + "valid_targets_min": 522 + }, + { + "epoch": 0.5168905168905169, + "grad_norm": 1.380784458630799, + "learning_rate": 2.9454123112659698e-05, + "loss": 0.2346, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10641973465681076, + "step": 635, + "valid_targets_mean": 1230.9, + "valid_targets_min": 697 + }, + { + "epoch": 0.5209605209605209, + "grad_norm": 0.9088580356736188, + "learning_rate": 2.9686411149825787e-05, + "loss": 0.2325, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12748828530311584, + "step": 640, + "valid_targets_mean": 1671.0, + "valid_targets_min": 1010 + }, + { + "epoch": 0.525030525030525, + "grad_norm": 1.164121895920127, + "learning_rate": 2.991869918699187e-05, + "loss": 0.2378, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11815395206212997, + "step": 645, + "valid_targets_mean": 1411.0, + "valid_targets_min": 769 + }, + { + "epoch": 0.5291005291005291, + "grad_norm": 0.8907723008997975, + "learning_rate": 3.015098722415796e-05, + "loss": 0.2399, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10028189420700073, + "step": 650, + "valid_targets_mean": 1298.6, + "valid_targets_min": 745 + }, + { + "epoch": 0.5331705331705332, + "grad_norm": 1.0360817268903895, + "learning_rate": 3.0383275261324043e-05, + "loss": 0.2356, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10057306289672852, + "step": 655, + "valid_targets_mean": 1118.9, + "valid_targets_min": 680 + }, + { + "epoch": 0.5372405372405372, + "grad_norm": 1.0170055972555234, + "learning_rate": 3.061556329849013e-05, + "loss": 0.2286, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1395065188407898, + "step": 660, + "valid_targets_mean": 1905.9, + "valid_targets_min": 1347 + }, + { + "epoch": 0.5413105413105413, + "grad_norm": 0.9835868888525366, + "learning_rate": 3.084785133565622e-05, + "loss": 0.2448, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1703340709209442, + "step": 665, + "valid_targets_mean": 1636.0, + "valid_targets_min": 851 + }, + { + "epoch": 0.5453805453805454, + "grad_norm": 1.140427074478225, + "learning_rate": 3.10801393728223e-05, + "loss": 0.2303, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10298797488212585, + "step": 670, + "valid_targets_mean": 1457.2, + "valid_targets_min": 903 + }, + { + "epoch": 0.5494505494505495, + "grad_norm": 0.9191202259828434, + "learning_rate": 3.131242740998839e-05, + "loss": 0.2275, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12798643112182617, + "step": 675, + "valid_targets_mean": 1674.4, + "valid_targets_min": 1240 + }, + { + "epoch": 0.5535205535205535, + "grad_norm": 0.7293467596139723, + "learning_rate": 3.154471544715447e-05, + "loss": 0.2256, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09391629695892334, + "step": 680, + "valid_targets_mean": 1697.1, + "valid_targets_min": 917 + }, + { + "epoch": 0.5575905575905576, + "grad_norm": 0.904279770364778, + "learning_rate": 3.177700348432056e-05, + "loss": 0.2122, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09674836695194244, + "step": 685, + "valid_targets_mean": 1580.9, + "valid_targets_min": 930 + }, + { + "epoch": 0.5616605616605617, + "grad_norm": 0.914638142832871, + "learning_rate": 3.200929152148664e-05, + "loss": 0.2356, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12186380475759506, + "step": 690, + "valid_targets_mean": 1655.1, + "valid_targets_min": 1227 + }, + { + "epoch": 0.5657305657305657, + "grad_norm": 0.980002165464352, + "learning_rate": 3.2241579558652736e-05, + "loss": 0.2299, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11551667749881744, + "step": 695, + "valid_targets_mean": 1565.8, + "valid_targets_min": 870 + }, + { + "epoch": 0.5698005698005698, + "grad_norm": 1.0160528027914253, + "learning_rate": 3.2473867595818815e-05, + "loss": 0.234, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11251623183488846, + "step": 700, + "valid_targets_mean": 1564.6, + "valid_targets_min": 822 + }, + { + "epoch": 0.5738705738705738, + "grad_norm": 0.874400875096317, + "learning_rate": 3.270615563298491e-05, + "loss": 0.2377, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10493709146976471, + "step": 705, + "valid_targets_mean": 1540.5, + "valid_targets_min": 1214 + }, + { + "epoch": 0.577940577940578, + "grad_norm": 1.0055632341013279, + "learning_rate": 3.293844367015099e-05, + "loss": 0.2264, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10630016028881073, + "step": 710, + "valid_targets_mean": 1268.6, + "valid_targets_min": 591 + }, + { + "epoch": 0.582010582010582, + "grad_norm": 0.9150089746862734, + "learning_rate": 3.3170731707317074e-05, + "loss": 0.2416, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08800828456878662, + "step": 715, + "valid_targets_mean": 1232.5, + "valid_targets_min": 646 + }, + { + "epoch": 0.5860805860805861, + "grad_norm": 1.0300200490056817, + "learning_rate": 3.340301974448316e-05, + "loss": 0.2413, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12390526384115219, + "step": 720, + "valid_targets_mean": 3032.8, + "valid_targets_min": 2115 + }, + { + "epoch": 0.5901505901505901, + "grad_norm": 0.7910194368969308, + "learning_rate": 3.3635307781649246e-05, + "loss": 0.2256, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10102251172065735, + "step": 725, + "valid_targets_mean": 2553.1, + "valid_targets_min": 324 + }, + { + "epoch": 0.5942205942205943, + "grad_norm": 0.6300733315613507, + "learning_rate": 3.386759581881533e-05, + "loss": 0.1743, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08729104697704315, + "step": 730, + "valid_targets_mean": 2212.5, + "valid_targets_min": 982 + }, + { + "epoch": 0.5982905982905983, + "grad_norm": 0.5343519490234325, + "learning_rate": 3.409988385598142e-05, + "loss": 0.158, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09156970679759979, + "step": 735, + "valid_targets_mean": 3619.4, + "valid_targets_min": 2479 + }, + { + "epoch": 0.6023606023606024, + "grad_norm": 0.5149220005414784, + "learning_rate": 3.4332171893147505e-05, + "loss": 0.1691, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.058362703770399094, + "step": 740, + "valid_targets_mean": 3030.5, + "valid_targets_min": 811 + }, + { + "epoch": 0.6064306064306064, + "grad_norm": 0.548518746299089, + "learning_rate": 3.456445993031359e-05, + "loss": 0.1633, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08780361711978912, + "step": 745, + "valid_targets_mean": 2922.1, + "valid_targets_min": 1431 + }, + { + "epoch": 0.6105006105006106, + "grad_norm": 0.9179436846133354, + "learning_rate": 3.479674796747968e-05, + "loss": 0.2208, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17820069193840027, + "step": 750, + "valid_targets_mean": 1991.0, + "valid_targets_min": 703 + }, + { + "epoch": 0.6145706145706146, + "grad_norm": 0.49726406345055274, + "learning_rate": 3.5029036004645764e-05, + "loss": 0.1867, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0863165408372879, + "step": 755, + "valid_targets_mean": 3251.1, + "valid_targets_min": 621 + }, + { + "epoch": 0.6186406186406186, + "grad_norm": 0.5932447846887592, + "learning_rate": 3.526132404181185e-05, + "loss": 0.1687, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08991111069917679, + "step": 760, + "valid_targets_mean": 2681.1, + "valid_targets_min": 846 + }, + { + "epoch": 0.6227106227106227, + "grad_norm": 0.6378496859580239, + "learning_rate": 3.5493612078977936e-05, + "loss": 0.2089, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10263900458812714, + "step": 765, + "valid_targets_mean": 3618.6, + "valid_targets_min": 1369 + }, + { + "epoch": 0.6267806267806267, + "grad_norm": 0.48515918366222455, + "learning_rate": 3.572590011614402e-05, + "loss": 0.125, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07100746035575867, + "step": 770, + "valid_targets_mean": 2980.5, + "valid_targets_min": 754 + }, + { + "epoch": 0.6308506308506309, + "grad_norm": 0.5735235317908716, + "learning_rate": 3.595818815331011e-05, + "loss": 0.1468, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08798329532146454, + "step": 775, + "valid_targets_mean": 2610.8, + "valid_targets_min": 776 + }, + { + "epoch": 0.6349206349206349, + "grad_norm": 0.456776675223125, + "learning_rate": 3.6190476190476195e-05, + "loss": 0.1449, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07948589324951172, + "step": 780, + "valid_targets_mean": 3132.1, + "valid_targets_min": 754 + }, + { + "epoch": 0.638990638990639, + "grad_norm": 0.9011326371623981, + "learning_rate": 3.642276422764228e-05, + "loss": 0.2117, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2592279314994812, + "step": 785, + "valid_targets_mean": 2020.8, + "valid_targets_min": 664 + }, + { + "epoch": 0.643060643060643, + "grad_norm": 0.4732929096663725, + "learning_rate": 3.665505226480837e-05, + "loss": 0.1367, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0800432413816452, + "step": 790, + "valid_targets_mean": 3713.8, + "valid_targets_min": 2542 + }, + { + "epoch": 0.6471306471306472, + "grad_norm": 0.526624007045652, + "learning_rate": 3.688734030197445e-05, + "loss": 0.1514, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09772545099258423, + "step": 795, + "valid_targets_mean": 3191.9, + "valid_targets_min": 1462 + }, + { + "epoch": 0.6512006512006512, + "grad_norm": 0.559506972012892, + "learning_rate": 3.711962833914054e-05, + "loss": 0.1341, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09087392687797546, + "step": 800, + "valid_targets_mean": 3721.0, + "valid_targets_min": 3281 + }, + { + "epoch": 0.6552706552706553, + "grad_norm": 0.556479447905555, + "learning_rate": 3.7351916376306626e-05, + "loss": 0.1439, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07183747738599777, + "step": 805, + "valid_targets_mean": 3360.4, + "valid_targets_min": 2189 + }, + { + "epoch": 0.6593406593406593, + "grad_norm": 0.47771197862438014, + "learning_rate": 3.758420441347271e-05, + "loss": 0.1509, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06540735065937042, + "step": 810, + "valid_targets_mean": 3223.9, + "valid_targets_min": 1590 + }, + { + "epoch": 0.6634106634106635, + "grad_norm": 0.6194871689055971, + "learning_rate": 3.78164924506388e-05, + "loss": 0.1668, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09501364827156067, + "step": 815, + "valid_targets_mean": 2352.1, + "valid_targets_min": 597 + }, + { + "epoch": 0.6674806674806675, + "grad_norm": 0.5709548511328791, + "learning_rate": 3.804878048780488e-05, + "loss": 0.1586, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07070401310920715, + "step": 820, + "valid_targets_mean": 2612.9, + "valid_targets_min": 701 + }, + { + "epoch": 0.6715506715506715, + "grad_norm": 0.6298911863782369, + "learning_rate": 3.828106852497097e-05, + "loss": 0.1973, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13569210469722748, + "step": 825, + "valid_targets_mean": 2791.8, + "valid_targets_min": 1652 + }, + { + "epoch": 0.6756206756206756, + "grad_norm": 0.41285244882235234, + "learning_rate": 3.851335656213705e-05, + "loss": 0.1546, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06671032309532166, + "step": 830, + "valid_targets_mean": 3704.5, + "valid_targets_min": 1908 + }, + { + "epoch": 0.6796906796906796, + "grad_norm": 0.5540522932015873, + "learning_rate": 3.874564459930314e-05, + "loss": 0.1368, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05759810283780098, + "step": 835, + "valid_targets_mean": 3204.0, + "valid_targets_min": 842 + }, + { + "epoch": 0.6837606837606838, + "grad_norm": 0.5067086140456947, + "learning_rate": 3.897793263646922e-05, + "loss": 0.1998, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07755052298307419, + "step": 840, + "valid_targets_mean": 3402.9, + "valid_targets_min": 2552 + }, + { + "epoch": 0.6878306878306878, + "grad_norm": 0.4997196495634557, + "learning_rate": 3.9210220673635316e-05, + "loss": 0.1657, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06302007287740707, + "step": 845, + "valid_targets_mean": 2979.9, + "valid_targets_min": 753 + }, + { + "epoch": 0.6919006919006919, + "grad_norm": 0.6481848237866301, + "learning_rate": 3.9442508710801395e-05, + "loss": 0.1583, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08520719408988953, + "step": 850, + "valid_targets_mean": 1296.9, + "valid_targets_min": 568 + }, + { + "epoch": 0.6959706959706959, + "grad_norm": 0.6442175252045691, + "learning_rate": 3.967479674796748e-05, + "loss": 0.3765, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15752309560775757, + "step": 855, + "valid_targets_mean": 3126.9, + "valid_targets_min": 1070 + }, + { + "epoch": 0.7000407000407001, + "grad_norm": 0.6674141205524147, + "learning_rate": 3.990708478513357e-05, + "loss": 0.157, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07941246777772903, + "step": 860, + "valid_targets_mean": 2456.6, + "valid_targets_min": 329 + }, + { + "epoch": 0.7041107041107041, + "grad_norm": 0.9275941441191697, + "learning_rate": 3.999998518042097e-05, + "loss": 0.1578, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09268821775913239, + "step": 865, + "valid_targets_mean": 1027.8, + "valid_targets_min": 608 + }, + { + "epoch": 0.7081807081807082, + "grad_norm": 0.5285806533784231, + "learning_rate": 3.99998946164064e-05, + "loss": 0.2591, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09387123584747314, + "step": 870, + "valid_targets_mean": 4479.8, + "valid_targets_min": 2772 + }, + { + "epoch": 0.7122507122507122, + "grad_norm": 0.37957034276853, + "learning_rate": 3.99997217218491e-05, + "loss": 0.1232, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.03582998737692833, + "step": 875, + "valid_targets_mean": 4843.0, + "valid_targets_min": 3778 + }, + { + "epoch": 0.7163207163207164, + "grad_norm": 0.41394530709191624, + "learning_rate": 3.999946649746077e-05, + "loss": 0.1502, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07335696369409561, + "step": 880, + "valid_targets_mean": 4306.0, + "valid_targets_min": 2336 + }, + { + "epoch": 0.7203907203907204, + "grad_norm": 0.4646047347434619, + "learning_rate": 3.999912894429209e-05, + "loss": 0.1473, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09124352037906647, + "step": 885, + "valid_targets_mean": 3579.5, + "valid_targets_min": 846 + }, + { + "epoch": 0.7244607244607245, + "grad_norm": 0.44725991333845716, + "learning_rate": 3.999870906373257e-05, + "loss": 0.136, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07628832757472992, + "step": 890, + "valid_targets_mean": 2909.1, + "valid_targets_min": 798 + }, + { + "epoch": 0.7285307285307285, + "grad_norm": 0.5172719098407891, + "learning_rate": 3.999820685751071e-05, + "loss": 0.1286, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.057969748973846436, + "step": 895, + "valid_targets_mean": 2008.5, + "valid_targets_min": 533 + }, + { + "epoch": 0.7326007326007326, + "grad_norm": 0.5824331819625881, + "learning_rate": 3.9997622327693844e-05, + "loss": 0.1338, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06565313041210175, + "step": 900, + "valid_targets_mean": 2598.2, + "valid_targets_min": 833 + }, + { + "epoch": 0.7366707366707367, + "grad_norm": 0.3535344374037899, + "learning_rate": 3.999695547668823e-05, + "loss": 0.1257, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06244448944926262, + "step": 905, + "valid_targets_mean": 4031.8, + "valid_targets_min": 3391 + }, + { + "epoch": 0.7407407407407407, + "grad_norm": 0.6239344756497878, + "learning_rate": 3.9996206307238974e-05, + "loss": 0.1553, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15695582330226898, + "step": 910, + "valid_targets_mean": 1909.9, + "valid_targets_min": 732 + }, + { + "epoch": 0.7448107448107448, + "grad_norm": 0.5990548788322103, + "learning_rate": 3.9995374822430085e-05, + "loss": 0.1626, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06945429742336273, + "step": 915, + "valid_targets_mean": 1908.9, + "valid_targets_min": 538 + }, + { + "epoch": 0.7488807488807488, + "grad_norm": 0.7675363094142649, + "learning_rate": 3.999446102568441e-05, + "loss": 0.12, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06405394524335861, + "step": 920, + "valid_targets_mean": 824.9, + "valid_targets_min": 720 + }, + { + "epoch": 0.752950752950753, + "grad_norm": 0.485700948966615, + "learning_rate": 3.9993464920763625e-05, + "loss": 0.1248, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.059826068580150604, + "step": 925, + "valid_targets_mean": 3181.1, + "valid_targets_min": 2328 + }, + { + "epoch": 0.757020757020757, + "grad_norm": 0.5240818086557136, + "learning_rate": 3.9992386511768256e-05, + "loss": 0.1205, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0472230464220047, + "step": 930, + "valid_targets_mean": 2581.4, + "valid_targets_min": 684 + }, + { + "epoch": 0.7610907610907611, + "grad_norm": 0.45360997168418743, + "learning_rate": 3.999122580313763e-05, + "loss": 0.1932, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06742153316736221, + "step": 935, + "valid_targets_mean": 3461.2, + "valid_targets_min": 2795 + }, + { + "epoch": 0.7651607651607651, + "grad_norm": 0.44183845519885645, + "learning_rate": 3.998998279964985e-05, + "loss": 0.1417, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06665977835655212, + "step": 940, + "valid_targets_mean": 3382.6, + "valid_targets_min": 744 + }, + { + "epoch": 0.7692307692307693, + "grad_norm": 0.3888042558436287, + "learning_rate": 3.99886575064218e-05, + "loss": 0.1605, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04930161312222481, + "step": 945, + "valid_targets_mean": 3966.6, + "valid_targets_min": 3279 + }, + { + "epoch": 0.7733007733007733, + "grad_norm": 0.781091504085825, + "learning_rate": 3.9987249928909134e-05, + "loss": 0.1596, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06562864780426025, + "step": 950, + "valid_targets_mean": 792.1, + "valid_targets_min": 536 + }, + { + "epoch": 0.7773707773707774, + "grad_norm": 0.5459088120181143, + "learning_rate": 3.998576007290619e-05, + "loss": 0.2026, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07265473157167435, + "step": 955, + "valid_targets_mean": 2227.4, + "valid_targets_min": 853 + }, + { + "epoch": 0.7814407814407814, + "grad_norm": 0.5220107941150061, + "learning_rate": 3.998418794454604e-05, + "loss": 0.1365, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06951494514942169, + "step": 960, + "valid_targets_mean": 3105.6, + "valid_targets_min": 2621 + }, + { + "epoch": 0.7855107855107855, + "grad_norm": 0.8069948423800993, + "learning_rate": 3.998253355030043e-05, + "loss": 0.1811, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08019239455461502, + "step": 965, + "valid_targets_mean": 1028.0, + "valid_targets_min": 728 + }, + { + "epoch": 0.7895807895807896, + "grad_norm": 0.7429706122743682, + "learning_rate": 3.9980796896979754e-05, + "loss": 0.1414, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08751674741506577, + "step": 970, + "valid_targets_mean": 1574.2, + "valid_targets_min": 618 + }, + { + "epoch": 0.7936507936507936, + "grad_norm": 0.39203442846152786, + "learning_rate": 3.997897799173304e-05, + "loss": 0.1386, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08704735338687897, + "step": 975, + "valid_targets_mean": 4135.0, + "valid_targets_min": 907 + }, + { + "epoch": 0.7977207977207977, + "grad_norm": 0.5960348844477907, + "learning_rate": 3.9977076842047875e-05, + "loss": 0.1466, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11054828017950058, + "step": 980, + "valid_targets_mean": 3060.8, + "valid_targets_min": 580 + }, + { + "epoch": 0.8017908017908018, + "grad_norm": 0.4111968138383219, + "learning_rate": 3.997509345575045e-05, + "loss": 0.1165, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05692315846681595, + "step": 985, + "valid_targets_mean": 3413.8, + "valid_targets_min": 726 + }, + { + "epoch": 0.8058608058608059, + "grad_norm": 0.4256293187737857, + "learning_rate": 3.997302784100548e-05, + "loss": 0.1143, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07128673046827316, + "step": 990, + "valid_targets_mean": 4560.9, + "valid_targets_min": 2613 + }, + { + "epoch": 0.8099308099308099, + "grad_norm": 0.5345662568540609, + "learning_rate": 3.9970880006316154e-05, + "loss": 0.13, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08130151033401489, + "step": 995, + "valid_targets_mean": 2847.2, + "valid_targets_min": 531 + }, + { + "epoch": 0.814000814000814, + "grad_norm": 0.4593774947539876, + "learning_rate": 3.996864996052416e-05, + "loss": 0.1566, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06226513907313347, + "step": 1000, + "valid_targets_mean": 2571.5, + "valid_targets_min": 826 + }, + { + "epoch": 0.818070818070818, + "grad_norm": 0.4128348658429294, + "learning_rate": 3.996633771280956e-05, + "loss": 0.1443, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05955663323402405, + "step": 1005, + "valid_targets_mean": 3709.6, + "valid_targets_min": 3024 + }, + { + "epoch": 0.8221408221408222, + "grad_norm": 0.4342247430230778, + "learning_rate": 3.996394327269085e-05, + "loss": 0.1308, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07043300569057465, + "step": 1010, + "valid_targets_mean": 3559.0, + "valid_targets_min": 2588 + }, + { + "epoch": 0.8262108262108262, + "grad_norm": 0.6288265395421371, + "learning_rate": 3.996146665002486e-05, + "loss": 0.1342, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08794619888067245, + "step": 1015, + "valid_targets_mean": 913.9, + "valid_targets_min": 502 + }, + { + "epoch": 0.8302808302808303, + "grad_norm": 0.42302660971720984, + "learning_rate": 3.995890785500673e-05, + "loss": 0.1468, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.061350464820861816, + "step": 1020, + "valid_targets_mean": 3301.9, + "valid_targets_min": 1158 + }, + { + "epoch": 0.8343508343508343, + "grad_norm": 0.3202899133836535, + "learning_rate": 3.995626689816986e-05, + "loss": 0.1109, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05292180925607681, + "step": 1025, + "valid_targets_mean": 3874.9, + "valid_targets_min": 3044 + }, + { + "epoch": 0.8384208384208384, + "grad_norm": 0.4395087698128267, + "learning_rate": 3.9953543790385885e-05, + "loss": 0.1466, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06124342978000641, + "step": 1030, + "valid_targets_mean": 2167.8, + "valid_targets_min": 539 + }, + { + "epoch": 0.8424908424908425, + "grad_norm": 0.45572335142795467, + "learning_rate": 3.99507385428646e-05, + "loss": 0.1409, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0583365373313427, + "step": 1035, + "valid_targets_mean": 2397.0, + "valid_targets_min": 387 + }, + { + "epoch": 0.8465608465608465, + "grad_norm": 0.6042755009543974, + "learning_rate": 3.994785116715395e-05, + "loss": 0.1498, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08790750801563263, + "step": 1040, + "valid_targets_mean": 2340.9, + "valid_targets_min": 872 + }, + { + "epoch": 0.8506308506308506, + "grad_norm": 0.4360163793065081, + "learning_rate": 3.9944881675139956e-05, + "loss": 0.1384, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.052534107118844986, + "step": 1045, + "valid_targets_mean": 3017.2, + "valid_targets_min": 822 + }, + { + "epoch": 0.8547008547008547, + "grad_norm": 0.7915374414247746, + "learning_rate": 3.9941830079046686e-05, + "loss": 0.149, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10059764981269836, + "step": 1050, + "valid_targets_mean": 1405.9, + "valid_targets_min": 605 + }, + { + "epoch": 0.8587708587708588, + "grad_norm": 0.5768502808481547, + "learning_rate": 3.9938696391436165e-05, + "loss": 0.1304, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08434294909238815, + "step": 1055, + "valid_targets_mean": 2249.2, + "valid_targets_min": 808 + }, + { + "epoch": 0.8628408628408628, + "grad_norm": 0.44611255063891225, + "learning_rate": 3.993548062520839e-05, + "loss": 0.1566, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06437411904335022, + "step": 1060, + "valid_targets_mean": 2900.5, + "valid_targets_min": 1010 + }, + { + "epoch": 0.8669108669108669, + "grad_norm": 0.40352294092211805, + "learning_rate": 3.9932182793601216e-05, + "loss": 0.1292, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.047206662595272064, + "step": 1065, + "valid_targets_mean": 2432.6, + "valid_targets_min": 524 + }, + { + "epoch": 0.870980870980871, + "grad_norm": 0.48655652159394597, + "learning_rate": 3.992880291019032e-05, + "loss": 0.1236, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04721086844801903, + "step": 1070, + "valid_targets_mean": 1303.5, + "valid_targets_min": 516 + }, + { + "epoch": 0.8750508750508751, + "grad_norm": 0.5368186314445392, + "learning_rate": 3.992534098888916e-05, + "loss": 0.1446, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06309214979410172, + "step": 1075, + "valid_targets_mean": 1762.1, + "valid_targets_min": 342 + }, + { + "epoch": 0.8791208791208791, + "grad_norm": 0.3615301144651535, + "learning_rate": 3.992179704394891e-05, + "loss": 0.1391, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.061992619186639786, + "step": 1080, + "valid_targets_mean": 3301.8, + "valid_targets_min": 1440 + }, + { + "epoch": 0.8831908831908832, + "grad_norm": 0.47684964360761173, + "learning_rate": 3.991817108995838e-05, + "loss": 0.1145, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.053332049399614334, + "step": 1085, + "valid_targets_mean": 2274.2, + "valid_targets_min": 708 + }, + { + "epoch": 0.8872608872608873, + "grad_norm": 0.3277156842140217, + "learning_rate": 3.9914463141844e-05, + "loss": 0.1241, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.058171793818473816, + "step": 1090, + "valid_targets_mean": 5103.1, + "valid_targets_min": 3653 + }, + { + "epoch": 0.8913308913308914, + "grad_norm": 0.37913532995319227, + "learning_rate": 3.991067321486971e-05, + "loss": 0.1207, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04157901927828789, + "step": 1095, + "valid_targets_mean": 2366.5, + "valid_targets_min": 684 + }, + { + "epoch": 0.8954008954008954, + "grad_norm": 0.44728273843652605, + "learning_rate": 3.990680132463694e-05, + "loss": 0.1065, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06624357402324677, + "step": 1100, + "valid_targets_mean": 3352.1, + "valid_targets_min": 726 + }, + { + "epoch": 0.8994708994708994, + "grad_norm": 0.3699276991369879, + "learning_rate": 3.99028474870845e-05, + "loss": 0.1372, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06432875990867615, + "step": 1105, + "valid_targets_mean": 4009.6, + "valid_targets_min": 3076 + }, + { + "epoch": 0.9035409035409036, + "grad_norm": 0.559438917402308, + "learning_rate": 3.989881171848857e-05, + "loss": 0.1463, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19042916595935822, + "step": 1110, + "valid_targets_mean": 2811.9, + "valid_targets_min": 1536 + }, + { + "epoch": 0.9076109076109076, + "grad_norm": 0.4455049575824879, + "learning_rate": 3.989469403546258e-05, + "loss": 0.1275, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07045942544937134, + "step": 1115, + "valid_targets_mean": 2669.4, + "valid_targets_min": 582 + }, + { + "epoch": 0.9116809116809117, + "grad_norm": 0.35606075856961944, + "learning_rate": 3.989049445495718e-05, + "loss": 0.1334, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05114404112100601, + "step": 1120, + "valid_targets_mean": 3261.9, + "valid_targets_min": 633 + }, + { + "epoch": 0.9157509157509157, + "grad_norm": 0.37757857329286704, + "learning_rate": 3.9886212994260146e-05, + "loss": 0.1585, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04709208756685257, + "step": 1125, + "valid_targets_mean": 3287.2, + "valid_targets_min": 1130 + }, + { + "epoch": 0.9198209198209198, + "grad_norm": 0.40732084354821907, + "learning_rate": 3.98818496709963e-05, + "loss": 0.1207, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06523814797401428, + "step": 1130, + "valid_targets_mean": 2858.9, + "valid_targets_min": 492 + }, + { + "epoch": 0.9238909238909239, + "grad_norm": 0.7103621320340062, + "learning_rate": 3.987740450312751e-05, + "loss": 0.1378, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08393198251724243, + "step": 1135, + "valid_targets_mean": 2531.5, + "valid_targets_min": 1045 + }, + { + "epoch": 0.927960927960928, + "grad_norm": 0.38515151498760647, + "learning_rate": 3.9872877508952506e-05, + "loss": 0.1224, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04183115065097809, + "step": 1140, + "valid_targets_mean": 2692.0, + "valid_targets_min": 647 + }, + { + "epoch": 0.932030932030932, + "grad_norm": 0.6540079458171796, + "learning_rate": 3.9868268707106884e-05, + "loss": 0.1961, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13763931393623352, + "step": 1145, + "valid_targets_mean": 1831.5, + "valid_targets_min": 806 + }, + { + "epoch": 0.9361009361009361, + "grad_norm": 0.6012472482945602, + "learning_rate": 3.9863578116563e-05, + "loss": 0.1604, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06246567517518997, + "step": 1150, + "valid_targets_mean": 1300.1, + "valid_targets_min": 575 + }, + { + "epoch": 0.9401709401709402, + "grad_norm": 0.44241303636120555, + "learning_rate": 3.9858805756629906e-05, + "loss": 0.1246, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.057974182069301605, + "step": 1155, + "valid_targets_mean": 3615.9, + "valid_targets_min": 3193 + }, + { + "epoch": 0.9442409442409443, + "grad_norm": 0.3903708665427787, + "learning_rate": 3.985395164695324e-05, + "loss": 0.1287, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05488427355885506, + "step": 1160, + "valid_targets_mean": 3944.1, + "valid_targets_min": 3433 + }, + { + "epoch": 0.9483109483109483, + "grad_norm": 0.39685042835856477, + "learning_rate": 3.98490158075152e-05, + "loss": 0.1271, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05394378677010536, + "step": 1165, + "valid_targets_mean": 2874.5, + "valid_targets_min": 767 + }, + { + "epoch": 0.9523809523809523, + "grad_norm": 0.4929139704427634, + "learning_rate": 3.9843998258634397e-05, + "loss": 0.119, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0530795156955719, + "step": 1170, + "valid_targets_mean": 2262.1, + "valid_targets_min": 785 + }, + { + "epoch": 0.9564509564509565, + "grad_norm": 0.7315882847416906, + "learning_rate": 3.983889902096582e-05, + "loss": 0.1631, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08292382210493088, + "step": 1175, + "valid_targets_mean": 1222.8, + "valid_targets_min": 758 + }, + { + "epoch": 0.9605209605209605, + "grad_norm": 0.4497078672225934, + "learning_rate": 3.9833718115500735e-05, + "loss": 0.1105, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.057786524295806885, + "step": 1180, + "valid_targets_mean": 2184.0, + "valid_targets_min": 797 + }, + { + "epoch": 0.9645909645909646, + "grad_norm": 0.37472458302150724, + "learning_rate": 3.9828455563566585e-05, + "loss": 0.1212, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.058492764830589294, + "step": 1185, + "valid_targets_mean": 3445.0, + "valid_targets_min": 1606 + }, + { + "epoch": 0.9686609686609686, + "grad_norm": 0.49178605406882114, + "learning_rate": 3.982311138682693e-05, + "loss": 0.1299, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07257603108882904, + "step": 1190, + "valid_targets_mean": 2760.0, + "valid_targets_min": 678 + }, + { + "epoch": 0.9727309727309728, + "grad_norm": 0.30926831647178493, + "learning_rate": 3.981768560728132e-05, + "loss": 0.1272, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06540581583976746, + "step": 1195, + "valid_targets_mean": 5550.0, + "valid_targets_min": 3997 + }, + { + "epoch": 0.9768009768009768, + "grad_norm": 0.3480919181704012, + "learning_rate": 3.981217824726525e-05, + "loss": 0.113, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.035894401371479034, + "step": 1200, + "valid_targets_mean": 2142.1, + "valid_targets_min": 706 + }, + { + "epoch": 0.9808709808709809, + "grad_norm": 0.6953856332846773, + "learning_rate": 3.9806589329450045e-05, + "loss": 0.1299, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09473567456007004, + "step": 1205, + "valid_targets_mean": 1705.0, + "valid_targets_min": 580 + }, + { + "epoch": 0.9849409849409849, + "grad_norm": 0.6447718942126139, + "learning_rate": 3.980091887684274e-05, + "loss": 0.1484, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11416690051555634, + "step": 1210, + "valid_targets_mean": 2792.0, + "valid_targets_min": 2379 + }, + { + "epoch": 0.989010989010989, + "grad_norm": 0.44493565718387146, + "learning_rate": 3.979516691278605e-05, + "loss": 0.2021, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05794357508420944, + "step": 1215, + "valid_targets_mean": 3489.0, + "valid_targets_min": 2861 + }, + { + "epoch": 0.9930809930809931, + "grad_norm": 0.3554690075057992, + "learning_rate": 3.9789333460958195e-05, + "loss": 0.1248, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06033288687467575, + "step": 1220, + "valid_targets_mean": 3850.4, + "valid_targets_min": 2453 + }, + { + "epoch": 0.9971509971509972, + "grad_norm": 0.39798751935774657, + "learning_rate": 3.978341854537288e-05, + "loss": 0.1202, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06392422318458557, + "step": 1225, + "valid_targets_mean": 3932.9, + "valid_targets_min": 3389 + }, + { + "epoch": 1.0008140008140007, + "grad_norm": 0.5953058538336475, + "learning_rate": 3.977742219037914e-05, + "loss": 0.1898, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14234861731529236, + "step": 1230, + "valid_targets_mean": 8215.2, + "valid_targets_min": 6163 + }, + { + "epoch": 1.0048840048840049, + "grad_norm": 0.4684299696968115, + "learning_rate": 3.9771344420661265e-05, + "loss": 0.2882, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.144585520029068, + "step": 1235, + "valid_targets_mean": 8594.4, + "valid_targets_min": 6006 + }, + { + "epoch": 1.008954008954009, + "grad_norm": 0.6867925767436964, + "learning_rate": 3.9765185261238685e-05, + "loss": 0.28, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1256946623325348, + "step": 1240, + "valid_targets_mean": 3578.4, + "valid_targets_min": 289 + }, + { + "epoch": 1.0130240130240131, + "grad_norm": 0.42999406091039816, + "learning_rate": 3.9758944737465885e-05, + "loss": 0.2688, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13307125866413116, + "step": 1245, + "valid_targets_mean": 7019.9, + "valid_targets_min": 4985 + }, + { + "epoch": 1.017094017094017, + "grad_norm": 0.43330214352879454, + "learning_rate": 3.9752622875032275e-05, + "loss": 0.2461, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12875577807426453, + "step": 1250, + "valid_targets_mean": 6956.6, + "valid_targets_min": 1457 + }, + { + "epoch": 1.0211640211640212, + "grad_norm": 0.4137992292045322, + "learning_rate": 3.974621969996213e-05, + "loss": 0.253, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12030914425849915, + "step": 1255, + "valid_targets_mean": 7361.6, + "valid_targets_min": 4961 + }, + { + "epoch": 1.0252340252340253, + "grad_norm": 0.5022158686145597, + "learning_rate": 3.973973523861442e-05, + "loss": 0.269, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15889611840248108, + "step": 1260, + "valid_targets_mean": 7058.5, + "valid_targets_min": 5985 + }, + { + "epoch": 1.0293040293040292, + "grad_norm": 0.39049657893691275, + "learning_rate": 3.973316951768275e-05, + "loss": 0.2651, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14278875291347504, + "step": 1265, + "valid_targets_mean": 7664.2, + "valid_targets_min": 5169 + }, + { + "epoch": 1.0333740333740333, + "grad_norm": 0.42504045855242684, + "learning_rate": 3.972652256419522e-05, + "loss": 0.2518, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12719795107841492, + "step": 1270, + "valid_targets_mean": 7111.8, + "valid_targets_min": 5348 + }, + { + "epoch": 1.0374440374440375, + "grad_norm": 0.4952882040537086, + "learning_rate": 3.971979440551436e-05, + "loss": 0.2482, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11728663742542267, + "step": 1275, + "valid_targets_mean": 4238.9, + "valid_targets_min": 152 + }, + { + "epoch": 1.0415140415140416, + "grad_norm": 0.4181601879292703, + "learning_rate": 3.9712985069336955e-05, + "loss": 0.2433, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12817908823490143, + "step": 1280, + "valid_targets_mean": 7199.4, + "valid_targets_min": 4258 + }, + { + "epoch": 1.0455840455840455, + "grad_norm": 0.48722985192181184, + "learning_rate": 3.9706094583693954e-05, + "loss": 0.2459, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1204463317990303, + "step": 1285, + "valid_targets_mean": 7243.4, + "valid_targets_min": 5831 + }, + { + "epoch": 1.0496540496540496, + "grad_norm": 0.4108207703430436, + "learning_rate": 3.96991229769504e-05, + "loss": 0.2421, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1202164739370346, + "step": 1290, + "valid_targets_mean": 6950.6, + "valid_targets_min": 3862 + }, + { + "epoch": 1.0537240537240538, + "grad_norm": 0.40991192760617895, + "learning_rate": 3.969207027780524e-05, + "loss": 0.239, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1293250173330307, + "step": 1295, + "valid_targets_mean": 6991.9, + "valid_targets_min": 5696 + }, + { + "epoch": 1.0577940577940579, + "grad_norm": 0.42826333297089153, + "learning_rate": 3.968493651529126e-05, + "loss": 0.2323, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1293313354253769, + "step": 1300, + "valid_targets_mean": 6963.5, + "valid_targets_min": 4947 + }, + { + "epoch": 1.0618640618640618, + "grad_norm": 0.39443414280580313, + "learning_rate": 3.967772171877494e-05, + "loss": 0.2361, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11275345087051392, + "step": 1305, + "valid_targets_mean": 6436.9, + "valid_targets_min": 4988 + }, + { + "epoch": 1.065934065934066, + "grad_norm": 0.4373139454687315, + "learning_rate": 3.967042591795634e-05, + "loss": 0.2394, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11951905488967896, + "step": 1310, + "valid_targets_mean": 7518.0, + "valid_targets_min": 5476 + }, + { + "epoch": 1.07000407000407, + "grad_norm": 0.38657017917234543, + "learning_rate": 3.966304914286898e-05, + "loss": 0.2313, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10981670022010803, + "step": 1315, + "valid_targets_mean": 7422.6, + "valid_targets_min": 5226 + }, + { + "epoch": 1.074074074074074, + "grad_norm": 0.46603622908971615, + "learning_rate": 3.965559142387972e-05, + "loss": 0.2526, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15436352789402008, + "step": 1320, + "valid_targets_mean": 7080.0, + "valid_targets_min": 3753 + }, + { + "epoch": 1.078144078144078, + "grad_norm": 0.5089544183960755, + "learning_rate": 3.964805279168862e-05, + "loss": 0.2634, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13544388115406036, + "step": 1325, + "valid_targets_mean": 6064.6, + "valid_targets_min": 5161 + }, + { + "epoch": 1.0822140822140822, + "grad_norm": 0.4213050484999416, + "learning_rate": 3.9640433277328824e-05, + "loss": 0.2211, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11462973058223724, + "step": 1330, + "valid_targets_mean": 7388.1, + "valid_targets_min": 5795 + }, + { + "epoch": 1.0862840862840863, + "grad_norm": 0.3819112265649167, + "learning_rate": 3.963273291216645e-05, + "loss": 0.2253, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10795051604509354, + "step": 1335, + "valid_targets_mean": 7696.0, + "valid_targets_min": 4658 + }, + { + "epoch": 1.0903540903540903, + "grad_norm": 0.4114432948340316, + "learning_rate": 3.9624951727900406e-05, + "loss": 0.2316, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12693330645561218, + "step": 1340, + "valid_targets_mean": 7799.1, + "valid_targets_min": 5508 + }, + { + "epoch": 1.0944240944240944, + "grad_norm": 0.4056233219924477, + "learning_rate": 3.9617089756562334e-05, + "loss": 0.2435, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11809612810611725, + "step": 1345, + "valid_targets_mean": 7341.6, + "valid_targets_min": 6454 + }, + { + "epoch": 1.0984940984940985, + "grad_norm": 0.4188231753878829, + "learning_rate": 3.960914703051642e-05, + "loss": 0.234, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10444575548171997, + "step": 1350, + "valid_targets_mean": 6818.0, + "valid_targets_min": 4392 + }, + { + "epoch": 1.1025641025641026, + "grad_norm": 0.3886197018701831, + "learning_rate": 3.960112358245927e-05, + "loss": 0.2331, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10350356996059418, + "step": 1355, + "valid_targets_mean": 7857.6, + "valid_targets_min": 5394 + }, + { + "epoch": 1.1066341066341066, + "grad_norm": 0.35882023777430566, + "learning_rate": 3.9593019445419814e-05, + "loss": 0.205, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09432516247034073, + "step": 1360, + "valid_targets_mean": 8314.1, + "valid_targets_min": 4837 + }, + { + "epoch": 1.1107041107041107, + "grad_norm": 0.3392184288683715, + "learning_rate": 3.9584834652759124e-05, + "loss": 0.2068, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08807634562253952, + "step": 1365, + "valid_targets_mean": 7368.2, + "valid_targets_min": 6027 + }, + { + "epoch": 1.1147741147741148, + "grad_norm": 0.4323967342653223, + "learning_rate": 3.95765692381703e-05, + "loss": 0.2166, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11778441816568375, + "step": 1370, + "valid_targets_mean": 6435.0, + "valid_targets_min": 4238 + }, + { + "epoch": 1.118844118844119, + "grad_norm": 1.7395054695099226, + "learning_rate": 3.956822323567832e-05, + "loss": 0.2381, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10033954679965973, + "step": 1375, + "valid_targets_mean": 247.8, + "valid_targets_min": 137 + }, + { + "epoch": 1.1229141229141228, + "grad_norm": 0.42995283027411474, + "learning_rate": 3.9559796679639914e-05, + "loss": 0.2437, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11196690052747726, + "step": 1380, + "valid_targets_mean": 6807.0, + "valid_targets_min": 5165 + }, + { + "epoch": 1.126984126984127, + "grad_norm": 0.4579494248210466, + "learning_rate": 3.955128960474341e-05, + "loss": 0.2476, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1293865293264389, + "step": 1385, + "valid_targets_mean": 6989.9, + "valid_targets_min": 5104 + }, + { + "epoch": 1.131054131054131, + "grad_norm": 0.41699725958422584, + "learning_rate": 3.95427020460086e-05, + "loss": 0.2457, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13233298063278198, + "step": 1390, + "valid_targets_mean": 6692.2, + "valid_targets_min": 5068 + }, + { + "epoch": 1.1351241351241352, + "grad_norm": 0.4309390492887969, + "learning_rate": 3.953403403878659e-05, + "loss": 0.2386, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1428421139717102, + "step": 1395, + "valid_targets_mean": 7419.6, + "valid_targets_min": 5578 + }, + { + "epoch": 1.1391941391941391, + "grad_norm": 0.3957195172464458, + "learning_rate": 3.952528561875966e-05, + "loss": 0.2278, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11624685674905777, + "step": 1400, + "valid_targets_mean": 7536.5, + "valid_targets_min": 5075 + }, + { + "epoch": 1.1432641432641433, + "grad_norm": 0.4039821590351794, + "learning_rate": 3.951645682194109e-05, + "loss": 0.2366, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1071617528796196, + "step": 1405, + "valid_targets_mean": 7221.5, + "valid_targets_min": 5841 + }, + { + "epoch": 1.1473341473341474, + "grad_norm": 0.41845015880407643, + "learning_rate": 3.950754768467506e-05, + "loss": 0.23, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11402362585067749, + "step": 1410, + "valid_targets_mean": 7287.5, + "valid_targets_min": 5151 + }, + { + "epoch": 1.1514041514041513, + "grad_norm": 0.40797870681631443, + "learning_rate": 3.949855824363647e-05, + "loss": 0.2405, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11144113540649414, + "step": 1415, + "valid_targets_mean": 6738.4, + "valid_targets_min": 4042 + }, + { + "epoch": 1.1554741554741554, + "grad_norm": 0.43409193061927426, + "learning_rate": 3.948948853583081e-05, + "loss": 0.2307, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12913505733013153, + "step": 1420, + "valid_targets_mean": 6331.0, + "valid_targets_min": 5496 + }, + { + "epoch": 1.1595441595441596, + "grad_norm": 0.43403376610064975, + "learning_rate": 3.9480338598593955e-05, + "loss": 0.2347, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11496134102344513, + "step": 1425, + "valid_targets_mean": 6271.1, + "valid_targets_min": 4695 + }, + { + "epoch": 1.1636141636141637, + "grad_norm": 0.45486378077738254, + "learning_rate": 3.947110846959207e-05, + "loss": 0.2352, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12110753357410431, + "step": 1430, + "valid_targets_mean": 5790.8, + "valid_targets_min": 5021 + }, + { + "epoch": 1.1676841676841676, + "grad_norm": 0.4865911390888994, + "learning_rate": 3.9461798186821434e-05, + "loss": 0.2264, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09251779317855835, + "step": 1435, + "valid_targets_mean": 3386.8, + "valid_targets_min": 2279 + }, + { + "epoch": 1.1717541717541717, + "grad_norm": 0.45904203833658064, + "learning_rate": 3.9452407788608275e-05, + "loss": 0.2263, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11358363926410675, + "step": 1440, + "valid_targets_mean": 6240.5, + "valid_targets_min": 5093 + }, + { + "epoch": 1.1758241758241759, + "grad_norm": 0.46020651070927404, + "learning_rate": 3.9442937313608646e-05, + "loss": 0.2275, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11856567859649658, + "step": 1445, + "valid_targets_mean": 6329.5, + "valid_targets_min": 4589 + }, + { + "epoch": 1.17989417989418, + "grad_norm": 0.3983320494029986, + "learning_rate": 3.94333868008082e-05, + "loss": 0.2265, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12220582365989685, + "step": 1450, + "valid_targets_mean": 7589.2, + "valid_targets_min": 4963 + }, + { + "epoch": 1.183964183964184, + "grad_norm": 0.4581307530062702, + "learning_rate": 3.94237562895221e-05, + "loss": 0.2273, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09797120094299316, + "step": 1455, + "valid_targets_mean": 5577.9, + "valid_targets_min": 4458 + }, + { + "epoch": 1.188034188034188, + "grad_norm": 0.5075652321634042, + "learning_rate": 3.941404581939481e-05, + "loss": 0.2259, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10852977633476257, + "step": 1460, + "valid_targets_mean": 5432.5, + "valid_targets_min": 4433 + }, + { + "epoch": 1.1921041921041922, + "grad_norm": 0.6232838095901682, + "learning_rate": 3.940425543039996e-05, + "loss": 0.2301, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11723101139068604, + "step": 1465, + "valid_targets_mean": 6361.8, + "valid_targets_min": 5201 + }, + { + "epoch": 1.196174196174196, + "grad_norm": 0.42438428970084924, + "learning_rate": 3.939438516284015e-05, + "loss": 0.2312, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10583050549030304, + "step": 1470, + "valid_targets_mean": 5824.4, + "valid_targets_min": 5129 + }, + { + "epoch": 1.2002442002442002, + "grad_norm": 0.8552840375409755, + "learning_rate": 3.938443505734684e-05, + "loss": 0.2043, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1607503890991211, + "step": 1475, + "valid_targets_mean": 1637.6, + "valid_targets_min": 137 + }, + { + "epoch": 1.2043142043142043, + "grad_norm": 0.4282729691315241, + "learning_rate": 3.9374405154880104e-05, + "loss": 0.2251, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11325763911008835, + "step": 1480, + "valid_targets_mean": 6474.6, + "valid_targets_min": 4751 + }, + { + "epoch": 1.2083842083842085, + "grad_norm": 0.45919849824696074, + "learning_rate": 3.9364295496728545e-05, + "loss": 0.2223, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1222546175122261, + "step": 1485, + "valid_targets_mean": 7186.2, + "valid_targets_min": 5358 + }, + { + "epoch": 1.2124542124542124, + "grad_norm": 0.41905751280920195, + "learning_rate": 3.9354106124509045e-05, + "loss": 0.2288, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11492247879505157, + "step": 1490, + "valid_targets_mean": 7126.8, + "valid_targets_min": 4078 + }, + { + "epoch": 1.2165242165242165, + "grad_norm": 0.4228687168767464, + "learning_rate": 3.934383708016667e-05, + "loss": 0.2203, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11097435653209686, + "step": 1495, + "valid_targets_mean": 7022.1, + "valid_targets_min": 4940 + }, + { + "epoch": 1.2205942205942206, + "grad_norm": 0.42085225391276915, + "learning_rate": 3.9333488405974434e-05, + "loss": 0.2153, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11520600318908691, + "step": 1500, + "valid_targets_mean": 5900.6, + "valid_targets_min": 4779 + }, + { + "epoch": 1.2246642246642248, + "grad_norm": 0.4633839046298897, + "learning_rate": 3.932306014453315e-05, + "loss": 0.2254, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11590239405632019, + "step": 1505, + "valid_targets_mean": 6290.1, + "valid_targets_min": 4620 + }, + { + "epoch": 1.2287342287342287, + "grad_norm": 0.9660012618752344, + "learning_rate": 3.9312552338771284e-05, + "loss": 0.2534, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10209794342517853, + "step": 1510, + "valid_targets_mean": 1162.2, + "valid_targets_min": 788 + }, + { + "epoch": 1.2328042328042328, + "grad_norm": 0.8959866942571094, + "learning_rate": 3.9301965031944724e-05, + "loss": 0.2481, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11823749542236328, + "step": 1515, + "valid_targets_mean": 1500.6, + "valid_targets_min": 957 + }, + { + "epoch": 1.236874236874237, + "grad_norm": 0.92051115161302, + "learning_rate": 3.929129826763663e-05, + "loss": 0.2339, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11000481992959976, + "step": 1520, + "valid_targets_mean": 1371.2, + "valid_targets_min": 796 + }, + { + "epoch": 1.2409442409442408, + "grad_norm": 0.8905552213698138, + "learning_rate": 3.928055208975726e-05, + "loss": 0.2341, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13280996680259705, + "step": 1525, + "valid_targets_mean": 1433.6, + "valid_targets_min": 697 + }, + { + "epoch": 1.245014245014245, + "grad_norm": 0.7573822137035331, + "learning_rate": 3.926972654254379e-05, + "loss": 0.2295, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10896831750869751, + "step": 1530, + "valid_targets_mean": 1578.9, + "valid_targets_min": 638 + }, + { + "epoch": 1.249084249084249, + "grad_norm": 0.7786863011930503, + "learning_rate": 3.92588216705601e-05, + "loss": 0.2278, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11522305011749268, + "step": 1535, + "valid_targets_mean": 1746.4, + "valid_targets_min": 727 + }, + { + "epoch": 1.2531542531542532, + "grad_norm": 0.7938573989349429, + "learning_rate": 3.924783751869663e-05, + "loss": 0.2354, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09873868525028229, + "step": 1540, + "valid_targets_mean": 1309.9, + "valid_targets_min": 761 + }, + { + "epoch": 1.2572242572242573, + "grad_norm": 0.9426871570045071, + "learning_rate": 3.923677413217019e-05, + "loss": 0.2181, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13097123801708221, + "step": 1545, + "valid_targets_mean": 1368.6, + "valid_targets_min": 714 + }, + { + "epoch": 1.2612942612942613, + "grad_norm": 0.8036694210964767, + "learning_rate": 3.9225631556523744e-05, + "loss": 0.2371, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11239443719387054, + "step": 1550, + "valid_targets_mean": 1491.8, + "valid_targets_min": 867 + }, + { + "epoch": 1.2653642653642654, + "grad_norm": 0.8084037583756437, + "learning_rate": 3.921440983762624e-05, + "loss": 0.2265, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12547224760055542, + "step": 1555, + "valid_targets_mean": 1994.0, + "valid_targets_min": 790 + }, + { + "epoch": 1.2694342694342695, + "grad_norm": 0.7784763220071113, + "learning_rate": 3.920310902167245e-05, + "loss": 0.2192, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11351943016052246, + "step": 1560, + "valid_targets_mean": 1523.4, + "valid_targets_min": 896 + }, + { + "epoch": 1.2735042735042734, + "grad_norm": 0.7880339591266707, + "learning_rate": 3.919172915518271e-05, + "loss": 0.2321, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11668528616428375, + "step": 1565, + "valid_targets_mean": 1790.1, + "valid_targets_min": 1009 + }, + { + "epoch": 1.2775742775742776, + "grad_norm": 0.7920585958427674, + "learning_rate": 3.918027028500282e-05, + "loss": 0.2223, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0921139121055603, + "step": 1570, + "valid_targets_mean": 1394.0, + "valid_targets_min": 717 + }, + { + "epoch": 1.2816442816442817, + "grad_norm": 0.925447932695446, + "learning_rate": 3.916873245830376e-05, + "loss": 0.2118, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08572347462177277, + "step": 1575, + "valid_targets_mean": 1129.0, + "valid_targets_min": 659 + }, + { + "epoch": 1.2857142857142856, + "grad_norm": 0.9814126638216883, + "learning_rate": 3.915711572258157e-05, + "loss": 0.21, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10210913419723511, + "step": 1580, + "valid_targets_mean": 1466.9, + "valid_targets_min": 642 + }, + { + "epoch": 1.2897842897842897, + "grad_norm": 0.7835577295777144, + "learning_rate": 3.914542012565711e-05, + "loss": 0.2195, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07693362236022949, + "step": 1585, + "valid_targets_mean": 1129.4, + "valid_targets_min": 765 + }, + { + "epoch": 1.2938542938542938, + "grad_norm": 0.9086732320776554, + "learning_rate": 3.913364571567586e-05, + "loss": 0.2301, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10597139596939087, + "step": 1590, + "valid_targets_mean": 1322.8, + "valid_targets_min": 631 + }, + { + "epoch": 1.297924297924298, + "grad_norm": 0.9559891752335423, + "learning_rate": 3.912179254110777e-05, + "loss": 0.2298, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1286805272102356, + "step": 1595, + "valid_targets_mean": 1612.0, + "valid_targets_min": 623 + }, + { + "epoch": 1.301994301994302, + "grad_norm": 0.7298047621289229, + "learning_rate": 3.9109860650747e-05, + "loss": 0.222, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08292436599731445, + "step": 1600, + "valid_targets_mean": 1402.1, + "valid_targets_min": 940 + }, + { + "epoch": 1.306064306064306, + "grad_norm": 0.712092191185825, + "learning_rate": 3.9097850093711775e-05, + "loss": 0.2224, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09930809587240219, + "step": 1605, + "valid_targets_mean": 1337.8, + "valid_targets_min": 776 + }, + { + "epoch": 1.3101343101343101, + "grad_norm": 0.8337686772674006, + "learning_rate": 3.908576091944412e-05, + "loss": 0.2199, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12775862216949463, + "step": 1610, + "valid_targets_mean": 1525.5, + "valid_targets_min": 623 + }, + { + "epoch": 1.3142043142043143, + "grad_norm": 0.8033816144064492, + "learning_rate": 3.907359317770973e-05, + "loss": 0.226, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12140291929244995, + "step": 1615, + "valid_targets_mean": 1441.0, + "valid_targets_min": 693 + }, + { + "epoch": 1.3182743182743182, + "grad_norm": 0.8666725972794019, + "learning_rate": 3.90613469185977e-05, + "loss": 0.22, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06115573272109032, + "step": 1620, + "valid_targets_mean": 847.9, + "valid_targets_min": 611 + }, + { + "epoch": 1.3223443223443223, + "grad_norm": 0.831110404158148, + "learning_rate": 3.904902219252035e-05, + "loss": 0.2135, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11510695517063141, + "step": 1625, + "valid_targets_mean": 1524.9, + "valid_targets_min": 793 + }, + { + "epoch": 1.3264143264143264, + "grad_norm": 0.747854141552853, + "learning_rate": 3.903661905021302e-05, + "loss": 0.2131, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11657039076089859, + "step": 1630, + "valid_targets_mean": 1779.4, + "valid_targets_min": 563 + }, + { + "epoch": 1.3304843304843303, + "grad_norm": 0.7620995195915982, + "learning_rate": 3.9024137542733846e-05, + "loss": 0.2137, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12204901874065399, + "step": 1635, + "valid_targets_mean": 1741.2, + "valid_targets_min": 1341 + }, + { + "epoch": 1.3345543345543345, + "grad_norm": 0.8238415295946188, + "learning_rate": 3.9011577721463574e-05, + "loss": 0.2143, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12938036024570465, + "step": 1640, + "valid_targets_mean": 1875.6, + "valid_targets_min": 1107 + }, + { + "epoch": 1.3386243386243386, + "grad_norm": 0.799416998929109, + "learning_rate": 3.899893963810531e-05, + "loss": 0.2233, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1031196340918541, + "step": 1645, + "valid_targets_mean": 1419.5, + "valid_targets_min": 680 + }, + { + "epoch": 1.3426943426943427, + "grad_norm": 0.7699078977660259, + "learning_rate": 3.898622334468435e-05, + "loss": 0.2078, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14640526473522186, + "step": 1650, + "valid_targets_mean": 1968.2, + "valid_targets_min": 981 + }, + { + "epoch": 1.3467643467643469, + "grad_norm": 0.7511380642686718, + "learning_rate": 3.897342889354793e-05, + "loss": 0.2129, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10663345456123352, + "step": 1655, + "valid_targets_mean": 1568.9, + "valid_targets_min": 937 + }, + { + "epoch": 1.3508343508343508, + "grad_norm": 0.8037134681077442, + "learning_rate": 3.896055633736504e-05, + "loss": 0.2163, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11559002846479416, + "step": 1660, + "valid_targets_mean": 1645.2, + "valid_targets_min": 804 + }, + { + "epoch": 1.354904354904355, + "grad_norm": 0.7949561381518031, + "learning_rate": 3.894760572912618e-05, + "loss": 0.218, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1040218248963356, + "step": 1665, + "valid_targets_mean": 1574.9, + "valid_targets_min": 1019 + }, + { + "epoch": 1.358974358974359, + "grad_norm": 0.7793788473775995, + "learning_rate": 3.8934577122143156e-05, + "loss": 0.2091, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09460228681564331, + "step": 1670, + "valid_targets_mean": 1121.6, + "valid_targets_min": 882 + }, + { + "epoch": 1.363044363044363, + "grad_norm": 0.7618292018526198, + "learning_rate": 3.892147057004888e-05, + "loss": 0.2066, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11588133871555328, + "step": 1675, + "valid_targets_mean": 1818.1, + "valid_targets_min": 1168 + }, + { + "epoch": 1.367114367114367, + "grad_norm": 0.9942145132530782, + "learning_rate": 3.89082861267971e-05, + "loss": 0.1994, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09862836450338364, + "step": 1680, + "valid_targets_mean": 1417.8, + "valid_targets_min": 678 + }, + { + "epoch": 1.3711843711843712, + "grad_norm": 0.8502030826682979, + "learning_rate": 3.889502384666223e-05, + "loss": 0.2146, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08241011202335358, + "step": 1685, + "valid_targets_mean": 1051.0, + "valid_targets_min": 676 + }, + { + "epoch": 1.3752543752543753, + "grad_norm": 0.696356628682868, + "learning_rate": 3.8881683784239086e-05, + "loss": 0.2097, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09263870120048523, + "step": 1690, + "valid_targets_mean": 1431.1, + "valid_targets_min": 794 + }, + { + "epoch": 1.3793243793243795, + "grad_norm": 0.7609564628672522, + "learning_rate": 3.8868265994442694e-05, + "loss": 0.2116, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11136119812726974, + "step": 1695, + "valid_targets_mean": 1486.9, + "valid_targets_min": 742 + }, + { + "epoch": 1.3833943833943834, + "grad_norm": 0.779149722488893, + "learning_rate": 3.8854770532508036e-05, + "loss": 0.2125, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12626802921295166, + "step": 1700, + "valid_targets_mean": 1734.5, + "valid_targets_min": 741 + }, + { + "epoch": 1.3874643874643875, + "grad_norm": 0.8189444335835692, + "learning_rate": 3.884119745398984e-05, + "loss": 0.2214, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12554654479026794, + "step": 1705, + "valid_targets_mean": 1712.5, + "valid_targets_min": 1157 + }, + { + "epoch": 1.3915343915343916, + "grad_norm": 0.7562880779738609, + "learning_rate": 3.882754681476235e-05, + "loss": 0.2004, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14017203450202942, + "step": 1710, + "valid_targets_mean": 1723.8, + "valid_targets_min": 862 + }, + { + "epoch": 1.3956043956043955, + "grad_norm": 0.789327271767066, + "learning_rate": 3.881381867101908e-05, + "loss": 0.2231, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1333657205104828, + "step": 1715, + "valid_targets_mean": 1633.1, + "valid_targets_min": 1019 + }, + { + "epoch": 1.3996743996743997, + "grad_norm": 0.6904489176477637, + "learning_rate": 3.880001307927262e-05, + "loss": 0.2132, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1002424880862236, + "step": 1720, + "valid_targets_mean": 1376.1, + "valid_targets_min": 1058 + }, + { + "epoch": 1.4037444037444038, + "grad_norm": 0.7331099974034027, + "learning_rate": 3.878613009635434e-05, + "loss": 0.2286, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11483565717935562, + "step": 1725, + "valid_targets_mean": 1576.5, + "valid_targets_min": 618 + }, + { + "epoch": 1.4078144078144077, + "grad_norm": 0.7374050495229207, + "learning_rate": 3.877216977941424e-05, + "loss": 0.2045, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12511426210403442, + "step": 1730, + "valid_targets_mean": 1615.8, + "valid_targets_min": 629 + }, + { + "epoch": 1.4118844118844118, + "grad_norm": 0.7263856360035532, + "learning_rate": 3.875813218592063e-05, + "loss": 0.212, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11008419096469879, + "step": 1735, + "valid_targets_mean": 1881.2, + "valid_targets_min": 1167 + }, + { + "epoch": 1.415954415954416, + "grad_norm": 0.7714742596712739, + "learning_rate": 3.874401737365996e-05, + "loss": 0.2132, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11159549653530121, + "step": 1740, + "valid_targets_mean": 1367.2, + "valid_targets_min": 699 + }, + { + "epoch": 1.42002442002442, + "grad_norm": 0.768333986171705, + "learning_rate": 3.872982540073654e-05, + "loss": 0.2009, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08359374105930328, + "step": 1745, + "valid_targets_mean": 1174.9, + "valid_targets_min": 679 + }, + { + "epoch": 1.4240944240944242, + "grad_norm": 0.730500600250479, + "learning_rate": 3.871555632557232e-05, + "loss": 0.2059, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09920322895050049, + "step": 1750, + "valid_targets_mean": 1556.2, + "valid_targets_min": 504 + }, + { + "epoch": 1.4281644281644281, + "grad_norm": 0.8117781426270219, + "learning_rate": 3.870121020690663e-05, + "loss": 0.21, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11729609966278076, + "step": 1755, + "valid_targets_mean": 1885.0, + "valid_targets_min": 1331 + }, + { + "epoch": 1.4322344322344323, + "grad_norm": 0.7215350454916396, + "learning_rate": 3.868678710379599e-05, + "loss": 0.2111, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07775722444057465, + "step": 1760, + "valid_targets_mean": 1306.1, + "valid_targets_min": 661 + }, + { + "epoch": 1.4363044363044364, + "grad_norm": 0.837829363749683, + "learning_rate": 3.86722870756138e-05, + "loss": 0.215, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08048004657030106, + "step": 1765, + "valid_targets_mean": 1330.1, + "valid_targets_min": 958 + }, + { + "epoch": 1.4403744403744403, + "grad_norm": 0.6691854525025749, + "learning_rate": 3.865771018205014e-05, + "loss": 0.2026, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08131928741931915, + "step": 1770, + "valid_targets_mean": 1465.0, + "valid_targets_min": 963 + }, + { + "epoch": 1.4444444444444444, + "grad_norm": 0.8158369333281974, + "learning_rate": 3.864305648311149e-05, + "loss": 0.2094, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10300800949335098, + "step": 1775, + "valid_targets_mean": 1428.0, + "valid_targets_min": 793 + }, + { + "epoch": 1.4485144485144485, + "grad_norm": 0.948520302318546, + "learning_rate": 3.8628326039120524e-05, + "loss": 0.2104, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13352897763252258, + "step": 1780, + "valid_targets_mean": 1633.1, + "valid_targets_min": 917 + }, + { + "epoch": 1.4525844525844525, + "grad_norm": 0.7365952174032329, + "learning_rate": 3.861351891071583e-05, + "loss": 0.2024, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08271881937980652, + "step": 1785, + "valid_targets_mean": 1246.5, + "valid_targets_min": 872 + }, + { + "epoch": 1.4566544566544566, + "grad_norm": 0.6720445528339655, + "learning_rate": 3.8598635158851694e-05, + "loss": 0.2104, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10842913389205933, + "step": 1790, + "valid_targets_mean": 1903.6, + "valid_targets_min": 1220 + }, + { + "epoch": 1.4607244607244607, + "grad_norm": 0.7972994996959792, + "learning_rate": 3.858367484479779e-05, + "loss": 0.1889, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09797993302345276, + "step": 1795, + "valid_targets_mean": 1391.6, + "valid_targets_min": 712 + }, + { + "epoch": 1.4647944647944648, + "grad_norm": 0.8151552299642496, + "learning_rate": 3.856863803013897e-05, + "loss": 0.2032, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11069940030574799, + "step": 1800, + "valid_targets_mean": 1295.0, + "valid_targets_min": 605 + }, + { + "epoch": 1.468864468864469, + "grad_norm": 0.6855785516547465, + "learning_rate": 3.855352477677504e-05, + "loss": 0.208, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06866128742694855, + "step": 1805, + "valid_targets_mean": 1414.8, + "valid_targets_min": 746 + }, + { + "epoch": 1.4729344729344729, + "grad_norm": 0.8055255610517995, + "learning_rate": 3.853833514692044e-05, + "loss": 0.1952, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0873202309012413, + "step": 1810, + "valid_targets_mean": 1059.4, + "valid_targets_min": 723 + }, + { + "epoch": 1.477004477004477, + "grad_norm": 0.7866122138481166, + "learning_rate": 3.852306920310401e-05, + "loss": 0.2106, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11211128532886505, + "step": 1815, + "valid_targets_mean": 1634.2, + "valid_targets_min": 1327 + }, + { + "epoch": 1.4810744810744811, + "grad_norm": 0.7173011268227322, + "learning_rate": 3.850772700816877e-05, + "loss": 0.2035, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09077475965023041, + "step": 1820, + "valid_targets_mean": 1412.5, + "valid_targets_min": 902 + }, + { + "epoch": 1.485144485144485, + "grad_norm": 0.6952841023611934, + "learning_rate": 3.8492308625271596e-05, + "loss": 0.2087, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09670276939868927, + "step": 1825, + "valid_targets_mean": 1558.8, + "valid_targets_min": 803 + }, + { + "epoch": 1.4892144892144892, + "grad_norm": 0.7685210240880278, + "learning_rate": 3.8476814117883034e-05, + "loss": 0.2071, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10378531366586685, + "step": 1830, + "valid_targets_mean": 1441.2, + "valid_targets_min": 1079 + }, + { + "epoch": 1.4932844932844933, + "grad_norm": 0.7239079288465091, + "learning_rate": 3.846124354978697e-05, + "loss": 0.2006, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09632142633199692, + "step": 1835, + "valid_targets_mean": 1449.8, + "valid_targets_min": 1263 + }, + { + "epoch": 1.4973544973544972, + "grad_norm": 0.7850402456186506, + "learning_rate": 3.8445596985080404e-05, + "loss": 0.2141, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10480426251888275, + "step": 1840, + "valid_targets_mean": 1487.4, + "valid_targets_min": 844 + }, + { + "epoch": 1.5014245014245016, + "grad_norm": 0.8068791427285449, + "learning_rate": 3.842987448817319e-05, + "loss": 0.2094, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09140418469905853, + "step": 1845, + "valid_targets_mean": 1421.4, + "valid_targets_min": 667 + }, + { + "epoch": 1.5054945054945055, + "grad_norm": 0.7097429620733522, + "learning_rate": 3.841407612378775e-05, + "loss": 0.2126, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12428690493106842, + "step": 1850, + "valid_targets_mean": 1822.6, + "valid_targets_min": 1041 + }, + { + "epoch": 1.5095645095645096, + "grad_norm": 0.7932172765143252, + "learning_rate": 3.839820195695883e-05, + "loss": 0.2104, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09613991528749466, + "step": 1855, + "valid_targets_mean": 1327.4, + "valid_targets_min": 777 + }, + { + "epoch": 1.5136345136345137, + "grad_norm": 0.7320076010626049, + "learning_rate": 3.8382252053033196e-05, + "loss": 0.21, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10572035610675812, + "step": 1860, + "valid_targets_mean": 1823.8, + "valid_targets_min": 1283 + }, + { + "epoch": 1.5177045177045176, + "grad_norm": 0.806858170502788, + "learning_rate": 3.836622647766943e-05, + "loss": 0.1985, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06952077150344849, + "step": 1865, + "valid_targets_mean": 924.0, + "valid_targets_min": 620 + }, + { + "epoch": 1.5217745217745218, + "grad_norm": 0.7219548062410487, + "learning_rate": 3.835012529683757e-05, + "loss": 0.2066, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08829911798238754, + "step": 1870, + "valid_targets_mean": 1480.2, + "valid_targets_min": 656 + }, + { + "epoch": 1.525844525844526, + "grad_norm": 0.6994940063547743, + "learning_rate": 3.833394857681894e-05, + "loss": 0.2068, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08126124739646912, + "step": 1875, + "valid_targets_mean": 1415.5, + "valid_targets_min": 697 + }, + { + "epoch": 1.5299145299145298, + "grad_norm": 0.7327643766970442, + "learning_rate": 3.831769638420577e-05, + "loss": 0.201, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1019296646118164, + "step": 1880, + "valid_targets_mean": 1655.8, + "valid_targets_min": 816 + }, + { + "epoch": 1.533984533984534, + "grad_norm": 0.6800577601183724, + "learning_rate": 3.830136878590104e-05, + "loss": 0.1983, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09412302821874619, + "step": 1885, + "valid_targets_mean": 1553.0, + "valid_targets_min": 850 + }, + { + "epoch": 1.538054538054538, + "grad_norm": 0.7426379902022742, + "learning_rate": 3.8284965849118066e-05, + "loss": 0.195, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09913598001003265, + "step": 1890, + "valid_targets_mean": 1353.9, + "valid_targets_min": 747 + }, + { + "epoch": 1.542124542124542, + "grad_norm": 0.7457989847263609, + "learning_rate": 3.826848764138036e-05, + "loss": 0.2131, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08886919915676117, + "step": 1895, + "valid_targets_mean": 1420.9, + "valid_targets_min": 933 + }, + { + "epoch": 1.5461945461945463, + "grad_norm": 0.7559545112406654, + "learning_rate": 3.825193423052127e-05, + "loss": 0.2021, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09691812098026276, + "step": 1900, + "valid_targets_mean": 1394.9, + "valid_targets_min": 827 + }, + { + "epoch": 1.5502645502645502, + "grad_norm": 0.6854552164584192, + "learning_rate": 3.823530568468371e-05, + "loss": 0.1928, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08764408528804779, + "step": 1905, + "valid_targets_mean": 1383.2, + "valid_targets_min": 909 + }, + { + "epoch": 1.5543345543345544, + "grad_norm": 0.6804870010828482, + "learning_rate": 3.821860207231991e-05, + "loss": 0.1948, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11076577752828598, + "step": 1910, + "valid_targets_mean": 1682.4, + "valid_targets_min": 1094 + }, + { + "epoch": 1.5584045584045585, + "grad_norm": 0.8066726711083975, + "learning_rate": 3.82018234621911e-05, + "loss": 0.1862, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09034565091133118, + "step": 1915, + "valid_targets_mean": 1118.4, + "valid_targets_min": 720 + }, + { + "epoch": 1.5624745624745624, + "grad_norm": 0.7147210480241867, + "learning_rate": 3.818496992336725e-05, + "loss": 0.2039, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10571155697107315, + "step": 1920, + "valid_targets_mean": 1538.2, + "valid_targets_min": 610 + }, + { + "epoch": 1.5665445665445665, + "grad_norm": 0.6741394266281067, + "learning_rate": 3.816804152522678e-05, + "loss": 0.1969, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10726112127304077, + "step": 1925, + "valid_targets_mean": 1570.1, + "valid_targets_min": 713 + }, + { + "epoch": 1.5706145706145707, + "grad_norm": 0.6935025181842417, + "learning_rate": 3.815103833745626e-05, + "loss": 0.2066, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09748126566410065, + "step": 1930, + "valid_targets_mean": 1537.8, + "valid_targets_min": 874 + }, + { + "epoch": 1.5746845746845746, + "grad_norm": 0.6821876374487306, + "learning_rate": 3.8133960430050135e-05, + "loss": 0.1984, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09882050007581711, + "step": 1935, + "valid_targets_mean": 1471.2, + "valid_targets_min": 765 + }, + { + "epoch": 1.578754578754579, + "grad_norm": 0.6990112986801081, + "learning_rate": 3.811680787331047e-05, + "loss": 0.2032, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08577939867973328, + "step": 1940, + "valid_targets_mean": 1254.4, + "valid_targets_min": 816 + }, + { + "epoch": 1.5828245828245828, + "grad_norm": 0.7102883473320752, + "learning_rate": 3.809958073784658e-05, + "loss": 0.2058, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0998358279466629, + "step": 1945, + "valid_targets_mean": 1482.9, + "valid_targets_min": 1182 + }, + { + "epoch": 1.5868945868945867, + "grad_norm": 0.7009814403089336, + "learning_rate": 3.8082279094574815e-05, + "loss": 0.1932, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11195679008960724, + "step": 1950, + "valid_targets_mean": 1583.2, + "valid_targets_min": 363 + }, + { + "epoch": 1.590964590964591, + "grad_norm": 0.41631203954390233, + "learning_rate": 3.8064903014718245e-05, + "loss": 0.1322, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07122399657964706, + "step": 1955, + "valid_targets_mean": 3710.1, + "valid_targets_min": 3074 + }, + { + "epoch": 1.595034595034595, + "grad_norm": 0.4600103764733667, + "learning_rate": 3.804745256980634e-05, + "loss": 0.1224, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06652016937732697, + "step": 1960, + "valid_targets_mean": 2577.6, + "valid_targets_min": 700 + }, + { + "epoch": 1.5991045991045991, + "grad_norm": 0.4537614450983102, + "learning_rate": 3.80299278316747e-05, + "loss": 0.1233, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05843440815806389, + "step": 1965, + "valid_targets_mean": 3309.8, + "valid_targets_min": 803 + }, + { + "epoch": 1.6031746031746033, + "grad_norm": 0.43458913842163255, + "learning_rate": 3.801232887246479e-05, + "loss": 0.1138, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06071528047323227, + "step": 1970, + "valid_targets_mean": 2484.8, + "valid_targets_min": 920 + }, + { + "epoch": 1.6072446072446072, + "grad_norm": 0.42621344020743934, + "learning_rate": 3.799465576462357e-05, + "loss": 0.1221, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06493829935789108, + "step": 1975, + "valid_targets_mean": 3095.1, + "valid_targets_min": 806 + }, + { + "epoch": 1.6113146113146113, + "grad_norm": 0.7467123259446526, + "learning_rate": 3.7976908580903246e-05, + "loss": 0.1723, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10322582721710205, + "step": 1980, + "valid_targets_mean": 1663.1, + "valid_targets_min": 718 + }, + { + "epoch": 1.6153846153846154, + "grad_norm": 0.41841694093835413, + "learning_rate": 3.7959087394360974e-05, + "loss": 0.1278, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0684170350432396, + "step": 1985, + "valid_targets_mean": 3443.2, + "valid_targets_min": 1133 + }, + { + "epoch": 1.6194546194546193, + "grad_norm": 0.4482119640964086, + "learning_rate": 3.794119227835854e-05, + "loss": 0.1293, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05158419907093048, + "step": 1990, + "valid_targets_mean": 1609.9, + "valid_targets_min": 859 + }, + { + "epoch": 1.6235246235246237, + "grad_norm": 0.2808356049969262, + "learning_rate": 3.792322330656206e-05, + "loss": 0.1547, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.045812997967004776, + "step": 1995, + "valid_targets_mean": 5572.1, + "valid_targets_min": 5150 + }, + { + "epoch": 1.6275946275946276, + "grad_norm": 0.38378216926736985, + "learning_rate": 3.790518055294168e-05, + "loss": 0.1071, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07117560505867004, + "step": 2000, + "valid_targets_mean": 3950.0, + "valid_targets_min": 502 + }, + { + "epoch": 1.6316646316646317, + "grad_norm": 0.3830231524830745, + "learning_rate": 3.788706409177129e-05, + "loss": 0.1128, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06173840910196304, + "step": 2005, + "valid_targets_mean": 3669.2, + "valid_targets_min": 600 + }, + { + "epoch": 1.6357346357346358, + "grad_norm": 0.35711060786805376, + "learning_rate": 3.7868873997628174e-05, + "loss": 0.1102, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.050001971423625946, + "step": 2010, + "valid_targets_mean": 2980.4, + "valid_targets_min": 518 + }, + { + "epoch": 1.6398046398046398, + "grad_norm": 0.3974333252134189, + "learning_rate": 3.7850610345392735e-05, + "loss": 0.1717, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06219826638698578, + "step": 2015, + "valid_targets_mean": 3675.4, + "valid_targets_min": 549 + }, + { + "epoch": 1.6438746438746439, + "grad_norm": 0.3281743505589197, + "learning_rate": 3.7832273210248214e-05, + "loss": 0.1041, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.045098382979631424, + "step": 2020, + "valid_targets_mean": 4013.2, + "valid_targets_min": 3548 + }, + { + "epoch": 1.647944647944648, + "grad_norm": 0.5366337338266325, + "learning_rate": 3.7813862667680304e-05, + "loss": 0.1251, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.061837535351514816, + "step": 2025, + "valid_targets_mean": 2000.5, + "valid_targets_min": 644 + }, + { + "epoch": 1.652014652014652, + "grad_norm": 0.34924838955717313, + "learning_rate": 3.7795378793476904e-05, + "loss": 0.0976, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.03679157420992851, + "step": 2030, + "valid_targets_mean": 2904.4, + "valid_targets_min": 720 + }, + { + "epoch": 1.656084656084656, + "grad_norm": 0.48035620858335015, + "learning_rate": 3.777682166372779e-05, + "loss": 0.1255, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04888860881328583, + "step": 2035, + "valid_targets_mean": 1487.6, + "valid_targets_min": 516 + }, + { + "epoch": 1.6601546601546602, + "grad_norm": 0.5576087684474084, + "learning_rate": 3.775819135482429e-05, + "loss": 0.1286, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07822857797145844, + "step": 2040, + "valid_targets_mean": 2006.0, + "valid_targets_min": 557 + }, + { + "epoch": 1.664224664224664, + "grad_norm": 0.441534949218794, + "learning_rate": 3.773948794345899e-05, + "loss": 0.1316, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07287167012691498, + "step": 2045, + "valid_targets_mean": 3359.2, + "valid_targets_min": 1086 + }, + { + "epoch": 1.6682946682946684, + "grad_norm": 0.4390733330236822, + "learning_rate": 3.7720711506625384e-05, + "loss": 0.1316, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08404476940631866, + "step": 2050, + "valid_targets_mean": 2866.8, + "valid_targets_min": 1008 + }, + { + "epoch": 1.6723646723646723, + "grad_norm": 0.4736301286588309, + "learning_rate": 3.7701862121617595e-05, + "loss": 0.1765, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04762580618262291, + "step": 2055, + "valid_targets_mean": 3259.9, + "valid_targets_min": 2702 + }, + { + "epoch": 1.6764346764346765, + "grad_norm": 0.3397713444353576, + "learning_rate": 3.768293986603003e-05, + "loss": 0.1081, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04629997909069061, + "step": 2060, + "valid_targets_mean": 4086.4, + "valid_targets_min": 3798 + }, + { + "epoch": 1.6805046805046806, + "grad_norm": 0.8340021398967473, + "learning_rate": 3.7663944817757094e-05, + "loss": 0.1331, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10405413806438446, + "step": 2065, + "valid_targets_mean": 1020.8, + "valid_targets_min": 598 + }, + { + "epoch": 1.6845746845746845, + "grad_norm": 0.4169904470975252, + "learning_rate": 3.7644877054992814e-05, + "loss": 0.146, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.047049522399902344, + "step": 2070, + "valid_targets_mean": 2028.4, + "valid_targets_min": 590 + }, + { + "epoch": 1.6886446886446886, + "grad_norm": 0.3457500004146195, + "learning_rate": 3.7625736656230576e-05, + "loss": 0.1384, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06108832731842995, + "step": 2075, + "valid_targets_mean": 4046.9, + "valid_targets_min": 3390 + }, + { + "epoch": 1.6927146927146928, + "grad_norm": 0.6869412401670636, + "learning_rate": 3.760652370026277e-05, + "loss": 0.1655, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16195940971374512, + "step": 2080, + "valid_targets_mean": 2247.4, + "valid_targets_min": 984 + }, + { + "epoch": 1.6967846967846967, + "grad_norm": 0.40632518442102483, + "learning_rate": 3.758723826618045e-05, + "loss": 0.3003, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0758669525384903, + "step": 2085, + "valid_targets_mean": 3348.9, + "valid_targets_min": 2176 + }, + { + "epoch": 1.7008547008547008, + "grad_norm": 0.4606205763061986, + "learning_rate": 3.7567880433373066e-05, + "loss": 0.1286, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.060970671474933624, + "step": 2090, + "valid_targets_mean": 2385.1, + "valid_targets_min": 1074 + }, + { + "epoch": 1.704924704924705, + "grad_norm": 0.45654254366777014, + "learning_rate": 3.754845028152807e-05, + "loss": 0.1317, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.062153562903404236, + "step": 2095, + "valid_targets_mean": 3126.2, + "valid_targets_min": 1212 + }, + { + "epoch": 1.7089947089947088, + "grad_norm": 0.3935119632617659, + "learning_rate": 3.7528947890630635e-05, + "loss": 0.2285, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06273317337036133, + "step": 2100, + "valid_targets_mean": 4386.0, + "valid_targets_min": 2258 + }, + { + "epoch": 1.7130647130647132, + "grad_norm": 0.30725829749200173, + "learning_rate": 3.750937334096331e-05, + "loss": 0.1001, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07420308887958527, + "step": 2105, + "valid_targets_mean": 3738.9, + "valid_targets_min": 1466 + }, + { + "epoch": 1.717134717134717, + "grad_norm": 0.3408574469787128, + "learning_rate": 3.7489726713105673e-05, + "loss": 0.1222, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05906776338815689, + "step": 2110, + "valid_targets_mean": 3522.6, + "valid_targets_min": 560 + }, + { + "epoch": 1.7212047212047212, + "grad_norm": 0.3873420033129566, + "learning_rate": 3.747000808793404e-05, + "loss": 0.1279, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06194659322500229, + "step": 2115, + "valid_targets_mean": 3383.1, + "valid_targets_min": 1189 + }, + { + "epoch": 1.7252747252747254, + "grad_norm": 0.35534762128764597, + "learning_rate": 3.745021754662109e-05, + "loss": 0.1073, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.050105899572372437, + "step": 2120, + "valid_targets_mean": 3610.1, + "valid_targets_min": 2257 + }, + { + "epoch": 1.7293447293447293, + "grad_norm": 0.35587621922779594, + "learning_rate": 3.7430355170635536e-05, + "loss": 0.1049, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.040797870606184006, + "step": 2125, + "valid_targets_mean": 3479.8, + "valid_targets_min": 886 + }, + { + "epoch": 1.7334147334147334, + "grad_norm": 0.4101618167732939, + "learning_rate": 3.7410421041741846e-05, + "loss": 0.1137, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05459919571876526, + "step": 2130, + "valid_targets_mean": 3346.6, + "valid_targets_min": 2409 + }, + { + "epoch": 1.7374847374847375, + "grad_norm": 0.3706410137638395, + "learning_rate": 3.7390415241999815e-05, + "loss": 0.1038, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.049584001302719116, + "step": 2135, + "valid_targets_mean": 3552.0, + "valid_targets_min": 2309 + }, + { + "epoch": 1.7415547415547414, + "grad_norm": 0.6514439786770229, + "learning_rate": 3.737033785376431e-05, + "loss": 0.168, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11145202815532684, + "step": 2140, + "valid_targets_mean": 2149.8, + "valid_targets_min": 645 + }, + { + "epoch": 1.7456247456247458, + "grad_norm": 0.3388516246206135, + "learning_rate": 3.735018895968487e-05, + "loss": 0.0977, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04621146619319916, + "step": 2145, + "valid_targets_mean": 3310.2, + "valid_targets_min": 545 + }, + { + "epoch": 1.7496947496947497, + "grad_norm": 0.4563091012984944, + "learning_rate": 3.73299686427054e-05, + "loss": 0.1009, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05684323236346245, + "step": 2150, + "valid_targets_mean": 2458.4, + "valid_targets_min": 667 + }, + { + "epoch": 1.7537647537647536, + "grad_norm": 0.329941297300451, + "learning_rate": 3.730967698606383e-05, + "loss": 0.0983, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.046611517667770386, + "step": 2155, + "valid_targets_mean": 3501.5, + "valid_targets_min": 2353 + }, + { + "epoch": 1.757834757834758, + "grad_norm": 0.39991464682747385, + "learning_rate": 3.728931407329174e-05, + "loss": 0.1032, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07516320049762726, + "step": 2160, + "valid_targets_mean": 3220.2, + "valid_targets_min": 792 + }, + { + "epoch": 1.7619047619047619, + "grad_norm": 0.33577937245255396, + "learning_rate": 3.7268879988214075e-05, + "loss": 0.1624, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04279305413365364, + "step": 2165, + "valid_targets_mean": 3818.5, + "valid_targets_min": 3276 + }, + { + "epoch": 1.765974765974766, + "grad_norm": 0.4325931023841163, + "learning_rate": 3.724837481494874e-05, + "loss": 0.1356, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06765176355838776, + "step": 2170, + "valid_targets_mean": 2390.9, + "valid_targets_min": 647 + }, + { + "epoch": 1.7700447700447701, + "grad_norm": 0.5070623066629864, + "learning_rate": 3.722779863790626e-05, + "loss": 0.1246, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06652006506919861, + "step": 2175, + "valid_targets_mean": 1500.6, + "valid_targets_min": 673 + }, + { + "epoch": 1.774114774114774, + "grad_norm": 0.6537069454458774, + "learning_rate": 3.7207151541789505e-05, + "loss": 0.1476, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.113385409116745, + "step": 2180, + "valid_targets_mean": 1711.2, + "valid_targets_min": 702 + }, + { + "epoch": 1.7781847781847782, + "grad_norm": 0.44224511896857577, + "learning_rate": 3.7186433611593225e-05, + "loss": 0.1587, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07473345100879669, + "step": 2185, + "valid_targets_mean": 3036.1, + "valid_targets_min": 945 + }, + { + "epoch": 1.7822547822547823, + "grad_norm": 0.48920469501680053, + "learning_rate": 3.716564493260381e-05, + "loss": 0.119, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08563308417797089, + "step": 2190, + "valid_targets_mean": 3317.2, + "valid_targets_min": 2265 + }, + { + "epoch": 1.7863247863247862, + "grad_norm": 0.32615432603009253, + "learning_rate": 3.714478559039887e-05, + "loss": 0.1445, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.058499690145254135, + "step": 2195, + "valid_targets_mean": 3875.2, + "valid_targets_min": 3051 + }, + { + "epoch": 1.7903947903947905, + "grad_norm": 0.5285568387969577, + "learning_rate": 3.712385567084689e-05, + "loss": 0.1235, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08171027898788452, + "step": 2200, + "valid_targets_mean": 2069.8, + "valid_targets_min": 523 + }, + { + "epoch": 1.7944647944647945, + "grad_norm": 0.3459525440694431, + "learning_rate": 3.710285526010693e-05, + "loss": 0.1107, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04602465033531189, + "step": 2205, + "valid_targets_mean": 1640.8, + "valid_targets_min": 468 + }, + { + "epoch": 1.7985347985347986, + "grad_norm": 0.25004285177963237, + "learning_rate": 3.7081784444628185e-05, + "loss": 0.1158, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.03536680340766907, + "step": 2210, + "valid_targets_mean": 4435.1, + "valid_targets_min": 885 + }, + { + "epoch": 1.8026048026048027, + "grad_norm": 0.38886600769121415, + "learning_rate": 3.7060643311149706e-05, + "loss": 0.1023, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.041570305824279785, + "step": 2215, + "valid_targets_mean": 1889.2, + "valid_targets_min": 546 + }, + { + "epoch": 1.8066748066748066, + "grad_norm": 0.32987051157703745, + "learning_rate": 3.703943194670001e-05, + "loss": 0.1019, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.059109874069690704, + "step": 2220, + "valid_targets_mean": 4257.1, + "valid_targets_min": 2414 + }, + { + "epoch": 1.8107448107448108, + "grad_norm": 0.551577193902464, + "learning_rate": 3.7018150438596696e-05, + "loss": 0.1197, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.061953105032444, + "step": 2225, + "valid_targets_mean": 1502.4, + "valid_targets_min": 697 + }, + { + "epoch": 1.8148148148148149, + "grad_norm": 0.43626117102890194, + "learning_rate": 3.6996798874446144e-05, + "loss": 0.1251, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05732385441660881, + "step": 2230, + "valid_targets_mean": 3466.0, + "valid_targets_min": 2777 + }, + { + "epoch": 1.8188848188848188, + "grad_norm": 0.4137126451869474, + "learning_rate": 3.6975377342143105e-05, + "loss": 0.1235, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06378593295812607, + "step": 2235, + "valid_targets_mean": 3982.1, + "valid_targets_min": 3061 + }, + { + "epoch": 1.822954822954823, + "grad_norm": 0.33696208651772863, + "learning_rate": 3.695388592987036e-05, + "loss": 0.1067, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05193319916725159, + "step": 2240, + "valid_targets_mean": 2788.0, + "valid_targets_min": 868 + }, + { + "epoch": 1.827024827024827, + "grad_norm": 0.37878300759015127, + "learning_rate": 3.693232472609837e-05, + "loss": 0.1137, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04331493750214577, + "step": 2245, + "valid_targets_mean": 2120.6, + "valid_targets_min": 752 + }, + { + "epoch": 1.831094831094831, + "grad_norm": 0.35149923790855736, + "learning_rate": 3.6910693819584865e-05, + "loss": 0.1236, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05358132719993591, + "step": 2250, + "valid_targets_mean": 3477.1, + "valid_targets_min": 1141 + }, + { + "epoch": 1.8351648351648353, + "grad_norm": 0.31501331438328894, + "learning_rate": 3.688899329937454e-05, + "loss": 0.0938, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05189646780490875, + "step": 2255, + "valid_targets_mean": 3888.4, + "valid_targets_min": 3540 + }, + { + "epoch": 1.8392348392348392, + "grad_norm": 0.3667292332003592, + "learning_rate": 3.6867223254798645e-05, + "loss": 0.1238, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04574829339981079, + "step": 2260, + "valid_targets_mean": 3086.8, + "valid_targets_min": 773 + }, + { + "epoch": 1.8433048433048433, + "grad_norm": 0.3290532742643368, + "learning_rate": 3.6845383775474626e-05, + "loss": 0.1172, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.03846554830670357, + "step": 2265, + "valid_targets_mean": 3558.6, + "valid_targets_min": 2587 + }, + { + "epoch": 1.8473748473748475, + "grad_norm": 0.3938162713057422, + "learning_rate": 3.6823474951305766e-05, + "loss": 0.1351, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05593733489513397, + "step": 2270, + "valid_targets_mean": 2688.9, + "valid_targets_min": 773 + }, + { + "epoch": 1.8514448514448514, + "grad_norm": 0.4221991891103622, + "learning_rate": 3.6801496872480825e-05, + "loss": 0.117, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06380049139261246, + "step": 2275, + "valid_targets_mean": 3734.4, + "valid_targets_min": 3068 + }, + { + "epoch": 1.8555148555148555, + "grad_norm": 0.608114991462476, + "learning_rate": 3.6779449629473615e-05, + "loss": 0.1291, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0737733393907547, + "step": 2280, + "valid_targets_mean": 1816.1, + "valid_targets_min": 893 + }, + { + "epoch": 1.8595848595848596, + "grad_norm": 0.5129055915940605, + "learning_rate": 3.675733331304271e-05, + "loss": 0.1212, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05952981114387512, + "step": 2285, + "valid_targets_mean": 1444.4, + "valid_targets_min": 708 + }, + { + "epoch": 1.8636548636548635, + "grad_norm": 0.4126348530013301, + "learning_rate": 3.6735148014230985e-05, + "loss": 0.1227, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04681888595223427, + "step": 2290, + "valid_targets_mean": 1768.8, + "valid_targets_min": 617 + }, + { + "epoch": 1.8677248677248677, + "grad_norm": 0.4275179875164524, + "learning_rate": 3.671289382436532e-05, + "loss": 0.1124, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05094176530838013, + "step": 2295, + "valid_targets_mean": 2634.9, + "valid_targets_min": 720 + }, + { + "epoch": 1.8717948717948718, + "grad_norm": 0.5049917920987161, + "learning_rate": 3.669057083505617e-05, + "loss": 0.1074, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06733274459838867, + "step": 2300, + "valid_targets_mean": 1836.2, + "valid_targets_min": 785 + }, + { + "epoch": 1.8758648758648757, + "grad_norm": 0.5528905832066613, + "learning_rate": 3.6668179138197205e-05, + "loss": 0.1311, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10168108344078064, + "step": 2305, + "valid_targets_mean": 2148.2, + "valid_targets_min": 765 + }, + { + "epoch": 1.87993487993488, + "grad_norm": 0.299832096960279, + "learning_rate": 3.664571882596495e-05, + "loss": 0.1033, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.033232755959033966, + "step": 2310, + "valid_targets_mean": 3639.2, + "valid_targets_min": 2863 + }, + { + "epoch": 1.884004884004884, + "grad_norm": 0.36729005263055425, + "learning_rate": 3.662318999081837e-05, + "loss": 0.1037, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.03816433623433113, + "step": 2315, + "valid_targets_mean": 3033.4, + "valid_targets_min": 555 + }, + { + "epoch": 1.888074888074888, + "grad_norm": 0.43795223384191606, + "learning_rate": 3.660059272549852e-05, + "loss": 0.1148, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06973383575677872, + "step": 2320, + "valid_targets_mean": 2346.6, + "valid_targets_min": 721 + }, + { + "epoch": 1.8921448921448922, + "grad_norm": 0.33790000025819755, + "learning_rate": 3.657792712302814e-05, + "loss": 0.0941, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.051636915653944016, + "step": 2325, + "valid_targets_mean": 4902.1, + "valid_targets_min": 3640 + }, + { + "epoch": 1.8962148962148961, + "grad_norm": 0.3604771168113708, + "learning_rate": 3.655519327671129e-05, + "loss": 0.0961, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05189387500286102, + "step": 2330, + "valid_targets_mean": 4536.6, + "valid_targets_min": 2020 + }, + { + "epoch": 1.9002849002849003, + "grad_norm": 0.33562019525922326, + "learning_rate": 3.6532391280132964e-05, + "loss": 0.1146, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.03787705674767494, + "step": 2335, + "valid_targets_mean": 3218.6, + "valid_targets_min": 739 + }, + { + "epoch": 1.9043549043549044, + "grad_norm": 0.3743647264168468, + "learning_rate": 3.650952122715869e-05, + "loss": 0.1353, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.060260578989982605, + "step": 2340, + "valid_targets_mean": 3774.6, + "valid_targets_min": 2945 + }, + { + "epoch": 1.9084249084249083, + "grad_norm": 0.37262671797339586, + "learning_rate": 3.648658321193415e-05, + "loss": 0.1104, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05858001112937927, + "step": 2345, + "valid_targets_mean": 3045.4, + "valid_targets_min": 704 + }, + { + "epoch": 1.9124949124949127, + "grad_norm": 0.35884300375055905, + "learning_rate": 3.646357732888482e-05, + "loss": 0.1125, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04959436506032944, + "step": 2350, + "valid_targets_mean": 2492.8, + "valid_targets_min": 774 + }, + { + "epoch": 1.9165649165649166, + "grad_norm": 0.3589263199724622, + "learning_rate": 3.644050367271553e-05, + "loss": 0.1383, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04746074602007866, + "step": 2355, + "valid_targets_mean": 2564.6, + "valid_targets_min": 485 + }, + { + "epoch": 1.9206349206349205, + "grad_norm": 0.406490304131688, + "learning_rate": 3.641736233841012e-05, + "loss": 0.106, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.046355560421943665, + "step": 2360, + "valid_targets_mean": 3002.5, + "valid_targets_min": 1048 + }, + { + "epoch": 1.9247049247049248, + "grad_norm": 0.5534145172199101, + "learning_rate": 3.639415342123101e-05, + "loss": 0.1178, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.050700727850198746, + "step": 2365, + "valid_targets_mean": 2588.5, + "valid_targets_min": 722 + }, + { + "epoch": 1.9287749287749287, + "grad_norm": 0.4908901714271384, + "learning_rate": 3.637087701671885e-05, + "loss": 0.1188, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06826741248369217, + "step": 2370, + "valid_targets_mean": 1443.8, + "valid_targets_min": 865 + }, + { + "epoch": 1.9328449328449329, + "grad_norm": 0.4557072327685435, + "learning_rate": 3.63475332206921e-05, + "loss": 0.1604, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.061702802777290344, + "step": 2375, + "valid_targets_mean": 2283.1, + "valid_targets_min": 675 + }, + { + "epoch": 1.936914936914937, + "grad_norm": 0.41528320209522723, + "learning_rate": 3.6324122129246616e-05, + "loss": 0.1383, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.052136339247226715, + "step": 2380, + "valid_targets_mean": 3202.0, + "valid_targets_min": 782 + }, + { + "epoch": 1.940984940984941, + "grad_norm": 0.4546869915497293, + "learning_rate": 3.630064383875533e-05, + "loss": 0.1089, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05638446286320686, + "step": 2385, + "valid_targets_mean": 2586.9, + "valid_targets_min": 795 + }, + { + "epoch": 1.945054945054945, + "grad_norm": 0.4016367333178925, + "learning_rate": 3.627709844586774e-05, + "loss": 0.1106, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05390872061252594, + "step": 2390, + "valid_targets_mean": 3345.4, + "valid_targets_min": 2017 + }, + { + "epoch": 1.9491249491249492, + "grad_norm": 0.37713484435403527, + "learning_rate": 3.6253486047509634e-05, + "loss": 0.1097, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.048263922333717346, + "step": 2395, + "valid_targets_mean": 1937.5, + "valid_targets_min": 798 + }, + { + "epoch": 1.953194953194953, + "grad_norm": 0.3376638144663447, + "learning_rate": 3.622980674088258e-05, + "loss": 0.0975, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05167309567332268, + "step": 2400, + "valid_targets_mean": 3302.2, + "valid_targets_min": 860 + }, + { + "epoch": 1.9572649572649574, + "grad_norm": 0.3884117732632468, + "learning_rate": 3.620606062346361e-05, + "loss": 0.1393, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05166970193386078, + "step": 2405, + "valid_targets_mean": 3287.2, + "valid_targets_min": 878 + }, + { + "epoch": 1.9613349613349613, + "grad_norm": 0.31443143791227623, + "learning_rate": 3.618224779300478e-05, + "loss": 0.0968, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.040656059980392456, + "step": 2410, + "valid_targets_mean": 3800.8, + "valid_targets_min": 2602 + }, + { + "epoch": 1.9654049654049655, + "grad_norm": 0.5020381670968926, + "learning_rate": 3.6158368347532755e-05, + "loss": 0.1142, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06920788437128067, + "step": 2415, + "valid_targets_mean": 2853.9, + "valid_targets_min": 945 + }, + { + "epoch": 1.9694749694749696, + "grad_norm": 0.4185287229781912, + "learning_rate": 3.613442238534845e-05, + "loss": 0.1109, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06565561890602112, + "step": 2420, + "valid_targets_mean": 3633.9, + "valid_targets_min": 2059 + }, + { + "epoch": 1.9735449735449735, + "grad_norm": 0.3315436014809084, + "learning_rate": 3.611041000502659e-05, + "loss": 0.1101, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.057864412665367126, + "step": 2425, + "valid_targets_mean": 4244.8, + "valid_targets_min": 2529 + }, + { + "epoch": 1.9776149776149776, + "grad_norm": 0.3194580707254878, + "learning_rate": 3.60863313054153e-05, + "loss": 0.0961, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06427641212940216, + "step": 2430, + "valid_targets_mean": 4650.8, + "valid_targets_min": 892 + }, + { + "epoch": 1.9816849816849818, + "grad_norm": 0.25904177449049903, + "learning_rate": 3.6062186385635734e-05, + "loss": 0.1062, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.03253565728664398, + "step": 2435, + "valid_targets_mean": 4432.1, + "valid_targets_min": 1657 + }, + { + "epoch": 1.9857549857549857, + "grad_norm": 0.5544926636783111, + "learning_rate": 3.603797534508162e-05, + "loss": 0.1641, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13297711312770844, + "step": 2440, + "valid_targets_mean": 2023.0, + "valid_targets_min": 709 + }, + { + "epoch": 1.9898249898249898, + "grad_norm": 0.3150367876998634, + "learning_rate": 3.6013698283418896e-05, + "loss": 0.1479, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04680216312408447, + "step": 2445, + "valid_targets_mean": 3241.9, + "valid_targets_min": 527 + }, + { + "epoch": 1.993894993894994, + "grad_norm": 0.37009261570296975, + "learning_rate": 3.598935530058528e-05, + "loss": 0.1168, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07674941420555115, + "step": 2450, + "valid_targets_mean": 3313.4, + "valid_targets_min": 2308 + }, + { + "epoch": 1.9979649979649978, + "grad_norm": 0.31909775388216893, + "learning_rate": 3.5964946496789836e-05, + "loss": 0.0989, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05330783128738403, + "step": 2455, + "valid_targets_mean": 3463.5, + "valid_targets_min": 2883 + }, + { + "epoch": 2.0016280016280015, + "grad_norm": 0.4880867496018015, + "learning_rate": 3.5940471972512604e-05, + "loss": 0.1897, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10746130347251892, + "step": 2460, + "valid_targets_mean": 8514.0, + "valid_targets_min": 6788 + }, + { + "epoch": 2.005698005698006, + "grad_norm": 0.4659157360495663, + "learning_rate": 3.591593182850415e-05, + "loss": 0.2389, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11680585891008377, + "step": 2465, + "valid_targets_mean": 6668.0, + "valid_targets_min": 5933 + }, + { + "epoch": 2.0097680097680097, + "grad_norm": 0.43232390156358325, + "learning_rate": 3.5891326165785196e-05, + "loss": 0.2266, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10285025835037231, + "step": 2470, + "valid_targets_mean": 4107.0, + "valid_targets_min": 161 + }, + { + "epoch": 2.0138380138380136, + "grad_norm": 0.3634051714444297, + "learning_rate": 3.586665508564613e-05, + "loss": 0.2222, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10061359405517578, + "step": 2475, + "valid_targets_mean": 7946.0, + "valid_targets_min": 5596 + }, + { + "epoch": 2.017908017908018, + "grad_norm": 0.37783795713600243, + "learning_rate": 3.5841918689646666e-05, + "loss": 0.2125, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1137465387582779, + "step": 2480, + "valid_targets_mean": 7392.8, + "valid_targets_min": 5629 + }, + { + "epoch": 2.021978021978022, + "grad_norm": 0.398790604122284, + "learning_rate": 3.581711707961539e-05, + "loss": 0.215, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1090719997882843, + "step": 2485, + "valid_targets_mean": 6879.6, + "valid_targets_min": 5113 + }, + { + "epoch": 2.0260480260480263, + "grad_norm": 0.5235045531846907, + "learning_rate": 3.579225035764934e-05, + "loss": 0.2323, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11241519451141357, + "step": 2490, + "valid_targets_mean": 6919.5, + "valid_targets_min": 5080 + }, + { + "epoch": 2.03011803011803, + "grad_norm": 0.4208616593976156, + "learning_rate": 3.576731862611359e-05, + "loss": 0.2256, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09949975460767746, + "step": 2495, + "valid_targets_mean": 6377.0, + "valid_targets_min": 3939 + }, + { + "epoch": 2.034188034188034, + "grad_norm": 0.3829013185412194, + "learning_rate": 3.5742321987640826e-05, + "loss": 0.2128, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10645119845867157, + "step": 2500, + "valid_targets_mean": 6392.2, + "valid_targets_min": 4355 + }, + { + "epoch": 2.0382580382580384, + "grad_norm": 0.44985938212529025, + "learning_rate": 3.571726054513093e-05, + "loss": 0.2059, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12094033509492874, + "step": 2505, + "valid_targets_mean": 5196.5, + "valid_targets_min": 137 + }, + { + "epoch": 2.0423280423280423, + "grad_norm": 0.34320734656442425, + "learning_rate": 3.569213440175057e-05, + "loss": 0.2107, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09449905157089233, + "step": 2510, + "valid_targets_mean": 7344.4, + "valid_targets_min": 5244 + }, + { + "epoch": 2.0463980463980462, + "grad_norm": 0.32668719505554467, + "learning_rate": 3.566694366093272e-05, + "loss": 0.2047, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08982042968273163, + "step": 2515, + "valid_targets_mean": 7921.0, + "valid_targets_min": 5311 + }, + { + "epoch": 2.0504680504680506, + "grad_norm": 0.3852739489735939, + "learning_rate": 3.564168842637631e-05, + "loss": 0.2145, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10812553018331528, + "step": 2520, + "valid_targets_mean": 7931.2, + "valid_targets_min": 6015 + }, + { + "epoch": 2.0545380545380545, + "grad_norm": 0.3845046248072148, + "learning_rate": 3.561636880204573e-05, + "loss": 0.2036, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09657958149909973, + "step": 2525, + "valid_targets_mean": 7640.8, + "valid_targets_min": 5248 + }, + { + "epoch": 2.0586080586080584, + "grad_norm": 0.37913655017775, + "learning_rate": 3.559098489217048e-05, + "loss": 0.2041, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10077941417694092, + "step": 2530, + "valid_targets_mean": 7249.6, + "valid_targets_min": 5217 + }, + { + "epoch": 2.0626780626780628, + "grad_norm": 0.3932696577304994, + "learning_rate": 3.556553680124463e-05, + "loss": 0.2097, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11539335548877716, + "step": 2535, + "valid_targets_mean": 7619.9, + "valid_targets_min": 5094 + }, + { + "epoch": 2.0667480667480667, + "grad_norm": 0.4553738196822326, + "learning_rate": 3.554002463402651e-05, + "loss": 0.2039, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09798746556043625, + "step": 2540, + "valid_targets_mean": 7732.8, + "valid_targets_min": 5842 + }, + { + "epoch": 2.070818070818071, + "grad_norm": 0.3421874470170016, + "learning_rate": 3.55144484955382e-05, + "loss": 0.2065, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10603936016559601, + "step": 2545, + "valid_targets_mean": 7952.9, + "valid_targets_min": 5494 + }, + { + "epoch": 2.074888074888075, + "grad_norm": 0.411703732721423, + "learning_rate": 3.5488808491065115e-05, + "loss": 0.2239, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11069735139608383, + "step": 2550, + "valid_targets_mean": 8001.0, + "valid_targets_min": 5454 + }, + { + "epoch": 2.078958078958079, + "grad_norm": 0.4858270959003259, + "learning_rate": 3.546310472615559e-05, + "loss": 0.2287, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07974497973918915, + "step": 2555, + "valid_targets_mean": 2985.1, + "valid_targets_min": 1896 + }, + { + "epoch": 2.083028083028083, + "grad_norm": 0.36276165908540825, + "learning_rate": 3.5437337306620426e-05, + "loss": 0.1861, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0890931487083435, + "step": 2560, + "valid_targets_mean": 6714.6, + "valid_targets_min": 4917 + }, + { + "epoch": 2.087098087098087, + "grad_norm": 0.38710060664879364, + "learning_rate": 3.5411506338532467e-05, + "loss": 0.2025, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11066673696041107, + "step": 2565, + "valid_targets_mean": 7595.2, + "valid_targets_min": 5488 + }, + { + "epoch": 2.091168091168091, + "grad_norm": 0.42287295340468783, + "learning_rate": 3.538561192822616e-05, + "loss": 0.205, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11034491658210754, + "step": 2570, + "valid_targets_mean": 7046.1, + "valid_targets_min": 5579 + }, + { + "epoch": 2.0952380952380953, + "grad_norm": 0.39997010448045195, + "learning_rate": 3.535965418229709e-05, + "loss": 0.2113, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10264366120100021, + "step": 2575, + "valid_targets_mean": 7108.0, + "valid_targets_min": 4963 + }, + { + "epoch": 2.0993080993080993, + "grad_norm": 0.389640401517337, + "learning_rate": 3.53336332076016e-05, + "loss": 0.2072, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10438866168260574, + "step": 2580, + "valid_targets_mean": 6810.1, + "valid_targets_min": 5666 + }, + { + "epoch": 2.1033781033781036, + "grad_norm": 0.36942338263571345, + "learning_rate": 3.530754911125631e-05, + "loss": 0.1992, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09709086269140244, + "step": 2585, + "valid_targets_mean": 8068.5, + "valid_targets_min": 5524 + }, + { + "epoch": 2.1074481074481075, + "grad_norm": 0.4068615598584502, + "learning_rate": 3.528140200063766e-05, + "loss": 0.1805, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08543336391448975, + "step": 2590, + "valid_targets_mean": 6553.8, + "valid_targets_min": 3621 + }, + { + "epoch": 2.1115181115181114, + "grad_norm": 0.363276338830108, + "learning_rate": 3.525519198338152e-05, + "loss": 0.1848, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09692707657814026, + "step": 2595, + "valid_targets_mean": 6762.5, + "valid_targets_min": 4968 + }, + { + "epoch": 2.1155881155881158, + "grad_norm": 0.40355801888377707, + "learning_rate": 3.522891916738269e-05, + "loss": 0.1979, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09964744746685028, + "step": 2600, + "valid_targets_mean": 6143.9, + "valid_targets_min": 4917 + }, + { + "epoch": 2.1196581196581197, + "grad_norm": 0.5117847968390805, + "learning_rate": 3.520258366079451e-05, + "loss": 0.2029, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14205092191696167, + "step": 2605, + "valid_targets_mean": 4583.8, + "valid_targets_min": 215 + }, + { + "epoch": 2.1237281237281236, + "grad_norm": 0.3623495414988248, + "learning_rate": 3.5176185572028396e-05, + "loss": 0.2115, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10662650316953659, + "step": 2610, + "valid_targets_mean": 7317.1, + "valid_targets_min": 4948 + }, + { + "epoch": 2.127798127798128, + "grad_norm": 0.4038325996304427, + "learning_rate": 3.514972500975334e-05, + "loss": 0.2245, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10691466927528381, + "step": 2615, + "valid_targets_mean": 6437.6, + "valid_targets_min": 4510 + }, + { + "epoch": 2.131868131868132, + "grad_norm": 0.41453320525991616, + "learning_rate": 3.512320208289556e-05, + "loss": 0.2166, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11678023636341095, + "step": 2620, + "valid_targets_mean": 7707.9, + "valid_targets_min": 5796 + }, + { + "epoch": 2.1359381359381358, + "grad_norm": 0.3827950421697951, + "learning_rate": 3.509661690063796e-05, + "loss": 0.2045, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10019416362047195, + "step": 2625, + "valid_targets_mean": 7900.9, + "valid_targets_min": 5643 + }, + { + "epoch": 2.14000814000814, + "grad_norm": 0.4162389900061376, + "learning_rate": 3.506996957241975e-05, + "loss": 0.2082, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11353873461484909, + "step": 2630, + "valid_targets_mean": 7627.2, + "valid_targets_min": 5742 + }, + { + "epoch": 2.144078144078144, + "grad_norm": 0.37061577197439505, + "learning_rate": 3.5043260207935964e-05, + "loss": 0.2116, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12082710862159729, + "step": 2635, + "valid_targets_mean": 8725.6, + "valid_targets_min": 5141 + }, + { + "epoch": 2.148148148148148, + "grad_norm": 0.3989344474846749, + "learning_rate": 3.5016488917137005e-05, + "loss": 0.2076, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11526183038949966, + "step": 2640, + "valid_targets_mean": 6967.8, + "valid_targets_min": 6088 + }, + { + "epoch": 2.1522181522181523, + "grad_norm": 0.3765813295326696, + "learning_rate": 3.4989655810228185e-05, + "loss": 0.2105, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09672106802463531, + "step": 2645, + "valid_targets_mean": 7140.1, + "valid_targets_min": 4834 + }, + { + "epoch": 2.156288156288156, + "grad_norm": 0.4244375123230793, + "learning_rate": 3.496276099766932e-05, + "loss": 0.2086, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09745915234088898, + "step": 2650, + "valid_targets_mean": 6706.9, + "valid_targets_min": 5108 + }, + { + "epoch": 2.1603581603581605, + "grad_norm": 0.41712309629614364, + "learning_rate": 3.493580459017419e-05, + "loss": 0.2119, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1050487756729126, + "step": 2655, + "valid_targets_mean": 5931.4, + "valid_targets_min": 4708 + }, + { + "epoch": 2.1644281644281644, + "grad_norm": 0.36084098571878304, + "learning_rate": 3.4908786698710196e-05, + "loss": 0.2033, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09638667106628418, + "step": 2660, + "valid_targets_mean": 7145.6, + "valid_targets_min": 5280 + }, + { + "epoch": 2.1684981684981683, + "grad_norm": 0.660596763158056, + "learning_rate": 3.488170743449779e-05, + "loss": 0.1977, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06803116202354431, + "step": 2665, + "valid_targets_mean": 1554.9, + "valid_targets_min": 592 + }, + { + "epoch": 2.1725681725681727, + "grad_norm": 0.4482747012803121, + "learning_rate": 3.4854566909010074e-05, + "loss": 0.1998, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11789162456989288, + "step": 2670, + "valid_targets_mean": 7069.5, + "valid_targets_min": 4765 + }, + { + "epoch": 2.1766381766381766, + "grad_norm": 0.3901951315778572, + "learning_rate": 3.482736523397237e-05, + "loss": 0.2034, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10189478099346161, + "step": 2675, + "valid_targets_mean": 6193.4, + "valid_targets_min": 5587 + }, + { + "epoch": 2.1807081807081805, + "grad_norm": 0.42745988843150207, + "learning_rate": 3.4800102521361686e-05, + "loss": 0.2057, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10757580399513245, + "step": 2680, + "valid_targets_mean": 6612.2, + "valid_targets_min": 4894 + }, + { + "epoch": 2.184778184778185, + "grad_norm": 0.36920716366061657, + "learning_rate": 3.477277888340631e-05, + "loss": 0.1988, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09517497569322586, + "step": 2685, + "valid_targets_mean": 6605.9, + "valid_targets_min": 3795 + }, + { + "epoch": 2.1888481888481888, + "grad_norm": 0.3841241265624308, + "learning_rate": 3.474539443258534e-05, + "loss": 0.2044, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10367844998836517, + "step": 2690, + "valid_targets_mean": 7186.8, + "valid_targets_min": 4890 + }, + { + "epoch": 2.192918192918193, + "grad_norm": 0.412689694061922, + "learning_rate": 3.47179492816282e-05, + "loss": 0.2084, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10491199791431427, + "step": 2695, + "valid_targets_mean": 6544.4, + "valid_targets_min": 4615 + }, + { + "epoch": 2.196988196988197, + "grad_norm": 0.4079072132791562, + "learning_rate": 3.4690443543514195e-05, + "loss": 0.2094, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1100861206650734, + "step": 2700, + "valid_targets_mean": 6339.2, + "valid_targets_min": 4798 + }, + { + "epoch": 2.201058201058201, + "grad_norm": 0.49329598417369225, + "learning_rate": 3.466287733147204e-05, + "loss": 0.1755, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11788356304168701, + "step": 2705, + "valid_targets_mean": 4558.8, + "valid_targets_min": 148 + }, + { + "epoch": 2.2051282051282053, + "grad_norm": 0.4145578788267481, + "learning_rate": 3.463525075897939e-05, + "loss": 0.205, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11438199877738953, + "step": 2710, + "valid_targets_mean": 6807.9, + "valid_targets_min": 5164 + }, + { + "epoch": 2.209198209198209, + "grad_norm": 0.4801367983090269, + "learning_rate": 3.4607563939762376e-05, + "loss": 0.1984, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10205637663602829, + "step": 2715, + "valid_targets_mean": 7011.8, + "valid_targets_min": 4974 + }, + { + "epoch": 2.213268213268213, + "grad_norm": 0.3925286843177837, + "learning_rate": 3.4579816987795153e-05, + "loss": 0.2054, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09270621836185455, + "step": 2720, + "valid_targets_mean": 5794.2, + "valid_targets_min": 4809 + }, + { + "epoch": 2.2173382173382175, + "grad_norm": 0.407243764629195, + "learning_rate": 3.45520100172994e-05, + "loss": 0.1948, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09255869686603546, + "step": 2725, + "valid_targets_mean": 6401.8, + "valid_targets_min": 4842 + }, + { + "epoch": 2.2214082214082214, + "grad_norm": 0.42364061860080554, + "learning_rate": 3.452414314274386e-05, + "loss": 0.2004, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10335355252027512, + "step": 2730, + "valid_targets_mean": 6182.1, + "valid_targets_min": 5116 + }, + { + "epoch": 2.2254782254782253, + "grad_norm": 0.3622471152141575, + "learning_rate": 3.449621647884389e-05, + "loss": 0.2009, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09193432331085205, + "step": 2735, + "valid_targets_mean": 6165.1, + "valid_targets_min": 5764 + }, + { + "epoch": 2.2295482295482296, + "grad_norm": 0.7288165001567957, + "learning_rate": 3.446823014056096e-05, + "loss": 0.2248, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08235251158475876, + "step": 2740, + "valid_targets_mean": 1428.2, + "valid_targets_min": 1062 + }, + { + "epoch": 2.2336182336182335, + "grad_norm": 0.7611801465102082, + "learning_rate": 3.444018424310221e-05, + "loss": 0.2024, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0987827330827713, + "step": 2745, + "valid_targets_mean": 1663.0, + "valid_targets_min": 960 + }, + { + "epoch": 2.237688237688238, + "grad_norm": 0.7650679543370236, + "learning_rate": 3.441207890191993e-05, + "loss": 0.1961, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09653256833553314, + "step": 2750, + "valid_targets_mean": 1476.5, + "valid_targets_min": 933 + }, + { + "epoch": 2.241758241758242, + "grad_norm": 0.83957302169369, + "learning_rate": 3.438391423271115e-05, + "loss": 0.1927, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0952417328953743, + "step": 2755, + "valid_targets_mean": 1478.1, + "valid_targets_min": 881 + }, + { + "epoch": 2.2458282458282457, + "grad_norm": 0.8264299057033202, + "learning_rate": 3.435569035141708e-05, + "loss": 0.1963, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11602550745010376, + "step": 2760, + "valid_targets_mean": 1640.6, + "valid_targets_min": 869 + }, + { + "epoch": 2.24989824989825, + "grad_norm": 0.8302853801654866, + "learning_rate": 3.4327407374222726e-05, + "loss": 0.1974, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10423608124256134, + "step": 2765, + "valid_targets_mean": 1709.0, + "valid_targets_min": 1090 + }, + { + "epoch": 2.253968253968254, + "grad_norm": 0.7943422274719701, + "learning_rate": 3.429906541755633e-05, + "loss": 0.1857, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07585322856903076, + "step": 2770, + "valid_targets_mean": 1157.9, + "valid_targets_min": 761 + }, + { + "epoch": 2.258038258038258, + "grad_norm": 0.7565321550178822, + "learning_rate": 3.427066459808896e-05, + "loss": 0.2034, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09391198307275772, + "step": 2775, + "valid_targets_mean": 1553.6, + "valid_targets_min": 1241 + }, + { + "epoch": 2.262108262108262, + "grad_norm": 0.7572888191925212, + "learning_rate": 3.4242205032733964e-05, + "loss": 0.1866, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.089296355843544, + "step": 2780, + "valid_targets_mean": 1538.1, + "valid_targets_min": 733 + }, + { + "epoch": 2.266178266178266, + "grad_norm": 0.7064148997241197, + "learning_rate": 3.421368683864653e-05, + "loss": 0.193, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05855751782655716, + "step": 2785, + "valid_targets_mean": 1250.6, + "valid_targets_min": 835 + }, + { + "epoch": 2.2702482702482705, + "grad_norm": 0.8701439644199427, + "learning_rate": 3.41851101332232e-05, + "loss": 0.1852, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1104547530412674, + "step": 2790, + "valid_targets_mean": 1596.2, + "valid_targets_min": 1046 + }, + { + "epoch": 2.2743182743182744, + "grad_norm": 0.7800184045885545, + "learning_rate": 3.4156475034101366e-05, + "loss": 0.1964, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09086226671934128, + "step": 2795, + "valid_targets_mean": 1646.5, + "valid_targets_min": 797 + }, + { + "epoch": 2.2783882783882783, + "grad_norm": 0.7543167696467623, + "learning_rate": 3.4127781659158834e-05, + "loss": 0.186, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09387262165546417, + "step": 2800, + "valid_targets_mean": 1448.9, + "valid_targets_min": 570 + }, + { + "epoch": 2.2824582824582826, + "grad_norm": 0.7308404855311503, + "learning_rate": 3.409903012651327e-05, + "loss": 0.1791, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06849798560142517, + "step": 2805, + "valid_targets_mean": 1164.1, + "valid_targets_min": 548 + }, + { + "epoch": 2.2865282865282865, + "grad_norm": 0.8596378098769865, + "learning_rate": 3.407022055452176e-05, + "loss": 0.1771, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09337285161018372, + "step": 2810, + "valid_targets_mean": 1338.0, + "valid_targets_min": 682 + }, + { + "epoch": 2.2905982905982905, + "grad_norm": 0.7766417971092897, + "learning_rate": 3.404135306178032e-05, + "loss": 0.1929, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09825626015663147, + "step": 2815, + "valid_targets_mean": 1632.8, + "valid_targets_min": 896 + }, + { + "epoch": 2.294668294668295, + "grad_norm": 0.7789615600939394, + "learning_rate": 3.401242776712339e-05, + "loss": 0.1962, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12328001856803894, + "step": 2820, + "valid_targets_mean": 1806.5, + "valid_targets_min": 597 + }, + { + "epoch": 2.2987382987382987, + "grad_norm": 0.7920477301948577, + "learning_rate": 3.3983444789623356e-05, + "loss": 0.1953, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08308230340480804, + "step": 2825, + "valid_targets_mean": 1289.9, + "valid_targets_min": 706 + }, + { + "epoch": 2.3028083028083026, + "grad_norm": 0.8975491925387518, + "learning_rate": 3.395440424859007e-05, + "loss": 0.1868, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06022972613573074, + "step": 2830, + "valid_targets_mean": 1354.1, + "valid_targets_min": 754 + }, + { + "epoch": 2.306878306878307, + "grad_norm": 0.7130180704385317, + "learning_rate": 3.3925306263570316e-05, + "loss": 0.1846, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08599118888378143, + "step": 2835, + "valid_targets_mean": 1461.9, + "valid_targets_min": 819 + }, + { + "epoch": 2.310948310948311, + "grad_norm": 0.7039318125811921, + "learning_rate": 3.389615095434739e-05, + "loss": 0.1933, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07211598753929138, + "step": 2840, + "valid_targets_mean": 1225.5, + "valid_targets_min": 684 + }, + { + "epoch": 2.315018315018315, + "grad_norm": 0.9104668810680625, + "learning_rate": 3.386693844094055e-05, + "loss": 0.1982, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09691354632377625, + "step": 2845, + "valid_targets_mean": 1282.2, + "valid_targets_min": 711 + }, + { + "epoch": 2.319088319088319, + "grad_norm": 0.7159321952742587, + "learning_rate": 3.3837668843604506e-05, + "loss": 0.1839, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09997346997261047, + "step": 2850, + "valid_targets_mean": 1634.8, + "valid_targets_min": 1190 + }, + { + "epoch": 2.323158323158323, + "grad_norm": 0.7393893216811349, + "learning_rate": 3.380834228282901e-05, + "loss": 0.1797, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09477157890796661, + "step": 2855, + "valid_targets_mean": 1615.9, + "valid_targets_min": 774 + }, + { + "epoch": 2.3272283272283274, + "grad_norm": 0.6968353858472797, + "learning_rate": 3.377895887933828e-05, + "loss": 0.1845, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09568726271390915, + "step": 2860, + "valid_targets_mean": 1848.4, + "valid_targets_min": 734 + }, + { + "epoch": 2.3312983312983313, + "grad_norm": 0.792449089844968, + "learning_rate": 3.374951875409052e-05, + "loss": 0.1835, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09505010396242142, + "step": 2865, + "valid_targets_mean": 1570.6, + "valid_targets_min": 1011 + }, + { + "epoch": 2.335368335368335, + "grad_norm": 0.8817699677604942, + "learning_rate": 3.372002202827744e-05, + "loss": 0.1854, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10640811175107956, + "step": 2870, + "valid_targets_mean": 1697.0, + "valid_targets_min": 803 + }, + { + "epoch": 2.3394383394383396, + "grad_norm": 0.8000585484054449, + "learning_rate": 3.369046882332376e-05, + "loss": 0.186, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0811472162604332, + "step": 2875, + "valid_targets_mean": 1307.1, + "valid_targets_min": 782 + }, + { + "epoch": 2.3435083435083435, + "grad_norm": 0.7232841587277343, + "learning_rate": 3.36608592608867e-05, + "loss": 0.1799, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09704796224832535, + "step": 2880, + "valid_targets_mean": 1596.6, + "valid_targets_min": 646 + }, + { + "epoch": 2.347578347578348, + "grad_norm": 0.75091103368943, + "learning_rate": 3.363119346285546e-05, + "loss": 0.1913, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1288718581199646, + "step": 2885, + "valid_targets_mean": 1817.5, + "valid_targets_min": 922 + }, + { + "epoch": 2.3516483516483517, + "grad_norm": 0.7628714260858689, + "learning_rate": 3.360147155135074e-05, + "loss": 0.1863, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10485200583934784, + "step": 2890, + "valid_targets_mean": 1822.2, + "valid_targets_min": 796 + }, + { + "epoch": 2.3557183557183556, + "grad_norm": 0.7313328666663071, + "learning_rate": 3.3571693648724255e-05, + "loss": 0.178, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09335388988256454, + "step": 2895, + "valid_targets_mean": 1542.4, + "valid_targets_min": 903 + }, + { + "epoch": 2.35978835978836, + "grad_norm": 0.7976060703436042, + "learning_rate": 3.354185987755818e-05, + "loss": 0.1777, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10187899321317673, + "step": 2900, + "valid_targets_mean": 1459.5, + "valid_targets_min": 780 + }, + { + "epoch": 2.363858363858364, + "grad_norm": 0.7155507200782179, + "learning_rate": 3.35119703606647e-05, + "loss": 0.1774, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09475696831941605, + "step": 2905, + "valid_targets_mean": 1581.1, + "valid_targets_min": 783 + }, + { + "epoch": 2.367928367928368, + "grad_norm": 0.7284814264168356, + "learning_rate": 3.3482025221085476e-05, + "loss": 0.1794, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07198642194271088, + "step": 2910, + "valid_targets_mean": 1342.1, + "valid_targets_min": 662 + }, + { + "epoch": 2.371998371998372, + "grad_norm": 0.7533633677117174, + "learning_rate": 3.345202458209112e-05, + "loss": 0.1824, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08188924193382263, + "step": 2915, + "valid_targets_mean": 1269.1, + "valid_targets_min": 724 + }, + { + "epoch": 2.376068376068376, + "grad_norm": 0.7522234550738248, + "learning_rate": 3.342196856718074e-05, + "loss": 0.177, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0987510234117508, + "step": 2920, + "valid_targets_mean": 1510.1, + "valid_targets_min": 966 + }, + { + "epoch": 2.38013838013838, + "grad_norm": 0.7796084834753764, + "learning_rate": 3.339185730008138e-05, + "loss": 0.1838, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07946356385946274, + "step": 2925, + "valid_targets_mean": 1327.8, + "valid_targets_min": 773 + }, + { + "epoch": 2.3842083842083843, + "grad_norm": 1.1234420570664228, + "learning_rate": 3.336169090474756e-05, + "loss": 0.181, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07645580172538757, + "step": 2930, + "valid_targets_mean": 1353.2, + "valid_targets_min": 807 + }, + { + "epoch": 2.3882783882783882, + "grad_norm": 0.8228399599394453, + "learning_rate": 3.333146950536069e-05, + "loss": 0.1858, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06878844648599625, + "step": 2935, + "valid_targets_mean": 1142.5, + "valid_targets_min": 782 + }, + { + "epoch": 2.392348392348392, + "grad_norm": 0.761793565454502, + "learning_rate": 3.330119322632866e-05, + "loss": 0.177, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10029375553131104, + "step": 2940, + "valid_targets_mean": 1849.9, + "valid_targets_min": 634 + }, + { + "epoch": 2.3964183964183965, + "grad_norm": 0.7748556030785365, + "learning_rate": 3.327086219228525e-05, + "loss": 0.1915, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09164921939373016, + "step": 2945, + "valid_targets_mean": 1599.6, + "valid_targets_min": 573 + }, + { + "epoch": 2.4004884004884004, + "grad_norm": 0.6823141885908128, + "learning_rate": 3.324047652808963e-05, + "loss": 0.1843, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10002493113279343, + "step": 2950, + "valid_targets_mean": 1994.9, + "valid_targets_min": 1102 + }, + { + "epoch": 2.4045584045584047, + "grad_norm": 0.7203959180089063, + "learning_rate": 3.321003635882588e-05, + "loss": 0.1871, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06791776418685913, + "step": 2955, + "valid_targets_mean": 1239.2, + "valid_targets_min": 841 + }, + { + "epoch": 2.4086284086284087, + "grad_norm": 0.8522458805730017, + "learning_rate": 3.3179541809802436e-05, + "loss": 0.178, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0885857418179512, + "step": 2960, + "valid_targets_mean": 1467.5, + "valid_targets_min": 956 + }, + { + "epoch": 2.4126984126984126, + "grad_norm": 0.8485280697833679, + "learning_rate": 3.31489930065516e-05, + "loss": 0.1856, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09528080374002457, + "step": 2965, + "valid_targets_mean": 1300.9, + "valid_targets_min": 857 + }, + { + "epoch": 2.416768416768417, + "grad_norm": 0.702328495769866, + "learning_rate": 3.311839007482902e-05, + "loss": 0.1826, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09654178470373154, + "step": 2970, + "valid_targets_mean": 1389.8, + "valid_targets_min": 808 + }, + { + "epoch": 2.420838420838421, + "grad_norm": 0.7498418061623674, + "learning_rate": 3.308773314061315e-05, + "loss": 0.1704, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0779038816690445, + "step": 2975, + "valid_targets_mean": 1303.8, + "valid_targets_min": 677 + }, + { + "epoch": 2.4249084249084247, + "grad_norm": 0.7339750048913246, + "learning_rate": 3.3057022330104764e-05, + "loss": 0.1756, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0759783387184143, + "step": 2980, + "valid_targets_mean": 1220.1, + "valid_targets_min": 727 + }, + { + "epoch": 2.428978428978429, + "grad_norm": 0.7678673826329332, + "learning_rate": 3.30262577697264e-05, + "loss": 0.1803, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07477933913469315, + "step": 2985, + "valid_targets_mean": 1262.0, + "valid_targets_min": 726 + }, + { + "epoch": 2.433048433048433, + "grad_norm": 0.7882443666752398, + "learning_rate": 3.299543958612188e-05, + "loss": 0.1851, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10648937523365021, + "step": 2990, + "valid_targets_mean": 1615.0, + "valid_targets_min": 669 + }, + { + "epoch": 2.4371184371184373, + "grad_norm": 0.7375257054384906, + "learning_rate": 3.2964567906155775e-05, + "loss": 0.1798, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10215851664543152, + "step": 2995, + "valid_targets_mean": 1806.5, + "valid_targets_min": 792 + }, + { + "epoch": 2.4411884411884412, + "grad_norm": 0.7644653496961722, + "learning_rate": 3.293364285691284e-05, + "loss": 0.1777, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09152808785438538, + "step": 3000, + "valid_targets_mean": 1627.6, + "valid_targets_min": 736 + }, + { + "epoch": 2.445258445258445, + "grad_norm": 0.836057897371144, + "learning_rate": 3.290266456569756e-05, + "loss": 0.1831, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12586501240730286, + "step": 3005, + "valid_targets_mean": 1822.9, + "valid_targets_min": 694 + }, + { + "epoch": 2.4493284493284495, + "grad_norm": 0.7684573774704392, + "learning_rate": 3.2871633160033596e-05, + "loss": 0.1779, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11305080354213715, + "step": 3010, + "valid_targets_mean": 1848.9, + "valid_targets_min": 1437 + }, + { + "epoch": 2.4533984533984534, + "grad_norm": 0.7722426718287829, + "learning_rate": 3.2840548767663226e-05, + "loss": 0.1766, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11125504225492477, + "step": 3015, + "valid_targets_mean": 1852.4, + "valid_targets_min": 809 + }, + { + "epoch": 2.4574684574684573, + "grad_norm": 0.8693277147754886, + "learning_rate": 3.2809411516546876e-05, + "loss": 0.1758, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0825982391834259, + "step": 3020, + "valid_targets_mean": 1375.5, + "valid_targets_min": 790 + }, + { + "epoch": 2.4615384615384617, + "grad_norm": 0.8569411244780402, + "learning_rate": 3.2778221534862554e-05, + "loss": 0.1653, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08256229013204575, + "step": 3025, + "valid_targets_mean": 1338.8, + "valid_targets_min": 750 + }, + { + "epoch": 2.4656084656084656, + "grad_norm": 0.8496633562930437, + "learning_rate": 3.274697895100536e-05, + "loss": 0.1798, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11016149818897247, + "step": 3030, + "valid_targets_mean": 1778.8, + "valid_targets_min": 826 + }, + { + "epoch": 2.4696784696784695, + "grad_norm": 0.7550200846250567, + "learning_rate": 3.2715683893586904e-05, + "loss": 0.177, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08121539652347565, + "step": 3035, + "valid_targets_mean": 1600.8, + "valid_targets_min": 907 + }, + { + "epoch": 2.473748473748474, + "grad_norm": 0.7407812428035063, + "learning_rate": 3.2684336491434814e-05, + "loss": 0.1717, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09471592307090759, + "step": 3040, + "valid_targets_mean": 1474.5, + "valid_targets_min": 1093 + }, + { + "epoch": 2.4778184778184777, + "grad_norm": 1.167858594983234, + "learning_rate": 3.2652936873592206e-05, + "loss": 0.1796, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09139646589756012, + "step": 3045, + "valid_targets_mean": 1577.2, + "valid_targets_min": 1119 + }, + { + "epoch": 2.4818884818884817, + "grad_norm": 0.7675583595322624, + "learning_rate": 3.262148516931714e-05, + "loss": 0.1777, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10131756961345673, + "step": 3050, + "valid_targets_mean": 1512.1, + "valid_targets_min": 700 + }, + { + "epoch": 2.485958485958486, + "grad_norm": 0.7516413523459232, + "learning_rate": 3.25899815080821e-05, + "loss": 0.1768, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07350552827119827, + "step": 3055, + "valid_targets_mean": 1288.2, + "valid_targets_min": 810 + }, + { + "epoch": 2.49002849002849, + "grad_norm": 0.9663636034775042, + "learning_rate": 3.2558426019573435e-05, + "loss": 0.1801, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0857287123799324, + "step": 3060, + "valid_targets_mean": 1328.9, + "valid_targets_min": 635 + }, + { + "epoch": 2.4940984940984943, + "grad_norm": 0.7525509366960841, + "learning_rate": 3.2526818833690855e-05, + "loss": 0.1734, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10586247593164444, + "step": 3065, + "valid_targets_mean": 1542.5, + "valid_targets_min": 673 + }, + { + "epoch": 2.498168498168498, + "grad_norm": 0.8552319935455616, + "learning_rate": 3.2495160080546895e-05, + "loss": 0.1866, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09937754273414612, + "step": 3070, + "valid_targets_mean": 1215.6, + "valid_targets_min": 641 + }, + { + "epoch": 2.502238502238502, + "grad_norm": 0.7825010890917209, + "learning_rate": 3.246344989046635e-05, + "loss": 0.1744, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07913076877593994, + "step": 3075, + "valid_targets_mean": 1222.6, + "valid_targets_min": 701 + }, + { + "epoch": 2.5063085063085064, + "grad_norm": 0.7499645512602379, + "learning_rate": 3.243168839398576e-05, + "loss": 0.1907, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09814945608377457, + "step": 3080, + "valid_targets_mean": 1850.6, + "valid_targets_min": 1202 + }, + { + "epoch": 2.5103785103785103, + "grad_norm": 0.7240473408886264, + "learning_rate": 3.239987572185288e-05, + "loss": 0.1777, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06967031210660934, + "step": 3085, + "valid_targets_mean": 1265.5, + "valid_targets_min": 657 + }, + { + "epoch": 2.5144485144485147, + "grad_norm": 0.7723805357606571, + "learning_rate": 3.2368012005026136e-05, + "loss": 0.1818, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08349703252315521, + "step": 3090, + "valid_targets_mean": 1388.0, + "valid_targets_min": 672 + }, + { + "epoch": 2.5185185185185186, + "grad_norm": 0.740778910182572, + "learning_rate": 3.233609737467407e-05, + "loss": 0.175, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10719085484743118, + "step": 3095, + "valid_targets_mean": 1813.6, + "valid_targets_min": 1019 + }, + { + "epoch": 2.5225885225885225, + "grad_norm": 0.7925251844992214, + "learning_rate": 3.2304131962174804e-05, + "loss": 0.1735, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0703994482755661, + "step": 3100, + "valid_targets_mean": 1114.6, + "valid_targets_min": 732 + }, + { + "epoch": 2.526658526658527, + "grad_norm": 0.7739162778424586, + "learning_rate": 3.227211589911554e-05, + "loss": 0.1826, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06456971168518066, + "step": 3105, + "valid_targets_mean": 1194.5, + "valid_targets_min": 634 + }, + { + "epoch": 2.5307285307285308, + "grad_norm": 0.6811693756020172, + "learning_rate": 3.224004931729195e-05, + "loss": 0.1712, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0884883850812912, + "step": 3110, + "valid_targets_mean": 1756.5, + "valid_targets_min": 905 + }, + { + "epoch": 2.5347985347985347, + "grad_norm": 1.2605982496459476, + "learning_rate": 3.220793234870769e-05, + "loss": 0.1712, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10120239853858948, + "step": 3115, + "valid_targets_mean": 1554.9, + "valid_targets_min": 596 + }, + { + "epoch": 2.538868538868539, + "grad_norm": 0.7456047283707952, + "learning_rate": 3.217576512557383e-05, + "loss": 0.1703, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07132766395807266, + "step": 3120, + "valid_targets_mean": 1214.6, + "valid_targets_min": 687 + }, + { + "epoch": 2.542938542938543, + "grad_norm": 0.7668765276856079, + "learning_rate": 3.214354778030831e-05, + "loss": 0.1855, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10113206505775452, + "step": 3125, + "valid_targets_mean": 1955.1, + "valid_targets_min": 1199 + }, + { + "epoch": 2.547008547008547, + "grad_norm": 0.6688254932482489, + "learning_rate": 3.211128044553542e-05, + "loss": 0.1733, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08862347900867462, + "step": 3130, + "valid_targets_mean": 1758.6, + "valid_targets_min": 1214 + }, + { + "epoch": 2.551078551078551, + "grad_norm": 0.7105824934530232, + "learning_rate": 3.2078963254085186e-05, + "loss": 0.1713, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09190790355205536, + "step": 3135, + "valid_targets_mean": 1711.5, + "valid_targets_min": 1122 + }, + { + "epoch": 2.555148555148555, + "grad_norm": 1.574075103571208, + "learning_rate": 3.2046596338992934e-05, + "loss": 0.1663, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08651512861251831, + "step": 3140, + "valid_targets_mean": 1489.6, + "valid_targets_min": 850 + }, + { + "epoch": 2.559218559218559, + "grad_norm": 0.7171578485312335, + "learning_rate": 3.201417983349865e-05, + "loss": 0.1604, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07850717753171921, + "step": 3145, + "valid_targets_mean": 1572.4, + "valid_targets_min": 1318 + }, + { + "epoch": 2.5632885632885634, + "grad_norm": 0.784020145965083, + "learning_rate": 3.198171387104645e-05, + "loss": 0.1769, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09676879644393921, + "step": 3150, + "valid_targets_mean": 1500.0, + "valid_targets_min": 1043 + }, + { + "epoch": 2.5673585673585673, + "grad_norm": 0.7023031187317561, + "learning_rate": 3.194919858528405e-05, + "loss": 0.1745, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10829615592956543, + "step": 3155, + "valid_targets_mean": 1774.9, + "valid_targets_min": 920 + }, + { + "epoch": 2.571428571428571, + "grad_norm": 0.7266495375892049, + "learning_rate": 3.191663411006222e-05, + "loss": 0.172, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09090869128704071, + "step": 3160, + "valid_targets_mean": 1547.1, + "valid_targets_min": 847 + }, + { + "epoch": 2.5754985754985755, + "grad_norm": 0.7082754020615566, + "learning_rate": 3.1884020579434216e-05, + "loss": 0.1756, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09622791409492493, + "step": 3165, + "valid_targets_mean": 1769.4, + "valid_targets_min": 738 + }, + { + "epoch": 2.5795685795685794, + "grad_norm": 0.7671758979055356, + "learning_rate": 3.1851358127655214e-05, + "loss": 0.1761, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09749025106430054, + "step": 3170, + "valid_targets_mean": 1540.1, + "valid_targets_min": 594 + }, + { + "epoch": 2.583638583638584, + "grad_norm": 0.646884294048327, + "learning_rate": 3.1818646889181815e-05, + "loss": 0.1779, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09275157749652863, + "step": 3175, + "valid_targets_mean": 2228.6, + "valid_targets_min": 1393 + }, + { + "epoch": 2.5877085877085877, + "grad_norm": 0.43091544980003527, + "learning_rate": 3.1785886998671406e-05, + "loss": 0.1544, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07351458072662354, + "step": 3180, + "valid_targets_mean": 3981.9, + "valid_targets_min": 3545 + }, + { + "epoch": 2.591778591778592, + "grad_norm": 0.40608611395729444, + "learning_rate": 3.1753078590981697e-05, + "loss": 0.1094, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.053135018795728683, + "step": 3185, + "valid_targets_mean": 2837.8, + "valid_targets_min": 1024 + }, + { + "epoch": 2.595848595848596, + "grad_norm": 0.2970617855486435, + "learning_rate": 3.1720221801170076e-05, + "loss": 0.0971, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.029508689418435097, + "step": 3190, + "valid_targets_mean": 3453.5, + "valid_targets_min": 3026 + }, + { + "epoch": 2.5999185999186, + "grad_norm": 0.4040679062651357, + "learning_rate": 3.1687316764493145e-05, + "loss": 0.1199, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06386222690343857, + "step": 3195, + "valid_targets_mean": 3272.8, + "valid_targets_min": 2213 + }, + { + "epoch": 2.603988603988604, + "grad_norm": 0.42140297889686756, + "learning_rate": 3.165436361640608e-05, + "loss": 0.0958, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06314370036125183, + "step": 3200, + "valid_targets_mean": 3111.5, + "valid_targets_min": 1134 + }, + { + "epoch": 2.608058608058608, + "grad_norm": 0.4318510588456392, + "learning_rate": 3.162136249256214e-05, + "loss": 0.1067, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06179654225707054, + "step": 3205, + "valid_targets_mean": 2179.2, + "valid_targets_min": 663 + }, + { + "epoch": 2.612128612128612, + "grad_norm": 0.6151866793044011, + "learning_rate": 3.158831352881204e-05, + "loss": 0.1586, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06750689446926117, + "step": 3210, + "valid_targets_mean": 2128.4, + "valid_targets_min": 535 + }, + { + "epoch": 2.6161986161986164, + "grad_norm": 0.4023184284236594, + "learning_rate": 3.1555216861203466e-05, + "loss": 0.1023, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04353209584951401, + "step": 3215, + "valid_targets_mean": 1964.0, + "valid_targets_min": 811 + }, + { + "epoch": 2.6202686202686203, + "grad_norm": 0.4367154826598129, + "learning_rate": 3.1522072625980466e-05, + "loss": 0.1231, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07076150923967361, + "step": 3220, + "valid_targets_mean": 2673.8, + "valid_targets_min": 1498 + }, + { + "epoch": 2.624338624338624, + "grad_norm": 0.24902381526805464, + "learning_rate": 3.1488880959582905e-05, + "loss": 0.1221, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.03554770350456238, + "step": 3225, + "valid_targets_mean": 4407.2, + "valid_targets_min": 3285 + }, + { + "epoch": 2.6284086284086285, + "grad_norm": 0.2979605188474624, + "learning_rate": 3.14556419986459e-05, + "loss": 0.0961, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05047299340367317, + "step": 3230, + "valid_targets_mean": 3491.2, + "valid_targets_min": 1048 + }, + { + "epoch": 2.6324786324786325, + "grad_norm": 0.3571486698231609, + "learning_rate": 3.142235587999924e-05, + "loss": 0.1024, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05532723292708397, + "step": 3235, + "valid_targets_mean": 2983.9, + "valid_targets_min": 1290 + }, + { + "epoch": 2.6365486365486364, + "grad_norm": 0.4898779952522662, + "learning_rate": 3.138902274066688e-05, + "loss": 0.0993, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.058787234127521515, + "step": 3240, + "valid_targets_mean": 2018.6, + "valid_targets_min": 720 + }, + { + "epoch": 2.6406186406186407, + "grad_norm": 0.3257155158788977, + "learning_rate": 3.13556427178663e-05, + "loss": 0.1497, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05110851302742958, + "step": 3245, + "valid_targets_mean": 3854.4, + "valid_targets_min": 2336 + }, + { + "epoch": 2.6446886446886446, + "grad_norm": 0.34560277752942015, + "learning_rate": 3.1322215949008e-05, + "loss": 0.093, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04530660808086395, + "step": 3250, + "valid_targets_mean": 2544.9, + "valid_targets_min": 782 + }, + { + "epoch": 2.6487586487586485, + "grad_norm": 0.39091465479047305, + "learning_rate": 3.1288742571694905e-05, + "loss": 0.1043, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.036396391689777374, + "step": 3255, + "valid_targets_mean": 3094.4, + "valid_targets_min": 1086 + }, + { + "epoch": 2.652828652828653, + "grad_norm": 0.34705120235384473, + "learning_rate": 3.1255222723721815e-05, + "loss": 0.0876, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.045067206025123596, + "step": 3260, + "valid_targets_mean": 3285.0, + "valid_targets_min": 732 + }, + { + "epoch": 2.656898656898657, + "grad_norm": 0.537067848663477, + "learning_rate": 3.12216565430748e-05, + "loss": 0.1162, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0710548609495163, + "step": 3265, + "valid_targets_mean": 1575.8, + "valid_targets_min": 770 + }, + { + "epoch": 2.6609686609686607, + "grad_norm": 0.39399172193475046, + "learning_rate": 3.118804416793069e-05, + "loss": 0.1081, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04866083711385727, + "step": 3270, + "valid_targets_mean": 2609.9, + "valid_targets_min": 592 + }, + { + "epoch": 2.665038665038665, + "grad_norm": 0.4293292007143029, + "learning_rate": 3.115438573665649e-05, + "loss": 0.12, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.055250879377126694, + "step": 3275, + "valid_targets_mean": 3045.1, + "valid_targets_min": 2224 + }, + { + "epoch": 2.669108669108669, + "grad_norm": 0.39199172279612116, + "learning_rate": 3.112068138780876e-05, + "loss": 0.1163, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.041924867779016495, + "step": 3280, + "valid_targets_mean": 2729.9, + "valid_targets_min": 511 + }, + { + "epoch": 2.6731786731786733, + "grad_norm": 0.36149115315511404, + "learning_rate": 3.108693126013308e-05, + "loss": 0.1538, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.056219682097435, + "step": 3285, + "valid_targets_mean": 3708.0, + "valid_targets_min": 2328 + }, + { + "epoch": 2.677248677248677, + "grad_norm": 0.39693264903199954, + "learning_rate": 3.105313549256352e-05, + "loss": 0.0941, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0479307621717453, + "step": 3290, + "valid_targets_mean": 3069.4, + "valid_targets_min": 884 + }, + { + "epoch": 2.6813186813186816, + "grad_norm": 0.6370107315415748, + "learning_rate": 3.1019294224222015e-05, + "loss": 0.1239, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06845805794000626, + "step": 3295, + "valid_targets_mean": 1417.6, + "valid_targets_min": 534 + }, + { + "epoch": 2.6853886853886855, + "grad_norm": 0.6791334033335826, + "learning_rate": 3.098540759441778e-05, + "loss": 0.1326, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0740579143166542, + "step": 3300, + "valid_targets_mean": 1554.2, + "valid_targets_min": 568 + }, + { + "epoch": 2.6894586894586894, + "grad_norm": 0.40157879779417277, + "learning_rate": 3.0951475742646784e-05, + "loss": 0.1122, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05114832520484924, + "step": 3305, + "valid_targets_mean": 3759.4, + "valid_targets_min": 2808 + }, + { + "epoch": 2.6935286935286937, + "grad_norm": 0.48697071062687214, + "learning_rate": 3.0917498808591154e-05, + "loss": 0.1849, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15444746613502502, + "step": 3310, + "valid_targets_mean": 2962.8, + "valid_targets_min": 652 + }, + { + "epoch": 2.6975986975986976, + "grad_norm": 0.35131488324463145, + "learning_rate": 3.088347693211861e-05, + "loss": 0.2389, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05616980791091919, + "step": 3315, + "valid_targets_mean": 4014.0, + "valid_targets_min": 3573 + }, + { + "epoch": 2.7016687016687015, + "grad_norm": 0.9395914594142927, + "learning_rate": 3.084941025328185e-05, + "loss": 0.1178, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06727755814790726, + "step": 3320, + "valid_targets_mean": 2568.0, + "valid_targets_min": 822 + }, + { + "epoch": 2.705738705738706, + "grad_norm": 0.6735134741940402, + "learning_rate": 3.081529891231802e-05, + "loss": 0.1426, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18474054336547852, + "step": 3325, + "valid_targets_mean": 2774.0, + "valid_targets_min": 2135 + }, + { + "epoch": 2.70980870980871, + "grad_norm": 0.3312285374873109, + "learning_rate": 3.078114304964814e-05, + "loss": 0.1753, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.045162081718444824, + "step": 3330, + "valid_targets_mean": 4021.2, + "valid_targets_min": 636 + }, + { + "epoch": 2.7138787138787137, + "grad_norm": 0.37157549346068147, + "learning_rate": 3.0746942805876474e-05, + "loss": 0.0963, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07260533422231674, + "step": 3335, + "valid_targets_mean": 4983.9, + "valid_targets_min": 2733 + }, + { + "epoch": 2.717948717948718, + "grad_norm": 0.3681236408727857, + "learning_rate": 3.071269832178999e-05, + "loss": 0.1073, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06341656297445297, + "step": 3340, + "valid_targets_mean": 4406.2, + "valid_targets_min": 2571 + }, + { + "epoch": 2.722018722018722, + "grad_norm": 0.30039279390137386, + "learning_rate": 3.0678409738357785e-05, + "loss": 0.1049, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0368649959564209, + "step": 3345, + "valid_targets_mean": 3177.5, + "valid_targets_min": 970 + }, + { + "epoch": 2.726088726088726, + "grad_norm": 0.36018710976640456, + "learning_rate": 3.0644077196730494e-05, + "loss": 0.0974, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.035121314227581024, + "step": 3350, + "valid_targets_mean": 2699.2, + "valid_targets_min": 748 + }, + { + "epoch": 2.7301587301587302, + "grad_norm": 0.4353275473221667, + "learning_rate": 3.060970083823969e-05, + "loss": 0.0983, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06210782751441002, + "step": 3355, + "valid_targets_mean": 3400.1, + "valid_targets_min": 719 + }, + { + "epoch": 2.734228734228734, + "grad_norm": 0.482149801452687, + "learning_rate": 3.057528080439734e-05, + "loss": 0.1012, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05291946232318878, + "step": 3360, + "valid_targets_mean": 2561.8, + "valid_targets_min": 1820 + }, + { + "epoch": 2.738298738298738, + "grad_norm": 0.41734213557928257, + "learning_rate": 3.054081723689518e-05, + "loss": 0.0863, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.060540057718753815, + "step": 3365, + "valid_targets_mean": 3014.8, + "valid_targets_min": 950 + }, + { + "epoch": 2.7423687423687424, + "grad_norm": 0.5192547449781431, + "learning_rate": 3.050631027760418e-05, + "loss": 0.154, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06714431941509247, + "step": 3370, + "valid_targets_mean": 3569.5, + "valid_targets_min": 3170 + }, + { + "epoch": 2.7464387464387463, + "grad_norm": 0.35017994375089784, + "learning_rate": 3.0471760068573926e-05, + "loss": 0.0779, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04404710233211517, + "step": 3375, + "valid_targets_mean": 3777.1, + "valid_targets_min": 3122 + }, + { + "epoch": 2.7505087505087507, + "grad_norm": 0.3637498254659091, + "learning_rate": 3.0437166752032027e-05, + "loss": 0.0901, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05123971402645111, + "step": 3380, + "valid_targets_mean": 4054.6, + "valid_targets_min": 3511 + }, + { + "epoch": 2.7545787545787546, + "grad_norm": 0.3404345345130886, + "learning_rate": 3.0402530470383573e-05, + "loss": 0.087, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.051291994750499725, + "step": 3385, + "valid_targets_mean": 3485.4, + "valid_targets_min": 792 + }, + { + "epoch": 2.758648758648759, + "grad_norm": 0.46398708542556194, + "learning_rate": 3.0367851366210507e-05, + "loss": 0.1075, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05058937892317772, + "step": 3390, + "valid_targets_mean": 2089.2, + "valid_targets_min": 795 + }, + { + "epoch": 2.762718762718763, + "grad_norm": 0.34745051162795376, + "learning_rate": 3.0333129582271043e-05, + "loss": 0.1277, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04806285351514816, + "step": 3395, + "valid_targets_mean": 3405.9, + "valid_targets_min": 1196 + }, + { + "epoch": 2.7667887667887667, + "grad_norm": 0.4972495007269904, + "learning_rate": 3.029836526149911e-05, + "loss": 0.1255, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.058603860437870026, + "step": 3400, + "valid_targets_mean": 2210.1, + "valid_targets_min": 575 + }, + { + "epoch": 2.770858770858771, + "grad_norm": 0.6469804675168275, + "learning_rate": 3.0263558547003734e-05, + "loss": 0.1165, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08365291357040405, + "step": 3405, + "valid_targets_mean": 1888.6, + "valid_targets_min": 609 + }, + { + "epoch": 2.774928774928775, + "grad_norm": 0.5555623184854452, + "learning_rate": 3.022870958206845e-05, + "loss": 0.1227, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04522637277841568, + "step": 3410, + "valid_targets_mean": 1463.1, + "valid_targets_min": 542 + }, + { + "epoch": 2.778998778998779, + "grad_norm": 0.3659096617640059, + "learning_rate": 3.019381851015072e-05, + "loss": 0.1366, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.043918170034885406, + "step": 3415, + "valid_targets_mean": 3743.8, + "valid_targets_min": 2776 + }, + { + "epoch": 2.7830687830687832, + "grad_norm": 0.4195904880972671, + "learning_rate": 3.0158885474881354e-05, + "loss": 0.1064, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.039590660482645035, + "step": 3420, + "valid_targets_mean": 1335.4, + "valid_targets_min": 629 + }, + { + "epoch": 2.787138787138787, + "grad_norm": 0.5872682399941394, + "learning_rate": 3.0123910620063888e-05, + "loss": 0.1287, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05974281579256058, + "step": 3425, + "valid_targets_mean": 3961.1, + "valid_targets_min": 3701 + }, + { + "epoch": 2.791208791208791, + "grad_norm": 0.42579563457253944, + "learning_rate": 3.008889408967403e-05, + "loss": 0.1103, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05832969397306442, + "step": 3430, + "valid_targets_mean": 2015.1, + "valid_targets_min": 736 + }, + { + "epoch": 2.7952787952787954, + "grad_norm": 0.49595139616926265, + "learning_rate": 3.0053836027859024e-05, + "loss": 0.1041, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08861617743968964, + "step": 3435, + "valid_targets_mean": 2458.6, + "valid_targets_min": 605 + }, + { + "epoch": 2.7993487993487993, + "grad_norm": 0.31433751021873957, + "learning_rate": 3.0018736578937112e-05, + "loss": 0.0902, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.03106316737830639, + "step": 3440, + "valid_targets_mean": 2187.8, + "valid_targets_min": 848 + }, + { + "epoch": 2.8034188034188032, + "grad_norm": 0.30827047949470593, + "learning_rate": 2.9983595887396864e-05, + "loss": 0.0911, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04991922527551651, + "step": 3445, + "valid_targets_mean": 5241.1, + "valid_targets_min": 4456 + }, + { + "epoch": 2.8074888074888076, + "grad_norm": 0.37077247640612376, + "learning_rate": 2.9948414097896678e-05, + "loss": 0.0949, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.057815488427877426, + "step": 3450, + "valid_targets_mean": 4078.9, + "valid_targets_min": 1864 + }, + { + "epoch": 2.8115588115588115, + "grad_norm": 0.3475158518838076, + "learning_rate": 2.9913191355264092e-05, + "loss": 0.1095, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04626894369721413, + "step": 3455, + "valid_targets_mean": 3414.1, + "valid_targets_min": 1361 + }, + { + "epoch": 2.8156288156288154, + "grad_norm": 0.3999990815443734, + "learning_rate": 2.9877927804495255e-05, + "loss": 0.1089, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0528426468372345, + "step": 3460, + "valid_targets_mean": 3541.4, + "valid_targets_min": 1814 + }, + { + "epoch": 2.8196988196988197, + "grad_norm": 0.5010947429743733, + "learning_rate": 2.9842623590754294e-05, + "loss": 0.1171, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04774967581033707, + "step": 3465, + "valid_targets_mean": 1152.0, + "valid_targets_min": 716 + }, + { + "epoch": 2.8237688237688237, + "grad_norm": 0.33526324008830605, + "learning_rate": 2.980727885937272e-05, + "loss": 0.0832, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04189581423997879, + "step": 3470, + "valid_targets_mean": 3898.9, + "valid_targets_min": 2719 + }, + { + "epoch": 2.8278388278388276, + "grad_norm": 0.5262205319214922, + "learning_rate": 2.9771893755848857e-05, + "loss": 0.1081, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.039720356464385986, + "step": 3475, + "valid_targets_mean": 1102.4, + "valid_targets_min": 664 + }, + { + "epoch": 2.831908831908832, + "grad_norm": 0.3448120952299084, + "learning_rate": 2.97364684258472e-05, + "loss": 0.1029, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.03923565521836281, + "step": 3480, + "valid_targets_mean": 2702.8, + "valid_targets_min": 1584 + }, + { + "epoch": 2.835978835978836, + "grad_norm": 0.3424380111555321, + "learning_rate": 2.9701003015197862e-05, + "loss": 0.0824, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.037075385451316833, + "step": 3485, + "valid_targets_mean": 2633.6, + "valid_targets_min": 1003 + }, + { + "epoch": 2.84004884004884, + "grad_norm": 0.37114327921089785, + "learning_rate": 2.9665497669895926e-05, + "loss": 0.1087, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04176440089941025, + "step": 3490, + "valid_targets_mean": 2853.4, + "valid_targets_min": 560 + }, + { + "epoch": 2.844118844118844, + "grad_norm": 0.35943776778483205, + "learning_rate": 2.962995253610089e-05, + "loss": 0.1022, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05177730694413185, + "step": 3495, + "valid_targets_mean": 3300.2, + "valid_targets_min": 1049 + }, + { + "epoch": 2.8481888481888484, + "grad_norm": 0.46323078254655087, + "learning_rate": 2.9594367760136026e-05, + "loss": 0.1228, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07231918722391129, + "step": 3500, + "valid_targets_mean": 2829.9, + "valid_targets_min": 1497 + }, + { + "epoch": 2.8522588522588523, + "grad_norm": 0.4260912091223462, + "learning_rate": 2.955874348848781e-05, + "loss": 0.1006, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05701189488172531, + "step": 3505, + "valid_targets_mean": 3187.1, + "valid_targets_min": 825 + }, + { + "epoch": 2.8563288563288562, + "grad_norm": 0.49909290086865893, + "learning_rate": 2.952307986780528e-05, + "loss": 0.117, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06764853000640869, + "step": 3510, + "valid_targets_mean": 2835.8, + "valid_targets_min": 829 + }, + { + "epoch": 2.8603988603988606, + "grad_norm": 0.45907551992299245, + "learning_rate": 2.9487377044899487e-05, + "loss": 0.1089, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.050722379237413406, + "step": 3515, + "valid_targets_mean": 2051.9, + "valid_targets_min": 650 + }, + { + "epoch": 2.8644688644688645, + "grad_norm": 0.4353504024468164, + "learning_rate": 2.945163516674284e-05, + "loss": 0.1086, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05996803194284439, + "step": 3520, + "valid_targets_mean": 2904.0, + "valid_targets_min": 729 + }, + { + "epoch": 2.8685388685388684, + "grad_norm": 0.30376720990195905, + "learning_rate": 2.9415854380468523e-05, + "loss": 0.0927, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.03928172215819359, + "step": 3525, + "valid_targets_mean": 3957.1, + "valid_targets_min": 3408 + }, + { + "epoch": 2.8726088726088728, + "grad_norm": 0.3136319878421944, + "learning_rate": 2.9380034833369892e-05, + "loss": 0.0944, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.03363402932882309, + "step": 3530, + "valid_targets_mean": 3422.8, + "valid_targets_min": 2543 + }, + { + "epoch": 2.8766788766788767, + "grad_norm": 0.43540877616440243, + "learning_rate": 2.934417667289986e-05, + "loss": 0.1261, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.061333850026130676, + "step": 3535, + "valid_targets_mean": 2753.1, + "valid_targets_min": 1072 + }, + { + "epoch": 2.8807488807488806, + "grad_norm": 0.3782032866681542, + "learning_rate": 2.9308280046670306e-05, + "loss": 0.0878, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.046056248247623444, + "step": 3540, + "valid_targets_mean": 2810.2, + "valid_targets_min": 702 + }, + { + "epoch": 2.884818884818885, + "grad_norm": 0.38282746322202976, + "learning_rate": 2.9272345102451424e-05, + "loss": 0.0974, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06318642199039459, + "step": 3545, + "valid_targets_mean": 5088.1, + "valid_targets_min": 976 + }, + { + "epoch": 2.888888888888889, + "grad_norm": 0.34609048416468996, + "learning_rate": 2.923637198817118e-05, + "loss": 0.096, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.043254029005765915, + "step": 3550, + "valid_targets_mean": 4085.0, + "valid_targets_min": 484 + }, + { + "epoch": 2.8929588929588927, + "grad_norm": 0.2674998878729295, + "learning_rate": 2.920036085191466e-05, + "loss": 0.0825, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.03606154024600983, + "step": 3555, + "valid_targets_mean": 4375.1, + "valid_targets_min": 1148 + }, + { + "epoch": 2.897028897028897, + "grad_norm": 0.4242295936507459, + "learning_rate": 2.9164311841923453e-05, + "loss": 0.0987, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07503281533718109, + "step": 3560, + "valid_targets_mean": 4233.1, + "valid_targets_min": 3268 + }, + { + "epoch": 2.901098901098901, + "grad_norm": 0.3646136002979487, + "learning_rate": 2.9128225106595073e-05, + "loss": 0.0969, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.050537366420030594, + "step": 3565, + "valid_targets_mean": 3133.2, + "valid_targets_min": 559 + }, + { + "epoch": 2.905168905168905, + "grad_norm": 0.3850447940122486, + "learning_rate": 2.909210079448233e-05, + "loss": 0.1205, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04207482933998108, + "step": 3570, + "valid_targets_mean": 2683.9, + "valid_targets_min": 689 + }, + { + "epoch": 2.9092389092389093, + "grad_norm": 0.4532382464548252, + "learning_rate": 2.905593905429272e-05, + "loss": 0.1218, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06049978733062744, + "step": 3575, + "valid_targets_mean": 1883.4, + "valid_targets_min": 616 + }, + { + "epoch": 2.913308913308913, + "grad_norm": 0.4613149439936778, + "learning_rate": 2.9019740034887812e-05, + "loss": 0.078, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.053340524435043335, + "step": 3580, + "valid_targets_mean": 2458.2, + "valid_targets_min": 602 + }, + { + "epoch": 2.9173789173789175, + "grad_norm": 0.3550345526134136, + "learning_rate": 2.898350388528263e-05, + "loss": 0.1219, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05539257824420929, + "step": 3585, + "valid_targets_mean": 3971.8, + "valid_targets_min": 3551 + }, + { + "epoch": 2.9214489214489214, + "grad_norm": 0.4104168375230188, + "learning_rate": 2.8947230754645056e-05, + "loss": 0.0957, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.03185092657804489, + "step": 3590, + "valid_targets_mean": 1747.2, + "valid_targets_min": 596 + }, + { + "epoch": 2.925518925518926, + "grad_norm": 0.335588797815782, + "learning_rate": 2.891092079229521e-05, + "loss": 0.1003, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04076407849788666, + "step": 3595, + "valid_targets_mean": 3301.9, + "valid_targets_min": 799 + }, + { + "epoch": 2.9295889295889297, + "grad_norm": 0.43098042545889287, + "learning_rate": 2.887457414770482e-05, + "loss": 0.1138, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.058630071580410004, + "step": 3600, + "valid_targets_mean": 3122.6, + "valid_targets_min": 706 + }, + { + "epoch": 2.9336589336589336, + "grad_norm": 0.44968298408503327, + "learning_rate": 2.883819097049662e-05, + "loss": 0.1518, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05955217033624649, + "step": 3605, + "valid_targets_mean": 1916.1, + "valid_targets_min": 710 + }, + { + "epoch": 2.937728937728938, + "grad_norm": 0.3761169572763773, + "learning_rate": 2.880177141044374e-05, + "loss": 0.106, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.042976461350917816, + "step": 3610, + "valid_targets_mean": 3087.8, + "valid_targets_min": 647 + }, + { + "epoch": 2.941798941798942, + "grad_norm": 0.49089665811727856, + "learning_rate": 2.8765315617469083e-05, + "loss": 0.1023, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04839683324098587, + "step": 3615, + "valid_targets_mean": 1806.6, + "valid_targets_min": 716 + }, + { + "epoch": 2.9458689458689458, + "grad_norm": 0.3819345463527236, + "learning_rate": 2.8728823741644693e-05, + "loss": 0.096, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.055881187319755554, + "step": 3620, + "valid_targets_mean": 3530.9, + "valid_targets_min": 2920 + }, + { + "epoch": 2.94993894993895, + "grad_norm": 0.4248398215986827, + "learning_rate": 2.869229593319115e-05, + "loss": 0.0982, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05369921028614044, + "step": 3625, + "valid_targets_mean": 2021.4, + "valid_targets_min": 832 + }, + { + "epoch": 2.954008954008954, + "grad_norm": 0.5366502780379148, + "learning_rate": 2.8655732342476974e-05, + "loss": 0.0871, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04955916851758957, + "step": 3630, + "valid_targets_mean": 1252.5, + "valid_targets_min": 759 + }, + { + "epoch": 2.958078958078958, + "grad_norm": 0.3725411467334531, + "learning_rate": 2.8619133120017977e-05, + "loss": 0.1168, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04662502557039261, + "step": 3635, + "valid_targets_mean": 3125.5, + "valid_targets_min": 756 + }, + { + "epoch": 2.9621489621489623, + "grad_norm": 0.4735438299144219, + "learning_rate": 2.858249841647663e-05, + "loss": 0.0898, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06342704594135284, + "step": 3640, + "valid_targets_mean": 2589.4, + "valid_targets_min": 744 + }, + { + "epoch": 2.966218966218966, + "grad_norm": 0.3229059404155852, + "learning_rate": 2.8545828382661483e-05, + "loss": 0.0944, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.03534681349992752, + "step": 3645, + "valid_targets_mean": 2489.9, + "valid_targets_min": 529 + }, + { + "epoch": 2.97028897028897, + "grad_norm": 0.4160099670775479, + "learning_rate": 2.850912316952653e-05, + "loss": 0.1063, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06118070334196091, + "step": 3650, + "valid_targets_mean": 3904.1, + "valid_targets_min": 2017 + }, + { + "epoch": 2.9743589743589745, + "grad_norm": 0.27448891928689173, + "learning_rate": 2.847238292817057e-05, + "loss": 0.0903, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.03835466504096985, + "step": 3655, + "valid_targets_mean": 4848.6, + "valid_targets_min": 516 + }, + { + "epoch": 2.9784289784289784, + "grad_norm": 0.3412081869287908, + "learning_rate": 2.8435607809836585e-05, + "loss": 0.0875, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.03919822350144386, + "step": 3660, + "valid_targets_mean": 2295.4, + "valid_targets_min": 772 + }, + { + "epoch": 2.9824989824989823, + "grad_norm": 0.369354587305238, + "learning_rate": 2.8398797965911164e-05, + "loss": 0.0966, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05215475335717201, + "step": 3665, + "valid_targets_mean": 4149.0, + "valid_targets_min": 2807 + }, + { + "epoch": 2.9865689865689866, + "grad_norm": 0.4781896046920586, + "learning_rate": 2.836195354792382e-05, + "loss": 0.1795, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11530614644289017, + "step": 3670, + "valid_targets_mean": 3047.1, + "valid_targets_min": 2759 + }, + { + "epoch": 2.9906389906389905, + "grad_norm": 0.32100235515429487, + "learning_rate": 2.8325074707546397e-05, + "loss": 0.102, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.03835485503077507, + "step": 3675, + "valid_targets_mean": 3380.2, + "valid_targets_min": 941 + }, + { + "epoch": 2.9947089947089944, + "grad_norm": 0.3685878918749171, + "learning_rate": 2.8288161596592445e-05, + "loss": 0.1049, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.029963500797748566, + "step": 3680, + "valid_targets_mean": 2062.9, + "valid_targets_min": 759 + }, + { + "epoch": 2.998778998778999, + "grad_norm": 0.5570354007672066, + "learning_rate": 2.8251214367016584e-05, + "loss": 0.1036, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11446891725063324, + "step": 3685, + "valid_targets_mean": 2155.2, + "valid_targets_min": 927 + }, + { + "epoch": 3.0024420024420024, + "grad_norm": 0.5300936680238741, + "learning_rate": 2.8214233170913897e-05, + "loss": 0.1816, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10420374572277069, + "step": 3690, + "valid_targets_mean": 5435.1, + "valid_targets_min": 3516 + }, + { + "epoch": 3.0065120065120063, + "grad_norm": 0.38181099491046794, + "learning_rate": 2.8177218160519274e-05, + "loss": 0.2127, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10185196250677109, + "step": 3695, + "valid_targets_mean": 7654.2, + "valid_targets_min": 6354 + }, + { + "epoch": 3.0105820105820107, + "grad_norm": 0.3636523334412447, + "learning_rate": 2.8140169488206813e-05, + "loss": 0.2034, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11651530861854553, + "step": 3700, + "valid_targets_mean": 7341.6, + "valid_targets_min": 5754 + }, + { + "epoch": 3.0146520146520146, + "grad_norm": 0.35386050254952656, + "learning_rate": 2.810308730648919e-05, + "loss": 0.1961, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0877489298582077, + "step": 3705, + "valid_targets_mean": 7666.2, + "valid_targets_min": 5666 + }, + { + "epoch": 3.0187220187220185, + "grad_norm": 0.39000718209727786, + "learning_rate": 2.8065971768017014e-05, + "loss": 0.1975, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1017606109380722, + "step": 3710, + "valid_targets_mean": 7469.0, + "valid_targets_min": 5646 + }, + { + "epoch": 3.022792022792023, + "grad_norm": 0.3982430381116518, + "learning_rate": 2.802882302557821e-05, + "loss": 0.1975, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11622816324234009, + "step": 3715, + "valid_targets_mean": 7046.9, + "valid_targets_min": 5571 + }, + { + "epoch": 3.0268620268620268, + "grad_norm": 0.3840578048570892, + "learning_rate": 2.7991641232097385e-05, + "loss": 0.2136, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11238230019807816, + "step": 3720, + "valid_targets_mean": 7827.6, + "valid_targets_min": 5405 + }, + { + "epoch": 3.030932030932031, + "grad_norm": 0.37593713287199076, + "learning_rate": 2.7954426540635213e-05, + "loss": 0.2008, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10586348176002502, + "step": 3725, + "valid_targets_mean": 7023.5, + "valid_targets_min": 5664 + }, + { + "epoch": 3.035002035002035, + "grad_norm": 0.3617967688248237, + "learning_rate": 2.7917179104387792e-05, + "loss": 0.1929, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09483138471841812, + "step": 3730, + "valid_targets_mean": 7069.8, + "valid_targets_min": 5103 + }, + { + "epoch": 3.039072039072039, + "grad_norm": 0.37727049937650137, + "learning_rate": 2.787989907668601e-05, + "loss": 0.1813, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09244755655527115, + "step": 3735, + "valid_targets_mean": 6712.4, + "valid_targets_min": 5469 + }, + { + "epoch": 3.0431420431420433, + "grad_norm": 0.3766512807014509, + "learning_rate": 2.7842586610994913e-05, + "loss": 0.1949, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09342852234840393, + "step": 3740, + "valid_targets_mean": 7289.2, + "valid_targets_min": 5513 + }, + { + "epoch": 3.047212047212047, + "grad_norm": 0.3695612492414105, + "learning_rate": 2.7805241860913095e-05, + "loss": 0.1865, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09307433664798737, + "step": 3745, + "valid_targets_mean": 7022.1, + "valid_targets_min": 5691 + }, + { + "epoch": 3.051282051282051, + "grad_norm": 0.39612551211364405, + "learning_rate": 2.7767864980172046e-05, + "loss": 0.1948, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10077991336584091, + "step": 3750, + "valid_targets_mean": 6736.0, + "valid_targets_min": 5550 + }, + { + "epoch": 3.0553520553520555, + "grad_norm": 0.4921659164636033, + "learning_rate": 2.773045612263552e-05, + "loss": 0.1827, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09157957881689072, + "step": 3755, + "valid_targets_mean": 6346.9, + "valid_targets_min": 5067 + }, + { + "epoch": 3.0594220594220594, + "grad_norm": 0.4073510419270923, + "learning_rate": 2.7693015442298896e-05, + "loss": 0.1868, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.095358707010746, + "step": 3760, + "valid_targets_mean": 6784.0, + "valid_targets_min": 5618 + }, + { + "epoch": 3.0634920634920633, + "grad_norm": 0.4015377800668027, + "learning_rate": 2.7655543093288567e-05, + "loss": 0.1934, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10923737287521362, + "step": 3765, + "valid_targets_mean": 7568.4, + "valid_targets_min": 5856 + }, + { + "epoch": 3.0675620675620676, + "grad_norm": 0.4034210421570319, + "learning_rate": 2.7618039229861298e-05, + "loss": 0.1854, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09021735936403275, + "step": 3770, + "valid_targets_mean": 7198.5, + "valid_targets_min": 4793 + }, + { + "epoch": 3.0716320716320715, + "grad_norm": 0.3908082661531363, + "learning_rate": 2.7580504006403565e-05, + "loss": 0.1908, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09716609120368958, + "step": 3775, + "valid_targets_mean": 6680.5, + "valid_targets_min": 5350 + }, + { + "epoch": 3.075702075702076, + "grad_norm": 0.38406455833735326, + "learning_rate": 2.7542937577430947e-05, + "loss": 0.2048, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1004624292254448, + "step": 3780, + "valid_targets_mean": 6770.6, + "valid_targets_min": 5753 + }, + { + "epoch": 3.07977207977208, + "grad_norm": 0.7240136077873857, + "learning_rate": 2.7505340097587488e-05, + "loss": 0.2012, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07324851304292679, + "step": 3785, + "valid_targets_mean": 1419.8, + "valid_targets_min": 289 + }, + { + "epoch": 3.0838420838420837, + "grad_norm": 0.4204933410198007, + "learning_rate": 2.7467711721645045e-05, + "loss": 0.1675, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09934137761592865, + "step": 3790, + "valid_targets_mean": 6784.2, + "valid_targets_min": 5042 + }, + { + "epoch": 3.087912087912088, + "grad_norm": 0.3742729131572597, + "learning_rate": 2.7430052604502663e-05, + "loss": 0.1841, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09341548383235931, + "step": 3795, + "valid_targets_mean": 6774.9, + "valid_targets_min": 3875 + }, + { + "epoch": 3.091982091982092, + "grad_norm": 0.41169203877187577, + "learning_rate": 2.7392362901185944e-05, + "loss": 0.1893, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09741862118244171, + "step": 3800, + "valid_targets_mean": 7047.6, + "valid_targets_min": 6134 + }, + { + "epoch": 3.096052096052096, + "grad_norm": 0.3908141039193812, + "learning_rate": 2.7354642766846383e-05, + "loss": 0.1919, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10031354427337646, + "step": 3805, + "valid_targets_mean": 6886.6, + "valid_targets_min": 5869 + }, + { + "epoch": 3.1001221001221, + "grad_norm": 0.3737173994845085, + "learning_rate": 2.7316892356760768e-05, + "loss": 0.1872, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0877181887626648, + "step": 3810, + "valid_targets_mean": 7559.8, + "valid_targets_min": 6145 + }, + { + "epoch": 3.104192104192104, + "grad_norm": 0.345233755743295, + "learning_rate": 2.727911182633049e-05, + "loss": 0.1766, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08521901816129684, + "step": 3815, + "valid_targets_mean": 8920.5, + "valid_targets_min": 4780 + }, + { + "epoch": 3.1082621082621085, + "grad_norm": 0.3279700741389342, + "learning_rate": 2.724130133108096e-05, + "loss": 0.164, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07514619082212448, + "step": 3820, + "valid_targets_mean": 7393.4, + "valid_targets_min": 5116 + }, + { + "epoch": 3.1123321123321124, + "grad_norm": 0.31728564944220194, + "learning_rate": 2.720346102666092e-05, + "loss": 0.1677, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08252862095832825, + "step": 3825, + "valid_targets_mean": 8477.5, + "valid_targets_min": 4033 + }, + { + "epoch": 3.1164021164021163, + "grad_norm": 0.3897732561895216, + "learning_rate": 2.7165591068841835e-05, + "loss": 0.1878, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09388996660709381, + "step": 3830, + "valid_targets_mean": 6736.6, + "valid_targets_min": 4646 + }, + { + "epoch": 3.1204721204721206, + "grad_norm": 0.4003693550688361, + "learning_rate": 2.7127691613517236e-05, + "loss": 0.1769, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09637527167797089, + "step": 3835, + "valid_targets_mean": 7551.5, + "valid_targets_min": 5461 + }, + { + "epoch": 3.1245421245421245, + "grad_norm": 0.42189953518448664, + "learning_rate": 2.7089762816702072e-05, + "loss": 0.198, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11182335764169693, + "step": 3840, + "valid_targets_mean": 7091.0, + "valid_targets_min": 4879 + }, + { + "epoch": 3.1286121286121285, + "grad_norm": 0.3964626223828562, + "learning_rate": 2.70518048345321e-05, + "loss": 0.2008, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08981172740459442, + "step": 3845, + "valid_targets_mean": 6269.8, + "valid_targets_min": 4606 + }, + { + "epoch": 3.132682132682133, + "grad_norm": 0.4236778964061071, + "learning_rate": 2.7013817823263206e-05, + "loss": 0.1998, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08929223567247391, + "step": 3850, + "valid_targets_mean": 6818.4, + "valid_targets_min": 5525 + }, + { + "epoch": 3.1367521367521367, + "grad_norm": 0.4184282493040094, + "learning_rate": 2.6975801939270762e-05, + "loss": 0.1842, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09297116100788116, + "step": 3855, + "valid_targets_mean": 7365.4, + "valid_targets_min": 5000 + }, + { + "epoch": 3.1408221408221406, + "grad_norm": 0.3991658399742734, + "learning_rate": 2.6937757339049027e-05, + "loss": 0.1928, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08477732539176941, + "step": 3860, + "valid_targets_mean": 6212.1, + "valid_targets_min": 3988 + }, + { + "epoch": 3.144892144892145, + "grad_norm": 0.3522450277037621, + "learning_rate": 2.6899684179210446e-05, + "loss": 0.1925, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09426316618919373, + "step": 3865, + "valid_targets_mean": 8396.9, + "valid_targets_min": 6131 + }, + { + "epoch": 3.148962148962149, + "grad_norm": 0.40217083920878643, + "learning_rate": 2.6861582616485048e-05, + "loss": 0.1903, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09596344083547592, + "step": 3870, + "valid_targets_mean": 7651.0, + "valid_targets_min": 4061 + }, + { + "epoch": 3.1530321530321532, + "grad_norm": 0.38649324870646884, + "learning_rate": 2.6823452807719763e-05, + "loss": 0.1934, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09395226836204529, + "step": 3875, + "valid_targets_mean": 7090.1, + "valid_targets_min": 5203 + }, + { + "epoch": 3.157102157102157, + "grad_norm": 0.37118239164661415, + "learning_rate": 2.678529490987783e-05, + "loss": 0.1927, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09067308902740479, + "step": 3880, + "valid_targets_mean": 7001.0, + "valid_targets_min": 5362 + }, + { + "epoch": 3.161172161172161, + "grad_norm": 0.3848861837027517, + "learning_rate": 2.674710908003808e-05, + "loss": 0.1932, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09417563676834106, + "step": 3885, + "valid_targets_mean": 6359.2, + "valid_targets_min": 5236 + }, + { + "epoch": 3.1652421652421654, + "grad_norm": 0.407400812686477, + "learning_rate": 2.6708895475394362e-05, + "loss": 0.1897, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10153548419475555, + "step": 3890, + "valid_targets_mean": 5939.8, + "valid_targets_min": 4693 + }, + { + "epoch": 3.1693121693121693, + "grad_norm": 1.1139169285060038, + "learning_rate": 2.6670654253254834e-05, + "loss": 0.1609, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.03590000420808792, + "step": 3895, + "valid_targets_mean": 208.6, + "valid_targets_min": 134 + }, + { + "epoch": 3.173382173382173, + "grad_norm": 0.4169772802304493, + "learning_rate": 2.663238557104136e-05, + "loss": 0.1918, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09831123054027557, + "step": 3900, + "valid_targets_mean": 6008.5, + "valid_targets_min": 4956 + }, + { + "epoch": 3.1774521774521776, + "grad_norm": 0.37807853509176925, + "learning_rate": 2.659408958628883e-05, + "loss": 0.1839, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08299198746681213, + "step": 3905, + "valid_targets_mean": 6042.0, + "valid_targets_min": 4817 + }, + { + "epoch": 3.1815221815221815, + "grad_norm": 0.3981260221085175, + "learning_rate": 2.6555766456644553e-05, + "loss": 0.1912, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09234784543514252, + "step": 3910, + "valid_targets_mean": 6608.6, + "valid_targets_min": 5102 + }, + { + "epoch": 3.185592185592186, + "grad_norm": 0.3880642673862131, + "learning_rate": 2.6517416339867544e-05, + "loss": 0.1823, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09438446909189224, + "step": 3915, + "valid_targets_mean": 6670.1, + "valid_targets_min": 4869 + }, + { + "epoch": 3.1896621896621897, + "grad_norm": 0.38644112067025166, + "learning_rate": 2.6479039393827944e-05, + "loss": 0.1834, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08336789160966873, + "step": 3920, + "valid_targets_mean": 6088.4, + "valid_targets_min": 4946 + }, + { + "epoch": 3.1937321937321936, + "grad_norm": 0.4014072145721109, + "learning_rate": 2.6440635776506316e-05, + "loss": 0.1939, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08969050645828247, + "step": 3925, + "valid_targets_mean": 6228.4, + "valid_targets_min": 4856 + }, + { + "epoch": 3.197802197802198, + "grad_norm": 0.4562972639302328, + "learning_rate": 2.6402205645993038e-05, + "loss": 0.1926, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09550561010837555, + "step": 3930, + "valid_targets_mean": 5376.6, + "valid_targets_min": 4145 + }, + { + "epoch": 3.201872201872202, + "grad_norm": 0.48273208307655285, + "learning_rate": 2.636374916048761e-05, + "loss": 0.1558, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09403058886528015, + "step": 3935, + "valid_targets_mean": 6309.2, + "valid_targets_min": 5305 + }, + { + "epoch": 3.205942205942206, + "grad_norm": 0.40883426520856037, + "learning_rate": 2.6325266478298032e-05, + "loss": 0.1849, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08947273343801498, + "step": 3940, + "valid_targets_mean": 6221.5, + "valid_targets_min": 4636 + }, + { + "epoch": 3.21001221001221, + "grad_norm": 0.39532763897190637, + "learning_rate": 2.6286757757840144e-05, + "loss": 0.1858, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09781331568956375, + "step": 3945, + "valid_targets_mean": 6831.5, + "valid_targets_min": 5668 + }, + { + "epoch": 3.214082214082214, + "grad_norm": 0.4023824820856924, + "learning_rate": 2.6248223157636982e-05, + "loss": 0.187, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09898592531681061, + "step": 3950, + "valid_targets_mean": 6437.1, + "valid_targets_min": 5004 + }, + { + "epoch": 3.218152218152218, + "grad_norm": 0.4037103091391165, + "learning_rate": 2.62096628363181e-05, + "loss": 0.1791, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0871603712439537, + "step": 3955, + "valid_targets_mean": 6205.8, + "valid_targets_min": 5341 + }, + { + "epoch": 3.2222222222222223, + "grad_norm": 0.4037836380504369, + "learning_rate": 2.6171076952618943e-05, + "loss": 0.1846, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09813414514064789, + "step": 3960, + "valid_targets_mean": 6748.0, + "valid_targets_min": 5363 + }, + { + "epoch": 3.2262922262922262, + "grad_norm": 0.407885387401038, + "learning_rate": 2.61324656653802e-05, + "loss": 0.184, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0859188586473465, + "step": 3965, + "valid_targets_mean": 5210.5, + "valid_targets_min": 791 + }, + { + "epoch": 3.23036223036223, + "grad_norm": 0.6923397809278381, + "learning_rate": 2.60938291335471e-05, + "loss": 0.1989, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07540939748287201, + "step": 3970, + "valid_targets_mean": 1279.5, + "valid_targets_min": 746 + }, + { + "epoch": 3.2344322344322345, + "grad_norm": 0.7921805175588379, + "learning_rate": 2.6055167516168828e-05, + "loss": 0.1767, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09494996815919876, + "step": 3975, + "valid_targets_mean": 1635.8, + "valid_targets_min": 661 + }, + { + "epoch": 3.2385022385022384, + "grad_norm": 0.7552177145973241, + "learning_rate": 2.6016480972397807e-05, + "loss": 0.1707, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08284762501716614, + "step": 3980, + "valid_targets_mean": 1420.2, + "valid_targets_min": 826 + }, + { + "epoch": 3.2425722425722427, + "grad_norm": 0.768222331032816, + "learning_rate": 2.5977769661489102e-05, + "loss": 0.166, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07532330602407455, + "step": 3985, + "valid_targets_mean": 1399.9, + "valid_targets_min": 742 + }, + { + "epoch": 3.2466422466422467, + "grad_norm": 0.7896098768251143, + "learning_rate": 2.5939033742799692e-05, + "loss": 0.1753, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06821978092193604, + "step": 3990, + "valid_targets_mean": 1309.9, + "valid_targets_min": 718 + }, + { + "epoch": 3.2507122507122506, + "grad_norm": 0.7418241829283232, + "learning_rate": 2.5900273375787898e-05, + "loss": 0.1737, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0990295559167862, + "step": 3995, + "valid_targets_mean": 1729.4, + "valid_targets_min": 764 + }, + { + "epoch": 3.254782254782255, + "grad_norm": 0.6949460534292148, + "learning_rate": 2.586148872001265e-05, + "loss": 0.159, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08115358650684357, + "step": 4000, + "valid_targets_mean": 1651.4, + "valid_targets_min": 814 + }, + { + "epoch": 3.258852258852259, + "grad_norm": 0.8287359657436814, + "learning_rate": 2.5822679935132876e-05, + "loss": 0.1777, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06513510644435883, + "step": 4005, + "valid_targets_mean": 1192.0, + "valid_targets_min": 610 + }, + { + "epoch": 3.2629222629222627, + "grad_norm": 0.7416322068860572, + "learning_rate": 2.578384718090685e-05, + "loss": 0.163, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09329591691493988, + "step": 4010, + "valid_targets_mean": 1879.0, + "valid_targets_min": 817 + }, + { + "epoch": 3.266992266992267, + "grad_norm": 0.6590970920655556, + "learning_rate": 2.5744990617191486e-05, + "loss": 0.1659, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.075216144323349, + "step": 4015, + "valid_targets_mean": 1500.4, + "valid_targets_min": 837 + }, + { + "epoch": 3.271062271062271, + "grad_norm": 0.7902390874577276, + "learning_rate": 2.5706110403941724e-05, + "loss": 0.1701, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11390472203493118, + "step": 4020, + "valid_targets_mean": 1661.8, + "valid_targets_min": 593 + }, + { + "epoch": 3.2751322751322753, + "grad_norm": 0.6950850372161688, + "learning_rate": 2.5667206701209876e-05, + "loss": 0.1649, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08275754749774933, + "step": 4025, + "valid_targets_mean": 1537.8, + "valid_targets_min": 704 + }, + { + "epoch": 3.2792022792022792, + "grad_norm": 0.7394050894613096, + "learning_rate": 2.562827966914492e-05, + "loss": 0.1611, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06054488569498062, + "step": 4030, + "valid_targets_mean": 1227.0, + "valid_targets_min": 722 + }, + { + "epoch": 3.283272283272283, + "grad_norm": 0.6772775941130794, + "learning_rate": 2.5589329467991885e-05, + "loss": 0.1581, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07103030383586884, + "step": 4035, + "valid_targets_mean": 1364.5, + "valid_targets_min": 642 + }, + { + "epoch": 3.2873422873422875, + "grad_norm": 0.7940146053459805, + "learning_rate": 2.555035625809118e-05, + "loss": 0.1558, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07808074355125427, + "step": 4040, + "valid_targets_mean": 1358.5, + "valid_targets_min": 518 + }, + { + "epoch": 3.2914122914122914, + "grad_norm": 0.7802294896653876, + "learning_rate": 2.5511360199877934e-05, + "loss": 0.1729, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10007496178150177, + "step": 4045, + "valid_targets_mean": 1590.6, + "valid_targets_min": 728 + }, + { + "epoch": 3.2954822954822953, + "grad_norm": 0.7087462700416979, + "learning_rate": 2.5472341453881316e-05, + "loss": 0.1702, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1003953143954277, + "step": 4050, + "valid_targets_mean": 1791.4, + "valid_targets_min": 820 + }, + { + "epoch": 3.2995522995522997, + "grad_norm": 0.7315742027165163, + "learning_rate": 2.543330018072389e-05, + "loss": 0.1718, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09038805961608887, + "step": 4055, + "valid_targets_mean": 1470.8, + "valid_targets_min": 818 + }, + { + "epoch": 3.3036223036223036, + "grad_norm": 0.7849182759993877, + "learning_rate": 2.5394236541120978e-05, + "loss": 0.1585, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06879038363695145, + "step": 4060, + "valid_targets_mean": 1278.6, + "valid_targets_min": 625 + }, + { + "epoch": 3.3076923076923075, + "grad_norm": 0.6965689168981399, + "learning_rate": 2.5355150695879952e-05, + "loss": 0.1639, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1073441207408905, + "step": 4065, + "valid_targets_mean": 2313.5, + "valid_targets_min": 790 + }, + { + "epoch": 3.311762311762312, + "grad_norm": 0.7410314939380847, + "learning_rate": 2.5316042805899616e-05, + "loss": 0.1712, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11727465689182281, + "step": 4070, + "valid_targets_mean": 2231.9, + "valid_targets_min": 1335 + }, + { + "epoch": 3.3158323158323157, + "grad_norm": 0.6764884077292488, + "learning_rate": 2.5276913032169485e-05, + "loss": 0.1645, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09181191027164459, + "step": 4075, + "valid_targets_mean": 1729.5, + "valid_targets_min": 719 + }, + { + "epoch": 3.3199023199023197, + "grad_norm": 0.7670282548657762, + "learning_rate": 2.52377615357692e-05, + "loss": 0.156, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06349903345108032, + "step": 4080, + "valid_targets_mean": 1222.9, + "valid_targets_min": 697 + }, + { + "epoch": 3.323972323972324, + "grad_norm": 0.7717936984864601, + "learning_rate": 2.5198588477867806e-05, + "loss": 0.158, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09327976405620575, + "step": 4085, + "valid_targets_mean": 1521.1, + "valid_targets_min": 998 + }, + { + "epoch": 3.328042328042328, + "grad_norm": 0.7083051922595436, + "learning_rate": 2.515939401972311e-05, + "loss": 0.159, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08291327953338623, + "step": 4090, + "valid_targets_mean": 1693.9, + "valid_targets_min": 629 + }, + { + "epoch": 3.3321123321123323, + "grad_norm": 0.7259684658670004, + "learning_rate": 2.5120178322681003e-05, + "loss": 0.1615, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0862584039568901, + "step": 4095, + "valid_targets_mean": 1630.2, + "valid_targets_min": 1078 + }, + { + "epoch": 3.336182336182336, + "grad_norm": 0.6785358648236004, + "learning_rate": 2.5080941548174825e-05, + "loss": 0.1595, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07994429767131805, + "step": 4100, + "valid_targets_mean": 1843.0, + "valid_targets_min": 1075 + }, + { + "epoch": 3.34025234025234, + "grad_norm": 0.7835115275000952, + "learning_rate": 2.5041683857724676e-05, + "loss": 0.1646, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09122762084007263, + "step": 4105, + "valid_targets_mean": 1848.2, + "valid_targets_min": 1317 + }, + { + "epoch": 3.3443223443223444, + "grad_norm": 0.7678656781444083, + "learning_rate": 2.5002405412936748e-05, + "loss": 0.1569, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09253916144371033, + "step": 4110, + "valid_targets_mean": 1727.4, + "valid_targets_min": 957 + }, + { + "epoch": 3.3483923483923483, + "grad_norm": 0.7652220167507279, + "learning_rate": 2.4963106375502673e-05, + "loss": 0.1606, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0644087865948677, + "step": 4115, + "valid_targets_mean": 1115.4, + "valid_targets_min": 643 + }, + { + "epoch": 3.3524623524623527, + "grad_norm": 0.749831847944443, + "learning_rate": 2.492378690719887e-05, + "loss": 0.1639, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08243843168020248, + "step": 4120, + "valid_targets_mean": 1547.2, + "valid_targets_min": 1111 + }, + { + "epoch": 3.3565323565323566, + "grad_norm": 0.7150068797955984, + "learning_rate": 2.4884447169885855e-05, + "loss": 0.1519, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07063218951225281, + "step": 4125, + "valid_targets_mean": 1450.0, + "valid_targets_min": 705 + }, + { + "epoch": 3.3606023606023605, + "grad_norm": 0.8508456830574319, + "learning_rate": 2.4845087325507557e-05, + "loss": 0.1572, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08344951272010803, + "step": 4130, + "valid_targets_mean": 1539.0, + "valid_targets_min": 1288 + }, + { + "epoch": 3.364672364672365, + "grad_norm": 0.75421710601425, + "learning_rate": 2.4805707536090708e-05, + "loss": 0.1478, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0741521492600441, + "step": 4135, + "valid_targets_mean": 1321.2, + "valid_targets_min": 753 + }, + { + "epoch": 3.3687423687423688, + "grad_norm": 0.737658592772203, + "learning_rate": 2.476630796374413e-05, + "loss": 0.159, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07522574812173843, + "step": 4140, + "valid_targets_mean": 1480.0, + "valid_targets_min": 817 + }, + { + "epoch": 3.3728123728123727, + "grad_norm": 0.7186314188969304, + "learning_rate": 2.4726888770658103e-05, + "loss": 0.1536, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06374834477901459, + "step": 4145, + "valid_targets_mean": 1293.9, + "valid_targets_min": 974 + }, + { + "epoch": 3.376882376882377, + "grad_norm": 0.7633726706709022, + "learning_rate": 2.4687450119103637e-05, + "loss": 0.1579, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08639413118362427, + "step": 4150, + "valid_targets_mean": 1529.2, + "valid_targets_min": 657 + }, + { + "epoch": 3.380952380952381, + "grad_norm": 1.1964337610025615, + "learning_rate": 2.464799217143188e-05, + "loss": 0.1559, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0945994108915329, + "step": 4155, + "valid_targets_mean": 1678.0, + "valid_targets_min": 880 + }, + { + "epoch": 3.385022385022385, + "grad_norm": 0.7985356155534645, + "learning_rate": 2.46085150900734e-05, + "loss": 0.1614, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09271052479743958, + "step": 4160, + "valid_targets_mean": 1609.4, + "valid_targets_min": 1050 + }, + { + "epoch": 3.389092389092389, + "grad_norm": 1.0437056142019698, + "learning_rate": 2.4569019037537525e-05, + "loss": 0.1538, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07413659244775772, + "step": 4165, + "valid_targets_mean": 1449.0, + "valid_targets_min": 682 + }, + { + "epoch": 3.393162393162393, + "grad_norm": 0.7535290857067112, + "learning_rate": 2.4529504176411685e-05, + "loss": 0.1568, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07152436673641205, + "step": 4170, + "valid_targets_mean": 1344.0, + "valid_targets_min": 703 + }, + { + "epoch": 3.397232397232397, + "grad_norm": 0.805609957537731, + "learning_rate": 2.448997066936073e-05, + "loss": 0.1672, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07804272323846817, + "step": 4175, + "valid_targets_mean": 1469.4, + "valid_targets_min": 934 + }, + { + "epoch": 3.4013024013024014, + "grad_norm": 0.9417672703147607, + "learning_rate": 2.445041867912629e-05, + "loss": 0.1694, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11873181909322739, + "step": 4180, + "valid_targets_mean": 1829.1, + "valid_targets_min": 734 + }, + { + "epoch": 3.4053724053724053, + "grad_norm": 1.044289943148221, + "learning_rate": 2.4410848368526053e-05, + "loss": 0.157, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10154280066490173, + "step": 4185, + "valid_targets_mean": 2010.8, + "valid_targets_min": 1437 + }, + { + "epoch": 3.4094424094424096, + "grad_norm": 0.7104274369527875, + "learning_rate": 2.4371259900453126e-05, + "loss": 0.158, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.060282643884420395, + "step": 4190, + "valid_targets_mean": 1405.2, + "valid_targets_min": 551 + }, + { + "epoch": 3.4135124135124135, + "grad_norm": 0.8437670408273207, + "learning_rate": 2.433165343787538e-05, + "loss": 0.164, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10241234302520752, + "step": 4195, + "valid_targets_mean": 1817.6, + "valid_targets_min": 931 + }, + { + "epoch": 3.4175824175824174, + "grad_norm": 0.726473705947651, + "learning_rate": 2.4292029143834756e-05, + "loss": 0.1523, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07595465332269669, + "step": 4200, + "valid_targets_mean": 1599.1, + "valid_targets_min": 904 + }, + { + "epoch": 3.421652421652422, + "grad_norm": 0.7469567163494483, + "learning_rate": 2.425238718144659e-05, + "loss": 0.1507, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08603636920452118, + "step": 4205, + "valid_targets_mean": 1774.8, + "valid_targets_min": 1158 + }, + { + "epoch": 3.4257224257224257, + "grad_norm": 0.8033287596020903, + "learning_rate": 2.4212727713898948e-05, + "loss": 0.148, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07098281383514404, + "step": 4210, + "valid_targets_mean": 1229.2, + "valid_targets_min": 756 + }, + { + "epoch": 3.42979242979243, + "grad_norm": 0.8172817454896187, + "learning_rate": 2.417305090445198e-05, + "loss": 0.164, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08913031220436096, + "step": 4215, + "valid_targets_mean": 1536.9, + "valid_targets_min": 854 + }, + { + "epoch": 3.433862433862434, + "grad_norm": 0.7239421409227166, + "learning_rate": 2.4133356916437202e-05, + "loss": 0.1581, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0854625403881073, + "step": 4220, + "valid_targets_mean": 1739.5, + "valid_targets_min": 1462 + }, + { + "epoch": 3.437932437932438, + "grad_norm": 0.7197452670352181, + "learning_rate": 2.4093645913256855e-05, + "loss": 0.1544, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06945577263832092, + "step": 4225, + "valid_targets_mean": 1475.9, + "valid_targets_min": 944 + }, + { + "epoch": 3.442002442002442, + "grad_norm": 0.7090142042404015, + "learning_rate": 2.405391805838322e-05, + "loss": 0.1531, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07137700915336609, + "step": 4230, + "valid_targets_mean": 1482.6, + "valid_targets_min": 1227 + }, + { + "epoch": 3.446072446072446, + "grad_norm": 0.7513863866998308, + "learning_rate": 2.401417351535795e-05, + "loss": 0.1573, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06505010277032852, + "step": 4235, + "valid_targets_mean": 1276.5, + "valid_targets_min": 740 + }, + { + "epoch": 3.45014245014245, + "grad_norm": 0.7487725404958699, + "learning_rate": 2.3974412447791403e-05, + "loss": 0.1584, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09261137992143631, + "step": 4240, + "valid_targets_mean": 2024.8, + "valid_targets_min": 881 + }, + { + "epoch": 3.4542124542124544, + "grad_norm": 0.8187666821446936, + "learning_rate": 2.3934635019361955e-05, + "loss": 0.1561, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08315032720565796, + "step": 4245, + "valid_targets_mean": 1288.6, + "valid_targets_min": 636 + }, + { + "epoch": 3.4582824582824583, + "grad_norm": 0.753634047230146, + "learning_rate": 2.3894841393815323e-05, + "loss": 0.1521, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.055352650582790375, + "step": 4250, + "valid_targets_mean": 1103.9, + "valid_targets_min": 814 + }, + { + "epoch": 3.462352462352462, + "grad_norm": 0.8079463943992531, + "learning_rate": 2.3855031734963925e-05, + "loss": 0.1473, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07735898345708847, + "step": 4255, + "valid_targets_mean": 1392.9, + "valid_targets_min": 820 + }, + { + "epoch": 3.4664224664224665, + "grad_norm": 0.7707769218886613, + "learning_rate": 2.3815206206686144e-05, + "loss": 0.154, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06932565569877625, + "step": 4260, + "valid_targets_mean": 1392.4, + "valid_targets_min": 661 + }, + { + "epoch": 3.4704924704924704, + "grad_norm": 0.7958744117773525, + "learning_rate": 2.3775364972925737e-05, + "loss": 0.1574, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07428084313869476, + "step": 4265, + "valid_targets_mean": 1441.8, + "valid_targets_min": 728 + }, + { + "epoch": 3.4745624745624744, + "grad_norm": 0.7647293162181219, + "learning_rate": 2.3735508197691068e-05, + "loss": 0.1493, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07184837758541107, + "step": 4270, + "valid_targets_mean": 1413.5, + "valid_targets_min": 692 + }, + { + "epoch": 3.4786324786324787, + "grad_norm": 0.760684869364684, + "learning_rate": 2.369563604505451e-05, + "loss": 0.1566, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07639266550540924, + "step": 4275, + "valid_targets_mean": 1323.5, + "valid_targets_min": 840 + }, + { + "epoch": 3.4827024827024826, + "grad_norm": 0.785131473550303, + "learning_rate": 2.365574867915172e-05, + "loss": 0.1568, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07727733254432678, + "step": 4280, + "valid_targets_mean": 1488.4, + "valid_targets_min": 950 + }, + { + "epoch": 3.4867724867724865, + "grad_norm": 0.712055390091951, + "learning_rate": 2.361584626418099e-05, + "loss": 0.1509, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08490264415740967, + "step": 4285, + "valid_targets_mean": 1935.9, + "valid_targets_min": 878 + }, + { + "epoch": 3.490842490842491, + "grad_norm": 0.8293965286613701, + "learning_rate": 2.3575928964402558e-05, + "loss": 0.1579, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08114030957221985, + "step": 4290, + "valid_targets_mean": 1502.1, + "valid_targets_min": 836 + }, + { + "epoch": 3.494912494912495, + "grad_norm": 0.7983467061857048, + "learning_rate": 2.353599694413794e-05, + "loss": 0.1521, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08377361297607422, + "step": 4295, + "valid_targets_mean": 1535.1, + "valid_targets_min": 1013 + }, + { + "epoch": 3.498982498982499, + "grad_norm": 0.7795845542928671, + "learning_rate": 2.349605036776925e-05, + "loss": 0.1579, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06817035377025604, + "step": 4300, + "valid_targets_mean": 1346.4, + "valid_targets_min": 748 + }, + { + "epoch": 3.503052503052503, + "grad_norm": 0.9206942614110478, + "learning_rate": 2.3456089399738514e-05, + "loss": 0.1538, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.079506516456604, + "step": 4305, + "valid_targets_mean": 1543.9, + "valid_targets_min": 729 + }, + { + "epoch": 3.5071225071225074, + "grad_norm": 0.7302785723268027, + "learning_rate": 2.3416114204547e-05, + "loss": 0.1637, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07028716802597046, + "step": 4310, + "valid_targets_mean": 1541.9, + "valid_targets_min": 1219 + }, + { + "epoch": 3.5111925111925113, + "grad_norm": 0.7112489536142887, + "learning_rate": 2.3376124946754574e-05, + "loss": 0.155, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07776269316673279, + "step": 4315, + "valid_targets_mean": 1632.8, + "valid_targets_min": 916 + }, + { + "epoch": 3.515262515262515, + "grad_norm": 0.7261049103300148, + "learning_rate": 2.3336121790978955e-05, + "loss": 0.1547, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06957513093948364, + "step": 4320, + "valid_targets_mean": 1349.9, + "valid_targets_min": 646 + }, + { + "epoch": 3.5193325193325196, + "grad_norm": 0.7903987711410705, + "learning_rate": 2.3296104901895074e-05, + "loss": 0.1508, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08000437915325165, + "step": 4325, + "valid_targets_mean": 1383.0, + "valid_targets_min": 906 + }, + { + "epoch": 3.5234025234025235, + "grad_norm": 0.7622357569514845, + "learning_rate": 2.3256074444234437e-05, + "loss": 0.1479, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07310856133699417, + "step": 4330, + "valid_targets_mean": 1577.5, + "valid_targets_min": 848 + }, + { + "epoch": 3.5274725274725274, + "grad_norm": 0.8743060398490866, + "learning_rate": 2.3216030582784358e-05, + "loss": 0.1597, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07247571647167206, + "step": 4335, + "valid_targets_mean": 1183.5, + "valid_targets_min": 692 + }, + { + "epoch": 3.5315425315425317, + "grad_norm": 0.7953087241662045, + "learning_rate": 2.317597348238735e-05, + "loss": 0.1468, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06898859143257141, + "step": 4340, + "valid_targets_mean": 1373.9, + "valid_targets_min": 821 + }, + { + "epoch": 3.5356125356125356, + "grad_norm": 0.7438998227523352, + "learning_rate": 2.3135903307940425e-05, + "loss": 0.1473, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06783884018659592, + "step": 4345, + "valid_targets_mean": 1428.1, + "valid_targets_min": 1013 + }, + { + "epoch": 3.5396825396825395, + "grad_norm": 0.7403466593709432, + "learning_rate": 2.3095820224394418e-05, + "loss": 0.1497, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06895196437835693, + "step": 4350, + "valid_targets_mean": 1420.9, + "valid_targets_min": 781 + }, + { + "epoch": 3.543752543752544, + "grad_norm": 0.7776344239197351, + "learning_rate": 2.305572439675329e-05, + "loss": 0.163, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08171670138835907, + "step": 4355, + "valid_targets_mean": 1255.1, + "valid_targets_min": 548 + }, + { + "epoch": 3.547822547822548, + "grad_norm": 0.7334294368492669, + "learning_rate": 2.3015615990073483e-05, + "loss": 0.1475, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0776100903749466, + "step": 4360, + "valid_targets_mean": 1411.0, + "valid_targets_min": 694 + }, + { + "epoch": 3.5518925518925517, + "grad_norm": 0.7080206142881729, + "learning_rate": 2.29754951694632e-05, + "loss": 0.156, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0899009108543396, + "step": 4365, + "valid_targets_mean": 1973.8, + "valid_targets_min": 1030 + }, + { + "epoch": 3.555962555962556, + "grad_norm": 0.6615510612620584, + "learning_rate": 2.2935362100081767e-05, + "loss": 0.1414, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.061547666788101196, + "step": 4370, + "valid_targets_mean": 1580.2, + "valid_targets_min": 682 + }, + { + "epoch": 3.56003256003256, + "grad_norm": 0.81839532426026, + "learning_rate": 2.2895216947138924e-05, + "loss": 0.145, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07387113571166992, + "step": 4375, + "valid_targets_mean": 1425.4, + "valid_targets_min": 990 + }, + { + "epoch": 3.564102564102564, + "grad_norm": 0.7054362018903443, + "learning_rate": 2.285505987589415e-05, + "loss": 0.1498, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.055615417659282684, + "step": 4380, + "valid_targets_mean": 1299.6, + "valid_targets_min": 639 + }, + { + "epoch": 3.5681725681725682, + "grad_norm": 0.8007028427332834, + "learning_rate": 2.281489105165599e-05, + "loss": 0.1478, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0648592859506607, + "step": 4385, + "valid_targets_mean": 1458.8, + "valid_targets_min": 721 + }, + { + "epoch": 3.572242572242572, + "grad_norm": 0.8445299441357773, + "learning_rate": 2.277471063978137e-05, + "loss": 0.1504, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07231980562210083, + "step": 4390, + "valid_targets_mean": 1329.8, + "valid_targets_min": 874 + }, + { + "epoch": 3.576312576312576, + "grad_norm": 0.6807816095455992, + "learning_rate": 2.2734518805674913e-05, + "loss": 0.153, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07544272392988205, + "step": 4395, + "valid_targets_mean": 1559.9, + "valid_targets_min": 686 + }, + { + "epoch": 3.5803825803825804, + "grad_norm": 0.8452163681063782, + "learning_rate": 2.269431571478828e-05, + "loss": 0.1538, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08027100563049316, + "step": 4400, + "valid_targets_mean": 1392.5, + "valid_targets_min": 713 + }, + { + "epoch": 3.5844525844525843, + "grad_norm": 0.7806355035255146, + "learning_rate": 2.2654101532619443e-05, + "loss": 0.152, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06163328140974045, + "step": 4405, + "valid_targets_mean": 1209.0, + "valid_targets_min": 393 + }, + { + "epoch": 3.5885225885225887, + "grad_norm": 0.48984256846528007, + "learning_rate": 2.2613876424712052e-05, + "loss": 0.1305, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04831802099943161, + "step": 4410, + "valid_targets_mean": 2470.8, + "valid_targets_min": 816 + }, + { + "epoch": 3.5925925925925926, + "grad_norm": 0.39618952222130516, + "learning_rate": 2.257364055665473e-05, + "loss": 0.0913, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04768257588148117, + "step": 4415, + "valid_targets_mean": 3278.9, + "valid_targets_min": 2461 + }, + { + "epoch": 3.596662596662597, + "grad_norm": 0.39078618847965113, + "learning_rate": 2.2533394094080397e-05, + "loss": 0.0877, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.048612505197525024, + "step": 4420, + "valid_targets_mean": 3783.6, + "valid_targets_min": 2571 + }, + { + "epoch": 3.600732600732601, + "grad_norm": 0.3479538454031919, + "learning_rate": 2.2493137202665566e-05, + "loss": 0.1058, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.046523869037628174, + "step": 4425, + "valid_targets_mean": 3905.0, + "valid_targets_min": 3179 + }, + { + "epoch": 3.6048026048026047, + "grad_norm": 0.40917149997042873, + "learning_rate": 2.2452870048129707e-05, + "loss": 0.0847, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.038769036531448364, + "step": 4430, + "valid_targets_mean": 3000.1, + "valid_targets_min": 943 + }, + { + "epoch": 3.608872608872609, + "grad_norm": 0.7387439093745595, + "learning_rate": 2.241259279623453e-05, + "loss": 0.1063, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06168878823518753, + "step": 4435, + "valid_targets_mean": 974.8, + "valid_targets_min": 525 + }, + { + "epoch": 3.612942612942613, + "grad_norm": 0.43905368150585783, + "learning_rate": 2.2372305612783305e-05, + "loss": 0.1354, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04938036948442459, + "step": 4440, + "valid_targets_mean": 2743.8, + "valid_targets_min": 954 + }, + { + "epoch": 3.617012617012617, + "grad_norm": 0.4689396969033086, + "learning_rate": 2.233200866362019e-05, + "loss": 0.094, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.060613177716732025, + "step": 4445, + "valid_targets_mean": 2744.4, + "valid_targets_min": 718 + }, + { + "epoch": 3.6210826210826212, + "grad_norm": 0.3460122217852074, + "learning_rate": 2.2291702114629542e-05, + "loss": 0.1055, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0620415098965168, + "step": 4450, + "valid_targets_mean": 3279.1, + "valid_targets_min": 540 + }, + { + "epoch": 3.625152625152625, + "grad_norm": 0.2918413867985773, + "learning_rate": 2.225138613173524e-05, + "loss": 0.1076, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.033721111714839935, + "step": 4455, + "valid_targets_mean": 4515.5, + "valid_targets_min": 1008 + }, + { + "epoch": 3.629222629222629, + "grad_norm": 0.3411037126787543, + "learning_rate": 2.2211060880899996e-05, + "loss": 0.0876, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04788324981927872, + "step": 4460, + "valid_targets_mean": 3701.6, + "valid_targets_min": 818 + }, + { + "epoch": 3.6332926332926334, + "grad_norm": 0.29653848428471286, + "learning_rate": 2.217072652812468e-05, + "loss": 0.0905, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.036281127482652664, + "step": 4465, + "valid_targets_mean": 3858.5, + "valid_targets_min": 1006 + }, + { + "epoch": 3.6373626373626373, + "grad_norm": 0.5325825065869296, + "learning_rate": 2.213038323944761e-05, + "loss": 0.1021, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0998295247554779, + "step": 4470, + "valid_targets_mean": 2629.8, + "valid_targets_min": 614 + }, + { + "epoch": 3.6414326414326412, + "grad_norm": 0.37544688292236766, + "learning_rate": 2.209003118094392e-05, + "loss": 0.1219, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.03794893994927406, + "step": 4475, + "valid_targets_mean": 3368.0, + "valid_targets_min": 1997 + }, + { + "epoch": 3.6455026455026456, + "grad_norm": 0.5425961999646625, + "learning_rate": 2.2049670518724818e-05, + "loss": 0.0897, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04944489151239395, + "step": 4480, + "valid_targets_mean": 1644.2, + "valid_targets_min": 674 + }, + { + "epoch": 3.6495726495726495, + "grad_norm": 0.31434891274177323, + "learning_rate": 2.2009301418936945e-05, + "loss": 0.0828, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.038889069110155106, + "step": 4485, + "valid_targets_mean": 3700.4, + "valid_targets_min": 989 + }, + { + "epoch": 3.6536426536426534, + "grad_norm": 0.3810739437224756, + "learning_rate": 2.1968924047761665e-05, + "loss": 0.0787, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.03368036821484566, + "step": 4490, + "valid_targets_mean": 3537.8, + "valid_targets_min": 2616 + }, + { + "epoch": 3.6577126577126577, + "grad_norm": 0.43609678797054763, + "learning_rate": 2.1928538571414416e-05, + "loss": 0.1085, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05374626815319061, + "step": 4495, + "valid_targets_mean": 2184.5, + "valid_targets_min": 714 + }, + { + "epoch": 3.6617826617826617, + "grad_norm": 0.4437529891094936, + "learning_rate": 2.1888145156143966e-05, + "loss": 0.0984, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05205199122428894, + "step": 4500, + "valid_targets_mean": 2054.5, + "valid_targets_min": 893 + }, + { + "epoch": 3.665852665852666, + "grad_norm": 0.43380985224197094, + "learning_rate": 2.184774396823178e-05, + "loss": 0.1051, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0552193820476532, + "step": 4505, + "valid_targets_mean": 3681.5, + "valid_targets_min": 2353 + }, + { + "epoch": 3.66992266992267, + "grad_norm": 0.3799243340192024, + "learning_rate": 2.180733517399133e-05, + "loss": 0.0995, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.049977511167526245, + "step": 4510, + "valid_targets_mean": 3436.9, + "valid_targets_min": 1810 + }, + { + "epoch": 3.6739926739926743, + "grad_norm": 0.32571580848557247, + "learning_rate": 2.1766918939767394e-05, + "loss": 0.1345, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.036109715700149536, + "step": 4515, + "valid_targets_mean": 3740.8, + "valid_targets_min": 1955 + }, + { + "epoch": 3.678062678062678, + "grad_norm": 0.39033800490114573, + "learning_rate": 2.1726495431935364e-05, + "loss": 0.0864, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04185417294502258, + "step": 4520, + "valid_targets_mean": 3092.0, + "valid_targets_min": 711 + }, + { + "epoch": 3.682132682132682, + "grad_norm": 0.6397111709341369, + "learning_rate": 2.1686064816900587e-05, + "loss": 0.1228, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07134007662534714, + "step": 4525, + "valid_targets_mean": 1687.1, + "valid_targets_min": 643 + }, + { + "epoch": 3.6862026862026864, + "grad_norm": 0.5232501844782532, + "learning_rate": 2.164562726109766e-05, + "loss": 0.1125, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04346204549074173, + "step": 4530, + "valid_targets_mean": 2173.0, + "valid_targets_min": 538 + }, + { + "epoch": 3.6902726902726903, + "grad_norm": 0.39995472780169194, + "learning_rate": 2.1605182930989764e-05, + "loss": 0.0916, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.03484119474887848, + "step": 4535, + "valid_targets_mean": 1954.0, + "valid_targets_min": 664 + }, + { + "epoch": 3.6943426943426942, + "grad_norm": 0.5523582831008328, + "learning_rate": 2.1564731993067958e-05, + "loss": 0.2142, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10305885225534439, + "step": 4540, + "valid_targets_mean": 2692.1, + "valid_targets_min": 1180 + }, + { + "epoch": 3.6984126984126986, + "grad_norm": 0.43560416380796335, + "learning_rate": 2.1524274613850495e-05, + "loss": 0.1839, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.039621271193027496, + "step": 4545, + "valid_targets_mean": 2071.0, + "valid_targets_min": 756 + }, + { + "epoch": 3.7024827024827025, + "grad_norm": 0.35770234003575513, + "learning_rate": 2.1483810959882154e-05, + "loss": 0.1031, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04059296101331711, + "step": 4550, + "valid_targets_mean": 3086.0, + "valid_targets_min": 844 + }, + { + "epoch": 3.7065527065527064, + "grad_norm": 0.6970042913323712, + "learning_rate": 2.144334119773355e-05, + "loss": 0.1881, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12542730569839478, + "step": 4555, + "valid_targets_mean": 2081.2, + "valid_targets_min": 1146 + }, + { + "epoch": 3.7106227106227108, + "grad_norm": 0.31025486801351915, + "learning_rate": 2.1402865494000435e-05, + "loss": 0.1039, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0449528843164444, + "step": 4560, + "valid_targets_mean": 4426.4, + "valid_targets_min": 3507 + }, + { + "epoch": 3.7146927146927147, + "grad_norm": 0.3347224349640861, + "learning_rate": 2.1362384015303002e-05, + "loss": 0.0879, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04645727574825287, + "step": 4565, + "valid_targets_mean": 4391.1, + "valid_targets_min": 3483 + }, + { + "epoch": 3.7187627187627186, + "grad_norm": 0.37334581619913065, + "learning_rate": 2.1321896928285256e-05, + "loss": 0.1023, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.048772167414426804, + "step": 4570, + "valid_targets_mean": 3939.6, + "valid_targets_min": 784 + }, + { + "epoch": 3.722832722832723, + "grad_norm": 0.4447642934119185, + "learning_rate": 2.128140439961426e-05, + "loss": 0.0878, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.033333856612443924, + "step": 4575, + "valid_targets_mean": 1469.4, + "valid_targets_min": 779 + }, + { + "epoch": 3.726902726902727, + "grad_norm": 0.5372359551407455, + "learning_rate": 2.1240906595979488e-05, + "loss": 0.0905, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.039604753255844116, + "step": 4580, + "valid_targets_mean": 1845.1, + "valid_targets_min": 674 + }, + { + "epoch": 3.7309727309727307, + "grad_norm": 0.4637069162414348, + "learning_rate": 2.1200403684092112e-05, + "loss": 0.0839, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04348008334636688, + "step": 4585, + "valid_targets_mean": 2154.5, + "valid_targets_min": 712 + }, + { + "epoch": 3.735042735042735, + "grad_norm": 0.49301713165155914, + "learning_rate": 2.115989583068436e-05, + "loss": 0.0904, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04291559010744095, + "step": 4590, + "valid_targets_mean": 3409.4, + "valid_targets_min": 2422 + }, + { + "epoch": 3.739112739112739, + "grad_norm": 0.34908985580550017, + "learning_rate": 2.1119383202508793e-05, + "loss": 0.0769, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.026736728847026825, + "step": 4595, + "valid_targets_mean": 2888.1, + "valid_targets_min": 576 + }, + { + "epoch": 3.743182743182743, + "grad_norm": 0.3051490004834012, + "learning_rate": 2.107886596633761e-05, + "loss": 0.1341, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.022518891841173172, + "step": 4600, + "valid_targets_mean": 3605.9, + "valid_targets_min": 772 + }, + { + "epoch": 3.7472527472527473, + "grad_norm": 0.39215842357161595, + "learning_rate": 2.1038344288961995e-05, + "loss": 0.0736, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.03927973657846451, + "step": 4605, + "valid_targets_mean": 2694.1, + "valid_targets_min": 722 + }, + { + "epoch": 3.751322751322751, + "grad_norm": 0.3416371707837828, + "learning_rate": 2.099781833719142e-05, + "loss": 0.0758, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04170195013284683, + "step": 4610, + "valid_targets_mean": 3837.2, + "valid_targets_min": 2788 + }, + { + "epoch": 3.7553927553927555, + "grad_norm": 0.3686746265338282, + "learning_rate": 2.095728827785294e-05, + "loss": 0.0796, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.03984172269701958, + "step": 4615, + "valid_targets_mean": 2829.4, + "valid_targets_min": 614 + }, + { + "epoch": 3.7594627594627594, + "grad_norm": 0.6031062567308154, + "learning_rate": 2.091675427779052e-05, + "loss": 0.1129, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.125014990568161, + "step": 4620, + "valid_targets_mean": 1989.6, + "valid_targets_min": 608 + }, + { + "epoch": 3.763532763532764, + "grad_norm": 0.34464040068494, + "learning_rate": 2.0876216503864348e-05, + "loss": 0.0963, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.03857950493693352, + "step": 4625, + "valid_targets_mean": 3851.4, + "valid_targets_min": 3150 + }, + { + "epoch": 3.7676027676027677, + "grad_norm": 0.521731284937972, + "learning_rate": 2.0835675122950155e-05, + "loss": 0.1206, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0498424731194973, + "step": 4630, + "valid_targets_mean": 2037.9, + "valid_targets_min": 718 + }, + { + "epoch": 3.7716727716727716, + "grad_norm": 0.6148462798244299, + "learning_rate": 2.079513030193852e-05, + "loss": 0.0937, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04556414484977722, + "step": 4635, + "valid_targets_mean": 3679.2, + "valid_targets_min": 1333 + }, + { + "epoch": 3.775742775742776, + "grad_norm": 0.4489274468020415, + "learning_rate": 2.0754582207734163e-05, + "loss": 0.1115, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0359991192817688, + "step": 4640, + "valid_targets_mean": 1756.9, + "valid_targets_min": 693 + }, + { + "epoch": 3.77981277981278, + "grad_norm": 0.4685040453171337, + "learning_rate": 2.0714031007255308e-05, + "loss": 0.1217, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.039639245718717575, + "step": 4645, + "valid_targets_mean": 1352.6, + "valid_targets_min": 803 + }, + { + "epoch": 3.7838827838827838, + "grad_norm": 0.38901622311150574, + "learning_rate": 2.0673476867432945e-05, + "loss": 0.0985, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07358896732330322, + "step": 4650, + "valid_targets_mean": 2522.5, + "valid_targets_min": 902 + }, + { + "epoch": 3.787952787952788, + "grad_norm": 0.45401232368813943, + "learning_rate": 2.0632919955210177e-05, + "loss": 0.1103, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.044406771659851074, + "step": 4655, + "valid_targets_mean": 3511.5, + "valid_targets_min": 2393 + }, + { + "epoch": 3.792022792022792, + "grad_norm": 0.33635740471339176, + "learning_rate": 2.0592360437541506e-05, + "loss": 0.0954, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.03269575536251068, + "step": 4660, + "valid_targets_mean": 3454.1, + "valid_targets_min": 804 + }, + { + "epoch": 3.796092796092796, + "grad_norm": 0.3694812668687151, + "learning_rate": 2.055179848139217e-05, + "loss": 0.0927, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04042567312717438, + "step": 4665, + "valid_targets_mean": 5271.5, + "valid_targets_min": 4147 + }, + { + "epoch": 3.8001628001628003, + "grad_norm": 0.3719991625867249, + "learning_rate": 2.0511234253737445e-05, + "loss": 0.081, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04839537665247917, + "step": 4670, + "valid_targets_mean": 4163.8, + "valid_targets_min": 871 + }, + { + "epoch": 3.804232804232804, + "grad_norm": 0.38045962863863597, + "learning_rate": 2.047066792156195e-05, + "loss": 0.076, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.027333011850714684, + "step": 4675, + "valid_targets_mean": 4198.2, + "valid_targets_min": 896 + }, + { + "epoch": 3.808302808302808, + "grad_norm": 0.34894815665055257, + "learning_rate": 2.0430099651858972e-05, + "loss": 0.0897, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.046752214431762695, + "step": 4680, + "valid_targets_mean": 3781.4, + "valid_targets_min": 2862 + }, + { + "epoch": 3.8123728123728124, + "grad_norm": 0.6275665055735403, + "learning_rate": 2.038952961162978e-05, + "loss": 0.109, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06374616920948029, + "step": 4685, + "valid_targets_mean": 1718.8, + "valid_targets_min": 497 + }, + { + "epoch": 3.8164428164428164, + "grad_norm": 0.40826142469048216, + "learning_rate": 2.034895796788292e-05, + "loss": 0.0907, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05431363359093666, + "step": 4690, + "valid_targets_mean": 4016.9, + "valid_targets_min": 3217 + }, + { + "epoch": 3.8205128205128203, + "grad_norm": 0.4250331729338565, + "learning_rate": 2.030838488763355e-05, + "loss": 0.1007, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.03880765289068222, + "step": 4695, + "valid_targets_mean": 2624.6, + "valid_targets_min": 901 + }, + { + "epoch": 3.8245828245828246, + "grad_norm": 0.39072073946366787, + "learning_rate": 2.0267810537902727e-05, + "loss": 0.0771, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04679608345031738, + "step": 4700, + "valid_targets_mean": 3859.0, + "valid_targets_min": 2425 + }, + { + "epoch": 3.8286528286528285, + "grad_norm": 0.3928920752567127, + "learning_rate": 2.0227235085716754e-05, + "loss": 0.0994, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06546978652477264, + "step": 4705, + "valid_targets_mean": 3524.6, + "valid_targets_min": 3017 + }, + { + "epoch": 3.832722832722833, + "grad_norm": 0.37248107631418687, + "learning_rate": 2.0186658698106445e-05, + "loss": 0.0862, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.036713846027851105, + "step": 4710, + "valid_targets_mean": 3727.2, + "valid_targets_min": 3184 + }, + { + "epoch": 3.836792836792837, + "grad_norm": 0.6173637983811905, + "learning_rate": 2.0146081542106502e-05, + "loss": 0.0846, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0795275866985321, + "step": 4715, + "valid_targets_mean": 2059.5, + "valid_targets_min": 478 + }, + { + "epoch": 3.840862840862841, + "grad_norm": 0.506439817469206, + "learning_rate": 2.010550378475475e-05, + "loss": 0.0888, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.050123006105422974, + "step": 4720, + "valid_targets_mean": 1881.8, + "valid_targets_min": 955 + }, + { + "epoch": 3.844932844932845, + "grad_norm": 0.5407218171773867, + "learning_rate": 2.006492559309152e-05, + "loss": 0.095, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0604960098862648, + "step": 4725, + "valid_targets_mean": 1654.2, + "valid_targets_min": 959 + }, + { + "epoch": 3.849002849002849, + "grad_norm": 0.3651902043497858, + "learning_rate": 2.0024347134158915e-05, + "loss": 0.0999, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.03338472917675972, + "step": 4730, + "valid_targets_mean": 2815.5, + "valid_targets_min": 748 + }, + { + "epoch": 3.8530728530728533, + "grad_norm": 0.4855059874008844, + "learning_rate": 1.9983768575000147e-05, + "loss": 0.0909, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.03731710463762283, + "step": 4735, + "valid_targets_mean": 1247.0, + "valid_targets_min": 495 + }, + { + "epoch": 3.857142857142857, + "grad_norm": 0.47569854814679674, + "learning_rate": 1.9943190082658827e-05, + "loss": 0.1019, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.031180473044514656, + "step": 4740, + "valid_targets_mean": 2142.4, + "valid_targets_min": 655 + }, + { + "epoch": 3.861212861212861, + "grad_norm": 0.6078700792608549, + "learning_rate": 1.9902611824178306e-05, + "loss": 0.1117, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.088534876704216, + "step": 4745, + "valid_targets_mean": 2176.2, + "valid_targets_min": 521 + }, + { + "epoch": 3.8652828652828655, + "grad_norm": 0.3966917197512265, + "learning_rate": 1.986203396660097e-05, + "loss": 0.0864, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04920294135808945, + "step": 4750, + "valid_targets_mean": 3740.4, + "valid_targets_min": 2788 + }, + { + "epoch": 3.8693528693528694, + "grad_norm": 0.3078969308089153, + "learning_rate": 1.9821456676967552e-05, + "loss": 0.0758, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.03821399807929993, + "step": 4755, + "valid_targets_mean": 3481.5, + "valid_targets_min": 841 + }, + { + "epoch": 3.8734228734228733, + "grad_norm": 0.41592065293330216, + "learning_rate": 1.9780880122316434e-05, + "loss": 0.0881, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04670462757349014, + "step": 4760, + "valid_targets_mean": 2820.0, + "valid_targets_min": 2144 + }, + { + "epoch": 3.8774928774928776, + "grad_norm": 0.42102684627161313, + "learning_rate": 1.9740304469682987e-05, + "loss": 0.1147, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05189353972673416, + "step": 4765, + "valid_targets_mean": 3554.1, + "valid_targets_min": 2619 + }, + { + "epoch": 3.8815628815628815, + "grad_norm": 0.3289199750652924, + "learning_rate": 1.9699729886098876e-05, + "loss": 0.0714, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.03820990025997162, + "step": 4770, + "valid_targets_mean": 3334.5, + "valid_targets_min": 619 + }, + { + "epoch": 3.8856328856328854, + "grad_norm": 0.3257554126268986, + "learning_rate": 1.965915653859135e-05, + "loss": 0.0887, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.038173578679561615, + "step": 4775, + "valid_targets_mean": 4034.9, + "valid_targets_min": 486 + }, + { + "epoch": 3.88970288970289, + "grad_norm": 0.31496384696830865, + "learning_rate": 1.9618584594182576e-05, + "loss": 0.0871, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.040112294256687164, + "step": 4780, + "valid_targets_mean": 3456.8, + "valid_targets_min": 995 + }, + { + "epoch": 3.8937728937728937, + "grad_norm": 0.3078455051176223, + "learning_rate": 1.957801421988894e-05, + "loss": 0.0722, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.047052159905433655, + "step": 4785, + "valid_targets_mean": 4773.8, + "valid_targets_min": 3539 + }, + { + "epoch": 3.8978428978428976, + "grad_norm": 0.3723134741147702, + "learning_rate": 1.9537445582720385e-05, + "loss": 0.0946, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05127771943807602, + "step": 4790, + "valid_targets_mean": 4328.5, + "valid_targets_min": 3172 + }, + { + "epoch": 3.901912901912902, + "grad_norm": 0.306985867833895, + "learning_rate": 1.9496878849679667e-05, + "loss": 0.0781, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.02703310176730156, + "step": 4795, + "valid_targets_mean": 2120.1, + "valid_targets_min": 549 + }, + { + "epoch": 3.905982905982906, + "grad_norm": 0.3994634886270254, + "learning_rate": 1.9456314187761726e-05, + "loss": 0.1164, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.053539011627435684, + "step": 4800, + "valid_targets_mean": 2216.2, + "valid_targets_min": 541 + }, + { + "epoch": 3.91005291005291, + "grad_norm": 0.3309405585861779, + "learning_rate": 1.941575176395298e-05, + "loss": 0.1041, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.029777104035019875, + "step": 4805, + "valid_targets_mean": 3338.9, + "valid_targets_min": 1938 + }, + { + "epoch": 3.914122914122914, + "grad_norm": 0.4467641072962703, + "learning_rate": 1.937519174523063e-05, + "loss": 0.0891, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12722966074943542, + "step": 4810, + "valid_targets_mean": 2226.6, + "valid_targets_min": 1167 + }, + { + "epoch": 3.918192918192918, + "grad_norm": 0.390118295374521, + "learning_rate": 1.9334634298561962e-05, + "loss": 0.0919, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.043785303831100464, + "step": 4815, + "valid_targets_mean": 3561.4, + "valid_targets_min": 2919 + }, + { + "epoch": 3.9222629222629224, + "grad_norm": 0.40458656901794954, + "learning_rate": 1.9294079590903697e-05, + "loss": 0.086, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.03705420717597008, + "step": 4820, + "valid_targets_mean": 1509.9, + "valid_targets_min": 559 + }, + { + "epoch": 3.9263329263329263, + "grad_norm": 0.525559021088802, + "learning_rate": 1.925352778920126e-05, + "loss": 0.09, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06773480027914047, + "step": 4825, + "valid_targets_mean": 3123.1, + "valid_targets_min": 883 + }, + { + "epoch": 3.9304029304029307, + "grad_norm": 0.7659028123265849, + "learning_rate": 1.9212979060388136e-05, + "loss": 0.1144, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07741884142160416, + "step": 4830, + "valid_targets_mean": 1419.0, + "valid_targets_min": 337 + }, + { + "epoch": 3.9344729344729346, + "grad_norm": 0.37012249374521133, + "learning_rate": 1.917243357138514e-05, + "loss": 0.1177, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.03321508690714836, + "step": 4835, + "valid_targets_mean": 2953.9, + "valid_targets_min": 593 + }, + { + "epoch": 3.9385429385429385, + "grad_norm": 0.38243163663291685, + "learning_rate": 1.9131891489099745e-05, + "loss": 0.0949, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04113879054784775, + "step": 4840, + "valid_targets_mean": 3091.9, + "valid_targets_min": 529 + }, + { + "epoch": 3.942612942612943, + "grad_norm": 0.4160846401676222, + "learning_rate": 1.9091352980425427e-05, + "loss": 0.0951, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05502070114016533, + "step": 4845, + "valid_targets_mean": 3593.4, + "valid_targets_min": 2556 + }, + { + "epoch": 3.9466829466829467, + "grad_norm": 0.44382246740561504, + "learning_rate": 1.9050818212240937e-05, + "loss": 0.0843, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05704183131456375, + "step": 4850, + "valid_targets_mean": 3753.2, + "valid_targets_min": 3385 + }, + { + "epoch": 3.9507529507529506, + "grad_norm": 0.3788947152767503, + "learning_rate": 1.9010287351409617e-05, + "loss": 0.0822, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.03963714465498924, + "step": 4855, + "valid_targets_mean": 3442.1, + "valid_targets_min": 932 + }, + { + "epoch": 3.954822954822955, + "grad_norm": 0.6893550041691463, + "learning_rate": 1.896976056477874e-05, + "loss": 0.0852, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05745682120323181, + "step": 4860, + "valid_targets_mean": 1202.9, + "valid_targets_min": 598 + }, + { + "epoch": 3.958892958892959, + "grad_norm": 0.48501252893743335, + "learning_rate": 1.892923801917881e-05, + "loss": 0.0978, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04939830303192139, + "step": 4865, + "valid_targets_mean": 3333.5, + "valid_targets_min": 1069 + }, + { + "epoch": 3.962962962962963, + "grad_norm": 1.5705058397396339, + "learning_rate": 1.888871988142285e-05, + "loss": 0.0818, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.036230284720659256, + "step": 4870, + "valid_targets_mean": 1234.5, + "valid_targets_min": 680 + }, + { + "epoch": 3.967032967032967, + "grad_norm": 0.38273222211564595, + "learning_rate": 1.8848206318305762e-05, + "loss": 0.0806, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.038350578397512436, + "step": 4875, + "valid_targets_mean": 2762.4, + "valid_targets_min": 774 + }, + { + "epoch": 3.971102971102971, + "grad_norm": 0.3525402431797138, + "learning_rate": 1.8807697496603604e-05, + "loss": 0.0942, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04092249274253845, + "step": 4880, + "valid_targets_mean": 3842.5, + "valid_targets_min": 720 + }, + { + "epoch": 3.975172975172975, + "grad_norm": 0.23643952371340574, + "learning_rate": 1.8767193583072917e-05, + "loss": 0.0754, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.03318733721971512, + "step": 4885, + "valid_targets_mean": 5590.6, + "valid_targets_min": 4657 + }, + { + "epoch": 3.9792429792429793, + "grad_norm": 0.34030143973987964, + "learning_rate": 1.8726694744450046e-05, + "loss": 0.081, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04707394912838936, + "step": 4890, + "valid_targets_mean": 4327.4, + "valid_targets_min": 1320 + }, + { + "epoch": 3.9833129833129832, + "grad_norm": 0.3472447381234327, + "learning_rate": 1.868620114745043e-05, + "loss": 0.0863, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04898809269070625, + "step": 4895, + "valid_targets_mean": 3547.8, + "valid_targets_min": 2463 + }, + { + "epoch": 3.987382987382987, + "grad_norm": 0.416132959419297, + "learning_rate": 1.8645712958767936e-05, + "loss": 0.1756, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.031941697001457214, + "step": 4900, + "valid_targets_mean": 2903.5, + "valid_targets_min": 605 + }, + { + "epoch": 3.9914529914529915, + "grad_norm": 0.33752542112182116, + "learning_rate": 1.8605230345074187e-05, + "loss": 0.0793, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.035623304545879364, + "step": 4905, + "valid_targets_mean": 3095.8, + "valid_targets_min": 1014 + }, + { + "epoch": 3.9955229955229954, + "grad_norm": 0.35702567633782445, + "learning_rate": 1.8564753473017815e-05, + "loss": 0.0926, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04029485583305359, + "step": 4910, + "valid_targets_mean": 3526.4, + "valid_targets_min": 2807 + }, + { + "epoch": 3.9995929995929997, + "grad_norm": 0.4652040750909582, + "learning_rate": 1.8524282509223857e-05, + "loss": 0.1126, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09600470215082169, + "step": 4915, + "valid_targets_mean": 3006.5, + "valid_targets_min": 1214 + }, + { + "epoch": 4.003256003256003, + "grad_norm": 0.6419873740137033, + "learning_rate": 1.8483817620293002e-05, + "loss": 0.1742, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15491032600402832, + "step": 4920, + "valid_targets_mean": 8029.9, + "valid_targets_min": 6142 + }, + { + "epoch": 4.007326007326007, + "grad_norm": 0.4261293015569112, + "learning_rate": 1.8443358972800943e-05, + "loss": 0.197, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09373286366462708, + "step": 4925, + "valid_targets_mean": 6664.0, + "valid_targets_min": 5470 + }, + { + "epoch": 4.011396011396012, + "grad_norm": 0.43569183179763055, + "learning_rate": 1.8402906733297686e-05, + "loss": 0.1854, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09796077013015747, + "step": 4930, + "valid_targets_mean": 7126.9, + "valid_targets_min": 5368 + }, + { + "epoch": 4.015466015466015, + "grad_norm": 0.3628222940332491, + "learning_rate": 1.836246106830684e-05, + "loss": 0.1787, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08767811954021454, + "step": 4935, + "valid_targets_mean": 7634.9, + "valid_targets_min": 5614 + }, + { + "epoch": 4.0195360195360195, + "grad_norm": 0.41186264281524637, + "learning_rate": 1.832202214432497e-05, + "loss": 0.1851, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08911505341529846, + "step": 4940, + "valid_targets_mean": 8630.8, + "valid_targets_min": 6362 + }, + { + "epoch": 4.023606023606024, + "grad_norm": 0.39527021624678743, + "learning_rate": 1.828159012782087e-05, + "loss": 0.1879, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09319797903299332, + "step": 4945, + "valid_targets_mean": 7383.6, + "valid_targets_min": 5844 + }, + { + "epoch": 4.027676027676027, + "grad_norm": 0.3825769189458071, + "learning_rate": 1.824116518523492e-05, + "loss": 0.1958, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1047782227396965, + "step": 4950, + "valid_targets_mean": 8017.0, + "valid_targets_min": 5283 + }, + { + "epoch": 4.031746031746032, + "grad_norm": 0.37851649415828814, + "learning_rate": 1.8200747482978358e-05, + "loss": 0.1838, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07832890748977661, + "step": 4955, + "valid_targets_mean": 7051.1, + "valid_targets_min": 4135 + }, + { + "epoch": 4.035816035816036, + "grad_norm": 0.3899190146807035, + "learning_rate": 1.8160337187432637e-05, + "loss": 0.1778, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07594281435012817, + "step": 4960, + "valid_targets_mean": 5370.4, + "valid_targets_min": 3919 + }, + { + "epoch": 4.0398860398860394, + "grad_norm": 0.3710326532028934, + "learning_rate": 1.8119934464948713e-05, + "loss": 0.1631, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07691587507724762, + "step": 4965, + "valid_targets_mean": 6063.6, + "valid_targets_min": 4863 + }, + { + "epoch": 4.043956043956044, + "grad_norm": 0.425559143325838, + "learning_rate": 1.8079539481846366e-05, + "loss": 0.1829, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08631320297718048, + "step": 4970, + "valid_targets_mean": 7001.2, + "valid_targets_min": 6020 + }, + { + "epoch": 4.048026048026048, + "grad_norm": 0.3800530976695048, + "learning_rate": 1.8039152404413513e-05, + "loss": 0.1724, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08500252664089203, + "step": 4975, + "valid_targets_mean": 6181.9, + "valid_targets_min": 4669 + }, + { + "epoch": 4.0520960520960525, + "grad_norm": 0.37536923845620823, + "learning_rate": 1.7998773398905536e-05, + "loss": 0.1768, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07884109765291214, + "step": 4980, + "valid_targets_mean": 6250.2, + "valid_targets_min": 4900 + }, + { + "epoch": 4.056166056166056, + "grad_norm": 0.369134672032628, + "learning_rate": 1.795840263154457e-05, + "loss": 0.1731, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0790032371878624, + "step": 4985, + "valid_targets_mean": 7156.4, + "valid_targets_min": 5323 + }, + { + "epoch": 4.06023606023606, + "grad_norm": 0.3823541536796184, + "learning_rate": 1.7918040268518863e-05, + "loss": 0.1729, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08844245970249176, + "step": 4990, + "valid_targets_mean": 7215.8, + "valid_targets_min": 4720 + }, + { + "epoch": 4.064306064306065, + "grad_norm": 0.3864166564460856, + "learning_rate": 1.7877686475982045e-05, + "loss": 0.1791, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08641092479228973, + "step": 4995, + "valid_targets_mean": 6550.8, + "valid_targets_min": 5739 + }, + { + "epoch": 4.068376068376068, + "grad_norm": 0.3793296622743899, + "learning_rate": 1.783734142005248e-05, + "loss": 0.1731, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0891452431678772, + "step": 5000, + "valid_targets_mean": 6726.8, + "valid_targets_min": 4540 + }, + { + "epoch": 4.0724460724460725, + "grad_norm": 0.4723995482127612, + "learning_rate": 1.7797005266812556e-05, + "loss": 0.1789, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0973764955997467, + "step": 5005, + "valid_targets_mean": 4573.4, + "valid_targets_min": 1039 + }, + { + "epoch": 4.076516076516077, + "grad_norm": 0.41477705618237604, + "learning_rate": 1.7756678182308018e-05, + "loss": 0.1895, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08957439661026001, + "step": 5010, + "valid_targets_mean": 6430.0, + "valid_targets_min": 5412 + }, + { + "epoch": 4.08058608058608, + "grad_norm": 0.6352736728460759, + "learning_rate": 1.7716360332547286e-05, + "loss": 0.1702, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.025655508041381836, + "step": 5015, + "valid_targets_mean": 583.1, + "valid_targets_min": 158 + }, + { + "epoch": 4.084656084656085, + "grad_norm": 0.3748310581528748, + "learning_rate": 1.7676051883500746e-05, + "loss": 0.1626, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09119533747434616, + "step": 5020, + "valid_targets_mean": 8230.9, + "valid_targets_min": 5061 + }, + { + "epoch": 4.088726088726089, + "grad_norm": 0.4146647677224428, + "learning_rate": 1.76357530011001e-05, + "loss": 0.1744, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09562458842992783, + "step": 5025, + "valid_targets_mean": 7238.9, + "valid_targets_min": 5112 + }, + { + "epoch": 4.0927960927960925, + "grad_norm": 0.3895491315857082, + "learning_rate": 1.7595463851237666e-05, + "loss": 0.1733, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07687856256961823, + "step": 5030, + "valid_targets_mean": 6322.8, + "valid_targets_min": 4867 + }, + { + "epoch": 4.096866096866097, + "grad_norm": 0.3535398598607784, + "learning_rate": 1.7555184599765697e-05, + "loss": 0.1787, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08618101477622986, + "step": 5035, + "valid_targets_mean": 7887.0, + "valid_targets_min": 6004 + }, + { + "epoch": 4.100936100936101, + "grad_norm": 0.3398185270105302, + "learning_rate": 1.7514915412495696e-05, + "loss": 0.1703, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07757728546857834, + "step": 5040, + "valid_targets_mean": 7090.2, + "valid_targets_min": 4917 + }, + { + "epoch": 4.105006105006105, + "grad_norm": 0.3735006074847723, + "learning_rate": 1.7474656455197746e-05, + "loss": 0.1651, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08108969032764435, + "step": 5045, + "valid_targets_mean": 7441.9, + "valid_targets_min": 4202 + }, + { + "epoch": 4.109076109076109, + "grad_norm": 0.3963767640674053, + "learning_rate": 1.7434407893599803e-05, + "loss": 0.1518, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08019435405731201, + "step": 5050, + "valid_targets_mean": 7901.8, + "valid_targets_min": 5060 + }, + { + "epoch": 4.113146113146113, + "grad_norm": 0.3279720037805319, + "learning_rate": 1.7394169893387044e-05, + "loss": 0.1535, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07386630773544312, + "step": 5055, + "valid_targets_mean": 8557.6, + "valid_targets_min": 6331 + }, + { + "epoch": 4.117216117216117, + "grad_norm": 0.43210024185743623, + "learning_rate": 1.735394262020115e-05, + "loss": 0.1791, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08315213024616241, + "step": 5060, + "valid_targets_mean": 4976.4, + "valid_targets_min": 2847 + }, + { + "epoch": 4.121286121286121, + "grad_norm": 0.47219602019627277, + "learning_rate": 1.7313726239639662e-05, + "loss": 0.1603, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09470256417989731, + "step": 5065, + "valid_targets_mean": 7129.1, + "valid_targets_min": 5331 + }, + { + "epoch": 4.1253561253561255, + "grad_norm": 0.43079423430747293, + "learning_rate": 1.7273520917255277e-05, + "loss": 0.1826, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09296290576457977, + "step": 5070, + "valid_targets_mean": 6864.0, + "valid_targets_min": 4697 + }, + { + "epoch": 4.12942612942613, + "grad_norm": 0.43490675675482465, + "learning_rate": 1.7233326818555162e-05, + "loss": 0.189, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0889597162604332, + "step": 5075, + "valid_targets_mean": 6631.9, + "valid_targets_min": 4623 + }, + { + "epoch": 4.133496133496133, + "grad_norm": 0.3756096966590715, + "learning_rate": 1.7193144109000286e-05, + "loss": 0.1814, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08131799101829529, + "step": 5080, + "valid_targets_mean": 6330.4, + "valid_targets_min": 5424 + }, + { + "epoch": 4.137566137566138, + "grad_norm": 0.3746393665646649, + "learning_rate": 1.7152972954004745e-05, + "loss": 0.1741, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08426746726036072, + "step": 5085, + "valid_targets_mean": 6814.8, + "valid_targets_min": 4964 + }, + { + "epoch": 4.141636141636142, + "grad_norm": 0.4195480953550515, + "learning_rate": 1.711281351893505e-05, + "loss": 0.1834, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0972888171672821, + "step": 5090, + "valid_targets_mean": 6915.8, + "valid_targets_min": 5321 + }, + { + "epoch": 4.1457061457061455, + "grad_norm": 0.4316053213116156, + "learning_rate": 1.7072665969109485e-05, + "loss": 0.1739, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08947832882404327, + "step": 5095, + "valid_targets_mean": 6464.5, + "valid_targets_min": 5185 + }, + { + "epoch": 4.14977614977615, + "grad_norm": 0.39114306149720146, + "learning_rate": 1.703253046979739e-05, + "loss": 0.1774, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09024079144001007, + "step": 5100, + "valid_targets_mean": 7048.6, + "valid_targets_min": 4913 + }, + { + "epoch": 4.153846153846154, + "grad_norm": 0.4185179236015934, + "learning_rate": 1.6992407186218512e-05, + "loss": 0.181, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09964226186275482, + "step": 5105, + "valid_targets_mean": 7469.2, + "valid_targets_min": 5426 + }, + { + "epoch": 4.157916157916158, + "grad_norm": 0.40064531771229606, + "learning_rate": 1.6952296283542303e-05, + "loss": 0.1795, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09262304753065109, + "step": 5110, + "valid_targets_mean": 6819.4, + "valid_targets_min": 5286 + }, + { + "epoch": 4.161986161986162, + "grad_norm": 0.38311400379103405, + "learning_rate": 1.691219792688726e-05, + "loss": 0.1779, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09319957345724106, + "step": 5115, + "valid_targets_mean": 7107.5, + "valid_targets_min": 5201 + }, + { + "epoch": 4.166056166056166, + "grad_norm": 0.3933047787302955, + "learning_rate": 1.6872112281320218e-05, + "loss": 0.1745, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08465088903903961, + "step": 5120, + "valid_targets_mean": 5642.6, + "valid_targets_min": 4838 + }, + { + "epoch": 4.17012617012617, + "grad_norm": 0.8525985607334213, + "learning_rate": 1.6832039511855702e-05, + "loss": 0.1417, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13957032561302185, + "step": 5125, + "valid_targets_mean": 2054.1, + "valid_targets_min": 134 + }, + { + "epoch": 4.174196174196174, + "grad_norm": 0.4350065324995063, + "learning_rate": 1.6791979783455227e-05, + "loss": 0.1787, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08149752765893936, + "step": 5130, + "valid_targets_mean": 6686.4, + "valid_targets_min": 5256 + }, + { + "epoch": 4.1782661782661785, + "grad_norm": 0.4132065668153816, + "learning_rate": 1.6751933261026604e-05, + "loss": 0.1715, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09279554337263107, + "step": 5135, + "valid_targets_mean": 6216.6, + "valid_targets_min": 5542 + }, + { + "epoch": 4.182336182336182, + "grad_norm": 0.4704024966834831, + "learning_rate": 1.671190010942331e-05, + "loss": 0.1749, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08651970326900482, + "step": 5140, + "valid_targets_mean": 6486.1, + "valid_targets_min": 5531 + }, + { + "epoch": 4.186406186406186, + "grad_norm": 0.41768609875122026, + "learning_rate": 1.667188049344377e-05, + "loss": 0.1703, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08536796271800995, + "step": 5145, + "valid_targets_mean": 5797.4, + "valid_targets_min": 3911 + }, + { + "epoch": 4.190476190476191, + "grad_norm": 0.44999542060640974, + "learning_rate": 1.663187457783068e-05, + "loss": 0.1711, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08768117427825928, + "step": 5150, + "valid_targets_mean": 6357.5, + "valid_targets_min": 5195 + }, + { + "epoch": 4.194546194546194, + "grad_norm": 0.4473459694163714, + "learning_rate": 1.659188252727035e-05, + "loss": 0.1791, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08737886697053909, + "step": 5155, + "valid_targets_mean": 5808.9, + "valid_targets_min": 4959 + }, + { + "epoch": 4.1986161986161985, + "grad_norm": 0.5313940009593219, + "learning_rate": 1.6551904506392008e-05, + "loss": 0.1746, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05604414641857147, + "step": 5160, + "valid_targets_mean": 2591.5, + "valid_targets_min": 842 + }, + { + "epoch": 4.202686202686203, + "grad_norm": 0.4458022538668301, + "learning_rate": 1.651194067976713e-05, + "loss": 0.1437, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08318484574556351, + "step": 5165, + "valid_targets_mean": 6006.9, + "valid_targets_min": 4769 + }, + { + "epoch": 4.206756206756207, + "grad_norm": 0.4421797804474541, + "learning_rate": 1.6471991211908768e-05, + "loss": 0.1699, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08438228070735931, + "step": 5170, + "valid_targets_mean": 6764.2, + "valid_targets_min": 5046 + }, + { + "epoch": 4.210826210826211, + "grad_norm": 0.44259170394302194, + "learning_rate": 1.6432056267270836e-05, + "loss": 0.1755, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09295166283845901, + "step": 5175, + "valid_targets_mean": 6055.8, + "valid_targets_min": 5182 + }, + { + "epoch": 4.214896214896215, + "grad_norm": 0.4174437145697535, + "learning_rate": 1.6392136010247496e-05, + "loss": 0.1699, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08445718884468079, + "step": 5180, + "valid_targets_mean": 7101.6, + "valid_targets_min": 5366 + }, + { + "epoch": 4.218966218966219, + "grad_norm": 0.39047654580039387, + "learning_rate": 1.6352230605172438e-05, + "loss": 0.1674, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08764854073524475, + "step": 5185, + "valid_targets_mean": 7111.9, + "valid_targets_min": 5192 + }, + { + "epoch": 4.223036223036223, + "grad_norm": 0.44083057032394674, + "learning_rate": 1.63123402163182e-05, + "loss": 0.1726, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08842865377664566, + "step": 5190, + "valid_targets_mean": 5509.9, + "valid_targets_min": 4790 + }, + { + "epoch": 4.227106227106227, + "grad_norm": 0.9039249010855949, + "learning_rate": 1.6272465007895528e-05, + "loss": 0.1706, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09698623418807983, + "step": 5195, + "valid_targets_mean": 1581.9, + "valid_targets_min": 864 + }, + { + "epoch": 4.2311762311762315, + "grad_norm": 0.6984318659356797, + "learning_rate": 1.623260514405266e-05, + "loss": 0.1718, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07721932977437973, + "step": 5200, + "valid_targets_mean": 1333.2, + "valid_targets_min": 632 + }, + { + "epoch": 4.235246235246235, + "grad_norm": 0.7988165421330412, + "learning_rate": 1.619276078887467e-05, + "loss": 0.1566, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0798022449016571, + "step": 5205, + "valid_targets_mean": 1602.9, + "valid_targets_min": 875 + }, + { + "epoch": 4.239316239316239, + "grad_norm": 0.8839676972961594, + "learning_rate": 1.6152932106382795e-05, + "loss": 0.1535, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07965122908353806, + "step": 5210, + "valid_targets_mean": 1340.4, + "valid_targets_min": 701 + }, + { + "epoch": 4.243386243386244, + "grad_norm": 0.7568566322974944, + "learning_rate": 1.6113119260533743e-05, + "loss": 0.1445, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08617518842220306, + "step": 5215, + "valid_targets_mean": 1792.9, + "valid_targets_min": 898 + }, + { + "epoch": 4.247456247456247, + "grad_norm": 0.7853461839643316, + "learning_rate": 1.6073322415219045e-05, + "loss": 0.15, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07748760282993317, + "step": 5220, + "valid_targets_mean": 1433.1, + "valid_targets_min": 1070 + }, + { + "epoch": 4.2515262515262515, + "grad_norm": 0.775021891537993, + "learning_rate": 1.6033541734264358e-05, + "loss": 0.1558, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07678981870412827, + "step": 5225, + "valid_targets_mean": 1589.5, + "valid_targets_min": 970 + }, + { + "epoch": 4.255596255596256, + "grad_norm": 0.7698989145856481, + "learning_rate": 1.5993777381428792e-05, + "loss": 0.1352, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06523817032575607, + "step": 5230, + "valid_targets_mean": 1270.5, + "valid_targets_min": 648 + }, + { + "epoch": 4.259666259666259, + "grad_norm": 0.7897405384596183, + "learning_rate": 1.5954029520404252e-05, + "loss": 0.1584, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08299176394939423, + "step": 5235, + "valid_targets_mean": 1674.5, + "valid_targets_min": 727 + }, + { + "epoch": 4.263736263736264, + "grad_norm": 0.7201703295282144, + "learning_rate": 1.5914298314814752e-05, + "loss": 0.1395, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05681309849023819, + "step": 5240, + "valid_targets_mean": 1406.0, + "valid_targets_min": 770 + }, + { + "epoch": 4.267806267806268, + "grad_norm": 0.8007703569426596, + "learning_rate": 1.587458392821574e-05, + "loss": 0.1468, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0649387463927269, + "step": 5245, + "valid_targets_mean": 1381.2, + "valid_targets_min": 572 + }, + { + "epoch": 4.2718762718762715, + "grad_norm": 0.770739474815444, + "learning_rate": 1.5834886524093415e-05, + "loss": 0.1488, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08127094060182571, + "step": 5250, + "valid_targets_mean": 1580.4, + "valid_targets_min": 633 + }, + { + "epoch": 4.275946275946276, + "grad_norm": 0.8368026866328528, + "learning_rate": 1.5795206265864086e-05, + "loss": 0.1471, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0783461183309555, + "step": 5255, + "valid_targets_mean": 1478.8, + "valid_targets_min": 923 + }, + { + "epoch": 4.28001628001628, + "grad_norm": 0.7152654874841173, + "learning_rate": 1.575554331687348e-05, + "loss": 0.1369, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06594514846801758, + "step": 5260, + "valid_targets_mean": 1401.0, + "valid_targets_min": 740 + }, + { + "epoch": 4.284086284086284, + "grad_norm": 0.7469453449896892, + "learning_rate": 1.5715897840396056e-05, + "loss": 0.1379, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06428131461143494, + "step": 5265, + "valid_targets_mean": 1588.9, + "valid_targets_min": 870 + }, + { + "epoch": 4.288156288156288, + "grad_norm": 0.7630766511387223, + "learning_rate": 1.5676269999634355e-05, + "loss": 0.1382, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10081503540277481, + "step": 5270, + "valid_targets_mean": 2008.4, + "valid_targets_min": 1042 + }, + { + "epoch": 4.292226292226292, + "grad_norm": 0.7719765796935957, + "learning_rate": 1.5636659957718317e-05, + "loss": 0.1471, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.058920711278915405, + "step": 5275, + "valid_targets_mean": 1209.0, + "valid_targets_min": 870 + }, + { + "epoch": 4.296296296296296, + "grad_norm": 0.7392001894065844, + "learning_rate": 1.5597067877704627e-05, + "loss": 0.1467, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07120724767446518, + "step": 5280, + "valid_targets_mean": 1535.5, + "valid_targets_min": 740 + }, + { + "epoch": 4.3003663003663, + "grad_norm": 0.6940002890801722, + "learning_rate": 1.5557493922576e-05, + "loss": 0.1484, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07191811501979828, + "step": 5285, + "valid_targets_mean": 1642.9, + "valid_targets_min": 906 + }, + { + "epoch": 4.3044363044363045, + "grad_norm": 0.7996020542171901, + "learning_rate": 1.5517938255240558e-05, + "loss": 0.1415, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07438832521438599, + "step": 5290, + "valid_targets_mean": 1570.5, + "valid_targets_min": 814 + }, + { + "epoch": 4.308506308506309, + "grad_norm": 0.7539368794630141, + "learning_rate": 1.5478401038531132e-05, + "loss": 0.1442, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06699814647436142, + "step": 5295, + "valid_targets_mean": 1722.1, + "valid_targets_min": 832 + }, + { + "epoch": 4.312576312576312, + "grad_norm": 0.6917100664184215, + "learning_rate": 1.543888243520462e-05, + "loss": 0.1418, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05670752376317978, + "step": 5300, + "valid_targets_mean": 1453.5, + "valid_targets_min": 891 + }, + { + "epoch": 4.316646316646317, + "grad_norm": 0.9493477730431549, + "learning_rate": 1.5399382607941267e-05, + "loss": 0.1422, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08421558141708374, + "step": 5305, + "valid_targets_mean": 1584.4, + "valid_targets_min": 734 + }, + { + "epoch": 4.320716320716321, + "grad_norm": 0.7583084640701927, + "learning_rate": 1.535990171934405e-05, + "loss": 0.1323, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06385685503482819, + "step": 5310, + "valid_targets_mean": 1495.2, + "valid_targets_min": 737 + }, + { + "epoch": 4.3247863247863245, + "grad_norm": 0.8067972324986055, + "learning_rate": 1.5320439931937968e-05, + "loss": 0.138, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07771594822406769, + "step": 5315, + "valid_targets_mean": 1737.0, + "valid_targets_min": 872 + }, + { + "epoch": 4.328856328856329, + "grad_norm": 0.791867781812196, + "learning_rate": 1.5280997408169412e-05, + "loss": 0.1346, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07138702273368835, + "step": 5320, + "valid_targets_mean": 1431.6, + "valid_targets_min": 805 + }, + { + "epoch": 4.332926332926333, + "grad_norm": 0.7550711735218681, + "learning_rate": 1.5241574310405437e-05, + "loss": 0.1433, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08015383780002594, + "step": 5325, + "valid_targets_mean": 2060.2, + "valid_targets_min": 1253 + }, + { + "epoch": 4.336996336996337, + "grad_norm": 0.8677822625949797, + "learning_rate": 1.5202170800933157e-05, + "loss": 0.144, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0818401500582695, + "step": 5330, + "valid_targets_mean": 2063.9, + "valid_targets_min": 1264 + }, + { + "epoch": 4.341066341066341, + "grad_norm": 0.7474043536365914, + "learning_rate": 1.516278704195904e-05, + "loss": 0.1371, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05221828818321228, + "step": 5335, + "valid_targets_mean": 1036.5, + "valid_targets_min": 613 + }, + { + "epoch": 4.345136345136345, + "grad_norm": 0.7889949523343204, + "learning_rate": 1.512342319560826e-05, + "loss": 0.1371, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06708300858736038, + "step": 5340, + "valid_targets_mean": 1674.4, + "valid_targets_min": 768 + }, + { + "epoch": 4.349206349206349, + "grad_norm": 0.7873060459244604, + "learning_rate": 1.5084079423924008e-05, + "loss": 0.1365, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0583968311548233, + "step": 5345, + "valid_targets_mean": 1249.8, + "valid_targets_min": 714 + }, + { + "epoch": 4.353276353276353, + "grad_norm": 0.7925060855022024, + "learning_rate": 1.5044755888866838e-05, + "loss": 0.144, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05081354081630707, + "step": 5350, + "valid_targets_mean": 1114.8, + "valid_targets_min": 800 + }, + { + "epoch": 4.357346357346358, + "grad_norm": 0.780052779669191, + "learning_rate": 1.5005452752314016e-05, + "loss": 0.1348, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0766829401254654, + "step": 5355, + "valid_targets_mean": 1780.9, + "valid_targets_min": 933 + }, + { + "epoch": 4.361416361416361, + "grad_norm": 0.8731555755183956, + "learning_rate": 1.4966170176058804e-05, + "loss": 0.1355, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06203790381550789, + "step": 5360, + "valid_targets_mean": 1113.6, + "valid_targets_min": 746 + }, + { + "epoch": 4.365486365486365, + "grad_norm": 0.8105488638885902, + "learning_rate": 1.4926908321809856e-05, + "loss": 0.1238, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06966659426689148, + "step": 5365, + "valid_targets_mean": 1265.0, + "valid_targets_min": 814 + }, + { + "epoch": 4.36955636955637, + "grad_norm": 0.7570799683681111, + "learning_rate": 1.4887667351190508e-05, + "loss": 0.1387, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07910779118537903, + "step": 5370, + "valid_targets_mean": 1652.1, + "valid_targets_min": 635 + }, + { + "epoch": 4.373626373626374, + "grad_norm": 0.8373628622647403, + "learning_rate": 1.4848447425738135e-05, + "loss": 0.1351, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05490382760763168, + "step": 5375, + "valid_targets_mean": 1231.2, + "valid_targets_min": 739 + }, + { + "epoch": 4.3776963776963775, + "grad_norm": 0.7459004049358479, + "learning_rate": 1.4809248706903476e-05, + "loss": 0.134, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0814879834651947, + "step": 5380, + "valid_targets_mean": 1682.4, + "valid_targets_min": 831 + }, + { + "epoch": 4.381766381766382, + "grad_norm": 0.7901027972617194, + "learning_rate": 1.4770071356049966e-05, + "loss": 0.1363, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06546150892972946, + "step": 5385, + "valid_targets_mean": 1571.6, + "valid_targets_min": 1015 + }, + { + "epoch": 4.385836385836386, + "grad_norm": 0.7459849965828421, + "learning_rate": 1.4730915534453084e-05, + "loss": 0.1364, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.059574246406555176, + "step": 5390, + "valid_targets_mean": 1276.8, + "valid_targets_min": 711 + }, + { + "epoch": 4.38990638990639, + "grad_norm": 0.7250412854507754, + "learning_rate": 1.4691781403299695e-05, + "loss": 0.1348, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04473428428173065, + "step": 5395, + "valid_targets_mean": 1147.5, + "valid_targets_min": 733 + }, + { + "epoch": 4.393976393976394, + "grad_norm": 0.9341622348888069, + "learning_rate": 1.4652669123687335e-05, + "loss": 0.1388, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08530112355947495, + "step": 5400, + "valid_targets_mean": 1604.0, + "valid_targets_min": 953 + }, + { + "epoch": 4.398046398046398, + "grad_norm": 0.7506012218410362, + "learning_rate": 1.4613578856623634e-05, + "loss": 0.145, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0721062421798706, + "step": 5405, + "valid_targets_mean": 1784.0, + "valid_targets_min": 1011 + }, + { + "epoch": 4.402116402116402, + "grad_norm": 0.9539670072016909, + "learning_rate": 1.4574510763025571e-05, + "loss": 0.1517, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09979036450386047, + "step": 5410, + "valid_targets_mean": 1729.0, + "valid_targets_min": 1143 + }, + { + "epoch": 4.406186406186406, + "grad_norm": 0.8152959339758997, + "learning_rate": 1.4535465003718872e-05, + "loss": 0.1364, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06546957790851593, + "step": 5415, + "valid_targets_mean": 1483.9, + "valid_targets_min": 699 + }, + { + "epoch": 4.410256410256411, + "grad_norm": 1.0303278083543088, + "learning_rate": 1.4496441739437308e-05, + "loss": 0.1309, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.059898581355810165, + "step": 5420, + "valid_targets_mean": 1198.4, + "valid_targets_min": 680 + }, + { + "epoch": 4.414326414326414, + "grad_norm": 0.8198228481186385, + "learning_rate": 1.445744113082205e-05, + "loss": 0.1438, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06732296943664551, + "step": 5425, + "valid_targets_mean": 1365.2, + "valid_targets_min": 687 + }, + { + "epoch": 4.418396418396418, + "grad_norm": 0.9913925030956331, + "learning_rate": 1.4418463338421014e-05, + "loss": 0.1331, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05075017362833023, + "step": 5430, + "valid_targets_mean": 1142.9, + "valid_targets_min": 835 + }, + { + "epoch": 4.422466422466423, + "grad_norm": 0.8461959968216606, + "learning_rate": 1.4379508522688172e-05, + "loss": 0.1288, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06075669825077057, + "step": 5435, + "valid_targets_mean": 1316.9, + "valid_targets_min": 829 + }, + { + "epoch": 4.426536426536426, + "grad_norm": 0.7557942053954224, + "learning_rate": 1.4340576843982941e-05, + "loss": 0.1279, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07234987616539001, + "step": 5440, + "valid_targets_mean": 1739.2, + "valid_targets_min": 1138 + }, + { + "epoch": 4.430606430606431, + "grad_norm": 0.7526966602303533, + "learning_rate": 1.4301668462569463e-05, + "loss": 0.1382, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06122884154319763, + "step": 5445, + "valid_targets_mean": 1592.2, + "valid_targets_min": 1174 + }, + { + "epoch": 4.434676434676435, + "grad_norm": 0.7785215289541277, + "learning_rate": 1.4262783538615997e-05, + "loss": 0.1384, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06840664893388748, + "step": 5450, + "valid_targets_mean": 1543.1, + "valid_targets_min": 554 + }, + { + "epoch": 4.438746438746438, + "grad_norm": 0.8035141393477653, + "learning_rate": 1.4223922232194231e-05, + "loss": 0.1342, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06752649694681168, + "step": 5455, + "valid_targets_mean": 1430.0, + "valid_targets_min": 843 + }, + { + "epoch": 4.442816442816443, + "grad_norm": 0.680868471747835, + "learning_rate": 1.4185084703278636e-05, + "loss": 0.1298, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06250318884849548, + "step": 5460, + "valid_targets_mean": 1622.9, + "valid_targets_min": 849 + }, + { + "epoch": 4.446886446886447, + "grad_norm": 0.8578202447129205, + "learning_rate": 1.4146271111745785e-05, + "loss": 0.1394, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06331510841846466, + "step": 5465, + "valid_targets_mean": 1429.0, + "valid_targets_min": 612 + }, + { + "epoch": 4.4509564509564505, + "grad_norm": 0.8243591653669494, + "learning_rate": 1.4107481617373738e-05, + "loss": 0.1366, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05521140247583389, + "step": 5470, + "valid_targets_mean": 1283.0, + "valid_targets_min": 782 + }, + { + "epoch": 4.455026455026455, + "grad_norm": 0.8492636328085363, + "learning_rate": 1.406871637984132e-05, + "loss": 0.135, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07561201602220535, + "step": 5475, + "valid_targets_mean": 1457.0, + "valid_targets_min": 926 + }, + { + "epoch": 4.459096459096459, + "grad_norm": 0.7560434633983276, + "learning_rate": 1.4029975558727546e-05, + "loss": 0.1292, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05740554630756378, + "step": 5480, + "valid_targets_mean": 1399.6, + "valid_targets_min": 822 + }, + { + "epoch": 4.463166463166463, + "grad_norm": 0.7108917719684089, + "learning_rate": 1.399125931351088e-05, + "loss": 0.1241, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.060493603348731995, + "step": 5485, + "valid_targets_mean": 1464.6, + "valid_targets_min": 670 + }, + { + "epoch": 4.467236467236467, + "grad_norm": 0.8373770508282027, + "learning_rate": 1.3952567803568648e-05, + "loss": 0.1383, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06813108921051025, + "step": 5490, + "valid_targets_mean": 1504.8, + "valid_targets_min": 647 + }, + { + "epoch": 4.471306471306471, + "grad_norm": 0.7967991746334874, + "learning_rate": 1.391390118817634e-05, + "loss": 0.135, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.051831118762493134, + "step": 5495, + "valid_targets_mean": 1276.9, + "valid_targets_min": 791 + }, + { + "epoch": 4.475376475376476, + "grad_norm": 0.8724038689238638, + "learning_rate": 1.3875259626506958e-05, + "loss": 0.1294, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.058950334787368774, + "step": 5500, + "valid_targets_mean": 1291.4, + "valid_targets_min": 746 + }, + { + "epoch": 4.479446479446479, + "grad_norm": 0.8243173279519246, + "learning_rate": 1.383664327763039e-05, + "loss": 0.1348, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05535842478275299, + "step": 5505, + "valid_targets_mean": 1409.1, + "valid_targets_min": 892 + }, + { + "epoch": 4.483516483516484, + "grad_norm": 0.8045208272162921, + "learning_rate": 1.3798052300512707e-05, + "loss": 0.1382, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07621202617883682, + "step": 5510, + "valid_targets_mean": 1581.9, + "valid_targets_min": 914 + }, + { + "epoch": 4.487586487586488, + "grad_norm": 0.8000566735592727, + "learning_rate": 1.3759486854015558e-05, + "loss": 0.1319, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06206750124692917, + "step": 5515, + "valid_targets_mean": 1647.6, + "valid_targets_min": 636 + }, + { + "epoch": 4.491656491656491, + "grad_norm": 0.841307842616834, + "learning_rate": 1.3720947096895487e-05, + "loss": 0.132, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06576070189476013, + "step": 5520, + "valid_targets_mean": 1523.5, + "valid_targets_min": 641 + }, + { + "epoch": 4.495726495726496, + "grad_norm": 0.8127961832136743, + "learning_rate": 1.3682433187803283e-05, + "loss": 0.1333, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07405531406402588, + "step": 5525, + "valid_targets_mean": 1629.4, + "valid_targets_min": 1020 + }, + { + "epoch": 4.4997964997965, + "grad_norm": 0.8576090738580665, + "learning_rate": 1.3643945285283336e-05, + "loss": 0.1305, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05857028067111969, + "step": 5530, + "valid_targets_mean": 1163.4, + "valid_targets_min": 729 + }, + { + "epoch": 4.503866503866504, + "grad_norm": 0.8768940435922552, + "learning_rate": 1.3605483547772977e-05, + "loss": 0.1393, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08095380663871765, + "step": 5535, + "valid_targets_mean": 1613.2, + "valid_targets_min": 1001 + }, + { + "epoch": 4.507936507936508, + "grad_norm": 0.8051072533291032, + "learning_rate": 1.3567048133601821e-05, + "loss": 0.1387, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06716311722993851, + "step": 5540, + "valid_targets_mean": 1567.0, + "valid_targets_min": 1175 + }, + { + "epoch": 4.512006512006512, + "grad_norm": 0.8552572932480055, + "learning_rate": 1.352863920099114e-05, + "loss": 0.1346, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08142690360546112, + "step": 5545, + "valid_targets_mean": 1609.0, + "valid_targets_min": 892 + }, + { + "epoch": 4.516076516076516, + "grad_norm": 0.9469817635167835, + "learning_rate": 1.3490256908053165e-05, + "loss": 0.1362, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06301309913396835, + "step": 5550, + "valid_targets_mean": 1425.2, + "valid_targets_min": 801 + }, + { + "epoch": 4.52014652014652, + "grad_norm": 0.7579582875823737, + "learning_rate": 1.3451901412790485e-05, + "loss": 0.1237, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06315524131059647, + "step": 5555, + "valid_targets_mean": 1462.4, + "valid_targets_min": 564 + }, + { + "epoch": 4.524216524216524, + "grad_norm": 0.7776282955223374, + "learning_rate": 1.341357287309537e-05, + "loss": 0.1266, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06340664625167847, + "step": 5560, + "valid_targets_mean": 1749.1, + "valid_targets_min": 651 + }, + { + "epoch": 4.528286528286529, + "grad_norm": 0.8269577317944914, + "learning_rate": 1.3375271446749125e-05, + "loss": 0.1354, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.053509172052145004, + "step": 5565, + "valid_targets_mean": 1161.0, + "valid_targets_min": 716 + }, + { + "epoch": 4.532356532356532, + "grad_norm": 0.7967004348591504, + "learning_rate": 1.3336997291421441e-05, + "loss": 0.1275, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06124432757496834, + "step": 5570, + "valid_targets_mean": 1425.6, + "valid_targets_min": 744 + }, + { + "epoch": 4.536426536426537, + "grad_norm": 0.7435104069530646, + "learning_rate": 1.3298750564669751e-05, + "loss": 0.1252, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05810406804084778, + "step": 5575, + "valid_targets_mean": 1391.1, + "valid_targets_min": 1218 + }, + { + "epoch": 4.540496540496541, + "grad_norm": 0.7999784062535612, + "learning_rate": 1.3260531423938571e-05, + "loss": 0.1327, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07500585913658142, + "step": 5580, + "valid_targets_mean": 1724.5, + "valid_targets_min": 793 + }, + { + "epoch": 4.544566544566544, + "grad_norm": 0.8510244277487944, + "learning_rate": 1.3222340026558855e-05, + "loss": 0.1411, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06173360347747803, + "step": 5585, + "valid_targets_mean": 1366.8, + "valid_targets_min": 846 + }, + { + "epoch": 4.548636548636549, + "grad_norm": 0.8073160848727309, + "learning_rate": 1.3184176529747357e-05, + "loss": 0.1273, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07489972561597824, + "step": 5590, + "valid_targets_mean": 1772.1, + "valid_targets_min": 979 + }, + { + "epoch": 4.552706552706553, + "grad_norm": 0.7253390941686678, + "learning_rate": 1.3146041090605977e-05, + "loss": 0.1351, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07174719870090485, + "step": 5595, + "valid_targets_mean": 1778.9, + "valid_targets_min": 1093 + }, + { + "epoch": 4.556776556776557, + "grad_norm": 0.7424924022896014, + "learning_rate": 1.3107933866121117e-05, + "loss": 0.1167, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0609469898045063, + "step": 5600, + "valid_targets_mean": 1510.8, + "valid_targets_min": 595 + }, + { + "epoch": 4.560846560846561, + "grad_norm": 0.8618358393453901, + "learning_rate": 1.306985501316302e-05, + "loss": 0.1284, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06146273389458656, + "step": 5605, + "valid_targets_mean": 1330.4, + "valid_targets_min": 868 + }, + { + "epoch": 4.564916564916565, + "grad_norm": 0.8268278139473043, + "learning_rate": 1.3031804688485143e-05, + "loss": 0.1258, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07098115980625153, + "step": 5610, + "valid_targets_mean": 1504.2, + "valid_targets_min": 914 + }, + { + "epoch": 4.568986568986569, + "grad_norm": 0.7720719541618248, + "learning_rate": 1.2993783048723515e-05, + "loss": 0.1288, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07257020473480225, + "step": 5615, + "valid_targets_mean": 1972.9, + "valid_targets_min": 1080 + }, + { + "epoch": 4.573056573056573, + "grad_norm": 0.7939819756885284, + "learning_rate": 1.295579025039607e-05, + "loss": 0.1297, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06880711764097214, + "step": 5620, + "valid_targets_mean": 1519.6, + "valid_targets_min": 658 + }, + { + "epoch": 4.5771265771265774, + "grad_norm": 0.7859404051654513, + "learning_rate": 1.2917826449902005e-05, + "loss": 0.1336, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.057696133852005005, + "step": 5625, + "valid_targets_mean": 1362.0, + "valid_targets_min": 1018 + }, + { + "epoch": 4.581196581196581, + "grad_norm": 0.828035069084943, + "learning_rate": 1.2879891803521167e-05, + "loss": 0.1343, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07344723492860794, + "step": 5630, + "valid_targets_mean": 1623.0, + "valid_targets_min": 1041 + }, + { + "epoch": 4.585266585266585, + "grad_norm": 0.7735417380419619, + "learning_rate": 1.2841986467413384e-05, + "loss": 0.125, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05607450753450394, + "step": 5635, + "valid_targets_mean": 2889.2, + "valid_targets_min": 534 + }, + { + "epoch": 4.58933658933659, + "grad_norm": 0.460850977797522, + "learning_rate": 1.2804110597617817e-05, + "loss": 0.111, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05251425504684448, + "step": 5640, + "valid_targets_mean": 3423.9, + "valid_targets_min": 2404 + }, + { + "epoch": 4.593406593406593, + "grad_norm": 0.3657662471298085, + "learning_rate": 1.2766264350052334e-05, + "loss": 0.0806, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.041075143963098526, + "step": 5645, + "valid_targets_mean": 3592.0, + "valid_targets_min": 3064 + }, + { + "epoch": 4.597476597476597, + "grad_norm": 0.3851006948518706, + "learning_rate": 1.2728447880512862e-05, + "loss": 0.0808, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.039379969239234924, + "step": 5650, + "valid_targets_mean": 3425.4, + "valid_targets_min": 2901 + }, + { + "epoch": 4.601546601546602, + "grad_norm": 0.3982103509768099, + "learning_rate": 1.2690661344672755e-05, + "loss": 0.0948, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0414591059088707, + "step": 5655, + "valid_targets_mean": 3393.9, + "valid_targets_min": 2392 + }, + { + "epoch": 4.605616605616605, + "grad_norm": 0.46756243462854763, + "learning_rate": 1.2652904898082117e-05, + "loss": 0.078, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.034624554216861725, + "step": 5660, + "valid_targets_mean": 2066.5, + "valid_targets_min": 662 + }, + { + "epoch": 4.60968660968661, + "grad_norm": 0.8771981868563371, + "learning_rate": 1.2615178696167205e-05, + "loss": 0.1068, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06684105098247528, + "step": 5665, + "valid_targets_mean": 1519.1, + "valid_targets_min": 724 + }, + { + "epoch": 4.613756613756614, + "grad_norm": 0.3782756354180073, + "learning_rate": 1.2577482894229777e-05, + "loss": 0.1116, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04186592623591423, + "step": 5670, + "valid_targets_mean": 4022.1, + "valid_targets_min": 3668 + }, + { + "epoch": 4.617826617826617, + "grad_norm": 0.3867031798803203, + "learning_rate": 1.2539817647446446e-05, + "loss": 0.0872, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04109447821974754, + "step": 5675, + "valid_targets_mean": 3483.0, + "valid_targets_min": 2891 + }, + { + "epoch": 4.621896621896622, + "grad_norm": 0.5812366237791688, + "learning_rate": 1.2502183110868031e-05, + "loss": 0.1197, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09486156702041626, + "step": 5680, + "valid_targets_mean": 2689.9, + "valid_targets_min": 246 + }, + { + "epoch": 4.625966625966626, + "grad_norm": 0.28966277428771003, + "learning_rate": 1.2464579439418943e-05, + "loss": 0.0727, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.029673418030142784, + "step": 5685, + "valid_targets_mean": 5332.0, + "valid_targets_min": 886 + }, + { + "epoch": 4.63003663003663, + "grad_norm": 0.4205463767229215, + "learning_rate": 1.2427006787896537e-05, + "loss": 0.0825, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.03594677895307541, + "step": 5690, + "valid_targets_mean": 2137.1, + "valid_targets_min": 848 + }, + { + "epoch": 4.634106634106634, + "grad_norm": 0.31841598595521275, + "learning_rate": 1.2389465310970459e-05, + "loss": 0.0793, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.036279890686273575, + "step": 5695, + "valid_targets_mean": 3335.8, + "valid_targets_min": 758 + }, + { + "epoch": 4.638176638176638, + "grad_norm": 0.3807391992506858, + "learning_rate": 1.2351955163182039e-05, + "loss": 0.0948, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04160192608833313, + "step": 5700, + "valid_targets_mean": 2811.1, + "valid_targets_min": 1146 + }, + { + "epoch": 4.642246642246643, + "grad_norm": 0.3742032229362971, + "learning_rate": 1.2314476498943622e-05, + "loss": 0.1083, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04018232598900795, + "step": 5705, + "valid_targets_mean": 2773.0, + "valid_targets_min": 645 + }, + { + "epoch": 4.646316646316646, + "grad_norm": 0.3939537525171128, + "learning_rate": 1.2277029472537967e-05, + "loss": 0.0796, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.038491107523441315, + "step": 5710, + "valid_targets_mean": 2632.2, + "valid_targets_min": 579 + }, + { + "epoch": 4.6503866503866504, + "grad_norm": 0.4057474887227267, + "learning_rate": 1.2239614238117588e-05, + "loss": 0.0753, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04323495551943779, + "step": 5715, + "valid_targets_mean": 2741.0, + "valid_targets_min": 1111 + }, + { + "epoch": 4.654456654456655, + "grad_norm": 0.5815162907436859, + "learning_rate": 1.2202230949704117e-05, + "loss": 0.0808, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06871599704027176, + "step": 5720, + "valid_targets_mean": 2122.4, + "valid_targets_min": 812 + }, + { + "epoch": 4.658526658526658, + "grad_norm": 0.4521817666906546, + "learning_rate": 1.2164879761187691e-05, + "loss": 0.0871, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.03911435604095459, + "step": 5725, + "valid_targets_mean": 2335.0, + "valid_targets_min": 493 + }, + { + "epoch": 4.662596662596663, + "grad_norm": 0.47178406108628634, + "learning_rate": 1.212756082632631e-05, + "loss": 0.0865, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.03535778075456619, + "step": 5730, + "valid_targets_mean": 4099.9, + "valid_targets_min": 2949 + }, + { + "epoch": 4.666666666666667, + "grad_norm": 0.43766318103399476, + "learning_rate": 1.2090274298745172e-05, + "loss": 0.0942, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04855756461620331, + "step": 5735, + "valid_targets_mean": 2667.8, + "valid_targets_min": 990 + }, + { + "epoch": 4.67073667073667, + "grad_norm": 0.5685037462804967, + "learning_rate": 1.2053020331936108e-05, + "loss": 0.1052, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09078393876552582, + "step": 5740, + "valid_targets_mean": 1997.2, + "valid_targets_min": 974 + }, + { + "epoch": 4.674806674806675, + "grad_norm": 0.4814600386863112, + "learning_rate": 1.2015799079256876e-05, + "loss": 0.11, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.053439922630786896, + "step": 5745, + "valid_targets_mean": 2656.1, + "valid_targets_min": 896 + }, + { + "epoch": 4.678876678876679, + "grad_norm": 0.4807779629525887, + "learning_rate": 1.1978610693930587e-05, + "loss": 0.0756, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.03917412832379341, + "step": 5750, + "valid_targets_mean": 1499.9, + "valid_targets_min": 550 + }, + { + "epoch": 4.682946682946683, + "grad_norm": 0.44551985651283915, + "learning_rate": 1.1941455329045047e-05, + "loss": 0.1135, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04586545005440712, + "step": 5755, + "valid_targets_mean": 2963.0, + "valid_targets_min": 873 + }, + { + "epoch": 4.687016687016687, + "grad_norm": 0.4671700313248316, + "learning_rate": 1.1904333137552124e-05, + "loss": 0.0998, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06650416553020477, + "step": 5760, + "valid_targets_mean": 3018.8, + "valid_targets_min": 1247 + }, + { + "epoch": 4.691086691086691, + "grad_norm": 0.54473726834285, + "learning_rate": 1.1867244272267136e-05, + "loss": 0.0842, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05167562887072563, + "step": 5765, + "valid_targets_mean": 1737.4, + "valid_targets_min": 800 + }, + { + "epoch": 4.695156695156696, + "grad_norm": 0.6730426959023267, + "learning_rate": 1.1830188885868213e-05, + "loss": 0.2561, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13521423935890198, + "step": 5770, + "valid_targets_mean": 2176.9, + "valid_targets_min": 807 + }, + { + "epoch": 4.699226699226699, + "grad_norm": 0.4728336672436709, + "learning_rate": 1.1793167130895656e-05, + "loss": 0.1083, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.032692648470401764, + "step": 5775, + "valid_targets_mean": 1465.0, + "valid_targets_min": 507 + }, + { + "epoch": 4.7032967032967035, + "grad_norm": 0.3873581322887633, + "learning_rate": 1.1756179159751322e-05, + "loss": 0.0958, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.03570934757590294, + "step": 5780, + "valid_targets_mean": 3186.2, + "valid_targets_min": 658 + }, + { + "epoch": 4.707366707366708, + "grad_norm": 0.4264449290678136, + "learning_rate": 1.1719225124698003e-05, + "loss": 0.1856, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04953089728951454, + "step": 5785, + "valid_targets_mean": 3058.2, + "valid_targets_min": 826 + }, + { + "epoch": 4.711436711436711, + "grad_norm": 0.3737908843266182, + "learning_rate": 1.16823051778588e-05, + "loss": 0.0812, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.052201047539711, + "step": 5790, + "valid_targets_mean": 4502.5, + "valid_targets_min": 2131 + }, + { + "epoch": 4.715506715506716, + "grad_norm": 0.4570728348397021, + "learning_rate": 1.1645419471216462e-05, + "loss": 0.0854, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.056565217673778534, + "step": 5795, + "valid_targets_mean": 3669.0, + "valid_targets_min": 1746 + }, + { + "epoch": 4.71957671957672, + "grad_norm": 0.3818909080202796, + "learning_rate": 1.1608568156612807e-05, + "loss": 0.0854, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.03912724554538727, + "step": 5800, + "valid_targets_mean": 2759.6, + "valid_targets_min": 595 + }, + { + "epoch": 4.7236467236467234, + "grad_norm": 0.36574902640964757, + "learning_rate": 1.1571751385748082e-05, + "loss": 0.0786, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04248278588056564, + "step": 5805, + "valid_targets_mean": 3420.0, + "valid_targets_min": 825 + }, + { + "epoch": 4.727716727716728, + "grad_norm": 0.36160705101402424, + "learning_rate": 1.1534969310180303e-05, + "loss": 0.079, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.03156093508005142, + "step": 5810, + "valid_targets_mean": 3323.0, + "valid_targets_min": 687 + }, + { + "epoch": 4.731786731786732, + "grad_norm": 0.36054700912548693, + "learning_rate": 1.149822208132469e-05, + "loss": 0.075, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.041020315140485764, + "step": 5815, + "valid_targets_mean": 3504.0, + "valid_targets_min": 1956 + }, + { + "epoch": 4.735856735856736, + "grad_norm": 0.4108903603282782, + "learning_rate": 1.1461509850453e-05, + "loss": 0.0814, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.033342353999614716, + "step": 5820, + "valid_targets_mean": 3553.0, + "valid_targets_min": 1421 + }, + { + "epoch": 4.73992673992674, + "grad_norm": 0.43529092649816564, + "learning_rate": 1.1424832768692942e-05, + "loss": 0.0718, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.045596443116664886, + "step": 5825, + "valid_targets_mean": 3672.6, + "valid_targets_min": 1607 + }, + { + "epoch": 4.743996743996744, + "grad_norm": 0.31603285050379853, + "learning_rate": 1.1388190987027485e-05, + "loss": 0.1137, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.021315453574061394, + "step": 5830, + "valid_targets_mean": 2072.2, + "valid_targets_min": 535 + }, + { + "epoch": 4.748066748066748, + "grad_norm": 0.30398769091490463, + "learning_rate": 1.135158465629433e-05, + "loss": 0.0655, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.01872686669230461, + "step": 5835, + "valid_targets_mean": 2852.2, + "valid_targets_min": 583 + }, + { + "epoch": 4.752136752136752, + "grad_norm": 0.41779314691880687, + "learning_rate": 1.1315013927185224e-05, + "loss": 0.0714, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.03134084492921829, + "step": 5840, + "valid_targets_mean": 2778.2, + "valid_targets_min": 591 + }, + { + "epoch": 4.7562067562067565, + "grad_norm": 0.43596477198959716, + "learning_rate": 1.1278478950245364e-05, + "loss": 0.0685, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.040790289640426636, + "step": 5845, + "valid_targets_mean": 3474.1, + "valid_targets_min": 2154 + }, + { + "epoch": 4.76027676027676, + "grad_norm": 0.6262422674089861, + "learning_rate": 1.1241979875872748e-05, + "loss": 0.1148, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0588751845061779, + "step": 5850, + "valid_targets_mean": 2075.6, + "valid_targets_min": 647 + }, + { + "epoch": 4.764346764346764, + "grad_norm": 0.4373065366822497, + "learning_rate": 1.120551685431761e-05, + "loss": 0.0854, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07749606668949127, + "step": 5855, + "valid_targets_mean": 2382.5, + "valid_targets_min": 840 + }, + { + "epoch": 4.768416768416769, + "grad_norm": 0.44549286271516586, + "learning_rate": 1.1169090035681772e-05, + "loss": 0.1014, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.043722283095121384, + "step": 5860, + "valid_targets_mean": 2478.5, + "valid_targets_min": 684 + }, + { + "epoch": 4.772486772486772, + "grad_norm": 0.3836262029735173, + "learning_rate": 1.1132699569917982e-05, + "loss": 0.0804, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.033280737698078156, + "step": 5865, + "valid_targets_mean": 3505.0, + "valid_targets_min": 847 + }, + { + "epoch": 4.7765567765567765, + "grad_norm": 0.6061904603600585, + "learning_rate": 1.1096345606829388e-05, + "loss": 0.1076, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04993060976266861, + "step": 5870, + "valid_targets_mean": 1690.6, + "valid_targets_min": 949 + }, + { + "epoch": 4.780626780626781, + "grad_norm": 0.45611765176673036, + "learning_rate": 1.1060028296068853e-05, + "loss": 0.1027, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04060452803969383, + "step": 5875, + "valid_targets_mean": 3574.0, + "valid_targets_min": 2201 + }, + { + "epoch": 4.784696784696784, + "grad_norm": 0.714044002886353, + "learning_rate": 1.1023747787138361e-05, + "loss": 0.1082, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04921393096446991, + "step": 5880, + "valid_targets_mean": 1174.5, + "valid_targets_min": 649 + }, + { + "epoch": 4.788766788766789, + "grad_norm": 0.44604008395762645, + "learning_rate": 1.0987504229388391e-05, + "loss": 0.0808, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04379687458276749, + "step": 5885, + "valid_targets_mean": 3146.5, + "valid_targets_min": 707 + }, + { + "epoch": 4.792836792836793, + "grad_norm": 0.43017541020951683, + "learning_rate": 1.0951297772017319e-05, + "loss": 0.0847, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.045811258256435394, + "step": 5890, + "valid_targets_mean": 3508.5, + "valid_targets_min": 766 + }, + { + "epoch": 4.7969067969067964, + "grad_norm": 0.48390962547473454, + "learning_rate": 1.0915128564070803e-05, + "loss": 0.086, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.038865868002176285, + "step": 5895, + "valid_targets_mean": 2029.6, + "valid_targets_min": 516 + }, + { + "epoch": 4.800976800976801, + "grad_norm": 0.36825203623662306, + "learning_rate": 1.0878996754441151e-05, + "loss": 0.0702, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04049721732735634, + "step": 5900, + "valid_targets_mean": 3818.9, + "valid_targets_min": 763 + }, + { + "epoch": 4.805046805046805, + "grad_norm": 0.43168309660573984, + "learning_rate": 1.0842902491866716e-05, + "loss": 0.0676, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.039305686950683594, + "step": 5905, + "valid_targets_mean": 4056.5, + "valid_targets_min": 2036 + }, + { + "epoch": 4.8091168091168095, + "grad_norm": 0.35933047571594207, + "learning_rate": 1.0806845924931296e-05, + "loss": 0.0793, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.03556220978498459, + "step": 5910, + "valid_targets_mean": 3477.1, + "valid_targets_min": 1122 + }, + { + "epoch": 4.813186813186813, + "grad_norm": 0.36322550413900717, + "learning_rate": 1.0770827202063505e-05, + "loss": 0.0969, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.02814421057701111, + "step": 5915, + "valid_targets_mean": 2432.0, + "valid_targets_min": 1139 + }, + { + "epoch": 4.817256817256817, + "grad_norm": 0.540577699819694, + "learning_rate": 1.073484647153619e-05, + "loss": 0.0918, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04813909903168678, + "step": 5920, + "valid_targets_mean": 1705.1, + "valid_targets_min": 782 + }, + { + "epoch": 4.821326821326822, + "grad_norm": 0.2873896338117805, + "learning_rate": 1.0698903881465763e-05, + "loss": 0.0796, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.021741347387433052, + "step": 5925, + "valid_targets_mean": 4197.8, + "valid_targets_min": 4050 + }, + { + "epoch": 4.825396825396825, + "grad_norm": 0.34456005761736075, + "learning_rate": 1.0662999579811664e-05, + "loss": 0.0746, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.02447408251464367, + "step": 5930, + "valid_targets_mean": 3064.9, + "valid_targets_min": 945 + }, + { + "epoch": 4.8294668294668295, + "grad_norm": 0.49034557307772714, + "learning_rate": 1.06271337143757e-05, + "loss": 0.0931, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05810309574007988, + "step": 5935, + "valid_targets_mean": 3935.9, + "valid_targets_min": 3235 + }, + { + "epoch": 4.833536833536834, + "grad_norm": 0.31610766883370367, + "learning_rate": 1.0591306432801467e-05, + "loss": 0.0697, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.032884612679481506, + "step": 5940, + "valid_targets_mean": 3656.0, + "valid_targets_min": 2848 + }, + { + "epoch": 4.837606837606837, + "grad_norm": 0.7350255340291643, + "learning_rate": 1.05555178825737e-05, + "loss": 0.0839, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05409277603030205, + "step": 5945, + "valid_targets_mean": 1732.0, + "valid_targets_min": 742 + }, + { + "epoch": 4.841676841676842, + "grad_norm": 0.5190904613777559, + "learning_rate": 1.0519768211017726e-05, + "loss": 0.0792, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.054192811250686646, + "step": 5950, + "valid_targets_mean": 2964.1, + "valid_targets_min": 1547 + }, + { + "epoch": 4.845746845746846, + "grad_norm": 0.48868311864434977, + "learning_rate": 1.0484057565298822e-05, + "loss": 0.0814, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04929513484239578, + "step": 5955, + "valid_targets_mean": 1643.6, + "valid_targets_min": 811 + }, + { + "epoch": 4.8498168498168495, + "grad_norm": 0.5380227640246409, + "learning_rate": 1.0448386092421586e-05, + "loss": 0.0945, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04821448400616646, + "step": 5960, + "valid_targets_mean": 1432.5, + "valid_targets_min": 854 + }, + { + "epoch": 4.853886853886854, + "grad_norm": 0.5675102154058619, + "learning_rate": 1.0412753939229385e-05, + "loss": 0.0752, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04497600346803665, + "step": 5965, + "valid_targets_mean": 1551.0, + "valid_targets_min": 886 + }, + { + "epoch": 4.857956857956858, + "grad_norm": 0.37368458137912997, + "learning_rate": 1.037716125240372e-05, + "loss": 0.0841, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0343891903758049, + "step": 5970, + "valid_targets_mean": 3490.9, + "valid_targets_min": 2839 + }, + { + "epoch": 4.8620268620268625, + "grad_norm": 0.465690197105879, + "learning_rate": 1.0341608178463623e-05, + "loss": 0.1007, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0349278599023819, + "step": 5975, + "valid_targets_mean": 3522.9, + "valid_targets_min": 1322 + }, + { + "epoch": 4.866096866096866, + "grad_norm": 0.4539769609059549, + "learning_rate": 1.0306094863765066e-05, + "loss": 0.0791, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04086630046367645, + "step": 5980, + "valid_targets_mean": 2675.1, + "valid_targets_min": 623 + }, + { + "epoch": 4.87016687016687, + "grad_norm": 0.5194040935548523, + "learning_rate": 1.027062145450033e-05, + "loss": 0.0703, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.03959763050079346, + "step": 5985, + "valid_targets_mean": 2448.1, + "valid_targets_min": 690 + }, + { + "epoch": 4.874236874236875, + "grad_norm": 0.5286302251911614, + "learning_rate": 1.023518809669744e-05, + "loss": 0.091, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07098161429166794, + "step": 5990, + "valid_targets_mean": 3094.1, + "valid_targets_min": 1804 + }, + { + "epoch": 4.878306878306878, + "grad_norm": 0.4012211699517951, + "learning_rate": 1.0199794936219554e-05, + "loss": 0.087, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04231107980012894, + "step": 5995, + "valid_targets_mean": 3015.2, + "valid_targets_min": 1605 + }, + { + "epoch": 4.8823768823768825, + "grad_norm": 0.48558169241283544, + "learning_rate": 1.0164442118764328e-05, + "loss": 0.065, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04597931355237961, + "step": 6000, + "valid_targets_mean": 1446.4, + "valid_targets_min": 832 + }, + { + "epoch": 4.886446886446887, + "grad_norm": 0.46675560750287587, + "learning_rate": 1.0129129789863375e-05, + "loss": 0.0799, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04974772036075592, + "step": 6005, + "valid_targets_mean": 3758.4, + "valid_targets_min": 1295 + }, + { + "epoch": 4.89051689051689, + "grad_norm": 0.3718508133031444, + "learning_rate": 1.0093858094881612e-05, + "loss": 0.075, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.029652055352926254, + "step": 6010, + "valid_targets_mean": 3927.1, + "valid_targets_min": 1057 + }, + { + "epoch": 4.894586894586895, + "grad_norm": 0.3548767930814201, + "learning_rate": 1.00586271790167e-05, + "loss": 0.0633, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.029149774461984634, + "step": 6015, + "valid_targets_mean": 3467.9, + "valid_targets_min": 853 + }, + { + "epoch": 4.898656898656899, + "grad_norm": 0.36526262775572454, + "learning_rate": 1.002343718729843e-05, + "loss": 0.0873, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.03588550537824631, + "step": 6020, + "valid_targets_mean": 4106.0, + "valid_targets_min": 3239 + }, + { + "epoch": 4.9027269027269025, + "grad_norm": 0.39347213994342317, + "learning_rate": 9.988288264588106e-06, + "loss": 0.0707, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.041096627712249756, + "step": 6025, + "valid_targets_mean": 3558.8, + "valid_targets_min": 769 + }, + { + "epoch": 4.906796906796907, + "grad_norm": 0.3567865777207391, + "learning_rate": 9.953180555578e-06, + "loss": 0.1034, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.03674373775720596, + "step": 6030, + "valid_targets_mean": 4000.0, + "valid_targets_min": 2959 + }, + { + "epoch": 4.910866910866911, + "grad_norm": 0.2939515031544418, + "learning_rate": 9.918114204790697e-06, + "loss": 0.0905, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.024303892627358437, + "step": 6035, + "valid_targets_mean": 3030.4, + "valid_targets_min": 629 + }, + { + "epoch": 4.914936914936915, + "grad_norm": 0.5314390212449058, + "learning_rate": 9.883089356578545e-06, + "loss": 0.0958, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.045132189989089966, + "step": 6040, + "valid_targets_mean": 3038.5, + "valid_targets_min": 1742 + }, + { + "epoch": 4.919006919006919, + "grad_norm": 0.33340762029888144, + "learning_rate": 9.848106155123045e-06, + "loss": 0.0701, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.02736440859735012, + "step": 6045, + "valid_targets_mean": 3813.6, + "valid_targets_min": 3003 + }, + { + "epoch": 4.923076923076923, + "grad_norm": 0.538168843602704, + "learning_rate": 9.813164744434256e-06, + "loss": 0.0814, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0524737685918808, + "step": 6050, + "valid_targets_mean": 2625.8, + "valid_targets_min": 1025 + }, + { + "epoch": 4.927146927146927, + "grad_norm": 0.39397698811634346, + "learning_rate": 9.778265268350204e-06, + "loss": 0.0767, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04419640451669693, + "step": 6055, + "valid_targets_mean": 3914.9, + "valid_targets_min": 3129 + }, + { + "epoch": 4.931216931216931, + "grad_norm": 0.788807885519458, + "learning_rate": 9.743407870536277e-06, + "loss": 0.1089, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.055179398506879807, + "step": 6060, + "valid_targets_mean": 1266.1, + "valid_targets_min": 458 + }, + { + "epoch": 4.9352869352869355, + "grad_norm": 0.4712531044323888, + "learning_rate": 9.708592694484655e-06, + "loss": 0.1036, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.03245309740304947, + "step": 6065, + "valid_targets_mean": 2087.1, + "valid_targets_min": 459 + }, + { + "epoch": 4.939356939356939, + "grad_norm": 0.5077235785209703, + "learning_rate": 9.673819883513727e-06, + "loss": 0.0858, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06637755781412125, + "step": 6070, + "valid_targets_mean": 3098.8, + "valid_targets_min": 1694 + }, + { + "epoch": 4.943426943426943, + "grad_norm": 0.36026473643652757, + "learning_rate": 9.639089580767445e-06, + "loss": 0.0778, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.03487565368413925, + "step": 6075, + "valid_targets_mean": 4018.6, + "valid_targets_min": 3151 + }, + { + "epoch": 4.947496947496948, + "grad_norm": 0.42480201057736183, + "learning_rate": 9.604401929214805e-06, + "loss": 0.0778, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.033923834562301636, + "step": 6080, + "valid_targets_mean": 3190.0, + "valid_targets_min": 2197 + }, + { + "epoch": 4.951566951566951, + "grad_norm": 0.5467294848551612, + "learning_rate": 9.56975707164922e-06, + "loss": 0.0708, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.029525276273489, + "step": 6085, + "valid_targets_mean": 2252.9, + "valid_targets_min": 858 + }, + { + "epoch": 4.9556369556369555, + "grad_norm": 0.9869857674866476, + "learning_rate": 9.535155150687939e-06, + "loss": 0.087, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07059233635663986, + "step": 6090, + "valid_targets_mean": 975.9, + "valid_targets_min": 563 + }, + { + "epoch": 4.95970695970696, + "grad_norm": 0.3873514877863149, + "learning_rate": 9.500596308771462e-06, + "loss": 0.0744, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.033523522317409515, + "step": 6095, + "valid_targets_mean": 3326.5, + "valid_targets_min": 701 + }, + { + "epoch": 4.963776963776963, + "grad_norm": 0.4142970786518696, + "learning_rate": 9.466080688162937e-06, + "loss": 0.0751, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.03642815351486206, + "step": 6100, + "valid_targets_mean": 3462.9, + "valid_targets_min": 2432 + }, + { + "epoch": 4.967846967846968, + "grad_norm": 0.4388097100302869, + "learning_rate": 9.431608430947619e-06, + "loss": 0.0724, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.03296447917819023, + "step": 6105, + "valid_targets_mean": 3280.9, + "valid_targets_min": 2018 + }, + { + "epoch": 4.971916971916972, + "grad_norm": 0.3737989484289901, + "learning_rate": 9.397179679032219e-06, + "loss": 0.0816, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.03461182862520218, + "step": 6110, + "valid_targets_mean": 3928.1, + "valid_targets_min": 485 + }, + { + "epoch": 4.975986975986976, + "grad_norm": 0.31608767099934815, + "learning_rate": 9.362794574144383e-06, + "loss": 0.0694, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.03851504623889923, + "step": 6115, + "valid_targets_mean": 4310.0, + "valid_targets_min": 800 + }, + { + "epoch": 4.98005698005698, + "grad_norm": 0.3283616246045454, + "learning_rate": 9.328453257832078e-06, + "loss": 0.0714, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.03032471239566803, + "step": 6120, + "valid_targets_mean": 4135.0, + "valid_targets_min": 740 + }, + { + "epoch": 4.984126984126984, + "grad_norm": 0.46368165314117155, + "learning_rate": 9.294155871463007e-06, + "loss": 0.0869, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06997954845428467, + "step": 6125, + "valid_targets_mean": 3285.1, + "valid_targets_min": 1957 + }, + { + "epoch": 4.9881969881969885, + "grad_norm": 0.4185794895076128, + "learning_rate": 9.259902556224034e-06, + "loss": 0.1577, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04614701494574547, + "step": 6130, + "valid_targets_mean": 3392.6, + "valid_targets_min": 1137 + }, + { + "epoch": 4.992266992266992, + "grad_norm": 0.5563291333666768, + "learning_rate": 9.225693453120614e-06, + "loss": 0.0786, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06407204270362854, + "step": 6135, + "valid_targets_mean": 1517.8, + "valid_targets_min": 716 + }, + { + "epoch": 4.996336996336996, + "grad_norm": 0.38670562354030985, + "learning_rate": 9.191528702976173e-06, + "loss": 0.072, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.03636377677321434, + "step": 6140, + "valid_targets_mean": 3538.4, + "valid_targets_min": 886 + }, + { + "epoch": 5.0, + "grad_norm": 0.4464669787231326, + "learning_rate": 9.15740844643159e-06, + "loss": 0.0996, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.051706526428461075, + "step": 6145, + "valid_targets_mean": 4028.4, + "valid_targets_min": 1273 + }, + { + "epoch": 5.004070004070004, + "grad_norm": 0.6854109342653534, + "learning_rate": 9.123332823944552e-06, + "loss": 0.1946, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09130831062793732, + "step": 6150, + "valid_targets_mean": 7389.0, + "valid_targets_min": 5627 + }, + { + "epoch": 5.008140008140008, + "grad_norm": 0.6566317430199434, + "learning_rate": 9.089301975789029e-06, + "loss": 0.1854, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.049035608768463135, + "step": 6155, + "valid_targets_mean": 1294.8, + "valid_targets_min": 422 + }, + { + "epoch": 5.012210012210012, + "grad_norm": 0.414825686168276, + "learning_rate": 9.05531604205467e-06, + "loss": 0.1759, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09194067865610123, + "step": 6160, + "valid_targets_mean": 8539.9, + "valid_targets_min": 6761 + }, + { + "epoch": 5.0162800162800165, + "grad_norm": 0.40549825227625574, + "learning_rate": 9.021375162646233e-06, + "loss": 0.1706, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08057986944913864, + "step": 6165, + "valid_targets_mean": 7579.0, + "valid_targets_min": 6174 + }, + { + "epoch": 5.02035002035002, + "grad_norm": 0.41714578888717524, + "learning_rate": 8.987479477282999e-06, + "loss": 0.1731, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07731068134307861, + "step": 6170, + "valid_targets_mean": 7328.2, + "valid_targets_min": 4839 + }, + { + "epoch": 5.024420024420024, + "grad_norm": 0.40700870452137905, + "learning_rate": 8.953629125498227e-06, + "loss": 0.1813, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09170807898044586, + "step": 6175, + "valid_targets_mean": 6838.2, + "valid_targets_min": 5034 + }, + { + "epoch": 5.028490028490029, + "grad_norm": 0.3922582234171617, + "learning_rate": 8.919824246638528e-06, + "loss": 0.1849, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09518324583768845, + "step": 6180, + "valid_targets_mean": 7248.2, + "valid_targets_min": 5567 + }, + { + "epoch": 5.032560032560032, + "grad_norm": 0.3805986393619526, + "learning_rate": 8.886064979863334e-06, + "loss": 0.1709, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07861495763063431, + "step": 6185, + "valid_targets_mean": 6234.0, + "valid_targets_min": 5603 + }, + { + "epoch": 5.0366300366300365, + "grad_norm": 0.6374511935157152, + "learning_rate": 8.852351464144322e-06, + "loss": 0.1626, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.024300407618284225, + "step": 6190, + "valid_targets_mean": 602.0, + "valid_targets_min": 152 + }, + { + "epoch": 5.040700040700041, + "grad_norm": 0.40242759534056755, + "learning_rate": 8.818683838264826e-06, + "loss": 0.1559, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08006134629249573, + "step": 6195, + "valid_targets_mean": 7299.9, + "valid_targets_min": 6158 + }, + { + "epoch": 5.044770044770045, + "grad_norm": 0.3913058263835821, + "learning_rate": 8.785062240819266e-06, + "loss": 0.1725, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08000493794679642, + "step": 6200, + "valid_targets_mean": 6140.0, + "valid_targets_min": 4648 + }, + { + "epoch": 5.048840048840049, + "grad_norm": 0.4080986618966172, + "learning_rate": 8.751486810212599e-06, + "loss": 0.1654, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08597160875797272, + "step": 6205, + "valid_targets_mean": 6603.0, + "valid_targets_min": 4882 + }, + { + "epoch": 5.052910052910053, + "grad_norm": 0.3727143747867664, + "learning_rate": 8.717957684659717e-06, + "loss": 0.1656, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08394499868154526, + "step": 6210, + "valid_targets_mean": 8015.0, + "valid_targets_min": 6344 + }, + { + "epoch": 5.056980056980057, + "grad_norm": 0.36991121771348234, + "learning_rate": 8.684475002184916e-06, + "loss": 0.1607, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07384654879570007, + "step": 6215, + "valid_targets_mean": 6305.2, + "valid_targets_min": 4622 + }, + { + "epoch": 5.061050061050061, + "grad_norm": 0.3865770710996986, + "learning_rate": 8.651038900621277e-06, + "loss": 0.1659, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0872340202331543, + "step": 6220, + "valid_targets_mean": 7890.8, + "valid_targets_min": 5485 + }, + { + "epoch": 5.065120065120065, + "grad_norm": 0.3847284521430894, + "learning_rate": 8.617649517610148e-06, + "loss": 0.1667, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07563507556915283, + "step": 6225, + "valid_targets_mean": 7016.6, + "valid_targets_min": 4960 + }, + { + "epoch": 5.0691900691900695, + "grad_norm": 0.4213687533775985, + "learning_rate": 8.584306990600554e-06, + "loss": 0.1648, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07866060733795166, + "step": 6230, + "valid_targets_mean": 6527.1, + "valid_targets_min": 5399 + }, + { + "epoch": 5.073260073260073, + "grad_norm": 0.46746065430470146, + "learning_rate": 8.55101145684864e-06, + "loss": 0.1683, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07950198650360107, + "step": 6235, + "valid_targets_mean": 4995.0, + "valid_targets_min": 618 + }, + { + "epoch": 5.077330077330077, + "grad_norm": 0.42212928764720686, + "learning_rate": 8.517763053417095e-06, + "loss": 0.1823, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09315831959247589, + "step": 6240, + "valid_targets_mean": 7219.4, + "valid_targets_min": 5764 + }, + { + "epoch": 5.081400081400082, + "grad_norm": 0.4077166480983304, + "learning_rate": 8.484561917174592e-06, + "loss": 0.1477, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08582858741283417, + "step": 6245, + "valid_targets_mean": 7998.5, + "valid_targets_min": 5902 + }, + { + "epoch": 5.085470085470085, + "grad_norm": 0.3787575234434601, + "learning_rate": 8.451408184795242e-06, + "loss": 0.1541, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06539824604988098, + "step": 6250, + "valid_targets_mean": 7377.4, + "valid_targets_min": 5015 + }, + { + "epoch": 5.0895400895400895, + "grad_norm": 0.3870195711380443, + "learning_rate": 8.418301992757984e-06, + "loss": 0.164, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07054895162582397, + "step": 6255, + "valid_targets_mean": 6794.0, + "valid_targets_min": 5283 + }, + { + "epoch": 5.093610093610094, + "grad_norm": 0.41166952386174893, + "learning_rate": 8.385243477346095e-06, + "loss": 0.1682, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09239070862531662, + "step": 6260, + "valid_targets_mean": 6656.4, + "valid_targets_min": 4364 + }, + { + "epoch": 5.097680097680097, + "grad_norm": 0.38241917148742083, + "learning_rate": 8.352232774646545e-06, + "loss": 0.1662, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07691332697868347, + "step": 6265, + "valid_targets_mean": 7178.4, + "valid_targets_min": 4448 + }, + { + "epoch": 5.101750101750102, + "grad_norm": 0.4136989734963614, + "learning_rate": 8.319270020549517e-06, + "loss": 0.166, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0996970385313034, + "step": 6270, + "valid_targets_mean": 7619.8, + "valid_targets_min": 5737 + }, + { + "epoch": 5.105820105820106, + "grad_norm": 0.37122078417443183, + "learning_rate": 8.286355350747795e-06, + "loss": 0.1471, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07079966366291046, + "step": 6275, + "valid_targets_mean": 8754.4, + "valid_targets_min": 5752 + }, + { + "epoch": 5.1098901098901095, + "grad_norm": 0.41739380473502985, + "learning_rate": 8.253488900736226e-06, + "loss": 0.148, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.077818363904953, + "step": 6280, + "valid_targets_mean": 6341.0, + "valid_targets_min": 4543 + }, + { + "epoch": 5.113960113960114, + "grad_norm": 0.41626778838918543, + "learning_rate": 8.220670805811156e-06, + "loss": 0.1421, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0813712328672409, + "step": 6285, + "valid_targets_mean": 7181.9, + "valid_targets_min": 4563 + }, + { + "epoch": 5.118030118030118, + "grad_norm": 0.7414749413994536, + "learning_rate": 8.187901201069878e-06, + "loss": 0.1678, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.051455169916152954, + "step": 6290, + "valid_targets_mean": 1104.4, + "valid_targets_min": 161 + }, + { + "epoch": 5.122100122100122, + "grad_norm": 0.42212437842472816, + "learning_rate": 8.155180221410062e-06, + "loss": 0.1523, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07822007685899734, + "step": 6295, + "valid_targets_mean": 7018.2, + "valid_targets_min": 5424 + }, + { + "epoch": 5.126170126170126, + "grad_norm": 0.43199230917096043, + "learning_rate": 8.12250800152923e-06, + "loss": 0.1716, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08775204420089722, + "step": 6300, + "valid_targets_mean": 7738.4, + "valid_targets_min": 5329 + }, + { + "epoch": 5.13024013024013, + "grad_norm": 0.4607805236152848, + "learning_rate": 8.089884675924155e-06, + "loss": 0.1777, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08426693081855774, + "step": 6305, + "valid_targets_mean": 6426.6, + "valid_targets_min": 5560 + }, + { + "epoch": 5.134310134310135, + "grad_norm": 0.42455220317990994, + "learning_rate": 8.057310378890362e-06, + "loss": 0.171, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0837627574801445, + "step": 6310, + "valid_targets_mean": 6169.9, + "valid_targets_min": 3446 + }, + { + "epoch": 5.138380138380138, + "grad_norm": 0.39791893535614575, + "learning_rate": 8.024785244521528e-06, + "loss": 0.1676, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08946311473846436, + "step": 6315, + "valid_targets_mean": 7204.6, + "valid_targets_min": 5381 + }, + { + "epoch": 5.1424501424501425, + "grad_norm": 0.3868007797054446, + "learning_rate": 7.99230940670896e-06, + "loss": 0.1696, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08641095459461212, + "step": 6320, + "valid_targets_mean": 7600.4, + "valid_targets_min": 5241 + }, + { + "epoch": 5.146520146520147, + "grad_norm": 0.39428677674139484, + "learning_rate": 7.959882999141032e-06, + "loss": 0.1662, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09060313552618027, + "step": 6325, + "valid_targets_mean": 7098.5, + "valid_targets_min": 5607 + }, + { + "epoch": 5.15059015059015, + "grad_norm": 0.4263247652696508, + "learning_rate": 7.92750615530264e-06, + "loss": 0.169, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09046140313148499, + "step": 6330, + "valid_targets_mean": 6955.1, + "valid_targets_min": 5530 + }, + { + "epoch": 5.154660154660155, + "grad_norm": 0.4159005125904762, + "learning_rate": 7.895179008474634e-06, + "loss": 0.1699, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08619844913482666, + "step": 6335, + "valid_targets_mean": 6748.1, + "valid_targets_min": 5364 + }, + { + "epoch": 5.158730158730159, + "grad_norm": 0.45512866927412166, + "learning_rate": 7.862901691733287e-06, + "loss": 0.169, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0810968279838562, + "step": 6340, + "valid_targets_mean": 6273.9, + "valid_targets_min": 5438 + }, + { + "epoch": 5.1628001628001625, + "grad_norm": 0.4004779475142846, + "learning_rate": 7.830674337949756e-06, + "loss": 0.1653, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07858355343341827, + "step": 6345, + "valid_targets_mean": 6419.5, + "valid_targets_min": 4879 + }, + { + "epoch": 5.166870166870167, + "grad_norm": 0.4558820682158631, + "learning_rate": 7.798497079789513e-06, + "loss": 0.1637, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07956701517105103, + "step": 6350, + "valid_targets_mean": 6483.4, + "valid_targets_min": 4309 + }, + { + "epoch": 5.170940170940171, + "grad_norm": 0.4440243865004471, + "learning_rate": 7.76637004971182e-06, + "loss": 0.1335, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09749727696180344, + "step": 6355, + "valid_targets_mean": 6825.0, + "valid_targets_min": 5205 + }, + { + "epoch": 5.175010175010175, + "grad_norm": 0.4204178711309292, + "learning_rate": 7.734293379969157e-06, + "loss": 0.1636, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07635550945997238, + "step": 6360, + "valid_targets_mean": 6729.4, + "valid_targets_min": 5200 + }, + { + "epoch": 5.179080179080179, + "grad_norm": 0.42587388736681253, + "learning_rate": 7.702267202606709e-06, + "loss": 0.1615, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07823633402585983, + "step": 6365, + "valid_targets_mean": 6094.0, + "valid_targets_min": 5302 + }, + { + "epoch": 5.183150183150183, + "grad_norm": 0.40638485768353844, + "learning_rate": 7.670291649461798e-06, + "loss": 0.1661, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08494766801595688, + "step": 6370, + "valid_targets_mean": 7140.0, + "valid_targets_min": 5270 + }, + { + "epoch": 5.187220187220187, + "grad_norm": 0.3845786657820816, + "learning_rate": 7.638366852163348e-06, + "loss": 0.1593, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07755722105503082, + "step": 6375, + "valid_targets_mean": 7425.0, + "valid_targets_min": 5281 + }, + { + "epoch": 5.191290191290191, + "grad_norm": 0.41277396807257505, + "learning_rate": 7.606492942131336e-06, + "loss": 0.1669, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1049172580242157, + "step": 6380, + "valid_targets_mean": 7063.4, + "valid_targets_min": 5386 + }, + { + "epoch": 5.1953601953601956, + "grad_norm": 0.40861047918605575, + "learning_rate": 7.574670050576281e-06, + "loss": 0.1663, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08464010059833527, + "step": 6385, + "valid_targets_mean": 6827.0, + "valid_targets_min": 4547 + }, + { + "epoch": 5.199430199430199, + "grad_norm": 0.5620066997157668, + "learning_rate": 7.542898308498663e-06, + "loss": 0.1451, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.038255929946899414, + "step": 6390, + "valid_targets_mean": 2047.0, + "valid_targets_min": 165 + }, + { + "epoch": 5.203500203500203, + "grad_norm": 0.44632121984559203, + "learning_rate": 7.511177846688413e-06, + "loss": 0.1472, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07810096442699432, + "step": 6395, + "valid_targets_mean": 6754.1, + "valid_targets_min": 5196 + }, + { + "epoch": 5.207570207570208, + "grad_norm": 0.4574803118753031, + "learning_rate": 7.479508795724361e-06, + "loss": 0.1603, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07977963984012604, + "step": 6400, + "valid_targets_mean": 5985.6, + "valid_targets_min": 5028 + }, + { + "epoch": 5.211640211640212, + "grad_norm": 0.42827249479040463, + "learning_rate": 7.447891285973705e-06, + "loss": 0.1663, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07478997111320496, + "step": 6405, + "valid_targets_mean": 5963.9, + "valid_targets_min": 5237 + }, + { + "epoch": 5.2157102157102155, + "grad_norm": 0.4401945100045424, + "learning_rate": 7.416325447591468e-06, + "loss": 0.1622, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08579155802726746, + "step": 6410, + "valid_targets_mean": 6319.0, + "valid_targets_min": 5412 + }, + { + "epoch": 5.21978021978022, + "grad_norm": 0.3872584192038548, + "learning_rate": 7.384811410519961e-06, + "loss": 0.1535, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06653904169797897, + "step": 6415, + "valid_targets_mean": 7125.8, + "valid_targets_min": 5080 + }, + { + "epoch": 5.223850223850224, + "grad_norm": 0.4118913981470542, + "learning_rate": 7.353349304488251e-06, + "loss": 0.1641, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07929787784814835, + "step": 6420, + "valid_targets_mean": 6123.5, + "valid_targets_min": 4521 + }, + { + "epoch": 5.227920227920228, + "grad_norm": 0.8933632330027761, + "learning_rate": 7.321939259011639e-06, + "loss": 0.1614, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09677982330322266, + "step": 6425, + "valid_targets_mean": 1886.0, + "valid_targets_min": 1111 + }, + { + "epoch": 5.231990231990232, + "grad_norm": 0.7717354209997601, + "learning_rate": 7.29058140339111e-06, + "loss": 0.1527, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07828053832054138, + "step": 6430, + "valid_targets_mean": 1497.6, + "valid_targets_min": 893 + }, + { + "epoch": 5.236060236060236, + "grad_norm": 0.7156196156160548, + "learning_rate": 7.259275866712812e-06, + "loss": 0.1394, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05080351233482361, + "step": 6435, + "valid_targets_mean": 1356.0, + "valid_targets_min": 914 + }, + { + "epoch": 5.24013024013024, + "grad_norm": 0.7775879541195082, + "learning_rate": 7.22802277784751e-06, + "loss": 0.1354, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05790482088923454, + "step": 6440, + "valid_targets_mean": 1324.5, + "valid_targets_min": 680 + }, + { + "epoch": 5.244200244200244, + "grad_norm": 0.7428014404555288, + "learning_rate": 7.196822265450079e-06, + "loss": 0.1342, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06787339597940445, + "step": 6445, + "valid_targets_mean": 1931.8, + "valid_targets_min": 876 + }, + { + "epoch": 5.248270248270249, + "grad_norm": 0.8246845467297721, + "learning_rate": 7.165674457958938e-06, + "loss": 0.1319, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05890588089823723, + "step": 6450, + "valid_targets_mean": 1292.2, + "valid_targets_min": 734 + }, + { + "epoch": 5.252340252340252, + "grad_norm": 0.7747801015256733, + "learning_rate": 7.134579483595574e-06, + "loss": 0.1396, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05581791326403618, + "step": 6455, + "valid_targets_mean": 1469.6, + "valid_targets_min": 655 + }, + { + "epoch": 5.256410256410256, + "grad_norm": 0.7889248714375038, + "learning_rate": 7.10353747036395e-06, + "loss": 0.1197, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07032492756843567, + "step": 6460, + "valid_targets_mean": 1548.4, + "valid_targets_min": 737 + }, + { + "epoch": 5.260480260480261, + "grad_norm": 0.7576401372568481, + "learning_rate": 7.072548546050038e-06, + "loss": 0.1395, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.056093454360961914, + "step": 6465, + "valid_targets_mean": 1316.5, + "valid_targets_min": 759 + }, + { + "epoch": 5.264550264550264, + "grad_norm": 0.8148494000497493, + "learning_rate": 7.041612838221257e-06, + "loss": 0.1288, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0493401475250721, + "step": 6470, + "valid_targets_mean": 1249.6, + "valid_targets_min": 753 + }, + { + "epoch": 5.2686202686202686, + "grad_norm": 0.8821896425064455, + "learning_rate": 7.010730474225958e-06, + "loss": 0.131, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0843823179602623, + "step": 6475, + "valid_targets_mean": 1833.0, + "valid_targets_min": 927 + }, + { + "epoch": 5.272690272690273, + "grad_norm": 0.8128938462953682, + "learning_rate": 6.979901581192903e-06, + "loss": 0.1297, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06127440556883812, + "step": 6480, + "valid_targets_mean": 1338.4, + "valid_targets_min": 679 + }, + { + "epoch": 5.276760276760276, + "grad_norm": 0.8516276560604461, + "learning_rate": 6.949126286030739e-06, + "loss": 0.1313, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06296617537736893, + "step": 6485, + "valid_targets_mean": 1244.6, + "valid_targets_min": 771 + }, + { + "epoch": 5.280830280830281, + "grad_norm": 0.7641818533358684, + "learning_rate": 6.91840471542746e-06, + "loss": 0.1159, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05841711908578873, + "step": 6490, + "valid_targets_mean": 1732.5, + "valid_targets_min": 986 + }, + { + "epoch": 5.284900284900285, + "grad_norm": 0.7751581720642792, + "learning_rate": 6.887736995849925e-06, + "loss": 0.1226, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.054849620908498764, + "step": 6495, + "valid_targets_mean": 1389.9, + "valid_targets_min": 1103 + }, + { + "epoch": 5.2889702889702885, + "grad_norm": 0.8647625768666302, + "learning_rate": 6.857123253543286e-06, + "loss": 0.1249, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06798947602510452, + "step": 6500, + "valid_targets_mean": 1593.2, + "valid_targets_min": 675 + }, + { + "epoch": 5.293040293040293, + "grad_norm": 0.8101312346504578, + "learning_rate": 6.826563614530511e-06, + "loss": 0.131, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07953788340091705, + "step": 6505, + "valid_targets_mean": 1926.6, + "valid_targets_min": 974 + }, + { + "epoch": 5.297110297110297, + "grad_norm": 0.8479150840387949, + "learning_rate": 6.7960582046118505e-06, + "loss": 0.1282, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07549211382865906, + "step": 6510, + "valid_targets_mean": 1953.1, + "valid_targets_min": 869 + }, + { + "epoch": 5.301180301180302, + "grad_norm": 0.7598838779769721, + "learning_rate": 6.765607149364313e-06, + "loss": 0.1302, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.053766507655382156, + "step": 6515, + "valid_targets_mean": 1327.2, + "valid_targets_min": 1023 + }, + { + "epoch": 5.305250305250305, + "grad_norm": 1.6475991371156928, + "learning_rate": 6.735210574141158e-06, + "loss": 0.1296, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06451068818569183, + "step": 6520, + "valid_targets_mean": 1717.9, + "valid_targets_min": 709 + }, + { + "epoch": 5.309320309320309, + "grad_norm": 0.8012862936876622, + "learning_rate": 6.704868604071362e-06, + "loss": 0.1261, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05108807235956192, + "step": 6525, + "valid_targets_mean": 1139.9, + "valid_targets_min": 658 + }, + { + "epoch": 5.313390313390314, + "grad_norm": 1.10291965479662, + "learning_rate": 6.674581364059138e-06, + "loss": 0.1276, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06254150718450546, + "step": 6530, + "valid_targets_mean": 1444.8, + "valid_targets_min": 662 + }, + { + "epoch": 5.317460317460317, + "grad_norm": 0.7975798780270608, + "learning_rate": 6.644348978783375e-06, + "loss": 0.1248, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.045557815581560135, + "step": 6535, + "valid_targets_mean": 1242.2, + "valid_targets_min": 859 + }, + { + "epoch": 5.321530321530322, + "grad_norm": 0.808432619410068, + "learning_rate": 6.614171572697172e-06, + "loss": 0.1139, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06855520606040955, + "step": 6540, + "valid_targets_mean": 1569.2, + "valid_targets_min": 950 + }, + { + "epoch": 5.325600325600326, + "grad_norm": 0.8165752644870131, + "learning_rate": 6.584049270027291e-06, + "loss": 0.1206, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.058723099529743195, + "step": 6545, + "valid_targets_mean": 1539.8, + "valid_targets_min": 686 + }, + { + "epoch": 5.329670329670329, + "grad_norm": 0.8800699749983475, + "learning_rate": 6.553982194773663e-06, + "loss": 0.1238, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07176946103572845, + "step": 6550, + "valid_targets_mean": 1610.6, + "valid_targets_min": 702 + }, + { + "epoch": 5.333740333740334, + "grad_norm": 0.7914141839534132, + "learning_rate": 6.523970470708874e-06, + "loss": 0.1199, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04290122538805008, + "step": 6555, + "valid_targets_mean": 1190.4, + "valid_targets_min": 779 + }, + { + "epoch": 5.337810337810338, + "grad_norm": 0.8735942419573629, + "learning_rate": 6.494014221377654e-06, + "loss": 0.1315, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06523449718952179, + "step": 6560, + "valid_targets_mean": 1467.1, + "valid_targets_min": 822 + }, + { + "epoch": 5.3418803418803416, + "grad_norm": 0.7904142925153533, + "learning_rate": 6.4641135700963555e-06, + "loss": 0.1188, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0542932003736496, + "step": 6565, + "valid_targets_mean": 1350.5, + "valid_targets_min": 868 + }, + { + "epoch": 5.345950345950346, + "grad_norm": 0.8188017954142414, + "learning_rate": 6.434268639952482e-06, + "loss": 0.1246, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.052746959030628204, + "step": 6570, + "valid_targets_mean": 1283.5, + "valid_targets_min": 781 + }, + { + "epoch": 5.35002035002035, + "grad_norm": 0.8883502243806306, + "learning_rate": 6.4044795538041325e-06, + "loss": 0.1202, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.052638888359069824, + "step": 6575, + "valid_targets_mean": 1303.2, + "valid_targets_min": 793 + }, + { + "epoch": 5.354090354090354, + "grad_norm": 0.7346427480548663, + "learning_rate": 6.374746434279542e-06, + "loss": 0.1218, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0480545237660408, + "step": 6580, + "valid_targets_mean": 1309.1, + "valid_targets_min": 806 + }, + { + "epoch": 5.358160358160358, + "grad_norm": 0.7749262088030695, + "learning_rate": 6.345069403776547e-06, + "loss": 0.1199, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05063071846961975, + "step": 6585, + "valid_targets_mean": 1397.6, + "valid_targets_min": 803 + }, + { + "epoch": 5.362230362230362, + "grad_norm": 0.8904924291717923, + "learning_rate": 6.3154485844620935e-06, + "loss": 0.1202, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.045979805290699005, + "step": 6590, + "valid_targets_mean": 1187.8, + "valid_targets_min": 660 + }, + { + "epoch": 5.366300366300366, + "grad_norm": 0.8134025550512829, + "learning_rate": 6.285884098271739e-06, + "loss": 0.1097, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05374240130186081, + "step": 6595, + "valid_targets_mean": 1547.0, + "valid_targets_min": 853 + }, + { + "epoch": 5.37037037037037, + "grad_norm": 0.8806055042632812, + "learning_rate": 6.25637606690912e-06, + "loss": 0.122, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06583143770694733, + "step": 6600, + "valid_targets_mean": 1704.2, + "valid_targets_min": 719 + }, + { + "epoch": 5.374440374440375, + "grad_norm": 0.9022742278146144, + "learning_rate": 6.226924611845495e-06, + "loss": 0.1209, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04527607560157776, + "step": 6605, + "valid_targets_mean": 1255.0, + "valid_targets_min": 765 + }, + { + "epoch": 5.378510378510379, + "grad_norm": 0.8451608246424978, + "learning_rate": 6.197529854319222e-06, + "loss": 0.1181, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06376711279153824, + "step": 6610, + "valid_targets_mean": 1550.8, + "valid_targets_min": 875 + }, + { + "epoch": 5.382580382580382, + "grad_norm": 0.8477923753137665, + "learning_rate": 6.168191915335242e-06, + "loss": 0.1162, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06021396815776825, + "step": 6615, + "valid_targets_mean": 1362.0, + "valid_targets_min": 739 + }, + { + "epoch": 5.386650386650387, + "grad_norm": 0.8811854153496774, + "learning_rate": 6.138910915664624e-06, + "loss": 0.1238, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.069005087018013, + "step": 6620, + "valid_targets_mean": 1666.8, + "valid_targets_min": 760 + }, + { + "epoch": 5.390720390720391, + "grad_norm": 0.8205504832118758, + "learning_rate": 6.109686975844029e-06, + "loss": 0.1173, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05043880641460419, + "step": 6625, + "valid_targets_mean": 1319.6, + "valid_targets_min": 1046 + }, + { + "epoch": 5.394790394790395, + "grad_norm": 1.1123497884965143, + "learning_rate": 6.080520216175236e-06, + "loss": 0.1249, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06800902634859085, + "step": 6630, + "valid_targets_mean": 1727.9, + "valid_targets_min": 851 + }, + { + "epoch": 5.398860398860399, + "grad_norm": 0.8955095197538415, + "learning_rate": 6.051410756724638e-06, + "loss": 0.1276, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06400962918996811, + "step": 6635, + "valid_targets_mean": 1643.2, + "valid_targets_min": 907 + }, + { + "epoch": 5.402930402930403, + "grad_norm": 0.867837643579163, + "learning_rate": 6.022358717322734e-06, + "loss": 0.135, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05850240960717201, + "step": 6640, + "valid_targets_mean": 1478.5, + "valid_targets_min": 764 + }, + { + "epoch": 5.407000407000407, + "grad_norm": 0.7621902905404546, + "learning_rate": 5.993364217563671e-06, + "loss": 0.1176, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05375190079212189, + "step": 6645, + "valid_targets_mean": 1522.6, + "valid_targets_min": 822 + }, + { + "epoch": 5.411070411070411, + "grad_norm": 0.8033822439060038, + "learning_rate": 5.964427376804726e-06, + "loss": 0.1177, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06127481535077095, + "step": 6650, + "valid_targets_mean": 1589.0, + "valid_targets_min": 982 + }, + { + "epoch": 5.415140415140415, + "grad_norm": 0.7789522955304922, + "learning_rate": 5.935548314165809e-06, + "loss": 0.1247, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05730774253606796, + "step": 6655, + "valid_targets_mean": 1574.8, + "valid_targets_min": 816 + }, + { + "epoch": 5.419210419210419, + "grad_norm": 0.7825880008149688, + "learning_rate": 5.9067271485289945e-06, + "loss": 0.1141, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05469139292836189, + "step": 6660, + "valid_targets_mean": 1422.0, + "valid_targets_min": 721 + }, + { + "epoch": 5.423280423280423, + "grad_norm": 0.8034253037962921, + "learning_rate": 5.877963998538019e-06, + "loss": 0.1138, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04614481329917908, + "step": 6665, + "valid_targets_mean": 1150.6, + "valid_targets_min": 741 + }, + { + "epoch": 5.427350427350428, + "grad_norm": 0.8550910565715886, + "learning_rate": 5.849258982597801e-06, + "loss": 0.1183, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06886758655309677, + "step": 6670, + "valid_targets_mean": 1684.2, + "valid_targets_min": 734 + }, + { + "epoch": 5.431420431420431, + "grad_norm": 0.9381725661390383, + "learning_rate": 5.820612218873927e-06, + "loss": 0.1204, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07469278573989868, + "step": 6675, + "valid_targets_mean": 1937.1, + "valid_targets_min": 744 + }, + { + "epoch": 5.435490435490435, + "grad_norm": 0.8330098856323024, + "learning_rate": 5.792023825292201e-06, + "loss": 0.1207, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06777373701334, + "step": 6680, + "valid_targets_mean": 1842.2, + "valid_targets_min": 620 + }, + { + "epoch": 5.43956043956044, + "grad_norm": 0.8473418521641808, + "learning_rate": 5.763493919538154e-06, + "loss": 0.1161, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06759806722402573, + "step": 6685, + "valid_targets_mean": 1637.6, + "valid_targets_min": 785 + }, + { + "epoch": 5.443630443630443, + "grad_norm": 0.8280879497111717, + "learning_rate": 5.735022619056521e-06, + "loss": 0.1206, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06591871380805969, + "step": 6690, + "valid_targets_mean": 1969.9, + "valid_targets_min": 740 + }, + { + "epoch": 5.447700447700448, + "grad_norm": 0.8139854030624378, + "learning_rate": 5.706610041050806e-06, + "loss": 0.1152, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05533464252948761, + "step": 6695, + "valid_targets_mean": 1432.1, + "valid_targets_min": 711 + }, + { + "epoch": 5.451770451770452, + "grad_norm": 0.8118262581872678, + "learning_rate": 5.678256302482772e-06, + "loss": 0.1224, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06479828804731369, + "step": 6700, + "valid_targets_mean": 1695.6, + "valid_targets_min": 1122 + }, + { + "epoch": 5.455840455840455, + "grad_norm": 0.7908183689246445, + "learning_rate": 5.6499615200719735e-06, + "loss": 0.1174, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06296779215335846, + "step": 6705, + "valid_targets_mean": 1524.5, + "valid_targets_min": 722 + }, + { + "epoch": 5.45991045991046, + "grad_norm": 0.8157309366372941, + "learning_rate": 5.621725810295264e-06, + "loss": 0.1128, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05387778580188751, + "step": 6710, + "valid_targets_mean": 1289.2, + "valid_targets_min": 661 + }, + { + "epoch": 5.463980463980464, + "grad_norm": 0.7987860552011007, + "learning_rate": 5.593549289386315e-06, + "loss": 0.1107, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.055739592760801315, + "step": 6715, + "valid_targets_mean": 1519.5, + "valid_targets_min": 1011 + }, + { + "epoch": 5.4680504680504685, + "grad_norm": 0.8385843690819885, + "learning_rate": 5.565432073335153e-06, + "loss": 0.1237, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0668364018201828, + "step": 6720, + "valid_targets_mean": 1731.4, + "valid_targets_min": 1277 + }, + { + "epoch": 5.472120472120472, + "grad_norm": 0.7910981905181974, + "learning_rate": 5.537374277887677e-06, + "loss": 0.1143, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05977032706141472, + "step": 6725, + "valid_targets_mean": 1486.1, + "valid_targets_min": 807 + }, + { + "epoch": 5.476190476190476, + "grad_norm": 0.9696747865228715, + "learning_rate": 5.509376018545161e-06, + "loss": 0.1154, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.057927779853343964, + "step": 6730, + "valid_targets_mean": 1549.9, + "valid_targets_min": 679 + }, + { + "epoch": 5.480260480260481, + "grad_norm": 0.8421335406537752, + "learning_rate": 5.481437410563813e-06, + "loss": 0.1212, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.060152310878038406, + "step": 6735, + "valid_targets_mean": 1656.0, + "valid_targets_min": 1294 + }, + { + "epoch": 5.484330484330484, + "grad_norm": 0.7856036336209856, + "learning_rate": 5.4535585689542735e-06, + "loss": 0.118, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.054788943380117416, + "step": 6740, + "valid_targets_mean": 1368.8, + "valid_targets_min": 790 + }, + { + "epoch": 5.488400488400488, + "grad_norm": 0.827864917037226, + "learning_rate": 5.4257396084811665e-06, + "loss": 0.1177, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06671278178691864, + "step": 6745, + "valid_targets_mean": 1765.8, + "valid_targets_min": 592 + }, + { + "epoch": 5.492470492470493, + "grad_norm": 0.8303973263376846, + "learning_rate": 5.397980643662586e-06, + "loss": 0.1178, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06511053442955017, + "step": 6750, + "valid_targets_mean": 1586.8, + "valid_targets_min": 662 + }, + { + "epoch": 5.496540496540496, + "grad_norm": 0.8780460301609091, + "learning_rate": 5.370281788769673e-06, + "loss": 0.1163, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05283388867974281, + "step": 6755, + "valid_targets_mean": 1336.9, + "valid_targets_min": 622 + }, + { + "epoch": 5.500610500610501, + "grad_norm": 0.8240875330044186, + "learning_rate": 5.342643157826117e-06, + "loss": 0.1131, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.03851066157221794, + "step": 6760, + "valid_targets_mean": 1097.1, + "valid_targets_min": 625 + }, + { + "epoch": 5.504680504680505, + "grad_norm": 0.8739039631522074, + "learning_rate": 5.315064864607695e-06, + "loss": 0.1256, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05119139701128006, + "step": 6765, + "valid_targets_mean": 1484.9, + "valid_targets_min": 888 + }, + { + "epoch": 5.508750508750508, + "grad_norm": 0.8003068896738135, + "learning_rate": 5.287547022641788e-06, + "loss": 0.1228, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06188352033495903, + "step": 6770, + "valid_targets_mean": 1757.0, + "valid_targets_min": 635 + }, + { + "epoch": 5.512820512820513, + "grad_norm": 0.927599280836426, + "learning_rate": 5.260089745206942e-06, + "loss": 0.118, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06444002687931061, + "step": 6775, + "valid_targets_mean": 1476.1, + "valid_targets_min": 522 + }, + { + "epoch": 5.516890516890517, + "grad_norm": 0.824485847569823, + "learning_rate": 5.232693145332379e-06, + "loss": 0.1168, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04994414374232292, + "step": 6780, + "valid_targets_mean": 1230.9, + "valid_targets_min": 697 + }, + { + "epoch": 5.520960520960521, + "grad_norm": 0.8214091658820951, + "learning_rate": 5.205357335797545e-06, + "loss": 0.1103, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06151646375656128, + "step": 6785, + "valid_targets_mean": 1671.0, + "valid_targets_min": 1010 + }, + { + "epoch": 5.525030525030525, + "grad_norm": 0.880824287835339, + "learning_rate": 5.178082429131628e-06, + "loss": 0.1122, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.055703192949295044, + "step": 6790, + "valid_targets_mean": 1411.0, + "valid_targets_min": 769 + }, + { + "epoch": 5.529100529100529, + "grad_norm": 0.8856311775898162, + "learning_rate": 5.150868537613114e-06, + "loss": 0.1171, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04835759103298187, + "step": 6795, + "valid_targets_mean": 1298.6, + "valid_targets_min": 745 + }, + { + "epoch": 5.533170533170534, + "grad_norm": 0.8530843893172676, + "learning_rate": 5.123715773269318e-06, + "loss": 0.1102, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.043554842472076416, + "step": 6800, + "valid_targets_mean": 1118.9, + "valid_targets_min": 680 + }, + { + "epoch": 5.537240537240537, + "grad_norm": 0.8234954694495478, + "learning_rate": 5.096624247875925e-06, + "loss": 0.1146, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07579924166202545, + "step": 6805, + "valid_targets_mean": 1905.9, + "valid_targets_min": 1347 + }, + { + "epoch": 5.5413105413105415, + "grad_norm": 0.9238596830777893, + "learning_rate": 5.069594072956512e-06, + "loss": 0.1211, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08299441635608673, + "step": 6810, + "valid_targets_mean": 1636.0, + "valid_targets_min": 851 + }, + { + "epoch": 5.545380545380546, + "grad_norm": 0.8280105418368178, + "learning_rate": 5.042625359782118e-06, + "loss": 0.1138, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04978195205330849, + "step": 6815, + "valid_targets_mean": 1457.2, + "valid_targets_min": 903 + }, + { + "epoch": 5.549450549450549, + "grad_norm": 0.8327669041192984, + "learning_rate": 5.015718219370775e-06, + "loss": 0.1146, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06120377779006958, + "step": 6820, + "valid_targets_mean": 1674.4, + "valid_targets_min": 1240 + }, + { + "epoch": 5.553520553520554, + "grad_norm": 0.6091529761830878, + "learning_rate": 4.988872762487029e-06, + "loss": 0.1153, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.047283854335546494, + "step": 6825, + "valid_targets_mean": 1697.1, + "valid_targets_min": 917 + }, + { + "epoch": 5.557590557590558, + "grad_norm": 0.7831969192018885, + "learning_rate": 4.962089099641518e-06, + "loss": 0.1051, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05221349745988846, + "step": 6830, + "valid_targets_mean": 1580.9, + "valid_targets_min": 930 + }, + { + "epoch": 5.561660561660561, + "grad_norm": 0.784999322544449, + "learning_rate": 4.935367341090498e-06, + "loss": 0.1161, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.059659071266651154, + "step": 6835, + "valid_targets_mean": 1655.1, + "valid_targets_min": 1227 + }, + { + "epoch": 5.565730565730566, + "grad_norm": 0.8561304848098958, + "learning_rate": 4.908707596835396e-06, + "loss": 0.1101, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05701415240764618, + "step": 6840, + "valid_targets_mean": 1565.8, + "valid_targets_min": 870 + }, + { + "epoch": 5.56980056980057, + "grad_norm": 0.8237448435276692, + "learning_rate": 4.882109976622353e-06, + "loss": 0.114, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05764477699995041, + "step": 6845, + "valid_targets_mean": 1564.6, + "valid_targets_min": 822 + }, + { + "epoch": 5.573870573870574, + "grad_norm": 0.9077557622146745, + "learning_rate": 4.855574589941763e-06, + "loss": 0.1199, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05498945713043213, + "step": 6850, + "valid_targets_mean": 1540.5, + "valid_targets_min": 1214 + }, + { + "epoch": 5.577940577940578, + "grad_norm": 0.8208304804405712, + "learning_rate": 4.829101546027843e-06, + "loss": 0.1097, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05034732073545456, + "step": 6855, + "valid_targets_mean": 1268.6, + "valid_targets_min": 591 + }, + { + "epoch": 5.582010582010582, + "grad_norm": 0.8341724291260191, + "learning_rate": 4.80269095385818e-06, + "loss": 0.1156, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.045787129551172256, + "step": 6860, + "valid_targets_mean": 1232.5, + "valid_targets_min": 646 + }, + { + "epoch": 5.586080586080586, + "grad_norm": 0.6220953592726438, + "learning_rate": 4.776342922153252e-06, + "loss": 0.1074, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04945782572031021, + "step": 6865, + "valid_targets_mean": 3032.8, + "valid_targets_min": 2115 + }, + { + "epoch": 5.59015059015059, + "grad_norm": 0.514602989750909, + "learning_rate": 4.750057559376027e-06, + "loss": 0.0989, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.03673814609646797, + "step": 6870, + "valid_targets_mean": 2553.1, + "valid_targets_min": 324 + }, + { + "epoch": 5.5942205942205945, + "grad_norm": 0.4451969667171798, + "learning_rate": 4.72383497373148e-06, + "loss": 0.0786, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04091775789856911, + "step": 6875, + "valid_targets_mean": 2212.5, + "valid_targets_min": 982 + }, + { + "epoch": 5.598290598290598, + "grad_norm": 0.6233305674965636, + "learning_rate": 4.6976752731661755e-06, + "loss": 0.0736, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.042984090745449066, + "step": 6880, + "valid_targets_mean": 3619.4, + "valid_targets_min": 2479 + }, + { + "epoch": 5.602360602360602, + "grad_norm": 0.3301906624858536, + "learning_rate": 4.671578565367783e-06, + "loss": 0.0838, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.027935273945331573, + "step": 6885, + "valid_targets_mean": 3030.5, + "valid_targets_min": 811 + }, + { + "epoch": 5.606430606430607, + "grad_norm": 0.3998270887471306, + "learning_rate": 4.645544957764683e-06, + "loss": 0.0766, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.041048966348171234, + "step": 6890, + "valid_targets_mean": 2922.1, + "valid_targets_min": 1431 + }, + { + "epoch": 5.61050061050061, + "grad_norm": 0.7362755403982966, + "learning_rate": 4.619574557525497e-06, + "loss": 0.1112, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09079274535179138, + "step": 6895, + "valid_targets_mean": 1991.0, + "valid_targets_min": 703 + }, + { + "epoch": 5.6145706145706145, + "grad_norm": 0.3819808054209917, + "learning_rate": 4.5936674715586335e-06, + "loss": 0.0923, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.045745231211185455, + "step": 6900, + "valid_targets_mean": 3251.1, + "valid_targets_min": 621 + }, + { + "epoch": 5.618640618640619, + "grad_norm": 0.4913669321147579, + "learning_rate": 4.567823806511882e-06, + "loss": 0.0844, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0513700470328331, + "step": 6905, + "valid_targets_mean": 2681.1, + "valid_targets_min": 846 + }, + { + "epoch": 5.622710622710622, + "grad_norm": 0.46623010414455635, + "learning_rate": 4.542043668771956e-06, + "loss": 0.1154, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05827459692955017, + "step": 6910, + "valid_targets_mean": 3618.6, + "valid_targets_min": 1369 + }, + { + "epoch": 5.626780626780627, + "grad_norm": 0.40350792589628737, + "learning_rate": 4.516327164464045e-06, + "loss": 0.0608, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.03749353438615799, + "step": 6915, + "valid_targets_mean": 2980.5, + "valid_targets_min": 754 + }, + { + "epoch": 5.630850630850631, + "grad_norm": 0.44644536414753183, + "learning_rate": 4.490674399451404e-06, + "loss": 0.0748, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04275457561016083, + "step": 6920, + "valid_targets_mean": 2610.8, + "valid_targets_min": 776 + }, + { + "epoch": 5.634920634920634, + "grad_norm": 0.3716682275188854, + "learning_rate": 4.465085479334881e-06, + "loss": 0.0738, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04192643612623215, + "step": 6925, + "valid_targets_mean": 3132.1, + "valid_targets_min": 754 + }, + { + "epoch": 5.638990638990639, + "grad_norm": 0.6737602005566654, + "learning_rate": 4.439560509452521e-06, + "loss": 0.1201, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1757407784461975, + "step": 6930, + "valid_targets_mean": 2020.8, + "valid_targets_min": 664 + }, + { + "epoch": 5.643060643060643, + "grad_norm": 0.38494643743663376, + "learning_rate": 4.414099594879116e-06, + "loss": 0.0683, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04006730392575264, + "step": 6935, + "valid_targets_mean": 3713.8, + "valid_targets_min": 2542 + }, + { + "epoch": 5.6471306471306475, + "grad_norm": 0.42182454234126665, + "learning_rate": 4.388702840425747e-06, + "loss": 0.0745, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.050172992050647736, + "step": 6940, + "valid_targets_mean": 3191.9, + "valid_targets_min": 1462 + }, + { + "epoch": 5.651200651200651, + "grad_norm": 0.42723433064009736, + "learning_rate": 4.363370350639405e-06, + "loss": 0.0672, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.045071978121995926, + "step": 6945, + "valid_targets_mean": 3721.0, + "valid_targets_min": 3281 + }, + { + "epoch": 5.655270655270655, + "grad_norm": 0.4697371582365796, + "learning_rate": 4.338102229802519e-06, + "loss": 0.0768, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.037965573370456696, + "step": 6950, + "valid_targets_mean": 3360.4, + "valid_targets_min": 2189 + }, + { + "epoch": 5.65934065934066, + "grad_norm": 0.3866243720482525, + "learning_rate": 4.312898581932543e-06, + "loss": 0.0754, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.033566609025001526, + "step": 6955, + "valid_targets_mean": 3223.9, + "valid_targets_min": 1590 + }, + { + "epoch": 5.663410663410663, + "grad_norm": 0.5221435785464758, + "learning_rate": 4.287759510781531e-06, + "loss": 0.0876, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05182216316461563, + "step": 6960, + "valid_targets_mean": 2352.1, + "valid_targets_min": 597 + }, + { + "epoch": 5.6674806674806675, + "grad_norm": 0.4315745464727039, + "learning_rate": 4.262685119835681e-06, + "loss": 0.0824, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04116567224264145, + "step": 6965, + "valid_targets_mean": 2612.9, + "valid_targets_min": 701 + }, + { + "epoch": 5.671550671550672, + "grad_norm": 0.5037919896206252, + "learning_rate": 4.237675512314963e-06, + "loss": 0.112, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08254027366638184, + "step": 6970, + "valid_targets_mean": 2791.8, + "valid_targets_min": 1652 + }, + { + "epoch": 5.675620675620675, + "grad_norm": 0.37428492562737725, + "learning_rate": 4.212730791172637e-06, + "loss": 0.0828, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.030793912708759308, + "step": 6975, + "valid_targets_mean": 3704.5, + "valid_targets_min": 1908 + }, + { + "epoch": 5.67969067969068, + "grad_norm": 0.734812670763522, + "learning_rate": 4.1878510590948675e-06, + "loss": 0.0707, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.030109543353319168, + "step": 6980, + "valid_targets_mean": 3204.0, + "valid_targets_min": 842 + }, + { + "epoch": 5.683760683760684, + "grad_norm": 0.4553294615748584, + "learning_rate": 4.163036418500288e-06, + "loss": 0.1071, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0399131178855896, + "step": 6985, + "valid_targets_mean": 3402.9, + "valid_targets_min": 2552 + }, + { + "epoch": 5.6878306878306875, + "grad_norm": 0.3690385553206738, + "learning_rate": 4.138286971539578e-06, + "loss": 0.0883, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.030925989151000977, + "step": 6990, + "valid_targets_mean": 2979.9, + "valid_targets_min": 753 + }, + { + "epoch": 5.691900691900692, + "grad_norm": 0.5807379632235751, + "learning_rate": 4.113602820095046e-06, + "loss": 0.0862, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05012385547161102, + "step": 6995, + "valid_targets_mean": 1296.9, + "valid_targets_min": 568 + }, + { + "epoch": 5.695970695970696, + "grad_norm": 0.5547047028197435, + "learning_rate": 4.088984065780211e-06, + "loss": 0.2639, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10247977077960968, + "step": 7000, + "valid_targets_mean": 3126.9, + "valid_targets_min": 1070 + }, + { + "epoch": 5.7000407000407005, + "grad_norm": 0.5537368442018135, + "learning_rate": 4.064430809939366e-06, + "loss": 0.0833, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.041642434895038605, + "step": 7005, + "valid_targets_mean": 2456.6, + "valid_targets_min": 329 + }, + { + "epoch": 5.704110704110704, + "grad_norm": 0.6592126673581086, + "learning_rate": 4.039943153647199e-06, + "loss": 0.0868, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04998764768242836, + "step": 7010, + "valid_targets_mean": 1027.8, + "valid_targets_min": 608 + }, + { + "epoch": 5.708180708180708, + "grad_norm": 0.460024733008231, + "learning_rate": 4.015521197708332e-06, + "loss": 0.1726, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.048872776329517365, + "step": 7015, + "valid_targets_mean": 4479.8, + "valid_targets_min": 2772 + }, + { + "epoch": 5.712250712250713, + "grad_norm": 0.27240175134306893, + "learning_rate": 3.9911650426569435e-06, + "loss": 0.0681, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.022211387753486633, + "step": 7020, + "valid_targets_mean": 4843.0, + "valid_targets_min": 3778 + }, + { + "epoch": 5.716320716320716, + "grad_norm": 0.679333960574683, + "learning_rate": 3.966874788756334e-06, + "loss": 0.0857, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.03754306584596634, + "step": 7025, + "valid_targets_mean": 4306.0, + "valid_targets_min": 2336 + }, + { + "epoch": 5.7203907203907205, + "grad_norm": 0.4459001490448593, + "learning_rate": 3.942650535998524e-06, + "loss": 0.0806, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04726475849747658, + "step": 7030, + "valid_targets_mean": 3579.5, + "valid_targets_min": 846 + }, + { + "epoch": 5.724460724460725, + "grad_norm": 0.44157380368224075, + "learning_rate": 3.9184923841038295e-06, + "loss": 0.0716, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04266761243343353, + "step": 7035, + "valid_targets_mean": 2909.1, + "valid_targets_min": 798 + }, + { + "epoch": 5.728530728530728, + "grad_norm": 0.4449017607427598, + "learning_rate": 3.894400432520469e-06, + "loss": 0.0689, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.02835097163915634, + "step": 7040, + "valid_targets_mean": 2008.5, + "valid_targets_min": 533 + }, + { + "epoch": 5.732600732600733, + "grad_norm": 0.4703351541184157, + "learning_rate": 3.870374780424131e-06, + "loss": 0.0723, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.03980240598320961, + "step": 7045, + "valid_targets_mean": 2598.2, + "valid_targets_min": 833 + }, + { + "epoch": 5.736670736670737, + "grad_norm": 0.3551380809469693, + "learning_rate": 3.846415526717582e-06, + "loss": 0.0685, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0330246202647686, + "step": 7050, + "valid_targets_mean": 4031.8, + "valid_targets_min": 3391 + }, + { + "epoch": 5.7407407407407405, + "grad_norm": 0.5831858604258909, + "learning_rate": 3.8225227700302616e-06, + "loss": 0.0848, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10323013365268707, + "step": 7055, + "valid_targets_mean": 1909.9, + "valid_targets_min": 732 + }, + { + "epoch": 5.744810744810745, + "grad_norm": 0.5066452107972234, + "learning_rate": 3.7986966087178733e-06, + "loss": 0.0908, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.03759615123271942, + "step": 7060, + "valid_targets_mean": 1908.9, + "valid_targets_min": 538 + }, + { + "epoch": 5.748880748880749, + "grad_norm": 0.528867423320594, + "learning_rate": 3.7749371408619718e-06, + "loss": 0.0595, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.032433267682790756, + "step": 7065, + "valid_targets_mean": 824.9, + "valid_targets_min": 720 + }, + { + "epoch": 5.752950752950753, + "grad_norm": 0.41003167533389645, + "learning_rate": 3.751244464269568e-06, + "loss": 0.0647, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0331251323223114, + "step": 7070, + "valid_targets_mean": 3181.1, + "valid_targets_min": 2328 + }, + { + "epoch": 5.757020757020757, + "grad_norm": 0.48249588908464164, + "learning_rate": 3.727618676472724e-06, + "loss": 0.0628, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.028061306104063988, + "step": 7075, + "valid_targets_mean": 2581.4, + "valid_targets_min": 684 + }, + { + "epoch": 5.761090761090761, + "grad_norm": 0.444459931043437, + "learning_rate": 3.704059874728141e-06, + "loss": 0.1082, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04039149731397629, + "step": 7080, + "valid_targets_mean": 3461.2, + "valid_targets_min": 2795 + }, + { + "epoch": 5.765160765160765, + "grad_norm": 0.43861111289426047, + "learning_rate": 3.680568156016786e-06, + "loss": 0.0816, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0352468304336071, + "step": 7085, + "valid_targets_mean": 3382.6, + "valid_targets_min": 744 + }, + { + "epoch": 5.769230769230769, + "grad_norm": 0.3633878194360417, + "learning_rate": 3.6571436170434547e-06, + "loss": 0.0859, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.02400396391749382, + "step": 7090, + "valid_targets_mean": 3966.6, + "valid_targets_min": 3279 + }, + { + "epoch": 5.7733007733007735, + "grad_norm": 0.6690219140055973, + "learning_rate": 3.633786354236415e-06, + "loss": 0.0817, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.035930804908275604, + "step": 7095, + "valid_targets_mean": 792.1, + "valid_targets_min": 536 + }, + { + "epoch": 5.777370777370777, + "grad_norm": 0.6422113838762212, + "learning_rate": 3.6104964637469755e-06, + "loss": 0.1166, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04105771332979202, + "step": 7100, + "valid_targets_mean": 2227.4, + "valid_targets_min": 853 + }, + { + "epoch": 5.781440781440781, + "grad_norm": 0.4559712074457705, + "learning_rate": 3.5872740414491093e-06, + "loss": 0.0771, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04004089534282684, + "step": 7105, + "valid_targets_mean": 3105.6, + "valid_targets_min": 2621 + }, + { + "epoch": 5.785510785510786, + "grad_norm": 0.6237273326303014, + "learning_rate": 3.564119182939052e-06, + "loss": 0.1027, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04093822091817856, + "step": 7110, + "valid_targets_mean": 1028.0, + "valid_targets_min": 728 + }, + { + "epoch": 5.789580789580789, + "grad_norm": 0.5635603145419109, + "learning_rate": 3.541031983534915e-06, + "loss": 0.0706, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04176001995801926, + "step": 7115, + "valid_targets_mean": 1574.2, + "valid_targets_min": 618 + }, + { + "epoch": 5.7936507936507935, + "grad_norm": 0.36319222074079344, + "learning_rate": 3.5180125382762674e-06, + "loss": 0.0744, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.042091649025678635, + "step": 7120, + "valid_targets_mean": 4135.0, + "valid_targets_min": 907 + }, + { + "epoch": 5.797720797720798, + "grad_norm": 0.6553513990147538, + "learning_rate": 3.4950609419237956e-06, + "loss": 0.0831, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0537349097430706, + "step": 7125, + "valid_targets_mean": 3060.8, + "valid_targets_min": 580 + }, + { + "epoch": 5.801790801790801, + "grad_norm": 0.3897873956243497, + "learning_rate": 3.4721772889588533e-06, + "loss": 0.0642, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.031878359615802765, + "step": 7130, + "valid_targets_mean": 3413.8, + "valid_targets_min": 726 + }, + { + "epoch": 5.805860805860806, + "grad_norm": 0.7307214688735437, + "learning_rate": 3.4493616735831205e-06, + "loss": 0.0627, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0403984859585762, + "step": 7135, + "valid_targets_mean": 4560.9, + "valid_targets_min": 2613 + }, + { + "epoch": 5.80993080993081, + "grad_norm": 0.4526517648913433, + "learning_rate": 3.4266141897181917e-06, + "loss": 0.0721, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04063914343714714, + "step": 7140, + "valid_targets_mean": 2847.2, + "valid_targets_min": 531 + }, + { + "epoch": 5.814000814000814, + "grad_norm": 0.48774825169971975, + "learning_rate": 3.4039349310051973e-06, + "loss": 0.0897, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.035396378487348557, + "step": 7145, + "valid_targets_mean": 2571.5, + "valid_targets_min": 826 + }, + { + "epoch": 5.818070818070818, + "grad_norm": 0.4021017225060565, + "learning_rate": 3.3813239908044104e-06, + "loss": 0.085, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.034908998757600784, + "step": 7150, + "valid_targets_mean": 3709.6, + "valid_targets_min": 3024 + }, + { + "epoch": 5.822140822140822, + "grad_norm": 0.3908988537537165, + "learning_rate": 3.358781462194878e-06, + "loss": 0.0733, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0368090383708477, + "step": 7155, + "valid_targets_mean": 3559.0, + "valid_targets_min": 2588 + }, + { + "epoch": 5.8262108262108265, + "grad_norm": 0.5606060832836967, + "learning_rate": 3.336307437974011e-06, + "loss": 0.0756, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05241452157497406, + "step": 7160, + "valid_targets_mean": 913.9, + "valid_targets_min": 502 + }, + { + "epoch": 5.83028083028083, + "grad_norm": 0.44223267919961007, + "learning_rate": 3.313902010657226e-06, + "loss": 0.08, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.032404445111751556, + "step": 7165, + "valid_targets_mean": 3301.9, + "valid_targets_min": 1158 + }, + { + "epoch": 5.834350834350834, + "grad_norm": 0.3154823634128118, + "learning_rate": 3.2915652724775616e-06, + "loss": 0.0588, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.027957221493124962, + "step": 7170, + "valid_targets_mean": 3874.9, + "valid_targets_min": 3044 + }, + { + "epoch": 5.838420838420839, + "grad_norm": 0.4259827920303877, + "learning_rate": 3.2692973153852936e-06, + "loss": 0.0792, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.03220093250274658, + "step": 7175, + "valid_targets_mean": 2167.8, + "valid_targets_min": 539 + }, + { + "epoch": 5.842490842490842, + "grad_norm": 0.42833294388406135, + "learning_rate": 3.247098231047552e-06, + "loss": 0.0734, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.03128783777356148, + "step": 7180, + "valid_targets_mean": 2397.0, + "valid_targets_min": 387 + }, + { + "epoch": 5.8465608465608465, + "grad_norm": 0.5739666740538348, + "learning_rate": 3.22496811084795e-06, + "loss": 0.0823, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04366585612297058, + "step": 7185, + "valid_targets_mean": 2340.9, + "valid_targets_min": 872 + }, + { + "epoch": 5.850630850630851, + "grad_norm": 0.41911962257836954, + "learning_rate": 3.2029070458862145e-06, + "loss": 0.0763, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0234956294298172, + "step": 7190, + "valid_targets_mean": 3017.2, + "valid_targets_min": 822 + }, + { + "epoch": 5.854700854700854, + "grad_norm": 0.7353086790754305, + "learning_rate": 3.180915126977795e-06, + "loss": 0.0796, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05614851415157318, + "step": 7195, + "valid_targets_mean": 1405.9, + "valid_targets_min": 605 + }, + { + "epoch": 5.858770858770859, + "grad_norm": 0.4594492349495658, + "learning_rate": 3.158992444653497e-06, + "loss": 0.0707, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.043180473148822784, + "step": 7200, + "valid_targets_mean": 2249.2, + "valid_targets_min": 808 + }, + { + "epoch": 5.862840862840863, + "grad_norm": 0.41266900678804913, + "learning_rate": 3.137139089159109e-06, + "loss": 0.0917, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.035402562469244, + "step": 7205, + "valid_targets_mean": 2900.5, + "valid_targets_min": 1010 + }, + { + "epoch": 5.866910866910867, + "grad_norm": 0.41094688449906835, + "learning_rate": 3.1153551504550397e-06, + "loss": 0.0711, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.02497289329767227, + "step": 7210, + "valid_targets_mean": 2432.6, + "valid_targets_min": 524 + }, + { + "epoch": 5.870980870980871, + "grad_norm": 0.47665813139114804, + "learning_rate": 3.0936407182159333e-06, + "loss": 0.0659, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.02819974161684513, + "step": 7215, + "valid_targets_mean": 1303.5, + "valid_targets_min": 516 + }, + { + "epoch": 5.875050875050875, + "grad_norm": 0.5054406457987558, + "learning_rate": 3.0719958818303165e-06, + "loss": 0.0841, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.032017797231674194, + "step": 7220, + "valid_targets_mean": 1762.1, + "valid_targets_min": 342 + }, + { + "epoch": 5.8791208791208796, + "grad_norm": 0.39611619386986135, + "learning_rate": 3.050420730400212e-06, + "loss": 0.0782, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.03306104242801666, + "step": 7225, + "valid_targets_mean": 3301.8, + "valid_targets_min": 1440 + }, + { + "epoch": 5.883190883190883, + "grad_norm": 0.44746690647319465, + "learning_rate": 3.0289153527407842e-06, + "loss": 0.0613, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.031408607959747314, + "step": 7230, + "valid_targets_mean": 2274.2, + "valid_targets_min": 708 + }, + { + "epoch": 5.887260887260887, + "grad_norm": 0.3541822938829694, + "learning_rate": 3.007479837379974e-06, + "loss": 0.0713, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.030303731560707092, + "step": 7235, + "valid_targets_mean": 5103.1, + "valid_targets_min": 3653 + }, + { + "epoch": 5.891330891330892, + "grad_norm": 0.35916098515876993, + "learning_rate": 2.9861142725581225e-06, + "loss": 0.068, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.02420351281762123, + "step": 7240, + "valid_targets_mean": 2366.5, + "valid_targets_min": 684 + }, + { + "epoch": 5.895400895400895, + "grad_norm": 0.3952741270339185, + "learning_rate": 2.96481874622762e-06, + "loss": 0.0586, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.034595150500535965, + "step": 7245, + "valid_targets_mean": 3352.1, + "valid_targets_min": 726 + }, + { + "epoch": 5.8994708994708995, + "grad_norm": 0.33735128445439605, + "learning_rate": 2.94359334605254e-06, + "loss": 0.082, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.03757286071777344, + "step": 7250, + "valid_targets_mean": 4009.6, + "valid_targets_min": 3076 + }, + { + "epoch": 5.903540903540904, + "grad_norm": 0.5708917806995089, + "learning_rate": 2.9224381594082807e-06, + "loss": 0.0917, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14005272090435028, + "step": 7255, + "valid_targets_mean": 2811.9, + "valid_targets_min": 1536 + }, + { + "epoch": 5.907610907610907, + "grad_norm": 0.39275014659707425, + "learning_rate": 2.9013532733812e-06, + "loss": 0.0702, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.03683479502797127, + "step": 7260, + "valid_targets_mean": 2669.4, + "valid_targets_min": 582 + }, + { + "epoch": 5.911680911680912, + "grad_norm": 0.34548880597408943, + "learning_rate": 2.880338774768263e-06, + "loss": 0.0819, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.025920607149600983, + "step": 7265, + "valid_targets_mean": 3261.9, + "valid_targets_min": 633 + }, + { + "epoch": 5.915750915750916, + "grad_norm": 0.37519369539071246, + "learning_rate": 2.8593947500766805e-06, + "loss": 0.0921, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.02501959726214409, + "step": 7270, + "valid_targets_mean": 3287.2, + "valid_targets_min": 1130 + }, + { + "epoch": 5.9198209198209195, + "grad_norm": 0.5057331987244915, + "learning_rate": 2.8385212855235477e-06, + "loss": 0.0668, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.03711901605129242, + "step": 7275, + "valid_targets_mean": 2858.9, + "valid_targets_min": 492 + }, + { + "epoch": 5.923890923890924, + "grad_norm": 0.5218986162294897, + "learning_rate": 2.8177184670355063e-06, + "loss": 0.0741, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04017069190740585, + "step": 7280, + "valid_targets_mean": 2531.5, + "valid_targets_min": 1045 + }, + { + "epoch": 5.927960927960928, + "grad_norm": 0.40603198794295736, + "learning_rate": 2.7969863802483676e-06, + "loss": 0.0666, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.023707497864961624, + "step": 7285, + "valid_targets_mean": 2692.0, + "valid_targets_min": 647 + }, + { + "epoch": 5.932030932030932, + "grad_norm": 0.61945163833617, + "learning_rate": 2.7763251105067813e-06, + "loss": 0.1139, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09058065712451935, + "step": 7290, + "valid_targets_mean": 1831.5, + "valid_targets_min": 806 + }, + { + "epoch": 5.936100936100936, + "grad_norm": 0.5987985070511739, + "learning_rate": 2.755734742863876e-06, + "loss": 0.0935, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.036716535687446594, + "step": 7295, + "valid_targets_mean": 1300.1, + "valid_targets_min": 575 + }, + { + "epoch": 5.94017094017094, + "grad_norm": 0.4345655736128064, + "learning_rate": 2.7352153620809053e-06, + "loss": 0.0704, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.03189973905682564, + "step": 7300, + "valid_targets_mean": 3615.9, + "valid_targets_min": 3193 + }, + { + "epoch": 5.944240944240944, + "grad_norm": 0.3798658377694568, + "learning_rate": 2.7147670526268986e-06, + "loss": 0.0699, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.02867596037685871, + "step": 7305, + "valid_targets_mean": 3944.1, + "valid_targets_min": 3433 + }, + { + "epoch": 5.948310948310948, + "grad_norm": 0.4038694045993181, + "learning_rate": 2.694389898678327e-06, + "loss": 0.07, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.027545157819986343, + "step": 7310, + "valid_targets_mean": 2874.5, + "valid_targets_min": 767 + }, + { + "epoch": 5.9523809523809526, + "grad_norm": 0.4462098257681364, + "learning_rate": 2.674083984118736e-06, + "loss": 0.0641, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.02901633456349373, + "step": 7315, + "valid_targets_mean": 2262.1, + "valid_targets_min": 785 + }, + { + "epoch": 5.956450956450956, + "grad_norm": 0.7941447901126528, + "learning_rate": 2.65384939253841e-06, + "loss": 0.0865, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04680174961686134, + "step": 7320, + "valid_targets_mean": 1222.8, + "valid_targets_min": 758 + }, + { + "epoch": 5.96052096052096, + "grad_norm": 0.6008702314886109, + "learning_rate": 2.6336862072340343e-06, + "loss": 0.0624, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.03380628675222397, + "step": 7325, + "valid_targets_mean": 2184.0, + "valid_targets_min": 797 + }, + { + "epoch": 5.964590964590965, + "grad_norm": 0.40310929667447126, + "learning_rate": 2.6135945112083506e-06, + "loss": 0.0677, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.031311824917793274, + "step": 7330, + "valid_targets_mean": 3445.0, + "valid_targets_min": 1606 + }, + { + "epoch": 5.968660968660968, + "grad_norm": 0.5761031593235989, + "learning_rate": 2.593574387169804e-06, + "loss": 0.0682, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.039855875074863434, + "step": 7335, + "valid_targets_mean": 2760.0, + "valid_targets_min": 678 + }, + { + "epoch": 5.9727309727309725, + "grad_norm": 0.3635293145604907, + "learning_rate": 2.573625917532212e-06, + "loss": 0.0731, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.035828523337841034, + "step": 7340, + "valid_targets_mean": 5550.0, + "valid_targets_min": 3997 + }, + { + "epoch": 5.976800976800977, + "grad_norm": 0.39688144504385575, + "learning_rate": 2.553749184414429e-06, + "loss": 0.0649, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.02166413888335228, + "step": 7345, + "valid_targets_mean": 2142.1, + "valid_targets_min": 706 + }, + { + "epoch": 5.980870980870981, + "grad_norm": 0.6218812931078816, + "learning_rate": 2.5339442696399897e-06, + "loss": 0.0713, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04698009043931961, + "step": 7350, + "valid_targets_mean": 1705.0, + "valid_targets_min": 580 + }, + { + "epoch": 5.984940984940985, + "grad_norm": 0.6040764860465824, + "learning_rate": 2.5142112547368005e-06, + "loss": 0.0932, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0763426423072815, + "step": 7355, + "valid_targets_mean": 2792.0, + "valid_targets_min": 2379 + }, + { + "epoch": 5.989010989010989, + "grad_norm": 0.37158486023146986, + "learning_rate": 2.494550220936773e-06, + "loss": 0.1331, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.033909812569618225, + "step": 7360, + "valid_targets_mean": 3489.0, + "valid_targets_min": 2861 + }, + { + "epoch": 5.993080993080993, + "grad_norm": 0.39704045184452585, + "learning_rate": 2.4749612491755158e-06, + "loss": 0.0711, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.03327007219195366, + "step": 7365, + "valid_targets_mean": 3850.4, + "valid_targets_min": 2453 + }, + { + "epoch": 5.997150997150997, + "grad_norm": 0.3981496985616391, + "learning_rate": 2.4554444200919882e-06, + "loss": 0.0662, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.03435596078634262, + "step": 7370, + "valid_targets_mean": 3932.9, + "valid_targets_min": 3389 + }, + { + "epoch": 6.0008140008140005, + "grad_norm": 0.9799010644276512, + "learning_rate": 2.4359998140281715e-06, + "loss": 0.1184, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09912261366844177, + "step": 7375, + "valid_targets_mean": 8215.2, + "valid_targets_min": 6163 + }, + { + "epoch": 6.004884004884005, + "grad_norm": 0.958774868683029, + "learning_rate": 2.416627511028733e-06, + "loss": 0.1947, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10130266845226288, + "step": 7380, + "valid_targets_mean": 8594.4, + "valid_targets_min": 6006 + }, + { + "epoch": 6.008954008954009, + "grad_norm": 0.7944310104614162, + "learning_rate": 2.39732759084071e-06, + "loss": 0.1772, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07904568314552307, + "step": 7385, + "valid_targets_mean": 3578.4, + "valid_targets_min": 289 + }, + { + "epoch": 6.013024013024013, + "grad_norm": 0.6679727135866469, + "learning_rate": 2.3781001329131593e-06, + "loss": 0.1762, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08894851803779602, + "step": 7390, + "valid_targets_mean": 7019.9, + "valid_targets_min": 4985 + }, + { + "epoch": 6.017094017094017, + "grad_norm": 0.5487601824765074, + "learning_rate": 2.358945216396855e-06, + "loss": 0.1657, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08587461709976196, + "step": 7395, + "valid_targets_mean": 6956.6, + "valid_targets_min": 1457 + }, + { + "epoch": 6.021164021164021, + "grad_norm": 0.4456534450702924, + "learning_rate": 2.3398629201439427e-06, + "loss": 0.1706, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08091467618942261, + "step": 7400, + "valid_targets_mean": 7361.6, + "valid_targets_min": 4961 + }, + { + "epoch": 6.025234025234025, + "grad_norm": 0.49514854465976493, + "learning_rate": 2.3208533227076257e-06, + "loss": 0.1834, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10835494101047516, + "step": 7405, + "valid_targets_mean": 7058.5, + "valid_targets_min": 5985 + }, + { + "epoch": 6.029304029304029, + "grad_norm": 0.43486218825382905, + "learning_rate": 2.3019165023418433e-06, + "loss": 0.1771, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09653938561677933, + "step": 7410, + "valid_targets_mean": 7664.2, + "valid_targets_min": 5169 + }, + { + "epoch": 6.0333740333740336, + "grad_norm": 0.4375911545615398, + "learning_rate": 2.2830525370009405e-06, + "loss": 0.1655, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08376707136631012, + "step": 7415, + "valid_targets_mean": 7111.8, + "valid_targets_min": 5348 + }, + { + "epoch": 6.037444037444037, + "grad_norm": 0.48136548478281455, + "learning_rate": 2.2642615043393512e-06, + "loss": 0.1516, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07133744657039642, + "step": 7420, + "valid_targets_mean": 4238.9, + "valid_targets_min": 152 + }, + { + "epoch": 6.041514041514041, + "grad_norm": 0.4381144318946382, + "learning_rate": 2.2455434817112853e-06, + "loss": 0.1572, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08667995780706406, + "step": 7425, + "valid_targets_mean": 7199.4, + "valid_targets_min": 4258 + }, + { + "epoch": 6.045584045584046, + "grad_norm": 0.4042301802869176, + "learning_rate": 2.226898546170384e-06, + "loss": 0.166, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08247347176074982, + "step": 7430, + "valid_targets_mean": 7243.4, + "valid_targets_min": 5831 + }, + { + "epoch": 6.04965404965405, + "grad_norm": 0.401406369618756, + "learning_rate": 2.2083267744694494e-06, + "loss": 0.1616, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08167104423046112, + "step": 7435, + "valid_targets_mean": 6950.6, + "valid_targets_min": 3862 + }, + { + "epoch": 6.0537240537240535, + "grad_norm": 0.3945925382036933, + "learning_rate": 2.1898282430600727e-06, + "loss": 0.1608, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08647415041923523, + "step": 7440, + "valid_targets_mean": 6991.9, + "valid_targets_min": 5696 + }, + { + "epoch": 6.057794057794058, + "grad_norm": 0.42315972384850226, + "learning_rate": 2.171403028092367e-06, + "loss": 0.1573, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0863867774605751, + "step": 7445, + "valid_targets_mean": 6963.5, + "valid_targets_min": 4947 + }, + { + "epoch": 6.061864061864062, + "grad_norm": 0.39104627702301664, + "learning_rate": 2.153051205414631e-06, + "loss": 0.1591, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07545962929725647, + "step": 7450, + "valid_targets_mean": 6436.9, + "valid_targets_min": 4988 + }, + { + "epoch": 6.065934065934066, + "grad_norm": 0.4009252559269369, + "learning_rate": 2.1347728505730392e-06, + "loss": 0.1616, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07987205684185028, + "step": 7455, + "valid_targets_mean": 7518.0, + "valid_targets_min": 5476 + }, + { + "epoch": 6.07000407000407, + "grad_norm": 0.380485131548382, + "learning_rate": 2.116568038811333e-06, + "loss": 0.1588, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07481364905834198, + "step": 7460, + "valid_targets_mean": 7422.6, + "valid_targets_min": 5226 + }, + { + "epoch": 6.074074074074074, + "grad_norm": 0.4378435227613099, + "learning_rate": 2.098436845070504e-06, + "loss": 0.1678, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10386194288730621, + "step": 7465, + "valid_targets_mean": 7080.0, + "valid_targets_min": 3753 + }, + { + "epoch": 6.078144078144078, + "grad_norm": 0.4804949566456428, + "learning_rate": 2.080379343988497e-06, + "loss": 0.1842, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.093023382127285, + "step": 7470, + "valid_targets_mean": 6064.6, + "valid_targets_min": 5161 + }, + { + "epoch": 6.082214082214082, + "grad_norm": 0.40760982947259294, + "learning_rate": 2.0623956098999056e-06, + "loss": 0.1285, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07654588669538498, + "step": 7475, + "valid_targets_mean": 7388.1, + "valid_targets_min": 5795 + }, + { + "epoch": 6.086284086284087, + "grad_norm": 0.3959750946787557, + "learning_rate": 2.044485716835638e-06, + "loss": 0.1535, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07424226403236389, + "step": 7480, + "valid_targets_mean": 7696.0, + "valid_targets_min": 4658 + }, + { + "epoch": 6.09035409035409, + "grad_norm": 0.40969800106923865, + "learning_rate": 2.026649738522648e-06, + "loss": 0.1584, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08557029068470001, + "step": 7485, + "valid_targets_mean": 7799.1, + "valid_targets_min": 5508 + }, + { + "epoch": 6.094424094424094, + "grad_norm": 0.4322454122605302, + "learning_rate": 2.00888774838361e-06, + "loss": 0.1651, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08102475851774216, + "step": 7490, + "valid_targets_mean": 7341.6, + "valid_targets_min": 6454 + }, + { + "epoch": 6.098494098494099, + "grad_norm": 0.4243940059827788, + "learning_rate": 1.9911998195366267e-06, + "loss": 0.1589, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07009638100862503, + "step": 7495, + "valid_targets_mean": 6818.0, + "valid_targets_min": 4392 + }, + { + "epoch": 6.102564102564102, + "grad_norm": 0.3901538287752902, + "learning_rate": 1.9735860247949245e-06, + "loss": 0.1597, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07060252130031586, + "step": 7500, + "valid_targets_mean": 7857.6, + "valid_targets_min": 5394 + }, + { + "epoch": 6.1066341066341066, + "grad_norm": 0.3579631569965471, + "learning_rate": 1.956046436666539e-06, + "loss": 0.1407, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06699617207050323, + "step": 7505, + "valid_targets_mean": 8314.1, + "valid_targets_min": 4837 + }, + { + "epoch": 6.110704110704111, + "grad_norm": 0.3428888972719672, + "learning_rate": 1.938581127354049e-06, + "loss": 0.1402, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05949755012989044, + "step": 7510, + "valid_targets_mean": 7368.2, + "valid_targets_min": 6027 + }, + { + "epoch": 6.114774114774114, + "grad_norm": 0.435155549282921, + "learning_rate": 1.92119016875425e-06, + "loss": 0.1474, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07999107986688614, + "step": 7515, + "valid_targets_mean": 6435.0, + "valid_targets_min": 4238 + }, + { + "epoch": 6.118844118844119, + "grad_norm": 1.1395430138108782, + "learning_rate": 1.903873632457871e-06, + "loss": 0.1424, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.03771523758769035, + "step": 7520, + "valid_targets_mean": 247.8, + "valid_targets_min": 137 + }, + { + "epoch": 6.122914122914123, + "grad_norm": 0.4163271285571281, + "learning_rate": 1.8866315897492792e-06, + "loss": 0.1656, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07711055874824524, + "step": 7525, + "valid_targets_mean": 6807.0, + "valid_targets_min": 5165 + }, + { + "epoch": 6.1269841269841265, + "grad_norm": 0.4805973336300679, + "learning_rate": 1.8694641116061895e-06, + "loss": 0.1706, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08931201696395874, + "step": 7530, + "valid_targets_mean": 6989.9, + "valid_targets_min": 5104 + }, + { + "epoch": 6.131054131054131, + "grad_norm": 0.48380408719281154, + "learning_rate": 1.8523712686993644e-06, + "loss": 0.1694, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09077896177768707, + "step": 7535, + "valid_targets_mean": 6692.2, + "valid_targets_min": 5068 + }, + { + "epoch": 6.135124135124135, + "grad_norm": 0.4478468576523544, + "learning_rate": 1.8353531313923213e-06, + "loss": 0.1644, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09867697954177856, + "step": 7540, + "valid_targets_mean": 7419.6, + "valid_targets_min": 5578 + }, + { + "epoch": 6.13919413919414, + "grad_norm": 0.41186611739704415, + "learning_rate": 1.818409769741054e-06, + "loss": 0.1604, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08206415176391602, + "step": 7545, + "valid_targets_mean": 7536.5, + "valid_targets_min": 5075 + }, + { + "epoch": 6.143264143264143, + "grad_norm": 0.3768181446141734, + "learning_rate": 1.8015412534937438e-06, + "loss": 0.1656, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07646715641021729, + "step": 7550, + "valid_targets_mean": 7221.5, + "valid_targets_min": 5841 + }, + { + "epoch": 6.147334147334147, + "grad_norm": 0.38647441084901085, + "learning_rate": 1.7847476520904528e-06, + "loss": 0.1603, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07809443771839142, + "step": 7555, + "valid_targets_mean": 7287.5, + "valid_targets_min": 5151 + }, + { + "epoch": 6.151404151404152, + "grad_norm": 0.40816468239190296, + "learning_rate": 1.7680290346628659e-06, + "loss": 0.1686, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0784003883600235, + "step": 7560, + "valid_targets_mean": 6738.4, + "valid_targets_min": 4042 + }, + { + "epoch": 6.155474155474155, + "grad_norm": 0.4323305496400144, + "learning_rate": 1.7513854700339884e-06, + "loss": 0.1626, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08970016241073608, + "step": 7565, + "valid_targets_mean": 6331.0, + "valid_targets_min": 5496 + }, + { + "epoch": 6.15954415954416, + "grad_norm": 0.43997280921953213, + "learning_rate": 1.7348170267178655e-06, + "loss": 0.1631, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07909651845693588, + "step": 7570, + "valid_targets_mean": 6271.1, + "valid_targets_min": 4695 + }, + { + "epoch": 6.163614163614164, + "grad_norm": 0.4492944206042751, + "learning_rate": 1.7183237729193081e-06, + "loss": 0.161, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08394847810268402, + "step": 7575, + "valid_targets_mean": 5790.8, + "valid_targets_min": 5021 + }, + { + "epoch": 6.167684167684167, + "grad_norm": 0.4731995030501333, + "learning_rate": 1.7019057765335945e-06, + "loss": 0.1525, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0583474338054657, + "step": 7580, + "valid_targets_mean": 3386.8, + "valid_targets_min": 2279 + }, + { + "epoch": 6.171754171754172, + "grad_norm": 0.4167788460938727, + "learning_rate": 1.6855631051462084e-06, + "loss": 0.1308, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0789489597082138, + "step": 7585, + "valid_targets_mean": 6240.5, + "valid_targets_min": 5093 + }, + { + "epoch": 6.175824175824176, + "grad_norm": 0.43712455668316696, + "learning_rate": 1.6692958260325599e-06, + "loss": 0.157, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08045420050621033, + "step": 7590, + "valid_targets_mean": 6329.5, + "valid_targets_min": 4589 + }, + { + "epoch": 6.1798941798941796, + "grad_norm": 0.402788614634297, + "learning_rate": 1.6531040061576909e-06, + "loss": 0.1578, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08553263545036316, + "step": 7595, + "valid_targets_mean": 7589.2, + "valid_targets_min": 4963 + }, + { + "epoch": 6.183964183964184, + "grad_norm": 0.41963035365571566, + "learning_rate": 1.6369877121760237e-06, + "loss": 0.158, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06892232596874237, + "step": 7600, + "valid_targets_mean": 5577.9, + "valid_targets_min": 4458 + }, + { + "epoch": 6.188034188034188, + "grad_norm": 0.45017103457098034, + "learning_rate": 1.6209470104310666e-06, + "loss": 0.1574, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0763903558254242, + "step": 7605, + "valid_targets_mean": 5432.5, + "valid_targets_min": 4433 + }, + { + "epoch": 6.192104192104192, + "grad_norm": 0.4536922039185944, + "learning_rate": 1.604981966955157e-06, + "loss": 0.1613, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08348037302494049, + "step": 7610, + "valid_targets_mean": 6361.8, + "valid_targets_min": 5201 + }, + { + "epoch": 6.196174196174196, + "grad_norm": 0.4307255875359923, + "learning_rate": 1.5890926474691682e-06, + "loss": 0.1609, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07572345435619354, + "step": 7615, + "valid_targets_mean": 5824.4, + "valid_targets_min": 5129 + }, + { + "epoch": 6.2002442002442, + "grad_norm": 0.7193073806157995, + "learning_rate": 1.5732791173822626e-06, + "loss": 0.1234, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0709250420331955, + "step": 7620, + "valid_targets_mean": 1637.6, + "valid_targets_min": 137 + }, + { + "epoch": 6.204314204314204, + "grad_norm": 0.418121085820752, + "learning_rate": 1.55754144179161e-06, + "loss": 0.1541, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07970662415027618, + "step": 7625, + "valid_targets_mean": 6474.6, + "valid_targets_min": 4751 + }, + { + "epoch": 6.208384208384208, + "grad_norm": 0.41818652414348384, + "learning_rate": 1.5418796854821239e-06, + "loss": 0.1545, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08583327382802963, + "step": 7630, + "valid_targets_mean": 7186.2, + "valid_targets_min": 5358 + }, + { + "epoch": 6.212454212454213, + "grad_norm": 0.4079409474648988, + "learning_rate": 1.52629391292618e-06, + "loss": 0.1608, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08047682046890259, + "step": 7635, + "valid_targets_mean": 7126.8, + "valid_targets_min": 4078 + }, + { + "epoch": 6.216524216524217, + "grad_norm": 0.41113059492418275, + "learning_rate": 1.5107841882833785e-06, + "loss": 0.1535, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07577785849571228, + "step": 7640, + "valid_targets_mean": 7022.1, + "valid_targets_min": 4940 + }, + { + "epoch": 6.22059422059422, + "grad_norm": 0.4280680246313017, + "learning_rate": 1.4953505754002562e-06, + "loss": 0.1506, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08161963522434235, + "step": 7645, + "valid_targets_mean": 5900.6, + "valid_targets_min": 4779 + }, + { + "epoch": 6.224664224664225, + "grad_norm": 0.40255377466943804, + "learning_rate": 1.4799931378100386e-06, + "loss": 0.1582, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08172336220741272, + "step": 7650, + "valid_targets_mean": 6290.1, + "valid_targets_min": 4620 + }, + { + "epoch": 6.228734228734229, + "grad_norm": 0.8420012750324607, + "learning_rate": 1.4647119387323593e-06, + "loss": 0.1532, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05620827525854111, + "step": 7655, + "valid_targets_mean": 1162.2, + "valid_targets_min": 788 + }, + { + "epoch": 6.232804232804233, + "grad_norm": 0.8176601544559853, + "learning_rate": 1.4495070410730238e-06, + "loss": 0.1392, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06722263246774673, + "step": 7660, + "valid_targets_mean": 1500.6, + "valid_targets_min": 957 + }, + { + "epoch": 6.236874236874237, + "grad_norm": 0.7199859456501606, + "learning_rate": 1.4343785074237393e-06, + "loss": 0.1284, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05805957317352295, + "step": 7665, + "valid_targets_mean": 1371.2, + "valid_targets_min": 796 + }, + { + "epoch": 6.240944240944241, + "grad_norm": 0.8078058347677376, + "learning_rate": 1.4193264000618511e-06, + "loss": 0.1277, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06448983401060104, + "step": 7670, + "valid_targets_mean": 1433.6, + "valid_targets_min": 697 + }, + { + "epoch": 6.245014245014245, + "grad_norm": 0.7165323289126707, + "learning_rate": 1.4043507809500923e-06, + "loss": 0.1268, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06178100407123566, + "step": 7675, + "valid_targets_mean": 1578.9, + "valid_targets_min": 638 + }, + { + "epoch": 6.249084249084249, + "grad_norm": 0.7277559295792052, + "learning_rate": 1.3894517117363294e-06, + "loss": 0.1248, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.060996219515800476, + "step": 7680, + "valid_targets_mean": 1746.4, + "valid_targets_min": 727 + }, + { + "epoch": 6.253154253154253, + "grad_norm": 0.753037215595642, + "learning_rate": 1.3746292537533145e-06, + "loss": 0.1284, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04994869977235794, + "step": 7685, + "valid_targets_mean": 1309.9, + "valid_targets_min": 761 + }, + { + "epoch": 6.257224257224257, + "grad_norm": 0.8053654916578108, + "learning_rate": 1.3598834680184124e-06, + "loss": 0.1134, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06560010462999344, + "step": 7690, + "valid_targets_mean": 1368.6, + "valid_targets_min": 714 + }, + { + "epoch": 6.261294261294261, + "grad_norm": 0.795230502585329, + "learning_rate": 1.3452144152333734e-06, + "loss": 0.1333, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06102893874049187, + "step": 7695, + "valid_targets_mean": 1491.8, + "valid_targets_min": 867 + }, + { + "epoch": 6.265364265364266, + "grad_norm": 0.7862793929838975, + "learning_rate": 1.330622155784067e-06, + "loss": 0.1255, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07237902283668518, + "step": 7700, + "valid_targets_mean": 1994.0, + "valid_targets_min": 790 + }, + { + "epoch": 6.269434269434269, + "grad_norm": 0.8055709736092495, + "learning_rate": 1.316106749740249e-06, + "loss": 0.1178, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05973295122385025, + "step": 7705, + "valid_targets_mean": 1523.4, + "valid_targets_min": 896 + }, + { + "epoch": 6.273504273504273, + "grad_norm": 1.1291501131017656, + "learning_rate": 1.3016682568552907e-06, + "loss": 0.1218, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05998287722468376, + "step": 7710, + "valid_targets_mean": 1790.1, + "valid_targets_min": 1009 + }, + { + "epoch": 6.277574277574278, + "grad_norm": 0.7166308018469184, + "learning_rate": 1.2873067365659519e-06, + "loss": 0.1227, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05086486041545868, + "step": 7715, + "valid_targets_mean": 1394.0, + "valid_targets_min": 717 + }, + { + "epoch": 6.281644281644281, + "grad_norm": 1.0582670753983305, + "learning_rate": 1.273022247992135e-06, + "loss": 0.1122, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05162067711353302, + "step": 7720, + "valid_targets_mean": 1129.0, + "valid_targets_min": 659 + }, + { + "epoch": 6.285714285714286, + "grad_norm": 0.7611563612526064, + "learning_rate": 1.2588148499366405e-06, + "loss": 0.1091, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.054125986993312836, + "step": 7725, + "valid_targets_mean": 1466.9, + "valid_targets_min": 642 + }, + { + "epoch": 6.28978428978429, + "grad_norm": 0.7818498664594019, + "learning_rate": 1.2446846008849046e-06, + "loss": 0.1154, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04109550267457962, + "step": 7730, + "valid_targets_mean": 1129.4, + "valid_targets_min": 765 + }, + { + "epoch": 6.293854293854293, + "grad_norm": 0.8675512810239565, + "learning_rate": 1.2306315590047912e-06, + "loss": 0.1264, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.053088150918483734, + "step": 7735, + "valid_targets_mean": 1322.8, + "valid_targets_min": 631 + }, + { + "epoch": 6.297924297924298, + "grad_norm": 0.9623907919932141, + "learning_rate": 1.2166557821463387e-06, + "loss": 0.1265, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0787411630153656, + "step": 7740, + "valid_targets_mean": 1612.0, + "valid_targets_min": 623 + }, + { + "epoch": 6.301994301994302, + "grad_norm": 0.7567121449013423, + "learning_rate": 1.2027573278415129e-06, + "loss": 0.1158, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.046164147555828094, + "step": 7745, + "valid_targets_mean": 1402.1, + "valid_targets_min": 940 + }, + { + "epoch": 6.3060643060643065, + "grad_norm": 0.7882207258869738, + "learning_rate": 1.188936253303976e-06, + "loss": 0.1216, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.051682114601135254, + "step": 7750, + "valid_targets_mean": 1337.8, + "valid_targets_min": 776 + }, + { + "epoch": 6.31013431013431, + "grad_norm": 0.8346483716380687, + "learning_rate": 1.1751926154288572e-06, + "loss": 0.1195, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06084754317998886, + "step": 7755, + "valid_targets_mean": 1525.5, + "valid_targets_min": 623 + }, + { + "epoch": 6.314204314204314, + "grad_norm": 0.7952416348423385, + "learning_rate": 1.1615264707925178e-06, + "loss": 0.1177, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.056195251643657684, + "step": 7760, + "valid_targets_mean": 1441.0, + "valid_targets_min": 693 + }, + { + "epoch": 6.318274318274319, + "grad_norm": 0.7893730311403948, + "learning_rate": 1.1479378756523008e-06, + "loss": 0.1132, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.03072535991668701, + "step": 7765, + "valid_targets_mean": 847.9, + "valid_targets_min": 611 + }, + { + "epoch": 6.322344322344322, + "grad_norm": 1.824214283992368, + "learning_rate": 1.1344268859463292e-06, + "loss": 0.1102, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05747454613447189, + "step": 7770, + "valid_targets_mean": 1524.9, + "valid_targets_min": 793 + }, + { + "epoch": 6.326414326414326, + "grad_norm": 0.7591661900171986, + "learning_rate": 1.1209935572932485e-06, + "loss": 0.1111, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.060703910887241364, + "step": 7775, + "valid_targets_mean": 1779.4, + "valid_targets_min": 563 + }, + { + "epoch": 6.330484330484331, + "grad_norm": 0.7391670644699448, + "learning_rate": 1.1076379449920105e-06, + "loss": 0.1148, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05903908237814903, + "step": 7780, + "valid_targets_mean": 1741.2, + "valid_targets_min": 1341 + }, + { + "epoch": 6.334554334554334, + "grad_norm": 0.8519469218904264, + "learning_rate": 1.0943601040216522e-06, + "loss": 0.117, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0688420832157135, + "step": 7785, + "valid_targets_mean": 1875.6, + "valid_targets_min": 1107 + }, + { + "epoch": 6.338624338624339, + "grad_norm": 0.8426833803284528, + "learning_rate": 1.0811600890410467e-06, + "loss": 0.1213, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05542324483394623, + "step": 7790, + "valid_targets_mean": 1419.5, + "valid_targets_min": 680 + }, + { + "epoch": 6.342694342694343, + "grad_norm": 0.7888806915864902, + "learning_rate": 1.0680379543887032e-06, + "loss": 0.1109, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07356111705303192, + "step": 7795, + "valid_targets_mean": 1968.2, + "valid_targets_min": 981 + }, + { + "epoch": 6.346764346764346, + "grad_norm": 0.8256011446110055, + "learning_rate": 1.054993754082534e-06, + "loss": 0.1173, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05775183066725731, + "step": 7800, + "valid_targets_mean": 1568.9, + "valid_targets_min": 937 + }, + { + "epoch": 6.350834350834351, + "grad_norm": 0.8120968604513121, + "learning_rate": 1.0420275418196168e-06, + "loss": 0.1121, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06070208176970482, + "step": 7805, + "valid_targets_mean": 1645.2, + "valid_targets_min": 804 + }, + { + "epoch": 6.354904354904355, + "grad_norm": 0.8680173034124377, + "learning_rate": 1.0291393709760044e-06, + "loss": 0.116, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.057425472885370255, + "step": 7810, + "valid_targets_mean": 1574.9, + "valid_targets_min": 1019 + }, + { + "epoch": 6.358974358974359, + "grad_norm": 0.8493112270869786, + "learning_rate": 1.0163292946064774e-06, + "loss": 0.1114, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.048819221556186676, + "step": 7815, + "valid_targets_mean": 1121.6, + "valid_targets_min": 882 + }, + { + "epoch": 6.363044363044363, + "grad_norm": 0.7714277556830921, + "learning_rate": 1.0035973654443466e-06, + "loss": 0.1085, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05912599712610245, + "step": 7820, + "valid_targets_mean": 1818.1, + "valid_targets_min": 1168 + }, + { + "epoch": 6.367114367114367, + "grad_norm": 0.8847457566201997, + "learning_rate": 9.909436359012182e-07, + "loss": 0.1039, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05455390363931656, + "step": 7825, + "valid_targets_mean": 1417.8, + "valid_targets_min": 678 + }, + { + "epoch": 6.371184371184372, + "grad_norm": 0.8899939918613994, + "learning_rate": 9.783681580667825e-07, + "loss": 0.1126, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04446838051080704, + "step": 7830, + "valid_targets_mean": 1051.0, + "valid_targets_min": 676 + }, + { + "epoch": 6.375254375254375, + "grad_norm": 0.7355334459190873, + "learning_rate": 9.658709837086144e-07, + "loss": 0.1108, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05160418897867203, + "step": 7835, + "valid_targets_mean": 1431.1, + "valid_targets_min": 794 + }, + { + "epoch": 6.3793243793243795, + "grad_norm": 0.7838797957111434, + "learning_rate": 9.534521642719375e-07, + "loss": 0.1105, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05564986169338226, + "step": 7840, + "valid_targets_mean": 1486.9, + "valid_targets_min": 742 + }, + { + "epoch": 6.383394383394384, + "grad_norm": 0.7965717627384523, + "learning_rate": 9.411117508794309e-07, + "loss": 0.1111, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06882425397634506, + "step": 7845, + "valid_targets_mean": 1734.5, + "valid_targets_min": 741 + }, + { + "epoch": 6.387464387464387, + "grad_norm": 0.8893222531203173, + "learning_rate": 9.288497943310082e-07, + "loss": 0.1183, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06830059736967087, + "step": 7850, + "valid_targets_mean": 1712.5, + "valid_targets_min": 1157 + }, + { + "epoch": 6.391534391534392, + "grad_norm": 0.8161513544565203, + "learning_rate": 9.166663451036118e-07, + "loss": 0.1055, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06825710833072662, + "step": 7855, + "valid_targets_mean": 1723.8, + "valid_targets_min": 862 + }, + { + "epoch": 6.395604395604396, + "grad_norm": 0.8646112978766813, + "learning_rate": 9.045614533510072e-07, + "loss": 0.1186, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06607664376497269, + "step": 7860, + "valid_targets_mean": 1633.1, + "valid_targets_min": 1019 + }, + { + "epoch": 6.399674399674399, + "grad_norm": 0.7626824305794315, + "learning_rate": 8.925351689035722e-07, + "loss": 0.1161, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04722012206912041, + "step": 7865, + "valid_targets_mean": 1376.1, + "valid_targets_min": 1058 + }, + { + "epoch": 6.403744403744404, + "grad_norm": 0.840691332087647, + "learning_rate": 8.80587541268092e-07, + "loss": 0.1293, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05784667283296585, + "step": 7870, + "valid_targets_mean": 1576.5, + "valid_targets_min": 618 + }, + { + "epoch": 6.407814407814408, + "grad_norm": 0.8207659704590586, + "learning_rate": 8.687186196275643e-07, + "loss": 0.1103, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06569144874811172, + "step": 7875, + "valid_targets_mean": 1615.8, + "valid_targets_min": 629 + }, + { + "epoch": 6.411884411884412, + "grad_norm": 0.9326833663597143, + "learning_rate": 8.56928452840975e-07, + "loss": 0.1147, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06420698761940002, + "step": 7880, + "valid_targets_mean": 1881.2, + "valid_targets_min": 1167 + }, + { + "epoch": 6.415954415954416, + "grad_norm": 0.8554904869093989, + "learning_rate": 8.452170894431267e-07, + "loss": 0.1111, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05731596797704697, + "step": 7885, + "valid_targets_mean": 1367.2, + "valid_targets_min": 699 + }, + { + "epoch": 6.42002442002442, + "grad_norm": 0.8406972937569153, + "learning_rate": 8.335845776444218e-07, + "loss": 0.1033, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04333464428782463, + "step": 7890, + "valid_targets_mean": 1174.9, + "valid_targets_min": 679 + }, + { + "epoch": 6.424094424094424, + "grad_norm": 0.8085860922583763, + "learning_rate": 8.22030965330658e-07, + "loss": 0.1085, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.054415322840213776, + "step": 7895, + "valid_targets_mean": 1556.2, + "valid_targets_min": 504 + }, + { + "epoch": 6.428164428164428, + "grad_norm": 0.7792125188992498, + "learning_rate": 8.10556300062848e-07, + "loss": 0.1119, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06440182030200958, + "step": 7900, + "valid_targets_mean": 1885.0, + "valid_targets_min": 1331 + }, + { + "epoch": 6.4322344322344325, + "grad_norm": 0.9287716280023119, + "learning_rate": 7.991606290770093e-07, + "loss": 0.1095, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.041307710111141205, + "step": 7905, + "valid_targets_mean": 1306.1, + "valid_targets_min": 661 + }, + { + "epoch": 6.436304436304436, + "grad_norm": 0.866709090318928, + "learning_rate": 7.878439992839815e-07, + "loss": 0.1149, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0448264516890049, + "step": 7910, + "valid_targets_mean": 1330.1, + "valid_targets_min": 958 + }, + { + "epoch": 6.44037444037444, + "grad_norm": 0.7657298637604557, + "learning_rate": 7.766064572692178e-07, + "loss": 0.1066, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04386502504348755, + "step": 7915, + "valid_targets_mean": 1465.0, + "valid_targets_min": 963 + }, + { + "epoch": 6.444444444444445, + "grad_norm": 0.8273726055440597, + "learning_rate": 7.65448049292612e-07, + "loss": 0.1128, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05073707178235054, + "step": 7920, + "valid_targets_mean": 1428.0, + "valid_targets_min": 793 + }, + { + "epoch": 6.448514448514448, + "grad_norm": 0.8400987990779193, + "learning_rate": 7.54368821288296e-07, + "loss": 0.1093, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06966409087181091, + "step": 7925, + "valid_targets_mean": 1633.1, + "valid_targets_min": 917 + }, + { + "epoch": 6.4525844525844525, + "grad_norm": 0.781744242763993, + "learning_rate": 7.433688188644517e-07, + "loss": 0.1127, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04392581433057785, + "step": 7930, + "valid_targets_mean": 1246.5, + "valid_targets_min": 872 + }, + { + "epoch": 6.456654456654457, + "grad_norm": 0.8099440855665998, + "learning_rate": 7.324480873031281e-07, + "loss": 0.1119, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05493423342704773, + "step": 7935, + "valid_targets_mean": 1903.6, + "valid_targets_min": 1220 + }, + { + "epoch": 6.46072446072446, + "grad_norm": 0.8303935015673607, + "learning_rate": 7.216066715600489e-07, + "loss": 0.1, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04634704440832138, + "step": 7940, + "valid_targets_mean": 1391.6, + "valid_targets_min": 712 + }, + { + "epoch": 6.464794464794465, + "grad_norm": 0.8580005093462261, + "learning_rate": 7.1084461626443e-07, + "loss": 0.106, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.053560368716716766, + "step": 7945, + "valid_targets_mean": 1295.0, + "valid_targets_min": 605 + }, + { + "epoch": 6.468864468864469, + "grad_norm": 0.7965111105826138, + "learning_rate": 7.001619657187996e-07, + "loss": 0.1173, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04019693285226822, + "step": 7950, + "valid_targets_mean": 1414.8, + "valid_targets_min": 746 + }, + { + "epoch": 6.472934472934473, + "grad_norm": 0.8678383738827472, + "learning_rate": 6.895587638988077e-07, + "loss": 0.1022, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.044681429862976074, + "step": 7955, + "valid_targets_mean": 1059.4, + "valid_targets_min": 723 + }, + { + "epoch": 6.477004477004477, + "grad_norm": 0.8017716642473961, + "learning_rate": 6.790350544530522e-07, + "loss": 0.1093, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06314340978860855, + "step": 7960, + "valid_targets_mean": 1634.2, + "valid_targets_min": 1327 + }, + { + "epoch": 6.481074481074481, + "grad_norm": 0.8064315437918422, + "learning_rate": 6.685908807028996e-07, + "loss": 0.1129, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04939217120409012, + "step": 7965, + "valid_targets_mean": 1412.5, + "valid_targets_min": 902 + }, + { + "epoch": 6.4851444851444855, + "grad_norm": 0.8311002011404962, + "learning_rate": 6.582262856423005e-07, + "loss": 0.1101, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05105116218328476, + "step": 7970, + "valid_targets_mean": 1558.8, + "valid_targets_min": 803 + }, + { + "epoch": 6.489214489214489, + "grad_norm": 0.8917053809677671, + "learning_rate": 6.479413119376143e-07, + "loss": 0.1131, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05821871757507324, + "step": 7975, + "valid_targets_mean": 1441.2, + "valid_targets_min": 1079 + }, + { + "epoch": 6.493284493284493, + "grad_norm": 0.7886247369918117, + "learning_rate": 6.377360019274425e-07, + "loss": 0.1054, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04788671433925629, + "step": 7980, + "valid_targets_mean": 1449.8, + "valid_targets_min": 1263 + }, + { + "epoch": 6.497354497354498, + "grad_norm": 0.8165623190693444, + "learning_rate": 6.276103976224401e-07, + "loss": 0.1116, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05484423413872719, + "step": 7985, + "valid_targets_mean": 1487.4, + "valid_targets_min": 844 + }, + { + "epoch": 6.501424501424501, + "grad_norm": 0.9134655853507222, + "learning_rate": 6.175645407051467e-07, + "loss": 0.1066, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05332685261964798, + "step": 7990, + "valid_targets_mean": 1421.4, + "valid_targets_min": 667 + }, + { + "epoch": 6.5054945054945055, + "grad_norm": 0.8551082037730058, + "learning_rate": 6.075984725298245e-07, + "loss": 0.1151, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06440360844135284, + "step": 7995, + "valid_targets_mean": 1822.6, + "valid_targets_min": 1041 + }, + { + "epoch": 6.50956450956451, + "grad_norm": 0.8385108112647772, + "learning_rate": 5.977122341222852e-07, + "loss": 0.1129, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.046396177262067795, + "step": 8000, + "valid_targets_mean": 1327.4, + "valid_targets_min": 777 + }, + { + "epoch": 6.513634513634513, + "grad_norm": 0.8465014974527149, + "learning_rate": 5.879058661797055e-07, + "loss": 0.1111, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05752868950366974, + "step": 8005, + "valid_targets_mean": 1823.8, + "valid_targets_min": 1283 + }, + { + "epoch": 6.517704517704518, + "grad_norm": 0.8181274172180951, + "learning_rate": 5.781794090704806e-07, + "loss": 0.1041, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0392225906252861, + "step": 8010, + "valid_targets_mean": 924.0, + "valid_targets_min": 620 + }, + { + "epoch": 6.521774521774522, + "grad_norm": 0.8248876924646297, + "learning_rate": 5.685329028340492e-07, + "loss": 0.1049, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.045971810817718506, + "step": 8015, + "valid_targets_mean": 1480.2, + "valid_targets_min": 656 + }, + { + "epoch": 6.5258445258445255, + "grad_norm": 0.8286128442083801, + "learning_rate": 5.589663871807216e-07, + "loss": 0.1075, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04419014975428581, + "step": 8020, + "valid_targets_mean": 1415.5, + "valid_targets_min": 697 + }, + { + "epoch": 6.52991452991453, + "grad_norm": 0.8155469269825053, + "learning_rate": 5.494799014915275e-07, + "loss": 0.1055, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.049654521048069, + "step": 8025, + "valid_targets_mean": 1655.8, + "valid_targets_min": 816 + }, + { + "epoch": 6.533984533984534, + "grad_norm": 0.8136069829867537, + "learning_rate": 5.400734848180467e-07, + "loss": 0.1001, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04807547852396965, + "step": 8030, + "valid_targets_mean": 1553.0, + "valid_targets_min": 850 + }, + { + "epoch": 6.5380545380545385, + "grad_norm": 0.8744821289828539, + "learning_rate": 5.307471758822557e-07, + "loss": 0.1066, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.052176572382450104, + "step": 8035, + "valid_targets_mean": 1353.9, + "valid_targets_min": 747 + }, + { + "epoch": 6.542124542124542, + "grad_norm": 0.7918578875775583, + "learning_rate": 5.215010130763576e-07, + "loss": 0.113, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04798971116542816, + "step": 8040, + "valid_targets_mean": 1420.9, + "valid_targets_min": 933 + }, + { + "epoch": 6.546194546194546, + "grad_norm": 0.8293804879113885, + "learning_rate": 5.123350344626343e-07, + "loss": 0.106, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.053255513310432434, + "step": 8045, + "valid_targets_mean": 1394.9, + "valid_targets_min": 827 + }, + { + "epoch": 6.550264550264551, + "grad_norm": 0.7710088076942784, + "learning_rate": 5.032492777732856e-07, + "loss": 0.1042, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04495897516608238, + "step": 8050, + "valid_targets_mean": 1383.2, + "valid_targets_min": 909 + }, + { + "epoch": 6.554334554334554, + "grad_norm": 0.7408488115343422, + "learning_rate": 4.942437804102729e-07, + "loss": 0.1065, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05502689629793167, + "step": 8055, + "valid_targets_mean": 1682.4, + "valid_targets_min": 1094 + }, + { + "epoch": 6.5584045584045585, + "grad_norm": 0.8161047493613602, + "learning_rate": 4.853185794451643e-07, + "loss": 0.0979, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04552149027585983, + "step": 8060, + "valid_targets_mean": 1118.4, + "valid_targets_min": 720 + }, + { + "epoch": 6.562474562474563, + "grad_norm": 0.810699986703739, + "learning_rate": 4.7647371161898547e-07, + "loss": 0.1075, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04979178309440613, + "step": 8065, + "valid_targets_mean": 1538.2, + "valid_targets_min": 610 + }, + { + "epoch": 6.566544566544566, + "grad_norm": 0.7810924502619642, + "learning_rate": 4.677092133420647e-07, + "loss": 0.1015, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05203615128993988, + "step": 8070, + "valid_targets_mean": 1570.1, + "valid_targets_min": 713 + }, + { + "epoch": 6.570614570614571, + "grad_norm": 0.8664468115638617, + "learning_rate": 4.5902512069389006e-07, + "loss": 0.1088, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.050125155597925186, + "step": 8075, + "valid_targets_mean": 1537.8, + "valid_targets_min": 874 + }, + { + "epoch": 6.574684574684575, + "grad_norm": 0.784693140282473, + "learning_rate": 4.504214694229525e-07, + "loss": 0.1066, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.051420196890830994, + "step": 8080, + "valid_targets_mean": 1471.2, + "valid_targets_min": 765 + }, + { + "epoch": 6.5787545787545785, + "grad_norm": 0.8589862191743883, + "learning_rate": 4.418982949466011e-07, + "loss": 0.1028, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04462102800607681, + "step": 8085, + "valid_targets_mean": 1254.4, + "valid_targets_min": 816 + }, + { + "epoch": 6.582824582824583, + "grad_norm": 0.8700836145069042, + "learning_rate": 4.334556323509009e-07, + "loss": 0.1058, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04805884137749672, + "step": 8090, + "valid_targets_mean": 1482.9, + "valid_targets_min": 1182 + }, + { + "epoch": 6.586894586894587, + "grad_norm": 0.6773547787375108, + "learning_rate": 4.25093516390489e-07, + "loss": 0.1102, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0742184966802597, + "step": 8095, + "valid_targets_mean": 1583.2, + "valid_targets_min": 363 + }, + { + "epoch": 6.590964590964591, + "grad_norm": 0.49684846547746847, + "learning_rate": 4.1681198148841415e-07, + "loss": 0.0797, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04401320964097977, + "step": 8100, + "valid_targets_mean": 3710.1, + "valid_targets_min": 3074 + }, + { + "epoch": 6.595034595034595, + "grad_norm": 0.5269822498666787, + "learning_rate": 4.0861106173602837e-07, + "loss": 0.0753, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04181273281574249, + "step": 8105, + "valid_targets_mean": 2577.6, + "valid_targets_min": 700 + }, + { + "epoch": 6.599104599104599, + "grad_norm": 0.5319878660224543, + "learning_rate": 4.004907908928135e-07, + "loss": 0.0794, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.03852052986621857, + "step": 8110, + "valid_targets_mean": 3309.8, + "valid_targets_min": 803 + }, + { + "epoch": 6.603174603174603, + "grad_norm": 0.4436363617952094, + "learning_rate": 3.9245120238626144e-07, + "loss": 0.0727, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.03728438913822174, + "step": 8115, + "valid_targets_mean": 2484.8, + "valid_targets_min": 920 + }, + { + "epoch": 6.607244607244607, + "grad_norm": 0.4833719991023813, + "learning_rate": 3.8449232931173195e-07, + "loss": 0.0759, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.039660267531871796, + "step": 8120, + "valid_targets_mean": 3095.1, + "valid_targets_min": 806 + }, + { + "epoch": 6.6113146113146115, + "grad_norm": 0.7829899471206379, + "learning_rate": 3.766142044323129e-07, + "loss": 0.1153, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06311972439289093, + "step": 8125, + "valid_targets_mean": 1663.1, + "valid_targets_min": 718 + }, + { + "epoch": 6.615384615384615, + "grad_norm": 0.4509111643360801, + "learning_rate": 3.688168601786912e-07, + "loss": 0.0836, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0408773347735405, + "step": 8130, + "valid_targets_mean": 3443.2, + "valid_targets_min": 1133 + }, + { + "epoch": 6.619454619454619, + "grad_norm": 0.5161079431546142, + "learning_rate": 3.6110032864901776e-07, + "loss": 0.0832, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.03457147255539894, + "step": 8135, + "valid_targets_mean": 1609.9, + "valid_targets_min": 859 + }, + { + "epoch": 6.623524623524624, + "grad_norm": 0.3376406967796116, + "learning_rate": 3.5346464160876945e-07, + "loss": 0.1058, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.02600034326314926, + "step": 8140, + "valid_targets_mean": 5572.1, + "valid_targets_min": 5150 + }, + { + "epoch": 6.627594627594627, + "grad_norm": 0.4907063131191801, + "learning_rate": 3.459098304906228e-07, + "loss": 0.0679, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04523205757141113, + "step": 8145, + "valid_targets_mean": 3950.0, + "valid_targets_min": 502 + }, + { + "epoch": 6.6316646316646315, + "grad_norm": 0.4380405824798826, + "learning_rate": 3.384359263943271e-07, + "loss": 0.0722, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04171394184231758, + "step": 8150, + "valid_targets_mean": 3669.2, + "valid_targets_min": 600 + }, + { + "epoch": 6.635734635734636, + "grad_norm": 0.41061389847454, + "learning_rate": 3.310429600865739e-07, + "loss": 0.0688, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.030914265662431717, + "step": 8155, + "valid_targets_mean": 2980.4, + "valid_targets_min": 518 + }, + { + "epoch": 6.639804639804639, + "grad_norm": 0.43198145452754944, + "learning_rate": 3.237309620008722e-07, + "loss": 0.1178, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0387565903365612, + "step": 8160, + "valid_targets_mean": 3675.4, + "valid_targets_min": 549 + }, + { + "epoch": 6.643874643874644, + "grad_norm": 0.35792992132158885, + "learning_rate": 3.1649996223741765e-07, + "loss": 0.0646, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.027515945956110954, + "step": 8165, + "valid_targets_mean": 4013.2, + "valid_targets_min": 3548 + }, + { + "epoch": 6.647944647944648, + "grad_norm": 0.47471099164218855, + "learning_rate": 3.093499905629727e-07, + "loss": 0.076, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.038465503603219986, + "step": 8170, + "valid_targets_mean": 2000.5, + "valid_targets_min": 644 + }, + { + "epoch": 6.652014652014652, + "grad_norm": 0.32225997700337605, + "learning_rate": 3.022810764107487e-07, + "loss": 0.0596, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.02280164137482643, + "step": 8175, + "valid_targets_mean": 2904.4, + "valid_targets_min": 720 + }, + { + "epoch": 6.656084656084656, + "grad_norm": 0.5895755914253757, + "learning_rate": 2.9529324888027287e-07, + "loss": 0.0797, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.028042782098054886, + "step": 8180, + "valid_targets_mean": 1487.6, + "valid_targets_min": 516 + }, + { + "epoch": 6.66015466015466, + "grad_norm": 0.5222751795102811, + "learning_rate": 2.8838653673727514e-07, + "loss": 0.0781, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.048454202711582184, + "step": 8185, + "valid_targets_mean": 2006.0, + "valid_targets_min": 557 + }, + { + "epoch": 6.6642246642246645, + "grad_norm": 0.5326599642169098, + "learning_rate": 2.8156096841357893e-07, + "loss": 0.0833, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04685577005147934, + "step": 8190, + "valid_targets_mean": 3359.2, + "valid_targets_min": 1086 + }, + { + "epoch": 6.668294668294668, + "grad_norm": 0.48322360350526905, + "learning_rate": 2.748165720069684e-07, + "loss": 0.0808, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05052535608410835, + "step": 8195, + "valid_targets_mean": 2866.8, + "valid_targets_min": 1008 + }, + { + "epoch": 6.672364672364672, + "grad_norm": 0.5129442984365581, + "learning_rate": 2.6815337528107723e-07, + "loss": 0.1169, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.03285352140665054, + "step": 8200, + "valid_targets_mean": 3259.9, + "valid_targets_min": 2702 + }, + { + "epoch": 6.676434676434677, + "grad_norm": 0.3692290656961646, + "learning_rate": 2.615714056652841e-07, + "loss": 0.0686, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.030034877359867096, + "step": 8205, + "valid_targets_mean": 4086.4, + "valid_targets_min": 3798 + }, + { + "epoch": 6.68050468050468, + "grad_norm": 0.8498455958044546, + "learning_rate": 2.5507069025458855e-07, + "loss": 0.0841, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06665704399347305, + "step": 8210, + "valid_targets_mean": 1020.8, + "valid_targets_min": 598 + }, + { + "epoch": 6.6845746845746845, + "grad_norm": 0.4283220379269479, + "learning_rate": 2.486512558095e-07, + "loss": 0.0925, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.02849685214459896, + "step": 8215, + "valid_targets_mean": 2028.4, + "valid_targets_min": 590 + }, + { + "epoch": 6.688644688644689, + "grad_norm": 0.4312912789027484, + "learning_rate": 2.423131287559355e-07, + "loss": 0.0879, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0402199849486351, + "step": 8220, + "valid_targets_mean": 4046.9, + "valid_targets_min": 3390 + }, + { + "epoch": 6.692714692714693, + "grad_norm": 0.6525867542972212, + "learning_rate": 2.3605633518510195e-07, + "loss": 0.1139, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12717895209789276, + "step": 8225, + "valid_targets_mean": 2247.4, + "valid_targets_min": 984 + }, + { + "epoch": 6.696784696784697, + "grad_norm": 0.48625518043675997, + "learning_rate": 2.2988090085339864e-07, + "loss": 0.2336, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04617800936102867, + "step": 8230, + "valid_targets_mean": 3348.9, + "valid_targets_min": 2176 + }, + { + "epoch": 6.700854700854701, + "grad_norm": 0.5666484347927709, + "learning_rate": 2.237868511823016e-07, + "loss": 0.0802, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.038623686879873276, + "step": 8235, + "valid_targets_mean": 2385.1, + "valid_targets_min": 1074 + }, + { + "epoch": 6.704924704924705, + "grad_norm": 0.4480717010310978, + "learning_rate": 2.1777421125826593e-07, + "loss": 0.0848, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.03942921385169029, + "step": 8240, + "valid_targets_mean": 3126.2, + "valid_targets_min": 1212 + }, + { + "epoch": 6.708994708994709, + "grad_norm": 0.447193589815513, + "learning_rate": 2.1184300583261263e-07, + "loss": 0.1726, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04053179919719696, + "step": 8245, + "valid_targets_mean": 4386.0, + "valid_targets_min": 2258 + }, + { + "epoch": 6.713064713064713, + "grad_norm": 0.37115709592990564, + "learning_rate": 2.059932593214442e-07, + "loss": 0.066, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05170625448226929, + "step": 8250, + "valid_targets_mean": 3738.9, + "valid_targets_min": 1466 + }, + { + "epoch": 6.7171347171347175, + "grad_norm": 0.3666621308583359, + "learning_rate": 2.0022499580552247e-07, + "loss": 0.08, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.033547915518283844, + "step": 8255, + "valid_targets_mean": 3522.6, + "valid_targets_min": 560 + }, + { + "epoch": 6.721204721204721, + "grad_norm": 0.41360520779368637, + "learning_rate": 1.9453823903019086e-07, + "loss": 0.0817, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04005451872944832, + "step": 8260, + "valid_targets_mean": 3383.1, + "valid_targets_min": 1189 + }, + { + "epoch": 6.725274725274725, + "grad_norm": 0.39520542502071915, + "learning_rate": 1.8893301240525463e-07, + "loss": 0.0672, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.03196336701512337, + "step": 8265, + "valid_targets_mean": 3610.1, + "valid_targets_min": 2257 + }, + { + "epoch": 6.72934472934473, + "grad_norm": 0.37918993485934277, + "learning_rate": 1.8340933900490965e-07, + "loss": 0.0652, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.026241963729262352, + "step": 8270, + "valid_targets_mean": 3479.8, + "valid_targets_min": 886 + }, + { + "epoch": 6.733414733414733, + "grad_norm": 0.4561669870328987, + "learning_rate": 1.7796724156762258e-07, + "loss": 0.0729, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.03602328151464462, + "step": 8275, + "valid_targets_mean": 3346.6, + "valid_targets_min": 2409 + }, + { + "epoch": 6.7374847374847375, + "grad_norm": 0.4635371928947697, + "learning_rate": 1.726067424960576e-07, + "loss": 0.0654, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.031619295477867126, + "step": 8280, + "valid_targets_mean": 3552.0, + "valid_targets_min": 2309 + }, + { + "epoch": 6.741554741554742, + "grad_norm": 0.7003118747711201, + "learning_rate": 1.6732786385696754e-07, + "loss": 0.1105, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0696185976266861, + "step": 8285, + "valid_targets_mean": 2149.8, + "valid_targets_min": 645 + }, + { + "epoch": 6.745624745624745, + "grad_norm": 0.3594856692476842, + "learning_rate": 1.6213062738111407e-07, + "loss": 0.06, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.02745797298848629, + "step": 8290, + "valid_targets_mean": 3310.2, + "valid_targets_min": 545 + }, + { + "epoch": 6.74969474969475, + "grad_norm": 0.474912995800518, + "learning_rate": 1.5701505446317656e-07, + "loss": 0.0601, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.03320920467376709, + "step": 8295, + "valid_targets_mean": 2458.4, + "valid_targets_min": 667 + }, + { + "epoch": 6.753764753764754, + "grad_norm": 0.3593430573149411, + "learning_rate": 1.5198116616165881e-07, + "loss": 0.0608, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.02956734225153923, + "step": 8300, + "valid_targets_mean": 3501.5, + "valid_targets_min": 2353 + }, + { + "epoch": 6.7578347578347575, + "grad_norm": 0.4694070743198895, + "learning_rate": 1.4702898319881142e-07, + "loss": 0.0628, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04120086506009102, + "step": 8305, + "valid_targets_mean": 3220.2, + "valid_targets_min": 792 + }, + { + "epoch": 6.761904761904762, + "grad_norm": 0.39254754104169143, + "learning_rate": 1.421585259605318e-07, + "loss": 0.1059, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.027905117720365524, + "step": 8310, + "valid_targets_mean": 3818.5, + "valid_targets_min": 3276 + }, + { + "epoch": 6.765974765974766, + "grad_norm": 0.4934206140709475, + "learning_rate": 1.3736981449629982e-07, + "loss": 0.0929, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.048690035939216614, + "step": 8315, + "valid_targets_mean": 2390.9, + "valid_targets_min": 647 + }, + { + "epoch": 6.77004477004477, + "grad_norm": 0.5152362898858448, + "learning_rate": 1.3266286851907783e-07, + "loss": 0.0741, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.041787609457969666, + "step": 8320, + "valid_targets_mean": 1500.6, + "valid_targets_min": 673 + }, + { + "epoch": 6.774114774114774, + "grad_norm": 0.7351478599697331, + "learning_rate": 1.280377074052397e-07, + "loss": 0.0919, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08116742968559265, + "step": 8325, + "valid_targets_mean": 1711.2, + "valid_targets_min": 702 + }, + { + "epoch": 6.778184778184778, + "grad_norm": 0.47783788496330504, + "learning_rate": 1.234943501944863e-07, + "loss": 0.1044, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04755005985498428, + "step": 8330, + "valid_targets_mean": 3036.1, + "valid_targets_min": 945 + }, + { + "epoch": 6.782254782254782, + "grad_norm": 0.5018792752451303, + "learning_rate": 1.1903281558976798e-07, + "loss": 0.0772, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.051417913287878036, + "step": 8335, + "valid_targets_mean": 3317.2, + "valid_targets_min": 2265 + }, + { + "epoch": 6.786324786324786, + "grad_norm": 0.3439978747787152, + "learning_rate": 1.1465312195721334e-07, + "loss": 0.0935, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.03338252753019333, + "step": 8340, + "valid_targets_mean": 3875.2, + "valid_targets_min": 3051 + }, + { + "epoch": 6.7903947903947905, + "grad_norm": 0.4903813555602027, + "learning_rate": 1.1035528732604272e-07, + "loss": 0.0747, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05204244330525398, + "step": 8345, + "valid_targets_mean": 2069.8, + "valid_targets_min": 523 + }, + { + "epoch": 6.794464794464794, + "grad_norm": 0.3832807020853381, + "learning_rate": 1.0613932938850157e-07, + "loss": 0.0698, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.03278394788503647, + "step": 8350, + "valid_targets_mean": 1640.8, + "valid_targets_min": 468 + }, + { + "epoch": 6.798534798534798, + "grad_norm": 0.3608814015492147, + "learning_rate": 1.020052654997894e-07, + "loss": 0.076, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.022406112402677536, + "step": 8355, + "valid_targets_mean": 4435.1, + "valid_targets_min": 885 + }, + { + "epoch": 6.802604802604803, + "grad_norm": 0.3861502768165619, + "learning_rate": 9.79531126779798e-08, + "loss": 0.0654, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.025468919426202774, + "step": 8360, + "valid_targets_mean": 1889.2, + "valid_targets_min": 546 + }, + { + "epoch": 6.806674806674806, + "grad_norm": 0.38892792716177965, + "learning_rate": 9.398288760395836e-08, + "loss": 0.0662, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.03875254467129707, + "step": 8365, + "valid_targets_mean": 4257.1, + "valid_targets_min": 2414 + }, + { + "epoch": 6.8107448107448105, + "grad_norm": 0.5353675959388392, + "learning_rate": 9.009460662134928e-08, + "loss": 0.079, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.043166883289813995, + "step": 8370, + "valid_targets_mean": 1502.4, + "valid_targets_min": 697 + }, + { + "epoch": 6.814814814814815, + "grad_norm": 0.4502618811767403, + "learning_rate": 8.628828573645554e-08, + "loss": 0.0813, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.039157938212156296, + "step": 8375, + "valid_targets_mean": 3466.0, + "valid_targets_min": 2777 + }, + { + "epoch": 6.818884818884819, + "grad_norm": 0.44084545531435015, + "learning_rate": 8.256394061817663e-08, + "loss": 0.0841, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.043219827115535736, + "step": 8380, + "valid_targets_mean": 3982.1, + "valid_targets_min": 3061 + }, + { + "epoch": 6.822954822954823, + "grad_norm": 0.37363497568335224, + "learning_rate": 7.892158659796422e-08, + "loss": 0.0682, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.034288160502910614, + "step": 8385, + "valid_targets_mean": 2788.0, + "valid_targets_min": 868 + }, + { + "epoch": 6.827024827024827, + "grad_norm": 0.3799289990411093, + "learning_rate": 7.536123866974665e-08, + "loss": 0.0738, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.02917984500527382, + "step": 8390, + "valid_targets_mean": 2120.6, + "valid_targets_min": 752 + }, + { + "epoch": 6.831094831094831, + "grad_norm": 0.3835533743706495, + "learning_rate": 7.188291148986892e-08, + "loss": 0.077, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.03172875568270683, + "step": 8395, + "valid_targets_mean": 3477.1, + "valid_targets_min": 1141 + }, + { + "epoch": 6.835164835164835, + "grad_norm": 0.34008156095211195, + "learning_rate": 6.848661937703727e-08, + "loss": 0.0577, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.03241172060370445, + "step": 8400, + "valid_targets_mean": 3888.4, + "valid_targets_min": 3540 + }, + { + "epoch": 6.839234839234839, + "grad_norm": 0.4090753301091059, + "learning_rate": 6.517237631225026e-08, + "loss": 0.0768, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.027315791696310043, + "step": 8405, + "valid_targets_mean": 3086.8, + "valid_targets_min": 773 + }, + { + "epoch": 6.843304843304844, + "grad_norm": 0.39701062406863746, + "learning_rate": 6.194019593875444e-08, + "loss": 0.0726, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.025227008387446404, + "step": 8410, + "valid_targets_mean": 3558.6, + "valid_targets_min": 2587 + }, + { + "epoch": 6.847374847374847, + "grad_norm": 0.4392394924145199, + "learning_rate": 5.879009156197768e-08, + "loss": 0.0846, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.03493019938468933, + "step": 8415, + "valid_targets_mean": 2688.9, + "valid_targets_min": 773 + }, + { + "epoch": 6.851444851444851, + "grad_norm": 0.5083647411957302, + "learning_rate": 5.572207614947589e-08, + "loss": 0.0732, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.03646247088909149, + "step": 8420, + "valid_targets_mean": 3734.4, + "valid_targets_min": 3068 + }, + { + "epoch": 6.855514855514856, + "grad_norm": 0.628171717161323, + "learning_rate": 5.273616233088641e-08, + "loss": 0.0805, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04628308117389679, + "step": 8425, + "valid_targets_mean": 1816.1, + "valid_targets_min": 893 + }, + { + "epoch": 6.85958485958486, + "grad_norm": 0.6087268426251439, + "learning_rate": 4.983236239787026e-08, + "loss": 0.076, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04026108980178833, + "step": 8430, + "valid_targets_mean": 1444.4, + "valid_targets_min": 708 + }, + { + "epoch": 6.8636548636548635, + "grad_norm": 0.4335028192076854, + "learning_rate": 4.701068830405886e-08, + "loss": 0.0804, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.029476728290319443, + "step": 8435, + "valid_targets_mean": 1768.8, + "valid_targets_min": 617 + }, + { + "epoch": 6.867724867724868, + "grad_norm": 0.45556499572135306, + "learning_rate": 4.4271151665014055e-08, + "loss": 0.0718, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.03315385431051254, + "step": 8440, + "valid_targets_mean": 2634.9, + "valid_targets_min": 720 + }, + { + "epoch": 6.871794871794872, + "grad_norm": 0.5629937228346741, + "learning_rate": 4.161376375817039e-08, + "loss": 0.0661, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04290713742375374, + "step": 8445, + "valid_targets_mean": 1836.2, + "valid_targets_min": 785 + }, + { + "epoch": 6.875864875864876, + "grad_norm": 0.5669698114208702, + "learning_rate": 3.903853552279513e-08, + "loss": 0.0872, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06467582285404205, + "step": 8450, + "valid_targets_mean": 2148.2, + "valid_targets_min": 765 + }, + { + "epoch": 6.87993487993488, + "grad_norm": 0.30486345935213593, + "learning_rate": 3.654547755994164e-08, + "loss": 0.0651, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.019002530723810196, + "step": 8455, + "valid_targets_mean": 3639.2, + "valid_targets_min": 2863 + }, + { + "epoch": 6.884004884004884, + "grad_norm": 0.38454514502865644, + "learning_rate": 3.413460013240499e-08, + "loss": 0.0655, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.026053160429000854, + "step": 8460, + "valid_targets_mean": 3033.4, + "valid_targets_min": 555 + }, + { + "epoch": 6.888074888074888, + "grad_norm": 0.4941087528400253, + "learning_rate": 3.180591316467974e-08, + "loss": 0.0745, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04288317263126373, + "step": 8465, + "valid_targets_mean": 2346.6, + "valid_targets_min": 721 + }, + { + "epoch": 6.892144892144892, + "grad_norm": 0.3509155023887329, + "learning_rate": 2.9559426242919964e-08, + "loss": 0.0606, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.03283192217350006, + "step": 8470, + "valid_targets_mean": 4902.1, + "valid_targets_min": 3640 + }, + { + "epoch": 6.896214896214897, + "grad_norm": 0.4037435152095895, + "learning_rate": 2.7395148614897115e-08, + "loss": 0.0617, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.03601628914475441, + "step": 8475, + "valid_targets_mean": 4536.6, + "valid_targets_min": 2020 + }, + { + "epoch": 6.9002849002849, + "grad_norm": 0.34603119742925265, + "learning_rate": 2.5313089189966665e-08, + "loss": 0.0775, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.025459568947553635, + "step": 8480, + "valid_targets_mean": 3218.6, + "valid_targets_min": 739 + }, + { + "epoch": 6.904354904354904, + "grad_norm": 0.4268602714698763, + "learning_rate": 2.331325653902816e-08, + "loss": 0.0945, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.03908785805106163, + "step": 8485, + "valid_targets_mean": 3774.6, + "valid_targets_min": 2945 + }, + { + "epoch": 6.908424908424909, + "grad_norm": 0.4557025570243415, + "learning_rate": 2.139565889448969e-08, + "loss": 0.07, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04192078113555908, + "step": 8490, + "valid_targets_mean": 3045.4, + "valid_targets_min": 704 + }, + { + "epoch": 6.912494912494912, + "grad_norm": 0.4093961592946955, + "learning_rate": 1.9560304150234576e-08, + "loss": 0.076, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.030827539041638374, + "step": 8495, + "valid_targets_mean": 2492.8, + "valid_targets_min": 774 + }, + { + "epoch": 6.916564916564917, + "grad_norm": 0.4284938895525246, + "learning_rate": 1.7807199861594736e-08, + "loss": 0.0906, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.02964516170322895, + "step": 8500, + "valid_targets_mean": 2564.6, + "valid_targets_min": 485 + }, + { + "epoch": 6.920634920634921, + "grad_norm": 0.43820381846708467, + "learning_rate": 1.613635324530405e-08, + "loss": 0.0682, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.03012934699654579, + "step": 8505, + "valid_targets_mean": 3002.5, + "valid_targets_min": 1048 + }, + { + "epoch": 6.924704924704924, + "grad_norm": 0.4182221488232721, + "learning_rate": 1.4547771179487246e-08, + "loss": 0.0721, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.032236211001873016, + "step": 8510, + "valid_targets_mean": 2588.5, + "valid_targets_min": 722 + }, + { + "epoch": 6.928774928774929, + "grad_norm": 0.5356394748249438, + "learning_rate": 1.304146020361996e-08, + "loss": 0.0757, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04836539924144745, + "step": 8515, + "valid_targets_mean": 1443.8, + "valid_targets_min": 865 + }, + { + "epoch": 6.932844932844933, + "grad_norm": 0.4971988244053681, + "learning_rate": 1.1617426518504283e-08, + "loss": 0.1033, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.03724466264247894, + "step": 8520, + "valid_targets_mean": 2283.1, + "valid_targets_min": 675 + }, + { + "epoch": 6.9369149369149365, + "grad_norm": 0.41438516750308313, + "learning_rate": 1.0275675986242128e-08, + "loss": 0.0913, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.03457161411643028, + "step": 8525, + "valid_targets_mean": 3202.0, + "valid_targets_min": 782 + }, + { + "epoch": 6.940984940984941, + "grad_norm": 0.4720882472008192, + "learning_rate": 9.016214130219692e-09, + "loss": 0.0686, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.03335484117269516, + "step": 8530, + "valid_targets_mean": 2586.9, + "valid_targets_min": 795 + }, + { + "epoch": 6.945054945054945, + "grad_norm": 0.4477158642708563, + "learning_rate": 7.839046135069695e-09, + "loss": 0.069, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.033690642565488815, + "step": 8535, + "valid_targets_mean": 3345.4, + "valid_targets_min": 2017 + }, + { + "epoch": 6.949124949124949, + "grad_norm": 0.4310573655106279, + "learning_rate": 6.744176846664729e-09, + "loss": 0.0679, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.029223371297121048, + "step": 8540, + "valid_targets_mean": 1937.5, + "valid_targets_min": 798 + }, + { + "epoch": 6.953194953194953, + "grad_norm": 0.37061900269005643, + "learning_rate": 5.731610772083951e-09, + "loss": 0.0597, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.029739029705524445, + "step": 8545, + "valid_targets_mean": 3302.2, + "valid_targets_min": 860 + }, + { + "epoch": 6.957264957264957, + "grad_norm": 0.39293408289174847, + "learning_rate": 4.801352079606414e-09, + "loss": 0.0844, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.03068091720342636, + "step": 8550, + "valid_targets_mean": 3287.2, + "valid_targets_min": 878 + }, + { + "epoch": 6.961334961334961, + "grad_norm": 0.36550971062080234, + "learning_rate": 3.9534045986888706e-09, + "loss": 0.0616, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.026515811681747437, + "step": 8555, + "valid_targets_mean": 3800.8, + "valid_targets_min": 2602 + }, + { + "epoch": 6.965404965404965, + "grad_norm": 0.5094035280811099, + "learning_rate": 3.1877718199480045e-09, + "loss": 0.0705, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04128776118159294, + "step": 8560, + "valid_targets_mean": 2853.9, + "valid_targets_min": 945 + }, + { + "epoch": 6.96947496947497, + "grad_norm": 0.43630707115728157, + "learning_rate": 2.5044568951471114e-09, + "loss": 0.0678, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.042031463235616684, + "step": 8565, + "valid_targets_mean": 3633.9, + "valid_targets_min": 2059 + }, + { + "epoch": 6.973544973544973, + "grad_norm": 0.4179577364933974, + "learning_rate": 1.9034626371872147e-09, + "loss": 0.0713, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.03878530487418175, + "step": 8570, + "valid_targets_mean": 4244.8, + "valid_targets_min": 2529 + }, + { + "epoch": 6.977614977614977, + "grad_norm": 0.3982770165173577, + "learning_rate": 1.3847915200893015e-09, + "loss": 0.0615, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04271527752280235, + "step": 8575, + "valid_targets_mean": 4650.8, + "valid_targets_min": 892 + }, + { + "epoch": 6.981684981684982, + "grad_norm": 0.28047291358880216, + "learning_rate": 9.484456789876639e-10, + "loss": 0.065, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.02087704837322235, + "step": 8580, + "valid_targets_mean": 4432.1, + "valid_targets_min": 1657 + }, + { + "epoch": 6.985754985754986, + "grad_norm": 0.6547087358433034, + "learning_rate": 5.944269101232358e-10, + "loss": 0.1178, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09982357919216156, + "step": 8585, + "valid_targets_mean": 2023.0, + "valid_targets_min": 709 + }, + { + "epoch": 6.98982498982499, + "grad_norm": 0.3577008023192031, + "learning_rate": 3.2273667083027036e-10, + "loss": 0.1045, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.027526959776878357, + "step": 8590, + "valid_targets_mean": 3241.9, + "valid_targets_min": 527 + }, + { + "epoch": 6.993894993894994, + "grad_norm": 0.4179136407746658, + "learning_rate": 1.3337607953634034e-10, + "loss": 0.0743, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04843872785568237, + "step": 8595, + "valid_targets_mean": 3313.4, + "valid_targets_min": 2308 + }, + { + "epoch": 6.997964997964998, + "grad_norm": 0.3512093714041848, + "learning_rate": 2.6345915753456243e-11, + "loss": 0.0599, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0316237136721611, + "step": 8600, + "valid_targets_mean": 3463.5, + "valid_targets_min": 2883 + }, + { + "epoch": 6.999592999592999, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04449460655450821, + "step": 8602, + "total_flos": 2.7561038524606054e+18, + "train_loss": 0.015238219677667456, + "train_runtime": 26280.0507, + "train_samples_per_second": 5.235, + "train_steps_per_second": 0.327, + "valid_targets_mean": 4028.4, + "valid_targets_min": 1273 + } + ], + "logging_steps": 5, + "max_steps": 8603, + "num_input_tokens_seen": 0, + "num_train_epochs": 7, + "save_steps": 200, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 2.7561038524606054e+18, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +}