diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,9265 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 7.0, + "eval_steps": 500, + "global_step": 4193, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.008347245409015025, + "grad_norm": 20.125777436153097, + "learning_rate": 3.80952380952381e-07, + "loss": 0.8516, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.830694854259491, + "step": 5, + "valid_targets_mean": 2796.9, + "valid_targets_min": 502 + }, + { + "epoch": 0.01669449081803005, + "grad_norm": 22.315323330587066, + "learning_rate": 8.571428571428572e-07, + "loss": 0.8348, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.945982813835144, + "step": 10, + "valid_targets_mean": 1666.1, + "valid_targets_min": 579 + }, + { + "epoch": 0.025041736227045076, + "grad_norm": 17.453196048630293, + "learning_rate": 1.3333333333333334e-06, + "loss": 0.8539, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.8544225692749023, + "step": 15, + "valid_targets_mean": 1624.1, + "valid_targets_min": 749 + }, + { + "epoch": 0.0333889816360601, + "grad_norm": 14.870206418345237, + "learning_rate": 1.8095238095238097e-06, + "loss": 0.7723, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.8390950560569763, + "step": 20, + "valid_targets_mean": 1568.9, + "valid_targets_min": 689 + }, + { + "epoch": 0.041736227045075125, + "grad_norm": 11.532926695492177, + "learning_rate": 2.285714285714286e-06, + "loss": 0.7356, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.7299785017967224, + "step": 25, + "valid_targets_mean": 2161.1, + "valid_targets_min": 781 + }, + { + "epoch": 0.05008347245409015, + "grad_norm": 6.704783369679939, + "learning_rate": 2.7619047619047625e-06, + "loss": 0.6733, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.6406428813934326, + "step": 30, + "valid_targets_mean": 2134.3, + "valid_targets_min": 913 + }, + { + "epoch": 0.05843071786310518, + "grad_norm": 3.426760945542158, + "learning_rate": 3.2380952380952385e-06, + "loss": 0.6114, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.5607920289039612, + "step": 35, + "valid_targets_mean": 2782.6, + "valid_targets_min": 648 + }, + { + "epoch": 0.0667779632721202, + "grad_norm": 2.500234673380324, + "learning_rate": 3.7142857142857146e-06, + "loss": 0.5837, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.6126732230186462, + "step": 40, + "valid_targets_mean": 2412.5, + "valid_targets_min": 1138 + }, + { + "epoch": 0.07512520868113523, + "grad_norm": 1.6353048788833415, + "learning_rate": 4.190476190476191e-06, + "loss": 0.5074, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4867894649505615, + "step": 45, + "valid_targets_mean": 2508.1, + "valid_targets_min": 626 + }, + { + "epoch": 0.08347245409015025, + "grad_norm": 1.6643943844489701, + "learning_rate": 4.666666666666667e-06, + "loss": 0.5153, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.5278254151344299, + "step": 50, + "valid_targets_mean": 1936.9, + "valid_targets_min": 797 + }, + { + "epoch": 0.09181969949916527, + "grad_norm": 0.9498878936104556, + "learning_rate": 5.142857142857142e-06, + "loss": 0.4827, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4543859660625458, + "step": 55, + "valid_targets_mean": 3347.8, + "valid_targets_min": 543 + }, + { + "epoch": 0.1001669449081803, + "grad_norm": 1.0174237616830506, + "learning_rate": 5.619047619047619e-06, + "loss": 0.4912, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.49389925599098206, + "step": 60, + "valid_targets_mean": 2433.2, + "valid_targets_min": 966 + }, + { + "epoch": 0.10851419031719532, + "grad_norm": 0.8239188853729782, + "learning_rate": 6.095238095238096e-06, + "loss": 0.4612, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4277969002723694, + "step": 65, + "valid_targets_mean": 3225.6, + "valid_targets_min": 876 + }, + { + "epoch": 0.11686143572621036, + "grad_norm": 0.7232769829375342, + "learning_rate": 6.571428571428572e-06, + "loss": 0.453, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.41270703077316284, + "step": 70, + "valid_targets_mean": 3240.1, + "valid_targets_min": 866 + }, + { + "epoch": 0.12520868113522537, + "grad_norm": 0.8683682081988188, + "learning_rate": 7.047619047619048e-06, + "loss": 0.4407, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.47156885266304016, + "step": 75, + "valid_targets_mean": 2287.4, + "valid_targets_min": 984 + }, + { + "epoch": 0.1335559265442404, + "grad_norm": 0.828011223922074, + "learning_rate": 7.523809523809524e-06, + "loss": 0.4638, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4064176082611084, + "step": 80, + "valid_targets_mean": 2163.6, + "valid_targets_min": 1110 + }, + { + "epoch": 0.1419031719532554, + "grad_norm": 0.5974334115927779, + "learning_rate": 8.000000000000001e-06, + "loss": 0.4303, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.40039050579071045, + "step": 85, + "valid_targets_mean": 3918.3, + "valid_targets_min": 1060 + }, + { + "epoch": 0.15025041736227046, + "grad_norm": 0.8880912110853688, + "learning_rate": 8.476190476190477e-06, + "loss": 0.44, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4153106212615967, + "step": 90, + "valid_targets_mean": 1947.6, + "valid_targets_min": 789 + }, + { + "epoch": 0.15859766277128548, + "grad_norm": 0.751148343881156, + "learning_rate": 8.952380952380953e-06, + "loss": 0.412, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4028511643409729, + "step": 95, + "valid_targets_mean": 2478.4, + "valid_targets_min": 633 + }, + { + "epoch": 0.1669449081803005, + "grad_norm": 0.7236471980196548, + "learning_rate": 9.42857142857143e-06, + "loss": 0.4115, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3987542390823364, + "step": 100, + "valid_targets_mean": 2565.8, + "valid_targets_min": 361 + }, + { + "epoch": 0.17529215358931552, + "grad_norm": 0.9071686345050813, + "learning_rate": 9.904761904761906e-06, + "loss": 0.4165, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4188500642776489, + "step": 105, + "valid_targets_mean": 1897.7, + "valid_targets_min": 782 + }, + { + "epoch": 0.18363939899833054, + "grad_norm": 0.8248284873164098, + "learning_rate": 1.0380952380952383e-05, + "loss": 0.4088, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3929654359817505, + "step": 110, + "valid_targets_mean": 1896.4, + "valid_targets_min": 506 + }, + { + "epoch": 0.19198664440734559, + "grad_norm": 0.7823148745443437, + "learning_rate": 1.0857142857142858e-05, + "loss": 0.3967, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3725951313972473, + "step": 115, + "valid_targets_mean": 2162.0, + "valid_targets_min": 865 + }, + { + "epoch": 0.2003338898163606, + "grad_norm": 0.7130290245492041, + "learning_rate": 1.1333333333333334e-05, + "loss": 0.3976, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.40969598293304443, + "step": 120, + "valid_targets_mean": 2873.4, + "valid_targets_min": 608 + }, + { + "epoch": 0.20868113522537562, + "grad_norm": 0.6374637811571959, + "learning_rate": 1.180952380952381e-05, + "loss": 0.4056, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.36969244480133057, + "step": 125, + "valid_targets_mean": 2947.9, + "valid_targets_min": 699 + }, + { + "epoch": 0.21702838063439064, + "grad_norm": 0.7660820864604462, + "learning_rate": 1.2285714285714288e-05, + "loss": 0.3699, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.33364009857177734, + "step": 130, + "valid_targets_mean": 1937.9, + "valid_targets_min": 672 + }, + { + "epoch": 0.22537562604340566, + "grad_norm": 1.0608408159879852, + "learning_rate": 1.2761904761904762e-05, + "loss": 0.3785, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4482278823852539, + "step": 135, + "valid_targets_mean": 2734.9, + "valid_targets_min": 904 + }, + { + "epoch": 0.2337228714524207, + "grad_norm": 0.7937754785168789, + "learning_rate": 1.3238095238095238e-05, + "loss": 0.393, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3878324031829834, + "step": 140, + "valid_targets_mean": 2254.6, + "valid_targets_min": 957 + }, + { + "epoch": 0.24207011686143573, + "grad_norm": 0.9530960586446873, + "learning_rate": 1.3714285714285716e-05, + "loss": 0.3822, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3858902156352997, + "step": 145, + "valid_targets_mean": 2910.6, + "valid_targets_min": 699 + }, + { + "epoch": 0.25041736227045075, + "grad_norm": 0.793386246739093, + "learning_rate": 1.4190476190476192e-05, + "loss": 0.3885, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.40752989053726196, + "step": 150, + "valid_targets_mean": 2232.0, + "valid_targets_min": 693 + }, + { + "epoch": 0.2587646076794658, + "grad_norm": 0.7054845625096792, + "learning_rate": 1.4666666666666666e-05, + "loss": 0.3746, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.36771881580352783, + "step": 155, + "valid_targets_mean": 2490.8, + "valid_targets_min": 520 + }, + { + "epoch": 0.2671118530884808, + "grad_norm": 0.8245710561947659, + "learning_rate": 1.5142857142857144e-05, + "loss": 0.3811, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.42931532859802246, + "step": 160, + "valid_targets_mean": 2966.5, + "valid_targets_min": 690 + }, + { + "epoch": 0.27545909849749584, + "grad_norm": 0.9367959207676698, + "learning_rate": 1.5619047619047622e-05, + "loss": 0.3796, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.39394938945770264, + "step": 165, + "valid_targets_mean": 2375.9, + "valid_targets_min": 909 + }, + { + "epoch": 0.2838063439065108, + "grad_norm": 0.7452788082591367, + "learning_rate": 1.6095238095238096e-05, + "loss": 0.3553, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3504602313041687, + "step": 170, + "valid_targets_mean": 2428.2, + "valid_targets_min": 604 + }, + { + "epoch": 0.2921535893155259, + "grad_norm": 0.6706956192945309, + "learning_rate": 1.6571428571428574e-05, + "loss": 0.356, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.33892327547073364, + "step": 175, + "valid_targets_mean": 2694.3, + "valid_targets_min": 805 + }, + { + "epoch": 0.3005008347245409, + "grad_norm": 0.6450744053432401, + "learning_rate": 1.704761904761905e-05, + "loss": 0.3957, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.31530341506004333, + "step": 180, + "valid_targets_mean": 2748.9, + "valid_targets_min": 742 + }, + { + "epoch": 0.3088480801335559, + "grad_norm": 0.7122687871177076, + "learning_rate": 1.7523809523809526e-05, + "loss": 0.3614, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3535732626914978, + "step": 185, + "valid_targets_mean": 2628.3, + "valid_targets_min": 618 + }, + { + "epoch": 0.31719532554257096, + "grad_norm": 0.6410610802258473, + "learning_rate": 1.8e-05, + "loss": 0.3721, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.38299959897994995, + "step": 190, + "valid_targets_mean": 3226.7, + "valid_targets_min": 925 + }, + { + "epoch": 0.32554257095158595, + "grad_norm": 0.7519536354934145, + "learning_rate": 1.8476190476190478e-05, + "loss": 0.3463, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.39102399349212646, + "step": 195, + "valid_targets_mean": 3061.8, + "valid_targets_min": 1000 + }, + { + "epoch": 0.333889816360601, + "grad_norm": 0.869110988708763, + "learning_rate": 1.8952380952380953e-05, + "loss": 0.3586, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.35571885108947754, + "step": 200, + "valid_targets_mean": 2081.0, + "valid_targets_min": 949 + }, + { + "epoch": 0.34223706176961605, + "grad_norm": 0.7628250895843997, + "learning_rate": 1.942857142857143e-05, + "loss": 0.3613, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.37019288539886475, + "step": 205, + "valid_targets_mean": 2288.1, + "valid_targets_min": 638 + }, + { + "epoch": 0.35058430717863104, + "grad_norm": 0.6709774464989092, + "learning_rate": 1.9904761904761908e-05, + "loss": 0.3793, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.33823031187057495, + "step": 210, + "valid_targets_mean": 3179.8, + "valid_targets_min": 929 + }, + { + "epoch": 0.3589315525876461, + "grad_norm": 0.7192151635508831, + "learning_rate": 2.0380952380952382e-05, + "loss": 0.3352, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.33868905901908875, + "step": 215, + "valid_targets_mean": 2715.9, + "valid_targets_min": 986 + }, + { + "epoch": 0.3672787979966611, + "grad_norm": 0.6469633018096315, + "learning_rate": 2.085714285714286e-05, + "loss": 0.3404, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3615769147872925, + "step": 220, + "valid_targets_mean": 3141.8, + "valid_targets_min": 644 + }, + { + "epoch": 0.3756260434056761, + "grad_norm": 0.7640930887163769, + "learning_rate": 2.1333333333333335e-05, + "loss": 0.3582, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3568115830421448, + "step": 225, + "valid_targets_mean": 2293.8, + "valid_targets_min": 1019 + }, + { + "epoch": 0.38397328881469117, + "grad_norm": 0.7181587877168871, + "learning_rate": 2.180952380952381e-05, + "loss": 0.3583, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.38162344694137573, + "step": 230, + "valid_targets_mean": 2645.1, + "valid_targets_min": 812 + }, + { + "epoch": 0.39232053422370616, + "grad_norm": 0.803834346449949, + "learning_rate": 2.2285714285714287e-05, + "loss": 0.3721, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.43554484844207764, + "step": 235, + "valid_targets_mean": 2310.6, + "valid_targets_min": 988 + }, + { + "epoch": 0.4006677796327212, + "grad_norm": 0.7952409835075342, + "learning_rate": 2.2761904761904765e-05, + "loss": 0.3278, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3816568851470947, + "step": 240, + "valid_targets_mean": 2280.4, + "valid_targets_min": 982 + }, + { + "epoch": 0.4090150250417362, + "grad_norm": 0.7246105427812677, + "learning_rate": 2.3238095238095242e-05, + "loss": 0.3536, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3831411600112915, + "step": 245, + "valid_targets_mean": 2918.2, + "valid_targets_min": 1227 + }, + { + "epoch": 0.41736227045075125, + "grad_norm": 0.8338708974356746, + "learning_rate": 2.3714285714285717e-05, + "loss": 0.3656, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4046775698661804, + "step": 250, + "valid_targets_mean": 2320.4, + "valid_targets_min": 580 + }, + { + "epoch": 0.4257095158597663, + "grad_norm": 0.6393485320022019, + "learning_rate": 2.419047619047619e-05, + "loss": 0.3244, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3226710557937622, + "step": 255, + "valid_targets_mean": 3065.7, + "valid_targets_min": 708 + }, + { + "epoch": 0.4340567612687813, + "grad_norm": 0.6842895103742899, + "learning_rate": 2.466666666666667e-05, + "loss": 0.3575, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.37478750944137573, + "step": 260, + "valid_targets_mean": 3703.2, + "valid_targets_min": 933 + }, + { + "epoch": 0.44240400667779634, + "grad_norm": 0.6303158293010908, + "learning_rate": 2.5142857142857143e-05, + "loss": 0.344, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.34078970551490784, + "step": 265, + "valid_targets_mean": 2905.3, + "valid_targets_min": 792 + }, + { + "epoch": 0.4507512520868113, + "grad_norm": 0.803236946087365, + "learning_rate": 2.5619047619047618e-05, + "loss": 0.3727, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.41703659296035767, + "step": 270, + "valid_targets_mean": 2465.1, + "valid_targets_min": 759 + }, + { + "epoch": 0.4590984974958264, + "grad_norm": 0.7803373412241122, + "learning_rate": 2.60952380952381e-05, + "loss": 0.3385, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3831275701522827, + "step": 275, + "valid_targets_mean": 2617.3, + "valid_targets_min": 945 + }, + { + "epoch": 0.4674457429048414, + "grad_norm": 0.8278124298535766, + "learning_rate": 2.6571428571428573e-05, + "loss": 0.3546, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4114047884941101, + "step": 280, + "valid_targets_mean": 2283.2, + "valid_targets_min": 485 + }, + { + "epoch": 0.4757929883138564, + "grad_norm": 0.8066324407668412, + "learning_rate": 2.704761904761905e-05, + "loss": 0.3281, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.36297842860221863, + "step": 285, + "valid_targets_mean": 2427.2, + "valid_targets_min": 964 + }, + { + "epoch": 0.48414023372287146, + "grad_norm": 1.3375136899858193, + "learning_rate": 2.7523809523809525e-05, + "loss": 0.3384, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.34834030270576477, + "step": 290, + "valid_targets_mean": 2393.3, + "valid_targets_min": 874 + }, + { + "epoch": 0.49248747913188645, + "grad_norm": 0.7358089379961378, + "learning_rate": 2.8e-05, + "loss": 0.343, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4069739580154419, + "step": 295, + "valid_targets_mean": 2538.5, + "valid_targets_min": 687 + }, + { + "epoch": 0.5008347245409015, + "grad_norm": 0.8286273038250276, + "learning_rate": 2.847619047619048e-05, + "loss": 0.3463, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.380668967962265, + "step": 300, + "valid_targets_mean": 2283.1, + "valid_targets_min": 1082 + }, + { + "epoch": 0.5091819699499165, + "grad_norm": 0.6754627666288634, + "learning_rate": 2.8952380952380955e-05, + "loss": 0.327, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3209186792373657, + "step": 305, + "valid_targets_mean": 2563.9, + "valid_targets_min": 799 + }, + { + "epoch": 0.5175292153589316, + "grad_norm": 0.9034626565874669, + "learning_rate": 2.9428571428571433e-05, + "loss": 0.3425, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3576005697250366, + "step": 310, + "valid_targets_mean": 2912.7, + "valid_targets_min": 1059 + }, + { + "epoch": 0.5258764607679466, + "grad_norm": 0.6229691306190667, + "learning_rate": 2.9904761904761907e-05, + "loss": 0.3908, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3698209524154663, + "step": 315, + "valid_targets_mean": 3144.4, + "valid_targets_min": 912 + }, + { + "epoch": 0.5342237061769616, + "grad_norm": 0.8546076575277969, + "learning_rate": 3.038095238095238e-05, + "loss": 0.324, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.35709044337272644, + "step": 320, + "valid_targets_mean": 1687.7, + "valid_targets_min": 943 + }, + { + "epoch": 0.5425709515859767, + "grad_norm": 0.7088960454793146, + "learning_rate": 3.085714285714286e-05, + "loss": 0.3154, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.32904744148254395, + "step": 325, + "valid_targets_mean": 2348.4, + "valid_targets_min": 643 + }, + { + "epoch": 0.5509181969949917, + "grad_norm": 0.7925521311779707, + "learning_rate": 3.1333333333333334e-05, + "loss": 0.3392, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.41147422790527344, + "step": 330, + "valid_targets_mean": 2246.1, + "valid_targets_min": 783 + }, + { + "epoch": 0.5592654424040067, + "grad_norm": 0.8177382664859416, + "learning_rate": 3.180952380952381e-05, + "loss": 0.3457, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3118923306465149, + "step": 335, + "valid_targets_mean": 1900.4, + "valid_targets_min": 438 + }, + { + "epoch": 0.5676126878130217, + "grad_norm": 0.7747926368677435, + "learning_rate": 3.228571428571429e-05, + "loss": 0.3495, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3130142092704773, + "step": 340, + "valid_targets_mean": 2124.5, + "valid_targets_min": 496 + }, + { + "epoch": 0.5759599332220368, + "grad_norm": 0.7900340718873202, + "learning_rate": 3.276190476190477e-05, + "loss": 0.3416, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3983057141304016, + "step": 345, + "valid_targets_mean": 2209.1, + "valid_targets_min": 787 + }, + { + "epoch": 0.5843071786310517, + "grad_norm": 0.7121802339807132, + "learning_rate": 3.3238095238095245e-05, + "loss": 0.3309, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.39473700523376465, + "step": 350, + "valid_targets_mean": 3028.6, + "valid_targets_min": 1079 + }, + { + "epoch": 0.5926544240400667, + "grad_norm": 0.8922891190366636, + "learning_rate": 3.3714285714285716e-05, + "loss": 0.3336, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3380250036716461, + "step": 355, + "valid_targets_mean": 2014.2, + "valid_targets_min": 664 + }, + { + "epoch": 0.6010016694490818, + "grad_norm": 0.6695806536241158, + "learning_rate": 3.4190476190476194e-05, + "loss": 0.3441, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.40140536427497864, + "step": 360, + "valid_targets_mean": 2965.8, + "valid_targets_min": 645 + }, + { + "epoch": 0.6093489148580968, + "grad_norm": 0.710440302237182, + "learning_rate": 3.466666666666667e-05, + "loss": 0.3189, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.28060203790664673, + "step": 365, + "valid_targets_mean": 2308.4, + "valid_targets_min": 715 + }, + { + "epoch": 0.6176961602671118, + "grad_norm": 0.7097755972881904, + "learning_rate": 3.514285714285714e-05, + "loss": 0.342, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3636694550514221, + "step": 370, + "valid_targets_mean": 2561.2, + "valid_targets_min": 1000 + }, + { + "epoch": 0.6260434056761269, + "grad_norm": 0.8788436972894093, + "learning_rate": 3.561904761904762e-05, + "loss": 0.323, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3455510437488556, + "step": 375, + "valid_targets_mean": 1589.6, + "valid_targets_min": 587 + }, + { + "epoch": 0.6343906510851419, + "grad_norm": 0.6597640705879878, + "learning_rate": 3.60952380952381e-05, + "loss": 0.3082, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27953580021858215, + "step": 380, + "valid_targets_mean": 2378.7, + "valid_targets_min": 799 + }, + { + "epoch": 0.6427378964941569, + "grad_norm": 0.7892624596142306, + "learning_rate": 3.6571428571428576e-05, + "loss": 0.3528, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.30219441652297974, + "step": 385, + "valid_targets_mean": 1884.8, + "valid_targets_min": 618 + }, + { + "epoch": 0.6510851419031719, + "grad_norm": 0.8637864198120716, + "learning_rate": 3.704761904761905e-05, + "loss": 0.3536, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.30112534761428833, + "step": 390, + "valid_targets_mean": 1779.1, + "valid_targets_min": 528 + }, + { + "epoch": 0.659432387312187, + "grad_norm": 0.7302225440883133, + "learning_rate": 3.7523809523809524e-05, + "loss": 0.3302, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3421768844127655, + "step": 395, + "valid_targets_mean": 2351.8, + "valid_targets_min": 578 + }, + { + "epoch": 0.667779632721202, + "grad_norm": 0.7424310243535049, + "learning_rate": 3.8e-05, + "loss": 0.3549, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.32466602325439453, + "step": 400, + "valid_targets_mean": 2133.6, + "valid_targets_min": 686 + }, + { + "epoch": 0.676126878130217, + "grad_norm": 0.7112119939538417, + "learning_rate": 3.847619047619048e-05, + "loss": 0.3136, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.296672523021698, + "step": 405, + "valid_targets_mean": 2450.8, + "valid_targets_min": 963 + }, + { + "epoch": 0.6844741235392321, + "grad_norm": 0.7850124283371369, + "learning_rate": 3.895238095238096e-05, + "loss": 0.315, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.34217727184295654, + "step": 410, + "valid_targets_mean": 1896.9, + "valid_targets_min": 866 + }, + { + "epoch": 0.6928213689482471, + "grad_norm": 0.6570487622025072, + "learning_rate": 3.9428571428571435e-05, + "loss": 0.3122, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.34257471561431885, + "step": 415, + "valid_targets_mean": 2862.9, + "valid_targets_min": 707 + }, + { + "epoch": 0.7011686143572621, + "grad_norm": 0.7179325156398435, + "learning_rate": 3.9904761904761906e-05, + "loss": 0.3219, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.34124815464019775, + "step": 420, + "valid_targets_mean": 2090.7, + "valid_targets_min": 554 + }, + { + "epoch": 0.7095158597662772, + "grad_norm": 0.6144369515649107, + "learning_rate": 3.999988907084209e-05, + "loss": 0.3401, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2786844074726105, + "step": 425, + "valid_targets_mean": 2647.7, + "valid_targets_min": 921 + }, + { + "epoch": 0.7178631051752922, + "grad_norm": 0.5985082700689128, + "learning_rate": 3.9999438423247035e-05, + "loss": 0.3475, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.32772842049598694, + "step": 430, + "valid_targets_mean": 3107.7, + "valid_targets_min": 524 + }, + { + "epoch": 0.7262103505843072, + "grad_norm": 0.6671709920532217, + "learning_rate": 3.999864113194738e-05, + "loss": 0.3259, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3186429440975189, + "step": 435, + "valid_targets_mean": 2705.6, + "valid_targets_min": 709 + }, + { + "epoch": 0.7345575959933222, + "grad_norm": 0.8249822436419694, + "learning_rate": 3.999749721076231e-05, + "loss": 0.3005, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3638507127761841, + "step": 440, + "valid_targets_mean": 1918.1, + "valid_targets_min": 649 + }, + { + "epoch": 0.7429048414023373, + "grad_norm": 0.8395818886697235, + "learning_rate": 3.9996006679519054e-05, + "loss": 0.3109, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3661022186279297, + "step": 445, + "valid_targets_mean": 2402.5, + "valid_targets_min": 530 + }, + { + "epoch": 0.7512520868113522, + "grad_norm": 0.5614572174095006, + "learning_rate": 3.9994169564052486e-05, + "loss": 0.3027, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27668917179107666, + "step": 450, + "valid_targets_mean": 3654.5, + "valid_targets_min": 1125 + }, + { + "epoch": 0.7595993322203672, + "grad_norm": 0.6522985817897391, + "learning_rate": 3.999198589620473e-05, + "loss": 0.3655, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.38943803310394287, + "step": 455, + "valid_targets_mean": 2856.8, + "valid_targets_min": 840 + }, + { + "epoch": 0.7679465776293823, + "grad_norm": 0.7438291259821956, + "learning_rate": 3.998945571382458e-05, + "loss": 0.3167, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3215874433517456, + "step": 460, + "valid_targets_mean": 2226.6, + "valid_targets_min": 841 + }, + { + "epoch": 0.7762938230383973, + "grad_norm": 0.726789786479743, + "learning_rate": 3.9986579060766866e-05, + "loss": 0.3321, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2627006769180298, + "step": 465, + "valid_targets_mean": 2022.9, + "valid_targets_min": 879 + }, + { + "epoch": 0.7846410684474123, + "grad_norm": 0.7163196779832347, + "learning_rate": 3.9983355986891664e-05, + "loss": 0.3146, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.32649943232536316, + "step": 470, + "valid_targets_mean": 2535.6, + "valid_targets_min": 721 + }, + { + "epoch": 0.7929883138564274, + "grad_norm": 0.6970478246231899, + "learning_rate": 3.9979786548063454e-05, + "loss": 0.2902, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.26147031784057617, + "step": 475, + "valid_targets_mean": 2178.3, + "valid_targets_min": 513 + }, + { + "epoch": 0.8013355592654424, + "grad_norm": 0.799911102232111, + "learning_rate": 3.997587080615016e-05, + "loss": 0.3406, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3076505661010742, + "step": 480, + "valid_targets_mean": 1970.3, + "valid_targets_min": 1051 + }, + { + "epoch": 0.8096828046744574, + "grad_norm": 0.8184414432450198, + "learning_rate": 3.9971608829022036e-05, + "loss": 0.3143, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27848586440086365, + "step": 485, + "valid_targets_mean": 2767.8, + "valid_targets_min": 653 + }, + { + "epoch": 0.8180300500834724, + "grad_norm": 0.7281485204639897, + "learning_rate": 3.996700069055054e-05, + "loss": 0.3028, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.31602007150650024, + "step": 490, + "valid_targets_mean": 2257.2, + "valid_targets_min": 641 + }, + { + "epoch": 0.8263772954924875, + "grad_norm": 0.6938466384081636, + "learning_rate": 3.9962046470607034e-05, + "loss": 0.3143, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.31599387526512146, + "step": 495, + "valid_targets_mean": 2485.4, + "valid_targets_min": 501 + }, + { + "epoch": 0.8347245409015025, + "grad_norm": 1.1888217696917323, + "learning_rate": 3.995674625506137e-05, + "loss": 0.3197, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.30969738960266113, + "step": 500, + "valid_targets_mean": 2349.2, + "valid_targets_min": 900 + }, + { + "epoch": 0.8430717863105175, + "grad_norm": 0.5898683146017418, + "learning_rate": 3.995110013578046e-05, + "loss": 0.3213, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3188744783401489, + "step": 505, + "valid_targets_mean": 2831.1, + "valid_targets_min": 1139 + }, + { + "epoch": 0.8514190317195326, + "grad_norm": 0.6469354559288124, + "learning_rate": 3.9945108210626635e-05, + "loss": 0.308, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27451348304748535, + "step": 510, + "valid_targets_mean": 2271.3, + "valid_targets_min": 990 + }, + { + "epoch": 0.8597662771285476, + "grad_norm": 0.6862712698126633, + "learning_rate": 3.9938770583455955e-05, + "loss": 0.3308, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.38486266136169434, + "step": 515, + "valid_targets_mean": 3247.4, + "valid_targets_min": 1217 + }, + { + "epoch": 0.8681135225375626, + "grad_norm": 0.5924716914409707, + "learning_rate": 3.9932087364116446e-05, + "loss": 0.3297, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2953488826751709, + "step": 520, + "valid_targets_mean": 3172.7, + "valid_targets_min": 710 + }, + { + "epoch": 0.8764607679465777, + "grad_norm": 0.6222540733349502, + "learning_rate": 3.992505866844615e-05, + "loss": 0.2943, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2345266193151474, + "step": 525, + "valid_targets_mean": 2900.1, + "valid_targets_min": 556 + }, + { + "epoch": 0.8848080133555927, + "grad_norm": 0.7543318131125992, + "learning_rate": 3.991768461827114e-05, + "loss": 0.3308, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3501376211643219, + "step": 530, + "valid_targets_mean": 2041.9, + "valid_targets_min": 776 + }, + { + "epoch": 0.8931552587646077, + "grad_norm": 0.7099560675938891, + "learning_rate": 3.990996534140342e-05, + "loss": 0.3254, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2954036593437195, + "step": 535, + "valid_targets_mean": 2134.9, + "valid_targets_min": 935 + }, + { + "epoch": 0.9015025041736227, + "grad_norm": 0.7704282842479314, + "learning_rate": 3.990190097163867e-05, + "loss": 0.2939, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2950887084007263, + "step": 540, + "valid_targets_mean": 1977.1, + "valid_targets_min": 568 + }, + { + "epoch": 0.9098497495826378, + "grad_norm": 0.7075265637767415, + "learning_rate": 3.989349164875397e-05, + "loss": 0.3053, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3461377024650574, + "step": 545, + "valid_targets_mean": 2553.9, + "valid_targets_min": 736 + }, + { + "epoch": 0.9181969949916527, + "grad_norm": 0.7646642465906041, + "learning_rate": 3.988473751850536e-05, + "loss": 0.3111, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3075540065765381, + "step": 550, + "valid_targets_mean": 2153.9, + "valid_targets_min": 792 + }, + { + "epoch": 0.9265442404006677, + "grad_norm": 0.7455652126408486, + "learning_rate": 3.98756387326253e-05, + "loss": 0.3181, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3557898998260498, + "step": 555, + "valid_targets_mean": 2320.8, + "valid_targets_min": 651 + }, + { + "epoch": 0.9348914858096828, + "grad_norm": 0.7448132010417806, + "learning_rate": 3.9866195448820066e-05, + "loss": 0.3122, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.292105495929718, + "step": 560, + "valid_targets_mean": 1784.8, + "valid_targets_min": 912 + }, + { + "epoch": 0.9432387312186978, + "grad_norm": 0.5358142548623265, + "learning_rate": 3.985640783076699e-05, + "loss": 0.313, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.26325780153274536, + "step": 565, + "valid_targets_mean": 3083.5, + "valid_targets_min": 1049 + }, + { + "epoch": 0.9515859766277128, + "grad_norm": 0.6281715819326231, + "learning_rate": 3.984627604811166e-05, + "loss": 0.3201, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3475627303123474, + "step": 570, + "valid_targets_mean": 2901.7, + "valid_targets_min": 874 + }, + { + "epoch": 0.9599332220367279, + "grad_norm": 0.6125370271699637, + "learning_rate": 3.983580027646492e-05, + "loss": 0.292, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.28098058700561523, + "step": 575, + "valid_targets_mean": 2742.2, + "valid_targets_min": 1126 + }, + { + "epoch": 0.9682804674457429, + "grad_norm": 0.7735134366550798, + "learning_rate": 3.9824980697399906e-05, + "loss": 0.3159, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3102591037750244, + "step": 580, + "valid_targets_mean": 1958.9, + "valid_targets_min": 640 + }, + { + "epoch": 0.9766277128547579, + "grad_norm": 0.6709556468624875, + "learning_rate": 3.981381749844882e-05, + "loss": 0.3347, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3324657082557678, + "step": 585, + "valid_targets_mean": 2534.3, + "valid_targets_min": 857 + }, + { + "epoch": 0.9849749582637729, + "grad_norm": 0.6511544183236323, + "learning_rate": 3.980231087309971e-05, + "loss": 0.3182, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.29707223176956177, + "step": 590, + "valid_targets_mean": 2435.8, + "valid_targets_min": 729 + }, + { + "epoch": 0.993322203672788, + "grad_norm": 0.6615644654517941, + "learning_rate": 3.9790461020793166e-05, + "loss": 0.3082, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3110125660896301, + "step": 595, + "valid_targets_mean": 2898.8, + "valid_targets_min": 971 + }, + { + "epoch": 1.001669449081803, + "grad_norm": 0.6925031228370271, + "learning_rate": 3.977826814691878e-05, + "loss": 0.3179, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.31865161657333374, + "step": 600, + "valid_targets_mean": 2244.9, + "valid_targets_min": 633 + }, + { + "epoch": 1.010016694490818, + "grad_norm": 0.8148668812959788, + "learning_rate": 3.9765732462811625e-05, + "loss": 0.284, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.26927098631858826, + "step": 605, + "valid_targets_mean": 2563.4, + "valid_targets_min": 784 + }, + { + "epoch": 1.018363939899833, + "grad_norm": 0.9245401324608014, + "learning_rate": 3.975285418574862e-05, + "loss": 0.3081, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.31719428300857544, + "step": 610, + "valid_targets_mean": 1617.6, + "valid_targets_min": 689 + }, + { + "epoch": 1.026711185308848, + "grad_norm": 0.730619597919056, + "learning_rate": 3.97396335389447e-05, + "loss": 0.2998, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3002278208732605, + "step": 615, + "valid_targets_mean": 2026.3, + "valid_targets_min": 517 + }, + { + "epoch": 1.0350584307178632, + "grad_norm": 0.6618374979258586, + "learning_rate": 3.972607075154901e-05, + "loss": 0.3004, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3087610602378845, + "step": 620, + "valid_targets_mean": 2641.1, + "valid_targets_min": 750 + }, + { + "epoch": 1.0434056761268782, + "grad_norm": 0.7876798130112941, + "learning_rate": 3.971216605864087e-05, + "loss": 0.3073, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3213345408439636, + "step": 625, + "valid_targets_mean": 1834.5, + "valid_targets_min": 465 + }, + { + "epoch": 1.0517529215358932, + "grad_norm": 0.6664972756436637, + "learning_rate": 3.969791970122579e-05, + "loss": 0.2973, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2779656946659088, + "step": 630, + "valid_targets_mean": 2411.5, + "valid_targets_min": 779 + }, + { + "epoch": 1.0601001669449082, + "grad_norm": 0.6557675352120678, + "learning_rate": 3.96833319262312e-05, + "loss": 0.2848, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.30672335624694824, + "step": 635, + "valid_targets_mean": 2686.2, + "valid_targets_min": 1016 + }, + { + "epoch": 1.0684474123539232, + "grad_norm": 0.7366689516699302, + "learning_rate": 3.9668402986502214e-05, + "loss": 0.2771, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2567709684371948, + "step": 640, + "valid_targets_mean": 2103.4, + "valid_targets_min": 899 + }, + { + "epoch": 1.0767946577629381, + "grad_norm": 0.6786077419659672, + "learning_rate": 3.9653133140797244e-05, + "loss": 0.2866, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.28523096442222595, + "step": 645, + "valid_targets_mean": 2235.8, + "valid_targets_min": 728 + }, + { + "epoch": 1.0851419031719534, + "grad_norm": 0.6722798110378215, + "learning_rate": 3.963752265378352e-05, + "loss": 0.2926, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3066675066947937, + "step": 650, + "valid_targets_mean": 3009.1, + "valid_targets_min": 1075 + }, + { + "epoch": 1.0934891485809684, + "grad_norm": 0.5809953581012779, + "learning_rate": 3.962157179603249e-05, + "loss": 0.2934, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2642176151275635, + "step": 655, + "valid_targets_mean": 2994.9, + "valid_targets_min": 1049 + }, + { + "epoch": 1.1018363939899833, + "grad_norm": 0.6497382434554982, + "learning_rate": 3.960528084401515e-05, + "loss": 0.2955, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2527881860733032, + "step": 660, + "valid_targets_mean": 2512.1, + "valid_targets_min": 790 + }, + { + "epoch": 1.1101836393989983, + "grad_norm": 0.7927971345907558, + "learning_rate": 3.9588650080097196e-05, + "loss": 0.3056, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27148202061653137, + "step": 665, + "valid_targets_mean": 2012.3, + "valid_targets_min": 747 + }, + { + "epoch": 1.1185308848080133, + "grad_norm": 0.6247066737404297, + "learning_rate": 3.957167979253424e-05, + "loss": 0.2988, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.32327088713645935, + "step": 670, + "valid_targets_mean": 2624.7, + "valid_targets_min": 637 + }, + { + "epoch": 1.1268781302170283, + "grad_norm": 0.7712891792290059, + "learning_rate": 3.955437027546668e-05, + "loss": 0.289, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.28323817253112793, + "step": 675, + "valid_targets_mean": 2072.2, + "valid_targets_min": 543 + }, + { + "epoch": 1.1352253756260433, + "grad_norm": 0.7262488608005806, + "learning_rate": 3.953672182891471e-05, + "loss": 0.2718, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2692265212535858, + "step": 680, + "valid_targets_mean": 1893.2, + "valid_targets_min": 772 + }, + { + "epoch": 1.1435726210350585, + "grad_norm": 0.7262705295475789, + "learning_rate": 3.951873475877306e-05, + "loss": 0.2864, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3586205244064331, + "step": 685, + "valid_targets_mean": 2606.6, + "valid_targets_min": 690 + }, + { + "epoch": 1.1519198664440735, + "grad_norm": 0.7721103060094741, + "learning_rate": 3.950040937680572e-05, + "loss": 0.2789, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2925747036933899, + "step": 690, + "valid_targets_mean": 1796.0, + "valid_targets_min": 663 + }, + { + "epoch": 1.1602671118530885, + "grad_norm": 0.6842744671644466, + "learning_rate": 3.948174600064051e-05, + "loss": 0.2639, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2648618817329407, + "step": 695, + "valid_targets_mean": 2290.5, + "valid_targets_min": 737 + }, + { + "epoch": 1.1686143572621035, + "grad_norm": 0.656718120082803, + "learning_rate": 3.946274495376362e-05, + "loss": 0.2964, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2847689688205719, + "step": 700, + "valid_targets_mean": 2546.4, + "valid_targets_min": 791 + }, + { + "epoch": 1.1769616026711185, + "grad_norm": 0.7878968439295934, + "learning_rate": 3.9443406565513963e-05, + "loss": 0.2963, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.30074936151504517, + "step": 705, + "valid_targets_mean": 1994.2, + "valid_targets_min": 763 + }, + { + "epoch": 1.1853088480801335, + "grad_norm": 0.7760816104388267, + "learning_rate": 3.9423731171077465e-05, + "loss": 0.3017, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.33796241879463196, + "step": 710, + "valid_targets_mean": 1973.6, + "valid_targets_min": 801 + }, + { + "epoch": 1.1936560934891487, + "grad_norm": 0.6538248284993325, + "learning_rate": 3.9403719111481295e-05, + "loss": 0.2882, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24952328205108643, + "step": 715, + "valid_targets_mean": 2361.2, + "valid_targets_min": 820 + }, + { + "epoch": 1.2020033388981637, + "grad_norm": 0.668487216975976, + "learning_rate": 3.9383370733587905e-05, + "loss": 0.2731, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2628505527973175, + "step": 720, + "valid_targets_mean": 3307.4, + "valid_targets_min": 813 + }, + { + "epoch": 1.2103505843071787, + "grad_norm": 0.6416354597973719, + "learning_rate": 3.936268639008906e-05, + "loss": 0.272, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.29763877391815186, + "step": 725, + "valid_targets_mean": 3056.4, + "valid_targets_min": 573 + }, + { + "epoch": 1.2186978297161937, + "grad_norm": 0.7622708003529245, + "learning_rate": 3.93416664394997e-05, + "loss": 0.2921, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.36244598031044006, + "step": 730, + "valid_targets_mean": 2071.6, + "valid_targets_min": 736 + }, + { + "epoch": 1.2270450751252087, + "grad_norm": 0.6696419661832687, + "learning_rate": 3.932031124615172e-05, + "loss": 0.279, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27058809995651245, + "step": 735, + "valid_targets_mean": 2621.4, + "valid_targets_min": 750 + }, + { + "epoch": 1.2353923205342237, + "grad_norm": 0.802081725552066, + "learning_rate": 3.92986211801877e-05, + "loss": 0.2719, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.33007490634918213, + "step": 740, + "valid_targets_mean": 1970.3, + "valid_targets_min": 743 + }, + { + "epoch": 1.2437395659432386, + "grad_norm": 0.7501025102767808, + "learning_rate": 3.927659661755442e-05, + "loss": 0.2912, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2953328490257263, + "step": 745, + "valid_targets_mean": 2254.4, + "valid_targets_min": 645 + }, + { + "epoch": 1.2520868113522536, + "grad_norm": 0.5643469220052163, + "learning_rate": 3.925423793999641e-05, + "loss": 0.2739, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23602071404457092, + "step": 750, + "valid_targets_mean": 3197.0, + "valid_targets_min": 946 + }, + { + "epoch": 1.2604340567612689, + "grad_norm": 0.682690790334389, + "learning_rate": 3.923154553504929e-05, + "loss": 0.2764, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2616035044193268, + "step": 755, + "valid_targets_mean": 2231.5, + "valid_targets_min": 573 + }, + { + "epoch": 1.2687813021702838, + "grad_norm": 0.7059052444592488, + "learning_rate": 3.920851979603306e-05, + "loss": 0.2993, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.30173906683921814, + "step": 760, + "valid_targets_mean": 2488.7, + "valid_targets_min": 601 + }, + { + "epoch": 1.2771285475792988, + "grad_norm": 0.6938294520982723, + "learning_rate": 3.918516112204532e-05, + "loss": 0.2984, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3000032305717468, + "step": 765, + "valid_targets_mean": 2336.4, + "valid_targets_min": 909 + }, + { + "epoch": 1.2854757929883138, + "grad_norm": 0.7316078096187626, + "learning_rate": 3.9161469917954273e-05, + "loss": 0.3162, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2872254252433777, + "step": 770, + "valid_targets_mean": 2003.9, + "valid_targets_min": 869 + }, + { + "epoch": 1.2938230383973288, + "grad_norm": 0.6454159589129043, + "learning_rate": 3.913744659439181e-05, + "loss": 0.2742, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.31085219979286194, + "step": 775, + "valid_targets_mean": 3016.9, + "valid_targets_min": 661 + }, + { + "epoch": 1.302170283806344, + "grad_norm": 0.609800583127232, + "learning_rate": 3.911309156774631e-05, + "loss": 0.2749, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.26211488246917725, + "step": 780, + "valid_targets_mean": 2690.8, + "valid_targets_min": 1089 + }, + { + "epoch": 1.310517529215359, + "grad_norm": 0.7454228979544445, + "learning_rate": 3.908840526015547e-05, + "loss": 0.2811, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.28145360946655273, + "step": 785, + "valid_targets_mean": 2019.0, + "valid_targets_min": 710 + }, + { + "epoch": 1.318864774624374, + "grad_norm": 0.8124525357830179, + "learning_rate": 3.906338809949893e-05, + "loss": 0.3044, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.28970903158187866, + "step": 790, + "valid_targets_mean": 1658.8, + "valid_targets_min": 579 + }, + { + "epoch": 1.327212020033389, + "grad_norm": 0.6338374166724755, + "learning_rate": 3.903804051939096e-05, + "loss": 0.2998, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27867740392684937, + "step": 795, + "valid_targets_mean": 2496.6, + "valid_targets_min": 987 + }, + { + "epoch": 1.335559265442404, + "grad_norm": 0.685595016138172, + "learning_rate": 3.9012362959172834e-05, + "loss": 0.3111, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.30850112438201904, + "step": 800, + "valid_targets_mean": 2616.1, + "valid_targets_min": 781 + }, + { + "epoch": 1.343906510851419, + "grad_norm": 0.6255627776622107, + "learning_rate": 3.898635586390528e-05, + "loss": 0.2954, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2908719480037689, + "step": 805, + "valid_targets_mean": 2888.5, + "valid_targets_min": 1089 + }, + { + "epoch": 1.352253756260434, + "grad_norm": 0.6368055210273411, + "learning_rate": 3.8960019684360756e-05, + "loss": 0.2763, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2912788987159729, + "step": 810, + "valid_targets_mean": 2670.8, + "valid_targets_min": 459 + }, + { + "epoch": 1.360601001669449, + "grad_norm": 0.7991727370645318, + "learning_rate": 3.8933354877015606e-05, + "loss": 0.3138, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2930360734462738, + "step": 815, + "valid_targets_mean": 1936.2, + "valid_targets_min": 546 + }, + { + "epoch": 1.3689482470784642, + "grad_norm": 0.7817522753492604, + "learning_rate": 3.89063619040422e-05, + "loss": 0.2804, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.28804096579551697, + "step": 820, + "valid_targets_mean": 1859.6, + "valid_targets_min": 805 + }, + { + "epoch": 1.3772954924874792, + "grad_norm": 0.6724785167267319, + "learning_rate": 3.887904123330088e-05, + "loss": 0.2748, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.26175278425216675, + "step": 825, + "valid_targets_mean": 2268.2, + "valid_targets_min": 889 + }, + { + "epoch": 1.3856427378964942, + "grad_norm": 0.5396055611622669, + "learning_rate": 3.885139333833186e-05, + "loss": 0.2967, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.26283103227615356, + "step": 830, + "valid_targets_mean": 3288.2, + "valid_targets_min": 847 + }, + { + "epoch": 1.3939899833055092, + "grad_norm": 0.7121366274762221, + "learning_rate": 3.882341869834704e-05, + "loss": 0.2512, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24692749977111816, + "step": 835, + "valid_targets_mean": 1878.3, + "valid_targets_min": 706 + }, + { + "epoch": 1.4023372287145242, + "grad_norm": 0.7243796728129323, + "learning_rate": 3.879511779822168e-05, + "loss": 0.2858, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.35578879714012146, + "step": 840, + "valid_targets_mean": 2670.4, + "valid_targets_min": 824 + }, + { + "epoch": 1.4106844741235394, + "grad_norm": 0.5806404873594075, + "learning_rate": 3.8766491128486e-05, + "loss": 0.2756, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2863224744796753, + "step": 845, + "valid_targets_mean": 3360.6, + "valid_targets_min": 515 + }, + { + "epoch": 1.4190317195325544, + "grad_norm": 0.651007046670553, + "learning_rate": 3.873753918531666e-05, + "loss": 0.2852, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25990986824035645, + "step": 850, + "valid_targets_mean": 2272.2, + "valid_targets_min": 526 + }, + { + "epoch": 1.4273789649415694, + "grad_norm": 0.7884642868985011, + "learning_rate": 3.87082624705282e-05, + "loss": 0.2852, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2782127261161804, + "step": 855, + "valid_targets_mean": 1938.4, + "valid_targets_min": 925 + }, + { + "epoch": 1.4357262103505843, + "grad_norm": 0.6338249659261851, + "learning_rate": 3.867866149156431e-05, + "loss": 0.2701, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25381916761398315, + "step": 860, + "valid_targets_mean": 2396.1, + "valid_targets_min": 507 + }, + { + "epoch": 1.4440734557595993, + "grad_norm": 0.6510610863993852, + "learning_rate": 3.8648736761489043e-05, + "loss": 0.3007, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.263117253780365, + "step": 865, + "valid_targets_mean": 2294.6, + "valid_targets_min": 736 + }, + { + "epoch": 1.4524207011686143, + "grad_norm": 0.7433826768152452, + "learning_rate": 3.861848879897794e-05, + "loss": 0.2999, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3034513294696808, + "step": 870, + "valid_targets_mean": 1995.8, + "valid_targets_min": 643 + }, + { + "epoch": 1.4607679465776293, + "grad_norm": 0.7594175670087351, + "learning_rate": 3.8587918128309e-05, + "loss": 0.2732, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.29318055510520935, + "step": 875, + "valid_targets_mean": 1931.1, + "valid_targets_min": 944 + }, + { + "epoch": 1.4691151919866443, + "grad_norm": 0.7333273932018377, + "learning_rate": 3.855702527935363e-05, + "loss": 0.2951, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27202150225639343, + "step": 880, + "valid_targets_mean": 2107.9, + "valid_targets_min": 601 + }, + { + "epoch": 1.4774624373956593, + "grad_norm": 0.742761990673193, + "learning_rate": 3.852581078756745e-05, + "loss": 0.3166, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.41072383522987366, + "step": 885, + "valid_targets_mean": 2426.4, + "valid_targets_min": 301 + }, + { + "epoch": 1.4858096828046745, + "grad_norm": 0.6845584557112703, + "learning_rate": 3.8494275193981e-05, + "loss": 0.2845, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.29102623462677, + "step": 890, + "valid_targets_mean": 2549.4, + "valid_targets_min": 896 + }, + { + "epoch": 1.4941569282136895, + "grad_norm": 0.7046436774469004, + "learning_rate": 3.8462419045190385e-05, + "loss": 0.3106, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.28963133692741394, + "step": 895, + "valid_targets_mean": 2068.2, + "valid_targets_min": 736 + }, + { + "epoch": 1.5025041736227045, + "grad_norm": 0.7907654382570969, + "learning_rate": 3.8430242893347765e-05, + "loss": 0.2842, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2805490791797638, + "step": 900, + "valid_targets_mean": 1607.1, + "valid_targets_min": 831 + }, + { + "epoch": 1.5108514190317195, + "grad_norm": 0.6915172015692203, + "learning_rate": 3.839774729615184e-05, + "loss": 0.296, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2792760133743286, + "step": 905, + "valid_targets_mean": 2099.8, + "valid_targets_min": 778 + }, + { + "epoch": 1.5191986644407347, + "grad_norm": 0.8432793308275359, + "learning_rate": 3.8364932816838124e-05, + "loss": 0.2883, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2922583818435669, + "step": 910, + "valid_targets_mean": 1637.9, + "valid_targets_min": 1063 + }, + { + "epoch": 1.5275459098497497, + "grad_norm": 0.6933486076711197, + "learning_rate": 3.833180002416922e-05, + "loss": 0.2858, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2783495783805847, + "step": 915, + "valid_targets_mean": 1931.9, + "valid_targets_min": 568 + }, + { + "epoch": 1.5358931552587647, + "grad_norm": 0.8555990601110476, + "learning_rate": 3.829834949242496e-05, + "loss": 0.2702, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24759122729301453, + "step": 920, + "valid_targets_mean": 2103.7, + "valid_targets_min": 402 + }, + { + "epoch": 1.5442404006677797, + "grad_norm": 0.6312084882799459, + "learning_rate": 3.826458180139244e-05, + "loss": 0.2723, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.28182315826416016, + "step": 925, + "valid_targets_mean": 2652.2, + "valid_targets_min": 791 + }, + { + "epoch": 1.5525876460767947, + "grad_norm": 0.7032857827182569, + "learning_rate": 3.823049753635596e-05, + "loss": 0.3098, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2833642065525055, + "step": 930, + "valid_targets_mean": 2140.2, + "valid_targets_min": 433 + }, + { + "epoch": 1.5609348914858097, + "grad_norm": 0.6992936342297782, + "learning_rate": 3.81960972880869e-05, + "loss": 0.2658, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.270283967256546, + "step": 935, + "valid_targets_mean": 2097.2, + "valid_targets_min": 529 + }, + { + "epoch": 1.5692821368948247, + "grad_norm": 0.6478109681184681, + "learning_rate": 3.8161381652833485e-05, + "loss": 0.2831, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3308855891227722, + "step": 940, + "valid_targets_mean": 2981.6, + "valid_targets_min": 933 + }, + { + "epoch": 1.5776293823038396, + "grad_norm": 0.6430467605687435, + "learning_rate": 3.812635123231043e-05, + "loss": 0.2813, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.28764796257019043, + "step": 945, + "valid_targets_mean": 2555.7, + "valid_targets_min": 557 + }, + { + "epoch": 1.5859766277128546, + "grad_norm": 0.6372116005583673, + "learning_rate": 3.809100663368852e-05, + "loss": 0.2678, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2468167543411255, + "step": 950, + "valid_targets_mean": 2827.8, + "valid_targets_min": 849 + }, + { + "epoch": 1.5943238731218696, + "grad_norm": 0.7026656221278749, + "learning_rate": 3.805534846958408e-05, + "loss": 0.2855, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.28463542461395264, + "step": 955, + "valid_targets_mean": 2876.0, + "valid_targets_min": 1107 + }, + { + "epoch": 1.6026711185308848, + "grad_norm": 0.6149311989350865, + "learning_rate": 3.801937735804838e-05, + "loss": 0.3016, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2641446590423584, + "step": 960, + "valid_targets_mean": 2856.4, + "valid_targets_min": 708 + }, + { + "epoch": 1.6110183639398998, + "grad_norm": 0.8288161564755756, + "learning_rate": 3.79830939225569e-05, + "loss": 0.2758, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2795507609844208, + "step": 965, + "valid_targets_mean": 1612.9, + "valid_targets_min": 601 + }, + { + "epoch": 1.6193656093489148, + "grad_norm": 0.5987400086681629, + "learning_rate": 3.79464987919985e-05, + "loss": 0.281, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3047257661819458, + "step": 970, + "valid_targets_mean": 3288.7, + "valid_targets_min": 614 + }, + { + "epoch": 1.62771285475793, + "grad_norm": 0.6819190508353469, + "learning_rate": 3.790959260066459e-05, + "loss": 0.2919, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2398405820131302, + "step": 975, + "valid_targets_mean": 1824.6, + "valid_targets_min": 737 + }, + { + "epoch": 1.636060100166945, + "grad_norm": 0.6669712111685446, + "learning_rate": 3.7872375988238075e-05, + "loss": 0.2713, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.29929858446121216, + "step": 980, + "valid_targets_mean": 2378.9, + "valid_targets_min": 664 + }, + { + "epoch": 1.64440734557596, + "grad_norm": 0.6295279241765963, + "learning_rate": 3.783484959978228e-05, + "loss": 0.2898, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2825482487678528, + "step": 985, + "valid_targets_mean": 2830.6, + "valid_targets_min": 792 + }, + { + "epoch": 1.652754590984975, + "grad_norm": 0.6858266007378149, + "learning_rate": 3.7797014085729786e-05, + "loss": 0.2683, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.264940470457077, + "step": 990, + "valid_targets_mean": 2416.1, + "valid_targets_min": 996 + }, + { + "epoch": 1.66110183639399, + "grad_norm": 0.5756885313672844, + "learning_rate": 3.7758870101871155e-05, + "loss": 0.2636, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23686812818050385, + "step": 995, + "valid_targets_mean": 2789.2, + "valid_targets_min": 923 + }, + { + "epoch": 1.669449081803005, + "grad_norm": 0.65698944081499, + "learning_rate": 3.772041830934352e-05, + "loss": 0.2722, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2956932783126831, + "step": 1000, + "valid_targets_mean": 2863.6, + "valid_targets_min": 862 + }, + { + "epoch": 1.67779632721202, + "grad_norm": 0.7268626989208787, + "learning_rate": 3.7681659374619216e-05, + "loss": 0.2955, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2926235795021057, + "step": 1005, + "valid_targets_mean": 1927.5, + "valid_targets_min": 441 + }, + { + "epoch": 1.686143572621035, + "grad_norm": 0.6301728263095548, + "learning_rate": 3.764259396949412e-05, + "loss": 0.2861, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2802674174308777, + "step": 1010, + "valid_targets_mean": 2421.6, + "valid_targets_min": 467 + }, + { + "epoch": 1.69449081803005, + "grad_norm": 0.6733462938417506, + "learning_rate": 3.7603222771076094e-05, + "loss": 0.292, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.293710321187973, + "step": 1015, + "valid_targets_mean": 2150.6, + "valid_targets_min": 714 + }, + { + "epoch": 1.702838063439065, + "grad_norm": 0.725396943635714, + "learning_rate": 3.75635464617732e-05, + "loss": 0.2744, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25977623462677, + "step": 1020, + "valid_targets_mean": 1919.0, + "valid_targets_min": 677 + }, + { + "epoch": 1.7111853088480802, + "grad_norm": 0.6403010940459046, + "learning_rate": 3.752356572928189e-05, + "loss": 0.2703, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2599361836910248, + "step": 1025, + "valid_targets_mean": 2805.1, + "valid_targets_min": 1039 + }, + { + "epoch": 1.7195325542570952, + "grad_norm": 0.628668831894049, + "learning_rate": 3.748328126657508e-05, + "loss": 0.2806, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24749314785003662, + "step": 1030, + "valid_targets_mean": 2325.9, + "valid_targets_min": 966 + }, + { + "epoch": 1.7278797996661102, + "grad_norm": 0.6754406114942553, + "learning_rate": 3.7442693771890134e-05, + "loss": 0.2671, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25488877296447754, + "step": 1035, + "valid_targets_mean": 1955.4, + "valid_targets_min": 952 + }, + { + "epoch": 1.7362270450751254, + "grad_norm": 0.7558472173570536, + "learning_rate": 3.740180394871681e-05, + "loss": 0.2722, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2771523594856262, + "step": 1040, + "valid_targets_mean": 1947.2, + "valid_targets_min": 711 + }, + { + "epoch": 1.7445742904841404, + "grad_norm": 0.6832519462615657, + "learning_rate": 3.736061250578498e-05, + "loss": 0.2753, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2828722298145294, + "step": 1045, + "valid_targets_mean": 2244.8, + "valid_targets_min": 699 + }, + { + "epoch": 1.7529215358931554, + "grad_norm": 0.7221198179059067, + "learning_rate": 3.7319120157052405e-05, + "loss": 0.2575, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27627745270729065, + "step": 1050, + "valid_targets_mean": 2222.8, + "valid_targets_min": 899 + }, + { + "epoch": 1.7612687813021703, + "grad_norm": 0.6101729582811587, + "learning_rate": 3.727732762169236e-05, + "loss": 0.2853, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27856922149658203, + "step": 1055, + "valid_targets_mean": 2990.0, + "valid_targets_min": 1196 + }, + { + "epoch": 1.7696160267111853, + "grad_norm": 0.665026309284673, + "learning_rate": 3.7235235624081144e-05, + "loss": 0.2644, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2866712212562561, + "step": 1060, + "valid_targets_mean": 2176.4, + "valid_targets_min": 767 + }, + { + "epoch": 1.7779632721202003, + "grad_norm": 0.7596606174532765, + "learning_rate": 3.719284489378555e-05, + "loss": 0.2737, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2709512710571289, + "step": 1065, + "valid_targets_mean": 1837.7, + "valid_targets_min": 485 + }, + { + "epoch": 1.7863105175292153, + "grad_norm": 0.7240747467629459, + "learning_rate": 3.715015616555019e-05, + "loss": 0.3061, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2847890853881836, + "step": 1070, + "valid_targets_mean": 1882.2, + "valid_targets_min": 758 + }, + { + "epoch": 1.7946577629382303, + "grad_norm": 0.6064730641101225, + "learning_rate": 3.710717017928479e-05, + "loss": 0.2834, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.26691943407058716, + "step": 1075, + "valid_targets_mean": 2747.1, + "valid_targets_min": 821 + }, + { + "epoch": 1.8030050083472453, + "grad_norm": 0.611959788625726, + "learning_rate": 3.7063887680051346e-05, + "loss": 0.259, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.264186292886734, + "step": 1080, + "valid_targets_mean": 2445.9, + "valid_targets_min": 674 + }, + { + "epoch": 1.8113522537562603, + "grad_norm": 0.6642812468334148, + "learning_rate": 3.702030941805122e-05, + "loss": 0.2852, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.26624006032943726, + "step": 1085, + "valid_targets_mean": 2209.1, + "valid_targets_min": 1011 + }, + { + "epoch": 1.8196994991652755, + "grad_norm": 0.6892899295915823, + "learning_rate": 3.697643614861212e-05, + "loss": 0.2949, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27064579725265503, + "step": 1090, + "valid_targets_mean": 2046.2, + "valid_targets_min": 505 + }, + { + "epoch": 1.8280467445742905, + "grad_norm": 0.6580639650984265, + "learning_rate": 3.6932268632175035e-05, + "loss": 0.2929, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2895362675189972, + "step": 1095, + "valid_targets_mean": 2746.6, + "valid_targets_min": 734 + }, + { + "epoch": 1.8363939899833055, + "grad_norm": 0.6370109105360133, + "learning_rate": 3.6887807634281034e-05, + "loss": 0.2794, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.26561087369918823, + "step": 1100, + "valid_targets_mean": 2142.6, + "valid_targets_min": 935 + }, + { + "epoch": 1.8447412353923205, + "grad_norm": 0.616087046014061, + "learning_rate": 3.684305392555802e-05, + "loss": 0.2854, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3370705544948578, + "step": 1105, + "valid_targets_mean": 3029.1, + "valid_targets_min": 705 + }, + { + "epoch": 1.8530884808013357, + "grad_norm": 0.7510555145362542, + "learning_rate": 3.6798008281707326e-05, + "loss": 0.2586, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27831149101257324, + "step": 1110, + "valid_targets_mean": 1654.9, + "valid_targets_min": 778 + }, + { + "epoch": 1.8614357262103507, + "grad_norm": 0.6520045541163695, + "learning_rate": 3.675267148349033e-05, + "loss": 0.2673, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2550077438354492, + "step": 1115, + "valid_targets_mean": 2392.9, + "valid_targets_min": 904 + }, + { + "epoch": 1.8697829716193657, + "grad_norm": 0.6819208928364223, + "learning_rate": 3.670704431671487e-05, + "loss": 0.2736, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.26585766673088074, + "step": 1120, + "valid_targets_mean": 2269.6, + "valid_targets_min": 712 + }, + { + "epoch": 1.8781302170283807, + "grad_norm": 0.7225161412119722, + "learning_rate": 3.6661127572221674e-05, + "loss": 0.27, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25078362226486206, + "step": 1125, + "valid_targets_mean": 2047.7, + "valid_targets_min": 642 + }, + { + "epoch": 1.8864774624373957, + "grad_norm": 0.6988676713993968, + "learning_rate": 3.661492204587059e-05, + "loss": 0.2945, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3282058835029602, + "step": 1130, + "valid_targets_mean": 2359.9, + "valid_targets_min": 712 + }, + { + "epoch": 1.8948247078464107, + "grad_norm": 0.5512275061807791, + "learning_rate": 3.656842853852686e-05, + "loss": 0.2693, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2610544264316559, + "step": 1135, + "valid_targets_mean": 2950.7, + "valid_targets_min": 958 + }, + { + "epoch": 1.9031719532554257, + "grad_norm": 0.6572067881124516, + "learning_rate": 3.652164785604718e-05, + "loss": 0.273, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2794402241706848, + "step": 1140, + "valid_targets_mean": 2232.9, + "valid_targets_min": 1096 + }, + { + "epoch": 1.9115191986644406, + "grad_norm": 0.5889030363628657, + "learning_rate": 3.647458080926579e-05, + "loss": 0.2662, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23769791424274445, + "step": 1145, + "valid_targets_mean": 2431.9, + "valid_targets_min": 832 + }, + { + "epoch": 1.9198664440734556, + "grad_norm": 0.6763699245418896, + "learning_rate": 3.642722821398036e-05, + "loss": 0.3147, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.36065638065338135, + "step": 1150, + "valid_targets_mean": 2329.1, + "valid_targets_min": 781 + }, + { + "epoch": 1.9282136894824706, + "grad_norm": 0.5895292517322126, + "learning_rate": 3.637959089093788e-05, + "loss": 0.2565, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.29195573925971985, + "step": 1155, + "valid_targets_mean": 2797.8, + "valid_targets_min": 1048 + }, + { + "epoch": 1.9365609348914858, + "grad_norm": 0.703920820534324, + "learning_rate": 3.633166966582046e-05, + "loss": 0.2729, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.26270467042922974, + "step": 1160, + "valid_targets_mean": 1874.8, + "valid_targets_min": 672 + }, + { + "epoch": 1.9449081803005008, + "grad_norm": 0.7179588425200243, + "learning_rate": 3.6283465369230955e-05, + "loss": 0.2587, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23984012007713318, + "step": 1165, + "valid_targets_mean": 1565.3, + "valid_targets_min": 645 + }, + { + "epoch": 1.9532554257095158, + "grad_norm": 0.6772780144283352, + "learning_rate": 3.6234978836678635e-05, + "loss": 0.2942, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.34247738122940063, + "step": 1170, + "valid_targets_mean": 2453.4, + "valid_targets_min": 849 + }, + { + "epoch": 1.961602671118531, + "grad_norm": 0.5878645868557504, + "learning_rate": 3.618621090856465e-05, + "loss": 0.2713, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24168238043785095, + "step": 1175, + "valid_targets_mean": 2615.9, + "valid_targets_min": 718 + }, + { + "epoch": 1.969949916527546, + "grad_norm": 0.6263918058676369, + "learning_rate": 3.6137162430167505e-05, + "loss": 0.2715, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24018877744674683, + "step": 1180, + "valid_targets_mean": 2028.0, + "valid_targets_min": 952 + }, + { + "epoch": 1.978297161936561, + "grad_norm": 0.5679649783401323, + "learning_rate": 3.608783425162837e-05, + "loss": 0.2775, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2676815092563629, + "step": 1185, + "valid_targets_mean": 2743.6, + "valid_targets_min": 1061 + }, + { + "epoch": 1.986644407345576, + "grad_norm": 0.5650642695085664, + "learning_rate": 3.603822722793641e-05, + "loss": 0.2683, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24100539088249207, + "step": 1190, + "valid_targets_mean": 2693.2, + "valid_targets_min": 927 + }, + { + "epoch": 1.994991652754591, + "grad_norm": 0.518428283928646, + "learning_rate": 3.598834221891386e-05, + "loss": 0.3039, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2965103089809418, + "step": 1195, + "valid_targets_mean": 3804.1, + "valid_targets_min": 805 + }, + { + "epoch": 2.003338898163606, + "grad_norm": 0.5193791983282685, + "learning_rate": 3.5938180089201236e-05, + "loss": 0.2469, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2514622211456299, + "step": 1200, + "valid_targets_mean": 4081.8, + "valid_targets_min": 1185 + }, + { + "epoch": 2.011686143572621, + "grad_norm": 0.6434338747723224, + "learning_rate": 3.588774170824225e-05, + "loss": 0.2559, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2797144651412964, + "step": 1205, + "valid_targets_mean": 2755.6, + "valid_targets_min": 823 + }, + { + "epoch": 2.020033388981636, + "grad_norm": 0.7067410513026908, + "learning_rate": 3.5837027950268845e-05, + "loss": 0.2352, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2243640124797821, + "step": 1210, + "valid_targets_mean": 2216.0, + "valid_targets_min": 743 + }, + { + "epoch": 2.028380634390651, + "grad_norm": 0.7961342121604984, + "learning_rate": 3.578603969428592e-05, + "loss": 0.2314, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20412582159042358, + "step": 1215, + "valid_targets_mean": 1890.4, + "valid_targets_min": 792 + }, + { + "epoch": 2.036727879799666, + "grad_norm": 0.5530115591594799, + "learning_rate": 3.57347778240562e-05, + "loss": 0.2476, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18387100100517273, + "step": 1220, + "valid_targets_mean": 3251.8, + "valid_targets_min": 1082 + }, + { + "epoch": 2.045075125208681, + "grad_norm": 0.7108831043582498, + "learning_rate": 3.568324322808486e-05, + "loss": 0.2559, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.30348148941993713, + "step": 1225, + "valid_targets_mean": 2527.5, + "valid_targets_min": 558 + }, + { + "epoch": 2.053422370617696, + "grad_norm": 0.5550708064223786, + "learning_rate": 3.563143679960415e-05, + "loss": 0.2435, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20590972900390625, + "step": 1230, + "valid_targets_mean": 2877.4, + "valid_targets_min": 686 + }, + { + "epoch": 2.0617696160267114, + "grad_norm": 0.7444742798415956, + "learning_rate": 3.557935943655789e-05, + "loss": 0.2225, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22533491253852844, + "step": 1235, + "valid_targets_mean": 2022.6, + "valid_targets_min": 720 + }, + { + "epoch": 2.0701168614357264, + "grad_norm": 0.6427542875704622, + "learning_rate": 3.5527012041585936e-05, + "loss": 0.2537, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2653607130050659, + "step": 1240, + "valid_targets_mean": 2934.6, + "valid_targets_min": 600 + }, + { + "epoch": 2.0784641068447414, + "grad_norm": 0.6736377563917679, + "learning_rate": 3.5474395522008496e-05, + "loss": 0.2527, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23162764310836792, + "step": 1245, + "valid_targets_mean": 2378.2, + "valid_targets_min": 501 + }, + { + "epoch": 2.0868113522537564, + "grad_norm": 0.7280982685895938, + "learning_rate": 3.542151078981046e-05, + "loss": 0.232, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2432788610458374, + "step": 1250, + "valid_targets_mean": 2093.1, + "valid_targets_min": 680 + }, + { + "epoch": 2.0951585976627713, + "grad_norm": 0.699538815265175, + "learning_rate": 3.5368358761625514e-05, + "loss": 0.2492, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2450699359178543, + "step": 1255, + "valid_targets_mean": 2237.4, + "valid_targets_min": 736 + }, + { + "epoch": 2.1035058430717863, + "grad_norm": 0.6844956114186254, + "learning_rate": 3.5314940358720356e-05, + "loss": 0.2438, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23026099801063538, + "step": 1260, + "valid_targets_mean": 2317.9, + "valid_targets_min": 791 + }, + { + "epoch": 2.1118530884808013, + "grad_norm": 0.698706207791456, + "learning_rate": 3.5261256506978615e-05, + "loss": 0.2494, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21001145243644714, + "step": 1265, + "valid_targets_mean": 1831.2, + "valid_targets_min": 1037 + }, + { + "epoch": 2.1202003338898163, + "grad_norm": 0.7647700825838409, + "learning_rate": 3.5207308136884894e-05, + "loss": 0.2297, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2431824952363968, + "step": 1270, + "valid_targets_mean": 1795.0, + "valid_targets_min": 1011 + }, + { + "epoch": 2.1285475792988313, + "grad_norm": 0.6923485232735404, + "learning_rate": 3.515309618350858e-05, + "loss": 0.2542, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2632903456687927, + "step": 1275, + "valid_targets_mean": 2339.3, + "valid_targets_min": 820 + }, + { + "epoch": 2.1368948247078463, + "grad_norm": 0.7781081485640542, + "learning_rate": 3.509862158648768e-05, + "loss": 0.251, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2773990035057068, + "step": 1280, + "valid_targets_mean": 2198.3, + "valid_targets_min": 698 + }, + { + "epoch": 2.1452420701168613, + "grad_norm": 0.6666148765838359, + "learning_rate": 3.504388529001252e-05, + "loss": 0.2477, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2561338245868683, + "step": 1285, + "valid_targets_mean": 2589.9, + "valid_targets_min": 645 + }, + { + "epoch": 2.1535893155258763, + "grad_norm": 0.8184631100833456, + "learning_rate": 3.498888824280935e-05, + "loss": 0.2291, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23771652579307556, + "step": 1290, + "valid_targets_mean": 1722.0, + "valid_targets_min": 1062 + }, + { + "epoch": 2.1619365609348913, + "grad_norm": 0.6991813737801864, + "learning_rate": 3.4933631398123956e-05, + "loss": 0.2432, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20241795480251312, + "step": 1295, + "valid_targets_mean": 2340.5, + "valid_targets_min": 833 + }, + { + "epoch": 2.1702838063439067, + "grad_norm": 0.6130340899300305, + "learning_rate": 3.487811571370509e-05, + "loss": 0.2365, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21447286009788513, + "step": 1300, + "valid_targets_mean": 2251.4, + "valid_targets_min": 844 + }, + { + "epoch": 2.1786310517529217, + "grad_norm": 0.7120891173802951, + "learning_rate": 3.482234215178791e-05, + "loss": 0.2313, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23181754350662231, + "step": 1305, + "valid_targets_mean": 2261.1, + "valid_targets_min": 645 + }, + { + "epoch": 2.1869782971619367, + "grad_norm": 0.7832344759000813, + "learning_rate": 3.476631167907727e-05, + "loss": 0.2199, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23713654279708862, + "step": 1310, + "valid_targets_mean": 2056.3, + "valid_targets_min": 707 + }, + { + "epoch": 2.1953255425709517, + "grad_norm": 0.7148340814235117, + "learning_rate": 3.471002526673094e-05, + "loss": 0.264, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27816304564476013, + "step": 1315, + "valid_targets_mean": 2307.6, + "valid_targets_min": 590 + }, + { + "epoch": 2.2036727879799667, + "grad_norm": 0.7100191065638822, + "learning_rate": 3.465348389034287e-05, + "loss": 0.2474, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21069149672985077, + "step": 1320, + "valid_targets_mean": 2232.3, + "valid_targets_min": 679 + }, + { + "epoch": 2.2120200333889817, + "grad_norm": 0.6251149655519441, + "learning_rate": 3.459668852992617e-05, + "loss": 0.2378, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2373727262020111, + "step": 1325, + "valid_targets_mean": 2580.5, + "valid_targets_min": 1052 + }, + { + "epoch": 2.2203672787979967, + "grad_norm": 0.601330204070378, + "learning_rate": 3.453964016989619e-05, + "loss": 0.2362, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22181645035743713, + "step": 1330, + "valid_targets_mean": 3134.7, + "valid_targets_min": 1241 + }, + { + "epoch": 2.2287145242070117, + "grad_norm": 0.8149276967292868, + "learning_rate": 3.4482339799053444e-05, + "loss": 0.2425, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25903093814849854, + "step": 1335, + "valid_targets_mean": 1608.3, + "valid_targets_min": 711 + }, + { + "epoch": 2.2370617696160267, + "grad_norm": 0.6327447131495245, + "learning_rate": 3.4424788410566455e-05, + "loss": 0.2316, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19700053334236145, + "step": 1340, + "valid_targets_mean": 2296.6, + "valid_targets_min": 795 + }, + { + "epoch": 2.2454090150250416, + "grad_norm": 0.8044246595795511, + "learning_rate": 3.4366987001954555e-05, + "loss": 0.238, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.26876890659332275, + "step": 1345, + "valid_targets_mean": 1931.4, + "valid_targets_min": 1001 + }, + { + "epoch": 2.2537562604340566, + "grad_norm": 0.5309771416293331, + "learning_rate": 3.43089365750706e-05, + "loss": 0.232, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2141474038362503, + "step": 1350, + "valid_targets_mean": 3728.3, + "valid_targets_min": 1014 + }, + { + "epoch": 2.2621035058430716, + "grad_norm": 0.6644682533895913, + "learning_rate": 3.425063813608359e-05, + "loss": 0.2424, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2774876356124878, + "step": 1355, + "valid_targets_mean": 2802.4, + "valid_targets_min": 945 + }, + { + "epoch": 2.2704507512520866, + "grad_norm": 0.7455026587962019, + "learning_rate": 3.419209269546125e-05, + "loss": 0.2455, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25237128138542175, + "step": 1360, + "valid_targets_mean": 1823.6, + "valid_targets_min": 504 + }, + { + "epoch": 2.278797996661102, + "grad_norm": 0.7277599604934011, + "learning_rate": 3.4133301267952486e-05, + "loss": 0.2318, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2372550368309021, + "step": 1365, + "valid_targets_mean": 2188.1, + "valid_targets_min": 782 + }, + { + "epoch": 2.287145242070117, + "grad_norm": 0.7380556789571296, + "learning_rate": 3.407426487256982e-05, + "loss": 0.2605, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.30883273482322693, + "step": 1370, + "valid_targets_mean": 2064.0, + "valid_targets_min": 937 + }, + { + "epoch": 2.295492487479132, + "grad_norm": 0.7749276146562591, + "learning_rate": 3.401498453257172e-05, + "loss": 0.2515, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24143822491168976, + "step": 1375, + "valid_targets_mean": 1756.7, + "valid_targets_min": 856 + }, + { + "epoch": 2.303839732888147, + "grad_norm": 0.7251504209484195, + "learning_rate": 3.395546127544489e-05, + "loss": 0.2379, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2530919909477234, + "step": 1380, + "valid_targets_mean": 2199.6, + "valid_targets_min": 449 + }, + { + "epoch": 2.312186978297162, + "grad_norm": 0.6703831664282974, + "learning_rate": 3.389569613288641e-05, + "loss": 0.2546, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.26869308948516846, + "step": 1385, + "valid_targets_mean": 2364.1, + "valid_targets_min": 856 + }, + { + "epoch": 2.320534223706177, + "grad_norm": 0.6395714990020217, + "learning_rate": 3.383569014078587e-05, + "loss": 0.2448, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24285399913787842, + "step": 1390, + "valid_targets_mean": 2583.5, + "valid_targets_min": 727 + }, + { + "epoch": 2.328881469115192, + "grad_norm": 0.6776744324721674, + "learning_rate": 3.3775444339207465e-05, + "loss": 0.2311, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21158820390701294, + "step": 1395, + "valid_targets_mean": 2127.4, + "valid_targets_min": 601 + }, + { + "epoch": 2.337228714524207, + "grad_norm": 0.6770674417267395, + "learning_rate": 3.3714959772371885e-05, + "loss": 0.2211, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.238697350025177, + "step": 1400, + "valid_targets_mean": 2220.6, + "valid_targets_min": 1108 + }, + { + "epoch": 2.345575959933222, + "grad_norm": 0.6626681483726701, + "learning_rate": 3.3654237488638306e-05, + "loss": 0.2338, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22057095170021057, + "step": 1405, + "valid_targets_mean": 2314.4, + "valid_targets_min": 646 + }, + { + "epoch": 2.353923205342237, + "grad_norm": 0.6782631619393108, + "learning_rate": 3.3593278540486135e-05, + "loss": 0.2258, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23829273879528046, + "step": 1410, + "valid_targets_mean": 2254.5, + "valid_targets_min": 783 + }, + { + "epoch": 2.362270450751252, + "grad_norm": 0.6144730572905673, + "learning_rate": 3.353208398449683e-05, + "loss": 0.2497, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21382129192352295, + "step": 1415, + "valid_targets_mean": 2535.1, + "valid_targets_min": 914 + }, + { + "epoch": 2.370617696160267, + "grad_norm": 0.6661453498748384, + "learning_rate": 3.347065488133555e-05, + "loss": 0.2489, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22840982675552368, + "step": 1420, + "valid_targets_mean": 2266.5, + "valid_targets_min": 712 + }, + { + "epoch": 2.378964941569282, + "grad_norm": 0.6632607942160634, + "learning_rate": 3.340899229573278e-05, + "loss": 0.236, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22487732768058777, + "step": 1425, + "valid_targets_mean": 2084.4, + "valid_targets_min": 601 + }, + { + "epoch": 2.3873121869782974, + "grad_norm": 0.6386430366838826, + "learning_rate": 3.334709729646589e-05, + "loss": 0.2504, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24176400899887085, + "step": 1430, + "valid_targets_mean": 2506.2, + "valid_targets_min": 705 + }, + { + "epoch": 2.395659432387312, + "grad_norm": 0.5524129897132546, + "learning_rate": 3.32849709563406e-05, + "loss": 0.2388, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21630480885505676, + "step": 1435, + "valid_targets_mean": 3571.8, + "valid_targets_min": 800 + }, + { + "epoch": 2.4040066777963274, + "grad_norm": 0.7020503166023468, + "learning_rate": 3.322261435217237e-05, + "loss": 0.2663, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.26422515511512756, + "step": 1440, + "valid_targets_mean": 2308.3, + "valid_targets_min": 618 + }, + { + "epoch": 2.4123539232053424, + "grad_norm": 0.789272389237031, + "learning_rate": 3.316002856476776e-05, + "loss": 0.2436, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25431516766548157, + "step": 1445, + "valid_targets_mean": 2112.1, + "valid_targets_min": 886 + }, + { + "epoch": 2.4207011686143574, + "grad_norm": 0.616671473715282, + "learning_rate": 3.309721467890571e-05, + "loss": 0.2428, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23085810244083405, + "step": 1450, + "valid_targets_mean": 2605.9, + "valid_targets_min": 679 + }, + { + "epoch": 2.4290484140233723, + "grad_norm": 0.7930475868728878, + "learning_rate": 3.303417378331867e-05, + "loss": 0.2411, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23393920063972473, + "step": 1455, + "valid_targets_mean": 1837.5, + "valid_targets_min": 904 + }, + { + "epoch": 2.4373956594323873, + "grad_norm": 0.7032008867370768, + "learning_rate": 3.2970906970673814e-05, + "loss": 0.2357, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20973175764083862, + "step": 1460, + "valid_targets_mean": 2218.8, + "valid_targets_min": 758 + }, + { + "epoch": 2.4457429048414023, + "grad_norm": 0.7969084821853115, + "learning_rate": 3.2907415337554045e-05, + "loss": 0.2418, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22123488783836365, + "step": 1465, + "valid_targets_mean": 1655.2, + "valid_targets_min": 835 + }, + { + "epoch": 2.4540901502504173, + "grad_norm": 0.6317113154583319, + "learning_rate": 3.284369998443901e-05, + "loss": 0.2505, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20731478929519653, + "step": 1470, + "valid_targets_mean": 2597.5, + "valid_targets_min": 903 + }, + { + "epoch": 2.4624373956594323, + "grad_norm": 0.7512566102291349, + "learning_rate": 3.277976201568603e-05, + "loss": 0.2295, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24652166664600372, + "step": 1475, + "valid_targets_mean": 2205.5, + "valid_targets_min": 670 + }, + { + "epoch": 2.4707846410684473, + "grad_norm": 0.713993361555449, + "learning_rate": 3.2715602539510915e-05, + "loss": 0.2542, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21940815448760986, + "step": 1480, + "valid_targets_mean": 1958.4, + "valid_targets_min": 813 + }, + { + "epoch": 2.4791318864774623, + "grad_norm": 0.6825440027750466, + "learning_rate": 3.265122266796884e-05, + "loss": 0.2647, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2662467360496521, + "step": 1485, + "valid_targets_mean": 2280.6, + "valid_targets_min": 438 + }, + { + "epoch": 2.4874791318864773, + "grad_norm": 0.6371860820483898, + "learning_rate": 3.258662351693498e-05, + "loss": 0.2315, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25087234377861023, + "step": 1490, + "valid_targets_mean": 2939.2, + "valid_targets_min": 941 + }, + { + "epoch": 2.4958263772954927, + "grad_norm": 0.662118894737436, + "learning_rate": 3.252180620608524e-05, + "loss": 0.2424, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24761708080768585, + "step": 1495, + "valid_targets_mean": 2687.6, + "valid_targets_min": 1060 + }, + { + "epoch": 2.5041736227045073, + "grad_norm": 0.6596293311660842, + "learning_rate": 3.245677185887678e-05, + "loss": 0.2345, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22490762174129486, + "step": 1500, + "valid_targets_mean": 2352.7, + "valid_targets_min": 901 + }, + { + "epoch": 2.5125208681135227, + "grad_norm": 0.6926751619712956, + "learning_rate": 3.2391521602528634e-05, + "loss": 0.2513, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2553149461746216, + "step": 1505, + "valid_targets_mean": 2310.8, + "valid_targets_min": 909 + }, + { + "epoch": 2.5208681135225377, + "grad_norm": 0.6979481450499412, + "learning_rate": 3.232605656800207e-05, + "loss": 0.2239, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24520985782146454, + "step": 1510, + "valid_targets_mean": 2667.4, + "valid_targets_min": 737 + }, + { + "epoch": 2.5292153589315527, + "grad_norm": 0.6555629495703937, + "learning_rate": 3.226037788998105e-05, + "loss": 0.2466, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.29187384247779846, + "step": 1515, + "valid_targets_mean": 2969.0, + "valid_targets_min": 923 + }, + { + "epoch": 2.5375626043405677, + "grad_norm": 0.7666678856521503, + "learning_rate": 3.219448670685256e-05, + "loss": 0.2511, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.29762178659439087, + "step": 1520, + "valid_targets_mean": 2130.6, + "valid_targets_min": 769 + }, + { + "epoch": 2.5459098497495827, + "grad_norm": 0.635001587521722, + "learning_rate": 3.212838416068685e-05, + "loss": 0.2504, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24263107776641846, + "step": 1525, + "valid_targets_mean": 2891.2, + "valid_targets_min": 934 + }, + { + "epoch": 2.5542570951585977, + "grad_norm": 0.7287011835787055, + "learning_rate": 3.206207139721768e-05, + "loss": 0.2368, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23700858652591705, + "step": 1530, + "valid_targets_mean": 1981.3, + "valid_targets_min": 898 + }, + { + "epoch": 2.5626043405676127, + "grad_norm": 0.6870197123559626, + "learning_rate": 3.199554956582241e-05, + "loss": 0.2429, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22994369268417358, + "step": 1535, + "valid_targets_mean": 2409.1, + "valid_targets_min": 691 + }, + { + "epoch": 2.5709515859766277, + "grad_norm": 0.6949292075207361, + "learning_rate": 3.192881981950212e-05, + "loss": 0.2293, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23517221212387085, + "step": 1540, + "valid_targets_mean": 2347.6, + "valid_targets_min": 625 + }, + { + "epoch": 2.5792988313856426, + "grad_norm": 0.6890952912783886, + "learning_rate": 3.1861883314861616e-05, + "loss": 0.2451, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25980061292648315, + "step": 1545, + "valid_targets_mean": 2271.5, + "valid_targets_min": 896 + }, + { + "epoch": 2.5876460767946576, + "grad_norm": 0.7160702464639767, + "learning_rate": 3.179474121208937e-05, + "loss": 0.2384, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20312431454658508, + "step": 1550, + "valid_targets_mean": 1892.7, + "valid_targets_min": 547 + }, + { + "epoch": 2.5959933222036726, + "grad_norm": 0.7276950171510028, + "learning_rate": 3.172739467493741e-05, + "loss": 0.239, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2361968606710434, + "step": 1555, + "valid_targets_mean": 2012.6, + "valid_targets_min": 894 + }, + { + "epoch": 2.604340567612688, + "grad_norm": 0.6647020815612599, + "learning_rate": 3.165984487070118e-05, + "loss": 0.2362, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24586845934391022, + "step": 1560, + "valid_targets_mean": 2487.6, + "valid_targets_min": 1049 + }, + { + "epoch": 2.6126878130217026, + "grad_norm": 0.7195149038288103, + "learning_rate": 3.1592092970199266e-05, + "loss": 0.2258, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2257130742073059, + "step": 1565, + "valid_targets_mean": 1995.1, + "valid_targets_min": 1064 + }, + { + "epoch": 2.621035058430718, + "grad_norm": 0.6839599973695903, + "learning_rate": 3.152414014775315e-05, + "loss": 0.262, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.29660463333129883, + "step": 1570, + "valid_targets_mean": 2584.0, + "valid_targets_min": 962 + }, + { + "epoch": 2.629382303839733, + "grad_norm": 0.725502712005601, + "learning_rate": 3.1455987581166784e-05, + "loss": 0.2464, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2801527678966522, + "step": 1575, + "valid_targets_mean": 2065.1, + "valid_targets_min": 605 + }, + { + "epoch": 2.637729549248748, + "grad_norm": 0.7053226716868043, + "learning_rate": 3.138763645170626e-05, + "loss": 0.2301, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2274249792098999, + "step": 1580, + "valid_targets_mean": 1978.2, + "valid_targets_min": 400 + }, + { + "epoch": 2.646076794657763, + "grad_norm": 0.7790077967494133, + "learning_rate": 3.1319087944079275e-05, + "loss": 0.2553, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20891839265823364, + "step": 1585, + "valid_targets_mean": 1581.3, + "valid_targets_min": 614 + }, + { + "epoch": 2.654424040066778, + "grad_norm": 0.8009563456908694, + "learning_rate": 3.125034324641462e-05, + "loss": 0.2492, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.29142534732818604, + "step": 1590, + "valid_targets_mean": 2003.0, + "valid_targets_min": 505 + }, + { + "epoch": 2.662771285475793, + "grad_norm": 0.7585464640547334, + "learning_rate": 3.118140355024159e-05, + "loss": 0.2539, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23483462631702423, + "step": 1595, + "valid_targets_mean": 2244.9, + "valid_targets_min": 769 + }, + { + "epoch": 2.671118530884808, + "grad_norm": 0.6757093378546383, + "learning_rate": 3.111227005046932e-05, + "loss": 0.2472, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25684505701065063, + "step": 1600, + "valid_targets_mean": 2694.5, + "valid_targets_min": 1039 + }, + { + "epoch": 2.679465776293823, + "grad_norm": 0.7703481949229823, + "learning_rate": 3.104294394536608e-05, + "loss": 0.2384, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3010757565498352, + "step": 1605, + "valid_targets_mean": 2485.1, + "valid_targets_min": 1027 + }, + { + "epoch": 2.687813021702838, + "grad_norm": 0.6400764593805821, + "learning_rate": 3.097342643653849e-05, + "loss": 0.2448, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.228501558303833, + "step": 1610, + "valid_targets_mean": 2393.6, + "valid_targets_min": 1024 + }, + { + "epoch": 2.696160267111853, + "grad_norm": 0.736490810291691, + "learning_rate": 3.090371872891074e-05, + "loss": 0.2269, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21466585993766785, + "step": 1615, + "valid_targets_mean": 1701.9, + "valid_targets_min": 958 + }, + { + "epoch": 2.704507512520868, + "grad_norm": 0.7705037351717331, + "learning_rate": 3.083382203070365e-05, + "loss": 0.2471, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2398218810558319, + "step": 1620, + "valid_targets_mean": 1538.4, + "valid_targets_min": 356 + }, + { + "epoch": 2.7128547579298834, + "grad_norm": 0.6380889104450443, + "learning_rate": 3.0763737553413766e-05, + "loss": 0.2305, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19768619537353516, + "step": 1625, + "valid_targets_mean": 2420.4, + "valid_targets_min": 874 + }, + { + "epoch": 2.721202003338898, + "grad_norm": 0.7557652741904972, + "learning_rate": 3.069346651179233e-05, + "loss": 0.2227, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21941837668418884, + "step": 1630, + "valid_targets_mean": 1873.2, + "valid_targets_min": 533 + }, + { + "epoch": 2.7295492487479134, + "grad_norm": 0.9109177697394876, + "learning_rate": 3.0623010123824245e-05, + "loss": 0.2654, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.30781060457229614, + "step": 1635, + "valid_targets_mean": 1401.6, + "valid_targets_min": 822 + }, + { + "epoch": 2.7378964941569284, + "grad_norm": 0.7515741045238992, + "learning_rate": 3.0552369610706985e-05, + "loss": 0.2359, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2299075722694397, + "step": 1640, + "valid_targets_mean": 1766.3, + "valid_targets_min": 739 + }, + { + "epoch": 2.7462437395659434, + "grad_norm": 0.7242134429840179, + "learning_rate": 3.0481546196829375e-05, + "loss": 0.2327, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25316867232322693, + "step": 1645, + "valid_targets_mean": 2118.2, + "valid_targets_min": 438 + }, + { + "epoch": 2.7545909849749584, + "grad_norm": 0.8594982851019005, + "learning_rate": 3.041054110975041e-05, + "loss": 0.242, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.250777006149292, + "step": 1650, + "valid_targets_mean": 1474.4, + "valid_targets_min": 513 + }, + { + "epoch": 2.7629382303839733, + "grad_norm": 0.6928528426680121, + "learning_rate": 3.033935558017797e-05, + "loss": 0.2685, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22606828808784485, + "step": 1655, + "valid_targets_mean": 2188.0, + "valid_targets_min": 689 + }, + { + "epoch": 2.7712854757929883, + "grad_norm": 1.359593188853504, + "learning_rate": 3.0267990841947492e-05, + "loss": 0.2542, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2838791012763977, + "step": 1660, + "valid_targets_mean": 2975.5, + "valid_targets_min": 901 + }, + { + "epoch": 2.7796327212020033, + "grad_norm": 0.6079701413980375, + "learning_rate": 3.0196448132000563e-05, + "loss": 0.2462, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25636208057403564, + "step": 1665, + "valid_targets_mean": 2884.0, + "valid_targets_min": 502 + }, + { + "epoch": 2.7879799666110183, + "grad_norm": 0.7270380376772345, + "learning_rate": 3.0124728690363504e-05, + "loss": 0.251, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27121657133102417, + "step": 1670, + "valid_targets_mean": 2245.4, + "valid_targets_min": 800 + }, + { + "epoch": 2.7963272120200333, + "grad_norm": 0.7845898208867876, + "learning_rate": 3.0052833760125864e-05, + "loss": 0.2562, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2516961097717285, + "step": 1675, + "valid_targets_mean": 2116.7, + "valid_targets_min": 408 + }, + { + "epoch": 2.8046744574290483, + "grad_norm": 0.7018532983083052, + "learning_rate": 2.9980764587418885e-05, + "loss": 0.2452, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23445004224777222, + "step": 1680, + "valid_targets_mean": 1939.8, + "valid_targets_min": 661 + }, + { + "epoch": 2.8130217028380633, + "grad_norm": 0.5921837745995009, + "learning_rate": 2.990852242139389e-05, + "loss": 0.2319, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21622955799102783, + "step": 1685, + "valid_targets_mean": 2747.9, + "valid_targets_min": 776 + }, + { + "epoch": 2.8213689482470787, + "grad_norm": 0.5824720536926297, + "learning_rate": 2.983610851420064e-05, + "loss": 0.2472, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20980806648731232, + "step": 1690, + "valid_targets_mean": 2791.8, + "valid_targets_min": 587 + }, + { + "epoch": 2.8297161936560933, + "grad_norm": 0.5021027254030971, + "learning_rate": 2.976352412096563e-05, + "loss": 0.2444, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2591347396373749, + "step": 1695, + "valid_targets_mean": 3924.0, + "valid_targets_min": 1196 + }, + { + "epoch": 2.8380634390651087, + "grad_norm": 0.6615812499102618, + "learning_rate": 2.969077049977034e-05, + "loss": 0.2711, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22708380222320557, + "step": 1700, + "valid_targets_mean": 2462.8, + "valid_targets_min": 577 + }, + { + "epoch": 2.8464106844741233, + "grad_norm": 0.6723998430800802, + "learning_rate": 2.9617848911629402e-05, + "loss": 0.2602, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.228207066655159, + "step": 1705, + "valid_targets_mean": 2054.6, + "valid_targets_min": 702 + }, + { + "epoch": 2.8547579298831387, + "grad_norm": 0.6644600523627668, + "learning_rate": 2.9544760620468794e-05, + "loss": 0.2281, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21405984461307526, + "step": 1710, + "valid_targets_mean": 2316.2, + "valid_targets_min": 898 + }, + { + "epoch": 2.8631051752921537, + "grad_norm": 0.8463918203843788, + "learning_rate": 2.9471506893103883e-05, + "loss": 0.2361, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24980512261390686, + "step": 1715, + "valid_targets_mean": 2414.9, + "valid_targets_min": 738 + }, + { + "epoch": 2.8714524207011687, + "grad_norm": 0.5857132344827396, + "learning_rate": 2.939808899921749e-05, + "loss": 0.246, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23113596439361572, + "step": 1720, + "valid_targets_mean": 3229.4, + "valid_targets_min": 1171 + }, + { + "epoch": 2.8797996661101837, + "grad_norm": 0.6344993050865212, + "learning_rate": 2.932450821133788e-05, + "loss": 0.2483, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23790693283081055, + "step": 1725, + "valid_targets_mean": 2504.1, + "valid_targets_min": 874 + }, + { + "epoch": 2.8881469115191987, + "grad_norm": 0.6612398379376797, + "learning_rate": 2.9250765804816712e-05, + "loss": 0.2307, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24237364530563354, + "step": 1730, + "valid_targets_mean": 2588.6, + "valid_targets_min": 910 + }, + { + "epoch": 2.8964941569282137, + "grad_norm": 0.6361183421149988, + "learning_rate": 2.917686305780692e-05, + "loss": 0.235, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23230323195457458, + "step": 1735, + "valid_targets_mean": 2619.9, + "valid_targets_min": 500 + }, + { + "epoch": 2.9048414023372287, + "grad_norm": 0.6623545853466452, + "learning_rate": 2.9102801251240575e-05, + "loss": 0.2323, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.26689085364341736, + "step": 1740, + "valid_targets_mean": 2494.6, + "valid_targets_min": 821 + }, + { + "epoch": 2.9131886477462436, + "grad_norm": 0.6961705682075915, + "learning_rate": 2.902858166880667e-05, + "loss": 0.247, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24191753566265106, + "step": 1745, + "valid_targets_mean": 1999.0, + "valid_targets_min": 906 + }, + { + "epoch": 2.9215358931552586, + "grad_norm": 0.7882520360796017, + "learning_rate": 2.8954205596928873e-05, + "loss": 0.2516, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.28292351961135864, + "step": 1750, + "valid_targets_mean": 2114.1, + "valid_targets_min": 906 + }, + { + "epoch": 2.9298831385642736, + "grad_norm": 0.720653639028986, + "learning_rate": 2.8879674324743246e-05, + "loss": 0.2363, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21395087242126465, + "step": 1755, + "valid_targets_mean": 2028.4, + "valid_targets_min": 954 + }, + { + "epoch": 2.9382303839732886, + "grad_norm": 0.66332681260518, + "learning_rate": 2.880498914407587e-05, + "loss": 0.2495, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2460024505853653, + "step": 1760, + "valid_targets_mean": 2476.1, + "valid_targets_min": 569 + }, + { + "epoch": 2.946577629382304, + "grad_norm": 0.7231515624521551, + "learning_rate": 2.8730151349420475e-05, + "loss": 0.2467, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2216128706932068, + "step": 1765, + "valid_targets_mean": 1965.0, + "valid_targets_min": 861 + }, + { + "epoch": 2.9549248747913186, + "grad_norm": 0.7313757341850193, + "learning_rate": 2.865516223791601e-05, + "loss": 0.2575, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.26219192147254944, + "step": 1770, + "valid_targets_mean": 2038.5, + "valid_targets_min": 944 + }, + { + "epoch": 2.963272120200334, + "grad_norm": 0.6768246806109347, + "learning_rate": 2.8580023109324137e-05, + "loss": 0.2339, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.234646737575531, + "step": 1775, + "valid_targets_mean": 2185.1, + "valid_targets_min": 681 + }, + { + "epoch": 2.971619365609349, + "grad_norm": 0.7122144571530461, + "learning_rate": 2.8504735266006717e-05, + "loss": 0.2358, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27237963676452637, + "step": 1780, + "valid_targets_mean": 2903.6, + "valid_targets_min": 614 + }, + { + "epoch": 2.979966611018364, + "grad_norm": 0.7358658160448729, + "learning_rate": 2.8429300012903245e-05, + "loss": 0.2492, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.28576362133026123, + "step": 1785, + "valid_targets_mean": 1996.7, + "valid_targets_min": 967 + }, + { + "epoch": 2.988313856427379, + "grad_norm": 0.6149098097365543, + "learning_rate": 2.8353718657508206e-05, + "loss": 0.2311, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1908757984638214, + "step": 1790, + "valid_targets_mean": 2278.7, + "valid_targets_min": 772 + }, + { + "epoch": 2.996661101836394, + "grad_norm": 0.6789566802468145, + "learning_rate": 2.827799250984844e-05, + "loss": 0.238, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2509405016899109, + "step": 1795, + "valid_targets_mean": 2205.6, + "valid_targets_min": 761 + }, + { + "epoch": 3.005008347245409, + "grad_norm": 0.6600698036876375, + "learning_rate": 2.8202122882460418e-05, + "loss": 0.2211, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.199271559715271, + "step": 1800, + "valid_targets_mean": 2666.2, + "valid_targets_min": 523 + }, + { + "epoch": 3.013355592654424, + "grad_norm": 0.6880197269898586, + "learning_rate": 2.81261110903675e-05, + "loss": 0.2056, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20546582341194153, + "step": 1805, + "valid_targets_mean": 2364.2, + "valid_targets_min": 730 + }, + { + "epoch": 3.021702838063439, + "grad_norm": 0.7244180983476358, + "learning_rate": 2.804995845105714e-05, + "loss": 0.205, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19351571798324585, + "step": 1810, + "valid_targets_mean": 2140.0, + "valid_targets_min": 787 + }, + { + "epoch": 3.030050083472454, + "grad_norm": 0.6197068239432737, + "learning_rate": 2.7973666284458048e-05, + "loss": 0.2187, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1775825023651123, + "step": 1815, + "valid_targets_mean": 2523.5, + "valid_targets_min": 747 + }, + { + "epoch": 3.038397328881469, + "grad_norm": 0.6186316306349579, + "learning_rate": 2.7897235912917318e-05, + "loss": 0.1907, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1805172711610794, + "step": 1820, + "valid_targets_mean": 2927.8, + "valid_targets_min": 711 + }, + { + "epoch": 3.046744574290484, + "grad_norm": 0.7416894691661986, + "learning_rate": 2.7820668661177505e-05, + "loss": 0.217, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22550976276397705, + "step": 1825, + "valid_targets_mean": 2538.1, + "valid_targets_min": 912 + }, + { + "epoch": 3.0550918196994994, + "grad_norm": 0.740249273747313, + "learning_rate": 2.7743965856353666e-05, + "loss": 0.2242, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17422476410865784, + "step": 1830, + "valid_targets_mean": 1954.8, + "valid_targets_min": 835 + }, + { + "epoch": 3.0634390651085144, + "grad_norm": 0.7535335414584483, + "learning_rate": 2.7667128827910343e-05, + "loss": 0.207, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17968419194221497, + "step": 1835, + "valid_targets_mean": 1711.3, + "valid_targets_min": 762 + }, + { + "epoch": 3.0717863105175294, + "grad_norm": 0.7237942366354729, + "learning_rate": 2.7590158907638552e-05, + "loss": 0.2063, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18092207610607147, + "step": 1840, + "valid_targets_mean": 1816.8, + "valid_targets_min": 592 + }, + { + "epoch": 3.0801335559265444, + "grad_norm": 0.7385702671649094, + "learning_rate": 2.7513057429632656e-05, + "loss": 0.1963, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1769607961177826, + "step": 1845, + "valid_targets_mean": 2052.9, + "valid_targets_min": 441 + }, + { + "epoch": 3.0884808013355594, + "grad_norm": 0.753893856292005, + "learning_rate": 2.7435825730267285e-05, + "loss": 0.2136, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19851276278495789, + "step": 1850, + "valid_targets_mean": 1892.8, + "valid_targets_min": 720 + }, + { + "epoch": 3.0968280467445743, + "grad_norm": 0.7818860359527091, + "learning_rate": 2.7358465148174143e-05, + "loss": 0.2054, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1829618513584137, + "step": 1855, + "valid_targets_mean": 2546.3, + "valid_targets_min": 633 + }, + { + "epoch": 3.1051752921535893, + "grad_norm": 0.6618176032610746, + "learning_rate": 2.728097702421882e-05, + "loss": 0.1986, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17300860583782196, + "step": 1860, + "valid_targets_mean": 2134.3, + "valid_targets_min": 788 + }, + { + "epoch": 3.1135225375626043, + "grad_norm": 0.8399148343537419, + "learning_rate": 2.720336270147754e-05, + "loss": 0.2102, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20378287136554718, + "step": 1865, + "valid_targets_mean": 1742.4, + "valid_targets_min": 743 + }, + { + "epoch": 3.1218697829716193, + "grad_norm": 0.6998272425633003, + "learning_rate": 2.71256235252139e-05, + "loss": 0.202, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17975613474845886, + "step": 1870, + "valid_targets_mean": 1958.1, + "valid_targets_min": 573 + }, + { + "epoch": 3.1302170283806343, + "grad_norm": 0.8063543185019679, + "learning_rate": 2.7047760842855536e-05, + "loss": 0.2185, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23194459080696106, + "step": 1875, + "valid_targets_mean": 2624.8, + "valid_targets_min": 952 + }, + { + "epoch": 3.1385642737896493, + "grad_norm": 0.7198332608332704, + "learning_rate": 2.6969776003970777e-05, + "loss": 0.2061, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2293994426727295, + "step": 1880, + "valid_targets_mean": 2671.6, + "valid_targets_min": 475 + }, + { + "epoch": 3.1469115191986643, + "grad_norm": 0.8095457359184923, + "learning_rate": 2.6891670360245244e-05, + "loss": 0.2275, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22839170694351196, + "step": 1885, + "valid_targets_mean": 1831.1, + "valid_targets_min": 813 + }, + { + "epoch": 3.1552587646076793, + "grad_norm": 0.7207165913622955, + "learning_rate": 2.6813445265458438e-05, + "loss": 0.2047, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2124374508857727, + "step": 1890, + "valid_targets_mean": 2360.6, + "valid_targets_min": 779 + }, + { + "epoch": 3.1636060100166947, + "grad_norm": 0.7649134857899963, + "learning_rate": 2.6735102075460257e-05, + "loss": 0.1918, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20324769616127014, + "step": 1895, + "valid_targets_mean": 1896.7, + "valid_targets_min": 810 + }, + { + "epoch": 3.1719532554257097, + "grad_norm": 0.6513034127013698, + "learning_rate": 2.665664214814752e-05, + "loss": 0.2015, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18446782231330872, + "step": 1900, + "valid_targets_mean": 2563.6, + "valid_targets_min": 1052 + }, + { + "epoch": 3.1803005008347247, + "grad_norm": 0.7335008506139117, + "learning_rate": 2.65780668434404e-05, + "loss": 0.1981, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20354166626930237, + "step": 1905, + "valid_targets_mean": 2524.7, + "valid_targets_min": 637 + }, + { + "epoch": 3.1886477462437397, + "grad_norm": 0.8480331519341718, + "learning_rate": 2.649937752325887e-05, + "loss": 0.2133, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22984029352664948, + "step": 1910, + "valid_targets_mean": 2089.6, + "valid_targets_min": 966 + }, + { + "epoch": 3.1969949916527547, + "grad_norm": 0.8968895450540663, + "learning_rate": 2.64205755514991e-05, + "loss": 0.2235, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24820029735565186, + "step": 1915, + "valid_targets_mean": 1858.2, + "valid_targets_min": 525 + }, + { + "epoch": 3.2053422370617697, + "grad_norm": 0.8371027743279744, + "learning_rate": 2.634166229400982e-05, + "loss": 0.2435, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23452766239643097, + "step": 1920, + "valid_targets_mean": 2166.8, + "valid_targets_min": 676 + }, + { + "epoch": 3.2136894824707847, + "grad_norm": 0.6007974236801219, + "learning_rate": 2.626263911856863e-05, + "loss": 0.1944, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18619805574417114, + "step": 1925, + "valid_targets_mean": 3290.9, + "valid_targets_min": 627 + }, + { + "epoch": 3.2220367278797997, + "grad_norm": 0.8365342869452538, + "learning_rate": 2.6183507394858296e-05, + "loss": 0.2288, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21354706585407257, + "step": 1930, + "valid_targets_mean": 1950.3, + "valid_targets_min": 782 + }, + { + "epoch": 3.2303839732888147, + "grad_norm": 0.925359777005885, + "learning_rate": 2.6104268494443027e-05, + "loss": 0.2167, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21958017349243164, + "step": 1935, + "valid_targets_mean": 1932.6, + "valid_targets_min": 932 + }, + { + "epoch": 3.2387312186978297, + "grad_norm": 0.6218879507128591, + "learning_rate": 2.6024923790744686e-05, + "loss": 0.192, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21151047945022583, + "step": 1940, + "valid_targets_mean": 2720.3, + "valid_targets_min": 1020 + }, + { + "epoch": 3.2470784641068446, + "grad_norm": 0.8132810161080779, + "learning_rate": 2.594547465901899e-05, + "loss": 0.1997, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2025194764137268, + "step": 1945, + "valid_targets_mean": 2000.6, + "valid_targets_min": 885 + }, + { + "epoch": 3.2554257095158596, + "grad_norm": 0.750631680842447, + "learning_rate": 2.5865922476331674e-05, + "loss": 0.215, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23061227798461914, + "step": 1950, + "valid_targets_mean": 2381.8, + "valid_targets_min": 558 + }, + { + "epoch": 3.2637729549248746, + "grad_norm": 0.7635400209418792, + "learning_rate": 2.5786268621534626e-05, + "loss": 0.2001, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19307637214660645, + "step": 1955, + "valid_targets_mean": 1770.5, + "valid_targets_min": 438 + }, + { + "epoch": 3.27212020033389, + "grad_norm": 0.8068310955065211, + "learning_rate": 2.5706514475241964e-05, + "loss": 0.2131, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22574101388454437, + "step": 1960, + "valid_targets_mean": 2430.2, + "valid_targets_min": 710 + }, + { + "epoch": 3.2804674457429046, + "grad_norm": 0.6683046345309371, + "learning_rate": 2.5626661419806147e-05, + "loss": 0.2101, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20542487502098083, + "step": 1965, + "valid_targets_mean": 2587.2, + "valid_targets_min": 628 + }, + { + "epoch": 3.28881469115192, + "grad_norm": 0.8190318720570766, + "learning_rate": 2.5546710839293988e-05, + "loss": 0.2142, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2078418731689453, + "step": 1970, + "valid_targets_mean": 1997.2, + "valid_targets_min": 887 + }, + { + "epoch": 3.297161936560935, + "grad_norm": 1.0194362581196132, + "learning_rate": 2.5466664119462667e-05, + "loss": 0.1928, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20889687538146973, + "step": 1975, + "valid_targets_mean": 1949.8, + "valid_targets_min": 502 + }, + { + "epoch": 3.30550918196995, + "grad_norm": 0.7456232327178741, + "learning_rate": 2.5386522647735712e-05, + "loss": 0.1978, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19417878985404968, + "step": 1980, + "valid_targets_mean": 1997.1, + "valid_targets_min": 530 + }, + { + "epoch": 3.313856427378965, + "grad_norm": 0.6478403904691091, + "learning_rate": 2.530628781317896e-05, + "loss": 0.201, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2210979461669922, + "step": 1985, + "valid_targets_mean": 3150.8, + "valid_targets_min": 798 + }, + { + "epoch": 3.32220367278798, + "grad_norm": 0.7733989159362327, + "learning_rate": 2.5225961006476484e-05, + "loss": 0.1999, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23125320672988892, + "step": 1990, + "valid_targets_mean": 1941.2, + "valid_targets_min": 974 + }, + { + "epoch": 3.330550918196995, + "grad_norm": 0.706725558384721, + "learning_rate": 2.5145543619906456e-05, + "loss": 0.194, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18903513252735138, + "step": 1995, + "valid_targets_mean": 2310.9, + "valid_targets_min": 925 + }, + { + "epoch": 3.33889816360601, + "grad_norm": 0.7561082223200716, + "learning_rate": 2.5065037047317066e-05, + "loss": 0.207, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20730358362197876, + "step": 2000, + "valid_targets_mean": 2447.6, + "valid_targets_min": 639 + }, + { + "epoch": 3.347245409015025, + "grad_norm": 0.6896363314064868, + "learning_rate": 2.4984442684102307e-05, + "loss": 0.2022, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21380692720413208, + "step": 2005, + "valid_targets_mean": 2873.9, + "valid_targets_min": 659 + }, + { + "epoch": 3.35559265442404, + "grad_norm": 0.776766912113279, + "learning_rate": 2.4903761927177853e-05, + "loss": 0.2206, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2006554901599884, + "step": 2010, + "valid_targets_mean": 1877.1, + "valid_targets_min": 879 + }, + { + "epoch": 3.363939899833055, + "grad_norm": 0.649648115219964, + "learning_rate": 2.482299617495678e-05, + "loss": 0.2106, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18565139174461365, + "step": 2015, + "valid_targets_mean": 2477.9, + "valid_targets_min": 565 + }, + { + "epoch": 3.37228714524207, + "grad_norm": 0.6785698331257374, + "learning_rate": 2.474214682732538e-05, + "loss": 0.2113, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18758153915405273, + "step": 2020, + "valid_targets_mean": 2457.2, + "valid_targets_min": 962 + }, + { + "epoch": 3.380634390651085, + "grad_norm": 0.7688542630674376, + "learning_rate": 2.466121528561887e-05, + "loss": 0.1837, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19837577641010284, + "step": 2025, + "valid_targets_mean": 1913.9, + "valid_targets_min": 689 + }, + { + "epoch": 3.3889816360601, + "grad_norm": 0.7534689229168448, + "learning_rate": 2.4580202952597106e-05, + "loss": 0.2141, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21255707740783691, + "step": 2030, + "valid_targets_mean": 1970.7, + "valid_targets_min": 525 + }, + { + "epoch": 3.3973288814691154, + "grad_norm": 0.6902091805329057, + "learning_rate": 2.4499111232420275e-05, + "loss": 0.2172, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19505161046981812, + "step": 2035, + "valid_targets_mean": 3377.1, + "valid_targets_min": 886 + }, + { + "epoch": 3.4056761268781304, + "grad_norm": 0.7883956746439428, + "learning_rate": 2.441794153062457e-05, + "loss": 0.2193, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21510925889015198, + "step": 2040, + "valid_targets_mean": 1821.9, + "valid_targets_min": 745 + }, + { + "epoch": 3.4140233722871454, + "grad_norm": 0.6742718385671317, + "learning_rate": 2.4336695254097782e-05, + "loss": 0.1954, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2005099058151245, + "step": 2045, + "valid_targets_mean": 2417.1, + "valid_targets_min": 936 + }, + { + "epoch": 3.4223706176961604, + "grad_norm": 0.6854070730995327, + "learning_rate": 2.425537381105498e-05, + "loss": 0.2036, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18244539201259613, + "step": 2050, + "valid_targets_mean": 2198.9, + "valid_targets_min": 564 + }, + { + "epoch": 3.4307178631051753, + "grad_norm": 0.7484967320495022, + "learning_rate": 2.4173978611014053e-05, + "loss": 0.2172, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2040974199771881, + "step": 2055, + "valid_targets_mean": 2380.5, + "valid_targets_min": 905 + }, + { + "epoch": 3.4390651085141903, + "grad_norm": 0.7713044455268337, + "learning_rate": 2.40925110647713e-05, + "loss": 0.1978, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20483741164207458, + "step": 2060, + "valid_targets_mean": 2150.8, + "valid_targets_min": 780 + }, + { + "epoch": 3.4474123539232053, + "grad_norm": 0.7734911051230774, + "learning_rate": 2.4010972584376963e-05, + "loss": 0.2191, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21728011965751648, + "step": 2065, + "valid_targets_mean": 2074.2, + "valid_targets_min": 525 + }, + { + "epoch": 3.4557595993322203, + "grad_norm": 0.767728459680409, + "learning_rate": 2.392936458311078e-05, + "loss": 0.1942, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21184173226356506, + "step": 2070, + "valid_targets_mean": 1850.9, + "valid_targets_min": 966 + }, + { + "epoch": 3.4641068447412353, + "grad_norm": 0.6460453855120204, + "learning_rate": 2.3847688475457455e-05, + "loss": 0.1966, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1995435506105423, + "step": 2075, + "valid_targets_mean": 3029.0, + "valid_targets_min": 1015 + }, + { + "epoch": 3.4724540901502503, + "grad_norm": 0.6722751285835894, + "learning_rate": 2.3765945677082168e-05, + "loss": 0.2088, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2198968231678009, + "step": 2080, + "valid_targets_mean": 2835.6, + "valid_targets_min": 1009 + }, + { + "epoch": 3.4808013355592653, + "grad_norm": 0.6854820902470308, + "learning_rate": 2.368413760480603e-05, + "loss": 0.1878, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18193744122982025, + "step": 2085, + "valid_targets_mean": 3915.0, + "valid_targets_min": 568 + }, + { + "epoch": 3.4891485809682803, + "grad_norm": 0.711530790653231, + "learning_rate": 2.360226567658151e-05, + "loss": 0.1925, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22336947917938232, + "step": 2090, + "valid_targets_mean": 2574.9, + "valid_targets_min": 952 + }, + { + "epoch": 3.4974958263772953, + "grad_norm": 0.619722638576771, + "learning_rate": 2.3520331311467883e-05, + "loss": 0.2207, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23564806580543518, + "step": 2095, + "valid_targets_mean": 3082.7, + "valid_targets_min": 831 + }, + { + "epoch": 3.5058430717863107, + "grad_norm": 0.6693280886160189, + "learning_rate": 2.3438335929606613e-05, + "loss": 0.2322, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2105301171541214, + "step": 2100, + "valid_targets_mean": 2332.4, + "valid_targets_min": 671 + }, + { + "epoch": 3.5141903171953257, + "grad_norm": 0.6200551765589567, + "learning_rate": 2.3356280952196757e-05, + "loss": 0.2015, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19008010625839233, + "step": 2105, + "valid_targets_mean": 2823.4, + "valid_targets_min": 946 + }, + { + "epoch": 3.5225375626043407, + "grad_norm": 0.7823493214133, + "learning_rate": 2.3274167801470314e-05, + "loss": 0.2026, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22069746255874634, + "step": 2110, + "valid_targets_mean": 2198.3, + "valid_targets_min": 774 + }, + { + "epoch": 3.5308848080133557, + "grad_norm": 0.805122581311334, + "learning_rate": 2.3191997900667588e-05, + "loss": 0.2247, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21660353243350983, + "step": 2115, + "valid_targets_mean": 1722.5, + "valid_targets_min": 464 + }, + { + "epoch": 3.5392320534223707, + "grad_norm": 0.7989808566470185, + "learning_rate": 2.310977267401251e-05, + "loss": 0.2059, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.207453191280365, + "step": 2120, + "valid_targets_mean": 1765.3, + "valid_targets_min": 712 + }, + { + "epoch": 3.5475792988313857, + "grad_norm": 0.7562664407895434, + "learning_rate": 2.302749354668795e-05, + "loss": 0.2094, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1830502152442932, + "step": 2125, + "valid_targets_mean": 1835.9, + "valid_targets_min": 612 + }, + { + "epoch": 3.5559265442404007, + "grad_norm": 0.690251516382656, + "learning_rate": 2.2945161944811038e-05, + "loss": 0.2033, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24697524309158325, + "step": 2130, + "valid_targets_mean": 2619.8, + "valid_targets_min": 825 + }, + { + "epoch": 3.5642737896494157, + "grad_norm": 0.5588489722291431, + "learning_rate": 2.2862779295408405e-05, + "loss": 0.2123, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22931894659996033, + "step": 2135, + "valid_targets_mean": 3524.6, + "valid_targets_min": 538 + }, + { + "epoch": 3.5726210350584306, + "grad_norm": 0.7625768394762137, + "learning_rate": 2.2780347026391495e-05, + "loss": 0.2176, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20581907033920288, + "step": 2140, + "valid_targets_mean": 1909.9, + "valid_targets_min": 746 + }, + { + "epoch": 3.5809682804674456, + "grad_norm": 0.6671235206913544, + "learning_rate": 2.2697866566531775e-05, + "loss": 0.211, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18883800506591797, + "step": 2145, + "valid_targets_mean": 2590.1, + "valid_targets_min": 923 + }, + { + "epoch": 3.5893155258764606, + "grad_norm": 0.5448551570935133, + "learning_rate": 2.2615339345436e-05, + "loss": 0.2119, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22831106185913086, + "step": 2150, + "valid_targets_mean": 3508.4, + "valid_targets_min": 578 + }, + { + "epoch": 3.597662771285476, + "grad_norm": 0.6792066013173836, + "learning_rate": 2.2532766793521413e-05, + "loss": 0.2221, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21002694964408875, + "step": 2155, + "valid_targets_mean": 2160.6, + "valid_targets_min": 550 + }, + { + "epoch": 3.6060100166944906, + "grad_norm": 0.8721197336677428, + "learning_rate": 2.245015034199097e-05, + "loss": 0.227, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22117319703102112, + "step": 2160, + "valid_targets_mean": 1553.8, + "valid_targets_min": 555 + }, + { + "epoch": 3.614357262103506, + "grad_norm": 0.6416631529613723, + "learning_rate": 2.2367491422808514e-05, + "loss": 0.2047, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20796430110931396, + "step": 2165, + "valid_targets_mean": 3113.5, + "valid_targets_min": 738 + }, + { + "epoch": 3.6227045075125206, + "grad_norm": 0.6598964810382094, + "learning_rate": 2.228479146867397e-05, + "loss": 0.1997, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16844062507152557, + "step": 2170, + "valid_targets_mean": 2920.0, + "valid_targets_min": 918 + }, + { + "epoch": 3.631051752921536, + "grad_norm": 0.7732656977571363, + "learning_rate": 2.2202051912998516e-05, + "loss": 0.1889, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19828924536705017, + "step": 2175, + "valid_targets_mean": 1879.6, + "valid_targets_min": 933 + }, + { + "epoch": 3.639398998330551, + "grad_norm": 0.7610326159405831, + "learning_rate": 2.2119274189879727e-05, + "loss": 0.2095, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2426915466785431, + "step": 2180, + "valid_targets_mean": 2422.4, + "valid_targets_min": 420 + }, + { + "epoch": 3.647746243739566, + "grad_norm": 0.6211188010093095, + "learning_rate": 2.2036459734076715e-05, + "loss": 0.2021, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1922152042388916, + "step": 2185, + "valid_targets_mean": 3298.4, + "valid_targets_min": 801 + }, + { + "epoch": 3.656093489148581, + "grad_norm": 0.7654802209669019, + "learning_rate": 2.1953609980985266e-05, + "loss": 0.1971, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1986810266971588, + "step": 2190, + "valid_targets_mean": 1681.1, + "valid_targets_min": 675 + }, + { + "epoch": 3.664440734557596, + "grad_norm": 0.7644992080701967, + "learning_rate": 2.1870726366612978e-05, + "loss": 0.2327, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23523491621017456, + "step": 2195, + "valid_targets_mean": 2167.8, + "valid_targets_min": 600 + }, + { + "epoch": 3.672787979966611, + "grad_norm": 0.7642624223820179, + "learning_rate": 2.1787810327554345e-05, + "loss": 0.2046, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22661182284355164, + "step": 2200, + "valid_targets_mean": 1921.0, + "valid_targets_min": 772 + }, + { + "epoch": 3.681135225375626, + "grad_norm": 0.7027221331618968, + "learning_rate": 2.170486330096586e-05, + "loss": 0.1936, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1848253309726715, + "step": 2205, + "valid_targets_mean": 1921.4, + "valid_targets_min": 914 + }, + { + "epoch": 3.689482470784641, + "grad_norm": 0.6429434253544364, + "learning_rate": 2.1621886724541126e-05, + "loss": 0.2034, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18040227890014648, + "step": 2210, + "valid_targets_mean": 2432.4, + "valid_targets_min": 733 + }, + { + "epoch": 3.697829716193656, + "grad_norm": 0.7318785216160163, + "learning_rate": 2.1538882036485923e-05, + "loss": 0.2038, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21123959124088287, + "step": 2215, + "valid_targets_mean": 2331.3, + "valid_targets_min": 982 + }, + { + "epoch": 3.706176961602671, + "grad_norm": 0.7212005574575913, + "learning_rate": 2.1455850675493267e-05, + "loss": 0.2076, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2096509486436844, + "step": 2220, + "valid_targets_mean": 2168.4, + "valid_targets_min": 659 + }, + { + "epoch": 3.714524207011686, + "grad_norm": 0.7422932722800749, + "learning_rate": 2.1372794080718506e-05, + "loss": 0.2029, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21337196230888367, + "step": 2225, + "valid_targets_mean": 2458.9, + "valid_targets_min": 1082 + }, + { + "epoch": 3.7228714524207014, + "grad_norm": 0.7809092697038788, + "learning_rate": 2.1289713691754338e-05, + "loss": 0.2218, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1921750158071518, + "step": 2230, + "valid_targets_mean": 1931.6, + "valid_targets_min": 596 + }, + { + "epoch": 3.731218697829716, + "grad_norm": 0.7560491634824293, + "learning_rate": 2.1206610948605894e-05, + "loss": 0.2105, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19463232159614563, + "step": 2235, + "valid_targets_mean": 1938.1, + "valid_targets_min": 832 + }, + { + "epoch": 3.7395659432387314, + "grad_norm": 0.7459539532072016, + "learning_rate": 2.1123487291665753e-05, + "loss": 0.1937, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24305976927280426, + "step": 2240, + "valid_targets_mean": 2654.8, + "valid_targets_min": 762 + }, + { + "epoch": 3.7479131886477464, + "grad_norm": 0.6488047905600907, + "learning_rate": 2.104034416168899e-05, + "loss": 0.24, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24533326923847198, + "step": 2245, + "valid_targets_mean": 3224.9, + "valid_targets_min": 1407 + }, + { + "epoch": 3.7562604340567614, + "grad_norm": 0.6398628262478103, + "learning_rate": 2.0957182999768198e-05, + "loss": 0.2017, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2013179361820221, + "step": 2250, + "valid_targets_mean": 2803.2, + "valid_targets_min": 893 + }, + { + "epoch": 3.7646076794657763, + "grad_norm": 0.7308505980783866, + "learning_rate": 2.0874005247308512e-05, + "loss": 0.2227, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2478942573070526, + "step": 2255, + "valid_targets_mean": 2577.0, + "valid_targets_min": 832 + }, + { + "epoch": 3.7729549248747913, + "grad_norm": 0.6728062023612603, + "learning_rate": 2.0790812346002626e-05, + "loss": 0.1936, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1975017935037613, + "step": 2260, + "valid_targets_mean": 2567.7, + "valid_targets_min": 975 + }, + { + "epoch": 3.7813021702838063, + "grad_norm": 0.7267880171435015, + "learning_rate": 2.0707605737805797e-05, + "loss": 0.2117, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21225476264953613, + "step": 2265, + "valid_targets_mean": 2138.4, + "valid_targets_min": 712 + }, + { + "epoch": 3.7896494156928213, + "grad_norm": 0.7804764968075086, + "learning_rate": 2.0624386864910876e-05, + "loss": 0.2091, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19162751734256744, + "step": 2270, + "valid_targets_mean": 1975.8, + "valid_targets_min": 670 + }, + { + "epoch": 3.7979966611018363, + "grad_norm": 0.694145808170385, + "learning_rate": 2.054115716972328e-05, + "loss": 0.2004, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17722634971141815, + "step": 2275, + "valid_targets_mean": 2189.9, + "valid_targets_min": 614 + }, + { + "epoch": 3.8063439065108513, + "grad_norm": 0.6377495631501621, + "learning_rate": 2.045791809483601e-05, + "loss": 0.2253, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21739456057548523, + "step": 2280, + "valid_targets_mean": 2950.0, + "valid_targets_min": 920 + }, + { + "epoch": 3.8146911519198663, + "grad_norm": 0.7049710163243371, + "learning_rate": 2.0374671083004642e-05, + "loss": 0.1952, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20642341673374176, + "step": 2285, + "valid_targets_mean": 2407.1, + "valid_targets_min": 955 + }, + { + "epoch": 3.8230383973288813, + "grad_norm": 0.715307747486492, + "learning_rate": 2.0291417577122314e-05, + "loss": 0.2051, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19637277722358704, + "step": 2290, + "valid_targets_mean": 2259.7, + "valid_targets_min": 659 + }, + { + "epoch": 3.8313856427378967, + "grad_norm": 0.6976199525244242, + "learning_rate": 2.0208159020194734e-05, + "loss": 0.2085, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2205023169517517, + "step": 2295, + "valid_targets_mean": 2503.6, + "valid_targets_min": 1159 + }, + { + "epoch": 3.8397328881469113, + "grad_norm": 0.650485187576972, + "learning_rate": 2.012489685531515e-05, + "loss": 0.1886, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2000371515750885, + "step": 2300, + "valid_targets_mean": 2891.1, + "valid_targets_min": 773 + }, + { + "epoch": 3.8480801335559267, + "grad_norm": 0.6299480584067116, + "learning_rate": 2.0041632525639356e-05, + "loss": 0.2162, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21248218417167664, + "step": 2305, + "valid_targets_mean": 3329.3, + "valid_targets_min": 917 + }, + { + "epoch": 3.8564273789649417, + "grad_norm": 0.6770111184963179, + "learning_rate": 1.9958367474360648e-05, + "loss": 0.2083, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21817216277122498, + "step": 2310, + "valid_targets_mean": 2617.1, + "valid_targets_min": 966 + }, + { + "epoch": 3.8647746243739567, + "grad_norm": 0.6059743850650934, + "learning_rate": 1.9875103144684852e-05, + "loss": 0.1935, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17140190303325653, + "step": 2315, + "valid_targets_mean": 2652.0, + "valid_targets_min": 605 + }, + { + "epoch": 3.8731218697829717, + "grad_norm": 0.6549731388872466, + "learning_rate": 1.979184097980527e-05, + "loss": 0.2087, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2195884734392166, + "step": 2320, + "valid_targets_mean": 3325.1, + "valid_targets_min": 595 + }, + { + "epoch": 3.8814691151919867, + "grad_norm": 0.8016279355136618, + "learning_rate": 1.9708582422877693e-05, + "loss": 0.2197, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20927119255065918, + "step": 2325, + "valid_targets_mean": 1874.6, + "valid_targets_min": 506 + }, + { + "epoch": 3.8898163606010017, + "grad_norm": 0.7114167133031067, + "learning_rate": 1.9625328916995365e-05, + "loss": 0.1999, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.189249187707901, + "step": 2330, + "valid_targets_mean": 2111.6, + "valid_targets_min": 584 + }, + { + "epoch": 3.8981636060100167, + "grad_norm": 0.8141535488921566, + "learning_rate": 1.9542081905163997e-05, + "loss": 0.2004, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22716504335403442, + "step": 2335, + "valid_targets_mean": 1641.9, + "valid_targets_min": 512 + }, + { + "epoch": 3.9065108514190316, + "grad_norm": 0.6360167034721957, + "learning_rate": 1.9458842830276724e-05, + "loss": 0.2263, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3129171133041382, + "step": 2340, + "valid_targets_mean": 3491.8, + "valid_targets_min": 573 + }, + { + "epoch": 3.9148580968280466, + "grad_norm": 0.6839140749178415, + "learning_rate": 1.937561313508913e-05, + "loss": 0.1994, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21404977142810822, + "step": 2345, + "valid_targets_mean": 2504.5, + "valid_targets_min": 807 + }, + { + "epoch": 3.9232053422370616, + "grad_norm": 0.682830540324075, + "learning_rate": 1.9292394262194213e-05, + "loss": 0.222, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18228735029697418, + "step": 2350, + "valid_targets_mean": 2244.4, + "valid_targets_min": 921 + }, + { + "epoch": 3.9315525876460766, + "grad_norm": 0.5903144670873757, + "learning_rate": 1.9209187653997385e-05, + "loss": 0.2111, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.154928058385849, + "step": 2355, + "valid_targets_mean": 2992.8, + "valid_targets_min": 504 + }, + { + "epoch": 3.939899833055092, + "grad_norm": 0.8046632262469848, + "learning_rate": 1.91259947526915e-05, + "loss": 0.1927, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17956694960594177, + "step": 2360, + "valid_targets_mean": 1573.6, + "valid_targets_min": 918 + }, + { + "epoch": 3.9482470784641066, + "grad_norm": 0.7214459827467645, + "learning_rate": 1.904281700023181e-05, + "loss": 0.209, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22078362107276917, + "step": 2365, + "valid_targets_mean": 2344.6, + "valid_targets_min": 1014 + }, + { + "epoch": 3.956594323873122, + "grad_norm": 0.7726818600012659, + "learning_rate": 1.895965583831102e-05, + "loss": 0.2101, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23711560666561127, + "step": 2370, + "valid_targets_mean": 1962.9, + "valid_targets_min": 909 + }, + { + "epoch": 3.964941569282137, + "grad_norm": 0.9719727401517482, + "learning_rate": 1.887651270833425e-05, + "loss": 0.2127, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21433967351913452, + "step": 2375, + "valid_targets_mean": 1982.1, + "valid_targets_min": 781 + }, + { + "epoch": 3.973288814691152, + "grad_norm": 0.8241914077550181, + "learning_rate": 1.8793389051394116e-05, + "loss": 0.2034, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2188546061515808, + "step": 2380, + "valid_targets_mean": 1938.2, + "valid_targets_min": 501 + }, + { + "epoch": 3.981636060100167, + "grad_norm": 0.6623290144770669, + "learning_rate": 1.8710286308245665e-05, + "loss": 0.203, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2089979350566864, + "step": 2385, + "valid_targets_mean": 2435.5, + "valid_targets_min": 973 + }, + { + "epoch": 3.989983305509182, + "grad_norm": 0.6191703769782088, + "learning_rate": 1.8627205919281507e-05, + "loss": 0.2083, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1830480992794037, + "step": 2390, + "valid_targets_mean": 2697.2, + "valid_targets_min": 457 + }, + { + "epoch": 3.998330550918197, + "grad_norm": 0.5849470086893942, + "learning_rate": 1.8544149324506736e-05, + "loss": 0.2144, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23716729879379272, + "step": 2395, + "valid_targets_mean": 3264.4, + "valid_targets_min": 784 + }, + { + "epoch": 4.006677796327212, + "grad_norm": 0.5582143247963175, + "learning_rate": 1.8461117963514087e-05, + "loss": 0.1808, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1648591160774231, + "step": 2400, + "valid_targets_mean": 3364.1, + "valid_targets_min": 593 + }, + { + "epoch": 4.015025041736227, + "grad_norm": 0.712609860590282, + "learning_rate": 1.8378113275458877e-05, + "loss": 0.1843, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16371040046215057, + "step": 2405, + "valid_targets_mean": 2522.4, + "valid_targets_min": 896 + }, + { + "epoch": 4.023372287145242, + "grad_norm": 0.6302951960527349, + "learning_rate": 1.8295136699034152e-05, + "loss": 0.1922, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16981709003448486, + "step": 2410, + "valid_targets_mean": 3448.9, + "valid_targets_min": 720 + }, + { + "epoch": 4.031719532554257, + "grad_norm": 0.6973424752494735, + "learning_rate": 1.8212189672445665e-05, + "loss": 0.1579, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18264523148536682, + "step": 2415, + "valid_targets_mean": 2590.7, + "valid_targets_min": 602 + }, + { + "epoch": 4.040066777963272, + "grad_norm": 0.7405848585089333, + "learning_rate": 1.812927363338703e-05, + "loss": 0.1811, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20967480540275574, + "step": 2420, + "valid_targets_mean": 2410.1, + "valid_targets_min": 784 + }, + { + "epoch": 4.048414023372287, + "grad_norm": 0.6468624052165478, + "learning_rate": 1.8046390019014738e-05, + "loss": 0.1705, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13376326858997345, + "step": 2425, + "valid_targets_mean": 2640.8, + "valid_targets_min": 876 + }, + { + "epoch": 4.056761268781302, + "grad_norm": 0.7714422118717371, + "learning_rate": 1.7963540265923298e-05, + "loss": 0.169, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17131450772285461, + "step": 2430, + "valid_targets_mean": 2827.1, + "valid_targets_min": 506 + }, + { + "epoch": 4.065108514190317, + "grad_norm": 0.8047015948863985, + "learning_rate": 1.788072581012028e-05, + "loss": 0.205, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20028193295001984, + "step": 2435, + "valid_targets_mean": 2139.6, + "valid_targets_min": 840 + }, + { + "epoch": 4.073455759599332, + "grad_norm": 0.8168427758567236, + "learning_rate": 1.779794808700149e-05, + "loss": 0.1715, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17494890093803406, + "step": 2440, + "valid_targets_mean": 2450.1, + "valid_targets_min": 831 + }, + { + "epoch": 4.081803005008347, + "grad_norm": 0.6952936019629056, + "learning_rate": 1.7715208531326032e-05, + "loss": 0.1822, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19658498466014862, + "step": 2445, + "valid_targets_mean": 3185.4, + "valid_targets_min": 806 + }, + { + "epoch": 4.090150250417362, + "grad_norm": 0.7743934240304633, + "learning_rate": 1.76325085771915e-05, + "loss": 0.1923, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2374129444360733, + "step": 2450, + "valid_targets_mean": 2400.8, + "valid_targets_min": 1087 + }, + { + "epoch": 4.098497495826377, + "grad_norm": 0.6609765472235122, + "learning_rate": 1.7549849658009037e-05, + "loss": 0.1706, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14749598503112793, + "step": 2455, + "valid_targets_mean": 2871.9, + "valid_targets_min": 957 + }, + { + "epoch": 4.106844741235392, + "grad_norm": 0.6101007189851678, + "learning_rate": 1.7467233206478597e-05, + "loss": 0.165, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15067794919013977, + "step": 2460, + "valid_targets_mean": 3465.1, + "valid_targets_min": 1039 + }, + { + "epoch": 4.115191986644407, + "grad_norm": 0.5723123016148967, + "learning_rate": 1.7384660654564006e-05, + "loss": 0.1763, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14278893172740936, + "step": 2465, + "valid_targets_mean": 3263.4, + "valid_targets_min": 894 + }, + { + "epoch": 4.123539232053423, + "grad_norm": 0.8367092789249855, + "learning_rate": 1.7302133433468232e-05, + "loss": 0.1896, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17440810799598694, + "step": 2470, + "valid_targets_mean": 1854.0, + "valid_targets_min": 910 + }, + { + "epoch": 4.131886477462437, + "grad_norm": 0.6879161702942003, + "learning_rate": 1.7219652973608512e-05, + "loss": 0.1744, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16357824206352234, + "step": 2475, + "valid_targets_mean": 2852.9, + "valid_targets_min": 1087 + }, + { + "epoch": 4.140233722871453, + "grad_norm": 0.7402621755937144, + "learning_rate": 1.7137220704591605e-05, + "loss": 0.1844, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23762917518615723, + "step": 2480, + "valid_targets_mean": 2942.2, + "valid_targets_min": 797 + }, + { + "epoch": 4.148580968280467, + "grad_norm": 0.9789294953609318, + "learning_rate": 1.7054838055188972e-05, + "loss": 0.175, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14113131165504456, + "step": 2485, + "valid_targets_mean": 2491.5, + "valid_targets_min": 915 + }, + { + "epoch": 4.156928213689483, + "grad_norm": 0.8155143288103909, + "learning_rate": 1.6972506453312057e-05, + "loss": 0.169, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1591186821460724, + "step": 2490, + "valid_targets_mean": 2304.6, + "valid_targets_min": 810 + }, + { + "epoch": 4.165275459098497, + "grad_norm": 0.7582965189370305, + "learning_rate": 1.6890227325987498e-05, + "loss": 0.1789, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1897093653678894, + "step": 2495, + "valid_targets_mean": 2449.8, + "valid_targets_min": 888 + }, + { + "epoch": 4.173622704507513, + "grad_norm": 0.9022866245657722, + "learning_rate": 1.6808002099332422e-05, + "loss": 0.1748, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16646668314933777, + "step": 2500, + "valid_targets_mean": 1401.7, + "valid_targets_min": 459 + }, + { + "epoch": 4.181969949916527, + "grad_norm": 0.6353207443222258, + "learning_rate": 1.672583219852969e-05, + "loss": 0.1855, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19985511898994446, + "step": 2505, + "valid_targets_mean": 2908.8, + "valid_targets_min": 893 + }, + { + "epoch": 4.190317195325543, + "grad_norm": 0.6992695972271169, + "learning_rate": 1.6643719047803243e-05, + "loss": 0.2099, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2118116170167923, + "step": 2510, + "valid_targets_mean": 2788.0, + "valid_targets_min": 538 + }, + { + "epoch": 4.198664440734557, + "grad_norm": 0.7166690117751446, + "learning_rate": 1.656166407039339e-05, + "loss": 0.1703, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15704311430454254, + "step": 2515, + "valid_targets_mean": 2429.9, + "valid_targets_min": 679 + }, + { + "epoch": 4.207011686143573, + "grad_norm": 0.6907549087139597, + "learning_rate": 1.647966868853212e-05, + "loss": 0.1797, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16582372784614563, + "step": 2520, + "valid_targets_mean": 2947.7, + "valid_targets_min": 776 + }, + { + "epoch": 4.215358931552587, + "grad_norm": 0.5916568264998654, + "learning_rate": 1.6397734323418494e-05, + "loss": 0.1927, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19136972725391388, + "step": 2525, + "valid_targets_mean": 3323.2, + "valid_targets_min": 1037 + }, + { + "epoch": 4.223706176961603, + "grad_norm": 0.8355390659981319, + "learning_rate": 1.6315862395193972e-05, + "loss": 0.1587, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16667363047599792, + "step": 2530, + "valid_targets_mean": 1949.8, + "valid_targets_min": 887 + }, + { + "epoch": 4.232053422370618, + "grad_norm": 0.8689209760850255, + "learning_rate": 1.6234054322917835e-05, + "loss": 0.1826, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20449230074882507, + "step": 2535, + "valid_targets_mean": 2197.4, + "valid_targets_min": 950 + }, + { + "epoch": 4.240400667779633, + "grad_norm": 0.7658079357198947, + "learning_rate": 1.615231152454255e-05, + "loss": 0.1696, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15454424917697906, + "step": 2540, + "valid_targets_mean": 2235.7, + "valid_targets_min": 547 + }, + { + "epoch": 4.248747913188648, + "grad_norm": 0.844338405392469, + "learning_rate": 1.6070635416889228e-05, + "loss": 0.1799, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18702611327171326, + "step": 2545, + "valid_targets_mean": 1845.4, + "valid_targets_min": 728 + }, + { + "epoch": 4.257095158597663, + "grad_norm": 0.8034921341000566, + "learning_rate": 1.598902741562304e-05, + "loss": 0.1735, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1511102318763733, + "step": 2550, + "valid_targets_mean": 1769.6, + "valid_targets_min": 958 + }, + { + "epoch": 4.265442404006678, + "grad_norm": 0.8547237220801418, + "learning_rate": 1.590748893522871e-05, + "loss": 0.1904, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1948571652173996, + "step": 2555, + "valid_targets_mean": 2333.6, + "valid_targets_min": 929 + }, + { + "epoch": 4.273789649415693, + "grad_norm": 0.8629069589813636, + "learning_rate": 1.5826021388985947e-05, + "loss": 0.1878, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16754035651683807, + "step": 2560, + "valid_targets_mean": 1494.2, + "valid_targets_min": 836 + }, + { + "epoch": 4.282136894824708, + "grad_norm": 0.7403146735207645, + "learning_rate": 1.5744626188945023e-05, + "loss": 0.1642, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1810435801744461, + "step": 2565, + "valid_targets_mean": 2366.1, + "valid_targets_min": 842 + }, + { + "epoch": 4.290484140233723, + "grad_norm": 0.6797569126258614, + "learning_rate": 1.5663304745902218e-05, + "loss": 0.1749, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15583954751491547, + "step": 2570, + "valid_targets_mean": 2727.9, + "valid_targets_min": 666 + }, + { + "epoch": 4.298831385642738, + "grad_norm": 0.5877960226453054, + "learning_rate": 1.558205846937544e-05, + "loss": 0.1632, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15551185607910156, + "step": 2575, + "valid_targets_mean": 3411.2, + "valid_targets_min": 932 + }, + { + "epoch": 4.307178631051753, + "grad_norm": 0.741615218822046, + "learning_rate": 1.5500888767579722e-05, + "loss": 0.1771, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1795629858970642, + "step": 2580, + "valid_targets_mean": 2480.2, + "valid_targets_min": 712 + }, + { + "epoch": 4.315525876460768, + "grad_norm": 0.7628211565455336, + "learning_rate": 1.5419797047402897e-05, + "loss": 0.1714, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16926071047782898, + "step": 2585, + "valid_targets_mean": 2280.4, + "valid_targets_min": 754 + }, + { + "epoch": 4.323873121869783, + "grad_norm": 0.7388999483238966, + "learning_rate": 1.533878471438113e-05, + "loss": 0.1889, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19348183274269104, + "step": 2590, + "valid_targets_mean": 2381.2, + "valid_targets_min": 1017 + }, + { + "epoch": 4.332220367278798, + "grad_norm": 0.7884864288978387, + "learning_rate": 1.5257853172674622e-05, + "loss": 0.1722, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19860947132110596, + "step": 2595, + "valid_targets_mean": 2722.5, + "valid_targets_min": 625 + }, + { + "epoch": 4.340567612687813, + "grad_norm": 0.8674405544659288, + "learning_rate": 1.517700382504322e-05, + "loss": 0.1902, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20209550857543945, + "step": 2600, + "valid_targets_mean": 2756.9, + "valid_targets_min": 505 + }, + { + "epoch": 4.348914858096828, + "grad_norm": 0.8455635485526791, + "learning_rate": 1.5096238072822153e-05, + "loss": 0.1889, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22444558143615723, + "step": 2605, + "valid_targets_mean": 2094.9, + "valid_targets_min": 893 + }, + { + "epoch": 4.357262103505843, + "grad_norm": 0.7187554593824949, + "learning_rate": 1.5015557315897693e-05, + "loss": 0.1763, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17899161577224731, + "step": 2610, + "valid_targets_mean": 2411.4, + "valid_targets_min": 782 + }, + { + "epoch": 4.365609348914858, + "grad_norm": 0.7012500042948445, + "learning_rate": 1.4934962952682943e-05, + "loss": 0.1726, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15650507807731628, + "step": 2615, + "valid_targets_mean": 2286.0, + "valid_targets_min": 708 + }, + { + "epoch": 4.373956594323873, + "grad_norm": 0.7136418600160952, + "learning_rate": 1.4854456380093544e-05, + "loss": 0.1844, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19611938297748566, + "step": 2620, + "valid_targets_mean": 2849.1, + "valid_targets_min": 749 + }, + { + "epoch": 4.382303839732888, + "grad_norm": 0.6375685432115179, + "learning_rate": 1.4774038993523523e-05, + "loss": 0.1676, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16059112548828125, + "step": 2625, + "valid_targets_mean": 2953.4, + "valid_targets_min": 990 + }, + { + "epoch": 4.390651085141903, + "grad_norm": 0.8453867452754014, + "learning_rate": 1.4693712186821039e-05, + "loss": 0.1816, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18165963888168335, + "step": 2630, + "valid_targets_mean": 1884.1, + "valid_targets_min": 738 + }, + { + "epoch": 4.398998330550918, + "grad_norm": 0.7259710450071463, + "learning_rate": 1.4613477352264293e-05, + "loss": 0.1851, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18239548802375793, + "step": 2635, + "valid_targets_mean": 2681.4, + "valid_targets_min": 757 + }, + { + "epoch": 4.407345575959933, + "grad_norm": 0.8138629609968175, + "learning_rate": 1.4533335880537336e-05, + "loss": 0.1788, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17651066184043884, + "step": 2640, + "valid_targets_mean": 2010.4, + "valid_targets_min": 945 + }, + { + "epoch": 4.415692821368948, + "grad_norm": 0.6569685325005385, + "learning_rate": 1.4453289160706017e-05, + "loss": 0.171, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16272369027137756, + "step": 2645, + "valid_targets_mean": 2699.7, + "valid_targets_min": 872 + }, + { + "epoch": 4.424040066777963, + "grad_norm": 0.667045165519854, + "learning_rate": 1.4373338580193853e-05, + "loss": 0.173, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14593958854675293, + "step": 2650, + "valid_targets_mean": 2758.5, + "valid_targets_min": 974 + }, + { + "epoch": 4.432387312186978, + "grad_norm": 0.8246982264820992, + "learning_rate": 1.4293485524758045e-05, + "loss": 0.1815, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19268116354942322, + "step": 2655, + "valid_targets_mean": 2117.9, + "valid_targets_min": 899 + }, + { + "epoch": 4.440734557595993, + "grad_norm": 0.7487672031196141, + "learning_rate": 1.4213731378465379e-05, + "loss": 0.1727, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1685689240694046, + "step": 2660, + "valid_targets_mean": 2314.6, + "valid_targets_min": 645 + }, + { + "epoch": 4.449081803005008, + "grad_norm": 0.700420198972114, + "learning_rate": 1.4134077523668327e-05, + "loss": 0.1922, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20472325384616852, + "step": 2665, + "valid_targets_mean": 3130.6, + "valid_targets_min": 916 + }, + { + "epoch": 4.457429048414023, + "grad_norm": 0.7954277590596873, + "learning_rate": 1.405452534098101e-05, + "loss": 0.1926, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23737061023712158, + "step": 2670, + "valid_targets_mean": 2361.6, + "valid_targets_min": 671 + }, + { + "epoch": 4.465776293823039, + "grad_norm": 0.8079111183378004, + "learning_rate": 1.3975076209255321e-05, + "loss": 0.1716, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16911689937114716, + "step": 2675, + "valid_targets_mean": 1777.8, + "valid_targets_min": 816 + }, + { + "epoch": 4.474123539232053, + "grad_norm": 0.6597628112752423, + "learning_rate": 1.3895731505556978e-05, + "loss": 0.1837, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17149250209331512, + "step": 2680, + "valid_targets_mean": 3182.1, + "valid_targets_min": 453 + }, + { + "epoch": 4.482470784641069, + "grad_norm": 0.7125400817480053, + "learning_rate": 1.3816492605141712e-05, + "loss": 0.1953, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22295941412448883, + "step": 2685, + "valid_targets_mean": 2685.9, + "valid_targets_min": 573 + }, + { + "epoch": 4.490818030050083, + "grad_norm": 0.7068097619257393, + "learning_rate": 1.3737360881431374e-05, + "loss": 0.1788, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15548083186149597, + "step": 2690, + "valid_targets_mean": 2592.1, + "valid_targets_min": 958 + }, + { + "epoch": 4.499165275459099, + "grad_norm": 0.7131140860983529, + "learning_rate": 1.3658337705990185e-05, + "loss": 0.168, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15528467297554016, + "step": 2695, + "valid_targets_mean": 2340.9, + "valid_targets_min": 773 + }, + { + "epoch": 4.507512520868113, + "grad_norm": 0.6562872093825353, + "learning_rate": 1.3579424448500901e-05, + "loss": 0.1743, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15002676844596863, + "step": 2700, + "valid_targets_mean": 2793.1, + "valid_targets_min": 1235 + }, + { + "epoch": 4.515859766277129, + "grad_norm": 0.7650079636026997, + "learning_rate": 1.3500622476741142e-05, + "loss": 0.1854, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16849973797798157, + "step": 2705, + "valid_targets_mean": 2170.2, + "valid_targets_min": 664 + }, + { + "epoch": 4.524207011686143, + "grad_norm": 0.7868169395134512, + "learning_rate": 1.3421933156559605e-05, + "loss": 0.1949, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20308633148670197, + "step": 2710, + "valid_targets_mean": 2012.5, + "valid_targets_min": 712 + }, + { + "epoch": 4.532554257095159, + "grad_norm": 0.719956910883687, + "learning_rate": 1.3343357851852488e-05, + "loss": 0.1661, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16637387871742249, + "step": 2715, + "valid_targets_mean": 2267.7, + "valid_targets_min": 701 + }, + { + "epoch": 4.540901502504173, + "grad_norm": 0.7415810532746423, + "learning_rate": 1.3264897924539746e-05, + "loss": 0.1868, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17491191625595093, + "step": 2720, + "valid_targets_mean": 2424.7, + "valid_targets_min": 929 + }, + { + "epoch": 4.549248747913189, + "grad_norm": 0.6511631185388485, + "learning_rate": 1.3186554734541574e-05, + "loss": 0.1749, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15947668254375458, + "step": 2725, + "valid_targets_mean": 3068.3, + "valid_targets_min": 751 + }, + { + "epoch": 4.557595993322204, + "grad_norm": 0.9227000808193508, + "learning_rate": 1.3108329639754765e-05, + "loss": 0.182, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22771112620830536, + "step": 2730, + "valid_targets_mean": 1952.8, + "valid_targets_min": 914 + }, + { + "epoch": 4.565943238731219, + "grad_norm": 0.7927126744203515, + "learning_rate": 1.3030223996029237e-05, + "loss": 0.178, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17809253931045532, + "step": 2735, + "valid_targets_mean": 1970.2, + "valid_targets_min": 645 + }, + { + "epoch": 4.574290484140234, + "grad_norm": 0.777900031042274, + "learning_rate": 1.295223915714447e-05, + "loss": 0.1697, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14840282499790192, + "step": 2740, + "valid_targets_mean": 1710.5, + "valid_targets_min": 712 + }, + { + "epoch": 4.582637729549249, + "grad_norm": 0.6820773034629997, + "learning_rate": 1.287437647478611e-05, + "loss": 0.1655, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14310967922210693, + "step": 2745, + "valid_targets_mean": 2172.4, + "valid_targets_min": 538 + }, + { + "epoch": 4.590984974958264, + "grad_norm": 0.6229884894007276, + "learning_rate": 1.2796637298522466e-05, + "loss": 0.1656, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1727065145969391, + "step": 2750, + "valid_targets_mean": 3028.1, + "valid_targets_min": 529 + }, + { + "epoch": 4.599332220367279, + "grad_norm": 0.6038364194577132, + "learning_rate": 1.271902297578119e-05, + "loss": 0.1834, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21104693412780762, + "step": 2755, + "valid_targets_mean": 3364.7, + "valid_targets_min": 390 + }, + { + "epoch": 4.607679465776294, + "grad_norm": 0.7964870049743236, + "learning_rate": 1.2641534851825865e-05, + "loss": 0.1901, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20962056517601013, + "step": 2760, + "valid_targets_mean": 2508.1, + "valid_targets_min": 790 + }, + { + "epoch": 4.616026711185309, + "grad_norm": 0.7710656948800464, + "learning_rate": 1.256417426973272e-05, + "loss": 0.1783, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18509964644908905, + "step": 2765, + "valid_targets_mean": 2243.5, + "valid_targets_min": 890 + }, + { + "epoch": 4.624373956594324, + "grad_norm": 0.6691420333793963, + "learning_rate": 1.248694257036735e-05, + "loss": 0.1727, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15404322743415833, + "step": 2770, + "valid_targets_mean": 3098.4, + "valid_targets_min": 475 + }, + { + "epoch": 4.632721202003339, + "grad_norm": 0.591134878994522, + "learning_rate": 1.2409841092361457e-05, + "loss": 0.1855, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.198967844247818, + "step": 2775, + "valid_targets_mean": 3817.0, + "valid_targets_min": 596 + }, + { + "epoch": 4.641068447412354, + "grad_norm": 0.9334250080452924, + "learning_rate": 1.233287117208966e-05, + "loss": 0.1765, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20634141564369202, + "step": 2780, + "valid_targets_mean": 1813.1, + "valid_targets_min": 432 + }, + { + "epoch": 4.649415692821369, + "grad_norm": 0.6915514898540307, + "learning_rate": 1.2256034143646341e-05, + "loss": 0.1765, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16013593971729279, + "step": 2785, + "valid_targets_mean": 2534.8, + "valid_targets_min": 520 + }, + { + "epoch": 4.657762938230384, + "grad_norm": 0.7690312846353624, + "learning_rate": 1.2179331338822498e-05, + "loss": 0.1656, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1510334312915802, + "step": 2790, + "valid_targets_mean": 2156.8, + "valid_targets_min": 721 + }, + { + "epoch": 4.666110183639399, + "grad_norm": 0.7433903257586516, + "learning_rate": 1.2102764087082685e-05, + "loss": 0.1834, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17419683933258057, + "step": 2795, + "valid_targets_mean": 2088.9, + "valid_targets_min": 759 + }, + { + "epoch": 4.674457429048414, + "grad_norm": 0.6461669663489145, + "learning_rate": 1.2026333715541959e-05, + "loss": 0.193, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1915496289730072, + "step": 2800, + "valid_targets_mean": 3340.2, + "valid_targets_min": 1143 + }, + { + "epoch": 4.682804674457429, + "grad_norm": 0.6523177763616254, + "learning_rate": 1.1950041548942867e-05, + "loss": 0.168, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17690129578113556, + "step": 2805, + "valid_targets_mean": 2868.9, + "valid_targets_min": 564 + }, + { + "epoch": 4.691151919866444, + "grad_norm": 0.7960511282080353, + "learning_rate": 1.1873888909632508e-05, + "loss": 0.1888, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1792602241039276, + "step": 2810, + "valid_targets_mean": 1824.0, + "valid_targets_min": 563 + }, + { + "epoch": 4.699499165275459, + "grad_norm": 0.7893666566538475, + "learning_rate": 1.1797877117539587e-05, + "loss": 0.1997, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15375570952892303, + "step": 2815, + "valid_targets_mean": 1896.1, + "valid_targets_min": 806 + }, + { + "epoch": 4.707846410684474, + "grad_norm": 0.8217007178733245, + "learning_rate": 1.1722007490151566e-05, + "loss": 0.1697, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.158712238073349, + "step": 2820, + "valid_targets_mean": 1936.1, + "valid_targets_min": 965 + }, + { + "epoch": 4.716193656093489, + "grad_norm": 1.0827147315522407, + "learning_rate": 1.16462813424918e-05, + "loss": 0.1732, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16912072896957397, + "step": 2825, + "valid_targets_mean": 2437.0, + "valid_targets_min": 662 + }, + { + "epoch": 4.724540901502504, + "grad_norm": 0.8424015476140527, + "learning_rate": 1.1570699987096763e-05, + "loss": 0.1884, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17218352854251862, + "step": 2830, + "valid_targets_mean": 1837.6, + "valid_targets_min": 740 + }, + { + "epoch": 4.732888146911519, + "grad_norm": 0.6790398587079144, + "learning_rate": 1.1495264733993288e-05, + "loss": 0.1664, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15455222129821777, + "step": 2835, + "valid_targets_mean": 2666.3, + "valid_targets_min": 679 + }, + { + "epoch": 4.741235392320534, + "grad_norm": 0.854374154720206, + "learning_rate": 1.141997689067587e-05, + "loss": 0.1812, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17885088920593262, + "step": 2840, + "valid_targets_mean": 2102.4, + "valid_targets_min": 736 + }, + { + "epoch": 4.749582637729549, + "grad_norm": 0.7389911644705557, + "learning_rate": 1.1344837762083997e-05, + "loss": 0.1651, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1637381613254547, + "step": 2845, + "valid_targets_mean": 2150.2, + "valid_targets_min": 1107 + }, + { + "epoch": 4.757929883138564, + "grad_norm": 0.7772270526448651, + "learning_rate": 1.1269848650579532e-05, + "loss": 0.1665, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1764950156211853, + "step": 2850, + "valid_targets_mean": 2228.4, + "valid_targets_min": 840 + }, + { + "epoch": 4.766277128547579, + "grad_norm": 0.7629040715420716, + "learning_rate": 1.1195010855924138e-05, + "loss": 0.1851, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16333161294460297, + "step": 2855, + "valid_targets_mean": 2036.3, + "valid_targets_min": 834 + }, + { + "epoch": 4.774624373956595, + "grad_norm": 0.8626015140881829, + "learning_rate": 1.112032567525676e-05, + "loss": 0.1909, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2554243505001068, + "step": 2860, + "valid_targets_mean": 2369.2, + "valid_targets_min": 556 + }, + { + "epoch": 4.782971619365609, + "grad_norm": 0.8413855471638249, + "learning_rate": 1.1045794403071133e-05, + "loss": 0.1822, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16166135668754578, + "step": 2865, + "valid_targets_mean": 2081.1, + "valid_targets_min": 742 + }, + { + "epoch": 4.791318864774624, + "grad_norm": 0.9377825257518903, + "learning_rate": 1.0971418331193337e-05, + "loss": 0.1665, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.176789790391922, + "step": 2870, + "valid_targets_mean": 1710.5, + "valid_targets_min": 453 + }, + { + "epoch": 4.799666110183639, + "grad_norm": 0.7052539968758408, + "learning_rate": 1.0897198748759435e-05, + "loss": 0.1801, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1970149576663971, + "step": 2875, + "valid_targets_mean": 2905.1, + "valid_targets_min": 572 + }, + { + "epoch": 4.808013355592655, + "grad_norm": 0.6944260382589246, + "learning_rate": 1.0823136942193089e-05, + "loss": 0.1953, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16842995584011078, + "step": 2880, + "valid_targets_mean": 2631.3, + "valid_targets_min": 725 + }, + { + "epoch": 4.816360601001669, + "grad_norm": 0.7946388628378875, + "learning_rate": 1.07492341951833e-05, + "loss": 0.176, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18271085619926453, + "step": 2885, + "valid_targets_mean": 2143.5, + "valid_targets_min": 578 + }, + { + "epoch": 4.824707846410685, + "grad_norm": 0.6553010955211231, + "learning_rate": 1.0675491788662132e-05, + "loss": 0.1958, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2044571042060852, + "step": 2890, + "valid_targets_mean": 2942.9, + "valid_targets_min": 840 + }, + { + "epoch": 4.833055091819699, + "grad_norm": 0.7053451080860554, + "learning_rate": 1.060191100078252e-05, + "loss": 0.18, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1576908975839615, + "step": 2895, + "valid_targets_mean": 2508.3, + "valid_targets_min": 632 + }, + { + "epoch": 4.841402337228715, + "grad_norm": 0.7479367517717694, + "learning_rate": 1.0528493106896126e-05, + "loss": 0.1782, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1778864860534668, + "step": 2900, + "valid_targets_mean": 2387.6, + "valid_targets_min": 845 + }, + { + "epoch": 4.849749582637729, + "grad_norm": 0.7206203061881542, + "learning_rate": 1.0455239379531213e-05, + "loss": 0.1808, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17698757350444794, + "step": 2905, + "valid_targets_mean": 2898.8, + "valid_targets_min": 922 + }, + { + "epoch": 4.858096828046745, + "grad_norm": 0.9036743866964919, + "learning_rate": 1.0382151088370605e-05, + "loss": 0.1886, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2069159746170044, + "step": 2910, + "valid_targets_mean": 1690.0, + "valid_targets_min": 661 + }, + { + "epoch": 4.866444073455759, + "grad_norm": 0.769440780037667, + "learning_rate": 1.0309229500229665e-05, + "loss": 0.1723, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16133229434490204, + "step": 2915, + "valid_targets_mean": 1955.2, + "valid_targets_min": 528 + }, + { + "epoch": 4.874791318864775, + "grad_norm": 0.7929474438600562, + "learning_rate": 1.023647587903438e-05, + "loss": 0.1828, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16340085864067078, + "step": 2920, + "valid_targets_mean": 1919.8, + "valid_targets_min": 695 + }, + { + "epoch": 4.883138564273789, + "grad_norm": 0.9909614135494529, + "learning_rate": 1.0163891485799362e-05, + "loss": 0.184, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20314660668373108, + "step": 2925, + "valid_targets_mean": 2611.6, + "valid_targets_min": 834 + }, + { + "epoch": 4.891485809682805, + "grad_norm": 0.7904652917571813, + "learning_rate": 1.0091477578606121e-05, + "loss": 0.1687, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20079383254051208, + "step": 2930, + "valid_targets_mean": 2121.3, + "valid_targets_min": 991 + }, + { + "epoch": 4.89983305509182, + "grad_norm": 0.8061641761629065, + "learning_rate": 1.0019235412581117e-05, + "loss": 0.1812, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16205865144729614, + "step": 2935, + "valid_targets_mean": 2475.9, + "valid_targets_min": 507 + }, + { + "epoch": 4.908180300500835, + "grad_norm": 0.7427989253983776, + "learning_rate": 9.947166239874144e-06, + "loss": 0.173, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15285256505012512, + "step": 2940, + "valid_targets_mean": 1979.8, + "valid_targets_min": 501 + }, + { + "epoch": 4.91652754590985, + "grad_norm": 0.7206537775313828, + "learning_rate": 9.875271309636498e-06, + "loss": 0.1764, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17327359318733215, + "step": 2945, + "valid_targets_mean": 2575.5, + "valid_targets_min": 731 + }, + { + "epoch": 4.924874791318865, + "grad_norm": 0.7962913580737736, + "learning_rate": 9.803551867999445e-06, + "loss": 0.2114, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18946567177772522, + "step": 2950, + "valid_targets_mean": 2189.0, + "valid_targets_min": 441 + }, + { + "epoch": 4.93322203672788, + "grad_norm": 0.7413328590892105, + "learning_rate": 9.732009158052508e-06, + "loss": 0.1819, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1814548671245575, + "step": 2955, + "valid_targets_mean": 2174.3, + "valid_targets_min": 1139 + }, + { + "epoch": 4.941569282136895, + "grad_norm": 0.9343196174855941, + "learning_rate": 9.660644419822037e-06, + "loss": 0.1966, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22384901344776154, + "step": 2960, + "valid_targets_mean": 1787.4, + "valid_targets_min": 703 + }, + { + "epoch": 4.94991652754591, + "grad_norm": 0.7501622442184374, + "learning_rate": 9.589458890249595e-06, + "loss": 0.1975, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20376619696617126, + "step": 2965, + "valid_targets_mean": 2312.8, + "valid_targets_min": 849 + }, + { + "epoch": 4.958263772954925, + "grad_norm": 0.7496144594045819, + "learning_rate": 9.518453803170637e-06, + "loss": 0.1846, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19321897625923157, + "step": 2970, + "valid_targets_mean": 2504.2, + "valid_targets_min": 509 + }, + { + "epoch": 4.96661101836394, + "grad_norm": 0.7676352471740494, + "learning_rate": 9.447630389293017e-06, + "loss": 0.1931, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18893374502658844, + "step": 2975, + "valid_targets_mean": 2038.6, + "valid_targets_min": 725 + }, + { + "epoch": 4.974958263772955, + "grad_norm": 0.7331032071687464, + "learning_rate": 9.37698987617576e-06, + "loss": 0.1887, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20503410696983337, + "step": 2980, + "valid_targets_mean": 2903.5, + "valid_targets_min": 782 + }, + { + "epoch": 4.98330550918197, + "grad_norm": 0.7328757864971213, + "learning_rate": 9.306533488207671e-06, + "loss": 0.1766, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19836439192295074, + "step": 2985, + "valid_targets_mean": 2694.4, + "valid_targets_min": 825 + }, + { + "epoch": 4.9916527545909855, + "grad_norm": 0.6540351800954352, + "learning_rate": 9.236262446586239e-06, + "loss": 0.1892, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18135029077529907, + "step": 2990, + "valid_targets_mean": 3412.8, + "valid_targets_min": 1251 + }, + { + "epoch": 5.0, + "grad_norm": 0.7590428320185313, + "learning_rate": 9.166177969296343e-06, + "loss": 0.1865, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15430757403373718, + "step": 2995, + "valid_targets_mean": 1744.5, + "valid_targets_min": 653 + }, + { + "epoch": 5.008347245409015, + "grad_norm": 0.7520309827518988, + "learning_rate": 9.096281271089264e-06, + "loss": 0.1798, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1860939860343933, + "step": 3000, + "valid_targets_mean": 2086.6, + "valid_targets_min": 736 + }, + { + "epoch": 5.01669449081803, + "grad_norm": 0.7450856605743555, + "learning_rate": 9.02657356346151e-06, + "loss": 0.149, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16095194220542908, + "step": 3005, + "valid_targets_mean": 2473.5, + "valid_targets_min": 635 + }, + { + "epoch": 5.025041736227045, + "grad_norm": 0.8484840850461908, + "learning_rate": 8.957056054633934e-06, + "loss": 0.1486, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14853060245513916, + "step": 3010, + "valid_targets_mean": 2099.6, + "valid_targets_min": 706 + }, + { + "epoch": 5.03338898163606, + "grad_norm": 0.7546355690002501, + "learning_rate": 8.887729949530682e-06, + "loss": 0.1482, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14556968212127686, + "step": 3015, + "valid_targets_mean": 2353.9, + "valid_targets_min": 757 + }, + { + "epoch": 5.041736227045075, + "grad_norm": 0.7422805360013857, + "learning_rate": 8.818596449758416e-06, + "loss": 0.1647, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19636158645153046, + "step": 3020, + "valid_targets_mean": 3145.4, + "valid_targets_min": 919 + }, + { + "epoch": 5.05008347245409, + "grad_norm": 0.918132548475914, + "learning_rate": 8.749656753585379e-06, + "loss": 0.1667, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17617198824882507, + "step": 3025, + "valid_targets_mean": 2369.6, + "valid_targets_min": 1023 + }, + { + "epoch": 5.058430717863105, + "grad_norm": 0.6567541695583812, + "learning_rate": 8.680912055920734e-06, + "loss": 0.1549, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1814095377922058, + "step": 3030, + "valid_targets_mean": 3056.4, + "valid_targets_min": 708 + }, + { + "epoch": 5.06677796327212, + "grad_norm": 0.7999183239590794, + "learning_rate": 8.612363548293744e-06, + "loss": 0.15, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14865170419216156, + "step": 3035, + "valid_targets_mean": 2286.2, + "valid_targets_min": 663 + }, + { + "epoch": 5.075125208681135, + "grad_norm": 0.6773391507442554, + "learning_rate": 8.54401241883322e-06, + "loss": 0.1592, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1260058879852295, + "step": 3040, + "valid_targets_mean": 3038.4, + "valid_targets_min": 1184 + }, + { + "epoch": 5.08347245409015, + "grad_norm": 0.719524406758551, + "learning_rate": 8.475859852246854e-06, + "loss": 0.1614, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16532570123672485, + "step": 3045, + "valid_targets_mean": 2690.9, + "valid_targets_min": 578 + }, + { + "epoch": 5.091819699499165, + "grad_norm": 0.8971852588300996, + "learning_rate": 8.407907029800732e-06, + "loss": 0.1641, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16092701256275177, + "step": 3050, + "valid_targets_mean": 1821.4, + "valid_targets_min": 825 + }, + { + "epoch": 5.10016694490818, + "grad_norm": 0.6078960948092229, + "learning_rate": 8.340155129298824e-06, + "loss": 0.1801, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23741009831428528, + "step": 3055, + "valid_targets_mean": 4034.4, + "valid_targets_min": 690 + }, + { + "epoch": 5.108514190317195, + "grad_norm": 0.7882414846730996, + "learning_rate": 8.272605325062595e-06, + "loss": 0.1596, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15722253918647766, + "step": 3060, + "valid_targets_mean": 2323.4, + "valid_targets_min": 504 + }, + { + "epoch": 5.116861435726211, + "grad_norm": 0.9098279022892684, + "learning_rate": 8.205258787910636e-06, + "loss": 0.1521, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17256811261177063, + "step": 3065, + "valid_targets_mean": 2206.5, + "valid_targets_min": 608 + }, + { + "epoch": 5.125208681135225, + "grad_norm": 0.7240707093959398, + "learning_rate": 8.138116685138386e-06, + "loss": 0.1588, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17344221472740173, + "step": 3070, + "valid_targets_mean": 2538.3, + "valid_targets_min": 690 + }, + { + "epoch": 5.133555926544241, + "grad_norm": 0.7492785353099531, + "learning_rate": 8.07118018049788e-06, + "loss": 0.1438, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14222240447998047, + "step": 3075, + "valid_targets_mean": 2094.6, + "valid_targets_min": 1074 + }, + { + "epoch": 5.141903171953255, + "grad_norm": 0.6687999817722378, + "learning_rate": 8.00445043417759e-06, + "loss": 0.1502, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12739010155200958, + "step": 3080, + "valid_targets_mean": 3014.4, + "valid_targets_min": 960 + }, + { + "epoch": 5.150250417362271, + "grad_norm": 0.7476929157644402, + "learning_rate": 7.93792860278232e-06, + "loss": 0.1516, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13643479347229004, + "step": 3085, + "valid_targets_mean": 2542.9, + "valid_targets_min": 918 + }, + { + "epoch": 5.158597662771285, + "grad_norm": 0.7050392143330814, + "learning_rate": 7.871615839313147e-06, + "loss": 0.1528, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13219505548477173, + "step": 3090, + "valid_targets_mean": 2729.2, + "valid_targets_min": 686 + }, + { + "epoch": 5.166944908180301, + "grad_norm": 0.8051585606978793, + "learning_rate": 7.805513293147441e-06, + "loss": 0.1468, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13955295085906982, + "step": 3095, + "valid_targets_mean": 2180.2, + "valid_targets_min": 628 + }, + { + "epoch": 5.175292153589315, + "grad_norm": 0.6502938338011637, + "learning_rate": 7.739622110018951e-06, + "loss": 0.1608, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16826727986335754, + "step": 3100, + "valid_targets_mean": 2908.3, + "valid_targets_min": 787 + }, + { + "epoch": 5.183639398998331, + "grad_norm": 1.194021088514348, + "learning_rate": 7.673943431997935e-06, + "loss": 0.1797, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1977127492427826, + "step": 3105, + "valid_targets_mean": 1920.2, + "valid_targets_min": 956 + }, + { + "epoch": 5.191986644407345, + "grad_norm": 0.667375141409639, + "learning_rate": 7.608478397471366e-06, + "loss": 0.1621, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2175963819026947, + "step": 3110, + "valid_targets_mean": 3179.4, + "valid_targets_min": 947 + }, + { + "epoch": 5.200333889816361, + "grad_norm": 0.6177024661765164, + "learning_rate": 7.543228141123217e-06, + "loss": 0.1515, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1563173234462738, + "step": 3115, + "valid_targets_mean": 3358.5, + "valid_targets_min": 855 + }, + { + "epoch": 5.208681135225375, + "grad_norm": 0.8206104171437563, + "learning_rate": 7.478193793914767e-06, + "loss": 0.1642, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17827922105789185, + "step": 3120, + "valid_targets_mean": 2144.1, + "valid_targets_min": 1083 + }, + { + "epoch": 5.217028380634391, + "grad_norm": 0.6762054405691768, + "learning_rate": 7.4133764830650246e-06, + "loss": 0.149, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13363927602767944, + "step": 3125, + "valid_targets_mean": 2762.0, + "valid_targets_min": 727 + }, + { + "epoch": 5.225375626043405, + "grad_norm": 0.791354259359337, + "learning_rate": 7.348777332031168e-06, + "loss": 0.1507, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14695990085601807, + "step": 3130, + "valid_targets_mean": 2506.4, + "valid_targets_min": 703 + }, + { + "epoch": 5.233722871452421, + "grad_norm": 0.7300958398056937, + "learning_rate": 7.28439746048909e-06, + "loss": 0.171, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15373843908309937, + "step": 3135, + "valid_targets_mean": 3188.4, + "valid_targets_min": 507 + }, + { + "epoch": 5.242070116861436, + "grad_norm": 0.7362890272757675, + "learning_rate": 7.22023798431398e-06, + "loss": 0.1441, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16128788888454437, + "step": 3140, + "valid_targets_mean": 2276.9, + "valid_targets_min": 887 + }, + { + "epoch": 5.250417362270451, + "grad_norm": 0.8829952521688355, + "learning_rate": 7.156300015560993e-06, + "loss": 0.1669, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1735071986913681, + "step": 3145, + "valid_targets_mean": 2579.6, + "valid_targets_min": 871 + }, + { + "epoch": 5.258764607679466, + "grad_norm": 0.7900174807469927, + "learning_rate": 7.092584662445958e-06, + "loss": 0.1526, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14157384634017944, + "step": 3150, + "valid_targets_mean": 2176.6, + "valid_targets_min": 912 + }, + { + "epoch": 5.267111853088481, + "grad_norm": 0.9226995287050941, + "learning_rate": 7.029093029326191e-06, + "loss": 0.1705, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1748785674571991, + "step": 3155, + "valid_targets_mean": 1679.8, + "valid_targets_min": 907 + }, + { + "epoch": 5.275459098497496, + "grad_norm": 0.8208375749055641, + "learning_rate": 6.965826216681337e-06, + "loss": 0.1577, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17080628871917725, + "step": 3160, + "valid_targets_mean": 2303.8, + "valid_targets_min": 438 + }, + { + "epoch": 5.283806343906511, + "grad_norm": 0.8303410171170952, + "learning_rate": 6.902785321094301e-06, + "loss": 0.1666, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15549474954605103, + "step": 3165, + "valid_targets_mean": 2127.9, + "valid_targets_min": 637 + }, + { + "epoch": 5.292153589315526, + "grad_norm": 0.7116830217761911, + "learning_rate": 6.8399714352322424e-06, + "loss": 0.1655, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1497003734111786, + "step": 3170, + "valid_targets_mean": 3227.2, + "valid_targets_min": 653 + }, + { + "epoch": 5.300500834724541, + "grad_norm": 0.7469096804819885, + "learning_rate": 6.777385647827639e-06, + "loss": 0.1488, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16166070103645325, + "step": 3175, + "valid_targets_mean": 2447.1, + "valid_targets_min": 934 + }, + { + "epoch": 5.308848080133556, + "grad_norm": 0.6464577777865734, + "learning_rate": 6.715029043659409e-06, + "loss": 0.1485, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11982201039791107, + "step": 3180, + "valid_targets_mean": 2805.0, + "valid_targets_min": 1081 + }, + { + "epoch": 5.317195325542571, + "grad_norm": 0.9053606512911229, + "learning_rate": 6.652902703534114e-06, + "loss": 0.159, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17174623906612396, + "step": 3185, + "valid_targets_mean": 2097.7, + "valid_targets_min": 881 + }, + { + "epoch": 5.325542570951586, + "grad_norm": 0.890630614053283, + "learning_rate": 6.5910077042672246e-06, + "loss": 0.1605, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15030130743980408, + "step": 3190, + "valid_targets_mean": 1551.0, + "valid_targets_min": 568 + }, + { + "epoch": 5.333889816360601, + "grad_norm": 0.7198405000575665, + "learning_rate": 6.5293451186644566e-06, + "loss": 0.1396, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16081032156944275, + "step": 3195, + "valid_targets_mean": 3201.9, + "valid_targets_min": 924 + }, + { + "epoch": 5.342237061769616, + "grad_norm": 0.8286468409130533, + "learning_rate": 6.467916015503173e-06, + "loss": 0.1528, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15238681435585022, + "step": 3200, + "valid_targets_mean": 1958.5, + "valid_targets_min": 966 + }, + { + "epoch": 5.350584307178631, + "grad_norm": 0.8352977703843605, + "learning_rate": 6.406721459513865e-06, + "loss": 0.1614, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15999573469161987, + "step": 3205, + "valid_targets_mean": 2182.2, + "valid_targets_min": 884 + }, + { + "epoch": 5.358931552587646, + "grad_norm": 0.7536279467007496, + "learning_rate": 6.3457625113616995e-06, + "loss": 0.1783, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.26952141523361206, + "step": 3210, + "valid_targets_mean": 3350.1, + "valid_targets_min": 1139 + }, + { + "epoch": 5.367278797996661, + "grad_norm": 0.8009881431527814, + "learning_rate": 6.2850402276281184e-06, + "loss": 0.1518, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1792518049478531, + "step": 3215, + "valid_targets_mean": 2610.6, + "valid_targets_min": 1126 + }, + { + "epoch": 5.375626043405676, + "grad_norm": 0.8268963955933865, + "learning_rate": 6.224555660792546e-06, + "loss": 0.1474, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14128682017326355, + "step": 3220, + "valid_targets_mean": 1785.2, + "valid_targets_min": 967 + }, + { + "epoch": 5.383973288814691, + "grad_norm": 0.8578626786933801, + "learning_rate": 6.164309859214135e-06, + "loss": 0.1591, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16973763704299927, + "step": 3225, + "valid_targets_mean": 2057.1, + "valid_targets_min": 854 + }, + { + "epoch": 5.392320534223706, + "grad_norm": 0.8833553458695893, + "learning_rate": 6.104303867113599e-06, + "loss": 0.1636, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18344926834106445, + "step": 3230, + "valid_targets_mean": 2609.9, + "valid_targets_min": 525 + }, + { + "epoch": 5.400667779632721, + "grad_norm": 0.9329249622574668, + "learning_rate": 6.044538724555109e-06, + "loss": 0.1696, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1783263087272644, + "step": 3235, + "valid_targets_mean": 1974.5, + "valid_targets_min": 754 + }, + { + "epoch": 5.409015025041736, + "grad_norm": 0.7246943548888799, + "learning_rate": 5.9850154674282766e-06, + "loss": 0.1597, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21516211330890656, + "step": 3240, + "valid_targets_mean": 3427.4, + "valid_targets_min": 603 + }, + { + "epoch": 5.417362270450751, + "grad_norm": 0.7088231725177259, + "learning_rate": 5.925735127430186e-06, + "loss": 0.1508, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13976027071475983, + "step": 3245, + "valid_targets_mean": 2651.9, + "valid_targets_min": 906 + }, + { + "epoch": 5.425709515859766, + "grad_norm": 0.7196705764648943, + "learning_rate": 5.866698732047522e-06, + "loss": 0.1867, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17780493199825287, + "step": 3250, + "valid_targets_mean": 3137.8, + "valid_targets_min": 820 + }, + { + "epoch": 5.434056761268781, + "grad_norm": 1.008081924375484, + "learning_rate": 5.8079073045387555e-06, + "loss": 0.1708, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16477034986019135, + "step": 3255, + "valid_targets_mean": 1668.0, + "valid_targets_min": 738 + }, + { + "epoch": 5.442404006677796, + "grad_norm": 0.5624799181590624, + "learning_rate": 5.7493618639164115e-06, + "loss": 0.1687, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20196178555488586, + "step": 3260, + "valid_targets_mean": 3896.6, + "valid_targets_min": 637 + }, + { + "epoch": 5.450751252086811, + "grad_norm": 0.8613753272442412, + "learning_rate": 5.691063424929406e-06, + "loss": 0.158, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1626051962375641, + "step": 3265, + "valid_targets_mean": 1722.1, + "valid_targets_min": 677 + }, + { + "epoch": 5.459098497495827, + "grad_norm": 0.8179273751481095, + "learning_rate": 5.633012998045451e-06, + "loss": 0.1527, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17018984258174896, + "step": 3270, + "valid_targets_mean": 2636.1, + "valid_targets_min": 433 + }, + { + "epoch": 5.467445742904841, + "grad_norm": 0.7311272409154818, + "learning_rate": 5.575211589433554e-06, + "loss": 0.1513, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15486940741539001, + "step": 3275, + "valid_targets_mean": 2879.6, + "valid_targets_min": 1016 + }, + { + "epoch": 5.475792988313857, + "grad_norm": 0.8351281020533409, + "learning_rate": 5.517660200946562e-06, + "loss": 0.1492, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16601449251174927, + "step": 3280, + "valid_targets_mean": 2243.8, + "valid_targets_min": 705 + }, + { + "epoch": 5.484140233722871, + "grad_norm": 0.7764436479802024, + "learning_rate": 5.4603598301038145e-06, + "loss": 0.1417, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13754409551620483, + "step": 3285, + "valid_targets_mean": 2506.4, + "valid_targets_min": 941 + }, + { + "epoch": 5.492487479131887, + "grad_norm": 0.7465683826971206, + "learning_rate": 5.4033114700738375e-06, + "loss": 0.1498, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13822892308235168, + "step": 3290, + "valid_targets_mean": 2512.8, + "valid_targets_min": 799 + }, + { + "epoch": 5.500834724540901, + "grad_norm": 0.7669762719839254, + "learning_rate": 5.346516109657136e-06, + "loss": 0.1585, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1455293893814087, + "step": 3295, + "valid_targets_mean": 2365.6, + "valid_targets_min": 750 + }, + { + "epoch": 5.509181969949917, + "grad_norm": 0.6820821328169009, + "learning_rate": 5.289974733269063e-06, + "loss": 0.1596, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14104554057121277, + "step": 3300, + "valid_targets_mean": 3066.8, + "valid_targets_min": 855 + }, + { + "epoch": 5.517529215358931, + "grad_norm": 0.6275599910230133, + "learning_rate": 5.233688320922741e-06, + "loss": 0.1484, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1358412504196167, + "step": 3305, + "valid_targets_mean": 3737.1, + "valid_targets_min": 929 + }, + { + "epoch": 5.525876460767947, + "grad_norm": 0.7641761073708837, + "learning_rate": 5.177657848212092e-06, + "loss": 0.1734, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16747067868709564, + "step": 3310, + "valid_targets_mean": 3085.7, + "valid_targets_min": 1041 + }, + { + "epoch": 5.534223706176961, + "grad_norm": 0.7914093785803261, + "learning_rate": 5.1218842862949115e-06, + "loss": 0.1521, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1578901708126068, + "step": 3315, + "valid_targets_mean": 2408.4, + "valid_targets_min": 1051 + }, + { + "epoch": 5.542570951585977, + "grad_norm": 0.7118185476620853, + "learning_rate": 5.066368601876048e-06, + "loss": 0.177, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20359279215335846, + "step": 3320, + "valid_targets_mean": 2890.9, + "valid_targets_min": 612 + }, + { + "epoch": 5.550918196994992, + "grad_norm": 0.7687370906920561, + "learning_rate": 5.011111757190661e-06, + "loss": 0.1525, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18415136635303497, + "step": 3325, + "valid_targets_mean": 2670.1, + "valid_targets_min": 971 + }, + { + "epoch": 5.559265442404007, + "grad_norm": 0.8552425365257644, + "learning_rate": 4.956114709987488e-06, + "loss": 0.1604, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16587582230567932, + "step": 3330, + "valid_targets_mean": 2253.2, + "valid_targets_min": 1069 + }, + { + "epoch": 5.567612687813021, + "grad_norm": 0.8136795110704207, + "learning_rate": 4.901378413512325e-06, + "loss": 0.1706, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17664243280887604, + "step": 3335, + "valid_targets_mean": 2525.9, + "valid_targets_min": 482 + }, + { + "epoch": 5.575959933222037, + "grad_norm": 0.7657322803911494, + "learning_rate": 4.846903816491419e-06, + "loss": 0.1451, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1563817262649536, + "step": 3340, + "valid_targets_mean": 2847.3, + "valid_targets_min": 1094 + }, + { + "epoch": 5.584307178631052, + "grad_norm": 0.8733061315249282, + "learning_rate": 4.792691863115113e-06, + "loss": 0.1813, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17923319339752197, + "step": 3345, + "valid_targets_mean": 1898.2, + "valid_targets_min": 847 + }, + { + "epoch": 5.592654424040067, + "grad_norm": 0.67940653982166, + "learning_rate": 4.738743493021383e-06, + "loss": 0.164, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1424332708120346, + "step": 3350, + "valid_targets_mean": 2634.2, + "valid_targets_min": 801 + }, + { + "epoch": 5.601001669449082, + "grad_norm": 0.8828332955807119, + "learning_rate": 4.68505964127965e-06, + "loss": 0.1644, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17155790328979492, + "step": 3355, + "valid_targets_mean": 2005.6, + "valid_targets_min": 538 + }, + { + "epoch": 5.609348914858097, + "grad_norm": 0.6796336949437728, + "learning_rate": 4.631641238374482e-06, + "loss": 0.1617, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16817662119865417, + "step": 3360, + "valid_targets_mean": 3273.6, + "valid_targets_min": 835 + }, + { + "epoch": 5.617696160267112, + "grad_norm": 0.6784010851999314, + "learning_rate": 4.578489210189554e-06, + "loss": 0.1489, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11624115705490112, + "step": 3365, + "valid_targets_mean": 2313.6, + "valid_targets_min": 978 + }, + { + "epoch": 5.626043405676127, + "grad_norm": 0.7143366967890289, + "learning_rate": 4.525604477991505e-06, + "loss": 0.17, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13993963599205017, + "step": 3370, + "valid_targets_mean": 2707.2, + "valid_targets_min": 301 + }, + { + "epoch": 5.634390651085142, + "grad_norm": 0.8720061547761272, + "learning_rate": 4.472987958414077e-06, + "loss": 0.1789, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21348363161087036, + "step": 3375, + "valid_targets_mean": 2037.7, + "valid_targets_min": 715 + }, + { + "epoch": 5.642737896494157, + "grad_norm": 1.0475625543897096, + "learning_rate": 4.420640563442111e-06, + "loss": 0.1468, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14963243901729584, + "step": 3380, + "valid_targets_mean": 2477.6, + "valid_targets_min": 712 + }, + { + "epoch": 5.651085141903172, + "grad_norm": 0.8107519973170887, + "learning_rate": 4.368563200395859e-06, + "loss": 0.1622, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1464444398880005, + "step": 3385, + "valid_targets_mean": 1847.6, + "valid_targets_min": 573 + }, + { + "epoch": 5.659432387312187, + "grad_norm": 0.8581055639161416, + "learning_rate": 4.31675677191514e-06, + "loss": 0.1602, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16239197552204132, + "step": 3390, + "valid_targets_mean": 2123.2, + "valid_targets_min": 834 + }, + { + "epoch": 5.667779632721202, + "grad_norm": 0.941010389190145, + "learning_rate": 4.265222175943808e-06, + "loss": 0.1764, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22993087768554688, + "step": 3395, + "valid_targets_mean": 2226.4, + "valid_targets_min": 632 + }, + { + "epoch": 5.676126878130217, + "grad_norm": 0.7743509613880359, + "learning_rate": 4.213960305714082e-06, + "loss": 0.1651, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16907739639282227, + "step": 3400, + "valid_targets_mean": 2501.2, + "valid_targets_min": 449 + }, + { + "epoch": 5.684474123539232, + "grad_norm": 0.8429009936208021, + "learning_rate": 4.162972049731164e-06, + "loss": 0.1563, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15070652961730957, + "step": 3405, + "valid_targets_mean": 2058.0, + "valid_targets_min": 857 + }, + { + "epoch": 5.692821368948247, + "grad_norm": 0.8554399683453996, + "learning_rate": 4.112258291757747e-06, + "loss": 0.1547, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18287883698940277, + "step": 3410, + "valid_targets_mean": 2382.9, + "valid_targets_min": 872 + }, + { + "epoch": 5.701168614357262, + "grad_norm": 0.8566823129381648, + "learning_rate": 4.061819910798777e-06, + "loss": 0.1583, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17844383418560028, + "step": 3415, + "valid_targets_mean": 2137.6, + "valid_targets_min": 705 + }, + { + "epoch": 5.709515859766277, + "grad_norm": 0.9097702275635593, + "learning_rate": 4.01165778108614e-06, + "loss": 0.1493, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16221177577972412, + "step": 3420, + "valid_targets_mean": 2094.6, + "valid_targets_min": 614 + }, + { + "epoch": 5.717863105175292, + "grad_norm": 0.7036808144814012, + "learning_rate": 3.961772772063599e-06, + "loss": 0.1706, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17053760588169098, + "step": 3425, + "valid_targets_mean": 2376.1, + "valid_targets_min": 1027 + }, + { + "epoch": 5.726210350584307, + "grad_norm": 0.7590311318391576, + "learning_rate": 3.912165748371626e-06, + "loss": 0.1596, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16006188094615936, + "step": 3430, + "valid_targets_mean": 2836.8, + "valid_targets_min": 661 + }, + { + "epoch": 5.734557595993322, + "grad_norm": 0.9299011851422595, + "learning_rate": 3.8628375698325045e-06, + "loss": 0.1589, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1849474459886551, + "step": 3435, + "valid_targets_mean": 1898.8, + "valid_targets_min": 680 + }, + { + "epoch": 5.742904841402337, + "grad_norm": 0.7986437795279827, + "learning_rate": 3.8137890914353535e-06, + "loss": 0.1618, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16792498528957367, + "step": 3440, + "valid_targets_mean": 2337.0, + "valid_targets_min": 617 + }, + { + "epoch": 5.751252086811352, + "grad_norm": 0.6722159467229356, + "learning_rate": 3.765021163321374e-06, + "loss": 0.1508, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12963595986366272, + "step": 3445, + "valid_targets_mean": 2810.4, + "valid_targets_min": 778 + }, + { + "epoch": 5.759599332220367, + "grad_norm": 0.7946305155772581, + "learning_rate": 3.7165346307690466e-06, + "loss": 0.169, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15621204674243927, + "step": 3450, + "valid_targets_mean": 2033.2, + "valid_targets_min": 1111 + }, + { + "epoch": 5.767946577629383, + "grad_norm": 0.7643828837602218, + "learning_rate": 3.6683303341795483e-06, + "loss": 0.1737, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14357686042785645, + "step": 3455, + "valid_targets_mean": 2984.0, + "valid_targets_min": 905 + }, + { + "epoch": 5.776293823038397, + "grad_norm": 1.0355495916054631, + "learning_rate": 3.6204091090621176e-06, + "loss": 0.159, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16833853721618652, + "step": 3460, + "valid_targets_mean": 1626.7, + "valid_targets_min": 970 + }, + { + "epoch": 5.784641068447412, + "grad_norm": 0.7378784553327359, + "learning_rate": 3.572771786019649e-06, + "loss": 0.1719, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.142981618642807, + "step": 3465, + "valid_targets_mean": 2791.6, + "valid_targets_min": 711 + }, + { + "epoch": 5.792988313856427, + "grad_norm": 0.6602657193448223, + "learning_rate": 3.5254191907342117e-06, + "loss": 0.1464, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14098285138607025, + "step": 3470, + "valid_targets_mean": 2716.7, + "valid_targets_min": 632 + }, + { + "epoch": 5.801335559265443, + "grad_norm": 0.7778140268165555, + "learning_rate": 3.4783521439528233e-06, + "loss": 0.1599, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15775492787361145, + "step": 3475, + "valid_targets_mean": 2488.4, + "valid_targets_min": 1012 + }, + { + "epoch": 5.809682804674457, + "grad_norm": 0.8527334984195997, + "learning_rate": 3.4315714614731467e-06, + "loss": 0.1769, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15102088451385498, + "step": 3480, + "valid_targets_mean": 2576.1, + "valid_targets_min": 1006 + }, + { + "epoch": 5.818030050083473, + "grad_norm": 0.7483617870763603, + "learning_rate": 3.385077954129421e-06, + "loss": 0.1617, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16600114107131958, + "step": 3485, + "valid_targets_mean": 2965.2, + "valid_targets_min": 1004 + }, + { + "epoch": 5.826377295492487, + "grad_norm": 0.7226573146652634, + "learning_rate": 3.3388724277783347e-06, + "loss": 0.1708, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14855024218559265, + "step": 3490, + "valid_targets_mean": 2656.9, + "valid_targets_min": 564 + }, + { + "epoch": 5.834724540901503, + "grad_norm": 0.8221857990345114, + "learning_rate": 3.292955683285135e-06, + "loss": 0.1665, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1467684954404831, + "step": 3495, + "valid_targets_mean": 2082.8, + "valid_targets_min": 522 + }, + { + "epoch": 5.843071786310517, + "grad_norm": 0.7230569578324877, + "learning_rate": 3.2473285165096736e-06, + "loss": 0.1564, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13656604290008545, + "step": 3500, + "valid_targets_mean": 2686.7, + "valid_targets_min": 1037 + }, + { + "epoch": 5.851419031719533, + "grad_norm": 0.8536959044999787, + "learning_rate": 3.2019917182926806e-06, + "loss": 0.1581, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17382332682609558, + "step": 3505, + "valid_targets_mean": 2264.3, + "valid_targets_min": 988 + }, + { + "epoch": 5.859766277128547, + "grad_norm": 0.7157632815807343, + "learning_rate": 3.156946074441982e-06, + "loss": 0.1523, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14807337522506714, + "step": 3510, + "valid_targets_mean": 2657.1, + "valid_targets_min": 845 + }, + { + "epoch": 5.868113522537563, + "grad_norm": 0.8308098852803112, + "learning_rate": 3.112192365718969e-06, + "loss": 0.1724, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15952430665493011, + "step": 3515, + "valid_targets_mean": 1952.7, + "valid_targets_min": 679 + }, + { + "epoch": 5.876460767946577, + "grad_norm": 0.7216238687251982, + "learning_rate": 3.067731367824969e-06, + "loss": 0.1436, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13213233649730682, + "step": 3520, + "valid_targets_mean": 2498.2, + "valid_targets_min": 909 + }, + { + "epoch": 5.884808013355593, + "grad_norm": 0.7580182328273992, + "learning_rate": 3.023563851387885e-06, + "loss": 0.171, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2006535828113556, + "step": 3525, + "valid_targets_mean": 2834.3, + "valid_targets_min": 942 + }, + { + "epoch": 5.893155258764608, + "grad_norm": 0.743791785946315, + "learning_rate": 2.979690581948784e-06, + "loss": 0.1474, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1370251327753067, + "step": 3530, + "valid_targets_mean": 2285.6, + "valid_targets_min": 456 + }, + { + "epoch": 5.901502504173623, + "grad_norm": 0.7574771577324715, + "learning_rate": 2.936112319948654e-06, + "loss": 0.176, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19993768632411957, + "step": 3535, + "valid_targets_mean": 2802.4, + "valid_targets_min": 949 + }, + { + "epoch": 5.909849749582638, + "grad_norm": 0.7799138270228428, + "learning_rate": 2.892829820715208e-06, + "loss": 0.1605, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13004331290721893, + "step": 3540, + "valid_targets_mean": 3011.4, + "valid_targets_min": 733 + }, + { + "epoch": 5.918196994991653, + "grad_norm": 0.6122912119902282, + "learning_rate": 2.8498438344498103e-06, + "loss": 0.1568, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1826564073562622, + "step": 3545, + "valid_targets_mean": 3161.0, + "valid_targets_min": 769 + }, + { + "epoch": 5.926544240400668, + "grad_norm": 1.3873463159619706, + "learning_rate": 2.8071551062144518e-06, + "loss": 0.1749, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14959605038166046, + "step": 3550, + "valid_targets_mean": 2140.4, + "valid_targets_min": 945 + }, + { + "epoch": 5.934891485809683, + "grad_norm": 0.8162427514148738, + "learning_rate": 2.7647643759188557e-06, + "loss": 0.1592, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15046297013759613, + "step": 3555, + "valid_targets_mean": 2093.9, + "valid_targets_min": 465 + }, + { + "epoch": 5.943238731218698, + "grad_norm": 0.7958101696195098, + "learning_rate": 2.7226723783076447e-06, + "loss": 0.1912, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16588354110717773, + "step": 3560, + "valid_targets_mean": 2123.1, + "valid_targets_min": 874 + }, + { + "epoch": 5.951585976627713, + "grad_norm": 1.0098598781474784, + "learning_rate": 2.680879842947601e-06, + "loss": 0.1628, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16657009720802307, + "step": 3565, + "valid_targets_mean": 2168.7, + "valid_targets_min": 817 + }, + { + "epoch": 5.959933222036728, + "grad_norm": 0.7515188412881856, + "learning_rate": 2.6393874942150268e-06, + "loss": 0.1544, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15513961017131805, + "step": 3570, + "valid_targets_mean": 2491.9, + "valid_targets_min": 1224 + }, + { + "epoch": 5.968280467445743, + "grad_norm": 0.8227134907596251, + "learning_rate": 2.598196051283193e-06, + "loss": 0.1714, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13640505075454712, + "step": 3575, + "valid_targets_mean": 1789.6, + "valid_targets_min": 745 + }, + { + "epoch": 5.976627712854758, + "grad_norm": 0.8497855391751271, + "learning_rate": 2.5573062281098638e-06, + "loss": 0.1457, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11205454170703888, + "step": 3580, + "valid_targets_mean": 2565.0, + "valid_targets_min": 638 + }, + { + "epoch": 5.984974958263773, + "grad_norm": 0.678370649033699, + "learning_rate": 2.5167187334249277e-06, + "loss": 0.1465, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13231565058231354, + "step": 3585, + "valid_targets_mean": 3191.2, + "valid_targets_min": 697 + }, + { + "epoch": 5.993322203672788, + "grad_norm": 0.7422066388381311, + "learning_rate": 2.4764342707181155e-06, + "loss": 0.1538, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16047775745391846, + "step": 3590, + "valid_targets_mean": 2584.1, + "valid_targets_min": 604 + }, + { + "epoch": 6.001669449081803, + "grad_norm": 0.8851835075199939, + "learning_rate": 2.4364535382268017e-06, + "loss": 0.1598, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14996086061000824, + "step": 3595, + "valid_targets_mean": 1559.1, + "valid_targets_min": 533 + }, + { + "epoch": 6.010016694490818, + "grad_norm": 0.68295567119716, + "learning_rate": 2.3967772289239055e-06, + "loss": 0.1425, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13082125782966614, + "step": 3600, + "valid_targets_mean": 2628.2, + "valid_targets_min": 736 + }, + { + "epoch": 6.018363939899833, + "grad_norm": 0.8919501944293605, + "learning_rate": 2.357406030505878e-06, + "loss": 0.148, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17399922013282776, + "step": 3605, + "valid_targets_mean": 1760.0, + "valid_targets_min": 795 + }, + { + "epoch": 6.026711185308848, + "grad_norm": 0.8563823633600588, + "learning_rate": 2.318340625380786e-06, + "loss": 0.1451, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13941307365894318, + "step": 3610, + "valid_targets_mean": 1836.8, + "valid_targets_min": 779 + }, + { + "epoch": 6.035058430717863, + "grad_norm": 0.8400990084985073, + "learning_rate": 2.279581690656476e-06, + "loss": 0.1548, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16140100359916687, + "step": 3615, + "valid_targets_mean": 2342.7, + "valid_targets_min": 816 + }, + { + "epoch": 6.043405676126878, + "grad_norm": 0.7106127667503332, + "learning_rate": 2.241129898128851e-06, + "loss": 0.1458, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13049355149269104, + "step": 3620, + "valid_targets_mean": 2636.1, + "valid_targets_min": 1002 + }, + { + "epoch": 6.051752921535893, + "grad_norm": 0.8818391008759068, + "learning_rate": 2.202985914270215e-06, + "loss": 0.159, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1488562822341919, + "step": 3625, + "valid_targets_mean": 1675.2, + "valid_targets_min": 766 + }, + { + "epoch": 6.060100166944908, + "grad_norm": 0.7882238670237989, + "learning_rate": 2.1651504002177236e-06, + "loss": 0.1615, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1578988879919052, + "step": 3630, + "valid_targets_mean": 2249.3, + "valid_targets_min": 459 + }, + { + "epoch": 6.068447412353923, + "grad_norm": 0.8515864149791522, + "learning_rate": 2.1276240117619283e-06, + "loss": 0.1525, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16811984777450562, + "step": 3635, + "valid_targets_mean": 2181.1, + "valid_targets_min": 853 + }, + { + "epoch": 6.076794657762938, + "grad_norm": 0.6823858985105556, + "learning_rate": 2.09040739933541e-06, + "loss": 0.1574, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15278483927249908, + "step": 3640, + "valid_targets_mean": 3562.9, + "valid_targets_min": 889 + }, + { + "epoch": 6.085141903171953, + "grad_norm": 0.8708022073895972, + "learning_rate": 2.0535012080015006e-06, + "loss": 0.1455, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15357419848442078, + "step": 3645, + "valid_targets_mean": 1788.9, + "valid_targets_min": 753 + }, + { + "epoch": 6.093489148580968, + "grad_norm": 0.7107614264087305, + "learning_rate": 2.016906077443106e-06, + "loss": 0.1534, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1238049566745758, + "step": 3650, + "valid_targets_mean": 2667.8, + "valid_targets_min": 748 + }, + { + "epoch": 6.101836393989983, + "grad_norm": 0.7206179548098023, + "learning_rate": 1.9806226419516195e-06, + "loss": 0.1316, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13964733481407166, + "step": 3655, + "valid_targets_mean": 2906.7, + "valid_targets_min": 1120 + }, + { + "epoch": 6.110183639398999, + "grad_norm": 0.6654522787131666, + "learning_rate": 1.9446515304159198e-06, + "loss": 0.1767, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19255736470222473, + "step": 3660, + "valid_targets_mean": 3062.2, + "valid_targets_min": 838 + }, + { + "epoch": 6.118530884808013, + "grad_norm": 0.6139051527372493, + "learning_rate": 1.9089933663114868e-06, + "loss": 0.1432, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1455802321434021, + "step": 3665, + "valid_targets_mean": 3733.2, + "valid_targets_min": 873 + }, + { + "epoch": 6.126878130217029, + "grad_norm": 0.8954177229974751, + "learning_rate": 1.8736487676895754e-06, + "loss": 0.1445, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16447719931602478, + "step": 3670, + "valid_targets_mean": 1722.4, + "valid_targets_min": 584 + }, + { + "epoch": 6.135225375626043, + "grad_norm": 0.7889736850432763, + "learning_rate": 1.8386183471665187e-06, + "loss": 0.1446, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15405014157295227, + "step": 3675, + "valid_targets_mean": 2179.7, + "valid_targets_min": 754 + }, + { + "epoch": 6.143572621035059, + "grad_norm": 0.716510609638258, + "learning_rate": 1.8039027119131057e-06, + "loss": 0.1646, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14177146553993225, + "step": 3680, + "valid_targets_mean": 2999.8, + "valid_targets_min": 700 + }, + { + "epoch": 6.151919866444073, + "grad_norm": 0.8500395901113726, + "learning_rate": 1.7695024636440484e-06, + "loss": 0.1641, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1698373705148697, + "step": 3685, + "valid_targets_mean": 2100.7, + "valid_targets_min": 800 + }, + { + "epoch": 6.160267111853089, + "grad_norm": 0.7370702493584649, + "learning_rate": 1.7354181986075635e-06, + "loss": 0.1628, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1334153264760971, + "step": 3690, + "valid_targets_mean": 2306.4, + "valid_targets_min": 797 + }, + { + "epoch": 6.168614357262103, + "grad_norm": 0.930381567032839, + "learning_rate": 1.701650507575039e-06, + "loss": 0.1589, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17851126194000244, + "step": 3695, + "valid_targets_mean": 1861.8, + "valid_targets_min": 934 + }, + { + "epoch": 6.176961602671119, + "grad_norm": 0.7905236539808159, + "learning_rate": 1.6681999758307799e-06, + "loss": 0.1545, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14857222139835358, + "step": 3700, + "valid_targets_mean": 2295.4, + "valid_targets_min": 921 + }, + { + "epoch": 6.185308848080133, + "grad_norm": 1.1994597822098416, + "learning_rate": 1.6350671831618804e-06, + "loss": 0.148, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15778902173042297, + "step": 3705, + "valid_targets_mean": 2525.7, + "valid_targets_min": 632 + }, + { + "epoch": 6.193656093489149, + "grad_norm": 0.9284259884291556, + "learning_rate": 1.602252703848164e-06, + "loss": 0.1575, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15259215235710144, + "step": 3710, + "valid_targets_mean": 1974.4, + "valid_targets_min": 845 + }, + { + "epoch": 6.202003338898163, + "grad_norm": 0.7059060863708253, + "learning_rate": 1.5697571066522321e-06, + "loss": 0.1421, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12666380405426025, + "step": 3715, + "valid_targets_mean": 2369.9, + "valid_targets_min": 869 + }, + { + "epoch": 6.210350584307179, + "grad_norm": 0.7480295618422672, + "learning_rate": 1.5375809548096187e-06, + "loss": 0.1397, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17569568753242493, + "step": 3720, + "valid_targets_mean": 2626.1, + "valid_targets_min": 550 + }, + { + "epoch": 6.218697829716193, + "grad_norm": 0.8064888058289421, + "learning_rate": 1.5057248060189956e-06, + "loss": 0.1525, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16778752207756042, + "step": 3725, + "valid_targets_mean": 2279.5, + "valid_targets_min": 795 + }, + { + "epoch": 6.227045075125209, + "grad_norm": 0.8964741161740296, + "learning_rate": 1.4741892124325508e-06, + "loss": 0.1561, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1577776074409485, + "step": 3730, + "valid_targets_mean": 1738.4, + "valid_targets_min": 405 + }, + { + "epoch": 6.235392320534224, + "grad_norm": 0.8682581411786174, + "learning_rate": 1.4429747206463662e-06, + "loss": 0.1634, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21041740477085114, + "step": 3735, + "valid_targets_mean": 2351.8, + "valid_targets_min": 1003 + }, + { + "epoch": 6.243739565943239, + "grad_norm": 0.6927794152026492, + "learning_rate": 1.4120818716910023e-06, + "loss": 0.1483, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14970088005065918, + "step": 3740, + "valid_targets_mean": 3120.8, + "valid_targets_min": 1085 + }, + { + "epoch": 6.252086811352254, + "grad_norm": 0.9493800328839261, + "learning_rate": 1.381511201022061e-06, + "loss": 0.1434, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19243334233760834, + "step": 3745, + "valid_targets_mean": 1847.9, + "valid_targets_min": 937 + }, + { + "epoch": 6.260434056761269, + "grad_norm": 0.7922599735178169, + "learning_rate": 1.3512632385109582e-06, + "loss": 0.1462, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1484522521495819, + "step": 3750, + "valid_targets_mean": 2647.9, + "valid_targets_min": 1037 + }, + { + "epoch": 6.268781302170284, + "grad_norm": 0.7230678345988222, + "learning_rate": 1.3213385084356944e-06, + "loss": 0.1478, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14309535920619965, + "step": 3755, + "valid_targets_mean": 2840.8, + "valid_targets_min": 967 + }, + { + "epoch": 6.277128547579299, + "grad_norm": 0.82103624534236, + "learning_rate": 1.2917375294718083e-06, + "loss": 0.1529, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14719870686531067, + "step": 3760, + "valid_targets_mean": 2150.3, + "valid_targets_min": 944 + }, + { + "epoch": 6.285475792988314, + "grad_norm": 0.8167877816687908, + "learning_rate": 1.262460814683344e-06, + "loss": 0.1406, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12159612774848938, + "step": 3765, + "valid_targets_mean": 1696.8, + "valid_targets_min": 953 + }, + { + "epoch": 6.293823038397329, + "grad_norm": 0.7164665826615882, + "learning_rate": 1.2335088715140065e-06, + "loss": 0.1352, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13050119578838348, + "step": 3770, + "valid_targets_mean": 2551.6, + "valid_targets_min": 651 + }, + { + "epoch": 6.302170283806344, + "grad_norm": 0.8318504157865843, + "learning_rate": 1.2048822017783168e-06, + "loss": 0.1676, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1429317593574524, + "step": 3775, + "valid_targets_mean": 2180.4, + "valid_targets_min": 557 + }, + { + "epoch": 6.310517529215359, + "grad_norm": 0.8649492258660376, + "learning_rate": 1.1765813016529592e-06, + "loss": 0.1442, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14729107916355133, + "step": 3780, + "valid_targets_mean": 2001.9, + "valid_targets_min": 919 + }, + { + "epoch": 6.318864774624374, + "grad_norm": 0.6475240793732475, + "learning_rate": 1.1486066616681413e-06, + "loss": 0.1448, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12821900844573975, + "step": 3785, + "valid_targets_mean": 3358.6, + "valid_targets_min": 639 + }, + { + "epoch": 6.3272120200333895, + "grad_norm": 0.6529794567135447, + "learning_rate": 1.1209587666991273e-06, + "loss": 0.1577, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12259849905967712, + "step": 3790, + "valid_targets_mean": 2630.0, + "valid_targets_min": 507 + }, + { + "epoch": 6.335559265442404, + "grad_norm": 0.8562469991700906, + "learning_rate": 1.093638095957803e-06, + "loss": 0.1532, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13961325585842133, + "step": 3795, + "valid_targets_mean": 2000.7, + "valid_targets_min": 788 + }, + { + "epoch": 6.343906510851419, + "grad_norm": 0.7616736404357265, + "learning_rate": 1.0666451229844e-06, + "loss": 0.1393, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13437359035015106, + "step": 3800, + "valid_targets_mean": 2390.3, + "valid_targets_min": 742 + }, + { + "epoch": 6.352253756260434, + "grad_norm": 0.7964023011189172, + "learning_rate": 1.0399803156392507e-06, + "loss": 0.1737, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1961688995361328, + "step": 3805, + "valid_targets_mean": 3100.4, + "valid_targets_min": 749 + }, + { + "epoch": 6.360601001669449, + "grad_norm": 0.6451812780593027, + "learning_rate": 1.0136441360947247e-06, + "loss": 0.1485, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13224376738071442, + "step": 3810, + "valid_targets_mean": 2783.3, + "valid_targets_min": 1025 + }, + { + "epoch": 6.368948247078464, + "grad_norm": 0.780990570956473, + "learning_rate": 9.876370408271675e-07, + "loss": 0.149, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14712658524513245, + "step": 3815, + "valid_targets_mean": 2153.8, + "valid_targets_min": 1102 + }, + { + "epoch": 6.377295492487479, + "grad_norm": 0.9058546698768619, + "learning_rate": 9.619594806090449e-07, + "loss": 0.1432, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17065516114234924, + "step": 3820, + "valid_targets_mean": 2582.9, + "valid_targets_min": 774 + }, + { + "epoch": 6.385642737896494, + "grad_norm": 0.80780743496805, + "learning_rate": 9.366119005010699e-07, + "loss": 0.1533, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15681153535842896, + "step": 3825, + "valid_targets_mean": 2118.6, + "valid_targets_min": 826 + }, + { + "epoch": 6.393989983305509, + "grad_norm": 0.70463459252326, + "learning_rate": 9.115947398445413e-07, + "loss": 0.1566, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14721426367759705, + "step": 3830, + "valid_targets_mean": 2749.4, + "valid_targets_min": 747 + }, + { + "epoch": 6.402337228714524, + "grad_norm": 0.8644015909784643, + "learning_rate": 8.869084322536881e-07, + "loss": 0.138, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12953412532806396, + "step": 3835, + "valid_targets_mean": 2124.9, + "valid_targets_min": 851 + }, + { + "epoch": 6.410684474123539, + "grad_norm": 0.7417130201852908, + "learning_rate": 8.62553405608193e-07, + "loss": 0.1508, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14260517060756683, + "step": 3840, + "valid_targets_mean": 2690.2, + "valid_targets_min": 1004 + }, + { + "epoch": 6.419031719532554, + "grad_norm": 0.8268422940872953, + "learning_rate": 8.385300820457276e-07, + "loss": 0.1549, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15967655181884766, + "step": 3845, + "valid_targets_mean": 2200.2, + "valid_targets_min": 735 + }, + { + "epoch": 6.427378964941569, + "grad_norm": 0.8770658445847365, + "learning_rate": 8.148388779546912e-07, + "loss": 0.1373, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14727085828781128, + "step": 3850, + "valid_targets_mean": 1786.2, + "valid_targets_min": 752 + }, + { + "epoch": 6.435726210350584, + "grad_norm": 0.8218639604859261, + "learning_rate": 7.914802039669412e-07, + "loss": 0.1536, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14088010787963867, + "step": 3855, + "valid_targets_mean": 2001.0, + "valid_targets_min": 906 + }, + { + "epoch": 6.444073455759599, + "grad_norm": 0.6589311589586826, + "learning_rate": 7.684544649507164e-07, + "loss": 0.1504, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1165132224559784, + "step": 3860, + "valid_targets_mean": 2767.9, + "valid_targets_min": 780 + }, + { + "epoch": 6.452420701168615, + "grad_norm": 0.6563808927800845, + "learning_rate": 7.457620600035898e-07, + "loss": 0.1656, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19552037119865417, + "step": 3865, + "valid_targets_mean": 3452.3, + "valid_targets_min": 889 + }, + { + "epoch": 6.460767946577629, + "grad_norm": 0.8718152279689915, + "learning_rate": 7.234033824455821e-07, + "loss": 0.1496, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1512538194656372, + "step": 3870, + "valid_targets_mean": 1872.3, + "valid_targets_min": 619 + }, + { + "epoch": 6.469115191986645, + "grad_norm": 0.7803820718033989, + "learning_rate": 7.01378819812304e-07, + "loss": 0.1438, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1376628577709198, + "step": 3875, + "valid_targets_mean": 2177.5, + "valid_targets_min": 790 + }, + { + "epoch": 6.477462437395659, + "grad_norm": 0.8404903873415192, + "learning_rate": 6.796887538482821e-07, + "loss": 0.138, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1302323341369629, + "step": 3880, + "valid_targets_mean": 1652.6, + "valid_targets_min": 759 + }, + { + "epoch": 6.485809682804675, + "grad_norm": 0.7041496296544049, + "learning_rate": 6.583335605003083e-07, + "loss": 0.1415, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13807426393032074, + "step": 3885, + "valid_targets_mean": 2838.5, + "valid_targets_min": 777 + }, + { + "epoch": 6.494156928213689, + "grad_norm": 0.8106972968034039, + "learning_rate": 6.373136099109455e-07, + "loss": 0.1438, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13782069087028503, + "step": 3890, + "valid_targets_mean": 2152.6, + "valid_targets_min": 820 + }, + { + "epoch": 6.502504173622705, + "grad_norm": 0.8280130065865383, + "learning_rate": 6.166292664120987e-07, + "loss": 0.1482, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1481216847896576, + "step": 3895, + "valid_targets_mean": 3080.4, + "valid_targets_min": 740 + }, + { + "epoch": 6.510851419031719, + "grad_norm": 0.9297980497793329, + "learning_rate": 5.962808885187121e-07, + "loss": 0.1524, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1642313301563263, + "step": 3900, + "valid_targets_mean": 1828.1, + "valid_targets_min": 831 + }, + { + "epoch": 6.519198664440735, + "grad_norm": 0.7176871847569163, + "learning_rate": 5.762688289225349e-07, + "loss": 0.1383, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12464821338653564, + "step": 3905, + "valid_targets_mean": 2672.1, + "valid_targets_min": 915 + }, + { + "epoch": 6.527545909849749, + "grad_norm": 0.7931047339698734, + "learning_rate": 5.565934344860413e-07, + "loss": 0.1545, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16860723495483398, + "step": 3910, + "valid_targets_mean": 2200.2, + "valid_targets_min": 886 + }, + { + "epoch": 6.535893155258765, + "grad_norm": 0.7250577663610173, + "learning_rate": 5.372550462363779e-07, + "loss": 0.1451, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15044459700584412, + "step": 3915, + "valid_targets_mean": 2521.6, + "valid_targets_min": 832 + }, + { + "epoch": 6.54424040066778, + "grad_norm": 0.70964828840811, + "learning_rate": 5.182539993594904e-07, + "loss": 0.1569, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15507769584655762, + "step": 3920, + "valid_targets_mean": 3136.9, + "valid_targets_min": 763 + }, + { + "epoch": 6.552587646076795, + "grad_norm": 0.5479851363438597, + "learning_rate": 4.99590623194286e-07, + "loss": 0.1643, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12132815271615982, + "step": 3925, + "valid_targets_mean": 3820.2, + "valid_targets_min": 507 + }, + { + "epoch": 6.560934891485809, + "grad_norm": 0.9976448171759269, + "learning_rate": 4.812652412269448e-07, + "loss": 0.1561, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15689195692539215, + "step": 3930, + "valid_targets_mean": 2140.2, + "valid_targets_min": 728 + }, + { + "epoch": 6.569282136894825, + "grad_norm": 0.8316878399735419, + "learning_rate": 4.632781710852929e-07, + "loss": 0.1607, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14460167288780212, + "step": 3935, + "valid_targets_mean": 2163.6, + "valid_targets_min": 677 + }, + { + "epoch": 6.57762938230384, + "grad_norm": 0.8666059921643122, + "learning_rate": 4.456297245333252e-07, + "loss": 0.1488, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1719382107257843, + "step": 3940, + "valid_targets_mean": 2084.9, + "valid_targets_min": 772 + }, + { + "epoch": 6.585976627712855, + "grad_norm": 0.778464606415217, + "learning_rate": 4.2832020746576666e-07, + "loss": 0.1515, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14451493322849274, + "step": 3945, + "valid_targets_mean": 2528.0, + "valid_targets_min": 721 + }, + { + "epoch": 6.59432387312187, + "grad_norm": 0.9300957361061936, + "learning_rate": 4.113499199028037e-07, + "loss": 0.1445, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15607938170433044, + "step": 3950, + "valid_targets_mean": 1620.5, + "valid_targets_min": 1012 + }, + { + "epoch": 6.602671118530885, + "grad_norm": 0.719633133630761, + "learning_rate": 3.9471915598485954e-07, + "loss": 0.1404, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13011206686496735, + "step": 3955, + "valid_targets_mean": 2869.9, + "valid_targets_min": 808 + }, + { + "epoch": 6.6110183639399, + "grad_norm": 0.7601853892881769, + "learning_rate": 3.7842820396751134e-07, + "loss": 0.1498, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12894247472286224, + "step": 3960, + "valid_targets_mean": 2399.6, + "valid_targets_min": 816 + }, + { + "epoch": 6.619365609348915, + "grad_norm": 0.6867943751621223, + "learning_rate": 3.62477346216481e-07, + "loss": 0.1477, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12438494712114334, + "step": 3965, + "valid_targets_mean": 2482.9, + "valid_targets_min": 527 + }, + { + "epoch": 6.62771285475793, + "grad_norm": 0.8238437846372922, + "learning_rate": 3.468668592027613e-07, + "loss": 0.1426, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16205736994743347, + "step": 3970, + "valid_targets_mean": 2345.9, + "valid_targets_min": 456 + }, + { + "epoch": 6.636060100166945, + "grad_norm": 0.8072915256120992, + "learning_rate": 3.3159701349779083e-07, + "loss": 0.1441, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15181957185268402, + "step": 3975, + "valid_targets_mean": 2285.8, + "valid_targets_min": 550 + }, + { + "epoch": 6.64440734557596, + "grad_norm": 0.9214323771905132, + "learning_rate": 3.1666807376880436e-07, + "loss": 0.1527, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15702925622463226, + "step": 3980, + "valid_targets_mean": 1984.9, + "valid_targets_min": 513 + }, + { + "epoch": 6.652754590984975, + "grad_norm": 0.8643867218156146, + "learning_rate": 3.0208029877420996e-07, + "loss": 0.1481, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14187416434288025, + "step": 3985, + "valid_targets_mean": 1891.2, + "valid_targets_min": 1172 + }, + { + "epoch": 6.66110183639399, + "grad_norm": 0.791139177061262, + "learning_rate": 2.8783394135913245e-07, + "loss": 0.1576, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14083585143089294, + "step": 3990, + "valid_targets_mean": 2188.8, + "valid_targets_min": 918 + }, + { + "epoch": 6.669449081803005, + "grad_norm": 0.8683379944084556, + "learning_rate": 2.7392924845100364e-07, + "loss": 0.1528, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13562563061714172, + "step": 3995, + "valid_targets_mean": 1675.1, + "valid_targets_min": 999 + }, + { + "epoch": 6.67779632721202, + "grad_norm": 0.720045003491463, + "learning_rate": 2.6036646105530804e-07, + "loss": 0.1481, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15499843657016754, + "step": 4000, + "valid_targets_mean": 2623.3, + "valid_targets_min": 745 + }, + { + "epoch": 6.686143572621035, + "grad_norm": 0.75089986646281, + "learning_rate": 2.471458142513861e-07, + "loss": 0.1356, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15602612495422363, + "step": 4005, + "valid_targets_mean": 3015.9, + "valid_targets_min": 869 + }, + { + "epoch": 6.69449081803005, + "grad_norm": 0.8709557230002241, + "learning_rate": 2.3426753718837735e-07, + "loss": 0.1497, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15754127502441406, + "step": 4010, + "valid_targets_mean": 2058.2, + "valid_targets_min": 628 + }, + { + "epoch": 6.702838063439065, + "grad_norm": 0.7904343487830936, + "learning_rate": 2.2173185308122624e-07, + "loss": 0.1683, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14883893728256226, + "step": 4015, + "valid_targets_mean": 2092.8, + "valid_targets_min": 757 + }, + { + "epoch": 6.71118530884808, + "grad_norm": 0.7145063589046023, + "learning_rate": 2.0953897920683807e-07, + "loss": 0.1753, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1378771811723709, + "step": 4020, + "valid_targets_mean": 2559.1, + "valid_targets_min": 783 + }, + { + "epoch": 6.719532554257095, + "grad_norm": 0.7598989722253284, + "learning_rate": 1.976891269002934e-07, + "loss": 0.1337, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10784684121608734, + "step": 4025, + "valid_targets_mean": 2588.8, + "valid_targets_min": 983 + }, + { + "epoch": 6.72787979966611, + "grad_norm": 0.8413872775162127, + "learning_rate": 1.861825015511931e-07, + "loss": 0.1534, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17496861517429352, + "step": 4030, + "valid_targets_mean": 2141.1, + "valid_targets_min": 993 + }, + { + "epoch": 6.736227045075125, + "grad_norm": 0.8634852676022146, + "learning_rate": 1.7501930260009902e-07, + "loss": 0.1676, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15318435430526733, + "step": 4035, + "valid_targets_mean": 1876.1, + "valid_targets_min": 525 + }, + { + "epoch": 6.74457429048414, + "grad_norm": 0.7873931330972831, + "learning_rate": 1.6419972353507895e-07, + "loss": 0.1543, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13331487774848938, + "step": 4040, + "valid_targets_mean": 2130.6, + "valid_targets_min": 595 + }, + { + "epoch": 6.752921535893155, + "grad_norm": 0.8576645010509434, + "learning_rate": 1.5372395188834265e-07, + "loss": 0.1474, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16483411192893982, + "step": 4045, + "valid_targets_mean": 2134.8, + "valid_targets_min": 1088 + }, + { + "epoch": 6.76126878130217, + "grad_norm": 0.8119533846761552, + "learning_rate": 1.435921692330089e-07, + "loss": 0.1542, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1412743628025055, + "step": 4050, + "valid_targets_mean": 2252.3, + "valid_targets_min": 677 + }, + { + "epoch": 6.769616026711185, + "grad_norm": 0.7153742652447527, + "learning_rate": 1.3380455117993684e-07, + "loss": 0.1479, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13982108235359192, + "step": 4055, + "valid_targets_mean": 2874.8, + "valid_targets_min": 1056 + }, + { + "epoch": 6.7779632721202, + "grad_norm": 0.7524907083015943, + "learning_rate": 1.2436126737470189e-07, + "loss": 0.1479, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1693270355463028, + "step": 4060, + "valid_targets_mean": 2893.0, + "valid_targets_min": 1166 + }, + { + "epoch": 6.786310517529215, + "grad_norm": 0.8131918622747486, + "learning_rate": 1.1526248149464236e-07, + "loss": 0.1421, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1297546923160553, + "step": 4065, + "valid_targets_mean": 2438.2, + "valid_targets_min": 750 + }, + { + "epoch": 6.794657762938231, + "grad_norm": 0.8254520795899082, + "learning_rate": 1.0650835124603076e-07, + "loss": 0.1513, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17750290036201477, + "step": 4070, + "valid_targets_mean": 2331.2, + "valid_targets_min": 633 + }, + { + "epoch": 6.803005008347245, + "grad_norm": 0.5693397168090245, + "learning_rate": 9.809902836133367e-08, + "loss": 0.1616, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11621634662151337, + "step": 4075, + "valid_targets_mean": 3902.2, + "valid_targets_min": 525 + }, + { + "epoch": 6.811352253756261, + "grad_norm": 0.7921103418512198, + "learning_rate": 9.0034658596585e-08, + "loss": 0.1544, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1455313265323639, + "step": 4080, + "valid_targets_mean": 2482.9, + "valid_targets_min": 576 + }, + { + "epoch": 6.819699499165275, + "grad_norm": 0.6582144430852905, + "learning_rate": 8.231538172886133e-08, + "loss": 0.1379, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12822675704956055, + "step": 4085, + "valid_targets_mean": 3741.5, + "valid_targets_min": 1190 + }, + { + "epoch": 6.828046744574291, + "grad_norm": 0.8205193387869907, + "learning_rate": 7.494133155385497e-08, + "loss": 0.1513, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14899955689907074, + "step": 4090, + "valid_targets_mean": 2317.9, + "valid_targets_min": 1084 + }, + { + "epoch": 6.836393989983305, + "grad_norm": 0.7539463818014838, + "learning_rate": 6.791263588355801e-08, + "loss": 0.148, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14302432537078857, + "step": 4095, + "valid_targets_mean": 2724.6, + "valid_targets_min": 506 + }, + { + "epoch": 6.844741235392321, + "grad_norm": 0.7932352371809603, + "learning_rate": 6.122941654404635e-08, + "loss": 0.147, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14877904951572418, + "step": 4100, + "valid_targets_mean": 2295.2, + "valid_targets_min": 686 + }, + { + "epoch": 6.853088480801335, + "grad_norm": 0.7592631042172903, + "learning_rate": 5.489178937337025e-08, + "loss": 0.1598, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12802311778068542, + "step": 4105, + "valid_targets_mean": 2435.0, + "valid_targets_min": 645 + }, + { + "epoch": 6.861435726210351, + "grad_norm": 0.740400544183983, + "learning_rate": 4.88998642195404e-08, + "loss": 0.1493, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1675623655319214, + "step": 4110, + "valid_targets_mean": 3020.7, + "valid_targets_min": 626 + }, + { + "epoch": 6.869782971619365, + "grad_norm": 0.7551212330644226, + "learning_rate": 4.325374493862944e-08, + "loss": 0.1454, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13239090144634247, + "step": 4115, + "valid_targets_mean": 2441.3, + "valid_targets_min": 870 + }, + { + "epoch": 6.878130217028381, + "grad_norm": 0.7873026947247144, + "learning_rate": 3.795352939296892e-08, + "loss": 0.1431, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14240020513534546, + "step": 4120, + "valid_targets_mean": 2650.7, + "valid_targets_min": 1026 + }, + { + "epoch": 6.886477462437396, + "grad_norm": 0.7222184243331321, + "learning_rate": 3.2999309449459616e-08, + "loss": 0.1593, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16258357465267181, + "step": 4125, + "valid_targets_mean": 3553.1, + "valid_targets_min": 677 + }, + { + "epoch": 6.894824707846411, + "grad_norm": 0.6934001168798835, + "learning_rate": 2.8391170977968287e-08, + "loss": 0.1398, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12724414467811584, + "step": 4130, + "valid_targets_mean": 2916.6, + "valid_targets_min": 438 + }, + { + "epoch": 6.903171953255426, + "grad_norm": 0.9163732670059814, + "learning_rate": 2.4129193849848907e-08, + "loss": 0.1447, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15469247102737427, + "step": 4135, + "valid_targets_mean": 2397.9, + "valid_targets_min": 640 + }, + { + "epoch": 6.911519198664441, + "grad_norm": 0.7737472588199307, + "learning_rate": 2.0213451936550445e-08, + "loss": 0.1373, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12313120067119598, + "step": 4140, + "valid_targets_mean": 2735.7, + "valid_targets_min": 782 + }, + { + "epoch": 6.919866444073456, + "grad_norm": 0.7576774958636098, + "learning_rate": 1.6644013108342294e-08, + "loss": 0.1495, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13469135761260986, + "step": 4145, + "valid_targets_mean": 2412.2, + "valid_targets_min": 723 + }, + { + "epoch": 6.928213689482471, + "grad_norm": 0.7420215162394664, + "learning_rate": 1.3420939233139696e-08, + "loss": 0.1498, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10983003675937653, + "step": 4150, + "valid_targets_mean": 2161.1, + "valid_targets_min": 924 + }, + { + "epoch": 6.936560934891486, + "grad_norm": 0.7934013440268688, + "learning_rate": 1.0544286175422358e-08, + "loss": 0.1615, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14139649271965027, + "step": 4155, + "valid_targets_mean": 2119.1, + "valid_targets_min": 875 + }, + { + "epoch": 6.944908180300501, + "grad_norm": 0.8656111522075016, + "learning_rate": 8.01410379527301e-09, + "loss": 0.153, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1604747772216797, + "step": 4160, + "valid_targets_mean": 2016.5, + "valid_targets_min": 512 + }, + { + "epoch": 6.953255425709516, + "grad_norm": 0.6875432157123391, + "learning_rate": 5.8304359475158665e-09, + "loss": 0.164, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13078603148460388, + "step": 4165, + "valid_targets_mean": 3092.0, + "valid_targets_min": 762 + }, + { + "epoch": 6.961602671118531, + "grad_norm": 0.5864264486139331, + "learning_rate": 3.993320480946139e-09, + "loss": 0.1359, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11410185694694519, + "step": 4170, + "valid_targets_mean": 3428.4, + "valid_targets_min": 449 + }, + { + "epoch": 6.969949916527546, + "grad_norm": 0.864314819572472, + "learning_rate": 2.5027892376860984e-09, + "loss": 0.1572, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14249885082244873, + "step": 4175, + "valid_targets_mean": 2076.8, + "valid_targets_min": 935 + }, + { + "epoch": 6.978297161936561, + "grad_norm": 0.723051086359361, + "learning_rate": 1.3588680526255282e-09, + "loss": 0.1533, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1290482133626938, + "step": 4180, + "valid_targets_mean": 2310.8, + "valid_targets_min": 888 + }, + { + "epoch": 6.986644407345576, + "grad_norm": 0.769256389319607, + "learning_rate": 5.615767529709715e-10, + "loss": 0.1409, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12539488077163696, + "step": 4185, + "valid_targets_mean": 2040.4, + "valid_targets_min": 375 + }, + { + "epoch": 6.994991652754591, + "grad_norm": 0.7838050810433654, + "learning_rate": 1.1092915791266479e-10, + "loss": 0.1658, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1675959825515747, + "step": 4190, + "valid_targets_mean": 2465.7, + "valid_targets_min": 563 + }, + { + "epoch": 7.0, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16129948198795319, + "step": 4193, + "total_flos": 746122008920064.0, + "train_loss": 0.22925819348185836, + "train_runtime": 17543.4013, + "train_samples_per_second": 3.821, + "train_steps_per_second": 0.239, + "valid_targets_mean": 1733.8, + "valid_targets_min": 533 + } + ], + "logging_steps": 5, + "max_steps": 4193, + "num_input_tokens_seen": 0, + "num_train_epochs": 7, + "save_steps": 1500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": true + }, + "attributes": {} + } + }, + "total_flos": 746122008920064.0, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +}