{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 6.0, "eval_steps": 500, "global_step": 2442, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.012285012285012284, "grad_norm": 21.828573450316345, "learning_rate": 6.530612244897961e-07, "loss": 0.937, "loss_nan_ranks": 0, "loss_rank_avg": 0.9794255495071411, "step": 5, "valid_targets_mean": 268.5, "valid_targets_min": 177 }, { "epoch": 0.02457002457002457, "grad_norm": 18.189500378648972, "learning_rate": 1.469387755102041e-06, "loss": 0.8673, "loss_nan_ranks": 0, "loss_rank_avg": 0.8637964725494385, "step": 10, "valid_targets_mean": 314.2, "valid_targets_min": 189 }, { "epoch": 0.036855036855036855, "grad_norm": 18.23380797156656, "learning_rate": 2.285714285714286e-06, "loss": 0.8424, "loss_nan_ranks": 0, "loss_rank_avg": 0.7877496480941772, "step": 15, "valid_targets_mean": 343.3, "valid_targets_min": 220 }, { "epoch": 0.04914004914004914, "grad_norm": 9.17521011228986, "learning_rate": 3.1020408163265307e-06, "loss": 0.749, "loss_nan_ranks": 0, "loss_rank_avg": 0.7318397760391235, "step": 20, "valid_targets_mean": 299.6, "valid_targets_min": 181 }, { "epoch": 0.06142506142506143, "grad_norm": 8.921541971081542, "learning_rate": 3.9183673469387755e-06, "loss": 0.71, "loss_nan_ranks": 0, "loss_rank_avg": 0.6696276068687439, "step": 25, "valid_targets_mean": 275.7, "valid_targets_min": 161 }, { "epoch": 0.07371007371007371, "grad_norm": 5.854177017012986, "learning_rate": 4.734693877551021e-06, "loss": 0.5951, "loss_nan_ranks": 0, "loss_rank_avg": 0.5909439325332642, "step": 30, "valid_targets_mean": 263.9, "valid_targets_min": 182 }, { "epoch": 0.085995085995086, "grad_norm": 4.416619012683294, "learning_rate": 5.551020408163266e-06, "loss": 0.5023, "loss_nan_ranks": 0, "loss_rank_avg": 0.5014971494674683, "step": 35, "valid_targets_mean": 260.8, "valid_targets_min": 180 }, { "epoch": 0.09828009828009827, "grad_norm": 3.063886083446094, "learning_rate": 6.36734693877551e-06, "loss": 0.443, "loss_nan_ranks": 0, "loss_rank_avg": 0.4250262677669525, "step": 40, "valid_targets_mean": 303.9, "valid_targets_min": 153 }, { "epoch": 0.11056511056511056, "grad_norm": 2.603486486224426, "learning_rate": 7.183673469387755e-06, "loss": 0.3743, "loss_nan_ranks": 0, "loss_rank_avg": 0.3950427174568176, "step": 45, "valid_targets_mean": 267.2, "valid_targets_min": 175 }, { "epoch": 0.12285012285012285, "grad_norm": 2.31305093159146, "learning_rate": 8.000000000000001e-06, "loss": 0.3704, "loss_nan_ranks": 0, "loss_rank_avg": 0.3532523810863495, "step": 50, "valid_targets_mean": 265.7, "valid_targets_min": 175 }, { "epoch": 0.13513513513513514, "grad_norm": 2.4164007864796635, "learning_rate": 8.816326530612247e-06, "loss": 0.3353, "loss_nan_ranks": 0, "loss_rank_avg": 0.3501570224761963, "step": 55, "valid_targets_mean": 275.2, "valid_targets_min": 163 }, { "epoch": 0.14742014742014742, "grad_norm": 2.000226229022591, "learning_rate": 9.63265306122449e-06, "loss": 0.3315, "loss_nan_ranks": 0, "loss_rank_avg": 0.3131003975868225, "step": 60, "valid_targets_mean": 276.6, "valid_targets_min": 178 }, { "epoch": 0.1597051597051597, "grad_norm": 2.6984228937200205, "learning_rate": 1.0448979591836737e-05, "loss": 0.29, "loss_nan_ranks": 0, "loss_rank_avg": 0.30729371309280396, "step": 65, "valid_targets_mean": 299.1, "valid_targets_min": 211 }, { "epoch": 0.171990171990172, "grad_norm": 1.912312656303067, "learning_rate": 1.126530612244898e-05, "loss": 0.3023, "loss_nan_ranks": 0, "loss_rank_avg": 0.28974372148513794, "step": 70, "valid_targets_mean": 300.9, "valid_targets_min": 179 }, { "epoch": 0.18427518427518427, "grad_norm": 1.8097469687267493, "learning_rate": 1.2081632653061225e-05, "loss": 0.2661, "loss_nan_ranks": 0, "loss_rank_avg": 0.21594056487083435, "step": 75, "valid_targets_mean": 308.3, "valid_targets_min": 198 }, { "epoch": 0.19656019656019655, "grad_norm": 1.921005072448764, "learning_rate": 1.2897959183673469e-05, "loss": 0.2624, "loss_nan_ranks": 0, "loss_rank_avg": 0.26075461506843567, "step": 80, "valid_targets_mean": 291.9, "valid_targets_min": 191 }, { "epoch": 0.20884520884520885, "grad_norm": 2.1945600028932764, "learning_rate": 1.3714285714285716e-05, "loss": 0.2603, "loss_nan_ranks": 0, "loss_rank_avg": 0.26103800535202026, "step": 85, "valid_targets_mean": 312.5, "valid_targets_min": 182 }, { "epoch": 0.22113022113022113, "grad_norm": 2.0701456153021085, "learning_rate": 1.4530612244897961e-05, "loss": 0.2684, "loss_nan_ranks": 0, "loss_rank_avg": 0.25262385606765747, "step": 90, "valid_targets_mean": 293.7, "valid_targets_min": 207 }, { "epoch": 0.2334152334152334, "grad_norm": 1.717397960997453, "learning_rate": 1.5346938775510204e-05, "loss": 0.2588, "loss_nan_ranks": 0, "loss_rank_avg": 0.25822433829307556, "step": 95, "valid_targets_mean": 310.6, "valid_targets_min": 155 }, { "epoch": 0.2457002457002457, "grad_norm": 1.8942695378558247, "learning_rate": 1.616326530612245e-05, "loss": 0.2467, "loss_nan_ranks": 0, "loss_rank_avg": 0.26113879680633545, "step": 100, "valid_targets_mean": 295.6, "valid_targets_min": 168 }, { "epoch": 0.257985257985258, "grad_norm": 2.19168702767712, "learning_rate": 1.6979591836734695e-05, "loss": 0.2428, "loss_nan_ranks": 0, "loss_rank_avg": 0.2595846354961395, "step": 105, "valid_targets_mean": 285.9, "valid_targets_min": 175 }, { "epoch": 0.2702702702702703, "grad_norm": 1.9128309803991406, "learning_rate": 1.779591836734694e-05, "loss": 0.2444, "loss_nan_ranks": 0, "loss_rank_avg": 0.2639565169811249, "step": 110, "valid_targets_mean": 292.6, "valid_targets_min": 179 }, { "epoch": 0.28255528255528256, "grad_norm": 1.6648059112852829, "learning_rate": 1.8612244897959185e-05, "loss": 0.2265, "loss_nan_ranks": 0, "loss_rank_avg": 0.203441321849823, "step": 115, "valid_targets_mean": 339.1, "valid_targets_min": 204 }, { "epoch": 0.29484029484029484, "grad_norm": 2.1771719591094585, "learning_rate": 1.942857142857143e-05, "loss": 0.2369, "loss_nan_ranks": 0, "loss_rank_avg": 0.27586930990219116, "step": 120, "valid_targets_mean": 278.7, "valid_targets_min": 176 }, { "epoch": 0.3071253071253071, "grad_norm": 1.7345988983630505, "learning_rate": 2.0244897959183672e-05, "loss": 0.2443, "loss_nan_ranks": 0, "loss_rank_avg": 0.21890853345394135, "step": 125, "valid_targets_mean": 297.8, "valid_targets_min": 175 }, { "epoch": 0.3194103194103194, "grad_norm": 2.0198072880995857, "learning_rate": 2.106122448979592e-05, "loss": 0.2276, "loss_nan_ranks": 0, "loss_rank_avg": 0.21946997940540314, "step": 130, "valid_targets_mean": 276.2, "valid_targets_min": 165 }, { "epoch": 0.3316953316953317, "grad_norm": 2.8171784972025544, "learning_rate": 2.1877551020408166e-05, "loss": 0.2541, "loss_nan_ranks": 0, "loss_rank_avg": 0.23970194160938263, "step": 135, "valid_targets_mean": 289.1, "valid_targets_min": 190 }, { "epoch": 0.343980343980344, "grad_norm": 10.77381911867474, "learning_rate": 2.269387755102041e-05, "loss": 0.2411, "loss_nan_ranks": 0, "loss_rank_avg": 0.2504960298538208, "step": 140, "valid_targets_mean": 350.0, "valid_targets_min": 191 }, { "epoch": 0.35626535626535627, "grad_norm": 1.8132220973726199, "learning_rate": 2.3510204081632656e-05, "loss": 0.2209, "loss_nan_ranks": 0, "loss_rank_avg": 0.200508713722229, "step": 145, "valid_targets_mean": 272.4, "valid_targets_min": 171 }, { "epoch": 0.36855036855036855, "grad_norm": 1.7949247734637428, "learning_rate": 2.4326530612244898e-05, "loss": 0.2463, "loss_nan_ranks": 0, "loss_rank_avg": 0.24943530559539795, "step": 150, "valid_targets_mean": 293.7, "valid_targets_min": 207 }, { "epoch": 0.3808353808353808, "grad_norm": 1.6946433207945966, "learning_rate": 2.5142857142857143e-05, "loss": 0.2283, "loss_nan_ranks": 0, "loss_rank_avg": 0.21148094534873962, "step": 155, "valid_targets_mean": 296.7, "valid_targets_min": 169 }, { "epoch": 0.3931203931203931, "grad_norm": 2.0383990448015057, "learning_rate": 2.5959183673469392e-05, "loss": 0.2268, "loss_nan_ranks": 0, "loss_rank_avg": 0.23519867658615112, "step": 160, "valid_targets_mean": 299.0, "valid_targets_min": 196 }, { "epoch": 0.40540540540540543, "grad_norm": 1.7987266640853492, "learning_rate": 2.6775510204081637e-05, "loss": 0.2247, "loss_nan_ranks": 0, "loss_rank_avg": 0.235760897397995, "step": 165, "valid_targets_mean": 311.4, "valid_targets_min": 190 }, { "epoch": 0.4176904176904177, "grad_norm": 1.9688803673877169, "learning_rate": 2.7591836734693882e-05, "loss": 0.2195, "loss_nan_ranks": 0, "loss_rank_avg": 0.23551854491233826, "step": 170, "valid_targets_mean": 266.2, "valid_targets_min": 193 }, { "epoch": 0.42997542997543, "grad_norm": 1.8031639706326605, "learning_rate": 2.8408163265306124e-05, "loss": 0.2277, "loss_nan_ranks": 0, "loss_rank_avg": 0.23051461577415466, "step": 175, "valid_targets_mean": 263.9, "valid_targets_min": 194 }, { "epoch": 0.44226044226044225, "grad_norm": 1.8343255802732736, "learning_rate": 2.922448979591837e-05, "loss": 0.2313, "loss_nan_ranks": 0, "loss_rank_avg": 0.21711471676826477, "step": 180, "valid_targets_mean": 284.1, "valid_targets_min": 165 }, { "epoch": 0.45454545454545453, "grad_norm": 1.5617851424568638, "learning_rate": 3.0040816326530614e-05, "loss": 0.2292, "loss_nan_ranks": 0, "loss_rank_avg": 0.20747843384742737, "step": 185, "valid_targets_mean": 335.3, "valid_targets_min": 213 }, { "epoch": 0.4668304668304668, "grad_norm": 1.8784838631885186, "learning_rate": 3.085714285714286e-05, "loss": 0.2199, "loss_nan_ranks": 0, "loss_rank_avg": 0.24274489283561707, "step": 190, "valid_targets_mean": 290.1, "valid_targets_min": 174 }, { "epoch": 0.47911547911547914, "grad_norm": 1.5909459305935192, "learning_rate": 3.1673469387755105e-05, "loss": 0.2301, "loss_nan_ranks": 0, "loss_rank_avg": 0.20929014682769775, "step": 195, "valid_targets_mean": 301.6, "valid_targets_min": 188 }, { "epoch": 0.4914004914004914, "grad_norm": 1.6033375323886343, "learning_rate": 3.2489795918367346e-05, "loss": 0.2236, "loss_nan_ranks": 0, "loss_rank_avg": 0.18671980500221252, "step": 200, "valid_targets_mean": 269.0, "valid_targets_min": 180 }, { "epoch": 0.5036855036855037, "grad_norm": 1.8354492552533688, "learning_rate": 3.3306122448979595e-05, "loss": 0.2304, "loss_nan_ranks": 0, "loss_rank_avg": 0.2385072112083435, "step": 205, "valid_targets_mean": 262.4, "valid_targets_min": 170 }, { "epoch": 0.515970515970516, "grad_norm": 1.6800659376124263, "learning_rate": 3.4122448979591843e-05, "loss": 0.2199, "loss_nan_ranks": 0, "loss_rank_avg": 0.2302705943584442, "step": 210, "valid_targets_mean": 275.2, "valid_targets_min": 188 }, { "epoch": 0.5282555282555282, "grad_norm": 1.5865502395767264, "learning_rate": 3.4938775510204085e-05, "loss": 0.2289, "loss_nan_ranks": 0, "loss_rank_avg": 0.21988855302333832, "step": 215, "valid_targets_mean": 298.3, "valid_targets_min": 179 }, { "epoch": 0.5405405405405406, "grad_norm": 1.7905138122246855, "learning_rate": 3.575510204081633e-05, "loss": 0.2128, "loss_nan_ranks": 0, "loss_rank_avg": 0.23109474778175354, "step": 220, "valid_targets_mean": 273.9, "valid_targets_min": 167 }, { "epoch": 0.5528255528255528, "grad_norm": 1.3801231757929844, "learning_rate": 3.6571428571428576e-05, "loss": 0.215, "loss_nan_ranks": 0, "loss_rank_avg": 0.17528986930847168, "step": 225, "valid_targets_mean": 305.2, "valid_targets_min": 209 }, { "epoch": 0.5651105651105651, "grad_norm": 1.5339556243459394, "learning_rate": 3.738775510204082e-05, "loss": 0.2176, "loss_nan_ranks": 0, "loss_rank_avg": 0.217166468501091, "step": 230, "valid_targets_mean": 304.8, "valid_targets_min": 181 }, { "epoch": 0.5773955773955773, "grad_norm": 1.4682727856191702, "learning_rate": 3.8204081632653066e-05, "loss": 0.2069, "loss_nan_ranks": 0, "loss_rank_avg": 0.19995203614234924, "step": 235, "valid_targets_mean": 259.7, "valid_targets_min": 191 }, { "epoch": 0.5896805896805897, "grad_norm": 1.6618654829666388, "learning_rate": 3.902040816326531e-05, "loss": 0.2054, "loss_nan_ranks": 0, "loss_rank_avg": 0.2056364268064499, "step": 240, "valid_targets_mean": 259.0, "valid_targets_min": 185 }, { "epoch": 0.601965601965602, "grad_norm": 1.4143799616785169, "learning_rate": 3.983673469387755e-05, "loss": 0.2251, "loss_nan_ranks": 0, "loss_rank_avg": 0.1939132809638977, "step": 245, "valid_targets_mean": 294.6, "valid_targets_min": 186 }, { "epoch": 0.6142506142506142, "grad_norm": 1.6230149562237202, "learning_rate": 3.9999672841332876e-05, "loss": 0.2049, "loss_nan_ranks": 0, "loss_rank_avg": 0.21055519580841064, "step": 250, "valid_targets_mean": 273.1, "valid_targets_min": 196 }, { "epoch": 0.6265356265356266, "grad_norm": 1.4847547838583997, "learning_rate": 3.999834377759164e-05, "loss": 0.2231, "loss_nan_ranks": 0, "loss_rank_avg": 0.20302093029022217, "step": 255, "valid_targets_mean": 293.2, "valid_targets_min": 202 }, { "epoch": 0.6388206388206388, "grad_norm": 1.5316083998327634, "learning_rate": 3.999599242924703e-05, "loss": 0.231, "loss_nan_ranks": 0, "loss_rank_avg": 0.2048158049583435, "step": 260, "valid_targets_mean": 300.1, "valid_targets_min": 195 }, { "epoch": 0.6511056511056511, "grad_norm": 1.5849150805195251, "learning_rate": 3.999261891649637e-05, "loss": 0.2125, "loss_nan_ranks": 0, "loss_rank_avg": 0.20979838073253632, "step": 265, "valid_targets_mean": 272.2, "valid_targets_min": 177 }, { "epoch": 0.6633906633906634, "grad_norm": 1.5683462961241397, "learning_rate": 3.9988223411788436e-05, "loss": 0.2197, "loss_nan_ranks": 0, "loss_rank_avg": 0.21948650479316711, "step": 270, "valid_targets_mean": 283.1, "valid_targets_min": 191 }, { "epoch": 0.6756756756756757, "grad_norm": 1.548843167061353, "learning_rate": 3.998280613981468e-05, "loss": 0.2163, "loss_nan_ranks": 0, "loss_rank_avg": 0.20030048489570618, "step": 275, "valid_targets_mean": 298.5, "valid_targets_min": 186 }, { "epoch": 0.687960687960688, "grad_norm": 1.318599783868541, "learning_rate": 3.9976367377497725e-05, "loss": 0.2344, "loss_nan_ranks": 0, "loss_rank_avg": 0.21878822147846222, "step": 280, "valid_targets_mean": 327.8, "valid_targets_min": 173 }, { "epoch": 0.7002457002457002, "grad_norm": 1.3949243364279573, "learning_rate": 3.99689074539772e-05, "loss": 0.2151, "loss_nan_ranks": 0, "loss_rank_avg": 0.1877439320087433, "step": 285, "valid_targets_mean": 297.8, "valid_targets_min": 177 }, { "epoch": 0.7125307125307125, "grad_norm": 1.5550310035563013, "learning_rate": 3.9960426750592936e-05, "loss": 0.2128, "loss_nan_ranks": 0, "loss_rank_avg": 0.25338321924209595, "step": 290, "valid_targets_mean": 325.1, "valid_targets_min": 205 }, { "epoch": 0.7248157248157249, "grad_norm": 1.460960353496708, "learning_rate": 3.995092570086546e-05, "loss": 0.2088, "loss_nan_ranks": 0, "loss_rank_avg": 0.21667510271072388, "step": 295, "valid_targets_mean": 294.5, "valid_targets_min": 208 }, { "epoch": 0.7371007371007371, "grad_norm": 1.3995456063934273, "learning_rate": 3.9940404790473825e-05, "loss": 0.2019, "loss_nan_ranks": 0, "loss_rank_avg": 0.2086101472377777, "step": 300, "valid_targets_mean": 281.2, "valid_targets_min": 186 }, { "epoch": 0.7493857493857494, "grad_norm": 1.3214407400802242, "learning_rate": 3.992886455723082e-05, "loss": 0.2136, "loss_nan_ranks": 0, "loss_rank_avg": 0.18029847741127014, "step": 305, "valid_targets_mean": 305.1, "valid_targets_min": 190 }, { "epoch": 0.7616707616707616, "grad_norm": 1.2867669242280386, "learning_rate": 3.991630559105541e-05, "loss": 0.2162, "loss_nan_ranks": 0, "loss_rank_avg": 0.2109563648700714, "step": 310, "valid_targets_mean": 301.1, "valid_targets_min": 151 }, { "epoch": 0.773955773955774, "grad_norm": 1.3183819224156381, "learning_rate": 3.990272853394268e-05, "loss": 0.2014, "loss_nan_ranks": 0, "loss_rank_avg": 0.19336509704589844, "step": 315, "valid_targets_mean": 277.2, "valid_targets_min": 199 }, { "epoch": 0.7862407862407862, "grad_norm": 1.4680677189564573, "learning_rate": 3.988813407993089e-05, "loss": 0.2023, "loss_nan_ranks": 0, "loss_rank_avg": 0.22784331440925598, "step": 320, "valid_targets_mean": 264.2, "valid_targets_min": 161 }, { "epoch": 0.7985257985257985, "grad_norm": 1.3801276733624819, "learning_rate": 3.987252297506613e-05, "loss": 0.2097, "loss_nan_ranks": 0, "loss_rank_avg": 0.22045394778251648, "step": 325, "valid_targets_mean": 263.8, "valid_targets_min": 191 }, { "epoch": 0.8108108108108109, "grad_norm": 1.4550199320231159, "learning_rate": 3.9855896017364075e-05, "loss": 0.2132, "loss_nan_ranks": 0, "loss_rank_avg": 0.21297219395637512, "step": 330, "valid_targets_mean": 282.9, "valid_targets_min": 173 }, { "epoch": 0.8230958230958231, "grad_norm": 1.5560722480235125, "learning_rate": 3.983825405676927e-05, "loss": 0.207, "loss_nan_ranks": 0, "loss_rank_avg": 0.20328381657600403, "step": 335, "valid_targets_mean": 266.1, "valid_targets_min": 189 }, { "epoch": 0.8353808353808354, "grad_norm": 1.410849679108297, "learning_rate": 3.981959799511161e-05, "loss": 0.2046, "loss_nan_ranks": 0, "loss_rank_avg": 0.21027149260044098, "step": 340, "valid_targets_mean": 267.6, "valid_targets_min": 151 }, { "epoch": 0.8476658476658476, "grad_norm": 1.3840083253661812, "learning_rate": 3.979992878606032e-05, "loss": 0.2098, "loss_nan_ranks": 0, "loss_rank_avg": 0.19747713208198547, "step": 345, "valid_targets_mean": 299.4, "valid_targets_min": 184 }, { "epoch": 0.85995085995086, "grad_norm": 1.3652000815105696, "learning_rate": 3.977924743507513e-05, "loss": 0.1951, "loss_nan_ranks": 0, "loss_rank_avg": 0.18659141659736633, "step": 350, "valid_targets_mean": 291.8, "valid_targets_min": 199 }, { "epoch": 0.8722358722358723, "grad_norm": 1.389641915181795, "learning_rate": 3.975755499935492e-05, "loss": 0.2055, "loss_nan_ranks": 0, "loss_rank_avg": 0.1781691163778305, "step": 355, "valid_targets_mean": 314.8, "valid_targets_min": 199 }, { "epoch": 0.8845208845208845, "grad_norm": 1.4449245515640914, "learning_rate": 3.973485258778368e-05, "loss": 0.2034, "loss_nan_ranks": 0, "loss_rank_avg": 0.21806538105010986, "step": 360, "valid_targets_mean": 251.1, "valid_targets_min": 187 }, { "epoch": 0.8968058968058968, "grad_norm": 1.1907028871766794, "learning_rate": 3.971114136087379e-05, "loss": 0.2087, "loss_nan_ranks": 0, "loss_rank_avg": 0.21380890905857086, "step": 365, "valid_targets_mean": 337.8, "valid_targets_min": 168 }, { "epoch": 0.9090909090909091, "grad_norm": 1.4195416879771496, "learning_rate": 3.968642253070675e-05, "loss": 0.2162, "loss_nan_ranks": 0, "loss_rank_avg": 0.20097646117210388, "step": 370, "valid_targets_mean": 245.1, "valid_targets_min": 171 }, { "epoch": 0.9213759213759214, "grad_norm": 1.3119381556363106, "learning_rate": 3.966069736087116e-05, "loss": 0.2093, "loss_nan_ranks": 0, "loss_rank_avg": 0.20315521955490112, "step": 375, "valid_targets_mean": 290.3, "valid_targets_min": 178 }, { "epoch": 0.9336609336609336, "grad_norm": 1.34791883528598, "learning_rate": 3.963396716639818e-05, "loss": 0.2019, "loss_nan_ranks": 0, "loss_rank_avg": 0.2211037278175354, "step": 380, "valid_targets_mean": 268.9, "valid_targets_min": 184 }, { "epoch": 0.9459459459459459, "grad_norm": 1.2417196263795793, "learning_rate": 3.960623331369427e-05, "loss": 0.2019, "loss_nan_ranks": 0, "loss_rank_avg": 0.1735357642173767, "step": 385, "valid_targets_mean": 292.4, "valid_targets_min": 196 }, { "epoch": 0.9582309582309583, "grad_norm": 1.3637099074609842, "learning_rate": 3.957749722047138e-05, "loss": 0.198, "loss_nan_ranks": 0, "loss_rank_avg": 0.2295781522989273, "step": 390, "valid_targets_mean": 277.1, "valid_targets_min": 189 }, { "epoch": 0.9705159705159705, "grad_norm": 1.3329027233726292, "learning_rate": 3.9547760355674405e-05, "loss": 0.1926, "loss_nan_ranks": 0, "loss_rank_avg": 0.2090061455965042, "step": 395, "valid_targets_mean": 293.8, "valid_targets_min": 166 }, { "epoch": 0.9828009828009828, "grad_norm": 1.2975792044668173, "learning_rate": 3.951702423940621e-05, "loss": 0.2069, "loss_nan_ranks": 0, "loss_rank_avg": 0.20602525770664215, "step": 400, "valid_targets_mean": 326.9, "valid_targets_min": 200 }, { "epoch": 0.995085995085995, "grad_norm": 1.566810432257574, "learning_rate": 3.948529044284981e-05, "loss": 0.2056, "loss_nan_ranks": 0, "loss_rank_avg": 0.2413986623287201, "step": 405, "valid_targets_mean": 267.4, "valid_targets_min": 185 }, { "epoch": 1.0073710073710074, "grad_norm": 1.2951424118461912, "learning_rate": 3.9452560588188135e-05, "loss": 0.1838, "loss_nan_ranks": 0, "loss_rank_avg": 0.18647128343582153, "step": 410, "valid_targets_mean": 244.8, "valid_targets_min": 181 }, { "epoch": 1.0196560196560196, "grad_norm": 1.2802480728762462, "learning_rate": 3.9418836348521045e-05, "loss": 0.1817, "loss_nan_ranks": 0, "loss_rank_avg": 0.1765831559896469, "step": 415, "valid_targets_mean": 286.2, "valid_targets_min": 185 }, { "epoch": 1.031941031941032, "grad_norm": 1.5216509857731368, "learning_rate": 3.9384119447779854e-05, "loss": 0.1822, "loss_nan_ranks": 0, "loss_rank_avg": 0.1944650560617447, "step": 420, "valid_targets_mean": 265.7, "valid_targets_min": 185 }, { "epoch": 1.0442260442260443, "grad_norm": 1.1424261544580192, "learning_rate": 3.934841166063919e-05, "loss": 0.183, "loss_nan_ranks": 0, "loss_rank_avg": 0.18103323876857758, "step": 425, "valid_targets_mean": 320.2, "valid_targets_min": 179 }, { "epoch": 1.0565110565110565, "grad_norm": 1.322171195286989, "learning_rate": 3.931171481242625e-05, "loss": 0.164, "loss_nan_ranks": 0, "loss_rank_avg": 0.17217956483364105, "step": 430, "valid_targets_mean": 270.7, "valid_targets_min": 174 }, { "epoch": 1.0687960687960687, "grad_norm": 1.2661422328512346, "learning_rate": 3.927403077902753e-05, "loss": 0.1838, "loss_nan_ranks": 0, "loss_rank_avg": 0.15944147109985352, "step": 435, "valid_targets_mean": 265.6, "valid_targets_min": 173 }, { "epoch": 1.0810810810810811, "grad_norm": 1.2299753921272092, "learning_rate": 3.9235361486792905e-05, "loss": 0.1706, "loss_nan_ranks": 0, "loss_rank_avg": 0.18907806277275085, "step": 440, "valid_targets_mean": 289.7, "valid_targets_min": 200 }, { "epoch": 1.0933660933660934, "grad_norm": 1.3163503443971745, "learning_rate": 3.9195708912437176e-05, "loss": 0.1754, "loss_nan_ranks": 0, "loss_rank_avg": 0.1722145974636078, "step": 445, "valid_targets_mean": 273.1, "valid_targets_min": 175 }, { "epoch": 1.1056511056511056, "grad_norm": 1.2454492144884683, "learning_rate": 3.915507508293901e-05, "loss": 0.1821, "loss_nan_ranks": 0, "loss_rank_avg": 0.1868978589773178, "step": 450, "valid_targets_mean": 291.8, "valid_targets_min": 162 }, { "epoch": 1.117936117936118, "grad_norm": 1.3025160155232147, "learning_rate": 3.911346207543734e-05, "loss": 0.1675, "loss_nan_ranks": 0, "loss_rank_avg": 0.175534188747406, "step": 455, "valid_targets_mean": 283.3, "valid_targets_min": 171 }, { "epoch": 1.1302211302211302, "grad_norm": 1.2806963933607003, "learning_rate": 3.907087201712515e-05, "loss": 0.1673, "loss_nan_ranks": 0, "loss_rank_avg": 0.1741512417793274, "step": 460, "valid_targets_mean": 308.9, "valid_targets_min": 184 }, { "epoch": 1.1425061425061425, "grad_norm": 1.1749968840174028, "learning_rate": 3.902730708514078e-05, "loss": 0.1709, "loss_nan_ranks": 0, "loss_rank_avg": 0.1523202806711197, "step": 465, "valid_targets_mean": 309.1, "valid_targets_min": 202 }, { "epoch": 1.154791154791155, "grad_norm": 1.1785725853985307, "learning_rate": 3.8982769506456616e-05, "loss": 0.1778, "loss_nan_ranks": 0, "loss_rank_avg": 0.1670132577419281, "step": 470, "valid_targets_mean": 320.8, "valid_targets_min": 202 }, { "epoch": 1.1670761670761671, "grad_norm": 1.2722487397082074, "learning_rate": 3.893726155776524e-05, "loss": 0.1842, "loss_nan_ranks": 0, "loss_rank_avg": 0.1820988953113556, "step": 475, "valid_targets_mean": 274.2, "valid_targets_min": 180 }, { "epoch": 1.1793611793611793, "grad_norm": 1.3190086715417202, "learning_rate": 3.8890785565363046e-05, "loss": 0.1782, "loss_nan_ranks": 0, "loss_rank_avg": 0.1790393739938736, "step": 480, "valid_targets_mean": 273.2, "valid_targets_min": 180 }, { "epoch": 1.1916461916461916, "grad_norm": 1.1776601265345403, "learning_rate": 3.884334390503136e-05, "loss": 0.1853, "loss_nan_ranks": 0, "loss_rank_avg": 0.16915017366409302, "step": 485, "valid_targets_mean": 279.8, "valid_targets_min": 177 }, { "epoch": 1.203931203931204, "grad_norm": 1.2302095111987355, "learning_rate": 3.8794939001914955e-05, "loss": 0.1716, "loss_nan_ranks": 0, "loss_rank_avg": 0.17493095993995667, "step": 490, "valid_targets_mean": 280.0, "valid_targets_min": 179 }, { "epoch": 1.2162162162162162, "grad_norm": 1.4349902562438588, "learning_rate": 3.87455733303981e-05, "loss": 0.1868, "loss_nan_ranks": 0, "loss_rank_avg": 0.19841422140598297, "step": 495, "valid_targets_mean": 272.3, "valid_targets_min": 178 }, { "epoch": 1.2285012285012284, "grad_norm": 1.2630360698377743, "learning_rate": 3.869524941397805e-05, "loss": 0.1869, "loss_nan_ranks": 0, "loss_rank_avg": 0.20843303203582764, "step": 500, "valid_targets_mean": 290.9, "valid_targets_min": 195 }, { "epoch": 1.2407862407862407, "grad_norm": 1.192162654133184, "learning_rate": 3.8643969825136095e-05, "loss": 0.1775, "loss_nan_ranks": 0, "loss_rank_avg": 0.17213059961795807, "step": 505, "valid_targets_mean": 303.1, "valid_targets_min": 170 }, { "epoch": 1.253071253071253, "grad_norm": 1.3665979807179884, "learning_rate": 3.8591737185206024e-05, "loss": 0.1845, "loss_nan_ranks": 0, "loss_rank_avg": 0.18279464542865753, "step": 510, "valid_targets_mean": 269.2, "valid_targets_min": 158 }, { "epoch": 1.2653562653562653, "grad_norm": 1.3115790064137476, "learning_rate": 3.853855416424011e-05, "loss": 0.1773, "loss_nan_ranks": 0, "loss_rank_avg": 0.17230263352394104, "step": 515, "valid_targets_mean": 291.9, "valid_targets_min": 201 }, { "epoch": 1.2776412776412776, "grad_norm": 1.51783665510812, "learning_rate": 3.848442348087267e-05, "loss": 0.1786, "loss_nan_ranks": 0, "loss_rank_avg": 0.213800311088562, "step": 520, "valid_targets_mean": 234.1, "valid_targets_min": 191 }, { "epoch": 1.28992628992629, "grad_norm": 1.3040614668953388, "learning_rate": 3.842934790218106e-05, "loss": 0.1841, "loss_nan_ranks": 0, "loss_rank_avg": 0.19344457983970642, "step": 525, "valid_targets_mean": 305.7, "valid_targets_min": 188 }, { "epoch": 1.3022113022113022, "grad_norm": 1.1124424034161675, "learning_rate": 3.837333024354422e-05, "loss": 0.1716, "loss_nan_ranks": 0, "loss_rank_avg": 0.18301331996917725, "step": 530, "valid_targets_mean": 310.6, "valid_targets_min": 191 }, { "epoch": 1.3144963144963144, "grad_norm": 1.351795629570991, "learning_rate": 3.8316373368498794e-05, "loss": 0.1703, "loss_nan_ranks": 0, "loss_rank_avg": 0.19358792901039124, "step": 535, "valid_targets_mean": 263.3, "valid_targets_min": 171 }, { "epoch": 1.3267813267813269, "grad_norm": 1.2961458262411862, "learning_rate": 3.82584801885927e-05, "loss": 0.1903, "loss_nan_ranks": 0, "loss_rank_avg": 0.20222784578800201, "step": 540, "valid_targets_mean": 264.8, "valid_targets_min": 151 }, { "epoch": 1.339066339066339, "grad_norm": 1.1409725355493698, "learning_rate": 3.8199653663236336e-05, "loss": 0.1905, "loss_nan_ranks": 0, "loss_rank_avg": 0.17877429723739624, "step": 545, "valid_targets_mean": 311.7, "valid_targets_min": 190 }, { "epoch": 1.3513513513513513, "grad_norm": 1.456488377309983, "learning_rate": 3.813989679955128e-05, "loss": 0.1734, "loss_nan_ranks": 0, "loss_rank_avg": 0.19150184094905853, "step": 550, "valid_targets_mean": 291.3, "valid_targets_min": 177 }, { "epoch": 1.3636363636363638, "grad_norm": 1.245971551623498, "learning_rate": 3.8079212652216595e-05, "loss": 0.175, "loss_nan_ranks": 0, "loss_rank_avg": 0.17956264317035675, "step": 555, "valid_targets_mean": 282.0, "valid_targets_min": 211 }, { "epoch": 1.375921375921376, "grad_norm": 1.2410609684824025, "learning_rate": 3.8017604323312616e-05, "loss": 0.1743, "loss_nan_ranks": 0, "loss_rank_avg": 0.17557784914970398, "step": 560, "valid_targets_mean": 286.9, "valid_targets_min": 180 }, { "epoch": 1.3882063882063882, "grad_norm": 1.0381782726389142, "learning_rate": 3.795507496216246e-05, "loss": 0.1842, "loss_nan_ranks": 0, "loss_rank_avg": 0.1485171914100647, "step": 565, "valid_targets_mean": 278.0, "valid_targets_min": 188 }, { "epoch": 1.4004914004914004, "grad_norm": 1.0820544978337316, "learning_rate": 3.789162776517098e-05, "loss": 0.1756, "loss_nan_ranks": 0, "loss_rank_avg": 0.16584482789039612, "step": 570, "valid_targets_mean": 317.9, "valid_targets_min": 210 }, { "epoch": 1.4127764127764126, "grad_norm": 1.1991890772641072, "learning_rate": 3.78272659756614e-05, "loss": 0.1869, "loss_nan_ranks": 0, "loss_rank_avg": 0.1857534945011139, "step": 575, "valid_targets_mean": 293.8, "valid_targets_min": 195 }, { "epoch": 1.425061425061425, "grad_norm": 1.600560513503754, "learning_rate": 3.776199288370948e-05, "loss": 0.1824, "loss_nan_ranks": 0, "loss_rank_avg": 0.2185993194580078, "step": 580, "valid_targets_mean": 233.5, "valid_targets_min": 173 }, { "epoch": 1.4373464373464373, "grad_norm": 0.9295022847410005, "learning_rate": 3.7695811825975386e-05, "loss": 0.1675, "loss_nan_ranks": 0, "loss_rank_avg": 0.14968982338905334, "step": 585, "valid_targets_mean": 348.5, "valid_targets_min": 251 }, { "epoch": 1.4496314496314495, "grad_norm": 1.3034896421335485, "learning_rate": 3.76287261855331e-05, "loss": 0.1822, "loss_nan_ranks": 0, "loss_rank_avg": 0.21649613976478577, "step": 590, "valid_targets_mean": 307.3, "valid_targets_min": 173 }, { "epoch": 1.461916461916462, "grad_norm": 1.1209788824780453, "learning_rate": 3.7560739391697465e-05, "loss": 0.1831, "loss_nan_ranks": 0, "loss_rank_avg": 0.17340219020843506, "step": 595, "valid_targets_mean": 282.5, "valid_targets_min": 181 }, { "epoch": 1.4742014742014742, "grad_norm": 1.0128989971213274, "learning_rate": 3.749185491984891e-05, "loss": 0.1768, "loss_nan_ranks": 0, "loss_rank_avg": 0.15685750544071198, "step": 600, "valid_targets_mean": 319.3, "valid_targets_min": 181 }, { "epoch": 1.4864864864864864, "grad_norm": 1.3301912209947289, "learning_rate": 3.7422076291255785e-05, "loss": 0.1782, "loss_nan_ranks": 0, "loss_rank_avg": 0.18290740251541138, "step": 605, "valid_targets_mean": 246.0, "valid_targets_min": 197 }, { "epoch": 1.4987714987714988, "grad_norm": 1.2141371080569727, "learning_rate": 3.7351407072894356e-05, "loss": 0.1791, "loss_nan_ranks": 0, "loss_rank_avg": 0.17221075296401978, "step": 610, "valid_targets_mean": 282.5, "valid_targets_min": 163 }, { "epoch": 1.511056511056511, "grad_norm": 1.3436387213079, "learning_rate": 3.7279850877266486e-05, "loss": 0.1854, "loss_nan_ranks": 0, "loss_rank_avg": 0.19260115921497345, "step": 615, "valid_targets_mean": 257.5, "valid_targets_min": 172 }, { "epoch": 1.5233415233415233, "grad_norm": 1.269034311825868, "learning_rate": 3.720741136221491e-05, "loss": 0.1701, "loss_nan_ranks": 0, "loss_rank_avg": 0.18720345199108124, "step": 620, "valid_targets_mean": 298.3, "valid_targets_min": 174 }, { "epoch": 1.5356265356265357, "grad_norm": 1.1209725107182271, "learning_rate": 3.713409223073636e-05, "loss": 0.181, "loss_nan_ranks": 0, "loss_rank_avg": 0.17303812503814697, "step": 625, "valid_targets_mean": 315.3, "valid_targets_min": 207 }, { "epoch": 1.547911547911548, "grad_norm": 1.1900118019821608, "learning_rate": 3.705989723079214e-05, "loss": 0.1794, "loss_nan_ranks": 0, "loss_rank_avg": 0.17867326736450195, "step": 630, "valid_targets_mean": 259.6, "valid_targets_min": 189 }, { "epoch": 1.5601965601965602, "grad_norm": 1.1857093776091674, "learning_rate": 3.698483015511665e-05, "loss": 0.1652, "loss_nan_ranks": 0, "loss_rank_avg": 0.17728665471076965, "step": 635, "valid_targets_mean": 272.1, "valid_targets_min": 185 }, { "epoch": 1.5724815724815726, "grad_norm": 1.168425452562832, "learning_rate": 3.690889484102344e-05, "loss": 0.1835, "loss_nan_ranks": 0, "loss_rank_avg": 0.16271118819713593, "step": 640, "valid_targets_mean": 300.6, "valid_targets_min": 198 }, { "epoch": 1.5847665847665846, "grad_norm": 1.178629934977339, "learning_rate": 3.683209517020908e-05, "loss": 0.1855, "loss_nan_ranks": 0, "loss_rank_avg": 0.19160902500152588, "step": 645, "valid_targets_mean": 294.7, "valid_targets_min": 186 }, { "epoch": 1.597051597051597, "grad_norm": 1.1113577054541206, "learning_rate": 3.675443506855473e-05, "loss": 0.1721, "loss_nan_ranks": 0, "loss_rank_avg": 0.15952055156230927, "step": 650, "valid_targets_mean": 321.6, "valid_targets_min": 166 }, { "epoch": 1.6093366093366095, "grad_norm": 1.200099720982284, "learning_rate": 3.6675918505925456e-05, "loss": 0.1643, "loss_nan_ranks": 0, "loss_rank_avg": 0.16167262196540833, "step": 655, "valid_targets_mean": 255.8, "valid_targets_min": 172 }, { "epoch": 1.6216216216216215, "grad_norm": 1.0747001780327297, "learning_rate": 3.6596549495967276e-05, "loss": 0.1864, "loss_nan_ranks": 0, "loss_rank_avg": 0.18109141290187836, "step": 660, "valid_targets_mean": 353.0, "valid_targets_min": 172 }, { "epoch": 1.633906633906634, "grad_norm": 1.1956349684205054, "learning_rate": 3.651633209590202e-05, "loss": 0.1716, "loss_nan_ranks": 0, "loss_rank_avg": 0.16201306879520416, "step": 665, "valid_targets_mean": 276.9, "valid_targets_min": 165 }, { "epoch": 1.6461916461916462, "grad_norm": 1.2892179457054922, "learning_rate": 3.6435270406319914e-05, "loss": 0.1935, "loss_nan_ranks": 0, "loss_rank_avg": 0.20578226447105408, "step": 670, "valid_targets_mean": 273.6, "valid_targets_min": 176 }, { "epoch": 1.6584766584766584, "grad_norm": 1.1191217907798112, "learning_rate": 3.635336857096997e-05, "loss": 0.1819, "loss_nan_ranks": 0, "loss_rank_avg": 0.1828370988368988, "step": 675, "valid_targets_mean": 297.2, "valid_targets_min": 187 }, { "epoch": 1.6707616707616708, "grad_norm": 0.9800627597295655, "learning_rate": 3.627063077654815e-05, "loss": 0.1661, "loss_nan_ranks": 0, "loss_rank_avg": 0.15197354555130005, "step": 680, "valid_targets_mean": 281.6, "valid_targets_min": 205 }, { "epoch": 1.683046683046683, "grad_norm": 1.1375998352581815, "learning_rate": 3.618706125248337e-05, "loss": 0.1686, "loss_nan_ranks": 0, "loss_rank_avg": 0.1769002228975296, "step": 685, "valid_targets_mean": 288.8, "valid_targets_min": 204 }, { "epoch": 1.6953316953316953, "grad_norm": 1.1790587870854499, "learning_rate": 3.6102664270721275e-05, "loss": 0.1793, "loss_nan_ranks": 0, "loss_rank_avg": 0.17439332604408264, "step": 690, "valid_targets_mean": 278.9, "valid_targets_min": 198 }, { "epoch": 1.7076167076167077, "grad_norm": 1.2679107612807372, "learning_rate": 3.601744414550589e-05, "loss": 0.1794, "loss_nan_ranks": 0, "loss_rank_avg": 0.20159870386123657, "step": 695, "valid_targets_mean": 271.8, "valid_targets_min": 194 }, { "epoch": 1.71990171990172, "grad_norm": 1.3475850549237658, "learning_rate": 3.593140523315906e-05, "loss": 0.1795, "loss_nan_ranks": 0, "loss_rank_avg": 0.20103436708450317, "step": 700, "valid_targets_mean": 254.8, "valid_targets_min": 169 }, { "epoch": 1.7321867321867321, "grad_norm": 1.2712782427882479, "learning_rate": 3.584455193185778e-05, "loss": 0.1875, "loss_nan_ranks": 0, "loss_rank_avg": 0.18751715123653412, "step": 705, "valid_targets_mean": 272.5, "valid_targets_min": 182 }, { "epoch": 1.7444717444717446, "grad_norm": 1.2832130711780594, "learning_rate": 3.575688868140933e-05, "loss": 0.189, "loss_nan_ranks": 0, "loss_rank_avg": 0.20148850977420807, "step": 710, "valid_targets_mean": 262.1, "valid_targets_min": 175 }, { "epoch": 1.7567567567567568, "grad_norm": 1.3080485041536247, "learning_rate": 3.566841996302438e-05, "loss": 0.1778, "loss_nan_ranks": 0, "loss_rank_avg": 0.19301211833953857, "step": 715, "valid_targets_mean": 249.7, "valid_targets_min": 170 }, { "epoch": 1.769041769041769, "grad_norm": 1.3232267942591605, "learning_rate": 3.557915029908787e-05, "loss": 0.1855, "loss_nan_ranks": 0, "loss_rank_avg": 0.2034870982170105, "step": 720, "valid_targets_mean": 306.4, "valid_targets_min": 167 }, { "epoch": 1.7813267813267815, "grad_norm": 1.0634606496766785, "learning_rate": 3.548908425292784e-05, "loss": 0.1706, "loss_nan_ranks": 0, "loss_rank_avg": 0.16110247373580933, "step": 725, "valid_targets_mean": 308.6, "valid_targets_min": 180 }, { "epoch": 1.7936117936117935, "grad_norm": 1.1440076928342802, "learning_rate": 3.5398226428582165e-05, "loss": 0.1694, "loss_nan_ranks": 0, "loss_rank_avg": 0.16594558954238892, "step": 730, "valid_targets_mean": 292.2, "valid_targets_min": 165 }, { "epoch": 1.805896805896806, "grad_norm": 1.207925739585077, "learning_rate": 3.530658147056321e-05, "loss": 0.1758, "loss_nan_ranks": 0, "loss_rank_avg": 0.19194138050079346, "step": 735, "valid_targets_mean": 263.3, "valid_targets_min": 192 }, { "epoch": 1.8181818181818183, "grad_norm": 1.2364944549853603, "learning_rate": 3.521415406362041e-05, "loss": 0.1778, "loss_nan_ranks": 0, "loss_rank_avg": 0.176443412899971, "step": 740, "valid_targets_mean": 299.1, "valid_targets_min": 168 }, { "epoch": 1.8304668304668303, "grad_norm": 1.2268865114401033, "learning_rate": 3.512094893250076e-05, "loss": 0.1668, "loss_nan_ranks": 0, "loss_rank_avg": 0.14345869421958923, "step": 745, "valid_targets_mean": 274.6, "valid_targets_min": 167 }, { "epoch": 1.8427518427518428, "grad_norm": 1.1293243315280994, "learning_rate": 3.5026970841707366e-05, "loss": 0.1784, "loss_nan_ranks": 0, "loss_rank_avg": 0.17314857244491577, "step": 750, "valid_targets_mean": 313.4, "valid_targets_min": 158 }, { "epoch": 1.855036855036855, "grad_norm": 1.1425794858421863, "learning_rate": 3.493222459525579e-05, "loss": 0.1725, "loss_nan_ranks": 0, "loss_rank_avg": 0.15554465353488922, "step": 755, "valid_targets_mean": 290.2, "valid_targets_min": 202 }, { "epoch": 1.8673218673218672, "grad_norm": 1.1644319804080203, "learning_rate": 3.483671503642858e-05, "loss": 0.1651, "loss_nan_ranks": 0, "loss_rank_avg": 0.17745967209339142, "step": 760, "valid_targets_mean": 263.4, "valid_targets_min": 184 }, { "epoch": 1.8796068796068797, "grad_norm": 1.018998554564584, "learning_rate": 3.474044704752761e-05, "loss": 0.1817, "loss_nan_ranks": 0, "loss_rank_avg": 0.17490056157112122, "step": 765, "valid_targets_mean": 322.0, "valid_targets_min": 175 }, { "epoch": 1.8918918918918919, "grad_norm": 1.2837655093417555, "learning_rate": 3.464342554962454e-05, "loss": 0.1772, "loss_nan_ranks": 0, "loss_rank_avg": 0.20276513695716858, "step": 770, "valid_targets_mean": 256.0, "valid_targets_min": 185 }, { "epoch": 1.904176904176904, "grad_norm": 1.0150932357803577, "learning_rate": 3.4545655502309254e-05, "loss": 0.1639, "loss_nan_ranks": 0, "loss_rank_avg": 0.1496233493089676, "step": 775, "valid_targets_mean": 292.7, "valid_targets_min": 182 }, { "epoch": 1.9164619164619165, "grad_norm": 1.039567065318361, "learning_rate": 3.444714190343633e-05, "loss": 0.1749, "loss_nan_ranks": 0, "loss_rank_avg": 0.15797200798988342, "step": 780, "valid_targets_mean": 287.1, "valid_targets_min": 169 }, { "epoch": 1.9287469287469288, "grad_norm": 1.226025803933689, "learning_rate": 3.434788978886957e-05, "loss": 0.1742, "loss_nan_ranks": 0, "loss_rank_avg": 0.1846623420715332, "step": 785, "valid_targets_mean": 276.4, "valid_targets_min": 184 }, { "epoch": 1.941031941031941, "grad_norm": 1.0838917056279684, "learning_rate": 3.424790423222455e-05, "loss": 0.1608, "loss_nan_ranks": 0, "loss_rank_avg": 0.1821170300245285, "step": 790, "valid_targets_mean": 306.9, "valid_targets_min": 173 }, { "epoch": 1.9533169533169534, "grad_norm": 1.2066577518254178, "learning_rate": 3.414719034460928e-05, "loss": 0.1776, "loss_nan_ranks": 0, "loss_rank_avg": 0.18606868386268616, "step": 795, "valid_targets_mean": 284.3, "valid_targets_min": 177 }, { "epoch": 1.9656019656019657, "grad_norm": 1.182316919132192, "learning_rate": 3.404575327436294e-05, "loss": 0.1835, "loss_nan_ranks": 0, "loss_rank_avg": 0.1879071295261383, "step": 800, "valid_targets_mean": 292.2, "valid_targets_min": 183 }, { "epoch": 1.9778869778869779, "grad_norm": 0.9533388827456248, "learning_rate": 3.3943598206792665e-05, "loss": 0.1672, "loss_nan_ranks": 0, "loss_rank_avg": 0.15850934386253357, "step": 805, "valid_targets_mean": 322.4, "valid_targets_min": 207 }, { "epoch": 1.9901719901719903, "grad_norm": 1.2511719215897503, "learning_rate": 3.384073036390857e-05, "loss": 0.1781, "loss_nan_ranks": 0, "loss_rank_avg": 0.19224214553833008, "step": 810, "valid_targets_mean": 254.6, "valid_targets_min": 172 }, { "epoch": 2.0024570024570023, "grad_norm": 0.9810065756304504, "learning_rate": 3.373715500415667e-05, "loss": 0.1551, "loss_nan_ranks": 0, "loss_rank_avg": 0.14764966070652008, "step": 815, "valid_targets_mean": 272.2, "valid_targets_min": 192 }, { "epoch": 2.0147420147420148, "grad_norm": 1.0832130084750442, "learning_rate": 3.363287742215023e-05, "loss": 0.1397, "loss_nan_ranks": 0, "loss_rank_avg": 0.145850270986557, "step": 820, "valid_targets_mean": 297.0, "valid_targets_min": 201 }, { "epoch": 2.027027027027027, "grad_norm": 1.0816408420091737, "learning_rate": 3.352790294839898e-05, "loss": 0.14, "loss_nan_ranks": 0, "loss_rank_avg": 0.1298796385526657, "step": 825, "valid_targets_mean": 329.5, "valid_targets_min": 201 }, { "epoch": 2.039312039312039, "grad_norm": 1.1857886911136357, "learning_rate": 3.3422236949036726e-05, "loss": 0.1388, "loss_nan_ranks": 0, "loss_rank_avg": 0.13385671377182007, "step": 830, "valid_targets_mean": 315.7, "valid_targets_min": 183 }, { "epoch": 2.0515970515970516, "grad_norm": 1.087447613929306, "learning_rate": 3.331588482554697e-05, "loss": 0.1433, "loss_nan_ranks": 0, "loss_rank_avg": 0.1463298797607422, "step": 835, "valid_targets_mean": 298.2, "valid_targets_min": 185 }, { "epoch": 2.063882063882064, "grad_norm": 1.1139917820008998, "learning_rate": 3.320885201448684e-05, "loss": 0.1439, "loss_nan_ranks": 0, "loss_rank_avg": 0.13291707634925842, "step": 840, "valid_targets_mean": 277.4, "valid_targets_min": 168 }, { "epoch": 2.076167076167076, "grad_norm": 1.1543219827640314, "learning_rate": 3.310114398720917e-05, "loss": 0.1343, "loss_nan_ranks": 0, "loss_rank_avg": 0.13693857192993164, "step": 845, "valid_targets_mean": 300.4, "valid_targets_min": 210 }, { "epoch": 2.0884520884520885, "grad_norm": 1.163656064993873, "learning_rate": 3.299276624958281e-05, "loss": 0.1363, "loss_nan_ranks": 0, "loss_rank_avg": 0.1223965734243393, "step": 850, "valid_targets_mean": 296.1, "valid_targets_min": 191 }, { "epoch": 2.100737100737101, "grad_norm": 1.182907822313183, "learning_rate": 3.288372434171116e-05, "loss": 0.1464, "loss_nan_ranks": 0, "loss_rank_avg": 0.13137024641036987, "step": 855, "valid_targets_mean": 301.5, "valid_targets_min": 204 }, { "epoch": 2.113022113022113, "grad_norm": 1.130151503965982, "learning_rate": 3.2774023837648986e-05, "loss": 0.1381, "loss_nan_ranks": 0, "loss_rank_avg": 0.13580578565597534, "step": 860, "valid_targets_mean": 302.9, "valid_targets_min": 172 }, { "epoch": 2.1253071253071254, "grad_norm": 1.0682493935461657, "learning_rate": 3.26636703451175e-05, "loss": 0.132, "loss_nan_ranks": 0, "loss_rank_avg": 0.11657285690307617, "step": 865, "valid_targets_mean": 324.9, "valid_targets_min": 182 }, { "epoch": 2.1375921375921374, "grad_norm": 1.0036372900570354, "learning_rate": 3.2552669505217646e-05, "loss": 0.1379, "loss_nan_ranks": 0, "loss_rank_avg": 0.11639823764562607, "step": 870, "valid_targets_mean": 302.4, "valid_targets_min": 203 }, { "epoch": 2.14987714987715, "grad_norm": 1.0693984799892393, "learning_rate": 3.24410269921418e-05, "loss": 0.1372, "loss_nan_ranks": 0, "loss_rank_avg": 0.128000408411026, "step": 875, "valid_targets_mean": 278.4, "valid_targets_min": 203 }, { "epoch": 2.1621621621621623, "grad_norm": 1.3498639243212107, "learning_rate": 3.232874851288367e-05, "loss": 0.1429, "loss_nan_ranks": 0, "loss_rank_avg": 0.1626465916633606, "step": 880, "valid_targets_mean": 272.4, "valid_targets_min": 182 }, { "epoch": 2.1744471744471743, "grad_norm": 1.019030334994287, "learning_rate": 3.221583980694659e-05, "loss": 0.1295, "loss_nan_ranks": 0, "loss_rank_avg": 0.121977299451828, "step": 885, "valid_targets_mean": 284.4, "valid_targets_min": 175 }, { "epoch": 2.1867321867321867, "grad_norm": 1.3963541597159956, "learning_rate": 3.21023066460501e-05, "loss": 0.1398, "loss_nan_ranks": 0, "loss_rank_avg": 0.15653429925441742, "step": 890, "valid_targets_mean": 265.3, "valid_targets_min": 190 }, { "epoch": 2.199017199017199, "grad_norm": 1.1001419560754833, "learning_rate": 3.198815483383492e-05, "loss": 0.1372, "loss_nan_ranks": 0, "loss_rank_avg": 0.13391342759132385, "step": 895, "valid_targets_mean": 299.2, "valid_targets_min": 209 }, { "epoch": 2.211302211302211, "grad_norm": 1.1998871632293986, "learning_rate": 3.1873390205566295e-05, "loss": 0.1435, "loss_nan_ranks": 0, "loss_rank_avg": 0.14677861332893372, "step": 900, "valid_targets_mean": 301.2, "valid_targets_min": 188 }, { "epoch": 2.2235872235872236, "grad_norm": 1.249717461629138, "learning_rate": 3.175801862783565e-05, "loss": 0.1378, "loss_nan_ranks": 0, "loss_rank_avg": 0.1533431112766266, "step": 905, "valid_targets_mean": 309.2, "valid_targets_min": 185 }, { "epoch": 2.235872235872236, "grad_norm": 1.1284086236221715, "learning_rate": 3.164204599826077e-05, "loss": 0.1421, "loss_nan_ranks": 0, "loss_rank_avg": 0.126274973154068, "step": 910, "valid_targets_mean": 335.9, "valid_targets_min": 203 }, { "epoch": 2.248157248157248, "grad_norm": 1.0620334936290645, "learning_rate": 3.1525478245184245e-05, "loss": 0.129, "loss_nan_ranks": 0, "loss_rank_avg": 0.12917059659957886, "step": 915, "valid_targets_mean": 316.8, "valid_targets_min": 183 }, { "epoch": 2.2604422604422605, "grad_norm": 1.0690096977912291, "learning_rate": 3.140832132737051e-05, "loss": 0.1258, "loss_nan_ranks": 0, "loss_rank_avg": 0.12072303891181946, "step": 920, "valid_targets_mean": 342.7, "valid_targets_min": 187 }, { "epoch": 2.2727272727272725, "grad_norm": 1.2021793687768567, "learning_rate": 3.129058123370116e-05, "loss": 0.1429, "loss_nan_ranks": 0, "loss_rank_avg": 0.14643311500549316, "step": 925, "valid_targets_mean": 263.2, "valid_targets_min": 162 }, { "epoch": 2.285012285012285, "grad_norm": 0.9817362358524011, "learning_rate": 3.117226398286887e-05, "loss": 0.1394, "loss_nan_ranks": 0, "loss_rank_avg": 0.1279481202363968, "step": 930, "valid_targets_mean": 328.6, "valid_targets_min": 186 }, { "epoch": 2.2972972972972974, "grad_norm": 1.2051439570322329, "learning_rate": 3.105337562306968e-05, "loss": 0.138, "loss_nan_ranks": 0, "loss_rank_avg": 0.14488676190376282, "step": 935, "valid_targets_mean": 273.9, "valid_targets_min": 194 }, { "epoch": 2.30958230958231, "grad_norm": 1.236658816602863, "learning_rate": 3.0933922231693854e-05, "loss": 0.1443, "loss_nan_ranks": 0, "loss_rank_avg": 0.15047407150268555, "step": 940, "valid_targets_mean": 335.5, "valid_targets_min": 194 }, { "epoch": 2.321867321867322, "grad_norm": 1.1126426151331683, "learning_rate": 3.08139099150152e-05, "loss": 0.1388, "loss_nan_ranks": 0, "loss_rank_avg": 0.13728468120098114, "step": 945, "valid_targets_mean": 305.1, "valid_targets_min": 186 }, { "epoch": 2.3341523341523343, "grad_norm": 1.149340566785369, "learning_rate": 3.069334480787893e-05, "loss": 0.1404, "loss_nan_ranks": 0, "loss_rank_avg": 0.13989925384521484, "step": 950, "valid_targets_mean": 276.3, "valid_targets_min": 190 }, { "epoch": 2.3464373464373462, "grad_norm": 1.16144894754095, "learning_rate": 3.057223307338806e-05, "loss": 0.135, "loss_nan_ranks": 0, "loss_rank_avg": 0.13708898425102234, "step": 955, "valid_targets_mean": 310.6, "valid_targets_min": 177 }, { "epoch": 2.3587223587223587, "grad_norm": 1.306012028805935, "learning_rate": 3.0450580902588346e-05, "loss": 0.1442, "loss_nan_ranks": 0, "loss_rank_avg": 0.15716275572776794, "step": 960, "valid_targets_mean": 268.7, "valid_targets_min": 201 }, { "epoch": 2.371007371007371, "grad_norm": 0.9694750467348594, "learning_rate": 3.032839451415182e-05, "loss": 0.1347, "loss_nan_ranks": 0, "loss_rank_avg": 0.12117061018943787, "step": 965, "valid_targets_mean": 349.0, "valid_targets_min": 203 }, { "epoch": 2.383292383292383, "grad_norm": 1.077532460614147, "learning_rate": 3.0205680154058904e-05, "loss": 0.1323, "loss_nan_ranks": 0, "loss_rank_avg": 0.1403960883617401, "step": 970, "valid_targets_mean": 277.4, "valid_targets_min": 205 }, { "epoch": 2.3955773955773956, "grad_norm": 1.2501839738408294, "learning_rate": 3.0082444095279117e-05, "loss": 0.1373, "loss_nan_ranks": 0, "loss_rank_avg": 0.14994065463542938, "step": 975, "valid_targets_mean": 291.8, "valid_targets_min": 189 }, { "epoch": 2.407862407862408, "grad_norm": 1.1037335085331434, "learning_rate": 2.9958692637450406e-05, "loss": 0.1377, "loss_nan_ranks": 0, "loss_rank_avg": 0.13044089078903198, "step": 980, "valid_targets_mean": 290.8, "valid_targets_min": 201 }, { "epoch": 2.42014742014742, "grad_norm": 1.1604373221400255, "learning_rate": 2.983443210655714e-05, "loss": 0.1356, "loss_nan_ranks": 0, "loss_rank_avg": 0.15031030774116516, "step": 985, "valid_targets_mean": 255.2, "valid_targets_min": 179 }, { "epoch": 2.4324324324324325, "grad_norm": 1.0697447874208066, "learning_rate": 2.9709668854606706e-05, "loss": 0.1433, "loss_nan_ranks": 0, "loss_rank_avg": 0.13531816005706787, "step": 990, "valid_targets_mean": 304.6, "valid_targets_min": 210 }, { "epoch": 2.444717444717445, "grad_norm": 1.1703824846772444, "learning_rate": 2.9584409259304828e-05, "loss": 0.1466, "loss_nan_ranks": 0, "loss_rank_avg": 0.140241801738739, "step": 995, "valid_targets_mean": 268.0, "valid_targets_min": 167 }, { "epoch": 2.457002457002457, "grad_norm": 1.2307201377299632, "learning_rate": 2.945865972372954e-05, "loss": 0.1486, "loss_nan_ranks": 0, "loss_rank_avg": 0.1605660617351532, "step": 1000, "valid_targets_mean": 303.4, "valid_targets_min": 182 }, { "epoch": 2.4692874692874693, "grad_norm": 1.178328076326629, "learning_rate": 2.9332426676003858e-05, "loss": 0.1382, "loss_nan_ranks": 0, "loss_rank_avg": 0.1286371350288391, "step": 1005, "valid_targets_mean": 304.6, "valid_targets_min": 182 }, { "epoch": 2.4815724815724813, "grad_norm": 1.0997521874078315, "learning_rate": 2.920571656896722e-05, "loss": 0.1386, "loss_nan_ranks": 0, "loss_rank_avg": 0.127725288271904, "step": 1010, "valid_targets_mean": 303.4, "valid_targets_min": 158 }, { "epoch": 2.493857493857494, "grad_norm": 1.3370190061741811, "learning_rate": 2.907853587984558e-05, "loss": 0.1396, "loss_nan_ranks": 0, "loss_rank_avg": 0.15273752808570862, "step": 1015, "valid_targets_mean": 261.4, "valid_targets_min": 184 }, { "epoch": 2.506142506142506, "grad_norm": 1.130222461608068, "learning_rate": 2.8950891109920333e-05, "loss": 0.1453, "loss_nan_ranks": 0, "loss_rank_avg": 0.1348942667245865, "step": 1020, "valid_targets_mean": 268.6, "valid_targets_min": 188 }, { "epoch": 2.5184275184275187, "grad_norm": 1.223455844173044, "learning_rate": 2.882278878419597e-05, "loss": 0.1433, "loss_nan_ranks": 0, "loss_rank_avg": 0.14979299902915955, "step": 1025, "valid_targets_mean": 274.8, "valid_targets_min": 195 }, { "epoch": 2.5307125307125307, "grad_norm": 1.306715744900679, "learning_rate": 2.8694235451066538e-05, "loss": 0.1454, "loss_nan_ranks": 0, "loss_rank_avg": 0.15734708309173584, "step": 1030, "valid_targets_mean": 270.6, "valid_targets_min": 183 }, { "epoch": 2.542997542997543, "grad_norm": 1.1955269745966193, "learning_rate": 2.8565237681980876e-05, "loss": 0.1354, "loss_nan_ranks": 0, "loss_rank_avg": 0.15619954466819763, "step": 1035, "valid_targets_mean": 297.4, "valid_targets_min": 194 }, { "epoch": 2.555282555282555, "grad_norm": 1.1195365668632218, "learning_rate": 2.843580207110672e-05, "loss": 0.1354, "loss_nan_ranks": 0, "loss_rank_avg": 0.1358446478843689, "step": 1040, "valid_targets_mean": 278.7, "valid_targets_min": 189 }, { "epoch": 2.5675675675675675, "grad_norm": 1.321459683455316, "learning_rate": 2.830593523499361e-05, "loss": 0.1442, "loss_nan_ranks": 0, "loss_rank_avg": 0.15149277448654175, "step": 1045, "valid_targets_mean": 256.7, "valid_targets_min": 178 }, { "epoch": 2.57985257985258, "grad_norm": 1.2375248064739832, "learning_rate": 2.8175643812234627e-05, "loss": 0.1391, "loss_nan_ranks": 0, "loss_rank_avg": 0.1537741720676422, "step": 1050, "valid_targets_mean": 253.6, "valid_targets_min": 167 }, { "epoch": 2.592137592137592, "grad_norm": 1.070320079730073, "learning_rate": 2.8044934463127108e-05, "loss": 0.1379, "loss_nan_ranks": 0, "loss_rank_avg": 0.13954317569732666, "step": 1055, "valid_targets_mean": 302.8, "valid_targets_min": 219 }, { "epoch": 2.6044226044226044, "grad_norm": 1.226285762608703, "learning_rate": 2.7913813869332112e-05, "loss": 0.1376, "loss_nan_ranks": 0, "loss_rank_avg": 0.13144347071647644, "step": 1060, "valid_targets_mean": 280.4, "valid_targets_min": 173 }, { "epoch": 2.616707616707617, "grad_norm": 1.0298534195529359, "learning_rate": 2.7782288733532915e-05, "loss": 0.1303, "loss_nan_ranks": 0, "loss_rank_avg": 0.12828901410102844, "step": 1065, "valid_targets_mean": 313.4, "valid_targets_min": 189 }, { "epoch": 2.628992628992629, "grad_norm": 1.2743909648639802, "learning_rate": 2.7650365779092346e-05, "loss": 0.1279, "loss_nan_ranks": 0, "loss_rank_avg": 0.1474323272705078, "step": 1070, "valid_targets_mean": 267.9, "valid_targets_min": 168 }, { "epoch": 2.6412776412776413, "grad_norm": 1.1981880858241123, "learning_rate": 2.751805174970912e-05, "loss": 0.1416, "loss_nan_ranks": 0, "loss_rank_avg": 0.15064576268196106, "step": 1075, "valid_targets_mean": 277.5, "valid_targets_min": 194 }, { "epoch": 2.6535626535626538, "grad_norm": 1.2419364525532464, "learning_rate": 2.7385353409073093e-05, "loss": 0.1335, "loss_nan_ranks": 0, "loss_rank_avg": 0.1485663503408432, "step": 1080, "valid_targets_mean": 283.0, "valid_targets_min": 177 }, { "epoch": 2.6658476658476657, "grad_norm": 1.3697656018434559, "learning_rate": 2.725227754051953e-05, "loss": 0.1422, "loss_nan_ranks": 0, "loss_rank_avg": 0.16271407902240753, "step": 1085, "valid_targets_mean": 288.6, "valid_targets_min": 156 }, { "epoch": 2.678132678132678, "grad_norm": 1.108260161264064, "learning_rate": 2.711883094668234e-05, "loss": 0.1287, "loss_nan_ranks": 0, "loss_rank_avg": 0.1151181161403656, "step": 1090, "valid_targets_mean": 346.9, "valid_targets_min": 185 }, { "epoch": 2.69041769041769, "grad_norm": 1.1338453430908275, "learning_rate": 2.698502044914633e-05, "loss": 0.1413, "loss_nan_ranks": 0, "loss_rank_avg": 0.1397257149219513, "step": 1095, "valid_targets_mean": 296.2, "valid_targets_min": 193 }, { "epoch": 2.7027027027027026, "grad_norm": 1.0574467892137083, "learning_rate": 2.685085288809853e-05, "loss": 0.1376, "loss_nan_ranks": 0, "loss_rank_avg": 0.12416990846395493, "step": 1100, "valid_targets_mean": 306.5, "valid_targets_min": 174 }, { "epoch": 2.714987714987715, "grad_norm": 1.229195441664209, "learning_rate": 2.671633512197848e-05, "loss": 0.1388, "loss_nan_ranks": 0, "loss_rank_avg": 0.1532745659351349, "step": 1105, "valid_targets_mean": 278.6, "valid_targets_min": 166 }, { "epoch": 2.7272727272727275, "grad_norm": 1.3476772293258508, "learning_rate": 2.658147402712768e-05, "loss": 0.1521, "loss_nan_ranks": 0, "loss_rank_avg": 0.15728528797626495, "step": 1110, "valid_targets_mean": 268.3, "valid_targets_min": 165 }, { "epoch": 2.7395577395577395, "grad_norm": 1.0536447278496572, "learning_rate": 2.6446276497438064e-05, "loss": 0.1261, "loss_nan_ranks": 0, "loss_rank_avg": 0.1281460076570511, "step": 1115, "valid_targets_mean": 314.3, "valid_targets_min": 179 }, { "epoch": 2.751842751842752, "grad_norm": 1.0997586864982594, "learning_rate": 2.6310749443999593e-05, "loss": 0.1338, "loss_nan_ranks": 0, "loss_rank_avg": 0.13919095695018768, "step": 1120, "valid_targets_mean": 326.6, "valid_targets_min": 214 }, { "epoch": 2.764127764127764, "grad_norm": 1.1307158631354388, "learning_rate": 2.617489979474699e-05, "loss": 0.1278, "loss_nan_ranks": 0, "loss_rank_avg": 0.13263410329818726, "step": 1125, "valid_targets_mean": 319.2, "valid_targets_min": 181 }, { "epoch": 2.7764127764127764, "grad_norm": 1.2240870505492338, "learning_rate": 2.6038734494105562e-05, "loss": 0.1423, "loss_nan_ranks": 0, "loss_rank_avg": 0.15749479830265045, "step": 1130, "valid_targets_mean": 268.1, "valid_targets_min": 149 }, { "epoch": 2.788697788697789, "grad_norm": 1.0504853512071426, "learning_rate": 2.590226050263625e-05, "loss": 0.13, "loss_nan_ranks": 0, "loss_rank_avg": 0.11952055245637894, "step": 1135, "valid_targets_mean": 284.1, "valid_targets_min": 177 }, { "epoch": 2.800982800982801, "grad_norm": 1.2839552963232503, "learning_rate": 2.5765484796679768e-05, "loss": 0.1442, "loss_nan_ranks": 0, "loss_rank_avg": 0.1575552225112915, "step": 1140, "valid_targets_mean": 322.7, "valid_targets_min": 169 }, { "epoch": 2.8132678132678133, "grad_norm": 1.1134569825245064, "learning_rate": 2.5628414368000035e-05, "loss": 0.1341, "loss_nan_ranks": 0, "loss_rank_avg": 0.12892982363700867, "step": 1145, "valid_targets_mean": 278.6, "valid_targets_min": 209 }, { "epoch": 2.8255528255528253, "grad_norm": 1.34793219985213, "learning_rate": 2.5491056223426746e-05, "loss": 0.1393, "loss_nan_ranks": 0, "loss_rank_avg": 0.16551215946674347, "step": 1150, "valid_targets_mean": 248.4, "valid_targets_min": 182 }, { "epoch": 2.8378378378378377, "grad_norm": 1.1534651703203966, "learning_rate": 2.5353417384497166e-05, "loss": 0.1435, "loss_nan_ranks": 0, "loss_rank_avg": 0.14097918570041656, "step": 1155, "valid_targets_mean": 297.6, "valid_targets_min": 196 }, { "epoch": 2.85012285012285, "grad_norm": 1.261367316657266, "learning_rate": 2.5215504887097243e-05, "loss": 0.1376, "loss_nan_ranks": 0, "loss_rank_avg": 0.14638900756835938, "step": 1160, "valid_targets_mean": 311.0, "valid_targets_min": 189 }, { "epoch": 2.8624078624078626, "grad_norm": 1.0612956089832728, "learning_rate": 2.5077325781101918e-05, "loss": 0.1353, "loss_nan_ranks": 0, "loss_rank_avg": 0.12194982171058655, "step": 1165, "valid_targets_mean": 306.9, "valid_targets_min": 192 }, { "epoch": 2.8746928746928746, "grad_norm": 0.9573839895843145, "learning_rate": 2.493888713001476e-05, "loss": 0.1363, "loss_nan_ranks": 0, "loss_rank_avg": 0.12127895653247833, "step": 1170, "valid_targets_mean": 330.6, "valid_targets_min": 198 }, { "epoch": 2.886977886977887, "grad_norm": 1.0029010712452617, "learning_rate": 2.480019601060687e-05, "loss": 0.1264, "loss_nan_ranks": 0, "loss_rank_avg": 0.13356783986091614, "step": 1175, "valid_targets_mean": 308.8, "valid_targets_min": 173 }, { "epoch": 2.899262899262899, "grad_norm": 1.1730473596633058, "learning_rate": 2.4661259512555176e-05, "loss": 0.1444, "loss_nan_ranks": 0, "loss_rank_avg": 0.14171163737773895, "step": 1180, "valid_targets_mean": 261.3, "valid_targets_min": 192 }, { "epoch": 2.9115479115479115, "grad_norm": 1.219814383999696, "learning_rate": 2.4522084738079933e-05, "loss": 0.1408, "loss_nan_ranks": 0, "loss_rank_avg": 0.13806191086769104, "step": 1185, "valid_targets_mean": 291.4, "valid_targets_min": 174 }, { "epoch": 2.923832923832924, "grad_norm": 1.1534924825640005, "learning_rate": 2.4382678801581762e-05, "loss": 0.1349, "loss_nan_ranks": 0, "loss_rank_avg": 0.1354420781135559, "step": 1190, "valid_targets_mean": 263.6, "valid_targets_min": 171 }, { "epoch": 2.9361179361179364, "grad_norm": 1.1670933714140332, "learning_rate": 2.4243048829277916e-05, "loss": 0.135, "loss_nan_ranks": 0, "loss_rank_avg": 0.1276843249797821, "step": 1195, "valid_targets_mean": 271.1, "valid_targets_min": 192 }, { "epoch": 2.9484029484029484, "grad_norm": 1.000139209592356, "learning_rate": 2.410320195883802e-05, "loss": 0.1291, "loss_nan_ranks": 0, "loss_rank_avg": 0.12035230547189713, "step": 1200, "valid_targets_mean": 304.2, "valid_targets_min": 196 }, { "epoch": 2.960687960687961, "grad_norm": 1.0589710104373824, "learning_rate": 2.396314533901918e-05, "loss": 0.1405, "loss_nan_ranks": 0, "loss_rank_avg": 0.13579292595386505, "step": 1205, "valid_targets_mean": 269.4, "valid_targets_min": 191 }, { "epoch": 2.972972972972973, "grad_norm": 1.0933308214917397, "learning_rate": 2.3822886129300603e-05, "loss": 0.1409, "loss_nan_ranks": 0, "loss_rank_avg": 0.12709414958953857, "step": 1210, "valid_targets_mean": 273.6, "valid_targets_min": 179 }, { "epoch": 2.9852579852579852, "grad_norm": 1.246593371319452, "learning_rate": 2.368243149951755e-05, "loss": 0.1424, "loss_nan_ranks": 0, "loss_rank_avg": 0.14195743203163147, "step": 1215, "valid_targets_mean": 269.2, "valid_targets_min": 181 }, { "epoch": 2.9975429975429977, "grad_norm": 1.104989340112623, "learning_rate": 2.3541788629494865e-05, "loss": 0.1359, "loss_nan_ranks": 0, "loss_rank_avg": 0.128178209066391, "step": 1220, "valid_targets_mean": 255.1, "valid_targets_min": 206 }, { "epoch": 3.0098280098280097, "grad_norm": 0.9174107036928594, "learning_rate": 2.3400964708679944e-05, "loss": 0.1057, "loss_nan_ranks": 0, "loss_rank_avg": 0.0901394933462143, "step": 1225, "valid_targets_mean": 303.4, "valid_targets_min": 185 }, { "epoch": 3.022113022113022, "grad_norm": 1.4031956873372264, "learning_rate": 2.325996693577522e-05, "loss": 0.1053, "loss_nan_ranks": 0, "loss_rank_avg": 0.12791605293750763, "step": 1230, "valid_targets_mean": 263.9, "valid_targets_min": 179 }, { "epoch": 3.0343980343980346, "grad_norm": 1.2876603734999117, "learning_rate": 2.311880251837019e-05, "loss": 0.0988, "loss_nan_ranks": 0, "loss_rank_avg": 0.09891209006309509, "step": 1235, "valid_targets_mean": 319.4, "valid_targets_min": 196 }, { "epoch": 3.0466830466830466, "grad_norm": 1.3099439525946897, "learning_rate": 2.2977478672572933e-05, "loss": 0.1007, "loss_nan_ranks": 0, "loss_rank_avg": 0.10527482628822327, "step": 1240, "valid_targets_mean": 301.1, "valid_targets_min": 189 }, { "epoch": 3.058968058968059, "grad_norm": 1.0549546106795953, "learning_rate": 2.2836002622641297e-05, "loss": 0.1014, "loss_nan_ranks": 0, "loss_rank_avg": 0.09528301656246185, "step": 1245, "valid_targets_mean": 357.3, "valid_targets_min": 206 }, { "epoch": 3.0712530712530715, "grad_norm": 1.226669517435733, "learning_rate": 2.269438160061354e-05, "loss": 0.1051, "loss_nan_ranks": 0, "loss_rank_avg": 0.11834204196929932, "step": 1250, "valid_targets_mean": 266.8, "valid_targets_min": 173 }, { "epoch": 3.0835380835380835, "grad_norm": 1.3243450281286668, "learning_rate": 2.2552622845938698e-05, "loss": 0.1048, "loss_nan_ranks": 0, "loss_rank_avg": 0.10531710088253021, "step": 1255, "valid_targets_mean": 254.6, "valid_targets_min": 175 }, { "epoch": 3.095823095823096, "grad_norm": 1.5337097359951875, "learning_rate": 2.2410733605106462e-05, "loss": 0.1039, "loss_nan_ranks": 0, "loss_rank_avg": 0.10514850914478302, "step": 1260, "valid_targets_mean": 310.8, "valid_targets_min": 187 }, { "epoch": 3.108108108108108, "grad_norm": 1.3071049484438217, "learning_rate": 2.2268721131276805e-05, "loss": 0.0963, "loss_nan_ranks": 0, "loss_rank_avg": 0.11002233624458313, "step": 1265, "valid_targets_mean": 303.2, "valid_targets_min": 194 }, { "epoch": 3.1203931203931203, "grad_norm": 1.0890014875258, "learning_rate": 2.2126592683909154e-05, "loss": 0.0985, "loss_nan_ranks": 0, "loss_rank_avg": 0.08064033836126328, "step": 1270, "valid_targets_mean": 355.3, "valid_targets_min": 188 }, { "epoch": 3.1326781326781328, "grad_norm": 1.3731767303334204, "learning_rate": 2.1984355528391342e-05, "loss": 0.106, "loss_nan_ranks": 0, "loss_rank_avg": 0.1141047328710556, "step": 1275, "valid_targets_mean": 288.5, "valid_targets_min": 188 }, { "epoch": 3.1449631449631448, "grad_norm": 1.2372245335962193, "learning_rate": 2.1842016935668188e-05, "loss": 0.1015, "loss_nan_ranks": 0, "loss_rank_avg": 0.10686539113521576, "step": 1280, "valid_targets_mean": 289.7, "valid_targets_min": 211 }, { "epoch": 3.157248157248157, "grad_norm": 1.124434316718543, "learning_rate": 2.169958418186982e-05, "loss": 0.1077, "loss_nan_ranks": 0, "loss_rank_avg": 0.08988925069570541, "step": 1285, "valid_targets_mean": 309.7, "valid_targets_min": 180 }, { "epoch": 3.1695331695331697, "grad_norm": 1.0836448070538232, "learning_rate": 2.1557064547939754e-05, "loss": 0.1027, "loss_nan_ranks": 0, "loss_rank_avg": 0.09657120704650879, "step": 1290, "valid_targets_mean": 300.7, "valid_targets_min": 176 }, { "epoch": 3.1818181818181817, "grad_norm": 1.1091160842733732, "learning_rate": 2.1414465319262666e-05, "loss": 0.106, "loss_nan_ranks": 0, "loss_rank_avg": 0.10136106610298157, "step": 1295, "valid_targets_mean": 295.6, "valid_targets_min": 196 }, { "epoch": 3.194103194103194, "grad_norm": 1.3178578011667708, "learning_rate": 2.1271793785291997e-05, "loss": 0.1052, "loss_nan_ranks": 0, "loss_rank_avg": 0.11085499823093414, "step": 1300, "valid_targets_mean": 257.8, "valid_targets_min": 175 }, { "epoch": 3.2063882063882065, "grad_norm": 1.2836613484013242, "learning_rate": 2.1129057239177337e-05, "loss": 0.1075, "loss_nan_ranks": 0, "loss_rank_avg": 0.11580312252044678, "step": 1305, "valid_targets_mean": 283.7, "valid_targets_min": 163 }, { "epoch": 3.2186732186732185, "grad_norm": 1.157669707874955, "learning_rate": 2.0986262977391577e-05, "loss": 0.107, "loss_nan_ranks": 0, "loss_rank_avg": 0.09582555294036865, "step": 1310, "valid_targets_mean": 319.4, "valid_targets_min": 179 }, { "epoch": 3.230958230958231, "grad_norm": 1.0914144808119468, "learning_rate": 2.084341829935796e-05, "loss": 0.1036, "loss_nan_ranks": 0, "loss_rank_avg": 0.0956360250711441, "step": 1315, "valid_targets_mean": 344.6, "valid_targets_min": 214 }, { "epoch": 3.2432432432432434, "grad_norm": 1.119565619384796, "learning_rate": 2.0700530507076916e-05, "loss": 0.1011, "loss_nan_ranks": 0, "loss_rank_avg": 0.09766976535320282, "step": 1320, "valid_targets_mean": 305.0, "valid_targets_min": 195 }, { "epoch": 3.2555282555282554, "grad_norm": 1.3418617083928392, "learning_rate": 2.0557606904752833e-05, "loss": 0.1015, "loss_nan_ranks": 0, "loss_rank_avg": 0.09817899018526077, "step": 1325, "valid_targets_mean": 297.9, "valid_targets_min": 185 }, { "epoch": 3.267813267813268, "grad_norm": 1.2628854876075473, "learning_rate": 2.0414654798420622e-05, "loss": 0.097, "loss_nan_ranks": 0, "loss_rank_avg": 0.10978299379348755, "step": 1330, "valid_targets_mean": 282.4, "valid_targets_min": 165 }, { "epoch": 3.2800982800982803, "grad_norm": 1.0757283605914358, "learning_rate": 2.02716814955723e-05, "loss": 0.0965, "loss_nan_ranks": 0, "loss_rank_avg": 0.09076326340436935, "step": 1335, "valid_targets_mean": 295.6, "valid_targets_min": 192 }, { "epoch": 3.2923832923832923, "grad_norm": 1.177539412801105, "learning_rate": 2.0128694304783406e-05, "loss": 0.098, "loss_nan_ranks": 0, "loss_rank_avg": 0.09272335469722748, "step": 1340, "valid_targets_mean": 335.2, "valid_targets_min": 181 }, { "epoch": 3.3046683046683047, "grad_norm": 1.2164995300137498, "learning_rate": 1.9985700535339406e-05, "loss": 0.1043, "loss_nan_ranks": 0, "loss_rank_avg": 0.10772737115621567, "step": 1345, "valid_targets_mean": 276.4, "valid_targets_min": 185 }, { "epoch": 3.3169533169533167, "grad_norm": 1.23990141162592, "learning_rate": 1.984270749686207e-05, "loss": 0.1013, "loss_nan_ranks": 0, "loss_rank_avg": 0.0992065817117691, "step": 1350, "valid_targets_mean": 254.6, "valid_targets_min": 188 }, { "epoch": 3.329238329238329, "grad_norm": 1.224349687588914, "learning_rate": 1.9699722498935786e-05, "loss": 0.103, "loss_nan_ranks": 0, "loss_rank_avg": 0.10771994292736053, "step": 1355, "valid_targets_mean": 294.9, "valid_targets_min": 173 }, { "epoch": 3.3415233415233416, "grad_norm": 1.2178041513707756, "learning_rate": 1.9556752850733933e-05, "loss": 0.1019, "loss_nan_ranks": 0, "loss_rank_avg": 0.09480111300945282, "step": 1360, "valid_targets_mean": 291.6, "valid_targets_min": 189 }, { "epoch": 3.3538083538083536, "grad_norm": 1.0500619674987501, "learning_rate": 1.9413805860645242e-05, "loss": 0.0991, "loss_nan_ranks": 0, "loss_rank_avg": 0.09143294394016266, "step": 1365, "valid_targets_mean": 339.4, "valid_targets_min": 162 }, { "epoch": 3.366093366093366, "grad_norm": 1.1448861560017454, "learning_rate": 1.9270888835900165e-05, "loss": 0.1042, "loss_nan_ranks": 0, "loss_rank_avg": 0.10382906347513199, "step": 1370, "valid_targets_mean": 304.5, "valid_targets_min": 190 }, { "epoch": 3.3783783783783785, "grad_norm": 1.2424280432346486, "learning_rate": 1.9128009082197417e-05, "loss": 0.109, "loss_nan_ranks": 0, "loss_rank_avg": 0.10171740502119064, "step": 1375, "valid_targets_mean": 289.4, "valid_targets_min": 204 }, { "epoch": 3.3906633906633905, "grad_norm": 1.2194278524387607, "learning_rate": 1.8985173903330428e-05, "loss": 0.099, "loss_nan_ranks": 0, "loss_rank_avg": 0.09036492556333542, "step": 1380, "valid_targets_mean": 326.1, "valid_targets_min": 193 }, { "epoch": 3.402948402948403, "grad_norm": 1.2403890522071876, "learning_rate": 1.884239060081407e-05, "loss": 0.112, "loss_nan_ranks": 0, "loss_rank_avg": 0.10593004524707794, "step": 1385, "valid_targets_mean": 297.9, "valid_targets_min": 163 }, { "epoch": 3.4152334152334154, "grad_norm": 1.284221290714163, "learning_rate": 1.869966647351135e-05, "loss": 0.1025, "loss_nan_ranks": 0, "loss_rank_avg": 0.10611852258443832, "step": 1390, "valid_targets_mean": 291.4, "valid_targets_min": 186 }, { "epoch": 3.4275184275184274, "grad_norm": 1.1976785290411085, "learning_rate": 1.8557008817260343e-05, "loss": 0.1072, "loss_nan_ranks": 0, "loss_rank_avg": 0.10480299592018127, "step": 1395, "valid_targets_mean": 309.3, "valid_targets_min": 170 }, { "epoch": 3.43980343980344, "grad_norm": 1.3395859024253465, "learning_rate": 1.8414424924501222e-05, "loss": 0.1057, "loss_nan_ranks": 0, "loss_rank_avg": 0.1136770099401474, "step": 1400, "valid_targets_mean": 243.6, "valid_targets_min": 161 }, { "epoch": 3.4520884520884523, "grad_norm": 1.1431421292095074, "learning_rate": 1.827192208390347e-05, "loss": 0.1067, "loss_nan_ranks": 0, "loss_rank_avg": 0.09702304005622864, "step": 1405, "valid_targets_mean": 313.2, "valid_targets_min": 187 }, { "epoch": 3.4643734643734643, "grad_norm": 1.1527212532389097, "learning_rate": 1.812950757999334e-05, "loss": 0.1016, "loss_nan_ranks": 0, "loss_rank_avg": 0.09603433310985565, "step": 1410, "valid_targets_mean": 281.2, "valid_targets_min": 178 }, { "epoch": 3.4766584766584767, "grad_norm": 1.327640730439236, "learning_rate": 1.7987188692781417e-05, "loss": 0.1077, "loss_nan_ranks": 0, "loss_rank_avg": 0.1130312979221344, "step": 1415, "valid_targets_mean": 287.2, "valid_targets_min": 192 }, { "epoch": 3.488943488943489, "grad_norm": 1.141559540924862, "learning_rate": 1.784497269739052e-05, "loss": 0.1042, "loss_nan_ranks": 0, "loss_rank_avg": 0.09319710731506348, "step": 1420, "valid_targets_mean": 279.4, "valid_targets_min": 167 }, { "epoch": 3.501228501228501, "grad_norm": 1.1629209220311847, "learning_rate": 1.770286686368381e-05, "loss": 0.0969, "loss_nan_ranks": 0, "loss_rank_avg": 0.09901179373264313, "step": 1425, "valid_targets_mean": 294.4, "valid_targets_min": 171 }, { "epoch": 3.5135135135135136, "grad_norm": 1.180410489173477, "learning_rate": 1.756087845589312e-05, "loss": 0.0942, "loss_nan_ranks": 0, "loss_rank_avg": 0.0902223140001297, "step": 1430, "valid_targets_mean": 284.7, "valid_targets_min": 181 }, { "epoch": 3.5257985257985256, "grad_norm": 1.38818411074106, "learning_rate": 1.7419014732247683e-05, "loss": 0.1075, "loss_nan_ranks": 0, "loss_rank_avg": 0.11814382672309875, "step": 1435, "valid_targets_mean": 293.9, "valid_targets_min": 190 }, { "epoch": 3.538083538083538, "grad_norm": 1.497528211110738, "learning_rate": 1.7277282944603047e-05, "loss": 0.1069, "loss_nan_ranks": 0, "loss_rank_avg": 0.10668017715215683, "step": 1440, "valid_targets_mean": 300.6, "valid_targets_min": 180 }, { "epoch": 3.5503685503685505, "grad_norm": 1.3768871092990174, "learning_rate": 1.713569033807041e-05, "loss": 0.1041, "loss_nan_ranks": 0, "loss_rank_avg": 0.1031336635351181, "step": 1445, "valid_targets_mean": 305.5, "valid_targets_min": 174 }, { "epoch": 3.562653562653563, "grad_norm": 1.2965006822738452, "learning_rate": 1.6994244150646244e-05, "loss": 0.1025, "loss_nan_ranks": 0, "loss_rank_avg": 0.11115489900112152, "step": 1450, "valid_targets_mean": 236.1, "valid_targets_min": 177 }, { "epoch": 3.574938574938575, "grad_norm": 1.0207576469807158, "learning_rate": 1.6852951612842278e-05, "loss": 0.0968, "loss_nan_ranks": 0, "loss_rank_avg": 0.07798905670642853, "step": 1455, "valid_targets_mean": 351.5, "valid_targets_min": 213 }, { "epoch": 3.5872235872235874, "grad_norm": 1.2663016502211655, "learning_rate": 1.671181994731595e-05, "loss": 0.1078, "loss_nan_ranks": 0, "loss_rank_avg": 0.09896829724311829, "step": 1460, "valid_targets_mean": 327.7, "valid_targets_min": 195 }, { "epoch": 3.5995085995085994, "grad_norm": 1.1603273895721846, "learning_rate": 1.6570856368501108e-05, "loss": 0.1033, "loss_nan_ranks": 0, "loss_rank_avg": 0.09762662649154663, "step": 1465, "valid_targets_mean": 329.1, "valid_targets_min": 190 }, { "epoch": 3.611793611793612, "grad_norm": 1.3032355328488239, "learning_rate": 1.643006808223931e-05, "loss": 0.1033, "loss_nan_ranks": 0, "loss_rank_avg": 0.08669596910476685, "step": 1470, "valid_targets_mean": 263.8, "valid_targets_min": 181 }, { "epoch": 3.6240786240786242, "grad_norm": 1.1032373288798538, "learning_rate": 1.6289462285411387e-05, "loss": 0.1044, "loss_nan_ranks": 0, "loss_rank_avg": 0.09125551581382751, "step": 1475, "valid_targets_mean": 327.2, "valid_targets_min": 207 }, { "epoch": 3.6363636363636362, "grad_norm": 1.4782726019764998, "learning_rate": 1.614904616556962e-05, "loss": 0.1, "loss_nan_ranks": 0, "loss_rank_avg": 0.10747089982032776, "step": 1480, "valid_targets_mean": 281.1, "valid_targets_min": 173 }, { "epoch": 3.6486486486486487, "grad_norm": 1.1927945262557165, "learning_rate": 1.6008826900570294e-05, "loss": 0.0994, "loss_nan_ranks": 0, "loss_rank_avg": 0.09641379117965698, "step": 1485, "valid_targets_mean": 292.8, "valid_targets_min": 158 }, { "epoch": 3.6609336609336607, "grad_norm": 1.2166289288548813, "learning_rate": 1.586881165820675e-05, "loss": 0.1061, "loss_nan_ranks": 0, "loss_rank_avg": 0.1032664030790329, "step": 1490, "valid_targets_mean": 283.4, "valid_targets_min": 196 }, { "epoch": 3.673218673218673, "grad_norm": 1.1305403274954269, "learning_rate": 1.5729007595843037e-05, "loss": 0.1038, "loss_nan_ranks": 0, "loss_rank_avg": 0.09991315007209778, "step": 1495, "valid_targets_mean": 281.9, "valid_targets_min": 200 }, { "epoch": 3.6855036855036856, "grad_norm": 1.2568587216169027, "learning_rate": 1.5589421860047986e-05, "loss": 0.1014, "loss_nan_ranks": 0, "loss_rank_avg": 0.10881330072879791, "step": 1500, "valid_targets_mean": 297.8, "valid_targets_min": 183 }, { "epoch": 3.697788697788698, "grad_norm": 1.1760753565070237, "learning_rate": 1.5450061586229903e-05, "loss": 0.1013, "loss_nan_ranks": 0, "loss_rank_avg": 0.09993046522140503, "step": 1505, "valid_targets_mean": 300.7, "valid_targets_min": 197 }, { "epoch": 3.71007371007371, "grad_norm": 1.182258478088467, "learning_rate": 1.5310933898271864e-05, "loss": 0.0966, "loss_nan_ranks": 0, "loss_rank_avg": 0.09755405783653259, "step": 1510, "valid_targets_mean": 291.5, "valid_targets_min": 189 }, { "epoch": 3.7223587223587224, "grad_norm": 1.2773987842414156, "learning_rate": 1.5172045908167462e-05, "loss": 0.1002, "loss_nan_ranks": 0, "loss_rank_avg": 0.10242050141096115, "step": 1515, "valid_targets_mean": 318.1, "valid_targets_min": 182 }, { "epoch": 3.7346437346437344, "grad_norm": 1.241969935062418, "learning_rate": 1.5033404715657344e-05, "loss": 0.1017, "loss_nan_ranks": 0, "loss_rank_avg": 0.10109234601259232, "step": 1520, "valid_targets_mean": 278.4, "valid_targets_min": 191 }, { "epoch": 3.746928746928747, "grad_norm": 1.2524647908620423, "learning_rate": 1.4895017407866217e-05, "loss": 0.1037, "loss_nan_ranks": 0, "loss_rank_avg": 0.1093701645731926, "step": 1525, "valid_targets_mean": 277.9, "valid_targets_min": 184 }, { "epoch": 3.7592137592137593, "grad_norm": 1.2411045467975523, "learning_rate": 1.4756891058940606e-05, "loss": 0.0987, "loss_nan_ranks": 0, "loss_rank_avg": 0.10791376233100891, "step": 1530, "valid_targets_mean": 313.4, "valid_targets_min": 163 }, { "epoch": 3.7714987714987718, "grad_norm": 1.4766222301523344, "learning_rate": 1.4619032729687223e-05, "loss": 0.0993, "loss_nan_ranks": 0, "loss_rank_avg": 0.10581619292497635, "step": 1535, "valid_targets_mean": 282.5, "valid_targets_min": 206 }, { "epoch": 3.7837837837837838, "grad_norm": 1.3829427760324706, "learning_rate": 1.4481449467212004e-05, "loss": 0.1073, "loss_nan_ranks": 0, "loss_rank_avg": 0.10274806618690491, "step": 1540, "valid_targets_mean": 268.6, "valid_targets_min": 202 }, { "epoch": 3.796068796068796, "grad_norm": 1.269274757104079, "learning_rate": 1.4344148304559926e-05, "loss": 0.1073, "loss_nan_ranks": 0, "loss_rank_avg": 0.11024565994739532, "step": 1545, "valid_targets_mean": 310.4, "valid_targets_min": 196 }, { "epoch": 3.808353808353808, "grad_norm": 1.3154745239941397, "learning_rate": 1.4207136260355426e-05, "loss": 0.0953, "loss_nan_ranks": 0, "loss_rank_avg": 0.09093424677848816, "step": 1550, "valid_targets_mean": 327.7, "valid_targets_min": 173 }, { "epoch": 3.8206388206388207, "grad_norm": 1.2733263275584679, "learning_rate": 1.4070420338443667e-05, "loss": 0.1041, "loss_nan_ranks": 0, "loss_rank_avg": 0.10480818152427673, "step": 1555, "valid_targets_mean": 295.5, "valid_targets_min": 180 }, { "epoch": 3.832923832923833, "grad_norm": 1.5841934023970028, "learning_rate": 1.3934007527532494e-05, "loss": 0.103, "loss_nan_ranks": 0, "loss_rank_avg": 0.1243039071559906, "step": 1560, "valid_targets_mean": 236.7, "valid_targets_min": 177 }, { "epoch": 3.845208845208845, "grad_norm": 1.2865458928869575, "learning_rate": 1.3797904800835174e-05, "loss": 0.0996, "loss_nan_ranks": 0, "loss_rank_avg": 0.10926811397075653, "step": 1565, "valid_targets_mean": 257.2, "valid_targets_min": 187 }, { "epoch": 3.8574938574938575, "grad_norm": 1.105049684803498, "learning_rate": 1.3662119115713968e-05, "loss": 0.0955, "loss_nan_ranks": 0, "loss_rank_avg": 0.09268379956483841, "step": 1570, "valid_targets_mean": 307.9, "valid_targets_min": 177 }, { "epoch": 3.8697788697788695, "grad_norm": 1.2083922230204374, "learning_rate": 1.3526657413324427e-05, "loss": 0.1001, "loss_nan_ranks": 0, "loss_rank_avg": 0.09592114388942719, "step": 1575, "valid_targets_mean": 308.8, "valid_targets_min": 191 }, { "epoch": 3.882063882063882, "grad_norm": 1.0920886796309976, "learning_rate": 1.3391526618260636e-05, "loss": 0.1032, "loss_nan_ranks": 0, "loss_rank_avg": 0.0890374630689621, "step": 1580, "valid_targets_mean": 312.7, "valid_targets_min": 203 }, { "epoch": 3.8943488943488944, "grad_norm": 1.4035772053319542, "learning_rate": 1.3256733638201172e-05, "loss": 0.099, "loss_nan_ranks": 0, "loss_rank_avg": 0.10237962752580643, "step": 1585, "valid_targets_mean": 275.1, "valid_targets_min": 169 }, { "epoch": 3.906633906633907, "grad_norm": 1.2091386175019008, "learning_rate": 1.3122285363556053e-05, "loss": 0.0956, "loss_nan_ranks": 0, "loss_rank_avg": 0.09690273553133011, "step": 1590, "valid_targets_mean": 289.5, "valid_targets_min": 198 }, { "epoch": 3.918918918918919, "grad_norm": 1.4419933079161562, "learning_rate": 1.2988188667114487e-05, "loss": 0.1033, "loss_nan_ranks": 0, "loss_rank_avg": 0.10969570279121399, "step": 1595, "valid_targets_mean": 274.1, "valid_targets_min": 186 }, { "epoch": 3.9312039312039313, "grad_norm": 1.2495336641355002, "learning_rate": 1.2854450403693526e-05, "loss": 0.106, "loss_nan_ranks": 0, "loss_rank_avg": 0.11018240451812744, "step": 1600, "valid_targets_mean": 253.7, "valid_targets_min": 180 }, { "epoch": 3.9434889434889433, "grad_norm": 1.2528752281801776, "learning_rate": 1.272107740978769e-05, "loss": 0.1031, "loss_nan_ranks": 0, "loss_rank_avg": 0.10172894597053528, "step": 1605, "valid_targets_mean": 327.1, "valid_targets_min": 192 }, { "epoch": 3.9557739557739557, "grad_norm": 1.2630429456654624, "learning_rate": 1.2588076503219475e-05, "loss": 0.1061, "loss_nan_ranks": 0, "loss_rank_avg": 0.10873597860336304, "step": 1610, "valid_targets_mean": 280.4, "valid_targets_min": 162 }, { "epoch": 3.968058968058968, "grad_norm": 1.2654591919510954, "learning_rate": 1.2455454482790859e-05, "loss": 0.1028, "loss_nan_ranks": 0, "loss_rank_avg": 0.10269074141979218, "step": 1615, "valid_targets_mean": 265.9, "valid_targets_min": 173 }, { "epoch": 3.98034398034398, "grad_norm": 1.259563553238796, "learning_rate": 1.2323218127935714e-05, "loss": 0.0986, "loss_nan_ranks": 0, "loss_rank_avg": 0.09328269958496094, "step": 1620, "valid_targets_mean": 282.9, "valid_targets_min": 188 }, { "epoch": 3.9926289926289926, "grad_norm": 1.1299710113978312, "learning_rate": 1.2191374198373309e-05, "loss": 0.0953, "loss_nan_ranks": 0, "loss_rank_avg": 0.09016802906990051, "step": 1625, "valid_targets_mean": 294.2, "valid_targets_min": 178 }, { "epoch": 4.004914004914005, "grad_norm": 0.9423095992706174, "learning_rate": 1.2059929433762734e-05, "loss": 0.0939, "loss_nan_ranks": 0, "loss_rank_avg": 0.07086224108934402, "step": 1630, "valid_targets_mean": 321.7, "valid_targets_min": 193 }, { "epoch": 4.017199017199017, "grad_norm": 1.3412181865315487, "learning_rate": 1.1928890553358352e-05, "loss": 0.0769, "loss_nan_ranks": 0, "loss_rank_avg": 0.08813311159610748, "step": 1635, "valid_targets_mean": 262.8, "valid_targets_min": 191 }, { "epoch": 4.0294840294840295, "grad_norm": 1.3526792666884382, "learning_rate": 1.1798264255666387e-05, "loss": 0.073, "loss_nan_ranks": 0, "loss_rank_avg": 0.0761113166809082, "step": 1640, "valid_targets_mean": 299.5, "valid_targets_min": 206 }, { "epoch": 4.041769041769042, "grad_norm": 1.581635819431614, "learning_rate": 1.1668057218102436e-05, "loss": 0.0761, "loss_nan_ranks": 0, "loss_rank_avg": 0.08853274583816528, "step": 1645, "valid_targets_mean": 272.3, "valid_targets_min": 184 }, { "epoch": 4.054054054054054, "grad_norm": 1.0638620622527588, "learning_rate": 1.1538276096650175e-05, "loss": 0.0722, "loss_nan_ranks": 0, "loss_rank_avg": 0.07055521011352539, "step": 1650, "valid_targets_mean": 329.6, "valid_targets_min": 217 }, { "epoch": 4.066339066339066, "grad_norm": 1.1714206074152516, "learning_rate": 1.1408927525521118e-05, "loss": 0.0757, "loss_nan_ranks": 0, "loss_rank_avg": 0.07239308953285217, "step": 1655, "valid_targets_mean": 289.9, "valid_targets_min": 169 }, { "epoch": 4.078624078624078, "grad_norm": 1.3151173617754137, "learning_rate": 1.1280018116815438e-05, "loss": 0.074, "loss_nan_ranks": 0, "loss_rank_avg": 0.0699647068977356, "step": 1660, "valid_targets_mean": 300.9, "valid_targets_min": 184 }, { "epoch": 4.090909090909091, "grad_norm": 1.1161651814845959, "learning_rate": 1.115155446018404e-05, "loss": 0.0722, "loss_nan_ranks": 0, "loss_rank_avg": 0.061846908181905746, "step": 1665, "valid_targets_mean": 299.8, "valid_targets_min": 178 }, { "epoch": 4.103194103194103, "grad_norm": 1.423553293721616, "learning_rate": 1.1023543122491626e-05, "loss": 0.0795, "loss_nan_ranks": 0, "loss_rank_avg": 0.07948333770036697, "step": 1670, "valid_targets_mean": 277.2, "valid_targets_min": 175 }, { "epoch": 4.115479115479116, "grad_norm": 1.173909829961548, "learning_rate": 1.089599064748108e-05, "loss": 0.0755, "loss_nan_ranks": 0, "loss_rank_avg": 0.07468120753765106, "step": 1675, "valid_targets_mean": 310.8, "valid_targets_min": 191 }, { "epoch": 4.127764127764128, "grad_norm": 1.2665069516578051, "learning_rate": 1.0768903555438927e-05, "loss": 0.0724, "loss_nan_ranks": 0, "loss_rank_avg": 0.0762133002281189, "step": 1680, "valid_targets_mean": 338.2, "valid_targets_min": 164 }, { "epoch": 4.14004914004914, "grad_norm": 1.2563654651958303, "learning_rate": 1.0642288342862007e-05, "loss": 0.0735, "loss_nan_ranks": 0, "loss_rank_avg": 0.07058044523000717, "step": 1685, "valid_targets_mean": 323.7, "valid_targets_min": 212 }, { "epoch": 4.152334152334152, "grad_norm": 1.2413394007509215, "learning_rate": 1.051615148212544e-05, "loss": 0.0764, "loss_nan_ranks": 0, "loss_rank_avg": 0.08302327990531921, "step": 1690, "valid_targets_mean": 316.6, "valid_targets_min": 217 }, { "epoch": 4.164619164619165, "grad_norm": 1.2490165915075029, "learning_rate": 1.0390499421151706e-05, "loss": 0.0773, "loss_nan_ranks": 0, "loss_rank_avg": 0.06819775700569153, "step": 1695, "valid_targets_mean": 291.6, "valid_targets_min": 174 }, { "epoch": 4.176904176904177, "grad_norm": 1.1237345905325549, "learning_rate": 1.0265338583081088e-05, "loss": 0.069, "loss_nan_ranks": 0, "loss_rank_avg": 0.06545547395944595, "step": 1700, "valid_targets_mean": 312.4, "valid_targets_min": 196 }, { "epoch": 4.1891891891891895, "grad_norm": 1.2111167636237459, "learning_rate": 1.0140675365943284e-05, "loss": 0.0783, "loss_nan_ranks": 0, "loss_rank_avg": 0.0685565173625946, "step": 1705, "valid_targets_mean": 333.6, "valid_targets_min": 187 }, { "epoch": 4.201474201474202, "grad_norm": 1.624343810471699, "learning_rate": 1.0016516142330404e-05, "loss": 0.0789, "loss_nan_ranks": 0, "loss_rank_avg": 0.07445645332336426, "step": 1710, "valid_targets_mean": 310.0, "valid_targets_min": 204 }, { "epoch": 4.2137592137592135, "grad_norm": 1.2390416709603176, "learning_rate": 9.89286725907117e-06, "loss": 0.0806, "loss_nan_ranks": 0, "loss_rank_avg": 0.06910789757966995, "step": 1715, "valid_targets_mean": 311.7, "valid_targets_min": 199 }, { "epoch": 4.226044226044226, "grad_norm": 1.1334683394538962, "learning_rate": 9.769735036906475e-06, "loss": 0.0799, "loss_nan_ranks": 0, "loss_rank_avg": 0.07283744215965271, "step": 1720, "valid_targets_mean": 278.7, "valid_targets_min": 192 }, { "epoch": 4.238329238329238, "grad_norm": 1.4474898305240032, "learning_rate": 9.647125770166321e-06, "loss": 0.0825, "loss_nan_ranks": 0, "loss_rank_avg": 0.08233436942100525, "step": 1725, "valid_targets_mean": 273.5, "valid_targets_min": 188 }, { "epoch": 4.250614250614251, "grad_norm": 1.2769246934623735, "learning_rate": 9.525045726448001e-06, "loss": 0.0757, "loss_nan_ranks": 0, "loss_rank_avg": 0.08285318315029144, "step": 1730, "valid_targets_mean": 286.4, "valid_targets_min": 180 }, { "epoch": 4.262899262899263, "grad_norm": 1.4033209469797288, "learning_rate": 9.40350114629577e-06, "loss": 0.0784, "loss_nan_ranks": 0, "loss_rank_avg": 0.07156778872013092, "step": 1735, "valid_targets_mean": 289.9, "valid_targets_min": 185 }, { "epoch": 4.275184275184275, "grad_norm": 1.2671216074713658, "learning_rate": 9.282498242881784e-06, "loss": 0.069, "loss_nan_ranks": 0, "loss_rank_avg": 0.06984862685203552, "step": 1740, "valid_targets_mean": 315.1, "valid_targets_min": 200 }, { "epoch": 4.287469287469287, "grad_norm": 1.269154724204842, "learning_rate": 9.162043201688517e-06, "loss": 0.0708, "loss_nan_ranks": 0, "loss_rank_avg": 0.07076472043991089, "step": 1745, "valid_targets_mean": 290.8, "valid_targets_min": 163 }, { "epoch": 4.2997542997543, "grad_norm": 1.1761038665821923, "learning_rate": 9.042142180192596e-06, "loss": 0.0697, "loss_nan_ranks": 0, "loss_rank_avg": 0.07158750295639038, "step": 1750, "valid_targets_mean": 315.5, "valid_targets_min": 162 }, { "epoch": 4.312039312039312, "grad_norm": 1.2835150996705, "learning_rate": 8.92280130754998e-06, "loss": 0.0749, "loss_nan_ranks": 0, "loss_rank_avg": 0.08265917003154755, "step": 1755, "valid_targets_mean": 306.9, "valid_targets_min": 197 }, { "epoch": 4.324324324324325, "grad_norm": 1.1473548156273912, "learning_rate": 8.804026684282694e-06, "loss": 0.076, "loss_nan_ranks": 0, "loss_rank_avg": 0.07228017598390579, "step": 1760, "valid_targets_mean": 294.9, "valid_targets_min": 175 }, { "epoch": 4.336609336609337, "grad_norm": 1.2304295881849476, "learning_rate": 8.685824381966975e-06, "loss": 0.0748, "loss_nan_ranks": 0, "loss_rank_avg": 0.07800173759460449, "step": 1765, "valid_targets_mean": 292.2, "valid_targets_min": 189 }, { "epoch": 4.348894348894349, "grad_norm": 1.314547723610967, "learning_rate": 8.568200442922865e-06, "loss": 0.073, "loss_nan_ranks": 0, "loss_rank_avg": 0.06948122382164001, "step": 1770, "valid_targets_mean": 275.4, "valid_targets_min": 176 }, { "epoch": 4.361179361179361, "grad_norm": 1.2390769635035124, "learning_rate": 8.451160879905398e-06, "loss": 0.0773, "loss_nan_ranks": 0, "loss_rank_avg": 0.0868559181690216, "step": 1775, "valid_targets_mean": 301.2, "valid_targets_min": 193 }, { "epoch": 4.3734643734643734, "grad_norm": 1.442584400978057, "learning_rate": 8.33471167579717e-06, "loss": 0.074, "loss_nan_ranks": 0, "loss_rank_avg": 0.07739993929862976, "step": 1780, "valid_targets_mean": 301.6, "valid_targets_min": 173 }, { "epoch": 4.385749385749386, "grad_norm": 1.3373397232217146, "learning_rate": 8.218858783302566e-06, "loss": 0.0753, "loss_nan_ranks": 0, "loss_rank_avg": 0.07708659768104553, "step": 1785, "valid_targets_mean": 291.0, "valid_targets_min": 158 }, { "epoch": 4.398034398034398, "grad_norm": 1.4311436963042043, "learning_rate": 8.103608124643412e-06, "loss": 0.08, "loss_nan_ranks": 0, "loss_rank_avg": 0.08254370838403702, "step": 1790, "valid_targets_mean": 257.2, "valid_targets_min": 170 }, { "epoch": 4.41031941031941, "grad_norm": 1.299011022148241, "learning_rate": 7.988965591256284e-06, "loss": 0.0738, "loss_nan_ranks": 0, "loss_rank_avg": 0.07833302021026611, "step": 1795, "valid_targets_mean": 302.9, "valid_targets_min": 171 }, { "epoch": 4.422604422604422, "grad_norm": 1.6910942579229213, "learning_rate": 7.874937043491331e-06, "loss": 0.0784, "loss_nan_ranks": 0, "loss_rank_avg": 0.08709047734737396, "step": 1800, "valid_targets_mean": 259.4, "valid_targets_min": 187 }, { "epoch": 4.434889434889435, "grad_norm": 1.4791727968423491, "learning_rate": 7.761528310312679e-06, "loss": 0.0756, "loss_nan_ranks": 0, "loss_rank_avg": 0.07255992293357849, "step": 1805, "valid_targets_mean": 269.4, "valid_targets_min": 180 }, { "epoch": 4.447174447174447, "grad_norm": 1.2651420337454626, "learning_rate": 7.648745189000511e-06, "loss": 0.0791, "loss_nan_ranks": 0, "loss_rank_avg": 0.07391591370105743, "step": 1810, "valid_targets_mean": 292.8, "valid_targets_min": 179 }, { "epoch": 4.45945945945946, "grad_norm": 1.4495716949600166, "learning_rate": 7.536593444854663e-06, "loss": 0.0762, "loss_nan_ranks": 0, "loss_rank_avg": 0.08372117578983307, "step": 1815, "valid_targets_mean": 312.4, "valid_targets_min": 178 }, { "epoch": 4.471744471744472, "grad_norm": 1.2400498760439915, "learning_rate": 7.4250788108999686e-06, "loss": 0.0725, "loss_nan_ranks": 0, "loss_rank_avg": 0.06772322952747345, "step": 1820, "valid_targets_mean": 321.8, "valid_targets_min": 194 }, { "epoch": 4.484029484029484, "grad_norm": 1.1972113825024182, "learning_rate": 7.314206987593162e-06, "loss": 0.0718, "loss_nan_ranks": 0, "loss_rank_avg": 0.07139264792203903, "step": 1825, "valid_targets_mean": 303.9, "valid_targets_min": 162 }, { "epoch": 4.496314496314496, "grad_norm": 1.3845800399405668, "learning_rate": 7.203983642531462e-06, "loss": 0.077, "loss_nan_ranks": 0, "loss_rank_avg": 0.07650754600763321, "step": 1830, "valid_targets_mean": 287.9, "valid_targets_min": 155 }, { "epoch": 4.5085995085995085, "grad_norm": 1.285277835152417, "learning_rate": 7.094414410162913e-06, "loss": 0.0748, "loss_nan_ranks": 0, "loss_rank_avg": 0.07509873807430267, "step": 1835, "valid_targets_mean": 326.5, "valid_targets_min": 162 }, { "epoch": 4.520884520884521, "grad_norm": 1.2608681977363196, "learning_rate": 6.985504891498291e-06, "loss": 0.0805, "loss_nan_ranks": 0, "loss_rank_avg": 0.07749956846237183, "step": 1840, "valid_targets_mean": 270.1, "valid_targets_min": 189 }, { "epoch": 4.533169533169533, "grad_norm": 1.3733164012187309, "learning_rate": 6.8772606538248285e-06, "loss": 0.0813, "loss_nan_ranks": 0, "loss_rank_avg": 0.07411514967679977, "step": 1845, "valid_targets_mean": 305.2, "valid_targets_min": 196 }, { "epoch": 4.545454545454545, "grad_norm": 1.3813794421517025, "learning_rate": 6.769687230421638e-06, "loss": 0.0753, "loss_nan_ranks": 0, "loss_rank_avg": 0.07829120755195618, "step": 1850, "valid_targets_mean": 284.2, "valid_targets_min": 149 }, { "epoch": 4.557739557739557, "grad_norm": 1.3694303174861682, "learning_rate": 6.662790120276803e-06, "loss": 0.0756, "loss_nan_ranks": 0, "loss_rank_avg": 0.07427535951137543, "step": 1855, "valid_targets_mean": 278.9, "valid_targets_min": 174 }, { "epoch": 4.57002457002457, "grad_norm": 1.2713622641514912, "learning_rate": 6.556574787806344e-06, "loss": 0.0744, "loss_nan_ranks": 0, "loss_rank_avg": 0.07161825895309448, "step": 1860, "valid_targets_mean": 265.8, "valid_targets_min": 182 }, { "epoch": 4.582309582309582, "grad_norm": 1.3195042631584437, "learning_rate": 6.451046662574831e-06, "loss": 0.0734, "loss_nan_ranks": 0, "loss_rank_avg": 0.07420073449611664, "step": 1865, "valid_targets_mean": 319.4, "valid_targets_min": 222 }, { "epoch": 4.594594594594595, "grad_norm": 1.4295812091769484, "learning_rate": 6.346211139017877e-06, "loss": 0.072, "loss_nan_ranks": 0, "loss_rank_avg": 0.0738724023103714, "step": 1870, "valid_targets_mean": 275.5, "valid_targets_min": 187 }, { "epoch": 4.606879606879607, "grad_norm": 1.2439238715764056, "learning_rate": 6.242073576166337e-06, "loss": 0.0724, "loss_nan_ranks": 0, "loss_rank_avg": 0.07265456765890121, "step": 1875, "valid_targets_mean": 304.5, "valid_targets_min": 188 }, { "epoch": 4.61916461916462, "grad_norm": 1.8376967259613284, "learning_rate": 6.138639297372404e-06, "loss": 0.0728, "loss_nan_ranks": 0, "loss_rank_avg": 0.06977076828479767, "step": 1880, "valid_targets_mean": 327.8, "valid_targets_min": 186 }, { "epoch": 4.631449631449631, "grad_norm": 1.4055034328572096, "learning_rate": 6.035913590037479e-06, "loss": 0.0762, "loss_nan_ranks": 0, "loss_rank_avg": 0.085626982152462, "step": 1885, "valid_targets_mean": 297.2, "valid_targets_min": 182 }, { "epoch": 4.643734643734644, "grad_norm": 1.2461941693133058, "learning_rate": 5.933901705341851e-06, "loss": 0.0734, "loss_nan_ranks": 0, "loss_rank_avg": 0.06267626583576202, "step": 1890, "valid_targets_mean": 300.1, "valid_targets_min": 196 }, { "epoch": 4.656019656019656, "grad_norm": 1.1223784564191235, "learning_rate": 5.832608857976321e-06, "loss": 0.0721, "loss_nan_ranks": 0, "loss_rank_avg": 0.06562580913305283, "step": 1895, "valid_targets_mean": 320.8, "valid_targets_min": 167 }, { "epoch": 4.6683046683046685, "grad_norm": 1.6384029129498416, "learning_rate": 5.732040225875584e-06, "loss": 0.0769, "loss_nan_ranks": 0, "loss_rank_avg": 0.0887017548084259, "step": 1900, "valid_targets_mean": 286.9, "valid_targets_min": 195 }, { "epoch": 4.680589680589681, "grad_norm": 1.4500857394553077, "learning_rate": 5.632200949953579e-06, "loss": 0.0756, "loss_nan_ranks": 0, "loss_rank_avg": 0.08757593482732773, "step": 1905, "valid_targets_mean": 262.7, "valid_targets_min": 168 }, { "epoch": 4.6928746928746925, "grad_norm": 1.6022648038219005, "learning_rate": 5.533096133840677e-06, "loss": 0.0772, "loss_nan_ranks": 0, "loss_rank_avg": 0.08763155341148376, "step": 1910, "valid_targets_mean": 261.4, "valid_targets_min": 156 }, { "epoch": 4.705159705159705, "grad_norm": 1.2593102596892192, "learning_rate": 5.434730843622778e-06, "loss": 0.0702, "loss_nan_ranks": 0, "loss_rank_avg": 0.07523244619369507, "step": 1915, "valid_targets_mean": 305.1, "valid_targets_min": 211 }, { "epoch": 4.717444717444717, "grad_norm": 1.3753521051986204, "learning_rate": 5.337110107582377e-06, "loss": 0.0737, "loss_nan_ranks": 0, "loss_rank_avg": 0.07605773210525513, "step": 1920, "valid_targets_mean": 294.1, "valid_targets_min": 207 }, { "epoch": 4.72972972972973, "grad_norm": 1.215212366130484, "learning_rate": 5.2402389159414755e-06, "loss": 0.078, "loss_nan_ranks": 0, "loss_rank_avg": 0.07748579978942871, "step": 1925, "valid_targets_mean": 294.8, "valid_targets_min": 195 }, { "epoch": 4.742014742014742, "grad_norm": 1.3074889689954898, "learning_rate": 5.144122220606542e-06, "loss": 0.0707, "loss_nan_ranks": 0, "loss_rank_avg": 0.06969805061817169, "step": 1930, "valid_targets_mean": 326.1, "valid_targets_min": 193 }, { "epoch": 4.754299754299755, "grad_norm": 1.4440743427098095, "learning_rate": 5.048764934915349e-06, "loss": 0.0751, "loss_nan_ranks": 0, "loss_rank_avg": 0.07135839760303497, "step": 1935, "valid_targets_mean": 306.8, "valid_targets_min": 211 }, { "epoch": 4.766584766584766, "grad_norm": 1.2847986090010013, "learning_rate": 4.954171933385805e-06, "loss": 0.0751, "loss_nan_ranks": 0, "loss_rank_avg": 0.06975986808538437, "step": 1940, "valid_targets_mean": 301.0, "valid_targets_min": 195 }, { "epoch": 4.778869778869779, "grad_norm": 1.274308585521648, "learning_rate": 4.8603480514667836e-06, "loss": 0.0745, "loss_nan_ranks": 0, "loss_rank_avg": 0.0783599466085434, "step": 1945, "valid_targets_mean": 279.3, "valid_targets_min": 186 }, { "epoch": 4.791154791154791, "grad_norm": 1.2610443359796184, "learning_rate": 4.767298085290963e-06, "loss": 0.0711, "loss_nan_ranks": 0, "loss_rank_avg": 0.06965679675340652, "step": 1950, "valid_targets_mean": 304.8, "valid_targets_min": 194 }, { "epoch": 4.803439803439804, "grad_norm": 1.2451746290006016, "learning_rate": 4.675026791429624e-06, "loss": 0.0706, "loss_nan_ranks": 0, "loss_rank_avg": 0.072019562125206, "step": 1955, "valid_targets_mean": 267.4, "valid_targets_min": 177 }, { "epoch": 4.815724815724816, "grad_norm": 1.6173423478686102, "learning_rate": 4.583538886649525e-06, "loss": 0.0726, "loss_nan_ranks": 0, "loss_rank_avg": 0.08015353977680206, "step": 1960, "valid_targets_mean": 283.2, "valid_targets_min": 174 }, { "epoch": 4.828009828009828, "grad_norm": 1.239716384739282, "learning_rate": 4.492839047671764e-06, "loss": 0.0713, "loss_nan_ranks": 0, "loss_rank_avg": 0.05814147740602493, "step": 1965, "valid_targets_mean": 306.7, "valid_targets_min": 160 }, { "epoch": 4.84029484029484, "grad_norm": 1.2109628267777826, "learning_rate": 4.4029319109327465e-06, "loss": 0.0726, "loss_nan_ranks": 0, "loss_rank_avg": 0.06851934641599655, "step": 1970, "valid_targets_mean": 295.6, "valid_targets_min": 220 }, { "epoch": 4.8525798525798525, "grad_norm": 1.4112535661034031, "learning_rate": 4.313822072347136e-06, "loss": 0.0707, "loss_nan_ranks": 0, "loss_rank_avg": 0.07703422009944916, "step": 1975, "valid_targets_mean": 299.2, "valid_targets_min": 176 }, { "epoch": 4.864864864864865, "grad_norm": 1.1622675345175595, "learning_rate": 4.22551408707296e-06, "loss": 0.0718, "loss_nan_ranks": 0, "loss_rank_avg": 0.07417415827512741, "step": 1980, "valid_targets_mean": 326.9, "valid_targets_min": 210 }, { "epoch": 4.877149877149877, "grad_norm": 1.3799068257972742, "learning_rate": 4.138012469278714e-06, "loss": 0.0734, "loss_nan_ranks": 0, "loss_rank_avg": 0.07426191866397858, "step": 1985, "valid_targets_mean": 273.9, "valid_targets_min": 182 }, { "epoch": 4.88943488943489, "grad_norm": 1.2897930833215843, "learning_rate": 4.051321691912649e-06, "loss": 0.0719, "loss_nan_ranks": 0, "loss_rank_avg": 0.0713033378124237, "step": 1990, "valid_targets_mean": 283.4, "valid_targets_min": 186 }, { "epoch": 4.901719901719901, "grad_norm": 1.1611645540414346, "learning_rate": 3.9654461864740935e-06, "loss": 0.0745, "loss_nan_ranks": 0, "loss_rank_avg": 0.06807668507099152, "step": 1995, "valid_targets_mean": 306.3, "valid_targets_min": 190 }, { "epoch": 4.914004914004914, "grad_norm": 1.5921571211751844, "learning_rate": 3.880390342786915e-06, "loss": 0.0758, "loss_nan_ranks": 0, "loss_rank_avg": 0.07771001756191254, "step": 2000, "valid_targets_mean": 247.5, "valid_targets_min": 179 }, { "epoch": 4.926289926289926, "grad_norm": 1.4132717259504928, "learning_rate": 3.7961585087751516e-06, "loss": 0.0683, "loss_nan_ranks": 0, "loss_rank_avg": 0.06849413365125656, "step": 2005, "valid_targets_mean": 307.6, "valid_targets_min": 192 }, { "epoch": 4.938574938574939, "grad_norm": 1.3533191149366937, "learning_rate": 3.71275499024071e-06, "loss": 0.0747, "loss_nan_ranks": 0, "loss_rank_avg": 0.08212029933929443, "step": 2010, "valid_targets_mean": 284.4, "valid_targets_min": 184 }, { "epoch": 4.950859950859951, "grad_norm": 1.4889917869021732, "learning_rate": 3.6301840506433083e-06, "loss": 0.0695, "loss_nan_ranks": 0, "loss_rank_avg": 0.06790675222873688, "step": 2015, "valid_targets_mean": 293.9, "valid_targets_min": 189 }, { "epoch": 4.963144963144963, "grad_norm": 1.1322546968569716, "learning_rate": 3.5484499108824853e-06, "loss": 0.0712, "loss_nan_ranks": 0, "loss_rank_avg": 0.06254497170448303, "step": 2020, "valid_targets_mean": 311.4, "valid_targets_min": 168 }, { "epoch": 4.975429975429975, "grad_norm": 1.1942192011894526, "learning_rate": 3.4675567490818727e-06, "loss": 0.072, "loss_nan_ranks": 0, "loss_rank_avg": 0.07201623171567917, "step": 2025, "valid_targets_mean": 316.6, "valid_targets_min": 182 }, { "epoch": 4.987714987714988, "grad_norm": 1.0709185398748235, "learning_rate": 3.3875087003756036e-06, "loss": 0.0654, "loss_nan_ranks": 0, "loss_rank_avg": 0.06734339892864227, "step": 2030, "valid_targets_mean": 294.9, "valid_targets_min": 177 }, { "epoch": 5.0, "grad_norm": 1.2630295622336678, "learning_rate": 3.30830985669691e-06, "loss": 0.074, "loss_nan_ranks": 0, "loss_rank_avg": 0.06772519648075104, "step": 2035, "valid_targets_mean": 255.1, "valid_targets_min": 199 }, { "epoch": 5.012285012285012, "grad_norm": 0.977597191503577, "learning_rate": 3.22996426656899e-06, "loss": 0.0553, "loss_nan_ranks": 0, "loss_rank_avg": 0.050149090588092804, "step": 2040, "valid_targets_mean": 322.0, "valid_targets_min": 176 }, { "epoch": 5.024570024570025, "grad_norm": 1.0285099183397046, "learning_rate": 3.1524759348980096e-06, "loss": 0.0581, "loss_nan_ranks": 0, "loss_rank_avg": 0.05904063209891319, "step": 2045, "valid_targets_mean": 334.1, "valid_targets_min": 180 }, { "epoch": 5.036855036855036, "grad_norm": 1.373727675071575, "learning_rate": 3.0758488227684212e-06, "loss": 0.0656, "loss_nan_ranks": 0, "loss_rank_avg": 0.07334539294242859, "step": 2050, "valid_targets_mean": 238.1, "valid_targets_min": 165 }, { "epoch": 5.049140049140049, "grad_norm": 1.017156915258801, "learning_rate": 3.0000868472404423e-06, "loss": 0.0581, "loss_nan_ranks": 0, "loss_rank_avg": 0.052271097898483276, "step": 2055, "valid_targets_mean": 306.3, "valid_targets_min": 188 }, { "epoch": 5.061425061425061, "grad_norm": 1.1450162926471303, "learning_rate": 2.9251938811498436e-06, "loss": 0.0619, "loss_nan_ranks": 0, "loss_rank_avg": 0.056958116590976715, "step": 2060, "valid_targets_mean": 297.9, "valid_targets_min": 201 }, { "epoch": 5.073710073710074, "grad_norm": 1.1835863768992543, "learning_rate": 2.8511737529099704e-06, "loss": 0.0628, "loss_nan_ranks": 0, "loss_rank_avg": 0.06264296174049377, "step": 2065, "valid_targets_mean": 315.8, "valid_targets_min": 210 }, { "epoch": 5.085995085995086, "grad_norm": 1.1347650320782616, "learning_rate": 2.7780302463160235e-06, "loss": 0.0601, "loss_nan_ranks": 0, "loss_rank_avg": 0.053628306835889816, "step": 2070, "valid_targets_mean": 288.5, "valid_targets_min": 158 }, { "epoch": 5.098280098280099, "grad_norm": 1.250397207990186, "learning_rate": 2.705767100351673e-06, "loss": 0.063, "loss_nan_ranks": 0, "loss_rank_avg": 0.06383126974105835, "step": 2075, "valid_targets_mean": 251.8, "valid_targets_min": 182 }, { "epoch": 5.11056511056511, "grad_norm": 1.2894516851493174, "learning_rate": 2.634388008997899e-06, "loss": 0.0616, "loss_nan_ranks": 0, "loss_rank_avg": 0.06406844407320023, "step": 2080, "valid_targets_mean": 264.9, "valid_targets_min": 167 }, { "epoch": 5.122850122850123, "grad_norm": 1.1821790174047793, "learning_rate": 2.5638966210441597e-06, "loss": 0.055, "loss_nan_ranks": 0, "loss_rank_avg": 0.04919495806097984, "step": 2085, "valid_targets_mean": 308.4, "valid_targets_min": 198 }, { "epoch": 5.135135135135135, "grad_norm": 1.4432955198498005, "learning_rate": 2.4942965399018926e-06, "loss": 0.0561, "loss_nan_ranks": 0, "loss_rank_avg": 0.055709220468997955, "step": 2090, "valid_targets_mean": 323.0, "valid_targets_min": 185 }, { "epoch": 5.1474201474201475, "grad_norm": 1.3614944736638088, "learning_rate": 2.425591323420289e-06, "loss": 0.0594, "loss_nan_ranks": 0, "loss_rank_avg": 0.058920104056596756, "step": 2095, "valid_targets_mean": 294.3, "valid_targets_min": 151 }, { "epoch": 5.15970515970516, "grad_norm": 1.2468836205597262, "learning_rate": 2.357784483704444e-06, "loss": 0.0608, "loss_nan_ranks": 0, "loss_rank_avg": 0.05701383948326111, "step": 2100, "valid_targets_mean": 284.6, "valid_targets_min": 190 }, { "epoch": 5.171990171990172, "grad_norm": 1.216036199502135, "learning_rate": 2.2908794869358044e-06, "loss": 0.0607, "loss_nan_ranks": 0, "loss_rank_avg": 0.06217023730278015, "step": 2105, "valid_targets_mean": 272.4, "valid_targets_min": 192 }, { "epoch": 5.184275184275184, "grad_norm": 1.3521847164632959, "learning_rate": 2.2248797531949952e-06, "loss": 0.062, "loss_nan_ranks": 0, "loss_rank_avg": 0.07365967333316803, "step": 2110, "valid_targets_mean": 275.4, "valid_targets_min": 168 }, { "epoch": 5.196560196560196, "grad_norm": 1.2361977418893404, "learning_rate": 2.1597886562869917e-06, "loss": 0.0627, "loss_nan_ranks": 0, "loss_rank_avg": 0.06055378168821335, "step": 2115, "valid_targets_mean": 308.3, "valid_targets_min": 179 }, { "epoch": 5.208845208845209, "grad_norm": 1.2316415548343511, "learning_rate": 2.095609523568638e-06, "loss": 0.0577, "loss_nan_ranks": 0, "loss_rank_avg": 0.060133498162031174, "step": 2120, "valid_targets_mean": 298.7, "valid_targets_min": 185 }, { "epoch": 5.221130221130221, "grad_norm": 1.148444495907104, "learning_rate": 2.0323456357785855e-06, "loss": 0.0618, "loss_nan_ranks": 0, "loss_rank_avg": 0.053872667253017426, "step": 2125, "valid_targets_mean": 285.5, "valid_targets_min": 182 }, { "epoch": 5.233415233415234, "grad_norm": 1.3609487763932329, "learning_rate": 1.970000226869553e-06, "loss": 0.0637, "loss_nan_ranks": 0, "loss_rank_avg": 0.07723841071128845, "step": 2130, "valid_targets_mean": 277.1, "valid_targets_min": 182 }, { "epoch": 5.245700245700245, "grad_norm": 1.0926292098268229, "learning_rate": 1.90857648384305e-06, "loss": 0.0521, "loss_nan_ranks": 0, "loss_rank_avg": 0.04988773167133331, "step": 2135, "valid_targets_mean": 287.9, "valid_targets_min": 173 }, { "epoch": 5.257985257985258, "grad_norm": 1.153669328505823, "learning_rate": 1.848077546586431e-06, "loss": 0.0548, "loss_nan_ranks": 0, "loss_rank_avg": 0.057786066085100174, "step": 2140, "valid_targets_mean": 291.8, "valid_targets_min": 189 }, { "epoch": 5.27027027027027, "grad_norm": 1.197484977452866, "learning_rate": 1.7885065077123976e-06, "loss": 0.0551, "loss_nan_ranks": 0, "loss_rank_avg": 0.058703042566776276, "step": 2145, "valid_targets_mean": 308.6, "valid_targets_min": 193 }, { "epoch": 5.282555282555283, "grad_norm": 1.4022121562167555, "learning_rate": 1.7298664124009245e-06, "loss": 0.0594, "loss_nan_ranks": 0, "loss_rank_avg": 0.07012614607810974, "step": 2150, "valid_targets_mean": 261.3, "valid_targets_min": 174 }, { "epoch": 5.294840294840295, "grad_norm": 1.2194425981565007, "learning_rate": 1.672160258243567e-06, "loss": 0.0583, "loss_nan_ranks": 0, "loss_rank_avg": 0.06708595156669617, "step": 2155, "valid_targets_mean": 293.1, "valid_targets_min": 182 }, { "epoch": 5.3071253071253075, "grad_norm": 1.264610061973535, "learning_rate": 1.615390995090258e-06, "loss": 0.0613, "loss_nan_ranks": 0, "loss_rank_avg": 0.05687159299850464, "step": 2160, "valid_targets_mean": 292.6, "valid_targets_min": 186 }, { "epoch": 5.319410319410319, "grad_norm": 1.1683434677671403, "learning_rate": 1.559561524898492e-06, "loss": 0.057, "loss_nan_ranks": 0, "loss_rank_avg": 0.04983235150575638, "step": 2165, "valid_targets_mean": 286.1, "valid_targets_min": 204 }, { "epoch": 5.3316953316953315, "grad_norm": 1.2824116920770354, "learning_rate": 1.5046747015849893e-06, "loss": 0.0584, "loss_nan_ranks": 0, "loss_rank_avg": 0.05642258748412132, "step": 2170, "valid_targets_mean": 276.5, "valid_targets_min": 212 }, { "epoch": 5.343980343980344, "grad_norm": 1.2857683881648692, "learning_rate": 1.4507333308798255e-06, "loss": 0.0613, "loss_nan_ranks": 0, "loss_rank_avg": 0.05979079753160477, "step": 2175, "valid_targets_mean": 281.7, "valid_targets_min": 178 }, { "epoch": 5.356265356265356, "grad_norm": 1.096631257083289, "learning_rate": 1.3977401701829752e-06, "loss": 0.0549, "loss_nan_ranks": 0, "loss_rank_avg": 0.050769127905368805, "step": 2180, "valid_targets_mean": 318.4, "valid_targets_min": 181 }, { "epoch": 5.368550368550369, "grad_norm": 1.0698344457869844, "learning_rate": 1.345697928423384e-06, "loss": 0.0646, "loss_nan_ranks": 0, "loss_rank_avg": 0.05484703183174133, "step": 2185, "valid_targets_mean": 315.7, "valid_targets_min": 192 }, { "epoch": 5.38083538083538, "grad_norm": 1.2622327806674163, "learning_rate": 1.2946092659204767e-06, "loss": 0.057, "loss_nan_ranks": 0, "loss_rank_avg": 0.06384389102458954, "step": 2190, "valid_targets_mean": 300.4, "valid_targets_min": 195 }, { "epoch": 5.393120393120393, "grad_norm": 1.2637054805293337, "learning_rate": 1.244476794248175e-06, "loss": 0.06, "loss_nan_ranks": 0, "loss_rank_avg": 0.06563755869865417, "step": 2195, "valid_targets_mean": 297.4, "valid_targets_min": 176 }, { "epoch": 5.405405405405405, "grad_norm": 1.3471872543313839, "learning_rate": 1.1953030761014017e-06, "loss": 0.061, "loss_nan_ranks": 0, "loss_rank_avg": 0.07647784054279327, "step": 2200, "valid_targets_mean": 295.8, "valid_targets_min": 172 }, { "epoch": 5.417690417690418, "grad_norm": 1.1316048842367288, "learning_rate": 1.147090625165055e-06, "loss": 0.0585, "loss_nan_ranks": 0, "loss_rank_avg": 0.05756440758705139, "step": 2205, "valid_targets_mean": 296.9, "valid_targets_min": 194 }, { "epoch": 5.42997542997543, "grad_norm": 1.0358913783355548, "learning_rate": 1.0998419059855503e-06, "loss": 0.0571, "loss_nan_ranks": 0, "loss_rank_avg": 0.05179591476917267, "step": 2210, "valid_targets_mean": 307.5, "valid_targets_min": 191 }, { "epoch": 5.442260442260443, "grad_norm": 1.1675363348197318, "learning_rate": 1.053559333844798e-06, "loss": 0.06, "loss_nan_ranks": 0, "loss_rank_avg": 0.06057540327310562, "step": 2215, "valid_targets_mean": 296.4, "valid_targets_min": 175 }, { "epoch": 5.454545454545454, "grad_norm": 1.0124581904496153, "learning_rate": 1.0082452746367721e-06, "loss": 0.054, "loss_nan_ranks": 0, "loss_rank_avg": 0.04826914519071579, "step": 2220, "valid_targets_mean": 343.7, "valid_targets_min": 170 }, { "epoch": 5.466830466830467, "grad_norm": 1.3535067784300931, "learning_rate": 9.639020447465475e-07, "loss": 0.063, "loss_nan_ranks": 0, "loss_rank_avg": 0.07116338610649109, "step": 2225, "valid_targets_mean": 269.2, "valid_targets_min": 204 }, { "epoch": 5.479115479115479, "grad_norm": 0.9586526592178641, "learning_rate": 9.205319109318922e-07, "loss": 0.055, "loss_nan_ranks": 0, "loss_rank_avg": 0.05092500522732735, "step": 2230, "valid_targets_mean": 322.8, "valid_targets_min": 179 }, { "epoch": 5.4914004914004915, "grad_norm": 1.4189235502849598, "learning_rate": 8.781370902074049e-07, "loss": 0.0615, "loss_nan_ranks": 0, "loss_rank_avg": 0.07043673098087311, "step": 2235, "valid_targets_mean": 266.8, "valid_targets_min": 172 }, { "epoch": 5.503685503685504, "grad_norm": 1.1994065534731952, "learning_rate": 8.367197497311719e-07, "loss": 0.0572, "loss_nan_ranks": 0, "loss_rank_avg": 0.054185979068279266, "step": 2240, "valid_targets_mean": 347.1, "valid_targets_min": 184 }, { "epoch": 5.515970515970516, "grad_norm": 1.0398282535706949, "learning_rate": 7.962820066939958e-07, "loss": 0.0555, "loss_nan_ranks": 0, "loss_rank_avg": 0.054929666221141815, "step": 2245, "valid_targets_mean": 318.2, "valid_targets_min": 190 }, { "epoch": 5.528255528255528, "grad_norm": 1.2770386364992248, "learning_rate": 7.568259282111645e-07, "loss": 0.0602, "loss_nan_ranks": 0, "loss_rank_avg": 0.05882716551423073, "step": 2250, "valid_targets_mean": 300.2, "valid_targets_min": 211 }, { "epoch": 5.54054054054054, "grad_norm": 1.2822104108183718, "learning_rate": 7.183535312167755e-07, "loss": 0.0563, "loss_nan_ranks": 0, "loss_rank_avg": 0.060412775725126266, "step": 2255, "valid_targets_mean": 276.4, "valid_targets_min": 202 }, { "epoch": 5.552825552825553, "grad_norm": 1.384189372064152, "learning_rate": 6.808667823606474e-07, "loss": 0.0586, "loss_nan_ranks": 0, "loss_rank_avg": 0.06966982781887054, "step": 2260, "valid_targets_mean": 267.2, "valid_targets_min": 184 }, { "epoch": 5.565110565110565, "grad_norm": 1.277587307625672, "learning_rate": 6.443675979077779e-07, "loss": 0.0599, "loss_nan_ranks": 0, "loss_rank_avg": 0.06707356870174408, "step": 2265, "valid_targets_mean": 290.5, "valid_targets_min": 200 }, { "epoch": 5.577395577395578, "grad_norm": 1.5258616977953152, "learning_rate": 6.088578436403847e-07, "loss": 0.0517, "loss_nan_ranks": 0, "loss_rank_avg": 0.056382156908512115, "step": 2270, "valid_targets_mean": 265.2, "valid_targets_min": 173 }, { "epoch": 5.58968058968059, "grad_norm": 1.1118609133821884, "learning_rate": 5.743393347625436e-07, "loss": 0.0587, "loss_nan_ranks": 0, "loss_rank_avg": 0.06049284711480141, "step": 2275, "valid_targets_mean": 320.7, "valid_targets_min": 186 }, { "epoch": 5.601965601965602, "grad_norm": 1.1814027705274912, "learning_rate": 5.408138358073833e-07, "loss": 0.0595, "loss_nan_ranks": 0, "loss_rank_avg": 0.05395843833684921, "step": 2280, "valid_targets_mean": 337.7, "valid_targets_min": 170 }, { "epoch": 5.614250614250614, "grad_norm": 1.1227185575107823, "learning_rate": 5.082830605468969e-07, "loss": 0.0533, "loss_nan_ranks": 0, "loss_rank_avg": 0.057253506034612656, "step": 2285, "valid_targets_mean": 317.1, "valid_targets_min": 149 }, { "epoch": 5.6265356265356266, "grad_norm": 1.1765251509290517, "learning_rate": 4.767486719043235e-07, "loss": 0.0528, "loss_nan_ranks": 0, "loss_rank_avg": 0.050169896334409714, "step": 2290, "valid_targets_mean": 317.3, "valid_targets_min": 221 }, { "epoch": 5.638820638820639, "grad_norm": 1.263696128200295, "learning_rate": 4.4621228186915833e-07, "loss": 0.0578, "loss_nan_ranks": 0, "loss_rank_avg": 0.06293560564517975, "step": 2295, "valid_targets_mean": 299.8, "valid_targets_min": 214 }, { "epoch": 5.651105651105651, "grad_norm": 1.3018272382130402, "learning_rate": 4.166754514147275e-07, "loss": 0.0538, "loss_nan_ranks": 0, "loss_rank_avg": 0.06175900995731354, "step": 2300, "valid_targets_mean": 268.4, "valid_targets_min": 196 }, { "epoch": 5.663390663390663, "grad_norm": 1.221352892750412, "learning_rate": 3.881396904184231e-07, "loss": 0.0555, "loss_nan_ranks": 0, "loss_rank_avg": 0.055455975234508514, "step": 2305, "valid_targets_mean": 296.9, "valid_targets_min": 206 }, { "epoch": 5.675675675675675, "grad_norm": 1.0922949955208259, "learning_rate": 3.6060645758449584e-07, "loss": 0.0563, "loss_nan_ranks": 0, "loss_rank_avg": 0.04636592045426369, "step": 2310, "valid_targets_mean": 270.9, "valid_targets_min": 180 }, { "epoch": 5.687960687960688, "grad_norm": 1.2739169311475083, "learning_rate": 3.34077160369497e-07, "loss": 0.0575, "loss_nan_ranks": 0, "loss_rank_avg": 0.05191492289304733, "step": 2315, "valid_targets_mean": 315.1, "valid_targets_min": 179 }, { "epoch": 5.7002457002457, "grad_norm": 1.3588174523563947, "learning_rate": 3.08553154910336e-07, "loss": 0.0636, "loss_nan_ranks": 0, "loss_rank_avg": 0.0596894696354866, "step": 2320, "valid_targets_mean": 282.9, "valid_targets_min": 189 }, { "epoch": 5.712530712530713, "grad_norm": 1.194055522599415, "learning_rate": 2.840357459549492e-07, "loss": 0.0569, "loss_nan_ranks": 0, "loss_rank_avg": 0.054943572729825974, "step": 2325, "valid_targets_mean": 287.2, "valid_targets_min": 177 }, { "epoch": 5.724815724815725, "grad_norm": 1.3634221925505468, "learning_rate": 2.6052618679560884e-07, "loss": 0.0635, "loss_nan_ranks": 0, "loss_rank_avg": 0.069933220744133, "step": 2330, "valid_targets_mean": 249.0, "valid_targets_min": 167 }, { "epoch": 5.737100737100737, "grad_norm": 1.2095314194514295, "learning_rate": 2.380256792048541e-07, "loss": 0.0552, "loss_nan_ranks": 0, "loss_rank_avg": 0.05674287676811218, "step": 2335, "valid_targets_mean": 271.7, "valid_targets_min": 176 }, { "epoch": 5.749385749385749, "grad_norm": 1.1923589492016438, "learning_rate": 2.1653537337405383e-07, "loss": 0.0577, "loss_nan_ranks": 0, "loss_rank_avg": 0.05987873673439026, "step": 2340, "valid_targets_mean": 312.6, "valid_targets_min": 195 }, { "epoch": 5.761670761670762, "grad_norm": 1.181570553931504, "learning_rate": 1.9605636785462234e-07, "loss": 0.0591, "loss_nan_ranks": 0, "loss_rank_avg": 0.05523020029067993, "step": 2345, "valid_targets_mean": 290.3, "valid_targets_min": 201 }, { "epoch": 5.773955773955774, "grad_norm": 1.0790091462283764, "learning_rate": 1.7658970950185095e-07, "loss": 0.0571, "loss_nan_ranks": 0, "loss_rank_avg": 0.05092434212565422, "step": 2350, "valid_targets_mean": 314.8, "valid_targets_min": 165 }, { "epoch": 5.7862407862407865, "grad_norm": 1.1328174962479576, "learning_rate": 1.5813639342140197e-07, "loss": 0.0541, "loss_nan_ranks": 0, "loss_rank_avg": 0.061301637440919876, "step": 2355, "valid_targets_mean": 277.0, "valid_targets_min": 205 }, { "epoch": 5.798525798525798, "grad_norm": 1.0654920663781202, "learning_rate": 1.4069736291843605e-07, "loss": 0.0551, "loss_nan_ranks": 0, "loss_rank_avg": 0.04481339454650879, "step": 2360, "valid_targets_mean": 306.3, "valid_targets_min": 177 }, { "epoch": 5.8108108108108105, "grad_norm": 1.156929636541058, "learning_rate": 1.242735094493952e-07, "loss": 0.0578, "loss_nan_ranks": 0, "loss_rank_avg": 0.05702624469995499, "step": 2365, "valid_targets_mean": 286.6, "valid_targets_min": 188 }, { "epoch": 5.823095823095823, "grad_norm": 1.1020183106430024, "learning_rate": 1.0886567257643033e-07, "loss": 0.0558, "loss_nan_ranks": 0, "loss_rank_avg": 0.051131002604961395, "step": 2370, "valid_targets_mean": 307.5, "valid_targets_min": 195 }, { "epoch": 5.835380835380835, "grad_norm": 1.1156827172888786, "learning_rate": 9.447463992448891e-08, "loss": 0.056, "loss_nan_ranks": 0, "loss_rank_avg": 0.05244504287838936, "step": 2375, "valid_targets_mean": 322.5, "valid_targets_min": 167 }, { "epoch": 5.847665847665848, "grad_norm": 1.203304991629248, "learning_rate": 8.110114714104277e-08, "loss": 0.0566, "loss_nan_ranks": 0, "loss_rank_avg": 0.04958263784646988, "step": 2380, "valid_targets_mean": 296.2, "valid_targets_min": 167 }, { "epoch": 5.85995085995086, "grad_norm": 1.281038802675194, "learning_rate": 6.874587785849152e-08, "loss": 0.0575, "loss_nan_ranks": 0, "loss_rank_avg": 0.05190620943903923, "step": 2385, "valid_targets_mean": 314.3, "valid_targets_min": 221 }, { "epoch": 5.872235872235873, "grad_norm": 1.4801858580383123, "learning_rate": 5.7409463659219286e-08, "loss": 0.0592, "loss_nan_ranks": 0, "loss_rank_avg": 0.06994576007127762, "step": 2390, "valid_targets_mean": 255.6, "valid_targets_min": 175 }, { "epoch": 5.884520884520884, "grad_norm": 1.1471257369739076, "learning_rate": 4.709248404329625e-08, "loss": 0.0554, "loss_nan_ranks": 0, "loss_rank_avg": 0.05592045187950134, "step": 2395, "valid_targets_mean": 274.2, "valid_targets_min": 163 }, { "epoch": 5.896805896805897, "grad_norm": 1.2173332697983583, "learning_rate": 3.7795466398868885e-08, "loss": 0.0577, "loss_nan_ranks": 0, "loss_rank_avg": 0.04294394701719284, "step": 2400, "valid_targets_mean": 317.5, "valid_targets_min": 194 }, { "epoch": 5.909090909090909, "grad_norm": 1.252040183425749, "learning_rate": 2.9518885975192702e-08, "loss": 0.0564, "loss_nan_ranks": 0, "loss_rank_avg": 0.05098298192024231, "step": 2405, "valid_targets_mean": 286.9, "valid_targets_min": 179 }, { "epoch": 5.921375921375922, "grad_norm": 1.028221106209322, "learning_rate": 2.226316585833832e-08, "loss": 0.0585, "loss_nan_ranks": 0, "loss_rank_avg": 0.05046099051833153, "step": 2410, "valid_targets_mean": 321.4, "valid_targets_min": 196 }, { "epoch": 5.933660933660933, "grad_norm": 1.3163217772269284, "learning_rate": 1.6028676949570997e-08, "loss": 0.0606, "loss_nan_ranks": 0, "loss_rank_avg": 0.06754690408706665, "step": 2415, "valid_targets_mean": 257.9, "valid_targets_min": 183 }, { "epoch": 5.945945945945946, "grad_norm": 1.4672093994309394, "learning_rate": 1.0815737946383575e-08, "loss": 0.0577, "loss_nan_ranks": 0, "loss_rank_avg": 0.06537715345621109, "step": 2420, "valid_targets_mean": 264.1, "valid_targets_min": 188 }, { "epoch": 5.958230958230958, "grad_norm": 1.2220823128242213, "learning_rate": 6.624615326207284e-09, "loss": 0.0634, "loss_nan_ranks": 0, "loss_rank_avg": 0.06399784982204437, "step": 2425, "valid_targets_mean": 256.9, "valid_targets_min": 173 }, { "epoch": 5.9705159705159705, "grad_norm": 1.2651260380956104, "learning_rate": 3.4555233327893124e-09, "loss": 0.0597, "loss_nan_ranks": 0, "loss_rank_avg": 0.061722077429294586, "step": 2430, "valid_targets_mean": 272.8, "valid_targets_min": 195 }, { "epoch": 5.982800982800983, "grad_norm": 1.0193766389355692, "learning_rate": 1.3086239652415621e-09, "loss": 0.0534, "loss_nan_ranks": 0, "loss_rank_avg": 0.05171266198158264, "step": 2435, "valid_targets_mean": 315.3, "valid_targets_min": 203 }, { "epoch": 5.995085995085995, "grad_norm": 1.359012167945561, "learning_rate": 1.840269697628294e-10, "loss": 0.0583, "loss_nan_ranks": 0, "loss_rank_avg": 0.06471709907054901, "step": 2440, "valid_targets_mean": 265.4, "valid_targets_min": 167 }, { "epoch": 6.0, "loss_nan_ranks": 0, "loss_rank_avg": 0.06114219129085541, "step": 2442, "total_flos": 90716684943360.0, "train_loss": 0.13800091912434492, "train_runtime": 6754.6088, "train_samples_per_second": 5.777, "train_steps_per_second": 0.362, "valid_targets_mean": 336.2, "valid_targets_min": 197 } ], "logging_steps": 5, "max_steps": 2442, "num_input_tokens_seen": 0, "num_train_epochs": 6, "save_steps": 100, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 90716684943360.0, "train_batch_size": 1, "trial_name": null, "trial_params": null }