penfever's picture
End of training
a0fa8b0 verified
{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 7.0,
"eval_steps": 500,
"global_step": 1379,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.025380710659898477,
"grad_norm": 8.746054092048038,
"learning_rate": 1.1594202898550726e-06,
"loss": 0.8448,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.46080702543258667,
"step": 5,
"valid_targets_mean": 3391.9,
"valid_targets_min": 986
},
{
"epoch": 0.050761421319796954,
"grad_norm": 8.271188445211049,
"learning_rate": 2.6086956521739132e-06,
"loss": 0.8643,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.522148847579956,
"step": 10,
"valid_targets_mean": 2433.4,
"valid_targets_min": 1473
},
{
"epoch": 0.07614213197969544,
"grad_norm": 4.513541603908623,
"learning_rate": 4.057971014492754e-06,
"loss": 0.8132,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.36975154280662537,
"step": 15,
"valid_targets_mean": 2374.4,
"valid_targets_min": 1212
},
{
"epoch": 0.10152284263959391,
"grad_norm": 2.9836745456488174,
"learning_rate": 5.507246376811595e-06,
"loss": 0.7417,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.31274423003196716,
"step": 20,
"valid_targets_mean": 2366.5,
"valid_targets_min": 733
},
{
"epoch": 0.12690355329949238,
"grad_norm": 2.065127504471685,
"learning_rate": 6.956521739130435e-06,
"loss": 0.735,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.3327901363372803,
"step": 25,
"valid_targets_mean": 2663.6,
"valid_targets_min": 1324
},
{
"epoch": 0.15228426395939088,
"grad_norm": 1.4643063861293897,
"learning_rate": 8.405797101449275e-06,
"loss": 0.6935,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.35331571102142334,
"step": 30,
"valid_targets_mean": 2389.2,
"valid_targets_min": 1040
},
{
"epoch": 0.17766497461928935,
"grad_norm": 1.3378539239384706,
"learning_rate": 9.855072463768118e-06,
"loss": 0.626,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.29063376784324646,
"step": 35,
"valid_targets_mean": 2173.9,
"valid_targets_min": 1262
},
{
"epoch": 0.20304568527918782,
"grad_norm": 0.9665513277849,
"learning_rate": 1.1304347826086957e-05,
"loss": 0.6444,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.21201933920383453,
"step": 40,
"valid_targets_mean": 2408.5,
"valid_targets_min": 1394
},
{
"epoch": 0.22842639593908629,
"grad_norm": 0.8987538258480249,
"learning_rate": 1.2753623188405797e-05,
"loss": 0.5968,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.21246054768562317,
"step": 45,
"valid_targets_mean": 2066.0,
"valid_targets_min": 1067
},
{
"epoch": 0.25380710659898476,
"grad_norm": 0.9020500925340328,
"learning_rate": 1.420289855072464e-05,
"loss": 0.5712,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.18394096195697784,
"step": 50,
"valid_targets_mean": 1613.1,
"valid_targets_min": 1058
},
{
"epoch": 0.27918781725888325,
"grad_norm": 0.8478913125784002,
"learning_rate": 1.565217391304348e-05,
"loss": 0.5423,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.3302842974662781,
"step": 55,
"valid_targets_mean": 3342.5,
"valid_targets_min": 1549
},
{
"epoch": 0.30456852791878175,
"grad_norm": 1.5227996339853616,
"learning_rate": 1.710144927536232e-05,
"loss": 0.5736,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.34866365790367126,
"step": 60,
"valid_targets_mean": 2464.1,
"valid_targets_min": 250
},
{
"epoch": 0.3299492385786802,
"grad_norm": 0.9365313312981728,
"learning_rate": 1.8550724637681162e-05,
"loss": 0.5296,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.3394869863986969,
"step": 65,
"valid_targets_mean": 2791.2,
"valid_targets_min": 984
},
{
"epoch": 0.3553299492385787,
"grad_norm": 0.8161086659169139,
"learning_rate": 2e-05,
"loss": 0.5151,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.3421684801578522,
"step": 70,
"valid_targets_mean": 3055.1,
"valid_targets_min": 959
},
{
"epoch": 0.38071065989847713,
"grad_norm": 0.7755427124666795,
"learning_rate": 2.1449275362318844e-05,
"loss": 0.5267,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.26595619320869446,
"step": 75,
"valid_targets_mean": 2610.0,
"valid_targets_min": 1367
},
{
"epoch": 0.40609137055837563,
"grad_norm": 0.8301275472311378,
"learning_rate": 2.2898550724637684e-05,
"loss": 0.534,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2656007409095764,
"step": 80,
"valid_targets_mean": 2173.9,
"valid_targets_min": 1076
},
{
"epoch": 0.43147208121827413,
"grad_norm": 0.7430341766697183,
"learning_rate": 2.4347826086956526e-05,
"loss": 0.5021,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.20115895569324493,
"step": 85,
"valid_targets_mean": 2819.4,
"valid_targets_min": 1118
},
{
"epoch": 0.45685279187817257,
"grad_norm": 0.8667322471672515,
"learning_rate": 2.5797101449275362e-05,
"loss": 0.4829,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.19809472560882568,
"step": 90,
"valid_targets_mean": 1588.8,
"valid_targets_min": 1052
},
{
"epoch": 0.48223350253807107,
"grad_norm": 0.8666224198147493,
"learning_rate": 2.7246376811594205e-05,
"loss": 0.4635,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.23502573370933533,
"step": 95,
"valid_targets_mean": 2046.4,
"valid_targets_min": 1040
},
{
"epoch": 0.5076142131979695,
"grad_norm": 0.7163108956357823,
"learning_rate": 2.8695652173913044e-05,
"loss": 0.4353,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2423144280910492,
"step": 100,
"valid_targets_mean": 3183.2,
"valid_targets_min": 795
},
{
"epoch": 0.5329949238578681,
"grad_norm": 0.865566100726533,
"learning_rate": 3.0144927536231887e-05,
"loss": 0.4693,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.15293586254119873,
"step": 105,
"valid_targets_mean": 1570.0,
"valid_targets_min": 443
},
{
"epoch": 0.5583756345177665,
"grad_norm": 0.9232612201171461,
"learning_rate": 3.1594202898550726e-05,
"loss": 0.4865,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24900048971176147,
"step": 110,
"valid_targets_mean": 2685.6,
"valid_targets_min": 1052
},
{
"epoch": 0.583756345177665,
"grad_norm": 0.7835292686525868,
"learning_rate": 3.304347826086957e-05,
"loss": 0.4911,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.25293511152267456,
"step": 115,
"valid_targets_mean": 2541.1,
"valid_targets_min": 1027
},
{
"epoch": 0.6091370558375635,
"grad_norm": 0.7820807679495676,
"learning_rate": 3.449275362318841e-05,
"loss": 0.4888,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.20399141311645508,
"step": 120,
"valid_targets_mean": 2334.9,
"valid_targets_min": 801
},
{
"epoch": 0.6345177664974619,
"grad_norm": 0.7995301620599062,
"learning_rate": 3.594202898550725e-05,
"loss": 0.4849,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.23064780235290527,
"step": 125,
"valid_targets_mean": 2171.4,
"valid_targets_min": 360
},
{
"epoch": 0.6598984771573604,
"grad_norm": 0.6963743946680525,
"learning_rate": 3.739130434782609e-05,
"loss": 0.4522,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.18462923169136047,
"step": 130,
"valid_targets_mean": 3114.9,
"valid_targets_min": 1203
},
{
"epoch": 0.6852791878172588,
"grad_norm": 0.8741657110892967,
"learning_rate": 3.884057971014493e-05,
"loss": 0.4803,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2790442705154419,
"step": 135,
"valid_targets_mean": 2300.5,
"valid_targets_min": 997
},
{
"epoch": 0.7106598984771574,
"grad_norm": 0.7921279308943474,
"learning_rate": 3.999993591506466e-05,
"loss": 0.4917,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24704962968826294,
"step": 140,
"valid_targets_mean": 2552.6,
"valid_targets_min": 1212
},
{
"epoch": 0.7360406091370558,
"grad_norm": 0.8137994242183286,
"learning_rate": 3.99976929854497e-05,
"loss": 0.4831,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.26479896903038025,
"step": 145,
"valid_targets_mean": 2318.9,
"valid_targets_min": 1106
},
{
"epoch": 0.7614213197969543,
"grad_norm": 0.9619494319652759,
"learning_rate": 3.999224621974382e-05,
"loss": 0.4804,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.21784883737564087,
"step": 150,
"valid_targets_mean": 1636.8,
"valid_targets_min": 293
},
{
"epoch": 0.7868020304568528,
"grad_norm": 0.8133912555983293,
"learning_rate": 3.9983596490574876e-05,
"loss": 0.4899,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.25159645080566406,
"step": 155,
"valid_targets_mean": 2658.9,
"valid_targets_min": 1305
},
{
"epoch": 0.8121827411167513,
"grad_norm": 0.7469299189370396,
"learning_rate": 3.9971745183718484e-05,
"loss": 0.4689,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24954304099082947,
"step": 160,
"valid_targets_mean": 2515.0,
"valid_targets_min": 1564
},
{
"epoch": 0.8375634517766497,
"grad_norm": 0.8486207699746516,
"learning_rate": 3.995669419787586e-05,
"loss": 0.4777,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.22933290898799896,
"step": 165,
"valid_targets_mean": 2142.1,
"valid_targets_min": 1260
},
{
"epoch": 0.8629441624365483,
"grad_norm": 0.7790368486491089,
"learning_rate": 3.9938445944369745e-05,
"loss": 0.447,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.31065988540649414,
"step": 170,
"valid_targets_mean": 2739.6,
"valid_targets_min": 1702
},
{
"epoch": 0.8883248730964467,
"grad_norm": 0.877554911194639,
"learning_rate": 3.9917003346758035e-05,
"loss": 0.4579,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.17315532267093658,
"step": 175,
"valid_targets_mean": 1833.6,
"valid_targets_min": 717
},
{
"epoch": 0.9137055837563451,
"grad_norm": 0.8295490474717458,
"learning_rate": 3.989236984036541e-05,
"loss": 0.452,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2310536652803421,
"step": 180,
"valid_targets_mean": 2141.0,
"valid_targets_min": 1027
},
{
"epoch": 0.9390862944162437,
"grad_norm": 0.841351322041739,
"learning_rate": 3.986454937173292e-05,
"loss": 0.4723,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2736300230026245,
"step": 185,
"valid_targets_mean": 2538.2,
"valid_targets_min": 664
},
{
"epoch": 0.9644670050761421,
"grad_norm": 0.8292630926233844,
"learning_rate": 3.98335463979858e-05,
"loss": 0.4487,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1952267736196518,
"step": 190,
"valid_targets_mean": 2048.0,
"valid_targets_min": 925
},
{
"epoch": 0.9898477157360406,
"grad_norm": 0.7874022860460002,
"learning_rate": 3.9799365886119304e-05,
"loss": 0.4553,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2375510036945343,
"step": 195,
"valid_targets_mean": 2771.0,
"valid_targets_min": 1014
},
{
"epoch": 1.015228426395939,
"grad_norm": 0.8281005262337682,
"learning_rate": 3.976201331220296e-05,
"loss": 0.4614,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24540838599205017,
"step": 200,
"valid_targets_mean": 2320.5,
"valid_targets_min": 743
},
{
"epoch": 1.0406091370558375,
"grad_norm": 0.863938516440108,
"learning_rate": 3.9721494660503295e-05,
"loss": 0.423,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2663731575012207,
"step": 205,
"valid_targets_mean": 2648.2,
"valid_targets_min": 1396
},
{
"epoch": 1.0659898477157361,
"grad_norm": 0.737462168238837,
"learning_rate": 3.9677816422525024e-05,
"loss": 0.4655,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.22477290034294128,
"step": 210,
"valid_targets_mean": 3102.6,
"valid_targets_min": 1448
},
{
"epoch": 1.0913705583756346,
"grad_norm": 0.7157709835306627,
"learning_rate": 3.963098559597112e-05,
"loss": 0.401,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.19973540306091309,
"step": 215,
"valid_targets_mean": 3456.8,
"valid_targets_min": 1023
},
{
"epoch": 1.116751269035533,
"grad_norm": 0.6962890917176653,
"learning_rate": 3.9581009683621634e-05,
"loss": 0.4255,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24212422966957092,
"step": 220,
"valid_targets_mean": 3592.0,
"valid_targets_min": 547
},
{
"epoch": 1.1421319796954315,
"grad_norm": 0.8386868847925464,
"learning_rate": 3.952789669213173e-05,
"loss": 0.461,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.26728707551956177,
"step": 225,
"valid_targets_mean": 2497.1,
"valid_targets_min": 1037
},
{
"epoch": 1.16751269035533,
"grad_norm": 0.8271050112319727,
"learning_rate": 3.9471655130748894e-05,
"loss": 0.4043,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.27514880895614624,
"step": 230,
"valid_targets_mean": 3284.0,
"valid_targets_min": 1193
},
{
"epoch": 1.1928934010152283,
"grad_norm": 0.6759784826022383,
"learning_rate": 3.9412294009949716e-05,
"loss": 0.4098,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.18727204203605652,
"step": 235,
"valid_targets_mean": 3005.8,
"valid_targets_min": 1041
},
{
"epoch": 1.218274111675127,
"grad_norm": 0.817175374388556,
"learning_rate": 3.9349822839996266e-05,
"loss": 0.4323,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.20295163989067078,
"step": 240,
"valid_targets_mean": 2283.8,
"valid_targets_min": 943
},
{
"epoch": 1.2436548223350254,
"grad_norm": 0.8097233905648007,
"learning_rate": 3.928425162941248e-05,
"loss": 0.4481,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.23431196808815002,
"step": 245,
"valid_targets_mean": 2414.5,
"valid_targets_min": 1209
},
{
"epoch": 1.2690355329949239,
"grad_norm": 0.7813237824157772,
"learning_rate": 3.9215590883380687e-05,
"loss": 0.4234,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.19021698832511902,
"step": 250,
"valid_targets_mean": 2019.9,
"valid_targets_min": 911
},
{
"epoch": 1.2944162436548223,
"grad_norm": 0.8647188948351984,
"learning_rate": 3.914385160205858e-05,
"loss": 0.4021,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2341812700033188,
"step": 255,
"valid_targets_mean": 2704.2,
"valid_targets_min": 1352
},
{
"epoch": 1.3197969543147208,
"grad_norm": 0.6904746968392748,
"learning_rate": 3.9069045278816844e-05,
"loss": 0.4236,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.18902748823165894,
"step": 260,
"valid_targets_mean": 2808.2,
"valid_targets_min": 1029
},
{
"epoch": 1.3451776649746192,
"grad_norm": 0.8806548756271572,
"learning_rate": 3.899118389839785e-05,
"loss": 0.4181,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.23270106315612793,
"step": 265,
"valid_targets_mean": 2471.4,
"valid_targets_min": 1637
},
{
"epoch": 1.3705583756345177,
"grad_norm": 0.8986724175664121,
"learning_rate": 3.8910279934995545e-05,
"loss": 0.4117,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2592482566833496,
"step": 270,
"valid_targets_mean": 2158.0,
"valid_targets_min": 1359
},
{
"epoch": 1.3959390862944163,
"grad_norm": 0.8693834754860535,
"learning_rate": 3.8826346350256943e-05,
"loss": 0.398,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.15725184977054596,
"step": 275,
"valid_targets_mean": 1973.4,
"valid_targets_min": 260
},
{
"epoch": 1.4213197969543148,
"grad_norm": 0.9695877466874488,
"learning_rate": 3.873939659120558e-05,
"loss": 0.4151,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2322547733783722,
"step": 280,
"valid_targets_mean": 2277.5,
"valid_targets_min": 881
},
{
"epoch": 1.4467005076142132,
"grad_norm": 1.1408976363494896,
"learning_rate": 3.864944458808712e-05,
"loss": 0.4056,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.20991933345794678,
"step": 285,
"valid_targets_mean": 2109.6,
"valid_targets_min": 338
},
{
"epoch": 1.4720812182741116,
"grad_norm": 0.7930406989370231,
"learning_rate": 3.855650475213761e-05,
"loss": 0.4243,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.19697341322898865,
"step": 290,
"valid_targets_mean": 2129.9,
"valid_targets_min": 954
},
{
"epoch": 1.49746192893401,
"grad_norm": 0.7719998634448374,
"learning_rate": 3.846059197327466e-05,
"loss": 0.4011,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.19249945878982544,
"step": 295,
"valid_targets_mean": 2426.6,
"valid_targets_min": 1026
},
{
"epoch": 1.5228426395939088,
"grad_norm": 1.038359126244796,
"learning_rate": 3.836172161771189e-05,
"loss": 0.4049,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.19448643922805786,
"step": 300,
"valid_targets_mean": 2199.6,
"valid_targets_min": 1113
},
{
"epoch": 1.548223350253807,
"grad_norm": 0.8411192815275068,
"learning_rate": 3.8259909525497134e-05,
"loss": 0.3955,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.23402699828147888,
"step": 305,
"valid_targets_mean": 2455.5,
"valid_targets_min": 1271
},
{
"epoch": 1.5736040609137056,
"grad_norm": 0.6971216673275156,
"learning_rate": 3.81551720079747e-05,
"loss": 0.4128,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.20138466358184814,
"step": 310,
"valid_targets_mean": 2834.8,
"valid_targets_min": 664
},
{
"epoch": 1.598984771573604,
"grad_norm": 0.7718252836789953,
"learning_rate": 3.8047525845172104e-05,
"loss": 0.3937,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.18358199298381805,
"step": 315,
"valid_targets_mean": 2064.4,
"valid_targets_min": 265
},
{
"epoch": 1.6243654822335025,
"grad_norm": 0.8717500738963505,
"learning_rate": 3.7936988283111764e-05,
"loss": 0.4031,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.17218948900699615,
"step": 320,
"valid_targets_mean": 1914.4,
"valid_targets_min": 1034
},
{
"epoch": 1.649746192893401,
"grad_norm": 0.9254432850747054,
"learning_rate": 3.7823577031048e-05,
"loss": 0.4003,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24102842807769775,
"step": 325,
"valid_targets_mean": 1880.4,
"valid_targets_min": 1207
},
{
"epoch": 1.6751269035532994,
"grad_norm": 0.7161238412026414,
"learning_rate": 3.77073102586298e-05,
"loss": 0.4184,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.19188612699508667,
"step": 330,
"valid_targets_mean": 2821.5,
"valid_targets_min": 1192
},
{
"epoch": 1.700507614213198,
"grad_norm": 0.7671504397949204,
"learning_rate": 3.758820659298991e-05,
"loss": 0.4147,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.19509898126125336,
"step": 335,
"valid_targets_mean": 2548.6,
"valid_targets_min": 395
},
{
"epoch": 1.7258883248730963,
"grad_norm": 0.6536269958225315,
"learning_rate": 3.746628511576054e-05,
"loss": 0.4208,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.21844618022441864,
"step": 340,
"valid_targets_mean": 3179.2,
"valid_targets_min": 787
},
{
"epoch": 1.751269035532995,
"grad_norm": 0.7979435685524106,
"learning_rate": 3.734156536001629e-05,
"loss": 0.4288,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.15352977812290192,
"step": 345,
"valid_targets_mean": 1618.5,
"valid_targets_min": 663
},
{
"epoch": 1.7766497461928934,
"grad_norm": 0.7326788452634015,
"learning_rate": 3.721406730714476e-05,
"loss": 0.4035,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.16227096319198608,
"step": 350,
"valid_targets_mean": 2581.4,
"valid_targets_min": 1239
},
{
"epoch": 1.8020304568527918,
"grad_norm": 0.8737447795021912,
"learning_rate": 3.7083811383645334e-05,
"loss": 0.4057,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2652243673801422,
"step": 355,
"valid_targets_mean": 2171.6,
"valid_targets_min": 840
},
{
"epoch": 1.8274111675126905,
"grad_norm": 0.8207080923695146,
"learning_rate": 3.695081845785663e-05,
"loss": 0.4183,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.20286084711551666,
"step": 360,
"valid_targets_mean": 1936.5,
"valid_targets_min": 873
},
{
"epoch": 1.8527918781725887,
"grad_norm": 0.7029601914650748,
"learning_rate": 3.6815109836613165e-05,
"loss": 0.3992,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2135118693113327,
"step": 365,
"valid_targets_mean": 2853.2,
"valid_targets_min": 1190
},
{
"epoch": 1.8781725888324874,
"grad_norm": 0.7056348465600532,
"learning_rate": 3.6676707261831836e-05,
"loss": 0.3993,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.21114632487297058,
"step": 370,
"valid_targets_mean": 3049.5,
"valid_targets_min": 1267
},
{
"epoch": 1.9035532994923858,
"grad_norm": 0.7655665068417371,
"learning_rate": 3.6535632907028566e-05,
"loss": 0.4198,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.21775612235069275,
"step": 375,
"valid_targets_mean": 2581.8,
"valid_targets_min": 554
},
{
"epoch": 1.9289340101522843,
"grad_norm": 0.9215750903172012,
"learning_rate": 3.6391909373765944e-05,
"loss": 0.4097,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2149774134159088,
"step": 380,
"valid_targets_mean": 1916.9,
"valid_targets_min": 1348
},
{
"epoch": 1.9543147208121827,
"grad_norm": 0.7063465279362279,
"learning_rate": 3.6245559688032176e-05,
"loss": 0.3997,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.16683170199394226,
"step": 385,
"valid_targets_mean": 2854.8,
"valid_targets_min": 1031
},
{
"epoch": 1.9796954314720812,
"grad_norm": 0.8653155796886824,
"learning_rate": 3.609660729655212e-05,
"loss": 0.4187,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.18881481885910034,
"step": 390,
"valid_targets_mean": 1948.9,
"valid_targets_min": 1062
},
{
"epoch": 2.00507614213198,
"grad_norm": 0.8170991461484087,
"learning_rate": 3.5945076063030835e-05,
"loss": 0.3959,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1733715534210205,
"step": 395,
"valid_targets_mean": 1751.5,
"valid_targets_min": 921
},
{
"epoch": 2.030456852791878,
"grad_norm": 0.7475776291611619,
"learning_rate": 3.579099026433044e-05,
"loss": 0.3661,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.19022664427757263,
"step": 400,
"valid_targets_mean": 2863.1,
"valid_targets_min": 1108
},
{
"epoch": 2.0558375634517767,
"grad_norm": 0.6491922016359316,
"learning_rate": 3.563437458658064e-05,
"loss": 0.364,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.20632588863372803,
"step": 405,
"valid_targets_mean": 3949.6,
"valid_targets_min": 519
},
{
"epoch": 2.081218274111675,
"grad_norm": 1.0197660638706911,
"learning_rate": 3.547525412122378e-05,
"loss": 0.3568,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.21689572930335999,
"step": 410,
"valid_targets_mean": 2777.9,
"valid_targets_min": 293
},
{
"epoch": 2.1065989847715736,
"grad_norm": 0.7134302398281508,
"learning_rate": 3.531365436099497e-05,
"loss": 0.361,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.18497325479984283,
"step": 415,
"valid_targets_mean": 3421.8,
"valid_targets_min": 960
},
{
"epoch": 2.1319796954314723,
"grad_norm": 0.6857248384939708,
"learning_rate": 3.5149601195837815e-05,
"loss": 0.3541,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13612838089466095,
"step": 420,
"valid_targets_mean": 2547.1,
"valid_targets_min": 1322
},
{
"epoch": 2.1573604060913705,
"grad_norm": 0.8453797312754958,
"learning_rate": 3.498312090875667e-05,
"loss": 0.3589,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1847008466720581,
"step": 425,
"valid_targets_mean": 2348.5,
"valid_targets_min": 314
},
{
"epoch": 2.182741116751269,
"grad_norm": 0.7391501108956771,
"learning_rate": 3.481424017160574e-05,
"loss": 0.3631,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.14230400323867798,
"step": 430,
"valid_targets_mean": 2258.4,
"valid_targets_min": 1001
},
{
"epoch": 2.2081218274111674,
"grad_norm": 0.7770616989599075,
"learning_rate": 3.464298604081607e-05,
"loss": 0.3753,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1601681113243103,
"step": 435,
"valid_targets_mean": 2456.1,
"valid_targets_min": 783
},
{
"epoch": 2.233502538071066,
"grad_norm": 0.7765882607320425,
"learning_rate": 3.4469385953060715e-05,
"loss": 0.355,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.17671945691108704,
"step": 440,
"valid_targets_mean": 3366.9,
"valid_targets_min": 299
},
{
"epoch": 2.2588832487309647,
"grad_norm": 0.8902139321477067,
"learning_rate": 3.429346772085923e-05,
"loss": 0.3607,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.16170795261859894,
"step": 445,
"valid_targets_mean": 1621.0,
"valid_targets_min": 974
},
{
"epoch": 2.284263959390863,
"grad_norm": 0.8702764129458472,
"learning_rate": 3.4115259528121685e-05,
"loss": 0.3566,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.15190370380878448,
"step": 450,
"valid_targets_mean": 1681.5,
"valid_targets_min": 386
},
{
"epoch": 2.3096446700507616,
"grad_norm": 0.7232965659119709,
"learning_rate": 3.3934789925633426e-05,
"loss": 0.3471,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.15331853926181793,
"step": 455,
"valid_targets_mean": 2754.1,
"valid_targets_min": 1361
},
{
"epoch": 2.33502538071066,
"grad_norm": 0.9155170217649454,
"learning_rate": 3.37520878264809e-05,
"loss": 0.3891,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.20819316804409027,
"step": 460,
"valid_targets_mean": 2690.5,
"valid_targets_min": 1071
},
{
"epoch": 2.3604060913705585,
"grad_norm": 0.8781514672252451,
"learning_rate": 3.356718250141945e-05,
"loss": 0.3724,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.134077787399292,
"step": 465,
"valid_targets_mean": 2086.6,
"valid_targets_min": 1178
},
{
"epoch": 2.3857868020304567,
"grad_norm": 0.7830372566270385,
"learning_rate": 3.33801035741839e-05,
"loss": 0.3503,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.18803343176841736,
"step": 470,
"valid_targets_mean": 2633.1,
"valid_targets_min": 1325
},
{
"epoch": 2.4111675126903553,
"grad_norm": 0.8576354377653381,
"learning_rate": 3.3190881016742476e-05,
"loss": 0.3606,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.20815789699554443,
"step": 475,
"valid_targets_mean": 2486.2,
"valid_targets_min": 1207
},
{
"epoch": 2.436548223350254,
"grad_norm": 1.5435968887704627,
"learning_rate": 3.2999545144495037e-05,
"loss": 0.3529,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.161598339676857,
"step": 480,
"valid_targets_mean": 1942.1,
"valid_targets_min": 1065
},
{
"epoch": 2.4619289340101522,
"grad_norm": 0.811888246473812,
"learning_rate": 3.280612661141615e-05,
"loss": 0.3768,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.19060321152210236,
"step": 485,
"valid_targets_mean": 2876.8,
"valid_targets_min": 730
},
{
"epoch": 2.487309644670051,
"grad_norm": 0.8786943381528795,
"learning_rate": 3.2610656405144155e-05,
"loss": 0.3798,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.15384045243263245,
"step": 490,
"valid_targets_mean": 2182.9,
"valid_targets_min": 991
},
{
"epoch": 2.512690355329949,
"grad_norm": 0.820490729426486,
"learning_rate": 3.241316584201647e-05,
"loss": 0.3505,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.17183181643486023,
"step": 495,
"valid_targets_mean": 2279.4,
"valid_targets_min": 1108
},
{
"epoch": 2.5380710659898478,
"grad_norm": 0.9204652322698659,
"learning_rate": 3.2213686562052474e-05,
"loss": 0.3732,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1887807548046112,
"step": 500,
"valid_targets_mean": 2191.2,
"valid_targets_min": 1041
},
{
"epoch": 2.563451776649746,
"grad_norm": 0.9465490176668487,
"learning_rate": 3.201225052388446e-05,
"loss": 0.334,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.22429196536540985,
"step": 505,
"valid_targets_mean": 3044.9,
"valid_targets_min": 1027
},
{
"epoch": 2.5888324873096447,
"grad_norm": 0.9526205216743764,
"learning_rate": 3.1808889999637496e-05,
"loss": 0.3468,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.20097972452640533,
"step": 510,
"valid_targets_mean": 1904.6,
"valid_targets_min": 520
},
{
"epoch": 2.6142131979695433,
"grad_norm": 0.729520567033627,
"learning_rate": 3.16036375697591e-05,
"loss": 0.3585,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.16022509336471558,
"step": 515,
"valid_targets_mean": 2627.0,
"valid_targets_min": 989
},
{
"epoch": 2.6395939086294415,
"grad_norm": 1.3536946216616152,
"learning_rate": 3.1396526117799557e-05,
"loss": 0.3765,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.18466079235076904,
"step": 520,
"valid_targets_mean": 2746.9,
"valid_targets_min": 1103
},
{
"epoch": 2.66497461928934,
"grad_norm": 0.7475141562550974,
"learning_rate": 3.1187588825143596e-05,
"loss": 0.3718,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.15916241705417633,
"step": 525,
"valid_targets_mean": 2781.8,
"valid_targets_min": 1013
},
{
"epoch": 2.6903553299492384,
"grad_norm": 0.8623691385097324,
"learning_rate": 3.097685916569439e-05,
"loss": 0.3475,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.15874391794204712,
"step": 530,
"valid_targets_mean": 1746.9,
"valid_targets_min": 960
},
{
"epoch": 2.715736040609137,
"grad_norm": 0.8434226552181769,
"learning_rate": 3.076437090051073e-05,
"loss": 0.3541,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1512419879436493,
"step": 535,
"valid_targets_mean": 1490.5,
"valid_targets_min": 615
},
{
"epoch": 2.7411167512690353,
"grad_norm": 0.8209590984528506,
"learning_rate": 3.0550158072398125e-05,
"loss": 0.3551,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.17597289383411407,
"step": 540,
"valid_targets_mean": 2720.9,
"valid_targets_min": 1163
},
{
"epoch": 2.766497461928934,
"grad_norm": 0.7574051365860957,
"learning_rate": 3.0334255000454795e-05,
"loss": 0.3348,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.20840352773666382,
"step": 545,
"valid_targets_mean": 3306.6,
"valid_targets_min": 1083
},
{
"epoch": 2.7918781725888326,
"grad_norm": 0.8006767917594756,
"learning_rate": 3.011669627457341e-05,
"loss": 0.3625,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.18542571365833282,
"step": 550,
"valid_targets_mean": 2468.4,
"valid_targets_min": 1178
},
{
"epoch": 2.817258883248731,
"grad_norm": 0.9470419157549197,
"learning_rate": 2.989751674989943e-05,
"loss": 0.3654,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.14285054802894592,
"step": 555,
"valid_targets_mean": 1392.9,
"valid_targets_min": 891
},
{
"epoch": 2.8426395939086295,
"grad_norm": 0.8645437304433551,
"learning_rate": 2.967675154124696e-05,
"loss": 0.3818,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.19589808583259583,
"step": 560,
"valid_targets_mean": 2538.4,
"valid_targets_min": 866
},
{
"epoch": 2.868020304568528,
"grad_norm": 0.7250404475202903,
"learning_rate": 2.945443601747297e-05,
"loss": 0.3504,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.14852246642112732,
"step": 565,
"valid_targets_mean": 2435.1,
"valid_targets_min": 654
},
{
"epoch": 2.8934010152284264,
"grad_norm": 0.7824384771297894,
"learning_rate": 2.923060579581087e-05,
"loss": 0.3567,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.15607652068138123,
"step": 570,
"valid_targets_mean": 2221.4,
"valid_targets_min": 925
},
{
"epoch": 2.9187817258883246,
"grad_norm": 0.8509884081444614,
"learning_rate": 2.9005296736164246e-05,
"loss": 0.3791,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1786184310913086,
"step": 575,
"valid_targets_mean": 1949.9,
"valid_targets_min": 417
},
{
"epoch": 2.9441624365482233,
"grad_norm": 0.8941701590016254,
"learning_rate": 2.8778544935361742e-05,
"loss": 0.3747,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.17315533757209778,
"step": 580,
"valid_targets_mean": 2073.0,
"valid_targets_min": 1230
},
{
"epoch": 2.969543147208122,
"grad_norm": 0.7031934956461914,
"learning_rate": 2.855038672137396e-05,
"loss": 0.3458,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1579364687204361,
"step": 585,
"valid_targets_mean": 3130.5,
"valid_targets_min": 1172
},
{
"epoch": 2.99492385786802,
"grad_norm": 0.783365609912982,
"learning_rate": 2.8320858647493374e-05,
"loss": 0.3659,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.15455904603004456,
"step": 590,
"valid_targets_mean": 3087.0,
"valid_targets_min": 1488
},
{
"epoch": 3.020304568527919,
"grad_norm": 0.6900709567681671,
"learning_rate": 2.8089997486478102e-05,
"loss": 0.3371,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.15949803590774536,
"step": 595,
"valid_targets_mean": 3190.9,
"valid_targets_min": 891
},
{
"epoch": 3.045685279187817,
"grad_norm": 1.0051215762468377,
"learning_rate": 2.785784022466053e-05,
"loss": 0.3112,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.17178702354431152,
"step": 600,
"valid_targets_mean": 2351.5,
"valid_targets_min": 1187
},
{
"epoch": 3.0710659898477157,
"grad_norm": 0.923519604987137,
"learning_rate": 2.7624424056021707e-05,
"loss": 0.3069,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.15625596046447754,
"step": 605,
"valid_targets_mean": 1801.5,
"valid_targets_min": 1014
},
{
"epoch": 3.0964467005076144,
"grad_norm": 0.7776067525143058,
"learning_rate": 2.738978637623252e-05,
"loss": 0.3277,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13861015439033508,
"step": 610,
"valid_targets_mean": 2401.6,
"valid_targets_min": 990
},
{
"epoch": 3.1218274111675126,
"grad_norm": 0.816907993425409,
"learning_rate": 2.7153964776662517e-05,
"loss": 0.2997,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.16079218685626984,
"step": 615,
"valid_targets_mean": 2913.8,
"valid_targets_min": 1176
},
{
"epoch": 3.1472081218274113,
"grad_norm": 0.9941961064238964,
"learning_rate": 2.691699703835733e-05,
"loss": 0.3111,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.138283371925354,
"step": 620,
"valid_targets_mean": 1757.9,
"valid_targets_min": 1172
},
{
"epoch": 3.1725888324873095,
"grad_norm": 0.8805394597273256,
"learning_rate": 2.6678921125985845e-05,
"loss": 0.3162,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.17877605557441711,
"step": 625,
"valid_targets_mean": 2323.0,
"valid_targets_min": 299
},
{
"epoch": 3.197969543147208,
"grad_norm": 0.8327034844764604,
"learning_rate": 2.6439775181757806e-05,
"loss": 0.3114,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1432086080312729,
"step": 630,
"valid_targets_mean": 2427.1,
"valid_targets_min": 832
},
{
"epoch": 3.223350253807107,
"grad_norm": 0.8621722670607938,
"learning_rate": 2.6199597519313092e-05,
"loss": 0.3166,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1823379099369049,
"step": 635,
"valid_targets_mean": 2864.4,
"valid_targets_min": 1810
},
{
"epoch": 3.248730964467005,
"grad_norm": 0.9101400606749903,
"learning_rate": 2.5958426617583417e-05,
"loss": 0.3395,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.16888044774532318,
"step": 640,
"valid_targets_mean": 2621.1,
"valid_targets_min": 1312
},
{
"epoch": 3.2741116751269037,
"grad_norm": 0.8038426630515539,
"learning_rate": 2.5716301114627663e-05,
"loss": 0.3072,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1380758136510849,
"step": 645,
"valid_targets_mean": 2481.5,
"valid_targets_min": 1006
},
{
"epoch": 3.299492385786802,
"grad_norm": 0.903322089787113,
"learning_rate": 2.5473259801441663e-05,
"loss": 0.3132,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.15146201848983765,
"step": 650,
"valid_targets_mean": 2501.9,
"valid_targets_min": 395
},
{
"epoch": 3.3248730964467006,
"grad_norm": 0.8736692357160077,
"learning_rate": 2.5229341615743423e-05,
"loss": 0.3155,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2004031538963318,
"step": 655,
"valid_targets_mean": 2913.1,
"valid_targets_min": 1399
},
{
"epoch": 3.350253807106599,
"grad_norm": 0.9728468176146295,
"learning_rate": 2.4984585635734995e-05,
"loss": 0.325,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.15162807703018188,
"step": 660,
"valid_targets_mean": 1768.1,
"valid_targets_min": 1212
},
{
"epoch": 3.3756345177664975,
"grad_norm": 0.8778493869779497,
"learning_rate": 2.4739031073841652e-05,
"loss": 0.3126,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.15126122534275055,
"step": 665,
"valid_targets_mean": 2423.2,
"valid_targets_min": 1162
},
{
"epoch": 3.401015228426396,
"grad_norm": 0.7552014337560956,
"learning_rate": 2.4492717270429736e-05,
"loss": 0.3149,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12232430279254913,
"step": 670,
"valid_targets_mean": 2353.0,
"valid_targets_min": 717
},
{
"epoch": 3.4263959390862944,
"grad_norm": 0.8762298758120327,
"learning_rate": 2.424568368750385e-05,
"loss": 0.3321,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.16355693340301514,
"step": 675,
"valid_targets_mean": 2302.6,
"valid_targets_min": 1026
},
{
"epoch": 3.451776649746193,
"grad_norm": 0.8036994015284806,
"learning_rate": 2.3997969902384722e-05,
"loss": 0.3168,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12401950359344482,
"step": 680,
"valid_targets_mean": 2053.5,
"valid_targets_min": 978
},
{
"epoch": 3.4771573604060912,
"grad_norm": 0.7832269547659961,
"learning_rate": 2.3749615601368434e-05,
"loss": 0.309,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1947115808725357,
"step": 685,
"valid_targets_mean": 3647.2,
"valid_targets_min": 2220
},
{
"epoch": 3.50253807106599,
"grad_norm": 0.9751722132017385,
"learning_rate": 2.3500660573368305e-05,
"loss": 0.3173,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.16104276478290558,
"step": 690,
"valid_targets_mean": 2197.1,
"valid_targets_min": 993
},
{
"epoch": 3.527918781725888,
"grad_norm": 0.8627897598541073,
"learning_rate": 2.3251144703540313e-05,
"loss": 0.3074,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.15009143948554993,
"step": 695,
"valid_targets_mean": 1942.6,
"valid_targets_min": 1051
},
{
"epoch": 3.553299492385787,
"grad_norm": 0.8531625959139169,
"learning_rate": 2.3001107966893054e-05,
"loss": 0.3152,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1586218774318695,
"step": 700,
"valid_targets_mean": 2793.2,
"valid_targets_min": 1260
},
{
"epoch": 3.5786802030456855,
"grad_norm": 0.9602749420779956,
"learning_rate": 2.2750590421883348e-05,
"loss": 0.3211,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1476803421974182,
"step": 705,
"valid_targets_mean": 1785.0,
"valid_targets_min": 1266
},
{
"epoch": 3.6040609137055837,
"grad_norm": 0.785528907194096,
"learning_rate": 2.2499632203998454e-05,
"loss": 0.292,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.17573554813861847,
"step": 710,
"valid_targets_mean": 3039.0,
"valid_targets_min": 1455
},
{
"epoch": 3.6294416243654823,
"grad_norm": 0.8225218380412574,
"learning_rate": 2.224827351932596e-05,
"loss": 0.3247,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13142558932304382,
"step": 715,
"valid_targets_mean": 2333.9,
"valid_targets_min": 1222
},
{
"epoch": 3.6548223350253806,
"grad_norm": 0.7879515274116892,
"learning_rate": 2.1996554638112362e-05,
"loss": 0.3082,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11903517693281174,
"step": 720,
"valid_targets_mean": 1829.2,
"valid_targets_min": 1143
},
{
"epoch": 3.6802030456852792,
"grad_norm": 1.1349204285534138,
"learning_rate": 2.174451588831134e-05,
"loss": 0.3192,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2044743299484253,
"step": 725,
"valid_targets_mean": 1458.9,
"valid_targets_min": 329
},
{
"epoch": 3.7055837563451774,
"grad_norm": 0.9017031039608042,
"learning_rate": 2.1492197649122794e-05,
"loss": 0.347,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.20912542939186096,
"step": 730,
"valid_targets_mean": 2457.0,
"valid_targets_min": 1013
},
{
"epoch": 3.730964467005076,
"grad_norm": 0.8310776673814788,
"learning_rate": 2.1239640344523735e-05,
"loss": 0.3118,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.16861245036125183,
"step": 735,
"valid_targets_mean": 2512.9,
"valid_targets_min": 801
},
{
"epoch": 3.7563451776649748,
"grad_norm": 0.9006914546413552,
"learning_rate": 2.0986884436791875e-05,
"loss": 0.3273,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11753588914871216,
"step": 740,
"valid_targets_mean": 1797.5,
"valid_targets_min": 292
},
{
"epoch": 3.781725888324873,
"grad_norm": 0.7756837667890839,
"learning_rate": 2.073397042002322e-05,
"loss": 0.307,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1464722752571106,
"step": 745,
"valid_targets_mean": 2359.9,
"valid_targets_min": 1051
},
{
"epoch": 3.8071065989847717,
"grad_norm": 0.769625181048491,
"learning_rate": 2.0480938813644443e-05,
"loss": 0.3049,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13366997241973877,
"step": 750,
"valid_targets_mean": 2627.0,
"valid_targets_min": 1478
},
{
"epoch": 3.8324873096446703,
"grad_norm": 1.0106054647061928,
"learning_rate": 2.022783015592132e-05,
"loss": 0.3286,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.17920148372650146,
"step": 755,
"valid_targets_mean": 1909.8,
"valid_targets_min": 1184
},
{
"epoch": 3.8578680203045685,
"grad_norm": 0.8497529943702222,
"learning_rate": 1.9974684997463986e-05,
"loss": 0.3126,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.17976191639900208,
"step": 760,
"valid_targets_mean": 2787.2,
"valid_targets_min": 1320
},
{
"epoch": 3.8832487309644668,
"grad_norm": 0.8061578083088016,
"learning_rate": 1.9721543894730428e-05,
"loss": 0.3261,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.16275309026241302,
"step": 765,
"valid_targets_mean": 2191.4,
"valid_targets_min": 947
},
{
"epoch": 3.9086294416243654,
"grad_norm": 0.8679040422902907,
"learning_rate": 1.946844740352883e-05,
"loss": 0.3214,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1434512734413147,
"step": 770,
"valid_targets_mean": 1871.5,
"valid_targets_min": 293
},
{
"epoch": 3.934010152284264,
"grad_norm": 0.8967859956485915,
"learning_rate": 1.9215436072520167e-05,
"loss": 0.3225,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.16888327896595,
"step": 775,
"valid_targets_mean": 2496.9,
"valid_targets_min": 834
},
{
"epoch": 3.9593908629441623,
"grad_norm": 0.9420456448588729,
"learning_rate": 1.8962550436721867e-05,
"loss": 0.3207,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1651691496372223,
"step": 780,
"valid_targets_mean": 1959.6,
"valid_targets_min": 1067
},
{
"epoch": 3.984771573604061,
"grad_norm": 0.8058439772758408,
"learning_rate": 1.8709831011013678e-05,
"loss": 0.2859,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12959708273410797,
"step": 785,
"valid_targets_mean": 2221.1,
"valid_targets_min": 1226
},
{
"epoch": 4.01015228426396,
"grad_norm": 0.7923362785700253,
"learning_rate": 1.8457318283646814e-05,
"loss": 0.3028,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.14751429855823517,
"step": 790,
"valid_targets_mean": 3143.8,
"valid_targets_min": 1676
},
{
"epoch": 4.035532994923858,
"grad_norm": 0.8817490825267614,
"learning_rate": 1.8205052709757263e-05,
"loss": 0.2866,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12734201550483704,
"step": 795,
"valid_targets_mean": 2243.2,
"valid_targets_min": 1106
},
{
"epoch": 4.060913705583756,
"grad_norm": 0.811564426430684,
"learning_rate": 1.79530747048845e-05,
"loss": 0.288,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1372891068458557,
"step": 800,
"valid_targets_mean": 3005.8,
"valid_targets_min": 846
},
{
"epoch": 4.086294416243655,
"grad_norm": 1.0172125663810014,
"learning_rate": 1.7701424638496473e-05,
"loss": 0.2697,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.17663919925689697,
"step": 805,
"valid_targets_mean": 2413.2,
"valid_targets_min": 879
},
{
"epoch": 4.111675126903553,
"grad_norm": 0.9356174719933191,
"learning_rate": 1.7450142827522027e-05,
"loss": 0.2927,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.17932286858558655,
"step": 810,
"valid_targets_mean": 2165.0,
"valid_targets_min": 1046
},
{
"epoch": 4.137055837563452,
"grad_norm": 0.9261663724619607,
"learning_rate": 1.719926952989169e-05,
"loss": 0.2745,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1279735565185547,
"step": 815,
"valid_targets_mean": 2310.0,
"valid_targets_min": 482
},
{
"epoch": 4.16243654822335,
"grad_norm": 0.9696079776656041,
"learning_rate": 1.694884493808795e-05,
"loss": 0.2827,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.16480299830436707,
"step": 820,
"valid_targets_mean": 2405.8,
"valid_targets_min": 1108
},
{
"epoch": 4.187817258883249,
"grad_norm": 0.9220163515345636,
"learning_rate": 1.6698909172706e-05,
"loss": 0.2767,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1316625326871872,
"step": 825,
"valid_targets_mean": 2018.5,
"valid_targets_min": 1319
},
{
"epoch": 4.213197969543147,
"grad_norm": 0.9076143574371879,
"learning_rate": 1.644950227602605e-05,
"loss": 0.2829,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1429370939731598,
"step": 830,
"valid_targets_mean": 2114.5,
"valid_targets_min": 1262
},
{
"epoch": 4.238578680203045,
"grad_norm": 0.9803947236168491,
"learning_rate": 1.620066420559805e-05,
"loss": 0.2632,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12085558474063873,
"step": 835,
"valid_targets_mean": 1590.6,
"valid_targets_min": 849
},
{
"epoch": 4.2639593908629445,
"grad_norm": 1.0298161787059386,
"learning_rate": 1.5952434827840187e-05,
"loss": 0.2929,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1661471277475357,
"step": 840,
"valid_targets_mean": 2170.0,
"valid_targets_min": 1001
},
{
"epoch": 4.289340101522843,
"grad_norm": 0.9569708540260855,
"learning_rate": 1.5704853911651777e-05,
"loss": 0.2714,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1327221393585205,
"step": 845,
"valid_targets_mean": 2563.0,
"valid_targets_min": 1147
},
{
"epoch": 4.314720812182741,
"grad_norm": 0.8053576371393363,
"learning_rate": 1.545796112204196e-05,
"loss": 0.2672,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.07763060182332993,
"step": 850,
"valid_targets_mean": 1756.2,
"valid_targets_min": 498
},
{
"epoch": 4.340101522842639,
"grad_norm": 0.9055675412263313,
"learning_rate": 1.5211796013774893e-05,
"loss": 0.2767,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.14815682172775269,
"step": 855,
"valid_targets_mean": 2407.4,
"valid_targets_min": 959
},
{
"epoch": 4.365482233502538,
"grad_norm": 0.848903444043607,
"learning_rate": 1.4966398025032706e-05,
"loss": 0.2864,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.14398732781410217,
"step": 860,
"valid_targets_mean": 2603.8,
"valid_targets_min": 1046
},
{
"epoch": 4.3908629441624365,
"grad_norm": 0.8069661195210427,
"learning_rate": 1.4721806471097104e-05,
"loss": 0.2711,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.21310167014598846,
"step": 865,
"valid_targets_mean": 3781.8,
"valid_targets_min": 1261
},
{
"epoch": 4.416243654822335,
"grad_norm": 0.8380373426685702,
"learning_rate": 1.4478060538050622e-05,
"loss": 0.3041,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11656267940998077,
"step": 870,
"valid_targets_mean": 2404.8,
"valid_targets_min": 1298
},
{
"epoch": 4.441624365482234,
"grad_norm": 0.9172317833122811,
"learning_rate": 1.4235199276498652e-05,
"loss": 0.2762,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.14826105535030365,
"step": 875,
"valid_targets_mean": 2460.0,
"valid_targets_min": 1559
},
{
"epoch": 4.467005076142132,
"grad_norm": 0.8926977376205748,
"learning_rate": 1.3993261595313094e-05,
"loss": 0.2726,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.16165882349014282,
"step": 880,
"valid_targets_mean": 2430.6,
"valid_targets_min": 1124
},
{
"epoch": 4.49238578680203,
"grad_norm": 0.9194576389085425,
"learning_rate": 1.3752286255398794e-05,
"loss": 0.2927,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.14470508694648743,
"step": 885,
"valid_targets_mean": 2208.0,
"valid_targets_min": 1093
},
{
"epoch": 4.517766497461929,
"grad_norm": 1.0465128762566749,
"learning_rate": 1.3512311863483606e-05,
"loss": 0.2886,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1443568915128708,
"step": 890,
"valid_targets_mean": 1943.1,
"valid_targets_min": 1047
},
{
"epoch": 4.543147208121828,
"grad_norm": 0.9517867035108439,
"learning_rate": 1.3273376865933236e-05,
"loss": 0.2746,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13925078511238098,
"step": 895,
"valid_targets_mean": 2709.4,
"valid_targets_min": 1132
},
{
"epoch": 4.568527918781726,
"grad_norm": 1.0210841427352948,
"learning_rate": 1.303551954259172e-05,
"loss": 0.263,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11771374195814133,
"step": 900,
"valid_targets_mean": 1918.6,
"valid_targets_min": 281
},
{
"epoch": 4.593908629441624,
"grad_norm": 1.0680504135482736,
"learning_rate": 1.2798778000648602e-05,
"loss": 0.305,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.16731064021587372,
"step": 905,
"valid_targets_mean": 1758.8,
"valid_targets_min": 663
},
{
"epoch": 4.619289340101523,
"grad_norm": 0.9639049022822739,
"learning_rate": 1.2563190168533766e-05,
"loss": 0.2869,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.16094887256622314,
"step": 910,
"valid_targets_mean": 2596.8,
"valid_targets_min": 1077
},
{
"epoch": 4.644670050761421,
"grad_norm": 1.0257145278211706,
"learning_rate": 1.2328793789840926e-05,
"loss": 0.2811,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13257353007793427,
"step": 915,
"valid_targets_mean": 2801.4,
"valid_targets_min": 868
},
{
"epoch": 4.67005076142132,
"grad_norm": 0.7146965006699328,
"learning_rate": 1.2095626417280686e-05,
"loss": 0.2464,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10758645832538605,
"step": 920,
"valid_targets_mean": 3030.4,
"valid_targets_min": 779
},
{
"epoch": 4.695431472081218,
"grad_norm": 0.922142129356091,
"learning_rate": 1.1863725406664241e-05,
"loss": 0.2708,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12253784388303757,
"step": 925,
"valid_targets_mean": 2085.0,
"valid_targets_min": 1042
},
{
"epoch": 4.720812182741117,
"grad_norm": 0.9829158703940855,
"learning_rate": 1.163312791091858e-05,
"loss": 0.276,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11979790776968002,
"step": 930,
"valid_targets_mean": 2673.2,
"valid_targets_min": 1178
},
{
"epoch": 4.746192893401015,
"grad_norm": 0.9917842043347582,
"learning_rate": 1.1403870874134192e-05,
"loss": 0.2969,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2870727777481079,
"step": 935,
"valid_targets_mean": 3639.1,
"valid_targets_min": 1065
},
{
"epoch": 4.771573604060913,
"grad_norm": 0.7355057489858818,
"learning_rate": 1.1175991025646267e-05,
"loss": 0.2586,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13370351493358612,
"step": 940,
"valid_targets_mean": 3761.0,
"valid_targets_min": 1076
},
{
"epoch": 4.7969543147208125,
"grad_norm": 0.9337802455576882,
"learning_rate": 1.0949524874150246e-05,
"loss": 0.2818,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1805444359779358,
"step": 945,
"valid_targets_mean": 2492.8,
"valid_targets_min": 997
},
{
"epoch": 4.822335025380711,
"grad_norm": 1.0058650819266446,
"learning_rate": 1.0724508701852807e-05,
"loss": 0.2645,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12135922163724899,
"step": 950,
"valid_targets_mean": 2615.0,
"valid_targets_min": 1013
},
{
"epoch": 4.847715736040609,
"grad_norm": 0.887746711444636,
"learning_rate": 1.0500978558659001e-05,
"loss": 0.2732,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11861827224493027,
"step": 955,
"valid_targets_mean": 2106.6,
"valid_targets_min": 1026
},
{
"epoch": 4.873096446700508,
"grad_norm": 0.8819049641510628,
"learning_rate": 1.0278970256396764e-05,
"loss": 0.2978,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.15796110033988953,
"step": 960,
"valid_targets_mean": 2376.9,
"valid_targets_min": 1021
},
{
"epoch": 4.898477157360406,
"grad_norm": 1.0905411737905097,
"learning_rate": 1.0058519363079464e-05,
"loss": 0.29,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.16498346626758575,
"step": 965,
"valid_targets_mean": 1871.6,
"valid_targets_min": 1180
},
{
"epoch": 4.9238578680203045,
"grad_norm": 0.900093426991199,
"learning_rate": 9.839661197207527e-06,
"loss": 0.2863,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12465168535709381,
"step": 970,
"valid_targets_mean": 2361.0,
"valid_targets_min": 1292
},
{
"epoch": 4.949238578680203,
"grad_norm": 1.0139948291819079,
"learning_rate": 9.622430822110063e-06,
"loss": 0.306,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1524374783039093,
"step": 975,
"valid_targets_mean": 1875.8,
"valid_targets_min": 990
},
{
"epoch": 4.974619289340102,
"grad_norm": 1.0449824510291859,
"learning_rate": 9.40686304032735e-06,
"loss": 0.2732,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1726701259613037,
"step": 980,
"valid_targets_mean": 2441.1,
"valid_targets_min": 2117
},
{
"epoch": 5.0,
"grad_norm": 0.9373152237759808,
"learning_rate": 9.19299238803515e-06,
"loss": 0.2712,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11312133818864822,
"step": 985,
"valid_targets_mean": 1897.9,
"valid_targets_min": 1095
},
{
"epoch": 5.025380710659898,
"grad_norm": 0.7926669218335288,
"learning_rate": 8.980853129511584e-06,
"loss": 0.2557,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.15544377267360687,
"step": 990,
"valid_targets_mean": 3132.6,
"valid_targets_min": 1287
},
{
"epoch": 5.050761421319797,
"grad_norm": 0.9251745469506645,
"learning_rate": 8.770479251647708e-06,
"loss": 0.2549,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10877332836389542,
"step": 995,
"valid_targets_mean": 2343.2,
"valid_targets_min": 977
},
{
"epoch": 5.0761421319796955,
"grad_norm": 0.9272380990444373,
"learning_rate": 8.561904458502424e-06,
"loss": 0.2553,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.16089127957820892,
"step": 1000,
"valid_targets_mean": 2838.4,
"valid_targets_min": 997
},
{
"epoch": 5.101522842639594,
"grad_norm": 0.9657786473953105,
"learning_rate": 8.355162165902785e-06,
"loss": 0.2701,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11014437675476074,
"step": 1005,
"valid_targets_mean": 2422.8,
"valid_targets_min": 508
},
{
"epoch": 5.126903553299492,
"grad_norm": 1.1440481143388213,
"learning_rate": 8.150285496090388e-06,
"loss": 0.2546,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11987794190645218,
"step": 1010,
"valid_targets_mean": 1924.6,
"valid_targets_min": 1013
},
{
"epoch": 5.152284263959391,
"grad_norm": 0.9481516578247208,
"learning_rate": 7.947307272414874e-06,
"loss": 0.2666,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12054525315761566,
"step": 1015,
"valid_targets_mean": 2759.8,
"valid_targets_min": 1979
},
{
"epoch": 5.177664974619289,
"grad_norm": 0.8900118367067522,
"learning_rate": 7.746260014075293e-06,
"loss": 0.2658,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1677006185054779,
"step": 1020,
"valid_targets_mean": 3171.0,
"valid_targets_min": 1586
},
{
"epoch": 5.2030456852791875,
"grad_norm": 0.9446860024372655,
"learning_rate": 7.547175930910187e-06,
"loss": 0.2772,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12553545832633972,
"step": 1025,
"valid_targets_mean": 2253.2,
"valid_targets_min": 989
},
{
"epoch": 5.228426395939087,
"grad_norm": 1.0092812527349864,
"learning_rate": 7.350086918237238e-06,
"loss": 0.2441,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13850677013397217,
"step": 1030,
"valid_targets_mean": 2710.8,
"valid_targets_min": 1422
},
{
"epoch": 5.253807106598985,
"grad_norm": 0.9400152581379698,
"learning_rate": 7.155024551743317e-06,
"loss": 0.2669,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12904059886932373,
"step": 1035,
"valid_targets_mean": 2357.6,
"valid_targets_min": 1317
},
{
"epoch": 5.279187817258883,
"grad_norm": 0.9286640611302993,
"learning_rate": 6.962020082425749e-06,
"loss": 0.2456,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11589126288890839,
"step": 1040,
"valid_targets_mean": 2505.4,
"valid_targets_min": 1083
},
{
"epoch": 5.304568527918782,
"grad_norm": 0.9294621710654248,
"learning_rate": 6.771104431585551e-06,
"loss": 0.2519,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.0984276682138443,
"step": 1045,
"valid_targets_mean": 1809.6,
"valid_targets_min": 801
},
{
"epoch": 5.32994923857868,
"grad_norm": 1.0852871883002646,
"learning_rate": 6.582308185873536e-06,
"loss": 0.2748,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13740497827529907,
"step": 1050,
"valid_targets_mean": 2055.0,
"valid_targets_min": 925
},
{
"epoch": 5.355329949238579,
"grad_norm": 0.8517072925008696,
"learning_rate": 6.3956615923900214e-06,
"loss": 0.2582,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13899219036102295,
"step": 1055,
"valid_targets_mean": 2988.9,
"valid_targets_min": 926
},
{
"epoch": 5.380710659898477,
"grad_norm": 1.0064260155062217,
"learning_rate": 6.211194553838931e-06,
"loss": 0.2501,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.15219058096408844,
"step": 1060,
"valid_targets_mean": 2646.2,
"valid_targets_min": 960
},
{
"epoch": 5.406091370558376,
"grad_norm": 0.953883752640116,
"learning_rate": 6.028936623737067e-06,
"loss": 0.2543,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.17078690230846405,
"step": 1065,
"valid_targets_mean": 3289.1,
"valid_targets_min": 1180
},
{
"epoch": 5.431472081218274,
"grad_norm": 0.8490004002408478,
"learning_rate": 5.848917001679339e-06,
"loss": 0.2579,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13909263908863068,
"step": 1070,
"valid_targets_mean": 2893.4,
"valid_targets_min": 299
},
{
"epoch": 5.456852791878172,
"grad_norm": 1.1776547438514429,
"learning_rate": 5.671164528660687e-06,
"loss": 0.2572,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.14180824160575867,
"step": 1075,
"valid_targets_mean": 2674.9,
"valid_targets_min": 787
},
{
"epoch": 5.482233502538071,
"grad_norm": 1.0869939396664519,
"learning_rate": 5.495707682455464e-06,
"loss": 0.2573,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12028077244758606,
"step": 1080,
"valid_targets_mean": 1954.1,
"valid_targets_min": 395
},
{
"epoch": 5.50761421319797,
"grad_norm": 0.8850078662752396,
"learning_rate": 5.322574573054991e-06,
"loss": 0.247,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12053519487380981,
"step": 1085,
"valid_targets_mean": 3040.0,
"valid_targets_min": 1024
},
{
"epoch": 5.532994923857868,
"grad_norm": 0.9136784171950727,
"learning_rate": 5.151792938164051e-06,
"loss": 0.2527,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12860670685768127,
"step": 1090,
"valid_targets_mean": 3044.2,
"valid_targets_min": 911
},
{
"epoch": 5.558375634517766,
"grad_norm": 1.0173457673617277,
"learning_rate": 4.983390138757027e-06,
"loss": 0.249,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11640293151140213,
"step": 1095,
"valid_targets_mean": 2169.0,
"valid_targets_min": 361
},
{
"epoch": 5.583756345177665,
"grad_norm": 0.9200254654181008,
"learning_rate": 4.817393154694399e-06,
"loss": 0.2634,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09641244262456894,
"step": 1100,
"valid_targets_mean": 1634.8,
"valid_targets_min": 1155
},
{
"epoch": 5.6091370558375635,
"grad_norm": 0.9590562047545811,
"learning_rate": 4.653828580400275e-06,
"loss": 0.2325,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1131504476070404,
"step": 1105,
"valid_targets_mean": 2329.9,
"valid_targets_min": 1633
},
{
"epoch": 5.634517766497462,
"grad_norm": 0.99617246926484,
"learning_rate": 4.4927226206017e-06,
"loss": 0.2703,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.15851570665836334,
"step": 1110,
"valid_targets_mean": 2249.6,
"valid_targets_min": 1396
},
{
"epoch": 5.659898477157361,
"grad_norm": 1.022233372274678,
"learning_rate": 4.334101086130409e-06,
"loss": 0.2426,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.15162095427513123,
"step": 1115,
"valid_targets_mean": 2983.9,
"valid_targets_min": 795
},
{
"epoch": 5.685279187817259,
"grad_norm": 0.9281317310810118,
"learning_rate": 4.177989389787625e-06,
"loss": 0.2505,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12421190738677979,
"step": 1120,
"valid_targets_mean": 2495.0,
"valid_targets_min": 1154
},
{
"epoch": 5.710659898477157,
"grad_norm": 0.9507092254457873,
"learning_rate": 4.024412542272706e-06,
"loss": 0.249,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.14842386543750763,
"step": 1125,
"valid_targets_mean": 2852.2,
"valid_targets_min": 1436
},
{
"epoch": 5.7360406091370555,
"grad_norm": 0.9527860854462858,
"learning_rate": 3.873395148176135e-06,
"loss": 0.2659,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11915292590856552,
"step": 1130,
"valid_targets_mean": 2597.9,
"valid_targets_min": 1289
},
{
"epoch": 5.761421319796955,
"grad_norm": 0.9308894637719127,
"learning_rate": 3.724961402037661e-06,
"loss": 0.2473,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12573911249637604,
"step": 1135,
"valid_targets_mean": 2650.9,
"valid_targets_min": 1046
},
{
"epoch": 5.786802030456853,
"grad_norm": 0.8959759836358597,
"learning_rate": 3.57913508447004e-06,
"loss": 0.2448,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10931817442178726,
"step": 1140,
"valid_targets_mean": 2622.6,
"valid_targets_min": 1115
},
{
"epoch": 5.812182741116751,
"grad_norm": 1.0985606002890143,
"learning_rate": 3.4359395583491594e-06,
"loss": 0.2441,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.16124585270881653,
"step": 1145,
"valid_targets_mean": 2679.9,
"valid_targets_min": 1145
},
{
"epoch": 5.837563451776649,
"grad_norm": 0.999132265083642,
"learning_rate": 3.2953977650710513e-06,
"loss": 0.2606,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12139269709587097,
"step": 1150,
"valid_targets_mean": 2462.9,
"valid_targets_min": 1242
},
{
"epoch": 5.862944162436548,
"grad_norm": 0.9773626061096712,
"learning_rate": 3.1575322208764714e-06,
"loss": 0.246,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13929663598537445,
"step": 1155,
"valid_targets_mean": 2238.6,
"valid_targets_min": 1388
},
{
"epoch": 5.888324873096447,
"grad_norm": 0.8513865275091448,
"learning_rate": 3.0223650132435335e-06,
"loss": 0.2597,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10711924731731415,
"step": 1160,
"valid_targets_mean": 2403.5,
"valid_targets_min": 849
},
{
"epoch": 5.913705583756345,
"grad_norm": 1.0153155674068752,
"learning_rate": 2.8899177973490734e-06,
"loss": 0.2564,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13925692439079285,
"step": 1165,
"valid_targets_mean": 2212.4,
"valid_targets_min": 957
},
{
"epoch": 5.939086294416244,
"grad_norm": 0.8959815269726535,
"learning_rate": 2.7602117925992964e-06,
"loss": 0.2538,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.08277732878923416,
"step": 1170,
"valid_targets_mean": 2028.4,
"valid_targets_min": 1009
},
{
"epoch": 5.964467005076142,
"grad_norm": 0.9079179631720385,
"learning_rate": 2.6332677792301773e-06,
"loss": 0.234,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11223854869604111,
"step": 1175,
"valid_targets_mean": 2755.0,
"valid_targets_min": 1083
},
{
"epoch": 5.98984771573604,
"grad_norm": 0.9347317535565745,
"learning_rate": 2.5091060949782664e-06,
"loss": 0.2542,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.14246557652950287,
"step": 1180,
"valid_targets_mean": 3177.5,
"valid_targets_min": 1675
},
{
"epoch": 6.0152284263959395,
"grad_norm": 0.9650623750727744,
"learning_rate": 2.3877466318223698e-06,
"loss": 0.2437,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10319985449314117,
"step": 1185,
"valid_targets_mean": 2075.1,
"valid_targets_min": 525
},
{
"epoch": 6.040609137055838,
"grad_norm": 0.8339126985510028,
"learning_rate": 2.2692088327966655e-06,
"loss": 0.2396,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10419311374425888,
"step": 1190,
"valid_targets_mean": 3008.8,
"valid_targets_min": 1633
},
{
"epoch": 6.065989847715736,
"grad_norm": 0.7474364288192638,
"learning_rate": 2.153511688875707e-06,
"loss": 0.2258,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11047440022230148,
"step": 1195,
"valid_targets_mean": 3757.4,
"valid_targets_min": 936
},
{
"epoch": 6.091370558375634,
"grad_norm": 0.98041966083737,
"learning_rate": 2.0406737359318797e-06,
"loss": 0.2629,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12280648946762085,
"step": 1200,
"valid_targets_mean": 2120.6,
"valid_targets_min": 360
},
{
"epoch": 6.116751269035533,
"grad_norm": 1.0198475030804934,
"learning_rate": 1.930713051765776e-06,
"loss": 0.2483,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09961174428462982,
"step": 1205,
"valid_targets_mean": 1937.1,
"valid_targets_min": 1127
},
{
"epoch": 6.1421319796954315,
"grad_norm": 0.9437418507000886,
"learning_rate": 1.8236472532099413e-06,
"loss": 0.2653,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09551151841878891,
"step": 1210,
"valid_targets_mean": 2320.2,
"valid_targets_min": 1122
},
{
"epoch": 6.16751269035533,
"grad_norm": 1.0067081945002703,
"learning_rate": 1.7194934933064654e-06,
"loss": 0.2467,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11312228441238403,
"step": 1215,
"valid_targets_mean": 2155.1,
"valid_targets_min": 1108
},
{
"epoch": 6.192893401015229,
"grad_norm": 0.890380894514622,
"learning_rate": 1.6182684585588981e-06,
"loss": 0.2391,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11702115833759308,
"step": 1220,
"valid_targets_mean": 2633.2,
"valid_targets_min": 1372
},
{
"epoch": 6.218274111675127,
"grad_norm": 0.9041801647836321,
"learning_rate": 1.5199883662588954e-06,
"loss": 0.2351,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10423216968774796,
"step": 1225,
"valid_targets_mean": 2829.2,
"valid_targets_min": 1190
},
{
"epoch": 6.243654822335025,
"grad_norm": 0.971484442221189,
"learning_rate": 1.4246689618880472e-06,
"loss": 0.2509,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12599416077136993,
"step": 1230,
"valid_targets_mean": 2065.6,
"valid_targets_min": 1166
},
{
"epoch": 6.269035532994923,
"grad_norm": 1.0397334165105048,
"learning_rate": 1.3323255165952875e-06,
"loss": 0.2452,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1000349223613739,
"step": 1235,
"valid_targets_mean": 1802.4,
"valid_targets_min": 1041
},
{
"epoch": 6.2944162436548226,
"grad_norm": 0.9722470098317174,
"learning_rate": 1.2429728247502926e-06,
"loss": 0.2372,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1916620135307312,
"step": 1240,
"valid_targets_mean": 3071.5,
"valid_targets_min": 876
},
{
"epoch": 6.319796954314721,
"grad_norm": 1.0179156477550857,
"learning_rate": 1.156625201573287e-06,
"loss": 0.2529,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1264421045780182,
"step": 1245,
"valid_targets_mean": 2721.6,
"valid_targets_min": 293
},
{
"epoch": 6.345177664974619,
"grad_norm": 1.048287346549327,
"learning_rate": 1.0732964808415792e-06,
"loss": 0.2599,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11165352165699005,
"step": 1250,
"valid_targets_mean": 2093.5,
"valid_targets_min": 1116
},
{
"epoch": 6.370558375634518,
"grad_norm": 0.8163389750191538,
"learning_rate": 9.93000012673262e-07,
"loss": 0.228,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.14192290604114532,
"step": 1255,
"valid_targets_mean": 3407.8,
"valid_targets_min": 1010
},
{
"epoch": 6.395939086294416,
"grad_norm": 1.0294994216450304,
"learning_rate": 9.157486613883759e-07,
"loss": 0.2437,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13059166073799133,
"step": 1260,
"valid_targets_mean": 1657.2,
"valid_targets_min": 913
},
{
"epoch": 6.4213197969543145,
"grad_norm": 1.0422176024676824,
"learning_rate": 8.415548034479215e-07,
"loss": 0.255,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1064755767583847,
"step": 1265,
"valid_targets_mean": 2209.9,
"valid_targets_min": 1136
},
{
"epoch": 6.446700507614214,
"grad_norm": 1.235014195884799,
"learning_rate": 7.704303254710188e-07,
"loss": 0.2317,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09507836401462555,
"step": 1270,
"valid_targets_mean": 2677.4,
"valid_targets_min": 1014
},
{
"epoch": 6.472081218274112,
"grad_norm": 1.0549720907684328,
"learning_rate": 7.023866223305487e-07,
"loss": 0.2469,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11979396641254425,
"step": 1275,
"valid_targets_mean": 2131.8,
"valid_targets_min": 981
},
{
"epoch": 6.49746192893401,
"grad_norm": 0.992296954375115,
"learning_rate": 6.374345953275773e-07,
"loss": 0.2456,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11428581178188324,
"step": 1280,
"valid_targets_mean": 2016.8,
"valid_targets_min": 1251
},
{
"epoch": 6.522842639593908,
"grad_norm": 1.0032523115406662,
"learning_rate": 5.755846504448604e-07,
"loss": 0.2371,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11331555247306824,
"step": 1285,
"valid_targets_mean": 2159.6,
"valid_targets_min": 1436
},
{
"epoch": 6.548223350253807,
"grad_norm": 0.9517700067388988,
"learning_rate": 5.16846696679687e-07,
"loss": 0.2582,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11859509348869324,
"step": 1290,
"valid_targets_mean": 2427.4,
"valid_targets_min": 1475
},
{
"epoch": 6.573604060913706,
"grad_norm": 0.7721200421281694,
"learning_rate": 4.6123014445636605e-07,
"loss": 0.2443,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.08160974085330963,
"step": 1295,
"valid_targets_mean": 2601.8,
"valid_targets_min": 1040
},
{
"epoch": 6.598984771573604,
"grad_norm": 0.9140077477818886,
"learning_rate": 4.087439041185781e-07,
"loss": 0.2393,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09139326214790344,
"step": 1300,
"valid_targets_mean": 2322.6,
"valid_targets_min": 832
},
{
"epoch": 6.624365482233502,
"grad_norm": 1.07004687623224,
"learning_rate": 3.5939638450183776e-07,
"loss": 0.2574,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.16664224863052368,
"step": 1305,
"valid_targets_mean": 2209.2,
"valid_targets_min": 896
},
{
"epoch": 6.649746192893401,
"grad_norm": 0.9914655537015288,
"learning_rate": 3.1319549158632444e-07,
"loss": 0.2342,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09106150269508362,
"step": 1310,
"valid_targets_mean": 1496.8,
"valid_targets_min": 846
},
{
"epoch": 6.675126903553299,
"grad_norm": 0.998951464628241,
"learning_rate": 2.701486272302534e-07,
"loss": 0.2364,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09747333824634552,
"step": 1315,
"valid_targets_mean": 1789.2,
"valid_targets_min": 990
},
{
"epoch": 6.700507614213198,
"grad_norm": 0.9002671317698705,
"learning_rate": 2.302626879840353e-07,
"loss": 0.2565,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13508570194244385,
"step": 1320,
"valid_targets_mean": 2941.5,
"valid_targets_min": 1487
},
{
"epoch": 6.725888324873097,
"grad_norm": 0.8743963104207289,
"learning_rate": 1.9354406398535363e-07,
"loss": 0.2309,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13274939358234406,
"step": 1325,
"valid_targets_mean": 3342.8,
"valid_targets_min": 1121
},
{
"epoch": 6.751269035532995,
"grad_norm": 0.9310217194018677,
"learning_rate": 1.599986379354257e-07,
"loss": 0.2524,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1376674473285675,
"step": 1330,
"valid_targets_mean": 2423.2,
"valid_targets_min": 743
},
{
"epoch": 6.776649746192893,
"grad_norm": 0.9280165757856627,
"learning_rate": 1.29631784156512e-07,
"loss": 0.2246,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.14213162660598755,
"step": 1335,
"valid_targets_mean": 2780.5,
"valid_targets_min": 1301
},
{
"epoch": 6.802030456852792,
"grad_norm": 0.9707954483311023,
"learning_rate": 1.0244836773091182e-07,
"loss": 0.2299,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12803569436073303,
"step": 1340,
"valid_targets_mean": 3245.4,
"valid_targets_min": 977
},
{
"epoch": 6.8274111675126905,
"grad_norm": 1.1217146591780642,
"learning_rate": 7.845274372151767e-08,
"loss": 0.2366,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09864965081214905,
"step": 1345,
"valid_targets_mean": 1694.6,
"valid_targets_min": 787
},
{
"epoch": 6.852791878172589,
"grad_norm": 0.8664511542186972,
"learning_rate": 5.7648756474084636e-08,
"loss": 0.2389,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09746871888637543,
"step": 1350,
"valid_targets_mean": 1966.5,
"valid_targets_min": 417
},
{
"epoch": 6.878172588832487,
"grad_norm": 0.93357375736198,
"learning_rate": 4.003973900133851e-08,
"loss": 0.2403,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1525709331035614,
"step": 1355,
"valid_targets_mean": 2738.1,
"valid_targets_min": 508
},
{
"epoch": 6.903553299492386,
"grad_norm": 0.9786494242138831,
"learning_rate": 2.5628512448987453e-08,
"loss": 0.2497,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13262221217155457,
"step": 1360,
"valid_targets_mean": 2689.0,
"valid_targets_min": 957
},
{
"epoch": 6.928934010152284,
"grad_norm": 1.0963747097882328,
"learning_rate": 1.4417385643741289e-08,
"loss": 0.2481,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13900740444660187,
"step": 1365,
"valid_targets_mean": 2598.9,
"valid_targets_min": 1350
},
{
"epoch": 6.9543147208121825,
"grad_norm": 0.9462888101664159,
"learning_rate": 6.408154723420712e-09,
"loss": 0.2395,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10249761492013931,
"step": 1370,
"valid_targets_mean": 2344.4,
"valid_targets_min": 994
},
{
"epoch": 6.979695431472082,
"grad_norm": 0.9690061063022448,
"learning_rate": 1.6021028491941538e-09,
"loss": 0.2374,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1248205229640007,
"step": 1375,
"valid_targets_mean": 2592.9,
"valid_targets_min": 1056
},
{
"epoch": 7.0,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12962277233600616,
"step": 1379,
"total_flos": 2.2298248732265677e+17,
"train_loss": 0.3448181322210504,
"train_runtime": 6375.7964,
"train_samples_per_second": 3.456,
"train_steps_per_second": 0.216,
"valid_targets_mean": 2546.5,
"valid_targets_min": 1046
}
],
"logging_steps": 5,
"max_steps": 1379,
"num_input_tokens_seen": 0,
"num_train_epochs": 7,
"save_steps": 1500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 2.2298248732265677e+17,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}