nl2bash-4ep-restore-hp / trainer_state.json
hf-reset
Reset repository without checkpoints directories
e4bc5da
{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 6.0,
"eval_steps": 500,
"global_step": 2442,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.012285012285012284,
"grad_norm": 19.48494191330693,
"learning_rate": 6.530612244897961e-07,
"loss": 1.0196,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.995261013507843,
"step": 5,
"valid_targets_mean": 953.3,
"valid_targets_min": 648
},
{
"epoch": 0.02457002457002457,
"grad_norm": 16.973569102036848,
"learning_rate": 1.469387755102041e-06,
"loss": 0.9788,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.9372879266738892,
"step": 10,
"valid_targets_mean": 1108.8,
"valid_targets_min": 784
},
{
"epoch": 0.036855036855036855,
"grad_norm": 15.639368583656532,
"learning_rate": 2.285714285714286e-06,
"loss": 0.9591,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.9503127336502075,
"step": 15,
"valid_targets_mean": 988.2,
"valid_targets_min": 739
},
{
"epoch": 0.04914004914004914,
"grad_norm": 10.514490782159397,
"learning_rate": 3.1020408163265307e-06,
"loss": 0.8599,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.8373680710792542,
"step": 20,
"valid_targets_mean": 939.6,
"valid_targets_min": 625
},
{
"epoch": 0.06142506142506143,
"grad_norm": 6.7065545820405665,
"learning_rate": 3.9183673469387755e-06,
"loss": 0.7771,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.7272936701774597,
"step": 25,
"valid_targets_mean": 862.8,
"valid_targets_min": 691
},
{
"epoch": 0.07371007371007371,
"grad_norm": 4.7134759825293955,
"learning_rate": 4.734693877551021e-06,
"loss": 0.6692,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.6532790660858154,
"step": 30,
"valid_targets_mean": 870.2,
"valid_targets_min": 568
},
{
"epoch": 0.085995085995086,
"grad_norm": 3.1810226037793567,
"learning_rate": 5.551020408163266e-06,
"loss": 0.618,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.5970568060874939,
"step": 35,
"valid_targets_mean": 860.2,
"valid_targets_min": 654
},
{
"epoch": 0.09828009828009827,
"grad_norm": 2.2072856666896485,
"learning_rate": 6.36734693877551e-06,
"loss": 0.5438,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.5087846517562866,
"step": 40,
"valid_targets_mean": 896.3,
"valid_targets_min": 574
},
{
"epoch": 0.11056511056511056,
"grad_norm": 1.9513850751635282,
"learning_rate": 7.183673469387755e-06,
"loss": 0.4908,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.47997918725013733,
"step": 45,
"valid_targets_mean": 866.0,
"valid_targets_min": 531
},
{
"epoch": 0.12285012285012285,
"grad_norm": 1.779935239542962,
"learning_rate": 8.000000000000001e-06,
"loss": 0.4639,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.44255393743515015,
"step": 50,
"valid_targets_mean": 908.8,
"valid_targets_min": 657
},
{
"epoch": 0.13513513513513514,
"grad_norm": 1.5653434999193199,
"learning_rate": 8.816326530612247e-06,
"loss": 0.4245,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.4232107996940613,
"step": 55,
"valid_targets_mean": 962.6,
"valid_targets_min": 620
},
{
"epoch": 0.14742014742014742,
"grad_norm": 1.5087785328600123,
"learning_rate": 9.63265306122449e-06,
"loss": 0.3996,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.3812786042690277,
"step": 60,
"valid_targets_mean": 962.4,
"valid_targets_min": 604
},
{
"epoch": 0.1597051597051597,
"grad_norm": 1.5312228956706806,
"learning_rate": 1.0448979591836737e-05,
"loss": 0.3733,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.36668089032173157,
"step": 65,
"valid_targets_mean": 926.8,
"valid_targets_min": 624
},
{
"epoch": 0.171990171990172,
"grad_norm": 1.3789570756738234,
"learning_rate": 1.126530612244898e-05,
"loss": 0.3635,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.35279956459999084,
"step": 70,
"valid_targets_mean": 1032.7,
"valid_targets_min": 652
},
{
"epoch": 0.18427518427518427,
"grad_norm": 1.3210121581543464,
"learning_rate": 1.2081632653061225e-05,
"loss": 0.3491,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.35165709257125854,
"step": 75,
"valid_targets_mean": 948.1,
"valid_targets_min": 661
},
{
"epoch": 0.19656019656019655,
"grad_norm": 1.3570768829251245,
"learning_rate": 1.2897959183673469e-05,
"loss": 0.3245,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.3350982069969177,
"step": 80,
"valid_targets_mean": 962.0,
"valid_targets_min": 712
},
{
"epoch": 0.20884520884520885,
"grad_norm": 1.1835417091167943,
"learning_rate": 1.3714285714285716e-05,
"loss": 0.3245,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.3199244737625122,
"step": 85,
"valid_targets_mean": 928.4,
"valid_targets_min": 590
},
{
"epoch": 0.22113022113022113,
"grad_norm": 1.2207437844630713,
"learning_rate": 1.4530612244897961e-05,
"loss": 0.3054,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.29469019174575806,
"step": 90,
"valid_targets_mean": 924.2,
"valid_targets_min": 649
},
{
"epoch": 0.2334152334152334,
"grad_norm": 1.2072738228334028,
"learning_rate": 1.5346938775510204e-05,
"loss": 0.3124,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.3128836154937744,
"step": 95,
"valid_targets_mean": 992.2,
"valid_targets_min": 631
},
{
"epoch": 0.2457002457002457,
"grad_norm": 1.2368210765633527,
"learning_rate": 1.616326530612245e-05,
"loss": 0.3088,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.29172301292419434,
"step": 100,
"valid_targets_mean": 955.3,
"valid_targets_min": 656
},
{
"epoch": 0.257985257985258,
"grad_norm": 1.2901546371584578,
"learning_rate": 1.6979591836734695e-05,
"loss": 0.3117,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.30096760392189026,
"step": 105,
"valid_targets_mean": 922.8,
"valid_targets_min": 711
},
{
"epoch": 0.2702702702702703,
"grad_norm": 1.1619287144692585,
"learning_rate": 1.779591836734694e-05,
"loss": 0.2942,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.3181251287460327,
"step": 110,
"valid_targets_mean": 1017.2,
"valid_targets_min": 698
},
{
"epoch": 0.28255528255528256,
"grad_norm": 1.1354753438215313,
"learning_rate": 1.8612244897959185e-05,
"loss": 0.2809,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2735210657119751,
"step": 115,
"valid_targets_mean": 1038.6,
"valid_targets_min": 740
},
{
"epoch": 0.29484029484029484,
"grad_norm": 1.2766170739852154,
"learning_rate": 1.942857142857143e-05,
"loss": 0.2901,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.30684512853622437,
"step": 120,
"valid_targets_mean": 908.9,
"valid_targets_min": 656
},
{
"epoch": 0.3071253071253071,
"grad_norm": 1.1321457276246412,
"learning_rate": 2.0244897959183672e-05,
"loss": 0.2931,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2833458185195923,
"step": 125,
"valid_targets_mean": 973.4,
"valid_targets_min": 686
},
{
"epoch": 0.3194103194103194,
"grad_norm": 1.3525378529895846,
"learning_rate": 2.106122448979592e-05,
"loss": 0.289,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.28936123847961426,
"step": 130,
"valid_targets_mean": 822.1,
"valid_targets_min": 663
},
{
"epoch": 0.3316953316953317,
"grad_norm": 1.3198522548302123,
"learning_rate": 2.1877551020408166e-05,
"loss": 0.2901,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.30741700530052185,
"step": 135,
"valid_targets_mean": 972.9,
"valid_targets_min": 674
},
{
"epoch": 0.343980343980344,
"grad_norm": 1.1665377558503693,
"learning_rate": 2.269387755102041e-05,
"loss": 0.2925,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.32794955372810364,
"step": 140,
"valid_targets_mean": 1073.8,
"valid_targets_min": 637
},
{
"epoch": 0.35626535626535627,
"grad_norm": 1.1034070025878784,
"learning_rate": 2.3510204081632656e-05,
"loss": 0.2661,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2596943974494934,
"step": 145,
"valid_targets_mean": 986.8,
"valid_targets_min": 704
},
{
"epoch": 0.36855036855036855,
"grad_norm": 1.1648155020059545,
"learning_rate": 2.4326530612244898e-05,
"loss": 0.2763,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.28700608015060425,
"step": 150,
"valid_targets_mean": 985.8,
"valid_targets_min": 646
},
{
"epoch": 0.3808353808353808,
"grad_norm": 1.1608740590797577,
"learning_rate": 2.5142857142857143e-05,
"loss": 0.2699,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.26809966564178467,
"step": 155,
"valid_targets_mean": 908.0,
"valid_targets_min": 700
},
{
"epoch": 0.3931203931203931,
"grad_norm": 1.2038707122717154,
"learning_rate": 2.5959183673469392e-05,
"loss": 0.2861,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2775510251522064,
"step": 160,
"valid_targets_mean": 965.7,
"valid_targets_min": 683
},
{
"epoch": 0.40540540540540543,
"grad_norm": 1.2941106922031897,
"learning_rate": 2.6775510204081637e-05,
"loss": 0.2669,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2653312385082245,
"step": 165,
"valid_targets_mean": 894.4,
"valid_targets_min": 713
},
{
"epoch": 0.4176904176904177,
"grad_norm": 1.2060242790377764,
"learning_rate": 2.7591836734693882e-05,
"loss": 0.2697,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.279617577791214,
"step": 170,
"valid_targets_mean": 972.8,
"valid_targets_min": 598
},
{
"epoch": 0.42997542997543,
"grad_norm": 1.0840840161259355,
"learning_rate": 2.8408163265306124e-05,
"loss": 0.2756,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.26907873153686523,
"step": 175,
"valid_targets_mean": 998.5,
"valid_targets_min": 711
},
{
"epoch": 0.44226044226044225,
"grad_norm": 1.256645843082882,
"learning_rate": 2.922448979591837e-05,
"loss": 0.2766,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.28155115246772766,
"step": 180,
"valid_targets_mean": 916.9,
"valid_targets_min": 721
},
{
"epoch": 0.45454545454545453,
"grad_norm": 1.1081969066979958,
"learning_rate": 3.0040816326530614e-05,
"loss": 0.2639,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2645479440689087,
"step": 185,
"valid_targets_mean": 936.2,
"valid_targets_min": 516
},
{
"epoch": 0.4668304668304668,
"grad_norm": 1.5408533860486011,
"learning_rate": 3.085714285714286e-05,
"loss": 0.265,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.27647218108177185,
"step": 190,
"valid_targets_mean": 980.4,
"valid_targets_min": 619
},
{
"epoch": 0.47911547911547914,
"grad_norm": 1.1901708966642726,
"learning_rate": 3.1673469387755105e-05,
"loss": 0.2609,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2448381930589676,
"step": 195,
"valid_targets_mean": 917.9,
"valid_targets_min": 753
},
{
"epoch": 0.4914004914004914,
"grad_norm": 1.1723517034459403,
"learning_rate": 3.2489795918367346e-05,
"loss": 0.2528,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.22118034958839417,
"step": 200,
"valid_targets_mean": 900.4,
"valid_targets_min": 618
},
{
"epoch": 0.5036855036855037,
"grad_norm": 1.2691807590013902,
"learning_rate": 3.3306122448979595e-05,
"loss": 0.2714,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2686021625995636,
"step": 205,
"valid_targets_mean": 956.2,
"valid_targets_min": 601
},
{
"epoch": 0.515970515970516,
"grad_norm": 1.2271107795623217,
"learning_rate": 3.4122448979591843e-05,
"loss": 0.2596,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.25188225507736206,
"step": 210,
"valid_targets_mean": 925.4,
"valid_targets_min": 716
},
{
"epoch": 0.5282555282555282,
"grad_norm": 1.5888797236955163,
"learning_rate": 3.4938775510204085e-05,
"loss": 0.2556,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24855633080005646,
"step": 215,
"valid_targets_mean": 951.9,
"valid_targets_min": 727
},
{
"epoch": 0.5405405405405406,
"grad_norm": 1.3275098670748375,
"learning_rate": 3.575510204081633e-05,
"loss": 0.2531,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2647552490234375,
"step": 220,
"valid_targets_mean": 987.1,
"valid_targets_min": 783
},
{
"epoch": 0.5528255528255528,
"grad_norm": 1.0828068510228948,
"learning_rate": 3.6571428571428576e-05,
"loss": 0.2497,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.23967652022838593,
"step": 225,
"valid_targets_mean": 926.5,
"valid_targets_min": 696
},
{
"epoch": 0.5651105651105651,
"grad_norm": 1.1445169366996504,
"learning_rate": 3.738775510204082e-05,
"loss": 0.2452,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2608412504196167,
"step": 230,
"valid_targets_mean": 958.7,
"valid_targets_min": 705
},
{
"epoch": 0.5773955773955773,
"grad_norm": 1.055020831726768,
"learning_rate": 3.8204081632653066e-05,
"loss": 0.2477,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2291972041130066,
"step": 235,
"valid_targets_mean": 980.6,
"valid_targets_min": 683
},
{
"epoch": 0.5896805896805897,
"grad_norm": 2.300952598422869,
"learning_rate": 3.902040816326531e-05,
"loss": 0.2612,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.23058481514453888,
"step": 240,
"valid_targets_mean": 930.5,
"valid_targets_min": 631
},
{
"epoch": 0.601965601965602,
"grad_norm": 1.2180245686048703,
"learning_rate": 3.983673469387755e-05,
"loss": 0.2613,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.259826123714447,
"step": 245,
"valid_targets_mean": 990.2,
"valid_targets_min": 679
},
{
"epoch": 0.6142506142506142,
"grad_norm": 1.189380446558114,
"learning_rate": 3.9999672841332876e-05,
"loss": 0.2542,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.23451372981071472,
"step": 250,
"valid_targets_mean": 996.4,
"valid_targets_min": 643
},
{
"epoch": 0.6265356265356266,
"grad_norm": 1.216345859130752,
"learning_rate": 3.999834377759164e-05,
"loss": 0.2567,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.26435649394989014,
"step": 255,
"valid_targets_mean": 967.8,
"valid_targets_min": 582
},
{
"epoch": 0.6388206388206388,
"grad_norm": 1.1231575295082026,
"learning_rate": 3.999599242924703e-05,
"loss": 0.2594,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2635229229927063,
"step": 260,
"valid_targets_mean": 889.4,
"valid_targets_min": 577
},
{
"epoch": 0.6511056511056511,
"grad_norm": 1.1214663878022753,
"learning_rate": 3.999261891649637e-05,
"loss": 0.2552,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24433369934558868,
"step": 265,
"valid_targets_mean": 917.2,
"valid_targets_min": 686
},
{
"epoch": 0.6633906633906634,
"grad_norm": 1.0868047723532783,
"learning_rate": 3.9988223411788436e-05,
"loss": 0.2582,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24705305695533752,
"step": 270,
"valid_targets_mean": 969.6,
"valid_targets_min": 671
},
{
"epoch": 0.6756756756756757,
"grad_norm": 1.090076198257986,
"learning_rate": 3.998280613981468e-05,
"loss": 0.241,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.23530612885951996,
"step": 275,
"valid_targets_mean": 973.6,
"valid_targets_min": 725
},
{
"epoch": 0.687960687960688,
"grad_norm": 1.1254667728908574,
"learning_rate": 3.9976367377497725e-05,
"loss": 0.2482,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.25306543707847595,
"step": 280,
"valid_targets_mean": 876.2,
"valid_targets_min": 631
},
{
"epoch": 0.7002457002457002,
"grad_norm": 1.1946450393277166,
"learning_rate": 3.99689074539772e-05,
"loss": 0.245,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24335439503192902,
"step": 285,
"valid_targets_mean": 876.1,
"valid_targets_min": 599
},
{
"epoch": 0.7125307125307125,
"grad_norm": 1.0864345277533947,
"learning_rate": 3.9960426750592936e-05,
"loss": 0.2451,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2602050304412842,
"step": 290,
"valid_targets_mean": 945.6,
"valid_targets_min": 710
},
{
"epoch": 0.7248157248157249,
"grad_norm": 1.0748839596204636,
"learning_rate": 3.995092570086546e-05,
"loss": 0.2459,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2375788390636444,
"step": 295,
"valid_targets_mean": 950.0,
"valid_targets_min": 681
},
{
"epoch": 0.7371007371007371,
"grad_norm": 1.1371929708679651,
"learning_rate": 3.9940404790473825e-05,
"loss": 0.24,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.22634164988994598,
"step": 300,
"valid_targets_mean": 864.0,
"valid_targets_min": 656
},
{
"epoch": 0.7493857493857494,
"grad_norm": 1.1173189139596496,
"learning_rate": 3.992886455723082e-05,
"loss": 0.2393,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.23460198938846588,
"step": 305,
"valid_targets_mean": 890.8,
"valid_targets_min": 546
},
{
"epoch": 0.7616707616707616,
"grad_norm": 1.0806367164790773,
"learning_rate": 3.991630559105541e-05,
"loss": 0.2392,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24468854069709778,
"step": 310,
"valid_targets_mean": 1037.7,
"valid_targets_min": 648
},
{
"epoch": 0.773955773955774,
"grad_norm": 1.0941916978548303,
"learning_rate": 3.990272853394268e-05,
"loss": 0.2436,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.22421763837337494,
"step": 315,
"valid_targets_mean": 879.4,
"valid_targets_min": 652
},
{
"epoch": 0.7862407862407862,
"grad_norm": 0.9892087713501029,
"learning_rate": 3.988813407993089e-05,
"loss": 0.2283,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2293543666601181,
"step": 320,
"valid_targets_mean": 926.8,
"valid_targets_min": 709
},
{
"epoch": 0.7985257985257985,
"grad_norm": 1.0439272336097505,
"learning_rate": 3.987252297506613e-05,
"loss": 0.2404,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.23711051046848297,
"step": 325,
"valid_targets_mean": 916.6,
"valid_targets_min": 659
},
{
"epoch": 0.8108108108108109,
"grad_norm": 0.9710400731300508,
"learning_rate": 3.9855896017364075e-05,
"loss": 0.2339,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2465747892856598,
"step": 330,
"valid_targets_mean": 934.1,
"valid_targets_min": 559
},
{
"epoch": 0.8230958230958231,
"grad_norm": 1.1162307618645835,
"learning_rate": 3.983825405676927e-05,
"loss": 0.2409,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2302704155445099,
"step": 335,
"valid_targets_mean": 916.3,
"valid_targets_min": 689
},
{
"epoch": 0.8353808353808354,
"grad_norm": 1.0360859227988688,
"learning_rate": 3.981959799511161e-05,
"loss": 0.2371,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.22074277698993683,
"step": 340,
"valid_targets_mean": 926.7,
"valid_targets_min": 613
},
{
"epoch": 0.8476658476658476,
"grad_norm": 1.3415832051973777,
"learning_rate": 3.979992878606032e-05,
"loss": 0.2473,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.227852463722229,
"step": 345,
"valid_targets_mean": 873.4,
"valid_targets_min": 683
},
{
"epoch": 0.85995085995086,
"grad_norm": 1.0617306936385849,
"learning_rate": 3.977924743507513e-05,
"loss": 0.2447,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2265169322490692,
"step": 350,
"valid_targets_mean": 949.1,
"valid_targets_min": 592
},
{
"epoch": 0.8722358722358723,
"grad_norm": 1.0877486714407731,
"learning_rate": 3.975755499935492e-05,
"loss": 0.2501,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.27971571683883667,
"step": 355,
"valid_targets_mean": 993.2,
"valid_targets_min": 597
},
{
"epoch": 0.8845208845208845,
"grad_norm": 1.0191352294037985,
"learning_rate": 3.973485258778368e-05,
"loss": 0.2266,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.22834989428520203,
"step": 360,
"valid_targets_mean": 905.8,
"valid_targets_min": 681
},
{
"epoch": 0.8968058968058968,
"grad_norm": 0.957295460831019,
"learning_rate": 3.971114136087379e-05,
"loss": 0.2412,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.23216350376605988,
"step": 365,
"valid_targets_mean": 1064.6,
"valid_targets_min": 577
},
{
"epoch": 0.9090909090909091,
"grad_norm": 1.4136404183668454,
"learning_rate": 3.968642253070675e-05,
"loss": 0.2353,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.22791610658168793,
"step": 370,
"valid_targets_mean": 812.2,
"valid_targets_min": 601
},
{
"epoch": 0.9213759213759214,
"grad_norm": 1.0363150123987157,
"learning_rate": 3.966069736087116e-05,
"loss": 0.2429,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.22790317237377167,
"step": 375,
"valid_targets_mean": 930.9,
"valid_targets_min": 569
},
{
"epoch": 0.9336609336609336,
"grad_norm": 1.078054793179813,
"learning_rate": 3.963396716639818e-05,
"loss": 0.2337,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.22291269898414612,
"step": 380,
"valid_targets_mean": 872.6,
"valid_targets_min": 682
},
{
"epoch": 0.9459459459459459,
"grad_norm": 1.0135836959168179,
"learning_rate": 3.960623331369427e-05,
"loss": 0.2368,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2167268991470337,
"step": 385,
"valid_targets_mean": 873.2,
"valid_targets_min": 749
},
{
"epoch": 0.9582309582309583,
"grad_norm": 0.9632539514971237,
"learning_rate": 3.957749722047138e-05,
"loss": 0.2307,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24117511510849,
"step": 390,
"valid_targets_mean": 908.5,
"valid_targets_min": 639
},
{
"epoch": 0.9705159705159705,
"grad_norm": 1.0439198961040346,
"learning_rate": 3.9547760355674405e-05,
"loss": 0.2211,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2329951822757721,
"step": 395,
"valid_targets_mean": 940.2,
"valid_targets_min": 611
},
{
"epoch": 0.9828009828009828,
"grad_norm": 1.0005974886544775,
"learning_rate": 3.951702423940621e-05,
"loss": 0.2361,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24946032464504242,
"step": 400,
"valid_targets_mean": 1000.1,
"valid_targets_min": 668
},
{
"epoch": 0.995085995085995,
"grad_norm": 1.0320390692487604,
"learning_rate": 3.948529044284981e-05,
"loss": 0.2257,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24351008236408234,
"step": 405,
"valid_targets_mean": 947.5,
"valid_targets_min": 605
},
{
"epoch": 1.0073710073710074,
"grad_norm": 1.2048041606254665,
"learning_rate": 3.9452560588188135e-05,
"loss": 0.231,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.21415719389915466,
"step": 410,
"valid_targets_mean": 871.8,
"valid_targets_min": 625
},
{
"epoch": 1.0196560196560196,
"grad_norm": 0.943497120924555,
"learning_rate": 3.9418836348521045e-05,
"loss": 0.2245,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.20840199291706085,
"step": 415,
"valid_targets_mean": 1057.2,
"valid_targets_min": 701
},
{
"epoch": 1.031941031941032,
"grad_norm": 1.0489115691729,
"learning_rate": 3.9384119447779854e-05,
"loss": 0.2114,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2008688747882843,
"step": 420,
"valid_targets_mean": 910.7,
"valid_targets_min": 679
},
{
"epoch": 1.0442260442260443,
"grad_norm": 1.0601266266840927,
"learning_rate": 3.934841166063919e-05,
"loss": 0.221,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2155018448829651,
"step": 425,
"valid_targets_mean": 934.6,
"valid_targets_min": 555
},
{
"epoch": 1.0565110565110565,
"grad_norm": 1.0940787633659956,
"learning_rate": 3.931171481242625e-05,
"loss": 0.2017,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.19951783120632172,
"step": 430,
"valid_targets_mean": 899.6,
"valid_targets_min": 685
},
{
"epoch": 1.0687960687960687,
"grad_norm": 1.1413230966413799,
"learning_rate": 3.927403077902753e-05,
"loss": 0.2125,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.224505215883255,
"step": 435,
"valid_targets_mean": 912.6,
"valid_targets_min": 685
},
{
"epoch": 1.0810810810810811,
"grad_norm": 1.0555822201833036,
"learning_rate": 3.9235361486792905e-05,
"loss": 0.2193,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.23359808325767517,
"step": 440,
"valid_targets_mean": 1005.2,
"valid_targets_min": 839
},
{
"epoch": 1.0933660933660934,
"grad_norm": 1.020710025205007,
"learning_rate": 3.9195708912437176e-05,
"loss": 0.2187,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.19746169447898865,
"step": 445,
"valid_targets_mean": 930.9,
"valid_targets_min": 648
},
{
"epoch": 1.1056511056511056,
"grad_norm": 1.2154470553446661,
"learning_rate": 3.915507508293901e-05,
"loss": 0.2089,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.22506622970104218,
"step": 450,
"valid_targets_mean": 1011.9,
"valid_targets_min": 687
},
{
"epoch": 1.117936117936118,
"grad_norm": 1.071640402867911,
"learning_rate": 3.911346207543734e-05,
"loss": 0.2088,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2123754620552063,
"step": 455,
"valid_targets_mean": 933.9,
"valid_targets_min": 649
},
{
"epoch": 1.1302211302211302,
"grad_norm": 1.0094838297122273,
"learning_rate": 3.907087201712515e-05,
"loss": 0.2126,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.22164851427078247,
"step": 460,
"valid_targets_mean": 984.8,
"valid_targets_min": 516
},
{
"epoch": 1.1425061425061425,
"grad_norm": 1.0456991601463468,
"learning_rate": 3.902730708514078e-05,
"loss": 0.2056,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.18665668368339539,
"step": 465,
"valid_targets_mean": 906.1,
"valid_targets_min": 707
},
{
"epoch": 1.154791154791155,
"grad_norm": 1.0789156231899877,
"learning_rate": 3.8982769506456616e-05,
"loss": 0.2091,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.22141095995903015,
"step": 470,
"valid_targets_mean": 986.8,
"valid_targets_min": 633
},
{
"epoch": 1.1670761670761671,
"grad_norm": 1.009247300625938,
"learning_rate": 3.893726155776524e-05,
"loss": 0.2073,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.19400489330291748,
"step": 475,
"valid_targets_mean": 974.9,
"valid_targets_min": 552
},
{
"epoch": 1.1793611793611793,
"grad_norm": 1.1007983939488537,
"learning_rate": 3.8890785565363046e-05,
"loss": 0.2091,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.22441557049751282,
"step": 480,
"valid_targets_mean": 903.4,
"valid_targets_min": 681
},
{
"epoch": 1.1916461916461916,
"grad_norm": 1.1324267441384634,
"learning_rate": 3.884334390503136e-05,
"loss": 0.2176,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.21191783249378204,
"step": 485,
"valid_targets_mean": 887.9,
"valid_targets_min": 623
},
{
"epoch": 1.203931203931204,
"grad_norm": 1.3013874210452543,
"learning_rate": 3.8794939001914955e-05,
"loss": 0.2115,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2267088145017624,
"step": 490,
"valid_targets_mean": 960.1,
"valid_targets_min": 695
},
{
"epoch": 1.2162162162162162,
"grad_norm": 1.0927495139559071,
"learning_rate": 3.87455733303981e-05,
"loss": 0.2226,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.22308525443077087,
"step": 495,
"valid_targets_mean": 903.3,
"valid_targets_min": 559
},
{
"epoch": 1.2285012285012284,
"grad_norm": 1.04814919371086,
"learning_rate": 3.869524941397805e-05,
"loss": 0.2062,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.22436878085136414,
"step": 500,
"valid_targets_mean": 928.8,
"valid_targets_min": 702
},
{
"epoch": 1.2407862407862407,
"grad_norm": 1.1075100249688812,
"learning_rate": 3.8643969825136095e-05,
"loss": 0.2128,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2219080626964569,
"step": 505,
"valid_targets_mean": 900.2,
"valid_targets_min": 541
},
{
"epoch": 1.253071253071253,
"grad_norm": 0.91847772621059,
"learning_rate": 3.8591737185206024e-05,
"loss": 0.2114,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1968231201171875,
"step": 510,
"valid_targets_mean": 886.4,
"valid_targets_min": 680
},
{
"epoch": 1.2653562653562653,
"grad_norm": 1.0428588573826258,
"learning_rate": 3.853855416424011e-05,
"loss": 0.2108,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.23146593570709229,
"step": 515,
"valid_targets_mean": 958.9,
"valid_targets_min": 666
},
{
"epoch": 1.2776412776412776,
"grad_norm": 0.9531033959892421,
"learning_rate": 3.848442348087267e-05,
"loss": 0.2144,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.20124828815460205,
"step": 520,
"valid_targets_mean": 909.4,
"valid_targets_min": 658
},
{
"epoch": 1.28992628992629,
"grad_norm": 0.9969526285724082,
"learning_rate": 3.842934790218106e-05,
"loss": 0.2095,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1991349309682846,
"step": 525,
"valid_targets_mean": 965.3,
"valid_targets_min": 666
},
{
"epoch": 1.3022113022113022,
"grad_norm": 1.0967602900326623,
"learning_rate": 3.837333024354422e-05,
"loss": 0.2091,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2304757535457611,
"step": 530,
"valid_targets_mean": 991.1,
"valid_targets_min": 563
},
{
"epoch": 1.3144963144963144,
"grad_norm": 1.0292442436624027,
"learning_rate": 3.8316373368498794e-05,
"loss": 0.1968,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.20452198386192322,
"step": 535,
"valid_targets_mean": 920.8,
"valid_targets_min": 604
},
{
"epoch": 1.3267813267813269,
"grad_norm": 1.0845488755936965,
"learning_rate": 3.82584801885927e-05,
"loss": 0.2138,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.21353307366371155,
"step": 540,
"valid_targets_mean": 941.1,
"valid_targets_min": 652
},
{
"epoch": 1.339066339066339,
"grad_norm": 1.002091400863676,
"learning_rate": 3.8199653663236336e-05,
"loss": 0.2153,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.23313114047050476,
"step": 545,
"valid_targets_mean": 1035.6,
"valid_targets_min": 732
},
{
"epoch": 1.3513513513513513,
"grad_norm": 0.937407231724259,
"learning_rate": 3.813989679955128e-05,
"loss": 0.2141,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.20936307311058044,
"step": 550,
"valid_targets_mean": 972.9,
"valid_targets_min": 635
},
{
"epoch": 1.3636363636363638,
"grad_norm": 1.3073136711778546,
"learning_rate": 3.8079212652216595e-05,
"loss": 0.1975,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2040657103061676,
"step": 555,
"valid_targets_mean": 887.8,
"valid_targets_min": 602
},
{
"epoch": 1.375921375921376,
"grad_norm": 1.0087184524887758,
"learning_rate": 3.8017604323312616e-05,
"loss": 0.2038,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.19966718554496765,
"step": 560,
"valid_targets_mean": 916.1,
"valid_targets_min": 717
},
{
"epoch": 1.3882063882063882,
"grad_norm": 1.1197468550965275,
"learning_rate": 3.795507496216246e-05,
"loss": 0.2156,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.23064757883548737,
"step": 565,
"valid_targets_mean": 926.8,
"valid_targets_min": 755
},
{
"epoch": 1.4004914004914004,
"grad_norm": 0.9363733312332334,
"learning_rate": 3.789162776517098e-05,
"loss": 0.2091,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.21238617599010468,
"step": 570,
"valid_targets_mean": 1013.4,
"valid_targets_min": 677
},
{
"epoch": 1.4127764127764126,
"grad_norm": 0.9461807359368896,
"learning_rate": 3.78272659756614e-05,
"loss": 0.2189,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2300427407026291,
"step": 575,
"valid_targets_mean": 977.1,
"valid_targets_min": 655
},
{
"epoch": 1.425061425061425,
"grad_norm": 1.032783725340241,
"learning_rate": 3.776199288370948e-05,
"loss": 0.2184,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.22637906670570374,
"step": 580,
"valid_targets_mean": 858.8,
"valid_targets_min": 617
},
{
"epoch": 1.4373464373464373,
"grad_norm": 0.9213588980109625,
"learning_rate": 3.7695811825975386e-05,
"loss": 0.2043,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1935548335313797,
"step": 585,
"valid_targets_mean": 985.6,
"valid_targets_min": 571
},
{
"epoch": 1.4496314496314495,
"grad_norm": 0.8869344099651987,
"learning_rate": 3.76287261855331e-05,
"loss": 0.199,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.20661897957324982,
"step": 590,
"valid_targets_mean": 1090.0,
"valid_targets_min": 720
},
{
"epoch": 1.461916461916462,
"grad_norm": 1.3056434875874352,
"learning_rate": 3.7560739391697465e-05,
"loss": 0.2089,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.18137860298156738,
"step": 595,
"valid_targets_mean": 917.4,
"valid_targets_min": 600
},
{
"epoch": 1.4742014742014742,
"grad_norm": 0.8741112793999194,
"learning_rate": 3.749185491984891e-05,
"loss": 0.2044,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.19978901743888855,
"step": 600,
"valid_targets_mean": 1061.6,
"valid_targets_min": 770
},
{
"epoch": 1.4864864864864864,
"grad_norm": 0.9079781231576824,
"learning_rate": 3.7422076291255785e-05,
"loss": 0.1965,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.19094155728816986,
"step": 605,
"valid_targets_mean": 806.2,
"valid_targets_min": 677
},
{
"epoch": 1.4987714987714988,
"grad_norm": 0.8533490681827205,
"learning_rate": 3.7351407072894356e-05,
"loss": 0.2072,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.19833627343177795,
"step": 610,
"valid_targets_mean": 930.6,
"valid_targets_min": 631
},
{
"epoch": 1.511056511056511,
"grad_norm": 0.9262106949924384,
"learning_rate": 3.7279850877266486e-05,
"loss": 0.2195,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.23002958297729492,
"step": 615,
"valid_targets_mean": 927.2,
"valid_targets_min": 670
},
{
"epoch": 1.5233415233415233,
"grad_norm": 0.9996893911622978,
"learning_rate": 3.720741136221491e-05,
"loss": 0.2013,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.22016549110412598,
"step": 620,
"valid_targets_mean": 975.1,
"valid_targets_min": 704
},
{
"epoch": 1.5356265356265357,
"grad_norm": 0.9304706089787511,
"learning_rate": 3.713409223073636e-05,
"loss": 0.2094,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.21507513523101807,
"step": 625,
"valid_targets_mean": 954.0,
"valid_targets_min": 501
},
{
"epoch": 1.547911547911548,
"grad_norm": 0.9415941135469472,
"learning_rate": 3.705989723079214e-05,
"loss": 0.2076,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.20272324979305267,
"step": 630,
"valid_targets_mean": 913.0,
"valid_targets_min": 516
},
{
"epoch": 1.5601965601965602,
"grad_norm": 1.0140127705853008,
"learning_rate": 3.698483015511665e-05,
"loss": 0.2097,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.21423688530921936,
"step": 635,
"valid_targets_mean": 965.7,
"valid_targets_min": 608
},
{
"epoch": 1.5724815724815726,
"grad_norm": 0.9286120395429476,
"learning_rate": 3.690889484102344e-05,
"loss": 0.2046,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.19090348482131958,
"step": 640,
"valid_targets_mean": 877.8,
"valid_targets_min": 587
},
{
"epoch": 1.5847665847665846,
"grad_norm": 0.9203938587479324,
"learning_rate": 3.683209517020908e-05,
"loss": 0.21,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.22043265402317047,
"step": 645,
"valid_targets_mean": 998.6,
"valid_targets_min": 678
},
{
"epoch": 1.597051597051597,
"grad_norm": 1.0623655317031668,
"learning_rate": 3.675443506855473e-05,
"loss": 0.2124,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.263114869594574,
"step": 650,
"valid_targets_mean": 1080.6,
"valid_targets_min": 620
},
{
"epoch": 1.6093366093366095,
"grad_norm": 0.9064187903868257,
"learning_rate": 3.6675918505925456e-05,
"loss": 0.2113,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1894787847995758,
"step": 655,
"valid_targets_mean": 876.0,
"valid_targets_min": 622
},
{
"epoch": 1.6216216216216215,
"grad_norm": 0.9529120963239415,
"learning_rate": 3.6596549495967276e-05,
"loss": 0.2106,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.22982874512672424,
"step": 660,
"valid_targets_mean": 1071.1,
"valid_targets_min": 736
},
{
"epoch": 1.633906633906634,
"grad_norm": 0.8840416714465907,
"learning_rate": 3.651633209590202e-05,
"loss": 0.2117,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.17622330784797668,
"step": 665,
"valid_targets_mean": 925.9,
"valid_targets_min": 532
},
{
"epoch": 1.6461916461916462,
"grad_norm": 1.1810228059095147,
"learning_rate": 3.6435270406319914e-05,
"loss": 0.227,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.21883201599121094,
"step": 670,
"valid_targets_mean": 870.9,
"valid_targets_min": 691
},
{
"epoch": 1.6584766584766584,
"grad_norm": 0.8643704317470531,
"learning_rate": 3.635336857096997e-05,
"loss": 0.205,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.21042178571224213,
"step": 675,
"valid_targets_mean": 1000.4,
"valid_targets_min": 786
},
{
"epoch": 1.6707616707616708,
"grad_norm": 0.8551915698329935,
"learning_rate": 3.627063077654815e-05,
"loss": 0.2013,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.18336012959480286,
"step": 680,
"valid_targets_mean": 926.6,
"valid_targets_min": 538
},
{
"epoch": 1.683046683046683,
"grad_norm": 0.9439302138863646,
"learning_rate": 3.618706125248337e-05,
"loss": 0.2029,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.20368976891040802,
"step": 685,
"valid_targets_mean": 976.3,
"valid_targets_min": 747
},
{
"epoch": 1.6953316953316953,
"grad_norm": 0.9224278855037739,
"learning_rate": 3.6102664270721275e-05,
"loss": 0.2105,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.20123934745788574,
"step": 690,
"valid_targets_mean": 958.1,
"valid_targets_min": 655
},
{
"epoch": 1.7076167076167077,
"grad_norm": 0.8820361407024188,
"learning_rate": 3.601744414550589e-05,
"loss": 0.1977,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1965818554162979,
"step": 695,
"valid_targets_mean": 938.6,
"valid_targets_min": 563
},
{
"epoch": 1.71990171990172,
"grad_norm": 0.9576279590944234,
"learning_rate": 3.593140523315906e-05,
"loss": 0.216,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.21179933845996857,
"step": 700,
"valid_targets_mean": 886.4,
"valid_targets_min": 618
},
{
"epoch": 1.7321867321867321,
"grad_norm": 0.9812823218721463,
"learning_rate": 3.584455193185778e-05,
"loss": 0.2108,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2067493200302124,
"step": 705,
"valid_targets_mean": 849.6,
"valid_targets_min": 641
},
{
"epoch": 1.7444717444717446,
"grad_norm": 1.0444309881677845,
"learning_rate": 3.575688868140933e-05,
"loss": 0.2084,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.19522926211357117,
"step": 710,
"valid_targets_mean": 911.2,
"valid_targets_min": 657
},
{
"epoch": 1.7567567567567568,
"grad_norm": 0.9040650326743758,
"learning_rate": 3.566841996302438e-05,
"loss": 0.2102,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.20031523704528809,
"step": 715,
"valid_targets_mean": 914.9,
"valid_targets_min": 639
},
{
"epoch": 1.769041769041769,
"grad_norm": 0.9220766346495909,
"learning_rate": 3.557915029908787e-05,
"loss": 0.2111,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.20691350102424622,
"step": 720,
"valid_targets_mean": 888.4,
"valid_targets_min": 651
},
{
"epoch": 1.7813267813267815,
"grad_norm": 0.8821864755825576,
"learning_rate": 3.548908425292784e-05,
"loss": 0.206,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.20301777124404907,
"step": 725,
"valid_targets_mean": 954.6,
"valid_targets_min": 666
},
{
"epoch": 1.7936117936117935,
"grad_norm": 1.0027606605638184,
"learning_rate": 3.5398226428582165e-05,
"loss": 0.201,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.18714162707328796,
"step": 730,
"valid_targets_mean": 870.8,
"valid_targets_min": 576
},
{
"epoch": 1.805896805896806,
"grad_norm": 0.9279423327114711,
"learning_rate": 3.530658147056321e-05,
"loss": 0.2128,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.21059077978134155,
"step": 735,
"valid_targets_mean": 954.6,
"valid_targets_min": 748
},
{
"epoch": 1.8181818181818183,
"grad_norm": 0.9495556780469476,
"learning_rate": 3.521415406362041e-05,
"loss": 0.2111,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2139360010623932,
"step": 740,
"valid_targets_mean": 887.1,
"valid_targets_min": 695
},
{
"epoch": 1.8304668304668303,
"grad_norm": 0.9370993861785392,
"learning_rate": 3.512094893250076e-05,
"loss": 0.2079,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.19548596441745758,
"step": 745,
"valid_targets_mean": 814.6,
"valid_targets_min": 592
},
{
"epoch": 1.8427518427518428,
"grad_norm": 0.904789512120929,
"learning_rate": 3.5026970841707366e-05,
"loss": 0.2074,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2096291035413742,
"step": 750,
"valid_targets_mean": 986.6,
"valid_targets_min": 671
},
{
"epoch": 1.855036855036855,
"grad_norm": 0.9121919269177917,
"learning_rate": 3.493222459525579e-05,
"loss": 0.1951,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.19862934947013855,
"step": 755,
"valid_targets_mean": 937.2,
"valid_targets_min": 727
},
{
"epoch": 1.8673218673218672,
"grad_norm": 0.9341489709506934,
"learning_rate": 3.483671503642858e-05,
"loss": 0.2006,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1980162113904953,
"step": 760,
"valid_targets_mean": 958.4,
"valid_targets_min": 681
},
{
"epoch": 1.8796068796068797,
"grad_norm": 0.8748700101579973,
"learning_rate": 3.474044704752761e-05,
"loss": 0.2109,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2048308551311493,
"step": 765,
"valid_targets_mean": 993.3,
"valid_targets_min": 690
},
{
"epoch": 1.8918918918918919,
"grad_norm": 0.9000001364671186,
"learning_rate": 3.464342554962454e-05,
"loss": 0.2013,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.20421837270259857,
"step": 770,
"valid_targets_mean": 999.3,
"valid_targets_min": 605
},
{
"epoch": 1.904176904176904,
"grad_norm": 0.8853023548149183,
"learning_rate": 3.4545655502309254e-05,
"loss": 0.1966,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.18318688869476318,
"step": 775,
"valid_targets_mean": 947.4,
"valid_targets_min": 681
},
{
"epoch": 1.9164619164619165,
"grad_norm": 0.9313876881231712,
"learning_rate": 3.444714190343633e-05,
"loss": 0.2026,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.18997597694396973,
"step": 780,
"valid_targets_mean": 898.9,
"valid_targets_min": 655
},
{
"epoch": 1.9287469287469288,
"grad_norm": 1.0221192176591836,
"learning_rate": 3.434788978886957e-05,
"loss": 0.204,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2042032778263092,
"step": 785,
"valid_targets_mean": 834.0,
"valid_targets_min": 582
},
{
"epoch": 1.941031941031941,
"grad_norm": 0.9155860522611501,
"learning_rate": 3.424790423222455e-05,
"loss": 0.1967,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.19884389638900757,
"step": 790,
"valid_targets_mean": 952.8,
"valid_targets_min": 578
},
{
"epoch": 1.9533169533169534,
"grad_norm": 0.9317970936714676,
"learning_rate": 3.414719034460928e-05,
"loss": 0.1948,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.21337175369262695,
"step": 795,
"valid_targets_mean": 964.0,
"valid_targets_min": 740
},
{
"epoch": 1.9656019656019657,
"grad_norm": 0.9474687249839954,
"learning_rate": 3.404575327436294e-05,
"loss": 0.2002,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.19337508082389832,
"step": 800,
"valid_targets_mean": 994.7,
"valid_targets_min": 620
},
{
"epoch": 1.9778869778869779,
"grad_norm": 0.968841368102766,
"learning_rate": 3.3943598206792665e-05,
"loss": 0.1974,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.19192612171173096,
"step": 805,
"valid_targets_mean": 907.4,
"valid_targets_min": 665
},
{
"epoch": 1.9901719901719903,
"grad_norm": 0.9417909387557403,
"learning_rate": 3.384073036390857e-05,
"loss": 0.2013,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.20656394958496094,
"step": 810,
"valid_targets_mean": 893.9,
"valid_targets_min": 720
},
{
"epoch": 2.0024570024570023,
"grad_norm": 0.8585352938516232,
"learning_rate": 3.373715500415667e-05,
"loss": 0.199,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.17259010672569275,
"step": 815,
"valid_targets_mean": 984.1,
"valid_targets_min": 621
},
{
"epoch": 2.0147420147420148,
"grad_norm": 0.8727424930672738,
"learning_rate": 3.363287742215023e-05,
"loss": 0.1717,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.17072346806526184,
"step": 820,
"valid_targets_mean": 1022.6,
"valid_targets_min": 689
},
{
"epoch": 2.027027027027027,
"grad_norm": 0.9653669368105697,
"learning_rate": 3.352790294839898e-05,
"loss": 0.1773,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.17556941509246826,
"step": 825,
"valid_targets_mean": 984.9,
"valid_targets_min": 608
},
{
"epoch": 2.039312039312039,
"grad_norm": 0.9856863810593398,
"learning_rate": 3.3422236949036726e-05,
"loss": 0.1735,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.17215654253959656,
"step": 830,
"valid_targets_mean": 926.0,
"valid_targets_min": 527
},
{
"epoch": 2.0515970515970516,
"grad_norm": 0.985148571117365,
"learning_rate": 3.331588482554697e-05,
"loss": 0.1705,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.171957865357399,
"step": 835,
"valid_targets_mean": 944.8,
"valid_targets_min": 676
},
{
"epoch": 2.063882063882064,
"grad_norm": 0.909871062353881,
"learning_rate": 3.320885201448684e-05,
"loss": 0.1745,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.16490240395069122,
"step": 840,
"valid_targets_mean": 948.2,
"valid_targets_min": 670
},
{
"epoch": 2.076167076167076,
"grad_norm": 1.056134140927179,
"learning_rate": 3.310114398720917e-05,
"loss": 0.1764,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1747320294380188,
"step": 845,
"valid_targets_mean": 914.8,
"valid_targets_min": 713
},
{
"epoch": 2.0884520884520885,
"grad_norm": 0.9821463448392567,
"learning_rate": 3.299276624958281e-05,
"loss": 0.1754,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.18433263897895813,
"step": 850,
"valid_targets_mean": 989.0,
"valid_targets_min": 672
},
{
"epoch": 2.100737100737101,
"grad_norm": 0.9220392026293057,
"learning_rate": 3.288372434171116e-05,
"loss": 0.1737,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.16827696561813354,
"step": 855,
"valid_targets_mean": 952.9,
"valid_targets_min": 729
},
{
"epoch": 2.113022113022113,
"grad_norm": 1.08905073711043,
"learning_rate": 3.2774023837648986e-05,
"loss": 0.1759,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1930348426103592,
"step": 860,
"valid_targets_mean": 953.7,
"valid_targets_min": 591
},
{
"epoch": 2.1253071253071254,
"grad_norm": 1.2768244141830352,
"learning_rate": 3.26636703451175e-05,
"loss": 0.1751,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.16934962570667267,
"step": 865,
"valid_targets_mean": 932.2,
"valid_targets_min": 536
},
{
"epoch": 2.1375921375921374,
"grad_norm": 0.8963972595628271,
"learning_rate": 3.2552669505217646e-05,
"loss": 0.1721,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1723167896270752,
"step": 870,
"valid_targets_mean": 1024.2,
"valid_targets_min": 640
},
{
"epoch": 2.14987714987715,
"grad_norm": 0.9216164189642129,
"learning_rate": 3.24410269921418e-05,
"loss": 0.1696,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.15483501553535461,
"step": 875,
"valid_targets_mean": 935.6,
"valid_targets_min": 807
},
{
"epoch": 2.1621621621621623,
"grad_norm": 0.9542992798473027,
"learning_rate": 3.232874851288367e-05,
"loss": 0.1765,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1586456149816513,
"step": 880,
"valid_targets_mean": 902.9,
"valid_targets_min": 669
},
{
"epoch": 2.1744471744471743,
"grad_norm": 0.8672518091555641,
"learning_rate": 3.221583980694659e-05,
"loss": 0.179,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.16514278948307037,
"step": 885,
"valid_targets_mean": 927.9,
"valid_targets_min": 708
},
{
"epoch": 2.1867321867321867,
"grad_norm": 1.0142855190330105,
"learning_rate": 3.21023066460501e-05,
"loss": 0.1658,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.17116622626781464,
"step": 890,
"valid_targets_mean": 939.3,
"valid_targets_min": 662
},
{
"epoch": 2.199017199017199,
"grad_norm": 0.9530051059651002,
"learning_rate": 3.198815483383492e-05,
"loss": 0.1749,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.17124095559120178,
"step": 895,
"valid_targets_mean": 933.6,
"valid_targets_min": 474
},
{
"epoch": 2.211302211302211,
"grad_norm": 0.9824515095538127,
"learning_rate": 3.1873390205566295e-05,
"loss": 0.175,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.19697478413581848,
"step": 900,
"valid_targets_mean": 998.1,
"valid_targets_min": 647
},
{
"epoch": 2.2235872235872236,
"grad_norm": 0.993108312882805,
"learning_rate": 3.175801862783565e-05,
"loss": 0.1764,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1796160489320755,
"step": 905,
"valid_targets_mean": 934.2,
"valid_targets_min": 605
},
{
"epoch": 2.235872235872236,
"grad_norm": 0.9603082825048654,
"learning_rate": 3.164204599826077e-05,
"loss": 0.1715,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.17281116545200348,
"step": 910,
"valid_targets_mean": 1006.9,
"valid_targets_min": 707
},
{
"epoch": 2.248157248157248,
"grad_norm": 0.9377440070609073,
"learning_rate": 3.1525478245184245e-05,
"loss": 0.1697,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.18679824471473694,
"step": 915,
"valid_targets_mean": 995.6,
"valid_targets_min": 700
},
{
"epoch": 2.2604422604422605,
"grad_norm": 0.8648186518182739,
"learning_rate": 3.140832132737051e-05,
"loss": 0.1704,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.17448900640010834,
"step": 920,
"valid_targets_mean": 1037.5,
"valid_targets_min": 694
},
{
"epoch": 2.2727272727272725,
"grad_norm": 0.8834468920885888,
"learning_rate": 3.129058123370116e-05,
"loss": 0.1771,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.17446479201316833,
"step": 925,
"valid_targets_mean": 987.5,
"valid_targets_min": 687
},
{
"epoch": 2.285012285012285,
"grad_norm": 0.9786788923931473,
"learning_rate": 3.117226398286887e-05,
"loss": 0.1714,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1690976619720459,
"step": 930,
"valid_targets_mean": 969.7,
"valid_targets_min": 637
},
{
"epoch": 2.2972972972972974,
"grad_norm": 0.9575695952922517,
"learning_rate": 3.105337562306968e-05,
"loss": 0.1756,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.166093111038208,
"step": 935,
"valid_targets_mean": 923.9,
"valid_targets_min": 660
},
{
"epoch": 2.30958230958231,
"grad_norm": 0.966989114335053,
"learning_rate": 3.0933922231693854e-05,
"loss": 0.1723,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.18171045184135437,
"step": 940,
"valid_targets_mean": 1018.1,
"valid_targets_min": 701
},
{
"epoch": 2.321867321867322,
"grad_norm": 0.9755092370999333,
"learning_rate": 3.08139099150152e-05,
"loss": 0.18,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1950107365846634,
"step": 945,
"valid_targets_mean": 1096.1,
"valid_targets_min": 734
},
{
"epoch": 2.3341523341523343,
"grad_norm": 0.975219441056167,
"learning_rate": 3.069334480787893e-05,
"loss": 0.1753,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.17046800255775452,
"step": 950,
"valid_targets_mean": 940.8,
"valid_targets_min": 564
},
{
"epoch": 2.3464373464373462,
"grad_norm": 0.9039204160667244,
"learning_rate": 3.057223307338806e-05,
"loss": 0.1754,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.17663127183914185,
"step": 955,
"valid_targets_mean": 1080.9,
"valid_targets_min": 617
},
{
"epoch": 2.3587223587223587,
"grad_norm": 1.0055696002058934,
"learning_rate": 3.0450580902588346e-05,
"loss": 0.1739,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.18415383994579315,
"step": 960,
"valid_targets_mean": 891.5,
"valid_targets_min": 742
},
{
"epoch": 2.371007371007371,
"grad_norm": 1.1318477100077704,
"learning_rate": 3.032839451415182e-05,
"loss": 0.1769,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.17538920044898987,
"step": 965,
"valid_targets_mean": 953.4,
"valid_targets_min": 457
},
{
"epoch": 2.383292383292383,
"grad_norm": 1.114567082556806,
"learning_rate": 3.0205680154058904e-05,
"loss": 0.1732,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.17116034030914307,
"step": 970,
"valid_targets_mean": 1048.6,
"valid_targets_min": 697
},
{
"epoch": 2.3955773955773956,
"grad_norm": 1.3308084828541589,
"learning_rate": 3.0082444095279117e-05,
"loss": 0.1738,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.17864802479743958,
"step": 975,
"valid_targets_mean": 994.0,
"valid_targets_min": 708
},
{
"epoch": 2.407862407862408,
"grad_norm": 0.9504512606927061,
"learning_rate": 2.9958692637450406e-05,
"loss": 0.1717,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.15201088786125183,
"step": 980,
"valid_targets_mean": 935.3,
"valid_targets_min": 643
},
{
"epoch": 2.42014742014742,
"grad_norm": 0.9750082562199718,
"learning_rate": 2.983443210655714e-05,
"loss": 0.1672,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.16214288771152496,
"step": 985,
"valid_targets_mean": 854.0,
"valid_targets_min": 652
},
{
"epoch": 2.4324324324324325,
"grad_norm": 0.9264357827508429,
"learning_rate": 2.9709668854606706e-05,
"loss": 0.1806,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1679236888885498,
"step": 990,
"valid_targets_mean": 975.3,
"valid_targets_min": 614
},
{
"epoch": 2.444717444717445,
"grad_norm": 0.9912779025878137,
"learning_rate": 2.9584409259304828e-05,
"loss": 0.1747,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.16841384768486023,
"step": 995,
"valid_targets_mean": 950.8,
"valid_targets_min": 664
},
{
"epoch": 2.457002457002457,
"grad_norm": 1.011226779105394,
"learning_rate": 2.945865972372954e-05,
"loss": 0.1746,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1771933138370514,
"step": 1000,
"valid_targets_mean": 1023.0,
"valid_targets_min": 652
},
{
"epoch": 2.4692874692874693,
"grad_norm": 0.8778923994125223,
"learning_rate": 2.9332426676003858e-05,
"loss": 0.1739,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.15943680703639984,
"step": 1005,
"valid_targets_mean": 978.3,
"valid_targets_min": 673
},
{
"epoch": 2.4815724815724813,
"grad_norm": 0.9391261650343821,
"learning_rate": 2.920571656896722e-05,
"loss": 0.1727,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.17108768224716187,
"step": 1010,
"valid_targets_mean": 1033.1,
"valid_targets_min": 668
},
{
"epoch": 2.493857493857494,
"grad_norm": 0.9820148281088876,
"learning_rate": 2.907853587984558e-05,
"loss": 0.1707,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.17560836672782898,
"step": 1015,
"valid_targets_mean": 868.2,
"valid_targets_min": 655
},
{
"epoch": 2.506142506142506,
"grad_norm": 0.9118604910543656,
"learning_rate": 2.8950891109920333e-05,
"loss": 0.1721,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.17220911383628845,
"step": 1020,
"valid_targets_mean": 892.5,
"valid_targets_min": 639
},
{
"epoch": 2.5184275184275187,
"grad_norm": 0.937533300150789,
"learning_rate": 2.882278878419597e-05,
"loss": 0.1746,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1721157729625702,
"step": 1025,
"valid_targets_mean": 927.7,
"valid_targets_min": 579
},
{
"epoch": 2.5307125307125307,
"grad_norm": 1.0018178519708052,
"learning_rate": 2.8694235451066538e-05,
"loss": 0.1753,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.18641093373298645,
"step": 1030,
"valid_targets_mean": 960.5,
"valid_targets_min": 697
},
{
"epoch": 2.542997542997543,
"grad_norm": 0.912243193730379,
"learning_rate": 2.8565237681980876e-05,
"loss": 0.1686,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1784767508506775,
"step": 1035,
"valid_targets_mean": 1008.4,
"valid_targets_min": 797
},
{
"epoch": 2.555282555282555,
"grad_norm": 0.8615949496360548,
"learning_rate": 2.843580207110672e-05,
"loss": 0.1674,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1611432135105133,
"step": 1040,
"valid_targets_mean": 997.3,
"valid_targets_min": 736
},
{
"epoch": 2.5675675675675675,
"grad_norm": 1.003183895460324,
"learning_rate": 2.830593523499361e-05,
"loss": 0.1748,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.16985324025154114,
"step": 1045,
"valid_targets_mean": 906.9,
"valid_targets_min": 681
},
{
"epoch": 2.57985257985258,
"grad_norm": 0.912522735391224,
"learning_rate": 2.8175643812234627e-05,
"loss": 0.1747,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.17298677563667297,
"step": 1050,
"valid_targets_mean": 912.9,
"valid_targets_min": 656
},
{
"epoch": 2.592137592137592,
"grad_norm": 1.0807283933470113,
"learning_rate": 2.8044934463127108e-05,
"loss": 0.1735,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.17781515419483185,
"step": 1055,
"valid_targets_mean": 1025.3,
"valid_targets_min": 759
},
{
"epoch": 2.6044226044226044,
"grad_norm": 0.9196179864615218,
"learning_rate": 2.7913813869332112e-05,
"loss": 0.1715,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1666468381881714,
"step": 1060,
"valid_targets_mean": 861.7,
"valid_targets_min": 654
},
{
"epoch": 2.616707616707617,
"grad_norm": 0.938863322434105,
"learning_rate": 2.7782288733532915e-05,
"loss": 0.1767,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1822303682565689,
"step": 1065,
"valid_targets_mean": 975.1,
"valid_targets_min": 671
},
{
"epoch": 2.628992628992629,
"grad_norm": 0.8851465187299596,
"learning_rate": 2.7650365779092346e-05,
"loss": 0.171,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.16219617426395416,
"step": 1070,
"valid_targets_mean": 967.1,
"valid_targets_min": 632
},
{
"epoch": 2.6412776412776413,
"grad_norm": 0.9421668377359279,
"learning_rate": 2.751805174970912e-05,
"loss": 0.1765,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.18036647140979767,
"step": 1075,
"valid_targets_mean": 976.8,
"valid_targets_min": 724
},
{
"epoch": 2.6535626535626538,
"grad_norm": 0.992873674615618,
"learning_rate": 2.7385353409073093e-05,
"loss": 0.1748,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.18397009372711182,
"step": 1080,
"valid_targets_mean": 875.6,
"valid_targets_min": 626
},
{
"epoch": 2.6658476658476657,
"grad_norm": 0.9728070369066828,
"learning_rate": 2.725227754051953e-05,
"loss": 0.1773,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.18314458429813385,
"step": 1085,
"valid_targets_mean": 899.2,
"valid_targets_min": 677
},
{
"epoch": 2.678132678132678,
"grad_norm": 0.9143288429643125,
"learning_rate": 2.711883094668234e-05,
"loss": 0.1641,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.16809183359146118,
"step": 1090,
"valid_targets_mean": 994.4,
"valid_targets_min": 571
},
{
"epoch": 2.69041769041769,
"grad_norm": 1.54128042531331,
"learning_rate": 2.698502044914633e-05,
"loss": 0.1714,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.18388208746910095,
"step": 1095,
"valid_targets_mean": 972.0,
"valid_targets_min": 710
},
{
"epoch": 2.7027027027027026,
"grad_norm": 1.049164865794871,
"learning_rate": 2.685085288809853e-05,
"loss": 0.1743,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.18375930190086365,
"step": 1100,
"valid_targets_mean": 972.8,
"valid_targets_min": 698
},
{
"epoch": 2.714987714987715,
"grad_norm": 0.9600224337581525,
"learning_rate": 2.671633512197848e-05,
"loss": 0.176,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.17532390356063843,
"step": 1105,
"valid_targets_mean": 992.3,
"valid_targets_min": 620
},
{
"epoch": 2.7272727272727275,
"grad_norm": 0.9403664365373092,
"learning_rate": 2.658147402712768e-05,
"loss": 0.1753,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.18095794320106506,
"step": 1110,
"valid_targets_mean": 928.3,
"valid_targets_min": 667
},
{
"epoch": 2.7395577395577395,
"grad_norm": 1.162242904424703,
"learning_rate": 2.6446276497438064e-05,
"loss": 0.1691,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.19149112701416016,
"step": 1115,
"valid_targets_mean": 884.9,
"valid_targets_min": 596
},
{
"epoch": 2.751842751842752,
"grad_norm": 0.9999696190235531,
"learning_rate": 2.6310749443999593e-05,
"loss": 0.1787,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.18875834345817566,
"step": 1120,
"valid_targets_mean": 1027.4,
"valid_targets_min": 747
},
{
"epoch": 2.764127764127764,
"grad_norm": 0.9810912416799812,
"learning_rate": 2.617489979474699e-05,
"loss": 0.1714,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.18806487321853638,
"step": 1125,
"valid_targets_mean": 939.4,
"valid_targets_min": 620
},
{
"epoch": 2.7764127764127764,
"grad_norm": 1.1849732651553062,
"learning_rate": 2.6038734494105562e-05,
"loss": 0.175,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.17712372541427612,
"step": 1130,
"valid_targets_mean": 957.6,
"valid_targets_min": 653
},
{
"epoch": 2.788697788697789,
"grad_norm": 0.9030343977921143,
"learning_rate": 2.590226050263625e-05,
"loss": 0.1682,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1648157835006714,
"step": 1135,
"valid_targets_mean": 909.4,
"valid_targets_min": 632
},
{
"epoch": 2.800982800982801,
"grad_norm": 1.2038410845517444,
"learning_rate": 2.5765484796679768e-05,
"loss": 0.174,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1805713176727295,
"step": 1140,
"valid_targets_mean": 933.9,
"valid_targets_min": 586
},
{
"epoch": 2.8132678132678133,
"grad_norm": 0.9898717229094194,
"learning_rate": 2.5628414368000035e-05,
"loss": 0.1722,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.16944530606269836,
"step": 1145,
"valid_targets_mean": 959.3,
"valid_targets_min": 750
},
{
"epoch": 2.8255528255528253,
"grad_norm": 0.9531190415010581,
"learning_rate": 2.5491056223426746e-05,
"loss": 0.1781,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.17504331469535828,
"step": 1150,
"valid_targets_mean": 915.2,
"valid_targets_min": 642
},
{
"epoch": 2.8378378378378377,
"grad_norm": 1.0671121984464367,
"learning_rate": 2.5353417384497166e-05,
"loss": 0.1726,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.17695313692092896,
"step": 1155,
"valid_targets_mean": 882.2,
"valid_targets_min": 631
},
{
"epoch": 2.85012285012285,
"grad_norm": 1.0685164913692538,
"learning_rate": 2.5215504887097243e-05,
"loss": 0.1757,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.20810116827487946,
"step": 1160,
"valid_targets_mean": 967.5,
"valid_targets_min": 652
},
{
"epoch": 2.8624078624078626,
"grad_norm": 0.8784263870335288,
"learning_rate": 2.5077325781101918e-05,
"loss": 0.1807,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.17300809919834137,
"step": 1165,
"valid_targets_mean": 1010.9,
"valid_targets_min": 582
},
{
"epoch": 2.8746928746928746,
"grad_norm": 0.9297310781873073,
"learning_rate": 2.493888713001476e-05,
"loss": 0.1796,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.17841032147407532,
"step": 1170,
"valid_targets_mean": 968.2,
"valid_targets_min": 691
},
{
"epoch": 2.886977886977887,
"grad_norm": 0.8915623950018948,
"learning_rate": 2.480019601060687e-05,
"loss": 0.1711,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.18827411532402039,
"step": 1175,
"valid_targets_mean": 1003.5,
"valid_targets_min": 651
},
{
"epoch": 2.899262899262899,
"grad_norm": 0.9259985446100989,
"learning_rate": 2.4661259512555176e-05,
"loss": 0.1768,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.18165713548660278,
"step": 1180,
"valid_targets_mean": 1009.4,
"valid_targets_min": 686
},
{
"epoch": 2.9115479115479115,
"grad_norm": 0.9897274256876722,
"learning_rate": 2.4522084738079933e-05,
"loss": 0.1756,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1986805945634842,
"step": 1185,
"valid_targets_mean": 953.1,
"valid_targets_min": 694
},
{
"epoch": 2.923832923832924,
"grad_norm": 1.07572825455023,
"learning_rate": 2.4382678801581762e-05,
"loss": 0.1729,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.16371658444404602,
"step": 1190,
"valid_targets_mean": 887.8,
"valid_targets_min": 633
},
{
"epoch": 2.9361179361179364,
"grad_norm": 0.8911223010515249,
"learning_rate": 2.4243048829277916e-05,
"loss": 0.1724,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1594182252883911,
"step": 1195,
"valid_targets_mean": 882.2,
"valid_targets_min": 615
},
{
"epoch": 2.9484029484029484,
"grad_norm": 0.8992271888116871,
"learning_rate": 2.410320195883802e-05,
"loss": 0.1677,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.17592017352581024,
"step": 1200,
"valid_targets_mean": 993.3,
"valid_targets_min": 576
},
{
"epoch": 2.960687960687961,
"grad_norm": 0.8882940826799905,
"learning_rate": 2.396314533901918e-05,
"loss": 0.1748,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.16428948938846588,
"step": 1205,
"valid_targets_mean": 894.0,
"valid_targets_min": 605
},
{
"epoch": 2.972972972972973,
"grad_norm": 1.0619171838658064,
"learning_rate": 2.3822886129300603e-05,
"loss": 0.1776,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1795843243598938,
"step": 1210,
"valid_targets_mean": 995.9,
"valid_targets_min": 758
},
{
"epoch": 2.9852579852579852,
"grad_norm": 1.1545333955599706,
"learning_rate": 2.368243149951755e-05,
"loss": 0.1751,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.16789449751377106,
"step": 1215,
"valid_targets_mean": 888.7,
"valid_targets_min": 623
},
{
"epoch": 2.9975429975429977,
"grad_norm": 0.8908167630467976,
"learning_rate": 2.3541788629494865e-05,
"loss": 0.1728,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.16199368238449097,
"step": 1220,
"valid_targets_mean": 908.7,
"valid_targets_min": 635
},
{
"epoch": 3.0098280098280097,
"grad_norm": 0.8444984262032558,
"learning_rate": 2.3400964708679944e-05,
"loss": 0.1475,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1309882402420044,
"step": 1225,
"valid_targets_mean": 924.3,
"valid_targets_min": 651
},
{
"epoch": 3.022113022113022,
"grad_norm": 1.1132820322990213,
"learning_rate": 2.325996693577522e-05,
"loss": 0.1452,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1460825353860855,
"step": 1230,
"valid_targets_mean": 1024.1,
"valid_targets_min": 681
},
{
"epoch": 3.0343980343980346,
"grad_norm": 1.0882996142948094,
"learning_rate": 2.311880251837019e-05,
"loss": 0.1442,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1443278044462204,
"step": 1235,
"valid_targets_mean": 955.6,
"valid_targets_min": 696
},
{
"epoch": 3.0466830466830466,
"grad_norm": 1.0386792573778023,
"learning_rate": 2.2977478672572933e-05,
"loss": 0.1461,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.14893858134746552,
"step": 1240,
"valid_targets_mean": 1041.4,
"valid_targets_min": 666
},
{
"epoch": 3.058968058968059,
"grad_norm": 0.9689321094652921,
"learning_rate": 2.2836002622641297e-05,
"loss": 0.143,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.14730066061019897,
"step": 1245,
"valid_targets_mean": 1079.1,
"valid_targets_min": 695
},
{
"epoch": 3.0712530712530715,
"grad_norm": 0.9602934336334937,
"learning_rate": 2.269438160061354e-05,
"loss": 0.1442,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1352572739124298,
"step": 1250,
"valid_targets_mean": 920.0,
"valid_targets_min": 676
},
{
"epoch": 3.0835380835380835,
"grad_norm": 0.9885414365472923,
"learning_rate": 2.2552622845938698e-05,
"loss": 0.1396,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12953315675258636,
"step": 1255,
"valid_targets_mean": 876.9,
"valid_targets_min": 679
},
{
"epoch": 3.095823095823096,
"grad_norm": 1.0771727969197218,
"learning_rate": 2.2410733605106462e-05,
"loss": 0.1497,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1568758636713028,
"step": 1260,
"valid_targets_mean": 850.2,
"valid_targets_min": 604
},
{
"epoch": 3.108108108108108,
"grad_norm": 1.1491068715024852,
"learning_rate": 2.2268721131276805e-05,
"loss": 0.1437,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.15370234847068787,
"step": 1265,
"valid_targets_mean": 977.6,
"valid_targets_min": 693
},
{
"epoch": 3.1203931203931203,
"grad_norm": 0.968706151633579,
"learning_rate": 2.2126592683909154e-05,
"loss": 0.1419,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1496562510728836,
"step": 1270,
"valid_targets_mean": 955.6,
"valid_targets_min": 614
},
{
"epoch": 3.1326781326781328,
"grad_norm": 1.0277324102635916,
"learning_rate": 2.1984355528391342e-05,
"loss": 0.1436,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1483316421508789,
"step": 1275,
"valid_targets_mean": 938.6,
"valid_targets_min": 598
},
{
"epoch": 3.1449631449631448,
"grad_norm": 0.987683127269443,
"learning_rate": 2.1842016935668188e-05,
"loss": 0.1445,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.15175923705101013,
"step": 1280,
"valid_targets_mean": 1015.1,
"valid_targets_min": 746
},
{
"epoch": 3.157248157248157,
"grad_norm": 0.9390509636386285,
"learning_rate": 2.169958418186982e-05,
"loss": 0.153,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1446429193019867,
"step": 1285,
"valid_targets_mean": 984.0,
"valid_targets_min": 713
},
{
"epoch": 3.1695331695331697,
"grad_norm": 1.0394272751093805,
"learning_rate": 2.1557064547939754e-05,
"loss": 0.1471,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13773059844970703,
"step": 1290,
"valid_targets_mean": 1005.0,
"valid_targets_min": 736
},
{
"epoch": 3.1818181818181817,
"grad_norm": 0.9407221598412456,
"learning_rate": 2.1414465319262666e-05,
"loss": 0.1494,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13250461220741272,
"step": 1295,
"valid_targets_mean": 909.2,
"valid_targets_min": 681
},
{
"epoch": 3.194103194103194,
"grad_norm": 1.0435380904119864,
"learning_rate": 2.1271793785291997e-05,
"loss": 0.1444,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13971221446990967,
"step": 1300,
"valid_targets_mean": 917.8,
"valid_targets_min": 602
},
{
"epoch": 3.2063882063882065,
"grad_norm": 0.9549420894567353,
"learning_rate": 2.1129057239177337e-05,
"loss": 0.1495,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1558254361152649,
"step": 1305,
"valid_targets_mean": 933.1,
"valid_targets_min": 732
},
{
"epoch": 3.2186732186732185,
"grad_norm": 0.9125235147288177,
"learning_rate": 2.0986262977391577e-05,
"loss": 0.148,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.14063915610313416,
"step": 1310,
"valid_targets_mean": 931.4,
"valid_targets_min": 728
},
{
"epoch": 3.230958230958231,
"grad_norm": 1.0365586973179586,
"learning_rate": 2.084341829935796e-05,
"loss": 0.1462,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.15706020593643188,
"step": 1315,
"valid_targets_mean": 1020.5,
"valid_targets_min": 664
},
{
"epoch": 3.2432432432432434,
"grad_norm": 0.9664270233881987,
"learning_rate": 2.0700530507076916e-05,
"loss": 0.1392,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13748334348201752,
"step": 1320,
"valid_targets_mean": 973.8,
"valid_targets_min": 616
},
{
"epoch": 3.2555282555282554,
"grad_norm": 0.9835787375345787,
"learning_rate": 2.0557606904752833e-05,
"loss": 0.1431,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.14928527176380157,
"step": 1325,
"valid_targets_mean": 914.9,
"valid_targets_min": 474
},
{
"epoch": 3.267813267813268,
"grad_norm": 0.9999571019320207,
"learning_rate": 2.0414654798420622e-05,
"loss": 0.1331,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1445634961128235,
"step": 1330,
"valid_targets_mean": 919.4,
"valid_targets_min": 635
},
{
"epoch": 3.2800982800982803,
"grad_norm": 0.9402396257207996,
"learning_rate": 2.02716814955723e-05,
"loss": 0.1446,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1313110738992691,
"step": 1335,
"valid_targets_mean": 927.1,
"valid_targets_min": 597
},
{
"epoch": 3.2923832923832923,
"grad_norm": 1.018542017945054,
"learning_rate": 2.0128694304783406e-05,
"loss": 0.1467,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1506882905960083,
"step": 1340,
"valid_targets_mean": 927.5,
"valid_targets_min": 730
},
{
"epoch": 3.3046683046683047,
"grad_norm": 0.9490500420853842,
"learning_rate": 1.9985700535339406e-05,
"loss": 0.1471,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13769789040088654,
"step": 1345,
"valid_targets_mean": 902.7,
"valid_targets_min": 722
},
{
"epoch": 3.3169533169533167,
"grad_norm": 0.9838326609104752,
"learning_rate": 1.984270749686207e-05,
"loss": 0.147,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12807482481002808,
"step": 1350,
"valid_targets_mean": 884.1,
"valid_targets_min": 536
},
{
"epoch": 3.329238329238329,
"grad_norm": 1.0387317349514822,
"learning_rate": 1.9699722498935786e-05,
"loss": 0.1445,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.14848026633262634,
"step": 1355,
"valid_targets_mean": 915.0,
"valid_targets_min": 563
},
{
"epoch": 3.3415233415233416,
"grad_norm": 1.0308878761143394,
"learning_rate": 1.9556752850733933e-05,
"loss": 0.1429,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12545965611934662,
"step": 1360,
"valid_targets_mean": 904.6,
"valid_targets_min": 706
},
{
"epoch": 3.3538083538083536,
"grad_norm": 0.9764650250717721,
"learning_rate": 1.9413805860645242e-05,
"loss": 0.1515,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.161744624376297,
"step": 1365,
"valid_targets_mean": 1072.9,
"valid_targets_min": 648
},
{
"epoch": 3.366093366093366,
"grad_norm": 0.9956928130533306,
"learning_rate": 1.9270888835900165e-05,
"loss": 0.1478,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1572413146495819,
"step": 1370,
"valid_targets_mean": 958.1,
"valid_targets_min": 654
},
{
"epoch": 3.3783783783783785,
"grad_norm": 1.0372403351419428,
"learning_rate": 1.9128009082197417e-05,
"loss": 0.1436,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.14318522810935974,
"step": 1375,
"valid_targets_mean": 882.1,
"valid_targets_min": 685
},
{
"epoch": 3.3906633906633905,
"grad_norm": 0.9371080430346982,
"learning_rate": 1.8985173903330428e-05,
"loss": 0.1429,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.14232918620109558,
"step": 1380,
"valid_targets_mean": 987.7,
"valid_targets_min": 494
},
{
"epoch": 3.402948402948403,
"grad_norm": 0.9461579387456934,
"learning_rate": 1.884239060081407e-05,
"loss": 0.1399,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13573700189590454,
"step": 1385,
"valid_targets_mean": 935.5,
"valid_targets_min": 733
},
{
"epoch": 3.4152334152334154,
"grad_norm": 1.1361411106468413,
"learning_rate": 1.869966647351135e-05,
"loss": 0.1438,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1459515392780304,
"step": 1390,
"valid_targets_mean": 912.1,
"valid_targets_min": 667
},
{
"epoch": 3.4275184275184274,
"grad_norm": 0.928334710297314,
"learning_rate": 1.8557008817260343e-05,
"loss": 0.1447,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13418202102184296,
"step": 1395,
"valid_targets_mean": 1008.3,
"valid_targets_min": 651
},
{
"epoch": 3.43980343980344,
"grad_norm": 0.9501584881036313,
"learning_rate": 1.8414424924501222e-05,
"loss": 0.1531,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13538123667240143,
"step": 1400,
"valid_targets_mean": 898.5,
"valid_targets_min": 666
},
{
"epoch": 3.4520884520884523,
"grad_norm": 0.9514020070355609,
"learning_rate": 1.827192208390347e-05,
"loss": 0.1418,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13934513926506042,
"step": 1405,
"valid_targets_mean": 957.5,
"valid_targets_min": 532
},
{
"epoch": 3.4643734643734643,
"grad_norm": 0.9533792191195336,
"learning_rate": 1.812950757999334e-05,
"loss": 0.1448,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13233372569084167,
"step": 1410,
"valid_targets_mean": 871.8,
"valid_targets_min": 516
},
{
"epoch": 3.4766584766584767,
"grad_norm": 0.9795866790310279,
"learning_rate": 1.7987188692781417e-05,
"loss": 0.1435,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.14241893589496613,
"step": 1415,
"valid_targets_mean": 936.1,
"valid_targets_min": 676
},
{
"epoch": 3.488943488943489,
"grad_norm": 0.914661052152663,
"learning_rate": 1.784497269739052e-05,
"loss": 0.1459,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1311112344264984,
"step": 1420,
"valid_targets_mean": 917.9,
"valid_targets_min": 477
},
{
"epoch": 3.501228501228501,
"grad_norm": 1.0403358753849694,
"learning_rate": 1.770286686368381e-05,
"loss": 0.1423,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1444162130355835,
"step": 1425,
"valid_targets_mean": 925.6,
"valid_targets_min": 663
},
{
"epoch": 3.5135135135135136,
"grad_norm": 1.0284489507821293,
"learning_rate": 1.756087845589312e-05,
"loss": 0.1517,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1424056887626648,
"step": 1430,
"valid_targets_mean": 891.7,
"valid_targets_min": 599
},
{
"epoch": 3.5257985257985256,
"grad_norm": 1.047707904104722,
"learning_rate": 1.7419014732247683e-05,
"loss": 0.1445,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.14654859900474548,
"step": 1435,
"valid_targets_mean": 939.6,
"valid_targets_min": 736
},
{
"epoch": 3.538083538083538,
"grad_norm": 1.0369128036732569,
"learning_rate": 1.7277282944603047e-05,
"loss": 0.1536,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.15306830406188965,
"step": 1440,
"valid_targets_mean": 1005.7,
"valid_targets_min": 700
},
{
"epoch": 3.5503685503685505,
"grad_norm": 1.0180125385512913,
"learning_rate": 1.713569033807041e-05,
"loss": 0.1454,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.14173316955566406,
"step": 1445,
"valid_targets_mean": 894.1,
"valid_targets_min": 516
},
{
"epoch": 3.562653562653563,
"grad_norm": 0.9519346924504087,
"learning_rate": 1.6994244150646244e-05,
"loss": 0.1376,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12116233259439468,
"step": 1450,
"valid_targets_mean": 900.7,
"valid_targets_min": 618
},
{
"epoch": 3.574938574938575,
"grad_norm": 0.9294534036479501,
"learning_rate": 1.6852951612842278e-05,
"loss": 0.1435,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.142903134226799,
"step": 1455,
"valid_targets_mean": 1152.5,
"valid_targets_min": 720
},
{
"epoch": 3.5872235872235874,
"grad_norm": 0.9121434584648127,
"learning_rate": 1.671181994731595e-05,
"loss": 0.1423,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.14141470193862915,
"step": 1460,
"valid_targets_mean": 1007.2,
"valid_targets_min": 717
},
{
"epoch": 3.5995085995085994,
"grad_norm": 1.0189171300919593,
"learning_rate": 1.6570856368501108e-05,
"loss": 0.1485,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.16055302321910858,
"step": 1465,
"valid_targets_mean": 1005.6,
"valid_targets_min": 640
},
{
"epoch": 3.611793611793612,
"grad_norm": 1.0085418017413093,
"learning_rate": 1.643006808223931e-05,
"loss": 0.1411,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13635236024856567,
"step": 1470,
"valid_targets_mean": 955.1,
"valid_targets_min": 576
},
{
"epoch": 3.6240786240786242,
"grad_norm": 0.9875856191876736,
"learning_rate": 1.6289462285411387e-05,
"loss": 0.1389,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.14985120296478271,
"step": 1475,
"valid_targets_mean": 977.2,
"valid_targets_min": 754
},
{
"epoch": 3.6363636363636362,
"grad_norm": 1.3052844343483891,
"learning_rate": 1.614904616556962e-05,
"loss": 0.1411,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.14070774614810944,
"step": 1480,
"valid_targets_mean": 922.2,
"valid_targets_min": 617
},
{
"epoch": 3.6486486486486487,
"grad_norm": 0.967691777275294,
"learning_rate": 1.6008826900570294e-05,
"loss": 0.1455,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13695186376571655,
"step": 1485,
"valid_targets_mean": 891.7,
"valid_targets_min": 656
},
{
"epoch": 3.6609336609336607,
"grad_norm": 0.9944529130674382,
"learning_rate": 1.586881165820675e-05,
"loss": 0.1482,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.14366337656974792,
"step": 1490,
"valid_targets_mean": 875.6,
"valid_targets_min": 621
},
{
"epoch": 3.673218673218673,
"grad_norm": 0.9510017148787837,
"learning_rate": 1.5729007595843037e-05,
"loss": 0.1454,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13302364945411682,
"step": 1495,
"valid_targets_mean": 931.1,
"valid_targets_min": 682
},
{
"epoch": 3.6855036855036856,
"grad_norm": 1.0079869124770597,
"learning_rate": 1.5589421860047986e-05,
"loss": 0.15,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1522165983915329,
"step": 1500,
"valid_targets_mean": 1022.2,
"valid_targets_min": 697
},
{
"epoch": 3.697788697788698,
"grad_norm": 0.9714028705719118,
"learning_rate": 1.5450061586229903e-05,
"loss": 0.144,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.14417463541030884,
"step": 1505,
"valid_targets_mean": 945.3,
"valid_targets_min": 638
},
{
"epoch": 3.71007371007371,
"grad_norm": 0.9579562277963518,
"learning_rate": 1.5310933898271864e-05,
"loss": 0.1443,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.14955665171146393,
"step": 1510,
"valid_targets_mean": 980.5,
"valid_targets_min": 627
},
{
"epoch": 3.7223587223587224,
"grad_norm": 0.9994122793110526,
"learning_rate": 1.5172045908167462e-05,
"loss": 0.1423,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.14200064539909363,
"step": 1515,
"valid_targets_mean": 949.6,
"valid_targets_min": 619
},
{
"epoch": 3.7346437346437344,
"grad_norm": 0.9821936979113775,
"learning_rate": 1.5033404715657344e-05,
"loss": 0.1411,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13359785079956055,
"step": 1520,
"valid_targets_mean": 900.1,
"valid_targets_min": 639
},
{
"epoch": 3.746928746928747,
"grad_norm": 1.0501978524899511,
"learning_rate": 1.4895017407866217e-05,
"loss": 0.1491,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.14831113815307617,
"step": 1525,
"valid_targets_mean": 836.9,
"valid_targets_min": 578
},
{
"epoch": 3.7592137592137593,
"grad_norm": 1.0009555437478268,
"learning_rate": 1.4756891058940606e-05,
"loss": 0.1439,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1430947482585907,
"step": 1530,
"valid_targets_mean": 937.2,
"valid_targets_min": 631
},
{
"epoch": 3.7714987714987718,
"grad_norm": 0.9989703655858909,
"learning_rate": 1.4619032729687223e-05,
"loss": 0.1468,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1486039161682129,
"step": 1535,
"valid_targets_mean": 1009.9,
"valid_targets_min": 703
},
{
"epoch": 3.7837837837837838,
"grad_norm": 1.0318735775529813,
"learning_rate": 1.4481449467212004e-05,
"loss": 0.1436,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1425326019525528,
"step": 1540,
"valid_targets_mean": 845.5,
"valid_targets_min": 661
},
{
"epoch": 3.796068796068796,
"grad_norm": 0.9608581870874865,
"learning_rate": 1.4344148304559926e-05,
"loss": 0.1461,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.14720866084098816,
"step": 1545,
"valid_targets_mean": 1006.9,
"valid_targets_min": 751
},
{
"epoch": 3.808353808353808,
"grad_norm": 0.933587010759985,
"learning_rate": 1.4207136260355426e-05,
"loss": 0.1423,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.14576342701911926,
"step": 1550,
"valid_targets_mean": 1017.6,
"valid_targets_min": 662
},
{
"epoch": 3.8206388206388207,
"grad_norm": 0.9646457265318146,
"learning_rate": 1.4070420338443667e-05,
"loss": 0.1482,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.14640560746192932,
"step": 1555,
"valid_targets_mean": 961.2,
"valid_targets_min": 652
},
{
"epoch": 3.832923832923833,
"grad_norm": 0.9689718165593759,
"learning_rate": 1.3934007527532494e-05,
"loss": 0.1428,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1452617645263672,
"step": 1560,
"valid_targets_mean": 941.9,
"valid_targets_min": 660
},
{
"epoch": 3.845208845208845,
"grad_norm": 0.9663490950140802,
"learning_rate": 1.3797904800835174e-05,
"loss": 0.1412,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1348852664232254,
"step": 1565,
"valid_targets_mean": 898.6,
"valid_targets_min": 680
},
{
"epoch": 3.8574938574938575,
"grad_norm": 0.9016980830369478,
"learning_rate": 1.3662119115713968e-05,
"loss": 0.1476,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12884706258773804,
"step": 1570,
"valid_targets_mean": 1025.2,
"valid_targets_min": 745
},
{
"epoch": 3.8697788697788695,
"grad_norm": 0.9387203419785546,
"learning_rate": 1.3526657413324427e-05,
"loss": 0.1383,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13017988204956055,
"step": 1575,
"valid_targets_mean": 1044.4,
"valid_targets_min": 661
},
{
"epoch": 3.882063882063882,
"grad_norm": 0.9800633394989181,
"learning_rate": 1.3391526618260636e-05,
"loss": 0.1439,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.14418141543865204,
"step": 1580,
"valid_targets_mean": 920.8,
"valid_targets_min": 640
},
{
"epoch": 3.8943488943488944,
"grad_norm": 1.0779786099105024,
"learning_rate": 1.3256733638201172e-05,
"loss": 0.1459,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.15096238255500793,
"step": 1585,
"valid_targets_mean": 868.6,
"valid_targets_min": 669
},
{
"epoch": 3.906633906633907,
"grad_norm": 1.0606136768853318,
"learning_rate": 1.3122285363556053e-05,
"loss": 0.1416,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13585801422595978,
"step": 1590,
"valid_targets_mean": 950.3,
"valid_targets_min": 552
},
{
"epoch": 3.918918918918919,
"grad_norm": 1.070526566734902,
"learning_rate": 1.2988188667114487e-05,
"loss": 0.1446,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.15988577902317047,
"step": 1595,
"valid_targets_mean": 918.4,
"valid_targets_min": 596
},
{
"epoch": 3.9312039312039313,
"grad_norm": 0.9951058407303669,
"learning_rate": 1.2854450403693526e-05,
"loss": 0.1445,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.14960238337516785,
"step": 1600,
"valid_targets_mean": 881.3,
"valid_targets_min": 670
},
{
"epoch": 3.9434889434889433,
"grad_norm": 0.968596082582197,
"learning_rate": 1.272107740978769e-05,
"loss": 0.1482,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.14312288165092468,
"step": 1605,
"valid_targets_mean": 970.6,
"valid_targets_min": 612
},
{
"epoch": 3.9557739557739557,
"grad_norm": 0.9045477600182205,
"learning_rate": 1.2588076503219475e-05,
"loss": 0.1435,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1324017196893692,
"step": 1610,
"valid_targets_mean": 993.7,
"valid_targets_min": 675
},
{
"epoch": 3.968058968058968,
"grad_norm": 1.0064100486978322,
"learning_rate": 1.2455454482790859e-05,
"loss": 0.146,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1510857343673706,
"step": 1615,
"valid_targets_mean": 898.1,
"valid_targets_min": 680
},
{
"epoch": 3.98034398034398,
"grad_norm": 0.9667913255808215,
"learning_rate": 1.2323218127935714e-05,
"loss": 0.1407,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1537216305732727,
"step": 1620,
"valid_targets_mean": 922.4,
"valid_targets_min": 706
},
{
"epoch": 3.9926289926289926,
"grad_norm": 1.0925109360863716,
"learning_rate": 1.2191374198373309e-05,
"loss": 0.1473,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.14623141288757324,
"step": 1625,
"valid_targets_mean": 949.4,
"valid_targets_min": 604
},
{
"epoch": 4.004914004914005,
"grad_norm": 0.8697439145249658,
"learning_rate": 1.2059929433762734e-05,
"loss": 0.1405,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12668581306934357,
"step": 1630,
"valid_targets_mean": 1056.8,
"valid_targets_min": 710
},
{
"epoch": 4.017199017199017,
"grad_norm": 0.9786218062777593,
"learning_rate": 1.1928890553358352e-05,
"loss": 0.1254,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12963074445724487,
"step": 1635,
"valid_targets_mean": 1016.4,
"valid_targets_min": 678
},
{
"epoch": 4.0294840294840295,
"grad_norm": 1.013466228322274,
"learning_rate": 1.1798264255666387e-05,
"loss": 0.1198,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13302694261074066,
"step": 1640,
"valid_targets_mean": 959.4,
"valid_targets_min": 602
},
{
"epoch": 4.041769041769042,
"grad_norm": 0.9500770614642219,
"learning_rate": 1.1668057218102436e-05,
"loss": 0.1206,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10990549623966217,
"step": 1645,
"valid_targets_mean": 963.8,
"valid_targets_min": 597
},
{
"epoch": 4.054054054054054,
"grad_norm": 1.0429136985966856,
"learning_rate": 1.1538276096650175e-05,
"loss": 0.1207,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12115078419446945,
"step": 1650,
"valid_targets_mean": 938.2,
"valid_targets_min": 510
},
{
"epoch": 4.066339066339066,
"grad_norm": 0.9248235147185616,
"learning_rate": 1.1408927525521118e-05,
"loss": 0.1253,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10874859988689423,
"step": 1655,
"valid_targets_mean": 998.1,
"valid_targets_min": 646
},
{
"epoch": 4.078624078624078,
"grad_norm": 0.9128362154627097,
"learning_rate": 1.1280018116815438e-05,
"loss": 0.1217,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11440710723400116,
"step": 1660,
"valid_targets_mean": 990.1,
"valid_targets_min": 675
},
{
"epoch": 4.090909090909091,
"grad_norm": 1.0210751300631842,
"learning_rate": 1.115155446018404e-05,
"loss": 0.1197,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11153914034366608,
"step": 1665,
"valid_targets_mean": 869.9,
"valid_targets_min": 618
},
{
"epoch": 4.103194103194103,
"grad_norm": 1.088600870316487,
"learning_rate": 1.1023543122491626e-05,
"loss": 0.1209,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1248767226934433,
"step": 1670,
"valid_targets_mean": 924.6,
"valid_targets_min": 619
},
{
"epoch": 4.115479115479116,
"grad_norm": 0.9652107721756708,
"learning_rate": 1.089599064748108e-05,
"loss": 0.1216,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1146978884935379,
"step": 1675,
"valid_targets_mean": 925.7,
"valid_targets_min": 729
},
{
"epoch": 4.127764127764128,
"grad_norm": 0.9917060080272326,
"learning_rate": 1.0768903555438927e-05,
"loss": 0.1206,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1253787726163864,
"step": 1680,
"valid_targets_mean": 950.2,
"valid_targets_min": 663
},
{
"epoch": 4.14004914004914,
"grad_norm": 0.9615299803477797,
"learning_rate": 1.0642288342862007e-05,
"loss": 0.1204,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12344183027744293,
"step": 1685,
"valid_targets_mean": 1037.2,
"valid_targets_min": 814
},
{
"epoch": 4.152334152334152,
"grad_norm": 0.9793999861662677,
"learning_rate": 1.051615148212544e-05,
"loss": 0.1198,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12122024595737457,
"step": 1690,
"valid_targets_mean": 1047.0,
"valid_targets_min": 673
},
{
"epoch": 4.164619164619165,
"grad_norm": 1.0089991272381962,
"learning_rate": 1.0390499421151706e-05,
"loss": 0.1183,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11438318341970444,
"step": 1695,
"valid_targets_mean": 987.9,
"valid_targets_min": 659
},
{
"epoch": 4.176904176904177,
"grad_norm": 1.0434287723993414,
"learning_rate": 1.0265338583081088e-05,
"loss": 0.12,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11772973835468292,
"step": 1700,
"valid_targets_mean": 973.6,
"valid_targets_min": 644
},
{
"epoch": 4.1891891891891895,
"grad_norm": 1.0281938517002083,
"learning_rate": 1.0140675365943284e-05,
"loss": 0.1221,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12870125472545624,
"step": 1705,
"valid_targets_mean": 933.7,
"valid_targets_min": 690
},
{
"epoch": 4.201474201474202,
"grad_norm": 1.5080272835779485,
"learning_rate": 1.0016516142330404e-05,
"loss": 0.1217,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12202489376068115,
"step": 1710,
"valid_targets_mean": 993.0,
"valid_targets_min": 687
},
{
"epoch": 4.2137592137592135,
"grad_norm": 1.0757473296800761,
"learning_rate": 9.89286725907117e-06,
"loss": 0.1209,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12647372484207153,
"step": 1715,
"valid_targets_mean": 965.9,
"valid_targets_min": 720
},
{
"epoch": 4.226044226044226,
"grad_norm": 1.0266691124569305,
"learning_rate": 9.769735036906475e-06,
"loss": 0.1209,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11845774948596954,
"step": 1720,
"valid_targets_mean": 886.0,
"valid_targets_min": 670
},
{
"epoch": 4.238329238329238,
"grad_norm": 1.055790871475488,
"learning_rate": 9.647125770166321e-06,
"loss": 0.1211,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11783865094184875,
"step": 1725,
"valid_targets_mean": 882.2,
"valid_targets_min": 582
},
{
"epoch": 4.250614250614251,
"grad_norm": 1.0468500852253888,
"learning_rate": 9.525045726448001e-06,
"loss": 0.1218,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12612420320510864,
"step": 1730,
"valid_targets_mean": 968.2,
"valid_targets_min": 666
},
{
"epoch": 4.262899262899263,
"grad_norm": 0.9941585620438288,
"learning_rate": 9.40350114629577e-06,
"loss": 0.1209,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12407036870718002,
"step": 1735,
"valid_targets_mean": 940.1,
"valid_targets_min": 693
},
{
"epoch": 4.275184275184275,
"grad_norm": 1.0030585645401304,
"learning_rate": 9.282498242881784e-06,
"loss": 0.1237,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12118291109800339,
"step": 1740,
"valid_targets_mean": 975.1,
"valid_targets_min": 687
},
{
"epoch": 4.287469287469287,
"grad_norm": 0.9567063485416096,
"learning_rate": 9.162043201688517e-06,
"loss": 0.1226,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11729326844215393,
"step": 1745,
"valid_targets_mean": 913.5,
"valid_targets_min": 620
},
{
"epoch": 4.2997542997543,
"grad_norm": 1.093321079367913,
"learning_rate": 9.042142180192596e-06,
"loss": 0.1216,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1232294961810112,
"step": 1750,
"valid_targets_mean": 983.2,
"valid_targets_min": 712
},
{
"epoch": 4.312039312039312,
"grad_norm": 1.0223977302692675,
"learning_rate": 8.92280130754998e-06,
"loss": 0.1176,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12798799574375153,
"step": 1755,
"valid_targets_mean": 936.3,
"valid_targets_min": 683
},
{
"epoch": 4.324324324324325,
"grad_norm": 0.9969902090774578,
"learning_rate": 8.804026684282694e-06,
"loss": 0.1213,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1234356239438057,
"step": 1760,
"valid_targets_mean": 982.8,
"valid_targets_min": 708
},
{
"epoch": 4.336609336609337,
"grad_norm": 1.0513411652135027,
"learning_rate": 8.685824381966975e-06,
"loss": 0.1193,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12967775762081146,
"step": 1765,
"valid_targets_mean": 944.9,
"valid_targets_min": 661
},
{
"epoch": 4.348894348894349,
"grad_norm": 0.9974585088572387,
"learning_rate": 8.568200442922865e-06,
"loss": 0.1194,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11659836024045944,
"step": 1770,
"valid_targets_mean": 987.1,
"valid_targets_min": 624
},
{
"epoch": 4.361179361179361,
"grad_norm": 1.0033626517127674,
"learning_rate": 8.451160879905398e-06,
"loss": 0.1222,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12519827485084534,
"step": 1775,
"valid_targets_mean": 917.5,
"valid_targets_min": 699
},
{
"epoch": 4.3734643734643734,
"grad_norm": 1.067385742275141,
"learning_rate": 8.33471167579717e-06,
"loss": 0.124,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12871462106704712,
"step": 1780,
"valid_targets_mean": 932.5,
"valid_targets_min": 676
},
{
"epoch": 4.385749385749386,
"grad_norm": 1.1028517874182766,
"learning_rate": 8.218858783302566e-06,
"loss": 0.1277,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12894824147224426,
"step": 1785,
"valid_targets_mean": 922.2,
"valid_targets_min": 619
},
{
"epoch": 4.398034398034398,
"grad_norm": 0.9694246238021301,
"learning_rate": 8.103608124643412e-06,
"loss": 0.1171,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10765311866998672,
"step": 1790,
"valid_targets_mean": 901.9,
"valid_targets_min": 719
},
{
"epoch": 4.41031941031941,
"grad_norm": 1.0096991183773807,
"learning_rate": 7.988965591256284e-06,
"loss": 0.1263,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12528453767299652,
"step": 1795,
"valid_targets_mean": 950.7,
"valid_targets_min": 657
},
{
"epoch": 4.422604422604422,
"grad_norm": 0.9966981145704281,
"learning_rate": 7.874937043491331e-06,
"loss": 0.1242,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12716656923294067,
"step": 1800,
"valid_targets_mean": 918.1,
"valid_targets_min": 654
},
{
"epoch": 4.434889434889435,
"grad_norm": 1.0272608704748118,
"learning_rate": 7.761528310312679e-06,
"loss": 0.1211,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11819181591272354,
"step": 1805,
"valid_targets_mean": 931.4,
"valid_targets_min": 658
},
{
"epoch": 4.447174447174447,
"grad_norm": 1.0137561846111187,
"learning_rate": 7.648745189000511e-06,
"loss": 0.118,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1136414185166359,
"step": 1810,
"valid_targets_mean": 912.7,
"valid_targets_min": 607
},
{
"epoch": 4.45945945945946,
"grad_norm": 1.0873501889956356,
"learning_rate": 7.536593444854663e-06,
"loss": 0.1216,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12807413935661316,
"step": 1815,
"valid_targets_mean": 1031.0,
"valid_targets_min": 679
},
{
"epoch": 4.471744471744472,
"grad_norm": 1.0068476190598987,
"learning_rate": 7.4250788108999686e-06,
"loss": 0.1203,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1230648085474968,
"step": 1820,
"valid_targets_mean": 974.1,
"valid_targets_min": 640
},
{
"epoch": 4.484029484029484,
"grad_norm": 1.0433094724916965,
"learning_rate": 7.314206987593162e-06,
"loss": 0.1256,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12586304545402527,
"step": 1825,
"valid_targets_mean": 1011.7,
"valid_targets_min": 656
},
{
"epoch": 4.496314496314496,
"grad_norm": 0.987211648906778,
"learning_rate": 7.203983642531462e-06,
"loss": 0.1221,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12281675636768341,
"step": 1830,
"valid_targets_mean": 994.2,
"valid_targets_min": 619
},
{
"epoch": 4.5085995085995085,
"grad_norm": 1.1029480180851556,
"learning_rate": 7.094414410162913e-06,
"loss": 0.1275,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1318090856075287,
"step": 1835,
"valid_targets_mean": 1020.6,
"valid_targets_min": 687
},
{
"epoch": 4.520884520884521,
"grad_norm": 0.9175739238118313,
"learning_rate": 6.985504891498291e-06,
"loss": 0.1181,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10475833714008331,
"step": 1840,
"valid_targets_mean": 957.6,
"valid_targets_min": 661
},
{
"epoch": 4.533169533169533,
"grad_norm": 0.9770170212372375,
"learning_rate": 6.8772606538248285e-06,
"loss": 0.123,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11924491077661514,
"step": 1845,
"valid_targets_mean": 983.9,
"valid_targets_min": 680
},
{
"epoch": 4.545454545454545,
"grad_norm": 1.1013986620909682,
"learning_rate": 6.769687230421638e-06,
"loss": 0.1274,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13231635093688965,
"step": 1850,
"valid_targets_mean": 905.4,
"valid_targets_min": 621
},
{
"epoch": 4.557739557739557,
"grad_norm": 1.0528293798177313,
"learning_rate": 6.662790120276803e-06,
"loss": 0.1267,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12604084610939026,
"step": 1855,
"valid_targets_mean": 923.2,
"valid_targets_min": 673
},
{
"epoch": 4.57002457002457,
"grad_norm": 1.1158231290547191,
"learning_rate": 6.556574787806344e-06,
"loss": 0.123,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13169091939926147,
"step": 1860,
"valid_targets_mean": 849.7,
"valid_targets_min": 578
},
{
"epoch": 4.582309582309582,
"grad_norm": 1.1024933402787964,
"learning_rate": 6.451046662574831e-06,
"loss": 0.1215,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12675680220127106,
"step": 1865,
"valid_targets_mean": 947.2,
"valid_targets_min": 640
},
{
"epoch": 4.594594594594595,
"grad_norm": 1.0812034920547042,
"learning_rate": 6.346211139017877e-06,
"loss": 0.1233,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12591159343719482,
"step": 1870,
"valid_targets_mean": 866.4,
"valid_targets_min": 645
},
{
"epoch": 4.606879606879607,
"grad_norm": 1.0597747711467687,
"learning_rate": 6.242073576166337e-06,
"loss": 0.1206,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11885634064674377,
"step": 1875,
"valid_targets_mean": 910.1,
"valid_targets_min": 697
},
{
"epoch": 4.61916461916462,
"grad_norm": 1.064538896175725,
"learning_rate": 6.138639297372404e-06,
"loss": 0.119,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12688076496124268,
"step": 1880,
"valid_targets_mean": 991.1,
"valid_targets_min": 703
},
{
"epoch": 4.631449631449631,
"grad_norm": 1.0405836028766522,
"learning_rate": 6.035913590037479e-06,
"loss": 0.1207,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12629568576812744,
"step": 1885,
"valid_targets_mean": 906.2,
"valid_targets_min": 666
},
{
"epoch": 4.643734643734644,
"grad_norm": 1.0590657189064507,
"learning_rate": 5.933901705341851e-06,
"loss": 0.1235,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12081333994865417,
"step": 1890,
"valid_targets_mean": 846.7,
"valid_targets_min": 582
},
{
"epoch": 4.656019656019656,
"grad_norm": 0.9685387980316024,
"learning_rate": 5.832608857976321e-06,
"loss": 0.1188,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12511588633060455,
"step": 1895,
"valid_targets_mean": 1002.4,
"valid_targets_min": 604
},
{
"epoch": 4.6683046683046685,
"grad_norm": 1.0165029943633133,
"learning_rate": 5.732040225875584e-06,
"loss": 0.1228,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12953898310661316,
"step": 1900,
"valid_targets_mean": 964.8,
"valid_targets_min": 683
},
{
"epoch": 4.680589680589681,
"grad_norm": 1.3325503122751237,
"learning_rate": 5.632200949953579e-06,
"loss": 0.1202,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11685385555028915,
"step": 1905,
"valid_targets_mean": 896.8,
"valid_targets_min": 508
},
{
"epoch": 4.6928746928746925,
"grad_norm": 1.0663672971922626,
"learning_rate": 5.533096133840677e-06,
"loss": 0.1197,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11813405156135559,
"step": 1910,
"valid_targets_mean": 959.2,
"valid_targets_min": 624
},
{
"epoch": 4.705159705159705,
"grad_norm": 1.0349308880833799,
"learning_rate": 5.434730843622778e-06,
"loss": 0.1143,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10479003936052322,
"step": 1915,
"valid_targets_mean": 1024.8,
"valid_targets_min": 724
},
{
"epoch": 4.717444717444717,
"grad_norm": 1.0757884754145068,
"learning_rate": 5.337110107582377e-06,
"loss": 0.1197,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12124276906251907,
"step": 1920,
"valid_targets_mean": 1009.4,
"valid_targets_min": 671
},
{
"epoch": 4.72972972972973,
"grad_norm": 0.9995886752697994,
"learning_rate": 5.2402389159414755e-06,
"loss": 0.1222,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1158638596534729,
"step": 1925,
"valid_targets_mean": 925.9,
"valid_targets_min": 631
},
{
"epoch": 4.742014742014742,
"grad_norm": 1.0223521571398488,
"learning_rate": 5.144122220606542e-06,
"loss": 0.1169,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11703236401081085,
"step": 1930,
"valid_targets_mean": 984.0,
"valid_targets_min": 702
},
{
"epoch": 4.754299754299755,
"grad_norm": 1.0404694060373552,
"learning_rate": 5.048764934915349e-06,
"loss": 0.1207,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11991409212350845,
"step": 1935,
"valid_targets_mean": 949.8,
"valid_targets_min": 690
},
{
"epoch": 4.766584766584766,
"grad_norm": 0.9380080260291878,
"learning_rate": 4.954171933385805e-06,
"loss": 0.1173,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11270684748888016,
"step": 1940,
"valid_targets_mean": 948.9,
"valid_targets_min": 664
},
{
"epoch": 4.778869778869779,
"grad_norm": 1.0832637396880282,
"learning_rate": 4.8603480514667836e-06,
"loss": 0.1246,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11928368359804153,
"step": 1945,
"valid_targets_mean": 1022.1,
"valid_targets_min": 725
},
{
"epoch": 4.791154791154791,
"grad_norm": 1.0768971031975272,
"learning_rate": 4.767298085290963e-06,
"loss": 0.1237,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1263669729232788,
"step": 1950,
"valid_targets_mean": 897.8,
"valid_targets_min": 697
},
{
"epoch": 4.803439803439804,
"grad_norm": 0.9702737774903263,
"learning_rate": 4.675026791429624e-06,
"loss": 0.1241,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12418300658464432,
"step": 1955,
"valid_targets_mean": 904.6,
"valid_targets_min": 618
},
{
"epoch": 4.815724815724816,
"grad_norm": 0.9847296992516871,
"learning_rate": 4.583538886649525e-06,
"loss": 0.1193,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11021877080202103,
"step": 1960,
"valid_targets_mean": 935.5,
"valid_targets_min": 652
},
{
"epoch": 4.828009828009828,
"grad_norm": 1.063535643945527,
"learning_rate": 4.492839047671764e-06,
"loss": 0.1285,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13300850987434387,
"step": 1965,
"valid_targets_mean": 922.7,
"valid_targets_min": 654
},
{
"epoch": 4.84029484029484,
"grad_norm": 1.0210663343814104,
"learning_rate": 4.4029319109327465e-06,
"loss": 0.1196,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11711201071739197,
"step": 1970,
"valid_targets_mean": 880.4,
"valid_targets_min": 569
},
{
"epoch": 4.8525798525798525,
"grad_norm": 0.9712468912862798,
"learning_rate": 4.313822072347136e-06,
"loss": 0.119,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12143044173717499,
"step": 1975,
"valid_targets_mean": 999.6,
"valid_targets_min": 744
},
{
"epoch": 4.864864864864865,
"grad_norm": 1.0590420720026885,
"learning_rate": 4.22551408707296e-06,
"loss": 0.1178,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11938400566577911,
"step": 1980,
"valid_targets_mean": 953.6,
"valid_targets_min": 608
},
{
"epoch": 4.877149877149877,
"grad_norm": 0.9655040975370661,
"learning_rate": 4.138012469278714e-06,
"loss": 0.118,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11308306455612183,
"step": 1985,
"valid_targets_mean": 985.0,
"valid_targets_min": 675
},
{
"epoch": 4.88943488943489,
"grad_norm": 0.9853903900388323,
"learning_rate": 4.051321691912649e-06,
"loss": 0.1241,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11850358545780182,
"step": 1990,
"valid_targets_mean": 927.3,
"valid_targets_min": 666
},
{
"epoch": 4.901719901719901,
"grad_norm": 1.136610862996903,
"learning_rate": 3.9654461864740935e-06,
"loss": 0.1209,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11766823381185532,
"step": 1995,
"valid_targets_mean": 1034.9,
"valid_targets_min": 652
},
{
"epoch": 4.914004914004914,
"grad_norm": 1.0345672304711442,
"learning_rate": 3.880390342786915e-06,
"loss": 0.1182,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12373542040586472,
"step": 2000,
"valid_targets_mean": 928.6,
"valid_targets_min": 683
},
{
"epoch": 4.926289926289926,
"grad_norm": 0.9310707915085795,
"learning_rate": 3.7961585087751516e-06,
"loss": 0.1217,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10688027739524841,
"step": 2005,
"valid_targets_mean": 1003.1,
"valid_targets_min": 639
},
{
"epoch": 4.938574938574939,
"grad_norm": 1.0010633378479221,
"learning_rate": 3.71275499024071e-06,
"loss": 0.1202,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12126513570547104,
"step": 2010,
"valid_targets_mean": 929.6,
"valid_targets_min": 700
},
{
"epoch": 4.950859950859951,
"grad_norm": 0.998842822620143,
"learning_rate": 3.6301840506433083e-06,
"loss": 0.1173,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11304894089698792,
"step": 2015,
"valid_targets_mean": 968.3,
"valid_targets_min": 672
},
{
"epoch": 4.963144963144963,
"grad_norm": 1.0608010526163005,
"learning_rate": 3.5484499108824853e-06,
"loss": 0.1193,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12789671123027802,
"step": 2020,
"valid_targets_mean": 950.9,
"valid_targets_min": 662
},
{
"epoch": 4.975429975429975,
"grad_norm": 1.1814620072009632,
"learning_rate": 3.4675567490818727e-06,
"loss": 0.1284,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13000145554542542,
"step": 2025,
"valid_targets_mean": 998.1,
"valid_targets_min": 600
},
{
"epoch": 4.987714987714988,
"grad_norm": 0.9757619779263266,
"learning_rate": 3.3875087003756036e-06,
"loss": 0.1197,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12253376841545105,
"step": 2030,
"valid_targets_mean": 1016.6,
"valid_targets_min": 642
},
{
"epoch": 5.0,
"grad_norm": 0.985373116622916,
"learning_rate": 3.30830985669691e-06,
"loss": 0.1194,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10901984572410583,
"step": 2035,
"valid_targets_mean": 903.1,
"valid_targets_min": 603
},
{
"epoch": 5.012285012285012,
"grad_norm": 0.898249950933453,
"learning_rate": 3.22996426656899e-06,
"loss": 0.1119,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1041560173034668,
"step": 2040,
"valid_targets_mean": 1034.1,
"valid_targets_min": 649
},
{
"epoch": 5.024570024570025,
"grad_norm": 0.9657629403750108,
"learning_rate": 3.1524759348980096e-06,
"loss": 0.1107,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10987867414951324,
"step": 2045,
"valid_targets_mean": 975.7,
"valid_targets_min": 668
},
{
"epoch": 5.036855036855036,
"grad_norm": 0.9916984065270475,
"learning_rate": 3.0758488227684212e-06,
"loss": 0.109,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11240162700414658,
"step": 2050,
"valid_targets_mean": 850.5,
"valid_targets_min": 546
},
{
"epoch": 5.049140049140049,
"grad_norm": 1.0292386491980945,
"learning_rate": 3.0000868472404423e-06,
"loss": 0.1125,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10477884113788605,
"step": 2055,
"valid_targets_mean": 900.2,
"valid_targets_min": 666
},
{
"epoch": 5.061425061425061,
"grad_norm": 1.0337177621260107,
"learning_rate": 2.9251938811498436e-06,
"loss": 0.1087,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10738644003868103,
"step": 2060,
"valid_targets_mean": 907.1,
"valid_targets_min": 571
},
{
"epoch": 5.073710073710074,
"grad_norm": 0.9841850019109009,
"learning_rate": 2.8511737529099704e-06,
"loss": 0.1096,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1120748221874237,
"step": 2065,
"valid_targets_mean": 992.1,
"valid_targets_min": 696
},
{
"epoch": 5.085995085995086,
"grad_norm": 1.0628658129689879,
"learning_rate": 2.7780302463160235e-06,
"loss": 0.1099,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1170399859547615,
"step": 2070,
"valid_targets_mean": 849.2,
"valid_targets_min": 707
},
{
"epoch": 5.098280098280099,
"grad_norm": 1.080297138638144,
"learning_rate": 2.705767100351673e-06,
"loss": 0.1091,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10279746353626251,
"step": 2075,
"valid_targets_mean": 891.6,
"valid_targets_min": 564
},
{
"epoch": 5.11056511056511,
"grad_norm": 1.0571562491669322,
"learning_rate": 2.634388008997899e-06,
"loss": 0.1101,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11071351170539856,
"step": 2080,
"valid_targets_mean": 887.2,
"valid_targets_min": 658
},
{
"epoch": 5.122850122850123,
"grad_norm": 0.9638229013862132,
"learning_rate": 2.5638966210441597e-06,
"loss": 0.1051,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1005311831831932,
"step": 2085,
"valid_targets_mean": 962.9,
"valid_targets_min": 680
},
{
"epoch": 5.135135135135135,
"grad_norm": 1.0067721680943875,
"learning_rate": 2.4942965399018926e-06,
"loss": 0.1099,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11731775104999542,
"step": 2090,
"valid_targets_mean": 978.4,
"valid_targets_min": 477
},
{
"epoch": 5.1474201474201475,
"grad_norm": 1.0328796742285484,
"learning_rate": 2.425591323420289e-06,
"loss": 0.1102,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1096126064658165,
"step": 2095,
"valid_targets_mean": 954.4,
"valid_targets_min": 725
},
{
"epoch": 5.15970515970516,
"grad_norm": 1.0933867172427512,
"learning_rate": 2.357784483704444e-06,
"loss": 0.1084,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11263054609298706,
"step": 2100,
"valid_targets_mean": 874.9,
"valid_targets_min": 618
},
{
"epoch": 5.171990171990172,
"grad_norm": 1.0757324206967882,
"learning_rate": 2.2908794869358044e-06,
"loss": 0.1069,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10877275466918945,
"step": 2105,
"valid_targets_mean": 894.8,
"valid_targets_min": 621
},
{
"epoch": 5.184275184275184,
"grad_norm": 1.016487012616333,
"learning_rate": 2.2248797531949952e-06,
"loss": 0.1103,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10262925177812576,
"step": 2110,
"valid_targets_mean": 939.4,
"valid_targets_min": 600
},
{
"epoch": 5.196560196560196,
"grad_norm": 1.0146263893123428,
"learning_rate": 2.1597886562869917e-06,
"loss": 0.1093,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1074049323797226,
"step": 2115,
"valid_targets_mean": 925.9,
"valid_targets_min": 609
},
{
"epoch": 5.208845208845209,
"grad_norm": 1.0056972893991754,
"learning_rate": 2.095609523568638e-06,
"loss": 0.1085,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10091559588909149,
"step": 2120,
"valid_targets_mean": 1022.2,
"valid_targets_min": 725
},
{
"epoch": 5.221130221130221,
"grad_norm": 1.0358191262515057,
"learning_rate": 2.0323456357785855e-06,
"loss": 0.109,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1039903312921524,
"step": 2125,
"valid_targets_mean": 951.8,
"valid_targets_min": 670
},
{
"epoch": 5.233415233415234,
"grad_norm": 1.0134901577637836,
"learning_rate": 1.970000226869553e-06,
"loss": 0.1063,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10696413367986679,
"step": 2130,
"valid_targets_mean": 885.4,
"valid_targets_min": 675
},
{
"epoch": 5.245700245700245,
"grad_norm": 1.024498628158875,
"learning_rate": 1.90857648384305e-06,
"loss": 0.106,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10174258053302765,
"step": 2135,
"valid_targets_mean": 937.8,
"valid_targets_min": 695
},
{
"epoch": 5.257985257985258,
"grad_norm": 1.0778598519574534,
"learning_rate": 1.848077546586431e-06,
"loss": 0.1066,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10440458357334137,
"step": 2140,
"valid_targets_mean": 919.8,
"valid_targets_min": 585
},
{
"epoch": 5.27027027027027,
"grad_norm": 1.0810321423664624,
"learning_rate": 1.7885065077123976e-06,
"loss": 0.1086,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1029202789068222,
"step": 2145,
"valid_targets_mean": 937.9,
"valid_targets_min": 652
},
{
"epoch": 5.282555282555283,
"grad_norm": 1.0163320736086876,
"learning_rate": 1.7298664124009245e-06,
"loss": 0.1119,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10604019463062286,
"step": 2150,
"valid_targets_mean": 922.1,
"valid_targets_min": 651
},
{
"epoch": 5.294840294840295,
"grad_norm": 1.0436926280950187,
"learning_rate": 1.672160258243567e-06,
"loss": 0.1081,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10767054557800293,
"step": 2155,
"valid_targets_mean": 910.1,
"valid_targets_min": 577
},
{
"epoch": 5.3071253071253075,
"grad_norm": 1.025380052621418,
"learning_rate": 1.615390995090258e-06,
"loss": 0.1071,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11412075161933899,
"step": 2160,
"valid_targets_mean": 907.0,
"valid_targets_min": 694
},
{
"epoch": 5.319410319410319,
"grad_norm": 1.06066295062257,
"learning_rate": 1.559561524898492e-06,
"loss": 0.1094,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10931138694286346,
"step": 2165,
"valid_targets_mean": 950.4,
"valid_targets_min": 697
},
{
"epoch": 5.3316953316953315,
"grad_norm": 1.0290583672325067,
"learning_rate": 1.5046747015849893e-06,
"loss": 0.1078,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1094687283039093,
"step": 2170,
"valid_targets_mean": 1013.2,
"valid_targets_min": 730
},
{
"epoch": 5.343980343980344,
"grad_norm": 0.9748950643645085,
"learning_rate": 1.4507333308798255e-06,
"loss": 0.1126,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09912136197090149,
"step": 2175,
"valid_targets_mean": 875.9,
"valid_targets_min": 604
},
{
"epoch": 5.356265356265356,
"grad_norm": 0.99285365978212,
"learning_rate": 1.3977401701829752e-06,
"loss": 0.1001,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09728066623210907,
"step": 2180,
"valid_targets_mean": 969.6,
"valid_targets_min": 669
},
{
"epoch": 5.368550368550369,
"grad_norm": 0.9843852113753192,
"learning_rate": 1.345697928423384e-06,
"loss": 0.108,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10213703662157059,
"step": 2185,
"valid_targets_mean": 1037.0,
"valid_targets_min": 753
},
{
"epoch": 5.38083538083538,
"grad_norm": 1.1134497136798602,
"learning_rate": 1.2946092659204767e-06,
"loss": 0.1105,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11370056867599487,
"step": 2190,
"valid_targets_mean": 860.1,
"valid_targets_min": 626
},
{
"epoch": 5.393120393120393,
"grad_norm": 1.0604549866907864,
"learning_rate": 1.244476794248175e-06,
"loss": 0.1072,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10983552038669586,
"step": 2195,
"valid_targets_mean": 963.8,
"valid_targets_min": 690
},
{
"epoch": 5.405405405405405,
"grad_norm": 1.0695198729391404,
"learning_rate": 1.1953030761014017e-06,
"loss": 0.1056,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1127047911286354,
"step": 2200,
"valid_targets_mean": 917.5,
"valid_targets_min": 587
},
{
"epoch": 5.417690417690418,
"grad_norm": 1.0678405165116567,
"learning_rate": 1.147090625165055e-06,
"loss": 0.1047,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.106284499168396,
"step": 2205,
"valid_targets_mean": 926.1,
"valid_targets_min": 650
},
{
"epoch": 5.42997542997543,
"grad_norm": 0.9735583312512783,
"learning_rate": 1.0998419059855503e-06,
"loss": 0.11,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09887248277664185,
"step": 2210,
"valid_targets_mean": 907.0,
"valid_targets_min": 693
},
{
"epoch": 5.442260442260443,
"grad_norm": 1.0424786244158735,
"learning_rate": 1.053559333844798e-06,
"loss": 0.1078,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1106153056025505,
"step": 2215,
"valid_targets_mean": 814.6,
"valid_targets_min": 619
},
{
"epoch": 5.454545454545454,
"grad_norm": 1.0499265026728355,
"learning_rate": 1.0082452746367721e-06,
"loss": 0.1052,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1073426604270935,
"step": 2220,
"valid_targets_mean": 964.6,
"valid_targets_min": 695
},
{
"epoch": 5.466830466830467,
"grad_norm": 0.9821498811128075,
"learning_rate": 9.639020447465475e-07,
"loss": 0.105,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10381324589252472,
"step": 2225,
"valid_targets_mean": 964.9,
"valid_targets_min": 705
},
{
"epoch": 5.479115479115479,
"grad_norm": 1.0016907393710126,
"learning_rate": 9.205319109318922e-07,
"loss": 0.1045,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10114198923110962,
"step": 2230,
"valid_targets_mean": 944.8,
"valid_targets_min": 682
},
{
"epoch": 5.4914004914004915,
"grad_norm": 0.9641568696889248,
"learning_rate": 8.781370902074049e-07,
"loss": 0.104,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10037557780742645,
"step": 2235,
"valid_targets_mean": 1001.0,
"valid_targets_min": 647
},
{
"epoch": 5.503685503685504,
"grad_norm": 0.9594456923036004,
"learning_rate": 8.367197497311719e-07,
"loss": 0.1046,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1010562852025032,
"step": 2240,
"valid_targets_mean": 1058.5,
"valid_targets_min": 723
},
{
"epoch": 5.515970515970516,
"grad_norm": 1.0378146778929243,
"learning_rate": 7.962820066939958e-07,
"loss": 0.1104,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10684297233819962,
"step": 2245,
"valid_targets_mean": 934.9,
"valid_targets_min": 651
},
{
"epoch": 5.528255528255528,
"grad_norm": 1.0414454051454436,
"learning_rate": 7.568259282111645e-07,
"loss": 0.1079,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11059882491827011,
"step": 2250,
"valid_targets_mean": 951.8,
"valid_targets_min": 525
},
{
"epoch": 5.54054054054054,
"grad_norm": 1.0679202374122483,
"learning_rate": 7.183535312167755e-07,
"loss": 0.1132,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11686760187149048,
"step": 2255,
"valid_targets_mean": 963.2,
"valid_targets_min": 711
},
{
"epoch": 5.552825552825553,
"grad_norm": 0.9421516676878753,
"learning_rate": 6.808667823606474e-07,
"loss": 0.1073,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09937810152769089,
"step": 2260,
"valid_targets_mean": 953.2,
"valid_targets_min": 633
},
{
"epoch": 5.565110565110565,
"grad_norm": 1.1326899561762167,
"learning_rate": 6.443675979077779e-07,
"loss": 0.1055,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11326974630355835,
"step": 2265,
"valid_targets_mean": 913.2,
"valid_targets_min": 648
},
{
"epoch": 5.577395577395578,
"grad_norm": 1.0305503917388301,
"learning_rate": 6.088578436403847e-07,
"loss": 0.1047,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10154630988836288,
"step": 2270,
"valid_targets_mean": 897.8,
"valid_targets_min": 722
},
{
"epoch": 5.58968058968059,
"grad_norm": 1.1943199506211184,
"learning_rate": 5.743393347625436e-07,
"loss": 0.1077,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11854109913110733,
"step": 2275,
"valid_targets_mean": 1039.3,
"valid_targets_min": 682
},
{
"epoch": 5.601965601965602,
"grad_norm": 0.9707605132234587,
"learning_rate": 5.408138358073833e-07,
"loss": 0.1068,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10274074226617813,
"step": 2280,
"valid_targets_mean": 1006.6,
"valid_targets_min": 651
},
{
"epoch": 5.614250614250614,
"grad_norm": 1.0265651098429756,
"learning_rate": 5.082830605468969e-07,
"loss": 0.1106,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10965031385421753,
"step": 2285,
"valid_targets_mean": 1016.5,
"valid_targets_min": 621
},
{
"epoch": 5.6265356265356266,
"grad_norm": 0.973625595592468,
"learning_rate": 4.767486719043235e-07,
"loss": 0.1111,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11382775008678436,
"step": 2290,
"valid_targets_mean": 1066.2,
"valid_targets_min": 618
},
{
"epoch": 5.638820638820639,
"grad_norm": 1.0117380057721537,
"learning_rate": 4.4621228186915833e-07,
"loss": 0.1048,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10289210826158524,
"step": 2295,
"valid_targets_mean": 1000.9,
"valid_targets_min": 663
},
{
"epoch": 5.651105651105651,
"grad_norm": 1.0474838253089391,
"learning_rate": 4.166754514147275e-07,
"loss": 0.1118,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10855976492166519,
"step": 2300,
"valid_targets_mean": 955.1,
"valid_targets_min": 694
},
{
"epoch": 5.663390663390663,
"grad_norm": 0.98024038477301,
"learning_rate": 3.881396904184231e-07,
"loss": 0.1077,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11711513996124268,
"step": 2305,
"valid_targets_mean": 1028.2,
"valid_targets_min": 682
},
{
"epoch": 5.675675675675675,
"grad_norm": 0.9692492106629949,
"learning_rate": 3.6060645758449584e-07,
"loss": 0.1034,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09136329591274261,
"step": 2310,
"valid_targets_mean": 895.3,
"valid_targets_min": 729
},
{
"epoch": 5.687960687960688,
"grad_norm": 1.249363779180284,
"learning_rate": 3.34077160369497e-07,
"loss": 0.1087,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11524377018213272,
"step": 2315,
"valid_targets_mean": 924.3,
"valid_targets_min": 648
},
{
"epoch": 5.7002457002457,
"grad_norm": 0.9650264170829322,
"learning_rate": 3.08553154910336e-07,
"loss": 0.1086,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10062923282384872,
"step": 2320,
"valid_targets_mean": 936.2,
"valid_targets_min": 697
},
{
"epoch": 5.712530712530713,
"grad_norm": 1.0905576626909141,
"learning_rate": 2.840357459549492e-07,
"loss": 0.1048,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11498048901557922,
"step": 2325,
"valid_targets_mean": 945.2,
"valid_targets_min": 673
},
{
"epoch": 5.724815724815725,
"grad_norm": 1.0784679939229243,
"learning_rate": 2.6052618679560884e-07,
"loss": 0.1076,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1087181344628334,
"step": 2330,
"valid_targets_mean": 878.7,
"valid_targets_min": 586
},
{
"epoch": 5.737100737100737,
"grad_norm": 1.109625440357797,
"learning_rate": 2.380256792048541e-07,
"loss": 0.1045,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10212244093418121,
"step": 2335,
"valid_targets_mean": 857.2,
"valid_targets_min": 650
},
{
"epoch": 5.749385749385749,
"grad_norm": 1.1038910625984446,
"learning_rate": 2.1653537337405383e-07,
"loss": 0.1057,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10803759843111038,
"step": 2340,
"valid_targets_mean": 878.2,
"valid_targets_min": 683
},
{
"epoch": 5.761670761670762,
"grad_norm": 0.9881133477924188,
"learning_rate": 1.9605636785462234e-07,
"loss": 0.1046,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10298296809196472,
"step": 2345,
"valid_targets_mean": 1027.2,
"valid_targets_min": 647
},
{
"epoch": 5.773955773955774,
"grad_norm": 1.0253155264535176,
"learning_rate": 1.7658970950185095e-07,
"loss": 0.1046,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10427086055278778,
"step": 2350,
"valid_targets_mean": 961.8,
"valid_targets_min": 627
},
{
"epoch": 5.7862407862407865,
"grad_norm": 1.0906096465506647,
"learning_rate": 1.5813639342140197e-07,
"loss": 0.1038,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10633835196495056,
"step": 2355,
"valid_targets_mean": 1000.7,
"valid_targets_min": 559
},
{
"epoch": 5.798525798525798,
"grad_norm": 1.8182311216583416,
"learning_rate": 1.4069736291843605e-07,
"loss": 0.1071,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10948808491230011,
"step": 2360,
"valid_targets_mean": 982.2,
"valid_targets_min": 638
},
{
"epoch": 5.8108108108108105,
"grad_norm": 1.0293292710364181,
"learning_rate": 1.242735094493952e-07,
"loss": 0.1105,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11027572304010391,
"step": 2365,
"valid_targets_mean": 936.6,
"valid_targets_min": 709
},
{
"epoch": 5.823095823095823,
"grad_norm": 0.9852029775566562,
"learning_rate": 1.0886567257643033e-07,
"loss": 0.1078,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1116725355386734,
"step": 2370,
"valid_targets_mean": 999.9,
"valid_targets_min": 680
},
{
"epoch": 5.835380835380835,
"grad_norm": 1.0578402030344896,
"learning_rate": 9.447463992448891e-08,
"loss": 0.1073,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10911673307418823,
"step": 2375,
"valid_targets_mean": 1026.6,
"valid_targets_min": 728
},
{
"epoch": 5.847665847665848,
"grad_norm": 1.0386635253743162,
"learning_rate": 8.110114714104277e-08,
"loss": 0.1061,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10054969787597656,
"step": 2380,
"valid_targets_mean": 959.7,
"valid_targets_min": 763
},
{
"epoch": 5.85995085995086,
"grad_norm": 1.0529356284674416,
"learning_rate": 6.874587785849152e-08,
"loss": 0.1027,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10330841690301895,
"step": 2385,
"valid_targets_mean": 943.1,
"valid_targets_min": 563
},
{
"epoch": 5.872235872235873,
"grad_norm": 1.0438404646255461,
"learning_rate": 5.7409463659219286e-08,
"loss": 0.1087,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10719597339630127,
"step": 2390,
"valid_targets_mean": 958.3,
"valid_targets_min": 639
},
{
"epoch": 5.884520884520884,
"grad_norm": 1.0624307976403193,
"learning_rate": 4.709248404329625e-08,
"loss": 0.1069,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10844609141349792,
"step": 2395,
"valid_targets_mean": 888.2,
"valid_targets_min": 620
},
{
"epoch": 5.896805896805897,
"grad_norm": 1.0904697662506537,
"learning_rate": 3.7795466398868885e-08,
"loss": 0.1077,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10921336710453033,
"step": 2400,
"valid_targets_mean": 914.4,
"valid_targets_min": 633
},
{
"epoch": 5.909090909090909,
"grad_norm": 1.0358904150466581,
"learning_rate": 2.9518885975192702e-08,
"loss": 0.1084,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10594348609447479,
"step": 2405,
"valid_targets_mean": 927.4,
"valid_targets_min": 655
},
{
"epoch": 5.921375921375922,
"grad_norm": 1.1266522336395215,
"learning_rate": 2.226316585833832e-08,
"loss": 0.1112,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1268506497144699,
"step": 2410,
"valid_targets_mean": 949.9,
"valid_targets_min": 538
},
{
"epoch": 5.933660933660933,
"grad_norm": 1.1007783479652495,
"learning_rate": 1.6028676949570997e-08,
"loss": 0.1126,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10957098007202148,
"step": 2415,
"valid_targets_mean": 867.8,
"valid_targets_min": 697
},
{
"epoch": 5.945945945945946,
"grad_norm": 1.0215671203230696,
"learning_rate": 1.0815737946383575e-08,
"loss": 0.1053,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10414718091487885,
"step": 2420,
"valid_targets_mean": 981.7,
"valid_targets_min": 666
},
{
"epoch": 5.958230958230958,
"grad_norm": 1.0154249846900063,
"learning_rate": 6.624615326207284e-09,
"loss": 0.1083,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10006681829690933,
"step": 2425,
"valid_targets_mean": 848.8,
"valid_targets_min": 575
},
{
"epoch": 5.9705159705159705,
"grad_norm": 1.0008218944709328,
"learning_rate": 3.4555233327893124e-09,
"loss": 0.1123,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09888888895511627,
"step": 2430,
"valid_targets_mean": 907.8,
"valid_targets_min": 720
},
{
"epoch": 5.982800982800983,
"grad_norm": 1.0056204830892612,
"learning_rate": 1.3086239652415621e-09,
"loss": 0.1021,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09257914125919342,
"step": 2435,
"valid_targets_mean": 986.1,
"valid_targets_min": 715
},
{
"epoch": 5.995085995085995,
"grad_norm": 1.1057817949511841,
"learning_rate": 1.840269697628294e-10,
"loss": 0.1083,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11148717999458313,
"step": 2440,
"valid_targets_mean": 902.6,
"valid_targets_min": 663
},
{
"epoch": 6.0,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10436549037694931,
"step": 2442,
"total_flos": 197844182237184.0,
"train_loss": 0.18043228318757643,
"train_runtime": 7422.9369,
"train_samples_per_second": 5.256,
"train_steps_per_second": 0.329,
"valid_targets_mean": 1001.2,
"valid_targets_min": 670
}
],
"logging_steps": 5,
"max_steps": 2442,
"num_input_tokens_seen": 0,
"num_train_epochs": 6,
"save_steps": 100,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 197844182237184.0,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}