diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,5415 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 6.0, + "eval_steps": 500, + "global_step": 2442, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.012285012285012284, + "grad_norm": 19.48494191330693, + "learning_rate": 6.530612244897961e-07, + "loss": 1.0196, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.995261013507843, + "step": 5, + "valid_targets_mean": 953.3, + "valid_targets_min": 648 + }, + { + "epoch": 0.02457002457002457, + "grad_norm": 16.973569102036848, + "learning_rate": 1.469387755102041e-06, + "loss": 0.9788, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.9372879266738892, + "step": 10, + "valid_targets_mean": 1108.8, + "valid_targets_min": 784 + }, + { + "epoch": 0.036855036855036855, + "grad_norm": 15.639368583656532, + "learning_rate": 2.285714285714286e-06, + "loss": 0.9591, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.9503127336502075, + "step": 15, + "valid_targets_mean": 988.2, + "valid_targets_min": 739 + }, + { + "epoch": 0.04914004914004914, + "grad_norm": 10.514490782159397, + "learning_rate": 3.1020408163265307e-06, + "loss": 0.8599, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.8373680710792542, + "step": 20, + "valid_targets_mean": 939.6, + "valid_targets_min": 625 + }, + { + "epoch": 0.06142506142506143, + "grad_norm": 6.7065545820405665, + "learning_rate": 3.9183673469387755e-06, + "loss": 0.7771, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.7272936701774597, + "step": 25, + "valid_targets_mean": 862.8, + "valid_targets_min": 691 + }, + { + "epoch": 0.07371007371007371, + "grad_norm": 4.7134759825293955, + "learning_rate": 4.734693877551021e-06, + "loss": 0.6692, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.6532790660858154, + "step": 30, + "valid_targets_mean": 870.2, + "valid_targets_min": 568 + }, + { + "epoch": 0.085995085995086, + "grad_norm": 3.1810226037793567, + "learning_rate": 5.551020408163266e-06, + "loss": 0.618, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.5970568060874939, + "step": 35, + "valid_targets_mean": 860.2, + "valid_targets_min": 654 + }, + { + "epoch": 0.09828009828009827, + "grad_norm": 2.2072856666896485, + "learning_rate": 6.36734693877551e-06, + "loss": 0.5438, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.5087846517562866, + "step": 40, + "valid_targets_mean": 896.3, + "valid_targets_min": 574 + }, + { + "epoch": 0.11056511056511056, + "grad_norm": 1.9513850751635282, + "learning_rate": 7.183673469387755e-06, + "loss": 0.4908, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.47997918725013733, + "step": 45, + "valid_targets_mean": 866.0, + "valid_targets_min": 531 + }, + { + "epoch": 0.12285012285012285, + "grad_norm": 1.779935239542962, + "learning_rate": 8.000000000000001e-06, + "loss": 0.4639, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.44255393743515015, + "step": 50, + "valid_targets_mean": 908.8, + "valid_targets_min": 657 + }, + { + "epoch": 0.13513513513513514, + "grad_norm": 1.5653434999193199, + "learning_rate": 8.816326530612247e-06, + "loss": 0.4245, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4232107996940613, + "step": 55, + "valid_targets_mean": 962.6, + "valid_targets_min": 620 + }, + { + "epoch": 0.14742014742014742, + "grad_norm": 1.5087785328600123, + "learning_rate": 9.63265306122449e-06, + "loss": 0.3996, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3812786042690277, + "step": 60, + "valid_targets_mean": 962.4, + "valid_targets_min": 604 + }, + { + "epoch": 0.1597051597051597, + "grad_norm": 1.5312228956706806, + "learning_rate": 1.0448979591836737e-05, + "loss": 0.3733, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.36668089032173157, + "step": 65, + "valid_targets_mean": 926.8, + "valid_targets_min": 624 + }, + { + "epoch": 0.171990171990172, + "grad_norm": 1.3789570756738234, + "learning_rate": 1.126530612244898e-05, + "loss": 0.3635, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.35279956459999084, + "step": 70, + "valid_targets_mean": 1032.7, + "valid_targets_min": 652 + }, + { + "epoch": 0.18427518427518427, + "grad_norm": 1.3210121581543464, + "learning_rate": 1.2081632653061225e-05, + "loss": 0.3491, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.35165709257125854, + "step": 75, + "valid_targets_mean": 948.1, + "valid_targets_min": 661 + }, + { + "epoch": 0.19656019656019655, + "grad_norm": 1.3570768829251245, + "learning_rate": 1.2897959183673469e-05, + "loss": 0.3245, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3350982069969177, + "step": 80, + "valid_targets_mean": 962.0, + "valid_targets_min": 712 + }, + { + "epoch": 0.20884520884520885, + "grad_norm": 1.1835417091167943, + "learning_rate": 1.3714285714285716e-05, + "loss": 0.3245, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3199244737625122, + "step": 85, + "valid_targets_mean": 928.4, + "valid_targets_min": 590 + }, + { + "epoch": 0.22113022113022113, + "grad_norm": 1.2207437844630713, + "learning_rate": 1.4530612244897961e-05, + "loss": 0.3054, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.29469019174575806, + "step": 90, + "valid_targets_mean": 924.2, + "valid_targets_min": 649 + }, + { + "epoch": 0.2334152334152334, + "grad_norm": 1.2072738228334028, + "learning_rate": 1.5346938775510204e-05, + "loss": 0.3124, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3128836154937744, + "step": 95, + "valid_targets_mean": 992.2, + "valid_targets_min": 631 + }, + { + "epoch": 0.2457002457002457, + "grad_norm": 1.2368210765633527, + "learning_rate": 1.616326530612245e-05, + "loss": 0.3088, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.29172301292419434, + "step": 100, + "valid_targets_mean": 955.3, + "valid_targets_min": 656 + }, + { + "epoch": 0.257985257985258, + "grad_norm": 1.2901546371584578, + "learning_rate": 1.6979591836734695e-05, + "loss": 0.3117, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.30096760392189026, + "step": 105, + "valid_targets_mean": 922.8, + "valid_targets_min": 711 + }, + { + "epoch": 0.2702702702702703, + "grad_norm": 1.1619287144692585, + "learning_rate": 1.779591836734694e-05, + "loss": 0.2942, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3181251287460327, + "step": 110, + "valid_targets_mean": 1017.2, + "valid_targets_min": 698 + }, + { + "epoch": 0.28255528255528256, + "grad_norm": 1.1354753438215313, + "learning_rate": 1.8612244897959185e-05, + "loss": 0.2809, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2735210657119751, + "step": 115, + "valid_targets_mean": 1038.6, + "valid_targets_min": 740 + }, + { + "epoch": 0.29484029484029484, + "grad_norm": 1.2766170739852154, + "learning_rate": 1.942857142857143e-05, + "loss": 0.2901, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.30684512853622437, + "step": 120, + "valid_targets_mean": 908.9, + "valid_targets_min": 656 + }, + { + "epoch": 0.3071253071253071, + "grad_norm": 1.1321457276246412, + "learning_rate": 2.0244897959183672e-05, + "loss": 0.2931, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2833458185195923, + "step": 125, + "valid_targets_mean": 973.4, + "valid_targets_min": 686 + }, + { + "epoch": 0.3194103194103194, + "grad_norm": 1.3525378529895846, + "learning_rate": 2.106122448979592e-05, + "loss": 0.289, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.28936123847961426, + "step": 130, + "valid_targets_mean": 822.1, + "valid_targets_min": 663 + }, + { + "epoch": 0.3316953316953317, + "grad_norm": 1.3198522548302123, + "learning_rate": 2.1877551020408166e-05, + "loss": 0.2901, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.30741700530052185, + "step": 135, + "valid_targets_mean": 972.9, + "valid_targets_min": 674 + }, + { + "epoch": 0.343980343980344, + "grad_norm": 1.1665377558503693, + "learning_rate": 2.269387755102041e-05, + "loss": 0.2925, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.32794955372810364, + "step": 140, + "valid_targets_mean": 1073.8, + "valid_targets_min": 637 + }, + { + "epoch": 0.35626535626535627, + "grad_norm": 1.1034070025878784, + "learning_rate": 2.3510204081632656e-05, + "loss": 0.2661, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2596943974494934, + "step": 145, + "valid_targets_mean": 986.8, + "valid_targets_min": 704 + }, + { + "epoch": 0.36855036855036855, + "grad_norm": 1.1648155020059545, + "learning_rate": 2.4326530612244898e-05, + "loss": 0.2763, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.28700608015060425, + "step": 150, + "valid_targets_mean": 985.8, + "valid_targets_min": 646 + }, + { + "epoch": 0.3808353808353808, + "grad_norm": 1.1608740590797577, + "learning_rate": 2.5142857142857143e-05, + "loss": 0.2699, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.26809966564178467, + "step": 155, + "valid_targets_mean": 908.0, + "valid_targets_min": 700 + }, + { + "epoch": 0.3931203931203931, + "grad_norm": 1.2038707122717154, + "learning_rate": 2.5959183673469392e-05, + "loss": 0.2861, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2775510251522064, + "step": 160, + "valid_targets_mean": 965.7, + "valid_targets_min": 683 + }, + { + "epoch": 0.40540540540540543, + "grad_norm": 1.2941106922031897, + "learning_rate": 2.6775510204081637e-05, + "loss": 0.2669, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2653312385082245, + "step": 165, + "valid_targets_mean": 894.4, + "valid_targets_min": 713 + }, + { + "epoch": 0.4176904176904177, + "grad_norm": 1.2060242790377764, + "learning_rate": 2.7591836734693882e-05, + "loss": 0.2697, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.279617577791214, + "step": 170, + "valid_targets_mean": 972.8, + "valid_targets_min": 598 + }, + { + "epoch": 0.42997542997543, + "grad_norm": 1.0840840161259355, + "learning_rate": 2.8408163265306124e-05, + "loss": 0.2756, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.26907873153686523, + "step": 175, + "valid_targets_mean": 998.5, + "valid_targets_min": 711 + }, + { + "epoch": 0.44226044226044225, + "grad_norm": 1.256645843082882, + "learning_rate": 2.922448979591837e-05, + "loss": 0.2766, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.28155115246772766, + "step": 180, + "valid_targets_mean": 916.9, + "valid_targets_min": 721 + }, + { + "epoch": 0.45454545454545453, + "grad_norm": 1.1081969066979958, + "learning_rate": 3.0040816326530614e-05, + "loss": 0.2639, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2645479440689087, + "step": 185, + "valid_targets_mean": 936.2, + "valid_targets_min": 516 + }, + { + "epoch": 0.4668304668304668, + "grad_norm": 1.5408533860486011, + "learning_rate": 3.085714285714286e-05, + "loss": 0.265, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27647218108177185, + "step": 190, + "valid_targets_mean": 980.4, + "valid_targets_min": 619 + }, + { + "epoch": 0.47911547911547914, + "grad_norm": 1.1901708966642726, + "learning_rate": 3.1673469387755105e-05, + "loss": 0.2609, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2448381930589676, + "step": 195, + "valid_targets_mean": 917.9, + "valid_targets_min": 753 + }, + { + "epoch": 0.4914004914004914, + "grad_norm": 1.1723517034459403, + "learning_rate": 3.2489795918367346e-05, + "loss": 0.2528, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22118034958839417, + "step": 200, + "valid_targets_mean": 900.4, + "valid_targets_min": 618 + }, + { + "epoch": 0.5036855036855037, + "grad_norm": 1.2691807590013902, + "learning_rate": 3.3306122448979595e-05, + "loss": 0.2714, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2686021625995636, + "step": 205, + "valid_targets_mean": 956.2, + "valid_targets_min": 601 + }, + { + "epoch": 0.515970515970516, + "grad_norm": 1.2271107795623217, + "learning_rate": 3.4122448979591843e-05, + "loss": 0.2596, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25188225507736206, + "step": 210, + "valid_targets_mean": 925.4, + "valid_targets_min": 716 + }, + { + "epoch": 0.5282555282555282, + "grad_norm": 1.5888797236955163, + "learning_rate": 3.4938775510204085e-05, + "loss": 0.2556, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24855633080005646, + "step": 215, + "valid_targets_mean": 951.9, + "valid_targets_min": 727 + }, + { + "epoch": 0.5405405405405406, + "grad_norm": 1.3275098670748375, + "learning_rate": 3.575510204081633e-05, + "loss": 0.2531, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2647552490234375, + "step": 220, + "valid_targets_mean": 987.1, + "valid_targets_min": 783 + }, + { + "epoch": 0.5528255528255528, + "grad_norm": 1.0828068510228948, + "learning_rate": 3.6571428571428576e-05, + "loss": 0.2497, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23967652022838593, + "step": 225, + "valid_targets_mean": 926.5, + "valid_targets_min": 696 + }, + { + "epoch": 0.5651105651105651, + "grad_norm": 1.1445169366996504, + "learning_rate": 3.738775510204082e-05, + "loss": 0.2452, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2608412504196167, + "step": 230, + "valid_targets_mean": 958.7, + "valid_targets_min": 705 + }, + { + "epoch": 0.5773955773955773, + "grad_norm": 1.055020831726768, + "learning_rate": 3.8204081632653066e-05, + "loss": 0.2477, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2291972041130066, + "step": 235, + "valid_targets_mean": 980.6, + "valid_targets_min": 683 + }, + { + "epoch": 0.5896805896805897, + "grad_norm": 2.300952598422869, + "learning_rate": 3.902040816326531e-05, + "loss": 0.2612, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23058481514453888, + "step": 240, + "valid_targets_mean": 930.5, + "valid_targets_min": 631 + }, + { + "epoch": 0.601965601965602, + "grad_norm": 1.2180245686048703, + "learning_rate": 3.983673469387755e-05, + "loss": 0.2613, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.259826123714447, + "step": 245, + "valid_targets_mean": 990.2, + "valid_targets_min": 679 + }, + { + "epoch": 0.6142506142506142, + "grad_norm": 1.189380446558114, + "learning_rate": 3.9999672841332876e-05, + "loss": 0.2542, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23451372981071472, + "step": 250, + "valid_targets_mean": 996.4, + "valid_targets_min": 643 + }, + { + "epoch": 0.6265356265356266, + "grad_norm": 1.216345859130752, + "learning_rate": 3.999834377759164e-05, + "loss": 0.2567, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.26435649394989014, + "step": 255, + "valid_targets_mean": 967.8, + "valid_targets_min": 582 + }, + { + "epoch": 0.6388206388206388, + "grad_norm": 1.1231575295082026, + "learning_rate": 3.999599242924703e-05, + "loss": 0.2594, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2635229229927063, + "step": 260, + "valid_targets_mean": 889.4, + "valid_targets_min": 577 + }, + { + "epoch": 0.6511056511056511, + "grad_norm": 1.1214663878022753, + "learning_rate": 3.999261891649637e-05, + "loss": 0.2552, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24433369934558868, + "step": 265, + "valid_targets_mean": 917.2, + "valid_targets_min": 686 + }, + { + "epoch": 0.6633906633906634, + "grad_norm": 1.0868047723532783, + "learning_rate": 3.9988223411788436e-05, + "loss": 0.2582, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24705305695533752, + "step": 270, + "valid_targets_mean": 969.6, + "valid_targets_min": 671 + }, + { + "epoch": 0.6756756756756757, + "grad_norm": 1.090076198257986, + "learning_rate": 3.998280613981468e-05, + "loss": 0.241, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23530612885951996, + "step": 275, + "valid_targets_mean": 973.6, + "valid_targets_min": 725 + }, + { + "epoch": 0.687960687960688, + "grad_norm": 1.1254667728908574, + "learning_rate": 3.9976367377497725e-05, + "loss": 0.2482, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25306543707847595, + "step": 280, + "valid_targets_mean": 876.2, + "valid_targets_min": 631 + }, + { + "epoch": 0.7002457002457002, + "grad_norm": 1.1946450393277166, + "learning_rate": 3.99689074539772e-05, + "loss": 0.245, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24335439503192902, + "step": 285, + "valid_targets_mean": 876.1, + "valid_targets_min": 599 + }, + { + "epoch": 0.7125307125307125, + "grad_norm": 1.0864345277533947, + "learning_rate": 3.9960426750592936e-05, + "loss": 0.2451, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2602050304412842, + "step": 290, + "valid_targets_mean": 945.6, + "valid_targets_min": 710 + }, + { + "epoch": 0.7248157248157249, + "grad_norm": 1.0748839596204636, + "learning_rate": 3.995092570086546e-05, + "loss": 0.2459, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2375788390636444, + "step": 295, + "valid_targets_mean": 950.0, + "valid_targets_min": 681 + }, + { + "epoch": 0.7371007371007371, + "grad_norm": 1.1371929708679651, + "learning_rate": 3.9940404790473825e-05, + "loss": 0.24, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22634164988994598, + "step": 300, + "valid_targets_mean": 864.0, + "valid_targets_min": 656 + }, + { + "epoch": 0.7493857493857494, + "grad_norm": 1.1173189139596496, + "learning_rate": 3.992886455723082e-05, + "loss": 0.2393, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23460198938846588, + "step": 305, + "valid_targets_mean": 890.8, + "valid_targets_min": 546 + }, + { + "epoch": 0.7616707616707616, + "grad_norm": 1.0806367164790773, + "learning_rate": 3.991630559105541e-05, + "loss": 0.2392, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24468854069709778, + "step": 310, + "valid_targets_mean": 1037.7, + "valid_targets_min": 648 + }, + { + "epoch": 0.773955773955774, + "grad_norm": 1.0941916978548303, + "learning_rate": 3.990272853394268e-05, + "loss": 0.2436, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22421763837337494, + "step": 315, + "valid_targets_mean": 879.4, + "valid_targets_min": 652 + }, + { + "epoch": 0.7862407862407862, + "grad_norm": 0.9892087713501029, + "learning_rate": 3.988813407993089e-05, + "loss": 0.2283, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2293543666601181, + "step": 320, + "valid_targets_mean": 926.8, + "valid_targets_min": 709 + }, + { + "epoch": 0.7985257985257985, + "grad_norm": 1.0439272336097505, + "learning_rate": 3.987252297506613e-05, + "loss": 0.2404, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23711051046848297, + "step": 325, + "valid_targets_mean": 916.6, + "valid_targets_min": 659 + }, + { + "epoch": 0.8108108108108109, + "grad_norm": 0.9710400731300508, + "learning_rate": 3.9855896017364075e-05, + "loss": 0.2339, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2465747892856598, + "step": 330, + "valid_targets_mean": 934.1, + "valid_targets_min": 559 + }, + { + "epoch": 0.8230958230958231, + "grad_norm": 1.1162307618645835, + "learning_rate": 3.983825405676927e-05, + "loss": 0.2409, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2302704155445099, + "step": 335, + "valid_targets_mean": 916.3, + "valid_targets_min": 689 + }, + { + "epoch": 0.8353808353808354, + "grad_norm": 1.0360859227988688, + "learning_rate": 3.981959799511161e-05, + "loss": 0.2371, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22074277698993683, + "step": 340, + "valid_targets_mean": 926.7, + "valid_targets_min": 613 + }, + { + "epoch": 0.8476658476658476, + "grad_norm": 1.3415832051973777, + "learning_rate": 3.979992878606032e-05, + "loss": 0.2473, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.227852463722229, + "step": 345, + "valid_targets_mean": 873.4, + "valid_targets_min": 683 + }, + { + "epoch": 0.85995085995086, + "grad_norm": 1.0617306936385849, + "learning_rate": 3.977924743507513e-05, + "loss": 0.2447, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2265169322490692, + "step": 350, + "valid_targets_mean": 949.1, + "valid_targets_min": 592 + }, + { + "epoch": 0.8722358722358723, + "grad_norm": 1.0877486714407731, + "learning_rate": 3.975755499935492e-05, + "loss": 0.2501, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27971571683883667, + "step": 355, + "valid_targets_mean": 993.2, + "valid_targets_min": 597 + }, + { + "epoch": 0.8845208845208845, + "grad_norm": 1.0191352294037985, + "learning_rate": 3.973485258778368e-05, + "loss": 0.2266, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22834989428520203, + "step": 360, + "valid_targets_mean": 905.8, + "valid_targets_min": 681 + }, + { + "epoch": 0.8968058968058968, + "grad_norm": 0.957295460831019, + "learning_rate": 3.971114136087379e-05, + "loss": 0.2412, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23216350376605988, + "step": 365, + "valid_targets_mean": 1064.6, + "valid_targets_min": 577 + }, + { + "epoch": 0.9090909090909091, + "grad_norm": 1.4136404183668454, + "learning_rate": 3.968642253070675e-05, + "loss": 0.2353, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22791610658168793, + "step": 370, + "valid_targets_mean": 812.2, + "valid_targets_min": 601 + }, + { + "epoch": 0.9213759213759214, + "grad_norm": 1.0363150123987157, + "learning_rate": 3.966069736087116e-05, + "loss": 0.2429, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22790317237377167, + "step": 375, + "valid_targets_mean": 930.9, + "valid_targets_min": 569 + }, + { + "epoch": 0.9336609336609336, + "grad_norm": 1.078054793179813, + "learning_rate": 3.963396716639818e-05, + "loss": 0.2337, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22291269898414612, + "step": 380, + "valid_targets_mean": 872.6, + "valid_targets_min": 682 + }, + { + "epoch": 0.9459459459459459, + "grad_norm": 1.0135836959168179, + "learning_rate": 3.960623331369427e-05, + "loss": 0.2368, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2167268991470337, + "step": 385, + "valid_targets_mean": 873.2, + "valid_targets_min": 749 + }, + { + "epoch": 0.9582309582309583, + "grad_norm": 0.9632539514971237, + "learning_rate": 3.957749722047138e-05, + "loss": 0.2307, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24117511510849, + "step": 390, + "valid_targets_mean": 908.5, + "valid_targets_min": 639 + }, + { + "epoch": 0.9705159705159705, + "grad_norm": 1.0439198961040346, + "learning_rate": 3.9547760355674405e-05, + "loss": 0.2211, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2329951822757721, + "step": 395, + "valid_targets_mean": 940.2, + "valid_targets_min": 611 + }, + { + "epoch": 0.9828009828009828, + "grad_norm": 1.0005974886544775, + "learning_rate": 3.951702423940621e-05, + "loss": 0.2361, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24946032464504242, + "step": 400, + "valid_targets_mean": 1000.1, + "valid_targets_min": 668 + }, + { + "epoch": 0.995085995085995, + "grad_norm": 1.0320390692487604, + "learning_rate": 3.948529044284981e-05, + "loss": 0.2257, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24351008236408234, + "step": 405, + "valid_targets_mean": 947.5, + "valid_targets_min": 605 + }, + { + "epoch": 1.0073710073710074, + "grad_norm": 1.2048041606254665, + "learning_rate": 3.9452560588188135e-05, + "loss": 0.231, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21415719389915466, + "step": 410, + "valid_targets_mean": 871.8, + "valid_targets_min": 625 + }, + { + "epoch": 1.0196560196560196, + "grad_norm": 0.943497120924555, + "learning_rate": 3.9418836348521045e-05, + "loss": 0.2245, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20840199291706085, + "step": 415, + "valid_targets_mean": 1057.2, + "valid_targets_min": 701 + }, + { + "epoch": 1.031941031941032, + "grad_norm": 1.0489115691729, + "learning_rate": 3.9384119447779854e-05, + "loss": 0.2114, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2008688747882843, + "step": 420, + "valid_targets_mean": 910.7, + "valid_targets_min": 679 + }, + { + "epoch": 1.0442260442260443, + "grad_norm": 1.0601266266840927, + "learning_rate": 3.934841166063919e-05, + "loss": 0.221, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2155018448829651, + "step": 425, + "valid_targets_mean": 934.6, + "valid_targets_min": 555 + }, + { + "epoch": 1.0565110565110565, + "grad_norm": 1.0940787633659956, + "learning_rate": 3.931171481242625e-05, + "loss": 0.2017, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19951783120632172, + "step": 430, + "valid_targets_mean": 899.6, + "valid_targets_min": 685 + }, + { + "epoch": 1.0687960687960687, + "grad_norm": 1.1413230966413799, + "learning_rate": 3.927403077902753e-05, + "loss": 0.2125, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.224505215883255, + "step": 435, + "valid_targets_mean": 912.6, + "valid_targets_min": 685 + }, + { + "epoch": 1.0810810810810811, + "grad_norm": 1.0555822201833036, + "learning_rate": 3.9235361486792905e-05, + "loss": 0.2193, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23359808325767517, + "step": 440, + "valid_targets_mean": 1005.2, + "valid_targets_min": 839 + }, + { + "epoch": 1.0933660933660934, + "grad_norm": 1.020710025205007, + "learning_rate": 3.9195708912437176e-05, + "loss": 0.2187, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19746169447898865, + "step": 445, + "valid_targets_mean": 930.9, + "valid_targets_min": 648 + }, + { + "epoch": 1.1056511056511056, + "grad_norm": 1.2154470553446661, + "learning_rate": 3.915507508293901e-05, + "loss": 0.2089, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22506622970104218, + "step": 450, + "valid_targets_mean": 1011.9, + "valid_targets_min": 687 + }, + { + "epoch": 1.117936117936118, + "grad_norm": 1.071640402867911, + "learning_rate": 3.911346207543734e-05, + "loss": 0.2088, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2123754620552063, + "step": 455, + "valid_targets_mean": 933.9, + "valid_targets_min": 649 + }, + { + "epoch": 1.1302211302211302, + "grad_norm": 1.0094838297122273, + "learning_rate": 3.907087201712515e-05, + "loss": 0.2126, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22164851427078247, + "step": 460, + "valid_targets_mean": 984.8, + "valid_targets_min": 516 + }, + { + "epoch": 1.1425061425061425, + "grad_norm": 1.0456991601463468, + "learning_rate": 3.902730708514078e-05, + "loss": 0.2056, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18665668368339539, + "step": 465, + "valid_targets_mean": 906.1, + "valid_targets_min": 707 + }, + { + "epoch": 1.154791154791155, + "grad_norm": 1.0789156231899877, + "learning_rate": 3.8982769506456616e-05, + "loss": 0.2091, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22141095995903015, + "step": 470, + "valid_targets_mean": 986.8, + "valid_targets_min": 633 + }, + { + "epoch": 1.1670761670761671, + "grad_norm": 1.009247300625938, + "learning_rate": 3.893726155776524e-05, + "loss": 0.2073, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19400489330291748, + "step": 475, + "valid_targets_mean": 974.9, + "valid_targets_min": 552 + }, + { + "epoch": 1.1793611793611793, + "grad_norm": 1.1007983939488537, + "learning_rate": 3.8890785565363046e-05, + "loss": 0.2091, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22441557049751282, + "step": 480, + "valid_targets_mean": 903.4, + "valid_targets_min": 681 + }, + { + "epoch": 1.1916461916461916, + "grad_norm": 1.1324267441384634, + "learning_rate": 3.884334390503136e-05, + "loss": 0.2176, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21191783249378204, + "step": 485, + "valid_targets_mean": 887.9, + "valid_targets_min": 623 + }, + { + "epoch": 1.203931203931204, + "grad_norm": 1.3013874210452543, + "learning_rate": 3.8794939001914955e-05, + "loss": 0.2115, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2267088145017624, + "step": 490, + "valid_targets_mean": 960.1, + "valid_targets_min": 695 + }, + { + "epoch": 1.2162162162162162, + "grad_norm": 1.0927495139559071, + "learning_rate": 3.87455733303981e-05, + "loss": 0.2226, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22308525443077087, + "step": 495, + "valid_targets_mean": 903.3, + "valid_targets_min": 559 + }, + { + "epoch": 1.2285012285012284, + "grad_norm": 1.04814919371086, + "learning_rate": 3.869524941397805e-05, + "loss": 0.2062, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22436878085136414, + "step": 500, + "valid_targets_mean": 928.8, + "valid_targets_min": 702 + }, + { + "epoch": 1.2407862407862407, + "grad_norm": 1.1075100249688812, + "learning_rate": 3.8643969825136095e-05, + "loss": 0.2128, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2219080626964569, + "step": 505, + "valid_targets_mean": 900.2, + "valid_targets_min": 541 + }, + { + "epoch": 1.253071253071253, + "grad_norm": 0.91847772621059, + "learning_rate": 3.8591737185206024e-05, + "loss": 0.2114, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1968231201171875, + "step": 510, + "valid_targets_mean": 886.4, + "valid_targets_min": 680 + }, + { + "epoch": 1.2653562653562653, + "grad_norm": 1.0428588573826258, + "learning_rate": 3.853855416424011e-05, + "loss": 0.2108, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23146593570709229, + "step": 515, + "valid_targets_mean": 958.9, + "valid_targets_min": 666 + }, + { + "epoch": 1.2776412776412776, + "grad_norm": 0.9531033959892421, + "learning_rate": 3.848442348087267e-05, + "loss": 0.2144, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20124828815460205, + "step": 520, + "valid_targets_mean": 909.4, + "valid_targets_min": 658 + }, + { + "epoch": 1.28992628992629, + "grad_norm": 0.9969526285724082, + "learning_rate": 3.842934790218106e-05, + "loss": 0.2095, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1991349309682846, + "step": 525, + "valid_targets_mean": 965.3, + "valid_targets_min": 666 + }, + { + "epoch": 1.3022113022113022, + "grad_norm": 1.0967602900326623, + "learning_rate": 3.837333024354422e-05, + "loss": 0.2091, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2304757535457611, + "step": 530, + "valid_targets_mean": 991.1, + "valid_targets_min": 563 + }, + { + "epoch": 1.3144963144963144, + "grad_norm": 1.0292442436624027, + "learning_rate": 3.8316373368498794e-05, + "loss": 0.1968, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20452198386192322, + "step": 535, + "valid_targets_mean": 920.8, + "valid_targets_min": 604 + }, + { + "epoch": 1.3267813267813269, + "grad_norm": 1.0845488755936965, + "learning_rate": 3.82584801885927e-05, + "loss": 0.2138, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21353307366371155, + "step": 540, + "valid_targets_mean": 941.1, + "valid_targets_min": 652 + }, + { + "epoch": 1.339066339066339, + "grad_norm": 1.002091400863676, + "learning_rate": 3.8199653663236336e-05, + "loss": 0.2153, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23313114047050476, + "step": 545, + "valid_targets_mean": 1035.6, + "valid_targets_min": 732 + }, + { + "epoch": 1.3513513513513513, + "grad_norm": 0.937407231724259, + "learning_rate": 3.813989679955128e-05, + "loss": 0.2141, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20936307311058044, + "step": 550, + "valid_targets_mean": 972.9, + "valid_targets_min": 635 + }, + { + "epoch": 1.3636363636363638, + "grad_norm": 1.3073136711778546, + "learning_rate": 3.8079212652216595e-05, + "loss": 0.1975, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2040657103061676, + "step": 555, + "valid_targets_mean": 887.8, + "valid_targets_min": 602 + }, + { + "epoch": 1.375921375921376, + "grad_norm": 1.0087184524887758, + "learning_rate": 3.8017604323312616e-05, + "loss": 0.2038, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19966718554496765, + "step": 560, + "valid_targets_mean": 916.1, + "valid_targets_min": 717 + }, + { + "epoch": 1.3882063882063882, + "grad_norm": 1.1197468550965275, + "learning_rate": 3.795507496216246e-05, + "loss": 0.2156, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23064757883548737, + "step": 565, + "valid_targets_mean": 926.8, + "valid_targets_min": 755 + }, + { + "epoch": 1.4004914004914004, + "grad_norm": 0.9363733312332334, + "learning_rate": 3.789162776517098e-05, + "loss": 0.2091, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21238617599010468, + "step": 570, + "valid_targets_mean": 1013.4, + "valid_targets_min": 677 + }, + { + "epoch": 1.4127764127764126, + "grad_norm": 0.9461807359368896, + "learning_rate": 3.78272659756614e-05, + "loss": 0.2189, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2300427407026291, + "step": 575, + "valid_targets_mean": 977.1, + "valid_targets_min": 655 + }, + { + "epoch": 1.425061425061425, + "grad_norm": 1.032783725340241, + "learning_rate": 3.776199288370948e-05, + "loss": 0.2184, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22637906670570374, + "step": 580, + "valid_targets_mean": 858.8, + "valid_targets_min": 617 + }, + { + "epoch": 1.4373464373464373, + "grad_norm": 0.9213588980109625, + "learning_rate": 3.7695811825975386e-05, + "loss": 0.2043, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1935548335313797, + "step": 585, + "valid_targets_mean": 985.6, + "valid_targets_min": 571 + }, + { + "epoch": 1.4496314496314495, + "grad_norm": 0.8869344099651987, + "learning_rate": 3.76287261855331e-05, + "loss": 0.199, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20661897957324982, + "step": 590, + "valid_targets_mean": 1090.0, + "valid_targets_min": 720 + }, + { + "epoch": 1.461916461916462, + "grad_norm": 1.3056434875874352, + "learning_rate": 3.7560739391697465e-05, + "loss": 0.2089, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18137860298156738, + "step": 595, + "valid_targets_mean": 917.4, + "valid_targets_min": 600 + }, + { + "epoch": 1.4742014742014742, + "grad_norm": 0.8741112793999194, + "learning_rate": 3.749185491984891e-05, + "loss": 0.2044, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19978901743888855, + "step": 600, + "valid_targets_mean": 1061.6, + "valid_targets_min": 770 + }, + { + "epoch": 1.4864864864864864, + "grad_norm": 0.9079781231576824, + "learning_rate": 3.7422076291255785e-05, + "loss": 0.1965, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19094155728816986, + "step": 605, + "valid_targets_mean": 806.2, + "valid_targets_min": 677 + }, + { + "epoch": 1.4987714987714988, + "grad_norm": 0.8533490681827205, + "learning_rate": 3.7351407072894356e-05, + "loss": 0.2072, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19833627343177795, + "step": 610, + "valid_targets_mean": 930.6, + "valid_targets_min": 631 + }, + { + "epoch": 1.511056511056511, + "grad_norm": 0.9262106949924384, + "learning_rate": 3.7279850877266486e-05, + "loss": 0.2195, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23002958297729492, + "step": 615, + "valid_targets_mean": 927.2, + "valid_targets_min": 670 + }, + { + "epoch": 1.5233415233415233, + "grad_norm": 0.9996893911622978, + "learning_rate": 3.720741136221491e-05, + "loss": 0.2013, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22016549110412598, + "step": 620, + "valid_targets_mean": 975.1, + "valid_targets_min": 704 + }, + { + "epoch": 1.5356265356265357, + "grad_norm": 0.9304706089787511, + "learning_rate": 3.713409223073636e-05, + "loss": 0.2094, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21507513523101807, + "step": 625, + "valid_targets_mean": 954.0, + "valid_targets_min": 501 + }, + { + "epoch": 1.547911547911548, + "grad_norm": 0.9415941135469472, + "learning_rate": 3.705989723079214e-05, + "loss": 0.2076, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20272324979305267, + "step": 630, + "valid_targets_mean": 913.0, + "valid_targets_min": 516 + }, + { + "epoch": 1.5601965601965602, + "grad_norm": 1.0140127705853008, + "learning_rate": 3.698483015511665e-05, + "loss": 0.2097, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21423688530921936, + "step": 635, + "valid_targets_mean": 965.7, + "valid_targets_min": 608 + }, + { + "epoch": 1.5724815724815726, + "grad_norm": 0.9286120395429476, + "learning_rate": 3.690889484102344e-05, + "loss": 0.2046, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19090348482131958, + "step": 640, + "valid_targets_mean": 877.8, + "valid_targets_min": 587 + }, + { + "epoch": 1.5847665847665846, + "grad_norm": 0.9203938587479324, + "learning_rate": 3.683209517020908e-05, + "loss": 0.21, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22043265402317047, + "step": 645, + "valid_targets_mean": 998.6, + "valid_targets_min": 678 + }, + { + "epoch": 1.597051597051597, + "grad_norm": 1.0623655317031668, + "learning_rate": 3.675443506855473e-05, + "loss": 0.2124, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.263114869594574, + "step": 650, + "valid_targets_mean": 1080.6, + "valid_targets_min": 620 + }, + { + "epoch": 1.6093366093366095, + "grad_norm": 0.9064187903868257, + "learning_rate": 3.6675918505925456e-05, + "loss": 0.2113, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1894787847995758, + "step": 655, + "valid_targets_mean": 876.0, + "valid_targets_min": 622 + }, + { + "epoch": 1.6216216216216215, + "grad_norm": 0.9529120963239415, + "learning_rate": 3.6596549495967276e-05, + "loss": 0.2106, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22982874512672424, + "step": 660, + "valid_targets_mean": 1071.1, + "valid_targets_min": 736 + }, + { + "epoch": 1.633906633906634, + "grad_norm": 0.8840416714465907, + "learning_rate": 3.651633209590202e-05, + "loss": 0.2117, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17622330784797668, + "step": 665, + "valid_targets_mean": 925.9, + "valid_targets_min": 532 + }, + { + "epoch": 1.6461916461916462, + "grad_norm": 1.1810228059095147, + "learning_rate": 3.6435270406319914e-05, + "loss": 0.227, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21883201599121094, + "step": 670, + "valid_targets_mean": 870.9, + "valid_targets_min": 691 + }, + { + "epoch": 1.6584766584766584, + "grad_norm": 0.8643704317470531, + "learning_rate": 3.635336857096997e-05, + "loss": 0.205, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21042178571224213, + "step": 675, + "valid_targets_mean": 1000.4, + "valid_targets_min": 786 + }, + { + "epoch": 1.6707616707616708, + "grad_norm": 0.8551915698329935, + "learning_rate": 3.627063077654815e-05, + "loss": 0.2013, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18336012959480286, + "step": 680, + "valid_targets_mean": 926.6, + "valid_targets_min": 538 + }, + { + "epoch": 1.683046683046683, + "grad_norm": 0.9439302138863646, + "learning_rate": 3.618706125248337e-05, + "loss": 0.2029, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20368976891040802, + "step": 685, + "valid_targets_mean": 976.3, + "valid_targets_min": 747 + }, + { + "epoch": 1.6953316953316953, + "grad_norm": 0.9224278855037739, + "learning_rate": 3.6102664270721275e-05, + "loss": 0.2105, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20123934745788574, + "step": 690, + "valid_targets_mean": 958.1, + "valid_targets_min": 655 + }, + { + "epoch": 1.7076167076167077, + "grad_norm": 0.8820361407024188, + "learning_rate": 3.601744414550589e-05, + "loss": 0.1977, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1965818554162979, + "step": 695, + "valid_targets_mean": 938.6, + "valid_targets_min": 563 + }, + { + "epoch": 1.71990171990172, + "grad_norm": 0.9576279590944234, + "learning_rate": 3.593140523315906e-05, + "loss": 0.216, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21179933845996857, + "step": 700, + "valid_targets_mean": 886.4, + "valid_targets_min": 618 + }, + { + "epoch": 1.7321867321867321, + "grad_norm": 0.9812823218721463, + "learning_rate": 3.584455193185778e-05, + "loss": 0.2108, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2067493200302124, + "step": 705, + "valid_targets_mean": 849.6, + "valid_targets_min": 641 + }, + { + "epoch": 1.7444717444717446, + "grad_norm": 1.0444309881677845, + "learning_rate": 3.575688868140933e-05, + "loss": 0.2084, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19522926211357117, + "step": 710, + "valid_targets_mean": 911.2, + "valid_targets_min": 657 + }, + { + "epoch": 1.7567567567567568, + "grad_norm": 0.9040650326743758, + "learning_rate": 3.566841996302438e-05, + "loss": 0.2102, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20031523704528809, + "step": 715, + "valid_targets_mean": 914.9, + "valid_targets_min": 639 + }, + { + "epoch": 1.769041769041769, + "grad_norm": 0.9220766346495909, + "learning_rate": 3.557915029908787e-05, + "loss": 0.2111, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20691350102424622, + "step": 720, + "valid_targets_mean": 888.4, + "valid_targets_min": 651 + }, + { + "epoch": 1.7813267813267815, + "grad_norm": 0.8821864755825576, + "learning_rate": 3.548908425292784e-05, + "loss": 0.206, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20301777124404907, + "step": 725, + "valid_targets_mean": 954.6, + "valid_targets_min": 666 + }, + { + "epoch": 1.7936117936117935, + "grad_norm": 1.0027606605638184, + "learning_rate": 3.5398226428582165e-05, + "loss": 0.201, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18714162707328796, + "step": 730, + "valid_targets_mean": 870.8, + "valid_targets_min": 576 + }, + { + "epoch": 1.805896805896806, + "grad_norm": 0.9279423327114711, + "learning_rate": 3.530658147056321e-05, + "loss": 0.2128, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21059077978134155, + "step": 735, + "valid_targets_mean": 954.6, + "valid_targets_min": 748 + }, + { + "epoch": 1.8181818181818183, + "grad_norm": 0.9495556780469476, + "learning_rate": 3.521415406362041e-05, + "loss": 0.2111, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2139360010623932, + "step": 740, + "valid_targets_mean": 887.1, + "valid_targets_min": 695 + }, + { + "epoch": 1.8304668304668303, + "grad_norm": 0.9370993861785392, + "learning_rate": 3.512094893250076e-05, + "loss": 0.2079, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19548596441745758, + "step": 745, + "valid_targets_mean": 814.6, + "valid_targets_min": 592 + }, + { + "epoch": 1.8427518427518428, + "grad_norm": 0.904789512120929, + "learning_rate": 3.5026970841707366e-05, + "loss": 0.2074, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2096291035413742, + "step": 750, + "valid_targets_mean": 986.6, + "valid_targets_min": 671 + }, + { + "epoch": 1.855036855036855, + "grad_norm": 0.9121919269177917, + "learning_rate": 3.493222459525579e-05, + "loss": 0.1951, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19862934947013855, + "step": 755, + "valid_targets_mean": 937.2, + "valid_targets_min": 727 + }, + { + "epoch": 1.8673218673218672, + "grad_norm": 0.9341489709506934, + "learning_rate": 3.483671503642858e-05, + "loss": 0.2006, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1980162113904953, + "step": 760, + "valid_targets_mean": 958.4, + "valid_targets_min": 681 + }, + { + "epoch": 1.8796068796068797, + "grad_norm": 0.8748700101579973, + "learning_rate": 3.474044704752761e-05, + "loss": 0.2109, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2048308551311493, + "step": 765, + "valid_targets_mean": 993.3, + "valid_targets_min": 690 + }, + { + "epoch": 1.8918918918918919, + "grad_norm": 0.9000001364671186, + "learning_rate": 3.464342554962454e-05, + "loss": 0.2013, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20421837270259857, + "step": 770, + "valid_targets_mean": 999.3, + "valid_targets_min": 605 + }, + { + "epoch": 1.904176904176904, + "grad_norm": 0.8853023548149183, + "learning_rate": 3.4545655502309254e-05, + "loss": 0.1966, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18318688869476318, + "step": 775, + "valid_targets_mean": 947.4, + "valid_targets_min": 681 + }, + { + "epoch": 1.9164619164619165, + "grad_norm": 0.9313876881231712, + "learning_rate": 3.444714190343633e-05, + "loss": 0.2026, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18997597694396973, + "step": 780, + "valid_targets_mean": 898.9, + "valid_targets_min": 655 + }, + { + "epoch": 1.9287469287469288, + "grad_norm": 1.0221192176591836, + "learning_rate": 3.434788978886957e-05, + "loss": 0.204, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2042032778263092, + "step": 785, + "valid_targets_mean": 834.0, + "valid_targets_min": 582 + }, + { + "epoch": 1.941031941031941, + "grad_norm": 0.9155860522611501, + "learning_rate": 3.424790423222455e-05, + "loss": 0.1967, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19884389638900757, + "step": 790, + "valid_targets_mean": 952.8, + "valid_targets_min": 578 + }, + { + "epoch": 1.9533169533169534, + "grad_norm": 0.9317970936714676, + "learning_rate": 3.414719034460928e-05, + "loss": 0.1948, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21337175369262695, + "step": 795, + "valid_targets_mean": 964.0, + "valid_targets_min": 740 + }, + { + "epoch": 1.9656019656019657, + "grad_norm": 0.9474687249839954, + "learning_rate": 3.404575327436294e-05, + "loss": 0.2002, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19337508082389832, + "step": 800, + "valid_targets_mean": 994.7, + "valid_targets_min": 620 + }, + { + "epoch": 1.9778869778869779, + "grad_norm": 0.968841368102766, + "learning_rate": 3.3943598206792665e-05, + "loss": 0.1974, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19192612171173096, + "step": 805, + "valid_targets_mean": 907.4, + "valid_targets_min": 665 + }, + { + "epoch": 1.9901719901719903, + "grad_norm": 0.9417909387557403, + "learning_rate": 3.384073036390857e-05, + "loss": 0.2013, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20656394958496094, + "step": 810, + "valid_targets_mean": 893.9, + "valid_targets_min": 720 + }, + { + "epoch": 2.0024570024570023, + "grad_norm": 0.8585352938516232, + "learning_rate": 3.373715500415667e-05, + "loss": 0.199, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17259010672569275, + "step": 815, + "valid_targets_mean": 984.1, + "valid_targets_min": 621 + }, + { + "epoch": 2.0147420147420148, + "grad_norm": 0.8727424930672738, + "learning_rate": 3.363287742215023e-05, + "loss": 0.1717, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17072346806526184, + "step": 820, + "valid_targets_mean": 1022.6, + "valid_targets_min": 689 + }, + { + "epoch": 2.027027027027027, + "grad_norm": 0.9653669368105697, + "learning_rate": 3.352790294839898e-05, + "loss": 0.1773, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17556941509246826, + "step": 825, + "valid_targets_mean": 984.9, + "valid_targets_min": 608 + }, + { + "epoch": 2.039312039312039, + "grad_norm": 0.9856863810593398, + "learning_rate": 3.3422236949036726e-05, + "loss": 0.1735, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17215654253959656, + "step": 830, + "valid_targets_mean": 926.0, + "valid_targets_min": 527 + }, + { + "epoch": 2.0515970515970516, + "grad_norm": 0.985148571117365, + "learning_rate": 3.331588482554697e-05, + "loss": 0.1705, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.171957865357399, + "step": 835, + "valid_targets_mean": 944.8, + "valid_targets_min": 676 + }, + { + "epoch": 2.063882063882064, + "grad_norm": 0.909871062353881, + "learning_rate": 3.320885201448684e-05, + "loss": 0.1745, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16490240395069122, + "step": 840, + "valid_targets_mean": 948.2, + "valid_targets_min": 670 + }, + { + "epoch": 2.076167076167076, + "grad_norm": 1.056134140927179, + "learning_rate": 3.310114398720917e-05, + "loss": 0.1764, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1747320294380188, + "step": 845, + "valid_targets_mean": 914.8, + "valid_targets_min": 713 + }, + { + "epoch": 2.0884520884520885, + "grad_norm": 0.9821463448392567, + "learning_rate": 3.299276624958281e-05, + "loss": 0.1754, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18433263897895813, + "step": 850, + "valid_targets_mean": 989.0, + "valid_targets_min": 672 + }, + { + "epoch": 2.100737100737101, + "grad_norm": 0.9220392026293057, + "learning_rate": 3.288372434171116e-05, + "loss": 0.1737, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16827696561813354, + "step": 855, + "valid_targets_mean": 952.9, + "valid_targets_min": 729 + }, + { + "epoch": 2.113022113022113, + "grad_norm": 1.08905073711043, + "learning_rate": 3.2774023837648986e-05, + "loss": 0.1759, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1930348426103592, + "step": 860, + "valid_targets_mean": 953.7, + "valid_targets_min": 591 + }, + { + "epoch": 2.1253071253071254, + "grad_norm": 1.2768244141830352, + "learning_rate": 3.26636703451175e-05, + "loss": 0.1751, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16934962570667267, + "step": 865, + "valid_targets_mean": 932.2, + "valid_targets_min": 536 + }, + { + "epoch": 2.1375921375921374, + "grad_norm": 0.8963972595628271, + "learning_rate": 3.2552669505217646e-05, + "loss": 0.1721, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1723167896270752, + "step": 870, + "valid_targets_mean": 1024.2, + "valid_targets_min": 640 + }, + { + "epoch": 2.14987714987715, + "grad_norm": 0.9216164189642129, + "learning_rate": 3.24410269921418e-05, + "loss": 0.1696, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15483501553535461, + "step": 875, + "valid_targets_mean": 935.6, + "valid_targets_min": 807 + }, + { + "epoch": 2.1621621621621623, + "grad_norm": 0.9542992798473027, + "learning_rate": 3.232874851288367e-05, + "loss": 0.1765, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1586456149816513, + "step": 880, + "valid_targets_mean": 902.9, + "valid_targets_min": 669 + }, + { + "epoch": 2.1744471744471743, + "grad_norm": 0.8672518091555641, + "learning_rate": 3.221583980694659e-05, + "loss": 0.179, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16514278948307037, + "step": 885, + "valid_targets_mean": 927.9, + "valid_targets_min": 708 + }, + { + "epoch": 2.1867321867321867, + "grad_norm": 1.0142855190330105, + "learning_rate": 3.21023066460501e-05, + "loss": 0.1658, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17116622626781464, + "step": 890, + "valid_targets_mean": 939.3, + "valid_targets_min": 662 + }, + { + "epoch": 2.199017199017199, + "grad_norm": 0.9530051059651002, + "learning_rate": 3.198815483383492e-05, + "loss": 0.1749, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17124095559120178, + "step": 895, + "valid_targets_mean": 933.6, + "valid_targets_min": 474 + }, + { + "epoch": 2.211302211302211, + "grad_norm": 0.9824515095538127, + "learning_rate": 3.1873390205566295e-05, + "loss": 0.175, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19697478413581848, + "step": 900, + "valid_targets_mean": 998.1, + "valid_targets_min": 647 + }, + { + "epoch": 2.2235872235872236, + "grad_norm": 0.993108312882805, + "learning_rate": 3.175801862783565e-05, + "loss": 0.1764, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1796160489320755, + "step": 905, + "valid_targets_mean": 934.2, + "valid_targets_min": 605 + }, + { + "epoch": 2.235872235872236, + "grad_norm": 0.9603082825048654, + "learning_rate": 3.164204599826077e-05, + "loss": 0.1715, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17281116545200348, + "step": 910, + "valid_targets_mean": 1006.9, + "valid_targets_min": 707 + }, + { + "epoch": 2.248157248157248, + "grad_norm": 0.9377440070609073, + "learning_rate": 3.1525478245184245e-05, + "loss": 0.1697, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18679824471473694, + "step": 915, + "valid_targets_mean": 995.6, + "valid_targets_min": 700 + }, + { + "epoch": 2.2604422604422605, + "grad_norm": 0.8648186518182739, + "learning_rate": 3.140832132737051e-05, + "loss": 0.1704, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17448900640010834, + "step": 920, + "valid_targets_mean": 1037.5, + "valid_targets_min": 694 + }, + { + "epoch": 2.2727272727272725, + "grad_norm": 0.8834468920885888, + "learning_rate": 3.129058123370116e-05, + "loss": 0.1771, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17446479201316833, + "step": 925, + "valid_targets_mean": 987.5, + "valid_targets_min": 687 + }, + { + "epoch": 2.285012285012285, + "grad_norm": 0.9786788923931473, + "learning_rate": 3.117226398286887e-05, + "loss": 0.1714, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1690976619720459, + "step": 930, + "valid_targets_mean": 969.7, + "valid_targets_min": 637 + }, + { + "epoch": 2.2972972972972974, + "grad_norm": 0.9575695952922517, + "learning_rate": 3.105337562306968e-05, + "loss": 0.1756, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.166093111038208, + "step": 935, + "valid_targets_mean": 923.9, + "valid_targets_min": 660 + }, + { + "epoch": 2.30958230958231, + "grad_norm": 0.966989114335053, + "learning_rate": 3.0933922231693854e-05, + "loss": 0.1723, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18171045184135437, + "step": 940, + "valid_targets_mean": 1018.1, + "valid_targets_min": 701 + }, + { + "epoch": 2.321867321867322, + "grad_norm": 0.9755092370999333, + "learning_rate": 3.08139099150152e-05, + "loss": 0.18, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1950107365846634, + "step": 945, + "valid_targets_mean": 1096.1, + "valid_targets_min": 734 + }, + { + "epoch": 2.3341523341523343, + "grad_norm": 0.975219441056167, + "learning_rate": 3.069334480787893e-05, + "loss": 0.1753, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17046800255775452, + "step": 950, + "valid_targets_mean": 940.8, + "valid_targets_min": 564 + }, + { + "epoch": 2.3464373464373462, + "grad_norm": 0.9039204160667244, + "learning_rate": 3.057223307338806e-05, + "loss": 0.1754, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17663127183914185, + "step": 955, + "valid_targets_mean": 1080.9, + "valid_targets_min": 617 + }, + { + "epoch": 2.3587223587223587, + "grad_norm": 1.0055696002058934, + "learning_rate": 3.0450580902588346e-05, + "loss": 0.1739, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18415383994579315, + "step": 960, + "valid_targets_mean": 891.5, + "valid_targets_min": 742 + }, + { + "epoch": 2.371007371007371, + "grad_norm": 1.1318477100077704, + "learning_rate": 3.032839451415182e-05, + "loss": 0.1769, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17538920044898987, + "step": 965, + "valid_targets_mean": 953.4, + "valid_targets_min": 457 + }, + { + "epoch": 2.383292383292383, + "grad_norm": 1.114567082556806, + "learning_rate": 3.0205680154058904e-05, + "loss": 0.1732, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17116034030914307, + "step": 970, + "valid_targets_mean": 1048.6, + "valid_targets_min": 697 + }, + { + "epoch": 2.3955773955773956, + "grad_norm": 1.3308084828541589, + "learning_rate": 3.0082444095279117e-05, + "loss": 0.1738, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17864802479743958, + "step": 975, + "valid_targets_mean": 994.0, + "valid_targets_min": 708 + }, + { + "epoch": 2.407862407862408, + "grad_norm": 0.9504512606927061, + "learning_rate": 2.9958692637450406e-05, + "loss": 0.1717, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15201088786125183, + "step": 980, + "valid_targets_mean": 935.3, + "valid_targets_min": 643 + }, + { + "epoch": 2.42014742014742, + "grad_norm": 0.9750082562199718, + "learning_rate": 2.983443210655714e-05, + "loss": 0.1672, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16214288771152496, + "step": 985, + "valid_targets_mean": 854.0, + "valid_targets_min": 652 + }, + { + "epoch": 2.4324324324324325, + "grad_norm": 0.9264357827508429, + "learning_rate": 2.9709668854606706e-05, + "loss": 0.1806, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1679236888885498, + "step": 990, + "valid_targets_mean": 975.3, + "valid_targets_min": 614 + }, + { + "epoch": 2.444717444717445, + "grad_norm": 0.9912779025878137, + "learning_rate": 2.9584409259304828e-05, + "loss": 0.1747, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16841384768486023, + "step": 995, + "valid_targets_mean": 950.8, + "valid_targets_min": 664 + }, + { + "epoch": 2.457002457002457, + "grad_norm": 1.011226779105394, + "learning_rate": 2.945865972372954e-05, + "loss": 0.1746, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1771933138370514, + "step": 1000, + "valid_targets_mean": 1023.0, + "valid_targets_min": 652 + }, + { + "epoch": 2.4692874692874693, + "grad_norm": 0.8778923994125223, + "learning_rate": 2.9332426676003858e-05, + "loss": 0.1739, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15943680703639984, + "step": 1005, + "valid_targets_mean": 978.3, + "valid_targets_min": 673 + }, + { + "epoch": 2.4815724815724813, + "grad_norm": 0.9391261650343821, + "learning_rate": 2.920571656896722e-05, + "loss": 0.1727, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17108768224716187, + "step": 1010, + "valid_targets_mean": 1033.1, + "valid_targets_min": 668 + }, + { + "epoch": 2.493857493857494, + "grad_norm": 0.9820148281088876, + "learning_rate": 2.907853587984558e-05, + "loss": 0.1707, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17560836672782898, + "step": 1015, + "valid_targets_mean": 868.2, + "valid_targets_min": 655 + }, + { + "epoch": 2.506142506142506, + "grad_norm": 0.9118604910543656, + "learning_rate": 2.8950891109920333e-05, + "loss": 0.1721, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17220911383628845, + "step": 1020, + "valid_targets_mean": 892.5, + "valid_targets_min": 639 + }, + { + "epoch": 2.5184275184275187, + "grad_norm": 0.937533300150789, + "learning_rate": 2.882278878419597e-05, + "loss": 0.1746, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1721157729625702, + "step": 1025, + "valid_targets_mean": 927.7, + "valid_targets_min": 579 + }, + { + "epoch": 2.5307125307125307, + "grad_norm": 1.0018178519708052, + "learning_rate": 2.8694235451066538e-05, + "loss": 0.1753, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18641093373298645, + "step": 1030, + "valid_targets_mean": 960.5, + "valid_targets_min": 697 + }, + { + "epoch": 2.542997542997543, + "grad_norm": 0.912243193730379, + "learning_rate": 2.8565237681980876e-05, + "loss": 0.1686, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1784767508506775, + "step": 1035, + "valid_targets_mean": 1008.4, + "valid_targets_min": 797 + }, + { + "epoch": 2.555282555282555, + "grad_norm": 0.8615949496360548, + "learning_rate": 2.843580207110672e-05, + "loss": 0.1674, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1611432135105133, + "step": 1040, + "valid_targets_mean": 997.3, + "valid_targets_min": 736 + }, + { + "epoch": 2.5675675675675675, + "grad_norm": 1.003183895460324, + "learning_rate": 2.830593523499361e-05, + "loss": 0.1748, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16985324025154114, + "step": 1045, + "valid_targets_mean": 906.9, + "valid_targets_min": 681 + }, + { + "epoch": 2.57985257985258, + "grad_norm": 0.912522735391224, + "learning_rate": 2.8175643812234627e-05, + "loss": 0.1747, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17298677563667297, + "step": 1050, + "valid_targets_mean": 912.9, + "valid_targets_min": 656 + }, + { + "epoch": 2.592137592137592, + "grad_norm": 1.0807283933470113, + "learning_rate": 2.8044934463127108e-05, + "loss": 0.1735, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17781515419483185, + "step": 1055, + "valid_targets_mean": 1025.3, + "valid_targets_min": 759 + }, + { + "epoch": 2.6044226044226044, + "grad_norm": 0.9196179864615218, + "learning_rate": 2.7913813869332112e-05, + "loss": 0.1715, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1666468381881714, + "step": 1060, + "valid_targets_mean": 861.7, + "valid_targets_min": 654 + }, + { + "epoch": 2.616707616707617, + "grad_norm": 0.938863322434105, + "learning_rate": 2.7782288733532915e-05, + "loss": 0.1767, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1822303682565689, + "step": 1065, + "valid_targets_mean": 975.1, + "valid_targets_min": 671 + }, + { + "epoch": 2.628992628992629, + "grad_norm": 0.8851465187299596, + "learning_rate": 2.7650365779092346e-05, + "loss": 0.171, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16219617426395416, + "step": 1070, + "valid_targets_mean": 967.1, + "valid_targets_min": 632 + }, + { + "epoch": 2.6412776412776413, + "grad_norm": 0.9421668377359279, + "learning_rate": 2.751805174970912e-05, + "loss": 0.1765, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18036647140979767, + "step": 1075, + "valid_targets_mean": 976.8, + "valid_targets_min": 724 + }, + { + "epoch": 2.6535626535626538, + "grad_norm": 0.992873674615618, + "learning_rate": 2.7385353409073093e-05, + "loss": 0.1748, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18397009372711182, + "step": 1080, + "valid_targets_mean": 875.6, + "valid_targets_min": 626 + }, + { + "epoch": 2.6658476658476657, + "grad_norm": 0.9728070369066828, + "learning_rate": 2.725227754051953e-05, + "loss": 0.1773, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18314458429813385, + "step": 1085, + "valid_targets_mean": 899.2, + "valid_targets_min": 677 + }, + { + "epoch": 2.678132678132678, + "grad_norm": 0.9143288429643125, + "learning_rate": 2.711883094668234e-05, + "loss": 0.1641, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16809183359146118, + "step": 1090, + "valid_targets_mean": 994.4, + "valid_targets_min": 571 + }, + { + "epoch": 2.69041769041769, + "grad_norm": 1.54128042531331, + "learning_rate": 2.698502044914633e-05, + "loss": 0.1714, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18388208746910095, + "step": 1095, + "valid_targets_mean": 972.0, + "valid_targets_min": 710 + }, + { + "epoch": 2.7027027027027026, + "grad_norm": 1.049164865794871, + "learning_rate": 2.685085288809853e-05, + "loss": 0.1743, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18375930190086365, + "step": 1100, + "valid_targets_mean": 972.8, + "valid_targets_min": 698 + }, + { + "epoch": 2.714987714987715, + "grad_norm": 0.9600224337581525, + "learning_rate": 2.671633512197848e-05, + "loss": 0.176, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17532390356063843, + "step": 1105, + "valid_targets_mean": 992.3, + "valid_targets_min": 620 + }, + { + "epoch": 2.7272727272727275, + "grad_norm": 0.9403664365373092, + "learning_rate": 2.658147402712768e-05, + "loss": 0.1753, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18095794320106506, + "step": 1110, + "valid_targets_mean": 928.3, + "valid_targets_min": 667 + }, + { + "epoch": 2.7395577395577395, + "grad_norm": 1.162242904424703, + "learning_rate": 2.6446276497438064e-05, + "loss": 0.1691, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19149112701416016, + "step": 1115, + "valid_targets_mean": 884.9, + "valid_targets_min": 596 + }, + { + "epoch": 2.751842751842752, + "grad_norm": 0.9999696190235531, + "learning_rate": 2.6310749443999593e-05, + "loss": 0.1787, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18875834345817566, + "step": 1120, + "valid_targets_mean": 1027.4, + "valid_targets_min": 747 + }, + { + "epoch": 2.764127764127764, + "grad_norm": 0.9810912416799812, + "learning_rate": 2.617489979474699e-05, + "loss": 0.1714, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18806487321853638, + "step": 1125, + "valid_targets_mean": 939.4, + "valid_targets_min": 620 + }, + { + "epoch": 2.7764127764127764, + "grad_norm": 1.1849732651553062, + "learning_rate": 2.6038734494105562e-05, + "loss": 0.175, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17712372541427612, + "step": 1130, + "valid_targets_mean": 957.6, + "valid_targets_min": 653 + }, + { + "epoch": 2.788697788697789, + "grad_norm": 0.9030343977921143, + "learning_rate": 2.590226050263625e-05, + "loss": 0.1682, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1648157835006714, + "step": 1135, + "valid_targets_mean": 909.4, + "valid_targets_min": 632 + }, + { + "epoch": 2.800982800982801, + "grad_norm": 1.2038410845517444, + "learning_rate": 2.5765484796679768e-05, + "loss": 0.174, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1805713176727295, + "step": 1140, + "valid_targets_mean": 933.9, + "valid_targets_min": 586 + }, + { + "epoch": 2.8132678132678133, + "grad_norm": 0.9898717229094194, + "learning_rate": 2.5628414368000035e-05, + "loss": 0.1722, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16944530606269836, + "step": 1145, + "valid_targets_mean": 959.3, + "valid_targets_min": 750 + }, + { + "epoch": 2.8255528255528253, + "grad_norm": 0.9531190415010581, + "learning_rate": 2.5491056223426746e-05, + "loss": 0.1781, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17504331469535828, + "step": 1150, + "valid_targets_mean": 915.2, + "valid_targets_min": 642 + }, + { + "epoch": 2.8378378378378377, + "grad_norm": 1.0671121984464367, + "learning_rate": 2.5353417384497166e-05, + "loss": 0.1726, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17695313692092896, + "step": 1155, + "valid_targets_mean": 882.2, + "valid_targets_min": 631 + }, + { + "epoch": 2.85012285012285, + "grad_norm": 1.0685164913692538, + "learning_rate": 2.5215504887097243e-05, + "loss": 0.1757, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20810116827487946, + "step": 1160, + "valid_targets_mean": 967.5, + "valid_targets_min": 652 + }, + { + "epoch": 2.8624078624078626, + "grad_norm": 0.8784263870335288, + "learning_rate": 2.5077325781101918e-05, + "loss": 0.1807, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17300809919834137, + "step": 1165, + "valid_targets_mean": 1010.9, + "valid_targets_min": 582 + }, + { + "epoch": 2.8746928746928746, + "grad_norm": 0.9297310781873073, + "learning_rate": 2.493888713001476e-05, + "loss": 0.1796, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17841032147407532, + "step": 1170, + "valid_targets_mean": 968.2, + "valid_targets_min": 691 + }, + { + "epoch": 2.886977886977887, + "grad_norm": 0.8915623950018948, + "learning_rate": 2.480019601060687e-05, + "loss": 0.1711, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18827411532402039, + "step": 1175, + "valid_targets_mean": 1003.5, + "valid_targets_min": 651 + }, + { + "epoch": 2.899262899262899, + "grad_norm": 0.9259985446100989, + "learning_rate": 2.4661259512555176e-05, + "loss": 0.1768, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18165713548660278, + "step": 1180, + "valid_targets_mean": 1009.4, + "valid_targets_min": 686 + }, + { + "epoch": 2.9115479115479115, + "grad_norm": 0.9897274256876722, + "learning_rate": 2.4522084738079933e-05, + "loss": 0.1756, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1986805945634842, + "step": 1185, + "valid_targets_mean": 953.1, + "valid_targets_min": 694 + }, + { + "epoch": 2.923832923832924, + "grad_norm": 1.07572825455023, + "learning_rate": 2.4382678801581762e-05, + "loss": 0.1729, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16371658444404602, + "step": 1190, + "valid_targets_mean": 887.8, + "valid_targets_min": 633 + }, + { + "epoch": 2.9361179361179364, + "grad_norm": 0.8911223010515249, + "learning_rate": 2.4243048829277916e-05, + "loss": 0.1724, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1594182252883911, + "step": 1195, + "valid_targets_mean": 882.2, + "valid_targets_min": 615 + }, + { + "epoch": 2.9484029484029484, + "grad_norm": 0.8992271888116871, + "learning_rate": 2.410320195883802e-05, + "loss": 0.1677, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17592017352581024, + "step": 1200, + "valid_targets_mean": 993.3, + "valid_targets_min": 576 + }, + { + "epoch": 2.960687960687961, + "grad_norm": 0.8882940826799905, + "learning_rate": 2.396314533901918e-05, + "loss": 0.1748, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16428948938846588, + "step": 1205, + "valid_targets_mean": 894.0, + "valid_targets_min": 605 + }, + { + "epoch": 2.972972972972973, + "grad_norm": 1.0619171838658064, + "learning_rate": 2.3822886129300603e-05, + "loss": 0.1776, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1795843243598938, + "step": 1210, + "valid_targets_mean": 995.9, + "valid_targets_min": 758 + }, + { + "epoch": 2.9852579852579852, + "grad_norm": 1.1545333955599706, + "learning_rate": 2.368243149951755e-05, + "loss": 0.1751, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16789449751377106, + "step": 1215, + "valid_targets_mean": 888.7, + "valid_targets_min": 623 + }, + { + "epoch": 2.9975429975429977, + "grad_norm": 0.8908167630467976, + "learning_rate": 2.3541788629494865e-05, + "loss": 0.1728, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16199368238449097, + "step": 1220, + "valid_targets_mean": 908.7, + "valid_targets_min": 635 + }, + { + "epoch": 3.0098280098280097, + "grad_norm": 0.8444984262032558, + "learning_rate": 2.3400964708679944e-05, + "loss": 0.1475, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1309882402420044, + "step": 1225, + "valid_targets_mean": 924.3, + "valid_targets_min": 651 + }, + { + "epoch": 3.022113022113022, + "grad_norm": 1.1132820322990213, + "learning_rate": 2.325996693577522e-05, + "loss": 0.1452, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1460825353860855, + "step": 1230, + "valid_targets_mean": 1024.1, + "valid_targets_min": 681 + }, + { + "epoch": 3.0343980343980346, + "grad_norm": 1.0882996142948094, + "learning_rate": 2.311880251837019e-05, + "loss": 0.1442, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1443278044462204, + "step": 1235, + "valid_targets_mean": 955.6, + "valid_targets_min": 696 + }, + { + "epoch": 3.0466830466830466, + "grad_norm": 1.0386792573778023, + "learning_rate": 2.2977478672572933e-05, + "loss": 0.1461, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14893858134746552, + "step": 1240, + "valid_targets_mean": 1041.4, + "valid_targets_min": 666 + }, + { + "epoch": 3.058968058968059, + "grad_norm": 0.9689321094652921, + "learning_rate": 2.2836002622641297e-05, + "loss": 0.143, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14730066061019897, + "step": 1245, + "valid_targets_mean": 1079.1, + "valid_targets_min": 695 + }, + { + "epoch": 3.0712530712530715, + "grad_norm": 0.9602934336334937, + "learning_rate": 2.269438160061354e-05, + "loss": 0.1442, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1352572739124298, + "step": 1250, + "valid_targets_mean": 920.0, + "valid_targets_min": 676 + }, + { + "epoch": 3.0835380835380835, + "grad_norm": 0.9885414365472923, + "learning_rate": 2.2552622845938698e-05, + "loss": 0.1396, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12953315675258636, + "step": 1255, + "valid_targets_mean": 876.9, + "valid_targets_min": 679 + }, + { + "epoch": 3.095823095823096, + "grad_norm": 1.0771727969197218, + "learning_rate": 2.2410733605106462e-05, + "loss": 0.1497, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1568758636713028, + "step": 1260, + "valid_targets_mean": 850.2, + "valid_targets_min": 604 + }, + { + "epoch": 3.108108108108108, + "grad_norm": 1.1491068715024852, + "learning_rate": 2.2268721131276805e-05, + "loss": 0.1437, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15370234847068787, + "step": 1265, + "valid_targets_mean": 977.6, + "valid_targets_min": 693 + }, + { + "epoch": 3.1203931203931203, + "grad_norm": 0.968706151633579, + "learning_rate": 2.2126592683909154e-05, + "loss": 0.1419, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1496562510728836, + "step": 1270, + "valid_targets_mean": 955.6, + "valid_targets_min": 614 + }, + { + "epoch": 3.1326781326781328, + "grad_norm": 1.0277324102635916, + "learning_rate": 2.1984355528391342e-05, + "loss": 0.1436, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1483316421508789, + "step": 1275, + "valid_targets_mean": 938.6, + "valid_targets_min": 598 + }, + { + "epoch": 3.1449631449631448, + "grad_norm": 0.987683127269443, + "learning_rate": 2.1842016935668188e-05, + "loss": 0.1445, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15175923705101013, + "step": 1280, + "valid_targets_mean": 1015.1, + "valid_targets_min": 746 + }, + { + "epoch": 3.157248157248157, + "grad_norm": 0.9390509636386285, + "learning_rate": 2.169958418186982e-05, + "loss": 0.153, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1446429193019867, + "step": 1285, + "valid_targets_mean": 984.0, + "valid_targets_min": 713 + }, + { + "epoch": 3.1695331695331697, + "grad_norm": 1.0394272751093805, + "learning_rate": 2.1557064547939754e-05, + "loss": 0.1471, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13773059844970703, + "step": 1290, + "valid_targets_mean": 1005.0, + "valid_targets_min": 736 + }, + { + "epoch": 3.1818181818181817, + "grad_norm": 0.9407221598412456, + "learning_rate": 2.1414465319262666e-05, + "loss": 0.1494, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13250461220741272, + "step": 1295, + "valid_targets_mean": 909.2, + "valid_targets_min": 681 + }, + { + "epoch": 3.194103194103194, + "grad_norm": 1.0435380904119864, + "learning_rate": 2.1271793785291997e-05, + "loss": 0.1444, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13971221446990967, + "step": 1300, + "valid_targets_mean": 917.8, + "valid_targets_min": 602 + }, + { + "epoch": 3.2063882063882065, + "grad_norm": 0.9549420894567353, + "learning_rate": 2.1129057239177337e-05, + "loss": 0.1495, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1558254361152649, + "step": 1305, + "valid_targets_mean": 933.1, + "valid_targets_min": 732 + }, + { + "epoch": 3.2186732186732185, + "grad_norm": 0.9125235147288177, + "learning_rate": 2.0986262977391577e-05, + "loss": 0.148, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14063915610313416, + "step": 1310, + "valid_targets_mean": 931.4, + "valid_targets_min": 728 + }, + { + "epoch": 3.230958230958231, + "grad_norm": 1.0365586973179586, + "learning_rate": 2.084341829935796e-05, + "loss": 0.1462, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15706020593643188, + "step": 1315, + "valid_targets_mean": 1020.5, + "valid_targets_min": 664 + }, + { + "epoch": 3.2432432432432434, + "grad_norm": 0.9664270233881987, + "learning_rate": 2.0700530507076916e-05, + "loss": 0.1392, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13748334348201752, + "step": 1320, + "valid_targets_mean": 973.8, + "valid_targets_min": 616 + }, + { + "epoch": 3.2555282555282554, + "grad_norm": 0.9835787375345787, + "learning_rate": 2.0557606904752833e-05, + "loss": 0.1431, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14928527176380157, + "step": 1325, + "valid_targets_mean": 914.9, + "valid_targets_min": 474 + }, + { + "epoch": 3.267813267813268, + "grad_norm": 0.9999571019320207, + "learning_rate": 2.0414654798420622e-05, + "loss": 0.1331, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1445634961128235, + "step": 1330, + "valid_targets_mean": 919.4, + "valid_targets_min": 635 + }, + { + "epoch": 3.2800982800982803, + "grad_norm": 0.9402396257207996, + "learning_rate": 2.02716814955723e-05, + "loss": 0.1446, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1313110738992691, + "step": 1335, + "valid_targets_mean": 927.1, + "valid_targets_min": 597 + }, + { + "epoch": 3.2923832923832923, + "grad_norm": 1.018542017945054, + "learning_rate": 2.0128694304783406e-05, + "loss": 0.1467, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1506882905960083, + "step": 1340, + "valid_targets_mean": 927.5, + "valid_targets_min": 730 + }, + { + "epoch": 3.3046683046683047, + "grad_norm": 0.9490500420853842, + "learning_rate": 1.9985700535339406e-05, + "loss": 0.1471, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13769789040088654, + "step": 1345, + "valid_targets_mean": 902.7, + "valid_targets_min": 722 + }, + { + "epoch": 3.3169533169533167, + "grad_norm": 0.9838326609104752, + "learning_rate": 1.984270749686207e-05, + "loss": 0.147, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12807482481002808, + "step": 1350, + "valid_targets_mean": 884.1, + "valid_targets_min": 536 + }, + { + "epoch": 3.329238329238329, + "grad_norm": 1.0387317349514822, + "learning_rate": 1.9699722498935786e-05, + "loss": 0.1445, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14848026633262634, + "step": 1355, + "valid_targets_mean": 915.0, + "valid_targets_min": 563 + }, + { + "epoch": 3.3415233415233416, + "grad_norm": 1.0308878761143394, + "learning_rate": 1.9556752850733933e-05, + "loss": 0.1429, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12545965611934662, + "step": 1360, + "valid_targets_mean": 904.6, + "valid_targets_min": 706 + }, + { + "epoch": 3.3538083538083536, + "grad_norm": 0.9764650250717721, + "learning_rate": 1.9413805860645242e-05, + "loss": 0.1515, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.161744624376297, + "step": 1365, + "valid_targets_mean": 1072.9, + "valid_targets_min": 648 + }, + { + "epoch": 3.366093366093366, + "grad_norm": 0.9956928130533306, + "learning_rate": 1.9270888835900165e-05, + "loss": 0.1478, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1572413146495819, + "step": 1370, + "valid_targets_mean": 958.1, + "valid_targets_min": 654 + }, + { + "epoch": 3.3783783783783785, + "grad_norm": 1.0372403351419428, + "learning_rate": 1.9128009082197417e-05, + "loss": 0.1436, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14318522810935974, + "step": 1375, + "valid_targets_mean": 882.1, + "valid_targets_min": 685 + }, + { + "epoch": 3.3906633906633905, + "grad_norm": 0.9371080430346982, + "learning_rate": 1.8985173903330428e-05, + "loss": 0.1429, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14232918620109558, + "step": 1380, + "valid_targets_mean": 987.7, + "valid_targets_min": 494 + }, + { + "epoch": 3.402948402948403, + "grad_norm": 0.9461579387456934, + "learning_rate": 1.884239060081407e-05, + "loss": 0.1399, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13573700189590454, + "step": 1385, + "valid_targets_mean": 935.5, + "valid_targets_min": 733 + }, + { + "epoch": 3.4152334152334154, + "grad_norm": 1.1361411106468413, + "learning_rate": 1.869966647351135e-05, + "loss": 0.1438, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1459515392780304, + "step": 1390, + "valid_targets_mean": 912.1, + "valid_targets_min": 667 + }, + { + "epoch": 3.4275184275184274, + "grad_norm": 0.928334710297314, + "learning_rate": 1.8557008817260343e-05, + "loss": 0.1447, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13418202102184296, + "step": 1395, + "valid_targets_mean": 1008.3, + "valid_targets_min": 651 + }, + { + "epoch": 3.43980343980344, + "grad_norm": 0.9501584881036313, + "learning_rate": 1.8414424924501222e-05, + "loss": 0.1531, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13538123667240143, + "step": 1400, + "valid_targets_mean": 898.5, + "valid_targets_min": 666 + }, + { + "epoch": 3.4520884520884523, + "grad_norm": 0.9514020070355609, + "learning_rate": 1.827192208390347e-05, + "loss": 0.1418, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13934513926506042, + "step": 1405, + "valid_targets_mean": 957.5, + "valid_targets_min": 532 + }, + { + "epoch": 3.4643734643734643, + "grad_norm": 0.9533792191195336, + "learning_rate": 1.812950757999334e-05, + "loss": 0.1448, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13233372569084167, + "step": 1410, + "valid_targets_mean": 871.8, + "valid_targets_min": 516 + }, + { + "epoch": 3.4766584766584767, + "grad_norm": 0.9795866790310279, + "learning_rate": 1.7987188692781417e-05, + "loss": 0.1435, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14241893589496613, + "step": 1415, + "valid_targets_mean": 936.1, + "valid_targets_min": 676 + }, + { + "epoch": 3.488943488943489, + "grad_norm": 0.914661052152663, + "learning_rate": 1.784497269739052e-05, + "loss": 0.1459, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1311112344264984, + "step": 1420, + "valid_targets_mean": 917.9, + "valid_targets_min": 477 + }, + { + "epoch": 3.501228501228501, + "grad_norm": 1.0403358753849694, + "learning_rate": 1.770286686368381e-05, + "loss": 0.1423, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1444162130355835, + "step": 1425, + "valid_targets_mean": 925.6, + "valid_targets_min": 663 + }, + { + "epoch": 3.5135135135135136, + "grad_norm": 1.0284489507821293, + "learning_rate": 1.756087845589312e-05, + "loss": 0.1517, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1424056887626648, + "step": 1430, + "valid_targets_mean": 891.7, + "valid_targets_min": 599 + }, + { + "epoch": 3.5257985257985256, + "grad_norm": 1.047707904104722, + "learning_rate": 1.7419014732247683e-05, + "loss": 0.1445, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14654859900474548, + "step": 1435, + "valid_targets_mean": 939.6, + "valid_targets_min": 736 + }, + { + "epoch": 3.538083538083538, + "grad_norm": 1.0369128036732569, + "learning_rate": 1.7277282944603047e-05, + "loss": 0.1536, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15306830406188965, + "step": 1440, + "valid_targets_mean": 1005.7, + "valid_targets_min": 700 + }, + { + "epoch": 3.5503685503685505, + "grad_norm": 1.0180125385512913, + "learning_rate": 1.713569033807041e-05, + "loss": 0.1454, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14173316955566406, + "step": 1445, + "valid_targets_mean": 894.1, + "valid_targets_min": 516 + }, + { + "epoch": 3.562653562653563, + "grad_norm": 0.9519346924504087, + "learning_rate": 1.6994244150646244e-05, + "loss": 0.1376, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12116233259439468, + "step": 1450, + "valid_targets_mean": 900.7, + "valid_targets_min": 618 + }, + { + "epoch": 3.574938574938575, + "grad_norm": 0.9294534036479501, + "learning_rate": 1.6852951612842278e-05, + "loss": 0.1435, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.142903134226799, + "step": 1455, + "valid_targets_mean": 1152.5, + "valid_targets_min": 720 + }, + { + "epoch": 3.5872235872235874, + "grad_norm": 0.9121434584648127, + "learning_rate": 1.671181994731595e-05, + "loss": 0.1423, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14141470193862915, + "step": 1460, + "valid_targets_mean": 1007.2, + "valid_targets_min": 717 + }, + { + "epoch": 3.5995085995085994, + "grad_norm": 1.0189171300919593, + "learning_rate": 1.6570856368501108e-05, + "loss": 0.1485, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16055302321910858, + "step": 1465, + "valid_targets_mean": 1005.6, + "valid_targets_min": 640 + }, + { + "epoch": 3.611793611793612, + "grad_norm": 1.0085418017413093, + "learning_rate": 1.643006808223931e-05, + "loss": 0.1411, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13635236024856567, + "step": 1470, + "valid_targets_mean": 955.1, + "valid_targets_min": 576 + }, + { + "epoch": 3.6240786240786242, + "grad_norm": 0.9875856191876736, + "learning_rate": 1.6289462285411387e-05, + "loss": 0.1389, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14985120296478271, + "step": 1475, + "valid_targets_mean": 977.2, + "valid_targets_min": 754 + }, + { + "epoch": 3.6363636363636362, + "grad_norm": 1.3052844343483891, + "learning_rate": 1.614904616556962e-05, + "loss": 0.1411, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14070774614810944, + "step": 1480, + "valid_targets_mean": 922.2, + "valid_targets_min": 617 + }, + { + "epoch": 3.6486486486486487, + "grad_norm": 0.967691777275294, + "learning_rate": 1.6008826900570294e-05, + "loss": 0.1455, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13695186376571655, + "step": 1485, + "valid_targets_mean": 891.7, + "valid_targets_min": 656 + }, + { + "epoch": 3.6609336609336607, + "grad_norm": 0.9944529130674382, + "learning_rate": 1.586881165820675e-05, + "loss": 0.1482, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14366337656974792, + "step": 1490, + "valid_targets_mean": 875.6, + "valid_targets_min": 621 + }, + { + "epoch": 3.673218673218673, + "grad_norm": 0.9510017148787837, + "learning_rate": 1.5729007595843037e-05, + "loss": 0.1454, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13302364945411682, + "step": 1495, + "valid_targets_mean": 931.1, + "valid_targets_min": 682 + }, + { + "epoch": 3.6855036855036856, + "grad_norm": 1.0079869124770597, + "learning_rate": 1.5589421860047986e-05, + "loss": 0.15, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1522165983915329, + "step": 1500, + "valid_targets_mean": 1022.2, + "valid_targets_min": 697 + }, + { + "epoch": 3.697788697788698, + "grad_norm": 0.9714028705719118, + "learning_rate": 1.5450061586229903e-05, + "loss": 0.144, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14417463541030884, + "step": 1505, + "valid_targets_mean": 945.3, + "valid_targets_min": 638 + }, + { + "epoch": 3.71007371007371, + "grad_norm": 0.9579562277963518, + "learning_rate": 1.5310933898271864e-05, + "loss": 0.1443, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14955665171146393, + "step": 1510, + "valid_targets_mean": 980.5, + "valid_targets_min": 627 + }, + { + "epoch": 3.7223587223587224, + "grad_norm": 0.9994122793110526, + "learning_rate": 1.5172045908167462e-05, + "loss": 0.1423, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14200064539909363, + "step": 1515, + "valid_targets_mean": 949.6, + "valid_targets_min": 619 + }, + { + "epoch": 3.7346437346437344, + "grad_norm": 0.9821936979113775, + "learning_rate": 1.5033404715657344e-05, + "loss": 0.1411, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13359785079956055, + "step": 1520, + "valid_targets_mean": 900.1, + "valid_targets_min": 639 + }, + { + "epoch": 3.746928746928747, + "grad_norm": 1.0501978524899511, + "learning_rate": 1.4895017407866217e-05, + "loss": 0.1491, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14831113815307617, + "step": 1525, + "valid_targets_mean": 836.9, + "valid_targets_min": 578 + }, + { + "epoch": 3.7592137592137593, + "grad_norm": 1.0009555437478268, + "learning_rate": 1.4756891058940606e-05, + "loss": 0.1439, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1430947482585907, + "step": 1530, + "valid_targets_mean": 937.2, + "valid_targets_min": 631 + }, + { + "epoch": 3.7714987714987718, + "grad_norm": 0.9989703655858909, + "learning_rate": 1.4619032729687223e-05, + "loss": 0.1468, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1486039161682129, + "step": 1535, + "valid_targets_mean": 1009.9, + "valid_targets_min": 703 + }, + { + "epoch": 3.7837837837837838, + "grad_norm": 1.0318735775529813, + "learning_rate": 1.4481449467212004e-05, + "loss": 0.1436, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1425326019525528, + "step": 1540, + "valid_targets_mean": 845.5, + "valid_targets_min": 661 + }, + { + "epoch": 3.796068796068796, + "grad_norm": 0.9608581870874865, + "learning_rate": 1.4344148304559926e-05, + "loss": 0.1461, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14720866084098816, + "step": 1545, + "valid_targets_mean": 1006.9, + "valid_targets_min": 751 + }, + { + "epoch": 3.808353808353808, + "grad_norm": 0.933587010759985, + "learning_rate": 1.4207136260355426e-05, + "loss": 0.1423, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14576342701911926, + "step": 1550, + "valid_targets_mean": 1017.6, + "valid_targets_min": 662 + }, + { + "epoch": 3.8206388206388207, + "grad_norm": 0.9646457265318146, + "learning_rate": 1.4070420338443667e-05, + "loss": 0.1482, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14640560746192932, + "step": 1555, + "valid_targets_mean": 961.2, + "valid_targets_min": 652 + }, + { + "epoch": 3.832923832923833, + "grad_norm": 0.9689718165593759, + "learning_rate": 1.3934007527532494e-05, + "loss": 0.1428, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1452617645263672, + "step": 1560, + "valid_targets_mean": 941.9, + "valid_targets_min": 660 + }, + { + "epoch": 3.845208845208845, + "grad_norm": 0.9663490950140802, + "learning_rate": 1.3797904800835174e-05, + "loss": 0.1412, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1348852664232254, + "step": 1565, + "valid_targets_mean": 898.6, + "valid_targets_min": 680 + }, + { + "epoch": 3.8574938574938575, + "grad_norm": 0.9016980830369478, + "learning_rate": 1.3662119115713968e-05, + "loss": 0.1476, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12884706258773804, + "step": 1570, + "valid_targets_mean": 1025.2, + "valid_targets_min": 745 + }, + { + "epoch": 3.8697788697788695, + "grad_norm": 0.9387203419785546, + "learning_rate": 1.3526657413324427e-05, + "loss": 0.1383, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13017988204956055, + "step": 1575, + "valid_targets_mean": 1044.4, + "valid_targets_min": 661 + }, + { + "epoch": 3.882063882063882, + "grad_norm": 0.9800633394989181, + "learning_rate": 1.3391526618260636e-05, + "loss": 0.1439, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14418141543865204, + "step": 1580, + "valid_targets_mean": 920.8, + "valid_targets_min": 640 + }, + { + "epoch": 3.8943488943488944, + "grad_norm": 1.0779786099105024, + "learning_rate": 1.3256733638201172e-05, + "loss": 0.1459, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15096238255500793, + "step": 1585, + "valid_targets_mean": 868.6, + "valid_targets_min": 669 + }, + { + "epoch": 3.906633906633907, + "grad_norm": 1.0606136768853318, + "learning_rate": 1.3122285363556053e-05, + "loss": 0.1416, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13585801422595978, + "step": 1590, + "valid_targets_mean": 950.3, + "valid_targets_min": 552 + }, + { + "epoch": 3.918918918918919, + "grad_norm": 1.070526566734902, + "learning_rate": 1.2988188667114487e-05, + "loss": 0.1446, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15988577902317047, + "step": 1595, + "valid_targets_mean": 918.4, + "valid_targets_min": 596 + }, + { + "epoch": 3.9312039312039313, + "grad_norm": 0.9951058407303669, + "learning_rate": 1.2854450403693526e-05, + "loss": 0.1445, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14960238337516785, + "step": 1600, + "valid_targets_mean": 881.3, + "valid_targets_min": 670 + }, + { + "epoch": 3.9434889434889433, + "grad_norm": 0.968596082582197, + "learning_rate": 1.272107740978769e-05, + "loss": 0.1482, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14312288165092468, + "step": 1605, + "valid_targets_mean": 970.6, + "valid_targets_min": 612 + }, + { + "epoch": 3.9557739557739557, + "grad_norm": 0.9045477600182205, + "learning_rate": 1.2588076503219475e-05, + "loss": 0.1435, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1324017196893692, + "step": 1610, + "valid_targets_mean": 993.7, + "valid_targets_min": 675 + }, + { + "epoch": 3.968058968058968, + "grad_norm": 1.0064100486978322, + "learning_rate": 1.2455454482790859e-05, + "loss": 0.146, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1510857343673706, + "step": 1615, + "valid_targets_mean": 898.1, + "valid_targets_min": 680 + }, + { + "epoch": 3.98034398034398, + "grad_norm": 0.9667913255808215, + "learning_rate": 1.2323218127935714e-05, + "loss": 0.1407, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1537216305732727, + "step": 1620, + "valid_targets_mean": 922.4, + "valid_targets_min": 706 + }, + { + "epoch": 3.9926289926289926, + "grad_norm": 1.0925109360863716, + "learning_rate": 1.2191374198373309e-05, + "loss": 0.1473, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14623141288757324, + "step": 1625, + "valid_targets_mean": 949.4, + "valid_targets_min": 604 + }, + { + "epoch": 4.004914004914005, + "grad_norm": 0.8697439145249658, + "learning_rate": 1.2059929433762734e-05, + "loss": 0.1405, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12668581306934357, + "step": 1630, + "valid_targets_mean": 1056.8, + "valid_targets_min": 710 + }, + { + "epoch": 4.017199017199017, + "grad_norm": 0.9786218062777593, + "learning_rate": 1.1928890553358352e-05, + "loss": 0.1254, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12963074445724487, + "step": 1635, + "valid_targets_mean": 1016.4, + "valid_targets_min": 678 + }, + { + "epoch": 4.0294840294840295, + "grad_norm": 1.013466228322274, + "learning_rate": 1.1798264255666387e-05, + "loss": 0.1198, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13302694261074066, + "step": 1640, + "valid_targets_mean": 959.4, + "valid_targets_min": 602 + }, + { + "epoch": 4.041769041769042, + "grad_norm": 0.9500770614642219, + "learning_rate": 1.1668057218102436e-05, + "loss": 0.1206, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10990549623966217, + "step": 1645, + "valid_targets_mean": 963.8, + "valid_targets_min": 597 + }, + { + "epoch": 4.054054054054054, + "grad_norm": 1.0429136985966856, + "learning_rate": 1.1538276096650175e-05, + "loss": 0.1207, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12115078419446945, + "step": 1650, + "valid_targets_mean": 938.2, + "valid_targets_min": 510 + }, + { + "epoch": 4.066339066339066, + "grad_norm": 0.9248235147185616, + "learning_rate": 1.1408927525521118e-05, + "loss": 0.1253, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10874859988689423, + "step": 1655, + "valid_targets_mean": 998.1, + "valid_targets_min": 646 + }, + { + "epoch": 4.078624078624078, + "grad_norm": 0.9128362154627097, + "learning_rate": 1.1280018116815438e-05, + "loss": 0.1217, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11440710723400116, + "step": 1660, + "valid_targets_mean": 990.1, + "valid_targets_min": 675 + }, + { + "epoch": 4.090909090909091, + "grad_norm": 1.0210751300631842, + "learning_rate": 1.115155446018404e-05, + "loss": 0.1197, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11153914034366608, + "step": 1665, + "valid_targets_mean": 869.9, + "valid_targets_min": 618 + }, + { + "epoch": 4.103194103194103, + "grad_norm": 1.088600870316487, + "learning_rate": 1.1023543122491626e-05, + "loss": 0.1209, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1248767226934433, + "step": 1670, + "valid_targets_mean": 924.6, + "valid_targets_min": 619 + }, + { + "epoch": 4.115479115479116, + "grad_norm": 0.9652107721756708, + "learning_rate": 1.089599064748108e-05, + "loss": 0.1216, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1146978884935379, + "step": 1675, + "valid_targets_mean": 925.7, + "valid_targets_min": 729 + }, + { + "epoch": 4.127764127764128, + "grad_norm": 0.9917060080272326, + "learning_rate": 1.0768903555438927e-05, + "loss": 0.1206, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1253787726163864, + "step": 1680, + "valid_targets_mean": 950.2, + "valid_targets_min": 663 + }, + { + "epoch": 4.14004914004914, + "grad_norm": 0.9615299803477797, + "learning_rate": 1.0642288342862007e-05, + "loss": 0.1204, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12344183027744293, + "step": 1685, + "valid_targets_mean": 1037.2, + "valid_targets_min": 814 + }, + { + "epoch": 4.152334152334152, + "grad_norm": 0.9793999861662677, + "learning_rate": 1.051615148212544e-05, + "loss": 0.1198, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12122024595737457, + "step": 1690, + "valid_targets_mean": 1047.0, + "valid_targets_min": 673 + }, + { + "epoch": 4.164619164619165, + "grad_norm": 1.0089991272381962, + "learning_rate": 1.0390499421151706e-05, + "loss": 0.1183, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11438318341970444, + "step": 1695, + "valid_targets_mean": 987.9, + "valid_targets_min": 659 + }, + { + "epoch": 4.176904176904177, + "grad_norm": 1.0434287723993414, + "learning_rate": 1.0265338583081088e-05, + "loss": 0.12, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11772973835468292, + "step": 1700, + "valid_targets_mean": 973.6, + "valid_targets_min": 644 + }, + { + "epoch": 4.1891891891891895, + "grad_norm": 1.0281938517002083, + "learning_rate": 1.0140675365943284e-05, + "loss": 0.1221, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12870125472545624, + "step": 1705, + "valid_targets_mean": 933.7, + "valid_targets_min": 690 + }, + { + "epoch": 4.201474201474202, + "grad_norm": 1.5080272835779485, + "learning_rate": 1.0016516142330404e-05, + "loss": 0.1217, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12202489376068115, + "step": 1710, + "valid_targets_mean": 993.0, + "valid_targets_min": 687 + }, + { + "epoch": 4.2137592137592135, + "grad_norm": 1.0757473296800761, + "learning_rate": 9.89286725907117e-06, + "loss": 0.1209, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12647372484207153, + "step": 1715, + "valid_targets_mean": 965.9, + "valid_targets_min": 720 + }, + { + "epoch": 4.226044226044226, + "grad_norm": 1.0266691124569305, + "learning_rate": 9.769735036906475e-06, + "loss": 0.1209, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11845774948596954, + "step": 1720, + "valid_targets_mean": 886.0, + "valid_targets_min": 670 + }, + { + "epoch": 4.238329238329238, + "grad_norm": 1.055790871475488, + "learning_rate": 9.647125770166321e-06, + "loss": 0.1211, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11783865094184875, + "step": 1725, + "valid_targets_mean": 882.2, + "valid_targets_min": 582 + }, + { + "epoch": 4.250614250614251, + "grad_norm": 1.0468500852253888, + "learning_rate": 9.525045726448001e-06, + "loss": 0.1218, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12612420320510864, + "step": 1730, + "valid_targets_mean": 968.2, + "valid_targets_min": 666 + }, + { + "epoch": 4.262899262899263, + "grad_norm": 0.9941585620438288, + "learning_rate": 9.40350114629577e-06, + "loss": 0.1209, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12407036870718002, + "step": 1735, + "valid_targets_mean": 940.1, + "valid_targets_min": 693 + }, + { + "epoch": 4.275184275184275, + "grad_norm": 1.0030585645401304, + "learning_rate": 9.282498242881784e-06, + "loss": 0.1237, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12118291109800339, + "step": 1740, + "valid_targets_mean": 975.1, + "valid_targets_min": 687 + }, + { + "epoch": 4.287469287469287, + "grad_norm": 0.9567063485416096, + "learning_rate": 9.162043201688517e-06, + "loss": 0.1226, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11729326844215393, + "step": 1745, + "valid_targets_mean": 913.5, + "valid_targets_min": 620 + }, + { + "epoch": 4.2997542997543, + "grad_norm": 1.093321079367913, + "learning_rate": 9.042142180192596e-06, + "loss": 0.1216, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1232294961810112, + "step": 1750, + "valid_targets_mean": 983.2, + "valid_targets_min": 712 + }, + { + "epoch": 4.312039312039312, + "grad_norm": 1.0223977302692675, + "learning_rate": 8.92280130754998e-06, + "loss": 0.1176, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12798799574375153, + "step": 1755, + "valid_targets_mean": 936.3, + "valid_targets_min": 683 + }, + { + "epoch": 4.324324324324325, + "grad_norm": 0.9969902090774578, + "learning_rate": 8.804026684282694e-06, + "loss": 0.1213, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1234356239438057, + "step": 1760, + "valid_targets_mean": 982.8, + "valid_targets_min": 708 + }, + { + "epoch": 4.336609336609337, + "grad_norm": 1.0513411652135027, + "learning_rate": 8.685824381966975e-06, + "loss": 0.1193, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12967775762081146, + "step": 1765, + "valid_targets_mean": 944.9, + "valid_targets_min": 661 + }, + { + "epoch": 4.348894348894349, + "grad_norm": 0.9974585088572387, + "learning_rate": 8.568200442922865e-06, + "loss": 0.1194, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11659836024045944, + "step": 1770, + "valid_targets_mean": 987.1, + "valid_targets_min": 624 + }, + { + "epoch": 4.361179361179361, + "grad_norm": 1.0033626517127674, + "learning_rate": 8.451160879905398e-06, + "loss": 0.1222, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12519827485084534, + "step": 1775, + "valid_targets_mean": 917.5, + "valid_targets_min": 699 + }, + { + "epoch": 4.3734643734643734, + "grad_norm": 1.067385742275141, + "learning_rate": 8.33471167579717e-06, + "loss": 0.124, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12871462106704712, + "step": 1780, + "valid_targets_mean": 932.5, + "valid_targets_min": 676 + }, + { + "epoch": 4.385749385749386, + "grad_norm": 1.1028517874182766, + "learning_rate": 8.218858783302566e-06, + "loss": 0.1277, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12894824147224426, + "step": 1785, + "valid_targets_mean": 922.2, + "valid_targets_min": 619 + }, + { + "epoch": 4.398034398034398, + "grad_norm": 0.9694246238021301, + "learning_rate": 8.103608124643412e-06, + "loss": 0.1171, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10765311866998672, + "step": 1790, + "valid_targets_mean": 901.9, + "valid_targets_min": 719 + }, + { + "epoch": 4.41031941031941, + "grad_norm": 1.0096991183773807, + "learning_rate": 7.988965591256284e-06, + "loss": 0.1263, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12528453767299652, + "step": 1795, + "valid_targets_mean": 950.7, + "valid_targets_min": 657 + }, + { + "epoch": 4.422604422604422, + "grad_norm": 0.9966981145704281, + "learning_rate": 7.874937043491331e-06, + "loss": 0.1242, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12716656923294067, + "step": 1800, + "valid_targets_mean": 918.1, + "valid_targets_min": 654 + }, + { + "epoch": 4.434889434889435, + "grad_norm": 1.0272608704748118, + "learning_rate": 7.761528310312679e-06, + "loss": 0.1211, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11819181591272354, + "step": 1805, + "valid_targets_mean": 931.4, + "valid_targets_min": 658 + }, + { + "epoch": 4.447174447174447, + "grad_norm": 1.0137561846111187, + "learning_rate": 7.648745189000511e-06, + "loss": 0.118, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1136414185166359, + "step": 1810, + "valid_targets_mean": 912.7, + "valid_targets_min": 607 + }, + { + "epoch": 4.45945945945946, + "grad_norm": 1.0873501889956356, + "learning_rate": 7.536593444854663e-06, + "loss": 0.1216, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12807413935661316, + "step": 1815, + "valid_targets_mean": 1031.0, + "valid_targets_min": 679 + }, + { + "epoch": 4.471744471744472, + "grad_norm": 1.0068476190598987, + "learning_rate": 7.4250788108999686e-06, + "loss": 0.1203, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1230648085474968, + "step": 1820, + "valid_targets_mean": 974.1, + "valid_targets_min": 640 + }, + { + "epoch": 4.484029484029484, + "grad_norm": 1.0433094724916965, + "learning_rate": 7.314206987593162e-06, + "loss": 0.1256, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12586304545402527, + "step": 1825, + "valid_targets_mean": 1011.7, + "valid_targets_min": 656 + }, + { + "epoch": 4.496314496314496, + "grad_norm": 0.987211648906778, + "learning_rate": 7.203983642531462e-06, + "loss": 0.1221, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12281675636768341, + "step": 1830, + "valid_targets_mean": 994.2, + "valid_targets_min": 619 + }, + { + "epoch": 4.5085995085995085, + "grad_norm": 1.1029480180851556, + "learning_rate": 7.094414410162913e-06, + "loss": 0.1275, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1318090856075287, + "step": 1835, + "valid_targets_mean": 1020.6, + "valid_targets_min": 687 + }, + { + "epoch": 4.520884520884521, + "grad_norm": 0.9175739238118313, + "learning_rate": 6.985504891498291e-06, + "loss": 0.1181, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10475833714008331, + "step": 1840, + "valid_targets_mean": 957.6, + "valid_targets_min": 661 + }, + { + "epoch": 4.533169533169533, + "grad_norm": 0.9770170212372375, + "learning_rate": 6.8772606538248285e-06, + "loss": 0.123, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11924491077661514, + "step": 1845, + "valid_targets_mean": 983.9, + "valid_targets_min": 680 + }, + { + "epoch": 4.545454545454545, + "grad_norm": 1.1013986620909682, + "learning_rate": 6.769687230421638e-06, + "loss": 0.1274, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13231635093688965, + "step": 1850, + "valid_targets_mean": 905.4, + "valid_targets_min": 621 + }, + { + "epoch": 4.557739557739557, + "grad_norm": 1.0528293798177313, + "learning_rate": 6.662790120276803e-06, + "loss": 0.1267, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12604084610939026, + "step": 1855, + "valid_targets_mean": 923.2, + "valid_targets_min": 673 + }, + { + "epoch": 4.57002457002457, + "grad_norm": 1.1158231290547191, + "learning_rate": 6.556574787806344e-06, + "loss": 0.123, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13169091939926147, + "step": 1860, + "valid_targets_mean": 849.7, + "valid_targets_min": 578 + }, + { + "epoch": 4.582309582309582, + "grad_norm": 1.1024933402787964, + "learning_rate": 6.451046662574831e-06, + "loss": 0.1215, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12675680220127106, + "step": 1865, + "valid_targets_mean": 947.2, + "valid_targets_min": 640 + }, + { + "epoch": 4.594594594594595, + "grad_norm": 1.0812034920547042, + "learning_rate": 6.346211139017877e-06, + "loss": 0.1233, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12591159343719482, + "step": 1870, + "valid_targets_mean": 866.4, + "valid_targets_min": 645 + }, + { + "epoch": 4.606879606879607, + "grad_norm": 1.0597747711467687, + "learning_rate": 6.242073576166337e-06, + "loss": 0.1206, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11885634064674377, + "step": 1875, + "valid_targets_mean": 910.1, + "valid_targets_min": 697 + }, + { + "epoch": 4.61916461916462, + "grad_norm": 1.064538896175725, + "learning_rate": 6.138639297372404e-06, + "loss": 0.119, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12688076496124268, + "step": 1880, + "valid_targets_mean": 991.1, + "valid_targets_min": 703 + }, + { + "epoch": 4.631449631449631, + "grad_norm": 1.0405836028766522, + "learning_rate": 6.035913590037479e-06, + "loss": 0.1207, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12629568576812744, + "step": 1885, + "valid_targets_mean": 906.2, + "valid_targets_min": 666 + }, + { + "epoch": 4.643734643734644, + "grad_norm": 1.0590657189064507, + "learning_rate": 5.933901705341851e-06, + "loss": 0.1235, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12081333994865417, + "step": 1890, + "valid_targets_mean": 846.7, + "valid_targets_min": 582 + }, + { + "epoch": 4.656019656019656, + "grad_norm": 0.9685387980316024, + "learning_rate": 5.832608857976321e-06, + "loss": 0.1188, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12511588633060455, + "step": 1895, + "valid_targets_mean": 1002.4, + "valid_targets_min": 604 + }, + { + "epoch": 4.6683046683046685, + "grad_norm": 1.0165029943633133, + "learning_rate": 5.732040225875584e-06, + "loss": 0.1228, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12953898310661316, + "step": 1900, + "valid_targets_mean": 964.8, + "valid_targets_min": 683 + }, + { + "epoch": 4.680589680589681, + "grad_norm": 1.3325503122751237, + "learning_rate": 5.632200949953579e-06, + "loss": 0.1202, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11685385555028915, + "step": 1905, + "valid_targets_mean": 896.8, + "valid_targets_min": 508 + }, + { + "epoch": 4.6928746928746925, + "grad_norm": 1.0663672971922626, + "learning_rate": 5.533096133840677e-06, + "loss": 0.1197, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11813405156135559, + "step": 1910, + "valid_targets_mean": 959.2, + "valid_targets_min": 624 + }, + { + "epoch": 4.705159705159705, + "grad_norm": 1.0349308880833799, + "learning_rate": 5.434730843622778e-06, + "loss": 0.1143, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10479003936052322, + "step": 1915, + "valid_targets_mean": 1024.8, + "valid_targets_min": 724 + }, + { + "epoch": 4.717444717444717, + "grad_norm": 1.0757884754145068, + "learning_rate": 5.337110107582377e-06, + "loss": 0.1197, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12124276906251907, + "step": 1920, + "valid_targets_mean": 1009.4, + "valid_targets_min": 671 + }, + { + "epoch": 4.72972972972973, + "grad_norm": 0.9995886752697994, + "learning_rate": 5.2402389159414755e-06, + "loss": 0.1222, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1158638596534729, + "step": 1925, + "valid_targets_mean": 925.9, + "valid_targets_min": 631 + }, + { + "epoch": 4.742014742014742, + "grad_norm": 1.0223521571398488, + "learning_rate": 5.144122220606542e-06, + "loss": 0.1169, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11703236401081085, + "step": 1930, + "valid_targets_mean": 984.0, + "valid_targets_min": 702 + }, + { + "epoch": 4.754299754299755, + "grad_norm": 1.0404694060373552, + "learning_rate": 5.048764934915349e-06, + "loss": 0.1207, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11991409212350845, + "step": 1935, + "valid_targets_mean": 949.8, + "valid_targets_min": 690 + }, + { + "epoch": 4.766584766584766, + "grad_norm": 0.9380080260291878, + "learning_rate": 4.954171933385805e-06, + "loss": 0.1173, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11270684748888016, + "step": 1940, + "valid_targets_mean": 948.9, + "valid_targets_min": 664 + }, + { + "epoch": 4.778869778869779, + "grad_norm": 1.0832637396880282, + "learning_rate": 4.8603480514667836e-06, + "loss": 0.1246, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11928368359804153, + "step": 1945, + "valid_targets_mean": 1022.1, + "valid_targets_min": 725 + }, + { + "epoch": 4.791154791154791, + "grad_norm": 1.0768971031975272, + "learning_rate": 4.767298085290963e-06, + "loss": 0.1237, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1263669729232788, + "step": 1950, + "valid_targets_mean": 897.8, + "valid_targets_min": 697 + }, + { + "epoch": 4.803439803439804, + "grad_norm": 0.9702737774903263, + "learning_rate": 4.675026791429624e-06, + "loss": 0.1241, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12418300658464432, + "step": 1955, + "valid_targets_mean": 904.6, + "valid_targets_min": 618 + }, + { + "epoch": 4.815724815724816, + "grad_norm": 0.9847296992516871, + "learning_rate": 4.583538886649525e-06, + "loss": 0.1193, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11021877080202103, + "step": 1960, + "valid_targets_mean": 935.5, + "valid_targets_min": 652 + }, + { + "epoch": 4.828009828009828, + "grad_norm": 1.063535643945527, + "learning_rate": 4.492839047671764e-06, + "loss": 0.1285, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13300850987434387, + "step": 1965, + "valid_targets_mean": 922.7, + "valid_targets_min": 654 + }, + { + "epoch": 4.84029484029484, + "grad_norm": 1.0210663343814104, + "learning_rate": 4.4029319109327465e-06, + "loss": 0.1196, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11711201071739197, + "step": 1970, + "valid_targets_mean": 880.4, + "valid_targets_min": 569 + }, + { + "epoch": 4.8525798525798525, + "grad_norm": 0.9712468912862798, + "learning_rate": 4.313822072347136e-06, + "loss": 0.119, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12143044173717499, + "step": 1975, + "valid_targets_mean": 999.6, + "valid_targets_min": 744 + }, + { + "epoch": 4.864864864864865, + "grad_norm": 1.0590420720026885, + "learning_rate": 4.22551408707296e-06, + "loss": 0.1178, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11938400566577911, + "step": 1980, + "valid_targets_mean": 953.6, + "valid_targets_min": 608 + }, + { + "epoch": 4.877149877149877, + "grad_norm": 0.9655040975370661, + "learning_rate": 4.138012469278714e-06, + "loss": 0.118, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11308306455612183, + "step": 1985, + "valid_targets_mean": 985.0, + "valid_targets_min": 675 + }, + { + "epoch": 4.88943488943489, + "grad_norm": 0.9853903900388323, + "learning_rate": 4.051321691912649e-06, + "loss": 0.1241, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11850358545780182, + "step": 1990, + "valid_targets_mean": 927.3, + "valid_targets_min": 666 + }, + { + "epoch": 4.901719901719901, + "grad_norm": 1.136610862996903, + "learning_rate": 3.9654461864740935e-06, + "loss": 0.1209, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11766823381185532, + "step": 1995, + "valid_targets_mean": 1034.9, + "valid_targets_min": 652 + }, + { + "epoch": 4.914004914004914, + "grad_norm": 1.0345672304711442, + "learning_rate": 3.880390342786915e-06, + "loss": 0.1182, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12373542040586472, + "step": 2000, + "valid_targets_mean": 928.6, + "valid_targets_min": 683 + }, + { + "epoch": 4.926289926289926, + "grad_norm": 0.9310707915085795, + "learning_rate": 3.7961585087751516e-06, + "loss": 0.1217, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10688027739524841, + "step": 2005, + "valid_targets_mean": 1003.1, + "valid_targets_min": 639 + }, + { + "epoch": 4.938574938574939, + "grad_norm": 1.0010633378479221, + "learning_rate": 3.71275499024071e-06, + "loss": 0.1202, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12126513570547104, + "step": 2010, + "valid_targets_mean": 929.6, + "valid_targets_min": 700 + }, + { + "epoch": 4.950859950859951, + "grad_norm": 0.998842822620143, + "learning_rate": 3.6301840506433083e-06, + "loss": 0.1173, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11304894089698792, + "step": 2015, + "valid_targets_mean": 968.3, + "valid_targets_min": 672 + }, + { + "epoch": 4.963144963144963, + "grad_norm": 1.0608010526163005, + "learning_rate": 3.5484499108824853e-06, + "loss": 0.1193, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12789671123027802, + "step": 2020, + "valid_targets_mean": 950.9, + "valid_targets_min": 662 + }, + { + "epoch": 4.975429975429975, + "grad_norm": 1.1814620072009632, + "learning_rate": 3.4675567490818727e-06, + "loss": 0.1284, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13000145554542542, + "step": 2025, + "valid_targets_mean": 998.1, + "valid_targets_min": 600 + }, + { + "epoch": 4.987714987714988, + "grad_norm": 0.9757619779263266, + "learning_rate": 3.3875087003756036e-06, + "loss": 0.1197, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12253376841545105, + "step": 2030, + "valid_targets_mean": 1016.6, + "valid_targets_min": 642 + }, + { + "epoch": 5.0, + "grad_norm": 0.985373116622916, + "learning_rate": 3.30830985669691e-06, + "loss": 0.1194, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10901984572410583, + "step": 2035, + "valid_targets_mean": 903.1, + "valid_targets_min": 603 + }, + { + "epoch": 5.012285012285012, + "grad_norm": 0.898249950933453, + "learning_rate": 3.22996426656899e-06, + "loss": 0.1119, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1041560173034668, + "step": 2040, + "valid_targets_mean": 1034.1, + "valid_targets_min": 649 + }, + { + "epoch": 5.024570024570025, + "grad_norm": 0.9657629403750108, + "learning_rate": 3.1524759348980096e-06, + "loss": 0.1107, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10987867414951324, + "step": 2045, + "valid_targets_mean": 975.7, + "valid_targets_min": 668 + }, + { + "epoch": 5.036855036855036, + "grad_norm": 0.9916984065270475, + "learning_rate": 3.0758488227684212e-06, + "loss": 0.109, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11240162700414658, + "step": 2050, + "valid_targets_mean": 850.5, + "valid_targets_min": 546 + }, + { + "epoch": 5.049140049140049, + "grad_norm": 1.0292386491980945, + "learning_rate": 3.0000868472404423e-06, + "loss": 0.1125, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10477884113788605, + "step": 2055, + "valid_targets_mean": 900.2, + "valid_targets_min": 666 + }, + { + "epoch": 5.061425061425061, + "grad_norm": 1.0337177621260107, + "learning_rate": 2.9251938811498436e-06, + "loss": 0.1087, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10738644003868103, + "step": 2060, + "valid_targets_mean": 907.1, + "valid_targets_min": 571 + }, + { + "epoch": 5.073710073710074, + "grad_norm": 0.9841850019109009, + "learning_rate": 2.8511737529099704e-06, + "loss": 0.1096, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1120748221874237, + "step": 2065, + "valid_targets_mean": 992.1, + "valid_targets_min": 696 + }, + { + "epoch": 5.085995085995086, + "grad_norm": 1.0628658129689879, + "learning_rate": 2.7780302463160235e-06, + "loss": 0.1099, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1170399859547615, + "step": 2070, + "valid_targets_mean": 849.2, + "valid_targets_min": 707 + }, + { + "epoch": 5.098280098280099, + "grad_norm": 1.080297138638144, + "learning_rate": 2.705767100351673e-06, + "loss": 0.1091, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10279746353626251, + "step": 2075, + "valid_targets_mean": 891.6, + "valid_targets_min": 564 + }, + { + "epoch": 5.11056511056511, + "grad_norm": 1.0571562491669322, + "learning_rate": 2.634388008997899e-06, + "loss": 0.1101, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11071351170539856, + "step": 2080, + "valid_targets_mean": 887.2, + "valid_targets_min": 658 + }, + { + "epoch": 5.122850122850123, + "grad_norm": 0.9638229013862132, + "learning_rate": 2.5638966210441597e-06, + "loss": 0.1051, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1005311831831932, + "step": 2085, + "valid_targets_mean": 962.9, + "valid_targets_min": 680 + }, + { + "epoch": 5.135135135135135, + "grad_norm": 1.0067721680943875, + "learning_rate": 2.4942965399018926e-06, + "loss": 0.1099, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11731775104999542, + "step": 2090, + "valid_targets_mean": 978.4, + "valid_targets_min": 477 + }, + { + "epoch": 5.1474201474201475, + "grad_norm": 1.0328796742285484, + "learning_rate": 2.425591323420289e-06, + "loss": 0.1102, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1096126064658165, + "step": 2095, + "valid_targets_mean": 954.4, + "valid_targets_min": 725 + }, + { + "epoch": 5.15970515970516, + "grad_norm": 1.0933867172427512, + "learning_rate": 2.357784483704444e-06, + "loss": 0.1084, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11263054609298706, + "step": 2100, + "valid_targets_mean": 874.9, + "valid_targets_min": 618 + }, + { + "epoch": 5.171990171990172, + "grad_norm": 1.0757324206967882, + "learning_rate": 2.2908794869358044e-06, + "loss": 0.1069, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10877275466918945, + "step": 2105, + "valid_targets_mean": 894.8, + "valid_targets_min": 621 + }, + { + "epoch": 5.184275184275184, + "grad_norm": 1.016487012616333, + "learning_rate": 2.2248797531949952e-06, + "loss": 0.1103, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10262925177812576, + "step": 2110, + "valid_targets_mean": 939.4, + "valid_targets_min": 600 + }, + { + "epoch": 5.196560196560196, + "grad_norm": 1.0146263893123428, + "learning_rate": 2.1597886562869917e-06, + "loss": 0.1093, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1074049323797226, + "step": 2115, + "valid_targets_mean": 925.9, + "valid_targets_min": 609 + }, + { + "epoch": 5.208845208845209, + "grad_norm": 1.0056972893991754, + "learning_rate": 2.095609523568638e-06, + "loss": 0.1085, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10091559588909149, + "step": 2120, + "valid_targets_mean": 1022.2, + "valid_targets_min": 725 + }, + { + "epoch": 5.221130221130221, + "grad_norm": 1.0358191262515057, + "learning_rate": 2.0323456357785855e-06, + "loss": 0.109, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1039903312921524, + "step": 2125, + "valid_targets_mean": 951.8, + "valid_targets_min": 670 + }, + { + "epoch": 5.233415233415234, + "grad_norm": 1.0134901577637836, + "learning_rate": 1.970000226869553e-06, + "loss": 0.1063, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10696413367986679, + "step": 2130, + "valid_targets_mean": 885.4, + "valid_targets_min": 675 + }, + { + "epoch": 5.245700245700245, + "grad_norm": 1.024498628158875, + "learning_rate": 1.90857648384305e-06, + "loss": 0.106, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10174258053302765, + "step": 2135, + "valid_targets_mean": 937.8, + "valid_targets_min": 695 + }, + { + "epoch": 5.257985257985258, + "grad_norm": 1.0778598519574534, + "learning_rate": 1.848077546586431e-06, + "loss": 0.1066, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10440458357334137, + "step": 2140, + "valid_targets_mean": 919.8, + "valid_targets_min": 585 + }, + { + "epoch": 5.27027027027027, + "grad_norm": 1.0810321423664624, + "learning_rate": 1.7885065077123976e-06, + "loss": 0.1086, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1029202789068222, + "step": 2145, + "valid_targets_mean": 937.9, + "valid_targets_min": 652 + }, + { + "epoch": 5.282555282555283, + "grad_norm": 1.0163320736086876, + "learning_rate": 1.7298664124009245e-06, + "loss": 0.1119, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10604019463062286, + "step": 2150, + "valid_targets_mean": 922.1, + "valid_targets_min": 651 + }, + { + "epoch": 5.294840294840295, + "grad_norm": 1.0436926280950187, + "learning_rate": 1.672160258243567e-06, + "loss": 0.1081, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10767054557800293, + "step": 2155, + "valid_targets_mean": 910.1, + "valid_targets_min": 577 + }, + { + "epoch": 5.3071253071253075, + "grad_norm": 1.025380052621418, + "learning_rate": 1.615390995090258e-06, + "loss": 0.1071, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11412075161933899, + "step": 2160, + "valid_targets_mean": 907.0, + "valid_targets_min": 694 + }, + { + "epoch": 5.319410319410319, + "grad_norm": 1.06066295062257, + "learning_rate": 1.559561524898492e-06, + "loss": 0.1094, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10931138694286346, + "step": 2165, + "valid_targets_mean": 950.4, + "valid_targets_min": 697 + }, + { + "epoch": 5.3316953316953315, + "grad_norm": 1.0290583672325067, + "learning_rate": 1.5046747015849893e-06, + "loss": 0.1078, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1094687283039093, + "step": 2170, + "valid_targets_mean": 1013.2, + "valid_targets_min": 730 + }, + { + "epoch": 5.343980343980344, + "grad_norm": 0.9748950643645085, + "learning_rate": 1.4507333308798255e-06, + "loss": 0.1126, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09912136197090149, + "step": 2175, + "valid_targets_mean": 875.9, + "valid_targets_min": 604 + }, + { + "epoch": 5.356265356265356, + "grad_norm": 0.99285365978212, + "learning_rate": 1.3977401701829752e-06, + "loss": 0.1001, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09728066623210907, + "step": 2180, + "valid_targets_mean": 969.6, + "valid_targets_min": 669 + }, + { + "epoch": 5.368550368550369, + "grad_norm": 0.9843852113753192, + "learning_rate": 1.345697928423384e-06, + "loss": 0.108, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10213703662157059, + "step": 2185, + "valid_targets_mean": 1037.0, + "valid_targets_min": 753 + }, + { + "epoch": 5.38083538083538, + "grad_norm": 1.1134497136798602, + "learning_rate": 1.2946092659204767e-06, + "loss": 0.1105, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11370056867599487, + "step": 2190, + "valid_targets_mean": 860.1, + "valid_targets_min": 626 + }, + { + "epoch": 5.393120393120393, + "grad_norm": 1.0604549866907864, + "learning_rate": 1.244476794248175e-06, + "loss": 0.1072, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10983552038669586, + "step": 2195, + "valid_targets_mean": 963.8, + "valid_targets_min": 690 + }, + { + "epoch": 5.405405405405405, + "grad_norm": 1.0695198729391404, + "learning_rate": 1.1953030761014017e-06, + "loss": 0.1056, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1127047911286354, + "step": 2200, + "valid_targets_mean": 917.5, + "valid_targets_min": 587 + }, + { + "epoch": 5.417690417690418, + "grad_norm": 1.0678405165116567, + "learning_rate": 1.147090625165055e-06, + "loss": 0.1047, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.106284499168396, + "step": 2205, + "valid_targets_mean": 926.1, + "valid_targets_min": 650 + }, + { + "epoch": 5.42997542997543, + "grad_norm": 0.9735583312512783, + "learning_rate": 1.0998419059855503e-06, + "loss": 0.11, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09887248277664185, + "step": 2210, + "valid_targets_mean": 907.0, + "valid_targets_min": 693 + }, + { + "epoch": 5.442260442260443, + "grad_norm": 1.0424786244158735, + "learning_rate": 1.053559333844798e-06, + "loss": 0.1078, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1106153056025505, + "step": 2215, + "valid_targets_mean": 814.6, + "valid_targets_min": 619 + }, + { + "epoch": 5.454545454545454, + "grad_norm": 1.0499265026728355, + "learning_rate": 1.0082452746367721e-06, + "loss": 0.1052, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1073426604270935, + "step": 2220, + "valid_targets_mean": 964.6, + "valid_targets_min": 695 + }, + { + "epoch": 5.466830466830467, + "grad_norm": 0.9821498811128075, + "learning_rate": 9.639020447465475e-07, + "loss": 0.105, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10381324589252472, + "step": 2225, + "valid_targets_mean": 964.9, + "valid_targets_min": 705 + }, + { + "epoch": 5.479115479115479, + "grad_norm": 1.0016907393710126, + "learning_rate": 9.205319109318922e-07, + "loss": 0.1045, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10114198923110962, + "step": 2230, + "valid_targets_mean": 944.8, + "valid_targets_min": 682 + }, + { + "epoch": 5.4914004914004915, + "grad_norm": 0.9641568696889248, + "learning_rate": 8.781370902074049e-07, + "loss": 0.104, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10037557780742645, + "step": 2235, + "valid_targets_mean": 1001.0, + "valid_targets_min": 647 + }, + { + "epoch": 5.503685503685504, + "grad_norm": 0.9594456923036004, + "learning_rate": 8.367197497311719e-07, + "loss": 0.1046, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1010562852025032, + "step": 2240, + "valid_targets_mean": 1058.5, + "valid_targets_min": 723 + }, + { + "epoch": 5.515970515970516, + "grad_norm": 1.0378146778929243, + "learning_rate": 7.962820066939958e-07, + "loss": 0.1104, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10684297233819962, + "step": 2245, + "valid_targets_mean": 934.9, + "valid_targets_min": 651 + }, + { + "epoch": 5.528255528255528, + "grad_norm": 1.0414454051454436, + "learning_rate": 7.568259282111645e-07, + "loss": 0.1079, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11059882491827011, + "step": 2250, + "valid_targets_mean": 951.8, + "valid_targets_min": 525 + }, + { + "epoch": 5.54054054054054, + "grad_norm": 1.0679202374122483, + "learning_rate": 7.183535312167755e-07, + "loss": 0.1132, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11686760187149048, + "step": 2255, + "valid_targets_mean": 963.2, + "valid_targets_min": 711 + }, + { + "epoch": 5.552825552825553, + "grad_norm": 0.9421516676878753, + "learning_rate": 6.808667823606474e-07, + "loss": 0.1073, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09937810152769089, + "step": 2260, + "valid_targets_mean": 953.2, + "valid_targets_min": 633 + }, + { + "epoch": 5.565110565110565, + "grad_norm": 1.1326899561762167, + "learning_rate": 6.443675979077779e-07, + "loss": 0.1055, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11326974630355835, + "step": 2265, + "valid_targets_mean": 913.2, + "valid_targets_min": 648 + }, + { + "epoch": 5.577395577395578, + "grad_norm": 1.0305503917388301, + "learning_rate": 6.088578436403847e-07, + "loss": 0.1047, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10154630988836288, + "step": 2270, + "valid_targets_mean": 897.8, + "valid_targets_min": 722 + }, + { + "epoch": 5.58968058968059, + "grad_norm": 1.1943199506211184, + "learning_rate": 5.743393347625436e-07, + "loss": 0.1077, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11854109913110733, + "step": 2275, + "valid_targets_mean": 1039.3, + "valid_targets_min": 682 + }, + { + "epoch": 5.601965601965602, + "grad_norm": 0.9707605132234587, + "learning_rate": 5.408138358073833e-07, + "loss": 0.1068, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10274074226617813, + "step": 2280, + "valid_targets_mean": 1006.6, + "valid_targets_min": 651 + }, + { + "epoch": 5.614250614250614, + "grad_norm": 1.0265651098429756, + "learning_rate": 5.082830605468969e-07, + "loss": 0.1106, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10965031385421753, + "step": 2285, + "valid_targets_mean": 1016.5, + "valid_targets_min": 621 + }, + { + "epoch": 5.6265356265356266, + "grad_norm": 0.973625595592468, + "learning_rate": 4.767486719043235e-07, + "loss": 0.1111, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11382775008678436, + "step": 2290, + "valid_targets_mean": 1066.2, + "valid_targets_min": 618 + }, + { + "epoch": 5.638820638820639, + "grad_norm": 1.0117380057721537, + "learning_rate": 4.4621228186915833e-07, + "loss": 0.1048, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10289210826158524, + "step": 2295, + "valid_targets_mean": 1000.9, + "valid_targets_min": 663 + }, + { + "epoch": 5.651105651105651, + "grad_norm": 1.0474838253089391, + "learning_rate": 4.166754514147275e-07, + "loss": 0.1118, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10855976492166519, + "step": 2300, + "valid_targets_mean": 955.1, + "valid_targets_min": 694 + }, + { + "epoch": 5.663390663390663, + "grad_norm": 0.98024038477301, + "learning_rate": 3.881396904184231e-07, + "loss": 0.1077, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11711513996124268, + "step": 2305, + "valid_targets_mean": 1028.2, + "valid_targets_min": 682 + }, + { + "epoch": 5.675675675675675, + "grad_norm": 0.9692492106629949, + "learning_rate": 3.6060645758449584e-07, + "loss": 0.1034, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09136329591274261, + "step": 2310, + "valid_targets_mean": 895.3, + "valid_targets_min": 729 + }, + { + "epoch": 5.687960687960688, + "grad_norm": 1.249363779180284, + "learning_rate": 3.34077160369497e-07, + "loss": 0.1087, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11524377018213272, + "step": 2315, + "valid_targets_mean": 924.3, + "valid_targets_min": 648 + }, + { + "epoch": 5.7002457002457, + "grad_norm": 0.9650264170829322, + "learning_rate": 3.08553154910336e-07, + "loss": 0.1086, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10062923282384872, + "step": 2320, + "valid_targets_mean": 936.2, + "valid_targets_min": 697 + }, + { + "epoch": 5.712530712530713, + "grad_norm": 1.0905576626909141, + "learning_rate": 2.840357459549492e-07, + "loss": 0.1048, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11498048901557922, + "step": 2325, + "valid_targets_mean": 945.2, + "valid_targets_min": 673 + }, + { + "epoch": 5.724815724815725, + "grad_norm": 1.0784679939229243, + "learning_rate": 2.6052618679560884e-07, + "loss": 0.1076, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1087181344628334, + "step": 2330, + "valid_targets_mean": 878.7, + "valid_targets_min": 586 + }, + { + "epoch": 5.737100737100737, + "grad_norm": 1.109625440357797, + "learning_rate": 2.380256792048541e-07, + "loss": 0.1045, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10212244093418121, + "step": 2335, + "valid_targets_mean": 857.2, + "valid_targets_min": 650 + }, + { + "epoch": 5.749385749385749, + "grad_norm": 1.1038910625984446, + "learning_rate": 2.1653537337405383e-07, + "loss": 0.1057, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10803759843111038, + "step": 2340, + "valid_targets_mean": 878.2, + "valid_targets_min": 683 + }, + { + "epoch": 5.761670761670762, + "grad_norm": 0.9881133477924188, + "learning_rate": 1.9605636785462234e-07, + "loss": 0.1046, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10298296809196472, + "step": 2345, + "valid_targets_mean": 1027.2, + "valid_targets_min": 647 + }, + { + "epoch": 5.773955773955774, + "grad_norm": 1.0253155264535176, + "learning_rate": 1.7658970950185095e-07, + "loss": 0.1046, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10427086055278778, + "step": 2350, + "valid_targets_mean": 961.8, + "valid_targets_min": 627 + }, + { + "epoch": 5.7862407862407865, + "grad_norm": 1.0906096465506647, + "learning_rate": 1.5813639342140197e-07, + "loss": 0.1038, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10633835196495056, + "step": 2355, + "valid_targets_mean": 1000.7, + "valid_targets_min": 559 + }, + { + "epoch": 5.798525798525798, + "grad_norm": 1.8182311216583416, + "learning_rate": 1.4069736291843605e-07, + "loss": 0.1071, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10948808491230011, + "step": 2360, + "valid_targets_mean": 982.2, + "valid_targets_min": 638 + }, + { + "epoch": 5.8108108108108105, + "grad_norm": 1.0293292710364181, + "learning_rate": 1.242735094493952e-07, + "loss": 0.1105, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11027572304010391, + "step": 2365, + "valid_targets_mean": 936.6, + "valid_targets_min": 709 + }, + { + "epoch": 5.823095823095823, + "grad_norm": 0.9852029775566562, + "learning_rate": 1.0886567257643033e-07, + "loss": 0.1078, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1116725355386734, + "step": 2370, + "valid_targets_mean": 999.9, + "valid_targets_min": 680 + }, + { + "epoch": 5.835380835380835, + "grad_norm": 1.0578402030344896, + "learning_rate": 9.447463992448891e-08, + "loss": 0.1073, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10911673307418823, + "step": 2375, + "valid_targets_mean": 1026.6, + "valid_targets_min": 728 + }, + { + "epoch": 5.847665847665848, + "grad_norm": 1.0386635253743162, + "learning_rate": 8.110114714104277e-08, + "loss": 0.1061, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10054969787597656, + "step": 2380, + "valid_targets_mean": 959.7, + "valid_targets_min": 763 + }, + { + "epoch": 5.85995085995086, + "grad_norm": 1.0529356284674416, + "learning_rate": 6.874587785849152e-08, + "loss": 0.1027, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10330841690301895, + "step": 2385, + "valid_targets_mean": 943.1, + "valid_targets_min": 563 + }, + { + "epoch": 5.872235872235873, + "grad_norm": 1.0438404646255461, + "learning_rate": 5.7409463659219286e-08, + "loss": 0.1087, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10719597339630127, + "step": 2390, + "valid_targets_mean": 958.3, + "valid_targets_min": 639 + }, + { + "epoch": 5.884520884520884, + "grad_norm": 1.0624307976403193, + "learning_rate": 4.709248404329625e-08, + "loss": 0.1069, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10844609141349792, + "step": 2395, + "valid_targets_mean": 888.2, + "valid_targets_min": 620 + }, + { + "epoch": 5.896805896805897, + "grad_norm": 1.0904697662506537, + "learning_rate": 3.7795466398868885e-08, + "loss": 0.1077, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10921336710453033, + "step": 2400, + "valid_targets_mean": 914.4, + "valid_targets_min": 633 + }, + { + "epoch": 5.909090909090909, + "grad_norm": 1.0358904150466581, + "learning_rate": 2.9518885975192702e-08, + "loss": 0.1084, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10594348609447479, + "step": 2405, + "valid_targets_mean": 927.4, + "valid_targets_min": 655 + }, + { + "epoch": 5.921375921375922, + "grad_norm": 1.1266522336395215, + "learning_rate": 2.226316585833832e-08, + "loss": 0.1112, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1268506497144699, + "step": 2410, + "valid_targets_mean": 949.9, + "valid_targets_min": 538 + }, + { + "epoch": 5.933660933660933, + "grad_norm": 1.1007783479652495, + "learning_rate": 1.6028676949570997e-08, + "loss": 0.1126, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10957098007202148, + "step": 2415, + "valid_targets_mean": 867.8, + "valid_targets_min": 697 + }, + { + "epoch": 5.945945945945946, + "grad_norm": 1.0215671203230696, + "learning_rate": 1.0815737946383575e-08, + "loss": 0.1053, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10414718091487885, + "step": 2420, + "valid_targets_mean": 981.7, + "valid_targets_min": 666 + }, + { + "epoch": 5.958230958230958, + "grad_norm": 1.0154249846900063, + "learning_rate": 6.624615326207284e-09, + "loss": 0.1083, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10006681829690933, + "step": 2425, + "valid_targets_mean": 848.8, + "valid_targets_min": 575 + }, + { + "epoch": 5.9705159705159705, + "grad_norm": 1.0008218944709328, + "learning_rate": 3.4555233327893124e-09, + "loss": 0.1123, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09888888895511627, + "step": 2430, + "valid_targets_mean": 907.8, + "valid_targets_min": 720 + }, + { + "epoch": 5.982800982800983, + "grad_norm": 1.0056204830892612, + "learning_rate": 1.3086239652415621e-09, + "loss": 0.1021, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09257914125919342, + "step": 2435, + "valid_targets_mean": 986.1, + "valid_targets_min": 715 + }, + { + "epoch": 5.995085995085995, + "grad_norm": 1.1057817949511841, + "learning_rate": 1.840269697628294e-10, + "loss": 0.1083, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11148717999458313, + "step": 2440, + "valid_targets_mean": 902.6, + "valid_targets_min": 663 + }, + { + "epoch": 6.0, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10436549037694931, + "step": 2442, + "total_flos": 197844182237184.0, + "train_loss": 0.18043228318757643, + "train_runtime": 7422.9369, + "train_samples_per_second": 5.256, + "train_steps_per_second": 0.329, + "valid_targets_mean": 1001.2, + "valid_targets_min": 670 + } + ], + "logging_steps": 5, + "max_steps": 2442, + "num_input_tokens_seen": 0, + "num_train_epochs": 6, + "save_steps": 100, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": true + }, + "attributes": {} + } + }, + "total_flos": 197844182237184.0, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +}