{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 6.0, "eval_steps": 500, "global_step": 2442, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.012285012285012284, "grad_norm": 20.458864687141354, "learning_rate": 6.530612244897961e-07, "loss": 1.0261, "loss_nan_ranks": 0, "loss_rank_avg": 1.017302393913269, "step": 5, "valid_targets_mean": 1250.2, "valid_targets_min": 648 }, { "epoch": 0.02457002457002457, "grad_norm": 16.62050892535432, "learning_rate": 1.469387755102041e-06, "loss": 0.9884, "loss_nan_ranks": 0, "loss_rank_avg": 0.9597591161727905, "step": 10, "valid_targets_mean": 1490.1, "valid_targets_min": 971 }, { "epoch": 0.036855036855036855, "grad_norm": 16.77513160699125, "learning_rate": 2.285714285714286e-06, "loss": 0.9736, "loss_nan_ranks": 0, "loss_rank_avg": 0.9614359140396118, "step": 15, "valid_targets_mean": 1209.8, "valid_targets_min": 739 }, { "epoch": 0.04914004914004914, "grad_norm": 10.374535357780216, "learning_rate": 3.1020408163265307e-06, "loss": 0.864, "loss_nan_ranks": 0, "loss_rank_avg": 0.8312211036682129, "step": 20, "valid_targets_mean": 1436.6, "valid_targets_min": 853 }, { "epoch": 0.06142506142506143, "grad_norm": 6.786503884512912, "learning_rate": 3.9183673469387755e-06, "loss": 0.7762, "loss_nan_ranks": 0, "loss_rank_avg": 0.7279175519943237, "step": 25, "valid_targets_mean": 1216.4, "valid_targets_min": 699 }, { "epoch": 0.07371007371007371, "grad_norm": 4.583415663665711, "learning_rate": 4.734693877551021e-06, "loss": 0.677, "loss_nan_ranks": 0, "loss_rank_avg": 0.6565301418304443, "step": 30, "valid_targets_mean": 1385.0, "valid_targets_min": 568 }, { "epoch": 0.085995085995086, "grad_norm": 2.731049507027889, "learning_rate": 5.551020408163266e-06, "loss": 0.612, "loss_nan_ranks": 0, "loss_rank_avg": 0.5778182148933411, "step": 35, "valid_targets_mean": 1544.8, "valid_targets_min": 654 }, { "epoch": 0.09828009828009827, "grad_norm": 1.9598661561285227, "learning_rate": 6.36734693877551e-06, "loss": 0.5422, "loss_nan_ranks": 0, "loss_rank_avg": 0.5209269523620605, "step": 40, "valid_targets_mean": 1367.7, "valid_targets_min": 574 }, { "epoch": 0.11056511056511056, "grad_norm": 1.638653833946308, "learning_rate": 7.183673469387755e-06, "loss": 0.4922, "loss_nan_ranks": 0, "loss_rank_avg": 0.47820889949798584, "step": 45, "valid_targets_mean": 1329.5, "valid_targets_min": 531 }, { "epoch": 0.12285012285012285, "grad_norm": 1.6480129713282212, "learning_rate": 8.000000000000001e-06, "loss": 0.4663, "loss_nan_ranks": 0, "loss_rank_avg": 0.4506987929344177, "step": 50, "valid_targets_mean": 1316.3, "valid_targets_min": 783 }, { "epoch": 0.13513513513513514, "grad_norm": 1.3915776930314507, "learning_rate": 8.816326530612247e-06, "loss": 0.421, "loss_nan_ranks": 0, "loss_rank_avg": 0.41274315118789673, "step": 55, "valid_targets_mean": 1350.4, "valid_targets_min": 794 }, { "epoch": 0.14742014742014742, "grad_norm": 1.8150172535705966, "learning_rate": 9.63265306122449e-06, "loss": 0.4052, "loss_nan_ranks": 0, "loss_rank_avg": 0.37979835271835327, "step": 60, "valid_targets_mean": 1348.1, "valid_targets_min": 693 }, { "epoch": 0.1597051597051597, "grad_norm": 1.2911082102402043, "learning_rate": 1.0448979591836737e-05, "loss": 0.3751, "loss_nan_ranks": 0, "loss_rank_avg": 0.36601579189300537, "step": 65, "valid_targets_mean": 1233.3, "valid_targets_min": 624 }, { "epoch": 0.171990171990172, "grad_norm": 1.1066917837756176, "learning_rate": 1.126530612244898e-05, "loss": 0.3725, "loss_nan_ranks": 0, "loss_rank_avg": 0.3778034746646881, "step": 70, "valid_targets_mean": 1599.6, "valid_targets_min": 768 }, { "epoch": 0.18427518427518427, "grad_norm": 1.2330283871793921, "learning_rate": 1.2081632653061225e-05, "loss": 0.3497, "loss_nan_ranks": 0, "loss_rank_avg": 0.33981752395629883, "step": 75, "valid_targets_mean": 1175.1, "valid_targets_min": 661 }, { "epoch": 0.19656019656019655, "grad_norm": 1.1955197787044425, "learning_rate": 1.2897959183673469e-05, "loss": 0.3275, "loss_nan_ranks": 0, "loss_rank_avg": 0.3365780711174011, "step": 80, "valid_targets_mean": 1151.1, "valid_targets_min": 712 }, { "epoch": 0.20884520884520885, "grad_norm": 1.1040514955581373, "learning_rate": 1.3714285714285716e-05, "loss": 0.3205, "loss_nan_ranks": 0, "loss_rank_avg": 0.31247782707214355, "step": 85, "valid_targets_mean": 1273.6, "valid_targets_min": 590 }, { "epoch": 0.22113022113022113, "grad_norm": 1.1067102481311106, "learning_rate": 1.4530612244897961e-05, "loss": 0.3052, "loss_nan_ranks": 0, "loss_rank_avg": 0.3124191164970398, "step": 90, "valid_targets_mean": 1315.8, "valid_targets_min": 649 }, { "epoch": 0.2334152334152334, "grad_norm": 1.1174617597851082, "learning_rate": 1.5346938775510204e-05, "loss": 0.3146, "loss_nan_ranks": 0, "loss_rank_avg": 0.31378817558288574, "step": 95, "valid_targets_mean": 1209.7, "valid_targets_min": 631 }, { "epoch": 0.2457002457002457, "grad_norm": 1.0357427151455243, "learning_rate": 1.616326530612245e-05, "loss": 0.3138, "loss_nan_ranks": 0, "loss_rank_avg": 0.29703712463378906, "step": 100, "valid_targets_mean": 1322.4, "valid_targets_min": 940 }, { "epoch": 0.257985257985258, "grad_norm": 1.1422371590736977, "learning_rate": 1.6979591836734695e-05, "loss": 0.3098, "loss_nan_ranks": 0, "loss_rank_avg": 0.3078041970729828, "step": 105, "valid_targets_mean": 1323.4, "valid_targets_min": 711 }, { "epoch": 0.2702702702702703, "grad_norm": 1.0038192317155883, "learning_rate": 1.779591836734694e-05, "loss": 0.2963, "loss_nan_ranks": 0, "loss_rank_avg": 0.3173219859600067, "step": 110, "valid_targets_mean": 1408.8, "valid_targets_min": 897 }, { "epoch": 0.28255528255528256, "grad_norm": 0.9855120304223134, "learning_rate": 1.8612244897959185e-05, "loss": 0.2866, "loss_nan_ranks": 0, "loss_rank_avg": 0.2685781717300415, "step": 115, "valid_targets_mean": 1392.1, "valid_targets_min": 813 }, { "epoch": 0.29484029484029484, "grad_norm": 1.0672252795112558, "learning_rate": 1.942857142857143e-05, "loss": 0.289, "loss_nan_ranks": 0, "loss_rank_avg": 0.2954533100128174, "step": 120, "valid_targets_mean": 1364.4, "valid_targets_min": 710 }, { "epoch": 0.3071253071253071, "grad_norm": 1.0799002257104326, "learning_rate": 2.0244897959183672e-05, "loss": 0.2895, "loss_nan_ranks": 0, "loss_rank_avg": 0.2752310037612915, "step": 125, "valid_targets_mean": 1465.1, "valid_targets_min": 941 }, { "epoch": 0.3194103194103194, "grad_norm": 1.3135467049033194, "learning_rate": 2.106122448979592e-05, "loss": 0.2925, "loss_nan_ranks": 0, "loss_rank_avg": 0.29970723390579224, "step": 130, "valid_targets_mean": 1072.5, "valid_targets_min": 663 }, { "epoch": 0.3316953316953317, "grad_norm": 1.1052416113035322, "learning_rate": 2.1877551020408166e-05, "loss": 0.2846, "loss_nan_ranks": 0, "loss_rank_avg": 0.3040231466293335, "step": 135, "valid_targets_mean": 1425.1, "valid_targets_min": 727 }, { "epoch": 0.343980343980344, "grad_norm": 1.0034696175995714, "learning_rate": 2.269387755102041e-05, "loss": 0.2975, "loss_nan_ranks": 0, "loss_rank_avg": 0.31153547763824463, "step": 140, "valid_targets_mean": 1287.7, "valid_targets_min": 637 }, { "epoch": 0.35626535626535627, "grad_norm": 1.0317670697456185, "learning_rate": 2.3510204081632656e-05, "loss": 0.275, "loss_nan_ranks": 0, "loss_rank_avg": 0.28750118613243103, "step": 145, "valid_targets_mean": 1450.8, "valid_targets_min": 724 }, { "epoch": 0.36855036855036855, "grad_norm": 1.045825397854041, "learning_rate": 2.4326530612244898e-05, "loss": 0.2869, "loss_nan_ranks": 0, "loss_rank_avg": 0.28550174832344055, "step": 150, "valid_targets_mean": 1355.8, "valid_targets_min": 646 }, { "epoch": 0.3808353808353808, "grad_norm": 1.0635125133091923, "learning_rate": 2.5142857142857143e-05, "loss": 0.2729, "loss_nan_ranks": 0, "loss_rank_avg": 0.2671389877796173, "step": 155, "valid_targets_mean": 1190.7, "valid_targets_min": 700 }, { "epoch": 0.3931203931203931, "grad_norm": 1.0793917463505718, "learning_rate": 2.5959183673469392e-05, "loss": 0.2829, "loss_nan_ranks": 0, "loss_rank_avg": 0.2713095545768738, "step": 160, "valid_targets_mean": 1198.3, "valid_targets_min": 800 }, { "epoch": 0.40540540540540543, "grad_norm": 1.2207070496489467, "learning_rate": 2.6775510204081637e-05, "loss": 0.2733, "loss_nan_ranks": 0, "loss_rank_avg": 0.2674906253814697, "step": 165, "valid_targets_mean": 1239.4, "valid_targets_min": 713 }, { "epoch": 0.4176904176904177, "grad_norm": 1.0046068676053646, "learning_rate": 2.7591836734693882e-05, "loss": 0.2754, "loss_nan_ranks": 0, "loss_rank_avg": 0.26432591676712036, "step": 170, "valid_targets_mean": 1440.5, "valid_targets_min": 598 }, { "epoch": 0.42997542997543, "grad_norm": 0.9375984105225126, "learning_rate": 2.8408163265306124e-05, "loss": 0.2772, "loss_nan_ranks": 0, "loss_rank_avg": 0.2688313126564026, "step": 175, "valid_targets_mean": 1560.9, "valid_targets_min": 872 }, { "epoch": 0.44226044226044225, "grad_norm": 1.1180598027008593, "learning_rate": 2.922448979591837e-05, "loss": 0.2782, "loss_nan_ranks": 0, "loss_rank_avg": 0.2788759171962738, "step": 180, "valid_targets_mean": 1178.6, "valid_targets_min": 721 }, { "epoch": 0.45454545454545453, "grad_norm": 1.115636815298234, "learning_rate": 3.0040816326530614e-05, "loss": 0.2688, "loss_nan_ranks": 0, "loss_rank_avg": 0.27034085988998413, "step": 185, "valid_targets_mean": 1167.0, "valid_targets_min": 516 }, { "epoch": 0.4668304668304668, "grad_norm": 1.1455483058873124, "learning_rate": 3.085714285714286e-05, "loss": 0.264, "loss_nan_ranks": 0, "loss_rank_avg": 0.28708407282829285, "step": 190, "valid_targets_mean": 1329.9, "valid_targets_min": 619 }, { "epoch": 0.47911547911547914, "grad_norm": 1.1215868583441444, "learning_rate": 3.1673469387755105e-05, "loss": 0.26, "loss_nan_ranks": 0, "loss_rank_avg": 0.24477200210094452, "step": 195, "valid_targets_mean": 1164.9, "valid_targets_min": 756 }, { "epoch": 0.4914004914004914, "grad_norm": 1.0774130715647405, "learning_rate": 3.2489795918367346e-05, "loss": 0.252, "loss_nan_ranks": 0, "loss_rank_avg": 0.22574767470359802, "step": 200, "valid_targets_mean": 1299.4, "valid_targets_min": 618 }, { "epoch": 0.5036855036855037, "grad_norm": 1.4181221032778561, "learning_rate": 3.3306122448979595e-05, "loss": 0.2661, "loss_nan_ranks": 0, "loss_rank_avg": 0.26352864503860474, "step": 205, "valid_targets_mean": 1260.2, "valid_targets_min": 874 }, { "epoch": 0.515970515970516, "grad_norm": 1.147550012481648, "learning_rate": 3.4122448979591843e-05, "loss": 0.261, "loss_nan_ranks": 0, "loss_rank_avg": 0.261576384305954, "step": 210, "valid_targets_mean": 1420.3, "valid_targets_min": 861 }, { "epoch": 0.5282555282555282, "grad_norm": 0.9387739790313563, "learning_rate": 3.4938775510204085e-05, "loss": 0.2496, "loss_nan_ranks": 0, "loss_rank_avg": 0.2405305653810501, "step": 215, "valid_targets_mean": 1357.9, "valid_targets_min": 1000 }, { "epoch": 0.5405405405405406, "grad_norm": 1.0028258368925376, "learning_rate": 3.575510204081633e-05, "loss": 0.2573, "loss_nan_ranks": 0, "loss_rank_avg": 0.25683438777923584, "step": 220, "valid_targets_mean": 1370.2, "valid_targets_min": 783 }, { "epoch": 0.5528255528255528, "grad_norm": 1.0087280455087901, "learning_rate": 3.6571428571428576e-05, "loss": 0.2552, "loss_nan_ranks": 0, "loss_rank_avg": 0.2451547086238861, "step": 225, "valid_targets_mean": 1205.2, "valid_targets_min": 696 }, { "epoch": 0.5651105651105651, "grad_norm": 1.0332327473283356, "learning_rate": 3.738775510204082e-05, "loss": 0.2577, "loss_nan_ranks": 0, "loss_rank_avg": 0.2729160785675049, "step": 230, "valid_targets_mean": 1312.5, "valid_targets_min": 809 }, { "epoch": 0.5773955773955773, "grad_norm": 1.0664198583479307, "learning_rate": 3.8204081632653066e-05, "loss": 0.251, "loss_nan_ranks": 0, "loss_rank_avg": 0.24643297493457794, "step": 235, "valid_targets_mean": 1375.5, "valid_targets_min": 725 }, { "epoch": 0.5896805896805897, "grad_norm": 1.3724435186192199, "learning_rate": 3.902040816326531e-05, "loss": 0.2615, "loss_nan_ranks": 0, "loss_rank_avg": 0.23138344287872314, "step": 240, "valid_targets_mean": 1403.4, "valid_targets_min": 631 }, { "epoch": 0.601965601965602, "grad_norm": 0.9920775530391814, "learning_rate": 3.983673469387755e-05, "loss": 0.262, "loss_nan_ranks": 0, "loss_rank_avg": 0.2616458237171173, "step": 245, "valid_targets_mean": 1307.3, "valid_targets_min": 679 }, { "epoch": 0.6142506142506142, "grad_norm": 1.0722956456758705, "learning_rate": 3.9999672841332876e-05, "loss": 0.2551, "loss_nan_ranks": 0, "loss_rank_avg": 0.23813892900943756, "step": 250, "valid_targets_mean": 1384.2, "valid_targets_min": 643 }, { "epoch": 0.6265356265356266, "grad_norm": 1.187546618081259, "learning_rate": 3.999834377759164e-05, "loss": 0.2605, "loss_nan_ranks": 0, "loss_rank_avg": 0.25846004486083984, "step": 255, "valid_targets_mean": 1287.4, "valid_targets_min": 582 }, { "epoch": 0.6388206388206388, "grad_norm": 1.0313710783686258, "learning_rate": 3.999599242924703e-05, "loss": 0.2569, "loss_nan_ranks": 0, "loss_rank_avg": 0.25932109355926514, "step": 260, "valid_targets_mean": 1212.7, "valid_targets_min": 577 }, { "epoch": 0.6511056511056511, "grad_norm": 0.9509239983764558, "learning_rate": 3.999261891649637e-05, "loss": 0.2569, "loss_nan_ranks": 0, "loss_rank_avg": 0.23473910987377167, "step": 265, "valid_targets_mean": 1461.9, "valid_targets_min": 979 }, { "epoch": 0.6633906633906634, "grad_norm": 0.9843909928851137, "learning_rate": 3.9988223411788436e-05, "loss": 0.2612, "loss_nan_ranks": 0, "loss_rank_avg": 0.2610856294631958, "step": 270, "valid_targets_mean": 1577.8, "valid_targets_min": 753 }, { "epoch": 0.6756756756756757, "grad_norm": 2.3257950263930125, "learning_rate": 3.998280613981468e-05, "loss": 0.2481, "loss_nan_ranks": 0, "loss_rank_avg": 0.24677515029907227, "step": 275, "valid_targets_mean": 1449.1, "valid_targets_min": 819 }, { "epoch": 0.687960687960688, "grad_norm": 1.0120069901357338, "learning_rate": 3.9976367377497725e-05, "loss": 0.252, "loss_nan_ranks": 0, "loss_rank_avg": 0.243765190243721, "step": 280, "valid_targets_mean": 1120.2, "valid_targets_min": 631 }, { "epoch": 0.7002457002457002, "grad_norm": 1.0056916875862807, "learning_rate": 3.99689074539772e-05, "loss": 0.2498, "loss_nan_ranks": 0, "loss_rank_avg": 0.24412012100219727, "step": 285, "valid_targets_mean": 1168.4, "valid_targets_min": 599 }, { "epoch": 0.7125307125307125, "grad_norm": 0.9928969394751843, "learning_rate": 3.9960426750592936e-05, "loss": 0.2447, "loss_nan_ranks": 0, "loss_rank_avg": 0.25321725010871887, "step": 290, "valid_targets_mean": 1205.3, "valid_targets_min": 833 }, { "epoch": 0.7248157248157249, "grad_norm": 1.0035539521741665, "learning_rate": 3.995092570086546e-05, "loss": 0.2502, "loss_nan_ranks": 0, "loss_rank_avg": 0.23981334269046783, "step": 295, "valid_targets_mean": 1309.0, "valid_targets_min": 681 }, { "epoch": 0.7371007371007371, "grad_norm": 0.9217446118721824, "learning_rate": 3.9940404790473825e-05, "loss": 0.2461, "loss_nan_ranks": 0, "loss_rank_avg": 0.22987031936645508, "step": 300, "valid_targets_mean": 1249.8, "valid_targets_min": 656 }, { "epoch": 0.7493857493857494, "grad_norm": 0.9911047619338179, "learning_rate": 3.992886455723082e-05, "loss": 0.2502, "loss_nan_ranks": 0, "loss_rank_avg": 0.25888746976852417, "step": 305, "valid_targets_mean": 1174.4, "valid_targets_min": 546 }, { "epoch": 0.7616707616707616, "grad_norm": 0.8658673852164612, "learning_rate": 3.991630559105541e-05, "loss": 0.236, "loss_nan_ranks": 0, "loss_rank_avg": 0.22796308994293213, "step": 310, "valid_targets_mean": 1366.1, "valid_targets_min": 828 }, { "epoch": 0.773955773955774, "grad_norm": 1.0842055642509907, "learning_rate": 3.990272853394268e-05, "loss": 0.2446, "loss_nan_ranks": 0, "loss_rank_avg": 0.23061205446720123, "step": 315, "valid_targets_mean": 1168.6, "valid_targets_min": 692 }, { "epoch": 0.7862407862407862, "grad_norm": 0.9014834838441966, "learning_rate": 3.988813407993089e-05, "loss": 0.2254, "loss_nan_ranks": 0, "loss_rank_avg": 0.23373663425445557, "step": 320, "valid_targets_mean": 1399.3, "valid_targets_min": 848 }, { "epoch": 0.7985257985257985, "grad_norm": 0.9338172197869813, "learning_rate": 3.987252297506613e-05, "loss": 0.2405, "loss_nan_ranks": 0, "loss_rank_avg": 0.2474566400051117, "step": 325, "valid_targets_mean": 1277.9, "valid_targets_min": 659 }, { "epoch": 0.8108108108108109, "grad_norm": 0.913549961658726, "learning_rate": 3.9855896017364075e-05, "loss": 0.2373, "loss_nan_ranks": 0, "loss_rank_avg": 0.24477137625217438, "step": 330, "valid_targets_mean": 1268.5, "valid_targets_min": 559 }, { "epoch": 0.8230958230958231, "grad_norm": 0.9127581901150593, "learning_rate": 3.983825405676927e-05, "loss": 0.2442, "loss_nan_ranks": 0, "loss_rank_avg": 0.2348695546388626, "step": 335, "valid_targets_mean": 1384.6, "valid_targets_min": 898 }, { "epoch": 0.8353808353808354, "grad_norm": 0.886670563471073, "learning_rate": 3.981959799511161e-05, "loss": 0.242, "loss_nan_ranks": 0, "loss_rank_avg": 0.23363958299160004, "step": 340, "valid_targets_mean": 1429.6, "valid_targets_min": 613 }, { "epoch": 0.8476658476658476, "grad_norm": 1.101260931849648, "learning_rate": 3.979992878606032e-05, "loss": 0.2486, "loss_nan_ranks": 0, "loss_rank_avg": 0.23046278953552246, "step": 345, "valid_targets_mean": 1162.9, "valid_targets_min": 683 }, { "epoch": 0.85995085995086, "grad_norm": 0.9340190900844615, "learning_rate": 3.977924743507513e-05, "loss": 0.2479, "loss_nan_ranks": 0, "loss_rank_avg": 0.23310105502605438, "step": 350, "valid_targets_mean": 1256.2, "valid_targets_min": 592 }, { "epoch": 0.8722358722358723, "grad_norm": 0.941031348253831, "learning_rate": 3.975755499935492e-05, "loss": 0.2552, "loss_nan_ranks": 0, "loss_rank_avg": 0.27752816677093506, "step": 355, "valid_targets_mean": 1220.6, "valid_targets_min": 597 }, { "epoch": 0.8845208845208845, "grad_norm": 0.7902767470010282, "learning_rate": 3.973485258778368e-05, "loss": 0.2316, "loss_nan_ranks": 0, "loss_rank_avg": 0.230636328458786, "step": 360, "valid_targets_mean": 1582.8, "valid_targets_min": 696 }, { "epoch": 0.8968058968058968, "grad_norm": 0.9584038623179534, "learning_rate": 3.971114136087379e-05, "loss": 0.2409, "loss_nan_ranks": 0, "loss_rank_avg": 0.24100783467292786, "step": 365, "valid_targets_mean": 1402.8, "valid_targets_min": 834 }, { "epoch": 0.9090909090909091, "grad_norm": 0.9401862753457677, "learning_rate": 3.968642253070675e-05, "loss": 0.2353, "loss_nan_ranks": 0, "loss_rank_avg": 0.23034992814064026, "step": 370, "valid_targets_mean": 1309.1, "valid_targets_min": 601 }, { "epoch": 0.9213759213759214, "grad_norm": 0.926409081388446, "learning_rate": 3.966069736087116e-05, "loss": 0.2481, "loss_nan_ranks": 0, "loss_rank_avg": 0.24051252007484436, "step": 375, "valid_targets_mean": 1465.2, "valid_targets_min": 569 }, { "epoch": 0.9336609336609336, "grad_norm": 0.9392772230831478, "learning_rate": 3.963396716639818e-05, "loss": 0.2361, "loss_nan_ranks": 0, "loss_rank_avg": 0.2311394214630127, "step": 380, "valid_targets_mean": 1381.5, "valid_targets_min": 855 }, { "epoch": 0.9459459459459459, "grad_norm": 1.1295683321200982, "learning_rate": 3.960623331369427e-05, "loss": 0.2355, "loss_nan_ranks": 0, "loss_rank_avg": 0.20992863178253174, "step": 385, "valid_targets_mean": 1067.1, "valid_targets_min": 833 }, { "epoch": 0.9582309582309583, "grad_norm": 0.8472505054920229, "learning_rate": 3.957749722047138e-05, "loss": 0.231, "loss_nan_ranks": 0, "loss_rank_avg": 0.2383286952972412, "step": 390, "valid_targets_mean": 1291.5, "valid_targets_min": 639 }, { "epoch": 0.9705159705159705, "grad_norm": 0.8747395591274454, "learning_rate": 3.9547760355674405e-05, "loss": 0.2226, "loss_nan_ranks": 0, "loss_rank_avg": 0.22372183203697205, "step": 395, "valid_targets_mean": 1429.6, "valid_targets_min": 860 }, { "epoch": 0.9828009828009828, "grad_norm": 0.8723755304470501, "learning_rate": 3.951702423940621e-05, "loss": 0.235, "loss_nan_ranks": 0, "loss_rank_avg": 0.2421160787343979, "step": 400, "valid_targets_mean": 1270.6, "valid_targets_min": 668 }, { "epoch": 0.995085995085995, "grad_norm": 1.05363286328019, "learning_rate": 3.948529044284981e-05, "loss": 0.2302, "loss_nan_ranks": 0, "loss_rank_avg": 0.24994471669197083, "step": 405, "valid_targets_mean": 1449.1, "valid_targets_min": 605 }, { "epoch": 1.0073710073710074, "grad_norm": 0.8920261500246442, "learning_rate": 3.9452560588188135e-05, "loss": 0.2446, "loss_nan_ranks": 0, "loss_rank_avg": 0.2395254522562027, "step": 410, "valid_targets_mean": 1442.7, "valid_targets_min": 625 }, { "epoch": 1.0196560196560196, "grad_norm": 0.8788201882105066, "learning_rate": 3.9418836348521045e-05, "loss": 0.2311, "loss_nan_ranks": 0, "loss_rank_avg": 0.22359757125377655, "step": 415, "valid_targets_mean": 1561.7, "valid_targets_min": 854 }, { "epoch": 1.031941031941032, "grad_norm": 0.8963576975802414, "learning_rate": 3.9384119447779854e-05, "loss": 0.2234, "loss_nan_ranks": 0, "loss_rank_avg": 0.22188884019851685, "step": 420, "valid_targets_mean": 1369.3, "valid_targets_min": 679 }, { "epoch": 1.0442260442260443, "grad_norm": 0.9312090201385392, "learning_rate": 3.934841166063919e-05, "loss": 0.2195, "loss_nan_ranks": 0, "loss_rank_avg": 0.21513287723064423, "step": 425, "valid_targets_mean": 1252.8, "valid_targets_min": 555 }, { "epoch": 1.0565110565110565, "grad_norm": 0.9090544401101063, "learning_rate": 3.931171481242625e-05, "loss": 0.2099, "loss_nan_ranks": 0, "loss_rank_avg": 0.20362290740013123, "step": 430, "valid_targets_mean": 1281.4, "valid_targets_min": 685 }, { "epoch": 1.0687960687960687, "grad_norm": 0.8960022842390729, "learning_rate": 3.927403077902753e-05, "loss": 0.2134, "loss_nan_ranks": 0, "loss_rank_avg": 0.22219829261302948, "step": 435, "valid_targets_mean": 1247.5, "valid_targets_min": 696 }, { "epoch": 1.0810810810810811, "grad_norm": 1.096520420802368, "learning_rate": 3.9235361486792905e-05, "loss": 0.2235, "loss_nan_ranks": 0, "loss_rank_avg": 0.24039161205291748, "step": 440, "valid_targets_mean": 1353.1, "valid_targets_min": 859 }, { "epoch": 1.0933660933660934, "grad_norm": 0.8339795162374838, "learning_rate": 3.9195708912437176e-05, "loss": 0.2176, "loss_nan_ranks": 0, "loss_rank_avg": 0.1995536983013153, "step": 445, "valid_targets_mean": 1291.4, "valid_targets_min": 832 }, { "epoch": 1.1056511056511056, "grad_norm": 0.8344607923251784, "learning_rate": 3.915507508293901e-05, "loss": 0.2135, "loss_nan_ranks": 0, "loss_rank_avg": 0.2221432775259018, "step": 450, "valid_targets_mean": 1466.1, "valid_targets_min": 928 }, { "epoch": 1.117936117936118, "grad_norm": 0.8563952361791388, "learning_rate": 3.911346207543734e-05, "loss": 0.2097, "loss_nan_ranks": 0, "loss_rank_avg": 0.2145756334066391, "step": 455, "valid_targets_mean": 1386.1, "valid_targets_min": 676 }, { "epoch": 1.1302211302211302, "grad_norm": 1.0053828040779496, "learning_rate": 3.907087201712515e-05, "loss": 0.2339, "loss_nan_ranks": 0, "loss_rank_avg": 0.23731562495231628, "step": 460, "valid_targets_mean": 1351.2, "valid_targets_min": 516 }, { "epoch": 1.1425061425061425, "grad_norm": 0.9200687870617267, "learning_rate": 3.902730708514078e-05, "loss": 0.2079, "loss_nan_ranks": 0, "loss_rank_avg": 0.19236290454864502, "step": 465, "valid_targets_mean": 1237.4, "valid_targets_min": 707 }, { "epoch": 1.154791154791155, "grad_norm": 1.956829606116063, "learning_rate": 3.8982769506456616e-05, "loss": 0.2138, "loss_nan_ranks": 0, "loss_rank_avg": 0.2312776744365692, "step": 470, "valid_targets_mean": 1205.5, "valid_targets_min": 633 }, { "epoch": 1.1670761670761671, "grad_norm": 0.8376975426348914, "learning_rate": 3.893726155776524e-05, "loss": 0.2174, "loss_nan_ranks": 0, "loss_rank_avg": 0.20141255855560303, "step": 475, "valid_targets_mean": 1293.6, "valid_targets_min": 552 }, { "epoch": 1.1793611793611793, "grad_norm": 0.9106721894053932, "learning_rate": 3.8890785565363046e-05, "loss": 0.2105, "loss_nan_ranks": 0, "loss_rank_avg": 0.21375897526741028, "step": 480, "valid_targets_mean": 1276.3, "valid_targets_min": 772 }, { "epoch": 1.1916461916461916, "grad_norm": 0.9575923245260296, "learning_rate": 3.884334390503136e-05, "loss": 0.219, "loss_nan_ranks": 0, "loss_rank_avg": 0.21761855483055115, "step": 485, "valid_targets_mean": 1113.1, "valid_targets_min": 623 }, { "epoch": 1.203931203931204, "grad_norm": 0.9627502770339411, "learning_rate": 3.8794939001914955e-05, "loss": 0.2135, "loss_nan_ranks": 0, "loss_rank_avg": 0.22376301884651184, "step": 490, "valid_targets_mean": 1444.7, "valid_targets_min": 721 }, { "epoch": 1.2162162162162162, "grad_norm": 0.9284874253755052, "learning_rate": 3.87455733303981e-05, "loss": 0.2181, "loss_nan_ranks": 0, "loss_rank_avg": 0.22301726043224335, "step": 495, "valid_targets_mean": 1192.3, "valid_targets_min": 559 }, { "epoch": 1.2285012285012284, "grad_norm": 0.9095646070613896, "learning_rate": 3.869524941397805e-05, "loss": 0.2138, "loss_nan_ranks": 0, "loss_rank_avg": 0.2163151502609253, "step": 500, "valid_targets_mean": 1267.6, "valid_targets_min": 702 }, { "epoch": 1.2407862407862407, "grad_norm": 2.108591173486237, "learning_rate": 3.8643969825136095e-05, "loss": 0.2161, "loss_nan_ranks": 0, "loss_rank_avg": 0.22375309467315674, "step": 505, "valid_targets_mean": 1189.6, "valid_targets_min": 599 }, { "epoch": 1.253071253071253, "grad_norm": 0.8526514308763375, "learning_rate": 3.8591737185206024e-05, "loss": 0.2155, "loss_nan_ranks": 0, "loss_rank_avg": 0.20474407076835632, "step": 510, "valid_targets_mean": 1360.9, "valid_targets_min": 680 }, { "epoch": 1.2653562653562653, "grad_norm": 0.8781899367047357, "learning_rate": 3.853855416424011e-05, "loss": 0.2153, "loss_nan_ranks": 0, "loss_rank_avg": 0.22370408475399017, "step": 515, "valid_targets_mean": 1262.0, "valid_targets_min": 666 }, { "epoch": 1.2776412776412776, "grad_norm": 0.9403635720815047, "learning_rate": 3.848442348087267e-05, "loss": 0.2134, "loss_nan_ranks": 0, "loss_rank_avg": 0.202105313539505, "step": 520, "valid_targets_mean": 1425.7, "valid_targets_min": 764 }, { "epoch": 1.28992628992629, "grad_norm": 0.8090564776917256, "learning_rate": 3.842934790218106e-05, "loss": 0.2104, "loss_nan_ranks": 0, "loss_rank_avg": 0.21103046834468842, "step": 525, "valid_targets_mean": 1308.6, "valid_targets_min": 856 }, { "epoch": 1.3022113022113022, "grad_norm": 1.0851226060699104, "learning_rate": 3.837333024354422e-05, "loss": 0.215, "loss_nan_ranks": 0, "loss_rank_avg": 0.23397046327590942, "step": 530, "valid_targets_mean": 1284.8, "valid_targets_min": 563 }, { "epoch": 1.3144963144963144, "grad_norm": 0.8157174831500646, "learning_rate": 3.8316373368498794e-05, "loss": 0.1986, "loss_nan_ranks": 0, "loss_rank_avg": 0.21336279809474945, "step": 535, "valid_targets_mean": 1468.4, "valid_targets_min": 604 }, { "epoch": 1.3267813267813269, "grad_norm": 0.8259937885003744, "learning_rate": 3.82584801885927e-05, "loss": 0.2177, "loss_nan_ranks": 0, "loss_rank_avg": 0.21354883909225464, "step": 540, "valid_targets_mean": 1441.9, "valid_targets_min": 695 }, { "epoch": 1.339066339066339, "grad_norm": 0.7719178515354956, "learning_rate": 3.8199653663236336e-05, "loss": 0.2204, "loss_nan_ranks": 0, "loss_rank_avg": 0.22929129004478455, "step": 545, "valid_targets_mean": 1425.4, "valid_targets_min": 957 }, { "epoch": 1.3513513513513513, "grad_norm": 0.9713509399154334, "learning_rate": 3.813989679955128e-05, "loss": 0.2103, "loss_nan_ranks": 0, "loss_rank_avg": 0.20775477588176727, "step": 550, "valid_targets_mean": 1368.8, "valid_targets_min": 826 }, { "epoch": 1.3636363636363638, "grad_norm": 1.5657871968007089, "learning_rate": 3.8079212652216595e-05, "loss": 0.2053, "loss_nan_ranks": 0, "loss_rank_avg": 0.2107270210981369, "step": 555, "valid_targets_mean": 1369.9, "valid_targets_min": 602 }, { "epoch": 1.375921375921376, "grad_norm": 0.7739864999218562, "learning_rate": 3.8017604323312616e-05, "loss": 0.2089, "loss_nan_ranks": 0, "loss_rank_avg": 0.20910859107971191, "step": 560, "valid_targets_mean": 1351.6, "valid_targets_min": 717 }, { "epoch": 1.3882063882063882, "grad_norm": 0.8934410106791703, "learning_rate": 3.795507496216246e-05, "loss": 0.2218, "loss_nan_ranks": 0, "loss_rank_avg": 0.22969551384449005, "step": 565, "valid_targets_mean": 1307.3, "valid_targets_min": 755 }, { "epoch": 1.4004914004914004, "grad_norm": 0.8511838263014904, "learning_rate": 3.789162776517098e-05, "loss": 0.2131, "loss_nan_ranks": 0, "loss_rank_avg": 0.2171616405248642, "step": 570, "valid_targets_mean": 1304.6, "valid_targets_min": 677 }, { "epoch": 1.4127764127764126, "grad_norm": 0.8141913250178237, "learning_rate": 3.78272659756614e-05, "loss": 0.2192, "loss_nan_ranks": 0, "loss_rank_avg": 0.23532284796237946, "step": 575, "valid_targets_mean": 1437.6, "valid_targets_min": 699 }, { "epoch": 1.425061425061425, "grad_norm": 0.8420915956597627, "learning_rate": 3.776199288370948e-05, "loss": 0.2192, "loss_nan_ranks": 0, "loss_rank_avg": 0.23164448142051697, "step": 580, "valid_targets_mean": 1419.3, "valid_targets_min": 851 }, { "epoch": 1.4373464373464373, "grad_norm": 0.8968223148938935, "learning_rate": 3.7695811825975386e-05, "loss": 0.2063, "loss_nan_ranks": 0, "loss_rank_avg": 0.19434265792369843, "step": 585, "valid_targets_mean": 1047.1, "valid_targets_min": 571 }, { "epoch": 1.4496314496314495, "grad_norm": 0.7610632883336738, "learning_rate": 3.76287261855331e-05, "loss": 0.208, "loss_nan_ranks": 0, "loss_rank_avg": 0.20634359121322632, "step": 590, "valid_targets_mean": 1468.5, "valid_targets_min": 906 }, { "epoch": 1.461916461916462, "grad_norm": 0.7589326597800894, "learning_rate": 3.7560739391697465e-05, "loss": 0.2154, "loss_nan_ranks": 0, "loss_rank_avg": 0.18889199197292328, "step": 595, "valid_targets_mean": 1226.5, "valid_targets_min": 600 }, { "epoch": 1.4742014742014742, "grad_norm": 0.8216098043735577, "learning_rate": 3.749185491984891e-05, "loss": 0.2122, "loss_nan_ranks": 0, "loss_rank_avg": 0.20000270009040833, "step": 600, "valid_targets_mean": 1265.7, "valid_targets_min": 772 }, { "epoch": 1.4864864864864864, "grad_norm": 0.8373041646274438, "learning_rate": 3.7422076291255785e-05, "loss": 0.1991, "loss_nan_ranks": 0, "loss_rank_avg": 0.1893223524093628, "step": 605, "valid_targets_mean": 1247.5, "valid_targets_min": 677 }, { "epoch": 1.4987714987714988, "grad_norm": 0.8414739647341087, "learning_rate": 3.7351407072894356e-05, "loss": 0.2101, "loss_nan_ranks": 0, "loss_rank_avg": 0.20384536683559418, "step": 610, "valid_targets_mean": 1286.6, "valid_targets_min": 957 }, { "epoch": 1.511056511056511, "grad_norm": 0.869973506541461, "learning_rate": 3.7279850877266486e-05, "loss": 0.2223, "loss_nan_ranks": 0, "loss_rank_avg": 0.22863122820854187, "step": 615, "valid_targets_mean": 1334.2, "valid_targets_min": 727 }, { "epoch": 1.5233415233415233, "grad_norm": 0.8167658605627112, "learning_rate": 3.720741136221491e-05, "loss": 0.2093, "loss_nan_ranks": 0, "loss_rank_avg": 0.2142532467842102, "step": 620, "valid_targets_mean": 1292.1, "valid_targets_min": 704 }, { "epoch": 1.5356265356265357, "grad_norm": 0.862827915655825, "learning_rate": 3.713409223073636e-05, "loss": 0.2175, "loss_nan_ranks": 0, "loss_rank_avg": 0.21128416061401367, "step": 625, "valid_targets_mean": 1296.1, "valid_targets_min": 501 }, { "epoch": 1.547911547911548, "grad_norm": 0.8137378186782543, "learning_rate": 3.705989723079214e-05, "loss": 0.2188, "loss_nan_ranks": 0, "loss_rank_avg": 0.22958746552467346, "step": 630, "valid_targets_mean": 1526.4, "valid_targets_min": 516 }, { "epoch": 1.5601965601965602, "grad_norm": 0.8249041342154185, "learning_rate": 3.698483015511665e-05, "loss": 0.2115, "loss_nan_ranks": 0, "loss_rank_avg": 0.21446937322616577, "step": 635, "valid_targets_mean": 1382.8, "valid_targets_min": 608 }, { "epoch": 1.5724815724815726, "grad_norm": 0.7761727919338764, "learning_rate": 3.690889484102344e-05, "loss": 0.2082, "loss_nan_ranks": 0, "loss_rank_avg": 0.1898980289697647, "step": 640, "valid_targets_mean": 1198.1, "valid_targets_min": 587 }, { "epoch": 1.5847665847665846, "grad_norm": 0.85132599232256, "learning_rate": 3.683209517020908e-05, "loss": 0.2111, "loss_nan_ranks": 0, "loss_rank_avg": 0.2267095148563385, "step": 645, "valid_targets_mean": 1355.8, "valid_targets_min": 678 }, { "epoch": 1.597051597051597, "grad_norm": 0.8632075917415805, "learning_rate": 3.675443506855473e-05, "loss": 0.2149, "loss_nan_ranks": 0, "loss_rank_avg": 0.2519699037075043, "step": 650, "valid_targets_mean": 1355.8, "valid_targets_min": 749 }, { "epoch": 1.6093366093366095, "grad_norm": 0.8029636768151777, "learning_rate": 3.6675918505925456e-05, "loss": 0.2128, "loss_nan_ranks": 0, "loss_rank_avg": 0.19434180855751038, "step": 655, "valid_targets_mean": 1209.3, "valid_targets_min": 622 }, { "epoch": 1.6216216216216215, "grad_norm": 0.8533947419088043, "learning_rate": 3.6596549495967276e-05, "loss": 0.2128, "loss_nan_ranks": 0, "loss_rank_avg": 0.22300590574741364, "step": 660, "valid_targets_mean": 1284.0, "valid_targets_min": 749 }, { "epoch": 1.633906633906634, "grad_norm": 0.7643303692582221, "learning_rate": 3.651633209590202e-05, "loss": 0.213, "loss_nan_ranks": 0, "loss_rank_avg": 0.18270307779312134, "step": 665, "valid_targets_mean": 1283.6, "valid_targets_min": 532 }, { "epoch": 1.6461916461916462, "grad_norm": 0.7807796150123283, "learning_rate": 3.6435270406319914e-05, "loss": 0.2281, "loss_nan_ranks": 0, "loss_rank_avg": 0.21742983162403107, "step": 670, "valid_targets_mean": 1435.8, "valid_targets_min": 691 }, { "epoch": 1.6584766584766584, "grad_norm": 0.8076168248855204, "learning_rate": 3.635336857096997e-05, "loss": 0.2102, "loss_nan_ranks": 0, "loss_rank_avg": 0.20645713806152344, "step": 675, "valid_targets_mean": 1312.5, "valid_targets_min": 863 }, { "epoch": 1.6707616707616708, "grad_norm": 0.7686324259536815, "learning_rate": 3.627063077654815e-05, "loss": 0.2117, "loss_nan_ranks": 0, "loss_rank_avg": 0.2024710476398468, "step": 680, "valid_targets_mean": 1363.4, "valid_targets_min": 538 }, { "epoch": 1.683046683046683, "grad_norm": 0.8120597616491396, "learning_rate": 3.618706125248337e-05, "loss": 0.2057, "loss_nan_ranks": 0, "loss_rank_avg": 0.20566058158874512, "step": 685, "valid_targets_mean": 1409.5, "valid_targets_min": 747 }, { "epoch": 1.6953316953316953, "grad_norm": 0.7975177403301514, "learning_rate": 3.6102664270721275e-05, "loss": 0.2171, "loss_nan_ranks": 0, "loss_rank_avg": 0.21366316080093384, "step": 690, "valid_targets_mean": 1345.3, "valid_targets_min": 655 }, { "epoch": 1.7076167076167077, "grad_norm": 0.693128955960592, "learning_rate": 3.601744414550589e-05, "loss": 0.2072, "loss_nan_ranks": 0, "loss_rank_avg": 0.203857883810997, "step": 695, "valid_targets_mean": 1457.3, "valid_targets_min": 563 }, { "epoch": 1.71990171990172, "grad_norm": 0.8655978074299355, "learning_rate": 3.593140523315906e-05, "loss": 0.2153, "loss_nan_ranks": 0, "loss_rank_avg": 0.21074074506759644, "step": 700, "valid_targets_mean": 1263.1, "valid_targets_min": 618 }, { "epoch": 1.7321867321867321, "grad_norm": 0.8174739337432844, "learning_rate": 3.584455193185778e-05, "loss": 0.2155, "loss_nan_ranks": 0, "loss_rank_avg": 0.2070631980895996, "step": 705, "valid_targets_mean": 1293.8, "valid_targets_min": 641 }, { "epoch": 1.7444717444717446, "grad_norm": 0.821190279821601, "learning_rate": 3.575688868140933e-05, "loss": 0.22, "loss_nan_ranks": 0, "loss_rank_avg": 0.20903602242469788, "step": 710, "valid_targets_mean": 1464.4, "valid_targets_min": 796 }, { "epoch": 1.7567567567567568, "grad_norm": 0.7124088474881533, "learning_rate": 3.566841996302438e-05, "loss": 0.2083, "loss_nan_ranks": 0, "loss_rank_avg": 0.2009987235069275, "step": 715, "valid_targets_mean": 1362.8, "valid_targets_min": 639 }, { "epoch": 1.769041769041769, "grad_norm": 0.7766872594933402, "learning_rate": 3.557915029908787e-05, "loss": 0.2159, "loss_nan_ranks": 0, "loss_rank_avg": 0.22496528923511505, "step": 720, "valid_targets_mean": 1335.9, "valid_targets_min": 775 }, { "epoch": 1.7813267813267815, "grad_norm": 0.778311753577005, "learning_rate": 3.548908425292784e-05, "loss": 0.2102, "loss_nan_ranks": 0, "loss_rank_avg": 0.20054762065410614, "step": 725, "valid_targets_mean": 1222.9, "valid_targets_min": 765 }, { "epoch": 1.7936117936117935, "grad_norm": 0.7697237115798328, "learning_rate": 3.5398226428582165e-05, "loss": 0.2051, "loss_nan_ranks": 0, "loss_rank_avg": 0.18973074853420258, "step": 730, "valid_targets_mean": 1225.9, "valid_targets_min": 576 }, { "epoch": 1.805896805896806, "grad_norm": 0.7435081743751965, "learning_rate": 3.530658147056321e-05, "loss": 0.2074, "loss_nan_ranks": 0, "loss_rank_avg": 0.21077962219715118, "step": 735, "valid_targets_mean": 1544.0, "valid_targets_min": 748 }, { "epoch": 1.8181818181818183, "grad_norm": 0.8072606835461681, "learning_rate": 3.521415406362041e-05, "loss": 0.2111, "loss_nan_ranks": 0, "loss_rank_avg": 0.22379997372627258, "step": 740, "valid_targets_mean": 1295.1, "valid_targets_min": 695 }, { "epoch": 1.8304668304668303, "grad_norm": 0.8303839843270181, "learning_rate": 3.512094893250076e-05, "loss": 0.2112, "loss_nan_ranks": 0, "loss_rank_avg": 0.19904112815856934, "step": 745, "valid_targets_mean": 1146.8, "valid_targets_min": 592 }, { "epoch": 1.8427518427518428, "grad_norm": 0.806403930167987, "learning_rate": 3.5026970841707366e-05, "loss": 0.2095, "loss_nan_ranks": 0, "loss_rank_avg": 0.22263360023498535, "step": 750, "valid_targets_mean": 1444.1, "valid_targets_min": 936 }, { "epoch": 1.855036855036855, "grad_norm": 0.832539093115592, "learning_rate": 3.493222459525579e-05, "loss": 0.1997, "loss_nan_ranks": 0, "loss_rank_avg": 0.2039949595928192, "step": 755, "valid_targets_mean": 1232.6, "valid_targets_min": 750 }, { "epoch": 1.8673218673218672, "grad_norm": 0.7634964231003126, "learning_rate": 3.483671503642858e-05, "loss": 0.2061, "loss_nan_ranks": 0, "loss_rank_avg": 0.20757383108139038, "step": 760, "valid_targets_mean": 1327.8, "valid_targets_min": 681 }, { "epoch": 1.8796068796068797, "grad_norm": 0.734644093470408, "learning_rate": 3.474044704752761e-05, "loss": 0.2108, "loss_nan_ranks": 0, "loss_rank_avg": 0.20907098054885864, "step": 765, "valid_targets_mean": 1293.1, "valid_targets_min": 868 }, { "epoch": 1.8918918918918919, "grad_norm": 0.7599652457636882, "learning_rate": 3.464342554962454e-05, "loss": 0.1995, "loss_nan_ranks": 0, "loss_rank_avg": 0.20856548845767975, "step": 770, "valid_targets_mean": 1729.1, "valid_targets_min": 1076 }, { "epoch": 1.904176904176904, "grad_norm": 0.7360153071481739, "learning_rate": 3.4545655502309254e-05, "loss": 0.2031, "loss_nan_ranks": 0, "loss_rank_avg": 0.18852345645427704, "step": 775, "valid_targets_mean": 1302.1, "valid_targets_min": 690 }, { "epoch": 1.9164619164619165, "grad_norm": 0.7714929717750981, "learning_rate": 3.444714190343633e-05, "loss": 0.2085, "loss_nan_ranks": 0, "loss_rank_avg": 0.2067737728357315, "step": 780, "valid_targets_mean": 1459.2, "valid_targets_min": 655 }, { "epoch": 1.9287469287469288, "grad_norm": 0.7966394203579434, "learning_rate": 3.434788978886957e-05, "loss": 0.2102, "loss_nan_ranks": 0, "loss_rank_avg": 0.21493887901306152, "step": 785, "valid_targets_mean": 1388.4, "valid_targets_min": 582 }, { "epoch": 1.941031941031941, "grad_norm": 0.7664698984697156, "learning_rate": 3.424790423222455e-05, "loss": 0.1962, "loss_nan_ranks": 0, "loss_rank_avg": 0.2068059891462326, "step": 790, "valid_targets_mean": 1329.3, "valid_targets_min": 578 }, { "epoch": 1.9533169533169534, "grad_norm": 0.7879676439663363, "learning_rate": 3.414719034460928e-05, "loss": 0.2001, "loss_nan_ranks": 0, "loss_rank_avg": 0.2054266482591629, "step": 795, "valid_targets_mean": 1332.4, "valid_targets_min": 854 }, { "epoch": 1.9656019656019657, "grad_norm": 0.7910168582778149, "learning_rate": 3.404575327436294e-05, "loss": 0.2026, "loss_nan_ranks": 0, "loss_rank_avg": 0.19492971897125244, "step": 800, "valid_targets_mean": 1276.2, "valid_targets_min": 620 }, { "epoch": 1.9778869778869779, "grad_norm": 0.8143146289838762, "learning_rate": 3.3943598206792665e-05, "loss": 0.1994, "loss_nan_ranks": 0, "loss_rank_avg": 0.19378644227981567, "step": 805, "valid_targets_mean": 1181.9, "valid_targets_min": 665 }, { "epoch": 1.9901719901719903, "grad_norm": 0.7759024846717739, "learning_rate": 3.384073036390857e-05, "loss": 0.2021, "loss_nan_ranks": 0, "loss_rank_avg": 0.20919588208198547, "step": 810, "valid_targets_mean": 1480.4, "valid_targets_min": 720 }, { "epoch": 2.0024570024570023, "grad_norm": 0.8889852312804324, "learning_rate": 3.373715500415667e-05, "loss": 0.203, "loss_nan_ranks": 0, "loss_rank_avg": 0.17950613796710968, "step": 815, "valid_targets_mean": 1413.7, "valid_targets_min": 758 }, { "epoch": 2.0147420147420148, "grad_norm": 0.8529043749791839, "learning_rate": 3.363287742215023e-05, "loss": 0.1742, "loss_nan_ranks": 0, "loss_rank_avg": 0.1715715229511261, "step": 820, "valid_targets_mean": 1223.9, "valid_targets_min": 806 }, { "epoch": 2.027027027027027, "grad_norm": 0.861295844522495, "learning_rate": 3.352790294839898e-05, "loss": 0.1838, "loss_nan_ranks": 0, "loss_rank_avg": 0.17608895897865295, "step": 825, "valid_targets_mean": 1208.3, "valid_targets_min": 608 }, { "epoch": 2.039312039312039, "grad_norm": 0.8658818114720375, "learning_rate": 3.3422236949036726e-05, "loss": 0.1778, "loss_nan_ranks": 0, "loss_rank_avg": 0.18546447157859802, "step": 830, "valid_targets_mean": 1115.2, "valid_targets_min": 527 }, { "epoch": 2.0515970515970516, "grad_norm": 0.8483551528421704, "learning_rate": 3.331588482554697e-05, "loss": 0.1785, "loss_nan_ranks": 0, "loss_rank_avg": 0.17613749206066132, "step": 835, "valid_targets_mean": 1319.5, "valid_targets_min": 753 }, { "epoch": 2.063882063882064, "grad_norm": 0.7743600766705527, "learning_rate": 3.320885201448684e-05, "loss": 0.182, "loss_nan_ranks": 0, "loss_rank_avg": 0.17434000968933105, "step": 840, "valid_targets_mean": 1287.9, "valid_targets_min": 865 }, { "epoch": 2.076167076167076, "grad_norm": 0.8493923341230093, "learning_rate": 3.310114398720917e-05, "loss": 0.1824, "loss_nan_ranks": 0, "loss_rank_avg": 0.17663569748401642, "step": 845, "valid_targets_mean": 1190.2, "valid_targets_min": 713 }, { "epoch": 2.0884520884520885, "grad_norm": 0.8478553827027294, "learning_rate": 3.299276624958281e-05, "loss": 0.1849, "loss_nan_ranks": 0, "loss_rank_avg": 0.19623138010501862, "step": 850, "valid_targets_mean": 1453.6, "valid_targets_min": 672 }, { "epoch": 2.100737100737101, "grad_norm": 0.8011647896279039, "learning_rate": 3.288372434171116e-05, "loss": 0.1776, "loss_nan_ranks": 0, "loss_rank_avg": 0.17543333768844604, "step": 855, "valid_targets_mean": 1394.6, "valid_targets_min": 742 }, { "epoch": 2.113022113022113, "grad_norm": 0.8904716311684531, "learning_rate": 3.2774023837648986e-05, "loss": 0.1849, "loss_nan_ranks": 0, "loss_rank_avg": 0.20401468873023987, "step": 860, "valid_targets_mean": 1273.4, "valid_targets_min": 591 }, { "epoch": 2.1253071253071254, "grad_norm": 0.7994042104976049, "learning_rate": 3.26636703451175e-05, "loss": 0.1808, "loss_nan_ranks": 0, "loss_rank_avg": 0.16975076496601105, "step": 865, "valid_targets_mean": 1109.5, "valid_targets_min": 536 }, { "epoch": 2.1375921375921374, "grad_norm": 0.815977947868972, "learning_rate": 3.2552669505217646e-05, "loss": 0.183, "loss_nan_ranks": 0, "loss_rank_avg": 0.18750914931297302, "step": 870, "valid_targets_mean": 1309.8, "valid_targets_min": 640 }, { "epoch": 2.14987714987715, "grad_norm": 0.8080552902212362, "learning_rate": 3.24410269921418e-05, "loss": 0.1792, "loss_nan_ranks": 0, "loss_rank_avg": 0.16167230904102325, "step": 875, "valid_targets_mean": 1132.6, "valid_targets_min": 807 }, { "epoch": 2.1621621621621623, "grad_norm": 0.8331786616540037, "learning_rate": 3.232874851288367e-05, "loss": 0.1824, "loss_nan_ranks": 0, "loss_rank_avg": 0.1691512167453766, "step": 880, "valid_targets_mean": 1280.0, "valid_targets_min": 706 }, { "epoch": 2.1744471744471743, "grad_norm": 0.9322586407992784, "learning_rate": 3.221583980694659e-05, "loss": 0.183, "loss_nan_ranks": 0, "loss_rank_avg": 0.1737692803144455, "step": 885, "valid_targets_mean": 1235.9, "valid_targets_min": 742 }, { "epoch": 2.1867321867321867, "grad_norm": 0.7957232692494968, "learning_rate": 3.21023066460501e-05, "loss": 0.1747, "loss_nan_ranks": 0, "loss_rank_avg": 0.18011856079101562, "step": 890, "valid_targets_mean": 1525.5, "valid_targets_min": 826 }, { "epoch": 2.199017199017199, "grad_norm": 0.836674749989076, "learning_rate": 3.198815483383492e-05, "loss": 0.1812, "loss_nan_ranks": 0, "loss_rank_avg": 0.17540715634822845, "step": 895, "valid_targets_mean": 1178.8, "valid_targets_min": 474 }, { "epoch": 2.211302211302211, "grad_norm": 0.8699604703326298, "learning_rate": 3.1873390205566295e-05, "loss": 0.1836, "loss_nan_ranks": 0, "loss_rank_avg": 0.194318026304245, "step": 900, "valid_targets_mean": 1225.6, "valid_targets_min": 647 }, { "epoch": 2.2235872235872236, "grad_norm": 0.7976579724339753, "learning_rate": 3.175801862783565e-05, "loss": 0.1826, "loss_nan_ranks": 0, "loss_rank_avg": 0.19349230825901031, "step": 905, "valid_targets_mean": 1512.1, "valid_targets_min": 762 }, { "epoch": 2.235872235872236, "grad_norm": 0.8510536664782925, "learning_rate": 3.164204599826077e-05, "loss": 0.1807, "loss_nan_ranks": 0, "loss_rank_avg": 0.1855241358280182, "step": 910, "valid_targets_mean": 1290.1, "valid_targets_min": 707 }, { "epoch": 2.248157248157248, "grad_norm": 1.2961464486220478, "learning_rate": 3.1525478245184245e-05, "loss": 0.1785, "loss_nan_ranks": 0, "loss_rank_avg": 0.20424135029315948, "step": 915, "valid_targets_mean": 1385.6, "valid_targets_min": 760 }, { "epoch": 2.2604422604422605, "grad_norm": 0.723340082244006, "learning_rate": 3.140832132737051e-05, "loss": 0.1761, "loss_nan_ranks": 0, "loss_rank_avg": 0.17882639169692993, "step": 920, "valid_targets_mean": 1393.4, "valid_targets_min": 745 }, { "epoch": 2.2727272727272725, "grad_norm": 0.7235527044863573, "learning_rate": 3.129058123370116e-05, "loss": 0.1817, "loss_nan_ranks": 0, "loss_rank_avg": 0.17998242378234863, "step": 925, "valid_targets_mean": 1463.0, "valid_targets_min": 806 }, { "epoch": 2.285012285012285, "grad_norm": 0.8449406721479917, "learning_rate": 3.117226398286887e-05, "loss": 0.1759, "loss_nan_ranks": 0, "loss_rank_avg": 0.17853769659996033, "step": 930, "valid_targets_mean": 1234.6, "valid_targets_min": 637 }, { "epoch": 2.2972972972972974, "grad_norm": 0.7732211800284947, "learning_rate": 3.105337562306968e-05, "loss": 0.1872, "loss_nan_ranks": 0, "loss_rank_avg": 0.191191628575325, "step": 935, "valid_targets_mean": 1597.8, "valid_targets_min": 939 }, { "epoch": 2.30958230958231, "grad_norm": 0.8005078939965097, "learning_rate": 3.0933922231693854e-05, "loss": 0.1797, "loss_nan_ranks": 0, "loss_rank_avg": 0.1898137778043747, "step": 940, "valid_targets_mean": 1381.9, "valid_targets_min": 768 }, { "epoch": 2.321867321867322, "grad_norm": 0.9221082841518858, "learning_rate": 3.08139099150152e-05, "loss": 0.1886, "loss_nan_ranks": 0, "loss_rank_avg": 0.1998477727174759, "step": 945, "valid_targets_mean": 1377.6, "valid_targets_min": 902 }, { "epoch": 2.3341523341523343, "grad_norm": 0.7707479063679702, "learning_rate": 3.069334480787893e-05, "loss": 0.1808, "loss_nan_ranks": 0, "loss_rank_avg": 0.17489567399024963, "step": 950, "valid_targets_mean": 1442.1, "valid_targets_min": 564 }, { "epoch": 2.3464373464373462, "grad_norm": 0.8256834025961682, "learning_rate": 3.057223307338806e-05, "loss": 0.1837, "loss_nan_ranks": 0, "loss_rank_avg": 0.18947067856788635, "step": 955, "valid_targets_mean": 1536.8, "valid_targets_min": 823 }, { "epoch": 2.3587223587223587, "grad_norm": 0.8531224678622473, "learning_rate": 3.0450580902588346e-05, "loss": 0.1773, "loss_nan_ranks": 0, "loss_rank_avg": 0.1829451620578766, "step": 960, "valid_targets_mean": 1313.1, "valid_targets_min": 777 }, { "epoch": 2.371007371007371, "grad_norm": 0.8371259257645897, "learning_rate": 3.032839451415182e-05, "loss": 0.1835, "loss_nan_ranks": 0, "loss_rank_avg": 0.17725329101085663, "step": 965, "valid_targets_mean": 1203.5, "valid_targets_min": 457 }, { "epoch": 2.383292383292383, "grad_norm": 0.717363860123874, "learning_rate": 3.0205680154058904e-05, "loss": 0.179, "loss_nan_ranks": 0, "loss_rank_avg": 0.1754624843597412, "step": 970, "valid_targets_mean": 1550.9, "valid_targets_min": 697 }, { "epoch": 2.3955773955773956, "grad_norm": 0.8214153230293283, "learning_rate": 3.0082444095279117e-05, "loss": 0.1802, "loss_nan_ranks": 0, "loss_rank_avg": 0.20176392793655396, "step": 975, "valid_targets_mean": 1422.4, "valid_targets_min": 708 }, { "epoch": 2.407862407862408, "grad_norm": 0.8026368482781924, "learning_rate": 2.9958692637450406e-05, "loss": 0.1767, "loss_nan_ranks": 0, "loss_rank_avg": 0.16252771019935608, "step": 980, "valid_targets_mean": 1248.3, "valid_targets_min": 643 }, { "epoch": 2.42014742014742, "grad_norm": 0.8815336408634351, "learning_rate": 2.983443210655714e-05, "loss": 0.1752, "loss_nan_ranks": 0, "loss_rank_avg": 0.17958936095237732, "step": 985, "valid_targets_mean": 1289.1, "valid_targets_min": 682 }, { "epoch": 2.4324324324324325, "grad_norm": 0.7836836708413403, "learning_rate": 2.9709668854606706e-05, "loss": 0.1896, "loss_nan_ranks": 0, "loss_rank_avg": 0.1911127269268036, "step": 990, "valid_targets_mean": 1440.7, "valid_targets_min": 614 }, { "epoch": 2.444717444717445, "grad_norm": 0.7554743565043565, "learning_rate": 2.9584409259304828e-05, "loss": 0.1845, "loss_nan_ranks": 0, "loss_rank_avg": 0.1768302172422409, "step": 995, "valid_targets_mean": 1373.0, "valid_targets_min": 770 }, { "epoch": 2.457002457002457, "grad_norm": 0.7711572760011336, "learning_rate": 2.945865972372954e-05, "loss": 0.1806, "loss_nan_ranks": 0, "loss_rank_avg": 0.1825973093509674, "step": 1000, "valid_targets_mean": 1467.0, "valid_targets_min": 958 }, { "epoch": 2.4692874692874693, "grad_norm": 0.7763115308115468, "learning_rate": 2.9332426676003858e-05, "loss": 0.1843, "loss_nan_ranks": 0, "loss_rank_avg": 0.18232324719429016, "step": 1005, "valid_targets_mean": 1405.0, "valid_targets_min": 802 }, { "epoch": 2.4815724815724813, "grad_norm": 0.8101073760100002, "learning_rate": 2.920571656896722e-05, "loss": 0.179, "loss_nan_ranks": 0, "loss_rank_avg": 0.17708787322044373, "step": 1010, "valid_targets_mean": 1309.1, "valid_targets_min": 782 }, { "epoch": 2.493857493857494, "grad_norm": 0.7835475920518248, "learning_rate": 2.907853587984558e-05, "loss": 0.182, "loss_nan_ranks": 0, "loss_rank_avg": 0.18842971324920654, "step": 1015, "valid_targets_mean": 1459.9, "valid_targets_min": 685 }, { "epoch": 2.506142506142506, "grad_norm": 0.7907834667766721, "learning_rate": 2.8950891109920333e-05, "loss": 0.1807, "loss_nan_ranks": 0, "loss_rank_avg": 0.18541795015335083, "step": 1020, "valid_targets_mean": 1363.8, "valid_targets_min": 770 }, { "epoch": 2.5184275184275187, "grad_norm": 0.7806222134145253, "learning_rate": 2.882278878419597e-05, "loss": 0.1765, "loss_nan_ranks": 0, "loss_rank_avg": 0.17934384942054749, "step": 1025, "valid_targets_mean": 1293.1, "valid_targets_min": 579 }, { "epoch": 2.5307125307125307, "grad_norm": 0.7736823415875187, "learning_rate": 2.8694235451066538e-05, "loss": 0.1808, "loss_nan_ranks": 0, "loss_rank_avg": 0.18719598650932312, "step": 1030, "valid_targets_mean": 1573.0, "valid_targets_min": 856 }, { "epoch": 2.542997542997543, "grad_norm": 0.7531581920130256, "learning_rate": 2.8565237681980876e-05, "loss": 0.1743, "loss_nan_ranks": 0, "loss_rank_avg": 0.1793087124824524, "step": 1035, "valid_targets_mean": 1425.1, "valid_targets_min": 836 }, { "epoch": 2.555282555282555, "grad_norm": 0.7399212256403678, "learning_rate": 2.843580207110672e-05, "loss": 0.1734, "loss_nan_ranks": 0, "loss_rank_avg": 0.16769655048847198, "step": 1040, "valid_targets_mean": 1410.6, "valid_targets_min": 736 }, { "epoch": 2.5675675675675675, "grad_norm": 0.741628811394016, "learning_rate": 2.830593523499361e-05, "loss": 0.1815, "loss_nan_ranks": 0, "loss_rank_avg": 0.18228626251220703, "step": 1045, "valid_targets_mean": 1439.1, "valid_targets_min": 771 }, { "epoch": 2.57985257985258, "grad_norm": 0.7969152441113768, "learning_rate": 2.8175643812234627e-05, "loss": 0.1798, "loss_nan_ranks": 0, "loss_rank_avg": 0.17840053141117096, "step": 1050, "valid_targets_mean": 1492.2, "valid_targets_min": 826 }, { "epoch": 2.592137592137592, "grad_norm": 0.8367342853086918, "learning_rate": 2.8044934463127108e-05, "loss": 0.1815, "loss_nan_ranks": 0, "loss_rank_avg": 0.18805266916751862, "step": 1055, "valid_targets_mean": 1261.8, "valid_targets_min": 759 }, { "epoch": 2.6044226044226044, "grad_norm": 0.7677291227603371, "learning_rate": 2.7913813869332112e-05, "loss": 0.1787, "loss_nan_ranks": 0, "loss_rank_avg": 0.17599162459373474, "step": 1060, "valid_targets_mean": 1296.4, "valid_targets_min": 654 }, { "epoch": 2.616707616707617, "grad_norm": 0.7992239103679709, "learning_rate": 2.7782288733532915e-05, "loss": 0.1795, "loss_nan_ranks": 0, "loss_rank_avg": 0.17146100103855133, "step": 1065, "valid_targets_mean": 1234.5, "valid_targets_min": 671 }, { "epoch": 2.628992628992629, "grad_norm": 0.8304316096602254, "learning_rate": 2.7650365779092346e-05, "loss": 0.177, "loss_nan_ranks": 0, "loss_rank_avg": 0.17182031273841858, "step": 1070, "valid_targets_mean": 1255.4, "valid_targets_min": 632 }, { "epoch": 2.6412776412776413, "grad_norm": 0.805604710666297, "learning_rate": 2.751805174970912e-05, "loss": 0.1825, "loss_nan_ranks": 0, "loss_rank_avg": 0.19578979909420013, "step": 1075, "valid_targets_mean": 1437.3, "valid_targets_min": 724 }, { "epoch": 2.6535626535626538, "grad_norm": 0.7937637333321657, "learning_rate": 2.7385353409073093e-05, "loss": 0.183, "loss_nan_ranks": 0, "loss_rank_avg": 0.18653550744056702, "step": 1080, "valid_targets_mean": 1224.4, "valid_targets_min": 626 }, { "epoch": 2.6658476658476657, "grad_norm": 0.8392997092676029, "learning_rate": 2.725227754051953e-05, "loss": 0.182, "loss_nan_ranks": 0, "loss_rank_avg": 0.18633800745010376, "step": 1085, "valid_targets_mean": 1351.3, "valid_targets_min": 711 }, { "epoch": 2.678132678132678, "grad_norm": 0.82826418253686, "learning_rate": 2.711883094668234e-05, "loss": 0.1772, "loss_nan_ranks": 0, "loss_rank_avg": 0.16905131936073303, "step": 1090, "valid_targets_mean": 1228.1, "valid_targets_min": 571 }, { "epoch": 2.69041769041769, "grad_norm": 0.7749583275108434, "learning_rate": 2.698502044914633e-05, "loss": 0.1735, "loss_nan_ranks": 0, "loss_rank_avg": 0.18027430772781372, "step": 1095, "valid_targets_mean": 1298.4, "valid_targets_min": 814 }, { "epoch": 2.7027027027027026, "grad_norm": 0.8377694942716309, "learning_rate": 2.685085288809853e-05, "loss": 0.1817, "loss_nan_ranks": 0, "loss_rank_avg": 0.18739664554595947, "step": 1100, "valid_targets_mean": 1326.9, "valid_targets_min": 763 }, { "epoch": 2.714987714987715, "grad_norm": 0.7605797734549805, "learning_rate": 2.671633512197848e-05, "loss": 0.1831, "loss_nan_ranks": 0, "loss_rank_avg": 0.1845276653766632, "step": 1105, "valid_targets_mean": 1511.4, "valid_targets_min": 1088 }, { "epoch": 2.7272727272727275, "grad_norm": 0.734192726408221, "learning_rate": 2.658147402712768e-05, "loss": 0.1798, "loss_nan_ranks": 0, "loss_rank_avg": 0.1821923404932022, "step": 1110, "valid_targets_mean": 1476.5, "valid_targets_min": 968 }, { "epoch": 2.7395577395577395, "grad_norm": 0.933923702321532, "learning_rate": 2.6446276497438064e-05, "loss": 0.1772, "loss_nan_ranks": 0, "loss_rank_avg": 0.1841639280319214, "step": 1115, "valid_targets_mean": 1164.2, "valid_targets_min": 700 }, { "epoch": 2.751842751842752, "grad_norm": 0.8577205410075446, "learning_rate": 2.6310749443999593e-05, "loss": 0.1853, "loss_nan_ranks": 0, "loss_rank_avg": 0.19331184029579163, "step": 1120, "valid_targets_mean": 1148.2, "valid_targets_min": 782 }, { "epoch": 2.764127764127764, "grad_norm": 0.8240964794783217, "learning_rate": 2.617489979474699e-05, "loss": 0.181, "loss_nan_ranks": 0, "loss_rank_avg": 0.1863320767879486, "step": 1125, "valid_targets_mean": 1221.2, "valid_targets_min": 620 }, { "epoch": 2.7764127764127764, "grad_norm": 0.6884072653384222, "learning_rate": 2.6038734494105562e-05, "loss": 0.1832, "loss_nan_ranks": 0, "loss_rank_avg": 0.19115372002124786, "step": 1130, "valid_targets_mean": 1645.6, "valid_targets_min": 865 }, { "epoch": 2.788697788697789, "grad_norm": 0.8365132438504224, "learning_rate": 2.590226050263625e-05, "loss": 0.1738, "loss_nan_ranks": 0, "loss_rank_avg": 0.17121455073356628, "step": 1135, "valid_targets_mean": 1232.6, "valid_targets_min": 710 }, { "epoch": 2.800982800982801, "grad_norm": 0.8932011031329712, "learning_rate": 2.5765484796679768e-05, "loss": 0.1766, "loss_nan_ranks": 0, "loss_rank_avg": 0.1942731738090515, "step": 1140, "valid_targets_mean": 1251.2, "valid_targets_min": 674 }, { "epoch": 2.8132678132678133, "grad_norm": 0.8005242041150714, "learning_rate": 2.5628414368000035e-05, "loss": 0.1836, "loss_nan_ranks": 0, "loss_rank_avg": 0.1775723397731781, "step": 1145, "valid_targets_mean": 1480.1, "valid_targets_min": 784 }, { "epoch": 2.8255528255528253, "grad_norm": 0.7503467618496086, "learning_rate": 2.5491056223426746e-05, "loss": 0.1848, "loss_nan_ranks": 0, "loss_rank_avg": 0.18719075620174408, "step": 1150, "valid_targets_mean": 1456.2, "valid_targets_min": 873 }, { "epoch": 2.8378378378378377, "grad_norm": 0.8501440933671442, "learning_rate": 2.5353417384497166e-05, "loss": 0.1798, "loss_nan_ranks": 0, "loss_rank_avg": 0.1862514168024063, "step": 1155, "valid_targets_mean": 1129.2, "valid_targets_min": 631 }, { "epoch": 2.85012285012285, "grad_norm": 0.8153346046411876, "learning_rate": 2.5215504887097243e-05, "loss": 0.183, "loss_nan_ranks": 0, "loss_rank_avg": 0.20934267342090607, "step": 1160, "valid_targets_mean": 1318.6, "valid_targets_min": 683 }, { "epoch": 2.8624078624078626, "grad_norm": 0.7763002556957465, "learning_rate": 2.5077325781101918e-05, "loss": 0.183, "loss_nan_ranks": 0, "loss_rank_avg": 0.17470885813236237, "step": 1165, "valid_targets_mean": 1339.2, "valid_targets_min": 582 }, { "epoch": 2.8746928746928746, "grad_norm": 0.8016048702566863, "learning_rate": 2.493888713001476e-05, "loss": 0.1793, "loss_nan_ranks": 0, "loss_rank_avg": 0.18066394329071045, "step": 1170, "valid_targets_mean": 1284.4, "valid_targets_min": 691 }, { "epoch": 2.886977886977887, "grad_norm": 0.7717525435471911, "learning_rate": 2.480019601060687e-05, "loss": 0.1789, "loss_nan_ranks": 0, "loss_rank_avg": 0.1963619589805603, "step": 1175, "valid_targets_mean": 1382.4, "valid_targets_min": 830 }, { "epoch": 2.899262899262899, "grad_norm": 0.7784504210078957, "learning_rate": 2.4661259512555176e-05, "loss": 0.1811, "loss_nan_ranks": 0, "loss_rank_avg": 0.18142808973789215, "step": 1180, "valid_targets_mean": 1400.8, "valid_targets_min": 1080 }, { "epoch": 2.9115479115479115, "grad_norm": 0.7978518183157296, "learning_rate": 2.4522084738079933e-05, "loss": 0.1784, "loss_nan_ranks": 0, "loss_rank_avg": 0.1911832094192505, "step": 1185, "valid_targets_mean": 1342.9, "valid_targets_min": 773 }, { "epoch": 2.923832923832924, "grad_norm": 0.7773097305802383, "learning_rate": 2.4382678801581762e-05, "loss": 0.1757, "loss_nan_ranks": 0, "loss_rank_avg": 0.16415411233901978, "step": 1190, "valid_targets_mean": 1346.6, "valid_targets_min": 633 }, { "epoch": 2.9361179361179364, "grad_norm": 0.7149177870829416, "learning_rate": 2.4243048829277916e-05, "loss": 0.1846, "loss_nan_ranks": 0, "loss_rank_avg": 0.16982388496398926, "step": 1195, "valid_targets_mean": 1398.8, "valid_targets_min": 615 }, { "epoch": 2.9484029484029484, "grad_norm": 0.8133991765071973, "learning_rate": 2.410320195883802e-05, "loss": 0.1733, "loss_nan_ranks": 0, "loss_rank_avg": 0.1808069348335266, "step": 1200, "valid_targets_mean": 1261.6, "valid_targets_min": 576 }, { "epoch": 2.960687960687961, "grad_norm": 0.8035380696411945, "learning_rate": 2.396314533901918e-05, "loss": 0.1799, "loss_nan_ranks": 0, "loss_rank_avg": 0.17814520001411438, "step": 1205, "valid_targets_mean": 1265.3, "valid_targets_min": 605 }, { "epoch": 2.972972972972973, "grad_norm": 0.8103346517230893, "learning_rate": 2.3822886129300603e-05, "loss": 0.1814, "loss_nan_ranks": 0, "loss_rank_avg": 0.18759378790855408, "step": 1210, "valid_targets_mean": 1393.0, "valid_targets_min": 898 }, { "epoch": 2.9852579852579852, "grad_norm": 0.9427441660740232, "learning_rate": 2.368243149951755e-05, "loss": 0.1833, "loss_nan_ranks": 0, "loss_rank_avg": 0.1856885552406311, "step": 1215, "valid_targets_mean": 1184.1, "valid_targets_min": 623 }, { "epoch": 2.9975429975429977, "grad_norm": 0.740828622122419, "learning_rate": 2.3541788629494865e-05, "loss": 0.1822, "loss_nan_ranks": 0, "loss_rank_avg": 0.17210790514945984, "step": 1220, "valid_targets_mean": 1348.9, "valid_targets_min": 635 }, { "epoch": 3.0098280098280097, "grad_norm": 0.7164578724146792, "learning_rate": 2.3400964708679944e-05, "loss": 0.1549, "loss_nan_ranks": 0, "loss_rank_avg": 0.13863927125930786, "step": 1225, "valid_targets_mean": 1282.5, "valid_targets_min": 651 }, { "epoch": 3.022113022113022, "grad_norm": 0.8312096014116472, "learning_rate": 2.325996693577522e-05, "loss": 0.159, "loss_nan_ranks": 0, "loss_rank_avg": 0.15467259287834167, "step": 1230, "valid_targets_mean": 1422.1, "valid_targets_min": 799 }, { "epoch": 3.0343980343980346, "grad_norm": 0.8375454565582668, "learning_rate": 2.311880251837019e-05, "loss": 0.1522, "loss_nan_ranks": 0, "loss_rank_avg": 0.15046252310276031, "step": 1235, "valid_targets_mean": 1260.6, "valid_targets_min": 696 }, { "epoch": 3.0466830466830466, "grad_norm": 0.8180945627432928, "learning_rate": 2.2977478672572933e-05, "loss": 0.154, "loss_nan_ranks": 0, "loss_rank_avg": 0.15951278805732727, "step": 1240, "valid_targets_mean": 1366.7, "valid_targets_min": 666 }, { "epoch": 3.058968058968059, "grad_norm": 1.4817440168716653, "learning_rate": 2.2836002622641297e-05, "loss": 0.1502, "loss_nan_ranks": 0, "loss_rank_avg": 0.16168934106826782, "step": 1245, "valid_targets_mean": 1357.0, "valid_targets_min": 695 }, { "epoch": 3.0712530712530715, "grad_norm": 0.789782000868707, "learning_rate": 2.269438160061354e-05, "loss": 0.1554, "loss_nan_ranks": 0, "loss_rank_avg": 0.14563557505607605, "step": 1250, "valid_targets_mean": 1295.6, "valid_targets_min": 810 }, { "epoch": 3.0835380835380835, "grad_norm": 0.7958342463964752, "learning_rate": 2.2552622845938698e-05, "loss": 0.1565, "loss_nan_ranks": 0, "loss_rank_avg": 0.16075897216796875, "step": 1255, "valid_targets_mean": 1529.2, "valid_targets_min": 857 }, { "epoch": 3.095823095823096, "grad_norm": 0.8394291652788792, "learning_rate": 2.2410733605106462e-05, "loss": 0.1581, "loss_nan_ranks": 0, "loss_rank_avg": 0.16368389129638672, "step": 1260, "valid_targets_mean": 1204.4, "valid_targets_min": 604 }, { "epoch": 3.108108108108108, "grad_norm": 0.8183277107345958, "learning_rate": 2.2268721131276805e-05, "loss": 0.1512, "loss_nan_ranks": 0, "loss_rank_avg": 0.1558169722557068, "step": 1265, "valid_targets_mean": 1444.3, "valid_targets_min": 747 }, { "epoch": 3.1203931203931203, "grad_norm": 0.8691016138218304, "learning_rate": 2.2126592683909154e-05, "loss": 0.1501, "loss_nan_ranks": 0, "loss_rank_avg": 0.15810123085975647, "step": 1270, "valid_targets_mean": 1228.3, "valid_targets_min": 614 }, { "epoch": 3.1326781326781328, "grad_norm": 0.7988341801875339, "learning_rate": 2.1984355528391342e-05, "loss": 0.1532, "loss_nan_ranks": 0, "loss_rank_avg": 0.1563870906829834, "step": 1275, "valid_targets_mean": 1397.4, "valid_targets_min": 758 }, { "epoch": 3.1449631449631448, "grad_norm": 0.8981627797019425, "learning_rate": 2.1842016935668188e-05, "loss": 0.1532, "loss_nan_ranks": 0, "loss_rank_avg": 0.172378808259964, "step": 1280, "valid_targets_mean": 1452.6, "valid_targets_min": 746 }, { "epoch": 3.157248157248157, "grad_norm": 0.8274025893882145, "learning_rate": 2.169958418186982e-05, "loss": 0.1624, "loss_nan_ranks": 0, "loss_rank_avg": 0.1557457149028778, "step": 1285, "valid_targets_mean": 1294.4, "valid_targets_min": 738 }, { "epoch": 3.1695331695331697, "grad_norm": 0.8220493363877825, "learning_rate": 2.1557064547939754e-05, "loss": 0.1576, "loss_nan_ranks": 0, "loss_rank_avg": 0.14473125338554382, "step": 1290, "valid_targets_mean": 1266.2, "valid_targets_min": 810 }, { "epoch": 3.1818181818181817, "grad_norm": 0.7360182914990048, "learning_rate": 2.1414465319262666e-05, "loss": 0.1564, "loss_nan_ranks": 0, "loss_rank_avg": 0.14730754494667053, "step": 1295, "valid_targets_mean": 1345.4, "valid_targets_min": 681 }, { "epoch": 3.194103194103194, "grad_norm": 0.761094766359276, "learning_rate": 2.1271793785291997e-05, "loss": 0.151, "loss_nan_ranks": 0, "loss_rank_avg": 0.1422240436077118, "step": 1300, "valid_targets_mean": 1342.6, "valid_targets_min": 602 }, { "epoch": 3.2063882063882065, "grad_norm": 0.8176967843846148, "learning_rate": 2.1129057239177337e-05, "loss": 0.1569, "loss_nan_ranks": 0, "loss_rank_avg": 0.1573348492383957, "step": 1305, "valid_targets_mean": 1261.3, "valid_targets_min": 748 }, { "epoch": 3.2186732186732185, "grad_norm": 0.8262216828068331, "learning_rate": 2.0986262977391577e-05, "loss": 0.154, "loss_nan_ranks": 0, "loss_rank_avg": 0.15253204107284546, "step": 1310, "valid_targets_mean": 1228.0, "valid_targets_min": 728 }, { "epoch": 3.230958230958231, "grad_norm": 0.8436083601387745, "learning_rate": 2.084341829935796e-05, "loss": 0.1537, "loss_nan_ranks": 0, "loss_rank_avg": 0.16099226474761963, "step": 1315, "valid_targets_mean": 1279.6, "valid_targets_min": 664 }, { "epoch": 3.2432432432432434, "grad_norm": 0.8107364765343557, "learning_rate": 2.0700530507076916e-05, "loss": 0.1469, "loss_nan_ranks": 0, "loss_rank_avg": 0.14447104930877686, "step": 1320, "valid_targets_mean": 1242.2, "valid_targets_min": 807 }, { "epoch": 3.2555282555282554, "grad_norm": 0.8823201088197631, "learning_rate": 2.0557606904752833e-05, "loss": 0.1535, "loss_nan_ranks": 0, "loss_rank_avg": 0.15442629158496857, "step": 1325, "valid_targets_mean": 1201.7, "valid_targets_min": 474 }, { "epoch": 3.267813267813268, "grad_norm": 0.8163225673327624, "learning_rate": 2.0414654798420622e-05, "loss": 0.145, "loss_nan_ranks": 0, "loss_rank_avg": 0.15767508745193481, "step": 1330, "valid_targets_mean": 1397.9, "valid_targets_min": 772 }, { "epoch": 3.2800982800982803, "grad_norm": 0.811563363023717, "learning_rate": 2.02716814955723e-05, "loss": 0.1551, "loss_nan_ranks": 0, "loss_rank_avg": 0.14335688948631287, "step": 1335, "valid_targets_mean": 1254.7, "valid_targets_min": 597 }, { "epoch": 3.2923832923832923, "grad_norm": 0.8679959811700956, "learning_rate": 2.0128694304783406e-05, "loss": 0.1568, "loss_nan_ranks": 0, "loss_rank_avg": 0.15623366832733154, "step": 1340, "valid_targets_mean": 1235.6, "valid_targets_min": 730 }, { "epoch": 3.3046683046683047, "grad_norm": 0.8311214799635258, "learning_rate": 1.9985700535339406e-05, "loss": 0.1591, "loss_nan_ranks": 0, "loss_rank_avg": 0.15222451090812683, "step": 1345, "valid_targets_mean": 1168.9, "valid_targets_min": 845 }, { "epoch": 3.3169533169533167, "grad_norm": 1.5033255139159805, "learning_rate": 1.984270749686207e-05, "loss": 0.155, "loss_nan_ranks": 0, "loss_rank_avg": 0.13309861719608307, "step": 1350, "valid_targets_mean": 1288.8, "valid_targets_min": 536 }, { "epoch": 3.329238329238329, "grad_norm": 0.845526593227459, "learning_rate": 1.9699722498935786e-05, "loss": 0.1545, "loss_nan_ranks": 0, "loss_rank_avg": 0.1539156436920166, "step": 1355, "valid_targets_mean": 1236.1, "valid_targets_min": 563 }, { "epoch": 3.3415233415233416, "grad_norm": 0.7782983419960925, "learning_rate": 1.9556752850733933e-05, "loss": 0.1525, "loss_nan_ranks": 0, "loss_rank_avg": 0.14023908972740173, "step": 1360, "valid_targets_mean": 1311.5, "valid_targets_min": 706 }, { "epoch": 3.3538083538083536, "grad_norm": 0.9209403431550032, "learning_rate": 1.9413805860645242e-05, "loss": 0.1619, "loss_nan_ranks": 0, "loss_rank_avg": 0.17440494894981384, "step": 1365, "valid_targets_mean": 1309.7, "valid_targets_min": 692 }, { "epoch": 3.366093366093366, "grad_norm": 0.9139513931631872, "learning_rate": 1.9270888835900165e-05, "loss": 0.1618, "loss_nan_ranks": 0, "loss_rank_avg": 0.16123470664024353, "step": 1370, "valid_targets_mean": 1285.9, "valid_targets_min": 700 }, { "epoch": 3.3783783783783785, "grad_norm": 0.896363024278394, "learning_rate": 1.9128009082197417e-05, "loss": 0.1507, "loss_nan_ranks": 0, "loss_rank_avg": 0.14758946001529694, "step": 1375, "valid_targets_mean": 1197.5, "valid_targets_min": 685 }, { "epoch": 3.3906633906633905, "grad_norm": 0.7857845304860243, "learning_rate": 1.8985173903330428e-05, "loss": 0.1538, "loss_nan_ranks": 0, "loss_rank_avg": 0.15281282365322113, "step": 1380, "valid_targets_mean": 1328.5, "valid_targets_min": 494 }, { "epoch": 3.402948402948403, "grad_norm": 0.7926903268037055, "learning_rate": 1.884239060081407e-05, "loss": 0.1454, "loss_nan_ranks": 0, "loss_rank_avg": 0.14091375470161438, "step": 1385, "valid_targets_mean": 1239.3, "valid_targets_min": 808 }, { "epoch": 3.4152334152334154, "grad_norm": 0.8981590353433372, "learning_rate": 1.869966647351135e-05, "loss": 0.1553, "loss_nan_ranks": 0, "loss_rank_avg": 0.15038526058197021, "step": 1390, "valid_targets_mean": 1271.1, "valid_targets_min": 667 }, { "epoch": 3.4275184275184274, "grad_norm": 0.781660273156141, "learning_rate": 1.8557008817260343e-05, "loss": 0.156, "loss_nan_ranks": 0, "loss_rank_avg": 0.14509668946266174, "step": 1395, "valid_targets_mean": 1379.3, "valid_targets_min": 871 }, { "epoch": 3.43980343980344, "grad_norm": 0.8026197207302569, "learning_rate": 1.8414424924501222e-05, "loss": 0.1615, "loss_nan_ranks": 0, "loss_rank_avg": 0.14742882549762726, "step": 1400, "valid_targets_mean": 1413.8, "valid_targets_min": 681 }, { "epoch": 3.4520884520884523, "grad_norm": 0.8123747210830401, "learning_rate": 1.827192208390347e-05, "loss": 0.1488, "loss_nan_ranks": 0, "loss_rank_avg": 0.14872382581233978, "step": 1405, "valid_targets_mean": 1303.1, "valid_targets_min": 532 }, { "epoch": 3.4643734643734643, "grad_norm": 0.8341496291476699, "learning_rate": 1.812950757999334e-05, "loss": 0.1573, "loss_nan_ranks": 0, "loss_rank_avg": 0.1435258984565735, "step": 1410, "valid_targets_mean": 1249.4, "valid_targets_min": 516 }, { "epoch": 3.4766584766584767, "grad_norm": 0.8201503217545428, "learning_rate": 1.7987188692781417e-05, "loss": 0.1506, "loss_nan_ranks": 0, "loss_rank_avg": 0.15042570233345032, "step": 1415, "valid_targets_mean": 1345.8, "valid_targets_min": 828 }, { "epoch": 3.488943488943489, "grad_norm": 0.7853127186556507, "learning_rate": 1.784497269739052e-05, "loss": 0.1546, "loss_nan_ranks": 0, "loss_rank_avg": 0.14669057726860046, "step": 1420, "valid_targets_mean": 1384.6, "valid_targets_min": 477 }, { "epoch": 3.501228501228501, "grad_norm": 0.8842882650050354, "learning_rate": 1.770286686368381e-05, "loss": 0.1527, "loss_nan_ranks": 0, "loss_rank_avg": 0.15415064990520477, "step": 1425, "valid_targets_mean": 1237.2, "valid_targets_min": 755 }, { "epoch": 3.5135135135135136, "grad_norm": 0.813011039927028, "learning_rate": 1.756087845589312e-05, "loss": 0.1573, "loss_nan_ranks": 0, "loss_rank_avg": 0.14477777481079102, "step": 1430, "valid_targets_mean": 1236.9, "valid_targets_min": 599 }, { "epoch": 3.5257985257985256, "grad_norm": 0.7872126972705287, "learning_rate": 1.7419014732247683e-05, "loss": 0.1584, "loss_nan_ranks": 0, "loss_rank_avg": 0.16638851165771484, "step": 1435, "valid_targets_mean": 1305.9, "valid_targets_min": 736 }, { "epoch": 3.538083538083538, "grad_norm": 0.858779956828706, "learning_rate": 1.7277282944603047e-05, "loss": 0.1621, "loss_nan_ranks": 0, "loss_rank_avg": 0.16495858132839203, "step": 1440, "valid_targets_mean": 1440.2, "valid_targets_min": 700 }, { "epoch": 3.5503685503685505, "grad_norm": 0.8728820790914598, "learning_rate": 1.713569033807041e-05, "loss": 0.1538, "loss_nan_ranks": 0, "loss_rank_avg": 0.1443580836057663, "step": 1445, "valid_targets_mean": 1183.0, "valid_targets_min": 516 }, { "epoch": 3.562653562653563, "grad_norm": 0.6901953229711102, "learning_rate": 1.6994244150646244e-05, "loss": 0.1491, "loss_nan_ranks": 0, "loss_rank_avg": 0.135453999042511, "step": 1450, "valid_targets_mean": 1456.6, "valid_targets_min": 618 }, { "epoch": 3.574938574938575, "grad_norm": 0.7882552220256581, "learning_rate": 1.6852951612842278e-05, "loss": 0.1543, "loss_nan_ranks": 0, "loss_rank_avg": 0.15203431248664856, "step": 1455, "valid_targets_mean": 1427.3, "valid_targets_min": 922 }, { "epoch": 3.5872235872235874, "grad_norm": 0.802201807828957, "learning_rate": 1.671181994731595e-05, "loss": 0.1535, "loss_nan_ranks": 0, "loss_rank_avg": 0.14671474695205688, "step": 1460, "valid_targets_mean": 1319.3, "valid_targets_min": 755 }, { "epoch": 3.5995085995085994, "grad_norm": 0.855708476401229, "learning_rate": 1.6570856368501108e-05, "loss": 0.16, "loss_nan_ranks": 0, "loss_rank_avg": 0.1802348643541336, "step": 1465, "valid_targets_mean": 1353.6, "valid_targets_min": 640 }, { "epoch": 3.611793611793612, "grad_norm": 0.8013086713913176, "learning_rate": 1.643006808223931e-05, "loss": 0.1513, "loss_nan_ranks": 0, "loss_rank_avg": 0.14372509717941284, "step": 1470, "valid_targets_mean": 1331.5, "valid_targets_min": 576 }, { "epoch": 3.6240786240786242, "grad_norm": 0.8833598909227577, "learning_rate": 1.6289462285411387e-05, "loss": 0.151, "loss_nan_ranks": 0, "loss_rank_avg": 0.1556214988231659, "step": 1475, "valid_targets_mean": 1267.2, "valid_targets_min": 754 }, { "epoch": 3.6363636363636362, "grad_norm": 0.8363554061363515, "learning_rate": 1.614904616556962e-05, "loss": 0.1453, "loss_nan_ranks": 0, "loss_rank_avg": 0.144898921251297, "step": 1480, "valid_targets_mean": 1380.3, "valid_targets_min": 684 }, { "epoch": 3.6486486486486487, "grad_norm": 0.8598319240396789, "learning_rate": 1.6008826900570294e-05, "loss": 0.1513, "loss_nan_ranks": 0, "loss_rank_avg": 0.14435847103595734, "step": 1485, "valid_targets_mean": 1139.2, "valid_targets_min": 699 }, { "epoch": 3.6609336609336607, "grad_norm": 0.8495838229620162, "learning_rate": 1.586881165820675e-05, "loss": 0.1553, "loss_nan_ranks": 0, "loss_rank_avg": 0.15599597990512848, "step": 1490, "valid_targets_mean": 1231.2, "valid_targets_min": 621 }, { "epoch": 3.673218673218673, "grad_norm": 0.7933104198530476, "learning_rate": 1.5729007595843037e-05, "loss": 0.1527, "loss_nan_ranks": 0, "loss_rank_avg": 0.1428956240415573, "step": 1495, "valid_targets_mean": 1268.3, "valid_targets_min": 776 }, { "epoch": 3.6855036855036856, "grad_norm": 0.9253518944844057, "learning_rate": 1.5589421860047986e-05, "loss": 0.1556, "loss_nan_ranks": 0, "loss_rank_avg": 0.15546691417694092, "step": 1500, "valid_targets_mean": 1399.1, "valid_targets_min": 868 }, { "epoch": 3.697788697788698, "grad_norm": 0.8710193566221253, "learning_rate": 1.5450061586229903e-05, "loss": 0.1572, "loss_nan_ranks": 0, "loss_rank_avg": 0.15553036332130432, "step": 1505, "valid_targets_mean": 1175.8, "valid_targets_min": 679 }, { "epoch": 3.71007371007371, "grad_norm": 0.8088094494880697, "learning_rate": 1.5310933898271864e-05, "loss": 0.1526, "loss_nan_ranks": 0, "loss_rank_avg": 0.15640608966350555, "step": 1510, "valid_targets_mean": 1268.0, "valid_targets_min": 627 }, { "epoch": 3.7223587223587224, "grad_norm": 0.927057394855993, "learning_rate": 1.5172045908167462e-05, "loss": 0.1498, "loss_nan_ranks": 0, "loss_rank_avg": 0.15059244632720947, "step": 1515, "valid_targets_mean": 1296.2, "valid_targets_min": 619 }, { "epoch": 3.7346437346437344, "grad_norm": 0.8799125344635643, "learning_rate": 1.5033404715657344e-05, "loss": 0.1527, "loss_nan_ranks": 0, "loss_rank_avg": 0.14563912153244019, "step": 1520, "valid_targets_mean": 1261.3, "valid_targets_min": 639 }, { "epoch": 3.746928746928747, "grad_norm": 0.9103297618631646, "learning_rate": 1.4895017407866217e-05, "loss": 0.158, "loss_nan_ranks": 0, "loss_rank_avg": 0.155216783285141, "step": 1525, "valid_targets_mean": 1103.7, "valid_targets_min": 578 }, { "epoch": 3.7592137592137593, "grad_norm": 0.7687412095426766, "learning_rate": 1.4756891058940606e-05, "loss": 0.1576, "loss_nan_ranks": 0, "loss_rank_avg": 0.16270983219146729, "step": 1530, "valid_targets_mean": 1470.4, "valid_targets_min": 631 }, { "epoch": 3.7714987714987718, "grad_norm": 0.9064979122055239, "learning_rate": 1.4619032729687223e-05, "loss": 0.1525, "loss_nan_ranks": 0, "loss_rank_avg": 0.15882454812526703, "step": 1535, "valid_targets_mean": 1394.7, "valid_targets_min": 703 }, { "epoch": 3.7837837837837838, "grad_norm": 0.8424145043338477, "learning_rate": 1.4481449467212004e-05, "loss": 0.1506, "loss_nan_ranks": 0, "loss_rank_avg": 0.14801853895187378, "step": 1540, "valid_targets_mean": 1197.9, "valid_targets_min": 661 }, { "epoch": 3.796068796068796, "grad_norm": 0.8343369574704005, "learning_rate": 1.4344148304559926e-05, "loss": 0.1525, "loss_nan_ranks": 0, "loss_rank_avg": 0.15639081597328186, "step": 1545, "valid_targets_mean": 1394.4, "valid_targets_min": 930 }, { "epoch": 3.808353808353808, "grad_norm": 0.8129609975131263, "learning_rate": 1.4207136260355426e-05, "loss": 0.1512, "loss_nan_ranks": 0, "loss_rank_avg": 0.15776801109313965, "step": 1550, "valid_targets_mean": 1388.5, "valid_targets_min": 844 }, { "epoch": 3.8206388206388207, "grad_norm": 0.8580808574668879, "learning_rate": 1.4070420338443667e-05, "loss": 0.1569, "loss_nan_ranks": 0, "loss_rank_avg": 0.15302756428718567, "step": 1555, "valid_targets_mean": 1418.8, "valid_targets_min": 652 }, { "epoch": 3.832923832923833, "grad_norm": 0.777430435398723, "learning_rate": 1.3934007527532494e-05, "loss": 0.1559, "loss_nan_ranks": 0, "loss_rank_avg": 0.16227784752845764, "step": 1560, "valid_targets_mean": 1578.3, "valid_targets_min": 1012 }, { "epoch": 3.845208845208845, "grad_norm": 0.793914409365788, "learning_rate": 1.3797904800835174e-05, "loss": 0.1537, "loss_nan_ranks": 0, "loss_rank_avg": 0.14964772760868073, "step": 1565, "valid_targets_mean": 1403.3, "valid_targets_min": 680 }, { "epoch": 3.8574938574938575, "grad_norm": 0.8085824166690417, "learning_rate": 1.3662119115713968e-05, "loss": 0.1547, "loss_nan_ranks": 0, "loss_rank_avg": 0.13584861159324646, "step": 1570, "valid_targets_mean": 1334.8, "valid_targets_min": 946 }, { "epoch": 3.8697788697788695, "grad_norm": 0.8070987527644918, "learning_rate": 1.3526657413324427e-05, "loss": 0.1491, "loss_nan_ranks": 0, "loss_rank_avg": 0.14322111010551453, "step": 1575, "valid_targets_mean": 1394.0, "valid_targets_min": 755 }, { "epoch": 3.882063882063882, "grad_norm": 0.7998968510753872, "learning_rate": 1.3391526618260636e-05, "loss": 0.152, "loss_nan_ranks": 0, "loss_rank_avg": 0.15165430307388306, "step": 1580, "valid_targets_mean": 1313.9, "valid_targets_min": 640 }, { "epoch": 3.8943488943488944, "grad_norm": 0.8639486361401842, "learning_rate": 1.3256733638201172e-05, "loss": 0.1524, "loss_nan_ranks": 0, "loss_rank_avg": 0.16121874749660492, "step": 1585, "valid_targets_mean": 1215.0, "valid_targets_min": 683 }, { "epoch": 3.906633906633907, "grad_norm": 0.8360820023117336, "learning_rate": 1.3122285363556053e-05, "loss": 0.1524, "loss_nan_ranks": 0, "loss_rank_avg": 0.14884233474731445, "step": 1590, "valid_targets_mean": 1404.9, "valid_targets_min": 552 }, { "epoch": 3.918918918918919, "grad_norm": 1.0324227056373336, "learning_rate": 1.2988188667114487e-05, "loss": 0.1532, "loss_nan_ranks": 0, "loss_rank_avg": 0.17249566316604614, "step": 1595, "valid_targets_mean": 1450.9, "valid_targets_min": 749 }, { "epoch": 3.9312039312039313, "grad_norm": 0.7758969682797754, "learning_rate": 1.2854450403693526e-05, "loss": 0.1552, "loss_nan_ranks": 0, "loss_rank_avg": 0.1592610478401184, "step": 1600, "valid_targets_mean": 1363.2, "valid_targets_min": 670 }, { "epoch": 3.9434889434889433, "grad_norm": 0.8109033037431925, "learning_rate": 1.272107740978769e-05, "loss": 0.155, "loss_nan_ranks": 0, "loss_rank_avg": 0.1471540331840515, "step": 1605, "valid_targets_mean": 1326.2, "valid_targets_min": 612 }, { "epoch": 3.9557739557739557, "grad_norm": 0.8004506548550595, "learning_rate": 1.2588076503219475e-05, "loss": 0.1545, "loss_nan_ranks": 0, "loss_rank_avg": 0.15889285504817963, "step": 1610, "valid_targets_mean": 1378.2, "valid_targets_min": 792 }, { "epoch": 3.968058968058968, "grad_norm": 0.7415942779166481, "learning_rate": 1.2455454482790859e-05, "loss": 0.1607, "loss_nan_ranks": 0, "loss_rank_avg": 0.15655523538589478, "step": 1615, "valid_targets_mean": 1420.8, "valid_targets_min": 680 }, { "epoch": 3.98034398034398, "grad_norm": 0.8039429952990631, "learning_rate": 1.2323218127935714e-05, "loss": 0.1508, "loss_nan_ranks": 0, "loss_rank_avg": 0.1585637331008911, "step": 1620, "valid_targets_mean": 1429.4, "valid_targets_min": 706 }, { "epoch": 3.9926289926289926, "grad_norm": 0.8692606501071803, "learning_rate": 1.2191374198373309e-05, "loss": 0.1531, "loss_nan_ranks": 0, "loss_rank_avg": 0.1549098938703537, "step": 1625, "valid_targets_mean": 1184.1, "valid_targets_min": 693 }, { "epoch": 4.004914004914005, "grad_norm": 0.7418231127350439, "learning_rate": 1.2059929433762734e-05, "loss": 0.1511, "loss_nan_ranks": 0, "loss_rank_avg": 0.13540256023406982, "step": 1630, "valid_targets_mean": 1401.8, "valid_targets_min": 710 }, { "epoch": 4.017199017199017, "grad_norm": 0.7491106138112695, "learning_rate": 1.1928890553358352e-05, "loss": 0.136, "loss_nan_ranks": 0, "loss_rank_avg": 0.1356310099363327, "step": 1635, "valid_targets_mean": 1565.2, "valid_targets_min": 987 }, { "epoch": 4.0294840294840295, "grad_norm": 0.8758510629920021, "learning_rate": 1.1798264255666387e-05, "loss": 0.1324, "loss_nan_ranks": 0, "loss_rank_avg": 0.14381960034370422, "step": 1640, "valid_targets_mean": 1300.3, "valid_targets_min": 602 }, { "epoch": 4.041769041769042, "grad_norm": 0.7566832249876295, "learning_rate": 1.1668057218102436e-05, "loss": 0.1289, "loss_nan_ranks": 0, "loss_rank_avg": 0.11850036680698395, "step": 1645, "valid_targets_mean": 1487.4, "valid_targets_min": 597 }, { "epoch": 4.054054054054054, "grad_norm": 0.8861198265110193, "learning_rate": 1.1538276096650175e-05, "loss": 0.1321, "loss_nan_ranks": 0, "loss_rank_avg": 0.13395559787750244, "step": 1650, "valid_targets_mean": 1204.8, "valid_targets_min": 510 }, { "epoch": 4.066339066339066, "grad_norm": 0.7931290609747208, "learning_rate": 1.1408927525521118e-05, "loss": 0.1356, "loss_nan_ranks": 0, "loss_rank_avg": 0.12991078197956085, "step": 1655, "valid_targets_mean": 1414.1, "valid_targets_min": 768 }, { "epoch": 4.078624078624078, "grad_norm": 0.8148588065033624, "learning_rate": 1.1280018116815438e-05, "loss": 0.1312, "loss_nan_ranks": 0, "loss_rank_avg": 0.1274513453245163, "step": 1660, "valid_targets_mean": 1330.9, "valid_targets_min": 675 }, { "epoch": 4.090909090909091, "grad_norm": 0.8651740089430039, "learning_rate": 1.115155446018404e-05, "loss": 0.1309, "loss_nan_ranks": 0, "loss_rank_avg": 0.12686586380004883, "step": 1665, "valid_targets_mean": 1129.1, "valid_targets_min": 618 }, { "epoch": 4.103194103194103, "grad_norm": 1.0152247871489999, "learning_rate": 1.1023543122491626e-05, "loss": 0.1329, "loss_nan_ranks": 0, "loss_rank_avg": 0.14235243201255798, "step": 1670, "valid_targets_mean": 1496.2, "valid_targets_min": 619 }, { "epoch": 4.115479115479116, "grad_norm": 0.8387772219526038, "learning_rate": 1.089599064748108e-05, "loss": 0.1343, "loss_nan_ranks": 0, "loss_rank_avg": 0.12564003467559814, "step": 1675, "valid_targets_mean": 1218.8, "valid_targets_min": 829 }, { "epoch": 4.127764127764128, "grad_norm": 0.8758223941652693, "learning_rate": 1.0768903555438927e-05, "loss": 0.1331, "loss_nan_ranks": 0, "loss_rank_avg": 0.13461729884147644, "step": 1680, "valid_targets_mean": 1221.2, "valid_targets_min": 866 }, { "epoch": 4.14004914004914, "grad_norm": 0.8892247132266647, "learning_rate": 1.0642288342862007e-05, "loss": 0.1296, "loss_nan_ranks": 0, "loss_rank_avg": 0.1310172975063324, "step": 1685, "valid_targets_mean": 1356.1, "valid_targets_min": 821 }, { "epoch": 4.152334152334152, "grad_norm": 0.9050342496671671, "learning_rate": 1.051615148212544e-05, "loss": 0.1327, "loss_nan_ranks": 0, "loss_rank_avg": 0.13743871450424194, "step": 1690, "valid_targets_mean": 1221.8, "valid_targets_min": 673 }, { "epoch": 4.164619164619165, "grad_norm": 0.7939266146430992, "learning_rate": 1.0390499421151706e-05, "loss": 0.1303, "loss_nan_ranks": 0, "loss_rank_avg": 0.1256050169467926, "step": 1695, "valid_targets_mean": 1561.8, "valid_targets_min": 838 }, { "epoch": 4.176904176904177, "grad_norm": 0.8958126329856702, "learning_rate": 1.0265338583081088e-05, "loss": 0.1301, "loss_nan_ranks": 0, "loss_rank_avg": 0.13179880380630493, "step": 1700, "valid_targets_mean": 1211.4, "valid_targets_min": 644 }, { "epoch": 4.1891891891891895, "grad_norm": 0.937480505694556, "learning_rate": 1.0140675365943284e-05, "loss": 0.1331, "loss_nan_ranks": 0, "loss_rank_avg": 0.14077922701835632, "step": 1705, "valid_targets_mean": 1244.7, "valid_targets_min": 740 }, { "epoch": 4.201474201474202, "grad_norm": 0.8649258597004559, "learning_rate": 1.0016516142330404e-05, "loss": 0.1353, "loss_nan_ranks": 0, "loss_rank_avg": 0.13584747910499573, "step": 1710, "valid_targets_mean": 1241.2, "valid_targets_min": 817 }, { "epoch": 4.2137592137592135, "grad_norm": 0.8702025042777511, "learning_rate": 9.89286725907117e-06, "loss": 0.1304, "loss_nan_ranks": 0, "loss_rank_avg": 0.1355808675289154, "step": 1715, "valid_targets_mean": 1278.6, "valid_targets_min": 764 }, { "epoch": 4.226044226044226, "grad_norm": 1.0405293797225805, "learning_rate": 9.769735036906475e-06, "loss": 0.1348, "loss_nan_ranks": 0, "loss_rank_avg": 0.13069278001785278, "step": 1720, "valid_targets_mean": 1170.8, "valid_targets_min": 686 }, { "epoch": 4.238329238329238, "grad_norm": 0.8353293938513687, "learning_rate": 9.647125770166321e-06, "loss": 0.1323, "loss_nan_ranks": 0, "loss_rank_avg": 0.13137099146842957, "step": 1725, "valid_targets_mean": 1290.1, "valid_targets_min": 582 }, { "epoch": 4.250614250614251, "grad_norm": 0.8776999596670313, "learning_rate": 9.525045726448001e-06, "loss": 0.1331, "loss_nan_ranks": 0, "loss_rank_avg": 0.1377831995487213, "step": 1730, "valid_targets_mean": 1338.8, "valid_targets_min": 708 }, { "epoch": 4.262899262899263, "grad_norm": 0.876277681541209, "learning_rate": 9.40350114629577e-06, "loss": 0.1317, "loss_nan_ranks": 0, "loss_rank_avg": 0.13550785183906555, "step": 1735, "valid_targets_mean": 1207.9, "valid_targets_min": 702 }, { "epoch": 4.275184275184275, "grad_norm": 0.8944643000153863, "learning_rate": 9.282498242881784e-06, "loss": 0.1339, "loss_nan_ranks": 0, "loss_rank_avg": 0.1361280232667923, "step": 1740, "valid_targets_mean": 1364.2, "valid_targets_min": 687 }, { "epoch": 4.287469287469287, "grad_norm": 0.8582961426311289, "learning_rate": 9.162043201688517e-06, "loss": 0.1326, "loss_nan_ranks": 0, "loss_rank_avg": 0.12418873608112335, "step": 1745, "valid_targets_mean": 1242.6, "valid_targets_min": 695 }, { "epoch": 4.2997542997543, "grad_norm": 0.8460722988037566, "learning_rate": 9.042142180192596e-06, "loss": 0.1384, "loss_nan_ranks": 0, "loss_rank_avg": 0.13723894953727722, "step": 1750, "valid_targets_mean": 1232.2, "valid_targets_min": 750 }, { "epoch": 4.312039312039312, "grad_norm": 0.8185360622222694, "learning_rate": 8.92280130754998e-06, "loss": 0.1319, "loss_nan_ranks": 0, "loss_rank_avg": 0.13691049814224243, "step": 1755, "valid_targets_mean": 1385.4, "valid_targets_min": 683 }, { "epoch": 4.324324324324325, "grad_norm": 0.8089554216095896, "learning_rate": 8.804026684282694e-06, "loss": 0.1347, "loss_nan_ranks": 0, "loss_rank_avg": 0.1337611973285675, "step": 1760, "valid_targets_mean": 1322.9, "valid_targets_min": 723 }, { "epoch": 4.336609336609337, "grad_norm": 2.6163400061885085, "learning_rate": 8.685824381966975e-06, "loss": 0.134, "loss_nan_ranks": 0, "loss_rank_avg": 0.14328783750534058, "step": 1765, "valid_targets_mean": 1257.4, "valid_targets_min": 661 }, { "epoch": 4.348894348894349, "grad_norm": 0.8727646019220853, "learning_rate": 8.568200442922865e-06, "loss": 0.1346, "loss_nan_ranks": 0, "loss_rank_avg": 0.13159531354904175, "step": 1770, "valid_targets_mean": 1367.1, "valid_targets_min": 624 }, { "epoch": 4.361179361179361, "grad_norm": 0.8996659837757238, "learning_rate": 8.451160879905398e-06, "loss": 0.1337, "loss_nan_ranks": 0, "loss_rank_avg": 0.1381177306175232, "step": 1775, "valid_targets_mean": 1295.1, "valid_targets_min": 699 }, { "epoch": 4.3734643734643734, "grad_norm": 0.8221175548847971, "learning_rate": 8.33471167579717e-06, "loss": 0.1368, "loss_nan_ranks": 0, "loss_rank_avg": 0.14100399613380432, "step": 1780, "valid_targets_mean": 1438.7, "valid_targets_min": 925 }, { "epoch": 4.385749385749386, "grad_norm": 0.9160963463737738, "learning_rate": 8.218858783302566e-06, "loss": 0.1372, "loss_nan_ranks": 0, "loss_rank_avg": 0.13693374395370483, "step": 1785, "valid_targets_mean": 1326.6, "valid_targets_min": 619 }, { "epoch": 4.398034398034398, "grad_norm": 0.8441030171356969, "learning_rate": 8.103608124643412e-06, "loss": 0.1298, "loss_nan_ranks": 0, "loss_rank_avg": 0.12508758902549744, "step": 1790, "valid_targets_mean": 1355.0, "valid_targets_min": 745 }, { "epoch": 4.41031941031941, "grad_norm": 0.8520294326439847, "learning_rate": 7.988965591256284e-06, "loss": 0.1393, "loss_nan_ranks": 0, "loss_rank_avg": 0.13980035483837128, "step": 1795, "valid_targets_mean": 1313.2, "valid_targets_min": 677 }, { "epoch": 4.422604422604422, "grad_norm": 0.8246418406935714, "learning_rate": 7.874937043491331e-06, "loss": 0.1364, "loss_nan_ranks": 0, "loss_rank_avg": 0.13462932407855988, "step": 1800, "valid_targets_mean": 1353.4, "valid_targets_min": 654 }, { "epoch": 4.434889434889435, "grad_norm": 0.8457223705197832, "learning_rate": 7.761528310312679e-06, "loss": 0.136, "loss_nan_ranks": 0, "loss_rank_avg": 0.13229894638061523, "step": 1805, "valid_targets_mean": 1344.6, "valid_targets_min": 658 }, { "epoch": 4.447174447174447, "grad_norm": 0.833607462401412, "learning_rate": 7.648745189000511e-06, "loss": 0.129, "loss_nan_ranks": 0, "loss_rank_avg": 0.12600962817668915, "step": 1810, "valid_targets_mean": 1313.6, "valid_targets_min": 607 }, { "epoch": 4.45945945945946, "grad_norm": 0.8510312578108182, "learning_rate": 7.536593444854663e-06, "loss": 0.1324, "loss_nan_ranks": 0, "loss_rank_avg": 0.13862791657447815, "step": 1815, "valid_targets_mean": 1411.5, "valid_targets_min": 679 }, { "epoch": 4.471744471744472, "grad_norm": 0.9145076730321617, "learning_rate": 7.4250788108999686e-06, "loss": 0.136, "loss_nan_ranks": 0, "loss_rank_avg": 0.13466450572013855, "step": 1820, "valid_targets_mean": 1134.2, "valid_targets_min": 640 }, { "epoch": 4.484029484029484, "grad_norm": 0.8548827601262811, "learning_rate": 7.314206987593162e-06, "loss": 0.1404, "loss_nan_ranks": 0, "loss_rank_avg": 0.13373500108718872, "step": 1825, "valid_targets_mean": 1223.4, "valid_targets_min": 776 }, { "epoch": 4.496314496314496, "grad_norm": 0.8938428905617729, "learning_rate": 7.203983642531462e-06, "loss": 0.1365, "loss_nan_ranks": 0, "loss_rank_avg": 0.13550472259521484, "step": 1830, "valid_targets_mean": 1318.0, "valid_targets_min": 619 }, { "epoch": 4.5085995085995085, "grad_norm": 0.8603104670065109, "learning_rate": 7.094414410162913e-06, "loss": 0.1388, "loss_nan_ranks": 0, "loss_rank_avg": 0.13778984546661377, "step": 1835, "valid_targets_mean": 1195.8, "valid_targets_min": 689 }, { "epoch": 4.520884520884521, "grad_norm": 0.7919032263244239, "learning_rate": 6.985504891498291e-06, "loss": 0.1329, "loss_nan_ranks": 0, "loss_rank_avg": 0.1213904470205307, "step": 1840, "valid_targets_mean": 1309.6, "valid_targets_min": 661 }, { "epoch": 4.533169533169533, "grad_norm": 0.8592680814494961, "learning_rate": 6.8772606538248285e-06, "loss": 0.1337, "loss_nan_ranks": 0, "loss_rank_avg": 0.13496822118759155, "step": 1845, "valid_targets_mean": 1331.1, "valid_targets_min": 680 }, { "epoch": 4.545454545454545, "grad_norm": 0.8466687407616076, "learning_rate": 6.769687230421638e-06, "loss": 0.1348, "loss_nan_ranks": 0, "loss_rank_avg": 0.13918258249759674, "step": 1850, "valid_targets_mean": 1431.8, "valid_targets_min": 637 }, { "epoch": 4.557739557739557, "grad_norm": 0.8216050231021137, "learning_rate": 6.662790120276803e-06, "loss": 0.138, "loss_nan_ranks": 0, "loss_rank_avg": 0.13874514400959015, "step": 1855, "valid_targets_mean": 1524.4, "valid_targets_min": 802 }, { "epoch": 4.57002457002457, "grad_norm": 0.9669401045446978, "learning_rate": 6.556574787806344e-06, "loss": 0.1337, "loss_nan_ranks": 0, "loss_rank_avg": 0.14022761583328247, "step": 1860, "valid_targets_mean": 1152.1, "valid_targets_min": 578 }, { "epoch": 4.582309582309582, "grad_norm": 0.9375234740980322, "learning_rate": 6.451046662574831e-06, "loss": 0.1377, "loss_nan_ranks": 0, "loss_rank_avg": 0.13871565461158752, "step": 1865, "valid_targets_mean": 1150.8, "valid_targets_min": 640 }, { "epoch": 4.594594594594595, "grad_norm": 0.836770791087053, "learning_rate": 6.346211139017877e-06, "loss": 0.1373, "loss_nan_ranks": 0, "loss_rank_avg": 0.13808581233024597, "step": 1870, "valid_targets_mean": 1486.2, "valid_targets_min": 684 }, { "epoch": 4.606879606879607, "grad_norm": 0.950259784141947, "learning_rate": 6.242073576166337e-06, "loss": 0.1334, "loss_nan_ranks": 0, "loss_rank_avg": 0.13348250091075897, "step": 1875, "valid_targets_mean": 1282.6, "valid_targets_min": 700 }, { "epoch": 4.61916461916462, "grad_norm": 0.9276466101362375, "learning_rate": 6.138639297372404e-06, "loss": 0.1287, "loss_nan_ranks": 0, "loss_rank_avg": 0.13664673268795013, "step": 1880, "valid_targets_mean": 1407.1, "valid_targets_min": 873 }, { "epoch": 4.631449631449631, "grad_norm": 0.9134688417135232, "learning_rate": 6.035913590037479e-06, "loss": 0.1329, "loss_nan_ranks": 0, "loss_rank_avg": 0.13874787092208862, "step": 1885, "valid_targets_mean": 1228.9, "valid_targets_min": 726 }, { "epoch": 4.643734643734644, "grad_norm": 0.9364687166887621, "learning_rate": 5.933901705341851e-06, "loss": 0.1351, "loss_nan_ranks": 0, "loss_rank_avg": 0.13511444628238678, "step": 1890, "valid_targets_mean": 1213.6, "valid_targets_min": 582 }, { "epoch": 4.656019656019656, "grad_norm": 0.8226051755557579, "learning_rate": 5.832608857976321e-06, "loss": 0.1323, "loss_nan_ranks": 0, "loss_rank_avg": 0.14297759532928467, "step": 1895, "valid_targets_mean": 1434.4, "valid_targets_min": 604 }, { "epoch": 4.6683046683046685, "grad_norm": 0.8928576176089511, "learning_rate": 5.732040225875584e-06, "loss": 0.138, "loss_nan_ranks": 0, "loss_rank_avg": 0.14191249012947083, "step": 1900, "valid_targets_mean": 1351.0, "valid_targets_min": 740 }, { "epoch": 4.680589680589681, "grad_norm": 0.8362193728819766, "learning_rate": 5.632200949953579e-06, "loss": 0.1324, "loss_nan_ranks": 0, "loss_rank_avg": 0.13142593204975128, "step": 1905, "valid_targets_mean": 1434.6, "valid_targets_min": 508 }, { "epoch": 4.6928746928746925, "grad_norm": 0.8558162872587993, "learning_rate": 5.533096133840677e-06, "loss": 0.1313, "loss_nan_ranks": 0, "loss_rank_avg": 0.1275939792394638, "step": 1910, "valid_targets_mean": 1292.1, "valid_targets_min": 624 }, { "epoch": 4.705159705159705, "grad_norm": 0.7983343068498832, "learning_rate": 5.434730843622778e-06, "loss": 0.1294, "loss_nan_ranks": 0, "loss_rank_avg": 0.1256292462348938, "step": 1915, "valid_targets_mean": 1475.9, "valid_targets_min": 941 }, { "epoch": 4.717444717444717, "grad_norm": 1.3118893758847399, "learning_rate": 5.337110107582377e-06, "loss": 0.1313, "loss_nan_ranks": 0, "loss_rank_avg": 0.13079731166362762, "step": 1920, "valid_targets_mean": 1585.5, "valid_targets_min": 933 }, { "epoch": 4.72972972972973, "grad_norm": 0.8080898258134603, "learning_rate": 5.2402389159414755e-06, "loss": 0.1343, "loss_nan_ranks": 0, "loss_rank_avg": 0.13039739429950714, "step": 1925, "valid_targets_mean": 1359.4, "valid_targets_min": 631 }, { "epoch": 4.742014742014742, "grad_norm": 1.0236112284503025, "learning_rate": 5.144122220606542e-06, "loss": 0.1325, "loss_nan_ranks": 0, "loss_rank_avg": 0.12727615237236023, "step": 1930, "valid_targets_mean": 1334.2, "valid_targets_min": 836 }, { "epoch": 4.754299754299755, "grad_norm": 0.8608999172912609, "learning_rate": 5.048764934915349e-06, "loss": 0.1333, "loss_nan_ranks": 0, "loss_rank_avg": 0.12973430752754211, "step": 1935, "valid_targets_mean": 1372.7, "valid_targets_min": 690 }, { "epoch": 4.766584766584766, "grad_norm": 0.8457132249837475, "learning_rate": 4.954171933385805e-06, "loss": 0.1317, "loss_nan_ranks": 0, "loss_rank_avg": 0.12466098368167877, "step": 1940, "valid_targets_mean": 1208.1, "valid_targets_min": 664 }, { "epoch": 4.778869778869779, "grad_norm": 0.8118421199942646, "learning_rate": 4.8603480514667836e-06, "loss": 0.1385, "loss_nan_ranks": 0, "loss_rank_avg": 0.1356389820575714, "step": 1945, "valid_targets_mean": 1539.8, "valid_targets_min": 1046 }, { "epoch": 4.791154791154791, "grad_norm": 0.9109442080712121, "learning_rate": 4.767298085290963e-06, "loss": 0.1407, "loss_nan_ranks": 0, "loss_rank_avg": 0.13706953823566437, "step": 1950, "valid_targets_mean": 1200.8, "valid_targets_min": 814 }, { "epoch": 4.803439803439804, "grad_norm": 0.7836538521572521, "learning_rate": 4.675026791429624e-06, "loss": 0.132, "loss_nan_ranks": 0, "loss_rank_avg": 0.13943910598754883, "step": 1955, "valid_targets_mean": 1431.2, "valid_targets_min": 618 }, { "epoch": 4.815724815724816, "grad_norm": 0.8338736840520049, "learning_rate": 4.583538886649525e-06, "loss": 0.1281, "loss_nan_ranks": 0, "loss_rank_avg": 0.11881544440984726, "step": 1960, "valid_targets_mean": 1207.3, "valid_targets_min": 769 }, { "epoch": 4.828009828009828, "grad_norm": 0.8631974963169733, "learning_rate": 4.492839047671764e-06, "loss": 0.1387, "loss_nan_ranks": 0, "loss_rank_avg": 0.13759952783584595, "step": 1965, "valid_targets_mean": 1240.6, "valid_targets_min": 716 }, { "epoch": 4.84029484029484, "grad_norm": 0.8445274635568915, "learning_rate": 4.4029319109327465e-06, "loss": 0.1348, "loss_nan_ranks": 0, "loss_rank_avg": 0.1327393352985382, "step": 1970, "valid_targets_mean": 1349.8, "valid_targets_min": 569 }, { "epoch": 4.8525798525798525, "grad_norm": 0.8379355840832601, "learning_rate": 4.313822072347136e-06, "loss": 0.1293, "loss_nan_ranks": 0, "loss_rank_avg": 0.13074269890785217, "step": 1975, "valid_targets_mean": 1367.1, "valid_targets_min": 802 }, { "epoch": 4.864864864864865, "grad_norm": 0.9109766321758263, "learning_rate": 4.22551408707296e-06, "loss": 0.127, "loss_nan_ranks": 0, "loss_rank_avg": 0.12870089709758759, "step": 1980, "valid_targets_mean": 1204.9, "valid_targets_min": 608 }, { "epoch": 4.877149877149877, "grad_norm": 0.826848683962763, "learning_rate": 4.138012469278714e-06, "loss": 0.1302, "loss_nan_ranks": 0, "loss_rank_avg": 0.12679742276668549, "step": 1985, "valid_targets_mean": 1459.3, "valid_targets_min": 754 }, { "epoch": 4.88943488943489, "grad_norm": 1.0072797324517473, "learning_rate": 4.051321691912649e-06, "loss": 0.1333, "loss_nan_ranks": 0, "loss_rank_avg": 0.1277749389410019, "step": 1990, "valid_targets_mean": 1285.3, "valid_targets_min": 837 }, { "epoch": 4.901719901719901, "grad_norm": 0.8582064043845651, "learning_rate": 3.9654461864740935e-06, "loss": 0.1311, "loss_nan_ranks": 0, "loss_rank_avg": 0.12809713184833527, "step": 1995, "valid_targets_mean": 1380.1, "valid_targets_min": 808 }, { "epoch": 4.914004914004914, "grad_norm": 0.8154469160438075, "learning_rate": 3.880390342786915e-06, "loss": 0.1301, "loss_nan_ranks": 0, "loss_rank_avg": 0.13291479647159576, "step": 2000, "valid_targets_mean": 1429.1, "valid_targets_min": 683 }, { "epoch": 4.926289926289926, "grad_norm": 0.8649560778284509, "learning_rate": 3.7961585087751516e-06, "loss": 0.1318, "loss_nan_ranks": 0, "loss_rank_avg": 0.12271484732627869, "step": 2005, "valid_targets_mean": 1190.2, "valid_targets_min": 655 }, { "epoch": 4.938574938574939, "grad_norm": 0.8829456549947029, "learning_rate": 3.71275499024071e-06, "loss": 0.1323, "loss_nan_ranks": 0, "loss_rank_avg": 0.12908603250980377, "step": 2010, "valid_targets_mean": 1133.1, "valid_targets_min": 700 }, { "epoch": 4.950859950859951, "grad_norm": 0.8485559058439717, "learning_rate": 3.6301840506433083e-06, "loss": 0.1292, "loss_nan_ranks": 0, "loss_rank_avg": 0.12414588779211044, "step": 2015, "valid_targets_mean": 1268.0, "valid_targets_min": 672 }, { "epoch": 4.963144963144963, "grad_norm": 0.9312954958571251, "learning_rate": 3.5484499108824853e-06, "loss": 0.1349, "loss_nan_ranks": 0, "loss_rank_avg": 0.13587301969528198, "step": 2020, "valid_targets_mean": 1167.4, "valid_targets_min": 685 }, { "epoch": 4.975429975429975, "grad_norm": 0.8826698941979991, "learning_rate": 3.4675567490818727e-06, "loss": 0.1396, "loss_nan_ranks": 0, "loss_rank_avg": 0.13824445009231567, "step": 2025, "valid_targets_mean": 1295.9, "valid_targets_min": 600 }, { "epoch": 4.987714987714988, "grad_norm": 0.842942106749072, "learning_rate": 3.3875087003756036e-06, "loss": 0.133, "loss_nan_ranks": 0, "loss_rank_avg": 0.1280279904603958, "step": 2030, "valid_targets_mean": 1316.2, "valid_targets_min": 714 }, { "epoch": 5.0, "grad_norm": 1.560417863126269, "learning_rate": 3.30830985669691e-06, "loss": 0.1304, "loss_nan_ranks": 0, "loss_rank_avg": 0.12781304121017456, "step": 2035, "valid_targets_mean": 1413.3, "valid_targets_min": 603 }, { "epoch": 5.012285012285012, "grad_norm": 0.7931305692109261, "learning_rate": 3.22996426656899e-06, "loss": 0.1234, "loss_nan_ranks": 0, "loss_rank_avg": 0.11616753041744232, "step": 2040, "valid_targets_mean": 1353.3, "valid_targets_min": 818 }, { "epoch": 5.024570024570025, "grad_norm": 1.0099583617389296, "learning_rate": 3.1524759348980096e-06, "loss": 0.1222, "loss_nan_ranks": 0, "loss_rank_avg": 0.12158218771219254, "step": 2045, "valid_targets_mean": 1211.6, "valid_targets_min": 668 }, { "epoch": 5.036855036855036, "grad_norm": 0.8500902588731212, "learning_rate": 3.0758488227684212e-06, "loss": 0.1223, "loss_nan_ranks": 0, "loss_rank_avg": 0.12790502607822418, "step": 2050, "valid_targets_mean": 1328.2, "valid_targets_min": 546 }, { "epoch": 5.049140049140049, "grad_norm": 0.928449305764292, "learning_rate": 3.0000868472404423e-06, "loss": 0.1266, "loss_nan_ranks": 0, "loss_rank_avg": 0.12052057683467865, "step": 2055, "valid_targets_mean": 1168.4, "valid_targets_min": 666 }, { "epoch": 5.061425061425061, "grad_norm": 0.88759587464759, "learning_rate": 2.9251938811498436e-06, "loss": 0.1226, "loss_nan_ranks": 0, "loss_rank_avg": 0.11871778219938278, "step": 2060, "valid_targets_mean": 1182.1, "valid_targets_min": 571 }, { "epoch": 5.073710073710074, "grad_norm": 0.8236200534470612, "learning_rate": 2.8511737529099704e-06, "loss": 0.1218, "loss_nan_ranks": 0, "loss_rank_avg": 0.11925345659255981, "step": 2065, "valid_targets_mean": 1338.1, "valid_targets_min": 741 }, { "epoch": 5.085995085995086, "grad_norm": 0.9644824242433888, "learning_rate": 2.7780302463160235e-06, "loss": 0.1281, "loss_nan_ranks": 0, "loss_rank_avg": 0.13304205238819122, "step": 2070, "valid_targets_mean": 1192.4, "valid_targets_min": 754 }, { "epoch": 5.098280098280099, "grad_norm": 0.8128725043575888, "learning_rate": 2.705767100351673e-06, "loss": 0.1279, "loss_nan_ranks": 0, "loss_rank_avg": 0.11420956254005432, "step": 2075, "valid_targets_mean": 1345.1, "valid_targets_min": 564 }, { "epoch": 5.11056511056511, "grad_norm": 0.9134050119992433, "learning_rate": 2.634388008997899e-06, "loss": 0.1274, "loss_nan_ranks": 0, "loss_rank_avg": 0.12336855381727219, "step": 2080, "valid_targets_mean": 1291.5, "valid_targets_min": 802 }, { "epoch": 5.122850122850123, "grad_norm": 0.8346188690601416, "learning_rate": 2.5638966210441597e-06, "loss": 0.1185, "loss_nan_ranks": 0, "loss_rank_avg": 0.11434096097946167, "step": 2085, "valid_targets_mean": 1294.2, "valid_targets_min": 680 }, { "epoch": 5.135135135135135, "grad_norm": 0.8971067231810141, "learning_rate": 2.4942965399018926e-06, "loss": 0.1244, "loss_nan_ranks": 0, "loss_rank_avg": 0.12700599431991577, "step": 2090, "valid_targets_mean": 1269.8, "valid_targets_min": 477 }, { "epoch": 5.1474201474201475, "grad_norm": 0.8396329615955295, "learning_rate": 2.425591323420289e-06, "loss": 0.1234, "loss_nan_ranks": 0, "loss_rank_avg": 0.12476441264152527, "step": 2095, "valid_targets_mean": 1452.9, "valid_targets_min": 725 }, { "epoch": 5.15970515970516, "grad_norm": 0.9668038964121843, "learning_rate": 2.357784483704444e-06, "loss": 0.1218, "loss_nan_ranks": 0, "loss_rank_avg": 0.12222275882959366, "step": 2100, "valid_targets_mean": 1202.8, "valid_targets_min": 618 }, { "epoch": 5.171990171990172, "grad_norm": 0.8530860924157005, "learning_rate": 2.2908794869358044e-06, "loss": 0.1216, "loss_nan_ranks": 0, "loss_rank_avg": 0.11945752799510956, "step": 2105, "valid_targets_mean": 1264.2, "valid_targets_min": 669 }, { "epoch": 5.184275184275184, "grad_norm": 0.8756759676757957, "learning_rate": 2.2248797531949952e-06, "loss": 0.1234, "loss_nan_ranks": 0, "loss_rank_avg": 0.12002584338188171, "step": 2110, "valid_targets_mean": 1310.5, "valid_targets_min": 600 }, { "epoch": 5.196560196560196, "grad_norm": 0.8828888300496266, "learning_rate": 2.1597886562869917e-06, "loss": 0.1242, "loss_nan_ranks": 0, "loss_rank_avg": 0.13054341077804565, "step": 2115, "valid_targets_mean": 1316.6, "valid_targets_min": 609 }, { "epoch": 5.208845208845209, "grad_norm": 0.8394875589131532, "learning_rate": 2.095609523568638e-06, "loss": 0.1186, "loss_nan_ranks": 0, "loss_rank_avg": 0.1151779368519783, "step": 2120, "valid_targets_mean": 1418.0, "valid_targets_min": 800 }, { "epoch": 5.221130221130221, "grad_norm": 0.8903601792892257, "learning_rate": 2.0323456357785855e-06, "loss": 0.1241, "loss_nan_ranks": 0, "loss_rank_avg": 0.1157693862915039, "step": 2125, "valid_targets_mean": 1233.6, "valid_targets_min": 736 }, { "epoch": 5.233415233415234, "grad_norm": 0.8083599530344519, "learning_rate": 1.970000226869553e-06, "loss": 0.1208, "loss_nan_ranks": 0, "loss_rank_avg": 0.11680185794830322, "step": 2130, "valid_targets_mean": 1303.5, "valid_targets_min": 698 }, { "epoch": 5.245700245700245, "grad_norm": 0.887959202055401, "learning_rate": 1.90857648384305e-06, "loss": 0.1198, "loss_nan_ranks": 0, "loss_rank_avg": 0.11497928202152252, "step": 2135, "valid_targets_mean": 1166.8, "valid_targets_min": 849 }, { "epoch": 5.257985257985258, "grad_norm": 0.8924545156140798, "learning_rate": 1.848077546586431e-06, "loss": 0.1194, "loss_nan_ranks": 0, "loss_rank_avg": 0.1226976066827774, "step": 2140, "valid_targets_mean": 1300.3, "valid_targets_min": 585 }, { "epoch": 5.27027027027027, "grad_norm": 0.887818676899762, "learning_rate": 1.7885065077123976e-06, "loss": 0.1246, "loss_nan_ranks": 0, "loss_rank_avg": 0.11834403872489929, "step": 2145, "valid_targets_mean": 1208.6, "valid_targets_min": 700 }, { "epoch": 5.282555282555283, "grad_norm": 0.8761041176465963, "learning_rate": 1.7298664124009245e-06, "loss": 0.1257, "loss_nan_ranks": 0, "loss_rank_avg": 0.12676090002059937, "step": 2150, "valid_targets_mean": 1258.4, "valid_targets_min": 774 }, { "epoch": 5.294840294840295, "grad_norm": 0.9376811939994709, "learning_rate": 1.672160258243567e-06, "loss": 0.1188, "loss_nan_ranks": 0, "loss_rank_avg": 0.11709423363208771, "step": 2155, "valid_targets_mean": 1109.1, "valid_targets_min": 577 }, { "epoch": 5.3071253071253075, "grad_norm": 0.9358803313254709, "learning_rate": 1.615390995090258e-06, "loss": 0.1197, "loss_nan_ranks": 0, "loss_rank_avg": 0.1253659427165985, "step": 2160, "valid_targets_mean": 1176.1, "valid_targets_min": 716 }, { "epoch": 5.319410319410319, "grad_norm": 0.9101281679651285, "learning_rate": 1.559561524898492e-06, "loss": 0.1225, "loss_nan_ranks": 0, "loss_rank_avg": 0.12124703824520111, "step": 2165, "valid_targets_mean": 1176.6, "valid_targets_min": 697 }, { "epoch": 5.3316953316953315, "grad_norm": 0.9020013037671613, "learning_rate": 1.5046747015849893e-06, "loss": 0.1181, "loss_nan_ranks": 0, "loss_rank_avg": 0.12182813882827759, "step": 2170, "valid_targets_mean": 1287.6, "valid_targets_min": 943 }, { "epoch": 5.343980343980344, "grad_norm": 0.8665564025261498, "learning_rate": 1.4507333308798255e-06, "loss": 0.1251, "loss_nan_ranks": 0, "loss_rank_avg": 0.11275840550661087, "step": 2175, "valid_targets_mean": 1213.7, "valid_targets_min": 680 }, { "epoch": 5.356265356265356, "grad_norm": 0.7948586556833295, "learning_rate": 1.3977401701829752e-06, "loss": 0.1148, "loss_nan_ranks": 0, "loss_rank_avg": 0.11124136298894882, "step": 2180, "valid_targets_mean": 1512.9, "valid_targets_min": 673 }, { "epoch": 5.368550368550369, "grad_norm": 0.8479955134968785, "learning_rate": 1.345697928423384e-06, "loss": 0.1222, "loss_nan_ranks": 0, "loss_rank_avg": 0.11807121336460114, "step": 2185, "valid_targets_mean": 1355.6, "valid_targets_min": 841 }, { "epoch": 5.38083538083538, "grad_norm": 0.913604687178217, "learning_rate": 1.2946092659204767e-06, "loss": 0.1226, "loss_nan_ranks": 0, "loss_rank_avg": 0.11881860345602036, "step": 2190, "valid_targets_mean": 1087.2, "valid_targets_min": 626 }, { "epoch": 5.393120393120393, "grad_norm": 0.9143190692852058, "learning_rate": 1.244476794248175e-06, "loss": 0.1225, "loss_nan_ranks": 0, "loss_rank_avg": 0.12613148987293243, "step": 2195, "valid_targets_mean": 1249.9, "valid_targets_min": 690 }, { "epoch": 5.405405405405405, "grad_norm": 0.9554493796932091, "learning_rate": 1.1953030761014017e-06, "loss": 0.12, "loss_nan_ranks": 0, "loss_rank_avg": 0.1262340396642685, "step": 2200, "valid_targets_mean": 1098.0, "valid_targets_min": 587 }, { "epoch": 5.417690417690418, "grad_norm": 0.8665422814682507, "learning_rate": 1.147090625165055e-06, "loss": 0.1206, "loss_nan_ranks": 0, "loss_rank_avg": 0.11446169763803482, "step": 2205, "valid_targets_mean": 1287.6, "valid_targets_min": 650 }, { "epoch": 5.42997542997543, "grad_norm": 0.8488658718651519, "learning_rate": 1.0998419059855503e-06, "loss": 0.1246, "loss_nan_ranks": 0, "loss_rank_avg": 0.11071212589740753, "step": 2210, "valid_targets_mean": 1244.9, "valid_targets_min": 693 }, { "epoch": 5.442260442260443, "grad_norm": 0.9677990651511228, "learning_rate": 1.053559333844798e-06, "loss": 0.1209, "loss_nan_ranks": 0, "loss_rank_avg": 0.12183261662721634, "step": 2215, "valid_targets_mean": 1159.0, "valid_targets_min": 619 }, { "epoch": 5.454545454545454, "grad_norm": 1.0224261972543698, "learning_rate": 1.0082452746367721e-06, "loss": 0.1186, "loss_nan_ranks": 0, "loss_rank_avg": 0.1227094978094101, "step": 2220, "valid_targets_mean": 1384.5, "valid_targets_min": 695 }, { "epoch": 5.466830466830467, "grad_norm": 0.9075016092237645, "learning_rate": 9.639020447465475e-07, "loss": 0.1229, "loss_nan_ranks": 0, "loss_rank_avg": 0.12406787276268005, "step": 2225, "valid_targets_mean": 1512.6, "valid_targets_min": 864 }, { "epoch": 5.479115479115479, "grad_norm": 0.854872277774117, "learning_rate": 9.205319109318922e-07, "loss": 0.118, "loss_nan_ranks": 0, "loss_rank_avg": 0.11251235008239746, "step": 2230, "valid_targets_mean": 1181.9, "valid_targets_min": 692 }, { "epoch": 5.4914004914004915, "grad_norm": 0.8276956734834705, "learning_rate": 8.781370902074049e-07, "loss": 0.1213, "loss_nan_ranks": 0, "loss_rank_avg": 0.11970800161361694, "step": 2235, "valid_targets_mean": 1536.9, "valid_targets_min": 789 }, { "epoch": 5.503685503685504, "grad_norm": 0.8219281728800845, "learning_rate": 8.367197497311719e-07, "loss": 0.1234, "loss_nan_ranks": 0, "loss_rank_avg": 0.11715812981128693, "step": 2240, "valid_targets_mean": 1464.1, "valid_targets_min": 881 }, { "epoch": 5.515970515970516, "grad_norm": 0.9336086469402823, "learning_rate": 7.962820066939958e-07, "loss": 0.1255, "loss_nan_ranks": 0, "loss_rank_avg": 0.1221204623579979, "step": 2245, "valid_targets_mean": 1244.7, "valid_targets_min": 651 }, { "epoch": 5.528255528255528, "grad_norm": 0.8878725870062483, "learning_rate": 7.568259282111645e-07, "loss": 0.1224, "loss_nan_ranks": 0, "loss_rank_avg": 0.1210436075925827, "step": 2250, "valid_targets_mean": 1273.2, "valid_targets_min": 525 }, { "epoch": 5.54054054054054, "grad_norm": 0.9305753159284449, "learning_rate": 7.183535312167755e-07, "loss": 0.1259, "loss_nan_ranks": 0, "loss_rank_avg": 0.12594647705554962, "step": 2255, "valid_targets_mean": 1302.9, "valid_targets_min": 711 }, { "epoch": 5.552825552825553, "grad_norm": 0.8196405271986151, "learning_rate": 6.808667823606474e-07, "loss": 0.1191, "loss_nan_ranks": 0, "loss_rank_avg": 0.11032858490943909, "step": 2260, "valid_targets_mean": 1217.7, "valid_targets_min": 633 }, { "epoch": 5.565110565110565, "grad_norm": 0.9629415002181919, "learning_rate": 6.443675979077779e-07, "loss": 0.1194, "loss_nan_ranks": 0, "loss_rank_avg": 0.11967958509922028, "step": 2265, "valid_targets_mean": 1159.2, "valid_targets_min": 648 }, { "epoch": 5.577395577395578, "grad_norm": 0.8877542687460271, "learning_rate": 6.088578436403847e-07, "loss": 0.1197, "loss_nan_ranks": 0, "loss_rank_avg": 0.12060631066560745, "step": 2270, "valid_targets_mean": 1184.3, "valid_targets_min": 773 }, { "epoch": 5.58968058968059, "grad_norm": 0.9064893261528624, "learning_rate": 5.743393347625436e-07, "loss": 0.1203, "loss_nan_ranks": 0, "loss_rank_avg": 0.12473144382238388, "step": 2275, "valid_targets_mean": 1232.4, "valid_targets_min": 682 }, { "epoch": 5.601965601965602, "grad_norm": 0.891255492857794, "learning_rate": 5.408138358073833e-07, "loss": 0.1215, "loss_nan_ranks": 0, "loss_rank_avg": 0.11696426570415497, "step": 2280, "valid_targets_mean": 1211.9, "valid_targets_min": 817 }, { "epoch": 5.614250614250614, "grad_norm": 0.8804064021646336, "learning_rate": 5.082830605468969e-07, "loss": 0.1249, "loss_nan_ranks": 0, "loss_rank_avg": 0.12240994721651077, "step": 2285, "valid_targets_mean": 1465.6, "valid_targets_min": 693 }, { "epoch": 5.6265356265356266, "grad_norm": 0.884415144400364, "learning_rate": 4.767486719043235e-07, "loss": 0.1265, "loss_nan_ranks": 0, "loss_rank_avg": 0.12938669323921204, "step": 2290, "valid_targets_mean": 1303.9, "valid_targets_min": 618 }, { "epoch": 5.638820638820639, "grad_norm": 0.853083172107117, "learning_rate": 4.4621228186915833e-07, "loss": 0.1199, "loss_nan_ranks": 0, "loss_rank_avg": 0.11663314700126648, "step": 2295, "valid_targets_mean": 1375.6, "valid_targets_min": 663 }, { "epoch": 5.651105651105651, "grad_norm": 0.8179139441747029, "learning_rate": 4.166754514147275e-07, "loss": 0.1205, "loss_nan_ranks": 0, "loss_rank_avg": 0.11481982469558716, "step": 2300, "valid_targets_mean": 1349.8, "valid_targets_min": 694 }, { "epoch": 5.663390663390663, "grad_norm": 0.8642291461817891, "learning_rate": 3.881396904184231e-07, "loss": 0.1249, "loss_nan_ranks": 0, "loss_rank_avg": 0.13626883924007416, "step": 2305, "valid_targets_mean": 1455.5, "valid_targets_min": 857 }, { "epoch": 5.675675675675675, "grad_norm": 0.855015151000873, "learning_rate": 3.6060645758449584e-07, "loss": 0.118, "loss_nan_ranks": 0, "loss_rank_avg": 0.10772322118282318, "step": 2310, "valid_targets_mean": 1306.6, "valid_targets_min": 748 }, { "epoch": 5.687960687960688, "grad_norm": 0.939814937762572, "learning_rate": 3.34077160369497e-07, "loss": 0.123, "loss_nan_ranks": 0, "loss_rank_avg": 0.12797702848911285, "step": 2315, "valid_targets_mean": 1318.8, "valid_targets_min": 648 }, { "epoch": 5.7002457002457, "grad_norm": 0.8694466054617536, "learning_rate": 3.08553154910336e-07, "loss": 0.1236, "loss_nan_ranks": 0, "loss_rank_avg": 0.11525967717170715, "step": 2320, "valid_targets_mean": 1257.1, "valid_targets_min": 745 }, { "epoch": 5.712530712530713, "grad_norm": 0.9213076662745712, "learning_rate": 2.840357459549492e-07, "loss": 0.1209, "loss_nan_ranks": 0, "loss_rank_avg": 0.12923762202262878, "step": 2325, "valid_targets_mean": 1301.2, "valid_targets_min": 673 }, { "epoch": 5.724815724815725, "grad_norm": 0.8510568275894363, "learning_rate": 2.6052618679560884e-07, "loss": 0.124, "loss_nan_ranks": 0, "loss_rank_avg": 0.12109372019767761, "step": 2330, "valid_targets_mean": 1400.9, "valid_targets_min": 586 }, { "epoch": 5.737100737100737, "grad_norm": 0.9026468731374053, "learning_rate": 2.380256792048541e-07, "loss": 0.117, "loss_nan_ranks": 0, "loss_rank_avg": 0.11053834855556488, "step": 2335, "valid_targets_mean": 1192.9, "valid_targets_min": 690 }, { "epoch": 5.749385749385749, "grad_norm": 0.8932267089116724, "learning_rate": 2.1653537337405383e-07, "loss": 0.1199, "loss_nan_ranks": 0, "loss_rank_avg": 0.12436593323945999, "step": 2340, "valid_targets_mean": 1306.6, "valid_targets_min": 683 }, { "epoch": 5.761670761670762, "grad_norm": 0.8144937839460926, "learning_rate": 1.9605636785462234e-07, "loss": 0.1179, "loss_nan_ranks": 0, "loss_rank_avg": 0.11719940602779388, "step": 2345, "valid_targets_mean": 1488.6, "valid_targets_min": 647 }, { "epoch": 5.773955773955774, "grad_norm": 0.8509307547721932, "learning_rate": 1.7658970950185095e-07, "loss": 0.1189, "loss_nan_ranks": 0, "loss_rank_avg": 0.1175820529460907, "step": 2350, "valid_targets_mean": 1377.8, "valid_targets_min": 627 }, { "epoch": 5.7862407862407865, "grad_norm": 0.8755385347465204, "learning_rate": 1.5813639342140197e-07, "loss": 0.1207, "loss_nan_ranks": 0, "loss_rank_avg": 0.122462198138237, "step": 2355, "valid_targets_mean": 1414.1, "valid_targets_min": 559 }, { "epoch": 5.798525798525798, "grad_norm": 0.8354198048278062, "learning_rate": 1.4069736291843605e-07, "loss": 0.1216, "loss_nan_ranks": 0, "loss_rank_avg": 0.12454007565975189, "step": 2360, "valid_targets_mean": 1355.3, "valid_targets_min": 769 }, { "epoch": 5.8108108108108105, "grad_norm": 0.9279865336013997, "learning_rate": 1.242735094493952e-07, "loss": 0.1267, "loss_nan_ranks": 0, "loss_rank_avg": 0.1297607570886612, "step": 2365, "valid_targets_mean": 1301.8, "valid_targets_min": 709 }, { "epoch": 5.823095823095823, "grad_norm": 0.8356599989544432, "learning_rate": 1.0886567257643033e-07, "loss": 0.1246, "loss_nan_ranks": 0, "loss_rank_avg": 0.13703671097755432, "step": 2370, "valid_targets_mean": 1503.9, "valid_targets_min": 754 }, { "epoch": 5.835380835380835, "grad_norm": 0.8976360214913625, "learning_rate": 9.447463992448891e-08, "loss": 0.1223, "loss_nan_ranks": 0, "loss_rank_avg": 0.1237124353647232, "step": 2375, "valid_targets_mean": 1253.7, "valid_targets_min": 728 }, { "epoch": 5.847665847665848, "grad_norm": 0.8469797015789384, "learning_rate": 8.110114714104277e-08, "loss": 0.1194, "loss_nan_ranks": 0, "loss_rank_avg": 0.12047050893306732, "step": 2380, "valid_targets_mean": 1357.1, "valid_targets_min": 798 }, { "epoch": 5.85995085995086, "grad_norm": 0.8474816619307004, "learning_rate": 6.874587785849152e-08, "loss": 0.1159, "loss_nan_ranks": 0, "loss_rank_avg": 0.11204469203948975, "step": 2385, "valid_targets_mean": 1219.0, "valid_targets_min": 563 }, { "epoch": 5.872235872235873, "grad_norm": 0.7930581021314657, "learning_rate": 5.7409463659219286e-08, "loss": 0.1248, "loss_nan_ranks": 0, "loss_rank_avg": 0.12501925230026245, "step": 2390, "valid_targets_mean": 1584.6, "valid_targets_min": 639 }, { "epoch": 5.884520884520884, "grad_norm": 0.8356276415586511, "learning_rate": 4.709248404329625e-08, "loss": 0.1185, "loss_nan_ranks": 0, "loss_rank_avg": 0.11507894843816757, "step": 2395, "valid_targets_mean": 1260.2, "valid_targets_min": 783 }, { "epoch": 5.896805896805897, "grad_norm": 0.902619975905144, "learning_rate": 3.7795466398868885e-08, "loss": 0.1201, "loss_nan_ranks": 0, "loss_rank_avg": 0.11913271248340607, "step": 2400, "valid_targets_mean": 1240.2, "valid_targets_min": 633 }, { "epoch": 5.909090909090909, "grad_norm": 0.8934023630859167, "learning_rate": 2.9518885975192702e-08, "loss": 0.1223, "loss_nan_ranks": 0, "loss_rank_avg": 0.12368829548358917, "step": 2405, "valid_targets_mean": 1286.1, "valid_targets_min": 679 }, { "epoch": 5.921375921375922, "grad_norm": 0.9345968672915899, "learning_rate": 2.226316585833832e-08, "loss": 0.1244, "loss_nan_ranks": 0, "loss_rank_avg": 0.13608361780643463, "step": 2410, "valid_targets_mean": 1285.1, "valid_targets_min": 538 }, { "epoch": 5.933660933660933, "grad_norm": 0.9107913050491543, "learning_rate": 1.6028676949570997e-08, "loss": 0.1272, "loss_nan_ranks": 0, "loss_rank_avg": 0.1253403127193451, "step": 2415, "valid_targets_mean": 1235.6, "valid_targets_min": 770 }, { "epoch": 5.945945945945946, "grad_norm": 0.84381108981527, "learning_rate": 1.0815737946383575e-08, "loss": 0.1185, "loss_nan_ranks": 0, "loss_rank_avg": 0.11838357150554657, "step": 2420, "valid_targets_mean": 1372.6, "valid_targets_min": 965 }, { "epoch": 5.958230958230958, "grad_norm": 0.8810683681797915, "learning_rate": 6.624615326207284e-09, "loss": 0.1186, "loss_nan_ranks": 0, "loss_rank_avg": 0.1146058738231659, "step": 2425, "valid_targets_mean": 1213.4, "valid_targets_min": 575 }, { "epoch": 5.9705159705159705, "grad_norm": 0.7852839819224546, "learning_rate": 3.4555233327893124e-09, "loss": 0.1289, "loss_nan_ranks": 0, "loss_rank_avg": 0.11223162710666656, "step": 2430, "valid_targets_mean": 1430.3, "valid_targets_min": 724 }, { "epoch": 5.982800982800983, "grad_norm": 0.8528999444132811, "learning_rate": 1.3086239652415621e-09, "loss": 0.1189, "loss_nan_ranks": 0, "loss_rank_avg": 0.11062179505825043, "step": 2435, "valid_targets_mean": 1334.7, "valid_targets_min": 751 }, { "epoch": 5.995085995085995, "grad_norm": 0.8859257547201402, "learning_rate": 1.840269697628294e-10, "loss": 0.1209, "loss_nan_ranks": 0, "loss_rank_avg": 0.12078575789928436, "step": 2440, "valid_targets_mean": 1338.7, "valid_targets_min": 687 }, { "epoch": 6.0, "loss_nan_ranks": 0, "loss_rank_avg": 0.12043584883213043, "step": 2442, "total_flos": 254043172110336.0, "train_loss": 0.18853386797248878, "train_runtime": 8750.677, "train_samples_per_second": 4.459, "train_steps_per_second": 0.279, "valid_targets_mean": 1261.1, "valid_targets_min": 857 } ], "logging_steps": 5, "max_steps": 2442, "num_input_tokens_seen": 0, "num_train_epochs": 6, "save_steps": 100, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 254043172110336.0, "train_batch_size": 1, "trial_name": null, "trial_params": null }