diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,9496 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 7.0, + "eval_steps": 500, + "global_step": 4298, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.008143322475570033, + "grad_norm": 8.311565913810094, + "learning_rate": 3.7209302325581396e-07, + "loss": 0.8883, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.9355264902114868, + "step": 5, + "valid_targets_mean": 4925.9, + "valid_targets_min": 914 + }, + { + "epoch": 0.016286644951140065, + "grad_norm": 8.753673353957197, + "learning_rate": 8.372093023255814e-07, + "loss": 0.9065, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.9554992914199829, + "step": 10, + "valid_targets_mean": 3590.0, + "valid_targets_min": 869 + }, + { + "epoch": 0.024429967426710098, + "grad_norm": 9.806302899207305, + "learning_rate": 1.302325581395349e-06, + "loss": 0.9135, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.9325845241546631, + "step": 15, + "valid_targets_mean": 3602.4, + "valid_targets_min": 702 + }, + { + "epoch": 0.03257328990228013, + "grad_norm": 7.541374737682111, + "learning_rate": 1.7674418604651164e-06, + "loss": 0.8664, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.8187031745910645, + "step": 20, + "valid_targets_mean": 2945.5, + "valid_targets_min": 788 + }, + { + "epoch": 0.04071661237785016, + "grad_norm": 5.1866113296187955, + "learning_rate": 2.232558139534884e-06, + "loss": 0.8427, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.7606691122055054, + "step": 25, + "valid_targets_mean": 5313.6, + "valid_targets_min": 790 + }, + { + "epoch": 0.048859934853420196, + "grad_norm": 4.368155175315456, + "learning_rate": 2.6976744186046517e-06, + "loss": 0.8224, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.8725709915161133, + "step": 30, + "valid_targets_mean": 3389.4, + "valid_targets_min": 837 + }, + { + "epoch": 0.057003257328990226, + "grad_norm": 2.8568585860357194, + "learning_rate": 3.1627906976744187e-06, + "loss": 0.7759, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.6626513004302979, + "step": 35, + "valid_targets_mean": 3402.8, + "valid_targets_min": 769 + }, + { + "epoch": 0.06514657980456026, + "grad_norm": 1.7015593860664007, + "learning_rate": 3.6279069767441866e-06, + "loss": 0.7459, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.7180029153823853, + "step": 40, + "valid_targets_mean": 5698.9, + "valid_targets_min": 678 + }, + { + "epoch": 0.0732899022801303, + "grad_norm": 1.717961222716014, + "learning_rate": 4.0930232558139536e-06, + "loss": 0.7252, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.6760666966438293, + "step": 45, + "valid_targets_mean": 2480.9, + "valid_targets_min": 857 + }, + { + "epoch": 0.08143322475570032, + "grad_norm": 1.5570855826171561, + "learning_rate": 4.558139534883721e-06, + "loss": 0.6641, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.6781320571899414, + "step": 50, + "valid_targets_mean": 2977.6, + "valid_targets_min": 675 + }, + { + "epoch": 0.08957654723127036, + "grad_norm": 1.2951768898678446, + "learning_rate": 5.023255813953489e-06, + "loss": 0.6741, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.7115561962127686, + "step": 55, + "valid_targets_mean": 2806.4, + "valid_targets_min": 783 + }, + { + "epoch": 0.09771986970684039, + "grad_norm": 1.1597039424791364, + "learning_rate": 5.488372093023256e-06, + "loss": 0.7487, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.7965254783630371, + "step": 60, + "valid_targets_mean": 4181.8, + "valid_targets_min": 775 + }, + { + "epoch": 0.10586319218241043, + "grad_norm": 0.923688536220421, + "learning_rate": 5.953488372093023e-06, + "loss": 0.6809, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.7125787734985352, + "step": 65, + "valid_targets_mean": 4796.8, + "valid_targets_min": 733 + }, + { + "epoch": 0.11400651465798045, + "grad_norm": 0.7304099607301832, + "learning_rate": 6.418604651162791e-06, + "loss": 0.6509, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.6906214356422424, + "step": 70, + "valid_targets_mean": 7410.8, + "valid_targets_min": 718 + }, + { + "epoch": 0.12214983713355049, + "grad_norm": 0.9698657385948507, + "learning_rate": 6.883720930232559e-06, + "loss": 0.6553, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.5701311230659485, + "step": 75, + "valid_targets_mean": 2271.5, + "valid_targets_min": 707 + }, + { + "epoch": 0.13029315960912052, + "grad_norm": 0.9093836693724042, + "learning_rate": 7.348837209302326e-06, + "loss": 0.634, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.7477360963821411, + "step": 80, + "valid_targets_mean": 3256.5, + "valid_targets_min": 714 + }, + { + "epoch": 0.13843648208469056, + "grad_norm": 0.9167741781230302, + "learning_rate": 7.813953488372094e-06, + "loss": 0.5491, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.5618689060211182, + "step": 85, + "valid_targets_mean": 3836.1, + "valid_targets_min": 834 + }, + { + "epoch": 0.1465798045602606, + "grad_norm": 0.648418054751768, + "learning_rate": 8.279069767441861e-06, + "loss": 0.6285, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.6093778014183044, + "step": 90, + "valid_targets_mean": 5542.2, + "valid_targets_min": 930 + }, + { + "epoch": 0.15472312703583063, + "grad_norm": 0.8102487667597547, + "learning_rate": 8.74418604651163e-06, + "loss": 0.5763, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4609588086605072, + "step": 95, + "valid_targets_mean": 1985.1, + "valid_targets_min": 756 + }, + { + "epoch": 0.16286644951140064, + "grad_norm": 0.6129432904271541, + "learning_rate": 9.209302325581397e-06, + "loss": 0.6286, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.6550267934799194, + "step": 100, + "valid_targets_mean": 6673.8, + "valid_targets_min": 1104 + }, + { + "epoch": 0.17100977198697068, + "grad_norm": 0.6274899663681142, + "learning_rate": 9.674418604651164e-06, + "loss": 0.6397, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.6272561550140381, + "step": 105, + "valid_targets_mean": 4838.2, + "valid_targets_min": 904 + }, + { + "epoch": 0.1791530944625407, + "grad_norm": 0.7169036028643929, + "learning_rate": 1.0139534883720932e-05, + "loss": 0.6452, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.6799885630607605, + "step": 110, + "valid_targets_mean": 5698.4, + "valid_targets_min": 1030 + }, + { + "epoch": 0.18729641693811075, + "grad_norm": 0.6562555598064262, + "learning_rate": 1.0604651162790698e-05, + "loss": 0.5926, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.5903303623199463, + "step": 115, + "valid_targets_mean": 4719.6, + "valid_targets_min": 665 + }, + { + "epoch": 0.19543973941368079, + "grad_norm": 0.9018596207395803, + "learning_rate": 1.1069767441860466e-05, + "loss": 0.5446, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.43482571840286255, + "step": 120, + "valid_targets_mean": 1722.9, + "valid_targets_min": 713 + }, + { + "epoch": 0.20358306188925082, + "grad_norm": 0.7727449075635818, + "learning_rate": 1.1534883720930235e-05, + "loss": 0.5331, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.5089750289916992, + "step": 125, + "valid_targets_mean": 2620.0, + "valid_targets_min": 787 + }, + { + "epoch": 0.21172638436482086, + "grad_norm": 0.5455879525853441, + "learning_rate": 1.2e-05, + "loss": 0.5647, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4707012474536896, + "step": 130, + "valid_targets_mean": 5089.4, + "valid_targets_min": 801 + }, + { + "epoch": 0.21986970684039087, + "grad_norm": 0.6648212551456909, + "learning_rate": 1.2465116279069769e-05, + "loss": 0.5768, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.7134335041046143, + "step": 135, + "valid_targets_mean": 7332.2, + "valid_targets_min": 892 + }, + { + "epoch": 0.2280130293159609, + "grad_norm": 0.5677633095923937, + "learning_rate": 1.2930232558139534e-05, + "loss": 0.5335, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.5589157342910767, + "step": 140, + "valid_targets_mean": 4799.8, + "valid_targets_min": 872 + }, + { + "epoch": 0.23615635179153094, + "grad_norm": 0.7942662623274093, + "learning_rate": 1.3395348837209303e-05, + "loss": 0.4997, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4136490225791931, + "step": 145, + "valid_targets_mean": 1973.1, + "valid_targets_min": 833 + }, + { + "epoch": 0.24429967426710097, + "grad_norm": 0.5237810512895014, + "learning_rate": 1.3860465116279072e-05, + "loss": 0.5374, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.5460580587387085, + "step": 150, + "valid_targets_mean": 6243.1, + "valid_targets_min": 796 + }, + { + "epoch": 0.252442996742671, + "grad_norm": 0.6388383010933352, + "learning_rate": 1.4325581395348837e-05, + "loss": 0.5511, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.5365778803825378, + "step": 155, + "valid_targets_mean": 5688.1, + "valid_targets_min": 836 + }, + { + "epoch": 0.26058631921824105, + "grad_norm": 0.7519729010364815, + "learning_rate": 1.4790697674418606e-05, + "loss": 0.522, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4626702070236206, + "step": 160, + "valid_targets_mean": 3171.6, + "valid_targets_min": 729 + }, + { + "epoch": 0.2687296416938111, + "grad_norm": 0.853073948819013, + "learning_rate": 1.5255813953488374e-05, + "loss": 0.4995, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.36380690336227417, + "step": 165, + "valid_targets_mean": 1792.9, + "valid_targets_min": 853 + }, + { + "epoch": 0.2768729641693811, + "grad_norm": 0.8269851416919346, + "learning_rate": 1.572093023255814e-05, + "loss": 0.5833, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.6016668081283569, + "step": 170, + "valid_targets_mean": 4212.9, + "valid_targets_min": 824 + }, + { + "epoch": 0.28501628664495116, + "grad_norm": 0.680056613801129, + "learning_rate": 1.618604651162791e-05, + "loss": 0.5346, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.5464662313461304, + "step": 175, + "valid_targets_mean": 5401.3, + "valid_targets_min": 877 + }, + { + "epoch": 0.2931596091205212, + "grad_norm": 0.6942873506874393, + "learning_rate": 1.6651162790697674e-05, + "loss": 0.4935, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3854817748069763, + "step": 180, + "valid_targets_mean": 2997.4, + "valid_targets_min": 787 + }, + { + "epoch": 0.30130293159609123, + "grad_norm": 0.5273158223723317, + "learning_rate": 1.7116279069767444e-05, + "loss": 0.5563, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.5888770818710327, + "step": 185, + "valid_targets_mean": 7522.1, + "valid_targets_min": 839 + }, + { + "epoch": 0.30944625407166126, + "grad_norm": 0.5806374772703676, + "learning_rate": 1.758139534883721e-05, + "loss": 0.4873, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.45297908782958984, + "step": 190, + "valid_targets_mean": 3991.6, + "valid_targets_min": 755 + }, + { + "epoch": 0.31758957654723124, + "grad_norm": 0.8104127174065296, + "learning_rate": 1.8046511627906978e-05, + "loss": 0.4719, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4446621239185333, + "step": 195, + "valid_targets_mean": 2060.2, + "valid_targets_min": 1020 + }, + { + "epoch": 0.3257328990228013, + "grad_norm": 0.5722537572908555, + "learning_rate": 1.8511627906976745e-05, + "loss": 0.481, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.43109047412872314, + "step": 200, + "valid_targets_mean": 5595.8, + "valid_targets_min": 931 + }, + { + "epoch": 0.3338762214983713, + "grad_norm": 0.5908029643934973, + "learning_rate": 1.8976744186046516e-05, + "loss": 0.5152, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.5604087114334106, + "step": 205, + "valid_targets_mean": 5611.2, + "valid_targets_min": 875 + }, + { + "epoch": 0.34201954397394135, + "grad_norm": 0.8790252398344034, + "learning_rate": 1.944186046511628e-05, + "loss": 0.4569, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3378850221633911, + "step": 210, + "valid_targets_mean": 1683.9, + "valid_targets_min": 779 + }, + { + "epoch": 0.3501628664495114, + "grad_norm": 0.6747714553086839, + "learning_rate": 1.990697674418605e-05, + "loss": 0.4961, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.5570134520530701, + "step": 215, + "valid_targets_mean": 4686.6, + "valid_targets_min": 780 + }, + { + "epoch": 0.3583061889250814, + "grad_norm": 0.6596973456182018, + "learning_rate": 2.0372093023255813e-05, + "loss": 0.513, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.6082668304443359, + "step": 220, + "valid_targets_mean": 5191.1, + "valid_targets_min": 766 + }, + { + "epoch": 0.36644951140065146, + "grad_norm": 0.6756508136051012, + "learning_rate": 2.0837209302325584e-05, + "loss": 0.4512, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3995095491409302, + "step": 225, + "valid_targets_mean": 3127.7, + "valid_targets_min": 750 + }, + { + "epoch": 0.3745928338762215, + "grad_norm": 0.6186267946792766, + "learning_rate": 2.130232558139535e-05, + "loss": 0.486, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.44020891189575195, + "step": 230, + "valid_targets_mean": 4827.0, + "valid_targets_min": 970 + }, + { + "epoch": 0.38273615635179153, + "grad_norm": 0.7194340950893335, + "learning_rate": 2.176744186046512e-05, + "loss": 0.5006, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.5615401268005371, + "step": 235, + "valid_targets_mean": 3825.9, + "valid_targets_min": 899 + }, + { + "epoch": 0.39087947882736157, + "grad_norm": 0.8745288909484125, + "learning_rate": 2.2232558139534888e-05, + "loss": 0.5293, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3951263129711151, + "step": 240, + "valid_targets_mean": 1810.2, + "valid_targets_min": 881 + }, + { + "epoch": 0.3990228013029316, + "grad_norm": 0.7114259209489511, + "learning_rate": 2.269767441860465e-05, + "loss": 0.5131, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.5825862884521484, + "step": 245, + "valid_targets_mean": 4072.7, + "valid_targets_min": 805 + }, + { + "epoch": 0.40716612377850164, + "grad_norm": 0.7885529447444148, + "learning_rate": 2.316279069767442e-05, + "loss": 0.505, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.39739561080932617, + "step": 250, + "valid_targets_mean": 2379.0, + "valid_targets_min": 775 + }, + { + "epoch": 0.4153094462540717, + "grad_norm": 0.5471931718098969, + "learning_rate": 2.362790697674419e-05, + "loss": 0.4529, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.5028834342956543, + "step": 255, + "valid_targets_mean": 7715.4, + "valid_targets_min": 731 + }, + { + "epoch": 0.4234527687296417, + "grad_norm": 0.7624882453443145, + "learning_rate": 2.4093023255813956e-05, + "loss": 0.5279, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.587142825126648, + "step": 260, + "valid_targets_mean": 8774.4, + "valid_targets_min": 732 + }, + { + "epoch": 0.43159609120521175, + "grad_norm": 0.8985294190542156, + "learning_rate": 2.4558139534883726e-05, + "loss": 0.4909, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.49783024191856384, + "step": 265, + "valid_targets_mean": 5730.7, + "valid_targets_min": 688 + }, + { + "epoch": 0.43973941368078173, + "grad_norm": 0.6496964432593677, + "learning_rate": 2.502325581395349e-05, + "loss": 0.4574, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.46534672379493713, + "step": 270, + "valid_targets_mean": 4001.8, + "valid_targets_min": 1133 + }, + { + "epoch": 0.44788273615635177, + "grad_norm": 0.5768157302772724, + "learning_rate": 2.5488372093023257e-05, + "loss": 0.4979, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.44525301456451416, + "step": 275, + "valid_targets_mean": 4573.9, + "valid_targets_min": 588 + }, + { + "epoch": 0.4560260586319218, + "grad_norm": 0.7159867213905933, + "learning_rate": 2.5953488372093024e-05, + "loss": 0.5181, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.5269831418991089, + "step": 280, + "valid_targets_mean": 3979.0, + "valid_targets_min": 835 + }, + { + "epoch": 0.46416938110749184, + "grad_norm": 0.5632017729712374, + "learning_rate": 2.6418604651162794e-05, + "loss": 0.4754, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4767885208129883, + "step": 285, + "valid_targets_mean": 6571.1, + "valid_targets_min": 816 + }, + { + "epoch": 0.4723127035830619, + "grad_norm": 0.7935114056080335, + "learning_rate": 2.688372093023256e-05, + "loss": 0.5187, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.6057520508766174, + "step": 290, + "valid_targets_mean": 4861.0, + "valid_targets_min": 874 + }, + { + "epoch": 0.4804560260586319, + "grad_norm": 0.6125530864942849, + "learning_rate": 2.7348837209302325e-05, + "loss": 0.4471, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4736894965171814, + "step": 295, + "valid_targets_mean": 4213.1, + "valid_targets_min": 883 + }, + { + "epoch": 0.48859934853420195, + "grad_norm": 0.6583651388460154, + "learning_rate": 2.7813953488372095e-05, + "loss": 0.4619, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4546867311000824, + "step": 300, + "valid_targets_mean": 3463.4, + "valid_targets_min": 1254 + }, + { + "epoch": 0.496742671009772, + "grad_norm": 0.6788002504903913, + "learning_rate": 2.8279069767441862e-05, + "loss": 0.51, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.619672417640686, + "step": 305, + "valid_targets_mean": 6291.1, + "valid_targets_min": 842 + }, + { + "epoch": 0.504885993485342, + "grad_norm": 0.8232111487386503, + "learning_rate": 2.874418604651163e-05, + "loss": 0.4872, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.5414068698883057, + "step": 310, + "valid_targets_mean": 4610.5, + "valid_targets_min": 791 + }, + { + "epoch": 0.5130293159609121, + "grad_norm": 0.8182634293527089, + "learning_rate": 2.92093023255814e-05, + "loss": 0.4844, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3856525123119354, + "step": 315, + "valid_targets_mean": 2094.9, + "valid_targets_min": 901 + }, + { + "epoch": 0.5211726384364821, + "grad_norm": 0.6552893854175649, + "learning_rate": 2.9674418604651167e-05, + "loss": 0.5055, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.5040791034698486, + "step": 320, + "valid_targets_mean": 4607.6, + "valid_targets_min": 700 + }, + { + "epoch": 0.5293159609120521, + "grad_norm": 0.763351351988472, + "learning_rate": 3.013953488372093e-05, + "loss": 0.4491, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.38178160786628723, + "step": 325, + "valid_targets_mean": 2645.2, + "valid_targets_min": 916 + }, + { + "epoch": 0.5374592833876222, + "grad_norm": 0.7801169419286078, + "learning_rate": 3.06046511627907e-05, + "loss": 0.5025, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.5423239469528198, + "step": 330, + "valid_targets_mean": 3118.2, + "valid_targets_min": 686 + }, + { + "epoch": 0.5456026058631922, + "grad_norm": 0.7607420797537494, + "learning_rate": 3.1069767441860465e-05, + "loss": 0.4988, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4902603030204773, + "step": 335, + "valid_targets_mean": 3903.1, + "valid_targets_min": 840 + }, + { + "epoch": 0.5537459283387622, + "grad_norm": 0.768831079174095, + "learning_rate": 3.1534883720930235e-05, + "loss": 0.4514, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.47980964183807373, + "step": 340, + "valid_targets_mean": 2837.4, + "valid_targets_min": 875 + }, + { + "epoch": 0.5618892508143323, + "grad_norm": 0.7354420953406804, + "learning_rate": 3.2000000000000005e-05, + "loss": 0.4391, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4737946391105652, + "step": 345, + "valid_targets_mean": 5034.9, + "valid_targets_min": 837 + }, + { + "epoch": 0.5700325732899023, + "grad_norm": 0.5678619892248343, + "learning_rate": 3.246511627906977e-05, + "loss": 0.4728, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.36025869846343994, + "step": 350, + "valid_targets_mean": 3677.4, + "valid_targets_min": 796 + }, + { + "epoch": 0.5781758957654723, + "grad_norm": 0.6709898199500682, + "learning_rate": 3.293023255813954e-05, + "loss": 0.5133, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4977305829524994, + "step": 355, + "valid_targets_mean": 4110.8, + "valid_targets_min": 758 + }, + { + "epoch": 0.5863192182410424, + "grad_norm": 0.6370361603955759, + "learning_rate": 3.33953488372093e-05, + "loss": 0.4417, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.5579526424407959, + "step": 360, + "valid_targets_mean": 6926.8, + "valid_targets_min": 870 + }, + { + "epoch": 0.5944625407166124, + "grad_norm": 0.7938233991874791, + "learning_rate": 3.386046511627907e-05, + "loss": 0.4257, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3984709680080414, + "step": 365, + "valid_targets_mean": 2719.4, + "valid_targets_min": 913 + }, + { + "epoch": 0.6026058631921825, + "grad_norm": 0.9370480665847201, + "learning_rate": 3.4325581395348844e-05, + "loss": 0.4209, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4783487319946289, + "step": 370, + "valid_targets_mean": 3183.9, + "valid_targets_min": 936 + }, + { + "epoch": 0.6107491856677525, + "grad_norm": 0.8199142034777909, + "learning_rate": 3.479069767441861e-05, + "loss": 0.501, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.5220547914505005, + "step": 375, + "valid_targets_mean": 2896.6, + "valid_targets_min": 812 + }, + { + "epoch": 0.6188925081433225, + "grad_norm": 0.5464204200627687, + "learning_rate": 3.525581395348837e-05, + "loss": 0.4522, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.432759553194046, + "step": 380, + "valid_targets_mean": 5079.4, + "valid_targets_min": 871 + }, + { + "epoch": 0.6270358306188925, + "grad_norm": 0.691558514407719, + "learning_rate": 3.572093023255814e-05, + "loss": 0.4427, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.464864581823349, + "step": 385, + "valid_targets_mean": 4909.5, + "valid_targets_min": 889 + }, + { + "epoch": 0.6351791530944625, + "grad_norm": 0.6884985093908113, + "learning_rate": 3.618604651162791e-05, + "loss": 0.4493, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.5042122602462769, + "step": 390, + "valid_targets_mean": 4825.1, + "valid_targets_min": 894 + }, + { + "epoch": 0.6433224755700325, + "grad_norm": 0.7449463159503883, + "learning_rate": 3.6651162790697675e-05, + "loss": 0.4958, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.5508541464805603, + "step": 395, + "valid_targets_mean": 4888.5, + "valid_targets_min": 795 + }, + { + "epoch": 0.6514657980456026, + "grad_norm": 0.7140948162001058, + "learning_rate": 3.7116279069767446e-05, + "loss": 0.4838, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.6287133693695068, + "step": 400, + "valid_targets_mean": 5661.4, + "valid_targets_min": 1023 + }, + { + "epoch": 0.6596091205211726, + "grad_norm": 0.7298224733596144, + "learning_rate": 3.758139534883721e-05, + "loss": 0.5033, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.5601798295974731, + "step": 405, + "valid_targets_mean": 5108.4, + "valid_targets_min": 920 + }, + { + "epoch": 0.6677524429967426, + "grad_norm": 0.6684916393646991, + "learning_rate": 3.804651162790698e-05, + "loss": 0.4532, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.38387423753738403, + "step": 410, + "valid_targets_mean": 3076.4, + "valid_targets_min": 792 + }, + { + "epoch": 0.6758957654723127, + "grad_norm": 0.6967768446616088, + "learning_rate": 3.851162790697675e-05, + "loss": 0.5134, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.5079735517501831, + "step": 415, + "valid_targets_mean": 3755.5, + "valid_targets_min": 762 + }, + { + "epoch": 0.6840390879478827, + "grad_norm": 0.7138632859606141, + "learning_rate": 3.8976744186046514e-05, + "loss": 0.4536, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.5169891715049744, + "step": 420, + "valid_targets_mean": 4002.9, + "valid_targets_min": 845 + }, + { + "epoch": 0.6921824104234527, + "grad_norm": 0.7324446974375245, + "learning_rate": 3.9441860465116284e-05, + "loss": 0.4912, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4767378866672516, + "step": 425, + "valid_targets_mean": 3738.6, + "valid_targets_min": 848 + }, + { + "epoch": 0.7003257328990228, + "grad_norm": 0.7843477154513698, + "learning_rate": 3.990697674418605e-05, + "loss": 0.4859, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.47191423177719116, + "step": 430, + "valid_targets_mean": 2533.3, + "valid_targets_min": 803 + }, + { + "epoch": 0.7084690553745928, + "grad_norm": 0.6340501514775936, + "learning_rate": 3.999989445287326e-05, + "loss": 0.4518, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.5396028757095337, + "step": 435, + "valid_targets_mean": 8157.7, + "valid_targets_min": 830 + }, + { + "epoch": 0.7166123778501629, + "grad_norm": 0.661085779275152, + "learning_rate": 3.9999465669580134e-05, + "loss": 0.4849, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.539644718170166, + "step": 440, + "valid_targets_mean": 5053.9, + "valid_targets_min": 981 + }, + { + "epoch": 0.7247557003257329, + "grad_norm": 0.7779828538059848, + "learning_rate": 3.999870706049115e-05, + "loss": 0.5091, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4706076979637146, + "step": 445, + "valid_targets_mean": 2791.2, + "valid_targets_min": 875 + }, + { + "epoch": 0.7328990228013029, + "grad_norm": 0.6130507047443169, + "learning_rate": 3.9997618638117076e-05, + "loss": 0.4617, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.5734138488769531, + "step": 450, + "valid_targets_mean": 7812.5, + "valid_targets_min": 734 + }, + { + "epoch": 0.741042345276873, + "grad_norm": 0.5713940102019508, + "learning_rate": 3.9996200420407885e-05, + "loss": 0.4547, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.5123910903930664, + "step": 455, + "valid_targets_mean": 5041.9, + "valid_targets_min": 866 + }, + { + "epoch": 0.749185667752443, + "grad_norm": 0.5820283125430189, + "learning_rate": 3.9994452430752446e-05, + "loss": 0.3966, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4304669499397278, + "step": 460, + "valid_targets_mean": 4386.4, + "valid_targets_min": 858 + }, + { + "epoch": 0.757328990228013, + "grad_norm": 0.7666626046087583, + "learning_rate": 3.999237469797813e-05, + "loss": 0.453, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3763735890388489, + "step": 465, + "valid_targets_mean": 2827.5, + "valid_targets_min": 916 + }, + { + "epoch": 0.7654723127035831, + "grad_norm": 0.7033055196397916, + "learning_rate": 3.998996725635034e-05, + "loss": 0.4348, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.48391178250312805, + "step": 470, + "valid_targets_mean": 3320.4, + "valid_targets_min": 902 + }, + { + "epoch": 0.7736156351791531, + "grad_norm": 0.694198239655852, + "learning_rate": 3.998723014557196e-05, + "loss": 0.4555, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.5769744515419006, + "step": 475, + "valid_targets_mean": 6925.4, + "valid_targets_min": 1091 + }, + { + "epoch": 0.7817589576547231, + "grad_norm": 0.5971738355826249, + "learning_rate": 3.9984163410782676e-05, + "loss": 0.4571, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.5059537291526794, + "step": 480, + "valid_targets_mean": 4825.0, + "valid_targets_min": 824 + }, + { + "epoch": 0.7899022801302932, + "grad_norm": 0.6876257481299002, + "learning_rate": 3.998076710255827e-05, + "loss": 0.4868, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.5543452501296997, + "step": 485, + "valid_targets_mean": 4136.2, + "valid_targets_min": 856 + }, + { + "epoch": 0.7980456026058632, + "grad_norm": 0.7866349533708771, + "learning_rate": 3.997704127690972e-05, + "loss": 0.4533, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3384542465209961, + "step": 490, + "valid_targets_mean": 1973.2, + "valid_targets_min": 890 + }, + { + "epoch": 0.8061889250814332, + "grad_norm": 0.6638982152347231, + "learning_rate": 3.997298599528235e-05, + "loss": 0.4418, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4130619466304779, + "step": 495, + "valid_targets_mean": 3086.3, + "valid_targets_min": 826 + }, + { + "epoch": 0.8143322475570033, + "grad_norm": 0.6166721486063286, + "learning_rate": 3.9968601324554767e-05, + "loss": 0.4744, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.42455142736434937, + "step": 500, + "valid_targets_mean": 3316.7, + "valid_targets_min": 931 + }, + { + "epoch": 0.8224755700325733, + "grad_norm": 0.4841308662430794, + "learning_rate": 3.9963887337037774e-05, + "loss": 0.3944, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.42772823572158813, + "step": 505, + "valid_targets_mean": 7171.5, + "valid_targets_min": 818 + }, + { + "epoch": 0.8306188925081434, + "grad_norm": 0.6967402327834242, + "learning_rate": 3.9958844110473196e-05, + "loss": 0.4427, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.46920889616012573, + "step": 510, + "valid_targets_mean": 3804.2, + "valid_targets_min": 830 + }, + { + "epoch": 0.8387622149837134, + "grad_norm": 0.8941465603694801, + "learning_rate": 3.9953471728032554e-05, + "loss": 0.4795, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.5124310851097107, + "step": 515, + "valid_targets_mean": 3921.4, + "valid_targets_min": 703 + }, + { + "epoch": 0.8469055374592834, + "grad_norm": 2.097515645284266, + "learning_rate": 3.9947770278315736e-05, + "loss": 0.481, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4894912838935852, + "step": 520, + "valid_targets_mean": 6187.9, + "valid_targets_min": 790 + }, + { + "epoch": 0.8550488599348535, + "grad_norm": 0.5390648778494891, + "learning_rate": 3.994173985534951e-05, + "loss": 0.4228, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3967466950416565, + "step": 525, + "valid_targets_mean": 4348.5, + "valid_targets_min": 946 + }, + { + "epoch": 0.8631921824104235, + "grad_norm": 0.6376768424868567, + "learning_rate": 3.993538055858598e-05, + "loss": 0.5433, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.5437548756599426, + "step": 530, + "valid_targets_mean": 5344.4, + "valid_targets_min": 780 + }, + { + "epoch": 0.8713355048859935, + "grad_norm": 0.6133585853877465, + "learning_rate": 3.9928692492900975e-05, + "loss": 0.4726, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.622114896774292, + "step": 535, + "valid_targets_mean": 7473.9, + "valid_targets_min": 960 + }, + { + "epoch": 0.8794788273615635, + "grad_norm": 0.525533624000233, + "learning_rate": 3.992167576859225e-05, + "loss": 0.4814, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4058448374271393, + "step": 540, + "valid_targets_mean": 4574.2, + "valid_targets_min": 849 + }, + { + "epoch": 0.8876221498371335, + "grad_norm": 0.730078124464456, + "learning_rate": 3.991433050137774e-05, + "loss": 0.4578, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3911706209182739, + "step": 545, + "valid_targets_mean": 2227.4, + "valid_targets_min": 860 + }, + { + "epoch": 0.8957654723127035, + "grad_norm": 0.7678713772669523, + "learning_rate": 3.990665681239361e-05, + "loss": 0.4289, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3062934875488281, + "step": 550, + "valid_targets_mean": 2076.8, + "valid_targets_min": 898 + }, + { + "epoch": 0.9039087947882736, + "grad_norm": 0.6007370775539534, + "learning_rate": 3.989865482819227e-05, + "loss": 0.4591, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.5096837282180786, + "step": 555, + "valid_targets_mean": 5085.8, + "valid_targets_min": 847 + }, + { + "epoch": 0.9120521172638436, + "grad_norm": 0.6541501193120934, + "learning_rate": 3.989032468074028e-05, + "loss": 0.3826, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3783084750175476, + "step": 560, + "valid_targets_mean": 2823.6, + "valid_targets_min": 623 + }, + { + "epoch": 0.9201954397394136, + "grad_norm": 0.7653414682696564, + "learning_rate": 3.98816665074162e-05, + "loss": 0.4496, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.5284523367881775, + "step": 565, + "valid_targets_mean": 3250.3, + "valid_targets_min": 662 + }, + { + "epoch": 0.9283387622149837, + "grad_norm": 0.5500475613020404, + "learning_rate": 3.987268045100828e-05, + "loss": 0.4287, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.42497968673706055, + "step": 570, + "valid_targets_mean": 4868.6, + "valid_targets_min": 861 + }, + { + "epoch": 0.9364820846905537, + "grad_norm": 0.5781220699943573, + "learning_rate": 3.9863366659712135e-05, + "loss": 0.4266, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4173704981803894, + "step": 575, + "valid_targets_mean": 4612.5, + "valid_targets_min": 903 + }, + { + "epoch": 0.9446254071661238, + "grad_norm": 0.6516720898290254, + "learning_rate": 3.985372528712831e-05, + "loss": 0.5196, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4456716775894165, + "step": 580, + "valid_targets_mean": 3945.1, + "valid_targets_min": 755 + }, + { + "epoch": 0.9527687296416938, + "grad_norm": 0.6294759732283977, + "learning_rate": 3.984375649225972e-05, + "loss": 0.418, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3829343616962433, + "step": 585, + "valid_targets_mean": 2538.1, + "valid_targets_min": 833 + }, + { + "epoch": 0.9609120521172638, + "grad_norm": 0.5822059406595022, + "learning_rate": 3.983346043950904e-05, + "loss": 0.4317, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.49146950244903564, + "step": 590, + "valid_targets_mean": 4587.2, + "valid_targets_min": 794 + }, + { + "epoch": 0.9690553745928339, + "grad_norm": 0.6382198073710749, + "learning_rate": 3.9822837298675996e-05, + "loss": 0.4725, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.47583264112472534, + "step": 595, + "valid_targets_mean": 4410.8, + "valid_targets_min": 786 + }, + { + "epoch": 0.9771986970684039, + "grad_norm": 0.582398030283584, + "learning_rate": 3.981188724495456e-05, + "loss": 0.4543, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.48681798577308655, + "step": 600, + "valid_targets_mean": 5142.8, + "valid_targets_min": 785 + }, + { + "epoch": 0.9853420195439739, + "grad_norm": 0.6752163233071634, + "learning_rate": 3.9800610458930075e-05, + "loss": 0.4259, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3721882104873657, + "step": 605, + "valid_targets_mean": 2278.1, + "valid_targets_min": 715 + }, + { + "epoch": 0.993485342019544, + "grad_norm": 0.7649094935851412, + "learning_rate": 3.978900712657624e-05, + "loss": 0.3849, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.43373265862464905, + "step": 610, + "valid_targets_mean": 2242.9, + "valid_targets_min": 854 + }, + { + "epoch": 1.001628664495114, + "grad_norm": 0.6002577954418794, + "learning_rate": 3.977707743925211e-05, + "loss": 0.4403, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.5208808183670044, + "step": 615, + "valid_targets_mean": 5248.0, + "valid_targets_min": 872 + }, + { + "epoch": 1.009771986970684, + "grad_norm": 0.5594141289864251, + "learning_rate": 3.9764821593698844e-05, + "loss": 0.476, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4486175775527954, + "step": 620, + "valid_targets_mean": 4866.4, + "valid_targets_min": 1023 + }, + { + "epoch": 1.017915309446254, + "grad_norm": 0.6229515439785842, + "learning_rate": 3.975223979203656e-05, + "loss": 0.4056, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.46202701330184937, + "step": 625, + "valid_targets_mean": 4252.4, + "valid_targets_min": 958 + }, + { + "epoch": 1.0260586319218241, + "grad_norm": 0.6411331765555095, + "learning_rate": 3.973933224176093e-05, + "loss": 0.4441, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3704216480255127, + "step": 630, + "valid_targets_mean": 3084.8, + "valid_targets_min": 829 + }, + { + "epoch": 1.0342019543973942, + "grad_norm": 0.5561241027945789, + "learning_rate": 3.97260991557398e-05, + "loss": 0.4385, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4445428252220154, + "step": 635, + "valid_targets_mean": 4378.7, + "valid_targets_min": 837 + }, + { + "epoch": 1.0423452768729642, + "grad_norm": 0.6818888124988391, + "learning_rate": 3.9712540752209654e-05, + "loss": 0.4144, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.39885133504867554, + "step": 640, + "valid_targets_mean": 2750.4, + "valid_targets_min": 966 + }, + { + "epoch": 1.0504885993485342, + "grad_norm": 0.5657736670555032, + "learning_rate": 3.969865725477203e-05, + "loss": 0.4384, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.42283380031585693, + "step": 645, + "valid_targets_mean": 6118.6, + "valid_targets_min": 918 + }, + { + "epoch": 1.0586319218241043, + "grad_norm": 0.7297451807131801, + "learning_rate": 3.9684448892389814e-05, + "loss": 0.4152, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3972225487232208, + "step": 650, + "valid_targets_mean": 2383.1, + "valid_targets_min": 877 + }, + { + "epoch": 1.0667752442996743, + "grad_norm": 0.6497288724975299, + "learning_rate": 3.96699158993835e-05, + "loss": 0.4217, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3985196053981781, + "step": 655, + "valid_targets_mean": 2973.6, + "valid_targets_min": 959 + }, + { + "epoch": 1.0749185667752443, + "grad_norm": 0.7321687315454672, + "learning_rate": 3.9655058515427284e-05, + "loss": 0.4379, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.431718111038208, + "step": 660, + "valid_targets_mean": 2563.3, + "valid_targets_min": 847 + }, + { + "epoch": 1.0830618892508144, + "grad_norm": 0.6645070846288498, + "learning_rate": 3.963987698554514e-05, + "loss": 0.4091, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.38921231031417847, + "step": 665, + "valid_targets_mean": 3009.5, + "valid_targets_min": 808 + }, + { + "epoch": 1.0912052117263844, + "grad_norm": 0.5561581540550469, + "learning_rate": 3.9624371560106764e-05, + "loss": 0.4145, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.42446261644363403, + "step": 670, + "valid_targets_mean": 4279.9, + "valid_targets_min": 767 + }, + { + "epoch": 1.0993485342019544, + "grad_norm": 0.6093344627902815, + "learning_rate": 3.9608542494823455e-05, + "loss": 0.3565, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.34918397665023804, + "step": 675, + "valid_targets_mean": 2574.4, + "valid_targets_min": 840 + }, + { + "epoch": 1.1074918566775245, + "grad_norm": 0.5353958136155785, + "learning_rate": 3.9592390050743884e-05, + "loss": 0.4178, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4316074252128601, + "step": 680, + "valid_targets_mean": 4754.8, + "valid_targets_min": 681 + }, + { + "epoch": 1.1156351791530945, + "grad_norm": 0.5827014708375629, + "learning_rate": 3.9575914494249817e-05, + "loss": 0.434, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.46669405698776245, + "step": 685, + "valid_targets_mean": 5039.7, + "valid_targets_min": 600 + }, + { + "epoch": 1.1237785016286646, + "grad_norm": 0.5892786875114913, + "learning_rate": 3.955911609705167e-05, + "loss": 0.4405, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4764605164527893, + "step": 690, + "valid_targets_mean": 4719.8, + "valid_targets_min": 802 + }, + { + "epoch": 1.1319218241042346, + "grad_norm": 0.6606246660683135, + "learning_rate": 3.9541995136184095e-05, + "loss": 0.4242, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3902358114719391, + "step": 695, + "valid_targets_mean": 2729.4, + "valid_targets_min": 750 + }, + { + "epoch": 1.1400651465798046, + "grad_norm": 0.546088795050995, + "learning_rate": 3.9524551894001355e-05, + "loss": 0.4449, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.5018824338912964, + "step": 700, + "valid_targets_mean": 6094.3, + "valid_targets_min": 970 + }, + { + "epoch": 1.1482084690553747, + "grad_norm": 0.6324113868172324, + "learning_rate": 3.950678665817271e-05, + "loss": 0.414, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3968406021595001, + "step": 705, + "valid_targets_mean": 2951.1, + "valid_targets_min": 797 + }, + { + "epoch": 1.1563517915309447, + "grad_norm": 0.5285171581934991, + "learning_rate": 3.948869972167762e-05, + "loss": 0.4212, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.39238280057907104, + "step": 710, + "valid_targets_mean": 4476.9, + "valid_targets_min": 843 + }, + { + "epoch": 1.1644951140065147, + "grad_norm": 0.6885709948130666, + "learning_rate": 3.9470291382800985e-05, + "loss": 0.4144, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3998885750770569, + "step": 715, + "valid_targets_mean": 3139.4, + "valid_targets_min": 836 + }, + { + "epoch": 1.1726384364820848, + "grad_norm": 0.7292128426798414, + "learning_rate": 3.945156194512815e-05, + "loss": 0.413, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.43786197900772095, + "step": 720, + "valid_targets_mean": 2545.2, + "valid_targets_min": 812 + }, + { + "epoch": 1.1807817589576548, + "grad_norm": 0.5768951046721075, + "learning_rate": 3.943251171753997e-05, + "loss": 0.3948, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.34994542598724365, + "step": 725, + "valid_targets_mean": 3955.4, + "valid_targets_min": 799 + }, + { + "epoch": 1.1889250814332248, + "grad_norm": 0.6535965649905378, + "learning_rate": 3.941314101420764e-05, + "loss": 0.4437, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3746069371700287, + "step": 730, + "valid_targets_mean": 2703.0, + "valid_targets_min": 834 + }, + { + "epoch": 1.1970684039087949, + "grad_norm": 0.4813136434148974, + "learning_rate": 3.939345015458759e-05, + "loss": 0.4068, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4959977865219116, + "step": 735, + "valid_targets_mean": 7419.9, + "valid_targets_min": 619 + }, + { + "epoch": 1.205211726384365, + "grad_norm": 0.6650872766076252, + "learning_rate": 3.9373439463416176e-05, + "loss": 0.4208, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4341118335723877, + "step": 740, + "valid_targets_mean": 2942.9, + "valid_targets_min": 836 + }, + { + "epoch": 1.213355048859935, + "grad_norm": 0.7010800573311282, + "learning_rate": 3.935310927070431e-05, + "loss": 0.4707, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4485361576080322, + "step": 745, + "valid_targets_mean": 2994.1, + "valid_targets_min": 900 + }, + { + "epoch": 1.221498371335505, + "grad_norm": 0.6062984107899768, + "learning_rate": 3.9332459911732046e-05, + "loss": 0.4539, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4605371654033661, + "step": 750, + "valid_targets_mean": 4438.5, + "valid_targets_min": 873 + }, + { + "epoch": 1.2296416938110748, + "grad_norm": 0.5390466848130968, + "learning_rate": 3.931149172704306e-05, + "loss": 0.4202, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.38458991050720215, + "step": 755, + "valid_targets_mean": 4395.5, + "valid_targets_min": 858 + }, + { + "epoch": 1.237785016286645, + "grad_norm": 0.6769595773456667, + "learning_rate": 3.9290205062438985e-05, + "loss": 0.4024, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.40879014134407043, + "step": 760, + "valid_targets_mean": 5257.8, + "valid_targets_min": 931 + }, + { + "epoch": 1.2459283387622149, + "grad_norm": 0.597762111327205, + "learning_rate": 3.926860026897376e-05, + "loss": 0.3992, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.42727530002593994, + "step": 765, + "valid_targets_mean": 4229.3, + "valid_targets_min": 903 + }, + { + "epoch": 1.2540716612377851, + "grad_norm": 0.4977273944411193, + "learning_rate": 3.9246677702947813e-05, + "loss": 0.3993, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4921976923942566, + "step": 770, + "valid_targets_mean": 6508.4, + "valid_targets_min": 873 + }, + { + "epoch": 1.262214983713355, + "grad_norm": 0.5375722152431383, + "learning_rate": 3.92244377259022e-05, + "loss": 0.4575, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4393521249294281, + "step": 775, + "valid_targets_mean": 4928.7, + "valid_targets_min": 875 + }, + { + "epoch": 1.2703583061889252, + "grad_norm": 0.7593603684937291, + "learning_rate": 3.920188070461262e-05, + "loss": 0.3827, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3719305098056793, + "step": 780, + "valid_targets_mean": 2690.6, + "valid_targets_min": 826 + }, + { + "epoch": 1.278501628664495, + "grad_norm": 0.7083608398376542, + "learning_rate": 3.917900701108338e-05, + "loss": 0.4209, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3645278215408325, + "step": 785, + "valid_targets_mean": 2314.7, + "valid_targets_min": 921 + }, + { + "epoch": 1.2866449511400653, + "grad_norm": 0.6394487766015668, + "learning_rate": 3.9155817022541283e-05, + "loss": 0.4094, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.49210530519485474, + "step": 790, + "valid_targets_mean": 5412.1, + "valid_targets_min": 828 + }, + { + "epoch": 1.294788273615635, + "grad_norm": 0.5285729040037943, + "learning_rate": 3.913231112142936e-05, + "loss": 0.4029, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3241245448589325, + "step": 795, + "valid_targets_mean": 3647.6, + "valid_targets_min": 733 + }, + { + "epoch": 1.3029315960912053, + "grad_norm": 0.5089361094824487, + "learning_rate": 3.910848969540059e-05, + "loss": 0.4366, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.5328578948974609, + "step": 800, + "valid_targets_mean": 7962.4, + "valid_targets_min": 917 + }, + { + "epoch": 1.3110749185667752, + "grad_norm": 0.5580300116911924, + "learning_rate": 3.9084353137311514e-05, + "loss": 0.5088, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.5066008567810059, + "step": 805, + "valid_targets_mean": 5835.2, + "valid_targets_min": 878 + }, + { + "epoch": 1.3192182410423452, + "grad_norm": 0.5298675639106072, + "learning_rate": 3.905990184521574e-05, + "loss": 0.4694, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.5332962274551392, + "step": 810, + "valid_targets_mean": 6569.8, + "valid_targets_min": 982 + }, + { + "epoch": 1.3273615635179152, + "grad_norm": 0.736899873315086, + "learning_rate": 3.9035136222357384e-05, + "loss": 0.4092, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.40034666657447815, + "step": 815, + "valid_targets_mean": 2731.8, + "valid_targets_min": 824 + }, + { + "epoch": 1.3355048859934853, + "grad_norm": 0.5663204630263984, + "learning_rate": 3.901005667716443e-05, + "loss": 0.4745, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.5070164203643799, + "step": 820, + "valid_targets_mean": 5676.5, + "valid_targets_min": 957 + }, + { + "epoch": 1.3436482084690553, + "grad_norm": 0.7143701972454842, + "learning_rate": 3.8984663623241955e-05, + "loss": 0.3877, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3736136257648468, + "step": 825, + "valid_targets_mean": 2460.6, + "valid_targets_min": 665 + }, + { + "epoch": 1.3517915309446253, + "grad_norm": 0.5641176552351401, + "learning_rate": 3.8958957479365374e-05, + "loss": 0.4369, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4111298620700836, + "step": 830, + "valid_targets_mean": 3870.2, + "valid_targets_min": 682 + }, + { + "epoch": 1.3599348534201954, + "grad_norm": 0.535858776802953, + "learning_rate": 3.893293866947348e-05, + "loss": 0.4338, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.5166705846786499, + "step": 835, + "valid_targets_mean": 7477.9, + "valid_targets_min": 1077 + }, + { + "epoch": 1.3680781758957654, + "grad_norm": 0.579125183129326, + "learning_rate": 3.890660762266147e-05, + "loss": 0.3959, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.5937281847000122, + "step": 840, + "valid_targets_mean": 8253.5, + "valid_targets_min": 896 + }, + { + "epoch": 1.3762214983713354, + "grad_norm": 0.6080285030247317, + "learning_rate": 3.8879964773173865e-05, + "loss": 0.4149, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.46752139925956726, + "step": 845, + "valid_targets_mean": 3483.1, + "valid_targets_min": 992 + }, + { + "epoch": 1.3843648208469055, + "grad_norm": 0.594043269294761, + "learning_rate": 3.885301056039736e-05, + "loss": 0.3875, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.37410348653793335, + "step": 850, + "valid_targets_mean": 3190.7, + "valid_targets_min": 732 + }, + { + "epoch": 1.3925081433224755, + "grad_norm": 0.6195516899746912, + "learning_rate": 3.882574542885357e-05, + "loss": 0.3916, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4371996521949768, + "step": 855, + "valid_targets_mean": 3298.1, + "valid_targets_min": 889 + }, + { + "epoch": 1.4006514657980456, + "grad_norm": 0.7163785827397556, + "learning_rate": 3.8798169828191674e-05, + "loss": 0.4113, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.43644750118255615, + "step": 860, + "valid_targets_mean": 3238.8, + "valid_targets_min": 975 + }, + { + "epoch": 1.4087947882736156, + "grad_norm": 0.6445225518653187, + "learning_rate": 3.877028421318107e-05, + "loss": 0.4317, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.46091002225875854, + "step": 865, + "valid_targets_mean": 4708.1, + "valid_targets_min": 778 + }, + { + "epoch": 1.4169381107491856, + "grad_norm": 0.6074800116125663, + "learning_rate": 3.874208904370378e-05, + "loss": 0.4336, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.41307497024536133, + "step": 870, + "valid_targets_mean": 3559.9, + "valid_targets_min": 715 + }, + { + "epoch": 1.4250814332247557, + "grad_norm": 0.5297724860630073, + "learning_rate": 3.871358478474695e-05, + "loss": 0.4123, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4638717472553253, + "step": 875, + "valid_targets_mean": 7146.1, + "valid_targets_min": 809 + }, + { + "epoch": 1.4332247557003257, + "grad_norm": 0.7039739556324185, + "learning_rate": 3.868477190639514e-05, + "loss": 0.409, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4513535797595978, + "step": 880, + "valid_targets_mean": 2914.1, + "valid_targets_min": 873 + }, + { + "epoch": 1.4413680781758957, + "grad_norm": 0.6273141906083939, + "learning_rate": 3.865565088382255e-05, + "loss": 0.3966, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4425792694091797, + "step": 885, + "valid_targets_mean": 3206.8, + "valid_targets_min": 855 + }, + { + "epoch": 1.4495114006514658, + "grad_norm": 0.5899280566751449, + "learning_rate": 3.862622219728525e-05, + "loss": 0.4549, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.392936110496521, + "step": 890, + "valid_targets_mean": 3736.4, + "valid_targets_min": 869 + }, + { + "epoch": 1.4576547231270358, + "grad_norm": 0.5973947359269157, + "learning_rate": 3.85964863321132e-05, + "loss": 0.4312, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3965773284435272, + "step": 895, + "valid_targets_mean": 3711.5, + "valid_targets_min": 884 + }, + { + "epoch": 1.4657980456026058, + "grad_norm": 0.5603432406682844, + "learning_rate": 3.856644377870227e-05, + "loss": 0.4144, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4848753809928894, + "step": 900, + "valid_targets_mean": 5117.5, + "valid_targets_min": 892 + }, + { + "epoch": 1.4739413680781759, + "grad_norm": 0.6259773527027673, + "learning_rate": 3.8536095032506155e-05, + "loss": 0.4367, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3485148251056671, + "step": 905, + "valid_targets_mean": 2533.5, + "valid_targets_min": 731 + }, + { + "epoch": 1.482084690553746, + "grad_norm": 0.6203857147426626, + "learning_rate": 3.850544059402819e-05, + "loss": 0.3988, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4292371869087219, + "step": 910, + "valid_targets_mean": 3780.3, + "valid_targets_min": 784 + }, + { + "epoch": 1.490228013029316, + "grad_norm": 0.576291925864017, + "learning_rate": 3.847448096881312e-05, + "loss": 0.3998, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.41010645031929016, + "step": 915, + "valid_targets_mean": 4459.9, + "valid_targets_min": 774 + }, + { + "epoch": 1.498371335504886, + "grad_norm": 0.6313991032833804, + "learning_rate": 3.844321666743872e-05, + "loss": 0.3717, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.38843148946762085, + "step": 920, + "valid_targets_mean": 3548.2, + "valid_targets_min": 829 + }, + { + "epoch": 1.506514657980456, + "grad_norm": 0.5459406998684688, + "learning_rate": 3.841164820550744e-05, + "loss": 0.4444, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4433070719242096, + "step": 925, + "valid_targets_mean": 5131.3, + "valid_targets_min": 775 + }, + { + "epoch": 1.514657980456026, + "grad_norm": 0.6071831693740676, + "learning_rate": 3.8379776103637824e-05, + "loss": 0.4652, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.42189866304397583, + "step": 930, + "valid_targets_mean": 3385.4, + "valid_targets_min": 951 + }, + { + "epoch": 1.522801302931596, + "grad_norm": 0.6055492728401147, + "learning_rate": 3.834760088745599e-05, + "loss": 0.4562, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.42511823773384094, + "step": 935, + "valid_targets_mean": 3835.5, + "valid_targets_min": 1042 + }, + { + "epoch": 1.5309446254071661, + "grad_norm": 0.5364523145768575, + "learning_rate": 3.831512308758693e-05, + "loss": 0.3913, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.35987794399261475, + "step": 940, + "valid_targets_mean": 4003.6, + "valid_targets_min": 978 + }, + { + "epoch": 1.5390879478827362, + "grad_norm": 0.49305744066021706, + "learning_rate": 3.828234323964576e-05, + "loss": 0.4498, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.45336177945137024, + "step": 945, + "valid_targets_mean": 6003.6, + "valid_targets_min": 879 + }, + { + "epoch": 1.5472312703583062, + "grad_norm": 0.5475008804959645, + "learning_rate": 3.824926188422891e-05, + "loss": 0.4125, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.44161492586135864, + "step": 950, + "valid_targets_mean": 6523.3, + "valid_targets_min": 735 + }, + { + "epoch": 1.5553745928338762, + "grad_norm": 0.5581254736037339, + "learning_rate": 3.8215879566905156e-05, + "loss": 0.3914, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.29592400789260864, + "step": 955, + "valid_targets_mean": 2971.3, + "valid_targets_min": 839 + }, + { + "epoch": 1.5635179153094463, + "grad_norm": 0.6478359055974314, + "learning_rate": 3.818219683820668e-05, + "loss": 0.4077, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4026212990283966, + "step": 960, + "valid_targets_mean": 2913.6, + "valid_targets_min": 821 + }, + { + "epoch": 1.5716612377850163, + "grad_norm": 0.6204966018107344, + "learning_rate": 3.814821425361997e-05, + "loss": 0.4627, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.471854031085968, + "step": 965, + "valid_targets_mean": 3954.1, + "valid_targets_min": 751 + }, + { + "epoch": 1.5798045602605864, + "grad_norm": 0.6003528820412882, + "learning_rate": 3.811393237357663e-05, + "loss": 0.4398, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.49959659576416016, + "step": 970, + "valid_targets_mean": 4508.7, + "valid_targets_min": 893 + }, + { + "epoch": 1.5879478827361564, + "grad_norm": 0.572098552279729, + "learning_rate": 3.8079351763444205e-05, + "loss": 0.4071, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.41257143020629883, + "step": 975, + "valid_targets_mean": 3515.9, + "valid_targets_min": 714 + }, + { + "epoch": 1.5960912052117264, + "grad_norm": 0.6581304866164168, + "learning_rate": 3.804447299351679e-05, + "loss": 0.3812, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4794001579284668, + "step": 980, + "valid_targets_mean": 3670.5, + "valid_targets_min": 817 + }, + { + "epoch": 1.6042345276872965, + "grad_norm": 0.4451951632863299, + "learning_rate": 3.8009296639005644e-05, + "loss": 0.3941, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3873254358768463, + "step": 985, + "valid_targets_mean": 5797.1, + "valid_targets_min": 774 + }, + { + "epoch": 1.6123778501628665, + "grad_norm": 0.6803741094263188, + "learning_rate": 3.797382328002973e-05, + "loss": 0.4066, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.40355053544044495, + "step": 990, + "valid_targets_mean": 2508.6, + "valid_targets_min": 956 + }, + { + "epoch": 1.6205211726384365, + "grad_norm": 0.531365913278838, + "learning_rate": 3.793805350160613e-05, + "loss": 0.4178, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.43091487884521484, + "step": 995, + "valid_targets_mean": 4539.8, + "valid_targets_min": 977 + }, + { + "epoch": 1.6286644951140063, + "grad_norm": 0.5401067586586091, + "learning_rate": 3.790198789364036e-05, + "loss": 0.4532, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4436972737312317, + "step": 1000, + "valid_targets_mean": 4226.6, + "valid_targets_min": 830 + }, + { + "epoch": 1.6368078175895766, + "grad_norm": 0.6450651776309458, + "learning_rate": 3.786562705091672e-05, + "loss": 0.3528, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3267674744129181, + "step": 1005, + "valid_targets_mean": 2441.9, + "valid_targets_min": 896 + }, + { + "epoch": 1.6449511400651464, + "grad_norm": 0.6227739584274612, + "learning_rate": 3.782897157308843e-05, + "loss": 0.3803, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.288798451423645, + "step": 1010, + "valid_targets_mean": 2278.3, + "valid_targets_min": 754 + }, + { + "epoch": 1.6530944625407167, + "grad_norm": 0.5662950467595115, + "learning_rate": 3.779202206466774e-05, + "loss": 0.4058, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4580032229423523, + "step": 1015, + "valid_targets_mean": 4435.3, + "valid_targets_min": 751 + }, + { + "epoch": 1.6612377850162865, + "grad_norm": 0.5261745260455306, + "learning_rate": 3.775477913501598e-05, + "loss": 0.3862, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.29939186573028564, + "step": 1020, + "valid_targets_mean": 3607.8, + "valid_targets_min": 718 + }, + { + "epoch": 1.6693811074918568, + "grad_norm": 0.49354121872150997, + "learning_rate": 3.771724339833351e-05, + "loss": 0.4126, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4164675772190094, + "step": 1025, + "valid_targets_mean": 5005.8, + "valid_targets_min": 805 + }, + { + "epoch": 1.6775244299674266, + "grad_norm": 0.5706995074279902, + "learning_rate": 3.7679415473649574e-05, + "loss": 0.4224, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.31082987785339355, + "step": 1030, + "valid_targets_mean": 3069.1, + "valid_targets_min": 830 + }, + { + "epoch": 1.6856677524429968, + "grad_norm": 0.7406461098891799, + "learning_rate": 3.7641295984812114e-05, + "loss": 0.414, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.30098259449005127, + "step": 1035, + "valid_targets_mean": 1761.9, + "valid_targets_min": 775 + }, + { + "epoch": 1.6938110749185666, + "grad_norm": 0.7047381769100605, + "learning_rate": 3.760288556047745e-05, + "loss": 0.4655, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.49528998136520386, + "step": 1040, + "valid_targets_mean": 3514.5, + "valid_targets_min": 798 + }, + { + "epoch": 1.701954397394137, + "grad_norm": 0.6496281274500635, + "learning_rate": 3.756418483409996e-05, + "loss": 0.4044, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.47903183102607727, + "step": 1045, + "valid_targets_mean": 3331.8, + "valid_targets_min": 816 + }, + { + "epoch": 1.7100977198697067, + "grad_norm": 0.5524641928024946, + "learning_rate": 3.752519444392158e-05, + "loss": 0.4433, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.43874186277389526, + "step": 1050, + "valid_targets_mean": 4299.6, + "valid_targets_min": 983 + }, + { + "epoch": 1.718241042345277, + "grad_norm": 0.6509783434726939, + "learning_rate": 3.748591503296131e-05, + "loss": 0.4289, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3853952884674072, + "step": 1055, + "valid_targets_mean": 2620.2, + "valid_targets_min": 987 + }, + { + "epoch": 1.7263843648208468, + "grad_norm": 0.6501193828750986, + "learning_rate": 3.744634724900463e-05, + "loss": 0.3709, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3299080729484558, + "step": 1060, + "valid_targets_mean": 2436.8, + "valid_targets_min": 662 + }, + { + "epoch": 1.734527687296417, + "grad_norm": 0.6697602418640541, + "learning_rate": 3.740649174459273e-05, + "loss": 0.4311, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.44796931743621826, + "step": 1065, + "valid_targets_mean": 5464.5, + "valid_targets_min": 954 + }, + { + "epoch": 1.7426710097719869, + "grad_norm": 0.5822688147153671, + "learning_rate": 3.7366349177011864e-05, + "loss": 0.4253, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3572904169559479, + "step": 1070, + "valid_targets_mean": 2843.8, + "valid_targets_min": 979 + }, + { + "epoch": 1.7508143322475571, + "grad_norm": 0.5048263815915769, + "learning_rate": 3.732592020828243e-05, + "loss": 0.4693, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4151160418987274, + "step": 1075, + "valid_targets_mean": 4626.9, + "valid_targets_min": 927 + }, + { + "epoch": 1.758957654723127, + "grad_norm": 0.6369901410065246, + "learning_rate": 3.728520550514808e-05, + "loss": 0.4122, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3482610285282135, + "step": 1080, + "valid_targets_mean": 2535.7, + "valid_targets_min": 784 + }, + { + "epoch": 1.7671009771986972, + "grad_norm": 0.8243713983397156, + "learning_rate": 3.7244205739064726e-05, + "loss": 0.3892, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3723037540912628, + "step": 1085, + "valid_targets_mean": 2685.9, + "valid_targets_min": 896 + }, + { + "epoch": 1.775244299674267, + "grad_norm": 0.5345737660864993, + "learning_rate": 3.720292158618945e-05, + "loss": 0.4053, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.36261144280433655, + "step": 1090, + "valid_targets_mean": 3520.4, + "valid_targets_min": 819 + }, + { + "epoch": 1.7833876221498373, + "grad_norm": 0.69593137355099, + "learning_rate": 3.716135372736936e-05, + "loss": 0.4042, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3598507344722748, + "step": 1095, + "valid_targets_mean": 2259.4, + "valid_targets_min": 955 + }, + { + "epoch": 1.791530944625407, + "grad_norm": 0.49272578165712444, + "learning_rate": 3.7119502848130405e-05, + "loss": 0.4754, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.5129920244216919, + "step": 1100, + "valid_targets_mean": 7371.8, + "valid_targets_min": 798 + }, + { + "epoch": 1.7996742671009773, + "grad_norm": 0.7820390905800039, + "learning_rate": 3.707736963866598e-05, + "loss": 0.3918, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.385663777589798, + "step": 1105, + "valid_targets_mean": 2418.9, + "valid_targets_min": 938 + }, + { + "epoch": 1.8078175895765471, + "grad_norm": 0.6376743820198543, + "learning_rate": 3.7034954793825625e-05, + "loss": 0.4189, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3605830669403076, + "step": 1110, + "valid_targets_mean": 2575.6, + "valid_targets_min": 808 + }, + { + "epoch": 1.8159609120521174, + "grad_norm": 0.5857410500344302, + "learning_rate": 3.699225901310353e-05, + "loss": 0.4266, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4638155698776245, + "step": 1115, + "valid_targets_mean": 5498.8, + "valid_targets_min": 806 + }, + { + "epoch": 1.8241042345276872, + "grad_norm": 0.5429970204453551, + "learning_rate": 3.694928300062698e-05, + "loss": 0.4616, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.5007045269012451, + "step": 1120, + "valid_targets_mean": 5730.5, + "valid_targets_min": 856 + }, + { + "epoch": 1.8322475570032575, + "grad_norm": 0.6542749006845264, + "learning_rate": 3.690602746514481e-05, + "loss": 0.4107, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3724113702774048, + "step": 1125, + "valid_targets_mean": 2721.6, + "valid_targets_min": 922 + }, + { + "epoch": 1.8403908794788273, + "grad_norm": 0.6529290235171168, + "learning_rate": 3.686249312001564e-05, + "loss": 0.3868, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.38023194670677185, + "step": 1130, + "valid_targets_mean": 3034.7, + "valid_targets_min": 836 + }, + { + "epoch": 1.8485342019543975, + "grad_norm": 0.6025123721009671, + "learning_rate": 3.681868068319614e-05, + "loss": 0.4296, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4393656849861145, + "step": 1135, + "valid_targets_mean": 3376.8, + "valid_targets_min": 953 + }, + { + "epoch": 1.8566775244299674, + "grad_norm": 0.5615164722582863, + "learning_rate": 3.677459087722922e-05, + "loss": 0.4461, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.5215327739715576, + "step": 1140, + "valid_targets_mean": 5030.0, + "valid_targets_min": 939 + }, + { + "epoch": 1.8648208469055376, + "grad_norm": 0.6033714563236224, + "learning_rate": 3.6730224429232084e-05, + "loss": 0.425, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.382057785987854, + "step": 1145, + "valid_targets_mean": 3076.8, + "valid_targets_min": 914 + }, + { + "epoch": 1.8729641693811074, + "grad_norm": 0.6435458263171235, + "learning_rate": 3.668558207088421e-05, + "loss": 0.4463, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.441327303647995, + "step": 1150, + "valid_targets_mean": 2921.1, + "valid_targets_min": 773 + }, + { + "epoch": 1.8811074918566775, + "grad_norm": 0.648996992995554, + "learning_rate": 3.664066453841534e-05, + "loss": 0.4256, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.36870846152305603, + "step": 1155, + "valid_targets_mean": 2279.9, + "valid_targets_min": 790 + }, + { + "epoch": 1.8892508143322475, + "grad_norm": 0.4747317434914963, + "learning_rate": 3.659547257259331e-05, + "loss": 0.4081, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4306727647781372, + "step": 1160, + "valid_targets_mean": 5922.5, + "valid_targets_min": 999 + }, + { + "epoch": 1.8973941368078175, + "grad_norm": 0.6025746664540975, + "learning_rate": 3.655000691871185e-05, + "loss": 0.4095, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.42406460642814636, + "step": 1165, + "valid_targets_mean": 3573.9, + "valid_targets_min": 1000 + }, + { + "epoch": 1.9055374592833876, + "grad_norm": 0.50506254387959, + "learning_rate": 3.650426832657825e-05, + "loss": 0.4514, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4524383544921875, + "step": 1170, + "valid_targets_mean": 6740.8, + "valid_targets_min": 956 + }, + { + "epoch": 1.9136807817589576, + "grad_norm": 0.4936197219460003, + "learning_rate": 3.645825755050105e-05, + "loss": 0.4338, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3502442538738251, + "step": 1175, + "valid_targets_mean": 4575.3, + "valid_targets_min": 707 + }, + { + "epoch": 1.9218241042345277, + "grad_norm": 0.4928717040369041, + "learning_rate": 3.6411975349277554e-05, + "loss": 0.4016, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3613849878311157, + "step": 1180, + "valid_targets_mean": 4935.9, + "valid_targets_min": 792 + }, + { + "epoch": 1.9299674267100977, + "grad_norm": 0.5670311705935634, + "learning_rate": 3.6365422486181356e-05, + "loss": 0.4294, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.48796892166137695, + "step": 1185, + "valid_targets_mean": 4604.7, + "valid_targets_min": 902 + }, + { + "epoch": 1.9381107491856677, + "grad_norm": 0.673837950665888, + "learning_rate": 3.631859972894972e-05, + "loss": 0.4267, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.5297560095787048, + "step": 1190, + "valid_targets_mean": 3990.8, + "valid_targets_min": 823 + }, + { + "epoch": 1.9462540716612378, + "grad_norm": 0.5029352416329238, + "learning_rate": 3.627150784977093e-05, + "loss": 0.4234, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.5054534673690796, + "step": 1195, + "valid_targets_mean": 8607.8, + "valid_targets_min": 854 + }, + { + "epoch": 1.9543973941368078, + "grad_norm": 0.5175400712064422, + "learning_rate": 3.6224147625271576e-05, + "loss": 0.4383, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.44223955273628235, + "step": 1200, + "valid_targets_mean": 5911.9, + "valid_targets_min": 797 + }, + { + "epoch": 1.9625407166123778, + "grad_norm": 0.5021191497954278, + "learning_rate": 3.617651983650369e-05, + "loss": 0.4226, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4783463478088379, + "step": 1205, + "valid_targets_mean": 6259.8, + "valid_targets_min": 847 + }, + { + "epoch": 1.9706840390879479, + "grad_norm": 0.4536863626833413, + "learning_rate": 3.612862526893194e-05, + "loss": 0.431, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4203566312789917, + "step": 1210, + "valid_targets_mean": 7130.9, + "valid_targets_min": 839 + }, + { + "epoch": 1.978827361563518, + "grad_norm": 0.46868966773403903, + "learning_rate": 3.608046471242062e-05, + "loss": 0.4427, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.523578405380249, + "step": 1215, + "valid_targets_mean": 7517.8, + "valid_targets_min": 1116 + }, + { + "epoch": 1.986970684039088, + "grad_norm": 0.3927589448722763, + "learning_rate": 3.603203896122064e-05, + "loss": 0.3887, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3904946446418762, + "step": 1220, + "valid_targets_mean": 7597.1, + "valid_targets_min": 758 + }, + { + "epoch": 1.995114006514658, + "grad_norm": 0.5135031351983272, + "learning_rate": 3.598334881395643e-05, + "loss": 0.3842, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.44291985034942627, + "step": 1225, + "valid_targets_mean": 4957.8, + "valid_targets_min": 855 + }, + { + "epoch": 2.003257328990228, + "grad_norm": 0.5735126922140658, + "learning_rate": 3.593439507361278e-05, + "loss": 0.4018, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.38659003376960754, + "step": 1230, + "valid_targets_mean": 3839.8, + "valid_targets_min": 850 + }, + { + "epoch": 2.011400651465798, + "grad_norm": 0.9104872453342409, + "learning_rate": 3.588517854752157e-05, + "loss": 0.4309, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4143409729003906, + "step": 1235, + "valid_targets_mean": 5490.9, + "valid_targets_min": 874 + }, + { + "epoch": 2.019543973941368, + "grad_norm": 0.518863199653592, + "learning_rate": 3.583570004734848e-05, + "loss": 0.4215, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3660213351249695, + "step": 1240, + "valid_targets_mean": 3823.6, + "valid_targets_min": 678 + }, + { + "epoch": 2.027687296416938, + "grad_norm": 0.6548172635874032, + "learning_rate": 3.57859603890796e-05, + "loss": 0.4049, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.5501856803894043, + "step": 1245, + "valid_targets_mean": 4937.9, + "valid_targets_min": 867 + }, + { + "epoch": 2.035830618892508, + "grad_norm": 0.6548703797971612, + "learning_rate": 3.5735960393007955e-05, + "loss": 0.4493, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.49028003215789795, + "step": 1250, + "valid_targets_mean": 3904.1, + "valid_targets_min": 917 + }, + { + "epoch": 2.043973941368078, + "grad_norm": 0.6291373485748836, + "learning_rate": 3.568570088372001e-05, + "loss": 0.4447, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.5294812321662903, + "step": 1255, + "valid_targets_mean": 7234.1, + "valid_targets_min": 909 + }, + { + "epoch": 2.0521172638436482, + "grad_norm": 0.7145111466125614, + "learning_rate": 3.563518269008204e-05, + "loss": 0.3891, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2554847002029419, + "step": 1260, + "valid_targets_mean": 1902.1, + "valid_targets_min": 837 + }, + { + "epoch": 2.060260586319218, + "grad_norm": 0.581583099143951, + "learning_rate": 3.5584406645226474e-05, + "loss": 0.3474, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.40127965807914734, + "step": 1265, + "valid_targets_mean": 3475.3, + "valid_targets_min": 754 + }, + { + "epoch": 2.0684039087947883, + "grad_norm": 0.6315559503430457, + "learning_rate": 3.5533373586538156e-05, + "loss": 0.4221, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.46138134598731995, + "step": 1270, + "valid_targets_mean": 3885.0, + "valid_targets_min": 920 + }, + { + "epoch": 2.076547231270358, + "grad_norm": 0.6118521789815867, + "learning_rate": 3.548208435564052e-05, + "loss": 0.3975, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.41776740550994873, + "step": 1275, + "valid_targets_mean": 3041.0, + "valid_targets_min": 814 + }, + { + "epoch": 2.0846905537459284, + "grad_norm": 0.6199166918638226, + "learning_rate": 3.543053979838175e-05, + "loss": 0.4352, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4905391037464142, + "step": 1280, + "valid_targets_mean": 4127.6, + "valid_targets_min": 814 + }, + { + "epoch": 2.092833876221498, + "grad_norm": 0.5791518711522746, + "learning_rate": 3.537874076482077e-05, + "loss": 0.4326, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4245043992996216, + "step": 1285, + "valid_targets_mean": 4018.1, + "valid_targets_min": 854 + }, + { + "epoch": 2.1009771986970684, + "grad_norm": 0.6088387148051235, + "learning_rate": 3.532668810921329e-05, + "loss": 0.3556, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.32733964920043945, + "step": 1290, + "valid_targets_mean": 2749.9, + "valid_targets_min": 692 + }, + { + "epoch": 2.1091205211726383, + "grad_norm": 0.5811469683460134, + "learning_rate": 3.527438268999768e-05, + "loss": 0.4009, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.37757039070129395, + "step": 1295, + "valid_targets_mean": 3569.8, + "valid_targets_min": 771 + }, + { + "epoch": 2.1172638436482085, + "grad_norm": 0.5296612098479514, + "learning_rate": 3.522182536978078e-05, + "loss": 0.4197, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.35972586274147034, + "step": 1300, + "valid_targets_mean": 4180.2, + "valid_targets_min": 1106 + }, + { + "epoch": 2.1254071661237783, + "grad_norm": 0.4779406710998466, + "learning_rate": 3.516901701532378e-05, + "loss": 0.4344, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.46301886439323425, + "step": 1305, + "valid_targets_mean": 6762.2, + "valid_targets_min": 1046 + }, + { + "epoch": 2.1335504885993486, + "grad_norm": 0.6589644223143998, + "learning_rate": 3.5115958497527806e-05, + "loss": 0.3742, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3894103169441223, + "step": 1310, + "valid_targets_mean": 2875.3, + "valid_targets_min": 814 + }, + { + "epoch": 2.1416938110749184, + "grad_norm": 1.4095127167371486, + "learning_rate": 3.5062650691419634e-05, + "loss": 0.3978, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3322986662387848, + "step": 1315, + "valid_targets_mean": 4842.5, + "valid_targets_min": 961 + }, + { + "epoch": 2.1498371335504887, + "grad_norm": 0.6851722042375856, + "learning_rate": 3.500909447613723e-05, + "loss": 0.4404, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.40488457679748535, + "step": 1320, + "valid_targets_mean": 3470.6, + "valid_targets_min": 780 + }, + { + "epoch": 2.1579804560260585, + "grad_norm": 0.5251985034902503, + "learning_rate": 3.4955290734915255e-05, + "loss": 0.3901, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3979756236076355, + "step": 1325, + "valid_targets_mean": 4426.2, + "valid_targets_min": 959 + }, + { + "epoch": 2.1661237785016287, + "grad_norm": 0.6486511231022041, + "learning_rate": 3.4901240355070506e-05, + "loss": 0.3918, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3934767544269562, + "step": 1330, + "valid_targets_mean": 5272.5, + "valid_targets_min": 922 + }, + { + "epoch": 2.1742671009771986, + "grad_norm": 0.5281974006384081, + "learning_rate": 3.484694422798727e-05, + "loss": 0.3899, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.30738508701324463, + "step": 1335, + "valid_targets_mean": 3700.5, + "valid_targets_min": 861 + }, + { + "epoch": 2.182410423452769, + "grad_norm": 0.6640647455080223, + "learning_rate": 3.479240324910264e-05, + "loss": 0.3698, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4088769257068634, + "step": 1340, + "valid_targets_mean": 2609.1, + "valid_targets_min": 791 + }, + { + "epoch": 2.1905537459283386, + "grad_norm": 0.7694309995683591, + "learning_rate": 3.473761831789174e-05, + "loss": 0.3746, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.38660961389541626, + "step": 1345, + "valid_targets_mean": 4414.8, + "valid_targets_min": 924 + }, + { + "epoch": 2.198697068403909, + "grad_norm": 0.5131992304229263, + "learning_rate": 3.4682590337852886e-05, + "loss": 0.4163, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4272652864456177, + "step": 1350, + "valid_targets_mean": 4841.6, + "valid_targets_min": 918 + }, + { + "epoch": 2.2068403908794787, + "grad_norm": 0.5242634330794429, + "learning_rate": 3.462732021649268e-05, + "loss": 0.3495, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.33907708525657654, + "step": 1355, + "valid_targets_mean": 3697.6, + "valid_targets_min": 853 + }, + { + "epoch": 2.214983713355049, + "grad_norm": 0.6726573196118447, + "learning_rate": 3.457180886531106e-05, + "loss": 0.4053, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4229174852371216, + "step": 1360, + "valid_targets_mean": 3569.9, + "valid_targets_min": 817 + }, + { + "epoch": 2.2231270358306188, + "grad_norm": 0.549481480135827, + "learning_rate": 3.451605719978627e-05, + "loss": 0.4139, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.40029001235961914, + "step": 1365, + "valid_targets_mean": 4090.3, + "valid_targets_min": 961 + }, + { + "epoch": 2.231270358306189, + "grad_norm": 0.5903099156521626, + "learning_rate": 3.446006613935975e-05, + "loss": 0.3652, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3473586142063141, + "step": 1370, + "valid_targets_mean": 3203.1, + "valid_targets_min": 732 + }, + { + "epoch": 2.239413680781759, + "grad_norm": 0.6608735204411983, + "learning_rate": 3.440383660742096e-05, + "loss": 0.3334, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.269184410572052, + "step": 1375, + "valid_targets_mean": 2001.4, + "valid_targets_min": 814 + }, + { + "epoch": 2.247557003257329, + "grad_norm": 0.7257119545883688, + "learning_rate": 3.434736953129221e-05, + "loss": 0.4051, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.338829904794693, + "step": 1380, + "valid_targets_mean": 2020.2, + "valid_targets_min": 796 + }, + { + "epoch": 2.255700325732899, + "grad_norm": 0.6157688767755378, + "learning_rate": 3.4290665842213267e-05, + "loss": 0.3535, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3956828713417053, + "step": 1385, + "valid_targets_mean": 3396.9, + "valid_targets_min": 734 + }, + { + "epoch": 2.263843648208469, + "grad_norm": 0.5135167573314222, + "learning_rate": 3.4233726475326086e-05, + "loss": 0.4129, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.48269638419151306, + "step": 1390, + "valid_targets_mean": 7162.6, + "valid_targets_min": 842 + }, + { + "epoch": 2.271986970684039, + "grad_norm": 0.6029213133159707, + "learning_rate": 3.417655236965937e-05, + "loss": 0.4339, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.460470587015152, + "step": 1395, + "valid_targets_mean": 4254.5, + "valid_targets_min": 796 + }, + { + "epoch": 2.2801302931596092, + "grad_norm": 0.5417600828853789, + "learning_rate": 3.411914446811305e-05, + "loss": 0.3885, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4097944498062134, + "step": 1400, + "valid_targets_mean": 4104.9, + "valid_targets_min": 857 + }, + { + "epoch": 2.288273615635179, + "grad_norm": 0.6042234397858103, + "learning_rate": 3.406150371744275e-05, + "loss": 0.4297, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.36458754539489746, + "step": 1405, + "valid_targets_mean": 2886.0, + "valid_targets_min": 842 + }, + { + "epoch": 2.2964169381107493, + "grad_norm": 1.6297322989648841, + "learning_rate": 3.40036310682442e-05, + "loss": 0.4325, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.46638843417167664, + "step": 1410, + "valid_targets_mean": 4565.4, + "valid_targets_min": 913 + }, + { + "epoch": 2.304560260586319, + "grad_norm": 0.6904220033327714, + "learning_rate": 3.3945527474937516e-05, + "loss": 0.4517, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4787893295288086, + "step": 1415, + "valid_targets_mean": 3580.6, + "valid_targets_min": 707 + }, + { + "epoch": 2.3127035830618894, + "grad_norm": 0.672041296676618, + "learning_rate": 3.3887193895751515e-05, + "loss": 0.379, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.39845556020736694, + "step": 1420, + "valid_targets_mean": 2616.2, + "valid_targets_min": 912 + }, + { + "epoch": 2.320846905537459, + "grad_norm": 0.5985566671497945, + "learning_rate": 3.382863129270784e-05, + "loss": 0.3825, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.37515610456466675, + "step": 1425, + "valid_targets_mean": 3806.2, + "valid_targets_min": 1140 + }, + { + "epoch": 2.3289902280130295, + "grad_norm": 0.5200277549471368, + "learning_rate": 3.3769840631605166e-05, + "loss": 0.3956, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3815835118293762, + "step": 1430, + "valid_targets_mean": 5724.2, + "valid_targets_min": 846 + }, + { + "epoch": 2.3371335504885993, + "grad_norm": 0.7003473700261261, + "learning_rate": 3.37108228820032e-05, + "loss": 0.3431, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3787077069282532, + "step": 1435, + "valid_targets_mean": 2372.8, + "valid_targets_min": 896 + }, + { + "epoch": 2.3452768729641695, + "grad_norm": 0.5833958144344444, + "learning_rate": 3.365157901720679e-05, + "loss": 0.4133, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3986330032348633, + "step": 1440, + "valid_targets_mean": 3451.4, + "valid_targets_min": 795 + }, + { + "epoch": 2.3534201954397393, + "grad_norm": 0.5727465360873358, + "learning_rate": 3.3592110014249765e-05, + "loss": 0.3866, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4161834120750427, + "step": 1445, + "valid_targets_mean": 3710.1, + "valid_targets_min": 860 + }, + { + "epoch": 2.3615635179153096, + "grad_norm": 0.503826425803254, + "learning_rate": 3.353241685387888e-05, + "loss": 0.376, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.28357994556427, + "step": 1450, + "valid_targets_mean": 3598.2, + "valid_targets_min": 745 + }, + { + "epoch": 2.3697068403908794, + "grad_norm": 0.5828827836469597, + "learning_rate": 3.3472500520537655e-05, + "loss": 0.4104, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.43337634205818176, + "step": 1455, + "valid_targets_mean": 4701.9, + "valid_targets_min": 852 + }, + { + "epoch": 2.3778501628664497, + "grad_norm": 0.554300917082971, + "learning_rate": 3.3412362002350105e-05, + "loss": 0.3813, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3967621326446533, + "step": 1460, + "valid_targets_mean": 3966.9, + "valid_targets_min": 919 + }, + { + "epoch": 2.3859934853420195, + "grad_norm": 0.5699443799818354, + "learning_rate": 3.3352002291104455e-05, + "loss": 0.3908, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3716140389442444, + "step": 1465, + "valid_targets_mean": 3499.2, + "valid_targets_min": 1032 + }, + { + "epoch": 2.3941368078175898, + "grad_norm": 0.6398444180910327, + "learning_rate": 3.329142238223679e-05, + "loss": 0.344, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.36295872926712036, + "step": 1470, + "valid_targets_mean": 4054.2, + "valid_targets_min": 880 + }, + { + "epoch": 2.4022801302931596, + "grad_norm": 0.5865875938371107, + "learning_rate": 3.323062327481463e-05, + "loss": 0.3795, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3570999503135681, + "step": 1475, + "valid_targets_mean": 3064.3, + "valid_targets_min": 1144 + }, + { + "epoch": 2.41042345276873, + "grad_norm": 0.5604770437101241, + "learning_rate": 3.316960597152048e-05, + "loss": 0.3884, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.34385985136032104, + "step": 1480, + "valid_targets_mean": 3760.7, + "valid_targets_min": 783 + }, + { + "epoch": 2.4185667752442996, + "grad_norm": 0.7272703452166338, + "learning_rate": 3.3108371478635235e-05, + "loss": 0.4008, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3627995252609253, + "step": 1485, + "valid_targets_mean": 2564.5, + "valid_targets_min": 853 + }, + { + "epoch": 2.42671009771987, + "grad_norm": 0.5302349592955444, + "learning_rate": 3.304692080602164e-05, + "loss": 0.3842, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.42551520466804504, + "step": 1490, + "valid_targets_mean": 4688.8, + "valid_targets_min": 827 + }, + { + "epoch": 2.4348534201954397, + "grad_norm": 0.527727891013627, + "learning_rate": 3.2985254967107645e-05, + "loss": 0.3801, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.5107170939445496, + "step": 1495, + "valid_targets_mean": 6544.8, + "valid_targets_min": 848 + }, + { + "epoch": 2.44299674267101, + "grad_norm": 0.7863103237781619, + "learning_rate": 3.292337497886962e-05, + "loss": 0.3916, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.35082465410232544, + "step": 1500, + "valid_targets_mean": 3918.2, + "valid_targets_min": 754 + }, + { + "epoch": 2.45114006514658, + "grad_norm": 0.521119384478141, + "learning_rate": 3.2861281861815656e-05, + "loss": 0.3762, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.36874860525131226, + "step": 1505, + "valid_targets_mean": 4538.2, + "valid_targets_min": 837 + }, + { + "epoch": 2.4592833876221496, + "grad_norm": 0.9007574087969191, + "learning_rate": 3.279897663996869e-05, + "loss": 0.3454, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.30335745215415955, + "step": 1510, + "valid_targets_mean": 2077.3, + "valid_targets_min": 982 + }, + { + "epoch": 2.46742671009772, + "grad_norm": 0.5575163720334637, + "learning_rate": 3.2736460340849654e-05, + "loss": 0.3753, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.36782410740852356, + "step": 1515, + "valid_targets_mean": 3671.9, + "valid_targets_min": 783 + }, + { + "epoch": 2.47557003257329, + "grad_norm": 0.5573651303331029, + "learning_rate": 3.26737339954605e-05, + "loss": 0.4478, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4205915033817291, + "step": 1520, + "valid_targets_mean": 5427.0, + "valid_targets_min": 856 + }, + { + "epoch": 2.48371335504886, + "grad_norm": 0.6689419913606189, + "learning_rate": 3.261079863826719e-05, + "loss": 0.366, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.32394590973854065, + "step": 1525, + "valid_targets_mean": 2309.1, + "valid_targets_min": 690 + }, + { + "epoch": 2.4918566775244297, + "grad_norm": 0.5337697214574574, + "learning_rate": 3.2547655307182675e-05, + "loss": 0.4039, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.39687618613243103, + "step": 1530, + "valid_targets_mean": 4458.9, + "valid_targets_min": 970 + }, + { + "epoch": 2.5, + "grad_norm": 0.514924017647839, + "learning_rate": 3.248430504354975e-05, + "loss": 0.4114, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.42001527547836304, + "step": 1535, + "valid_targets_mean": 4596.4, + "valid_targets_min": 897 + }, + { + "epoch": 2.5081433224755703, + "grad_norm": 0.5146054852497547, + "learning_rate": 3.242074889212388e-05, + "loss": 0.3865, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3165473937988281, + "step": 1540, + "valid_targets_mean": 4028.6, + "valid_targets_min": 700 + }, + { + "epoch": 2.51628664495114, + "grad_norm": 0.5719686649997544, + "learning_rate": 3.235698790105598e-05, + "loss": 0.3665, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4056331515312195, + "step": 1545, + "valid_targets_mean": 3663.0, + "valid_targets_min": 831 + }, + { + "epoch": 2.52442996742671, + "grad_norm": 0.5891519047825186, + "learning_rate": 3.2293023121875126e-05, + "loss": 0.3758, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.31530335545539856, + "step": 1550, + "valid_targets_mean": 2829.2, + "valid_targets_min": 931 + }, + { + "epoch": 2.53257328990228, + "grad_norm": 0.4157670144938022, + "learning_rate": 3.222885560947121e-05, + "loss": 0.4027, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3494027853012085, + "step": 1555, + "valid_targets_mean": 5868.6, + "valid_targets_min": 869 + }, + { + "epoch": 2.5407166123778504, + "grad_norm": 0.618094188203553, + "learning_rate": 3.216448642207754e-05, + "loss": 0.3874, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.48195070028305054, + "step": 1560, + "valid_targets_mean": 4120.7, + "valid_targets_min": 888 + }, + { + "epoch": 2.54885993485342, + "grad_norm": 0.5502229845767689, + "learning_rate": 3.209991662125342e-05, + "loss": 0.3935, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.38307374715805054, + "step": 1565, + "valid_targets_mean": 3965.1, + "valid_targets_min": 735 + }, + { + "epoch": 2.55700325732899, + "grad_norm": 0.5450076747013342, + "learning_rate": 3.203514727186657e-05, + "loss": 0.3917, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.34361016750335693, + "step": 1570, + "valid_targets_mean": 3699.5, + "valid_targets_min": 750 + }, + { + "epoch": 2.5651465798045603, + "grad_norm": 0.5427337706849031, + "learning_rate": 3.197017944207567e-05, + "loss": 0.3809, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3216630816459656, + "step": 1575, + "valid_targets_mean": 3111.4, + "valid_targets_min": 924 + }, + { + "epoch": 2.5732899022801305, + "grad_norm": 0.5838876147684144, + "learning_rate": 3.190501420331261e-05, + "loss": 0.3713, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.285226970911026, + "step": 1580, + "valid_targets_mean": 2464.8, + "valid_targets_min": 909 + }, + { + "epoch": 2.5814332247557004, + "grad_norm": 0.5739652343821701, + "learning_rate": 3.1839652630264986e-05, + "loss": 0.3559, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3472951650619507, + "step": 1585, + "valid_targets_mean": 3215.1, + "valid_targets_min": 826 + }, + { + "epoch": 2.58957654723127, + "grad_norm": 0.477471377533063, + "learning_rate": 3.17740958008582e-05, + "loss": 0.3338, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.298251748085022, + "step": 1590, + "valid_targets_mean": 4829.8, + "valid_targets_min": 716 + }, + { + "epoch": 2.5977198697068404, + "grad_norm": 0.504907487649724, + "learning_rate": 3.170834479623783e-05, + "loss": 0.38, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3564375042915344, + "step": 1595, + "valid_targets_mean": 4524.7, + "valid_targets_min": 791 + }, + { + "epoch": 2.6058631921824107, + "grad_norm": 0.574884688381691, + "learning_rate": 3.164240070075171e-05, + "loss": 0.3702, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4318529963493347, + "step": 1600, + "valid_targets_mean": 4223.0, + "valid_targets_min": 862 + }, + { + "epoch": 2.6140065146579805, + "grad_norm": 0.5041502769120333, + "learning_rate": 3.157626460193209e-05, + "loss": 0.3882, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4731264114379883, + "step": 1605, + "valid_targets_mean": 5622.9, + "valid_targets_min": 808 + }, + { + "epoch": 2.6221498371335503, + "grad_norm": 0.5611578500052632, + "learning_rate": 3.1509937590477675e-05, + "loss": 0.3744, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.34151336550712585, + "step": 1610, + "valid_targets_mean": 4067.8, + "valid_targets_min": 802 + }, + { + "epoch": 2.6302931596091206, + "grad_norm": 0.5813733283367294, + "learning_rate": 3.144342076023566e-05, + "loss": 0.4655, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.39996781945228577, + "step": 1615, + "valid_targets_mean": 4269.6, + "valid_targets_min": 929 + }, + { + "epoch": 2.6384364820846904, + "grad_norm": 0.6154237344234993, + "learning_rate": 3.137671520818367e-05, + "loss": 0.4271, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4073317050933838, + "step": 1620, + "valid_targets_mean": 3470.6, + "valid_targets_min": 823 + }, + { + "epoch": 2.6465798045602607, + "grad_norm": 0.5872463617813265, + "learning_rate": 3.130982203441169e-05, + "loss": 0.4542, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4568232297897339, + "step": 1625, + "valid_targets_mean": 4053.1, + "valid_targets_min": 852 + }, + { + "epoch": 2.6547231270358305, + "grad_norm": 0.5608791765060234, + "learning_rate": 3.124274234210391e-05, + "loss": 0.4006, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4161519408226013, + "step": 1630, + "valid_targets_mean": 3801.7, + "valid_targets_min": 830 + }, + { + "epoch": 2.6628664495114007, + "grad_norm": 0.4936075272802567, + "learning_rate": 3.117547723752052e-05, + "loss": 0.3837, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.40324047207832336, + "step": 1635, + "valid_targets_mean": 5207.6, + "valid_targets_min": 999 + }, + { + "epoch": 2.6710097719869705, + "grad_norm": 0.5818622136437307, + "learning_rate": 3.11080278299795e-05, + "loss": 0.4199, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.41308146715164185, + "step": 1640, + "valid_targets_mean": 3657.2, + "valid_targets_min": 795 + }, + { + "epoch": 2.679153094462541, + "grad_norm": 0.5193285478680708, + "learning_rate": 3.104039523183829e-05, + "loss": 0.3901, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.29065650701522827, + "step": 1645, + "valid_targets_mean": 3265.1, + "valid_targets_min": 1018 + }, + { + "epoch": 2.6872964169381106, + "grad_norm": 0.6376562469636174, + "learning_rate": 3.097258055847547e-05, + "loss": 0.3277, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27989137172698975, + "step": 1650, + "valid_targets_mean": 2113.3, + "valid_targets_min": 807 + }, + { + "epoch": 2.695439739413681, + "grad_norm": 0.5550994732648101, + "learning_rate": 3.0904584928272336e-05, + "loss": 0.4005, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3938174247741699, + "step": 1655, + "valid_targets_mean": 3507.9, + "valid_targets_min": 973 + }, + { + "epoch": 2.7035830618892507, + "grad_norm": 0.46854736668320074, + "learning_rate": 3.08364094625945e-05, + "loss": 0.3631, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.40149998664855957, + "step": 1660, + "valid_targets_mean": 5709.1, + "valid_targets_min": 867 + }, + { + "epoch": 2.711726384364821, + "grad_norm": 0.5864558466908356, + "learning_rate": 3.076805528577336e-05, + "loss": 0.4233, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3051266074180603, + "step": 1665, + "valid_targets_mean": 3120.8, + "valid_targets_min": 866 + }, + { + "epoch": 2.7198697068403908, + "grad_norm": 0.6671932496949183, + "learning_rate": 3.069952352508758e-05, + "loss": 0.3835, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2984146475791931, + "step": 1670, + "valid_targets_mean": 2057.2, + "valid_targets_min": 770 + }, + { + "epoch": 2.728013029315961, + "grad_norm": 0.5999453170819166, + "learning_rate": 3.0630815310744465e-05, + "loss": 0.3473, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3447234332561493, + "step": 1675, + "valid_targets_mean": 2882.4, + "valid_targets_min": 891 + }, + { + "epoch": 2.736156351791531, + "grad_norm": 0.47932761322505907, + "learning_rate": 3.0561931775861364e-05, + "loss": 0.4019, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.31661492586135864, + "step": 1680, + "valid_targets_mean": 4319.8, + "valid_targets_min": 654 + }, + { + "epoch": 2.744299674267101, + "grad_norm": 0.43707573562893365, + "learning_rate": 3.0492874056446973e-05, + "loss": 0.4039, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.38458573818206787, + "step": 1685, + "valid_targets_mean": 6070.9, + "valid_targets_min": 826 + }, + { + "epoch": 2.752442996742671, + "grad_norm": 0.5532960630991639, + "learning_rate": 3.0423643291382582e-05, + "loss": 0.4019, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.5015804767608643, + "step": 1690, + "valid_targets_mean": 5468.4, + "valid_targets_min": 718 + }, + { + "epoch": 2.760586319218241, + "grad_norm": 0.4504532170814088, + "learning_rate": 3.0354240622403294e-05, + "loss": 0.3748, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.44392630457878113, + "step": 1695, + "valid_targets_mean": 7866.9, + "valid_targets_min": 775 + }, + { + "epoch": 2.768729641693811, + "grad_norm": 0.4646434459760435, + "learning_rate": 3.0284667194079217e-05, + "loss": 0.4414, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4584432542324066, + "step": 1700, + "valid_targets_mean": 6272.4, + "valid_targets_min": 825 + }, + { + "epoch": 2.7768729641693812, + "grad_norm": 0.6349975205737249, + "learning_rate": 3.021492415379658e-05, + "loss": 0.3757, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.41838324069976807, + "step": 1705, + "valid_targets_mean": 4283.9, + "valid_targets_min": 866 + }, + { + "epoch": 2.785016286644951, + "grad_norm": 0.45727676818275853, + "learning_rate": 3.014501265173879e-05, + "loss": 0.3976, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3660026490688324, + "step": 1710, + "valid_targets_mean": 5215.8, + "valid_targets_min": 820 + }, + { + "epoch": 2.7931596091205213, + "grad_norm": 0.5782449058214303, + "learning_rate": 3.0074933840867505e-05, + "loss": 0.3661, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.43505918979644775, + "step": 1715, + "valid_targets_mean": 3792.4, + "valid_targets_min": 925 + }, + { + "epoch": 2.801302931596091, + "grad_norm": 0.5828112287483032, + "learning_rate": 3.0004688876903566e-05, + "loss": 0.3828, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.38363444805145264, + "step": 1720, + "valid_targets_mean": 4339.9, + "valid_targets_min": 906 + }, + { + "epoch": 2.8094462540716614, + "grad_norm": 0.6292399101290617, + "learning_rate": 2.9934278918307987e-05, + "loss": 0.3865, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.28075674176216125, + "step": 1725, + "valid_targets_mean": 2279.7, + "valid_targets_min": 865 + }, + { + "epoch": 2.817589576547231, + "grad_norm": 0.6475716586976032, + "learning_rate": 2.986370512626282e-05, + "loss": 0.3823, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2639043629169464, + "step": 1730, + "valid_targets_mean": 2206.9, + "valid_targets_min": 745 + }, + { + "epoch": 2.8257328990228014, + "grad_norm": 0.5384734380015455, + "learning_rate": 2.979296866465202e-05, + "loss": 0.396, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3511195182800293, + "step": 1735, + "valid_targets_mean": 4276.8, + "valid_targets_min": 582 + }, + { + "epoch": 2.8338762214983713, + "grad_norm": 0.5060215432705198, + "learning_rate": 2.9722070700042237e-05, + "loss": 0.3964, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3756062090396881, + "step": 1740, + "valid_targets_mean": 4356.0, + "valid_targets_min": 953 + }, + { + "epoch": 2.8420195439739415, + "grad_norm": 0.5446419274549089, + "learning_rate": 2.9651012401663587e-05, + "loss": 0.4141, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.42974019050598145, + "step": 1745, + "valid_targets_mean": 4334.9, + "valid_targets_min": 981 + }, + { + "epoch": 2.8501628664495113, + "grad_norm": 0.6569808375962571, + "learning_rate": 2.957979494139038e-05, + "loss": 0.4182, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.41067197918891907, + "step": 1750, + "valid_targets_mean": 4460.6, + "valid_targets_min": 818 + }, + { + "epoch": 2.8583061889250816, + "grad_norm": 0.5059948457565091, + "learning_rate": 2.9508419493721755e-05, + "loss": 0.3381, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3753646910190582, + "step": 1755, + "valid_targets_mean": 5279.4, + "valid_targets_min": 814 + }, + { + "epoch": 2.8664495114006514, + "grad_norm": 0.5287152711441316, + "learning_rate": 2.9436887235762365e-05, + "loss": 0.4148, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4531913995742798, + "step": 1760, + "valid_targets_mean": 5802.7, + "valid_targets_min": 1007 + }, + { + "epoch": 2.8745928338762217, + "grad_norm": 0.5248947640196178, + "learning_rate": 2.9365199347202917e-05, + "loss": 0.4325, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4455067813396454, + "step": 1765, + "valid_targets_mean": 4964.6, + "valid_targets_min": 729 + }, + { + "epoch": 2.8827361563517915, + "grad_norm": 0.37703999975690505, + "learning_rate": 2.929335701030074e-05, + "loss": 0.4121, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.35953304171562195, + "step": 1770, + "valid_targets_mean": 7873.3, + "valid_targets_min": 889 + }, + { + "epoch": 2.8908794788273617, + "grad_norm": 0.5791771847265265, + "learning_rate": 2.9221361409860284e-05, + "loss": 0.3692, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.42298299074172974, + "step": 1775, + "valid_targets_mean": 4124.2, + "valid_targets_min": 845 + }, + { + "epoch": 2.8990228013029316, + "grad_norm": 0.466361083909102, + "learning_rate": 2.914921373321359e-05, + "loss": 0.4281, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.32893645763397217, + "step": 1780, + "valid_targets_mean": 4442.7, + "valid_targets_min": 789 + }, + { + "epoch": 2.9071661237785014, + "grad_norm": 0.5334544006996779, + "learning_rate": 2.907691517020068e-05, + "loss": 0.3985, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4179403781890869, + "step": 1785, + "valid_targets_mean": 4163.1, + "valid_targets_min": 696 + }, + { + "epoch": 2.9153094462540716, + "grad_norm": 0.6010828035515186, + "learning_rate": 2.900446691314997e-05, + "loss": 0.3852, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3059876561164856, + "step": 1790, + "valid_targets_mean": 2706.9, + "valid_targets_min": 731 + }, + { + "epoch": 2.923452768729642, + "grad_norm": 0.49494086610869825, + "learning_rate": 2.8931870156858573e-05, + "loss": 0.3879, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.38742202520370483, + "step": 1795, + "valid_targets_mean": 4825.2, + "valid_targets_min": 763 + }, + { + "epoch": 2.9315960912052117, + "grad_norm": 0.48458419769207306, + "learning_rate": 2.885912609857264e-05, + "loss": 0.3597, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4001881182193756, + "step": 1800, + "valid_targets_mean": 6619.2, + "valid_targets_min": 762 + }, + { + "epoch": 2.9397394136807815, + "grad_norm": 0.6267747662885939, + "learning_rate": 2.8786235937967553e-05, + "loss": 0.358, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2945130169391632, + "step": 1805, + "valid_targets_mean": 2253.6, + "valid_targets_min": 871 + }, + { + "epoch": 2.9478827361563518, + "grad_norm": 0.477819672996166, + "learning_rate": 2.871320087712819e-05, + "loss": 0.4204, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.38395076990127563, + "step": 1810, + "valid_targets_mean": 5239.2, + "valid_targets_min": 851 + }, + { + "epoch": 2.956026058631922, + "grad_norm": 0.6066788283383678, + "learning_rate": 2.8640022120529082e-05, + "loss": 0.3778, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.34224873781204224, + "step": 1815, + "valid_targets_mean": 2654.8, + "valid_targets_min": 737 + }, + { + "epoch": 2.964169381107492, + "grad_norm": 0.5273517621226159, + "learning_rate": 2.8566700875014564e-05, + "loss": 0.3694, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3907949924468994, + "step": 1820, + "valid_targets_mean": 4087.4, + "valid_targets_min": 775 + }, + { + "epoch": 2.9723127035830617, + "grad_norm": 0.558206324111843, + "learning_rate": 2.8493238349778845e-05, + "loss": 0.3657, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3832775354385376, + "step": 1825, + "valid_targets_mean": 3851.6, + "valid_targets_min": 1338 + }, + { + "epoch": 2.980456026058632, + "grad_norm": 0.4407935915178145, + "learning_rate": 2.8419635756346077e-05, + "loss": 0.3732, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3979068398475647, + "step": 1830, + "valid_targets_mean": 6318.3, + "valid_targets_min": 884 + }, + { + "epoch": 2.988599348534202, + "grad_norm": 0.560709267135834, + "learning_rate": 2.8345894308550392e-05, + "loss": 0.3605, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.39971548318862915, + "step": 1835, + "valid_targets_mean": 3983.1, + "valid_targets_min": 857 + }, + { + "epoch": 2.996742671009772, + "grad_norm": 0.4810664218838111, + "learning_rate": 2.827201522251587e-05, + "loss": 0.377, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3614308834075928, + "step": 1840, + "valid_targets_mean": 5526.2, + "valid_targets_min": 938 + }, + { + "epoch": 3.004885993485342, + "grad_norm": 0.5391247161853154, + "learning_rate": 2.819799971663648e-05, + "loss": 0.3249, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3049616813659668, + "step": 1845, + "valid_targets_mean": 3187.5, + "valid_targets_min": 924 + }, + { + "epoch": 3.013029315960912, + "grad_norm": 0.5989902171231254, + "learning_rate": 2.8123849011555983e-05, + "loss": 0.3987, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4156782925128937, + "step": 1850, + "valid_targets_mean": 3565.4, + "valid_targets_min": 866 + }, + { + "epoch": 3.021172638436482, + "grad_norm": 0.5007053399629641, + "learning_rate": 2.8049564330147823e-05, + "loss": 0.3762, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3126833438873291, + "step": 1855, + "valid_targets_mean": 3892.6, + "valid_targets_min": 834 + }, + { + "epoch": 3.029315960912052, + "grad_norm": 0.639594526715952, + "learning_rate": 2.7975146897494933e-05, + "loss": 0.3849, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.39225274324417114, + "step": 1860, + "valid_targets_mean": 4900.6, + "valid_targets_min": 807 + }, + { + "epoch": 3.037459283387622, + "grad_norm": 0.5753666403096663, + "learning_rate": 2.7900597940869548e-05, + "loss": 0.3711, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.41801905632019043, + "step": 1865, + "valid_targets_mean": 5792.0, + "valid_targets_min": 840 + }, + { + "epoch": 3.045602605863192, + "grad_norm": 0.4743816586834026, + "learning_rate": 2.782591868971297e-05, + "loss": 0.3816, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3171934485435486, + "step": 1870, + "valid_targets_mean": 4610.8, + "valid_targets_min": 938 + }, + { + "epoch": 3.053745928338762, + "grad_norm": 0.5420522973924808, + "learning_rate": 2.7751110375615273e-05, + "loss": 0.3626, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.38823622465133667, + "step": 1875, + "valid_targets_mean": 4236.9, + "valid_targets_min": 698 + }, + { + "epoch": 3.0618892508143323, + "grad_norm": 0.5336259178060211, + "learning_rate": 2.7676174232295e-05, + "loss": 0.3832, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.45788949728012085, + "step": 1880, + "valid_targets_mean": 4595.0, + "valid_targets_min": 800 + }, + { + "epoch": 3.070032573289902, + "grad_norm": 0.5604431408837038, + "learning_rate": 2.760111149557882e-05, + "loss": 0.423, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4732162356376648, + "step": 1885, + "valid_targets_mean": 5765.7, + "valid_targets_min": 798 + }, + { + "epoch": 3.0781758957654723, + "grad_norm": 0.5972241668713916, + "learning_rate": 2.7525923403381162e-05, + "loss": 0.3869, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4449957311153412, + "step": 1890, + "valid_targets_mean": 4609.5, + "valid_targets_min": 876 + }, + { + "epoch": 3.086319218241042, + "grad_norm": 0.6345827656540779, + "learning_rate": 2.7450611195683755e-05, + "loss": 0.3928, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.33713364601135254, + "step": 1895, + "valid_targets_mean": 2753.9, + "valid_targets_min": 791 + }, + { + "epoch": 3.0944625407166124, + "grad_norm": 0.4913652005763087, + "learning_rate": 2.737517611451524e-05, + "loss": 0.3483, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3640807569026947, + "step": 1900, + "valid_targets_mean": 5310.2, + "valid_targets_min": 1092 + }, + { + "epoch": 3.1026058631921822, + "grad_norm": 0.6078513004178369, + "learning_rate": 2.7299619403930626e-05, + "loss": 0.3728, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3834528923034668, + "step": 1905, + "valid_targets_mean": 3132.4, + "valid_targets_min": 635 + }, + { + "epoch": 3.1107491856677525, + "grad_norm": 0.6798831464711517, + "learning_rate": 2.722394230999082e-05, + "loss": 0.4025, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.5015262365341187, + "step": 1910, + "valid_targets_mean": 7552.9, + "valid_targets_min": 635 + }, + { + "epoch": 3.1188925081433223, + "grad_norm": 0.5053761119623489, + "learning_rate": 2.7148146080742045e-05, + "loss": 0.3628, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.38888177275657654, + "step": 1915, + "valid_targets_mean": 5197.4, + "valid_targets_min": 889 + }, + { + "epoch": 3.1270358306188926, + "grad_norm": 0.5305667518670434, + "learning_rate": 2.7072231966195287e-05, + "loss": 0.3884, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4207892417907715, + "step": 1920, + "valid_targets_mean": 4759.9, + "valid_targets_min": 850 + }, + { + "epoch": 3.1351791530944624, + "grad_norm": 0.5834175939551854, + "learning_rate": 2.6996201218305663e-05, + "loss": 0.3921, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.45480918884277344, + "step": 1925, + "valid_targets_mean": 3938.6, + "valid_targets_min": 795 + }, + { + "epoch": 3.1433224755700326, + "grad_norm": 0.7658840606234408, + "learning_rate": 2.6920055090951752e-05, + "loss": 0.3809, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.391740620136261, + "step": 1930, + "valid_targets_mean": 3069.9, + "valid_targets_min": 835 + }, + { + "epoch": 3.1514657980456025, + "grad_norm": 0.6174886641419374, + "learning_rate": 2.6843794839914966e-05, + "loss": 0.4036, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3642096519470215, + "step": 1935, + "valid_targets_mean": 2902.2, + "valid_targets_min": 840 + }, + { + "epoch": 3.1596091205211727, + "grad_norm": 0.6665446838057973, + "learning_rate": 2.67674217228588e-05, + "loss": 0.4004, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4679354429244995, + "step": 1940, + "valid_targets_mean": 4601.4, + "valid_targets_min": 837 + }, + { + "epoch": 3.1677524429967425, + "grad_norm": 0.5719818589640558, + "learning_rate": 2.6690936999308113e-05, + "loss": 0.4189, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3626285195350647, + "step": 1945, + "valid_targets_mean": 3358.7, + "valid_targets_min": 813 + }, + { + "epoch": 3.175895765472313, + "grad_norm": 0.8332468151926798, + "learning_rate": 2.6614341930628352e-05, + "loss": 0.359, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.35883480310440063, + "step": 1950, + "valid_targets_mean": 3696.6, + "valid_targets_min": 766 + }, + { + "epoch": 3.1840390879478826, + "grad_norm": 0.5940925293801734, + "learning_rate": 2.6537637780004722e-05, + "loss": 0.3698, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3348199129104614, + "step": 1955, + "valid_targets_mean": 3463.5, + "valid_targets_min": 865 + }, + { + "epoch": 3.192182410423453, + "grad_norm": 0.5770324016928695, + "learning_rate": 2.6460825812421417e-05, + "loss": 0.446, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.49566924571990967, + "step": 1960, + "valid_targets_mean": 5365.8, + "valid_targets_min": 808 + }, + { + "epoch": 3.2003257328990227, + "grad_norm": 0.6905739149759272, + "learning_rate": 2.6383907294640684e-05, + "loss": 0.3616, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.36633580923080444, + "step": 1965, + "valid_targets_mean": 2318.8, + "valid_targets_min": 800 + }, + { + "epoch": 3.208469055374593, + "grad_norm": 0.6650062142941536, + "learning_rate": 2.6306883495181974e-05, + "loss": 0.3718, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.29867032170295715, + "step": 1970, + "valid_targets_mean": 2374.9, + "valid_targets_min": 741 + }, + { + "epoch": 3.2166123778501627, + "grad_norm": 0.5421460492619593, + "learning_rate": 2.6229755684301036e-05, + "loss": 0.3891, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4494415819644928, + "step": 1975, + "valid_targets_mean": 6205.4, + "valid_targets_min": 905 + }, + { + "epoch": 3.224755700325733, + "grad_norm": 0.5993788494954284, + "learning_rate": 2.6152525133968932e-05, + "loss": 0.3598, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4457775950431824, + "step": 1980, + "valid_targets_mean": 4127.9, + "valid_targets_min": 1126 + }, + { + "epoch": 3.232899022801303, + "grad_norm": 0.5277474918584322, + "learning_rate": 2.6075193117851067e-05, + "loss": 0.39, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.38143354654312134, + "step": 1985, + "valid_targets_mean": 4497.4, + "valid_targets_min": 787 + }, + { + "epoch": 3.241042345276873, + "grad_norm": 0.5346506869159616, + "learning_rate": 2.5997760911286208e-05, + "loss": 0.3772, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4200761616230011, + "step": 1990, + "valid_targets_mean": 4754.2, + "valid_targets_min": 745 + }, + { + "epoch": 3.249185667752443, + "grad_norm": 0.4918966983646168, + "learning_rate": 2.5920229791265422e-05, + "loss": 0.331, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.29003095626831055, + "step": 1995, + "valid_targets_mean": 4002.1, + "valid_targets_min": 823 + }, + { + "epoch": 3.257328990228013, + "grad_norm": 0.5154393602314412, + "learning_rate": 2.584260103641105e-05, + "loss": 0.4067, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4466646909713745, + "step": 2000, + "valid_targets_mean": 4695.6, + "valid_targets_min": 623 + }, + { + "epoch": 3.265472312703583, + "grad_norm": 0.5915491600346998, + "learning_rate": 2.576487592695558e-05, + "loss": 0.3702, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3153283894062042, + "step": 2005, + "valid_targets_mean": 3149.9, + "valid_targets_min": 860 + }, + { + "epoch": 3.273615635179153, + "grad_norm": 0.50832169492039, + "learning_rate": 2.5687055744720563e-05, + "loss": 0.3605, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3015378713607788, + "step": 2010, + "valid_targets_mean": 3920.1, + "valid_targets_min": 816 + }, + { + "epoch": 3.281758957654723, + "grad_norm": 0.6849583173071804, + "learning_rate": 2.5609141773095462e-05, + "loss": 0.343, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.36728227138519287, + "step": 2015, + "valid_targets_mean": 2872.2, + "valid_targets_min": 756 + }, + { + "epoch": 3.2899022801302933, + "grad_norm": 0.4536242387082828, + "learning_rate": 2.5531135297016497e-05, + "loss": 0.3946, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.47613370418548584, + "step": 2020, + "valid_targets_mean": 10156.1, + "valid_targets_min": 1106 + }, + { + "epoch": 3.298045602605863, + "grad_norm": 0.5682821928786413, + "learning_rate": 2.5453037602945438e-05, + "loss": 0.3686, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3663817346096039, + "step": 2025, + "valid_targets_mean": 3717.7, + "valid_targets_min": 754 + }, + { + "epoch": 3.3061889250814334, + "grad_norm": 0.5410093392747461, + "learning_rate": 2.5374849978848395e-05, + "loss": 0.3881, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.36933469772338867, + "step": 2030, + "valid_targets_mean": 4199.4, + "valid_targets_min": 967 + }, + { + "epoch": 3.314332247557003, + "grad_norm": 0.43802085053669565, + "learning_rate": 2.529657371417459e-05, + "loss": 0.344, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.34438949823379517, + "step": 2035, + "valid_targets_mean": 6095.7, + "valid_targets_min": 979 + }, + { + "epoch": 3.3224755700325734, + "grad_norm": 0.5487243449932767, + "learning_rate": 2.5218210099835077e-05, + "loss": 0.3078, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3584398031234741, + "step": 2040, + "valid_targets_mean": 4636.1, + "valid_targets_min": 742 + }, + { + "epoch": 3.3306188925081432, + "grad_norm": 0.5162757881743348, + "learning_rate": 2.5139760428181453e-05, + "loss": 0.43, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3786334693431854, + "step": 2045, + "valid_targets_mean": 4361.4, + "valid_targets_min": 716 + }, + { + "epoch": 3.3387622149837135, + "grad_norm": 0.5705393855346792, + "learning_rate": 2.506122599298455e-05, + "loss": 0.3793, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4032687246799469, + "step": 2050, + "valid_targets_mean": 4729.7, + "valid_targets_min": 884 + }, + { + "epoch": 3.3469055374592833, + "grad_norm": 0.5715010750667091, + "learning_rate": 2.498260808941311e-05, + "loss": 0.3312, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3806294798851013, + "step": 2055, + "valid_targets_mean": 3879.2, + "valid_targets_min": 735 + }, + { + "epoch": 3.3550488599348536, + "grad_norm": 0.4714260747141006, + "learning_rate": 2.490390801401239e-05, + "loss": 0.3765, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4187000095844269, + "step": 2060, + "valid_targets_mean": 7533.1, + "valid_targets_min": 820 + }, + { + "epoch": 3.3631921824104234, + "grad_norm": 0.6338807615077798, + "learning_rate": 2.4825127064682825e-05, + "loss": 0.3788, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3861987590789795, + "step": 2065, + "valid_targets_mean": 3655.9, + "valid_targets_min": 899 + }, + { + "epoch": 3.3713355048859937, + "grad_norm": 0.48637191714276307, + "learning_rate": 2.4746266540658593e-05, + "loss": 0.3575, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.29827210307121277, + "step": 2070, + "valid_targets_mean": 3979.8, + "valid_targets_min": 917 + }, + { + "epoch": 3.3794788273615635, + "grad_norm": 0.747998364122814, + "learning_rate": 2.4667327742486185e-05, + "loss": 0.3543, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.313834011554718, + "step": 2075, + "valid_targets_mean": 2125.9, + "valid_targets_min": 820 + }, + { + "epoch": 3.3876221498371337, + "grad_norm": 0.4612334097692858, + "learning_rate": 2.458831197200299e-05, + "loss": 0.3916, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4305063486099243, + "step": 2080, + "valid_targets_mean": 7388.8, + "valid_targets_min": 869 + }, + { + "epoch": 3.3957654723127035, + "grad_norm": 0.6095363960047905, + "learning_rate": 2.4509220532315783e-05, + "loss": 0.3525, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2685038447380066, + "step": 2085, + "valid_targets_mean": 3337.9, + "valid_targets_min": 805 + }, + { + "epoch": 3.403908794788274, + "grad_norm": 0.7768492738209594, + "learning_rate": 2.4430054727779266e-05, + "loss": 0.3911, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24858295917510986, + "step": 2090, + "valid_targets_mean": 1771.1, + "valid_targets_min": 700 + }, + { + "epoch": 3.4120521172638436, + "grad_norm": 0.5180679830043349, + "learning_rate": 2.4350815863974546e-05, + "loss": 0.36, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2730467915534973, + "step": 2095, + "valid_targets_mean": 3542.4, + "valid_targets_min": 868 + }, + { + "epoch": 3.420195439739414, + "grad_norm": 0.5088875087524373, + "learning_rate": 2.42715052476876e-05, + "loss": 0.3497, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.35513824224472046, + "step": 2100, + "valid_targets_mean": 5209.9, + "valid_targets_min": 818 + }, + { + "epoch": 3.4283387622149837, + "grad_norm": 0.4531428577481038, + "learning_rate": 2.4192124186887727e-05, + "loss": 0.3459, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3849831521511078, + "step": 2105, + "valid_targets_mean": 6211.0, + "valid_targets_min": 809 + }, + { + "epoch": 3.436482084690554, + "grad_norm": 0.638458428210392, + "learning_rate": 2.411267399070598e-05, + "loss": 0.3823, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25012654066085815, + "step": 2110, + "valid_targets_mean": 2193.0, + "valid_targets_min": 824 + }, + { + "epoch": 3.4446254071661238, + "grad_norm": 0.5735207355600547, + "learning_rate": 2.4033155969413585e-05, + "loss": 0.3438, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3478139340877533, + "step": 2115, + "valid_targets_mean": 3078.1, + "valid_targets_min": 865 + }, + { + "epoch": 3.4527687296416936, + "grad_norm": 0.5483365820326391, + "learning_rate": 2.3953571434400296e-05, + "loss": 0.3628, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3322025537490845, + "step": 2120, + "valid_targets_mean": 3534.9, + "valid_targets_min": 853 + }, + { + "epoch": 3.460912052117264, + "grad_norm": 0.5587956451547775, + "learning_rate": 2.387392169815282e-05, + "loss": 0.3635, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.375313401222229, + "step": 2125, + "valid_targets_mean": 3711.8, + "valid_targets_min": 922 + }, + { + "epoch": 3.469055374592834, + "grad_norm": 0.5486783635461537, + "learning_rate": 2.3794208074233126e-05, + "loss": 0.3746, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.43224895000457764, + "step": 2130, + "valid_targets_mean": 4583.8, + "valid_targets_min": 771 + }, + { + "epoch": 3.477198697068404, + "grad_norm": 0.5804330625073005, + "learning_rate": 2.3714431877256807e-05, + "loss": 0.3675, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.35571742057800293, + "step": 2135, + "valid_targets_mean": 4026.6, + "valid_targets_min": 871 + }, + { + "epoch": 3.4853420195439737, + "grad_norm": 0.6287505205318633, + "learning_rate": 2.3634594422871406e-05, + "loss": 0.3431, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3206663131713867, + "step": 2140, + "valid_targets_mean": 2721.0, + "valid_targets_min": 715 + }, + { + "epoch": 3.493485342019544, + "grad_norm": 0.689273866702449, + "learning_rate": 2.3554697027734683e-05, + "loss": 0.3975, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.32829296588897705, + "step": 2145, + "valid_targets_mean": 2251.5, + "valid_targets_min": 857 + }, + { + "epoch": 3.5016286644951142, + "grad_norm": 0.6899260618276054, + "learning_rate": 2.3474741009492945e-05, + "loss": 0.3648, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3998834490776062, + "step": 2150, + "valid_targets_mean": 3183.8, + "valid_targets_min": 1021 + }, + { + "epoch": 3.509771986970684, + "grad_norm": 0.5770654930836119, + "learning_rate": 2.3394727686759283e-05, + "loss": 0.3473, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3679700791835785, + "step": 2155, + "valid_targets_mean": 3629.1, + "valid_targets_min": 934 + }, + { + "epoch": 3.517915309446254, + "grad_norm": 0.5620538756121047, + "learning_rate": 2.331465837909185e-05, + "loss": 0.3865, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.357778936624527, + "step": 2160, + "valid_targets_mean": 3530.0, + "valid_targets_min": 782 + }, + { + "epoch": 3.526058631921824, + "grad_norm": 0.5592126057425982, + "learning_rate": 2.323453440697208e-05, + "loss": 0.3122, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3689309358596802, + "step": 2165, + "valid_targets_mean": 3760.3, + "valid_targets_min": 934 + }, + { + "epoch": 3.5342019543973944, + "grad_norm": 0.5660339482369962, + "learning_rate": 2.3154357091782917e-05, + "loss": 0.3824, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3940810561180115, + "step": 2170, + "valid_targets_mean": 3987.1, + "valid_targets_min": 825 + }, + { + "epoch": 3.542345276872964, + "grad_norm": 0.5527949788809973, + "learning_rate": 2.3074127755787025e-05, + "loss": 0.3483, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3553178310394287, + "step": 2175, + "valid_targets_mean": 4019.5, + "valid_targets_min": 662 + }, + { + "epoch": 3.550488599348534, + "grad_norm": 0.8688838386006648, + "learning_rate": 2.2993847722104988e-05, + "loss": 0.3608, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.37927645444869995, + "step": 2180, + "valid_targets_mean": 4755.4, + "valid_targets_min": 740 + }, + { + "epoch": 3.5586319218241043, + "grad_norm": 0.6213071231989562, + "learning_rate": 2.2913518314693478e-05, + "loss": 0.382, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.35325872898101807, + "step": 2185, + "valid_targets_mean": 2924.2, + "valid_targets_min": 793 + }, + { + "epoch": 3.5667752442996745, + "grad_norm": 0.6391609782904011, + "learning_rate": 2.2833140858323418e-05, + "loss": 0.3903, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.46210145950317383, + "step": 2190, + "valid_targets_mean": 4072.6, + "valid_targets_min": 917 + }, + { + "epoch": 3.5749185667752443, + "grad_norm": 0.7136358391667674, + "learning_rate": 2.2752716678558152e-05, + "loss": 0.3696, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.40729600191116333, + "step": 2195, + "valid_targets_mean": 2558.4, + "valid_targets_min": 862 + }, + { + "epoch": 3.583061889250814, + "grad_norm": 0.6176960805537862, + "learning_rate": 2.267224710173157e-05, + "loss": 0.3521, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3556419610977173, + "step": 2200, + "valid_targets_mean": 3046.1, + "valid_targets_min": 882 + }, + { + "epoch": 3.5912052117263844, + "grad_norm": 0.6889044277917883, + "learning_rate": 2.259173345492624e-05, + "loss": 0.3716, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.30551785230636597, + "step": 2205, + "valid_targets_mean": 2162.4, + "valid_targets_min": 754 + }, + { + "epoch": 3.5993485342019547, + "grad_norm": 0.48397480178312724, + "learning_rate": 2.2511177065951516e-05, + "loss": 0.406, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3165225386619568, + "step": 2210, + "valid_targets_mean": 4622.5, + "valid_targets_min": 944 + }, + { + "epoch": 3.6074918566775245, + "grad_norm": 0.6391611207356119, + "learning_rate": 2.243057926332165e-05, + "loss": 0.337, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2823382616043091, + "step": 2215, + "valid_targets_mean": 2280.6, + "valid_targets_min": 758 + }, + { + "epoch": 3.6156351791530943, + "grad_norm": 0.9749960777874954, + "learning_rate": 2.234994137623386e-05, + "loss": 0.3767, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.42044317722320557, + "step": 2220, + "valid_targets_mean": 4867.3, + "valid_targets_min": 789 + }, + { + "epoch": 3.6237785016286646, + "grad_norm": 0.6042837760662583, + "learning_rate": 2.2269264734546462e-05, + "loss": 0.3706, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4333944320678711, + "step": 2225, + "valid_targets_mean": 4063.4, + "valid_targets_min": 825 + }, + { + "epoch": 3.6319218241042344, + "grad_norm": 0.4016035969454483, + "learning_rate": 2.2188550668756855e-05, + "loss": 0.3624, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2868938446044922, + "step": 2230, + "valid_targets_mean": 6540.7, + "valid_targets_min": 837 + }, + { + "epoch": 3.6400651465798046, + "grad_norm": 0.5667099157836677, + "learning_rate": 2.2107800509979674e-05, + "loss": 0.3669, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3285192847251892, + "step": 2235, + "valid_targets_mean": 3355.1, + "valid_targets_min": 869 + }, + { + "epoch": 3.6482084690553744, + "grad_norm": 0.4323226021394091, + "learning_rate": 2.2027015589924758e-05, + "loss": 0.3679, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.33684366941452026, + "step": 2240, + "valid_targets_mean": 5806.7, + "valid_targets_min": 1020 + }, + { + "epoch": 3.6563517915309447, + "grad_norm": 0.5058691253983775, + "learning_rate": 2.194619724087522e-05, + "loss": 0.354, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3251740634441376, + "step": 2245, + "valid_targets_mean": 4143.8, + "valid_targets_min": 806 + }, + { + "epoch": 3.6644951140065145, + "grad_norm": 0.6342651064042057, + "learning_rate": 2.1865346795665493e-05, + "loss": 0.3775, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.40073034167289734, + "step": 2250, + "valid_targets_mean": 3272.3, + "valid_targets_min": 950 + }, + { + "epoch": 3.6726384364820848, + "grad_norm": 0.5805053845423003, + "learning_rate": 2.178446558765932e-05, + "loss": 0.3905, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.34984055161476135, + "step": 2255, + "valid_targets_mean": 3679.0, + "valid_targets_min": 952 + }, + { + "epoch": 3.6807817589576546, + "grad_norm": 0.5382716801547162, + "learning_rate": 2.1703554950727775e-05, + "loss": 0.3018, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3209000527858734, + "step": 2260, + "valid_targets_mean": 3720.3, + "valid_targets_min": 825 + }, + { + "epoch": 3.688925081433225, + "grad_norm": 0.5848882238574273, + "learning_rate": 2.1622616219227267e-05, + "loss": 0.4205, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.41884294152259827, + "step": 2265, + "valid_targets_mean": 4028.1, + "valid_targets_min": 739 + }, + { + "epoch": 3.6970684039087947, + "grad_norm": 0.45660034188581705, + "learning_rate": 2.154165072797754e-05, + "loss": 0.3974, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3824225664138794, + "step": 2270, + "valid_targets_mean": 6237.6, + "valid_targets_min": 874 + }, + { + "epoch": 3.705211726384365, + "grad_norm": 0.5133153491863328, + "learning_rate": 2.1460659812239646e-05, + "loss": 0.4095, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.5106462836265564, + "step": 2275, + "valid_targets_mean": 6027.9, + "valid_targets_min": 773 + }, + { + "epoch": 3.7133550488599347, + "grad_norm": 0.6047472752778675, + "learning_rate": 2.137964480769393e-05, + "loss": 0.3774, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4398422837257385, + "step": 2280, + "valid_targets_mean": 3714.4, + "valid_targets_min": 903 + }, + { + "epoch": 3.721498371335505, + "grad_norm": 0.5956408958915163, + "learning_rate": 2.129860705041801e-05, + "loss": 0.369, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.39330923557281494, + "step": 2285, + "valid_targets_mean": 4237.5, + "valid_targets_min": 773 + }, + { + "epoch": 3.729641693811075, + "grad_norm": 0.5049107304065991, + "learning_rate": 2.121754787686472e-05, + "loss": 0.3625, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4648467004299164, + "step": 2290, + "valid_targets_mean": 6949.9, + "valid_targets_min": 1004 + }, + { + "epoch": 3.737785016286645, + "grad_norm": 0.5552226296566772, + "learning_rate": 2.1136468623840104e-05, + "loss": 0.3644, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.26887446641921997, + "step": 2295, + "valid_targets_mean": 2898.2, + "valid_targets_min": 816 + }, + { + "epoch": 3.745928338762215, + "grad_norm": 0.5392088071458845, + "learning_rate": 2.105537062848134e-05, + "loss": 0.3578, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.46389466524124146, + "step": 2300, + "valid_targets_mean": 4835.1, + "valid_targets_min": 1000 + }, + { + "epoch": 3.754071661237785, + "grad_norm": 0.5382331599513713, + "learning_rate": 2.0974255228234702e-05, + "loss": 0.3847, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.433110773563385, + "step": 2305, + "valid_targets_mean": 4831.1, + "valid_targets_min": 992 + }, + { + "epoch": 3.762214983713355, + "grad_norm": 0.45710225454038295, + "learning_rate": 2.089312376083351e-05, + "loss": 0.3638, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4174147844314575, + "step": 2310, + "valid_targets_mean": 8823.1, + "valid_targets_min": 844 + }, + { + "epoch": 3.770358306188925, + "grad_norm": 0.4718700370457537, + "learning_rate": 2.0811977564276036e-05, + "loss": 0.3651, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3402407169342041, + "step": 2315, + "valid_targets_mean": 4897.5, + "valid_targets_min": 823 + }, + { + "epoch": 3.778501628664495, + "grad_norm": 0.6442552887791623, + "learning_rate": 2.0730817976803492e-05, + "loss": 0.303, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2550942897796631, + "step": 2320, + "valid_targets_mean": 2200.6, + "valid_targets_min": 888 + }, + { + "epoch": 3.7866449511400653, + "grad_norm": 0.5493245719795031, + "learning_rate": 2.0649646336877907e-05, + "loss": 0.3611, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3092440664768219, + "step": 2325, + "valid_targets_mean": 3462.5, + "valid_targets_min": 855 + }, + { + "epoch": 3.794788273615635, + "grad_norm": 0.4563563752003002, + "learning_rate": 2.056846398316008e-05, + "loss": 0.3679, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.39923614263534546, + "step": 2330, + "valid_targets_mean": 6884.1, + "valid_targets_min": 830 + }, + { + "epoch": 3.8029315960912053, + "grad_norm": 0.5337464610562437, + "learning_rate": 2.0487272254487508e-05, + "loss": 0.3624, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.36951112747192383, + "step": 2335, + "valid_targets_mean": 4063.8, + "valid_targets_min": 1054 + }, + { + "epoch": 3.811074918566775, + "grad_norm": 0.6572892440064814, + "learning_rate": 2.0406072489852283e-05, + "loss": 0.3479, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.35555851459503174, + "step": 2340, + "valid_targets_mean": 2385.8, + "valid_targets_min": 839 + }, + { + "epoch": 3.8192182410423454, + "grad_norm": 0.6374905536765376, + "learning_rate": 2.032486602837904e-05, + "loss": 0.3332, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2738039493560791, + "step": 2345, + "valid_targets_mean": 2382.4, + "valid_targets_min": 753 + }, + { + "epoch": 3.8273615635179152, + "grad_norm": 0.6080594505725934, + "learning_rate": 2.0243654209302836e-05, + "loss": 0.3367, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2432512491941452, + "step": 2350, + "valid_targets_mean": 2506.8, + "valid_targets_min": 938 + }, + { + "epoch": 3.8355048859934855, + "grad_norm": 0.5726803851001272, + "learning_rate": 2.0162438371947105e-05, + "loss": 0.3658, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.33333465456962585, + "step": 2355, + "valid_targets_mean": 3638.1, + "valid_targets_min": 925 + }, + { + "epoch": 3.8436482084690553, + "grad_norm": 0.6247296091398131, + "learning_rate": 2.0081219855701532e-05, + "loss": 0.3314, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3508688807487488, + "step": 2360, + "valid_targets_mean": 3421.2, + "valid_targets_min": 1045 + }, + { + "epoch": 3.8517915309446256, + "grad_norm": 0.485869631864777, + "learning_rate": 2e-05, + "loss": 0.3535, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3471268117427826, + "step": 2365, + "valid_targets_mean": 4826.6, + "valid_targets_min": 841 + }, + { + "epoch": 3.8599348534201954, + "grad_norm": 0.5249076872532655, + "learning_rate": 1.991878014429847e-05, + "loss": 0.4272, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.5446373224258423, + "step": 2370, + "valid_targets_mean": 6799.1, + "valid_targets_min": 896 + }, + { + "epoch": 3.8680781758957656, + "grad_norm": 0.47371851084122385, + "learning_rate": 1.9837561628052905e-05, + "loss": 0.3939, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4418119788169861, + "step": 2375, + "valid_targets_mean": 7057.5, + "valid_targets_min": 723 + }, + { + "epoch": 3.8762214983713354, + "grad_norm": 0.6666248448760385, + "learning_rate": 1.975634579069717e-05, + "loss": 0.3679, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.39431995153427124, + "step": 2380, + "valid_targets_mean": 3545.8, + "valid_targets_min": 884 + }, + { + "epoch": 3.8843648208469057, + "grad_norm": 0.4468779507123536, + "learning_rate": 1.9675133971620968e-05, + "loss": 0.3858, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2512926757335663, + "step": 2385, + "valid_targets_mean": 4092.9, + "valid_targets_min": 787 + }, + { + "epoch": 3.8925081433224755, + "grad_norm": 0.5213045810460247, + "learning_rate": 1.9593927510147723e-05, + "loss": 0.4071, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.42186450958251953, + "step": 2390, + "valid_targets_mean": 5043.2, + "valid_targets_min": 894 + }, + { + "epoch": 3.9006514657980453, + "grad_norm": 0.566892962182667, + "learning_rate": 1.95127277455125e-05, + "loss": 0.3387, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3200119733810425, + "step": 2395, + "valid_targets_mean": 3024.9, + "valid_targets_min": 796 + }, + { + "epoch": 3.9087947882736156, + "grad_norm": 0.650866443340574, + "learning_rate": 1.9431536016839923e-05, + "loss": 0.3805, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3626767694950104, + "step": 2400, + "valid_targets_mean": 4115.6, + "valid_targets_min": 917 + }, + { + "epoch": 3.916938110749186, + "grad_norm": 0.4677000737138607, + "learning_rate": 1.9350353663122096e-05, + "loss": 0.4042, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3708629608154297, + "step": 2405, + "valid_targets_mean": 5815.8, + "valid_targets_min": 718 + }, + { + "epoch": 3.9250814332247557, + "grad_norm": 0.7104202696995817, + "learning_rate": 1.926918202319651e-05, + "loss": 0.3252, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25468599796295166, + "step": 2410, + "valid_targets_mean": 1989.3, + "valid_targets_min": 770 + }, + { + "epoch": 3.9332247557003255, + "grad_norm": 0.48233834148790106, + "learning_rate": 1.9188022435723967e-05, + "loss": 0.3281, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2924768328666687, + "step": 2415, + "valid_targets_mean": 4265.2, + "valid_targets_min": 826 + }, + { + "epoch": 3.9413680781758957, + "grad_norm": 0.49498107943784647, + "learning_rate": 1.9106876239166498e-05, + "loss": 0.3863, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.39922454953193665, + "step": 2420, + "valid_targets_mean": 5296.6, + "valid_targets_min": 962 + }, + { + "epoch": 3.949511400651466, + "grad_norm": 0.4511756680260551, + "learning_rate": 1.90257447717653e-05, + "loss": 0.3611, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.417417049407959, + "step": 2425, + "valid_targets_mean": 6489.8, + "valid_targets_min": 948 + }, + { + "epoch": 3.957654723127036, + "grad_norm": 0.5499860801642303, + "learning_rate": 1.8944629371518667e-05, + "loss": 0.3683, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.28530269861221313, + "step": 2430, + "valid_targets_mean": 3262.8, + "valid_targets_min": 899 + }, + { + "epoch": 3.9657980456026056, + "grad_norm": 0.5627588319608113, + "learning_rate": 1.88635313761599e-05, + "loss": 0.3525, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.36559420824050903, + "step": 2435, + "valid_targets_mean": 3338.1, + "valid_targets_min": 767 + }, + { + "epoch": 3.973941368078176, + "grad_norm": 0.5833176347790358, + "learning_rate": 1.8782452123135287e-05, + "loss": 0.3779, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.378980815410614, + "step": 2440, + "valid_targets_mean": 3117.9, + "valid_targets_min": 804 + }, + { + "epoch": 3.982084690553746, + "grad_norm": 0.5986112154491725, + "learning_rate": 1.8701392949581998e-05, + "loss": 0.3728, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3972218930721283, + "step": 2445, + "valid_targets_mean": 3437.8, + "valid_targets_min": 702 + }, + { + "epoch": 3.990228013029316, + "grad_norm": 0.5099789916162351, + "learning_rate": 1.8620355192306073e-05, + "loss": 0.3684, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4507391154766083, + "step": 2450, + "valid_targets_mean": 5349.5, + "valid_targets_min": 914 + }, + { + "epoch": 3.9983713355048858, + "grad_norm": 0.6862229752460923, + "learning_rate": 1.8539340187760357e-05, + "loss": 0.3617, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.26892462372779846, + "step": 2455, + "valid_targets_mean": 2010.2, + "valid_targets_min": 943 + }, + { + "epoch": 4.006514657980456, + "grad_norm": 0.6554792334349541, + "learning_rate": 1.845834927202246e-05, + "loss": 0.3809, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3444823622703552, + "step": 2460, + "valid_targets_mean": 2406.3, + "valid_targets_min": 735 + }, + { + "epoch": 4.014657980456026, + "grad_norm": 0.38130273267096393, + "learning_rate": 1.8377383780772733e-05, + "loss": 0.3895, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3942192792892456, + "step": 2465, + "valid_targets_mean": 9486.0, + "valid_targets_min": 735 + }, + { + "epoch": 4.022801302931596, + "grad_norm": 0.5369537583353431, + "learning_rate": 1.8296445049272228e-05, + "loss": 0.3683, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.32799696922302246, + "step": 2470, + "valid_targets_mean": 3973.9, + "valid_targets_min": 654 + }, + { + "epoch": 4.030944625407166, + "grad_norm": 0.5777556136594723, + "learning_rate": 1.8215534412340682e-05, + "loss": 0.3489, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22816243767738342, + "step": 2475, + "valid_targets_mean": 3055.5, + "valid_targets_min": 1003 + }, + { + "epoch": 4.039087947882736, + "grad_norm": 0.5999726914106774, + "learning_rate": 1.813465320433451e-05, + "loss": 0.3615, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3062085211277008, + "step": 2480, + "valid_targets_mean": 3035.2, + "valid_targets_min": 1056 + }, + { + "epoch": 4.047231270358306, + "grad_norm": 0.49985499861592736, + "learning_rate": 1.805380275912478e-05, + "loss": 0.378, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.29686933755874634, + "step": 2485, + "valid_targets_mean": 4693.1, + "valid_targets_min": 802 + }, + { + "epoch": 4.055374592833876, + "grad_norm": 0.47107497958098904, + "learning_rate": 1.797298441007525e-05, + "loss": 0.3656, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.37165796756744385, + "step": 2490, + "valid_targets_mean": 7287.9, + "valid_targets_min": 792 + }, + { + "epoch": 4.063517915309446, + "grad_norm": 0.5017893125921581, + "learning_rate": 1.789219949002033e-05, + "loss": 0.4009, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4540955424308777, + "step": 2495, + "valid_targets_mean": 6126.5, + "valid_targets_min": 774 + }, + { + "epoch": 4.071661237785016, + "grad_norm": 0.6001807753499669, + "learning_rate": 1.781144933124314e-05, + "loss": 0.3374, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.43817412853240967, + "step": 2500, + "valid_targets_mean": 4212.2, + "valid_targets_min": 816 + }, + { + "epoch": 4.079804560260587, + "grad_norm": 0.5113428995213198, + "learning_rate": 1.773073526545354e-05, + "loss": 0.3497, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3754735589027405, + "step": 2505, + "valid_targets_mean": 4996.5, + "valid_targets_min": 788 + }, + { + "epoch": 4.087947882736156, + "grad_norm": 0.515506101758126, + "learning_rate": 1.765005862376614e-05, + "loss": 0.3389, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3578193485736847, + "step": 2510, + "valid_targets_mean": 5825.2, + "valid_targets_min": 858 + }, + { + "epoch": 4.096091205211726, + "grad_norm": 0.8105498272394117, + "learning_rate": 1.7569420736678354e-05, + "loss": 0.2533, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3060300052165985, + "step": 2515, + "valid_targets_mean": 3246.7, + "valid_targets_min": 900 + }, + { + "epoch": 4.1042345276872965, + "grad_norm": 0.5004595156511767, + "learning_rate": 1.7488822934048487e-05, + "loss": 0.4047, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4213087260723114, + "step": 2520, + "valid_targets_mean": 5423.6, + "valid_targets_min": 890 + }, + { + "epoch": 4.112377850162867, + "grad_norm": 0.6326666996495912, + "learning_rate": 1.740826654507376e-05, + "loss": 0.3021, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2622280716896057, + "step": 2525, + "valid_targets_mean": 2403.1, + "valid_targets_min": 867 + }, + { + "epoch": 4.120521172638436, + "grad_norm": 0.593285516487315, + "learning_rate": 1.7327752898268438e-05, + "loss": 0.3587, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.35973063111305237, + "step": 2530, + "valid_targets_mean": 3453.0, + "valid_targets_min": 826 + }, + { + "epoch": 4.128664495114006, + "grad_norm": 0.6093561589121673, + "learning_rate": 1.7247283321441858e-05, + "loss": 0.3144, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3643072843551636, + "step": 2535, + "valid_targets_mean": 3707.2, + "valid_targets_min": 943 + }, + { + "epoch": 4.136807817589577, + "grad_norm": 0.5799195648126669, + "learning_rate": 1.7166859141676592e-05, + "loss": 0.3617, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4091092348098755, + "step": 2540, + "valid_targets_mean": 4417.6, + "valid_targets_min": 925 + }, + { + "epoch": 4.144951140065147, + "grad_norm": 0.4936683373775205, + "learning_rate": 1.7086481685306532e-05, + "loss": 0.3514, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3667007088661194, + "step": 2545, + "valid_targets_mean": 5730.0, + "valid_targets_min": 857 + }, + { + "epoch": 4.153094462540716, + "grad_norm": 0.5018125899140504, + "learning_rate": 1.700615227789502e-05, + "loss": 0.2997, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2875678539276123, + "step": 2550, + "valid_targets_mean": 4319.4, + "valid_targets_min": 823 + }, + { + "epoch": 4.1612377850162865, + "grad_norm": 0.5747343183072302, + "learning_rate": 1.692587224421298e-05, + "loss": 0.3386, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.34501761198043823, + "step": 2555, + "valid_targets_mean": 3751.1, + "valid_targets_min": 858 + }, + { + "epoch": 4.169381107491857, + "grad_norm": 0.5784995221148913, + "learning_rate": 1.6845642908217093e-05, + "loss": 0.3738, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.42867255210876465, + "step": 2560, + "valid_targets_mean": 3921.6, + "valid_targets_min": 787 + }, + { + "epoch": 4.177524429967427, + "grad_norm": 0.5410448534928791, + "learning_rate": 1.676546559302793e-05, + "loss": 0.3919, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.37978121638298035, + "step": 2565, + "valid_targets_mean": 4277.4, + "valid_targets_min": 879 + }, + { + "epoch": 4.185667752442996, + "grad_norm": 0.5677422084260004, + "learning_rate": 1.668534162090816e-05, + "loss": 0.3513, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3591586649417877, + "step": 2570, + "valid_targets_mean": 3989.4, + "valid_targets_min": 737 + }, + { + "epoch": 4.193811074918567, + "grad_norm": 0.6991266472154859, + "learning_rate": 1.6605272313240724e-05, + "loss": 0.3778, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2696525752544403, + "step": 2575, + "valid_targets_mean": 2198.5, + "valid_targets_min": 774 + }, + { + "epoch": 4.201954397394137, + "grad_norm": 0.6275268813161536, + "learning_rate": 1.652525899050707e-05, + "loss": 0.3591, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.40018337965011597, + "step": 2580, + "valid_targets_mean": 3200.4, + "valid_targets_min": 635 + }, + { + "epoch": 4.210097719869707, + "grad_norm": 0.6088464201211283, + "learning_rate": 1.6445302972265327e-05, + "loss": 0.379, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4248189926147461, + "step": 2585, + "valid_targets_mean": 3765.9, + "valid_targets_min": 1036 + }, + { + "epoch": 4.2182410423452765, + "grad_norm": 0.6281379723327402, + "learning_rate": 1.6365405577128607e-05, + "loss": 0.3669, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3308522701263428, + "step": 2590, + "valid_targets_mean": 3075.9, + "valid_targets_min": 873 + }, + { + "epoch": 4.226384364820847, + "grad_norm": 0.5004863121765238, + "learning_rate": 1.6285568122743197e-05, + "loss": 0.3284, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.32210153341293335, + "step": 2595, + "valid_targets_mean": 4832.1, + "valid_targets_min": 781 + }, + { + "epoch": 4.234527687296417, + "grad_norm": 1.363629275333219, + "learning_rate": 1.620579192576688e-05, + "loss": 0.3367, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3683038055896759, + "step": 2600, + "valid_targets_mean": 3934.5, + "valid_targets_min": 858 + }, + { + "epoch": 4.242671009771987, + "grad_norm": 0.5780162013564789, + "learning_rate": 1.612607830184719e-05, + "loss": 0.3723, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3626374900341034, + "step": 2605, + "valid_targets_mean": 4513.6, + "valid_targets_min": 774 + }, + { + "epoch": 4.250814332247557, + "grad_norm": 0.4821901389741692, + "learning_rate": 1.604642856559971e-05, + "loss": 0.3582, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4471210539340973, + "step": 2610, + "valid_targets_mean": 7102.8, + "valid_targets_min": 820 + }, + { + "epoch": 4.258957654723127, + "grad_norm": 0.6182077639364346, + "learning_rate": 1.5966844030586422e-05, + "loss": 0.3915, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.41703593730926514, + "step": 2615, + "valid_targets_mean": 3855.7, + "valid_targets_min": 750 + }, + { + "epoch": 4.267100977198697, + "grad_norm": 0.49554539098762873, + "learning_rate": 1.5887326009294026e-05, + "loss": 0.3852, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3934766352176666, + "step": 2620, + "valid_targets_mean": 6170.2, + "valid_targets_min": 823 + }, + { + "epoch": 4.2752442996742674, + "grad_norm": 0.5533100442506653, + "learning_rate": 1.5807875813112283e-05, + "loss": 0.3632, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.41659030318260193, + "step": 2625, + "valid_targets_mean": 4296.0, + "valid_targets_min": 837 + }, + { + "epoch": 4.283387622149837, + "grad_norm": 0.5213896665793853, + "learning_rate": 1.5728494752312408e-05, + "loss": 0.3557, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.33601921796798706, + "step": 2630, + "valid_targets_mean": 4276.9, + "valid_targets_min": 863 + }, + { + "epoch": 4.291530944625407, + "grad_norm": 0.6186580144744267, + "learning_rate": 1.564918413602546e-05, + "loss": 0.3425, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3147323727607727, + "step": 2635, + "valid_targets_mean": 2824.4, + "valid_targets_min": 874 + }, + { + "epoch": 4.299674267100977, + "grad_norm": 0.6661677111673004, + "learning_rate": 1.5569945272220737e-05, + "loss": 0.3737, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4251142740249634, + "step": 2640, + "valid_targets_mean": 3124.2, + "valid_targets_min": 630 + }, + { + "epoch": 4.307817589576548, + "grad_norm": 0.5401451417716396, + "learning_rate": 1.5490779467684224e-05, + "loss": 0.3725, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.451515257358551, + "step": 2645, + "valid_targets_mean": 5181.9, + "valid_targets_min": 992 + }, + { + "epoch": 4.315960912052117, + "grad_norm": 0.6963146043629623, + "learning_rate": 1.5411688027997015e-05, + "loss": 0.3554, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3456081748008728, + "step": 2650, + "valid_targets_mean": 2512.6, + "valid_targets_min": 850 + }, + { + "epoch": 4.324104234527687, + "grad_norm": 0.679114057024846, + "learning_rate": 1.533267225751382e-05, + "loss": 0.361, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3229619860649109, + "step": 2655, + "valid_targets_mean": 3072.0, + "valid_targets_min": 745 + }, + { + "epoch": 4.3322475570032575, + "grad_norm": 0.5923109370009395, + "learning_rate": 1.5253733459341415e-05, + "loss": 0.3724, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.394045352935791, + "step": 2660, + "valid_targets_mean": 3837.3, + "valid_targets_min": 700 + }, + { + "epoch": 4.340390879478828, + "grad_norm": 0.4583492246706972, + "learning_rate": 1.517487293531718e-05, + "loss": 0.3467, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.31510335206985474, + "step": 2665, + "valid_targets_mean": 5464.9, + "valid_targets_min": 774 + }, + { + "epoch": 4.348534201954397, + "grad_norm": 0.652771068582801, + "learning_rate": 1.5096091985987616e-05, + "loss": 0.3819, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.28531715273857117, + "step": 2670, + "valid_targets_mean": 2867.5, + "valid_targets_min": 861 + }, + { + "epoch": 4.356677524429967, + "grad_norm": 0.6473926873227939, + "learning_rate": 1.50173919105869e-05, + "loss": 0.3708, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4054400324821472, + "step": 2675, + "valid_targets_mean": 3225.4, + "valid_targets_min": 893 + }, + { + "epoch": 4.364820846905538, + "grad_norm": 0.6110011338130815, + "learning_rate": 1.4938774007015458e-05, + "loss": 0.3865, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3898206055164337, + "step": 2680, + "valid_targets_mean": 3528.0, + "valid_targets_min": 870 + }, + { + "epoch": 4.372964169381108, + "grad_norm": 0.6058835379874294, + "learning_rate": 1.4860239571818555e-05, + "loss": 0.3333, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.40088576078414917, + "step": 2685, + "valid_targets_mean": 3550.5, + "valid_targets_min": 826 + }, + { + "epoch": 4.381107491856677, + "grad_norm": 0.5484305538475023, + "learning_rate": 1.4781789900164932e-05, + "loss": 0.3537, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3369537889957428, + "step": 2690, + "valid_targets_mean": 3933.1, + "valid_targets_min": 795 + }, + { + "epoch": 4.3892508143322475, + "grad_norm": 0.5524540774232479, + "learning_rate": 1.4703426285825415e-05, + "loss": 0.3568, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.43001753091812134, + "step": 2695, + "valid_targets_mean": 4829.5, + "valid_targets_min": 706 + }, + { + "epoch": 4.397394136807818, + "grad_norm": 0.5444981151848481, + "learning_rate": 1.4625150021151609e-05, + "loss": 0.3738, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4166078269481659, + "step": 2700, + "valid_targets_mean": 4530.8, + "valid_targets_min": 857 + }, + { + "epoch": 4.405537459283388, + "grad_norm": 0.5892586939304584, + "learning_rate": 1.4546962397054572e-05, + "loss": 0.3654, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3994485139846802, + "step": 2705, + "valid_targets_mean": 4350.1, + "valid_targets_min": 939 + }, + { + "epoch": 4.413680781758957, + "grad_norm": 0.572209424813564, + "learning_rate": 1.446886470298351e-05, + "loss": 0.3749, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3160708546638489, + "step": 2710, + "valid_targets_mean": 3239.8, + "valid_targets_min": 866 + }, + { + "epoch": 4.421824104234528, + "grad_norm": 0.6377571557702658, + "learning_rate": 1.4390858226904543e-05, + "loss": 0.3742, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3196048438549042, + "step": 2715, + "valid_targets_mean": 2932.3, + "valid_targets_min": 873 + }, + { + "epoch": 4.429967426710098, + "grad_norm": 0.6259372401710589, + "learning_rate": 1.4312944255279444e-05, + "loss": 0.3499, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.366549551486969, + "step": 2720, + "valid_targets_mean": 3186.7, + "valid_targets_min": 917 + }, + { + "epoch": 4.438110749185668, + "grad_norm": 0.4952418249133253, + "learning_rate": 1.4235124073044426e-05, + "loss": 0.3737, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.310574471950531, + "step": 2725, + "valid_targets_mean": 4698.6, + "valid_targets_min": 1018 + }, + { + "epoch": 4.4462540716612375, + "grad_norm": 0.5254026137779945, + "learning_rate": 1.4157398963588955e-05, + "loss": 0.3695, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.40291064977645874, + "step": 2730, + "valid_targets_mean": 5185.2, + "valid_targets_min": 685 + }, + { + "epoch": 4.454397394136808, + "grad_norm": 0.6019900089931389, + "learning_rate": 1.407977020873458e-05, + "loss": 0.3564, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4070732593536377, + "step": 2735, + "valid_targets_mean": 3613.7, + "valid_targets_min": 879 + }, + { + "epoch": 4.462540716612378, + "grad_norm": 0.4962581182352448, + "learning_rate": 1.40022390887138e-05, + "loss": 0.3713, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3233620524406433, + "step": 2740, + "valid_targets_mean": 4877.0, + "valid_targets_min": 850 + }, + { + "epoch": 4.470684039087947, + "grad_norm": 0.6632293750645901, + "learning_rate": 1.392480688214894e-05, + "loss": 0.2997, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.276858389377594, + "step": 2745, + "valid_targets_mean": 2247.4, + "valid_targets_min": 850 + }, + { + "epoch": 4.478827361563518, + "grad_norm": 0.7750509672351025, + "learning_rate": 1.3847474866031073e-05, + "loss": 0.3075, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2583996653556824, + "step": 2750, + "valid_targets_mean": 2897.9, + "valid_targets_min": 1034 + }, + { + "epoch": 4.486970684039088, + "grad_norm": 0.5358972607557195, + "learning_rate": 1.3770244315698969e-05, + "loss": 0.3544, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.41334599256515503, + "step": 2755, + "valid_targets_mean": 4873.2, + "valid_targets_min": 948 + }, + { + "epoch": 4.495114006514658, + "grad_norm": 0.5680012204572885, + "learning_rate": 1.369311650481803e-05, + "loss": 0.3337, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.33305543661117554, + "step": 2760, + "valid_targets_mean": 4350.2, + "valid_targets_min": 832 + }, + { + "epoch": 4.5032573289902285, + "grad_norm": 0.5449267630276484, + "learning_rate": 1.3616092705359326e-05, + "loss": 0.3583, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.34392380714416504, + "step": 2765, + "valid_targets_mean": 4001.8, + "valid_targets_min": 1109 + }, + { + "epoch": 4.511400651465798, + "grad_norm": 0.5604889202130752, + "learning_rate": 1.3539174187578588e-05, + "loss": 0.3643, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.33320653438568115, + "step": 2770, + "valid_targets_mean": 3762.9, + "valid_targets_min": 861 + }, + { + "epoch": 4.519543973941368, + "grad_norm": 0.4843390725889363, + "learning_rate": 1.3462362219995278e-05, + "loss": 0.3619, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.346549928188324, + "step": 2775, + "valid_targets_mean": 5384.7, + "valid_targets_min": 871 + }, + { + "epoch": 4.527687296416938, + "grad_norm": 0.6348806946189841, + "learning_rate": 1.3385658069371652e-05, + "loss": 0.3048, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3165673315525055, + "step": 2780, + "valid_targets_mean": 3335.6, + "valid_targets_min": 860 + }, + { + "epoch": 4.535830618892508, + "grad_norm": 0.644665933890569, + "learning_rate": 1.3309063000691887e-05, + "loss": 0.3827, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3510773181915283, + "step": 2785, + "valid_targets_mean": 3066.2, + "valid_targets_min": 1012 + }, + { + "epoch": 4.543973941368078, + "grad_norm": 0.6090177690595272, + "learning_rate": 1.3232578277141202e-05, + "loss": 0.3398, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3894731402397156, + "step": 2790, + "valid_targets_mean": 3558.1, + "valid_targets_min": 892 + }, + { + "epoch": 4.552117263843648, + "grad_norm": 0.609260968763469, + "learning_rate": 1.3156205160085036e-05, + "loss": 0.3426, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.33815351128578186, + "step": 2795, + "valid_targets_mean": 3133.1, + "valid_targets_min": 737 + }, + { + "epoch": 4.5602605863192185, + "grad_norm": 0.5997708594541221, + "learning_rate": 1.3079944909048253e-05, + "loss": 0.3719, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4252645969390869, + "step": 2800, + "valid_targets_mean": 4810.9, + "valid_targets_min": 952 + }, + { + "epoch": 4.568403908794789, + "grad_norm": 0.5459128101448909, + "learning_rate": 1.300379878169434e-05, + "loss": 0.3291, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.39307522773742676, + "step": 2805, + "valid_targets_mean": 4775.3, + "valid_targets_min": 894 + }, + { + "epoch": 4.576547231270358, + "grad_norm": 0.6994505854755723, + "learning_rate": 1.2927768033804711e-05, + "loss": 0.3, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23931142687797546, + "step": 2810, + "valid_targets_mean": 2075.6, + "valid_targets_min": 886 + }, + { + "epoch": 4.584690553745928, + "grad_norm": 0.6052007183392397, + "learning_rate": 1.2851853919257955e-05, + "loss": 0.3658, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.31525593996047974, + "step": 2815, + "valid_targets_mean": 3294.6, + "valid_targets_min": 849 + }, + { + "epoch": 4.592833876221499, + "grad_norm": 0.5220887200952162, + "learning_rate": 1.2776057690009185e-05, + "loss": 0.3461, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3467927575111389, + "step": 2820, + "valid_targets_mean": 4955.8, + "valid_targets_min": 763 + }, + { + "epoch": 4.600977198697068, + "grad_norm": 0.5262166216131438, + "learning_rate": 1.2700380596069377e-05, + "loss": 0.3106, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3941749334335327, + "step": 2825, + "valid_targets_mean": 4788.2, + "valid_targets_min": 772 + }, + { + "epoch": 4.609120521172638, + "grad_norm": 0.5275890905962088, + "learning_rate": 1.2624823885484765e-05, + "loss": 0.354, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.38075023889541626, + "step": 2830, + "valid_targets_mean": 5212.9, + "valid_targets_min": 959 + }, + { + "epoch": 4.6172638436482085, + "grad_norm": 0.6532523569123814, + "learning_rate": 1.2549388804316247e-05, + "loss": 0.3166, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3309089243412018, + "step": 2835, + "valid_targets_mean": 2952.9, + "valid_targets_min": 897 + }, + { + "epoch": 4.625407166123779, + "grad_norm": 0.5186190439548938, + "learning_rate": 1.2474076596618843e-05, + "loss": 0.3682, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3452737331390381, + "step": 2840, + "valid_targets_mean": 4351.3, + "valid_targets_min": 1044 + }, + { + "epoch": 4.633550488599348, + "grad_norm": 0.7269871741074836, + "learning_rate": 1.2398888504421181e-05, + "loss": 0.3466, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2606259882450104, + "step": 2845, + "valid_targets_mean": 1825.9, + "valid_targets_min": 751 + }, + { + "epoch": 4.641693811074918, + "grad_norm": 0.5673919247857871, + "learning_rate": 1.232382576770501e-05, + "loss": 0.3948, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.31079190969467163, + "step": 2850, + "valid_targets_mean": 3317.1, + "valid_targets_min": 1005 + }, + { + "epoch": 4.649837133550489, + "grad_norm": 0.50919326514204, + "learning_rate": 1.2248889624384734e-05, + "loss": 0.366, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3224192261695862, + "step": 2855, + "valid_targets_mean": 4459.4, + "valid_targets_min": 943 + }, + { + "epoch": 4.657980456026059, + "grad_norm": 0.4784877275054749, + "learning_rate": 1.2174081310287034e-05, + "loss": 0.3315, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.29432088136672974, + "step": 2860, + "valid_targets_mean": 5240.3, + "valid_targets_min": 933 + }, + { + "epoch": 4.666123778501628, + "grad_norm": 0.6959652610033052, + "learning_rate": 1.2099402059130454e-05, + "loss": 0.3304, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.28155753016471863, + "step": 2865, + "valid_targets_mean": 2573.4, + "valid_targets_min": 682 + }, + { + "epoch": 4.6742671009771986, + "grad_norm": 0.48316499897271725, + "learning_rate": 1.2024853102505077e-05, + "loss": 0.3453, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3847194314002991, + "step": 2870, + "valid_targets_mean": 5275.1, + "valid_targets_min": 1045 + }, + { + "epoch": 4.682410423452769, + "grad_norm": 0.6465128405449747, + "learning_rate": 1.1950435669852187e-05, + "loss": 0.343, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.40625742077827454, + "step": 2875, + "valid_targets_mean": 3250.8, + "valid_targets_min": 757 + }, + { + "epoch": 4.690553745928339, + "grad_norm": 0.68939242366872, + "learning_rate": 1.1876150988444027e-05, + "loss": 0.3028, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3121936619281769, + "step": 2880, + "valid_targets_mean": 2536.4, + "valid_targets_min": 716 + }, + { + "epoch": 4.698697068403908, + "grad_norm": 0.6541692778246158, + "learning_rate": 1.180200028336353e-05, + "loss": 0.4127, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.41360142827033997, + "step": 2885, + "valid_targets_mean": 3265.6, + "valid_targets_min": 907 + }, + { + "epoch": 4.706840390879479, + "grad_norm": 0.6897917929222491, + "learning_rate": 1.1727984777484136e-05, + "loss": 0.3305, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3353661596775055, + "step": 2890, + "valid_targets_mean": 2544.9, + "valid_targets_min": 751 + }, + { + "epoch": 4.714983713355049, + "grad_norm": 0.48951240342927677, + "learning_rate": 1.1654105691449615e-05, + "loss": 0.3458, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.41796308755874634, + "step": 2895, + "valid_targets_mean": 5841.2, + "valid_targets_min": 821 + }, + { + "epoch": 4.723127035830619, + "grad_norm": 0.4834549353001191, + "learning_rate": 1.1580364243653937e-05, + "loss": 0.3461, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4209936261177063, + "step": 2900, + "valid_targets_mean": 5993.5, + "valid_targets_min": 795 + }, + { + "epoch": 4.731270358306189, + "grad_norm": 0.5328260823927892, + "learning_rate": 1.1506761650221167e-05, + "loss": 0.383, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.42982587218284607, + "step": 2905, + "valid_targets_mean": 4767.3, + "valid_targets_min": 837 + }, + { + "epoch": 4.739413680781759, + "grad_norm": 0.5307377810552082, + "learning_rate": 1.143329912498545e-05, + "loss": 0.3261, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2705893814563751, + "step": 2910, + "valid_targets_mean": 3511.1, + "valid_targets_min": 842 + }, + { + "epoch": 4.747557003257329, + "grad_norm": 0.5146926980000055, + "learning_rate": 1.1359977879470923e-05, + "loss": 0.3703, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2976660132408142, + "step": 2915, + "valid_targets_mean": 4355.8, + "valid_targets_min": 831 + }, + { + "epoch": 4.755700325732899, + "grad_norm": 0.45666030318301937, + "learning_rate": 1.1286799122871823e-05, + "loss": 0.3667, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3741254508495331, + "step": 2920, + "valid_targets_mean": 6491.5, + "valid_targets_min": 935 + }, + { + "epoch": 4.763843648208469, + "grad_norm": 0.579002611463936, + "learning_rate": 1.1213764062032455e-05, + "loss": 0.3516, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.34902578592300415, + "step": 2925, + "valid_targets_mean": 3574.4, + "valid_targets_min": 721 + }, + { + "epoch": 4.771986970684039, + "grad_norm": 0.5159466625390355, + "learning_rate": 1.114087390142737e-05, + "loss": 0.3403, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3471105992794037, + "step": 2930, + "valid_targets_mean": 4713.9, + "valid_targets_min": 869 + }, + { + "epoch": 4.780130293159609, + "grad_norm": 0.413255058784331, + "learning_rate": 1.1068129843141425e-05, + "loss": 0.352, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.33599501848220825, + "step": 2935, + "valid_targets_mean": 6388.8, + "valid_targets_min": 723 + }, + { + "epoch": 4.7882736156351795, + "grad_norm": 0.5550314069361483, + "learning_rate": 1.0995533086850037e-05, + "loss": 0.3746, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.342750608921051, + "step": 2940, + "valid_targets_mean": 3869.1, + "valid_targets_min": 834 + }, + { + "epoch": 4.796416938110749, + "grad_norm": 0.47667239514711435, + "learning_rate": 1.092308482979933e-05, + "loss": 0.3754, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.34755033254623413, + "step": 2945, + "valid_targets_mean": 5154.2, + "valid_targets_min": 948 + }, + { + "epoch": 4.804560260586319, + "grad_norm": 0.5476792186819857, + "learning_rate": 1.0850786266786418e-05, + "loss": 0.3809, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.34450531005859375, + "step": 2950, + "valid_targets_mean": 3824.6, + "valid_targets_min": 871 + }, + { + "epoch": 4.812703583061889, + "grad_norm": 0.5368229186855042, + "learning_rate": 1.0778638590139722e-05, + "loss": 0.3644, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.40777623653411865, + "step": 2955, + "valid_targets_mean": 4317.0, + "valid_targets_min": 928 + }, + { + "epoch": 4.82084690553746, + "grad_norm": 0.6105238600426505, + "learning_rate": 1.0706642989699266e-05, + "loss": 0.3554, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.28248894214630127, + "step": 2960, + "valid_targets_mean": 2900.4, + "valid_targets_min": 890 + }, + { + "epoch": 4.828990228013029, + "grad_norm": 1.6990019654351358, + "learning_rate": 1.0634800652797093e-05, + "loss": 0.3497, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2146090567111969, + "step": 2965, + "valid_targets_mean": 1829.7, + "valid_targets_min": 862 + }, + { + "epoch": 4.837133550488599, + "grad_norm": 0.6197368207246012, + "learning_rate": 1.056311276423764e-05, + "loss": 0.3799, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.386025995016098, + "step": 2970, + "valid_targets_mean": 3909.1, + "valid_targets_min": 964 + }, + { + "epoch": 4.8452768729641695, + "grad_norm": 0.5227825878449366, + "learning_rate": 1.049158050627825e-05, + "loss": 0.3061, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.32775095105171204, + "step": 2975, + "valid_targets_mean": 3848.9, + "valid_targets_min": 855 + }, + { + "epoch": 4.85342019543974, + "grad_norm": 0.9141598301510183, + "learning_rate": 1.0420205058609624e-05, + "loss": 0.3084, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.26486915349960327, + "step": 2980, + "valid_targets_mean": 2203.0, + "valid_targets_min": 825 + }, + { + "epoch": 4.861563517915309, + "grad_norm": 0.48680109983999337, + "learning_rate": 1.0348987598336418e-05, + "loss": 0.3656, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.33496737480163574, + "step": 2985, + "valid_targets_mean": 4809.6, + "valid_targets_min": 787 + }, + { + "epoch": 4.869706840390879, + "grad_norm": 0.5497742748622828, + "learning_rate": 1.0277929299957768e-05, + "loss": 0.4052, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.42894837260246277, + "step": 2990, + "valid_targets_mean": 4440.6, + "valid_targets_min": 821 + }, + { + "epoch": 4.87785016286645, + "grad_norm": 0.5594469426004747, + "learning_rate": 1.0207031335347986e-05, + "loss": 0.3902, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.361088365316391, + "step": 2995, + "valid_targets_mean": 3951.8, + "valid_targets_min": 809 + }, + { + "epoch": 4.88599348534202, + "grad_norm": 0.550863746758289, + "learning_rate": 1.0136294873737187e-05, + "loss": 0.3483, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3585723638534546, + "step": 3000, + "valid_targets_mean": 4956.7, + "valid_targets_min": 884 + }, + { + "epoch": 4.894136807817589, + "grad_norm": 0.5523064205916566, + "learning_rate": 1.006572108169202e-05, + "loss": 0.3803, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3387215733528137, + "step": 3005, + "valid_targets_mean": 3964.2, + "valid_targets_min": 889 + }, + { + "epoch": 4.90228013029316, + "grad_norm": 0.6475318364014226, + "learning_rate": 9.995311123096443e-06, + "loss": 0.3914, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.43229687213897705, + "step": 3010, + "valid_targets_mean": 3220.9, + "valid_targets_min": 877 + }, + { + "epoch": 4.91042345276873, + "grad_norm": 0.554158760011115, + "learning_rate": 9.9250661591325e-06, + "loss": 0.3467, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3512037396430969, + "step": 3015, + "valid_targets_mean": 4529.6, + "valid_targets_min": 789 + }, + { + "epoch": 4.918566775244299, + "grad_norm": 0.42652287821524, + "learning_rate": 9.854987348261214e-06, + "loss": 0.3953, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.42916154861450195, + "step": 3020, + "valid_targets_mean": 7950.4, + "valid_targets_min": 852 + }, + { + "epoch": 4.9267100977198695, + "grad_norm": 0.6740149799151485, + "learning_rate": 9.785075846203425e-06, + "loss": 0.3738, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3933200240135193, + "step": 3025, + "valid_targets_mean": 3559.9, + "valid_targets_min": 712 + }, + { + "epoch": 4.93485342019544, + "grad_norm": 0.5658445464125015, + "learning_rate": 9.715332805920788e-06, + "loss": 0.3539, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3760247826576233, + "step": 3030, + "valid_targets_mean": 4014.9, + "valid_targets_min": 714 + }, + { + "epoch": 4.94299674267101, + "grad_norm": 0.7237908520088671, + "learning_rate": 9.645759377596708e-06, + "loss": 0.3135, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22529226541519165, + "step": 3035, + "valid_targets_mean": 1762.6, + "valid_targets_min": 964 + }, + { + "epoch": 4.95114006514658, + "grad_norm": 0.4813004451625151, + "learning_rate": 9.576356708617425e-06, + "loss": 0.3441, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.30827683210372925, + "step": 3040, + "valid_targets_mean": 4904.4, + "valid_targets_min": 824 + }, + { + "epoch": 4.95928338762215, + "grad_norm": 0.5172695101740169, + "learning_rate": 9.507125943553034e-06, + "loss": 0.355, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.36669835448265076, + "step": 3045, + "valid_targets_mean": 5527.7, + "valid_targets_min": 918 + }, + { + "epoch": 4.96742671009772, + "grad_norm": 0.6644843142266282, + "learning_rate": 9.43806822413864e-06, + "loss": 0.3785, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.30082109570503235, + "step": 3050, + "valid_targets_mean": 2617.1, + "valid_targets_min": 678 + }, + { + "epoch": 4.97557003257329, + "grad_norm": 1.0327945886036216, + "learning_rate": 9.369184689255545e-06, + "loss": 0.3741, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3290201425552368, + "step": 3055, + "valid_targets_mean": 4114.0, + "valid_targets_min": 961 + }, + { + "epoch": 4.9837133550488595, + "grad_norm": 0.595012423193659, + "learning_rate": 9.300476474912428e-06, + "loss": 0.3035, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3585374057292938, + "step": 3060, + "valid_targets_mean": 3532.6, + "valid_targets_min": 837 + }, + { + "epoch": 4.99185667752443, + "grad_norm": 0.6195064890779589, + "learning_rate": 9.231944714226646e-06, + "loss": 0.3656, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25184592604637146, + "step": 3065, + "valid_targets_mean": 2616.6, + "valid_targets_min": 733 + }, + { + "epoch": 5.0, + "grad_norm": 0.6807431933801542, + "learning_rate": 9.163590537405502e-06, + "loss": 0.3347, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3455483317375183, + "step": 3070, + "valid_targets_mean": 2808.8, + "valid_targets_min": 843 + }, + { + "epoch": 5.00814332247557, + "grad_norm": 0.5812582883480483, + "learning_rate": 9.095415071727674e-06, + "loss": 0.3009, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.31391996145248413, + "step": 3075, + "valid_targets_mean": 3478.0, + "valid_targets_min": 788 + }, + { + "epoch": 5.01628664495114, + "grad_norm": 0.6177706124934579, + "learning_rate": 9.02741944152454e-06, + "loss": 0.3749, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.28202974796295166, + "step": 3080, + "valid_targets_mean": 2610.4, + "valid_targets_min": 775 + }, + { + "epoch": 5.02442996742671, + "grad_norm": 0.6050619620755082, + "learning_rate": 8.959604768161716e-06, + "loss": 0.3235, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.33545809984207153, + "step": 3085, + "valid_targets_mean": 3396.2, + "valid_targets_min": 805 + }, + { + "epoch": 5.03257328990228, + "grad_norm": 0.4968364436658486, + "learning_rate": 8.89197217002051e-06, + "loss": 0.3699, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3799287676811218, + "step": 3090, + "valid_targets_mean": 5570.4, + "valid_targets_min": 808 + }, + { + "epoch": 5.04071661237785, + "grad_norm": 0.5503352601390993, + "learning_rate": 8.824522762479484e-06, + "loss": 0.3387, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.36172720789909363, + "step": 3095, + "valid_targets_mean": 4376.9, + "valid_targets_min": 905 + }, + { + "epoch": 5.04885993485342, + "grad_norm": 0.8087037268800001, + "learning_rate": 8.757257657896102e-06, + "loss": 0.3775, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3777019679546356, + "step": 3100, + "valid_targets_mean": 4757.4, + "valid_targets_min": 846 + }, + { + "epoch": 5.05700325732899, + "grad_norm": 0.6068652111312317, + "learning_rate": 8.69017796558831e-06, + "loss": 0.326, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3585216999053955, + "step": 3105, + "valid_targets_mean": 3218.3, + "valid_targets_min": 790 + }, + { + "epoch": 5.06514657980456, + "grad_norm": 0.5600960594268481, + "learning_rate": 8.623284791816335e-06, + "loss": 0.3974, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3959062993526459, + "step": 3110, + "valid_targets_mean": 4425.6, + "valid_targets_min": 796 + }, + { + "epoch": 5.0732899022801305, + "grad_norm": 0.5285342987580417, + "learning_rate": 8.556579239764342e-06, + "loss": 0.3228, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3428380489349365, + "step": 3115, + "valid_targets_mean": 4663.2, + "valid_targets_min": 1562 + }, + { + "epoch": 5.0814332247557, + "grad_norm": 0.4463534204066952, + "learning_rate": 8.490062409522329e-06, + "loss": 0.3322, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.31646081805229187, + "step": 3120, + "valid_targets_mean": 5901.8, + "valid_targets_min": 769 + }, + { + "epoch": 5.08957654723127, + "grad_norm": 0.46294496906573473, + "learning_rate": 8.423735398067909e-06, + "loss": 0.3415, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.395237535238266, + "step": 3125, + "valid_targets_mean": 6119.4, + "valid_targets_min": 882 + }, + { + "epoch": 5.09771986970684, + "grad_norm": 0.6300515613908372, + "learning_rate": 8.357599299248289e-06, + "loss": 0.3538, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.39229094982147217, + "step": 3130, + "valid_targets_mean": 5606.7, + "valid_targets_min": 1078 + }, + { + "epoch": 5.105863192182411, + "grad_norm": 0.518696984780842, + "learning_rate": 8.291655203762167e-06, + "loss": 0.3254, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3848794400691986, + "step": 3135, + "valid_targets_mean": 5532.6, + "valid_targets_min": 1036 + }, + { + "epoch": 5.11400651465798, + "grad_norm": 0.5237019483189311, + "learning_rate": 8.225904199141801e-06, + "loss": 0.3395, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3151305317878723, + "step": 3140, + "valid_targets_mean": 4475.4, + "valid_targets_min": 982 + }, + { + "epoch": 5.12214983713355, + "grad_norm": 0.6007547530072094, + "learning_rate": 8.160347369735024e-06, + "loss": 0.3063, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3742336630821228, + "step": 3145, + "valid_targets_mean": 4501.6, + "valid_targets_min": 780 + }, + { + "epoch": 5.130293159609121, + "grad_norm": 1.1785125774610736, + "learning_rate": 8.094985796687386e-06, + "loss": 0.3058, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2847822904586792, + "step": 3150, + "valid_targets_mean": 2793.4, + "valid_targets_min": 945 + }, + { + "epoch": 5.138436482084691, + "grad_norm": 0.6441296132184219, + "learning_rate": 8.029820557924341e-06, + "loss": 0.3262, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3614216446876526, + "step": 3155, + "valid_targets_mean": 3584.8, + "valid_targets_min": 824 + }, + { + "epoch": 5.14657980456026, + "grad_norm": 0.5497851790519935, + "learning_rate": 7.964852728133426e-06, + "loss": 0.3272, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3798540234565735, + "step": 3160, + "valid_targets_mean": 4291.7, + "valid_targets_min": 854 + }, + { + "epoch": 5.1547231270358305, + "grad_norm": 0.7334736971878594, + "learning_rate": 7.900083378746585e-06, + "loss": 0.3368, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24019399285316467, + "step": 3165, + "valid_targets_mean": 2123.7, + "valid_targets_min": 904 + }, + { + "epoch": 5.162866449511401, + "grad_norm": 0.5908502625630112, + "learning_rate": 7.835513577922454e-06, + "loss": 0.3211, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.31780850887298584, + "step": 3170, + "valid_targets_mean": 3293.4, + "valid_targets_min": 871 + }, + { + "epoch": 5.171009771986971, + "grad_norm": 0.5675118331098262, + "learning_rate": 7.7711443905288e-06, + "loss": 0.3629, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3794068992137909, + "step": 3175, + "valid_targets_mean": 4657.7, + "valid_targets_min": 804 + }, + { + "epoch": 5.17915309446254, + "grad_norm": 0.6066635490440568, + "learning_rate": 7.70697687812488e-06, + "loss": 0.3436, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.31147515773773193, + "step": 3180, + "valid_targets_mean": 3167.9, + "valid_targets_min": 806 + }, + { + "epoch": 5.187296416938111, + "grad_norm": 0.4187712084401716, + "learning_rate": 7.64301209894403e-06, + "loss": 0.3735, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.416037917137146, + "step": 3185, + "valid_targets_mean": 9439.3, + "valid_targets_min": 809 + }, + { + "epoch": 5.195439739413681, + "grad_norm": 0.528065040019513, + "learning_rate": 7.5792511078761245e-06, + "loss": 0.325, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.29006874561309814, + "step": 3190, + "valid_targets_mean": 3821.1, + "valid_targets_min": 853 + }, + { + "epoch": 5.203583061889251, + "grad_norm": 0.6995094973023214, + "learning_rate": 7.515694956450259e-06, + "loss": 0.3187, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3409353494644165, + "step": 3195, + "valid_targets_mean": 2844.1, + "valid_targets_min": 900 + }, + { + "epoch": 5.2117263843648205, + "grad_norm": 0.5149804113158356, + "learning_rate": 7.452344692817331e-06, + "loss": 0.3128, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3099649250507355, + "step": 3200, + "valid_targets_mean": 5747.8, + "valid_targets_min": 696 + }, + { + "epoch": 5.219869706840391, + "grad_norm": 0.6444487118960017, + "learning_rate": 7.389201361732819e-06, + "loss": 0.374, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3280758857727051, + "step": 3205, + "valid_targets_mean": 2907.1, + "valid_targets_min": 755 + }, + { + "epoch": 5.228013029315961, + "grad_norm": 0.5701259619688572, + "learning_rate": 7.326266004539513e-06, + "loss": 0.326, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3533059060573578, + "step": 3210, + "valid_targets_mean": 3544.6, + "valid_targets_min": 865 + }, + { + "epoch": 5.236156351791531, + "grad_norm": 0.5739889433587851, + "learning_rate": 7.263539659150349e-06, + "loss": 0.3106, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3035031855106354, + "step": 3215, + "valid_targets_mean": 4036.7, + "valid_targets_min": 880 + }, + { + "epoch": 5.244299674267101, + "grad_norm": 0.4503102318572572, + "learning_rate": 7.201023360031316e-06, + "loss": 0.3172, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4360430836677551, + "step": 3220, + "valid_targets_mean": 7593.9, + "valid_targets_min": 1004 + }, + { + "epoch": 5.252442996742671, + "grad_norm": 0.5649852545876197, + "learning_rate": 7.13871813818435e-06, + "loss": 0.3324, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.33924832940101624, + "step": 3225, + "valid_targets_mean": 4102.2, + "valid_targets_min": 882 + }, + { + "epoch": 5.260586319218241, + "grad_norm": 0.525341820531561, + "learning_rate": 7.076625021130387e-06, + "loss": 0.3778, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4442111551761627, + "step": 3230, + "valid_targets_mean": 5876.0, + "valid_targets_min": 1103 + }, + { + "epoch": 5.268729641693811, + "grad_norm": 0.5615121809010597, + "learning_rate": 7.014745032892358e-06, + "loss": 0.3586, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4222922623157501, + "step": 3235, + "valid_targets_mean": 4303.2, + "valid_targets_min": 892 + }, + { + "epoch": 5.276872964169381, + "grad_norm": 0.6379760230494554, + "learning_rate": 6.953079193978363e-06, + "loss": 0.3772, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3080676198005676, + "step": 3240, + "valid_targets_mean": 3054.6, + "valid_targets_min": 775 + }, + { + "epoch": 5.285016286644951, + "grad_norm": 0.6169333630819953, + "learning_rate": 6.891628521364773e-06, + "loss": 0.3485, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.409217894077301, + "step": 3245, + "valid_targets_mean": 4054.2, + "valid_targets_min": 767 + }, + { + "epoch": 5.293159609120521, + "grad_norm": 0.6404451819883726, + "learning_rate": 6.83039402847953e-06, + "loss": 0.3211, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.28220751881599426, + "step": 3250, + "valid_targets_mean": 3264.7, + "valid_targets_min": 730 + }, + { + "epoch": 5.301302931596092, + "grad_norm": 0.6234505590940888, + "learning_rate": 6.769376725185375e-06, + "loss": 0.3025, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3264787197113037, + "step": 3255, + "valid_targets_mean": 5000.3, + "valid_targets_min": 921 + }, + { + "epoch": 5.309446254071661, + "grad_norm": 0.719276968177611, + "learning_rate": 6.708577617763215e-06, + "loss": 0.3459, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.30914127826690674, + "step": 3260, + "valid_targets_mean": 2299.4, + "valid_targets_min": 808 + }, + { + "epoch": 5.317589576547231, + "grad_norm": 0.5150449549953823, + "learning_rate": 6.647997708895553e-06, + "loss": 0.3223, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.31567275524139404, + "step": 3265, + "valid_targets_mean": 4906.2, + "valid_targets_min": 913 + }, + { + "epoch": 5.3257328990228014, + "grad_norm": 0.7197928682488813, + "learning_rate": 6.587637997649899e-06, + "loss": 0.3012, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24579644203186035, + "step": 3270, + "valid_targets_mean": 2307.4, + "valid_targets_min": 907 + }, + { + "epoch": 5.333876221498372, + "grad_norm": 0.5380663645490125, + "learning_rate": 6.527499479462349e-06, + "loss": 0.3386, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.28828147053718567, + "step": 3275, + "valid_targets_mean": 4640.5, + "valid_targets_min": 1025 + }, + { + "epoch": 5.342019543973941, + "grad_norm": 0.423273727165684, + "learning_rate": 6.467583146121123e-06, + "loss": 0.3393, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4063699245452881, + "step": 3280, + "valid_targets_mean": 8311.6, + "valid_targets_min": 987 + }, + { + "epoch": 5.350162866449511, + "grad_norm": 0.5605191071387993, + "learning_rate": 6.407889985750246e-06, + "loss": 0.3839, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4190571904182434, + "step": 3285, + "valid_targets_mean": 6660.4, + "valid_targets_min": 895 + }, + { + "epoch": 5.358306188925082, + "grad_norm": 0.9350188736325327, + "learning_rate": 6.3484209827932135e-06, + "loss": 0.3272, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22402456402778625, + "step": 3290, + "valid_targets_mean": 1934.2, + "valid_targets_min": 722 + }, + { + "epoch": 5.366449511400652, + "grad_norm": 0.42545558936832645, + "learning_rate": 6.289177117996798e-06, + "loss": 0.3642, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4242863357067108, + "step": 3295, + "valid_targets_mean": 7993.3, + "valid_targets_min": 943 + }, + { + "epoch": 5.374592833876221, + "grad_norm": 0.4150493985415665, + "learning_rate": 6.230159368394841e-06, + "loss": 0.3586, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.38261932134628296, + "step": 3300, + "valid_targets_mean": 7713.1, + "valid_targets_min": 751 + }, + { + "epoch": 5.3827361563517915, + "grad_norm": 0.4505047290629119, + "learning_rate": 6.171368707292163e-06, + "loss": 0.3331, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.36251628398895264, + "step": 3305, + "valid_targets_mean": 6935.5, + "valid_targets_min": 853 + }, + { + "epoch": 5.390879478827362, + "grad_norm": 0.6199039155414343, + "learning_rate": 6.112806104248492e-06, + "loss": 0.3384, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.35015520453453064, + "step": 3310, + "valid_targets_mean": 3612.3, + "valid_targets_min": 718 + }, + { + "epoch": 5.399022801302932, + "grad_norm": 0.7720758331929449, + "learning_rate": 6.054472525062484e-06, + "loss": 0.3151, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25902456045150757, + "step": 3315, + "valid_targets_mean": 2273.0, + "valid_targets_min": 782 + }, + { + "epoch": 5.407166123778501, + "grad_norm": 0.5550079799604618, + "learning_rate": 5.996368931755812e-06, + "loss": 0.3608, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.40394026041030884, + "step": 3320, + "valid_targets_mean": 4429.4, + "valid_targets_min": 893 + }, + { + "epoch": 5.415309446254072, + "grad_norm": 0.5251375453783347, + "learning_rate": 5.938496282557258e-06, + "loss": 0.3777, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.38108551502227783, + "step": 3325, + "valid_targets_mean": 5312.4, + "valid_targets_min": 716 + }, + { + "epoch": 5.423452768729642, + "grad_norm": 0.7176239116693027, + "learning_rate": 5.880855531886962e-06, + "loss": 0.3229, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18912485241889954, + "step": 3330, + "valid_targets_mean": 1849.2, + "valid_targets_min": 836 + }, + { + "epoch": 5.431596091205212, + "grad_norm": 0.49839457851897967, + "learning_rate": 5.823447630340633e-06, + "loss": 0.34, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.30988097190856934, + "step": 3335, + "valid_targets_mean": 4851.6, + "valid_targets_min": 840 + }, + { + "epoch": 5.4397394136807815, + "grad_norm": 0.6100695105459214, + "learning_rate": 5.766273524673916e-06, + "loss": 0.3419, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3784428834915161, + "step": 3340, + "valid_targets_mean": 3955.6, + "valid_targets_min": 766 + }, + { + "epoch": 5.447882736156352, + "grad_norm": 0.5128790559817762, + "learning_rate": 5.709334157786737e-06, + "loss": 0.3363, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.33215999603271484, + "step": 3345, + "valid_targets_mean": 6091.2, + "valid_targets_min": 780 + }, + { + "epoch": 5.456026058631922, + "grad_norm": 0.5209524874355423, + "learning_rate": 5.652630468707796e-06, + "loss": 0.3352, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.35430628061294556, + "step": 3350, + "valid_targets_mean": 5691.1, + "valid_targets_min": 600 + }, + { + "epoch": 5.464169381107492, + "grad_norm": 0.6874934921939582, + "learning_rate": 5.596163392579039e-06, + "loss": 0.3326, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.386844664812088, + "step": 3355, + "valid_targets_mean": 2949.2, + "valid_targets_min": 796 + }, + { + "epoch": 5.472312703583062, + "grad_norm": 0.6180911741120035, + "learning_rate": 5.539933860640254e-06, + "loss": 0.3525, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.34674057364463806, + "step": 3360, + "valid_targets_mean": 3524.0, + "valid_targets_min": 842 + }, + { + "epoch": 5.480456026058632, + "grad_norm": 0.5994583196860667, + "learning_rate": 5.483942800213737e-06, + "loss": 0.3463, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.36063152551651, + "step": 3365, + "valid_targets_mean": 3700.8, + "valid_targets_min": 774 + }, + { + "epoch": 5.488599348534202, + "grad_norm": 0.4764597839774939, + "learning_rate": 5.428191134688947e-06, + "loss": 0.3649, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3111908435821533, + "step": 3370, + "valid_targets_mean": 5056.0, + "valid_targets_min": 740 + }, + { + "epoch": 5.4967426710097715, + "grad_norm": 0.5394461727982579, + "learning_rate": 5.372679783507333e-06, + "loss": 0.332, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.32376742362976074, + "step": 3375, + "valid_targets_mean": 4716.6, + "valid_targets_min": 930 + }, + { + "epoch": 5.504885993485342, + "grad_norm": 0.6155515672676715, + "learning_rate": 5.317409662147119e-06, + "loss": 0.3334, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3764294683933258, + "step": 3380, + "valid_targets_mean": 3745.0, + "valid_targets_min": 795 + }, + { + "epoch": 5.513029315960912, + "grad_norm": 0.5659760434981728, + "learning_rate": 5.262381682108262e-06, + "loss": 0.3549, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.45110446214675903, + "step": 3385, + "valid_targets_mean": 5805.2, + "valid_targets_min": 862 + }, + { + "epoch": 5.521172638436482, + "grad_norm": 0.5532115166161099, + "learning_rate": 5.207596750897357e-06, + "loss": 0.3568, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.36183202266693115, + "step": 3390, + "valid_targets_mean": 4167.7, + "valid_targets_min": 831 + }, + { + "epoch": 5.529315960912053, + "grad_norm": 0.6361069938767329, + "learning_rate": 5.153055772012734e-06, + "loss": 0.3331, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4014270305633545, + "step": 3395, + "valid_targets_mean": 4049.1, + "valid_targets_min": 921 + }, + { + "epoch": 5.537459283387622, + "grad_norm": 0.5085705487583053, + "learning_rate": 5.098759644929497e-06, + "loss": 0.3685, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3132648766040802, + "step": 3400, + "valid_targets_mean": 5040.1, + "valid_targets_min": 1041 + }, + { + "epoch": 5.545602605863192, + "grad_norm": 0.4556852632783768, + "learning_rate": 5.044709265084748e-06, + "loss": 0.3567, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3614157736301422, + "step": 3405, + "valid_targets_mean": 6380.1, + "valid_targets_min": 935 + }, + { + "epoch": 5.5537459283387625, + "grad_norm": 0.532327765771621, + "learning_rate": 4.9909055238627765e-06, + "loss": 0.3624, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4253925383090973, + "step": 3410, + "valid_targets_mean": 5577.2, + "valid_targets_min": 820 + }, + { + "epoch": 5.561889250814332, + "grad_norm": 1.0370218004088598, + "learning_rate": 4.937349308580368e-06, + "loss": 0.361, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2869694232940674, + "step": 3415, + "valid_targets_mean": 4726.0, + "valid_targets_min": 886 + }, + { + "epoch": 5.570032573289902, + "grad_norm": 0.5907367152146072, + "learning_rate": 4.884041502472199e-06, + "loss": 0.3365, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.26210567355155945, + "step": 3420, + "valid_targets_mean": 3180.3, + "valid_targets_min": 836 + }, + { + "epoch": 5.578175895765472, + "grad_norm": 0.4587730799400921, + "learning_rate": 4.83098298467622e-06, + "loss": 0.3592, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3444317877292633, + "step": 3425, + "valid_targets_mean": 6012.6, + "valid_targets_min": 950 + }, + { + "epoch": 5.586319218241043, + "grad_norm": 0.7254099833315035, + "learning_rate": 4.778174630219221e-06, + "loss": 0.3667, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.32727593183517456, + "step": 3430, + "valid_targets_mean": 2231.6, + "valid_targets_min": 890 + }, + { + "epoch": 5.594462540716613, + "grad_norm": 0.5442318388005675, + "learning_rate": 4.725617310002328e-06, + "loss": 0.3732, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3951669931411743, + "step": 3435, + "valid_targets_mean": 4692.3, + "valid_targets_min": 814 + }, + { + "epoch": 5.602605863192182, + "grad_norm": 0.7392983956204453, + "learning_rate": 4.673311890786712e-06, + "loss": 0.3873, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4227117598056793, + "step": 3440, + "valid_targets_mean": 2604.6, + "valid_targets_min": 632 + }, + { + "epoch": 5.6107491856677525, + "grad_norm": 0.4311633588877623, + "learning_rate": 4.621259235179229e-06, + "loss": 0.3556, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4555113911628723, + "step": 3445, + "valid_targets_mean": 8965.2, + "valid_targets_min": 720 + }, + { + "epoch": 5.618892508143323, + "grad_norm": 0.5211188088619322, + "learning_rate": 4.569460201618257e-06, + "loss": 0.3828, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4197236895561218, + "step": 3450, + "valid_targets_mean": 5365.9, + "valid_targets_min": 814 + }, + { + "epoch": 5.627035830618892, + "grad_norm": 0.5514546316841529, + "learning_rate": 4.517915644359485e-06, + "loss": 0.3241, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.39828914403915405, + "step": 3455, + "valid_targets_mean": 4856.9, + "valid_targets_min": 915 + }, + { + "epoch": 5.635179153094462, + "grad_norm": 0.5457611509569015, + "learning_rate": 4.4666264134618495e-06, + "loss": 0.2977, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25139832496643066, + "step": 3460, + "valid_targets_mean": 3980.9, + "valid_targets_min": 847 + }, + { + "epoch": 5.643322475570033, + "grad_norm": 0.6447152919731025, + "learning_rate": 4.415593354773528e-06, + "loss": 0.3496, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3342139720916748, + "step": 3465, + "valid_targets_mean": 3259.5, + "valid_targets_min": 775 + }, + { + "epoch": 5.651465798045603, + "grad_norm": 0.6909858021763735, + "learning_rate": 4.364817309917959e-06, + "loss": 0.3452, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3476819097995758, + "step": 3470, + "valid_targets_mean": 2816.8, + "valid_targets_min": 815 + }, + { + "epoch": 5.659609120521172, + "grad_norm": 0.5566334736884017, + "learning_rate": 4.314299116279992e-06, + "loss": 0.3268, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3450655937194824, + "step": 3475, + "valid_targets_mean": 4355.2, + "valid_targets_min": 947 + }, + { + "epoch": 5.6677524429967425, + "grad_norm": 0.45597284211637507, + "learning_rate": 4.264039606992045e-06, + "loss": 0.3404, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3211209177970886, + "step": 3480, + "valid_targets_mean": 5757.8, + "valid_targets_min": 734 + }, + { + "epoch": 5.675895765472313, + "grad_norm": 0.6613201274538745, + "learning_rate": 4.2140396109204065e-06, + "loss": 0.3973, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.43754416704177856, + "step": 3485, + "valid_targets_mean": 5806.5, + "valid_targets_min": 952 + }, + { + "epoch": 5.684039087947883, + "grad_norm": 0.6290867143899168, + "learning_rate": 4.164299952651522e-06, + "loss": 0.353, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.38061946630477905, + "step": 3490, + "valid_targets_mean": 3460.9, + "valid_targets_min": 747 + }, + { + "epoch": 5.692182410423452, + "grad_norm": 0.7610505091027158, + "learning_rate": 4.1148214524784435e-06, + "loss": 0.3134, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2939208745956421, + "step": 3495, + "valid_targets_mean": 2457.1, + "valid_targets_min": 974 + }, + { + "epoch": 5.700325732899023, + "grad_norm": 0.6167702996739569, + "learning_rate": 4.0656049263872295e-06, + "loss": 0.3114, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3962528705596924, + "step": 3500, + "valid_targets_mean": 3896.5, + "valid_targets_min": 808 + }, + { + "epoch": 5.708469055374593, + "grad_norm": 0.4588822367614284, + "learning_rate": 4.016651186043579e-06, + "loss": 0.3439, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4151820242404938, + "step": 3505, + "valid_targets_mean": 6879.2, + "valid_targets_min": 943 + }, + { + "epoch": 5.716612377850163, + "grad_norm": 0.5510667585993431, + "learning_rate": 3.967961038779366e-06, + "loss": 0.3409, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3815336525440216, + "step": 3510, + "valid_targets_mean": 4410.1, + "valid_targets_min": 707 + }, + { + "epoch": 5.7247557003257326, + "grad_norm": 0.5576236289622526, + "learning_rate": 3.919535287579388e-06, + "loss": 0.3743, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4427952170372009, + "step": 3515, + "valid_targets_mean": 5429.9, + "valid_targets_min": 832 + }, + { + "epoch": 5.732899022801303, + "grad_norm": 0.5175140875432503, + "learning_rate": 3.871374731068067e-06, + "loss": 0.3111, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3049759864807129, + "step": 3520, + "valid_targets_mean": 4762.6, + "valid_targets_min": 837 + }, + { + "epoch": 5.741042345276873, + "grad_norm": 0.5067445347201364, + "learning_rate": 3.823480163496314e-06, + "loss": 0.3871, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3573338985443115, + "step": 3525, + "valid_targets_mean": 6251.9, + "valid_targets_min": 754 + }, + { + "epoch": 5.749185667752443, + "grad_norm": 0.5517197389878234, + "learning_rate": 3.775852374728437e-06, + "loss": 0.3248, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3076989948749542, + "step": 3530, + "valid_targets_mean": 3542.4, + "valid_targets_min": 807 + }, + { + "epoch": 5.757328990228013, + "grad_norm": 0.700076945893088, + "learning_rate": 3.728492150229073e-06, + "loss": 0.3219, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.26955580711364746, + "step": 3535, + "valid_targets_mean": 2220.2, + "valid_targets_min": 942 + }, + { + "epoch": 5.765472312703583, + "grad_norm": 0.5785753017795928, + "learning_rate": 3.681400271050288e-06, + "loss": 0.3569, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3811410367488861, + "step": 3540, + "valid_targets_mean": 4355.4, + "valid_targets_min": 951 + }, + { + "epoch": 5.773615635179153, + "grad_norm": 0.6473418218014863, + "learning_rate": 3.634577513818649e-06, + "loss": 0.3587, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3514581024646759, + "step": 3545, + "valid_targets_mean": 3079.8, + "valid_targets_min": 872 + }, + { + "epoch": 5.7817589576547235, + "grad_norm": 0.6909571291742235, + "learning_rate": 3.5880246507224527e-06, + "loss": 0.3262, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.26181545853614807, + "step": 3550, + "valid_targets_mean": 2218.9, + "valid_targets_min": 880 + }, + { + "epoch": 5.789902280130293, + "grad_norm": 0.6744135027777312, + "learning_rate": 3.541742449498957e-06, + "loss": 0.324, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.29041510820388794, + "step": 3555, + "valid_targets_mean": 2439.5, + "valid_targets_min": 825 + }, + { + "epoch": 5.798045602605863, + "grad_norm": 0.49115508765235966, + "learning_rate": 3.495731673421754e-06, + "loss": 0.3543, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3981338441371918, + "step": 3560, + "valid_targets_mean": 5539.9, + "valid_targets_min": 978 + }, + { + "epoch": 5.806188925081433, + "grad_norm": 0.4231774260275391, + "learning_rate": 3.449993081288152e-06, + "loss": 0.3799, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3987569808959961, + "step": 3565, + "valid_targets_mean": 7911.4, + "valid_targets_min": 782 + }, + { + "epoch": 5.814332247557004, + "grad_norm": 0.5263056963357977, + "learning_rate": 3.4045274274066896e-06, + "loss": 0.3566, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4571700692176819, + "step": 3570, + "valid_targets_mean": 5906.6, + "valid_targets_min": 860 + }, + { + "epoch": 5.822475570032573, + "grad_norm": 0.5261091354761828, + "learning_rate": 3.3593354615846695e-06, + "loss": 0.3587, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4236190915107727, + "step": 3575, + "valid_targets_mean": 5251.8, + "valid_targets_min": 968 + }, + { + "epoch": 5.830618892508143, + "grad_norm": 0.6544703103551001, + "learning_rate": 3.3144179291157984e-06, + "loss": 0.3794, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3526955842971802, + "step": 3580, + "valid_targets_mean": 3065.6, + "valid_targets_min": 884 + }, + { + "epoch": 5.8387622149837135, + "grad_norm": 0.5741480323207931, + "learning_rate": 3.2697755707679257e-06, + "loss": 0.3346, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23875439167022705, + "step": 3585, + "valid_targets_mean": 3737.6, + "valid_targets_min": 944 + }, + { + "epoch": 5.846905537459284, + "grad_norm": 0.8073810506927592, + "learning_rate": 3.2254091227707774e-06, + "loss": 0.3283, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.42679363489151, + "step": 3590, + "valid_targets_mean": 3998.2, + "valid_targets_min": 860 + }, + { + "epoch": 5.855048859934853, + "grad_norm": 0.4686789577741846, + "learning_rate": 3.1813193168038636e-06, + "loss": 0.3, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2691139578819275, + "step": 3595, + "valid_targets_mean": 5064.6, + "valid_targets_min": 619 + }, + { + "epoch": 5.863192182410423, + "grad_norm": 0.6170193190659855, + "learning_rate": 3.137506879984369e-06, + "loss": 0.3549, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3195459842681885, + "step": 3600, + "valid_targets_mean": 3280.6, + "valid_targets_min": 729 + }, + { + "epoch": 5.871335504885994, + "grad_norm": 0.8397493396056619, + "learning_rate": 3.0939725348551963e-06, + "loss": 0.3221, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2398374378681183, + "step": 3605, + "valid_targets_mean": 1870.1, + "valid_targets_min": 751 + }, + { + "epoch": 5.879478827361563, + "grad_norm": 0.4786306797990019, + "learning_rate": 3.05071699937302e-06, + "loss": 0.3208, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.33298879861831665, + "step": 3610, + "valid_targets_mean": 5383.3, + "valid_targets_min": 630 + }, + { + "epoch": 5.887622149837133, + "grad_norm": 0.46037829641917904, + "learning_rate": 3.007740986896479e-06, + "loss": 0.3535, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4190775156021118, + "step": 3615, + "valid_targets_mean": 6631.6, + "valid_targets_min": 706 + }, + { + "epoch": 5.8957654723127035, + "grad_norm": 0.6915651564902173, + "learning_rate": 2.9650452061743794e-06, + "loss": 0.3774, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.34009355306625366, + "step": 3620, + "valid_targets_mean": 2773.3, + "valid_targets_min": 858 + }, + { + "epoch": 5.903908794788274, + "grad_norm": 0.5841571921125752, + "learning_rate": 2.9226303613340225e-06, + "loss": 0.349, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.29162871837615967, + "step": 3625, + "valid_targets_mean": 3390.9, + "valid_targets_min": 958 + }, + { + "epoch": 5.912052117263844, + "grad_norm": 0.5483390663044974, + "learning_rate": 2.880497151869601e-06, + "loss": 0.3914, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3777734637260437, + "step": 3630, + "valid_targets_mean": 4392.4, + "valid_targets_min": 750 + }, + { + "epoch": 5.920195439739413, + "grad_norm": 0.7051236978032706, + "learning_rate": 2.8386462726306384e-06, + "loss": 0.2806, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2927796244621277, + "step": 3635, + "valid_targets_mean": 2360.4, + "valid_targets_min": 984 + }, + { + "epoch": 5.928338762214984, + "grad_norm": 0.7027831617099631, + "learning_rate": 2.797078413810561e-06, + "loss": 0.3373, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.31731951236724854, + "step": 3640, + "valid_targets_mean": 3445.6, + "valid_targets_min": 886 + }, + { + "epoch": 5.936482084690554, + "grad_norm": 0.45387637358457905, + "learning_rate": 2.7557942609352805e-06, + "loss": 0.299, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.36751508712768555, + "step": 3645, + "valid_targets_mean": 6469.0, + "valid_targets_min": 732 + }, + { + "epoch": 5.944625407166123, + "grad_norm": 0.7718578776958555, + "learning_rate": 2.7147944948519245e-06, + "loss": 0.3233, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2052096128463745, + "step": 3650, + "valid_targets_mean": 1584.3, + "valid_targets_min": 842 + }, + { + "epoch": 5.952768729641694, + "grad_norm": 0.5702248881178423, + "learning_rate": 2.674079791717572e-06, + "loss": 0.3757, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3853203058242798, + "step": 3655, + "valid_targets_mean": 4447.0, + "valid_targets_min": 1018 + }, + { + "epoch": 5.960912052117264, + "grad_norm": 0.556373364998352, + "learning_rate": 2.6336508229881406e-06, + "loss": 0.3662, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3854537010192871, + "step": 3660, + "valid_targets_mean": 4557.5, + "valid_targets_min": 844 + }, + { + "epoch": 5.969055374592834, + "grad_norm": 0.4902745201019764, + "learning_rate": 2.593508255407278e-06, + "loss": 0.3084, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3378230333328247, + "step": 3665, + "valid_targets_mean": 5519.5, + "valid_targets_min": 821 + }, + { + "epoch": 5.977198697068404, + "grad_norm": 0.5627873116289096, + "learning_rate": 2.553652750995379e-06, + "loss": 0.3329, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.33455243706703186, + "step": 3670, + "valid_targets_mean": 4633.3, + "valid_targets_min": 842 + }, + { + "epoch": 5.985342019543974, + "grad_norm": 0.682539093585395, + "learning_rate": 2.514084967038688e-06, + "loss": 0.355, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3416430354118347, + "step": 3675, + "valid_targets_mean": 3117.3, + "valid_targets_min": 900 + }, + { + "epoch": 5.993485342019544, + "grad_norm": 0.684582769839456, + "learning_rate": 2.4748055560784232e-06, + "loss": 0.2881, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.26188501715660095, + "step": 3680, + "valid_targets_mean": 2286.5, + "valid_targets_min": 896 + }, + { + "epoch": 6.001628664495114, + "grad_norm": 0.6640940807907215, + "learning_rate": 2.435815165900046e-06, + "loss": 0.3523, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3568960130214691, + "step": 3685, + "valid_targets_mean": 3858.1, + "valid_targets_min": 882 + }, + { + "epoch": 6.009771986970684, + "grad_norm": 0.6122936179835217, + "learning_rate": 2.3971144395225523e-06, + "loss": 0.3532, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4162420928478241, + "step": 3690, + "valid_targets_mean": 5099.8, + "valid_targets_min": 781 + }, + { + "epoch": 6.017915309446254, + "grad_norm": 0.735081667907351, + "learning_rate": 2.358704015187896e-06, + "loss": 0.2938, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24211198091506958, + "step": 3695, + "valid_targets_mean": 2213.0, + "valid_targets_min": 907 + }, + { + "epoch": 6.026058631921824, + "grad_norm": 0.6655177792234814, + "learning_rate": 2.320584526350429e-06, + "loss": 0.3549, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3161996006965637, + "step": 3700, + "valid_targets_mean": 2961.3, + "valid_targets_min": 777 + }, + { + "epoch": 6.034201954397394, + "grad_norm": 0.610664833990933, + "learning_rate": 2.282756601666496e-06, + "loss": 0.3049, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.33553919196128845, + "step": 3705, + "valid_targets_mean": 5984.8, + "valid_targets_min": 788 + }, + { + "epoch": 6.042345276872964, + "grad_norm": 0.5187981755487766, + "learning_rate": 2.2452208649840215e-06, + "loss": 0.3617, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.37308138608932495, + "step": 3710, + "valid_targets_mean": 5297.1, + "valid_targets_min": 827 + }, + { + "epoch": 6.050488599348534, + "grad_norm": 0.45212903070747296, + "learning_rate": 2.207977935332264e-06, + "loss": 0.326, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.39245519042015076, + "step": 3715, + "valid_targets_mean": 6947.1, + "valid_targets_min": 937 + }, + { + "epoch": 6.058631921824104, + "grad_norm": 0.5108609609093802, + "learning_rate": 2.1710284269115744e-06, + "loss": 0.3695, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.42447125911712646, + "step": 3720, + "valid_targets_mean": 5905.4, + "valid_targets_min": 654 + }, + { + "epoch": 6.0667752442996745, + "grad_norm": 0.7206971277744036, + "learning_rate": 2.134372949083279e-06, + "loss": 0.3348, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.301357626914978, + "step": 3725, + "valid_targets_mean": 2339.6, + "valid_targets_min": 681 + }, + { + "epoch": 6.074918566775244, + "grad_norm": 0.6089322178593772, + "learning_rate": 2.098012106359646e-06, + "loss": 0.3269, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3033623993396759, + "step": 3730, + "valid_targets_mean": 3046.9, + "valid_targets_min": 828 + }, + { + "epoch": 6.083061889250814, + "grad_norm": 0.5778970868982783, + "learning_rate": 2.061946498393879e-06, + "loss": 0.3864, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.42139577865600586, + "step": 3735, + "valid_targets_mean": 4701.6, + "valid_targets_min": 849 + }, + { + "epoch": 6.091205211726384, + "grad_norm": 0.5219007188573626, + "learning_rate": 2.0261767199702696e-06, + "loss": 0.3312, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.33774128556251526, + "step": 3740, + "valid_targets_mean": 5268.8, + "valid_targets_min": 913 + }, + { + "epoch": 6.099348534201955, + "grad_norm": 0.5750903956218592, + "learning_rate": 1.990703360994355e-06, + "loss": 0.3047, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3497207760810852, + "step": 3745, + "valid_targets_mean": 3955.9, + "valid_targets_min": 972 + }, + { + "epoch": 6.107491856677524, + "grad_norm": 0.5170254209133062, + "learning_rate": 1.9555270064832133e-06, + "loss": 0.3371, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3021817207336426, + "step": 3750, + "valid_targets_mean": 6645.6, + "valid_targets_min": 1156 + }, + { + "epoch": 6.115635179153094, + "grad_norm": 0.5308512204389633, + "learning_rate": 1.920648236555791e-06, + "loss": 0.3047, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3398710787296295, + "step": 3755, + "valid_targets_mean": 4566.8, + "valid_targets_min": 735 + }, + { + "epoch": 6.1237785016286646, + "grad_norm": 0.47045635214228565, + "learning_rate": 1.886067626423369e-06, + "loss": 0.3794, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3941476345062256, + "step": 3760, + "valid_targets_mean": 6495.2, + "valid_targets_min": 1072 + }, + { + "epoch": 6.131921824104235, + "grad_norm": 0.5618970399408014, + "learning_rate": 1.8517857463800393e-06, + "loss": 0.3348, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4066775441169739, + "step": 3765, + "valid_targets_mean": 4668.1, + "valid_targets_min": 834 + }, + { + "epoch": 6.140065146579804, + "grad_norm": 0.6094281810379755, + "learning_rate": 1.8178031617933212e-06, + "loss": 0.3267, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.38877031207084656, + "step": 3770, + "valid_targets_mean": 3938.0, + "valid_targets_min": 939 + }, + { + "epoch": 6.148208469055374, + "grad_norm": 0.5874432031190339, + "learning_rate": 1.7841204330948492e-06, + "loss": 0.3294, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.41187214851379395, + "step": 3775, + "valid_targets_mean": 4416.0, + "valid_targets_min": 860 + }, + { + "epoch": 6.156351791530945, + "grad_norm": 0.760679454462141, + "learning_rate": 1.7507381157710913e-06, + "loss": 0.3316, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3444948196411133, + "step": 3780, + "valid_targets_mean": 2428.4, + "valid_targets_min": 794 + }, + { + "epoch": 6.164495114006515, + "grad_norm": 0.7311511779489177, + "learning_rate": 1.717656760354236e-06, + "loss": 0.3319, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3171062469482422, + "step": 3785, + "valid_targets_mean": 2492.9, + "valid_targets_min": 872 + }, + { + "epoch": 6.172638436482084, + "grad_norm": 0.5866690999446248, + "learning_rate": 1.6848769124130692e-06, + "loss": 0.3429, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2825160622596741, + "step": 3790, + "valid_targets_mean": 3189.6, + "valid_targets_min": 864 + }, + { + "epoch": 6.180781758957655, + "grad_norm": 0.6303183695223189, + "learning_rate": 1.6523991125440141e-06, + "loss": 0.315, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3238790035247803, + "step": 3795, + "valid_targets_mean": 4216.9, + "valid_targets_min": 839 + }, + { + "epoch": 6.188925081433225, + "grad_norm": 0.5597246869774607, + "learning_rate": 1.6202238963621807e-06, + "loss": 0.2749, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.32232385873794556, + "step": 3800, + "valid_targets_mean": 4294.9, + "valid_targets_min": 872 + }, + { + "epoch": 6.197068403908795, + "grad_norm": 0.4831221447403182, + "learning_rate": 1.5883517944925663e-06, + "loss": 0.2997, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3162875473499298, + "step": 3805, + "valid_targets_mean": 6574.1, + "valid_targets_min": 897 + }, + { + "epoch": 6.2052117263843645, + "grad_norm": 0.5488910140836988, + "learning_rate": 1.5567833325612802e-06, + "loss": 0.3391, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.33755624294281006, + "step": 3810, + "valid_targets_mean": 5105.0, + "valid_targets_min": 867 + }, + { + "epoch": 6.213355048859935, + "grad_norm": 0.5244588243768696, + "learning_rate": 1.525519031186884e-06, + "loss": 0.3537, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.31947261095046997, + "step": 3815, + "valid_targets_mean": 4621.2, + "valid_targets_min": 766 + }, + { + "epoch": 6.221498371335505, + "grad_norm": 0.5282035386163249, + "learning_rate": 1.4945594059718094e-06, + "loss": 0.3316, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.30010557174682617, + "step": 3820, + "valid_targets_mean": 4132.6, + "valid_targets_min": 852 + }, + { + "epoch": 6.229641693811075, + "grad_norm": 0.5450156537993913, + "learning_rate": 1.4639049674938477e-06, + "loss": 0.3011, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2887446880340576, + "step": 3825, + "valid_targets_mean": 4258.2, + "valid_targets_min": 1021 + }, + { + "epoch": 6.237785016286645, + "grad_norm": 0.511797527319191, + "learning_rate": 1.4335562212977294e-06, + "loss": 0.2896, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3580203056335449, + "step": 3830, + "valid_targets_mean": 5290.2, + "valid_targets_min": 827 + }, + { + "epoch": 6.245928338762215, + "grad_norm": 0.5647866337804425, + "learning_rate": 1.4035136678868044e-06, + "loss": 0.3113, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3674638271331787, + "step": 3835, + "valid_targets_mean": 4620.9, + "valid_targets_min": 834 + }, + { + "epoch": 6.254071661237785, + "grad_norm": 0.5492637094040284, + "learning_rate": 1.3737778027147552e-06, + "loss": 0.3401, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.32453885674476624, + "step": 3840, + "valid_targets_mean": 4875.6, + "valid_targets_min": 935 + }, + { + "epoch": 6.262214983713355, + "grad_norm": 0.5368978238779512, + "learning_rate": 1.3443491161774547e-06, + "loss": 0.3827, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4142525792121887, + "step": 3845, + "valid_targets_mean": 5000.2, + "valid_targets_min": 872 + }, + { + "epoch": 6.270358306188925, + "grad_norm": 0.5812689309701172, + "learning_rate": 1.3152280936048701e-06, + "loss": 0.3268, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3608083724975586, + "step": 3850, + "valid_targets_mean": 3862.6, + "valid_targets_min": 819 + }, + { + "epoch": 6.278501628664495, + "grad_norm": 0.7708442996688833, + "learning_rate": 1.2864152152530495e-06, + "loss": 0.31, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19530707597732544, + "step": 3855, + "valid_targets_mean": 1756.1, + "valid_targets_min": 917 + }, + { + "epoch": 6.286644951140065, + "grad_norm": 0.5388682494197552, + "learning_rate": 1.2579109562962222e-06, + "loss": 0.3786, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.35750913619995117, + "step": 3860, + "valid_targets_mean": 4640.2, + "valid_targets_min": 805 + }, + { + "epoch": 6.2947882736156355, + "grad_norm": 0.5363303389448282, + "learning_rate": 1.2297157868189347e-06, + "loss": 0.3321, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.29718783497810364, + "step": 3865, + "valid_targets_mean": 4501.1, + "valid_targets_min": 788 + }, + { + "epoch": 6.302931596091205, + "grad_norm": 0.6565762139521889, + "learning_rate": 1.2018301718083269e-06, + "loss": 0.3168, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3138154149055481, + "step": 3870, + "valid_targets_mean": 3089.3, + "valid_targets_min": 882 + }, + { + "epoch": 6.311074918566775, + "grad_norm": 0.6064347591019186, + "learning_rate": 1.1742545711464382e-06, + "loss": 0.3112, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24476546049118042, + "step": 3875, + "valid_targets_mean": 3029.4, + "valid_targets_min": 870 + }, + { + "epoch": 6.319218241042345, + "grad_norm": 0.6894306516727864, + "learning_rate": 1.1469894396026437e-06, + "loss": 0.2812, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2821471691131592, + "step": 3880, + "valid_targets_mean": 4558.2, + "valid_targets_min": 619 + }, + { + "epoch": 6.327361563517916, + "grad_norm": 0.5443311831609526, + "learning_rate": 1.1200352268261394e-06, + "loss": 0.3289, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2997708022594452, + "step": 3885, + "valid_targets_mean": 4810.2, + "valid_targets_min": 782 + }, + { + "epoch": 6.335504885993485, + "grad_norm": 0.7723112211249017, + "learning_rate": 1.0933923773385336e-06, + "loss": 0.3314, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20624123513698578, + "step": 3890, + "valid_targets_mean": 1781.8, + "valid_targets_min": 917 + }, + { + "epoch": 6.343648208469055, + "grad_norm": 0.6295185245576173, + "learning_rate": 1.0670613305265242e-06, + "loss": 0.3301, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24920423328876495, + "step": 3895, + "valid_targets_mean": 2553.4, + "valid_targets_min": 758 + }, + { + "epoch": 6.351791530944626, + "grad_norm": 0.7326406021397233, + "learning_rate": 1.0410425206346275e-06, + "loss": 0.3193, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.26034015417099, + "step": 3900, + "valid_targets_mean": 2279.7, + "valid_targets_min": 892 + }, + { + "epoch": 6.359934853420196, + "grad_norm": 0.4685736108926292, + "learning_rate": 1.0153363767580492e-06, + "loss": 0.3059, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2652076482772827, + "step": 3905, + "valid_targets_mean": 5103.9, + "valid_targets_min": 1001 + }, + { + "epoch": 6.368078175895765, + "grad_norm": 0.6458830948294415, + "learning_rate": 9.899433228355782e-07, + "loss": 0.3399, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3515450358390808, + "step": 3910, + "valid_targets_mean": 3198.8, + "valid_targets_min": 886 + }, + { + "epoch": 6.3762214983713354, + "grad_norm": 0.49258907479393277, + "learning_rate": 9.648637776426173e-07, + "loss": 0.3401, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.35113492608070374, + "step": 3915, + "valid_targets_mean": 5119.9, + "valid_targets_min": 836 + }, + { + "epoch": 6.384364820846906, + "grad_norm": 0.5214200049549012, + "learning_rate": 9.400981547842613e-07, + "loss": 0.3809, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4157213866710663, + "step": 3920, + "valid_targets_mean": 5283.0, + "valid_targets_min": 722 + }, + { + "epoch": 6.392508143322476, + "grad_norm": 0.760873275189635, + "learning_rate": 9.156468626884884e-07, + "loss": 0.3427, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3256635069847107, + "step": 3925, + "valid_targets_mean": 2499.0, + "valid_targets_min": 819 + }, + { + "epoch": 6.400651465798045, + "grad_norm": 0.5676312307940954, + "learning_rate": 8.915103045994145e-07, + "loss": 0.3381, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2938845753669739, + "step": 3930, + "valid_targets_mean": 3754.5, + "valid_targets_min": 778 + }, + { + "epoch": 6.408794788273616, + "grad_norm": 0.6978218387772783, + "learning_rate": 8.67688878570645e-07, + "loss": 0.2728, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2310534119606018, + "step": 3935, + "valid_targets_mean": 2254.2, + "valid_targets_min": 849 + }, + { + "epoch": 6.416938110749186, + "grad_norm": 0.6778492501103248, + "learning_rate": 8.441829774587207e-07, + "loss": 0.3262, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3058690130710602, + "step": 3940, + "valid_targets_mean": 2611.7, + "valid_targets_min": 905 + }, + { + "epoch": 6.425081433224756, + "grad_norm": 0.6692469613630986, + "learning_rate": 8.209929889166201e-07, + "loss": 0.2982, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2507542371749878, + "step": 3945, + "valid_targets_mean": 2859.6, + "valid_targets_min": 702 + }, + { + "epoch": 6.4332247557003255, + "grad_norm": 0.7859112876477466, + "learning_rate": 7.981192953873873e-07, + "loss": 0.3444, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.39793962240219116, + "step": 3950, + "valid_targets_mean": 4406.9, + "valid_targets_min": 945 + }, + { + "epoch": 6.441368078175896, + "grad_norm": 0.6249352298032912, + "learning_rate": 7.755622740978053e-07, + "loss": 0.3589, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3558174967765808, + "step": 3955, + "valid_targets_mean": 3656.8, + "valid_targets_min": 752 + }, + { + "epoch": 6.449511400651466, + "grad_norm": 0.7171886585434384, + "learning_rate": 7.533222970521903e-07, + "loss": 0.4011, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.34979936480522156, + "step": 3960, + "valid_targets_mean": 2409.8, + "valid_targets_min": 678 + }, + { + "epoch": 6.457654723127035, + "grad_norm": 0.5680794253766248, + "learning_rate": 7.313997310262455e-07, + "loss": 0.3559, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4528966248035431, + "step": 3965, + "valid_targets_mean": 4197.6, + "valid_targets_min": 805 + }, + { + "epoch": 6.465798045602606, + "grad_norm": 0.46834702977094644, + "learning_rate": 7.097949375610236e-07, + "loss": 0.2975, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.34341466426849365, + "step": 3970, + "valid_targets_mean": 5849.8, + "valid_targets_min": 851 + }, + { + "epoch": 6.473941368078176, + "grad_norm": 0.73026146933331, + "learning_rate": 6.88508272956947e-07, + "loss": 0.3626, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27743712067604065, + "step": 3975, + "valid_targets_mean": 2468.8, + "valid_targets_min": 757 + }, + { + "epoch": 6.482084690553746, + "grad_norm": 0.5088236386290539, + "learning_rate": 6.675400882679573e-07, + "loss": 0.3229, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3674625754356384, + "step": 3980, + "valid_targets_mean": 5174.3, + "valid_targets_min": 773 + }, + { + "epoch": 6.490228013029316, + "grad_norm": 0.8352132501763471, + "learning_rate": 6.468907292956972e-07, + "loss": 0.3447, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3398285508155823, + "step": 3985, + "valid_targets_mean": 3215.6, + "valid_targets_min": 790 + }, + { + "epoch": 6.498371335504886, + "grad_norm": 0.6177398136269792, + "learning_rate": 6.265605365838267e-07, + "loss": 0.3383, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3849717378616333, + "step": 3990, + "valid_targets_mean": 3666.9, + "valid_targets_min": 888 + }, + { + "epoch": 6.506514657980456, + "grad_norm": 0.7299812146354134, + "learning_rate": 6.065498454124074e-07, + "loss": 0.3527, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.34643954038619995, + "step": 3995, + "valid_targets_mean": 2793.6, + "valid_targets_min": 726 + }, + { + "epoch": 6.514657980456026, + "grad_norm": 0.5169958003536245, + "learning_rate": 5.868589857923623e-07, + "loss": 0.3367, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3389505445957184, + "step": 4000, + "valid_targets_mean": 4841.8, + "valid_targets_min": 792 + }, + { + "epoch": 6.522801302931596, + "grad_norm": 0.7377132364977999, + "learning_rate": 5.67488282460038e-07, + "loss": 0.3066, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17984521389007568, + "step": 4005, + "valid_targets_mean": 1807.9, + "valid_targets_min": 898 + }, + { + "epoch": 6.530944625407166, + "grad_norm": 0.7143086620414634, + "learning_rate": 5.484380548718493e-07, + "loss": 0.2907, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.276581346988678, + "step": 4010, + "valid_targets_mean": 2050.7, + "valid_targets_min": 864 + }, + { + "epoch": 6.539087947882736, + "grad_norm": 0.693811493223318, + "learning_rate": 5.297086171990207e-07, + "loss": 0.3509, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2641972005367279, + "step": 4015, + "valid_targets_mean": 3110.8, + "valid_targets_min": 890 + }, + { + "epoch": 6.547231270358306, + "grad_norm": 0.4712665516035758, + "learning_rate": 5.113002783223797e-07, + "loss": 0.3244, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.29624348878860474, + "step": 4020, + "valid_targets_mean": 4992.9, + "valid_targets_min": 685 + }, + { + "epoch": 6.555374592833877, + "grad_norm": 0.4685209019897719, + "learning_rate": 4.932133418272967e-07, + "loss": 0.3805, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.36349472403526306, + "step": 4025, + "valid_targets_mean": 6369.7, + "valid_targets_min": 786 + }, + { + "epoch": 6.563517915309446, + "grad_norm": 0.6424829443740881, + "learning_rate": 4.75448105998646e-07, + "loss": 0.309, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27156513929367065, + "step": 4030, + "valid_targets_mean": 3449.8, + "valid_targets_min": 849 + }, + { + "epoch": 6.571661237785016, + "grad_norm": 0.6277656765376219, + "learning_rate": 4.5800486381590847e-07, + "loss": 0.3506, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3389224112033844, + "step": 4035, + "valid_targets_mean": 3598.0, + "valid_targets_min": 814 + }, + { + "epoch": 6.579804560260587, + "grad_norm": 0.7155044739370692, + "learning_rate": 4.4088390294833473e-07, + "loss": 0.3367, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24517980217933655, + "step": 4040, + "valid_targets_mean": 2185.8, + "valid_targets_min": 831 + }, + { + "epoch": 6.587947882736156, + "grad_norm": 0.5670772848607728, + "learning_rate": 4.2408550575018916e-07, + "loss": 0.378, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3753878176212311, + "step": 4045, + "valid_targets_mean": 4285.1, + "valid_targets_min": 900 + }, + { + "epoch": 6.596091205211726, + "grad_norm": 0.7083158155709571, + "learning_rate": 4.076099492561159e-07, + "loss": 0.3545, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24855034053325653, + "step": 4050, + "valid_targets_mean": 2208.5, + "valid_targets_min": 713 + }, + { + "epoch": 6.6042345276872965, + "grad_norm": 0.6002944768019384, + "learning_rate": 3.914575051765468e-07, + "loss": 0.387, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3668072819709778, + "step": 4055, + "valid_targets_mean": 3880.9, + "valid_targets_min": 839 + }, + { + "epoch": 6.612377850162867, + "grad_norm": 0.5336818453173313, + "learning_rate": 3.756284398932386e-07, + "loss": 0.3431, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.37711822986602783, + "step": 4060, + "valid_targets_mean": 5389.2, + "valid_targets_min": 973 + }, + { + "epoch": 6.620521172638437, + "grad_norm": 0.9542773287178665, + "learning_rate": 3.601230144548629e-07, + "loss": 0.351, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2880703806877136, + "step": 4065, + "valid_targets_mean": 3215.4, + "valid_targets_min": 884 + }, + { + "epoch": 6.628664495114006, + "grad_norm": 0.6850410587721556, + "learning_rate": 3.4494148457271836e-07, + "loss": 0.322, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3143194317817688, + "step": 4070, + "valid_targets_mean": 2748.0, + "valid_targets_min": 934 + }, + { + "epoch": 6.636807817589577, + "grad_norm": 0.49452270488140615, + "learning_rate": 3.300841006165056e-07, + "loss": 0.3424, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.32955706119537354, + "step": 4075, + "valid_targets_mean": 5227.8, + "valid_targets_min": 736 + }, + { + "epoch": 6.644951140065147, + "grad_norm": 0.6015102563466416, + "learning_rate": 3.1555110761019024e-07, + "loss": 0.3275, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2955281436443329, + "step": 4080, + "valid_targets_mean": 3714.2, + "valid_targets_min": 918 + }, + { + "epoch": 6.653094462540716, + "grad_norm": 0.5938232414357245, + "learning_rate": 3.0134274522797937e-07, + "loss": 0.3401, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.35595929622650146, + "step": 4085, + "valid_targets_mean": 3612.4, + "valid_targets_min": 875 + }, + { + "epoch": 6.6612377850162865, + "grad_norm": 0.5315760509874679, + "learning_rate": 2.8745924779034927e-07, + "loss": 0.3638, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.444208562374115, + "step": 4090, + "valid_targets_mean": 5404.6, + "valid_targets_min": 849 + }, + { + "epoch": 6.669381107491857, + "grad_norm": 0.7288969927496485, + "learning_rate": 2.7390084426020426e-07, + "loss": 0.3255, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27820539474487305, + "step": 4095, + "valid_targets_mean": 2200.1, + "valid_targets_min": 812 + }, + { + "epoch": 6.677524429967427, + "grad_norm": 0.5320163440030201, + "learning_rate": 2.6066775823907043e-07, + "loss": 0.3129, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.28354156017303467, + "step": 4100, + "valid_targets_mean": 4260.7, + "valid_targets_min": 783 + }, + { + "epoch": 6.685667752442996, + "grad_norm": 0.5463575189816313, + "learning_rate": 2.4776020796344333e-07, + "loss": 0.3766, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.40800559520721436, + "step": 4105, + "valid_targets_mean": 4731.5, + "valid_targets_min": 866 + }, + { + "epoch": 6.693811074918567, + "grad_norm": 0.7151680301504658, + "learning_rate": 2.3517840630115752e-07, + "loss": 0.3211, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21862143278121948, + "step": 4110, + "valid_targets_mean": 2087.1, + "valid_targets_min": 793 + }, + { + "epoch": 6.701954397394137, + "grad_norm": 0.6227629387034008, + "learning_rate": 2.2292256074789574e-07, + "loss": 0.3193, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.33563071489334106, + "step": 4115, + "valid_targets_mean": 3418.9, + "valid_targets_min": 732 + }, + { + "epoch": 6.710097719869707, + "grad_norm": 0.44511039888221593, + "learning_rate": 2.109928734237565e-07, + "loss": 0.3739, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.44039425253868103, + "step": 4120, + "valid_targets_mean": 7263.7, + "valid_targets_min": 747 + }, + { + "epoch": 6.7182410423452765, + "grad_norm": 0.5561821980620066, + "learning_rate": 1.9938954106993204e-07, + "loss": 0.3348, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.34715205430984497, + "step": 4125, + "valid_targets_mean": 4391.3, + "valid_targets_min": 757 + }, + { + "epoch": 6.726384364820847, + "grad_norm": 0.8124873761514199, + "learning_rate": 1.8811275504544424e-07, + "loss": 0.3358, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3954392671585083, + "step": 4130, + "valid_targets_mean": 4121.2, + "valid_targets_min": 662 + }, + { + "epoch": 6.734527687296417, + "grad_norm": 0.7197962437958496, + "learning_rate": 1.7716270132401182e-07, + "loss": 0.3585, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.29642653465270996, + "step": 4135, + "valid_targets_mean": 2342.0, + "valid_targets_min": 869 + }, + { + "epoch": 6.742671009771987, + "grad_norm": 0.6087826637086723, + "learning_rate": 1.665395604909681e-07, + "loss": 0.3361, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4003857374191284, + "step": 4140, + "valid_targets_mean": 4047.8, + "valid_targets_min": 857 + }, + { + "epoch": 6.750814332247557, + "grad_norm": 0.572017278460819, + "learning_rate": 1.5624350774028797e-07, + "loss": 0.3119, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24375978112220764, + "step": 4145, + "valid_targets_mean": 3520.5, + "valid_targets_min": 985 + }, + { + "epoch": 6.758957654723127, + "grad_norm": 0.6513575541176351, + "learning_rate": 1.4627471287169682e-07, + "loss": 0.3321, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3124695420265198, + "step": 4150, + "valid_targets_mean": 2907.6, + "valid_targets_min": 966 + }, + { + "epoch": 6.767100977198697, + "grad_norm": 0.472000163252002, + "learning_rate": 1.3663334028787057e-07, + "loss": 0.3627, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.39135321974754333, + "step": 4155, + "valid_targets_mean": 6093.2, + "valid_targets_min": 721 + }, + { + "epoch": 6.7752442996742674, + "grad_norm": 0.48098100164413315, + "learning_rate": 1.2731954899172894e-07, + "loss": 0.3423, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3841916620731354, + "step": 4160, + "valid_targets_mean": 5802.6, + "valid_targets_min": 1181 + }, + { + "epoch": 6.783387622149837, + "grad_norm": 0.7764340983828756, + "learning_rate": 1.1833349258380423e-07, + "loss": 0.3189, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2061777263879776, + "step": 4165, + "valid_targets_mean": 1799.6, + "valid_targets_min": 747 + }, + { + "epoch": 6.791530944625407, + "grad_norm": 0.40608448299991085, + "learning_rate": 1.0967531925972108e-07, + "loss": 0.3582, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.34125253558158875, + "step": 4170, + "valid_targets_mean": 8026.2, + "valid_targets_min": 945 + }, + { + "epoch": 6.799674267100977, + "grad_norm": 0.4753194339073461, + "learning_rate": 1.0134517180773407e-07, + "loss": 0.3275, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.355404794216156, + "step": 4175, + "valid_targets_mean": 6141.6, + "valid_targets_min": 778 + }, + { + "epoch": 6.807817589576548, + "grad_norm": 0.5282490961476108, + "learning_rate": 9.334318760639394e-08, + "loss": 0.3322, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.34441474080085754, + "step": 4180, + "valid_targets_mean": 4676.9, + "valid_targets_min": 937 + }, + { + "epoch": 6.815960912052117, + "grad_norm": 0.6111023667311495, + "learning_rate": 8.566949862226282e-08, + "loss": 0.3599, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3501906991004944, + "step": 4185, + "valid_targets_mean": 4139.8, + "valid_targets_min": 857 + }, + { + "epoch": 6.824104234527687, + "grad_norm": 0.7812310076742701, + "learning_rate": 7.832423140775147e-08, + "loss": 0.3388, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2835553288459778, + "step": 4190, + "valid_targets_mean": 1900.2, + "valid_targets_min": 757 + }, + { + "epoch": 6.8322475570032575, + "grad_norm": 0.5800371737709853, + "learning_rate": 7.13075070990299e-08, + "loss": 0.3665, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4668848216533661, + "step": 4195, + "valid_targets_mean": 4460.9, + "valid_targets_min": 953 + }, + { + "epoch": 6.840390879478828, + "grad_norm": 0.5618895931680352, + "learning_rate": 6.461944141401777e-08, + "loss": 0.34, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3719158470630646, + "step": 4200, + "valid_targets_mean": 4110.5, + "valid_targets_min": 880 + }, + { + "epoch": 6.848534201954397, + "grad_norm": 0.5303751657439842, + "learning_rate": 5.82601446504949e-08, + "loss": 0.3842, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.428359717130661, + "step": 4205, + "valid_targets_mean": 5392.2, + "valid_targets_min": 735 + }, + { + "epoch": 6.856677524429967, + "grad_norm": 0.5313665963363677, + "learning_rate": 5.222972168426932e-08, + "loss": 0.3836, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3794805407524109, + "step": 4210, + "valid_targets_mean": 4938.8, + "valid_targets_min": 700 + }, + { + "epoch": 6.864820846905538, + "grad_norm": 0.6450728395561811, + "learning_rate": 4.65282719674498e-08, + "loss": 0.3788, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.37538132071495056, + "step": 4215, + "valid_targets_mean": 3366.8, + "valid_targets_min": 837 + }, + { + "epoch": 6.872964169381108, + "grad_norm": 0.7146659949055134, + "learning_rate": 4.115588952680716e-08, + "loss": 0.3075, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.28741201758384705, + "step": 4220, + "valid_targets_mean": 2594.6, + "valid_targets_min": 945 + }, + { + "epoch": 6.881107491856677, + "grad_norm": 0.6648181478805534, + "learning_rate": 3.6112662962226594e-08, + "loss": 0.3281, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3017386198043823, + "step": 4225, + "valid_targets_mean": 3756.8, + "valid_targets_min": 884 + }, + { + "epoch": 6.8892508143322475, + "grad_norm": 0.5272738659579296, + "learning_rate": 3.139867544523778e-08, + "loss": 0.3753, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4799354076385498, + "step": 4230, + "valid_targets_mean": 5594.6, + "valid_targets_min": 816 + }, + { + "epoch": 6.897394136807818, + "grad_norm": 0.6714580811715728, + "learning_rate": 2.7014004717653698e-08, + "loss": 0.3384, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.37778422236442566, + "step": 4235, + "valid_targets_mean": 3835.7, + "valid_targets_min": 715 + }, + { + "epoch": 6.905537459283387, + "grad_norm": 0.6044305366151768, + "learning_rate": 2.29587230902828e-08, + "loss": 0.3658, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3965587317943573, + "step": 4240, + "valid_targets_mean": 4455.4, + "valid_targets_min": 854 + }, + { + "epoch": 6.913680781758957, + "grad_norm": 0.5351092338130102, + "learning_rate": 1.92328974417344e-08, + "loss": 0.3286, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.36077630519866943, + "step": 4245, + "valid_targets_mean": 4522.1, + "valid_targets_min": 998 + }, + { + "epoch": 6.921824104234528, + "grad_norm": 0.5797353029530031, + "learning_rate": 1.5836589217321784e-08, + "loss": 0.3174, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.39402931928634644, + "step": 4250, + "valid_targets_mean": 4744.8, + "valid_targets_min": 798 + }, + { + "epoch": 6.929967426710098, + "grad_norm": 0.6227919964971375, + "learning_rate": 1.2769854428043015e-08, + "loss": 0.364, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.28821349143981934, + "step": 4255, + "valid_targets_mean": 2795.4, + "valid_targets_min": 862 + }, + { + "epoch": 6.938110749185668, + "grad_norm": 0.4394604157565578, + "learning_rate": 1.003274364966389e-08, + "loss": 0.3529, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3350231647491455, + "step": 4260, + "valid_targets_mean": 7206.3, + "valid_targets_min": 1016 + }, + { + "epoch": 6.9462540716612375, + "grad_norm": 0.5960272339515396, + "learning_rate": 7.62530202187417e-09, + "loss": 0.3909, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.342350572347641, + "step": 4265, + "valid_targets_mean": 3631.2, + "valid_targets_min": 885 + }, + { + "epoch": 6.954397394136808, + "grad_norm": 0.569060494530746, + "learning_rate": 5.5475692475570605e-09, + "loss": 0.3573, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3874856233596802, + "step": 4270, + "valid_targets_mean": 4406.1, + "valid_targets_min": 862 + }, + { + "epoch": 6.962540716612378, + "grad_norm": 0.5273832195562178, + "learning_rate": 3.799579592116409e-09, + "loss": 0.3535, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3336706757545471, + "step": 4275, + "valid_targets_mean": 4709.2, + "valid_targets_min": 918 + }, + { + "epoch": 6.970684039087947, + "grad_norm": 0.5640070865974828, + "learning_rate": 2.381361882928257e-09, + "loss": 0.3481, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.44218719005584717, + "step": 4280, + "valid_targets_mean": 4784.6, + "valid_targets_min": 720 + }, + { + "epoch": 6.978827361563518, + "grad_norm": 0.4176130409602581, + "learning_rate": 1.292939508856783e-09, + "loss": 0.3108, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3313027620315552, + "step": 4285, + "valid_targets_mean": 7068.1, + "valid_targets_min": 908 + }, + { + "epoch": 6.986970684039088, + "grad_norm": 0.600473050700247, + "learning_rate": 5.34330419870166e-10, + "loss": 0.3364, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4015370309352875, + "step": 4290, + "valid_targets_mean": 4300.8, + "valid_targets_min": 767 + }, + { + "epoch": 6.995114006514658, + "grad_norm": 0.6286218444431447, + "learning_rate": 1.0554712674526458e-10, + "loss": 0.3586, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3334238529205322, + "step": 4295, + "valid_targets_mean": 3764.9, + "valid_targets_min": 904 + }, + { + "epoch": 7.0, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3913819491863251, + "step": 4298, + "total_flos": 803372905594880.0, + "train_loss": 0.3917123807092332, + "train_runtime": 26436.6082, + "train_samples_per_second": 2.6, + "train_steps_per_second": 0.163, + "valid_targets_mean": 3851.4, + "valid_targets_min": 823 + } + ], + "logging_steps": 5, + "max_steps": 4298, + "num_input_tokens_seen": 0, + "num_train_epochs": 7, + "save_steps": 1500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": true + }, + "attributes": {} + } + }, + "total_flos": 803372905594880.0, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +}