diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,11388 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 7.0, + "eval_steps": 500, + "global_step": 5159, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0067842605156037995, + "grad_norm": 11.89448977442727, + "learning_rate": 3.1007751937984497e-07, + "loss": 0.7467, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.753227710723877, + "step": 5, + "valid_targets_mean": 2444.9, + "valid_targets_min": 744 + }, + { + "epoch": 0.013568521031207599, + "grad_norm": 11.235151503546374, + "learning_rate": 6.976744186046513e-07, + "loss": 0.7355, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.7996399998664856, + "step": 10, + "valid_targets_mean": 2697.8, + "valid_targets_min": 1231 + }, + { + "epoch": 0.0203527815468114, + "grad_norm": 10.797290846276908, + "learning_rate": 1.0852713178294575e-06, + "loss": 0.7424, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.7481732368469238, + "step": 15, + "valid_targets_mean": 2544.7, + "valid_targets_min": 794 + }, + { + "epoch": 0.027137042062415198, + "grad_norm": 8.962671209264652, + "learning_rate": 1.4728682170542638e-06, + "loss": 0.721, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.750222384929657, + "step": 20, + "valid_targets_mean": 2245.1, + "valid_targets_min": 869 + }, + { + "epoch": 0.033921302578018994, + "grad_norm": 5.830676916492257, + "learning_rate": 1.86046511627907e-06, + "loss": 0.7025, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.6887879371643066, + "step": 25, + "valid_targets_mean": 3070.0, + "valid_targets_min": 777 + }, + { + "epoch": 0.0407055630936228, + "grad_norm": 4.8564311609396285, + "learning_rate": 2.2480620155038763e-06, + "loss": 0.6958, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.6251484155654907, + "step": 30, + "valid_targets_mean": 2736.8, + "valid_targets_min": 721 + }, + { + "epoch": 0.04748982360922659, + "grad_norm": 3.646908378962616, + "learning_rate": 2.635658914728683e-06, + "loss": 0.6536, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.6334314942359924, + "step": 35, + "valid_targets_mean": 2762.1, + "valid_targets_min": 1005 + }, + { + "epoch": 0.054274084124830396, + "grad_norm": 2.9965745618812183, + "learning_rate": 3.0232558139534885e-06, + "loss": 0.612, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.6031469106674194, + "step": 40, + "valid_targets_mean": 2219.0, + "valid_targets_min": 584 + }, + { + "epoch": 0.06105834464043419, + "grad_norm": 2.746946360828467, + "learning_rate": 3.4108527131782946e-06, + "loss": 0.5748, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.5974681973457336, + "step": 45, + "valid_targets_mean": 2428.1, + "valid_targets_min": 819 + }, + { + "epoch": 0.06784260515603799, + "grad_norm": 1.5002667732002524, + "learning_rate": 3.798449612403101e-06, + "loss": 0.5401, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.528899073600769, + "step": 50, + "valid_targets_mean": 2882.4, + "valid_targets_min": 924 + }, + { + "epoch": 0.07462686567164178, + "grad_norm": 1.1607016531427559, + "learning_rate": 4.186046511627907e-06, + "loss": 0.5035, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.5087602734565735, + "step": 55, + "valid_targets_mean": 2938.0, + "valid_targets_min": 1086 + }, + { + "epoch": 0.0814111261872456, + "grad_norm": 0.9535068201891985, + "learning_rate": 4.573643410852713e-06, + "loss": 0.4885, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4569432735443115, + "step": 60, + "valid_targets_mean": 2973.9, + "valid_targets_min": 627 + }, + { + "epoch": 0.08819538670284939, + "grad_norm": 1.1135265052091592, + "learning_rate": 4.9612403100775195e-06, + "loss": 0.5046, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.5376325845718384, + "step": 65, + "valid_targets_mean": 2420.6, + "valid_targets_min": 876 + }, + { + "epoch": 0.09497964721845319, + "grad_norm": 0.9604258885113336, + "learning_rate": 5.348837209302326e-06, + "loss": 0.4622, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.44064652919769287, + "step": 70, + "valid_targets_mean": 2443.4, + "valid_targets_min": 904 + }, + { + "epoch": 0.10176390773405698, + "grad_norm": 0.7991192270229877, + "learning_rate": 5.736434108527133e-06, + "loss": 0.4649, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4826815724372864, + "step": 75, + "valid_targets_mean": 3380.5, + "valid_targets_min": 908 + }, + { + "epoch": 0.10854816824966079, + "grad_norm": 0.769158116469445, + "learning_rate": 6.124031007751938e-06, + "loss": 0.4427, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.44103240966796875, + "step": 80, + "valid_targets_mean": 3035.8, + "valid_targets_min": 1083 + }, + { + "epoch": 0.11533242876526459, + "grad_norm": 0.7875257411449904, + "learning_rate": 6.511627906976745e-06, + "loss": 0.4481, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4029495120048523, + "step": 85, + "valid_targets_mean": 2706.4, + "valid_targets_min": 854 + }, + { + "epoch": 0.12211668928086838, + "grad_norm": 0.8473139007291898, + "learning_rate": 6.899224806201551e-06, + "loss": 0.4384, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.45044875144958496, + "step": 90, + "valid_targets_mean": 2473.2, + "valid_targets_min": 777 + }, + { + "epoch": 0.12890094979647218, + "grad_norm": 0.7757846680357573, + "learning_rate": 7.286821705426357e-06, + "loss": 0.4459, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4469519853591919, + "step": 95, + "valid_targets_mean": 2856.1, + "valid_targets_min": 819 + }, + { + "epoch": 0.13568521031207598, + "grad_norm": 0.8491234814195904, + "learning_rate": 7.674418604651164e-06, + "loss": 0.4352, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.439635306596756, + "step": 100, + "valid_targets_mean": 2584.4, + "valid_targets_min": 998 + }, + { + "epoch": 0.14246947082767977, + "grad_norm": 0.7130925742464007, + "learning_rate": 8.06201550387597e-06, + "loss": 0.4203, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4034441113471985, + "step": 105, + "valid_targets_mean": 3170.1, + "valid_targets_min": 719 + }, + { + "epoch": 0.14925373134328357, + "grad_norm": 0.8093168817770919, + "learning_rate": 8.449612403100775e-06, + "loss": 0.4547, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4224606156349182, + "step": 110, + "valid_targets_mean": 2654.3, + "valid_targets_min": 740 + }, + { + "epoch": 0.1560379918588874, + "grad_norm": 0.8309738324960866, + "learning_rate": 8.837209302325582e-06, + "loss": 0.4215, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.43095940351486206, + "step": 115, + "valid_targets_mean": 2718.1, + "valid_targets_min": 918 + }, + { + "epoch": 0.1628222523744912, + "grad_norm": 0.793319858169495, + "learning_rate": 9.224806201550389e-06, + "loss": 0.4186, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.43605878949165344, + "step": 120, + "valid_targets_mean": 2385.8, + "valid_targets_min": 571 + }, + { + "epoch": 0.16960651289009498, + "grad_norm": 1.0230399253622091, + "learning_rate": 9.612403100775196e-06, + "loss": 0.4211, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3982701897621155, + "step": 125, + "valid_targets_mean": 1617.4, + "valid_targets_min": 683 + }, + { + "epoch": 0.17639077340569878, + "grad_norm": 0.8209896026771017, + "learning_rate": 1e-05, + "loss": 0.4032, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4078044891357422, + "step": 130, + "valid_targets_mean": 2368.6, + "valid_targets_min": 700 + }, + { + "epoch": 0.18317503392130258, + "grad_norm": 0.7095946770161736, + "learning_rate": 1.0387596899224808e-05, + "loss": 0.414, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3884209394454956, + "step": 135, + "valid_targets_mean": 2926.9, + "valid_targets_min": 882 + }, + { + "epoch": 0.18995929443690637, + "grad_norm": 0.6958216140407364, + "learning_rate": 1.0775193798449613e-05, + "loss": 0.4256, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.37612026929855347, + "step": 140, + "valid_targets_mean": 3520.8, + "valid_targets_min": 1095 + }, + { + "epoch": 0.19674355495251017, + "grad_norm": 0.8291040584046291, + "learning_rate": 1.116279069767442e-05, + "loss": 0.4085, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4283609986305237, + "step": 145, + "valid_targets_mean": 2811.3, + "valid_targets_min": 1326 + }, + { + "epoch": 0.20352781546811397, + "grad_norm": 0.8093278558733981, + "learning_rate": 1.1550387596899227e-05, + "loss": 0.4014, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4302545189857483, + "step": 150, + "valid_targets_mean": 2660.5, + "valid_targets_min": 874 + }, + { + "epoch": 0.21031207598371776, + "grad_norm": 0.7238386266854082, + "learning_rate": 1.193798449612403e-05, + "loss": 0.4173, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.37872761487960815, + "step": 155, + "valid_targets_mean": 2928.2, + "valid_targets_min": 1407 + }, + { + "epoch": 0.21709633649932158, + "grad_norm": 0.7853375198765415, + "learning_rate": 1.2325581395348838e-05, + "loss": 0.4018, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.40379324555397034, + "step": 160, + "valid_targets_mean": 2898.9, + "valid_targets_min": 1279 + }, + { + "epoch": 0.22388059701492538, + "grad_norm": 0.7686517468969368, + "learning_rate": 1.2713178294573645e-05, + "loss": 0.4011, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4392387270927429, + "step": 165, + "valid_targets_mean": 2946.6, + "valid_targets_min": 597 + }, + { + "epoch": 0.23066485753052918, + "grad_norm": 0.7993134466572939, + "learning_rate": 1.3100775193798451e-05, + "loss": 0.3991, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.41066741943359375, + "step": 170, + "valid_targets_mean": 2813.2, + "valid_targets_min": 1002 + }, + { + "epoch": 0.23744911804613297, + "grad_norm": 0.6677659446481017, + "learning_rate": 1.3488372093023257e-05, + "loss": 0.3902, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3710253834724426, + "step": 175, + "valid_targets_mean": 3316.8, + "valid_targets_min": 1334 + }, + { + "epoch": 0.24423337856173677, + "grad_norm": 0.7909997170086058, + "learning_rate": 1.3875968992248064e-05, + "loss": 0.3856, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.41473937034606934, + "step": 180, + "valid_targets_mean": 2416.4, + "valid_targets_min": 715 + }, + { + "epoch": 0.2510176390773406, + "grad_norm": 0.8792540068344556, + "learning_rate": 1.426356589147287e-05, + "loss": 0.3846, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3967583477497101, + "step": 185, + "valid_targets_mean": 2736.9, + "valid_targets_min": 640 + }, + { + "epoch": 0.25780189959294436, + "grad_norm": 0.7958264159408515, + "learning_rate": 1.4651162790697674e-05, + "loss": 0.4017, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.40616077184677124, + "step": 190, + "valid_targets_mean": 2568.8, + "valid_targets_min": 623 + }, + { + "epoch": 0.2645861601085482, + "grad_norm": 0.7604022300459651, + "learning_rate": 1.5038759689922481e-05, + "loss": 0.3754, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.33513033390045166, + "step": 195, + "valid_targets_mean": 2750.9, + "valid_targets_min": 791 + }, + { + "epoch": 0.27137042062415195, + "grad_norm": 0.8244610679195269, + "learning_rate": 1.542635658914729e-05, + "loss": 0.3939, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.38060298562049866, + "step": 200, + "valid_targets_mean": 2832.4, + "valid_targets_min": 1334 + }, + { + "epoch": 0.2781546811397558, + "grad_norm": 0.729183257219207, + "learning_rate": 1.5813953488372095e-05, + "loss": 0.3801, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.36861342191696167, + "step": 205, + "valid_targets_mean": 2902.9, + "valid_targets_min": 1030 + }, + { + "epoch": 0.28493894165535955, + "grad_norm": 0.8051272259035377, + "learning_rate": 1.62015503875969e-05, + "loss": 0.391, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4003250300884247, + "step": 210, + "valid_targets_mean": 2374.9, + "valid_targets_min": 572 + }, + { + "epoch": 0.29172320217096337, + "grad_norm": 0.7887590950471755, + "learning_rate": 1.6589147286821706e-05, + "loss": 0.3754, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3678756356239319, + "step": 215, + "valid_targets_mean": 3166.3, + "valid_targets_min": 1348 + }, + { + "epoch": 0.29850746268656714, + "grad_norm": 0.8029733763158426, + "learning_rate": 1.697674418604651e-05, + "loss": 0.3681, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3723628520965576, + "step": 220, + "valid_targets_mean": 2474.6, + "valid_targets_min": 518 + }, + { + "epoch": 0.30529172320217096, + "grad_norm": 0.7343226056865998, + "learning_rate": 1.736434108527132e-05, + "loss": 0.3632, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.38321518898010254, + "step": 225, + "valid_targets_mean": 3262.1, + "valid_targets_min": 1123 + }, + { + "epoch": 0.3120759837177748, + "grad_norm": 0.7607257346962287, + "learning_rate": 1.7751937984496125e-05, + "loss": 0.3715, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.34952956438064575, + "step": 230, + "valid_targets_mean": 2810.3, + "valid_targets_min": 1113 + }, + { + "epoch": 0.31886024423337855, + "grad_norm": 0.8227068076920964, + "learning_rate": 1.813953488372093e-05, + "loss": 0.3819, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3929305970668793, + "step": 235, + "valid_targets_mean": 2834.8, + "valid_targets_min": 1701 + }, + { + "epoch": 0.3256445047489824, + "grad_norm": 0.6827841124705409, + "learning_rate": 1.852713178294574e-05, + "loss": 0.3618, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3641515374183655, + "step": 240, + "valid_targets_mean": 3186.5, + "valid_targets_min": 898 + }, + { + "epoch": 0.33242876526458615, + "grad_norm": 0.7602966954331676, + "learning_rate": 1.8914728682170544e-05, + "loss": 0.3751, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3500159978866577, + "step": 245, + "valid_targets_mean": 2656.6, + "valid_targets_min": 618 + }, + { + "epoch": 0.33921302578018997, + "grad_norm": 0.7668023931379694, + "learning_rate": 1.9302325581395353e-05, + "loss": 0.3739, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4043837785720825, + "step": 250, + "valid_targets_mean": 3122.2, + "valid_targets_min": 1390 + }, + { + "epoch": 0.34599728629579374, + "grad_norm": 0.7093670546464718, + "learning_rate": 1.9689922480620155e-05, + "loss": 0.3778, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.38684576749801636, + "step": 255, + "valid_targets_mean": 3311.2, + "valid_targets_min": 869 + }, + { + "epoch": 0.35278154681139756, + "grad_norm": 0.6761466382547776, + "learning_rate": 2.0077519379844963e-05, + "loss": 0.3692, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.34655070304870605, + "step": 260, + "valid_targets_mean": 3112.8, + "valid_targets_min": 754 + }, + { + "epoch": 0.35956580732700133, + "grad_norm": 0.6694834479453904, + "learning_rate": 2.046511627906977e-05, + "loss": 0.356, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3410634398460388, + "step": 265, + "valid_targets_mean": 3315.6, + "valid_targets_min": 1369 + }, + { + "epoch": 0.36635006784260515, + "grad_norm": 0.8155424962452198, + "learning_rate": 2.0852713178294577e-05, + "loss": 0.3685, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.35310614109039307, + "step": 270, + "valid_targets_mean": 2339.4, + "valid_targets_min": 614 + }, + { + "epoch": 0.373134328358209, + "grad_norm": 0.9070653857602943, + "learning_rate": 2.1240310077519383e-05, + "loss": 0.3449, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3375753164291382, + "step": 275, + "valid_targets_mean": 3212.9, + "valid_targets_min": 746 + }, + { + "epoch": 0.37991858887381275, + "grad_norm": 0.7447980160423101, + "learning_rate": 2.1627906976744188e-05, + "loss": 0.3549, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3524802327156067, + "step": 280, + "valid_targets_mean": 2781.6, + "valid_targets_min": 1074 + }, + { + "epoch": 0.38670284938941657, + "grad_norm": 0.910428084386681, + "learning_rate": 2.2015503875968993e-05, + "loss": 0.3546, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.34246036410331726, + "step": 285, + "valid_targets_mean": 2999.9, + "valid_targets_min": 656 + }, + { + "epoch": 0.39348710990502034, + "grad_norm": 0.6635427507067181, + "learning_rate": 2.2403100775193802e-05, + "loss": 0.3692, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.35911792516708374, + "step": 290, + "valid_targets_mean": 3345.4, + "valid_targets_min": 882 + }, + { + "epoch": 0.40027137042062416, + "grad_norm": 0.8108891809655495, + "learning_rate": 2.2790697674418607e-05, + "loss": 0.3676, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.36157429218292236, + "step": 295, + "valid_targets_mean": 2752.9, + "valid_targets_min": 776 + }, + { + "epoch": 0.40705563093622793, + "grad_norm": 0.7092963268541641, + "learning_rate": 2.3178294573643412e-05, + "loss": 0.357, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3586769700050354, + "step": 300, + "valid_targets_mean": 3229.6, + "valid_targets_min": 644 + }, + { + "epoch": 0.41383989145183175, + "grad_norm": 0.8313919502710964, + "learning_rate": 2.356589147286822e-05, + "loss": 0.369, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.40778008103370667, + "step": 305, + "valid_targets_mean": 3072.8, + "valid_targets_min": 714 + }, + { + "epoch": 0.4206241519674355, + "grad_norm": 0.8521822829383703, + "learning_rate": 2.3953488372093023e-05, + "loss": 0.3571, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.38653799891471863, + "step": 310, + "valid_targets_mean": 2318.4, + "valid_targets_min": 882 + }, + { + "epoch": 0.42740841248303935, + "grad_norm": 0.7709743279914623, + "learning_rate": 2.434108527131783e-05, + "loss": 0.3515, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.36175817251205444, + "step": 315, + "valid_targets_mean": 2692.5, + "valid_targets_min": 783 + }, + { + "epoch": 0.43419267299864317, + "grad_norm": 0.7913741131309853, + "learning_rate": 2.4728682170542637e-05, + "loss": 0.3564, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.34055837988853455, + "step": 320, + "valid_targets_mean": 2549.1, + "valid_targets_min": 700 + }, + { + "epoch": 0.44097693351424694, + "grad_norm": 0.7251245992538848, + "learning_rate": 2.5116279069767445e-05, + "loss": 0.3424, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3071543574333191, + "step": 325, + "valid_targets_mean": 2746.8, + "valid_targets_min": 1022 + }, + { + "epoch": 0.44776119402985076, + "grad_norm": 0.877109135791174, + "learning_rate": 2.550387596899225e-05, + "loss": 0.3515, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3644331395626068, + "step": 330, + "valid_targets_mean": 2258.4, + "valid_targets_min": 731 + }, + { + "epoch": 0.45454545454545453, + "grad_norm": 0.7463994697165139, + "learning_rate": 2.589147286821706e-05, + "loss": 0.3452, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3452218770980835, + "step": 335, + "valid_targets_mean": 2618.1, + "valid_targets_min": 1032 + }, + { + "epoch": 0.46132971506105835, + "grad_norm": 0.826153332286381, + "learning_rate": 2.627906976744186e-05, + "loss": 0.3557, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3592166006565094, + "step": 340, + "valid_targets_mean": 2649.8, + "valid_targets_min": 648 + }, + { + "epoch": 0.4681139755766621, + "grad_norm": 0.7556466009720993, + "learning_rate": 2.6666666666666667e-05, + "loss": 0.3796, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4219163656234741, + "step": 345, + "valid_targets_mean": 2855.0, + "valid_targets_min": 721 + }, + { + "epoch": 0.47489823609226595, + "grad_norm": 0.7276710198296943, + "learning_rate": 2.7054263565891475e-05, + "loss": 0.3541, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.333509236574173, + "step": 350, + "valid_targets_mean": 2856.6, + "valid_targets_min": 754 + }, + { + "epoch": 0.4816824966078697, + "grad_norm": 0.8103248615360679, + "learning_rate": 2.744186046511628e-05, + "loss": 0.3502, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3477519452571869, + "step": 355, + "valid_targets_mean": 2232.4, + "valid_targets_min": 712 + }, + { + "epoch": 0.48846675712347354, + "grad_norm": 0.9073845669791214, + "learning_rate": 2.782945736434109e-05, + "loss": 0.3475, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.36534401774406433, + "step": 360, + "valid_targets_mean": 2373.4, + "valid_targets_min": 1054 + }, + { + "epoch": 0.49525101763907736, + "grad_norm": 0.8050927376280175, + "learning_rate": 2.8217054263565894e-05, + "loss": 0.3677, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3501133620738983, + "step": 365, + "valid_targets_mean": 2434.8, + "valid_targets_min": 694 + }, + { + "epoch": 0.5020352781546812, + "grad_norm": 0.7660020717643617, + "learning_rate": 2.8604651162790703e-05, + "loss": 0.3492, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3313218355178833, + "step": 370, + "valid_targets_mean": 2836.5, + "valid_targets_min": 1353 + }, + { + "epoch": 0.508819538670285, + "grad_norm": 0.7443344419339243, + "learning_rate": 2.8992248062015505e-05, + "loss": 0.3423, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.33690541982650757, + "step": 375, + "valid_targets_mean": 2837.2, + "valid_targets_min": 890 + }, + { + "epoch": 0.5156037991858887, + "grad_norm": 0.779004361301433, + "learning_rate": 2.937984496124031e-05, + "loss": 0.3473, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.34115269780158997, + "step": 380, + "valid_targets_mean": 2826.7, + "valid_targets_min": 662 + }, + { + "epoch": 0.5223880597014925, + "grad_norm": 0.9341133562668817, + "learning_rate": 2.976744186046512e-05, + "loss": 0.3386, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3078441023826599, + "step": 385, + "valid_targets_mean": 1897.9, + "valid_targets_min": 537 + }, + { + "epoch": 0.5291723202170964, + "grad_norm": 0.9049226382408442, + "learning_rate": 3.0155038759689924e-05, + "loss": 0.3555, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.34988129138946533, + "step": 390, + "valid_targets_mean": 3014.6, + "valid_targets_min": 937 + }, + { + "epoch": 0.5359565807327001, + "grad_norm": 0.8125257555203335, + "learning_rate": 3.054263565891473e-05, + "loss": 0.3505, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3929121792316437, + "step": 395, + "valid_targets_mean": 2705.0, + "valid_targets_min": 1179 + }, + { + "epoch": 0.5427408412483039, + "grad_norm": 0.7384330929303173, + "learning_rate": 3.093023255813954e-05, + "loss": 0.324, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.30182796716690063, + "step": 400, + "valid_targets_mean": 3520.5, + "valid_targets_min": 764 + }, + { + "epoch": 0.5495251017639078, + "grad_norm": 0.9344307339860809, + "learning_rate": 3.1317829457364343e-05, + "loss": 0.3489, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.37407636642456055, + "step": 405, + "valid_targets_mean": 2585.3, + "valid_targets_min": 1025 + }, + { + "epoch": 0.5563093622795116, + "grad_norm": 0.706765156270403, + "learning_rate": 3.170542635658915e-05, + "loss": 0.3633, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3846889138221741, + "step": 410, + "valid_targets_mean": 3670.8, + "valid_targets_min": 1083 + }, + { + "epoch": 0.5630936227951153, + "grad_norm": 0.7091546038528599, + "learning_rate": 3.2093023255813954e-05, + "loss": 0.3388, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2954094111919403, + "step": 415, + "valid_targets_mean": 2686.8, + "valid_targets_min": 933 + }, + { + "epoch": 0.5698778833107191, + "grad_norm": 0.720646680324316, + "learning_rate": 3.248062015503876e-05, + "loss": 0.359, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.36445194482803345, + "step": 420, + "valid_targets_mean": 3180.9, + "valid_targets_min": 978 + }, + { + "epoch": 0.576662143826323, + "grad_norm": 0.7561273241851728, + "learning_rate": 3.286821705426357e-05, + "loss": 0.3382, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.330274760723114, + "step": 425, + "valid_targets_mean": 2636.8, + "valid_targets_min": 816 + }, + { + "epoch": 0.5834464043419267, + "grad_norm": 0.8694755557547249, + "learning_rate": 3.3255813953488377e-05, + "loss": 0.3452, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3319804072380066, + "step": 430, + "valid_targets_mean": 2225.6, + "valid_targets_min": 1286 + }, + { + "epoch": 0.5902306648575305, + "grad_norm": 0.68772821314539, + "learning_rate": 3.364341085271318e-05, + "loss": 0.3435, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.33189690113067627, + "step": 435, + "valid_targets_mean": 3022.4, + "valid_targets_min": 837 + }, + { + "epoch": 0.5970149253731343, + "grad_norm": 0.9588382883930207, + "learning_rate": 3.403100775193799e-05, + "loss": 0.3443, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3494105339050293, + "step": 440, + "valid_targets_mean": 2476.9, + "valid_targets_min": 800 + }, + { + "epoch": 0.6037991858887382, + "grad_norm": 0.7838789395466246, + "learning_rate": 3.441860465116279e-05, + "loss": 0.3446, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.32996422052383423, + "step": 445, + "valid_targets_mean": 2342.1, + "valid_targets_min": 1149 + }, + { + "epoch": 0.6105834464043419, + "grad_norm": 0.7416365052685329, + "learning_rate": 3.48062015503876e-05, + "loss": 0.3324, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3227633237838745, + "step": 450, + "valid_targets_mean": 3124.1, + "valid_targets_min": 1365 + }, + { + "epoch": 0.6173677069199457, + "grad_norm": 0.6258235950313648, + "learning_rate": 3.51937984496124e-05, + "loss": 0.3313, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.29047924280166626, + "step": 455, + "valid_targets_mean": 3342.6, + "valid_targets_min": 878 + }, + { + "epoch": 0.6241519674355496, + "grad_norm": 0.6880630346475627, + "learning_rate": 3.5581395348837215e-05, + "loss": 0.3664, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.352108359336853, + "step": 460, + "valid_targets_mean": 3073.2, + "valid_targets_min": 1075 + }, + { + "epoch": 0.6309362279511533, + "grad_norm": 0.7002916188745265, + "learning_rate": 3.596899224806202e-05, + "loss": 0.3412, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.33518415689468384, + "step": 465, + "valid_targets_mean": 2779.1, + "valid_targets_min": 1072 + }, + { + "epoch": 0.6377204884667571, + "grad_norm": 0.7760406431300181, + "learning_rate": 3.6356589147286826e-05, + "loss": 0.3503, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.32304704189300537, + "step": 470, + "valid_targets_mean": 2418.1, + "valid_targets_min": 1143 + }, + { + "epoch": 0.6445047489823609, + "grad_norm": 0.81128665595444, + "learning_rate": 3.674418604651163e-05, + "loss": 0.3423, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.35187655687332153, + "step": 475, + "valid_targets_mean": 2206.1, + "valid_targets_min": 849 + }, + { + "epoch": 0.6512890094979648, + "grad_norm": 0.6699530012511279, + "learning_rate": 3.7131782945736436e-05, + "loss": 0.3289, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.30837348103523254, + "step": 480, + "valid_targets_mean": 3204.2, + "valid_targets_min": 933 + }, + { + "epoch": 0.6580732700135685, + "grad_norm": 0.8161088245962455, + "learning_rate": 3.751937984496124e-05, + "loss": 0.341, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3431815803050995, + "step": 485, + "valid_targets_mean": 2313.1, + "valid_targets_min": 680 + }, + { + "epoch": 0.6648575305291723, + "grad_norm": 0.6636433541876118, + "learning_rate": 3.7906976744186053e-05, + "loss": 0.3527, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3255542516708374, + "step": 490, + "valid_targets_mean": 3133.2, + "valid_targets_min": 702 + }, + { + "epoch": 0.6716417910447762, + "grad_norm": 0.7193982612960406, + "learning_rate": 3.829457364341086e-05, + "loss": 0.3413, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3598499894142151, + "step": 495, + "valid_targets_mean": 2609.6, + "valid_targets_min": 758 + }, + { + "epoch": 0.6784260515603799, + "grad_norm": 0.7893940888720667, + "learning_rate": 3.8682170542635664e-05, + "loss": 0.3461, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3467223644256592, + "step": 500, + "valid_targets_mean": 2836.8, + "valid_targets_min": 1106 + }, + { + "epoch": 0.6852103120759837, + "grad_norm": 0.7057772565861374, + "learning_rate": 3.906976744186047e-05, + "loss": 0.3299, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3372578024864197, + "step": 505, + "valid_targets_mean": 2930.9, + "valid_targets_min": 654 + }, + { + "epoch": 0.6919945725915875, + "grad_norm": 0.6963494863151088, + "learning_rate": 3.9457364341085275e-05, + "loss": 0.3462, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3436594605445862, + "step": 510, + "valid_targets_mean": 3018.2, + "valid_targets_min": 717 + }, + { + "epoch": 0.6987788331071914, + "grad_norm": 0.7394424529088445, + "learning_rate": 3.984496124031008e-05, + "loss": 0.3466, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3247559666633606, + "step": 515, + "valid_targets_mean": 2679.7, + "valid_targets_min": 728 + }, + { + "epoch": 0.7055630936227951, + "grad_norm": 0.6838375834791657, + "learning_rate": 3.9999958795491475e-05, + "loss": 0.3409, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3312002420425415, + "step": 520, + "valid_targets_mean": 3034.2, + "valid_targets_min": 754 + }, + { + "epoch": 0.7123473541383989, + "grad_norm": 0.6067455993466966, + "learning_rate": 3.999970699077645e-05, + "loss": 0.3146, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3216730058193207, + "step": 525, + "valid_targets_mean": 3705.9, + "valid_targets_min": 739 + }, + { + "epoch": 0.7191316146540027, + "grad_norm": 0.6985270320464033, + "learning_rate": 3.9999226275618605e-05, + "loss": 0.3366, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.33391982316970825, + "step": 530, + "valid_targets_mean": 2748.6, + "valid_targets_min": 869 + }, + { + "epoch": 0.7259158751696065, + "grad_norm": 0.7662978798224946, + "learning_rate": 3.9998516655520055e-05, + "loss": 0.3397, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3417017459869385, + "step": 535, + "valid_targets_mean": 2394.5, + "valid_targets_min": 730 + }, + { + "epoch": 0.7327001356852103, + "grad_norm": 0.669919043363021, + "learning_rate": 3.999757813860289e-05, + "loss": 0.336, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.31267890334129333, + "step": 540, + "valid_targets_mean": 3009.2, + "valid_targets_min": 1030 + }, + { + "epoch": 0.7394843962008141, + "grad_norm": 0.7274166571678258, + "learning_rate": 3.999641073560909e-05, + "loss": 0.3236, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.31356334686279297, + "step": 545, + "valid_targets_mean": 2435.3, + "valid_targets_min": 544 + }, + { + "epoch": 0.746268656716418, + "grad_norm": 0.7210784987955194, + "learning_rate": 3.999501445990039e-05, + "loss": 0.3518, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.358939528465271, + "step": 550, + "valid_targets_mean": 2823.4, + "valid_targets_min": 814 + }, + { + "epoch": 0.7530529172320217, + "grad_norm": 0.7953791505748733, + "learning_rate": 3.9993389327458125e-05, + "loss": 0.3391, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3532836139202118, + "step": 555, + "valid_targets_mean": 2465.7, + "valid_targets_min": 958 + }, + { + "epoch": 0.7598371777476255, + "grad_norm": 0.8035999831102332, + "learning_rate": 3.9991535356883055e-05, + "loss": 0.3176, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3166276812553406, + "step": 560, + "valid_targets_mean": 2324.9, + "valid_targets_min": 698 + }, + { + "epoch": 0.7666214382632293, + "grad_norm": 0.8057221319350136, + "learning_rate": 3.9989452569395154e-05, + "loss": 0.3283, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.37396541237831116, + "step": 565, + "valid_targets_mean": 2373.8, + "valid_targets_min": 919 + }, + { + "epoch": 0.7734056987788331, + "grad_norm": 0.8130298785771777, + "learning_rate": 3.9987140988833355e-05, + "loss": 0.3452, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.32148557901382446, + "step": 570, + "valid_targets_mean": 2525.6, + "valid_targets_min": 629 + }, + { + "epoch": 0.7801899592944369, + "grad_norm": 0.7048055789019534, + "learning_rate": 3.9984600641655305e-05, + "loss": 0.334, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3230056166648865, + "step": 575, + "valid_targets_mean": 2960.2, + "valid_targets_min": 1018 + }, + { + "epoch": 0.7869742198100407, + "grad_norm": 0.7505005839634938, + "learning_rate": 3.998183155693702e-05, + "loss": 0.3561, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.36198025941848755, + "step": 580, + "valid_targets_mean": 2640.8, + "valid_targets_min": 720 + }, + { + "epoch": 0.7937584803256446, + "grad_norm": 0.5953296109502596, + "learning_rate": 3.997883376637258e-05, + "loss": 0.3336, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.35237035155296326, + "step": 585, + "valid_targets_mean": 4056.9, + "valid_targets_min": 1333 + }, + { + "epoch": 0.8005427408412483, + "grad_norm": 0.6965335680749082, + "learning_rate": 3.997560730427379e-05, + "loss": 0.3361, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.349098265171051, + "step": 590, + "valid_targets_mean": 2842.4, + "valid_targets_min": 740 + }, + { + "epoch": 0.8073270013568521, + "grad_norm": 0.8417459794727778, + "learning_rate": 3.99721522075697e-05, + "loss": 0.3176, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3189961016178131, + "step": 595, + "valid_targets_mean": 2172.4, + "valid_targets_min": 584 + }, + { + "epoch": 0.8141112618724559, + "grad_norm": 0.8232217865179002, + "learning_rate": 3.99684685158063e-05, + "loss": 0.3363, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.35521090030670166, + "step": 600, + "valid_targets_mean": 3057.4, + "valid_targets_min": 924 + }, + { + "epoch": 0.8208955223880597, + "grad_norm": 0.6415450067556615, + "learning_rate": 3.996455627114598e-05, + "loss": 0.3267, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.30212873220443726, + "step": 605, + "valid_targets_mean": 3003.2, + "valid_targets_min": 989 + }, + { + "epoch": 0.8276797829036635, + "grad_norm": 0.7030806565946134, + "learning_rate": 3.996041551836707e-05, + "loss": 0.3382, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.34246373176574707, + "step": 610, + "valid_targets_mean": 2909.0, + "valid_targets_min": 819 + }, + { + "epoch": 0.8344640434192673, + "grad_norm": 0.6504343613445783, + "learning_rate": 3.9956046304863354e-05, + "loss": 0.3362, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3366634249687195, + "step": 615, + "valid_targets_mean": 3280.8, + "valid_targets_min": 563 + }, + { + "epoch": 0.841248303934871, + "grad_norm": 0.8323739984409763, + "learning_rate": 3.9951448680643485e-05, + "loss": 0.326, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.31927552819252014, + "step": 620, + "valid_targets_mean": 2005.1, + "valid_targets_min": 656 + }, + { + "epoch": 0.8480325644504749, + "grad_norm": 0.7434853063494745, + "learning_rate": 3.994662269833044e-05, + "loss": 0.3414, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.33908501267433167, + "step": 625, + "valid_targets_mean": 2536.9, + "valid_targets_min": 665 + }, + { + "epoch": 0.8548168249660787, + "grad_norm": 0.6740114975827273, + "learning_rate": 3.994156841316093e-05, + "loss": 0.322, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3202781677246094, + "step": 630, + "valid_targets_mean": 2874.8, + "valid_targets_min": 801 + }, + { + "epoch": 0.8616010854816825, + "grad_norm": 0.7855713565247058, + "learning_rate": 3.9936285882984723e-05, + "loss": 0.3335, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.34169870615005493, + "step": 635, + "valid_targets_mean": 2232.2, + "valid_targets_min": 740 + }, + { + "epoch": 0.8683853459972863, + "grad_norm": 0.6561553473826617, + "learning_rate": 3.993077516826403e-05, + "loss": 0.3374, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3322102427482605, + "step": 640, + "valid_targets_mean": 2907.2, + "valid_targets_min": 805 + }, + { + "epoch": 0.8751696065128901, + "grad_norm": 0.7829492620549515, + "learning_rate": 3.9925036332072775e-05, + "loss": 0.3253, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.32236772775650024, + "step": 645, + "valid_targets_mean": 2319.2, + "valid_targets_min": 981 + }, + { + "epoch": 0.8819538670284939, + "grad_norm": 0.7216603888066516, + "learning_rate": 3.99190694400959e-05, + "loss": 0.3162, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.35016581416130066, + "step": 650, + "valid_targets_mean": 2640.7, + "valid_targets_min": 828 + }, + { + "epoch": 0.8887381275440976, + "grad_norm": 0.6378166749356016, + "learning_rate": 3.991287456062861e-05, + "loss": 0.3396, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.34149160981178284, + "step": 655, + "valid_targets_mean": 3447.8, + "valid_targets_min": 1261 + }, + { + "epoch": 0.8955223880597015, + "grad_norm": 0.7082789982684017, + "learning_rate": 3.9906451764575544e-05, + "loss": 0.3388, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.348561555147171, + "step": 660, + "valid_targets_mean": 2913.9, + "valid_targets_min": 723 + }, + { + "epoch": 0.9023066485753053, + "grad_norm": 0.7353124573695241, + "learning_rate": 3.989980112545007e-05, + "loss": 0.315, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3169977068901062, + "step": 665, + "valid_targets_mean": 2372.2, + "valid_targets_min": 893 + }, + { + "epoch": 0.9090909090909091, + "grad_norm": 0.6779717293015843, + "learning_rate": 3.989292271937332e-05, + "loss": 0.3354, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3415604531764984, + "step": 670, + "valid_targets_mean": 3224.8, + "valid_targets_min": 1172 + }, + { + "epoch": 0.9158751696065129, + "grad_norm": 0.6802704573331115, + "learning_rate": 3.988581662507339e-05, + "loss": 0.3256, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3390202820301056, + "step": 675, + "valid_targets_mean": 2996.1, + "valid_targets_min": 464 + }, + { + "epoch": 0.9226594301221167, + "grad_norm": 0.7137326701115103, + "learning_rate": 3.987848292388445e-05, + "loss": 0.3156, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3149383068084717, + "step": 680, + "valid_targets_mean": 2476.1, + "valid_targets_min": 770 + }, + { + "epoch": 0.9294436906377205, + "grad_norm": 0.7976853977114939, + "learning_rate": 3.987092169974576e-05, + "loss": 0.3385, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.34196972846984863, + "step": 685, + "valid_targets_mean": 2134.6, + "valid_targets_min": 722 + }, + { + "epoch": 0.9362279511533242, + "grad_norm": 0.7571960144185641, + "learning_rate": 3.986313303920074e-05, + "loss": 0.3058, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.31737232208251953, + "step": 690, + "valid_targets_mean": 2642.4, + "valid_targets_min": 1254 + }, + { + "epoch": 0.9430122116689281, + "grad_norm": 0.7686352923550949, + "learning_rate": 3.985511703139599e-05, + "loss": 0.3108, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.30339232087135315, + "step": 695, + "valid_targets_mean": 2095.3, + "valid_targets_min": 714 + }, + { + "epoch": 0.9497964721845319, + "grad_norm": 0.6986874211530488, + "learning_rate": 3.984687376808025e-05, + "loss": 0.3319, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.34129583835601807, + "step": 700, + "valid_targets_mean": 2803.9, + "valid_targets_min": 744 + }, + { + "epoch": 0.9565807327001357, + "grad_norm": 0.7610343107451978, + "learning_rate": 3.983840334360336e-05, + "loss": 0.3358, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.33215272426605225, + "step": 705, + "valid_targets_mean": 2625.3, + "valid_targets_min": 1206 + }, + { + "epoch": 0.9633649932157394, + "grad_norm": 0.6276014942943633, + "learning_rate": 3.982970585491516e-05, + "loss": 0.3281, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3128705620765686, + "step": 710, + "valid_targets_mean": 3215.4, + "valid_targets_min": 1391 + }, + { + "epoch": 0.9701492537313433, + "grad_norm": 0.6798894086802646, + "learning_rate": 3.982078140156441e-05, + "loss": 0.3062, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2989204525947571, + "step": 715, + "valid_targets_mean": 2666.4, + "valid_targets_min": 593 + }, + { + "epoch": 0.9769335142469471, + "grad_norm": 0.7410349510063327, + "learning_rate": 3.981163008569764e-05, + "loss": 0.3355, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.31131139397621155, + "step": 720, + "valid_targets_mean": 3063.8, + "valid_targets_min": 612 + }, + { + "epoch": 0.9837177747625508, + "grad_norm": 0.6643176359530586, + "learning_rate": 3.9802252012057974e-05, + "loss": 0.3363, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3558084964752197, + "step": 725, + "valid_targets_mean": 3521.2, + "valid_targets_min": 619 + }, + { + "epoch": 0.9905020352781547, + "grad_norm": 0.7267283614828455, + "learning_rate": 3.979264728798392e-05, + "loss": 0.3334, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3494442105293274, + "step": 730, + "valid_targets_mean": 2547.4, + "valid_targets_min": 837 + }, + { + "epoch": 0.9972862957937585, + "grad_norm": 0.6814846127090516, + "learning_rate": 3.9782816023408194e-05, + "loss": 0.3327, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3272936940193176, + "step": 735, + "valid_targets_mean": 2686.4, + "valid_targets_min": 960 + }, + { + "epoch": 1.0040705563093624, + "grad_norm": 0.6861216126300802, + "learning_rate": 3.977275833085638e-05, + "loss": 0.302, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.31268030405044556, + "step": 740, + "valid_targets_mean": 2643.2, + "valid_targets_min": 776 + }, + { + "epoch": 1.010854816824966, + "grad_norm": 0.6898996438799597, + "learning_rate": 3.976247432544571e-05, + "loss": 0.283, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2911531925201416, + "step": 745, + "valid_targets_mean": 2797.4, + "valid_targets_min": 793 + }, + { + "epoch": 1.01763907734057, + "grad_norm": 0.8254261194413537, + "learning_rate": 3.9751964124883735e-05, + "loss": 0.3147, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3192952275276184, + "step": 750, + "valid_targets_mean": 2253.1, + "valid_targets_min": 692 + }, + { + "epoch": 1.0244233378561738, + "grad_norm": 0.7754318957680647, + "learning_rate": 3.974122784946694e-05, + "loss": 0.2937, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27857261896133423, + "step": 755, + "valid_targets_mean": 2432.6, + "valid_targets_min": 767 + }, + { + "epoch": 1.0312075983717774, + "grad_norm": 0.7881514631070836, + "learning_rate": 3.97302656220794e-05, + "loss": 0.3074, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.320145845413208, + "step": 760, + "valid_targets_mean": 2445.4, + "valid_targets_min": 748 + }, + { + "epoch": 1.0379918588873813, + "grad_norm": 0.6506473704894072, + "learning_rate": 3.9719077568191376e-05, + "loss": 0.2972, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.28449857234954834, + "step": 765, + "valid_targets_mean": 3023.9, + "valid_targets_min": 901 + }, + { + "epoch": 1.044776119402985, + "grad_norm": 0.8042677555990032, + "learning_rate": 3.970766381585786e-05, + "loss": 0.2826, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.26466116309165955, + "step": 770, + "valid_targets_mean": 2212.1, + "valid_targets_min": 820 + }, + { + "epoch": 1.0515603799185889, + "grad_norm": 0.676421887365203, + "learning_rate": 3.969602449571712e-05, + "loss": 0.2962, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.29949331283569336, + "step": 775, + "valid_targets_mean": 3120.0, + "valid_targets_min": 652 + }, + { + "epoch": 1.0583446404341927, + "grad_norm": 0.7474747697048539, + "learning_rate": 3.96841597409892e-05, + "loss": 0.3093, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.30292749404907227, + "step": 780, + "valid_targets_mean": 2577.1, + "valid_targets_min": 844 + }, + { + "epoch": 1.0651289009497964, + "grad_norm": 0.7960660690648608, + "learning_rate": 3.9672069687474415e-05, + "loss": 0.2917, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.28914356231689453, + "step": 785, + "valid_targets_mean": 2216.7, + "valid_targets_min": 656 + }, + { + "epoch": 1.0719131614654003, + "grad_norm": 0.6763388576498762, + "learning_rate": 3.965975447355176e-05, + "loss": 0.3005, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.30142050981521606, + "step": 790, + "valid_targets_mean": 2811.3, + "valid_targets_min": 1151 + }, + { + "epoch": 1.0786974219810042, + "grad_norm": 0.8330584303910075, + "learning_rate": 3.964721424017735e-05, + "loss": 0.3027, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.303778737783432, + "step": 795, + "valid_targets_mean": 2451.8, + "valid_targets_min": 521 + }, + { + "epoch": 1.0854816824966078, + "grad_norm": 0.7262140518757996, + "learning_rate": 3.963444913088281e-05, + "loss": 0.2993, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3419913649559021, + "step": 800, + "valid_targets_mean": 3062.5, + "valid_targets_min": 678 + }, + { + "epoch": 1.0922659430122117, + "grad_norm": 0.701846588477317, + "learning_rate": 3.9621459291773626e-05, + "loss": 0.2948, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3039608597755432, + "step": 805, + "valid_targets_mean": 2721.3, + "valid_targets_min": 649 + }, + { + "epoch": 1.0990502035278156, + "grad_norm": 0.764825483304104, + "learning_rate": 3.960824487152746e-05, + "loss": 0.3129, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3330846428871155, + "step": 810, + "valid_targets_mean": 2494.1, + "valid_targets_min": 704 + }, + { + "epoch": 1.1058344640434192, + "grad_norm": 0.8702738985222226, + "learning_rate": 3.959480602139247e-05, + "loss": 0.2953, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2918407917022705, + "step": 815, + "valid_targets_mean": 2434.2, + "valid_targets_min": 759 + }, + { + "epoch": 1.112618724559023, + "grad_norm": 0.6076347441974812, + "learning_rate": 3.958114289518557e-05, + "loss": 0.2949, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2711153030395508, + "step": 820, + "valid_targets_mean": 3189.2, + "valid_targets_min": 809 + }, + { + "epoch": 1.1194029850746268, + "grad_norm": 0.6404883109837152, + "learning_rate": 3.956725564929066e-05, + "loss": 0.2885, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2949562072753906, + "step": 825, + "valid_targets_mean": 3202.6, + "valid_targets_min": 1170 + }, + { + "epoch": 1.1261872455902306, + "grad_norm": 0.6942618107302919, + "learning_rate": 3.955314444265685e-05, + "loss": 0.2952, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.29515475034713745, + "step": 830, + "valid_targets_mean": 2622.8, + "valid_targets_min": 753 + }, + { + "epoch": 1.1329715061058345, + "grad_norm": 0.7378195219174956, + "learning_rate": 3.9538809436796635e-05, + "loss": 0.3012, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.31367671489715576, + "step": 835, + "valid_targets_mean": 2731.7, + "valid_targets_min": 1299 + }, + { + "epoch": 1.1397557666214382, + "grad_norm": 0.7537202284076141, + "learning_rate": 3.952425079578405e-05, + "loss": 0.3048, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3013504147529602, + "step": 840, + "valid_targets_mean": 2165.1, + "valid_targets_min": 675 + }, + { + "epoch": 1.146540027137042, + "grad_norm": 0.7523754539948808, + "learning_rate": 3.9509468686252776e-05, + "loss": 0.3017, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2941988706588745, + "step": 845, + "valid_targets_mean": 2384.8, + "valid_targets_min": 835 + }, + { + "epoch": 1.153324287652646, + "grad_norm": 0.5769173901162691, + "learning_rate": 3.9494463277394256e-05, + "loss": 0.2817, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2812822461128235, + "step": 850, + "valid_targets_mean": 3832.6, + "valid_targets_min": 944 + }, + { + "epoch": 1.1601085481682496, + "grad_norm": 0.6902875767301633, + "learning_rate": 3.947923474095575e-05, + "loss": 0.2994, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2795878052711487, + "step": 855, + "valid_targets_mean": 2588.9, + "valid_targets_min": 937 + }, + { + "epoch": 1.1668928086838535, + "grad_norm": 0.6932765985873842, + "learning_rate": 3.946378325123836e-05, + "loss": 0.3018, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.29586702585220337, + "step": 860, + "valid_targets_mean": 2557.6, + "valid_targets_min": 1059 + }, + { + "epoch": 1.1736770691994574, + "grad_norm": 0.8142184847264059, + "learning_rate": 3.9448108985095036e-05, + "loss": 0.3229, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3577713966369629, + "step": 865, + "valid_targets_mean": 2609.6, + "valid_targets_min": 1031 + }, + { + "epoch": 1.180461329715061, + "grad_norm": 0.6535181326922942, + "learning_rate": 3.943221212192859e-05, + "loss": 0.3037, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3148359954357147, + "step": 870, + "valid_targets_mean": 3229.5, + "valid_targets_min": 1220 + }, + { + "epoch": 1.187245590230665, + "grad_norm": 0.6505841244224432, + "learning_rate": 3.941609284368956e-05, + "loss": 0.3067, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2802058458328247, + "step": 875, + "valid_targets_mean": 3026.5, + "valid_targets_min": 858 + }, + { + "epoch": 1.1940298507462686, + "grad_norm": 0.6960891267075039, + "learning_rate": 3.939975133487422e-05, + "loss": 0.2933, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3029175400733948, + "step": 880, + "valid_targets_mean": 2756.3, + "valid_targets_min": 611 + }, + { + "epoch": 1.2008141112618724, + "grad_norm": 0.6800823657754136, + "learning_rate": 3.938318778252241e-05, + "loss": 0.3207, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.29470711946487427, + "step": 885, + "valid_targets_mean": 3122.9, + "valid_targets_min": 1028 + }, + { + "epoch": 1.2075983717774763, + "grad_norm": 0.7814295994587976, + "learning_rate": 3.9366402376215405e-05, + "loss": 0.3077, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.31138426065444946, + "step": 890, + "valid_targets_mean": 2320.1, + "valid_targets_min": 714 + }, + { + "epoch": 1.21438263229308, + "grad_norm": 0.7335457886775086, + "learning_rate": 3.934939530807376e-05, + "loss": 0.308, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2950824797153473, + "step": 895, + "valid_targets_mean": 2272.6, + "valid_targets_min": 623 + }, + { + "epoch": 1.2211668928086838, + "grad_norm": 0.7406055989848646, + "learning_rate": 3.933216677275512e-05, + "loss": 0.2979, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.30795565247535706, + "step": 900, + "valid_targets_mean": 2708.2, + "valid_targets_min": 618 + }, + { + "epoch": 1.2279511533242877, + "grad_norm": 0.7271310816643761, + "learning_rate": 3.931471696745194e-05, + "loss": 0.2949, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3034670650959015, + "step": 905, + "valid_targets_mean": 2719.8, + "valid_targets_min": 1144 + }, + { + "epoch": 1.2347354138398914, + "grad_norm": 0.6098483261250427, + "learning_rate": 3.929704609188927e-05, + "loss": 0.2926, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.30650919675827026, + "step": 910, + "valid_targets_mean": 3541.2, + "valid_targets_min": 837 + }, + { + "epoch": 1.2415196743554953, + "grad_norm": 0.6695184378675495, + "learning_rate": 3.9279154348322495e-05, + "loss": 0.2949, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.29480671882629395, + "step": 915, + "valid_targets_mean": 2631.8, + "valid_targets_min": 627 + }, + { + "epoch": 1.2483039348710991, + "grad_norm": 0.6364092125880288, + "learning_rate": 3.926104194153495e-05, + "loss": 0.2941, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2782565951347351, + "step": 920, + "valid_targets_mean": 2974.1, + "valid_targets_min": 881 + }, + { + "epoch": 1.2550881953867028, + "grad_norm": 0.8301944651771248, + "learning_rate": 3.924270907883562e-05, + "loss": 0.2939, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2791467607021332, + "step": 925, + "valid_targets_mean": 2883.7, + "valid_targets_min": 703 + }, + { + "epoch": 1.2618724559023067, + "grad_norm": 0.8022627958583087, + "learning_rate": 3.922415597005677e-05, + "loss": 0.2958, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.281793475151062, + "step": 930, + "valid_targets_mean": 1910.9, + "valid_targets_min": 643 + }, + { + "epoch": 1.2686567164179103, + "grad_norm": 0.6363883403354091, + "learning_rate": 3.920538282755153e-05, + "loss": 0.2933, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2905217707157135, + "step": 935, + "valid_targets_mean": 3314.0, + "valid_targets_min": 1133 + }, + { + "epoch": 1.2754409769335142, + "grad_norm": 0.637738645846229, + "learning_rate": 3.918638986619146e-05, + "loss": 0.2977, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2899988889694214, + "step": 940, + "valid_targets_mean": 3262.2, + "valid_targets_min": 807 + }, + { + "epoch": 1.282225237449118, + "grad_norm": 0.6487116870961588, + "learning_rate": 3.9167177303364115e-05, + "loss": 0.3026, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3026847243309021, + "step": 945, + "valid_targets_mean": 3640.3, + "valid_targets_min": 1299 + }, + { + "epoch": 1.289009497964722, + "grad_norm": 0.7490100317858217, + "learning_rate": 3.9147745358970524e-05, + "loss": 0.3071, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.28088971972465515, + "step": 950, + "valid_targets_mean": 2377.6, + "valid_targets_min": 909 + }, + { + "epoch": 1.2957937584803256, + "grad_norm": 0.7137946081945997, + "learning_rate": 3.9128094255422696e-05, + "loss": 0.2934, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2796930968761444, + "step": 955, + "valid_targets_mean": 2555.9, + "valid_targets_min": 631 + }, + { + "epoch": 1.3025780189959295, + "grad_norm": 0.5993098074500324, + "learning_rate": 3.910822421764106e-05, + "loss": 0.2958, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2874249219894409, + "step": 960, + "valid_targets_mean": 3206.5, + "valid_targets_min": 1560 + }, + { + "epoch": 1.3093622795115332, + "grad_norm": 0.9231815200437856, + "learning_rate": 3.9088135473051914e-05, + "loss": 0.3072, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.33113178610801697, + "step": 965, + "valid_targets_mean": 2220.5, + "valid_targets_min": 484 + }, + { + "epoch": 1.316146540027137, + "grad_norm": 0.6697716973175908, + "learning_rate": 3.906782825158479e-05, + "loss": 0.2955, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.285530149936676, + "step": 970, + "valid_targets_mean": 2935.3, + "valid_targets_min": 1225 + }, + { + "epoch": 1.322930800542741, + "grad_norm": 0.7281815877744984, + "learning_rate": 3.904730278566986e-05, + "loss": 0.3115, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3005235493183136, + "step": 975, + "valid_targets_mean": 2603.3, + "valid_targets_min": 768 + }, + { + "epoch": 1.3297150610583446, + "grad_norm": 0.748646343253343, + "learning_rate": 3.9026559310235234e-05, + "loss": 0.29, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2904489040374756, + "step": 980, + "valid_targets_mean": 2551.4, + "valid_targets_min": 1193 + }, + { + "epoch": 1.3364993215739485, + "grad_norm": 0.6174464137752114, + "learning_rate": 3.900559806270429e-05, + "loss": 0.2787, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24410629272460938, + "step": 985, + "valid_targets_mean": 2941.9, + "valid_targets_min": 692 + }, + { + "epoch": 1.3432835820895521, + "grad_norm": 0.7452637300432898, + "learning_rate": 3.898441928299297e-05, + "loss": 0.3051, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.30062615871429443, + "step": 990, + "valid_targets_mean": 2270.9, + "valid_targets_min": 937 + }, + { + "epoch": 1.350067842605156, + "grad_norm": 0.6464889713515524, + "learning_rate": 3.896302321350702e-05, + "loss": 0.313, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2945151925086975, + "step": 995, + "valid_targets_mean": 3044.5, + "valid_targets_min": 1167 + }, + { + "epoch": 1.3568521031207599, + "grad_norm": 0.7017509437824359, + "learning_rate": 3.894141009913921e-05, + "loss": 0.3039, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.29814016819000244, + "step": 1000, + "valid_targets_mean": 2468.2, + "valid_targets_min": 530 + }, + { + "epoch": 1.3636363636363638, + "grad_norm": 0.6780559700998617, + "learning_rate": 3.891958018726655e-05, + "loss": 0.2888, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.28629612922668457, + "step": 1005, + "valid_targets_mean": 2853.8, + "valid_targets_min": 807 + }, + { + "epoch": 1.3704206241519674, + "grad_norm": 0.8191037771052176, + "learning_rate": 3.8897533727747434e-05, + "loss": 0.2924, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3040386438369751, + "step": 1010, + "valid_targets_mean": 2648.5, + "valid_targets_min": 740 + }, + { + "epoch": 1.3772048846675713, + "grad_norm": 0.668502617185383, + "learning_rate": 3.88752709729188e-05, + "loss": 0.2884, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.31058913469314575, + "step": 1015, + "valid_targets_mean": 3066.1, + "valid_targets_min": 1388 + }, + { + "epoch": 1.383989145183175, + "grad_norm": 0.7094563085559425, + "learning_rate": 3.885279217759324e-05, + "loss": 0.293, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27286526560783386, + "step": 1020, + "valid_targets_mean": 2670.2, + "valid_targets_min": 698 + }, + { + "epoch": 1.3907734056987788, + "grad_norm": 0.6912293288922038, + "learning_rate": 3.883009759905607e-05, + "loss": 0.2901, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.29000505805015564, + "step": 1025, + "valid_targets_mean": 3009.6, + "valid_targets_min": 635 + }, + { + "epoch": 1.3975576662143827, + "grad_norm": 0.598939732528779, + "learning_rate": 3.8807187497062394e-05, + "loss": 0.3045, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27007484436035156, + "step": 1030, + "valid_targets_mean": 3264.9, + "valid_targets_min": 637 + }, + { + "epoch": 1.4043419267299864, + "grad_norm": 0.6399031295655998, + "learning_rate": 3.8784062133834136e-05, + "loss": 0.3077, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.30463242530822754, + "step": 1035, + "valid_targets_mean": 2838.9, + "valid_targets_min": 957 + }, + { + "epoch": 1.4111261872455902, + "grad_norm": 0.6746984691711082, + "learning_rate": 3.876072177405703e-05, + "loss": 0.2943, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3121001720428467, + "step": 1040, + "valid_targets_mean": 2916.5, + "valid_targets_min": 956 + }, + { + "epoch": 1.417910447761194, + "grad_norm": 0.7284630600724608, + "learning_rate": 3.8737166684877596e-05, + "loss": 0.2738, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2644261121749878, + "step": 1045, + "valid_targets_mean": 2068.2, + "valid_targets_min": 671 + }, + { + "epoch": 1.4246947082767978, + "grad_norm": 0.6562294087271845, + "learning_rate": 3.871339713590007e-05, + "loss": 0.2954, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2642728388309479, + "step": 1050, + "valid_targets_mean": 2884.8, + "valid_targets_min": 920 + }, + { + "epoch": 1.4314789687924017, + "grad_norm": 0.6478250945987496, + "learning_rate": 3.868941339918335e-05, + "loss": 0.2929, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.32172685861587524, + "step": 1055, + "valid_targets_mean": 3082.5, + "valid_targets_min": 1197 + }, + { + "epoch": 1.4382632293080055, + "grad_norm": 0.644202603849787, + "learning_rate": 3.866521574923784e-05, + "loss": 0.3059, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.31228554248809814, + "step": 1060, + "valid_targets_mean": 3175.8, + "valid_targets_min": 1451 + }, + { + "epoch": 1.4450474898236092, + "grad_norm": 0.661879013668605, + "learning_rate": 3.864080446302234e-05, + "loss": 0.3022, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3252270519733429, + "step": 1065, + "valid_targets_mean": 3194.7, + "valid_targets_min": 711 + }, + { + "epoch": 1.451831750339213, + "grad_norm": 0.6549935813841651, + "learning_rate": 3.8616179819940857e-05, + "loss": 0.3017, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.28648945689201355, + "step": 1070, + "valid_targets_mean": 2721.0, + "valid_targets_min": 1331 + }, + { + "epoch": 1.4586160108548167, + "grad_norm": 0.6531723880805292, + "learning_rate": 3.859134210183941e-05, + "loss": 0.3165, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.31207776069641113, + "step": 1075, + "valid_targets_mean": 2777.7, + "valid_targets_min": 676 + }, + { + "epoch": 1.4654002713704206, + "grad_norm": 0.6716764880355328, + "learning_rate": 3.856629159300283e-05, + "loss": 0.3037, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.32089391350746155, + "step": 1080, + "valid_targets_mean": 3052.2, + "valid_targets_min": 690 + }, + { + "epoch": 1.4721845318860245, + "grad_norm": 0.6655307018039186, + "learning_rate": 3.8541028580151465e-05, + "loss": 0.2882, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.26921534538269043, + "step": 1085, + "valid_targets_mean": 2680.0, + "valid_targets_min": 889 + }, + { + "epoch": 1.4789687924016282, + "grad_norm": 0.7389639492671433, + "learning_rate": 3.8515553352437927e-05, + "loss": 0.2983, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.28306636214256287, + "step": 1090, + "valid_targets_mean": 2348.4, + "valid_targets_min": 737 + }, + { + "epoch": 1.485753052917232, + "grad_norm": 0.6679028777069137, + "learning_rate": 3.848986620144376e-05, + "loss": 0.2958, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.26913198828697205, + "step": 1095, + "valid_targets_mean": 2657.7, + "valid_targets_min": 1243 + }, + { + "epoch": 1.4925373134328357, + "grad_norm": 0.6297786728759884, + "learning_rate": 3.846396742117614e-05, + "loss": 0.2942, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.31999313831329346, + "step": 1100, + "valid_targets_mean": 3368.2, + "valid_targets_min": 1524 + }, + { + "epoch": 1.4993215739484396, + "grad_norm": 0.6149463581344302, + "learning_rate": 3.843785730806447e-05, + "loss": 0.2946, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.29018402099609375, + "step": 1105, + "valid_targets_mean": 3319.4, + "valid_targets_min": 1354 + }, + { + "epoch": 1.5061058344640434, + "grad_norm": 0.809702645817836, + "learning_rate": 3.8411536160957004e-05, + "loss": 0.2942, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.29155030846595764, + "step": 1110, + "valid_targets_mean": 2079.1, + "valid_targets_min": 939 + }, + { + "epoch": 1.5128900949796473, + "grad_norm": 0.6375577416325251, + "learning_rate": 3.8385004281117425e-05, + "loss": 0.2877, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.26376283168792725, + "step": 1115, + "valid_targets_mean": 3094.1, + "valid_targets_min": 951 + }, + { + "epoch": 1.519674355495251, + "grad_norm": 0.7117061338880089, + "learning_rate": 3.83582619722214e-05, + "loss": 0.2951, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.32851043343544006, + "step": 1120, + "valid_targets_mean": 2709.3, + "valid_targets_min": 1321 + }, + { + "epoch": 1.5264586160108549, + "grad_norm": 0.7195810818077915, + "learning_rate": 3.833130954035311e-05, + "loss": 0.2864, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.29876279830932617, + "step": 1125, + "valid_targets_mean": 2322.1, + "valid_targets_min": 728 + }, + { + "epoch": 1.5332428765264585, + "grad_norm": 0.6893087648021691, + "learning_rate": 3.830414729400172e-05, + "loss": 0.2955, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.32284027338027954, + "step": 1130, + "valid_targets_mean": 2666.9, + "valid_targets_min": 882 + }, + { + "epoch": 1.5400271370420624, + "grad_norm": 0.6375253452820985, + "learning_rate": 3.8276775544057886e-05, + "loss": 0.2918, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2919315695762634, + "step": 1135, + "valid_targets_mean": 3260.2, + "valid_targets_min": 754 + }, + { + "epoch": 1.5468113975576663, + "grad_norm": 0.7850898957886953, + "learning_rate": 3.824919460381016e-05, + "loss": 0.3168, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2958727180957794, + "step": 1140, + "valid_targets_mean": 3326.9, + "valid_targets_min": 1021 + }, + { + "epoch": 1.5535956580732702, + "grad_norm": 0.6951131898750251, + "learning_rate": 3.822140478894143e-05, + "loss": 0.2965, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3108687102794647, + "step": 1145, + "valid_targets_mean": 2787.1, + "valid_targets_min": 842 + }, + { + "epoch": 1.5603799185888738, + "grad_norm": 0.733903133533046, + "learning_rate": 3.8193406417525304e-05, + "loss": 0.2998, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3235079050064087, + "step": 1150, + "valid_targets_mean": 2633.2, + "valid_targets_min": 1181 + }, + { + "epoch": 1.5671641791044775, + "grad_norm": 0.731200133367719, + "learning_rate": 3.816519981002245e-05, + "loss": 0.3037, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2996997535228729, + "step": 1155, + "valid_targets_mean": 2690.8, + "valid_targets_min": 1251 + }, + { + "epoch": 1.5739484396200814, + "grad_norm": 0.6385315926667753, + "learning_rate": 3.8136785289276953e-05, + "loss": 0.2806, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27406877279281616, + "step": 1160, + "valid_targets_mean": 3035.6, + "valid_targets_min": 1429 + }, + { + "epoch": 1.5807327001356852, + "grad_norm": 0.7362793886550267, + "learning_rate": 3.81081631805126e-05, + "loss": 0.2901, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.34110113978385925, + "step": 1165, + "valid_targets_mean": 2940.3, + "valid_targets_min": 805 + }, + { + "epoch": 1.587516960651289, + "grad_norm": 0.7349732590117125, + "learning_rate": 3.807933381132917e-05, + "loss": 0.3104, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2936994135379791, + "step": 1170, + "valid_targets_mean": 2527.9, + "valid_targets_min": 746 + }, + { + "epoch": 1.5943012211668928, + "grad_norm": 0.7064877699762256, + "learning_rate": 3.805029751169869e-05, + "loss": 0.2972, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.29555004835128784, + "step": 1175, + "valid_targets_mean": 2654.1, + "valid_targets_min": 650 + }, + { + "epoch": 1.6010854816824966, + "grad_norm": 0.6447256804044132, + "learning_rate": 3.802105461396163e-05, + "loss": 0.2908, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.31151026487350464, + "step": 1180, + "valid_targets_mean": 2946.4, + "valid_targets_min": 738 + }, + { + "epoch": 1.6078697421981003, + "grad_norm": 0.5108082252163869, + "learning_rate": 3.799160545282312e-05, + "loss": 0.279, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2245091050863266, + "step": 1185, + "valid_targets_mean": 3714.3, + "valid_targets_min": 846 + }, + { + "epoch": 1.6146540027137042, + "grad_norm": 0.6878930154133356, + "learning_rate": 3.796195036534914e-05, + "loss": 0.2658, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2854118049144745, + "step": 1190, + "valid_targets_mean": 2541.6, + "valid_targets_min": 772 + }, + { + "epoch": 1.621438263229308, + "grad_norm": 0.8006673353310524, + "learning_rate": 3.793208969096261e-05, + "loss": 0.2921, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.28456225991249084, + "step": 1195, + "valid_targets_mean": 2468.4, + "valid_targets_min": 700 + }, + { + "epoch": 1.628222523744912, + "grad_norm": 0.7653832467394758, + "learning_rate": 3.790202377143956e-05, + "loss": 0.2975, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.31006136536598206, + "step": 1200, + "valid_targets_mean": 2955.1, + "valid_targets_min": 1195 + }, + { + "epoch": 1.6350067842605156, + "grad_norm": 0.7315864774837488, + "learning_rate": 3.7871752950905174e-05, + "loss": 0.304, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3193378448486328, + "step": 1205, + "valid_targets_mean": 2490.4, + "valid_targets_min": 612 + }, + { + "epoch": 1.6417910447761193, + "grad_norm": 0.7643610632774577, + "learning_rate": 3.784127757582987e-05, + "loss": 0.302, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3098391592502594, + "step": 1210, + "valid_targets_mean": 2237.9, + "valid_targets_min": 662 + }, + { + "epoch": 1.6485753052917231, + "grad_norm": 0.6008604291150627, + "learning_rate": 3.7810597995025336e-05, + "loss": 0.2835, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2691340446472168, + "step": 1215, + "valid_targets_mean": 2998.4, + "valid_targets_min": 685 + }, + { + "epoch": 1.655359565807327, + "grad_norm": 0.6364039836954427, + "learning_rate": 3.7779714559640555e-05, + "loss": 0.2894, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.29183879494667053, + "step": 1220, + "valid_targets_mean": 3299.7, + "valid_targets_min": 768 + }, + { + "epoch": 1.662143826322931, + "grad_norm": 0.8360956147817608, + "learning_rate": 3.774862762315772e-05, + "loss": 0.2918, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.30887937545776367, + "step": 1225, + "valid_targets_mean": 2721.2, + "valid_targets_min": 467 + }, + { + "epoch": 1.6689280868385346, + "grad_norm": 0.6028843978675598, + "learning_rate": 3.771733754138829e-05, + "loss": 0.2873, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2811635732650757, + "step": 1230, + "valid_targets_mean": 3361.2, + "valid_targets_min": 839 + }, + { + "epoch": 1.6757123473541384, + "grad_norm": 0.7428753179701643, + "learning_rate": 3.768584467246883e-05, + "loss": 0.3046, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.30133670568466187, + "step": 1235, + "valid_targets_mean": 3111.6, + "valid_targets_min": 683 + }, + { + "epoch": 1.682496607869742, + "grad_norm": 0.7569776252384394, + "learning_rate": 3.765414937685695e-05, + "loss": 0.2999, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3009093999862671, + "step": 1240, + "valid_targets_mean": 2188.5, + "valid_targets_min": 697 + }, + { + "epoch": 1.689280868385346, + "grad_norm": 0.5814940877820675, + "learning_rate": 3.762225201732715e-05, + "loss": 0.2836, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.26204395294189453, + "step": 1245, + "valid_targets_mean": 3493.9, + "valid_targets_min": 800 + }, + { + "epoch": 1.6960651289009498, + "grad_norm": 0.726614967916978, + "learning_rate": 3.7590152958966745e-05, + "loss": 0.2898, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.321370929479599, + "step": 1250, + "valid_targets_mean": 2850.1, + "valid_targets_min": 715 + }, + { + "epoch": 1.7028493894165537, + "grad_norm": 0.7426123839264059, + "learning_rate": 3.7557852569171585e-05, + "loss": 0.2917, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2817172110080719, + "step": 1255, + "valid_targets_mean": 2224.9, + "valid_targets_min": 754 + }, + { + "epoch": 1.7096336499321574, + "grad_norm": 0.8799943306194825, + "learning_rate": 3.7525351217641915e-05, + "loss": 0.3085, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3357008993625641, + "step": 1260, + "valid_targets_mean": 3111.0, + "valid_targets_min": 578 + }, + { + "epoch": 1.716417910447761, + "grad_norm": 0.6111490118620997, + "learning_rate": 3.7492649276378124e-05, + "loss": 0.2828, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2888554632663727, + "step": 1265, + "valid_targets_mean": 3284.9, + "valid_targets_min": 1267 + }, + { + "epoch": 1.723202170963365, + "grad_norm": 0.6589094616205049, + "learning_rate": 3.7459747119676484e-05, + "loss": 0.305, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.30003267526626587, + "step": 1270, + "valid_targets_mean": 2895.2, + "valid_targets_min": 1178 + }, + { + "epoch": 1.7299864314789688, + "grad_norm": 0.640067102221961, + "learning_rate": 3.742664512412488e-05, + "loss": 0.3087, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.31731560826301575, + "step": 1275, + "valid_targets_mean": 3285.7, + "valid_targets_min": 1465 + }, + { + "epoch": 1.7367706919945727, + "grad_norm": 0.6939365904317435, + "learning_rate": 3.7393343668598495e-05, + "loss": 0.2927, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2893563210964203, + "step": 1280, + "valid_targets_mean": 2671.6, + "valid_targets_min": 1204 + }, + { + "epoch": 1.7435549525101763, + "grad_norm": 0.6861699873490201, + "learning_rate": 3.735984313425544e-05, + "loss": 0.2781, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3145902156829834, + "step": 1285, + "valid_targets_mean": 2858.2, + "valid_targets_min": 1055 + }, + { + "epoch": 1.7503392130257802, + "grad_norm": 0.5893287065962871, + "learning_rate": 3.732614390453245e-05, + "loss": 0.2994, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2815445065498352, + "step": 1290, + "valid_targets_mean": 3251.5, + "valid_targets_min": 760 + }, + { + "epoch": 1.7571234735413839, + "grad_norm": 0.6605129026405406, + "learning_rate": 3.729224636514046e-05, + "loss": 0.2863, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.28478604555130005, + "step": 1295, + "valid_targets_mean": 2549.3, + "valid_targets_min": 961 + }, + { + "epoch": 1.7639077340569878, + "grad_norm": 0.7080612755113385, + "learning_rate": 3.7258150904060184e-05, + "loss": 0.2862, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.30780327320098877, + "step": 1300, + "valid_targets_mean": 2727.4, + "valid_targets_min": 1274 + }, + { + "epoch": 1.7706919945725916, + "grad_norm": 0.6157145299604274, + "learning_rate": 3.72238579115377e-05, + "loss": 0.2958, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.31349170207977295, + "step": 1305, + "valid_targets_mean": 3526.4, + "valid_targets_min": 911 + }, + { + "epoch": 1.7774762550881955, + "grad_norm": 0.6628037819653686, + "learning_rate": 3.7189367780079956e-05, + "loss": 0.3035, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.30656638741493225, + "step": 1310, + "valid_targets_mean": 2991.2, + "valid_targets_min": 815 + }, + { + "epoch": 1.7842605156037992, + "grad_norm": 0.7303577186056781, + "learning_rate": 3.7154680904450306e-05, + "loss": 0.2947, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25550299882888794, + "step": 1315, + "valid_targets_mean": 2137.6, + "valid_targets_min": 780 + }, + { + "epoch": 1.7910447761194028, + "grad_norm": 0.7432172204712651, + "learning_rate": 3.711979768166397e-05, + "loss": 0.2878, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2851966619491577, + "step": 1320, + "valid_targets_mean": 2175.0, + "valid_targets_min": 782 + }, + { + "epoch": 1.7978290366350067, + "grad_norm": 0.6101418921075195, + "learning_rate": 3.708471851098349e-05, + "loss": 0.2989, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.29730525612831116, + "step": 1325, + "valid_targets_mean": 3097.9, + "valid_targets_min": 819 + }, + { + "epoch": 1.8046132971506106, + "grad_norm": 0.6995136361312011, + "learning_rate": 3.704944379391418e-05, + "loss": 0.2947, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.28219103813171387, + "step": 1330, + "valid_targets_mean": 2626.4, + "valid_targets_min": 963 + }, + { + "epoch": 1.8113975576662145, + "grad_norm": 0.7747003781816696, + "learning_rate": 3.70139739341995e-05, + "loss": 0.304, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2927844226360321, + "step": 1335, + "valid_targets_mean": 2138.4, + "valid_targets_min": 1030 + }, + { + "epoch": 1.8181818181818183, + "grad_norm": 0.6296472975547316, + "learning_rate": 3.6978309337816475e-05, + "loss": 0.3003, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2978728413581848, + "step": 1340, + "valid_targets_mean": 2901.1, + "valid_targets_min": 1269 + }, + { + "epoch": 1.824966078697422, + "grad_norm": 0.6529444503372535, + "learning_rate": 3.6942450412971006e-05, + "loss": 0.2869, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2776811420917511, + "step": 1345, + "valid_targets_mean": 2990.2, + "valid_targets_min": 1290 + }, + { + "epoch": 1.8317503392130257, + "grad_norm": 0.6073872705332342, + "learning_rate": 3.690639757009322e-05, + "loss": 0.2851, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2847082316875458, + "step": 1350, + "valid_targets_mean": 3336.8, + "valid_targets_min": 1011 + }, + { + "epoch": 1.8385345997286295, + "grad_norm": 0.6726342078161094, + "learning_rate": 3.687015122183278e-05, + "loss": 0.2985, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.31284165382385254, + "step": 1355, + "valid_targets_mean": 3056.4, + "valid_targets_min": 878 + }, + { + "epoch": 1.8453188602442334, + "grad_norm": 0.6570381591584823, + "learning_rate": 3.6833711783054146e-05, + "loss": 0.2845, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27733927965164185, + "step": 1360, + "valid_targets_mean": 2751.9, + "valid_targets_min": 627 + }, + { + "epoch": 1.8521031207598373, + "grad_norm": 0.7873750592257918, + "learning_rate": 3.6797079670831835e-05, + "loss": 0.307, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2843400537967682, + "step": 1365, + "valid_targets_mean": 2333.1, + "valid_targets_min": 1105 + }, + { + "epoch": 1.858887381275441, + "grad_norm": 0.6324090204988919, + "learning_rate": 3.676025530444563e-05, + "loss": 0.2728, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2516278326511383, + "step": 1370, + "valid_targets_mean": 2830.2, + "valid_targets_min": 739 + }, + { + "epoch": 1.8656716417910446, + "grad_norm": 0.7359610094620258, + "learning_rate": 3.672323910537581e-05, + "loss": 0.2843, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.31318145990371704, + "step": 1375, + "valid_targets_mean": 2449.1, + "valid_targets_min": 627 + }, + { + "epoch": 1.8724559023066485, + "grad_norm": 0.6497309545854958, + "learning_rate": 3.668603149729832e-05, + "loss": 0.2844, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.26779574155807495, + "step": 1380, + "valid_targets_mean": 2840.0, + "valid_targets_min": 1094 + }, + { + "epoch": 1.8792401628222524, + "grad_norm": 0.6267804873051095, + "learning_rate": 3.664863290607989e-05, + "loss": 0.3077, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.28004786372184753, + "step": 1385, + "valid_targets_mean": 2897.0, + "valid_targets_min": 929 + }, + { + "epoch": 1.8860244233378562, + "grad_norm": 0.6879680447864663, + "learning_rate": 3.6611043759773195e-05, + "loss": 0.2888, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.31352704763412476, + "step": 1390, + "valid_targets_mean": 2756.0, + "valid_targets_min": 928 + }, + { + "epoch": 1.8928086838534601, + "grad_norm": 0.6301096600514708, + "learning_rate": 3.6573264488611944e-05, + "loss": 0.2915, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3064451217651367, + "step": 1395, + "valid_targets_mean": 3032.6, + "valid_targets_min": 791 + }, + { + "epoch": 1.8995929443690638, + "grad_norm": 0.6958211841970261, + "learning_rate": 3.6535295525005964e-05, + "loss": 0.2965, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2943972647190094, + "step": 1400, + "valid_targets_mean": 2836.4, + "valid_targets_min": 648 + }, + { + "epoch": 1.9063772048846674, + "grad_norm": 0.7887387389485876, + "learning_rate": 3.649713730353623e-05, + "loss": 0.2989, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.28154975175857544, + "step": 1405, + "valid_targets_mean": 2262.3, + "valid_targets_min": 916 + }, + { + "epoch": 1.9131614654002713, + "grad_norm": 1.5313410215337464, + "learning_rate": 3.645879026094991e-05, + "loss": 0.3075, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.29764658212661743, + "step": 1410, + "valid_targets_mean": 2811.7, + "valid_targets_min": 821 + }, + { + "epoch": 1.9199457259158752, + "grad_norm": 0.7151356767653663, + "learning_rate": 3.642025483615536e-05, + "loss": 0.3223, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3090359568595886, + "step": 1415, + "valid_targets_mean": 2566.2, + "valid_targets_min": 659 + }, + { + "epoch": 1.926729986431479, + "grad_norm": 0.7117696747551553, + "learning_rate": 3.6381531470217095e-05, + "loss": 0.2912, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.32978102564811707, + "step": 1420, + "valid_targets_mean": 2933.4, + "valid_targets_min": 1166 + }, + { + "epoch": 1.9335142469470827, + "grad_norm": 0.7645454065347908, + "learning_rate": 3.634262060635075e-05, + "loss": 0.2951, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.31343501806259155, + "step": 1425, + "valid_targets_mean": 2486.8, + "valid_targets_min": 706 + }, + { + "epoch": 1.9402985074626866, + "grad_norm": 0.6774335342238738, + "learning_rate": 3.6303522689918015e-05, + "loss": 0.2876, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2950223684310913, + "step": 1430, + "valid_targets_mean": 2928.6, + "valid_targets_min": 713 + }, + { + "epoch": 1.9470827679782903, + "grad_norm": 0.6640212258014019, + "learning_rate": 3.6264238168421505e-05, + "loss": 0.2872, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2745462656021118, + "step": 1435, + "valid_targets_mean": 2628.1, + "valid_targets_min": 1206 + }, + { + "epoch": 1.9538670284938942, + "grad_norm": 0.6832788009297351, + "learning_rate": 3.622476749149967e-05, + "loss": 0.2829, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2921183705329895, + "step": 1440, + "valid_targets_mean": 2838.4, + "valid_targets_min": 617 + }, + { + "epoch": 1.960651289009498, + "grad_norm": 1.02465763567763, + "learning_rate": 3.618511111092164e-05, + "loss": 0.2885, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.30975252389907837, + "step": 1445, + "valid_targets_mean": 2425.6, + "valid_targets_min": 1024 + }, + { + "epoch": 1.967435549525102, + "grad_norm": 0.853915586974603, + "learning_rate": 3.6145269480582035e-05, + "loss": 0.309, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3293916583061218, + "step": 1450, + "valid_targets_mean": 3230.9, + "valid_targets_min": 1358 + }, + { + "epoch": 1.9742198100407056, + "grad_norm": 0.7500252855517454, + "learning_rate": 3.61052430564958e-05, + "loss": 0.2786, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.285286545753479, + "step": 1455, + "valid_targets_mean": 1947.4, + "valid_targets_min": 848 + }, + { + "epoch": 1.9810040705563092, + "grad_norm": 0.6777810077646718, + "learning_rate": 3.606503229679299e-05, + "loss": 0.302, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.30354389548301697, + "step": 1460, + "valid_targets_mean": 2624.3, + "valid_targets_min": 1035 + }, + { + "epoch": 1.987788331071913, + "grad_norm": 0.7750533185261688, + "learning_rate": 3.6024637661713463e-05, + "loss": 0.2893, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2793115973472595, + "step": 1465, + "valid_targets_mean": 2612.2, + "valid_targets_min": 941 + }, + { + "epoch": 1.994572591587517, + "grad_norm": 0.7919665782855059, + "learning_rate": 3.59840596136017e-05, + "loss": 0.2908, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27840328216552734, + "step": 1470, + "valid_targets_mean": 2205.1, + "valid_targets_min": 992 + }, + { + "epoch": 2.001356852103121, + "grad_norm": 0.6677080223559784, + "learning_rate": 3.5943298616901434e-05, + "loss": 0.2717, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.252011239528656, + "step": 1475, + "valid_targets_mean": 3309.2, + "valid_targets_min": 1337 + }, + { + "epoch": 2.0081411126187247, + "grad_norm": 0.7207007939374871, + "learning_rate": 3.590235513815041e-05, + "loss": 0.2529, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2334105670452118, + "step": 1480, + "valid_targets_mean": 2218.1, + "valid_targets_min": 739 + }, + { + "epoch": 2.014925373134328, + "grad_norm": 0.7632243131847694, + "learning_rate": 3.586122964597499e-05, + "loss": 0.2697, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2754613161087036, + "step": 1485, + "valid_targets_mean": 2588.3, + "valid_targets_min": 678 + }, + { + "epoch": 2.021709633649932, + "grad_norm": 0.7149262249272994, + "learning_rate": 3.581992261108478e-05, + "loss": 0.2478, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22789128124713898, + "step": 1490, + "valid_targets_mean": 2391.2, + "valid_targets_min": 791 + }, + { + "epoch": 2.028493894165536, + "grad_norm": 0.7021382513288259, + "learning_rate": 3.577843450626729e-05, + "loss": 0.251, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2544996738433838, + "step": 1495, + "valid_targets_mean": 3086.4, + "valid_targets_min": 1196 + }, + { + "epoch": 2.03527815468114, + "grad_norm": 0.6740781431872369, + "learning_rate": 3.573676580638252e-05, + "loss": 0.2586, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2830080986022949, + "step": 1500, + "valid_targets_mean": 3231.4, + "valid_targets_min": 820 + }, + { + "epoch": 2.0420624151967437, + "grad_norm": 0.6625091780634826, + "learning_rate": 3.569491698835747e-05, + "loss": 0.2481, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2644336223602295, + "step": 1505, + "valid_targets_mean": 3546.1, + "valid_targets_min": 790 + }, + { + "epoch": 2.0488466757123476, + "grad_norm": 0.772346491835651, + "learning_rate": 3.565288853118073e-05, + "loss": 0.2565, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25193676352500916, + "step": 1510, + "valid_targets_mean": 2011.5, + "valid_targets_min": 692 + }, + { + "epoch": 2.055630936227951, + "grad_norm": 0.7015358520017895, + "learning_rate": 3.561068091589699e-05, + "loss": 0.2513, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2524373531341553, + "step": 1515, + "valid_targets_mean": 2833.7, + "valid_targets_min": 1076 + }, + { + "epoch": 2.062415196743555, + "grad_norm": 0.6819591135273353, + "learning_rate": 3.556829462560152e-05, + "loss": 0.2622, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2732108533382416, + "step": 1520, + "valid_targets_mean": 3037.2, + "valid_targets_min": 544 + }, + { + "epoch": 2.0691994572591588, + "grad_norm": 0.7308638462885366, + "learning_rate": 3.552573014543465e-05, + "loss": 0.2549, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24330492317676544, + "step": 1525, + "valid_targets_mean": 2520.2, + "valid_targets_min": 965 + }, + { + "epoch": 2.0759837177747626, + "grad_norm": 0.7835094284328342, + "learning_rate": 3.548298796257623e-05, + "loss": 0.2589, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2812742590904236, + "step": 1530, + "valid_targets_mean": 2378.4, + "valid_targets_min": 512 + }, + { + "epoch": 2.0827679782903665, + "grad_norm": 0.6968412144913585, + "learning_rate": 3.5440068566240006e-05, + "loss": 0.2536, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25412964820861816, + "step": 1535, + "valid_targets_mean": 2384.0, + "valid_targets_min": 972 + }, + { + "epoch": 2.08955223880597, + "grad_norm": 0.7616955455624203, + "learning_rate": 3.539697244766809e-05, + "loss": 0.2738, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.29562467336654663, + "step": 1540, + "valid_targets_mean": 2267.1, + "valid_targets_min": 777 + }, + { + "epoch": 2.096336499321574, + "grad_norm": 0.7131875884565378, + "learning_rate": 3.5353700100125285e-05, + "loss": 0.256, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2696763873100281, + "step": 1545, + "valid_targets_mean": 2582.3, + "valid_targets_min": 909 + }, + { + "epoch": 2.1031207598371777, + "grad_norm": 0.7768997711520144, + "learning_rate": 3.531025201889346e-05, + "loss": 0.2649, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27121901512145996, + "step": 1550, + "valid_targets_mean": 2734.2, + "valid_targets_min": 684 + }, + { + "epoch": 2.1099050203527816, + "grad_norm": 0.5972671434746235, + "learning_rate": 3.526662870126588e-05, + "loss": 0.2467, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21913185715675354, + "step": 1555, + "valid_targets_mean": 3436.5, + "valid_targets_min": 880 + }, + { + "epoch": 2.1166892808683855, + "grad_norm": 0.8928905338020054, + "learning_rate": 3.5222830646541495e-05, + "loss": 0.2407, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2423502504825592, + "step": 1560, + "valid_targets_mean": 3034.4, + "valid_targets_min": 1466 + }, + { + "epoch": 2.123473541383989, + "grad_norm": 0.7792302275809843, + "learning_rate": 3.517885835601927e-05, + "loss": 0.2544, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23429229855537415, + "step": 1565, + "valid_targets_mean": 2170.3, + "valid_targets_min": 656 + }, + { + "epoch": 2.130257801899593, + "grad_norm": 0.6637166367797426, + "learning_rate": 3.5134712332992396e-05, + "loss": 0.2545, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2654414474964142, + "step": 1570, + "valid_targets_mean": 3309.1, + "valid_targets_min": 783 + }, + { + "epoch": 2.1370420624151967, + "grad_norm": 0.7708919189354086, + "learning_rate": 3.5090393082742555e-05, + "loss": 0.2539, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.26184701919555664, + "step": 1575, + "valid_targets_mean": 2298.2, + "valid_targets_min": 737 + }, + { + "epoch": 2.1438263229308006, + "grad_norm": 0.7215973806908841, + "learning_rate": 3.504590111253414e-05, + "loss": 0.2532, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27381983399391174, + "step": 1580, + "valid_targets_mean": 2703.3, + "valid_targets_min": 1289 + }, + { + "epoch": 2.1506105834464044, + "grad_norm": 0.8043249601230251, + "learning_rate": 3.5001236931608446e-05, + "loss": 0.2609, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2702006995677948, + "step": 1585, + "valid_targets_mean": 2177.4, + "valid_targets_min": 793 + }, + { + "epoch": 2.1573948439620083, + "grad_norm": 0.6914069724407004, + "learning_rate": 3.495640105117784e-05, + "loss": 0.2629, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27623242139816284, + "step": 1590, + "valid_targets_mean": 2991.8, + "valid_targets_min": 1354 + }, + { + "epoch": 2.1641791044776117, + "grad_norm": 0.8082050307344478, + "learning_rate": 3.491139398441989e-05, + "loss": 0.2543, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2768654525279999, + "step": 1595, + "valid_targets_mean": 2209.0, + "valid_targets_min": 740 + }, + { + "epoch": 2.1709633649932156, + "grad_norm": 0.7574042859615202, + "learning_rate": 3.4866216246471536e-05, + "loss": 0.2571, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.28827136754989624, + "step": 1600, + "valid_targets_mean": 2482.6, + "valid_targets_min": 746 + }, + { + "epoch": 2.1777476255088195, + "grad_norm": 0.6561439016270234, + "learning_rate": 3.482086835442313e-05, + "loss": 0.2579, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2732161581516266, + "step": 1605, + "valid_targets_mean": 3250.7, + "valid_targets_min": 819 + }, + { + "epoch": 2.1845318860244234, + "grad_norm": 0.6444399840162086, + "learning_rate": 3.47753508273126e-05, + "loss": 0.2402, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27326977252960205, + "step": 1610, + "valid_targets_mean": 2993.8, + "valid_targets_min": 997 + }, + { + "epoch": 2.1913161465400273, + "grad_norm": 0.6624015010390857, + "learning_rate": 3.472966418611944e-05, + "loss": 0.2552, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2766575217247009, + "step": 1615, + "valid_targets_mean": 3185.8, + "valid_targets_min": 728 + }, + { + "epoch": 2.198100407055631, + "grad_norm": 0.6436526455011548, + "learning_rate": 3.468380895375876e-05, + "loss": 0.2685, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2507815957069397, + "step": 1620, + "valid_targets_mean": 3060.2, + "valid_targets_min": 933 + }, + { + "epoch": 2.2048846675712346, + "grad_norm": 0.6752004219899516, + "learning_rate": 3.463778565507536e-05, + "loss": 0.2596, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24225379526615143, + "step": 1625, + "valid_targets_mean": 2709.7, + "valid_targets_min": 758 + }, + { + "epoch": 2.2116689280868385, + "grad_norm": 0.6096818559003151, + "learning_rate": 3.45915948168376e-05, + "loss": 0.2615, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24749873578548431, + "step": 1630, + "valid_targets_mean": 3076.5, + "valid_targets_min": 708 + }, + { + "epoch": 2.2184531886024423, + "grad_norm": 0.6382946984463229, + "learning_rate": 3.454523696773152e-05, + "loss": 0.2564, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.28292831778526306, + "step": 1635, + "valid_targets_mean": 3444.9, + "valid_targets_min": 661 + }, + { + "epoch": 2.225237449118046, + "grad_norm": 0.7180530567460727, + "learning_rate": 3.4498712638354646e-05, + "loss": 0.2605, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2660506069660187, + "step": 1640, + "valid_targets_mean": 2654.6, + "valid_targets_min": 1015 + }, + { + "epoch": 2.23202170963365, + "grad_norm": 0.6477313971324171, + "learning_rate": 3.445202236121006e-05, + "loss": 0.249, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23461966216564178, + "step": 1645, + "valid_targets_mean": 2946.6, + "valid_targets_min": 876 + }, + { + "epoch": 2.2388059701492535, + "grad_norm": 0.758634223425707, + "learning_rate": 3.440516667070017e-05, + "loss": 0.2496, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2572581470012665, + "step": 1650, + "valid_targets_mean": 2419.9, + "valid_targets_min": 799 + }, + { + "epoch": 2.2455902306648574, + "grad_norm": 0.7525020456973835, + "learning_rate": 3.435814610312068e-05, + "loss": 0.2547, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2762409448623657, + "step": 1655, + "valid_targets_mean": 2593.2, + "valid_targets_min": 930 + }, + { + "epoch": 2.2523744911804613, + "grad_norm": 0.6900366742315267, + "learning_rate": 3.431096119665443e-05, + "loss": 0.258, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2573353052139282, + "step": 1660, + "valid_targets_mean": 2404.2, + "valid_targets_min": 832 + }, + { + "epoch": 2.259158751696065, + "grad_norm": 0.7966484389836349, + "learning_rate": 3.426361249136522e-05, + "loss": 0.2598, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2512604892253876, + "step": 1665, + "valid_targets_mean": 2740.3, + "valid_targets_min": 933 + }, + { + "epoch": 2.265943012211669, + "grad_norm": 0.6267399197635503, + "learning_rate": 3.421610052919163e-05, + "loss": 0.2561, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2536308467388153, + "step": 1670, + "valid_targets_mean": 2806.4, + "valid_targets_min": 666 + }, + { + "epoch": 2.2727272727272725, + "grad_norm": 0.710274498225842, + "learning_rate": 3.4168425853940865e-05, + "loss": 0.2546, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23908887803554535, + "step": 1675, + "valid_targets_mean": 2732.9, + "valid_targets_min": 803 + }, + { + "epoch": 2.2795115332428764, + "grad_norm": 0.6223626538847287, + "learning_rate": 3.412058901128245e-05, + "loss": 0.2396, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23127231001853943, + "step": 1680, + "valid_targets_mean": 2821.8, + "valid_targets_min": 1270 + }, + { + "epoch": 2.2862957937584802, + "grad_norm": 0.6294319468393673, + "learning_rate": 3.407259054874206e-05, + "loss": 0.2541, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2410857379436493, + "step": 1685, + "valid_targets_mean": 2965.4, + "valid_targets_min": 734 + }, + { + "epoch": 2.293080054274084, + "grad_norm": 0.8008654337703052, + "learning_rate": 3.402443101569521e-05, + "loss": 0.2499, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.26027989387512207, + "step": 1690, + "valid_targets_mean": 2395.8, + "valid_targets_min": 881 + }, + { + "epoch": 2.299864314789688, + "grad_norm": 0.7218010335927416, + "learning_rate": 3.397611096336097e-05, + "loss": 0.2645, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3087459206581116, + "step": 1695, + "valid_targets_mean": 2789.3, + "valid_targets_min": 820 + }, + { + "epoch": 2.306648575305292, + "grad_norm": 0.7847076898478889, + "learning_rate": 3.392763094479568e-05, + "loss": 0.2561, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.26217982172966003, + "step": 1700, + "valid_targets_mean": 2132.0, + "valid_targets_min": 722 + }, + { + "epoch": 2.3134328358208958, + "grad_norm": 0.7515258396714922, + "learning_rate": 3.387899151488659e-05, + "loss": 0.2671, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2646181583404541, + "step": 1705, + "valid_targets_mean": 2402.3, + "valid_targets_min": 627 + }, + { + "epoch": 2.320217096336499, + "grad_norm": 0.6310084529124328, + "learning_rate": 3.383019323034552e-05, + "loss": 0.2712, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.26773834228515625, + "step": 1710, + "valid_targets_mean": 3745.5, + "valid_targets_min": 1312 + }, + { + "epoch": 2.327001356852103, + "grad_norm": 0.6476866135246807, + "learning_rate": 3.378123664970252e-05, + "loss": 0.2547, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23596899211406708, + "step": 1715, + "valid_targets_mean": 2782.4, + "valid_targets_min": 1037 + }, + { + "epoch": 2.333785617367707, + "grad_norm": 0.6635878932623026, + "learning_rate": 3.373212233329941e-05, + "loss": 0.268, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2669086158275604, + "step": 1720, + "valid_targets_mean": 2656.4, + "valid_targets_min": 656 + }, + { + "epoch": 2.340569877883311, + "grad_norm": 0.8372787858249849, + "learning_rate": 3.368285084328343e-05, + "loss": 0.2622, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27093416452407837, + "step": 1725, + "valid_targets_mean": 2007.2, + "valid_targets_min": 788 + }, + { + "epoch": 2.3473541383989147, + "grad_norm": 0.7084081316996159, + "learning_rate": 3.363342274360076e-05, + "loss": 0.2597, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27293896675109863, + "step": 1730, + "valid_targets_mean": 2628.9, + "valid_targets_min": 670 + }, + { + "epoch": 2.354138398914518, + "grad_norm": 0.7361649736055962, + "learning_rate": 3.358383859999011e-05, + "loss": 0.2715, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24949458241462708, + "step": 1735, + "valid_targets_mean": 2166.1, + "valid_targets_min": 682 + }, + { + "epoch": 2.360922659430122, + "grad_norm": 0.6601077278545686, + "learning_rate": 3.353409897997621e-05, + "loss": 0.2585, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.252005398273468, + "step": 1740, + "valid_targets_mean": 2885.6, + "valid_targets_min": 953 + }, + { + "epoch": 2.367706919945726, + "grad_norm": 0.7833459948759297, + "learning_rate": 3.348420445286331e-05, + "loss": 0.2539, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25346460938453674, + "step": 1745, + "valid_targets_mean": 2880.2, + "valid_targets_min": 635 + }, + { + "epoch": 2.37449118046133, + "grad_norm": 0.6917728614350838, + "learning_rate": 3.3434155589728695e-05, + "loss": 0.2581, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21869820356369019, + "step": 1750, + "valid_targets_mean": 2367.4, + "valid_targets_min": 859 + }, + { + "epoch": 2.3812754409769337, + "grad_norm": 0.6338539881836546, + "learning_rate": 3.338395296341614e-05, + "loss": 0.2466, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2602981925010681, + "step": 1755, + "valid_targets_mean": 3345.6, + "valid_targets_min": 1233 + }, + { + "epoch": 2.388059701492537, + "grad_norm": 0.6645949196725213, + "learning_rate": 3.333359714852932e-05, + "loss": 0.2528, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2770848870277405, + "step": 1760, + "valid_targets_mean": 2870.8, + "valid_targets_min": 699 + }, + { + "epoch": 2.394843962008141, + "grad_norm": 0.6971754384837836, + "learning_rate": 3.328308872142528e-05, + "loss": 0.2549, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24311913549900055, + "step": 1765, + "valid_targets_mean": 2503.1, + "valid_targets_min": 1021 + }, + { + "epoch": 2.401628222523745, + "grad_norm": 0.6356007444165893, + "learning_rate": 3.3232428260207804e-05, + "loss": 0.2657, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.26365482807159424, + "step": 1770, + "valid_targets_mean": 3150.6, + "valid_targets_min": 1107 + }, + { + "epoch": 2.4084124830393487, + "grad_norm": 0.7060984927170915, + "learning_rate": 3.318161634472083e-05, + "loss": 0.2592, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27592113614082336, + "step": 1775, + "valid_targets_mean": 2829.7, + "valid_targets_min": 992 + }, + { + "epoch": 2.4151967435549526, + "grad_norm": 0.6510612732829962, + "learning_rate": 3.3130653556541774e-05, + "loss": 0.2494, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24161876738071442, + "step": 1780, + "valid_targets_mean": 3045.9, + "valid_targets_min": 746 + }, + { + "epoch": 2.4219810040705565, + "grad_norm": 0.6847258251540859, + "learning_rate": 3.30795404789749e-05, + "loss": 0.264, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27078160643577576, + "step": 1785, + "valid_targets_mean": 3208.2, + "valid_targets_min": 759 + }, + { + "epoch": 2.42876526458616, + "grad_norm": 0.7808291039965765, + "learning_rate": 3.302827769704464e-05, + "loss": 0.2588, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25728699564933777, + "step": 1790, + "valid_targets_mean": 2815.0, + "valid_targets_min": 1277 + }, + { + "epoch": 2.435549525101764, + "grad_norm": 0.7272148707674393, + "learning_rate": 3.29768657974889e-05, + "loss": 0.25, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.258420467376709, + "step": 1795, + "valid_targets_mean": 2726.6, + "valid_targets_min": 720 + }, + { + "epoch": 2.4423337856173677, + "grad_norm": 0.7459339825138199, + "learning_rate": 3.2925305368752346e-05, + "loss": 0.2616, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.28670960664749146, + "step": 1800, + "valid_targets_mean": 2654.2, + "valid_targets_min": 807 + }, + { + "epoch": 2.4491180461329716, + "grad_norm": 0.7287330807026949, + "learning_rate": 3.2873597000979626e-05, + "loss": 0.2656, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25203102827072144, + "step": 1805, + "valid_targets_mean": 3238.3, + "valid_targets_min": 1310 + }, + { + "epoch": 2.4559023066485755, + "grad_norm": 0.7104083422326701, + "learning_rate": 3.28217412860087e-05, + "loss": 0.2553, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21682174503803253, + "step": 1810, + "valid_targets_mean": 3128.4, + "valid_targets_min": 640 + }, + { + "epoch": 2.4626865671641793, + "grad_norm": 0.6057916542411326, + "learning_rate": 3.276973881736399e-05, + "loss": 0.259, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25353389978408813, + "step": 1815, + "valid_targets_mean": 3672.1, + "valid_targets_min": 1024 + }, + { + "epoch": 2.4694708276797828, + "grad_norm": 0.7923986851890575, + "learning_rate": 3.2717590190249617e-05, + "loss": 0.2632, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22225558757781982, + "step": 1820, + "valid_targets_mean": 2796.2, + "valid_targets_min": 750 + }, + { + "epoch": 2.4762550881953866, + "grad_norm": 0.6652659164621177, + "learning_rate": 3.2665296001542606e-05, + "loss": 0.2598, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.28310340642929077, + "step": 1825, + "valid_targets_mean": 3224.8, + "valid_targets_min": 916 + }, + { + "epoch": 2.4830393487109905, + "grad_norm": 0.7654192310941611, + "learning_rate": 3.261285684978601e-05, + "loss": 0.2578, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2906759977340698, + "step": 1830, + "valid_targets_mean": 2817.2, + "valid_targets_min": 867 + }, + { + "epoch": 2.4898236092265944, + "grad_norm": 0.7118429223959195, + "learning_rate": 3.256027333518209e-05, + "loss": 0.2425, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2705893814563751, + "step": 1835, + "valid_targets_mean": 2739.5, + "valid_targets_min": 785 + }, + { + "epoch": 2.4966078697421983, + "grad_norm": 0.7194235047620599, + "learning_rate": 3.250754605958546e-05, + "loss": 0.2629, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2895210385322571, + "step": 1840, + "valid_targets_mean": 2776.8, + "valid_targets_min": 875 + }, + { + "epoch": 2.5033921302578017, + "grad_norm": 0.7159119323725041, + "learning_rate": 3.245467562649615e-05, + "loss": 0.2531, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22497795522212982, + "step": 1845, + "valid_targets_mean": 2419.1, + "valid_targets_min": 685 + }, + { + "epoch": 2.5101763907734056, + "grad_norm": 0.6776377638938433, + "learning_rate": 3.240166264105274e-05, + "loss": 0.257, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2763141989707947, + "step": 1850, + "valid_targets_mean": 2846.4, + "valid_targets_min": 713 + }, + { + "epoch": 2.5169606512890095, + "grad_norm": 0.6865655138064035, + "learning_rate": 3.234850771002542e-05, + "loss": 0.2594, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24978536367416382, + "step": 1855, + "valid_targets_mean": 2978.5, + "valid_targets_min": 968 + }, + { + "epoch": 2.5237449118046134, + "grad_norm": 0.6699601179099225, + "learning_rate": 3.2295211441809043e-05, + "loss": 0.2614, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25676673650741577, + "step": 1860, + "valid_targets_mean": 2917.8, + "valid_targets_min": 1075 + }, + { + "epoch": 2.5305291723202172, + "grad_norm": 0.6889764980124685, + "learning_rate": 3.224177444641616e-05, + "loss": 0.2743, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3100902736186981, + "step": 1865, + "valid_targets_mean": 2546.6, + "valid_targets_min": 912 + }, + { + "epoch": 2.5373134328358207, + "grad_norm": 0.6793757318097332, + "learning_rate": 3.218819733547006e-05, + "loss": 0.2538, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2357414960861206, + "step": 1870, + "valid_targets_mean": 2497.8, + "valid_targets_min": 772 + }, + { + "epoch": 2.5440976933514245, + "grad_norm": 0.8152453145431383, + "learning_rate": 3.2134480722197707e-05, + "loss": 0.262, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.29958784580230713, + "step": 1875, + "valid_targets_mean": 2523.8, + "valid_targets_min": 584 + }, + { + "epoch": 2.5508819538670284, + "grad_norm": 0.7495650389529989, + "learning_rate": 3.208062522142282e-05, + "loss": 0.2551, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2574930191040039, + "step": 1880, + "valid_targets_mean": 2723.4, + "valid_targets_min": 951 + }, + { + "epoch": 2.5576662143826323, + "grad_norm": 0.7714988898913233, + "learning_rate": 3.202663144955875e-05, + "loss": 0.2504, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2438960075378418, + "step": 1885, + "valid_targets_mean": 2427.2, + "valid_targets_min": 1263 + }, + { + "epoch": 2.564450474898236, + "grad_norm": 0.8149406397413984, + "learning_rate": 3.1972500024601475e-05, + "loss": 0.2536, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2427600771188736, + "step": 1890, + "valid_targets_mean": 2379.7, + "valid_targets_min": 648 + }, + { + "epoch": 2.5712347354138396, + "grad_norm": 0.835575869140074, + "learning_rate": 3.1918231566122467e-05, + "loss": 0.2634, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2951461672782898, + "step": 1895, + "valid_targets_mean": 2883.2, + "valid_targets_min": 691 + }, + { + "epoch": 2.578018995929444, + "grad_norm": 0.6480761129447208, + "learning_rate": 3.186382669526169e-05, + "loss": 0.2505, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25713399052619934, + "step": 1900, + "valid_targets_mean": 3203.1, + "valid_targets_min": 1042 + }, + { + "epoch": 2.5848032564450474, + "grad_norm": 0.6296775116161838, + "learning_rate": 3.180928603472041e-05, + "loss": 0.2502, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24473142623901367, + "step": 1905, + "valid_targets_mean": 3100.0, + "valid_targets_min": 683 + }, + { + "epoch": 2.5915875169606513, + "grad_norm": 0.6495398797854156, + "learning_rate": 3.175461020875412e-05, + "loss": 0.2457, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23860132694244385, + "step": 1910, + "valid_targets_mean": 3028.4, + "valid_targets_min": 1066 + }, + { + "epoch": 2.598371777476255, + "grad_norm": 0.6700738944682313, + "learning_rate": 3.1699799843165356e-05, + "loss": 0.2608, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2498638927936554, + "step": 1915, + "valid_targets_mean": 2918.0, + "valid_targets_min": 530 + }, + { + "epoch": 2.605156037991859, + "grad_norm": 0.6577566326274685, + "learning_rate": 3.164485556529657e-05, + "loss": 0.2655, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25191181898117065, + "step": 1920, + "valid_targets_mean": 2755.8, + "valid_targets_min": 693 + }, + { + "epoch": 2.611940298507463, + "grad_norm": 0.6054859630469727, + "learning_rate": 3.158977800402292e-05, + "loss": 0.2537, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2440011352300644, + "step": 1925, + "valid_targets_mean": 3179.1, + "valid_targets_min": 1141 + }, + { + "epoch": 2.6187245590230663, + "grad_norm": 0.6786451801971204, + "learning_rate": 3.1534567789745084e-05, + "loss": 0.2631, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2818109095096588, + "step": 1930, + "valid_targets_mean": 2949.1, + "valid_targets_min": 999 + }, + { + "epoch": 2.62550881953867, + "grad_norm": 0.6942883497416937, + "learning_rate": 3.147922555438206e-05, + "loss": 0.2538, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24890519678592682, + "step": 1935, + "valid_targets_mean": 2585.8, + "valid_targets_min": 837 + }, + { + "epoch": 2.632293080054274, + "grad_norm": 0.7058789473959215, + "learning_rate": 3.14237519313639e-05, + "loss": 0.2705, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2726551294326782, + "step": 1940, + "valid_targets_mean": 2646.4, + "valid_targets_min": 764 + }, + { + "epoch": 2.639077340569878, + "grad_norm": 0.6700353317554494, + "learning_rate": 3.1368147555624486e-05, + "loss": 0.2599, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2708538770675659, + "step": 1945, + "valid_targets_mean": 3069.9, + "valid_targets_min": 1199 + }, + { + "epoch": 2.645861601085482, + "grad_norm": 0.7323325511038016, + "learning_rate": 3.131241306359426e-05, + "loss": 0.2517, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2605024576187134, + "step": 1950, + "valid_targets_mean": 2873.1, + "valid_targets_min": 680 + }, + { + "epoch": 2.6526458616010853, + "grad_norm": 0.6879553895964564, + "learning_rate": 3.125654909319294e-05, + "loss": 0.2528, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2747609615325928, + "step": 1955, + "valid_targets_mean": 3061.4, + "valid_targets_min": 865 + }, + { + "epoch": 2.659430122116689, + "grad_norm": 0.6920353888734679, + "learning_rate": 3.120055628382218e-05, + "loss": 0.2556, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23946809768676758, + "step": 1960, + "valid_targets_mean": 2961.5, + "valid_targets_min": 1092 + }, + { + "epoch": 2.666214382632293, + "grad_norm": 0.6453205678625881, + "learning_rate": 3.1144435276358325e-05, + "loss": 0.2582, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27227306365966797, + "step": 1965, + "valid_targets_mean": 3088.8, + "valid_targets_min": 1145 + }, + { + "epoch": 2.672998643147897, + "grad_norm": 0.6921138419734034, + "learning_rate": 3.1088186713145024e-05, + "loss": 0.2586, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25944316387176514, + "step": 1970, + "valid_targets_mean": 2601.7, + "valid_targets_min": 762 + }, + { + "epoch": 2.679782903663501, + "grad_norm": 0.6778135167815581, + "learning_rate": 3.103181123798587e-05, + "loss": 0.2501, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24725311994552612, + "step": 1975, + "valid_targets_mean": 2674.5, + "valid_targets_min": 869 + }, + { + "epoch": 2.6865671641791042, + "grad_norm": 0.7351183532910395, + "learning_rate": 3.0975309496137066e-05, + "loss": 0.2567, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25560200214385986, + "step": 1980, + "valid_targets_mean": 2353.3, + "valid_targets_min": 538 + }, + { + "epoch": 2.693351424694708, + "grad_norm": 0.6182324179687427, + "learning_rate": 3.091868213430004e-05, + "loss": 0.2587, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22743821144104004, + "step": 1985, + "valid_targets_mean": 2698.8, + "valid_targets_min": 701 + }, + { + "epoch": 2.700135685210312, + "grad_norm": 0.7363935013486361, + "learning_rate": 3.086192980061399e-05, + "loss": 0.2633, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27139535546302795, + "step": 1990, + "valid_targets_mean": 2369.6, + "valid_targets_min": 1089 + }, + { + "epoch": 2.706919945725916, + "grad_norm": 0.6440601737571228, + "learning_rate": 3.080505314464854e-05, + "loss": 0.247, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23245973885059357, + "step": 1995, + "valid_targets_mean": 2775.4, + "valid_targets_min": 810 + }, + { + "epoch": 2.7137042062415198, + "grad_norm": 0.6702282934429393, + "learning_rate": 3.0748052817396254e-05, + "loss": 0.2568, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2478809952735901, + "step": 2000, + "valid_targets_mean": 2646.8, + "valid_targets_min": 1399 + }, + { + "epoch": 2.7204884667571236, + "grad_norm": 0.6800473944399235, + "learning_rate": 3.0690929471265185e-05, + "loss": 0.2654, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24637192487716675, + "step": 2005, + "valid_targets_mean": 2696.4, + "valid_targets_min": 793 + }, + { + "epoch": 2.7272727272727275, + "grad_norm": 0.6303354724046744, + "learning_rate": 3.063368376007145e-05, + "loss": 0.2606, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.26109838485717773, + "step": 2010, + "valid_targets_mean": 3292.2, + "valid_targets_min": 1629 + }, + { + "epoch": 2.734056987788331, + "grad_norm": 0.6765904914853859, + "learning_rate": 3.057631633903167e-05, + "loss": 0.2505, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24430710077285767, + "step": 2015, + "valid_targets_mean": 2731.6, + "valid_targets_min": 746 + }, + { + "epoch": 2.740841248303935, + "grad_norm": 0.6432972439515613, + "learning_rate": 3.05188278647556e-05, + "loss": 0.2501, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23854488134384155, + "step": 2020, + "valid_targets_mean": 2842.9, + "valid_targets_min": 1268 + }, + { + "epoch": 2.7476255088195387, + "grad_norm": 0.6577825459454796, + "learning_rate": 3.0461218995238453e-05, + "loss": 0.2594, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.254770427942276, + "step": 2025, + "valid_targets_mean": 2721.9, + "valid_targets_min": 1257 + }, + { + "epoch": 2.7544097693351426, + "grad_norm": 0.6949351773395782, + "learning_rate": 3.0403490389853484e-05, + "loss": 0.2488, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23568594455718994, + "step": 2030, + "valid_targets_mean": 2796.5, + "valid_targets_min": 1147 + }, + { + "epoch": 2.7611940298507465, + "grad_norm": 0.7323091372332017, + "learning_rate": 3.034564270934442e-05, + "loss": 0.2481, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.26838138699531555, + "step": 2035, + "valid_targets_mean": 2450.1, + "valid_targets_min": 627 + }, + { + "epoch": 2.76797829036635, + "grad_norm": 0.7072050298374907, + "learning_rate": 3.0287676615817854e-05, + "loss": 0.2588, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2603970468044281, + "step": 2040, + "valid_targets_mean": 2724.6, + "valid_targets_min": 662 + }, + { + "epoch": 2.774762550881954, + "grad_norm": 0.7013954312412192, + "learning_rate": 3.0229592772735717e-05, + "loss": 0.2375, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2462655007839203, + "step": 2045, + "valid_targets_mean": 2669.0, + "valid_targets_min": 796 + }, + { + "epoch": 2.7815468113975577, + "grad_norm": 0.7194087010991239, + "learning_rate": 3.0171391844907663e-05, + "loss": 0.2455, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22803866863250732, + "step": 2050, + "valid_targets_mean": 2516.1, + "valid_targets_min": 1199 + }, + { + "epoch": 2.7883310719131615, + "grad_norm": 0.6532424708668592, + "learning_rate": 3.011307449848345e-05, + "loss": 0.2669, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25693702697753906, + "step": 2055, + "valid_targets_mean": 2905.6, + "valid_targets_min": 981 + }, + { + "epoch": 2.7951153324287654, + "grad_norm": 0.6019841138198014, + "learning_rate": 3.0054641400945354e-05, + "loss": 0.2691, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2629098892211914, + "step": 2060, + "valid_targets_mean": 3336.9, + "valid_targets_min": 1009 + }, + { + "epoch": 2.801899592944369, + "grad_norm": 0.590938009064258, + "learning_rate": 2.9996093221100468e-05, + "loss": 0.2546, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24467141926288605, + "step": 2065, + "valid_targets_mean": 3226.9, + "valid_targets_min": 597 + }, + { + "epoch": 2.8086838534599727, + "grad_norm": 0.6853913375855859, + "learning_rate": 2.993743062907311e-05, + "loss": 0.2773, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3042706549167633, + "step": 2070, + "valid_targets_mean": 3042.9, + "valid_targets_min": 1170 + }, + { + "epoch": 2.8154681139755766, + "grad_norm": 0.6566966730503214, + "learning_rate": 2.9878654296297112e-05, + "loss": 0.259, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.26403605937957764, + "step": 2075, + "valid_targets_mean": 3035.4, + "valid_targets_min": 1044 + }, + { + "epoch": 2.8222523744911805, + "grad_norm": 0.7543020252020352, + "learning_rate": 2.981976489550814e-05, + "loss": 0.2571, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24550700187683105, + "step": 2080, + "valid_targets_mean": 2845.9, + "valid_targets_min": 1216 + }, + { + "epoch": 2.8290366350067844, + "grad_norm": 0.7093980696165589, + "learning_rate": 2.9760763100736016e-05, + "loss": 0.2486, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2348649501800537, + "step": 2085, + "valid_targets_mean": 2346.6, + "valid_targets_min": 692 + }, + { + "epoch": 2.835820895522388, + "grad_norm": 0.6549172444022449, + "learning_rate": 2.970164958729698e-05, + "loss": 0.2377, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21750161051750183, + "step": 2090, + "valid_targets_mean": 2408.1, + "valid_targets_min": 799 + }, + { + "epoch": 2.842605156037992, + "grad_norm": 0.8065514310435659, + "learning_rate": 2.964242503178597e-05, + "loss": 0.2622, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.26003286242485046, + "step": 2095, + "valid_targets_mean": 1907.9, + "valid_targets_min": 793 + }, + { + "epoch": 2.8493894165535956, + "grad_norm": 0.578124344699749, + "learning_rate": 2.958309011206888e-05, + "loss": 0.2461, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2605810761451721, + "step": 2100, + "valid_targets_mean": 3794.0, + "valid_targets_min": 1360 + }, + { + "epoch": 2.8561736770691994, + "grad_norm": 0.6706934642907637, + "learning_rate": 2.9523645507274798e-05, + "loss": 0.2416, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2569592297077179, + "step": 2105, + "valid_targets_mean": 2656.1, + "valid_targets_min": 1266 + }, + { + "epoch": 2.8629579375848033, + "grad_norm": 0.7102884408604969, + "learning_rate": 2.9464091897788232e-05, + "loss": 0.2548, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27441930770874023, + "step": 2110, + "valid_targets_mean": 2542.4, + "valid_targets_min": 821 + }, + { + "epoch": 2.869742198100407, + "grad_norm": 0.6396640888363082, + "learning_rate": 2.940442996524132e-05, + "loss": 0.2529, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.28722479939460754, + "step": 2115, + "valid_targets_mean": 3116.9, + "valid_targets_min": 1036 + }, + { + "epoch": 2.876526458616011, + "grad_norm": 0.8432323434605307, + "learning_rate": 2.9344660392506043e-05, + "loss": 0.2592, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.28614795207977295, + "step": 2120, + "valid_targets_mean": 3104.6, + "valid_targets_min": 818 + }, + { + "epoch": 2.8833107191316145, + "grad_norm": 0.6395419218584879, + "learning_rate": 2.9284783863686382e-05, + "loss": 0.2581, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2634587287902832, + "step": 2125, + "valid_targets_mean": 3117.6, + "valid_targets_min": 760 + }, + { + "epoch": 2.8900949796472184, + "grad_norm": 0.5718851345375481, + "learning_rate": 2.922480106411053e-05, + "loss": 0.2697, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2612381875514984, + "step": 2130, + "valid_targets_mean": 3635.0, + "valid_targets_min": 761 + }, + { + "epoch": 2.8968792401628223, + "grad_norm": 0.6528582956501181, + "learning_rate": 2.9164712680322984e-05, + "loss": 0.2462, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2157207727432251, + "step": 2135, + "valid_targets_mean": 2491.8, + "valid_targets_min": 765 + }, + { + "epoch": 2.903663500678426, + "grad_norm": 0.8098720111841451, + "learning_rate": 2.9104519400076766e-05, + "loss": 0.264, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25750282406806946, + "step": 2140, + "valid_targets_mean": 2004.3, + "valid_targets_min": 743 + }, + { + "epoch": 2.91044776119403, + "grad_norm": 0.6201356506537019, + "learning_rate": 2.9044221912325486e-05, + "loss": 0.2483, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2485671043395996, + "step": 2145, + "valid_targets_mean": 3226.1, + "valid_targets_min": 697 + }, + { + "epoch": 2.9172320217096335, + "grad_norm": 0.6786949165909485, + "learning_rate": 2.8983820907215475e-05, + "loss": 0.2546, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21794003248214722, + "step": 2150, + "valid_targets_mean": 2685.1, + "valid_targets_min": 803 + }, + { + "epoch": 2.9240162822252374, + "grad_norm": 0.6196782492846893, + "learning_rate": 2.8923317076077915e-05, + "loss": 0.2589, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.259111225605011, + "step": 2155, + "valid_targets_mean": 3168.1, + "valid_targets_min": 1185 + }, + { + "epoch": 2.9308005427408412, + "grad_norm": 0.6506882542165565, + "learning_rate": 2.8862711111420893e-05, + "loss": 0.2461, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25076574087142944, + "step": 2160, + "valid_targets_mean": 3034.2, + "valid_targets_min": 1049 + }, + { + "epoch": 2.937584803256445, + "grad_norm": 0.6853337250211714, + "learning_rate": 2.8802003706921468e-05, + "loss": 0.2427, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2712341547012329, + "step": 2165, + "valid_targets_mean": 2867.3, + "valid_targets_min": 786 + }, + { + "epoch": 2.944369063772049, + "grad_norm": 0.7641884011034288, + "learning_rate": 2.8741195557417775e-05, + "loss": 0.2627, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2769216001033783, + "step": 2170, + "valid_targets_mean": 2302.2, + "valid_targets_min": 705 + }, + { + "epoch": 2.9511533242876524, + "grad_norm": 0.6826360549379976, + "learning_rate": 2.8680287358901028e-05, + "loss": 0.2553, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24993981420993805, + "step": 2175, + "valid_targets_mean": 2850.9, + "valid_targets_min": 761 + }, + { + "epoch": 2.9579375848032563, + "grad_norm": 0.7221044998473106, + "learning_rate": 2.861927980850757e-05, + "loss": 0.2556, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25909918546676636, + "step": 2180, + "valid_targets_mean": 2386.6, + "valid_targets_min": 671 + }, + { + "epoch": 2.96472184531886, + "grad_norm": 0.7377714080476292, + "learning_rate": 2.8558173604510907e-05, + "loss": 0.2368, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22513189911842346, + "step": 2185, + "valid_targets_mean": 2357.4, + "valid_targets_min": 634 + }, + { + "epoch": 2.971506105834464, + "grad_norm": 0.5623921608095559, + "learning_rate": 2.849696944631369e-05, + "loss": 0.2559, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.26851212978363037, + "step": 2190, + "valid_targets_mean": 4109.2, + "valid_targets_min": 1134 + }, + { + "epoch": 2.978290366350068, + "grad_norm": 0.6103524586471685, + "learning_rate": 2.8435668034439738e-05, + "loss": 0.2564, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2774103879928589, + "step": 2195, + "valid_targets_mean": 3372.1, + "valid_targets_min": 1052 + }, + { + "epoch": 2.9850746268656714, + "grad_norm": 0.6165970882379855, + "learning_rate": 2.8374270070525988e-05, + "loss": 0.2509, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23035350441932678, + "step": 2200, + "valid_targets_mean": 3327.9, + "valid_targets_min": 857 + }, + { + "epoch": 2.9918588873812757, + "grad_norm": 0.7660874337227206, + "learning_rate": 2.8312776257314497e-05, + "loss": 0.2542, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2771637737751007, + "step": 2205, + "valid_targets_mean": 2188.8, + "valid_targets_min": 712 + }, + { + "epoch": 2.998643147896879, + "grad_norm": 0.678819967572549, + "learning_rate": 2.8251187298644372e-05, + "loss": 0.2513, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25125089287757874, + "step": 2210, + "valid_targets_mean": 2593.7, + "valid_targets_min": 826 + }, + { + "epoch": 3.005427408412483, + "grad_norm": 0.6195799648166698, + "learning_rate": 2.818950389944374e-05, + "loss": 0.2267, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21490924060344696, + "step": 2215, + "valid_targets_mean": 3076.2, + "valid_targets_min": 1375 + }, + { + "epoch": 3.012211668928087, + "grad_norm": 0.741799685467653, + "learning_rate": 2.812772676572165e-05, + "loss": 0.2295, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2147355079650879, + "step": 2220, + "valid_targets_mean": 2800.7, + "valid_targets_min": 842 + }, + { + "epoch": 3.0189959294436908, + "grad_norm": 0.6481778989984706, + "learning_rate": 2.806585660456001e-05, + "loss": 0.2233, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21204230189323425, + "step": 2225, + "valid_targets_mean": 3253.1, + "valid_targets_min": 1058 + }, + { + "epoch": 3.0257801899592947, + "grad_norm": 0.7528798552940739, + "learning_rate": 2.8003894124105494e-05, + "loss": 0.2308, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24220474064350128, + "step": 2230, + "valid_targets_mean": 2921.9, + "valid_targets_min": 1386 + }, + { + "epoch": 3.032564450474898, + "grad_norm": 0.7539503673228278, + "learning_rate": 2.794184003356144e-05, + "loss": 0.2344, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2737279236316681, + "step": 2235, + "valid_targets_mean": 2459.6, + "valid_targets_min": 938 + }, + { + "epoch": 3.039348710990502, + "grad_norm": 0.6860093065024846, + "learning_rate": 2.787969504317972e-05, + "loss": 0.2326, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23056598007678986, + "step": 2240, + "valid_targets_mean": 3120.1, + "valid_targets_min": 700 + }, + { + "epoch": 3.046132971506106, + "grad_norm": 0.6742437476536095, + "learning_rate": 2.7817459864252606e-05, + "loss": 0.2293, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2076399177312851, + "step": 2245, + "valid_targets_mean": 2751.0, + "valid_targets_min": 716 + }, + { + "epoch": 3.0529172320217097, + "grad_norm": 0.6601870684447021, + "learning_rate": 2.7755135209104673e-05, + "loss": 0.2115, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21040399372577667, + "step": 2250, + "valid_targets_mean": 2970.5, + "valid_targets_min": 1266 + }, + { + "epoch": 3.0597014925373136, + "grad_norm": 0.7655435099255181, + "learning_rate": 2.769272179108458e-05, + "loss": 0.2252, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24985858798027039, + "step": 2255, + "valid_targets_mean": 3437.8, + "valid_targets_min": 1114 + }, + { + "epoch": 3.066485753052917, + "grad_norm": 0.7149714320640186, + "learning_rate": 2.7630220324556947e-05, + "loss": 0.2163, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21348509192466736, + "step": 2260, + "valid_targets_mean": 2941.0, + "valid_targets_min": 1039 + }, + { + "epoch": 3.073270013568521, + "grad_norm": 0.7792687892843089, + "learning_rate": 2.756763152489418e-05, + "loss": 0.2094, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20965218544006348, + "step": 2265, + "valid_targets_mean": 1918.6, + "valid_targets_min": 822 + }, + { + "epoch": 3.080054274084125, + "grad_norm": 0.6597990538607253, + "learning_rate": 2.7504956108468262e-05, + "loss": 0.2239, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24037772417068481, + "step": 2270, + "valid_targets_mean": 3295.2, + "valid_targets_min": 1204 + }, + { + "epoch": 3.0868385345997287, + "grad_norm": 0.7852404129824124, + "learning_rate": 2.7442194792642576e-05, + "loss": 0.228, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.26370474696159363, + "step": 2275, + "valid_targets_mean": 2461.3, + "valid_targets_min": 1207 + }, + { + "epoch": 3.0936227951153326, + "grad_norm": 0.6324909016089244, + "learning_rate": 2.737934829576367e-05, + "loss": 0.2228, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19589462876319885, + "step": 2280, + "valid_targets_mean": 2882.0, + "valid_targets_min": 1016 + }, + { + "epoch": 3.1004070556309364, + "grad_norm": 0.7132616114200901, + "learning_rate": 2.7316417337153053e-05, + "loss": 0.2263, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21955709159374237, + "step": 2285, + "valid_targets_mean": 2424.4, + "valid_targets_min": 744 + }, + { + "epoch": 3.10719131614654, + "grad_norm": 0.6627034885187564, + "learning_rate": 2.7253402637098963e-05, + "loss": 0.2212, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2161223590373993, + "step": 2290, + "valid_targets_mean": 2994.9, + "valid_targets_min": 1254 + }, + { + "epoch": 3.1139755766621438, + "grad_norm": 0.7133154934701709, + "learning_rate": 2.7190304916848114e-05, + "loss": 0.2225, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22376814484596252, + "step": 2295, + "valid_targets_mean": 2787.6, + "valid_targets_min": 952 + }, + { + "epoch": 3.1207598371777476, + "grad_norm": 0.7291074383267475, + "learning_rate": 2.712712489859743e-05, + "loss": 0.222, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23834912478923798, + "step": 2300, + "valid_targets_mean": 2901.0, + "valid_targets_min": 615 + }, + { + "epoch": 3.1275440976933515, + "grad_norm": 0.7568349432173351, + "learning_rate": 2.706386330548581e-05, + "loss": 0.2084, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19825339317321777, + "step": 2305, + "valid_targets_mean": 2677.8, + "valid_targets_min": 967 + }, + { + "epoch": 3.1343283582089554, + "grad_norm": 0.7241979030877912, + "learning_rate": 2.7000520861585825e-05, + "loss": 0.2177, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22395247220993042, + "step": 2310, + "valid_targets_mean": 2555.9, + "valid_targets_min": 890 + }, + { + "epoch": 3.141112618724559, + "grad_norm": 0.7176875343032588, + "learning_rate": 2.693709829189544e-05, + "loss": 0.2191, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21689847111701965, + "step": 2315, + "valid_targets_mean": 2447.6, + "valid_targets_min": 865 + }, + { + "epoch": 3.1478968792401627, + "grad_norm": 0.8624810704747251, + "learning_rate": 2.6873596322329717e-05, + "loss": 0.2307, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2509344816207886, + "step": 2320, + "valid_targets_mean": 1913.9, + "valid_targets_min": 676 + }, + { + "epoch": 3.1546811397557666, + "grad_norm": 0.6434418388849583, + "learning_rate": 2.6810015679712505e-05, + "loss": 0.2149, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22354134917259216, + "step": 2325, + "valid_targets_mean": 3262.2, + "valid_targets_min": 1113 + }, + { + "epoch": 3.1614654002713705, + "grad_norm": 0.7907645012767848, + "learning_rate": 2.6746357091768116e-05, + "loss": 0.2232, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22720026969909668, + "step": 2330, + "valid_targets_mean": 2312.9, + "valid_targets_min": 774 + }, + { + "epoch": 3.1682496607869743, + "grad_norm": 0.7095701119398697, + "learning_rate": 2.6682621287113e-05, + "loss": 0.2241, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23232200741767883, + "step": 2335, + "valid_targets_mean": 2940.6, + "valid_targets_min": 1009 + }, + { + "epoch": 3.175033921302578, + "grad_norm": 0.6882795435130347, + "learning_rate": 2.6618808995247408e-05, + "loss": 0.2196, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22832772135734558, + "step": 2340, + "valid_targets_mean": 2938.8, + "valid_targets_min": 888 + }, + { + "epoch": 3.1818181818181817, + "grad_norm": 0.6135099997369059, + "learning_rate": 2.6554920946547044e-05, + "loss": 0.2113, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23073089122772217, + "step": 2345, + "valid_targets_mean": 3286.8, + "valid_targets_min": 768 + }, + { + "epoch": 3.1886024423337855, + "grad_norm": 0.9423077941270365, + "learning_rate": 2.649095787225469e-05, + "loss": 0.2231, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2254088819026947, + "step": 2350, + "valid_targets_mean": 2568.0, + "valid_targets_min": 1236 + }, + { + "epoch": 3.1953867028493894, + "grad_norm": 0.810975408154495, + "learning_rate": 2.6426920504471865e-05, + "loss": 0.225, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21835219860076904, + "step": 2355, + "valid_targets_mean": 2296.8, + "valid_targets_min": 692 + }, + { + "epoch": 3.2021709633649933, + "grad_norm": 0.672655439805317, + "learning_rate": 2.636280957615041e-05, + "loss": 0.2103, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22122296690940857, + "step": 2360, + "valid_targets_mean": 2792.6, + "valid_targets_min": 802 + }, + { + "epoch": 3.208955223880597, + "grad_norm": 0.7434847650835706, + "learning_rate": 2.6298625821084128e-05, + "loss": 0.2201, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21797284483909607, + "step": 2365, + "valid_targets_mean": 2498.4, + "valid_targets_min": 1153 + }, + { + "epoch": 3.2157394843962006, + "grad_norm": 0.6570222965023365, + "learning_rate": 2.6234369973900372e-05, + "loss": 0.2208, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2033863663673401, + "step": 2370, + "valid_targets_mean": 2630.6, + "valid_targets_min": 770 + }, + { + "epoch": 3.2225237449118045, + "grad_norm": 0.8657617678949259, + "learning_rate": 2.6170042770051635e-05, + "loss": 0.228, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.225327730178833, + "step": 2375, + "valid_targets_mean": 1854.9, + "valid_targets_min": 732 + }, + { + "epoch": 3.2293080054274084, + "grad_norm": 0.7046910890483484, + "learning_rate": 2.610564494580714e-05, + "loss": 0.2241, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22602598369121552, + "step": 2380, + "valid_targets_mean": 2542.6, + "valid_targets_min": 742 + }, + { + "epoch": 3.2360922659430122, + "grad_norm": 0.6740596140779037, + "learning_rate": 2.604117723824441e-05, + "loss": 0.2288, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23400309681892395, + "step": 2385, + "valid_targets_mean": 3017.3, + "valid_targets_min": 760 + }, + { + "epoch": 3.242876526458616, + "grad_norm": 0.7404070619144895, + "learning_rate": 2.597664038524083e-05, + "loss": 0.2283, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2237531542778015, + "step": 2390, + "valid_targets_mean": 2550.8, + "valid_targets_min": 1095 + }, + { + "epoch": 3.24966078697422, + "grad_norm": 0.6731318456534926, + "learning_rate": 2.5912035125465208e-05, + "loss": 0.2131, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2236143946647644, + "step": 2395, + "valid_targets_mean": 2946.1, + "valid_targets_min": 800 + }, + { + "epoch": 3.2564450474898234, + "grad_norm": 0.7464562402100164, + "learning_rate": 2.5847362198369297e-05, + "loss": 0.2262, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2542741298675537, + "step": 2400, + "valid_targets_mean": 2603.9, + "valid_targets_min": 746 + }, + { + "epoch": 3.2632293080054273, + "grad_norm": 0.7388159380991954, + "learning_rate": 2.578262234417937e-05, + "loss": 0.2202, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21536928415298462, + "step": 2405, + "valid_targets_mean": 2555.2, + "valid_targets_min": 974 + }, + { + "epoch": 3.270013568521031, + "grad_norm": 0.7355746785542672, + "learning_rate": 2.5717816303887703e-05, + "loss": 0.2224, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21950095891952515, + "step": 2410, + "valid_targets_mean": 2990.9, + "valid_targets_min": 996 + }, + { + "epoch": 3.276797829036635, + "grad_norm": 0.7517171398028017, + "learning_rate": 2.565294481924415e-05, + "loss": 0.2181, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20042955875396729, + "step": 2415, + "valid_targets_mean": 2606.8, + "valid_targets_min": 799 + }, + { + "epoch": 3.283582089552239, + "grad_norm": 0.7068185853149664, + "learning_rate": 2.5588008632747593e-05, + "loss": 0.2311, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22197219729423523, + "step": 2420, + "valid_targets_mean": 3084.1, + "valid_targets_min": 825 + }, + { + "epoch": 3.290366350067843, + "grad_norm": 0.7279583182044714, + "learning_rate": 2.5523008487637482e-05, + "loss": 0.2259, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2149057388305664, + "step": 2425, + "valid_targets_mean": 2391.9, + "valid_targets_min": 734 + }, + { + "epoch": 3.2971506105834463, + "grad_norm": 0.6422722168754591, + "learning_rate": 2.5457945127885318e-05, + "loss": 0.2204, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24687312543392181, + "step": 2430, + "valid_targets_mean": 3709.7, + "valid_targets_min": 952 + }, + { + "epoch": 3.30393487109905, + "grad_norm": 0.6945878436621437, + "learning_rate": 2.539281929818614e-05, + "loss": 0.2212, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2088148444890976, + "step": 2435, + "valid_targets_mean": 2683.1, + "valid_targets_min": 797 + }, + { + "epoch": 3.310719131614654, + "grad_norm": 0.8469589878253486, + "learning_rate": 2.5327631743949982e-05, + "loss": 0.2232, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22623956203460693, + "step": 2440, + "valid_targets_mean": 2050.2, + "valid_targets_min": 625 + }, + { + "epoch": 3.317503392130258, + "grad_norm": 0.7039682068648079, + "learning_rate": 2.5262383211293386e-05, + "loss": 0.2256, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22464679181575775, + "step": 2445, + "valid_targets_mean": 3066.5, + "valid_targets_min": 835 + }, + { + "epoch": 3.324287652645862, + "grad_norm": 0.6549956329113884, + "learning_rate": 2.51970744470308e-05, + "loss": 0.2345, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25542882084846497, + "step": 2450, + "valid_targets_mean": 3588.2, + "valid_targets_min": 741 + }, + { + "epoch": 3.3310719131614652, + "grad_norm": 0.6922641541400616, + "learning_rate": 2.5131706198666104e-05, + "loss": 0.2177, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21417692303657532, + "step": 2455, + "valid_targets_mean": 2714.5, + "valid_targets_min": 933 + }, + { + "epoch": 3.337856173677069, + "grad_norm": 0.6313775940838555, + "learning_rate": 2.506627921438397e-05, + "loss": 0.2214, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20802749693393707, + "step": 2460, + "valid_targets_mean": 3045.8, + "valid_targets_min": 852 + }, + { + "epoch": 3.344640434192673, + "grad_norm": 0.6419716617358177, + "learning_rate": 2.5000794243041386e-05, + "loss": 0.2146, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20835299789905548, + "step": 2465, + "valid_targets_mean": 2813.1, + "valid_targets_min": 734 + }, + { + "epoch": 3.351424694708277, + "grad_norm": 0.6155938516766296, + "learning_rate": 2.4935252034159016e-05, + "loss": 0.2169, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23357100784778595, + "step": 2470, + "valid_targets_mean": 3461.4, + "valid_targets_min": 1589 + }, + { + "epoch": 3.3582089552238807, + "grad_norm": 0.6931971140884506, + "learning_rate": 2.4869653337912652e-05, + "loss": 0.2246, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23858122527599335, + "step": 2475, + "valid_targets_mean": 3166.2, + "valid_targets_min": 1026 + }, + { + "epoch": 3.364993215739484, + "grad_norm": 0.7398849431239813, + "learning_rate": 2.480399890512462e-05, + "loss": 0.2195, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24428343772888184, + "step": 2480, + "valid_targets_mean": 2540.5, + "valid_targets_min": 1101 + }, + { + "epoch": 3.371777476255088, + "grad_norm": 0.7128166603516436, + "learning_rate": 2.4738289487255198e-05, + "loss": 0.2252, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22879432141780853, + "step": 2485, + "valid_targets_mean": 2861.1, + "valid_targets_min": 1266 + }, + { + "epoch": 3.378561736770692, + "grad_norm": 0.7552252749221059, + "learning_rate": 2.4672525836394003e-05, + "loss": 0.227, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22698278725147247, + "step": 2490, + "valid_targets_mean": 2579.4, + "valid_targets_min": 889 + }, + { + "epoch": 3.385345997286296, + "grad_norm": 0.8012197815964346, + "learning_rate": 2.4606708705251383e-05, + "loss": 0.2134, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23777778446674347, + "step": 2495, + "valid_targets_mean": 2214.4, + "valid_targets_min": 873 + }, + { + "epoch": 3.3921302578018997, + "grad_norm": 0.7100109651223878, + "learning_rate": 2.4540838847149805e-05, + "loss": 0.2169, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2334587723016739, + "step": 2500, + "valid_targets_mean": 2754.7, + "valid_targets_min": 1320 + }, + { + "epoch": 3.3989145183175036, + "grad_norm": 0.7594298936519028, + "learning_rate": 2.4474917016015233e-05, + "loss": 0.2259, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21707573533058167, + "step": 2505, + "valid_targets_mean": 2143.0, + "valid_targets_min": 538 + }, + { + "epoch": 3.405698778833107, + "grad_norm": 0.6554539535978531, + "learning_rate": 2.4408943966368502e-05, + "loss": 0.232, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22429423034191132, + "step": 2510, + "valid_targets_mean": 3002.5, + "valid_targets_min": 698 + }, + { + "epoch": 3.412483039348711, + "grad_norm": 0.5678144037578754, + "learning_rate": 2.4342920453316677e-05, + "loss": 0.2226, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21168120205402374, + "step": 2515, + "valid_targets_mean": 4258.6, + "valid_targets_min": 839 + }, + { + "epoch": 3.4192672998643148, + "grad_norm": 1.1608859512029313, + "learning_rate": 2.42768472325444e-05, + "loss": 0.2278, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2512350082397461, + "step": 2520, + "valid_targets_mean": 2730.8, + "valid_targets_min": 1116 + }, + { + "epoch": 3.4260515603799186, + "grad_norm": 0.6794646407442558, + "learning_rate": 2.4210725060305277e-05, + "loss": 0.2264, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23531343042850494, + "step": 2525, + "valid_targets_mean": 3514.7, + "valid_targets_min": 1334 + }, + { + "epoch": 3.4328358208955225, + "grad_norm": 0.6399640137952785, + "learning_rate": 2.4144554693413165e-05, + "loss": 0.2266, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22536537051200867, + "step": 2530, + "valid_targets_mean": 3192.2, + "valid_targets_min": 960 + }, + { + "epoch": 3.4396200814111264, + "grad_norm": 0.6662119295193443, + "learning_rate": 2.407833688923357e-05, + "loss": 0.2236, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22147254645824432, + "step": 2535, + "valid_targets_mean": 2992.2, + "valid_targets_min": 1501 + }, + { + "epoch": 3.44640434192673, + "grad_norm": 0.6891237720043712, + "learning_rate": 2.401207240567493e-05, + "loss": 0.2241, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20989638566970825, + "step": 2540, + "valid_targets_mean": 2869.4, + "valid_targets_min": 810 + }, + { + "epoch": 3.4531886024423337, + "grad_norm": 0.5733593611937268, + "learning_rate": 2.3945762001179988e-05, + "loss": 0.2258, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20587453246116638, + "step": 2545, + "valid_targets_mean": 3843.9, + "valid_targets_min": 1250 + }, + { + "epoch": 3.4599728629579376, + "grad_norm": 0.7227077896335035, + "learning_rate": 2.3879406434717048e-05, + "loss": 0.2404, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2688782811164856, + "step": 2550, + "valid_targets_mean": 3092.1, + "valid_targets_min": 800 + }, + { + "epoch": 3.4667571234735415, + "grad_norm": 0.729804276811994, + "learning_rate": 2.381300646577135e-05, + "loss": 0.223, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2293173372745514, + "step": 2555, + "valid_targets_mean": 2722.3, + "valid_targets_min": 725 + }, + { + "epoch": 3.4735413839891454, + "grad_norm": 0.7089336749170233, + "learning_rate": 2.3746562854336347e-05, + "loss": 0.2319, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24235443770885468, + "step": 2560, + "valid_targets_mean": 2771.2, + "valid_targets_min": 795 + }, + { + "epoch": 3.480325644504749, + "grad_norm": 0.7144987230267648, + "learning_rate": 2.3680076360904996e-05, + "loss": 0.2172, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22750988602638245, + "step": 2565, + "valid_targets_mean": 2600.1, + "valid_targets_min": 686 + }, + { + "epoch": 3.4871099050203527, + "grad_norm": 0.6752212573282049, + "learning_rate": 2.3613547746461082e-05, + "loss": 0.2304, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24150076508522034, + "step": 2570, + "valid_targets_mean": 3270.1, + "valid_targets_min": 874 + }, + { + "epoch": 3.4938941655359566, + "grad_norm": 0.7590308205931211, + "learning_rate": 2.3546977772470494e-05, + "loss": 0.2309, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22371584177017212, + "step": 2575, + "valid_targets_mean": 2490.2, + "valid_targets_min": 810 + }, + { + "epoch": 3.5006784260515604, + "grad_norm": 0.7870240421440323, + "learning_rate": 2.3480367200872504e-05, + "loss": 0.2228, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23015907406806946, + "step": 2580, + "valid_targets_mean": 2033.2, + "valid_targets_min": 1066 + }, + { + "epoch": 3.5074626865671643, + "grad_norm": 0.923419749026049, + "learning_rate": 2.341371679407106e-05, + "loss": 0.2171, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.207190603017807, + "step": 2585, + "valid_targets_mean": 2550.9, + "valid_targets_min": 822 + }, + { + "epoch": 3.5142469470827677, + "grad_norm": 0.7046771506431182, + "learning_rate": 2.3347027314926032e-05, + "loss": 0.2174, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22681497037410736, + "step": 2590, + "valid_targets_mean": 2633.1, + "valid_targets_min": 719 + }, + { + "epoch": 3.5210312075983716, + "grad_norm": 0.6097178720133104, + "learning_rate": 2.328029952674452e-05, + "loss": 0.2303, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21856877207756042, + "step": 2595, + "valid_targets_mean": 4060.4, + "valid_targets_min": 1722 + }, + { + "epoch": 3.5278154681139755, + "grad_norm": 0.7646075625057801, + "learning_rate": 2.321353419327209e-05, + "loss": 0.2212, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2248585820198059, + "step": 2600, + "valid_targets_mean": 2429.2, + "valid_targets_min": 810 + }, + { + "epoch": 3.5345997286295794, + "grad_norm": 0.6929279550589781, + "learning_rate": 2.314673207868404e-05, + "loss": 0.2298, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24431878328323364, + "step": 2605, + "valid_targets_mean": 2962.9, + "valid_targets_min": 655 + }, + { + "epoch": 3.5413839891451833, + "grad_norm": 0.6480909726367313, + "learning_rate": 2.307989394757665e-05, + "loss": 0.2141, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22717615962028503, + "step": 2610, + "valid_targets_mean": 3731.4, + "valid_targets_min": 1001 + }, + { + "epoch": 3.5481682496607867, + "grad_norm": 0.7186879086898086, + "learning_rate": 2.301302056495845e-05, + "loss": 0.2204, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2136261910200119, + "step": 2615, + "valid_targets_mean": 2418.2, + "valid_targets_min": 705 + }, + { + "epoch": 3.554952510176391, + "grad_norm": 0.806809670872252, + "learning_rate": 2.2946112696241414e-05, + "loss": 0.2191, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22997555136680603, + "step": 2620, + "valid_targets_mean": 2120.0, + "valid_targets_min": 680 + }, + { + "epoch": 3.5617367706919945, + "grad_norm": 0.6741591468113703, + "learning_rate": 2.2879171107232274e-05, + "loss": 0.2183, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2275179922580719, + "step": 2625, + "valid_targets_mean": 3001.9, + "valid_targets_min": 1175 + }, + { + "epoch": 3.5685210312075983, + "grad_norm": 0.7318902025413816, + "learning_rate": 2.2812196564123683e-05, + "loss": 0.2223, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21598154306411743, + "step": 2630, + "valid_targets_mean": 2405.4, + "valid_targets_min": 986 + }, + { + "epoch": 3.575305291723202, + "grad_norm": 0.7506970585357591, + "learning_rate": 2.274518983348549e-05, + "loss": 0.2304, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24321120977401733, + "step": 2635, + "valid_targets_mean": 2461.9, + "valid_targets_min": 907 + }, + { + "epoch": 3.582089552238806, + "grad_norm": 0.6629205823156629, + "learning_rate": 2.267815168225596e-05, + "loss": 0.2188, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2191721647977829, + "step": 2640, + "valid_targets_mean": 2622.9, + "valid_targets_min": 882 + }, + { + "epoch": 3.58887381275441, + "grad_norm": 0.7533189468195941, + "learning_rate": 2.2611082877732954e-05, + "loss": 0.2254, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24864350259304047, + "step": 2645, + "valid_targets_mean": 2555.6, + "valid_targets_min": 684 + }, + { + "epoch": 3.5956580732700134, + "grad_norm": 0.6177690316825468, + "learning_rate": 2.2543984187565227e-05, + "loss": 0.2271, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23729315400123596, + "step": 2650, + "valid_targets_mean": 3457.9, + "valid_targets_min": 1279 + }, + { + "epoch": 3.6024423337856173, + "grad_norm": 0.7158165102846912, + "learning_rate": 2.2476856379743567e-05, + "loss": 0.2329, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23067063093185425, + "step": 2655, + "valid_targets_mean": 2960.2, + "valid_targets_min": 722 + }, + { + "epoch": 3.609226594301221, + "grad_norm": 0.7707372511866211, + "learning_rate": 2.2409700222592042e-05, + "loss": 0.2212, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22069093585014343, + "step": 2660, + "valid_targets_mean": 2546.6, + "valid_targets_min": 668 + }, + { + "epoch": 3.616010854816825, + "grad_norm": 0.6052180371912904, + "learning_rate": 2.23425164847592e-05, + "loss": 0.2268, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21380861103534698, + "step": 2665, + "valid_targets_mean": 3455.1, + "valid_targets_min": 742 + }, + { + "epoch": 3.622795115332429, + "grad_norm": 0.7047656744984836, + "learning_rate": 2.227530593520926e-05, + "loss": 0.2322, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24715712666511536, + "step": 2670, + "valid_targets_mean": 3540.6, + "valid_targets_min": 1327 + }, + { + "epoch": 3.6295793758480324, + "grad_norm": 0.6745230951755198, + "learning_rate": 2.2208069343213326e-05, + "loss": 0.2226, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23651158809661865, + "step": 2675, + "valid_targets_mean": 3037.1, + "valid_targets_min": 972 + }, + { + "epoch": 3.6363636363636362, + "grad_norm": 0.623368182400207, + "learning_rate": 2.2140807478340582e-05, + "loss": 0.2255, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22128023207187653, + "step": 2680, + "valid_targets_mean": 3301.1, + "valid_targets_min": 724 + }, + { + "epoch": 3.64314789687924, + "grad_norm": 0.6649128121001402, + "learning_rate": 2.2073521110449456e-05, + "loss": 0.224, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2295609414577484, + "step": 2685, + "valid_targets_mean": 3177.9, + "valid_targets_min": 1144 + }, + { + "epoch": 3.649932157394844, + "grad_norm": 0.6882954051754054, + "learning_rate": 2.200621100967886e-05, + "loss": 0.2192, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2442229986190796, + "step": 2690, + "valid_targets_mean": 3074.8, + "valid_targets_min": 856 + }, + { + "epoch": 3.656716417910448, + "grad_norm": 0.6820903463422161, + "learning_rate": 2.193887794643932e-05, + "loss": 0.2324, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23110562562942505, + "step": 2695, + "valid_targets_mean": 3172.8, + "valid_targets_min": 1124 + }, + { + "epoch": 3.6635006784260513, + "grad_norm": 0.6997238880911637, + "learning_rate": 2.187152269140419e-05, + "loss": 0.2147, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21552921831607819, + "step": 2700, + "valid_targets_mean": 2677.4, + "valid_targets_min": 814 + }, + { + "epoch": 3.670284938941655, + "grad_norm": 0.7131884522201282, + "learning_rate": 2.180414601550084e-05, + "loss": 0.219, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2269384115934372, + "step": 2705, + "valid_targets_mean": 2801.8, + "valid_targets_min": 1388 + }, + { + "epoch": 3.677069199457259, + "grad_norm": 0.7172347353831631, + "learning_rate": 2.1736748689901792e-05, + "loss": 0.2251, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22936025261878967, + "step": 2710, + "valid_targets_mean": 2702.1, + "valid_targets_min": 1312 + }, + { + "epoch": 3.683853459972863, + "grad_norm": 0.6583901504326424, + "learning_rate": 2.1669331486015942e-05, + "loss": 0.2189, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.221341073513031, + "step": 2715, + "valid_targets_mean": 2726.4, + "valid_targets_min": 1008 + }, + { + "epoch": 3.690637720488467, + "grad_norm": 0.6670649918476067, + "learning_rate": 2.1601895175479677e-05, + "loss": 0.2069, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18742845952510834, + "step": 2720, + "valid_targets_mean": 2824.9, + "valid_targets_min": 741 + }, + { + "epoch": 3.6974219810040707, + "grad_norm": 0.7043411361057197, + "learning_rate": 2.1534440530148104e-05, + "loss": 0.218, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.224558025598526, + "step": 2725, + "valid_targets_mean": 2730.9, + "valid_targets_min": 901 + }, + { + "epoch": 3.7042062415196746, + "grad_norm": 0.8290546440140429, + "learning_rate": 2.1466968322086168e-05, + "loss": 0.2314, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24871769547462463, + "step": 2730, + "valid_targets_mean": 2595.6, + "valid_targets_min": 629 + }, + { + "epoch": 3.710990502035278, + "grad_norm": 0.6684634109147515, + "learning_rate": 2.1399479323559837e-05, + "loss": 0.2212, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22964484989643097, + "step": 2735, + "valid_targets_mean": 3235.6, + "valid_targets_min": 657 + }, + { + "epoch": 3.717774762550882, + "grad_norm": 0.6468283601913088, + "learning_rate": 2.133197430702725e-05, + "loss": 0.2233, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22138440608978271, + "step": 2740, + "valid_targets_mean": 2955.9, + "valid_targets_min": 742 + }, + { + "epoch": 3.724559023066486, + "grad_norm": 0.6848302786495842, + "learning_rate": 2.1264454045129885e-05, + "loss": 0.2332, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23357048630714417, + "step": 2745, + "valid_targets_mean": 2971.2, + "valid_targets_min": 876 + }, + { + "epoch": 3.7313432835820897, + "grad_norm": 0.8076399629104075, + "learning_rate": 2.1196919310683722e-05, + "loss": 0.2277, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24134433269500732, + "step": 2750, + "valid_targets_mean": 2742.0, + "valid_targets_min": 1227 + }, + { + "epoch": 3.7381275440976935, + "grad_norm": 0.7937429467781824, + "learning_rate": 2.112937087667039e-05, + "loss": 0.2276, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23753595352172852, + "step": 2755, + "valid_targets_mean": 2422.7, + "valid_targets_min": 1122 + }, + { + "epoch": 3.744911804613297, + "grad_norm": 0.6806559152552468, + "learning_rate": 2.106180951622829e-05, + "loss": 0.2232, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.241061270236969, + "step": 2760, + "valid_targets_mean": 2973.4, + "valid_targets_min": 1078 + }, + { + "epoch": 3.751696065128901, + "grad_norm": 0.695950276005158, + "learning_rate": 2.0994236002643822e-05, + "loss": 0.2019, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20735937356948853, + "step": 2765, + "valid_targets_mean": 2495.0, + "valid_targets_min": 728 + }, + { + "epoch": 3.7584803256445047, + "grad_norm": 0.8597190728087525, + "learning_rate": 2.0926651109342457e-05, + "loss": 0.2282, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2300506830215454, + "step": 2770, + "valid_targets_mean": 2055.6, + "valid_targets_min": 730 + }, + { + "epoch": 3.7652645861601086, + "grad_norm": 0.7385601024810353, + "learning_rate": 2.0859055609879916e-05, + "loss": 0.2286, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23074880242347717, + "step": 2775, + "valid_targets_mean": 2834.2, + "valid_targets_min": 611 + }, + { + "epoch": 3.7720488466757125, + "grad_norm": 0.7000695708428455, + "learning_rate": 2.0791450277933322e-05, + "loss": 0.2262, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21346724033355713, + "step": 2780, + "valid_targets_mean": 2711.2, + "valid_targets_min": 763 + }, + { + "epoch": 3.778833107191316, + "grad_norm": 0.6921395024706539, + "learning_rate": 2.0723835887292334e-05, + "loss": 0.232, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2142827957868576, + "step": 2785, + "valid_targets_mean": 2578.4, + "valid_targets_min": 1125 + }, + { + "epoch": 3.78561736770692, + "grad_norm": 0.7682439574392755, + "learning_rate": 2.0656213211850295e-05, + "loss": 0.2211, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21348156034946442, + "step": 2790, + "valid_targets_mean": 2177.8, + "valid_targets_min": 635 + }, + { + "epoch": 3.7924016282225237, + "grad_norm": 0.7600371041182261, + "learning_rate": 2.058858302559537e-05, + "loss": 0.2212, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23701968789100647, + "step": 2795, + "valid_targets_mean": 2633.8, + "valid_targets_min": 868 + }, + { + "epoch": 3.7991858887381276, + "grad_norm": 0.7139158160982614, + "learning_rate": 2.05209461026017e-05, + "loss": 0.2276, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.233786478638649, + "step": 2800, + "valid_targets_mean": 2587.3, + "valid_targets_min": 943 + }, + { + "epoch": 3.8059701492537314, + "grad_norm": 0.7176024981852444, + "learning_rate": 2.045330321702053e-05, + "loss": 0.2271, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22029541432857513, + "step": 2805, + "valid_targets_mean": 2586.6, + "valid_targets_min": 770 + }, + { + "epoch": 3.812754409769335, + "grad_norm": 0.6149619152466096, + "learning_rate": 2.0385655143071336e-05, + "loss": 0.2159, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19177527725696564, + "step": 2810, + "valid_targets_mean": 3115.8, + "valid_targets_min": 634 + }, + { + "epoch": 3.819538670284939, + "grad_norm": 0.6945022382481121, + "learning_rate": 2.031800265503299e-05, + "loss": 0.2217, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2345021814107895, + "step": 2815, + "valid_targets_mean": 3089.4, + "valid_targets_min": 484 + }, + { + "epoch": 3.8263229308005426, + "grad_norm": 0.718365382422882, + "learning_rate": 2.02503465272349e-05, + "loss": 0.2186, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21639087796211243, + "step": 2820, + "valid_targets_mean": 2522.1, + "valid_targets_min": 675 + }, + { + "epoch": 3.8331071913161465, + "grad_norm": 0.7049711808427084, + "learning_rate": 2.0182687534048107e-05, + "loss": 0.2216, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24925465881824493, + "step": 2825, + "valid_targets_mean": 2881.4, + "valid_targets_min": 875 + }, + { + "epoch": 3.8398914518317504, + "grad_norm": 0.7569193242813216, + "learning_rate": 2.011502644987646e-05, + "loss": 0.2344, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2595372796058655, + "step": 2830, + "valid_targets_mean": 2309.4, + "valid_targets_min": 801 + }, + { + "epoch": 3.8466757123473543, + "grad_norm": 0.6067978450699282, + "learning_rate": 2.0047364049147747e-05, + "loss": 0.2319, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21039123833179474, + "step": 2835, + "valid_targets_mean": 3391.3, + "valid_targets_min": 805 + }, + { + "epoch": 3.853459972862958, + "grad_norm": 0.6539018194253448, + "learning_rate": 1.9979701106304824e-05, + "loss": 0.2298, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.241075336933136, + "step": 2840, + "valid_targets_mean": 2985.6, + "valid_targets_min": 1362 + }, + { + "epoch": 3.8602442333785616, + "grad_norm": 0.672599881101891, + "learning_rate": 1.991203839579674e-05, + "loss": 0.2311, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2360815405845642, + "step": 2845, + "valid_targets_mean": 2885.5, + "valid_targets_min": 663 + }, + { + "epoch": 3.8670284938941655, + "grad_norm": 0.7800657928131475, + "learning_rate": 1.984437669206989e-05, + "loss": 0.2148, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20892655849456787, + "step": 2850, + "valid_targets_mean": 2320.9, + "valid_targets_min": 917 + }, + { + "epoch": 3.8738127544097694, + "grad_norm": 0.6676687527814975, + "learning_rate": 1.9776716769559157e-05, + "loss": 0.2368, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22498619556427002, + "step": 2855, + "valid_targets_mean": 3164.1, + "valid_targets_min": 1241 + }, + { + "epoch": 3.8805970149253732, + "grad_norm": 0.5995122983093996, + "learning_rate": 1.970905940267902e-05, + "loss": 0.2152, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21802213788032532, + "step": 2860, + "valid_targets_mean": 3898.9, + "valid_targets_min": 914 + }, + { + "epoch": 3.887381275440977, + "grad_norm": 0.647688327782899, + "learning_rate": 1.9641405365814717e-05, + "loss": 0.2242, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21417051553726196, + "step": 2865, + "valid_targets_mean": 3167.7, + "valid_targets_min": 758 + }, + { + "epoch": 3.8941655359565805, + "grad_norm": 0.6546310967035971, + "learning_rate": 1.9573755433313378e-05, + "loss": 0.2223, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2323242872953415, + "step": 2870, + "valid_targets_mean": 3041.1, + "valid_targets_min": 1319 + }, + { + "epoch": 3.9009497964721844, + "grad_norm": 0.7383143695334415, + "learning_rate": 1.9506110379475128e-05, + "loss": 0.2258, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2311961054801941, + "step": 2875, + "valid_targets_mean": 2362.9, + "valid_targets_min": 750 + }, + { + "epoch": 3.9077340569877883, + "grad_norm": 0.8430450237904688, + "learning_rate": 1.9438470978544287e-05, + "loss": 0.2198, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21592223644256592, + "step": 2880, + "valid_targets_mean": 3625.7, + "valid_targets_min": 1167 + }, + { + "epoch": 3.914518317503392, + "grad_norm": 0.7707831179099163, + "learning_rate": 1.9370838004700455e-05, + "loss": 0.2287, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24675382673740387, + "step": 2885, + "valid_targets_mean": 2393.0, + "valid_targets_min": 1011 + }, + { + "epoch": 3.921302578018996, + "grad_norm": 0.7280249588483818, + "learning_rate": 1.930321223204967e-05, + "loss": 0.2228, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24415768682956696, + "step": 2890, + "valid_targets_mean": 2570.4, + "valid_targets_min": 1242 + }, + { + "epoch": 3.9280868385345995, + "grad_norm": 0.7871720247148984, + "learning_rate": 1.9235594434615552e-05, + "loss": 0.2298, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21705761551856995, + "step": 2895, + "valid_targets_mean": 2329.9, + "valid_targets_min": 934 + }, + { + "epoch": 3.9348710990502034, + "grad_norm": 0.6824607839533505, + "learning_rate": 1.9167985386330435e-05, + "loss": 0.2179, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2021501511335373, + "step": 2900, + "valid_targets_mean": 3006.3, + "valid_targets_min": 1460 + }, + { + "epoch": 3.9416553595658073, + "grad_norm": 0.580360164037747, + "learning_rate": 1.910038586102652e-05, + "loss": 0.2063, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18379102647304535, + "step": 2905, + "valid_targets_mean": 2916.4, + "valid_targets_min": 927 + }, + { + "epoch": 3.948439620081411, + "grad_norm": 0.7244276692797579, + "learning_rate": 1.9032796632427e-05, + "loss": 0.2194, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22763890027999878, + "step": 2910, + "valid_targets_mean": 2483.5, + "valid_targets_min": 831 + }, + { + "epoch": 3.955223880597015, + "grad_norm": 0.7606015895849964, + "learning_rate": 1.896521847413722e-05, + "loss": 0.2238, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2129872590303421, + "step": 2915, + "valid_targets_mean": 2604.8, + "valid_targets_min": 1424 + }, + { + "epoch": 3.9620081411126185, + "grad_norm": 0.7996005811272329, + "learning_rate": 1.8897652159635826e-05, + "loss": 0.2351, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2276165634393692, + "step": 2920, + "valid_targets_mean": 2545.5, + "valid_targets_min": 1333 + }, + { + "epoch": 3.9687924016282228, + "grad_norm": 0.6306491453103433, + "learning_rate": 1.8830098462265892e-05, + "loss": 0.2182, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2229669839143753, + "step": 2925, + "valid_targets_mean": 3271.3, + "valid_targets_min": 1296 + }, + { + "epoch": 3.975576662143826, + "grad_norm": 0.7155034374723849, + "learning_rate": 1.8762558155226086e-05, + "loss": 0.2299, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23799753189086914, + "step": 2930, + "valid_targets_mean": 2748.9, + "valid_targets_min": 632 + }, + { + "epoch": 3.98236092265943, + "grad_norm": 0.8976533438979097, + "learning_rate": 1.869503201156181e-05, + "loss": 0.212, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20464091002941132, + "step": 2935, + "valid_targets_mean": 1909.6, + "valid_targets_min": 883 + }, + { + "epoch": 3.989145183175034, + "grad_norm": 0.6624349548194711, + "learning_rate": 1.8627520804156365e-05, + "loss": 0.2258, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22667071223258972, + "step": 2940, + "valid_targets_mean": 3271.6, + "valid_targets_min": 623 + }, + { + "epoch": 3.995929443690638, + "grad_norm": 0.6151495628097419, + "learning_rate": 1.856002530572209e-05, + "loss": 0.2189, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21382832527160645, + "step": 2945, + "valid_targets_mean": 3332.6, + "valid_targets_min": 1190 + }, + { + "epoch": 4.002713704206242, + "grad_norm": 0.615787649304689, + "learning_rate": 1.8492546288791518e-05, + "loss": 0.2096, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19524888694286346, + "step": 2950, + "valid_targets_mean": 3521.4, + "valid_targets_min": 1057 + }, + { + "epoch": 4.009497964721845, + "grad_norm": 0.7774165923273912, + "learning_rate": 1.842508452570855e-05, + "loss": 0.2003, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20001152157783508, + "step": 2955, + "valid_targets_mean": 2349.4, + "valid_targets_min": 662 + }, + { + "epoch": 4.0162822252374495, + "grad_norm": 0.664596861061304, + "learning_rate": 1.8357640788619605e-05, + "loss": 0.1992, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21963298320770264, + "step": 2960, + "valid_targets_mean": 3053.3, + "valid_targets_min": 789 + }, + { + "epoch": 4.023066485753053, + "grad_norm": 0.7193062756351686, + "learning_rate": 1.8290215849464773e-05, + "loss": 0.1936, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2050497829914093, + "step": 2965, + "valid_targets_mean": 2874.3, + "valid_targets_min": 1465 + }, + { + "epoch": 4.029850746268656, + "grad_norm": 0.7682247189848644, + "learning_rate": 1.8222810479969e-05, + "loss": 0.1952, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20245233178138733, + "step": 2970, + "valid_targets_mean": 2529.4, + "valid_targets_min": 706 + }, + { + "epoch": 4.036635006784261, + "grad_norm": 0.7099650203393719, + "learning_rate": 1.815542545163323e-05, + "loss": 0.1908, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18782241642475128, + "step": 2975, + "valid_targets_mean": 2867.7, + "valid_targets_min": 856 + }, + { + "epoch": 4.043419267299864, + "grad_norm": 0.6679826007373775, + "learning_rate": 1.808806153572561e-05, + "loss": 0.1932, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18531766533851624, + "step": 2980, + "valid_targets_mean": 3430.7, + "valid_targets_min": 733 + }, + { + "epoch": 4.050203527815468, + "grad_norm": 0.6971278596727647, + "learning_rate": 1.802071950327261e-05, + "loss": 0.189, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1951214224100113, + "step": 2985, + "valid_targets_mean": 2909.7, + "valid_targets_min": 728 + }, + { + "epoch": 4.056987788331072, + "grad_norm": 0.6975225843392714, + "learning_rate": 1.7953400125050252e-05, + "loss": 0.1945, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19596266746520996, + "step": 2990, + "valid_targets_mean": 3136.7, + "valid_targets_min": 591 + }, + { + "epoch": 4.063772048846675, + "grad_norm": 0.616610532499199, + "learning_rate": 1.7886104171575264e-05, + "loss": 0.1916, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17603719234466553, + "step": 2995, + "valid_targets_mean": 3788.1, + "valid_targets_min": 700 + }, + { + "epoch": 4.07055630936228, + "grad_norm": 0.8204028517819303, + "learning_rate": 1.7818832413096248e-05, + "loss": 0.1846, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1822405755519867, + "step": 3000, + "valid_targets_mean": 2084.1, + "valid_targets_min": 950 + }, + { + "epoch": 4.077340569877883, + "grad_norm": 0.8644454831039673, + "learning_rate": 1.775158561958489e-05, + "loss": 0.208, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21637216210365295, + "step": 3005, + "valid_targets_mean": 2142.1, + "valid_targets_min": 1117 + }, + { + "epoch": 4.084124830393487, + "grad_norm": 0.6868713845692973, + "learning_rate": 1.768436456072713e-05, + "loss": 0.1927, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17520518600940704, + "step": 3010, + "valid_targets_mean": 3152.6, + "valid_targets_min": 1144 + }, + { + "epoch": 4.090909090909091, + "grad_norm": 0.646335349816201, + "learning_rate": 1.7617170005914363e-05, + "loss": 0.194, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18921396136283875, + "step": 3015, + "valid_targets_mean": 3166.6, + "valid_targets_min": 1255 + }, + { + "epoch": 4.097693351424695, + "grad_norm": 0.7095934220573488, + "learning_rate": 1.755000272423461e-05, + "loss": 0.191, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18535733222961426, + "step": 3020, + "valid_targets_mean": 2847.2, + "valid_targets_min": 1211 + }, + { + "epoch": 4.104477611940299, + "grad_norm": 0.7722733457306606, + "learning_rate": 1.7482863484463747e-05, + "loss": 0.1943, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1878623217344284, + "step": 3025, + "valid_targets_mean": 2357.7, + "valid_targets_min": 796 + }, + { + "epoch": 4.111261872455902, + "grad_norm": 0.7558210158635994, + "learning_rate": 1.7415753055056693e-05, + "loss": 0.2009, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20962506532669067, + "step": 3030, + "valid_targets_mean": 2774.4, + "valid_targets_min": 689 + }, + { + "epoch": 4.118046132971506, + "grad_norm": 0.7019234157644559, + "learning_rate": 1.73486722041386e-05, + "loss": 0.1964, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17214052379131317, + "step": 3035, + "valid_targets_mean": 2344.8, + "valid_targets_min": 720 + }, + { + "epoch": 4.12483039348711, + "grad_norm": 0.7056358240413717, + "learning_rate": 1.7281621699496086e-05, + "loss": 0.1809, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18809084594249725, + "step": 3040, + "valid_targets_mean": 2753.2, + "valid_targets_min": 985 + }, + { + "epoch": 4.131614654002714, + "grad_norm": 0.7928593105079518, + "learning_rate": 1.7214602308568426e-05, + "loss": 0.1858, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16787604987621307, + "step": 3045, + "valid_targets_mean": 2298.5, + "valid_targets_min": 883 + }, + { + "epoch": 4.1383989145183175, + "grad_norm": 0.7453927190983547, + "learning_rate": 1.714761479843879e-05, + "loss": 0.2071, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2303515374660492, + "step": 3050, + "valid_targets_mean": 3051.8, + "valid_targets_min": 1135 + }, + { + "epoch": 4.145183175033921, + "grad_norm": 0.7004503623184913, + "learning_rate": 1.708065993582543e-05, + "loss": 0.1842, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1861414909362793, + "step": 3055, + "valid_targets_mean": 2926.8, + "valid_targets_min": 704 + }, + { + "epoch": 4.151967435549525, + "grad_norm": 0.5969912948853561, + "learning_rate": 1.701373848707294e-05, + "loss": 0.1826, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16732157766819, + "step": 3060, + "valid_targets_mean": 3249.8, + "valid_targets_min": 759 + }, + { + "epoch": 4.158751696065129, + "grad_norm": 0.7889354028527934, + "learning_rate": 1.6946851218143464e-05, + "loss": 0.1977, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2116738259792328, + "step": 3065, + "valid_targets_mean": 2610.2, + "valid_targets_min": 1017 + }, + { + "epoch": 4.165535956580733, + "grad_norm": 0.6969382167538735, + "learning_rate": 1.6879998894607937e-05, + "loss": 0.2062, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.184125155210495, + "step": 3070, + "valid_targets_mean": 2778.3, + "valid_targets_min": 761 + }, + { + "epoch": 4.1723202170963365, + "grad_norm": 0.7661282970421094, + "learning_rate": 1.6813182281637318e-05, + "loss": 0.1878, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18725869059562683, + "step": 3075, + "valid_targets_mean": 2373.2, + "valid_targets_min": 785 + }, + { + "epoch": 4.17910447761194, + "grad_norm": 0.7127514162035592, + "learning_rate": 1.674640214399383e-05, + "loss": 0.1981, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20451834797859192, + "step": 3080, + "valid_targets_mean": 3058.9, + "valid_targets_min": 804 + }, + { + "epoch": 4.185888738127544, + "grad_norm": 0.7261600863010719, + "learning_rate": 1.6679659246022224e-05, + "loss": 0.207, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19646155834197998, + "step": 3085, + "valid_targets_mean": 2840.0, + "valid_targets_min": 742 + }, + { + "epoch": 4.192672998643148, + "grad_norm": 0.7318920710785716, + "learning_rate": 1.6612954351641004e-05, + "loss": 0.1871, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1673353910446167, + "step": 3090, + "valid_targets_mean": 2456.0, + "valid_targets_min": 774 + }, + { + "epoch": 4.199457259158752, + "grad_norm": 0.7618896273234488, + "learning_rate": 1.65462882243337e-05, + "loss": 0.2045, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2328355312347412, + "step": 3095, + "valid_targets_mean": 2732.4, + "valid_targets_min": 662 + }, + { + "epoch": 4.2062415196743554, + "grad_norm": 0.6950613889947367, + "learning_rate": 1.6479661627140125e-05, + "loss": 0.199, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21082782745361328, + "step": 3100, + "valid_targets_mean": 3076.4, + "valid_targets_min": 993 + }, + { + "epoch": 4.213025780189959, + "grad_norm": 0.7422212651396944, + "learning_rate": 1.6413075322647645e-05, + "loss": 0.202, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17804861068725586, + "step": 3105, + "valid_targets_mean": 2741.3, + "valid_targets_min": 1230 + }, + { + "epoch": 4.219810040705563, + "grad_norm": 0.8313162708546203, + "learning_rate": 1.634653007298245e-05, + "loss": 0.2025, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21381892263889313, + "step": 3110, + "valid_targets_mean": 2521.1, + "valid_targets_min": 918 + }, + { + "epoch": 4.226594301221167, + "grad_norm": 0.7343807869521366, + "learning_rate": 1.628002663980083e-05, + "loss": 0.1958, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18454739451408386, + "step": 3115, + "valid_targets_mean": 3389.7, + "valid_targets_min": 944 + }, + { + "epoch": 4.233378561736771, + "grad_norm": 0.7180737684834317, + "learning_rate": 1.6213565784280448e-05, + "loss": 0.1972, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20628373324871063, + "step": 3120, + "valid_targets_mean": 2695.5, + "valid_targets_min": 1330 + }, + { + "epoch": 4.240162822252374, + "grad_norm": 0.7446535802715553, + "learning_rate": 1.6147148267111648e-05, + "loss": 0.205, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20070384442806244, + "step": 3125, + "valid_targets_mean": 2581.9, + "valid_targets_min": 1108 + }, + { + "epoch": 4.246947082767978, + "grad_norm": 0.7867961809991622, + "learning_rate": 1.608077484848872e-05, + "loss": 0.2001, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21188589930534363, + "step": 3130, + "valid_targets_mean": 2598.0, + "valid_targets_min": 754 + }, + { + "epoch": 4.253731343283582, + "grad_norm": 0.7452760690333677, + "learning_rate": 1.601444628810124e-05, + "loss": 0.1859, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19212831556797028, + "step": 3135, + "valid_targets_mean": 2548.9, + "valid_targets_min": 801 + }, + { + "epoch": 4.260515603799186, + "grad_norm": 0.640609370408385, + "learning_rate": 1.5948163345125322e-05, + "loss": 0.1927, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1867392212152481, + "step": 3140, + "valid_targets_mean": 2978.2, + "valid_targets_min": 893 + }, + { + "epoch": 4.26729986431479, + "grad_norm": 0.7904463927412828, + "learning_rate": 1.5881926778214985e-05, + "loss": 0.1936, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18848653137683868, + "step": 3145, + "valid_targets_mean": 2162.2, + "valid_targets_min": 657 + }, + { + "epoch": 4.274084124830393, + "grad_norm": 0.6862590326254344, + "learning_rate": 1.581573734549342e-05, + "loss": 0.2075, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19391348958015442, + "step": 3150, + "valid_targets_mean": 2840.2, + "valid_targets_min": 796 + }, + { + "epoch": 4.280868385345998, + "grad_norm": 0.753930226852254, + "learning_rate": 1.574959580454435e-05, + "loss": 0.2073, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2218829095363617, + "step": 3155, + "valid_targets_mean": 2770.5, + "valid_targets_min": 791 + }, + { + "epoch": 4.287652645861601, + "grad_norm": 0.7173848707428967, + "learning_rate": 1.5683502912403334e-05, + "loss": 0.19, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18789169192314148, + "step": 3160, + "valid_targets_mean": 2473.1, + "valid_targets_min": 1324 + }, + { + "epoch": 4.2944369063772045, + "grad_norm": 0.6756808344570617, + "learning_rate": 1.5617459425549118e-05, + "loss": 0.1889, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17631642520427704, + "step": 3165, + "valid_targets_mean": 2783.0, + "valid_targets_min": 898 + }, + { + "epoch": 4.301221166892809, + "grad_norm": 0.7596274040196479, + "learning_rate": 1.555146609989496e-05, + "loss": 0.1942, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19371306896209717, + "step": 3170, + "valid_targets_mean": 2678.5, + "valid_targets_min": 999 + }, + { + "epoch": 4.308005427408412, + "grad_norm": 0.7950503499609197, + "learning_rate": 1.5485523690780003e-05, + "loss": 0.2032, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20015594363212585, + "step": 3175, + "valid_targets_mean": 2270.9, + "valid_targets_min": 1330 + }, + { + "epoch": 4.314789687924017, + "grad_norm": 0.6284596925256105, + "learning_rate": 1.5419632952960605e-05, + "loss": 0.1853, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16846656799316406, + "step": 3180, + "valid_targets_mean": 3001.5, + "valid_targets_min": 584 + }, + { + "epoch": 4.32157394843962, + "grad_norm": 0.6201630615003697, + "learning_rate": 1.5353794640601716e-05, + "loss": 0.1971, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18524964153766632, + "step": 3185, + "valid_targets_mean": 3645.7, + "valid_targets_min": 1501 + }, + { + "epoch": 4.3283582089552235, + "grad_norm": 0.711809826493421, + "learning_rate": 1.5288009507268237e-05, + "loss": 0.1998, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21540531516075134, + "step": 3190, + "valid_targets_mean": 3402.2, + "valid_targets_min": 1522 + }, + { + "epoch": 4.335142469470828, + "grad_norm": 0.6972934803265656, + "learning_rate": 1.5222278305916398e-05, + "loss": 0.1922, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18640825152397156, + "step": 3195, + "valid_targets_mean": 3293.5, + "valid_targets_min": 1126 + }, + { + "epoch": 4.341926729986431, + "grad_norm": 0.6481026199784268, + "learning_rate": 1.5156601788885148e-05, + "loss": 0.1877, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1833941638469696, + "step": 3200, + "valid_targets_mean": 3167.9, + "valid_targets_min": 1106 + }, + { + "epoch": 4.348710990502036, + "grad_norm": 0.8208339691617204, + "learning_rate": 1.5090980707887516e-05, + "loss": 0.2051, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2145204246044159, + "step": 3205, + "valid_targets_mean": 2193.7, + "valid_targets_min": 739 + }, + { + "epoch": 4.355495251017639, + "grad_norm": 0.6739050449873922, + "learning_rate": 1.5025415814002053e-05, + "loss": 0.1943, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19558805227279663, + "step": 3210, + "valid_targets_mean": 3247.9, + "valid_targets_min": 930 + }, + { + "epoch": 4.362279511533243, + "grad_norm": 0.7330699785639507, + "learning_rate": 1.4959907857664193e-05, + "loss": 0.2004, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20514100790023804, + "step": 3215, + "valid_targets_mean": 2830.6, + "valid_targets_min": 947 + }, + { + "epoch": 4.369063772048847, + "grad_norm": 0.7397083988883265, + "learning_rate": 1.4894457588657685e-05, + "loss": 0.1957, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20222502946853638, + "step": 3220, + "valid_targets_mean": 2764.8, + "valid_targets_min": 815 + }, + { + "epoch": 4.37584803256445, + "grad_norm": 0.8585950766535199, + "learning_rate": 1.4829065756106011e-05, + "loss": 0.1869, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2083539068698883, + "step": 3225, + "valid_targets_mean": 3063.8, + "valid_targets_min": 704 + }, + { + "epoch": 4.3826322930800545, + "grad_norm": 0.6992154976029811, + "learning_rate": 1.47637331084638e-05, + "loss": 0.1968, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18280436098575592, + "step": 3230, + "valid_targets_mean": 2991.1, + "valid_targets_min": 922 + }, + { + "epoch": 4.389416553595658, + "grad_norm": 0.7220348383997678, + "learning_rate": 1.4698460393508279e-05, + "loss": 0.1861, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17603710293769836, + "step": 3235, + "valid_targets_mean": 2676.9, + "valid_targets_min": 683 + }, + { + "epoch": 4.396200814111262, + "grad_norm": 0.6815437556740436, + "learning_rate": 1.4633248358330685e-05, + "loss": 0.1892, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17538878321647644, + "step": 3240, + "valid_targets_mean": 2802.6, + "valid_targets_min": 923 + }, + { + "epoch": 4.402985074626866, + "grad_norm": 0.7423631904328546, + "learning_rate": 1.4568097749327755e-05, + "loss": 0.1975, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2101781815290451, + "step": 3245, + "valid_targets_mean": 2564.9, + "valid_targets_min": 933 + }, + { + "epoch": 4.409769335142469, + "grad_norm": 0.6279743544457285, + "learning_rate": 1.4503009312193145e-05, + "loss": 0.1992, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18776188790798187, + "step": 3250, + "valid_targets_mean": 3485.3, + "valid_targets_min": 1216 + }, + { + "epoch": 4.4165535956580735, + "grad_norm": 0.72461497188728, + "learning_rate": 1.4437983791908927e-05, + "loss": 0.1932, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19717328250408173, + "step": 3255, + "valid_targets_mean": 2907.9, + "valid_targets_min": 875 + }, + { + "epoch": 4.423337856173677, + "grad_norm": 0.8394035988938123, + "learning_rate": 1.4373021932737029e-05, + "loss": 0.198, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2060231864452362, + "step": 3260, + "valid_targets_mean": 2521.2, + "valid_targets_min": 861 + }, + { + "epoch": 4.430122116689281, + "grad_norm": 0.7846496377698511, + "learning_rate": 1.4308124478210743e-05, + "loss": 0.1909, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18877476453781128, + "step": 3265, + "valid_targets_mean": 2120.6, + "valid_targets_min": 908 + }, + { + "epoch": 4.436906377204885, + "grad_norm": 0.7718040186575822, + "learning_rate": 1.4243292171126206e-05, + "loss": 0.189, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1813274621963501, + "step": 3270, + "valid_targets_mean": 2067.4, + "valid_targets_min": 744 + }, + { + "epoch": 4.443690637720488, + "grad_norm": 0.8763562192740045, + "learning_rate": 1.4178525753533898e-05, + "loss": 0.1911, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1990664005279541, + "step": 3275, + "valid_targets_mean": 1895.3, + "valid_targets_min": 675 + }, + { + "epoch": 4.450474898236092, + "grad_norm": 0.7818367524680697, + "learning_rate": 1.4113825966730141e-05, + "loss": 0.1987, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2248629331588745, + "step": 3280, + "valid_targets_mean": 2445.3, + "valid_targets_min": 686 + }, + { + "epoch": 4.457259158751696, + "grad_norm": 0.7080706103494964, + "learning_rate": 1.4049193551248625e-05, + "loss": 0.1931, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19344773888587952, + "step": 3285, + "valid_targets_mean": 2806.9, + "valid_targets_min": 1092 + }, + { + "epoch": 4.4640434192673, + "grad_norm": 0.6352432822796971, + "learning_rate": 1.3984629246851938e-05, + "loss": 0.1937, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18108458817005157, + "step": 3290, + "valid_targets_mean": 3230.6, + "valid_targets_min": 703 + }, + { + "epoch": 4.470827679782904, + "grad_norm": 0.7766733822813426, + "learning_rate": 1.3920133792523075e-05, + "loss": 0.1977, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20187707245349884, + "step": 3295, + "valid_targets_mean": 2388.1, + "valid_targets_min": 898 + }, + { + "epoch": 4.477611940298507, + "grad_norm": 0.6809158943751472, + "learning_rate": 1.3855707926457003e-05, + "loss": 0.1904, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1838938295841217, + "step": 3300, + "valid_targets_mean": 2920.3, + "valid_targets_min": 837 + }, + { + "epoch": 4.484396200814111, + "grad_norm": 0.7081716006567864, + "learning_rate": 1.3791352386052201e-05, + "loss": 0.1895, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18974432349205017, + "step": 3305, + "valid_targets_mean": 2833.9, + "valid_targets_min": 818 + }, + { + "epoch": 4.491180461329715, + "grad_norm": 0.767864152806456, + "learning_rate": 1.3727067907902232e-05, + "loss": 0.1928, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18521273136138916, + "step": 3310, + "valid_targets_mean": 2255.4, + "valid_targets_min": 420 + }, + { + "epoch": 4.497964721845319, + "grad_norm": 0.688596857727443, + "learning_rate": 1.366285522778728e-05, + "loss": 0.1982, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1874987781047821, + "step": 3315, + "valid_targets_mean": 2975.4, + "valid_targets_min": 1163 + }, + { + "epoch": 4.504748982360923, + "grad_norm": 0.7386731368747045, + "learning_rate": 1.359871508066577e-05, + "loss": 0.2076, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20306339859962463, + "step": 3320, + "valid_targets_mean": 2857.8, + "valid_targets_min": 757 + }, + { + "epoch": 4.511533242876526, + "grad_norm": 0.8257894045478894, + "learning_rate": 1.3534648200665933e-05, + "loss": 0.2001, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18575474619865417, + "step": 3325, + "valid_targets_mean": 2783.4, + "valid_targets_min": 980 + }, + { + "epoch": 4.51831750339213, + "grad_norm": 0.6867092598234874, + "learning_rate": 1.3470655321077403e-05, + "loss": 0.1932, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18126091361045837, + "step": 3330, + "valid_targets_mean": 2647.6, + "valid_targets_min": 848 + }, + { + "epoch": 4.525101763907734, + "grad_norm": 0.7798112286441644, + "learning_rate": 1.3406737174342834e-05, + "loss": 0.2022, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23899130523204803, + "step": 3335, + "valid_targets_mean": 2828.8, + "valid_targets_min": 738 + }, + { + "epoch": 4.531886024423338, + "grad_norm": 0.7069921488313058, + "learning_rate": 1.3342894492049504e-05, + "loss": 0.2048, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20431819558143616, + "step": 3340, + "valid_targets_mean": 3127.2, + "valid_targets_min": 1317 + }, + { + "epoch": 4.5386702849389415, + "grad_norm": 0.7506355898759934, + "learning_rate": 1.3279128004920958e-05, + "loss": 0.1914, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1895807385444641, + "step": 3345, + "valid_targets_mean": 2666.6, + "valid_targets_min": 1005 + }, + { + "epoch": 4.545454545454545, + "grad_norm": 0.6725468343286256, + "learning_rate": 1.3215438442808624e-05, + "loss": 0.1984, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2019132673740387, + "step": 3350, + "valid_targets_mean": 3209.2, + "valid_targets_min": 1059 + }, + { + "epoch": 4.552238805970149, + "grad_norm": 0.6889050275687484, + "learning_rate": 1.3151826534683474e-05, + "loss": 0.1971, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1907675564289093, + "step": 3355, + "valid_targets_mean": 3152.4, + "valid_targets_min": 785 + }, + { + "epoch": 4.559023066485753, + "grad_norm": 0.7190846116998305, + "learning_rate": 1.308829300862768e-05, + "loss": 0.1878, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18840348720550537, + "step": 3360, + "valid_targets_mean": 2810.2, + "valid_targets_min": 803 + }, + { + "epoch": 4.565807327001357, + "grad_norm": 0.6702795320500231, + "learning_rate": 1.3024838591826274e-05, + "loss": 0.1987, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18903514742851257, + "step": 3365, + "valid_targets_mean": 3127.1, + "valid_targets_min": 1283 + }, + { + "epoch": 4.5725915875169605, + "grad_norm": 0.7488732753982411, + "learning_rate": 1.296146401055883e-05, + "loss": 0.2061, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22193178534507751, + "step": 3370, + "valid_targets_mean": 3123.5, + "valid_targets_min": 700 + }, + { + "epoch": 4.579375848032565, + "grad_norm": 0.7263613305452004, + "learning_rate": 1.2898169990191148e-05, + "loss": 0.1999, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1917208731174469, + "step": 3375, + "valid_targets_mean": 2815.2, + "valid_targets_min": 734 + }, + { + "epoch": 4.586160108548168, + "grad_norm": 0.7868953900274589, + "learning_rate": 1.2834957255166948e-05, + "loss": 0.1892, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17863771319389343, + "step": 3380, + "valid_targets_mean": 2261.1, + "valid_targets_min": 537 + }, + { + "epoch": 4.592944369063772, + "grad_norm": 0.7888410061782655, + "learning_rate": 1.2771826528999602e-05, + "loss": 0.1982, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20688292384147644, + "step": 3385, + "valid_targets_mean": 2547.2, + "valid_targets_min": 1028 + }, + { + "epoch": 4.599728629579376, + "grad_norm": 0.690580382285912, + "learning_rate": 1.2708778534263803e-05, + "loss": 0.1928, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17476820945739746, + "step": 3390, + "valid_targets_mean": 2701.8, + "valid_targets_min": 832 + }, + { + "epoch": 4.606512890094979, + "grad_norm": 0.7470662126944148, + "learning_rate": 1.2645813992587352e-05, + "loss": 0.1908, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.187617227435112, + "step": 3395, + "valid_targets_mean": 2586.8, + "valid_targets_min": 682 + }, + { + "epoch": 4.613297150610584, + "grad_norm": 1.2159270387590686, + "learning_rate": 1.258293362464286e-05, + "loss": 0.1988, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1858140230178833, + "step": 3400, + "valid_targets_mean": 2249.7, + "valid_targets_min": 1083 + }, + { + "epoch": 4.620081411126187, + "grad_norm": 0.7718829974398066, + "learning_rate": 1.2520138150139515e-05, + "loss": 0.1967, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.183822900056839, + "step": 3405, + "valid_targets_mean": 2386.8, + "valid_targets_min": 821 + }, + { + "epoch": 4.6268656716417915, + "grad_norm": 0.7237060148317359, + "learning_rate": 1.2457428287814843e-05, + "loss": 0.2056, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2160126268863678, + "step": 3410, + "valid_targets_mean": 3024.9, + "valid_targets_min": 1350 + }, + { + "epoch": 4.633649932157395, + "grad_norm": 0.6970676682341086, + "learning_rate": 1.2394804755426478e-05, + "loss": 0.1911, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19945994019508362, + "step": 3415, + "valid_targets_mean": 2918.2, + "valid_targets_min": 474 + }, + { + "epoch": 4.640434192672998, + "grad_norm": 0.7002590495581039, + "learning_rate": 1.233226826974395e-05, + "loss": 0.1959, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1719842553138733, + "step": 3420, + "valid_targets_mean": 2718.7, + "valid_targets_min": 691 + }, + { + "epoch": 4.647218453188603, + "grad_norm": 0.6667044269630936, + "learning_rate": 1.2269819546540463e-05, + "loss": 0.1948, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17064428329467773, + "step": 3425, + "valid_targets_mean": 2865.0, + "valid_targets_min": 760 + }, + { + "epoch": 4.654002713704206, + "grad_norm": 0.6723526548267423, + "learning_rate": 1.2207459300584743e-05, + "loss": 0.2023, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17956231534481049, + "step": 3430, + "valid_targets_mean": 2932.9, + "valid_targets_min": 777 + }, + { + "epoch": 4.6607869742198105, + "grad_norm": 0.7597739606585969, + "learning_rate": 1.2145188245632825e-05, + "loss": 0.1928, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2012956440448761, + "step": 3435, + "valid_targets_mean": 2589.2, + "valid_targets_min": 847 + }, + { + "epoch": 4.667571234735414, + "grad_norm": 0.7411616368636825, + "learning_rate": 1.2083007094419883e-05, + "loss": 0.1928, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20920173823833466, + "step": 3440, + "valid_targets_mean": 2706.8, + "valid_targets_min": 802 + }, + { + "epoch": 4.674355495251017, + "grad_norm": 0.7282082144235389, + "learning_rate": 1.2020916558652089e-05, + "loss": 0.1921, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19379201531410217, + "step": 3445, + "valid_targets_mean": 2854.1, + "valid_targets_min": 1386 + }, + { + "epoch": 4.681139755766622, + "grad_norm": 0.7634331226060735, + "learning_rate": 1.195891734899846e-05, + "loss": 0.1915, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17866259813308716, + "step": 3450, + "valid_targets_mean": 2573.5, + "valid_targets_min": 655 + }, + { + "epoch": 4.687924016282225, + "grad_norm": 0.7399644965932646, + "learning_rate": 1.1897010175082722e-05, + "loss": 0.1957, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17755639553070068, + "step": 3455, + "valid_targets_mean": 2317.8, + "valid_targets_min": 727 + }, + { + "epoch": 4.694708276797829, + "grad_norm": 0.6760992330649304, + "learning_rate": 1.1835195745475167e-05, + "loss": 0.2131, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2119104415178299, + "step": 3460, + "valid_targets_mean": 3410.1, + "valid_targets_min": 1576 + }, + { + "epoch": 4.701492537313433, + "grad_norm": 0.804513682015714, + "learning_rate": 1.17734747676846e-05, + "loss": 0.1916, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19752076268196106, + "step": 3465, + "valid_targets_mean": 2760.2, + "valid_targets_min": 1040 + }, + { + "epoch": 4.708276797829036, + "grad_norm": 0.707949135936285, + "learning_rate": 1.1711847948150186e-05, + "loss": 0.1997, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20585331320762634, + "step": 3470, + "valid_targets_mean": 3060.6, + "valid_targets_min": 822 + }, + { + "epoch": 4.715061058344641, + "grad_norm": 0.7166396129531338, + "learning_rate": 1.1650315992233385e-05, + "loss": 0.2016, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20005938410758972, + "step": 3475, + "valid_targets_mean": 2918.9, + "valid_targets_min": 1040 + }, + { + "epoch": 4.721845318860244, + "grad_norm": 0.7074627055555082, + "learning_rate": 1.1588879604209881e-05, + "loss": 0.201, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19769035279750824, + "step": 3480, + "valid_targets_mean": 2911.6, + "valid_targets_min": 897 + }, + { + "epoch": 4.728629579375848, + "grad_norm": 0.6735143020870807, + "learning_rate": 1.1527539487261506e-05, + "loss": 0.1982, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22932162880897522, + "step": 3485, + "valid_targets_mean": 3123.0, + "valid_targets_min": 992 + }, + { + "epoch": 4.735413839891452, + "grad_norm": 0.869273594031702, + "learning_rate": 1.1466296343468226e-05, + "loss": 0.2019, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19944173097610474, + "step": 3490, + "valid_targets_mean": 3747.2, + "valid_targets_min": 1122 + }, + { + "epoch": 4.742198100407055, + "grad_norm": 0.6999556266338184, + "learning_rate": 1.1405150873800061e-05, + "loss": 0.2049, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19584204256534576, + "step": 3495, + "valid_targets_mean": 2844.7, + "valid_targets_min": 659 + }, + { + "epoch": 4.74898236092266, + "grad_norm": 0.8746934337613864, + "learning_rate": 1.1344103778109087e-05, + "loss": 0.2027, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20564094185829163, + "step": 3500, + "valid_targets_mean": 2151.8, + "valid_targets_min": 854 + }, + { + "epoch": 4.755766621438263, + "grad_norm": 0.7003484221778359, + "learning_rate": 1.1283155755121435e-05, + "loss": 0.1916, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18846553564071655, + "step": 3505, + "valid_targets_mean": 3061.2, + "valid_targets_min": 734 + }, + { + "epoch": 4.762550881953867, + "grad_norm": 0.6735835369736797, + "learning_rate": 1.1222307502429263e-05, + "loss": 0.1944, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19506067037582397, + "step": 3510, + "valid_targets_mean": 3378.4, + "valid_targets_min": 1042 + }, + { + "epoch": 4.769335142469471, + "grad_norm": 0.6771476818137822, + "learning_rate": 1.116155971648281e-05, + "loss": 0.1976, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2082287073135376, + "step": 3515, + "valid_targets_mean": 3448.1, + "valid_targets_min": 1111 + }, + { + "epoch": 4.776119402985074, + "grad_norm": 0.6422854879412622, + "learning_rate": 1.1100913092582374e-05, + "loss": 0.1898, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17519763112068176, + "step": 3520, + "valid_targets_mean": 3188.4, + "valid_targets_min": 627 + }, + { + "epoch": 4.7829036635006785, + "grad_norm": 0.6470472236301349, + "learning_rate": 1.1040368324870423e-05, + "loss": 0.1965, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18372002243995667, + "step": 3525, + "valid_targets_mean": 3492.9, + "valid_targets_min": 1281 + }, + { + "epoch": 4.789687924016282, + "grad_norm": 0.6869308415003494, + "learning_rate": 1.0979926106323573e-05, + "loss": 0.197, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19661110639572144, + "step": 3530, + "valid_targets_mean": 2938.6, + "valid_targets_min": 1216 + }, + { + "epoch": 4.796472184531886, + "grad_norm": 0.7662043660493066, + "learning_rate": 1.0919587128744706e-05, + "loss": 0.1965, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22166971862316132, + "step": 3535, + "valid_targets_mean": 2690.3, + "valid_targets_min": 870 + }, + { + "epoch": 4.80325644504749, + "grad_norm": 0.7199094564088079, + "learning_rate": 1.0859352082755063e-05, + "loss": 0.1978, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20869404077529907, + "step": 3540, + "valid_targets_mean": 2830.1, + "valid_targets_min": 974 + }, + { + "epoch": 4.810040705563093, + "grad_norm": 0.7328326298477729, + "learning_rate": 1.0799221657786277e-05, + "loss": 0.1975, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19416038691997528, + "step": 3545, + "valid_targets_mean": 2855.2, + "valid_targets_min": 930 + }, + { + "epoch": 4.8168249660786975, + "grad_norm": 0.7338773028161634, + "learning_rate": 1.0739196542072554e-05, + "loss": 0.1909, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18774661421775818, + "step": 3550, + "valid_targets_mean": 2434.1, + "valid_targets_min": 548 + }, + { + "epoch": 4.823609226594301, + "grad_norm": 0.7453504801028396, + "learning_rate": 1.067927742264274e-05, + "loss": 0.1999, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19881319999694824, + "step": 3555, + "valid_targets_mean": 2629.6, + "valid_targets_min": 898 + }, + { + "epoch": 4.830393487109905, + "grad_norm": 0.7369431692578411, + "learning_rate": 1.0619464985312504e-05, + "loss": 0.1935, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17585620284080505, + "step": 3560, + "valid_targets_mean": 2719.1, + "valid_targets_min": 788 + }, + { + "epoch": 4.837177747625509, + "grad_norm": 0.9297708502664351, + "learning_rate": 1.0559759914676446e-05, + "loss": 0.2079, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22836661338806152, + "step": 3565, + "valid_targets_mean": 2168.1, + "valid_targets_min": 1005 + }, + { + "epoch": 4.843962008141113, + "grad_norm": 0.6894574277930866, + "learning_rate": 1.0500162894100274e-05, + "loss": 0.2012, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19786593317985535, + "step": 3570, + "valid_targets_mean": 3260.4, + "valid_targets_min": 927 + }, + { + "epoch": 4.850746268656716, + "grad_norm": 0.8540680237621792, + "learning_rate": 1.0440674605713017e-05, + "loss": 0.1912, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18931138515472412, + "step": 3575, + "valid_targets_mean": 2123.2, + "valid_targets_min": 835 + }, + { + "epoch": 4.85753052917232, + "grad_norm": 0.7445669474855375, + "learning_rate": 1.0381295730399156e-05, + "loss": 0.1927, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.191728413105011, + "step": 3580, + "valid_targets_mean": 2410.1, + "valid_targets_min": 926 + }, + { + "epoch": 4.864314789687924, + "grad_norm": 0.7329229972435735, + "learning_rate": 1.03220269477909e-05, + "loss": 0.2127, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22075408697128296, + "step": 3585, + "valid_targets_mean": 2925.0, + "valid_targets_min": 1200 + }, + { + "epoch": 4.871099050203528, + "grad_norm": 0.6758139766478201, + "learning_rate": 1.026286893626033e-05, + "loss": 0.1927, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19056375324726105, + "step": 3590, + "valid_targets_mean": 3506.6, + "valid_targets_min": 906 + }, + { + "epoch": 4.877883310719132, + "grad_norm": 0.7551370606263685, + "learning_rate": 1.0203822372911714e-05, + "loss": 0.2036, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2027992308139801, + "step": 3595, + "valid_targets_mean": 2676.6, + "valid_targets_min": 1214 + }, + { + "epoch": 4.884667571234735, + "grad_norm": 0.7381200265506531, + "learning_rate": 1.0144887933573686e-05, + "loss": 0.186, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18408526480197906, + "step": 3600, + "valid_targets_mean": 2631.5, + "valid_targets_min": 930 + }, + { + "epoch": 4.89145183175034, + "grad_norm": 0.7270532833777729, + "learning_rate": 1.0086066292791573e-05, + "loss": 0.1945, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17591014504432678, + "step": 3605, + "valid_targets_mean": 2436.2, + "valid_targets_min": 817 + }, + { + "epoch": 4.898236092265943, + "grad_norm": 0.7296787884141677, + "learning_rate": 1.0027358123819625e-05, + "loss": 0.1968, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1968274563550949, + "step": 3610, + "valid_targets_mean": 2829.1, + "valid_targets_min": 733 + }, + { + "epoch": 4.905020352781547, + "grad_norm": 0.6735262562716569, + "learning_rate": 9.968764098613329e-06, + "loss": 0.1874, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.178671196103096, + "step": 3615, + "valid_targets_mean": 2898.8, + "valid_targets_min": 727 + }, + { + "epoch": 4.911804613297151, + "grad_norm": 0.7274784816663047, + "learning_rate": 9.910284887821733e-06, + "loss": 0.2001, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20386825501918793, + "step": 3620, + "valid_targets_mean": 2990.9, + "valid_targets_min": 1363 + }, + { + "epoch": 4.918588873812754, + "grad_norm": 0.8018638530971335, + "learning_rate": 9.851921160779729e-06, + "loss": 0.192, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19886431097984314, + "step": 3625, + "valid_targets_mean": 2382.4, + "valid_targets_min": 614 + }, + { + "epoch": 4.925373134328359, + "grad_norm": 0.8315431805467934, + "learning_rate": 9.793673585500454e-06, + "loss": 0.1842, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18963271379470825, + "step": 3630, + "valid_targets_mean": 2400.6, + "valid_targets_min": 770 + }, + { + "epoch": 4.932157394843962, + "grad_norm": 0.7403065285740312, + "learning_rate": 9.73554282866757e-06, + "loss": 0.1961, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1916884034872055, + "step": 3635, + "valid_targets_mean": 2484.9, + "valid_targets_min": 801 + }, + { + "epoch": 4.9389416553595655, + "grad_norm": 0.8056592037843991, + "learning_rate": 9.677529555627705e-06, + "loss": 0.2015, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1968856304883957, + "step": 3640, + "valid_targets_mean": 2373.9, + "valid_targets_min": 1222 + }, + { + "epoch": 4.94572591587517, + "grad_norm": 0.6847276217753911, + "learning_rate": 9.61963443038278e-06, + "loss": 0.1993, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19165253639221191, + "step": 3645, + "valid_targets_mean": 3296.9, + "valid_targets_min": 642 + }, + { + "epoch": 4.952510176390773, + "grad_norm": 0.7481331426159193, + "learning_rate": 9.561858115582432e-06, + "loss": 0.1864, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1881834864616394, + "step": 3650, + "valid_targets_mean": 2459.1, + "valid_targets_min": 737 + }, + { + "epoch": 4.959294436906378, + "grad_norm": 0.7380750762114332, + "learning_rate": 9.504201272516456e-06, + "loss": 0.199, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18983861804008484, + "step": 3655, + "valid_targets_mean": 2577.3, + "valid_targets_min": 896 + }, + { + "epoch": 4.966078697421981, + "grad_norm": 0.7036147611876263, + "learning_rate": 9.44666456110718e-06, + "loss": 0.1896, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19570358097553253, + "step": 3660, + "valid_targets_mean": 3040.5, + "valid_targets_min": 724 + }, + { + "epoch": 4.9728629579375845, + "grad_norm": 0.7932038159884709, + "learning_rate": 9.38924863990197e-06, + "loss": 0.198, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1836714744567871, + "step": 3665, + "valid_targets_mean": 2151.7, + "valid_targets_min": 923 + }, + { + "epoch": 4.979647218453189, + "grad_norm": 0.6964386837916756, + "learning_rate": 9.331954166065635e-06, + "loss": 0.1952, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18505831062793732, + "step": 3670, + "valid_targets_mean": 2731.6, + "valid_targets_min": 1165 + }, + { + "epoch": 4.986431478968792, + "grad_norm": 0.7601196537242972, + "learning_rate": 9.27478179537297e-06, + "loss": 0.1947, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19253486394882202, + "step": 3675, + "valid_targets_mean": 2509.4, + "valid_targets_min": 548 + }, + { + "epoch": 4.993215739484397, + "grad_norm": 0.8155086323774635, + "learning_rate": 9.217732182201184e-06, + "loss": 0.1946, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19509343802928925, + "step": 3680, + "valid_targets_mean": 2026.9, + "valid_targets_min": 644 + }, + { + "epoch": 5.0, + "grad_norm": 0.7646808703457486, + "learning_rate": 9.160805979522452e-06, + "loss": 0.1994, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18792404234409332, + "step": 3685, + "valid_targets_mean": 2684.2, + "valid_targets_min": 685 + }, + { + "epoch": 5.006784260515603, + "grad_norm": 0.6860326719249955, + "learning_rate": 9.104003838896445e-06, + "loss": 0.1778, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1950758993625641, + "step": 3690, + "valid_targets_mean": 3155.7, + "valid_targets_min": 1030 + }, + { + "epoch": 5.013568521031208, + "grad_norm": 0.8441160581115864, + "learning_rate": 9.047326410462829e-06, + "loss": 0.1715, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.161452516913414, + "step": 3695, + "valid_targets_mean": 1863.4, + "valid_targets_min": 839 + }, + { + "epoch": 5.020352781546811, + "grad_norm": 0.7067203784439574, + "learning_rate": 8.990774342933888e-06, + "loss": 0.1871, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1699763685464859, + "step": 3700, + "valid_targets_mean": 3295.1, + "valid_targets_min": 923 + }, + { + "epoch": 5.0271370420624155, + "grad_norm": 0.7615352549068712, + "learning_rate": 8.934348283587029e-06, + "loss": 0.1767, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1905319094657898, + "step": 3705, + "valid_targets_mean": 2832.2, + "valid_targets_min": 886 + }, + { + "epoch": 5.033921302578019, + "grad_norm": 0.7103293756116349, + "learning_rate": 8.878048878257443e-06, + "loss": 0.1783, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1622033417224884, + "step": 3710, + "valid_targets_mean": 2561.1, + "valid_targets_min": 1241 + }, + { + "epoch": 5.040705563093622, + "grad_norm": 0.6718184525017369, + "learning_rate": 8.82187677133065e-06, + "loss": 0.1707, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16609933972358704, + "step": 3715, + "valid_targets_mean": 3126.9, + "valid_targets_min": 881 + }, + { + "epoch": 5.047489823609227, + "grad_norm": 0.7837080600442082, + "learning_rate": 8.765832605735159e-06, + "loss": 0.1766, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19446608424186707, + "step": 3720, + "valid_targets_mean": 2856.1, + "valid_targets_min": 880 + }, + { + "epoch": 5.05427408412483, + "grad_norm": 0.6563421792158702, + "learning_rate": 8.709917022935117e-06, + "loss": 0.1711, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14738069474697113, + "step": 3725, + "valid_targets_mean": 2943.3, + "valid_targets_min": 901 + }, + { + "epoch": 5.0610583446404345, + "grad_norm": 0.8034887747351711, + "learning_rate": 8.654130662922927e-06, + "loss": 0.1875, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18572455644607544, + "step": 3730, + "valid_targets_mean": 3047.0, + "valid_targets_min": 772 + }, + { + "epoch": 5.067842605156038, + "grad_norm": 0.7592407786525501, + "learning_rate": 8.59847416421198e-06, + "loss": 0.1764, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18500494956970215, + "step": 3735, + "valid_targets_mean": 2576.2, + "valid_targets_min": 1356 + }, + { + "epoch": 5.074626865671641, + "grad_norm": 0.7045855658185989, + "learning_rate": 8.542948163829281e-06, + "loss": 0.1742, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16853509843349457, + "step": 3740, + "valid_targets_mean": 3002.3, + "valid_targets_min": 765 + }, + { + "epoch": 5.081411126187246, + "grad_norm": 0.7766566314367146, + "learning_rate": 8.48755329730822e-06, + "loss": 0.1698, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1695399284362793, + "step": 3745, + "valid_targets_mean": 3074.8, + "valid_targets_min": 705 + }, + { + "epoch": 5.088195386702849, + "grad_norm": 0.7988370685668423, + "learning_rate": 8.432290198681252e-06, + "loss": 0.1772, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2006351202726364, + "step": 3750, + "valid_targets_mean": 2525.8, + "valid_targets_min": 1122 + }, + { + "epoch": 5.094979647218453, + "grad_norm": 0.7054424649548406, + "learning_rate": 8.377159500472655e-06, + "loss": 0.1789, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17677809298038483, + "step": 3755, + "valid_targets_mean": 3203.8, + "valid_targets_min": 1011 + }, + { + "epoch": 5.101763907734057, + "grad_norm": 0.713725738511338, + "learning_rate": 8.322161833691314e-06, + "loss": 0.1795, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16771814227104187, + "step": 3760, + "valid_targets_mean": 2823.8, + "valid_targets_min": 822 + }, + { + "epoch": 5.108548168249661, + "grad_norm": 0.8555144387463031, + "learning_rate": 8.267297827823451e-06, + "loss": 0.185, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18097899854183197, + "step": 3765, + "valid_targets_mean": 2239.5, + "valid_targets_min": 873 + }, + { + "epoch": 5.115332428765265, + "grad_norm": 0.6934604273299277, + "learning_rate": 8.212568110825475e-06, + "loss": 0.1761, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15618851780891418, + "step": 3770, + "valid_targets_mean": 2868.4, + "valid_targets_min": 914 + }, + { + "epoch": 5.122116689280868, + "grad_norm": 0.7648492466356746, + "learning_rate": 8.157973309116734e-06, + "loss": 0.1746, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17531245946884155, + "step": 3775, + "valid_targets_mean": 2618.1, + "valid_targets_min": 722 + }, + { + "epoch": 5.128900949796472, + "grad_norm": 0.7285693028786157, + "learning_rate": 8.103514047572409e-06, + "loss": 0.1724, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17099301517009735, + "step": 3780, + "valid_targets_mean": 3425.1, + "valid_targets_min": 675 + }, + { + "epoch": 5.135685210312076, + "grad_norm": 0.8202795361017104, + "learning_rate": 8.049190949516312e-06, + "loss": 0.1658, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1414903700351715, + "step": 3785, + "valid_targets_mean": 1912.8, + "valid_targets_min": 727 + }, + { + "epoch": 5.14246947082768, + "grad_norm": 0.8263248998725861, + "learning_rate": 7.995004636713763e-06, + "loss": 0.189, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19762666523456573, + "step": 3790, + "valid_targets_mean": 2408.8, + "valid_targets_min": 952 + }, + { + "epoch": 5.149253731343284, + "grad_norm": 0.7393185445325677, + "learning_rate": 7.940955729364508e-06, + "loss": 0.1819, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1720680445432663, + "step": 3795, + "valid_targets_mean": 2906.4, + "valid_targets_min": 1179 + }, + { + "epoch": 5.156037991858887, + "grad_norm": 0.803370399333462, + "learning_rate": 7.88704484609556e-06, + "loss": 0.1756, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17175140976905823, + "step": 3800, + "valid_targets_mean": 2323.2, + "valid_targets_min": 754 + }, + { + "epoch": 5.162822252374491, + "grad_norm": 0.6536820438575788, + "learning_rate": 7.83327260395418e-06, + "loss": 0.1747, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15294823050498962, + "step": 3805, + "valid_targets_mean": 3594.9, + "valid_targets_min": 692 + }, + { + "epoch": 5.169606512890095, + "grad_norm": 1.122135081296357, + "learning_rate": 7.779639618400761e-06, + "loss": 0.1765, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16186411678791046, + "step": 3810, + "valid_targets_mean": 2047.9, + "valid_targets_min": 617 + }, + { + "epoch": 5.176390773405699, + "grad_norm": 0.8371034996880778, + "learning_rate": 7.726146503301835e-06, + "loss": 0.1707, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18086284399032593, + "step": 3815, + "valid_targets_mean": 2635.9, + "valid_targets_min": 946 + }, + { + "epoch": 5.1831750339213025, + "grad_norm": 0.8310613229880927, + "learning_rate": 7.672793870922988e-06, + "loss": 0.1842, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1729108989238739, + "step": 3820, + "valid_targets_mean": 2154.4, + "valid_targets_min": 714 + }, + { + "epoch": 5.189959294436906, + "grad_norm": 0.7736398422191684, + "learning_rate": 7.619582331921918e-06, + "loss": 0.1821, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19843143224716187, + "step": 3825, + "valid_targets_mean": 3357.2, + "valid_targets_min": 648 + }, + { + "epoch": 5.19674355495251, + "grad_norm": 0.7257509154939583, + "learning_rate": 7.566512495341387e-06, + "loss": 0.1812, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17544004321098328, + "step": 3830, + "valid_targets_mean": 2957.9, + "valid_targets_min": 1043 + }, + { + "epoch": 5.203527815468114, + "grad_norm": 0.7915636689184143, + "learning_rate": 7.513584968602279e-06, + "loss": 0.1721, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17739593982696533, + "step": 3835, + "valid_targets_mean": 3008.4, + "valid_targets_min": 991 + }, + { + "epoch": 5.210312075983718, + "grad_norm": 0.7239442896123227, + "learning_rate": 7.4608003574966604e-06, + "loss": 0.1728, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15659013390541077, + "step": 3840, + "valid_targets_mean": 2958.6, + "valid_targets_min": 1152 + }, + { + "epoch": 5.2170963364993215, + "grad_norm": 0.8396562792144856, + "learning_rate": 7.408159266180803e-06, + "loss": 0.1713, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17542997002601624, + "step": 3845, + "valid_targets_mean": 2428.6, + "valid_targets_min": 772 + }, + { + "epoch": 5.223880597014926, + "grad_norm": 0.7134303390915493, + "learning_rate": 7.3556622971683246e-06, + "loss": 0.1698, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1654246747493744, + "step": 3850, + "valid_targets_mean": 3071.6, + "valid_targets_min": 671 + }, + { + "epoch": 5.230664857530529, + "grad_norm": 0.8451388168730941, + "learning_rate": 7.3033100513232356e-06, + "loss": 0.1836, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1715916395187378, + "step": 3855, + "valid_targets_mean": 2175.2, + "valid_targets_min": 1131 + }, + { + "epoch": 5.237449118046133, + "grad_norm": 0.717910328678344, + "learning_rate": 7.251103127853119e-06, + "loss": 0.1757, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17889514565467834, + "step": 3860, + "valid_targets_mean": 2811.1, + "valid_targets_min": 917 + }, + { + "epoch": 5.244233378561737, + "grad_norm": 0.6701275306479537, + "learning_rate": 7.199042124302218e-06, + "loss": 0.1757, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15984272956848145, + "step": 3865, + "valid_targets_mean": 3448.8, + "valid_targets_min": 1410 + }, + { + "epoch": 5.25101763907734, + "grad_norm": 0.7396236758430029, + "learning_rate": 7.1471276365446265e-06, + "loss": 0.1691, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16578418016433716, + "step": 3870, + "valid_targets_mean": 2790.6, + "valid_targets_min": 788 + }, + { + "epoch": 5.257801899592945, + "grad_norm": 0.7761448636163958, + "learning_rate": 7.095360258777479e-06, + "loss": 0.1801, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16495108604431152, + "step": 3875, + "valid_targets_mean": 2547.1, + "valid_targets_min": 649 + }, + { + "epoch": 5.264586160108548, + "grad_norm": 0.7169834901617055, + "learning_rate": 7.043740583514116e-06, + "loss": 0.1727, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16059651970863342, + "step": 3880, + "valid_targets_mean": 2682.7, + "valid_targets_min": 1125 + }, + { + "epoch": 5.271370420624152, + "grad_norm": 0.937784973378471, + "learning_rate": 6.99226920157734e-06, + "loss": 0.1759, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15862375497817993, + "step": 3885, + "valid_targets_mean": 2327.4, + "valid_targets_min": 819 + }, + { + "epoch": 5.278154681139756, + "grad_norm": 0.7636077182810278, + "learning_rate": 6.9409467020926105e-06, + "loss": 0.1692, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15665987133979797, + "step": 3890, + "valid_targets_mean": 2581.5, + "valid_targets_min": 770 + }, + { + "epoch": 5.284938941655359, + "grad_norm": 0.8082211996097235, + "learning_rate": 6.88977367248135e-06, + "loss": 0.1767, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17556774616241455, + "step": 3895, + "valid_targets_mean": 2410.4, + "valid_targets_min": 714 + }, + { + "epoch": 5.291723202170964, + "grad_norm": 0.7855611135912653, + "learning_rate": 6.838750698454179e-06, + "loss": 0.1823, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19265274703502655, + "step": 3900, + "valid_targets_mean": 2630.9, + "valid_targets_min": 640 + }, + { + "epoch": 5.298507462686567, + "grad_norm": 0.773979909790802, + "learning_rate": 6.787878364004223e-06, + "loss": 0.1805, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1742939054965973, + "step": 3905, + "valid_targets_mean": 2754.6, + "valid_targets_min": 911 + }, + { + "epoch": 5.305291723202171, + "grad_norm": 0.7045798341954811, + "learning_rate": 6.7371572514004565e-06, + "loss": 0.1759, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1639530062675476, + "step": 3910, + "valid_targets_mean": 3152.7, + "valid_targets_min": 642 + }, + { + "epoch": 5.312075983717775, + "grad_norm": 0.7118839049525844, + "learning_rate": 6.6865879411809905e-06, + "loss": 0.1702, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1681211143732071, + "step": 3915, + "valid_targets_mean": 3016.1, + "valid_targets_min": 548 + }, + { + "epoch": 5.318860244233378, + "grad_norm": 0.7619077612605996, + "learning_rate": 6.636171012146475e-06, + "loss": 0.1774, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1695559173822403, + "step": 3920, + "valid_targets_mean": 2828.6, + "valid_targets_min": 734 + }, + { + "epoch": 5.325644504748983, + "grad_norm": 0.704742231689896, + "learning_rate": 6.58590704135343e-06, + "loss": 0.1677, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15426993370056152, + "step": 3925, + "valid_targets_mean": 2844.4, + "valid_targets_min": 662 + }, + { + "epoch": 5.332428765264586, + "grad_norm": 1.083531396358083, + "learning_rate": 6.535796604107689e-06, + "loss": 0.1759, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15194523334503174, + "step": 3930, + "valid_targets_mean": 2859.6, + "valid_targets_min": 1012 + }, + { + "epoch": 5.3392130257801895, + "grad_norm": 0.7267810541836737, + "learning_rate": 6.485840273957764e-06, + "loss": 0.1736, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18725672364234924, + "step": 3935, + "valid_targets_mean": 3104.9, + "valid_targets_min": 910 + }, + { + "epoch": 5.345997286295794, + "grad_norm": 0.7803241832429276, + "learning_rate": 6.4360386226883096e-06, + "loss": 0.1762, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18757139146327972, + "step": 3940, + "valid_targets_mean": 2826.7, + "valid_targets_min": 803 + }, + { + "epoch": 5.352781546811397, + "grad_norm": 0.7469635091923277, + "learning_rate": 6.386392220313595e-06, + "loss": 0.1844, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1943964660167694, + "step": 3945, + "valid_targets_mean": 2944.8, + "valid_targets_min": 1105 + }, + { + "epoch": 5.359565807327002, + "grad_norm": 0.7710548111024483, + "learning_rate": 6.336901635070924e-06, + "loss": 0.1837, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18395206332206726, + "step": 3950, + "valid_targets_mean": 2640.2, + "valid_targets_min": 744 + }, + { + "epoch": 5.366350067842605, + "grad_norm": 0.7839171636791334, + "learning_rate": 6.287567433414203e-06, + "loss": 0.1738, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18562139570713043, + "step": 3955, + "valid_targets_mean": 2582.2, + "valid_targets_min": 788 + }, + { + "epoch": 5.373134328358209, + "grad_norm": 0.7856837884639124, + "learning_rate": 6.238390180007388e-06, + "loss": 0.1809, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1797139048576355, + "step": 3960, + "valid_targets_mean": 2337.2, + "valid_targets_min": 890 + }, + { + "epoch": 5.379918588873813, + "grad_norm": 0.7809120404347694, + "learning_rate": 6.189370437718076e-06, + "loss": 0.1708, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17183539271354675, + "step": 3965, + "valid_targets_mean": 2353.1, + "valid_targets_min": 894 + }, + { + "epoch": 5.386702849389416, + "grad_norm": 0.7115407389633623, + "learning_rate": 6.140508767611031e-06, + "loss": 0.1789, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17769712209701538, + "step": 3970, + "valid_targets_mean": 3229.7, + "valid_targets_min": 682 + }, + { + "epoch": 5.393487109905021, + "grad_norm": 0.9077544313783328, + "learning_rate": 6.091805728941766e-06, + "loss": 0.1831, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19499380886554718, + "step": 3975, + "valid_targets_mean": 3067.6, + "valid_targets_min": 1205 + }, + { + "epoch": 5.400271370420624, + "grad_norm": 0.8077064097851708, + "learning_rate": 6.0432618791501685e-06, + "loss": 0.1715, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1870405375957489, + "step": 3980, + "valid_targets_mean": 2559.1, + "valid_targets_min": 824 + }, + { + "epoch": 5.407055630936228, + "grad_norm": 0.6398380496001846, + "learning_rate": 5.994877773854073e-06, + "loss": 0.1702, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15979993343353271, + "step": 3985, + "valid_targets_mean": 3374.1, + "valid_targets_min": 765 + }, + { + "epoch": 5.413839891451832, + "grad_norm": 0.7502361275025041, + "learning_rate": 5.946653966842952e-06, + "loss": 0.168, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17689886689186096, + "step": 3990, + "valid_targets_mean": 2697.2, + "valid_targets_min": 985 + }, + { + "epoch": 5.420624151967435, + "grad_norm": 0.7109443485696391, + "learning_rate": 5.89859101007153e-06, + "loss": 0.1753, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19721101224422455, + "step": 3995, + "valid_targets_mean": 3432.0, + "valid_targets_min": 1392 + }, + { + "epoch": 5.4274084124830395, + "grad_norm": 0.7778834659042302, + "learning_rate": 5.850689453653519e-06, + "loss": 0.1741, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.176166370511055, + "step": 4000, + "valid_targets_mean": 2819.3, + "valid_targets_min": 648 + }, + { + "epoch": 5.434192672998643, + "grad_norm": 0.7986804713666946, + "learning_rate": 5.802949845855268e-06, + "loss": 0.1845, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17381024360656738, + "step": 4005, + "valid_targets_mean": 2511.1, + "valid_targets_min": 682 + }, + { + "epoch": 5.440976933514247, + "grad_norm": 0.74764269275807, + "learning_rate": 5.75537273308951e-06, + "loss": 0.1694, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1764945685863495, + "step": 4010, + "valid_targets_mean": 3111.7, + "valid_targets_min": 1171 + }, + { + "epoch": 5.447761194029851, + "grad_norm": 0.7387352711479095, + "learning_rate": 5.7079586599091386e-06, + "loss": 0.1768, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1689624935388565, + "step": 4015, + "valid_targets_mean": 2689.8, + "valid_targets_min": 746 + }, + { + "epoch": 5.454545454545454, + "grad_norm": 0.6334842178503496, + "learning_rate": 5.660708169000915e-06, + "loss": 0.1778, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1706801950931549, + "step": 4020, + "valid_targets_mean": 4078.6, + "valid_targets_min": 1344 + }, + { + "epoch": 5.4613297150610585, + "grad_norm": 0.7687849593222028, + "learning_rate": 5.61362180117931e-06, + "loss": 0.1822, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.180070698261261, + "step": 4025, + "valid_targets_mean": 2676.0, + "valid_targets_min": 1013 + }, + { + "epoch": 5.468113975576662, + "grad_norm": 0.730085829715646, + "learning_rate": 5.566700095380278e-06, + "loss": 0.1688, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17584823071956635, + "step": 4030, + "valid_targets_mean": 3169.8, + "valid_targets_min": 1001 + }, + { + "epoch": 5.474898236092266, + "grad_norm": 0.7919939571403866, + "learning_rate": 5.519943588655119e-06, + "loss": 0.1783, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16983819007873535, + "step": 4035, + "valid_targets_mean": 2544.8, + "valid_targets_min": 711 + }, + { + "epoch": 5.48168249660787, + "grad_norm": 0.7573834464121695, + "learning_rate": 5.473352816164297e-06, + "loss": 0.1774, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18410179018974304, + "step": 4040, + "valid_targets_mean": 2772.2, + "valid_targets_min": 1090 + }, + { + "epoch": 5.488466757123474, + "grad_norm": 0.8466367207178821, + "learning_rate": 5.426928311171349e-06, + "loss": 0.1724, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1880597323179245, + "step": 4045, + "valid_targets_mean": 2396.6, + "valid_targets_min": 830 + }, + { + "epoch": 5.495251017639077, + "grad_norm": 0.7284020341330157, + "learning_rate": 5.3806706050367595e-06, + "loss": 0.1723, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1683264523744583, + "step": 4050, + "valid_targets_mean": 2992.8, + "valid_targets_min": 837 + }, + { + "epoch": 5.502035278154681, + "grad_norm": 0.7784161722246488, + "learning_rate": 5.334580227211876e-06, + "loss": 0.1792, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1845816969871521, + "step": 4055, + "valid_targets_mean": 2580.1, + "valid_targets_min": 742 + }, + { + "epoch": 5.508819538670285, + "grad_norm": 0.8548009473097017, + "learning_rate": 5.28865770523288e-06, + "loss": 0.1802, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17506195604801178, + "step": 4060, + "valid_targets_mean": 2178.6, + "valid_targets_min": 642 + }, + { + "epoch": 5.515603799185889, + "grad_norm": 0.7774055293795816, + "learning_rate": 5.242903564714703e-06, + "loss": 0.179, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16525691747665405, + "step": 4065, + "valid_targets_mean": 2520.9, + "valid_targets_min": 720 + }, + { + "epoch": 5.522388059701493, + "grad_norm": 0.796456602697807, + "learning_rate": 5.197318329345053e-06, + "loss": 0.1789, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1665637195110321, + "step": 4070, + "valid_targets_mean": 2367.8, + "valid_targets_min": 721 + }, + { + "epoch": 5.529172320217096, + "grad_norm": 0.646421264270265, + "learning_rate": 5.151902520878389e-06, + "loss": 0.1733, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15533316135406494, + "step": 4075, + "valid_targets_mean": 2986.1, + "valid_targets_min": 692 + }, + { + "epoch": 5.5359565807327, + "grad_norm": 0.7819970698101568, + "learning_rate": 5.106656659129976e-06, + "loss": 0.1939, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20608392357826233, + "step": 4080, + "valid_targets_mean": 2478.6, + "valid_targets_min": 615 + }, + { + "epoch": 5.542740841248304, + "grad_norm": 0.7853975289145698, + "learning_rate": 5.061581261969908e-06, + "loss": 0.1796, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17922022938728333, + "step": 4085, + "valid_targets_mean": 2634.1, + "valid_targets_min": 632 + }, + { + "epoch": 5.549525101763908, + "grad_norm": 0.7225473013309462, + "learning_rate": 5.016676845317195e-06, + "loss": 0.1677, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1765461564064026, + "step": 4090, + "valid_targets_mean": 3014.4, + "valid_targets_min": 911 + }, + { + "epoch": 5.556309362279512, + "grad_norm": 0.7295640966157877, + "learning_rate": 4.971943923133871e-06, + "loss": 0.1729, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17156416177749634, + "step": 4095, + "valid_targets_mean": 3041.5, + "valid_targets_min": 912 + }, + { + "epoch": 5.563093622795115, + "grad_norm": 0.9219762006603129, + "learning_rate": 4.9273830074190775e-06, + "loss": 0.1765, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18373648822307587, + "step": 4100, + "valid_targets_mean": 2428.7, + "valid_targets_min": 722 + }, + { + "epoch": 5.569877883310719, + "grad_norm": 0.702767206840844, + "learning_rate": 4.8829946082032485e-06, + "loss": 0.1673, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13803914189338684, + "step": 4105, + "valid_targets_mean": 2718.2, + "valid_targets_min": 761 + }, + { + "epoch": 5.576662143826323, + "grad_norm": 0.7199250201736765, + "learning_rate": 4.838779233542219e-06, + "loss": 0.1814, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18196873366832733, + "step": 4110, + "valid_targets_mean": 3093.8, + "valid_targets_min": 1395 + }, + { + "epoch": 5.5834464043419265, + "grad_norm": 0.702683524158407, + "learning_rate": 4.794737389511466e-06, + "loss": 0.1667, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15686261653900146, + "step": 4115, + "valid_targets_mean": 2956.6, + "valid_targets_min": 1365 + }, + { + "epoch": 5.590230664857531, + "grad_norm": 0.8226186328669812, + "learning_rate": 4.750869580200268e-06, + "loss": 0.1713, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18233484029769897, + "step": 4120, + "valid_targets_mean": 2818.7, + "valid_targets_min": 848 + }, + { + "epoch": 5.597014925373134, + "grad_norm": 0.757006026009914, + "learning_rate": 4.707176307705958e-06, + "loss": 0.1801, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17816491425037384, + "step": 4125, + "valid_targets_mean": 2692.9, + "valid_targets_min": 578 + }, + { + "epoch": 5.603799185888738, + "grad_norm": 0.7711809358808944, + "learning_rate": 4.6636580721281945e-06, + "loss": 0.1845, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1927419900894165, + "step": 4130, + "valid_targets_mean": 2813.9, + "valid_targets_min": 1322 + }, + { + "epoch": 5.610583446404342, + "grad_norm": 0.8362493387614779, + "learning_rate": 4.620315371563188e-06, + "loss": 0.1812, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16866496205329895, + "step": 4135, + "valid_targets_mean": 2549.8, + "valid_targets_min": 728 + }, + { + "epoch": 5.6173677069199455, + "grad_norm": 0.7202504972034123, + "learning_rate": 4.577148702098064e-06, + "loss": 0.1708, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18742281198501587, + "step": 4140, + "valid_targets_mean": 3108.2, + "valid_targets_min": 727 + }, + { + "epoch": 5.62415196743555, + "grad_norm": 0.7230613052656376, + "learning_rate": 4.5341585578051125e-06, + "loss": 0.172, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1717241406440735, + "step": 4145, + "valid_targets_mean": 2795.6, + "valid_targets_min": 955 + }, + { + "epoch": 5.630936227951153, + "grad_norm": 0.7394571746760491, + "learning_rate": 4.491345430736207e-06, + "loss": 0.1661, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17502638697624207, + "step": 4150, + "valid_targets_mean": 2911.4, + "valid_targets_min": 1097 + }, + { + "epoch": 5.637720488466757, + "grad_norm": 1.1380143885731062, + "learning_rate": 4.4487098109171115e-06, + "loss": 0.1777, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16936932504177094, + "step": 4155, + "valid_targets_mean": 2618.9, + "valid_targets_min": 892 + }, + { + "epoch": 5.644504748982361, + "grad_norm": 0.880683030604768, + "learning_rate": 4.406252186341904e-06, + "loss": 0.1814, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1713424026966095, + "step": 4160, + "valid_targets_mean": 2082.8, + "valid_targets_min": 654 + }, + { + "epoch": 5.651289009497964, + "grad_norm": 0.8944130226850632, + "learning_rate": 4.363973042967393e-06, + "loss": 0.1892, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2198825478553772, + "step": 4165, + "valid_targets_mean": 2161.4, + "valid_targets_min": 793 + }, + { + "epoch": 5.658073270013569, + "grad_norm": 0.7666908944066096, + "learning_rate": 4.3218728647075324e-06, + "loss": 0.1646, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16684460639953613, + "step": 4170, + "valid_targets_mean": 2747.1, + "valid_targets_min": 1033 + }, + { + "epoch": 5.664857530529172, + "grad_norm": 1.1331036484064003, + "learning_rate": 4.2799521334279155e-06, + "loss": 0.1777, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16542381048202515, + "step": 4175, + "valid_targets_mean": 2846.9, + "valid_targets_min": 822 + }, + { + "epoch": 5.6716417910447765, + "grad_norm": 0.7113624200207286, + "learning_rate": 4.238211328940214e-06, + "loss": 0.175, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15958712995052338, + "step": 4180, + "valid_targets_mean": 2826.9, + "valid_targets_min": 904 + }, + { + "epoch": 5.67842605156038, + "grad_norm": 0.7443765505636735, + "learning_rate": 4.196650928996744e-06, + "loss": 0.1792, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17205163836479187, + "step": 4185, + "valid_targets_mean": 2878.1, + "valid_targets_min": 1151 + }, + { + "epoch": 5.685210312075983, + "grad_norm": 0.765002712688627, + "learning_rate": 4.155271409284947e-06, + "loss": 0.1769, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17750981450080872, + "step": 4190, + "valid_targets_mean": 2678.2, + "valid_targets_min": 1337 + }, + { + "epoch": 5.691994572591588, + "grad_norm": 0.7973473759974611, + "learning_rate": 4.114073243421964e-06, + "loss": 0.1797, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18639904260635376, + "step": 4195, + "valid_targets_mean": 2673.8, + "valid_targets_min": 1544 + }, + { + "epoch": 5.698778833107191, + "grad_norm": 0.8006125558694456, + "learning_rate": 4.073056902949233e-06, + "loss": 0.1822, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.181000754237175, + "step": 4200, + "valid_targets_mean": 2716.2, + "valid_targets_min": 893 + }, + { + "epoch": 5.7055630936227955, + "grad_norm": 0.7771322898882181, + "learning_rate": 4.032222857327055e-06, + "loss": 0.1837, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1973440945148468, + "step": 4205, + "valid_targets_mean": 3029.1, + "valid_targets_min": 1149 + }, + { + "epoch": 5.712347354138399, + "grad_norm": 0.7542045578109661, + "learning_rate": 3.991571573929263e-06, + "loss": 0.1703, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16543592512607574, + "step": 4210, + "valid_targets_mean": 2804.5, + "valid_targets_min": 1133 + }, + { + "epoch": 5.719131614654002, + "grad_norm": 0.6746826061759319, + "learning_rate": 3.951103518037822e-06, + "loss": 0.1694, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17549774050712585, + "step": 4215, + "valid_targets_mean": 3377.5, + "valid_targets_min": 675 + }, + { + "epoch": 5.725915875169607, + "grad_norm": 0.7884807830662328, + "learning_rate": 3.910819152837564e-06, + "loss": 0.1888, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19728972017765045, + "step": 4220, + "valid_targets_mean": 2609.3, + "valid_targets_min": 924 + }, + { + "epoch": 5.73270013568521, + "grad_norm": 0.6844292385894344, + "learning_rate": 3.870718939410829e-06, + "loss": 0.1858, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2012067437171936, + "step": 4225, + "valid_targets_mean": 3169.2, + "valid_targets_min": 1216 + }, + { + "epoch": 5.739484396200814, + "grad_norm": 0.6779140929423958, + "learning_rate": 3.8308033367322185e-06, + "loss": 0.1738, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16013997793197632, + "step": 4230, + "valid_targets_mean": 3190.8, + "valid_targets_min": 741 + }, + { + "epoch": 5.746268656716418, + "grad_norm": 0.7186843297493842, + "learning_rate": 3.791072801663349e-06, + "loss": 0.1858, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17085684835910797, + "step": 4235, + "valid_targets_mean": 3101.9, + "valid_targets_min": 964 + }, + { + "epoch": 5.753052917232022, + "grad_norm": 0.773844588416225, + "learning_rate": 3.751527788947593e-06, + "loss": 0.1782, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18918392062187195, + "step": 4240, + "valid_targets_mean": 2950.2, + "valid_targets_min": 1426 + }, + { + "epoch": 5.759837177747626, + "grad_norm": 0.8943301985541035, + "learning_rate": 3.7121687512049075e-06, + "loss": 0.1657, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1794520914554596, + "step": 4245, + "valid_targets_mean": 3305.4, + "valid_targets_min": 1077 + }, + { + "epoch": 5.766621438263229, + "grad_norm": 0.71555932932248, + "learning_rate": 3.672996138926623e-06, + "loss": 0.1787, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1853591352701187, + "step": 4250, + "valid_targets_mean": 3303.0, + "valid_targets_min": 1317 + }, + { + "epoch": 5.773405698778833, + "grad_norm": 0.7600028735931071, + "learning_rate": 3.634010400470318e-06, + "loss": 0.1715, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15217794477939606, + "step": 4255, + "valid_targets_mean": 2485.2, + "valid_targets_min": 765 + }, + { + "epoch": 5.780189959294437, + "grad_norm": 0.6651550656338701, + "learning_rate": 3.595211982054652e-06, + "loss": 0.1807, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15896007418632507, + "step": 4260, + "valid_targets_mean": 3312.4, + "valid_targets_min": 739 + }, + { + "epoch": 5.786974219810041, + "grad_norm": 0.793423124225591, + "learning_rate": 3.5566013277542987e-06, + "loss": 0.1882, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1872842162847519, + "step": 4265, + "valid_targets_mean": 2579.4, + "valid_targets_min": 1098 + }, + { + "epoch": 5.7937584803256446, + "grad_norm": 0.7190178787946112, + "learning_rate": 3.5181788794948267e-06, + "loss": 0.1739, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1685473471879959, + "step": 4270, + "valid_targets_mean": 2938.9, + "valid_targets_min": 795 + }, + { + "epoch": 5.800542740841248, + "grad_norm": 0.7427543324860382, + "learning_rate": 3.479945077047655e-06, + "loss": 0.1907, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18788832426071167, + "step": 4275, + "valid_targets_mean": 3514.1, + "valid_targets_min": 655 + }, + { + "epoch": 5.807327001356852, + "grad_norm": 0.7301184905700668, + "learning_rate": 3.4419003580250386e-06, + "loss": 0.1817, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1724769026041031, + "step": 4280, + "valid_targets_mean": 2761.8, + "valid_targets_min": 782 + }, + { + "epoch": 5.814111261872456, + "grad_norm": 0.6996558660986915, + "learning_rate": 3.40404515787502e-06, + "loss": 0.1775, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17133083939552307, + "step": 4285, + "valid_targets_mean": 3322.6, + "valid_targets_min": 1164 + }, + { + "epoch": 5.82089552238806, + "grad_norm": 0.7470138239152765, + "learning_rate": 3.366379909876487e-06, + "loss": 0.1814, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1647733896970749, + "step": 4290, + "valid_targets_mean": 2699.9, + "valid_targets_min": 856 + }, + { + "epoch": 5.8276797829036635, + "grad_norm": 0.8027648641349744, + "learning_rate": 3.3289050451341785e-06, + "loss": 0.1864, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17798423767089844, + "step": 4295, + "valid_targets_mean": 2830.8, + "valid_targets_min": 1027 + }, + { + "epoch": 5.834464043419267, + "grad_norm": 0.6833745641979659, + "learning_rate": 3.291620992573781e-06, + "loss": 0.1753, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1725693941116333, + "step": 4300, + "valid_targets_mean": 3136.2, + "valid_targets_min": 1115 + }, + { + "epoch": 5.841248303934871, + "grad_norm": 0.7632930036843467, + "learning_rate": 3.254528178936991e-06, + "loss": 0.1732, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1607835590839386, + "step": 4305, + "valid_targets_mean": 2220.1, + "valid_targets_min": 1083 + }, + { + "epoch": 5.848032564450475, + "grad_norm": 0.7766685814823788, + "learning_rate": 3.217627028776642e-06, + "loss": 0.1711, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17807729542255402, + "step": 4310, + "valid_targets_mean": 3144.8, + "valid_targets_min": 1348 + }, + { + "epoch": 5.854816824966079, + "grad_norm": 0.7615851643722991, + "learning_rate": 3.180917964451864e-06, + "loss": 0.177, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18378789722919464, + "step": 4315, + "valid_targets_mean": 2792.7, + "valid_targets_min": 722 + }, + { + "epoch": 5.8616010854816825, + "grad_norm": 0.7720381142011892, + "learning_rate": 3.1444014061232096e-06, + "loss": 0.1774, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19216835498809814, + "step": 4320, + "valid_targets_mean": 2816.5, + "valid_targets_min": 681 + }, + { + "epoch": 5.868385345997286, + "grad_norm": 0.7413182551927285, + "learning_rate": 3.1080777717478906e-06, + "loss": 0.1658, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17600227892398834, + "step": 4325, + "valid_targets_mean": 2807.1, + "valid_targets_min": 1049 + }, + { + "epoch": 5.87516960651289, + "grad_norm": 0.830639238497367, + "learning_rate": 3.071947477074948e-06, + "loss": 0.1744, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20260506868362427, + "step": 4330, + "valid_targets_mean": 3276.9, + "valid_targets_min": 802 + }, + { + "epoch": 5.881953867028494, + "grad_norm": 0.7247615457982057, + "learning_rate": 3.036010935640541e-06, + "loss": 0.1807, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1745220124721527, + "step": 4335, + "valid_targets_mean": 3025.8, + "valid_targets_min": 1149 + }, + { + "epoch": 5.888738127544098, + "grad_norm": 0.7666810644080457, + "learning_rate": 3.0002685587631665e-06, + "loss": 0.181, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19823196530342102, + "step": 4340, + "valid_targets_mean": 2815.8, + "valid_targets_min": 933 + }, + { + "epoch": 5.895522388059701, + "grad_norm": 0.6891577535572125, + "learning_rate": 2.9647207555389833e-06, + "loss": 0.1752, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1756839156150818, + "step": 4345, + "valid_targets_mean": 3165.6, + "valid_targets_min": 796 + }, + { + "epoch": 5.902306648575305, + "grad_norm": 0.6280441768186072, + "learning_rate": 2.929367932837128e-06, + "loss": 0.169, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16393789649009705, + "step": 4350, + "valid_targets_mean": 3511.4, + "valid_targets_min": 1349 + }, + { + "epoch": 5.909090909090909, + "grad_norm": 0.7212401001251478, + "learning_rate": 2.8942104952950358e-06, + "loss": 0.1698, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1590360701084137, + "step": 4355, + "valid_targets_mean": 2814.4, + "valid_targets_min": 781 + }, + { + "epoch": 5.915875169606513, + "grad_norm": 0.7587496985425881, + "learning_rate": 2.8592488453138402e-06, + "loss": 0.1756, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18076002597808838, + "step": 4360, + "valid_targets_mean": 2772.0, + "valid_targets_min": 1406 + }, + { + "epoch": 5.922659430122117, + "grad_norm": 0.7026766056453736, + "learning_rate": 2.8244833830537334e-06, + "loss": 0.1775, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16852641105651855, + "step": 4365, + "valid_targets_mean": 3198.9, + "valid_targets_min": 1248 + }, + { + "epoch": 5.92944369063772, + "grad_norm": 0.7591755791375419, + "learning_rate": 2.789914506429423e-06, + "loss": 0.1773, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18854087591171265, + "step": 4370, + "valid_targets_mean": 3410.3, + "valid_targets_min": 960 + }, + { + "epoch": 5.936227951153324, + "grad_norm": 0.7317196861574712, + "learning_rate": 2.755542611105544e-06, + "loss": 0.175, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17983709275722504, + "step": 4375, + "valid_targets_mean": 2910.4, + "valid_targets_min": 777 + }, + { + "epoch": 5.943012211668928, + "grad_norm": 0.7873291444565179, + "learning_rate": 2.7213680904921426e-06, + "loss": 0.1713, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15739159286022186, + "step": 4380, + "valid_targets_mean": 2447.4, + "valid_targets_min": 1274 + }, + { + "epoch": 5.949796472184532, + "grad_norm": 0.783627540382998, + "learning_rate": 2.687391335740195e-06, + "loss": 0.1814, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18448659777641296, + "step": 4385, + "valid_targets_mean": 2981.9, + "valid_targets_min": 940 + }, + { + "epoch": 5.956580732700136, + "grad_norm": 0.8163415230485238, + "learning_rate": 2.6536127357370834e-06, + "loss": 0.1802, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17967082560062408, + "step": 4390, + "valid_targets_mean": 2268.2, + "valid_targets_min": 1087 + }, + { + "epoch": 5.963364993215739, + "grad_norm": 0.6972529066512584, + "learning_rate": 2.6200326771021977e-06, + "loss": 0.1757, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17101269960403442, + "step": 4395, + "valid_targets_mean": 3305.3, + "valid_targets_min": 1304 + }, + { + "epoch": 5.970149253731344, + "grad_norm": 0.7444293238213098, + "learning_rate": 2.5866515441824637e-06, + "loss": 0.1822, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16424033045768738, + "step": 4400, + "valid_targets_mean": 2531.2, + "valid_targets_min": 761 + }, + { + "epoch": 5.976933514246947, + "grad_norm": 0.7787083692853177, + "learning_rate": 2.553469719047983e-06, + "loss": 0.1827, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18210354447364807, + "step": 4405, + "valid_targets_mean": 3141.8, + "valid_targets_min": 810 + }, + { + "epoch": 5.9837177747625505, + "grad_norm": 0.7791998751111936, + "learning_rate": 2.5204875814876318e-06, + "loss": 0.1801, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19235186278820038, + "step": 4410, + "valid_targets_mean": 2803.9, + "valid_targets_min": 708 + }, + { + "epoch": 5.990502035278155, + "grad_norm": 0.7009703971990965, + "learning_rate": 2.487705509004721e-06, + "loss": 0.1793, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18249055743217468, + "step": 4415, + "valid_targets_mean": 3232.0, + "valid_targets_min": 484 + }, + { + "epoch": 5.997286295793758, + "grad_norm": 0.7781270453801223, + "learning_rate": 2.4551238768126906e-06, + "loss": 0.1778, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16195908188819885, + "step": 4420, + "valid_targets_mean": 2633.4, + "valid_targets_min": 814 + }, + { + "epoch": 6.004070556309363, + "grad_norm": 0.7415388448727406, + "learning_rate": 2.422743057830792e-06, + "loss": 0.1743, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19907845556735992, + "step": 4425, + "valid_targets_mean": 2871.6, + "valid_targets_min": 690 + }, + { + "epoch": 6.010854816824966, + "grad_norm": 0.742053212238427, + "learning_rate": 2.3905634226798415e-06, + "loss": 0.1751, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18320277333259583, + "step": 4430, + "valid_targets_mean": 3028.6, + "valid_targets_min": 750 + }, + { + "epoch": 6.0176390773405695, + "grad_norm": 0.8079591238507426, + "learning_rate": 2.3585853396779546e-06, + "loss": 0.166, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17492371797561646, + "step": 4435, + "valid_targets_mean": 2414.6, + "valid_targets_min": 610 + }, + { + "epoch": 6.024423337856174, + "grad_norm": 0.7245282326645127, + "learning_rate": 2.326809174836355e-06, + "loss": 0.1639, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16501660645008087, + "step": 4440, + "valid_targets_mean": 2658.6, + "valid_targets_min": 805 + }, + { + "epoch": 6.031207598371777, + "grad_norm": 0.7529821726671959, + "learning_rate": 2.295235291855171e-06, + "loss": 0.1621, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16563096642494202, + "step": 4445, + "valid_targets_mean": 2985.2, + "valid_targets_min": 1155 + }, + { + "epoch": 6.0379918588873815, + "grad_norm": 0.7218429838629064, + "learning_rate": 2.2638640521192666e-06, + "loss": 0.1683, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16327796876430511, + "step": 4450, + "valid_targets_mean": 2918.9, + "valid_targets_min": 820 + }, + { + "epoch": 6.044776119402985, + "grad_norm": 0.7670474706596653, + "learning_rate": 2.23269581469413e-06, + "loss": 0.1683, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17224714159965515, + "step": 4455, + "valid_targets_mean": 2854.1, + "valid_targets_min": 711 + }, + { + "epoch": 6.051560379918589, + "grad_norm": 0.85371010583432, + "learning_rate": 2.201730936321731e-06, + "loss": 0.1751, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18799713253974915, + "step": 4460, + "valid_targets_mean": 2257.5, + "valid_targets_min": 809 + }, + { + "epoch": 6.058344640434193, + "grad_norm": 0.8581102151455461, + "learning_rate": 2.170969771416471e-06, + "loss": 0.1736, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19284003973007202, + "step": 4465, + "valid_targets_mean": 2541.4, + "valid_targets_min": 903 + }, + { + "epoch": 6.065128900949796, + "grad_norm": 0.7082075259100371, + "learning_rate": 2.140412672061094e-06, + "loss": 0.1696, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14846593141555786, + "step": 4470, + "valid_targets_mean": 2874.5, + "valid_targets_min": 900 + }, + { + "epoch": 6.0719131614654005, + "grad_norm": 0.6593303484533789, + "learning_rate": 2.1100599880026885e-06, + "loss": 0.1604, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17295196652412415, + "step": 4475, + "valid_targets_mean": 3751.6, + "valid_targets_min": 1730 + }, + { + "epoch": 6.078697421981004, + "grad_norm": 0.7194386838659942, + "learning_rate": 2.079912066648655e-06, + "loss": 0.1669, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14499616622924805, + "step": 4480, + "valid_targets_mean": 2578.9, + "valid_targets_min": 1121 + }, + { + "epoch": 6.085481682496608, + "grad_norm": 0.7703414212546942, + "learning_rate": 2.049969253062758e-06, + "loss": 0.1651, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17155641317367554, + "step": 4485, + "valid_targets_mean": 2867.4, + "valid_targets_min": 727 + }, + { + "epoch": 6.092265943012212, + "grad_norm": 0.7581834478927615, + "learning_rate": 2.0202318899611483e-06, + "loss": 0.1596, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14970597624778748, + "step": 4490, + "valid_targets_mean": 2776.1, + "valid_targets_min": 810 + }, + { + "epoch": 6.099050203527815, + "grad_norm": 0.8858694998395675, + "learning_rate": 1.9907003177084605e-06, + "loss": 0.1717, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19518548250198364, + "step": 4495, + "valid_targets_mean": 2301.6, + "valid_targets_min": 858 + }, + { + "epoch": 6.1058344640434195, + "grad_norm": 0.7698959242156672, + "learning_rate": 1.9613748743139126e-06, + "loss": 0.1741, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18513622879981995, + "step": 4500, + "valid_targets_mean": 3196.8, + "valid_targets_min": 1009 + }, + { + "epoch": 6.112618724559023, + "grad_norm": 0.9050028409977037, + "learning_rate": 1.9322558954274305e-06, + "loss": 0.1693, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17871133983135223, + "step": 4505, + "valid_targets_mean": 2160.6, + "valid_targets_min": 748 + }, + { + "epoch": 6.119402985074627, + "grad_norm": 0.7740731558411673, + "learning_rate": 1.9033437143358213e-06, + "loss": 0.1642, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16579106450080872, + "step": 4510, + "valid_targets_mean": 2948.0, + "valid_targets_min": 1076 + }, + { + "epoch": 6.126187245590231, + "grad_norm": 0.8348067497419805, + "learning_rate": 1.874638661958934e-06, + "loss": 0.1691, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16571199893951416, + "step": 4515, + "valid_targets_mean": 2415.9, + "valid_targets_min": 627 + }, + { + "epoch": 6.132971506105834, + "grad_norm": 0.7256235619648512, + "learning_rate": 1.8461410668459035e-06, + "loss": 0.1567, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14996132254600525, + "step": 4520, + "valid_targets_mean": 2668.2, + "valid_targets_min": 806 + }, + { + "epoch": 6.139755766621438, + "grad_norm": 0.7614175427466571, + "learning_rate": 1.8178512551713568e-06, + "loss": 0.1658, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15067064762115479, + "step": 4525, + "valid_targets_mean": 3234.1, + "valid_targets_min": 656 + }, + { + "epoch": 6.146540027137042, + "grad_norm": 0.7569505883546243, + "learning_rate": 1.7897695507317036e-06, + "loss": 0.1753, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17070305347442627, + "step": 4530, + "valid_targets_mean": 2981.2, + "valid_targets_min": 1323 + }, + { + "epoch": 6.153324287652646, + "grad_norm": 0.715139884326026, + "learning_rate": 1.761896274941426e-06, + "loss": 0.1684, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1826499104499817, + "step": 4535, + "valid_targets_mean": 3501.9, + "valid_targets_min": 841 + }, + { + "epoch": 6.16010854816825, + "grad_norm": 0.7854237661191246, + "learning_rate": 1.7342317468293912e-06, + "loss": 0.1703, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16877636313438416, + "step": 4540, + "valid_targets_mean": 2512.0, + "valid_targets_min": 1055 + }, + { + "epoch": 6.166892808683853, + "grad_norm": 0.8303757582473726, + "learning_rate": 1.7067762830352096e-06, + "loss": 0.1728, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17255139350891113, + "step": 4545, + "valid_targets_mean": 2534.2, + "valid_targets_min": 942 + }, + { + "epoch": 6.173677069199457, + "grad_norm": 0.7385224765472438, + "learning_rate": 1.679530197805599e-06, + "loss": 0.1658, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15643605589866638, + "step": 4550, + "valid_targets_mean": 2944.8, + "valid_targets_min": 1366 + }, + { + "epoch": 6.180461329715061, + "grad_norm": 0.7564443718183899, + "learning_rate": 1.652493802990811e-06, + "loss": 0.1753, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17550310492515564, + "step": 4555, + "valid_targets_mean": 3110.1, + "valid_targets_min": 1058 + }, + { + "epoch": 6.187245590230665, + "grad_norm": 1.71255016027657, + "learning_rate": 1.6256674080410185e-06, + "loss": 0.1628, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15688160061836243, + "step": 4560, + "valid_targets_mean": 3095.8, + "valid_targets_min": 1214 + }, + { + "epoch": 6.1940298507462686, + "grad_norm": 0.8040556589037928, + "learning_rate": 1.5990513200028269e-06, + "loss": 0.1642, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1727936565876007, + "step": 4565, + "valid_targets_mean": 2527.2, + "valid_targets_min": 991 + }, + { + "epoch": 6.200814111261873, + "grad_norm": 0.7945263075310672, + "learning_rate": 1.5726458435157255e-06, + "loss": 0.1625, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18288177251815796, + "step": 4570, + "valid_targets_mean": 3045.4, + "valid_targets_min": 821 + }, + { + "epoch": 6.207598371777476, + "grad_norm": 0.7545719664187707, + "learning_rate": 1.5464512808086008e-06, + "loss": 0.1694, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17064568400382996, + "step": 4575, + "valid_targets_mean": 3098.4, + "valid_targets_min": 796 + }, + { + "epoch": 6.21438263229308, + "grad_norm": 0.747591105064271, + "learning_rate": 1.5204679316963012e-06, + "loss": 0.1751, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1849783957004547, + "step": 4580, + "valid_targets_mean": 3041.9, + "valid_targets_min": 1643 + }, + { + "epoch": 6.221166892808684, + "grad_norm": 0.7177247952162282, + "learning_rate": 1.4946960935761734e-06, + "loss": 0.1636, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17120692133903503, + "step": 4585, + "valid_targets_mean": 3201.8, + "valid_targets_min": 563 + }, + { + "epoch": 6.2279511533242875, + "grad_norm": 0.8094458246760512, + "learning_rate": 1.4691360614246897e-06, + "loss": 0.1692, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16246119141578674, + "step": 4590, + "valid_targets_mean": 2322.5, + "valid_targets_min": 692 + }, + { + "epoch": 6.234735413839892, + "grad_norm": 0.7798473673390547, + "learning_rate": 1.443788127794048e-06, + "loss": 0.1681, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17415021359920502, + "step": 4595, + "valid_targets_mean": 2854.2, + "valid_targets_min": 1190 + }, + { + "epoch": 6.241519674355495, + "grad_norm": 0.7946140484277268, + "learning_rate": 1.4186525828088282e-06, + "loss": 0.1709, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15857259929180145, + "step": 4600, + "valid_targets_mean": 2525.8, + "valid_targets_min": 1018 + }, + { + "epoch": 6.248303934871099, + "grad_norm": 0.7801428383995377, + "learning_rate": 1.3937297141626904e-06, + "loss": 0.1726, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1684061586856842, + "step": 4605, + "valid_targets_mean": 2809.8, + "valid_targets_min": 917 + }, + { + "epoch": 6.255088195386703, + "grad_norm": 0.6643638326526149, + "learning_rate": 1.3690198071150528e-06, + "loss": 0.1599, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15296955406665802, + "step": 4610, + "valid_targets_mean": 3476.2, + "valid_targets_min": 1305 + }, + { + "epoch": 6.2618724559023065, + "grad_norm": 0.7686142175477614, + "learning_rate": 1.3445231444878504e-06, + "loss": 0.1648, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17054758965969086, + "step": 4615, + "valid_targets_mean": 2753.3, + "valid_targets_min": 830 + }, + { + "epoch": 6.268656716417911, + "grad_norm": 0.8432191119730226, + "learning_rate": 1.3202400066622834e-06, + "loss": 0.1713, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17865519225597382, + "step": 4620, + "valid_targets_mean": 2344.4, + "valid_targets_min": 765 + }, + { + "epoch": 6.275440976933514, + "grad_norm": 0.6339497035055569, + "learning_rate": 1.2961706715756184e-06, + "loss": 0.1587, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1531352996826172, + "step": 4625, + "valid_targets_mean": 3586.7, + "valid_targets_min": 2054 + }, + { + "epoch": 6.282225237449118, + "grad_norm": 0.8544021195040931, + "learning_rate": 1.2723154147179973e-06, + "loss": 0.1629, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1579844206571579, + "step": 4630, + "valid_targets_mean": 3154.9, + "valid_targets_min": 869 + }, + { + "epoch": 6.289009497964722, + "grad_norm": 0.8007481753817313, + "learning_rate": 1.2486745091292862e-06, + "loss": 0.1678, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1802925169467926, + "step": 4635, + "valid_targets_mean": 2729.8, + "valid_targets_min": 957 + }, + { + "epoch": 6.295793758480325, + "grad_norm": 0.8142132607230317, + "learning_rate": 1.2252482253959653e-06, + "loss": 0.1719, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18052034080028534, + "step": 4640, + "valid_targets_mean": 2541.6, + "valid_targets_min": 974 + }, + { + "epoch": 6.30257801899593, + "grad_norm": 0.6774345228089872, + "learning_rate": 1.202036831648008e-06, + "loss": 0.1589, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.150282084941864, + "step": 4645, + "valid_targets_mean": 3526.9, + "valid_targets_min": 1051 + }, + { + "epoch": 6.309362279511533, + "grad_norm": 0.7516698326728172, + "learning_rate": 1.1790405935558292e-06, + "loss": 0.1764, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16584287583827972, + "step": 4650, + "valid_targets_mean": 2976.7, + "valid_targets_min": 1024 + }, + { + "epoch": 6.3161465400271375, + "grad_norm": 0.7046916433526038, + "learning_rate": 1.156259774327233e-06, + "loss": 0.1654, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16168582439422607, + "step": 4655, + "valid_targets_mean": 3335.2, + "valid_targets_min": 872 + }, + { + "epoch": 6.322930800542741, + "grad_norm": 0.767262026885646, + "learning_rate": 1.1336946347044164e-06, + "loss": 0.164, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15575771033763885, + "step": 4660, + "valid_targets_mean": 2716.2, + "valid_targets_min": 1074 + }, + { + "epoch": 6.329715061058344, + "grad_norm": 0.792890542631687, + "learning_rate": 1.111345432960964e-06, + "loss": 0.1677, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16956427693367004, + "step": 4665, + "valid_targets_mean": 2624.8, + "valid_targets_min": 593 + }, + { + "epoch": 6.336499321573949, + "grad_norm": 0.7772682272896229, + "learning_rate": 1.089212424898909e-06, + "loss": 0.1704, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16567683219909668, + "step": 4670, + "valid_targets_mean": 2692.4, + "valid_targets_min": 420 + }, + { + "epoch": 6.343283582089552, + "grad_norm": 0.7218817470491743, + "learning_rate": 1.0672958638457963e-06, + "loss": 0.1715, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1656082570552826, + "step": 4675, + "valid_targets_mean": 3137.2, + "valid_targets_min": 1181 + }, + { + "epoch": 6.350067842605156, + "grad_norm": 0.7660758671491292, + "learning_rate": 1.0455960006517828e-06, + "loss": 0.1615, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16689500212669373, + "step": 4680, + "valid_targets_mean": 2776.9, + "valid_targets_min": 1184 + }, + { + "epoch": 6.35685210312076, + "grad_norm": 0.7614176463332171, + "learning_rate": 1.0241130836867775e-06, + "loss": 0.1675, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1655552089214325, + "step": 4685, + "valid_targets_mean": 2639.2, + "valid_targets_min": 1340 + }, + { + "epoch": 6.363636363636363, + "grad_norm": 0.731905552102256, + "learning_rate": 1.0028473588375775e-06, + "loss": 0.1613, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15648706257343292, + "step": 4690, + "valid_targets_mean": 2678.6, + "valid_targets_min": 1071 + }, + { + "epoch": 6.370420624151968, + "grad_norm": 0.7374044504633019, + "learning_rate": 9.817990695050804e-07, + "loss": 0.1661, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17541341483592987, + "step": 4695, + "valid_targets_mean": 3396.6, + "valid_targets_min": 788 + }, + { + "epoch": 6.377204884667571, + "grad_norm": 0.8070797537585928, + "learning_rate": 9.60968456601472e-07, + "loss": 0.1721, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17572826147079468, + "step": 4700, + "valid_targets_mean": 2604.2, + "valid_targets_min": 880 + }, + { + "epoch": 6.383989145183175, + "grad_norm": 0.7275287979567348, + "learning_rate": 9.403557585474954e-07, + "loss": 0.1662, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15313151478767395, + "step": 4705, + "valid_targets_mean": 2704.1, + "valid_targets_min": 627 + }, + { + "epoch": 6.390773405698779, + "grad_norm": 0.8970798559467905, + "learning_rate": 9.199612112696843e-07, + "loss": 0.1649, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1705400049686432, + "step": 4710, + "valid_targets_mean": 2101.6, + "valid_targets_min": 818 + }, + { + "epoch": 6.397557666214382, + "grad_norm": 0.7536549104416713, + "learning_rate": 8.9978504819771e-07, + "loss": 0.1677, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1547962725162506, + "step": 4715, + "valid_targets_mean": 2824.7, + "valid_targets_min": 779 + }, + { + "epoch": 6.404341926729987, + "grad_norm": 0.8045187393820044, + "learning_rate": 8.798275002616785e-07, + "loss": 0.168, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17857302725315094, + "step": 4720, + "valid_targets_mean": 2904.6, + "valid_targets_min": 699 + }, + { + "epoch": 6.41112618724559, + "grad_norm": 0.8644670987087135, + "learning_rate": 8.600887958894866e-07, + "loss": 0.1734, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18547791242599487, + "step": 4725, + "valid_targets_mean": 2751.9, + "valid_targets_min": 623 + }, + { + "epoch": 6.417910447761194, + "grad_norm": 0.8231644619228083, + "learning_rate": 8.4056916100423e-07, + "loss": 0.1619, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16595998406410217, + "step": 4730, + "valid_targets_mean": 2423.4, + "valid_targets_min": 692 + }, + { + "epoch": 6.424694708276798, + "grad_norm": 0.7911518564082718, + "learning_rate": 8.212688190215879e-07, + "loss": 0.1667, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17415902018547058, + "step": 4735, + "valid_targets_mean": 2703.4, + "valid_targets_min": 793 + }, + { + "epoch": 6.431478968792401, + "grad_norm": 0.8160726715736674, + "learning_rate": 8.021879908472962e-07, + "loss": 0.1679, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18597552180290222, + "step": 4740, + "valid_targets_mean": 2784.6, + "valid_targets_min": 1092 + }, + { + "epoch": 6.4382632293080055, + "grad_norm": 0.7067244227602366, + "learning_rate": 7.833268948745854e-07, + "loss": 0.172, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16125139594078064, + "step": 4745, + "valid_targets_mean": 3260.1, + "valid_targets_min": 896 + }, + { + "epoch": 6.445047489823609, + "grad_norm": 0.7408785373138851, + "learning_rate": 7.646857469817148e-07, + "loss": 0.169, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15260982513427734, + "step": 4750, + "valid_targets_mean": 2774.1, + "valid_targets_min": 797 + }, + { + "epoch": 6.451831750339213, + "grad_norm": 0.6966270003794208, + "learning_rate": 7.462647605294849e-07, + "loss": 0.1615, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.148808091878891, + "step": 4755, + "valid_targets_mean": 2975.9, + "valid_targets_min": 681 + }, + { + "epoch": 6.458616010854817, + "grad_norm": 0.7126074027304123, + "learning_rate": 7.28064146358789e-07, + "loss": 0.1656, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15134704113006592, + "step": 4760, + "valid_targets_mean": 3576.7, + "valid_targets_min": 689 + }, + { + "epoch": 6.46540027137042, + "grad_norm": 0.8135549579243775, + "learning_rate": 7.100841127882119e-07, + "loss": 0.159, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1627068817615509, + "step": 4765, + "valid_targets_mean": 2379.0, + "valid_targets_min": 890 + }, + { + "epoch": 6.4721845318860245, + "grad_norm": 0.6946858722656691, + "learning_rate": 6.9232486561164e-07, + "loss": 0.159, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15446284413337708, + "step": 4770, + "valid_targets_mean": 3180.2, + "valid_targets_min": 1145 + }, + { + "epoch": 6.478968792401628, + "grad_norm": 0.8707673966033334, + "learning_rate": 6.74786608095912e-07, + "loss": 0.1638, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17315858602523804, + "step": 4775, + "valid_targets_mean": 2392.1, + "valid_targets_min": 740 + }, + { + "epoch": 6.485753052917232, + "grad_norm": 0.7302574945303589, + "learning_rate": 6.574695409784792e-07, + "loss": 0.1674, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1769123077392578, + "step": 4780, + "valid_targets_mean": 3501.0, + "valid_targets_min": 735 + }, + { + "epoch": 6.492537313432836, + "grad_norm": 0.8695981905879026, + "learning_rate": 6.403738624651201e-07, + "loss": 0.1702, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1758110076189041, + "step": 4785, + "valid_targets_mean": 2153.2, + "valid_targets_min": 1133 + }, + { + "epoch": 6.49932157394844, + "grad_norm": 0.6446530009894795, + "learning_rate": 6.234997682276711e-07, + "loss": 0.161, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1484566032886505, + "step": 4790, + "valid_targets_mean": 3468.8, + "valid_targets_min": 1345 + }, + { + "epoch": 6.5061058344640434, + "grad_norm": 0.7063043963291199, + "learning_rate": 6.068474514017797e-07, + "loss": 0.1683, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1765894889831543, + "step": 4795, + "valid_targets_mean": 3490.4, + "valid_targets_min": 954 + }, + { + "epoch": 6.512890094979647, + "grad_norm": 0.8151482914810542, + "learning_rate": 5.904171025847016e-07, + "loss": 0.1649, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16545557975769043, + "step": 4800, + "valid_targets_mean": 2565.6, + "valid_targets_min": 767 + }, + { + "epoch": 6.519674355495251, + "grad_norm": 0.7173252334227114, + "learning_rate": 5.742089098331094e-07, + "loss": 0.1636, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15513482689857483, + "step": 4805, + "valid_targets_mean": 3147.9, + "valid_targets_min": 1185 + }, + { + "epoch": 6.526458616010855, + "grad_norm": 0.744561572381357, + "learning_rate": 5.582230586609538e-07, + "loss": 0.1617, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1628095507621765, + "step": 4810, + "valid_targets_mean": 2782.1, + "valid_targets_min": 1219 + }, + { + "epoch": 6.533242876526459, + "grad_norm": 0.9358406796473209, + "learning_rate": 5.424597320373259e-07, + "loss": 0.1728, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18553614616394043, + "step": 4815, + "valid_targets_mean": 2122.8, + "valid_targets_min": 719 + }, + { + "epoch": 6.540027137042062, + "grad_norm": 0.7762174577670051, + "learning_rate": 5.269191103843719e-07, + "loss": 0.1638, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16511686146259308, + "step": 4820, + "valid_targets_mean": 2672.0, + "valid_targets_min": 998 + }, + { + "epoch": 6.546811397557666, + "grad_norm": 0.683256793284698, + "learning_rate": 5.116013715752299e-07, + "loss": 0.1751, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17595712840557098, + "step": 4825, + "valid_targets_mean": 3763.1, + "valid_targets_min": 1182 + }, + { + "epoch": 6.55359565807327, + "grad_norm": 0.7956506160614817, + "learning_rate": 4.965066909319837e-07, + "loss": 0.1695, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17820480465888977, + "step": 4830, + "valid_targets_mean": 2759.7, + "valid_targets_min": 1292 + }, + { + "epoch": 6.560379918588874, + "grad_norm": 0.7587179233666759, + "learning_rate": 4.816352412236702e-07, + "loss": 0.1685, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17972393333911896, + "step": 4835, + "valid_targets_mean": 2840.0, + "valid_targets_min": 718 + }, + { + "epoch": 6.567164179104478, + "grad_norm": 0.7306219090534811, + "learning_rate": 4.669871926642877e-07, + "loss": 0.1649, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15142644941806793, + "step": 4840, + "valid_targets_mean": 3011.1, + "valid_targets_min": 686 + }, + { + "epoch": 6.573948439620081, + "grad_norm": 0.8468764362506153, + "learning_rate": 4.5256271291085785e-07, + "loss": 0.1718, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16415920853614807, + "step": 4845, + "valid_targets_mean": 2226.1, + "valid_targets_min": 799 + }, + { + "epoch": 6.580732700135686, + "grad_norm": 0.7218042166118971, + "learning_rate": 4.3836196706150026e-07, + "loss": 0.1591, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14739587903022766, + "step": 4850, + "valid_targets_mean": 3025.1, + "valid_targets_min": 788 + }, + { + "epoch": 6.587516960651289, + "grad_norm": 0.7548311142057721, + "learning_rate": 4.243851176535474e-07, + "loss": 0.1685, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1665259748697281, + "step": 4855, + "valid_targets_mean": 2818.0, + "valid_targets_min": 728 + }, + { + "epoch": 6.5943012211668925, + "grad_norm": 0.7611024739102207, + "learning_rate": 4.106323246616817e-07, + "loss": 0.1673, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15098467469215393, + "step": 4860, + "valid_targets_mean": 3311.1, + "valid_targets_min": 806 + }, + { + "epoch": 6.601085481682497, + "grad_norm": 0.8170024951749895, + "learning_rate": 3.971037454961058e-07, + "loss": 0.1704, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1671171486377716, + "step": 4865, + "valid_targets_mean": 2565.4, + "valid_targets_min": 798 + }, + { + "epoch": 6.6078697421981, + "grad_norm": 0.7074879855210716, + "learning_rate": 3.8379953500074617e-07, + "loss": 0.1625, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1538231521844864, + "step": 4870, + "valid_targets_mean": 3038.6, + "valid_targets_min": 742 + }, + { + "epoch": 6.614654002713705, + "grad_norm": 0.7784406456308786, + "learning_rate": 3.7071984545146157e-07, + "loss": 0.1767, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17332404851913452, + "step": 4875, + "valid_targets_mean": 2704.2, + "valid_targets_min": 1374 + }, + { + "epoch": 6.621438263229308, + "grad_norm": 0.7707976870962088, + "learning_rate": 3.578648265543261e-07, + "loss": 0.1601, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15194040536880493, + "step": 4880, + "valid_targets_mean": 2598.7, + "valid_targets_min": 770 + }, + { + "epoch": 6.6282225237449115, + "grad_norm": 0.7794811748817996, + "learning_rate": 3.4523462544389987e-07, + "loss": 0.1746, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1765557825565338, + "step": 4885, + "valid_targets_mean": 2984.3, + "valid_targets_min": 1174 + }, + { + "epoch": 6.635006784260516, + "grad_norm": 0.7299801758059727, + "learning_rate": 3.328293866815435e-07, + "loss": 0.1591, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16560907661914825, + "step": 4890, + "valid_targets_mean": 3176.7, + "valid_targets_min": 1444 + }, + { + "epoch": 6.641791044776119, + "grad_norm": 0.8430277934827622, + "learning_rate": 3.2064925225377297e-07, + "loss": 0.1737, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1819601058959961, + "step": 4895, + "valid_targets_mean": 2420.4, + "valid_targets_min": 642 + }, + { + "epoch": 6.648575305291724, + "grad_norm": 0.8143191944576621, + "learning_rate": 3.086943615706295e-07, + "loss": 0.1743, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18464431166648865, + "step": 4900, + "valid_targets_mean": 2860.9, + "valid_targets_min": 717 + }, + { + "epoch": 6.655359565807327, + "grad_norm": 0.7808926512242406, + "learning_rate": 2.969648514640855e-07, + "loss": 0.1637, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17209918797016144, + "step": 4905, + "valid_targets_mean": 2871.8, + "valid_targets_min": 817 + }, + { + "epoch": 6.6621438263229305, + "grad_norm": 0.7932529850799425, + "learning_rate": 2.854608561864702e-07, + "loss": 0.1641, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17645207047462463, + "step": 4910, + "valid_targets_mean": 2660.2, + "valid_targets_min": 927 + }, + { + "epoch": 6.668928086838535, + "grad_norm": 0.92065568822347, + "learning_rate": 2.7418250740895325e-07, + "loss": 0.1685, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19371803104877472, + "step": 4915, + "valid_targets_mean": 2207.4, + "valid_targets_min": 856 + }, + { + "epoch": 6.675712347354138, + "grad_norm": 0.6839393586837104, + "learning_rate": 2.6312993422001e-07, + "loss": 0.1756, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17107108235359192, + "step": 4920, + "valid_targets_mean": 3477.2, + "valid_targets_min": 930 + }, + { + "epoch": 6.6824966078697425, + "grad_norm": 0.710980917983464, + "learning_rate": 2.5230326312397193e-07, + "loss": 0.1636, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1745888739824295, + "step": 4925, + "valid_targets_mean": 3478.5, + "valid_targets_min": 642 + }, + { + "epoch": 6.689280868385346, + "grad_norm": 0.7823508117950982, + "learning_rate": 2.417026180395476e-07, + "loss": 0.1703, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15715618431568146, + "step": 4930, + "valid_targets_mean": 2545.1, + "valid_targets_min": 895 + }, + { + "epoch": 6.696065128900949, + "grad_norm": 0.7659619371709346, + "learning_rate": 2.3132812029844187e-07, + "loss": 0.1631, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14484542608261108, + "step": 4935, + "valid_targets_mean": 2404.1, + "valid_targets_min": 548 + }, + { + "epoch": 6.702849389416554, + "grad_norm": 0.7229493539815992, + "learning_rate": 2.2117988864393424e-07, + "loss": 0.171, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15829598903656006, + "step": 4940, + "valid_targets_mean": 3077.1, + "valid_targets_min": 714 + }, + { + "epoch": 6.709633649932157, + "grad_norm": 0.7896285355647491, + "learning_rate": 2.112580392295338e-07, + "loss": 0.171, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17784717679023743, + "step": 4945, + "valid_targets_mean": 2630.4, + "valid_targets_min": 1081 + }, + { + "epoch": 6.7164179104477615, + "grad_norm": 0.9253668056101276, + "learning_rate": 2.015626856176578e-07, + "loss": 0.1674, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16507627069950104, + "step": 4950, + "valid_targets_mean": 2150.9, + "valid_targets_min": 815 + }, + { + "epoch": 6.723202170963365, + "grad_norm": 0.8173264202820435, + "learning_rate": 1.9209393877831273e-07, + "loss": 0.1732, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18161508440971375, + "step": 4955, + "valid_targets_mean": 2604.2, + "valid_targets_min": 840 + }, + { + "epoch": 6.729986431478968, + "grad_norm": 0.7523430361745242, + "learning_rate": 1.8285190708783984e-07, + "loss": 0.1658, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1569797694683075, + "step": 4960, + "valid_targets_mean": 2848.9, + "valid_targets_min": 1035 + }, + { + "epoch": 6.736770691994573, + "grad_norm": 0.7772846302429779, + "learning_rate": 1.7383669632766276e-07, + "loss": 0.1725, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17585310339927673, + "step": 4965, + "valid_targets_mean": 2752.7, + "valid_targets_min": 792 + }, + { + "epoch": 6.743554952510176, + "grad_norm": 0.9312669800652025, + "learning_rate": 1.6504840968309288e-07, + "loss": 0.1774, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16369280219078064, + "step": 4970, + "valid_targets_mean": 2244.8, + "valid_targets_min": 618 + }, + { + "epoch": 6.75033921302578, + "grad_norm": 0.740973564771969, + "learning_rate": 1.5648714774213037e-07, + "loss": 0.164, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16859431564807892, + "step": 4975, + "valid_targets_mean": 3112.4, + "valid_targets_min": 538 + }, + { + "epoch": 6.757123473541384, + "grad_norm": 0.800910909512139, + "learning_rate": 1.4815300849432278e-07, + "loss": 0.1645, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1737082302570343, + "step": 4980, + "valid_targets_mean": 2754.4, + "valid_targets_min": 1617 + }, + { + "epoch": 6.763907734056987, + "grad_norm": 0.725161307786821, + "learning_rate": 1.400460873296461e-07, + "loss": 0.1658, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16292381286621094, + "step": 4985, + "valid_targets_mean": 3145.3, + "valid_targets_min": 912 + }, + { + "epoch": 6.770691994572592, + "grad_norm": 0.8381499766215631, + "learning_rate": 1.3216647703740315e-07, + "loss": 0.1658, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18227586150169373, + "step": 4990, + "valid_targets_mean": 2443.8, + "valid_targets_min": 944 + }, + { + "epoch": 6.777476255088195, + "grad_norm": 0.8004952099912124, + "learning_rate": 1.2451426780517363e-07, + "loss": 0.1761, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17602723836898804, + "step": 4995, + "valid_targets_mean": 2544.9, + "valid_targets_min": 814 + }, + { + "epoch": 6.784260515603799, + "grad_norm": 0.8175798384888614, + "learning_rate": 1.1708954721776355e-07, + "loss": 0.1707, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15319892764091492, + "step": 5000, + "valid_targets_mean": 3087.2, + "valid_targets_min": 1206 + }, + { + "epoch": 6.791044776119403, + "grad_norm": 0.8311080394214818, + "learning_rate": 1.0989240025622627e-07, + "loss": 0.1742, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1745099425315857, + "step": 5005, + "valid_targets_mean": 2356.4, + "valid_targets_min": 754 + }, + { + "epoch": 6.797829036635007, + "grad_norm": 0.765582281484442, + "learning_rate": 1.0292290929687421e-07, + "loss": 0.1598, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15547017753124237, + "step": 5010, + "valid_targets_mean": 2607.4, + "valid_targets_min": 656 + }, + { + "epoch": 6.804613297150611, + "grad_norm": 0.8814252430063856, + "learning_rate": 9.618115411033524e-08, + "loss": 0.1608, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16623856127262115, + "step": 5015, + "valid_targets_mean": 2101.0, + "valid_targets_min": 732 + }, + { + "epoch": 6.811397557666214, + "grad_norm": 0.7599170245144763, + "learning_rate": 8.966721186065341e-08, + "loss": 0.1648, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1648900806903839, + "step": 5020, + "valid_targets_mean": 2629.4, + "valid_targets_min": 1313 + }, + { + "epoch": 6.818181818181818, + "grad_norm": 0.6566181393738637, + "learning_rate": 8.338115710438744e-08, + "loss": 0.1743, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1621677428483963, + "step": 5025, + "valid_targets_mean": 3358.0, + "valid_targets_min": 654 + }, + { + "epoch": 6.824966078697422, + "grad_norm": 0.7442894660376455, + "learning_rate": 7.732306178977134e-08, + "loss": 0.1661, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1600368767976761, + "step": 5030, + "valid_targets_mean": 3117.9, + "valid_targets_min": 880 + }, + { + "epoch": 6.831750339213026, + "grad_norm": 0.7314337251400312, + "learning_rate": 7.149299525588405e-08, + "loss": 0.159, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1598661243915558, + "step": 5035, + "valid_targets_mean": 2865.1, + "valid_targets_min": 514 + }, + { + "epoch": 6.8385345997286295, + "grad_norm": 0.712971911902212, + "learning_rate": 6.58910242318611e-08, + "loss": 0.1768, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1687866449356079, + "step": 5040, + "valid_targets_mean": 3268.2, + "valid_targets_min": 889 + }, + { + "epoch": 6.845318860244234, + "grad_norm": 0.7170233413590963, + "learning_rate": 6.051721283612422e-08, + "loss": 0.1695, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16001065075397491, + "step": 5045, + "valid_targets_mean": 3043.4, + "valid_targets_min": 712 + }, + { + "epoch": 6.852103120759837, + "grad_norm": 0.8748422499637547, + "learning_rate": 5.537162257565065e-08, + "loss": 0.1636, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17490287125110626, + "step": 5050, + "valid_targets_mean": 2234.8, + "valid_targets_min": 632 + }, + { + "epoch": 6.858887381275441, + "grad_norm": 0.7527414733725077, + "learning_rate": 5.045431234527165e-08, + "loss": 0.1722, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17598162591457367, + "step": 5055, + "valid_targets_mean": 3049.6, + "valid_targets_min": 648 + }, + { + "epoch": 6.865671641791045, + "grad_norm": 0.750418327168722, + "learning_rate": 4.576533842699294e-08, + "loss": 0.1612, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1612885594367981, + "step": 5060, + "valid_targets_mean": 2946.2, + "valid_targets_min": 981 + }, + { + "epoch": 6.8724559023066485, + "grad_norm": 0.8234037034287423, + "learning_rate": 4.1304754489359666e-08, + "loss": 0.1673, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15995310246944427, + "step": 5065, + "valid_targets_mean": 2353.8, + "valid_targets_min": 803 + }, + { + "epoch": 6.879240162822253, + "grad_norm": 0.8151052308072695, + "learning_rate": 3.707261158682807e-08, + "loss": 0.172, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16730859875679016, + "step": 5070, + "valid_targets_mean": 2366.6, + "valid_targets_min": 706 + }, + { + "epoch": 6.886024423337856, + "grad_norm": 0.794569340466791, + "learning_rate": 3.306895815919475e-08, + "loss": 0.1614, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17812877893447876, + "step": 5075, + "valid_targets_mean": 2787.9, + "valid_targets_min": 1092 + }, + { + "epoch": 6.89280868385346, + "grad_norm": 0.6779205812201277, + "learning_rate": 2.929384003103497e-08, + "loss": 0.1607, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1553945243358612, + "step": 5080, + "valid_targets_mean": 3201.9, + "valid_targets_min": 858 + }, + { + "epoch": 6.899592944369064, + "grad_norm": 0.7802177039337713, + "learning_rate": 2.5747300411180787e-08, + "loss": 0.1687, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16561156511306763, + "step": 5085, + "valid_targets_mean": 2595.4, + "valid_targets_min": 761 + }, + { + "epoch": 6.906377204884667, + "grad_norm": 0.811535089999137, + "learning_rate": 2.2429379892221493e-08, + "loss": 0.1713, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1710594892501831, + "step": 5090, + "valid_targets_mean": 2819.6, + "valid_targets_min": 782 + }, + { + "epoch": 6.913161465400272, + "grad_norm": 0.7976488997817401, + "learning_rate": 1.9340116450050628e-08, + "loss": 0.1599, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1631883978843689, + "step": 5095, + "valid_targets_mean": 2428.8, + "valid_targets_min": 697 + }, + { + "epoch": 6.919945725915875, + "grad_norm": 0.8090475833391716, + "learning_rate": 1.6479545443415236e-08, + "loss": 0.1643, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1432981789112091, + "step": 5100, + "valid_targets_mean": 2219.2, + "valid_targets_min": 764 + }, + { + "epoch": 6.926729986431479, + "grad_norm": 0.682864252053581, + "learning_rate": 1.3847699613527276e-08, + "loss": 0.1572, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1406230330467224, + "step": 5105, + "valid_targets_mean": 2982.7, + "valid_targets_min": 1119 + }, + { + "epoch": 6.933514246947083, + "grad_norm": 0.6898488676376657, + "learning_rate": 1.1444609083675062e-08, + "loss": 0.1682, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1759582757949829, + "step": 5110, + "valid_targets_mean": 3367.1, + "valid_targets_min": 1487 + }, + { + "epoch": 6.940298507462686, + "grad_norm": 0.7796796211939258, + "learning_rate": 9.270301358890177e-09, + "loss": 0.1688, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1705056130886078, + "step": 5115, + "valid_targets_mean": 2493.1, + "valid_targets_min": 780 + }, + { + "epoch": 6.947082767978291, + "grad_norm": 1.019436094106403, + "learning_rate": 7.324801325621078e-09, + "loss": 0.1664, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1720581352710724, + "step": 5120, + "valid_targets_mean": 2621.9, + "valid_targets_min": 615 + }, + { + "epoch": 6.953867028493894, + "grad_norm": 0.7032830768741243, + "learning_rate": 5.6081312514599805e-09, + "loss": 0.1614, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14902475476264954, + "step": 5125, + "valid_targets_mean": 3147.9, + "valid_targets_min": 1474 + }, + { + "epoch": 6.960651289009498, + "grad_norm": 0.7432217757535253, + "learning_rate": 4.120310784878623e-09, + "loss": 0.1608, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1647299975156784, + "step": 5130, + "valid_targets_mean": 2841.1, + "valid_targets_min": 578 + }, + { + "epoch": 6.967435549525102, + "grad_norm": 0.7089666153527104, + "learning_rate": 2.861356955008443e-09, + "loss": 0.1764, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19622814655303955, + "step": 5135, + "valid_targets_mean": 3448.8, + "valid_targets_min": 1062 + }, + { + "epoch": 6.974219810040705, + "grad_norm": 0.9344960536682286, + "learning_rate": 1.8312841714474005e-09, + "loss": 0.1624, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16499242186546326, + "step": 5140, + "valid_targets_mean": 2747.8, + "valid_targets_min": 804 + }, + { + "epoch": 6.98100407055631, + "grad_norm": 0.7980431188161958, + "learning_rate": 1.030104224086781e-09, + "loss": 0.1656, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15351396799087524, + "step": 5145, + "valid_targets_mean": 2372.8, + "valid_targets_min": 727 + }, + { + "epoch": 6.987788331071913, + "grad_norm": 0.6998390532387075, + "learning_rate": 4.578262829846303e-10, + "loss": 0.1642, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1696673333644867, + "step": 5150, + "valid_targets_mean": 3469.1, + "valid_targets_min": 659 + }, + { + "epoch": 6.9945725915875165, + "grad_norm": 0.7592131753298821, + "learning_rate": 1.1445689825473339e-10, + "loss": 0.1601, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16776353120803833, + "step": 5155, + "valid_targets_mean": 2719.2, + "valid_targets_min": 997 + }, + { + "epoch": 7.0, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16736872494220734, + "step": 5159, + "total_flos": 1027340714311680.0, + "train_loss": 0.24267188053672734, + "train_runtime": 23911.9379, + "train_samples_per_second": 3.449, + "train_steps_per_second": 0.216, + "valid_targets_mean": 2441.9, + "valid_targets_min": 1168 + } + ], + "logging_steps": 5, + "max_steps": 5159, + "num_input_tokens_seen": 0, + "num_train_epochs": 7, + "save_steps": 200, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": true + }, + "attributes": {} + } + }, + "total_flos": 1027340714311680.0, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +}