| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 6.999053926206244, | |
| "eval_steps": 500, | |
| "global_step": 3702, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.00946073793755913, | |
| "grad_norm": 13.292386854280902, | |
| "learning_rate": 4.3126684636118604e-07, | |
| "loss": 0.6679, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3716040849685669, | |
| "step": 5, | |
| "valid_targets_mean": 5111.2, | |
| "valid_targets_min": 2497 | |
| }, | |
| { | |
| "epoch": 0.01892147587511826, | |
| "grad_norm": 12.976120695301127, | |
| "learning_rate": 9.703504043126686e-07, | |
| "loss": 0.6941, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.35731813311576843, | |
| "step": 10, | |
| "valid_targets_mean": 6041.9, | |
| "valid_targets_min": 2037 | |
| }, | |
| { | |
| "epoch": 0.02838221381267739, | |
| "grad_norm": 10.94543088614748, | |
| "learning_rate": 1.509433962264151e-06, | |
| "loss": 0.6462, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3353500962257385, | |
| "step": 15, | |
| "valid_targets_mean": 7048.9, | |
| "valid_targets_min": 4111 | |
| }, | |
| { | |
| "epoch": 0.03784295175023652, | |
| "grad_norm": 6.852368736442926, | |
| "learning_rate": 2.0485175202156334e-06, | |
| "loss": 0.6181, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3063059449195862, | |
| "step": 20, | |
| "valid_targets_mean": 5856.1, | |
| "valid_targets_min": 1539 | |
| }, | |
| { | |
| "epoch": 0.04730368968779565, | |
| "grad_norm": 5.3977018080734664, | |
| "learning_rate": 2.587601078167116e-06, | |
| "loss": 0.5614, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2615411579608917, | |
| "step": 25, | |
| "valid_targets_mean": 5350.4, | |
| "valid_targets_min": 3761 | |
| }, | |
| { | |
| "epoch": 0.05676442762535478, | |
| "grad_norm": 5.111568881695973, | |
| "learning_rate": 3.126684636118599e-06, | |
| "loss": 0.5349, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.26117661595344543, | |
| "step": 30, | |
| "valid_targets_mean": 5490.1, | |
| "valid_targets_min": 3061 | |
| }, | |
| { | |
| "epoch": 0.06622516556291391, | |
| "grad_norm": 2.365906594146852, | |
| "learning_rate": 3.665768194070081e-06, | |
| "loss": 0.4884, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2334306240081787, | |
| "step": 35, | |
| "valid_targets_mean": 4899.1, | |
| "valid_targets_min": 3363 | |
| }, | |
| { | |
| "epoch": 0.07568590350047304, | |
| "grad_norm": 1.5154295873967274, | |
| "learning_rate": 4.204851752021563e-06, | |
| "loss": 0.4499, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2000514566898346, | |
| "step": 40, | |
| "valid_targets_mean": 6593.0, | |
| "valid_targets_min": 4641 | |
| }, | |
| { | |
| "epoch": 0.08514664143803216, | |
| "grad_norm": 1.0580075894605687, | |
| "learning_rate": 4.7439353099730466e-06, | |
| "loss": 0.4271, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20875361561775208, | |
| "step": 45, | |
| "valid_targets_mean": 5423.5, | |
| "valid_targets_min": 2180 | |
| }, | |
| { | |
| "epoch": 0.0946073793755913, | |
| "grad_norm": 0.8888840325402044, | |
| "learning_rate": 5.283018867924529e-06, | |
| "loss": 0.405, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1873297393321991, | |
| "step": 50, | |
| "valid_targets_mean": 4690.0, | |
| "valid_targets_min": 2271 | |
| }, | |
| { | |
| "epoch": 0.10406811731315042, | |
| "grad_norm": 0.8025966922773096, | |
| "learning_rate": 5.822102425876012e-06, | |
| "loss": 0.4054, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1847410500049591, | |
| "step": 55, | |
| "valid_targets_mean": 4564.8, | |
| "valid_targets_min": 2231 | |
| }, | |
| { | |
| "epoch": 0.11352885525070956, | |
| "grad_norm": 0.6830098620481699, | |
| "learning_rate": 6.3611859838274934e-06, | |
| "loss": 0.3772, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2122303694486618, | |
| "step": 60, | |
| "valid_targets_mean": 5382.4, | |
| "valid_targets_min": 3694 | |
| }, | |
| { | |
| "epoch": 0.12298959318826869, | |
| "grad_norm": 0.6468269510795552, | |
| "learning_rate": 6.9002695417789766e-06, | |
| "loss": 0.3941, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21036577224731445, | |
| "step": 65, | |
| "valid_targets_mean": 5506.9, | |
| "valid_targets_min": 2545 | |
| }, | |
| { | |
| "epoch": 0.13245033112582782, | |
| "grad_norm": 0.7269588227001985, | |
| "learning_rate": 7.439353099730459e-06, | |
| "loss": 0.3786, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2026107907295227, | |
| "step": 70, | |
| "valid_targets_mean": 4666.8, | |
| "valid_targets_min": 802 | |
| }, | |
| { | |
| "epoch": 0.14191106906338694, | |
| "grad_norm": 0.5464115365342573, | |
| "learning_rate": 7.978436657681942e-06, | |
| "loss": 0.386, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2121516317129135, | |
| "step": 75, | |
| "valid_targets_mean": 6398.2, | |
| "valid_targets_min": 3254 | |
| }, | |
| { | |
| "epoch": 0.15137180700094607, | |
| "grad_norm": 0.4679041488767007, | |
| "learning_rate": 8.517520215633423e-06, | |
| "loss": 0.3615, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17857977747917175, | |
| "step": 80, | |
| "valid_targets_mean": 7090.0, | |
| "valid_targets_min": 5052 | |
| }, | |
| { | |
| "epoch": 0.1608325449385052, | |
| "grad_norm": 0.5071840524513592, | |
| "learning_rate": 9.056603773584907e-06, | |
| "loss": 0.3474, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16061782836914062, | |
| "step": 85, | |
| "valid_targets_mean": 5421.2, | |
| "valid_targets_min": 4155 | |
| }, | |
| { | |
| "epoch": 0.17029328287606432, | |
| "grad_norm": 0.48784327498178953, | |
| "learning_rate": 9.595687331536388e-06, | |
| "loss": 0.3496, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17591124773025513, | |
| "step": 90, | |
| "valid_targets_mean": 6018.5, | |
| "valid_targets_min": 3622 | |
| }, | |
| { | |
| "epoch": 0.17975402081362346, | |
| "grad_norm": 0.581900644799806, | |
| "learning_rate": 1.0134770889487871e-05, | |
| "loss": 0.3459, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16716624796390533, | |
| "step": 95, | |
| "valid_targets_mean": 5178.4, | |
| "valid_targets_min": 2243 | |
| }, | |
| { | |
| "epoch": 0.1892147587511826, | |
| "grad_norm": 0.5846177681039163, | |
| "learning_rate": 1.0673854447439354e-05, | |
| "loss": 0.3315, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15679748356342316, | |
| "step": 100, | |
| "valid_targets_mean": 4752.1, | |
| "valid_targets_min": 2315 | |
| }, | |
| { | |
| "epoch": 0.1986754966887417, | |
| "grad_norm": 0.4995428447108845, | |
| "learning_rate": 1.1212938005390836e-05, | |
| "loss": 0.3175, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1597537100315094, | |
| "step": 105, | |
| "valid_targets_mean": 5816.5, | |
| "valid_targets_min": 4068 | |
| }, | |
| { | |
| "epoch": 0.20813623462630085, | |
| "grad_norm": 0.4479628789945412, | |
| "learning_rate": 1.1752021563342319e-05, | |
| "loss": 0.315, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13424637913703918, | |
| "step": 110, | |
| "valid_targets_mean": 6035.2, | |
| "valid_targets_min": 2868 | |
| }, | |
| { | |
| "epoch": 0.21759697256385999, | |
| "grad_norm": 0.4837864823725436, | |
| "learning_rate": 1.2291105121293802e-05, | |
| "loss": 0.314, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.145084410905838, | |
| "step": 115, | |
| "valid_targets_mean": 5793.4, | |
| "valid_targets_min": 1596 | |
| }, | |
| { | |
| "epoch": 0.22705771050141912, | |
| "grad_norm": 0.44889049614325, | |
| "learning_rate": 1.2830188679245283e-05, | |
| "loss": 0.3122, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16734957695007324, | |
| "step": 120, | |
| "valid_targets_mean": 7349.1, | |
| "valid_targets_min": 5043 | |
| }, | |
| { | |
| "epoch": 0.23651844843897823, | |
| "grad_norm": 0.5223670515741717, | |
| "learning_rate": 1.3369272237196767e-05, | |
| "loss": 0.3124, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16438248753547668, | |
| "step": 125, | |
| "valid_targets_mean": 6178.2, | |
| "valid_targets_min": 3920 | |
| }, | |
| { | |
| "epoch": 0.24597918637653737, | |
| "grad_norm": 0.5321194979031634, | |
| "learning_rate": 1.390835579514825e-05, | |
| "loss": 0.3084, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16952776908874512, | |
| "step": 130, | |
| "valid_targets_mean": 5167.1, | |
| "valid_targets_min": 2006 | |
| }, | |
| { | |
| "epoch": 0.2554399243140965, | |
| "grad_norm": 0.4835554531647342, | |
| "learning_rate": 1.4447439353099733e-05, | |
| "loss": 0.2976, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16980065405368805, | |
| "step": 135, | |
| "valid_targets_mean": 7014.2, | |
| "valid_targets_min": 4174 | |
| }, | |
| { | |
| "epoch": 0.26490066225165565, | |
| "grad_norm": 0.5105644710675021, | |
| "learning_rate": 1.4986522911051213e-05, | |
| "loss": 0.3204, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1570666879415512, | |
| "step": 140, | |
| "valid_targets_mean": 5961.8, | |
| "valid_targets_min": 3944 | |
| }, | |
| { | |
| "epoch": 0.27436140018921473, | |
| "grad_norm": 0.5169522126281623, | |
| "learning_rate": 1.5525606469002698e-05, | |
| "loss": 0.3139, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.141907200217247, | |
| "step": 145, | |
| "valid_targets_mean": 4846.5, | |
| "valid_targets_min": 770 | |
| }, | |
| { | |
| "epoch": 0.28382213812677387, | |
| "grad_norm": 0.5685567387490885, | |
| "learning_rate": 1.606469002695418e-05, | |
| "loss": 0.2948, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16263222694396973, | |
| "step": 150, | |
| "valid_targets_mean": 5686.2, | |
| "valid_targets_min": 1786 | |
| }, | |
| { | |
| "epoch": 0.293282876064333, | |
| "grad_norm": 0.5517286923292966, | |
| "learning_rate": 1.6603773584905664e-05, | |
| "loss": 0.3139, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16692663729190826, | |
| "step": 155, | |
| "valid_targets_mean": 7328.5, | |
| "valid_targets_min": 3874 | |
| }, | |
| { | |
| "epoch": 0.30274361400189215, | |
| "grad_norm": 0.5473844718628933, | |
| "learning_rate": 1.7142857142857142e-05, | |
| "loss": 0.3036, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14560210704803467, | |
| "step": 160, | |
| "valid_targets_mean": 5436.2, | |
| "valid_targets_min": 2790 | |
| }, | |
| { | |
| "epoch": 0.3122043519394513, | |
| "grad_norm": 0.5176673018224341, | |
| "learning_rate": 1.7681940700808627e-05, | |
| "loss": 0.2961, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1517486870288849, | |
| "step": 165, | |
| "valid_targets_mean": 6114.5, | |
| "valid_targets_min": 3759 | |
| }, | |
| { | |
| "epoch": 0.3216650898770104, | |
| "grad_norm": 0.5490412433918316, | |
| "learning_rate": 1.8221024258760108e-05, | |
| "loss": 0.2951, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16231662034988403, | |
| "step": 170, | |
| "valid_targets_mean": 5257.4, | |
| "valid_targets_min": 1685 | |
| }, | |
| { | |
| "epoch": 0.33112582781456956, | |
| "grad_norm": 0.6410599828811149, | |
| "learning_rate": 1.8760107816711593e-05, | |
| "loss": 0.2955, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1378370225429535, | |
| "step": 175, | |
| "valid_targets_mean": 5678.0, | |
| "valid_targets_min": 1733 | |
| }, | |
| { | |
| "epoch": 0.34058656575212864, | |
| "grad_norm": 0.5033919157641096, | |
| "learning_rate": 1.9299191374663074e-05, | |
| "loss": 0.3137, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1563076674938202, | |
| "step": 180, | |
| "valid_targets_mean": 6886.4, | |
| "valid_targets_min": 5241 | |
| }, | |
| { | |
| "epoch": 0.3500473036896878, | |
| "grad_norm": 0.5392935569955783, | |
| "learning_rate": 1.9838274932614556e-05, | |
| "loss": 0.2789, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14293016493320465, | |
| "step": 185, | |
| "valid_targets_mean": 5918.0, | |
| "valid_targets_min": 2193 | |
| }, | |
| { | |
| "epoch": 0.3595080416272469, | |
| "grad_norm": 0.6472564298900539, | |
| "learning_rate": 2.037735849056604e-05, | |
| "loss": 0.2987, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15237551927566528, | |
| "step": 190, | |
| "valid_targets_mean": 4608.6, | |
| "valid_targets_min": 2221 | |
| }, | |
| { | |
| "epoch": 0.36896877956480606, | |
| "grad_norm": 0.523153782721405, | |
| "learning_rate": 2.0916442048517522e-05, | |
| "loss": 0.294, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1513247787952423, | |
| "step": 195, | |
| "valid_targets_mean": 5764.5, | |
| "valid_targets_min": 2296 | |
| }, | |
| { | |
| "epoch": 0.3784295175023652, | |
| "grad_norm": 0.469616501289577, | |
| "learning_rate": 2.1455525606469007e-05, | |
| "loss": 0.2792, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1403075009584427, | |
| "step": 200, | |
| "valid_targets_mean": 6505.8, | |
| "valid_targets_min": 4664 | |
| }, | |
| { | |
| "epoch": 0.38789025543992434, | |
| "grad_norm": 0.5985188869226783, | |
| "learning_rate": 2.199460916442049e-05, | |
| "loss": 0.2952, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14567717909812927, | |
| "step": 205, | |
| "valid_targets_mean": 4861.4, | |
| "valid_targets_min": 1942 | |
| }, | |
| { | |
| "epoch": 0.3973509933774834, | |
| "grad_norm": 0.5326077341773564, | |
| "learning_rate": 2.253369272237197e-05, | |
| "loss": 0.3083, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1557682603597641, | |
| "step": 210, | |
| "valid_targets_mean": 5819.1, | |
| "valid_targets_min": 2582 | |
| }, | |
| { | |
| "epoch": 0.40681173131504256, | |
| "grad_norm": 0.5326693796335378, | |
| "learning_rate": 2.307277628032345e-05, | |
| "loss": 0.2832, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14654016494750977, | |
| "step": 215, | |
| "valid_targets_mean": 5031.5, | |
| "valid_targets_min": 2136 | |
| }, | |
| { | |
| "epoch": 0.4162724692526017, | |
| "grad_norm": 0.5314292194307515, | |
| "learning_rate": 2.3611859838274933e-05, | |
| "loss": 0.2803, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14354923367500305, | |
| "step": 220, | |
| "valid_targets_mean": 6880.5, | |
| "valid_targets_min": 4200 | |
| }, | |
| { | |
| "epoch": 0.42573320719016083, | |
| "grad_norm": 0.6305540146764549, | |
| "learning_rate": 2.4150943396226418e-05, | |
| "loss": 0.3098, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1796898990869522, | |
| "step": 225, | |
| "valid_targets_mean": 6977.6, | |
| "valid_targets_min": 4666 | |
| }, | |
| { | |
| "epoch": 0.43519394512771997, | |
| "grad_norm": 0.5230649535123192, | |
| "learning_rate": 2.46900269541779e-05, | |
| "loss": 0.285, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11889778077602386, | |
| "step": 230, | |
| "valid_targets_mean": 4415.1, | |
| "valid_targets_min": 1499 | |
| }, | |
| { | |
| "epoch": 0.4446546830652791, | |
| "grad_norm": 0.4766065436527936, | |
| "learning_rate": 2.5229110512129384e-05, | |
| "loss": 0.2957, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1293986439704895, | |
| "step": 235, | |
| "valid_targets_mean": 6849.5, | |
| "valid_targets_min": 3405 | |
| }, | |
| { | |
| "epoch": 0.45411542100283825, | |
| "grad_norm": 0.48511924294158426, | |
| "learning_rate": 2.5768194070080865e-05, | |
| "loss": 0.2755, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14730864763259888, | |
| "step": 240, | |
| "valid_targets_mean": 6809.1, | |
| "valid_targets_min": 4854 | |
| }, | |
| { | |
| "epoch": 0.46357615894039733, | |
| "grad_norm": 0.5187932119629264, | |
| "learning_rate": 2.6307277628032347e-05, | |
| "loss": 0.2674, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.18547989428043365, | |
| "step": 245, | |
| "valid_targets_mean": 5634.8, | |
| "valid_targets_min": 1005 | |
| }, | |
| { | |
| "epoch": 0.47303689687795647, | |
| "grad_norm": 0.5491633455944653, | |
| "learning_rate": 2.684636118598383e-05, | |
| "loss": 0.2672, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1189369261264801, | |
| "step": 250, | |
| "valid_targets_mean": 4453.9, | |
| "valid_targets_min": 2453 | |
| }, | |
| { | |
| "epoch": 0.4824976348155156, | |
| "grad_norm": 0.5668350991526823, | |
| "learning_rate": 2.7385444743935313e-05, | |
| "loss": 0.2703, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14132535457611084, | |
| "step": 255, | |
| "valid_targets_mean": 5330.2, | |
| "valid_targets_min": 4411 | |
| }, | |
| { | |
| "epoch": 0.49195837275307475, | |
| "grad_norm": 0.6537132489951805, | |
| "learning_rate": 2.7924528301886794e-05, | |
| "loss": 0.2875, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14046913385391235, | |
| "step": 260, | |
| "valid_targets_mean": 4396.1, | |
| "valid_targets_min": 2557 | |
| }, | |
| { | |
| "epoch": 0.5014191106906338, | |
| "grad_norm": 0.539558133701292, | |
| "learning_rate": 2.8463611859838276e-05, | |
| "loss": 0.2727, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12411429733037949, | |
| "step": 265, | |
| "valid_targets_mean": 4548.0, | |
| "valid_targets_min": 1822 | |
| }, | |
| { | |
| "epoch": 0.510879848628193, | |
| "grad_norm": 0.6355091138525413, | |
| "learning_rate": 2.9002695417789757e-05, | |
| "loss": 0.2786, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13668765127658844, | |
| "step": 270, | |
| "valid_targets_mean": 4930.8, | |
| "valid_targets_min": 2156 | |
| }, | |
| { | |
| "epoch": 0.5203405865657521, | |
| "grad_norm": 0.6045780561492375, | |
| "learning_rate": 2.9541778975741242e-05, | |
| "loss": 0.2829, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15746136009693146, | |
| "step": 275, | |
| "valid_targets_mean": 4315.5, | |
| "valid_targets_min": 953 | |
| }, | |
| { | |
| "epoch": 0.5298013245033113, | |
| "grad_norm": 0.517544515682634, | |
| "learning_rate": 3.0080862533692724e-05, | |
| "loss": 0.2698, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13427172601222992, | |
| "step": 280, | |
| "valid_targets_mean": 6005.6, | |
| "valid_targets_min": 4778 | |
| }, | |
| { | |
| "epoch": 0.5392620624408704, | |
| "grad_norm": 0.5242395110498128, | |
| "learning_rate": 3.061994609164421e-05, | |
| "loss": 0.2738, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17519372701644897, | |
| "step": 285, | |
| "valid_targets_mean": 6570.9, | |
| "valid_targets_min": 1759 | |
| }, | |
| { | |
| "epoch": 0.5487228003784295, | |
| "grad_norm": 0.5371360158794343, | |
| "learning_rate": 3.115902964959569e-05, | |
| "loss": 0.2632, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12191037833690643, | |
| "step": 290, | |
| "valid_targets_mean": 5542.8, | |
| "valid_targets_min": 2077 | |
| }, | |
| { | |
| "epoch": 0.5581835383159887, | |
| "grad_norm": 0.4852881118368173, | |
| "learning_rate": 3.169811320754717e-05, | |
| "loss": 0.2718, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14533182978630066, | |
| "step": 295, | |
| "valid_targets_mean": 6815.5, | |
| "valid_targets_min": 5442 | |
| }, | |
| { | |
| "epoch": 0.5676442762535477, | |
| "grad_norm": 0.5631185669619292, | |
| "learning_rate": 3.223719676549865e-05, | |
| "loss": 0.261, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16672319173812866, | |
| "step": 300, | |
| "valid_targets_mean": 6902.6, | |
| "valid_targets_min": 2676 | |
| }, | |
| { | |
| "epoch": 0.5771050141911069, | |
| "grad_norm": 0.5415919755101368, | |
| "learning_rate": 3.2776280323450134e-05, | |
| "loss": 0.2718, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13607017695903778, | |
| "step": 305, | |
| "valid_targets_mean": 5954.1, | |
| "valid_targets_min": 2974 | |
| }, | |
| { | |
| "epoch": 0.586565752128666, | |
| "grad_norm": 0.543397988361979, | |
| "learning_rate": 3.3315363881401616e-05, | |
| "loss": 0.2567, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17378760874271393, | |
| "step": 310, | |
| "valid_targets_mean": 6095.9, | |
| "valid_targets_min": 863 | |
| }, | |
| { | |
| "epoch": 0.5960264900662252, | |
| "grad_norm": 0.49985231481941134, | |
| "learning_rate": 3.3854447439353104e-05, | |
| "loss": 0.2715, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11618228256702423, | |
| "step": 315, | |
| "valid_targets_mean": 6216.5, | |
| "valid_targets_min": 3483 | |
| }, | |
| { | |
| "epoch": 0.6054872280037843, | |
| "grad_norm": 0.46620526718099603, | |
| "learning_rate": 3.4393530997304585e-05, | |
| "loss": 0.2824, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10228373855352402, | |
| "step": 320, | |
| "valid_targets_mean": 6214.8, | |
| "valid_targets_min": 4465 | |
| }, | |
| { | |
| "epoch": 0.6149479659413434, | |
| "grad_norm": 0.47078096185272866, | |
| "learning_rate": 3.493261455525607e-05, | |
| "loss": 0.2672, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14976316690444946, | |
| "step": 325, | |
| "valid_targets_mean": 5927.4, | |
| "valid_targets_min": 1832 | |
| }, | |
| { | |
| "epoch": 0.6244087038789026, | |
| "grad_norm": 0.4726439579386601, | |
| "learning_rate": 3.547169811320755e-05, | |
| "loss": 0.2559, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11369471251964569, | |
| "step": 330, | |
| "valid_targets_mean": 6614.4, | |
| "valid_targets_min": 3123 | |
| }, | |
| { | |
| "epoch": 0.6338694418164617, | |
| "grad_norm": 0.5320768196095412, | |
| "learning_rate": 3.6010781671159037e-05, | |
| "loss": 0.2505, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1185142993927002, | |
| "step": 335, | |
| "valid_targets_mean": 5876.6, | |
| "valid_targets_min": 3191 | |
| }, | |
| { | |
| "epoch": 0.6433301797540208, | |
| "grad_norm": 0.575530878563886, | |
| "learning_rate": 3.654986522911052e-05, | |
| "loss": 0.2669, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1460418999195099, | |
| "step": 340, | |
| "valid_targets_mean": 5636.5, | |
| "valid_targets_min": 2726 | |
| }, | |
| { | |
| "epoch": 0.6527909176915799, | |
| "grad_norm": 0.5147665527633908, | |
| "learning_rate": 3.708894878706199e-05, | |
| "loss": 0.2557, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1212029904127121, | |
| "step": 345, | |
| "valid_targets_mean": 5861.5, | |
| "valid_targets_min": 2541 | |
| }, | |
| { | |
| "epoch": 0.6622516556291391, | |
| "grad_norm": 0.5595903582250489, | |
| "learning_rate": 3.762803234501348e-05, | |
| "loss": 0.2786, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16101306676864624, | |
| "step": 350, | |
| "valid_targets_mean": 6207.0, | |
| "valid_targets_min": 2967 | |
| }, | |
| { | |
| "epoch": 0.6717123935666982, | |
| "grad_norm": 0.5018111369885371, | |
| "learning_rate": 3.816711590296496e-05, | |
| "loss": 0.276, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10931664705276489, | |
| "step": 355, | |
| "valid_targets_mean": 4495.9, | |
| "valid_targets_min": 3049 | |
| }, | |
| { | |
| "epoch": 0.6811731315042573, | |
| "grad_norm": 0.5359690061267144, | |
| "learning_rate": 3.8706199460916444e-05, | |
| "loss": 0.2626, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11740469187498093, | |
| "step": 360, | |
| "valid_targets_mean": 5280.0, | |
| "valid_targets_min": 1868 | |
| }, | |
| { | |
| "epoch": 0.6906338694418165, | |
| "grad_norm": 0.5193656060152103, | |
| "learning_rate": 3.9245283018867925e-05, | |
| "loss": 0.2627, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13573291897773743, | |
| "step": 365, | |
| "valid_targets_mean": 6098.8, | |
| "valid_targets_min": 3118 | |
| }, | |
| { | |
| "epoch": 0.7000946073793756, | |
| "grad_norm": 0.49067627735159125, | |
| "learning_rate": 3.9784366576819413e-05, | |
| "loss": 0.2548, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13638558983802795, | |
| "step": 370, | |
| "valid_targets_mean": 6597.8, | |
| "valid_targets_min": 4328 | |
| }, | |
| { | |
| "epoch": 0.7095553453169348, | |
| "grad_norm": 0.4717473773346668, | |
| "learning_rate": 3.999991999226427e-05, | |
| "loss": 0.2605, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10433489084243774, | |
| "step": 375, | |
| "valid_targets_mean": 6252.5, | |
| "valid_targets_min": 3697 | |
| }, | |
| { | |
| "epoch": 0.7190160832544938, | |
| "grad_norm": 0.5221922736723413, | |
| "learning_rate": 3.9999431058419585e-05, | |
| "loss": 0.2661, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10979779809713364, | |
| "step": 380, | |
| "valid_targets_mean": 5049.9, | |
| "valid_targets_min": 1939 | |
| }, | |
| { | |
| "epoch": 0.7284768211920529, | |
| "grad_norm": 0.4484218939396918, | |
| "learning_rate": 3.999849765032536e-05, | |
| "loss": 0.2544, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11232994496822357, | |
| "step": 385, | |
| "valid_targets_mean": 6655.4, | |
| "valid_targets_min": 980 | |
| }, | |
| { | |
| "epoch": 0.7379375591296121, | |
| "grad_norm": 0.5274283464918212, | |
| "learning_rate": 3.999711978872596e-05, | |
| "loss": 0.2503, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14474813640117645, | |
| "step": 390, | |
| "valid_targets_mean": 6887.2, | |
| "valid_targets_min": 4247 | |
| }, | |
| { | |
| "epoch": 0.7473982970671712, | |
| "grad_norm": 0.5017473030214095, | |
| "learning_rate": 3.9995297504243475e-05, | |
| "loss": 0.2553, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12366019934415817, | |
| "step": 395, | |
| "valid_targets_mean": 5045.4, | |
| "valid_targets_min": 1967 | |
| }, | |
| { | |
| "epoch": 0.7568590350047304, | |
| "grad_norm": 0.4797605012842134, | |
| "learning_rate": 3.9993030837376985e-05, | |
| "loss": 0.2556, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12161791324615479, | |
| "step": 400, | |
| "valid_targets_mean": 5767.5, | |
| "valid_targets_min": 3755 | |
| }, | |
| { | |
| "epoch": 0.7663197729422895, | |
| "grad_norm": 0.5591748230420274, | |
| "learning_rate": 3.999031983850166e-05, | |
| "loss": 0.2624, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13065151870250702, | |
| "step": 405, | |
| "valid_targets_mean": 4916.5, | |
| "valid_targets_min": 2663 | |
| }, | |
| { | |
| "epoch": 0.7757805108798487, | |
| "grad_norm": 0.5768281800754327, | |
| "learning_rate": 3.9987164567867677e-05, | |
| "loss": 0.2495, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12214967608451843, | |
| "step": 410, | |
| "valid_targets_mean": 6337.4, | |
| "valid_targets_min": 4123 | |
| }, | |
| { | |
| "epoch": 0.7852412488174078, | |
| "grad_norm": 0.565949691954824, | |
| "learning_rate": 3.998356509559886e-05, | |
| "loss": 0.2606, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17523278295993805, | |
| "step": 415, | |
| "valid_targets_mean": 7005.5, | |
| "valid_targets_min": 2171 | |
| }, | |
| { | |
| "epoch": 0.7947019867549668, | |
| "grad_norm": 0.5329664506953494, | |
| "learning_rate": 3.997952150169114e-05, | |
| "loss": 0.2594, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13902133703231812, | |
| "step": 420, | |
| "valid_targets_mean": 5315.2, | |
| "valid_targets_min": 1666 | |
| }, | |
| { | |
| "epoch": 0.804162724692526, | |
| "grad_norm": 0.43499678564104083, | |
| "learning_rate": 3.997503387601071e-05, | |
| "loss": 0.2468, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10180394351482391, | |
| "step": 425, | |
| "valid_targets_mean": 6322.8, | |
| "valid_targets_min": 4931 | |
| }, | |
| { | |
| "epoch": 0.8136234626300851, | |
| "grad_norm": 0.5283450256927239, | |
| "learning_rate": 3.9970102318292136e-05, | |
| "loss": 0.2636, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12599720060825348, | |
| "step": 430, | |
| "valid_targets_mean": 4579.4, | |
| "valid_targets_min": 1802 | |
| }, | |
| { | |
| "epoch": 0.8230842005676443, | |
| "grad_norm": 0.5213620926766653, | |
| "learning_rate": 3.996472693813604e-05, | |
| "loss": 0.2549, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10283877700567245, | |
| "step": 435, | |
| "valid_targets_mean": 5204.2, | |
| "valid_targets_min": 1539 | |
| }, | |
| { | |
| "epoch": 0.8325449385052034, | |
| "grad_norm": 0.5538525613580271, | |
| "learning_rate": 3.995890785500673e-05, | |
| "loss": 0.2536, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17035773396492004, | |
| "step": 440, | |
| "valid_targets_mean": 5642.4, | |
| "valid_targets_min": 1934 | |
| }, | |
| { | |
| "epoch": 0.8420056764427626, | |
| "grad_norm": 0.5952043541599361, | |
| "learning_rate": 3.995264519822952e-05, | |
| "loss": 0.242, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12181451171636581, | |
| "step": 445, | |
| "valid_targets_mean": 6117.1, | |
| "valid_targets_min": 4919 | |
| }, | |
| { | |
| "epoch": 0.8514664143803217, | |
| "grad_norm": 0.6436408548808031, | |
| "learning_rate": 3.994593910698784e-05, | |
| "loss": 0.2657, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11498317122459412, | |
| "step": 450, | |
| "valid_targets_mean": 4079.9, | |
| "valid_targets_min": 1209 | |
| }, | |
| { | |
| "epoch": 0.8609271523178808, | |
| "grad_norm": 0.5717709103219628, | |
| "learning_rate": 3.9938789730320184e-05, | |
| "loss": 0.2546, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12665092945098877, | |
| "step": 455, | |
| "valid_targets_mean": 5659.0, | |
| "valid_targets_min": 2717 | |
| }, | |
| { | |
| "epoch": 0.8703878902554399, | |
| "grad_norm": 0.5017091918547645, | |
| "learning_rate": 3.993119722711676e-05, | |
| "loss": 0.2469, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11396554857492447, | |
| "step": 460, | |
| "valid_targets_mean": 5425.6, | |
| "valid_targets_min": 1864 | |
| }, | |
| { | |
| "epoch": 0.879848628192999, | |
| "grad_norm": 0.49964492415668416, | |
| "learning_rate": 3.9923161766115975e-05, | |
| "loss": 0.2463, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14272227883338928, | |
| "step": 465, | |
| "valid_targets_mean": 5635.0, | |
| "valid_targets_min": 2072 | |
| }, | |
| { | |
| "epoch": 0.8893093661305582, | |
| "grad_norm": 0.47619622508106707, | |
| "learning_rate": 3.991468352590069e-05, | |
| "loss": 0.2364, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10488211363554001, | |
| "step": 470, | |
| "valid_targets_mean": 6403.9, | |
| "valid_targets_min": 4611 | |
| }, | |
| { | |
| "epoch": 0.8987701040681173, | |
| "grad_norm": 0.5493491032822436, | |
| "learning_rate": 3.990576269489424e-05, | |
| "loss": 0.2606, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14750590920448303, | |
| "step": 475, | |
| "valid_targets_mean": 6374.0, | |
| "valid_targets_min": 3082 | |
| }, | |
| { | |
| "epoch": 0.9082308420056765, | |
| "grad_norm": 0.4929437751955924, | |
| "learning_rate": 3.9896399471356234e-05, | |
| "loss": 0.2475, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10722912847995758, | |
| "step": 480, | |
| "valid_targets_mean": 5189.4, | |
| "valid_targets_min": 1142 | |
| }, | |
| { | |
| "epoch": 0.9176915799432356, | |
| "grad_norm": 0.521230777881251, | |
| "learning_rate": 3.9886594063378185e-05, | |
| "loss": 0.2409, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12368421256542206, | |
| "step": 485, | |
| "valid_targets_mean": 7125.4, | |
| "valid_targets_min": 5267 | |
| }, | |
| { | |
| "epoch": 0.9271523178807947, | |
| "grad_norm": 0.501801100355575, | |
| "learning_rate": 3.987634668887887e-05, | |
| "loss": 0.2506, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1744682639837265, | |
| "step": 490, | |
| "valid_targets_mean": 6161.8, | |
| "valid_targets_min": 2821 | |
| }, | |
| { | |
| "epoch": 0.9366130558183539, | |
| "grad_norm": 0.565532137296215, | |
| "learning_rate": 3.986565757559945e-05, | |
| "loss": 0.2551, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11137732863426208, | |
| "step": 495, | |
| "valid_targets_mean": 5410.9, | |
| "valid_targets_min": 2383 | |
| }, | |
| { | |
| "epoch": 0.9460737937559129, | |
| "grad_norm": 0.4739992359384424, | |
| "learning_rate": 3.985452696109849e-05, | |
| "loss": 0.2524, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12591904401779175, | |
| "step": 500, | |
| "valid_targets_mean": 5950.1, | |
| "valid_targets_min": 4596 | |
| }, | |
| { | |
| "epoch": 0.9555345316934721, | |
| "grad_norm": 0.4916133738725745, | |
| "learning_rate": 3.984295509274659e-05, | |
| "loss": 0.2455, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12242468446493149, | |
| "step": 505, | |
| "valid_targets_mean": 6121.0, | |
| "valid_targets_min": 3563 | |
| }, | |
| { | |
| "epoch": 0.9649952696310312, | |
| "grad_norm": 1.3514558105735277, | |
| "learning_rate": 3.983094222772094e-05, | |
| "loss": 0.2592, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14152546226978302, | |
| "step": 510, | |
| "valid_targets_mean": 5866.8, | |
| "valid_targets_min": 2482 | |
| }, | |
| { | |
| "epoch": 0.9744560075685903, | |
| "grad_norm": 0.47290691029164367, | |
| "learning_rate": 3.981848863299959e-05, | |
| "loss": 0.2541, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1114603728055954, | |
| "step": 515, | |
| "valid_targets_mean": 6109.9, | |
| "valid_targets_min": 3956 | |
| }, | |
| { | |
| "epoch": 0.9839167455061495, | |
| "grad_norm": 0.6155174685504273, | |
| "learning_rate": 3.9805594585355536e-05, | |
| "loss": 0.2472, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13362428545951843, | |
| "step": 520, | |
| "valid_targets_mean": 4292.6, | |
| "valid_targets_min": 1739 | |
| }, | |
| { | |
| "epoch": 0.9933774834437086, | |
| "grad_norm": 0.47493615981359083, | |
| "learning_rate": 3.9792260371350526e-05, | |
| "loss": 0.2472, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09970583021640778, | |
| "step": 525, | |
| "valid_targets_mean": 5427.1, | |
| "valid_targets_min": 2479 | |
| }, | |
| { | |
| "epoch": 1.0018921475875118, | |
| "grad_norm": 0.5624585903547664, | |
| "learning_rate": 3.977848628732872e-05, | |
| "loss": 0.2447, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11141172051429749, | |
| "step": 530, | |
| "valid_targets_mean": 4299.5, | |
| "valid_targets_min": 1091 | |
| }, | |
| { | |
| "epoch": 1.0113528855250709, | |
| "grad_norm": 0.5499877032447721, | |
| "learning_rate": 3.976427263941013e-05, | |
| "loss": 0.2268, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10701591521501541, | |
| "step": 535, | |
| "valid_targets_mean": 4795.1, | |
| "valid_targets_min": 920 | |
| }, | |
| { | |
| "epoch": 1.0208136234626302, | |
| "grad_norm": 0.4838371795926738, | |
| "learning_rate": 3.9749619743483754e-05, | |
| "loss": 0.2474, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12711255252361298, | |
| "step": 540, | |
| "valid_targets_mean": 6530.5, | |
| "valid_targets_min": 4416 | |
| }, | |
| { | |
| "epoch": 1.0302743614001892, | |
| "grad_norm": 0.5169768860706256, | |
| "learning_rate": 3.9734527925200594e-05, | |
| "loss": 0.2268, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11072207987308502, | |
| "step": 545, | |
| "valid_targets_mean": 5097.2, | |
| "valid_targets_min": 3049 | |
| }, | |
| { | |
| "epoch": 1.0397350993377483, | |
| "grad_norm": 0.4926699291142721, | |
| "learning_rate": 3.9718997519966444e-05, | |
| "loss": 0.244, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12459664046764374, | |
| "step": 550, | |
| "valid_targets_mean": 5628.1, | |
| "valid_targets_min": 2771 | |
| }, | |
| { | |
| "epoch": 1.0491958372753074, | |
| "grad_norm": 0.4966980686685516, | |
| "learning_rate": 3.970302887293437e-05, | |
| "loss": 0.2361, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11457451432943344, | |
| "step": 555, | |
| "valid_targets_mean": 5203.2, | |
| "valid_targets_min": 3449 | |
| }, | |
| { | |
| "epoch": 1.0586565752128667, | |
| "grad_norm": 0.5305519729560744, | |
| "learning_rate": 3.968662233899708e-05, | |
| "loss": 0.24, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12242069840431213, | |
| "step": 560, | |
| "valid_targets_mean": 5241.4, | |
| "valid_targets_min": 2249 | |
| }, | |
| { | |
| "epoch": 1.0681173131504258, | |
| "grad_norm": 0.5240554490130925, | |
| "learning_rate": 3.966977828277905e-05, | |
| "loss": 0.2398, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1389508992433548, | |
| "step": 565, | |
| "valid_targets_mean": 5881.6, | |
| "valid_targets_min": 2174 | |
| }, | |
| { | |
| "epoch": 1.0775780510879849, | |
| "grad_norm": 0.5289561091662918, | |
| "learning_rate": 3.96524970786284e-05, | |
| "loss": 0.2384, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11946485936641693, | |
| "step": 570, | |
| "valid_targets_mean": 5437.6, | |
| "valid_targets_min": 3626 | |
| }, | |
| { | |
| "epoch": 1.087038789025544, | |
| "grad_norm": 0.5405317266225187, | |
| "learning_rate": 3.963477911060855e-05, | |
| "loss": 0.2445, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11820094287395477, | |
| "step": 575, | |
| "valid_targets_mean": 5014.4, | |
| "valid_targets_min": 2052 | |
| }, | |
| { | |
| "epoch": 1.096499526963103, | |
| "grad_norm": 0.500712987964929, | |
| "learning_rate": 3.961662477248973e-05, | |
| "loss": 0.2201, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09292806684970856, | |
| "step": 580, | |
| "valid_targets_mean": 5671.1, | |
| "valid_targets_min": 3119 | |
| }, | |
| { | |
| "epoch": 1.1059602649006623, | |
| "grad_norm": 0.5638466017332864, | |
| "learning_rate": 3.959803446774022e-05, | |
| "loss": 0.2526, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1443258821964264, | |
| "step": 585, | |
| "valid_targets_mean": 5097.2, | |
| "valid_targets_min": 1365 | |
| }, | |
| { | |
| "epoch": 1.1154210028382214, | |
| "grad_norm": 0.4867201503195808, | |
| "learning_rate": 3.957900860951736e-05, | |
| "loss": 0.2368, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13219431042671204, | |
| "step": 590, | |
| "valid_targets_mean": 6045.9, | |
| "valid_targets_min": 2643 | |
| }, | |
| { | |
| "epoch": 1.1248817407757805, | |
| "grad_norm": 0.4757816321592032, | |
| "learning_rate": 3.9559547620658366e-05, | |
| "loss": 0.2272, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10074618458747864, | |
| "step": 595, | |
| "valid_targets_mean": 5344.8, | |
| "valid_targets_min": 2280 | |
| }, | |
| { | |
| "epoch": 1.1343424787133396, | |
| "grad_norm": 0.6128758775517067, | |
| "learning_rate": 3.9539651933670977e-05, | |
| "loss": 0.2495, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1107979565858841, | |
| "step": 600, | |
| "valid_targets_mean": 3639.5, | |
| "valid_targets_min": 467 | |
| }, | |
| { | |
| "epoch": 1.1438032166508987, | |
| "grad_norm": 0.5121856535536162, | |
| "learning_rate": 3.9519321990723796e-05, | |
| "loss": 0.2303, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11904571950435638, | |
| "step": 605, | |
| "valid_targets_mean": 6200.5, | |
| "valid_targets_min": 2171 | |
| }, | |
| { | |
| "epoch": 1.153263954588458, | |
| "grad_norm": 0.4762180442392512, | |
| "learning_rate": 3.949855824363647e-05, | |
| "loss": 0.2393, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.115460604429245, | |
| "step": 610, | |
| "valid_targets_mean": 5414.8, | |
| "valid_targets_min": 835 | |
| }, | |
| { | |
| "epoch": 1.162724692526017, | |
| "grad_norm": 0.4836410961268961, | |
| "learning_rate": 3.94773611538697e-05, | |
| "loss": 0.238, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1263769567012787, | |
| "step": 615, | |
| "valid_targets_mean": 5787.6, | |
| "valid_targets_min": 1293 | |
| }, | |
| { | |
| "epoch": 1.1721854304635762, | |
| "grad_norm": 0.48420895744183057, | |
| "learning_rate": 3.945573119251489e-05, | |
| "loss": 0.2321, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12540149688720703, | |
| "step": 620, | |
| "valid_targets_mean": 5621.6, | |
| "valid_targets_min": 3937 | |
| }, | |
| { | |
| "epoch": 1.1816461684011352, | |
| "grad_norm": 0.5668323089533568, | |
| "learning_rate": 3.9433668840283756e-05, | |
| "loss": 0.237, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11952169239521027, | |
| "step": 625, | |
| "valid_targets_mean": 5713.2, | |
| "valid_targets_min": 182 | |
| }, | |
| { | |
| "epoch": 1.1911069063386943, | |
| "grad_norm": 0.5079376916359959, | |
| "learning_rate": 3.9411174587497636e-05, | |
| "loss": 0.2362, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10352667421102524, | |
| "step": 630, | |
| "valid_targets_mean": 5267.4, | |
| "valid_targets_min": 3874 | |
| }, | |
| { | |
| "epoch": 1.2005676442762536, | |
| "grad_norm": 0.44993465294635854, | |
| "learning_rate": 3.938824893407655e-05, | |
| "loss": 0.2375, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09414789080619812, | |
| "step": 635, | |
| "valid_targets_mean": 5855.6, | |
| "valid_targets_min": 3171 | |
| }, | |
| { | |
| "epoch": 1.2100283822138127, | |
| "grad_norm": 0.4266804320387502, | |
| "learning_rate": 3.9364892389528116e-05, | |
| "loss": 0.2248, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09374960511922836, | |
| "step": 640, | |
| "valid_targets_mean": 6662.6, | |
| "valid_targets_min": 4858 | |
| }, | |
| { | |
| "epoch": 1.2194891201513718, | |
| "grad_norm": 0.4894876148798196, | |
| "learning_rate": 3.9341105472936234e-05, | |
| "loss": 0.2279, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11348379403352737, | |
| "step": 645, | |
| "valid_targets_mean": 5026.5, | |
| "valid_targets_min": 2383 | |
| }, | |
| { | |
| "epoch": 1.2289498580889309, | |
| "grad_norm": 0.4861141591545541, | |
| "learning_rate": 3.9316888712949546e-05, | |
| "loss": 0.2323, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10222115367650986, | |
| "step": 650, | |
| "valid_targets_mean": 4663.4, | |
| "valid_targets_min": 501 | |
| }, | |
| { | |
| "epoch": 1.23841059602649, | |
| "grad_norm": 0.5580509660668194, | |
| "learning_rate": 3.9292242647769664e-05, | |
| "loss": 0.2276, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10276462882757187, | |
| "step": 655, | |
| "valid_targets_mean": 5688.8, | |
| "valid_targets_min": 2494 | |
| }, | |
| { | |
| "epoch": 1.2478713339640493, | |
| "grad_norm": 0.5600748404923863, | |
| "learning_rate": 3.926716782513924e-05, | |
| "loss": 0.2384, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12437956780195236, | |
| "step": 660, | |
| "valid_targets_mean": 5957.5, | |
| "valid_targets_min": 1816 | |
| }, | |
| { | |
| "epoch": 1.2573320719016083, | |
| "grad_norm": 0.48680567022663485, | |
| "learning_rate": 3.924166480232977e-05, | |
| "loss": 0.2305, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10580234229564667, | |
| "step": 665, | |
| "valid_targets_mean": 4815.4, | |
| "valid_targets_min": 2054 | |
| }, | |
| { | |
| "epoch": 1.2667928098391674, | |
| "grad_norm": 0.5012312020890741, | |
| "learning_rate": 3.921573414612923e-05, | |
| "loss": 0.2365, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10871803760528564, | |
| "step": 670, | |
| "valid_targets_mean": 5644.4, | |
| "valid_targets_min": 3270 | |
| }, | |
| { | |
| "epoch": 1.2762535477767265, | |
| "grad_norm": 0.6526213288816137, | |
| "learning_rate": 3.918937643282946e-05, | |
| "loss": 0.2507, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13784456253051758, | |
| "step": 675, | |
| "valid_targets_mean": 4337.4, | |
| "valid_targets_min": 1461 | |
| }, | |
| { | |
| "epoch": 1.2857142857142856, | |
| "grad_norm": 0.45243111873658154, | |
| "learning_rate": 3.9162592248213364e-05, | |
| "loss": 0.2328, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09919239580631256, | |
| "step": 680, | |
| "valid_targets_mean": 5475.1, | |
| "valid_targets_min": 1303 | |
| }, | |
| { | |
| "epoch": 1.295175023651845, | |
| "grad_norm": 0.46993510603366306, | |
| "learning_rate": 3.913538218754189e-05, | |
| "loss": 0.2431, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11857247352600098, | |
| "step": 685, | |
| "valid_targets_mean": 5503.6, | |
| "valid_targets_min": 2736 | |
| }, | |
| { | |
| "epoch": 1.304635761589404, | |
| "grad_norm": 0.5288647977319975, | |
| "learning_rate": 3.9107746855540815e-05, | |
| "loss": 0.2259, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10459893196821213, | |
| "step": 690, | |
| "valid_targets_mean": 4416.6, | |
| "valid_targets_min": 1457 | |
| }, | |
| { | |
| "epoch": 1.314096499526963, | |
| "grad_norm": 0.47853397074365556, | |
| "learning_rate": 3.907968686638728e-05, | |
| "loss": 0.2262, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11762518435716629, | |
| "step": 695, | |
| "valid_targets_mean": 5999.4, | |
| "valid_targets_min": 1795 | |
| }, | |
| { | |
| "epoch": 1.3235572374645224, | |
| "grad_norm": 0.4653678345529489, | |
| "learning_rate": 3.9051202843696154e-05, | |
| "loss": 0.2288, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11448341608047485, | |
| "step": 700, | |
| "valid_targets_mean": 5763.0, | |
| "valid_targets_min": 1931 | |
| }, | |
| { | |
| "epoch": 1.3330179754020814, | |
| "grad_norm": 0.4855084018479891, | |
| "learning_rate": 3.902229542050617e-05, | |
| "loss": 0.2338, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11349400877952576, | |
| "step": 705, | |
| "valid_targets_mean": 4518.6, | |
| "valid_targets_min": 2050 | |
| }, | |
| { | |
| "epoch": 1.3424787133396405, | |
| "grad_norm": 0.5064849346903338, | |
| "learning_rate": 3.899296523926588e-05, | |
| "loss": 0.2399, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12586739659309387, | |
| "step": 710, | |
| "valid_targets_mean": 5059.2, | |
| "valid_targets_min": 2228 | |
| }, | |
| { | |
| "epoch": 1.3519394512771996, | |
| "grad_norm": 0.5683940427049139, | |
| "learning_rate": 3.896321295181932e-05, | |
| "loss": 0.2389, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11889487504959106, | |
| "step": 715, | |
| "valid_targets_mean": 5142.8, | |
| "valid_targets_min": 2903 | |
| }, | |
| { | |
| "epoch": 1.3614001892147587, | |
| "grad_norm": 0.5067048884888564, | |
| "learning_rate": 3.8933039219391604e-05, | |
| "loss": 0.2316, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09034863114356995, | |
| "step": 720, | |
| "valid_targets_mean": 4853.4, | |
| "valid_targets_min": 1109 | |
| }, | |
| { | |
| "epoch": 1.370860927152318, | |
| "grad_norm": 0.5713266940700944, | |
| "learning_rate": 3.890244471257415e-05, | |
| "loss": 0.223, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10502117872238159, | |
| "step": 725, | |
| "valid_targets_mean": 6282.2, | |
| "valid_targets_min": 2621 | |
| }, | |
| { | |
| "epoch": 1.380321665089877, | |
| "grad_norm": 0.47167945802336014, | |
| "learning_rate": 3.8871430111309817e-05, | |
| "loss": 0.2246, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10523657500743866, | |
| "step": 730, | |
| "valid_targets_mean": 5265.6, | |
| "valid_targets_min": 1669 | |
| }, | |
| { | |
| "epoch": 1.3897824030274362, | |
| "grad_norm": 0.4972267194987453, | |
| "learning_rate": 3.883999610487782e-05, | |
| "loss": 0.2133, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11264175176620483, | |
| "step": 735, | |
| "valid_targets_mean": 5413.1, | |
| "valid_targets_min": 1752 | |
| }, | |
| { | |
| "epoch": 1.3992431409649952, | |
| "grad_norm": 0.4857236391537851, | |
| "learning_rate": 3.880814339187832e-05, | |
| "loss": 0.2522, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1228969469666481, | |
| "step": 740, | |
| "valid_targets_mean": 6460.1, | |
| "valid_targets_min": 2751 | |
| }, | |
| { | |
| "epoch": 1.4087038789025543, | |
| "grad_norm": 0.47731532767819274, | |
| "learning_rate": 3.877587268021701e-05, | |
| "loss": 0.2279, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1144258975982666, | |
| "step": 745, | |
| "valid_targets_mean": 6142.0, | |
| "valid_targets_min": 3992 | |
| }, | |
| { | |
| "epoch": 1.4181646168401136, | |
| "grad_norm": 0.45393688275595756, | |
| "learning_rate": 3.874318468708931e-05, | |
| "loss": 0.2323, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09872148185968399, | |
| "step": 750, | |
| "valid_targets_mean": 5503.8, | |
| "valid_targets_min": 2060 | |
| }, | |
| { | |
| "epoch": 1.4276253547776727, | |
| "grad_norm": 0.5303507483697673, | |
| "learning_rate": 3.871008013896444e-05, | |
| "loss": 0.2433, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10745839029550552, | |
| "step": 755, | |
| "valid_targets_mean": 5164.8, | |
| "valid_targets_min": 1538 | |
| }, | |
| { | |
| "epoch": 1.4370860927152318, | |
| "grad_norm": 0.461682203378725, | |
| "learning_rate": 3.8676559771569294e-05, | |
| "loss": 0.23, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11055511236190796, | |
| "step": 760, | |
| "valid_targets_mean": 6175.9, | |
| "valid_targets_min": 3847 | |
| }, | |
| { | |
| "epoch": 1.4465468306527909, | |
| "grad_norm": 0.4296391460871339, | |
| "learning_rate": 3.864262432987206e-05, | |
| "loss": 0.2337, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09028683602809906, | |
| "step": 765, | |
| "valid_targets_mean": 5604.2, | |
| "valid_targets_min": 3031 | |
| }, | |
| { | |
| "epoch": 1.45600756859035, | |
| "grad_norm": 0.4667109187535618, | |
| "learning_rate": 3.860827456806571e-05, | |
| "loss": 0.2406, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12354136258363724, | |
| "step": 770, | |
| "valid_targets_mean": 5927.1, | |
| "valid_targets_min": 3111 | |
| }, | |
| { | |
| "epoch": 1.4654683065279093, | |
| "grad_norm": 0.4716805812736505, | |
| "learning_rate": 3.857351124955118e-05, | |
| "loss": 0.2385, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10110399127006531, | |
| "step": 775, | |
| "valid_targets_mean": 5077.1, | |
| "valid_targets_min": 1075 | |
| }, | |
| { | |
| "epoch": 1.4749290444654684, | |
| "grad_norm": 0.4722294808263683, | |
| "learning_rate": 3.853833514692044e-05, | |
| "loss": 0.2391, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10581814497709274, | |
| "step": 780, | |
| "valid_targets_mean": 5985.8, | |
| "valid_targets_min": 1757 | |
| }, | |
| { | |
| "epoch": 1.4843897824030274, | |
| "grad_norm": 0.46649393641526554, | |
| "learning_rate": 3.850274704193932e-05, | |
| "loss": 0.229, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10153012722730637, | |
| "step": 785, | |
| "valid_targets_mean": 6183.6, | |
| "valid_targets_min": 2006 | |
| }, | |
| { | |
| "epoch": 1.4938505203405865, | |
| "grad_norm": 0.4773875252367172, | |
| "learning_rate": 3.846674772553014e-05, | |
| "loss": 0.2272, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12223786860704422, | |
| "step": 790, | |
| "valid_targets_mean": 6154.8, | |
| "valid_targets_min": 5044 | |
| }, | |
| { | |
| "epoch": 1.5033112582781456, | |
| "grad_norm": 0.4748127013380771, | |
| "learning_rate": 3.843033799775411e-05, | |
| "loss": 0.2245, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.093946173787117, | |
| "step": 795, | |
| "valid_targets_mean": 6119.9, | |
| "valid_targets_min": 3720 | |
| }, | |
| { | |
| "epoch": 1.512771996215705, | |
| "grad_norm": 0.467050356933497, | |
| "learning_rate": 3.839351866779358e-05, | |
| "loss": 0.2385, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10543806850910187, | |
| "step": 800, | |
| "valid_targets_mean": 6435.6, | |
| "valid_targets_min": 5034 | |
| }, | |
| { | |
| "epoch": 1.522232734153264, | |
| "grad_norm": 0.43867305636821574, | |
| "learning_rate": 3.835629055393401e-05, | |
| "loss": 0.226, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11679120361804962, | |
| "step": 805, | |
| "valid_targets_mean": 6308.0, | |
| "valid_targets_min": 3711 | |
| }, | |
| { | |
| "epoch": 1.531693472090823, | |
| "grad_norm": 0.48079409142806057, | |
| "learning_rate": 3.8318654483545865e-05, | |
| "loss": 0.2225, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12533718347549438, | |
| "step": 810, | |
| "valid_targets_mean": 6162.1, | |
| "valid_targets_min": 3455 | |
| }, | |
| { | |
| "epoch": 1.5411542100283824, | |
| "grad_norm": 0.41889792201675763, | |
| "learning_rate": 3.828061129306612e-05, | |
| "loss": 0.2176, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10481228679418564, | |
| "step": 815, | |
| "valid_targets_mean": 6216.2, | |
| "valid_targets_min": 3346 | |
| }, | |
| { | |
| "epoch": 1.5506149479659412, | |
| "grad_norm": 0.4955138763469212, | |
| "learning_rate": 3.824216182797976e-05, | |
| "loss": 0.2332, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11373977363109589, | |
| "step": 820, | |
| "valid_targets_mean": 5359.9, | |
| "valid_targets_min": 2126 | |
| }, | |
| { | |
| "epoch": 1.5600756859035005, | |
| "grad_norm": 0.4872548119969948, | |
| "learning_rate": 3.8203306942800956e-05, | |
| "loss": 0.2274, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10969603061676025, | |
| "step": 825, | |
| "valid_targets_mean": 4700.9, | |
| "valid_targets_min": 1915 | |
| }, | |
| { | |
| "epoch": 1.5695364238410596, | |
| "grad_norm": 0.47970332008609884, | |
| "learning_rate": 3.8164047501054064e-05, | |
| "loss": 0.2256, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10819444060325623, | |
| "step": 830, | |
| "valid_targets_mean": 5609.9, | |
| "valid_targets_min": 3566 | |
| }, | |
| { | |
| "epoch": 1.5789971617786187, | |
| "grad_norm": 0.580079095541343, | |
| "learning_rate": 3.8124384375254454e-05, | |
| "loss": 0.2356, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10750003904104233, | |
| "step": 835, | |
| "valid_targets_mean": 5338.2, | |
| "valid_targets_min": 571 | |
| }, | |
| { | |
| "epoch": 1.588457899716178, | |
| "grad_norm": 0.5673488057662988, | |
| "learning_rate": 3.808431844688911e-05, | |
| "loss": 0.2199, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11036446690559387, | |
| "step": 840, | |
| "valid_targets_mean": 5426.0, | |
| "valid_targets_min": 2462 | |
| }, | |
| { | |
| "epoch": 1.5979186376537369, | |
| "grad_norm": 0.42103950384674316, | |
| "learning_rate": 3.8043850606397026e-05, | |
| "loss": 0.2307, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09853754937648773, | |
| "step": 845, | |
| "valid_targets_mean": 6793.1, | |
| "valid_targets_min": 4564 | |
| }, | |
| { | |
| "epoch": 1.6073793755912962, | |
| "grad_norm": 0.4776716012442452, | |
| "learning_rate": 3.800298175314943e-05, | |
| "loss": 0.2288, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10229594260454178, | |
| "step": 850, | |
| "valid_targets_mean": 4796.5, | |
| "valid_targets_min": 869 | |
| }, | |
| { | |
| "epoch": 1.6168401135288553, | |
| "grad_norm": 0.4784444842339034, | |
| "learning_rate": 3.796171279542983e-05, | |
| "loss": 0.2184, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12016277015209198, | |
| "step": 855, | |
| "valid_targets_mean": 5666.8, | |
| "valid_targets_min": 4213 | |
| }, | |
| { | |
| "epoch": 1.6263008514664143, | |
| "grad_norm": 0.514714610117917, | |
| "learning_rate": 3.792004465041374e-05, | |
| "loss": 0.2262, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10403918474912643, | |
| "step": 860, | |
| "valid_targets_mean": 5254.9, | |
| "valid_targets_min": 3210 | |
| }, | |
| { | |
| "epoch": 1.6357615894039736, | |
| "grad_norm": 0.5112000027216482, | |
| "learning_rate": 3.787797824414839e-05, | |
| "loss": 0.2305, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10923796147108078, | |
| "step": 865, | |
| "valid_targets_mean": 4763.4, | |
| "valid_targets_min": 2980 | |
| }, | |
| { | |
| "epoch": 1.6452223273415325, | |
| "grad_norm": 0.4787828716925675, | |
| "learning_rate": 3.7835514511532106e-05, | |
| "loss": 0.22, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11811286211013794, | |
| "step": 870, | |
| "valid_targets_mean": 6723.4, | |
| "valid_targets_min": 2899 | |
| }, | |
| { | |
| "epoch": 1.6546830652790918, | |
| "grad_norm": 0.4851598879974133, | |
| "learning_rate": 3.779265439629349e-05, | |
| "loss": 0.2224, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12126254290342331, | |
| "step": 875, | |
| "valid_targets_mean": 5780.9, | |
| "valid_targets_min": 4520 | |
| }, | |
| { | |
| "epoch": 1.664143803216651, | |
| "grad_norm": 0.4742437333915908, | |
| "learning_rate": 3.774939885097054e-05, | |
| "loss": 0.2419, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11021271347999573, | |
| "step": 880, | |
| "valid_targets_mean": 4854.0, | |
| "valid_targets_min": 2821 | |
| }, | |
| { | |
| "epoch": 1.67360454115421, | |
| "grad_norm": 0.4960836751821523, | |
| "learning_rate": 3.7705748836889394e-05, | |
| "loss": 0.2342, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13386490941047668, | |
| "step": 885, | |
| "valid_targets_mean": 6546.0, | |
| "valid_targets_min": 1321 | |
| }, | |
| { | |
| "epoch": 1.6830652790917693, | |
| "grad_norm": 0.4036549231565226, | |
| "learning_rate": 3.7661705324143015e-05, | |
| "loss": 0.2093, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08332087099552155, | |
| "step": 890, | |
| "valid_targets_mean": 6718.4, | |
| "valid_targets_min": 2034 | |
| }, | |
| { | |
| "epoch": 1.6925260170293281, | |
| "grad_norm": 0.49131023862826795, | |
| "learning_rate": 3.761726929156961e-05, | |
| "loss": 0.2264, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11716383695602417, | |
| "step": 895, | |
| "valid_targets_mean": 6500.2, | |
| "valid_targets_min": 3450 | |
| }, | |
| { | |
| "epoch": 1.7019867549668874, | |
| "grad_norm": 0.45414353126458956, | |
| "learning_rate": 3.757244172673089e-05, | |
| "loss": 0.2248, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11022776365280151, | |
| "step": 900, | |
| "valid_targets_mean": 6545.5, | |
| "valid_targets_min": 5104 | |
| }, | |
| { | |
| "epoch": 1.7114474929044465, | |
| "grad_norm": 0.4665861040208616, | |
| "learning_rate": 3.75272236258901e-05, | |
| "loss": 0.2205, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10109938681125641, | |
| "step": 905, | |
| "valid_targets_mean": 5423.8, | |
| "valid_targets_min": 2046 | |
| }, | |
| { | |
| "epoch": 1.7209082308420056, | |
| "grad_norm": 0.5390908507043201, | |
| "learning_rate": 3.74816159939899e-05, | |
| "loss": 0.2157, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11916151642799377, | |
| "step": 910, | |
| "valid_targets_mean": 5598.9, | |
| "valid_targets_min": 3342 | |
| }, | |
| { | |
| "epoch": 1.730368968779565, | |
| "grad_norm": 0.4802137890384671, | |
| "learning_rate": 3.743561984463002e-05, | |
| "loss": 0.2465, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20065706968307495, | |
| "step": 915, | |
| "valid_targets_mean": 4943.9, | |
| "valid_targets_min": 1064 | |
| }, | |
| { | |
| "epoch": 1.7398297067171238, | |
| "grad_norm": 0.4905362325837713, | |
| "learning_rate": 3.738923620004475e-05, | |
| "loss": 0.2566, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.107704758644104, | |
| "step": 920, | |
| "valid_targets_mean": 5806.9, | |
| "valid_targets_min": 4816 | |
| }, | |
| { | |
| "epoch": 1.749290444654683, | |
| "grad_norm": 0.4581079118093888, | |
| "learning_rate": 3.734246609108018e-05, | |
| "loss": 0.2262, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09412605315446854, | |
| "step": 925, | |
| "valid_targets_mean": 4781.8, | |
| "valid_targets_min": 449 | |
| }, | |
| { | |
| "epoch": 1.7587511825922422, | |
| "grad_norm": 0.45771521539019777, | |
| "learning_rate": 3.729531055717135e-05, | |
| "loss": 0.2138, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08951415121555328, | |
| "step": 930, | |
| "valid_targets_mean": 5657.9, | |
| "valid_targets_min": 4066 | |
| }, | |
| { | |
| "epoch": 1.7682119205298013, | |
| "grad_norm": 0.4219395696573571, | |
| "learning_rate": 3.724777064631909e-05, | |
| "loss": 0.2196, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10585498809814453, | |
| "step": 935, | |
| "valid_targets_mean": 6137.9, | |
| "valid_targets_min": 1896 | |
| }, | |
| { | |
| "epoch": 1.7776726584673606, | |
| "grad_norm": 0.4750153042604821, | |
| "learning_rate": 3.719984741506676e-05, | |
| "loss": 0.2285, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1056283712387085, | |
| "step": 940, | |
| "valid_targets_mean": 6157.4, | |
| "valid_targets_min": 4827 | |
| }, | |
| { | |
| "epoch": 1.7871333964049196, | |
| "grad_norm": 0.5007065871315426, | |
| "learning_rate": 3.7151541928476775e-05, | |
| "loss": 0.2253, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11300000548362732, | |
| "step": 945, | |
| "valid_targets_mean": 6049.8, | |
| "valid_targets_min": 1787 | |
| }, | |
| { | |
| "epoch": 1.7965941343424787, | |
| "grad_norm": 0.4556642995476161, | |
| "learning_rate": 3.710285526010693e-05, | |
| "loss": 0.2235, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10112877935171127, | |
| "step": 950, | |
| "valid_targets_mean": 5844.6, | |
| "valid_targets_min": 1902 | |
| }, | |
| { | |
| "epoch": 1.8060548722800378, | |
| "grad_norm": 0.4695420382110502, | |
| "learning_rate": 3.705378849198651e-05, | |
| "loss": 0.2233, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09273075312376022, | |
| "step": 955, | |
| "valid_targets_mean": 5799.4, | |
| "valid_targets_min": 3405 | |
| }, | |
| { | |
| "epoch": 1.8155156102175969, | |
| "grad_norm": 0.4582464273228831, | |
| "learning_rate": 3.700434271459229e-05, | |
| "loss": 0.2233, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11421730369329453, | |
| "step": 960, | |
| "valid_targets_mean": 6082.0, | |
| "valid_targets_min": 2838 | |
| }, | |
| { | |
| "epoch": 1.8249763481551562, | |
| "grad_norm": 0.45724998496227603, | |
| "learning_rate": 3.6954519026824265e-05, | |
| "loss": 0.2307, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11946368962526321, | |
| "step": 965, | |
| "valid_targets_mean": 6126.4, | |
| "valid_targets_min": 3686 | |
| }, | |
| { | |
| "epoch": 1.8344370860927153, | |
| "grad_norm": 0.42553397621822625, | |
| "learning_rate": 3.6904318535981254e-05, | |
| "loss": 0.2112, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09127740561962128, | |
| "step": 970, | |
| "valid_targets_mean": 6550.4, | |
| "valid_targets_min": 4443 | |
| }, | |
| { | |
| "epoch": 1.8438978240302744, | |
| "grad_norm": 0.5055145067804998, | |
| "learning_rate": 3.6853742357736265e-05, | |
| "loss": 0.2236, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13003841042518616, | |
| "step": 975, | |
| "valid_targets_mean": 6482.6, | |
| "valid_targets_min": 3467 | |
| }, | |
| { | |
| "epoch": 1.8533585619678334, | |
| "grad_norm": 0.46511645025886095, | |
| "learning_rate": 3.6802791616111716e-05, | |
| "loss": 0.222, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1227150559425354, | |
| "step": 980, | |
| "valid_targets_mean": 6854.8, | |
| "valid_targets_min": 3556 | |
| }, | |
| { | |
| "epoch": 1.8628192999053925, | |
| "grad_norm": 0.5656028012040354, | |
| "learning_rate": 3.6751467443454455e-05, | |
| "loss": 0.2172, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10680273920297623, | |
| "step": 985, | |
| "valid_targets_mean": 5882.6, | |
| "valid_targets_min": 3572 | |
| }, | |
| { | |
| "epoch": 1.8722800378429518, | |
| "grad_norm": 0.6508509853322608, | |
| "learning_rate": 3.6699770980410586e-05, | |
| "loss": 0.2344, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1327436864376068, | |
| "step": 990, | |
| "valid_targets_mean": 5812.0, | |
| "valid_targets_min": 2484 | |
| }, | |
| { | |
| "epoch": 1.881740775780511, | |
| "grad_norm": 0.5305852714830951, | |
| "learning_rate": 3.664770337590011e-05, | |
| "loss": 0.2163, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.0990646705031395, | |
| "step": 995, | |
| "valid_targets_mean": 5233.1, | |
| "valid_targets_min": 1906 | |
| }, | |
| { | |
| "epoch": 1.89120151371807, | |
| "grad_norm": 0.5306949827252334, | |
| "learning_rate": 3.659526578709144e-05, | |
| "loss": 0.2245, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13009144365787506, | |
| "step": 1000, | |
| "valid_targets_mean": 4655.4, | |
| "valid_targets_min": 2019 | |
| }, | |
| { | |
| "epoch": 1.9006622516556293, | |
| "grad_norm": 0.46674719483256766, | |
| "learning_rate": 3.654245937937561e-05, | |
| "loss": 0.2197, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11824624240398407, | |
| "step": 1005, | |
| "valid_targets_mean": 6255.9, | |
| "valid_targets_min": 3204 | |
| }, | |
| { | |
| "epoch": 1.9101229895931882, | |
| "grad_norm": 0.47173335317739623, | |
| "learning_rate": 3.6489285326340424e-05, | |
| "loss": 0.222, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1149347722530365, | |
| "step": 1010, | |
| "valid_targets_mean": 5612.1, | |
| "valid_targets_min": 3308 | |
| }, | |
| { | |
| "epoch": 1.9195837275307475, | |
| "grad_norm": 0.416058820987745, | |
| "learning_rate": 3.6435744809744376e-05, | |
| "loss": 0.2358, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10606338828802109, | |
| "step": 1015, | |
| "valid_targets_mean": 6607.4, | |
| "valid_targets_min": 1916 | |
| }, | |
| { | |
| "epoch": 1.9290444654683065, | |
| "grad_norm": 0.4694345655227754, | |
| "learning_rate": 3.638183901949036e-05, | |
| "loss": 0.22, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1187274232506752, | |
| "step": 1020, | |
| "valid_targets_mean": 5897.4, | |
| "valid_targets_min": 3745 | |
| }, | |
| { | |
| "epoch": 1.9385052034058656, | |
| "grad_norm": 0.47018658031073185, | |
| "learning_rate": 3.6327569153599236e-05, | |
| "loss": 0.2334, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12189055234193802, | |
| "step": 1025, | |
| "valid_targets_mean": 5487.2, | |
| "valid_targets_min": 1457 | |
| }, | |
| { | |
| "epoch": 1.947965941343425, | |
| "grad_norm": 0.5101801225439047, | |
| "learning_rate": 3.62729364181832e-05, | |
| "loss": 0.2116, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09667861461639404, | |
| "step": 1030, | |
| "valid_targets_mean": 4755.6, | |
| "valid_targets_min": 1412 | |
| }, | |
| { | |
| "epoch": 1.9574266792809838, | |
| "grad_norm": 0.45299632084357455, | |
| "learning_rate": 3.6217942027419025e-05, | |
| "loss": 0.221, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12895835936069489, | |
| "step": 1035, | |
| "valid_targets_mean": 7236.1, | |
| "valid_targets_min": 4546 | |
| }, | |
| { | |
| "epoch": 1.966887417218543, | |
| "grad_norm": 0.6043045119332607, | |
| "learning_rate": 3.616258720352097e-05, | |
| "loss": 0.2233, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12101536989212036, | |
| "step": 1040, | |
| "valid_targets_mean": 6235.6, | |
| "valid_targets_min": 4103 | |
| }, | |
| { | |
| "epoch": 1.9763481551561022, | |
| "grad_norm": 0.4657858161318268, | |
| "learning_rate": 3.6106873176713764e-05, | |
| "loss": 0.2199, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1308978646993637, | |
| "step": 1045, | |
| "valid_targets_mean": 6750.4, | |
| "valid_targets_min": 4365 | |
| }, | |
| { | |
| "epoch": 1.9858088930936613, | |
| "grad_norm": 0.4339354389433891, | |
| "learning_rate": 3.60508011852051e-05, | |
| "loss": 0.2346, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08882288634777069, | |
| "step": 1050, | |
| "valid_targets_mean": 4971.6, | |
| "valid_targets_min": 1957 | |
| }, | |
| { | |
| "epoch": 1.9952696310312206, | |
| "grad_norm": 0.4452210444563202, | |
| "learning_rate": 3.5994372475158276e-05, | |
| "loss": 0.2179, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09936431050300598, | |
| "step": 1055, | |
| "valid_targets_mean": 6430.6, | |
| "valid_targets_min": 5264 | |
| }, | |
| { | |
| "epoch": 2.0037842951750235, | |
| "grad_norm": 0.4902441128877412, | |
| "learning_rate": 3.593758830066438e-05, | |
| "loss": 0.2372, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12074028700590134, | |
| "step": 1060, | |
| "valid_targets_mean": 5225.4, | |
| "valid_targets_min": 2454 | |
| }, | |
| { | |
| "epoch": 2.013245033112583, | |
| "grad_norm": 0.5026112447798428, | |
| "learning_rate": 3.5880449923714484e-05, | |
| "loss": 0.2168, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10387119650840759, | |
| "step": 1065, | |
| "valid_targets_mean": 5289.6, | |
| "valid_targets_min": 2424 | |
| }, | |
| { | |
| "epoch": 2.0227057710501417, | |
| "grad_norm": 0.4750712293258598, | |
| "learning_rate": 3.582295861417158e-05, | |
| "loss": 0.2055, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08582209795713425, | |
| "step": 1070, | |
| "valid_targets_mean": 6166.4, | |
| "valid_targets_min": 3853 | |
| }, | |
| { | |
| "epoch": 2.032166508987701, | |
| "grad_norm": 0.5282966500285186, | |
| "learning_rate": 3.576511564974233e-05, | |
| "loss": 0.2082, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09549730271100998, | |
| "step": 1075, | |
| "valid_targets_mean": 5699.4, | |
| "valid_targets_min": 3149 | |
| }, | |
| { | |
| "epoch": 2.0416272469252603, | |
| "grad_norm": 0.4390532347283631, | |
| "learning_rate": 3.5706922315948726e-05, | |
| "loss": 0.207, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09387944638729095, | |
| "step": 1080, | |
| "valid_targets_mean": 6100.9, | |
| "valid_targets_min": 1599 | |
| }, | |
| { | |
| "epoch": 2.051087984862819, | |
| "grad_norm": 0.46546608867761813, | |
| "learning_rate": 3.5648379906099474e-05, | |
| "loss": 0.2087, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11074842512607574, | |
| "step": 1085, | |
| "valid_targets_mean": 5845.8, | |
| "valid_targets_min": 4153 | |
| }, | |
| { | |
| "epoch": 2.0605487228003785, | |
| "grad_norm": 0.81977637281519, | |
| "learning_rate": 3.558948972126127e-05, | |
| "loss": 0.2097, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10561281442642212, | |
| "step": 1090, | |
| "valid_targets_mean": 7041.9, | |
| "valid_targets_min": 4457 | |
| }, | |
| { | |
| "epoch": 2.0700094607379373, | |
| "grad_norm": 0.4202717416437528, | |
| "learning_rate": 3.5530253070229886e-05, | |
| "loss": 0.193, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10468418896198273, | |
| "step": 1095, | |
| "valid_targets_mean": 7051.8, | |
| "valid_targets_min": 5570 | |
| }, | |
| { | |
| "epoch": 2.0794701986754967, | |
| "grad_norm": 0.4337227095105579, | |
| "learning_rate": 3.547067126950106e-05, | |
| "loss": 0.194, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1010643020272255, | |
| "step": 1100, | |
| "valid_targets_mean": 6779.9, | |
| "valid_targets_min": 4810 | |
| }, | |
| { | |
| "epoch": 2.088930936613056, | |
| "grad_norm": 0.4925480521020738, | |
| "learning_rate": 3.541074564324129e-05, | |
| "loss": 0.203, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12354034930467606, | |
| "step": 1105, | |
| "valid_targets_mean": 6516.4, | |
| "valid_targets_min": 2833 | |
| }, | |
| { | |
| "epoch": 2.098391674550615, | |
| "grad_norm": 0.42636976344945043, | |
| "learning_rate": 3.5350477523258334e-05, | |
| "loss": 0.2014, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10801876336336136, | |
| "step": 1110, | |
| "valid_targets_mean": 6759.4, | |
| "valid_targets_min": 2643 | |
| }, | |
| { | |
| "epoch": 2.107852412488174, | |
| "grad_norm": 0.42517617143639164, | |
| "learning_rate": 3.528986824897167e-05, | |
| "loss": 0.2049, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08121705800294876, | |
| "step": 1115, | |
| "valid_targets_mean": 5760.4, | |
| "valid_targets_min": 2593 | |
| }, | |
| { | |
| "epoch": 2.1173131504257334, | |
| "grad_norm": 0.5312337215642596, | |
| "learning_rate": 3.522891916738269e-05, | |
| "loss": 0.2087, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08871275186538696, | |
| "step": 1120, | |
| "valid_targets_mean": 4302.4, | |
| "valid_targets_min": 1285 | |
| }, | |
| { | |
| "epoch": 2.1267738883632923, | |
| "grad_norm": 0.4692448326166772, | |
| "learning_rate": 3.516763163304481e-05, | |
| "loss": 0.2019, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10534701496362686, | |
| "step": 1125, | |
| "valid_targets_mean": 5519.6, | |
| "valid_targets_min": 2362 | |
| }, | |
| { | |
| "epoch": 2.1362346263008516, | |
| "grad_norm": 0.4268146337366665, | |
| "learning_rate": 3.5106007008033306e-05, | |
| "loss": 0.2108, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11514265835285187, | |
| "step": 1130, | |
| "valid_targets_mean": 7479.1, | |
| "valid_targets_min": 1440 | |
| }, | |
| { | |
| "epoch": 2.1456953642384105, | |
| "grad_norm": 0.49100296123591713, | |
| "learning_rate": 3.50440466619151e-05, | |
| "loss": 0.214, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08796236664056778, | |
| "step": 1135, | |
| "valid_targets_mean": 5922.1, | |
| "valid_targets_min": 3934 | |
| }, | |
| { | |
| "epoch": 2.1551561021759698, | |
| "grad_norm": 0.5216092308914664, | |
| "learning_rate": 3.498175197171827e-05, | |
| "loss": 0.208, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10339146107435226, | |
| "step": 1140, | |
| "valid_targets_mean": 5577.0, | |
| "valid_targets_min": 1475 | |
| }, | |
| { | |
| "epoch": 2.164616840113529, | |
| "grad_norm": 0.42621845398225777, | |
| "learning_rate": 3.491912432190147e-05, | |
| "loss": 0.2057, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10287053883075714, | |
| "step": 1145, | |
| "valid_targets_mean": 6644.6, | |
| "valid_targets_min": 4517 | |
| }, | |
| { | |
| "epoch": 2.174077578051088, | |
| "grad_norm": 0.49466569394221804, | |
| "learning_rate": 3.485616510432321e-05, | |
| "loss": 0.2046, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12411276996135712, | |
| "step": 1150, | |
| "valid_targets_mean": 5787.5, | |
| "valid_targets_min": 4705 | |
| }, | |
| { | |
| "epoch": 2.1835383159886472, | |
| "grad_norm": 0.4788842052660544, | |
| "learning_rate": 3.479287571821082e-05, | |
| "loss": 0.2142, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09818103909492493, | |
| "step": 1155, | |
| "valid_targets_mean": 5502.1, | |
| "valid_targets_min": 2850 | |
| }, | |
| { | |
| "epoch": 2.192999053926206, | |
| "grad_norm": 0.4760444243683465, | |
| "learning_rate": 3.4729257570129436e-05, | |
| "loss": 0.2026, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10555780678987503, | |
| "step": 1160, | |
| "valid_targets_mean": 5504.0, | |
| "valid_targets_min": 2222 | |
| }, | |
| { | |
| "epoch": 2.2024597918637654, | |
| "grad_norm": 0.5175946771783035, | |
| "learning_rate": 3.466531207395072e-05, | |
| "loss": 0.2035, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11507289856672287, | |
| "step": 1165, | |
| "valid_targets_mean": 4728.1, | |
| "valid_targets_min": 1177 | |
| }, | |
| { | |
| "epoch": 2.2119205298013247, | |
| "grad_norm": 0.5117776327063412, | |
| "learning_rate": 3.46010406508214e-05, | |
| "loss": 0.2142, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11044911295175552, | |
| "step": 1170, | |
| "valid_targets_mean": 4711.1, | |
| "valid_targets_min": 2573 | |
| }, | |
| { | |
| "epoch": 2.2213812677388836, | |
| "grad_norm": 0.5590952933682073, | |
| "learning_rate": 3.453644472913176e-05, | |
| "loss": 0.2087, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10522548109292984, | |
| "step": 1175, | |
| "valid_targets_mean": 4766.1, | |
| "valid_targets_min": 1915 | |
| }, | |
| { | |
| "epoch": 2.230842005676443, | |
| "grad_norm": 0.5025362223001963, | |
| "learning_rate": 3.4471525744483826e-05, | |
| "loss": 0.2118, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10610745847225189, | |
| "step": 1180, | |
| "valid_targets_mean": 6096.0, | |
| "valid_targets_min": 4585 | |
| }, | |
| { | |
| "epoch": 2.2403027436140017, | |
| "grad_norm": 0.4645208598899087, | |
| "learning_rate": 3.440628513965947e-05, | |
| "loss": 0.1906, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08891929686069489, | |
| "step": 1185, | |
| "valid_targets_mean": 4895.5, | |
| "valid_targets_min": 1524 | |
| }, | |
| { | |
| "epoch": 2.249763481551561, | |
| "grad_norm": 0.6318891734380099, | |
| "learning_rate": 3.4340724364588405e-05, | |
| "loss": 0.197, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08534630388021469, | |
| "step": 1190, | |
| "valid_targets_mean": 4529.2, | |
| "valid_targets_min": 1166 | |
| }, | |
| { | |
| "epoch": 2.2592242194891203, | |
| "grad_norm": 0.44200205332456727, | |
| "learning_rate": 3.4274844876315885e-05, | |
| "loss": 0.2075, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09655225276947021, | |
| "step": 1195, | |
| "valid_targets_mean": 6800.4, | |
| "valid_targets_min": 3424 | |
| }, | |
| { | |
| "epoch": 2.268684957426679, | |
| "grad_norm": 0.4615805475383118, | |
| "learning_rate": 3.4208648138970366e-05, | |
| "loss": 0.2098, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09694427251815796, | |
| "step": 1200, | |
| "valid_targets_mean": 5259.1, | |
| "valid_targets_min": 3358 | |
| }, | |
| { | |
| "epoch": 2.2781456953642385, | |
| "grad_norm": 0.4050381330325965, | |
| "learning_rate": 3.4142135623730954e-05, | |
| "loss": 0.215, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10238818824291229, | |
| "step": 1205, | |
| "valid_targets_mean": 7746.4, | |
| "valid_targets_min": 3172 | |
| }, | |
| { | |
| "epoch": 2.2876064333017974, | |
| "grad_norm": 0.47392587771674227, | |
| "learning_rate": 3.407530880879472e-05, | |
| "loss": 0.2024, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09218434244394302, | |
| "step": 1210, | |
| "valid_targets_mean": 4792.6, | |
| "valid_targets_min": 1795 | |
| }, | |
| { | |
| "epoch": 2.2970671712393567, | |
| "grad_norm": 0.5019723788934695, | |
| "learning_rate": 3.400816917934383e-05, | |
| "loss": 0.1957, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11557125300168991, | |
| "step": 1215, | |
| "valid_targets_mean": 6118.2, | |
| "valid_targets_min": 2081 | |
| }, | |
| { | |
| "epoch": 2.306527909176916, | |
| "grad_norm": 0.4341351471127814, | |
| "learning_rate": 3.394071822751255e-05, | |
| "loss": 0.2154, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11432911455631256, | |
| "step": 1220, | |
| "valid_targets_mean": 7413.2, | |
| "valid_targets_min": 5062 | |
| }, | |
| { | |
| "epoch": 2.315988647114475, | |
| "grad_norm": 0.48447458616548483, | |
| "learning_rate": 3.3872957452354085e-05, | |
| "loss": 0.1974, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10207293927669525, | |
| "step": 1225, | |
| "valid_targets_mean": 5568.5, | |
| "valid_targets_min": 2966 | |
| }, | |
| { | |
| "epoch": 2.325449385052034, | |
| "grad_norm": 0.49502495306040073, | |
| "learning_rate": 3.380488835980726e-05, | |
| "loss": 0.2041, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1068793311715126, | |
| "step": 1230, | |
| "valid_targets_mean": 5244.4, | |
| "valid_targets_min": 3183 | |
| }, | |
| { | |
| "epoch": 2.334910122989593, | |
| "grad_norm": 0.472235805272251, | |
| "learning_rate": 3.373651246266306e-05, | |
| "loss": 0.2023, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10191547870635986, | |
| "step": 1235, | |
| "valid_targets_mean": 6612.2, | |
| "valid_targets_min": 4516 | |
| }, | |
| { | |
| "epoch": 2.3443708609271523, | |
| "grad_norm": 0.4651068349442188, | |
| "learning_rate": 3.366783128053097e-05, | |
| "loss": 0.1952, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08761796355247498, | |
| "step": 1240, | |
| "valid_targets_mean": 4062.6, | |
| "valid_targets_min": 1091 | |
| }, | |
| { | |
| "epoch": 2.3538315988647116, | |
| "grad_norm": 0.44567297458048116, | |
| "learning_rate": 3.359884633980528e-05, | |
| "loss": 0.196, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08401072025299072, | |
| "step": 1245, | |
| "valid_targets_mean": 5205.4, | |
| "valid_targets_min": 1799 | |
| }, | |
| { | |
| "epoch": 2.3632923368022705, | |
| "grad_norm": 0.5648360804071922, | |
| "learning_rate": 3.352955917363108e-05, | |
| "loss": 0.2311, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12823820114135742, | |
| "step": 1250, | |
| "valid_targets_mean": 5323.9, | |
| "valid_targets_min": 2926 | |
| }, | |
| { | |
| "epoch": 2.3727530747398298, | |
| "grad_norm": 0.39813339146433463, | |
| "learning_rate": 3.345997132187022e-05, | |
| "loss": 0.1942, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09695559740066528, | |
| "step": 1255, | |
| "valid_targets_mean": 7675.4, | |
| "valid_targets_min": 2773 | |
| }, | |
| { | |
| "epoch": 2.3822138126773886, | |
| "grad_norm": 0.4375240114279496, | |
| "learning_rate": 3.339008433106713e-05, | |
| "loss": 0.1924, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08221010863780975, | |
| "step": 1260, | |
| "valid_targets_mean": 6793.2, | |
| "valid_targets_min": 3433 | |
| }, | |
| { | |
| "epoch": 2.391674550614948, | |
| "grad_norm": 0.46728845602150615, | |
| "learning_rate": 3.331989975441437e-05, | |
| "loss": 0.2064, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1225174069404602, | |
| "step": 1265, | |
| "valid_targets_mean": 6902.6, | |
| "valid_targets_min": 4716 | |
| }, | |
| { | |
| "epoch": 2.4011352885525072, | |
| "grad_norm": 0.4907015096558724, | |
| "learning_rate": 3.324941915171817e-05, | |
| "loss": 0.2059, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1086108461022377, | |
| "step": 1270, | |
| "valid_targets_mean": 5772.2, | |
| "valid_targets_min": 4619 | |
| }, | |
| { | |
| "epoch": 2.410596026490066, | |
| "grad_norm": 0.5288905896743813, | |
| "learning_rate": 3.3178644089363726e-05, | |
| "loss": 0.2047, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.0829245075583458, | |
| "step": 1275, | |
| "valid_targets_mean": 4927.6, | |
| "valid_targets_min": 1720 | |
| }, | |
| { | |
| "epoch": 2.4200567644276254, | |
| "grad_norm": 0.4616750373070165, | |
| "learning_rate": 3.310757614028043e-05, | |
| "loss": 0.2008, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10950164496898651, | |
| "step": 1280, | |
| "valid_targets_mean": 5469.9, | |
| "valid_targets_min": 2366 | |
| }, | |
| { | |
| "epoch": 2.4295175023651847, | |
| "grad_norm": 0.48785513225115845, | |
| "learning_rate": 3.303621688390688e-05, | |
| "loss": 0.2016, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10254321247339249, | |
| "step": 1285, | |
| "valid_targets_mean": 4784.5, | |
| "valid_targets_min": 1528 | |
| }, | |
| { | |
| "epoch": 2.4389782403027436, | |
| "grad_norm": 0.47006830003545064, | |
| "learning_rate": 3.2964567906155775e-05, | |
| "loss": 0.2105, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10443936288356781, | |
| "step": 1290, | |
| "valid_targets_mean": 5922.1, | |
| "valid_targets_min": 4665 | |
| }, | |
| { | |
| "epoch": 2.448438978240303, | |
| "grad_norm": 0.5303981405733078, | |
| "learning_rate": 3.28926307993787e-05, | |
| "loss": 0.2089, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10440479218959808, | |
| "step": 1295, | |
| "valid_targets_mean": 4983.1, | |
| "valid_targets_min": 1544 | |
| }, | |
| { | |
| "epoch": 2.4578997161778617, | |
| "grad_norm": 0.41697399320560447, | |
| "learning_rate": 3.282040716233073e-05, | |
| "loss": 0.1965, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11425549536943436, | |
| "step": 1300, | |
| "valid_targets_mean": 7874.9, | |
| "valid_targets_min": 3015 | |
| }, | |
| { | |
| "epoch": 2.467360454115421, | |
| "grad_norm": 0.4637165828897811, | |
| "learning_rate": 3.274789860013484e-05, | |
| "loss": 0.2033, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10167668759822845, | |
| "step": 1305, | |
| "valid_targets_mean": 5048.4, | |
| "valid_targets_min": 4028 | |
| }, | |
| { | |
| "epoch": 2.47682119205298, | |
| "grad_norm": 0.45904308662548016, | |
| "learning_rate": 3.267510672424633e-05, | |
| "loss": 0.208, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10144510865211487, | |
| "step": 1310, | |
| "valid_targets_mean": 5632.4, | |
| "valid_targets_min": 1561 | |
| }, | |
| { | |
| "epoch": 2.486281929990539, | |
| "grad_norm": 0.5117482006460289, | |
| "learning_rate": 3.260203315241693e-05, | |
| "loss": 0.2167, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11750157177448273, | |
| "step": 1315, | |
| "valid_targets_mean": 5832.9, | |
| "valid_targets_min": 2221 | |
| }, | |
| { | |
| "epoch": 2.4957426679280985, | |
| "grad_norm": 0.4821146207280302, | |
| "learning_rate": 3.25286795086589e-05, | |
| "loss": 0.1974, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10111722350120544, | |
| "step": 1320, | |
| "valid_targets_mean": 6177.2, | |
| "valid_targets_min": 4508 | |
| }, | |
| { | |
| "epoch": 2.5052034058656574, | |
| "grad_norm": 0.4703343391187789, | |
| "learning_rate": 3.245504742320889e-05, | |
| "loss": 0.2076, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.111258864402771, | |
| "step": 1325, | |
| "valid_targets_mean": 6082.4, | |
| "valid_targets_min": 1854 | |
| }, | |
| { | |
| "epoch": 2.5146641438032167, | |
| "grad_norm": 0.4096048017619596, | |
| "learning_rate": 3.238113853249176e-05, | |
| "loss": 0.1901, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09202168136835098, | |
| "step": 1330, | |
| "valid_targets_mean": 6705.9, | |
| "valid_targets_min": 5581 | |
| }, | |
| { | |
| "epoch": 2.524124881740776, | |
| "grad_norm": 0.4794504102641663, | |
| "learning_rate": 3.230695447908416e-05, | |
| "loss": 0.2132, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12047106772661209, | |
| "step": 1335, | |
| "valid_targets_mean": 6179.9, | |
| "valid_targets_min": 1678 | |
| }, | |
| { | |
| "epoch": 2.533585619678335, | |
| "grad_norm": 0.42853949355775744, | |
| "learning_rate": 3.223249691167808e-05, | |
| "loss": 0.2056, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09987708181142807, | |
| "step": 1340, | |
| "valid_targets_mean": 6150.5, | |
| "valid_targets_min": 2052 | |
| }, | |
| { | |
| "epoch": 2.543046357615894, | |
| "grad_norm": 0.45038054848136166, | |
| "learning_rate": 3.215776748504415e-05, | |
| "loss": 0.1952, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09696616977453232, | |
| "step": 1345, | |
| "valid_targets_mean": 5926.5, | |
| "valid_targets_min": 4793 | |
| }, | |
| { | |
| "epoch": 2.552507095553453, | |
| "grad_norm": 0.46275515871952516, | |
| "learning_rate": 3.208276785999491e-05, | |
| "loss": 0.1982, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09807568788528442, | |
| "step": 1350, | |
| "valid_targets_mean": 5176.4, | |
| "valid_targets_min": 3229 | |
| }, | |
| { | |
| "epoch": 2.5619678334910123, | |
| "grad_norm": 0.5405973660041345, | |
| "learning_rate": 3.200749970334788e-05, | |
| "loss": 0.2122, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11471271514892578, | |
| "step": 1355, | |
| "valid_targets_mean": 6159.6, | |
| "valid_targets_min": 2018 | |
| }, | |
| { | |
| "epoch": 2.571428571428571, | |
| "grad_norm": 0.6206982063058315, | |
| "learning_rate": 3.193196468788852e-05, | |
| "loss": 0.212, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11576743423938751, | |
| "step": 1360, | |
| "valid_targets_mean": 4677.5, | |
| "valid_targets_min": 1588 | |
| }, | |
| { | |
| "epoch": 2.5808893093661305, | |
| "grad_norm": 0.4509216554262047, | |
| "learning_rate": 3.1856164492333045e-05, | |
| "loss": 0.1953, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09978030622005463, | |
| "step": 1365, | |
| "valid_targets_mean": 6171.2, | |
| "valid_targets_min": 4374 | |
| }, | |
| { | |
| "epoch": 2.59035004730369, | |
| "grad_norm": 0.5022130091375908, | |
| "learning_rate": 3.178010080129114e-05, | |
| "loss": 0.2109, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11894027888774872, | |
| "step": 1370, | |
| "valid_targets_mean": 4665.2, | |
| "valid_targets_min": 2530 | |
| }, | |
| { | |
| "epoch": 2.5998107852412486, | |
| "grad_norm": 0.49048450775706764, | |
| "learning_rate": 3.1703775305228476e-05, | |
| "loss": 0.213, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12461340427398682, | |
| "step": 1375, | |
| "valid_targets_mean": 5434.1, | |
| "valid_targets_min": 2890 | |
| }, | |
| { | |
| "epoch": 2.609271523178808, | |
| "grad_norm": 0.4352184740333695, | |
| "learning_rate": 3.16271897004292e-05, | |
| "loss": 0.1981, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.0782010406255722, | |
| "step": 1380, | |
| "valid_targets_mean": 5664.6, | |
| "valid_targets_min": 3486 | |
| }, | |
| { | |
| "epoch": 2.6187322611163673, | |
| "grad_norm": 0.548234031499346, | |
| "learning_rate": 3.1550345688958186e-05, | |
| "loss": 0.2031, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12204037606716156, | |
| "step": 1385, | |
| "valid_targets_mean": 4753.4, | |
| "valid_targets_min": 2308 | |
| }, | |
| { | |
| "epoch": 2.628192999053926, | |
| "grad_norm": 0.4695384861258868, | |
| "learning_rate": 3.147324497862323e-05, | |
| "loss": 0.2069, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09967264533042908, | |
| "step": 1390, | |
| "valid_targets_mean": 5173.1, | |
| "valid_targets_min": 2358 | |
| }, | |
| { | |
| "epoch": 2.6376537369914854, | |
| "grad_norm": 0.4704020408844116, | |
| "learning_rate": 3.139588928293711e-05, | |
| "loss": 0.2016, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12265658378601074, | |
| "step": 1395, | |
| "valid_targets_mean": 6174.9, | |
| "valid_targets_min": 2043 | |
| }, | |
| { | |
| "epoch": 2.6471144749290447, | |
| "grad_norm": 0.5094346857161342, | |
| "learning_rate": 3.131828032107945e-05, | |
| "loss": 0.2027, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08958130329847336, | |
| "step": 1400, | |
| "valid_targets_mean": 4513.2, | |
| "valid_targets_min": 2441 | |
| }, | |
| { | |
| "epoch": 2.6565752128666036, | |
| "grad_norm": 0.4335492269233461, | |
| "learning_rate": 3.124041981785859e-05, | |
| "loss": 0.2038, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10349221527576447, | |
| "step": 1405, | |
| "valid_targets_mean": 6694.0, | |
| "valid_targets_min": 4944 | |
| }, | |
| { | |
| "epoch": 2.666035950804163, | |
| "grad_norm": 0.48204121851722115, | |
| "learning_rate": 3.1162309503673176e-05, | |
| "loss": 0.213, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1139221042394638, | |
| "step": 1410, | |
| "valid_targets_mean": 6066.9, | |
| "valid_targets_min": 3755 | |
| }, | |
| { | |
| "epoch": 2.6754966887417218, | |
| "grad_norm": 0.5313576753857829, | |
| "learning_rate": 3.108395111447376e-05, | |
| "loss": 0.1993, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10339981317520142, | |
| "step": 1415, | |
| "valid_targets_mean": 5164.8, | |
| "valid_targets_min": 3608 | |
| }, | |
| { | |
| "epoch": 2.684957426679281, | |
| "grad_norm": 0.4992222498448238, | |
| "learning_rate": 3.1005346391724195e-05, | |
| "loss": 0.2098, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11738509684801102, | |
| "step": 1420, | |
| "valid_targets_mean": 5114.2, | |
| "valid_targets_min": 1631 | |
| }, | |
| { | |
| "epoch": 2.69441816461684, | |
| "grad_norm": 0.46001559873179254, | |
| "learning_rate": 3.092649708236293e-05, | |
| "loss": 0.2064, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.0868842750787735, | |
| "step": 1425, | |
| "valid_targets_mean": 5085.1, | |
| "valid_targets_min": 3443 | |
| }, | |
| { | |
| "epoch": 2.703878902554399, | |
| "grad_norm": 0.4496410745031232, | |
| "learning_rate": 3.08474049387642e-05, | |
| "loss": 0.1993, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07558328658342361, | |
| "step": 1430, | |
| "valid_targets_mean": 5919.5, | |
| "valid_targets_min": 2787 | |
| }, | |
| { | |
| "epoch": 2.7133396404919585, | |
| "grad_norm": 0.42890955889144455, | |
| "learning_rate": 3.076807171869907e-05, | |
| "loss": 0.2074, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09730438888072968, | |
| "step": 1435, | |
| "valid_targets_mean": 6386.8, | |
| "valid_targets_min": 4754 | |
| }, | |
| { | |
| "epoch": 2.7228003784295174, | |
| "grad_norm": 0.5252335037883175, | |
| "learning_rate": 3.068849918529635e-05, | |
| "loss": 0.1999, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11477208137512207, | |
| "step": 1440, | |
| "valid_targets_mean": 6028.5, | |
| "valid_targets_min": 2391 | |
| }, | |
| { | |
| "epoch": 2.7322611163670767, | |
| "grad_norm": 0.4705003773792761, | |
| "learning_rate": 3.060868910700348e-05, | |
| "loss": 0.2136, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1162232756614685, | |
| "step": 1445, | |
| "valid_targets_mean": 5627.2, | |
| "valid_targets_min": 2560 | |
| }, | |
| { | |
| "epoch": 2.741721854304636, | |
| "grad_norm": 0.4885389985755956, | |
| "learning_rate": 3.052864325754712e-05, | |
| "loss": 0.209, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10556751489639282, | |
| "step": 1450, | |
| "valid_targets_mean": 5362.4, | |
| "valid_targets_min": 807 | |
| }, | |
| { | |
| "epoch": 2.751182592242195, | |
| "grad_norm": 0.4311091763286013, | |
| "learning_rate": 3.0448363415893838e-05, | |
| "loss": 0.1933, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09206240624189377, | |
| "step": 1455, | |
| "valid_targets_mean": 6032.2, | |
| "valid_targets_min": 3468 | |
| }, | |
| { | |
| "epoch": 2.760643330179754, | |
| "grad_norm": 0.5188781789839139, | |
| "learning_rate": 3.0367851366210507e-05, | |
| "loss": 0.1991, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1059696152806282, | |
| "step": 1460, | |
| "valid_targets_mean": 5727.2, | |
| "valid_targets_min": 2454 | |
| }, | |
| { | |
| "epoch": 2.770104068117313, | |
| "grad_norm": 0.44581971955569505, | |
| "learning_rate": 3.028710889782466e-05, | |
| "loss": 0.2051, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.0931185930967331, | |
| "step": 1465, | |
| "valid_targets_mean": 6201.0, | |
| "valid_targets_min": 2649 | |
| }, | |
| { | |
| "epoch": 2.7795648060548723, | |
| "grad_norm": 0.5236206344540041, | |
| "learning_rate": 3.020613780518476e-05, | |
| "loss": 0.213, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10026657581329346, | |
| "step": 1470, | |
| "valid_targets_mean": 3980.8, | |
| "valid_targets_min": 2704 | |
| }, | |
| { | |
| "epoch": 2.789025543992431, | |
| "grad_norm": 0.47947188557970327, | |
| "learning_rate": 3.0124939887820264e-05, | |
| "loss": 0.2018, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.0977272316813469, | |
| "step": 1475, | |
| "valid_targets_mean": 5940.4, | |
| "valid_targets_min": 1596 | |
| }, | |
| { | |
| "epoch": 2.7984862819299905, | |
| "grad_norm": 0.5143786896824261, | |
| "learning_rate": 3.0043516950301695e-05, | |
| "loss": 0.2022, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10160660743713379, | |
| "step": 1480, | |
| "valid_targets_mean": 4532.6, | |
| "valid_targets_min": 1334 | |
| }, | |
| { | |
| "epoch": 2.80794701986755, | |
| "grad_norm": 0.5672714165027751, | |
| "learning_rate": 2.996187080220047e-05, | |
| "loss": 0.2011, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10727030038833618, | |
| "step": 1485, | |
| "valid_targets_mean": 5066.0, | |
| "valid_targets_min": 3600 | |
| }, | |
| { | |
| "epoch": 2.8174077578051087, | |
| "grad_norm": 0.4306113138051286, | |
| "learning_rate": 2.9880003258048723e-05, | |
| "loss": 0.1892, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08836518228054047, | |
| "step": 1490, | |
| "valid_targets_mean": 5612.1, | |
| "valid_targets_min": 2974 | |
| }, | |
| { | |
| "epoch": 2.826868495742668, | |
| "grad_norm": 0.45476672936562934, | |
| "learning_rate": 2.9797916137298988e-05, | |
| "loss": 0.1933, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10338494181632996, | |
| "step": 1495, | |
| "valid_targets_mean": 5809.8, | |
| "valid_targets_min": 4592 | |
| }, | |
| { | |
| "epoch": 2.8363292336802273, | |
| "grad_norm": 0.5113296216520785, | |
| "learning_rate": 2.9715611264283723e-05, | |
| "loss": 0.215, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1314087212085724, | |
| "step": 1500, | |
| "valid_targets_mean": 5553.6, | |
| "valid_targets_min": 3370 | |
| }, | |
| { | |
| "epoch": 2.845789971617786, | |
| "grad_norm": 0.4672791860190598, | |
| "learning_rate": 2.96330904681748e-05, | |
| "loss": 0.2123, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10095086693763733, | |
| "step": 1505, | |
| "valid_targets_mean": 6344.9, | |
| "valid_targets_min": 2582 | |
| }, | |
| { | |
| "epoch": 2.8552507095553454, | |
| "grad_norm": 0.46439688017624076, | |
| "learning_rate": 2.955035558294283e-05, | |
| "loss": 0.2011, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09525062888860703, | |
| "step": 1510, | |
| "valid_targets_mean": 5109.4, | |
| "valid_targets_min": 2243 | |
| }, | |
| { | |
| "epoch": 2.8647114474929043, | |
| "grad_norm": 0.5570784090311692, | |
| "learning_rate": 2.946740844731643e-05, | |
| "loss": 0.2044, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10325586795806885, | |
| "step": 1515, | |
| "valid_targets_mean": 6684.5, | |
| "valid_targets_min": 3214 | |
| }, | |
| { | |
| "epoch": 2.8741721854304636, | |
| "grad_norm": 0.5490977740057562, | |
| "learning_rate": 2.9384250904741328e-05, | |
| "loss": 0.2176, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13034790754318237, | |
| "step": 1520, | |
| "valid_targets_mean": 6380.8, | |
| "valid_targets_min": 3437 | |
| }, | |
| { | |
| "epoch": 2.8836329233680225, | |
| "grad_norm": 0.43561915051047767, | |
| "learning_rate": 2.9300884803339412e-05, | |
| "loss": 0.2118, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11554377526044846, | |
| "step": 1525, | |
| "valid_targets_mean": 7014.2, | |
| "valid_targets_min": 1085 | |
| }, | |
| { | |
| "epoch": 2.8930936613055818, | |
| "grad_norm": 0.46308857342667414, | |
| "learning_rate": 2.921731199586766e-05, | |
| "loss": 0.2059, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11941996216773987, | |
| "step": 1530, | |
| "valid_targets_mean": 6537.1, | |
| "valid_targets_min": 5272 | |
| }, | |
| { | |
| "epoch": 2.902554399243141, | |
| "grad_norm": 0.494911072684399, | |
| "learning_rate": 2.9133534339676954e-05, | |
| "loss": 0.2127, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11458069831132889, | |
| "step": 1535, | |
| "valid_targets_mean": 5210.6, | |
| "valid_targets_min": 1040 | |
| }, | |
| { | |
| "epoch": 2.9120151371807, | |
| "grad_norm": 0.4565372967082708, | |
| "learning_rate": 2.904955369667079e-05, | |
| "loss": 0.196, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1161235123872757, | |
| "step": 1540, | |
| "valid_targets_mean": 6230.1, | |
| "valid_targets_min": 3877 | |
| }, | |
| { | |
| "epoch": 2.9214758751182592, | |
| "grad_norm": 0.5257513036521178, | |
| "learning_rate": 2.896537193326394e-05, | |
| "loss": 0.208, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07970878481864929, | |
| "step": 1545, | |
| "valid_targets_mean": 6027.5, | |
| "valid_targets_min": 1539 | |
| }, | |
| { | |
| "epoch": 2.9309366130558185, | |
| "grad_norm": 0.45864182955167, | |
| "learning_rate": 2.8880990920340934e-05, | |
| "loss": 0.2112, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09612005203962326, | |
| "step": 1550, | |
| "valid_targets_mean": 6322.1, | |
| "valid_targets_min": 2915 | |
| }, | |
| { | |
| "epoch": 2.9403973509933774, | |
| "grad_norm": 0.5623643431157161, | |
| "learning_rate": 2.879641253321447e-05, | |
| "loss": 0.222, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10082189738750458, | |
| "step": 1555, | |
| "valid_targets_mean": 5255.1, | |
| "valid_targets_min": 3086 | |
| }, | |
| { | |
| "epoch": 2.9498580889309367, | |
| "grad_norm": 0.419930873720357, | |
| "learning_rate": 2.8711638651583797e-05, | |
| "loss": 0.2144, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09671846032142639, | |
| "step": 1560, | |
| "valid_targets_mean": 6054.9, | |
| "valid_targets_min": 4136 | |
| }, | |
| { | |
| "epoch": 2.959318826868496, | |
| "grad_norm": 0.49967856491339036, | |
| "learning_rate": 2.862667115949287e-05, | |
| "loss": 0.1979, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09958822280168533, | |
| "step": 1565, | |
| "valid_targets_mean": 5343.4, | |
| "valid_targets_min": 3509 | |
| }, | |
| { | |
| "epoch": 2.968779564806055, | |
| "grad_norm": 0.4934284890500926, | |
| "learning_rate": 2.8541511945288523e-05, | |
| "loss": 0.1931, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09759525954723358, | |
| "step": 1570, | |
| "valid_targets_mean": 4621.4, | |
| "valid_targets_min": 926 | |
| }, | |
| { | |
| "epoch": 2.9782403027436137, | |
| "grad_norm": 0.48690869973678, | |
| "learning_rate": 2.8456162901578487e-05, | |
| "loss": 0.2099, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11360782384872437, | |
| "step": 1575, | |
| "valid_targets_mean": 6146.0, | |
| "valid_targets_min": 4705 | |
| }, | |
| { | |
| "epoch": 2.987701040681173, | |
| "grad_norm": 0.4432563490002925, | |
| "learning_rate": 2.837062592518933e-05, | |
| "loss": 0.2009, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09611258655786514, | |
| "step": 1580, | |
| "valid_targets_mean": 6504.1, | |
| "valid_targets_min": 3859 | |
| }, | |
| { | |
| "epoch": 2.9971617786187323, | |
| "grad_norm": 0.4504000335980572, | |
| "learning_rate": 2.82849029171243e-05, | |
| "loss": 0.1988, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.0998939573764801, | |
| "step": 1585, | |
| "valid_targets_mean": 5823.0, | |
| "valid_targets_min": 2563 | |
| }, | |
| { | |
| "epoch": 3.0056764427625353, | |
| "grad_norm": 0.42152223908884334, | |
| "learning_rate": 2.8198995782521077e-05, | |
| "loss": 0.1837, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09116984158754349, | |
| "step": 1590, | |
| "valid_targets_mean": 5818.8, | |
| "valid_targets_min": 2488 | |
| }, | |
| { | |
| "epoch": 3.0151371807000946, | |
| "grad_norm": 0.4506184282378153, | |
| "learning_rate": 2.8112906430609422e-05, | |
| "loss": 0.1913, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08673819899559021, | |
| "step": 1595, | |
| "valid_targets_mean": 6112.6, | |
| "valid_targets_min": 802 | |
| }, | |
| { | |
| "epoch": 3.024597918637654, | |
| "grad_norm": 0.4670333283993799, | |
| "learning_rate": 2.8026636774668783e-05, | |
| "loss": 0.1888, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09848645329475403, | |
| "step": 1600, | |
| "valid_targets_mean": 5506.5, | |
| "valid_targets_min": 1044 | |
| }, | |
| { | |
| "epoch": 3.034058656575213, | |
| "grad_norm": 0.47281036714970165, | |
| "learning_rate": 2.794018873198572e-05, | |
| "loss": 0.1868, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08072401583194733, | |
| "step": 1605, | |
| "valid_targets_mean": 5369.8, | |
| "valid_targets_min": 3436 | |
| }, | |
| { | |
| "epoch": 3.043519394512772, | |
| "grad_norm": 0.5353102435511196, | |
| "learning_rate": 2.7853564223811335e-05, | |
| "loss": 0.1985, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12510089576244354, | |
| "step": 1610, | |
| "valid_targets_mean": 6217.0, | |
| "valid_targets_min": 1687 | |
| }, | |
| { | |
| "epoch": 3.052980132450331, | |
| "grad_norm": 0.45973494835215695, | |
| "learning_rate": 2.776676517531856e-05, | |
| "loss": 0.1971, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09097345173358917, | |
| "step": 1615, | |
| "valid_targets_mean": 5983.0, | |
| "valid_targets_min": 3373 | |
| }, | |
| { | |
| "epoch": 3.0624408703878903, | |
| "grad_norm": 0.4445904803046461, | |
| "learning_rate": 2.7679793515559353e-05, | |
| "loss": 0.186, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09214864671230316, | |
| "step": 1620, | |
| "valid_targets_mean": 6430.4, | |
| "valid_targets_min": 3760 | |
| }, | |
| { | |
| "epoch": 3.0719016083254496, | |
| "grad_norm": 0.45328388544245035, | |
| "learning_rate": 2.759265117742188e-05, | |
| "loss": 0.1837, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08195392042398453, | |
| "step": 1625, | |
| "valid_targets_mean": 6079.6, | |
| "valid_targets_min": 1854 | |
| }, | |
| { | |
| "epoch": 3.0813623462630084, | |
| "grad_norm": 0.4690831711406225, | |
| "learning_rate": 2.7505340097587488e-05, | |
| "loss": 0.1961, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07273884117603302, | |
| "step": 1630, | |
| "valid_targets_mean": 4507.9, | |
| "valid_targets_min": 1597 | |
| }, | |
| { | |
| "epoch": 3.0908230842005677, | |
| "grad_norm": 0.4982006186841341, | |
| "learning_rate": 2.741786221648771e-05, | |
| "loss": 0.1907, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09364055097103119, | |
| "step": 1635, | |
| "valid_targets_mean": 5213.9, | |
| "valid_targets_min": 2643 | |
| }, | |
| { | |
| "epoch": 3.1002838221381266, | |
| "grad_norm": 0.46695428152371093, | |
| "learning_rate": 2.7330219478261138e-05, | |
| "loss": 0.1792, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09143832325935364, | |
| "step": 1640, | |
| "valid_targets_mean": 5311.4, | |
| "valid_targets_min": 3182 | |
| }, | |
| { | |
| "epoch": 3.109744560075686, | |
| "grad_norm": 0.49109477671766877, | |
| "learning_rate": 2.724241383071019e-05, | |
| "loss": 0.1809, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10723137855529785, | |
| "step": 1645, | |
| "valid_targets_mean": 5124.5, | |
| "valid_targets_min": 2652 | |
| }, | |
| { | |
| "epoch": 3.119205298013245, | |
| "grad_norm": 0.5290442016673559, | |
| "learning_rate": 2.7154447225257842e-05, | |
| "loss": 0.1987, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09807565808296204, | |
| "step": 1650, | |
| "valid_targets_mean": 5157.8, | |
| "valid_targets_min": 3662 | |
| }, | |
| { | |
| "epoch": 3.128666035950804, | |
| "grad_norm": 0.48302398328264, | |
| "learning_rate": 2.706632161690426e-05, | |
| "loss": 0.1799, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09389762580394745, | |
| "step": 1655, | |
| "valid_targets_mean": 5592.8, | |
| "valid_targets_min": 1365 | |
| }, | |
| { | |
| "epoch": 3.1381267738883634, | |
| "grad_norm": 0.5462595291125024, | |
| "learning_rate": 2.697803896418334e-05, | |
| "loss": 0.1736, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10734741389751434, | |
| "step": 1660, | |
| "valid_targets_mean": 7472.8, | |
| "valid_targets_min": 4511 | |
| }, | |
| { | |
| "epoch": 3.1475875118259222, | |
| "grad_norm": 0.45299896030231057, | |
| "learning_rate": 2.688960122911918e-05, | |
| "loss": 0.1897, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08676080405712128, | |
| "step": 1665, | |
| "valid_targets_mean": 5139.8, | |
| "valid_targets_min": 1530 | |
| }, | |
| { | |
| "epoch": 3.1570482497634815, | |
| "grad_norm": 0.46794779615343984, | |
| "learning_rate": 2.6801010377182498e-05, | |
| "loss": 0.1892, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08032511174678802, | |
| "step": 1670, | |
| "valid_targets_mean": 5465.9, | |
| "valid_targets_min": 3171 | |
| }, | |
| { | |
| "epoch": 3.166508987701041, | |
| "grad_norm": 0.4856091599422471, | |
| "learning_rate": 2.6712268377246913e-05, | |
| "loss": 0.1922, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.0756412222981453, | |
| "step": 1675, | |
| "valid_targets_mean": 5436.9, | |
| "valid_targets_min": 627 | |
| }, | |
| { | |
| "epoch": 3.1759697256385997, | |
| "grad_norm": 0.47735489114418606, | |
| "learning_rate": 2.6623377201545232e-05, | |
| "loss": 0.1931, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09691169857978821, | |
| "step": 1680, | |
| "valid_targets_mean": 5656.2, | |
| "valid_targets_min": 2768 | |
| }, | |
| { | |
| "epoch": 3.185430463576159, | |
| "grad_norm": 0.648435722014724, | |
| "learning_rate": 2.6534338825625577e-05, | |
| "loss": 0.1871, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10382549464702606, | |
| "step": 1685, | |
| "valid_targets_mean": 4963.2, | |
| "valid_targets_min": 2124 | |
| }, | |
| { | |
| "epoch": 3.194891201513718, | |
| "grad_norm": 0.4544568017139005, | |
| "learning_rate": 2.644515522830751e-05, | |
| "loss": 0.1816, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10306219756603241, | |
| "step": 1690, | |
| "valid_targets_mean": 6162.6, | |
| "valid_targets_min": 3162 | |
| }, | |
| { | |
| "epoch": 3.204351939451277, | |
| "grad_norm": 0.4291214030564758, | |
| "learning_rate": 2.6355828391638036e-05, | |
| "loss": 0.1868, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09292907267808914, | |
| "step": 1695, | |
| "valid_targets_mean": 5851.0, | |
| "valid_targets_min": 2481 | |
| }, | |
| { | |
| "epoch": 3.2138126773888365, | |
| "grad_norm": 0.45738947684126524, | |
| "learning_rate": 2.6266360300847563e-05, | |
| "loss": 0.1822, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1131470575928688, | |
| "step": 1700, | |
| "valid_targets_mean": 6184.0, | |
| "valid_targets_min": 4281 | |
| }, | |
| { | |
| "epoch": 3.2232734153263953, | |
| "grad_norm": 0.47467928987027785, | |
| "learning_rate": 2.6176752944305783e-05, | |
| "loss": 0.1796, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08909299969673157, | |
| "step": 1705, | |
| "valid_targets_mean": 6111.5, | |
| "valid_targets_min": 1841 | |
| }, | |
| { | |
| "epoch": 3.2327341532639546, | |
| "grad_norm": 0.5220218655815788, | |
| "learning_rate": 2.6087008313477466e-05, | |
| "loss": 0.1914, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10415120422840118, | |
| "step": 1710, | |
| "valid_targets_mean": 5395.8, | |
| "valid_targets_min": 2458 | |
| }, | |
| { | |
| "epoch": 3.242194891201514, | |
| "grad_norm": 0.4495107583419274, | |
| "learning_rate": 2.5997128402878233e-05, | |
| "loss": 0.1863, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09865536540746689, | |
| "step": 1715, | |
| "valid_targets_mean": 6275.6, | |
| "valid_targets_min": 869 | |
| }, | |
| { | |
| "epoch": 3.251655629139073, | |
| "grad_norm": 0.49558805488934193, | |
| "learning_rate": 2.5907115210030197e-05, | |
| "loss": 0.1869, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09733089804649353, | |
| "step": 1720, | |
| "valid_targets_mean": 5592.8, | |
| "valid_targets_min": 2553 | |
| }, | |
| { | |
| "epoch": 3.261116367076632, | |
| "grad_norm": 0.4876674508910489, | |
| "learning_rate": 2.5816970735417578e-05, | |
| "loss": 0.1883, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07756301015615463, | |
| "step": 1725, | |
| "valid_targets_mean": 5107.4, | |
| "valid_targets_min": 767 | |
| }, | |
| { | |
| "epoch": 3.270577105014191, | |
| "grad_norm": 0.46350530049832817, | |
| "learning_rate": 2.5726696982442258e-05, | |
| "loss": 0.1823, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10111121833324432, | |
| "step": 1730, | |
| "valid_targets_mean": 7226.6, | |
| "valid_targets_min": 1849 | |
| }, | |
| { | |
| "epoch": 3.2800378429517503, | |
| "grad_norm": 0.47528950500021455, | |
| "learning_rate": 2.5636295957379233e-05, | |
| "loss": 0.1959, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09195686876773834, | |
| "step": 1735, | |
| "valid_targets_mean": 4867.9, | |
| "valid_targets_min": 3253 | |
| }, | |
| { | |
| "epoch": 3.289498580889309, | |
| "grad_norm": 0.40763971058722337, | |
| "learning_rate": 2.554576966933205e-05, | |
| "loss": 0.1824, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07847525179386139, | |
| "step": 1740, | |
| "valid_targets_mean": 6001.9, | |
| "valid_targets_min": 3797 | |
| }, | |
| { | |
| "epoch": 3.2989593188268684, | |
| "grad_norm": 0.5034722798936512, | |
| "learning_rate": 2.5455120130188135e-05, | |
| "loss": 0.1707, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08941084891557693, | |
| "step": 1745, | |
| "valid_targets_mean": 5322.4, | |
| "valid_targets_min": 2052 | |
| }, | |
| { | |
| "epoch": 3.3084200567644277, | |
| "grad_norm": 0.5247106920229991, | |
| "learning_rate": 2.5364349354574088e-05, | |
| "loss": 0.1962, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1243121474981308, | |
| "step": 1750, | |
| "valid_targets_mean": 6789.8, | |
| "valid_targets_min": 4550 | |
| }, | |
| { | |
| "epoch": 3.3178807947019866, | |
| "grad_norm": 0.4698194956877122, | |
| "learning_rate": 2.527345935981093e-05, | |
| "loss": 0.1881, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07875726372003555, | |
| "step": 1755, | |
| "valid_targets_mean": 4845.9, | |
| "valid_targets_min": 3364 | |
| }, | |
| { | |
| "epoch": 3.327341532639546, | |
| "grad_norm": 0.48545938207903017, | |
| "learning_rate": 2.5182452165869228e-05, | |
| "loss": 0.1872, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10978685319423676, | |
| "step": 1760, | |
| "valid_targets_mean": 4839.4, | |
| "valid_targets_min": 2004 | |
| }, | |
| { | |
| "epoch": 3.336802270577105, | |
| "grad_norm": 0.5109977977924978, | |
| "learning_rate": 2.5091329795324216e-05, | |
| "loss": 0.1923, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12365156412124634, | |
| "step": 1765, | |
| "valid_targets_mean": 5637.5, | |
| "valid_targets_min": 1829 | |
| }, | |
| { | |
| "epoch": 3.346263008514664, | |
| "grad_norm": 0.4419123564130073, | |
| "learning_rate": 2.500009427331088e-05, | |
| "loss": 0.1825, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07800058275461197, | |
| "step": 1770, | |
| "valid_targets_mean": 5704.9, | |
| "valid_targets_min": 3640 | |
| }, | |
| { | |
| "epoch": 3.3557237464522234, | |
| "grad_norm": 0.45652225709926536, | |
| "learning_rate": 2.4908747627478907e-05, | |
| "loss": 0.175, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08916126191616058, | |
| "step": 1775, | |
| "valid_targets_mean": 5361.8, | |
| "valid_targets_min": 1670 | |
| }, | |
| { | |
| "epoch": 3.3651844843897822, | |
| "grad_norm": 0.4662575708880977, | |
| "learning_rate": 2.481729188794764e-05, | |
| "loss": 0.1833, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08234363794326782, | |
| "step": 1780, | |
| "valid_targets_mean": 5800.2, | |
| "valid_targets_min": 3387 | |
| }, | |
| { | |
| "epoch": 3.3746452223273415, | |
| "grad_norm": 0.46065275741497946, | |
| "learning_rate": 2.472572908726096e-05, | |
| "loss": 0.1782, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09126449376344681, | |
| "step": 1785, | |
| "valid_targets_mean": 6144.8, | |
| "valid_targets_min": 3222 | |
| }, | |
| { | |
| "epoch": 3.384105960264901, | |
| "grad_norm": 0.47805331428588804, | |
| "learning_rate": 2.4634061260342107e-05, | |
| "loss": 0.1892, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09208405017852783, | |
| "step": 1790, | |
| "valid_targets_mean": 5238.8, | |
| "valid_targets_min": 3058 | |
| }, | |
| { | |
| "epoch": 3.3935666982024597, | |
| "grad_norm": 0.4669551990683149, | |
| "learning_rate": 2.4542290444448474e-05, | |
| "loss": 0.1824, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09105228632688522, | |
| "step": 1795, | |
| "valid_targets_mean": 5250.6, | |
| "valid_targets_min": 1781 | |
| }, | |
| { | |
| "epoch": 3.403027436140019, | |
| "grad_norm": 0.4590860719695206, | |
| "learning_rate": 2.445041867912629e-05, | |
| "loss": 0.1873, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09819218516349792, | |
| "step": 1800, | |
| "valid_targets_mean": 6047.4, | |
| "valid_targets_min": 4673 | |
| }, | |
| { | |
| "epoch": 3.412488174077578, | |
| "grad_norm": 0.4362989208375527, | |
| "learning_rate": 2.4358448006165345e-05, | |
| "loss": 0.1818, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08421623706817627, | |
| "step": 1805, | |
| "valid_targets_mean": 6545.9, | |
| "valid_targets_min": 4508 | |
| }, | |
| { | |
| "epoch": 3.421948912015137, | |
| "grad_norm": 0.48268909107786695, | |
| "learning_rate": 2.4266380469553586e-05, | |
| "loss": 0.1859, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08917456865310669, | |
| "step": 1810, | |
| "valid_targets_mean": 5196.9, | |
| "valid_targets_min": 3762 | |
| }, | |
| { | |
| "epoch": 3.4314096499526965, | |
| "grad_norm": 0.4149350257700459, | |
| "learning_rate": 2.4174218115431664e-05, | |
| "loss": 0.1764, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07774756103754044, | |
| "step": 1815, | |
| "valid_targets_mean": 4807.4, | |
| "valid_targets_min": 981 | |
| }, | |
| { | |
| "epoch": 3.4408703878902553, | |
| "grad_norm": 0.45130542433765514, | |
| "learning_rate": 2.408196299204751e-05, | |
| "loss": 0.1981, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09889756143093109, | |
| "step": 1820, | |
| "valid_targets_mean": 7054.2, | |
| "valid_targets_min": 4049 | |
| }, | |
| { | |
| "epoch": 3.4503311258278146, | |
| "grad_norm": 0.4989612810684145, | |
| "learning_rate": 2.3989617149710795e-05, | |
| "loss": 0.1865, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.0977402999997139, | |
| "step": 1825, | |
| "valid_targets_mean": 5030.0, | |
| "valid_targets_min": 1329 | |
| }, | |
| { | |
| "epoch": 3.4597918637653735, | |
| "grad_norm": 0.42867511962488714, | |
| "learning_rate": 2.3897182640747336e-05, | |
| "loss": 0.1718, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08124013245105743, | |
| "step": 1830, | |
| "valid_targets_mean": 5519.9, | |
| "valid_targets_min": 3561 | |
| }, | |
| { | |
| "epoch": 3.469252601702933, | |
| "grad_norm": 0.4419227173372084, | |
| "learning_rate": 2.3804661519453532e-05, | |
| "loss": 0.1834, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07726781070232391, | |
| "step": 1835, | |
| "valid_targets_mean": 5114.1, | |
| "valid_targets_min": 2530 | |
| }, | |
| { | |
| "epoch": 3.478713339640492, | |
| "grad_norm": 0.4542904237834484, | |
| "learning_rate": 2.3712055842050676e-05, | |
| "loss": 0.1983, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11132301390171051, | |
| "step": 1840, | |
| "valid_targets_mean": 6203.1, | |
| "valid_targets_min": 3088 | |
| }, | |
| { | |
| "epoch": 3.488174077578051, | |
| "grad_norm": 0.5030994582312769, | |
| "learning_rate": 2.3619367666639256e-05, | |
| "loss": 0.1837, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09856734424829483, | |
| "step": 1845, | |
| "valid_targets_mean": 4904.2, | |
| "valid_targets_min": 2168 | |
| }, | |
| { | |
| "epoch": 3.4976348155156103, | |
| "grad_norm": 0.4591629662594089, | |
| "learning_rate": 2.3526599053153235e-05, | |
| "loss": 0.1897, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10106822848320007, | |
| "step": 1850, | |
| "valid_targets_mean": 5359.8, | |
| "valid_targets_min": 1166 | |
| }, | |
| { | |
| "epoch": 3.507095553453169, | |
| "grad_norm": 0.4555316025566089, | |
| "learning_rate": 2.3433752063314254e-05, | |
| "loss": 0.1833, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.0851808488368988, | |
| "step": 1855, | |
| "valid_targets_mean": 5974.2, | |
| "valid_targets_min": 1865 | |
| }, | |
| { | |
| "epoch": 3.5165562913907285, | |
| "grad_norm": 0.5024235368504653, | |
| "learning_rate": 2.3340828760585827e-05, | |
| "loss": 0.1855, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08733783662319183, | |
| "step": 1860, | |
| "valid_targets_mean": 5503.6, | |
| "valid_targets_min": 1538 | |
| }, | |
| { | |
| "epoch": 3.5260170293282878, | |
| "grad_norm": 0.4567600934191136, | |
| "learning_rate": 2.3247831210127454e-05, | |
| "loss": 0.1773, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08546096086502075, | |
| "step": 1865, | |
| "valid_targets_mean": 5547.2, | |
| "valid_targets_min": 3440 | |
| }, | |
| { | |
| "epoch": 3.5354777672658466, | |
| "grad_norm": 0.4245415652272897, | |
| "learning_rate": 2.3154761478748752e-05, | |
| "loss": 0.1755, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08123698830604553, | |
| "step": 1870, | |
| "valid_targets_mean": 6187.4, | |
| "valid_targets_min": 2815 | |
| }, | |
| { | |
| "epoch": 3.544938505203406, | |
| "grad_norm": 0.4365000880114343, | |
| "learning_rate": 2.3061621634863524e-05, | |
| "loss": 0.1861, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10243307054042816, | |
| "step": 1875, | |
| "valid_targets_mean": 6236.2, | |
| "valid_targets_min": 2980 | |
| }, | |
| { | |
| "epoch": 3.5543992431409652, | |
| "grad_norm": 0.3915829839381538, | |
| "learning_rate": 2.296841374844375e-05, | |
| "loss": 0.1798, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07283614575862885, | |
| "step": 1880, | |
| "valid_targets_mean": 6081.2, | |
| "valid_targets_min": 4115 | |
| }, | |
| { | |
| "epoch": 3.563859981078524, | |
| "grad_norm": 0.529852157820633, | |
| "learning_rate": 2.287513989097364e-05, | |
| "loss": 0.1856, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07238370925188065, | |
| "step": 1885, | |
| "valid_targets_mean": 3611.2, | |
| "valid_targets_min": 1075 | |
| }, | |
| { | |
| "epoch": 3.5733207190160834, | |
| "grad_norm": 0.4293563741817337, | |
| "learning_rate": 2.2781802135403537e-05, | |
| "loss": 0.1872, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11047909408807755, | |
| "step": 1890, | |
| "valid_targets_mean": 7641.8, | |
| "valid_targets_min": 5269 | |
| }, | |
| { | |
| "epoch": 3.5827814569536423, | |
| "grad_norm": 0.47152119543132176, | |
| "learning_rate": 2.2688402556103906e-05, | |
| "loss": 0.1967, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09570799767971039, | |
| "step": 1895, | |
| "valid_targets_mean": 5092.1, | |
| "valid_targets_min": 2448 | |
| }, | |
| { | |
| "epoch": 3.5922421948912016, | |
| "grad_norm": 0.45350737772342575, | |
| "learning_rate": 2.2594943228819202e-05, | |
| "loss": 0.1862, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08095633238554001, | |
| "step": 1900, | |
| "valid_targets_mean": 5481.2, | |
| "valid_targets_min": 1967 | |
| }, | |
| { | |
| "epoch": 3.6017029328287604, | |
| "grad_norm": 0.4755232122518845, | |
| "learning_rate": 2.2501426230621703e-05, | |
| "loss": 0.183, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10834428668022156, | |
| "step": 1905, | |
| "valid_targets_mean": 5960.0, | |
| "valid_targets_min": 1879 | |
| }, | |
| { | |
| "epoch": 3.6111636707663197, | |
| "grad_norm": 0.45128717519347217, | |
| "learning_rate": 2.240785363986543e-05, | |
| "loss": 0.1919, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09623943269252777, | |
| "step": 1910, | |
| "valid_targets_mean": 5373.8, | |
| "valid_targets_min": 1537 | |
| }, | |
| { | |
| "epoch": 3.620624408703879, | |
| "grad_norm": 0.4387093767351713, | |
| "learning_rate": 2.2314227536139893e-05, | |
| "loss": 0.1839, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09231283515691757, | |
| "step": 1915, | |
| "valid_targets_mean": 6112.9, | |
| "valid_targets_min": 4051 | |
| }, | |
| { | |
| "epoch": 3.630085146641438, | |
| "grad_norm": 0.4402931522259467, | |
| "learning_rate": 2.2220550000223886e-05, | |
| "loss": 0.1924, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.0882110446691513, | |
| "step": 1920, | |
| "valid_targets_mean": 6283.5, | |
| "valid_targets_min": 1538 | |
| }, | |
| { | |
| "epoch": 3.639545884578997, | |
| "grad_norm": 0.5398531884274245, | |
| "learning_rate": 2.212682311403926e-05, | |
| "loss": 0.1909, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10761941969394684, | |
| "step": 1925, | |
| "valid_targets_mean": 4044.2, | |
| "valid_targets_min": 1596 | |
| }, | |
| { | |
| "epoch": 3.6490066225165565, | |
| "grad_norm": 0.4565238432681483, | |
| "learning_rate": 2.2033048960604648e-05, | |
| "loss": 0.1862, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09304339438676834, | |
| "step": 1930, | |
| "valid_targets_mean": 6390.2, | |
| "valid_targets_min": 4754 | |
| }, | |
| { | |
| "epoch": 3.6584673604541154, | |
| "grad_norm": 0.44850280991108504, | |
| "learning_rate": 2.1939229623989146e-05, | |
| "loss": 0.1906, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11164131760597229, | |
| "step": 1935, | |
| "valid_targets_mean": 6644.2, | |
| "valid_targets_min": 1349 | |
| }, | |
| { | |
| "epoch": 3.6679280983916747, | |
| "grad_norm": 0.4640262426777603, | |
| "learning_rate": 2.184536718926604e-05, | |
| "loss": 0.192, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09215334802865982, | |
| "step": 1940, | |
| "valid_targets_mean": 5631.9, | |
| "valid_targets_min": 3043 | |
| }, | |
| { | |
| "epoch": 3.6773888363292335, | |
| "grad_norm": 0.46824021321893416, | |
| "learning_rate": 2.1751463742466437e-05, | |
| "loss": 0.1817, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.0910329818725586, | |
| "step": 1945, | |
| "valid_targets_mean": 5201.4, | |
| "valid_targets_min": 3511 | |
| }, | |
| { | |
| "epoch": 3.686849574266793, | |
| "grad_norm": 0.4431506823294413, | |
| "learning_rate": 2.1657521370532897e-05, | |
| "loss": 0.1907, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08795459568500519, | |
| "step": 1950, | |
| "valid_targets_mean": 5397.9, | |
| "valid_targets_min": 3132 | |
| }, | |
| { | |
| "epoch": 3.6963103122043517, | |
| "grad_norm": 0.4633802916301476, | |
| "learning_rate": 2.1563542161273077e-05, | |
| "loss": 0.1873, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12115620076656342, | |
| "step": 1955, | |
| "valid_targets_mean": 6784.1, | |
| "valid_targets_min": 5711 | |
| }, | |
| { | |
| "epoch": 3.705771050141911, | |
| "grad_norm": 0.524813702266504, | |
| "learning_rate": 2.146952820331332e-05, | |
| "loss": 0.1865, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.0889645665884018, | |
| "step": 1960, | |
| "valid_targets_mean": 4261.2, | |
| "valid_targets_min": 1085 | |
| }, | |
| { | |
| "epoch": 3.7152317880794703, | |
| "grad_norm": 0.5179766333042282, | |
| "learning_rate": 2.1375481586052237e-05, | |
| "loss": 0.1968, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15775936841964722, | |
| "step": 1965, | |
| "valid_targets_mean": 5552.2, | |
| "valid_targets_min": 1842 | |
| }, | |
| { | |
| "epoch": 3.724692526017029, | |
| "grad_norm": 0.43519671742917404, | |
| "learning_rate": 2.128140439961426e-05, | |
| "loss": 0.179, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07054340094327927, | |
| "step": 1970, | |
| "valid_targets_mean": 5188.1, | |
| "valid_targets_min": 2924 | |
| }, | |
| { | |
| "epoch": 3.7341532639545885, | |
| "grad_norm": 0.4910575570595109, | |
| "learning_rate": 2.1187298734803214e-05, | |
| "loss": 0.1815, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08431971073150635, | |
| "step": 1975, | |
| "valid_targets_mean": 4949.9, | |
| "valid_targets_min": 797 | |
| }, | |
| { | |
| "epoch": 3.7436140018921478, | |
| "grad_norm": 0.4630093421088686, | |
| "learning_rate": 2.1093166683055832e-05, | |
| "loss": 0.1853, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11314146220684052, | |
| "step": 1980, | |
| "valid_targets_mean": 7022.4, | |
| "valid_targets_min": 5342 | |
| }, | |
| { | |
| "epoch": 3.7530747398297066, | |
| "grad_norm": 0.535517789749373, | |
| "learning_rate": 2.0999010336395273e-05, | |
| "loss": 0.1959, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09282414615154266, | |
| "step": 1985, | |
| "valid_targets_mean": 6285.4, | |
| "valid_targets_min": 2106 | |
| }, | |
| { | |
| "epoch": 3.762535477767266, | |
| "grad_norm": 0.5571000940662704, | |
| "learning_rate": 2.0904831787384645e-05, | |
| "loss": 0.1791, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07217632234096527, | |
| "step": 1990, | |
| "valid_targets_mean": 6126.5, | |
| "valid_targets_min": 3404 | |
| }, | |
| { | |
| "epoch": 3.7719962157048252, | |
| "grad_norm": 0.4268240494799538, | |
| "learning_rate": 2.081063312908049e-05, | |
| "loss": 0.1757, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07058891654014587, | |
| "step": 1995, | |
| "valid_targets_mean": 5600.5, | |
| "valid_targets_min": 3873 | |
| }, | |
| { | |
| "epoch": 3.781456953642384, | |
| "grad_norm": 0.5099795351473646, | |
| "learning_rate": 2.0716416454986242e-05, | |
| "loss": 0.2006, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10760383307933807, | |
| "step": 2000, | |
| "valid_targets_mean": 5524.0, | |
| "valid_targets_min": 2901 | |
| }, | |
| { | |
| "epoch": 3.790917691579943, | |
| "grad_norm": 0.5097294694697546, | |
| "learning_rate": 2.0622183859005762e-05, | |
| "loss": 0.1955, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.0837247222661972, | |
| "step": 2005, | |
| "valid_targets_mean": 5495.4, | |
| "valid_targets_min": 3572 | |
| }, | |
| { | |
| "epoch": 3.8003784295175023, | |
| "grad_norm": 0.4534635455267388, | |
| "learning_rate": 2.052793743539673e-05, | |
| "loss": 0.1847, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08845619112253189, | |
| "step": 2010, | |
| "valid_targets_mean": 4920.2, | |
| "valid_targets_min": 1973 | |
| }, | |
| { | |
| "epoch": 3.8098391674550616, | |
| "grad_norm": 0.4737421696248978, | |
| "learning_rate": 2.043367927872416e-05, | |
| "loss": 0.1766, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08393751084804535, | |
| "step": 2015, | |
| "valid_targets_mean": 4830.4, | |
| "valid_targets_min": 2229 | |
| }, | |
| { | |
| "epoch": 3.8192999053926204, | |
| "grad_norm": 0.5249850580959375, | |
| "learning_rate": 2.0339411483813812e-05, | |
| "loss": 0.1975, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09723856300115585, | |
| "step": 2020, | |
| "valid_targets_mean": 6302.8, | |
| "valid_targets_min": 4072 | |
| }, | |
| { | |
| "epoch": 3.8287606433301797, | |
| "grad_norm": 0.4980980063470468, | |
| "learning_rate": 2.0245136145705648e-05, | |
| "loss": 0.1874, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09031252562999725, | |
| "step": 2025, | |
| "valid_targets_mean": 5300.4, | |
| "valid_targets_min": 874 | |
| }, | |
| { | |
| "epoch": 3.838221381267739, | |
| "grad_norm": 0.44214747777838764, | |
| "learning_rate": 2.0150855359607293e-05, | |
| "loss": 0.1824, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07880973070859909, | |
| "step": 2030, | |
| "valid_targets_mean": 5693.4, | |
| "valid_targets_min": 4152 | |
| }, | |
| { | |
| "epoch": 3.847682119205298, | |
| "grad_norm": 0.46991534430567616, | |
| "learning_rate": 2.0056571220847427e-05, | |
| "loss": 0.1896, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09112639725208282, | |
| "step": 2035, | |
| "valid_targets_mean": 5097.6, | |
| "valid_targets_min": 746 | |
| }, | |
| { | |
| "epoch": 3.857142857142857, | |
| "grad_norm": 0.4257490092767326, | |
| "learning_rate": 1.9962285824829245e-05, | |
| "loss": 0.1777, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08267439901828766, | |
| "step": 2040, | |
| "valid_targets_mean": 6324.6, | |
| "valid_targets_min": 3184 | |
| }, | |
| { | |
| "epoch": 3.8666035950804165, | |
| "grad_norm": 0.45072050304120914, | |
| "learning_rate": 1.986800126698389e-05, | |
| "loss": 0.1776, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09375502169132233, | |
| "step": 2045, | |
| "valid_targets_mean": 5897.2, | |
| "valid_targets_min": 4521 | |
| }, | |
| { | |
| "epoch": 3.8760643330179754, | |
| "grad_norm": 0.4256841534823553, | |
| "learning_rate": 1.9773719642723883e-05, | |
| "loss": 0.1882, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07670668512582779, | |
| "step": 2050, | |
| "valid_targets_mean": 6021.9, | |
| "valid_targets_min": 5247 | |
| }, | |
| { | |
| "epoch": 3.8855250709555347, | |
| "grad_norm": 0.46173000109780016, | |
| "learning_rate": 1.967944304739653e-05, | |
| "loss": 0.1892, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10058119893074036, | |
| "step": 2055, | |
| "valid_targets_mean": 6129.8, | |
| "valid_targets_min": 1951 | |
| }, | |
| { | |
| "epoch": 3.8949858088930935, | |
| "grad_norm": 0.49198637917271304, | |
| "learning_rate": 1.958517357623738e-05, | |
| "loss": 0.2021, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.0934433713555336, | |
| "step": 2060, | |
| "valid_targets_mean": 4680.2, | |
| "valid_targets_min": 1565 | |
| }, | |
| { | |
| "epoch": 3.904446546830653, | |
| "grad_norm": 0.41090104211443634, | |
| "learning_rate": 1.949091332432367e-05, | |
| "loss": 0.1692, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10091197490692139, | |
| "step": 2065, | |
| "valid_targets_mean": 7233.2, | |
| "valid_targets_min": 5038 | |
| }, | |
| { | |
| "epoch": 3.9139072847682117, | |
| "grad_norm": 0.4804194009201903, | |
| "learning_rate": 1.939666438652772e-05, | |
| "loss": 0.1806, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08922461420297623, | |
| "step": 2070, | |
| "valid_targets_mean": 5340.9, | |
| "valid_targets_min": 950 | |
| }, | |
| { | |
| "epoch": 3.923368022705771, | |
| "grad_norm": 0.4806891457866739, | |
| "learning_rate": 1.9302428857470406e-05, | |
| "loss": 0.1951, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11550113558769226, | |
| "step": 2075, | |
| "valid_targets_mean": 6336.2, | |
| "valid_targets_min": 3086 | |
| }, | |
| { | |
| "epoch": 3.9328287606433303, | |
| "grad_norm": 0.4278376583395593, | |
| "learning_rate": 1.9208208831474618e-05, | |
| "loss": 0.1894, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08728399872779846, | |
| "step": 2080, | |
| "valid_targets_mean": 5975.9, | |
| "valid_targets_min": 2398 | |
| }, | |
| { | |
| "epoch": 3.942289498580889, | |
| "grad_norm": 0.46404606456299635, | |
| "learning_rate": 1.9114006402518676e-05, | |
| "loss": 0.1791, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07773632556200027, | |
| "step": 2085, | |
| "valid_targets_mean": 5166.8, | |
| "valid_targets_min": 1719 | |
| }, | |
| { | |
| "epoch": 3.9517502365184485, | |
| "grad_norm": 0.45528452379559725, | |
| "learning_rate": 1.901982366418985e-05, | |
| "loss": 0.1744, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09388162940740585, | |
| "step": 2090, | |
| "valid_targets_mean": 6227.4, | |
| "valid_targets_min": 4431 | |
| }, | |
| { | |
| "epoch": 3.961210974456008, | |
| "grad_norm": 0.438918695698072, | |
| "learning_rate": 1.892566270963777e-05, | |
| "loss": 0.1802, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08582787215709686, | |
| "step": 2095, | |
| "valid_targets_mean": 5624.0, | |
| "valid_targets_min": 501 | |
| }, | |
| { | |
| "epoch": 3.9706717123935666, | |
| "grad_norm": 0.44371795309461276, | |
| "learning_rate": 1.883152563152795e-05, | |
| "loss": 0.1898, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09151076525449753, | |
| "step": 2100, | |
| "valid_targets_mean": 5600.0, | |
| "valid_targets_min": 3358 | |
| }, | |
| { | |
| "epoch": 3.980132450331126, | |
| "grad_norm": 0.4515824110525669, | |
| "learning_rate": 1.8737414521995268e-05, | |
| "loss": 0.1947, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09049001336097717, | |
| "step": 2105, | |
| "valid_targets_mean": 5852.4, | |
| "valid_targets_min": 3330 | |
| }, | |
| { | |
| "epoch": 3.989593188268685, | |
| "grad_norm": 0.49711976335752656, | |
| "learning_rate": 1.8643331472597445e-05, | |
| "loss": 0.1941, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08675132691860199, | |
| "step": 2110, | |
| "valid_targets_mean": 4558.4, | |
| "valid_targets_min": 1326 | |
| }, | |
| { | |
| "epoch": 3.999053926206244, | |
| "grad_norm": 0.45261577331220815, | |
| "learning_rate": 1.8549278574268618e-05, | |
| "loss": 0.1903, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08939523249864578, | |
| "step": 2115, | |
| "valid_targets_mean": 6261.1, | |
| "valid_targets_min": 2221 | |
| }, | |
| { | |
| "epoch": 4.007568590350047, | |
| "grad_norm": 0.47605379171873335, | |
| "learning_rate": 1.8455257917272814e-05, | |
| "loss": 0.181, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07639576494693756, | |
| "step": 2120, | |
| "valid_targets_mean": 6170.1, | |
| "valid_targets_min": 4187 | |
| }, | |
| { | |
| "epoch": 4.017029328287607, | |
| "grad_norm": 0.47606983027063804, | |
| "learning_rate": 1.836127159115752e-05, | |
| "loss": 0.1709, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07952439785003662, | |
| "step": 2125, | |
| "valid_targets_mean": 5953.5, | |
| "valid_targets_min": 2745 | |
| }, | |
| { | |
| "epoch": 4.026490066225166, | |
| "grad_norm": 0.47621520504940396, | |
| "learning_rate": 1.8267321684707246e-05, | |
| "loss": 0.1636, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07942505925893784, | |
| "step": 2130, | |
| "valid_targets_mean": 5075.5, | |
| "valid_targets_min": 2145 | |
| }, | |
| { | |
| "epoch": 4.035950804162725, | |
| "grad_norm": 0.44019831135362675, | |
| "learning_rate": 1.817341028589709e-05, | |
| "loss": 0.1771, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10214540362358093, | |
| "step": 2135, | |
| "valid_targets_mean": 7163.8, | |
| "valid_targets_min": 2462 | |
| }, | |
| { | |
| "epoch": 4.045411542100283, | |
| "grad_norm": 0.4937271391982351, | |
| "learning_rate": 1.8079539481846366e-05, | |
| "loss": 0.1617, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07459040731191635, | |
| "step": 2140, | |
| "valid_targets_mean": 6227.9, | |
| "valid_targets_min": 3553 | |
| }, | |
| { | |
| "epoch": 4.054872280037843, | |
| "grad_norm": 0.4565164265520384, | |
| "learning_rate": 1.7985711358772165e-05, | |
| "loss": 0.1639, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07631679624319077, | |
| "step": 2145, | |
| "valid_targets_mean": 6136.8, | |
| "valid_targets_min": 4986 | |
| }, | |
| { | |
| "epoch": 4.064333017975402, | |
| "grad_norm": 0.4635680900045017, | |
| "learning_rate": 1.789192800194305e-05, | |
| "loss": 0.1662, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.06983435899019241, | |
| "step": 2150, | |
| "valid_targets_mean": 5001.0, | |
| "valid_targets_min": 3895 | |
| }, | |
| { | |
| "epoch": 4.073793755912961, | |
| "grad_norm": 0.4728478107816903, | |
| "learning_rate": 1.7798191495632656e-05, | |
| "loss": 0.1719, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07341806590557098, | |
| "step": 2155, | |
| "valid_targets_mean": 6114.5, | |
| "valid_targets_min": 3650 | |
| }, | |
| { | |
| "epoch": 4.083254493850521, | |
| "grad_norm": 0.43092833708069367, | |
| "learning_rate": 1.7704503923073414e-05, | |
| "loss": 0.1574, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07183924317359924, | |
| "step": 2160, | |
| "valid_targets_mean": 5524.2, | |
| "valid_targets_min": 2899 | |
| }, | |
| { | |
| "epoch": 4.0927152317880795, | |
| "grad_norm": 0.44675377011480427, | |
| "learning_rate": 1.7610867366410228e-05, | |
| "loss": 0.1624, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08465928584337234, | |
| "step": 2165, | |
| "valid_targets_mean": 6738.9, | |
| "valid_targets_min": 4782 | |
| }, | |
| { | |
| "epoch": 4.102175969725638, | |
| "grad_norm": 0.5030878317134609, | |
| "learning_rate": 1.751728390665422e-05, | |
| "loss": 0.1816, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10309462249279022, | |
| "step": 2170, | |
| "valid_targets_mean": 6185.4, | |
| "valid_targets_min": 4371 | |
| }, | |
| { | |
| "epoch": 4.111636707663198, | |
| "grad_norm": 0.46070987441440847, | |
| "learning_rate": 1.742375562363645e-05, | |
| "loss": 0.1768, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.0805835872888565, | |
| "step": 2175, | |
| "valid_targets_mean": 5687.2, | |
| "valid_targets_min": 1089 | |
| }, | |
| { | |
| "epoch": 4.121097445600757, | |
| "grad_norm": 0.4770082395886628, | |
| "learning_rate": 1.7330284595961714e-05, | |
| "loss": 0.1722, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07416245341300964, | |
| "step": 2180, | |
| "valid_targets_mean": 4618.5, | |
| "valid_targets_min": 1323 | |
| }, | |
| { | |
| "epoch": 4.130558183538316, | |
| "grad_norm": 0.5071152049171963, | |
| "learning_rate": 1.7236872900962364e-05, | |
| "loss": 0.176, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.0895686000585556, | |
| "step": 2185, | |
| "valid_targets_mean": 5050.6, | |
| "valid_targets_min": 1848 | |
| }, | |
| { | |
| "epoch": 4.140018921475875, | |
| "grad_norm": 0.49903371784517364, | |
| "learning_rate": 1.7143522614652087e-05, | |
| "loss": 0.1712, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.079046830534935, | |
| "step": 2190, | |
| "valid_targets_mean": 6004.9, | |
| "valid_targets_min": 1044 | |
| }, | |
| { | |
| "epoch": 4.149479659413434, | |
| "grad_norm": 0.4735587488254344, | |
| "learning_rate": 1.7050235811679842e-05, | |
| "loss": 0.1615, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07874190807342529, | |
| "step": 2195, | |
| "valid_targets_mean": 5291.8, | |
| "valid_targets_min": 3722 | |
| }, | |
| { | |
| "epoch": 4.158940397350993, | |
| "grad_norm": 0.4906110256360143, | |
| "learning_rate": 1.6957014565283686e-05, | |
| "loss": 0.1655, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08479055762290955, | |
| "step": 2200, | |
| "valid_targets_mean": 5371.8, | |
| "valid_targets_min": 3449 | |
| }, | |
| { | |
| "epoch": 4.168401135288552, | |
| "grad_norm": 0.45624047631312886, | |
| "learning_rate": 1.6863860947244734e-05, | |
| "loss": 0.1737, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.0816778838634491, | |
| "step": 2205, | |
| "valid_targets_mean": 6315.0, | |
| "valid_targets_min": 2799 | |
| }, | |
| { | |
| "epoch": 4.177861873226112, | |
| "grad_norm": 0.5699518768002438, | |
| "learning_rate": 1.67707770278411e-05, | |
| "loss": 0.1757, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10105790197849274, | |
| "step": 2210, | |
| "valid_targets_mean": 5623.0, | |
| "valid_targets_min": 1596 | |
| }, | |
| { | |
| "epoch": 4.187322611163671, | |
| "grad_norm": 0.5215881624136443, | |
| "learning_rate": 1.6677764875801896e-05, | |
| "loss": 0.1738, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08306705951690674, | |
| "step": 2215, | |
| "valid_targets_mean": 4635.6, | |
| "valid_targets_min": 2345 | |
| }, | |
| { | |
| "epoch": 4.19678334910123, | |
| "grad_norm": 0.51490913642158, | |
| "learning_rate": 1.658482655826125e-05, | |
| "loss": 0.1673, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08734717965126038, | |
| "step": 2220, | |
| "valid_targets_mean": 5146.8, | |
| "valid_targets_min": 807 | |
| }, | |
| { | |
| "epoch": 4.206244087038789, | |
| "grad_norm": 0.5117573321781882, | |
| "learning_rate": 1.649196414071237e-05, | |
| "loss": 0.1758, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08336880803108215, | |
| "step": 2225, | |
| "valid_targets_mean": 5012.2, | |
| "valid_targets_min": 3057 | |
| }, | |
| { | |
| "epoch": 4.215704824976348, | |
| "grad_norm": 0.47924671481016884, | |
| "learning_rate": 1.6399179686961626e-05, | |
| "loss": 0.172, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08556775748729706, | |
| "step": 2230, | |
| "valid_targets_mean": 6178.0, | |
| "valid_targets_min": 4463 | |
| }, | |
| { | |
| "epoch": 4.225165562913907, | |
| "grad_norm": 0.4866400336787381, | |
| "learning_rate": 1.630647525908271e-05, | |
| "loss": 0.1772, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08188188076019287, | |
| "step": 2235, | |
| "valid_targets_mean": 5382.4, | |
| "valid_targets_min": 3049 | |
| }, | |
| { | |
| "epoch": 4.234626300851467, | |
| "grad_norm": 0.4733351788699037, | |
| "learning_rate": 1.621385291737076e-05, | |
| "loss": 0.1769, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07345275580883026, | |
| "step": 2240, | |
| "valid_targets_mean": 4783.5, | |
| "valid_targets_min": 2041 | |
| }, | |
| { | |
| "epoch": 4.244087038789026, | |
| "grad_norm": 0.4767465652248829, | |
| "learning_rate": 1.6121314720296655e-05, | |
| "loss": 0.1664, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08385774493217468, | |
| "step": 2245, | |
| "valid_targets_mean": 5386.6, | |
| "valid_targets_min": 3619 | |
| }, | |
| { | |
| "epoch": 4.253547776726585, | |
| "grad_norm": 0.4789402563300116, | |
| "learning_rate": 1.6028862724461162e-05, | |
| "loss": 0.1741, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09487953782081604, | |
| "step": 2250, | |
| "valid_targets_mean": 5879.5, | |
| "valid_targets_min": 4405 | |
| }, | |
| { | |
| "epoch": 4.263008514664143, | |
| "grad_norm": 0.443105700964735, | |
| "learning_rate": 1.593649898454932e-05, | |
| "loss": 0.1654, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08754053711891174, | |
| "step": 2255, | |
| "valid_targets_mean": 5570.6, | |
| "valid_targets_min": 2637 | |
| }, | |
| { | |
| "epoch": 4.272469252601703, | |
| "grad_norm": 0.5369571580173819, | |
| "learning_rate": 1.5844225553284708e-05, | |
| "loss": 0.1694, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07940442860126495, | |
| "step": 2260, | |
| "valid_targets_mean": 3860.5, | |
| "valid_targets_min": 1223 | |
| }, | |
| { | |
| "epoch": 4.281929990539262, | |
| "grad_norm": 0.4378025466243674, | |
| "learning_rate": 1.5752044481383875e-05, | |
| "loss": 0.1744, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09036465734243393, | |
| "step": 2265, | |
| "valid_targets_mean": 6985.9, | |
| "valid_targets_min": 4865 | |
| }, | |
| { | |
| "epoch": 4.291390728476821, | |
| "grad_norm": 0.4820274642035886, | |
| "learning_rate": 1.565995781751073e-05, | |
| "loss": 0.1634, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.06915217638015747, | |
| "step": 2270, | |
| "valid_targets_mean": 4996.8, | |
| "valid_targets_min": 3137 | |
| }, | |
| { | |
| "epoch": 4.300851466414381, | |
| "grad_norm": 0.48393554696939894, | |
| "learning_rate": 1.556796760823105e-05, | |
| "loss": 0.1772, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08670508116483688, | |
| "step": 2275, | |
| "valid_targets_mean": 5565.1, | |
| "valid_targets_min": 2756 | |
| }, | |
| { | |
| "epoch": 4.3103122043519395, | |
| "grad_norm": 0.48251450451372285, | |
| "learning_rate": 1.5476075897966943e-05, | |
| "loss": 0.1869, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09289459884166718, | |
| "step": 2280, | |
| "valid_targets_mean": 7153.2, | |
| "valid_targets_min": 5303 | |
| }, | |
| { | |
| "epoch": 4.319772942289498, | |
| "grad_norm": 0.4834522235548724, | |
| "learning_rate": 1.538428472895145e-05, | |
| "loss": 0.1758, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09639063477516174, | |
| "step": 2285, | |
| "valid_targets_mean": 6249.6, | |
| "valid_targets_min": 3680 | |
| }, | |
| { | |
| "epoch": 4.329233680227058, | |
| "grad_norm": 0.48082534237663743, | |
| "learning_rate": 1.5292596141183156e-05, | |
| "loss": 0.1734, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08183325827121735, | |
| "step": 2290, | |
| "valid_targets_mean": 5584.1, | |
| "valid_targets_min": 926 | |
| }, | |
| { | |
| "epoch": 4.338694418164617, | |
| "grad_norm": 0.5160950372006214, | |
| "learning_rate": 1.5201012172380834e-05, | |
| "loss": 0.1718, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07628428936004639, | |
| "step": 2295, | |
| "valid_targets_mean": 4932.9, | |
| "valid_targets_min": 802 | |
| }, | |
| { | |
| "epoch": 4.348155156102176, | |
| "grad_norm": 0.47312968698632135, | |
| "learning_rate": 1.5109534857938181e-05, | |
| "loss": 0.1698, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08713920414447784, | |
| "step": 2300, | |
| "valid_targets_mean": 5098.2, | |
| "valid_targets_min": 2424 | |
| }, | |
| { | |
| "epoch": 4.357615894039735, | |
| "grad_norm": 0.5690038403341784, | |
| "learning_rate": 1.501816623087857e-05, | |
| "loss": 0.1734, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07945723831653595, | |
| "step": 2305, | |
| "valid_targets_mean": 4997.6, | |
| "valid_targets_min": 627 | |
| }, | |
| { | |
| "epoch": 4.3670766319772945, | |
| "grad_norm": 0.497199283715766, | |
| "learning_rate": 1.4926908321809856e-05, | |
| "loss": 0.1778, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09394356608390808, | |
| "step": 2310, | |
| "valid_targets_mean": 4530.1, | |
| "valid_targets_min": 1636 | |
| }, | |
| { | |
| "epoch": 4.376537369914853, | |
| "grad_norm": 0.5548377322347214, | |
| "learning_rate": 1.4835763158879264e-05, | |
| "loss": 0.1745, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08540955185890198, | |
| "step": 2315, | |
| "valid_targets_mean": 6093.8, | |
| "valid_targets_min": 3232 | |
| }, | |
| { | |
| "epoch": 4.385998107852412, | |
| "grad_norm": 0.44852133106940606, | |
| "learning_rate": 1.474473276772831e-05, | |
| "loss": 0.1681, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.06924474239349365, | |
| "step": 2320, | |
| "valid_targets_mean": 5439.8, | |
| "valid_targets_min": 1041 | |
| }, | |
| { | |
| "epoch": 4.395458845789972, | |
| "grad_norm": 0.46438627822043954, | |
| "learning_rate": 1.4653819171447802e-05, | |
| "loss": 0.1911, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.06627480685710907, | |
| "step": 2325, | |
| "valid_targets_mean": 5166.6, | |
| "valid_targets_min": 2940 | |
| }, | |
| { | |
| "epoch": 4.404919583727531, | |
| "grad_norm": 0.47334974760463755, | |
| "learning_rate": 1.4563024390532828e-05, | |
| "loss": 0.1711, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07604990899562836, | |
| "step": 2330, | |
| "valid_targets_mean": 5193.4, | |
| "valid_targets_min": 3623 | |
| }, | |
| { | |
| "epoch": 4.41438032166509, | |
| "grad_norm": 0.5009910260511469, | |
| "learning_rate": 1.4472350442837892e-05, | |
| "loss": 0.1599, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09670796990394592, | |
| "step": 2335, | |
| "valid_targets_mean": 5357.4, | |
| "valid_targets_min": 2511 | |
| }, | |
| { | |
| "epoch": 4.423841059602649, | |
| "grad_norm": 0.46984143631798586, | |
| "learning_rate": 1.4381799343532073e-05, | |
| "loss": 0.1702, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.0717625617980957, | |
| "step": 2340, | |
| "valid_targets_mean": 5341.1, | |
| "valid_targets_min": 2983 | |
| }, | |
| { | |
| "epoch": 4.433301797540208, | |
| "grad_norm": 0.513188568847778, | |
| "learning_rate": 1.4291373105054201e-05, | |
| "loss": 0.1742, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09518257528543472, | |
| "step": 2345, | |
| "valid_targets_mean": 5414.4, | |
| "valid_targets_min": 2589 | |
| }, | |
| { | |
| "epoch": 4.442762535477767, | |
| "grad_norm": 0.5036887196599628, | |
| "learning_rate": 1.4201073737068182e-05, | |
| "loss": 0.1697, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08094947040081024, | |
| "step": 2350, | |
| "valid_targets_mean": 4974.0, | |
| "valid_targets_min": 2419 | |
| }, | |
| { | |
| "epoch": 4.452223273415326, | |
| "grad_norm": 0.4912192592188947, | |
| "learning_rate": 1.411090324641829e-05, | |
| "loss": 0.1714, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07559478282928467, | |
| "step": 2355, | |
| "valid_targets_mean": 5369.4, | |
| "valid_targets_min": 1880 | |
| }, | |
| { | |
| "epoch": 4.461684011352886, | |
| "grad_norm": 0.44942292554051916, | |
| "learning_rate": 1.4020863637084597e-05, | |
| "loss": 0.1727, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08044009655714035, | |
| "step": 2360, | |
| "valid_targets_mean": 6921.1, | |
| "valid_targets_min": 3977 | |
| }, | |
| { | |
| "epoch": 4.471144749290445, | |
| "grad_norm": 0.5240965033441205, | |
| "learning_rate": 1.3930956910138407e-05, | |
| "loss": 0.1766, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09544815123081207, | |
| "step": 2365, | |
| "valid_targets_mean": 5657.5, | |
| "valid_targets_min": 1822 | |
| }, | |
| { | |
| "epoch": 4.4806054872280034, | |
| "grad_norm": 0.48796641053338774, | |
| "learning_rate": 1.3841185063697817e-05, | |
| "loss": 0.166, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09811213612556458, | |
| "step": 2370, | |
| "valid_targets_mean": 6156.5, | |
| "valid_targets_min": 1085 | |
| }, | |
| { | |
| "epoch": 4.490066225165563, | |
| "grad_norm": 0.5152407926424893, | |
| "learning_rate": 1.3751550092883275e-05, | |
| "loss": 0.1699, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08462321758270264, | |
| "step": 2375, | |
| "valid_targets_mean": 4567.0, | |
| "valid_targets_min": 1628 | |
| }, | |
| { | |
| "epoch": 4.499526963103122, | |
| "grad_norm": 0.42297714903267936, | |
| "learning_rate": 1.366205398977329e-05, | |
| "loss": 0.1662, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08978421986103058, | |
| "step": 2380, | |
| "valid_targets_mean": 6711.9, | |
| "valid_targets_min": 5091 | |
| }, | |
| { | |
| "epoch": 4.508987701040681, | |
| "grad_norm": 0.5111927828536853, | |
| "learning_rate": 1.3572698743360086e-05, | |
| "loss": 0.1706, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10813774168491364, | |
| "step": 2385, | |
| "valid_targets_mean": 6821.0, | |
| "valid_targets_min": 2143 | |
| }, | |
| { | |
| "epoch": 4.518448438978241, | |
| "grad_norm": 0.5302388829745546, | |
| "learning_rate": 1.3483486339505476e-05, | |
| "loss": 0.1636, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07856719195842743, | |
| "step": 2390, | |
| "valid_targets_mean": 5057.1, | |
| "valid_targets_min": 1893 | |
| }, | |
| { | |
| "epoch": 4.5279091769157995, | |
| "grad_norm": 0.5657954220828462, | |
| "learning_rate": 1.3394418760896665e-05, | |
| "loss": 0.1634, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07074756920337677, | |
| "step": 2395, | |
| "valid_targets_mean": 5564.0, | |
| "valid_targets_min": 3312 | |
| }, | |
| { | |
| "epoch": 4.537369914853358, | |
| "grad_norm": 0.44816213903081314, | |
| "learning_rate": 1.3305497987002214e-05, | |
| "loss": 0.1766, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10855835676193237, | |
| "step": 2400, | |
| "valid_targets_mean": 7582.5, | |
| "valid_targets_min": 4586 | |
| }, | |
| { | |
| "epoch": 4.546830652790918, | |
| "grad_norm": 0.4969643603677432, | |
| "learning_rate": 1.3216725994028065e-05, | |
| "loss": 0.1645, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07549931108951569, | |
| "step": 2405, | |
| "valid_targets_mean": 4293.8, | |
| "valid_targets_min": 2050 | |
| }, | |
| { | |
| "epoch": 4.556291390728477, | |
| "grad_norm": 0.49714025262854705, | |
| "learning_rate": 1.3128104754873592e-05, | |
| "loss": 0.1613, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09053511917591095, | |
| "step": 2410, | |
| "valid_targets_mean": 6208.1, | |
| "valid_targets_min": 4083 | |
| }, | |
| { | |
| "epoch": 4.565752128666036, | |
| "grad_norm": 0.4658167126447159, | |
| "learning_rate": 1.3039636239087751e-05, | |
| "loss": 0.1585, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08036954700946808, | |
| "step": 2415, | |
| "valid_targets_mean": 6865.4, | |
| "valid_targets_min": 5076 | |
| }, | |
| { | |
| "epoch": 4.575212866603595, | |
| "grad_norm": 0.4915257649668184, | |
| "learning_rate": 1.2951322412825333e-05, | |
| "loss": 0.1644, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09244567155838013, | |
| "step": 2420, | |
| "valid_targets_mean": 5903.8, | |
| "valid_targets_min": 2127 | |
| }, | |
| { | |
| "epoch": 4.5846736045411545, | |
| "grad_norm": 0.4567472797832718, | |
| "learning_rate": 1.2863165238803252e-05, | |
| "loss": 0.1565, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07485067844390869, | |
| "step": 2425, | |
| "valid_targets_mean": 4524.6, | |
| "valid_targets_min": 1753 | |
| }, | |
| { | |
| "epoch": 4.594134342478713, | |
| "grad_norm": 0.5247752395742293, | |
| "learning_rate": 1.2775166676256942e-05, | |
| "loss": 0.1723, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08491607010364532, | |
| "step": 2430, | |
| "valid_targets_mean": 5091.6, | |
| "valid_targets_min": 2419 | |
| }, | |
| { | |
| "epoch": 4.603595080416272, | |
| "grad_norm": 0.512670381259889, | |
| "learning_rate": 1.2687328680896784e-05, | |
| "loss": 0.1735, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09027966111898422, | |
| "step": 2435, | |
| "valid_targets_mean": 5586.4, | |
| "valid_targets_min": 1893 | |
| }, | |
| { | |
| "epoch": 4.613055818353832, | |
| "grad_norm": 0.5086538957041402, | |
| "learning_rate": 1.2599653204864656e-05, | |
| "loss": 0.1598, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08313116431236267, | |
| "step": 2440, | |
| "valid_targets_mean": 4873.9, | |
| "valid_targets_min": 3246 | |
| }, | |
| { | |
| "epoch": 4.622516556291391, | |
| "grad_norm": 0.48363850099286876, | |
| "learning_rate": 1.2512142196690573e-05, | |
| "loss": 0.1623, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.0974661335349083, | |
| "step": 2445, | |
| "valid_targets_mean": 6357.2, | |
| "valid_targets_min": 807 | |
| }, | |
| { | |
| "epoch": 4.63197729422895, | |
| "grad_norm": 0.4322906872044424, | |
| "learning_rate": 1.2424797601249328e-05, | |
| "loss": 0.1515, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.06740396469831467, | |
| "step": 2450, | |
| "valid_targets_mean": 5795.9, | |
| "valid_targets_min": 3054 | |
| }, | |
| { | |
| "epoch": 4.6414380321665085, | |
| "grad_norm": 0.5417059342860636, | |
| "learning_rate": 1.2337621359717333e-05, | |
| "loss": 0.1613, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08276567608118057, | |
| "step": 2455, | |
| "valid_targets_mean": 6291.6, | |
| "valid_targets_min": 3234 | |
| }, | |
| { | |
| "epoch": 4.650898770104068, | |
| "grad_norm": 0.4795632027077425, | |
| "learning_rate": 1.2250615409529427e-05, | |
| "loss": 0.1589, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07418248802423477, | |
| "step": 2460, | |
| "valid_targets_mean": 5510.9, | |
| "valid_targets_min": 1409 | |
| }, | |
| { | |
| "epoch": 4.660359508041627, | |
| "grad_norm": 0.49378217745950864, | |
| "learning_rate": 1.2163781684335831e-05, | |
| "loss": 0.1647, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09448840469121933, | |
| "step": 2465, | |
| "valid_targets_mean": 7180.1, | |
| "valid_targets_min": 4937 | |
| }, | |
| { | |
| "epoch": 4.669820245979186, | |
| "grad_norm": 0.5039190071239315, | |
| "learning_rate": 1.2077122113959186e-05, | |
| "loss": 0.1848, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08793889731168747, | |
| "step": 2470, | |
| "valid_targets_mean": 5509.6, | |
| "valid_targets_min": 1816 | |
| }, | |
| { | |
| "epoch": 4.679280983916746, | |
| "grad_norm": 0.4931542543015456, | |
| "learning_rate": 1.1990638624351659e-05, | |
| "loss": 0.1638, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08114581555128098, | |
| "step": 2475, | |
| "valid_targets_mean": 6421.4, | |
| "valid_targets_min": 3250 | |
| }, | |
| { | |
| "epoch": 4.688741721854305, | |
| "grad_norm": 0.5489707303546564, | |
| "learning_rate": 1.1904333137552124e-05, | |
| "loss": 0.1602, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08878085017204285, | |
| "step": 2480, | |
| "valid_targets_mean": 5445.0, | |
| "valid_targets_min": 2077 | |
| }, | |
| { | |
| "epoch": 4.6982024597918635, | |
| "grad_norm": 0.4837787673198404, | |
| "learning_rate": 1.1818207571643484e-05, | |
| "loss": 0.1581, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07125557959079742, | |
| "step": 2485, | |
| "valid_targets_mean": 4444.9, | |
| "valid_targets_min": 2616 | |
| }, | |
| { | |
| "epoch": 4.707663197729423, | |
| "grad_norm": 0.5083177062405144, | |
| "learning_rate": 1.173226384070999e-05, | |
| "loss": 0.1658, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07098717987537384, | |
| "step": 2490, | |
| "valid_targets_mean": 5205.4, | |
| "valid_targets_min": 1173 | |
| }, | |
| { | |
| "epoch": 4.717123935666982, | |
| "grad_norm": 0.5575925952865973, | |
| "learning_rate": 1.1646503854794746e-05, | |
| "loss": 0.1667, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11164502054452896, | |
| "step": 2495, | |
| "valid_targets_mean": 5729.4, | |
| "valid_targets_min": 1426 | |
| }, | |
| { | |
| "epoch": 4.726584673604541, | |
| "grad_norm": 0.5223389265424682, | |
| "learning_rate": 1.1560929519857246e-05, | |
| "loss": 0.1585, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08164171129465103, | |
| "step": 2500, | |
| "valid_targets_mean": 4500.6, | |
| "valid_targets_min": 1089 | |
| }, | |
| { | |
| "epoch": 4.736045411542101, | |
| "grad_norm": 0.5383697567770299, | |
| "learning_rate": 1.1475542737730998e-05, | |
| "loss": 0.1537, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08602418750524521, | |
| "step": 2505, | |
| "valid_targets_mean": 5347.1, | |
| "valid_targets_min": 2448 | |
| }, | |
| { | |
| "epoch": 4.7455061494796595, | |
| "grad_norm": 0.5239083651471129, | |
| "learning_rate": 1.1390345406081286e-05, | |
| "loss": 0.1591, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08246322721242905, | |
| "step": 2510, | |
| "valid_targets_mean": 5422.6, | |
| "valid_targets_min": 2932 | |
| }, | |
| { | |
| "epoch": 4.754966887417218, | |
| "grad_norm": 0.48646478838090534, | |
| "learning_rate": 1.1305339418362978e-05, | |
| "loss": 0.1613, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07415419816970825, | |
| "step": 2515, | |
| "valid_targets_mean": 5297.6, | |
| "valid_targets_min": 1772 | |
| }, | |
| { | |
| "epoch": 4.764427625354777, | |
| "grad_norm": 0.4743203401979834, | |
| "learning_rate": 1.1220526663778441e-05, | |
| "loss": 0.1601, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09026531875133514, | |
| "step": 2520, | |
| "valid_targets_mean": 6998.8, | |
| "valid_targets_min": 4582 | |
| }, | |
| { | |
| "epoch": 4.773888363292337, | |
| "grad_norm": 0.49095486039648706, | |
| "learning_rate": 1.113590902723557e-05, | |
| "loss": 0.1629, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08181262016296387, | |
| "step": 2525, | |
| "valid_targets_mean": 5897.6, | |
| "valid_targets_min": 2450 | |
| }, | |
| { | |
| "epoch": 4.783349101229896, | |
| "grad_norm": 0.4902911174921005, | |
| "learning_rate": 1.1051488389305875e-05, | |
| "loss": 0.1593, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08572474122047424, | |
| "step": 2530, | |
| "valid_targets_mean": 5079.1, | |
| "valid_targets_min": 571 | |
| }, | |
| { | |
| "epoch": 4.792809839167455, | |
| "grad_norm": 0.4613815865050699, | |
| "learning_rate": 1.0967266626182726e-05, | |
| "loss": 0.1659, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07123850286006927, | |
| "step": 2535, | |
| "valid_targets_mean": 6506.1, | |
| "valid_targets_min": 4824 | |
| }, | |
| { | |
| "epoch": 4.8022705771050145, | |
| "grad_norm": 0.4813549393736916, | |
| "learning_rate": 1.0883245609639622e-05, | |
| "loss": 0.1622, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07271705567836761, | |
| "step": 2540, | |
| "valid_targets_mean": 5694.1, | |
| "valid_targets_min": 4281 | |
| }, | |
| { | |
| "epoch": 4.811731315042573, | |
| "grad_norm": 0.48283752024336907, | |
| "learning_rate": 1.0799427206988588e-05, | |
| "loss": 0.1595, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08985312283039093, | |
| "step": 2545, | |
| "valid_targets_mean": 6184.9, | |
| "valid_targets_min": 2244 | |
| }, | |
| { | |
| "epoch": 4.821192052980132, | |
| "grad_norm": 0.5092895093118036, | |
| "learning_rate": 1.0715813281038697e-05, | |
| "loss": 0.1671, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07882203161716461, | |
| "step": 2550, | |
| "valid_targets_mean": 5018.8, | |
| "valid_targets_min": 2927 | |
| }, | |
| { | |
| "epoch": 4.830652790917692, | |
| "grad_norm": 0.47902608616249853, | |
| "learning_rate": 1.0632405690054652e-05, | |
| "loss": 0.1537, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07882026582956314, | |
| "step": 2555, | |
| "valid_targets_mean": 5350.1, | |
| "valid_targets_min": 2041 | |
| }, | |
| { | |
| "epoch": 4.840113528855251, | |
| "grad_norm": 0.4781631389195755, | |
| "learning_rate": 1.0549206287715524e-05, | |
| "loss": 0.1627, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07253912091255188, | |
| "step": 2560, | |
| "valid_targets_mean": 5826.4, | |
| "valid_targets_min": 4815 | |
| }, | |
| { | |
| "epoch": 4.84957426679281, | |
| "grad_norm": 0.50927564435645, | |
| "learning_rate": 1.0466216923073497e-05, | |
| "loss": 0.1729, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07280386984348297, | |
| "step": 2565, | |
| "valid_targets_mean": 4362.0, | |
| "valid_targets_min": 1897 | |
| }, | |
| { | |
| "epoch": 4.859035004730369, | |
| "grad_norm": 0.4990588524253479, | |
| "learning_rate": 1.0383439440512814e-05, | |
| "loss": 0.1604, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.0832209438085556, | |
| "step": 2570, | |
| "valid_targets_mean": 5917.6, | |
| "valid_targets_min": 2058 | |
| }, | |
| { | |
| "epoch": 4.868495742667928, | |
| "grad_norm": 0.5104279600936655, | |
| "learning_rate": 1.030087567970879e-05, | |
| "loss": 0.1602, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10466784238815308, | |
| "step": 2575, | |
| "valid_targets_mean": 6902.9, | |
| "valid_targets_min": 4028 | |
| }, | |
| { | |
| "epoch": 4.877956480605487, | |
| "grad_norm": 0.49021362537296576, | |
| "learning_rate": 1.0218527475586902e-05, | |
| "loss": 0.1575, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08758172392845154, | |
| "step": 2580, | |
| "valid_targets_mean": 5222.5, | |
| "valid_targets_min": 3552 | |
| }, | |
| { | |
| "epoch": 4.887417218543046, | |
| "grad_norm": 0.5214528334399346, | |
| "learning_rate": 1.013639665828201e-05, | |
| "loss": 0.156, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.0988951027393341, | |
| "step": 2585, | |
| "valid_targets_mean": 5775.8, | |
| "valid_targets_min": 3433 | |
| }, | |
| { | |
| "epoch": 4.896877956480606, | |
| "grad_norm": 0.4992920444778697, | |
| "learning_rate": 1.0054485053097731e-05, | |
| "loss": 0.1606, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08480118215084076, | |
| "step": 2590, | |
| "valid_targets_mean": 5527.4, | |
| "valid_targets_min": 3358 | |
| }, | |
| { | |
| "epoch": 4.906338694418165, | |
| "grad_norm": 0.5073995907257243, | |
| "learning_rate": 9.972794480465798e-06, | |
| "loss": 0.1649, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08439338207244873, | |
| "step": 2595, | |
| "valid_targets_mean": 5569.0, | |
| "valid_targets_min": 3082 | |
| }, | |
| { | |
| "epoch": 4.9157994323557235, | |
| "grad_norm": 0.46526402755578683, | |
| "learning_rate": 9.891326755905652e-06, | |
| "loss": 0.1523, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07682204246520996, | |
| "step": 2600, | |
| "valid_targets_mean": 5154.4, | |
| "valid_targets_min": 2569 | |
| }, | |
| { | |
| "epoch": 4.925260170293283, | |
| "grad_norm": 0.5207018903668406, | |
| "learning_rate": 9.8100836899841e-06, | |
| "loss": 0.153, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08420620113611221, | |
| "step": 2605, | |
| "valid_targets_mean": 5129.8, | |
| "valid_targets_min": 1379 | |
| }, | |
| { | |
| "epoch": 4.934720908230842, | |
| "grad_norm": 0.4284701079205504, | |
| "learning_rate": 9.729067088275025e-06, | |
| "loss": 0.1706, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07169349491596222, | |
| "step": 2610, | |
| "valid_targets_mean": 6409.8, | |
| "valid_targets_min": 5031 | |
| }, | |
| { | |
| "epoch": 4.944181646168401, | |
| "grad_norm": 0.4820116779766079, | |
| "learning_rate": 9.648278751319329e-06, | |
| "loss": 0.1646, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08313654363155365, | |
| "step": 2615, | |
| "valid_targets_mean": 7001.9, | |
| "valid_targets_min": 3211 | |
| }, | |
| { | |
| "epoch": 4.95364238410596, | |
| "grad_norm": 0.455960966421983, | |
| "learning_rate": 9.56772047458485e-06, | |
| "loss": 0.1625, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08207933604717255, | |
| "step": 2620, | |
| "valid_targets_mean": 6887.5, | |
| "valid_targets_min": 4307 | |
| }, | |
| { | |
| "epoch": 4.9631031220435196, | |
| "grad_norm": 0.49589408534775636, | |
| "learning_rate": 9.487394048426497e-06, | |
| "loss": 0.1622, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.06637847423553467, | |
| "step": 2625, | |
| "valid_targets_mean": 5183.2, | |
| "valid_targets_min": 2566 | |
| }, | |
| { | |
| "epoch": 4.972563859981078, | |
| "grad_norm": 0.48599414771340765, | |
| "learning_rate": 9.407301258046454e-06, | |
| "loss": 0.1732, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09902942180633545, | |
| "step": 2630, | |
| "valid_targets_mean": 6159.5, | |
| "valid_targets_min": 3864 | |
| }, | |
| { | |
| "epoch": 4.982024597918637, | |
| "grad_norm": 0.5152781317137514, | |
| "learning_rate": 9.327443883454499e-06, | |
| "loss": 0.1574, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07795896381139755, | |
| "step": 2635, | |
| "valid_targets_mean": 4861.8, | |
| "valid_targets_min": 1064 | |
| }, | |
| { | |
| "epoch": 4.991485335856197, | |
| "grad_norm": 0.5038511985913933, | |
| "learning_rate": 9.247823699428452e-06, | |
| "loss": 0.1665, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09703533351421356, | |
| "step": 2640, | |
| "valid_targets_mean": 5297.1, | |
| "valid_targets_min": 1495 | |
| }, | |
| { | |
| "epoch": 5.001892147587512, | |
| "grad_norm": 0.7919432421674072, | |
| "learning_rate": 9.168442475474737e-06, | |
| "loss": 0.1865, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07412499934434891, | |
| "step": 2645, | |
| "valid_targets_mean": 5746.4, | |
| "valid_targets_min": 1906 | |
| }, | |
| { | |
| "epoch": 5.011352885525071, | |
| "grad_norm": 0.47230625720793334, | |
| "learning_rate": 9.089301975789029e-06, | |
| "loss": 0.1546, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07187912613153458, | |
| "step": 2650, | |
| "valid_targets_mean": 5863.1, | |
| "valid_targets_min": 1526 | |
| }, | |
| { | |
| "epoch": 5.02081362346263, | |
| "grad_norm": 0.4893299689425291, | |
| "learning_rate": 9.010403959217078e-06, | |
| "loss": 0.1586, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07913671433925629, | |
| "step": 2655, | |
| "valid_targets_mean": 5488.1, | |
| "valid_targets_min": 2092 | |
| }, | |
| { | |
| "epoch": 5.030274361400189, | |
| "grad_norm": 0.6130415734002539, | |
| "learning_rate": 8.931750179215586e-06, | |
| "loss": 0.1555, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07517177611589432, | |
| "step": 2660, | |
| "valid_targets_mean": 6251.6, | |
| "valid_targets_min": 4729 | |
| }, | |
| { | |
| "epoch": 5.039735099337748, | |
| "grad_norm": 0.49993267762896326, | |
| "learning_rate": 8.853342383813289e-06, | |
| "loss": 0.1693, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08262951672077179, | |
| "step": 2665, | |
| "valid_targets_mean": 6646.4, | |
| "valid_targets_min": 3061 | |
| }, | |
| { | |
| "epoch": 5.049195837275308, | |
| "grad_norm": 0.5096897089548484, | |
| "learning_rate": 8.775182315572044e-06, | |
| "loss": 0.1624, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07983925938606262, | |
| "step": 2670, | |
| "valid_targets_mean": 4711.5, | |
| "valid_targets_min": 1457 | |
| }, | |
| { | |
| "epoch": 5.058656575212867, | |
| "grad_norm": 0.5485108474130074, | |
| "learning_rate": 8.697271711548163e-06, | |
| "loss": 0.1632, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08494468778371811, | |
| "step": 2675, | |
| "valid_targets_mean": 5836.0, | |
| "valid_targets_min": 2637 | |
| }, | |
| { | |
| "epoch": 5.068117313150426, | |
| "grad_norm": 0.5863734037093357, | |
| "learning_rate": 8.619612303253759e-06, | |
| "loss": 0.1708, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07812988758087158, | |
| "step": 2680, | |
| "valid_targets_mean": 5071.0, | |
| "valid_targets_min": 3161 | |
| }, | |
| { | |
| "epoch": 5.077578051087984, | |
| "grad_norm": 0.5169864922201843, | |
| "learning_rate": 8.54220581661829e-06, | |
| "loss": 0.166, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09739792346954346, | |
| "step": 2685, | |
| "valid_targets_mean": 5116.9, | |
| "valid_targets_min": 2506 | |
| }, | |
| { | |
| "epoch": 5.087038789025544, | |
| "grad_norm": 0.4924483330266412, | |
| "learning_rate": 8.465053971950188e-06, | |
| "loss": 0.16, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.089106485247612, | |
| "step": 2690, | |
| "valid_targets_mean": 5403.6, | |
| "valid_targets_min": 3050 | |
| }, | |
| { | |
| "epoch": 5.096499526963103, | |
| "grad_norm": 0.4762786682998264, | |
| "learning_rate": 8.388158483898661e-06, | |
| "loss": 0.155, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07820412516593933, | |
| "step": 2695, | |
| "valid_targets_mean": 6105.2, | |
| "valid_targets_min": 1913 | |
| }, | |
| { | |
| "epoch": 5.105960264900662, | |
| "grad_norm": 0.5062357344918998, | |
| "learning_rate": 8.31152106141553e-06, | |
| "loss": 0.1591, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09622031450271606, | |
| "step": 2700, | |
| "valid_targets_mean": 5775.8, | |
| "valid_targets_min": 2980 | |
| }, | |
| { | |
| "epoch": 5.115421002838222, | |
| "grad_norm": 0.5002871838224989, | |
| "learning_rate": 8.235143407717282e-06, | |
| "loss": 0.16, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.0585002526640892, | |
| "step": 2705, | |
| "valid_targets_mean": 3764.5, | |
| "valid_targets_min": 1599 | |
| }, | |
| { | |
| "epoch": 5.1248817407757805, | |
| "grad_norm": 0.5250930353279408, | |
| "learning_rate": 8.159027220247238e-06, | |
| "loss": 0.161, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08297573029994965, | |
| "step": 2710, | |
| "valid_targets_mean": 6368.1, | |
| "valid_targets_min": 2296 | |
| }, | |
| { | |
| "epoch": 5.134342478713339, | |
| "grad_norm": 0.5007368663874092, | |
| "learning_rate": 8.083174190637766e-06, | |
| "loss": 0.1552, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.0794142335653305, | |
| "step": 2715, | |
| "valid_targets_mean": 4909.6, | |
| "valid_targets_min": 3604 | |
| }, | |
| { | |
| "epoch": 5.143803216650899, | |
| "grad_norm": 0.5230754061779214, | |
| "learning_rate": 8.00758600467276e-06, | |
| "loss": 0.1653, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08960853517055511, | |
| "step": 2720, | |
| "valid_targets_mean": 5194.1, | |
| "valid_targets_min": 2815 | |
| }, | |
| { | |
| "epoch": 5.153263954588458, | |
| "grad_norm": 0.5064462916090743, | |
| "learning_rate": 7.932264342250112e-06, | |
| "loss": 0.1602, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09582977741956711, | |
| "step": 2725, | |
| "valid_targets_mean": 5520.9, | |
| "valid_targets_min": 4252 | |
| }, | |
| { | |
| "epoch": 5.162724692526017, | |
| "grad_norm": 0.49932423593853265, | |
| "learning_rate": 7.857210877344405e-06, | |
| "loss": 0.1527, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.0778404176235199, | |
| "step": 2730, | |
| "valid_targets_mean": 5829.5, | |
| "valid_targets_min": 2468 | |
| }, | |
| { | |
| "epoch": 5.172185430463577, | |
| "grad_norm": 0.5323844417559039, | |
| "learning_rate": 7.782427277969715e-06, | |
| "loss": 0.1658, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.0745185911655426, | |
| "step": 2735, | |
| "valid_targets_mean": 5312.0, | |
| "valid_targets_min": 3180 | |
| }, | |
| { | |
| "epoch": 5.1816461684011355, | |
| "grad_norm": 0.5557657439250057, | |
| "learning_rate": 7.707915206142536e-06, | |
| "loss": 0.1585, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.05902181565761566, | |
| "step": 2740, | |
| "valid_targets_mean": 5351.9, | |
| "valid_targets_min": 746 | |
| }, | |
| { | |
| "epoch": 5.191106906338694, | |
| "grad_norm": 0.5332886262361239, | |
| "learning_rate": 7.63367631784484e-06, | |
| "loss": 0.1635, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07059160619974136, | |
| "step": 2745, | |
| "valid_targets_mean": 5607.6, | |
| "valid_targets_min": 3521 | |
| }, | |
| { | |
| "epoch": 5.200567644276253, | |
| "grad_norm": 0.5179154050055391, | |
| "learning_rate": 7.559712262987269e-06, | |
| "loss": 0.1499, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.070362389087677, | |
| "step": 2750, | |
| "valid_targets_mean": 5177.0, | |
| "valid_targets_min": 1471 | |
| }, | |
| { | |
| "epoch": 5.210028382213813, | |
| "grad_norm": 0.5265176493412506, | |
| "learning_rate": 7.486024685372468e-06, | |
| "loss": 0.1546, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07411413639783859, | |
| "step": 2755, | |
| "valid_targets_mean": 6020.9, | |
| "valid_targets_min": 627 | |
| }, | |
| { | |
| "epoch": 5.219489120151372, | |
| "grad_norm": 0.554707229367045, | |
| "learning_rate": 7.412615222658566e-06, | |
| "loss": 0.1579, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09033337235450745, | |
| "step": 2760, | |
| "valid_targets_mean": 4811.0, | |
| "valid_targets_min": 2461 | |
| }, | |
| { | |
| "epoch": 5.228949858088931, | |
| "grad_norm": 0.5515956673496363, | |
| "learning_rate": 7.339485506322755e-06, | |
| "loss": 0.168, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.0772365927696228, | |
| "step": 2765, | |
| "valid_targets_mean": 5292.4, | |
| "valid_targets_min": 648 | |
| }, | |
| { | |
| "epoch": 5.23841059602649, | |
| "grad_norm": 0.47803954262560044, | |
| "learning_rate": 7.266637161625074e-06, | |
| "loss": 0.169, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09605518728494644, | |
| "step": 2770, | |
| "valid_targets_mean": 6811.4, | |
| "valid_targets_min": 4733 | |
| }, | |
| { | |
| "epoch": 5.247871333964049, | |
| "grad_norm": 0.5398489333939209, | |
| "learning_rate": 7.194071807572234e-06, | |
| "loss": 0.1607, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.0798153206706047, | |
| "step": 2775, | |
| "valid_targets_mean": 4869.1, | |
| "valid_targets_min": 1916 | |
| }, | |
| { | |
| "epoch": 5.257332071901608, | |
| "grad_norm": 0.511779870847518, | |
| "learning_rate": 7.121791056881688e-06, | |
| "loss": 0.1664, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08416005969047546, | |
| "step": 2780, | |
| "valid_targets_mean": 5786.0, | |
| "valid_targets_min": 3360 | |
| }, | |
| { | |
| "epoch": 5.266792809839168, | |
| "grad_norm": 0.46117106595655755, | |
| "learning_rate": 7.049796515945748e-06, | |
| "loss": 0.1706, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08570249378681183, | |
| "step": 2785, | |
| "valid_targets_mean": 6217.8, | |
| "valid_targets_min": 3805 | |
| }, | |
| { | |
| "epoch": 5.276253547776727, | |
| "grad_norm": 0.48698826185764776, | |
| "learning_rate": 6.9780897847959005e-06, | |
| "loss": 0.1537, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08505302667617798, | |
| "step": 2790, | |
| "valid_targets_mean": 6306.8, | |
| "valid_targets_min": 4518 | |
| }, | |
| { | |
| "epoch": 5.285714285714286, | |
| "grad_norm": 0.47067540750278847, | |
| "learning_rate": 6.906672457067272e-06, | |
| "loss": 0.1606, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.06407542526721954, | |
| "step": 2795, | |
| "valid_targets_mean": 4664.0, | |
| "valid_targets_min": 777 | |
| }, | |
| { | |
| "epoch": 5.2951750236518444, | |
| "grad_norm": 0.4814404184659197, | |
| "learning_rate": 6.835546119963159e-06, | |
| "loss": 0.1619, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07698783278465271, | |
| "step": 2800, | |
| "valid_targets_mean": 5979.5, | |
| "valid_targets_min": 874 | |
| }, | |
| { | |
| "epoch": 5.304635761589404, | |
| "grad_norm": 0.4138254655184686, | |
| "learning_rate": 6.764712354219798e-06, | |
| "loss": 0.1566, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.06797531247138977, | |
| "step": 2805, | |
| "valid_targets_mean": 7091.4, | |
| "valid_targets_min": 5493 | |
| }, | |
| { | |
| "epoch": 5.314096499526963, | |
| "grad_norm": 0.5158181059013228, | |
| "learning_rate": 6.694172734071209e-06, | |
| "loss": 0.1734, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12109321355819702, | |
| "step": 2810, | |
| "valid_targets_mean": 6526.1, | |
| "valid_targets_min": 4415 | |
| }, | |
| { | |
| "epoch": 5.323557237464522, | |
| "grad_norm": 0.5116457962386629, | |
| "learning_rate": 6.623928827214234e-06, | |
| "loss": 0.1686, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08392614126205444, | |
| "step": 2815, | |
| "valid_targets_mean": 4956.9, | |
| "valid_targets_min": 1126 | |
| }, | |
| { | |
| "epoch": 5.333017975402082, | |
| "grad_norm": 0.4637640125901983, | |
| "learning_rate": 6.553982194773663e-06, | |
| "loss": 0.1586, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.06488104909658432, | |
| "step": 2820, | |
| "valid_targets_mean": 5014.0, | |
| "valid_targets_min": 1970 | |
| }, | |
| { | |
| "epoch": 5.3424787133396405, | |
| "grad_norm": 0.5489483258773076, | |
| "learning_rate": 6.4843343912675775e-06, | |
| "loss": 0.1517, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07855500280857086, | |
| "step": 2825, | |
| "valid_targets_mean": 5010.8, | |
| "valid_targets_min": 3460 | |
| }, | |
| { | |
| "epoch": 5.351939451277199, | |
| "grad_norm": 0.5566628520939422, | |
| "learning_rate": 6.4149869645727604e-06, | |
| "loss": 0.1626, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09175878018140793, | |
| "step": 2830, | |
| "valid_targets_mean": 5209.1, | |
| "valid_targets_min": 1971 | |
| }, | |
| { | |
| "epoch": 5.361400189214759, | |
| "grad_norm": 0.5194511420264721, | |
| "learning_rate": 6.34594145589033e-06, | |
| "loss": 0.1623, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09560522437095642, | |
| "step": 2835, | |
| "valid_targets_mean": 5432.2, | |
| "valid_targets_min": 4445 | |
| }, | |
| { | |
| "epoch": 5.370860927152318, | |
| "grad_norm": 0.5297795083082508, | |
| "learning_rate": 6.277199399711462e-06, | |
| "loss": 0.1603, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09491391479969025, | |
| "step": 2840, | |
| "valid_targets_mean": 5806.8, | |
| "valid_targets_min": 4124 | |
| }, | |
| { | |
| "epoch": 5.380321665089877, | |
| "grad_norm": 0.5019727889753064, | |
| "learning_rate": 6.208762323783317e-06, | |
| "loss": 0.1634, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07884600758552551, | |
| "step": 2845, | |
| "valid_targets_mean": 5524.0, | |
| "valid_targets_min": 1902 | |
| }, | |
| { | |
| "epoch": 5.389782403027436, | |
| "grad_norm": 0.50283030126958, | |
| "learning_rate": 6.140631749075063e-06, | |
| "loss": 0.1699, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08332569897174835, | |
| "step": 2850, | |
| "valid_targets_mean": 5927.8, | |
| "valid_targets_min": 4868 | |
| }, | |
| { | |
| "epoch": 5.3992431409649955, | |
| "grad_norm": 0.4809018227791537, | |
| "learning_rate": 6.0728091897440734e-06, | |
| "loss": 0.1614, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07353633642196655, | |
| "step": 2855, | |
| "valid_targets_mean": 5367.0, | |
| "valid_targets_min": 3740 | |
| }, | |
| { | |
| "epoch": 5.408703878902554, | |
| "grad_norm": 0.4864649688470974, | |
| "learning_rate": 6.005296153102285e-06, | |
| "loss": 0.1578, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.0782986655831337, | |
| "step": 2860, | |
| "valid_targets_mean": 5988.9, | |
| "valid_targets_min": 1588 | |
| }, | |
| { | |
| "epoch": 5.418164616840113, | |
| "grad_norm": 0.48061818782181764, | |
| "learning_rate": 5.9380941395826926e-06, | |
| "loss": 0.1616, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.0813487246632576, | |
| "step": 2865, | |
| "valid_targets_mean": 5932.9, | |
| "valid_targets_min": 1655 | |
| }, | |
| { | |
| "epoch": 5.427625354777673, | |
| "grad_norm": 0.4924319395860947, | |
| "learning_rate": 5.871204642706006e-06, | |
| "loss": 0.1792, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07597285509109497, | |
| "step": 2870, | |
| "valid_targets_mean": 5485.5, | |
| "valid_targets_min": 3119 | |
| }, | |
| { | |
| "epoch": 5.437086092715232, | |
| "grad_norm": 0.4985432480570909, | |
| "learning_rate": 5.8046291490474695e-06, | |
| "loss": 0.1607, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08164849877357483, | |
| "step": 2875, | |
| "valid_targets_mean": 5445.2, | |
| "valid_targets_min": 4168 | |
| }, | |
| { | |
| "epoch": 5.446546830652791, | |
| "grad_norm": 0.48269318781366277, | |
| "learning_rate": 5.73836913820379e-06, | |
| "loss": 0.1606, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07020534574985504, | |
| "step": 2880, | |
| "valid_targets_mean": 6313.1, | |
| "valid_targets_min": 4442 | |
| }, | |
| { | |
| "epoch": 5.45600756859035, | |
| "grad_norm": 0.49113593900775243, | |
| "learning_rate": 5.672426082760305e-06, | |
| "loss": 0.1618, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07037980854511261, | |
| "step": 2885, | |
| "valid_targets_mean": 5693.4, | |
| "valid_targets_min": 4083 | |
| }, | |
| { | |
| "epoch": 5.465468306527909, | |
| "grad_norm": 0.5515240633580824, | |
| "learning_rate": 5.606801448258199e-06, | |
| "loss": 0.1648, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08901975303888321, | |
| "step": 2890, | |
| "valid_targets_mean": 4930.4, | |
| "valid_targets_min": 2773 | |
| }, | |
| { | |
| "epoch": 5.474929044465468, | |
| "grad_norm": 0.4994868431495095, | |
| "learning_rate": 5.541496693161963e-06, | |
| "loss": 0.1584, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08624856919050217, | |
| "step": 2895, | |
| "valid_targets_mean": 6319.2, | |
| "valid_targets_min": 3351 | |
| }, | |
| { | |
| "epoch": 5.484389782403028, | |
| "grad_norm": 0.47317427516488886, | |
| "learning_rate": 5.4765132688269975e-06, | |
| "loss": 0.1584, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07870611548423767, | |
| "step": 2900, | |
| "valid_targets_mean": 6180.1, | |
| "valid_targets_min": 3064 | |
| }, | |
| { | |
| "epoch": 5.493850520340587, | |
| "grad_norm": 0.5117713572156433, | |
| "learning_rate": 5.411852619467319e-06, | |
| "loss": 0.1582, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.0791124477982521, | |
| "step": 2905, | |
| "valid_targets_mean": 5138.5, | |
| "valid_targets_min": 1201 | |
| }, | |
| { | |
| "epoch": 5.503311258278146, | |
| "grad_norm": 0.44743979186538946, | |
| "learning_rate": 5.347516182123482e-06, | |
| "loss": 0.1574, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08641517162322998, | |
| "step": 2910, | |
| "valid_targets_mean": 6519.6, | |
| "valid_targets_min": 2545 | |
| }, | |
| { | |
| "epoch": 5.5127719962157045, | |
| "grad_norm": 0.5598404931767829, | |
| "learning_rate": 5.283505386630656e-06, | |
| "loss": 0.1659, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09298183023929596, | |
| "step": 2915, | |
| "valid_targets_mean": 5395.4, | |
| "valid_targets_min": 3680 | |
| }, | |
| { | |
| "epoch": 5.522232734153264, | |
| "grad_norm": 0.46606600596730763, | |
| "learning_rate": 5.219821655586821e-06, | |
| "loss": 0.1619, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08851338177919388, | |
| "step": 2920, | |
| "valid_targets_mean": 6989.0, | |
| "valid_targets_min": 3815 | |
| }, | |
| { | |
| "epoch": 5.531693472090823, | |
| "grad_norm": 0.4685726242174628, | |
| "learning_rate": 5.156466404321159e-06, | |
| "loss": 0.1624, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09037988632917404, | |
| "step": 2925, | |
| "valid_targets_mean": 6915.8, | |
| "valid_targets_min": 1670 | |
| }, | |
| { | |
| "epoch": 5.541154210028382, | |
| "grad_norm": 0.5160408959477768, | |
| "learning_rate": 5.0934410408626235e-06, | |
| "loss": 0.1639, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08055752515792847, | |
| "step": 2930, | |
| "valid_targets_mean": 5192.4, | |
| "valid_targets_min": 3354 | |
| }, | |
| { | |
| "epoch": 5.550614947965942, | |
| "grad_norm": 0.5746904433215911, | |
| "learning_rate": 5.030746965908613e-06, | |
| "loss": 0.1665, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10669153928756714, | |
| "step": 2935, | |
| "valid_targets_mean": 5440.9, | |
| "valid_targets_min": 2202 | |
| }, | |
| { | |
| "epoch": 5.5600756859035005, | |
| "grad_norm": 0.45883493673475956, | |
| "learning_rate": 4.968385572793859e-06, | |
| "loss": 0.1547, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.05654348433017731, | |
| "step": 2940, | |
| "valid_targets_mean": 4961.6, | |
| "valid_targets_min": 2024 | |
| }, | |
| { | |
| "epoch": 5.569536423841059, | |
| "grad_norm": 0.4993450808594117, | |
| "learning_rate": 4.906358247459451e-06, | |
| "loss": 0.1615, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.068525031208992, | |
| "step": 2945, | |
| "valid_targets_mean": 4959.1, | |
| "valid_targets_min": 1511 | |
| }, | |
| { | |
| "epoch": 5.578997161778618, | |
| "grad_norm": 0.5382032760412773, | |
| "learning_rate": 4.844666368422055e-06, | |
| "loss": 0.1612, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.0909319519996643, | |
| "step": 2950, | |
| "valid_targets_mean": 5311.0, | |
| "valid_targets_min": 770 | |
| }, | |
| { | |
| "epoch": 5.588457899716178, | |
| "grad_norm": 0.5525554928211415, | |
| "learning_rate": 4.783311306743259e-06, | |
| "loss": 0.1687, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08873642981052399, | |
| "step": 2955, | |
| "valid_targets_mean": 5606.0, | |
| "valid_targets_min": 1685 | |
| }, | |
| { | |
| "epoch": 5.597918637653737, | |
| "grad_norm": 0.461910556182754, | |
| "learning_rate": 4.722294425999099e-06, | |
| "loss": 0.163, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09890357404947281, | |
| "step": 2960, | |
| "valid_targets_mean": 7342.2, | |
| "valid_targets_min": 3229 | |
| }, | |
| { | |
| "epoch": 5.607379375591296, | |
| "grad_norm": 0.4464092110112635, | |
| "learning_rate": 4.661617082249765e-06, | |
| "loss": 0.1603, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07442782819271088, | |
| "step": 2965, | |
| "valid_targets_mean": 6529.2, | |
| "valid_targets_min": 3249 | |
| }, | |
| { | |
| "epoch": 5.6168401135288555, | |
| "grad_norm": 0.49577901930229196, | |
| "learning_rate": 4.601280624009459e-06, | |
| "loss": 0.1645, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08115290105342865, | |
| "step": 2970, | |
| "valid_targets_mean": 5643.5, | |
| "valid_targets_min": 4384 | |
| }, | |
| { | |
| "epoch": 5.626300851466414, | |
| "grad_norm": 0.46421865938624207, | |
| "learning_rate": 4.541286392216419e-06, | |
| "loss": 0.1596, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.06690236926078796, | |
| "step": 2975, | |
| "valid_targets_mean": 5475.8, | |
| "valid_targets_min": 3336 | |
| }, | |
| { | |
| "epoch": 5.635761589403973, | |
| "grad_norm": 0.47388885299577094, | |
| "learning_rate": 4.481635720203139e-06, | |
| "loss": 0.1689, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08113521337509155, | |
| "step": 2980, | |
| "valid_targets_mean": 5951.1, | |
| "valid_targets_min": 4602 | |
| }, | |
| { | |
| "epoch": 5.645222327341533, | |
| "grad_norm": 0.48953790334528496, | |
| "learning_rate": 4.4223299336667226e-06, | |
| "loss": 0.1579, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.06641364097595215, | |
| "step": 2985, | |
| "valid_targets_mean": 5389.5, | |
| "valid_targets_min": 2234 | |
| }, | |
| { | |
| "epoch": 5.654683065279092, | |
| "grad_norm": 0.7042938717688352, | |
| "learning_rate": 4.363370350639405e-06, | |
| "loss": 0.1678, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08492471277713776, | |
| "step": 2990, | |
| "valid_targets_mean": 5449.6, | |
| "valid_targets_min": 3031 | |
| }, | |
| { | |
| "epoch": 5.664143803216651, | |
| "grad_norm": 0.9121963480606142, | |
| "learning_rate": 4.304758281459283e-06, | |
| "loss": 0.1635, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07324929535388947, | |
| "step": 2995, | |
| "valid_targets_mean": 4495.8, | |
| "valid_targets_min": 1353 | |
| }, | |
| { | |
| "epoch": 5.67360454115421, | |
| "grad_norm": 0.4938793058364134, | |
| "learning_rate": 4.24649502874118e-06, | |
| "loss": 0.1715, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08978196978569031, | |
| "step": 3000, | |
| "valid_targets_mean": 5755.1, | |
| "valid_targets_min": 3092 | |
| }, | |
| { | |
| "epoch": 5.683065279091769, | |
| "grad_norm": 0.4581010846815238, | |
| "learning_rate": 4.1885818873477156e-06, | |
| "loss": 0.1653, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07784257084131241, | |
| "step": 3005, | |
| "valid_targets_mean": 5838.5, | |
| "valid_targets_min": 4027 | |
| }, | |
| { | |
| "epoch": 5.692526017029328, | |
| "grad_norm": 0.5339513481968639, | |
| "learning_rate": 4.131020144360505e-06, | |
| "loss": 0.151, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.0724765881896019, | |
| "step": 3010, | |
| "valid_targets_mean": 4792.6, | |
| "valid_targets_min": 835 | |
| }, | |
| { | |
| "epoch": 5.701986754966887, | |
| "grad_norm": 0.4657301423666137, | |
| "learning_rate": 4.073811079051557e-06, | |
| "loss": 0.1525, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.06874331831932068, | |
| "step": 3015, | |
| "valid_targets_mean": 5342.6, | |
| "valid_targets_min": 2502 | |
| }, | |
| { | |
| "epoch": 5.711447492904447, | |
| "grad_norm": 0.5175010346747354, | |
| "learning_rate": 4.016955962854874e-06, | |
| "loss": 0.1663, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07748369872570038, | |
| "step": 3020, | |
| "valid_targets_mean": 5557.0, | |
| "valid_targets_min": 2561 | |
| }, | |
| { | |
| "epoch": 5.720908230842006, | |
| "grad_norm": 0.46047001239287016, | |
| "learning_rate": 3.9604560593381444e-06, | |
| "loss": 0.1612, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07033313065767288, | |
| "step": 3025, | |
| "valid_targets_mean": 6183.8, | |
| "valid_targets_min": 3650 | |
| }, | |
| { | |
| "epoch": 5.7303689687795645, | |
| "grad_norm": 0.5039104334017726, | |
| "learning_rate": 3.9043126241747e-06, | |
| "loss": 0.1627, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08076108247041702, | |
| "step": 3030, | |
| "valid_targets_mean": 5565.2, | |
| "valid_targets_min": 1265 | |
| }, | |
| { | |
| "epoch": 5.739829706717124, | |
| "grad_norm": 0.4333442468065866, | |
| "learning_rate": 3.8485269051156015e-06, | |
| "loss": 0.1601, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.06987737119197845, | |
| "step": 3035, | |
| "valid_targets_mean": 6822.2, | |
| "valid_targets_min": 1650 | |
| }, | |
| { | |
| "epoch": 5.749290444654683, | |
| "grad_norm": 0.4382347126402435, | |
| "learning_rate": 3.7931001419618963e-06, | |
| "loss": 0.1575, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07224809378385544, | |
| "step": 3040, | |
| "valid_targets_mean": 6522.6, | |
| "valid_targets_min": 3391 | |
| }, | |
| { | |
| "epoch": 5.758751182592242, | |
| "grad_norm": 0.5135189211645638, | |
| "learning_rate": 3.7380335665370693e-06, | |
| "loss": 0.1654, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.0703924223780632, | |
| "step": 3045, | |
| "valid_targets_mean": 4980.6, | |
| "valid_targets_min": 1296 | |
| }, | |
| { | |
| "epoch": 5.768211920529802, | |
| "grad_norm": 0.4884310150604858, | |
| "learning_rate": 3.6833284026596827e-06, | |
| "loss": 0.1531, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07352404296398163, | |
| "step": 3050, | |
| "valid_targets_mean": 5628.5, | |
| "valid_targets_min": 1475 | |
| }, | |
| { | |
| "epoch": 5.7776726584673606, | |
| "grad_norm": 0.46184445778343136, | |
| "learning_rate": 3.6289858661161435e-06, | |
| "loss": 0.1536, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07652977108955383, | |
| "step": 3055, | |
| "valid_targets_mean": 5814.8, | |
| "valid_targets_min": 3954 | |
| }, | |
| { | |
| "epoch": 5.787133396404919, | |
| "grad_norm": 0.4579796667457961, | |
| "learning_rate": 3.5750071646337283e-06, | |
| "loss": 0.1597, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07104137539863586, | |
| "step": 3060, | |
| "valid_targets_mean": 6471.2, | |
| "valid_targets_min": 3462 | |
| }, | |
| { | |
| "epoch": 5.796594134342479, | |
| "grad_norm": 0.4537041582090879, | |
| "learning_rate": 3.5213934978537002e-06, | |
| "loss": 0.1592, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08454744517803192, | |
| "step": 3065, | |
| "valid_targets_mean": 5621.0, | |
| "valid_targets_min": 4099 | |
| }, | |
| { | |
| "epoch": 5.806054872280038, | |
| "grad_norm": 0.5257566347643703, | |
| "learning_rate": 3.4681460573046667e-06, | |
| "loss": 0.1805, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08486692607402802, | |
| "step": 3070, | |
| "valid_targets_mean": 4147.2, | |
| "valid_targets_min": 1579 | |
| }, | |
| { | |
| "epoch": 5.815515610217597, | |
| "grad_norm": 0.4551568372177165, | |
| "learning_rate": 3.415266026376105e-06, | |
| "loss": 0.1638, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.06996514648199081, | |
| "step": 3075, | |
| "valid_targets_mean": 6100.4, | |
| "valid_targets_min": 3362 | |
| }, | |
| { | |
| "epoch": 5.824976348155156, | |
| "grad_norm": 0.5279020357535302, | |
| "learning_rate": 3.3627545802920402e-06, | |
| "loss": 0.1682, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.06808288395404816, | |
| "step": 3080, | |
| "valid_targets_mean": 6382.6, | |
| "valid_targets_min": 1899 | |
| }, | |
| { | |
| "epoch": 5.8344370860927155, | |
| "grad_norm": 0.509574069681122, | |
| "learning_rate": 3.310612886084961e-06, | |
| "loss": 0.1589, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07354620844125748, | |
| "step": 3085, | |
| "valid_targets_mean": 4710.0, | |
| "valid_targets_min": 1412 | |
| }, | |
| { | |
| "epoch": 5.843897824030274, | |
| "grad_norm": 0.5099345034078288, | |
| "learning_rate": 3.2588421025698525e-06, | |
| "loss": 0.1613, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07914125919342041, | |
| "step": 3090, | |
| "valid_targets_mean": 5402.9, | |
| "valid_targets_min": 771 | |
| }, | |
| { | |
| "epoch": 5.853358561967833, | |
| "grad_norm": 0.4921920783781287, | |
| "learning_rate": 3.207443380318449e-06, | |
| "loss": 0.1585, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.0780281201004982, | |
| "step": 3095, | |
| "valid_targets_mean": 5046.9, | |
| "valid_targets_min": 2910 | |
| }, | |
| { | |
| "epoch": 5.862819299905393, | |
| "grad_norm": 0.5076451745790288, | |
| "learning_rate": 3.1564178616336737e-06, | |
| "loss": 0.1682, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08828535676002502, | |
| "step": 3100, | |
| "valid_targets_mean": 6130.9, | |
| "valid_targets_min": 4471 | |
| }, | |
| { | |
| "epoch": 5.872280037842952, | |
| "grad_norm": 0.4724679364763835, | |
| "learning_rate": 3.1057666805242336e-06, | |
| "loss": 0.1655, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08091306686401367, | |
| "step": 3105, | |
| "valid_targets_mean": 6078.6, | |
| "valid_targets_min": 1741 | |
| }, | |
| { | |
| "epoch": 5.881740775780511, | |
| "grad_norm": 0.4996674681366472, | |
| "learning_rate": 3.055490962679448e-06, | |
| "loss": 0.1573, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07268786430358887, | |
| "step": 3110, | |
| "valid_targets_mean": 5243.2, | |
| "valid_targets_min": 817 | |
| }, | |
| { | |
| "epoch": 5.8912015137180695, | |
| "grad_norm": 0.5291137259397091, | |
| "learning_rate": 3.005591825444194e-06, | |
| "loss": 0.1725, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08995255827903748, | |
| "step": 3115, | |
| "valid_targets_mean": 5615.5, | |
| "valid_targets_min": 2952 | |
| }, | |
| { | |
| "epoch": 5.900662251655629, | |
| "grad_norm": 0.535236288933447, | |
| "learning_rate": 2.956070377794096e-06, | |
| "loss": 0.164, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08721289783716202, | |
| "step": 3120, | |
| "valid_targets_mean": 5925.4, | |
| "valid_targets_min": 3090 | |
| }, | |
| { | |
| "epoch": 5.910122989593188, | |
| "grad_norm": 0.5685850842714396, | |
| "learning_rate": 2.906927720310884e-06, | |
| "loss": 0.1647, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09268994629383087, | |
| "step": 3125, | |
| "valid_targets_mean": 5151.6, | |
| "valid_targets_min": 2762 | |
| }, | |
| { | |
| "epoch": 5.919583727530747, | |
| "grad_norm": 0.5012001406123452, | |
| "learning_rate": 2.8581649451579083e-06, | |
| "loss": 0.1669, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07973651587963104, | |
| "step": 3130, | |
| "valid_targets_mean": 5932.1, | |
| "valid_targets_min": 3486 | |
| }, | |
| { | |
| "epoch": 5.929044465468307, | |
| "grad_norm": 0.5152900354765085, | |
| "learning_rate": 2.809783136055895e-06, | |
| "loss": 0.1592, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.0881601944565773, | |
| "step": 3135, | |
| "valid_targets_mean": 5902.5, | |
| "valid_targets_min": 2004 | |
| }, | |
| { | |
| "epoch": 5.938505203405866, | |
| "grad_norm": 0.6871324672441096, | |
| "learning_rate": 2.761783368258852e-06, | |
| "loss": 0.1568, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08433617651462555, | |
| "step": 3140, | |
| "valid_targets_mean": 6727.5, | |
| "valid_targets_min": 4594 | |
| }, | |
| { | |
| "epoch": 5.9479659413434245, | |
| "grad_norm": 0.522395677503641, | |
| "learning_rate": 2.7141667085301593e-06, | |
| "loss": 0.1626, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.06753349304199219, | |
| "step": 3145, | |
| "valid_targets_mean": 4657.8, | |
| "valid_targets_min": 1787 | |
| }, | |
| { | |
| "epoch": 5.957426679280984, | |
| "grad_norm": 0.49161260461585427, | |
| "learning_rate": 2.6669342151188704e-06, | |
| "loss": 0.1636, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08413293957710266, | |
| "step": 3150, | |
| "valid_targets_mean": 6002.9, | |
| "valid_targets_min": 3061 | |
| }, | |
| { | |
| "epoch": 5.966887417218543, | |
| "grad_norm": 0.5461851756628411, | |
| "learning_rate": 2.62008693773621e-06, | |
| "loss": 0.1591, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08240652084350586, | |
| "step": 3155, | |
| "valid_targets_mean": 5138.6, | |
| "valid_targets_min": 2077 | |
| }, | |
| { | |
| "epoch": 5.976348155156102, | |
| "grad_norm": 0.5821132136646191, | |
| "learning_rate": 2.573625917532212e-06, | |
| "loss": 0.1598, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07987844944000244, | |
| "step": 3160, | |
| "valid_targets_mean": 5757.9, | |
| "valid_targets_min": 3017 | |
| }, | |
| { | |
| "epoch": 5.985808893093662, | |
| "grad_norm": 0.47968943718028695, | |
| "learning_rate": 2.5275521870726107e-06, | |
| "loss": 0.1642, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08667580038309097, | |
| "step": 3165, | |
| "valid_targets_mean": 5866.8, | |
| "valid_targets_min": 4790 | |
| }, | |
| { | |
| "epoch": 5.995269631031221, | |
| "grad_norm": 0.4965790737728136, | |
| "learning_rate": 2.481866770315866e-06, | |
| "loss": 0.1577, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08443500101566315, | |
| "step": 3170, | |
| "valid_targets_mean": 5814.4, | |
| "valid_targets_min": 4152 | |
| }, | |
| { | |
| "epoch": 6.0037842951750235, | |
| "grad_norm": 0.584119229089462, | |
| "learning_rate": 2.4365706825904335e-06, | |
| "loss": 0.157, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.0846308022737503, | |
| "step": 3175, | |
| "valid_targets_mean": 5266.2, | |
| "valid_targets_min": 4242 | |
| }, | |
| { | |
| "epoch": 6.013245033112582, | |
| "grad_norm": 0.5318032612027898, | |
| "learning_rate": 2.391664930572175e-06, | |
| "loss": 0.1588, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07814428210258484, | |
| "step": 3180, | |
| "valid_targets_mean": 5013.9, | |
| "valid_targets_min": 850 | |
| }, | |
| { | |
| "epoch": 6.022705771050142, | |
| "grad_norm": 0.5144448514440982, | |
| "learning_rate": 2.347150512262002e-06, | |
| "loss": 0.158, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07846082746982574, | |
| "step": 3185, | |
| "valid_targets_mean": 5425.6, | |
| "valid_targets_min": 2247 | |
| }, | |
| { | |
| "epoch": 6.032166508987701, | |
| "grad_norm": 0.4646205864187392, | |
| "learning_rate": 2.303028416963693e-06, | |
| "loss": 0.1583, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07777369767427444, | |
| "step": 3190, | |
| "valid_targets_mean": 5879.8, | |
| "valid_targets_min": 1877 | |
| }, | |
| { | |
| "epoch": 6.04162724692526, | |
| "grad_norm": 0.47396458926137663, | |
| "learning_rate": 2.259299625261906e-06, | |
| "loss": 0.1504, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.0792025625705719, | |
| "step": 3195, | |
| "valid_targets_mean": 6722.5, | |
| "valid_targets_min": 1546 | |
| }, | |
| { | |
| "epoch": 6.05108798486282, | |
| "grad_norm": 0.45843753096435663, | |
| "learning_rate": 2.2159651090003774e-06, | |
| "loss": 0.1594, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.06743055582046509, | |
| "step": 3200, | |
| "valid_targets_mean": 5288.9, | |
| "valid_targets_min": 1868 | |
| }, | |
| { | |
| "epoch": 6.0605487228003785, | |
| "grad_norm": 0.4553963970008123, | |
| "learning_rate": 2.173025831260336e-06, | |
| "loss": 0.1547, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.0739666223526001, | |
| "step": 3205, | |
| "valid_targets_mean": 6655.4, | |
| "valid_targets_min": 5252 | |
| }, | |
| { | |
| "epoch": 6.070009460737937, | |
| "grad_norm": 0.49731449399968497, | |
| "learning_rate": 2.1304827463390843e-06, | |
| "loss": 0.1526, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07816332578659058, | |
| "step": 3210, | |
| "valid_targets_mean": 6025.1, | |
| "valid_targets_min": 3547 | |
| }, | |
| { | |
| "epoch": 6.079470198675497, | |
| "grad_norm": 0.4889765047812308, | |
| "learning_rate": 2.088336799728814e-06, | |
| "loss": 0.155, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.0794171690940857, | |
| "step": 3215, | |
| "valid_targets_mean": 6134.9, | |
| "valid_targets_min": 4135 | |
| }, | |
| { | |
| "epoch": 6.088930936613056, | |
| "grad_norm": 0.47878941127207625, | |
| "learning_rate": 2.046588928095563e-06, | |
| "loss": 0.1451, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07988835871219635, | |
| "step": 3220, | |
| "valid_targets_mean": 5659.5, | |
| "valid_targets_min": 4115 | |
| }, | |
| { | |
| "epoch": 6.098391674550615, | |
| "grad_norm": 0.4997195025958504, | |
| "learning_rate": 2.005240059258431e-06, | |
| "loss": 0.1472, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.0862731784582138, | |
| "step": 3225, | |
| "valid_targets_mean": 5721.6, | |
| "valid_targets_min": 2052 | |
| }, | |
| { | |
| "epoch": 6.107852412488174, | |
| "grad_norm": 0.5692060263245817, | |
| "learning_rate": 1.9642911121689233e-06, | |
| "loss": 0.1652, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07118255645036697, | |
| "step": 3230, | |
| "valid_targets_mean": 3830.5, | |
| "valid_targets_min": 699 | |
| }, | |
| { | |
| "epoch": 6.117313150425733, | |
| "grad_norm": 0.5686969457407623, | |
| "learning_rate": 1.9237429968905586e-06, | |
| "loss": 0.1647, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.06574267148971558, | |
| "step": 3235, | |
| "valid_targets_mean": 3772.6, | |
| "valid_targets_min": 2061 | |
| }, | |
| { | |
| "epoch": 6.126773888363292, | |
| "grad_norm": 0.49763597561095985, | |
| "learning_rate": 1.8835966145786222e-06, | |
| "loss": 0.1548, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08373168855905533, | |
| "step": 3240, | |
| "valid_targets_mean": 6183.5, | |
| "valid_targets_min": 2730 | |
| }, | |
| { | |
| "epoch": 6.136234626300851, | |
| "grad_norm": 0.4887902369131846, | |
| "learning_rate": 1.84385285746016e-06, | |
| "loss": 0.1548, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08439414203166962, | |
| "step": 3245, | |
| "valid_targets_mean": 5588.5, | |
| "valid_targets_min": 3357 | |
| }, | |
| { | |
| "epoch": 6.145695364238411, | |
| "grad_norm": 0.4985086513937735, | |
| "learning_rate": 1.8045126088141262e-06, | |
| "loss": 0.1454, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07852502912282944, | |
| "step": 3250, | |
| "valid_targets_mean": 5644.4, | |
| "valid_targets_min": 3873 | |
| }, | |
| { | |
| "epoch": 6.15515610217597, | |
| "grad_norm": 0.533162317600248, | |
| "learning_rate": 1.7655767429517645e-06, | |
| "loss": 0.1515, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08291523158550262, | |
| "step": 3255, | |
| "valid_targets_mean": 5257.0, | |
| "valid_targets_min": 1362 | |
| }, | |
| { | |
| "epoch": 6.164616840113529, | |
| "grad_norm": 0.8163603542581688, | |
| "learning_rate": 1.727046125197185e-06, | |
| "loss": 0.1725, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11358313262462616, | |
| "step": 3260, | |
| "valid_targets_mean": 5585.5, | |
| "valid_targets_min": 3323 | |
| }, | |
| { | |
| "epoch": 6.174077578051088, | |
| "grad_norm": 0.5275817332491582, | |
| "learning_rate": 1.6889216118681107e-06, | |
| "loss": 0.1538, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07413968443870544, | |
| "step": 3265, | |
| "valid_targets_mean": 5179.8, | |
| "valid_targets_min": 1578 | |
| }, | |
| { | |
| "epoch": 6.183538315988647, | |
| "grad_norm": 0.47663914651303735, | |
| "learning_rate": 1.6512040502568761e-06, | |
| "loss": 0.1583, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.0781627744436264, | |
| "step": 3270, | |
| "valid_targets_mean": 6326.0, | |
| "valid_targets_min": 2718 | |
| }, | |
| { | |
| "epoch": 6.192999053926206, | |
| "grad_norm": 0.5198414647863658, | |
| "learning_rate": 1.6138942786115653e-06, | |
| "loss": 0.1574, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08433681726455688, | |
| "step": 3275, | |
| "valid_targets_mean": 5501.0, | |
| "valid_targets_min": 1788 | |
| }, | |
| { | |
| "epoch": 6.202459791863765, | |
| "grad_norm": 0.6408667201086448, | |
| "learning_rate": 1.5769931261174055e-06, | |
| "loss": 0.1524, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08084458112716675, | |
| "step": 3280, | |
| "valid_targets_mean": 6616.8, | |
| "valid_targets_min": 1689 | |
| }, | |
| { | |
| "epoch": 6.211920529801325, | |
| "grad_norm": 0.4944308322849977, | |
| "learning_rate": 1.5405014128783236e-06, | |
| "loss": 0.1544, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08102227747440338, | |
| "step": 3285, | |
| "valid_targets_mean": 5647.5, | |
| "valid_targets_min": 2694 | |
| }, | |
| { | |
| "epoch": 6.221381267738884, | |
| "grad_norm": 0.48233444172077344, | |
| "learning_rate": 1.5044199498987456e-06, | |
| "loss": 0.154, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.0742645263671875, | |
| "step": 3290, | |
| "valid_targets_mean": 5459.2, | |
| "valid_targets_min": 2468 | |
| }, | |
| { | |
| "epoch": 6.230842005676442, | |
| "grad_norm": 0.5321614423819155, | |
| "learning_rate": 1.4687495390655282e-06, | |
| "loss": 0.1641, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07728166878223419, | |
| "step": 3295, | |
| "valid_targets_mean": 4666.1, | |
| "valid_targets_min": 2232 | |
| }, | |
| { | |
| "epoch": 6.240302743614002, | |
| "grad_norm": 0.45554443433516056, | |
| "learning_rate": 1.4334909731301893e-06, | |
| "loss": 0.1596, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07903093844652176, | |
| "step": 3300, | |
| "valid_targets_mean": 6301.2, | |
| "valid_targets_min": 2023 | |
| }, | |
| { | |
| "epoch": 6.249763481551561, | |
| "grad_norm": 0.4369901640686527, | |
| "learning_rate": 1.398645035691244e-06, | |
| "loss": 0.1539, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07268842309713364, | |
| "step": 3305, | |
| "valid_targets_mean": 6521.2, | |
| "valid_targets_min": 4385 | |
| }, | |
| { | |
| "epoch": 6.25922421948912, | |
| "grad_norm": 0.5144149069493703, | |
| "learning_rate": 1.3642125011768204e-06, | |
| "loss": 0.1625, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.0853855162858963, | |
| "step": 3310, | |
| "valid_targets_mean": 4916.5, | |
| "valid_targets_min": 1306 | |
| }, | |
| { | |
| "epoch": 6.26868495742668, | |
| "grad_norm": 0.4662790470481307, | |
| "learning_rate": 1.3301941348274316e-06, | |
| "loss": 0.1538, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.06581012159585953, | |
| "step": 3315, | |
| "valid_targets_mean": 5482.5, | |
| "valid_targets_min": 3959 | |
| }, | |
| { | |
| "epoch": 6.2781456953642385, | |
| "grad_norm": 0.543005624322293, | |
| "learning_rate": 1.2965906926789807e-06, | |
| "loss": 0.1609, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08316712081432343, | |
| "step": 3320, | |
| "valid_targets_mean": 5355.4, | |
| "valid_targets_min": 3827 | |
| }, | |
| { | |
| "epoch": 6.287606433301797, | |
| "grad_norm": 0.4642963328041462, | |
| "learning_rate": 1.2634029215459442e-06, | |
| "loss": 0.1546, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07344529032707214, | |
| "step": 3325, | |
| "valid_targets_mean": 6743.8, | |
| "valid_targets_min": 3629 | |
| }, | |
| { | |
| "epoch": 6.297067171239357, | |
| "grad_norm": 0.4729718094257136, | |
| "learning_rate": 1.2306315590047912e-06, | |
| "loss": 0.1578, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08172772079706192, | |
| "step": 3330, | |
| "valid_targets_mean": 6132.5, | |
| "valid_targets_min": 3527 | |
| }, | |
| { | |
| "epoch": 6.306527909176916, | |
| "grad_norm": 0.47615121755084255, | |
| "learning_rate": 1.1982773333775822e-06, | |
| "loss": 0.1501, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.06997595727443695, | |
| "step": 3335, | |
| "valid_targets_mean": 6265.9, | |
| "valid_targets_min": 3729 | |
| }, | |
| { | |
| "epoch": 6.315988647114475, | |
| "grad_norm": 0.49720672723911824, | |
| "learning_rate": 1.1663409637157685e-06, | |
| "loss": 0.1719, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08522418141365051, | |
| "step": 3340, | |
| "valid_targets_mean": 6098.0, | |
| "valid_targets_min": 1787 | |
| }, | |
| { | |
| "epoch": 6.325449385052034, | |
| "grad_norm": 0.5893163672320445, | |
| "learning_rate": 1.1348231597842508e-06, | |
| "loss": 0.1652, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07012220472097397, | |
| "step": 3345, | |
| "valid_targets_mean": 5598.1, | |
| "valid_targets_min": 3357 | |
| }, | |
| { | |
| "epoch": 6.334910122989593, | |
| "grad_norm": 0.5056344429691663, | |
| "learning_rate": 1.1037246220455611e-06, | |
| "loss": 0.1603, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07550933957099915, | |
| "step": 3350, | |
| "valid_targets_mean": 5193.8, | |
| "valid_targets_min": 3603 | |
| }, | |
| { | |
| "epoch": 6.344370860927152, | |
| "grad_norm": 0.49788948816850587, | |
| "learning_rate": 1.0730460416443233e-06, | |
| "loss": 0.16, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07792874425649643, | |
| "step": 3355, | |
| "valid_targets_mean": 5297.8, | |
| "valid_targets_min": 1526 | |
| }, | |
| { | |
| "epoch": 6.353831598864711, | |
| "grad_norm": 0.47295359132021303, | |
| "learning_rate": 1.0427881003918783e-06, | |
| "loss": 0.1549, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.06006645783782005, | |
| "step": 3360, | |
| "valid_targets_mean": 5776.5, | |
| "valid_targets_min": 1930 | |
| }, | |
| { | |
| "epoch": 6.363292336802271, | |
| "grad_norm": 0.5180446573609132, | |
| "learning_rate": 1.012951470751149e-06, | |
| "loss": 0.1571, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.06573215126991272, | |
| "step": 3365, | |
| "valid_targets_mean": 4157.4, | |
| "valid_targets_min": 1603 | |
| }, | |
| { | |
| "epoch": 6.37275307473983, | |
| "grad_norm": 0.5142987786973194, | |
| "learning_rate": 9.835368158216707e-07, | |
| "loss": 0.1567, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.0875168964266777, | |
| "step": 3370, | |
| "valid_targets_mean": 5995.5, | |
| "valid_targets_min": 3569 | |
| }, | |
| { | |
| "epoch": 6.382213812677389, | |
| "grad_norm": 0.549315279583512, | |
| "learning_rate": 9.545447893248827e-07, | |
| "loss": 0.1581, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08440011739730835, | |
| "step": 3375, | |
| "valid_targets_mean": 5372.4, | |
| "valid_targets_min": 1524 | |
| }, | |
| { | |
| "epoch": 6.391674550614948, | |
| "grad_norm": 0.4975303100993707, | |
| "learning_rate": 9.259760355895664e-07, | |
| "loss": 0.1594, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08323267102241516, | |
| "step": 3380, | |
| "valid_targets_mean": 5243.5, | |
| "valid_targets_min": 3619 | |
| }, | |
| { | |
| "epoch": 6.401135288552507, | |
| "grad_norm": 0.44933429326711627, | |
| "learning_rate": 8.978311895375569e-07, | |
| "loss": 0.1529, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.06366902589797974, | |
| "step": 3385, | |
| "valid_targets_mean": 5606.0, | |
| "valid_targets_min": 771 | |
| }, | |
| { | |
| "epoch": 6.410596026490066, | |
| "grad_norm": 0.5244538230451249, | |
| "learning_rate": 8.701108766696098e-07, | |
| "loss": 0.1617, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08314728736877441, | |
| "step": 3390, | |
| "valid_targets_mean": 5397.2, | |
| "valid_targets_min": 976 | |
| }, | |
| { | |
| "epoch": 6.420056764427625, | |
| "grad_norm": 0.5321046922657727, | |
| "learning_rate": 8.428157130515169e-07, | |
| "loss": 0.1578, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07390115410089493, | |
| "step": 3395, | |
| "valid_targets_mean": 4900.6, | |
| "valid_targets_min": 2668 | |
| }, | |
| { | |
| "epoch": 6.429517502365185, | |
| "grad_norm": 0.4997987625144715, | |
| "learning_rate": 8.159463053004058e-07, | |
| "loss": 0.1599, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07713502645492554, | |
| "step": 3400, | |
| "valid_targets_mean": 5670.5, | |
| "valid_targets_min": 3714 | |
| }, | |
| { | |
| "epoch": 6.438978240302744, | |
| "grad_norm": 0.4783857613174314, | |
| "learning_rate": 7.89503250571253e-07, | |
| "loss": 0.1578, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07338370382785797, | |
| "step": 3405, | |
| "valid_targets_mean": 6021.6, | |
| "valid_targets_min": 4140 | |
| }, | |
| { | |
| "epoch": 6.448438978240302, | |
| "grad_norm": 0.5084014116990427, | |
| "learning_rate": 7.634871365436192e-07, | |
| "loss": 0.1577, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08246712386608124, | |
| "step": 3410, | |
| "valid_targets_mean": 6041.9, | |
| "valid_targets_min": 4465 | |
| }, | |
| { | |
| "epoch": 6.457899716177862, | |
| "grad_norm": 0.5069553293411639, | |
| "learning_rate": 7.378985414085949e-07, | |
| "loss": 0.156, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07229432463645935, | |
| "step": 3415, | |
| "valid_targets_mean": 5251.4, | |
| "valid_targets_min": 4371 | |
| }, | |
| { | |
| "epoch": 6.467360454115421, | |
| "grad_norm": 0.5195346482191306, | |
| "learning_rate": 7.127380338559331e-07, | |
| "loss": 0.1533, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08095690608024597, | |
| "step": 3420, | |
| "valid_targets_mean": 5833.1, | |
| "valid_targets_min": 2788 | |
| }, | |
| { | |
| "epoch": 6.47682119205298, | |
| "grad_norm": 0.44961617162875356, | |
| "learning_rate": 6.880061730614307e-07, | |
| "loss": 0.1556, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.06503139436244965, | |
| "step": 3425, | |
| "valid_targets_mean": 5194.5, | |
| "valid_targets_min": 2034 | |
| }, | |
| { | |
| "epoch": 6.48628192999054, | |
| "grad_norm": 0.512212152514479, | |
| "learning_rate": 6.637035086744825e-07, | |
| "loss": 0.1524, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08502800762653351, | |
| "step": 3430, | |
| "valid_targets_mean": 6129.9, | |
| "valid_targets_min": 3237 | |
| }, | |
| { | |
| "epoch": 6.4957426679280985, | |
| "grad_norm": 0.533866271670798, | |
| "learning_rate": 6.398305808058869e-07, | |
| "loss": 0.1512, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.080892413854599, | |
| "step": 3435, | |
| "valid_targets_mean": 5465.8, | |
| "valid_targets_min": 802 | |
| }, | |
| { | |
| "epoch": 6.505203405865657, | |
| "grad_norm": 0.4821294366129078, | |
| "learning_rate": 6.163879200158151e-07, | |
| "loss": 0.1577, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.0807921439409256, | |
| "step": 3440, | |
| "valid_targets_mean": 6166.4, | |
| "valid_targets_min": 1645 | |
| }, | |
| { | |
| "epoch": 6.514664143803216, | |
| "grad_norm": 0.47581791967584, | |
| "learning_rate": 5.933760473020411e-07, | |
| "loss": 0.1492, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08440031111240387, | |
| "step": 3445, | |
| "valid_targets_mean": 7176.6, | |
| "valid_targets_min": 2313 | |
| }, | |
| { | |
| "epoch": 6.524124881740776, | |
| "grad_norm": 0.49665456440010425, | |
| "learning_rate": 5.707954740883592e-07, | |
| "loss": 0.152, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07877863943576813, | |
| "step": 3450, | |
| "valid_targets_mean": 6034.4, | |
| "valid_targets_min": 4409 | |
| }, | |
| { | |
| "epoch": 6.533585619678335, | |
| "grad_norm": 0.46914359671651773, | |
| "learning_rate": 5.486467022132114e-07, | |
| "loss": 0.1462, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07220163941383362, | |
| "step": 3455, | |
| "valid_targets_mean": 5201.1, | |
| "valid_targets_min": 3269 | |
| }, | |
| { | |
| "epoch": 6.543046357615894, | |
| "grad_norm": 0.4999247739317717, | |
| "learning_rate": 5.269302239185359e-07, | |
| "loss": 0.161, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.06515920162200928, | |
| "step": 3460, | |
| "valid_targets_mean": 4510.6, | |
| "valid_targets_min": 2249 | |
| }, | |
| { | |
| "epoch": 6.5525070955534535, | |
| "grad_norm": 0.49427440693924146, | |
| "learning_rate": 5.056465218388363e-07, | |
| "loss": 0.153, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07092322409152985, | |
| "step": 3465, | |
| "valid_targets_mean": 5033.9, | |
| "valid_targets_min": 1748 | |
| }, | |
| { | |
| "epoch": 6.561967833491012, | |
| "grad_norm": 0.693524396791674, | |
| "learning_rate": 4.847960689904385e-07, | |
| "loss": 0.1533, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.06443572044372559, | |
| "step": 3470, | |
| "valid_targets_mean": 5021.1, | |
| "valid_targets_min": 2541 | |
| }, | |
| { | |
| "epoch": 6.571428571428571, | |
| "grad_norm": 0.4652753136072147, | |
| "learning_rate": 4.6437932876099767e-07, | |
| "loss": 0.1529, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07002725452184677, | |
| "step": 3475, | |
| "valid_targets_mean": 5402.4, | |
| "valid_targets_min": 1915 | |
| }, | |
| { | |
| "epoch": 6.580889309366131, | |
| "grad_norm": 0.46451032890247035, | |
| "learning_rate": 4.443967548991857e-07, | |
| "loss": 0.163, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07609806954860687, | |
| "step": 3480, | |
| "valid_targets_mean": 6261.8, | |
| "valid_targets_min": 1548 | |
| }, | |
| { | |
| "epoch": 6.59035004730369, | |
| "grad_norm": 0.4742091398940524, | |
| "learning_rate": 4.2484879150461067e-07, | |
| "loss": 0.1541, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07329309731721878, | |
| "step": 3485, | |
| "valid_targets_mean": 5271.6, | |
| "valid_targets_min": 4216 | |
| }, | |
| { | |
| "epoch": 6.599810785241249, | |
| "grad_norm": 0.4912655014563391, | |
| "learning_rate": 4.0573587301794947e-07, | |
| "loss": 0.1545, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08175991475582123, | |
| "step": 3490, | |
| "valid_targets_mean": 5803.6, | |
| "valid_targets_min": 3450 | |
| }, | |
| { | |
| "epoch": 6.609271523178808, | |
| "grad_norm": 0.47411330765123344, | |
| "learning_rate": 3.870584242112885e-07, | |
| "loss": 0.1547, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08022162318229675, | |
| "step": 3495, | |
| "valid_targets_mean": 6148.5, | |
| "valid_targets_min": 1108 | |
| }, | |
| { | |
| "epoch": 6.618732261116367, | |
| "grad_norm": 0.5467862296497042, | |
| "learning_rate": 3.688168601786912e-07, | |
| "loss": 0.1589, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08909142762422562, | |
| "step": 3500, | |
| "valid_targets_mean": 5511.9, | |
| "valid_targets_min": 2056 | |
| }, | |
| { | |
| "epoch": 6.628192999053926, | |
| "grad_norm": 0.4811284532821337, | |
| "learning_rate": 3.5101158632696584e-07, | |
| "loss": 0.1628, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.0686873197555542, | |
| "step": 3505, | |
| "valid_targets_mean": 5850.6, | |
| "valid_targets_min": 3425 | |
| }, | |
| { | |
| "epoch": 6.637653736991485, | |
| "grad_norm": 0.4764682488197729, | |
| "learning_rate": 3.336429983666545e-07, | |
| "loss": 0.159, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08067235350608826, | |
| "step": 3510, | |
| "valid_targets_mean": 6091.9, | |
| "valid_targets_min": 3877 | |
| }, | |
| { | |
| "epoch": 6.647114474929045, | |
| "grad_norm": 0.4894456490714887, | |
| "learning_rate": 3.1671148230324246e-07, | |
| "loss": 0.1523, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07609429210424423, | |
| "step": 3515, | |
| "valid_targets_mean": 6449.8, | |
| "valid_targets_min": 3351 | |
| }, | |
| { | |
| "epoch": 6.656575212866604, | |
| "grad_norm": 0.5064247856959551, | |
| "learning_rate": 3.0021741442857634e-07, | |
| "loss": 0.1568, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07491397857666016, | |
| "step": 3520, | |
| "valid_targets_mean": 5559.9, | |
| "valid_targets_min": 3138 | |
| }, | |
| { | |
| "epoch": 6.6660359508041624, | |
| "grad_norm": 0.5105415474802601, | |
| "learning_rate": 2.8416116131250836e-07, | |
| "loss": 0.1643, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.06692364066839218, | |
| "step": 3525, | |
| "valid_targets_mean": 5484.8, | |
| "valid_targets_min": 2488 | |
| }, | |
| { | |
| "epoch": 6.675496688741722, | |
| "grad_norm": 0.5083849374762096, | |
| "learning_rate": 2.6854307979474306e-07, | |
| "loss": 0.1575, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08597913384437561, | |
| "step": 3530, | |
| "valid_targets_mean": 6459.1, | |
| "valid_targets_min": 767 | |
| }, | |
| { | |
| "epoch": 6.684957426679281, | |
| "grad_norm": 0.4342549973357616, | |
| "learning_rate": 2.5336351697690553e-07, | |
| "loss": 0.1585, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.0754256471991539, | |
| "step": 3535, | |
| "valid_targets_mean": 5868.1, | |
| "valid_targets_min": 3233 | |
| }, | |
| { | |
| "epoch": 6.69441816461684, | |
| "grad_norm": 0.5062031786978933, | |
| "learning_rate": 2.386228102148347e-07, | |
| "loss": 0.1623, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07034958899021149, | |
| "step": 3540, | |
| "valid_targets_mean": 5718.9, | |
| "valid_targets_min": 2398 | |
| }, | |
| { | |
| "epoch": 6.703878902554399, | |
| "grad_norm": 0.500948493630646, | |
| "learning_rate": 2.2432128711107558e-07, | |
| "loss": 0.1517, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07873102277517319, | |
| "step": 3545, | |
| "valid_targets_mean": 6648.0, | |
| "valid_targets_min": 4973 | |
| }, | |
| { | |
| "epoch": 6.7133396404919585, | |
| "grad_norm": 0.45111358239646104, | |
| "learning_rate": 2.1045926550760988e-07, | |
| "loss": 0.1484, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07121957838535309, | |
| "step": 3550, | |
| "valid_targets_mean": 5770.5, | |
| "valid_targets_min": 3291 | |
| }, | |
| { | |
| "epoch": 6.722800378429517, | |
| "grad_norm": 0.46097603655795455, | |
| "learning_rate": 1.9703705347878355e-07, | |
| "loss": 0.1492, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.06768956780433655, | |
| "step": 3555, | |
| "valid_targets_mean": 5942.2, | |
| "valid_targets_min": 3064 | |
| }, | |
| { | |
| "epoch": 6.732261116367076, | |
| "grad_norm": 0.4964329737190062, | |
| "learning_rate": 1.8405494932446366e-07, | |
| "loss": 0.1615, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.06672897189855576, | |
| "step": 3560, | |
| "valid_targets_mean": 5239.6, | |
| "valid_targets_min": 1596 | |
| }, | |
| { | |
| "epoch": 6.741721854304636, | |
| "grad_norm": 0.5504838449968519, | |
| "learning_rate": 1.7151324156340355e-07, | |
| "loss": 0.1496, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07267464697360992, | |
| "step": 3565, | |
| "valid_targets_mean": 4650.8, | |
| "valid_targets_min": 2054 | |
| }, | |
| { | |
| "epoch": 6.751182592242195, | |
| "grad_norm": 0.5237161400407134, | |
| "learning_rate": 1.5941220892684572e-07, | |
| "loss": 0.1534, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08781194686889648, | |
| "step": 3570, | |
| "valid_targets_mean": 6332.1, | |
| "valid_targets_min": 4998 | |
| }, | |
| { | |
| "epoch": 6.760643330179754, | |
| "grad_norm": 0.5490099004104453, | |
| "learning_rate": 1.4775212035230691e-07, | |
| "loss": 0.157, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08914782106876373, | |
| "step": 3575, | |
| "valid_targets_mean": 4676.5, | |
| "valid_targets_min": 746 | |
| }, | |
| { | |
| "epoch": 6.7701040681173135, | |
| "grad_norm": 0.4995209315574402, | |
| "learning_rate": 1.3653323497761607e-07, | |
| "loss": 0.1548, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.06628039479255676, | |
| "step": 3580, | |
| "valid_targets_mean": 5913.0, | |
| "valid_targets_min": 3502 | |
| }, | |
| { | |
| "epoch": 6.779564806054872, | |
| "grad_norm": 0.513748933958977, | |
| "learning_rate": 1.2575580213514792e-07, | |
| "loss": 0.1619, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08485302329063416, | |
| "step": 3585, | |
| "valid_targets_mean": 6715.2, | |
| "valid_targets_min": 2699 | |
| }, | |
| { | |
| "epoch": 6.789025543992431, | |
| "grad_norm": 0.5698785487991007, | |
| "learning_rate": 1.1542006134628747e-07, | |
| "loss": 0.156, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09485235810279846, | |
| "step": 3590, | |
| "valid_targets_mean": 5735.1, | |
| "valid_targets_min": 1654 | |
| }, | |
| { | |
| "epoch": 6.798486281929991, | |
| "grad_norm": 0.502423517172928, | |
| "learning_rate": 1.0552624231609632e-07, | |
| "loss": 0.1528, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09140385687351227, | |
| "step": 3595, | |
| "valid_targets_mean": 6541.4, | |
| "valid_targets_min": 3404 | |
| }, | |
| { | |
| "epoch": 6.80794701986755, | |
| "grad_norm": 0.5486811174751066, | |
| "learning_rate": 9.607456492822132e-08, | |
| "loss": 0.1592, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.06690894812345505, | |
| "step": 3600, | |
| "valid_targets_mean": 4560.6, | |
| "valid_targets_min": 1293 | |
| }, | |
| { | |
| "epoch": 6.815515610217597, | |
| "grad_norm": 0.41971865894051924, | |
| "learning_rate": 8.706523924000066e-08, | |
| "loss": 0.1448, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.06027888506650925, | |
| "step": 3605, | |
| "valid_targets_mean": 6555.0, | |
| "valid_targets_min": 2788 | |
| }, | |
| { | |
| "epoch": 6.824976348155156, | |
| "grad_norm": 0.4407706789613994, | |
| "learning_rate": 7.849846547778983e-08, | |
| "loss": 0.1496, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.06519781798124313, | |
| "step": 3610, | |
| "valid_targets_mean": 6884.4, | |
| "valid_targets_min": 1265 | |
| }, | |
| { | |
| "epoch": 6.8344370860927155, | |
| "grad_norm": 0.4978073691618193, | |
| "learning_rate": 7.03744340325252e-08, | |
| "loss": 0.1498, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07424402236938477, | |
| "step": 3615, | |
| "valid_targets_mean": 6403.1, | |
| "valid_targets_min": 4674 | |
| }, | |
| { | |
| "epoch": 6.843897824030274, | |
| "grad_norm": 0.5085529248955364, | |
| "learning_rate": 6.269332545548068e-08, | |
| "loss": 0.1473, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08670083433389664, | |
| "step": 3620, | |
| "valid_targets_mean": 4922.5, | |
| "valid_targets_min": 1785 | |
| }, | |
| { | |
| "epoch": 6.853358561967833, | |
| "grad_norm": 0.4344651976137186, | |
| "learning_rate": 5.5455310454259894e-08, | |
| "loss": 0.1514, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07209931313991547, | |
| "step": 3625, | |
| "valid_targets_mean": 7308.1, | |
| "valid_targets_min": 4659 | |
| }, | |
| { | |
| "epoch": 6.862819299905393, | |
| "grad_norm": 0.4413038947876362, | |
| "learning_rate": 4.866054988900581e-08, | |
| "loss": 0.1464, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.06930118799209595, | |
| "step": 3630, | |
| "valid_targets_mean": 6234.8, | |
| "valid_targets_min": 976 | |
| }, | |
| { | |
| "epoch": 6.872280037842952, | |
| "grad_norm": 0.48107253716280135, | |
| "learning_rate": 4.230919476881479e-08, | |
| "loss": 0.1487, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.06881730258464813, | |
| "step": 3635, | |
| "valid_targets_mean": 5447.9, | |
| "valid_targets_min": 2560 | |
| }, | |
| { | |
| "epoch": 6.881740775780511, | |
| "grad_norm": 0.5104224535773499, | |
| "learning_rate": 3.640138624839695e-08, | |
| "loss": 0.1429, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.06860077381134033, | |
| "step": 3640, | |
| "valid_targets_mean": 6469.2, | |
| "valid_targets_min": 3160 | |
| }, | |
| { | |
| "epoch": 6.8912015137180695, | |
| "grad_norm": 0.4870345502815453, | |
| "learning_rate": 3.093725562492544e-08, | |
| "loss": 0.1445, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07753711938858032, | |
| "step": 3645, | |
| "valid_targets_mean": 6092.4, | |
| "valid_targets_min": 2908 | |
| }, | |
| { | |
| "epoch": 6.900662251655629, | |
| "grad_norm": 0.4461190608500686, | |
| "learning_rate": 2.591692433511872e-08, | |
| "loss": 0.148, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.060674458742141724, | |
| "step": 3650, | |
| "valid_targets_mean": 4704.4, | |
| "valid_targets_min": 1739 | |
| }, | |
| { | |
| "epoch": 6.910122989593188, | |
| "grad_norm": 0.46744896878817527, | |
| "learning_rate": 2.1340503952551606e-08, | |
| "loss": 0.1454, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07023951411247253, | |
| "step": 3655, | |
| "valid_targets_mean": 6859.5, | |
| "valid_targets_min": 4199 | |
| }, | |
| { | |
| "epoch": 6.919583727530747, | |
| "grad_norm": 0.4423365813985784, | |
| "learning_rate": 1.720809618516839e-08, | |
| "loss": 0.1392, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.05241403728723526, | |
| "step": 3660, | |
| "valid_targets_mean": 5074.6, | |
| "valid_targets_min": 1461 | |
| }, | |
| { | |
| "epoch": 6.929044465468307, | |
| "grad_norm": 0.4940830080464678, | |
| "learning_rate": 1.351979287302463e-08, | |
| "loss": 0.1519, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.06597153842449188, | |
| "step": 3665, | |
| "valid_targets_mean": 5494.9, | |
| "valid_targets_min": 1333 | |
| }, | |
| { | |
| "epoch": 6.938505203405866, | |
| "grad_norm": 0.5141965819400358, | |
| "learning_rate": 1.0275675986242128e-08, | |
| "loss": 0.1502, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.06885077059268951, | |
| "step": 3670, | |
| "valid_targets_mean": 5493.0, | |
| "valid_targets_min": 1550 | |
| }, | |
| { | |
| "epoch": 6.9479659413434245, | |
| "grad_norm": 0.49214237360203794, | |
| "learning_rate": 7.475817623194826e-09, | |
| "loss": 0.1505, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.06208660453557968, | |
| "step": 3675, | |
| "valid_targets_mean": 4805.9, | |
| "valid_targets_min": 699 | |
| }, | |
| { | |
| "epoch": 6.957426679280984, | |
| "grad_norm": 0.5374621708278976, | |
| "learning_rate": 5.120280008901191e-09, | |
| "loss": 0.1467, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.080912746489048, | |
| "step": 3680, | |
| "valid_targets_mean": 5787.4, | |
| "valid_targets_min": 4241 | |
| }, | |
| { | |
| "epoch": 6.966887417218543, | |
| "grad_norm": 0.5296512592742303, | |
| "learning_rate": 3.2091154936386705e-09, | |
| "loss": 0.1559, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.06907324492931366, | |
| "step": 3685, | |
| "valid_targets_mean": 5773.0, | |
| "valid_targets_min": 3043 | |
| }, | |
| { | |
| "epoch": 6.976348155156102, | |
| "grad_norm": 0.4419333491426742, | |
| "learning_rate": 1.7423665517868338e-09, | |
| "loss": 0.1487, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.05245755612850189, | |
| "step": 3690, | |
| "valid_targets_mean": 5486.1, | |
| "valid_targets_min": 1693 | |
| }, | |
| { | |
| "epoch": 6.985808893093662, | |
| "grad_norm": 0.508769965540501, | |
| "learning_rate": 7.200657808792422e-10, | |
| "loss": 0.1516, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08677756786346436, | |
| "step": 3695, | |
| "valid_targets_mean": 6286.6, | |
| "valid_targets_min": 2001 | |
| }, | |
| { | |
| "epoch": 6.995269631031221, | |
| "grad_norm": 0.47535964660935565, | |
| "learning_rate": 1.4223590088180416e-10, | |
| "loss": 0.1429, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07410024106502533, | |
| "step": 3700, | |
| "valid_targets_mean": 5791.9, | |
| "valid_targets_min": 2377 | |
| }, | |
| { | |
| "epoch": 6.999053926206244, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14962854981422424, | |
| "step": 3702, | |
| "total_flos": 2.2812121694270915e+18, | |
| "train_loss": 0.004161321395542221, | |
| "train_runtime": 3645.582, | |
| "train_samples_per_second": 16.237, | |
| "train_steps_per_second": 1.016, | |
| "valid_targets_mean": 6778.8, | |
| "valid_targets_min": 4131 | |
| } | |
| ], | |
| "logging_steps": 5, | |
| "max_steps": 3703, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 7, | |
| "save_steps": 200, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 2.2812121694270915e+18, | |
| "train_batch_size": 1, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |