diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,10101 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 7.0, + "eval_steps": 500, + "global_step": 4571, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.007656967840735069, + "grad_norm": 13.997882752776386, + "learning_rate": 3.4934497816593887e-07, + "loss": 0.6676, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.6494572758674622, + "step": 5, + "valid_targets_mean": 5030.6, + "valid_targets_min": 868 + }, + { + "epoch": 0.015313935681470138, + "grad_norm": 16.042150352702777, + "learning_rate": 7.860262008733626e-07, + "loss": 0.6736, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.6432679295539856, + "step": 10, + "valid_targets_mean": 5537.1, + "valid_targets_min": 972 + }, + { + "epoch": 0.022970903522205207, + "grad_norm": 13.768207464178623, + "learning_rate": 1.222707423580786e-06, + "loss": 0.657, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.5933096408843994, + "step": 15, + "valid_targets_mean": 5550.1, + "valid_targets_min": 622 + }, + { + "epoch": 0.030627871362940276, + "grad_norm": 11.169416861656957, + "learning_rate": 1.6593886462882098e-06, + "loss": 0.6091, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.5853960514068604, + "step": 20, + "valid_targets_mean": 4935.4, + "valid_targets_min": 885 + }, + { + "epoch": 0.03828483920367534, + "grad_norm": 7.70196829557587, + "learning_rate": 2.096069868995633e-06, + "loss": 0.5846, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.5225365161895752, + "step": 25, + "valid_targets_mean": 4887.4, + "valid_targets_min": 281 + }, + { + "epoch": 0.045941807044410414, + "grad_norm": 4.492171953461492, + "learning_rate": 2.5327510917030567e-06, + "loss": 0.5128, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.5070335268974304, + "step": 30, + "valid_targets_mean": 5811.4, + "valid_targets_min": 885 + }, + { + "epoch": 0.05359877488514548, + "grad_norm": 2.953910981015742, + "learning_rate": 2.9694323144104806e-06, + "loss": 0.4624, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.46929267048835754, + "step": 35, + "valid_targets_mean": 4496.3, + "valid_targets_min": 578 + }, + { + "epoch": 0.06125574272588055, + "grad_norm": 1.7185069687468981, + "learning_rate": 3.406113537117904e-06, + "loss": 0.4371, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.45726278424263, + "step": 40, + "valid_targets_mean": 4568.8, + "valid_targets_min": 588 + }, + { + "epoch": 0.06891271056661562, + "grad_norm": 1.2031548219005852, + "learning_rate": 3.842794759825328e-06, + "loss": 0.4393, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.40769797563552856, + "step": 45, + "valid_targets_mean": 5177.7, + "valid_targets_min": 660 + }, + { + "epoch": 0.07656967840735068, + "grad_norm": 1.0641842496634077, + "learning_rate": 4.279475982532751e-06, + "loss": 0.4176, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4184693992137909, + "step": 50, + "valid_targets_mean": 4836.5, + "valid_targets_min": 461 + }, + { + "epoch": 0.08422664624808576, + "grad_norm": 0.828475734146539, + "learning_rate": 4.716157205240175e-06, + "loss": 0.3962, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.39677250385284424, + "step": 55, + "valid_targets_mean": 5396.1, + "valid_targets_min": 1014 + }, + { + "epoch": 0.09188361408882083, + "grad_norm": 0.7394723747598436, + "learning_rate": 5.152838427947598e-06, + "loss": 0.3992, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4369763731956482, + "step": 60, + "valid_targets_mean": 5712.2, + "valid_targets_min": 701 + }, + { + "epoch": 0.0995405819295559, + "grad_norm": 0.7090139134911532, + "learning_rate": 5.589519650655022e-06, + "loss": 0.3872, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3982025682926178, + "step": 65, + "valid_targets_mean": 5389.4, + "valid_targets_min": 2834 + }, + { + "epoch": 0.10719754977029096, + "grad_norm": 0.593009086861499, + "learning_rate": 6.0262008733624455e-06, + "loss": 0.3983, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3946113586425781, + "step": 70, + "valid_targets_mean": 5376.4, + "valid_targets_min": 2715 + }, + { + "epoch": 0.11485451761102604, + "grad_norm": 0.636095693800591, + "learning_rate": 6.462882096069869e-06, + "loss": 0.3619, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.32831794023513794, + "step": 75, + "valid_targets_mean": 5254.6, + "valid_targets_min": 966 + }, + { + "epoch": 0.1225114854517611, + "grad_norm": 0.6038700595775229, + "learning_rate": 6.8995633187772934e-06, + "loss": 0.354, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3565768003463745, + "step": 80, + "valid_targets_mean": 4155.2, + "valid_targets_min": 629 + }, + { + "epoch": 0.13016845329249618, + "grad_norm": 0.9336428663228459, + "learning_rate": 7.336244541484717e-06, + "loss": 0.3673, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.34234848618507385, + "step": 85, + "valid_targets_mean": 4569.0, + "valid_targets_min": 750 + }, + { + "epoch": 0.13782542113323124, + "grad_norm": 0.4930514762752864, + "learning_rate": 7.77292576419214e-06, + "loss": 0.326, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.29909199476242065, + "step": 90, + "valid_targets_mean": 5865.1, + "valid_targets_min": 731 + }, + { + "epoch": 0.14548238897396631, + "grad_norm": 0.5516406220071951, + "learning_rate": 8.209606986899564e-06, + "loss": 0.3488, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.35991543531417847, + "step": 95, + "valid_targets_mean": 4539.4, + "valid_targets_min": 718 + }, + { + "epoch": 0.15313935681470137, + "grad_norm": 0.512386584582494, + "learning_rate": 8.646288209606988e-06, + "loss": 0.3378, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.31739532947540283, + "step": 100, + "valid_targets_mean": 5295.0, + "valid_targets_min": 358 + }, + { + "epoch": 0.16079632465543645, + "grad_norm": 0.5553107691519705, + "learning_rate": 9.082969432314411e-06, + "loss": 0.3145, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.34066033363342285, + "step": 105, + "valid_targets_mean": 4267.7, + "valid_targets_min": 671 + }, + { + "epoch": 0.16845329249617153, + "grad_norm": 0.5564940658094145, + "learning_rate": 9.519650655021835e-06, + "loss": 0.3333, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3298887014389038, + "step": 110, + "valid_targets_mean": 4755.4, + "valid_targets_min": 862 + }, + { + "epoch": 0.17611026033690658, + "grad_norm": 0.5091671224165091, + "learning_rate": 9.956331877729258e-06, + "loss": 0.3392, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.32190486788749695, + "step": 115, + "valid_targets_mean": 4750.4, + "valid_targets_min": 971 + }, + { + "epoch": 0.18376722817764166, + "grad_norm": 0.4905326318477757, + "learning_rate": 1.0393013100436682e-05, + "loss": 0.3471, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3529035151004791, + "step": 120, + "valid_targets_mean": 5538.1, + "valid_targets_min": 935 + }, + { + "epoch": 0.19142419601837674, + "grad_norm": 0.5048261747177855, + "learning_rate": 1.0829694323144107e-05, + "loss": 0.3196, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.30244866013526917, + "step": 125, + "valid_targets_mean": 4734.5, + "valid_targets_min": 638 + }, + { + "epoch": 0.1990811638591118, + "grad_norm": 0.5361239607380978, + "learning_rate": 1.1266375545851529e-05, + "loss": 0.3294, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.308383584022522, + "step": 130, + "valid_targets_mean": 4705.6, + "valid_targets_min": 873 + }, + { + "epoch": 0.20673813169984687, + "grad_norm": 0.46801822029453244, + "learning_rate": 1.1703056768558954e-05, + "loss": 0.2881, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.29804354906082153, + "step": 135, + "valid_targets_mean": 5411.8, + "valid_targets_min": 2391 + }, + { + "epoch": 0.21439509954058192, + "grad_norm": 0.47487492624683453, + "learning_rate": 1.2139737991266376e-05, + "loss": 0.3113, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.26761284470558167, + "step": 140, + "valid_targets_mean": 4593.1, + "valid_targets_min": 564 + }, + { + "epoch": 0.222052067381317, + "grad_norm": 0.4825202183874898, + "learning_rate": 1.2576419213973801e-05, + "loss": 0.3008, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27101266384124756, + "step": 145, + "valid_targets_mean": 4800.3, + "valid_targets_min": 720 + }, + { + "epoch": 0.22970903522205208, + "grad_norm": 0.5108014073726733, + "learning_rate": 1.3013100436681223e-05, + "loss": 0.3192, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.32333505153656006, + "step": 150, + "valid_targets_mean": 4791.2, + "valid_targets_min": 832 + }, + { + "epoch": 0.23736600306278713, + "grad_norm": 0.516438690481993, + "learning_rate": 1.3449781659388648e-05, + "loss": 0.3057, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3009350001811981, + "step": 155, + "valid_targets_mean": 5810.0, + "valid_targets_min": 412 + }, + { + "epoch": 0.2450229709035222, + "grad_norm": 0.5405272774787385, + "learning_rate": 1.388646288209607e-05, + "loss": 0.2992, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2833111882209778, + "step": 160, + "valid_targets_mean": 4378.3, + "valid_targets_min": 700 + }, + { + "epoch": 0.25267993874425726, + "grad_norm": 0.48541608893286375, + "learning_rate": 1.4323144104803495e-05, + "loss": 0.3087, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2991043031215668, + "step": 165, + "valid_targets_mean": 5449.2, + "valid_targets_min": 854 + }, + { + "epoch": 0.26033690658499237, + "grad_norm": 0.4369414808456571, + "learning_rate": 1.4759825327510919e-05, + "loss": 0.3029, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.29198282957077026, + "step": 170, + "valid_targets_mean": 5611.1, + "valid_targets_min": 470 + }, + { + "epoch": 0.2679938744257274, + "grad_norm": 0.4650699347244396, + "learning_rate": 1.5196506550218343e-05, + "loss": 0.2759, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2635342478752136, + "step": 175, + "valid_targets_mean": 5909.5, + "valid_targets_min": 904 + }, + { + "epoch": 0.27565084226646247, + "grad_norm": 0.38904002970727797, + "learning_rate": 1.5633187772925766e-05, + "loss": 0.28, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23884126543998718, + "step": 180, + "valid_targets_mean": 6394.3, + "valid_targets_min": 2659 + }, + { + "epoch": 0.2833078101071976, + "grad_norm": 0.4430505165500373, + "learning_rate": 1.6069868995633188e-05, + "loss": 0.2997, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2831938862800598, + "step": 185, + "valid_targets_mean": 6014.1, + "valid_targets_min": 2080 + }, + { + "epoch": 0.29096477794793263, + "grad_norm": 0.5388790759052032, + "learning_rate": 1.6506550218340613e-05, + "loss": 0.2739, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2641902565956116, + "step": 190, + "valid_targets_mean": 5561.6, + "valid_targets_min": 2686 + }, + { + "epoch": 0.2986217457886677, + "grad_norm": 0.42663945124008945, + "learning_rate": 1.6943231441048035e-05, + "loss": 0.2874, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2681276798248291, + "step": 195, + "valid_targets_mean": 5588.3, + "valid_targets_min": 814 + }, + { + "epoch": 0.30627871362940273, + "grad_norm": 0.4943538108066963, + "learning_rate": 1.737991266375546e-05, + "loss": 0.3049, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2886020839214325, + "step": 200, + "valid_targets_mean": 4703.8, + "valid_targets_min": 837 + }, + { + "epoch": 0.31393568147013784, + "grad_norm": 0.5295715062499827, + "learning_rate": 1.7816593886462882e-05, + "loss": 0.2966, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.28761541843414307, + "step": 205, + "valid_targets_mean": 4730.3, + "valid_targets_min": 648 + }, + { + "epoch": 0.3215926493108729, + "grad_norm": 0.5017011172561703, + "learning_rate": 1.8253275109170307e-05, + "loss": 0.2838, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.292784184217453, + "step": 210, + "valid_targets_mean": 5370.1, + "valid_targets_min": 1679 + }, + { + "epoch": 0.32924961715160794, + "grad_norm": 0.5384473207239906, + "learning_rate": 1.868995633187773e-05, + "loss": 0.2944, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2923944890499115, + "step": 215, + "valid_targets_mean": 4821.5, + "valid_targets_min": 561 + }, + { + "epoch": 0.33690658499234305, + "grad_norm": 0.54252385993879, + "learning_rate": 1.9126637554585155e-05, + "loss": 0.2731, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25203627347946167, + "step": 220, + "valid_targets_mean": 4703.9, + "valid_targets_min": 862 + }, + { + "epoch": 0.3445635528330781, + "grad_norm": 0.4590346647689083, + "learning_rate": 1.9563318777292576e-05, + "loss": 0.2939, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.26956477761268616, + "step": 225, + "valid_targets_mean": 5403.2, + "valid_targets_min": 292 + }, + { + "epoch": 0.35222052067381315, + "grad_norm": 0.591472293986723, + "learning_rate": 2e-05, + "loss": 0.2893, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2686610817909241, + "step": 230, + "valid_targets_mean": 5279.8, + "valid_targets_min": 312 + }, + { + "epoch": 0.35987748851454826, + "grad_norm": 0.5351371337014471, + "learning_rate": 2.0436681222707423e-05, + "loss": 0.2962, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.302329421043396, + "step": 235, + "valid_targets_mean": 4985.8, + "valid_targets_min": 585 + }, + { + "epoch": 0.3675344563552833, + "grad_norm": 0.50551102825952, + "learning_rate": 2.0873362445414852e-05, + "loss": 0.2767, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.26980355381965637, + "step": 240, + "valid_targets_mean": 4849.7, + "valid_targets_min": 512 + }, + { + "epoch": 0.37519142419601836, + "grad_norm": 0.5476580929537973, + "learning_rate": 2.1310043668122274e-05, + "loss": 0.2783, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.30016613006591797, + "step": 245, + "valid_targets_mean": 4602.2, + "valid_targets_min": 611 + }, + { + "epoch": 0.38284839203675347, + "grad_norm": 0.538603533015244, + "learning_rate": 2.1746724890829696e-05, + "loss": 0.2917, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2950373888015747, + "step": 250, + "valid_targets_mean": 4244.2, + "valid_targets_min": 614 + }, + { + "epoch": 0.3905053598774885, + "grad_norm": 0.5405956170504431, + "learning_rate": 2.2183406113537118e-05, + "loss": 0.2691, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.28181028366088867, + "step": 255, + "valid_targets_mean": 4765.5, + "valid_targets_min": 625 + }, + { + "epoch": 0.3981623277182236, + "grad_norm": 0.4349349330799182, + "learning_rate": 2.2620087336244546e-05, + "loss": 0.2767, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23736879229545593, + "step": 260, + "valid_targets_mean": 5519.2, + "valid_targets_min": 829 + }, + { + "epoch": 0.4058192955589586, + "grad_norm": 0.479699481190532, + "learning_rate": 2.3056768558951968e-05, + "loss": 0.2741, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.250985324382782, + "step": 265, + "valid_targets_mean": 4890.2, + "valid_targets_min": 267 + }, + { + "epoch": 0.41347626339969373, + "grad_norm": 0.48110082764216044, + "learning_rate": 2.349344978165939e-05, + "loss": 0.2735, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2574954926967621, + "step": 270, + "valid_targets_mean": 5548.7, + "valid_targets_min": 1079 + }, + { + "epoch": 0.4211332312404288, + "grad_norm": 0.5259890409455534, + "learning_rate": 2.3930131004366812e-05, + "loss": 0.2685, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2683018445968628, + "step": 275, + "valid_targets_mean": 5199.2, + "valid_targets_min": 764 + }, + { + "epoch": 0.42879019908116384, + "grad_norm": 0.4872321788705009, + "learning_rate": 2.436681222707424e-05, + "loss": 0.2817, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2631978988647461, + "step": 280, + "valid_targets_mean": 5506.2, + "valid_targets_min": 2013 + }, + { + "epoch": 0.43644716692189894, + "grad_norm": 0.49171872502106884, + "learning_rate": 2.4803493449781662e-05, + "loss": 0.2738, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2735797166824341, + "step": 285, + "valid_targets_mean": 5573.5, + "valid_targets_min": 701 + }, + { + "epoch": 0.444104134762634, + "grad_norm": 0.5457671973926761, + "learning_rate": 2.5240174672489084e-05, + "loss": 0.2705, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.272636353969574, + "step": 290, + "valid_targets_mean": 4353.8, + "valid_targets_min": 851 + }, + { + "epoch": 0.45176110260336905, + "grad_norm": 0.6850256638019349, + "learning_rate": 2.567685589519651e-05, + "loss": 0.2741, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25342246890068054, + "step": 295, + "valid_targets_mean": 5045.3, + "valid_targets_min": 585 + }, + { + "epoch": 0.45941807044410415, + "grad_norm": 0.5394245332540767, + "learning_rate": 2.6113537117903935e-05, + "loss": 0.2751, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2724701166152954, + "step": 300, + "valid_targets_mean": 4828.3, + "valid_targets_min": 875 + }, + { + "epoch": 0.4670750382848392, + "grad_norm": 0.5175333803982558, + "learning_rate": 2.6550218340611357e-05, + "loss": 0.2564, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2658703625202179, + "step": 305, + "valid_targets_mean": 5024.8, + "valid_targets_min": 301 + }, + { + "epoch": 0.47473200612557426, + "grad_norm": 0.4885980644156084, + "learning_rate": 2.698689956331878e-05, + "loss": 0.2811, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27572572231292725, + "step": 310, + "valid_targets_mean": 4917.0, + "valid_targets_min": 993 + }, + { + "epoch": 0.48238897396630936, + "grad_norm": 0.9846395603780147, + "learning_rate": 2.7423580786026204e-05, + "loss": 0.2814, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2755519151687622, + "step": 315, + "valid_targets_mean": 4701.9, + "valid_targets_min": 802 + }, + { + "epoch": 0.4900459418070444, + "grad_norm": 0.5476680571665029, + "learning_rate": 2.786026200873363e-05, + "loss": 0.2835, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.31073257327079773, + "step": 320, + "valid_targets_mean": 5116.8, + "valid_targets_min": 806 + }, + { + "epoch": 0.49770290964777947, + "grad_norm": 0.48787777876251054, + "learning_rate": 2.829694323144105e-05, + "loss": 0.2691, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24321046471595764, + "step": 325, + "valid_targets_mean": 4564.9, + "valid_targets_min": 591 + }, + { + "epoch": 0.5053598774885145, + "grad_norm": 0.581959700960055, + "learning_rate": 2.8733624454148473e-05, + "loss": 0.2604, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2841954827308655, + "step": 330, + "valid_targets_mean": 5433.1, + "valid_targets_min": 538 + }, + { + "epoch": 0.5130168453292496, + "grad_norm": 0.5197427771747507, + "learning_rate": 2.9170305676855898e-05, + "loss": 0.2606, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24852226674556732, + "step": 335, + "valid_targets_mean": 5383.6, + "valid_targets_min": 2532 + }, + { + "epoch": 0.5206738131699847, + "grad_norm": 0.5832961166696086, + "learning_rate": 2.960698689956332e-05, + "loss": 0.2628, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.283719003200531, + "step": 340, + "valid_targets_mean": 4863.8, + "valid_targets_min": 809 + }, + { + "epoch": 0.5283307810107197, + "grad_norm": 0.5190165410837098, + "learning_rate": 3.0043668122270745e-05, + "loss": 0.2647, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2984423041343689, + "step": 345, + "valid_targets_mean": 5248.1, + "valid_targets_min": 472 + }, + { + "epoch": 0.5359877488514548, + "grad_norm": 0.5424715125473839, + "learning_rate": 3.0480349344978167e-05, + "loss": 0.2731, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27597424387931824, + "step": 350, + "valid_targets_mean": 4519.0, + "valid_targets_min": 645 + }, + { + "epoch": 0.5436447166921899, + "grad_norm": 0.5792391186945091, + "learning_rate": 3.091703056768559e-05, + "loss": 0.27, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.26070481538772583, + "step": 355, + "valid_targets_mean": 4367.1, + "valid_targets_min": 625 + }, + { + "epoch": 0.5513016845329249, + "grad_norm": 0.61377111290995, + "learning_rate": 3.1353711790393014e-05, + "loss": 0.2653, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.30032479763031006, + "step": 360, + "valid_targets_mean": 4076.1, + "valid_targets_min": 655 + }, + { + "epoch": 0.55895865237366, + "grad_norm": 0.4499780741388462, + "learning_rate": 3.1790393013100436e-05, + "loss": 0.2696, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.26613035798072815, + "step": 365, + "valid_targets_mean": 5930.6, + "valid_targets_min": 2546 + }, + { + "epoch": 0.5666156202143952, + "grad_norm": 0.6040296111678708, + "learning_rate": 3.2227074235807864e-05, + "loss": 0.2621, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.26369449496269226, + "step": 370, + "valid_targets_mean": 4356.6, + "valid_targets_min": 767 + }, + { + "epoch": 0.5742725880551302, + "grad_norm": 0.5341830406768366, + "learning_rate": 3.2663755458515286e-05, + "loss": 0.2657, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.30968743562698364, + "step": 375, + "valid_targets_mean": 5326.2, + "valid_targets_min": 924 + }, + { + "epoch": 0.5819295558958653, + "grad_norm": 0.6231930070736144, + "learning_rate": 3.310043668122271e-05, + "loss": 0.267, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.29652100801467896, + "step": 380, + "valid_targets_mean": 4574.1, + "valid_targets_min": 313 + }, + { + "epoch": 0.5895865237366003, + "grad_norm": 0.575410318717094, + "learning_rate": 3.353711790393013e-05, + "loss": 0.2579, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2684730589389801, + "step": 385, + "valid_targets_mean": 4358.5, + "valid_targets_min": 308 + }, + { + "epoch": 0.5972434915773354, + "grad_norm": 0.4673654478032364, + "learning_rate": 3.397379912663756e-05, + "loss": 0.2557, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2228243201971054, + "step": 390, + "valid_targets_mean": 5666.8, + "valid_targets_min": 625 + }, + { + "epoch": 0.6049004594180705, + "grad_norm": 0.48675286049843114, + "learning_rate": 3.441048034934498e-05, + "loss": 0.2616, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23089733719825745, + "step": 395, + "valid_targets_mean": 5237.9, + "valid_targets_min": 379 + }, + { + "epoch": 0.6125574272588055, + "grad_norm": 0.560591662575993, + "learning_rate": 3.48471615720524e-05, + "loss": 0.2688, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2665928602218628, + "step": 400, + "valid_targets_mean": 4672.1, + "valid_targets_min": 645 + }, + { + "epoch": 0.6202143950995406, + "grad_norm": 0.42731570802442403, + "learning_rate": 3.5283842794759824e-05, + "loss": 0.2403, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22624076902866364, + "step": 405, + "valid_targets_mean": 5482.8, + "valid_targets_min": 592 + }, + { + "epoch": 0.6278713629402757, + "grad_norm": 0.5347556972076133, + "learning_rate": 3.572052401746725e-05, + "loss": 0.2643, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23603232204914093, + "step": 410, + "valid_targets_mean": 5739.4, + "valid_targets_min": 743 + }, + { + "epoch": 0.6355283307810107, + "grad_norm": 0.5325226811766738, + "learning_rate": 3.6157205240174675e-05, + "loss": 0.2515, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2480698823928833, + "step": 415, + "valid_targets_mean": 4790.4, + "valid_targets_min": 1224 + }, + { + "epoch": 0.6431852986217458, + "grad_norm": 0.48048305351290477, + "learning_rate": 3.6593886462882097e-05, + "loss": 0.2573, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24673986434936523, + "step": 420, + "valid_targets_mean": 5280.0, + "valid_targets_min": 949 + }, + { + "epoch": 0.6508422664624809, + "grad_norm": 0.5456250025761155, + "learning_rate": 3.7030567685589525e-05, + "loss": 0.2501, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2480792999267578, + "step": 425, + "valid_targets_mean": 5479.8, + "valid_targets_min": 1915 + }, + { + "epoch": 0.6584992343032159, + "grad_norm": 0.564296936232767, + "learning_rate": 3.746724890829695e-05, + "loss": 0.2549, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2615630030632019, + "step": 430, + "valid_targets_mean": 5361.0, + "valid_targets_min": 877 + }, + { + "epoch": 0.666156202143951, + "grad_norm": 0.48996027764636907, + "learning_rate": 3.790393013100437e-05, + "loss": 0.2639, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24756395816802979, + "step": 435, + "valid_targets_mean": 4916.1, + "valid_targets_min": 326 + }, + { + "epoch": 0.6738131699846861, + "grad_norm": 0.44623955424804534, + "learning_rate": 3.834061135371179e-05, + "loss": 0.2372, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23491841554641724, + "step": 440, + "valid_targets_mean": 5672.4, + "valid_targets_min": 429 + }, + { + "epoch": 0.6814701378254211, + "grad_norm": 0.45284851458486386, + "learning_rate": 3.877729257641922e-05, + "loss": 0.2497, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2342417687177658, + "step": 445, + "valid_targets_mean": 5099.3, + "valid_targets_min": 687 + }, + { + "epoch": 0.6891271056661562, + "grad_norm": 0.5324943693738649, + "learning_rate": 3.921397379912664e-05, + "loss": 0.2514, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27402836084365845, + "step": 450, + "valid_targets_mean": 4484.3, + "valid_targets_min": 580 + }, + { + "epoch": 0.6967840735068913, + "grad_norm": 0.4646196239617948, + "learning_rate": 3.965065502183406e-05, + "loss": 0.2696, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24006010591983795, + "step": 455, + "valid_targets_mean": 4958.8, + "valid_targets_min": 911 + }, + { + "epoch": 0.7044410413476263, + "grad_norm": 0.4405348727571011, + "learning_rate": 3.9999994165786676e-05, + "loss": 0.2406, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22806835174560547, + "step": 460, + "valid_targets_mean": 5592.2, + "valid_targets_min": 2673 + }, + { + "epoch": 0.7120980091883614, + "grad_norm": 0.5562861867740798, + "learning_rate": 3.9999789968677496e-05, + "loss": 0.2741, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2417786568403244, + "step": 465, + "valid_targets_mean": 4365.9, + "valid_targets_min": 566 + }, + { + "epoch": 0.7197549770290965, + "grad_norm": 0.5080252815095521, + "learning_rate": 3.999929406430558e-05, + "loss": 0.2568, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2633582651615143, + "step": 470, + "valid_targets_mean": 4811.5, + "valid_targets_min": 530 + }, + { + "epoch": 0.7274119448698315, + "grad_norm": 0.5272035775867764, + "learning_rate": 3.999850645990394e-05, + "loss": 0.2524, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24008730053901672, + "step": 475, + "valid_targets_mean": 4771.0, + "valid_targets_min": 686 + }, + { + "epoch": 0.7350689127105666, + "grad_norm": 0.4754003904766261, + "learning_rate": 3.999742716696021e-05, + "loss": 0.2617, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23725548386573792, + "step": 480, + "valid_targets_mean": 5124.4, + "valid_targets_min": 464 + }, + { + "epoch": 0.7427258805513017, + "grad_norm": 0.6101466292156634, + "learning_rate": 3.999605620121641e-05, + "loss": 0.2598, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24445194005966187, + "step": 485, + "valid_targets_mean": 5324.8, + "valid_targets_min": 660 + }, + { + "epoch": 0.7503828483920367, + "grad_norm": 0.4637006971657633, + "learning_rate": 3.9994393582668806e-05, + "loss": 0.2527, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2397041618824005, + "step": 490, + "valid_targets_mean": 5648.9, + "valid_targets_min": 777 + }, + { + "epoch": 0.7580398162327718, + "grad_norm": 0.5016071627691048, + "learning_rate": 3.999243933556753e-05, + "loss": 0.2535, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2539515197277069, + "step": 495, + "valid_targets_mean": 4441.4, + "valid_targets_min": 321 + }, + { + "epoch": 0.7656967840735069, + "grad_norm": 0.4754215254300854, + "learning_rate": 3.9990193488416304e-05, + "loss": 0.2622, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2377694994211197, + "step": 500, + "valid_targets_mean": 5602.7, + "valid_targets_min": 674 + }, + { + "epoch": 0.7733537519142419, + "grad_norm": 0.4442741247780648, + "learning_rate": 3.9987656073971946e-05, + "loss": 0.2403, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2323368787765503, + "step": 505, + "valid_targets_mean": 5325.0, + "valid_targets_min": 841 + }, + { + "epoch": 0.781010719754977, + "grad_norm": 0.499596334503463, + "learning_rate": 3.998482712924397e-05, + "loss": 0.2368, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2620323896408081, + "step": 510, + "valid_targets_mean": 4717.8, + "valid_targets_min": 740 + }, + { + "epoch": 0.7886676875957122, + "grad_norm": 0.4715666857814281, + "learning_rate": 3.9981706695493996e-05, + "loss": 0.2432, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27169761061668396, + "step": 515, + "valid_targets_mean": 5186.8, + "valid_targets_min": 2363 + }, + { + "epoch": 0.7963246554364471, + "grad_norm": 0.5743001328378289, + "learning_rate": 3.997829481823515e-05, + "loss": 0.2699, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27855122089385986, + "step": 520, + "valid_targets_mean": 4762.1, + "valid_targets_min": 593 + }, + { + "epoch": 0.8039816232771823, + "grad_norm": 0.47759303305733847, + "learning_rate": 3.997459154723144e-05, + "loss": 0.2535, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2423936426639557, + "step": 525, + "valid_targets_mean": 5297.0, + "valid_targets_min": 854 + }, + { + "epoch": 0.8116385911179173, + "grad_norm": 0.46683109553118957, + "learning_rate": 3.9970596936496976e-05, + "loss": 0.2477, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2846606373786926, + "step": 530, + "valid_targets_mean": 6037.3, + "valid_targets_min": 691 + }, + { + "epoch": 0.8192955589586524, + "grad_norm": 0.4707779779562285, + "learning_rate": 3.996631104429521e-05, + "loss": 0.2614, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22087471187114716, + "step": 535, + "valid_targets_mean": 5019.9, + "valid_targets_min": 876 + }, + { + "epoch": 0.8269525267993875, + "grad_norm": 0.5479924065007178, + "learning_rate": 3.9961733933138106e-05, + "loss": 0.2394, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23031362891197205, + "step": 540, + "valid_targets_mean": 4613.0, + "valid_targets_min": 2571 + }, + { + "epoch": 0.8346094946401225, + "grad_norm": 0.5706626665016054, + "learning_rate": 3.9956865669785185e-05, + "loss": 0.2591, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2653731405735016, + "step": 545, + "valid_targets_mean": 4250.2, + "valid_targets_min": 499 + }, + { + "epoch": 0.8422664624808576, + "grad_norm": 0.45823962344649066, + "learning_rate": 3.9951706325242595e-05, + "loss": 0.2598, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.214570090174675, + "step": 550, + "valid_targets_mean": 5601.8, + "valid_targets_min": 1935 + }, + { + "epoch": 0.8499234303215927, + "grad_norm": 0.4798067262429478, + "learning_rate": 3.9946255974762023e-05, + "loss": 0.2574, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22499428689479828, + "step": 555, + "valid_targets_mean": 4887.4, + "valid_targets_min": 559 + }, + { + "epoch": 0.8575803981623277, + "grad_norm": 0.5354869537077036, + "learning_rate": 3.9940514697839654e-05, + "loss": 0.2707, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2552046775817871, + "step": 560, + "valid_targets_mean": 4646.8, + "valid_targets_min": 584 + }, + { + "epoch": 0.8652373660030628, + "grad_norm": 0.404422046345916, + "learning_rate": 3.993448257821498e-05, + "loss": 0.2363, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2055911421775818, + "step": 565, + "valid_targets_mean": 5405.6, + "valid_targets_min": 2000 + }, + { + "epoch": 0.8728943338437979, + "grad_norm": 0.9512439674975726, + "learning_rate": 3.992815970386956e-05, + "loss": 0.2309, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22652213275432587, + "step": 570, + "valid_targets_mean": 5747.8, + "valid_targets_min": 731 + }, + { + "epoch": 0.8805513016845329, + "grad_norm": 0.49581746326271475, + "learning_rate": 3.99215461670258e-05, + "loss": 0.2564, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25263649225234985, + "step": 575, + "valid_targets_mean": 4818.8, + "valid_targets_min": 636 + }, + { + "epoch": 0.888208269525268, + "grad_norm": 0.5193294216924808, + "learning_rate": 3.9914642064145555e-05, + "loss": 0.2306, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24224933981895447, + "step": 580, + "valid_targets_mean": 4697.4, + "valid_targets_min": 768 + }, + { + "epoch": 0.8958652373660031, + "grad_norm": 0.5081023804449541, + "learning_rate": 3.990744749592871e-05, + "loss": 0.2514, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25167709589004517, + "step": 585, + "valid_targets_mean": 5235.0, + "valid_targets_min": 447 + }, + { + "epoch": 0.9035222052067381, + "grad_norm": 0.43150283564796665, + "learning_rate": 3.989996256731178e-05, + "loss": 0.2655, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24848490953445435, + "step": 590, + "valid_targets_mean": 5873.1, + "valid_targets_min": 631 + }, + { + "epoch": 0.9111791730474732, + "grad_norm": 0.490082147198131, + "learning_rate": 3.9892187387466286e-05, + "loss": 0.2498, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2544364631175995, + "step": 595, + "valid_targets_mean": 5240.1, + "valid_targets_min": 776 + }, + { + "epoch": 0.9188361408882083, + "grad_norm": 0.47301661085243424, + "learning_rate": 3.9884122069797256e-05, + "loss": 0.2388, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23627778887748718, + "step": 600, + "valid_targets_mean": 4547.9, + "valid_targets_min": 1232 + }, + { + "epoch": 0.9264931087289433, + "grad_norm": 0.6020593350379793, + "learning_rate": 3.9875766731941514e-05, + "loss": 0.244, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25998401641845703, + "step": 605, + "valid_targets_mean": 5206.4, + "valid_targets_min": 1670 + }, + { + "epoch": 0.9341500765696784, + "grad_norm": 0.4343199562247558, + "learning_rate": 3.986712149576597e-05, + "loss": 0.2343, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21079614758491516, + "step": 610, + "valid_targets_mean": 5721.4, + "valid_targets_min": 661 + }, + { + "epoch": 0.9418070444104135, + "grad_norm": 0.4959240813023885, + "learning_rate": 3.985818648736588e-05, + "loss": 0.2437, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2344963252544403, + "step": 615, + "valid_targets_mean": 5268.9, + "valid_targets_min": 645 + }, + { + "epoch": 0.9494640122511485, + "grad_norm": 0.44727712842280476, + "learning_rate": 3.984896183706291e-05, + "loss": 0.2273, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21470850706100464, + "step": 620, + "valid_targets_mean": 4735.4, + "valid_targets_min": 727 + }, + { + "epoch": 0.9571209800918836, + "grad_norm": 0.4770114196646438, + "learning_rate": 3.983944767940339e-05, + "loss": 0.2428, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.251364529132843, + "step": 625, + "valid_targets_mean": 5323.6, + "valid_targets_min": 1961 + }, + { + "epoch": 0.9647779479326187, + "grad_norm": 0.5049867980521303, + "learning_rate": 3.98296441531562e-05, + "loss": 0.2433, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2861921489238739, + "step": 630, + "valid_targets_mean": 4449.9, + "valid_targets_min": 357 + }, + { + "epoch": 0.9724349157733537, + "grad_norm": 0.4585093358813056, + "learning_rate": 3.9819551401310834e-05, + "loss": 0.2386, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2122216820716858, + "step": 635, + "valid_targets_mean": 5522.1, + "valid_targets_min": 886 + }, + { + "epoch": 0.9800918836140888, + "grad_norm": 0.5195599231781355, + "learning_rate": 3.980916957107529e-05, + "loss": 0.2456, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2382555454969406, + "step": 640, + "valid_targets_mean": 4636.5, + "valid_targets_min": 365 + }, + { + "epoch": 0.9877488514548239, + "grad_norm": 0.5243648163119432, + "learning_rate": 3.979849881387393e-05, + "loss": 0.2617, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2904030680656433, + "step": 645, + "valid_targets_mean": 4576.8, + "valid_targets_min": 664 + }, + { + "epoch": 0.9954058192955589, + "grad_norm": 0.5320338443965096, + "learning_rate": 3.9787539285345245e-05, + "loss": 0.2519, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2518049478530884, + "step": 650, + "valid_targets_mean": 3986.9, + "valid_targets_min": 582 + }, + { + "epoch": 1.003062787136294, + "grad_norm": 0.4543275980951745, + "learning_rate": 3.977629114533963e-05, + "loss": 0.2389, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21149328351020813, + "step": 655, + "valid_targets_mean": 4705.1, + "valid_targets_min": 596 + }, + { + "epoch": 1.010719754977029, + "grad_norm": 0.4610185176199923, + "learning_rate": 3.9764754557917e-05, + "loss": 0.2355, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2165687382221222, + "step": 660, + "valid_targets_mean": 4701.8, + "valid_targets_min": 757 + }, + { + "epoch": 1.0183767228177643, + "grad_norm": 0.3938486079154482, + "learning_rate": 3.975292969134445e-05, + "loss": 0.2124, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19741511344909668, + "step": 665, + "valid_targets_mean": 5695.2, + "valid_targets_min": 763 + }, + { + "epoch": 1.0260336906584993, + "grad_norm": 0.4323156308860325, + "learning_rate": 3.974081671809376e-05, + "loss": 0.2386, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22385263442993164, + "step": 670, + "valid_targets_mean": 5580.4, + "valid_targets_min": 3056 + }, + { + "epoch": 1.0336906584992342, + "grad_norm": 0.47472316536136555, + "learning_rate": 3.97284158148389e-05, + "loss": 0.2539, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2535420060157776, + "step": 675, + "valid_targets_mean": 5442.3, + "valid_targets_min": 382 + }, + { + "epoch": 1.0413476263399695, + "grad_norm": 0.462265375894869, + "learning_rate": 3.971572716245344e-05, + "loss": 0.2286, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21901056170463562, + "step": 680, + "valid_targets_mean": 5023.1, + "valid_targets_min": 741 + }, + { + "epoch": 1.0490045941807045, + "grad_norm": 0.48248662335215253, + "learning_rate": 3.970275094600794e-05, + "loss": 0.2398, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24081990122795105, + "step": 685, + "valid_targets_mean": 4785.9, + "valid_targets_min": 665 + }, + { + "epoch": 1.0566615620214395, + "grad_norm": 0.4832238002221963, + "learning_rate": 3.968948735476721e-05, + "loss": 0.222, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2480914294719696, + "step": 690, + "valid_targets_mean": 4995.2, + "valid_targets_min": 307 + }, + { + "epoch": 1.0643185298621747, + "grad_norm": 0.49185121927326225, + "learning_rate": 3.9675936582187574e-05, + "loss": 0.2286, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22463348507881165, + "step": 695, + "valid_targets_mean": 4966.1, + "valid_targets_min": 571 + }, + { + "epoch": 1.0719754977029097, + "grad_norm": 0.4852510851215775, + "learning_rate": 3.966209882591404e-05, + "loss": 0.2413, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23765654861927032, + "step": 700, + "valid_targets_mean": 4573.9, + "valid_targets_min": 1632 + }, + { + "epoch": 1.0796324655436447, + "grad_norm": 0.7554692659763621, + "learning_rate": 3.9647974287777444e-05, + "loss": 0.2311, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21256719529628754, + "step": 705, + "valid_targets_mean": 5479.7, + "valid_targets_min": 1174 + }, + { + "epoch": 1.0872894333843799, + "grad_norm": 0.5138481666787079, + "learning_rate": 3.9633563173791454e-05, + "loss": 0.2352, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24174924194812775, + "step": 710, + "valid_targets_mean": 4438.2, + "valid_targets_min": 709 + }, + { + "epoch": 1.0949464012251149, + "grad_norm": 0.5480222726912267, + "learning_rate": 3.961886569414962e-05, + "loss": 0.2251, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23572365939617157, + "step": 715, + "valid_targets_mean": 4192.2, + "valid_targets_min": 393 + }, + { + "epoch": 1.1026033690658499, + "grad_norm": 0.4970895044179645, + "learning_rate": 3.9603882063222254e-05, + "loss": 0.2157, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2110937386751175, + "step": 720, + "valid_targets_mean": 4099.9, + "valid_targets_min": 610 + }, + { + "epoch": 1.110260336906585, + "grad_norm": 0.3803758195568299, + "learning_rate": 3.958861249955336e-05, + "loss": 0.2162, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19598951935768127, + "step": 725, + "valid_targets_mean": 5967.6, + "valid_targets_min": 319 + }, + { + "epoch": 1.11791730474732, + "grad_norm": 0.44999478220240136, + "learning_rate": 3.957305722585742e-05, + "loss": 0.2272, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24348898231983185, + "step": 730, + "valid_targets_mean": 5976.7, + "valid_targets_min": 3547 + }, + { + "epoch": 1.125574272588055, + "grad_norm": 0.6543210798518898, + "learning_rate": 3.955721646901611e-05, + "loss": 0.2363, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2491251528263092, + "step": 735, + "valid_targets_mean": 4430.6, + "valid_targets_min": 655 + }, + { + "epoch": 1.13323124042879, + "grad_norm": 0.5116096170224053, + "learning_rate": 3.954109046007506e-05, + "loss": 0.2448, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24491837620735168, + "step": 740, + "valid_targets_mean": 5439.1, + "valid_targets_min": 3023 + }, + { + "epoch": 1.1408882082695253, + "grad_norm": 0.4657142210971288, + "learning_rate": 3.9524679434240426e-05, + "loss": 0.2492, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2513515055179596, + "step": 745, + "valid_targets_mean": 5713.0, + "valid_targets_min": 953 + }, + { + "epoch": 1.1485451761102603, + "grad_norm": 0.47461273340221555, + "learning_rate": 3.95079836308755e-05, + "loss": 0.2442, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22947482764720917, + "step": 750, + "valid_targets_mean": 4901.0, + "valid_targets_min": 637 + }, + { + "epoch": 1.1562021439509955, + "grad_norm": 0.5357763463044893, + "learning_rate": 3.94910032934972e-05, + "loss": 0.2455, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.31903350353240967, + "step": 755, + "valid_targets_mean": 4305.5, + "valid_targets_min": 601 + }, + { + "epoch": 1.1638591117917305, + "grad_norm": 0.44975376536296746, + "learning_rate": 3.947373866977251e-05, + "loss": 0.2417, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2732940912246704, + "step": 760, + "valid_targets_mean": 5532.4, + "valid_targets_min": 864 + }, + { + "epoch": 1.1715160796324655, + "grad_norm": 0.5172389291640855, + "learning_rate": 3.945619001151487e-05, + "loss": 0.241, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2661622166633606, + "step": 765, + "valid_targets_mean": 4083.5, + "valid_targets_min": 316 + }, + { + "epoch": 1.1791730474732005, + "grad_norm": 0.5218749820672336, + "learning_rate": 3.9438357574680536e-05, + "loss": 0.2267, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2443704754114151, + "step": 770, + "valid_targets_mean": 4730.9, + "valid_targets_min": 324 + }, + { + "epoch": 1.1868300153139357, + "grad_norm": 0.5132882152134416, + "learning_rate": 3.9420241619364794e-05, + "loss": 0.2348, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23841838538646698, + "step": 775, + "valid_targets_mean": 4711.1, + "valid_targets_min": 761 + }, + { + "epoch": 1.1944869831546707, + "grad_norm": 0.4750023702302424, + "learning_rate": 3.940184240979822e-05, + "loss": 0.2236, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20796674489974976, + "step": 780, + "valid_targets_mean": 4511.1, + "valid_targets_min": 635 + }, + { + "epoch": 1.202143950995406, + "grad_norm": 0.4336368898063612, + "learning_rate": 3.9383160214342775e-05, + "loss": 0.214, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22842389345169067, + "step": 785, + "valid_targets_mean": 5245.5, + "valid_targets_min": 1511 + }, + { + "epoch": 1.209800918836141, + "grad_norm": 0.461955332746975, + "learning_rate": 3.9364195305487926e-05, + "loss": 0.2162, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19852107763290405, + "step": 790, + "valid_targets_mean": 4429.8, + "valid_targets_min": 274 + }, + { + "epoch": 1.217457886676876, + "grad_norm": 0.44263849837581704, + "learning_rate": 3.934494795984666e-05, + "loss": 0.2424, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2133197784423828, + "step": 795, + "valid_targets_mean": 6112.6, + "valid_targets_min": 2092 + }, + { + "epoch": 1.225114854517611, + "grad_norm": 0.46310194439122904, + "learning_rate": 3.932541845815145e-05, + "loss": 0.2452, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2494787722826004, + "step": 800, + "valid_targets_mean": 5680.5, + "valid_targets_min": 797 + }, + { + "epoch": 1.2327718223583461, + "grad_norm": 0.534237892887851, + "learning_rate": 3.930560708525018e-05, + "loss": 0.2285, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2164071798324585, + "step": 805, + "valid_targets_mean": 4722.7, + "valid_targets_min": 844 + }, + { + "epoch": 1.2404287901990811, + "grad_norm": 0.5447789231267499, + "learning_rate": 3.9285514130101916e-05, + "loss": 0.2291, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2128959447145462, + "step": 810, + "valid_targets_mean": 5702.2, + "valid_targets_min": 2581 + }, + { + "epoch": 1.2480857580398161, + "grad_norm": 0.4541670432548241, + "learning_rate": 3.926513988577282e-05, + "loss": 0.23, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24208541214466095, + "step": 815, + "valid_targets_mean": 5627.6, + "valid_targets_min": 756 + }, + { + "epoch": 1.2557427258805514, + "grad_norm": 0.5066058652602967, + "learning_rate": 3.924448464943174e-05, + "loss": 0.2214, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20324429869651794, + "step": 820, + "valid_targets_mean": 5178.3, + "valid_targets_min": 590 + }, + { + "epoch": 1.2633996937212864, + "grad_norm": 0.4824685977219757, + "learning_rate": 3.922354872234596e-05, + "loss": 0.2196, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21685411036014557, + "step": 825, + "valid_targets_mean": 5625.1, + "valid_targets_min": 2583 + }, + { + "epoch": 1.2710566615620214, + "grad_norm": 0.5000431811675772, + "learning_rate": 3.9202332409876814e-05, + "loss": 0.231, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22782090306282043, + "step": 830, + "valid_targets_mean": 4467.5, + "valid_targets_min": 357 + }, + { + "epoch": 1.2787136294027566, + "grad_norm": 0.46323331152102815, + "learning_rate": 3.918083602147515e-05, + "loss": 0.248, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2275102138519287, + "step": 835, + "valid_targets_mean": 5634.8, + "valid_targets_min": 867 + }, + { + "epoch": 1.2863705972434916, + "grad_norm": 0.4773797488887894, + "learning_rate": 3.91590598706769e-05, + "loss": 0.2345, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24412119388580322, + "step": 840, + "valid_targets_mean": 4812.3, + "valid_targets_min": 622 + }, + { + "epoch": 1.2940275650842268, + "grad_norm": 0.5076582879336042, + "learning_rate": 3.913700427509847e-05, + "loss": 0.251, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2958303689956665, + "step": 845, + "valid_targets_mean": 5836.1, + "valid_targets_min": 727 + }, + { + "epoch": 1.3016845329249618, + "grad_norm": 0.5949340076342235, + "learning_rate": 3.911466955643209e-05, + "loss": 0.2284, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23251014947891235, + "step": 850, + "valid_targets_mean": 5130.4, + "valid_targets_min": 230 + }, + { + "epoch": 1.3093415007656968, + "grad_norm": 0.4670023051943422, + "learning_rate": 3.909205604044119e-05, + "loss": 0.2309, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2273961305618286, + "step": 855, + "valid_targets_mean": 5627.8, + "valid_targets_min": 649 + }, + { + "epoch": 1.3169984686064318, + "grad_norm": 0.4559961360039699, + "learning_rate": 3.9069164056955556e-05, + "loss": 0.2226, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20973217487335205, + "step": 860, + "valid_targets_mean": 5071.0, + "valid_targets_min": 453 + }, + { + "epoch": 1.324655436447167, + "grad_norm": 1.055965972410453, + "learning_rate": 3.90459939398666e-05, + "loss": 0.2236, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2360004484653473, + "step": 865, + "valid_targets_mean": 4669.8, + "valid_targets_min": 709 + }, + { + "epoch": 1.332312404287902, + "grad_norm": 0.4403899930388177, + "learning_rate": 3.902254602712242e-05, + "loss": 0.2311, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21914419531822205, + "step": 870, + "valid_targets_mean": 5948.8, + "valid_targets_min": 755 + }, + { + "epoch": 1.339969372128637, + "grad_norm": 0.49919259365975516, + "learning_rate": 3.899882066072296e-05, + "loss": 0.2289, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2369828075170517, + "step": 875, + "valid_targets_mean": 4776.4, + "valid_targets_min": 294 + }, + { + "epoch": 1.3476263399693722, + "grad_norm": 0.465536683491622, + "learning_rate": 3.897481818671493e-05, + "loss": 0.2346, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21700987219810486, + "step": 880, + "valid_targets_mean": 5091.3, + "valid_targets_min": 536 + }, + { + "epoch": 1.3552833078101072, + "grad_norm": 0.38610096562448637, + "learning_rate": 3.895053895518679e-05, + "loss": 0.2316, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20991790294647217, + "step": 885, + "valid_targets_mean": 6598.3, + "valid_targets_min": 3662 + }, + { + "epoch": 1.3629402756508422, + "grad_norm": 0.3931810607665223, + "learning_rate": 3.892598332026368e-05, + "loss": 0.2151, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20161226391792297, + "step": 890, + "valid_targets_mean": 5230.8, + "valid_targets_min": 874 + }, + { + "epoch": 1.3705972434915774, + "grad_norm": 0.46758214305344503, + "learning_rate": 3.8901151640102214e-05, + "loss": 0.2393, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2412872463464737, + "step": 895, + "valid_targets_mean": 5018.2, + "valid_targets_min": 597 + }, + { + "epoch": 1.3782542113323124, + "grad_norm": 0.47535688154677647, + "learning_rate": 3.8876044276885264e-05, + "loss": 0.2195, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20891433954238892, + "step": 900, + "valid_targets_mean": 4952.4, + "valid_targets_min": 426 + }, + { + "epoch": 1.3859111791730474, + "grad_norm": 0.3800112743149217, + "learning_rate": 3.885066159681668e-05, + "loss": 0.2212, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20787039399147034, + "step": 905, + "valid_targets_mean": 5950.2, + "valid_targets_min": 872 + }, + { + "epoch": 1.3935681470137826, + "grad_norm": 0.5717375079447776, + "learning_rate": 3.882500397011597e-05, + "loss": 0.2334, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2375756800174713, + "step": 910, + "valid_targets_mean": 4286.0, + "valid_targets_min": 648 + }, + { + "epoch": 1.4012251148545176, + "grad_norm": 0.4423799703448592, + "learning_rate": 3.8799071771012865e-05, + "loss": 0.2355, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2233145534992218, + "step": 915, + "valid_targets_mean": 5010.4, + "valid_targets_min": 826 + }, + { + "epoch": 1.4088820826952526, + "grad_norm": 0.7674568548996449, + "learning_rate": 3.877286537774187e-05, + "loss": 0.2399, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25533410906791687, + "step": 920, + "valid_targets_mean": 5108.3, + "valid_targets_min": 758 + }, + { + "epoch": 1.4165390505359878, + "grad_norm": 0.4672643825009114, + "learning_rate": 3.874638517253676e-05, + "loss": 0.2481, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24158219993114471, + "step": 925, + "valid_targets_mean": 4805.2, + "valid_targets_min": 2561 + }, + { + "epoch": 1.4241960183767228, + "grad_norm": 0.4803000711402261, + "learning_rate": 3.871963154162501e-05, + "loss": 0.2359, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2522413730621338, + "step": 930, + "valid_targets_mean": 4650.3, + "valid_targets_min": 895 + }, + { + "epoch": 1.4318529862174578, + "grad_norm": 0.4622132223927811, + "learning_rate": 3.869260487522213e-05, + "loss": 0.2223, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2327278107404709, + "step": 935, + "valid_targets_mean": 4991.4, + "valid_targets_min": 754 + }, + { + "epoch": 1.439509954058193, + "grad_norm": 0.4466120611595715, + "learning_rate": 3.866530556752601e-05, + "loss": 0.2374, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2425045520067215, + "step": 940, + "valid_targets_mean": 4719.2, + "valid_targets_min": 734 + }, + { + "epoch": 1.447166921898928, + "grad_norm": 0.4573816713524243, + "learning_rate": 3.8637734016711144e-05, + "loss": 0.2347, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2244962453842163, + "step": 945, + "valid_targets_mean": 4643.3, + "valid_targets_min": 702 + }, + { + "epoch": 1.454823889739663, + "grad_norm": 0.458299903716274, + "learning_rate": 3.860989062492284e-05, + "loss": 0.2369, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2286277562379837, + "step": 950, + "valid_targets_mean": 4489.7, + "valid_targets_min": 692 + }, + { + "epoch": 1.462480857580398, + "grad_norm": 0.5032857134165045, + "learning_rate": 3.858177579827133e-05, + "loss": 0.257, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.26451292634010315, + "step": 955, + "valid_targets_mean": 6107.7, + "valid_targets_min": 926 + }, + { + "epoch": 1.4701378254211332, + "grad_norm": 0.4842909516880842, + "learning_rate": 3.8553389946825896e-05, + "loss": 0.2147, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24591636657714844, + "step": 960, + "valid_targets_mean": 4898.8, + "valid_targets_min": 911 + }, + { + "epoch": 1.4777947932618682, + "grad_norm": 0.4073502413011468, + "learning_rate": 3.8524733484608824e-05, + "loss": 0.2195, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20459729433059692, + "step": 965, + "valid_targets_mean": 6015.3, + "valid_targets_min": 1521 + }, + { + "epoch": 1.4854517611026035, + "grad_norm": 0.40576274013727376, + "learning_rate": 3.8495806829589416e-05, + "loss": 0.2258, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21941059827804565, + "step": 970, + "valid_targets_mean": 5581.0, + "valid_targets_min": 2235 + }, + { + "epoch": 1.4931087289433385, + "grad_norm": 0.6974578241353532, + "learning_rate": 3.8466610403677874e-05, + "loss": 0.2222, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19623053073883057, + "step": 975, + "valid_targets_mean": 5069.4, + "valid_targets_min": 313 + }, + { + "epoch": 1.5007656967840735, + "grad_norm": 0.45967960346904296, + "learning_rate": 3.8437144632719136e-05, + "loss": 0.2415, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2094549983739853, + "step": 980, + "valid_targets_mean": 4371.3, + "valid_targets_min": 732 + }, + { + "epoch": 1.5084226646248085, + "grad_norm": 0.42898168106530515, + "learning_rate": 3.840740994648669e-05, + "loss": 0.2163, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20533907413482666, + "step": 985, + "valid_targets_mean": 4905.2, + "valid_targets_min": 1112 + }, + { + "epoch": 1.5160796324655437, + "grad_norm": 0.5771406355795161, + "learning_rate": 3.837740677867628e-05, + "loss": 0.241, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.26875361800193787, + "step": 990, + "valid_targets_mean": 4721.5, + "valid_targets_min": 845 + }, + { + "epoch": 1.5237366003062787, + "grad_norm": 0.4902824785618498, + "learning_rate": 3.8347135566899616e-05, + "loss": 0.2474, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2583337128162384, + "step": 995, + "valid_targets_mean": 4502.8, + "valid_targets_min": 618 + }, + { + "epoch": 1.5313935681470139, + "grad_norm": 0.44320917141796234, + "learning_rate": 3.831659675267793e-05, + "loss": 0.2282, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20274272561073303, + "step": 1000, + "valid_targets_mean": 4891.5, + "valid_targets_min": 686 + }, + { + "epoch": 1.5390505359877489, + "grad_norm": 0.44737506920889025, + "learning_rate": 3.828579078143561e-05, + "loss": 0.2247, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24201388657093048, + "step": 1005, + "valid_targets_mean": 5657.9, + "valid_targets_min": 3308 + }, + { + "epoch": 1.5467075038284839, + "grad_norm": 0.4987453352941786, + "learning_rate": 3.825471810249365e-05, + "loss": 0.2264, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23472508788108826, + "step": 1010, + "valid_targets_mean": 5547.6, + "valid_targets_min": 1485 + }, + { + "epoch": 1.5543644716692189, + "grad_norm": 0.48227849169472226, + "learning_rate": 3.822337916906311e-05, + "loss": 0.2288, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2520979642868042, + "step": 1015, + "valid_targets_mean": 4113.2, + "valid_targets_min": 629 + }, + { + "epoch": 1.562021439509954, + "grad_norm": 0.40392077613965266, + "learning_rate": 3.8191774438238514e-05, + "loss": 0.2278, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21043488383293152, + "step": 1020, + "valid_targets_mean": 5685.4, + "valid_targets_min": 1845 + }, + { + "epoch": 1.569678407350689, + "grad_norm": 0.4284123643349941, + "learning_rate": 3.815990437099118e-05, + "loss": 0.2188, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21044665575027466, + "step": 1025, + "valid_targets_mean": 5459.0, + "valid_targets_min": 720 + }, + { + "epoch": 1.5773353751914243, + "grad_norm": 0.4674790436901617, + "learning_rate": 3.81277694321625e-05, + "loss": 0.241, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23535355925559998, + "step": 1030, + "valid_targets_mean": 4770.8, + "valid_targets_min": 714 + }, + { + "epoch": 1.5849923430321593, + "grad_norm": 0.3788371319532689, + "learning_rate": 3.809537009045714e-05, + "loss": 0.2226, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20070108771324158, + "step": 1035, + "valid_targets_mean": 5832.2, + "valid_targets_min": 318 + }, + { + "epoch": 1.5926493108728943, + "grad_norm": 0.5196182043035402, + "learning_rate": 3.8062706818436234e-05, + "loss": 0.2395, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.26477953791618347, + "step": 1040, + "valid_targets_mean": 3962.2, + "valid_targets_min": 410 + }, + { + "epoch": 1.6003062787136293, + "grad_norm": 0.48358135151926473, + "learning_rate": 3.802978009251046e-05, + "loss": 0.2416, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25546973943710327, + "step": 1045, + "valid_targets_mean": 4565.6, + "valid_targets_min": 940 + }, + { + "epoch": 1.6079632465543645, + "grad_norm": 0.4289401594279837, + "learning_rate": 3.799659039293312e-05, + "loss": 0.2357, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2194889783859253, + "step": 1050, + "valid_targets_mean": 5301.4, + "valid_targets_min": 582 + }, + { + "epoch": 1.6156202143950995, + "grad_norm": 0.451232811004955, + "learning_rate": 3.796313820379313e-05, + "loss": 0.2202, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2255152463912964, + "step": 1055, + "valid_targets_mean": 5610.7, + "valid_targets_min": 596 + }, + { + "epoch": 1.6232771822358347, + "grad_norm": 0.4193660210903287, + "learning_rate": 3.792942401300792e-05, + "loss": 0.2254, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20200134813785553, + "step": 1060, + "valid_targets_mean": 5545.6, + "valid_targets_min": 528 + }, + { + "epoch": 1.6309341500765697, + "grad_norm": 0.4869469520460917, + "learning_rate": 3.789544831231639e-05, + "loss": 0.219, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2149246633052826, + "step": 1065, + "valid_targets_mean": 3840.6, + "valid_targets_min": 294 + }, + { + "epoch": 1.6385911179173047, + "grad_norm": 0.8289501528499609, + "learning_rate": 3.7861211597271655e-05, + "loss": 0.2436, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23823891580104828, + "step": 1070, + "valid_targets_mean": 5751.8, + "valid_targets_min": 3017 + }, + { + "epoch": 1.6462480857580397, + "grad_norm": 0.3834423104766572, + "learning_rate": 3.782671436723389e-05, + "loss": 0.2078, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19142815470695496, + "step": 1075, + "valid_targets_mean": 6619.1, + "valid_targets_min": 3373 + }, + { + "epoch": 1.653905053598775, + "grad_norm": 0.48705512770145654, + "learning_rate": 3.779195712536301e-05, + "loss": 0.2383, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23004969954490662, + "step": 1080, + "valid_targets_mean": 4073.9, + "valid_targets_min": 294 + }, + { + "epoch": 1.66156202143951, + "grad_norm": 0.4542717256577794, + "learning_rate": 3.775694037861134e-05, + "loss": 0.2153, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2153184711933136, + "step": 1085, + "valid_targets_mean": 5164.8, + "valid_targets_min": 661 + }, + { + "epoch": 1.6692189892802451, + "grad_norm": 0.45851010452010027, + "learning_rate": 3.772166463771619e-05, + "loss": 0.2273, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2138391137123108, + "step": 1090, + "valid_targets_mean": 5276.1, + "valid_targets_min": 1501 + }, + { + "epoch": 1.6768759571209801, + "grad_norm": 0.5506935584762426, + "learning_rate": 3.768613041719247e-05, + "loss": 0.231, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22046592831611633, + "step": 1095, + "valid_targets_mean": 4694.3, + "valid_targets_min": 899 + }, + { + "epoch": 1.6845329249617151, + "grad_norm": 0.46507047548755864, + "learning_rate": 3.765033823532514e-05, + "loss": 0.2245, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22383110225200653, + "step": 1100, + "valid_targets_mean": 4433.4, + "valid_targets_min": 788 + }, + { + "epoch": 1.6921898928024501, + "grad_norm": 0.5379797863275028, + "learning_rate": 3.7614288614161625e-05, + "loss": 0.2493, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27269816398620605, + "step": 1105, + "valid_targets_mean": 3752.3, + "valid_targets_min": 530 + }, + { + "epoch": 1.6998468606431854, + "grad_norm": 0.43058162892643365, + "learning_rate": 3.7577982079504284e-05, + "loss": 0.2123, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2153037041425705, + "step": 1110, + "valid_targets_mean": 5146.8, + "valid_targets_min": 706 + }, + { + "epoch": 1.7075038284839203, + "grad_norm": 0.44249590505034403, + "learning_rate": 3.754141916090266e-05, + "loss": 0.2118, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20741818845272064, + "step": 1115, + "valid_targets_mean": 5802.1, + "valid_targets_min": 679 + }, + { + "epoch": 1.7151607963246556, + "grad_norm": 0.458772872006437, + "learning_rate": 3.750460039164581e-05, + "loss": 0.2336, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24594131112098694, + "step": 1120, + "valid_targets_mean": 4699.8, + "valid_targets_min": 184 + }, + { + "epoch": 1.7228177641653906, + "grad_norm": 0.7672926531697714, + "learning_rate": 3.746752630875448e-05, + "loss": 0.2302, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24634478986263275, + "step": 1125, + "valid_targets_mean": 3561.6, + "valid_targets_min": 663 + }, + { + "epoch": 1.7304747320061256, + "grad_norm": 0.43972274729807126, + "learning_rate": 3.743019745297332e-05, + "loss": 0.2311, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22277215123176575, + "step": 1130, + "valid_targets_mean": 5361.2, + "valid_targets_min": 1695 + }, + { + "epoch": 1.7381316998468606, + "grad_norm": 0.4516327506576068, + "learning_rate": 3.739261436876296e-05, + "loss": 0.2254, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23480652272701263, + "step": 1135, + "valid_targets_mean": 4729.9, + "valid_targets_min": 635 + }, + { + "epoch": 1.7457886676875956, + "grad_norm": 0.4949615258488163, + "learning_rate": 3.73547776042921e-05, + "loss": 0.2365, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2477773129940033, + "step": 1140, + "valid_targets_mean": 4868.1, + "valid_targets_min": 380 + }, + { + "epoch": 1.7534456355283308, + "grad_norm": 0.4717486814671122, + "learning_rate": 3.731668771142946e-05, + "loss": 0.2329, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20434430241584778, + "step": 1145, + "valid_targets_mean": 4753.1, + "valid_targets_min": 661 + }, + { + "epoch": 1.761102603369066, + "grad_norm": 0.43298582473400343, + "learning_rate": 3.727834524573582e-05, + "loss": 0.2248, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23231241106987, + "step": 1150, + "valid_targets_mean": 5028.3, + "valid_targets_min": 680 + }, + { + "epoch": 1.768759571209801, + "grad_norm": 0.4520348267564731, + "learning_rate": 3.7239750766455826e-05, + "loss": 0.2288, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22502996027469635, + "step": 1155, + "valid_targets_mean": 4861.9, + "valid_targets_min": 571 + }, + { + "epoch": 1.776416539050536, + "grad_norm": 0.5150942320605032, + "learning_rate": 3.720090483650988e-05, + "loss": 0.2385, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2381991147994995, + "step": 1160, + "valid_targets_mean": 4304.5, + "valid_targets_min": 880 + }, + { + "epoch": 1.784073506891271, + "grad_norm": 0.5161348929268305, + "learning_rate": 3.7161808022485935e-05, + "loss": 0.2207, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21709483861923218, + "step": 1165, + "valid_targets_mean": 4438.7, + "valid_targets_min": 601 + }, + { + "epoch": 1.791730474732006, + "grad_norm": 0.49665971280240234, + "learning_rate": 3.7122460894631204e-05, + "loss": 0.2245, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24442270398139954, + "step": 1170, + "valid_targets_mean": 4556.2, + "valid_targets_min": 594 + }, + { + "epoch": 1.7993874425727412, + "grad_norm": 0.8745968372963754, + "learning_rate": 3.708286402684387e-05, + "loss": 0.227, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23762422800064087, + "step": 1175, + "valid_targets_mean": 4245.6, + "valid_targets_min": 739 + }, + { + "epoch": 1.8070444104134764, + "grad_norm": 0.41278379589280434, + "learning_rate": 3.704301799666469e-05, + "loss": 0.2495, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20009422302246094, + "step": 1180, + "valid_targets_mean": 5268.8, + "valid_targets_min": 876 + }, + { + "epoch": 1.8147013782542114, + "grad_norm": 0.48742732045154236, + "learning_rate": 3.700292338526858e-05, + "loss": 0.2343, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24549022316932678, + "step": 1185, + "valid_targets_mean": 4213.9, + "valid_targets_min": 544 + }, + { + "epoch": 1.8223583460949464, + "grad_norm": 0.4712771047324667, + "learning_rate": 3.696258077745616e-05, + "loss": 0.2226, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24899449944496155, + "step": 1190, + "valid_targets_mean": 4609.4, + "valid_targets_min": 566 + }, + { + "epoch": 1.8300153139356814, + "grad_norm": 0.44975298875115, + "learning_rate": 3.6921990761645185e-05, + "loss": 0.2287, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21963663399219513, + "step": 1195, + "valid_targets_mean": 4574.2, + "valid_targets_min": 534 + }, + { + "epoch": 1.8376722817764164, + "grad_norm": 0.3951327552159224, + "learning_rate": 3.6881153929861995e-05, + "loss": 0.2211, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2228214144706726, + "step": 1200, + "valid_targets_mean": 6362.1, + "valid_targets_min": 762 + }, + { + "epoch": 1.8453292496171516, + "grad_norm": 0.42372166371340003, + "learning_rate": 3.684007087773287e-05, + "loss": 0.2252, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21967345476150513, + "step": 1205, + "valid_targets_mean": 5326.0, + "valid_targets_min": 998 + }, + { + "epoch": 1.8529862174578868, + "grad_norm": 0.5177828906855314, + "learning_rate": 3.679874220447533e-05, + "loss": 0.2311, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23139753937721252, + "step": 1210, + "valid_targets_mean": 3876.4, + "valid_targets_min": 661 + }, + { + "epoch": 1.8606431852986218, + "grad_norm": 0.4257753688447525, + "learning_rate": 3.675716851288942e-05, + "loss": 0.2101, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21256472170352936, + "step": 1215, + "valid_targets_mean": 5345.1, + "valid_targets_min": 743 + }, + { + "epoch": 1.8683001531393568, + "grad_norm": 0.41030398631754234, + "learning_rate": 3.671535040934889e-05, + "loss": 0.2182, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21335265040397644, + "step": 1220, + "valid_targets_mean": 4680.1, + "valid_targets_min": 664 + }, + { + "epoch": 1.8759571209800918, + "grad_norm": 0.430169115187478, + "learning_rate": 3.667328850379238e-05, + "loss": 0.2066, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20409463346004486, + "step": 1225, + "valid_targets_mean": 5608.6, + "valid_targets_min": 2533 + }, + { + "epoch": 1.8836140888208268, + "grad_norm": 0.42025014500169766, + "learning_rate": 3.6630983409714494e-05, + "loss": 0.2257, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21930241584777832, + "step": 1230, + "valid_targets_mean": 5487.7, + "valid_targets_min": 1754 + }, + { + "epoch": 1.891271056661562, + "grad_norm": 0.5021327369610216, + "learning_rate": 3.6588435744156865e-05, + "loss": 0.2206, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19022399187088013, + "step": 1235, + "valid_targets_mean": 5496.7, + "valid_targets_min": 938 + }, + { + "epoch": 1.8989280245022973, + "grad_norm": 0.540321749042443, + "learning_rate": 3.654564612769917e-05, + "loss": 0.2479, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2716904878616333, + "step": 1240, + "valid_targets_mean": 3926.7, + "valid_targets_min": 693 + }, + { + "epoch": 1.9065849923430322, + "grad_norm": 0.4729363531042848, + "learning_rate": 3.650261518445006e-05, + "loss": 0.2127, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21322724223136902, + "step": 1245, + "valid_targets_mean": 5020.2, + "valid_targets_min": 654 + }, + { + "epoch": 1.9142419601837672, + "grad_norm": 0.4446714421133037, + "learning_rate": 3.6459343542038056e-05, + "loss": 0.2233, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20671671628952026, + "step": 1250, + "valid_targets_mean": 5615.5, + "valid_targets_min": 2404 + }, + { + "epoch": 1.9218989280245022, + "grad_norm": 0.3944661996414203, + "learning_rate": 3.64158318316024e-05, + "loss": 0.2179, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21900799870491028, + "step": 1255, + "valid_targets_mean": 6057.3, + "valid_targets_min": 3739 + }, + { + "epoch": 1.9295558958652372, + "grad_norm": 0.4723508700169769, + "learning_rate": 3.6372080687783864e-05, + "loss": 0.2149, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22037751972675323, + "step": 1260, + "valid_targets_mean": 4365.8, + "valid_targets_min": 478 + }, + { + "epoch": 1.9372128637059725, + "grad_norm": 0.45131110819991693, + "learning_rate": 3.632809074871546e-05, + "loss": 0.21, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20661242306232452, + "step": 1265, + "valid_targets_mean": 4677.1, + "valid_targets_min": 494 + }, + { + "epoch": 1.9448698315467075, + "grad_norm": 0.5463896519037112, + "learning_rate": 3.628386265601317e-05, + "loss": 0.2103, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21639251708984375, + "step": 1270, + "valid_targets_mean": 5609.1, + "valid_targets_min": 292 + }, + { + "epoch": 1.9525267993874427, + "grad_norm": 0.47101652563636187, + "learning_rate": 3.623939705476655e-05, + "loss": 0.2423, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2689882516860962, + "step": 1275, + "valid_targets_mean": 4807.4, + "valid_targets_min": 941 + }, + { + "epoch": 1.9601837672281777, + "grad_norm": 0.5829538504135483, + "learning_rate": 3.619469459352937e-05, + "loss": 0.2184, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19414472579956055, + "step": 1280, + "valid_targets_mean": 5776.8, + "valid_targets_min": 1769 + }, + { + "epoch": 1.9678407350689127, + "grad_norm": 0.47130592895075707, + "learning_rate": 3.614975592431009e-05, + "loss": 0.2167, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2240133434534073, + "step": 1285, + "valid_targets_mean": 4178.6, + "valid_targets_min": 393 + }, + { + "epoch": 1.9754977029096477, + "grad_norm": 0.41933414032795246, + "learning_rate": 3.6104581702562406e-05, + "loss": 0.2223, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2266603261232376, + "step": 1290, + "valid_targets_mean": 5151.5, + "valid_targets_min": 643 + }, + { + "epoch": 1.9831546707503829, + "grad_norm": 0.4099884275428022, + "learning_rate": 3.605917258717567e-05, + "loss": 0.2189, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2012246549129486, + "step": 1295, + "valid_targets_mean": 5393.4, + "valid_targets_min": 2048 + }, + { + "epoch": 1.9908116385911179, + "grad_norm": 0.3989801489279899, + "learning_rate": 3.6013529240465284e-05, + "loss": 0.2089, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19280970096588135, + "step": 1300, + "valid_targets_mean": 5744.2, + "valid_targets_min": 904 + }, + { + "epoch": 1.998468606431853, + "grad_norm": 0.402617048006408, + "learning_rate": 3.596765232816301e-05, + "loss": 0.2278, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2282564640045166, + "step": 1305, + "valid_targets_mean": 5270.4, + "valid_targets_min": 962 + }, + { + "epoch": 2.006125574272588, + "grad_norm": 0.4373173508442768, + "learning_rate": 3.5921542519407305e-05, + "loss": 0.212, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20129822194576263, + "step": 1310, + "valid_targets_mean": 5563.7, + "valid_targets_min": 1904 + }, + { + "epoch": 2.013782542113323, + "grad_norm": 0.40992921140400285, + "learning_rate": 3.587520048673354e-05, + "loss": 0.206, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1945541501045227, + "step": 1315, + "valid_targets_mean": 5979.3, + "valid_targets_min": 3547 + }, + { + "epoch": 2.021439509954058, + "grad_norm": 0.4970730710034263, + "learning_rate": 3.582862690606419e-05, + "loss": 0.2077, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19818368554115295, + "step": 1320, + "valid_targets_mean": 4507.1, + "valid_targets_min": 723 + }, + { + "epoch": 2.029096477794793, + "grad_norm": 0.475172498602143, + "learning_rate": 3.578182245669896e-05, + "loss": 0.2034, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19407986104488373, + "step": 1325, + "valid_targets_mean": 4450.6, + "valid_targets_min": 584 + }, + { + "epoch": 2.0367534456355285, + "grad_norm": 0.47059170257977145, + "learning_rate": 3.573478782130494e-05, + "loss": 0.2033, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2392202615737915, + "step": 1330, + "valid_targets_mean": 4831.3, + "valid_targets_min": 1514 + }, + { + "epoch": 2.0444104134762635, + "grad_norm": 0.528011468967076, + "learning_rate": 3.5687523685906535e-05, + "loss": 0.2094, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22966377437114716, + "step": 1335, + "valid_targets_mean": 3790.3, + "valid_targets_min": 753 + }, + { + "epoch": 2.0520673813169985, + "grad_norm": 0.5074078705476603, + "learning_rate": 3.564003073987559e-05, + "loss": 0.2244, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24435581266880035, + "step": 1340, + "valid_targets_mean": 4693.4, + "valid_targets_min": 721 + }, + { + "epoch": 2.0597243491577335, + "grad_norm": 0.3844129003728788, + "learning_rate": 3.559230967592123e-05, + "loss": 0.2125, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17261341214179993, + "step": 1345, + "valid_targets_mean": 5297.9, + "valid_targets_min": 321 + }, + { + "epoch": 2.0673813169984685, + "grad_norm": 0.4224982802578748, + "learning_rate": 3.554436119007982e-05, + "loss": 0.2004, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2082083821296692, + "step": 1350, + "valid_targets_mean": 4975.2, + "valid_targets_min": 621 + }, + { + "epoch": 2.0750382848392035, + "grad_norm": 0.437850384380176, + "learning_rate": 3.5496185981704775e-05, + "loss": 0.2206, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20134752988815308, + "step": 1355, + "valid_targets_mean": 5147.9, + "valid_targets_min": 390 + }, + { + "epoch": 2.082695252679939, + "grad_norm": 0.4207133103422171, + "learning_rate": 3.544778475345639e-05, + "loss": 0.2132, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2060418426990509, + "step": 1360, + "valid_targets_mean": 5071.6, + "valid_targets_min": 752 + }, + { + "epoch": 2.090352220520674, + "grad_norm": 1.3112730233999392, + "learning_rate": 3.539915821129156e-05, + "loss": 0.2027, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2119438499212265, + "step": 1365, + "valid_targets_mean": 5674.6, + "valid_targets_min": 797 + }, + { + "epoch": 2.098009188361409, + "grad_norm": 0.5554558757769965, + "learning_rate": 3.535030706445352e-05, + "loss": 0.2227, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21164335310459137, + "step": 1370, + "valid_targets_mean": 4558.6, + "valid_targets_min": 301 + }, + { + "epoch": 2.105666156202144, + "grad_norm": 0.4420544764278569, + "learning_rate": 3.530123202546146e-05, + "loss": 0.2047, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2061401605606079, + "step": 1375, + "valid_targets_mean": 5457.4, + "valid_targets_min": 661 + }, + { + "epoch": 2.113323124042879, + "grad_norm": 0.46858657202849646, + "learning_rate": 3.525193381010015e-05, + "loss": 0.2015, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17351192235946655, + "step": 1380, + "valid_targets_mean": 4331.8, + "valid_targets_min": 747 + }, + { + "epoch": 2.120980091883614, + "grad_norm": 0.4114806177904376, + "learning_rate": 3.520241313740954e-05, + "loss": 0.1985, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18135160207748413, + "step": 1385, + "valid_targets_mean": 5179.8, + "valid_targets_min": 788 + }, + { + "epoch": 2.1286370597243494, + "grad_norm": 0.4150389909635014, + "learning_rate": 3.51526707296742e-05, + "loss": 0.2062, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21126386523246765, + "step": 1390, + "valid_targets_mean": 5510.6, + "valid_targets_min": 2213 + }, + { + "epoch": 2.1362940275650844, + "grad_norm": 0.4189310905969695, + "learning_rate": 3.510270731241282e-05, + "loss": 0.1986, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2002798169851303, + "step": 1395, + "valid_targets_mean": 6350.9, + "valid_targets_min": 873 + }, + { + "epoch": 2.1439509954058193, + "grad_norm": 0.41400366324510934, + "learning_rate": 3.505252361436765e-05, + "loss": 0.2166, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1952013075351715, + "step": 1400, + "valid_targets_mean": 5682.1, + "valid_targets_min": 905 + }, + { + "epoch": 2.1516079632465543, + "grad_norm": 0.49444430141106194, + "learning_rate": 3.500212036749382e-05, + "loss": 0.2159, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22853989899158478, + "step": 1405, + "valid_targets_mean": 4394.3, + "valid_targets_min": 693 + }, + { + "epoch": 2.1592649310872893, + "grad_norm": 0.4644344666905227, + "learning_rate": 3.495149830694872e-05, + "loss": 0.202, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1914132535457611, + "step": 1410, + "valid_targets_mean": 5267.1, + "valid_targets_min": 583 + }, + { + "epoch": 2.1669218989280243, + "grad_norm": 0.45615206271339814, + "learning_rate": 3.490065817108124e-05, + "loss": 0.1936, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20476290583610535, + "step": 1415, + "valid_targets_mean": 4469.4, + "valid_targets_min": 614 + }, + { + "epoch": 2.1745788667687598, + "grad_norm": 0.47393370598028417, + "learning_rate": 3.484960070142102e-05, + "loss": 0.2204, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22236575186252594, + "step": 1420, + "valid_targets_mean": 4936.5, + "valid_targets_min": 268 + }, + { + "epoch": 2.1822358346094948, + "grad_norm": 0.4253170924277631, + "learning_rate": 3.4798326642667587e-05, + "loss": 0.2047, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18139034509658813, + "step": 1425, + "valid_targets_mean": 4734.9, + "valid_targets_min": 794 + }, + { + "epoch": 2.1898928024502298, + "grad_norm": 0.49911646366760776, + "learning_rate": 3.474683674267959e-05, + "loss": 0.1984, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20690619945526123, + "step": 1430, + "valid_targets_mean": 5103.0, + "valid_targets_min": 889 + }, + { + "epoch": 2.1975497702909648, + "grad_norm": 0.49119882276790566, + "learning_rate": 3.469513175246379e-05, + "loss": 0.1907, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1835339367389679, + "step": 1435, + "valid_targets_mean": 4820.6, + "valid_targets_min": 301 + }, + { + "epoch": 2.2052067381316998, + "grad_norm": 0.5154475611308721, + "learning_rate": 3.464321242616418e-05, + "loss": 0.2189, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19209043681621552, + "step": 1440, + "valid_targets_mean": 4761.6, + "valid_targets_min": 357 + }, + { + "epoch": 2.2128637059724348, + "grad_norm": 0.45164627691834747, + "learning_rate": 3.459107952105091e-05, + "loss": 0.2153, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1891171634197235, + "step": 1445, + "valid_targets_mean": 5681.1, + "valid_targets_min": 251 + }, + { + "epoch": 2.22052067381317, + "grad_norm": 0.5335767275763915, + "learning_rate": 3.4538733797509355e-05, + "loss": 0.2146, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2298070639371872, + "step": 1450, + "valid_targets_mean": 4577.5, + "valid_targets_min": 884 + }, + { + "epoch": 2.228177641653905, + "grad_norm": 0.4679949420186815, + "learning_rate": 3.44861760190289e-05, + "loss": 0.208, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19955570995807648, + "step": 1455, + "valid_targets_mean": 4832.8, + "valid_targets_min": 240 + }, + { + "epoch": 2.23583460949464, + "grad_norm": 0.42095307623333295, + "learning_rate": 3.443340695219188e-05, + "loss": 0.1979, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1974397748708725, + "step": 1460, + "valid_targets_mean": 5598.1, + "valid_targets_min": 877 + }, + { + "epoch": 2.243491577335375, + "grad_norm": 0.4745463301945368, + "learning_rate": 3.4380427366662376e-05, + "loss": 0.1934, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1813713014125824, + "step": 1465, + "valid_targets_mean": 5202.5, + "valid_targets_min": 727 + }, + { + "epoch": 2.25114854517611, + "grad_norm": 0.39487703795653906, + "learning_rate": 3.432723803517501e-05, + "loss": 0.2099, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1820264607667923, + "step": 1470, + "valid_targets_mean": 6077.2, + "valid_targets_min": 3521 + }, + { + "epoch": 2.258805513016845, + "grad_norm": 0.5769539475945872, + "learning_rate": 3.427383973352363e-05, + "loss": 0.2001, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2072950303554535, + "step": 1475, + "valid_targets_mean": 3227.7, + "valid_targets_min": 559 + }, + { + "epoch": 2.26646248085758, + "grad_norm": 0.4288473452558018, + "learning_rate": 3.422023324055005e-05, + "loss": 0.195, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17287051677703857, + "step": 1480, + "valid_targets_mean": 5125.8, + "valid_targets_min": 2542 + }, + { + "epoch": 2.2741194486983156, + "grad_norm": 0.4123498873195868, + "learning_rate": 3.4166419338132636e-05, + "loss": 0.2029, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2294299602508545, + "step": 1485, + "valid_targets_mean": 5843.1, + "valid_targets_min": 719 + }, + { + "epoch": 2.2817764165390506, + "grad_norm": 0.4730663649794135, + "learning_rate": 3.411239881117494e-05, + "loss": 0.2045, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22474753856658936, + "step": 1490, + "valid_targets_mean": 4959.0, + "valid_targets_min": 1705 + }, + { + "epoch": 2.2894333843797856, + "grad_norm": 0.3875549685911907, + "learning_rate": 3.4058172447594255e-05, + "loss": 0.1889, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1825382262468338, + "step": 1495, + "valid_targets_mean": 6264.6, + "valid_targets_min": 580 + }, + { + "epoch": 2.2970903522205206, + "grad_norm": 0.4656618705136615, + "learning_rate": 3.400374103831007e-05, + "loss": 0.216, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21203726530075073, + "step": 1500, + "valid_targets_mean": 4578.1, + "valid_targets_min": 871 + }, + { + "epoch": 2.3047473200612556, + "grad_norm": 0.5002570246078484, + "learning_rate": 3.394910537723259e-05, + "loss": 0.215, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21089878678321838, + "step": 1505, + "valid_targets_mean": 4343.4, + "valid_targets_min": 962 + }, + { + "epoch": 2.312404287901991, + "grad_norm": 0.452438730699335, + "learning_rate": 3.3894266261251163e-05, + "loss": 0.1995, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18554329872131348, + "step": 1510, + "valid_targets_mean": 4893.6, + "valid_targets_min": 872 + }, + { + "epoch": 2.320061255742726, + "grad_norm": 0.45774747263893145, + "learning_rate": 3.3839224490222594e-05, + "loss": 0.1945, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18411816656589508, + "step": 1515, + "valid_targets_mean": 4389.2, + "valid_targets_min": 292 + }, + { + "epoch": 2.327718223583461, + "grad_norm": 0.4396767692269071, + "learning_rate": 3.378398086695954e-05, + "loss": 0.1967, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19679993391036987, + "step": 1520, + "valid_targets_mean": 4803.2, + "valid_targets_min": 1214 + }, + { + "epoch": 2.335375191424196, + "grad_norm": 0.49013046313660535, + "learning_rate": 3.372853619721876e-05, + "loss": 0.2095, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20178654789924622, + "step": 1525, + "valid_targets_mean": 4873.6, + "valid_targets_min": 591 + }, + { + "epoch": 2.343032159264931, + "grad_norm": 0.3922824720718304, + "learning_rate": 3.367289128968939e-05, + "loss": 0.1947, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16767504811286926, + "step": 1530, + "valid_targets_mean": 6036.3, + "valid_targets_min": 1865 + }, + { + "epoch": 2.350689127105666, + "grad_norm": 0.5498579719590082, + "learning_rate": 3.361704695598115e-05, + "loss": 0.2136, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22038382291793823, + "step": 1535, + "valid_targets_mean": 5159.2, + "valid_targets_min": 968 + }, + { + "epoch": 2.358346094946401, + "grad_norm": 0.4055300698729176, + "learning_rate": 3.3561004010612466e-05, + "loss": 0.2244, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19593411684036255, + "step": 1540, + "valid_targets_mean": 5894.4, + "valid_targets_min": 303 + }, + { + "epoch": 2.3660030627871365, + "grad_norm": 0.3932581284453595, + "learning_rate": 3.3504763270998634e-05, + "loss": 0.2148, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20369234681129456, + "step": 1545, + "valid_targets_mean": 6333.2, + "valid_targets_min": 2885 + }, + { + "epoch": 2.3736600306278715, + "grad_norm": 0.47076653510770716, + "learning_rate": 3.344832555743988e-05, + "loss": 0.1924, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21554359793663025, + "step": 1550, + "valid_targets_mean": 4756.6, + "valid_targets_min": 315 + }, + { + "epoch": 2.3813169984686064, + "grad_norm": 0.48873017026939236, + "learning_rate": 3.33916916931094e-05, + "loss": 0.2117, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22236159443855286, + "step": 1555, + "valid_targets_mean": 4995.3, + "valid_targets_min": 587 + }, + { + "epoch": 2.3889739663093414, + "grad_norm": 0.4125303912579736, + "learning_rate": 3.3334862504041336e-05, + "loss": 0.1906, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20083507895469666, + "step": 1560, + "valid_targets_mean": 5715.2, + "valid_targets_min": 313 + }, + { + "epoch": 2.3966309341500764, + "grad_norm": 0.3853433874516468, + "learning_rate": 3.327783881911876e-05, + "loss": 0.1948, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17324942350387573, + "step": 1565, + "valid_targets_mean": 6147.9, + "valid_targets_min": 4254 + }, + { + "epoch": 2.404287901990812, + "grad_norm": 0.4211655279711124, + "learning_rate": 3.322062147006156e-05, + "loss": 0.193, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18347764015197754, + "step": 1570, + "valid_targets_mean": 5185.8, + "valid_targets_min": 281 + }, + { + "epoch": 2.411944869831547, + "grad_norm": 0.648964580388655, + "learning_rate": 3.3163211291414304e-05, + "loss": 0.1974, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19629991054534912, + "step": 1575, + "valid_targets_mean": 4286.4, + "valid_targets_min": 858 + }, + { + "epoch": 2.419601837672282, + "grad_norm": 0.4962453331492355, + "learning_rate": 3.310560912053409e-05, + "loss": 0.1945, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2081303745508194, + "step": 1580, + "valid_targets_mean": 4552.9, + "valid_targets_min": 825 + }, + { + "epoch": 2.427258805513017, + "grad_norm": 0.4555354604900568, + "learning_rate": 3.304781579757833e-05, + "loss": 0.2094, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17186057567596436, + "step": 1585, + "valid_targets_mean": 5026.6, + "valid_targets_min": 308 + }, + { + "epoch": 2.434915773353752, + "grad_norm": 0.4285448366359402, + "learning_rate": 3.298983216549248e-05, + "loss": 0.2224, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2216184288263321, + "step": 1590, + "valid_targets_mean": 5668.2, + "valid_targets_min": 912 + }, + { + "epoch": 2.442572741194487, + "grad_norm": 0.5893928303619657, + "learning_rate": 3.2931659069997735e-05, + "loss": 0.2046, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21629182994365692, + "step": 1595, + "valid_targets_mean": 3893.3, + "valid_targets_min": 304 + }, + { + "epoch": 2.450229709035222, + "grad_norm": 0.48884082069491913, + "learning_rate": 3.287329735957874e-05, + "loss": 0.215, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2368190437555313, + "step": 1600, + "valid_targets_mean": 4901.4, + "valid_targets_min": 1007 + }, + { + "epoch": 2.4578866768759573, + "grad_norm": 0.52785052949108, + "learning_rate": 3.281474788547118e-05, + "loss": 0.2159, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22330242395401, + "step": 1605, + "valid_targets_mean": 4853.1, + "valid_targets_min": 648 + }, + { + "epoch": 2.4655436447166923, + "grad_norm": 0.3987288953747758, + "learning_rate": 3.275601150164935e-05, + "loss": 0.206, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2003602832555771, + "step": 1610, + "valid_targets_mean": 5620.3, + "valid_targets_min": 2039 + }, + { + "epoch": 2.4732006125574273, + "grad_norm": 0.47491465305943875, + "learning_rate": 3.269708906481374e-05, + "loss": 0.1866, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1774139255285263, + "step": 1615, + "valid_targets_mean": 4716.2, + "valid_targets_min": 713 + }, + { + "epoch": 2.4808575803981623, + "grad_norm": 0.44594046656836617, + "learning_rate": 3.263798143437851e-05, + "loss": 0.2193, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20561468601226807, + "step": 1620, + "valid_targets_mean": 5183.3, + "valid_targets_min": 794 + }, + { + "epoch": 2.4885145482388973, + "grad_norm": 0.42727062717581415, + "learning_rate": 3.2578689472458976e-05, + "loss": 0.1963, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18722905218601227, + "step": 1625, + "valid_targets_mean": 5382.0, + "valid_targets_min": 553 + }, + { + "epoch": 2.4961715160796323, + "grad_norm": 0.5834450873795829, + "learning_rate": 3.251921404385901e-05, + "loss": 0.2102, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2354370504617691, + "step": 1630, + "valid_targets_mean": 4581.8, + "valid_targets_min": 678 + }, + { + "epoch": 2.5038284839203673, + "grad_norm": 0.5345211557255457, + "learning_rate": 3.245955601605845e-05, + "loss": 0.2198, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2337321937084198, + "step": 1635, + "valid_targets_mean": 4913.5, + "valid_targets_min": 728 + }, + { + "epoch": 2.5114854517611027, + "grad_norm": 0.37333243741751854, + "learning_rate": 3.239971625920043e-05, + "loss": 0.208, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19104315340518951, + "step": 1640, + "valid_targets_mean": 5793.1, + "valid_targets_min": 2257 + }, + { + "epoch": 2.5191424196018377, + "grad_norm": 0.5550955042373348, + "learning_rate": 3.23396956460787e-05, + "loss": 0.2156, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21975022554397583, + "step": 1645, + "valid_targets_mean": 3863.6, + "valid_targets_min": 293 + }, + { + "epoch": 2.5267993874425727, + "grad_norm": 0.4228650870100245, + "learning_rate": 3.2279495052124884e-05, + "loss": 0.1913, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1825849711894989, + "step": 1650, + "valid_targets_mean": 5644.5, + "valid_targets_min": 2558 + }, + { + "epoch": 2.5344563552833077, + "grad_norm": 0.46523951256002843, + "learning_rate": 3.2219115355395745e-05, + "loss": 0.2126, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20776934921741486, + "step": 1655, + "valid_targets_mean": 4995.9, + "valid_targets_min": 573 + }, + { + "epoch": 2.5421133231240427, + "grad_norm": 0.4225684699080134, + "learning_rate": 3.2158557436560317e-05, + "loss": 0.1996, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19467893242835999, + "step": 1660, + "valid_targets_mean": 4794.1, + "valid_targets_min": 671 + }, + { + "epoch": 2.549770290964778, + "grad_norm": 0.4533547241905163, + "learning_rate": 3.2097822178887114e-05, + "loss": 0.2058, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20666176080703735, + "step": 1665, + "valid_targets_mean": 5426.8, + "valid_targets_min": 3917 + }, + { + "epoch": 2.557427258805513, + "grad_norm": 0.7691377736423072, + "learning_rate": 3.203691046823124e-05, + "loss": 0.228, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2768425941467285, + "step": 1670, + "valid_targets_mean": 4918.1, + "valid_targets_min": 660 + }, + { + "epoch": 2.565084226646248, + "grad_norm": 0.4647968234361939, + "learning_rate": 3.197582319302143e-05, + "loss": 0.2048, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2030567228794098, + "step": 1675, + "valid_targets_mean": 4793.9, + "valid_targets_min": 898 + }, + { + "epoch": 2.572741194486983, + "grad_norm": 0.47503096589440647, + "learning_rate": 3.191456124424715e-05, + "loss": 0.2104, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20649227499961853, + "step": 1680, + "valid_targets_mean": 4376.4, + "valid_targets_min": 627 + }, + { + "epoch": 2.580398162327718, + "grad_norm": 0.38967161506770254, + "learning_rate": 3.185312551544553e-05, + "loss": 0.2027, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16294370591640472, + "step": 1685, + "valid_targets_mean": 5400.3, + "valid_targets_min": 306 + }, + { + "epoch": 2.5880551301684536, + "grad_norm": 0.4137207933362876, + "learning_rate": 3.179151690268842e-05, + "loss": 0.2182, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20800091326236725, + "step": 1690, + "valid_targets_mean": 5615.9, + "valid_targets_min": 1817 + }, + { + "epoch": 2.595712098009188, + "grad_norm": 0.43845263427784364, + "learning_rate": 3.1729736304569216e-05, + "loss": 0.1951, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1801062524318695, + "step": 1695, + "valid_targets_mean": 5320.6, + "valid_targets_min": 494 + }, + { + "epoch": 2.6033690658499236, + "grad_norm": 0.4583531820739542, + "learning_rate": 3.1667784622189866e-05, + "loss": 0.201, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2043706327676773, + "step": 1700, + "valid_targets_mean": 4630.8, + "valid_targets_min": 625 + }, + { + "epoch": 2.6110260336906586, + "grad_norm": 0.5834610593137702, + "learning_rate": 3.160566275914763e-05, + "loss": 0.2091, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1968202143907547, + "step": 1705, + "valid_targets_mean": 4307.7, + "valid_targets_min": 328 + }, + { + "epoch": 2.6186830015313936, + "grad_norm": 0.5472036514053488, + "learning_rate": 3.154337162152196e-05, + "loss": 0.2111, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23707276582717896, + "step": 1710, + "valid_targets_mean": 3378.8, + "valid_targets_min": 593 + }, + { + "epoch": 2.6263399693721285, + "grad_norm": 0.44150272015060515, + "learning_rate": 3.148091211786126e-05, + "loss": 0.208, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21165896952152252, + "step": 1715, + "valid_targets_mean": 5223.7, + "valid_targets_min": 365 + }, + { + "epoch": 2.6339969372128635, + "grad_norm": 0.5441798770208954, + "learning_rate": 3.141828515916963e-05, + "loss": 0.1955, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19593748450279236, + "step": 1720, + "valid_targets_mean": 5229.9, + "valid_targets_min": 928 + }, + { + "epoch": 2.641653905053599, + "grad_norm": 0.49587618350763246, + "learning_rate": 3.135549165889361e-05, + "loss": 0.2112, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22288836538791656, + "step": 1725, + "valid_targets_mean": 4101.1, + "valid_targets_min": 274 + }, + { + "epoch": 2.649310872894334, + "grad_norm": 0.4169833894502891, + "learning_rate": 3.1292532532908814e-05, + "loss": 0.1954, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19745498895645142, + "step": 1730, + "valid_targets_mean": 5347.1, + "valid_targets_min": 288 + }, + { + "epoch": 2.656967840735069, + "grad_norm": 0.4322133840167338, + "learning_rate": 3.12294086995066e-05, + "loss": 0.1932, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17262643575668335, + "step": 1735, + "valid_targets_mean": 4846.4, + "valid_targets_min": 2380 + }, + { + "epoch": 2.664624808575804, + "grad_norm": 0.5366545162742864, + "learning_rate": 3.116612107938068e-05, + "loss": 0.2199, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22421014308929443, + "step": 1740, + "valid_targets_mean": 4210.1, + "valid_targets_min": 594 + }, + { + "epoch": 2.672281776416539, + "grad_norm": 0.45585126107024354, + "learning_rate": 3.1102670595613654e-05, + "loss": 0.2011, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19032257795333862, + "step": 1745, + "valid_targets_mean": 4588.5, + "valid_targets_min": 713 + }, + { + "epoch": 2.679938744257274, + "grad_norm": 0.5669095838367388, + "learning_rate": 3.10390581736636e-05, + "loss": 0.1995, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20808973908424377, + "step": 1750, + "valid_targets_mean": 4542.7, + "valid_targets_min": 637 + }, + { + "epoch": 2.687595712098009, + "grad_norm": 0.42969387483485466, + "learning_rate": 3.0975284741350535e-05, + "loss": 0.2043, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.184463769197464, + "step": 1755, + "valid_targets_mean": 5571.9, + "valid_targets_min": 899 + }, + { + "epoch": 2.6952526799387444, + "grad_norm": 0.4688512511006581, + "learning_rate": 3.091135122884289e-05, + "loss": 0.2072, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21699786186218262, + "step": 1760, + "valid_targets_mean": 5923.4, + "valid_targets_min": 851 + }, + { + "epoch": 2.7029096477794794, + "grad_norm": 0.48031854884765524, + "learning_rate": 3.084725856864395e-05, + "loss": 0.2133, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21168527007102966, + "step": 1765, + "valid_targets_mean": 5652.4, + "valid_targets_min": 996 + }, + { + "epoch": 2.7105666156202144, + "grad_norm": 0.3909694462296888, + "learning_rate": 3.078300769557827e-05, + "loss": 0.2139, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16804447770118713, + "step": 1770, + "valid_targets_mean": 5812.6, + "valid_targets_min": 669 + }, + { + "epoch": 2.7182235834609494, + "grad_norm": 0.44319625905779303, + "learning_rate": 3.0718599546778e-05, + "loss": 0.2029, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21331599354743958, + "step": 1775, + "valid_targets_mean": 4729.1, + "valid_targets_min": 706 + }, + { + "epoch": 2.7258805513016844, + "grad_norm": 0.4495408359076918, + "learning_rate": 3.065403506166925e-05, + "loss": 0.2083, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20311203598976135, + "step": 1780, + "valid_targets_mean": 5382.8, + "valid_targets_min": 2506 + }, + { + "epoch": 2.73353751914242, + "grad_norm": 0.46444461555955624, + "learning_rate": 3.058931518195834e-05, + "loss": 0.207, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21994590759277344, + "step": 1785, + "valid_targets_mean": 4179.7, + "valid_targets_min": 685 + }, + { + "epoch": 2.741194486983155, + "grad_norm": 0.591047392543951, + "learning_rate": 3.052444085161818e-05, + "loss": 0.2112, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20447468757629395, + "step": 1790, + "valid_targets_mean": 5012.3, + "valid_targets_min": 685 + }, + { + "epoch": 2.74885145482389, + "grad_norm": 0.46171591187608774, + "learning_rate": 3.0459413016874334e-05, + "loss": 0.2035, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22244763374328613, + "step": 1795, + "valid_targets_mean": 4043.6, + "valid_targets_min": 747 + }, + { + "epoch": 2.756508422664625, + "grad_norm": 0.4755204959145075, + "learning_rate": 3.039423262619137e-05, + "loss": 0.2217, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23722028732299805, + "step": 1800, + "valid_targets_mean": 4548.3, + "valid_targets_min": 1499 + }, + { + "epoch": 2.76416539050536, + "grad_norm": 0.43118554325602115, + "learning_rate": 3.0328900630258924e-05, + "loss": 0.2163, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20272132754325867, + "step": 1805, + "valid_targets_mean": 5041.2, + "valid_targets_min": 940 + }, + { + "epoch": 2.771822358346095, + "grad_norm": 0.40532349261110173, + "learning_rate": 3.02634179819779e-05, + "loss": 0.2001, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18425460159778595, + "step": 1810, + "valid_targets_mean": 5047.2, + "valid_targets_min": 635 + }, + { + "epoch": 2.77947932618683, + "grad_norm": 0.40870340425176643, + "learning_rate": 3.0197785636446516e-05, + "loss": 0.1994, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19097676873207092, + "step": 1815, + "valid_targets_mean": 5106.1, + "valid_targets_min": 360 + }, + { + "epoch": 2.7871362940275652, + "grad_norm": 0.4466837704739254, + "learning_rate": 3.0132004550946438e-05, + "loss": 0.2052, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24000149965286255, + "step": 1820, + "valid_targets_mean": 5685.8, + "valid_targets_min": 603 + }, + { + "epoch": 2.7947932618683002, + "grad_norm": 0.4157130523321726, + "learning_rate": 3.006607568492875e-05, + "loss": 0.2277, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20618754625320435, + "step": 1825, + "valid_targets_mean": 5365.9, + "valid_targets_min": 344 + }, + { + "epoch": 2.8024502297090352, + "grad_norm": 0.45846280599364925, + "learning_rate": 3.0000000000000004e-05, + "loss": 0.1982, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20795312523841858, + "step": 1830, + "valid_targets_mean": 5361.0, + "valid_targets_min": 975 + }, + { + "epoch": 2.8101071975497702, + "grad_norm": 0.42423011488107815, + "learning_rate": 2.9933778459908178e-05, + "loss": 0.1997, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2021748125553131, + "step": 1835, + "valid_targets_mean": 5163.9, + "valid_targets_min": 846 + }, + { + "epoch": 2.8177641653905052, + "grad_norm": 0.5922093821939035, + "learning_rate": 2.986741203052863e-05, + "loss": 0.2007, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2244371771812439, + "step": 1840, + "valid_targets_mean": 5787.4, + "valid_targets_min": 732 + }, + { + "epoch": 2.8254211332312407, + "grad_norm": 0.41160121668642163, + "learning_rate": 2.9800901679849993e-05, + "loss": 0.1953, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20778921246528625, + "step": 1845, + "valid_targets_mean": 5501.7, + "valid_targets_min": 663 + }, + { + "epoch": 2.8330781010719757, + "grad_norm": 0.4427215800064586, + "learning_rate": 2.9734248377960072e-05, + "loss": 0.2139, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19352000951766968, + "step": 1850, + "valid_targets_mean": 5902.9, + "valid_targets_min": 478 + }, + { + "epoch": 2.8407350689127107, + "grad_norm": 0.4807115826614534, + "learning_rate": 2.9667453097031695e-05, + "loss": 0.211, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24455857276916504, + "step": 1855, + "valid_targets_mean": 4574.5, + "valid_targets_min": 774 + }, + { + "epoch": 2.8483920367534457, + "grad_norm": 0.5374002834819784, + "learning_rate": 2.9600516811308516e-05, + "loss": 0.2138, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22875338792800903, + "step": 1860, + "valid_targets_mean": 4644.6, + "valid_targets_min": 659 + }, + { + "epoch": 2.8560490045941807, + "grad_norm": 0.5090559947438412, + "learning_rate": 2.953344049709082e-05, + "loss": 0.1961, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19830799102783203, + "step": 1865, + "valid_targets_mean": 4229.7, + "valid_targets_min": 827 + }, + { + "epoch": 2.8637059724349156, + "grad_norm": 0.45737005237173484, + "learning_rate": 2.9466225132721285e-05, + "loss": 0.2102, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20132991671562195, + "step": 1870, + "valid_targets_mean": 4853.1, + "valid_targets_min": 792 + }, + { + "epoch": 2.8713629402756506, + "grad_norm": 0.40896772467478815, + "learning_rate": 2.9398871698570706e-05, + "loss": 0.2006, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20456016063690186, + "step": 1875, + "valid_targets_mean": 5263.5, + "valid_targets_min": 364 + }, + { + "epoch": 2.879019908116386, + "grad_norm": 0.46658454506306835, + "learning_rate": 2.9331381177023685e-05, + "loss": 0.1953, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20330816507339478, + "step": 1880, + "valid_targets_mean": 4714.1, + "valid_targets_min": 1107 + }, + { + "epoch": 2.886676875957121, + "grad_norm": 0.45821454150634894, + "learning_rate": 2.9263754552464338e-05, + "loss": 0.2046, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19405120611190796, + "step": 1885, + "valid_targets_mean": 4531.8, + "valid_targets_min": 353 + }, + { + "epoch": 2.894333843797856, + "grad_norm": 0.5547254247208179, + "learning_rate": 2.9195992811261897e-05, + "loss": 0.2065, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23308220505714417, + "step": 1890, + "valid_targets_mean": 3445.6, + "valid_targets_min": 270 + }, + { + "epoch": 2.901990811638591, + "grad_norm": 0.45421334304878075, + "learning_rate": 2.912809694175634e-05, + "loss": 0.2187, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2073233723640442, + "step": 1895, + "valid_targets_mean": 4847.8, + "valid_targets_min": 610 + }, + { + "epoch": 2.909647779479326, + "grad_norm": 0.528454036702243, + "learning_rate": 2.906006793424398e-05, + "loss": 0.2079, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21321921050548553, + "step": 1900, + "valid_targets_mean": 3648.8, + "valid_targets_min": 680 + }, + { + "epoch": 2.9173047473200615, + "grad_norm": 0.41825864046575006, + "learning_rate": 2.8991906780963014e-05, + "loss": 0.194, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19795027375221252, + "step": 1905, + "valid_targets_mean": 5839.6, + "valid_targets_min": 2864 + }, + { + "epoch": 2.924961715160796, + "grad_norm": 0.41700524250275683, + "learning_rate": 2.8923614476079053e-05, + "loss": 0.2008, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20309601724147797, + "step": 1910, + "valid_targets_mean": 5556.4, + "valid_targets_min": 1212 + }, + { + "epoch": 2.9326186830015315, + "grad_norm": 0.41390443168776353, + "learning_rate": 2.885519201567063e-05, + "loss": 0.2072, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19842462241649628, + "step": 1915, + "valid_targets_mean": 5288.7, + "valid_targets_min": 966 + }, + { + "epoch": 2.9402756508422665, + "grad_norm": 0.44113143651113673, + "learning_rate": 2.878664039771466e-05, + "loss": 0.2055, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1972970962524414, + "step": 1920, + "valid_targets_mean": 4468.6, + "valid_targets_min": 724 + }, + { + "epoch": 2.9479326186830015, + "grad_norm": 0.42348693430683976, + "learning_rate": 2.8717960622071875e-05, + "loss": 0.1982, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2290765643119812, + "step": 1925, + "valid_targets_mean": 5191.4, + "valid_targets_min": 1011 + }, + { + "epoch": 2.9555895865237365, + "grad_norm": 0.40281713608998554, + "learning_rate": 2.8649153690472258e-05, + "loss": 0.1955, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18338808417320251, + "step": 1930, + "valid_targets_mean": 5284.5, + "valid_targets_min": 734 + }, + { + "epoch": 2.9632465543644715, + "grad_norm": 0.4313394629221367, + "learning_rate": 2.858022060650045e-05, + "loss": 0.2046, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20001380145549774, + "step": 1935, + "valid_targets_mean": 4850.2, + "valid_targets_min": 294 + }, + { + "epoch": 2.970903522205207, + "grad_norm": 0.4330030636250435, + "learning_rate": 2.851116237558106e-05, + "loss": 0.2071, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19025284051895142, + "step": 1940, + "valid_targets_mean": 4625.9, + "valid_targets_min": 313 + }, + { + "epoch": 2.978560490045942, + "grad_norm": 0.4537727728009908, + "learning_rate": 2.8441980004964035e-05, + "loss": 0.2165, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22758567333221436, + "step": 1945, + "valid_targets_mean": 5108.7, + "valid_targets_min": 908 + }, + { + "epoch": 2.986217457886677, + "grad_norm": 0.5070336992907599, + "learning_rate": 2.8372674503709988e-05, + "loss": 0.2076, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22816471755504608, + "step": 1950, + "valid_targets_mean": 4112.2, + "valid_targets_min": 731 + }, + { + "epoch": 2.993874425727412, + "grad_norm": 0.5222307071784804, + "learning_rate": 2.8303246882675422e-05, + "loss": 0.2063, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.230756014585495, + "step": 1955, + "valid_targets_mean": 4718.6, + "valid_targets_min": 822 + }, + { + "epoch": 3.001531393568147, + "grad_norm": 0.44388401663627725, + "learning_rate": 2.8233698154498042e-05, + "loss": 0.2, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21255435049533844, + "step": 1960, + "valid_targets_mean": 5199.9, + "valid_targets_min": 538 + }, + { + "epoch": 3.009188361408882, + "grad_norm": 0.49805799320752814, + "learning_rate": 2.8164029333581964e-05, + "loss": 0.1928, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17959743738174438, + "step": 1965, + "valid_targets_mean": 4954.9, + "valid_targets_min": 685 + }, + { + "epoch": 3.0168453292496173, + "grad_norm": 0.451755079299758, + "learning_rate": 2.809424143608289e-05, + "loss": 0.2047, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1869395226240158, + "step": 1970, + "valid_targets_mean": 5274.4, + "valid_targets_min": 538 + }, + { + "epoch": 3.0245022970903523, + "grad_norm": 0.4237197860277832, + "learning_rate": 2.802433547989336e-05, + "loss": 0.1855, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1696140170097351, + "step": 1975, + "valid_targets_mean": 5256.1, + "valid_targets_min": 869 + }, + { + "epoch": 3.0321592649310873, + "grad_norm": 0.48674324881062975, + "learning_rate": 2.7954312484627824e-05, + "loss": 0.1877, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1883959323167801, + "step": 1980, + "valid_targets_mean": 5084.5, + "valid_targets_min": 702 + }, + { + "epoch": 3.0398162327718223, + "grad_norm": 0.49321268064240037, + "learning_rate": 2.788417347160783e-05, + "loss": 0.1898, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20648150146007538, + "step": 1985, + "valid_targets_mean": 4628.4, + "valid_targets_min": 682 + }, + { + "epoch": 3.0474732006125573, + "grad_norm": 0.541476424950113, + "learning_rate": 2.7813919463847094e-05, + "loss": 0.1906, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19818390905857086, + "step": 1990, + "valid_targets_mean": 4582.2, + "valid_targets_min": 680 + }, + { + "epoch": 3.0551301684532923, + "grad_norm": 0.4501064317153738, + "learning_rate": 2.7743551486036588e-05, + "loss": 0.1948, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18310943245887756, + "step": 1995, + "valid_targets_mean": 5253.2, + "valid_targets_min": 846 + }, + { + "epoch": 3.0627871362940278, + "grad_norm": 0.47833852227635554, + "learning_rate": 2.7673070564529606e-05, + "loss": 0.1746, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1747608482837677, + "step": 2000, + "valid_targets_mean": 5987.4, + "valid_targets_min": 661 + }, + { + "epoch": 3.0704441041347628, + "grad_norm": 0.44582266307962753, + "learning_rate": 2.7602477727326764e-05, + "loss": 0.1831, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20294205844402313, + "step": 2005, + "valid_targets_mean": 5650.4, + "valid_targets_min": 345 + }, + { + "epoch": 3.0781010719754978, + "grad_norm": 0.4439541326450507, + "learning_rate": 2.7531774004061057e-05, + "loss": 0.1956, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18605002760887146, + "step": 2010, + "valid_targets_mean": 5244.9, + "valid_targets_min": 736 + }, + { + "epoch": 3.0857580398162328, + "grad_norm": 0.8975814777494563, + "learning_rate": 2.746096042598279e-05, + "loss": 0.1872, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1635829508304596, + "step": 2015, + "valid_targets_mean": 4843.5, + "valid_targets_min": 776 + }, + { + "epoch": 3.0934150076569678, + "grad_norm": 0.4399161374325912, + "learning_rate": 2.739003802594456e-05, + "loss": 0.179, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19688273966312408, + "step": 2020, + "valid_targets_mean": 5672.7, + "valid_targets_min": 580 + }, + { + "epoch": 3.1010719754977027, + "grad_norm": 0.4325179298872115, + "learning_rate": 2.7319007838386177e-05, + "loss": 0.1818, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1865837574005127, + "step": 2025, + "valid_targets_mean": 5061.4, + "valid_targets_min": 1569 + }, + { + "epoch": 3.108728943338438, + "grad_norm": 0.5041193832066779, + "learning_rate": 2.724787089931962e-05, + "loss": 0.1907, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17695216834545135, + "step": 2030, + "valid_targets_mean": 4700.2, + "valid_targets_min": 635 + }, + { + "epoch": 3.116385911179173, + "grad_norm": 0.6636882689104748, + "learning_rate": 2.7176628246313864e-05, + "loss": 0.1833, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17805099487304688, + "step": 2035, + "valid_targets_mean": 5752.7, + "valid_targets_min": 274 + }, + { + "epoch": 3.124042879019908, + "grad_norm": 0.4991191331475684, + "learning_rate": 2.7105280918479775e-05, + "loss": 0.1787, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18973186612129211, + "step": 2040, + "valid_targets_mean": 5067.0, + "valid_targets_min": 706 + }, + { + "epoch": 3.131699846860643, + "grad_norm": 0.4629329569115915, + "learning_rate": 2.7033829956454992e-05, + "loss": 0.193, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22558261454105377, + "step": 2045, + "valid_targets_mean": 4791.7, + "valid_targets_min": 880 + }, + { + "epoch": 3.139356814701378, + "grad_norm": 0.4955623156729709, + "learning_rate": 2.696227640238867e-05, + "loss": 0.1806, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18404105305671692, + "step": 2050, + "valid_targets_mean": 4818.3, + "valid_targets_min": 294 + }, + { + "epoch": 3.147013782542113, + "grad_norm": 0.40572235426798386, + "learning_rate": 2.6890621299926337e-05, + "loss": 0.1846, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16386070847511292, + "step": 2055, + "valid_targets_mean": 5887.9, + "valid_targets_min": 854 + }, + { + "epoch": 3.1546707503828486, + "grad_norm": 0.4822367465005572, + "learning_rate": 2.681886569419467e-05, + "loss": 0.1872, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1752379685640335, + "step": 2060, + "valid_targets_mean": 4959.8, + "valid_targets_min": 559 + }, + { + "epoch": 3.1623277182235836, + "grad_norm": 0.42573426451272567, + "learning_rate": 2.674701063178621e-05, + "loss": 0.1962, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1862529069185257, + "step": 2065, + "valid_targets_mean": 5424.7, + "valid_targets_min": 924 + }, + { + "epoch": 3.1699846860643186, + "grad_norm": 0.555365775012984, + "learning_rate": 2.6675057160744157e-05, + "loss": 0.1824, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2169274091720581, + "step": 2070, + "valid_targets_mean": 4135.9, + "valid_targets_min": 665 + }, + { + "epoch": 3.1776416539050536, + "grad_norm": 0.49960455879913235, + "learning_rate": 2.660300633054703e-05, + "loss": 0.1881, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19349941611289978, + "step": 2075, + "valid_targets_mean": 4799.2, + "valid_targets_min": 692 + }, + { + "epoch": 3.1852986217457886, + "grad_norm": 0.4401708479718189, + "learning_rate": 2.653085919209339e-05, + "loss": 0.1859, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19372834265232086, + "step": 2080, + "valid_targets_mean": 5538.8, + "valid_targets_min": 2930 + }, + { + "epoch": 3.1929555895865236, + "grad_norm": 0.40630670332354507, + "learning_rate": 2.64586167976865e-05, + "loss": 0.1785, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18609587848186493, + "step": 2085, + "valid_targets_mean": 5602.0, + "valid_targets_min": 1947 + }, + { + "epoch": 3.2006125574272586, + "grad_norm": 0.4486946026376343, + "learning_rate": 2.6386280201018978e-05, + "loss": 0.1776, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19235703349113464, + "step": 2090, + "valid_targets_mean": 5800.1, + "valid_targets_min": 834 + }, + { + "epoch": 3.208269525267994, + "grad_norm": 0.43778644558935853, + "learning_rate": 2.6313850457157446e-05, + "loss": 0.2011, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18642935156822205, + "step": 2095, + "valid_targets_mean": 5270.8, + "valid_targets_min": 2460 + }, + { + "epoch": 3.215926493108729, + "grad_norm": 0.4771876713178929, + "learning_rate": 2.6241328622527097e-05, + "loss": 0.1865, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20062386989593506, + "step": 2100, + "valid_targets_mean": 5161.2, + "valid_targets_min": 2477 + }, + { + "epoch": 3.223583460949464, + "grad_norm": 0.46215779428957415, + "learning_rate": 2.6168715754896346e-05, + "loss": 0.1972, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19280359148979187, + "step": 2105, + "valid_targets_mean": 5252.8, + "valid_targets_min": 965 + }, + { + "epoch": 3.231240428790199, + "grad_norm": 0.445313686278001, + "learning_rate": 2.6096012913361355e-05, + "loss": 0.1722, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17572081089019775, + "step": 2110, + "valid_targets_mean": 5731.9, + "valid_targets_min": 1637 + }, + { + "epoch": 3.238897396630934, + "grad_norm": 0.5062526832195221, + "learning_rate": 2.60232211583306e-05, + "loss": 0.1885, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18368211388587952, + "step": 2115, + "valid_targets_mean": 4119.7, + "valid_targets_min": 889 + }, + { + "epoch": 3.2465543644716695, + "grad_norm": 0.5086260485057701, + "learning_rate": 2.5950341551509417e-05, + "loss": 0.185, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16073405742645264, + "step": 2120, + "valid_targets_mean": 5029.1, + "valid_targets_min": 801 + }, + { + "epoch": 3.2542113323124044, + "grad_norm": 0.4553370142955746, + "learning_rate": 2.58773751558845e-05, + "loss": 0.1741, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1590225100517273, + "step": 2125, + "valid_targets_mean": 5276.3, + "valid_targets_min": 645 + }, + { + "epoch": 3.2618683001531394, + "grad_norm": 0.5007894833828535, + "learning_rate": 2.5804323035708398e-05, + "loss": 0.1764, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17388451099395752, + "step": 2130, + "valid_targets_mean": 4547.9, + "valid_targets_min": 294 + }, + { + "epoch": 3.2695252679938744, + "grad_norm": 0.4373602866755202, + "learning_rate": 2.5731186256484e-05, + "loss": 0.2035, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18840092420578003, + "step": 2135, + "valid_targets_mean": 4721.8, + "valid_targets_min": 546 + }, + { + "epoch": 3.2771822358346094, + "grad_norm": 0.6742452779283673, + "learning_rate": 2.5657965884949e-05, + "loss": 0.1971, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2183414101600647, + "step": 2140, + "valid_targets_mean": 4429.9, + "valid_targets_min": 860 + }, + { + "epoch": 3.2848392036753444, + "grad_norm": 0.45898450490614895, + "learning_rate": 2.5584662989060317e-05, + "loss": 0.1904, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17275741696357727, + "step": 2145, + "valid_targets_mean": 5442.9, + "valid_targets_min": 743 + }, + { + "epoch": 3.2924961715160794, + "grad_norm": 0.44882146875151396, + "learning_rate": 2.5511278637978532e-05, + "loss": 0.1879, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15515941381454468, + "step": 2150, + "valid_targets_mean": 5243.6, + "valid_targets_min": 682 + }, + { + "epoch": 3.300153139356815, + "grad_norm": 0.4467734518319107, + "learning_rate": 2.5437813902052292e-05, + "loss": 0.1774, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18329621851444244, + "step": 2155, + "valid_targets_mean": 5117.8, + "valid_targets_min": 616 + }, + { + "epoch": 3.30781010719755, + "grad_norm": 0.4152520371866648, + "learning_rate": 2.536426985280271e-05, + "loss": 0.2025, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1850317418575287, + "step": 2160, + "valid_targets_mean": 5134.1, + "valid_targets_min": 662 + }, + { + "epoch": 3.315467075038285, + "grad_norm": 0.44998774771356215, + "learning_rate": 2.5290647562907705e-05, + "loss": 0.1929, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21751946210861206, + "step": 2165, + "valid_targets_mean": 5467.7, + "valid_targets_min": 2622 + }, + { + "epoch": 3.32312404287902, + "grad_norm": 0.4019431753462474, + "learning_rate": 2.5216948106186395e-05, + "loss": 0.1782, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1585729718208313, + "step": 2170, + "valid_targets_mean": 5651.1, + "valid_targets_min": 789 + }, + { + "epoch": 3.330781010719755, + "grad_norm": 0.5618290697674159, + "learning_rate": 2.5143172557583412e-05, + "loss": 0.1885, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21604791283607483, + "step": 2175, + "valid_targets_mean": 3703.8, + "valid_targets_min": 748 + }, + { + "epoch": 3.3384379785604903, + "grad_norm": 0.43902097786183897, + "learning_rate": 2.506932199315321e-05, + "loss": 0.1921, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19183912873268127, + "step": 2180, + "valid_targets_mean": 4844.0, + "valid_targets_min": 744 + }, + { + "epoch": 3.3460949464012253, + "grad_norm": 0.45644637337033184, + "learning_rate": 2.499539749004441e-05, + "loss": 0.2002, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20222628116607666, + "step": 2185, + "valid_targets_mean": 5212.4, + "valid_targets_min": 773 + }, + { + "epoch": 3.3537519142419603, + "grad_norm": 0.4393001352247523, + "learning_rate": 2.4921400126484057e-05, + "loss": 0.1925, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20443087816238403, + "step": 2190, + "valid_targets_mean": 5666.1, + "valid_targets_min": 1784 + }, + { + "epoch": 3.3614088820826953, + "grad_norm": 0.5628152912241892, + "learning_rate": 2.4847330981761893e-05, + "loss": 0.1818, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18845781683921814, + "step": 2195, + "valid_targets_mean": 3962.0, + "valid_targets_min": 610 + }, + { + "epoch": 3.3690658499234303, + "grad_norm": 0.4499446422169732, + "learning_rate": 2.4773191136214655e-05, + "loss": 0.1951, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1923513114452362, + "step": 2200, + "valid_targets_mean": 4977.8, + "valid_targets_min": 830 + }, + { + "epoch": 3.3767228177641653, + "grad_norm": 0.4645104369113949, + "learning_rate": 2.4698981671210253e-05, + "loss": 0.1809, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1823941171169281, + "step": 2205, + "valid_targets_mean": 4065.4, + "valid_targets_min": 358 + }, + { + "epoch": 3.3843797856049003, + "grad_norm": 0.46636495279671436, + "learning_rate": 2.462470366913206e-05, + "loss": 0.1882, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18425936996936798, + "step": 2210, + "valid_targets_mean": 4618.2, + "valid_targets_min": 601 + }, + { + "epoch": 3.3920367534456357, + "grad_norm": 0.5055126421992738, + "learning_rate": 2.4550358213363083e-05, + "loss": 0.1874, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19574221968650818, + "step": 2215, + "valid_targets_mean": 3900.2, + "valid_targets_min": 591 + }, + { + "epoch": 3.3996937212863707, + "grad_norm": 0.5503070083738346, + "learning_rate": 2.4475946388270172e-05, + "loss": 0.1848, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17538341879844666, + "step": 2220, + "valid_targets_mean": 3802.2, + "valid_targets_min": 255 + }, + { + "epoch": 3.4073506891271057, + "grad_norm": 0.47088674144945025, + "learning_rate": 2.440146927918823e-05, + "loss": 0.1889, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2073318362236023, + "step": 2225, + "valid_targets_mean": 4473.6, + "valid_targets_min": 421 + }, + { + "epoch": 3.4150076569678407, + "grad_norm": 0.4740661356667313, + "learning_rate": 2.4326927972404333e-05, + "loss": 0.1995, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20011824369430542, + "step": 2230, + "valid_targets_mean": 4605.9, + "valid_targets_min": 626 + }, + { + "epoch": 3.4226646248085757, + "grad_norm": 0.4165429827415794, + "learning_rate": 2.4252323555141935e-05, + "loss": 0.1817, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17027318477630615, + "step": 2235, + "valid_targets_mean": 5750.4, + "valid_targets_min": 2722 + }, + { + "epoch": 3.4303215926493107, + "grad_norm": 0.4794637308355578, + "learning_rate": 2.417765711554498e-05, + "loss": 0.1843, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16433902084827423, + "step": 2240, + "valid_targets_mean": 4362.0, + "valid_targets_min": 621 + }, + { + "epoch": 3.437978560490046, + "grad_norm": 0.5136159685801481, + "learning_rate": 2.410292974266203e-05, + "loss": 0.1972, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20960760116577148, + "step": 2245, + "valid_targets_mean": 3976.4, + "valid_targets_min": 758 + }, + { + "epoch": 3.445635528330781, + "grad_norm": 0.5270719518942621, + "learning_rate": 2.402814252643042e-05, + "loss": 0.1958, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20371288061141968, + "step": 2250, + "valid_targets_mean": 4996.0, + "valid_targets_min": 1014 + }, + { + "epoch": 3.453292496171516, + "grad_norm": 0.48963006601475756, + "learning_rate": 2.3953296557660288e-05, + "loss": 0.1925, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18395353853702545, + "step": 2255, + "valid_targets_mean": 5188.1, + "valid_targets_min": 827 + }, + { + "epoch": 3.460949464012251, + "grad_norm": 0.4145211188615159, + "learning_rate": 2.387839292801875e-05, + "loss": 0.1779, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1779845654964447, + "step": 2260, + "valid_targets_mean": 5619.1, + "valid_targets_min": 663 + }, + { + "epoch": 3.468606431852986, + "grad_norm": 0.4701954182659396, + "learning_rate": 2.3803432730013913e-05, + "loss": 0.1833, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18874239921569824, + "step": 2265, + "valid_targets_mean": 4970.4, + "valid_targets_min": 854 + }, + { + "epoch": 3.476263399693721, + "grad_norm": 0.5084547938774928, + "learning_rate": 2.372841705697897e-05, + "loss": 0.1884, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19924131035804749, + "step": 2270, + "valid_targets_mean": 4273.2, + "valid_targets_min": 525 + }, + { + "epoch": 3.4839203675344566, + "grad_norm": 0.42346772066137994, + "learning_rate": 2.365334700305624e-05, + "loss": 0.1857, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18509188294410706, + "step": 2275, + "valid_targets_mean": 5318.5, + "valid_targets_min": 593 + }, + { + "epoch": 3.4915773353751915, + "grad_norm": 0.43876011826770084, + "learning_rate": 2.3578223663181214e-05, + "loss": 0.1868, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19643978774547577, + "step": 2280, + "valid_targets_mean": 4982.8, + "valid_targets_min": 744 + }, + { + "epoch": 3.4992343032159265, + "grad_norm": 0.4259076717957742, + "learning_rate": 2.35030481330666e-05, + "loss": 0.1914, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17723461985588074, + "step": 2285, + "valid_targets_mean": 5322.2, + "valid_targets_min": 1669 + }, + { + "epoch": 3.5068912710566615, + "grad_norm": 0.40790611814620886, + "learning_rate": 2.3427821509186308e-05, + "loss": 0.1914, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16672778129577637, + "step": 2290, + "valid_targets_mean": 5282.6, + "valid_targets_min": 1002 + }, + { + "epoch": 3.5145482388973965, + "grad_norm": 0.43724973404898343, + "learning_rate": 2.3352544888759495e-05, + "loss": 0.1775, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17628073692321777, + "step": 2295, + "valid_targets_mean": 5093.2, + "valid_targets_min": 732 + }, + { + "epoch": 3.522205206738132, + "grad_norm": 0.4419945752454893, + "learning_rate": 2.3277219369734537e-05, + "loss": 0.2059, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17800669372081757, + "step": 2300, + "valid_targets_mean": 5829.1, + "valid_targets_min": 464 + }, + { + "epoch": 3.5298621745788665, + "grad_norm": 0.40029502890365776, + "learning_rate": 2.320184605077302e-05, + "loss": 0.1806, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1498422920703888, + "step": 2305, + "valid_targets_mean": 6132.4, + "valid_targets_min": 3259 + }, + { + "epoch": 3.537519142419602, + "grad_norm": 0.4317895472992919, + "learning_rate": 2.3126426031233714e-05, + "loss": 0.1799, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20129495859146118, + "step": 2310, + "valid_targets_mean": 5208.1, + "valid_targets_min": 901 + }, + { + "epoch": 3.545176110260337, + "grad_norm": 0.40601945027019737, + "learning_rate": 2.3050960411156546e-05, + "loss": 0.1767, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18385669589042664, + "step": 2315, + "valid_targets_mean": 6188.5, + "valid_targets_min": 3363 + }, + { + "epoch": 3.552833078101072, + "grad_norm": 0.4955956305407851, + "learning_rate": 2.2975450291246536e-05, + "loss": 0.1815, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16455523669719696, + "step": 2320, + "valid_targets_mean": 5401.9, + "valid_targets_min": 434 + }, + { + "epoch": 3.560490045941807, + "grad_norm": 0.4775531036710853, + "learning_rate": 2.289989677285779e-05, + "loss": 0.1904, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20387563109397888, + "step": 2325, + "valid_targets_mean": 5242.1, + "valid_targets_min": 680 + }, + { + "epoch": 3.568147013782542, + "grad_norm": 0.42783431960706375, + "learning_rate": 2.282430095797737e-05, + "loss": 0.1916, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17975592613220215, + "step": 2330, + "valid_targets_mean": 5704.2, + "valid_targets_min": 792 + }, + { + "epoch": 3.5758039816232774, + "grad_norm": 0.4653094697113535, + "learning_rate": 2.274866394920927e-05, + "loss": 0.184, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19249895215034485, + "step": 2335, + "valid_targets_mean": 5539.1, + "valid_targets_min": 916 + }, + { + "epoch": 3.5834609494640124, + "grad_norm": 0.42954428223445007, + "learning_rate": 2.2672986849758316e-05, + "loss": 0.1834, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17228573560714722, + "step": 2340, + "valid_targets_mean": 5570.5, + "valid_targets_min": 2379 + }, + { + "epoch": 3.5911179173047474, + "grad_norm": 0.8433824524079548, + "learning_rate": 2.259727076341407e-05, + "loss": 0.1891, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15344902873039246, + "step": 2345, + "valid_targets_mean": 5395.2, + "valid_targets_min": 1700 + }, + { + "epoch": 3.5987748851454824, + "grad_norm": 0.43204330288459675, + "learning_rate": 2.252151679453475e-05, + "loss": 0.1856, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1977773904800415, + "step": 2350, + "valid_targets_mean": 4850.1, + "valid_targets_min": 794 + }, + { + "epoch": 3.6064318529862174, + "grad_norm": 0.3957799588447967, + "learning_rate": 2.2445726048031104e-05, + "loss": 0.1802, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18701013922691345, + "step": 2355, + "valid_targets_mean": 6515.1, + "valid_targets_min": 1939 + }, + { + "epoch": 3.6140888208269524, + "grad_norm": 0.46060379254586875, + "learning_rate": 2.23698996293503e-05, + "loss": 0.2011, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19811898469924927, + "step": 2360, + "valid_targets_mean": 5086.2, + "valid_targets_min": 1335 + }, + { + "epoch": 3.6217457886676874, + "grad_norm": 0.4761035270932635, + "learning_rate": 2.2294038644459805e-05, + "loss": 0.1966, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18033194541931152, + "step": 2365, + "valid_targets_mean": 4584.5, + "valid_targets_min": 607 + }, + { + "epoch": 3.629402756508423, + "grad_norm": 0.4869396442286812, + "learning_rate": 2.221814419983125e-05, + "loss": 0.195, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20814746618270874, + "step": 2370, + "valid_targets_mean": 5545.9, + "valid_targets_min": 2727 + }, + { + "epoch": 3.637059724349158, + "grad_norm": 0.46298467756924694, + "learning_rate": 2.2142217402424296e-05, + "loss": 0.1854, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.176169753074646, + "step": 2375, + "valid_targets_mean": 4827.2, + "valid_targets_min": 581 + }, + { + "epoch": 3.644716692189893, + "grad_norm": 0.4195617237812523, + "learning_rate": 2.2066259359670485e-05, + "loss": 0.1936, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17234407365322113, + "step": 2380, + "valid_targets_mean": 5661.3, + "valid_targets_min": 814 + }, + { + "epoch": 3.652373660030628, + "grad_norm": 0.43491867592878586, + "learning_rate": 2.1990271179457082e-05, + "loss": 0.1858, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17926275730133057, + "step": 2385, + "valid_targets_mean": 5154.2, + "valid_targets_min": 1010 + }, + { + "epoch": 3.660030627871363, + "grad_norm": 0.5808883209817489, + "learning_rate": 2.1914253970110937e-05, + "loss": 0.2006, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22726327180862427, + "step": 2390, + "valid_targets_mean": 3341.1, + "valid_targets_min": 582 + }, + { + "epoch": 3.6676875957120982, + "grad_norm": 0.39616224115059184, + "learning_rate": 2.1838208840382294e-05, + "loss": 0.1836, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17501763999462128, + "step": 2395, + "valid_targets_mean": 5985.6, + "valid_targets_min": 1035 + }, + { + "epoch": 3.6753445635528332, + "grad_norm": 0.5537720074927155, + "learning_rate": 2.176213689942863e-05, + "loss": 0.1896, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2179131954908371, + "step": 2400, + "valid_targets_mean": 3625.4, + "valid_targets_min": 364 + }, + { + "epoch": 3.6830015313935682, + "grad_norm": 0.43256442597937816, + "learning_rate": 2.168603925679849e-05, + "loss": 0.1609, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14384448528289795, + "step": 2405, + "valid_targets_mean": 4353.9, + "valid_targets_min": 335 + }, + { + "epoch": 3.6906584992343032, + "grad_norm": 0.4852843793132942, + "learning_rate": 2.160991702241527e-05, + "loss": 0.1968, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20300811529159546, + "step": 2410, + "valid_targets_mean": 4493.1, + "valid_targets_min": 591 + }, + { + "epoch": 3.698315467075038, + "grad_norm": 0.4046986527750801, + "learning_rate": 2.1533771306561066e-05, + "loss": 0.1897, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17245183885097504, + "step": 2415, + "valid_targets_mean": 6083.0, + "valid_targets_min": 837 + }, + { + "epoch": 3.705972434915773, + "grad_norm": 0.4681618550416894, + "learning_rate": 2.1457603219860457e-05, + "loss": 0.1894, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20374765992164612, + "step": 2420, + "valid_targets_mean": 4759.5, + "valid_targets_min": 492 + }, + { + "epoch": 3.713629402756508, + "grad_norm": 0.35609101937418786, + "learning_rate": 2.1381413873264315e-05, + "loss": 0.1729, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16857865452766418, + "step": 2425, + "valid_targets_mean": 6080.7, + "valid_targets_min": 3141 + }, + { + "epoch": 3.7212863705972437, + "grad_norm": 0.39199321408980886, + "learning_rate": 2.1305204378033598e-05, + "loss": 0.1798, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17050239443778992, + "step": 2430, + "valid_targets_mean": 6238.8, + "valid_targets_min": 2991 + }, + { + "epoch": 3.7289433384379786, + "grad_norm": 0.5289287346109518, + "learning_rate": 2.1228975845723137e-05, + "loss": 0.1837, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18746045231819153, + "step": 2435, + "valid_targets_mean": 3991.0, + "valid_targets_min": 251 + }, + { + "epoch": 3.7366003062787136, + "grad_norm": 0.4065373365511912, + "learning_rate": 2.115272938816544e-05, + "loss": 0.1844, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14783808588981628, + "step": 2440, + "valid_targets_mean": 4898.0, + "valid_targets_min": 661 + }, + { + "epoch": 3.7442572741194486, + "grad_norm": 0.465243722366574, + "learning_rate": 2.107646611745445e-05, + "loss": 0.1912, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2061719298362732, + "step": 2445, + "valid_targets_mean": 5343.1, + "valid_targets_min": 720 + }, + { + "epoch": 3.7519142419601836, + "grad_norm": 0.5540659684720433, + "learning_rate": 2.1000187145929347e-05, + "loss": 0.1931, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20314675569534302, + "step": 2450, + "valid_targets_mean": 3803.7, + "valid_targets_min": 490 + }, + { + "epoch": 3.759571209800919, + "grad_norm": 0.5243985701272287, + "learning_rate": 2.092389358615832e-05, + "loss": 0.2083, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2511424124240875, + "step": 2455, + "valid_targets_mean": 4707.7, + "valid_targets_min": 611 + }, + { + "epoch": 3.7672281776416536, + "grad_norm": 0.42664805575678655, + "learning_rate": 2.0847586550922326e-05, + "loss": 0.1889, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2017786204814911, + "step": 2460, + "valid_targets_mean": 5301.8, + "valid_targets_min": 918 + }, + { + "epoch": 3.774885145482389, + "grad_norm": 0.4453346796586349, + "learning_rate": 2.0771267153198873e-05, + "loss": 0.1919, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17807403206825256, + "step": 2465, + "valid_targets_mean": 5417.9, + "valid_targets_min": 928 + }, + { + "epoch": 3.782542113323124, + "grad_norm": 0.4219997971711044, + "learning_rate": 2.069493650614578e-05, + "loss": 0.1863, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1867009550333023, + "step": 2470, + "valid_targets_mean": 5047.1, + "valid_targets_min": 616 + }, + { + "epoch": 3.790199081163859, + "grad_norm": 0.3945998573310309, + "learning_rate": 2.0618595723084938e-05, + "loss": 0.1803, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16966620087623596, + "step": 2475, + "valid_targets_mean": 5595.2, + "valid_targets_min": 727 + }, + { + "epoch": 3.797856049004594, + "grad_norm": 0.3896927875127744, + "learning_rate": 2.054224591748609e-05, + "loss": 0.1828, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17560520768165588, + "step": 2480, + "valid_targets_mean": 5799.6, + "valid_targets_min": 1214 + }, + { + "epoch": 3.805513016845329, + "grad_norm": 0.4767292234124444, + "learning_rate": 2.046588820295057e-05, + "loss": 0.1881, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2139269858598709, + "step": 2485, + "valid_targets_mean": 5065.8, + "valid_targets_min": 499 + }, + { + "epoch": 3.8131699846860645, + "grad_norm": 0.48305615805465524, + "learning_rate": 2.038952369319507e-05, + "loss": 0.1901, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1714230179786682, + "step": 2490, + "valid_targets_mean": 5173.9, + "valid_targets_min": 1005 + }, + { + "epoch": 3.8208269525267995, + "grad_norm": 0.4591287515515576, + "learning_rate": 2.031315350203539e-05, + "loss": 0.1907, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20354583859443665, + "step": 2495, + "valid_targets_mean": 5899.7, + "valid_targets_min": 1542 + }, + { + "epoch": 3.8284839203675345, + "grad_norm": 0.405791559738873, + "learning_rate": 2.02367787433702e-05, + "loss": 0.172, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1725158542394638, + "step": 2500, + "valid_targets_mean": 5390.0, + "valid_targets_min": 639 + }, + { + "epoch": 3.8361408882082695, + "grad_norm": 0.4497822748514485, + "learning_rate": 2.0160400531164787e-05, + "loss": 0.1729, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15819357335567474, + "step": 2505, + "valid_targets_mean": 5183.4, + "valid_targets_min": 813 + }, + { + "epoch": 3.8437978560490045, + "grad_norm": 0.38677614976809455, + "learning_rate": 2.008401997943481e-05, + "loss": 0.1955, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19747616350650787, + "step": 2510, + "valid_targets_mean": 6532.3, + "valid_targets_min": 875 + }, + { + "epoch": 3.85145482388974, + "grad_norm": 0.4278038294940775, + "learning_rate": 2.0007638202230053e-05, + "loss": 0.1814, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1896567940711975, + "step": 2515, + "valid_targets_mean": 5536.9, + "valid_targets_min": 838 + }, + { + "epoch": 3.8591117917304745, + "grad_norm": 0.5403737252126551, + "learning_rate": 1.9931256313618173e-05, + "loss": 0.185, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19202588498592377, + "step": 2520, + "valid_targets_mean": 4081.1, + "valid_targets_min": 352 + }, + { + "epoch": 3.86676875957121, + "grad_norm": 0.44283695481669993, + "learning_rate": 1.9854875427668453e-05, + "loss": 0.183, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1849876344203949, + "step": 2525, + "valid_targets_mean": 5332.9, + "valid_targets_min": 1213 + }, + { + "epoch": 3.874425727411945, + "grad_norm": 0.45612268357938296, + "learning_rate": 1.9778496658435552e-05, + "loss": 0.1864, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2018076777458191, + "step": 2530, + "valid_targets_mean": 4947.2, + "valid_targets_min": 713 + }, + { + "epoch": 3.88208269525268, + "grad_norm": 0.4511802661455763, + "learning_rate": 1.970212111994325e-05, + "loss": 0.1776, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1800006926059723, + "step": 2535, + "valid_targets_mean": 5933.2, + "valid_targets_min": 1511 + }, + { + "epoch": 3.889739663093415, + "grad_norm": 0.4550636487612721, + "learning_rate": 1.9625749926168205e-05, + "loss": 0.2011, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19659721851348877, + "step": 2540, + "valid_targets_mean": 5744.1, + "valid_targets_min": 2064 + }, + { + "epoch": 3.89739663093415, + "grad_norm": 0.45143924481673986, + "learning_rate": 1.954938419102372e-05, + "loss": 0.1906, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17691466212272644, + "step": 2545, + "valid_targets_mean": 4654.2, + "valid_targets_min": 892 + }, + { + "epoch": 3.9050535987748853, + "grad_norm": 0.4108146672941242, + "learning_rate": 1.9473025028343464e-05, + "loss": 0.1859, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1811871975660324, + "step": 2550, + "valid_targets_mean": 5904.1, + "valid_targets_min": 2536 + }, + { + "epoch": 3.9127105666156203, + "grad_norm": 0.39297017745981955, + "learning_rate": 1.9396673551865245e-05, + "loss": 0.1808, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16155928373336792, + "step": 2555, + "valid_targets_mean": 5933.3, + "valid_targets_min": 1125 + }, + { + "epoch": 3.9203675344563553, + "grad_norm": 0.4647337901010682, + "learning_rate": 1.932033087521478e-05, + "loss": 0.1824, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20087887346744537, + "step": 2560, + "valid_targets_mean": 5496.6, + "valid_targets_min": 672 + }, + { + "epoch": 3.9280245022970903, + "grad_norm": 0.51196014419268, + "learning_rate": 1.9243998111889422e-05, + "loss": 0.1855, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18994739651679993, + "step": 2565, + "valid_targets_mean": 4321.1, + "valid_targets_min": 768 + }, + { + "epoch": 3.9356814701378253, + "grad_norm": 0.486482225957426, + "learning_rate": 1.916767637524193e-05, + "loss": 0.1867, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20536579191684723, + "step": 2570, + "valid_targets_mean": 4581.4, + "valid_targets_min": 574 + }, + { + "epoch": 3.9433384379785608, + "grad_norm": 0.5397289328340875, + "learning_rate": 1.9091366778464236e-05, + "loss": 0.1935, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1987244337797165, + "step": 2575, + "valid_targets_mean": 4776.2, + "valid_targets_min": 822 + }, + { + "epoch": 3.9509954058192953, + "grad_norm": 0.39898386260649726, + "learning_rate": 1.9015070434571214e-05, + "loss": 0.18, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15328550338745117, + "step": 2580, + "valid_targets_mean": 5600.8, + "valid_targets_min": 247 + }, + { + "epoch": 3.9586523736600308, + "grad_norm": 0.4317711975925466, + "learning_rate": 1.8938788456384435e-05, + "loss": 0.1925, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19141104817390442, + "step": 2585, + "valid_targets_mean": 5771.4, + "valid_targets_min": 266 + }, + { + "epoch": 3.9663093415007658, + "grad_norm": 0.471345314428317, + "learning_rate": 1.886252195651593e-05, + "loss": 0.1945, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18774153292179108, + "step": 2590, + "valid_targets_mean": 5299.0, + "valid_targets_min": 654 + }, + { + "epoch": 3.9739663093415007, + "grad_norm": 0.48013416700446687, + "learning_rate": 1.8786272047351974e-05, + "loss": 0.1983, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20652708411216736, + "step": 2595, + "valid_targets_mean": 5300.6, + "valid_targets_min": 1524 + }, + { + "epoch": 3.9816232771822357, + "grad_norm": 0.4624682372289305, + "learning_rate": 1.8710039841036868e-05, + "loss": 0.1848, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17302487790584564, + "step": 2600, + "valid_targets_mean": 4120.8, + "valid_targets_min": 390 + }, + { + "epoch": 3.9892802450229707, + "grad_norm": 0.4168745532084026, + "learning_rate": 1.8633826449456694e-05, + "loss": 0.192, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17362730205059052, + "step": 2605, + "valid_targets_mean": 5236.6, + "valid_targets_min": 625 + }, + { + "epoch": 3.996937212863706, + "grad_norm": 0.4932527691180615, + "learning_rate": 1.8557632984223124e-05, + "loss": 0.1946, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1852743923664093, + "step": 2610, + "valid_targets_mean": 5870.2, + "valid_targets_min": 3261 + }, + { + "epoch": 4.004594180704441, + "grad_norm": 0.4117858119372678, + "learning_rate": 1.848146055665718e-05, + "loss": 0.1845, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16399316489696503, + "step": 2615, + "valid_targets_mean": 5134.5, + "valid_targets_min": 742 + }, + { + "epoch": 4.012251148545176, + "grad_norm": 0.4808770384706559, + "learning_rate": 1.840531027777306e-05, + "loss": 0.1696, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1545652449131012, + "step": 2620, + "valid_targets_mean": 5073.8, + "valid_targets_min": 291 + }, + { + "epoch": 4.019908116385912, + "grad_norm": 0.4716619083630413, + "learning_rate": 1.832918325826188e-05, + "loss": 0.1668, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1703418791294098, + "step": 2625, + "valid_targets_mean": 4764.6, + "valid_targets_min": 274 + }, + { + "epoch": 4.027565084226646, + "grad_norm": 0.4838374411041368, + "learning_rate": 1.825308060847554e-05, + "loss": 0.1755, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17043305933475494, + "step": 2630, + "valid_targets_mean": 4994.8, + "valid_targets_min": 626 + }, + { + "epoch": 4.035222052067382, + "grad_norm": 0.4840997051081212, + "learning_rate": 1.8177003438410468e-05, + "loss": 0.1712, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16094501316547394, + "step": 2635, + "valid_targets_mean": 5405.5, + "valid_targets_min": 429 + }, + { + "epoch": 4.042879019908116, + "grad_norm": 0.4485354827030893, + "learning_rate": 1.8100952857691478e-05, + "loss": 0.1778, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16980960965156555, + "step": 2640, + "valid_targets_mean": 5238.2, + "valid_targets_min": 2252 + }, + { + "epoch": 4.050535987748852, + "grad_norm": 0.5691308127289896, + "learning_rate": 1.802492997555554e-05, + "loss": 0.1872, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18940171599388123, + "step": 2645, + "valid_targets_mean": 3804.6, + "valid_targets_min": 529 + }, + { + "epoch": 4.058192955589586, + "grad_norm": 0.5431514385919676, + "learning_rate": 1.7948935900835666e-05, + "loss": 0.1798, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17891111969947815, + "step": 2650, + "valid_targets_mean": 4574.4, + "valid_targets_min": 727 + }, + { + "epoch": 4.065849923430322, + "grad_norm": 0.5041080458794216, + "learning_rate": 1.7872971741944657e-05, + "loss": 0.1807, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20006835460662842, + "step": 2655, + "valid_targets_mean": 4423.0, + "valid_targets_min": 587 + }, + { + "epoch": 4.073506891271057, + "grad_norm": 0.48183764813968805, + "learning_rate": 1.779703860685899e-05, + "loss": 0.1838, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16695404052734375, + "step": 2660, + "valid_targets_mean": 4707.2, + "valid_targets_min": 593 + }, + { + "epoch": 4.081163859111792, + "grad_norm": 0.44635472401034965, + "learning_rate": 1.772113760310265e-05, + "loss": 0.1759, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16373205184936523, + "step": 2665, + "valid_targets_mean": 5316.4, + "valid_targets_min": 2045 + }, + { + "epoch": 4.088820826952527, + "grad_norm": 0.5341676996658703, + "learning_rate": 1.7645269837730964e-05, + "loss": 0.1661, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18219105899333954, + "step": 2670, + "valid_targets_mean": 4616.1, + "valid_targets_min": 801 + }, + { + "epoch": 4.096477794793262, + "grad_norm": 0.5082489734934296, + "learning_rate": 1.7569436417314454e-05, + "loss": 0.1654, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15915320813655853, + "step": 2675, + "valid_targets_mean": 5307.9, + "valid_targets_min": 1855 + }, + { + "epoch": 4.104134762633997, + "grad_norm": 0.48315077118735306, + "learning_rate": 1.7493638447922724e-05, + "loss": 0.1763, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17643246054649353, + "step": 2680, + "valid_targets_mean": 4735.8, + "valid_targets_min": 603 + }, + { + "epoch": 4.111791730474732, + "grad_norm": 0.4158258538790409, + "learning_rate": 1.741787703510828e-05, + "loss": 0.1648, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14290867745876312, + "step": 2685, + "valid_targets_mean": 5752.9, + "valid_targets_min": 801 + }, + { + "epoch": 4.119448698315467, + "grad_norm": 0.4464039644440533, + "learning_rate": 1.7342153283890454e-05, + "loss": 0.1889, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16524581611156464, + "step": 2690, + "valid_targets_mean": 4866.8, + "valid_targets_min": 273 + }, + { + "epoch": 4.1271056661562024, + "grad_norm": 0.4276684308904948, + "learning_rate": 1.7266468298739248e-05, + "loss": 0.1706, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17335769534111023, + "step": 2695, + "valid_targets_mean": 5832.6, + "valid_targets_min": 972 + }, + { + "epoch": 4.134762633996937, + "grad_norm": 0.4728772362847781, + "learning_rate": 1.719082318355924e-05, + "loss": 0.1559, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1612071692943573, + "step": 2700, + "valid_targets_mean": 5342.1, + "valid_targets_min": 2692 + }, + { + "epoch": 4.142419601837672, + "grad_norm": 0.4765083268369304, + "learning_rate": 1.7115219041673513e-05, + "loss": 0.1869, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16713187098503113, + "step": 2705, + "valid_targets_mean": 4761.3, + "valid_targets_min": 669 + }, + { + "epoch": 4.150076569678407, + "grad_norm": 0.4842020502823108, + "learning_rate": 1.703965697580749e-05, + "loss": 0.1722, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1749369502067566, + "step": 2710, + "valid_targets_mean": 4355.8, + "valid_targets_min": 638 + }, + { + "epoch": 4.157733537519142, + "grad_norm": 0.49332601951947624, + "learning_rate": 1.6964138088072927e-05, + "loss": 0.1844, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20040933787822723, + "step": 2715, + "valid_targets_mean": 4893.5, + "valid_targets_min": 767 + }, + { + "epoch": 4.165390505359878, + "grad_norm": 0.4114065993741536, + "learning_rate": 1.6888663479951787e-05, + "loss": 0.1836, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15738333761692047, + "step": 2720, + "valid_targets_mean": 5901.4, + "valid_targets_min": 615 + }, + { + "epoch": 4.173047473200612, + "grad_norm": 0.49020449855560566, + "learning_rate": 1.6813234252280198e-05, + "loss": 0.1675, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17966817319393158, + "step": 2725, + "valid_targets_mean": 4505.4, + "valid_targets_min": 393 + }, + { + "epoch": 4.180704441041348, + "grad_norm": 0.467609857800558, + "learning_rate": 1.673785150523239e-05, + "loss": 0.1642, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1697966456413269, + "step": 2730, + "valid_targets_mean": 4822.9, + "valid_targets_min": 624 + }, + { + "epoch": 4.188361408882082, + "grad_norm": 0.405164834964215, + "learning_rate": 1.6662516338304653e-05, + "loss": 0.1745, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15354806184768677, + "step": 2735, + "valid_targets_mean": 5354.1, + "valid_targets_min": 649 + }, + { + "epoch": 4.196018376722818, + "grad_norm": 0.4338536588919433, + "learning_rate": 1.658722985029928e-05, + "loss": 0.1864, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17717352509498596, + "step": 2740, + "valid_targets_mean": 5779.4, + "valid_targets_min": 2130 + }, + { + "epoch": 4.203675344563552, + "grad_norm": 0.5850087613394777, + "learning_rate": 1.6511993139308593e-05, + "loss": 0.1675, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15216293931007385, + "step": 2745, + "valid_targets_mean": 4756.1, + "valid_targets_min": 737 + }, + { + "epoch": 4.211332312404288, + "grad_norm": 0.4176714174791368, + "learning_rate": 1.6436807302698853e-05, + "loss": 0.1523, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15484420955181122, + "step": 2750, + "valid_targets_mean": 5400.6, + "valid_targets_min": 692 + }, + { + "epoch": 4.218989280245023, + "grad_norm": 0.5045586445327894, + "learning_rate": 1.6361673437094306e-05, + "loss": 0.1671, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18908411264419556, + "step": 2755, + "valid_targets_mean": 4536.8, + "valid_targets_min": 559 + }, + { + "epoch": 4.226646248085758, + "grad_norm": 0.47376908314362176, + "learning_rate": 1.6286592638361176e-05, + "loss": 0.1526, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16104529798030853, + "step": 2760, + "valid_targets_mean": 5460.4, + "valid_targets_min": 912 + }, + { + "epoch": 4.234303215926493, + "grad_norm": 0.4804874523270708, + "learning_rate": 1.6211566001591673e-05, + "loss": 0.1719, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16267931461334229, + "step": 2765, + "valid_targets_mean": 4827.1, + "valid_targets_min": 720 + }, + { + "epoch": 4.241960183767228, + "grad_norm": 0.5114521946324453, + "learning_rate": 1.6136594621088038e-05, + "loss": 0.1882, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19819220900535583, + "step": 2770, + "valid_targets_mean": 4684.1, + "valid_targets_min": 567 + }, + { + "epoch": 4.249617151607963, + "grad_norm": 0.5128617395395278, + "learning_rate": 1.606167959034656e-05, + "loss": 0.156, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1607045829296112, + "step": 2775, + "valid_targets_mean": 4665.6, + "valid_targets_min": 625 + }, + { + "epoch": 4.257274119448699, + "grad_norm": 0.5017550199119629, + "learning_rate": 1.5986822002041645e-05, + "loss": 0.1663, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.167169451713562, + "step": 2780, + "valid_targets_mean": 4271.9, + "valid_targets_min": 792 + }, + { + "epoch": 4.264931087289433, + "grad_norm": 0.44395083608432084, + "learning_rate": 1.5912022948009862e-05, + "loss": 0.1695, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15831434726715088, + "step": 2785, + "valid_targets_mean": 5820.2, + "valid_targets_min": 2996 + }, + { + "epoch": 4.272588055130169, + "grad_norm": 0.4722239671745089, + "learning_rate": 1.5837283519234038e-05, + "loss": 0.172, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1687237173318863, + "step": 2790, + "valid_targets_mean": 5822.8, + "valid_targets_min": 842 + }, + { + "epoch": 4.280245022970903, + "grad_norm": 0.45638928620066843, + "learning_rate": 1.5762604805827323e-05, + "loss": 0.1746, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1702617108821869, + "step": 2795, + "valid_targets_mean": 5522.4, + "valid_targets_min": 625 + }, + { + "epoch": 4.287901990811639, + "grad_norm": 0.4756632270967452, + "learning_rate": 1.5687987897017324e-05, + "loss": 0.1732, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21105210483074188, + "step": 2800, + "valid_targets_mean": 5453.9, + "valid_targets_min": 553 + }, + { + "epoch": 4.295558958652373, + "grad_norm": 0.4018470816825365, + "learning_rate": 1.561343388113017e-05, + "loss": 0.1633, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1360223889350891, + "step": 2805, + "valid_targets_mean": 6373.5, + "valid_targets_min": 2733 + }, + { + "epoch": 4.303215926493109, + "grad_norm": 0.4931211767025457, + "learning_rate": 1.5538943845574674e-05, + "loss": 0.1698, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15920180082321167, + "step": 2810, + "valid_targets_mean": 6204.0, + "valid_targets_min": 2948 + }, + { + "epoch": 4.310872894333844, + "grad_norm": 0.5467551374335786, + "learning_rate": 1.5464518876826474e-05, + "loss": 0.1841, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2006562054157257, + "step": 2815, + "valid_targets_mean": 4931.8, + "valid_targets_min": 2745 + }, + { + "epoch": 4.318529862174579, + "grad_norm": 0.42757772305840064, + "learning_rate": 1.5390160060412153e-05, + "loss": 0.1633, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15964971482753754, + "step": 2820, + "valid_targets_mean": 6229.6, + "valid_targets_min": 667 + }, + { + "epoch": 4.326186830015314, + "grad_norm": 0.4781249802666833, + "learning_rate": 1.531586848089345e-05, + "loss": 0.1753, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1797013282775879, + "step": 2825, + "valid_targets_mean": 5788.6, + "valid_targets_min": 2048 + }, + { + "epoch": 4.333843797856049, + "grad_norm": 0.4881439374190577, + "learning_rate": 1.5241645221851405e-05, + "loss": 0.1806, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18294446170330048, + "step": 2830, + "valid_targets_mean": 5210.4, + "valid_targets_min": 672 + }, + { + "epoch": 4.341500765696784, + "grad_norm": 0.40894754528830446, + "learning_rate": 1.5167491365870573e-05, + "loss": 0.1632, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14472708106040955, + "step": 2835, + "valid_targets_mean": 5870.3, + "valid_targets_min": 3287 + }, + { + "epoch": 4.3491577335375196, + "grad_norm": 0.49768141849572484, + "learning_rate": 1.5093407994523234e-05, + "loss": 0.1647, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.178148090839386, + "step": 2840, + "valid_targets_mean": 5438.2, + "valid_targets_min": 1641 + }, + { + "epoch": 4.356814701378254, + "grad_norm": 0.428585898075749, + "learning_rate": 1.501939618835361e-05, + "loss": 0.1833, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15074047446250916, + "step": 2845, + "valid_targets_mean": 4899.4, + "valid_targets_min": 557 + }, + { + "epoch": 4.3644716692189895, + "grad_norm": 0.5203366302958021, + "learning_rate": 1.4945457026862102e-05, + "loss": 0.1756, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18068398535251617, + "step": 2850, + "valid_targets_mean": 5078.4, + "valid_targets_min": 591 + }, + { + "epoch": 4.372128637059724, + "grad_norm": 0.42841342187225284, + "learning_rate": 1.4871591588489558e-05, + "loss": 0.1764, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15299955010414124, + "step": 2855, + "valid_targets_mean": 5253.9, + "valid_targets_min": 393 + }, + { + "epoch": 4.3797856049004595, + "grad_norm": 0.4510836298560107, + "learning_rate": 1.4797800950601527e-05, + "loss": 0.1753, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.195342555642128, + "step": 2860, + "valid_targets_mean": 6093.7, + "valid_targets_min": 611 + }, + { + "epoch": 4.387442572741194, + "grad_norm": 0.5035923423896532, + "learning_rate": 1.4724086189472573e-05, + "loss": 0.1698, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18418394029140472, + "step": 2865, + "valid_targets_mean": 5024.9, + "valid_targets_min": 824 + }, + { + "epoch": 4.3950995405819295, + "grad_norm": 0.511919119688101, + "learning_rate": 1.4650448380270542e-05, + "loss": 0.182, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18659475445747375, + "step": 2870, + "valid_targets_mean": 4247.2, + "valid_targets_min": 654 + }, + { + "epoch": 4.402756508422665, + "grad_norm": 0.49765259928346983, + "learning_rate": 1.4576888597040897e-05, + "loss": 0.1823, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1821971833705902, + "step": 2875, + "valid_targets_mean": 4381.1, + "valid_targets_min": 379 + }, + { + "epoch": 4.4104134762633995, + "grad_norm": 0.4844340089149697, + "learning_rate": 1.450340791269106e-05, + "loss": 0.1648, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16185416281223297, + "step": 2880, + "valid_targets_mean": 4287.1, + "valid_targets_min": 258 + }, + { + "epoch": 4.418070444104135, + "grad_norm": 0.4422597910254744, + "learning_rate": 1.4430007398974751e-05, + "loss": 0.1772, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15629816055297852, + "step": 2885, + "valid_targets_mean": 5202.2, + "valid_targets_min": 571 + }, + { + "epoch": 4.4257274119448695, + "grad_norm": 0.448577660568666, + "learning_rate": 1.4356688126476352e-05, + "loss": 0.1667, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15885485708713531, + "step": 2890, + "valid_targets_mean": 4905.8, + "valid_targets_min": 255 + }, + { + "epoch": 4.433384379785605, + "grad_norm": 0.5127193213614976, + "learning_rate": 1.428345116459532e-05, + "loss": 0.1787, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1999489665031433, + "step": 2895, + "valid_targets_mean": 5543.8, + "valid_targets_min": 942 + }, + { + "epoch": 4.44104134762634, + "grad_norm": 0.3696874292839933, + "learning_rate": 1.421029758153055e-05, + "loss": 0.1613, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13624325394630432, + "step": 2900, + "valid_targets_mean": 6215.7, + "valid_targets_min": 2870 + }, + { + "epoch": 4.448698315467075, + "grad_norm": 0.47165957515150697, + "learning_rate": 1.413722844426482e-05, + "loss": 0.1755, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17101368308067322, + "step": 2905, + "valid_targets_mean": 5829.2, + "valid_targets_min": 3825 + }, + { + "epoch": 4.45635528330781, + "grad_norm": 0.529309619643836, + "learning_rate": 1.4064244818549227e-05, + "loss": 0.1673, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20200702548027039, + "step": 2910, + "valid_targets_mean": 5083.9, + "valid_targets_min": 1085 + }, + { + "epoch": 4.464012251148545, + "grad_norm": 0.4583986271288342, + "learning_rate": 1.3991347768887629e-05, + "loss": 0.1673, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15790319442749023, + "step": 2915, + "valid_targets_mean": 5490.7, + "valid_targets_min": 2776 + }, + { + "epoch": 4.47166921898928, + "grad_norm": 0.4672454081699034, + "learning_rate": 1.3918538358521136e-05, + "loss": 0.1696, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1702156662940979, + "step": 2920, + "valid_targets_mean": 6327.2, + "valid_targets_min": 3255 + }, + { + "epoch": 4.479326186830015, + "grad_norm": 0.5413334749004304, + "learning_rate": 1.384581764941259e-05, + "loss": 0.1813, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18624061346054077, + "step": 2925, + "valid_targets_mean": 3659.4, + "valid_targets_min": 276 + }, + { + "epoch": 4.48698315467075, + "grad_norm": 0.4203208806439039, + "learning_rate": 1.3773186702231076e-05, + "loss": 0.1713, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18235370516777039, + "step": 2930, + "valid_targets_mean": 6541.8, + "valid_targets_min": 2382 + }, + { + "epoch": 4.494640122511486, + "grad_norm": 0.49042923690230983, + "learning_rate": 1.3700646576336469e-05, + "loss": 0.1598, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16061678528785706, + "step": 2935, + "valid_targets_mean": 4792.9, + "valid_targets_min": 733 + }, + { + "epoch": 4.50229709035222, + "grad_norm": 0.4834191846877714, + "learning_rate": 1.362819832976395e-05, + "loss": 0.1752, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.187465101480484, + "step": 2940, + "valid_targets_mean": 4979.9, + "valid_targets_min": 553 + }, + { + "epoch": 4.509954058192956, + "grad_norm": 0.5358280569932836, + "learning_rate": 1.3555843019208604e-05, + "loss": 0.1775, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16426986455917358, + "step": 2945, + "valid_targets_mean": 5311.7, + "valid_targets_min": 745 + }, + { + "epoch": 4.51761102603369, + "grad_norm": 0.5228285623940019, + "learning_rate": 1.3483581700009988e-05, + "loss": 0.1704, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2028902769088745, + "step": 2950, + "valid_targets_mean": 4474.3, + "valid_targets_min": 600 + }, + { + "epoch": 4.525267993874426, + "grad_norm": 0.4362550606192867, + "learning_rate": 1.3411415426136754e-05, + "loss": 0.1662, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18145951628684998, + "step": 2955, + "valid_targets_mean": 5741.4, + "valid_targets_min": 3720 + }, + { + "epoch": 4.53292496171516, + "grad_norm": 0.5334592365964695, + "learning_rate": 1.333934525017127e-05, + "loss": 0.1779, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19370566308498383, + "step": 2960, + "valid_targets_mean": 4627.0, + "valid_targets_min": 687 + }, + { + "epoch": 4.540581929555896, + "grad_norm": 0.5166440369660882, + "learning_rate": 1.3267372223294258e-05, + "loss": 0.18, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1994536966085434, + "step": 2965, + "valid_targets_mean": 4718.9, + "valid_targets_min": 600 + }, + { + "epoch": 4.548238897396631, + "grad_norm": 0.5598640848233768, + "learning_rate": 1.319549739526948e-05, + "loss": 0.1672, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22695991396903992, + "step": 2970, + "valid_targets_mean": 4743.5, + "valid_targets_min": 439 + }, + { + "epoch": 4.555895865237366, + "grad_norm": 0.4771347776369329, + "learning_rate": 1.3123721814428408e-05, + "loss": 0.1759, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18281838297843933, + "step": 2975, + "valid_targets_mean": 4889.7, + "valid_targets_min": 918 + }, + { + "epoch": 4.563552833078101, + "grad_norm": 0.42341232828878017, + "learning_rate": 1.3052046527654948e-05, + "loss": 0.1783, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16951890289783478, + "step": 2980, + "valid_targets_mean": 5872.5, + "valid_targets_min": 357 + }, + { + "epoch": 4.571209800918836, + "grad_norm": 0.4384091653374271, + "learning_rate": 1.2980472580370162e-05, + "loss": 0.1669, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16993127763271332, + "step": 2985, + "valid_targets_mean": 5593.4, + "valid_targets_min": 990 + }, + { + "epoch": 4.578866768759571, + "grad_norm": 0.4736958546897791, + "learning_rate": 1.2909001016517031e-05, + "loss": 0.1655, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15475991368293762, + "step": 2990, + "valid_targets_mean": 5100.8, + "valid_targets_min": 812 + }, + { + "epoch": 4.586523736600307, + "grad_norm": 0.4500490682468214, + "learning_rate": 1.2837632878545212e-05, + "loss": 0.1607, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17424660921096802, + "step": 2995, + "valid_targets_mean": 5034.8, + "valid_targets_min": 2439 + }, + { + "epoch": 4.594180704441041, + "grad_norm": 0.4982990021173331, + "learning_rate": 1.2766369207395845e-05, + "loss": 0.1796, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15128692984580994, + "step": 3000, + "valid_targets_mean": 4580.6, + "valid_targets_min": 655 + }, + { + "epoch": 4.601837672281777, + "grad_norm": 0.5411255865457929, + "learning_rate": 1.269521104248637e-05, + "loss": 0.1759, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19686409831047058, + "step": 3005, + "valid_targets_mean": 3894.5, + "valid_targets_min": 412 + }, + { + "epoch": 4.609494640122511, + "grad_norm": 0.46544029566434875, + "learning_rate": 1.2624159421695354e-05, + "loss": 0.1704, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17594610154628754, + "step": 3010, + "valid_targets_mean": 5448.4, + "valid_targets_min": 2259 + }, + { + "epoch": 4.617151607963247, + "grad_norm": 0.5041573873911557, + "learning_rate": 1.2553215381347377e-05, + "loss": 0.1788, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17713363468647003, + "step": 3015, + "valid_targets_mean": 3941.9, + "valid_targets_min": 294 + }, + { + "epoch": 4.624808575803982, + "grad_norm": 0.4460422876457042, + "learning_rate": 1.2482379956197898e-05, + "loss": 0.1617, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1611134260892868, + "step": 3020, + "valid_targets_mean": 5502.9, + "valid_targets_min": 649 + }, + { + "epoch": 4.632465543644717, + "grad_norm": 0.5123541360019072, + "learning_rate": 1.2411654179418162e-05, + "loss": 0.1817, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15525534749031067, + "step": 3025, + "valid_targets_mean": 4013.7, + "valid_targets_min": 666 + }, + { + "epoch": 4.640122511485452, + "grad_norm": 0.5056279153295342, + "learning_rate": 1.2341039082580143e-05, + "loss": 0.1839, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20131169259548187, + "step": 3030, + "valid_targets_mean": 4715.8, + "valid_targets_min": 453 + }, + { + "epoch": 4.647779479326187, + "grad_norm": 0.4963953600906092, + "learning_rate": 1.2270535695641488e-05, + "loss": 0.1702, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1591898500919342, + "step": 3035, + "valid_targets_mean": 4588.5, + "valid_targets_min": 875 + }, + { + "epoch": 4.655436447166922, + "grad_norm": 0.4787452476331541, + "learning_rate": 1.2200145046930494e-05, + "loss": 0.1863, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18031710386276245, + "step": 3040, + "valid_targets_mean": 4756.6, + "valid_targets_min": 619 + }, + { + "epoch": 4.663093415007657, + "grad_norm": 0.4353032563440157, + "learning_rate": 1.2129868163131115e-05, + "loss": 0.1612, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15741820633411407, + "step": 3045, + "valid_targets_mean": 5642.2, + "valid_targets_min": 781 + }, + { + "epoch": 4.670750382848392, + "grad_norm": 0.4956709263367504, + "learning_rate": 1.2059706069267985e-05, + "loss": 0.1987, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20289403200149536, + "step": 3050, + "valid_targets_mean": 4478.3, + "valid_targets_min": 654 + }, + { + "epoch": 4.6784073506891275, + "grad_norm": 0.4374701574959408, + "learning_rate": 1.1989659788691472e-05, + "loss": 0.1723, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15316550433635712, + "step": 3055, + "valid_targets_mean": 5378.1, + "valid_targets_min": 326 + }, + { + "epoch": 4.686064318529862, + "grad_norm": 0.4312228148138217, + "learning_rate": 1.1919730343062742e-05, + "loss": 0.182, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1685815006494522, + "step": 3060, + "valid_targets_mean": 5913.8, + "valid_targets_min": 2858 + }, + { + "epoch": 4.6937212863705975, + "grad_norm": 0.6788655954584126, + "learning_rate": 1.1849918752338864e-05, + "loss": 0.1591, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1469895839691162, + "step": 3065, + "valid_targets_mean": 5040.0, + "valid_targets_min": 876 + }, + { + "epoch": 4.701378254211332, + "grad_norm": 0.5027169074416509, + "learning_rate": 1.1780226034757938e-05, + "loss": 0.1785, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15269100666046143, + "step": 3070, + "valid_targets_mean": 3950.4, + "valid_targets_min": 714 + }, + { + "epoch": 4.7090352220520675, + "grad_norm": 0.45849599624176834, + "learning_rate": 1.1710653206824225e-05, + "loss": 0.1806, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1836073398590088, + "step": 3075, + "valid_targets_mean": 5161.0, + "valid_targets_min": 617 + }, + { + "epoch": 4.716692189892802, + "grad_norm": 0.463224080205975, + "learning_rate": 1.164120128329334e-05, + "loss": 0.1693, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1551927775144577, + "step": 3080, + "valid_targets_mean": 4956.4, + "valid_targets_min": 421 + }, + { + "epoch": 4.7243491577335375, + "grad_norm": 0.6749074626071137, + "learning_rate": 1.1571871277157458e-05, + "loss": 0.1748, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1819312572479248, + "step": 3085, + "valid_targets_mean": 3737.6, + "valid_targets_min": 721 + }, + { + "epoch": 4.732006125574273, + "grad_norm": 0.4507533807504539, + "learning_rate": 1.15026641996305e-05, + "loss": 0.161, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1480093002319336, + "step": 3090, + "valid_targets_mean": 5161.5, + "valid_targets_min": 270 + }, + { + "epoch": 4.7396630934150075, + "grad_norm": 0.5183685281293393, + "learning_rate": 1.1433581060133432e-05, + "loss": 0.178, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1799740195274353, + "step": 3095, + "valid_targets_mean": 5317.7, + "valid_targets_min": 801 + }, + { + "epoch": 4.747320061255743, + "grad_norm": 0.5028716072176976, + "learning_rate": 1.136462286627952e-05, + "loss": 0.1671, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17748679220676422, + "step": 3100, + "valid_targets_mean": 4244.8, + "valid_targets_min": 726 + }, + { + "epoch": 4.7549770290964775, + "grad_norm": 0.7306366733577314, + "learning_rate": 1.1295790623859605e-05, + "loss": 0.175, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16387039422988892, + "step": 3105, + "valid_targets_mean": 6011.9, + "valid_targets_min": 1828 + }, + { + "epoch": 4.762633996937213, + "grad_norm": 0.4663498360213423, + "learning_rate": 1.1227085336827492e-05, + "loss": 0.1662, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18103697896003723, + "step": 3110, + "valid_targets_mean": 5026.4, + "valid_targets_min": 410 + }, + { + "epoch": 4.7702909647779475, + "grad_norm": 0.483563480045064, + "learning_rate": 1.1158508007285266e-05, + "loss": 0.17, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16695986688137054, + "step": 3115, + "valid_targets_mean": 5319.2, + "valid_targets_min": 707 + }, + { + "epoch": 4.777947932618683, + "grad_norm": 0.4827606579725246, + "learning_rate": 1.1090059635468693e-05, + "loss": 0.1762, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.167944997549057, + "step": 3120, + "valid_targets_mean": 4909.1, + "valid_targets_min": 667 + }, + { + "epoch": 4.785604900459418, + "grad_norm": 0.43356790112848625, + "learning_rate": 1.1021741219732602e-05, + "loss": 0.1575, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1488049477338791, + "step": 3125, + "valid_targets_mean": 4725.1, + "valid_targets_min": 1542 + }, + { + "epoch": 4.793261868300153, + "grad_norm": 0.4798237312494164, + "learning_rate": 1.0953553756536363e-05, + "loss": 0.1785, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.202002614736557, + "step": 3130, + "valid_targets_mean": 5125.2, + "valid_targets_min": 588 + }, + { + "epoch": 4.800918836140888, + "grad_norm": 0.4223262568134376, + "learning_rate": 1.0885498240429344e-05, + "loss": 0.1835, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16876980662345886, + "step": 3135, + "valid_targets_mean": 5708.7, + "valid_targets_min": 1016 + }, + { + "epoch": 4.808575803981624, + "grad_norm": 0.4239980387727623, + "learning_rate": 1.0817575664036371e-05, + "loss": 0.1688, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1675986349582672, + "step": 3140, + "valid_targets_mean": 4858.1, + "valid_targets_min": 2378 + }, + { + "epoch": 4.816232771822358, + "grad_norm": 0.4847147778344571, + "learning_rate": 1.07497870180433e-05, + "loss": 0.1829, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16506324708461761, + "step": 3145, + "valid_targets_mean": 5388.0, + "valid_targets_min": 2902 + }, + { + "epoch": 4.823889739663094, + "grad_norm": 0.5548302078937414, + "learning_rate": 1.0682133291182522e-05, + "loss": 0.1713, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14651057124137878, + "step": 3150, + "valid_targets_mean": 5324.9, + "valid_targets_min": 671 + }, + { + "epoch": 4.831546707503828, + "grad_norm": 0.5265431034812131, + "learning_rate": 1.0614615470218585e-05, + "loss": 0.1808, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19283831119537354, + "step": 3155, + "valid_targets_mean": 4184.3, + "valid_targets_min": 777 + }, + { + "epoch": 4.839203675344564, + "grad_norm": 0.4752269724565345, + "learning_rate": 1.0547234539933755e-05, + "loss": 0.1766, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17586413025856018, + "step": 3160, + "valid_targets_mean": 4714.7, + "valid_targets_min": 949 + }, + { + "epoch": 4.846860643185298, + "grad_norm": 1.169559990539335, + "learning_rate": 1.0479991483113697e-05, + "loss": 0.1805, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17083494365215302, + "step": 3165, + "valid_targets_mean": 4708.2, + "valid_targets_min": 812 + }, + { + "epoch": 4.854517611026034, + "grad_norm": 0.4368959854231952, + "learning_rate": 1.0412887280533117e-05, + "loss": 0.1786, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1704709231853485, + "step": 3170, + "valid_targets_mean": 5273.7, + "valid_targets_min": 1007 + }, + { + "epoch": 4.862174578866769, + "grad_norm": 0.4944289144103065, + "learning_rate": 1.0345922910941448e-05, + "loss": 0.1695, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14916256070137024, + "step": 3175, + "valid_targets_mean": 6193.1, + "valid_targets_min": 3214 + }, + { + "epoch": 4.869831546707504, + "grad_norm": 0.46502943737171365, + "learning_rate": 1.0279099351048602e-05, + "loss": 0.169, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17156018316745758, + "step": 3180, + "valid_targets_mean": 5356.7, + "valid_targets_min": 670 + }, + { + "epoch": 4.877488514548239, + "grad_norm": 0.5097200882162136, + "learning_rate": 1.0212417575510694e-05, + "loss": 0.1712, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1614779531955719, + "step": 3185, + "valid_targets_mean": 5069.6, + "valid_targets_min": 591 + }, + { + "epoch": 4.885145482388974, + "grad_norm": 0.47471570009753655, + "learning_rate": 1.0145878556915849e-05, + "loss": 0.1734, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1738322228193283, + "step": 3190, + "valid_targets_mean": 4969.6, + "valid_targets_min": 680 + }, + { + "epoch": 4.892802450229709, + "grad_norm": 0.4998838240469851, + "learning_rate": 1.0079483265770019e-05, + "loss": 0.1626, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17251190543174744, + "step": 3195, + "valid_targets_mean": 4337.6, + "valid_targets_min": 953 + }, + { + "epoch": 4.900459418070444, + "grad_norm": 0.531925874666518, + "learning_rate": 1.001323267048278e-05, + "loss": 0.1702, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1674518585205078, + "step": 3200, + "valid_targets_mean": 4905.7, + "valid_targets_min": 646 + }, + { + "epoch": 4.908116385911179, + "grad_norm": 0.5453180840806279, + "learning_rate": 9.947127737353306e-06, + "loss": 0.1853, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17539682984352112, + "step": 3205, + "valid_targets_mean": 5963.2, + "valid_targets_min": 788 + }, + { + "epoch": 4.915773353751915, + "grad_norm": 0.4250839312770307, + "learning_rate": 9.88116943055615e-06, + "loss": 0.1715, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16129732131958008, + "step": 3210, + "valid_targets_mean": 5374.8, + "valid_targets_min": 1861 + }, + { + "epoch": 4.923430321592649, + "grad_norm": 0.5076367049894257, + "learning_rate": 9.81535871212729e-06, + "loss": 0.176, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18412069976329803, + "step": 3215, + "valid_targets_mean": 4940.6, + "valid_targets_min": 763 + }, + { + "epoch": 4.931087289433385, + "grad_norm": 0.46983192162441934, + "learning_rate": 9.749696541950013e-06, + "loss": 0.1682, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18911156058311462, + "step": 3220, + "valid_targets_mean": 5087.8, + "valid_targets_min": 494 + }, + { + "epoch": 4.938744257274119, + "grad_norm": 0.43659187427488777, + "learning_rate": 9.684183877740985e-06, + "loss": 0.1618, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17905791103839874, + "step": 3225, + "valid_targets_mean": 6008.7, + "valid_targets_min": 3603 + }, + { + "epoch": 4.946401225114855, + "grad_norm": 0.4841288841367283, + "learning_rate": 9.61882167503624e-06, + "loss": 0.193, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20017869770526886, + "step": 3230, + "valid_targets_mean": 5367.9, + "valid_targets_min": 878 + }, + { + "epoch": 4.954058192955589, + "grad_norm": 0.512592936573579, + "learning_rate": 9.553610887177246e-06, + "loss": 0.1716, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19410035014152527, + "step": 3235, + "valid_targets_mean": 4450.0, + "valid_targets_min": 544 + }, + { + "epoch": 4.961715160796325, + "grad_norm": 0.5188515954114221, + "learning_rate": 9.488552465297015e-06, + "loss": 0.168, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1623811423778534, + "step": 3240, + "valid_targets_mean": 4658.7, + "valid_targets_min": 591 + }, + { + "epoch": 4.96937212863706, + "grad_norm": 0.476731013903736, + "learning_rate": 9.423647358306218e-06, + "loss": 0.1726, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1714991182088852, + "step": 3245, + "valid_targets_mean": 4713.1, + "valid_targets_min": 789 + }, + { + "epoch": 4.977029096477795, + "grad_norm": 0.5546365002101505, + "learning_rate": 9.358896512879358e-06, + "loss": 0.17, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15670126676559448, + "step": 3250, + "valid_targets_mean": 5500.0, + "valid_targets_min": 2235 + }, + { + "epoch": 4.98468606431853, + "grad_norm": 0.4853561972328554, + "learning_rate": 9.294300873440936e-06, + "loss": 0.1673, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17436271905899048, + "step": 3255, + "valid_targets_mean": 4673.0, + "valid_targets_min": 908 + }, + { + "epoch": 4.992343032159265, + "grad_norm": 0.5222923370657017, + "learning_rate": 9.22986138215171e-06, + "loss": 0.1898, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2152424305677414, + "step": 3260, + "valid_targets_mean": 5085.4, + "valid_targets_min": 743 + }, + { + "epoch": 5.0, + "grad_norm": 0.6441979554730815, + "learning_rate": 9.165578978894937e-06, + "loss": 0.1734, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17200767993927002, + "step": 3265, + "valid_targets_mean": 3869.9, + "valid_targets_min": 351 + }, + { + "epoch": 5.007656967840735, + "grad_norm": 0.4220356269014037, + "learning_rate": 9.10145460126265e-06, + "loss": 0.1668, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17021849751472473, + "step": 3270, + "valid_targets_mean": 5500.6, + "valid_targets_min": 667 + }, + { + "epoch": 5.01531393568147, + "grad_norm": 0.4289204570010935, + "learning_rate": 9.03748918454201e-06, + "loss": 0.1649, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16679969429969788, + "step": 3275, + "valid_targets_mean": 6136.5, + "valid_targets_min": 3099 + }, + { + "epoch": 5.022970903522205, + "grad_norm": 0.4712426003028533, + "learning_rate": 8.973683661701637e-06, + "loss": 0.1524, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1566212773323059, + "step": 3280, + "valid_targets_mean": 5120.7, + "valid_targets_min": 851 + }, + { + "epoch": 5.03062787136294, + "grad_norm": 0.4518245512255529, + "learning_rate": 8.910038963378032e-06, + "loss": 0.1627, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14622384309768677, + "step": 3285, + "valid_targets_mean": 4985.9, + "valid_targets_min": 1493 + }, + { + "epoch": 5.038284839203675, + "grad_norm": 0.4982253074593932, + "learning_rate": 8.846556017861987e-06, + "loss": 0.1669, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15284357964992523, + "step": 3290, + "valid_targets_mean": 4108.5, + "valid_targets_min": 593 + }, + { + "epoch": 5.04594180704441, + "grad_norm": 0.5111382953009579, + "learning_rate": 8.783235751085016e-06, + "loss": 0.1613, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16670887172222137, + "step": 3295, + "valid_targets_mean": 5229.1, + "valid_targets_min": 1762 + }, + { + "epoch": 5.053598774885145, + "grad_norm": 0.508139696790504, + "learning_rate": 8.72007908660593e-06, + "loss": 0.1588, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17335477471351624, + "step": 3300, + "valid_targets_mean": 4433.8, + "valid_targets_min": 324 + }, + { + "epoch": 5.061255742725881, + "grad_norm": 0.4971355581613195, + "learning_rate": 8.657086945597273e-06, + "loss": 0.1555, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1560318022966385, + "step": 3305, + "valid_targets_mean": 4618.1, + "valid_targets_min": 709 + }, + { + "epoch": 5.068912710566615, + "grad_norm": 0.519951349237222, + "learning_rate": 8.594260246831954e-06, + "loss": 0.1624, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17906685173511505, + "step": 3310, + "valid_targets_mean": 4824.6, + "valid_targets_min": 727 + }, + { + "epoch": 5.076569678407351, + "grad_norm": 0.48195723569060445, + "learning_rate": 8.531599906669802e-06, + "loss": 0.1699, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17248311638832092, + "step": 3315, + "valid_targets_mean": 5084.2, + "valid_targets_min": 559 + }, + { + "epoch": 5.084226646248085, + "grad_norm": 0.5032723828958181, + "learning_rate": 8.469106839044232e-06, + "loss": 0.1708, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18109896779060364, + "step": 3320, + "valid_targets_mean": 4405.3, + "valid_targets_min": 276 + }, + { + "epoch": 5.091883614088821, + "grad_norm": 0.45559814554047867, + "learning_rate": 8.406781955448913e-06, + "loss": 0.1537, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16298320889472961, + "step": 3325, + "valid_targets_mean": 5751.0, + "valid_targets_min": 428 + }, + { + "epoch": 5.099540581929556, + "grad_norm": 0.4939004046341742, + "learning_rate": 8.344626164924436e-06, + "loss": 0.1698, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1718837022781372, + "step": 3330, + "valid_targets_mean": 5382.8, + "valid_targets_min": 2252 + }, + { + "epoch": 5.107197549770291, + "grad_norm": 0.5085695929621461, + "learning_rate": 8.28264037404511e-06, + "loss": 0.1509, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1606028974056244, + "step": 3335, + "valid_targets_mean": 4314.8, + "valid_targets_min": 535 + }, + { + "epoch": 5.114854517611026, + "grad_norm": 0.5096351984468025, + "learning_rate": 8.220825486905686e-06, + "loss": 0.1482, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14969605207443237, + "step": 3340, + "valid_targets_mean": 4719.4, + "valid_targets_min": 839 + }, + { + "epoch": 5.122511485451761, + "grad_norm": 0.47515607105527047, + "learning_rate": 8.159182405108222e-06, + "loss": 0.1571, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1551935225725174, + "step": 3345, + "valid_targets_mean": 5535.8, + "valid_targets_min": 2886 + }, + { + "epoch": 5.130168453292496, + "grad_norm": 0.4465656432414435, + "learning_rate": 8.097712027748879e-06, + "loss": 0.1585, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1423056423664093, + "step": 3350, + "valid_targets_mean": 4984.5, + "valid_targets_min": 980 + }, + { + "epoch": 5.137825421133231, + "grad_norm": 0.5427524948981346, + "learning_rate": 8.036415251404855e-06, + "loss": 0.1622, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1889774203300476, + "step": 3355, + "valid_targets_mean": 5009.1, + "valid_targets_min": 819 + }, + { + "epoch": 5.145482388973966, + "grad_norm": 0.4892462664411317, + "learning_rate": 7.975292970121286e-06, + "loss": 0.152, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15726116299629211, + "step": 3360, + "valid_targets_mean": 5218.2, + "valid_targets_min": 268 + }, + { + "epoch": 5.153139356814702, + "grad_norm": 0.4770823262528399, + "learning_rate": 7.914346075398191e-06, + "loss": 0.1634, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16496963798999786, + "step": 3365, + "valid_targets_mean": 4660.6, + "valid_targets_min": 696 + }, + { + "epoch": 5.160796324655436, + "grad_norm": 0.8056092258122578, + "learning_rate": 7.85357545617751e-06, + "loss": 0.1651, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13913197815418243, + "step": 3370, + "valid_targets_mean": 4270.4, + "valid_targets_min": 379 + }, + { + "epoch": 5.168453292496172, + "grad_norm": 0.5061408770101286, + "learning_rate": 7.792981998830092e-06, + "loss": 0.1589, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16775824129581451, + "step": 3375, + "valid_targets_mean": 5022.6, + "valid_targets_min": 574 + }, + { + "epoch": 5.176110260336906, + "grad_norm": 0.6037385627128377, + "learning_rate": 7.732566587142793e-06, + "loss": 0.1548, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17123952507972717, + "step": 3380, + "valid_targets_mean": 4231.1, + "valid_targets_min": 626 + }, + { + "epoch": 5.183767228177642, + "grad_norm": 0.5311926260698461, + "learning_rate": 7.672330102305596e-06, + "loss": 0.1713, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19350102543830872, + "step": 3385, + "valid_targets_mean": 4315.9, + "valid_targets_min": 292 + }, + { + "epoch": 5.191424196018377, + "grad_norm": 0.5044523176604377, + "learning_rate": 7.612273422898726e-06, + "loss": 0.1562, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19003711640834808, + "step": 3390, + "valid_targets_mean": 4566.2, + "valid_targets_min": 457 + }, + { + "epoch": 5.199081163859112, + "grad_norm": 0.6176433774855741, + "learning_rate": 7.5523974248798714e-06, + "loss": 0.1582, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16127054393291473, + "step": 3395, + "valid_targets_mean": 5243.8, + "valid_targets_min": 936 + }, + { + "epoch": 5.206738131699847, + "grad_norm": 0.5162091315973347, + "learning_rate": 7.492702981571363e-06, + "loss": 0.1535, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1597318947315216, + "step": 3400, + "valid_targets_mean": 4938.6, + "valid_targets_min": 814 + }, + { + "epoch": 5.214395099540582, + "grad_norm": 0.8676299780329471, + "learning_rate": 7.433190963647488e-06, + "loss": 0.1611, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14614874124526978, + "step": 3405, + "valid_targets_mean": 4358.2, + "valid_targets_min": 840 + }, + { + "epoch": 5.222052067381317, + "grad_norm": 0.4774915723911, + "learning_rate": 7.373862239121743e-06, + "loss": 0.1612, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16033905744552612, + "step": 3410, + "valid_targets_mean": 4894.8, + "valid_targets_min": 648 + }, + { + "epoch": 5.229709035222052, + "grad_norm": 0.5040954116535451, + "learning_rate": 7.314717673334213e-06, + "loss": 0.1672, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16919949650764465, + "step": 3415, + "valid_targets_mean": 6000.2, + "valid_targets_min": 3105 + }, + { + "epoch": 5.237366003062787, + "grad_norm": 0.4912501104493769, + "learning_rate": 7.255758128938934e-06, + "loss": 0.1678, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20043662190437317, + "step": 3420, + "valid_targets_mean": 6128.0, + "valid_targets_min": 804 + }, + { + "epoch": 5.2450229709035225, + "grad_norm": 0.530597468241474, + "learning_rate": 7.196984465891288e-06, + "loss": 0.1666, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16545242071151733, + "step": 3425, + "valid_targets_mean": 4632.1, + "valid_targets_min": 792 + }, + { + "epoch": 5.252679938744257, + "grad_norm": 0.4704553688188211, + "learning_rate": 7.138397541435513e-06, + "loss": 0.1751, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16121527552604675, + "step": 3430, + "valid_targets_mean": 5139.7, + "valid_targets_min": 1540 + }, + { + "epoch": 5.2603369065849925, + "grad_norm": 0.5036246695889622, + "learning_rate": 7.079998210092132e-06, + "loss": 0.1535, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18528419733047485, + "step": 3435, + "valid_targets_mean": 4868.1, + "valid_targets_min": 727 + }, + { + "epoch": 5.267993874425727, + "grad_norm": 0.5946952911939519, + "learning_rate": 7.021787323645557e-06, + "loss": 0.1583, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1626713126897812, + "step": 3440, + "valid_targets_mean": 5682.8, + "valid_targets_min": 776 + }, + { + "epoch": 5.2756508422664625, + "grad_norm": 0.44380451085617817, + "learning_rate": 6.963765731131622e-06, + "loss": 0.1619, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13598023355007172, + "step": 3445, + "valid_targets_mean": 5084.1, + "valid_targets_min": 372 + }, + { + "epoch": 5.283307810107198, + "grad_norm": 0.44937678766204686, + "learning_rate": 6.9059342788252035e-06, + "loss": 0.1656, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15446916222572327, + "step": 3450, + "valid_targets_mean": 5280.4, + "valid_targets_min": 743 + }, + { + "epoch": 5.2909647779479325, + "grad_norm": 0.5423325722642703, + "learning_rate": 6.848293810227901e-06, + "loss": 0.1598, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16436201333999634, + "step": 3455, + "valid_targets_mean": 3904.1, + "valid_targets_min": 709 + }, + { + "epoch": 5.298621745788668, + "grad_norm": 0.5230875510151383, + "learning_rate": 6.790845166055699e-06, + "loss": 0.1737, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1833193153142929, + "step": 3460, + "valid_targets_mean": 4362.5, + "valid_targets_min": 633 + }, + { + "epoch": 5.3062787136294025, + "grad_norm": 0.47630676748649625, + "learning_rate": 6.733589184226747e-06, + "loss": 0.1593, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14603200554847717, + "step": 3465, + "valid_targets_mean": 5092.8, + "valid_targets_min": 636 + }, + { + "epoch": 5.313935681470138, + "grad_norm": 0.47996042244501036, + "learning_rate": 6.676526699849086e-06, + "loss": 0.1627, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17878413200378418, + "step": 3470, + "valid_targets_mean": 5376.2, + "valid_targets_min": 2328 + }, + { + "epoch": 5.3215926493108725, + "grad_norm": 0.49793974395131096, + "learning_rate": 6.619658545208523e-06, + "loss": 0.164, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17316502332687378, + "step": 3475, + "valid_targets_mean": 4947.2, + "valid_targets_min": 490 + }, + { + "epoch": 5.329249617151608, + "grad_norm": 0.5080242803851914, + "learning_rate": 6.562985549756448e-06, + "loss": 0.1518, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14953093230724335, + "step": 3480, + "valid_targets_mean": 4898.6, + "valid_targets_min": 643 + }, + { + "epoch": 5.336906584992343, + "grad_norm": 0.514443547063424, + "learning_rate": 6.506508540097769e-06, + "loss": 0.1659, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16423040628433228, + "step": 3485, + "valid_targets_mean": 5819.7, + "valid_targets_min": 1855 + }, + { + "epoch": 5.344563552833078, + "grad_norm": 0.4584620928236497, + "learning_rate": 6.450228339978832e-06, + "loss": 0.1637, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17829495668411255, + "step": 3490, + "valid_targets_mean": 5519.6, + "valid_targets_min": 478 + }, + { + "epoch": 5.352220520673813, + "grad_norm": 0.47394337082009697, + "learning_rate": 6.394145770275402e-06, + "loss": 0.1545, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1514066755771637, + "step": 3495, + "valid_targets_mean": 5396.5, + "valid_targets_min": 566 + }, + { + "epoch": 5.359877488514548, + "grad_norm": 0.4729834195916555, + "learning_rate": 6.338261648980728e-06, + "loss": 0.1559, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16408702731132507, + "step": 3500, + "valid_targets_mean": 4929.8, + "valid_targets_min": 593 + }, + { + "epoch": 5.367534456355283, + "grad_norm": 0.4695111134489663, + "learning_rate": 6.282576791193557e-06, + "loss": 0.157, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14791680872440338, + "step": 3505, + "valid_targets_mean": 4815.1, + "valid_targets_min": 274 + }, + { + "epoch": 5.375191424196018, + "grad_norm": 0.6948673104574568, + "learning_rate": 6.227092009106301e-06, + "loss": 0.1622, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1767755150794983, + "step": 3510, + "valid_targets_mean": 3131.2, + "valid_targets_min": 566 + }, + { + "epoch": 5.382848392036753, + "grad_norm": 0.5735437955530691, + "learning_rate": 6.171808111993158e-06, + "loss": 0.1752, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17738190293312073, + "step": 3515, + "valid_targets_mean": 4039.9, + "valid_targets_min": 591 + }, + { + "epoch": 5.390505359877489, + "grad_norm": 0.4761051088696013, + "learning_rate": 6.116725906198297e-06, + "loss": 0.1558, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1486128270626068, + "step": 3520, + "valid_targets_mean": 4567.4, + "valid_targets_min": 847 + }, + { + "epoch": 5.398162327718223, + "grad_norm": 0.4579204274900233, + "learning_rate": 6.061846195124144e-06, + "loss": 0.1677, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18317697942256927, + "step": 3525, + "valid_targets_mean": 5270.5, + "valid_targets_min": 888 + }, + { + "epoch": 5.405819295558959, + "grad_norm": 0.41159114268724806, + "learning_rate": 6.007169779219606e-06, + "loss": 0.1648, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1442817747592926, + "step": 3530, + "valid_targets_mean": 5373.3, + "valid_targets_min": 291 + }, + { + "epoch": 5.413476263399693, + "grad_norm": 0.665150948244762, + "learning_rate": 5.952697455968444e-06, + "loss": 0.1674, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17882975935935974, + "step": 3535, + "valid_targets_mean": 3959.0, + "valid_targets_min": 326 + }, + { + "epoch": 5.421133231240429, + "grad_norm": 0.5112194729196444, + "learning_rate": 5.898430019877626e-06, + "loss": 0.1609, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1770792305469513, + "step": 3540, + "valid_targets_mean": 4908.1, + "valid_targets_min": 1011 + }, + { + "epoch": 5.428790199081164, + "grad_norm": 0.49899364818543357, + "learning_rate": 5.8443682624657095e-06, + "loss": 0.1502, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14375297725200653, + "step": 3545, + "valid_targets_mean": 4505.1, + "valid_targets_min": 582 + }, + { + "epoch": 5.436447166921899, + "grad_norm": 0.5057554070553661, + "learning_rate": 5.790512972251356e-06, + "loss": 0.1581, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14775539934635162, + "step": 3550, + "valid_targets_mean": 5433.1, + "valid_targets_min": 767 + }, + { + "epoch": 5.444104134762634, + "grad_norm": 0.4379349439343321, + "learning_rate": 5.736864934741764e-06, + "loss": 0.1621, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14294332265853882, + "step": 3555, + "valid_targets_mean": 5075.1, + "valid_targets_min": 615 + }, + { + "epoch": 5.451761102603369, + "grad_norm": 0.46265699177561026, + "learning_rate": 5.683424932421273e-06, + "loss": 0.1556, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15184825658798218, + "step": 3560, + "valid_targets_mean": 5332.7, + "valid_targets_min": 611 + }, + { + "epoch": 5.459418070444104, + "grad_norm": 0.5952621899721654, + "learning_rate": 5.630193744739896e-06, + "loss": 0.172, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19496630132198334, + "step": 3565, + "valid_targets_mean": 4201.9, + "valid_targets_min": 704 + }, + { + "epoch": 5.46707503828484, + "grad_norm": 0.48070697420874536, + "learning_rate": 5.577172148101993e-06, + "loss": 0.1682, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16540558636188507, + "step": 3570, + "valid_targets_mean": 4890.1, + "valid_targets_min": 803 + }, + { + "epoch": 5.474732006125574, + "grad_norm": 0.6848512357907565, + "learning_rate": 5.52436091585493e-06, + "loss": 0.1602, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17848747968673706, + "step": 3575, + "valid_targets_mean": 4479.4, + "valid_targets_min": 675 + }, + { + "epoch": 5.48238897396631, + "grad_norm": 0.47722149531129565, + "learning_rate": 5.471760818277792e-06, + "loss": 0.172, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17572689056396484, + "step": 3580, + "valid_targets_mean": 5303.5, + "valid_targets_min": 378 + }, + { + "epoch": 5.490045941807044, + "grad_norm": 0.5271976003353499, + "learning_rate": 5.419372622570169e-06, + "loss": 0.1557, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15122967958450317, + "step": 3585, + "valid_targets_mean": 4763.6, + "valid_targets_min": 597 + }, + { + "epoch": 5.49770290964778, + "grad_norm": 0.7659869620019119, + "learning_rate": 5.367197092840932e-06, + "loss": 0.1642, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17659571766853333, + "step": 3590, + "valid_targets_mean": 4276.9, + "valid_targets_min": 756 + }, + { + "epoch": 5.505359877488514, + "grad_norm": 0.5759762429875664, + "learning_rate": 5.315234990097131e-06, + "loss": 0.1569, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14629271626472473, + "step": 3595, + "valid_targets_mean": 5444.4, + "valid_targets_min": 686 + }, + { + "epoch": 5.51301684532925, + "grad_norm": 0.4702162740174821, + "learning_rate": 5.263487072232851e-06, + "loss": 0.1652, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15715420246124268, + "step": 3600, + "valid_targets_mean": 5552.1, + "valid_targets_min": 464 + }, + { + "epoch": 5.520673813169985, + "grad_norm": 0.5664693454943723, + "learning_rate": 5.211954094018201e-06, + "loss": 0.1737, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2129855751991272, + "step": 3605, + "valid_targets_mean": 4471.0, + "valid_targets_min": 669 + }, + { + "epoch": 5.52833078101072, + "grad_norm": 0.439009368809084, + "learning_rate": 5.160636807088277e-06, + "loss": 0.1594, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1415124386548996, + "step": 3610, + "valid_targets_mean": 4938.8, + "valid_targets_min": 837 + }, + { + "epoch": 5.535987748851455, + "grad_norm": 0.4784953299934146, + "learning_rate": 5.109535959932195e-06, + "loss": 0.1552, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16233739256858826, + "step": 3615, + "valid_targets_mean": 4896.0, + "valid_targets_min": 445 + }, + { + "epoch": 5.54364471669219, + "grad_norm": 0.48393748501482625, + "learning_rate": 5.058652297882205e-06, + "loss": 0.164, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16299815475940704, + "step": 3620, + "valid_targets_mean": 4792.2, + "valid_targets_min": 578 + }, + { + "epoch": 5.551301684532925, + "grad_norm": 0.4788193457811368, + "learning_rate": 5.007986563102778e-06, + "loss": 0.1612, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13831590116024017, + "step": 3625, + "valid_targets_mean": 5167.0, + "valid_targets_min": 565 + }, + { + "epoch": 5.55895865237366, + "grad_norm": 0.5713684082322842, + "learning_rate": 4.9575394945798236e-06, + "loss": 0.1612, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15097324550151825, + "step": 3630, + "valid_targets_mean": 4927.1, + "valid_targets_min": 434 + }, + { + "epoch": 5.566615620214395, + "grad_norm": 0.5133104989832143, + "learning_rate": 4.9073118281098845e-06, + "loss": 0.1882, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19625042378902435, + "step": 3635, + "valid_targets_mean": 5513.1, + "valid_targets_min": 2604 + }, + { + "epoch": 5.5742725880551305, + "grad_norm": 0.454081092007902, + "learning_rate": 4.857304296289398e-06, + "loss": 0.1598, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15363352000713348, + "step": 3640, + "valid_targets_mean": 5523.7, + "valid_targets_min": 600 + }, + { + "epoch": 5.581929555895865, + "grad_norm": 0.46884469793283995, + "learning_rate": 4.807517628504048e-06, + "loss": 0.1727, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17339780926704407, + "step": 3645, + "valid_targets_mean": 5304.4, + "valid_targets_min": 1990 + }, + { + "epoch": 5.5895865237366005, + "grad_norm": 0.45801637501032694, + "learning_rate": 4.757952550918077e-06, + "loss": 0.1632, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14689268171787262, + "step": 3650, + "valid_targets_mean": 5640.2, + "valid_targets_min": 576 + }, + { + "epoch": 5.597243491577335, + "grad_norm": 0.5120675322489687, + "learning_rate": 4.7086097864637444e-06, + "loss": 0.1574, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1544780433177948, + "step": 3655, + "valid_targets_mean": 5030.8, + "valid_targets_min": 610 + }, + { + "epoch": 5.6049004594180705, + "grad_norm": 0.567671223880722, + "learning_rate": 4.659490054830729e-06, + "loss": 0.1689, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1720244139432907, + "step": 3660, + "valid_targets_mean": 4914.8, + "valid_targets_min": 724 + }, + { + "epoch": 5.612557427258805, + "grad_norm": 0.43509274496020417, + "learning_rate": 4.6105940724557e-06, + "loss": 0.1625, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15081757307052612, + "step": 3665, + "valid_targets_mean": 6018.0, + "valid_targets_min": 805 + }, + { + "epoch": 5.6202143950995405, + "grad_norm": 0.5912507132310114, + "learning_rate": 4.561922552511788e-06, + "loss": 0.1624, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17240439355373383, + "step": 3670, + "valid_targets_mean": 3949.9, + "valid_targets_min": 240 + }, + { + "epoch": 5.627871362940276, + "grad_norm": 0.4793859887692694, + "learning_rate": 4.5134762048982485e-06, + "loss": 0.1606, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1666552871465683, + "step": 3675, + "valid_targets_mean": 5170.1, + "valid_targets_min": 319 + }, + { + "epoch": 5.6355283307810105, + "grad_norm": 0.45725888831198136, + "learning_rate": 4.465255736230076e-06, + "loss": 0.1575, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15449725091457367, + "step": 3680, + "valid_targets_mean": 5772.3, + "valid_targets_min": 726 + }, + { + "epoch": 5.643185298621746, + "grad_norm": 0.4785523703342554, + "learning_rate": 4.417261849827696e-06, + "loss": 0.1753, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19263219833374023, + "step": 3685, + "valid_targets_mean": 5215.2, + "valid_targets_min": 689 + }, + { + "epoch": 5.650842266462481, + "grad_norm": 0.4757831273839956, + "learning_rate": 4.369495245706729e-06, + "loss": 0.1618, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1494811326265335, + "step": 3690, + "valid_targets_mean": 5826.9, + "valid_targets_min": 583 + }, + { + "epoch": 5.658499234303216, + "grad_norm": 0.4702461000587345, + "learning_rate": 4.321956620567751e-06, + "loss": 0.1739, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1776936650276184, + "step": 3695, + "valid_targets_mean": 5043.8, + "valid_targets_min": 871 + }, + { + "epoch": 5.666156202143951, + "grad_norm": 0.46987683467332103, + "learning_rate": 4.274646667786157e-06, + "loss": 0.1557, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15853236615657806, + "step": 3700, + "valid_targets_mean": 4678.7, + "valid_targets_min": 592 + }, + { + "epoch": 5.673813169984686, + "grad_norm": 0.4680178929081938, + "learning_rate": 4.227566077402041e-06, + "loss": 0.1647, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1535928100347519, + "step": 3705, + "valid_targets_mean": 4963.2, + "valid_targets_min": 261 + }, + { + "epoch": 5.681470137825421, + "grad_norm": 0.44046042021206055, + "learning_rate": 4.180715536110112e-06, + "loss": 0.1614, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13984829187393188, + "step": 3710, + "valid_targets_mean": 5743.1, + "valid_targets_min": 588 + }, + { + "epoch": 5.689127105666156, + "grad_norm": 0.5050717822406969, + "learning_rate": 4.1340957272497115e-06, + "loss": 0.1557, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14051397144794464, + "step": 3715, + "valid_targets_mean": 4413.8, + "valid_targets_min": 663 + }, + { + "epoch": 5.696784073506891, + "grad_norm": 0.609576054055646, + "learning_rate": 4.087707330794814e-06, + "loss": 0.1745, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14713960886001587, + "step": 3720, + "valid_targets_mean": 5758.8, + "valid_targets_min": 1866 + }, + { + "epoch": 5.704441041347627, + "grad_norm": 0.5071420592448429, + "learning_rate": 4.041551023344139e-06, + "loss": 0.17, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17213720083236694, + "step": 3725, + "valid_targets_mean": 5253.4, + "valid_targets_min": 301 + }, + { + "epoch": 5.712098009188361, + "grad_norm": 0.46046580007849597, + "learning_rate": 3.995627478111264e-06, + "loss": 0.1682, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1543341875076294, + "step": 3730, + "valid_targets_mean": 5808.6, + "valid_targets_min": 606 + }, + { + "epoch": 5.719754977029097, + "grad_norm": 0.4717948081992276, + "learning_rate": 3.949937364914798e-06, + "loss": 0.158, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14789888262748718, + "step": 3735, + "valid_targets_mean": 5842.7, + "valid_targets_min": 913 + }, + { + "epoch": 5.727411944869831, + "grad_norm": 0.472692526766977, + "learning_rate": 3.904481350168641e-06, + "loss": 0.1733, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18231379985809326, + "step": 3740, + "valid_targets_mean": 5480.3, + "valid_targets_min": 582 + }, + { + "epoch": 5.735068912710567, + "grad_norm": 0.4774164157350961, + "learning_rate": 3.8592600968722285e-06, + "loss": 0.1641, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17668023705482483, + "step": 3745, + "valid_targets_mean": 5286.9, + "valid_targets_min": 806 + }, + { + "epoch": 5.742725880551301, + "grad_norm": 0.5089014992120109, + "learning_rate": 3.814274264600899e-06, + "loss": 0.1612, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15314318239688873, + "step": 3750, + "valid_targets_mean": 4953.4, + "valid_targets_min": 747 + }, + { + "epoch": 5.750382848392037, + "grad_norm": 0.5179707303157852, + "learning_rate": 3.7695245094962228e-06, + "loss": 0.1756, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16455323994159698, + "step": 3755, + "valid_targets_mean": 5168.4, + "valid_targets_min": 840 + }, + { + "epoch": 5.758039816232772, + "grad_norm": 0.49543638175235816, + "learning_rate": 3.7250114842565087e-06, + "loss": 0.1613, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15587033331394196, + "step": 3760, + "valid_targets_mean": 4512.6, + "valid_targets_min": 679 + }, + { + "epoch": 5.765696784073507, + "grad_norm": 0.5025738435987257, + "learning_rate": 3.6807358381271963e-06, + "loss": 0.1806, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2389712631702423, + "step": 3765, + "valid_targets_mean": 5495.2, + "valid_targets_min": 294 + }, + { + "epoch": 5.773353751914242, + "grad_norm": 0.44563197876271343, + "learning_rate": 3.6366982168914456e-06, + "loss": 0.1625, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14085888862609863, + "step": 3770, + "valid_targets_mean": 5305.1, + "valid_targets_min": 1943 + }, + { + "epoch": 5.781010719754977, + "grad_norm": 0.4711699929533897, + "learning_rate": 3.5928992628607075e-06, + "loss": 0.1561, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14801645278930664, + "step": 3775, + "valid_targets_mean": 5146.2, + "valid_targets_min": 1545 + }, + { + "epoch": 5.788667687595712, + "grad_norm": 0.45863808724665833, + "learning_rate": 3.549339614865328e-06, + "loss": 0.1628, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17908339202404022, + "step": 3780, + "valid_targets_mean": 5437.6, + "valid_targets_min": 1859 + }, + { + "epoch": 5.796324655436447, + "grad_norm": 0.5602836811382915, + "learning_rate": 3.506019908245275e-06, + "loss": 0.1479, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15862153470516205, + "step": 3785, + "valid_targets_mean": 4928.1, + "valid_targets_min": 593 + }, + { + "epoch": 5.803981623277182, + "grad_norm": 0.5625845577017787, + "learning_rate": 3.462940774840826e-06, + "loss": 0.1696, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18470574915409088, + "step": 3790, + "valid_targets_mean": 4665.9, + "valid_targets_min": 933 + }, + { + "epoch": 5.811638591117918, + "grad_norm": 0.5156477028052144, + "learning_rate": 3.4201028429833883e-06, + "loss": 0.1529, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14838847517967224, + "step": 3795, + "valid_targets_mean": 5494.6, + "valid_targets_min": 661 + }, + { + "epoch": 5.819295558958652, + "grad_norm": 0.4397002728342675, + "learning_rate": 3.37750673748632e-06, + "loss": 0.1651, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14019045233726501, + "step": 3800, + "valid_targets_mean": 5286.6, + "valid_targets_min": 983 + }, + { + "epoch": 5.826952526799388, + "grad_norm": 0.48750838244321903, + "learning_rate": 3.3351530796358024e-06, + "loss": 0.1572, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1837264895439148, + "step": 3805, + "valid_targets_mean": 5369.9, + "valid_targets_min": 2433 + }, + { + "epoch": 5.834609494640122, + "grad_norm": 0.575464842240813, + "learning_rate": 3.2930424871818145e-06, + "loss": 0.1565, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1724276840686798, + "step": 3810, + "valid_targets_mean": 5045.8, + "valid_targets_min": 380 + }, + { + "epoch": 5.842266462480858, + "grad_norm": 0.5014265385817863, + "learning_rate": 3.2511755743290774e-06, + "loss": 0.1676, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16347980499267578, + "step": 3815, + "valid_targets_mean": 5700.8, + "valid_targets_min": 1001 + }, + { + "epoch": 5.849923430321593, + "grad_norm": 0.4971195883757721, + "learning_rate": 3.2095529517281365e-06, + "loss": 0.1817, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15201014280319214, + "step": 3820, + "valid_targets_mean": 4555.1, + "valid_targets_min": 251 + }, + { + "epoch": 5.857580398162328, + "grad_norm": 0.5008524466215926, + "learning_rate": 3.1681752264664387e-06, + "loss": 0.1526, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15063107013702393, + "step": 3825, + "valid_targets_mean": 4804.3, + "valid_targets_min": 903 + }, + { + "epoch": 5.865237366003063, + "grad_norm": 0.5064320357447261, + "learning_rate": 3.12704300205946e-06, + "loss": 0.1565, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16848312318325043, + "step": 3830, + "valid_targets_mean": 4334.7, + "valid_targets_min": 249 + }, + { + "epoch": 5.8728943338437976, + "grad_norm": 0.5582372456603737, + "learning_rate": 3.0861568784419393e-06, + "loss": 0.1543, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15561410784721375, + "step": 3835, + "valid_targets_mean": 4067.6, + "valid_targets_min": 762 + }, + { + "epoch": 5.880551301684533, + "grad_norm": 0.48015271820719657, + "learning_rate": 3.0455174519590926e-06, + "loss": 0.1655, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17181405425071716, + "step": 3840, + "valid_targets_mean": 5108.1, + "valid_targets_min": 800 + }, + { + "epoch": 5.888208269525268, + "grad_norm": 0.5032917025542417, + "learning_rate": 3.0051253153579373e-06, + "loss": 0.1554, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15245842933654785, + "step": 3845, + "valid_targets_mean": 4856.6, + "valid_targets_min": 591 + }, + { + "epoch": 5.895865237366003, + "grad_norm": 0.4568927865552627, + "learning_rate": 2.964981057778644e-06, + "loss": 0.1717, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15734651684761047, + "step": 3850, + "valid_targets_mean": 5474.4, + "valid_targets_min": 1911 + }, + { + "epoch": 5.903522205206738, + "grad_norm": 0.5333563811950146, + "learning_rate": 2.9250852647459418e-06, + "loss": 0.1591, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14090844988822937, + "step": 3855, + "valid_targets_mean": 4721.5, + "valid_targets_min": 553 + }, + { + "epoch": 5.911179173047473, + "grad_norm": 0.4809515679578391, + "learning_rate": 2.8854385181605594e-06, + "loss": 0.1684, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1540873646736145, + "step": 3860, + "valid_targets_mean": 5109.4, + "valid_targets_min": 1901 + }, + { + "epoch": 5.918836140888208, + "grad_norm": 0.48239544970207976, + "learning_rate": 2.8460413962907705e-06, + "loss": 0.1544, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17352712154388428, + "step": 3865, + "valid_targets_mean": 4727.1, + "valid_targets_min": 847 + }, + { + "epoch": 5.926493108728943, + "grad_norm": 0.5337046272163664, + "learning_rate": 2.8068944737639436e-06, + "loss": 0.1675, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16555720567703247, + "step": 3870, + "valid_targets_mean": 4828.6, + "valid_targets_min": 586 + }, + { + "epoch": 5.934150076569678, + "grad_norm": 0.475103452331955, + "learning_rate": 2.7679983215581474e-06, + "loss": 0.1585, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19165559113025665, + "step": 3875, + "valid_targets_mean": 6190.2, + "valid_targets_min": 816 + }, + { + "epoch": 5.941807044410414, + "grad_norm": 0.5134784747632062, + "learning_rate": 2.72935350699385e-06, + "loss": 0.1575, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15154170989990234, + "step": 3880, + "valid_targets_mean": 4244.9, + "valid_targets_min": 812 + }, + { + "epoch": 5.949464012251148, + "grad_norm": 0.43408459090260726, + "learning_rate": 2.69096059372562e-06, + "loss": 0.1677, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1547510176897049, + "step": 3885, + "valid_targets_mean": 5395.6, + "valid_targets_min": 717 + }, + { + "epoch": 5.957120980091884, + "grad_norm": 0.6503354175258022, + "learning_rate": 2.6528201417339205e-06, + "loss": 0.1794, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1949981451034546, + "step": 3890, + "valid_targets_mean": 3938.8, + "valid_targets_min": 612 + }, + { + "epoch": 5.964777947932618, + "grad_norm": 0.5246115940102106, + "learning_rate": 2.614932707316942e-06, + "loss": 0.1604, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17228254675865173, + "step": 3895, + "valid_targets_mean": 4690.4, + "valid_targets_min": 1224 + }, + { + "epoch": 5.972434915773354, + "grad_norm": 0.4986410635561786, + "learning_rate": 2.5772988430824697e-06, + "loss": 0.1512, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16765034198760986, + "step": 3900, + "valid_targets_mean": 5091.7, + "valid_targets_min": 587 + }, + { + "epoch": 5.980091883614088, + "grad_norm": 0.41080740486480766, + "learning_rate": 2.5399190979398493e-06, + "loss": 0.1605, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1336369514465332, + "step": 3905, + "valid_targets_mean": 6159.9, + "valid_targets_min": 941 + }, + { + "epoch": 5.987748851454824, + "grad_norm": 0.42306492813111624, + "learning_rate": 2.5027940170919583e-06, + "loss": 0.1454, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14831745624542236, + "step": 3910, + "valid_targets_mean": 6090.9, + "valid_targets_min": 2635 + }, + { + "epoch": 5.995405819295559, + "grad_norm": 0.4748890363586371, + "learning_rate": 2.4659241420272716e-06, + "loss": 0.16, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1577981561422348, + "step": 3915, + "valid_targets_mean": 5881.8, + "valid_targets_min": 697 + }, + { + "epoch": 6.003062787136294, + "grad_norm": 0.46055683667700925, + "learning_rate": 2.429310010511956e-06, + "loss": 0.1633, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16992008686065674, + "step": 3920, + "valid_targets_mean": 5102.2, + "valid_targets_min": 1521 + }, + { + "epoch": 6.010719754977029, + "grad_norm": 0.42406910050920277, + "learning_rate": 2.392952156582018e-06, + "loss": 0.1566, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13615508377552032, + "step": 3925, + "valid_targets_mean": 5358.2, + "valid_targets_min": 693 + }, + { + "epoch": 6.018376722817764, + "grad_norm": 0.4799597914576974, + "learning_rate": 2.3568511105355363e-06, + "loss": 0.1502, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1690567135810852, + "step": 3930, + "valid_targets_mean": 4816.2, + "valid_targets_min": 663 + }, + { + "epoch": 6.026033690658499, + "grad_norm": 0.4808874892486829, + "learning_rate": 2.321007398924897e-06, + "loss": 0.1569, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14551788568496704, + "step": 3935, + "valid_targets_mean": 4578.4, + "valid_targets_min": 430 + }, + { + "epoch": 6.033690658499235, + "grad_norm": 0.44957556088245915, + "learning_rate": 2.2854215445491467e-06, + "loss": 0.1656, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15582698583602905, + "step": 3940, + "valid_targets_mean": 5508.3, + "valid_targets_min": 1228 + }, + { + "epoch": 6.041347626339969, + "grad_norm": 0.5135673979652564, + "learning_rate": 2.250094066446342e-06, + "loss": 0.1561, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1671498417854309, + "step": 3945, + "valid_targets_mean": 4560.2, + "valid_targets_min": 742 + }, + { + "epoch": 6.049004594180705, + "grad_norm": 0.5547826648523004, + "learning_rate": 2.215025479885999e-06, + "loss": 0.1617, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16672340035438538, + "step": 3950, + "valid_targets_mean": 4601.8, + "valid_targets_min": 737 + }, + { + "epoch": 6.056661562021439, + "grad_norm": 0.4009473872690044, + "learning_rate": 2.180216296361548e-06, + "loss": 0.155, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12265469133853912, + "step": 3955, + "valid_targets_mean": 6534.8, + "valid_targets_min": 1368 + }, + { + "epoch": 6.064318529862175, + "grad_norm": 0.4564705546836802, + "learning_rate": 2.145667023582907e-06, + "loss": 0.1611, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.144794762134552, + "step": 3960, + "valid_targets_mean": 5498.0, + "valid_targets_min": 276 + }, + { + "epoch": 6.071975497702909, + "grad_norm": 0.5051561406739382, + "learning_rate": 2.1113781654690624e-06, + "loss": 0.1574, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1356779932975769, + "step": 3965, + "valid_targets_mean": 4497.1, + "valid_targets_min": 566 + }, + { + "epoch": 6.079632465543645, + "grad_norm": 0.47564995155516737, + "learning_rate": 2.077350222140704e-06, + "loss": 0.1448, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1437039077281952, + "step": 3970, + "valid_targets_mean": 5306.3, + "valid_targets_min": 560 + }, + { + "epoch": 6.08728943338438, + "grad_norm": 0.5940128337403686, + "learning_rate": 2.0435836899129624e-06, + "loss": 0.1696, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19864995777606964, + "step": 3975, + "valid_targets_mean": 4722.4, + "valid_targets_min": 1353 + }, + { + "epoch": 6.094946401225115, + "grad_norm": 0.49070285695934485, + "learning_rate": 2.0100790612881392e-06, + "loss": 0.1555, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19582757353782654, + "step": 3980, + "valid_targets_mean": 5257.1, + "valid_targets_min": 272 + }, + { + "epoch": 6.10260336906585, + "grad_norm": 0.46266996157161816, + "learning_rate": 1.9768368249485427e-06, + "loss": 0.1526, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16311945021152496, + "step": 3985, + "valid_targets_mean": 5434.1, + "valid_targets_min": 472 + }, + { + "epoch": 6.110260336906585, + "grad_norm": 0.46875844903787695, + "learning_rate": 1.9438574657493547e-06, + "loss": 0.1492, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.148982971906662, + "step": 3990, + "valid_targets_mean": 5669.6, + "valid_targets_min": 3120 + }, + { + "epoch": 6.11791730474732, + "grad_norm": 0.4856031191051758, + "learning_rate": 1.9111414647115545e-06, + "loss": 0.1582, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14689423143863678, + "step": 3995, + "valid_targets_mean": 5054.9, + "valid_targets_min": 2586 + }, + { + "epoch": 6.1255742725880555, + "grad_norm": 0.6163929918826091, + "learning_rate": 1.878689299014913e-06, + "loss": 0.1644, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15235333144664764, + "step": 4000, + "valid_targets_mean": 4759.5, + "valid_targets_min": 1983 + }, + { + "epoch": 6.13323124042879, + "grad_norm": 0.429934054934571, + "learning_rate": 1.8465014419910155e-06, + "loss": 0.1377, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1379457414150238, + "step": 4005, + "valid_targets_mean": 6366.9, + "valid_targets_min": 3204 + }, + { + "epoch": 6.1408882082695255, + "grad_norm": 0.7233086750971975, + "learning_rate": 1.8145783631163772e-06, + "loss": 0.1685, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18254977464675903, + "step": 4010, + "valid_targets_mean": 3828.1, + "valid_targets_min": 434 + }, + { + "epoch": 6.14854517611026, + "grad_norm": 0.5027392672006016, + "learning_rate": 1.7829205280055938e-06, + "loss": 0.1504, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1422107219696045, + "step": 4015, + "valid_targets_mean": 4783.3, + "valid_targets_min": 661 + }, + { + "epoch": 6.1562021439509955, + "grad_norm": 0.5617038740562822, + "learning_rate": 1.7515283984045228e-06, + "loss": 0.1608, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1379905641078949, + "step": 4020, + "valid_targets_mean": 3973.9, + "valid_targets_min": 240 + }, + { + "epoch": 6.16385911179173, + "grad_norm": 0.48304445431238197, + "learning_rate": 1.7204024321835944e-06, + "loss": 0.1608, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14369803667068481, + "step": 4025, + "valid_targets_mean": 5558.6, + "valid_targets_min": 1845 + }, + { + "epoch": 6.1715160796324655, + "grad_norm": 0.5855970422459711, + "learning_rate": 1.6895430833310844e-06, + "loss": 0.1454, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1536564975976944, + "step": 4030, + "valid_targets_mean": 3549.4, + "valid_targets_min": 767 + }, + { + "epoch": 6.179173047473201, + "grad_norm": 0.48239528326391967, + "learning_rate": 1.6589508019465395e-06, + "loss": 0.1548, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16883978247642517, + "step": 4035, + "valid_targets_mean": 5138.6, + "valid_targets_min": 1466 + }, + { + "epoch": 6.1868300153139355, + "grad_norm": 0.5343661046550146, + "learning_rate": 1.628626034234173e-06, + "loss": 0.1648, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16661593317985535, + "step": 4040, + "valid_targets_mean": 5237.5, + "valid_targets_min": 758 + }, + { + "epoch": 6.194486983154671, + "grad_norm": 0.42129658187000146, + "learning_rate": 1.5985692224963844e-06, + "loss": 0.1571, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12375140190124512, + "step": 4045, + "valid_targets_mean": 6090.9, + "valid_targets_min": 787 + }, + { + "epoch": 6.2021439509954055, + "grad_norm": 0.4842019759636806, + "learning_rate": 1.5687808051272835e-06, + "loss": 0.1607, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18923643231391907, + "step": 4050, + "valid_targets_mean": 5465.9, + "valid_targets_min": 357 + }, + { + "epoch": 6.209800918836141, + "grad_norm": 1.005454460289286, + "learning_rate": 1.5392612166063203e-06, + "loss": 0.1691, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18108555674552917, + "step": 4055, + "valid_targets_mean": 3432.4, + "valid_targets_min": 666 + }, + { + "epoch": 6.217457886676876, + "grad_norm": 0.5016868489631979, + "learning_rate": 1.5100108874919395e-06, + "loss": 0.1471, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12715457379817963, + "step": 4060, + "valid_targets_mean": 4946.0, + "valid_targets_min": 563 + }, + { + "epoch": 6.225114854517611, + "grad_norm": 0.49871078521671197, + "learning_rate": 1.4810302444152868e-06, + "loss": 0.1659, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17101529240608215, + "step": 4065, + "valid_targets_mean": 5428.0, + "valid_targets_min": 533 + }, + { + "epoch": 6.232771822358346, + "grad_norm": 0.44896200586577506, + "learning_rate": 1.4523197100740127e-06, + "loss": 0.1538, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14551226794719696, + "step": 4070, + "valid_targets_mean": 5273.6, + "valid_targets_min": 1350 + }, + { + "epoch": 6.240428790199081, + "grad_norm": 0.5369466730400164, + "learning_rate": 1.423879703226072e-06, + "loss": 0.1595, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18292181193828583, + "step": 4075, + "valid_targets_mean": 4546.9, + "valid_targets_min": 429 + }, + { + "epoch": 6.248085758039816, + "grad_norm": 0.4534004010866802, + "learning_rate": 1.3957106386836584e-06, + "loss": 0.1553, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14585313200950623, + "step": 4080, + "valid_targets_mean": 5585.4, + "valid_targets_min": 353 + }, + { + "epoch": 6.255742725880551, + "grad_norm": 0.4562447826282773, + "learning_rate": 1.3678129273071194e-06, + "loss": 0.1642, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18082283437252045, + "step": 4085, + "valid_targets_mean": 5402.2, + "valid_targets_min": 822 + }, + { + "epoch": 6.263399693721286, + "grad_norm": 0.5446884739075963, + "learning_rate": 1.340186975998976e-06, + "loss": 0.1621, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15701700747013092, + "step": 4090, + "valid_targets_mean": 4313.9, + "valid_targets_min": 724 + }, + { + "epoch": 6.271056661562022, + "grad_norm": 0.5562186277826435, + "learning_rate": 1.3128331876979994e-06, + "loss": 0.1523, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1641978621482849, + "step": 4095, + "valid_targets_mean": 4410.6, + "valid_targets_min": 858 + }, + { + "epoch": 6.278713629402756, + "grad_norm": 0.4458800063927126, + "learning_rate": 1.285751961373305e-06, + "loss": 0.1594, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14536157250404358, + "step": 4100, + "valid_targets_mean": 5645.1, + "valid_targets_min": 700 + }, + { + "epoch": 6.286370597243492, + "grad_norm": 0.5283911789373491, + "learning_rate": 1.2589436920185661e-06, + "loss": 0.1575, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16219042241573334, + "step": 4105, + "valid_targets_mean": 4158.7, + "valid_targets_min": 542 + }, + { + "epoch": 6.294027565084226, + "grad_norm": 0.5094779670955533, + "learning_rate": 1.232408770646234e-06, + "loss": 0.166, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17320656776428223, + "step": 4110, + "valid_targets_mean": 4912.1, + "valid_targets_min": 621 + }, + { + "epoch": 6.301684532924962, + "grad_norm": 0.5719763141764206, + "learning_rate": 1.2061475842818337e-06, + "loss": 0.1647, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17003297805786133, + "step": 4115, + "valid_targets_mean": 4195.1, + "valid_targets_min": 326 + }, + { + "epoch": 6.309341500765697, + "grad_norm": 0.5256383923265986, + "learning_rate": 1.1801605159583307e-06, + "loss": 0.1599, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1623782217502594, + "step": 4120, + "valid_targets_mean": 4425.1, + "valid_targets_min": 728 + }, + { + "epoch": 6.316998468606432, + "grad_norm": 0.48481180094784476, + "learning_rate": 1.1544479447105261e-06, + "loss": 0.1576, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16785955429077148, + "step": 4125, + "valid_targets_mean": 5730.3, + "valid_targets_min": 511 + }, + { + "epoch": 6.324655436447167, + "grad_norm": 0.4572254396324727, + "learning_rate": 1.1290102455695595e-06, + "loss": 0.1585, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16064852476119995, + "step": 4130, + "valid_targets_mean": 5513.2, + "valid_targets_min": 1849 + }, + { + "epoch": 6.332312404287902, + "grad_norm": 0.4530850293667914, + "learning_rate": 1.1038477895573974e-06, + "loss": 0.1612, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16299015283584595, + "step": 4135, + "valid_targets_mean": 5793.6, + "valid_targets_min": 880 + }, + { + "epoch": 6.339969372128637, + "grad_norm": 0.48028598319389537, + "learning_rate": 1.0789609436814552e-06, + "loss": 0.1521, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14780756831169128, + "step": 4140, + "valid_targets_mean": 5281.0, + "valid_targets_min": 773 + }, + { + "epoch": 6.347626339969372, + "grad_norm": 0.4496051344582939, + "learning_rate": 1.0543500709292309e-06, + "loss": 0.1613, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17510706186294556, + "step": 4145, + "valid_targets_mean": 6073.4, + "valid_targets_min": 762 + }, + { + "epoch": 6.355283307810107, + "grad_norm": 0.509916750158182, + "learning_rate": 1.0300155302630045e-06, + "loss": 0.172, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18384206295013428, + "step": 4150, + "valid_targets_mean": 4543.0, + "valid_targets_min": 312 + }, + { + "epoch": 6.362940275650843, + "grad_norm": 0.4914861553380316, + "learning_rate": 1.005957676614624e-06, + "loss": 0.1632, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15742814540863037, + "step": 4155, + "valid_targets_mean": 5673.4, + "valid_targets_min": 730 + }, + { + "epoch": 6.370597243491577, + "grad_norm": 0.44389672499265986, + "learning_rate": 9.821768608802995e-07, + "loss": 0.1483, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12984803318977356, + "step": 4160, + "valid_targets_mean": 5247.4, + "valid_targets_min": 313 + }, + { + "epoch": 6.378254211332313, + "grad_norm": 0.5068179023268576, + "learning_rate": 9.58673429915511e-07, + "loss": 0.1578, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18095433712005615, + "step": 4165, + "valid_targets_mean": 4936.8, + "valid_targets_min": 493 + }, + { + "epoch": 6.385911179173047, + "grad_norm": 0.6110721344279758, + "learning_rate": 9.354477265299277e-07, + "loss": 0.157, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14109715819358826, + "step": 4170, + "valid_targets_mean": 4727.2, + "valid_targets_min": 844 + }, + { + "epoch": 6.393568147013783, + "grad_norm": 0.4593471805913175, + "learning_rate": 9.125000894824332e-07, + "loss": 0.1568, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15586645901203156, + "step": 4175, + "valid_targets_mean": 5135.5, + "valid_targets_min": 779 + }, + { + "epoch": 6.401225114854517, + "grad_norm": 0.5144834066021605, + "learning_rate": 8.898308534761591e-07, + "loss": 0.164, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15124112367630005, + "step": 4180, + "valid_targets_mean": 4934.0, + "valid_targets_min": 616 + }, + { + "epoch": 6.408882082695253, + "grad_norm": 0.4240329535706579, + "learning_rate": 8.674403491536121e-07, + "loss": 0.162, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1325221210718155, + "step": 4185, + "valid_targets_mean": 6155.0, + "valid_targets_min": 2846 + }, + { + "epoch": 6.416539050535988, + "grad_norm": 0.4349870239932218, + "learning_rate": 8.453289030918643e-07, + "loss": 0.153, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13124766945838928, + "step": 4190, + "valid_targets_mean": 5847.3, + "valid_targets_min": 594 + }, + { + "epoch": 6.424196018376723, + "grad_norm": 0.4258108290720063, + "learning_rate": 8.234968377977704e-07, + "loss": 0.1502, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13588905334472656, + "step": 4195, + "valid_targets_mean": 5795.6, + "valid_targets_min": 2571 + }, + { + "epoch": 6.431852986217458, + "grad_norm": 0.4823382335960822, + "learning_rate": 8.019444717032732e-07, + "loss": 0.146, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14344608783721924, + "step": 4200, + "valid_targets_mean": 5538.8, + "valid_targets_min": 801 + }, + { + "epoch": 6.439509954058193, + "grad_norm": 0.5440916742867541, + "learning_rate": 7.806721191607658e-07, + "loss": 0.1533, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14522123336791992, + "step": 4205, + "valid_targets_mean": 4628.5, + "valid_targets_min": 840 + }, + { + "epoch": 6.447166921898928, + "grad_norm": 0.47576315988744305, + "learning_rate": 7.596800904384838e-07, + "loss": 0.1389, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14390070736408234, + "step": 4210, + "valid_targets_mean": 5652.9, + "valid_targets_min": 966 + }, + { + "epoch": 6.4548238897396635, + "grad_norm": 0.5218061277679277, + "learning_rate": 7.38968691716011e-07, + "loss": 0.1641, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18774065375328064, + "step": 4215, + "valid_targets_mean": 4977.1, + "valid_targets_min": 723 + }, + { + "epoch": 6.462480857580398, + "grad_norm": 0.4286228931036304, + "learning_rate": 7.185382250797901e-07, + "loss": 0.1528, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1377241015434265, + "step": 4220, + "valid_targets_mean": 5731.1, + "valid_targets_min": 615 + }, + { + "epoch": 6.4701378254211335, + "grad_norm": 0.47837224891886565, + "learning_rate": 6.983889885187279e-07, + "loss": 0.1578, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15443077683448792, + "step": 4225, + "valid_targets_mean": 5042.2, + "valid_targets_min": 2433 + }, + { + "epoch": 6.477794793261868, + "grad_norm": 0.49422631910990883, + "learning_rate": 6.785212759198345e-07, + "loss": 0.1649, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15702563524246216, + "step": 4230, + "valid_targets_mean": 4542.4, + "valid_targets_min": 379 + }, + { + "epoch": 6.4854517611026035, + "grad_norm": 0.477290567718433, + "learning_rate": 6.58935377063965e-07, + "loss": 0.1629, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1609748899936676, + "step": 4235, + "valid_targets_mean": 5713.4, + "valid_targets_min": 538 + }, + { + "epoch": 6.493108728943339, + "grad_norm": 0.4756474679960268, + "learning_rate": 6.396315776215645e-07, + "loss": 0.1662, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16133789718151093, + "step": 4240, + "valid_targets_mean": 5269.2, + "valid_targets_min": 685 + }, + { + "epoch": 6.5007656967840735, + "grad_norm": 0.4929632772608458, + "learning_rate": 6.206101591485092e-07, + "loss": 0.1612, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14727333188056946, + "step": 4245, + "valid_targets_mean": 5250.6, + "valid_targets_min": 686 + }, + { + "epoch": 6.508422664624809, + "grad_norm": 0.5014543137245218, + "learning_rate": 6.018713990820168e-07, + "loss": 0.1592, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18269553780555725, + "step": 4250, + "valid_targets_mean": 4937.0, + "valid_targets_min": 1441 + }, + { + "epoch": 6.5160796324655434, + "grad_norm": 0.5773327888472211, + "learning_rate": 5.834155707365696e-07, + "loss": 0.1672, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16463427245616913, + "step": 4255, + "valid_targets_mean": 4428.4, + "valid_targets_min": 750 + }, + { + "epoch": 6.523736600306279, + "grad_norm": 0.5158369739919062, + "learning_rate": 5.652429432999596e-07, + "loss": 0.1744, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20093436539173126, + "step": 4260, + "valid_targets_mean": 5806.6, + "valid_targets_min": 816 + }, + { + "epoch": 6.531393568147013, + "grad_norm": 0.5089399556927194, + "learning_rate": 5.47353781829334e-07, + "loss": 0.1534, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14460662007331848, + "step": 4265, + "valid_targets_mean": 4637.4, + "valid_targets_min": 426 + }, + { + "epoch": 6.539050535987749, + "grad_norm": 0.5388069418287857, + "learning_rate": 5.297483472473541e-07, + "loss": 0.1538, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15606024861335754, + "step": 4270, + "valid_targets_mean": 4018.7, + "valid_targets_min": 534 + }, + { + "epoch": 6.546707503828484, + "grad_norm": 0.47769896995919425, + "learning_rate": 5.12426896338376e-07, + "loss": 0.1542, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12930753827095032, + "step": 4275, + "valid_targets_mean": 5469.5, + "valid_targets_min": 1085 + }, + { + "epoch": 6.554364471669219, + "grad_norm": 0.5133848406051136, + "learning_rate": 4.953896817446957e-07, + "loss": 0.1558, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14288434386253357, + "step": 4280, + "valid_targets_mean": 4725.6, + "valid_targets_min": 618 + }, + { + "epoch": 6.562021439509954, + "grad_norm": 0.5203682808767163, + "learning_rate": 4.78636951962892e-07, + "loss": 0.1652, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18543046712875366, + "step": 4285, + "valid_targets_mean": 5126.2, + "valid_targets_min": 685 + }, + { + "epoch": 6.569678407350689, + "grad_norm": 0.46911153856516075, + "learning_rate": 4.621689513401739e-07, + "loss": 0.1566, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14522890746593475, + "step": 4290, + "valid_targets_mean": 5616.9, + "valid_targets_min": 736 + }, + { + "epoch": 6.577335375191424, + "grad_norm": 0.49903648422517666, + "learning_rate": 4.4598592007083277e-07, + "loss": 0.1468, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15592968463897705, + "step": 4295, + "valid_targets_mean": 4984.1, + "valid_targets_min": 751 + }, + { + "epoch": 6.584992343032159, + "grad_norm": 0.562087312070281, + "learning_rate": 4.300880941927399e-07, + "loss": 0.1584, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15358403325080872, + "step": 4300, + "valid_targets_mean": 5085.4, + "valid_targets_min": 1655 + }, + { + "epoch": 6.592649310872894, + "grad_norm": 0.47791617240764883, + "learning_rate": 4.1447570558388774e-07, + "loss": 0.1532, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1530168354511261, + "step": 4305, + "valid_targets_mean": 5281.8, + "valid_targets_min": 854 + }, + { + "epoch": 6.60030627871363, + "grad_norm": 0.45343705198477624, + "learning_rate": 3.991489819590322e-07, + "loss": 0.1468, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15143750607967377, + "step": 4310, + "valid_targets_mean": 5688.8, + "valid_targets_min": 717 + }, + { + "epoch": 6.607963246554364, + "grad_norm": 0.6088682794383126, + "learning_rate": 3.8410814686634214e-07, + "loss": 0.1557, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1674603819847107, + "step": 4315, + "valid_targets_mean": 4535.4, + "valid_targets_min": 712 + }, + { + "epoch": 6.6156202143951, + "grad_norm": 0.5417614540657483, + "learning_rate": 3.6935341968417305e-07, + "loss": 0.1538, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1633644849061966, + "step": 4320, + "valid_targets_mean": 4694.1, + "valid_targets_min": 310 + }, + { + "epoch": 6.623277182235834, + "grad_norm": 0.5680715181021335, + "learning_rate": 3.548850156178274e-07, + "loss": 0.161, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15728434920310974, + "step": 4325, + "valid_targets_mean": 5259.0, + "valid_targets_min": 478 + }, + { + "epoch": 6.63093415007657, + "grad_norm": 0.4664775223352316, + "learning_rate": 3.407031456964571e-07, + "loss": 0.156, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1763657182455063, + "step": 4330, + "valid_targets_mean": 5762.2, + "valid_targets_min": 490 + }, + { + "epoch": 6.638591117917304, + "grad_norm": 0.6365218167161506, + "learning_rate": 3.2680801676995724e-07, + "loss": 0.1682, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19776296615600586, + "step": 4335, + "valid_targets_mean": 4274.9, + "valid_targets_min": 466 + }, + { + "epoch": 6.64624808575804, + "grad_norm": 0.4910720869258167, + "learning_rate": 3.1319983150595035e-07, + "loss": 0.1412, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1406899392604828, + "step": 4340, + "valid_targets_mean": 5006.8, + "valid_targets_min": 1618 + }, + { + "epoch": 6.653905053598775, + "grad_norm": 0.569329802811622, + "learning_rate": 2.998787883868537e-07, + "loss": 0.1539, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16043078899383545, + "step": 4345, + "valid_targets_mean": 3629.7, + "valid_targets_min": 599 + }, + { + "epoch": 6.66156202143951, + "grad_norm": 0.5169327909748895, + "learning_rate": 2.868450817069501e-07, + "loss": 0.1523, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18156372010707855, + "step": 4350, + "valid_targets_mean": 5023.1, + "valid_targets_min": 1035 + }, + { + "epoch": 6.669218989280245, + "grad_norm": 0.5620209579007543, + "learning_rate": 2.7409890156958607e-07, + "loss": 0.1576, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16176734864711761, + "step": 4355, + "valid_targets_mean": 4832.9, + "valid_targets_min": 1225 + }, + { + "epoch": 6.676875957120981, + "grad_norm": 0.4949524076974463, + "learning_rate": 2.616404338843803e-07, + "loss": 0.1547, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16081801056861877, + "step": 4360, + "valid_targets_mean": 5138.5, + "valid_targets_min": 850 + }, + { + "epoch": 6.684532924961715, + "grad_norm": 0.4562359420788241, + "learning_rate": 2.4946986036451294e-07, + "loss": 0.1536, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16743341088294983, + "step": 4365, + "valid_targets_mean": 5915.8, + "valid_targets_min": 805 + }, + { + "epoch": 6.692189892802451, + "grad_norm": 0.4911008520965554, + "learning_rate": 2.375873585240851e-07, + "loss": 0.1629, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14914241433143616, + "step": 4370, + "valid_targets_mean": 4897.8, + "valid_targets_min": 321 + }, + { + "epoch": 6.699846860643185, + "grad_norm": 0.5140272838780794, + "learning_rate": 2.2599310167551902e-07, + "loss": 0.1585, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15968918800354004, + "step": 4375, + "valid_targets_mean": 4938.5, + "valid_targets_min": 453 + }, + { + "epoch": 6.707503828483921, + "grad_norm": 0.5219624600556243, + "learning_rate": 2.1468725892704212e-07, + "loss": 0.161, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15002906322479248, + "step": 4380, + "valid_targets_mean": 5464.5, + "valid_targets_min": 360 + }, + { + "epoch": 6.715160796324655, + "grad_norm": 0.5319553307201998, + "learning_rate": 2.0366999518020015e-07, + "loss": 0.1516, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15449857711791992, + "step": 4385, + "valid_targets_mean": 4223.0, + "valid_targets_min": 313 + }, + { + "epoch": 6.722817764165391, + "grad_norm": 0.6335557414758894, + "learning_rate": 1.9294147112748129e-07, + "loss": 0.1673, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18669471144676208, + "step": 4390, + "valid_targets_mean": 3495.2, + "valid_targets_min": 382 + }, + { + "epoch": 6.730474732006126, + "grad_norm": 0.5264194401944684, + "learning_rate": 1.8250184324994258e-07, + "loss": 0.172, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15428876876831055, + "step": 4395, + "valid_targets_mean": 4849.3, + "valid_targets_min": 635 + }, + { + "epoch": 6.738131699846861, + "grad_norm": 0.5483060983513385, + "learning_rate": 1.7235126381494716e-07, + "loss": 0.1608, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15400370955467224, + "step": 4400, + "valid_targets_mean": 3996.1, + "valid_targets_min": 763 + }, + { + "epoch": 6.745788667687596, + "grad_norm": 0.484798407756023, + "learning_rate": 1.6248988087393946e-07, + "loss": 0.1649, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18481765687465668, + "step": 4405, + "valid_targets_mean": 5657.8, + "valid_targets_min": 625 + }, + { + "epoch": 6.7534456355283305, + "grad_norm": 0.4730938326668684, + "learning_rate": 1.529178382602803e-07, + "loss": 0.149, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15649360418319702, + "step": 4410, + "valid_targets_mean": 5236.0, + "valid_targets_min": 559 + }, + { + "epoch": 6.761102603369066, + "grad_norm": 0.5092523070882127, + "learning_rate": 1.4363527558715286e-07, + "loss": 0.1685, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1409115046262741, + "step": 4415, + "valid_targets_mean": 5457.8, + "valid_targets_min": 874 + }, + { + "epoch": 6.7687595712098005, + "grad_norm": 0.524901721297739, + "learning_rate": 1.346423282455267e-07, + "loss": 0.1477, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1688336730003357, + "step": 4420, + "valid_targets_mean": 4615.9, + "valid_targets_min": 713 + }, + { + "epoch": 6.776416539050536, + "grad_norm": 0.5105117143096871, + "learning_rate": 1.259391274021815e-07, + "loss": 0.1609, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1753007471561432, + "step": 4425, + "valid_targets_mean": 4862.4, + "valid_targets_min": 594 + }, + { + "epoch": 6.784073506891271, + "grad_norm": 0.5143595147260921, + "learning_rate": 1.1752579999779523e-07, + "loss": 0.1497, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18723537027835846, + "step": 4430, + "valid_targets_mean": 5300.6, + "valid_targets_min": 602 + }, + { + "epoch": 6.791730474732006, + "grad_norm": 0.47454601556028797, + "learning_rate": 1.094024687450923e-07, + "loss": 0.1543, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13127397000789642, + "step": 4435, + "valid_targets_mean": 4709.1, + "valid_targets_min": 614 + }, + { + "epoch": 6.799387442572741, + "grad_norm": 0.520781711890744, + "learning_rate": 1.0156925212705171e-07, + "loss": 0.1466, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1573190838098526, + "step": 4440, + "valid_targets_mean": 4981.8, + "valid_targets_min": 929 + }, + { + "epoch": 6.807044410413476, + "grad_norm": 0.4496519457965003, + "learning_rate": 9.402626439518393e-08, + "loss": 0.1629, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14165663719177246, + "step": 4445, + "valid_targets_mean": 5795.9, + "valid_targets_min": 3105 + }, + { + "epoch": 6.814701378254211, + "grad_norm": 0.6215392778464087, + "learning_rate": 8.677361556786113e-08, + "loss": 0.1545, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18268686532974243, + "step": 4450, + "valid_targets_mean": 6543.2, + "valid_targets_min": 535 + }, + { + "epoch": 6.822358346094946, + "grad_norm": 0.49471966346976204, + "learning_rate": 7.98114114287052e-08, + "loss": 0.1512, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15616750717163086, + "step": 4455, + "valid_targets_mean": 4753.4, + "valid_targets_min": 910 + }, + { + "epoch": 6.830015313935681, + "grad_norm": 0.4930676116391729, + "learning_rate": 7.313975352506442e-08, + "loss": 0.1605, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18801480531692505, + "step": 4460, + "valid_targets_mean": 4775.8, + "valid_targets_min": 581 + }, + { + "epoch": 6.837672281776417, + "grad_norm": 0.52961697008991, + "learning_rate": 6.675873916651032e-08, + "loss": 0.1587, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17023101449012756, + "step": 4465, + "valid_targets_mean": 4687.8, + "valid_targets_min": 619 + }, + { + "epoch": 6.845329249617151, + "grad_norm": 0.49808821468991554, + "learning_rate": 6.066846142343208e-08, + "loss": 0.1562, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15167677402496338, + "step": 4470, + "valid_targets_mean": 4647.4, + "valid_targets_min": 689 + }, + { + "epoch": 6.852986217457887, + "grad_norm": 0.5002402449999928, + "learning_rate": 5.4869009125677606e-08, + "loss": 0.1577, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16921664774417877, + "step": 4475, + "valid_targets_mean": 5446.4, + "valid_targets_min": 801 + }, + { + "epoch": 6.860643185298621, + "grad_norm": 0.49397088265179795, + "learning_rate": 4.936046686125018e-08, + "loss": 0.1558, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18386386334896088, + "step": 4480, + "valid_targets_mean": 5190.6, + "valid_targets_min": 1004 + }, + { + "epoch": 6.868300153139357, + "grad_norm": 0.4374868435323064, + "learning_rate": 4.414291497508494e-08, + "loss": 0.1614, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12061470746994019, + "step": 4485, + "valid_targets_mean": 5660.0, + "valid_targets_min": 747 + }, + { + "epoch": 6.875957120980092, + "grad_norm": 0.46344991151869086, + "learning_rate": 3.921642956786764e-08, + "loss": 0.1526, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15344613790512085, + "step": 4490, + "valid_targets_mean": 5533.7, + "valid_targets_min": 1023 + }, + { + "epoch": 6.883614088820827, + "grad_norm": 0.5499673381962632, + "learning_rate": 3.4581082494933306e-08, + "loss": 0.1378, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1682521402835846, + "step": 4495, + "valid_targets_mean": 5424.2, + "valid_targets_min": 680 + }, + { + "epoch": 6.891271056661562, + "grad_norm": 0.5661611699091585, + "learning_rate": 3.023694136521149e-08, + "loss": 0.1537, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19145449995994568, + "step": 4500, + "valid_targets_mean": 5180.9, + "valid_targets_min": 901 + }, + { + "epoch": 6.898928024502297, + "grad_norm": 0.5138739009042879, + "learning_rate": 2.6184069540244883e-08, + "loss": 0.1493, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1396559178829193, + "step": 4505, + "valid_targets_mean": 4953.2, + "valid_targets_min": 571 + }, + { + "epoch": 6.906584992343032, + "grad_norm": 0.4734940775508067, + "learning_rate": 2.2422526133258905e-08, + "loss": 0.1566, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1580895334482193, + "step": 4510, + "valid_targets_mean": 5021.9, + "valid_targets_min": 576 + }, + { + "epoch": 6.914241960183768, + "grad_norm": 0.4706534435530353, + "learning_rate": 1.8952366008309076e-08, + "loss": 0.1721, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1342153698205948, + "step": 4515, + "valid_targets_mean": 5286.7, + "valid_targets_min": 2509 + }, + { + "epoch": 6.921898928024502, + "grad_norm": 0.48739925177110227, + "learning_rate": 1.5773639779470552e-08, + "loss": 0.1599, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1816117912530899, + "step": 4520, + "valid_targets_mean": 5406.6, + "valid_targets_min": 975 + }, + { + "epoch": 6.929555895865238, + "grad_norm": 0.5208874030422277, + "learning_rate": 1.288639381010759e-08, + "loss": 0.1542, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15851663053035736, + "step": 4525, + "valid_targets_mean": 4442.0, + "valid_targets_min": 763 + }, + { + "epoch": 6.937212863705972, + "grad_norm": 0.48457417601240316, + "learning_rate": 1.0290670212191878e-08, + "loss": 0.152, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.165914848446846, + "step": 4530, + "valid_targets_mean": 4708.8, + "valid_targets_min": 345 + }, + { + "epoch": 6.944869831546708, + "grad_norm": 0.4583395498556372, + "learning_rate": 7.986506845696351e-09, + "loss": 0.1599, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13576406240463257, + "step": 4535, + "valid_targets_mean": 4882.4, + "valid_targets_min": 495 + }, + { + "epoch": 6.952526799387442, + "grad_norm": 0.5546301713936433, + "learning_rate": 5.973937318028977e-09, + "loss": 0.1567, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17114219069480896, + "step": 4540, + "valid_targets_mean": 4222.1, + "valid_targets_min": 291 + }, + { + "epoch": 6.960183767228178, + "grad_norm": 0.4983059823209655, + "learning_rate": 4.2529909835553604e-09, + "loss": 0.1405, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13904789090156555, + "step": 4545, + "valid_targets_mean": 4797.2, + "valid_targets_min": 803 + }, + { + "epoch": 6.967840735068913, + "grad_norm": 0.45715360287978724, + "learning_rate": 2.8236929431701975e-09, + "loss": 0.1495, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15358659625053406, + "step": 4550, + "valid_targets_mean": 6263.0, + "valid_targets_min": 3534 + }, + { + "epoch": 6.975497702909648, + "grad_norm": 0.48704332473921397, + "learning_rate": 1.6860640439197995e-09, + "loss": 0.1583, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14753298461437225, + "step": 4555, + "valid_targets_mean": 4750.9, + "valid_targets_min": 792 + }, + { + "epoch": 6.983154670750383, + "grad_norm": 0.6282723983499863, + "learning_rate": 8.401208787112147e-10, + "loss": 0.1647, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16771043837070465, + "step": 4560, + "valid_targets_mean": 4766.7, + "valid_targets_min": 680 + }, + { + "epoch": 6.990811638591118, + "grad_norm": 0.49667792484401724, + "learning_rate": 2.858757860590977e-10, + "loss": 0.1633, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1469840109348297, + "step": 4565, + "valid_targets_mean": 4585.7, + "valid_targets_min": 648 + }, + { + "epoch": 6.998468606431853, + "grad_norm": 0.5186330018791039, + "learning_rate": 2.3336849919175508e-11, + "loss": 0.1821, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18415594100952148, + "step": 4570, + "valid_targets_mean": 5133.5, + "valid_targets_min": 1042 + }, + { + "epoch": 7.0, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15374836325645447, + "step": 4571, + "total_flos": 2281086641373184.0, + "train_loss": 0.20197117474362877, + "train_runtime": 35007.247, + "train_samples_per_second": 2.086, + "train_steps_per_second": 0.131, + "valid_targets_mean": 5963.9, + "valid_targets_min": 640 + } + ], + "logging_steps": 5, + "max_steps": 4571, + "num_input_tokens_seen": 0, + "num_train_epochs": 7, + "save_steps": 1500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": true + }, + "attributes": {} + } + }, + "total_flos": 2281086641373184.0, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +}